Code Example #1
File: atom_groups.py Project: prabhjotSL/graphbrain
    def generate_synonyms(self):
        # find atoms and parent-child synonym relationships
        children = {}
        for i in range(len(self.sorted_atoms)):
            parents = []
            satom1 = self.sorted_atoms[i][0]
            atom1 = ed.str2edge(satom1)
            cur_depth = self.sorted_atoms[i][1]
            start = i
            while (start < len(self.sorted_atoms)
                   and self.sorted_atoms[start][1] <= cur_depth):
                start += 1
            for j in range(start, len(self.sorted_atoms)):
                satom2 = self.sorted_atoms[j][0]
                atom2 = ed.str2edge(satom2)
                if atom1 in atom2:
                    parents.append(satom2)
            if len(parents) == 1:
                satom2 = parents[0]
                if satom2 not in children:
                    children[satom2] = []
                children[satom2].append(satom1)
                if satom1 in children:
                    children[satom2] += children[satom1]

        # build synonym sets
        self.synonym_sets = []
        for satom in children:
            synonyms = [satom] + children[satom]
            count = 0
            for synonym in synonyms:
                count += self.edge_counts[synonym]
                del self.atoms[synonym]
            self.synonym_sets.append({
                'edges': synonyms,
                'count': count,
                'index': len(self.synonym_sets)
            })
            print(synonyms)

        for atom in self.atoms:
            self.synonym_sets.append({
                'edges': [atom],
                'count': self.edge_counts[atom],
                'index': len(self.synonym_sets)
            })

        # build synonym map
        self.synonym_map = {}
        for sset in self.synonym_sets:
            for synonym in sset['edges']:
                self.synonym_map[synonym] = sset['index']
            self.synonym_map[sset['index']] = sset
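A quick sketch of the structures this method leaves behind (all values hypothetical, for illustration only):

    # assuming 'trump' was folded under the parent atom '(+ donald trump)':
    # self.synonym_sets[0] == {'edges': ['(+ donald trump)', 'trump'],
    #                          'count': 12, 'index': 0}
    # self.synonym_map['trump'] == 0               # atom string -> set index
    # self.synonym_map[0] is self.synonym_sets[0]  # set index -> set dict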
Code Example #2
File: similarity.py Project: FlyDogFan/graphbrain
    def similar_edges(self, targ_edge):
        edges = self.hg.all()

        targ_eedge = enrich_edge(self.parser, targ_edge)

        sims = {}
        for edge in edges:
            if edge != targ_edge and not exclude(edge):
                eedge = enrich_edge(self.parser, edge)
                total_sim = eedge_similarity(targ_eedge, eedge)
                if total_sim >= self.sim_threshold:
                    sims[ed.edge2str(edge)] = total_sim

        sorted_edges = sorted(sims.items(),
                              key=operator.itemgetter(1),
                              reverse=True)

        result = []
        for e in sorted_edges:
            edge_data = {
                'edge': e[0],
                'sim': e[1],
                'text': self.hg.get_str_attribute(ed.str2edge(e[0]), 'text')
            }
            result.append(edge_data)
        return result
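A minimal usage sketch, reusing the names built in the __main__ block of Example #22 (result values hypothetical):

    s = Similarity(hgr, par)
    ranked = s.similar_edges(ed.str2edge(te))
    # ranked[0] is the strongest match: {'edge': ..., 'sim': ..., 'text': ...}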
Code Example #3
File: similarity.py Project: FlyDogFan/graphbrain
    def edges_with_similar_concepts(self, targ_edge):
        edges = self.hg.all()

        targ_eedge = enrich_edge(self.parser, targ_edge)

        sims = {}
        for edge in edges:
            if edge != targ_edge and not exclude(edge):
                eedge = enrich_edge(self.parser, edge)
                total_sim, worst_sim, complete, matches = edge_concepts_similarity(
                    targ_eedge, eedge)
                if complete and worst_sim >= self.sim_threshold:
                    sims[ed.edge2str(edge)] = (worst_sim, total_sim, matches)

        sorted_edges = sorted(sims.items(),
                              key=operator.itemgetter(1),
                              reverse=True)

        result = []
        for e in sorted_edges:
            edge_data = {
                'edge': e[0],
                'worst_sim': e[1][0],
                'sim': e[1][1],
                'matches': e[1][2],
                'text': self.hg.get_str_attribute(ed.str2edge(e[0]), 'text')
            }
            result.append(edge_data)
        return result
Code Example #4
def write_edge_data(edge_data, file_path):
    with open(file_path, 'w') as f:
        for e in edge_data:
            # f.write('%s\n' % json.dumps(e, separators=(',', ':')))
            f.write('%s\n' % str(e['sim']))
            f.write('%s\n' % e['text'])
            f.write('%s\n' % ed.edge2str(ed.without_namespaces(ed.str2edge(e['edge']))))
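write_edge_data is the sink for the similarity methods above; a hedged sketch of its expected input (file name hypothetical):

# edge_data must be a list of dicts with at least 'sim', 'text' and 'edge'
# keys, e.g. the output of similar_edges():
# write_edge_data(s.similar_edges(targ_edge), 'similar_edges.txt')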
Code Example #5
File: leveldb.py Project: mazieres/graphbrain
    def all(self):
        """Returns a lazy sequence of all the vertices in the hypergraph."""
        start_str = 'v'
        end_str = str_plus_1(start_str)
        start_key = (u'%s' % start_str).encode('utf-8')
        end_key = (u'%s' % end_str).encode('utf-8')

        for key, value in self.db.iterator(start=start_key, stop=end_key):
            vert = ed.str2edge(key.decode('utf-8')[1:])
            yield vert
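Because all() is a generator, it can be consumed lazily; a trivial sketch (hg assumed to be a LevelDB-backed hypergraph instance, name hypothetical):

    # count vertices without materialising the whole sequence:
    # n = sum(1 for _ in hg.all())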
Code Example #6
File: sql.py Project: graphbrain/graphbrain
    def f_all(self, f):
        """Returns a lazy sequence resulting from applying f to every
           vertex map (including non-atomic) in the hypergraph.
           A vertex map contains the keys vertex and degree."""
        cur = self.open_cursor()
        cur.execute('SELECT id, degree FROM vertices')
        for row in cur:
            vmap = {'vertex': ed.str2edge(row[0]), 'degree': row[1]}
            yield f(vmap)
        self.close_cursor(cur, local=True, commit=False)
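A usage sketch for f_all (hg assumed to be an SQL-backed hypergraph instance, name hypothetical):

    # collect every vertex's degree lazily:
    # degrees = list(hg.f_all(lambda vmap: vmap['degree']))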
Code Example #7
File: leveldb.py Project: prabhjotSL/graphbrain
def perm2edge(perm_str):
    """Transforms a permutation string from a database query into an edge."""
    try:
        tokens = ed.split_edge_str(perm_str[1:])
        nper = int(tokens[-1])
        tokens = tokens[:-1]
        tokens = unpermutate(tokens, nper)
        return ed.str2edge(' '.join(tokens))
    except ValueError as v:
        print(u'VALUE ERROR! %s perm2edge %s' % (v, perm_str))
        return None
Code Example #8
    def generate_synonyms(self):
        sorted_atoms = sorted(self.atoms.items(), key=operator.itemgetter(1), reverse=False)
        for atom_pair in sorted_atoms:
            orig = self.graph.vs.find(atom_pair[0])
            edges = self.graph.incident(orig.index, mode='in')
            if len(edges) > 0:
                max_weight = max([self.graph.es[e]['weight'] for e in edges])
            else:
                max_weight = 0.
            if max_weight > .1:
                for e in edges:
                    edge = self.graph.es[e]
                    if edge['weight'] == max_weight:
                        source = self.graph.vs[edge.source]['name']
                        target = self.graph.vs[edge.target]['name']
                        source_syn_id = self.syn_id(source)
                        target_syn_id = self.syn_id(target)

                        if not (source_syn_id and target_syn_id):
                            if self.valid_synonym_parent(source):
                                if source_syn_id:
                                    self.syn_ids[target] = source_syn_id
                                elif target_syn_id:
                                    self.syn_ids[source] = target_syn_id
                                else:
                                    syn_id = self.new_syn_id()
                                    self.syn_ids[source] = syn_id
                                    self.syn_ids[target] = syn_id
                            else:
                                if not target_syn_id:
                                    syn_id = self.new_syn_id()
                                    self.syn_ids[target] = syn_id

        # filter out multiple synonyms
        delete_synonyms = set()
        for atom in self.syn_ids:
            if len(self.synonym_ids_in(ed.str2edge(atom))) > 1:
                delete_synonyms.add(self.syn_ids[atom])

        # generate synonym sets
        for atom in self.atoms:
            syn_id = self.syn_id(atom)
            if syn_id:
                if syn_id in delete_synonyms:
                    new_id = self.new_syn_id()
                    self.syn_ids[atom] = new_id
                    self.synonym_sets[new_id] = {atom}
                else:
                    if syn_id not in self.synonym_sets:
                        self.synonym_sets[syn_id] = set()
                    self.synonym_sets[syn_id].add(atom)
            else:
                new_id = self.new_syn_id()
                self.syn_ids[atom] = new_id
                self.synonym_sets[new_id] = {atom}
Code Example #9
    def synonym_label(self, syn_id, short=False):
        if short:
            best_size = 0
            best_edge = None
            for atom in self.synonym_sets[syn_id]:
                edge = ed.str2edge(atom)
                if ed.size(edge) > best_size:
                    best_edge = edge
                    best_size = ed.size(edge)
            return edge2label(best_edge).replace('"', ' ')
        return '{%s}' % ', '.join(self.synonym_sets[syn_id])
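A hedged illustration of the two label modes (syn_id and set contents hypothetical):

    # synonym_label(3)              # -> '{trump, donald, (+ donald trump)}'
    # synonym_label(3, short=True)  # -> label of the largest edge in the set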
Code Example #10
File: leveldb.py Project: mazieres/graphbrain
    def symbols_with_root(self, root):
        """Find all symbols with the given root."""
        start_str = '%s/' % root
        end_str = str_plus_1(start_str)
        start_key = (u'v%s' % start_str).encode('utf-8')
        end_key = (u'v%s' % end_str).encode('utf-8')

        symbs = set()
        for key, value in self.db.iterator(start=start_key, stop=end_key):
            symb = ed.str2edge(key.decode('utf-8')[1:])
            symbs.add(symb)
        return symbs
Code Example #11
File: sql.py Project: graphbrain/graphbrain
def cur2edges(cur):
    """Transforms a cursor from a database query into a set of edges."""
    edges = []
    for row in cur:
        res = row[0]
        tokens = ed.split_edge_str(res)
        nper = int(tokens[-1])
        tokens = tokens[:-1]
        tokens = unpermutate(tokens, nper)
        edge = ed.str2edge(' '.join(tokens))
        edges.append(edge)
    return set(edges)
Code Example #12
File: leveldb.py Project: mazieres/graphbrain
def perm2edge(perm_str):
    """Transforms a permutation string from a database query into an edge."""
    try:
        tokens = ed.split_edge_str(perm_str[1:])
        if tokens is None:
            return None
        nper = int(tokens[-1])
        tokens = tokens[:-1]
        tokens = unpermutate(tokens, nper)
        return ed.str2edge(' '.join(tokens))
    except ValueError:
        return None
Code Example #13
File: leveldb.py Project: mazieres/graphbrain
    def all_attributes(self):
        """Returns a lazy sequence with a tuple for each vertex in the hypergraph.
           The first element of the tuple is the vertex itself,
           the second is a dictionary of attribute values (as strings)."""
        start_str = 'v'
        end_str = str_plus_1(start_str)
        start_key = (u'%s' % start_str).encode('utf-8')
        end_key = (u'%s' % end_str).encode('utf-8')

        for key, value in self.db.iterator(start=start_key, stop=end_key):
            vert = ed.str2edge(key.decode('utf-8')[1:])
            attributes = decode_attributes(value)
            yield (vert, attributes)
Code Example #14
File: test_edge.py Project: graphbrain/graphbrain
    def test_str2edge(self):
        self.assertEqual(ed.str2edge('(is graphbrain/1 great/1)'), ('is', 'graphbrain/1', 'great/1'))
        self.assertEqual(ed.str2edge('(size graphbrain/1 7)'), ('size', 'graphbrain/1', 7))
        self.assertEqual(ed.str2edge('(size graphbrain/1 7.0)'), ('size', 'graphbrain/1', 7.))
        self.assertEqual(ed.str2edge('(size graphbrain/1 -7)'), ('size', 'graphbrain/1', -7))
        self.assertEqual(ed.str2edge('(size graphbrain/1 -7.0)'), ('size', 'graphbrain/1', -7.))
        self.assertEqual(ed.str2edge('(src graphbrain/1 (is graphbrain/1 great/1))'),
                         ('src', 'graphbrain/1', ('is', 'graphbrain/1', 'great/1')))
Code Example #15
File: atom_groups.py Project: prabhjotSL/graphbrain
    def generate_atoms(self):
        # create atoms map -- edges with more than one occurrence
        self.atoms = {}
        for key in self.edge_counts:
            if self.edge_counts[key] > 1:
                self.atoms[key] = ed.depth(ed.str2edge(key))

        # build atom_set
        self.atom_set = set([atom for atom in self.atoms])

        # sorted by depth
        self.sorted_atoms = sorted(self.atoms.items(),
                                   key=operator.itemgetter(1),
                                   reverse=False)
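The resulting sorted_atoms list is what Example #1 walks by position; a hedged illustration of its shape (atom strings and depths hypothetical):

    # self.sorted_atoms == [('trump', 1), ('(+ donald trump)', 2), ...]
    # i.e. (atom string, edge depth) pairs, ascending by depth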
Code Example #16
File: vertex.py Project: prabhjotSL/graphbrain
def html(hg, eid):
    vertex = ed.str2edge(eid)
    if sym.sym_type(vertex) == sym.SymbolType.EDGE:
        title = edge_html(hg, vertex)
    else:
        title = '<h1>%s</h1>' % sym.symbol2str(eid)
    return """
<div class="container" role="main">
    <div class="page-header">
        %s
        <h4>%s</h4>
    </div>
    %s
</div>
    """ % (title, eid, edges_html(hg, vertex))
Code Example #17
    def test_str2edge(self):
        self.assertEqual(ed.str2edge('(is graphbrain/1 great/1)'),
                         ('is', 'graphbrain/1', 'great/1'))
        self.assertEqual(ed.str2edge('(size graphbrain/1 7)'),
                         ('size', 'graphbrain/1', 7))
        self.assertEqual(ed.str2edge('(size graphbrain/1 7.0)'),
                         ('size', 'graphbrain/1', 7.))
        self.assertEqual(ed.str2edge('(size graphbrain/1 -7)'),
                         ('size', 'graphbrain/1', -7))
        self.assertEqual(ed.str2edge('(size graphbrain/1 -7.0)'),
                         ('size', 'graphbrain/1', -7.))
        self.assertEqual(
            ed.str2edge('(src graphbrain/1 (is graphbrain/1 great/1))'),
            ('src', 'graphbrain/1', ('is', 'graphbrain/1', 'great/1')))
        self.assertEqual(ed.str2edge('((is my) graphbrain/1 (super great/1))'),
                         (('is', 'my'), 'graphbrain/1', ('super', 'great/1')))
        self.assertEqual(ed.str2edge('.'), '.')
Code Example #18
File: atom_groups.py Project: prabhjotSL/graphbrain
    def print_atom_groups(self):
        n = 0
        for k in self.atom_groups:
            atom_group = self.atom_groups[k]
            size = len(atom_group['sentences'])
            if size > 3:
                n += 1
                print('ATOM_GROUP id: %s' % n)
                print('Base concepts: %s' % atom_group['label'])
                print('size: %s' % size)
                print('sentences:')
                for sentence in atom_group['sentences']:
                    print('* %s' % sentence)
                print('edges:')
                for edge in atom_group['edges']:
                    print(
                        '* %s' %
                        ed.edge2str(ed.without_namespaces(ed.str2edge(edge))))
                print()
Code Example #19
File: graph.py Project: prabhjotSL/graphbrain
    return True


if __name__ == '__main__':
    print('creating parser...')
    par = par.Parser()
    print('parser created.')

    # read data
    # edge_data = json_tools.read('edges_similar_concepts.json')
    edge_data = json_tools.read('all.json')

    # build full edges list
    full_edges = []
    for it in edge_data:
        full_edges.append(ed.without_namespaces(ed.str2edge(it['edge'])))

    # synonym_set
    synset1 = []
    synset2 = []
    synset3 = []

    synset1 = ['trump', 'donald', '(+ donald trump)']
    # synset2 = ['ryan', '(+ paul ryan)', 'paul']
    synset2 = ['vladimir', '(+ vladimir putin)', 'putin']
    concepts1 = [ed.str2edge(x) for x in synset1]
    concepts2 = [ed.str2edge(x) for x in synset2]
    concepts3 = [ed.str2edge(x) for x in synset3]

    concept_sets = [concepts1, concepts2, concepts3]
Code Example #20
                'sim': e[1][1],
                'matches': e[1][2],
                'text': self.hg.get_str_attribute(ed.str2edge(e[0]), 'text')
            }
            result.append(edge_data)
        return result

    def write_similar_edges(self, targ_edge, file_path):
        edge_data = self.similar_edges(targ_edge)
        write_edge_data(edge_data, file_path)

    def write_edges_with_similar_concepts(self, targ_edge, file_path):
        edge_data = self.edges_with_similar_concepts(targ_edge)
        write_edge_data(edge_data, file_path)


if __name__ == '__main__':
    hgr = hyperg.HyperGraph({'backend': 'leveldb', 'hg': 'reddit-politics.hg'})

    print('creating parser...')
    par = par.Parser()
    print('parser created.')

    te = '(clinches/nlp.clinch.verb clinton/nlp.clinton.noun ' \
         '(+/gb democratic/nlp.democratic.adj nomination/nlp.nomination.noun))'

    s = SimilarityFilter(hgr, par)
    s.write_edges_with_similar_concepts(ed.str2edge(te),
                                        'edges_similar_concepts.json')
    # s.write_similar_edges(ed.str2edge(te), 'similar_edges.json')
Code Example #21
import operator  # used by sorted(..., key=operator.itemgetter(1)) below
from sklearn.cluster import DBSCAN
import gb.tools.json as json_tools
import gb.hypergraph.edge as ed
import gb.nlp.parser as par
from gb.explore.similarity import edge_similarity

if __name__ == '__main__':
    print('creating parser...')
    par = par.Parser()
    print('parser created.')

    edge_data = json_tools.read('edges_similar_concepts.json')

    extra_edges = {}
    for item in edge_data:
        edge = ed.str2edge(item['edge'])
        matched = [ed.str2edge(match[1]) for match in item['matches']]
        for part in edge[1:]:
            if part not in matched:
                key = ed.edge2str(part)
                if key in extra_edges:
                    extra_edges[key] += 1
                else:
                    extra_edges[key] = 1

    sorted_edges = sorted(extra_edges.items(),
                          key=operator.itemgetter(1),
                          reverse=False)
    print(sorted_edges)
    print(len(sorted_edges))
Code Example #22
File: similarity.py Project: FlyDogFan/graphbrain
                'worst_sim': e[1][0],
                'sim': e[1][1],
                'matches': e[1][2],
                'text': self.hg.get_str_attribute(ed.str2edge(e[0]), 'text')
            }
            result.append(edge_data)
        return result

    def write_similar_edges(self, targ_edge, file_path):
        edge_data = self.similar_edges(targ_edge)
        write_edge_data(edge_data, file_path)

    def write_edges_with_similar_concepts(self, targ_edge, file_path):
        edge_data = self.edges_with_similar_concepts(targ_edge)
        write_edge_data(edge_data, file_path)


if __name__ == '__main__':
    hgr = hyperg.HyperGraph({'backend': 'leveldb', 'hg': 'reddit-politics.hg'})

    print('creating parser...')
    par = par.Parser()
    print('parser created.')

    te = '(clinches/nlp.clinch.verb clinton/nlp.clinton.noun ' \
         '(+/gb democratic/nlp.democratic.adj nomination/nlp.nomination.noun))'

    s = Similarity(hgr, par)
    # s.write_edges_with_similar_concepts(ed.str2edge(te), 'edges_similar_concepts.json')
    s.write_similar_edges(ed.str2edge(te), 'similar_edges.json')
Code Example #23
    def generate_synonyms(self):
        # init synonym data
        self.syn_ids = {}
        self.synonym_sets = {}
        self.cur_syn_id = 0

        total_atoms = len(self.atoms)

        # generate synonyms
        print('generating synonyms')
        i = 0
        with progressbar.ProgressBar(max_value=total_atoms) as bar:
            sorted_atoms = sorted(self.atoms.items(),
                                  key=operator.itemgetter(1),
                                  reverse=False)
            for atom_pair in sorted_atoms:
                orig = self.graph.vs.find(atom_pair[0])
                edges = self.graph.incident(orig.index, mode='in')
                edges = [self.graph.es[edge] for edge in edges]
                edges = [(self.graph.vs[edge.source]['name'],
                          self.graph.vs[edge.target]['name'], edge['weight'],
                          edge['norm_weight']) for edge in edges]
                edges = sorted(edges, key=operator.itemgetter(3), reverse=True)

                ambiguous = False

                for pos in range(len(edges)):
                    is_synonym = False

                    edge = edges[pos]
                    source = edge[0]
                    target = edge[1]
                    weight = edge[2]
                    norm_weight = edge[3]

                    source_edge = ed.str2edge(source)
                    if weight > WEIGHT_THRESHOLD:
                        if semantic_synonyms(source, target):
                            is_synonym = True
                        elif (not ambiguous
                              and norm_weight >= NORM_WEIGHT_THRESHOLD
                              and is_candidate(source_edge)):
                            pos_next = next_candidate_pos(edges, pos)
                            if pos_next < 0:
                                is_synonym = True
                            else:
                                next_weight = edges[pos_next][3]
                                if next_weight < NORM_WEIGHT_THRESHOLD:
                                    is_synonym = True
                                else:
                                    ambiguous = True

                    if is_synonym:
                        source_syn_id = self.syn_id(source)
                        target_syn_id = self.syn_id(target)

                        if target_syn_id:
                            self.syn_ids[source] = target_syn_id
                        elif source_syn_id:
                            self.syn_ids[target] = source_syn_id
                        else:
                            syn_id = self.new_syn_id()
                            self.syn_ids[source] = syn_id
                            self.syn_ids[target] = syn_id

                i += 1
                if (i % 1000) == 0:
                    bar.update(i)
            bar.update(i)

        # generate synonym sets
        print('generating synonym sets')
        i = 0
        with progressbar.ProgressBar(max_value=total_atoms) as bar:
            for atom in self.atoms:
                syn_id = self.syn_id(atom)
                if syn_id:
                    if syn_id not in self.synonym_sets:
                        self.synonym_sets[syn_id] = set()
                    self.synonym_sets[syn_id].add(atom)
                else:
                    new_id = self.new_syn_id()
                    self.syn_ids[atom] = new_id
                    self.synonym_sets[new_id] = {atom}
                i += 1
                if (i % 1000) == 0:
                    bar.update(i)
            bar.update(i)
Code Example #24
def next_candidate_pos(edges, pos):
    for i in range(pos + 1, len(edges)):
        if is_candidate(ed.str2edge(edges[i][0])):
            return i
    return -1
Code Example #25
File: graph.py Project: prabhjotSL/graphbrain
    def contains_synonym(self, full_edge, syn_id):
        for atom in self.meronomy.synonym_sets[syn_id]:
            edge = ed.str2edge(atom)
            if ed.contains(full_edge, edge, deep=True):
                return True
        return False
Code Example #26
File: atom_groups.py Project: prabhjotSL/graphbrain
    # build extra edges list
    # extra_edges = []
    # full_edges = []
    # for it in edge_data:
    #     e = ed.str2edge(it['edge'])
    #     full_edges.append(e)
    #     matched = [ed.str2edge(match[1]) for match in it['matches']]
    #     for part in e[1:]:
    #         if part not in matched:
    #             extra_edges.append(part)

    edge_data = json_tools.read('all.json')
    # build full edges list
    extra_edges = []
    for it in edge_data:
        extra_edges.append(ed.without_namespaces(ed.str2edge(it['edge'])))
    full_edges = extra_edges

    ag = AtomGroups(par)
    print('set edges')
    ag.set_edges(extra_edges)
    print('generate_atoms')
    ag.generate_atoms()
    print('generate synonyms')
    ag.generate_synonyms()
    print('generate atom groups')
    ag.generate_atom_groups()
    ag.print_atom_groups()
    print('generate atom group clusters')
    ag.generate_atom_group_clusters(full_edges)
    ag.print_atom_group_clusters()
Code Example #27
File: atom_groups.py Project: prabhjotSL/graphbrain
    def generate_atom_groups(self):
        nsyns = len(self.synonym_sets)

        # build co-occurrence sparse matrix
        # (extra_edges and edge_data used below are module-level globals,
        # built in the __main__ block shown in Example #26)
        synonym_cooc = sps.lil_matrix((nsyns, nsyns))
        for edge in extra_edges:
            co_synonyms = self.find_co_synonyms(edge)
            if len(co_synonyms) > 1:
                for pair in itertools.combinations(co_synonyms, 2):
                    synonym_cooc[pair[0], pair[1]] += 1
                    synonym_cooc[pair[1], pair[0]] += 1

        # normalize matrix
        synonym_cooc = normalize(synonym_cooc, norm='l1', axis=1, copy=False)

        # iterate matrix, build graph
        gedges = []
        weights = []
        cx = synonym_cooc.tocoo()
        for i, j, v in zip(cx.row, cx.col, cx.data):
            gedges.append((i, j))
            weights.append(v)
        g = igraph.Graph()
        g.add_vertices(nsyns)
        g.add_edges(gedges)
        g.es['weight'] = weights

        # community detection
        comms = igraph.Graph.community_multilevel(g,
                                                  weights='weight',
                                                  return_levels=False)

        # build atom_groups
        self.atom_groups = {}
        for i in range(len(comms)):
            comm = comms[i]
            count = 0
            syns = []
            sentences = set()
            edges = []
            for item in comm:
                edges += self.synonym_map[item]['edges']

                for atom in self.synonym_map[item]['edges']:
                    for edat in edge_data:
                        # strip namespaces from the stored edge, then test
                        # whether it (deeply) contains this atom
                        if ed.contains(ed.str2edge(
                                ed.edge2str(ed.str2edge(edat['edge']),
                                            namespaces=False)),
                                       ed.str2edge(atom),
                                       deep=True):
                            if edat['text']:
                                sentences.add(edat['text'])
                syns.append(self.synonym_map[item])
                count += self.synonym_map[item]['count']
            label = ', '.join(edges)
            atom_group = {
                'label': label,
                'syns': syns,
                'count': count,
                'sentences': sentences,
                'edges': edges
            }
            self.atom_groups[i] = atom_group