def _compose(self, ga, gb):
    feature_size, bitmask = set_feature_size(nbits=14)
    df = decompose_break(min_size=self.min_size,
                         max_size=self.max_size,
                         n_edges=self.n_edges)
    encoding_func = make_encoder(df,
                                 preprocessors=None,
                                 bitmask=bitmask,
                                 seed=1)
    codes_a, fragments_a = encoding_func(ga)
    codes_b, fragments_b = encoding_func(gb)
    gs = []
    for fragment_a in fragments_a:
        fragment_a = nx.convert_node_labels_to_integers(fragment_a)
        for fragment_b in fragments_b:
            fragment_b = nx.convert_node_labels_to_integers(fragment_b)
            # Choose one node in fragment_a and one in fragment_b and join
            # them with a randomly labelled edge.
            ia = random.choice(range(len(fragment_a)))
            jb = random.choice(range(len(fragment_b))) + len(fragment_a)
            g0 = nx.disjoint_union(fragment_a, fragment_b)
            assert jb in list(g0.nodes())
            g0.add_edge(ia, jb, label=random.choice(self.edge_labels))
            gs.append(g0.copy())
    return gs
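For context, the encoder returned by make_encoder maps a graph to two parallel lists: one hash code per decomposed fragment and the corresponding fragment subgraphs, which is what _compose relies on. A minimal usage sketch, not taken from the source; set_feature_size, make_encoder and decompose_break are assumed to be importable from the graph-encoding library these snippets come from, and the decomposition parameters are placeholders:

import networkx as nx

# Build a tiny labelled graph to encode.
g = nx.cycle_graph(5)
nx.set_node_attributes(g, 'A', 'label')
nx.set_edge_attributes(g, '1', 'label')

feature_size, bitmask = set_feature_size(nbits=14)
encode = make_encoder(decompose_break(min_size=2, max_size=4, n_edges=1),
                      preprocessors=None, bitmask=bitmask, seed=1)
codes, fragments = encode(g)
# Parallel lists: one hash code per fragment subgraph.
assert len(codes) == len(fragments)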
Example #2
def build_node_proximity_data_matrix(nodes_mtx,
                                     data_matrix,
                                     nbits,
                                     max_num_node_features=1):
    feature_size, bitmask = set_feature_size(nbits=nbits)
    data, row, col = [], [], []
    for node_id, (node_vec, row_vec) in enumerate(zip(nodes_mtx, data_matrix)):
        # All sorted combinations of the node's feature ids, using up to
        # max_num_node_features ids per combination.
        all_combinations = [
            sorted(comb)
            for combinations_order in range(1, max_num_node_features + 1)
            for comb in combinations(node_vec.indices, combinations_order)
        ]
        for node_feature_id_combination in all_combinations:
            for remote_feature_id, remote_feature_val in zip(
                    row_vec.indices, row_vec.data):
                # Hash the node feature combination together with the remote
                # feature id to obtain a new column index within the bitmask.
                new_feature_id = fast_hash(node_feature_id_combination +
                                           [remote_feature_id],
                                           bitmask=bitmask)
                row.append(node_id)
                col.append(new_feature_id)
                data.append(remote_feature_val)
    shape = (max(row) + 1, feature_size)
    new_data_matrix = csr_matrix((data, (row, col)),
                                 shape=shape,
                                 dtype=np.float64)
    return new_data_matrix
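The key step above is hashing each combination of a node's own feature ids together with one remote feature id into a single column index bounded by the bitmask. A small illustration with a stand-in hash; the real fast_hash is assumed to behave analogously, returning an integer in [0, bitmask]:

# Stand-in for fast_hash, only to illustrate the masking; this is an
# assumption, not the library's implementation.
def fast_hash_standin(ids, bitmask):
    return hash(tuple(ids)) & bitmask

nbits = 14
bitmask = 2 ** nbits - 1               # assumed meaning of the returned bitmask
node_feature_ids = [3, 17]             # a sorted combination of node feature ids
remote_feature_id = 42
new_feature_id = fast_hash_standin(node_feature_ids + [remote_feature_id],
                                   bitmask=bitmask)
assert 0 <= new_feature_id <= bitmask  # always a valid column index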
Example #3
def __init__(self, decompose_func=None, preprocessor=None, nbits=14):
    feature_size, bitmask = set_feature_size(nbits=nbits)
    self.feature_size = feature_size
    self.bitmask = bitmask
    encoding_func = make_encoder(decompose_func,
                                 preprocessors=preprocessor,
                                 bitmask=self.bitmask,
                                 seed=1)
    self.encoding_func = encoding_func
    self.estimator = None
Example #4
def real_vectorize(graphs,
                   decomposition_funcs,
                   preprocessors=None,
                   nbits=14,
                   seed=1):
    """real_vectorize."""
    feature_size, bitmask = set_feature_size(nbits=nbits)
    attributed_vecs_list = [
        _real_vectorize_single(graph, decomposition_funcs, bitmask,
                               feature_size) for graph in graphs
    ]
    attributed_vecs_mtx = sp.sparse.vstack(attributed_vecs_list)
    return attributed_vecs_mtx
Example #5
def draw_decomposition_graphs(graphs,
                              decompose_funcs,
                              preprocessors=None,
                              draw_graphs=None):
    feature_size, bitmask = set_feature_size(nbits=14)
    encoding_func = make_encoder(decompose_funcs,
                                 preprocessors=preprocessors,
                                 bitmask=bitmask,
                                 seed=1)
    for g in graphs:
        print('_' * 80)
        draw_graphs([g], [''])
        codes, fragments = encoding_func(g)
        unique_codes, unique_fragments, code_counts = select_unique(
            codes, fragments)
        titles = ['%d   #%d' % (code, code_counts[code])
                  for code in unique_codes]
        print('%d unique components in %d fragments' %
              (len(unique_codes), len(codes)))
        if unique_fragments:
            draw_graphs(unique_fragments, titles, n_graphs_per_line=6)
        else:
            print('No fragments')
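Because draw_graphs is injected as a callback, a minimal stand-in that only prints titles is enough to exercise this function; the signature below is inferred from the calls above, and the real callback presumably renders the graphs:

# Hypothetical stand-in callback; signature inferred from the usage above.
def print_only_draw_graphs(graphs, titles, n_graphs_per_line=6):
    for graph, title in zip(graphs, titles):
        print(title or '(untitled)', '-', graph.number_of_nodes(), 'nodes')

# draw_decomposition_graphs(my_graphs, my_decompose_funcs,
#                           draw_graphs=print_only_draw_graphs)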
Example #6
def __init__(self,
             decompose_func=None,
             preprocessor=None,
             nbits=14,
             n_estimators=10000,
             seed=1):
    feature_size, bitmask = set_feature_size(nbits=nbits)
    self.feature_size = feature_size
    self.bitmask = bitmask
    self.encoding_func = make_encoder(decompose_func,
                                      preprocessors=preprocessor,
                                      bitmask=self.bitmask,
                                      seed=seed)
    self.estimator = ExtraTreesRegressor(n_estimators=n_estimators,
                                         random_state=seed)
Example #7
def __init__(self,
             decompose_func=None,
             preprocessor=None,
             nbits=15,
             seed=1):
    self.decompose_func = decompose_func
    self.nbits = nbits
    feature_size, bitmask = set_feature_size(nbits=nbits)
    self.feature_size = feature_size
    self.bitmask = bitmask
    self.encoding_func = make_encoder(decompose_func,
                                      preprocessors=preprocessor,
                                      bitmask=self.bitmask,
                                      seed=seed)
    self.classifier = SGDRegressor(penalty='elasticnet')
Example #8
def real_node_vectorize(graphs,
                        decomposition_funcs,
                        preprocessors=None,
                        nbits=14,
                        seed=1):
    """real_node_vectorize."""
    feature_size, bitmask = set_feature_size(nbits=nbits)
    attributed_vecs_mtx_list = []
    for graph in graphs:
        attributed_vecs_list = [
            _real_node_vectorize_single(graph, node_id, decomposition_funcs,
                                        preprocessors, bitmask, feature_size)
            for node_id in graph.nodes()
        ]
        attributed_vecs_mtx = sp.sparse.vstack(attributed_vecs_list)
        attributed_vecs_mtx_list.append(attributed_vecs_mtx)
    return attributed_vecs_mtx_list
Example #9
def __init__(self,
             decompose_func=None,
             preprocessor=None,
             nbits=15,
             seed=1):
    self.decompose_func = decompose_func
    self.nbits = nbits
    feature_size, bitmask = set_feature_size(nbits=nbits)
    self.feature_size = feature_size
    self.bitmask = bitmask
    self.encoding_func = make_encoder(decompose_func,
                                      preprocessors=preprocessor,
                                      bitmask=self.bitmask,
                                      seed=seed)
    self.classifier = BernoulliNB(alpha=0.1,
                                  binarize=None,
                                  fit_prior=True,
                                  class_prior=None)
Example #10
def make_abstract_graph(graphs, decomposition=None, preprocessors=None):
    df = do_decompose(decomposition,
                      compose_function=decompose_abstract_and_non_abstract)
    #df = do_decompose(decomposition, compose_function=decompose_abstract)

    feature_size, bitmask = set_feature_size(nbits=14)
    encoding_func = make_encoder(df,
                                 preprocessors=preprocessors,
                                 bitmask=bitmask,
                                 seed=1)

    abstract_graphs = []
    for g in graphs:
        codes, fragments = encoding_func(g)
        assert len(fragments) == 1, \
            'expecting 1 fragment but got: %d' % len(fragments)
        abstract_graph = fragments[0]
        abstract_graphs.append(abstract_graph)
    return abstract_graphs
Example #11
def node_vectorize(graph, decomposition_funcs, preprocessors=None, nbits=16):
    feature_size, bitmask = set_feature_size(nbits=nbits)
    return _node_vectorize(graph, decomposition_funcs, preprocessors, bitmask,
                           feature_size)
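A hypothetical call of node_vectorize, reusing decompose_break from the first snippet; whether that particular decomposition is appropriate here, and what shape _node_vectorize returns, are assumptions:

import networkx as nx

g = nx.path_graph(6)
nx.set_node_attributes(g, 'C', 'label')
nx.set_edge_attributes(g, '1', 'label')

# decompose_break and its parameters are borrowed from the first snippet;
# using it as the decomposition function here is an assumption.
node_mtx = node_vectorize(g, decompose_break(min_size=2, max_size=4, n_edges=1))
# Presumably one sparse row per node with 2 ** 16 columns, given nbits=16.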