def _compose(self, ga, gb):
    """Recombine fragments of two graphs into new candidate graphs.

    Both input graphs are decomposed into fragments using this object's
    size/edge bounds; every fragment of ``ga`` is joined to every
    fragment of ``gb`` by a single edge with a randomly chosen label,
    producing one new graph per fragment pair.

    Parameters
    ----------
    ga, gb : networkx graph
        The two graphs to recombine.

    Returns
    -------
    list of networkx graph
        One composed graph per (fragment_a, fragment_b) pair.
    """
    # feature_size is unused here; only the bitmask feeds the encoder.
    _, bitmask = set_feature_size(nbits=14)
    df = decompose_break(min_size=self.min_size,
                         max_size=self.max_size,
                         n_edges=self.n_edges)
    encoding_func = make_encoder(df, preprocessors=None,
                                 bitmask=bitmask, seed=1)
    _, fragments_a = encoding_func(ga)
    _, fragments_b = encoding_func(gb)
    gs = []
    for fragment_a in fragments_a:
        fragment_a = nx.convert_node_labels_to_integers(fragment_a)
        for fragment_b in fragments_b:
            # choose a node in a and one in b and join them with an edge
            fragment_b = nx.convert_node_labels_to_integers(fragment_b)
            # randrange(n) is the idiomatic form of choice(range(n))
            ia = random.randrange(len(fragment_a))
            # nodes of fragment_b are shifted by len(fragment_a) in the
            # disjoint union, so offset the sampled index accordingly
            jb = random.randrange(len(fragment_b)) + len(fragment_a)
            g0 = nx.disjoint_union(fragment_a, fragment_b)
            assert jb in list(g0.nodes())
            g0.add_edge(ia, jb, label=random.choice(self.edge_labels))
            gs.append(g0.copy())
    return gs
def build_node_proximity_data_matrix(nodes_mtx, data_matrix, nbits,
                                     max_num_node_features=1):
    """Combine node features with proximity features into a hashed matrix.

    For each node, every combination of up to ``max_num_node_features``
    of its own feature ids is hashed together with each feature id of
    the corresponding row in ``data_matrix``; the resulting hashed
    feature id receives that row feature's value.

    Parameters
    ----------
    nodes_mtx : sparse matrix
        One row of node feature ids per node.
    data_matrix : sparse matrix
        One row of proximity/remote feature values per node.
    nbits : int
        Number of bits for the hashed feature space.
    max_num_node_features : int
        Largest size of node-feature combinations to hash.

    Returns
    -------
    scipy.sparse.csr_matrix
        Matrix of hashed combined features, dtype float64.
    """
    feature_size, bitmask = set_feature_size(nbits=nbits)
    data, row, col = [], [], []
    for node_id, (node_vec, row_vec) in enumerate(zip(nodes_mtx, data_matrix)):
        # all sorted combinations of the node's own feature ids,
        # from size 1 up to max_num_node_features
        all_combinations = [
            sorted(comb)
            for combinations_order in range(1, max_num_node_features + 1)
            for comb in combinations(node_vec.indices, combinations_order)
        ]
        for node_feature_id_combinations in all_combinations:
            for remote_feature_id, remote_feature_val in zip(
                    row_vec.indices, row_vec.data):
                new_feature_id = fast_hash(
                    node_feature_id_combinations + [remote_feature_id],
                    bitmask=bitmask)
                row.append(node_id)
                col.append(new_feature_id)
                data.append(remote_feature_val)
    # guard: max() on an empty list raises ValueError when no node
    # produced any (node, remote) feature pair
    n_rows = max(row) + 1 if row else 0
    shape = (n_rows, feature_size)
    new_data_matrix = csr_matrix((data, (row, col)),
                                 shape=shape,
                                 dtype=np.float64)
    return new_data_matrix
def __init__(self, decompose_func=None, preprocessor=None, nbits=14):
    """Configure the hashed feature space and the graph encoder.

    The estimator is left unset (``None``) until fitted elsewhere.
    """
    self.feature_size, self.bitmask = set_feature_size(nbits=nbits)
    self.encoding_func = make_encoder(decompose_func,
                                      preprocessors=preprocessor,
                                      bitmask=self.bitmask,
                                      seed=1)
    self.estimator = None
def real_vectorize(graphs, decomposition_funcs, preprocessors=None, nbits=14, seed=1):
    """Vectorize each graph and stack the results into one sparse matrix.

    Each graph is encoded independently via ``_real_vectorize_single``;
    the per-graph row vectors are vertically stacked.
    """
    feature_size, bitmask = set_feature_size(nbits=nbits)
    per_graph_rows = []
    for graph in graphs:
        vec = _real_vectorize_single(graph, decomposition_funcs, bitmask, feature_size)
        per_graph_rows.append(vec)
    return sp.sparse.vstack(per_graph_rows)
def draw_decomposition_graphs(graphs, decompose_funcs, preprocessors=None, draw_graphs=None):
    """Display each graph alongside its unique decomposition fragments.

    For every graph: draw it, encode it into fragments, then draw the
    unique fragments with their code ids and occurrence counts.
    ``draw_graphs`` is the plotting callable used for all rendering.
    """
    feature_size, bitmask = set_feature_size(nbits=14)
    encode = make_encoder(decompose_funcs, preprocessors=preprocessors,
                          bitmask=bitmask, seed=1)
    for graph in graphs:
        print('_' * 80)
        draw_graphs([graph], [''])
        codes, fragments = encode(graph)
        unique_codes, unique_fragments, code_counts = select_unique(codes, fragments)
        titles = ['%d #%d' % (code, code_counts[code]) for code in unique_codes]
        print('%d unique components in %d fragments' % (len(unique_codes), len(codes)))
        if not unique_fragments:
            print('No fragments')
        else:
            draw_graphs(unique_fragments, titles, n_graphs_per_line=6)
def __init__(self, decompose_func=None, preprocessor=None, nbits=14, n_estimators=10000, seed=1):
    """Set up the hashed encoder and an extra-trees regression estimator."""
    self.feature_size, self.bitmask = set_feature_size(nbits=nbits)
    self.encoding_func = make_encoder(decompose_func,
                                      preprocessors=preprocessor,
                                      bitmask=self.bitmask,
                                      seed=seed)
    self.estimator = ExtraTreesRegressor(n_estimators=n_estimators,
                                         random_state=seed)
def __init__(self, decompose_func=None, preprocessor=None, nbits=15, seed=1):
    """Set up the hashed encoder and an elastic-net SGD regressor."""
    self.decompose_func = decompose_func
    self.nbits = nbits
    self.feature_size, self.bitmask = set_feature_size(nbits=nbits)
    self.encoding_func = make_encoder(decompose_func,
                                      preprocessors=preprocessor,
                                      bitmask=self.bitmask,
                                      seed=seed)
    self.classifier = SGDRegressor(penalty='elasticnet')
def real_node_vectorize(graphs, decomposition_funcs, preprocessors=None, nbits=14, seed=1):
    """Vectorize the nodes of each graph into per-graph sparse matrices.

    Returns a list with one sparse matrix per input graph, where each
    matrix stacks one row vector per node of that graph.
    """
    feature_size, bitmask = set_feature_size(nbits=nbits)
    result = []
    for graph in graphs:
        node_rows = [
            _real_node_vectorize_single(graph, node_id, decomposition_funcs,
                                        preprocessors, bitmask, feature_size)
            for node_id in graph.nodes()
        ]
        result.append(sp.sparse.vstack(node_rows))
    return result
def __init__(self, decompose_func=None, preprocessor=None, nbits=15, seed=1):
    """Set up the hashed encoder and a Bernoulli naive-Bayes classifier."""
    self.decompose_func = decompose_func
    self.nbits = nbits
    self.feature_size, self.bitmask = set_feature_size(nbits=nbits)
    self.encoding_func = make_encoder(decompose_func,
                                      preprocessors=preprocessor,
                                      bitmask=self.bitmask,
                                      seed=seed)
    self.classifier = BernoulliNB(alpha=0.1, binarize=None,
                                  fit_prior=True, class_prior=None)
def make_abstract_graph(graphs, decomposition=None, preprocessors=None):
    """Map every graph to the single abstract graph its decomposition yields.

    Each graph is encoded with an abstract+non-abstract compose function;
    the encoder is expected to produce exactly one fragment per graph.
    """
    df = do_decompose(decomposition,
                      compose_function=decompose_abstract_and_non_abstract)
    feature_size, bitmask = set_feature_size(nbits=14)
    encode = make_encoder(df, preprocessors=preprocessors,
                          bitmask=bitmask, seed=1)
    abstract_graphs = []
    for graph in graphs:
        codes, fragments = encode(graph)
        assert (len(fragments) == 1
                ), "expecting 1 fragment but got:%d" % len(fragments)
        abstract_graphs.append(fragments[0])
    return abstract_graphs
def node_vectorize(graph, decomposition_funcs, preprocessors=None, nbits=16):
    """Vectorize the nodes of a single graph.

    Thin wrapper that derives the hashed feature-space parameters and
    delegates the actual work to ``_node_vectorize``.
    """
    size, mask = set_feature_size(nbits=nbits)
    return _node_vectorize(graph, decomposition_funcs, preprocessors, mask, size)