def _compose(self, ga, gb):
    """Compose new candidate graphs from fragments of two input graphs.

    Both graphs are decomposed into fragments; every (fragment_a, fragment_b)
    pair is joined into one graph by adding a single edge, with a random
    label drawn from ``self.edge_labels``, between a random node of each
    fragment.

    Parameters
    ----------
    ga, gb : input graphs to decompose and recombine.

    Returns
    -------
    list of composed graphs, one per fragment pair.
    """
    feature_size, bitmask = set_feature_size(nbits=14)
    df = decompose_break(min_size=self.min_size, max_size=self.max_size, n_edges=self.n_edges)
    encoding_func = make_encoder(df, preprocessors=None, bitmask=bitmask, seed=1)
    # Only the fragments are used here; the codes are ignored.
    _codes_a, fragments_a = encoding_func(ga)
    _codes_b, fragments_b = encoding_func(gb)
    gs = []
    for fragment_a in fragments_a:
        fragment_a = nx.convert_node_labels_to_integers(fragment_a)
        n_a = len(fragment_a)
        for fragment_b in fragments_b:
            # choose a node in a and one in b and join them with an edge
            fragment_b = nx.convert_node_labels_to_integers(fragment_b)
            ia = random.randrange(n_a)
            # disjoint_union relabels fragment_b's nodes to start at n_a,
            # hence the offset.
            jb = random.randrange(len(fragment_b)) + n_a
            g0 = nx.disjoint_union(fragment_a, fragment_b)
            g0.add_edge(ia, jb, label=random.choice(self.edge_labels))
            # g0 is freshly built each iteration, so no defensive copy is needed
            gs.append(g0)
    return gs
def __init__(self, decompose_func=None, preprocessor=None, nbits=14):
    """Set up the hashing parameters and the fragment-encoding function.

    The estimator is left unset (None) until training configures it.
    """
    self.feature_size, self.bitmask = set_feature_size(nbits=nbits)
    self.encoding_func = make_encoder(decompose_func, preprocessors=preprocessor, bitmask=self.bitmask, seed=1)
    self.estimator = None
def hash_graph(graph, decomposition_funcs=None, preprocessors=None, nbits=14, seed=1):
    """Return a stable integer hash in [1, 2**nbits] for a graph.

    The hash is computed from the sorted tuple of fragment codes, so it is
    invariant to the order in which the encoder produces fragments.

    Parameters
    ----------
    graph : graph to hash.
    decomposition_funcs, preprocessors : forwarded to make_encoder.
    nbits : int, number of bits of the feature space.
    seed : int, encoder seed.

    Returns
    -------
    int, never zero (the +1 shifts the masked hash off zero).
    """
    feature_size, bitmask = set_feature_size(nbits=nbits)
    encoding_func = make_encoder(decomposition_funcs, preprocessors=preprocessors, bitmask=bitmask, seed=seed)
    # The fragments themselves are not needed for hashing.
    codes, _fragments = encoding_func(graph)
    dat = tuple(sorted(codes))
    return int(hash(dat) & bitmask) + 1
def vectorize(graphs, decomposition_funcs=None, preprocessors=None, nbits=14, seed=1):
    """Encode a collection of graphs as a feature matrix.

    Builds an encoder over a 2**nbits feature space and delegates the
    actual vectorization to vectorize_graphs.
    """
    feature_size, bitmask = set_feature_size(nbits=nbits)
    encoder = make_encoder(decomposition_funcs, preprocessors=preprocessors, bitmask=bitmask, seed=seed)
    return vectorize_graphs(graphs, encoding_func=encoder, feature_size=feature_size)
def draw_decomposition_graphs(graphs, decompose_funcs, preprocessors=None, draw_graphs=None):
    """Draw each graph together with its unique decomposition fragments.

    For every graph: renders the graph, encodes it, selects the unique
    fragments and their occurrence counts, and renders those fragments
    titled with "<code> #<count>".

    Parameters
    ----------
    graphs : iterable of graphs to display.
    decompose_funcs, preprocessors : forwarded to make_encoder.
    draw_graphs : callable rendering a list of graphs; required.

    Raises
    ------
    ValueError if draw_graphs is not supplied (previously this surfaced as
    an opaque TypeError from calling None).
    """
    if draw_graphs is None:
        raise ValueError("draw_graphs must be a callable that renders a list of graphs")
    feature_size, bitmask = set_feature_size(nbits=14)
    encoding_func = make_encoder(decompose_funcs, preprocessors=preprocessors, bitmask=bitmask, seed=1)
    for g in graphs:
        print('_' * 80)
        draw_graphs([g], [''])
        codes, fragments = encoding_func(g)
        unique_codes, unique_fragments, code_counts = select_unique(codes, fragments)
        titles = ['%d #%d' % (id, code_counts[id]) for id in unique_codes]
        print('%d unique components in %d fragments' % (len(unique_codes), len(codes)))
        if unique_fragments:
            draw_graphs(unique_fragments, titles, n_graphs_per_line=6)
        else:
            print('No fragments')
def get_feature_set(graphs, decomposition_funcs=None, preprocessors=None, nbits=14, seed=1):
    """Return the set of all fragment codes occurring in the given graphs.

    Bug fix: the original called ``feature_set.add(codes)``, which tries to
    add the whole code list as a single (unhashable) element; ``update`` is
    required to collect the individual codes.

    Parameters
    ----------
    graphs : iterable of graphs to encode.
    decomposition_funcs, preprocessors : forwarded to make_encoder.
    nbits : int, number of bits of the feature space.
    seed : int, encoder seed.

    Returns
    -------
    set of fragment codes.
    """
    feature_size, bitmask = set_feature_size(nbits=nbits)
    encoding_func = make_encoder(decomposition_funcs, preprocessors=preprocessors, bitmask=bitmask, seed=seed)
    feature_set = set()
    for graph in graphs:
        codes, _fragments = encoding_func(graph)
        feature_set.update(codes)
    return feature_set
def __init__(self, decompose_func=None, preprocessor=None, nbits=14, n_estimators=10000, seed=1):
    """Configure the fragment encoder and a tree-ensemble regressor."""
    self.feature_size, self.bitmask = set_feature_size(nbits=nbits)
    self.encoding_func = make_encoder(decompose_func, preprocessors=preprocessor, bitmask=self.bitmask, seed=seed)
    self.estimator = ExtraTreesRegressor(n_estimators=n_estimators, random_state=seed)
def __init__(self, decompose_func=None, preprocessor=None, nbits=15, seed=1):
    """Store the decomposition settings and build an elastic-net SGD regressor."""
    self.decompose_func = decompose_func
    self.nbits = nbits
    self.feature_size, self.bitmask = set_feature_size(nbits=nbits)
    self.encoding_func = make_encoder(decompose_func, preprocessors=preprocessor, bitmask=self.bitmask, seed=seed)
    self.classifier = SGDRegressor(penalty='elasticnet')
def __init__(self, decompose_func=None, preprocessor=None, nbits=15, seed=1):
    """Store the decomposition settings and build a Bernoulli naive-Bayes classifier."""
    self.decompose_func = decompose_func
    self.nbits = nbits
    self.feature_size, self.bitmask = set_feature_size(nbits=nbits)
    self.encoding_func = make_encoder(decompose_func, preprocessors=preprocessor, bitmask=self.bitmask, seed=seed)
    self.classifier = BernoulliNB(alpha=0.1, binarize=None, fit_prior=True, class_prior=None)
def make_abstract_graph(graphs, decomposition=None, preprocessors=None):
    """Map each graph to its single abstract-graph fragment.

    Each input graph is expected to decompose into exactly one fragment;
    that fragment is collected as the graph's abstract representation.

    Parameters
    ----------
    graphs : iterable of graphs to abstract.
    decomposition : forwarded to do_decompose.
    preprocessors : forwarded to make_encoder.

    Returns
    -------
    list of abstract graphs, one per input graph.

    Raises
    ------
    ValueError if a graph yields other than one fragment (previously a bare
    assert, which is silently stripped under ``python -O``).
    """
    df = do_decompose(decomposition, compose_function=decompose_abstract_and_non_abstract)
    feature_size, bitmask = set_feature_size(nbits=14)
    encoding_func = make_encoder(df, preprocessors=preprocessors, bitmask=bitmask, seed=1)
    abstract_graphs = []
    for g in graphs:
        codes, fragments = encoding_func(g)
        if len(fragments) != 1:
            raise ValueError("expecting 1 fragment but got:%d" % len(fragments))
        abstract_graphs.append(fragments[0])
    return abstract_graphs
def get_feature_dict(graphs, decomposition_funcs=None, preprocessors=None, nbits=14, seed=1, return_counts=False):
    """Build a code -> fragment dictionary over all graphs.

    Later occurrences of a code overwrite earlier fragments; each code's
    total occurrence count is tracked alongside.

    Returns the dictionary, or (dictionary, counts) when return_counts is True.
    """
    feature_size, bitmask = set_feature_size(nbits=nbits)
    encoder = make_encoder(decomposition_funcs, preprocessors=preprocessors, bitmask=bitmask, seed=seed)
    feature_dict = dict()
    feature_counts_dict = defaultdict(int)
    for graph in graphs:
        codes, fragments = encoder(graph)
        # single pass: record the fragment and bump the count for each code
        for code, fragment in zip(codes, fragments):
            feature_dict[code] = fragment
            feature_counts_dict[code] += 1
    if return_counts:
        return feature_dict, feature_counts_dict
    return feature_dict
def set_decomposition(self, decomposition):
    """Rebuild the encoder for a new decomposition over a 20-bit feature space."""
    mask = 2 ** 20 - 1
    self.encoder = make_encoder(decomposition, bitmask=mask)