コード例 #1
0
    def transform2(self, X):
        """Coarsen the bipartite graph built from ``X`` up to ``self.max_levels``.

        Every pass runs the configured matching method over one vertex range,
        contracts the graph along the resulting matching and stores the
        coarsened graph (with its level incremented) in ``self.g``.

        Args:
            X: Matrix handed to ``helperigraph.load_matrix`` to build the graph.

        Returns:
            The result of ``helperigraph.biajcent_matrix`` for the final graph.
        """
        self.g = helperigraph.load_matrix(X)
        self.g['level'] = 0

        # ``<`` instead of ``!=`` so the loop still terminates when
        # ``max_levels`` is negative or not a whole number.
        while self.g['level'] < self.max_levels:

            # The matching method's return value is ignored and this object is
            # handed to ``contract`` afterwards, so it has to be a mutable list
            # (a Python 3 ``range`` cannot be updated in place).
            matching = list(range(self.g.vcount()))
            levels = self.g['level'] + 1

            self.g['similarity'] = getattr(
                Similarity(self.g, self.g['adjlist']), self.similarity)

            # Ids from the size of the first layer up to the combined size of
            # the first two layers -- presumably the vertices of the second
            # layer, assuming ids are laid out layer by layer.
            start = sum(self.g['vertices'][0:1])
            end = sum(self.g['vertices'][0:1 + 1])
            vertices = list(range(start, end))

            param = dict(reduction_factor=self.reduction_factor)
            if self.matching in ['gmb', 'rgmb']:
                param['vertices'] = vertices

            if self.matching in ['hem', 'lem', 'rm']:
                # These methods are looked up on the one-mode projection.
                one_mode_graph = self.g.weighted_one_mode_projection(vertices)
                matching_method = getattr(one_mode_graph, self.matching)
            else:
                matching_method = getattr(self.g, self.matching)

            matching_method(matching, **param)

            coarse = self.g.contract(matching)
            coarse['level'] = levels
            self.g = coarse

        return helperigraph.biajcent_matrix(self.g)
コード例 #2
0
def find_similarities(text_1, text_2):
    """Score every sequence pair of two texts and keep the close ones.

    The sequences and their pairwise distances come from ``get_distances``;
    each distance ``d`` is turned into the score ``1 - d``.

    Args:
        text_1: First text, passed to ``get_distances``.
        text_2: Second text, passed to ``get_distances``.

    Returns:
        A ``(similarities, score)`` tuple. ``similarities`` holds one
        ``Similarity`` per pair whose score is greater than ``BIAS``;
        ``score`` holds the score of every pair, ordered by the index in the
        first sequence list and then by the index in the second.
    """
    encoder = SentenceTransformer("distilbert-base-nli-stsb-mean-tokens")
    sequences, distances = get_distances(encoder, window_tokenizer, text_1, text_2)

    matches = []
    all_scores = []
    for row, left in enumerate(sequences[0]):
        for col, right in enumerate(sequences[1]):
            normalized = 1 - distances[row][col]
            all_scores.append(normalized)
            if normalized > BIAS:
                matches.append(Similarity(normalized, left, right))

    return matches, all_scores
コード例 #3
0
    def weighted_one_mode_projection(self,
                                     vertices,
                                     similarity='common_neighbors'):
        """
        Build the weighted one-mode projection of this graph over ``vertices``.

        Application of a one-mode projection to a bipartite network generates
        two unipartite networks, one for each layer, so that vertices with
        common neighbors are connected by edges in their respective projection.

        The returned ``MGraph`` has one vertex per entry of ``vertices``
        (carrying the original ``name``), an edge for every pair found two hops
        apart, weighted by ``self['projection'](u, v)``, and the graph
        attributes ``source_vertices``, ``source_edges``, ``adjlist`` and
        ``similarity`` (the attribute of ``Similarity`` named by the
        ``similarity`` argument).
        """

        projection = MGraph()
        projection.add_vertices(vertices)
        projection['source_vertices'] = self.vcount()
        projection['source_edges'] = self.ecount()
        projection.vs['name'] = self.vs[vertices]['name']

        # Vertex id in this graph -> vertex id in the projection.
        local_id = {
            original: index
            for original, index in zip(vertices, range(projection.vcount()))
        }

        weight_by_edge = {}
        done = [0] * self.vcount()
        for source in vertices:
            reachable = self.neighborhood(vertices=source, order=2)
            # Drops the leading ``degree + 1`` entries -- presumably the vertex
            # itself followed by its direct neighbours -- to keep the two-hop
            # vertices only.
            first_twohop = len(self['adjlist'][source]) + 1
            for target in reachable[first_twohop:]:
                if done[target] != 1:
                    # A finished vertex already produced this pair; only pairs
                    # with unfinished targets are new.
                    weight = self['projection'](source, target)
                    weight_by_edge[(local_id[source], local_id[target])] = weight
            done[source] = 1

        if weight_by_edge:
            edge_list, weight_list = list(zip(*weight_by_edge.items()))
            projection.add_edges(edge_list)
            projection.es['weight'] = weight_list

        projection['adjlist'] = [
            set(neighbors) for neighbors in projection.get_adjlist()
        ]
        projection['similarity'] = getattr(
            Similarity(projection, projection['adjlist']), similarity)

        return projection
コード例 #4
0
    def run(self):
        """Coarsen ``self.source_graph`` one level at a time.

        For every layer that still has to shrink, a matching function and its
        keyword arguments are prepared and evaluated in a multiprocessing
        pool; the per-layer results are merged into one matching and the graph
        is contracted along it. Each coarsened graph is appended to
        ``self.hierarchy_graphs`` and a snapshot of its level list to
        ``self.hierarchy_levels``.

        The loop ends when no layer needs matching any more or when a
        contraction does not reduce the number of vertices.
        """
        import sys

        graph = self.source_graph.copy()
        while True:

            # Work on a private copy. The list stored under ``graph['level']``
            # is the very object that was assigned to the previously coarsened
            # graph (and ``copy()`` may share it with ``self.source_graph``),
            # so incrementing it in place would rewrite the level recorded on
            # graphs that were already stored.
            level = list(graph['level'])
            contract = False

            args = []
            for layer in range(graph['layers']):
                min_vertices = self.global_min_vertices[layer]
                if min_vertices is None:
                    # No vertex target for this layer: stop at ``max_levels``.
                    do_matching = level[layer] < self.max_levels[layer]
                elif min_vertices:
                    # Vertex target given: stop once the layer is small enough.
                    do_matching = graph['vertices'][layer] > min_vertices
                else:
                    do_matching = True

                if not do_matching:
                    continue

                contract = True
                level[layer] += 1

                graph['similarity'] = getattr(
                    Similarity(graph, graph['adjlist']),
                    self.similarity[layer])

                kwargs = dict(reduction_factor=self.reduction_factor[layer])

                if self.matching[layer] in ['mlpb', 'gmb', 'rgmb']:
                    kwargs['vertices'] = graph['vertices_by_type'][layer]
                    kwargs['reverse'] = self.reverse[layer]
                if self.matching[layer] in ['mlpb', 'rgmb']:
                    kwargs['seed_priority'] = self.seed_priority[layer]
                if self.matching[layer] in ['mlpb']:
                    kwargs['upper_bound'] = self.upper_bound[layer]
                    kwargs['n'] = self.source_graph['vertices'][layer]
                    kwargs['global_min_vertices'] = min_vertices
                    kwargs['tolerance'] = self.tolerance[layer]
                    kwargs['itr'] = self.itr[layer]

                if self.matching[layer] in ['hem', 'lem', 'rm']:
                    # These methods are looked up on the one-mode projection.
                    one_mode_graph = graph.weighted_one_mode_projection(
                        graph['vertices_by_type'][layer])
                    matching_function = getattr(one_mode_graph,
                                                self.matching[layer])
                else:
                    matching_function = getattr(graph, self.matching[layer])

                # One single-task argument list per layer for the pool.
                args.append([(matching_function, kwargs)])

            if not contract:
                break

            pool = mp.Pool(processes=self.threads)
            try:
                processes = [
                    pool.starmap_async(modified_starmap_async, arg)
                    for arg in args
                ]

                # NOTE(review): this changes numpy's global print options and
                # looks like a debugging leftover; kept so output elsewhere
                # does not change.
                numpy.set_printoptions(threshold=sys.maxsize)

                # Merge solutions: start from "every vertex maps to itself" and
                # overwrite the entries each layer actually assigned (> -1).
                matching = numpy.arange(graph.vcount())
                for process in processes:
                    result = process.get()[0]
                    vertices = numpy.where(result > -1)[0]
                    matching[vertices] = result[vertices]
            finally:
                # Release the workers even when a matching task raised.
                pool.close()
                pool.join()

            coarsened_graph = graph.contract(matching)
            coarsened_graph['level'] = level

            if coarsened_graph.vcount() == graph.vcount():
                # Nothing was merged; further levels would not help either.
                break

            self.hierarchy_graphs.append(coarsened_graph)
            self.hierarchy_levels.append(level[:])
            graph = coarsened_graph
コード例 #5
0
 def __init__(self, cfg):
     """Create the sub-modules of the model.

     Args:
         cfg: Configuration object, passed unchanged to ``Similarity`` and
             ``BERTSearch``.
     """
     super(EventBert, self).__init__()
     self.similarity = Similarity(cfg)
     self.bert = BERTSearch(cfg)
     # Maps 2 input features to 1 output (assuming ``nn`` is ``torch.nn``);
     # presumably fuses the outputs of the two sub-modules above -- TODO
     # confirm against the forward pass.
     self.fnn = nn.Linear(2, 1)
コード例 #6
0
def main():
	"""
	Main entry point for the application when run from the command line.

	Parses the command-line options, loads the bipartite graph, coarsens it
	one level at a time and writes the requested outputs for the hierarchy.
	Exits with status 1 when an option fails validation.
	"""

	# Timing instantiation
	timing = Timing(['Snippet', 'Time [m]', 'Time [s]'])

	with timing.timeit_context_add('Pre-processing'):

		# Setup parse options command line
		current_path = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
		parser = args.setup_parser(current_path + '/args/mdr.json')
		options = parser.parse_args()
		args.update_json(options)
		args.check_output(options)

		# Log instantiation
		log = helper.initialize_logger(dir='log', output='log')

		if options.input and options.vertices is None:
			log.warning('Vertices are required when input is given.')
			sys.exit(1)

		# Create default values for optional parameters
		if options.reduction_factor is None:
			options.reduction_factor = 0.5
		if options.max_levels is None:
			options.max_levels = 3
		if options.matching is None:
			# NOTE(review): this default is not listed in ``valid_matching``
			# below, so a run without an explicit matching method exits with
			# an error -- confirm which method the default should be.
			options.matching = 'greedy_seed_twohops'
		if options.similarity is None:
			options.similarity = 'weighted_common_neighbors'

		# The names are validated in lower case, so store them that way: the
		# same values are used further down for the method lookups.
		options.matching = options.matching.lower()
		options.similarity = options.similarity.lower()

		# Validation of matching method
		valid_matching = ['gmb', 'rgmb', 'hem', 'lem', 'rm']
		if options.matching not in valid_matching:
			log.warning('Matching method is unvalid.')
			sys.exit(1)

		# Validation of input extension
		valid_input = ['.arff', '.dat']
		if options.extension not in valid_input:
			log.warning('Input is unvalid.')
			sys.exit(1)

		# Validation of similarity measure
		valid_similarity = ['common_neighbors', 'weighted_common_neighbors',
		'salton', 'preferential_attachment', 'jaccard', 'adamic_adar',
		'resource_allocation', 'sorensen', 'hub_promoted', 'hub_depressed',
		'leicht_holme_newman', 'weighted_jaccard']
		if options.similarity not in valid_similarity:
			log.warning('Similarity misure is unvalid.')
			sys.exit(1)

		# A real list: the values are read by index when the .conf is written.
		options.vertices = list(map(int, options.vertices))
		options.max_levels = int(options.max_levels)
		options.reduction_factor = float(options.reduction_factor)

	# Load bipartite graph
	with timing.timeit_context_add('Load'):
		if options.extension == '.arff':
			graph = helperigraph.load_csr(options.input)
		elif options.extension == '.dat':
			graph = helperigraph.load_dat(options.input, skip_last_column=options.skip_last_column, skip_rows=options.skip_rows)
		graph['level'] = 0

	# Coarsening
	with timing.timeit_context_add('Coarsening'):
		hierarchy_graphs = []
		hierarchy_levels = []
		# ``<`` rather than ``!=`` so a negative ``max_levels`` cannot loop
		# forever.
		while graph['level'] < options.max_levels:

			# Passed to the matching method and then to ``contract``; it must
			# be a mutable list (a Python 3 ``range`` is not).
			matching = list(range(graph.vcount()))
			levels = graph['level'] + 1

			graph['similarity'] = getattr(Similarity(graph, graph['adjlist']), options.similarity)
			start = sum(graph['vertices'][0:1])
			end = sum(graph['vertices'][0:1 + 1])
			# Defined for both branches; the projection branch used to read an
			# undefined ``vertices`` name.
			vertices = list(range(start, end))
			if options.matching in ['hem', 'lem', 'rm']:
				one_mode_graph = graph.weighted_one_mode_projection(vertices)
				matching_method = getattr(one_mode_graph, options.matching)
				matching_method(matching, reduction_factor=options.reduction_factor)
			else:
				matching_method = getattr(graph, options.matching)
				matching_method(vertices, matching, reduction_factor=options.reduction_factor)

			coarse = graph.contract(matching)
			coarse['level'] = levels
			graph = coarse
			if options.save_hierarchy or (graph['level'] == options.max_levels):
				hierarchy_graphs.append(graph)
				hierarchy_levels.append(levels)

	# Save
	with timing.timeit_context_add('Save'):

		output = options.output
		# Coarsest graph first; ``zip`` is materialised because ``reversed``
		# needs a sequence.
		hierarchy = reversed(list(zip(hierarchy_levels, hierarchy_graphs)))
		for index, (levels, graph) in enumerate(hierarchy):
			prefix = output + '-' + str(index)

			if options.save_conf:
				with open(prefix + '.conf', 'w+') as f:
					d = {}
					d['source_filename'] = options.input
					d['source_v0'] = options.vertices[0]
					d['source_v1'] = options.vertices[1]
					d['source_vertices'] = options.vertices[0] + options.vertices[1]
					d['edges'] = graph.ecount()
					d['vertices'] = graph.vcount()
					d['reduction_factor'] = options.reduction_factor
					d['max_levels'] = options.max_levels
					d['similarity'] = options.similarity
					d['matching'] = options.matching
					d['levels'] = levels
					for layer in range(graph['layers']):
						vcount = str(len(graph.vs.select(type=layer)))
						attr = 'v' + str(layer)
						d[attr] = vcount
					json.dump(d, f, indent=4)

			if options.save_ncol:
				graph.write(prefix + '.ncol', format='ncol')

			if options.save_source:
				with open(prefix + '.source', 'w+') as f:
					for v in graph.vs():
						f.write(' '.join(map(str, v['source'])) + '\n')

			if options.save_predecessor:
				with open(prefix + '.predecessor', 'w+') as f:
					for v in graph.vs():
						f.write(' '.join(map(str, v['predecessor'])) + '\n')

			if options.save_successor:
				numpy.savetxt(prefix + '.successor', graph.vs['successor'], fmt='%d')

			if options.save_weight:
				numpy.savetxt(prefix + '.weight', graph.vs['weight'], fmt='%d')

			if options.save_adjacency:
				numpy.savetxt(prefix + '.dat', helperigraph.biajcent_matrix(graph), fmt='%.2f')

			if options.save_gml:
				# Every attribute is turned into a string before writing; this
				# has to stay the last export because it rewrites the graph.
				del graph['adjlist']
				del graph['similarity']
				graph['layers'] = str(graph['layers'])
				graph['vertices'] = ','.join(map(str, graph['vertices']))
				graph['level'] = str(graph['level'])
				# Lists, not lazy ``map`` objects, so each vertex receives its
				# own value.
				graph.vs['name'] = [str(i) for i in range(0, graph.vcount())]
				graph.vs['type'] = [str(t) for t in graph.vs['type']]
				graph.vs['weight'] = [str(w) for w in graph.vs['weight']]
				graph.vs['successor'] = [str(s) for s in graph.vs['successor']]
				for v in graph.vs():
					v['source'] = ','.join(map(str, v['source']))
					v['predecessor'] = ','.join(map(str, v['predecessor']))
				graph.write(prefix + '.gml', format='gml')

			if not options.save_hierarchy:
				break

	if options.show_timing:
		timing.print_tabular()
	if options.save_timing_csv:
		timing.save_csv(output + '-timing.csv')
	if options.save_timing_json:
		# Own extension: this used to reuse (and overwrite) the CSV file name.
		timing.save_json(output + '-timing.json')
コード例 #7
0
    def run(self):
        """Coarsen ``self.source_graph`` one level at a time via Spark.

        For every layer that still has to shrink, the matching keyword
        arguments are collected; when ``self.spark`` is set, the Spark
        pipeline computes the sorted candidate edges per layer and
        ``gmb_matching_pure_spark`` turns them into the final matching. The
        graph is then contracted with ``contract_pure``. Each coarsened graph
        is appended to ``self.hierarchy_graphs`` and a snapshot of its level
        list to ``self.hierarchy_levels``.

        The loop ends when no layer needs matching any more or when a
        contraction does not reduce the number of vertices.
        """

        def flat_map(arrays, function) -> list:
            """Apply ``function`` to every array and concatenate the results."""
            mapped_array = []
            for array in arrays:
                for item in function(array):
                    mapped_array.append(item)
            return mapped_array

        graph = self.source_graph.copy()

        while True:

            # Work on a private copy. The list stored under ``graph['level']``
            # is the very object that was assigned to the previously coarsened
            # graph (and ``copy()`` may share it with ``self.source_graph``),
            # so incrementing it in place would rewrite the level recorded on
            # graphs that were already stored.
            level = list(graph['level'])
            debug_print("------------------------------------------------------")
            debug_print("level: ")
            debug_print(level)
            debug_print(graph)
            debug_print("------------------------------------------------------")

            contract = False

            # NOTE(review): ``args`` is filled but never read in this method.
            args = []
            spark_args = []
            broadcast_kwargs = []
            current_layer = 0
            for layer in range(graph['layers']):
                current_layer = current_layer + 1
                do_matching = True
                if self.global_min_vertices[layer] is None and level[layer] >= self.max_levels[layer]:
                    debug_print("------------------")
                    debug_print("max")
                    debug_print(self.global_min_vertices[layer])
                    debug_print(level[layer])
                    debug_print(self.max_levels[layer])
                    debug_print("------------------")
                    do_matching = False
                elif self.global_min_vertices[layer] and graph['vertices'][layer] <= self.global_min_vertices[layer]:
                    debug_print("min")
                    do_matching = False

                if do_matching:
                    debug_print("do_matching")
                    debug_print(do_matching)

                    contract = True
                    level[layer] += 1

                    graph['similarity'] = getattr(Similarity(graph, graph['adjlist']), self.similarity[layer])

                    kwargs = dict(reduction_factor=self.reduction_factor[layer])

                    if self.matching[layer] in ['mlpb', 'gmb', 'rgmb']:
                        kwargs['vertices'] = graph['vertices_by_type'][layer]
                        kwargs['reverse'] = self.reverse[layer]
                    if self.matching[layer] in ['mlpb', 'rgmb']:
                        kwargs['seed_priority'] = self.seed_priority[layer]
                    if self.matching[layer] in ['mlpb']:
                        kwargs['upper_bound'] = self.upper_bound[layer]
                        kwargs['n'] = self.source_graph['vertices'][layer]
                        kwargs['global_min_vertices'] = self.global_min_vertices[layer]
                        kwargs['tolerance'] = self.tolerance[layer]
                        kwargs['itr'] = self.itr[layer]

                    if self.matching[layer] in ['hem', 'lem', 'rm']:
                        one_mode_graph = graph.weighted_one_mode_projection(graph['vertices_by_type'][layer])
                        matching_function = getattr(one_mode_graph, self.matching[layer])
                        # TODO: This could be removed because gmb_pure is hardcoded on the spark approach
                        matching_function_spark = getattr(graph, 'pure_gmb' if self.spark is True and self.matching[
                            layer] == 'gmb' else self.matching[layer])
                    else:
                        matching_function_spark = getattr(graph, 'pure_gmb' if self.spark is True and self.matching[
                            layer] == 'gmb' else self.matching[layer])
                        matching_function = getattr(graph, self.matching[layer])

                    # Create a args for the engine multiprocessing.pool
                    args.append([(matching_function, kwargs)])
                    spark_args.append([(matching_function_spark, kwargs, current_layer)])
                    broadcast_kwargs.append(kwargs)

            if contract:
                debug_print("contract")
                debug_print(contract)

                # Broadcast only when a matching is actually going to run:
                # ``graph['similarity']`` is set by the layer loop above, so it
                # is guaranteed to exist here, and no broadcast is wasted on
                # the final, matching-free iteration.
                graph_similarity = self.sparkContext.broadcast(graph['similarity'])

                # NOTE(review): the result is unused, but the lookup raises
                # KeyError when a layer's kwargs carry no 'vertices' entry --
                # confirm whether that guard is intended before removing it.
                vertices = flat_map(broadcast_kwargs, lambda arg: arg["vertices"])
                # NOTE(review): stays empty when ``self.spark`` is false, so
                # ``contract_pure`` then receives an empty matching -- confirm
                # that the non-Spark path is meant to be supported here.
                final_matching = []
                broadcastGraph = self.sparkContext.broadcast(graph)

                if self.spark:
                    sorted_edges_by_layer = self.sparkContext.parallelize(spark_args) \
                        .flatMap(lambda arg: gmb_pure_flat_map_two_layers_into_one_list_with_neighborhood(arg, broadcastGraph)) \
                        .flatMap(lambda arg: gmb_pure_compute_neigh_list_with_similarity(arg, graph_similarity)) \
                        .reduceByKey(lambda a, b: gmb_pure_map_neight_with_great_similarity(a, b)) \
                        .map(gmb_pure_map_by_layer_reduced) \
                        .sortBy(sort_by_similarity) \
                        .groupByKey() \
                        .collect()

                    final_matching = gmb_matching_pure_spark(graph, sorted_edges_by_layer, broadcast_kwargs)

                    debug_print("1==================================================")
                    debug_print("==================================================1")

                coarsened_graph = contract_pure(input_graph=graph, matching=final_matching)
                coarsened_graph['level'] = level

                if coarsened_graph.vcount() == graph.vcount():
                    # Nothing was merged; further levels would not help either.
                    debug_print("break:vcount")
                    break

                self.hierarchy_graphs.append(coarsened_graph)
                self.hierarchy_levels.append(level[:])
                graph = coarsened_graph

                debug_print('------------------------------------------graph------------------------------------------')
                debug_print(graph)
                debug_print('------------------------------------------graph------------------------------------------')

            else:
                print('------------------------------------------graph------------------------------------------')
                print(graph)
                print('------------------------------------------graph------------------------------------------')

                # No layer needed matching: the hierarchy is complete.
                debug_print("break:else")
                break
コード例 #8
0
    def transform(self, X, ncol_path="../bnoc-src/output/cbrson.ncol"):
        """Coarsen the bipartite graph built from ``X`` and return its matrix.

        Edge weights are first remapped with ``helper.remap`` from their
        observed range to ``[0.1, 10]``. The graph is then coarsened level by
        level until the stop criterion is met or a contraction no longer
        reduces the number of vertices.

        Args:
            X: Matrix handed to ``helperigraph.load_matrix`` to build the graph.
            ncol_path: File that receives one ``source target weight`` line per
                edge after remapping. Defaults to the path that used to be
                hard-coded; pass ``None`` to skip the dump.

        Returns:
            The result of ``helperigraph.biajcent_matrix`` for the final graph.
        """
        self.g = helperigraph.load_matrix(X)
        n = self.g['vertices'][1]
        self.g['level'] = 0

        new_min = 0.1
        new_max = 10
        old_min = min(self.g.es['weight'])
        old_max = max(self.g.es['weight'])
        edge_lines = []
        for e in self.g.es():
            e['weight'] = helper.remap(e['weight'], old_min, old_max,
                                       new_min, new_max)
            edge_lines.append(
                str(e.tuple[0]) + ' ' + str(e.tuple[1]) + ' ' +
                str(e['weight']) + '\n')
        if ncol_path is not None:
            with open(ncol_path, "w+") as f:
                f.writelines(edge_lines)

        while True:
            levels = self.g['level']

            # Stop criterion: the level cap when no vertex target is set,
            # otherwise the vertex target for the layer at index 1.
            if self.global_min_vertices is None:
                if levels >= self.max_levels:
                    break
            elif int(self.g['vertices'][1]) <= int(self.global_min_vertices):
                break

            # The matching method's return value is ignored and this object is
            # handed to ``contract`` afterwards, so it has to be a mutable list
            # (a Python 3 ``range`` cannot be updated in place).
            membership = list(range(self.g.vcount()))
            levels += 1

            self.g['similarity'] = getattr(
                Similarity(self.g, self.g['adjlist']), self.similarity)
            start = sum(self.g['vertices'][0:1])
            end = sum(self.g['vertices'][0:1 + 1])
            vertices = list(range(start, end))

            param = dict(reduction_factor=self.reduction_factor)

            if self.matching in ['mlpb', 'nmlpb', 'nmb']:
                param['upper_bound'] = self.upper_bound
                param['n'] = n
                param['global_min_vertices'] = self.global_min_vertices
            if self.matching in ['mlpb', 'nmlpb', 'gmb', 'rgmb']:
                param['vertices'] = vertices
            if self.matching in ['mlpb']:
                param['tolerance'] = self.tolerance
                param['itr'] = self.itr

            if self.matching in ['hem', 'lem', 'rm']:
                # These methods are looked up on the one-mode projection.
                one_mode_graph = self.g.weighted_one_mode_projection(vertices)
                matching_method = getattr(one_mode_graph, self.matching)
            else:
                matching_method = getattr(self.g, self.matching)

            matching_method(membership, **param)

            coarse = self.g.contract(membership)
            coarse['level'] = levels

            if coarse.vcount() == self.g.vcount():
                # Nothing was merged; keep the current graph and stop.
                break

            self.g = coarse

        return helperigraph.biajcent_matrix(self.g)