def run():
    task = sys.argv[1]
    f = open(sys.argv[2])

    # parameters
    kwLimit = utils.get_parameter('kwLimit')
    edge_th = utils.get_parameter('edge_th')
    dispth = utils.get_parameter('dispth')
    ethunit = utils.get_parameter('ethunit')

    for years in f.readlines():
        currentyears = years.replace('\n', '').split(";")
        print('--> running ' + task + ' for ' + str(currentyears))
        if task == '--raw-network':
            # keywords relevance
            relevant.relevant_full_corpus(currentyears, kwLimit, edge_th)
            # full network
            graph.construct_graph(currentyears, kwLimit, edge_th)
            # sensitivity
            graph.sensitivity(currentyears, kwLimit, edge_th)
        elif task == '--classification':
            # construct communities
            graph.construct_communities(currentyears, kwLimit, edge_th, dispth, ethunit)
            # post processing
            postprocessing.export_classification(currentyears, kwLimit, edge_th, dispth, ethunit)
        elif task == '--custom':
            print("custom")
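# Example (assumed, for illustration only) of the input file run() expects:
# sys.argv[2] names a text file with one semicolon-separated window of years
# per line, e.g.
#
#   1976;1977;1978;1979;1980
#   1981;1982;1983;1984;1985
#
# so each line becomes one `currentyears` list such as ['1976', ..., '1980'].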
def train(self, X, w=None, **kwargs):
    W, D, L = construct_graph(X, **kwargs)
    W[W > 1E-50] = 1
    L = D - W
    eD, eV = np.linalg.eig(L)
    eidx = np.argsort(eD)
    Y = np.real(eV[:, eidx[:self._k]].T)
    return super(SpectralClustering, self).train(Y.T, w=w)
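# A minimal, self-contained sketch of the spectral embedding step used in
# train() above (an illustration under assumptions: numpy only, a hand-built
# affinity matrix; construct_graph and the SpectralClustering base class are
# not used here). Binarize the affinities, form the unnormalized Laplacian
# L = D - W, and keep the eigenvectors of the k smallest eigenvalues.
import numpy as np

def spectral_embedding_sketch(W, k=2):
    # Binarize weights, mirroring the 1E-50 threshold in train().
    W = (W > 1e-50).astype(float)
    D = np.diag(W.sum(axis=1))
    L = D - W
    eD, eV = np.linalg.eigh(L)   # L is symmetric; eigh returns sorted real eigenvalues
    return eV[:, :k]             # one k-dimensional embedding row per sample

# Two disconnected pairs of points: rows 0-1 and rows 2-3 receive distinct
# embeddings that a downstream k-means step could separate.
W = np.array([[0., 1., 0., 0.],
              [1., 0., 0., 0.],
              [0., 0., 0., 1.],
              [0., 0., 1., 0.]])
print(spectral_embedding_sketch(W, k=2))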
def integration_test():
    grammar = {
        'root': {
            'type': 'rule',
            'oneOrMore': False,
            'value': [['diagnosis'], ['examination']]
        },
        'diagnosis': {
            'type': 'rule',
            'oneOrMore': False,
            'value': ['diagnosed', 'patient', 'with', 'disease']
        },
        'examination': {
            'type': 'rule',
            'oneOrMore': False,
            'value': ['performed', 'exams', 'on', 'patient']
        },
        'patient': {
            'type': 'variable',
            'oneOrMore': False,
            'value': '[a-zA-Z]+'
        },
        'disease': {
            'type': 'rule',
            'oneOrMore': False,
            'value': [['cancer'], ['diabetes'], ['aids']]
        },
        'exams': {
            'type': 'rule',
            'oneOrMore': True,
            'value': [['colonoscopy'], ['mri'], ['catscan']]
        }
    }

    graph = construct_graph(grammar)
    assert len(graph) == 5
    assert graph['root'] == ['diagnosis', 'examination']
    assert graph['diagnosis'] == ['disease']
    assert graph['examination'] == ['exams']
    assert graph['disease'] == []
    assert graph['exams'] == []

    valid, reason, rules = verify_structure(graph)
    assert valid

    keywords, variables = generate_keywords_and_variables(grammar)
    assert 'with' in keywords
    assert 'mri' in keywords
    assert variables == ['patient']
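# A minimal sketch (an assumption for illustration, not the project's actual
# construct_graph) of a dependency-graph builder consistent with the assertions
# above: only rule-type entries become nodes, and each rule points to the
# rule-type names it references; variables and plain keywords are skipped.
def construct_graph_sketch(grammar):
    rules = [name for name, spec in grammar.items() if spec['type'] == 'rule']
    graph = {}
    for name in rules:
        value = grammar[name]['value']
        # 'value' is either a flat token sequence or a list of single-token alternatives
        tokens = [t for item in value
                  for t in (item if isinstance(item, list) else [item])]
        graph[name] = [t for t in tokens if t in rules]
    return graph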
def process_event(prefix, pt_min, n_phi_sectors, select_phi_sector,
                  phi_slope_max, phi_slope_mid_max, phi_slope_outer_max,
                  z0_max, no_missing_hits, n_tracks):
    # Load the data
    evtid = int(prefix[-9:])
    logging.info('Event %i, loading data' % evtid)
    hits, particles, truth = dataset.load_event(
        prefix, parts=['hits', 'particles', 'truth'])

    # Apply hit selection
    logging.info('Event %i, selecting hits' % evtid)
    hits = select_hits(hits, truth, particles, pt_min=pt_min,
                       no_missing_hits=no_missing_hits).assign(evtid=evtid)
    hits_sectors = split_phi_sectors(hits, n_phi_sectors=n_phi_sectors,
                                     select_phi_sector=select_phi_sector)

    # Graph features and scale
    feature_names = ['r', 'phi', 'z']
    feature_scale = np.array([1000., np.pi / n_phi_sectors, 1000.])

    # Define adjacent layers
    n_det_layers = 10
    l = np.arange(n_det_layers)
    layer_pairs = np.stack([l[:-1], l[1:]], axis=1)

    # Construct the graph
    logging.info('Event %i, constructing graphs' % evtid)
    graphs = [construct_graph(sector_hits, layer_pairs=layer_pairs,
                              phi_slope_max=phi_slope_max,
                              phi_slope_mid_max=phi_slope_mid_max,
                              phi_slope_outer_max=phi_slope_outer_max,
                              z0_max=z0_max,
                              feature_names=feature_names,
                              feature_scale=feature_scale,
                              max_tracks=n_tracks,
                              no_missing_hits=no_missing_hits)
              for sector_hits in hits_sectors]
    return graphs
def process_event(prefix, pt_min, n_phi_sectors, phi_slope_max, z0_max, n_tracks):
    # Load the data
    evtid = int(prefix[-9:])
    logging.info('Event %i, loading data' % evtid)
    hits, particles, truth = dataset.load_event(
        prefix, parts=['hits', 'particles', 'truth'])
    blacklist_particles = pd.read_csv(
        prefix + "-blacklist_particles.csv").particle_id.values

    # Apply hit selection
    logging.info('Event %i, selecting hits' % evtid)
    hits = select_hits(hits, truth, particles, pt_min=pt_min).assign(evtid=evtid)
    hits_sectors = split_phi_sectors(hits, blacklist_particles, n_tracks,
                                     n_phi_sectors=n_phi_sectors)

    # Graph features and scale
    feature_names = ['x', 'y', 'z']

    # Define adjacent layers
    n_det_layers = 10
    l = np.arange(n_det_layers)
    layer_pairs = np.stack([l[:-1], l[1:]], axis=1)

    # Construct the graph
    logging.info('Event %i, constructing graphs' % evtid)
    graphs = [construct_graph(sector_hits, layer_pairs=layer_pairs,
                              phi_slope_max=phi_slope_max, z0_max=z0_max,
                              feature_names=feature_names)
              for sector_hits in hits_sectors]
    return graphs
def process_grammar(grammar_id, grammar):
    # Produce a graph representing the grammar.
    graph = construct_graph(grammar)

    # Verify the structural integrity of the graph.
    # Rules are returned in topological order of dependency.
    valid, reason, rules = verify_structure(graph)
    if not valid:
        return valid, reason

    # Verify that there is only one user_id rule.
    user_id_rules = get_user_id_rules(grammar)
    if len(user_id_rules) > 1:
        return False, 'Only one of {} can be the user id'.format(', '.join(user_id_rules))

    # Parse to generate the keywords and variables from the grammar.
    keywords, variables = generate_keywords_and_variables(grammar)

    # Hash the grammar deterministically.
    hsh = hash_grammar(grammar)

    # Attempt to persist the grammar to mongo.
    return persist_grammar(grammar_id, grammar, hsh, rules, keywords, variables)
# -*- coding: UTF-8 -*-
# Program to do a breadth first traversal of a graph
import graph


def bfs(graph, start):
    visited = []
    queue = [start]
    visited.append(start)
    while queue:
        # pop the first element of the queue
        s = queue[0]
        print(s)
        del queue[0]
        for k in graph.edges[s]:
            if k not in visited:
                visited.append(k)
                queue.append(k)


if __name__ == '__main__':
    graph = graph.construct_graph()
    bfs(graph, 2)
def topological_sort(g1, start, visited, stack):
    # Reconstructed DFS finish-order pass (the def header was missing in the
    # excerpt); assumes the adjacency-list interface (g1.edges) used by the
    # traversal code above.
    visited.append(start)
    for i in g1.edges[start]:
        if i not in visited:
            topological_sort(g1, i, visited, stack)
    stack.append(start)


def SCC(g1):
    visited = []
    stack = []
    for i in range(g1.V):
        if i not in visited:
            topological_sort(g1, i, visited, stack)
    # transpose the current graph
    gr = transpose(g1)
    # apply dfs on the transposed graph
    visited = []
    while stack:
        s = stack[-1]
        del stack[-1]
        if s not in visited:
            print("Next SCC")
            dfs(gr, s, visited)


g1 = graph.construct_graph()
SCC(g1)
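# Hedged sketches (assumptions for illustration, not the original helpers) of
# the transpose and dfs routines referenced in SCC above, written against the
# same adjacency-list style (every vertex appears as a key in .edges). The
# transpose sketch returns a plain dict of reversed edges rather than a graph
# object.
def transpose_sketch(g1):
    reversed_edges = {v: [] for v in g1.edges}
    for u in g1.edges:
        for v in g1.edges[u]:
            reversed_edges[v].append(u)   # flip every edge u -> v into v -> u
    return reversed_edges

def dfs_sketch(edges, s, visited):
    visited.append(s)
    print(s)                              # each contiguous printed run is one SCC
    for k in edges[s]:
        if k not in visited:
            dfs_sketch(edges, k, visited)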
def test_construct_graph():
    years = ['1976', '1977', '1978', '1979', '1980']
    kwLimit = 100000.0
    min_edge_th = 10.0
    graph.construct_graph(years, kwLimit, min_edge_th)
for key in clusters:
    outf_1.write(' Cluster ' + str(key) + '\n'
                 + ' NB: ' + str(clusters[key][2]['NB']) + '\n'
                 + ' BRCA: ' + str(clusters[key][2]['BRCA']) + '\n'
                 + ' KIRC: ' + str(clusters[key][2]['KIRC']) + '\n'
                 + ' COAD/READ: ' + str(clusters[key][2]['COAD/READ']) + '\n')
outf_1.close()

##############################################################################
################### test hcs algorithm #######################################

# construct graph with approximately 0.1*n(n-1)/2 edges
dist = 1
perc = 0.1
G, threshold = construct_graph(data, None, perc, dist)

# make singleton set
G, S = singleton(G)

# retrieve sets of connected nodes
cc = connectedComponents(G)

# HCS algorithm
mincut_trials = 100
subgraphs = []
singles = []

# printing statements can be used to keep track of results
for i in range(len(cc)):
    # print("Connected component", i)
    G = cc[i]
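# A minimal sketch of the HCS (Highly Connected Subgraphs) recursion that the
# test above drives, written with networkx as an assumption (the original
# construct_graph / singleton / connectedComponents / min-cut helpers are not
# used here): a subgraph is "highly connected" when its minimum edge cut is
# larger than half its node count; otherwise split along the cut and recurse.
import networkx as nx

def hcs_sketch(G):
    # Expects a connected graph, e.g. one connected component cc[i] from above.
    n = G.number_of_nodes()
    if n <= 2:
        return [G]
    cut_edges = nx.minimum_edge_cut(G)
    if len(cut_edges) > n / 2:
        return [G]                        # highly connected: keep as one cluster
    H = G.copy()
    H.remove_edges_from(cut_edges)        # the min cut disconnects the graph
    parts = [H.subgraph(c).copy() for c in nx.connected_components(H)]
    return [sub for part in parts for sub in hcs_sketch(part)]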