Esempio n. 1
0
def calculate_features(queue, g_file, pairs):
    """Compute the feature dict of every pair and put each result on ``queue``.

    The graph is read from ``g_file`` with ``nx.read_graphml``, restricted to
    the first component yielded by ``nxa.connected_components`` and wrapped in
    a ``Featurator``.  For each pair, the dict returned by
    ``Featurator.get_feature_dict`` is extended with the keys ``'pair'``,
    ``'h1'`` and ``'h2'`` and put on ``queue``.  Once all pairs are handled,
    the string ``'done'`` is put on the queue as an end-of-work marker.

    Args:
        queue: Object exposing ``put``; receives one dict per pair and then
            the ``'done'`` marker.
        g_file: Path of the GraphML file to load.
        pairs: Sized collection of two-item sequences ``(h1, h2)``.  The
            items must support ``<`` and ``+`` (presumably node-id strings;
            confirm against the caller).
    """
    print("Started!")
    # Read the path that was passed in.  The previous code read a module-level
    # ``graph_file`` instead, which only works when that global happens to
    # exist in the process running this function.
    G = nx.read_graphml(g_file)
    # NOTE(review): this keeps the *first* component yielded, which is not
    # guaranteed to be the largest one.  Left unchanged so the node set stays
    # identical to whatever the code that built ``pairs`` selected.
    components = nxa.connected_components(G)
    G = G.subgraph(next(components))
    featurator = Featurator(G)
    print("File: " + g_file)
    print("Pairs: " + str(len(pairs)))

    count = 0

    for pair in pairs:
        count += 1
        h1 = pair[0]
        h2 = pair[1]
        # Features are computed with the pair in its original order.
        res = featurator.get_feature_dict(h1, h2)
        # Store the two identifiers in ascending order so that (a, b) and
        # (b, a) produce the same 'pair' / 'h1' / 'h2' values.
        low, high = (h1, h2) if h1 < h2 else (h2, h1)
        res['pair'] = low + high
        res['h1'] = low
        res['h2'] = high
        queue.put(res)
    print("Calculated everyone! - " + str(count))
    # End-of-work marker for whoever is reading the queue.
    queue.put('done')
Esempio n. 2
0
    print("Usage: ./generate_csv.py graph_file output_file [#processes]")
    sys.exit()

# Command-line arguments: input GraphML path, output CSV path and an optional
# process count that defaults to 1 when the third argument is absent.
graph_file = sys.argv[1]
output_file = sys.argv[2]
processes = 1 if len(sys.argv) < 4 else int(sys.argv[3])

print("Loading graph file...")
G = nx.read_graphml(graph_file)

print("Obtaining largest connected component...")
# NOTE(review): next(gen) keeps the *first* component yielded, which is not
# guaranteed to be the largest one despite the message above; confirm whether
# max(nxa.connected_components(G), key=len) is what is intended.
gen = nxa.connected_components(G)
mainLst = next(gen)
G = G.subgraph(mainLst)

f = Featurator(G)

# CSV columns: the three pair-identification fields followed by every feature
# name reported by the featurator.
csv_fields = ['pair', 'h1', 'h2'] + f.feature_list()

# newline='' is what the csv module requires for files handed to a writer;
# without it, platforms that translate '\n' end up with doubled line endings.
# NOTE(review): the handle is not closed anywhere in this part of the script.
csvfile = open(output_file, 'w', newline='')
writer = csv.DictWriter(csvfile, fieldnames=csv_fields)
writer.writeheader()

count = 0

# Every unordered pair of distinct nodes, each listed once (j > i).
pairs = [(h1, h2) for i, h1 in enumerate(G.nodes())
         for j, h2 in enumerate(G.nodes()) if j > i]

print("Starting...")
for pair in pairs:
    h1 = pair[0]
Esempio n. 3
0

# Split the pairs into at most `processes` chunks; the chunk size is rounded
# up so that no more than `processes` chunks are needed.
pair_chunks = list(chunks(pairs, len(pairs) // processes + 1))
print(str(len(pair_chunks)))

# Because the chunk size is rounded up, fewer than `processes` chunks can come
# back (e.g. 6 pairs over 4 processes gives 3 chunks of 2), and indexing
# pair_chunks[i] below would then raise IndexError.  Pad with empty chunks so
# every worker still starts and can report completion.
# (Assumes `chunks` yields consecutive slices of the given size; it is
# defined outside this excerpt.)
pair_chunks.extend([] for _ in range(processes - len(pair_chunks)))

start_time = time.time()

# Start one worker per chunk; all workers report through the shared queue.
pList = []
q = Queue()
for i in range(processes):
    p = Process(target=calculate_features,
                args=(q, graph_file, pair_chunks[i]))
    p.start()
    pList.append(p)

f = Featurator(G, radicals)

# CSV columns: the three pair-identification fields followed by every feature
# name reported by the featurator.
csv_fields = ['pair', 'h1', 'h2'] + f.feature_list()

# newline='' is what the csv module requires for files handed to a writer;
# without it, platforms that translate '\n' end up with doubled line endings.
csvfile = open(output_file, 'w', newline='')
writer = csv.DictWriter(csvfile, fieldnames=csv_fields)
writer.writeheader()

# Counters used by the queue-draining loop that follows.
done = 0
count = 0
while True:
    res = q.get()
    count += 1
    if count % 1000 == 1:
        secs = int(time.time() - start_time)
        hours = secs // 3600