def citations2syn(dbpath, outpath):
    """Load article citations from a SQLite database into a ``Net``.

    Every row of the ``articles`` table becomes a node labelled
    ``"<title> [<id>]"``.  Every row of the ``citations`` table whose
    ``targ_id`` is ``>= 0`` becomes an edge, added as
    ``add_edge(origin_node, target_node, origin_article_timestamp)``.

    Edges that cannot be added (for example because one endpoint id has no
    matching article) are skipped after printing ``oops.``; the import then
    carries on with the next citation.

    Args:
        dbpath: Path handed to ``sqlite3.connect``.
        outpath: Value handed to the ``Net`` constructor (presumably the
            output location -- confirm against ``Net``).
    """
    net = Net(outpath)
    conn = sqlite3.connect(dbpath)
    try:
        cur = conn.cursor()
        try:
            nodes = {}       # article id -> value returned by net.add_node
            timestamps = {}  # article id -> article timestamp

            cur.execute("SELECT id, title, timestamp FROM articles")
            for article_id, title, timestamp in cur:
                label = '%s [%d]' % (title, article_id)
                nodes[article_id] = net.add_node(label=label)
                timestamps[article_id] = timestamp

            cur.execute(
                "SELECT orig_id, targ_id FROM citations WHERE targ_id>=0")
            for orig_id, targ_id in cur:
                try:
                    net.add_edge(nodes[orig_id], nodes[targ_id],
                                 timestamps[orig_id])
                except Exception:
                    # Best-effort import: a bad citation must not abort the
                    # run.  ``Exception`` (rather than a bare ``except``)
                    # lets KeyboardInterrupt / SystemExit stop the import.
                    print('oops.')
        finally:
            # Release the cursor even when a query or insertion fails.
            cur.close()
    finally:
        conn.close()
    print('Done.')
def authorcitations2syn(dbpath, outpath):
    """Load author-to-author citations from a SQLite database into a ``Net``.

    Every row of the ``authors`` table becomes a node labelled
    ``"<name> [<id>]"``.  Every row of the ``author_citations`` table becomes
    an edge, added as ``add_edge(origin_node, target_node, timestamp)``.

    Args:
        dbpath: Path handed to ``sqlite3.connect``.
        outpath: Value handed to the ``Net`` constructor (presumably the
            output location -- confirm against ``Net``).

    Raises:
        KeyError: If a citation references an author id that is not present
            in the ``authors`` table.
    """
    net = Net(outpath)
    conn = sqlite3.connect(dbpath)
    try:
        cur = conn.cursor()
        try:
            nodes = {}  # author id -> value returned by net.add_node

            cur.execute("SELECT id, name FROM authors")
            for author_id, name in cur:
                label = "%s [%d]" % (name, author_id)
                nodes[author_id] = net.add_node(label=label)

            cur.execute(
                "SELECT orig_id, targ_id, timestamp FROM author_citations")
            for orig_id, targ_id, timestamp in cur:
                net.add_edge(nodes[orig_id], nodes[targ_id], timestamp)
        finally:
            # Release the cursor even when a query or lookup fails.
            cur.close()
    finally:
        conn.close()
    print("Done.")
def _print_progress(stage, done, total):
    """Print one ``adding <stage> <percent>% (<done>/<total>)`` progress line."""
    # A zero total would otherwise raise ZeroDivisionError just for a status
    # message; report 0% in that case instead.
    percent = (float(done) / float(total)) * 100 if total else 0.0
    print('adding %s %f%% (%d/%d)' % (stage, percent, done, total))


def net2syn(dbpath, outpath):
    """Load an article/link network from a SQLite database into a ``Net``.

    Every row of the ``article`` table becomes a node labelled
    ``"<title> [<id>]"``.  Every row of the ``link`` table becomes an edge,
    added as ``add_edge(origin_node, target_node, start_ts, end_ts)``.
    ``net.set_perm_edges(False)`` is called before anything is added.

    Totals are printed up front and a progress line is printed for the first
    row and then every 100000 rows of each table.

    Args:
        dbpath: Path handed to ``sqlite3.connect``.
        outpath: Value handed to the ``Net`` constructor (presumably the
            output location -- confirm against ``Net``).

    Raises:
        KeyError: If a link references an article id that is not present in
            the ``article`` table.
    """
    report_every = 100000

    net = Net(outpath)
    net.set_perm_edges(False)
    conn = sqlite3.connect(dbpath)
    try:
        cur = conn.cursor()
        try:
            cur.execute("SELECT count(id) FROM article")
            ncount = cur.fetchone()[0]
            # Single-argument print(...) yields the same text on Python 2
            # and Python 3.
            print('%s nodes total' % (ncount,))

            cur.execute("SELECT count(id) FROM link")
            lcount = cur.fetchone()[0]
            print('%s links total' % (lcount,))

            nodes = {}  # article id -> value returned by net.add_node
            cur.execute("SELECT id, title FROM article")
            for done, (article_id, title) in enumerate(cur):
                label = '%s [%d]' % (title, article_id)
                nodes[article_id] = net.add_node(label=label)
                if done % report_every == 0:
                    _print_progress('nodes', done, ncount)

            cur.execute("SELECT orig_id, targ_id, start_ts, end_ts FROM link")
            for done, (orig_id, targ_id, start_ts, end_ts) in enumerate(cur):
                net.add_edge(nodes[orig_id], nodes[targ_id], start_ts, end_ts)
                if done % report_every == 0:
                    _print_progress('links', done, lcount)
        finally:
            # Release the cursor even when a query or lookup fails.
            cur.close()
    finally:
        conn.close()
    print('Done.')