def make_graph(items):
    """Build an undirected link graph from *items* and dump it to disk.

    Every link yielded by ``extractd.getmessages(item)`` contributes one edge
    between its first two elements; links whose two ends are equal are
    skipped. Endpoints are translated through ``extractd.getid`` with a shared
    ``n2i`` mapping (presumably name -> integer id; confirm against
    ``extractd``), and ``extractd.countup`` is called for both orientations of
    each edge.

    Two tab-separated files are written, both prefixed with ``sys.argv[1]``:

    * ``<prefix>.wpairs`` -- one ``id<TAB>id<TAB>weight`` line per graph edge,
      where the weight is the smaller of the two directional counts.
    * ``<prefix>.n2i`` -- one ``key<TAB>id`` line per entry of ``n2i``.

    The function returns nothing; its result is the two files.
    """
    graph = networkx.Graph()
    weights = {}
    n2i = {}
    for item in items:
        for link in extractd.getmessages(item):
            u, v = link[0], link[1]
            if u == v:
                continue
            uid = extractd.getid(n2i, u)
            vid = extractd.getid(n2i, v)
            graph.add_edge(uid, vid)
            extractd.countup(weights, (uid, vid))
            extractd.countup(weights, (vid, uid))

    prefix = sys.argv[1]

    # open() instead of file(): the file() builtin exists only in Python 2,
    # while open() behaves the same there and also works on Python 3.
    with open('%s.wpairs' % prefix, 'w') as opened:
        for a, b in graph.edges():
            w = min(weights[(a, b)], weights[(b, a)])
            opened.write('%d\t%d\t%d\n' % (a, b, w))

    with open('%s.n2i' % prefix, 'w') as opened:
        for u in n2i:
            opened.write('%s\t%d\n' % (u, n2i[u]))
def makegraph(items):
    """Return a weighted undirected graph of links found in *items*.

    Links come from ``extractd.getmessages(item)``; the first two elements of
    each link are its endpoints, and links whose endpoints are equal are
    ignored. Endpoints are mapped through ``utils.getid`` with a shared
    ``n2i`` dictionary, and ``utils.count`` is called for both orientations
    of every edge.

    Each edge gets the smaller of its two directional counts as weight. The
    resulting edge -> weight mapping is passed through
    ``utils.filter_gt(..., 2)`` (presumably keeping weights greater than 2;
    confirm against ``utils``) and what remains is loaded into a fresh graph
    with a ``weight`` edge attribute.

    Returns:
        A ``(bigraph, n2i)`` tuple: the filtered weighted graph and the
        mapping built by ``utils.getid``.
    """
    n2i = {}
    weights = {}
    graph = networkx.Graph()

    for item in items:
        for link in extractd.getmessages(item):
            src, dst = link[0], link[1]
            if src != dst:
                src_id = utils.getid(n2i, src)
                dst_id = utils.getid(n2i, dst)
                graph.add_edge(src_id, dst_id)
                utils.count(weights, (src_id, dst_id))
                utils.count(weights, (dst_id, src_id))

    # Collapse the two directional counts of each edge into a single weight.
    weighted_edges = {}
    for a, b in graph.edges():
        forward = weights[(a, b)]
        backward = weights[(b, a)]
        if forward <= backward:
            weighted_edges[(a, b)] = forward
        else:
            weighted_edges[(a, b)] = backward

    edges = utils.filter_gt(weighted_edges, 2)

    bigraph = networkx.Graph()
    for pair in edges:
        bigraph.add_edge(pair[0], pair[1], weight=edges[pair])
    return bigraph, n2i
def getmessages(db, items):
    """Fetch and store the items of every user that *items* link to.

    For each entry yielded by ``extractd.getmessages(item)`` whose first two
    elements differ, the second element is collected as a user. For every
    collected user, ``useritems(db, user)`` is called and each returned item
    is appended to *db*. Users for which the call raises
    ``tweepy.error.TweepError`` are skipped.

    After each successful user, two messages are emitted through
    ``logging(...)``: the number of items added, and the value of
    ``getlimits_api(api)``.

    NOTE(review): ``logging`` is invoked as a callable and ``api`` is read
    from module scope -- both are defined outside this block; confirm.
    """
    replied_users = {
        reply[1]
        for item in items
        for reply in extractd.getmessages(item)
        if reply[0] != reply[1]
    }

    for user in replied_users:
        try:
            fetched = useritems(db, user)
        except tweepy.error.TweepError:
            # Best effort: a user whose items cannot be fetched is skipped.
            continue

        for entry in fetched:
            db.append(entry)

        logging("%s updated (%d items added)" % (user, len(fetched)))
        logging("\t%d remains" % getlimits_api(api))
if __name__ == "__main__":
    # Incrementally derive per-item statistics: every item in the items
    # collection whose "id" is greater than the resume point gets one record
    # (words, messages, hashtags, urls) appended to the stats collection.
    args = parse_args()
    db = Corpus(database=args.database, collection=args.items)
    db_stats = Corpus(database=args.database, collection=args.itemstats)

    # Resume point: the "id" of the first record returned by findsorted()
    # (presumably the most recently processed item -- confirm the sort order
    # of Corpus.findsorted). An empty stats collection means start from 0.
    try:
        latstats = db_stats.findsorted({}, key="id")[0]["id"]
    except IndexError:
        # Plain 0 instead of the Python 2 long literal 0L, which is a syntax
        # error on Python 3; on Python 2 ints promote to long automatically.
        latstats = 0

    for i, item in enumerate(db.find({"id": {"$gt": latstats}})):
        words = extractd.getwords(item)
        messages = extractd.getmessages(item)
        tags = extractd.gethashtags(item)
        urls = extractd.geturls(item)
        db_stats.append({
            "screen_name": item["screen_name"],
            "words": words,
            "messages": messages,
            "hashtags": tags,
            "urls": urls,
            "created_at": item["created_at"],
            "id": item["id"],
        })
        # Progress trace: running index and the id just processed.
        print(i, item["id"])