Example #1
0
def make_graph(items):
    """Build an undirected link graph from *items* and dump it to two files.

    Each link yielded by ``extractd.getmessages(item)`` is read as the pair
    ``(link[0], link[1])``; links whose two ends are equal are skipped.  Both
    ends are translated to ids with ``extractd.getid(n2i, name)`` and the
    edge is added to a ``networkx.Graph``.  ``extractd.countup`` is called
    once for ``(uid, vid)`` and once for ``(vid, uid)`` on every link.

    Output files, both prefixed with ``sys.argv[1]``:

    * ``<prefix>.wpairs`` -- one ``uid<TAB>vid<TAB>weight`` line per graph
      edge; the weight is the smaller of the two directional tallies.
    * ``<prefix>.n2i`` -- one ``name<TAB>id`` line per entry of ``n2i``.

    Ids and weights are written with ``%d``, so they must be numeric.

    Returns ``None``.
    """
    graph = networkx.Graph()
    weights = {}
    n2i = {}

    for item in items:
        for link in extractd.getmessages(item):
            u, v = link[0], link[1]
            if u == v:
                # Self-links carry no edge information.
                continue

            uid = extractd.getid(n2i, u)
            vid = extractd.getid(n2i, v)

            graph.add_edge(uid, vid)

            # NOTE(review): both directions are tallied for every link, so the
            # two counters presumably always match -- confirm against
            # extractd.countup before relying on the min() below.
            extractd.countup(weights, (uid, vid))
            extractd.countup(weights, (vid, uid))

    prefix = sys.argv[1]

    # open() replaces the Python-2-only file() builtin; it behaves the same
    # here and also exists on Python 3.
    with open('%s.wpairs' % prefix, 'w') as opened:
        for e in graph.edges():
            w = min(weights[(e[0], e[1])], weights[(e[1], e[0])])
            opened.write('%d\t%d\t%d\n' % (e[0], e[1], w))

    with open('%s.n2i' % prefix, 'w') as opened:
        for u in n2i:
            opened.write('%s\t%d\n' % (u, n2i[u]))
Example #2
0
def makegraph(items):
    """Return a weighted link graph built from *items*, plus the id map.

    Links come from ``extractd.getmessages(item)`` and are read as the pair
    ``(link[0], link[1])``; a link whose two ends are equal is ignored.  End
    points are mapped to ids with ``utils.getid(n2i, name)`` and every link is
    tallied in both directions with ``utils.count``.

    Each edge of the intermediate graph gets the smaller of its two
    directional tallies as weight.  The weighted edges are then passed through
    ``utils.filter_gt(..., 2)`` (presumably keeps weights greater than 2 --
    confirm against utils) and the survivors are added, with a ``weight``
    attribute, to a fresh ``networkx.Graph``.

    Returns the tuple ``(bigraph, n2i)``.
    """
    link_graph = networkx.Graph()
    tallies = {}
    n2i = {}

    for item in items:
        for link in extractd.getmessages(item):
            src, dst = link[0], link[1]
            if src != dst:
                src_id = utils.getid(n2i, src)
                dst_id = utils.getid(n2i, dst)

                link_graph.add_edge(src_id, dst_id)

                utils.count(tallies, (src_id, dst_id))
                utils.count(tallies, (dst_id, src_id))

    weighted_edges = {}
    for edge in link_graph.edges():
        a, b = edge[0], edge[1]
        forward = tallies[(a, b)]
        backward = tallies[(b, a)]
        # Keep the smaller directional tally as the edge weight.
        if forward <= backward:
            weighted_edges[(a, b)] = forward
        else:
            weighted_edges[(a, b)] = backward

    kept = utils.filter_gt(weighted_edges, 2)

    bigraph = networkx.Graph()
    for pair in kept:
        bigraph.add_edge(pair[0], pair[1], weight=kept[pair])

    return bigraph, n2i
Example #3
0
def getmessages(db, items):
    """Append to *db* the items of every user that *items* link to.

    For each link yielded by ``extractd.getmessages(item)``, the second
    element is collected when it differs from the first.  For every collected
    user, ``useritems(db, user)`` is called; a ``tweepy.error.TweepError``
    from that call skips the user.  Otherwise each returned item is appended
    to *db* and two progress lines are logged.

    NOTE(review): ``logging`` is called directly, so it must be a callable
    defined elsewhere rather than the stdlib module, and ``api`` is a free
    name not defined in this function -- confirm both exist at module level.

    Returns ``None``.
    """
    targets = set()

    for item in items:
        # Keep only the second end of links whose two ends differ.
        targets.update(
            link[1]
            for link in extractd.getmessages(item)
            if link[0] != link[1]
        )

    for user in targets:
        try:
            fetched = useritems(db, user)
        except tweepy.error.TweepError:
            # Skip users whose items cannot be fetched.
            pass
        else:
            for fetched_item in fetched:
                db.append(fetched_item)
            logging("%s updated (%d items added)" % (user, len(fetched)))
            logging("\t%d remains" % getlimits_api(api))
Example #4
0
if __name__ == "__main__":
    # Derive one statistics record per stored item and append it to the
    # statistics collection, continuing after the id already recorded there.

    args = parse_args()

    db = Corpus(database=args.database, collection=args.items)
    db_stats = Corpus(database=args.database, collection=args.itemstats)

    # Resume point: the "id" of the first record returned by findsorted().
    # NOTE(review): presumably that is the highest id processed so far --
    # confirm the sort direction of Corpus.findsorted.
    try:
        latstats = db_stats.findsorted({}, key="id")[0]["id"]
    except IndexError:
        # No statistics stored yet: start from the beginning.  A plain ``0``
        # is used because the ``0L`` long-literal suffix is a syntax error on
        # Python 3.
        latstats = 0

    for i, item in enumerate(db.find({"id": {"$gt": latstats}})):

        words = extractd.getwords(item)
        messages = extractd.getmessages(item)
        tags = extractd.gethashtags(item)
        urls = extractd.geturls(item)

        db_stats.append({
            "screen_name": item["screen_name"],
            "words": words,
            "messages": messages,
            "hashtags": tags,
            "urls": urls,
            "created_at": item["created_at"],
            "id": item["id"],
        })

        # Progress output: running index and the id just processed.
        print(i, item["id"])