def init_data(user_id, api):
    """
    Fetches the 1hop users and 1hop incoming relationships of a user,
    writes them through db_mongo and then starts the 2hop crawl
    :param user_id: user the 1hop data is fetched for
    :param api: API object handed on to the find_* helpers
    """
    database = db_mongo.init_db()

    # 1hop users -> "user_1hop"
    db_mongo.update_user(find_1hop_user(user_id, api),
                         db_mongo.get_doc("user_1hop", database))

    # 1hop incoming relationships -> "relationship_1hop"
    db_mongo.update_relatioship(
        find_1hop_relationship_incoming(user_id, api),
        db_mongo.get_doc("relationship_1hop", database))

    print("ensure")
    db_mongo.ensure_consistency("relationship_1hop", "user_1hop", database)

    # The stored 1hop relationships are the input of the 2hop crawl.
    stored_1hop = db_mongo.get_doc("relationship_1hop", database).find()
    find_2hop_relationship_incoming(stored_1hop, api)
def find_2hop_relationship_incoming(user_ids, api):
    """
    Gets the 2hop relationship (followers) for the specified 1hop users

    For every entry the follower ids are requested with
    ``api.followers_ids``; at most 101 of them are kept and written to the
    "relationship_2hop" collection as
    ``{"id": follower, "edge": [user, follower]}`` documents.
    When the rate limit is hit, the function waits 15 minutes and retries
    the same user; on any other TweepError the user is skipped.
    :param user_ids: iterable of 1hop entries, each providing an 'id' key
    :param api: API object providing followers_ids
    """
    from itertools import islice

    # The previous counting loop let followers 0..100 through, i.e. 101
    # entries per user; the cap is kept unchanged.
    max_followers = 101

    # The handle and the collection do not change between users, so they
    # are obtained once instead of once per user.
    db = db_mongo.init_db()
    coll = db_mongo.get_doc("relationship_2hop", db)

    for index, u in enumerate(user_ids):
        print(index)

        ids = None
        while True:
            try:
                ids = api.followers_ids(u['id'])
            except tweepy.RateLimitError:
                fun_logging.set_log(
                    "RateLimitError please waiting 15 minutes", 3)
                print("RateLimitError please waiting 15 minutes")
                time.sleep(15 * 60)
                fun_logging.set_log("Re start getting data", 0)
                print("Re start getting data")
                # Retry the same user; previously it was silently dropped.
                continue
            except tweepy.TweepError:
                print("TweepError")
            break

        if ids is None:
            # The request failed for a reason other than the rate limit.
            continue

        print(len(ids))
        relationship = [
            {"id": follower, "edge": [u['id'], follower]}
            for follower in islice(ids, max_followers)
        ]
        db_mongo.update_relatioship(relationship, coll)
        db_mongo.ensure_single(db)

# Example #3
def load_friends_training_set(api, tabel_name, num1, num2=5):
    """
    Gets specified number of users' specified number of tweet as training set

    URLs (``scheme://...``) are removed from every tweet. When the rate
    limit is hit, the function waits 15 minutes and retries the same user;
    on any other TweepError the user is skipped. If the collection holds
    fewer than num1 documents, only the available ones are processed.
    :param api: API object handed on to load_friend_training_set
    :param tabel_name: name of the collection holding the user id list
    :param num1: users' number
    :param num2: number of tweet per user
    :return: tweet list
    """
    from itertools import islice

    # [a-zA-Z], not [a-zA-z]: the latter range also matches [ \ ] ^ _ and `.
    url_pattern = re.compile(r"[a-zA-Z]+://[^\s]*")

    result = []
    db = db_mongo.init_db()
    cursor = db_mongo.get_doc(tabel_name, db).find()

    # One pass over the cursor instead of indexing it twice per user.
    for user in islice(cursor, max(num1, 0)):
        user_id = user['id']
        print(user_id)

        while True:
            try:
                # Materialised inside the try so that a retry never leaves
                # a partially collected user in the result.
                tweets = list(load_friend_training_set(api, user_id, num2))
            except tweepy.RateLimitError:
                print("wait")
                time.sleep(15 * 60)
                print("restart")
                # Retry the same user; previously it was silently dropped.
                continue
            except tweepy.TweepError:
                print("error")
                tweets = []
            break

        result.extend(url_pattern.sub("", tweet) for tweet in tweets)
    return result

# Example #4
def get_gra(task_id):
    """
    Builds the follower graph from the stored crawl data

    The root node "0" is linked to every document of "user_1hop"; every
    document of "relationship_2hop" whose edge end points differ adds an
    edge and, the first time its target id is seen, a node. Node positions
    are randomised.
    :param task_id: not used by this function
    :return: JSON string of the form {"nodes": [...], "edges": [...]}
    """
    root_id = "0"
    nodes = [{"y": 0, "x": 5000, "size": 2, "id": root_id,
              "label": "AndySgd1995"}]
    edges = []
    # Node ids already emitted; a node id must not appear twice in "nodes".
    seen_ids = {root_id}

    db = db_mongo.init_db()
    users = db_mongo.get_doc("user_1hop", db).find()
    relationships = db_mongo.get_doc("relationship_2hop", db).find()

    j = 0
    for user in users:
        j += 1
        node_id = str(user['id'])
        # Node size is the number of digits of the follower count.
        size = len(str(user['followers_count']))
        nodes.append({"id": node_id,
                      "label": j,
                      "x": random.randint(100 * j, 100 * j + 50),
                      "y": random.randint(400, 500),
                      "size": size})
        # The source uses the same string id as the root node entry.
        edges.append({"id": node_id, "source": root_id, "target": node_id})
        seen_ids.add(node_id)
    j += 1

    for rel in relationships:
        source_id = str(rel['edge'][0])
        target_id = str(rel['edge'][1])
        if source_id == target_id:
            continue
        # A target can be reached by several edges or already be a 1hop
        # user; only its first occurrence creates a node.
        if target_id not in seen_ids:
            seen_ids.add(target_id)
            nodes.append({"id": target_id,
                          "label": j,
                          "x": random.randint(j, j + 20),
                          "y": random.randint(2000, 6000),
                          "size": 2})
        edges.append({"id": j, "source": source_id, "target": target_id})
        j += 1

    return json.dumps({"nodes": nodes, "edges": edges})

# Example #5
def classification(api, id):
    """
    Ranks the tf-idf terms of a user's 5 most recent timeline tweets

    The tweets are joined into one text, URLs are removed, and the text is
    appended as the last document to the contents of the
    "friends_training_set" collection before tfidf() is called. Every term
    with a positive weight in that last document is printed and returned.
    :param api: API object providing user_timeline
    :param id: user whose timeline is read
    :return: list of (term, weight) tuples sorted by descending weight
    """
    texts = [status.text
             for status in tweepy.Cursor(api.user_timeline, id).items(5)]
    # Joined with a space so the last word (or URL) of one tweet does not
    # fuse with the first word of the next one.
    # [a-zA-Z], not [a-zA-z]: the latter range also matches [ \ ] ^ _ and `.
    tweet = re.sub(r"[a-zA-Z]+://[^\s]*", "", " ".join(texts))

    db = db_mongo.init_db()
    documents = db_mongo.get_doc("friends_training_set", db).find()
    corpus = [doc['content'] for doc in documents]
    corpus.append(tweet)

    name, array = tfidf(corpus)
    # The user's text was appended last, so its weights are the last row.
    weights = list(array[-1])

    keyword = {}
    # Walk over every term; the previous range(len(l) - 1) loop skipped
    # the last one.
    for term, weight in zip(name, weights):
        if weight > 0:
            print(term)
            print(weight)
            keyword[str(term)] = weight

    return sorted(keyword.items(), key=lambda item: item[1], reverse=True)

def init_data(user_id, api):
    """
    Runs one crawl for a user: 1hop users and 1hop incoming relationships
    are fetched and written through db_mongo, then the 2hop crawl is
    started from the stored 1hop relationships
    :param user_id: user the 1hop data is fetched for
    :param api: API object handed on to the find_* helpers
    """
    users_name = "user_1hop"
    relations_name = "relationship_1hop"
    mongo = db_mongo.init_db()

    hop1_users = find_1hop_user(user_id, api)
    users_coll = db_mongo.get_doc(users_name, mongo)
    db_mongo.update_user(hop1_users, users_coll)

    hop1_relations = find_1hop_relationship_incoming(user_id, api)
    relations_coll = db_mongo.get_doc(relations_name, mongo)
    db_mongo.update_relatioship(hop1_relations, relations_coll)

    print("ensure")
    db_mongo.ensure_consistency(relations_name, users_name, mongo)

    # Read the relationships back from the collection for the 2hop crawl.
    hop1_cursor = db_mongo.get_doc(relations_name, mongo).find()
    find_2hop_relationship_incoming(hop1_cursor, api)


if __name__ == "__main__":
    api = init_twitter()
    db = db_mongo.init_db()
    # Disabled: full crawl repeated every 24 hours.
    # while True:
    #     init_data("AndySgd1995", api)
    #     print 1
    #     fun_logging.set_log("finished once data crawling", 0)
    #     time.sleep(24 * 60 * 60)

    # Currently only the 2hop crawl runs, from the stored 1hop relationships.
    find_2hop_relationship_incoming(
        db_mongo.get_doc("relationship_1hop", db).find(), api)