def find_2hop_relationship_incoming(user_ids, api):
    """
    Gets the 2hop incoming relationship (the followers) of the specified 1hop
    users and saves it to the "relationship_2hop" collection, one user at a time

    At most 101 follower ids are kept per user. When the rate limit is hit the
    function sleeps for 15 minutes and then asks for the same user again, so
    that user is not lost; on any other TweepError the user is skipped.
    :param user_ids: iterable of 1hop user records, each one readable as u['id']
    :param api: client exposing followers_ids(user_id)
    :return: None
    """
    from itertools import islice

    # Per-user cap on stored follower ids.
    max_followers = 101

    for index, u in enumerate(user_ids):
        print(index)
        user_id = u['id']

        skipped = False
        while True:
            try:
                ids = api.followers_ids(user_id)
                break
            except tweepy.RateLimitError:
                fun_logging.set_log("RateLimitError please waiting 15 minutes", 3)
                print("RateLimitError please waiting 15 minutes")
                time.sleep(15 * 60)
                fun_logging.set_log("Re start getting data", 0)
                print("Re start getting data")
                # Loop again: retry the user that triggered the rate limit.
            except tweepy.TweepError:
                print("TweepError")
                skipped = True
                break
        if skipped:
            continue

        print(len(ids))
        relationship = [
            {"id": follower_id, "edge": [user_id, follower_id]}
            for follower_id in islice(ids, max_followers)
        ]

        # Persist after every user so finished work survives a later failure.
        db = db_mongo.init_db()
        coll = db_mongo.get_doc("relationship_2hop", db)
        db_mongo.update_relatioship(relationship, coll)
        db_mongo.ensure_single(db)
def load_friends_training_set(api, tabel_name, num1, num2=5):
    """
    Gets specified number of users' specified number of tweet as training set

    Users are read from the collection in stored order. If the collection
    holds fewer than num1 users, all of them are used. URLs are removed from
    every tweet. When the rate limit is hit the function sleeps for 15 minutes
    and then asks for the same user again; on any other TweepError the user
    is skipped.
    :param api: client handed to load_friend_training_set
    :param tabel_name: user id list (name of the collection holding the users)
    :param num1: users' number
    :param num2: number of tweet per user
    :return: tweet list
    """
    from itertools import islice

    # Scheme is letters only; the former "A-z" range also matched [ \ ] ^ _ `
    url_pattern = re.compile(r"[a-zA-Z]+://[^\s]*")

    result = []
    db = db_mongo.init_db()
    coll = db_mongo.get_doc(tabel_name, db).find()

    # Walk the result once instead of indexing it twice per user.
    for user in islice(coll, max(num1, 0)):
        user_id = user['id']
        print(user_id)

        while True:
            try:
                # Clean inside the try so a failed attempt leaves no partial
                # (and, after a retry, duplicated) tweets in the result.
                cleaned = [
                    url_pattern.sub("", text)
                    for text in load_friend_training_set(api, user_id, num2)
                ]
                break
            except tweepy.RateLimitError:
                print("wait")
                time.sleep(15 * 60)
                print("restart")
                # Loop again: retry the user that triggered the rate limit.
            except tweepy.TweepError:
                print("error")
                cleaned = []
                break

        result.extend(cleaned)
    return result
def init_data(user_id, api):
    """
    Fetches and stores the 1hop users and the 1hop incoming relationships of
    a user, then starts the 2hop relationship crawl from the stored
    relationships
    :param user_id: user the crawl starts from
    :param api: client handed to the find_* functions
    :return: None
    """
    db = db_mongo.init_db()

    # 1hop users -> "user_1hop"
    db_mongo.update_user(
        find_1hop_user(user_id, api),
        db_mongo.get_doc("user_1hop", db),
    )

    # 1hop relationships -> "relationship_1hop"
    db_mongo.update_relatioship(
        find_1hop_relationship_incoming(user_id, api),
        db_mongo.get_doc("relationship_1hop", db),
    )

    print("ensure")
    db_mongo.ensure_consistency("relationship_1hop", "user_1hop", db)

    # The 2hop crawl is driven by what is now stored as 1hop relationships.
    stored_relationships = db_mongo.get_doc("relationship_1hop", db).find()
    find_2hop_relationship_incoming(stored_relationships, api)
def get_gra(task_id):
    """
    Builds the graph of the stored 1hop users and 2hop relationships and
    returns it as JSON

    Every 1hop user becomes a node linked to the hard-coded root node; every
    2hop relationship whose two ends differ adds a node for its second end
    and an edge between both ends. Node positions are random.
    :param task_id: not used by this function
    :return: JSON string of the form {"nodes": [...], "edges": [...]}
    """
    db = db_mongo.init_db()
    users_1hop = db_mongo.get_doc("user_1hop", db).find()
    relations_2hop = db_mongo.get_doc("relationship_2hop", db).find()

    # Root node of the graph.
    nodes = [{"y": 0, "x": 5000, "size": 2, "id": "0", "label": "AndySgd1995"}]
    edges = []

    counter = 0
    for user in users_1hop:
        counter += 1
        # Node size is the number of digits of the follower count.
        digits = len(str(user['followers_count']))
        uid = str(user['id'])
        x_pos = random.randint(100 * counter, 100 * counter + 50)
        y_pos = random.randint(400, 500)
        nodes.append({"id": uid, "label": counter, "x": x_pos, "y": y_pos, "size": digits})
        edges.append({"id": uid, "source": 0, "target": uid})

    counter += 1
    for relation in relations_2hop:
        source = str(relation['edge'][0])
        target = str(relation['edge'][1])
        if source == target:
            continue
        x_pos = random.randint(counter, counter + 20)
        y_pos = random.randint(2000, 6000)
        nodes.append({"id": target, "label": counter, "x": x_pos, "y": y_pos, "size": 2})
        edges.append({"id": counter, "source": source, "target": target})
        counter += 1

    return json.dumps({"nodes": nodes, "edges": edges})
def classification(api, id):
    """
    Extracts the weighted keywords of a user's latest tweets

    The user's 5 most recent tweets are joined into one document and URLs are
    stripped from it. The document is appended to the contents stored in the
    "friends_training_set" collection and tfidf() is run over all of them.
    Every term with a positive weight in the user's document is printed and
    returned.
    :param api: client whose user_timeline is paged through tweepy.Cursor
    :param id: user id passed to api.user_timeline
    :return: list of (term, weight) tuples sorted by weight, highest first
    """
    statuses = tweepy.Cursor(api.user_timeline, id).items(5)
    # Join with a space: without a separator a URL ending one tweet would
    # swallow the first word of the next tweet when URLs are stripped below.
    tweet = " ".join(status.text for status in statuses)
    # Scheme is letters only; the former "A-z" range also matched [ \ ] ^ _ `
    tweet = re.sub(r"[a-zA-Z]+://[^\s]*", "", tweet)

    db = db_mongo.init_db()
    coll = db_mongo.get_doc("friends_training_set", db).find()
    documents = [doc['content'] for doc in coll]
    # The user's document goes last, so its weights are the last row.
    documents.append(tweet)

    # presumably name holds the terms and array one weight row per document
    # -- verify against tfidf()
    name, array = tfidf(documents)
    weights = list(array[-1])

    keyword = {}
    # Look at every term, including the last one.
    for term, weight in zip(name, weights):
        if weight > 0:
            print(term)
            print(weight)
            keyword[str(term)] = weight

    return sorted(keyword.items(), key=lambda item: item[1], reverse=True)
def init_data(user_id, api):
    """
    Fetches and stores the 1hop users and the 1hop incoming relationships of
    a user, then starts the 2hop relationship crawl from the stored
    relationships
    :param user_id: user the crawl starts from
    :param api: client handed to the find_* functions
    :return: None
    """
    db = db_mongo.init_db()

    # 1hop users -> "user_1hop"
    db_mongo.update_user(
        find_1hop_user(user_id, api),
        db_mongo.get_doc("user_1hop", db),
    )

    # 1hop relationships -> "relationship_1hop"
    db_mongo.update_relatioship(
        find_1hop_relationship_incoming(user_id, api),
        db_mongo.get_doc("relationship_1hop", db),
    )

    print("ensure")
    db_mongo.ensure_consistency("relationship_1hop", "user_1hop", db)

    # The 2hop crawl is driven by what is now stored as 1hop relationships.
    stored_relationships = db_mongo.get_doc("relationship_1hop", db).find()
    find_2hop_relationship_incoming(stored_relationships, api)


if __name__ == "__main__":
    api = init_twitter()
    db = db_mongo.init_db()

    # Disabled: full crawl repeated once a day.
    # while True:
    #     init_data("AndySgd1995", api)
    #     print 1
    #     fun_logging.set_log("finished once data crawling", 0)
    #     time.sleep(24 * 60 * 60)

    # Only the 2hop crawl runs, from the 1hop relationships already stored.
    coll_relationship1_find = db_mongo.get_doc("relationship_1hop", db).find()
    find_2hop_relationship_incoming(coll_relationship1_find, api)