def store_userinfo_loc_to_db(self, userInfo_for_regester_time):
    """Save one user's registration-time record to the database.

    Copies ``uid``, ``nickname``, ``location``, ``sex``, ``birth`` and
    ``regester_time`` from ``userInfo_for_regester_time`` into a new
    ``UserInfo_for_regester_time_store`` document and calls ``save()`` on it.

    Outcomes:
      * ``NotUniqueError`` is ignored silently (the record already exists).
      * Any other ``Exception`` is logged as a failure and swallowed, so one
        bad record does not abort the caller.
      * The success message is logged only when ``save()`` returned normally.

    Args:
        userInfo_for_regester_time: object exposing the six attributes
            listed above.
    """
    unique_user_info = UserInfo_for_regester_time_store(
        uid=userInfo_for_regester_time.uid,
        nickname=userInfo_for_regester_time.nickname,
        location=userInfo_for_regester_time.location,
        sex=userInfo_for_regester_time.sex,
        birth=userInfo_for_regester_time.birth,
        regester_time=userInfo_for_regester_time.regester_time,
    )

    try:
        unique_user_info.save()
    except NotUniqueError:
        # Duplicate record: nothing to insert, and not worth reporting.
        return
    except Exception:
        # Best-effort insert: report the failure but keep the crawler going.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        WeiboSearchLog().get_scheduler_logger().info(
            self.name + " insert to database, something wrong !")
    else:
        # Only reached when save() did not raise.
        WeiboSearchLog().get_scheduler_logger().info(
            self.name + " insert to database, success !")
def crawl_userinfo_for_regester_time():
    """Crawl registration-time info for every user not yet stored.

    Steps:
      1. Iterate ``UserInfo_store.objects`` and collect each
         ``uid_or_uname`` for which ``UserInfo_for_regester_time_store``
         has no document with that ``uid``.
      2. Split the collected uids into consecutive chunks of roughly one
         tenth of the total (at least one uid per chunk).
      3. Build one ``crawl_userinfo_3_for_regester_time`` worker per chunk,
         named ``crawl_userinfo_for_regestertime_<index>``.
      4. ``start()`` every worker, then ``join()`` every worker.

    Progress (a running user counter and the number of uids to crawl) is
    printed to stdout.
    """
    uid_crawl_list = []
    count = 1
    for one_user in UserInfo_store.objects:
        uid = one_user['uid_or_uname']
        if len(UserInfo_for_regester_time_store.objects(uid=uid)) == 0:
            uid_crawl_list.append(uid)
        # Single-argument parenthesised print behaves the same on Python 2
        # and Python 3.
        print(count)
        count += 1
    print(len(uid_crawl_list))

    total = len(uid_crawl_list)
    # Floor division keeps the chunk size an integer on Python 3 as well.
    # Clamping to 1 avoids a zero-sized chunk when fewer than 10 uids are
    # pending, which previously made the splitting loop spin forever.
    how_many_uids_one_thread = max(1, total // 10)

    # One worker per consecutive slice; the last slice may be shorter.
    all_thrads_list = []
    for index, start in enumerate(range(0, total, how_many_uids_one_thread)):
        end = start + how_many_uids_one_thread
        all_thrads_list.append(
            crawl_userinfo_3_for_regester_time(
                uid_crawl_list[start:end],
                "crawl_userinfo_for_regestertime_" + str(index)))

    # Launch everything first, then wait, so the workers run concurrently.
    for thread in all_thrads_list:
        thread.start()
    for thread in all_thrads_list:
        thread.join()