class Client:
    """Find users whose favourite songs overlap with one client's playlists.

    The client's own song ids are gathered from the playlists it created;
    sampled users are then compared against that set by worker threads and
    every user sharing more than ``similar_min`` songs is recorded.
    """

    # Scalar defaults.  Mutable containers (song-id set, result list, thread
    # list) are created per instance in ``__init__`` so that two clients
    # never share state through the class object.
    client_uid = ''
    api_server = ''
    user_pool = ''
    proxy_pool = ''
    account_pool = ''
    record_pool = ''
    uid_queue = ''
    most_similar_uid = 0
    same_song_num = -1
    similar_min = 15
    fail_search = 0
    success_search = 0
    cheat_search = 0
    block_search = 0
    terminate = False

    def __init__(self, db_server, api_server, proxy_server, client_uid, similar_min=15):
        """Create the helper pools and the per-instance result containers.

        Args:
            db_server: Host passed on to the account, user and record pools.
            api_server: Base URL that the API paths are appended to.
            proxy_server: Address passed on to the proxy-backed pools.
            client_uid: User id of the client whose playlists are the baseline.
            similar_min: A user is recorded only when it shares strictly more
                than this many songs with the client.
        """
        self.api_server = api_server
        self.client_uid = client_uid
        self.client_song_id_set = set()
        self.similar_user_list = []
        self.threads = []
        self.account_pool = AccountPool(db_server, api_server, proxy_server)
        self.user_pool = UserPool(db_server, api_server, proxy_server)
        self.proxy_pool = ProxyPool(proxy_server)
        self.record_pool = RecordPool(db_server, api_server)
        self.similar_min = similar_min

    def print(self, content):
        """Print ``content`` prefixed with the current local timestamp."""
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), end=': ')
        print(content)

    def set_terminate(self):
        """Ask the helper pools to stop and mark this client as terminated."""
        self.proxy_pool.set_terminate()
        self.user_pool.set_terminate()
        self.account_pool.set_terminate()
        self.terminate = True

    def get_all_client_song_ids(self):
        """Fill ``client_song_id_set`` using one thread per client playlist."""
        workers = []
        for play_list_id in self.get_all_client_play_list_ids():
            worker = threading.Thread(
                target=self.get_all_song_ids_in_play_list_to_set,
                args=(play_list_id,),
            )
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()
        self.print('Success: Finish fetching all ' + str(len(self.client_song_id_set))
                   + ' client song ids')

    def get_all_client_play_list_ids(self):
        """Return the ids of the playlists whose creator is the client."""
        response = requests.get(self.api_server + '/user/playlist',
                                params={'uid': self.client_uid})
        return [
            play_list['id']
            for play_list in response.json()['playlist']
            if play_list['creator']['userId'] == self.client_uid
        ]

    def get_all_song_ids_in_play_list_to_set(self, play_list_id):
        """Add every track id of one playlist to ``client_song_id_set``."""
        response = requests.get(self.api_server + '/playlist/detail',
                                params={'id': play_list_id})
        for song in response.json()['playlist']['tracks']:
            self.client_song_id_set.add(song['id'])

    def get_favourite_id_set(self, uid, requeue_item=None):
        """Return the set of song ids in the listening record of ``uid``.

        Args:
            uid: User id to look up.
            requeue_item: Object to put back on ``uid_queue`` when the account
                or proxy pool is not available.  Defaults to ``uid`` itself;
                callers that queue whole user dicts should pass the dict so
                that the queue keeps holding items of a single shape.

        Returns:
            A set of song ids.  The set is empty when the lookup was
            postponed (pools unavailable) or the API answered with code
            -460 or -2.
        """
        # Check availability before taking a cookie so that a postponed
        # lookup neither consumes a cookie nor blocks on an empty cookie queue.
        if not self.account_pool.is_available() or not self.proxy_pool.is_available():
            self.uid_queue.put(uid if requeue_item is None else requeue_item)
            return set()

        cookie_unit = self.account_pool.get_cookie_unit()
        response = requests.get(
            self.api_server + '/user/record',
            params={'uid': uid, 'type': 0},
            proxies=self.proxy_pool.get(),
            cookies=cookie_unit['cookies'],
        ).json()

        code = response['code']
        if code == -460:
            # Treated as "cheating detected": stop using the account whose
            # cookies triggered it.
            self.fail_search += 1
            self.cheat_search += 1
            self.account_pool.remove_cheat_source(cookie_unit['phone'])
            return set()
        if code == -2:
            # Treated as "record not accessible": drop the user from the pool.
            self.user_pool.delete_one_user(uid)
            self.fail_search += 1
            self.block_search += 1
            return set()

        song_ids = {entry['song']['song']['id'] for entry in response['allData']}
        self.success_search += 1
        total = self.success_search + self.fail_search
        if total % 50 == 0:
            self.print('Success: Finish ' + str(total) + ' in total, '
                       + str(self.success_search) + ' success , '
                       + str(self.cheat_search) + ' cheat, '
                       + str(self.block_search) + ' block')
            self.print('The most similar user found is ')
            print(self.similar_user_list)
        return song_ids

    def compare_song_list_with_one_uid_thread(self):
        """Worker loop: compare queued users until termination is requested."""
        while not self.terminate:
            self.compare_song_list_with_one_uid()

    def compare_song_list_with_one_uid(self):
        """Compare one queued user with the client and record a match.

        When the queue is empty the whole search is terminated.
        """
        import queue  # local import: only ``queue.Empty`` is needed here

        # ``get_nowait`` instead of ``qsize()`` + ``get()``: with many workers
        # another thread may take the last item between the two calls, which
        # would leave this worker blocked forever.
        try:
            target_user = self.uid_queue.get_nowait()
        except queue.Empty:
            self.set_terminate()
            return

        target_uid = target_user['uid']
        favourite_ids = self.get_favourite_id_set(target_uid, requeue_item=target_user)
        same_num = sum(1 for song_id in favourite_ids
                       if song_id in self.client_song_id_set)
        if same_num > self.similar_min:
            record = {
                'target_uid': target_uid,
                'same_num': same_num,
                'target_nickname': target_user['nickname'],
                'target_gender': target_user['gender'],
            }
            self.similar_user_list.append(record)
            self.record_pool.upload_one_record(self.client_uid, record)

    def find_most_similar_user_in_samples(self, sample_num, special, thread_num=100):
        """Sample users, compare them concurrently and log a summary.

        Args:
            sample_num: Sample size handed to the user pool.
            special: When truthy use ``get_girl_user_sample_queue``, otherwise
                ``get_uid_sample_queue``.
            thread_num: Number of comparison worker threads (default 100).
        """
        self.print('Pending: Start looking for most similar user')
        start_time = datetime.datetime.now()

        if special:
            self.uid_queue = self.user_pool.get_girl_user_sample_queue(sample_num)
        else:
            self.uid_queue = self.user_pool.get_uid_sample_queue(sample_num)
        self.get_all_client_song_ids()

        for _ in range(thread_num):
            worker = threading.Thread(target=self.compare_song_list_with_one_uid_thread)
            self.threads.append(worker)
            worker.start()
        for worker in self.threads:
            worker.join()

        run_time = datetime.datetime.now() - start_time
        self.set_terminate()
        self.print('Success: ' + str(self.success_search) + ' success search in '
                   + str(run_time.total_seconds()) + ' seconds')
def get():
    """Return the result of calling ``ProxyPool.get()`` with no arguments.

    NOTE(review): ``ProxyPool`` is defined outside this view; the call is
    made on the class itself, not on an instance -- confirm that ``get`` is
    usable that way.
    """
    return ProxyPool.get()
class UserPool:
    """MongoDB-backed pool of users discovered by crawling follower lists.

    Unsearched users are read from the collection into
    ``waiting_for_search_queue``; their followers are fetched from the API,
    placed on ``upload_queue`` and inserted into the collection.
    """

    # Scalar defaults.  Queues and thread lists are created per instance in
    # ``__init__`` so that separate pools never share them via the class.
    api_server = ''
    db = ''
    proxy_pool = ''
    upload_queue_min_size = 0
    upload_queue_max_size = 1000
    waiting_for_search_queue_min_size = 10
    waiting_for_search_queue_max_size = 500
    success_upload = 0
    fail_upload = 0
    uploaded_num = 0
    terminate = False

    def __init__(self, db_server, api_server, proxy_server):
        """Connect to ``net_ease.user`` on ``db_server`` and create a proxy pool."""
        self.api_server = api_server
        self.upload_queue = queue.Queue()
        self.waiting_for_search_queue = queue.Queue()
        self.upload_threads = []
        self.refill_threads = []
        self.search_threads = []
        self.db = pymongo.MongoClient(db_server, 27017).net_ease.user
        self.proxy_pool = ProxyPool(proxy_server)

    def print(self, content):
        """Print ``content`` prefixed with the current local timestamp."""
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), end=': ')
        print(content)

    def set_terminate(self):
        """Ask the worker loops and the proxy pool to stop."""
        self.terminate = True
        self.proxy_pool.set_terminate()

    def delete_all_users(self):
        """Delete every document in the user collection."""
        self.db.delete_many({})
        self.print('Success: Finish delete all users')

    def delete_one_user(self, uid):
        """Delete one document whose ``uid`` field equals ``uid``."""
        self.db.delete_one({'uid': uid})

    def delete_duplicates(self):
        """Keep one document per ``uid`` and delete the other copies."""
        self.print('Success: Start Delete duplicates')
        cursor = self.db.aggregate(
            [
                {"$group": {"_id": "$uid", "unique_ids": {"$addToSet": "$_id"}, "count": {"$sum": 1}}},
                {"$match": {"count": {"$gte": 2}}},
            ]
        )
        duplicate_ids = []
        for doc in cursor:
            # The first collected _id of each group survives; the rest go.
            duplicate_ids.extend(doc["unique_ids"][1:])
        print(duplicate_ids)
        # ``delete_many`` matches the other deletions in this class; the
        # legacy ``Collection.remove`` is not available in current PyMongo.
        self.db.delete_many({"_id": {"$in": duplicate_ids}})
        self.print('Success: Finish deleting ' + str(len(duplicate_ids)) + ' duplicates')

    def search_neighbours_thread(self):
        """Worker loop: search followers while the upload queue has room."""
        while not self.terminate:
            if self.upload_queue.qsize() < self.upload_queue_max_size:
                self.search_neighbours()
            else:
                time.sleep(0.1)  # upload queue is full: idle instead of spinning

    def search_neighbours(self):
        """Fetch the followers of one queued uid and queue them for upload.

        Returns:
            ``False`` when the API does not answer with code 200, otherwise
            ``None`` (also when there was nothing to search).
        """
        # Non-blocking get: with several workers the queue may be emptied
        # between a size check and a blocking ``get()``.
        try:
            uid = self.waiting_for_search_queue.get_nowait()
        except queue.Empty:
            return None

        response = requests.get(self.api_server + '/user/followeds',
                                params={'uid': uid},
                                proxies=self.proxy_pool.get()).json()
        if response['code'] != 200:
            self.print('Fail: Unable to search neighbours of ' + str(uid))
            self.print(response)
            return False

        # Mark the uid as searched so that it is not picked up by a refill.
        self.db.update_one({'uid': uid}, {"$set": {"searched": True}})
        for neighbour in response['followeds']:
            self.upload_queue.put({
                'uid': neighbour['userId'],
                'gender': neighbour['gender'],
                'nickname': neighbour['nickname'],
                'searched': False,
            })
        return None

    def upload_one_user(self, user):
        """Insert one user document and count the outcome.

        A failed insert is counted in ``fail_upload`` and otherwise ignored
        (best effort); only a successful insert increments
        ``success_upload``.
        """
        try:
            self.db.insert_one(user)
        except Exception:
            self.fail_upload += 1
        else:
            self.success_upload += 1

    def upload_result(self):
        """Upload one queued user, log progress, then search if there is room."""
        try:
            user = self.upload_queue.get_nowait()
        except queue.Empty:
            pass
        else:
            self.upload_one_user(user)
            finished = self.fail_upload + self.success_upload
            if finished % 200 == 0:
                self.print('Success: Finish upload ' + str(finished) + ' results, '
                           + str(self.success_upload) + ' success, '
                           + str(self.fail_upload) + ' fail')
                self.print('Success: ' + str(self.upload_queue.qsize()) + ' to be uploaded '
                           + str(self.waiting_for_search_queue.qsize()) + ' waiting for search')
        # NOTE(review): the upload queue size is compared against the
        # *search* queue limit here, whereas ``search_neighbours_thread``
        # uses ``upload_queue_max_size`` -- confirm which limit is intended.
        if self.upload_queue.qsize() < self.waiting_for_search_queue_max_size:
            self.search_neighbours()

    def upload_thread(self):
        """Worker loop around ``upload_result``."""
        while not self.terminate:
            self.upload_result()

    def refill_waiting_for_search_queue(self, size):
        """Queue up to ``size`` uids of documents with ``searched`` false."""
        users = list(self.db.find({'searched': False}).limit(size))
        for user in users:
            self.waiting_for_search_queue.put(user['uid'])
        self.print('Success: Finish refill the task queue with ' + str(len(users)) + ' data'
                   + ', ' + str(self.waiting_for_search_queue.qsize()) + ' wating for search')

    def refill_waiting_for_search_queue_thread(self):
        """Worker loop: refill the search queue whenever it runs low."""
        while not self.terminate:
            if self.waiting_for_search_queue.qsize() < self.waiting_for_search_queue_min_size:
                self.refill_waiting_for_search_queue(1000)
            else:
                time.sleep(0.1)  # enough work queued: idle instead of spinning

    def start_searching_valid_users(self, upload_thread_num):
        """Start one refill thread and ``upload_thread_num`` upload threads."""
        self.print('Pending: Start searching valid users')
        refill_worker = threading.Thread(target=self.refill_waiting_for_search_queue_thread)
        self.refill_threads.append(refill_worker)
        refill_worker.start()
        for _ in range(upload_thread_num):
            upload_worker = threading.Thread(target=self.upload_thread)
            self.upload_threads.append(upload_worker)
            upload_worker.start()

    def _aggregate_to_queue(self, pipeline):
        """Run an aggregation and return its documents in a new queue."""
        user_queue = queue.Queue()
        for user in self.db.aggregate(pipeline):
            user_queue.put(user)
        return user_queue

    def get_uid_sample_queue(self, size):
        """Return a queue of sampled user documents with ``searched`` false.

        NOTE(review): ``$sample`` runs before ``$match``, so the queue can
        hold fewer than ``size`` users -- confirm this is intended.
        """
        return self._aggregate_to_queue([
            {'$sample': {'size': size}},
            {'$match': {'searched': False}},
        ])

    def get_girl_user_sample_queue(self, size):
        """Return a queue of sampled user documents whose ``gender`` is 2.

        The resulting queue size is printed between two blank lines.

        NOTE(review): ``$sample`` runs before ``$match``, so the queue can
        hold fewer than ``size`` users -- confirm this is intended.
        """
        user_queue = self._aggregate_to_queue([
            {'$sample': {'size': size}},
            {'$match': {'gender': 2}},
        ])
        print('')
        print(user_queue.qsize())
        print('')
        return user_queue
class AccountPool:
    """Pool of logged-in accounts whose cookies are handed out for API calls.

    Accounts are read from the ``net_ease.account`` collection and logged in
    by worker threads; the resulting cookies are kept in ``source_cookies``
    and repeatedly copied into ``cookie_queue`` by a refill thread.
    """

    # Scalar defaults.  Lists, sets and queues are created per instance in
    # ``__init__`` so that separate pools never share them via the class.
    api_server = ''
    db_server = ''
    db = ''
    cookie_queue_max_size = 1000
    cookie_queue_min_size = 200
    proxy_pool = ''
    refill_thread = ''
    terminate = False
    success_login = 0
    fail_login = 0
    session = ''
    lowerst_account_num = 80

    def __init__(self, db_server, api_server, proxy_server):
        """Log in all stored accounts, then start the cookie refill thread."""
        self.print('Pending: Start initializing the account pool')
        self.api_server = api_server
        self.db_server = db_server
        self.source_cookies = []
        self.cookie_queue = queue.Queue()
        self.account_for_login_queue = queue.Queue()
        self.login_threads = []
        self.error_accounts = set()
        # NOTE(review): one session is shared by all login threads --
        # confirm that its cookie jar may be shared between accounts.
        self.session = requests.session()
        self.db = pymongo.MongoClient(self.db_server, 27017).net_ease.account
        self.proxy_pool = ProxyPool(proxy_server)
        self.login_accounts()
        self.refill_thread = threading.Thread(target=self.refill_tasks)
        self.refill_thread.start()
        self.print('Success: Finish initializing the account pool')

    def print(self, content):
        """Print ``content`` prefixed with the current local timestamp."""
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), end=': ')
        print(content)

    def insert_one_phone(self, phone, password):
        """Store one account unless its phone is already present.

        Returns:
            ``True`` when the account was inserted, ``False`` for a repeat.
        """
        phone = str(phone)
        if self.db.find_one({'phone': phone}) is not None:
            self.print('Fail: Unable to insert repeated ' + phone)
            return False
        self.db.insert_one({'phone': phone, 'password': password})
        self.print('Success: Finish inserting phone ' + phone)
        return True

    def insert_all_phones(self, phones, password):
        """Insert every phone with the same password and log the totals."""
        success = 0
        fail = 0
        for phone in phones:
            if self.insert_one_phone(phone, password):
                success += 1
            else:
                fail += 1
        self.print('Success: Finish inserting all phones, ' + str(success)
                   + ' success, ' + str(fail) + ' fail')

    def delete_all_phones(self):
        """Delete every document in the account collection."""
        # ``delete_many`` requires a filter; an empty one matches everything.
        self.db.delete_many({})
        self.print('Success: Finish deleting all phones')

    def login_accounts(self):
        """Queue all stored accounts and log them in with 100 threads."""
        for account in self.db.find():
            self.account_for_login_queue.put(account)
        for _ in range(100):
            worker = threading.Thread(target=self.login_thread)
            self.login_threads.append(worker)
            worker.start()
        for worker in self.login_threads:
            worker.join()
        self.print('Success: Finish login, ' + str(self.success_login) + ' success, '
                   + str(self.fail_login) + ' fail')

    def login_one_account(self):
        """Try to log in one queued account and keep its cookies on success.

        Response code 415 and (while not enough accounts are logged in) 406
        put the account back on the queue; 460 drops it.
        """
        # Non-blocking get: another login thread may take the last account
        # between a size check and a blocking ``get()``.
        try:
            account = self.account_for_login_queue.get_nowait()
        except queue.Empty:
            return

        phone = account['phone']
        response = self.session.get(
            self.api_server + '/login/cellphone',
            params={'phone': phone, 'password': account['password']},
            proxies=self.proxy_pool.get(),
        )
        code = response.json()['code']  # parse the body once

        if code == 415:
            self.print('Fail: Unable to login for ' + str(phone)
                       + ', the proxy is invalid, try again later')
            self.account_for_login_queue.put(account)
            self.fail_login += 1
            return
        if code == 406:
            # Enough accounts are already logged in: give up on this one.
            if self.success_login > self.lowerst_account_num:
                return
            self.print('Fail: The account ' + str(phone) + ' cannot login')
            self.error_accounts.add(phone)
            self.account_for_login_queue.put(account)
            self.fail_login += 1
            return
        if code == 460:
            self.print('Fail: Cheating')
            self.fail_login += 1
            return

        if phone in self.error_accounts:
            print('miracle!!!!!!')
        self.success_login += 1
        self.source_cookies.append({'phone': phone, 'cookies': response.cookies})

    def login_thread(self):
        """Worker loop: log in accounts until the login queue is empty."""
        while self.account_for_login_queue.qsize() > 0:
            self.login_one_account()

    def refill(self):
        """Put every known cookie unit on the cookie queue once."""
        for cookie_unit in self.source_cookies:
            self.cookie_queue.put(cookie_unit)

    def refill_tasks(self):
        """Worker loop: keep the cookie queue filled up to its maximum size."""
        while not self.terminate:
            if self.cookie_queue.qsize() < self.cookie_queue_max_size:
                self.refill()
            else:
                time.sleep(1)

    def set_terminate(self):
        """Ask the refill loop and the proxy pool to stop."""
        self.terminate = True
        self.proxy_pool.set_terminate()

    def is_available(self):
        """Return whether the cookie queue holds more than the minimum size."""
        return self.cookie_queue.qsize() > self.cookie_queue_min_size

    def get_cookie_unit(self):
        """Take one cookie unit from the queue (blocks while it is empty)."""
        return self.cookie_queue.get()

    def load_accounts(self, filename):
        """Insert accounts from a whitespace-separated text file.

        Each non-blank line yields one account: the first word is the phone,
        the last of the following words is the password (empty when the line
        has a single word).  Blank lines are skipped.
        """
        with open(filename, 'r') as account_file:
            for line in account_file:
                words = line.split()
                if not words:
                    continue
                username = words[0]
                password = words[-1] if len(words) > 1 else ''
                self.insert_one_phone(username, password)

    def remove_cheat_source(self, phone):
        """Remove every cookie unit that belongs to ``phone``."""
        # Rebuild in place rather than removing during iteration, which
        # skips the element that follows each removed one.
        self.source_cookies[:] = [
            cookie for cookie in self.source_cookies if cookie['phone'] != phone
        ]