Esempio n. 1
0
class Client:
    """Search a sample of users for those sharing songs with one client.

    The client's own playlists are fetched from the API server, then worker
    threads pull candidate users from ``uid_queue``, fetch each candidate's
    listening record and count the songs that also appear in the client's
    playlists.  Candidates with more than ``similar_min`` shared songs are
    appended to ``similar_user_list`` and uploaded through the record pool.
    """

    client_uid = ''

    api_server = ''

    user_pool = ''
    proxy_pool = ''
    account_pool = ''
    record_pool = ''

    uid_queue = ''

    most_similar_uid = 0
    same_song_num = -1

    similar_min = 15

    fail_search = 0
    success_search = 0
    cheat_search = 0
    block_search = 0

    terminate = False

    # Mutable containers are created per instance in __init__; keeping them
    # as class-level set()/list objects would share them between instances.
    client_song_id_set = None
    similar_user_list = None
    threads = None

    def __init__(self, db_server, api_server, proxy_server, client_uid, similar_min=15):
        """Create the pools used by the search.

        Args:
            db_server: passed to the account, user and record pools.
            api_server: base URL that API paths are appended to.
            proxy_server: passed to the account, user and proxy pools.
            client_uid: uid of the user whose playlists are the reference.
            similar_min: a candidate is recorded only when it shares more
                than this many songs with the client.
        """
        self.api_server = api_server
        self.client_uid = client_uid

        self.client_song_id_set = set()
        self.similar_user_list = []
        self.threads = []

        self.account_pool = AccountPool(db_server, api_server, proxy_server)
        self.user_pool = UserPool(db_server, api_server, proxy_server)
        self.proxy_pool = ProxyPool(proxy_server)
        self.record_pool = RecordPool(db_server, api_server)
        self.similar_min = similar_min

    def print(self, content):
        """Print ``content`` prefixed with the current local timestamp."""
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), end=': ')
        print(content)

    def set_terminate(self):
        """Ask the pools and this client's worker loops to stop."""
        self.proxy_pool.set_terminate()
        self.user_pool.set_terminate()
        self.account_pool.set_terminate()
        self.terminate = True

    def get_all_client_song_ids(self):
        """Fill ``client_song_id_set`` using one thread per client playlist."""
        workers = []
        for play_list_id in self.get_all_client_play_list_ids():
            worker = threading.Thread(
                target=self.get_all_song_ids_in_play_list_to_set,
                args=[play_list_id],
            )
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()
        self.print('Success: Finish fetching all ' + str(len(self.client_song_id_set)) + ' client song ids')

    def get_all_client_play_list_ids(self):
        """Return the ids of the playlists whose creator is the client.

        Playlists returned by ``/user/playlist`` that were created by someone
        else are skipped.
        """
        response = requests.get(
            self.api_server + '/user/playlist',
            params={'uid': self.client_uid},
        )
        # NOTE(review): the creator check is a plain equality test, so it only
        # matches when client_uid has the same type as the API's userId.
        return [
            play_list['id']
            for play_list in response.json()['playlist']
            if play_list['creator']['userId'] == self.client_uid
        ]

    def get_all_song_ids_in_play_list_to_set(self, play_list_id):
        """Add every track id of one playlist to ``client_song_id_set``."""
        response = requests.get(
            self.api_server + '/playlist/detail',
            params={'id': play_list_id},
        )
        for song in response.json()['playlist']['tracks']:
            self.client_song_id_set.add(song['id'])

    def get_favourite_id_set(self, uid, target_user=None):
        """Fetch the song ids in one user's listening record.

        Args:
            uid: uid of the user to look up.
            target_user: the queue entry this uid came from.  When the pools
                are not available this entry (rather than the bare uid) is
                put back on ``uid_queue`` so that workers, which expect user
                dicts, can retry it.  If omitted the bare uid is requeued.

        Returns:
            A set of song ids on success, or an empty list when the lookup
            was requeued, flagged as cheating (code -460) or refused because
            the user hides the record (code -2).
        """
        # Check availability before taking a cookie so that a cookie is not
        # consumed for a request that is never sent.
        if not self.account_pool.is_available() or not self.proxy_pool.is_available():
            self.uid_queue.put(target_user if target_user is not None else uid)
            return []

        cookie_unit = self.account_pool.get_cookie_unit()
        response = requests.get(
            self.api_server + '/user/record',
            params={'uid': uid, 'type': 0},
            proxies=self.proxy_pool.get(),
            cookies=cookie_unit['cookies'],
        ).json()

        if response['code'] == -460:
            # Cheating detected: retire the account whose cookies were used.
            self.fail_search += 1
            self.cheat_search += 1
            self.account_pool.remove_cheat_source(cookie_unit['phone'])
            return []

        if response['code'] == -2:
            # The user blocks access to the record; drop them from the pool.
            self.user_pool.delete_one_user(uid)
            self.fail_search += 1
            self.block_search += 1
            return []

        song_ids = {song['song']['song']['id'] for song in response['allData']}
        self.success_search += 1

        total = self.success_search + self.fail_search
        if total % 50 == 0:
            self.print('Success: Finish ' + str(total) + ' in total, ' + str(self.success_search) + ' success , ' + str(self.cheat_search) + ' cheat, ' + str(self.block_search) + ' block')
            self.print('The most similar user found is ')
            print(self.similar_user_list)
        return song_ids

    def compare_song_list_with_one_uid_thread(self):
        """Worker loop: compare users until termination is requested."""
        while not self.terminate:
            self.compare_song_list_with_one_uid()

    def compare_song_list_with_one_uid(self):
        """Take one user from ``uid_queue`` and record them if similar enough.

        When the queue is empty, termination is requested instead.
        """
        import queue  # local import: only the Empty exception is needed here

        # A non-blocking get avoids hanging forever when another worker takes
        # the last entry between a size check and the get.
        try:
            target_user = self.uid_queue.get_nowait()
        except queue.Empty:
            self.set_terminate()
            return

        target_uid = target_user['uid']
        favourites = self.get_favourite_id_set(target_uid, target_user=target_user)
        count = sum(1 for song_id in favourites if song_id in self.client_song_id_set)
        if count > self.similar_min:
            record = {
                'target_uid': target_uid,
                'same_num': count,
                'target_nickname': target_user['nickname'],
                'target_gender': target_user['gender'],
            }
            self.similar_user_list.append(record)
            self.record_pool.upload_one_record(self.client_uid, record)

    def find_most_similar_user_in_samples(self, sample_num, special, thread_num=100):
        """Run the similarity search over a sample of users.

        Args:
            sample_num: sample size requested from the user pool.
            special: when truthy the sample comes from
                ``get_girl_user_sample_queue``, otherwise from
                ``get_uid_sample_queue``.
            thread_num: number of worker threads to start (default 100).
        """
        self.print('Pending: Start looking for most similar user')
        start_time = datetime.datetime.now()

        # determine the sample
        if special:
            self.uid_queue = self.user_pool.get_girl_user_sample_queue(sample_num)
        else:
            self.uid_queue = self.user_pool.get_uid_sample_queue(sample_num)

        self.get_all_client_song_ids()
        for _ in range(thread_num):
            thread = threading.Thread(target=self.compare_song_list_with_one_uid_thread)
            self.threads.append(thread)
            thread.start()

        for thread in self.threads:
            thread.join()

        run_time = datetime.datetime.now() - start_time
        self.set_terminate()

        self.print('Success: ' + str(self.success_search) + ' success search in ' + str(run_time.total_seconds()) + ' seconds')
Esempio n. 2
0
def get():
    """Return whatever ``ProxyPool.get()`` yields."""
    return ProxyPool.get()
Esempio n. 3
0
class UserPool:
    """Store users in the ``net_ease.user`` MongoDB collection and crawl more.

    Crawling works on two queues: uids waiting to have their followers
    searched, and user documents waiting to be inserted into the collection.
    """

    api_server = ''

    db = ''

    proxy_pool = ''

    upload_queue_min_size = 0
    upload_queue_max_size = 1000

    waiting_for_search_queue_min_size = 10
    waiting_for_search_queue_max_size = 500

    success_upload = 0
    fail_upload = 0
    uploaded_num = 0

    terminate = False

    # Seconds a polling loop sleeps when it has nothing to do, so idle
    # threads do not spin at full speed.
    idle_sleep_seconds = 0.1

    # Queues and thread lists are created per instance in __init__; as
    # class-level objects they would be shared by every UserPool.
    upload_queue = None
    waiting_for_search_queue = None
    upload_threads = None
    refill_threads = None
    search_threads = None

    def __init__(self, db_server, api_server, proxy_server):
        """Connect to MongoDB on port 27017 and create a proxy pool."""
        self.api_server = api_server
        self.db = pymongo.MongoClient(db_server, 27017).net_ease.user
        self.proxy_pool = ProxyPool(proxy_server)

        self.upload_queue = queue.Queue()
        self.waiting_for_search_queue = queue.Queue()
        self.upload_threads = []
        self.refill_threads = []
        self.search_threads = []

    def print(self, content):
        """Print ``content`` prefixed with the current local timestamp."""
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), end=': ')
        print(content)

    def set_terminate(self):
        """Ask this pool's loops and its proxy pool to stop."""
        self.terminate = True
        self.proxy_pool.set_terminate()

    def delete_all_users(self):
        """Delete every document in the user collection."""
        self.db.delete_many({})
        self.print('Success: Finish delete all users')

    def delete_one_user(self, uid):
        """Delete one document whose ``uid`` field equals ``uid``."""
        self.db.delete_one({'uid': uid})

    def delete_duplicates(self):
        """Delete all but one document for every uid stored more than once."""
        self.print('Success: Start Delete duplicates')
        cursor = self.db.aggregate(
            [
                {"$group": {"_id": "$uid", "unique_ids": {"$addToSet": "$_id"}, "count": {"$sum": 1}}},
                {"$match": {"count": { "$gte": 2 }}}
            ]
        )
        duplicate_ids = []
        for doc in cursor:
            # Keep the first _id of each group and delete the rest.
            duplicate_ids.extend(doc["unique_ids"][1:])
        print(duplicate_ids)
        # delete_many replaces the legacy Collection.remove and matches the
        # other delete methods of this class.
        self.db.delete_many({"_id": {"$in": duplicate_ids}})
        self.print('Success: Finish deleting ' + str(len(duplicate_ids)) + ' duplicates')

    def search_neighbours_thread(self):
        """Loop: search neighbours while the upload queue has room."""
        while not self.terminate:
            if self.upload_queue.qsize() < self.upload_queue_max_size:
                self.search_neighbours()
            else:
                time.sleep(self.idle_sleep_seconds)

    def search_neighbours(self):
        """Fetch one waiting uid's followers and queue them for upload.

        Returns:
            False when the API answers with a code other than 200, otherwise
            None (including when there was no uid waiting).
        """
        # Non-blocking get: another thread may empty the queue at any time.
        try:
            uid = self.waiting_for_search_queue.get_nowait()
        except queue.Empty:
            return None

        response = requests.get(
            self.api_server + '/user/followeds',
            params={'uid': uid},
            proxies=self.proxy_pool.get(),
        ).json()
        if response['code'] != 200:
            self.print('Fail: Unable to search neighbours of ' + str(uid))
            self.print(response)
            return False

        # set the uid searched
        self.db.update_one({'uid': uid}, {"$set": {"searched": True}})
        # put the result in the upload queue
        for neighbour in response['followeds']:
            self.upload_queue.put({
                'uid': neighbour['userId'],
                'gender': neighbour['gender'],
                'nickname': neighbour['nickname'],
                'searched': False,
            })
        return None

    def upload_one_user(self, user):
        """Insert one user document, counting it as a success or a failure."""
        try:
            self.db.insert_one(user)
        except Exception:
            # Best effort: a failed insert is only counted, never raised.
            self.fail_upload += 1
        else:
            self.success_upload += 1

    def upload_result(self):
        """Upload one queued user if there is one, then maybe search more."""
        try:
            user = self.upload_queue.get_nowait()
        except queue.Empty:
            user = None
        if user is not None:
            self.upload_one_user(user)
            total = self.fail_upload + self.success_upload
            if total % 200 == 0:
                self.print('Success: Finish upload ' + str(total) + ' results, ' + str(self.success_upload) + ' success, ' + str(self.fail_upload) + ' fail')
                self.print('Success: ' + str(self.upload_queue.qsize()) + ' to be uploaded ' + str(self.waiting_for_search_queue.qsize()) + ' waiting for search')
        # NOTE(review): the upload queue is compared with the *waiting* queue
        # maximum here, while search_neighbours_thread uses
        # upload_queue_max_size -- confirm which threshold is intended.
        if self.upload_queue.qsize() < self.waiting_for_search_queue_max_size:
            self.search_neighbours()

    def upload_thread(self):
        """Loop: call ``upload_result`` until termination is requested."""
        while not self.terminate:
            self.upload_result()

    def refill_waiting_for_search_queue(self, size):
        """Queue the uids of up to ``size`` users not yet searched."""
        users = list(self.db.find({ 'searched': False }).limit(size))
        for user in users:
            self.waiting_for_search_queue.put(user['uid'])
        self.print('Success: Finish refill the task queue with ' + str(len(users)) + ' data' + ', ' + str(self.waiting_for_search_queue.qsize()) + ' wating for search' )

    def refill_waiting_for_search_queue_thread(self):
        """Loop: refill the waiting queue whenever it drops below its minimum."""
        while not self.terminate:
            if self.waiting_for_search_queue.qsize() < self.waiting_for_search_queue_min_size:
                self.refill_waiting_for_search_queue(1000)
            else:
                time.sleep(self.idle_sleep_seconds)

    def start_searching_valid_users(self, upload_thread_num):
        """Start one refill thread and ``upload_thread_num`` upload threads."""
        self.print('Pending: Start searching valid users')

        thread = threading.Thread(target=self.refill_waiting_for_search_queue_thread)
        self.refill_threads.append(thread)
        thread.start()

        for _ in range(upload_thread_num):
            thread = threading.Thread(target=self.upload_thread)
            self.upload_threads.append(thread)
            thread.start()

    def get_uid_sample_queue(self, size):
        """Return a queue of sampled user documents not yet searched.

        The pipeline samples ``size`` documents first and filters afterwards,
        so the queue can hold fewer than ``size`` users.
        """
        # NOTE(review): putting $match before $sample would yield up to
        # `size` matching users; confirm which is intended.
        user_queue = queue.Queue()
        query = [
            { '$sample': { 'size': size } },
            { '$match': {'searched': False} }
        ]
        for user in self.db.aggregate(query):
            user_queue.put(user)
        return user_queue

    def get_girl_user_sample_queue(self, size):
        """Return a queue of sampled user documents whose ``gender`` is 2.

        The pipeline samples ``size`` documents first and filters afterwards,
        so the queue can hold fewer than ``size`` users; its size is printed.
        """
        user_queue = queue.Queue()
        query = [
            { '$sample': { 'size': size } },
            { '$match': {'gender': 2} }
        ]
        for user in self.db.aggregate(query):
            user_queue.put(user)
        print('')
        print(user_queue.qsize())
        print('')
        return user_queue
Esempio n. 4
0
class AccountPool:
    """Log in the accounts stored in ``net_ease.account`` and hand out cookies.

    Constructing a pool logs in every stored account and then starts a
    background thread that keeps ``cookie_queue`` filled with the cookies
    collected from successful logins.
    """

    api_server = ''
    db_server = ''

    db = ''

    cookie_queue_max_size = 1000
    cookie_queue_min_size = 200

    proxy_pool = ''

    refill_thread = ''
    terminate = False

    success_login = 0
    fail_login = 0

    session = ''

    lowerst_account_num = 80

    # Containers are created per instance in __init__; as class-level
    # objects they would be shared by every AccountPool.
    source_cookies = None
    cookie_queue = None
    account_for_login_queue = None
    login_threads = None
    error_accounts = None

    def __init__(self, db_server, api_server, proxy_server):
        """Connect to MongoDB, log in all accounts and start the refill thread."""
        self.print('Pending: Start initializing the account pool')
        self.api_server = api_server
        self.db_server = db_server

        self.source_cookies = []
        self.cookie_queue = queue.Queue()
        self.account_for_login_queue = queue.Queue()
        self.login_threads = []
        self.error_accounts = set()

        self.session = requests.session()

        self.db = pymongo.MongoClient(self.db_server, 27017).net_ease.account

        self.proxy_pool = ProxyPool(proxy_server)
        self.login_accounts()
        self.refill_thread = threading.Thread(target=self.refill_tasks)
        self.refill_thread.start()

        self.print('Success: Finish initializing the account pool')

    def print(self, content):
        """Print ``content`` prefixed with the current local timestamp."""
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), end=': ')
        print(content)

    def insert_one_phone(self, phone, password):
        """Store one account unless its phone number is already stored.

        Returns:
            True when the account was inserted, False when it already existed.
        """
        if self.db.find_one({'phone': str(phone)}) is not None:
            self.print('Fail: Unable to insert repeated ' + str(phone))
            return False
        self.db.insert_one({'phone': str(phone), 'password': password})
        self.print('Success: Finish inserting phone ' + str(phone))
        return True

    def insert_all_phones(self, phones, password):
        """Store every phone in ``phones`` with the same ``password``."""
        success = 0
        fail = 0
        for phone in phones:
            if self.insert_one_phone(phone, password):
                success += 1
            else:
                fail += 1
        self.print('Success: Finish inserting all phones, ' + str(success) +
                   ' success, ' + str(fail) + ' fail')

    def delete_all_phones(self):
        """Delete every stored account."""
        # delete_many requires a filter; the empty filter matches everything.
        self.db.delete_many({})
        self.print('Success: Finish deleting all phones')

    def login_accounts(self):
        """Log in every stored account using 100 threads and wait for them."""
        for account in self.db.find():
            self.account_for_login_queue.put(account)
        for _ in range(100):
            thread = threading.Thread(target=self.login_thread)
            self.login_threads.append(thread)
            thread.start()
        for thread in self.login_threads:
            thread.join()
        self.print('Success: Finish login, ' + str(self.success_login) +
                   ' success, ' + str(self.fail_login) + ' fail')

    def login_one_account(self):
        """Try to log in one queued account and keep its cookies on success.

        Response code 415 requeues the account.  Code 406 requeues it too,
        unless more than ``lowerst_account_num`` logins have already
        succeeded, in which case the account is dropped.  Code 460 drops it.
        """
        # Non-blocking get: another login thread may take the last account.
        try:
            account = self.account_for_login_queue.get_nowait()
        except queue.Empty:
            return

        response = self.session.get(self.api_server + '/login/cellphone',
                                    params={
                                        'phone': account['phone'],
                                        'password': account['password'],
                                    },
                                    proxies=self.proxy_pool.get())
        code = response.json()['code']
        if code == 415:
            self.print('Fail: Unable to login for ' +
                       str(account['phone']) +
                       ', the proxy is invalid, try again later')
            self.account_for_login_queue.put(account)
            self.fail_login += 1
            return
        if code == 406:
            if self.success_login > self.lowerst_account_num:
                return
            self.print('Fail: The account ' +
                       str(account['phone']) + ' cannot login')
            self.error_accounts.add(account['phone'])
            self.account_for_login_queue.put(account)
            self.fail_login += 1
            return
        if code == 460:
            self.print('Fail: Cheating')
            self.fail_login += 1
            return
        if account['phone'] in self.error_accounts:
            print('miracle!!!!!!')
        self.success_login += 1
        self.source_cookies.append({
            'phone': account['phone'],
            'cookies': response.cookies
        })

    def login_thread(self):
        """Loop: log in accounts until the login queue is empty."""
        while self.account_for_login_queue.qsize() > 0:
            self.login_one_account()

    def refill(self):
        """Put every source cookie unit on the cookie queue once."""
        for cookie_unit in self.source_cookies:
            self.cookie_queue.put(cookie_unit)

    def refill_tasks(self):
        """Loop: keep the cookie queue filled until termination is requested."""
        while not self.terminate:
            # Without source cookies a refill adds nothing, so sleep instead
            # of spinning.
            if self.source_cookies and self.cookie_queue.qsize() < self.cookie_queue_max_size:
                self.refill()
            else:
                time.sleep(1)

    def set_terminate(self):
        """Ask the refill loop and the proxy pool to stop."""
        self.terminate = True
        self.proxy_pool.set_terminate()

    def is_available(self):
        """Return True when the cookie queue holds more than its minimum."""
        return self.cookie_queue.qsize() > self.cookie_queue_min_size

    def get_cookie_unit(self):
        """Take the next cookie unit, blocking until one is queued."""
        return self.cookie_queue.get()

    def load_accounts(self, filename):
        """Insert the accounts listed in a whitespace-separated text file.

        On each line the first word is the phone number and the last of any
        following words is the password.  Blank lines are skipped.
        """
        with open(filename, 'r') as file:
            for line in file:
                words = line.split()
                if not words:
                    continue
                password = words[-1] if len(words) > 1 else ''
                self.insert_one_phone(words[0], password)

    def remove_cheat_source(self, phone):
        """Stop handing out the cookies that belong to ``phone``."""
        # Build a new list rather than removing while iterating, which skips
        # the entry after each removed one.
        self.source_cookies = [
            cookie for cookie in self.source_cookies
            if cookie['phone'] != phone
        ]