예제 #1
0
 def __init__(self):
     """Prepare credentials, API token, bloom filter and the output file.

     Reads the account from ``defaults``, obtains a token through
     ``_get_token`` and echoes it to stdout, stores the object returned by
     ``bloom_filter_from_defaults`` and wraps it in a ``BloomFilterRedis``,
     then opens ``full_data_file_name`` for writing (mode ``'w'``, UTF-8).
     """
     self.user, self.pass_ = defaults.USER, defaults.PASS

     token = self._get_token()
     self.token = token
     print(token)

     redis_handle = bloom_filter_from_defaults(defaults.BLOOM_REDIS_URL)
     self.redis_server = redis_handle
     self.bf_server = BloomFilterRedis(
         server=redis_handle,
         key=defaults.BLOOM_KEY,
         blockNum=1,
     )

     self.fp = open(full_data_file_name, 'w', encoding='utf-8')
예제 #2
0
class MiLiMaCrawl(object):  # MiLiMa verification-code platform
    """Collect phone numbers handed out by the MiLiMa code-receiving API.

    ``run`` repeatedly requests a number, writes every number that the bloom
    filter has not seen to the data file, and releases the number again.
    """
    name = 'milima'
    # Built once when the class body executes; also passed to the
    # ``account_band_judge`` decorator applied to ``run``.
    redis_server = bloom_filter_from_defaults(defaults.BLOOM_REDIS_URL)

    def __init__(self):
        """Log in, build the bloom filter and open the output file."""
        self.user = defaults.USER
        self.pass_ = defaults.PASS
        self.token = self._get_token()
        # NOTE(review): this echoes the API token to stdout - confirm it is
        # still wanted outside of debugging.
        print(self.token)
        self.bf_server = BloomFilterRedis(server=self.redis_server,
                                          key=defaults.BLOOM_KEY,
                                          blockNum=1)
        # Mode 'w' truncates any existing data file.
        self.fp = open(full_data_file_name, 'w', encoding='utf-8')

    def _get_token(self):
        """Call the ``UserLoginStr`` endpoint and return the login token.

        Returns:
            The part of the decoded response body before the first ``&``.
        """
        params = {
            'name': self.user,
            'psw': self.pass_,
        }
        login_url = API_URL + 'UserLoginStr'
        r = requests.get(login_url, params=params)
        token = r.content.decode().split('&')[0]
        return token

    @retry(stop_max_attempt_number=RETRY_TIMES)
    def get_phone(self):
        """Request one phone number and return the decoded response body."""
        params = {
            'xmid': ItemId,  # required; the project must be favorited first
            'token': self.token,  # required
            'sl': '1',
            'lx': '0',
            'ks': '0',
            'a1': '',
            'a2': '',
            'pk': '',
            'rj': '',
        }
        get_phone_url = API_URL + 'GetHM2Str'
        r = requests.get(get_phone_url, params=params)
        return r.content.decode()

    @retry(stop_max_attempt_number=RETRY_TIMES)
    def release_url(self, phone):  # release the phone number
        """Release ``phone`` on the platform; return the decoded response."""
        params = {'token': self.token, 'hm': phone}
        release_url = API_URL + 'sfHmStr'
        r = requests.get(release_url, params=params)
        return r.content.decode()

    def _extract_phone(self, raw):
        """Return every run of 11 consecutive digits found in ``raw``."""
        return re.findall(r'\d{11}', raw)

    def _handle_phone(self, phone):
        """Record one phone number, then release it on the platform."""
        phone_dict = {'phone': phone, 'source': MiLiMaCrawl.name}
        utils.update_phone_dict(phone_dict)
        record_msg(str(phone_dict))
        if not self.bf_server.is_exists(phone):
            self.fp.write(str(phone_dict) + '\n')
            # Flush per record so the data is on disk immediately.
            self.fp.flush()
        else:
            record_msg('过滤了重复手机号码 -> %s' % phone_dict)

        time.sleep(defaults.RELEASE_DELAY)

        # release the phone number
        res = self.release_url(phone)
        record_msg('释放手机号 -> %s' % res)

    @utils.need_save_pid_files(pid_files_path=full_PID_file_name)
    @utils.account_band_judge(server=redis_server, spider_name=spider_name)
    def run(self):
        """Fetch, record and release numbers until told to stop.

        The loop ends when the module-level ``exit_signal`` is truthy or when
        ``utils.return_phone_error_check`` flags the response.  The output
        file is closed on every exit path, including exceptions.
        """
        global exit_signal
        try:
            while True:
                if exit_signal:  # exit signal
                    record_msg(' <- 使用signal退出')
                    break

                # fetch a number
                phone_ = self.get_phone()
                record_msg('接码平台取号返回 -> %s' % phone_)

                # exit on account error
                res = utils.return_phone_error_check(phone_)
                if res[0]:
                    record_msg('账户异常退出,返回 -> %s -> %s' % (phone_, res[1]))
                    break

                for phone in self._extract_phone(phone_):
                    self._handle_phone(phone)

                time.sleep(defaults.GET_PHONE_DELAY)
        finally:
            # Previously the file was closed only on the exit-signal path,
            # leaking the handle on an account error or an exception.
            self.fp.close()

    def __del__(self):
        """Intentionally empty; ``run`` is responsible for closing the file."""
        pass
예제 #3
0
class JieSuJieMaCrawl(object):  # YZ verification code
    """Crawler skeleton for the 'jisujiema' platform.

    ``run`` currently only prints the process id a few times; the API helper
    methods are present but are not called by ``run``.
    """
    name = 'jisujiema'
    # Built once when the class body executes; also passed to the
    # ``account_band_judge`` decorator applied to ``run``.
    redis_server = bloom_filter_from_defaults(defaults.BLOOM_REDIS_URL)

    def __init__(self):
        """Store credentials, build the bloom filter, open the output file.

        No login request is made here.  The token starts as ``None`` and is
        fetched on first use by ``_ensure_token``; previously the attribute
        was never assigned, so ``get_phone`` and ``release_url`` raised
        ``AttributeError``.
        """
        self.user = defaults.USER
        self.pass_ = defaults.PASS
        self.token = None
        self.bf_server = BloomFilterRedis(server=self.redis_server,
                                          key=defaults.BLOOM_KEY,
                                          blockNum=1)
        # Mode 'w' truncates any existing data file.
        self.fp = open(full_data_file_name, 'w', encoding='utf-8')

    def _get_token(self):
        """Call the ``UserLoginStr`` endpoint and return the login token.

        Returns:
            The part of the decoded response body before the first ``&``.
        """
        params = {
            'name': self.user,
            'psw': self.pass_,
        }
        login_url = API_URL + 'UserLoginStr'
        r = requests.get(login_url, params=params)
        token = r.content.decode().split('&')[0]
        return token

    def _ensure_token(self):
        """Return ``self.token``, logging in first if it is still ``None``."""
        if self.token is None:
            self.token = self._get_token()
        return self.token

    @retry(stop_max_attempt_number=RETRY_TIMES)
    def get_phone(self):
        """Request one phone number and return the decoded response body."""
        params = {
            'xmid': ItemId,  # required; the project must be favorited first
            'token': self._ensure_token(),  # required
            'sl': '1',
            'lx': '0',
            'ks': '0',
            'a1': '',
            'a2': '',
            'pk': '',
            'rj': '',
        }
        get_phone_url = API_URL + 'GetHM2Str'
        r = requests.get(get_phone_url, params=params)
        return r.content.decode()

    @retry(stop_max_attempt_number=RETRY_TIMES)
    def release_url(self, phone):  # release the phone number
        """Release ``phone`` on the platform; return the decoded response."""
        params = {'token': self._ensure_token(), 'hm': phone}
        release_url = API_URL + 'sfHmStr'
        r = requests.get(release_url, params=params)
        return r.content.decode()

    def _extract_phone(self, raw):
        """Return every run of 11 consecutive digits found in ``raw``."""
        return re.findall(r'\d{11}', raw)

    @utils.need_save_pid_files(pid_files_path=full_PID_file_name)
    @utils.account_band_judge(server=redis_server, spider_name=spider_name)
    def run(self):
        """Print the process id on up to three passes, then stop.

        The loop also ends early when the module-level ``exit_signal`` is
        truthy.  The output file is closed on every exit path; previously it
        stayed open when the pass counter ended the loop.
        """
        i = 1
        global exit_signal
        try:
            while True:
                if exit_signal:  # exit signal
                    record_msg(' <- 使用signal退出')
                    break

                print(os.getpid())

                i += 1
                if i > 3:
                    break
                time.sleep(defaults.GET_PHONE_DELAY)
        finally:
            self.fp.close()

    def __del__(self):
        """Intentionally empty; ``run`` is responsible for closing the file."""
        pass
예제 #4
0
class YunMaCrawl(object):  # YunMa platform
    """Collect phone numbers handed out by the YunMa code-receiving API.

    ``run`` repeatedly requests a number, writes every number that the bloom
    filter has not seen to the data file, and releases the number again.
    """
    name = 'yunma'
    # Built once when the class body executes; also passed to the
    # ``account_band_judge`` decorator applied to ``run``.
    redis_server = bloom_filter_from_defaults(defaults.BLOOM_REDIS_URL)

    def __init__(self):
        """Log in, build the bloom filter and open the output file."""
        self.user = defaults.USER
        self.pass_ = defaults.PASS
        self.token = self._get_token()
        self.bf_server = BloomFilterRedis(server=self.redis_server,
                                          key=defaults.BLOOM_KEY,
                                          blockNum=1)
        # Mode 'w' truncates any existing data file.
        self.fp = open(full_data_file_name, 'w', encoding='utf-8')

    def _get_token(self):
        """Call the ``Login`` endpoint and return the login token.

        Returns:
            The part of the decoded response body before the first ``&``.
        """
        params = {
            'uName': self.user,
            'pWord': self.pass_,
        }
        login_url = API_URL + 'Login'
        r = requests.get(login_url, params=params)
        token = r.content.decode().split('&')[0]
        return token

    @retry(stop_max_attempt_number=RETRY_TIMES)
    def get_phone(self):
        """Request one phone number; return the body decoded as GBK."""
        params = {
            'ItemId': itemId,  # required; the project must be favorited first
            'token': self.token,  # required
            'num': '1',  # optional, defaults to 1
            'PhoneType': '0',  # optional, defaults to 0
        }
        get_phone_url = API_URL + 'getPhone'
        r = requests.get(get_phone_url, params=params)
        return r.content.decode('gbk')

    @retry(stop_max_attempt_number=RETRY_TIMES)
    def release_url(self, phone):  # release the phone number
        """Release ``phone`` on the platform; return the decoded response."""
        # The query is formatted by hand, so the trailing ';' and the
        # '<phone>-<itemId>' pair are sent exactly as written.
        query = 'releasePhone?token=%s&phoneList=%s-%s;' \
                % (self.token, phone, itemId)
        r = requests.get(API_URL + query)
        return r.content.decode()

    def _extract_phone(self, raw):
        """Return every run of 11 consecutive digits found in ``raw``."""
        return re.findall(r'\d{11}', raw)

    def _handle_phone(self, phone):
        """Record one phone number, then release it on the platform."""
        phone_dict = {'phone': phone, 'source': YunMaCrawl.name}
        utils.update_phone_dict(phone_dict)
        record_msg(str(phone_dict))
        if not self.bf_server.is_exists(phone):
            self.fp.write(str(phone_dict) + '\n')
            # Flush per record so the data is on disk immediately.
            self.fp.flush()
        else:
            record_msg('过滤了重复手机号码 -> %s' % phone_dict)

        time.sleep(defaults.RELEASE_DELAY)

        # release the phone number
        res = self.release_url(phone)
        record_msg('释放手机号 -> %s' % res)

    @utils.need_save_pid_files(pid_files_path=full_PID_file_name)
    @utils.account_band_judge(server=redis_server, spider_name=spider_name)
    def run(self):
        """Fetch, record and release numbers until told to stop.

        The loop ends when the module-level ``exit_signal`` is truthy or when
        ``utils.return_phone_error_check`` flags the response.  The output
        file is closed on every exit path, including exceptions.
        """
        global exit_signal
        try:
            while True:
                if exit_signal:  # exit signal
                    record_msg(' <- 使用signal退出')
                    break

                # fetch a number
                phone_ = self.get_phone()
                record_msg('接码平台取号返回 -> %s' % phone_)

                # the session expires after a while: log in again
                if 'Session 过期' in phone_:
                    self.token = self._get_token()
                    record_msg(phone_)

                # exit on account error
                res = utils.return_phone_error_check(phone_)
                if res[0]:
                    record_msg('账户异常退出,返回 -> %s -> %s' % (phone_, res[1]))
                    break

                for phone in self._extract_phone(phone_):
                    self._handle_phone(phone)

                time.sleep(defaults.GET_PHONE_DELAY)
        finally:
            # Previously the file was closed only on the exit-signal path,
            # leaking the handle on an account error or an exception.
            self.fp.close()

    def __del__(self):
        """Intentionally empty; ``run`` is responsible for closing the file."""
        pass
예제 #5
0
class ShenHuaCrawl(object):
    """Collect phone numbers handed out by the ShenHua code-receiving API.

    ``run`` repeatedly requests a number, writes every number that the bloom
    filter has not seen to the data file, and releases the number again.
    """
    name = 'shenhua'
    # Built once when the class body executes; also passed to the
    # ``account_band_judge`` decorator applied to ``run``.
    redis_server = bloom_filter_from_defaults(defaults.BLOOM_REDIS_URL)
    # Common prefix of every endpoint used by this class.
    _API_BASE = 'http://api.shjmpt.com:9002/pubApi/'

    def __init__(self):
        """Log in, build the bloom filter and open the output file."""
        self.user = defaults.USER
        self.pass_ = defaults.PASS
        self.token = self._get_token()
        self.bf_server = BloomFilterRedis(server=self.redis_server,
                                          key=defaults.BLOOM_KEY,
                                          blockNum=1)
        # Mode 'w' truncates any existing data file.
        self.fp = open(full_data_file_name, 'w', encoding='utf-8')

    @staticmethod
    def _query_string(params):
        """Join ``params`` into ``k=v&`` pairs without any URL encoding."""
        return ''.join('%s=%s&' % (k, v) for k, v in params.items())

    def _get_token(self):
        """Call the ``uLogin`` endpoint and return the login token.

        Returns:
            The part of the decoded response body before the first ``&``.
        """
        params = {
            'uName': self.user,
            'pWord': self.pass_,
            # The value is already percent-encoded, which is presumably why
            # the query string is assembled by hand - TODO confirm.
            'Developer': 'cvZpfUej8AVQSZPa31W5Lw%3d%3d'
        }
        login_url = self._API_BASE + 'uLogin?' + self._query_string(params)
        r = requests.get(login_url)
        token = r.content.decode().split('&')[0]
        return token

    @retry(stop_max_attempt_number=RETRY_TIMES)
    def get_phone(self):
        """Request one phone number and return the decoded response body."""
        params = {
            'ItemId': itemId,  # required; the project must be favorited first
            'token': self.token,  # required
            'num': '1',  # optional, defaults to 1
        }
        get_phone_url = (self._API_BASE + 'GetPhone?'
                         + self._query_string(params))
        r = requests.get(get_phone_url)
        return r.content.decode()

    @retry(stop_max_attempt_number=RETRY_TIMES)
    def release_url(self, phone):  # release the phone number
        """Release ``phone`` on the platform; return the decoded response."""
        query = 'ReleasePhone?token=%s&phoneList=%s-%s;' \
                % (self.token, phone, itemId)
        r = requests.get(self._API_BASE + query)
        return r.content.decode()

    @retry(stop_max_attempt_number=RETRY_TIMES)
    def check_out(self):  # the platform requires logging the account out
        """Call the ``uExit`` endpoint; return the decoded response.

        NOTE(review): nothing in this class calls this method; presumably the
        caller invokes it after ``run`` - confirm.
        """
        logout_url = self._API_BASE + 'uExit?token=%s' % self.token
        r = requests.get(logout_url)
        return r.content.decode()

    def _extract_phone(self, raw):
        """Return every run of 11 consecutive digits found in ``raw``."""
        return re.findall(r'\d{11}', raw)

    def _handle_phone(self, phone):
        """Record one phone number, then release it on the platform."""
        phone_dict = {'phone': phone, 'source': ShenHuaCrawl.name}
        utils.update_phone_dict(phone_dict)
        record_msg(str(phone_dict))
        if not self.bf_server.is_exists(phone):
            self.fp.write(str(phone_dict) + '\n')
            # Flush per record so the data is on disk immediately.
            self.fp.flush()
        else:
            record_msg('过滤了重复手机号码 -> %s' % phone_dict)

        time.sleep(defaults.RELEASE_DELAY)

        # release the phone number
        res = self.release_url(phone)
        record_msg('释放手机号 -> %s' % res)

    @utils.need_save_pid_files(pid_files_path=full_PID_file_name)
    @utils.account_band_judge(server=redis_server, spider_name=spider_name)
    def run(self):
        """Fetch, record and release numbers until told to stop.

        The loop ends when the module-level ``exit_signal`` is truthy or when
        ``utils.return_phone_error_check`` flags the response.  The output
        file is closed on every exit path, including exceptions.
        """
        global exit_signal
        try:
            while True:
                if exit_signal:  # exit signal
                    record_msg(' <- 使用signal退出')
                    break

                # fetch a number
                phone_ = self.get_phone()
                record_msg('接码平台取号返回 -> %s' % phone_)

                # the session expires after a while: log in again
                if 'Session 过期' in phone_:
                    self.token = self._get_token()
                    record_msg(phone_)

                # exit on account error
                res = utils.return_phone_error_check(phone_)
                if res[0]:
                    record_msg('账户异常退出,返回 -> %s -> %s' % (phone_, res[1]))
                    break

                for phone in self._extract_phone(phone_):
                    self._handle_phone(phone)

                time.sleep(defaults.GET_PHONE_DELAY)
        finally:
            # Previously the file was closed only on the exit-signal path,
            # leaking the handle on an account error or an exception.
            self.fp.close()

    def __del__(self):
        """Intentionally empty; ``run`` is responsible for closing the file."""
        pass
예제 #6
0
class TheWolfCrawl(object):  # thewolf code-receiving platform
    """Collect phone numbers from the thewolf code-receiving site.

    All requests go through one ``requests`` session created in ``__init__``.
    ``run`` repeatedly requests a number, writes every number that the bloom
    filter has not seen to the data file, and completes the number again.
    """
    name = 'thewolf'
    # Built once when the class body executes; also passed to the
    # ``account_band_judge`` decorator applied to ``run``.
    redis_server = bloom_filter_from_defaults(defaults.BLOOM_REDIS_URL)
    # Endpoint shared by the get-number and complete-number calls.
    _SMS_API_URL = 'http://thewolf.yyyzmpt.com/api_getsms.php'

    def __init__(self):
        """Log in, build the bloom filter and open the output file."""
        self.user = USER[0]
        self.pass_ = PASS[0]
        self.req_session = requests.session()
        self.token = self._get_token()
        self.bf_server = BloomFilterRedis(server=self.redis_server,
                                          key=defaults.BLOOM_KEY,
                                          blockNum=1)
        # Mode 'w' truncates any existing data file.
        self.fp = open(full_data_file_name, 'w', encoding='utf-8')

    def _get_token(self):
        """Post the login form through the shared session.

        Returns:
            A ``(cookies, body)`` tuple: the response cookies and the decoded
            response body.  No other method in this class reads the stored
            value; the later calls reuse ``self.req_session`` instead.
        """
        params = {
            'backurl': 'http',
            'user': self.user,
            'steplogin': '******',
            'password': self.pass_,
            'remember': '1',
            'x': '42',
            'y': '16',
        }
        login_url = 'http://thewolf.yyyzmpt.com/reg.php?act=login'
        r = self.req_session.post(login_url, data=params)
        body = r.content.decode()
        # NOTE(review): this echoes the whole login response to stdout -
        # confirm it is still wanted outside of debugging.
        print(body)
        return r.cookies, body

    @retry(stop_max_attempt_number=RETRY_TIMES)
    def get_phone(self):
        """Request a number; return the ``qh_number`` matches as a string.

        Returns:
            ``str()`` of the list of ``"qh_number":"<11 digits>"`` fragments
            found in the response; the rest of the response is discarded.
        """
        params = {
            'api': 'get_number',
            'tp': '1',
            't': '1',
            'pid': itemId,
        }
        r = self.req_session.post(self._SMS_API_URL, data=params)
        return str(re.findall(r'("qh_number":"\d{11}")', r.content.decode()))

    @retry(stop_max_attempt_number=RETRY_TIMES)
    def release_url(self, phone):  # release the phone number
        """Mark ``phone`` as completed; return the decoded response."""
        params = {
            'api': 'complete_number',
            'get_number': phone,
            'tp': '1',
            'success': str(random.randint(7, 10)),
        }
        r = self.req_session.post(self._SMS_API_URL, data=params)
        return r.content.decode()

    def _extract_phone(self, raw):
        """Return every run of 11 consecutive digits found in ``raw``."""
        return re.findall(r'\d{11}', raw)

    def _handle_phone(self, phone):
        """Record one phone number, then release it on the platform."""
        phone_dict = {'phone': phone, 'source': TheWolfCrawl.name}
        utils.update_phone_dict(phone_dict)
        record_msg(str(phone_dict))
        if not self.bf_server.is_exists(phone):
            self.fp.write(str(phone_dict) + '\n')
            # Flush per record so the data is on disk immediately.
            self.fp.flush()
        else:
            record_msg('过滤了重复手机号码 -> %s' % phone_dict)

        time.sleep(defaults.RELEASE_DELAY)

        # release the phone number
        res = self.release_url(phone)
        record_msg('释放手机号 -> %s' % res)

    @utils.need_save_pid_files(pid_files_path=full_PID_file_name)
    @utils.account_band_judge(server=redis_server, spider_name=spider_name)
    def run(self):
        """Fetch, record and release numbers until told to stop.

        The loop ends when the module-level ``exit_signal`` is truthy or when
        ``utils.return_phone_error_check`` flags the response.  The output
        file is closed on every exit path, including exceptions.
        """
        global exit_signal
        try:
            while True:
                if exit_signal:  # exit signal
                    record_msg(' <- 使用signal退出')
                    break

                # fetch a number
                phone_ = self.get_phone()
                record_msg('接码平台取号返回 -> %s' % phone_)

                # the session expires after a while: log in again
                # NOTE(review): get_phone() returns only the qh_number
                # matches, so this substring cannot occur in phone_ as
                # written and the re-login never runs.
                if 'Session 过期' in phone_:
                    self.token = self._get_token()
                    record_msg(phone_)

                # exit on account error
                res = utils.return_phone_error_check(phone_)
                if res[0]:
                    record_msg('账户异常退出,返回 -> %s -> %s' % (phone_, res[1]))
                    break

                for phone in self._extract_phone(phone_):
                    self._handle_phone(phone)

                time.sleep(defaults.GET_PHONE_DELAY)
        finally:
            # Previously the file was closed only on the exit-signal path,
            # leaking the handle on an account error or an exception.
            self.fp.close()

    def __del__(self):
        """Intentionally empty; ``run`` is responsible for closing the file."""
        pass
예제 #7
0
class Ma60Crawl(object):  # 60 code-receiving platform
    """Collect phone numbers handed out by the '60' code-receiving API.

    ``run`` repeatedly requests a number, writes every number that the bloom
    filter has not seen to the data file, and frees the number again.
    """
    name = 'ma60'
    # Built once when the class body executes; also passed to the
    # ``account_band_judge`` decorator applied to ``run``.
    redis_server = bloom_filter_from_defaults(defaults.BLOOM_REDIS_URL)

    def __init__(self):
        """Log in, build the bloom filter and open the output file."""
        self.user = defaults.USER
        self.pass_ = defaults.PASS
        self.token = self._get_token()
        self.bf_server = BloomFilterRedis(server=self.redis_server,
                                          key=defaults.BLOOM_KEY,
                                          blockNum=1)
        # Mode 'w' truncates any existing data file.
        self.fp = open(full_data_file_name, 'w', encoding='utf-8')

    def _get_token(self):
        """Log in with the MD5-hashed password and return the user key.

        Side effect: stores ``Return.UserID`` from the JSON response in
        ``self.userID``, which the other API calls send as ``userid``.

        Returns:
            ``Return.UserKey`` from the JSON response.
        """
        params = {
            'cmd': 'login',
            'encode': 'utf-8',
            'username': self.user,
            'password': hashlib.md5(self.pass_.encode()).hexdigest(),
            'dtype': 'json',
        }
        login_url = API_URL + 'loginuser'
        r = requests.get(login_url, params=params).json()
        self.userID = r['Return']['UserID']
        return r['Return']['UserKey']

    def _auth_params(self, cmd):
        """Return the parameters shared by every ``newsmssrv`` call."""
        return {
            'cmd': cmd,
            'encode': 'utf-8',
            'dtype': 'json',
            'userid': self.userID,
            'userkey': self.token,
            'docks': itemId,
        }

    @retry(stop_max_attempt_number=RETRY_TIMES)
    def get_phone(self):
        """Request one phone number and return the decoded response body."""
        params = self._auth_params('gettelnum')
        get_phone_url = API_URL + 'newsmssrv'
        r = requests.get(get_phone_url, params=params)
        return r.content.decode()

    @retry(stop_max_attempt_number=RETRY_TIMES)
    def release_url(self, phone):  # release the phone number
        """Free ``phone`` on the platform; return the decoded response."""
        params = self._auth_params('freetelnum')
        params['telnum'] = phone
        release_url = API_URL + 'newsmssrv'
        r = requests.get(release_url, params=params)
        return r.content.decode()

    def _extract_phone(self, raw):
        """Return every run of 11 consecutive digits found in ``raw``."""
        return re.findall(r'\d{11}', raw)

    def _handle_phone(self, phone):
        """Record one phone number, then release it on the platform."""
        phone_dict = {'phone': phone, 'source': Ma60Crawl.name}
        utils.update_phone_dict(phone_dict)
        record_msg(str(phone_dict))
        if not self.bf_server.is_exists(phone):
            self.fp.write(str(phone_dict) + '\n')
            # Flush per record so the data is on disk immediately.
            self.fp.flush()
        else:
            record_msg('过滤了重复手机号码 -> %s' % phone_dict)

        time.sleep(defaults.RELEASE_DELAY)

        # release the phone number
        res = self.release_url(phone)
        record_msg('释放手机号 -> %s' % res)

    @utils.need_save_pid_files(pid_files_path=full_PID_file_name)
    @utils.account_band_judge(server=redis_server, spider_name=spider_name)
    def run(self):
        """Fetch, record and release numbers until told to stop.

        The loop ends when the module-level ``exit_signal`` is truthy or when
        ``utils.return_phone_error_check`` flags the response.  The output
        file is closed on every exit path, including exceptions.
        """
        global exit_signal
        try:
            while True:
                if exit_signal:  # exit signal
                    record_msg(' <- 使用signal退出')
                    break

                # fetch a number
                phone_ = self.get_phone()
                record_msg('接码平台取号返回 -> %s' % phone_)

                # exit on account error
                res = utils.return_phone_error_check(phone_)
                if res[0]:
                    record_msg('账户异常退出,返回 -> %s -> %s' % (phone_, res[1]))
                    break

                for phone in self._extract_phone(phone_):
                    self._handle_phone(phone)

                time.sleep(defaults.GET_PHONE_DELAY)
        finally:
            # Previously the file was closed only on the exit-signal path,
            # leaking the handle on an account error or an exception.
            self.fp.close()

    def __del__(self):
        """Intentionally empty; ``run`` is responsible for closing the file."""
        pass