Beispiel #1
0
 def get_uniqueid(self):
     """Return the unique id of this object.

     The id is whatever ``helper_crypto.get_md5_from_str`` produces for
     the value returned by ``self._get_write_lines()``.
     """
     write_lines = self._get_write_lines()
     return helper_crypto.get_md5_from_str(write_lines)
Beispiel #2
0
    def get_all_country(self):
        """
        Download the GeoNames ``allCountries.zip`` dump, unzip it, parse it
        and write out every record that passes the duplicate check.

        Steps:
          1. Stream the archive to ``self.tmpfile / 'allinfo.zip'``.
          2. Extract it into ``self.tmpfile``; the first archive member is
             the file that gets parsed.
          3. For each object yielded by ``self.__get_geoname``, serialise its
             ``__dict__`` to JSON, hash it, and write it with
             ``self.write_text`` unless ``self.is_geodata_unique`` returns a
             truthy value for the hash.
          4. Delete the extracted file and the archive.

        :return: None
        :raises requests.HTTPError: when the download answers with an error
            status (raised by ``raise_for_status``).
        """
        filename = self.tmpfile / 'allinfo.zip'
        url = 'http://download.geonames.org/export/dump/allCountries.zip'
        headers = {
            'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
            'Accept-Encoding': "gzip, deflate",
            'Accept-Language': "zh-CN,zh;q=0.9,en;q=0.8",
            'Cache-Control': "no-cache",
            'Connection': "keep-alive",
            'Host': "download.geonames.org",
            'Pragma': "no-cache",
            'Referer': "http://download.geonames.org/export/dump/",
            'Upgrade-Insecure-Requests': "1",
            'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
        }

        # Download in 1 MiB chunks, logging progress once per chunk.
        chunk_size = 1024 * 1024
        count = 0
        with requests.get(url, headers=headers, stream=True) as r:
            r.raise_for_status()
            # The header is absent for chunked responses; only the progress
            # message depends on it, so fall back to a placeholder instead
            # of failing on int(None).
            content_length = r.headers.get('content-length')
            if content_length is None:
                total_length = 'unknown'
            else:
                total_length = math.ceil(int(content_length) / chunk_size)
            with filename.open('wb') as f:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    count += 1
                    self._logger.info(
                        f'Downloading allCountries.zip: {count} Mb/ {total_length} Mb'
                    )
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
        self._logger.info('Download all city info zip success')

        # Unzip; the context manager closes the archive even when the
        # extraction fails.
        self._logger.debug('Start unzip')
        with zipfile.ZipFile(filename, 'r') as zip_file:
            zip_file.extractall(self.tmpfile)
            zipinfo = zip_file.namelist()
            self._logger.debug(f'Get unzip file, name:{zipinfo[0]}')

        # Parse the extracted file and emit the records incrementally.
        info_path = self.tmpfile / zipinfo[0]
        for g_data in self.__get_geoname(info_path):
            geostr = json.dumps(g_data.__dict__, ensure_ascii=False)
            geo_md5 = helper_crypto.get_md5_from_str(geostr)
            # Duplicated data is not written again, so the client takes this
            # work off the server.
            # NOTE(review): despite its name, a truthy result from
            # is_geodata_unique is treated as "already stored" here --
            # confirm against its implementation.
            if self.is_geodata_unique(geo_md5):
                continue
            # New record: write it and remember its identifier.
            self.write_text(geostr, self.suffix)
            self.store_geodata_unique(geo_md5)

        # The upstream dump appears to be refreshed regularly, so the local
        # copies are removed and fetched again on the next run.
        time.sleep(5)
        info_path.unlink()
        filename.unlink()
        self._logger.debug('Delete source zip file')
Beispiel #3
0
 def get_uniqueid(self):
     """Return the unique id of this object.

     All pieces yielded by ``self._get_write_lines()`` are concatenated
     and the result is passed to ``helper_crypto.get_md5_from_str``.
     """
     # str.join builds the text in one pass instead of repeated "+=".
     return helper_crypto.get_md5_from_str(
         "".join(self._get_write_lines()))
Beispiel #4
0
    def _parse_one_ipwhois(self, ip: str, jcontent: dict,
                           reason) -> IPWhoisData:
        """Build an ``IPWhoisData`` from one parsed ipwhois record.

        The record has the same layout as an ipwhois history ``content``
        entry.

        :param ip: the IP address the record belongs to; used for the
            country fallback lookup and in log messages.
        :param jcontent: the decoded record. Anything that is not a dict
            yields ``None``.
        :param reason: passed through unchanged as the first argument of
            ``IPWhoisData``.
        :return: the populated object, or ``None`` when ``jcontent`` is not
            a dict or an error occurs before the object is constructed. An
            error after construction is logged and the partially filled
            object is returned.
        """
        res: IPWhoisData = None
        try:
            if not isinstance(jcontent, dict):
                return res

            handle = jcontent.get("handle")
            ip_ver = jcontent.get("ipVersion")
            allocate_type = jcontent.get("type")
            netname = jcontent.get("name")
            country_code = jcontent.get("country")
            if country_code is None:
                # The mmdb lookup also returns organisation and ISP; only
                # the geo part is needed here.
                geo, _org, _isp = self._dbip.get_ip_mmdbinfo(1, ip)
                country_code = geo._country_code

            raw: str = json.dumps(jcontent)
            md5 = helper_crypto.get_md5_from_str(raw)

            # construct obj
            res = IPWhoisData(reason, md5, raw, handle, allocate_type, netname,
                              country_code, ip_ver)

            # events: last modification and registration dates
            jevents = jcontent.get("events")
            if jevents:
                for je in jevents:
                    if "eventAction" not in je or "eventDate" not in je:
                        continue
                    jea = je["eventAction"]
                    jval = je["eventDate"]
                    if jea == "last changed":
                        res.last_modified = jval
                    elif jea == "registration":
                        res.applicable_from = jval
                    else:
                        self._logger.warn(
                            "Unknown eventAction for ipwhois: ip={}, action={}, val={}"
                                .format(ip, jea, jval))

            # remarks: every description line, each terminated by CRLF
            jremarks = jcontent.get("remarks")
            if jremarks:
                remark_lines = []
                for jr in jremarks:
                    jdes = jr.get("description")
                    if jdes is None or len(jdes) < 1:
                        continue
                    remark_lines.extend(jdes)
                if remark_lines:
                    res.remarks = "".join(
                        line + "\r\n" for line in remark_lines)

            # cidrs
            jcidrs = jcontent.get("cidr0_cidrs")
            if jcidrs:
                for jc in jcidrs:
                    if "v4prefix" in jc:
                        prefix = jc["v4prefix"]
                    elif "v6prefix" in jc:
                        prefix = jc["v6prefix"]
                    else:
                        prefix = None
                    length = jc.get("length")
                    # An entry without a prefix used to be stored as
                    # "None/<length>"; skip incomplete entries instead.
                    if prefix is None or length is None:
                        continue
                    res.set_cidrs("{}/{}".format(prefix, length))

            # entities
            jentity = jcontent.get("entities")
            if jentity:
                for jen in jentity:
                    en = self._parse_entity(ip, jen)
                    if en is None:
                        continue
                    res.set_entity(en)

        except Exception:
            self._logger.debug(
                "Parse one ipwhois error: ip:{}, error: {}".format(
                    ip, traceback.format_exc()))
        return res
Beispiel #5
0
    def _check_registration(self):
        """
        查询手机号是否注册了百度贴吧
        :param account:
        :return:
        """
        t = time.strftime('%Y-%m-%d %H:%M:%S')
        ti = int(
            datetime.now(pytz.timezone('Asia/Shanghai')).timestamp() * 1000)
        try:
            html = self._ha.getstring(
                'https://passport.baidu.com/v2/?reg&tpl=tb&u=//tieba.baidu.com',
                headers="""
Host: passport.baidu.com
Connection: keep-alive
Pragma: no-cache
Cache-Control: no-cache
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
""",
                encoding='utf-8')
            html.encoding = 'utf-8'
            # print(html.text)
            gid = 'DD9D1FD-752B-4AC4-9BE0-CB699316505D'
            gid = str(uuid.uuid1()).upper()[1:]
            gid = gid[:13] + '4' + gid[14:]
            js = """
            getUniqueId = function(e) {
                return e + Math.floor(2147483648 * Math.random()).toString(36)
            }"""
            ctx = execjs.compile(js)
            callback = ctx.call('getUniqueId', 'bd__cbs__')
            html = self._ha.getstring(
                f'https://passport.baidu.com/v2/api/?getapi&tpl=tb&apiver=v3&tt={ti}&class=regPhone&gid={gid}&app=&traceid=&callback={callback}',
                headers="""
Host: passport.baidu.com
Connection: keep-alive
Pragma: no-cache
Cache-Control: no-cache
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36
Accept: */*
Referer: https://passport.baidu.com/v2/?reg&tpl=tb&u=//tieba.baidu.com
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
""")
            token = substring(html, '"token" : "', '"')
            #
            # js = """
            # function hex_md5(s) {
            #     return binl2hex(core_md5(str2binl(s), s.length * chrsz))
            # }
            # function get_moonshad(phone) {
            #     n = hex_md5(phone + "Moonshadow");
            #     n = n.replace(/o/, "ow").replace(/d/, "do").replace(/a/, "ad"),
            #     n = n.replace(/h/, "ha").replace(/s/, "sh").replace(/n/, "ns").replace(/m/, "mo"),
            #     return n
            # }
            # """
            # moon = execjs.compile(js)
            # moonshad = moon.call('get_moonshad', self.task.phone)
            moonshad = helper_crypto.get_md5_from_str(self.task.phone +
                                                      "Moonshadow")
            moonshad = re.sub(r'o', 'o~', moonshad, 1)
            moonshad = re.sub(r'd', 'd!', moonshad, 1)
            moonshad = re.sub(r'a', 'a@', moonshad, 1)
            moonshad = re.sub(r'h', 'h#', moonshad, 1)
            moonshad = re.sub(r's', 's$', moonshad, 1)
            moonshad = re.sub(r'n', 'n%', moonshad, 1)
            moonshad = re.sub(r'm', 'm^', moonshad, 1)
            moonshad = moonshad.replace('~', 'w').replace('!', 'o').replace(
                '@',
                'd').replace('#',
                             'a').replace('$',
                                          'h').replace('%',
                                                       's').replace('^', 'n')
            callback = ctx.call('getUniqueId', 'bd__cbs__')
            url = f"https://passport.baidu.com/v2/?regphonecheck&token={token}&tpl=tb&apiver=v3&tt={ti}&phone={self.task.phone}&moonshad={moonshad}&countrycode=&gid={gid}&exchange=0&isexchangeable=1&action=reg&traceid=&callback={callback}"
            headers = """
Host: passport.baidu.com
Connection: keep-alive
Pragma: no-cache
Cache-Control: no-cache
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36
Accept: */*
Referer: https://passport.baidu.com/v2/?reg&tpl=tb&u=//tieba.baidu.com
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9
"""
            response = self._ha.get_response(url, headers=headers)
            response.encoding = 'utf-8'
            # print(response.text)
            if '"400001"' in response.text:
                self._write_task_back(ECommandStatus.Succeed, 'Registered', t,
                                      EBackResult.Registerd)
            else:
                self._write_task_back(ECommandStatus.Succeed, 'Not Registered',
                                      t, EBackResult.UnRegisterd)
        except Exception:
            self._logger.error('Uber check registration fail: {}'.format(
                traceback.format_exc()))
            self._write_task_back(ECommandStatus.Failed,
                                  'Check registration fail', t,
                                  EBackResult.CheckRegisterdFail)
        return
Beispiel #6
0
 def get_uniqueid(self):
     """Return the unique id of this object.

     The id is ``helper_crypto.get_md5_from_str`` applied to the
     concatenation of the app type, user id and order id.
     """
     identity = "{}{}{}".format(self._apptype, self._userid, self._orderid)
     return helper_crypto.get_md5_from_str(identity)
Beispiel #7
0
 def get_uniqueid(self):
     """Return the unique id of this object.

     The id is ``helper_crypto.get_md5_from_str`` applied to the
     concatenation of the resource id, the task platform and the url.
     """
     identity = "{}{}{}".format(self.resourceid, self._task.platform,
                                self._url)
     return helper_crypto.get_md5_from_str(identity)
Beispiel #8
0
 def get_uniqueid(self):
     """Return the unique id of this object.

     The id is ``helper_crypto.get_md5_from_str`` applied to the
     concatenation of the parent object, the task platform and the url.
     """
     identity = "{}{}{}".format(self._parentobj, self._task.platform,
                                self.url)
     return helper_crypto.get_md5_from_str(identity)
Beispiel #9
0
 def get_uniqueid(self):
     """Return the unique id of this object.

     The id is ``helper_crypto.get_md5_from_str`` applied to the
     concatenation of the user id and the message id.
     """
     identity = "{}{}".format(self._userid, self._messageid)
     return helper_crypto.get_md5_from_str(identity)
Beispiel #10
0
    def _parse_chatlog(self, msg: map, threadtype: str, ownerid: str) -> iter:
        """接收 json解出来的map消息msg对象,返回ICHATLOG_ONE和RESOURCE对象迭代器\n
        msg:json解出来的map消息msg对象\n
        threadtype:一个json中解出来的字段,应该是表示会话类型"""
        try:
            if msg is None:
                self._logger.error(
                    "Invalid msg map object for parseing chat log: {}".format(
                        msg))
                return
            chattype: int = 0  # 0私聊,1群聊
            msgtype: str = None  # 图片视频等
            sendtime: str = None  # 发送时间
            if not msg.__contains__('message_sender') or not msg[
                    'message_sender'].__contains__(
                        'id') or not msg.__contains__('message_id'):
                return
            if not threadtype is None:
                if threadtype != "ONE_TO_ONE":
                    chattype = 1
                else:
                    chattype = 0
            # 消息类型
            if not msg.__contains__('__typename') or msg[
                    '__typename'] is None or msg['__typename'] == '':
                return
            msgtype = self._judge_message_type(msg['__typename'])
            # 发送时间戳
            timestamp_precise = None
            if msg.__contains__('timestamp_precise'):
                try:
                    tmp = msg['timestamp_precise']
                    tmp = int(tmp)
                    timestamp_precise = tmp
                    sendtime = helper_time.timespan_to_datestr(tmp)
                except Exception:
                    sendtime = helper_time.timespan_to_datestr(
                        helper_time.ts_since_1970())

            # 构建消息对象
            ctg = ICHATLOG_ONE(self.task, self._appcfg._apptype, self._userid,
                               msgtype, ownerid, chattype, msg['message_id'],
                               msg['message_sender']['id'], sendtime)
            ctg.remarks = timestamp_precise

            # 已读未读
            if msg.__contains__('unread'):
                if msg['unread'].strip().lower() == 'true':
                    ctg.isread = 0
                else:
                    ctg.isread = 1
            # 表情资源
            if msg.__contains__(
                    'sticker'
            ) and not msg['sticker'] is None and msg['sticker'].__contains__(
                    'url') and msg['sticker'].__contains__('label'):
                sjstk = msg['sticker']
                if sjstk.__contains__('url'):
                    url = sjstk['url'].replace('\\', '').rstrip()
                    rscid = helper_crypto.get_md5_from_str(url)
                    if sjstk.__contains__('id'):
                        rscid = sjstk['id']
                    for rsc in self._fetch_resources(url,
                                                     EResourceType.Picture,
                                                     rscid):
                        ctg.append_resource(rsc)
                        yield rsc
            # 片段,系统消息说明
            if msg.__contains__('snippet'):
                ctg.content += msg['snippet']
            # answered对方是否响应
            if msg.__contains__('answered'):
                if msg['answered'] == 'false':
                    ctg.answered = 0
                else:
                    ctg.answered = 1
            # blob_attachments
            if msg.__contains__('blob_attachments'
                                ) and not msg['blob_attachments'] is None:
                for blob in msg['blob_attachments']:
                    if not blob.__contains__('__typename'):
                        continue
                    # 拿附件url,附件类型/type
                    url, rsctype = self._get_attachments_type_and_url(blob)
                    if not isinstance(url, str) or url == "":
                        self._logger.warn(
                            "Get attachment url failed: {}".format(blob))
                        continue
                    rscid: str = None
                    if blob.__contains__('legacy_attachment_id'):
                        rscid = blob['legacy_attachment_id']
                    elif blob.__contains__('message_file_fbid'):
                        rscid = blob['message_file_fbid']
                    if not isinstance(rscid, str) or rscid == "":
                        rscid = helper_crypto.get_md5_from_str(url)
                    # 附件名
                    finame = None
                    if blob.__contains__('filename'):
                        finame = blob['filename']
                    # 下载
                    for rsc in self._fetch_resources(url, rsctype, rscid,
                                                     finame):
                        ctg.append_resource(rsc)
                        yield rsc
            if msg.__contains__(
                    'extensible_attachment'
            ) and not msg['extensible_attachment'] is None and msg[
                    'extensible_attachment'].__contains__(
                        'legacy_attachment_id'):
                resourceid = msg['extensible_attachment'][
                    'legacy_attachment_id']
                if msg['extensible_attachment'].__contains__("story_attachment") \
                        and msg['extensible_attachment']['story_attachment'].__contains__('media'):
                    jmedia = msg['extensible_attachment']['story_attachment'][
                        'media']

                    if jmedia.__contains__('is_playable') and jmedia[
                            'is_playable'] == 'true' and jmedia.__contains__(
                                'playable_url'):
                        url = jmedia['playable_url'].rstrip().replace(
                            '\\', '').rstrip()
                        for rsc in self._fetch_resources(
                                url, EResourceType.Video, resourceid):
                            ctg.append_resource(rsc)
                            yield rsc

                    if jmedia.__contains__('image') \
                            and jmedia['image'].__contains__('uri'):
                        url = jmedia['image']['uri'].rstrip().replace(
                            '\\', '').rstrip()

                        for rsc in self._fetch_resources(
                                url, EResourceType.Picture, resourceid):
                            ctg.append_resource(rsc)
                            yield rsc
            # message
            if msg.__contains__('message') and not msg['message'] is None:
                if msg['message'].__contains__('text'):
                    if not msg['message']['text'] is None and not msg[
                            'message']['text'] == '':
                        ctg.content += msg['message']['text']

            yield ctg

        except Exception:
            self._logger.error(
                "Parse one chatlog msg error:\nmsg:{}\nerror:{}".format(
                    msg, traceback.format_exc()))
Beispiel #11
0
 def get_uniqueid(self):
     """Return the unique id of the current data.

     Subclasses implement this to return an identifier that is used for
     de-duplicating data and for incremental downloads.
     """
     write_lines = self.get_write_lines()
     return helper_crypto.get_md5_from_str(write_lines)
Beispiel #12
0
 def get_uniqueid(self) -> str:
     """Return the unique id of this object.

     The id is ``helper_crypto.get_md5_from_str`` applied to the
     concatenation of the user id, the contact id and the app type.
     """
     identity = "{}{}{}".format(self._userid, self._contactid,
                                self._apptype)
     return helper_crypto.get_md5_from_str(identity)