Example #1
0
def main():
    """Report which companies listed in a file are present in MongoDB.

    Reads one company name per line from the file at ``conf_name``, looks
    each name up in ``table_name`` by its ``company`` field, logs whether a
    document was found, and finally logs the summary counts.

    Blank (whitespace-only) lines are skipped so that they are neither
    queried as an empty company name nor counted in the totals.

    Relies on names defined outside this function: ``MongDb``,
    ``mongo_db_conf``, ``log``, ``conf_name`` and ``table_name``.
    """
    source_db = MongDb(mongo_db_conf['host'],
                       mongo_db_conf['port'],
                       mongo_db_conf['db'],
                       mongo_db_conf['username'],
                       mongo_db_conf['password'],
                       log=log)

    total = 0    # non-blank lines processed
    missing = 0  # companies with no document in the table
    found = 0    # companies that do have a document
    with open(conf_name) as p_file:
        for line in p_file:
            # Strip the line terminator and any surrounding whitespace in
            # one pass; the name is used verbatim as the lookup key.
            company = line.strip()
            if not company:
                # A blank line is not a company; do not query or count it.
                continue

            total += 1
            item = source_db.find_one(table_name, {'company': company})
            if item is None:
                log.error("当前企业没有抓到: {company}".format(company=company))
                missing += 1
            else:
                log.info("已抓到企业: {}".format(company))
                found += 1

    # The summary only needs the counters, so the file can be closed first.
    log.info("总共企业数目为: {}".format(total))
    log.info("当前已抓到的个数: {}".format(found))
    log.info("当前总共没有抓到企业数目为: {}".format(missing))
Example #2
0
def search_task():
    """Log companies that are missing or lack shareholder information.

    For every company in ``data_list`` the ``enterprise_data_gov`` collection
    is queried by the ``company`` field:

    * no document found -> the company name is logged at ERROR level;
    * document found but without a ``shareholder_information`` key -> the
      company name is logged at WARNING level.

    Relies on names defined outside this function: ``Gsxtlogger``,
    ``MongDb`` and ``data_list``.
    """
    log = Gsxtlogger('hunan.log').get_logger()
    # NOTE(review): connection settings are hard-coded here; consider moving
    # them to configuration so credentials do not live in source.
    mongo_db_conf = {
        'host': '172.16.215.16',
        'port': 40042,
        'db': 'app_data',
        'username': '******',
        'password': '******'
    }

    # Database that stores the search-list results.
    source_db = MongDb(mongo_db_conf['host'], mongo_db_conf['port'], mongo_db_conf['db'],
                       mongo_db_conf['username'],
                       mongo_db_conf['password'], log=log)

    for company in data_list:
        item = source_db.find_one('enterprise_data_gov', {'company': company})
        if item is None:
            log.error(company)
        elif 'shareholder_information' not in item:
            # ``warning`` is the supported spelling; ``warn`` is a deprecated
            # alias on standard logging.Logger objects.
            # NOTE(review): assumes get_logger() returns a logging.Logger.
            log.warning(company)
Example #3
0
class YizhoucpCrawl(object):
    """Fetch the recommended moment feed and like posts that match a filter.

    Every API call is signed: the request parameters are sent to the service
    at ``__CRACK_SIGN_URL`` which returns the ``sign`` value to attach.
    Each post that passes the cp/topic filters is recorded in the
    ``yizhou_cp`` MongoDB table so that it is not processed twice.
    """

    __START_URL = "https://api.myrightone.com/api/feed/moment-list"
    __LIKE_PID_URL = "https://api.myrightone.com/api/feed/like"

    __CRACK_SIGN_URL = "http://wx.zxiaoji.com/cp"

    __HOST = "api.myrightone.com"

    # (min, max) seconds to wait before retrying when the feed could not be
    # fetched or came back empty.
    __EMPTY_FEED_DELAY = (30, 60)

    def __init__(self, secret_key, token, user_id, check_code, log):
        """Store credentials, build the HTTP session and open the Mongo handle.

        :param secret_key: forwarded as ``secret_key`` to the signing service
        :param token: API token; sent as the ``Token`` header, and the part
            before the first ``_`` is sent as the ``App-Id`` header
        :param user_id: sent as ``user_id`` in every API request
        :param check_code: forwarded as ``check_code`` to the signing service
        :param log: logger object providing ``info`` and ``error``
        """
        self.log = log
        self.secret_key = secret_key
        self.user_id = user_id
        self.token = token
        self.check_code = check_code
        self.request = self.__init_reqeust()
        self.cp_mongo = MongDb(LocalMongoConfig.HOST,
                               LocalMongoConfig.PORT,
                               LocalMongoConfig.DB,
                               LocalMongoConfig.USER,
                               LocalMongoConfig.PASSWD,
                               log=self.log)

        self.cp_table = "yizhou_cp"

    def __init_reqeust(self):
        """Create the ``requests.Session`` used for API calls and return it.

        The session is also stored on ``self.request``.
        """
        headers = {
            "Host": self.__HOST,
            "App-Id": self.token.split("_")[0],
            "Platform": "ios",
            "Token": self.token,
            "User-Agent":
            "Right-iOS/3.33.2 (com.myrightone.datecha; build:224; iOS 12.1.2) Alamofire/4.8.0",
            "Accept": "*/*",
            "Accept-Encoding": "gzip;q=1.0, compress;q=0.5",
            "Accept-Language": "zh-Hans-CN;q=1.0, en-CN;q=0.9",
        }
        self.request = requests.Session()
        # Replace (not merge) the session headers so that exactly this header
        # set is sent with every request.
        self.request.headers = headers
        return self.request

    def __get_sign(self, params):
        """Ask the signing service for the ``sign`` value of ``params``.

        :param params: dict of request parameters; sent JSON-encoded
        :return: the ``data`` field of the response when its ``status`` is 1,
            otherwise ``None`` (the failure payload is logged)
        """
        req = requests.get(self.__CRACK_SIGN_URL,
                           params={
                               "secret_key": self.secret_key,
                               "check_code": self.check_code,
                               "params": json.dumps(params)
                           },
                           timeout=30)
        req_json = req.json()
        if req_json.get("status") != 1:
            self.log.error("提取sign发生错误,错误原因是:")
            self.log.error(req_json.get("data"))
            return None
        return req_json.get("data")

    def get_moment_list(self):
        """Fetch one page (20 items, type ``recommend``) of the moment feed.

        :return: the decoded JSON response, or ``None`` when no signature
            could be obtained
        """
        self.log.info("开始采集动态页")
        params = {
            "num": 20,
            "start": 0,
            "timestamp": int(time.time()),
            "type": "recommend",
            "user_id": self.user_id,
            "last_object_id": "",
        }

        sign = self.__get_sign(params)
        if not sign:
            return None
        params["sign"] = sign
        # NOTE(review): verify=False disables TLS certificate verification.
        resp = self.request.get(self.__START_URL,
                                params=params,
                                verify=False,
                                timeout=30)
        return resp.json()

    def like_sex(self, post_data, sex=2, exclude_cp=True):
        """Like a single post if it passes the filters.

        A post is skipped when ``exclude_cp`` is true and it has a truthy
        ``left_user``, when its ``category`` is ``"topic"``, or when its
        ``fid`` is already recorded in MongoDB. A post that passes these
        checks is recorded in MongoDB *before* the sex comparison, so it is
        not processed again even if it ends up not being liked.

        :param post_data: post dict; reads ``left_user``, ``category``,
            ``fid``, ``user`` (``nickname``, ``sex``) and ``payload`` (``text``)
        :param sex: value the author's ``user.sex`` must equal for the post
            to be liked
        :param exclude_cp: skip posts that carry a ``left_user`` entry
        :return: ``True`` only when the like request reported
            ``message == "success"``; ``False`` in every other case
        """
        is_cp = post_data.get('left_user', None)
        if exclude_cp and is_cp:
            self.log.info("过滤掉cp组")
            return False
        category = post_data.get("category")
        if category == "topic":
            self.log.info("过滤掉话题..")
            return False

        fid = post_data.get("fid")
        nick_name = post_data["user"].get("nickname")
        post_text = post_data["payload"].get("text")

        mongo_exists = self.__update_like_mongo(fid, nick_name, post_text)
        if mongo_exists == -1:
            self.log.info("之前已对这条数据点过赞了,跳过...")
            return False

        if post_data["user"].get('sex') != sex:
            return False

        fid_params = {
            "cancel": "0",
            "fid": fid,
            "timestamp": "0",
            "user_id": self.user_id,
        }
        sign = self.__get_sign(fid_params)
        if not sign:
            return False
        fid_params["sign"] = sign
        # NOTE(review): verify=False disables TLS certificate verification.
        resp = self.request.get(self.__LIKE_PID_URL,
                                params=fid_params,
                                verify=False,
                                timeout=30)
        resp_json = resp.json()
        if resp_json.get("message") != "success":
            return False

        self.log.info("给用户({})发布的【{}】点赞成功".format(
            nick_name, post_text))
        return True

    @staticmethod
    def __extract_posts(moment_data):
        """Return the post list from a feed response, or ``[]`` if absent."""
        if not isinstance(moment_data, dict):
            return []
        data = moment_data.get("data")
        if not isinstance(data, dict):
            return []
        return data.get("list") or []

    def start(self, *args, **kwargs):
        """Run the fetch-and-like loop forever.

        Each round fetches the feed, tries to like every post (sleeping 1-2 s
        between posts), then sleeps 7-10 s per successful like in the round.
        When the local hour is in 02:00-05:59 an extra 3600-4000 s pause is
        taken. If the feed cannot be fetched or is empty, the round is
        retried after a short delay instead of raising.
        """
        count = 0
        like_count = 0
        while True:
            count += 1
            posts = self.__extract_posts(self.get_moment_list())
            if not posts:
                # Sign failure or unexpected/empty payload: back off so the
                # loop neither crashes nor hammers the API without delay.
                self.log.error("动态列表获取失败或为空,稍后重试")
                time.sleep(random.randint(*self.__EMPTY_FEED_DELAY))
                continue

            like_count_batch = 0
            for per_post in posts:
                if self.like_sex(per_post):
                    like_count_batch += 1
                    like_count += 1
                    # Report the milestone once, at the moment it is reached.
                    if like_count % 100 == 0:
                        self.log.info(
                            "当前已经对 {} 位小姐姐点过赞了...".format(like_count))
                time.sleep(random.randint(1, 2))
            self.log.info("当前已经遍历了第 {} 次动态".format(count))
            time.sleep(
                random.randint(7 * like_count_batch, 10 * like_count_batch))
            now = datetime.datetime.now()
            if now.hour in range(2, 6):
                time.sleep(random.randint(3600, 4000))

    def __update_like_mongo(self, fid, nick_name, post_text):
        """Record that the post ``fid`` has been seen.

        :param fid: post id, used as the document ``_id``
        :param nick_name: author nickname stored with a new record
        :param post_text: post text stored with a new record
        :return: ``-1`` when a record already existed (its ``count`` is
            incremented and written back), ``1`` when a new record was
            inserted with ``count`` 1
        """
        exist_data = self.cp_mongo.find_one(self.cp_table, {"_id": fid})
        if exist_data:
            self.log.info(">>>找到相同的数据啦...")
            # Tolerate a stored document without a counter.
            exist_data["count"] = exist_data.get("count", 0) + 1
            self.cp_mongo.insert_batch_data(self.cp_table, [exist_data])
            return -1
        new_data = {
            "_id": fid,
            "nick_name": nick_name,
            "post_text": post_text,
            "count": 1
        }
        self.cp_mongo.insert_batch_data(self.cp_table, [new_data], insert=True)
        return 1