Beispiel #1
0
def process():
    """Run the itjuzi_news_parser batch over all pending items.

    Every item returned by ``parser_db_util.find_process(SOURCE, TYPE)`` has
    its ``key_int`` and ``url`` logged and is handed to ``parser``. An item
    is marked as processed only when ``parser`` returns a truthy value;
    otherwise it is left unmarked.
    """
    logger.info("itjuzi_news_parser begin...")
    for pending in parser_db_util.find_process(SOURCE, TYPE):
        logger.info(pending["key_int"])
        logger.info(pending["url"])
        if not parser(pending):
            # Parsing did not succeed: do not flag the item as processed.
            continue
        parser_db_util.update_processed(pending["_id"])
    logger.info("itjuzi_news_parser end.")
def process():
    """Run the itjuzi_investorfirm_parser batch over all pending items.

    For each item returned by ``parser_db_util.find_process(SOURCE, TYPE)``
    the ``key`` and ``url`` are logged and the item is passed to ``parser``.
    When ``parser`` returns something other than ``None`` the result is
    stored with ``parser_db_util.save_investfirm`` and the item is marked as
    processed; a ``None`` result skips both steps.
    """
    logger.info("itjuzi_investorfirm_parser begin...")
    for record in parser_db_util.find_process(SOURCE, TYPE):
        logger.info(record["key"])
        logger.info(record["url"])
        parsed = parser(record)
        if parsed is not None:
            parser_db_util.save_investfirm(parsed, SOURCE, download_crawler)
            parser_db_util.update_processed(record["_id"])
    logger.info("itjuzi_investorfirm_parser end.")
Beispiel #3
0
def process():
    """Run the Demo8_next_parser batch over all pending items.

    Each item from ``parser_db_util.find_process(SOURCE, TYPE)`` is parsed
    with ``parse_base``. Items for which ``parse_base`` returns ``None`` are
    skipped. Otherwise the parsed company is saved, its ``score`` and
    ``artifacts`` entries are saved against the returned company id, and the
    item is marked as processed.
    """
    logger.info("Demo8_next_parser begin...")

    for entry in parser_db_util.find_process(SOURCE, TYPE):
        logger.info(entry["url"])

        parsed = parse_base(entry)
        if parsed is not None:
            company_id = parser_db_util.save_company(parsed, SOURCE)
            logger.info("source_company_id=%s", company_id)

            # Score and artifacts are both keyed by the id returned above.
            parser_db_util.save_company_score(company_id, parsed["score"])
            parser_db_util.save_artifacts(company_id, parsed["artifacts"])

            parser_db_util.update_processed(entry["_id"])

    logger.info("Demo8_next_parser end.")
Beispiel #4
0
def process():
    """Run the itjuzi_next_parser batch over all pending items.

    Each item from ``parser_db_util.find_process(SOURCE, TYPE)`` is parsed
    with ``parse_base``; ``None`` results are skipped. For the rest, the
    company and its ``score`` are saved, the parsed ``artifacts`` are
    filtered and annotated via ``url_helper.get_market``, the surviving
    artifacts are saved, and the item is marked as processed.
    """
    logger.info("itjuzi_next_parser begin...")

    for entry in parser_db_util.find_process(SOURCE, TYPE):
        logger.info(entry["url"])

        parsed = parse_base(entry)
        if parsed is None:
            continue

        company_id = parser_db_util.save_company(
            parsed, SOURCE, download_crawler)
        logger.info("source_company_id=%s", company_id)

        parser_db_util.save_company_score(company_id, parsed["score"])

        kept = []
        for candidate in parsed["artifacts"]:
            market_type, _app_market, app_id = url_helper.get_market(
                candidate["link"])
            # Drop links for which get_market reports no type.
            if market_type is None:
                continue
            # Type codes 4040 and 4050 are only kept when an app id exists.
            if market_type in (4040, 4050) and app_id is None:
                continue
            # The artifact dict is annotated in place before being kept.
            candidate["type"] = market_type
            candidate["domain"] = app_id
            kept.append(candidate)

        parser_db_util.save_artifacts(company_id, kept)

        parser_db_util.update_processed(entry["_id"])

    logger.info("itjuzi_next_parser end.")
Beispiel #5
0
def process():
    """Run the 36kr_next_parser batch over all pending items.

    Each item from ``parser_db_util.find_process(SOURCE, TYPE)`` is parsed
    with ``parse_base``; ``None`` results are skipped. For the rest, the
    company, its ``score`` and its ``artifacts`` are saved and the item is
    marked as processed. Any exception raised while saving is logged and the
    loop moves on to the next item, so one bad item does not abort the batch
    (and that item is not marked as processed).
    """
    logger.info("36kr_next_parser begin...")

    items = parser_db_util.find_process(SOURCE, TYPE)

    for item in items:
        logger.info(item["url"])

        r = parse_base(item)
        if r is None:
            continue
        try:
            source_company_id = parser_db_util.save_company(r, SOURCE, download_crawler)
            logger.info("source_company_id=%s", source_company_id)

            parser_db_util.save_company_score(source_company_id, r["score"])
            parser_db_util.save_artifacts(source_company_id, r["artifacts"])

            parser_db_util.update_processed(item["_id"])
        # "except X as name" is valid on Python 2.6+ and Python 3; the old
        # "except X, name" form is a SyntaxError on Python 3.
        except Exception as ex:
            # Deliberately broad: this is the per-item boundary of the batch.
            logger.info(ex)
Beispiel #6
0
def process():
    """Run the itjuzi_funding_parser2 batch over all pending items.

    Each item from ``parser_db_util.find_process(SOURCE, TYPE)`` is passed to
    ``parse``, whose result decides what happens next:

    * ``None`` -- the item is skipped and stays unprocessed;
    * ``-1``   -- the item is marked as processed without saving anything;
    * anything else -- the result is saved with
      ``parser_db_util.save_funding(..., 13030)`` and the item is marked as
      processed only if the returned flag is truthy.

    After the loop the module-level ``nokeys`` value is logged.
    """
    logger.info("itjuzi_funding_parser2 begin...")

    for pending in parser_db_util.find_process(SOURCE, TYPE):
        logger.info(pending["url"])

        funding = parse(pending)
        if funding is None:
            continue

        if funding == -1:
            # Sentinel result: nothing to save, but the item is done.
            mark_done = True
        else:
            mark_done, _funding_id = parser_db_util.save_funding(funding, 13030)

        if mark_done:
            parser_db_util.update_processed(pending["_id"])

    logger.info("itjuzi_funding_parser2 end.")
    logger.info(nokeys)
def process():
    """Run the fellowPlus_investor_parser batch over all pending items.

    Each item from ``parser_db_util.find_process(SOURCE, TYPE)`` is passed to
    ``parser``. The returned mapping is stamped with ``createTime``,
    ``source``, ``type``, ``url`` and ``key`` and written to
    ``collection_investor``, replacing any existing document with the same
    source/type/key.

    NOTE(review): this function never calls
    ``parser_db_util.update_processed``, so the same items are presumably
    returned again on the next run -- confirm that this is intended.
    Earlier, disabled code that tallied focus fields and per-organisation
    user lists has been left out.
    """
    logger.info("fellowPlus_investor_parser begin...")
    for pending in parser_db_util.find_process(SOURCE, TYPE):
        item_key = pending["key"]
        investor = parser(pending)

        # Stamp the parsed record with bookkeeping fields before storing it.
        investor["createTime"] = datetime.datetime.now()
        investor["source"] = SOURCE
        investor["type"] = TYPE
        investor["url"] = pending["url"]
        investor["key"] = pending["key"]

        selector = {"source": SOURCE, "type": TYPE, "key": item_key}
        # Remove a previously stored document so the insert below replaces it.
        if collection_investor.find_one(selector) is not None:
            collection_investor.delete_one(selector)
        collection_investor.insert_one(investor)
    logger.info("fellowPlus_investor_parser end.")