예제 #1
0
def decompose(company_id, hard=True):
    """Re-expand and re-aggregate every active source company of *company_id*.

    Fetches the company's active source companies (excluding source 13002,
    sources in [13100, 13110) and companyStatus 2020), then runs the expand
    step followed by the dev aggregator on each one.

    :param company_id: primary key of the ``company`` row to process.
    :param hard: accepted for interface compatibility; not used by this variant.
    :return: ``True`` — either no work was needed or all source companies
             were processed.
    """
    conn = db.connect_torndb()
    # NOTE(review): the company row is fetched but none of its columns are
    # read in this variant; kept for parity with the other decompose variant.
    company = conn.get("select * from company where id=%s", company_id)
    scs = list(
        conn.query(
            "select * from source_company where (active is null or active='Y') and (source is not null and source != 13002 and (source < 13100 or source >= 13110)) and companyStatus!=2020 and companyId=%s order by source",
            company_id))
    conn.close()

    # Nothing to decompose when 0 or 1 active source company exists.
    if len(scs) < 2:
        logger.info(
            "Company : %s has one active source company, no need decompose",
            company_id)
        return True

    # Init crawlers once; they are shared by every expand call below.
    beian_links_crawler = beian_links.BeianLinksCrawler()
    icp_chinaz_crawler = icp_chinaz.IcpchinazCrawler()
    screenshot_crawler = screenshot_website.phantomjsScreenshot()

    for sc in scs:
        # Expand first so the aggregator sees refreshed source data.
        company_info_expand.expand_source_company(sc["id"],
                                                  beian_links_crawler,
                                                  icp_chinaz_crawler,
                                                  screenshot_crawler)
        company_aggregator_dev.aggregator(sc)

    return True
예제 #2
0
def decompose(company_id, hard=True):
    """Pick one "reserve" source company for *company_id* and rebuild the
    company from it.

    The reserve is the first active source company whose (non-blank) name
    equals the company name exactly; when none matches, the first active
    source company is used as a fallback.  Old derived data is deleted, the
    reserve is re-expanded, process statuses are reset, and the aggregator
    is re-run on the reserve.

    :param company_id: primary key of the ``company`` row to process.
    :param hard: forwarded to ``set_processStatus_zero``.
    :return: ``True`` — either no work was needed (fewer than two active
             source companies) or the decompose completed.
    """
    conn = db.connect_torndb()
    company = conn.get("select * from company where id=%s", company_id)
    scs = list(conn.query(
        "select * from source_company where (active is null or active='Y') and (source is not null and source != 13002 and (source < 13100 or source >= 13110)) and companyStatus!=2020 and companyId=%s order by source",
        company_id))
    conn.close()

    # Nothing to decompose when 0 or 1 active source company exists.
    if len(scs) < 2:
        logger.info("Company : %s has one active source company, no need decompose", company_id)
        return True

    name = company["name"]
    # Init crawlers used by the expand step below.
    beian_links_crawler = beian_links.BeianLinksCrawler()
    icp_chinaz_crawler = icp_chinaz.IcpchinazCrawler()
    screenshot_crawler = screenshot_website.phantomjsScreenshot()

    # Choose the source company whose non-blank name matches the company name.
    reserve_sc = None
    for sc in scs:
        logger.info("source company: %s, source: %s, sourceId: %s", sc["id"], sc["source"], sc["sourceId"])
        if sc["name"].strip() != "" and sc["name"] == name:
            reserve_sc = sc
            break

    # Must reserve one sc for decompose: fall back to the first active one.
    if reserve_sc is None:
        reserve_sc = scs[0]

    logger.info("Reserve source company: %s, %s for company: %s, %s", reserve_sc["id"], reserve_sc["name"], company["id"], company["name"])
    update_column(company, reserve_sc)
    delete_old_data(company_id)
    company_info_expand.expand_source_company(reserve_sc["id"], beian_links_crawler, icp_chinaz_crawler, screenshot_crawler)
    set_processStatus_zero(company_id, reserve_sc["id"], hard)

    # Reset funding process status on every active source company, not just
    # the reserve, so the follow-up pipeline reprocesses all of them.
    for sc in scs:
        set_funding_processStatus(sc["id"])

    company_aggregator.aggregator(reserve_sc)
    return True
예제 #3
0
def expand(sourceCompanyId):
    """Run the expand step for one source company, using the module-level
    crawlers, in test mode."""
    company_info_expand.expand_source_company(
        sourceCompanyId,
        beian_links_crawler,
        icp_chinaz_crawler,
        screenshot_crawler,
        test=True,
    )
예제 #4
0
        company_id = company["id"]
        conn = db.connect_torndb()
        scs = list(
            conn.query(
                "select * from source_company where (active is null or active='Y') and (source is not null and source != 13002 and (source < 13100 or source >= 13110)) and companyStatus!=2020 and companyId=%s order by id",
                company_id))
        if len(scs) > 1:
            conn = db.connect_torndb()
            #delete from test tables;
            clean_test_tables(test)
            #re-do aggregator for each source_company:
            ids = []
            for sc in scs:
                company_info_expand.expand_source_company(sc["id"],
                                                          beian_links_crawler,
                                                          icp_chinaz_crawler,
                                                          screenshot_crawler,
                                                          test=True)
                ids.append(str(sc["id"]))
            logger.info("Company: %s has %s source companies: %s", company_id,
                        len(scs), ",".join(ids))

            round_max = len(scs)
            round = 0

            sc0 = scs.pop(0)
            logger.info("Insert New company with source company: %s",
                        sc0["id"])
            company_aggregator.aggregator(sc0, test=True)

            while True: