Ejemplo n.º 1
0
def aggregator(source_company, test=False):
    """Merge one source_company row into the aggregated company data.

    Steps, in order:
      1. Resolve the target company id: reuse ``companyId`` when the row
         was merged before, otherwise look it up through ``find_company``.
      2. When nothing matched, create the company (source 13120 never
         creates one); when something matched, record the match and
         refresh the company base info.
      3. Merge artifacts and members; footprints and aliases are merged
         only when the company is not flagged as modified.
      4. Patch derived company fields, maintain the corporate record, mark
         the source row as processed and, for source 13050, reset
         ``mapChecked`` on the matching mongo ``job.company`` document.

    Args:
        source_company: mapping describing the source row. Keys read here:
            "id", "companyId", "aggregateGrade", "source", "sourceId".
        test: when true the function returns right after reading "id";
            test mode is currently disabled.

    Returns:
        None.
    """
    sc = source_company
    source_company_id = sc["id"]

    if test:
        # Test mode is switched off: its lookup
        # (find_company.find_company(sc, test)) is disabled, so nothing
        # below ever runs with test=True. The ``not test`` guards further
        # down are kept so the flow stays correct if it is re-enabled.
        return

    # Step 1: find company_id.
    company_id = sc["companyId"]
    if company_id is not None:
        logger.info("sourceCompanyId=%s has been merged before.",
                    source_company_id)
    elif sc["aggregateGrade"] == 1:
        company_id = find_company.find_company_grade2(sc)
    elif sc["source"] in [13120]:
        company_id = find_company.find_reference(sc)
    elif sc["source"] in [13400, 13401, 13402]:
        company_id = find_company.find_company_grade2(sc)
    else:
        company_id = find_company.find_company_new(sc)

    logger.info("matched company_id=%s", company_id)
    source = source_company["source"]

    # Step 2: merge company base info.
    if company_id is None:
        if source == 13120:
            # Source 13120 is only matched against existing companies.
            logger.info("reference")
        else:
            logger.info("sourceCompanyId=%s is a new company",
                        source_company_id)
            company_id = company_aggregator_baseinfo.create_company(sc, test)
            logger.info("new company_id %s", company_id)

        if company_id is None:
            # Nothing to merge into: flag the row as processed and stop.
            if not test:
                set_sourcecompany_processstatus(sc["id"])
            return

        # Source 13050 sends no task; 13099 gets its own task type.
        if source not in [13099, 13050]:
            send_message_task(company_id, "company_newcover", source)
        elif source in [13099]:
            send_message_task(company_id, "company_create", source)
    else:
        if source == 13120:
            # For 13120 only the link to the company is stored.
            aggregator_db_util.update_source_company_found(
                company_id, source_company_id)
            set_sourcecompany_processstatus(sc["id"])
            return
        logger.info("sourceCompanyId=%s was found, companyId=%s", sc["id"],
                    company_id)
        if not test:
            aggregator_db_util.update_source_company_found(
                company_id, source_company_id)
            company_aggregator_baseinfo.aggregate(company_id,
                                                  source_company_id)

    # Load the company and its corporate (if any). The connection is
    # released even when a query or a row access raises.
    conn = db.connect_torndb()
    try:
        company = conn.get("select * from company where id=%s", company_id)
        if company["corporateId"] is not None:
            corporate = conn.get("select * from corporate where id=%s",
                                 company["corporateId"])
        else:
            corporate = None
    finally:
        conn.close()

    company_modified = (company["modifyUser"] is not None
                        and company["active"] is not None
                        and company["active"] != 'P')

    # Step 3: merge others. Artifacts and members are merged in both
    # cases; the remaining merges are skipped for modified companies.
    if company_modified:
        logger.info("company %s modified", company["id"])
    company_aggregator_artifact.aggregate_artifact(company_id,
                                                   source_company_id, test)
    company_aggregator_member.aggregate_member(company_id,
                                               source_company_id, test)
    if not company_modified:
        if not test:
            company_aggregator_footprint.aggregate_footprint(
                company_id, source_company_id)
        # News is not merged here; per the original note the ITjuzi news
        # parser aggregates it directly and "toutian" news separately.
        company_aggregator_baseinfo.add_company_alias(company_id,
                                                      source_company_id, test)

    if test:
        return

    # Step 4: patch derived fields unless the company is verified or
    # flagged as modified.
    company_locked = company["verify"] == "Y" or company_modified
    if not company_locked:
        company_aggregator_baseinfo.patch_company_establish_date(company_id)
        company_aggregator_baseinfo.patch_company_location(company_id)
        company_aggregator_baseinfo.patch_company_fullname(company_id)
        company_aggregator_baseinfo.patch_company_status(company_id)
        company_aggregator_baseinfo.patch_website(company_id)
        company_aggregator_baseinfo.patch_logo(company_id)
        company_aggregator_baseinfo.patch_should_index(company_id)

    corporate_modified = (corporate is not None
                          and corporate["modifyUser"] is not None
                          and corporate["active"] is not None
                          and corporate["active"] != 'P')
    if corporate_modified:
        logger.info("corporate %s modified", corporate["id"])
        if source in [13400, 13401, 13402, 13030, 13022]:
            # Only these sources add an alias to a modified corporate.
            corporate_aggregator.add_corporate_alias(
                source_company_id, company_id, corporate["id"])
    else:
        # Insert or update company_corporate, then attach alias, funding
        # and the corporateId link to the resulting corporate.
        corporate_id = corporate_aggregator.update_corporate(company_id)
        corporate_aggregator.add_corporate_alias(source_company_id,
                                                 company_id, corporate_id)
        corporate_aggregator.add_funding_corporateId(company_id, corporate_id)
        corporate_aggregator.set_corporateId(company_id, corporate_id)
        company_aggregator_baseinfo.patch_corporate_fullname(corporate_id)

    # The full name is patched a second time after the corporate update.
    if not company_locked:
        company_aggregator_baseinfo.patch_company_fullname(company_id)

    set_sourcecompany_processstatus(sc["id"])

    if sc["source"] in [13050]:
        # sourceId may be stored as a string or an int in mongo, so match
        # both forms. The client is closed even if the update fails.
        mongo = db.connect_mongo()
        try:
            mongo.job.company.update_one(
                {
                    "source": sc["source"],
                    "sourceId": {
                        "$in": [str(sc["sourceId"]),
                                int(sc["sourceId"])]
                    }
                }, {"$set": {
                    "mapChecked": None
                }})
        finally:
            mongo.close()
Ejemplo n.º 2
0
import pymongo

reload(sys)
sys.setdefaultencoding("utf-8")
sys.path.append(
    os.path.join(
        os.path.split(os.path.realpath(__file__))[0], '../../../util'))
sys.path.append(
    os.path.join(
        os.path.split(os.path.realpath(__file__))[0], '../../support'))
import loghelper
import db

import company_aggregator_baseinfo

# Logger used by the repair run below.
loghelper.init_logger("repair_company_status", stream=True)
logger = loghelper.get_logger("repair_company_status")

if __name__ == '__main__':
    logger.info("Begin...")

    # Fetch every company first so the connection is released before the
    # per-company status patching starts.
    connection = db.connect_torndb()
    companies = connection.query("select id,code,name from company")
    connection.close()

    # Re-run the status patch for each company, logging which one is next.
    for company in companies:
        company_key = company["id"]
        logger.info("id: %s, code: %s, name: %s", company_key,
                    company["code"], company["name"])
        company_aggregator_baseinfo.patch_company_status(company_key)

    logger.info("End.")
Ejemplo n.º 3
0
def aggregator(source_company, test=False):
    """Merge one source_company row into the aggregated company data.

    Steps, in order:
      1. Resolve the target company id: reuse ``companyId`` when the row
         was merged before, otherwise look it up through ``find_company``
         (test mode always uses ``find_company.find_company(sc, test)``).
      2. Create the company when nothing matched, or record the match and
         refresh the base info of the matched company.
      3. Merge artifacts and members. A company whose ``modifyUser`` is
         set gets nothing further.
      4. Merge footprints and aliases, patch derived company fields,
         maintain the corporate record, mark the source row as processed
         and send the "create" message.

    Args:
        source_company: mapping describing the source row. Keys read here:
            "id", "companyId" and "aggregateGrade".
        test: when true, the calls guarded by ``not test`` below (status
            updates, footprint merge, patches, corporate maintenance and
            messaging) are skipped.

    Returns:
        None.
    """
    sc = source_company
    source_company_id = sc["id"]

    # Step 1: find company_id.
    if test:
        company_id = find_company.find_company(sc, test)
    else:
        company_id = sc["companyId"]
        if company_id is not None:
            logger.info("sourceCompanyId=%s has been merged before.",
                        source_company_id)
        elif sc["aggregateGrade"] == 1:
            company_id = find_company.find_company_grade1(sc)
        else:
            company_id = find_company.find_company(sc)

    logger.info("matched company_id=%s", company_id)

    # Step 2: merge company base info.
    if company_id is None:
        logger.info("sourceCompanyId=%s is a new company", source_company_id)
        company_id = company_aggregator_baseinfo.create_company(sc, test)
        logger.info("new company_id %s", company_id)
        if company_id is None:
            # Nothing to merge into: flag the row as processed and stop.
            if not test:
                set_sourcecompany_processstatus(sc["id"])
            return
    else:
        logger.info("sourceCompanyId=%s was found, companyId=%s", sc["id"],
                    company_id)
        if not test:
            aggregator_db_util.update_source_company_found(
                company_id, source_company_id)
            company_aggregator_baseinfo.aggregate(company_id,
                                                  source_company_id)

    # Load the company and its corporate (if any). The connection is
    # released even when a query or a row access raises.
    conn = db.connect_torndb()
    try:
        company = conn.get("select * from company where id=%s", company_id)
        if company["corporateId"] is not None:
            corporate = conn.get("select * from corporate where id=%s",
                                 company["corporateId"])
        else:
            corporate = None
    finally:
        conn.close()

    # Step 3: merge others.
    company_aggregator_artifact.aggregate_artifact(company_id,
                                                   source_company_id, test)
    company_aggregator_member.aggregate_member(company_id, source_company_id,
                                               test)

    if company["modifyUser"] is not None:
        logger.info("company %s modified", company["id"])
        # Guarded like the other process-status updates in this function,
        # so a test run does not mark the row as processed.
        if not test:
            set_sourcecompany_processstatus(sc["id"])
        return

    # Step 4: remaining merges for companies without a modifyUser.
    if not test:
        company_aggregator_footprint.aggregate_footprint(company_id,
                                                         source_company_id)
    # News is not merged here; per the original note the ITjuzi news
    # parser aggregates it directly and "toutian" news separately.

    company_aggregator_baseinfo.add_company_alias(company_id,
                                                  source_company_id, test)

    if test:
        return

    # Verified companies keep their current derived fields.
    if company["verify"] != "Y":
        company_aggregator_baseinfo.patch_company_establish_date(company_id)
        company_aggregator_baseinfo.patch_company_location(company_id)
        company_aggregator_baseinfo.patch_company_fullname(company_id)
        company_aggregator_baseinfo.patch_company_status(company_id)
        company_aggregator_baseinfo.patch_website(company_id)
        company_aggregator_baseinfo.patch_logo(company_id)
        company_aggregator_baseinfo.patch_should_index(company_id)

    if corporate is not None and corporate["modifyUser"] is not None:
        logger.info("corporate %s modified", corporate["id"])
    else:
        # Insert or update company_corporate, then attach alias, funding
        # and the corporateId link to the resulting corporate.
        corporate_id = corporate_aggregator.update_corporate(company_id)
        corporate_aggregator.add_corporate_alias(company_id, corporate_id)
        corporate_aggregator.add_funding_corporateId(company_id, corporate_id)
        corporate_aggregator.set_corporateId(company_id, corporate_id)

    set_sourcecompany_processstatus(sc["id"])
    send_message(company_id, "create")
Ejemplo n.º 4
0
def aggregator(source_company, test=False, idmax=0):
    """Merge one source_company row into companies with an id above idmax.

    Steps, in order:
      1. Ask ``find_company.find_company_grade2`` for candidate ids and
         keep the first one whose integer value is greater than ``idmax``.
      2. Create the company through ``create_company_dev`` when no
         candidate qualifies, or refresh the base info of the match.
      3. Merge artifacts, members, footprints and aliases unless the
         company is flagged as modified.
      4. Patch derived company fields, maintain the corporate record and
         mark the source row as processed.

    Args:
        source_company: mapping describing the source row; only "id" is
            read directly here.
        test: when true, everything guarded by ``not test`` is skipped,
            including the final ``return company_id``.
        idmax: candidates whose id is not greater than this are ignored.

    Returns:
        The company id when ``test`` is false, otherwise None.

    Raises:
        SystemExit: via ``exit()`` when the company could not be created.
    """
    sc = source_company
    source_company_id = sc["id"]

    # Step 1: first candidate above idmax, or None when there is none.
    company_ids = find_company.find_company_grade2(sc, idmax)
    company_id = next(
        (cid for cid in company_ids if int(cid) > idmax), None)

    logger.info("matched company_id=%s", company_id)

    # Step 2: merge company base info.
    if company_id is None:
        logger.info("sourceCompanyId=%s is a new company", source_company_id)
        company_id = company_aggregator_baseinfo.create_company_dev(sc, test)
        logger.info("new company_id %s", company_id)
        if company_id is None:
            # Creation failed: this stops the whole process, not just
            # the current row.
            exit()
        # NOTE: the "company_newcover" task that used to be sent here for
        # sources other than 13020/13030 is disabled in this variant.
    else:
        logger.info("sourceCompanyId=%s was found, companyId=%s", sc["id"],
                    company_id)
        if not test:
            # Recording the match on the source row is disabled in this
            # variant; only the base info is refreshed.
            company_aggregator_baseinfo.aggregate(company_id,
                                                  source_company_id)

    # Load the company and its corporate (if any). The connection is
    # released even when a query or a row access raises.
    conn = db.connect_torndb()
    try:
        company = conn.get("select * from company where id=%s", company_id)
        if company["corporateId"] is not None:
            corporate = conn.get("select * from corporate where id=%s",
                                 company["corporateId"])
        else:
            corporate = None
    finally:
        conn.close()

    company_modified = (company["modifyUser"] is not None
                        and company["active"] is not None
                        and company["active"] != 'P')

    # Step 3: merge others, skipped entirely for modified companies.
    if company_modified:
        logger.info("company %s modified", company["id"])
    else:
        company_aggregator_artifact.aggregate_artifact(company_id,
                                                       source_company_id,
                                                       test)
        company_aggregator_member.aggregate_member(company_id,
                                                   source_company_id, test)
        if not test:
            company_aggregator_footprint.aggregate_footprint(
                company_id, source_company_id)
        # News is not merged here; per the original note the ITjuzi news
        # parser aggregates it directly and "toutian" news separately.
        company_aggregator_baseinfo.add_company_alias(company_id,
                                                      source_company_id, test)

    if test:
        # Test runs stop here and return None.
        return

    # Step 4: patch derived fields unless the company is verified or
    # flagged as modified.
    company_locked = company["verify"] == "Y" or company_modified
    if not company_locked:
        company_aggregator_baseinfo.patch_company_establish_date(company_id)
        company_aggregator_baseinfo.patch_company_location(company_id)
        company_aggregator_baseinfo.patch_company_fullname(company_id)
        company_aggregator_baseinfo.patch_company_status(company_id)
        company_aggregator_baseinfo.patch_website(company_id)
        company_aggregator_baseinfo.patch_logo(company_id)
        company_aggregator_baseinfo.patch_should_index(company_id)

    corporate_modified = (corporate is not None
                          and corporate["modifyUser"] is not None
                          and corporate["active"] is not None
                          and corporate["active"] != 'P')
    if corporate_modified:
        logger.info("corporate %s modified", corporate["id"])
    else:
        # Insert or update company_corporate, then attach the alias and
        # the corporateId link. Funding is not re-linked in this variant.
        corporate_id = corporate_aggregator.update_corporate(company_id)
        corporate_aggregator.add_corporate_alias(source_company_id,
                                                 company_id, corporate_id)
        corporate_aggregator.set_corporateId(company_id, corporate_id)
        company_aggregator_baseinfo.patch_corporate_fullname(corporate_id)

    # The full name is patched a second time after the corporate update.
    if not company_locked:
        company_aggregator_baseinfo.patch_company_fullname(company_id)

    set_sourcecompany_processstatus(sc["id"])
    return company_id
Ejemplo n.º 5
0
def aggregator(source_company, test=False):
    """Merge one source_company row into the aggregated company data.

    This variant expects every company to already own a corporate row:
    the company is created through ``create_company_new`` and the process
    is stopped with ``exit()`` when the company or its corporate is
    missing.

    Steps, in order:
      1. Resolve the target company id: reuse ``companyId`` when the row
         was merged before, otherwise look it up through ``find_company``.
      2. When nothing matched, create the company (source 13120 never
         creates one) and queue a task that depends on the source. When
         something matched, sources 13120 and 13130 only record the link
         and return; other sources refresh the company base info.
      3. Merge artifacts and members; footprints and aliases are merged
         only when the company is not flagged as modified.
      4. Patch derived company fields, maintain the corporate record, mark
         the source row as processed, reset ``mapChecked`` in mongo for
         source 13050 and send the "create" message.

    Args:
        source_company: mapping describing the source row. Keys read here:
            "id", "companyId", "aggregateGrade", "source", "sourceId".
        test: when true the function returns right after reading "id";
            test mode is currently disabled.

    Returns:
        None.

    Raises:
        SystemExit: via ``exit()`` when the company row or its corporate
            row cannot be loaded.
    """
    sc = source_company
    source_company_id = sc["id"]

    if test:
        # Test mode is switched off: its lookup
        # (find_company.find_company(sc, test)) is disabled, so nothing
        # below ever runs with test=True. The ``not test`` guards further
        # down are kept so the flow stays correct if it is re-enabled.
        return

    # Step 1: find company_id.
    company_id = sc["companyId"]
    if company_id is not None:
        logger.info("sourceCompanyId=%s has been merged before.",
                    source_company_id)
    elif sc["aggregateGrade"] == 1:
        company_id = find_company.find_company_grade2(sc)
    elif sc["source"] in [13120]:
        company_id = find_company.find_reference(sc)
    elif sc["source"] in [13400, 13401, 13402]:
        company_id = find_company.find_company_grade2(sc)
    else:
        company_id = find_company.find_company_new(sc)

    logger.info("matched company_id=%s", company_id)
    source = source_company["source"]

    # Step 2: merge company base info.
    if company_id is None:
        if source == 13120:
            # Original note: 13120 is qimingpian; it is only matched
            # against existing companies and never creates one.
            logger.info("reference")
        else:
            logger.info("sourceCompanyId=%s is a new company",
                        source_company_id)
            # Creates the company together with its corporate (new
            # structure, per the original note).
            company_id = company_aggregator_baseinfo.create_company_new(
                sc, test)
            logger.info("new company_id %s", company_id)

        if company_id is None:
            # Nothing to merge into (always the case for 13120): flag the
            # row as processed and stop.
            if not test:
                set_sourcecompany_processstatus(sc["id"])
            return

        # Sources 13050, 13055 and 13121 send no task at all.
        if source not in [13099, 13050, 13055, 13121, 13130]:
            send_message_task(company_id, "company_newcover", source)
        elif source in [13130]:
            send_message_task(company_id, "company_create", source)
        elif source in [13099]:
            send_message_task(company_id, "gongshang_create_online", source)
    else:
        # Original note: 13130 is crunchbase, 13120 is qimingpian (short
        # name only). For both, only the link to the company is stored.
        if source in [13120, 13130]:
            aggregator_db_util.update_source_company_found(
                company_id, source_company_id)
            set_sourcecompany_processstatus(sc["id"])
            if source == 13130:
                # A company with active 'P' is switched to 'A' before the
                # funding task is queued (original note: no task is sent
                # for 'P' companies). The connection is released even if
                # a query fails.
                conn = db.connect_torndb()
                try:
                    company = conn.get("select * from company where id=%s",
                                       company_id)
                    if company["active"] in ["P"]:
                        conn.update(
                            "update company set active='A' where id=%s",
                            company_id)
                finally:
                    conn.close()
                send_message_task(company_id, "company_funding", source)
            return
        logger.info("sourceCompanyId=%s was found, companyId=%s", sc["id"],
                    company_id)
        if not test:
            aggregator_db_util.update_source_company_found(
                company_id, source_company_id)
            company_aggregator_baseinfo.aggregate(company_id,
                                                  source_company_id)

    # Load the company and its corporate; both must exist. The finally
    # clause also runs for the SystemExit raised by exit(), so the
    # connection is released before the process stops.
    conn = db.connect_torndb()
    try:
        company = conn.get("select * from company where id=%s", company_id)
        if company is None:
            logger.info("company: %s not existed", company_id)
            exit()

        corporate = None
        if company["corporateId"] is not None:
            corporate = conn.get("select * from corporate where id=%s",
                                 company["corporateId"])
        if corporate is None:
            # Either corporateId is unset or it points at a missing row.
            logger.info("company:%s|%s has no corporate,please check",
                        company["name"], company["id"])
            exit()
    finally:
        conn.close()

    company_modified = (company["modifyUser"] is not None
                        and company["active"] is not None
                        and company["active"] != 'P')

    # Step 3: merge others. Artifacts and members are merged in both
    # cases; the remaining merges are skipped for modified companies.
    if company_modified:
        logger.info("company %s modified", company["id"])
    company_aggregator_artifact.aggregate_artifact(company_id,
                                                   source_company_id, test)
    company_aggregator_member.aggregate_member(company_id, source_company_id,
                                               test)
    if not company_modified:
        if not test:
            company_aggregator_footprint.aggregate_footprint(
                company_id, source_company_id)
        # Alias variant without a type (per the original note).
        company_aggregator_baseinfo.add_company_alias_new(
            company_id, source_company_id, test)

    if test:
        return

    # Step 4: patch derived fields unless the company is verified or
    # flagged as modified. Establish date, location, full name and
    # should-index are not patched on the company in this variant.
    if not (company["verify"] == "Y" or company_modified):
        company_aggregator_baseinfo.patch_company_status(company_id)
        company_aggregator_baseinfo.patch_website(company_id)
        company_aggregator_baseinfo.patch_logo(company_id)

    corporate_id = corporate["id"]
    corporate_modified = (corporate is not None
                          and corporate["modifyUser"] is not None
                          and corporate["active"] is not None
                          and corporate["active"] != 'P')
    if corporate_modified:
        logger.info("corporate %s modified", corporate_id)
        if source in [13400, 13401, 13402, 13030, 13022]:
            # Only these sources add an alias to a modified corporate.
            corporate_aggregator.add_corporate_alias_new(
                source_company_id, company_id, corporate_id)
    else:
        # Attach alias and funding to the existing corporate, then patch
        # the corporate-level full name, establish date and location.
        corporate_aggregator.add_corporate_alias_new(
            source_company_id, company_id, corporate_id)
        corporate_aggregator.add_funding_corporateId(company_id, corporate_id)
        company_aggregator_baseinfo.patch_corporate_fullname_new(corporate_id)
        company_aggregator_baseinfo.patch_corporate_establish_date(
            corporate_id)
        company_aggregator_baseinfo.patch_corporate_location(corporate_id)

    # NOTE: the second company full-name patch that used to follow the
    # corporate update is disabled in this variant.

    set_sourcecompany_processstatus(sc["id"])

    if sc["source"] in [13050]:
        # sourceId may be stored as a string or an int in mongo, so match
        # both forms. The client is closed even if the update fails.
        mongo = db.connect_mongo()
        try:
            mongo.job.company.update_one(
                {
                    "source": sc["source"],
                    "sourceId": {
                        "$in": [str(sc["sourceId"]),
                                int(sc["sourceId"])]
                    }
                }, {"$set": {
                    "mapChecked": None
                }})
        finally:
            mongo.close()
    send_message(company_id, "create")