Code example #1
File: recruit_neitui_parser.py Project: yujiye/Codes
    source_artifact = {
        "sourceCompanyId": source_company_id,
        "name": name,
        "description": None,
        "link": link,
        "type": 4010
    }

    parser_util.insert_source_artifact(source_artifact)

    return source_company_id


if __name__ == '__main__':
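    # parser_init wires up the logger, the source database handle, and the
    # Kafka producer/consumer used by this parser.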
    (logger, fromdb, kafka_producer,
     kafka_consumer) = parser_util.parser_init("recruit_neitui",
                                               "crawler_recruit_neitui")

    while True:
        try:
            for message in kafka_consumer:
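                # Each Kafka record is a JSON payload carrying a "type" field
                # and the key of the crawled item to parse.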
                try:
                    # logger.info("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                    #                                            message.offset, message.key,
                    #                                            message.value))
                    msg = json.loads(message.value)
                    type = msg["type"]
                    job_key = msg["job_key"]

                    if type == "job":
                        parse_job(job_key)
Code example #2
        source_company_member_rel = {
            "sourceCompanyId": source_company_id,
            "position": m.get("position"),
            "joinDate": None,
            "leaveDate": None,
            "type": type
        }

        parser_util.insert_source_member(source_member,
                                         source_company_member_rel)


if __name__ == '__main__':
    (logger, fromdb, kafka_producer,
     kafka_consumer) = parser_util.parser_init("kr36", "crawler_kr36_v2")

    while True:
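        # Exceptions are handled per message below, so a single bad record
        # does not stop the consumer loop.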
        try:
            for message in kafka_consumer:
                try:
                    logger.info("%s:%d:%d: key=%s value=%s" %
                                (message.topic, message.partition,
                                 message.offset, message.key, message.value))
                    msg = json.loads(message.value)
                    type = msg["type"]
                    company_key = msg["company_key"]

                    if type == "company":
                        parse_company(company_key)
                except Exception, e:
Code example #3
     news_collection) = parser_util.parser_news_init("toutiao", "toutiao")

    i = 0
    threads = []
    msgs = []
    while True:
        try:
            for message in kafka_consumer:
                try:
                    # logger.info("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                    #                                            message.offset, message.key,
                    #                                            message.value))
                    msg = json.loads(message.value)
                    type = msg["type"]
                    news_key = msg["news_key"]

                    if type == "direct_news":
                        parse_news(news_key)

                    # kafka_consumer.task_done(message)
                    # kafka_consumer.commit()
                except Exception, e:
                    logger.error(e)
                    traceback.print_exc()
        except Exception, e:
            logger.error(e)
            traceback.print_exc()
            time.sleep(60)
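            # Back off, then rebuild the logger, database, and Kafka handles
            # before resuming consumption.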
            (logger, fromdb, kafka_producer,
             kafka_consumer) = parser_util.parser_init("toutiao", "toutiao")
Code example #4
File: pencil_news_parser.py Project: yujiye/Codes
    i = 0
    threads = []
    msgs = []
    while True:
        try:
            for message in kafka_consumer:
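                # Offsets are committed manually (task_done/commit below) once
                # a message has been handled.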
                try:
                    logger.info("%s:%d:%d: key=%s value=%s" %
                                (message.topic, message.partition,
                                 message.offset, message.key, message.value))
                    msg = json.loads(message.value)
                    type = msg["type"]
                    news_key = msg["news_key"]

                    if type == "direct_news":
                        parse_news(news_key)

                    kafka_consumer.task_done(message)
                    kafka_consumer.commit()
                except Exception, e:
                    logger.error(e)
                    traceback.print_exc()
        except Exception, e:
            logger.error(e)
            traceback.print_exc()
            time.sleep(60)
            (logger, fromdb, kafka_producer,
             kafka_consumer) = parser_util.parser_init("pencil_news",
                                                       "pencil_news")
Code example #5
            "salary": salary,
            "description": desc,
            "domain": domain,
            "locationId": location_id,
            "educationType": education_type,
            "workYearType": workYear_type,
            "startDate": born_time,
            "updateDate": update_time,
        }

        parser_util.insert_source_job(source_job)


if __name__ == '__main__':
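    # Consume crawler_recruit_jobtong messages and parse company records as
    # they arrive.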
    (logger, fromdb, kafka_producer,
     kafka_consumer) = parser_util.parser_init("recruit_jobtong",
                                               "crawler_recruit_jobtong")

    while True:
        try:
            for message in kafka_consumer:
                try:
                    logger.info("%s:%d:%d: key=%s value=%s" %
                                (message.topic, message.partition,
                                 message.offset, message.key, message.value))
                    msg = json.loads(message.value)
                    type = msg["type"]
                    company_key = msg["company_key"]

                    if type == "company":
                        parse_company(company_key)
Code example #6
            'description': None,
            'link': link,
            'rank': rank,
            'sourceId': source_id,
            'type': 9030
        }

        parser_util.insert_source_cf_document(source_cf_document)

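    # Publish the id of the newly inserted cf record to the "parser_v2" topic.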
    msg = {"type": "cf", "id": source_cf_id}
    kafka_producer.send_messages("parser_v2", json.dumps(msg))


if __name__ == '__main__':
    (logger, fromdb, kafka_producer,
     kafka_consumer) = parser_util.parser_init("cf_36kr", "crawler_cf_36kr_v2")

    while True:
        try:
            for message in kafka_consumer:
                try:
                    logger.info("%s:%d:%d: key=%s value=%s" %
                                (message.topic, message.partition,
                                 message.offset, message.key, message.value))
                    msg = json.loads(message.value)
                    type = msg["type"]
                    cf_key = msg["cf_key"]

                    if type == "cf":
                        parse_cf(cf_key)
Code example #7
File: jd_parser.py Project: yujiye/Codes
def parse_patch(cf_key):
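    # Look up the crawled record by cf_key, refresh its stored description,
    # and notify the downstream "parser_v2" topic.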
    item = fromdb.cf.find_one({"source": source, "cf_key": cf_key})
    if item is None:
        return

    desc = item["desc"]

    source_cf_id = parser_util.update_source_cf_desc(cf_key, desc)

    msg = {"type": "jd_patch", "id": source_cf_id}
    kafka_producer.send_messages("parser_v2", json.dumps(msg))


if __name__ == '__main__':
    (logger, fromdb, kafka_producer,
     kafka_consumer) = parser_util.parser_init("cf_jd", "crawler_cf_jd_v2")

    while True:
        try:
            for message in kafka_consumer:
                try:
                    logger.info("%s:%d:%d: key=%s value=%s" %
                                (message.topic, message.partition,
                                 message.offset, message.key, message.value))
                    msg = json.loads(message.value)
                    type = msg["type"]
                    cf_key = msg["cf_key"]

                    if type == "cf":
                        parse_cf(cf_key)
Code example #8
File: recruit_lagou_parser.py Project: yujiye/Codes
                    "position": position,
                    "salary": salary,
                    "description": None,
                    "domain": domain,
                    "locationId": location_id,
                    "educationType": education_type,
                    "workYearType": workYear_type,
                    "startDate": born_time,
                    "updateDate": update_time,
                    }

        parser_util.insert_source_job(source_job)


if __name__ == '__main__':
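    # Consume crawler_recruit_lagou messages; each one references a company
    # by its company_key.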
    (logger, fromdb, kafka_producer, kafka_consumer) = parser_util.parser_init("recruit_lagou", "crawler_recruit_lagou")

    i = 0
    threads = []
    msgs = []
    while True:
        try:
            for message in kafka_consumer:
                try:
                    logger.info("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                                               message.offset, message.key,
                                                               message.value))
                    msg = json.loads(message.value)
                    type = msg["type"]
                    company_key = msg["company_key"]
Code example #9
        os.path.split(os.path.realpath(__file__))[0], '../../../util'))
sys.path.append(
    os.path.join(
        os.path.split(os.path.realpath(__file__))[0], '../../support'))
import requests
import util
import parser_util

if __name__ == '__main__':
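    # Standalone utility: take the parser name and topic name from the command
    # line, then log each message and commit offsets for "company" messages
    # without invoking a parser.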
    if len(sys.argv) > 2:
        parser_name = sys.argv[1]
        msg_name = sys.argv[2]
    else:
        print "usage: /opt/py-env/bin/python consume_messages.py recruit_jobtong crawler_recruit_jobtong "
        exit(0)

    (logger, fromdb, kafka_producer,
     kafka_consumer) = parser_util.parser_init(parser_name, msg_name)

    for message in kafka_consumer:
        logger.info("%s:%d:%d: key=%s value=%s" %
                    (message.topic, message.partition, message.offset,
                     message.key, message.value))
        msg = json.loads(message.value)
        type = msg["type"]
        company_key = msg["company_key"]

        if type == "company":
            kafka_consumer.task_done(message)
            kafka_consumer.commit()