Example #1
def init_kafka():

    global consumer_news_task, producer_news_task

    url = tsbconfig.get_kafka_config()
    # HashedPartitioner is default
    consumer_news_task = KafkaConsumer("task",
                                       group_id="news_task_post",
                                       bootstrap_servers=[url],
                                       auto_offset_reset='smallest')
    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    producer_news_task = SimpleProducer(kafka)
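
A minimal usage sketch for the pair initialized above, assuming init_kafka() has been called and that messages on the "task" topic carry JSON; the relay function, payload shape and output topic below are placeholders, not taken from the project:

import json

def relay_news_tasks():
    # iterate the globals set up by init_kafka(); each record value is parsed
    # and its id republished through the SimpleProducer
    for message in consumer_news_task:
        task = json.loads(message.value)
        producer_news_task.send_messages("news_task_done",  # placeholder topic
                                         json.dumps({"id": task.get("id")}))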
Example #2
def makeup():

    from kafka import KafkaClient, SimpleProducer
    from kafka.errors import FailedPayloadsError

    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    producer_track = SimpleProducer(kafka)

    db = dbcon.connect_torndb()
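    # company messages created within 15 days of their related company_fa record are marked 'P'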
    for cm in db.query(
            'select * from company_message where trackdimension=8001'):
        if not cm.relateId:
            continue
        fa = db.get('select * from company_fa where id=%s;', cm.relateId)
        if not fa.createTime:
            continue
        if cm.createTime > (fa.createTime + timedelta(days=15)):
            continue
        else:
            db.execute('update company_message set active="P" where id=%s;',
                       cm.id)
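    # the same 15-day rule is applied to investor messages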
    for im in db.query(
            'select * from investor_message where trackdimension=8001'):
        if not im.relateId:
            continue
        fa = db.get('select * from company_fa where id=%s;', im.relateId)
        if not fa.createTime:
            continue
        if im.createTime > (fa.createTime + timedelta(days=15)):
            continue
        else:
            db.execute('update investor_message set active="P" where id=%s;',
                       im.id)
Example #3
def init_kafka():

    global producer_tag
    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    producer_tag = SimpleProducer(kafka)
Example #4
def initKafka():
    global kafkaProducer

    (url) = config.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafkaProducer = SimpleProducer(kafka)
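
For reference, a hedged sketch of publishing through the SimpleProducer created above; the topic name and payload are placeholders only:

import json

initKafka()
# send_messages() takes the topic name followed by one or more encoded messages
kafkaProducer.send_messages("demo_topic", json.dumps({"id": 1}))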
Example #5
    def __init__(self):

        url = tsbconfig.get_kafka_config()
        self.consumer_alarm = KafkaConsumer("alarm", group_id="exception_catch",
                                            bootstrap_servers=[url], auto_offset_reset='smallest')
        self.configs = self.__load_config()
        self.reload_circle = 10
Example #6
def generate_task_company_incremental():

    tcg = TaskCompanyGenerator()
    tcg.logger.info('Task Company Generating')
    url = tsbconfig.get_kafka_config()
    consumer_tc = KafkaConsumer("task_company",
                                group_id="tc_generation",
                                bootstrap_servers=[url],
                                auto_offset_reset='smallest')
    while True:
        try:
            tcg.logger.info('TCG restarting')
            for message in consumer_tc:
                locker = open(
                    os.path.join(
                        os.path.split(os.path.realpath(__file__))[0],
                        'tc.lock'))
                fcntl.flock(locker, fcntl.LOCK_EX)
                try:
                    tcg.generate_tc(message.value)
                except Exception as e:
                    tcg.logger.exception('Fail to process %s, %s' %
                                         (message, e))
                finally:
                    fcntl.flock(locker, fcntl.LOCK_UN)
                    locker.close()
        except Exception as e:
            # catch-all so the surrounding while loop can restart the consumer
            tcg.logger.exception('TCG consumer loop failed, %s' % e)
Example #7
File: test.py Project: yujiye/Codes
def initKafka():
    global kafkaConsumer
    (url) = config.get_kafka_config()
    kafka = KafkaClient(url)
    kafkaConsumer = KafkaConsumer("coldcall",
                                  group_id="test",
                                  metadata_broker_list=[url],
                                  auto_offset_reset='smallest')
Example #8
def init_kafka():
    global kafkaConsumer, kafkaProducer
    (url) = config.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafkaConsumer = KafkaConsumer("send_verify_code", group_id="sms_send",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest')
Example #9
def init_kafka():
    global kafkaConsumer
    (url) = config.get_kafka_config()
    # HashedPartitioner is default
    kafkaConsumer = KafkaConsumer("aggregator_v2",
                                  group_id="process_company_update",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest')
Example #10
def init_kafka():

    global consumer

    url = tsbconfig.get_kafka_config()
    # HashedPartitioner is default
    consumer = KafkaConsumer("task_company", group_id="test5",
                             bootstrap_servers=[url], auto_offset_reset='smallest')
Example #11
def init_kafka():
    global kafkaConsumer, kafkaProducer
    (url) = config.get_kafka_config()
    # HashedPartitioner is default
    kafkaConsumer = KafkaConsumer("user_log",
                                  group_id="user_visit",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='largest',
                                  enable_auto_commit=True)
Example #12
def init_kafka():
    global kafkaProducer, kafkaConsumer
    (url) = config.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafkaProducer = SimpleProducer(kafka)
    kafkaConsumer = KafkaConsumer("test", group_id="test2",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest')
Example #13
def init_kafka():

    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    consumer_search = KafkaConsumer("keyword_v2",
                                    group_id="create search index new1",
                                    bootstrap_servers=[url],
                                    auto_offset_reset='smallest')
    return consumer_search
Example #14
File: deal.py Project: yujiye/Codes
def init_kafka(index):

    global consumer_deal

    url = tsbconfig.get_kafka_config()
    consumer_deal = KafkaConsumer("track",
                                  group_id="deal search%s index" % index,
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest')
Example #15
def init_kafka():
    global kafkaConsumer
    (url) = config.get_kafka_config()
    # HashedPartitioner is default
    kafkaConsumer = KafkaConsumer("subscription",
                                  group_id="process_subscription",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest',
                                  enable_auto_commit=False)
Example #16
def kafka_init(parser_name, group_name):
    (url) = config.get_kafka_config()
    kafka = KafkaClient(url)
    kafka_producer = SimpleProducer(kafka)
    kafka_consumer = KafkaConsumer(parser_name,
                                   group_id=group_name,
                                   metadata_broker_list=[url],
                                   auto_offset_reset='smallest')

    return kafka_producer, kafka_consumer
Example #17
def initKafka(topic, group):

    global consumer

    # url = nlpconfig.get_kafka_config().get('url')
    # consumer = KafkaConsumer(topic, group_id="test",
    #                          metadata_broker_list=[url], auto_offset_reset='smallest')
    url = tsbconfig.get_kafka_config()
    consumer = KafkaConsumer(topic, group_id=group,
                             bootstrap_servers=[url], auto_offset_reset='smallest')
Example #18
File: key.py Project: yujiye/Codes
def init_kafka():

    global producer_tag, consumer_tag

    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    producer_tag = SimpleProducer(kafka)
    consumer_tag = KafkaConsumer("aggregator_v2", group_id="keyword_extract",
                                 bootstrap_servers=[url], auto_offset_reset='smallest')
Example #19
def init_kafka():
    (url) = config.get_kafka_config()
    # HashedPartitioner is default
    return KafkaConsumer("track_conf",
                         group_id="run_track_conf",
                         bootstrap_servers=[url],
                         auto_offset_reset='smallest',
                         enable_auto_commit=False,
                         max_poll_records=1,
                         session_timeout_ms=60000,
                         request_timeout_ms=70000)
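
Because this consumer disables auto-commit and limits each poll to a single record, offsets have to be committed explicitly once a message has been handled. A minimal consumption loop under those assumptions (handle_track_conf is a hypothetical handler):

consumer = init_kafka()
while True:
    # poll() returns {TopicPartition: [records]}; with max_poll_records=1
    # each call hands back at most one message
    batches = consumer.poll(timeout_ms=1000)
    for tp, records in batches.items():
        for record in records:
            handle_track_conf(record.value)  # hypothetical handler
    if batches:
        consumer.commit()  # commit only after the record was processed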
Example #20
def clear_kafka():

    url = tsbconfig.get_kafka_config()
    consumer_rec = KafkaConsumer("keyword_v2", group_id="company recommend",
                                 bootstrap_servers=[url], auto_offset_reset='smallest')
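    # offsets are committed only for company ids below 180000, so those
    # messages are not replayed on the next run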
    for index, message in enumerate(consumer_rec):
        cid = json.loads(message.value).get('id')
        if int(cid) < 180000:
            consumer_rec.commit()
        if index % 1000 == 0:
            print index, message
Example #21
def init_kafka():
    global kafkaConsumer, kafkaProducer
    (url) = config.get_kafka_config()
    # HashedPartitioner is default
    kafkaConsumer = KafkaConsumer("user_log",
                                  group_id="visit_stat",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest',
                                  enable_auto_commit=True)
    kafka = KafkaClient(url)
    kafkaProducer = SimpleProducer(kafka)
Example #22
def initKafka():
    global kafkaProducer
    global kafkaConsumer

    (url) = config.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafkaProducer = SimpleProducer(kafka)
    kafkaConsumer = KafkaConsumer("parser_v2", group_id="beian",
                                  metadata_broker_list=[url],
                                  auto_offset_reset='smallest')
Example #23
def init_kafka(index):

    global producer_search, consumer_search

    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    producer_search = SimpleProducer(kafka)
    consumer_search = KafkaConsumer("keyword_v2",
                                    group_id="create search%s index" % index,
                                    bootstrap_servers=[url],
                                    auto_offset_reset='smallest')
Example #24
def init_kafka():

    global producer_coldcall
    global consumer_coldcall

    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    producer_coldcall = SimpleProducer(kafka)
    consumer_coldcall = KafkaConsumer("coldcall", group_id="coldcall incremental",
                                      metadata_broker_list=[url], auto_offset_reset='smallest')
Example #25
def init_kafka():

    global consumer_strack, producer_strack

    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    producer_strack = SimpleProducer(kafka)
    consumer_strack = KafkaConsumer("track_message",
                                    group_id="funding_track",
                                    bootstrap_servers=[url],
                                    auto_offset_reset='smallest')
Example #26
def init_kafka():
    global kafkaConsumer
    global kafkaProducer
    (url) = config.get_kafka_config()
    # HashedPartitioner is default
    kafkaConsumer = KafkaConsumer("track_message_v2",
                                  group_id="push_hot_news",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest',
                                  enable_auto_commit=False)
    kafka = KafkaClient(url)
    kafkaProducer = SimpleProducer(kafka)
Example #27
def init_kafka(index):

    global consumer

    url = tsbconfig.get_kafka_config()
    consumer = KafkaConsumer("keyword_v2",
                             group_id="create search%s index" % index,
                             bootstrap_servers=[url],
                             auto_offset_reset='smallest')
Example #28
def parser_init(spider_name, msg_name):
    # logger
    loghelper.init_logger("parser(" + spider_name + ")", stream=True)
    logger = loghelper.get_logger("parser(" + spider_name + ")")

    (url) = config.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafka_producer = SimpleProducer(kafka)
    kafka_consumer = KafkaConsumer(msg_name,
                                   group_id="parser_" + spider_name,
                                   metadata_broker_list=[url],
                                   auto_offset_reset='smallest')

    return logger, fromdb, kafka_producer, kafka_consumer
Example #29
def init_kafka():
    global kafkaConsumer
    global kafkaProducer
    (url) = config.get_kafka_config()
    # HashedPartitioner is default
    kafkaConsumer = KafkaConsumer("track_message_v2",
                                  group_id="process_topic_message",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest',
                                  enable_auto_commit=False,
                                  max_poll_records=5,
                                  session_timeout_ms=60000,
                                  request_timeout_ms=70000)
    kafka = KafkaClient(url)
    kafkaProducer = SimpleProducer(kafka)
Example #30
def parser_news_init(spider_name, msg_name):
    # logger
    loghelper.init_logger("parser(" + spider_name + ")", stream=True)
    logger = loghelper.get_logger("parser(" + spider_name + ")")

    (url) = config.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafka_producer = SimpleProducer(kafka)
    kafka_consumer = KafkaConsumer(msg_name,
                                   group_id="parser_" + spider_name,
                                   metadata_broker_list=[url],
                                   auto_offset_reset='smallest')

    news_collection = mongo.parser_v2.direct_news
    news_collection.create_index([("source", pymongo.DESCENDING),
                                  ("news_key", pymongo.DESCENDING)],
                                 unique=True)

    return logger, fromdb, kafka_producer, kafka_consumer, news_collection
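
A short sketch of wiring the values returned above together; the spider name, topic, and processing step are placeholders only:

logger, fromdb, producer, consumer, news_collection = parser_news_init("demo_spider",
                                                                       "parser_v2")
for message in consumer:
    logger.info("received news message: %s" % message.value)
    # parsing and the upsert into news_collection would follow here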