def init_kafka():
    global consumer_news_task, producer_news_task
    url = tsbconfig.get_kafka_config()
    consumer_news_task = KafkaConsumer("task", group_id="news_task_post",
                                       bootstrap_servers=[url],
                                       auto_offset_reset='smallest')
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    producer_news_task = SimpleProducer(kafka)

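# A minimal sketch of how the handles set up above are typically used: consume
# from "task" and re-emit downstream. The loop body, the JSON payload shape, and
# the "news_task_result" topic are illustrative assumptions, not from the source.
def run_news_task_sketch():
    init_kafka()
    for message in consumer_news_task:
        payload = json.loads(message.value)  # assumes JSON-encoded task messages
        # SimpleProducer.send_messages(topic, *messages) is the legacy kafka-python API
        producer_news_task.send_messages("news_task_result", json.dumps(payload))
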
def makeup():
    from kafka import KafkaClient, SimpleProducer
    from kafka.errors import FailedPayloadsError

    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    producer_track = SimpleProducer(kafka)

    db = dbcon.connect_torndb()
    # Mark funding-track (trackdimension=8001) messages created within 15 days
    # of the related FA as pending ("P").
    for cm in db.query('select * from company_message where trackdimension=8001'):
        if not cm.relateId:
            continue
        fa = db.get('select * from company_fa where id=%s;', cm.relateId)
        if not fa or not fa.createTime:
            continue
        if cm.createTime <= (fa.createTime + timedelta(days=15)):
            db.execute('update company_message set active="P" where id=%s;', cm.id)
    for im in db.query('select * from investor_message where trackdimension=8001'):
        if not im.relateId:
            continue
        fa = db.get('select * from company_fa where id=%s;', im.relateId)
        if not fa or not fa.createTime:
            continue
        if im.createTime <= (fa.createTime + timedelta(days=15)):
            db.execute('update investor_message set active="P" where id=%s;', im.id)

def init_kafka():
    global producer_tag
    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    producer_tag = SimpleProducer(kafka)

def initKafka():
    global kafkaProducer
    url = config.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafkaProducer = SimpleProducer(kafka)

def __init__(self):
    url = tsbconfig.get_kafka_config()
    self.consumer_alarm = KafkaConsumer("alarm", group_id="exception_catch",
                                        bootstrap_servers=[url],
                                        auto_offset_reset='smallest')
    self.configs = self.__load_config()
    self.reload_circle = 10

def generate_task_company_incremental():
    tcg = TaskCompanyGenerator()
    tcg.logger.info('Task Company Generating')
    url = tsbconfig.get_kafka_config()
    consumer_tc = KafkaConsumer("task_company", group_id="tc_generation",
                                bootstrap_servers=[url],
                                auto_offset_reset='smallest')
    while True:
        try:
            tcg.logger.info('TCG restarting')
            for message in consumer_tc:
                # Serialize task generation across processes with a file lock.
                locker = open(os.path.join(
                    os.path.split(os.path.realpath(__file__))[0], 'tc.lock'))
                fcntl.flock(locker, fcntl.LOCK_EX)
                try:
                    tcg.generate_tc(message.value)
                except Exception as e:
                    tcg.logger.exception('Fail to process %s, %s' % (message, e))
                finally:
                    fcntl.flock(locker, fcntl.LOCK_UN)
                    locker.close()
        except Exception as e:
            # The handler for this outer try was truncated in the source; logging
            # and restarting the consumer loop is a best guess consistent with
            # the 'TCG restarting' message above.
            tcg.logger.exception('Consumer loop failed: %s' % e)

def initKafka():
    global kafkaConsumer
    url = config.get_kafka_config()
    kafka = KafkaClient(url)
    kafkaConsumer = KafkaConsumer("coldcall", group_id="test",
                                  metadata_broker_list=[url],
                                  auto_offset_reset='smallest')

def init_kafka():
    global kafkaConsumer, kafkaProducer
    url = config.get_kafka_config()
    kafka = KafkaClient(url)
    kafkaConsumer = KafkaConsumer("send_verify_code", group_id="sms_send",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest')
    # The producer assignment was missing in the source even though kafkaProducer
    # is declared global and a KafkaClient is created; restored to match the
    # pattern used by the sibling init functions. HashedPartitioner is default.
    kafkaProducer = SimpleProducer(kafka)

def init_kafka():
    global kafkaConsumer
    url = config.get_kafka_config()
    kafkaConsumer = KafkaConsumer("aggregator_v2", group_id="process_company_update",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest')

def init_kafka():
    global consumer
    url = tsbconfig.get_kafka_config()
    consumer = KafkaConsumer("task_company", group_id="test5",
                             bootstrap_servers=[url],
                             auto_offset_reset='smallest')

def init_kafka():
    global kafkaConsumer, kafkaProducer
    url = config.get_kafka_config()
    kafkaConsumer = KafkaConsumer("user_log", group_id="user_visit",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='largest',
                                  enable_auto_commit=True)

def init_kafka():
    global kafkaProducer, kafkaConsumer
    url = config.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafkaProducer = SimpleProducer(kafka)
    kafkaConsumer = KafkaConsumer("test", group_id="test2",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest')

def init_kafka():
    url = tsbconfig.get_kafka_config()
    # An unused KafkaClient was dropped here; only the consumer is needed.
    consumer_search = KafkaConsumer("keyword_v2", group_id="create search index new1",
                                    bootstrap_servers=[url],
                                    auto_offset_reset='smallest')
    return consumer_search

def init_kafka(index):
    global consumer_deal
    url = tsbconfig.get_kafka_config()
    consumer_deal = KafkaConsumer("track", group_id="deal search%s index" % index,
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest')

def init_kafka():
    global kafkaConsumer
    url = config.get_kafka_config()
    kafkaConsumer = KafkaConsumer("subscription", group_id="process_subscription",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest',
                                  enable_auto_commit=False)

def kafka_init(parser_name, group_name):
    url = config.get_kafka_config()
    kafka = KafkaClient(url)
    kafka_producer = SimpleProducer(kafka)
    kafka_consumer = KafkaConsumer(parser_name, group_id=group_name,
                                   metadata_broker_list=[url],
                                   auto_offset_reset='smallest')
    return kafka_producer, kafka_consumer

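# Hedged usage example for kafka_init; the parser name, group name, output
# topic, and the placeholder transform below are hypothetical, not from the source.
def run_parser_sketch():
    producer, consumer = kafka_init("parser_v2", "parser_demo")
    for message in consumer:
        # placeholder transform; a real parser would extract structured fields
        result = message.value.upper()
        producer.send_messages("parsed_out", result)
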
def initKafka(topic, group):
    global consumer
    # url = nlpconfig.get_kafka_config().get('url')
    # consumer = KafkaConsumer(topic, group_id="test",
    #                          metadata_broker_list=[url], auto_offset_reset='smallest')
    url = tsbconfig.get_kafka_config()
    consumer = KafkaConsumer(topic, group_id=group,
                             bootstrap_servers=[url],
                             auto_offset_reset='smallest')

def init_kafka():
    global producer_tag, consumer_tag
    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    producer_tag = SimpleProducer(kafka)
    consumer_tag = KafkaConsumer("aggregator_v2", group_id="keyword_extract",
                                 bootstrap_servers=[url],
                                 auto_offset_reset='smallest')

def init_kafka():
    url = config.get_kafka_config()
    return KafkaConsumer("track_conf", group_id="run_track_conf",
                         bootstrap_servers=[url],
                         auto_offset_reset='smallest',
                         enable_auto_commit=False,
                         max_poll_records=1,
                         session_timeout_ms=60000,
                         request_timeout_ms=70000)

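# Sketch of the at-least-once pattern this consumer is configured for:
# enable_auto_commit=False with max_poll_records=1 means each record is
# committed only after it has been fully processed, so a crash re-delivers
# the in-flight record. The process() callback is a hypothetical stand-in.
def run_track_conf_sketch(process):
    consumer = init_kafka()
    for message in consumer:
        process(message.value)  # do the work first
        consumer.commit()       # then mark progress; a crash before this re-delivers
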
def clear_kafka():
    url = tsbconfig.get_kafka_config()
    consumer_rec = KafkaConsumer("keyword_v2", group_id="company recommend",
                                 bootstrap_servers=[url],
                                 auto_offset_reset='smallest')
    # Fast-forward the group: commit offsets for old records (id < 180000)
    # so they are not re-delivered.
    for index, message in enumerate(consumer_rec):
        cid = json.loads(message.value).get('id')
        if int(cid) < 180000:
            consumer_rec.commit()
        if index % 1000 == 0:
            print index, message

def init_kafka():
    global kafkaConsumer, kafkaProducer
    url = config.get_kafka_config()
    kafkaConsumer = KafkaConsumer("user_log", group_id="visit_stat",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest',
                                  enable_auto_commit=True)
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafkaProducer = SimpleProducer(kafka)

def initKafka():
    global kafkaProducer, kafkaConsumer
    url = config.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafkaProducer = SimpleProducer(kafka)
    kafkaConsumer = KafkaConsumer("parser_v2", group_id="beian",
                                  metadata_broker_list=[url],
                                  auto_offset_reset='smallest')

def init_kafka(index):
    global producer_search, consumer_search
    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    producer_search = SimpleProducer(kafka)
    consumer_search = KafkaConsumer("keyword_v2",
                                    group_id="create search%s index" % index,
                                    bootstrap_servers=[url],
                                    auto_offset_reset='smallest')

def init_kafka():
    global producer_coldcall, consumer_coldcall
    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    producer_coldcall = SimpleProducer(kafka)
    consumer_coldcall = KafkaConsumer("coldcall", group_id="coldcall incremental",
                                      metadata_broker_list=[url],
                                      auto_offset_reset='smallest')

def init_kafka():
    global consumer_strack, producer_strack
    url = tsbconfig.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    producer_strack = SimpleProducer(kafka)
    consumer_strack = KafkaConsumer("track_message", group_id="funding_track",
                                    bootstrap_servers=[url],
                                    auto_offset_reset='smallest')

def init_kafka():
    global kafkaConsumer, kafkaProducer
    url = config.get_kafka_config()
    kafkaConsumer = KafkaConsumer("track_message_v2", group_id="push_hot_news",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest',
                                  enable_auto_commit=False)
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafkaProducer = SimpleProducer(kafka)

def init_kafka(index):
    global consumer
    # A spurious `consumer = SimpleProducer(kafka)` that was immediately
    # overwritten by the KafkaConsumer below has been removed.
    url = tsbconfig.get_kafka_config()
    consumer = KafkaConsumer("keyword_v2",
                             group_id="create search%s index" % index,
                             bootstrap_servers=[url],
                             auto_offset_reset='smallest')

def parser_init(spider_name, msg_name):
    # logger
    loghelper.init_logger("parser(" + spider_name + ")", stream=True)
    logger = loghelper.get_logger("parser(" + spider_name + ")")

    url = config.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafka_producer = SimpleProducer(kafka)
    kafka_consumer = KafkaConsumer(msg_name, group_id="parser_" + spider_name,
                                   metadata_broker_list=[url],
                                   auto_offset_reset='smallest')
    # fromdb is a module-level DB handle; its initialization is not shown in this excerpt.
    return logger, fromdb, kafka_producer, kafka_consumer

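# Illustrative driver for parser_init; the spider and topic names are
# assumptions, and the loop body is a placeholder.
def parser_main_sketch():
    logger, fromdb, producer, consumer = parser_init("demo_spider", "parser_v2")
    for message in consumer:
        logger.info("received %s bytes", len(message.value))
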
def init_kafka():
    global kafkaConsumer, kafkaProducer
    url = config.get_kafka_config()
    kafkaConsumer = KafkaConsumer("track_message_v2",
                                  group_id="process_topic_message",
                                  bootstrap_servers=[url],
                                  auto_offset_reset='smallest',
                                  enable_auto_commit=False,
                                  max_poll_records=5,
                                  session_timeout_ms=60000,
                                  request_timeout_ms=70000)
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafkaProducer = SimpleProducer(kafka)

def parser_news_init(spider_name, msg_name):
    # logger
    loghelper.init_logger("parser(" + spider_name + ")", stream=True)
    logger = loghelper.get_logger("parser(" + spider_name + ")")

    url = config.get_kafka_config()
    kafka = KafkaClient(url)
    # HashedPartitioner is default
    kafka_producer = SimpleProducer(kafka)
    kafka_consumer = KafkaConsumer(msg_name, group_id="parser_" + spider_name,
                                   metadata_broker_list=[url],
                                   auto_offset_reset='smallest')
    # Deduplicate news by (source, news_key) with a unique index.
    news_collection = mongo.parser_v2.direct_news
    news_collection.create_index([("source", pymongo.DESCENDING),
                                  ("news_key", pymongo.DESCENDING)], unique=True)
    # fromdb is a module-level DB handle; its initialization is not shown in this excerpt.
    return logger, fromdb, kafka_producer, kafka_consumer, news_collection