def fetch(self, **kwargs):
    """Yield batches of organization descriptions scanned from the database.

    Scans the broker table in chunks, collecting each item's
    ``description`` (gzip-decompressing DynamoDB ``Binary`` payloads) and
    its ``entity_id``, and yields them as parallel lists.

    Yields:
        dict: ``{'id': [...entity ids...], 'input_x': [...descriptions...]}``
        for every non-empty chunk. Also accumulates ``self.total``.
    """
    config = get_config()
    chunksize = int(
        getattr(config, 'producer_chunksize', Default.PRODUCER_CHUNKSIZE))
    brk = get_brk(OrganizationTable.ID)
    gen = brk._iscan(
        chunksize=chunksize,
        filter_expr=Attr('data_type').eq(OrganizationTable.DT.value))
    for chunk in gen:
        if not chunk:
            continue
        iid, in_x = [], []
        for obj in chunk.get('Items'):
            # Skip items with a missing/empty description.
            val = obj.get('description')
            if not val:
                continue
            # Binary values are gzip-compressed blobs; decompress to bytes.
            if isinstance(val, Binary):
                val = gzip.decompress(val.value)
            in_x.append(val)
            iid.append(obj.get('entity_id'))
        if iid and in_x:
            self.total += len(in_x)
            print(self, self.total)
            yield {
                'id': iid,
                'input_x': in_x,
            }
def fetch(self, **kwargs):
    """Yield batches of journal-article abstracts scanned from the database.

    Scans the broker table in chunks, collecting each item's ``abstract``
    together with its ``npl_id``, and yields them as parallel lists.

    Yields:
        dict: ``{'id': [...npl ids...], 'input_x': [...abstracts...]}``
        for every non-empty chunk. Also accumulates ``self.total``.
    """
    config = get_config()
    chunksize = int(
        getattr(config, 'producer_chunksize', Default.PRODUCER_CHUNKSIZE))
    brk = get_brk(JournalArticle.ID)
    gen = brk._iscan(
        chunksize=chunksize,
        filter_expr=Attr('data_type').eq(JournalArticle.DT.value))
    for chunk in gen:
        if not chunk:
            continue
        iid, in_x = [], []
        for obj in chunk.get('Items'):
            # Skip items with a missing/empty abstract.
            abstract = obj.get('abstract')
            if not abstract:
                continue
            iid.append(obj.get('npl_id'))
            in_x.append(abstract)
        if iid and in_x:
            self.total += len(in_x)
            print(self, self.total)
            yield {
                'id': iid,
                'input_x': in_x,
            }
def start():
    """Spawn one worker process per configured consumer name.

    Consumer names come from the comma-separated ``consumers`` config
    value; each becomes an ``mp.Process`` running ``run_async``.

    On Ctrl-C, joins whatever processes were already started and prints
    a terminate marker.
    """
    pool = []
    try:
        for hdr_name in get_config().consumers.split(','):
            pool.append(
                mp.Process(target=run_async, args=(hdr_name, ),
                           name=hdr_name))
        for proc in pool:
            proc.start()
        # NOTE(review): KeyboardInterrupt can only be caught while the
        # processes are being created/started; once they are running this
        # function returns immediately — confirm the caller waits on the
        # children.
    except KeyboardInterrupt:
        for proc in pool:
            proc.join()
        print("++ [terminate]")
def setup_s3cli(self):
    """Create the S3 client, using explicit credentials when configured.

    Falls back to boto3's default credential chain (environment,
    shared config, instance profile) when no key pair is present in
    the config.
    """
    config = get_config()
    access_key = getattr(config, 'aws_access_key', None)
    secret_key = getattr(config, 'aws_secret_key', None)
    if access_key and secret_key:
        # BUG FIX: boto3.client() takes `aws_access_key_id` /
        # `aws_secret_access_key`; the previous kwarg names
        # (`aws_access_key`, `aws_secret_key`) raise TypeError.
        self.s3_cli = boto3.client(
            's3',
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
        )
    else:
        self.s3_cli = boto3.resource('s3').meta.client
def __init__(self, **kwargs):
    """Initialize the feed and build a msgpack-serializing Kafka producer.

    Recognized kwargs: ``topic`` (target topic name) and optional
    ``client_id`` (forwarded to the producer when present).
    """
    super(FeedDict, self).__init__(**kwargs)
    self.topic = kwargs.get('topic')
    producer_kw = {
        'bootstrap_servers': get_config().kafka_brokers.split(','),
        'value_serializer': msgpack.dumps,
    }
    client_id = kwargs.get('client_id')
    if client_id:
        producer_kw['client_id'] = client_id
    self.pro = KafkaProducer(**producer_kw)
def setup_kafka(self, **kwargs):
    """Set up the Kafka consumer and producer for this handler.

    Args:
        topic_pair: A ``dict`` mapping in-topics to out-topics; the
            consumer subscribes to the keys.
    """
    self.topic_pair = kwargs.get('topic_pair', {})
    brokers = get_config().kafka_brokers.split(',')
    self.cons = KafkaConsumer(
        *self.topic_pair.keys(),
        bootstrap_servers=brokers,
        value_deserializer=msgpack.unpackb)
    self.pro = KafkaProducer(
        bootstrap_servers=brokers,
        value_serializer=msgpack.dumps)
def __init__(self, *args, **kwargs):
    """Initialize the prediction consumer and its S3 client.

    Recognized kwargs: ``topics`` (iterable of topics to subscribe to)
    and optional ``client_id`` (forwarded to the consumer when present).
    """
    super(Prediction, self).__init__()
    self.topics = kwargs.get('topics')
    consumer_kw = {
        'bootstrap_servers': get_config().kafka_brokers.split(','),
        'value_deserializer': msgpack.unpackb,
    }
    client_id = kwargs.get('client_id')
    if client_id:
        consumer_kw['client_id'] = client_id
    self.cons = KafkaConsumer(*self.topics, **consumer_kw)
    self.setup_s3cli()
def setup_s3cli(self):
    """Validate S3 config and create the client.

    When no local model is configured, the model must come from S3, so
    ``aws_s3_bucket`` is required and stored on ``self.bucket_name``.
    Uses explicit credentials when configured, otherwise boto3's default
    credential chain.
    """
    config = get_config()
    # BUG FIX: the old expression
    # `getattr(config, 'local_model', 'false') and bool(config.local_model)`
    # re-read the attribute directly, raising AttributeError when it was
    # unset (the truthy 'false' default forced the second read). Read once.
    local_model = getattr(config, 'local_model', None)
    if not local_model:
        config.raise_on_not_set('aws_s3_bucket')
        self.bucket_name = config.aws_s3_bucket
    access_key = getattr(config, 'aws_access_key', None)
    secret_key = getattr(config, 'aws_secret_key', None)
    if access_key and secret_key:
        # BUG FIX: boto3.client() takes `aws_access_key_id` /
        # `aws_secret_access_key`; the previous kwarg names raise TypeError.
        self.s3_cli = boto3.client(
            's3',
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
        )
    else:
        self.s3_cli = boto3.resource('s3').meta.client
def _precheck(self):
    """Require an S3 bucket in config unless a local model is configured.

    Stores the bucket name on ``self.bucket_name`` when S3 is used.
    """
    config = get_config()
    # BUG FIX: the old expression
    # `getattr(config, 'local_model', 'false') and bool(config.local_model)`
    # re-read the attribute directly, raising AttributeError when it was
    # unset (the truthy 'false' default forced the second read). Read once.
    local_model = getattr(config, 'local_model', None)
    if not local_model:
        config.raise_on_not_set('aws_s3_bucket')
        self.bucket_name = config.aws_s3_bucket