Beispiel #1
0
    def fetch(self, **kwargs):
        """Yield batches of organization descriptions scanned from storage.

        Scans the broker returned by ``get_brk(OrganizationTable.ID)`` in
        chunks of ``producer_chunksize`` (falling back to
        ``Default.PRODUCER_CHUNKSIZE``), keeping only records whose
        ``data_type`` equals ``OrganizationTable.DT.value``.

        Records without a truthy ``description`` are skipped. A description
        stored as a ``Binary`` is gzip-decompressed before being emitted.

        Args:
            **kwargs: Accepted for interface compatibility; not used here.

        Yields:
            dict: ``{'id': [...], 'input_x': [...]}`` where both lists are
            index-aligned. Chunks that contribute no record are not yielded.

        Side effects:
            Adds the number of emitted records to ``self.total`` and prints
            the running total for every yielded batch.
        """
        config = get_config()
        chunksize = int(
            getattr(config, 'producer_chunksize', Default.PRODUCER_CHUNKSIZE))

        brk = get_brk(OrganizationTable.ID)
        pages = brk._iscan(
            chunksize=chunksize,
            filter_expr=Attr('data_type').eq(OrganizationTable.DT.value))

        for page in pages:
            if not page:
                continue

            iid, in_x = [], []
            # A page without an 'Items' entry is treated as empty instead of
            # failing on iteration over None.
            for item in page.get('Items') or ():
                text = item.get('description')
                if not text:
                    continue
                if isinstance(text, Binary):
                    # presumably a gzip-compressed payload wrapped in a
                    # boto3 Binary -- confirm against the writer side
                    text = gzip.decompress(text.value)
                in_x.append(text)
                iid.append(item.get('entity_id'))

            # iid and in_x always grow together, so one check is enough.
            if in_x:
                self.total += len(in_x)
                print(self, self.total)
                yield {
                    'id': iid,
                    'input_x': in_x,
                }
Beispiel #2
0
    def fetch(self, **kwargs):
        """Yield batches of journal-article abstracts scanned from storage.

        Scans the broker returned by ``get_brk(JournalArticle.ID)`` in chunks
        of ``producer_chunksize`` (falling back to
        ``Default.PRODUCER_CHUNKSIZE``), keeping only records whose
        ``data_type`` equals ``JournalArticle.DT.value``.

        Records without a truthy ``abstract`` are skipped.

        Args:
            **kwargs: Accepted for interface compatibility; not used here.

        Yields:
            dict: ``{'id': [...], 'input_x': [...]}`` where ``id`` holds the
            ``npl_id`` values and ``input_x`` the matching abstracts, in the
            same order. Chunks that contribute no record are not yielded.

        Side effects:
            Adds the number of emitted records to ``self.total`` and prints
            the running total for every yielded batch.
        """
        config = get_config()
        chunksize = int(
            getattr(config, 'producer_chunksize', Default.PRODUCER_CHUNKSIZE))

        brk = get_brk(JournalArticle.ID)
        pages = brk._iscan(
            chunksize=chunksize,
            filter_expr=Attr('data_type').eq(JournalArticle.DT.value))

        for page in pages:
            if not page:
                continue

            iid, in_x = [], []
            # A page without an 'Items' entry is treated as empty instead of
            # failing on iteration over None.
            for item in page.get('Items') or ():
                abstract = item.get('abstract')
                if not abstract:
                    continue
                iid.append(item.get('npl_id'))
                in_x.append(abstract)

            # iid and in_x always grow together, so one check is enough.
            if in_x:
                self.total += len(in_x)
                print(self, self.total)
                yield {
                    'id': iid,
                    'input_x': in_x,
                }
Beispiel #3
0
def start():
    """Spawn one worker process per configured consumer name.

    The comma-separated ``consumers`` setting is split into names; each name
    becomes a ``multiprocessing.Process`` running ``run_async(name)`` and
    named after it. All processes are created first and then started.

    The function returns as soon as every process has been started; it does
    not wait for the workers to finish.

    NOTE(review): the ``KeyboardInterrupt`` handler only covers the window in
    which processes are being created and started. An interrupt arriving
    after this function has returned is not handled here -- confirm whether
    callers are expected to join the workers themselves.
    """
    pool = []
    try:
        for hdr_name in get_config().consumers.split(','):
            pool.append(
                mp.Process(target=run_async, args=(hdr_name, ), name=hdr_name))
        for proc in pool:
            proc.start()
    except KeyboardInterrupt:
        # An interrupt can land before every process was started. Joining a
        # process that was never started fails, so wait only for the ones
        # that were actually spawned (pid is None until start()).
        for proc in pool:
            if proc.pid is not None:
                proc.join()
        print("++ [terminate]")
Beispiel #4
0
 def setup_s3cli(self):
     """Create the S3 client and store it on ``self.s3_cli``.

     When both ``aws_access_key`` and ``aws_secret_key`` are set (truthy) on
     the configuration, an explicit-credential client is built. Otherwise the
     client is taken from a default ``boto3`` S3 resource, leaving credential
     lookup to boto3.
     """
     config = get_config()
     access_key = getattr(config, 'aws_access_key', None)
     secret_key = getattr(config, 'aws_secret_key', None)

     if access_key and secret_key:
         # boto3 names these parameters aws_access_key_id and
         # aws_secret_access_key; any other keyword is rejected with a
         # TypeError.
         self.s3_cli = boto3.client(
             's3',
             aws_access_key_id=access_key,
             aws_secret_access_key=secret_key,
         )
     else:
         self.s3_cli = boto3.resource('s3').meta.client
Beispiel #5
0
    def __init__(self, **kwargs):
        """Initialise the instance and create its Kafka producer.

        All keyword arguments are forwarded to the parent initialiser.

        Keyword Args:
            topic: Stored as ``self.topic`` (``None`` when absent).
            client_id: Optional; passed to the producer only when truthy.

        The producer connects to the comma-separated ``kafka_brokers`` from
        the configuration, serialises values with ``msgpack.dumps`` and is
        stored as ``self.pro``.
        """
        super(FeedDict, self).__init__(**kwargs)
        broker_list = get_config().kafka_brokers.split(',')
        self.topic = kwargs.get('topic')

        producer_opts = dict(
            bootstrap_servers=broker_list,
            value_serializer=msgpack.dumps,
        )

        # Only override the library's default client id when one was given.
        requested_id = kwargs.get('client_id')
        if requested_id:
            producer_opts['client_id'] = requested_id

        self.pro = KafkaProducer(**producer_opts)
Beispiel #6
0
    def setup_kafka(self, **kwargs):
        """
        Create the Kafka consumer and producer used by this instance.

        Args:
        topic_pair: A `dict` describing in-topic and out-topic; its keys are
            the topics the consumer subscribes to. Defaults to an empty dict.

        Sets ``self.topic_pair``, ``self.cons`` (values decoded with
        ``msgpack.unpackb``) and ``self.pro`` (values encoded with
        ``msgpack.dumps``). Both clients use the comma-separated
        ``kafka_brokers`` from the configuration.
        """
        broker_list = get_config().kafka_brokers.split(',')
        connection = dict(bootstrap_servers=broker_list)

        self.topic_pair = kwargs.get('topic_pair', {})
        in_topics = self.topic_pair.keys()

        self.cons = KafkaConsumer(
            *in_topics,
            value_deserializer=msgpack.unpackb,
            **connection
        )
        self.pro = KafkaProducer(
            value_serializer=msgpack.dumps,
            **connection
        )
Beispiel #7
0
    def __init__(self, *args, **kwargs):
        """Initialise the instance, its Kafka consumer and its S3 client.

        Positional arguments are accepted but not used; the parent
        initialiser is called without arguments.

        Keyword Args:
            topics: Iterable of topic names to subscribe to; stored as
                ``self.topics`` and unpacked into the consumer.
            client_id: Optional; passed to the consumer only when truthy.

        The consumer connects to the comma-separated ``kafka_brokers`` from
        the configuration, decodes values with ``msgpack.unpackb`` and is
        stored as ``self.cons``. ``setup_s3cli()`` is called last.
        """
        super(Prediction, self).__init__()
        broker_list = get_config().kafka_brokers.split(',')
        requested_id = kwargs.get('client_id')

        consumer_opts = dict(
            bootstrap_servers=broker_list,
            value_deserializer=msgpack.unpackb,
        )

        # Only override the library's default client id when one was given.
        if requested_id:
            consumer_opts['client_id'] = requested_id

        self.topics = kwargs.get('topics')
        self.cons = KafkaConsumer(*self.topics, **consumer_opts)
        self.setup_s3cli()
Beispiel #8
0
    def setup_s3cli(self):
        """Resolve the S3 bucket (when needed) and create the S3 client.

        Unless ``local_model`` is enabled in the configuration,
        ``aws_s3_bucket`` must be set; it is stored as ``self.bucket_name``.

        The client is stored as ``self.s3_cli``. When both ``aws_access_key``
        and ``aws_secret_key`` are set (truthy), an explicit-credential
        client is built. Otherwise the client is taken from a default
        ``boto3`` S3 resource, leaving credential lookup to boto3.
        """
        config = get_config()

        # NOTE(review): this is a plain truthiness test, so a string value
        # such as 'false' counts as enabled -- confirm how the configuration
        # object represents booleans.
        if not (getattr(config, 'local_model', 'false')
                and bool(config.local_model)):
            config.raise_on_not_set('aws_s3_bucket')
            self.bucket_name = config.aws_s3_bucket

        access_key = getattr(config, 'aws_access_key', None)
        secret_key = getattr(config, 'aws_secret_key', None)

        if access_key and secret_key:
            # boto3 names these parameters aws_access_key_id and
            # aws_secret_access_key; any other keyword is rejected with a
            # TypeError.
            self.s3_cli = boto3.client(
                's3',
                aws_access_key_id=access_key,
                aws_secret_access_key=secret_key,
            )
        else:
            self.s3_cli = boto3.resource('s3').meta.client
Beispiel #9
0
 def _precheck(self):
     """Require an S3 bucket unless ``local_model`` is enabled.

     When ``local_model`` is truthy in the configuration nothing happens.
     Otherwise ``aws_s3_bucket`` must be set (checked through
     ``config.raise_on_not_set``) and is stored as ``self.bucket_name``.
     """
     config = get_config()

     # Guard clause: with a local model there is no bucket to resolve.
     local_flag = getattr(config, 'local_model', 'false')
     if local_flag and bool(config.local_model):
         return

     config.raise_on_not_set('aws_s3_bucket')
     self.bucket_name = config.aws_s3_bucket