コード例 #1
0
ファイル: tables.py プロジェクト: strategist922/picarus
 def post_slice(self, start_row, stop_row, params, files):
     """Run the slice-level action named by ``params['action']``.

     Every action first checks access to the slice through
     ``self._slice_validate`` (with the permission string shown below) and
     then operates on the rows bounded by ``start_row`` and ``stop_row``.

     Supported actions:
         io/thumbnail, io/exif ('rw'): run the corresponding manager job.
         io/preprocess, io/classify, io/feature, io/hash ('rw'): run a
             manager job with the urlsafe-base64 decoded ``params['model']``.
         io/link, io/chain ('rw'): run a takeout chain job built from the
             model key in ``params['model']``.
         i/dedupe/identical ('r'): group rows whose ``params['column']``
             value has the same MD5 digest.
         o/crawl/flickr ('w'): crawl Flickr into rows prefixed by start_row.
         io/annotate/image/query, io/annotate/image/entity,
         io/annotate/image/query_batch ('r'): register an annotation task.

     Args:
         start_row: First row key of the slice.
         stop_row: Stop row key of the slice.
         params: Mapping of request parameters; must contain 'action'.
         files: Uploaded files; not used by any slice action here.

     Returns:
         ``{}`` for the manager jobs, a JSON string (list of
         ``{'rows': [...]}`` groups) for dedupe, ``{'numRows': ...}`` for
         the crawl and ``{'task': task}`` for the annotation actions.

     Aborts (via ``bottle.abort``) with 400 for an unknown action or a
     slice that is unusable as a crawl prefix, and with 503 when the
     annotator pool raises ``annotators.CapacityException``.
     """
     action = params['action']
     with thrift_lock() as thrift:
         manager = PicarusManager(thrift=thrift)
         if action == 'io/thumbnail':
             self._slice_validate(start_row, stop_row, 'rw')
             manager.image_thumbnail(start_row=start_row, stop_row=stop_row)
             return {}
         elif action == 'io/exif':
             self._slice_validate(start_row, stop_row, 'rw')
             manager.image_exif(start_row=start_row, stop_row=stop_row)
             return {}
         elif action == 'io/preprocess':
             self._slice_validate(start_row, stop_row, 'rw')
             model_key = base64.urlsafe_b64decode(params['model'])
             manager.image_preprocessor(model_key, start_row=start_row, stop_row=stop_row)
             return {}
         elif action == 'io/classify':
             self._slice_validate(start_row, stop_row, 'rw')
             model_key = base64.urlsafe_b64decode(params['model'])
             manager.feature_to_prediction(model_key, start_row=start_row, stop_row=stop_row)
             return {}
         elif action == 'io/feature':
             self._slice_validate(start_row, stop_row, 'rw')
             model_key = base64.urlsafe_b64decode(params['model'])
             manager.takeout_link_job(model_key, start_row=start_row, stop_row=stop_row)
             return {}
         elif action == 'io/link':
             self._slice_validate(start_row, stop_row, 'rw')
             model_key = base64.urlsafe_b64decode(params['model'])
             chain_input, model_link = _takeout_model_link_from_key(manager, model_key)
             # A single link is run as a chain of length one.
             manager.takeout_chain_job([model_link], chain_input, model_key, start_row=start_row, stop_row=stop_row)
             return {}
         elif action == 'io/chain':
             self._slice_validate(start_row, stop_row, 'rw')
             model_key = base64.urlsafe_b64decode(params['model'])
             chain_inputs, model_chain = zip(*_takeout_model_chain_from_key(manager, model_key))
             # The chain is fed from the input of its first link.
             manager.takeout_chain_job(list(model_chain), chain_inputs[0], model_key, start_row=start_row, stop_row=stop_row)
             return {}
         elif action == 'io/hash':
             self._slice_validate(start_row, stop_row, 'rw')
             model_key = base64.urlsafe_b64decode(params['model'])
             manager.feature_to_hash(model_key, start_row=start_row, stop_row=stop_row)
             return {}
         elif action == 'i/dedupe/identical':
             self._slice_validate(start_row, stop_row, 'r')
             col = base64.urlsafe_b64decode(params['column'])
             # Maps base64(md5(column value)) -> urlsafe-base64 row keys.
             features = {}
             for cur_row, cur_col in hadoopy_hbase.scanner_row_column(thrift, self.table, column=col,
                                                                      start_row=start_row, per_call=10,
                                                                      stop_row=stop_row):
                 digest = base64.b64encode(hashlib.md5(cur_col).digest())
                 features.setdefault(digest, []).append(base64.urlsafe_b64encode(cur_row))
             bottle.response.headers["Content-type"] = "application/json"
             # Only digests shared by more than one row are duplicates.
             return json.dumps([{'rows': rows} for rows in features.values() if len(rows) > 1])
         elif action == 'o/crawl/flickr':
             self._slice_validate(start_row, stop_row, 'w')
             # Only slices where the start_row can be used as a prefix may be
             # used: stop_row must be start_row with its last byte incremented.
             # Reject with 400 rather than assert, which is stripped under -O.
             if not (start_row and ord(start_row[-1]) != 255
                     and start_row[:-1] + chr(ord(start_row[-1]) + 1) == stop_row):
                 bottle.abort(400)
             row_prefix = start_row
             if ':' not in row_prefix:
                 bottle.abort(400)
             class_name = params['className']
             # The search query defaults to the class name.
             query = params.get('query')
             if query is None:
                 query = class_name
             # Fix: these used to be chained assignments that also overwrote
             # `query`, so the crawl was always run with the radius value.
             p = {'lat': params.get('lat'),
                  'lon': params.get('lon'),
                  'radius': params.get('radius'),
                  'api_key': params.get('apiKey', FLICKR_API_KEY),
                  'api_secret': params.get('apiSecret', FLICKR_API_SECRET)}
             if 'hasGeo' in params:
                 p['has_geo'] = params['hasGeo'] == '1'
             # Optional integer parameters are forwarded only when supplied.
             for param_name, crawl_arg in (('minUploadDate', 'min_upload_date'),
                                           ('maxUploadDate', 'max_upload_date'),
                                           ('page', 'page')):
                 if param_name in params:
                     p[crawl_arg] = int(params[param_name])
             return {'numRows': crawlers.flickr_crawl(crawlers.HBaseCrawlerStore(thrift, row_prefix), class_name, query, **p)}
         elif action in ('io/annotate/image/query', 'io/annotate/image/entity', 'io/annotate/image/query_batch'):
             self._slice_validate(start_row, stop_row, 'r')
             # 16 uuid bytes encode to 24 base64 chars ending in '=='; drop the padding.
             secret = base64.urlsafe_b64encode(uuid.uuid4().bytes)[:-2]
             task = base64.urlsafe_b64encode(uuid.uuid4().bytes)[:-2]
             p = {}
             image_column = base64.urlsafe_b64decode(params['imageColumn'])
             start_b64 = base64.urlsafe_b64encode(start_row)
             stop_b64 = base64.urlsafe_b64encode(stop_row)
             if action == 'io/annotate/image/entity':
                 entity_column = base64.urlsafe_b64decode(params['entityColumn'])
                 data = 'hbase://localhost:9090/images/%s/%s?entity=%s&image=%s' % (start_b64, stop_b64,
                                                                                    entity_column, image_column)
                 p['type'] = 'image_entity'
             else:
                 # The query and query_batch actions differ only in the task type.
                 query = params['query']
                 data = 'hbase://localhost:9090/images/%s/%s?image=%s' % (start_b64, stop_b64, image_column)
                 p['type'] = 'image_query' if action == 'io/annotate/image/query' else 'image_query_batch'
                 p['query'] = query
             p['num_tasks'] = 100
             p['mode'] = 'standalone'
             try:
                 redis_host, redis_port = ANNOTATORS.add_task(task, self.owner, secret, data, p).split(':')
             except annotators.CapacityException:
                 bottle.abort(503)
             # The remaining settings are added after add_task and passed to
             # mturk_vision.manager together with the task parameters.
             p['setup'] = True
             p['reset'] = True
             p['secret'] = secret
             p['redis_address'] = redis_host
             p['redis_port'] = int(redis_port)
             mturk_vision.manager(data=data, **p)
             return {'task': task}
         else:
             bottle.abort(400)