def post_slice(self, start_row, stop_row, params, files):
    """Dispatch a POST action over the row slice [start_row, stop_row).

    Actions fall into several groups:
      * io/* batch jobs: run a PicarusManager job (thumbnail, exif,
        preprocess, classify, feature, link, chain, hash) over the slice.
      * i/dedupe/identical: scan one column and group rows whose cell
        contents hash (md5) identically.
      * o/crawl/flickr: crawl Flickr results into rows under the slice's
        prefix (the slice must be exactly one prefix wide).
      * io/annotate/image/*: register an mturk_vision annotation task
        backed by the slice.

    BUGFIX: the 'o/crawl/flickr' branch previously used chained
    assignments (``p['lat'] = query = params.get('lat')``, etc.) that
    clobbered the already-computed ``query`` with the lat/lon/radius
    parameters (usually None) before ``query`` was passed to the crawler.
    The geo parameters are now stored without touching ``query``.

    :param start_row: Inclusive slice start row key (byte string)
    :param stop_row: Exclusive slice stop row key (byte string)
    :param params: Request parameters; must include 'action' plus any
        action-specific keys (e.g. base64 'model', 'column', crawl/query
        options)
    :param files: Uploaded files (not used by any action here)
    :returns: JSON-serializable dict (often empty), or a JSON string for
        the dedupe action (Content-type is set to application/json)
    :raises bottle.HTTPError: 400 for unknown actions, 503 when the
        annotation service is at capacity
    """
    action = params['action']
    with thrift_lock() as thrift:
        manager = PicarusManager(thrift=thrift)
        if action == 'io/thumbnail':
            self._slice_validate(start_row, stop_row, 'rw')
            manager.image_thumbnail(start_row=start_row, stop_row=stop_row)
            return {}
        elif action == 'io/exif':
            self._slice_validate(start_row, stop_row, 'rw')
            manager.image_exif(start_row=start_row, stop_row=stop_row)
            return {}
        elif action == 'io/preprocess':
            self._slice_validate(start_row, stop_row, 'rw')
            manager.image_preprocessor(base64.urlsafe_b64decode(params['model']), start_row=start_row, stop_row=stop_row)
            return {}
        elif action == 'io/classify':
            self._slice_validate(start_row, stop_row, 'rw')
            manager.feature_to_prediction(base64.urlsafe_b64decode(params['model']), start_row=start_row, stop_row=stop_row)
            return {}
        elif action == 'io/feature':
            self._slice_validate(start_row, stop_row, 'rw')
            manager.takeout_link_job(base64.urlsafe_b64decode(params['model']), start_row=start_row, stop_row=stop_row)
            return {}
        elif action == 'io/link':
            self._slice_validate(start_row, stop_row, 'rw')
            model_key = base64.urlsafe_b64decode(params['model'])
            chain_input, model_link = _takeout_model_link_from_key(manager, model_key)
            manager.takeout_chain_job([model_link], chain_input, model_key, start_row=start_row, stop_row=stop_row)
            return {}
        elif action == 'io/chain':
            self._slice_validate(start_row, stop_row, 'rw')
            model_key = base64.urlsafe_b64decode(params['model'])
            # Each element of the takeout chain is an (input, link) pair;
            # the chain job takes the first input plus the whole link list.
            chain_inputs, model_chain = zip(*_takeout_model_chain_from_key(manager, model_key))
            manager.takeout_chain_job(list(model_chain), chain_inputs[0], model_key, start_row=start_row, stop_row=stop_row)
            return {}
        elif action == 'io/hash':
            self._slice_validate(start_row, stop_row, 'rw')
            manager.feature_to_hash(base64.urlsafe_b64decode(params['model']), start_row=start_row, stop_row=stop_row)
            return {}
        elif action == 'i/dedupe/identical':
            self._slice_validate(start_row, stop_row, 'r')
            col = base64.urlsafe_b64decode(params['column'])
            # Group row keys by the md5 of the cell value in `col`.
            features = {}
            dedupe_feature = lambda x, y: features.setdefault(base64.b64encode(hashlib.md5(y).digest()), []).append(base64.urlsafe_b64encode(x))
            for cur_row, cur_col in hadoopy_hbase.scanner_row_column(thrift, self.table, column=col, start_row=start_row, per_call=10, stop_row=stop_row):
                dedupe_feature(cur_row, cur_col)
            bottle.response.headers["Content-type"] = "application/json"
            # Only report groups with more than one row (actual duplicates).
            return json.dumps([{'rows': y} for x, y in features.items() if len(y) > 1])
        elif action == 'o/crawl/flickr':
            self._slice_validate(start_row, stop_row, 'w')
            # Only slices where the start_row can be used as a prefix may be used
            assert start_row and ord(start_row[-1]) != 255 and start_row[:-1] + chr(ord(start_row[-1]) + 1) == stop_row
            p = {}
            row_prefix = start_row
            assert row_prefix.find(':') != -1
            class_name = params['className']
            query = params.get('query')
            query = class_name if query is None else query
            # BUGFIX: do not reuse `query` as an assignment target here —
            # the original chained assignments overwrote it with the geo
            # parameters before it was passed to the crawler.
            p['lat'] = params.get('lat')
            p['lon'] = params.get('lon')
            p['radius'] = params.get('radius')
            p['api_key'] = params.get('apiKey', FLICKR_API_KEY)
            p['api_secret'] = params.get('apiSecret', FLICKR_API_SECRET)
            if 'hasGeo' in params:
                p['has_geo'] = params['hasGeo'] == '1'
            # Optional integer parameters: pass through only when present.
            try:
                p['min_upload_date'] = int(params['minUploadDate'])
            except KeyError:
                pass
            try:
                p['max_upload_date'] = int(params['maxUploadDate'])
            except KeyError:
                pass
            try:
                p['page'] = int(params['page'])
            except KeyError:
                pass
            return {'numRows': crawlers.flickr_crawl(crawlers.HBaseCrawlerStore(thrift, row_prefix), class_name, query, **p)}
        elif action in ('io/annotate/image/query', 'io/annotate/image/entity', 'io/annotate/image/query_batch'):
            self._slice_validate(start_row, stop_row, 'r')
            # Random identifiers: urlsafe base64 of a UUID with the '=='
            # padding stripped.
            secret = base64.urlsafe_b64encode(uuid.uuid4().bytes)[:-2]
            task = base64.urlsafe_b64encode(uuid.uuid4().bytes)[:-2]
            p = {}
            image_column = base64.urlsafe_b64decode(params['imageColumn'])
            if action == 'io/annotate/image/entity':
                entity_column = base64.urlsafe_b64decode(params['entityColumn'])
                data = 'hbase://localhost:9090/images/%s/%s?entity=%s&image=%s' % (base64.urlsafe_b64encode(start_row), base64.urlsafe_b64encode(stop_row), entity_column, image_column)
                p['type'] = 'image_entity'
            elif action == 'io/annotate/image/query':
                query = params['query']
                data = 'hbase://localhost:9090/images/%s/%s?image=%s' % (base64.urlsafe_b64encode(start_row), base64.urlsafe_b64encode(stop_row), image_column)
                p['type'] = 'image_query'
                p['query'] = query
            elif action == 'io/annotate/image/query_batch':
                query = params['query']
                data = 'hbase://localhost:9090/images/%s/%s?image=%s' % (base64.urlsafe_b64encode(start_row), base64.urlsafe_b64encode(stop_row), image_column)
                p['type'] = 'image_query_batch'
                p['query'] = query
            else:
                # Unreachable given the enclosing membership test; kept as a
                # defensive guard.
                bottle.abort(400)
            p['num_tasks'] = 100
            p['mode'] = 'standalone'
            try:
                redis_host, redis_port = ANNOTATORS.add_task(task, self.owner, secret, data, p).split(':')
            except annotators.CapacityException:
                bottle.abort(503)  # annotation backend is at capacity
            p['setup'] = True
            p['reset'] = True
            p['secret'] = secret
            p['redis_address'] = redis_host
            p['redis_port'] = int(redis_port)
            mturk_vision.manager(data=data, **p)
            return {'task': task}
        else:
            bottle.abort(400)  # unknown action