class AlveoSegmentationRoute(SegmenterWrapper):
    decorators = [
        auth_required,
        limiter.limit("15 per minute"),
        limiter.limit("150 per day")
    ]

    def _processor_get(self, user_id, remote_path):
        api_key = g.user.remote_api_key
        verify_access(remote_path, api_key)

        short_path = shorten_path(remote_path)
        result = get_binary_object(short_path)
        if result is None:
            result = segment_document(remote_path, api_key)
            if result is None:
                abort(400, 'Could not access requested document')
            else:
                create_binary_object(short_path, result)

        return result

    def _processor_post(self, user_id, audiofile):
        result = segment_audio_data(audiofile.read())
        if result is None:
            abort(400, "Uploaded file is not a valid .wav audio file.")

        return result
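# The segmentation route above keys its result cache on shorten_path(remote_path).
# The real helper lives elsewhere in the codebase; the sketch below is only an
# assumption about its behaviour (dropping scheme and host so the cache key is
# host-independent) and uses a hypothetical name so it does not shadow the real
# import.
from urllib.parse import urlparse


def _shorten_path_sketch(remote_path):
    # e.g. "https://app.alveo.edu.au/catalog/corpus/item/document/audio.wav"
    #   -> "catalog/corpus/item/document/audio.wav"
    return urlparse(remote_path).path.lstrip('/')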
class AlveoASRRetrieveJobRoute(RetrieveJobWrapper):
    decorators = [
        auth_required,
        limiter.limit("25 per minute"),
        limiter.limit("1000 per day")
    ]

    def _processor_get(self, user_id, job_id):
        jobs = job_query(user_id=user_id, job_id=job_id)
        if len(jobs) < 1:
            abort(404, "You have no job matching that job_id")

        job = jobs[0]
        status = job.status
        ds_object = job.datastore

        data = {
            "job_id": job_id,
            "status": JobTypes(status).name,
            "description": job.description
        }

        if status == JobTypes.FINISHED.value:
            data["result"] = export_asrdata(ds_object)

        return data
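# The status handling above assumes a JobTypes enum whose raw values are stored
# on Job.status (hence JobTypes(status).name and the .value comparisons). The
# real enum is defined with the models; the sketch below only illustrates the
# assumed shape, with illustrative values, and uses a hypothetical name so it
# does not shadow the real import.
import enum


class _JobTypesSketch(enum.Enum):
    QUEUED = 0
    FINISHED = 1
    CANCELLED = 2
    FAILED = 3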
class AlveoASRCancelJobRoute(CancelJobWrapper):
    decorators = [
        auth_required,
        limiter.limit("50 per minute"),
        limiter.limit("1000 per hour"),
        limiter.limit("5000 per day")
    ]

    def _processor_get(self, user_id, job_id):
        jobs = job_query(user_id=user_id, job_id=job_id)
        if len(jobs) < 1:
            abort(404, "You have no job matching that job_id")

        job_model = jobs[0]
        status = job_model.status
        if status != JobTypes.QUEUED.value:
            abort(401, "Job ID `%s` is not queued" % job_id)

        job_object = redis_queue.fetch_job(job_model.external_id)
        if job_object is None:
            # Persist the failure before aborting so the job leaves the queued pool.
            job_model.status = JobTypes.FAILED.value
            db.session.commit()
            abort(
                401,
                "Job ID `%s` couldn't be found. Moving to 'failed' pool" % job_id)

        job_object.cancel()
        job_model.status = JobTypes.CANCELLED.value
        db.session.delete(job_model.datastore)
        job_model.datastore = None
        db.session.commit()

        return {"status": "cancelled", "job_id": job_id}
class AlveoListByUserRoute(ListByUserWrapper):
    decorators = [
        auth_required,
        limiter.limit("75 per minute"),
        limiter.limit("1000 per hour"),
        limiter.limit("5000 per day")
    ]

    def _processor_get(self, user_id):
        return datastore_list(user_id=user_id)
class AlveoExportByUserRoute(ExportByUserWrapper):
    decorators = [
        auth_required,
        limiter.limit("10 per minute"),
        limiter.limit("40 per hour"),
        limiter.limit("200 per day")
    ]

    def _processor_get(self, user_id):
        return datastore_export(user_id=user_id)
class AlveoASRAddJobRoute(AddJobWrapper):
    decorators = [
        auth_required,
        limiter.limit("5 per minute"),
        limiter.limit("50 per day")
    ]

    def _processor_get(self, user_id, remote_path):
        api_key = g.user.remote_api_key
        verify_access(remote_path, api_key)
        short_path = shorten_path(remote_path)

        # Check cache first.
        # Note/TODO: Does not check the job queue. Would that even be ideal?
        # Another user could cancel their job. Jobs would need multiple authors.
        cached_asr = Datastore.query.filter(
            Datastore.key == '%s:%s:%s' % (DOMAIN, ENGINE, short_path)).filter(
                Datastore.user_id == g.user.id).first()

        if cached_asr is not None:
            if cached_asr.alias == "ready":
                return {
                    "status": "cached",
                    "result": export_asrdata(cached_asr)
                }
            else:
                return {
                    "status": "pending",
                    "job_id": cached_asr.alias.split(":")[1]
                }

        worker = redis_queue.enqueue(transcribe_document, remote_path, api_key)

        ds = Datastore(key="%s:%s:%s" % (DOMAIN, ENGINE, short_path),
                       value="{}",
                       storage_spec="asr-engine-gcloud/json/1.0",
                       user=g.user,
                       alias="init:%s" % worker.id)
        job = Job(external_id=worker.id,
                  user=g.user,
                  datastore=ds,
                  description="ASR via engine 'gcloud' for item: %s" % short_path)

        db.session.add(ds)
        db.session.add(job)
        db.session.commit()

        return {"status": "queued", "job_id": job.id}

    def _processor_post(self, user_id, audiofile):
        result = transcribe(audiofile.read())
        if result is None:
            abort(400, "Uploaded file is not a valid .wav audio file.")

        return result
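# Hedged usage sketch for the two ASR job routes above. The base URL, the
# "/asr" and "/asr/jobs/<id>" paths, the query-parameter name, and the
# "X-Api-Key" header are assumptions for illustration; the real prefixes come
# from the blueprint/API registration, which is not part of this section. The
# status strings ("cached", "pending", "queued", enum names) mirror what the
# route handlers return.
import time

import requests


def transcribe_and_wait(base_url, api_key, remote_path, poll_seconds=15):
    """Queue an ASR job for an Alveo document and poll until it finishes."""
    headers = {"X-Api-Key": api_key}
    queued = requests.get("%s/asr" % base_url,
                          headers=headers,
                          params={"remote_path": remote_path}).json()
    if queued["status"] == "cached":
        return queued["result"]

    job_id = queued["job_id"]
    while True:
        job = requests.get("%s/asr/jobs/%s" % (base_url, job_id),
                           headers=headers).json()
        if job["status"] == "FINISHED":
            return job["result"]
        if job["status"] in ("FAILED", "CANCELLED"):
            raise RuntimeError("ASR job %s ended as %s" % (job_id, job["status"]))
        time.sleep(poll_seconds)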
class AlveoSegmentationRoute(SegmenterWrapper):
    decorators = [
        auth_required,
        limiter.limit("15 per minute"),
        limiter.limit("150 per day")
    ]

    def _processor_get(self, user_id, remote_path):
        api_path = str(urlparse(remote_path).path)
        if '/' not in api_path or api_path == "/":
            abort(
                400,
                'Request did not include an Alveo document identifier to segment'
            )

        # We care more about the user itself than the user_id; another option
        # is to query the database for something that matches the key, but that
        # would be slower.
        api_key = g.user.remote_api_key

        alveo_metadata = get_module_metadata("alveo")
        api_url = alveo_metadata['api_url']
        client = pyalveo.Client(api_url=api_url,
                                api_key=api_key,
                                use_cache=False,
                                update_cache=False,
                                cache_dir=None)

        # Check if we can access the item list first.
        # It would be good if we could just check Alveo permissions instead of
        # retrieving the item directly.
        # https://github.com/Alveo/pyalveo/issues/11
        try:
            itemlist_path = remote_path.split('/document/')[0]
            itemlist = client.get_item(itemlist_path)
        except APIError as e:
            abort(400, "Response from remote host: \n" + str(e))

        result = get_cached_result(shorten_path(remote_path))
        if result is None:
            result = segment_document(remote_path, api_key)
            if result is None:
                abort(400, 'Could not access requested document')
            else:
                cache_result(shorten_path(remote_path), result)

        return result

    def _processor_post(self, user_id, audiofile):
        result = segment_audio_data(audiofile.read())
        if result is None:
            abort(400, "Uploaded file is not a valid .wav audio file.")

        return result
class AlveoStoreRoute(StoreWrapper):
    decorators = [
        auth_required,
        limiter.limit("1000 per minute"),
        limiter.limit("20000 per hour"),
        limiter.limit("100000 per day")
    ]

    def _processor_get(self, object_id, user_id=None, version=None):
        query = Datastore.query.filter(Datastore.id == object_id).first()
        if query is None:
            abort(404, 'No match for the provided id')

        user = User.query.filter(User.id == query.user_id).first()
        if user.domain != DOMAIN:
            abort(
                403,
                'You don\'t have permission to read the storage of an external user'
            )

        base_query = query
        if version is not None:
            try:
                query = query.versions[version]
            except (IndexError, TypeError):
                abort(404, 'Version doesn\'t exist for provided id')
        else:
            version = query.versions.count() - 1

        data = json.loads(query.value.decode())
        original_author = base_query.versions[0].user
        total_versions = base_query.versions.count()
        version_author = query.user

        return {
            'id': query.id,
            'key': str(query.key.split(':', 1)[1]),
            'version': version,
            'total_versions': total_versions,
            'transcription': data,
            'alias': query.alias,
            'annotations_total': len(data),
            'timestamp': query.timestamp.isoformat(),
            'storage_spec': query.storage_spec,
            'author': {
                'original': {
                    'ats_id': original_author.id,
                    'domain': original_author.domain,
                    'remote_id': original_author.remote_id
                },
                'version': {
                    'ats_id': version_author.id,
                    'domain': version_author.domain,
                    'remote_id': version_author.remote_id
                }
            }
        }

    def _processor_post(self, key, value, storage_spec, alias=None):
        if key is None or len(key) < 2:
            abort(400, 'Key is invalid or too short')
        if alias is None or len(alias) < 1:
            alias = "default"

        validate_data(value)

        key = '%s:%s' % (DOMAIN, key)
        model = Datastore.query.filter(Datastore.key == key).filter(
            Datastore.alias == alias).filter(
                Datastore.user_id == g.user.id).first()

        data = json.dumps(value)
        if model is None:
            model = Datastore(key, data, storage_spec, g.user, alias)
            db.session.add(model)
        else:
            model.set_value(data)
            model.storage_spec = storage_spec
            model.timestamp = datetime.datetime.now()

        db.session.commit()

        return {
            'id': model.id,
            'version': model.versions.count() - 1,
            'timestamp': model.timestamp,
            'alias': alias
        }
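# Hedged usage sketch for the datastore route above: store a hypothetical
# annotation list, then read it back by id. The base URL, the "/datastore"
# path, the "X-Api-Key" header name, and the annotation payload are
# assumptions for illustration; the payload fields mirror what
# _processor_post expects (key, value, storage_spec, alias).
import requests


def store_and_fetch(base_url, api_key):
    headers = {"X-Api-Key": api_key}
    payload = {
        "key": "catalog/corpus/item/document/audio.wav",   # hypothetical key
        "value": [{"start": 0.0, "end": 1.5, "caption": "example segment"}],
        "storage_spec": "asr-engine-gcloud/json/1.0",
        "alias": "default",
    }
    stored = requests.post("%s/datastore" % base_url,
                           headers=headers,
                           json=payload).json()
    # _processor_get returns the stored transcription plus version/author metadata.
    return requests.get("%s/datastore/%s" % (base_url, stored["id"]),
                        headers=headers).json()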