def add_user():
    """Register a new user from the JSON body of the current request.

    Reads ``email`` and ``password`` from the request JSON, refuses the call
    when a user with that email already exists, otherwise stores the new user
    and returns an auth token for it.

    Returns:
        A 200 JSON response with ``message``, ``username`` (always an empty
        string), ``email`` and ``auth_token``.

    Raises:
        InvalidUsage: 422 when a required field is missing, 409 when the
            email is already registered.
    """
    # A missing/empty JSON body is treated like a body without fields, so the
    # caller gets the 422 below instead of a TypeError from the `in` test.
    payload = flask.request.json or {}

    # Check all required fields
    for field in ['email', 'password']:
        if field not in payload:
            # The message used to be passed unformatted, so clients saw the
            # literal "{0}" placeholder instead of the field name.
            raise InvalidUsage('Field {0} is missing.'.format(field),
                               status_code=422,
                               enum='POST_ERROR')

    email = payload['email']
    password = payload['password']

    # Check if user already exists
    user = User.query.filter_by(email=email).first()
    if user is not None:
        db.session.close()
        raise InvalidUsage('User already exists',
                           status_code=409,
                           enum='USER_EXISTS')

    user = User(email, password)
    db.session.add(user)
    db.session.commit()

    response = {
        'message': 'Registration was successful',
        'username': '',
        'email': user.email,
        'auth_token': user.get_auth_token()
    }
    log.print_log(app.name, 'Registered new user {}'.format(user))
    return flask.make_response(jsonify(response), 200)
def ml_download(dsid):
    """Serve the TEI export of a dataset's ML output, preparing it if needed.

    The handler is meant to be polled. Depending on
    ``dataset.status['download']`` it either reports that the export is still
    being prepared, sends the prepared file (and resets the download status),
    or queues the ``prepare_TEI_download`` task.

    Args:
        dsid: Id of the dataset whose ML output is requested.

    Returns:
        The prepared file as an attachment, or a 200 response with a ``msg``
        and ``status`` while the export is being prepared.

    Raises:
        InvalidUsage: 409 when there is no ML output yet or ML has not
            reached a downloadable state.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    # TODO: This checks can be replaced: if preview exists (is Ready), then get it from Lexonomy and download it
    # TODO: otherwise notify user to send ml output to preview

    # check if ml output is ready for download
    # (`== ''` instead of `is ''`: identity against a literal is not a
    # reliable emptiness test and triggers a SyntaxWarning on Python 3.8+)
    if dataset.xml_ml_out is None or dataset.xml_ml_out == '':
        raise InvalidUsage('No file for download. Try running ML first.',
                           status_code=409,
                           enum='STATUS_ERROR')
    elif dataset.status['ml'] in [
            None, 'Starting_ML', 'Lex2ML_Error', 'ML_Format', 'ML_Error',
            'ML_Annotated', 'ML2Lex_Error'
    ]:
        raise InvalidUsage(
            'File is not ready for download. Wait for ML to finish first.',
            status_code=409,
            enum='STATUS_ERROR')

    tmp_file = dataset.xml_ml_out.split(".xml")[0] + "_TEI.xml"

    # stop if already preparing download
    if dataset.status['download'] == 'Preparing_download':
        return flask.make_response(
            {
                'msg': 'Dataset is preparing for download',
                'status': dataset.status
            }, 200)

    # if download is ready, return file
    elif dataset.status['download'] == 'Ready':
        dataset.status['download'] = None
        Datasets.dataset_status(dsid, set=True, status=dataset.status)

        # Defined before the callback that closes over it.
        filename = dataset.name.split('.')[0] + '-transformed.xml'

        @after_this_request
        def after(response):
            response.headers['x-suggested-filename'] = filename
            response.headers.add('Access-Control-Expose-Headers', '*')
            # The prepared export is removed once the response is built.
            os.remove(tmp_file)
            return response

        return flask.send_file(tmp_file,
                               attachment_filename=filename,
                               as_attachment=True,
                               conditional=True)

    # prepare download
    dataset.status['download'] = 'Preparing_download'
    Datasets.dataset_status(dsid, set=True, status=dataset.status)
    character_map = Datasets.dataset_character_map(dsid)
    prepare_TEI_download.apply_async(
        args=[dsid, dataset.xml_ml_out, tmp_file, character_map])
    # NOTE(review): this response carries only the download status, whereas
    # the 'Preparing_download' response above carries the whole status
    # object; kept as-is because clients may depend on either shape.
    return flask.make_response(
        {
            'msg': 'Dataset is preparing for download',
            'status': dataset.status['download']
        }, 200)
def verify_user(token):
    """Resolve an auth token to whatever ``User.decode_auth_token`` yields.

    Args:
        token: Value of the ``Authorization`` header; may be empty or None.

    Returns:
        The decoded value, when it is not a string.

    Raises:
        InvalidUsage: 401 when no token is given, or when decoding returns a
            string (that string is used as the error message).
    """
    if not token:
        raise InvalidUsage("No auth token provided.",
                           status_code=401,
                           enum="UNAUTHORIZED")

    decoded = User.decode_auth_token(token)
    if isinstance(decoded, str):
        raise InvalidUsage(decoded, status_code=401, enum="UNAUTHORIZED")

    # A blacklist check (`is_blacklisted(engine, token)`, answering with
    # 'User logged out. Please log in again.') was sketched here and is
    # currently disabled.
    return decoded
def xf_delete_transform(xfid):
    """Delete a transformation on behalf of the authenticated caller.

    Args:
        xfid: Id of the transformation to delete.

    Returns:
        A 200 response ``{'deleted': xfid}``.

    Raises:
        InvalidUsage: 404 when ``controllers.delete_transform`` returns None,
            401 when it returns another falsy value.
    """
    auth_token = flask.request.headers.get('Authorization')
    uid = verify_user(auth_token)

    outcome = controllers.delete_transform(uid, xfid)
    if outcome is None:
        raise InvalidUsage("Transformation does not exist.",
                           status_code=404,
                           enum="TRANSFORMATION_DOESNT_EXIST")
    if not outcome:
        raise InvalidUsage("You do not own this transformation",
                           status_code=401,
                           enum="UNAUTHORIZED")
    return flask.make_response({'deleted': xfid}, 200)
def ml_run(dsid):
    """Start the ML pipeline for a dataset that was annotated at Lexonomy.

    The dataset has to be annotated at Lexonomy so it can be downloaded and
    fed to the ML process.

    ML statuses: Starting_ML -> ML_Format -> ML_Annotated -> Lex_Format
    Error statuses: Lex2ML_Error, ML_Error, ML2Lex_Error

    Args:
        dsid: Id of the dataset to process.

    Returns:
        A 200 response with ``message``, ``dsid`` and the new ML status.

    Raises:
        InvalidUsage: 409 when the dataset is not annotated or ML is already
            running.
    """
    auth_token = flask.request.headers.get('Authorization')
    uid = verify_user(auth_token)

    # Fetch the annotations first, so the lex_xml path ends up in the db.
    dataset = Datasets.list_datasets(uid, dsid=dsid)
    if dataset.status['annotate'] != 'Ready':
        raise InvalidUsage('File is not annotated at Lexonomy.',
                           status_code=409,
                           enum='STATUS_ERROR')
    get_lex_xml(uid, dsid)
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    # Drop the preview.
    dataset.status['preview'] = None
    Datasets.dataset_add_ml_lexonomy_access(dsid)
    delete_url = dataset.lexonomy_ml_delete
    if delete_url is not None:
        lexonomy_headers = {
            "Content-Type": 'application/json',
            "Authorization": app.config['LEXONOMY_AUTH_KEY']
        }
        requests.post(delete_url, headers=lexonomy_headers)

    # NOTE(review): this guard runs after the preview clean-up above.
    running_states = ['Starting_ML', 'ML_Format', 'ML_Annotated']
    if dataset.status['ml'] in running_states:
        raise InvalidUsage('ML is already running.',
                           status_code=409,
                           enum='STATUS_ERROR')

    print_log(app.name, '{} Starting ML'.format(dataset))
    dataset.status['ml'] = 'Starting_ML'
    Datasets.dataset_status(dsid, set=True, status=dataset.status)

    # Work out the file paths the task needs and store them.
    xml_raw = dataset.xml_file_path
    xml_ml_out = dataset.xml_lex[:-4] + '-ML_OUT.xml'
    Datasets.dataset_add_ml_paths(dsid,
                                  xml_lex=dataset.xml_lex,
                                  xml_ml_out=xml_ml_out)

    # Queue the ML task and remember its id.
    task_args = [uid, dsid, xml_raw, dataset.xml_lex, xml_ml_out]
    task = run_pdf2lex_ml_scripts.apply_async(args=task_args, countdown=0)
    Datasets.dataset_ml_task_id(dsid, set=True, task_id=task.id)

    payload = {
        'message': 'ok',
        'dsid': dsid,
        'status': dataset.status['ml']
    }
    return flask.make_response(payload, 200)
def lexonomy_download(uid, dsid):
    """Send a slice of a dataset as a file to a shared-secret caller.

    The ``Authorization`` header must equal ``LEXONOMY_AUTH_KEY``. Query flag
    ``ml=True`` selects the ML output preview; otherwise ``add_pages=True``
    selects the "additional pages" file and its absence the "first pages"
    file. The temporary file is removed after the request.

    Args:
        uid: Id of the dataset owner.
        dsid: Id of the dataset.

    Returns:
        The generated temporary file as an attachment.

    Raises:
        InvalidUsage: 401 when the shared secret does not match.
    """
    provided_secret = flask.request.headers.get('Authorization')
    if provided_secret != app.config['LEXONOMY_AUTH_KEY']:
        raise InvalidUsage("Shared secret is not valid!",
                           status_code=401,
                           enum='UNAUTHORIZED')

    query = flask.request.args
    ml = query.get('ml', default="False", type=str) == "True"
    additional_pages = query.get('add_pages', default="False",
                                 type=str) == "True"

    dataset = Datasets.list_datasets(uid, dsid=dsid)

    # Mark the matching stage as in progress.
    stage = 'preview' if ml else 'annotate'
    dataset.status[stage] = 'Processing'
    Datasets.dataset_status(dsid, set=True, status=dataset.status)

    temp_fname = dataset.xml_file_path.split(".xml")[0] + "-tmp.xml"

    @after_this_request
    def remove_file(response):
        os.remove(temp_fname)
        return response

    if ml:
        # ML file
        split_preview(dataset.xml_ml_out, temp_fname, 100)
        download_name = dataset.xml_ml_out.split('/')[-1]
    elif not additional_pages:
        # First 20 pages
        first_n_pages(dataset.xml_file_path, temp_fname, 20)
        download_name = dataset.xml_file_path.split('/')[-1]
    else:
        # Additional 20 pages
        additional_n_pages(dataset.xml_file_path, dataset.xml_lex,
                           temp_fname, 20)
        download_name = dataset.xml_file_path.split('/')[-1]

    return flask.send_file(temp_fname,
                           attachment_filename=download_name,
                           as_attachment=True)
def user_delete(userid):
    """Delete the account of the authenticated user.

    Marked by its author as unused and not working.

    Args:
        userid: Id of the user to delete; must equal the id decoded from the
            auth token.

    Returns:
        A 200 JSON response ``{'message': 'OK'}``.

    Raises:
        InvalidUsage: 401 when the token's user id differs from ``userid``.
    """
    # THIS IS NOT USED AND IT DOESN'T WORK
    auth_token = flask.request.headers.get('Authorization')
    caller_id = verify_user(auth_token)

    if caller_id != userid:
        raise InvalidUsage("User ids don't match",
                           status_code=401,
                           enum="UNAUTHORIZED")

    controllers.delete_user(engine, userid)
    body = jsonify({'message': 'OK'})
    return flask.make_response(body, 200)
def xf_update_transform(xfid):
    """Update a transformation from the JSON body of the current request.

    Reads ``xfspec`` (required), ``name`` and ``saved`` from the body and
    forwards them to ``controllers.update_transform``.

    Args:
        xfid: Id of the transformation to update.

    Returns:
        A 200 response ``{'updated': <controller result>}``.

    Raises:
        InvalidUsage: 422 when ``xfspec`` is missing from the body.
    """
    auth_token = flask.request.headers.get('Authorization')
    # Called only for its authentication check; the decoded id is not used.
    verify_user(auth_token)

    body = flask.request.json
    xfspec = body.get('xfspec', None)
    saved = body.get('saved', False)
    name = body.get('name', None)

    print_log(app.name, 'Update transform {}'.format(xfid))

    if xfspec is None:
        raise InvalidUsage("Invalid API call.",
                           status_code=422,
                           enum="POST_ERROR")

    result = controllers.update_transform(xfid, xfspec, name, saved)
    return flask.make_response({'updated': result}, 200)
def delete_error_log(e_id):
    """Delete one error log entry; admins only.

    Args:
        e_id: Id of the error log entry.

    Returns:
        A 200 response ``{'message': 'ok'}``.

    Raises:
        InvalidUsage: 401 when the caller is not an existing admin user.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    user = User.query.filter_by(id=uid).first()
    db.session.close()

    # A token whose user row no longer exists must be refused too; the former
    # `user is not None and not user.admin` let that case through.
    if user is None or not user.admin:
        raise InvalidUsage('User is not admin.',
                           status_code=401,
                           enum="UNAUTHORIZED")

    # NOTE(review): this view has the same name as the helper it calls. Unless
    # the helper is bound to this name again after the view is defined, the
    # call below re-enters this one-argument view with two arguments.
    # Confirm, and import the helper under an alias.
    delete_error_log(db, e_id)
    return flask.make_response({'message': 'ok'}, 200)
def xf_new_transform():
    """Create a transformation and prepare its dataset.

    Reads ``dsuuid``, ``dsid``, ``xfname`` and ``entry_spec`` (all required)
    plus ``hw`` and ``saved`` from the request JSON.

    Returns:
        A 200 response ``{'xfid': <new id>}``.

    Raises:
        InvalidUsage: 422 when a required field is missing or when
            ``controllers.prepare_dataset`` reports failure (its message is
            passed through).
    """
    auth_token = flask.request.headers.get('Authorization')
    uid = verify_user(auth_token)

    body = flask.request.json
    dsuuid = body.get('dsuuid', None)
    dsid = body.get('dsid', None)
    xfname = body.get('xfname', None)
    entry_spec = body.get('entry_spec', None)
    headword = body.get('hw', None)
    saved = body.get('saved', False)

    # dsuuid is only validated here; it is not passed on.
    required = (dsuuid, xfname, dsid, entry_spec)
    if any(value is None for value in required):
        raise InvalidUsage("Invalid API call.",
                           status_code=422,
                           enum="POST_ERROR")

    xfid = controllers.new_transform(xfname, dsid, entry_spec, headword,
                                     saved)
    prepared, message = controllers.prepare_dataset(uid, dsid, xfid,
                                                    entry_spec, headword)
    if not prepared:
        raise InvalidUsage(message, status_code=422, enum="POST_ERROR")

    return flask.make_response({'xfid': xfid}, 200)
def login():
    """Log a user in via a Sketch Engine token or via login/password.

    With ``sketch_token`` in the request JSON the matching user is looked up
    by Sketch Engine uid and created when absent. Otherwise ``login`` and
    ``password`` are required and checked against the stored user.

    Returns:
        A 200 JSON response with ``auth_token``, ``username`` and ``email``.

    Raises:
        InvalidUsage: 422 when a required field is missing, 403 when the user
            does not exist or the password check fails.
    """
    body = flask.request.json

    if 'sketch_token' in body:
        # Sketch Engine login
        ske_user = User.decode_sketch_token(body['sketch_token'])
        ske_uid = ske_user['id']
        user = User.query.filter_by(sketch_engine_uid=ske_uid).first()

        # First login of this Sketch Engine user: create the account.
        if user is None:
            user = User(ske_user['email'], None, sketch_engine_uid=ske_uid)
            db.session.add(user)
            db.session.commit()
            user = User.query.filter_by(sketch_engine_uid=ske_uid).first()
        db.session.close()
    else:
        # Regular login
        for field in ['login', 'password']:
            if field not in body:
                raise InvalidUsage("Field {0:s} is missing".format(field),
                                   status_code=422,
                                   enum='POST_ERROR')

        email = body['login']
        password = body['password']
        user = User.query.filter_by(email=email).first()
        db.session.close()

        # proper login handling if more time ... (?)
        if user is None or not user.check_password(password):
            raise InvalidUsage("Wrong password or user does not exist!",
                               status_code=403,
                               enum="LOGIN_ERROR")

    # Hand back the auth token together with the basic profile.
    response = {
        'auth_token': user.get_auth_token(),
        'username': user.username,
        'email': user.email,
    }
    return flask.make_response(jsonify(response), 200)
def user_data():
    """Return the profile of the authenticated user.

    Returns:
        A 200 JSON response with ``username``, ``email`` and ``admin``.

    Raises:
        InvalidUsage: 409 when no user matches the id decoded from the token.
    """
    auth_token = flask.request.headers.get('Authorization')
    uid = verify_user(auth_token)

    user = User.query.filter_by(id=uid).first()
    db.session.close()

    if user is None:
        raise InvalidUsage('Provide a valid auth token.',
                           status_code=409,
                           enum="INVALID_AUTH_TOKEN")

    profile = {
        'username': user.username,
        'email': user.email,
        'admin': user.admin
    }
    return flask.make_response(jsonify(profile), 200)
def ml_preview(dsid):
    """Send a dataset's ML output to Lexonomy for preview.

    Args:
        dsid: Id of the dataset.

    Returns:
        A 200 response with ``message``, ``dsid`` and the dataset status as
        read before the file was sent.

    Raises:
        InvalidUsage: 409 when there is no ML output to preview.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    # Parenthesised exactly as Python already evaluated the original
    # `a and b or c`, and using `== ''` instead of the unreliable `is ''`.
    # NOTE(review): as written, a missing (None) output is only rejected when
    # the ML status is 'Lex_Format'; confirm that this is the intended rule.
    missing_after_ml = (dataset.status['ml'] == 'Lex_Format'
                        and dataset.xml_ml_out is None)
    if missing_after_ml or dataset.xml_ml_out == '':
        raise InvalidUsage('No file for preview. Try running ML first.',
                           status_code=409,
                           enum='STATUS_ERROR')

    ds_sendML_to_lexonomy(uid, dsid)
    return flask.make_response(
        {
            'message': 'ok',
            'dsid': dsid,
            'status': dataset.status
        }, 200)
def view_error_log(e_id):
    """Return one error log entry, or one of its dataset's files; admins only.

    Query flags (``1`` to enable), checked in this order: ``xml_raw`` sends
    the raw XML, ``xml_lex`` the ``-LEX.xml`` sibling of the raw XML, ``pdf``
    the uploaded file. Without flags the log entry itself is returned with
    newlines in the message replaced by ``<br/>``.

    Args:
        e_id: Id of the error log entry.

    Returns:
        A file attachment, or a 200 response with ``id``, ``dsid``, ``tag``,
        ``message`` and ``time``.

    Raises:
        InvalidUsage: 401 when the caller is not an existing admin user,
            404 when the log lookup returns None.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    user = User.query.filter_by(id=uid).first()
    db.session.close()

    # A token whose user row no longer exists must be refused too; the former
    # `user is not None and not user.admin` let that case through.
    if user is None or not user.admin:
        raise InvalidUsage('User is not admin.',
                           status_code=401,
                           enum="UNAUTHORIZED")

    error_log = get_error_log(db, e_id=e_id)
    if error_log is None:
        raise InvalidUsage('Error log does not exist.', status_code=404)
    dataset = Datasets.query.filter_by(id=error_log.dsid).first()

    pdf = flask.request.args.get('pdf', default=0, type=int) == 1
    xml_lex = flask.request.args.get('xml_lex', default=0, type=int) == 1
    xml_raw = flask.request.args.get('xml_raw', default=0, type=int) == 1

    if xml_raw:
        return flask.send_file(dataset.xml_file_path,
                               attachment_filename='{0}_xml_raw.xml'.format(
                                   dataset.id),
                               as_attachment=True)
    elif xml_lex:
        file_path = dataset.xml_file_path.split('.xml')[0] + '-LEX.xml'
        return flask.send_file(file_path,
                               attachment_filename='{0}_xml_lex.xml'.format(
                                   dataset.id),
                               as_attachment=True)
    elif pdf:
        file_path = dataset.file_path
        return flask.send_file(
            file_path,
            attachment_filename='{0}_dictionary.pdf'.format(dataset.id),
            as_attachment=True)

    # If no params, return log. The HTML form is built in a local variable so
    # the loaded log object is not modified just to render a response.
    html_message = error_log.message.replace('\n', '<br/>')
    return flask.make_response(
        {
            'id': error_log.id,
            'dsid': error_log.dsid,
            'tag': error_log.tag,
            'message': html_message,
            'time': error_log.created_ts
        }, 200)
def list_error_logs():
    """List all error log entries; admins only.

    Returns:
        A 200 response ``{'logs': [...]}`` where each item has ``id``,
        ``dsid``, ``tag``, ``message`` and ``time``.

    Raises:
        InvalidUsage: 401 when the caller is not an existing admin user.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    user = User.query.filter_by(id=uid).first()
    db.session.close()

    # A token whose user row no longer exists must be refused too; the former
    # `user is not None and not user.admin` let that case through.
    if user is None or not user.admin:
        raise InvalidUsage('User is not admin.',
                           status_code=401,
                           enum="UNAUTHORIZED")

    logs = [{
        'id': entry.id,
        'dsid': entry.dsid,
        'tag': entry.tag,
        'message': entry.message,
        'time': entry.created_ts
    } for entry in get_error_log(db)]
    return flask.make_response({'logs': logs}, 200)
def ds_machine_learning(dsid):
    """Multi-purpose ML endpoint driven by ``'True'`` query flags.

    Flags: ``xml_format`` (download the TEI export), ``get_file`` (fetch the
    annotated file from Lexonomy), ``run_ml`` (queue the ML task) and
    ``send_file`` (send the ML output to Lexonomy). ``xml_format`` cannot be
    combined with the others. While the dataset has an ML task id, only a
    "still processing" status response is returned.

    Args:
        dsid: Id of the dataset.

    Returns:
        The TEI export as an attachment, or a 200 response with ``message``,
        ``dsid`` and ``Status``.

    Raises:
        InvalidUsage: 422 for an invalid flag combination; status code 202
            when the export was requested but is not ready.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)

    # Each flag is the result of an `== 'True'` comparison, so it is always a
    # bool and never None.
    args = flask.request.args
    xml_format = args.get('xml_format', default=None, type=str) == 'True'
    get_file = args.get('get_file', default=None, type=str) == 'True'
    run_ml = args.get('run_ml', default=None, type=str) == 'True'
    send_file = args.get('send_file', default=None, type=str) == 'True'

    # TODO: Save paths to DB
    dataset = Datasets.list_datasets(uid, dsid=dsid)
    xml_lex = dataset.xml_lex
    xml_raw = dataset.xml_file_path
    print('xml_lex:', xml_lex, 'xml_raw:', xml_raw)

    xml_ml_out = None if xml_lex is None else xml_lex[:-4] + "-ML_OUT.xml"
    Datasets.dataset_add_ml_paths(dsid,
                                  xml_lex=dataset.xml_lex,
                                  xml_ml_out=xml_ml_out)

    # NOTE(review): the former "no params" check tested the flags against
    # None and therefore could never fire; it was removed. A call without
    # flags still falls through to the status response at the end, as before.

    # Check if to many params
    if xml_format and (get_file or run_ml or send_file):
        raise InvalidUsage(
            "Invalid API call. Can't work on file and send it.",
            status_code=422,
            enum="GET_ERROR")

    dataset.ml_task_id = Datasets.dataset_ml_task_id(dsid)
    status = dataset.status

    # Check if dataset has ml_task, then send status
    if dataset.ml_task_id:
        return flask.make_response(
            {
                "message": "File is still processing.",
                "dsid": dsid,
                "Status": status
            }, 200)

    # Check if user wants file and then return it
    if xml_format:
        # NOTE(review): other handlers in this file index `dataset.status` as
        # a mapping; if it is one here as well, this membership test is always
        # true - confirm which status representation this endpoint expects.
        status_ok = status not in [
            'Starting_ML', 'ML_Format', 'ML_Annotated', 'Lex2ML_Error',
            'ML_Error', 'ML2Lex_Error'
        ]
        # Without an ML output path there is nothing to export; report that
        # instead of failing on `None.split`.
        if not status_ok or xml_ml_out is None:
            raise InvalidUsage("File is not ready. Try running ML again",
                               status_code=202,
                               enum="STATUS_ERROR")

        # TODO: get the latest annotated version from Lexonomy
        Datasets.update_dataset_status(dsid, 'Preparing_download')
        tmp_file = xml_ml_out.split(".xml")[0] + "_TEI.xml"
        character_map = Datasets.dataset_character_map(dsid)
        prepare_TEI_download(dsid, xml_ml_out, tmp_file, character_map)

        # Defined before the callback that closes over it.
        filename = dataset.name.split('.')[0] + '-transformed.xml'

        @after_this_request
        def after(response):
            response.headers['x-suggested-filename'] = filename
            response.headers.add('Access-Control-Expose-Headers', '*')
            Datasets.update_dataset_status(dsid, 'Lex_Format')
            os.remove(tmp_file)
            return response

        return flask.send_file(tmp_file,
                               attachment_filename=filename,
                               as_attachment=True)

    # Run ML scripts
    if get_file:
        # Get file from Lexonomy
        status = "Lexonomy_Annotated"
        get_lex_xml(uid, dsid)
        Datasets.update_dataset_status(dsid, status)
    elif run_ml:
        status = "Starting_ML"
        Datasets.update_dataset_status(dsid, status)
        task = run_pdf2lex_ml_scripts.apply_async(
            args=[uid, dsid, xml_raw, xml_lex, xml_ml_out], countdown=0)
        Datasets.dataset_ml_task_id(dsid, set=True, task_id=task.id)
    elif send_file:
        # Send file to Lexonomy
        ds_sendML_to_lexonomy(uid, dsid)

    return flask.make_response(
        {
            "message": "OK",
            "dsid": dsid,
            "Status": status
        }, 200)
def ds_download2(xfid, dsid):
    """Prepare and serve the transformed dataset for a transformation.

    The handler is meant to be polled and acts on the stored download status
    of the transformation: ``None`` queues ``prepare_download``,
    ``"Processing"`` reports progress, ``"Ready"`` sends the file, resets the
    status and deletes the file after the request. ``?status=true`` only
    returns the current status.

    Args:
        xfid: Id of the transformation.
        dsid: Id of the dataset.

    Returns:
        The prepared file as an attachment, or a 200 JSON-style response
        describing the current state.

    Raises:
        InvalidUsage: 409 when the transformation cannot be loaded.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    status = controllers.transformer_download_status(xfid)
    get_status = flask.request.args.get('status', default='false',
                                        type=str) == 'true'

    if get_status:
        return flask.make_response({'status': status}, 200)

    elif status is None:
        print_log(
            app.name,
            'Transformed dataset download started uid: {0:s}, xfid: {1:s} , dsid: {2:s}'
            .format(str(uid), str(xfid), str(dsid)))
        strip_ns = flask.request.args.get('strip_ns', default='false',
                                          type=str) == 'true'
        strip_header = flask.request.args.get('strip_header',
                                              default='false',
                                              type=str) == 'true'
        # TODO: read the 'strip_DictScrap' query arg instead, when added to FE
        # (its value was previously read and immediately overwritten).
        strip_DictScrap = strip_ns

        # Check if transformer exists
        try:
            transform = controllers.list_transforms(dsid, xfid=xfid)
            xf = transform.transform
        except Exception as err:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are no longer turned into a 409.
            raise InvalidUsage('Transformer does not exist.',
                               status_code=409) from err

        if xf is None:  # Not sure why this is needed here?
            return flask.make_response(
                {
                    'spec': None,
                    'entity_xml': None,
                    'output': None
                }, 200)

        # start download task
        prepare_download.apply_async(args=[
            uid, xfid, dsid, strip_ns, strip_header, strip_DictScrap
        ], countdown=0)
        status = 'Processing'
        controllers.transformer_download_status(xfid, set=True,
                                                download_status=status)

    elif status == "Processing":
        return flask.make_response({'message': 'File is still processing'},
                                   200)

    elif status == "Ready":
        print_log(
            app.name,
            'Transformed dataset download finished uid: {0:s}, xfid: {1:s} , dsid: {2:s}'
            .format(str(uid), str(xfid), str(dsid)))
        # return file and delete afterwards
        dataset = Datasets.list_datasets(uid, dsid=dsid)
        # NOTE(review): requires exactly one '.' in the dataset name and,
        # below, a three-letter extension - presumably matching the path the
        # prepare_download task writes to; verify before changing.
        file_name, file_type = dataset.name.split('.')
        target_file_name = file_name + '_' + str(xfid) + '_TEI.' + file_type
        target_path = os.path.join(app.config['APP_MEDIA'], target_file_name)

        controllers.transformer_download_status(xfid, set=True)  # reset status
        transform_name = controllers.list_transforms(dsid, xfid=xfid).name
        out_name = dataset.name[:-4] + '-' + transform_name + '.xml'

        # Registered only after out_name is known, so the callback can never
        # run with the name still unassigned.
        @after_this_request
        def remove_file(response):
            response.headers['x-suggested-filename'] = out_name
            response.headers.add('Access-Control-Expose-Headers', '*')
            os.remove(target_path)
            return response

        return flask.send_file(target_path,
                               attachment_filename=out_name,
                               as_attachment=True)

    return flask.make_response({'message': 'ok', 'status': status}, 200)
def ds_upload_new_dataset():
    """Receive one chunk of a chunked dataset upload and finish it when done.

    Each request appends its chunk to a per-user temporary file. When the
    chunk index equals the total chunk count, the file size is checked, the
    file is renamed, a dataset record is created and processing is started.

    Returns:
        A 200 JSON progress response for intermediate chunks, or a 200
        response with the new dataset (``Datasets.to_dict``) once finished.

    Raises:
        InvalidUsage: 400 when a temp file already exists at chunk 0,
            500 when writing fails or the final size does not match.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)

    # file
    metadata = flask.request.form.get('metadata', None)
    dictname = flask.request.files.get('dictname', None)  # read but not used below
    file_content = flask.request.files.get('file', None)
    total_filesize = flask.request.form.get('dztotalfilesize', None)
    dzuuid = flask.request.form.get('dzuuid', None)
    # NOTE(review): int() raises TypeError when any of these form fields is
    # absent; nothing here turns that into an InvalidUsage.
    current_chunk = int(flask.request.form.get('dzchunkindex'))
    total_chunks = int(flask.request.form.get('dztotalchunkcount', None))
    chunk_offset = int(flask.request.form.get('dzchunkbyteoffset', None))

    # get file extension
    try:
        orig_filename = file_content.filename
        extension = '.' + file_content.filename.split('.')[-1]
    except AttributeError:
        # No 'file' part in the request (file_content is None).
        orig_filename = 'Dictionary'
        extension = '.xml'

    # One temp file per user and extension.
    filename = "tempFile_USER-{0:s}".format(str(uid)) + extension
    filepath = os.path.join(app.config['APP_MEDIA'], secure_filename(filename))

    # A leftover temp file at the first chunk is deleted AND the request is
    # rejected.
    if os.path.exists(filepath) and current_chunk == 0:
        os.remove(filepath)
        raise InvalidUsage('File already exists.',
                           status_code=400,
                           enum='FILE_EXISTS')

    try:  # write to file
        # NOTE(review): the file is opened in append mode; on some platforms
        # writes then go to the end of the file regardless of seek(), so
        # chunk_offset may have no effect - confirm chunks arrive in order.
        with open(filepath, 'ab') as f:
            f.seek(chunk_offset)
            f.write(file_content.stream.read())
    except OSError:
        raise InvalidUsage(
            "Not sure why, but we couldn't write the file to disk.",
            status_code=500,
            enum="FILE_ERROR")

    # NOTE(review): the upload finishes only when the chunk index EQUALS the
    # total count; presumably the client numbers its last request that way -
    # verify against the front end.
    if current_chunk != total_chunks:
        return flask.make_response(
            jsonify({
                'status': 'OK',
                'filename': filename,
                'current_chunk': current_chunk,
                'total_chunks': total_chunks
            }), 200)
    else:  # finish upload
        if os.path.getsize(filepath) != int(total_filesize):
            os.remove(filepath)
            raise InvalidUsage("Size mismatch.",
                               status_code=500,
                               enum="FILE_ERROR")
        else:
            # Move the temp file to its final, generated name.
            new_random_name = generate_filename(filename)
            new_path = os.path.join(app.config['APP_MEDIA'],
                                    secure_filename(new_random_name))
            os.rename(filepath, new_path)
            dsid = controllers.add_dataset(db, uid, total_filesize,
                                           orig_filename, new_path, dzuuid)
            controllers.dataset_metadata(dsid, set=True, metadata=metadata)

            # prepare dataset
            dataset = controllers.list_datasets(uid, dsid)
            if "pdf" in dataset.upload_mimetype:
                controllers.transform_pdf2xml.apply_async(args=[dsid])
            else:
                controllers.clean_empty_namespace(dsid)
                controllers.map_xml_tags.apply_async(args=[dsid])
            return flask.make_response(Datasets.to_dict(dataset), 200)
def ds_dataset_preview(dsid):
    """Placeholder for a dataset preview endpoint.

    Args:
        dsid: Id of the dataset (unused).

    Raises:
        InvalidUsage: Always, with status code 501.
    """
    raise InvalidUsage('Not implemented', status_code=501)
def ds_rename_dataset(dsid):
    """Placeholder for a dataset rename endpoint.

    The caller is authenticated first; after that the call always fails.

    Args:
        dsid: Id of the dataset (unused).

    Raises:
        InvalidUsage: From ``verify_user`` on a bad token, otherwise always
            with status code 501.
    """
    auth_token = flask.request.headers.get('Authorization')
    # Called only for its authentication check; the decoded id is not used.
    verify_user(auth_token)
    raise InvalidUsage('Not implemented', status_code=501)