Esempio n. 1
0
def add_user():
    """Register a new user from the JSON body of the current request.

    The body must contain ``email`` and ``password``. When no user with that
    e-mail exists yet, a new ``User`` is added to the session and committed,
    the registration is logged, and a 200 response with the user's e-mail and
    a fresh auth token is returned.

    Raises:
        InvalidUsage: 422 (``POST_ERROR``) when a required field is missing,
            409 (``USER_EXISTS``) when the e-mail is already registered.
    """
    payload = flask.request.json

    # Check all required fields
    for field in ['email', 'password']:
        if field not in payload:
            # The placeholder must be filled in explicitly, otherwise the
            # client receives the literal text "{0}" instead of the field name.
            raise InvalidUsage('Field {0} is missing.'.format(field),
                               status_code=422,
                               enum='POST_ERROR')

    email = payload['email']
    password = payload['password']

    # Check if user already exists
    user = User.query.filter_by(email=email).first()
    if user is not None:
        db.session.close()
        raise InvalidUsage('User already exists', status_code=409, enum='USER_EXISTS')

    user = User(email, password)
    db.session.add(user)
    db.session.commit()

    response = {
        'message': 'Registration was successful',
        'username': '',
        'email': user.email,
        'auth_token': user.get_auth_token()
    }
    log.print_log(app.name, 'Registered new user {}'.format(user))
    return flask.make_response(jsonify(response), 200)
Esempio n. 2
0
def ml_download(dsid):
    """Serve the TEI export of a dataset's ML output, or start preparing it.

    The caller is authenticated through the ``Authorization`` header. The
    behaviour depends on ``dataset.status['download']``:

    * ``'Preparing_download'`` - answer with a JSON "still preparing" message.
    * ``'Ready'`` - reset the download status to ``None``, send the prepared
      file as an attachment and delete it once the response has been built.
    * anything else - mark the download as ``'Preparing_download'`` and queue
      ``prepare_TEI_download`` asynchronously.

    Raises:
        InvalidUsage: 409 (``STATUS_ERROR``) when there is no ML output path
            or the ML status shows that ML has not finished successfully.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    # TODO: This checks can be replaced: if preview exists (is Ready), then get it from Lexonomy and download it
    # TODO: otherwise notify user to send ml output to preview
    # check if ml output is ready for download
    # Compare by value: ``is ''`` tests object identity, which is not a
    # reliable way to detect an empty string.
    if dataset.xml_ml_out in (None, ''):
        raise InvalidUsage('No file for download. Try running ML first.',
                           status_code=409,
                           enum='STATUS_ERROR')
    elif dataset.status['ml'] in [
            None, 'Starting_ML', 'Lex2ML_Error', 'ML_Format', 'ML_Error',
            'ML_Annotated', 'ML2Lex_Error'
    ]:
        raise InvalidUsage(
            'File is not ready for download. Wait for ML to finish first.',
            status_code=409,
            enum='STATUS_ERROR')

    # Path of the TEI export, derived from the ML output path.
    tmp_file = dataset.xml_ml_out.split(".xml")[0] + "_TEI.xml"

    # stop if already preparing download
    if dataset.status['download'] == 'Preparing_download':
        return flask.make_response(
            {
                'msg': 'Dataset is preparing for download',
                'status': dataset.status
            }, 200)
    # if download is ready, return file
    elif dataset.status['download'] == 'Ready':
        dataset.status['download'] = None
        Datasets.dataset_status(dsid, set=True, status=dataset.status)

        # ``filename`` is bound before the callback is invoked.
        filename = dataset.name.split('.')[0] + '-transformed.xml'

        @after_this_request
        def after(response):
            # Expose the suggested file name to the client and remove the
            # temporary export.
            response.headers['x-suggested-filename'] = filename
            response.headers.add('Access-Control-Expose-Headers', '*')
            os.remove(tmp_file)
            return response

        return flask.send_file(tmp_file,
                               attachment_filename=filename,
                               as_attachment=True,
                               conditional=True)

    # prepare download
    dataset.status['download'] = 'Preparing_download'
    Datasets.dataset_status(dsid, set=True, status=dataset.status)
    character_map = Datasets.dataset_character_map(dsid)
    prepare_TEI_download.apply_async(
        args=[dsid, dataset.xml_ml_out, tmp_file, character_map])
    return flask.make_response(
        {
            'msg': 'Dataset is preparing for download',
            'status': dataset.status['download']
        }, 200)
Esempio n. 3
0
def verify_user(token):
    """Turn an auth token into a user id, or reject the request.

    ``User.decode_auth_token`` is expected to hand back either a user id or a
    string; a string result is used verbatim as the error message.

    Raises:
        InvalidUsage: 401 (``UNAUTHORIZED``) when no token is supplied or the
            decoder returns a string.
    """
    if not token:
        raise InvalidUsage("No auth token provided.", status_code=401, enum="UNAUTHORIZED")

    decoded = User.decode_auth_token(token)
    if not isinstance(decoded, str):
        return decoded

    # A blacklist lookup for logged-out tokens (``is_blacklisted``) was
    # sketched at this point in the original but is disabled.
    raise InvalidUsage(decoded, status_code=401, enum="UNAUTHORIZED")
Esempio n. 4
0
def xf_delete_transform(xfid):
    """Delete transformation ``xfid`` on behalf of the authenticated user.

    The result of ``controllers.delete_transform`` decides the outcome:
    ``None`` is reported as a missing transformation, any other falsy value
    as a transformation the caller does not own, and a truthy value as
    success.

    Raises:
        InvalidUsage: 404 (``TRANSFORMATION_DOESNT_EXIST``) or
            401 (``UNAUTHORIZED``) as described above.
    """
    uid = verify_user(flask.request.headers.get('Authorization'))
    outcome = controllers.delete_transform(uid, xfid)

    if outcome is None:
        raise InvalidUsage("Transformation does not exist.",
                           status_code=404,
                           enum="TRANSFORMATION_DOESNT_EXIST")
    if not outcome:
        raise InvalidUsage("You do not own this transformation",
                           status_code=401,
                           enum="UNAUTHORIZED")

    return flask.make_response({'deleted': xfid}, 200)
Esempio n. 5
0
def ml_run(dsid):
    """
    Start the ML process for a dataset that has been annotated at Lexonomy.

    Dataset should be annotated at Lexonomy so we can download it and start ML process.
    ML statuses: Starting_ML -> ML_Format -> ML_Annotated -> Lex_Format
    Error statuses: Lex2ML_Error, ML_Error, ML2Lex_Error

    Raises InvalidUsage (409, STATUS_ERROR) when the dataset is not annotated
    yet or when ML is already running.
    """
    uid = verify_user(flask.request.headers.get('Authorization'))

    dataset = Datasets.list_datasets(uid, dsid=dsid)
    if dataset.status['annotate'] != 'Ready':
        raise InvalidUsage('File is not annotated at Lexonomy.',
                           status_code=409,
                           enum='STATUS_ERROR')

    # Fetch the annotations first so the lex_xml path ends up in the db,
    # then reload the dataset to see it.
    get_lex_xml(uid, dsid)
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    # Drop the existing preview.
    dataset.status['preview'] = None
    Datasets.dataset_add_ml_lexonomy_access(dsid)
    delete_url = dataset.lexonomy_ml_delete
    if delete_url is not None:
        lexonomy_headers = {
            "Content-Type": 'application/json',
            "Authorization": app.config['LEXONOMY_AUTH_KEY']
        }
        requests.post(delete_url, headers=lexonomy_headers)

    running_states = ['Starting_ML', 'ML_Format', 'ML_Annotated']
    if dataset.status['ml'] in running_states:
        raise InvalidUsage('ML is already running.',
                           status_code=409,
                           enum='STATUS_ERROR')

    print_log(app.name, '{} Starting ML'.format(dataset))
    dataset.status['ml'] = 'Starting_ML'
    Datasets.dataset_status(dsid, set=True, status=dataset.status)

    # Work out the file paths; the ML output path is derived from the
    # Lexonomy XML path by replacing its last four characters.
    xml_raw = dataset.xml_file_path
    xml_ml_out = dataset.xml_lex[:-4] + '-ML_OUT.xml'
    Datasets.dataset_add_ml_paths(dsid,
                                  xml_lex=dataset.xml_lex,
                                  xml_ml_out=xml_ml_out)

    # Queue the ML scripts and remember the task id on the dataset.
    task_args = [uid, dsid, xml_raw, dataset.xml_lex, xml_ml_out]
    task = run_pdf2lex_ml_scripts.apply_async(args=task_args, countdown=0)
    Datasets.dataset_ml_task_id(dsid, set=True, task_id=task.id)

    body = {
        'message': 'ok',
        'dsid': dsid,
        'status': dataset.status['ml']
    }
    return flask.make_response(body, 200)
Esempio n. 6
0
def lexonomy_download(uid, dsid):
    """Send a dataset file as an attachment to a caller holding the shared secret.

    The ``Authorization`` header must equal ``app.config['LEXONOMY_AUTH_KEY']``.
    Query parameters ``ml`` and ``add_pages`` (the string ``"True"`` enables
    them) select what is written to a temporary file and sent:

    * ``ml`` - ``split_preview`` of the ML output (argument ``100``);
    * neither - ``first_n_pages`` of the dataset XML (argument ``20``);
    * ``add_pages`` only - ``additional_n_pages`` (argument ``20``).

    The temporary file is removed after the request.

    Raises:
        InvalidUsage: 401 (``UNAUTHORIZED``) when the shared secret is wrong.
    """
    if flask.request.headers.get('Authorization') != app.config['LEXONOMY_AUTH_KEY']:
        raise InvalidUsage("Shared secret is not valid!", status_code=401, enum='UNAUTHORIZED')

    query = flask.request.args
    ml = query.get('ml', default="False", type=str) == "True"
    additional_pages = query.get('add_pages', default="False", type=str) == "True"
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    # Set datasets status: the preview is processed for ML downloads, the
    # annotation otherwise.
    status_key = 'preview' if ml else 'annotate'
    dataset.status[status_key] = 'Processing'
    Datasets.dataset_status(dsid, set=True, status=dataset.status)

    temp_fname = dataset.xml_file_path.split(".xml")[0] + "-tmp.xml"

    @after_this_request
    def remove_file(response):
        os.remove(temp_fname)
        return response

    if ml:
        # Send ml file
        split_preview(dataset.xml_ml_out, temp_fname, 100)
        download_name = dataset.xml_ml_out.split('/')[-1]
    else:
        if additional_pages:
            # Send additional 20 pages file
            additional_n_pages(dataset.xml_file_path, dataset.xml_lex, temp_fname, 20)
        else:
            # Send first 20 pages file
            first_n_pages(dataset.xml_file_path, temp_fname, 20)
        download_name = dataset.xml_file_path.split('/')[-1]

    return flask.send_file(temp_fname, attachment_filename=download_name, as_attachment=True)
Esempio n. 7
0
def user_delete(userid):
    """Delete the account ``userid`` if it belongs to the authenticated caller.

    Raises:
        InvalidUsage: 401 (``UNAUTHORIZED``) when the token's user id differs
            from ``userid``.
    """
    # THIS IS NOT USED AND IT DOESN'T WORK
    caller_id = verify_user(flask.request.headers.get('Authorization'))
    if caller_id != userid:
        raise InvalidUsage("User ids don't match", status_code=401, enum="UNAUTHORIZED")

    controllers.delete_user(engine, userid)
    payload = {'message': 'OK'}
    return flask.make_response(jsonify(payload), 200)
Esempio n. 8
0
def xf_update_transform(xfid):
    """Update transformation ``xfid`` from the JSON body of the request.

    Reads ``xfspec`` (required), ``saved`` (default ``False``) and ``name``
    from the body, passes them to ``controllers.update_transform`` and returns
    its result under the ``updated`` key.

    Raises:
        InvalidUsage: 422 (``POST_ERROR``) when ``xfspec`` is absent or null.
    """
    # Only the authentication side effect matters here; the user id itself
    # is not used further.
    verify_user(flask.request.headers.get('Authorization'))

    body = flask.request.json
    xfspec = body.get('xfspec', None)
    saved = body.get('saved', False)
    name = body.get('name', None)

    print_log(app.name, 'Update transform {}'.format(xfid))
    if xfspec is None:
        raise InvalidUsage("Invalid API call.",
                           status_code=422,
                           enum="POST_ERROR")

    result = controllers.update_transform(xfid, xfspec, name, saved)
    return flask.make_response({'updated': result}, 200)
Esempio n. 9
0
def delete_error_log(e_id):
    """Delete error log ``e_id``; restricted to admin users.

    The caller is authenticated through the ``Authorization`` header and
    looked up in the database. Callers whose user record is missing are
    rejected exactly like non-admin users.

    Raises:
        InvalidUsage: 401 (``UNAUTHORIZED``) when the caller is not an admin.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    user = User.query.filter_by(id=uid).first()
    db.session.close()
    # A missing user row must not pass the admin gate: with the former
    # ``user is not None and not user.admin`` test, ``None`` slipped through.
    if user is None or not user.admin:
        raise InvalidUsage('User is not admin.',
                           status_code=401,
                           enum="UNAUTHORIZED")

    # NOTE(review): inside this module the name ``delete_error_log`` refers to
    # this very view function (one parameter), so this two-argument call looks
    # like it would raise TypeError unless the name is rebound elsewhere. The
    # intended helper is presumably the log-deleting function that shares this
    # name - confirm where it lives and call it via its module or an alias.
    delete_error_log(db, e_id)
    return flask.make_response({'message': 'ok'}, 200)
Esempio n. 10
0
def xf_new_transform():
    """Create a transformation for a dataset and prepare the dataset for it.

    Reads ``dsuuid``, ``dsid``, ``xfname``, ``entry_spec``, ``hw`` and
    ``saved`` from the JSON body. The first four are required. The new
    transformation id is returned under the ``xfid`` key.

    Raises:
        InvalidUsage: 422 (``POST_ERROR``) when a required field is missing or
            when ``controllers.prepare_dataset`` reports a failure (its
            message is passed through).
    """
    uid = verify_user(flask.request.headers.get('Authorization'))

    body = flask.request.json
    dsuuid = body.get('dsuuid', None)
    dsid = body.get('dsid', None)
    xfname = body.get('xfname', None)
    entry_spec = body.get('entry_spec', None)
    headword = body.get('hw', None)
    saved = body.get('saved', False)

    required = (dsuuid, xfname, dsid, entry_spec)
    if any(value is None for value in required):
        raise InvalidUsage("Invalid API call.",
                           status_code=422,
                           enum="POST_ERROR")

    xfid = controllers.new_transform(xfname, dsid, entry_spec, headword, saved)
    prepared, message = controllers.prepare_dataset(uid, dsid, xfid,
                                                    entry_spec, headword)
    if prepared:
        return flask.make_response({'xfid': xfid}, 200)

    raise InvalidUsage(message, status_code=422, enum="POST_ERROR")
Esempio n. 11
0
def login():
    """Log a user in and return an auth token.

    Two flows are supported, selected by the JSON body:

    * ``sketch_token`` present - the token is decoded with
      ``User.decode_sketch_token``; a user with the decoded id is looked up
      and created on the fly when it does not exist yet.
    * otherwise - ``login`` (e-mail) and ``password`` are required and checked
      against the stored user.

    Raises:
        InvalidUsage: 422 (``POST_ERROR``) for a missing field,
            403 (``LOGIN_ERROR``) for an unknown user or wrong password.
    """
    payload = flask.request.json

    if 'sketch_token' in payload:
        # Sketch-engine login
        ske_data = User.decode_sketch_token(payload['sketch_token'])
        ske_uid = ske_data['id']
        user = User.query.filter_by(sketch_engine_uid=ske_uid).first()
        if user is None:
            # Unknown Sketch Engine user: create it, then read it back.
            user = User(ske_data['email'], None, sketch_engine_uid=ske_uid)
            db.session.add(user)
            db.session.commit()
            user = User.query.filter_by(sketch_engine_uid=ske_uid).first()
            db.session.close()
    else:
        # Regular login: report the first required field that is absent.
        missing = next(
            (name for name in ('login', 'password') if name not in payload),
            None)
        if missing is not None:
            raise InvalidUsage("Field {0:s} is missing".format(missing), status_code=422, enum='POST_ERROR')

        email = payload['login']
        password = payload['password']
        user = User.query.filter_by(email=email).first()
        db.session.close()

        credentials_ok = user is not None and user.check_password(password)
        if not credentials_ok:
            # proper login handling if more time ... (?)
            raise InvalidUsage("Wrong password or user does not exist!", status_code=403, enum="LOGIN_ERROR")

    # Return auth token
    response = {
        'auth_token': user.get_auth_token(),
        'username': user.username,
        'email': user.email,
    }
    return flask.make_response(jsonify(response), 200)
Esempio n. 12
0
def user_data():
    """Return username, e-mail and admin flag of the authenticated user.

    Raises:
        InvalidUsage: 409 (``INVALID_AUTH_TOKEN``) when the token decodes to
            an id for which no user record is found.
    """
    uid = verify_user(flask.request.headers.get('Authorization'))
    user = User.query.filter_by(id=uid).first()
    db.session.close()

    if user is None:
        raise InvalidUsage('Provide a valid auth token.', status_code=409, enum="INVALID_AUTH_TOKEN")

    profile = {
        'username': user.username,
        'email': user.email,
        'admin': user.admin
    }
    return flask.make_response(jsonify(profile), 200)
Esempio n. 13
0
def ml_preview(dsid):
    """Send a dataset's ML output to Lexonomy for preview.

    The caller is authenticated through the ``Authorization`` header. After
    the availability check, ``ds_sendML_to_lexonomy`` is invoked and the
    dataset status is returned.

    Raises:
        InvalidUsage: 409 (``STATUS_ERROR``) when no ML output is available
            according to the check below.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    dataset = Datasets.list_datasets(uid, dsid=dsid)

    # The empty-string test compares by value; ``is ''`` tested object
    # identity, which is not a reliable way to detect an empty string.
    # The parentheses spell out how Python already grouped the original
    # ``A and B or C`` expression, so the decision logic is unchanged.
    # NOTE(review): the intent may have been
    # ``A and (B is None or B == '')`` - confirm before regrouping.
    finished_without_path = (dataset.status['ml'] == 'Lex_Format'
                             and dataset.xml_ml_out is None)
    if finished_without_path or dataset.xml_ml_out == '':
        raise InvalidUsage('No file for preview. Try running ML first.',
                           status_code=409,
                           enum='STATUS_ERROR')
    ds_sendML_to_lexonomy(uid, dsid)
    return flask.make_response(
        {
            'message': 'ok',
            'dsid': dsid,
            'status': dataset.status
        }, 200)
Esempio n. 14
0
def view_error_log(e_id):
    """Return error log ``e_id`` or one of its dataset's files; admins only.

    Query parameters ``xml_raw``, ``xml_lex`` and ``pdf`` (value ``1``
    enables them, checked in that order) select a file of the log's dataset
    to send as an attachment. Without any of them the log entry itself is
    returned as JSON, with newlines in the message replaced by ``<br/>``.

    Callers whose user record is missing are rejected exactly like non-admin
    users.

    Raises:
        InvalidUsage: 401 (``UNAUTHORIZED``) when the caller is not an admin.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    user = User.query.filter_by(id=uid).first()
    db.session.close()
    # A missing user row must not pass the admin gate: with the former
    # ``user is not None and not user.admin`` test, ``None`` slipped through.
    if user is None or not user.admin:
        raise InvalidUsage('User is not admin.',
                           status_code=401,
                           enum="UNAUTHORIZED")

    entry = get_error_log(db, e_id=e_id)

    dataset = Datasets.query.filter_by(id=entry.dsid).first()
    pdf = flask.request.args.get('pdf', default=0, type=int) == 1
    xml_lex = flask.request.args.get('xml_lex', default=0, type=int) == 1
    xml_raw = flask.request.args.get('xml_raw', default=0, type=int) == 1

    if xml_raw:
        return flask.send_file(dataset.xml_file_path,
                               attachment_filename='{0}_xml_raw.xml'.format(
                                   dataset.id),
                               as_attachment=True)

    elif xml_lex:
        file_path = dataset.xml_file_path.split('.xml')[0] + '-LEX.xml'
        return flask.send_file(file_path,
                               attachment_filename='{0}_xml_lex.xml'.format(
                                   dataset.id),
                               as_attachment=True)

    elif pdf:
        file_path = dataset.file_path
        return flask.send_file(file_path,
                               attachment_filename='{0}_dictionary.pdf'.format(
                                   dataset.id),
                               as_attachment=True)

    # If no params, return log. The HTML line breaks are applied to a local
    # copy so the loaded log record itself is not modified by a read request.
    message = entry.message.replace('\n', '<br/>')
    return flask.make_response(
        {
            'id': entry.id,
            'dsid': entry.dsid,
            'tag': entry.tag,
            'message': message,
            'time': entry.created_ts
        }, 200)
Esempio n. 15
0
def list_error_logs():
    """Return all error logs as JSON; restricted to admin users.

    Each log is reported with its ``id``, ``dsid``, ``tag``, ``message`` and
    creation time (``time``). Callers whose user record is missing are
    rejected exactly like non-admin users.

    Raises:
        InvalidUsage: 401 (``UNAUTHORIZED``) when the caller is not an admin.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    user = User.query.filter_by(id=uid).first()
    db.session.close()
    # A missing user row must not pass the admin gate: with the former
    # ``user is not None and not user.admin`` test, ``None`` slipped through.
    if user is None or not user.admin:
        raise InvalidUsage('User is not admin.',
                           status_code=401,
                           enum="UNAUTHORIZED")
    logs = [{
        'id': entry.id,
        'dsid': entry.dsid,
        'tag': entry.tag,
        'message': entry.message,
        'time': entry.created_ts
    } for entry in get_error_log(db)]
    return flask.make_response({'logs': logs}, 200)
Esempio n. 16
0
def ds_machine_learning(dsid):
    """Multi-purpose ML endpoint driven by boolean query parameters.

    Query parameters (the string ``'True'`` enables each):

    * ``xml_format`` - prepare the TEI export synchronously and send it;
    * ``get_file``   - fetch the annotated file via ``get_lex_xml``;
    * ``run_ml``     - queue the ML scripts;
    * ``send_file``  - send the ML output via ``ds_sendML_to_lexonomy``.

    When the dataset already has an ML task id, only the current status is
    reported. Without any enabled parameter the current status is returned
    with message ``"OK"``.

    Raises:
        InvalidUsage: 422 (``GET_ERROR``) when ``xml_format`` is combined
            with another action; 202 (``STATUS_ERROR``) when the export is
            requested but the file is not ready.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)

    xml_format = flask.request.args.get('xml_format', default=None,
                                        type=str) == 'True'
    get_file = flask.request.args.get('get_file', default=None,
                                      type=str) == 'True'
    run_ml = flask.request.args.get('run_ml', default=None, type=str) == 'True'
    send_file = flask.request.args.get('send_file', default=None,
                                       type=str) == 'True'

    # TODO: Save paths to DB
    dataset = Datasets.list_datasets(uid, dsid=dsid)
    xml_lex = dataset.xml_lex
    xml_raw = dataset.xml_file_path
    print('xml_lex:', xml_lex, 'xml_raw:', xml_raw)

    # The ML output path is derived from the Lexonomy XML path; without the
    # latter there is no output path either.
    if xml_lex is None:
        xml_ml_out = None
    else:
        xml_ml_out = xml_lex[:-4] + "-ML_OUT.xml"
    Datasets.dataset_add_ml_paths(dsid,
                                  xml_lex=dataset.xml_lex,
                                  xml_ml_out=xml_ml_out)

    # Check if all params are None
    # NOTE(review): the four flags above are always booleans (results of
    # ``== 'True'``), so this branch can never fire. It is kept unchanged
    # because calls without parameters currently receive the status, which
    # clients may rely on - confirm before turning this into a real check.
    if xml_format is None and get_file is None and run_ml is None and send_file is None:
        raise InvalidUsage("Invalid API call. No params.",
                           status_code=422,
                           enum="GET_ERROR")
    # Check if to many params
    elif xml_format and (get_file or run_ml or send_file):
        raise InvalidUsage("Invalid API call. Can't work on file and send it.",
                           status_code=422,
                           enum="GET_ERROR")

    dataset.ml_task_id = Datasets.dataset_ml_task_id(dsid)
    status = dataset.status

    # Check if dataset has ml_task, then send status
    if dataset.ml_task_id:
        return flask.make_response(
            {
                "message": "File is still processing.",
                "dsid": dsid,
                "Status": status
            }, 200)

    # Check if user wants file and then return it.
    # Without an ML output path there is nothing to export; treat that as
    # "not ready" instead of failing on ``None.split`` after the status has
    # already been switched to 'Preparing_download'.
    if xml_format and xml_ml_out is not None and status not in [
            'Starting_ML', 'ML_Format', 'ML_Annotated', 'Lex2ML_Error',
            'ML_Error', 'ML2Lex_Error'
    ]:
        # TODO: get the latest annotated version from Lexonomy
        Datasets.update_dataset_status(dsid, 'Preparing_download')
        tmp_file = xml_ml_out.split(".xml")[0] + "_TEI.xml"
        character_map = Datasets.dataset_character_map(dsid)
        prepare_TEI_download(dsid, xml_ml_out, tmp_file, character_map)

        # ``filename`` is bound before the callback is invoked.
        filename = dataset.name.split('.')[0] + '-transformed.xml'

        @after_this_request
        def after(response):
            # Expose the suggested file name, restore the status and remove
            # the temporary export.
            response.headers['x-suggested-filename'] = filename
            response.headers.add('Access-Control-Expose-Headers', '*')
            Datasets.update_dataset_status(dsid, 'Lex_Format')
            os.remove(tmp_file)
            return response

        return flask.send_file(tmp_file,
                               attachment_filename=filename,
                               as_attachment=True)
    elif xml_format:
        raise InvalidUsage("File is not ready. Try running ML again",
                           status_code=202,
                           enum="STATUS_ERROR")

    # Run ML scripts
    if get_file:  # Get file from Lexonomy
        status = "Lexonomy_Annotated"
        get_lex_xml(uid, dsid)
        Datasets.update_dataset_status(dsid, status)

    elif run_ml:
        status = "Starting_ML"
        Datasets.update_dataset_status(dsid, status)
        task = run_pdf2lex_ml_scripts.apply_async(
            args=[uid, dsid, xml_raw, xml_lex, xml_ml_out], countdown=0)
        Datasets.dataset_ml_task_id(dsid, set=True, task_id=task.id)

    elif send_file:  # Send file to Lexonomy
        ds_sendML_to_lexonomy(uid, dsid)

    return flask.make_response(
        {
            "message": "OK",
            "dsid": dsid,
            "Status": status
        }, 200)
Esempio n. 17
0
def ds_download2(xfid, dsid):
    """Start, poll or deliver the download of a transformed dataset.

    The behaviour depends on the download status stored for transformation
    ``xfid`` (``controllers.transformer_download_status``):

    * query parameter ``status=true`` - just report the status;
    * status ``None`` - validate the transformer, queue ``prepare_download``
      and set the status to ``'Processing'``;
    * status ``'Processing'`` - report that the file is still processing;
    * status ``'Ready'`` - reset the status, send the prepared file as an
      attachment and delete it after the request.

    Raises:
        InvalidUsage: 409 when the transformer cannot be loaded.
    """
    token = flask.request.headers.get('Authorization')
    uid = verify_user(token)
    status = controllers.transformer_download_status(xfid)

    get_status = flask.request.args.get('status', default='false',
                                        type=str) == 'true'

    if get_status:
        return flask.make_response({'status': status}, 200)

    elif status is None:
        print_log(
            app.name,
            'Transformed dataset download started uid: {0:s}, xfid: {1:s} , dsid: {2:s}'
            .format(str(uid), str(xfid), str(dsid)))
        strip_ns = flask.request.args.get('strip_ns',
                                          default='false',
                                          type=str) == 'true'
        strip_header = flask.request.args.get('strip_header',
                                              default='false',
                                              type=str) == 'true'
        strip_DictScrap = flask.request.args.get('strip_DictScrap',
                                                 default='false',
                                                 type=str) == 'true'
        strip_DictScrap = strip_ns  # TODO: remove this, when added to FE

        # Check if transformer exists.
        # ``except Exception`` instead of a bare ``except`` so that
        # SystemExit / KeyboardInterrupt are not turned into a 409.
        try:
            transform = controllers.list_transforms(dsid, xfid=xfid)
            xf = transform.transform
        except Exception:
            raise InvalidUsage('Transformer does not exist.', status_code=409)

        if xf is None:  # Not sure why this is needed here?
            return flask.make_response(
                {
                    'spec': None,
                    'entity_xml': None,
                    'output': None
                }, 200)
        else:
            # start download task
            prepare_download.apply_async(args=[
                uid, xfid, dsid, strip_ns, strip_header, strip_DictScrap
            ],
                                         countdown=0)
            status = 'Processing'
            controllers.transformer_download_status(xfid,
                                                    set=True,
                                                    download_status=status)

    elif status == "Processing":
        return flask.make_response({'message': 'File is still processing'},
                                   200)

    elif status == "Ready":
        print_log(
            app.name,
            'Transformed dataset download finished uid: {0:s}, xfid: {1:s} , dsid: {2:s}'
            .format(str(uid), str(xfid), str(dsid)))
        # return file and delete afterwards
        dataset = Datasets.list_datasets(uid, dsid=dsid)
        # NOTE(review): this unpacking only works for names with exactly one
        # dot; presumably the task that writes the file derives the path the
        # same way - confirm before changing it here.
        file_name, file_type = dataset.name.split('.')
        target_file_name = file_name + '_' + str(xfid) + '_TEI.' + file_type
        target_path = os.path.join(app.config['APP_MEDIA'], target_file_name)

        @after_this_request
        def remove_file(response):
            # ``out_name`` is bound below, before this callback is invoked.
            response.headers['x-suggested-filename'] = out_name
            response.headers.add('Access-Control-Expose-Headers', '*')
            os.remove(target_path)
            return response

        controllers.transformer_download_status(xfid, set=True)  # reset status
        transform_name = controllers.list_transforms(dsid, xfid=xfid).name
        out_name = dataset.name[:-4] + '-' + transform_name + '.xml'
        return flask.send_file(target_path,
                               attachment_filename=out_name,
                               as_attachment=True)

    return flask.make_response({'message': 'ok', 'status': status}, 200)
Esempio n. 18
0
def ds_upload_new_dataset():
    """Receive one chunk of a dataset upload and finalise it when complete.

    The chunk is appended to a per-user temporary file under
    ``app.config['APP_MEDIA']``. Chunk bookkeeping comes from the ``dz*``
    form fields (presumably sent by a Dropzone-style uploader - confirm
    against the front end). While ``dzchunkindex`` differs from
    ``dztotalchunkcount`` a JSON progress response is returned. Otherwise the
    file size is verified, the file is renamed, registered as a dataset and
    the follow-up processing is started.

    Raises:
        InvalidUsage: 400 (``FILE_EXISTS``) when a temporary file is already
            present at the first chunk; 500 (``FILE_ERROR``) when writing
            fails or the final size does not match ``dztotalfilesize``.
    """
    uid = verify_user(flask.request.headers.get('Authorization'))

    # file
    form = flask.request.form
    uploads = flask.request.files
    metadata = form.get('metadata', None)
    dictname = uploads.get('dictname', None)  # read but not used below
    file_content = uploads.get('file', None)
    total_filesize = form.get('dztotalfilesize', None)
    dzuuid = form.get('dzuuid', None)

    current_chunk = int(form.get('dzchunkindex'))
    total_chunks = int(form.get('dztotalchunkcount', None))
    chunk_offset = int(form.get('dzchunkbyteoffset', None))

    # get file extension; fall back to defaults when no usable file part
    # was submitted
    try:
        orig_filename = file_content.filename
        extension = '.' + file_content.filename.split('.')[-1]
    except AttributeError:
        orig_filename, extension = 'Dictionary', '.xml'

    media_dir = app.config['APP_MEDIA']
    filename = "tempFile_USER-{0:s}".format(str(uid)) + extension
    filepath = os.path.join(media_dir, secure_filename(filename))

    # A leftover temporary file at the first chunk is removed and reported.
    if os.path.exists(filepath) and current_chunk == 0:
        os.remove(filepath)
        raise InvalidUsage('File already exists.',
                           status_code=400,
                           enum='FILE_EXISTS')

    try:  # write to file
        with open(filepath, 'ab') as chunk_file:
            chunk_file.seek(chunk_offset)
            chunk_file.write(file_content.stream.read())
    except OSError:
        raise InvalidUsage(
            "Not sure why, but we couldn't write the file to disk.",
            status_code=500,
            enum="FILE_ERROR")

    # More chunks expected: acknowledge this one.
    if current_chunk != total_chunks:
        progress = {
            'status': 'OK',
            'filename': filename,
            'current_chunk': current_chunk,
            'total_chunks': total_chunks
        }
        return flask.make_response(jsonify(progress), 200)

    # finish upload
    if os.path.getsize(filepath) != int(total_filesize):
        os.remove(filepath)
        raise InvalidUsage("Size mismatch.",
                           status_code=500,
                           enum="FILE_ERROR")

    new_random_name = generate_filename(filename)
    new_path = os.path.join(app.config['APP_MEDIA'],
                            secure_filename(new_random_name))
    os.rename(filepath, new_path)
    dsid = controllers.add_dataset(db, uid, total_filesize, orig_filename,
                                   new_path, dzuuid)
    controllers.dataset_metadata(dsid, set=True, metadata=metadata)

    # prepare dataset
    dataset = controllers.list_datasets(uid, dsid)
    if "pdf" in dataset.upload_mimetype:
        controllers.transform_pdf2xml.apply_async(args=[dsid])
    else:
        controllers.clean_empty_namespace(dsid)
        controllers.map_xml_tags.apply_async(args=[dsid])

    return flask.make_response(Datasets.to_dict(dataset), 200)
Esempio n. 19
0
def ds_dataset_preview(dsid):
    """Placeholder endpoint: always answers 501 "Not implemented"."""
    raise InvalidUsage('Not implemented', status_code=501)
Esempio n. 20
0
def ds_rename_dataset(dsid):
    """Placeholder endpoint: authenticates the caller, then answers 501."""
    # Authentication still runs first, so an invalid token is rejected
    # before the "Not implemented" answer.
    verify_user(flask.request.headers.get('Authorization'))
    raise InvalidUsage('Not implemented', status_code=501)