Code example #1
File: api.py Project: MathieuRietman/asreview
def api_set_algorithms(project_id):  # noqa: F401

    # check if there is a kwargs file
    try:
        # open the project's kwargs file
        with open(get_kwargs_path(project_id), "r") as f_read:
            kwargs_dict = json.load(f_read)

    except FileNotFoundError:
        # fall back to the setup kwargs from the app config
        kwargs_dict = deepcopy(app.config['asr_kwargs'])

    # add the machine learning model to the kwargs
    # TODO@{Jonathan} validate model choice on server side
    ml_model = request.form.get("model", None)
    ml_query_strategy = request.form.get("query_strategy", None)
    ml_feature_extraction = request.form.get("feature_extraction", None)
    if ml_model:
        kwargs_dict["model"] = ml_model
    if ml_query_strategy:
        kwargs_dict["query_strategy"] = ml_query_strategy
    if ml_feature_extraction:
        kwargs_dict["feature_extraction"] = ml_feature_extraction

    # write the kwargs to a file
    with open(get_kwargs_path(project_id), "w") as f_write:
        json.dump(kwargs_dict, f_write)

    response = jsonify({'success': True})
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
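The URL routing for this handler is not shown in the listing. Assuming a hypothetical endpoint such as /api/project/<project_id>/algorithms that accepts POST form data, a client call could look like this sketch:

# Hypothetical client call; the endpoint URL is an assumption, not shown above.
import requests

resp = requests.post(
    "http://localhost:5000/api/project/my-project/algorithms",  # assumed URL
    data={
        "model": "nb",
        "query_strategy": "max",
        "feature_extraction": "tfidf",
    },
)
print(resp.json())  # {'success': True}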
Code example #2
File: api.py Project: MathieuRietman/asreview
def api_get_project_info(project_id):  # noqa: F401
    """Get info on the article"""

    logging.info("get project info")

    try:

        # read the file with project info
        with open(get_project_file_path(project_id), "r") as fp:

            project_info = json.load(fp)

        # check if there is a dataset
        try:
            get_data_file_path(project_id)
            project_info["projectHasDataset"] = True
        except Exception:
            project_info["projectHasDataset"] = False

        # check if there is prior knowledge (i.e. whether a model is set);
        # if so, the reviewer passed the prior knowledge screen.
        project_info["projectHasPriorKnowledge"] = \
            get_kwargs_path(project_id).exists()

        # check if algorithms are set (same kwargs file as above);
        # if so, the reviewer passed the algorithms screen.
        project_info["projectHasAlgorithms"] = \
            get_kwargs_path(project_id).exists()

        # backwards support <0.10
        if "projectInitReady" not in project_info:
            if project_info["projectHasPriorKnowledge"]:
                project_info["projectInitReady"] = True
            else:
                project_info["projectInitReady"] = False

        response = jsonify(project_info)

    except FileNotFoundError as err:
        logging.error(err)
        response = jsonify(message="Project not found.")

        return response, 400

    except Exception as err:
        logging.error(err)
        response = jsonify(message="Internal Server Error.")

        return response, 500

    return response
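These handlers rely on path helpers such as get_project_file_path, get_kwargs_path and get_data_file_path, whose definitions are not part of this listing. A minimal sketch of how such helpers could be written, assuming projects live under the asreview_path() directory that example #6 uses (the file names are assumptions):

# Sketch only; the real helpers live elsewhere in the asreview code base.
from pathlib import Path


def asreview_path():
    # assumed default location of the projects directory
    return Path.home() / ".asreview"


def get_project_file_path(project_id):
    return asreview_path() / project_id / "project.json"  # file name assumed


def get_kwargs_path(project_id):
    return asreview_path() / project_id / "kwargs.json"  # file name assumed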
Code example #3
File: api.py Project: openefsa/asreview
def api_start(project_id):  # noqa: F401
    """Start training the model
    """

    # get the CLI arguments
    asr_kwargs = deepcopy(app.config['asr_kwargs'])

    # add the machine learning model to the kwargs
    # TODO@{Jonathan} validate model choice on server side
    ml_model = request.form.get("machine_learning_model", None)
    if ml_model:
        asr_kwargs["model"] = ml_model

    # write the kwargs to a file
    with open(get_kwargs_path(project_id), "w") as fp:
        json.dump(asr_kwargs, fp)

    # start training the model

    py_exe = _get_executable()
    run_command = [
        py_exe,
        "-m", "asreview",
        "web_run_model",
        project_id,
        "--label_method",
        "prior"
    ]
    subprocess.Popen(run_command)

    response = jsonify({'success': True})
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
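The helper _get_executable is not shown in this listing. A plausible sketch that resolves the interpreter used to spawn the training subprocess (the fallback value is an assumption):

# Sketch of the helper used above; the real implementation may differ.
import sys


def _get_executable():
    # sys.executable is the path of the running interpreter; it can be
    # empty in some embedded setups, hence the fallback.
    return sys.executable if sys.executable else "python"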
Code example #4
File: api.py Project: jm4rc05/asreview
def api_get_project_info(project_id):  # noqa: F401
    """Get info on the article"""

    try:

        # read the file with project info
        with open(get_project_file_path(project_id), "r") as fp:

            project_info = json.load(fp)

        # check if there is a dataset
        try:
            get_data_file_path(project_id)
            project_info["projectHasDataset"] = True
        except Exception:
            project_info["projectHasDataset"] = False

        # check if there is prior knowledge (i.e. whether a model is set);
        # if so, the reviewer passed the prior knowledge screen.
        project_info["projectHasPriorKnowledge"] = \
            get_kwargs_path(project_id).exists()

        # check if algorithms are set (same kwargs file as above);
        # if so, the reviewer passed the algorithms screen.
        project_info["projectHasAlgorithms"] = \
            get_kwargs_path(project_id).exists()

        # backwards support <0.10
        if "projectInitReady" not in project_info:
            if project_info["projectHasPriorKnowledge"]:
                project_info["projectInitReady"] = True
            else:
                project_info["projectInitReady"] = False

    except FileNotFoundError:
        raise ProjectNotFoundError()

    return jsonify(project_info)
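Unlike example #2, this variant raises ProjectNotFoundError and leaves the HTTP response to an error handler that is not shown here. A minimal sketch of how such an exception could be wired up in Flask, reusing the app object from the other snippets (the class definition and status code are assumptions):

# Sketch; the real ProjectNotFoundError and its handler may differ.
from flask import jsonify


class ProjectNotFoundError(Exception):
    status_code = 400  # example #2 returns 400 for a missing project


@app.errorhandler(ProjectNotFoundError)
def handle_project_not_found(err):
    # err is the raised exception instance; a fixed message is returned here
    return jsonify(message="Project not found."), ProjectNotFoundError.status_code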
Code example #5
File: api.py Project: MathieuRietman/asreview
def api_get_algorithms(project_id):  # noqa: F401

    # check if there is a kwargs file
    try:
        # open the project's kwargs file
        with open(get_kwargs_path(project_id), "r") as f_read:
            kwargs_dict = json.load(f_read)

    except FileNotFoundError:
        # fall back to the setup kwargs from the app config
        kwargs_dict = deepcopy(app.config['asr_kwargs'])

    response = jsonify(kwargs_dict)
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
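Every handler in this listing attaches the Access-Control-Allow-Origin header by hand. If adding a dependency is acceptable, the flask-cors extension does the same app-wide; this is an alternative sketch, not the project's own approach:

# Alternative using the flask-cors extension; not the project's own code.
from flask import Flask
from flask_cors import CORS

app = Flask(__name__)
CORS(app)  # adds Access-Control-Allow-Origin: * to every response by default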
Code example #6
File: project.py Project: mao-wang/asreview
def init_project(project_id,
                 project_name=None,
                 project_description=None,
                 project_authors=None):
    """Initialize the necessary files specific to the web app."""

    if not project_id or not isinstance(project_id, str) \
            or len(project_id) < 3:
        raise ValueError(
            "Project id must be a string of at least 3 characters")

    # get the directory with the projects
    project_dir = asreview_path() / project_id

    if project_dir.exists():
        raise ValueError("Project already exists")

    try:
        project_dir.mkdir()

        fp_data = project_dir / "data"
        fp_data.mkdir()

        # create a file with project info
        with open(get_project_file_path(project_id), "w") as fp:
            json.dump(
                {
                    'version': asreview_version,  # todo: Fail without git?
                    'id': project_id,
                    'name': project_name,
                    'description': project_description,
                    'authors': project_authors
                },
                fp)

        asr_kwargs = deepcopy(app.config['asr_kwargs'])  # remove config
        with open(get_kwargs_path(project_id), "w") as fp:
            json.dump(asr_kwargs, fp)

        # record the path of the project's state file in the kwargs copy
        asr_kwargs['state_file'] = str(get_state_path(project_id))

    except Exception as err:
        # remove all generated folders and raise error
        shutil.rmtree(project_dir)
        raise err
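A short usage sketch: init_project raises ValueError both for an invalid id and for an existing project, so callers typically wrap the call (the argument values are illustrative):

# Usage sketch; the error messages are the ones raised by init_project above.
try:
    init_project(
        "my-review",
        project_name="My review",
        project_description="Systematic review of ...",
        project_authors="A. Author",
    )
except ValueError as err:
    # either the id was invalid or the project already exists
    print(f"Could not create project: {err}")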
Code example #7
File: api.py Project: jm4rc05/asreview
def api_get_algorithms(project_id):  # noqa: F401

    # check if there is a kwargs file
    try:
        # open the project's kwargs file
        with open(get_kwargs_path(project_id), "r") as f_read:
            kwargs_dict = json.load(f_read)

    except FileNotFoundError:
        # fall back to the setup kwargs and fill in the default choices
        kwargs_dict = deepcopy(app.config['asr_kwargs'])
        kwargs_dict["model"] = DEFAULT_MODEL
        kwargs_dict["feature_extraction"] = DEFAULT_FEATURE_EXTRACTION
        kwargs_dict["query_strategy"] = DEFAULT_QUERY_STRATEGY
        kwargs_dict["balance_strategy"] = DEFAULT_BALANCE_STRATEGY
        kwargs_dict["n_instances"] = DEFAULT_N_INSTANCES

    response = jsonify(kwargs_dict)
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
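The DEFAULT_* constants used above are imported from elsewhere in the project. A sketch of what such a defaults module could contain; the concrete values are assumptions for illustration, not taken from this listing:

# Assumed values for illustration only; the real defaults are defined elsewhere.
DEFAULT_MODEL = "nb"                  # e.g. a naive Bayes classifier
DEFAULT_FEATURE_EXTRACTION = "tfidf"  # e.g. TF-IDF features
DEFAULT_QUERY_STRATEGY = "max"        # e.g. maximum-probability sampling
DEFAULT_BALANCE_STRATEGY = "double"   # e.g. double balancing
DEFAULT_N_INSTANCES = 1               # e.g. one record per query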
Code example #8
def train_model(project_id, label_method=None):
    """Add the new labels to the review and do the modeling.

    It uses a lock to ensure only one model is running at the same time.
    Old results directories are deleted after 4 iterations.

    It has one argument on the CLI, which is the base project directory.
    """

    logging.info(f"Project {project_id} - Train a new model for project")

    # get file locations
    asr_kwargs_file = get_kwargs_path(project_id)
    lock_file = get_lock_path(project_id)

    # Lock so that only one training run is running at the same time.
    # It doesn't lock the flask server/client.
    with SQLiteLock(lock_file,
                    blocking=False,
                    lock_name="training",
                    project_id=project_id) as lock:

        # If the lock is not acquired, another training instance is running.
        if not lock.locked():
            logging.info("Project {project_id} - "
                         "Cannot acquire lock, other instance running.")
            return

        # Lock the current state. We want to have a consistent active state.
        # This does communicate with the flask backend; it prevents writing and
        # reading to the same files at the same time.
        with SQLiteLock(lock_file,
                        blocking=True,
                        lock_name="active",
                        project_id=project_id) as lock:
            # Get all the labels since the last run. If none are new, quit.
            new_label_history = read_label_history(project_id)

        data_fp = str(get_data_file_path(project_id))
        as_data = read_data(project_id)
        state_file = get_state_path(project_id)

        # collect command line arguments and pass them to the reviewer
        with open(asr_kwargs_file, "r") as fp:
            asr_kwargs = json.load(fp)
        asr_kwargs['state_file'] = str(state_file)
        reviewer = get_reviewer(dataset=data_fp, mode="minimal", **asr_kwargs)

        with open_state(state_file) as state:
            old_label_history = get_label_train_history(state)

        diff_history = get_diff_history(new_label_history, old_label_history)

        if len(diff_history) == 0:
            logging.info(
                f"Project {project_id} - No new labels since last run.")
            return

        query_idx = np.array([x[0] for x in diff_history], dtype=int)
        inclusions = np.array([x[1] for x in diff_history], dtype=int)

        # Classify the new labels, train and store the results.
        with open_state(state_file) as state:
            reviewer.classify(query_idx,
                              inclusions,
                              state,
                              method=label_method)
            reviewer.train()
            reviewer.log_probabilities(state)
            new_query_idx = reviewer.query(reviewer.n_pool()).tolist()
            reviewer.log_current_query(state)
            proba = state.pred_proba.tolist()

        with SQLiteLock(lock_file,
                        blocking=True,
                        lock_name="active",
                        project_id=project_id) as lock:
            current_pool = read_pool(project_id)
            in_current_pool = np.zeros(len(as_data))
            in_current_pool[current_pool] = 1
            new_pool = [x for x in new_query_idx if in_current_pool[x]]
            write_pool(project_id, new_pool)
            write_proba(project_id, proba)
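get_diff_history is not defined in this listing; conceptually it returns the tail of the new label history that the previous training run has not seen yet. A minimal sketch under that assumption:

# Sketch only; assumes histories are ordered lists of (index, label) pairs.
def get_diff_history(new_history, old_history):
    for i in range(len(new_history)):
        # first position where the histories diverge (or the old history ends)
        if i >= len(old_history) or old_history[i] != new_history[i]:
            return new_history[i:]
    return []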
Code example #9
def train_model(project_id, label_method=None):
    """Add the new labels to the review and do the modeling.

    It uses a lock to ensure only one model is running at the same time.
    Old results directories are deleted after 4 iterations.

    It has one argument on the CLI, which is the base project directory.
    """

    logging.info(f"Project {project_id} - Train a new model for project")

    # get file locations
    asr_kwargs_file = get_kwargs_path(project_id)
    lock_file = get_lock_path(project_id)

    # Lock so that only one training run is running at the same time.
    # It doesn't lock the flask server/client.
    with SQLiteLock(
            lock_file, blocking=False, lock_name="training",
            project_id=project_id) as lock:

        # If the lock is not acquired, another training instance is running.
        if not lock.locked():
            logging.info("Project {project_id} - "
                         "Cannot acquire lock, other instance running.")
            return

        # Lock the current state. We want to have a consistent active state.
        # This does communicate with the flask backend; it prevents writing and
        # reading to the same files at the same time.
        with SQLiteLock(
                lock_file,
                blocking=True,
                lock_name="active",
                project_id=project_id) as lock:
            # Get all the labels since the last run. If none are new, quit.
            new_label_history = read_label_history(project_id)

        data_fp = str(get_data_file_path(project_id))
        as_data = read_data(project_id)
        state_file = get_state_path(project_id)

        # collect command line arguments and pass them to the reviewer
        with open(asr_kwargs_file, "r") as fp:
            asr_kwargs = json.load(fp)

        try:
            del asr_kwargs["abstract_only"]
        except KeyError:
            pass

        asr_kwargs['state_file'] = str(state_file)
        reviewer = get_reviewer(dataset=data_fp, mode="minimal", **asr_kwargs)

        with open_state(state_file) as state:
            old_label_history = _get_label_train_history(state)

        diff_history = _get_diff_history(new_label_history, old_label_history)

        if len(diff_history) == 0:
            logging.info(
                f"Project {project_id} - No new labels since last run.")
            return

        query_record_ids = np.array([x[0] for x in diff_history], dtype=int)
        inclusions = np.array([x[1] for x in diff_history], dtype=int)

        query_idx = convert_id_to_idx(as_data, query_record_ids)

        # Classify the new labels, train and store the results.
        with open_state(state_file) as state:
            reviewer.classify(
                query_idx, inclusions, state, method=label_method)
            reviewer.train()
            reviewer.log_probabilities(state)
            new_query_idx = reviewer.query(reviewer.n_pool()).tolist()
            reviewer.log_current_query(state)

            # write the proba to a pandas dataframe with record_ids as index
            proba = pd.DataFrame(
                {"proba": state.pred_proba.tolist()},
                index=pd.Index(as_data.record_ids, name="record_id")
            )

        # update the pool and output the proba's
        # important: pool is sorted on query
        with SQLiteLock(
                lock_file,
                blocking=True,
                lock_name="active",
                project_id=project_id) as lock:

            # read the pool
            current_pool = read_pool(project_id)

            # diff pool and new_query_ind
            current_pool_idx = convert_id_to_idx(as_data, current_pool)
            current_pool_idx = frozenset(current_pool_idx)
            new_pool_idx = [x for x in new_query_idx if x in current_pool_idx]

            # convert new_pool_idx back to record_ids
            new_pool = convert_idx_to_id(as_data, new_pool_idx)

            # write the pool and proba
            write_pool(project_id, new_pool)
            write_proba(project_id, proba)
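Example #9 works with record ids rather than positional indices, so it needs the converters convert_id_to_idx and convert_idx_to_id, which are not shown. A sketch of the mapping, assuming as_data.record_ids holds one record id per dataset row:

# Sketch; the real converters live elsewhere in the asreview code base.
import numpy as np


def convert_id_to_idx(as_data, record_ids):
    # positional index of each requested record id
    id_to_idx = {rid: i for i, rid in enumerate(as_data.record_ids)}
    return np.array([id_to_idx[rid] for rid in record_ids], dtype=int)


def convert_idx_to_id(as_data, indices):
    # record id at each positional index
    return np.array(as_data.record_ids)[indices]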