Ejemplo n.º 1
0
        def load_data(name, *args, **kwargs):
            """Load data from file, URL, or plugin.

            Parameters
            ----------
            name: str, pathlib.Path
                File path, URL, or alias of extension dataset.
            *args, **kwargs:
                Forwarded to ``ASReviewData.from_file``.

            Returns
            -------
            asreview.ASReviewData:
                Initialized ASReview data object.

            Raises
            ------
            FileNotFoundError
                If ``name`` is neither an existing file, a URL, nor a
                known plugin dataset.
            """

            # check if name is an existing file path or a URL
            if Path(name).exists() or is_url(name):
                return ASReviewData.from_file(name, *args, **kwargs)

            # check if name is the alias of a plugin (extension) dataset
            try:
                dataset_path = DatasetManager().find(name).get()
                return ASReviewData.from_file(dataset_path, *args, **kwargs)
            except DataSetNotFoundError:
                pass

            # name could not be resolved anywhere; report it to the caller
            raise FileNotFoundError(
                f"File, URL, or dataset does not exist: '{name}'")
Ejemplo n.º 2
0
def read_data(project_id, use_cache=True, save_cache=True):
    """Return the ASReviewData object for a project.

    Parameters
    ----------
    project_id: str, iterable
        The project identifier.
    use_cache: bool
        Serve the pickled copy when one is available.
    save_cache: bool
        Write a pickled copy after reading from the dataset file.

    Returns
    -------
    ASReviewData:
        The data object for internal use in ASReview.

    """
    # Fast path: serve the cached (pickled) object when allowed.
    if use_cache:
        try:
            return _read_data_from_cache(project_id)
        except CacheDataError:
            # Cache unavailable or unreadable; fall through to the file.
            pass

    # Slow path: parse the original dataset file.
    data = ASReviewData.from_file(get_data_file_path(project_id))

    # Optionally refresh the cache for subsequent calls.
    if save_cache:
        _write_data_to_cache(project_id, data)

    return data
Ejemplo n.º 3
0
def read_data(project_id, save_tmp=True):
    """Get ASReviewData object from file.

    A pickled copy of the dataset is used when available; otherwise the
    original dataset file is parsed (and optionally pickled for next time).

    Parameters
    ----------
    project_id: str, iterable
        The project identifier.
    save_tmp: bool
        Save the file to a pickle file if not available.

    Returns
    -------
    ASReviewData:
        The data object for internal use in ASReview.

    """
    fp_data = get_data_file_path(project_id)
    fp_data_pickle = Path(fp_data).with_suffix(fp_data.suffix + ".pickle")

    try:
        # Fast path: load the cached pickle version.
        with open(fp_data_pickle, 'rb') as f_pickle_read:
            return pickle.load(f_pickle_read)
    except FileNotFoundError:
        # No cached copy yet; parse the original dataset file.
        data_obj = ASReviewData.from_file(fp_data)
    except (pickle.PickleError, EOFError):
        # Corrupted cache. A truncated pickle raises EOFError, which is
        # NOT a subclass of pickle.PickleError, so it must be caught
        # explicitly. Remove the bad file and re-parse the source.
        os.remove(fp_data_pickle)

        data_obj = ASReviewData.from_file(fp_data)

    # save a pickle version
    if save_tmp:
        logging.info("Store a copy of the data in a pickle file.")
        with open(fp_data_pickle, 'wb') as f_pickle:
            pickle.dump(data_obj, f_pickle)

    return data_obj
Ejemplo n.º 4
0
def test_no_seed():
    """Unseeded prior selection should eventually cover every record."""
    n_test_max = 100
    as_data = ASReviewData.from_file(data_fp)
    # Count how often each record was drawn as a prior.
    prior_counts = np.zeros(len(as_data), dtype=int)

    for _ in range(n_test_max):
        reviewer = get_reviewer(data_fp,
                                mode="simulate",
                                model="nb",
                                state_file=None,
                                init_seed=None,
                                n_prior_excluded=1,
                                n_prior_included=1)
        # Exactly one included and one excluded prior each round.
        assert len(reviewer.start_idx) == 2
        prior_counts[reviewer.start_idx] += 1
        # Stop as soon as every record has been selected at least once.
        if np.all(prior_counts > 0):
            return
    raise ValueError(f"Error getting all priors in {n_test_max} iterations.")
Ejemplo n.º 5
0
def read_data(project_id):
    """Return the ASReviewData object read from the project's dataset file."""
    return ASReviewData.from_file(get_data_file_path(project_id))
Ejemplo n.º 6
0
def create_as_data(dataset,
                   included_dataset=None,
                   excluded_dataset=None,
                   prior_dataset=None,
                   new=False):
    """Create ASReviewData object from multiple datasets.

    Parameters
    ----------
    dataset: str, PurePath, list
        Dataset(s) forming the base data object.
    included_dataset: str, PurePath, list, optional
        Dataset(s) appended with ``data_type="included"``.
    excluded_dataset: str, PurePath, list, optional
        Dataset(s) appended with ``data_type="excluded"``.
    prior_dataset: str, PurePath, list, optional
        Dataset(s) appended with ``data_type="prior"``.
    new: bool
        If True, reset the labels of the base datasets to LABEL_NA.

    Returns
    -------
    ASReviewData:
        The combined data object.
    """

    def _as_list(value):
        # Normalize a single path/str (or a None default) to a list.
        # None defaults replace the original mutable `[]` defaults, which
        # are a shared-mutable-default-argument pitfall.
        if value is None:
            return []
        if isinstance(value, (str, PurePath)):
            return [value]
        return value

    dataset = _as_list(dataset)
    included_dataset = _as_list(included_dataset)
    excluded_dataset = _as_list(excluded_dataset)
    prior_dataset = _as_list(prior_dataset)

    as_data = ASReviewData()
    # Find the URL of the datasets if the dataset is an example dataset.
    for data in dataset:
        as_data.append(ASReviewData.from_file(find_data(data)))

    if new:
        # Discard existing labels so the merged data starts unlabeled.
        as_data.labels = np.full((len(as_data), ), LABEL_NA, dtype=int)
    for data in included_dataset:
        as_data.append(
            ASReviewData.from_file(find_data(data), data_type="included"))
    for data in excluded_dataset:
        as_data.append(
            ASReviewData.from_file(find_data(data), data_type="excluded"))
    for data in prior_dataset:
        as_data.append(
            ASReviewData.from_file(find_data(data), data_type="prior"))
    return as_data