from pathlib import Path

from asreview import ASReviewData
from asreview.datasets import DatasetManager, DataSetNotFoundError
from asreview.utils import is_url


def load_data(name, *args, **kwargs):
    """Load data from file, URL, or plugin.

    Parameters
    ----------
    name: str, pathlib.Path
        File path, URL, or alias of extension dataset.

    Returns
    -------
    asreview.ASReviewData:
        Initialized ASReview data object.
    """
    # check if name is a file or URL
    if Path(name).exists() or is_url(name):
        return ASReviewData.from_file(name, *args, **kwargs)

    # check if name is a plugin dataset
    try:
        dataset_path = DatasetManager().find(name).get()
        return ASReviewData.from_file(dataset_path, *args, **kwargs)
    except DataSetNotFoundError:
        pass

    # could not resolve the name to a file, URL, or dataset
    raise FileNotFoundError(
        f"File, URL, or dataset does not exist: '{name}'")
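# A minimal usage sketch for load_data. Both inputs are illustrative: the
# CSV path is a hypothetical local file, and the alias form assumes a
# dataset extension that registers it with DatasetManager.
data = load_data("data/screening.csv")
print(f"{len(data)} records loaded")

data = load_data("benchmark:van_de_Schoot_2017")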
from asreview import ASReviewData

# get_data_file_path, _read_data_from_cache, _write_data_to_cache, and
# CacheDataError are module-level helpers defined alongside this function.


def read_data(project_id, use_cache=True, save_cache=True):
    """Get ASReviewData object from file.

    Parameters
    ----------
    project_id: str, iterable
        The project identifier.
    use_cache: bool
        Use the pickle file if available.
    save_cache: bool
        Save the data to a pickle file if not yet cached.

    Returns
    -------
    ASReviewData:
        The data object for internal use in ASReview.
    """
    # use the cache file if available
    if use_cache:
        try:
            return _read_data_from_cache(project_id)
        except CacheDataError:
            pass

    # load from file
    fp_data = get_data_file_path(project_id)
    data_obj = ASReviewData.from_file(fp_data)

    # save a pickle version
    if save_cache:
        _write_data_to_cache(project_id, data_obj)

    return data_obj
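# The cache helpers that read_data relies on are not shown above. The
# following is a minimal sketch of what they could look like, modeled on
# the pickle-based read_data variant below; _get_cache_path and the exact
# error handling are assumptions rather than the original implementation.
import pickle
from pathlib import Path


class CacheDataError(Exception):
    """Raised when the cached data cannot be used."""


def _get_cache_path(project_id):
    # hypothetical helper: store the pickle next to the data file
    fp_data = get_data_file_path(project_id)
    return Path(fp_data).with_suffix(fp_data.suffix + ".pickle")


def _read_data_from_cache(project_id):
    try:
        with open(_get_cache_path(project_id), 'rb') as f_pickle:
            return pickle.load(f_pickle)
    except FileNotFoundError:
        raise CacheDataError("No pickle file available.")
    except pickle.PickleError:
        raise CacheDataError("Could not read pickle file.")


def _write_data_to_cache(project_id, data_obj):
    with open(_get_cache_path(project_id), 'wb') as f_pickle:
        pickle.dump(data_obj, f_pickle)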
import logging
import os
import pickle
from pathlib import Path

from asreview import ASReviewData

# get_data_file_path is a module-level helper defined alongside this function.


def read_data(project_id, save_tmp=True):
    """Get ASReviewData object from file.

    Parameters
    ----------
    project_id: str, iterable
        The project identifier.
    save_tmp: bool
        Save the data to a pickle file if not yet cached.

    Returns
    -------
    ASReviewData:
        The data object for internal use in ASReview.
    """
    fp_data = get_data_file_path(project_id)
    fp_data_pickle = Path(fp_data).with_suffix(fp_data.suffix + ".pickle")

    try:
        # read the cached pickle data
        with open(fp_data_pickle, 'rb') as f_pickle_read:
            data_obj = pickle.load(f_pickle_read)
            return data_obj
    except FileNotFoundError:
        # no pickle file available yet; read the original data file
        data_obj = ASReviewData.from_file(fp_data)
    except pickle.PickleError:
        # the pickle file is corrupt; remove it and read the original file
        os.remove(fp_data_pickle)
        data_obj = ASReviewData.from_file(fp_data)

    # save a pickle version
    if save_tmp:
        logging.info("Store a copy of the data in a pickle file.")
        with open(fp_data_pickle, 'wb') as f_pickle:
            pickle.dump(data_obj, f_pickle)

    return data_obj
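# Usage sketch (the project identifier is hypothetical): the first call
# parses the data file and writes a ".pickle" sidecar next to it; the
# second call is served from that sidecar. Deleting the sidecar file is
# enough to invalidate the cache.
data_obj = read_data("project-1")  # parses the file, writes the pickle
data_obj = read_data("project-1")  # loaded from the pickle sidecar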
import numpy as np

from asreview import ASReviewData

# data_fp and get_reviewer are module-level test helpers defined alongside
# this test.


def test_no_seed():
    n_test_max = 100
    as_data = ASReviewData.from_file(data_fp)
    n_priored = np.zeros(len(as_data), dtype=int)

    # without a seed, repeated runs should eventually select every record
    # as a prior at least once
    for _ in range(n_test_max):
        reviewer = get_reviewer(data_fp,
                                mode="simulate",
                                model="nb",
                                state_file=None,
                                init_seed=None,
                                n_prior_excluded=1,
                                n_prior_included=1)
        assert len(reviewer.start_idx) == 2
        n_priored[reviewer.start_idx] += 1
        if np.all(n_priored > 0):
            return
    raise ValueError(f"Error getting all priors in {n_test_max} iterations.")
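# Companion sketch for the seeded case, assuming get_reviewer passes
# init_seed through to the prior sampler; the seed value is arbitrary and
# the test name is hypothetical.
def test_with_seed():
    # identical seeds should yield identical prior selections
    reviewer_a = get_reviewer(data_fp, mode="simulate", model="nb",
                              state_file=None, init_seed=535,
                              n_prior_excluded=1, n_prior_included=1)
    reviewer_b = get_reviewer(data_fp, mode="simulate", model="nb",
                              state_file=None, init_seed=535,
                              n_prior_excluded=1, n_prior_included=1)
    assert list(reviewer_a.start_idx) == list(reviewer_b.start_idx)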
from asreview import ASReviewData

# get_data_file_path is a module-level helper defined alongside this function.


def read_data(project_id):
    """Get ASReviewData object of the dataset."""
    dataset = get_data_file_path(project_id)
    return ASReviewData.from_file(dataset)
from pathlib import PurePath

import numpy as np

from asreview import ASReviewData
from asreview.config import LABEL_NA

# find_data is a module-level helper that resolves a file path, URL, or
# dataset alias to a readable location.


def create_as_data(dataset,
                   included_dataset=[],
                   excluded_dataset=[],
                   prior_dataset=[],
                   new=False):
    """Create ASReviewData object from multiple datasets."""
    if isinstance(dataset, (str, PurePath)):
        dataset = [dataset]
    if isinstance(included_dataset, (str, PurePath)):
        included_dataset = [included_dataset]
    if isinstance(excluded_dataset, (str, PurePath)):
        excluded_dataset = [excluded_dataset]
    if isinstance(prior_dataset, (str, PurePath)):
        prior_dataset = [prior_dataset]

    as_data = ASReviewData()

    # find the URL of the dataset if the dataset is an example dataset
    for data in dataset:
        as_data.append(ASReviewData.from_file(find_data(data)))

    if new:
        as_data.labels = np.full((len(as_data), ), LABEL_NA, dtype=int)

    for data in included_dataset:
        as_data.append(
            ASReviewData.from_file(find_data(data), data_type="included"))
    for data in excluded_dataset:
        as_data.append(
            ASReviewData.from_file(find_data(data), data_type="excluded"))
    for data in prior_dataset:
        as_data.append(
            ASReviewData.from_file(find_data(data), data_type="prior"))

    return as_data
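# Usage sketch with hypothetical file names: combine a main dataset with a
# prior-knowledge dataset. With new=True, the labels of the main dataset
# are reset to LABEL_NA, while records from prior_dataset keep their role
# as prior knowledge.
as_data = create_as_data("records.ris",
                         prior_dataset=["priors.ris"],
                         new=True)
print(f"{len(as_data)} records combined")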