def load_data(name, *args, **kwargs):
    """Load a dataset given a file path, a URL, or a dataset alias.

    Extra positional and keyword arguments are forwarded unchanged to
    ``ASReviewData.from_file``.

    Parameters
    ----------
    name: str, pathlib.Path
        File path, URL, or alias of an extension dataset.

    Returns
    -------
    asreview.ASReviewData:
        Initialized ASReview data object.

    Raises
    ------
    FileNotFoundError
        If ``name`` is not an existing path, not a URL, and the dataset
        lookup raises ``DataSetNotFoundError``.
    """
    # An existing path or a URL can be handed to the reader directly.
    readable_as_is = Path(name).exists() or is_url(name)
    if readable_as_is:
        return ASReviewData.from_file(name, *args, **kwargs)

    # Otherwise treat the name as a dataset alias and resolve it.
    try:
        dataset = DatasetManager().find(name)
        resolved_location = dataset.get()
        return ASReviewData.from_file(resolved_location, *args, **kwargs)
    except DataSetNotFoundError:
        # Unknown alias: fall through to the error below.
        pass

    # Nothing matched, so report it to the caller.
    message = f"File, URL, or dataset does not exist: '{name}'"
    raise FileNotFoundError(message)
def from_file(cls, fp, read_fn=None, data_name=None, data_type=None):
    """Create an instance from a data file or URL.

    There are two modes. With ``read_fn`` supplied, its return value is
    passed straight to the constructor. Without it, the readers
    registered under the ``asreview.readers`` entry point group are
    searched for the longest name that the path ends with, and that
    reader is loaded and called.

    Arguments
    ---------
    fp: str, pathlib.Path
        Read the data from this file or URL.
    read_fn: callable
        Function to read the file. It should return a standardized
        dataframe.
    data_name: str
        Name of the data. Defaults to the stem of the file name.
    data_type: str
        What kind of data it is. Special names: 'included', 'excluded',
        'prior'.

    Raises
    ------
    ValueError
        If no registered reader name matches the end of the path.
    """
    # Work out the path used for suffix matching and a fallback name.
    if is_url(fp):
        path = urlparse(fp).path
        fallback_name = Path(path.split("/")[-1]).stem
    else:
        path = str(Path(fp).resolve())
        fallback_name = Path(fp).stem

    if data_name is None:
        data_name = fallback_name

    # Manual mode: the caller provided the reader.
    if read_fn is not None:
        return cls(read_fn(fp), data_name=data_name, data_type=data_type)

    # Automatic mode: index the registered readers by entry point name.
    readers = {}
    for entry in pkg_resources.iter_entry_points('asreview.readers'):
        readers[entry.name] = entry

    # The longest matching name wins, so a more specific suffix beats a
    # shorter one that also matches.
    candidates = [suffix for suffix in readers if path.endswith(suffix)]
    if len(candidates) == 0:
        raise ValueError(f"Error reading file {fp}, no capabilities for "
                         "reading such a file.")
    chosen_suffix = max(candidates, key=len)

    reader = readers[chosen_suffix].load()
    df, column_spec = reader(fp)
    return cls(
        df,
        column_spec=column_spec,
        data_name=data_name,
        data_type=data_type,
    )
def from_config(cls, config_file):
    """Create a dataset from a JSON configuration.

    Parameters
    ----------
    config_file: str, dict
        URL of a JSON config file, path to one on disk, or a dictionary
        that already holds the configuration.

    Returns
    -------
    cls:
        A new instance, created with no arguments, carrying one
        attribute per key of the configuration.
    """
    if isinstance(config_file, dict):
        # Checked first so a dictionary never reaches the URL probe,
        # which is meant for strings.
        config = config_file
    elif is_url(config_file):
        with urlopen(config_file) as f:
            config = json.loads(f.read().decode())
    else:
        # Read as UTF-8 explicitly: the URL branch decodes as UTF-8 too,
        # and the platform default encoding is locale dependent.
        with open(config_file, "r", encoding="utf-8") as f:
            config = json.load(f)

    # Copy every configuration entry onto the new dataset as an attribute.
    dataset = cls()
    for attr, val in config.items():
        setattr(dataset, attr, val)
    return dataset
def from_config(cls, config_file):
    """Create DataSet from a JSON configuration file.

    Parameters
    ----------
    config_file: str, dict
        Can be a link to a config file or one on the disk. Another option
        is to supply a dictionary with the metadata.

    Returns
    -------
    cls:
        A new instance, created with no arguments, with every key of the
        configuration set as an attribute.
    """
    if isinstance(config_file, dict):
        # Handle dictionaries before the URL check so that check only
        # ever sees path-like or string input.
        config = config_file
    elif is_url(config_file):
        with urlopen(config_file) as f:
            config = json.loads(f.read().decode())
    else:
        # Explicit UTF-8 keeps local files consistent with the URL branch
        # and independent of the locale's default encoding.
        with open(config_file, "r", encoding="utf-8") as f:
            config = json.load(f)

    # Set the attributes of the dataset.
    dataset = cls()
    for attr, val in config.items():
        setattr(dataset, attr, val)
    return dataset