예제 #1
0
        def load_data(name, *args, **kwargs):
            """Build an ASReviewData object from a file, a URL, or a dataset alias.

            Extra positional and keyword arguments are forwarded unchanged to
            ``ASReviewData.from_file``.

            Parameters
            ----------
            name: str, pathlib.Path
                Path of an existing file, a URL, or an alias that the
                ``DatasetManager`` can resolve to a dataset.

            Returns
            -------
            asreview.ASReviewData:
                Initialized ASReview data object.

            Raises
            ------
            FileNotFoundError
                If ``name`` is not an existing path, not a URL, and the
                dataset lookup reports ``DataSetNotFoundError``.
            """
            if not (Path(name).exists() or is_url(name)):
                # Not directly readable: treat the name as a dataset alias and
                # let the dataset manager resolve it to something readable.
                try:
                    resolved = DatasetManager().find(name).get()
                    return ASReviewData.from_file(resolved, *args, **kwargs)
                except DataSetNotFoundError:
                    pass

                # Every lookup strategy failed; report it to the caller.
                raise FileNotFoundError(
                    f"File, URL, or dataset does not exist: '{name}'")

            # An existing path or a URL is handed to the reader as is.
            return ASReviewData.from_file(name, *args, **kwargs)
예제 #2
0
파일: base.py 프로젝트: personx000/asreview
    def from_file(cls, fp, read_fn=None, data_name=None, data_type=None):
        """Create an instance from a data file or URL.

        Two modes are supported. With ``read_fn`` given, that function reads
        the file. Without it, the reader is picked from the
        ``asreview.readers`` entry points: the entry whose name is the
        longest suffix of the file path (or URL path) is used.

        Parameters
        ----------
        fp: str, pathlib.Path
            File path or URL to read the data from.
        read_fn: callable
            Function to read the file. Its return value is passed to the
            class constructor as the data, so it should return a
            standardized dataframe.
        data_name: str
            Name of the data. Defaults to the stem of the file name.
        data_type: str
            What kind of data it is. Special names: 'included', 'excluded',
            'prior'.

        Returns
        -------
        cls
            New instance built from the data that was read.

        Raises
        ------
        ValueError
            If no ``asreview.readers`` entry point name matches the end of
            the path.
        """
        if not is_url(fp):
            location = str(Path(fp).resolve())
            default_name = Path(fp).stem
        else:
            # For URLs only the path component takes part in suffix matching
            # and naming.
            location = urlparse(fp).path
            default_name = Path(location.split("/")[-1]).stem

        data_name = default_name if data_name is None else data_name

        # Manual mode: the caller supplied the reader.
        if read_fn is not None:
            return cls(read_fn(fp), data_name=data_name, data_type=data_type)

        # Automatic mode: index the registered readers by entry point name.
        readers = {}
        for reader in pkg_resources.iter_entry_points('asreview.readers'):
            readers[reader.name] = reader

        matching = [ext for ext in readers if location.endswith(ext)]
        if not matching:
            raise ValueError(f"Error reading file {fp}, no capabilities for "
                             "reading such a file.")

        # Prefer the most specific (longest) matching suffix.
        loader = readers[max(matching, key=len)].load()
        frame, column_spec = loader(fp)
        return cls(frame,
                   column_spec=column_spec,
                   data_name=data_name,
                   data_type=data_type)
예제 #3
0
    def from_config(cls, config_file):
        """Create an instance from a JSON configuration.

        Parameters
        ----------
        config_file: str, dict
            A dictionary holding the configuration, a URL pointing to a JSON
            file, or the path of a JSON file on disk.

        Returns
        -------
        cls
            New instance, created without arguments, on which every key of
            the configuration has been set as an attribute.
        """
        # Handle the dictionary case first so a mapping is never handed to
        # the URL check or to open(), both of which are meant for strings.
        if isinstance(config_file, dict):
            config = config_file
        elif is_url(config_file):
            with urlopen(config_file) as f:
                config = json.loads(f.read().decode())
        else:
            # Read as UTF-8 explicitly: the URL branch decodes as UTF-8 too,
            # and the platform default encoding is not reliable for JSON.
            with open(config_file, "r", encoding="utf-8") as f:
                config = json.load(f)

        dataset = cls()
        for attr, val in config.items():
            setattr(dataset, attr, val)
        return dataset
예제 #4
0
    def from_config(cls, config_file):
        """Create DataSet from a JSON configuration file.

        Parameters
        ----------
        config_file: str, dict
            Can be a link to a config file or one on the disk.
            Another option is to supply a dictionary with the metadata.

        Returns
        -------
        cls
            New instance, created without arguments, carrying one attribute
            per key of the configuration.
        """
        # The dictionary check comes first so that metadata supplied as a
        # dict is used directly, without passing through the URL check.
        if isinstance(config_file, dict):
            config = config_file
        elif is_url(config_file):
            with urlopen(config_file) as f:
                config = json.loads(f.read().decode())
        else:
            # Explicit UTF-8 keeps local files consistent with the URL branch
            # instead of depending on the platform default encoding.
            with open(config_file, "r", encoding="utf-8") as f:
                config = json.load(f)

        # Set the attributes of the dataset.
        dataset = cls()
        for attr, val in config.items():
            setattr(dataset, attr, val)
        return dataset