def get_dataset_metadata(exclude=None, include=None):
    """Return metadata dictionaries for datasets, filtered by group.

    Parameters
    ----------
    exclude: str, iterable
        Group id(s) to leave out of the result.
    include: str, iterable
        If given, restrict the result to these group id(s) only.

    Returns
    -------
    list:
        Metadata dictionaries; versioned datasets contribute a list of
        dicts (one per version), plain datasets a single dict.
    """
    manager = DatasetManager()
    groups = manager.groups.copy()

    if exclude is not None:
        # make iterable if not the case
        if not is_iterable(exclude):
            exclude = [exclude]
        # drop excluded groups; silently ignore ids that are not present
        for group_id in exclude:
            try:
                groups.remove(group_id)
            except ValueError:
                pass

    if include is not None:
        # make iterable if not the case
        if not is_iterable(include):
            include = [include]
        # BUGFIX: the original removed items from `groups` while iterating
        # over it, which skips the element following every removal. Build
        # a new filtered list instead.
        groups = [group_id for group_id in groups if group_id in include]

    # get datasets for the remaining groups
    all_datasets = manager.list(
        group_name=groups, latest_only=False, raise_on_error=True)

    result_datasets = []
    for group_id, data_list in all_datasets.items():
        for dataset in data_list:
            if isinstance(dataset, BaseVersionedDataSet):
                # flatten a versioned dataset into a list of version dicts
                cur_data = []
                for vdata in dataset.datasets:
                    # qualify the id with its group for uniqueness
                    vdata.dataset_id = f"{group_id}:{vdata.dataset_id}"
                    cur_data.append(vdata.to_dict())
                result_datasets.append(cur_data)
            else:
                dataset.dataset_id = f"{group_id}:{dataset.dataset_id}"
                result_datasets.append(dataset.to_dict())

    return result_datasets
def load_data(name, *args, **kwargs):
    """Load data from file, URL, or plugin.

    Parameters
    ----------
    name: str, pathlib.Path
        File path, URL, or alias of extension dataset.

    Returns
    -------
    asreview.ASReviewData:
        Initialized ASReview data object.

    Raises
    ------
    FileNotFoundError
        If `name` is neither an existing file, a URL, nor a known
        plugin dataset.
    """
    # A local file or a URL is used directly.
    if Path(name).exists() or is_url(name):
        return ASReviewData.from_file(name, *args, **kwargs)

    # Otherwise, try to resolve the name as a plugin dataset alias.
    try:
        dataset_path = DatasetManager().find(name).get()
    except DataSetNotFoundError:
        pass
    else:
        return ASReviewData.from_file(dataset_path, *args, **kwargs)

    # Nothing matched: not a file, URL, or registered dataset.
    raise FileNotFoundError(
        f"File, URL, or dataset does not exist: '{name}'")
def get_dataset_metadata(exclude=None, include=None):
    """Collect metadata dictionaries for the available datasets.

    Parameters
    ----------
    exclude: str, iterable
        Group id(s) to leave out of the result.
    include: str, iterable
        If given, restrict the result to these group id(s) only.

    Returns
    -------
    list:
        One list of metadata dicts per dataset (versioned datasets
        yield one dict per version).
    """
    all_datasets = DatasetManager().list(latest_only=False)

    if exclude is not None:
        # normalize a single id to a list
        exclude_ids = exclude if is_iterable(exclude) else [exclude]
        for group_id in exclude_ids:
            all_datasets.pop(group_id, None)

    if include is not None:
        # normalize a single id to a list
        include_ids = include if is_iterable(include) else [include]
        # iterate over a snapshot of the keys so deletion is safe
        for group_id in list(all_datasets):
            if group_id not in include_ids:
                del all_datasets[group_id]

    result_datasets = []
    for group_id, data_list in all_datasets.items():
        for dataset in data_list:
            if isinstance(dataset, BaseVersionedDataSet):
                # one dict per version, ids qualified with the group
                versions = []
                for vdata in dataset.datasets:
                    vdata.dataset_id = f"{group_id}:{vdata.dataset_id}"
                    versions.append(vdata.to_dict())
                result_datasets.append(versions)
            else:
                dataset.dataset_id = f"{group_id}:{dataset.dataset_id}"
                result_datasets.append([dataset.to_dict()])

    return result_datasets
def api_upload_data_to_project(project_id):  # noqa: F401
    """Upload a dataset into an existing project.

    The dataset comes from one of three sources, checked in order:
    a demo dataset alias (``demo_data`` form field), a remote URL
    (``url`` form field), or an uploaded file (``file`` in
    ``request.files``). On success the file is stored in the project's
    data folder and registered via ``add_dataset_to_project``.

    Returns a JSON response: 404 if the project does not exist, 400 on
    any upload/parse failure, otherwise ``{'success': True}``.
    """
    if not is_project(project_id):
        response = jsonify(message="Project not found.")
        return response, 404

    if request.form.get('demo_data', None):
        # download file and save to folder
        demo_data = DatasetManager().find(request.form['demo_data'])

        # NOTE(review): these three ids appear to have a dedicated demo
        # URL (url_demo); all other datasets use their regular URL —
        # confirm against the dataset definitions.
        if demo_data.dataset_id in ["hall", "ace", "ptsd"]:
            download_url = demo_data.url_demo
        else:
            download_url = demo_data.url

        # derive a safe local filename from the last URL path segment
        url_parts = urllib.parse.urlparse(download_url)
        filename = secure_filename(url_parts.path.rsplit('/', 1)[-1])

        urlretrieve(download_url, get_data_path(project_id) / filename)

    elif request.form.get('url', None):
        # download file from a user-supplied URL and save to folder
        download_url = request.form['url']
        try:
            url_parts = urllib.parse.urlparse(download_url)
            filename = secure_filename(url_parts.path.rsplit('/', 1)[-1])
            urlretrieve(download_url, get_data_path(project_id) / filename)
        except ValueError as err:
            # malformed URL; hint at the missing scheme when applicable
            logging.error(err)
            message = f"Invalid URL '{download_url}'."
            if isinstance(download_url, str) \
                    and not download_url.startswith("http"):
                message += " Usually, the URL starts with 'http' or 'https'."
            return jsonify(message=message), 400
        except Exception as err:
            # network or I/O failure during retrieval
            logging.error(err)
            message = f"Can't retrieve data from URL {download_url}."
            return jsonify(message=message), 400

    elif 'file' in request.files:
        data_file = request.files['file']

        # check that the uploaded file is in a correct format
        check_dataset(data_file)  # TODO{qubixes}: implement val strategy
        try:
            filename = secure_filename(data_file.filename)
            fp_data = get_data_path(project_id) / filename

            # save the file to the project's data folder
            data_file.save(str(fp_data))
        except Exception as err:
            logging.error(err)
            response = jsonify(
                message=f"Failed to upload file '(unknown)'. {err}")
            return response, 400
    else:
        # no recognized upload source in the request
        response = jsonify(message="No file or dataset found to upload.")
        return response, 400

    try:
        # add the file to the project
        add_dataset_to_project(project_id, filename)

    # Bad format. TODO{Jonathan} Return informative message with link.
    except BadFileFormatError as err:
        message = f"Failed to upload file '(unknown)'. {err}"
        return jsonify(message=message), 400

    response = jsonify({'success': True})
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
def test_datasets(data_name):
    """Check that the named dataset can be located and fetched."""
    dataset = DatasetManager().find(data_name)
    assert exists(dataset.get())
def api_upload_data_to_project(project_id):  # noqa: F401
    """Upload a dataset into an existing project.

    The dataset comes from one of four sources, checked in order:
    a plugin dataset (``plugin`` form field), a benchmark dataset
    (``benchmark`` form field), a remote URL (``url`` form field), or
    an uploaded file (``file`` in ``request.files``). On success the
    file is stored in the project's data folder and registered via
    ``add_dataset_to_project``.

    Returns a JSON response: 404 if the project does not exist, 400 on
    any upload/parse failure, otherwise ``{'success': True}``.
    """
    if not is_project(project_id):
        response = jsonify(message="Project not found.")
        return response, 404

    if request.form.get('plugin', None):
        # resolve the plugin dataset and download it into the project
        plugin_data = DatasetManager().find(request.form['plugin'])

        # derive a safe local filename from the last URL path segment
        url_parts = urllib.parse.urlparse(plugin_data.url)
        filename = secure_filename(url_parts.path.rsplit('/', 1)[-1])

        urlretrieve(plugin_data.url, get_data_path(project_id) / filename)

    elif request.form.get('benchmark', None):
        benchmark_dataset_id = DatasetManager().find(request.form['benchmark'])

        # read dataset
        df = pd.read_csv(benchmark_dataset_id.url)

        # rename label column so the ground truth is kept for debugging
        df.rename({"label_included": "debug_label"}, axis=1, inplace=True)

        # define export filepath
        url_parts = urllib.parse.urlparse(benchmark_dataset_id.url)
        filename = secure_filename(url_parts.path.rsplit('/', 1)[-1])
        export_fp = get_data_path(project_id) / filename

        # export file
        df.to_csv(export_fp, index=False)

    elif request.form.get('url', None):
        # download file from a user-supplied URL and save to folder
        download_url = request.form['url']
        try:
            url_parts = urllib.parse.urlparse(download_url)
            filename = secure_filename(url_parts.path.rsplit('/', 1)[-1])
            urlretrieve(download_url, get_data_path(project_id) / filename)
        except ValueError as err:
            # malformed URL; hint at the missing scheme when applicable
            logging.error(err)
            message = f"Invalid URL '{download_url}'."
            if isinstance(download_url, str) \
                    and not download_url.startswith("http"):
                message += " Usually, the URL starts with 'http' or 'https'."
            return jsonify(message=message), 400
        except Exception as err:
            # network or I/O failure during retrieval
            logging.error(err)
            message = f"Can't retrieve data from URL {download_url}."
            return jsonify(message=message), 400

    elif 'file' in request.files:
        data_file = request.files['file']

        # check that the uploaded file is in a correct format
        check_dataset(data_file)  # TODO{qubixes}: implement val strategy
        try:
            filename = secure_filename(data_file.filename)
            fp_data = get_data_path(project_id) / filename

            # save the file to the project's data folder
            data_file.save(str(fp_data))
        except Exception as err:
            logging.error(err)
            response = jsonify(
                message=f"Failed to upload file '(unknown)'. {err}")
            return response, 400
    else:
        # no recognized upload source in the request
        response = jsonify(message="No file or dataset found to upload.")
        return response, 400

    try:
        # add the file to the project
        add_dataset_to_project(project_id, filename)

    # Bad format. TODO{Jonathan} Return informative message with link.
    except BadFileFormatError as err:
        message = f"Failed to upload file '(unknown)'. {err}"
        return jsonify(message=message), 400

    response = jsonify({'success': True})
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response