def import_file(args):
    """
    Upload the given local file(s) and import them into a dataset.

    The target dataset is looked up by ``args.dataset``. When the lookup
    fails with a 404, the dataset is created through
    ``create_dataset(args)`` if ``args.create_dataset`` is set; otherwise
    a tip is printed and the process exits with status 1. Any other API
    error is re-raised unchanged.

    Command arguments (args):

        * create_dataset
        * template_id (not read here; presumably used by create_dataset)
        * genome_build
        * follow (default: False)
        * auto_approve (default: False)
        * dataset
        * file (list)

    """
    # Authenticate only when no API key has been configured yet.
    if not solvebio.api_key:
        solvebio.login()

    # Look up the target dataset, creating it on request when missing.
    try:
        dataset = solvebio.Dataset.retrieve(args.dataset)
    except solvebio.SolveError as err:
        # Only "not found" is handled here; everything else propagates.
        if err.status_code != 404:
            raise err

        print("Dataset not found: {0}".format(args.dataset))

        if not args.create_dataset:
            print("Tip: use the --create-dataset flag "
                  "to create one automatically")
            sys.exit(1)

        dataset = create_dataset(args)

    # Collect every local file into a single manifest.
    file_manifest = solvebio.Manifest()
    file_manifest.add(*args.file)

    # Submit one manifest-based import for the whole batch.
    import_params = dict(
        dataset_id=dataset.id,
        manifest=file_manifest.manifest,
        genome_build=args.genome_build,
        auto_approve=args.auto_approve,
    )
    dataset_import = solvebio.DatasetImport.create(**import_params)

    if args.follow:
        dataset_import.follow()
        return

    # Not following: point the user at the import's job page instead.
    details_url = 'https://my.solvebio.com/jobs/imports/{0}'.format(
        dataset_import.id)
    print("Your import has been submitted, view details at: {0}"
          .format(details_url))
def import_file(args):
    """
    Upload the given local file(s) and import them into a dataset.

    The dataset location is validated from ``args.full_path`` (with
    ``args.vault`` / ``args.path`` passed along as overrides). With
    ``args.create_dataset`` set, the dataset is obtained from
    ``create_dataset(args)``. Otherwise it is fetched by full path, and
    a 404 prints a tip and exits with status 1; any other API error is
    re-raised unchanged.

    Command arguments (args):

        * create_dataset
        * template_id (not read here; presumably used by create_dataset)
        * full_path
        * vault (optional, overrides the vault in full_path)
        * path (optional, overrides the path in full_path)
        * commit_mode
        * capacity (not read here; presumably used by create_dataset)
        * file (list)
        * follow (default: False)

    """
    # Only the validated full path is needed; the parsed parts are ignored.
    full_path, _ = Object.validate_full_path(
        args.full_path, vault=args.vault, path=args.path)

    # Obtain the target dataset: create it on request, otherwise look it up.
    if args.create_dataset:
        dataset = create_dataset(args)
    else:
        try:
            dataset = solvebio.Dataset.get_by_full_path(full_path)
        except solvebio.SolveError as err:
            # Only "not found" is handled here; everything else propagates.
            if err.status_code != 404:
                raise err

            for message in (
                "Dataset not found: {0}".format(full_path),
                "Tip: use the --create-dataset flag "
                "to create one from a template",
            ):
                print(message)
            sys.exit(1)

    # Collect every local file into a single manifest.
    file_manifest = solvebio.Manifest()
    file_manifest.add(*args.file)

    # Submit one manifest-based import for the whole batch.
    dataset_import = solvebio.DatasetImport.create(
        dataset_id=dataset.id,
        manifest=file_manifest.manifest,
        commit_mode=args.commit_mode,
    )

    if args.follow:
        dataset_import.follow()
        return

    # Not following: point the user at the activity page instead.
    activity_url = 'https://my.solvebio.com/activity/'
    print("Your import has been submitted, view details at: {0}".format(
        activity_url))
def import_file(args):
    """
    Import local or remote file(s) into a dataset, one import per file.

    Steps, in order:

        1. Resolve the list of files: remote globs are expanded through
           ``Object.all`` (at most 1000 matches per glob); local paths
           are taken as given. Exits with status 1 if nothing is found.
        2. Resolve an optional template from ``template_id`` or
           ``template_file``.
        3. Obtain the dataset: created through ``create_dataset`` when
           ``create_dataset`` is set, otherwise looked up by full path
           (exits with status 1 if it does not exist).
        4. In dry-run mode, print what would be imported and return
           ``None``.
        5. Otherwise create one ``DatasetImport`` per file and either
           follow the dataset activity or print the activity URL.

    Command arguments (args):

        * create_dataset and its args
            * capacity
            * template_id
            * template_file
            * tag
            * metadata
            * metadata_json_file
            * create_vault
        * full_path
        * commit_mode
        * remote_source
        * dry_run
        * follow
        * file (list)

    Returns a ``(imports, dataset)`` tuple, or ``None`` in dry-run mode.

    """
    dry_run = args.dry_run
    is_remote = args.remote_source
    # Prefix used in the "nothing found" messages below.
    source_label = 'remote ' if is_remote else ''

    if dry_run:
        print("NOTE: Running import command in dry run mode")

    # Only the validated full path is needed; the parsed parts are ignored.
    full_path, _ = Object.validate_full_path(args.full_path)

    if is_remote:
        # Expand each remote glob and keep every object that matches.
        files_list = []
        for pattern in args.file:
            matches = list(Object.all(glob=pattern, limit=1000))
            if matches:
                for match in matches:
                    print("Found file: {}".format(match.full_path))
                    files_list.append(match)
            else:
                print("Did not find any {}files at path {}".format(
                    source_label, pattern))
    else:
        # Local files
        # Note: if these are globs or folders, then this will
        # create a multi-file manifest which is deprecated
        # and should be updated to one file per import.
        files_list = list(args.file)

    if not files_list:
        print("Exiting. No files were found at the following {}paths: {}"
              .format(source_label, ', '.join(args.file)))
        sys.exit(1)

    # Resolve the optional template: by ID first, then from a file.
    template = None
    if args.template_id:
        try:
            template = DatasetTemplate.retrieve(args.template_id)
        except SolveError as err:
            # Only "not found" is handled here; everything else propagates.
            if err.status_code != 404:
                raise err
            print("No template with ID {0} found!".format(args.template_id))
            sys.exit(1)
    elif args.template_file:
        template = _create_template_from_file(args.template_file, dry_run)

    # Obtain the target dataset: create it on request, otherwise look it up.
    if args.create_dataset:
        dataset = create_dataset(args, template=template)
    else:
        try:
            dataset = Object.get_by_full_path(full_path, assert_type='dataset')
        except solvebio.errors.NotFoundError:
            for message in (
                "Dataset not found: {0}".format(full_path),
                "Tip: use the --create-dataset flag "
                "to create one from a template",
            ):
                print(message)
            sys.exit(1)

    if dry_run:
        # Report what would be imported, then stop before creating anything.
        print("Importing the following files/paths into dataset: {}"
              .format(full_path))
        for entry in files_list:
            print(entry.full_path if is_remote else entry)
        return

    # Create one import per file: remote files are referenced by object ID,
    # local files are wrapped in their own single-file manifest.
    imports = []
    for entry in files_list:
        if is_remote:
            import_kwargs = {'object_id': entry.id}
        else:
            file_manifest = solvebio.Manifest()
            file_manifest.add(entry)
            import_kwargs = {'manifest': file_manifest.manifest}

        # Add template params
        if template:
            import_kwargs.update(template.import_params)

        imports.append(DatasetImport.create(
            dataset_id=dataset.id,
            commit_mode=args.commit_mode,
            **import_kwargs
        ))

    if args.follow:
        dataset.activity(follow=True)
    else:
        activity_url = 'https://my.solvebio.com/activity/'
        print("Your import has been submitted, view details at: {0}"
              .format(activity_url))

    return imports, dataset
solvebio.login()

vault = solvebio.Vault.get_personal_vault()

# The folders that will contain your dataset
path = '/SampleImport/1.0.0'

# The name of your dataset
dataset_name = 'SampleDataset'

# Get the dataset at this full path, creating it if needed.
# `path` already starts with "/", so the template must not add a second
# slash after the colon (that would yield "vault://SampleImport/...").
dataset = solvebio.Dataset.get_or_create_by_full_path(
    '{0}:{1}/{2}'.format(vault.name, path, dataset_name),
)

# Create a manifest object and add a file to it
manifest = solvebio.Manifest()
manifest.add_file('path/to/file.vcf.gz')

# Create the import
imp = solvebio.DatasetImport.create(dataset_id=dataset.id,
                                    manifest=manifest.manifest)

# Prints updates as the data is processed
# and indexed into SolveBio
imp.follow()

#
# You now have data!
#

# Let's add some more records that include a new field