Beispiel #1
0
def import_file(args):
    """
    Upload one or more local files and import them into a dataset.

    The dataset named by ``args.dataset`` is retrieved first. If the API
    answers with a 404 the dataset is created via ``create_dataset(args)``
    when ``args.create_dataset`` is set; otherwise a tip is printed and the
    process exits with status 1. Any other API error is re-raised.

    Command arguments (args):

        * create_dataset
        * template_id (not read here; ``args`` is handed whole to
          ``create_dataset``)
        * genome_build
        * follow (default: False)
        * auto_approve (default: False)
        * dataset
        * file (list)

    """
    # Log in only when no API key has been configured yet.
    if not solvebio.api_key:
        solvebio.login()

    # Look the dataset up; fall back to creating it on a 404.
    try:
        dataset = solvebio.Dataset.retrieve(args.dataset)
    except solvebio.SolveError as err:
        # Anything other than "not found" is not handled here.
        if err.status_code != 404:
            raise err

        print("Dataset not found: {0}".format(args.dataset))

        if not args.create_dataset:
            print("Tip: use the --create-dataset flag "
                  "to create one automatically")
            sys.exit(1)

        dataset = create_dataset(args)

    # Collect every local file into a single manifest.
    local_manifest = solvebio.Manifest()
    local_manifest.add(*args.file)

    # Submit one manifest-based import for the whole manifest.
    import_params = dict(
        dataset_id=dataset.id,
        manifest=local_manifest.manifest,
        genome_build=args.genome_build,
        auto_approve=args.auto_approve,
    )
    dataset_import = solvebio.DatasetImport.create(**import_params)

    if args.follow:
        dataset_import.follow()
        return

    details_url = 'https://my.solvebio.com/jobs/imports/{0}'.format(
        dataset_import.id)
    print("Your import has been submitted, view details at: {0}"
          .format(details_url))
Beispiel #2
0
def import_file(args):
    """
    Upload one or more local files and import them into a dataset.

    The dataset location is validated from ``args.full_path`` (with the
    optional ``vault`` / ``path`` overrides). With ``args.create_dataset``
    the dataset is created through ``create_dataset(args)``; otherwise it is
    looked up by full path, and a 404 from the API prints a tip and exits
    the process with status 1. Any other API error is re-raised.

    Command arguments (args):

        * create_dataset
        * template_id
        * full_path
        * vault (optional, overrides the vault in full_path)
        * path (optional, overrides the path in full_path)
        * commit_mode
        * capacity
        * file (list)
        * follow (default: False)

    ``template_id`` and ``capacity`` are not read in this function; ``args``
    is passed whole to ``create_dataset``.
    """
    # Only the validated full path is used below; the second element of the
    # returned pair is unpacked but not needed.
    full_path, _path_parts = Object.validate_full_path(
        args.full_path, vault=args.vault, path=args.path)

    # Resolve the target dataset: look it up unless creation was requested.
    if not args.create_dataset:
        try:
            dataset = solvebio.Dataset.get_by_full_path(full_path)
        except solvebio.SolveError as err:
            # Anything other than "not found" is not handled here.
            if err.status_code != 404:
                raise err

            print("Dataset not found: {0}".format(full_path))
            print("Tip: use the --create-dataset flag "
                  "to create one from a template")
            sys.exit(1)
    else:
        dataset = create_dataset(args)

    # Collect every local file into a single manifest.
    local_manifest = solvebio.Manifest()
    local_manifest.add(*args.file)

    # Submit one manifest-based import for the whole manifest.
    dataset_import = solvebio.DatasetImport.create(
        dataset_id=dataset.id,
        manifest=local_manifest.manifest,
        commit_mode=args.commit_mode,
    )

    if args.follow:
        dataset_import.follow()
        return

    activity_url = 'https://my.solvebio.com/activity/'
    print("Your import has been submitted, view details at: {0}".format(
        activity_url))
Beispiel #3
0
def import_file(args):
    """
    Import local or remote files into a dataset, one import per file.

    Command arguments (args):

        * create_dataset and its args
            * capacity
            * template_id
            * template_file
            * tag
            * metadata
            * metadata_json_file
            * create_vault
        * full_path
        * commit_mode
        * remote_source
        * dry_run
        * follow
        * file (list)

    Returns:
        A ``(imports, dataset)`` tuple, where ``imports`` is the list of
        objects returned by ``DatasetImport.create`` (one per file).
        In dry-run mode the function returns ``None`` before creating any
        import.

    The process exits with status 1 when no files are found, when
    ``template_id`` does not resolve (404), or when the dataset does not
    exist and ``create_dataset`` is not set.
    """
    if args.dry_run:
        print("NOTE: Running import command in dry run mode")

    # Only the validated full path is needed; the second element is unused.
    full_path, _path_dict = Object.validate_full_path(args.full_path)

    if args.remote_source:
        # Expand each remote path/glob into the objects it matches.
        files_list = []
        for file_fp in args.file:
            # NOTE: each glob is capped at 1000 results by the limit below.
            matches = list(Object.all(glob=file_fp, limit=1000))
            if not matches:
                print("Did not find any remote files at path {}".format(
                    file_fp))
                continue

            for file_ in matches:
                print("Found file: {}".format(file_.full_path))
            files_list.extend(matches)
    else:
        # Local files
        # Note: if these are globs or folders, then this will
        # create a multi-file manifest which is deprecated
        # and should be updated to one file per import.
        files_list = list(args.file)

    if not files_list:
        print("Exiting. No files were found at the following {}paths: {}"
              .format('remote ' if args.remote_source else '',
                      ', '.join(args.file)))
        sys.exit(1)

    # Resolve the optional template: an existing ID takes precedence over
    # a template file.
    if args.template_id:
        try:
            template = DatasetTemplate.retrieve(args.template_id)
        except SolveError as e:
            if e.status_code != 404:
                # Bare raise keeps the original traceback intact.
                raise
            print("No template with ID {0} found!".format(args.template_id))
            sys.exit(1)
    elif args.template_file:
        template = _create_template_from_file(args.template_file, args.dry_run)
    else:
        template = None

    # Ensure the dataset exists. Create if necessary.
    if args.create_dataset:
        dataset = create_dataset(args, template=template)
    else:
        try:
            dataset = Object.get_by_full_path(full_path, assert_type='dataset')
        except solvebio.errors.NotFoundError:
            print("Dataset not found: {0}".format(full_path))
            print("Tip: use the --create-dataset flag "
                  "to create one from a template")
            sys.exit(1)

    if args.dry_run:
        # Report what would be imported and stop before creating imports.
        print("Importing the following files/paths into dataset: {}"
              .format(full_path))
        for file_ in files_list:
            # Remote entries are objects; local entries are plain paths.
            print(file_.full_path if args.remote_source else file_)
        return

    # Create one import per file: remote files are referenced by object ID,
    # local files go through a single-file manifest.
    imports = []
    for file_ in files_list:
        if args.remote_source:
            kwargs = dict(object_id=file_.id)
        else:
            manifest = solvebio.Manifest()
            manifest.add(file_)
            kwargs = dict(manifest=manifest.manifest)

        # Add template params
        if template:
            kwargs.update(template.import_params)

        # Create the import
        import_ = DatasetImport.create(
            dataset_id=dataset.id,
            commit_mode=args.commit_mode,
            **kwargs
        )

        imports.append(import_)

    if args.follow:
        dataset.activity(follow=True)
    else:
        mesh_url = 'https://my.solvebio.com/activity/'
        print("Your import has been submitted, view details at: {0}"
              .format(mesh_url))

    return imports, dataset
# Example script: create (or reuse) a dataset in the personal vault and
# import a single local file into it.
solvebio.login()

# The personal vault is used as the root of the dataset's full path
vault = solvebio.Vault.get_personal_vault()

# The folders that will contain your dataset
path = '/SampleImport/1.0.0'

# The name of your dataset
dataset_name = 'SampleDataset'

# Get the dataset at "<vault name>:/<path>/<dataset name>", creating it if
# it does not exist yet (per the method name).
# NOTE(review): `path` already starts with '/', so the formatted string
# contains '://' after the vault name - confirm the API normalizes this.
dataset = solvebio.Dataset.get_or_create_by_full_path(
    '{0}:/{1}/{2}'.format(vault.name, path, dataset_name), )

# Create a manifest object and add a file to it
manifest = solvebio.Manifest()
manifest.add_file('path/to/file.vcf.gz')

# Create the import
imp = solvebio.DatasetImport.create(dataset_id=dataset.id,
                                    manifest=manifest.manifest)

# Prints updates as the data is processed
# and indexed into SolveBio
imp.follow()

#
# You now have data!
#

# Let's add some more records that include a new field