def test_validate_appendables(ocx):
    """validate_appendables should return its input plus ``valid_metadata`` and ``valid_tags``."""
    raw = {"metadata": {"foo": "bar", "starred": "true"}, "tags": ["baz"]}

    # Shallow copy of the input, extended with the two keys the validator is
    # expected to add: "starred" coerced to a bool, the unrecognised "foo" key
    # nested under "custom", and each tag wrapped in a {"name": ...} dict.
    expected = dict(
        raw,
        valid_metadata={"starred": True, "custom": {"foo": "bar"}},
        valid_tags=[{"name": "baz"}],
    )

    assert validate_appendables(raw, ocx) == expected
Beispiel #2
0
def test_validate_appendables(ocx):
    """Check the validated view that validate_appendables adds next to the raw input."""
    given = {"metadata": {"foo": "bar", "starred": "true"}, "tags": ["baz"]}

    # The raw "metadata" and "tags" entries are expected to be returned unchanged.
    wanted = dict(given)
    wanted.update(
        {
            "valid_metadata": {"starred": True, "custom": {"foo": "bar"}},
            "valid_tags": [{"name": "baz"}],
        }
    )

    result = validate_appendables(given, ocx)
    assert result == wanted
Beispiel #3
0
def test_validate_appendables():
    """validate_appendables should echo the raw input and add the validated forms."""
    raw_metadata = {'foo': 'bar', 'starred': 'true'}
    initial_appendables = {'metadata': raw_metadata, 'tags': ['baz']}

    # Expected result: the raw entries (shallow-copied) plus the validated
    # metadata and tag structures.
    expected = dict(initial_appendables)
    expected['valid_metadata'] = {'starred': True, 'custom': {'foo': 'bar'}}
    expected['valid_tags'] = [{'name': 'baz'}]

    assert validate_appendables(initial_appendables, ocx()) == expected
Beispiel #4
0
def upload(
    ctx, files, max_threads, prompt, forward, reverse, tags, metadata, project_id, coerce_ascii
):
    """Upload a FASTA or FASTQ (optionally gzip'd) to One Codex"""
    # NOTE(review): the docstring above is presumably what the CLI shows as help
    # text (this function echoes ctx.get_help()), so it is left untouched and the
    # details are documented in comments instead.
    #
    # Arguments, as used below:
    #   ctx              -- command context; ctx.obj["API"] provides Samples.upload
    #   files            -- paths given positionally; may be empty
    #   max_threads      -- forwarded to run_via_threadpool
    #   prompt           -- when true, ask before interleaving detected read pairs;
    #                       when false, only pair files that were both passed in
    #   forward, reverse -- an explicit read pair; must be given together and
    #                       cannot be combined with `files`
    #   tags             -- iterable of tag names
    #   metadata         -- iterable of "key=value" strings
    #   project_id, coerce_ascii -- forwarded to the upload call

    appendables = {}
    if tags:
        appendables["tags"] = list(tags)

    if metadata:
        # Entries without an "=" separator are skipped; a repeated key keeps
        # the last value given.
        split_entries = (entry.split("=", 1) for entry in metadata)
        appendables["metadata"] = {
            parts[0]: parts[1] for parts in split_entries if len(parts) > 1
        }

    appendables = validate_appendables(appendables, ctx.obj["API"])

    # Exactly one of forward/reverse was supplied.
    if bool(forward) != bool(reverse):
        click.echo("You must specify both forward and reverse files", err=True)
        ctx.exit(1)

    if forward and reverse:
        if files:
            click.echo(
                "You may not pass a FILES argument when using the "
                " --forward and --reverse options.",
                err=True,
            )
            ctx.exit(1)
        files = [(forward, reverse)]
    elif not files:
        click.echo(ctx.get_help())
        return
    else:
        files = list(files)

        # "intelligently" find paired files and tuple them
        paired_files = []
        single_files = set(files)

        for filename in files:
            # Skip anything already consumed: either it was claimed as the mate
            # of an earlier file, or the same path was passed more than once.
            # Without this guard the remove() below raises KeyError and the
            # pair would be queued twice.
            if filename not in single_files:
                continue

            # convert "read 1" filenames into "read 2" and check that they exist; if they do
            # upload the files as a pair, autointerleaving them
            pair = re.sub(r"[._][Rr]1[._]", lambda x: x.group().replace("1", "2"), filename)

            # we don't necessarily need the R2 to have been passed in; we infer it anyway
            if pair == filename or not os.path.exists(pair):
                continue

            if not prompt and pair not in single_files:
                # if we're not prompting, don't automatically pull in files
                # not in the list the user passed in
                continue

            paired_files.append((filename, pair))
            single_files.discard(pair)
            single_files.remove(filename)

        auto_pair = True

        if prompt and paired_files:
            pair_list = "".join(
                "\n  {}  &  {}".format(os.path.basename(r1), os.path.basename(r2))
                for r1, r2 in paired_files
            )

            # click.confirm expects a boolean default; True means pressing
            # Enter accepts interleaving.
            auto_pair = click.confirm(
                "It appears there are paired files:{}\nInterleave them after upload?".format(
                    pair_list
                ),
                default=True,
            )

        if auto_pair:
            files = paired_files + list(single_files)

    # Pairs are sized with uncompressed=True and single files with
    # uncompressed=False (presumably because pairs are interleaved before
    # upload -- confirm against _file_size's callers).
    total_size = sum(
        (_file_size(x[0], uncompressed=True) + _file_size(x[1], uncompressed=True))
        if isinstance(x, tuple)
        else _file_size(x, uncompressed=False)
        for x in files
    )

    upload_kwargs = {
        "metadata": appendables["valid_metadata"],
        "tags": appendables["valid_tags"],
        "project": project_id,
        "coerce_ascii": coerce_ascii,
        "progressbar": progressbar(length=total_size, label="Uploading..."),
    }

    run_via_threadpool(
        ctx.obj["API"].Samples.upload,
        files,
        upload_kwargs,
        max_threads=max_threads,
        graceful_exit=False,
    )
Beispiel #5
0
def upload(ctx, files, max_threads, clean, no_interleave, prompt, validate,
           forward, reverse, tags, metadata):
    """Upload a FASTA or FASTQ (optionally gzip'd) to One Codex"""
    # NOTE(review): the docstring above is presumably what the CLI shows as help
    # text (this function echoes ctx.get_help()), so it is left untouched and the
    # details are documented in comments instead.
    #
    # Arguments, as used below:
    #   ctx              -- command context; ctx.obj['API'] provides Samples.upload
    #   files            -- paths given positionally; may be empty
    #   max_threads      -- forwarded to Samples.upload as `threads`
    #   clean            -- when false, ValidationWarning is escalated to an error
    #   no_interleave    -- when true, skip read-pair detection
    #   prompt           -- when true, ask before interleaving detected read pairs
    #   validate         -- forwarded to Samples.upload
    #   forward, reverse -- an explicit read pair; must be given together and
    #                       cannot be combined with `files`
    #   tags             -- iterable of tag names
    #   metadata         -- iterable of "key=value" strings
    #
    # Exits the process with status 1 on invalid arguments or upload failure.

    appendables = {}
    if tags:
        appendables['tags'] = list(tags)

    if metadata:
        appendables['metadata'] = {}
        for metadata_kv in metadata:
            # Entries without an '=' separator are skipped.
            key, separator, value = metadata_kv.partition('=')
            if separator:
                appendables['metadata'][snake_case(key)] = value

    appendables = validate_appendables(appendables, ctx.obj['API'])

    # Exactly one of forward/reverse was supplied.
    if bool(forward) != bool(reverse):
        click.echo('You must specify both forward and reverse files', err=True)
        sys.exit(1)
    if forward and reverse:
        if files:
            click.echo(
                'You may not pass a FILES argument when using the '
                ' --forward and --reverse options.',
                err=True)
            sys.exit(1)
        files = [(forward, reverse)]
        # The pair is explicit, so there is nothing left to auto-detect.
        no_interleave = True
    if not files:
        click.echo(ctx.get_help())
        return
    files = list(files)

    if not no_interleave:
        # "intelligently" find paired files and tuple them
        paired_files = []
        single_files = set(files)
        for filename in files:
            # Skip anything already consumed: either it was claimed as the mate
            # of an earlier file, or the same path was passed more than once.
            # Without this guard the remove() below raises KeyError and the
            # pair would be queued twice.
            if filename not in single_files:
                continue

            # convert "read 1" filenames into "read 2" and check that they exist; if they do
            # upload the files as a pair, autointerleaving them
            pair = re.sub(r'[._][Rr]1[._]',
                          lambda x: x.group().replace('1', '2'), filename)
            # we don't necessarily need the R2 to have been passed in; we infer it anyway
            if pair == filename or not os.path.exists(pair):
                continue

            if not prompt and pair not in single_files:
                # if we're not prompting, don't automatically pull in files
                # not in the list the user passed in
                continue

            paired_files.append((filename, pair))
            single_files.discard(pair)
            single_files.remove(filename)

        auto_pair = True
        if prompt and paired_files:
            pair_list = ''.join(
                '\n  {}  &  {}'.format(os.path.basename(r1),
                                       os.path.basename(r2))
                for r1, r2 in paired_files)

            # click.confirm expects a boolean default; True means pressing
            # Enter accepts interleaving.
            auto_pair = click.confirm(
                'It appears there are paired files:{}\nInterleave them after upload?'
                .format(pair_list),
                default=True)

        if auto_pair:
            files = paired_files + list(single_files)

    if not clean:
        # Turn validation warnings into exceptions so they are reported by the
        # handler below. Note this modifies the process-wide warnings filter.
        warnings.filterwarnings('error', category=ValidationWarning)

    try:
        # do the uploading
        ctx.obj['API'].Samples.upload(files,
                                      threads=max_threads,
                                      validate=validate,
                                      metadata=appendables['valid_metadata'],
                                      tags=appendables['valid_tags'])

    except ValidationWarning as e:
        sys.stderr.write('\nERROR: {}. {}'.format(
            e, 'Running with the --clean flag will suppress this error.'))
        sys.exit(1)
    except Exception as e:
        # Top-level CLI boundary: every remaining failure (validation errors,
        # upload errors, anything else) is reported the same way.
        # TODO: Some day improve specific other exception error messages, e.g., gzip CRC IOError
        sys.stderr.write('\nERROR: {}'.format(e))
        # NOTE(review): the address in the message below looks like an
        # obfuscated placeholder -- confirm the intended contact address.
        sys.stderr.write(
            '\nPlease feel free to contact us for help at [email protected]\n\n'
        )
        sys.exit(1)
Beispiel #6
0
def upload(
    ctx,
    files,
    max_threads,
    coerce_ascii,
    forward,
    reverse,
    prompt,
    tags,
    metadata,
    project_id,
    sample_id,
    external_sample_id,
):
    """Upload a FASTA or FASTQ (optionally gzip'd) to One Codex."""
    # NOTE(review): the docstring above is presumably what the CLI shows as help
    # text (this function echoes ctx.get_help()), so it is left untouched and the
    # details are documented in comments instead.
    #
    # Arguments, as used below:
    #   ctx              -- command context; ctx.obj["API"] provides Samples.upload
    #   files            -- paths given positionally; may be empty, no duplicates
    #   max_threads      -- worker count for run_via_threadpool, capped at 8
    #   forward, reverse -- an explicit read pair; must be given together and
    #                       cannot be combined with `files`
    #   prompt           -- when true, ask before interleaving detected read pairs;
    #                       when false, only pair files that were both passed in
    #   tags             -- iterable of tag names
    #   metadata         -- iterable of "key=value" strings
    #   sample_id, external_sample_id -- forwarded to the upload call; only
    #                       allowed when a single file or pair is uploaded
    #   project_id, coerce_ascii -- forwarded to the upload call
    appendables = {}
    if tags:
        appendables["tags"] = list(tags)

    if metadata:
        # Entries without an "=" separator are skipped; a repeated key keeps
        # the last value given.
        split_entries = (entry.split("=", 1) for entry in metadata)
        appendables["metadata"] = {
            parts[0]: parts[1] for parts in split_entries if len(parts) > 1
        }

    appendables = validate_appendables(appendables, ctx.obj["API"])

    # Exactly one of forward/reverse was supplied.
    if bool(forward) != bool(reverse):
        click.echo("You must specify both forward and reverse files", err=True)
        ctx.exit(1)

    if forward and reverse:
        if files:
            click.echo(
                "You may not pass a FILES argument when using the "
                " --forward and --reverse options.",
                err=True,
            )
            ctx.exit(1)
        files = [(forward, reverse)]
    elif not files:
        click.echo(ctx.get_help())
        return
    else:
        files = list(files)

        # "intelligently" find paired files and tuple them
        paired_files = []
        single_files = set(files)

        # A set drops repeated entries, so a size mismatch means the same path
        # was given more than once. (set.symmetric_difference() converts its
        # argument to a set first and therefore can never detect this.)
        if len(single_files) != len(files):
            click.echo(
                "Duplicate filenames detected in command line--please specific each file only once",
                err=True,
            )
            ctx.exit(1)

        for filename in files:
            # convert "read 1" filenames into "read 2" and check that they exist; if they do
            # upload the files as a pair, autointerleaving them
            pair = re.sub(r"[._][Rr]1[._]", lambda x: x.group().replace("1", "2"), filename)

            # we don't necessarily need the R2 to have been passed in; we infer it anyway
            if pair == filename or not os.path.exists(pair):
                continue

            if not prompt and pair not in single_files:
                # if we're not prompting, don't automatically pull in files
                # not in the list the user passed in
                continue

            paired_files.append((filename, pair))
            single_files.discard(pair)
            single_files.remove(filename)

        auto_pair = True

        if prompt and paired_files:
            pair_list = "".join(
                "\n  {}  &  {}".format(os.path.basename(r1), os.path.basename(r2))
                for r1, r2 in paired_files
            )

            # click.confirm expects a boolean default; True means pressing
            # Enter accepts interleaving.
            auto_pair = click.confirm(
                "It appears there are {n_paired_files} paired files (of {n_files} total):{pair_list}\nInterleave them after upload?".format(
                    n_paired_files=len(paired_files) * 2,
                    n_files=len(paired_files) * 2 + len(single_files),
                    pair_list=pair_list,
                ),
                default=True,
            )

        if auto_pair:
            files = paired_files + list(single_files)

    # Validate before sizing files or building the progress bar so an invalid
    # invocation fails without doing any of that work.
    if (sample_id or external_sample_id) and len(files) > 1:
        click.echo(
            "Please only specify a single file or pair of files to upload if using `sample_id` or `external_sample_id`",
            err=True,
        )
        ctx.exit(1)

    # On-disk size of everything to upload; a pair counts both of its files.
    total_size = sum(
        (os.path.getsize(x[0]) + os.path.getsize(x[1]))
        if isinstance(x, tuple)
        else os.path.getsize(x)
        for x in files
    )

    upload_kwargs = {
        "metadata": appendables["valid_metadata"],
        "tags": appendables["valid_tags"],
        "project": project_id,
        "coerce_ascii": coerce_ascii,
        "progressbar": progressbar(length=total_size, label="Uploading..."),
        "sample_id": sample_id,
        "external_sample_id": external_sample_id,
    }

    run_via_threadpool(
        ctx.obj["API"].Samples.upload,
        files,
        upload_kwargs,
        # Never use more than 8 worker threads, whatever the caller asked for.
        max_threads=min(max_threads, 8),
        graceful_exit=False,
    )