def test_validate_appendables(ocx):
    """Check that validate_appendables adds the `valid_*` keys for raw tags/metadata."""
    raw = {"metadata": {"foo": "bar", "starred": "true"}, "tags": ["baz"]}

    # Shallow copy of the input, taken *before* the call (nested values stay shared with
    # `raw`), extended with the entries the validator is expected to add.
    expected = {
        **raw,
        "valid_metadata": {"starred": True, "custom": {"foo": "bar"}},
        "valid_tags": [{"name": "baz"}],
    }

    result = validate_appendables(raw, ocx)

    assert result == expected
def test_validate_appendables():
    """Check that validate_appendables adds the `valid_*` keys for raw tags/metadata."""
    raw = {'metadata': {'foo': 'bar', 'starred': 'true'}, 'tags': ['baz']}

    # Shallow copy of the input, made before the call so nested values stay shared with
    # `raw`; the keyword arguments are the entries the validator is expected to add.
    expected = dict(
        raw,
        valid_metadata={'starred': True, 'custom': {'foo': 'bar'}},
        valid_tags=[{'name': 'baz'}],
    )

    assert validate_appendables(raw, ocx()) == expected
def upload(
    ctx, files, max_threads, prompt, forward, reverse, tags, metadata, project_id, coerce_ascii
):
    """Upload a FASTA or FASTQ (optionally gzip'd) to One Codex"""
    # NOTE(review): ctx.get_help() is echoed below, so the docstring above is presumably the
    # user-facing help text; it is intentionally kept verbatim and the details live here.
    #
    # Parameters:
    #   ctx          -- click context; ctx.obj["API"] is the API object used for validation
    #                   and for Samples.upload
    #   files        -- file paths given on the command line
    #   max_threads  -- forwarded to run_via_threadpool
    #   prompt       -- when truthy, ask before interleaving detected R1/R2 pairs
    #   forward, reverse -- explicit pair of files; both or neither must be given
    #   tags         -- iterable of tag names
    #   metadata     -- iterable of "key=value" strings; entries without "=" are ignored
    #   project_id, coerce_ascii -- forwarded to Samples.upload as "project"/"coerce_ascii"
    #
    # Exits with status 1 (via ctx.exit) on inconsistent forward/reverse/FILES usage.

    # Collect the raw tags/metadata and let validate_appendables produce the
    # "valid_tags"/"valid_metadata" entries that are read further down.
    appendables = {}
    if tags:
        appendables["tags"] = list(tags)

    if metadata:
        appendables["metadata"] = {}
        for metadata_kv in metadata:
            # split on the first "=" only, so values may themselves contain "="
            key, separator, value = metadata_kv.partition("=")
            if separator:
                appendables["metadata"][key] = value

    appendables = validate_appendables(appendables, ctx.obj["API"])

    if (forward or reverse) and not (forward and reverse):
        click.echo("You must specify both forward and reverse files", err=True)
        ctx.exit(1)

    if forward and reverse:
        if len(files) > 0:
            click.echo(
                "You may not pass a FILES argument when using the "
                " --forward and --reverse options.",
                err=True,
            )
            ctx.exit(1)
        files = [(forward, reverse)]
    elif len(files) == 0:
        click.echo(ctx.get_help())
        return
    else:
        files = list(files)

        # "intelligently" find paired files and tuple them
        paired_files = []
        single_files = set(files)

        for filename in files:
            # A file may already have been consumed: either it was listed twice, or it was
            # claimed as the mate of an earlier file. Processing it again would make
            # single_files.remove() below raise KeyError, so skip it.
            if filename not in single_files:
                continue

            # convert "read 1" filenames into "read 2" and check that they exist; if they do
            # upload the files as a pair, autointerleaving them
            pair = re.sub("[._][Rr]1[._]", lambda x: x.group().replace("1", "2"), filename)

            # we don't necessary need the R2 to have been passed in; we infer it anyways
            if pair != filename and os.path.exists(pair):
                if not prompt and pair not in single_files:
                    # if we're not prompting, don't automatically pull in files
                    # not in the list the user passed in
                    continue

                paired_files.append((filename, pair))
                if pair in single_files:
                    single_files.remove(pair)
                single_files.remove(filename)

        auto_pair = True

        if prompt and len(paired_files) > 0:
            pair_list = ""
            for p in paired_files:
                pair_list += "\n {} & {}".format(os.path.basename(p[0]), os.path.basename(p[1]))

            answer = click.confirm(
                "It appears there are paired files:{}\nInterleave them after upload?".format(
                    pair_list
                ),
                default=True,
            )

            if not answer:
                auto_pair = False

        if auto_pair:
            files = paired_files + list(single_files)

    # Total used as the progress bar length: pairs are measured with uncompressed=True,
    # single files with uncompressed=False.
    total_size = sum(
        (_file_size(x[0], uncompressed=True) + _file_size(x[1], uncompressed=True))
        if isinstance(x, tuple)
        else _file_size(x, uncompressed=False)
        for x in files
    )

    upload_kwargs = {
        "metadata": appendables["valid_metadata"],
        "tags": appendables["valid_tags"],
        "project": project_id,
        "coerce_ascii": coerce_ascii,
        "progressbar": progressbar(length=total_size, label="Uploading..."),
    }

    run_via_threadpool(
        ctx.obj["API"].Samples.upload,
        files,
        upload_kwargs,
        max_threads=max_threads,
        graceful_exit=False,
    )
def upload(ctx, files, max_threads, clean, no_interleave, prompt, validate, forward, reverse,
           tags, metadata):
    """Upload a FASTA or FASTQ (optionally gzip'd) to One Codex"""
    # NOTE(review): ctx.get_help() is echoed below, so the docstring above is presumably the
    # user-facing help text; it is intentionally kept verbatim and the details live here.
    #
    # Parameters:
    #   ctx           -- click context; ctx.obj['API'] is the API object used for validation
    #                    and for Samples.upload
    #   files         -- file paths given on the command line
    #   max_threads   -- forwarded to Samples.upload as `threads`
    #   clean         -- when falsy, ValidationWarning is escalated to an error
    #   no_interleave -- when truthy, skip the automatic R1/R2 pairing
    #   prompt        -- when truthy, ask before interleaving detected pairs
    #   validate      -- forwarded to Samples.upload
    #   forward, reverse -- explicit pair of files; both or neither must be given
    #   tags          -- iterable of tag names
    #   metadata      -- iterable of "key=value" strings; entries without "=" are ignored
    #
    # Exits the process with status 1 on invalid arguments or on any upload failure.

    # Collect the raw tags/metadata and let validate_appendables produce the
    # 'valid_tags'/'valid_metadata' entries that are read further down.
    appendables = {}
    if tags:
        appendables['tags'] = list(tags)

    if metadata:
        appendables['metadata'] = {}
        for metadata_kv in metadata:
            # split on the first "=" only, so values may themselves contain "="
            key, separator, value = metadata_kv.partition('=')
            if separator:
                appendables['metadata'][snake_case(key)] = value

    appendables = validate_appendables(appendables, ctx.obj['API'])

    if (forward or reverse) and not (forward and reverse):
        click.echo('You must specify both forward and reverse files', err=True)
        sys.exit(1)

    if forward and reverse:
        if len(files) > 0:
            click.echo(
                'You may not pass a FILES argument when using the '
                ' --forward and --reverse options.',
                err=True)
            sys.exit(1)
        files = [(forward, reverse)]
        # an explicit pair needs no filename-based pairing
        no_interleave = True

    if len(files) == 0:
        click.echo(ctx.get_help())
        return
    else:
        files = list(files)

    if not no_interleave:
        # "intelligently" find paired files and tuple them
        paired_files = []
        single_files = set(files)

        for filename in files:
            # A file may already have been consumed: either it was listed twice, or it was
            # claimed as the mate of an earlier file. Processing it again would make
            # single_files.remove() below raise KeyError, so skip it.
            if filename not in single_files:
                continue

            # convert "read 1" filenames into "read 2" and check that they exist; if they do
            # upload the files as a pair, autointerleaving them
            pair = re.sub('[._][Rr]1[._]', lambda x: x.group().replace('1', '2'), filename)

            # we don't necessary need the R2 to have been passed in; we infer it anyways
            if pair != filename and os.path.exists(pair):
                if not prompt and pair not in single_files:
                    # if we're not prompting, don't automatically pull in files
                    # not in the list the user passed in
                    continue

                paired_files.append((filename, pair))
                if pair in single_files:
                    single_files.remove(pair)
                single_files.remove(filename)

        auto_pair = True

        if prompt and len(paired_files) > 0:
            pair_list = ''
            for p in paired_files:
                pair_list += '\n {} & {}'.format(os.path.basename(p[0]), os.path.basename(p[1]))

            answer = click.confirm(
                'It appears there are paired files:{}\nInterleave them after upload?'
                .format(pair_list),
                default=True)

            if not answer:
                auto_pair = False

        if auto_pair:
            files = paired_files + list(single_files)

    if not clean:
        # turn validation warnings into exceptions so they abort the upload below
        warnings.filterwarnings('error', category=ValidationWarning)

    try:
        # do the uploading
        ctx.obj['API'].Samples.upload(files,
                                      threads=max_threads,
                                      validate=validate,
                                      metadata=appendables['valid_metadata'],
                                      tags=appendables['valid_tags'])
    except ValidationWarning as e:
        sys.stderr.write('\nERROR: {}. {}'.format(
            e, 'Running with the --clean flag will suppress this error.'))
        sys.exit(1)
    except (ValidationError, UploadException, Exception) as e:
        # TODO: Some day improve specific other exception error messages, e.g., gzip CRC IOError
        sys.stderr.write('\nERROR: {}'.format(e))
        # NOTE(review): the address in the message below looks like an email-obfuscation
        # placeholder rather than a real contact address -- confirm the intended value.
        sys.stderr.write(
            '\nPlease feel free to contact us for help at [email protected]\n\n'
        )
        sys.exit(1)
def upload(
    ctx,
    files,
    max_threads,
    coerce_ascii,
    forward,
    reverse,
    prompt,
    tags,
    metadata,
    project_id,
    sample_id,
    external_sample_id,
):
    """Upload a FASTA or FASTQ (optionally gzip'd) to One Codex."""
    # NOTE(review): ctx.get_help() is echoed below, so the docstring above is presumably the
    # user-facing help text; it is intentionally kept verbatim and the details live here.
    #
    # Parameters:
    #   ctx          -- click context; ctx.obj["API"] is the API object used for validation
    #                   and for Samples.upload
    #   files        -- file paths given on the command line (each at most once)
    #   max_threads  -- requested worker count; capped at 8 before being used
    #   coerce_ascii, project_id, sample_id, external_sample_id
    #                -- forwarded to Samples.upload through upload_kwargs
    #   forward, reverse -- explicit pair of files; both or neither must be given
    #   prompt       -- when truthy, ask before interleaving detected R1/R2 pairs
    #   tags         -- iterable of tag names
    #   metadata     -- iterable of "key=value" strings; entries without "=" are ignored
    #
    # Exits with status 1 (via ctx.exit) on inconsistent arguments.

    # Collect the raw tags/metadata and let validate_appendables produce the
    # "valid_tags"/"valid_metadata" entries that are read further down.
    appendables = {}
    if tags:
        appendables["tags"] = list(tags)

    if metadata:
        appendables["metadata"] = {}
        for metadata_kv in metadata:
            # split on the first "=" only, so values may themselves contain "="
            key, separator, value = metadata_kv.partition("=")
            if separator:
                appendables["metadata"][key] = value

    appendables = validate_appendables(appendables, ctx.obj["API"])

    if (forward or reverse) and not (forward and reverse):
        click.echo("You must specify both forward and reverse files", err=True)
        ctx.exit(1)

    if forward and reverse:
        if len(files) > 0:
            click.echo(
                "You may not pass a FILES argument when using the "
                " --forward and --reverse options.",
                err=True,
            )
            ctx.exit(1)
        files = [(forward, reverse)]
    elif len(files) == 0:
        click.echo(ctx.get_help())
        return
    else:
        files = list(files)

        # "intelligently" find paired files and tuple them
        paired_files = []
        single_files = set(files)

        # set(files).symmetric_difference(files) is always empty (the argument is reduced to
        # the very same set), so duplicates have to be detected by comparing sizes instead.
        # NOTE(review): "specific" in the message is likely a typo for "specify"; the text is
        # left untouched in case anything matches on it.
        if len(single_files) != len(files):
            click.echo(
                "Duplicate filenames detected in command line--please specific each file only once",
                err=True,
            )
            ctx.exit(1)

        for filename in files:
            # A file may already have been claimed as the mate of an earlier file; processing
            # it again would make single_files.remove() below raise KeyError, so skip it.
            if filename not in single_files:
                continue

            # convert "read 1" filenames into "read 2" and check that they exist; if they do
            # upload the files as a pair, autointerleaving them
            pair = re.sub("[._][Rr]1[._]", lambda x: x.group().replace("1", "2"), filename)

            # we don't necessary need the R2 to have been passed in; we infer it anyways
            if pair != filename and os.path.exists(pair):
                if not prompt and pair not in single_files:
                    # if we're not prompting, don't automatically pull in files
                    # not in the list the user passed in
                    continue

                paired_files.append((filename, pair))
                if pair in single_files:
                    single_files.remove(pair)
                single_files.remove(filename)

        auto_pair = True

        if prompt and len(paired_files) > 0:
            pair_list = ""
            for p in paired_files:
                pair_list += "\n {} & {}".format(os.path.basename(p[0]), os.path.basename(p[1]))

            answer = click.confirm(
                "It appears there are {n_paired_files} paired files (of {n_files} total):{pair_list}\nInterleave them after upload?".format(
                    n_paired_files=len(paired_files) * 2,
                    n_files=len(paired_files) * 2 + len(single_files),
                    pair_list=pair_list,
                ),
                default=True,
            )

            if not answer:
                auto_pair = False

        if auto_pair:
            files = paired_files + list(single_files)

    # Combined on-disk size of everything to upload; used as the progress bar length.
    total_size = sum(
        (os.path.getsize(x[0]) + os.path.getsize(x[1]))
        if isinstance(x, tuple)
        else os.path.getsize(x)
        for x in files
    )

    upload_kwargs = {
        "metadata": appendables["valid_metadata"],
        "tags": appendables["valid_tags"],
        "project": project_id,
        "coerce_ascii": coerce_ascii,
        "progressbar": progressbar(length=total_size, label="Uploading..."),
        "sample_id": sample_id,
        "external_sample_id": external_sample_id,
    }

    # An explicit sample identifier can only refer to one upload (a file or a pair).
    if (sample_id or external_sample_id) and len(files) > 1:
        click.echo(
            "Please only specify a single file or pair of files to upload if using `sample_id` or `external_sample_id`",
            err=True,
        )
        ctx.exit(1)

    run_via_threadpool(
        ctx.obj["API"].Samples.upload,
        files,
        upload_kwargs,
        # never use more than 8 worker threads, whatever was requested
        max_threads=min(max_threads, 8),
        graceful_exit=False,
    )