def _call_init_upload(file_name, file_size, metadata, tags, project, samples_resource):
    """Request upload details for a file from the One Codex `init_upload` route.

    Parameters
    ----------
    file_name : `string`
        Name to associate with this fastx file at One Codex.
    file_size : `integer`
        Accurate size of the file to be uploaded, in bytes.
    metadata : `dict`, optional
        Sample metadata. Keys are converted with `snake_case` before sending.
    tags : `list`, optional
    project : `string`, optional
        UUID of the project to associate this sample with. If the value has an
        `id` attribute, that attribute is sent instead of the value itself.
    samples_resource : `onecodex.models.Samples`
        Wrapped potion-client object exposing the `init_upload` route.

    Returns
    -------
    `dict`
        The response of `samples_resource.init_upload`. Contains, at a minimum,
        'upload_url' and 'sample_id', plus additional data used to upload the
        file to fastx-proxy, a user's S3 bucket, or an intermediate bucket.
    """
    payload = {
        "filename": file_name,
        "size": file_size,
        "upload_type": "standard",  # this is multipart form data
    }

    # Optional fields are only included when the caller supplied something.
    if metadata:
        payload["metadata"] = {snake_case(key): value for key, value in metadata.items()}
    if tags:
        payload["tags"] = tags
    if project:
        payload["project"] = getattr(project, "id", project)

    try:
        response = samples_resource.init_upload(payload)
    except requests.exceptions.HTTPError as err:
        # Hand the HTTP failure to the shared API error helper.
        raise_api_error(err.response, state="init")
    except requests.exceptions.ConnectionError:
        raise_connectivity_error(file_name)

    return response
def build_upload_dict(metadata, tags, project):
    """Collect metadata, tags and project into a dict shaped for the One Codex backend.

    Only truthy arguments produce a key. Metadata keys are passed through
    `snake_case`; a project with an `id` attribute is replaced by that `id`.
    """
    fields = {}

    if metadata:
        fields["metadata"] = {snake_case(key): value for key, value in metadata.items()}
    if tags:
        fields["tags"] = tags
    if project:
        fields["project"] = getattr(project, "id", project)

    return fields
def _make_retry_fields(file_name, metadata, tags, project):
    """Build the fields sent to init_multipart_upload when a fastx-proxy upload fails.

    Parameters
    ----------
    file_name : `string`
        Name to associate with this fastx file at One Codex.
    metadata : `dict`, optional
        Sample metadata. Keys are converted with `snake_case`.
    tags : `list`, optional
    project : `string`, optional
        UUID of the project to associate this sample with. If the value has an
        `id` attribute, that attribute is used instead of the value itself.

    Returns
    -------
    `dict`
        Always contains 'filename'; 'metadata', 'tags' and 'project' are added
        only when the corresponding argument is truthy. These fields are
        integrated into the Sample model created by init_multipart_upload.
    """
    retry_fields = {"filename": file_name}

    if metadata:
        retry_fields["metadata"] = {
            snake_case(key): value for key, value in metadata.items()
        }
    if tags:
        retry_fields["tags"] = tags
    if project:
        retry_fields["project"] = getattr(project, "id", project)

    return retry_fields
def test_snake_case():
    """Each spelling of the same name must normalize to ``snake_case``."""
    for spelling in ("SnakeCase", "snakeCase", "SNAKE_CASE"):
        assert snake_case(spelling) == "snake_case"
def upload(ctx, files, max_threads, clean, no_interleave, prompt, validate,
           forward, reverse, tags, metadata):
    """Upload a FASTA or FASTQ (optionally gzip'd) to One Codex"""
    # NOTE(review): the docstring above is presumably what click displays as
    # the command help, so it is intentionally left unchanged.
    #
    # Flow:
    #   1. collect tags and KEY=VALUE metadata and validate them,
    #   2. resolve the list of files: either an explicit forward/reverse pair
    #      or FILES, optionally auto-pairing "R1"/"R2" files,
    #   3. hand everything to ``Samples.upload``.
    # The process exits with status 1 on inconsistent arguments or on any
    # upload failure; with no files at all the help text is printed instead.

    appendables = {}
    if tags:
        appendables['tags'] = list(tags)

    if metadata:
        appendables['metadata'] = {}
        for metadata_kv in metadata:
            key, separator, value = metadata_kv.partition('=')
            # Entries without an '=' carry no value and are skipped.
            if separator:
                appendables['metadata'][snake_case(key)] = value

    appendables = validate_appendables(appendables, ctx.obj['API'])

    # forward and reverse must be given together or not at all.
    if (forward or reverse) and not (forward and reverse):
        click.echo('You must specify both forward and reverse files', err=True)
        sys.exit(1)
    if forward and reverse:
        if len(files) > 0:
            click.echo(
                'You may not pass a FILES argument when using the '
                ' --forward and --reverse options.',
                err=True)
            sys.exit(1)
        files = [(forward, reverse)]
        # The pair is explicit, so there is nothing left to auto-detect.
        no_interleave = True
    if len(files) == 0:
        click.echo(ctx.get_help())
        return
    files = list(files)

    if not no_interleave:
        # "intelligently" find paired files and tuple them
        paired_files = []
        single_files = set(files)
        for filename in files:
            if filename not in single_files:
                # Already consumed: either a duplicate argument or the R2 of
                # an earlier pair. Pairing it again used to raise KeyError on
                # the removal below (and would record the pair twice).
                continue

            # convert "read 1" filenames into "read 2" and check that they
            # exist; if they do, upload the files as a pair, autointerleaving
            # them
            pair = re.sub('[._][Rr]1[._]',
                          lambda match: match.group().replace('1', '2'),
                          filename)

            # we don't necessarily need the R2 to have been passed in; we
            # infer it anyway
            if pair == filename or not os.path.exists(pair):
                continue
            if not prompt and pair not in single_files:
                # if we're not prompting, don't automatically pull in files
                # not in the list the user passed in
                continue

            paired_files.append((filename, pair))
            single_files.discard(pair)
            single_files.discard(filename)

        auto_pair = True
        if prompt and len(paired_files) > 0:
            pair_list = ''.join(
                '\n {} & {}'.format(os.path.basename(first),
                                    os.path.basename(second))
                for first, second in paired_files)
            answer = click.confirm(
                'It appears there are paired files:{}\nInterleave them after upload?'
                .format(pair_list),
                default=True)
            if not answer:
                auto_pair = False

        if auto_pair:
            files = paired_files + list(single_files)

    if not clean:
        # Escalate validation warnings to exceptions so they abort the upload.
        warnings.filterwarnings('error', category=ValidationWarning)

    try:
        # do the uploading
        ctx.obj['API'].Samples.upload(files,
                                      threads=max_threads,
                                      validate=validate,
                                      metadata=appendables['valid_metadata'],
                                      tags=appendables['valid_tags'])
    except ValidationWarning as e:
        sys.stderr.write('\nERROR: {}. {}'.format(
            e, 'Running with the --clean flag will suppress this error.'))
        sys.exit(1)
    except (ValidationError, UploadException, Exception) as e:
        # Exception is the catch-all here; the specific types are listed for
        # readers only.
        # TODO: Some day improve specific other exception error messages,
        # e.g., gzip CRC IOError
        sys.stderr.write('\nERROR: {}'.format(e))
        sys.stderr.write(
            '\nPlease feel free to contact us for help at [email protected]\n\n'
        )
        sys.exit(1)
def test_snake_case():
    """Check that differently-cased inputs all convert to ``snake_case``."""
    for candidate in ('SnakeCase', 'snakeCase', 'SNAKE_CASE'):
        assert snake_case(candidate) == 'snake_case'