def upload_workflow_input_files(self, wdl_file, json_file):
    """
    Given workflow inputs, parse them to determine which inputs are files
    or files containing file paths (fofns), and upload those files to the
    bucket specified by this SingleBucket instance.

    :param wdl_file: File containing workflow description. Used to
        determine which workflow inputs are files.
    :param json_file: JSON inputs to wdl. Contains actual paths to files.
    :return: A list of files that were uploaded.
    """
    validator = Validator(wdl_file, json_file)
    # Collect the names of all WDL arguments whose declared type mentions
    # 'File' (covers File, Array[File], Map[?, File], ...).
    wdl_args = validator.get_wdl_args()
    file_keys = [key for key, arg_type in wdl_args.items() if 'File' in arg_type]
    json_dict = validator.get_json()
    files_to_upload = []
    for file_key in file_keys:
        # Skip keys declared in the WDL but absent from the JSON inputs,
        # and skip values already in the bucket (gs:// URLs).
        if file_key not in json_dict or "gs://" in json_dict[file_key]:
            continue
        if 'fofn' in file_key:
            # Get the files listed in the fofn and add them to the list of
            # files to upload.
            files_to_upload.extend(get_files_from_fofn(json_dict[file_key]))
            # Don't upload the original fofn: the paths it lists lack the
            # 'gs://' prefix. Instead create a new fofn with updated bucket
            # paths and upload that.
            new_fofn = update_fofn(json_dict[file_key], self.bucket.name)
            files_to_upload.append(new_fofn)
        elif isinstance(json_dict[file_key], list):
            # Array[File]: upload each entry not already in the bucket.
            for path in json_dict[file_key]:
                if "gs://" not in path:
                    files_to_upload.append(path)
        elif isinstance(json_dict[file_key], dict):
            # Assume any Map whose type mentions File is Map[?, File]:
            # only the values are paths to upload.
            for path in json_dict[file_key].values():
                if "gs://" not in path:
                    files_to_upload.append(path)
        else:
            # Plain File input.
            files_to_upload.append(json_dict[file_key])
    self.upload_files(files_to_upload)
    return files_to_upload