Esempio n. 1
0
    def upload_workflow_input_files(self, wdl_file, json_file):
        """
        Collect the local files referenced by a workflow's inputs and upload them.

        The WDL arguments are used to find which inputs have a type mentioning 'File'; the JSON inputs
        supply the actual values. Values may be a single path, a list of paths, a dict whose values are
        paths, or (when the input name contains 'fofn') a file-of-file-names. Anything containing 'gs://'
        is skipped. The collected paths are passed to self.upload_files.

        :param wdl_file: File containing workflow description. Used to determine which workflow inputs are files.
        :param json_file: JSON inputs to wdl. Contains actual paths to files.
        :return: A list of files that were uploaded.
        """
        validator = Validator(wdl_file, json_file)

        # Keep only the WDL arguments whose declared type mentions 'File'.
        # .items() is used instead of .iteritems() so this runs on both Python 2 and Python 3.
        wdl_args = validator.get_wdl_args()
        file_keys = [arg_name for arg_name, arg_type in wdl_args.items()
                     if 'File' in arg_type]
        json_dict = validator.get_json()

        files_to_upload = []
        for file_key in file_keys:
            # Arguments declared in the WDL but missing from the JSON must not be treated as file inputs.
            if file_key not in json_dict:
                continue

            value = json_dict[file_key]

            # A null JSON value has nothing to upload (and would make the 'in' test below raise TypeError).
            if value is None:
                continue

            # Skip gs urls since they are already uploaded. For a string this is a substring test; for a
            # list or dict it is only a membership test, so their entries are filtered one by one below.
            if "gs://" in value:
                continue

            if 'fofn' in file_key:
                # Upload every file listed in the fofn.
                files_to_upload.extend(get_files_from_fofn(value))
                # Don't upload the original fofn because its entries won't have the 'gs://' prefix.
                # update_fofn is given the bucket name and returns the new fofn to upload in its place.
                files_to_upload.append(update_fofn(value, self.bucket.name))
            elif isinstance(value, list):
                files_to_upload.extend(
                    path for path in value if "gs://" not in path)
            elif isinstance(value, dict):
                # Assume all Map with File are Map[?,File], so only the values are paths.
                files_to_upload.extend(
                    path for path in value.values() if "gs://" not in path)
            else:
                files_to_upload.append(value)

        self.upload_files(files_to_upload)

        return files_to_upload