예제 #1
0
 def test_beyond_filename(self):
     """More carets than the file name has extensions must not touch a dotted directory."""
     result = apply_secondary_file_format_to_filename(
         "/path/with.dot/filename.ext", "^^.txt"
     )
     # Only ".ext" is stripped; the "with.dot" directory component is left intact.
     self.assertEqual("/path/with.dot/filename.txt", result)
예제 #2
0
 def test_none(self):
     """A ``None`` base filename is passed through as ``None``."""
     result = apply_secondary_file_format_to_filename(None, ".tsv")
     self.assertIsNone(result)
예제 #3
0
 def test_way_too_many_extensions(self):
     """Carets in excess of the available extensions still yield a valid name."""
     pattern = "^" * 8 + ".tsv"
     result = apply_secondary_file_format_to_filename("filename.ext", pattern)
     self.assertEqual("filename.tsv", result)
예제 #4
0
 def test_one_extension(self):
     """A single caret replaces the one existing extension."""
     result = apply_secondary_file_format_to_filename("filename.ext", "^.tsv")
     self.assertEqual("filename.tsv", result)
예제 #5
0
 def test_append(self):
     """A pattern without carets is appended to the full file name."""
     result = apply_secondary_file_format_to_filename("filename.ext", ".tsv")
     self.assertEqual("filename.ext.tsv", result)
예제 #6
0
    def copy_output(
        self,
        fs: FileScheme,
        outputid,
        prefix,
        tag,
        secondaries,
        extension,
        iscopyable,
        engine_output: Union[WorkflowOutputModel, Any, List[Any]],
        shard=None,
    ):
        """
        Copy (or write) an engine output, and its secondary files, below ``self.path``.

        When ``engine_output`` is a list, this method recurses once per element
        and the element index is appended to ``shard``.

        :param fs: file scheme used for ``mkdirs`` / ``cp_from``; plain values are
            only written to disk when this is a ``LocalFileScheme``.
        :param outputid: fallback file name (without extension) when no ``prefix`` is given.
        :param prefix: optional output name, or list of output names. During list
            recursion a list with more than one element is indexed by shard.
        :param tag: optional list of folder names, joined with "/" below ``self.path``.
            An element that is itself a list whose length equals the number of
            shards is replaced, per shard, by the matching entry.
        :param secondaries: optional iterable of secondary-file patterns, applied to
            both the source path and the destination file name.
        :param extension: extension to use for copied files; falls back to
            ``get_extension`` of the original path.
        :param iscopyable: whether a ``WorkflowOutputModel`` with an original path
            should be copied (rather than have its value written).
        :param engine_output: a ``WorkflowOutputModel``, a plain value, or a
            (possibly nested) list of either.
        :param shard: list of shard indices accumulated by enclosing recursive calls.
        :return: ``[original_filepath, newoutputfilepath]`` for a single output; for
            a list, a tuple ``(originals, new_paths)`` of the per-element results.
        """

        # A list of engine outputs is handled by recursing into every element;
        # nested lists therefore produce nested results.
        if isinstance(engine_output, list):
            outs = []
            nshards = len(engine_output)
            prev_shards = shard or []

            # We want to spread a list-valued tag across the shards. This is only
            # done for the first tag element that is a list with exactly as many
            # entries as there are shards.

            def find_element_where_length_is(iterable, n):
                """Return the index of the first list element of length n, else None."""
                if iterable is None:
                    return None
                for idx, element in enumerate(iterable):
                    if isinstance(element, list) and len(element) == n:
                        return idx
                return None

            def explode_at_index(iterable, index_to_explode, index_to_select):
                """Copy iterable, replacing the nested list at index_to_explode
                by its index_to_select-th entry."""
                # Uses the function's own parameter for the tail slice (the
                # previous version reached for the enclosing scope's variable).
                return [
                    *iterable[:index_to_explode],
                    iterable[index_to_explode][index_to_select],
                    *iterable[index_to_explode + 1 :],
                ]

            tag_index_to_explode = find_element_where_length_is(tag, nshards)

            for i in range(nshards):
                eout = engine_output[i]

                new_shard = [*prev_shards, i]

                # choose prefix
                new_prefix = prefix
                if isinstance(new_prefix, list) and len(new_prefix) > 1:
                    new_prefix = new_prefix[i]
                    # Drop the leading shard index -- presumably because the
                    # selected prefix already identifies this shard (TODO confirm).
                    new_shard = new_shard[1:]

                # choose tag
                new_tag = tag
                if tag_index_to_explode is not None:
                    new_tag = explode_at_index(tag, tag_index_to_explode, i)
                    # NOTE(review): this trims a second element when the prefix
                    # was also selected per shard above -- confirm that is intended.
                    new_shard = new_shard[1:]

                outs.append(
                    self.copy_output(
                        fs,
                        outputid=outputid,
                        tag=new_tag,
                        prefix=new_prefix,
                        engine_output=eout,
                        shard=new_shard,
                        secondaries=secondaries,
                        extension=extension,
                        iscopyable=iscopyable,
                    )
                )

            return [o[0] for o in outs], [o[1] for o in outs]

        final_tags = tag

        outfn = outputid

        if final_tags and any(isinstance(t, list) for t in final_tags):
            Logger.critical(
                f"One of the final output tags {str(final_tags)} was still an array, outputs will be written directly into the output directory"
            )
            final_tags = None

        if prefix:
            if isinstance(prefix, list):
                if len(prefix) > 1:
                    Logger.critical(
                        f"Expected only one output_name for this copy, but found ({', '.join(prefix)}) [{len(prefix)}], using the first outputname"
                    )
                # Always take the first name, so the behaviour matches the
                # message logged above (previously outputid was silently used
                # when more than one name was supplied).
                outfn = prefix[0]
            else:
                outfn = prefix

        if final_tags is None:
            final_tags = []

        outdir = os.path.join(self.path, "/".join(final_tags))

        fs.mkdirs(outdir)

        if shard is not None:
            for s in shard:
                outfn += f"_shard-{s}"

        # copy output

        original_filepath = None
        newoutputfilepath = os.path.join(outdir, outfn)

        if isinstance(engine_output, WorkflowOutputModel):
            original_filepath = engine_output.originalpath
            if original_filepath and iscopyable:
                ext = extension or get_extension(engine_output.originalpath)
                if ext:
                    # Accept extensions given with or without the leading dot.
                    dot = "" if ext[0] == "." else "."
                    outfn += dot + ext
                    newoutputfilepath += dot + ext
                fs.cp_from(engine_output.originalpath, newoutputfilepath, force=True)
            elif engine_output.value:
                if isinstance(fs, LocalFileScheme):
                    # Write engine_output to outpath
                    with open(newoutputfilepath, "w+") as outfile:
                        outfile.write(str(engine_output.value))
        else:
            original_filepath = engine_output
            if isinstance(fs, LocalFileScheme):
                # Write engine_output to outpath
                with open(newoutputfilepath, "w+") as outfile:
                    outfile.write(str(engine_output))

        # NOTE(review): original_filepath may be None (or a non-path value) here;
        # the secondary copy below is attempted regardless -- verify against callers.
        for sec in secondaries or []:
            frompath = apply_secondary_file_format_to_filename(original_filepath, sec)
            tofn = apply_secondary_file_format_to_filename(outfn, sec)
            topath = os.path.join(outdir, tofn)
            fs.cp_from(frompath, topath, force=True)

        return [original_filepath, newoutputfilepath]