Esempio n. 1
0
def process_single_infile(wdl_file: WDLFile, fileStore: AbstractFileStore) -> WDLFile:
    """
    Import the file referenced by `wdl_file` through `fileStore.importFile`.

    The source location is chosen from the prefix of `wdl_file.file_path`:

    * ``http://``, ``https://``, ``file://`` and ``wasb://`` are imported as-is.
    * ``s3://`` is imported as-is first; if that fails, the object is retried
      through ``http://s3.<region>.amazonaws.com/`` for each region in
      ``toil.lib.ec2nodes.EC2Regions`` until one import succeeds.
    * ``gs://`` is rewritten to ``https://storage.googleapis.com/...``.
    * anything else is treated as a local path and imported as
      ``file://<absolute path>``.

    :param wdl_file: file object whose ``file_path`` is the location to import.
    :param fileStore: object providing ``importFile(url)``.
    :return: a new ``WDLFile`` whose ``file_path`` is the value returned by
             ``importFile``, whose ``file_name`` is the basename of the source
             location, and with ``imported=True``.
    :raises RuntimeError: if an ``s3://`` file cannot be imported directly or
                          through any regional endpoint.
    """
    f = wdl_file.file_path
    logger.info(f'Importing {f} into the jobstore.')
    if f.startswith(('http://', 'https://', 'file://', 'wasb://')):
        filepath = fileStore.importFile(f)
    elif f.startswith('s3://'):
        try:
            filepath = fileStore.importFile(f)
        except Exception as err:
            # Direct import failed; retry through each regional HTTP endpoint.
            from toil.lib.ec2nodes import EC2Regions
            for region in EC2Regions:
                try:
                    html_path = f'http://s3.{region}.amazonaws.com/' + f[5:]
                    filepath = fileStore.importFile(html_path)
                except Exception:
                    # This endpoint did not work; try the next region.
                    continue
                # Stop at the first region that works instead of re-importing
                # the same object once per remaining region.
                break
            else:
                # The loop finished without a single successful import.
                raise RuntimeError('Unable to import: ' + f) from err
    elif f.startswith('gs://'):
        f = 'https://storage.googleapis.com/' + f[5:]
        filepath = fileStore.importFile(f)
    else:
        filepath = fileStore.importFile("file://" + os.path.abspath(f))
    # Every branch keeps the basename of the (possibly rewritten) source location.
    preserveThisFilename = os.path.basename(f)
    return WDLFile(file_path=filepath, file_name=preserveThisFilename, imported=True)
Esempio n. 2
0
def _list_directory_for_diagnostics(path) -> str:
    """Return an ``ls -lha`` listing of `path`, or a placeholder if it cannot be listed."""
    try:
        return subprocess.check_output(['ls', '-lha', path]).decode('utf-8', errors='replace')
    except (subprocess.CalledProcessError, OSError) as err:
        # Diagnostics are best-effort: a missing directory (or missing `ls`)
        # must not hide the error the caller is about to raise.
        return f'<unable to list {path}: {err}>'


def process_single_outfile(wdl_file: WDLFile, fileStore, workDir, outDir) -> WDLFile:
    """
    Locate an output file, write it to the file store and export it to `outDir`.

    ``wdl_file.file_path`` is looked up, in order: as given, as an absolute
    path, under ``<workDir>/execution``, under ``./execution``, under
    `workDir`, and under `outDir`. The first existing candidate is used.

    :param wdl_file: file object whose ``file_path`` names the expected output.
    :param fileStore: object providing ``writeGlobalFile`` and ``export_file``.
    :param workDir: working directory to search (including its ``execution``
                    subdirectory).
    :param outDir: directory the file is exported to under its basename.
    :return: a new ``WDLFile`` whose ``file_path`` is the value returned by
             ``writeGlobalFile``, with ``imported=True``.
    :raises RuntimeError: if the file is not found in any candidate location;
                          the message includes directory listings of `workDir`
                          and ``<workDir>/execution``.
    """
    f = wdl_file.file_path
    if os.path.exists(f):
        output_f_path = f
    elif os.path.exists(os.path.abspath(f)):
        output_f_path = os.path.abspath(f)
    elif os.path.exists(os.path.join(workDir, 'execution', f)):
        output_f_path = os.path.join(workDir, 'execution', f)
    elif os.path.exists(os.path.join('execution', f)):
        output_f_path = os.path.join('execution', f)
    elif os.path.exists(os.path.join(workDir, f)):
        output_f_path = os.path.join(workDir, f)
    elif os.path.exists(os.path.join(outDir, f)):
        output_f_path = os.path.join(outDir, f)
    else:
        tmp = _list_directory_for_diagnostics(workDir)
        exe = _list_directory_for_diagnostics(os.path.join(workDir, 'execution'))
        # Log whatever stdout/stderr were left in the execution directory.
        for std_name in ('stdout', 'stderr'):
            std_path = os.path.join(workDir, 'execution', std_name)
            if os.path.exists(std_path):
                # Use a separate handle name so `f` still holds the missing
                # path when the error message is formatted below.
                with open(std_path, 'rb') as std_stream:
                    logger.info(std_stream.read())

        raise RuntimeError('OUTPUT FILE: {} was not found in {}!\n'
                           '{}\n\n'
                           '{}\n'.format(f, os.getcwd(), tmp, exe))
    output_file = fileStore.writeGlobalFile(output_f_path)
    preserveThisFilename = os.path.basename(output_f_path)
    fileStore.export_file(output_file, "file://" + os.path.join(os.path.abspath(outDir), preserveThisFilename))
    return WDLFile(file_path=output_file, file_name=preserveThisFilename, imported=True)
Esempio n. 3
0
 def testFn_Size(self):
     """Check the WDL built-in equivalent of 'size()' on two imported files.

     A small WDL test file and the file at ``self.encode_data`` are imported
     with process_infile(); their sizes are then compared against lower bounds
     in bytes and, for the larger file, in 'mb'."""
     from toil.common import Toil
     from toil.job import Job
     from toil.wdl.wdl_types import WDLFile

     run_options = Job.Runner.getDefaultOptions('./toilWorkflowRun')
     run_options.clean = 'always'

     with Toil(run_options) as workflow:
         # Small input: a WDL file from the test fixtures.
         vocab_path = os.path.abspath('src/toil/test/wdl/testfiles/vocab.wdl')
         small_input = process_infile(WDLFile(file_path=vocab_path), workflow)
         small_file = size(small_input)

         # Large input: measured both in the default unit and in 'mb'.
         large_input = process_infile(WDLFile(file_path=self.encode_data), workflow)
         larger_file = size(large_input)
         larger_file_in_mb = size(large_input, 'mb')

         assert small_file >= 1800, small_file
         assert larger_file >= 70000000, larger_file
         assert larger_file_in_mb >= 70, larger_file_in_mb
Esempio n. 4
0
def size(f: Optional[Union[str, WDLFile, List[Union[str, WDLFile]]]] = None,
         unit: Optional[str] = 'B',
         fileStore: Optional[AbstractFileStore] = None) -> float:
    """
    Given a `File` and a `String` (optional), returns the size of the file in Bytes
    or in the unit specified by the second argument.

    Supported units are KiloByte ("K", "KB"), MegaByte ("M", "MB"), GigaByte
    ("G", "GB"), TeraByte ("T", "TB") (powers of 1000) as well as their binary version
    (https://en.wikipedia.org/wiki/Binary_prefix) "Ki" ("KiB"), "Mi" ("MiB"),
    "Gi" ("GiB"), "Ti" ("TiB") (powers of 1024). Default unit is Bytes ("B").

    WDL syntax: Float size(File, [String])
    Varieties:  Float size(File?, [String])
                Float size(Array[File], [String])
                Float size(Array[File?], [String])

    :param f: a path string, a WDLFile, a list of either, or None. A missing
              file (None) has size 0; None entries inside a list are skipped.
    :param unit: unit name handed to `return_bytes` to obtain the divisor.
    :param fileStore: passed through to `process_infile`; only required if the
                      input is not processed yet.
    :return: the size (summed over all entries for a list) divided by the
             unit's divisor.
    """

    if f is None:
        # Float, to match the declared return type.
        return 0.0

    # it is possible that size() is called directly (e.g.: size('file')) and so it is not treated as a file.
    if isinstance(f, str):
        f = WDLFile(file_path=f)
    elif isinstance(f, list):
        # Array[File?]: absent (None) entries contribute nothing to the total.
        f = [WDLFile(file_path=sf) if isinstance(sf, str) else sf
             for sf in f if sf is not None]

    assert isinstance(f, (
        WDLFile,
        list)), f'size() expects a "File" or "File?" argument!  Not: {type(f)}'

    # validate the input. fileStore is only required if the input is not processed.
    f = process_infile(f, fileStore)

    divisor = return_bytes(unit)

    # NOTE(review): relies on process_infile leaving `file_path` as an object
    # with a `.size` attribute - confirm against process_infile.
    if isinstance(f, list):
        total_size = sum(file.file_path.size for file in f)
        return total_size / divisor

    fileID = f.file_path
    return fileID.size / divisor
Esempio n. 5
0
def abspath_single_file(f: WDLFile, cwd: str) -> WDLFile:
    """
    Anchor the path of `f` to `cwd` when it is not already in absolute form.

    If ``f.file_path`` equals its own ``os.path.abspath`` the file is left
    untouched; otherwise ``f.file_path`` is replaced by ``cwd`` joined with
    the original path. The same object is modified in place and returned.
    """
    original = f.file_path
    already_absolute = os.path.abspath(original) == original
    if already_absolute:
        return f
    f.file_path = os.path.join(cwd, original)
    return f