Beispiel #1
0
def stages(commands: Sequence[Sequence[Union[str, Callable]]],
           subdirs: Union[Sequence[str], None] = None,
           keep_data: bool = False) -> str:
    """
    Run a list of commands against a fresh temporary directory,
    removing that directory afterwards unless otherwise requested.

    Parameters
    ----------
    commands : Sequence[Sequence[Union[str, Callable]]]
        A collection of stages. Each stage is handed to
        ``prepare_stage(stage, tempdir)`` and the value it returns is
        passed to subprocess.check_call. Stages are expected to be made
        of str elements or callables that accept the keyword argument
        "tempdir" (the expansion itself happens in ``prepare_stage``).
        The first two elements of a stage are joined with a space for
        the timing report, so they must be strings.
    subdirs : Sequence[str], optional
        A collection of paths which should be created as subdirectories
        within the temporary directory used by this invocation. Entries
        may overlap (e.g. ``["a/b", "a"]``); directories that already
        exist are left alone.
    keep_data : bool
        If true, the temporary directory is left on disk (also when a
        command fails). If not true, the temporary directory is removed
        before this function returns, so the returned path no longer
        exists by the time the caller sees it.

    Return
    ------
    str :
        Path to the temporary directory used by this invocation.

    Raises
    ------
    subprocess.CalledProcessError
        Propagated from subprocess.check_call when a command exits with
        a non-zero status; remaining stages are not run.

    Environment variables
    ---------------------
    STARFISH_COVERAGE :
         If set, then command lists will have `coverage run ...` options
         prepended before execution (presumably applied by
         ``prepare_stage``; that logic is not part of this function).
    """

    if keep_data:
        # Caller owns the directory; nothing to clean up here.
        tempobj = None
        tempdir = tempfile.mkdtemp()
    else:
        tempobj = tempfile.TemporaryDirectory()
        tempdir = tempobj.name

    def callback(interval):
        # `stage` is resolved when the callback fires, i.e. it refers to
        # the stage currently being executed by the loop below.
        print(" ".join(stage[:2]), " ==> {} seconds".format(interval))

    try:
        for subdir in subdirs or ():
            # exist_ok: an earlier entry may already have created this
            # path as a parent directory.
            os.makedirs(os.path.join(tempdir, subdir), exist_ok=True)

        for stage in commands:
            cmdline = prepare_stage(stage, tempdir)
            with clock.timeit(callback):
                subprocess.check_call(cmdline)

        return tempdir

    finally:
        if tempobj is not None:
            tempobj.cleanup()
Beispiel #2
0
    def test_run_pipeline(self):
        """Run every stage of the pipeline and check the top two barcodes.

        Each entry of ``TestWithIssData.STAGES`` is expanded into a command
        line (callable elements are invoked with ``tempdir=<scratch dir>``)
        and executed with ``subprocess.check_call``. When the
        ``STARFISH_COVERAGE`` environment variable is set, commands whose
        first element is ``starfish`` are run through ``coverage run``
        instead. Afterwards ``results/decoded_table.json`` is read and the
        two most frequent barcodes must be ``AAGC`` and ``AGGC``.

        The scratch directory is deleted at the end unless the
        ``TEST_ISS_KEEP_DATA`` environment variable is set.
        """
        workdir = tempfile.mkdtemp()
        use_coverage = "STARFISH_COVERAGE" in os.environ
        # Replaces the leading "starfish" element when coverage is on.
        coverage_prefix = [
            "coverage", "run", "-p", "--source", "starfish", "-m", "starfish",
        ]

        def report_timing(interval):
            # `step` is looked up when the timer fires, so this prints the
            # stage that has just finished.
            print(" ".join(step[:2]), " ==> {} seconds".format(interval))

        try:
            for name in TestWithIssData.SUBDIRS:
                os.makedirs(os.path.join(workdir, name))

            for step in TestWithIssData.STAGES:
                argv = []
                for part in step:
                    argv.append(part(tempdir=workdir) if callable(part) else part)
                if argv[0] == "starfish" and use_coverage:
                    argv = coverage_prefix + argv[1:]
                with clock.timeit(report_timing):
                    subprocess.check_call(argv)

            table_path = os.path.join(workdir, "results", "decoded_table.json")
            with open(table_path) as fh:
                records = json.load(fh)

            tally = collections.Counter(rec["barcode"] for rec in records)
            # Sort on (count, barcode) so ties are broken by barcode.
            ranked = sorted(
                ((hits, barcode) for barcode, hits in tally.items()),
                reverse=True)
            self.assertEqual("AAGC", ranked[0][1])
            self.assertEqual("AGGC", ranked[1][1])
        finally:
            if "TEST_ISS_KEEP_DATA" not in os.environ:
                shutil.rmtree(workdir)
Beispiel #3
0
    def test_run_pipeline(self):
        """Run every stage of the pipeline and check the top two barcodes.

        Each entry of ``TestWithIssData.STAGES`` is expanded into a command
        line (callable elements are invoked with ``tempdir=<scratch dir>``)
        and executed with ``subprocess.check_call``. Commands whose first
        element is ``starfish`` are always run through ``coverage run``.
        Afterwards ``results/decoder_table.csv`` is read, its first line is
        skipped, and the first comma-separated field of every remaining
        line is counted; the two most frequent values must be ``AAGC`` and
        ``AGGC``.

        The scratch directory is always deleted at the end.
        """
        workdir = tempfile.mkdtemp()
        # Replaces the leading "starfish" element of a command line.
        coverage_prefix = [
            "coverage", "run", "-p", "--source", "starfish", "-m", "starfish",
        ]

        def report_timing(interval):
            # `step` is looked up when the timer fires, so this prints the
            # stage that has just finished.
            print(" ".join(step[:2]), " ==> {} seconds".format(interval))

        try:
            for name in TestWithIssData.SUBDIRS:
                os.makedirs(os.path.join(workdir, name))

            for step in TestWithIssData.STAGES:
                argv = []
                for part in step:
                    argv.append(part(tempdir=workdir) if callable(part) else part)
                if argv[0] == 'starfish':
                    argv = coverage_prefix + argv[1:]
                with clock.timeit(report_timing):
                    subprocess.check_call(argv)

            table_path = os.path.join(workdir, "results", "decoder_table.csv")
            with open(table_path) as fh:
                rows = fh.readlines()

            # rows[0] is skipped; the barcode is the first column.
            tally = collections.Counter(
                row.split(",", 1)[0] for row in rows[1:])
            # Sort on (count, barcode) so ties are broken by barcode.
            ranked = sorted(
                ((hits, barcode) for barcode, hits in tally.items()),
                reverse=True)
            self.assertEqual("AAGC", ranked[0][1])
            self.assertEqual("AGGC", ranked[1][1])
        finally:
            shutil.rmtree(workdir)
Beispiel #4
0
    def test_run_pipeline(self):
        """Run every stage of the pipeline and compare two gene counts.

        Each entry of ``TestWithIssData.STAGES`` is expanded into a command
        line (callable elements are invoked with ``tempdir=<scratch dir>``)
        and executed with ``subprocess.check_call``. When the
        ``STARFISH_COVERAGE`` environment variable is set, commands whose
        first element is ``starfish`` are run through ``coverage run``
        instead. Afterwards ``results/spots.nc`` is loaded with
        ``IntensityTable.load``, occurrences of each value of the gene
        coordinate are counted, and ``ACTB_human`` must occur more often
        than ``ACTB_mouse``.

        The scratch directory is deleted at the end unless the
        ``TEST_ISS_KEEP_DATA`` environment variable is set.
        """
        workdir = tempfile.mkdtemp()
        use_coverage = "STARFISH_COVERAGE" in os.environ
        # Replaces the leading "starfish" element when coverage is on.
        coverage_prefix = [
            "coverage", "run", "-p", "--source", "starfish", "-m", "starfish",
        ]

        def report_timing(interval):
            # `step` is looked up when the timer fires, so this prints the
            # stage that has just finished.
            print(" ".join(step[:2]), " ==> {} seconds".format(interval))

        try:
            for name in TestWithIssData.SUBDIRS:
                os.makedirs(os.path.join(workdir, name))

            for step in TestWithIssData.STAGES:
                argv = []
                for part in step:
                    argv.append(part(tempdir=workdir) if callable(part) else part)
                if argv[0] == "starfish" and use_coverage:
                    argv = coverage_prefix + argv[1:]
                with clock.timeit(report_timing):
                    subprocess.check_call(argv)

            spots_path = os.path.join(workdir, "results", "spots.nc")
            table = IntensityTable.load(spots_path)
            gene_coord = table.coords[Codebook.Constants.GENE.value]
            names, occurrences = np.unique(gene_coord, return_counts=True)
            per_gene = pd.Series(occurrences, index=names)
            assert per_gene['ACTB_human'] > per_gene['ACTB_mouse']

        finally:
            if "TEST_ISS_KEEP_DATA" not in os.environ:
                shutil.rmtree(workdir)