def test_make_fastq_links(self):
    """Check that make_fastq_links populates the output directory.

    Targets are generated from ``self.project`` and linked into ``tmp``.
    The test then verifies that, for the first target, both the read-1
    fastq file and the accompanying ``SampleSheet.csv`` exist (possibly
    as symlinks) under ``tmp``.
    """
    # Assume Illumina/SciLife data structure
    runs = target_generator(indir=self.project)
    make_fastq_links(runs, indir=self.project, outdir="tmp")

    # Element 2 of a target is used as a path prefix below the project
    # directory (presumably the raw-data run prefix -- confirm against
    # target_generator).
    rel_prefix = os.path.relpath(runs[0][2], self.project)

    read1_path = os.path.join("tmp", rel_prefix + "_R1_001.fastq.gz")
    self.assertTrue(os.path.lexists(read1_path))

    # The sample sheet is expected in the same directory as the fastq file.
    samplesheet_path = os.path.join("tmp", os.path.dirname(rel_prefix), "SampleSheet.csv")
    self.assertTrue(os.path.lexists(samplesheet_path))
Exemple #2
0
 def _setup(self):
     """Collect the targets to run and register them in the backend.

     If no input directory is set, an error is logged, an empty target
     list is stored and registered, and the method returns without
     calling the target iterator. Otherwise ``self.outdir`` defaults to
     ``self.indir``, targets are collected from ``self.target_iterator()``
     and, when the output directory differs from the input directory and
     at least one target was found, the targets are passed through
     ``make_fastq_links`` before being registered.
     """
     # List requirements for completion, consisting of classes above
     if self.indir is None:
         logger.error("Need input directory to run")
         # Stop here: previously execution fell through, so the empty list
         # was immediately overwritten by a target lookup that had no
         # input directory to work with.
         self.targets = []
         backend.__global_vars__["targets"] = self.targets
         return
     if self.outdir is None:
         # Default to working in place
         self.outdir = self.indir
     self.targets = list(self.target_iterator())
     if self.outdir != self.indir and self.targets:
         # Replace the targets with whatever make_fastq_links returns for
         # the separate output directory.
         self.targets = make_fastq_links(self.targets, self.indir, self.outdir)
     # Finally register targets in backend
     backend.__global_vars__["targets"] = self.targets
Exemple #3
0
 def _setup(self):
     """Collect the targets to run and register them in the backend.

     If no input directory is set, an error is logged, an empty target
     list is stored and registered, and the method returns without
     calling the target iterator. Otherwise ``self.outdir`` defaults to
     ``self.indir``, targets are collected from ``self.target_iterator()``
     and, when the output directory differs from the input directory, the
     targets are passed through ``make_fastq_links`` before being
     registered.
     """
     # List requirements for completion, consisting of classes above
     if self.indir is None:
         logger.error("Need input directory to run")
         # Stop here: previously execution fell through, so the empty list
         # was immediately overwritten by a target lookup that had no
         # input directory to work with.
         self.targets = []
         backend.__global_vars__["targets"] = self.targets
         return
     if self.outdir is None:
         # Default to working in place
         self.outdir = self.indir
     self.targets = list(self.target_iterator())
     if self.outdir != self.indir:
         # Replace the targets with whatever make_fastq_links returns for
         # the separate output directory.
         self.targets = make_fastq_links(self.targets, self.indir, self.outdir)
     # Finally register targets in backend
     backend.__global_vars__["targets"] = self.targets
Exemple #4
0
    def requires(self):
        """Return the PicardMetrics tasks this task depends on.

        Returns ``None`` when no input directory is set and an empty list
        when no target generator function is available. Otherwise one
        ``PicardMetrics`` task is returned per generated target, using
        element 1 of the target with the suffix ``.sort.merge.dup`` as the
        task's target. As a side effect ``self.outdir`` is set to
        ``self.indir`` when it was ``None``.
        """
        # Nothing to look up without an input directory. NOTE(review): this
        # yields None, whereas the missing-generator case below yields [].
        if not self.indir:
            return None
        if self.outdir is None:
            # Default to working in place
            self.outdir = self.indir

        generator = self.set_target_generator_function()
        if not generator:
            return []

        runs = generator(self.indir, sample=self.sample, flowcell=self.flowcell, lane=self.lane)
        if self.outdir != self.indir:
            # Continue with the targets returned for the separate output
            # directory.
            runs = make_fastq_links(runs, self.indir, self.outdir)

        # Build every target name first, then wrap each in a task.
        metric_prefixes = []
        for run in runs:
            metric_prefixes.append("{}.{}".format(run[1], "sort.merge.dup"))
        return [PicardMetrics(target=prefix) for prefix in metric_prefixes]
                tgt_gen_fun = hdl

    # Collect information about what samples to run, and on how many
    # nodes. This is somewhat convoluted since ratatosk_run_scilife
    # also collects sample information, but this step is necessary as
    # we need to wrap ratatosk_run_scilife.py in drmaa
    targets = tgt_gen_fun(indir=pargs.indir, sample=pargs.sample,
                          flowcell=pargs.flowcell, lane=pargs.lane)
    # After getting run list, if output directory is different to
    # input directory, link raw data files to output directory and
    # remember to use this directory for ratatosk tasks. In this way
    # we actually can run on subsets of sample runs or flowcells
    if not pargs.outdir:
        pargs.outdir = pargs.indir
    if pargs.outdir != pargs.indir:
        targets = make_fastq_links(targets, pargs.indir, pargs.outdir)
    # Group samples
    sorted_samples = sorted(targets, key=lambda t:t.sample_id())
    samples = {}
    for k, g in itertools.groupby(sorted_samples, key=lambda t:t.sample_id()):
        samples[k] = list(g)

    # Initialize command
    if pargs.sample_target_suffix or pargs.run_target_suffix:
        cmd = [RATATOSK_RUN, "GenericWrapper", '--workers', pargs.workers, 
               '--scheduler-host', pargs.scheduler_host, '--task', pargs.task]
    else:
        cmd = [RATATOSK_RUN, pargs.task, '--indir', pargs.indir, '--outdir', pargs.outdir, 
               '--workers', pargs.workers, '--scheduler-host', pargs.scheduler_host]
    if pargs.config_file:
        logging.info("setting config to {}".format(pargs.config_file))