def test_make_fastq_links(self):
    """Test that make_fastq_links populates the output directory.

    Assumes the project directory follows the Illumina/SciLife data
    structure.
    """
    out_root = "tmp"
    targets = target_generator(indir=self.project)
    make_fastq_links(targets, indir=self.project, outdir=out_root)

    # Third field of the first target, expressed relative to the project root.
    rel_prefix = os.path.relpath(targets[0][2], self.project)

    # The read-1 fastq file must exist (possibly as a link) under the output root.
    read1_path = os.path.join(out_root, rel_prefix + "_R1_001.fastq.gz")
    self.assertTrue(os.path.lexists(read1_path))

    # The sample sheet is expected next to it, in the same directory.
    samplesheet_path = os.path.join(
        out_root, os.path.dirname(rel_prefix), "SampleSheet.csv"
    )
    self.assertTrue(os.path.lexists(samplesheet_path))
def _setup(self):
    """Collect the targets to run and register them in the backend.

    Targets are taken from ``self.target_iterator()``. When the output
    directory differs from the input directory and there is at least one
    target, the targets are passed through ``make_fastq_links`` and replaced
    by its return value. The resulting list is stored on ``self.targets`` and
    published under ``backend.__global_vars__["targets"]``.

    If no input directory is set, an error is logged and an empty target
    list is registered; the target iterator is not run.
    """
    if self.indir is None:
        logger.error("Need input directory to run")
        # Without an input directory there is nothing to collect. Stop here
        # so the empty list is not overwritten by the iterator below.
        self.targets = []
        backend.__global_vars__["targets"] = self.targets
        return

    # Default to working in place when no output directory was given.
    if self.outdir is None:
        self.outdir = self.indir

    self.targets = list(self.target_iterator())

    # Only relocate targets when the output location differs and there is
    # something to relocate.
    if self.outdir != self.indir and self.targets:
        self.targets = make_fastq_links(self.targets, self.indir, self.outdir)

    # Finally register targets in backend
    backend.__global_vars__["targets"] = self.targets
def _setup(self):
    """Gather run targets and publish them to the backend.

    The targets come from ``self.target_iterator()``. If ``self.outdir`` is
    unset it defaults to ``self.indir``. When the two directories differ and
    the target list is non-empty, the list is replaced by the return value of
    ``make_fastq_links``. The final list is kept on ``self.targets`` and
    stored in ``backend.__global_vars__["targets"]``.

    With no input directory, an error is logged, an empty list is
    registered, and the method returns without running the iterator.
    """
    if self.indir is None:
        logger.error("Need input directory to run")
        # Return early: falling through would overwrite this empty list with
        # whatever the iterator yields for a missing input directory.
        self.targets = []
        backend.__global_vars__["targets"] = self.targets
        return

    if self.outdir is None:
        self.outdir = self.indir

    self.targets = list(self.target_iterator())

    # Skip make_fastq_links for an empty target list; there is nothing to
    # process in that case.
    if self.outdir != self.indir and self.targets:
        self.targets = make_fastq_links(self.targets, self.indir, self.outdir)

    # Finally register targets in backend
    backend.__global_vars__["targets"] = self.targets
def requires(self):
    """Return the PicardMetrics tasks this task depends on.

    Targets are produced by the function returned from
    ``self.set_target_generator_function()``, called with the input
    directory and the sample/flowcell/lane selection. When the output
    directory differs from the input directory and targets were found, the
    targets are replaced by the return value of ``make_fastq_links``.

    Returns:
        A list with one ``PicardMetrics`` task per target, whose ``target``
        is the target's second field followed by ``.sort.merge.dup``. The
        list is empty when no input directory is set, when no target
        generator function is available, or when no targets are found.
    """
    if not self.indir:
        # Return a list on every path so the result can always be iterated.
        return []

    # Default to working in place when no output directory was given.
    if self.outdir is None:
        self.outdir = self.indir

    tgt_fun = self.set_target_generator_function()
    if not tgt_fun:
        return []

    targets = tgt_fun(
        self.indir,
        sample=self.sample,
        flowcell=self.flowcell,
        lane=self.lane,
    )

    # Only call make_fastq_links when there is something to process.
    if self.outdir != self.indir and targets:
        targets = make_fastq_links(targets, self.indir, self.outdir)

    return [
        PicardMetrics(target="{}.{}".format(tgt[1], "sort.merge.dup"))
        for tgt in targets
    ]
tgt_gen_fun = hdl # Collect information about what samples to run, and on how many # nodes. This is somewhat convoluted since ratatosk_run_scilife # also collects sample information, but this step is necessary as # we need to wrap ratatosk_run_scilife.py in drmaa targets = tgt_gen_fun(indir=pargs.indir, sample=pargs.sample, flowcell=pargs.flowcell, lane=pargs.lane) # After getting run list, if output directory is different to # input directory, link raw data files to output directory and # remember to use this directory for ratatosk tasks. In this way # we actually can run on subsets of sample runs or flowcells if not pargs.outdir: pargs.outdir = pargs.indir if pargs.outdir != pargs.indir: targets = make_fastq_links(targets, pargs.indir, pargs.outdir) # Group samples sorted_samples = sorted(targets, key=lambda t:t.sample_id()) samples = {} for k, g in itertools.groupby(sorted_samples, key=lambda t:t.sample_id()): samples[k] = list(g) # Initialize command if pargs.sample_target_suffix or pargs.run_target_suffix: cmd = [RATATOSK_RUN, "GenericWrapper", '--workers', pargs.workers, '--scheduler-host', pargs.scheduler_host, '--task', pargs.task] else: cmd = [RATATOSK_RUN, pargs.task, '--indir', pargs.indir, '--outdir', pargs.outdir, '--workers', pargs.workers, '--scheduler-host', pargs.scheduler_host] if pargs.config_file: logging.info("setting config to {}".format(pargs.config_file))