def process(self):
    """Configure the base mask, run the parent step and collect FASTQ outputs.

    Steps, in order:

    1. If ``self.input_dir`` is a non-string iterable, keep only its first
       element, then append ``Data/Intensities/BaseCalls/`` to it.
    2. If ``self.sample_sheet`` is a list, unwrap its single element.
    3. Build ``self.use_base_mask`` from the values returned by
       ``SampleSheet.get_mask_length()`` (two index reads when the second
       returned value is truthy, one otherwise).
    4. Delegate to the parent class ``process()``.
    5. Set ``self.output_files`` to the ``*.fastq.gz`` files found under
       ``<output_dir>/Project_<project_name>``.
    6. Fill ``self.meta['job']['sample_id']`` with, for each output file, the
       first sample id ``sid`` such that the file's basename starts with
       ``"<sid>_"``. Files matching no sample id add no entry, so the list
       may be shorter than ``self.output_files``.

    Raises:
        Exception: if ``self.sample_sheet`` is a list with more than one item.
        IndexError: if ``self.input_dir`` or ``self.sample_sheet`` is an
            empty sequence (raised by the ``[0]`` lookup).
    """
    # Reduce inputs to only first element.
    # A plain string also exposes __iter__ on Python 3; indexing it would
    # silently truncate the path to its first character, so strings are
    # explicitly left untouched.
    if not isinstance(self.input_dir, str) and hasattr(self.input_dir, '__iter__'):
        self.input_dir = self.input_dir[0]
    self.input_dir = os.path.join(self.input_dir, "Data/Intensities/BaseCalls/")

    if isinstance(self.sample_sheet, list):
        if len(self.sample_sheet) > 1:
            raise Exception('Too many sample sheet files: %s' % ','.join(self.sample_sheet))
        self.sample_sheet = self.sample_sheet[0]

    ss = SampleSheet(self.sample_sheet)
    mask_length, double_idx = ss.get_mask_length()
    if double_idx:
        mask_template = "y*,I{0},I{0},Y*"
    else:
        mask_template = "y*,I{0},Y*"
    # Kept as an explicit str() cast, as in the original implementation.
    self.use_base_mask = str(mask_template.format(mask_length))

    super(CasavaDemux, self).process()

    prj_dir = os.path.join(self.output_dir, 'Project_' + self.meta['pipeline']['project_name'])
    self.output_files = utils.find(prj_dir, "*.fastq.gz")

    # Set the metadata: one sample id per output file that can be matched.
    self.meta['job']['sample_id'] = []
    sample_ids = ss.get_sample_ids()
    for output_file in self.output_files:
        file_name = os.path.basename(output_file)  # invariant for the inner loop
        for sample_id in sample_ids:
            if file_name.startswith("%s_" % sample_id):
                self.meta['job']['sample_id'].append(sample_id)
                break  # first matching sample id wins
def process(self):
    """Store the result of ``ut.find`` in ``self.output_files``.

    ``ut.find`` is called with ``self.input_dirs``, ``self.pattern`` and
    ``self.regex_match``, in that order; whatever it returns is assigned
    unchanged.
    """
    found = ut.find(self.input_dirs, self.pattern, self.regex_match)
    self.output_files = found