예제 #1
0
    def process(self):
        """Extract run metadata from the flowcell directory name and validate
        the sample sheet found in that directory.

        Reads ``self.input_dir`` (a path string, or an indexable collection of
        paths of which only the first is used) and ``self.output_dir``.

        Side effects:
          * ``self.input_dir`` is replaced by a single path ending in ``/``.
          * ``self.meta`` gets a ``'pipeline'`` entry holding the fields parsed
            from the directory name, the project name and the sample sheet
            line count.
          * ``self.output_files`` is set to a one-element list containing
            whatever ``SampleSheet.validate`` returns.

        Raises:
            ValueError: if the last component of the input directory does not
                match ``<DATE>_<HISEQ_SN>_<RUN_COUNT>_<FC_POS><FC_ID>``.
        """

        # Reduce inputs to only first element.  Strings are excluded
        # explicitly: on Python 3 a str also exposes __iter__, and indexing it
        # would silently truncate the path to its first character.
        if not isinstance(self.input_dir, str) and hasattr(self.input_dir, '__iter__'):
            self.input_dir = self.input_dir[0]

        if not self.input_dir.endswith('/'):
            self.input_dir += '/'

        # With the trailing slash in place, dirname() strips exactly that
        # slash, so split() yields (parent directory, flowcell directory name).
        (parent_dir, flowcell_dir) = os.path.split(os.path.dirname(self.input_dir))

        parsed = re.search(r'''(?P<DATE>\d{6})_
                               (?P<HISEQ_SN>\w{6})_
                               (?P<RUN_COUNT>\d{4})_
                               (?P<FC_POS>[AB])
                               (?P<FC_ID>.*$)''', flowcell_dir, re.X)
        if parsed is None:
            # Fail early with a readable message instead of an AttributeError
            # on ``None.group`` further down.
            raise ValueError(
                'Cannot parse flowcell directory name %r; expected '
                '<DATE>_<HISEQ_SN>_<RUN_COUNT>_<FC_POS><FC_ID>' % flowcell_dir)

        # The name of the parent directory is used as the instrument label.
        hiseq_name = os.path.basename(parent_dir)

        ss = SampleSheet(os.path.join(self.input_dir, 'SampleSheet.csv'))
        validated_path = os.path.join(self.output_dir, 'sample_sheet_validated.csv')
        project_name = ss.get_project_name() or 'DefaultProject'

        run_desc = 'Flowcell %s on %s/%s' % (parsed.group('FC_ID'),
                                             hiseq_name,
                                             parsed.group('FC_POS'))

        self.meta.update({
                'pipeline': {
                    'date'         : parsed.group('DATE'),
                    'descr'        : run_desc,
                    'fc_id'        : parsed.group('FC_ID'),
                    'fc_pos'       : parsed.group('FC_POS'),
                    'hiseq'        : hiseq_name,
                    'hiseq_sn'     : parsed.group('HISEQ_SN'),
                    'project_name' : project_name,
                    'run_count'    : int(parsed.group('RUN_COUNT')),
                    'nfiles'       : ss.get_lines_count()
                }
            })

        self.output_files = [ss.validate(project_name, validated_path)]