Python SampleSheet.get_sample_ids Examples

Programming Language: Python

Namespace/Package Name: pypers.utils.samplesheet

Class/Type: SampleSheet

Method/Function: get_sample_ids

Examples at hotexamples.com: 3

Python SampleSheet.get_sample_ids - 3 examples found. These are the top-rated real-world Python examples of pypers.utils.samplesheet.SampleSheet.get_sample_ids, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

SampleSheet(3)

get_sample_ids(2)

get_lines_count(1)

get_mask_length(1)

get_project_name(1)

validate(1)

Example #1

Show file

    def process(self):
        """
        Run the demultiplexing step with a base mask built from the sample sheet.

        Reads ``self.input_dir`` and ``self.sample_sheet`` (each may be a
        single value or a sequence; exactly one sample sheet is accepted),
        sets ``self.use_base_mask`` from ``SampleSheet.get_mask_length()``,
        delegates the run itself to the parent class ``process()`` and then
        fills ``self.output_files`` and ``self.meta['job']['sample_id']``.

        Raises:
            Exception: if no sample sheet or more than one sample sheet
                is supplied
        """

        # Reduce inputs to only first element. Strings are excluded
        # explicitly: on Python 3 they expose __iter__ too, and indexing
        # one would cut the path down to its first character.
        if (hasattr(self.input_dir, '__iter__')
                and not isinstance(self.input_dir, (str, bytes))):
            self.input_dir = self.input_dir[0]

        self.input_dir = os.path.join(self.input_dir, "Data/Intensities/BaseCalls/")

        # isinstance also accepts tuples and list subclasses
        if isinstance(self.sample_sheet, (list, tuple)):
            if not self.sample_sheet:
                # Fail with a clear message instead of a bare IndexError
                raise Exception('No sample sheet file provided')
            if len(self.sample_sheet) > 1:
                raise Exception('Too many sample sheet files: %s' % ','.join(self.sample_sheet))
            self.sample_sheet = self.sample_sheet[0]

        ss = SampleSheet(self.sample_sheet)
        mask_length, double_idx = ss.get_mask_length()

        # A truthy double_idx adds a second I<n> segment to the mask
        if double_idx:
            self.use_base_mask = "y*,I{0},I{0},Y*".format(mask_length)
        else:
            self.use_base_mask = "y*,I{0},Y*".format(mask_length)

        self.use_base_mask = str(self.use_base_mask)
        super(CasavaDemux, self).process()

        prj_dir = os.path.join(self.output_dir, 'Project_' + self.meta['pipeline']['project_name'])
        self.output_files = utils.find(prj_dir, "*.fastq.gz")

        # Set the metadata: for every output file record the first sample id
        # whose "<sample_id>_" prefix starts the file's base name. Files that
        # match no sample id contribute nothing to the list.
        self.meta['job']['sample_id'] = []
        sample_ids = ss.get_sample_ids()
        for output_file in self.output_files:
            base_name = os.path.basename(output_file)
            for sample_id in sample_ids:
                if base_name.startswith("%s_" % sample_id):
                    self.meta['job']['sample_id'].append(sample_id)
                    break

Example #2

Show file

File: fofn.py Project: blankenberg/pypers

    def create(sample_sheet, input_dir, output_dir=None, output_file_name=None):
        """
        Create a file of file names (fofn) and return the path to it

        Args:
             sample_sheet:
                full path to the sample sheet

             input_dir:
                path to the directory containing the input files

             output_dir:
                directory the fofn is written to. If not specified, the
                directory of the sample sheet is used

             output_file_name:
                name of the output fofn. If not specified, the name is the
                sample sheet base name (last extension removed) followed
                by "_fofn.csv"

        Returns:
             path of the written fofn

        Raises:
             Exception: if `sample_sheet` or `input_dir` does not exist
        """

        if not os.path.exists(sample_sheet):
            raise Exception("input error: parameter `sample_sheet` %s does not exist" % sample_sheet)

        if not os.path.exists(input_dir):
            # Report the path that is actually missing (not the sample sheet)
            raise Exception("input error: parameter `input_dir` %s does not exist" % input_dir)

        print("*********************************")
        print("sample_sheet: %s" % os.path.abspath(sample_sheet))
        print("input_dir: %s" % os.path.abspath(input_dir))
        print("*********************************")

        #set default name of the output fofn
        if not output_file_name:
            output_file_name = os.path.basename(sample_sheet).rsplit(".", 1)[0] + "_fofn.csv"

        if not output_dir:
            output_dir = os.path.dirname(sample_sheet)

        output_file = os.path.join(output_dir, output_file_name)

        with open(output_file, 'w') as f_fofn:
            ss = SampleSheet(sample_sheet)
            for sample_id in ss.get_sample_ids():
                print("*********************************")
                # Single-argument print(...) behaves the same on Python 2 and 3
                print("sample_id : %s" % sample_id)

                # Token searched for in file names; constant for the whole walk
                sample_tag = '%s_' % sample_id

                for root, _dirs, file_list in os.walk(input_dir):
                    #group the files by sample id and read number
                    r1_files = [
                        os.path.join(root, file_name) for file_name in file_list
                        if sample_tag in file_name and Fofn.r1_regex.search(file_name)
                    ]
                    r2_files = [
                        os.path.join(root, file_name) for file_name in file_list
                        if sample_tag in file_name and Fofn.r2_regex.search(file_name)
                    ]

                    for r1_file in r1_files:
                        #filter the R2 files that match the R1 file base
                        r2_matchs = []
                        if r2_files:
                            # Extract the R1 base once per R1 file rather than
                            # once per R2 candidate
                            r1_base = Fofn.r1_regex.search(r1_file).group(1)
                            r2_matchs = [
                                candidate for candidate in r2_files
                                if r1_base in candidate
                            ]

                        if r2_matchs:
                            Fofn._write_record(f_fofn, r1_file, r2_matchs[0], sample_id)
                        else:
                            if r2_files:
                                print("No R2 found for sample Id %s" % sample_id)
                            Fofn._write_record(f_fofn, r1_file, '', sample_id)

                    if not r1_files:
                        # Only R2 files in this directory: write them with an
                        # empty R1 column
                        for r2_file in r2_files:
                            Fofn._write_record(f_fofn, '', r2_file, sample_id)

        return output_file

Example #3

Show file

File: fofn.py Project: fronga/pypers

    def create(sample_sheet,
               input_dir,
               output_dir=None,
               output_file_name=None):
        """
        Create a file of file names (fofn) and return the path to it

        Args:
             sample_sheet:
                full path to the sample sheet

             input_dir:
                path to the directory containing the input files

             output_dir:
                directory the fofn is written to. If not specified, the
                directory of the sample sheet is used

             output_file_name:
                name of the output fofn. If not specified, the name is the
                sample sheet base name (last extension removed) followed
                by "_fofn.csv"

        Returns:
             path of the written fofn

        Raises:
             Exception: if `sample_sheet` or `input_dir` does not exist
        """

        if not os.path.exists(sample_sheet):
            raise Exception(
                "input error: parameter `sample_sheet` %s does not exist" %
                sample_sheet)

        if not os.path.exists(input_dir):
            # Report the path that is actually missing (not the sample sheet)
            raise Exception(
                "input error: parameter `input_dir` %s does not exist" %
                input_dir)

        print("*********************************")
        print("sample_sheet: %s" % os.path.abspath(sample_sheet))
        print("input_dir: %s" % os.path.abspath(input_dir))
        print("*********************************")

        #set default name of the output fofn
        if not output_file_name:
            output_file_name = os.path.basename(sample_sheet).rsplit(
                ".", 1)[0] + "_fofn.csv"

        if not output_dir:
            output_dir = os.path.dirname(sample_sheet)

        output_file = os.path.join(output_dir, output_file_name)

        with open(output_file, 'w') as f_fofn:
            ss = SampleSheet(sample_sheet)
            sample_id_list = ss.get_sample_ids()
            for sample_id in sample_id_list:
                print("*********************************")
                # Single-argument print(...) behaves the same on Python 2 and 3
                print("sample_id : %s" % sample_id)

                # Token searched for in file names; constant for the whole walk
                sample_tag = '%s_' % sample_id

                for root, _dirs, file_list in os.walk(input_dir):
                    #group the files by sample id and read number
                    r1_files = [
                        os.path.join(root, file_name)
                        for file_name in file_list
                        if (sample_tag in file_name
                            and Fofn.r1_regex.search(file_name))
                    ]
                    r2_files = [
                        os.path.join(root, file_name)
                        for file_name in file_list
                        if (sample_tag in file_name
                            and Fofn.r2_regex.search(file_name))
                    ]

                    if r1_files:
                        for r1_file in r1_files:
                            #filter the R2 files that match the R1 file base
                            r2_matchs = []
                            if r2_files:
                                # Extract the R1 base once per R1 file rather
                                # than once per R2 candidate
                                r1_base = Fofn.r1_regex.search(
                                    r1_file).group(1)
                                r2_matchs = [
                                    candidate for candidate in r2_files
                                    if r1_base in candidate
                                ]

                            if r2_matchs:
                                Fofn._write_record(f_fofn, r1_file,
                                                   r2_matchs[0], sample_id)
                            else:
                                if r2_files:
                                    print("No R2 found for sample Id %s" %
                                          sample_id)
                                Fofn._write_record(f_fofn, r1_file, '',
                                                   sample_id)
                    else:
                        # Only R2 files in this directory: write them with
                        # an empty R1 column
                        for r2_file in r2_files:
                            Fofn._write_record(f_fofn, '', r2_file,
                                               sample_id)

        return output_file