Esempio n. 1
0
    def unsplit_fa(self, input_file_path, output_file_path):
        """Copy every record of a fasta file into a new file, stored unsplit.

        Each record read from ``input_file_path`` is passed to the output
        writer with ``split=False``.
        NOTE(review): presumably ``split=False`` makes the writer emit each
        sequence on a single line -- confirm against the ``fa`` library.

        Args:
            input_file_path: path of the fasta file to read.
            output_file_path: path of the fasta file to write.
        """
        # Named ``source`` rather than ``input`` so the builtin is not shadowed.
        source = fa.SequenceSource(input_file_path)
        output = fa.FastaOutput(output_file_path)

        try:
            while source.next():
                output.store(source, split=False)
        finally:
            # Close the writer even when reading or storing fails, so the
            # output handle is never leaked.
            output.close()
Esempio n. 2
0
    def combine_w_gast_fa(self, input_file_path, output_file_path):
        """Write a copy of a fasta file whose ids carry a field from the gast file.

        The companion file ``input_file_path + ".gast"`` is searched with
        ``self.lines_that_contain`` for each record id.  The first returned
        line is split on tabs and its second field is appended to the record
        id after a ``|`` separator.  The record is then stored, unsplit, in
        ``output_file_path``.

        Args:
            input_file_path: path of the fasta file to read; the gast file is
                expected next to it with a ``.gast`` suffix.
            output_file_path: path of the fasta file to write.

        Raises:
            IndexError: presumably when no gast line matches a record id or
                the matching line has no second tab-separated field (the
                lookups ``res[0]`` and ``gast_taxonomy[1]`` are unguarded).
        """
        output = fa.FastaOutput(output_file_path)
        gast_file_name = input_file_path + ".gast"
        # The gast file is read once, on the first record, instead of being
        # reopened (and never closed) for every record.  Loading lazily keeps
        # the old behaviour of not touching it when the fasta file is empty.
        gast_file_content = None

        try:
            fa_input = fa.SequenceSource(input_file_path)
            while fa_input.next():
                if gast_file_content is None:
                    with open(gast_file_name, "r") as gast_file:
                        gast_file_content = gast_file.readlines()
                res = self.lines_that_contain(fa_input.id, gast_file_content)
                gast_taxonomy = res[0].split("\t")
                fa_input.id = fa_input.id + "|" + gast_taxonomy[1]
                output.store(fa_input, split=False)
        finally:
            # Always release the output handle, even if a lookup fails.
            output.close()
Esempio n. 3
0
    def move_out_chimeric(self):
        """Write a non-chimeric copy of every input fasta file.

        For each entry of ``self.input_file_names`` the records whose id is
        not among the ids returned by ``self.get_chimeric_ids()`` are stored
        in a new file named after the input path plus
        ``self.nonchimeric_suffix``.
        """
        ids_to_drop = self.get_chimeric_ids()
        for name_key in self.input_file_names:
            source_path = os.path.join(self.indir,
                                       self.input_file_names[name_key])
            # NOTE(review): the file is opened with ReadFasta and closed
            # straight away; presumably a parse/validation pass -- confirm
            # whether it is still needed.
            fa.ReadFasta(source_path).close()

            kept_writer = fa.FastaOutput(source_path + self.nonchimeric_suffix)
            records = fa.SequenceSource(source_path, lazy_init=False)
            while records.next():
                if records.id in ids_to_drop:
                    # Chimeric read: leave it out of the new file.
                    continue
                kept_writer.store(records, store_frequencies=False)
            kept_writer.close()
Esempio n. 4
0
    def move_out_chimeric(self):
        """Write the fasta file again without the reads flagged as chimeric.

        Chimeric ids are collected from both the "txt" and the "db" chimera
        files under ``self.dir_name``.  Their union is reported on stdout and
        every record of ``self.chg_file`` whose id is not in that union is
        stored in ``self.output_file_name``.
        """
        txt_path = os.path.join(self.dir_name, self.chimeric_file_name_txt)
        ids_from_txt = self.get_chimeric_ids(txt_path)
        db_path = os.path.join(self.dir_name, self.chimeric_file_name_db)
        ids_from_db = self.get_chimeric_ids(db_path)

        ids_to_drop = set(ids_from_txt).union(ids_from_db)
        print("len(all_chimeric_ids) = ")
        print(len(ids_to_drop))

        kept_path = os.path.join(self.dir_name, self.output_file_name)
        kept_writer = fa.FastaOutput(kept_path)
        source_path = os.path.join(self.dir_name, self.chg_file)
        records = fa.SequenceSource(source_path, lazy_init=False)

        while next(records):
            if records.id in ids_to_drop:
                # Chimeric read: leave it out of the new file.
                continue
            kept_writer.store(records, store_frequencies=False)
        kept_writer.close()
Esempio n. 5
0
    def write_clean_fasta_file(self):
        """
        Rewrite each lane's trimmed fasta file without the deleted reads.

        For every lane key that has at least one id in ``self.deleted_ids``,
        the records of ``<lane_key>.trimmed.fa`` whose id is not deleted are
        written to ``<lane_key>.newtrimmed.fa``.  The original file is then
        renamed to ``<lane_key>.trimmed_with_chimera.fa`` and the new file
        takes over the ``<lane_key>.trimmed.fa`` name.

        Lanes with no deleted ids are skipped before any file is opened, so
        no empty ``.newtrimmed.fa`` file or open handle is left behind.
        """
        # NOTE(review): fixed two second pause kept from the original code;
        # presumably it waits for earlier writers to finish -- confirm
        # whether it is still required.
        sleep(2)
        for lane_key in self.lane_keys:
            logger.debug("write_clean_fasta_file working on lanekey: " +
                         lane_key)
            original_trimmed_file = os.path.join(self.trim_dir,
                                                 lane_key + ".trimmed.fa")
            new_trimmed_file_name = os.path.join(self.trim_dir,
                                                 lane_key + ".newtrimmed.fa")

            logger.debug(
                "write_clean_fasta_file about to check trimmedfasta file")
            deleted_id_list = self.deleted_ids[lane_key]
            if len(deleted_id_list) == 0:
                # Nothing to remove for this lane: keep its files untouched.
                continue
            # A set gives constant-time membership tests in the copy loop.
            deleted_ids = set(deleted_id_list)

            new_trimmed_file = fa.FastaOutput(new_trimmed_file_name)
            try:
                trimmedfasta = fa.SequenceSource(original_trimmed_file)
                while trimmedfasta.next():
                    if trimmedfasta.id not in deleted_ids:
                        new_trimmed_file.store(trimmedfasta)
            finally:
                # Release the output handle even when the copy fails.
                new_trimmed_file.close()

            # Keep the old file under a new name, then promote the cleaned
            # file to the original "trimmed" name.
            os.rename(
                original_trimmed_file,
                os.path.join(self.trim_dir,
                             lane_key + ".trimmed_with_chimera.fa"))
            os.rename(new_trimmed_file_name, original_trimmed_file)
Esempio n. 6
0
    def write_clean_uniques_file(self):
        """
        Write out a new unique file with all the deleted ids removed,
        especially the chimeras which were detected after the original
        unique file was created.

        For every lane key that has at least one id in ``self.deleted_ids``,
        the records of ``<lane_key>.unique.fa`` are copied to
        ``<lane_key>.newunique.fa``.  A record whose id is a key of
        ``self.orphans[lane_key]`` first has its id replaced by the first
        element of the matching orphan entry; records whose (possibly
        replaced) id is deleted are left out.  The original file is then
        renamed to ``<lane_key>.unique_dirty.fa`` and the new file takes over
        the ``<lane_key>.unique.fa`` name.

        Lanes with no deleted ids are skipped before any file is opened, so
        no empty ``.newunique.fa`` file or open handle is left behind.
        """
        for lane_key in self.lane_keys:
            deleted_id_list = self.deleted_ids[lane_key]
            if len(deleted_id_list) == 0:
                # Nothing to remove for this lane: keep its files untouched.
                continue
            # A set gives constant-time membership tests in the copy loop.
            deleted_ids = set(deleted_id_list)

            new_unique_file_name = os.path.join(self.trim_dir,
                                                lane_key + ".newunique.fa")
            original_unique_file = os.path.join(self.trim_dir,
                                                lane_key + '.unique.fa')

            new_unique_file = fa.FastaOutput(new_unique_file_name)
            try:
                uniquesfasta = fa.SequenceSource(original_unique_file)
                while uniquesfasta.next():
                    lane_orphans = self.orphans[lane_key]
                    if uniquesfasta.id in lane_orphans:
                        # Orphaned id: switch to the replacement id before
                        # the deleted-id check below.
                        uniquesfasta.id = lane_orphans[uniquesfasta.id][0]
                    if uniquesfasta.id not in deleted_ids:
                        new_unique_file.store(uniquesfasta)
            finally:
                # Release the output handle even when the copy fails.
                new_unique_file.close()

            # Keep the old file under a new name, then promote the cleaned
            # file to the original "unique" name.
            os.rename(
                original_unique_file,
                os.path.join(self.trim_dir, lane_key + ".unique_dirty.fa"))
            os.rename(new_unique_file_name, original_unique_file)