Python SeqIO.parseの例、Utils.Bio.SeqIO.parse Pythonの例

コード例 #1

0

ファイルを表示

ファイル: handler.py プロジェクト: Ambuj-UF/ConCat-1.0

 def fileOpenID(self):
     """
     Quick record import: map every alignment file to the IDs it contains.

     `self.file_format` is a 1-based index into the format tables below
     (1 = fasta, 2 = nexus, 3 = phylip, 4 = phylip-sequential,
     5 = phylip-relaxed). Files are looked up in the current working
     directory using the glob pattern that belongs to that format.

     Returns - Dictionary keyed by the part of the file name before the
               first "." with the list of record IDs as value.
     """
     file_format = self.file_format

     extList = ["*.fas", "*.nex", "*.phy", "*.phy", "*.phy"]
     typeList = ["fasta", "nexus", "phylip", "phylip-sequential", "phylip-relaxed"]

     pattern = extList[file_format - 1]
     seqType = typeList[file_format - 1]

     idsByGene = {}
     for filename in glob.glob(pattern):
         # The context manager closes the file even when parsing fails.
         # NOTE: the 'U' flag is deprecated on Python 3 and rejected from
         # Python 3.11 onwards; the mode is left unchanged here.
         with open(filename, "rU") as handle:
             idList = [record.id for record in SeqIO.parse(handle, seqType)]
         gene = filename.split(".")[0]
         idsByGene[gene] = idList

     return idsByGene

コード例 #2

0

ファイルを表示

ファイル: functions.py プロジェクト: Ambuj-UF/ConCat-1.0

def Convert(input, output, filename):
    """
        File format conversion program (fasta, strict-phylip, sequential-phylip, relaxed-phylip and nexus).

        The working directory is changed to its parent before any file is
        touched, so `filename` is resolved relative to that parent directory.
        The result is written next to the input, using the part of `filename`
        before the first "." plus the extension of the output format.

        @parameter input - Input file format.
        @parameter output - Output file format (must be a key of formDict).
        @parameter filename - Input filename.

        Raises - KeyError when `output` is not a known format name.
        """
    formDict = {
        'fasta': '*.fas',
        'nexus': '*.nex',
        'phylip': '*.phy',
        'phylip-sequential': '*.phy',
        'phylip-relaxed': '*.phy'
    }

    os.chdir('..')

    # 'nexus' maps to 'nex', so this name is valid for both branches below.
    outName = filename.split('.')[0] + '.' + formDict[output].split('.')[1]

    if input == 'fasta' and output == 'nexus':
        # Context managers make sure both files are closed, also on errors.
        with open(filename) as handle:
            alignment = AlignIO.read(handle, "fasta", alphabet=Gapped(IUPAC.protein))
        with open(outName, 'w') as g:
            g.write(alignment.format("nexus"))

    else:
        try:
            with open(filename, 'rU') as handle:
                record = list(SeqIO.parse(handle, input))
            with open(outName, 'w') as fp:
                SeqIO.write(record, fp, output)

        except Exception:
            # Do not fall through to the success message after a failure.
            print("Bad Alignment\n")
            return

    print("Final output saved in %s" % outName)

コード例 #3

0

ファイルを表示

    def fileOpenID(self):
        """
        Quick record import: map every alignment file to the IDs it contains.

        `self.file_format` is a 1-based index into the format tables below
        (1 = fasta, 2 = nexus, 3 = phylip, 4 = phylip-sequential,
        5 = phylip-relaxed). Files are looked up in the current working
        directory using the glob pattern that belongs to that format.

        Returns - Dictionary keyed by the part of the file name before the
                  first "." with the list of record IDs as value.
        """
        file_format = self.file_format

        extList = ["*.fas", "*.nex", "*.phy", "*.phy", "*.phy"]
        typeList = [
            "fasta", "nexus", "phylip", "phylip-sequential", "phylip-relaxed"
        ]

        pattern = extList[file_format - 1]
        seqType = typeList[file_format - 1]

        idsByGene = {}
        for filename in glob.glob(pattern):
            # The context manager closes the file even when parsing fails.
            # NOTE: the 'U' flag is deprecated on Python 3 and rejected from
            # Python 3.11 onwards; the mode is left unchanged here.
            with open(filename, "rU") as handle:
                idList = [
                    record.id for record in SeqIO.parse(handle, seqType)
                ]
            gene = filename.split(".")[0]
            idsByGene[gene] = idList

        return idsByGene

コード例 #4

0

ファイルを表示

    def nexML(self, filename):
        """
            Converts a Nexus alignment file and saves it as 'Results.xml'.

            NOTE(review): the method name refers to NexML, but the records
            are written with the "seqxml" writer - confirm which format is
            actually intended.

            @parameter filename - Nexus file to read.
        """

        # Read the input first so that a missing or unparsable input file
        # does not leave an empty 'Results.xml' behind.
        with open(filename, 'rU') as handleXML:
            recordsXML = list(SeqIO.parse(handleXML, "nexus"))

        with open('Results.xml', 'w') as fp:
            SeqIO.write(recordsXML, fp, "seqxml")

コード例 #5

0

ファイルを表示

ファイル: handler.py プロジェクト: Ambuj-UF/ConCat-1.0

 def nexML(self, filename):
     """
         Converts a Nexus alignment file and saves it as 'Results.xml'.

         NOTE(review): the method name refers to NexML, but the records
         are written with the "seqxml" writer - confirm which format is
         actually intended.

         @parameter filename - Nexus file to read.
     """

     # Read the input first so that a missing or unparsable input file
     # does not leave an empty 'Results.xml' behind.
     with open(filename, 'rU') as handleXML:
         recordsXML = list(SeqIO.parse(handleXML, "nexus"))

     with open('Results.xml', 'w') as fp:
         SeqIO.write(recordsXML, fp, "seqxml")

コード例 #6

0

ファイルを表示

def mrnaAlign(inputFile, pkg, arguments=None):
    """
    Align 'Align/<inputFile>' with an external aligner and store the result
    as a Nexus file in the 'Input' directory.

    The aligner writes FASTA to 'Input/<inputFile>'; that file is parsed,
    re-written as 'Input/<name before the first dot>.nex' and then removed.

    @parameter inputFile - File name (without directory) inside 'Align/'.
    @parameter pkg - 'muscle' selects the bundled MUSCLE binary; any other
                     value runs the bundled MAFFT script, provided that
                     `arguments` is given.
    @parameter arguments - Option string for MAFFT; '[' and ']' characters
                           are stripped. When None, MUSCLE is used.
    """
    # MAFFT cannot be run without its option string: fall back to MUSCLE.
    if pkg != 'muscle' and arguments is None:
        pkg = 'muscle'

    source = "Align/" + inputFile
    aligned = "Input/" + inputFile

    if pkg == 'muscle':
        # A separate MUSCLE binary is shipped for macOS and for other systems.
        if 'Darwin' in platform.system():
            binary = "./src/muscle/muscle"
        else:
            binary = "./src/muscle/muscleLinux"
        command = "%s -in %s -out %s" % (binary, source, aligned)
    else:
        arguments = arguments.replace('[', '').replace(']', '')
        command = "./src/mafft/mafft.bat %s %s > %s" % (arguments, source,
                                                        aligned)

    # NOTE(review): file names and arguments are interpolated into a shell
    # command; names containing spaces or shell metacharacters will break it.
    # The aligner's console output is discarded. It is sent to the null
    # device instead of an unread pipe, because subprocess.call() can block
    # forever once an unread pipe fills up.
    with open(os.devnull, 'w') as devnull:
        subprocess.call(command,
                        shell=True,
                        stdout=devnull,
                        stderr=subprocess.STDOUT)

    # Close the FASTA file before it is deleted below.
    with open(aligned, 'rU') as handle:
        record = list(SeqIO.parse(handle, 'fasta'))
    with open("Input/" + inputFile.split('.')[0] + ".nex", 'w') as fp:
        SeqIO.write(record, fp, 'nexus')
    os.remove(aligned)

コード例 #7

0

ファイルを表示

    def fileOpenConcNex(self):
        """
           Reads every "*.nex" file found in the current working directory.

           Returns - List with one entry per file; each entry is the list of
                     records parsed from that Nexus file.
        """

        recordList = []
        for filename in glob.glob("*.nex"):
            # Close each file as soon as it has been parsed, so handles do
            # not accumulate when many files are loaded.
            with open(filename, "rU") as handle:
                recordList.append(list(SeqIO.parse(handle, "nexus")))

        return recordList

コード例 #8

0

ファイルを表示

def cdsAlign(inputFile, pkg='muscle', omit=False, ign=False, CT=None):
    """
    Prepare the coding sequences in 'Align/<inputFile>' and hand them to the
    translation / alignment / clean-up helpers.

    Steps performed here:
      1. Pick a codon table from `CT`.
      2. Read the FASTA records and drop a trailing TAA, TGA or TAG codon.
      3. With `omit` set, discard records listed in the matching '.log' file.
      4. Call _translator, _alignP and _cleanAli (defined elsewhere; their
         exact behaviour is not visible from this function).

    @parameter inputFile - File name (without directory) inside 'Align/'.
    @parameter pkg - Aligner name, passed on to _alignP.
    @parameter omit - When True, records whose second '|'-separated ID field
                      appears as the first space-separated token of a line in
                      'Align/<name before the first dot>.log' are removed.
    @parameter ign - Passed on unchanged to _translator.
    @parameter CT - Codon table name. None selects table id 1; a name that
                    is not in the list below falls back to 'Standard'.
    """

    codonTables = [
        'Ascidian Mitochondrial', 'SGC9', 'Coelenterate Mitochondrial',
        'Protozoan Mitochondrial', 'Vertebrate Mitochondrial', 'Plant Plastid',
        'Thraustochytrium Mitochondrial', 'Blepharisma Macronuclear',
        'Mold Mitochondrial', 'Invertebrate Mitochondrial', 'Standard',
        'Trematode Mitochondrial', 'Scenedesmus obliquus Mitochondrial',
        'Euplotid Nuclear', 'Yeast Mitochondrial', 'Spiroplasma',
        'Alternative Flatworm Mitochondrial', 'Ciliate Nuclear', 'SGC8',
        'Alternative Yeast Nuclear', 'Hexamita Nuclear', 'SGC5', 'SGC4',
        'SGC3', 'SGC2', 'SGC1', 'SGC0', 'Flatworm Mitochondrial',
        'Dasycladacean Nuclear', 'Chlorophycean Mitochondrial', 'Mycoplasma',
        'Bacterial', 'Echinoderm Mitochondrial'
    ]

    if CT is None:
        table = CodonTable.ambiguous_dna_by_id[1]
    elif CT in codonTables:
        table = CodonTable.ambiguous_generic_by_name[CT]
    else:
        table = CodonTable.ambiguous_generic_by_name['Standard']

    # Context manager: the input file is closed once the records are loaded.
    with open("Align/" + inputFile, 'rU') as handle:
        records = list(SeqIO.parse(handle, 'fasta'))

    # Strip a terminal stop codon so it is not carried into the alignment.
    for rec in records:
        tail = rec.seq[-3:]
        if 'TAA' in tail or 'TGA' in tail or 'TAG' in tail:
            rec.seq = rec.seq[0:-3]

    # Explicit comparison kept on purpose: only True (or 1) enables omission.
    if omit == True:
        with open("Align/" + inputFile.split('.')[0] + '.log', 'r') as log:
            badQuality = set(line.split(' ')[0] for line in log)

        records = [
            rec for rec in records
            if rec.id.split('|')[1] not in badQuality
        ]

    records = _translator(records, ign, omit, table)
    _alignP(pkg)
    _cleanAli(records, omit, inputFile)

コード例 #9

0

ファイルを表示

ファイル: handler.py プロジェクト: Ambuj-UF/ConCat-1.0

 def fileOpenConcNex(self):
     """
        Reads every "*.nex" file found in the current working directory.

        Returns - List with one entry per file; each entry is the list of
                  records parsed from that Nexus file.
     """

     recordList = []
     for filename in glob.glob("*.nex"):
         # Close each file as soon as it has been parsed, so handles do
         # not accumulate when many files are loaded.
         with open(filename, "rU") as handle:
             recordList.append(list(SeqIO.parse(handle, "nexus")))

     return recordList

コード例 #10

0

ファイルを表示

    def fileOpenConc(self):
        """
            Opens the input alignment files used for concatenation when the
            input is not Nexus.

            `self.file_format` is a 1-based index into the tables below
            (1 = fasta, 2 = phylip, 3 = phylip-sequential,
            4 = phylip-relaxed). Files are taken from the current working
            directory.

            Returns - List with one entry per file; each entry is the list
                      of records parsed from that file.
        """

        file_format = self.file_format

        extList = ["*.fas", "*.phy", "*.phy", "*.phy"]
        typeList = ["fasta", "phylip", "phylip-sequential", "phylip-relaxed"]

        pattern = extList[file_format - 1]
        seqType = typeList[file_format - 1]

        recordList = []
        for filename in glob.glob(pattern):
            # Close each file as soon as it has been parsed.
            with open(filename, "rU") as handle:
                recordList.append(list(SeqIO.parse(handle, seqType)))

        return recordList

コード例 #11

0

ファイルを表示

def Convert(input, output, filename):
    """
        File format conversion program (fasta, strict-phylip, sequential-phylip, relaxed-phylip and nexus).

        The working directory is changed to its parent before any file is
        touched, so `filename` is resolved relative to that parent directory.
        The result is written next to the input, using the part of `filename`
        before the first "." plus the extension of the output format.

        @parameter input - Input file format.
        @parameter output - Output file format (must be a key of formDict).
        @parameter filename - Input filename.

        Raises - KeyError when `output` is not a known format name.
        """
    formDict = {
        'fasta': '*.fas',
        'nexus': '*.nex',
        'phylip': '*.phy',
        'phylip-sequential': '*.phy',
        'phylip-relaxed': '*.phy'
    }

    os.chdir('..')

    # 'nexus' maps to 'nex', so this name is valid for both branches below.
    outName = filename.split('.')[0] + '.' + formDict[output].split('.')[1]

    if input == 'fasta' and output == 'nexus':
        # Context managers make sure both files are closed, also on errors.
        with open(filename) as handle:
            alignment = AlignIO.read(handle,
                                     "fasta",
                                     alphabet=Gapped(IUPAC.protein))
        with open(outName, 'w') as g:
            g.write(alignment.format("nexus"))

    else:
        try:
            with open(filename, 'rU') as handle:
                record = list(SeqIO.parse(handle, input))
            with open(outName, 'w') as fp:
                SeqIO.write(record, fp, output)

        except Exception:
            # Do not fall through to the success message after a failure.
            print("Bad Alignment\n")
            return

    print("Final output saved in %s" % outName)

コード例 #12

0

ファイルを表示

ファイル: handler.py プロジェクト: Ambuj-UF/ConCat-1.0

 def fileOpenConc(self):
     """
         Opens the input alignment files used for concatenation when the
         input is not Nexus.

         `self.file_format` is a 1-based index into the tables below
         (1 = fasta, 2 = phylip, 3 = phylip-sequential,
         4 = phylip-relaxed). Files are taken from the current working
         directory.

         Returns - List with one entry per file; each entry is the list
                   of records parsed from that file.
     """

     file_format = self.file_format

     extList = ["*.fas", "*.phy", "*.phy", "*.phy"]
     typeList = ["fasta", "phylip", "phylip-sequential", "phylip-relaxed"]

     pattern = extList[file_format - 1]
     seqType = typeList[file_format - 1]

     recordList = []
     for filename in glob.glob(pattern):
         # Close each file as soon as it has been parsed.
         with open(filename, "rU") as handle:
             recordList.append(list(SeqIO.parse(handle, seqType)))

     return recordList

コード例 #13

0

ファイルを表示

def _cleanAli(recordNuc, omit, fileName):
    """
    Thread nucleotide sequences back onto the aligned sequences read from
    'tAligned.fas' and write the result as a Nexus file in 'Input/'.

    For every aligned record the matching nucleotide record is the first one
    in `recordNuc` whose id is contained in the aligned record's id. Each
    alignment column is then expanded: '-' becomes '---', 'Z' becomes 'NNN',
    and any other letter takes the next chunk returned by _spliter
    (presumably the next codon - confirm against _spliter). Records for
    which a chunk cannot be fetched are reported and left out of the output.

    @parameter recordNuc - Nucleotide records (objects with .id and .seq).
    @parameter omit - False writes '<name>.nex'; otherwise '<name>_omited.nex'.
    @parameter fileName - Original input file name; the part before the
                          first "." is used for the output name.
    """
    # Close the file after reading: it is deleted at the end of this function.
    with open('tAligned.fas', 'rU') as handleP:
        records = list(SeqIO.parse(handleP, 'fasta'))

    store = list()  # ids of records that could not be rebuilt
    for rec in records:
        nucData = [x.seq for x in recordNuc if x.id in rec.id]
        nucSeqData = _spliter(nucData[0], 3)
        sequence = Seq("", generic_dna)
        pos = 0  # index of the next chunk of nucSeqData to consume
        for amino in rec.seq:
            if amino == '-':
                sequence = sequence + Seq("---", generic_dna)
            elif amino == 'Z':
                sequence = sequence + Seq("NNN", generic_dna)
                pos = pos + 1
            else:
                try:
                    sequence = sequence + nucSeqData[pos]
                    pos = pos + 1
                except Exception:
                    # Typically the nucleotide data ran out; remember the
                    # record so it is excluded below.
                    if rec.id not in store:
                        store.append(rec.id)

        rec.seq = Seq(str(sequence), generic_dna)

    records = [x for x in records if x.id not in store]
    if store:
        print("Failed to align following sequences: %s" % store)

    # Explicit comparison kept on purpose so non-bool values behave as before.
    if omit == False:
        outName = "Input/" + fileName.split('.')[0] + ".nex"
    else:
        outName = "Input/" + fileName.split('.')[0] + "_omited.nex"

    with open(outName, 'w') as fp:
        SeqIO.write(records, fp, "nexus")

    os.remove('translated.fas')
    os.remove('tAligned.fas')

コード例 #14

0

ファイルを表示

    def RYcoding(self, file, position, msaObject):
        """
           RY-coding program: It replaces A & G with R and C & T with Y, either
           at every alignment column or at every third column starting from a
           user defined position. All other characters (gaps, ambiguity
           codes, lower-case letters) are left untouched.

           Record id, name and description are taken, in order, from the
           records stored in "Results.nex"; the sequence data comes from
           msaObject.

           @parameter file - Not used by this method.
           @parameter position - 'all', or the 1-based column of the first
                                 site to recode (e.g. 3 recodes columns
                                 3, 6, 9, ...).
           @parameter msaObject - Input multiple sequence alignment matrix data

           Return - Multiple sequence alignment object with RY coding

           Raises - ValueError when position is neither 'all' nor an integer
                    greater than or equal to 1.
        """
        ryCode = {'A': 'R', 'G': 'R', 'C': 'Y', 'T': 'Y'}

        def ReplaceAll(string):
            # Recode every column; characters without a mapping are kept.
            return "".join(ryCode.get(base, base) for base in string)

        def ReplaceThird(string, start):
            # `start` is 1-based, hence the shift to a 0-based index. Both
            # the test and the replacement act on the same column.
            letters = list(string)
            for i in range(start - 1, len(letters), 3):
                letters[i] = ryCode.get(letters[i], letters[i])
            return "".join(letters)

        if position != 'all':
            start = int(position)
            if start < 1:
                raise ValueError(
                    "RY coding position must be 'all' or an integer >= 1")

        with open("Results.nex", "rU") as handle:
            records = list(SeqIO.parse(handle, "nexus"))

        # Use each row's own sequence so single-row alignments work as well.
        seqlist = [str(row.seq) for row in msaObject]

        if position == 'all':
            newSeqList = [ReplaceAll(seqData) for seqData in seqlist]
        else:
            newSeqList = [ReplaceThird(seqData, start) for seqData in seqlist]

        data = []
        for counter, newSeqData in enumerate(newSeqList):
            source = records[counter]
            data.append(SeqRecord(Seq(newSeqData, generic_dna),
                                  id=source.id, name=source.name,
                                  description=source.description))

        newmsa = MultipleSeqAlignment(data)

        return newmsa

コード例 #15

0

ファイルを表示

ファイル: handler.py プロジェクト: Ambuj-UF/ConCat-1.0

    def RYcoding(self, file, position, msaObject):
        """
           RY-coding program: It replaces A & G with R and C & T with Y, either
           at every alignment column or at every third column starting from a
           user defined position. All other characters (gaps, ambiguity
           codes, lower-case letters) are left untouched.

           Record id, name and description are taken, in order, from the
           records stored in "Results.nex"; the sequence data comes from
           msaObject.

           @parameter file - Not used by this method.
           @parameter position - 'all', or the 1-based column of the first
                                 site to recode (e.g. 3 recodes columns
                                 3, 6, 9, ...).
           @parameter msaObject - Input multiple sequence alignment matrix data

           Return - Multiple sequence alignment object with RY coding

           Raises - ValueError when position is neither 'all' nor an integer
                    greater than or equal to 1.
        """
        ryCode = {'A': 'R', 'G': 'R', 'C': 'Y', 'T': 'Y'}

        def ReplaceAll(string):
            # Recode every column; characters without a mapping are kept.
            return "".join(ryCode.get(base, base) for base in string)

        def ReplaceThird(string, start):
            # `start` is 1-based, hence the shift to a 0-based index. Both
            # the test and the replacement act on the same column.
            letters = list(string)
            for i in range(start - 1, len(letters), 3):
                letters[i] = ryCode.get(letters[i], letters[i])
            return "".join(letters)

        if position != 'all':
            start = int(position)
            if start < 1:
                raise ValueError(
                    "RY coding position must be 'all' or an integer >= 1")

        with open("Results.nex", "rU") as handle:
            records = list(SeqIO.parse(handle, "nexus"))

        # Use each row's own sequence so single-row alignments work as well.
        seqlist = [str(row.seq) for row in msaObject]

        if position == 'all':
            newSeqList = [ReplaceAll(seqData) for seqData in seqlist]
        else:
            newSeqList = [ReplaceThird(seqData, start) for seqData in seqlist]

        data = []
        for counter, newSeqData in enumerate(newSeqList):
            source = records[counter]
            data.append(SeqRecord(Seq(newSeqData, generic_dna),
                                  id=source.id, name=source.name,
                                  description=source.description))

        newmsa = MultipleSeqAlignment(data)

        return newmsa