Example #1
0
 def fileOpenID(self):
     """
     Quick Record Import. Collects the record IDs of every alignment file
     of the selected format found in the current working directory.

     ``self.file_format`` is used as a 1-based index that selects both the
     glob pattern and the SeqIO parser: 1 = fasta (*.fas), 2 = nexus (*.nex),
     3 = phylip, 4 = phylip-sequential, 5 = phylip-relaxed (all *.phy).

     Returns - dict mapping the part of each filename before its first "."
     to the list of record IDs parsed from that file.
     """
     extList = ["*.fas", "*.nex", "*.phy", "*.phy", "*.phy"]
     typeList = ["fasta", "nexus", "phylip", "phylip-sequential", "phylip-relaxed"]

     index = self.file_format - 1
     pattern = extList[index]
     seqFormat = typeList[index]

     idsByGene = {}
     for filename in glob.glob(pattern):
         # Plain "r" instead of "rU": the "U" flag was removed in Python 3.11.
         # The with-block closes the handle even when parsing raises.
         with open(filename, "r") as handle:
             idList = [record.id for record in SeqIO.parse(handle, seqFormat)]
         idsByGene[filename.split(".")[0]] = idList

     return idsByGene
Example #2
0
def Convert(input, output, filename):
    """
    File format conversion program (fasta, strict-phylip, sequential-phylip, relaxed-phylip and nexus).

    The working directory is changed to its parent before anything is read,
    so ``filename`` is resolved relative to that parent directory. The result
    is written to the part of ``filename`` before its first "." plus the
    extension of the output format (fas, nex or phy).

    A failed conversion prints "Bad Alignment" and returns without printing
    the "Final output saved" message.

    @parameter input - Input file format.
    @parameter output - Output file format.
    @parameter filename - Input filename.
    """
    formDict = {
        'fasta': '*.fas',
        'nexus': '*.nex',
        'phylip': '*.phy',
        'phylip-sequential': '*.phy',
        'phylip-relaxed': '*.phy'
    }

    os.chdir('..')
    baseName = filename.split('.')[0]

    if input == 'fasta' and output == 'nexus':
        outName = baseName + '.nex'
        # NOTE(review): the alphabet argument relies on Bio.Alphabet, which
        # recent Biopython releases no longer ship -- confirm the pinned version.
        with open(filename) as handle:
            alignment = AlignIO.read(handle, "fasta", alphabet=Gapped(IUPAC.protein))
        with open(outName, 'w') as g:
            g.write(alignment.format("nexus"))

    else:
        try:
            outName = baseName + '.' + formDict[output].split('.')[1]
            # Plain 'r' instead of 'rU': the 'U' flag was removed in Python 3.11.
            with open(filename, 'r') as handle:
                record = list(SeqIO.parse(handle, input))
            with open(outName, 'w') as fp:
                SeqIO.write(record, fp, output)
        except Exception:
            # Conversion stays best-effort, but KeyboardInterrupt/SystemExit are
            # no longer swallowed and no success message follows a failure.
            print("Bad Alignment\n")
            return

    print("Final output saved in %s" % outName)
Example #3
0
    def fileOpenID(self):
        """
        Quick Record Import. Collects the record IDs of every alignment file
        of the selected format found in the current working directory.

        ``self.file_format`` is used as a 1-based index that selects both the
        glob pattern and the SeqIO parser: 1 = fasta (*.fas), 2 = nexus (*.nex),
        3 = phylip, 4 = phylip-sequential, 5 = phylip-relaxed (all *.phy).

        Returns - dict mapping the part of each filename before its first "."
        to the list of record IDs parsed from that file.
        """
        extList = ["*.fas", "*.nex", "*.phy", "*.phy", "*.phy"]
        typeList = [
            "fasta", "nexus", "phylip", "phylip-sequential", "phylip-relaxed"
        ]

        index = self.file_format - 1
        pattern = extList[index]
        seqFormat = typeList[index]

        idsByGene = {}
        for filename in glob.glob(pattern):
            # Plain "r" instead of "rU": the "U" flag was removed in Python 3.11.
            # The with-block closes the handle even when parsing raises.
            with open(filename, "r") as handle:
                idList = [record.id for record in SeqIO.parse(handle, seqFormat)]
            idsByGene[filename.split(".")[0]] = idList

        return idsByGene
Example #4
0
    def nexML(self, filename):
        """
        Re-writes a Nexus alignment file as 'Results.xml' in the current directory.

        The records are written with SeqIO's "seqxml" writer.
        NOTE(review): the method name suggests NexML, but the format written
        here is SeqXML -- confirm which one downstream tools expect.

        @parameter filename - Nexus file to read.
        """
        # Read the input first so that a missing or unparsable Nexus file does
        # not leave an empty, truncated Results.xml behind. Plain 'r' replaces
        # 'rU', whose 'U' flag was removed in Python 3.11.
        with open(filename, 'r') as handleXML:
            recordsXML = list(SeqIO.parse(handleXML, "nexus"))

        with open('Results.xml', 'w') as fp:
            SeqIO.write(recordsXML, fp, "seqxml")
Example #5
0
 def nexML(self, filename):
     """
     Re-writes a Nexus alignment file as 'Results.xml' in the current directory.

     The records are written with SeqIO's "seqxml" writer.
     NOTE(review): the method name suggests NexML, but the format written
     here is SeqXML -- confirm which one downstream tools expect.

     @parameter filename - Nexus file to read.
     """
     # Read the input first so that a missing or unparsable Nexus file does
     # not leave an empty, truncated Results.xml behind. Plain 'r' replaces
     # 'rU', whose 'U' flag was removed in Python 3.11.
     with open(filename, 'r') as handleXML:
         recordsXML = list(SeqIO.parse(handleXML, "nexus"))

     with open('Results.xml', 'w') as fp:
         SeqIO.write(recordsXML, fp, "seqxml")
Example #6
0
def mrnaAlign(inputFile, pkg, arguments=None):
    """
    Align "Align/<inputFile>" with an external aligner and store the result
    as Nexus in "Input/<base>.nex", where <base> is the part of ``inputFile``
    before its first ".".

    The aligner first writes fasta to "Input/<inputFile>"; that intermediate
    file is converted to Nexus and then deleted.

    @parameter inputFile - Name of the fasta file inside the "Align" directory.
    @parameter pkg - 'muscle' selects the bundled muscle binary; any other
        value selects mafft, but only when ``arguments`` is given.
    @parameter arguments - Extra command-line text for mafft; "[" and "]"
        characters are stripped. When None, muscle is used regardless of pkg.
    """
    source = "Align/" + inputFile
    aligned = "Input/" + inputFile

    if pkg == 'muscle' or arguments is None:
        if 'Darwin' in platform.system():
            binary = "./src/muscle/muscle"
        else:
            binary = "./src/muscle/muscleLinux"
        # An argument list (no shell) keeps file names with spaces or shell
        # metacharacters from being re-interpreted.
        process = subprocess.Popen([binary, "-in", source, "-out", aligned],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
    else:
        arguments = arguments.replace('[', '').replace(']', '')
        # mafft needs the shell: free-form arguments plus "> file" redirection.
        process = subprocess.Popen(
            "./src/mafft/mafft.bat %s %s > %s" % (arguments, source, aligned),
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT)

    # communicate() drains the pipe while waiting; an unread PIPE can fill up
    # and block a chatty aligner forever.
    process.communicate()

    # Plain 'r' instead of 'rU' (the 'U' flag was removed in Python 3.11); the
    # handle is closed before the intermediate file is deleted.
    with open(aligned, 'r') as handle:
        record = list(SeqIO.parse(handle, 'fasta'))
    with open("Input/" + inputFile.split('.')[0] + ".nex", 'w') as fp:
        SeqIO.write(record, fp, 'nexus')
    os.remove(aligned)
Example #7
0
    def fileOpenConcNex(self):
        """
           This function creates a list of alignment records from every "*.nex"
           file found in the current working directory.

           Returns - List with one entry per file; each entry is the list of
           records parsed from that file.
        """
        recordList = []
        for filename in glob.glob("*.nex"):
            # Plain "r" instead of "rU": the "U" flag was removed in Python 3.11.
            # The with-block closes each handle, which was previously leaked.
            with open(filename, "r") as handle:
                recordList.append(list(SeqIO.parse(handle, "nexus")))

        return recordList
Example #8
0
def cdsAlign(inputFile, pkg='muscle', omit=False, ign=False, CT=None):
    """
    Driver for aligning the coding sequences stored in "Align/<inputFile>".

    Steps, in order:
      1. choose the codon table: table id 1 when CT is None, the table named
         CT when it is one of the known names, otherwise 'Standard';
      2. read the fasta records and strip a trailing TAA/TGA/TAG from each;
      3. when omit is True, drop every record whose second "|"-separated id
         field appears as the first space-separated token of a line in
         "Align/<base>.log" (<base> = inputFile up to its first ".");
      4. pass the records to _translator, then call _alignP(pkg) and
         _cleanAli(records, omit, inputFile).

    @parameter inputFile - Fasta file name inside the "Align" directory.
    @parameter pkg - Aligner name forwarded to _alignP.
    @parameter omit - Enables the log-based record filter; also forwarded.
    @parameter ign - Forwarded to _translator.
    @parameter CT - Codon table name, or None.
    """
    codonTables = [
        'Ascidian Mitochondrial', 'SGC9', 'Coelenterate Mitochondrial',
        'Protozoan Mitochondrial', 'Vertebrate Mitochondrial', 'Plant Plastid',
        'Thraustochytrium Mitochondrial', 'Blepharisma Macronuclear',
        'Mold Mitochondrial', 'Invertebrate Mitochondrial', 'Standard',
        'Trematode Mitochondrial', 'Scenedesmus obliquus Mitochondrial',
        'Euplotid Nuclear', 'Yeast Mitochondrial', 'Spiroplasma',
        'Alternative Flatworm Mitochondrial', 'Ciliate Nuclear', 'SGC8',
        'Alternative Yeast Nuclear', 'Hexamita Nuclear', 'SGC5', 'SGC4',
        'SGC3', 'SGC2', 'SGC1', 'SGC0', 'Flatworm Mitochondrial',
        'Dasycladacean Nuclear', 'Chlorophycean Mitochondrial', 'Mycoplasma',
        'Bacterial', 'Echinoderm Mitochondrial'
    ]

    if CT is None:
        table = CodonTable.ambiguous_dna_by_id[1]
    elif CT in codonTables:
        table = CodonTable.ambiguous_generic_by_name[CT]
    else:
        table = CodonTable.ambiguous_generic_by_name['Standard']

    # Plain 'r' instead of 'rU' (the 'U' flag was removed in Python 3.11); the
    # with-block closes the handle, which was previously leaked.
    with open("Align/" + inputFile, 'r') as handle:
        records = list(SeqIO.parse(handle, 'fasta'))

    stopCodons = ('TAA', 'TGA', 'TAG')
    for rec in records:
        if str(rec.seq[-3:]) in stopCodons:
            rec.seq = rec.seq[0:-3]

    # Kept as an equality test so that only True (or 1) enables filtering,
    # exactly as before.
    if omit == True:
        with open("Align/" + inputFile.split('.')[0] + '.log', 'r') as logFile:
            badQuality = set(line.split(' ')[0] for line in logFile)

        records = [
            rec for rec in records if rec.id.split('|')[1] not in badQuality
        ]

    records = _translator(records, ign, omit, table)
    _alignP(pkg)
    _cleanAli(records, omit, inputFile)
Example #9
0
 def fileOpenConcNex(self):
     """
        This function creates a list of alignment records from every "*.nex"
        file found in the current working directory.

        Returns - List with one entry per file; each entry is the list of
        records parsed from that file.
     """
     recordList = []
     for filename in glob.glob("*.nex"):
         # Plain "r" instead of "rU": the "U" flag was removed in Python 3.11.
         # The with-block closes each handle, which was previously leaked.
         with open(filename, "r") as handle:
             recordList.append(list(SeqIO.parse(handle, "nexus")))

     return recordList
Example #10
0
    def fileOpenConc(self):
        """
            This function is used in opening input alignment files for concatenation.
            It handles the non-Nexus formats.

            ``self.file_format`` is used as a 1-based index that selects both the
            glob pattern and the SeqIO parser: 1 = fasta (*.fas), 2 = phylip,
            3 = phylip-sequential, 4 = phylip-relaxed (all *.phy). Files are
            looked up in the current working directory.

            Returns - List with one entry per file; each entry is the list of
            records parsed from that file.
        """
        extList = ["*.fas", "*.phy", "*.phy", "*.phy"]
        typeList = ["fasta", "phylip", "phylip-sequential", "phylip-relaxed"]

        index = self.file_format - 1
        pattern = extList[index]
        seqFormat = typeList[index]

        recordList = []
        for filename in glob.glob(pattern):
            # Plain "r" instead of "rU": the "U" flag was removed in Python 3.11.
            # The with-block closes each handle, which was previously leaked.
            with open(filename, "r") as handle:
                recordList.append(list(SeqIO.parse(handle, seqFormat)))

        return recordList
Example #11
0
def Convert(input, output, filename):
    """
    File format conversion program (fasta, strict-phylip, sequential-phylip, relaxed-phylip and nexus).

    The working directory is changed to its parent before anything is read,
    so ``filename`` is resolved relative to that parent directory. The result
    is written to the part of ``filename`` before its first "." plus the
    extension of the output format (fas, nex or phy).

    A failed conversion prints "Bad Alignment" and returns without printing
    the "Final output saved" message.

    @parameter input - Input file format.
    @parameter output - Output file format.
    @parameter filename - Input filename.
    """
    formDict = {
        'fasta': '*.fas',
        'nexus': '*.nex',
        'phylip': '*.phy',
        'phylip-sequential': '*.phy',
        'phylip-relaxed': '*.phy'
    }

    os.chdir('..')
    baseName = filename.split('.')[0]

    if input == 'fasta' and output == 'nexus':
        outName = baseName + '.nex'
        # NOTE(review): the alphabet argument relies on Bio.Alphabet, which
        # recent Biopython releases no longer ship -- confirm the pinned version.
        with open(filename) as handle:
            alignment = AlignIO.read(handle,
                                     "fasta",
                                     alphabet=Gapped(IUPAC.protein))
        with open(outName, 'w') as g:
            g.write(alignment.format("nexus"))

    else:
        try:
            outName = baseName + '.' + formDict[output].split('.')[1]
            # Plain 'r' instead of 'rU': the 'U' flag was removed in Python 3.11.
            with open(filename, 'r') as handle:
                record = list(SeqIO.parse(handle, input))
            with open(outName, 'w') as fp:
                SeqIO.write(record, fp, output)
        except Exception:
            # Conversion stays best-effort, but KeyboardInterrupt/SystemExit are
            # no longer swallowed and no success message follows a failure.
            print("Bad Alignment\n")
            return

    print("Final output saved in %s" % outName)
Example #12
0
 def fileOpenConc(self):
     """
         This function is used in opening input alignment files for concatenation.
         It handles the non-Nexus formats.

         ``self.file_format`` is used as a 1-based index that selects both the
         glob pattern and the SeqIO parser: 1 = fasta (*.fas), 2 = phylip,
         3 = phylip-sequential, 4 = phylip-relaxed (all *.phy). Files are
         looked up in the current working directory.

         Returns - List with one entry per file; each entry is the list of
         records parsed from that file.
     """
     extList = ["*.fas", "*.phy", "*.phy", "*.phy"]
     typeList = ["fasta", "phylip", "phylip-sequential", "phylip-relaxed"]

     index = self.file_format - 1
     pattern = extList[index]
     seqFormat = typeList[index]

     recordList = []
     for filename in glob.glob(pattern):
         # Plain "r" instead of "rU": the "U" flag was removed in Python 3.11.
         # The with-block closes each handle, which was previously leaked.
         with open(filename, "r") as handle:
             recordList.append(list(SeqIO.parse(handle, seqFormat)))

     return recordList
Example #13
0
def _cleanAli(recordNuc, omit, fileName):
    """
    Rebuild a nucleotide alignment from the protein alignment in 'tAligned.fas'.

    For every aligned protein record, the nucleotide record whose id occurs
    inside the protein record's id is cut into 3-letter pieces by _spliter.
    Each alignment character is then replaced: "-" becomes "---", "Z" becomes
    "NNN" (and skips one piece), anything else becomes the next piece.
    Records for which a piece cannot be fetched are reported and left out.

    The result is written as Nexus to "Input/<base>.nex" when omit == False,
    otherwise to "Input/<base>_omited.nex" (<base> = fileName up to its first
    "."). Finally 'translated.fas' and 'tAligned.fas' are deleted.

    @parameter recordNuc - Nucleotide records (objects with .id and .seq).
    @parameter omit - Selects the output file name, see above.
    @parameter fileName - Name from which the output file name is derived.

    Raises IndexError when no nucleotide record matches a protein record id.
    """
    # Plain 'r' instead of 'rU' (the 'U' flag was removed in Python 3.11); the
    # handle is closed here so the file can be deleted at the end.
    with open('tAligned.fas', 'r') as handleP:
        records = list(SeqIO.parse(handleP, 'fasta'))

    store = list()
    for rec in records:
        nucData = [x.seq for x in recordNuc if x.id in rec.id]
        nucSeqData = _spliter(nucData[0], 3)
        pieces = []
        pos = 0
        for amino in rec.seq:
            if amino == '-':
                pieces.append("---")
            elif amino == 'Z':
                pieces.append("NNN")
                pos = pos + 1
            else:
                try:
                    pieces.append(str(nucSeqData[pos]))
                except Exception:
                    # Presumably an IndexError when the nucleotide sequence has
                    # fewer pieces than the protein has residues. The record is
                    # dropped below, so the rest of it need not be processed.
                    if rec.id not in store:
                        store.append(rec.id)
                    break
                pos = pos + 1

        # Joining once avoids re-copying the growing sequence per residue.
        rec.seq = Seq("".join(pieces), generic_dna)

    records = [x for x in records if x.id not in store]
    if store:
        print("Failed to align following sequences: %s" % store)

    suffix = ".nex" if omit == False else "_omited.nex"
    with open("Input/" + fileName.split('.')[0] + suffix, 'w') as fp:
        SeqIO.write(records, fp, "nexus")

    os.remove('translated.fas')
    os.remove('tAligned.fas')
Example #14
0
    def RYcoding(self, file, position, msaObject):
        """
           RY-coding program: It replaces A & G with R and C & T with Y, either at
           every third column starting from a user defined position or at all
           the positions. It depends on user selection.

           Only upper-case A, C, G and T are recoded; every other character is
           copied unchanged. Record id, name and description are taken, row by
           row, from the records of "Results.nex" in the current directory.

           @parameter file - Not used by this method.
           @parameter position - 'all' to recode every column; otherwise the
               1-based column (anything int() accepts) where recoding starts,
               continuing with every third column after it.
           @parameter msaObject - Input multiple sequence alignment matrix data

           Return - Multiple sequence alignment object with RY coding

           Raises ValueError when position is neither 'all' nor an integer >= 1.
        """
        ryCode = {'A': 'R', 'G': 'R', 'C': 'Y', 'T': 'Y'}

        def recode(string, columns):
            # Inspect and replace the SAME column. The earlier helpers tested
            # column i but overwrote column i - 1, and never reached the last
            # column of the alignment.
            letters = list(string)
            for i in columns:
                letters[i] = ryCode.get(letters[i], letters[i])
            return "".join(letters)

        if position == 'all':
            start, step = 0, 1
        else:
            start, step = int(position) - 1, 3
            if start < 0:
                raise ValueError(
                    "position must be 'all' or an integer >= 1, got %r" %
                    (position, ))

        # Plain "r" instead of "rU": the "U" flag was removed in Python 3.11.
        with open("Results.nex", "r") as handle:
            records = list(SeqIO.parse(handle, "nexus"))

        data = []
        for counter, row in enumerate(msaObject):
            # Each row is read by its own length, so a single-row alignment no
            # longer fails on a lookup of the second row.
            sequence = str(row.seq)
            newSeqData = recode(sequence, range(start, len(sequence), step))
            source = records[counter]
            data.append(SeqRecord(Seq(newSeqData, generic_dna),
                                  id=source.id, name=source.name,
                                  description=source.description))

        newmsa = MultipleSeqAlignment(data)

        return newmsa
Example #15
0
    def RYcoding(self, file, position, msaObject):
        """
           RY-coding program: It replaces A & G with R and C & T with Y, either at
           every third column starting from a user defined position or at all
           the positions. It depends on user selection.

           Only upper-case A, C, G and T are recoded; every other character is
           copied unchanged. Record id, name and description are taken, row by
           row, from the records of "Results.nex" in the current directory.

           @parameter file - Not used by this method.
           @parameter position - 'all' to recode every column; otherwise the
               1-based column (anything int() accepts) where recoding starts,
               continuing with every third column after it.
           @parameter msaObject - Input multiple sequence alignment matrix data

           Return - Multiple sequence alignment object with RY coding

           Raises ValueError when position is neither 'all' nor an integer >= 1.
        """
        ryCode = {'A': 'R', 'G': 'R', 'C': 'Y', 'T': 'Y'}

        def recode(string, columns):
            # Inspect and replace the SAME column. The earlier helpers tested
            # column i but overwrote column i - 1, and never reached the last
            # column of the alignment.
            letters = list(string)
            for i in columns:
                letters[i] = ryCode.get(letters[i], letters[i])
            return "".join(letters)

        if position == 'all':
            start, step = 0, 1
        else:
            start, step = int(position) - 1, 3
            if start < 0:
                raise ValueError(
                    "position must be 'all' or an integer >= 1, got %r" %
                    (position, ))

        # Plain "r" instead of "rU": the "U" flag was removed in Python 3.11.
        with open("Results.nex", "r") as handle:
            records = list(SeqIO.parse(handle, "nexus"))

        data = []
        for counter, row in enumerate(msaObject):
            # Each row is read by its own length, so a single-row alignment no
            # longer fails on a lookup of the second row.
            sequence = str(row.seq)
            newSeqData = recode(sequence, range(start, len(sequence), step))
            source = records[counter]
            data.append(SeqRecord(Seq(newSeqData, generic_dna),
                                  id=source.id, name=source.name,
                                  description=source.description))

        newmsa = MultipleSeqAlignment(data)

        return newmsa