Exemple #1
0
def Convert(input, output, filename):
    """
    File format conversion program (fasta, strict-phylip, sequential-phylip,
    relaxed-phylip and nexus).

    The converted file is written under the part of ``filename`` that
    precedes its first dot, with the extension registered for ``output``.
    The working directory is changed to the parent directory before any
    file is opened, so ``filename`` is resolved relative to that directory.

    @parameter input - Input file format.
    @parameter output - Output file format.
    @parameter filename - Input filename.
    """
    formDict = {
        'fasta': '*.fas',
        'nexus': '*.nex',
        'phylip': '*.phy',
        'phylip-sequential': '*.phy',
        'phylip-relaxed': '*.phy'
    }

    os.chdir('..')

    baseName = filename.split('.')[0]

    if input == 'fasta' and output == 'nexus':
        # fasta -> nexus goes through AlignIO so the alignment carries a
        # gapped protein alphabet when it is formatted as nexus.
        outName = baseName + '.nex'
        with open(filename) as handle:
            alignment = AlignIO.read(handle,
                                     "fasta",
                                     alphabet=Gapped(IUPAC.protein))
        with open(outName, 'w') as g:
            g.write(alignment.format("nexus"))

    else:
        try:
            # Plain 'r' instead of 'rU': the 'U' flag is rejected by open()
            # on Python 3.11+.
            with open(filename, 'r') as handle:
                record = list(SeqIO.parse(handle, input))
            outName = baseName + '.' + formDict[output].split('.')[1]
            with open(outName, 'w') as fp:
                SeqIO.write(record, fp, output)

        except Exception:
            # Best-effort conversion: report the failure and stop here so
            # that no success message follows a failed conversion.
            print("Bad Alignment\n")
            return

    print("Final output saved in %s" % outName)
Exemple #2
0
    def nexML(self, filename):
        """
            Converts a nexus alignment file into 'Results.xml'.

            The records parsed from the input are written with the "seqxml"
            writer.
            NOTE(review): the method name suggests NexML output, but the
            format requested from SeqIO is "seqxml" - confirm which one is
            intended.

            @parameter filename - path of the nexus file to read.
        """

        # Read the input first so that an unreadable file does not leave a
        # truncated Results.xml behind. Plain 'r' replaces 'rU', which
        # open() rejects on Python 3.11+.
        with open(filename, 'r') as handleXML:
            recordsXML = list(SeqIO.parse(handleXML, "nexus"))

        with open('Results.xml', 'w') as fp:
            SeqIO.write(recordsXML, fp, "seqxml")
Exemple #3
0
 def nexML(self, filename):
     """
         Converts a nexus alignment file into 'Results.xml'.

         The records parsed from the input are written with the "seqxml"
         writer.
         NOTE(review): the method name suggests NexML output, but the
         format requested from SeqIO is "seqxml" - confirm which one is
         intended.

         @parameter filename - path of the nexus file to read.
     """

     # Read the input first so that an unreadable file does not leave a
     # truncated Results.xml behind. Plain 'r' replaces 'rU', which
     # open() rejects on Python 3.11+.
     with open(filename, 'r') as handleXML:
         recordsXML = list(SeqIO.parse(handleXML, "nexus"))

     with open('Results.xml', 'w') as fp:
         SeqIO.write(recordsXML, fp, "seqxml")
Exemple #4
0
def Convert(input, output, filename):
    """
    File format conversion program (fasta, strict-phylip, sequential-phylip,
    relaxed-phylip and nexus).

    The converted file is written under the part of ``filename`` that
    precedes its first dot, with the extension registered for ``output``.
    The working directory is changed to the parent directory before any
    file is opened, so ``filename`` is resolved relative to that directory.

    @parameter input - Input file format.
    @parameter output - Output file format.
    @parameter filename - Input filename.
    """
    formDict = {
        'fasta': '*.fas',
        'nexus': '*.nex',
        'phylip': '*.phy',
        'phylip-sequential': '*.phy',
        'phylip-relaxed': '*.phy'
    }

    os.chdir('..')

    baseName = filename.split('.')[0]

    if input == 'fasta' and output == 'nexus':
        # fasta -> nexus goes through AlignIO so the alignment carries a
        # gapped protein alphabet when it is formatted as nexus.
        outName = baseName + '.nex'
        with open(filename) as handle:
            alignment = AlignIO.read(handle,
                                     "fasta",
                                     alphabet=Gapped(IUPAC.protein))
        with open(outName, 'w') as g:
            g.write(alignment.format("nexus"))

    else:
        try:
            # Plain 'r' instead of 'rU': the 'U' flag is rejected by open()
            # on Python 3.11+.
            with open(filename, 'r') as handle:
                record = list(SeqIO.parse(handle, input))
            outName = baseName + '.' + formDict[output].split('.')[1]
            with open(outName, 'w') as fp:
                SeqIO.write(record, fp, output)

        except Exception:
            # Best-effort conversion: report the failure and stop here so
            # that no success message follows a failed conversion.
            print("Bad Alignment\n")
            return

    print("Final output saved in %s" % outName)
Exemple #5
0
def _cleanAli(recordNuc, omit, fileName):
    """
    Rebuild a nucleotide alignment from the aligned protein file.

    Reads the aligned protein records from 'tAligned.fas' and, for each one,
    walks its residues: a gap becomes "---", a 'Z' becomes "NNN", and any
    other residue is replaced by the next chunk returned by
    ``_spliter(<nucleotide sequence>, 3)``. Records whose chunks run out (or
    otherwise fail to be appended) are reported and dropped. The result is
    written as nexus to ``Input/<base>.nex`` (or ``Input/<base>_omited.nex``
    when ``omit`` is not False), and the temporary files 'translated.fas'
    and 'tAligned.fas' are removed.

    @parameter recordNuc - nucleotide records; matched to a protein record
                           when the nucleotide id is contained in the
                           protein id.
    @parameter omit - selects the output file name (see above).
    @parameter fileName - input file name; the part before its first dot is
                          used as the output base name.
    """
    # Plain 'r' replaces 'rU' (rejected by open() on Python 3.11+); the
    # context manager closes the handle, which the original never did.
    with open('tAligned.fas', 'r') as handleP:
        records = list(SeqIO.parse(handleP, 'fasta'))

    store = []  # ids of records that could not be back-translated
    for rec in records:
        nucData = [x.seq for x in recordNuc if x.id in rec.id]
        # NOTE(review): raises IndexError when no nucleotide record matches
        # this protein id - unchanged from the original behavior.
        nucSeqData = _spliter(nucData[0], 3)
        sequence = Seq("", generic_dna)
        pos = 0
        for amino in rec.seq:
            if amino == '-':
                sequence = sequence + Seq("---", generic_dna)
            elif amino == 'Z':
                # 'Z' consumes one chunk position but is emitted as unknowns.
                sequence = sequence + Seq("NNN", generic_dna)
                pos += 1
            else:
                try:
                    sequence = sequence + nucSeqData[pos]
                    pos += 1
                except Exception:
                    # Deliberately best-effort: remember the record and
                    # drop it after the loop instead of aborting the run.
                    if rec.id not in store:
                        store.append(rec.id)

        rec.seq = Seq(str(sequence), generic_dna)

    records = [x for x in records if x.id not in store]
    if store:
        print("Failed to align following sequences: %s" % store)

    # '== False' is kept on purpose so that only values equal to False pick
    # the plain name, exactly as before.
    suffix = ".nex" if omit == False else "_omited.nex"
    with open("Input/" + fileName.split('.')[0] + suffix, 'w') as fp:
        SeqIO.write(records, fp, "nexus")

    os.remove('translated.fas')
    os.remove('tAligned.fas')
Exemple #6
0
    def RNAfoldConsensus(self):
        """
           Creates RNA structure data from consensus alignments using the
           RNAalifold program.

           Every '*.nex' file found in the 'RNAdata' directory gets a
           matching '<name>.aln' clustal file, written in the parent
           directory from the records returned by self.fileOpenConcNex().
           Each clustal file is then fed to RNAalifold on standard input and
           the program output is stored in RNAConsensus.txt under a
           "[ <name> ]" heading.

        """

        os.chdir("RNAdata")
        fileList = glob.glob('*.nex')
        newFileList = [name.split('.')[0] + '.aln' for name in fileList]

        recordList = self.fileOpenConcNex()
        os.chdir("..")

        for n, newName in enumerate(newFileList):
            record = recordList[n]
            with open(newName, 'w') as file_Write:
                SeqIO.write(record, file_Write, "clustal")

        with open("RNAConsensus.txt", 'w') as fp:
            for name in newFileList:
                print("RNA structure     |    %s|    %s" %
                      (time.strftime("%c"), name))
                fp.write("[ %s ]\n" % name.split('.')[0])
                # The alignment is passed on stdin instead of through a
                # shell redirect, so file names are never interpreted by a
                # shell. universal_newlines=True makes check_output return
                # text, which a text-mode file requires on Python 3.
                with open(name, 'r') as alnHandle:
                    fp.write(
                        subprocess.check_output(["RNAalifold"],
                                                stdin=alnHandle,
                                                universal_newlines=True))
                fp.write('\n\n')
Exemple #7
0
    def RNAfoldConsensus(self):
        """
           Creates RNA structure data from consensus alignments using the
           RNAalifold program.

           Every '*.nex' file found in the 'RNAdata' directory gets a
           matching '<name>.aln' clustal file, written in the parent
           directory from the records returned by self.fileOpenConcNex().
           Each clustal file is then fed to RNAalifold on standard input and
           the program output is stored in RNAConsensus.txt under a
           "[ <name> ]" heading.

        """

        os.chdir("RNAdata")
        fileList = glob.glob('*.nex')
        newFileList = [name.split('.')[0] + '.aln' for name in fileList]

        recordList = self.fileOpenConcNex()
        os.chdir("..")

        for n, newName in enumerate(newFileList):
            record = recordList[n]
            with open(newName, 'w') as file_Write:
                SeqIO.write(record, file_Write, "clustal")

        with open("RNAConsensus.txt", 'w') as fp:
            for name in newFileList:
                print("RNA structure     |    %s|    %s" %(time.strftime("%c"), name))
                fp.write("[ %s ]\n" % name.split('.')[0])
                # The alignment is passed on stdin instead of through a
                # shell redirect, so file names are never interpreted by a
                # shell. universal_newlines=True makes check_output return
                # text, which a text-mode file requires on Python 3.
                with open(name, 'r') as alnHandle:
                    fp.write(subprocess.check_output(["RNAalifold"],
                                                     stdin=alnHandle,
                                                     universal_newlines=True))
                fp.write('\n\n')
Exemple #8
0
 def alignOutput(self, combine):
     """
        alignOutput creates an output file in user defined file format

        self.file_format selects the format:
          1 - fasta, written to Result.fasta (via the intermediate file
              Result1.fasta, which is left on disk)
          2 - phylip, written to Result.phy
          3 - phylip-sequential, written to Result.phy
          4 - phylip-relaxed, written to Result.phy
        Any other value terminates the program through sys.exit.

        @parameter combine - concatenated alignment matrix

     """

     output_format = self.file_format

     if output_format == 1:
         with open("Result1.fasta", 'w') as file_Write:
             SeqIO.write(combine, file_Write, "fasta")

         # This section is for cleaning any unknown description tag from
         # the final fasta file. Streaming line by line avoids loading the
         # whole file, and replace() is a no-op on lines without the tag.
         with open("Result1.fasta", "r") as fin, \
                 open("Result.fasta", "w") as fout:
             for line in fin:
                 fout.write(line.replace("<unknown description>", ""))
         return

     # The three phylip flavours differ only in the format name.
     if output_format == 2:
         phylipFormat = "phylip"
     elif output_format == 3:
         phylipFormat = "phylip-sequential"
     elif output_format == 4:
         phylipFormat = "phylip-relaxed"
     else:
         sys.exit("You have enetered wrong value \n Program Terminated...")

     with open("Result.phy", 'w') as file_Write:
         SeqIO.write(combine, file_Write, phylipFormat)
Exemple #9
0
    def alignOutput(self, combine):
        """
           alignOutput creates an output file in user defined file format

           self.file_format selects the format:
             1 - fasta, written to Result.fasta (via the intermediate file
                 Result1.fasta, which is left on disk)
             2 - phylip, written to Result.phy
             3 - phylip-sequential, written to Result.phy
             4 - phylip-relaxed, written to Result.phy
           Any other value terminates the program through sys.exit.

           @parameter combine - concatenated alignment matrix

        """

        output_format = self.file_format

        if output_format == 1:
            with open("Result1.fasta", 'w') as file_Write:
                SeqIO.write(combine, file_Write, "fasta")

            # This section is for cleaning any unknown description tag from
            # the final fasta file. Streaming line by line avoids loading
            # the whole file, and replace() is a no-op on lines without the
            # tag.
            with open("Result1.fasta", "r") as fin, \
                    open("Result.fasta", "w") as fout:
                for line in fin:
                    fout.write(line.replace("<unknown description>", ""))
            return

        # The three phylip flavours differ only in the format name.
        if output_format == 2:
            phylipFormat = "phylip"
        elif output_format == 3:
            phylipFormat = "phylip-sequential"
        elif output_format == 4:
            phylipFormat = "phylip-relaxed"
        else:
            sys.exit("You have enetered wrong value \n Program Terminated...")

        with open("Result.phy", 'w') as file_Write:
            SeqIO.write(combine, file_Write, phylipFormat)
Exemple #10
0
def mrnaAlign(inputFile, pkg, arguments=None):
    """
    Align ``Align/<inputFile>`` and store the result as a nexus file.

    The aligner writes a FASTA alignment to ``Input/<inputFile>``; that file
    is parsed, re-written as ``Input/<base>.nex`` (``<base>`` being the part
    of ``inputFile`` before its first dot) and then deleted.

    @parameter inputFile - name of the FASTA file inside the Align directory.
    @parameter pkg - 'muscle' runs the bundled muscle binary (the 'muscle'
                     build when platform.system() contains 'Darwin',
                     'muscleLinux' otherwise); any other value runs
                     ./src/mafft/mafft.bat.
    @parameter arguments - extra command line text for mafft; '[' and ']'
                           characters are stripped. When it is None, muscle
                           is used regardless of ``pkg``.
    """
    if pkg != 'muscle' and arguments is None:
        pkg = 'muscle'

    sourcePath = "Align/" + inputFile
    alignedPath = "Input/" + inputFile

    if pkg == 'muscle':
        if 'Darwin' in platform.system():
            binary = "./src/muscle/muscle"
        else:
            binary = "./src/muscle/muscleLinux"
        command = "%s -in %s -out %s" % (binary, sourcePath, alignedPath)
    else:
        arguments = arguments.replace('[', '').replace(']', '')
        command = "./src/mafft/mafft.bat %s %s > %s" % (
            arguments, sourcePath, alignedPath)

    # The aligner's console output is discarded. It used to be sent to a
    # pipe that nobody read, which can block the child process once the
    # pipe buffer fills up.
    with open(os.devnull, 'w') as devnull:
        subprocess.call(command,
                        shell=True,
                        stdout=devnull,
                        stderr=subprocess.STDOUT)

    # Plain 'r' replaces 'rU', which open() rejects on Python 3.11+.
    with open(alignedPath, 'r') as handle:
        record = list(SeqIO.parse(handle, 'fasta'))
    with open("Input/" + inputFile.split('.')[0] + ".nex", 'w') as fp:
        SeqIO.write(record, fp, 'nexus')
    os.remove(alignedPath)
Exemple #11
0
def mrnaImport(geneName, group, ortho):
    """
    Download the mRNA sequences of a gene's orthologs into a FASTA file.

    The NCBI gene database is searched for ``ortho`` (or ``geneName`` when
    ``ortho`` is None) within organism ``group``. The first hit is used to
    query its "ortholog_gene_<id>[group]" set. For every gene id in that
    set the candidate records are fetched through _xmlcreate / _xmlparser /
    mrnaExt, and the record with the longest sequence is kept. The kept
    records are written to ``Align/<geneName>.fas`` and the header lines of
    that file are then rewritten as ``><word>_<word>|<first token>``.

    @parameter geneName - name of the gene; also names the output file.
    @parameter group - organism name used in the initial search.
    @parameter ortho - gene symbol to search instead of geneName, or None.
    @raises ValueError - when both ``ortho`` and ``group`` are None.
    @raises RuntimeError - when the initial NCBI search call fails.
    @raises IndexError - when the initial search returns no gene id.
    """

    if ortho is not None:
        inpTerm = ortho + "[sym] AND " + group + "[orgn]"
    elif group is not None:
        inpTerm = geneName + "[sym] AND " + group + "[orgn]"
    else:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError(
            "mrnaImport needs either an organism name (group) or an "
            "ortholog symbol (ortho)")

    Entrez.email = '*****@*****.**'

    try:
        handle = Entrez.esearch(db="gene",
                                term=inpTerm,
                                rettype='xml',
                                RetMax=300)
    except Exception:
        raise RuntimeError(
            "Failed to import sequence from NCBI. Check your internet connection.\nThis might also occur due to NCBI failure"
        )

    records = Entrez.read(handle)
    idList = records["IdList"]
    if not idList:
        # Same exception type as the bare idList[0] lookup, but with a
        # message that says what went wrong.
        raise IndexError("No NCBI gene record matched: %s" % inpTerm)

    inpTerm = "ortholog_gene_" + str(idList[0]) + "[group]"
    handle = Entrez.esearch(db="gene",
                            term=inpTerm,
                            rettype='xml',
                            RetMax=300,
                            warning=False)
    records = Entrez.read(handle)
    idList = records["IdList"]

    outRecord = []
    for ids in idList:
        _xmlcreate(ids)
        refIds = _xmlparser()
        os.remove('export.xml')
        recordList = [mrnaExt(inIDs) for inIDs in refIds]

        if not recordList:
            continue
        # max() returns the first record of maximal length, matching the
        # original "replace only when strictly longer" scan.
        longestRec = max(recordList, key=lambda rec: len(rec.seq))
        print("%s" % longestRec.description)
        outRecord.append(longestRec)

    fastaPath = "Align/" + geneName + '.fas'
    with open(fastaPath, 'w') as fp:
        SeqIO.write(outRecord, fp, 'fasta')

    with open(fastaPath, 'r') as fp:
        fdata = fp.readlines()

    with open(fastaPath, 'w') as fp:
        for lines in fdata:
            if '>' in lines:
                fields = lines.split(' ')
                # Headers containing 'PREDICTED' carry one extra word
                # before the two words used for the new name.
                first = 2 if 'PREDICTED' in lines else 1
                newLine = ('>' + fields[first] + '_' + fields[first + 1] +
                           '|' + fields[0].lstrip('>'))
                fp.write('%s\n' % newLine)
            else:
                fp.write('%s' % lines)
Exemple #12
0
def _translator(recordData, ign, omit, table):
    """
    Translate nucleotide records to protein and write 'translated.fas'.

    Each record is translated with ``_translate_str`` using ``table``. When
    ``ign`` equals False and the translation contains a stop ('*'), the
    sequence is re-translated after dropping 1 and then 2 leading bases
    (padded at the end with the same number of 'N'):

      * the first shifted frame without a stop is used, and the record's
        sequence is replaced by that shifted sequence;
      * if all three frames contain stops and ``omit`` equals False, the
        frame with the fewest stops is used (earliest frame on ties), and
        the record's sequence is replaced unless that is the unshifted
        frame;
      * if all three frames contain stops and ``omit`` does not equal
        False, the translation of the last frame is used and the record is
        left unchanged.

    Remaining '*' characters are written as 'Z' in the protein sequences.

    @parameter recordData - list of nucleotide records; modified in place.
    @parameter ign - when not equal to False, no frame search is done.
    @parameter omit - see above.
    @parameter table - translation table handed to _translate_str.
    @return the same list object that was passed in as recordData.
    """
    proteinSeqList = []
    recordsFunc = recordData  # alias: callers get their own list back

    for i, rec in enumerate(recordsFunc):
        seqT = _translate_str(str(rec.seq), table)

        # '== False' is kept on purpose so that only values equal to False
        # enable the frame search, exactly as before.
        if ign == False and "*" in seqT:
            # frames[k] = (sequence shifted by k bases, its translation)
            frames = [(rec.seq, seqT)]
            chosen = None
            for offset in (1, 2):
                shifted = rec.seq[offset:len(rec.seq)] + Seq(
                    "N" * offset, generic_dna)
                protein = _translate_str(str(shifted), table)
                frames.append((shifted, protein))
                if "*" not in protein:
                    chosen = offset
                    break

            if chosen is None and omit == False:
                # Every frame has stops: take the one with the fewest.
                stopCounts = [protein.count('*') for _, protein in frames]
                chosen = stopCounts.index(min(stopCounts))

            if chosen is None:
                # omit requested: keep the last frame's translation and
                # leave the record untouched.
                seqT = frames[-1][1]
            else:
                seqT = frames[chosen][1]
                if chosen:
                    recordsFunc[i].seq = frames[chosen][0]

        # Mark stop codons with 'Z' in a single pass.
        seqT = seqT.replace('*', 'Z')

        proteinSeqList.append(
            SeqRecord(Seq(seqT, IUPAC.protein),
                      id=rec.id,
                      name=rec.name,
                      description=rec.description))

    with open('translated.fas', 'w') as fp:
        SeqIO.write(proteinSeqList, fp, 'fasta')

    return recordsFunc