Esempio n. 1
0
    def execute_megablast(self, fasta1, fasta2):
        '''
        Run a megablast search of ``fasta1`` against each file in ``fasta2``.

        For every path in ``fasta2`` a ``formatdb -p F`` command is run on
        that file, then ``fasta1`` is searched against it with
        ``blastn -task megablast``. Tabular results (``-outfmt 6``) go to
        ``blast_result_<prefix>.tab``, where ``<prefix>`` is the base name of
        the database file up to its first dot.

        :param fasta1: path of the query fasta file
        :param fasta2: iterable of fasta paths, each used as a database
        :return: None; the values returned by the shell helper are ignored
        '''
        import os
        from TPutils import shell_command

        run = shell_command.shell_command
        for db_fasta in fasta2:
            prefix = os.path.basename(db_fasta).split('.')[0]

            format_cmd = "formatdb -i %s -p F" % db_fasta
            search_cmd = (
                'blastn -task megablast -query %s -db %s -evalue 1e-5 '
                '-outfmt 6 -out blast_result_%s.tab'
            ) % (fasta1, db_fasta, prefix)

            # The database has to exist before it can be searched.
            for command in (format_cmd, search_cmd):
                _out, _err, _code = run(command)
Esempio n. 2
0
def delta_file2start_stop_list(delta_input,
                               contigs_add,
                               algo='nucmer',
                               minimum_identity=85):
    '''
    Convert a delta file into per-contig lists of alignment start/stop
    positions.

    ``show-coords -T -r -c -L 100`` is run on ``delta_input`` and its output
    is redirected to ``gaps_<prefix>.coords``, where ``<prefix>`` is
    ``delta_input`` up to its first dot. The lines of that file are filtered
    through ``justLinks`` and every remaining row is split on tabs: columns 0
    and 1 are read as integers and shifted by the offset stored for the
    row's contig in ``contigs_add``.

    :param delta_input: path of the delta file given to show-coords
    :param contigs_add: mapping contig name -> sequence whose first item is
        the offset added to each start/stop value
    :param algo: 'nucmer' or 'promer'; selects the column holding the contig
        name (column 9 for nucmer, column 13 for promer)
    :param minimum_identity: value passed to ``show-coords -I``
    :return: tuple ``(contig2start_stop_list, coords_file)`` where the first
        item maps contig name -> ``{"start": [...], "stop": [...]}`` and the
        second is the name of the coords file that was written
    :raises ValueError: if ``algo`` is neither 'nucmer' nor 'promer'
    :raises SystemExit: with status 1 if show-coords reports a non-zero
        return value
    '''

    # promer rows carry four more columns than nucmer rows before the contig
    # name (presumably %SIM, %STP and the two frame columns -- TODO confirm).
    column_shift = {'nucmer': 0, 'promer': 4}
    if algo not in column_shift:
        # Fail before running anything instead of hitting an unbound
        # variable in the middle of the parsing loop.
        raise ValueError(
            "unknown algo %r (expected 'nucmer' or 'promer')" % (algo,))
    name_column = 9 + column_shift[algo]

    import re
    import sys
    from TPutils import shell_command

    # NOTE(review): splitting on the first dot of the whole path gives an
    # empty prefix for inputs such as "./x.delta" -- confirm callers only
    # pass bare file names.
    out_name = delta_input.split('.')[0]

    coords_file = 'gaps_%s.coords' % out_name

    cmd2 = 'show-coords -T -r -c -L 100 -I %s %s > %s' % (
        minimum_identity, delta_input, coords_file)
    print(cmd2)
    a, b, c = shell_command.shell_command(cmd2)
    if c != 0:
        print('nucmer error!!!')
        print(a, b, c)
        print(cmd2)
        # Non-zero status so the calling shell can see the failure.
        sys.exit(1)

    # Plain 'r' already gives universal newlines on Python 3; the former
    # 'rU' mode is rejected by Python >= 3.11.
    with open(coords_file, 'r') as infile:
        rawLinks = justLinks(infile.readlines())

    contig2start_stop_list = {}

    for row in rawLinks:
        fields = re.split(r'\t+', row.rstrip('\n'))

        contig_ref = fields[name_column]
        offset = contigs_add[contig_ref][0]
        start = int(fields[0]) + offset
        stop = int(fields[1]) + offset

        positions = contig2start_stop_list.setdefault(
            contig_ref, {"start": [], "stop": []})
        positions["start"].append(start)
        positions["stop"].append(stop)

    return contig2start_stop_list, coords_file
Esempio n. 3
0
 def run_circos(self, config_file="circos.config", out_prefix="circos"):
     '''
     Run circos and echo everything the shell helper returned.

     The command is ``circos -outputfile <out_prefix>.svg -conf
     <config_file>``. The three values returned by
     ``shell_command.shell_command`` are written to stdout in order, each
     converted with ``str``, and stdout is flushed.

     :param config_file: path handed to ``-conf``
     :param out_prefix: output name without the ``.svg`` extension
     :return: None
     :raises CircosException: if the third returned value (presumably the
         exit status) equals 255
     '''
     from TPutils import shell_command

     command = 'circos -outputfile %s.svg -conf %s' % (out_prefix, config_file)
     out, err, code = shell_command.shell_command(command)

     for part in (out, err, code):
         sys.stdout.write(str(part))
     sys.stdout.flush()

     if code != 255:
         return
     raise CircosException("Circos problem, check files... quitting")
Esempio n. 4
0
    def blast2circos_file(self,
                          blast,
                          reference,
                          blastn=False,
                          identity_cutoff=80):
        '''
        Blast ``blast`` against ``reference`` and write the hits as two
        circos data files.

        ``formatdb -p F`` is run on ``reference`` first. The search uses
        tblastn by default (e-value 1e-8, ``max_target_seqs=1``) and blastn
        (e-value 0.001) when ``blastn`` is true; tabular results are written
        to ``blast.tmp`` and passed through
        ``blast_utils.remove_blast_redundancy``. Every hit whose first field
        is at least ``identity_cutoff`` produces one line in
        ``circos_blast.txt`` (contig, start, end) and one in
        ``circos_blast_labels.txt`` (contig, start, end, gene). Both
        positions are shifted by ``self.contigs_add[<contig name>][0]``,
        where the contig name is looked up with its ``|`` characters removed.

        :param blast: path of the query fasta file
        :param reference: path of the fasta file used as blast database
        :param blastn: use blastn instead of the default tblastn
        :param identity_cutoff: minimum value of a hit's first field
            (presumably percent identity) for the hit to be written
        :return: None
        '''

        from TPutils import shell_command
        from TPutils import blast_utils
        from Bio.Blast.Applications import NcbitblastnCommandline
        from Bio.Blast.Applications import NcbiblastnCommandline

        # todo catch IO errors, other potential errors
        _out, _err, c = shell_command.shell_command('formatdb -i %s -p F' %
                                                    (reference))
        print(c)
        if not blastn:
            blast_cline = NcbitblastnCommandline(
                query=blast,
                db=reference,
                evalue=0.00000001,  # 0.001
                outfmt=6,
                out="blast.tmp",
                max_target_seqs=1)
            print(blast_cline)
        else:
            blast_cline = NcbiblastnCommandline(query=blast,
                                                db=reference,
                                                evalue=0.001,
                                                outfmt=6,
                                                out="blast.tmp")
        # Results go to blast.tmp; the captured stdout/stderr are not used.
        blast_cline()

        print('############## BLAST ###################')

        blast2data, _queries = blast_utils.remove_blast_redundancy(
            ["blast.tmp"], check_overlap=False)

        # Context managers make sure both output files are flushed and
        # closed, including when a lookup below raises.
        with open('circos_blast.txt', "w") as coords_out, \
                open('circos_blast_labels.txt', "w") as labels_out:
            for contig in blast2data:
                # contigs_add is keyed by the contig name without pipes.
                cname = contig.replace("|", "")
                for gene in blast2data[contig]:
                    hit = blast2data[contig][gene]
                    if float(hit[0]) >= identity_cutoff:  # 80,20
                        location = sorted(hit[1:3])
                        offset = self.contigs_add[cname][0]
                        start = location[0] + offset
                        end = location[1] + offset
                        coords_out.write("%s\t%s\t%s\n" %
                                         (contig, start, end))
                        labels_out.write("%s\t%s\t%s\t%s\n" %
                                         (contig, start, end, gene))
Esempio n. 5
0
def execute_promer(fasta1,
                   fasta2,
                   algo="nucmer",
                   coords=True,
                   minimum_align_length=100,
                   minimum_identity=30):
    '''
    Align each query fasta against a reference with nucmer or promer.

    For every query the aligner is run with the query's base name (up to its
    first dot) as output prefix, which yields ``<prefix>.delta``. When
    ``coords`` is true, ``show-coords -T -r -c`` is then run on that delta
    file and redirected to ``<prefix>.coords``. Each command is printed
    before it is executed.

    :param fasta1: reference fasta
    :param fasta2: query fasta(s); a list of paths, or a single path string
    :param algo: nucmer/promer
    :param coords: execute show-coords
    :param minimum_align_length: default:100, passed to ``show-coords -L``
    :param minimum_identity: default:30, passed to ``show-coords -I``
    :return: coords file list, delta file list (the coords list is empty
        when ``coords`` is false)
    :raises ValueError: if ``algo`` is neither 'nucmer' nor 'promer'
    :raises Exception: carrying the second value returned by the shell
        helper when a command reports a non-zero return value
    '''

    import os

    # Aligner command per algorithm, filled with (prefix, reference, query).
    align_templates = {
        # 03.2017 => changed from -mum to -mumreference. 01.06.17 removed -mumreference
        'nucmer': 'nucmer -b 200 -c 65 -g 90 -l 20 -p %s %s %s',
        # promer --mum -l 5
        'promer': 'promer -l 5 -p %s %s %s',
    }
    if algo not in align_templates:
        # Without this check nothing would be run, yet names of files that
        # were never created would be returned.
        raise ValueError(
            "unknown algo %r (expected 'nucmer' or 'promer')" % (algo,))

    from TPutils import shell_command

    # A bare string would otherwise be iterated character by character.
    if isinstance(fasta2, str):
        fasta2 = [fasta2]

    delta_files = []
    coord_files = []
    for one_fasta in fasta2:
        prefix = os.path.basename(one_fasta).split('.')[0]

        commands = [align_templates[algo] % (prefix, fasta1, one_fasta)]
        if coords:
            # show-coords -T -r -c -L 100 -I 30 out.delta
            commands.append(
                'show-coords -T -r -c -L %s -I %s %s.delta > %s.coords' % (
                    minimum_align_length, minimum_identity, prefix, prefix))

        for cmd in commands:
            print(cmd)
            a, b, c = shell_command.shell_command(cmd)
            if c != 0:
                raise Exception("%s" % b)

        delta_files.append('%s.delta' % prefix)
        if coords:
            coord_files.append('%s.coords' % prefix)

    return coord_files, delta_files