コード例 #1
0
    def run(self):
        """Report positions where two sets of evidence differ significantly.

        Reads the per-sequence ``*-evidence.txt`` files of the two working
        directories in lockstep and writes a tab-separated report to
        ``self.prefix + '.txt'``: two preamble lines (the cutoff, and the
        smallest depth at which a pure 'A' versus pure 'T' comparison reaches
        the cutoff), a header row, and then one row for every insertion or
        substitution/deletion whose ``significance`` value is not None and is
        at or below ``self.cutoff``.

        ``self.title1`` / ``self.title2`` label the report columns; when they
        are None, ``working1.name`` / ``working2.name`` are used instead.

        Returns:
            0 (always).

        Raises:
            AssertionError: if the two evidence files disagree on position or
                reference at some row.
        """
        title1 = self.title1
        title2 = self.title2

        working1 = working_directory.Working(self.working_dir1)
        working2 = working_directory.Working(self.working_dir2)

        cutoff = self.cutoff

        # Only the names are needed; the sequence list comes from the first
        # working directory's reference.
        sequence_names = [
            name for name, length in working1.get_reference().get_lengths()
        ]

        if title1 is None:
            title1 = working1.name
        if title2 is None:
            title2 = working2.name

        # Smallest depth n at which n 'A's against n 'T's reaches the cutoff.
        n = 1
        while significance([('A', n)], [('T', n)], 1.0) > cutoff:
            n += 1

        # The context manager closes the report even if a row fails the
        # lockstep assertion or a helper raises part-way through.
        with open(self.prefix + '.txt', 'wb') as f:
            print >> f, '%g\tsignificance cutoff' % cutoff
            print >> f, '%d\tdepth required to call substitution (greater if there are errors in the reads)' % n

            print >> f, 'Sequence\tPosition in reference\tChange type\tReference\t%s\t%s\tp-value (no correction for multiple testing)\t%s\t%s' % (
                title1, title2, title1, title2)

            for sequence_name in sequence_names:
                evidence_name = (
                    grace.filesystem_friendly_name(sequence_name) +
                    '-evidence.txt')
                filename1 = working1 / evidence_name
                filename2 = working2 / evidence_name

                # NOTE(review): izip stops at the shorter input, so a
                # truncated evidence file would go unnoticed -- confirm that
                # this is acceptable.
                for (pos1, ins1, sub1, ref1, conins1,
                     consub1), (pos2, ins2, sub2, ref2, conins2,
                                consub2) in itertools.izip(
                                    read_file(filename1),
                                    read_file(filename2)):
                    assert pos1 == pos2 and ref1 == ref2, \
                        'evidence files are out of step'

                    if pos1 % 1000 == 0:
                        grace.status('Testing %s %d' % (sequence_name, pos1))

                    # Insertions: only tested when both samples have evidence.
                    dec_ins1 = io.decode_evidence(ins1)
                    dec_ins2 = io.decode_evidence(ins2)
                    if dec_ins1 and dec_ins2:
                        # Reuse the decoded evidence instead of decoding the
                        # same strings a second time.
                        sig = significance(dec_ins1, dec_ins2, cutoff)
                        if sig is not None and sig <= cutoff:
                            # The Reference column is left empty for
                            # insertions.
                            print >> f, '%s\t%d\t%s\t\t%s\t%s\t%g\t%s\t%s' % (
                                sequence_name, pos1, 'insertion-before', ins1,
                                ins2, sig, conins1, conins2)
                            # Flush per row so partial results are visible
                            # while the run is still in progress.
                            f.flush()

                    # Substitutions / deletions.
                    dec_sub1 = io.decode_evidence(sub1)
                    dec_sub2 = io.decode_evidence(sub2)
                    if dec_sub1 and dec_sub2:
                        sig = significance(dec_sub1, dec_sub2, cutoff)
                        if sig is not None and sig <= cutoff:
                            # Classify on the first decoded entry of each
                            # sample (presumably the best-supported call --
                            # verify against io.decode_evidence).
                            if dec_sub1[0][0] == '-' or dec_sub2[0][0] == '-':
                                what = 'deletion'
                            elif dec_sub1[0][0] != dec_sub2[0][0]:
                                what = 'substitution'
                            else:
                                what = 'different mix'
                            print >> f, '%s\t%d\t%s\t%s\t%s\t%s\t%g\t%s\t%s' % (
                                sequence_name, pos1, what, ref1, sub1, sub2,
                                sig, consub1, consub2)
                            f.flush()

        # Clear the progress line.
        grace.status('')
        return 0
コード例 #2
0
ファイル: fisher_diff.py プロジェクト: Slugger70/nesoni
def main(args):
    title1, args = grace.get_option_value(args, "--title1", str, None)
    title2, args = grace.get_option_value(args, "--title2", str, None)
    grace.expect_no_further_options(args)

    if len(args) != 3:
        print >> sys.stderr, USAGE
        return 1

    working_dir1 = args[0]
    working_dir2 = args[1]
    cutoff = float(args[2])

    sequence_names = [name for name, sequence in io.read_sequences(os.path.join(working_dir1, "reference.fa"))]

    if title1 is None:
        title1 = working_dir1
    if title2 is None:
        title2 = working_dir2

    n = 1
    while significance([("A", n)], [("T", n)], 1.0) > cutoff:
        n += 1

    print "%g\tsignificance cutoff" % cutoff
    print "%d\tdepth required to call substitution (greater if there are errors in the reads)" % n

    print "Sequence\tPosition in reference\tChange type\tReference\t%s\t%s\tp-value (no correction for multiple testing)\t%s\t%s" % (
        title1,
        title2,
        title1,
        title2,
    )

    for sequence_name in sequence_names:
        filename1 = os.path.join(working_dir1, grace.filesystem_friendly_name(sequence_name) + "-evidence.txt")
        filename2 = os.path.join(working_dir2, grace.filesystem_friendly_name(sequence_name) + "-evidence.txt")

        for (pos1, ins1, sub1, ref1, conins1, consub1), (pos2, ins2, sub2, ref2, conins2, consub2) in itertools.izip(
            read_file(filename1), read_file(filename2)
        ):
            assert pos1 == pos2 and ref1 == ref2

            if pos1 % 1000 == 0:
                grace.status("Testing %s %d" % (sequence_name, pos1))

            dec_ins1 = io.decode_evidence(ins1)
            dec_ins2 = io.decode_evidence(ins2)
            if dec_ins1 and dec_ins2:
                sig = significance(io.decode_evidence(ins1), io.decode_evidence(ins2), cutoff)
                if sig is not None and sig <= cutoff:
                    grace.status("")
                    print "%s\t%d\t%s\t\t%s\t%s\t%g\t%s\t%s" % (
                        sequence_name,
                        pos1,
                        "insertion-before",
                        ins1,
                        ins2,
                        sig,
                        conins1,
                        conins2,
                    )

            dec_sub1 = io.decode_evidence(sub1)
            dec_sub2 = io.decode_evidence(sub2)
            if dec_sub1 and dec_sub2:
                sig = significance(dec_sub1, dec_sub2, cutoff)
                if sig is not None and sig <= cutoff:
                    if dec_sub1[0][0] == "-" or dec_sub2[0][0] == "-":
                        what = "deletion"
                    elif dec_sub1[0][0] != dec_sub2[0][0]:
                        what = "substitution"
                    else:
                        what = "different mix"
                    grace.status("")
                    print "%s\t%d\t%s\t%s\t%s\t%s\t%g\t%s\t%s" % (
                        sequence_name,
                        pos1,
                        what,
                        ref1,
                        sub1,
                        sub2,
                        sig,
                        consub1,
                        consub2,
                    )

    grace.status("")
    return 0
コード例 #3
0
    def run(self):
        """Write a report of significant differences between two evidence sets.

        For every sequence in the first working directory's reference, the
        matching ``*-evidence.txt`` files of both working directories are read
        row by row. The output file ``self.prefix + '.txt'`` receives two
        preamble lines (the cutoff, and the smallest depth at which a pure 'A'
        versus pure 'T' comparison reaches the cutoff), a header row, and one
        tab-separated row per insertion or substitution/deletion whose
        ``significance`` value is not None and does not exceed
        ``self.cutoff``.

        Column titles come from ``self.title1`` / ``self.title2``, falling
        back to ``working1.name`` / ``working2.name`` when those are None.

        Returns:
            0 (always).

        Raises:
            AssertionError: if the two evidence files disagree on position or
                reference at some row.
        """
        title1 = self.title1
        title2 = self.title2
        
        working1 = working_directory.Working(self.working_dir1)
        working2 = working_directory.Working(self.working_dir2)
        
        cutoff = self.cutoff
        
        sequence_names = [ name 
                           for name, length 
                           in working1.get_reference().get_lengths() ]
        
        if title1 is None:
            title1 = working1.name
        if title2 is None:
            title2 = working2.name
            
        # Smallest depth n at which n 'A's against n 'T's reaches the cutoff.
        n = 1
        while significance([('A',n)],[('T',n)],1.0) > cutoff:
            n += 1

        # "with" guarantees the report is closed even when a row fails the
        # lockstep assertion or a helper raises part-way through.
        with open(self.prefix + '.txt','wb') as f:
            print >> f, '%g\tsignificance cutoff' % cutoff
            print >> f, '%d\tdepth required to call substitution (greater if there are errors in the reads)' % n
                
            print >> f, 'Sequence\tPosition in reference\tChange type\tReference\t%s\t%s\tp-value (no correction for multiple testing)\t%s\t%s' % (title1, title2, title1, title2)
        
            for sequence_name in sequence_names:
                filename1 = working1/(grace.filesystem_friendly_name(sequence_name) + '-evidence.txt')
                filename2 = working2/(grace.filesystem_friendly_name(sequence_name) + '-evidence.txt')
            
                # NOTE(review): izip stops at the shorter input, so a truncated
                # evidence file would go unnoticed -- confirm this is acceptable.
                for (pos1, ins1, sub1, ref1, conins1, consub1), (pos2, ins2, sub2, ref2, conins2, consub2) in itertools.izip(read_file(filename1), read_file(filename2)):
                    assert pos1 == pos2 and ref1 == ref2, 'evidence files are out of step'
                
                    if pos1 % 1000 == 0:
                        grace.status('Testing %s %d' % (sequence_name, pos1))
                
                    # Insertions: only tested when both samples have evidence.
                    dec_ins1 = io.decode_evidence(ins1)
                    dec_ins2 = io.decode_evidence(ins2)
                    if dec_ins1 and dec_ins2:
                        # Reuse the decoded evidence rather than decoding the
                        # same strings again.
                        sig = significance(dec_ins1, dec_ins2, cutoff)
                        if sig is not None and sig <= cutoff:
                            # The Reference column is left empty for insertions.
                            print >> f, '%s\t%d\t%s\t\t%s\t%s\t%g\t%s\t%s' % (sequence_name, pos1, 'insertion-before', ins1, ins2, sig, conins1, conins2)
                            # Flush per row so partial results are visible
                            # while the run is still in progress.
                            f.flush()
                
                    # Substitutions / deletions.
                    dec_sub1 = io.decode_evidence(sub1)
                    dec_sub2 = io.decode_evidence(sub2)
                    if dec_sub1 and dec_sub2:
                        sig = significance(dec_sub1, dec_sub2, cutoff)
                        if sig is not None and sig <= cutoff:
                            # Classify on the first decoded entry of each sample
                            # (presumably the best-supported call -- verify
                            # against io.decode_evidence).
                            if dec_sub1[0][0] == '-' or dec_sub2[0][0] == '-':
                                what = 'deletion'
                            elif dec_sub1[0][0] != dec_sub2[0][0]:
                                what = 'substitution'
                            else:
                                what = 'different mix'
                            print >> f, '%s\t%d\t%s\t%s\t%s\t%s\t%g\t%s\t%s' % (sequence_name, pos1, what, ref1, sub1, sub2, sig, consub1, consub2)
                            f.flush()
        
        # Clear the progress line.
        grace.status('')
        return 0
コード例 #4
0
ファイル: fisher_diff.py プロジェクト: Slugger70/nesoni
def main(args):
    title1, args = grace.get_option_value(args, '--title1', str, None)
    title2, args = grace.get_option_value(args, '--title2', str, None)
    grace.expect_no_further_options(args)

    if len(args) != 3:
        print >> sys.stderr, USAGE
        return 1

    working_dir1 = args[0]
    working_dir2 = args[1]
    cutoff = float(args[2])

    sequence_names = [
        name for name, sequence in io.read_sequences(
            os.path.join(working_dir1, 'reference.fa'))
    ]

    if title1 is None:
        title1 = working_dir1
    if title2 is None:
        title2 = working_dir2

    n = 1
    while significance([('A', n)], [('T', n)], 1.0) > cutoff:
        n += 1

    print '%g\tsignificance cutoff' % cutoff
    print '%d\tdepth required to call substitution (greater if there are errors in the reads)' % n

    print 'Sequence\tPosition in reference\tChange type\tReference\t%s\t%s\tp-value (no correction for multiple testing)\t%s\t%s' % (
        title1, title2, title1, title2)

    for sequence_name in sequence_names:
        filename1 = os.path.join(
            working_dir1,
            grace.filesystem_friendly_name(sequence_name) + '-evidence.txt')
        filename2 = os.path.join(
            working_dir2,
            grace.filesystem_friendly_name(sequence_name) + '-evidence.txt')

        for (pos1, ins1, sub1, ref1, conins1,
             consub1), (pos2, ins2, sub2, ref2, conins2,
                        consub2) in itertools.izip(read_file(filename1),
                                                   read_file(filename2)):
            assert pos1 == pos2 and ref1 == ref2

            if pos1 % 1000 == 0:
                grace.status('Testing %s %d' % (sequence_name, pos1))

            dec_ins1 = io.decode_evidence(ins1)
            dec_ins2 = io.decode_evidence(ins2)
            if dec_ins1 and dec_ins2:
                sig = significance(io.decode_evidence(ins1),
                                   io.decode_evidence(ins2), cutoff)
                if sig is not None and sig <= cutoff:
                    grace.status('')
                    print '%s\t%d\t%s\t\t%s\t%s\t%g\t%s\t%s' % (
                        sequence_name, pos1, 'insertion-before', ins1, ins2,
                        sig, conins1, conins2)

            dec_sub1 = io.decode_evidence(sub1)
            dec_sub2 = io.decode_evidence(sub2)
            if dec_sub1 and dec_sub2:
                sig = significance(dec_sub1, dec_sub2, cutoff)
                if sig is not None and sig <= cutoff:
                    if dec_sub1[0][0] == '-' or dec_sub2[0][0] == '-':
                        what = 'deletion'
                    elif dec_sub1[0][0] != dec_sub2[0][0]:
                        what = 'substitution'
                    else:
                        what = 'different mix'
                    grace.status('')
                    print '%s\t%d\t%s\t%s\t%s\t%s\t%g\t%s\t%s' % (
                        sequence_name, pos1, what, ref1, sub1, sub2, sig,
                        consub1, consub2)

    grace.status('')
    return 0