Ejemplo n.º 1
0
def sort_vcf( input, reference, output ):
    """Write the entries of a VCF to `output`, grouped contig by contig.

    Contigs are processed in the order returned by
    ``get_contig_list(reference)``. The VCF metadata lines are written
    first; then, for each contig, every line whose ``chr`` field equals
    that contig is written. A contig for which ``seek`` returns a negative
    value is reported and skipped. Progress messages are printed to stdout.

    input     -- VCF source handed to ``VCF`` (index requested on open)
    reference -- handed to ``get_contig_list`` to obtain the contig order
    output    -- path of the file to write; opened in 'w' mode
    """
    ordered_contigs = get_contig_list( reference )

    # Single-argument parenthesised print: same output as the print statement.
    print("read {} contigs".format(len(ordered_contigs)))

    # The second argument requests an index of the VCF upon open.
    vcf = VCF( input, True )

    with open( output, 'w') as sink:
        # Metadata goes first, one newline-terminated line each.
        for header in vcf.metadata:
            sink.write( header+"\n" )

        for contig in ordered_contigs:
            print("writing entries for contig {}".format(contig))

            # A negative seek result means the contig is absent from the VCF.
            if vcf.seek(contig) < 0:
                print("skipped {} because it's not in the VCF".format( contig ))
                continue

            def on_this_contig( raw_line ):
                # Keep only the lines that belong to the current contig.
                return VCFLine(raw_line).chr == contig

            # NOTE(review): the meaning of the first positional argument to
            # lines() is not visible here -- confirm against the VCF class.
            written = 0
            for written, entry in enumerate( vcf.lines( True, on_this_contig ), 1 ):
                sink.write( entry.line+"\n" )
            print("wrote {} entries for {}".format( written, contig ))