Beispiel #1
0
def read_vcf_file(reference, min_coverage, min_proportion, input_file):
    """Build one VCFGenome per sample listed in a VCF file.

    Every record whose position does not exceed the length of its contig
    in ``reference`` is applied to each sample's genome: the call, the
    was-called flag, and the coverage / proportion pass flags.  Records
    positioned past the end of the reference contig are skipped.

    Args:
        reference: Object queried through ``get_contig_length(contig)`` and
            ``get_call(position, None, contig)``.
        min_coverage: Threshold; a sample's coverage at or above it is
            flagged 'Y', below it 'N'.
        min_proportion: Threshold; a sample's proportion at or above it is
            flagged 'Y', below it 'N'.
        input_file: Passed to ``get_file_path`` to locate the VCF file and
            to ``set_genome_metadata`` for every genome created.

    Returns:
        The ``values()`` view of the sample-name -> VCFGenome mapping.

    Raises:
        ReferenceCallMismatch: When the simplified reference call is not
            'N' and differs from the simplified first element of the
            record's own reference call.
    """
    genomes_by_sample = {}
    vcf_path = get_file_path(input_file)
    # The handle opened here is never read in this function; VCFRecord is
    # handed the path rather than the handle.
    with open(vcf_path, 'r'):
        from nasp_objects import VCFGenome, Genome, ReferenceCallMismatch, VCFRecord
        record = VCFRecord(vcf_path)
        sample_names = record.get_samples()

        # One genome per sample, tagged with the file metadata and nicknamed
        # after the sample.
        for sample_name in sample_names:
            genome = genomes_by_sample[sample_name] = VCFGenome()
            set_genome_metadata(genome, input_file)
            genome.set_nickname(sample_name)

        while record.fetch_next_record():
            contig = record.get_contig()
            position = record.get_position()
            # Skip records positioned past the end of the reference contig.
            if not (position <= reference.get_contig_length(contig)):
                continue

            ref_call = reference.get_call(position, None, contig)
            ref_simple = Genome.simple_call(ref_call)
            # A reference call that simplifies to 'N' is never treated as a
            # mismatch.
            if ref_simple != 'N':
                vcf_simple = Genome.simple_call(record.get_reference_call()[0])
                if ref_simple != vcf_simple:
                    raise ReferenceCallMismatch(
                        ref_call,
                        record.get_reference_call(),
                        vcf_path,
                        contig,
                        position,
                    )

            for sample_name in sample_names:
                info = record.get_sample_info(sample_name)
                genome = genomes_by_sample[sample_name]

                # FIXME indels
                call = info['call']
                if call is not None:
                    genome.set_call(call, position, 'X', contig)

                if info['was_called']:
                    genome.set_was_called('Y', position, contig)

                coverage = info['coverage']
                if coverage is not None:
                    coverage_flag = 'Y' if coverage >= min_coverage else 'N'
                    genome.set_coverage_pass(coverage_flag, position, contig)

                proportion = info['proportion']
                if proportion is not None:
                    proportion_flag = 'Y' if proportion >= min_proportion else 'N'
                    genome.set_proportion_pass(proportion_flag, position, contig)
                elif not info['is_a_snp']:
                    # No proportion and not a SNP: record the '-' marker.
                    genome.set_proportion_pass('-', position, contig)
    return genomes_by_sample.values()