Example #1
0
def read_vcf_file( reference, min_coverage, min_proportion, input_file ):
    """
    Load every sample of a VCF file into its own VCFGenome.

    One VCFGenome is created per sample name reported by the VCFRecord
    reader, given metadata via set_genome_metadata(), and nicknamed after
    the sample.  Each record whose position lies within the matching
    reference contig is then applied to every sample's genome: the call,
    the was-called flag, and the coverage / proportion pass flags.

    Arguments:
        reference:      object answering get_contig_length() and get_call()
        min_coverage:   coverage values >= this are flagged 'Y', else 'N'
        min_proportion: proportion values >= this are flagged 'Y', else 'N'
        input_file:     handed to get_file_path() and set_genome_metadata()

    Returns:
        The values view of the sample-name -> VCFGenome dictionary.

    Raises:
        ReferenceCallMismatch: when the simplified reference call is not
            'N' and differs from the simplified first element of the
            record's reference call.
    """
    sample_genomes = {}
    vcf_path = get_file_path( input_file )
    # The handle is never read here; VCFRecord is given the path instead.
    with open( vcf_path, 'r' ):
        from nasp_objects import VCFGenome, Genome, ReferenceCallMismatch, VCFRecord
        record = VCFRecord( vcf_path )
        sample_names = record.get_samples()

        for sample_name in sample_names:
            new_genome = VCFGenome()
            sample_genomes[sample_name] = new_genome
            set_genome_metadata( new_genome, input_file )
            new_genome.set_nickname( sample_name )

        while record.fetch_next_record():
            contig = record.get_contig()
            position = record.get_position()
            # Records past the end of the reference contig are ignored.
            if position > reference.get_contig_length( contig ):
                continue

            reference_call = reference.get_call( position, None, contig )
            simple_reference = Genome.simple_call( reference_call )
            # An 'N' in the reference is compatible with any VCF reference call.
            if simple_reference != 'N':
                simple_vcf_reference = Genome.simple_call( record.get_reference_call()[0] )
                if simple_reference != simple_vcf_reference:
                    raise ReferenceCallMismatch( reference_call, record.get_reference_call(), vcf_path, contig, position )

            for sample_name in sample_names:
                info = record.get_sample_info( sample_name )
                genome = sample_genomes[sample_name]
                # FIXME indels
                if info['call'] is not None:
                    genome.set_call( info['call'], position, 'X', contig )
                if info['was_called']:
                    genome.set_was_called( 'Y', position, contig )

                coverage = info['coverage']
                if coverage is not None:
                    coverage_flag = 'Y' if coverage >= min_coverage else 'N'
                    genome.set_coverage_pass( coverage_flag, position, contig )

                proportion = info['proportion']
                if proportion is not None:
                    proportion_flag = 'Y' if proportion >= min_proportion else 'N'
                    genome.set_proportion_pass( proportion_flag, position, contig )
                elif not info['is_a_snp']:
                    # No proportion available and the site is not a SNP.
                    genome.set_proportion_pass( '-', position, contig )
    return sample_genomes.values()
def main():
    """
    Command-line entry point: build a "frankenfasta" for an external genome.

    Steps, in order:
      1. parse the command line with _parse_args();
      2. take the nickname from the name argument, or derive it from the
         external file name when no name was supplied;
      3. load the external fasta into a Genome;
      4. run generate_delta_file() with the nucmer / delta-filter settings;
      5. parse "<nickname>.filtered.delta" into a fresh Genome;
      6. write that Genome to "<nickname>.frankenfasta", passing
         "franken::" as the second argument (presumably a contig-name
         prefix; confirm against Genome.write_to_fasta_file).
    """
    from nasp_objects import Genome, GenomeMeta

    args = _parse_args()
    nickname = args.name or GenomeMeta.generate_nickname_from_filename( args.external )

    external = Genome()
    external.import_fasta_file( args.external )

    generate_delta_file( args.nucmerpath, args.nucmerargs, args.deltafilterpath, nickname, args.reference, args.external )

    franken = Genome()
    filtered_delta_path = nickname + ".filtered.delta"
    parse_delta_file( filtered_delta_path, franken, external )

    frankenfasta_path = nickname + ".frankenfasta"
    franken.write_to_fasta_file( frankenfasta_path, "franken::" )
def _update_genome_from_delta_data( franken_genome, external_genome, parser_state, distance_covered, is_external_insert ):
    """
    Copy one stretch of the external genome into the franken genome and
    advance the parser positions accordingly.

    Arguments:
        franken_genome:     genome being assembled; receives set_call()
        external_genome:    genome the matching bases are read from
        parser_state:       dict with 'reference_pos', 'external_pos',
                            'external_is_reversed' and 'final_pos';
                            updated in place
        distance_covered:   number of positions to copy; the value -1 is a
                            sentinel meaning "everything up to and
                            including final_pos", and also forces
                            is_external_insert to True
        is_external_insert: when true the external position skips one
                            extra step; otherwise a '.' call is written at
                            the new reference position and the reference
                            position skips one step

    Returns:
        The same parser_state dictionary that was passed in.
    """
    from nasp_objects import Genome

    if distance_covered == -1:
        distance_covered = parser_state['final_pos'] - parser_state['reference_pos'] + 1
        is_external_insert = True

    if distance_covered > 0:
        anchor = parser_state['external_pos']
        if parser_state['external_is_reversed']:
            # Reversed: the stretch ends at the anchor and must be flipped.
            raw_calls = external_genome.get_call( anchor - distance_covered + 1, anchor )
            segment = Genome.reverse_complement( ''.join( raw_calls ) )
        else:
            raw_calls = external_genome.get_call( anchor, anchor + distance_covered - 1 )
            segment = ''.join( raw_calls )
        franken_genome.set_call( list( segment ), parser_state['reference_pos'], 'X' )

    parser_state['reference_pos'] += distance_covered
    # The external position moves backwards when the match is reversed.
    direction = -1 if parser_state['external_is_reversed'] else 1
    parser_state['external_pos'] += direction * distance_covered

    if is_external_insert:
        parser_state['external_pos'] += direction
        return parser_state

    franken_genome.set_call( '.', parser_state['reference_pos'], '!' )
    parser_state['reference_pos'] += 1
    return parser_state
Example #4
0
def main():
    """
    Command-line entry point: read the fasta file named by the inputfasta
    argument into a Genome and write that Genome back out to the file
    named by the outputfasta argument.
    """
    from nasp_objects import Genome

    args = _parse_args()
    genome = Genome()
    genome.import_fasta_file( args.inputfasta )
    genome.write_to_fasta_file( args.outputfasta )