def read_vcf_file(reference, min_coverage, min_proportion, input_file):
    """Read a VCF file into one VCFGenome per sample.

    Every sample reported by the VCF record reader gets its own VCFGenome,
    which is tagged via set_genome_metadata() and nicknamed after the sample.
    Each record whose position lies within the reference contig is then
    checked against the reference and applied to every sample genome.

    Args:
        reference: Object providing get_contig_length(contig) and
            get_call(position, None, contig).
        min_coverage: A sample's coverage must be >= this value for the
            position to be flagged 'Y' (otherwise 'N').
        min_proportion: A sample's proportion must be >= this value for the
            position to be flagged 'Y' (otherwise 'N').
        input_file: Input descriptor handed to get_file_path() and
            set_genome_metadata().

    Returns:
        The values view of the sample-name -> VCFGenome dictionary.

    Raises:
        ReferenceCallMismatch: If the simplified reference call is not 'N'
            and differs from the simplified first reference call of the
            VCF record.
    """
    genomes = {}
    file_path = get_file_path(input_file)

    # The opened handle is never read here; VCFRecord is given the path.
    # The open() is retained so the file is still opened (and any error
    # from doing so is raised) before parsing starts.
    with open(file_path, 'r'):
        from nasp_objects import VCFGenome, Genome, ReferenceCallMismatch, VCFRecord

        vcf_record = VCFRecord(file_path)
        sample_names = vcf_record.get_samples()

        for sample_name in sample_names:
            sample_genome = VCFGenome()
            genomes[sample_name] = sample_genome
            set_genome_metadata(sample_genome, input_file)
            sample_genome.set_nickname(sample_name)

        while vcf_record.fetch_next_record():
            contig = vcf_record.get_contig()
            position = vcf_record.get_position()

            # Records positioned past the end of the reference contig are skipped.
            if not (position <= reference.get_contig_length(contig)):
                continue

            reference_call = reference.get_call(position, None, contig)
            simple_reference = Genome.simple_call(reference_call)
            if simple_reference != 'N':
                simple_record = Genome.simple_call(vcf_record.get_reference_call()[0])
                if simple_reference != simple_record:
                    raise ReferenceCallMismatch(
                        reference_call,
                        vcf_record.get_reference_call(),
                        file_path,
                        contig,
                        position,
                    )

            for sample_name in sample_names:
                sample_info = vcf_record.get_sample_info(sample_name)
                sample_genome = genomes[sample_name]

                # FIXME indels
                if sample_info['call'] is not None:
                    sample_genome.set_call(sample_info['call'], position, 'X', contig)

                if sample_info['was_called']:
                    sample_genome.set_was_called('Y', position, contig)

                coverage = sample_info['coverage']
                if coverage is not None:
                    coverage_flag = 'Y' if coverage >= min_coverage else 'N'
                    sample_genome.set_coverage_pass(coverage_flag, position, contig)

                proportion = sample_info['proportion']
                if proportion is not None:
                    proportion_flag = 'Y' if proportion >= min_proportion else 'N'
                    sample_genome.set_proportion_pass(proportion_flag, position, contig)
                elif not sample_info['is_a_snp']:
                    # No proportion available and the record is not a SNP.
                    sample_genome.set_proportion_pass('-', position, contig)

    return genomes.values()
def main():
    """Convert an external genome into a reference-aligned "frankenfasta".

    Steps, in order:
      1. Parse the command line.
      2. Pick a nickname: the --name value when given, otherwise one
         generated from the external file name.
      3. Load the external FASTA file.
      4. Produce "<nickname>.filtered.delta" via generate_delta_file().
      5. Populate a new genome from the delta file via parse_delta_file().
      6. Write it to "<nickname>.frankenfasta" with the "franken::" prefix.
    """
    from nasp_objects import Genome, GenomeMeta

    args = _parse_args()
    nickname = args.name or GenomeMeta.generate_nickname_from_filename(args.external)

    source_genome = Genome()
    source_genome.import_fasta_file(args.external)

    generate_delta_file(
        args.nucmerpath,
        args.nucmerargs,
        args.deltafilterpath,
        nickname,
        args.reference,
        args.external,
    )

    rebuilt_genome = Genome()
    delta_filename = nickname + ".filtered.delta"
    parse_delta_file(delta_filename, rebuilt_genome, source_genome)

    output_filename = nickname + ".frankenfasta"
    rebuilt_genome.write_to_fasta_file(output_filename, "franken::")
def _update_genome_from_delta_data( franken_genome, external_genome, parser_state, distance_covered, is_external_insert ):
    """Apply one step of delta-file alignment data to the genome being built.

    Copies a run of calls from external_genome into franken_genome at the
    current reference position, advances the positions stored in
    parser_state, and then handles the single-position insert that ends the
    run.

    Args:
        franken_genome: Genome being assembled; written with set_call().
        external_genome: Genome the calls are read from with get_call().
        parser_state: Dict with keys 'reference_pos', 'external_pos',
            'final_pos' and 'external_is_reversed'. It is updated in place.
        distance_covered: Number of positions to copy. The value -1 means
            "everything up to and including parser_state['final_pos']".
        is_external_insert: Truthy if the step ends with an extra position on
            the external side; otherwise the extra position is on the
            reference side. Forced to True when distance_covered is -1.

    Returns:
        The same parser_state dict that was passed in, after updating.
    """
    from nasp_objects import Genome
    # -1 is a sentinel: cover the remainder of the aligned region.
    if distance_covered == -1:
        distance_covered = parser_state['final_pos'] - parser_state['reference_pos'] + 1
        is_external_insert = True
    if distance_covered > 0:
        if parser_state['external_is_reversed']:
            # Reversed alignment: take the segment that ends at external_pos
            # and reverse-complement it.
            matching_segment = Genome.reverse_complement( ''.join( external_genome.get_call( ( parser_state['external_pos'] - distance_covered + 1 ), parser_state['external_pos'] ) ) )
        else:
            # Forward alignment: take the segment that starts at external_pos.
            matching_segment = ''.join( external_genome.get_call( parser_state['external_pos'], ( parser_state['external_pos'] + distance_covered - 1 ) ) )
        # 'X' is passed as the third set_call() argument; presumably the
        # filler used for unset positions -- TODO confirm against Genome.set_call.
        franken_genome.set_call( list( matching_segment ), parser_state['reference_pos'], 'X' )
        # NOTE(review): the two position updates below are laid out under the
        # distance_covered > 0 guard; for distance_covered == 0 they would be
        # no-ops either way. Confirm placement if negative distances can occur.
        parser_state['reference_pos'] = parser_state['reference_pos'] + distance_covered
        # The external position moves backwards when the alignment is reversed.
        parser_state['external_pos'] = parser_state['external_pos'] + ( -distance_covered if parser_state['external_is_reversed'] else distance_covered )
    if is_external_insert:
        # Extra position on the external side: step over it without writing
        # anything to the franken genome.
        parser_state['external_pos'] = parser_state['external_pos'] + ( -1 if parser_state['external_is_reversed'] else 1 )
    else:
        # Extra position on the reference side: write '.' at the current
        # reference position (with '!' as the third set_call() argument) and
        # step past it.
        franken_genome.set_call( '.', parser_state['reference_pos'], '!' )
        parser_state['reference_pos'] = parser_state['reference_pos'] + 1
    return parser_state
def main():
    """Read the input FASTA file into a Genome and write it back out.

    The input and output paths come from the parsed command-line arguments
    (``inputfasta`` and ``outputfasta``).
    """
    from nasp_objects import Genome

    args = _parse_args()

    genome = Genome()
    genome.import_fasta_file(args.inputfasta)
    genome.write_to_fasta_file(args.outputfasta)