def type_sequences(input, grouping=GROUPING, exon_fofn=None,
                   genomic_reference=None, cDNA_reference=None, loci=None):
    """
    Run the typing procedure on ``input`` and return the typing summary.

    Parameters
    ----------
    input
        Passed to ``get_log_file`` and ``get_input_file``; per the original
        comments it may be a directory, in which case the sequence file is
        looked up by ``get_input_file``.
    grouping
        Forwarded to ``extract_alleles`` as ``method``. A falsy value is
        replaced by the module-level ``GROUPING``.
    exon_fofn, genomic_reference, cDNA_reference
        References used by the pipeline. Each falsy value is replaced by
        the result of the matching ``get_*_reference()`` helper.
    loci
        Forwarded unchanged to ``extract_alleles``.

    Returns
    -------
    The value returned by ``summarize_typing`` for the genomic and cDNA
    alignments.
    """
    # Logging comes first so every later step is captured in the log file
    log_path = get_log_file(input)
    initialize_logger(log, log_file=log_path)

    # Fill in whichever settings/references the caller left unspecified.
    # The helpers are only invoked when the corresponding argument is falsy.
    if not grouping:
        grouping = GROUPING
    if not exon_fofn:
        exon_fofn = get_exon_reference()
    if not genomic_reference:
        genomic_reference = get_genomic_reference()
    if not cDNA_reference:
        cDNA_reference = get_cDNA_reference()

    # Resolve the actual sequence file (the input may be a directory)
    sequences = get_input_file(input)

    # Rename, align against the genomic reference, then re-orient
    renamed = rename_sequences(sequences)
    initial_alignment = full_align_best_reference(renamed, genomic_reference)
    oriented = orient_sequences(renamed, alignment_file=initial_alignment)

    # Select alleles using the requested grouping method and loci
    alleles = extract_alleles(
        oriented,
        alignment_file=initial_alignment,
        method=grouping,
        loci=loci,
    )

    # Align the selected alleles at the genomic and then the cDNA level
    genomic_alignment = full_align_best_reference(alleles, genomic_reference)
    cdna_sequences = extract_cDNA(
        alleles, exon_fofn, alignment_file=genomic_alignment
    )
    cdna_alignment = align_by_identity(cdna_sequences, cDNA_reference)

    return summarize_typing(genomic_alignment, cdna_alignment)
def type_sequences( input_folder, exon_fofn, genomic_reference, cDNA_reference ):
    """
    Run the typing procedure on the Amplicon Analysis output in a folder.

    The sequence file 'amplicon_analysis.fastq' and the summary file
    'amplicon_analysis.csv' are both expected directly inside
    ``input_folder``.

    Parameters
    ----------
    input_folder
        Directory containing the two Amplicon Analysis output files.
    exon_fofn
        Forwarded to ``extract_cDNA``.
    genomic_reference
        Reference used for both genomic alignment steps.
    cDNA_reference
        Reference passed to ``align_by_identity`` for the cDNA sequences.

    Returns
    -------
    None. The value of ``summarize_typing`` is not propagated.
    """
    sequence_file = os.path.join( input_folder, 'amplicon_analysis.fastq' )
    csv_file = os.path.join( input_folder, 'amplicon_analysis.csv' )

    # Align the raw sequences to the genomic reference, then re-orient both
    # the sequences and the CSV using that same alignment
    raw_alignment = align_best_reference( sequence_file, genomic_reference )
    reoriented = orient_sequences( sequence_file, alignment_file=raw_alignment )
    reoriented_csv = orient_amp_analysis( csv_file, raw_alignment )

    # Select the alleles and subset the CSV to match. The return value of
    # subset_amp_analysis was never read, so it is no longer bound to a name;
    # the call itself is kept because it presumably writes the subset file
    # (NOTE(review): confirm against subset_amp_analysis).
    selected = extract_alleles( reoriented, alignment_file=raw_alignment )
    subset_amp_analysis( reoriented_csv, selected )

    # Align the selected alleles at the genomic and then the cDNA level
    gDNA_alignment = full_align_best_reference( selected, genomic_reference )
    cDNA_file = extract_cDNA( selected, exon_fofn, alignment_file=gDNA_alignment )
    cDNA_alignment = align_by_identity( cDNA_file, cDNA_reference )

    summarize_typing( gDNA_alignment, cDNA_alignment )
def type_sequences(input_folder, exon_fofn, genomic_reference, cDNA_reference):
    """
    Run the typing procedure on the Amplicon Analysis output in a folder.

    The sequence file 'amplicon_analysis.fastq' and the summary file
    'amplicon_analysis.csv' are both expected directly inside
    ``input_folder``.

    Parameters
    ----------
    input_folder
        Directory containing the two Amplicon Analysis output files.
    exon_fofn
        Forwarded to ``extract_cDNA``.
    genomic_reference
        Reference used for both genomic alignment steps.
    cDNA_reference
        Reference passed to ``align_by_identity`` for the cDNA sequences.

    Returns
    -------
    None. The value of ``summarize_typing`` is not propagated.
    """
    sequence_file = os.path.join(input_folder, 'amplicon_analysis.fastq')
    csv_file = os.path.join(input_folder, 'amplicon_analysis.csv')

    # Align the raw sequences to the genomic reference, then re-orient both
    # the sequences and the CSV using that same alignment
    raw_alignment = align_best_reference(sequence_file, genomic_reference)
    reoriented = orient_sequences(sequence_file, alignment_file=raw_alignment)
    reoriented_csv = orient_amp_analysis(csv_file, raw_alignment)

    # Select the alleles and subset the CSV to match. The return value of
    # subset_amp_analysis was never read, so it is no longer bound to a name;
    # the call itself is kept because it presumably writes the subset file
    # (NOTE(review): confirm against subset_amp_analysis).
    selected = extract_alleles(reoriented, alignment_file=raw_alignment)
    subset_amp_analysis(reoriented_csv, selected)

    # Align the selected alleles at the genomic and then the cDNA level
    gDNA_alignment = full_align_best_reference(selected, genomic_reference)
    cDNA_file = extract_cDNA(selected, exon_fofn, alignment_file=gDNA_alignment)
    cDNA_alignment = align_by_identity(cDNA_file, cDNA_reference)

    summarize_typing(gDNA_alignment, cDNA_alignment)
def type_fasta( input_fofn, input_fasta, exon_fofn, genomic_reference, cDNA_reference ):
    """
    Type the sequences in a Fasta file, then summarize their allelic breakdown.

    The function runs three phases in order:

    1. Align ``input_fasta`` to ``genomic_reference``, re-orient it, select
       alleles, align those at the genomic and cDNA level, and call
       ``summarize_typing``.
    2. Create chimera sequences from the selected alleles and combine them
       with ``input_fasta`` into a '<basename>.combined.fasta' file.
    3. Extract the best reads from ``input_fofn`` into
       'reads_of_insert.fasta' (next to ``input_fasta``), align them against
       the combined file, and call ``summarize_alleles``.

    Returns None.
    """
    # Phase 1: align to the reference and annotate the typing
    initial_alignment = align_best_reference( input_fasta, genomic_reference )
    oriented = orient_fasta( input_fasta, alignment_file=initial_alignment )
    alleles = extract_alleles( oriented, alignment_file=initial_alignment )
    genomic_alignment = full_align_best_reference( alleles, genomic_reference )
    cdna_sequences = extract_cDNA( alleles, exon_fofn, alignment_file=genomic_alignment )
    cdna_alignment = align_by_identity( cdna_sequences, cDNA_reference )
    summarize_typing( genomic_alignment, cdna_alignment )

    # Phase 2: generate mock chimera sequences and merge them with the input.
    # The output name drops the last two dot-separated parts of the chimera
    # file's path before appending the '.combined.fasta' suffix.
    chimeras = create_chimeras( alleles, alignment_file=genomic_alignment )
    name_parts = chimeras.split('.')
    stem = '.'.join( name_parts[:-2] )
    combined = '%s.combined.fasta' % stem
    combine_fasta( [input_fasta, chimeras], combined )

    # Phase 3: competitive alignment of the best reads against the combined
    # file, used to summarize the allelic breakdown
    reads_path = os.path.join( os.path.dirname( input_fasta ), 'reads_of_insert.fasta' )
    extract_best_reads( input_fofn, reads_path )
    reads_alignment = align_best_reference( reads_path, combined )
    summarize_alleles( reads_alignment, initial_alignment, alleles )