Example #1
0
def type_sequences( input_folder, exon_fofn, genomic_reference, cDNA_reference ):
    """
    Run the typing pipeline on the Amplicon Analysis files in input_folder.

    The paths 'amplicon_analysis.fastq' and 'amplicon_analysis.csv' are
    built relative to input_folder and handed through the helper steps
    below, in order.  This function has no return statement; whatever
    output is produced comes from the helpers it calls.

    input_folder      -- directory holding the two Amplicon Analysis files
    exon_fofn         -- passed to extract_cDNA
    genomic_reference -- passed to both genomic alignment steps
    cDNA_reference    -- passed to align_by_identity
    """
    fastq_path = os.path.join(input_folder, 'amplicon_analysis.fastq')
    report_path = os.path.join(input_folder, 'amplicon_analysis.csv')

    # The first alignment is reused by the orientation and selection steps
    initial_hits = align_best_reference(fastq_path, genomic_reference)
    oriented_seqs = orient_sequences(fastq_path, alignment_file=initial_hits)
    oriented_report = orient_amp_analysis(report_path, initial_hits)

    alleles = extract_alleles(oriented_seqs, alignment_file=initial_hits)
    # The result of this call is not used by any later step in this function
    subset_amp_analysis(oriented_report, alleles)

    # Re-align only the selected sequences, then derive and align the cDNA
    genomic_hits = full_align_best_reference(alleles, genomic_reference)
    cdna_seqs = extract_cDNA(alleles, exon_fofn, alignment_file=genomic_hits)
    cdna_hits = align_by_identity(cdna_seqs, cDNA_reference)

    summarize_typing(genomic_hits, cdna_hits)
Example #2
0
def type_sequences( input, grouping=GROUPING,
                           exon_fofn=None,
                           genomic_reference=None,
                           cDNA_reference=None,
                           loci=None):
    """
    Run the typing pipeline on `input` and return the typing summary.

    Any of grouping, exon_fofn, genomic_reference or cDNA_reference that
    is falsy (e.g. None) is replaced by its default: the GROUPING constant
    or the matching get_*_reference() helper.  `loci` is forwarded
    unchanged to extract_alleles.

    Returns whatever summarize_typing returns for the final genomic and
    cDNA alignments.
    """
    initialize_logger( log, log_file=get_log_file( input ) )

    # Fill in every setting the caller left empty
    if not grouping:
        grouping = GROUPING
    if not exon_fofn:
        exon_fofn = get_exon_reference()
    if not genomic_reference:
        genomic_reference = get_genomic_reference()
    if not cDNA_reference:
        cDNA_reference = get_cDNA_reference()

    # Resolve the file to process from `input`
    source_file = get_input_file( input )

    # The first alignment is reused by the orientation and selection steps
    renamed = rename_sequences( source_file )
    first_pass = full_align_best_reference( renamed, genomic_reference )
    oriented = orient_sequences( renamed, alignment_file=first_pass )
    alleles = extract_alleles( oriented,
                               alignment_file=first_pass,
                               method=grouping,
                               loci=loci )

    # Re-align only the selected sequences, then derive and align the cDNA
    genomic_hits = full_align_best_reference( alleles, genomic_reference )
    cdna_seqs = extract_cDNA( alleles, exon_fofn, alignment_file=genomic_hits )
    cdna_hits = align_by_identity( cdna_seqs, cDNA_reference )

    return summarize_typing( genomic_hits, cdna_hits )
Example #3
0
def type_sequences(input_folder, exon_fofn, genomic_reference, cDNA_reference):
    """
    Type the Amplicon Analysis results found in input_folder.

    Builds the paths to 'amplicon_analysis.fastq' and
    'amplicon_analysis.csv' inside input_folder and feeds them through
    the alignment, orientation, allele-selection, cDNA-extraction and
    summary helpers in that order.  The function itself returns nothing.
    """
    seq_path = os.path.join(input_folder, 'amplicon_analysis.fastq')
    table_path = os.path.join(input_folder, 'amplicon_analysis.csv')

    # Stage 1: one alignment against the genomic reference, shared by the
    # orientation and allele-selection helpers.
    rough_alignment = align_best_reference(seq_path, genomic_reference)
    flipped_seqs = orient_sequences(seq_path, alignment_file=rough_alignment)
    flipped_table = orient_amp_analysis(table_path, rough_alignment)
    chosen = extract_alleles(flipped_seqs, alignment_file=rough_alignment)
    # The return value is not consumed by any later step in this function.
    subset_amp_analysis(flipped_table, chosen)

    # Stage 2: align the chosen sequences again, then extract and align cDNA.
    genomic_alignment = full_align_best_reference(chosen, genomic_reference)
    cdna_path = extract_cDNA(chosen, exon_fofn, alignment_file=genomic_alignment)
    cdna_alignment = align_by_identity(cdna_path, cDNA_reference)

    # Stage 3: hand both alignments to the summary step.
    summarize_typing(genomic_alignment, cdna_alignment)