コード例 #1
0
def type_sequences(input,
                   grouping=GROUPING,
                   exon_fofn=None,
                   genomic_reference=None,
                   cDNA_reference=None,
                   loci=None):
    """
    Run the sequence-typing pipeline on ``input`` and return its summary.

    Any of ``grouping``, ``exon_fofn``, ``genomic_reference`` or
    ``cDNA_reference`` that is falsy is replaced by the module default
    (``GROUPING`` or the matching ``get_*_reference()`` helper).

    Args:
        input: Handed to ``get_log_file`` and ``get_input_file``; per the
            original comments a directory may be given instead of a file.
        grouping: Forwarded to ``extract_alleles`` as ``method``.
        exon_fofn: Forwarded to ``extract_cDNA``.
        genomic_reference: Reference used by both
            ``full_align_best_reference`` passes.
        cDNA_reference: Reference used by ``align_by_identity``.
        loci: Forwarded unchanged to ``extract_alleles``.

    Returns:
        The value produced by ``summarize_typing`` for the gDNA and cDNA
        alignments.
    """
    log_path = get_log_file(input)
    initialize_logger(log, log_file=log_path)

    # Fill in whichever settings the caller left unset (or passed as falsy).
    if not grouping:
        grouping = GROUPING
    if not exon_fofn:
        exon_fofn = get_exon_reference()
    if not genomic_reference:
        genomic_reference = get_genomic_reference()
    if not cDNA_reference:
        cDNA_reference = get_cDNA_reference()

    # Resolve the actual sequence file to work on.
    source_file = get_input_file(input)

    # Rename, align against the genomic reference, then orient the renamed
    # sequences using that first alignment.
    renamed = rename_sequences(source_file)
    first_alignment = full_align_best_reference(renamed, genomic_reference)
    oriented = orient_sequences(renamed, alignment_file=first_alignment)

    # Allele extraction reuses the first alignment; the extracted set is then
    # re-aligned against the same genomic reference.
    alleles = extract_alleles(oriented,
                              alignment_file=first_alignment,
                              method=grouping,
                              loci=loci)
    genomic_alignment = full_align_best_reference(alleles, genomic_reference)

    # cDNA is extracted from the selected alleles and aligned separately.
    cdna_file = extract_cDNA(alleles, exon_fofn,
                             alignment_file=genomic_alignment)
    cdna_alignment = align_by_identity(cdna_file, cDNA_reference)

    return summarize_typing(genomic_alignment, cdna_alignment)
コード例 #2
0
def type_sequences(input_folder, exon_fofn, genomic_reference, cDNA_reference):
    """
    Run the typing pipeline on the Amplicon Analysis output in a folder.

    Reads ``amplicon_analysis.fastq`` and ``amplicon_analysis.csv`` from
    ``input_folder``, pushes them through the alignment / orientation /
    allele-extraction helpers, and finishes by calling ``summarize_typing``.

    Args:
        input_folder: Directory holding the two ``amplicon_analysis`` files.
        exon_fofn: Forwarded to ``extract_cDNA``.
        genomic_reference: Reference used for both genomic alignment passes.
        cDNA_reference: Reference used by ``align_by_identity``.

    Returns:
        None; the result of ``summarize_typing`` is not returned.
    """
    fastq_path = os.path.join(input_folder, 'amplicon_analysis.fastq')
    csv_path = os.path.join(input_folder, 'amplicon_analysis.csv')

    # Initial alignment drives re-orientation of both the reads and the CSV.
    initial_alignment = align_best_reference(fastq_path, genomic_reference)
    oriented_seqs = orient_sequences(fastq_path,
                                     alignment_file=initial_alignment)
    oriented_csv = orient_amp_analysis(csv_path, initial_alignment)

    # Select alleles, then subset the CSV to match. The subset result is not
    # used below; the call is kept in place for whatever it does internally.
    alleles = extract_alleles(oriented_seqs, alignment_file=initial_alignment)
    _subset_csv = subset_amp_analysis(oriented_csv, alleles)

    # Re-align the selected alleles, derive cDNA and align it, then summarize.
    genomic_alignment = full_align_best_reference(alleles, genomic_reference)
    cdna_file = extract_cDNA(alleles, exon_fofn,
                             alignment_file=genomic_alignment)
    cdna_alignment = align_by_identity(cdna_file, cDNA_reference)
    summarize_typing(genomic_alignment, cdna_alignment)
コード例 #3
0
def type_sequences(input_folder, exon_fofn, genomic_reference, cDNA_reference):
    """
    Type the Amplicon Analysis results found in ``input_folder``.

    The folder is expected to contain ``amplicon_analysis.fastq`` and
    ``amplicon_analysis.csv``. Both are oriented against an initial
    alignment to ``genomic_reference``; alleles are then extracted,
    re-aligned, converted to cDNA via ``exon_fofn``, aligned to
    ``cDNA_reference`` and handed to ``summarize_typing``.

    Nothing is returned.
    """
    reads = os.path.join(input_folder, 'amplicon_analysis.fastq')
    table = os.path.join(input_folder, 'amplicon_analysis.csv')

    # Step 1: align to the genomic reference and orient reads + CSV.
    draft_alignment = align_best_reference(reads, genomic_reference)
    oriented_reads = orient_sequences(reads, alignment_file=draft_alignment)
    oriented_table = orient_amp_analysis(table, draft_alignment)

    # Step 2: pick alleles and subset the CSV accordingly (result unused here).
    picked = extract_alleles(oriented_reads, alignment_file=draft_alignment)
    _picked_table = subset_amp_analysis(oriented_table, picked)

    # Step 3: genomic re-alignment, cDNA extraction and cDNA alignment.
    gdna_aln = full_align_best_reference(picked, genomic_reference)
    cdna = extract_cDNA(picked, exon_fofn, alignment_file=gdna_aln)
    cdna_aln = align_by_identity(cdna, cDNA_reference)

    # Step 4: report.
    summarize_typing(gdna_aln, cdna_aln)
コード例 #4
0
ファイル: type_fasta.py プロジェクト: la0hu2006/HlaTools
def type_fasta(input_fofn, input_fasta, exon_fofn, genomic_reference, cDNA_reference):
    """
    Type the sequences in ``input_fasta`` and summarize their allelic breakdown.

    Three stages run in order:

    1. Align, orient and extract alleles from ``input_fasta``, then align the
       selection (genomic and cDNA) and call ``summarize_typing``.
    2. Build chimera sequences from the selected alleles and combine them
       with ``input_fasta`` into ``<basename>.combined.fasta``, where
       ``<basename>`` is the chimera file name minus its last two
       dot-separated components.
    3. Extract best reads from ``input_fofn`` into ``reads_of_insert.fasta``
       (next to ``input_fasta``), align them against the combined file and
       call ``summarize_alleles``.

    Nothing is returned.
    """
    # Stage 1: reference alignment and typing annotation.
    initial_alignment = align_best_reference(input_fasta, genomic_reference)
    oriented = orient_fasta(input_fasta, alignment_file=initial_alignment)
    alleles = extract_alleles(oriented, alignment_file=initial_alignment)
    genomic_alignment = full_align_best_reference(alleles, genomic_reference)
    cdna_file = extract_cDNA(alleles, exon_fofn,
                             alignment_file=genomic_alignment)
    cdna_alignment = align_by_identity(cdna_file, cDNA_reference)
    summarize_typing(genomic_alignment, cdna_alignment)

    # Stage 2: mock chimeras, merged with the input sequences.
    chimeras = create_chimeras(alleles, alignment_file=genomic_alignment)
    name_parts = chimeras.split('.')
    stem = '.'.join(name_parts[:-2])  # drop the last two dot-separated parts
    merged_fasta = '%s.combined.fasta' % stem
    combine_fasta([input_fasta, chimeras], merged_fasta)

    # Stage 3: competitive alignment of the best reads against the merged set.
    fasta_dir = os.path.dirname(input_fasta)
    best_reads_path = os.path.join(fasta_dir, 'reads_of_insert.fasta')
    extract_best_reads(input_fofn, best_reads_path)
    best_reads_alignment = align_best_reference(best_reads_path, merged_fasta)
    summarize_alleles(best_reads_alignment, initial_alignment, alleles)