def clean_reference_alignment(region, refname,
                              VERBOSE=0,
                              subtype='B',
                             ):
    '''Load a subtype reference alignment and drop the sequences rejected by filter_sequence.

    Parameters:
      region: region identifier, forwarded to the filename helper.
      refname: reference name, forwarded to the filename helper.
      VERBOSE: verbosity level; at 2 or above a one-line summary is printed.
      subtype: subtype identifier, forwarded to the filename helper.

    Returns:
      A new MultipleSeqAlignment holding only the records for which
      filter_sequence returned a true value.
    '''
    # Import AlignIO locally (like the other local imports here) so the
    # function does not depend on a module-level import being present.
    from Bio import AlignIO
    from Bio.Align import MultipleSeqAlignment

    from hivwholeseq.cross_sectional.filenames import (
        get_subtype_reference_alignment_filename)

    fn = get_subtype_reference_alignment_filename(region, subtype=subtype,
                                                  refname=refname,
                                                  VERBOSE=VERBOSE)
    ali = AlignIO.read(fn, 'fasta')
    nseqs = len(ali)

    # filter_sequence is expected to be defined at module level; its exact
    # criteria are not visible here.  An explicit list keeps the behaviour
    # the same on Python 2 and 3 (filter() is lazy on Python 3).
    ali = MultipleSeqAlignment([seq for seq in ali if filter_sequence(seq)])
    nseqsnew = len(ali)

    if VERBOSE >= 2:
        # Single pre-formatted string: prints the same text whether print is
        # a statement (Python 2) or a function (Python 3).
        print('{0} {1} {2}: {3} of {4} seqs kept'.format(
            refname, region, subtype, nseqsnew, nseqs))

    return ali
def clean_reference_alignment(
    region,
    refname,
    VERBOSE=0,
    subtype='B',
):
    '''Load a subtype reference alignment and drop the sequences rejected by filter_sequence.

    Parameters:
      region: region identifier, forwarded to the filename helper.
      refname: reference name, forwarded to the filename helper.
      VERBOSE: verbosity level; at 2 or above a one-line summary is printed.
      subtype: subtype identifier, forwarded to the filename helper.

    Returns:
      A new MultipleSeqAlignment holding only the records for which
      filter_sequence returned a true value.
    '''
    # Import AlignIO locally (like the other local imports here) so the
    # function does not depend on a module-level import being present.
    from Bio import AlignIO
    from Bio.Align import MultipleSeqAlignment

    from hivwholeseq.cross_sectional.filenames import (
        get_subtype_reference_alignment_filename)

    fn = get_subtype_reference_alignment_filename(region,
                                                  subtype=subtype,
                                                  refname=refname,
                                                  VERBOSE=VERBOSE)
    ali = AlignIO.read(fn, 'fasta')
    nseqs = len(ali)

    # filter_sequence is expected to be defined at module level; its exact
    # criteria are not visible here.  An explicit list keeps the behaviour
    # the same on Python 2 and 3 (filter() is lazy on Python 3).
    ali = MultipleSeqAlignment([seq for seq in ali if filter_sequence(seq)])
    nseqsnew = len(ali)

    if VERBOSE >= 2:
        # Single pre-formatted string: prints the same text whether print is
        # a statement (Python 2) or a function (Python 3).
        print('{0} {1} {2}: {3} of {4} seqs kept'.format(
            refname, region, subtype, nseqsnew, nseqs))

    return ali
def get_subtype_reference_alignment(region, subtype='B',
                                    refname='HXB2',
                                    type='nuc',
                                    VERBOSE=0):
    '''Read the subtype reference alignment for a region from its FASTA file.

    All arguments are forwarded unchanged to
    get_subtype_reference_alignment_filename, which supplies the path.

    Returns:
      The alignment object produced by Bio.AlignIO.read.
    '''
    from Bio import AlignIO

    # Resolve the path first, then parse it in a single step.
    filename = get_subtype_reference_alignment_filename(
        region,
        subtype=subtype,
        refname=refname,
        type=type,
        VERBOSE=VERBOSE)
    return AlignIO.read(filename, 'fasta')
# Example no. 4
# 0
def get_subtype_reference_alignment(region,
                                    subtype='B',
                                    refname='HXB2',
                                    type='nuc',
                                    VERBOSE=0):
    '''Read the subtype reference alignment for a region from its FASTA file.

    All arguments are forwarded unchanged to
    get_subtype_reference_alignment_filename, which supplies the path.

    Returns:
      The alignment object produced by Bio.AlignIO.read.
    '''
    from Bio import AlignIO

    # Collect the keyword arguments for the filename helper in one place.
    lookup_options = dict(subtype=subtype,
                          refname=refname,
                          type=type,
                          VERBOSE=VERBOSE)
    filename = get_subtype_reference_alignment_filename(region,
                                                        **lookup_options)
    return AlignIO.read(filename, 'fasta')
# Script
if __name__ == '__main__':

    # Command-line interface: --region is mandatory, the rest have defaults.
    parser = argparse.ArgumentParser(description='Align to reference',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--region', required=True,
                        help='Region to align (e.g. V3)')
    parser.add_argument('--reference', default='HXB2',
                        help='Reference to use for alignment')
    parser.add_argument('--verbose', type=int, default=0,
                        help='Verbosity level [0-4]')
    parser.add_argument('--subtype', default='B',
                        help='Subtype to analyze')

    args = parser.parse_args()
    region = args.region
    refname = args.reference
    VERBOSE = args.verbose
    subtype = args.subtype

    # Load the alignment and keep only the sequences that pass the filter.
    ali = clean_reference_alignment(region, refname,
                                    subtype=subtype,
                                    VERBOSE=VERBOSE)

    # NOTE: the output path comes from the same helper, called with the same
    # arguments, that clean_reference_alignment uses to locate its input, so
    # the cleaned alignment is written back over the file it was read from.
    fn = get_subtype_reference_alignment_filename(region, subtype=subtype,
                                                  refname=refname,
                                                  VERBOSE=VERBOSE)
    AlignIO.write(ali, fn, 'fasta')

        description='Align to reference',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--region',
                        required=True,
                        help='Region to anign (e.g. V3)')
    parser.add_argument('--reference',
                        default='HXB2',
                        help='Reference to use for alignment')
    parser.add_argument('--verbose',
                        type=int,
                        default=0,
                        help='Verbosity level [0-4]')
    parser.add_argument('--subtype', default='B', help='Subtype to analyze')

    args = parser.parse_args()
    region = args.region
    refname = args.reference
    VERBOSE = args.verbose
    subtype = args.subtype

    ali = clean_reference_alignment(region,
                                    refname,
                                    subtype=subtype,
                                    VERBOSE=VERBOSE)

    fn = get_subtype_reference_alignment_filename(region,
                                                  subtype=subtype,
                                                  refname=refname,
                                                  VERBOSE=VERBOSE)
    AlignIO.write(ali, fn, 'fasta')