Esempio n. 1
0
def get_other_consensi_all(pname, fragment, VERBOSE=0):
    '''Collect the ungapped consensus sequences that do not belong to a patient.

    Parameters:
       pname: patient name; a record is left out when the leading
              underscore-separated token of its name equals this value.
       fragment: fragment identifier, forwarded to the filename lookup of the
                 'all' consensi alignment.
       VERBOSE: accepted for interface compatibility; not used in this function.

    Returns:
       list of the remaining sequence records, in file order, with the '-'
       gap character removed from their sequences.
    '''
    from Bio import SeqIO
    from hivwholeseq.patients.filenames import get_consensi_alignment_filename

    others = []
    records = SeqIO.parse(get_consensi_alignment_filename('all', fragment), 'fasta')
    for record in records:
        # The token before the first underscore of the name is the patient name
        owner = record.name.split('_', 1)[0]
        if owner == pname:
            continue

        # The alignment file stores gapped sequences: strip the gaps
        record.seq = record.seq.ungap('-')
        others.append(record)

    return others
def get_other_consensi_all(pname, fragment, VERBOSE=0):
    '''Return every consensus in the 'all' alignment except this patient's.

    Parameters:
       pname: name of the patient whose consensi are excluded. The comparison
              uses the part of the record name before the first underscore.
       fragment: fragment identifier used to locate the alignment file.
       VERBOSE: kept in the signature for callers; this function ignores it.

    Returns:
       list of sequence records from other patients, each with its '-' gap
       characters removed.
    '''
    from Bio import SeqIO
    from hivwholeseq.patients.filenames import get_consensi_alignment_filename

    alignment_fn = get_consensi_alignment_filename('all', fragment)

    def _foreign(records):
        '''Yield the records of other patients, ungapped in place.'''
        for rec in records:
            # Text before the first '_' in the record name is the patient name
            if rec.name.partition('_')[0] != pname:
                rec.seq = rec.seq.ungap('-')
                yield rec

    return list(_foreign(SeqIO.parse(alignment_fn, 'fasta')))
Esempio n. 3
0
 def get_consensi_alignment_filename(self, region, format='fasta'):
     '''Return the filename of the multiple sequence alignment of all consensi.

     The lookup is delegated to the function of the same name in
     hivwholeseq.patients.filenames, called with this object's name.

     Parameters:
        region: region identifier, forwarded unchanged.
        format: forwarded as the format keyword (default 'fasta').
     '''
     # Aliased on import because the module-level function shares this
     # method's name
     from hivwholeseq.patients.filenames import \
         get_consensi_alignment_filename as _alignment_filename

     return _alignment_filename(self.name, region, format=format)
    samples = load_samples_sequenced()
    if seq_runs is not None:
        samples = samples.loc[samples['seq run'].isin(seq_runs)]
    
        if adaIDs is not None:
            samples = samples.loc[samples.adapter.isin(adaIDs)]
    
        if use_pats:
            samples = samples.loc[samples['patient sample'] != 'nan']
    else:
        samples = samples.loc[samplenames]

    if fragments is None:
        fragments = ['F'+str(i+1) for i in xrange(6)]

    alis = {fr: AlignIO.read(get_consensi_alignment_filename('all', fr), 'fasta')
            for fr in fragments}

    for samplename, sample in samples.iterrows():
        sample = SampleSeq(sample)
        data_folder = sample.seqrun_folder
        adaID = sample.adapter
        pname = sample.patientname

        for fragment in fragments:
            if VERBOSE >= 1:
                print sample['seq run'], adaID, fragment, samplename,

            # Read the summary filename of the filter_mapped, and find out whether
            # there are many distant reads (a few are normal)
            fn = get_filter_mapped_summary_filename(data_folder, adaID, fragment)
                            'subtype', 'confidence'),
                )
                write_json(tree_json, fn, indent=1)

        if use_joint:
            if VERBOSE >= 2:
                print 'Align all patients',
            ali_all = align_muscle(*seqs_all, sort=True)
            if VERBOSE >= 2:
                print 'OK'

            if use_save:
                if VERBOSE >= 2:
                    print 'Save all patients',
                reg_tmp = '_'.join(pcodes) + '_' + region
                fn_out = get_consensi_alignment_filename('all', reg_tmp)
                mkdirs(os.path.dirname(fn_out))
                AlignIO.write(ali, fn_out, 'fasta')
                if VERBOSE >= 2:
                    print 'OK'

            if VERBOSE >= 2:
                print 'Build local tree'
            tree = build_tree_fasttree(ali_all, VERBOSE=VERBOSE)

            if VERBOSE >= 2:
                print 'Infer ancestral sequences'
            a = ancestral_sequences(tree,
                                    ali_all,
                                    alphabet='ACGT-N',
                                    copy_tree=False,
                                        )
                write_json(tree_json, fn, indent=1)

        
        if use_joint:
            if VERBOSE >= 2:
                print 'Align all patients',
            ali_all = align_muscle(*seqs_all, sort=True)
            if VERBOSE >= 2:
                print 'OK'

            if use_save:
                if VERBOSE >= 2:
                    print 'Save all patients',
                reg_tmp = '_'.join(pcodes)+'_'+region
                fn_out = get_consensi_alignment_filename('all', reg_tmp)
                mkdirs(os.path.dirname(fn_out))
                AlignIO.write(ali, fn_out, 'fasta')
                if VERBOSE >= 2:
                    print 'OK'

            if VERBOSE >= 2:
                print 'Build local tree'
            tree = build_tree_fasttree(ali_all, VERBOSE=VERBOSE)
            
            if VERBOSE >= 2:
                print 'Infer ancestral sequences'
            a = ancestral_sequences(tree, ali_all, alphabet='ACGT-N', copy_tree=False,
                                    attrname='sequence', seqtype='str')
            a.calc_ancestral_sequences()
            a.cleanup_tree()
Esempio n. 7
0
 def get_consensi_alignment_filename(self, region, format='fasta'):
     '''Look up the filename of the consensi multiple sequence alignment.

     Thin wrapper: calls the function of the same name from
     hivwholeseq.patients.filenames with self.name as its first argument.

     Parameters:
        region: region identifier passed straight through.
        format: passed through as the format keyword (default 'fasta').
     '''
     # Import the module rather than the function to avoid a local name that
     # duplicates this method's name
     from hivwholeseq.patients import filenames

     return filenames.get_consensi_alignment_filename(self.name,
                                                      region,
                                                      format=format)