# Load the per-patient data from the gzipped pickle cache `fn`, or rebuild the
# cache when it is missing or when --regenerate was requested.
if not os.path.isfile(fn) or args.regenerate:
    if args.subtype == 'B':
        #patient_codes = ['p2','p3','p5','p8','p9','p10','p11'] # subtype B only
        patient_codes = ['p2', 'p3', 'p5', 'p7', 'p8', 'p9', 'p10', 'p11']  # patients
    else:
        patient_codes = ['p1', 'p2', 'p3', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10', 'p11']  # patients
        #patient_codes = ['p1','p2','p3','p5','p6','p8','p9','p10', 'p11'] # patients
    data = collect_data(patient_codes, regions, args.subtype)
    # Writing the cache is best effort: a failure to write must not throw away
    # the data that was just collected, so report it and carry on.
    try:
        with gzip.open(fn, 'w') as ofile:
            cPickle.dump(data, ofile)
    except IOError:
        # Single formatted string so the output is the same whether `print`
        # is the Python 2 statement or the print function.
        print('Could not save data to file: %s' % os.path.abspath(fn))
else:
    # NOTE(review): unpickling executes arbitrary code -- `fn` must only ever
    # point at a cache file written by this script.
    with gzip.open(fn) as ifile:
        data = cPickle.load(ifile)

# calculate minor variant frequencies and entropy measures
av = process_average_allele_frequencies(data, regions, nbootstraps=0, nstates=20)
combined_af = av['combined_af']
combined_entropy = av['combined_entropy']
minor_af = av['minor_af']

# get association, calculate fitness costs
associations = get_associations(regions)
aa_mutation_rates, total_nonsyn_mutation_rates = calc_amino_acid_mutation_rates()
selcoeff = {}
for region in regions:
    s = fitness_costs_per_site(region, data, total_nonsyn_mutation_rates)
    s[s > 1] = 1  # cap the estimates at 1 (in place)
    selcoeff[region] = s

aa_ref = 'NL4-3'
global_ref = HIVreference(refname=aa_ref, subtype=args.subtype)
cPickle.dump(data, ofile) print('Data saved to file:', os.path.abspath(fn)) except IOError: print('Could not save data to file:', os.path.abspath(fn)) else: with gzip.open(fn) as ifile: data = cPickle.load(ifile) # Check whether all regions are present if not all([region in data['mut_rate'] for region in regions]): print("data loading failed or data doesn't match specified regions:", regions, ' got:', data['mut_rate'].keys()) # Average, annotate, and process allele frequencies av = process_average_allele_frequencies(data, genes, nbootstraps=0, synnonsyn=True) combined_af = av['combined_af'] combined_entropy = av['combined_entropy'] minor_af = av['minor_af'] synnonsyn = av['synnonsyn'] synnonsyn_unconstrained = av['synnonsyn_unconstrained'] av = process_average_allele_frequencies(data, ['genomewide'], nbootstraps=0, synnonsyn=False) combined_af.update(av['combined_af']) combined_entropy.update(av['combined_entropy']) minor_af.update(av['minor_af']) synnonsyn['genomewide'] = np.ones_like(minor_af['genomewide'], dtype=bool) synnonsyn_unconstrained['genomewide'] = np.ones_like( minor_af['genomewide'], dtype=bool)
with gzip.open(fn, 'w') as ofile: cPickle.dump(data, ofile) print('Data saved to file:', os.path.abspath(fn)) except IOError: print('Could not save data to file:', os.path.abspath(fn)) else: with gzip.open(fn) as ifile: data = cPickle.load(ifile) # Check whether all regions are present if not all([region in data['mut_rate'] for region in regions]): print("data loading failed or data doesn't match specified regions:", regions, ' got:',data['mut_rate'].keys()) # Average, annotate, and process allele frequencies av = process_average_allele_frequencies(data, genes, nbootstraps=0, synnonsyn=True) combined_af = av['combined_af'] combined_entropy = av['combined_entropy'] minor_af = av['minor_af'] synnonsyn = av['synnonsyn'] synnonsyn_unconstrained = av['synnonsyn_unconstrained'] av = process_average_allele_frequencies(data, ['genomewide'], nbootstraps=0, synnonsyn=False) combined_af.update(av['combined_af']) combined_entropy.update(av['combined_entropy']) minor_af.update(av['minor_af']) synnonsyn['genomewide'] = np.ones_like(minor_af['genomewide'], dtype=bool) synnonsyn_unconstrained['genomewide'] = np.ones_like(minor_af['genomewide'], dtype=bool) for gene in genes: pos = [x for x in reference.annotation[gene]] synnonsyn_unconstrained['genomewide'][pos] = synnonsyn_unconstrained[gene]