Exemple #1
0
    # Read the collected data back from the gzipped pickle when that file
    # exists and no regeneration was requested; otherwise collect the data
    # again and write it to the file.
    cache_usable = os.path.isfile(fn) and not args.regenerate
    if cache_usable:
        with gzip.open(fn) as ifile:
            data = cPickle.load(ifile)
    else:
        # The subtype 'B' patient list leaves out p1 and p6.
        patient_codes = (
            ['p2', 'p3', 'p5', 'p7', 'p8', 'p9', 'p10', 'p11']
            if args.subtype == 'B'
            else ['p1', 'p2', 'p3', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10', 'p11']
        )
        data = collect_data(patient_codes, regions, args.subtype)
        with gzip.open(fn, 'w') as ofile:
            cPickle.dump(data, ofile)

    # Calculate minor variant frequencies and entropy measures for the
    # requested regions and keep the three result tables under short names.
    # NOTE(review): nbootstraps=0 presumably disables bootstrapping and
    # nstates=20 presumably selects a 20-letter (amino acid) alphabet --
    # confirm against process_average_allele_frequencies.
    av = process_average_allele_frequencies(data, regions, nbootstraps=0,nstates=20)
    combined_af = av['combined_af']
    combined_entropy = av['combined_entropy']
    minor_af = av['minor_af']

    # Get associations, then calculate fitness costs per site for each region
    # using the total nonsynonymous mutation rates.
    associations = get_associations(regions)
    aa_mutation_rates, total_nonsyn_mutation_rates = calc_amino_acid_mutation_rates()
    selcoeff = {}  # region -> per-site fitness costs
    for region in regions:
        s = fitness_costs_per_site(region, data, total_nonsyn_mutation_rates)
        # Cap the estimates: every entry larger than 1 is overwritten with 1
        # (in place, on the object returned by fitness_costs_per_site).
        s[s>1] = 1
        selcoeff[region] = s

    # Build the reference object from the 'NL4-3' reference name and the
    # subtype given on the command line.
    aa_ref = 'NL4-3'
    global_ref = HIVreference(refname=aa_ref, subtype=args.subtype)
                # Tail of a cache-writing branch whose opening lines are not
                # part of this excerpt: pickle `data` into the open file.
                cPickle.dump(data, ofile)
            print('Data saved to file:', os.path.abspath(fn))
        except IOError:
            # Saving is best effort: the failure is only reported and
            # execution continues with the in-memory `data`.
            print('Could not save data to file:', os.path.abspath(fn))
    else:
        # Alternative branch (its condition is not visible here): read the
        # pickled data back from the gzip file.
        with gzip.open(fn) as ifile:
            data = cPickle.load(ifile)

    # Warn when any requested region has no entry in data['mut_rate'];
    # this only prints a message, execution continues afterwards.
    region_found = [region in data['mut_rate'] for region in regions]
    if not all(region_found):
        print("data loading failed or data doesn't match specified regions:",
              regions, ' got:', data['mut_rate'].keys())

    # Average, annotate, and process allele frequencies.
    # First pass: one entry per gene, with the synnonsyn annotation requested
    # (presumably synonymous/nonsynonymous site masks -- confirm against
    # process_average_allele_frequencies).
    av = process_average_allele_frequencies(data,
                                            genes,
                                            nbootstraps=0,
                                            synnonsyn=True)
    combined_af = av['combined_af']
    combined_entropy = av['combined_entropy']
    minor_af = av['minor_af']
    synnonsyn = av['synnonsyn']
    synnonsyn_unconstrained = av['synnonsyn_unconstrained']
    # Second pass: the single region 'genomewide', without the synnonsyn
    # annotation. `av` is rebound here, so the per-gene results stay
    # reachable only through the names assigned above.
    av = process_average_allele_frequencies(data, ['genomewide'],
                                            nbootstraps=0,
                                            synnonsyn=False)
    # Merge the genome-wide results into the per-gene dictionaries.
    combined_af.update(av['combined_af'])
    combined_entropy.update(av['combined_entropy'])
    minor_af.update(av['minor_af'])
    # The second pass produced no masks, so give 'genomewide' all-True
    # boolean masks shaped like its minor allele frequency array. Two
    # separate arrays are created, one per dictionary.
    synnonsyn['genomewide'] = np.ones_like(minor_af['genomewide'], dtype=bool)
    synnonsyn_unconstrained['genomewide'] = np.ones_like(
        minor_af['genomewide'], dtype=bool)
            # Tail of a cache-writing branch whose opening lines (including
            # the matching `try:`) are not part of this excerpt: pickle
            # `data` into a gzip file at `fn`.
            with gzip.open(fn, 'w') as ofile:
                cPickle.dump(data, ofile)
            print('Data saved to file:', os.path.abspath(fn))
        except IOError:
            # Saving is best effort: the failure is only reported and
            # execution continues with the in-memory `data`.
            print('Could not save data to file:', os.path.abspath(fn))
    else:
        # Alternative branch (its condition is not visible here): read the
        # pickled data back from the gzip file.
        with gzip.open(fn) as ifile:
            data = cPickle.load(ifile)

    # Warn when any requested region has no entry in data['mut_rate'];
    # this only prints a message, execution continues afterwards.
    region_found = [region in data['mut_rate'] for region in regions]
    if not all(region_found):
        print("data loading failed or data doesn't match specified regions:",
              regions, ' got:',data['mut_rate'].keys())

    # Average, annotate, and process allele frequencies.
    # First pass: one entry per gene, with the synnonsyn annotation requested
    # (presumably synonymous/nonsynonymous site masks -- confirm against
    # process_average_allele_frequencies).
    av = process_average_allele_frequencies(data, genes, nbootstraps=0,
                                            synnonsyn=True)
    combined_af = av['combined_af']
    combined_entropy = av['combined_entropy']
    minor_af = av['minor_af']
    synnonsyn = av['synnonsyn']
    synnonsyn_unconstrained = av['synnonsyn_unconstrained']
    # Second pass: the single region 'genomewide', without the synnonsyn
    # annotation; its results are merged into the per-gene dictionaries.
    av = process_average_allele_frequencies(data, ['genomewide'], nbootstraps=0,
                                            synnonsyn=False)
    combined_af.update(av['combined_af'])
    combined_entropy.update(av['combined_entropy'])
    minor_af.update(av['minor_af'])
    # The second pass produced no masks, so start 'genomewide' with all-True
    # boolean masks shaped like its minor allele frequency array. Two
    # separate arrays are created because only one of them is edited below.
    synnonsyn['genomewide'] = np.ones_like(minor_af['genomewide'], dtype=bool)
    synnonsyn_unconstrained['genomewide'] = np.ones_like(minor_af['genomewide'], dtype=bool)
    # Copy each gene's unconstrained mask into the genome-wide unconstrained
    # mask; synnonsyn['genomewide'] is left all-True.
    for gene in genes:
        # Positions listed for this gene in the reference annotation.
        # NOTE(review): assumes iterating the annotation yields genome-wide
        # indices, as many as the gene's mask has entries -- confirm.
        pos = [x for x in reference.annotation[gene]]
        synnonsyn_unconstrained['genomewide'][pos] = synnonsyn_unconstrained[gene]