def get_recombination_stats(geno_array):
    """Compute pairwise statistics for every unordered pair of loci.

    Every 2-combination of entries along the first axis of ``geno_array``
    is fed to ``getR`` and to ``getNR``; the resulting per-pair vectors are
    then passed to ``get_ml_R_frac`` and ``get_LOD``, and an additional
    ``MST`` vector is computed as ``e ** -(2 * (N/2 - R)**2 / N)`` with
    ``N = R + NR``.  Each per-pair vector is finally converted with
    ``get_matrix``.

    Progress messages, timestamps and per-stage durations are printed to
    stdout as a side effect.

    Parameters
    ----------
    geno_array : array with a ``shape`` attribute
        Iterating over it yields one entry per locus (``shape[0]`` is taken
        as the number of loci).
        NOTE(review): presumably an integer-coded genotype matrix of shape
        (loci, individuals) -- confirm against the caller.

    Returns
    -------
    Recombination_stats
        A namedtuple with fields ``R``, ``NR``, ``RF``, ``Z`` and ``MST``,
        each holding the ``get_matrix`` output for the corresponding
        per-pair vector.

    Notes
    -----
    Pairs with ``R + NR == 0`` divide by zero in the ``MST`` expression;
    NumPy will emit a RuntimeWarning and produce ``nan`` for those entries.
    """
    num_loci = geno_array.shape[0]
    # Exact integer count of unordered pairs, n * (n - 1) / 2.  The product
    # of two consecutive integers is always even, so floor division is exact
    # and avoids a round trip through float.
    num_pairs = num_loci * (num_loci - 1) // 2

    print('Starting, num_pairs = {}'.format(num_pairs))
    print(str(datetime.now()))
    time_start = datetime.now()
    sys.stdout.flush()

    # ``np.int`` was only an alias for the builtin ``int``; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    pairs = itertools.combinations(geno_array, 2)
    R = np.fromiter(getR(pairs), dtype=int, count=num_pairs)
    time_R = datetime.now()
    print('Finished R')
    print(str(datetime.now()))
    sys.stdout.flush()

    # ``combinations`` returns a one-shot iterator that the R pass has
    # exhausted, so a fresh one is built for the NR pass.
    pairs = itertools.combinations(geno_array, 2)
    NR = np.fromiter(getNR(pairs), dtype=int, count=num_pairs)
    time_NR = datetime.now()
    print('Finished NR')
    print(str(datetime.now()))
    sys.stdout.flush()

    ml_R_frac = get_ml_R_frac(R=R, NR=NR)
    time_RF = datetime.now()
    print('Finished RF')
    print(str(datetime.now()))
    sys.stdout.flush()

    Z = get_LOD(R=R, NR=NR, R_frac=ml_R_frac)
    time_Z = datetime.now()
    print('Finished Z')
    print(str(datetime.now()))
    sys.stdout.flush()

    N = R + NR
    MST = np.e**-(2*(N/2. - R)**2/N)
    print('Finished MST')
    time_MST = datetime.now()
    print(str(datetime.now()))
    sys.stdout.flush()

    # Each duration is measured from the previous stage's timestamp.
    print("R took: {}".format(str(time_R - time_start)))
    print("NR took: {}".format(str(time_NR - time_R)))
    print("RF took: {}".format(str(time_RF - time_NR)))
    print("Z took: {}".format(str(time_Z - time_RF)))
    print("MST took: {}".format(str(time_MST - time_Z)))

    Z_mat = get_matrix(Z)
    RF_mat = get_matrix(ml_R_frac)
    R_mat = get_matrix(R)
    NR_mat = get_matrix(NR)
    MST_mat = get_matrix(MST)

    Recombination_stats = collections.namedtuple('Recombination_stats', "R NR RF Z MST")
    return Recombination_stats(R_mat, NR_mat, RF_mat, Z_mat, MST_mat)
# Example #2
# 0
# Script: compute per-pair vectors for all pairs of rows of ``int_arr``, turn
# them into matrices, and summarise those matrices per LG using helpers from
# ``switchAlleles``.
# NOTE(review): ``genotypes_of_locus``, ``ini_map`` and ``loci_on_lg`` are
# defined outside this snippet; "LG" presumably means linkage group -- confirm.
int_arr = switchAlleles.convert_genotypes_to_int_array(genotypes_of_locus, ini_map)

# Number of rows, and the number of unordered row pairs, n * (n - 1) / 2.
num_loci = int_arr.shape[0]
num_pairs =  int((num_loci * (num_loci-1))/2)
# One-shot iterator over every 2-combination of rows of int_arr.
pairs = itertools.combinations(int_arr, 2)

# Disabled micro-benchmark of a single getR step, kept for reference.
#import timeit
#timeit.timeit('next(switchAlleles.getR(pairs))', setup = "import switch_allele_functions as switchAlleles; from __main__ import int_arr; from __main__ import pairs", number = 100000)

# Collect num_pairs values from getR into a float64 vector; ``count`` tells
# NumPy how many items to read so it can allocate the output up front.
R = numpy.fromiter(switchAlleles.getR(pairs), dtype = numpy.float64, count = num_pairs)

# The iterator above is exhausted after the R pass, so rebuild it for NR.
pairs = itertools.combinations(int_arr, 2)
NR = numpy.fromiter(switchAlleles.getNR(pairs), dtype = numpy.float64, count = num_pairs)

# Per-pair values derived from R and NR.
# NOTE(review): presumably the maximum-likelihood recombination fraction and
# the LOD score -- verify against switchAlleles.
ml_R_frac = switchAlleles.get_ml_R_frac(R = R, NR = NR)
Z = switchAlleles.get_LOD(R = R, NR = NR, R_frac = ml_R_frac)

# Convert the per-pair vectors with the rf / lod matrix helpers.
rf = switchAlleles.get_rf_matrix(ml_R_frac)
lod = switchAlleles.get_lod_matrix(Z)

# Index structure built from loci_on_lg; the exact mapping is not visible here.
index_of_lg = switchAlleles.get_index_of_LG(loci_on_lg)

# NOTE(review): by its name, keeps only LGs containing more than one locus -- confirm.
lgs_longer_than_1 = switchAlleles.find_LGs_with_multiple_loci(index_of_lg, loci_on_lg)

# Pairwise summaries over the selected LGs, computed from the rf and lod matrices.
mean_rf = switchAlleles.get_LG_pairwise_mean_rf(lgs_longer_than_1, rf, index_of_lg)
mean_lod = switchAlleles.get_LG_pairwise_mean_lod(lgs_longer_than_1,lod, index_of_lg)
sum_lod = switchAlleles.get_LG_pairwise_sum_lod(lgs_longer_than_1,lod, index_of_lg)

# Final value derived from the pairwise LOD sums (see get_square_sum_of_lod).
sq_sum_lod = switchAlleles.get_square_sum_of_lod(sum_lod, lgs_longer_than_1)