예제 #1
0
def test_one_sided_y_greater_x():
    """Check the ``mean(y) - mean(x)`` p-value for two ways of passing the statistic.

    The statistic is supplied once as a callable and once as the string
    ``"x_mean < y_mean"``; both results must round to the same value.
    """
    expected = 1 - 0.03

    def mean_y_minus_mean_x(x, y):
        return np.mean(y) - np.mean(x)

    p_callable = permutation_test(treatment, control,
                                  func=mean_y_minus_mean_x)
    assert round(p_callable, 3) == expected, p_callable

    p_shortcut = permutation_test(treatment, control, func="x_mean < y_mean")
    assert round(p_shortcut, 3) == expected, p_shortcut
예제 #2
0
def test_one_sided_x_greater_y():
    """Check the ``mean(x) - mean(y)`` p-value for two ways of passing the statistic.

    The statistic is supplied once as a callable and once as the string
    ``"x_mean > y_mean"``; both results must round to 0.0274.
    """
    def mean_x_minus_mean_y(x, y):
        return np.mean(x) - np.mean(y)

    p_callable = permutation_test(treatment, control,
                                  func=mean_x_minus_mean_y)
    assert round(p_callable, 4) == 0.0274, p_callable

    p_shortcut = permutation_test(treatment, control, func="x_mean > y_mean")
    assert round(p_shortcut, 4) == 0.0274, p_shortcut
예제 #3
0
def test_one_sided_x_greater_y():
    """Verify the one-sided x-greater-than-y p-value via callable and string statistic."""
    expected = 0.0274

    def difference_of_means(x, y):
        return np.mean(x) - np.mean(y)

    # Statistic passed as a callable.
    from_callable = permutation_test(treatment, control,
                                     func=difference_of_means)
    assert round(from_callable, 4) == expected, from_callable

    # Statistic passed as the string shortcut.
    from_string = permutation_test(treatment, control, func="x_mean > y_mean")
    assert round(from_string, 4) == expected, from_string
예제 #4
0
def test_two_sided():
    """Check the absolute-mean-difference p-value for two ways of passing the statistic.

    The statistic is supplied once as a callable and once as the string
    ``"x_mean != y_mean"``; both results must round to 0.055.
    """
    def abs_mean_difference(x, y):
        return np.abs(np.mean(x) - np.mean(y))

    p_callable = permutation_test(treatment, control,
                                  func=abs_mean_difference)
    assert round(p_callable, 3) == 0.055, p_callable

    p_shortcut = permutation_test(treatment, control, func="x_mean != y_mean")
    assert round(p_shortcut, 3) == 0.055, p_shortcut
예제 #5
0
def test_one_sided_y_greater_x():
    """Verify the one-sided y-greater-than-x p-value via callable and string statistic."""
    target = 1 - 0.03

    def y_mean_minus_x_mean(x, y):
        return np.mean(y) - np.mean(x)

    # Statistic passed as a callable.
    from_callable = permutation_test(treatment, control,
                                     func=y_mean_minus_x_mean)
    assert round(from_callable, 3) == target, from_callable

    # Statistic passed as the string shortcut.
    from_string = permutation_test(treatment, control, func="x_mean < y_mean")
    assert round(from_string, 3) == target, from_string
예제 #6
0
def test_two_sided():
    """Verify the two-sided p-value via callable and string statistic."""
    target = 0.055

    def absolute_gap(x, y):
        return np.abs(np.mean(x) - np.mean(y))

    # Statistic passed as a callable.
    from_callable = permutation_test(treatment, control, func=absolute_gap)
    assert round(from_callable, 3) == target, from_callable

    # Statistic passed as the string shortcut.
    from_string = permutation_test(treatment, control, func="x_mean != y_mean")
    assert round(from_string, 3) == target, from_string
예제 #7
0
def analyze_by_tissue(tissue_list, positive_genes, negative_genes):
    """Compare positive and negative gene sets tissue by tissue.

    For each tissue, the ``Genes`` column and that tissue's column are read
    from the CSV at ``filename`` (a name taken from the enclosing scope, not
    a parameter). Rows for the positive and negative genes are selected,
    rows with missing values dropped, and three figures computed:

    * fold change: mean of positives divided by mean of negatives,
    * SEM of the positives after dividing them by the mean of negatives,
    * p-value of an approximate permutation test (10000 rounds, seed 0).

    Args:
        tissue_list: Column names of the tissues to analyse.
        positive_genes: Index labels of the positive gene set.
        negative_genes: Index labels of the negative gene set.

    Returns:
        DataFrame with columns ``Tissues`` (capitalized names),
        ``Fold_Change``, ``Significance`` and ``SEM``.
    """
    fold_changes, p_values, sems = [], [], []

    for tissue in tissue_list:
        print(tissue)
        expression = pd.read_csv(filename, usecols=['Genes', tissue])
        expression = expression.set_index('Genes')
        expression['Mean'] = expression.mean(axis=1)

        positive_rows = expression.loc[positive_genes].dropna()
        positive_values = np.array(positive_rows['Mean'].tolist())

        negative_rows = expression.loc[negative_genes].dropna()
        negative_values = np.array(negative_rows['Mean'].tolist())

        negative_mean = np.mean(negative_values)
        fold_change = np.mean(positive_values) / negative_mean
        positive_sem = stats.sem(positive_values / negative_mean)

        p_value = permutation_test(positive_values,
                                   negative_values,
                                   method='approximate',
                                   num_rounds=10000,
                                   seed=0)

        fold_changes.append(fold_change)
        p_values.append(p_value)
        sems.append(positive_sem)

    capitalized = [name.capitalize() for name in tissue_list]

    return pd.DataFrame({
        'Tissues': capitalized,
        'Fold_Change': fold_changes,
        'Significance': p_values,
        'SEM': sems
    })
예제 #8
0
def gridcell_history(array_stack_1, array_stack_2):
    """Run a permutation test per grid cell across two stacks of 2-D arrays.

    For every cell ``(i, j)`` the values at that position in each array of
    ``array_stack_1`` and of ``array_stack_2`` are gathered into two vectors
    (one entry per array in the respective stack). An approximate permutation
    test (1000 rounds, seed 0) is then run on the pair. Progress, the
    resulting p-value and the elapsed time are printed for each cell.

    Args:
        array_stack_1: Sequence of 2-D arrays; the first one defines the grid
            shape.
        array_stack_2: Sequence of 2-D arrays indexed at the same ``(i, j)``
            positions. It may hold a different number of arrays than
            ``array_stack_1``.

    Returns:
        Array with the grid's shape holding the p-value of each cell.
    """
    x, y = np.shape(array_stack_1[0])
    pvals = np.zeros([x, y])
    count = 0
    total = x * y
    for i in range(x):
        for j in range(y):
            t = time.time()
            # Build the "history" of this grid cell in each stack.
            array_stack_1_sample = np.asarray(
                [arr[i, j] for arr in array_stack_1])
            array_stack_2_sample = np.asarray(
                [arr[i, j] for arr in array_stack_2])
            # print() with a single argument behaves the same on Python 2
            # and Python 3; the former print statements were Python 2 only.
            print("doing permutation test {}/{}".format(count, total))
            pvals[i, j] = permutation_test(array_stack_1_sample,
                                           array_stack_2_sample,
                                           method='approximate',
                                           num_rounds=1000,
                                           seed=0)
            elapsed = time.time() - t
            print(pvals[i, j])
            print("time required to do one operation is {}".format(elapsed))
            count += 1
    return pvals
예제 #9
0
def permutation_ttest(W, B):
    """Flag whether two samples differ, using a permutation test on the t statistic.

    An approximate permutation test (100 rounds, seed 0) is run with the
    independent two-sample t statistic as the test statistic.

    Args:
        W: First sample.
        B: Second sample.

    Returns:
        1 if the permutation p-value is below 0.05, otherwise 0.
    """
    def t_statistic(sample_a, sample_b):
        # ttest_ind returns a (statistic, pvalue) result object. The
        # permutation test needs a scalar statistic to compare, so only the
        # statistic is passed on instead of the whole result.
        return stats.ttest_ind(sample_a, sample_b).statistic

    p_value = permutation_test(W, B,
                               method='approximate',
                               num_rounds=100,
                               func=t_statistic,
                               seed=0)
    return 1 if p_value < 0.05 else 0
예제 #10
0
def trend_diff(Score, n_ch_comp):
    """Test consecutive entries of ``Score`` against each other for a change.

    For ``i`` in ``range(n_ch_comp)``, ``Score[i + 1]`` (treatment) is
    compared with ``Score[i]`` (control) using an approximate permutation
    test. No seed is passed, so results can vary between runs. The number of
    significant comparisons and their 1-based positions are printed.

    Args:
        Score: Indexable collection of samples; entries ``0..n_ch_comp`` are
            read.
        n_ch_comp: Number of consecutive comparisons to perform.

    Returns:
        Tuple ``(p_value, significance, alpha, idx)``: the list of p-values,
        a list of booleans (``p < alpha``), the significance level, and the
        list of 0-based comparison indices where a change was detected.
    """
    # Significance level; it could be scaled by the number of comparisons
    # (alpha / n_ch_comp).
    alpha = 0.01
    rounds = int(alpha**(-1) * 100)

    p_value = [
        permutation_test(Score[step + 1],
                         Score[step],
                         method='approximate',
                         num_rounds=rounds)
        for step in range(n_ch_comp)
    ]

    significance = [p < alpha for p in p_value]
    print('Number of changes: ', sum(significance))

    # Indices of the comparisons where a change was detected.
    idx = np.nonzero(significance)[0].tolist()
    print('Changes after n number of channels: ', np.array(idx) + 1)

    return p_value, significance, alpha, idx
예제 #11
0
def p_value_one_sided(treatment, control):
    """Return the approximate permutation p-value for ``mean(control) - mean(treatment)``.

    Uses 10000 rounds and seed 42.
    """
    def control_minus_treatment(x, y):
        return np.mean(y) - np.mean(x)

    return permutation_test(treatment,
                            control,
                            method='approximate',
                            num_rounds=10000,
                            seed=42,
                            func=control_minus_treatment)
def calculate_permutation_test(single_predictions, joint_predictions):
    """Return the approximate permutation-test p-value for the two prediction sets.

    Uses 10000 rounds, seed 0 and the library's default test statistic.
    """
    settings = dict(method='approximate', num_rounds=10000, seed=0)
    return permutation_test(single_predictions, joint_predictions, **settings)
예제 #13
0
def plot_sim_score_dist():
    """Pool mean scores of both groups over five runs, test and plot them.

    For ``i`` in 0..4, ``make_avg_score_df(i)`` is called and its ``mean``
    column is collected separately for rows with ``group == 1`` and
    ``group == 0``. The pooled values are compared with an approximate
    permutation test (10000 rounds, seed 0), the p-value is printed, and both
    pooled lists are passed to ``plot_distributions``.
    """
    pos_list = []
    neg_list = []
    for run in range(5):
        scores = make_avg_score_df(run)
        print(scores)

        positive_rows = scores[scores['group'] == 1]
        print(positive_rows)
        pos_list.extend(positive_rows['mean'].tolist())

        negative_rows = scores[scores['group'] == 0]
        print(negative_rows)
        neg_list.extend(negative_rows['mean'].tolist())

    p_value = permutation_test(pos_list,
                               neg_list,
                               method='approximate',
                               num_rounds=10000,
                               seed=0)
    print(p_value)

    plot_distributions(pos_list, neg_list)
예제 #14
0
def find_permutation(positives, negatives):
    """Return the approximate permutation-test p-value for the two samples.

    Uses 10000 rounds, seed 0 and the library's default test statistic.
    """
    return permutation_test(positives,
                            negatives,
                            method='approximate',
                            num_rounds=10000,
                            seed=0)
예제 #15
0
def test_approximate():
    """Check the approximate one-sided (``x > y``) p-value with 5000 permutations, seed 123."""
    # NOTE(review): `alt_hypothesis` / `num_permutations` differ from the
    # `func` / `num_rounds` keywords used by the other calls in this file;
    # confirm they match the installed library version.
    options = {
        'method': 'approximate',
        'alt_hypothesis': 'x > y',
        'num_permutations': 5000,
        'seed': 123,
    }
    p = permutation_test(treatment, control, **options)
    assert round(p, 3) == 0.028, round(p, 4)
예제 #16
0
def test_approximateone_sided_x_greater_y():
    """Check the approximate p-value for ``mean(x) - mean(y)`` (5000 rounds, seed 123)."""
    def mean_x_minus_mean_y(x, y):
        return np.mean(x) - np.mean(y)

    p_value = permutation_test(treatment,
                               control,
                               func=mean_x_minus_mean_y,
                               method='approximate',
                               num_rounds=5000,
                               seed=123)
    assert round(p_value, 3) == 0.028, p_value
예제 #17
0
def test_approximateone_sided_x_greater_y():
    """Verify the approximate one-sided result for the difference of means ``x - y``."""
    settings = dict(method='approximate', num_rounds=5000, seed=123)

    def difference_of_means(x, y):
        return np.mean(x) - np.mean(y)

    result = permutation_test(treatment,
                              control,
                              func=difference_of_means,
                              **settings)
    assert round(result, 3) == 0.028, result
def calculate_permutation_test(hyperpartisan_valid_predictions,
                               joint_valid_predictions):
    """Return the approximate permutation-test p-value for the two prediction sets.

    Uses 10000 rounds, seed 0 and the library's default test statistic.
    """
    return permutation_test(hyperpartisan_valid_predictions,
                            joint_valid_predictions,
                            method='approximate',
                            num_rounds=10000,
                            seed=0)
예제 #19
0
def test_paired_runs_approximate():
    """Check the approximate paired permutation test on two fixed 7-value samples."""
    first_run = [3.67, 1.72, 3.46, 2.60, 2.03, 2.10, 3.01]
    second_run = [2.11, 1.79, 2.71, 1.89, 1.69, 1.71, 2.01]

    options = dict(paired=True,
                   method="approximate",
                   seed=0,
                   num_rounds=100000)
    p_value = permutation_test(first_run, second_run, **options)

    assert round(p_value, 3) == 0.031
예제 #20
0
    def _permutation_test(self, x, y):
        """Return the correlation of ``x`` and ``y`` together with a permutation p-value.

        Returns:
            Tuple ``(r, p)``: the off-diagonal entry of ``np.corrcoef(x, y)``
            rounded to 3 decimals, and the p-value of an approximate
            permutation test (10000 rounds, seed 0) that uses the same
            correlation as its statistic, rounded to 4 decimals.
        """
        def correlation(a, b):
            return np.corrcoef(a, b)[1][0]

        observed_r = correlation(x, y)

        p_value = permutation_test(x,
                                   y,
                                   method='approximate',
                                   num_rounds=10000,
                                   func=correlation,
                                   seed=0)

        return round(observed_r, 3), round(p_value, 4)
def perm_test(x,y):
    """Permutation-test the ``hist`` rows selected by the first items of ``x`` and ``y``.

    Prints both keys, records them in the enclosing-scope lists ``X`` and
    ``Y``, selects the rows of ``hist`` whose index equals each key and runs
    an approximate permutation test (100000 rounds, seed 0) on their values.

    Returns:
        The p-value of the permutation test.
    """
    x_key, y_key = x[0], y[0]
    print (x_key, y_key)
    X.append(x_key)
    Y.append(y_key)

    x_values = hist[hist.index==x_key].values
    y_values = hist[hist.index==y_key].values

    return permutation_test(x_values,
                            y_values,
                            method='approximate',
                            num_rounds=100000,
                            seed=0)
def make_pheno_df(phe_list1, phe_list2):
    """Print a permutation p-value for two lists and stack them into one labelled frame.

    The p-value of an approximate permutation test (10000 rounds, seed 0) is
    printed, not returned.

    Args:
        phe_list1: Values labelled ``Syndromic``.
        phe_list2: Values labelled ``Non-Syndromic``.

    Returns:
        DataFrame indexed by ``Group`` with a single ``Phe_No`` column, the
        ``Syndromic`` rows first.
    """
    print(permutation_test(phe_list1,
                           phe_list2,
                           method='approximate',
                           num_rounds=10000,
                           seed=0))

    labelled = (('Syndromic', phe_list1), ('Non-Syndromic', phe_list2))
    frames = [
        pd.DataFrame({'Group': label, 'Phe_No': values}).set_index('Group')
        for label, values in labelled
    ]
    return pd.concat(frames, axis=0)
예제 #23
0
 def kruskal_similar_distribution(self, column,
                                   pvalue_threshold=0.05,
                                   num_rounds=3):
     """Report whether new and historical data for ``column`` look similarly distributed.

     Runs an approximate permutation test (seed 0) whose statistic is the
     Kruskal-Wallis statistic of the two samples.

     Args:
         column: Key used to select data from ``self.new_data`` and
             ``self.historical_data``.
         pvalue_threshold: p-values below this are treated as a difference.
         num_rounds: Number of permutation rounds.

     Returns:
         False when the p-value is below ``pvalue_threshold``, True otherwise.
     """
     def kruskal_statistic(x, y):
         return stats.kruskal(x, y).statistic

     p_value = permutation_test(self.new_data[column],
                                self.historical_data[column],
                                method="approximate",
                                num_rounds=num_rounds,
                                func=kruskal_statistic,
                                seed=0)
     return not (p_value < pvalue_threshold)
def non_parametric_permutation_test(recurrence_group, no_recurrence_group):
    """Run three approximate permutation tests on the two patient groups.

    Each test uses 10000 random rounds with seed 0 (not an exhaustive
    enumeration of permutations) and differs only in the test statistic:

    1) the library's default statistic (presumably the two-sided difference
       of means -- confirm against the library documentation),
    2) ``'x_mean < y_mean'``,
    3) ``'x_mean > y_mean'``,

    where ``x`` is ``recurrence_group`` and ``y`` is ``no_recurrence_group``.

    :param recurrence_group: Measurements of the patients with recurrence
    :param no_recurrence_group: Measurements of the patients without recurrence
    :return: Tuple ``(global_p_value, lower_p_value, upper_p_value)`` in the
        order of the tests listed above
    """
    approximate = dict(method='approximate', num_rounds=10000, seed=0)

    global_p_value = permutation_test(recurrence_group,
                                      no_recurrence_group,
                                      **approximate)
    lower_p_value = permutation_test(recurrence_group,
                                     no_recurrence_group,
                                     func='x_mean < y_mean',
                                     **approximate)
    upper_p_value = permutation_test(recurrence_group,
                                     no_recurrence_group,
                                     func='x_mean > y_mean',
                                     **approximate)

    return global_p_value, lower_p_value, upper_p_value
예제 #25
0
def _calc_weat_pvalue(first_associations,
                      second_associations,
                      method=PVALUE_DEFUALT_METHOD):
    """Return the permutation-test p-value for the difference of association sums.

    Args:
        first_associations: First sample of association values.
        second_associations: Second sample of association values.
        method: Permutation-test method; must be one of ``PVALUE_METHODS``.

    Returns:
        The p-value for the statistic ``sum(x) - sum(y)``.

    Raises:
        ValueError: If ``method`` is not in ``PVALUE_METHODS``.
    """
    if method not in PVALUE_METHODS:
        message = 'method should be one of {}, {} was given'.format(
            PVALUE_METHODS, method)
        raise ValueError(message)

    def sum_difference(x, y):
        return sum(x) - sum(y)

    # The seed has no meaning when the method is exact.
    return permutation_test(first_associations,
                            second_associations,
                            func=sum_difference,
                            method=method,
                            seed=RANDOM_STATE)
예제 #26
0
 def pearson_similar_correlation(self, column,
                                  correlation_lower_bound,
                                  pvalue_threshold=0.05,
                                  num_rounds=3):
     """Report whether new and historical data for ``column`` are sufficiently correlated.

     Computes the Pearson correlation of the two samples and the p-value of
     an approximate permutation test (seed 0) that uses the same correlation
     coefficient as its statistic.

     Args:
         column: Key used to select data from ``self.new_data`` and
             ``self.historical_data``.
         correlation_lower_bound: Minimum accepted correlation coefficient.
         pvalue_threshold: Maximum accepted permutation p-value.
         num_rounds: Number of permutation rounds.

     Returns:
         False when the p-value exceeds ``pvalue_threshold`` or the
         correlation is below ``correlation_lower_bound``; True otherwise.
     """
     new_values = self.new_data[column]
     historical_values = self.historical_data[column]
     correlation_info = stats.pearsonr(new_values, historical_values)

     def pearson_coefficient(x, y):
         return stats.pearsonr(x, y)[0]

     p_value = permutation_test(self.new_data[column],
                                self.historical_data[column],
                                method="approximate",
                                num_rounds=num_rounds,
                                func=pearson_coefficient,
                                seed=0)
     return (not (p_value > pvalue_threshold)
             and not (correlation_info[0] < correlation_lower_bound))
예제 #27
0
def displacement_diff(Score1, Score2, n_ch_comp):
    """Test matching entries of ``Score1`` and ``Score2`` against each other.

    For ``i`` in ``range(n_ch_comp)``, ``Score1[i]`` (treatment) is compared
    with ``Score2[i]`` (control) using an approximate permutation test. No
    seed is passed, so results can vary between runs.

    Args:
        Score1: Indexable collection of treatment samples.
        Score2: Indexable collection of control samples.
        n_ch_comp: Number of index positions to compare.

    Returns:
        Tuple ``(p_value, significance, alpha)``: the list of p-values, a
        list of booleans (``p < alpha``) and the significance level.
    """
    # Significance level; it could be scaled by the number of comparisons
    # (alpha / n_ch_comp).
    alpha = 0.01
    rounds = int(alpha**(-1) * 100)

    p_value = [
        permutation_test(Score1[position],
                         Score2[position],
                         method='approximate',
                         num_rounds=rounds)
        for position in range(n_ch_comp)
    ]
    significance = [p < alpha for p in p_value]

    return p_value, significance, alpha
예제 #28
0
def analyze_cell_lines(data, positives, negatives, nonbrain_cells, group):
    """Compare positive and negative rows for each listed cell-line column.

    For every column named in ``nonbrain_cells`` the values at the
    ``positives`` and ``negatives`` labels are extracted and three figures
    are computed:

    * p-value of an approximate permutation test (10000 rounds, seed 0),
    * fold change: mean of positives divided by mean of negatives,
    * SEM of the positives after dividing them by the mean of negatives.

    The selected positive values, each p-value and the final frame are
    printed.

    Args:
        data: Table indexed so that ``data[col].loc[labels]`` selects values.
        positives: Labels of the positive rows.
        negatives: Labels of the negative rows.
        nonbrain_cells: Column names to analyse.
        group: Not used by this function.

    Returns:
        DataFrame with columns ``Cell_Line``, ``Fold_Change``, ``SEM`` and
        ``Significance``.
    """
    fold_changes, sems, p_values = [], [], []

    for cell_line in nonbrain_cells:
        cell_line_values = data[cell_line]

        pos = cell_line_values.loc[positives]
        print(pos)
        pos_list = pos.tolist()
        neg_list = cell_line_values.loc[negatives].tolist()

        p_value = permutation_test(pos_list,
                                   neg_list,
                                   method='approximate',
                                   num_rounds=10000,
                                   seed=0)
        print('permutation test p', p_value)

        fold_changes.append(np.mean(pos_list) / np.mean(neg_list))
        sems.append(stats.sem(pos_list / np.mean(neg_list)))
        p_values.append(p_value)

    df = pd.DataFrame({
        'Cell_Line': nonbrain_cells,
        'Fold_Change': fold_changes,
        'SEM': sems,
        'Significance': p_values
    })
    print(df)
    return df
def _permutation_test(
    x1,
    x2,
    alternative=ALTERNATIVE,
    num_rounds=NUM_ROUNDS,
    random_state=RANDOM_STATE,
):
    """Return an approximate permutation-test p-value for two samples.

    Args:
        x1: First sample.
        x2: Second sample.
        alternative: ``'two-sided'`` returns the p-value as computed;
            ``'greater'`` returns half of it.
        num_rounds: Number of permutation rounds (converted with ``int``).
        random_state: Seed passed to the permutation test.

    Returns:
        The p-value, halved when ``alternative`` is ``'greater'``.

    Raises:
        ValueError: If ``alternative`` is neither ``'two-sided'`` nor
            ``'greater'``.
    """
    # Validate before running the (potentially expensive) permutation test so
    # an unsupported alternative fails fast.
    if alternative not in ('two-sided', 'greater'):
        raise ValueError(
            f"alternative must be 'two-sided' or 'greater', got {alternative!r}"
        )

    pvalue = permutation_test(
        x1,
        x2,
        method='approximate',
        num_rounds=int(num_rounds),
        seed=random_state,
    )
    if alternative == 'greater':
        # NOTE(review): halving the p-value of the default test is only a
        # valid one-sided p-value under symmetry and when the effect lies in
        # the tested direction -- confirm this is intended.
        pvalue = pvalue / 2
    return pvalue
예제 #30
0
def test(args, model, train_dataloader, eval_dataset, K=10):
    """Compare two head-masking procedures over K contiguous folds of the eval set.

    ``eval_dataset`` is split into ``K`` contiguous index ranges. On the
    first fold, ``mask_heads`` (A) and ``unmask_dpp`` (B) each produce
    scores, sparsities and head masks. On later folds the head masks from the
    first fold are re-scored with ``evaluate`` while the first fold's
    sparsities are reused. Every fold contributes one ``auc`` value per
    procedure. The two lists are compared with an exact permutation test on
    ``mean(B) - mean(A)`` and saved as ``A.npy`` / ``B.npy`` in
    ``args.output_dir``.

    Args:
        args: Namespace providing ``local_rank``, ``batch_size`` and
            ``output_dir``; also forwarded to the helper functions.
        model: Model forwarded to the helper functions.
        train_dataloader: Forwarded to ``mask_heads`` / ``unmask_dpp``.
        eval_dataset: Dataset to split into folds.
        K: Number of folds.

    Returns:
        The p-value of the permutation test.
    """
    boundaries = np.linspace(0, len(eval_dataset), K + 1).astype('int')
    A, B = [], []

    for fold in range(K):
        fold_indices = list(range(boundaries[fold], boundaries[fold + 1]))
        sub_dataset = Subset(eval_dataset, fold_indices)
        if args.local_rank == -1:
            sub_sampler = SequentialSampler(sub_dataset)
        else:
            sub_sampler = DistributedSampler(sub_dataset)
        sub_dataloader = DataLoader(sub_dataset,
                                    sampler=sub_sampler,
                                    batch_size=args.batch_size,
                                    collate_fn=default_data_collator)

        if fold == 0:
            # The first fold determines the head masks and sparsities that
            # all later folds reuse.
            A_scores, A_sparsities, A_head_masks = mask_heads(
                args, model, train_dataloader, sub_dataloader)
            A.append(auc(A_sparsities, A_scores).cpu())
            B_scores, B_sparsities, B_head_masks = unmask_dpp(
                args, model, train_dataloader, sub_dataloader)
            B.append(auc(B_sparsities, B_scores).cpu())
            continue

        A_scores = [
            evaluate(args, model, sub_dataloader, head_mask=head_mask)
            for head_mask in A_head_masks
        ]
        A.append(auc(A_sparsities, A_scores).cpu())

        B_scores = [
            evaluate(args, model, sub_dataloader, head_mask=head_mask)
            for head_mask in B_head_masks
        ]
        B.append(auc(B_sparsities, B_scores).cpu())

    def mean_gain(x, y):
        return np.mean(y) - np.mean(x)

    p_value = permutation_test(A, B, method='exact', func=mean_gain)
    np.save(os.path.join(args.output_dir, "A.npy"), A)
    np.save(os.path.join(args.output_dir, "B.npy"), B)

    return p_value
예제 #31
0
def get_permutation_of_pdists(temp_df1, temp_df2):
    """
    Runs a permutation test on the outputs of ``get_pdists`` for two dataframes.
    ``get_pdists`` is defined elsewhere; presumably it returns the pairwise
    distances of each dataframe (confirm against its definition).
    The test is approximate, with 1000 rounds and seed 0.
    :param temp_df1: Dataframe
    :param temp_df2: Dataframe
    :return: p value of the permutation test
    """
    distances_1, distances_2 = get_pdists(temp_df1, temp_df2)

    return permutation_test(distances_1,
                            distances_2,
                            method='approximate',
                            num_rounds=1000,
                            seed=0)
def get_p_values(results, model_name, n_vars=10):
    """
    Compare, row by row, the multipass scores with the second-place scores
    stored in ``results`` for ``model_name``.

    For each of the first ``n_vars`` rows an approximate permutation test
    (1000 rounds, seed 0) is run and a verdict is printed.

    Returns a boolean array that is True where the p-value exceeds 0.05
    (note: booleans, not the p-values themselves).
    """
    first_place = results[f'multipass_scores__{model_name}'].values
    second_place = results[f'second_place_scores__{model_name}'].values

    collected = []
    for row in range(n_vars):
        p_value = permutation_test(first_place[row, :],
                                   second_place[row, :],
                                   method='approximate',
                                   num_rounds=1000,
                                   seed=0)
        collected.append(p_value)
        verdict = ('Probably the same distribution\n' if p_value > 0.05
                   else 'Probably different distributions\n')
        print(verdict)

    return np.array(collected) > 0.05
예제 #33
0
        def GetStats(atoms,SelectedHingeResidues,filename='Output'):
            """Write B-factor statistics for hinge and non-hinge atoms and return them.

            Summary statistics (N, min, max, mean, mode, median, standard
            deviation) of the B-factors are computed for all atoms, the
            backbone atoms of the selected hinge residues, and the remaining
            atoms. Each row is written tab-separated to ``outputfile`` (a
            name from the enclosing scope), followed by the p-value of an
            approximate permutation test (10000 rounds, seed 0) between
            hinge and non-hinge B-factors.

            Args:
                atoms ([packman.molecule.Atom])                   : Set of atoms.
                SelectedHingeResidues ([packman.molecule.Residue]): Predicted hinge residues.
                filename (str, optional)                          : Not used by this function. Defaults to 'Output'.

            Returns:
                (p_value, return_stats): the permutation-test p-value and a list of rows; the first row is the header, followed by the 'Total', 'Hinge' and 'NonHinge' rows.
            """
            backbone_groups = [residue.get_backbone() for residue in SelectedHingeResidues]
            hinge_atoms = [atom for group in backbone_groups for atom in group]
            non_hinge_atoms = list(set(atoms) - set(hinge_atoms))

            all_atoms_bfactor = [atom.get_bfactor() for atom in atoms]
            hinge_atoms_bfactor = [atom.get_bfactor() for atom in hinge_atoms]
            non_hinge_atoms_bfactor = [atom.get_bfactor() for atom in non_hinge_atoms]

            return_stats = []

            outputfile.write('\nSTATISTICS\n\t\tN\tMin\tMax\tMean\tMode\tMedian\tSTDDev\n')
            return_stats.append(['','N','Min','Max','Mean','Mode','Median','STDDev'])

            groups = (
                ('Total', all_atoms_bfactor),
                ('Hinge', hinge_atoms_bfactor),
                ('NonHinge', non_hinge_atoms_bfactor),
            )
            for label, bfactors in groups:
                row = [
                    label,
                    len(bfactors),
                    numpy.min(bfactors),
                    numpy.max(bfactors),
                    numpy.mean(bfactors),
                    mode(bfactors)[0][0],
                    numpy.median(bfactors),
                    numpy.std(bfactors),
                ]
                # Labels are padded to 8 characters in the file only; the
                # returned rows keep the unpadded label.
                outputfile.write(label.ljust(8) + '\t' + '\t'.join(str(value) for value in row[1:]) + '\n')
                return_stats.append(row)

            p_value = permutation_test(hinge_atoms_bfactor,
                                       non_hinge_atoms_bfactor,
                                       method='approximate',
                                       num_rounds=10000,
                                       seed=0)
            outputfile.write('\np-value:\t'+str(p_value)+'\n')
            return p_value,return_stats
예제 #34
0
def test_default():
    """Check the p-value obtained with all default settings."""
    expected = 0.055
    p_value = permutation_test(treatment, control)
    assert round(p_value, 3) == expected, p_value
예제 #35
0
def test_default():
    """Verify that a call without optional arguments yields a p-value rounding to 0.055."""
    result = permutation_test(treatment, control)
    rounded = round(result, 3)
    assert rounded == 0.055, result