"""Print Spearman rank correlations between the columns of a CSV file.

Usage: python spearman.py [num_variables] ***.csv

The CSV columns are looked up by the names "column 1" .. "column N", where N
is the num_variables argument.  The correlation coefficient(s) are printed
first, followed by the matching p-value(s).
"""
import sys

import numpy as np
from scipy.stats import spearmanr

from utils.read_csv import read_csv_by_column

USAGE = ("usage: python spearman.py [num_variables] ***.csv: the number of "
         "variables should be the same as the number of columns in the csv "
         "file")

if len(sys.argv) != 3:
    sys.exit(USAGE)

# A non-numeric or non-positive variable count cannot name any column, so
# report it as a usage error instead of failing later with a traceback.
try:
    var = int(sys.argv[1])
except ValueError:
    sys.exit(USAGE)
if var < 1:
    sys.exit(USAGE)

# Column headers are looked up as "column 1", "column 2", ..., "column N".
columns = ["column " + str(i) for i in range(1, var + 1)]

values = read_csv_by_column(sys.argv[2], columns)

# One row per variable, one column per observation.  dtype=float keeps the
# float64 coercion the data previously got from being copied into a
# pre-allocated float array.
inp = np.array([values[name] for name in columns], dtype=float)

# axis=1: each row of `inp` is a variable and the columns are observations.
(rho, pval) = spearmanr(inp, axis=1)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(rho)
print(pval)
# Run a t-test on the "col1" and "col2" columns of a CSV file.
#
# Usage: python t_test.py [paired/unpaired] ***.csv
import sys

import numpy as np
from scipy.stats import ttest_1samp, ttest_ind

from utils.read_csv import read_csv_by_column

if len(sys.argv) != 3:
    sys.exit("usage: python t_test.py [paired/unpaired] ***.csv")

# Only two data columns are compared at a time for now.
columns = ["col1", "col2"]

# Read the CSV named on the command line, as the usage message promises,
# rather than a fixed path.
values = read_csv_by_column(sys.argv[2], columns)

# NOTE(review): only the "paired" mode is handled in this part of the script;
# ttest_ind is imported but not used here -- confirm whether an "unpaired"
# branch exists elsewhere.
if sys.argv[1] == "paired":
    # Coerce to float arrays so the element-wise difference also works when
    # the reader returns plain lists (return type not visible here).
    x = np.asarray(values[columns[0]], dtype=float)
    y = np.asarray(values[columns[1]], dtype=float)
    x = x - y
    # paired t-test: doing two measurements on the same experimental unit
    # e.g., before and after a treatment.  It is computed as a one-sample
    # t-test of the per-unit differences against a mean of 0.
    t_statistic, p_value = ttest_1samp(x, 0.0)
    # p < 0.05 => alternative hypothesis:
    # the difference in mean is not equal to 0
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("paired t-test " + str(p_value))