def corr(x, y, method='Pearson'):
    '''Correlate the columns of ``x`` with ``y``.

    This function can be useful because scipy.stats.pearsonr does too
    little (takes only vectors) and scipy.stats.spearmanr does too much
    (calculates all possible correlations when given two matrices -
    instead of correlating only pairs of variables where one is from the
    first and the other from the second matrix).

    Parameters
    ----------
    x : array-like
        Vector or 2-D matrix with variables in columns. A vector is
        treated as a single column.
    y : array-like
        Vector or 2-D matrix with variables in columns.
    method : {'Pearson', 'Spearman'}
        Which correlation to compute.

    Returns
    -------
    (r, p) : tuple of numpy arrays
        For ``'Pearson'``: two ``(x columns, y columns)`` matrices holding
        the result of ``pearsonr`` for every (x column, y column) pair.
        For ``'Spearman'``: two arrays with one entry per column of ``x``,
        each entry being whatever ``spearmanr(x_column, y)`` returned.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    '''
    if method == 'Pearson':
        from scipy.stats import pearsonr as cor
    elif method == 'Spearman':
        from scipy.stats import spearmanr as cor
    else:
        # Without this guard ``cor`` would be unbound and the failure would
        # surface later as a confusing NameError.
        raise ValueError(
            "method must be 'Pearson' or 'Spearman', got %r" % (method,))

    x = np.asarray(x)
    y = np.asarray(y)
    if x.ndim == 1:
        x = x[:, np.newaxis]

    if method == 'Spearman':
        rs = []
        ps = []
        for col in range(x.shape[1]):
            r, p = cor(x[:, col], y)
            rs.append(r)
            ps.append(p)
        return np.array(rs), np.array(ps)

    # pearsonr only accepts vectors, so every column pair is correlated
    # separately; a 1-D y is promoted to a single column so it can be
    # indexed the same way as a matrix.
    if y.ndim == 1:
        y = y[:, np.newaxis]

    rmat = np.zeros((x.shape[1], y.shape[1]))
    pmat = np.zeros_like(rmat)
    for x_idx in range(x.shape[1]):
        for y_idx in range(y.shape[1]):
            r, p = cor(x[:, x_idx], y[:, y_idx])
            rmat[x_idx, y_idx] = r
            pmat[x_idx, y_idx] = p
    return rmat, pmat
def corr(x, y, method='Pearson'):
    '''Correlate every column of ``x`` with ``y``.

    This function can be useful because scipy.stats.pearsonr does too
    little (takes only vectors) and scipy.stats.spearmanr does too much
    (calculates all possible correlations when given two matrices -
    instead of correlating only pairs of variables where one is from the
    first and the other from the second matrix).

    Parameters
    ----------
    x : array-like
        Vector or 2-D matrix with variables in columns. A vector is
        treated as a single column.
    y : array-like
        Passed unchanged to the scipy routine together with each column
        of ``x``.
    method : {'Pearson', 'Spearman'}
        Which correlation to compute.

    Returns
    -------
    (rs, ps) : tuple of numpy arrays
        One entry per column of ``x``: the first and second value returned
        by the scipy routine for that column.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    '''
    if method == 'Pearson':
        from scipy.stats import pearsonr as cor
    elif method == 'Spearman':
        from scipy.stats import spearmanr as cor
    else:
        # Without this guard ``cor`` would be unbound and the failure would
        # surface inside the loop as a confusing NameError.
        raise ValueError(
            "method must be 'Pearson' or 'Spearman', got %r" % (method,))

    x = np.asarray(x)
    if x.ndim == 1:
        x = x[:, np.newaxis]

    rs = []
    ps = []
    for col in range(x.shape[1]):
        r, p = cor(x[:, col], y)
        rs.append(r)
        ps.append(p)
    return np.array(rs), np.array(ps)
def calculate_c(self, videos, f1, f2):
    """Plot ``f1`` against ``f2`` over ``videos`` and correlate the two.

    ``f1`` and ``f2`` are called once per video; a falsy result (``None``,
    ``0``, ...) is replaced by ``0``. The value pairs are shown as a
    scatter plot (``plt.show()`` is called) and then handed to ``cor``,
    which is defined outside this method and must return two values.

    Returns the two values produced by ``cor`` as a tuple.
    """
    ranks, parameter = zip(
        *((f1(video) or 0, f2(video) or 0) for video in videos)
    )

    _, axes = plt.subplots()
    axes.plot(ranks, parameter, 'o')
    plt.show()

    first, second = cor(ranks, parameter)
    return first, second
def calculate_views_c(self, videos):
    """Plot ``rank1`` against ``views`` for ``videos`` and correlate them.

    Each video contributes its ``rank1`` and ``views`` attributes. The
    value pairs are shown as a scatter plot (``plt.show()`` is called) and
    then handed to ``cor``, which is defined outside this method and must
    return two values.

    Returns the two values produced by ``cor`` as a tuple.
    """
    ranks, views = zip(*((video.rank1, video.views) for video in videos))

    _, axes = plt.subplots()
    axes.plot(ranks, views, 'o')
    plt.show()

    first, second = cor(ranks, views)
    return first, second
# Exploratory analysis of a two-column CSV followed by a simple linear
# regression of y on x and its root-mean-square error.
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import scipy
import statsmodels.formula.api as smf
from scipy import stats

cc = pd.read_csv("C:/Users/USER/Desktop/cc.csv")
# The file's two columns are renamed positionally: first -> y, second -> x.
cc.columns = "y", "x"

#### univariate analysis ####
plt.hist(cc.x)
plt.hist(cc.y)
plt.boxplot(cc.x)
plt.boxplot(cc.y)
cc.describe()

#### bivariate analysis ####
# scipy.stats has no `cor`; pearsonr returns the correlation coefficient
# together with its p-value.
stats.pearsonr(cc.x, cc.y)
np.corrcoef(cc.x, cc.y)
plt.scatter(cc.x, cc.y)

# Ordinary least squares fit of y on x.
model = smf.ols("y~x", data=cc).fit()
model.summary()
pred = model.predict(pd.DataFrame(cc['x']))
pred

# Root-mean-square error of the fitted values.
err = pred - cc.y
err_sq = err * err
err_mean = np.mean(err_sq)
err_sqrt = np.sqrt(err_mean)
err_sqrt
data = {} for filename in dsfilelist: infile = open(filename, "r") data[filename] = [] for line in infile: tmparray = line.split("\t") data[filename].append(int(tmparray[-1])) height = len(data[dsfilelist[0]]) width = len(data) tmpmat = np.array([data[filename] for filename in dsfilelist]) rho, pval = cor(tmpmat, axis=1) tasknames = [filename.split("-")[1][0:-4] for filename in dsfilelist] #header = "%10s" % "" + " ".join(["%10s" % task for task in tasknames]) header = "{:10s}".format("") + " | ".join(["{:_^11s}".format(task) for task in tasknames]) + " |" if tofile: print >>outfile, header else: print header for i in range(width): toprint = "{:10s}".format(tasknames[i]) + " | ".join(["{:>11f}".format(num) for num in rho[i]]) + " |" if tofile: print >>outfile, toprint else: print toprint if tofile: print >>outfile, ""
def fit_and_evaluate(model, model_dir, model_name, y_train, y_val, y_test,
                     protein_train, protein_test, protein_val,
                     make_checkpoints=True):
    '''
    Train ``model``, save it, and score its predictions on every split.

    Training runs for at most 100 epochs with batch size 256 and stops
    early once ``val_loss`` has not improved for 5 epochs. The fitted model
    is written to ``<model_dir>/<model_name>.h5``; a ``ValueError`` raised
    while saving is downgraded to a warning. Predictions for each split are
    flattened and passed, together with the targets, to ``cor`` (defined
    outside this function).

    :param model: model object exposing ``fit``, ``save`` and ``predict``
        (a Keras model, according to the original documentation)
    :param model_dir: str directory in which the model files are written
    :param model_name: str used as the base name of the saved files
    :param y_train: targets of the training split
    :param y_val: targets of the validation split
    :param y_test: targets of the test split
    :param protein_train: model input of the training split
    :param protein_test: model input of the test split
    :param protein_val: model input of the validation split
    :param make_checkpoints: when true, additionally register a
        ``ModelCheckpoint`` callback with ``period=10``
    :return: ``(fit, train, test, val)`` where ``fit`` is the value
        returned by ``model.fit`` and the other three are lists of
        ``[targets, raw predictions, cor result]``
    '''
    # Training
    stopper = EarlyStopping(monitor='val_loss',
                            min_delta=0,
                            patience=5,
                            verbose=0,
                            mode='auto')
    callbacks = [stopper]
    if make_checkpoints:
        checkpoint_path = os.path.join(
            model_dir, model_name + '__epoch={epoch:02d}.h5')
        callbacks.append(ModelCheckpoint(filepath=checkpoint_path, period=10))

    fit = model.fit([protein_train], [y_train],
                    validation_data=([protein_val], [y_val]),
                    batch_size=256,
                    epochs=100,
                    callbacks=callbacks)

    # Saving fitted model
    final_path = os.path.join(model_dir, model_name) + '.h5'
    try:
        model.save(final_path)
    except ValueError:
        warnings.warn('Model could not be saved')

    # Validation: predict all three splits first, then score them.
    ypred_train = model.predict([protein_train])
    ypred_val = model.predict([protein_val])
    ypred_test = model.predict([protein_test])

    flat_train = ypred_train.flatten()
    flat_val = ypred_val.flatten()
    flat_test = ypred_test.flatten()

    train_result = [y_train, ypred_train, cor(y_train, flat_train)]
    val_result = [y_val, ypred_val, cor(y_val, flat_val)]
    test_result = [y_test, ypred_test, cor(y_test, flat_test)]

    # Note the order: train, test, val.
    return fit, train_result, test_result, val_result
def lineReader(file, type=None):
    """Read one comma-separated statistics file.

    The first line of the file is treated as a header and skipped, and
    blank lines are ignored. Every remaining line supplies a name (column
    0) and float values taken from fixed column positions:

    ======  ==================
    column  collected into
    ======  ==================
    1       lineFileCount
    5       line3DVar
    6       lineProlateVar
    7       lineOblateVar
    8       lineReconVar
    9       lineReconVarSD
    11      lineAnisoVar
    14      line3DSkew
    17      lineReconSkew
    18      lineReconSkewSD
    19      lineAnisoSkew
    22      line3DKurt
    25      lineReconKurt
    26      lineReconKurtSD
    27      lineAnisoKurt
    ======  ==================

    Parameters
    ----------
    file : str
        Path of the file to read.
    type : object, optional
        When not ``None`` the per-line lists are returned directly and no
        module-level state is touched.

    Returns
    -------
    tuple of 13 lists or None
        With ``type`` set: ``(lineName, lineFileCount, line3DVar,
        line3DKurt, line3DSkew, lineAnisoKurt, lineAnisoSkew, lineAnisoVar,
        lineReconSkew, lineReconKurt, lineReconVar, lineProlateVar,
        lineOblateVar)``.
        With ``type=None``: nothing is returned. Instead the per-file
        correlation, means and standard deviations are appended to the
        module-level ``file*`` lists (``fileCorVar``, ``fileMach``,
        ``file3DVar``, ...), using ``machData`` and ``cor`` which are
        defined outside this function.
    """
    lineName = []
    lineFileCount = []
    line3DVar = []
    lineReconVar = []
    lineReconVarSD = []
    lineAnisoVar = []
    lineProlateVar = []
    lineOblateVar = []
    line3DSkew = []
    lineReconSkew = []
    lineReconSkewSD = []
    lineAnisoSkew = []
    line3DKurt = []
    lineReconKurt = []
    lineReconKurtSD = []
    lineAnisoKurt = []

    # The context manager closes the file even when a row fails to parse.
    with open(file, 'r') as g:
        next(g, None)  # skip the header line
        for raw in g:
            raw = raw.strip('\n')
            if not raw.strip():
                # A blank line would otherwise fail with an IndexError.
                continue
            line = raw.split(',')

            # File information and anisotropy
            lineName.append(line[0])
            lineFileCount.append(float(line[1]))

            # Variance
            lineAnisoVar.append(float(line[11]))
            line3DVar.append(float(line[5]))
            lineReconVar.append(float(line[8]))
            lineReconVarSD.append(float(line[9]))
            lineProlateVar.append(float(line[6]))
            lineOblateVar.append(float(line[7]))

            # Skewness
            lineAnisoSkew.append(float(line[19]))
            line3DSkew.append(float(line[14]))
            lineReconSkew.append(float(line[17]))
            lineReconSkewSD.append(float(line[18]))

            # Kurtosis
            lineAnisoKurt.append(float(line[27]))
            line3DKurt.append(float(line[22]))
            lineReconKurt.append(float(line[25]))
            lineReconKurtSD.append(float(line[26]))

    if type is not None:
        return (lineName, lineFileCount, line3DVar, line3DKurt, line3DSkew,
                lineAnisoKurt, lineAnisoSkew, lineAnisoVar, lineReconSkew,
                lineReconKurt, lineReconVar, lineProlateVar, lineOblateVar)

    # Correlations are taken before averaging
    fileCorVar.append(cor(np.array(line3DVar), np.array(lineReconVar)))

    # take means and 1sigma fluctuations of variance
    line3DVar1Sigma = np.std(np.array(line3DVar))
    line3DVar = np.mean(np.array(line3DVar))
    lineReconVar1Sigma = np.std(np.array(lineReconVar))
    lineReconVar = np.mean(np.array(lineReconVar))
    lineReconVarSD = np.mean(np.array(lineReconVarSD))
    lineAnisoVar = np.mean(np.sqrt(1 - np.array(lineAnisoVar)))

    # take means and 1sigma fluctuations of skewness
    line3DSkew1Sigma = np.std(np.array(line3DSkew))
    line3DSkew = np.mean(np.array(line3DSkew))
    lineReconSkew1Sigma = np.std(np.array(lineReconSkew))
    lineReconSkew = np.mean(np.array(lineReconSkew))
    lineReconSkewSD = np.mean(np.array(lineReconSkewSD))
    lineAnisoSkew = np.mean(np.sqrt(1 - np.array(lineAnisoSkew)))

    # take means and 1sigma fluctuations of kurtosis
    line3DKurt1Sigma = np.std(np.array(line3DKurt))
    line3DKurt = np.mean(np.array(line3DKurt))
    lineReconKurt1Sigma = np.std(np.array(lineReconKurt))
    lineReconKurt = np.mean(np.array(lineReconKurt))
    lineReconKurtSD = np.mean(np.array(lineReconKurtSD))
    lineAnisoKurt = np.mean(np.sqrt(1 - np.array(lineAnisoKurt)))

    # append to the global dataset; the name on the first data line selects
    # the machData entry for the whole file
    fileMach.append(machData[lineName[0]]['M'])
    fileMach1Sigma.append(machData[lineName[0]]['MStd'])
    fileMA.append(machData[lineName[0]]['MA'])
    fileName.append(lineName[0])

    # Variance
    file3DVar.append(line3DVar)
    file3DVar1Sigma.append(line3DVar1Sigma)
    fileReconVar.append(lineReconVar)
    fileReconVarSD.append(lineReconVarSD)
    fileReconVar1Sigma.append(lineReconVar1Sigma)
    fileAnisoVar.append(lineAnisoVar)

    # Skewness
    file3DSkew.append(line3DSkew)
    file3DSkew1Sigma.append(line3DSkew1Sigma)
    fileReconSkew.append(lineReconSkew)
    fileReconSkewSD.append(lineReconSkewSD)
    fileReconSkew1Sigma.append(lineReconSkew1Sigma)
    fileAnisoSkew.append(lineAnisoSkew)

    # Kurtosis
    file3DKurt.append(line3DKurt)
    file3DKurt1Sigma.append(line3DKurt1Sigma)
    fileReconKurt.append(lineReconKurt)
    fileReconKurtSD.append(lineReconKurtSD)
    fileReconKurt1Sigma.append(lineReconKurt1Sigma)
    fileAnisoKurt.append(lineAnisoKurt)