Example #1
import numpy as np


def corr(x, y, method='Pearson'):
    '''Correlate two vectors/matrices.

    This function can be useful because scipy.stats.pearsonr does too little
    (it takes only vectors) and scipy.stats.spearmanr does too much (it
    calculates all possible correlations when given two matrices, instead of
    correlating only pairs of variables where one is from the first matrix
    and the other from the second).
    '''
    if x.ndim == 1:
        x = x[:, np.newaxis]

    if method == 'Pearson':
        from scipy.stats import pearsonr as cor
    elif method == 'Spearman':
        from scipy.stats import spearmanr as cor
    else:
        raise ValueError("method must be 'Pearson' or 'Spearman'")

    if method == 'Spearman':
        # spearmanr accepts matrices, so each column of x is passed
        # together with y in a single call
        rs = list()
        ps = list()
        for col in range(x.shape[1]):
            r, p = cor(x[:, col], y)
            rs.append(r)
            ps.append(p)
        return np.array(rs), np.array(ps)
    else:
        # pearsonr takes only vectors, so loop over all column pairs
        if y.ndim == 1:
            y = y[:, np.newaxis]
        rmat = np.zeros((x.shape[1], y.shape[1]))
        pmat = np.zeros((x.shape[1], y.shape[1]))
        for x_idx in range(x.shape[1]):
            for y_idx in range(y.shape[1]):
                r, p = cor(x[:, x_idx], y[:, y_idx])
                rmat[x_idx, y_idx] = r
                pmat[x_idx, y_idx] = p
        return rmat, pmat
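
A minimal usage sketch (my addition, not part of the original example): correlating the columns of a 100 x 3 matrix with the columns of a 100 x 2 matrix gives a 3 x 2 matrix of r values and one of p values.

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(100, 3))  # 3 variables, 100 observations
y = rng.normal(size=(100, 2))  # 2 variables, 100 observations

rmat, pmat = corr(x, y, method='Pearson')
print(rmat.shape)  # (3, 2): one r per (x column, y column) pair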
Example #2
import numpy as np


def corr(x, y, method='Pearson'):
    '''Correlate two vectors/matrices.

    This function can be useful because scipy.stats.pearsonr does too little
    (it takes only vectors) and scipy.stats.spearmanr does too much (it
    calculates all possible correlations when given two matrices, instead of
    correlating only pairs of variables where one is from the first matrix
    and the other from the second).
    '''
    if x.ndim == 1:
        x = x[:, np.newaxis]

    if method == 'Pearson':
        from scipy.stats import pearsonr as cor
    elif method == 'Spearman':
        from scipy.stats import spearmanr as cor
    else:
        raise ValueError("method must be 'Pearson' or 'Spearman'")

    # correlate each column of x with y (y is expected to be a vector here,
    # since pearsonr accepts only vectors)
    rs = list()
    ps = list()
    for col in range(x.shape[1]):
        r, p = cor(x[:, col], y)
        rs.append(r)
        ps.append(p)
    return np.array(rs), np.array(ps)
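
A quick illustration (my addition): correlating each column of a matrix against a single vector returns one (r, p) pair per column.

import numpy as np

rng = np.random.default_rng(1)
x = rng.normal(size=(50, 4))
y = x[:, 0] + rng.normal(size=50)  # strongly related to the first column

rs, ps = corr(x, y)
print(rs)  # four r values, one per column of x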
Example #4
def calculate_c(self, videos, f1, f2):
    # f1 and f2 extract the two quantities to correlate; missing values
    # are treated as 0
    pairs = [(f1(x) or 0, f2(x) or 0) for x in videos]
    ranks, parameter = zip(*pairs)
    fig, ax = plt.subplots()
    ax.plot(ranks, parameter, 'o')
    plt.show()
    # cor is scipy.stats' pearsonr/spearmanr; it returns (r, p)
    r, p = cor(ranks, parameter)
    return r, p
Example #5
def calculate_views_c(self, videos):
    pairs = [(x.rank1, x.views) for x in videos]
    ranks, views = zip(*pairs)
    fig, ax = plt.subplots()
    ax.plot(ranks, views, 'o')
    plt.show()
    # cor returns the correlation coefficient and its p-value
    r, p = cor(ranks, views)
    return r, p
Example #6
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.formula.api as smf

cc = pd.read_csv("C:/Users/USER/Desktop/cc.csv")
cc.columns = "y", "x"

#### univariate analysis ####
plt.hist(cc.x)
plt.hist(cc.y)
plt.boxplot(cc.x)
plt.boxplot(cc.y)
cc.describe()

#### bivariate analysis ####
stats.pearsonr(cc.x, cc.y)
np.corrcoef(cc.x, cc.y)
plt.scatter(cc.x, cc.y)

model = smf.ols("y~x", data=cc).fit()
model.summary()
pred = model.predict(pd.DataFrame(cc['x']))
pred

# root mean squared error of the fit
err = pred - cc.y
err_sq = err * err
err_mean = np.mean(err_sq)
err_sqrt = np.sqrt(err_mean)
err_sqrt
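
As a quick cross-check (my addition, assuming the same cc DataFrame): pred - cc.y is exactly the negative of the model residuals, so the RMSE above can also be read off the fitted statsmodels object.

import numpy as np

rmse = np.sqrt(np.mean(model.resid ** 2))  # equals err_sqrt above
print(rmse)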
Example #7
    data = {}

    for filename in dsfilelist:
        # each file holds tab-separated rows; keep the last column as an int
        with open(filename, "r") as infile:
            data[filename] = []
            for line in infile:
                tmparray = line.split("\t")
                data[filename].append(int(tmparray[-1]))

    height = len(data[dsfilelist[0]])
    width = len(data)

    tmpmat = np.array([data[filename] for filename in dsfilelist])

    # cor here must be scipy.stats.spearmanr (pearsonr has no axis argument);
    # with axis=1 each row of tmpmat is a variable, so rho and pval are
    # width x width matrices
    rho, pval = cor(tmpmat, axis=1)

    tasknames = [filename.split("-")[1][0:-4] for filename in dsfilelist]

    #header = "%10s" % "" + " ".join(["%10s" % task for task in tasknames])
    header = "{:10s}".format("") + " | ".join(["{:_^11s}".format(task) for task in tasknames]) + " |"

    if tofile:
        print(header, file=outfile)
    else:
        print(header)

    for i in range(width):
        toprint = "{:10s}".format(tasknames[i]) + " | ".join(["{:>11f}".format(num) for num in rho[i]]) + " |"
        if tofile:
            print(toprint, file=outfile)
        else:
            print(toprint)

    if tofile:
        print("", file=outfile)
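
For context, a small sketch (my addition, not from the source) of the spearmanr behavior this fragment relies on: with axis=1 every row of the input is treated as a variable, so three rows yield a 3 x 3 rho matrix.

import numpy as np
from scipy.stats import spearmanr as cor

tmpmat = np.array([[1, 2, 3, 4],
                   [2, 1, 4, 3],
                   [4, 3, 2, 1]])
rho, pval = cor(tmpmat, axis=1)  # rows are variables
print(rho.shape)  # (3, 3) correlation matrix between the three rows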
Example #8
def fit_and_evaluate(model,
                     model_dir,
                     model_name,
                     y_train,
                     y_val,
                     y_test,
                     protein_train,
                     protein_test,
                     protein_val,
                     make_checkpoints=True):
    '''
    Fit a Keras model and evaluate it on the train, validation and test sets.

    :param model: compiled Keras model to fit
    :param model_dir: str, directory where model files are saved
    :param model_name: str, description of the model, used in file names
    :param y_train: training targets
    :param y_val: validation targets
    :param y_test: test targets
    :param protein_train: training inputs (protein features)
    :param protein_test: test inputs
    :param protein_val: validation inputs
    :param make_checkpoints: if True, save a checkpoint every 10 epochs
    :return: Keras History object, plus [targets, predictions, correlation]
        lists for train, test and val
    '''
    min_delta = 0
    patience = 5
    batch_size = 256
    n_epochs = 100
    # Training
    callbacks = [
        EarlyStopping(monitor='val_loss',
                      min_delta=min_delta,
                      patience=patience,
                      verbose=0,
                      mode='auto')
    ]

    if make_checkpoints:
        callbacks.append(
            ModelCheckpoint(filepath=os.path.join(
                model_dir, model_name + '__epoch={epoch:02d}.h5'),
                            period=10))

    fit = model.fit([protein_train], [y_train],
                    validation_data=([protein_val], [y_val]),
                    batch_size=batch_size,
                    epochs=n_epochs,
                    callbacks=callbacks)

    # Saving fitted model
    try:
        model.save(os.path.join(model_dir, model_name) + '.h5')
    except ValueError:
        warnings.warn('Model could not be saved')

    # Validation
    ypred_train = model.predict([protein_train])
    ypred_val = model.predict([protein_val])
    ypred_test = model.predict([protein_test])

    # flatten the prediction arrays before correlating with the 1-D targets
    ypred_train_flat = ypred_train.flatten()
    ypred_val_flat = ypred_val.flatten()
    ypred_test_flat = ypred_test.flatten()

    # cor is scipy.stats' pearsonr/spearmanr, so each result is an (r, p) pair
    cor_train = cor(y_train, ypred_train_flat)
    cor_val = cor(y_val, ypred_val_flat)
    cor_test = cor(y_test, ypred_test_flat)

    return fit, [y_train, ypred_train,
                 cor_train], [y_test, ypred_test,
                              cor_test], [y_val, ypred_val, cor_val]
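
This fragment leaves its imports to the surrounding module. One plausible set (my reconstruction from the names used; the callback import path depends on whether standalone Keras or tf.keras is installed):

import os
import warnings

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from scipy.stats import pearsonr as cor

Note that in recent tf.keras releases the period argument of ModelCheckpoint has been replaced by save_freq.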
Example #9
def lineReader(file, type=None):
    # open a comma-separated statistics file (one header row per file)
    g = open(file, 'r')
    lineCounter = 0

    # define the new variables for each file
    lineName = []
    lineFileCount = []
    line3DVar = []
    lineReconVar = []
    lineReconVarSD = []
    lineAnisoVar = []
    lineProlateVar = []
    lineOblateVar = []

    line3DSkew = []
    lineReconSkew = []
    lineReconSkewSD = []
    lineAnisoSkew = []

    line3DKurt = []
    lineReconKurt = []
    lineReconKurtSD = []
    lineAnisoKurt = []

    for line in g:
        # skip the header row
        if lineCounter == 0:
            lineCounter += 1
            continue

        line = line.strip('\n').split(',')

        # File information and anisotropy
        lineName.append(line[0])
        lineFileCount.append(float(line[1]))
        # Variance
        lineAnisoVar.append(float(line[11]))
        line3DVar.append(float(line[5]))
        lineReconVar.append(float(line[8]))
        lineReconVarSD.append(float(line[9]))
        lineProlateVar.append(float(line[6]))
        lineOblateVar.append(float(line[7]))
        # Skewness
        lineAnisoSkew.append(float(line[19]))
        line3DSkew.append(float(line[14]))
        lineReconSkew.append(float(line[17]))
        lineReconSkewSD.append(float(line[18]))
        # Kurtosis
        lineAnisoKurt.append(float(line[27]))
        line3DKurt.append(float(line[22]))
        lineReconKurt.append(float(line[25]))
        lineReconKurtSD.append(float(line[26]))

        # add here for skewness and kurtosis

        lineCounter += 1

    if type is not None:
        # when a type is requested, return the raw per-line data instead of
        # aggregating into the module-level lists below
        g.close()
        return lineName, lineFileCount, line3DVar, line3DKurt, line3DSkew, lineAnisoKurt, lineAnisoSkew, lineAnisoVar, lineReconSkew, lineReconKurt, lineReconVar, lineProlateVar, lineOblateVar

    # Correlations are taken before averaging (fileCorVar and the other
    # file* lists below are module-level accumulators)
    fileCorVar.append(cor(np.array(line3DVar), np.array(lineReconVar)))

    # take means and 1-sigma fluctuations of the variance
    line3DVar1Sigma = np.std(np.array(line3DVar))
    line3DVar = np.mean(np.array(line3DVar))
    lineReconVar1Sigma = np.std(np.array(lineReconVar))
    lineReconVar = np.mean(np.array(lineReconVar))
    lineReconVarSD = np.mean(np.array(lineReconVarSD))
    lineAnisoVar = np.mean(np.sqrt(1 - np.array(lineAnisoVar)))

    # take means and 1-sigma fluctuations of the skewness
    line3DSkew1Sigma = np.std(np.array(line3DSkew))
    line3DSkew = np.mean(np.array(line3DSkew))
    lineReconSkew1Sigma = np.std(np.array(lineReconSkew))
    lineReconSkew = np.mean(np.array(lineReconSkew))
    lineReconSkewSD = np.mean(np.array(lineReconSkewSD))
    lineAnisoSkew = np.mean(np.sqrt(1 - np.array(lineAnisoSkew)))

    # take means and 1-sigma fluctuations of the kurtosis
    line3DKurt1Sigma = np.std(np.array(line3DKurt))
    line3DKurt = np.mean(np.array(line3DKurt))
    lineReconKurt1Sigma = np.std(np.array(lineReconKurt))
    lineReconKurt = np.mean(np.array(lineReconKurt))
    lineReconKurtSD = np.mean(np.array(lineReconKurtSD))
    lineAnisoKurt = np.mean(np.sqrt(1 - np.array(lineAnisoKurt)))

    # append to the global dataset
    fileMach.append(machData[lineName[0]]['M'])
    fileMach1Sigma.append(machData[lineName[0]]['MStd'])
    fileMA.append(machData[lineName[0]]['MA'])
    fileName.append(lineName[0])

    # Variance
    file3DVar.append(line3DVar)
    file3DVar1Sigma.append(line3DVar1Sigma)
    fileReconVar.append(lineReconVar)
    fileReconVarSD.append(lineReconVarSD)
    fileReconVar1Sigma.append(lineReconVar1Sigma)
    fileAnisoVar.append(lineAnisoVar)

    # Skewness
    file3DSkew.append(line3DSkew)
    file3DSkew1Sigma.append(line3DSkew1Sigma)
    fileReconSkew.append(lineReconSkew)
    fileReconSkewSD.append(lineReconSkewSD)
    fileReconSkew1Sigma.append(lineReconSkew1Sigma)
    fileAnisoSkew.append(lineAnisoSkew)

    # Kurtosis
    file3DKurt.append(line3DKurt)
    file3DKurt1Sigma.append(line3DKurt1Sigma)
    fileReconKurt.append(lineReconKurt)
    fileReconKurtSD.append(lineReconKurtSD)
    fileReconKurt1Sigma.append(lineReconKurt1Sigma)
    fileAnisoKurt.append(lineAnisoKurt)

    g.close()
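
A minimal usage sketch (my addition, with hypothetical names; the module-level accumulator lists and machData must exist before the call):

import numpy as np
from scipy.stats import pearsonr as cor

# module-level accumulators that lineReader() appends to
fileCorVar, fileMach, fileMach1Sigma, fileMA, fileName = [], [], [], [], []
file3DVar, file3DVar1Sigma = [], []
fileReconVar, fileReconVarSD, fileReconVar1Sigma, fileAnisoVar = [], [], [], []
file3DSkew, file3DSkew1Sigma = [], []
fileReconSkew, fileReconSkewSD, fileReconSkew1Sigma, fileAnisoSkew = [], [], [], []
file3DKurt, file3DKurt1Sigma = [], []
fileReconKurt, fileReconKurtSD, fileReconKurt1Sigma, fileAnisoKurt = [], [], [], []

# machData maps the name in each file's first column to Mach statistics
machData = {'run1': {'M': 4.0, 'MStd': 0.2, 'MA': 2.0}}  # hypothetical values

lineReader('run1_stats.csv')  # hypothetical file; results land in the lists above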