def test_kendalltau(self):
    """Compare ``mstats.kendalltau`` with reference (tau, p-value) pairs.

    Each input is built with ``ma.fix_invalid``, so the NaN entries are
    masked before the statistic is computed. The last step verifies that
    the result exposes the ``correlation`` and ``pvalue`` attributes.

    NOTE(review): the expected p-values are pinned constants; confirm they
    match the p-value method used by the SciPy version under test.
    """
    # Small samples sharing the same first variable.
    base = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, np.nan])
    untied = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
    tied = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
    small_cases = (
        (untied, [+0.3333333, 0.4969059]),
        (tied, [-0.5477226, 0.2785987]),
    )
    for other, expected in small_cases:
        assert_almost_equal(np.asarray(mstats.kendalltau(base, other)),
                            expected)

    # Longer samples with many repeated values and one NaN in each input.
    x = ma.fix_invalid([
        0, 0, 0, 0, 20, 20, 0, 60, 0, 20, 10,
        10, 0, 40, 0, 20, 0, 0, 0, 0, 0, np.nan,
    ])
    y = ma.fix_invalid([
        0, 80, 80, 80, 10, 33, 60, 0, 67, 27, 25,
        80, 80, 80, 80, 80, 80, 0, 10, 45, np.nan, 0,
    ])
    tau_and_p = np.asarray(mstats.kendalltau(x, y))
    assert_almost_equal(tau_and_p, [-0.1585188, 0.4128009])

    # test for namedtuple attributes
    named = mstats.kendalltau(x, y)
    check_named_results(named, ('correlation', 'pvalue'), ma=True)
def test_kendalltau(self):
    """Check ``mstats.kendalltau`` against pinned (tau, p-value) references.

    Inputs go through ``ma.fix_invalid`` so that their NaN entries are
    masked before the correlation is computed.

    NOTE(review): the expected p-values are pinned constants; confirm they
    match the p-value method used by the SciPy version under test.
    """
    expected_xy = [+0.3333333, 0.4969059]
    expected_xz = [-0.5477226, 0.2785987]
    expected_long = [-0.1585188, 0.4128009]

    # Short samples: one tie-free pairing and one pairing with a tie in z.
    x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, np.nan])
    y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
    z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
    observed_xy = np.asarray(mstats.kendalltau(x, y))
    assert_almost_equal(observed_xy, expected_xy)
    observed_xz = np.asarray(mstats.kendalltau(x, z))
    assert_almost_equal(observed_xz, expected_xz)

    # Longer samples with heavy ties and a NaN at a different index in each.
    x = ma.fix_invalid([
        0, 0, 0, 0, 20, 20, 0, 60, 0, 20, 10,
        10, 0, 40, 0, 20, 0, 0, 0, 0, 0, np.nan,
    ])
    y = ma.fix_invalid([
        0, 80, 80, 80, 10, 33, 60, 0, 67, 27, 25,
        80, 80, 80, 80, 80, 80, 0, 10, 45, np.nan, 0,
    ])
    observed_long = np.asarray(mstats.kendalltau(x, y))
    assert_almost_equal(observed_long, expected_long)
def test_kendalltau(self):
    """Verify Kendall's tau and its p-value on masked inputs.

    ``ma.fix_invalid`` masks the NaN entries, so every comparison below
    runs on masked arrays.

    NOTE(review): the expected p-values are pinned constants; confirm they
    match the p-value method used by the SciPy version under test.
    """
    def tau_and_p(first, second):
        # Flatten the (tau, p-value) result into a plain ndarray.
        return np.asarray(mstats.kendalltau(first, second))

    # Three short series that all end with an invalid entry.
    x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, np.nan])
    y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
    z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
    assert_almost_equal(tau_and_p(x, y), [+0.3333333, 0.4969059])
    assert_almost_equal(tau_and_p(x, z), [-0.5477226, 0.2785987])

    # Two long series dominated by repeated values.
    x = ma.fix_invalid([0, 0, 0, 0, 20, 20, 0, 60, 0, 20,
                        10, 10, 0, 40, 0, 20, 0, 0, 0, 0, 0, np.nan])
    y = ma.fix_invalid([0, 80, 80, 80, 10, 33, 60, 0, 67, 27,
                        25, 80, 80, 80, 80, 80, 80, 0, 10, 45, np.nan, 0])
    assert_almost_equal(tau_and_p(x, y), [-0.1585188, 0.4128009])
def compute(self, x, y):
    """Return Kendall's tau and its p-value for two equally sized inputs.

    Parameters
    ----------
    x, y : array-like
        Samples to correlate; they must contain the same number of elements.

    Returns
    -------
    dict
        ``{"KENDALL": tau, "KENDALL_PV": p_value}`` as produced by
        ``mstats.kendalltau(x, y)``.

    Raises
    ------
    AssertionError
        If ``x`` and ``y`` differ in size (only when assertions are enabled).
    """
    assert np.size(x) == np.size(y)
    tau, p_value = mstats.kendalltau(x, y)
    outcome = {}
    outcome["KENDALL"] = tau
    outcome["KENDALL_PV"] = p_value
    return outcome
def get_score(self, word: str):
    """Return Kendall's tau between the neighbour lists of ``word``.

    The ``self.top_n_neighbors`` most similar words are requested from both
    ``self.w2v1`` and ``self.w2v2``. Each neighbour is mapped through
    ``self.word_index`` and the two resulting sequences are compared with
    ``mstats.kendalltau``.

    Parameters
    ----------
    word : str
        Word whose neighbourhoods are compared.

    Returns
    -------
    The correlation coefficient; the p-value is discarded.

    Raises
    ------
    ValueError
        If either model returns a number of neighbours different from
        ``self.top_n_neighbors``.
    """
    neighbours_1 = [
        name
        for name, _ in self.w2v1.most_similar(word, topn=self.top_n_neighbors)
    ]
    neighbours_2 = [
        name
        for name, _ in self.w2v2.most_similar(word, topn=self.top_n_neighbors)
    ]

    # Both lists must have exactly the requested length to be comparable.
    if not (len(neighbours_1) == len(neighbours_2) == self.top_n_neighbors):
        raise ValueError("Problem with word {word} and its neighbours".format(word=word))

    indices_1 = [self.word_index(name) for name in neighbours_1]
    indices_2 = [self.word_index(name) for name in neighbours_2]
    tau, _p_value = mstats.kendalltau(indices_1, indices_2)
    return tau
def test_kendalltau(self):
    """Exercise ``mstats.kendalltau`` on masked data and check its result type.

    The numeric checks compare (tau, p-value) with pinned references; the
    final check confirms the result exposes ``correlation`` and ``pvalue``.

    NOTE(review): the expected p-values are pinned constants; confirm they
    match the p-value method used by the SciPy version under test.
    """
    # Short series; fix_invalid masks the trailing NaN of each one.
    short_x, short_y, short_z = (
        ma.fix_invalid(values)
        for values in (
            [5.05, 6.75, 3.21, 2.66, np.nan],
            [1.65, 26.5, -5.93, 7.96, np.nan],
            [1.65, 2.64, 2.64, 6.95, np.nan],
        )
    )
    got = np.asarray(mstats.kendalltau(short_x, short_y))
    assert_almost_equal(got, [+0.3333333, 0.4969059])
    got = np.asarray(mstats.kendalltau(short_x, short_z))
    assert_almost_equal(got, [-0.5477226, 0.2785987])

    # Long series with many ties; the NaN sits at a different index in each.
    long_x = ma.fix_invalid([0, 0, 0, 0, 20, 20, 0, 60, 0, 20,
                             10, 10, 0, 40, 0, 20, 0, 0, 0, 0, 0, np.nan])
    long_y = ma.fix_invalid([0, 80, 80, 80, 10, 33, 60, 0, 67, 27,
                             25, 80, 80, 80, 80, 80, 80, 0, 10, 45, np.nan, 0])
    got = np.asarray(mstats.kendalltau(long_x, long_y))
    assert_almost_equal(got, [-0.1585188, 0.4128009])

    # test for namedtuple attributes
    field_names = ('correlation', 'pvalue')
    check_named_results(mstats.kendalltau(long_x, long_y), field_names, ma=True)
def compute(self, x, y):
    """Compute Kendall's tau for ``x`` and ``y`` and package the result.

    Parameters
    ----------
    x, y : array-like
        Samples of equal size.

    Returns
    -------
    dict
        Mapping with the correlation under ``"KENDALL"`` and its p-value
        under ``"KENDALL_PV"``.

    Raises
    ------
    AssertionError
        If the two inputs have different sizes (only when assertions are
        enabled).
    """
    assert np.size(x) == np.size(y)
    result = mstats.kendalltau(x, y)
    # Unpack explicitly so a result of unexpected length still fails loudly.
    coefficient, significance = result
    return {
        "KENDALL": coefficient,
        "KENDALL_PV": significance,
    }
def draw_corplot(x, y, xname, yname, add_robust=False, save_to_file=True,
                 ax=None, stats_title=True, stats_legend=False, customcol=None,
                 legendprefix=''):
    """Draw a scatter plot of ``y`` against ``x`` with a regression line.

    A least-squares fit (``linregress``), a Spearman correlation
    (``spearmanr``) and Kendall's tau (``kendalltau``) are computed from
    ``x`` and ``y``. The least-squares line is always drawn; the statistics
    are reported in the title and/or the legend.

    Parameters
    ----------
    x, y : array-like
        Paired samples to plot and correlate.
    xname, yname : str
        Variable names. They are capitalised for the axis labels and used
        verbatim in the output file name.
    add_robust : bool
        If True, also draw the ``theilslopes`` line together with a shaded
        band between the lines given by its lower and upper slope bounds.
    save_to_file : bool
        If True, save the figure as ``corplot_<xname>x<yname>.png`` in
        ``OUTDIR``.
    ax : matplotlib axes or None
        Axes to draw into. When None, a new figure with one axes is created.
    stats_title : bool
        If True, write R, rho and tau with their p-values in the title.
    stats_legend : bool
        If True, label the regression line with Kendall's tau and show a
        legend.
    customcol : colour or None
        Colour for samples and regression line; when None the entries of
        ``PLOTCOLS`` are used.
    legendprefix : str
        Text placed before the tau label in the legend.

    Returns
    -------
    None
    """
    # Choose the right colour for the plot.
    if customcol is None:
        regress_col = PLOTCOLS['regression']
        sample_col = PLOTCOLS['samples']
    else:
        regress_col = customcol
        sample_col = customcol
    sample_alpha = 0.75

    # Create a new plot, or look up the figure of the axes we were given.
    # Remembering who created the figure matters because ``ax`` is rebound
    # below, so a later ``ax is None`` test could never be true.
    created_figure = ax is None
    if created_figure:
        fig, ax = pyplot.subplots(nrows=1, ncols=1)
    else:
        # Needed so that saving works for caller-supplied axes too.
        fig = ax.get_figure()

    # Plot a scatter plot of the x and y values.
    ax.plot(x, y, 'o', color=sample_col, alpha=sample_alpha)

    # Plot the robust regression line.
    if add_robust:
        slope, intercept, lo_slope, up_slope = theilslopes(y, x, alpha=0.95)
        x_pred = numpy.array([numpy.min(x), numpy.max(x)])
        y_pred = slope * x_pred + intercept
        y_lo = lo_slope * x_pred + intercept
        y_up = up_slope * x_pred + intercept
        ax.plot(x_pred, y_pred, '-', color=regress_col)
        # NOTE(review): the band always uses PLOTCOLS['regression'], even
        # when customcol is set -- confirm whether that is intended.
        ax.fill_between(x_pred, y_lo, y_up, linewidth=3, alpha=0.2,
                        color=PLOTCOLS['regression'])

    # Perform a linear regression. Results without named attributes are
    # unpacked positionally instead.
    model = linregress(x, y)
    try:
        r = model.rvalue
        p = model.pvalue
        slope = model.slope
        intercept = model.intercept
    except AttributeError:
        slope, intercept, r, p, _stderr = model

    # Perform a Spearman correlation.
    spearman = spearmanr(x, y)
    try:
        spearman_rho = spearman.correlation
        spearman_p = spearman.pvalue
    except AttributeError:
        spearman_rho, spearman_p = spearman

    # Compute Kendall's Tau.
    kendall = kendalltau(x, y)
    try:
        kendall_tau = kendall.correlation
        kendall_p = kendall.pvalue
    except AttributeError:
        kendall_tau, kendall_p = kendall

    # Set the regression line's label.
    if stats_legend:
        # Show Kendall's tau, as we're using a lowish N.
        if kendall_p < 0.001:
            kendall_pstr = r"p<0.001"
        else:
            kendall_pstr = r"p=%.3f" % (kendall_p)
        lbl = r"%s$\tau=%.2f, %s$" % (legendprefix, kendall_tau, kendall_pstr)
    else:
        lbl = None

    # Plot the least-squares regression line.
    x_pred = numpy.array([numpy.min(x), numpy.max(x)])
    y_pred = slope * x_pred + intercept
    ax.plot(x_pred, y_pred, '-', color=regress_col, linewidth=3, label=lbl)

    # Finish the plot.
    ax.set_xlabel(xname.capitalize(), fontsize=FONTSIZE['label'])
    ax.set_ylabel(yname.capitalize(), fontsize=FONTSIZE['label'])
    if stats_title:
        ax.set_title("R=%.2f, p=%.3f; Rho=%.2f, p=%.3f; Tau=%.3f, p=%.3f" %
                     (r, p, spearman_rho, spearman_p, kendall_tau, kendall_p))
    if stats_legend:
        ax.legend(loc="best", fontsize=FONTSIZE['legend'])

    # Save the plot.
    if save_to_file:
        fig.savefig(os.path.join(OUTDIR, "corplot_%sx%s.png" % (xname, yname)))
        # Only release figures this function created and has already
        # written to disk; unsaved or caller-owned figures stay open.
        if created_figure:
            pyplot.close(fig)
def ktau_corr(X, Y):
    """Return the ``mstats.kendalltau`` result for ``X`` and ``Y``.

    The call pins ``use_ties=True`` and ``use_missing=False`` explicitly
    instead of relying on the library defaults.

    Parameters
    ----------
    X, Y : array-like
        Paired samples to correlate.

    Returns
    -------
    The result object of ``mstats.kendalltau``, which unpacks into the
    correlation and its p-value.
    """
    options = {"use_ties": True, "use_missing": False}
    outcome = mstats.kendalltau(X, Y, **options)
    return outcome
# Template for the log header; filled in with a mapping that provides the
# named keys.
LOG_MSG = "#npy_fname=%(npy_fname)s, function=%(function)s, start=%(start)d, end=%(end)d, m=%(m)d, date=%(date)s"
REPORT_N = 1000

# get username
TMP_DIR = "/tmp/%s" % pwd.getpwuid(os.getuid()).pw_name


def euclidean(x, y):
    """Return the square root of the summed products of ``x - y`` with its transpose.

    For one-dimensional inputs the transpose is a no-op, so this is the
    Euclidean distance between ``x`` and ``y``.
    NOTE(review): presumably only called with 1-D arrays -- confirm.
    """
    q = x - y
    return ma.sqrt((q * q.T).sum())


# this should be in a separate file
# Maps a dependency-measure name to a callable taking (x, y) and returning
# a single score.
FUNCTIONS = {
    'pearson': lambda x, y: mstats.pearsonr(x, y)[0],
    'spearman': lambda x, y: mstats.spearmanr(x, y)[0],
    'euclidean': euclidean,
    'kendalltau': lambda x, y: mstats.kendalltau(x, y)[0],
    'dcor': dcor,
}


def main(npy_fname=None, function=None, batchname=None, outdir=None, start=None, end=None, m=None):
    """Compute pairs of dependency"""
    # Both values are required. The previous two-operand form of assert
    # treated ``function`` as the failure message and never checked it.
    assert npy_fname and function
    assert function in FUNCTIONS
    assert os.path.exists(outdir)
    assert os.path.isdir(outdir)
    m = int(m)
    assert m > 0
    if end is None:
        # Number of unordered pairs among m items. m * (m - 1) is always
        # even, so floor division is exact and keeps ``end`` an integer
        # under true division as well.
        end = m * (m - 1) // 2
def compute(self, x, y, i):
    """Store Kendall's tau and its p-value for ``x`` and ``y`` at slot ``i``.

    Parameters
    ----------
    x, y : array-like
        Samples of equal size.
    i : int
        Non-negative index into ``self.Matrices["KENDALL"]`` and
        ``self.Matrices["KENDALL_PV"]`` where the results are written.

    Raises
    ------
    AssertionError
        If the inputs differ in size or ``i`` is negative (only when
        assertions are enabled).
    """
    assert np.size(x) == np.size(y) and i >= 0
    tau, p_value = mstats.kendalltau(x, y)
    # Write the coefficient first, then the p-value.
    for key, value in (("KENDALL", tau), ("KENDALL_PV", p_value)):
        self.Matrices[key][i] = value