Example #1
0
    def test_kendalltau(self):
        """Check ``mstats.kendalltau`` against reference (tau, p-value) pairs.

        The reference p-values below come from the normal approximation.
        SciPy releases whose ``mstats.kendalltau`` accepts ``method`` default
        to ``'auto'``, which switches to the exact test for small tie-free
        samples (the ``x``/``y`` pair) and then reports a different p-value,
        so the approximation is requested explicitly.
        """
        # Small samples; the trailing NaN is masked by fix_invalid.
        x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, np.nan])
        y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
        z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
        assert_almost_equal(
            np.asarray(mstats.kendalltau(x, y, method='asymptotic')),
            [+0.3333333, 0.4969059])
        assert_almost_equal(
            np.asarray(mstats.kendalltau(x, z, method='asymptotic')),
            [-0.5477226, 0.2785987])

        # Longer samples with many ties and one masked value on each side.
        x = ma.fix_invalid([
            0, 0, 0, 0, 20, 20, 0, 60, 0, 20, 10, 10, 0, 40, 0, 20, 0, 0, 0, 0,
            0, np.nan
        ])
        y = ma.fix_invalid([
            0, 80, 80, 80, 10, 33, 60, 0, 67, 27, 25, 80, 80, 80, 80, 80, 80,
            0, 10, 45, np.nan, 0
        ])
        result = mstats.kendalltau(x, y, method='asymptotic')
        assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])

        # test for namedtuple attributes
        res = mstats.kendalltau(x, y)
        attributes = ('correlation', 'pvalue')
        check_named_results(res, attributes, ma=True)
Example #2
0
 def test_kendalltau(self):
     # Tests some computations of Kendall's tau
     x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, np.nan])
     y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
     z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
     assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), [+0.3333333, 0.4969059])
     assert_almost_equal(np.asarray(mstats.kendalltau(x, z)), [-0.5477226, 0.2785987])
     #
     x = ma.fix_invalid([0, 0, 0, 0, 20, 20, 0, 60, 0, 20, 10, 10, 0, 40, 0, 20, 0, 0, 0, 0, 0, np.nan])
     y = ma.fix_invalid([0, 80, 80, 80, 10, 33, 60, 0, 67, 27, 25, 80, 80, 80, 80, 80, 80, 0, 10, 45, np.nan, 0])
     result = mstats.kendalltau(x, y)
     assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])
Example #3
0
 def test_kendalltau(self):
     # Tests some computations of Kendall's tau
     x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66,np.nan])
     y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
     z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
     assert_almost_equal(np.asarray(mstats.kendalltau(x,y)),
                         [+0.3333333,0.4969059])
     assert_almost_equal(np.asarray(mstats.kendalltau(x,z)),
                         [-0.5477226,0.2785987])
     #
     x = ma.fix_invalid([0, 0, 0, 0,20,20, 0,60, 0,20,
                         10,10, 0,40, 0,20, 0, 0, 0, 0, 0, np.nan])
     y = ma.fix_invalid([0,80,80,80,10,33,60, 0,67,27,
                         25,80,80,80,80,80,80, 0,10,45, np.nan, 0])
     result = mstats.kendalltau(x,y)
     assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])
 def compute(self, x, y):
   """Return Kendall's tau and its p-value for ``x`` and ``y``.

   Both inputs must have the same number of elements (checked with an
   assertion). The result is a dict with the keys ``"KENDALL"`` (tau)
   and ``"KENDALL_PV"`` (p-value).
   """
   n_x, n_y = np.size(x), np.size(y)
   assert n_x == n_y
   tau, pvalue = mstats.kendalltau(x, y)
   result = {}
   result["KENDALL"] = tau
   result["KENDALL_PV"] = pvalue
   return result
Example #5
0
 def get_score(self, word: str):
     """Return Kendall's tau between the neighbour lists of ``word``.

     The ``self.top_n_neighbors`` nearest neighbours of ``word`` are
     requested from ``self.w2v1`` and ``self.w2v2``, each neighbour is
     mapped through ``self.word_index``, and the two resulting sequences
     are compared with ``mstats.kendalltau``. Only the correlation is
     returned; the p-value is discarded.

     Raises:
         ValueError: if either model returns a number of neighbours
             different from ``self.top_n_neighbors``.
     """
     wanted = self.top_n_neighbors
     neighbours_1 = [
         neighbour
         for neighbour, _similarity in self.w2v1.most_similar(word, topn=wanted)
     ]
     neighbours_2 = [
         neighbour
         for neighbour, _similarity in self.w2v2.most_similar(word, topn=wanted)
     ]

     # Guard clause: both models must deliver exactly the requested count.
     if not (len(neighbours_1) == len(neighbours_2) == wanted):
         raise ValueError("Problem with word {word} and its neighbours".format(word=word))

     indices_1 = [self.word_index(neighbour) for neighbour in neighbours_1]
     indices_2 = [self.word_index(neighbour) for neighbour in neighbours_2]
     tau, _p_value = mstats.kendalltau(indices_1, indices_2)
     return tau
Example #6
0
    def test_kendalltau(self):
        """Check ``mstats.kendalltau`` against reference (tau, p-value) pairs.

        The reference p-values below come from the normal approximation.
        SciPy releases whose ``mstats.kendalltau`` accepts ``method`` default
        to ``'auto'``, which switches to the exact test for small tie-free
        samples (the ``x``/``y`` pair) and then reports a different p-value,
        so the approximation is requested explicitly.
        """
        # Small samples; the trailing NaN is masked by fix_invalid.
        x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, np.nan])
        y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan])
        z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan])
        assert_almost_equal(
            np.asarray(mstats.kendalltau(x, y, method='asymptotic')),
            [+0.3333333, 0.4969059])
        assert_almost_equal(
            np.asarray(mstats.kendalltau(x, z, method='asymptotic')),
            [-0.5477226, 0.2785987])

        # Longer samples with many ties and one masked value on each side.
        x = ma.fix_invalid([0, 0, 0, 0, 20, 20, 0, 60, 0, 20,
                            10, 10, 0, 40, 0, 20, 0, 0, 0, 0, 0, np.nan])
        y = ma.fix_invalid([0, 80, 80, 80, 10, 33, 60, 0, 67, 27,
                            25, 80, 80, 80, 80, 80, 80, 0, 10, 45, np.nan, 0])
        result = mstats.kendalltau(x, y, method='asymptotic')
        assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009])

        # test for namedtuple attributes
        res = mstats.kendalltau(x, y)
        attributes = ('correlation', 'pvalue')
        check_named_results(res, attributes, ma=True)
Example #7
0
 def compute(self, x, y):
     """Return Kendall's tau and its p-value for ``x`` and ``y``.

     Both inputs must have the same number of elements (checked with an
     assertion). The result is a dict with the keys ``"KENDALL"`` (tau)
     and ``"KENDALL_PV"`` (p-value).
     """
     size_x = np.size(x)
     size_y = np.size(y)
     assert size_x == size_y
     tau, pvalue = mstats.kendalltau(x, y)
     return dict(KENDALL=tau, KENDALL_PV=pvalue)
def draw_corplot(x, y, xname, yname, add_robust=False, save_to_file=True,
    ax=None, stats_title=True, stats_legend=False, customcol=None,
    legendprefix=''):
    """Scatter ``y`` against ``x`` and overlay a least-squares regression line.

    Pearson (via ``linregress``), Spearman and Kendall statistics are computed
    for the data and optionally shown in the title and/or the legend.

    Parameters
    ----------
    x, y : array-like
        Paired samples to plot and correlate.
    xname, yname : str
        Variable names; capitalised for the axis labels and used verbatim in
        the output file name ``corplot_<xname>x<yname>.png`` inside ``OUTDIR``.
    add_robust : bool
        Also draw the ``theilslopes`` line together with a band spanned by
        its lower and upper slope bounds.
    save_to_file : bool
        Save the figure to ``OUTDIR``. When ``ax`` is supplied, the figure
        that owns ``ax`` is saved.
    ax : matplotlib axes or None
        Axes to draw into. A new single-axes figure is created when None.
    stats_title : bool
        Put R, rho and tau with their p-values in the axes title.
    stats_legend : bool
        Label the regression line with Kendall's tau and draw a legend.
    customcol : colour or None
        Colour for both samples and regression line; when None the
        ``PLOTCOLS`` defaults are used.
    legendprefix : str
        Text prepended to the legend label.

    Returns
    -------
    None

    Notes
    -----
    A figure created by this function is closed after it has been saved.
    It is left open when ``save_to_file`` is False so the caller can still
    show or save it through pyplot. A caller-supplied ``ax`` is never closed.
    """
    # Choose the right colour for the plot.
    if customcol is None:
        regress_col = PLOTCOLS['regression']
        sample_col = PLOTCOLS['samples']
    else:
        regress_col = customcol
        sample_col = customcol
    sample_alpha = 0.75

    # Create a new plot, or recover the figure owning the caller's axes so
    # that saving works in both cases (previously ``fig`` was unbound when
    # ``ax`` was passed in).
    created_fig = ax is None
    if created_fig:
        fig, ax = pyplot.subplots(nrows=1, ncols=1)
    else:
        fig = ax.figure

    # Plot a scatter plot of the x and y values.
    ax.plot(x, y, 'o', color=sample_col, alpha=sample_alpha)

    # Plot the robust regression line.
    if add_robust:
        slope, intercept, lo_slope, up_slope = theilslopes(y, x, alpha=0.95)
        x_pred = numpy.array([numpy.min(x), numpy.max(x)])
        y_pred = slope * x_pred + intercept
        y_lo = lo_slope * x_pred + intercept
        y_up = up_slope * x_pred + intercept
        ax.plot(x_pred, y_pred, '-', color=regress_col)
        # NOTE(review): the band always uses the default regression colour,
        # even when customcol is given -- confirm whether that is intended.
        ax.fill_between(x_pred, y_lo, y_up, linewidth=3, alpha=0.2,
            color=PLOTCOLS['regression'])

    # Perform a linear regression. Result objects without named attributes
    # are unpacked positionally instead.
    model = linregress(x, y)
    try:
        r = model.rvalue
        p = model.pvalue
        slope = model.slope
        intercept = model.intercept
    except AttributeError:
        slope, intercept, r, p, stderr = model

    # Perform a Spearman correlation.
    spearman = spearmanr(x, y)
    try:
        spearman_rho = spearman.correlation
        spearman_p = spearman.pvalue
    except AttributeError:
        spearman_rho, spearman_p = spearman

    # Compute Kendall's Tau.
    kendall = kendalltau(x, y)
    try:
        kendall_tau = kendall.correlation
        kendall_p = kendall.pvalue
    except AttributeError:
        kendall_tau, kendall_p = kendall

    # Set the regression line's label.
    if stats_legend:
        # Uncomment if you'd like to see both parametric and non-parametric
        # test results.
        #lbl = r"$R=%.2f, p=%.2f$" % (r, p)
        #lbl = lbl + "\n" + r"$\tau=%.2f, p=%.2f$" % (kendall_tau, kendall_p)
        # Show Kendall's tau, as we're using a lowish N.
        if kendall_p < 0.001:
            kendall_pstr = r"p<0.001"
        else:
            kendall_pstr = r"p=%.3f" % (kendall_p)
        lbl = r"%s$\tau=%.2f, %s$" % (legendprefix, kendall_tau, kendall_pstr)
    else:
        lbl = None

    # Plot the least-squares regression line.
    x_pred = numpy.array([numpy.min(x), numpy.max(x)])
    y_pred = slope * x_pred + intercept
    ax.plot(x_pred, y_pred, '-', color=regress_col, linewidth=3, label=lbl)

    # Finish the plot.
    ax.set_xlabel(xname.capitalize(), fontsize=FONTSIZE['label'])
    ax.set_ylabel(yname.capitalize(), fontsize=FONTSIZE['label'])
    if stats_title:
        ax.set_title("R=%.2f, p=%.3f; Rho=%.2f, p=%.3f; Tau=%.3f, p=%.3f" %
            (r, p, spearman_rho, spearman_p, kendall_tau, kendall_p))
    if stats_legend:
        ax.legend(loc="best", fontsize=FONTSIZE['legend'])

    # Save the plot.
    if save_to_file:
        fig.savefig(os.path.join(OUTDIR, "corplot_%sx%s.png" % (xname, yname)))
        # Release figures we created ourselves once they are on disk; the
        # original ``if ax is None`` check could never be true at this point.
        if created_fig:
            pyplot.close(fig)
Example #9
0
 def ktau_corr(X, Y):
     """Return ``mstats.kendalltau(X, Y)`` with ties used and missing pairs ignored.

     The call passes ``use_ties=True`` and ``use_missing=False``; the
     result object of ``mstats.kendalltau`` is returned unchanged.
     """
     options = {"use_ties": True, "use_missing": False}
     return mstats.kendalltau(X, Y, **options)
Example #10
0
# %-style template for one log line; filled from a mapping that provides the
# keys npy_fname, function, start, end, m and date.
LOG_MSG = (
    "#npy_fname=%(npy_fname)s, function=%(function)s, "
    "start=%(start)d, end=%(end)d, m=%(m)d, date=%(date)s"
)
REPORT_N = 1000
# Per-user directory under /tmp, named after the login of the current uid.
TMP_DIR = "/tmp/%s" % (pwd.getpwuid(os.getuid()).pw_name,)

def euclidean(x, y):
  """Return ``ma.sqrt`` of the summed element-wise product of ``x - y`` with its transpose.

  For 1-D inputs the transpose is a no-op, so this is the Euclidean
  distance between ``x`` and ``y``.
  """
  diff = x - y
  products = diff * diff.T
  total = products.sum()
  return ma.sqrt(total)


# this should be in a separate file
# Maps a measure name to a two-argument callable returning a single score.
# The mstats-based entries keep only the first element of the result
# (the statistic) and drop the p-value.
FUNCTIONS = dict(
  pearson=lambda x, y: mstats.pearsonr(x, y)[0],
  spearman=lambda x, y: mstats.spearmanr(x, y)[0],
  euclidean=euclidean,
  kendalltau=lambda x, y: mstats.kendalltau(x, y)[0],
  dcor=dcor,
  )

def main(npy_fname=None, function=None, batchname=None, outdir=None, start=None, end=None, m=None):
  """Compute pairs of dependency

  Validates the arguments with assertions: ``npy_fname`` and ``function``
  must be given, ``function`` must be a key of ``FUNCTIONS``, ``outdir``
  must be an existing directory and ``m`` must convert to a positive int.
  When ``end`` is None it defaults to ``m*(m-1)/2``.
  """
  # `assert a, b` only tests `a` and uses `b` as the failure message, so
  # both values are tested explicitly here.
  assert npy_fname and function
  assert function in FUNCTIONS
  assert os.path.exists(outdir)
  assert os.path.isdir(outdir)

  m = int(m)
  assert m > 0

  if end is None:
    # m*(m-1) is always even; floor division keeps `end` an int under
    # Python 3 as well (true division would produce a float).
    end = m * (m - 1) // 2
Example #11
0
 def ktau_corr(X, Y):
     """Return ``mstats.kendalltau(X, Y)`` with ties used and missing pairs ignored.

     The call passes ``use_ties=True`` and ``use_missing=False``; the
     result object of ``mstats.kendalltau`` is returned unchanged.
     """
     result = mstats.kendalltau(
         X,
         Y,
         use_ties=True,
         use_missing=False,
     )
     return result
 def compute(self, x, y, i):
   """Store Kendall's tau and its p-value for ``x`` and ``y`` at index ``i``.

   ``x`` and ``y`` must have the same number of elements and ``i`` must be
   non-negative (both checked with assertions). The correlation is written
   to ``self.Matrices["KENDALL"][i]`` and the p-value to
   ``self.Matrices["KENDALL_PV"][i]``; nothing is returned.
   """
   assert np.size(x) == np.size(y)
   assert i >= 0
   tau, pvalue = mstats.kendalltau(x, y)
   for key, value in (("KENDALL", tau), ("KENDALL_PV", pvalue)):
     self.Matrices[key][i] = value