def gen_sample(p, n):
    """Draw fresh positive and negative samples plus a labeled subset.

    p -- passed to gaussian() as the size argument for the positive draw
         (presumably the number of positive points; confirm in gaussian).
    n -- same, for the negative draw.

    Both draws are passed through add_x2_y2() (per its name, it appends
    x^2 and y^2 columns, which would give (p,4) and (n,4) arrays).

    Returns the tuple (positives, negatives) concatenated with whatever
    tuple logistic.sample_positive(c, positives, negatives) returns.

    NOTE(review): `c` is not a parameter; it is looked up in the
    enclosing scope -- confirm it is defined at module level.
    """
    positives = add_x2_y2(gaussian(mean_pos, cov_pos, p))
    negatives = add_x2_y2(gaussian(mean_neg, cov_neg, n))
    labeled_split = logistic.sample_positive(c, positives, negatives)
    return (positives, negatives) + labeled_split
Ejemplo n.º 2
0
def gen_sample(c, p, n):
    """Draw fresh positive and negative samples plus a labeled subset.

    c -- forwarded unchanged as the first argument of
         logistic.sample_positive (presumably the labeling fraction;
         confirm against that function).
    p -- passed to gaussian() as the size argument for the positive draw
         (presumably the number of positive points; confirm in gaussian).
    n -- same, for the negative draw.

    Both draws are passed through add_x2_y2() (per its name, it appends
    x^2 and y^2 columns, which would give (p,4) and (n,4) arrays).

    Returns the tuple (positives, negatives) concatenated with whatever
    tuple logistic.sample_positive(c, positives, negatives) returns.
    """
    positives = add_x2_y2(gaussian(mean_pos, cov_pos, p))
    negatives = add_x2_y2(gaussian(mean_neg, cov_neg, n))
    labeled_split = logistic.sample_positive(c, positives, negatives)
    return (positives, negatives) + labeled_split
Ejemplo n.º 3
0
 def gen_sample(p, n):
     """Draw positive and negative samples and return their labeled split.

     p -- passed to gaussian() as the size argument for the positive draw
          (presumably the number of positive points; confirm in gaussian).
     n -- same, for the negative draw.

     Each draw gets two extra columns holding the squares of its first
     two columns before being handed on.

     Returns exactly what logistic.sample_positive(c, positives,
     negatives) returns.

     NOTE(review): `c` is not a parameter; it is looked up in the
     enclosing scope -- confirm it is defined at module level.
     """
     def with_squares(points):
         """Append the squares of columns 0 and 1 as two new columns.

         Assumes `points` is an (N,2) array, giving an (N,4) result.
         """
         x_squared = points[:, 0] ** 2
         y_squared = points[:, 1] ** 2
         # Stack row-wise on the transpose, then flip back so the squares
         # end up as trailing columns.
         stacked = logistic.vstack([points.T, x_squared, y_squared])
         return stacked.T

     positives = with_squares(gaussian(mean_pos, cov_pos, p))
     negatives = with_squares(gaussian(mean_neg, cov_neg, n))
     return logistic.sample_positive(c, positives, negatives)
Ejemplo n.º 4
0
    numpy.save(os.path.join(folder, 'data.test_pos.swissprot.npy'),
               test_pos.todense())

    print 'read data...'

    table = []
    for cp in [1.0, 0.5, 0.1, 0.7, 0.6, 0.4, 0.3, 0.2, 0.9, 0.8]:
        # split out the validation set separately
        split = lambda a: logistic.sample_split(a, int(0.8 * a.shape[0]))
        half_pos, v_pos = split(pos)
        half_neg, v_neg = split(neg)
        half_test_pos, v_test_pos = split(test_pos)

        # figure out the subset to sample (c)
        u = logistic.vstack([half_neg, half_test_pos])
        pos_sample, unlabeled = logistic.sample_positive(cp, half_pos, u)

        # create validation set the same way
        u = logistic.vstack([v_neg, v_test_pos])
        v_p, v_u = logistic.sample_positive(cp, v_pos, u)

        print 'set up data...'

        data = (pos_sample, unlabeled, v_p, v_u)
        #data, fixers = normalize_pu_nonnegative_data(*data)
        print 'not-normalized...'
        #print 'normalized...'
        _, estimators = logistic.calculate_estimators(*data, max_iter=100)

        t = (
            cp,
Ejemplo n.º 5
0
    print "read data..."

    # set up data

    table = []
    for cp in [1.0, 0.5, 0.1, 0.7, 0.6, 0.4, 0.3, 0.2, 0.9, 0.8]:
        # split out the validation set separately
        split_half = lambda a: logistic.sample_split(a, len(a) / 2)
        half_pos, v_pos = split_half(pos)
        half_neg, v_neg = split_half(neg)
        half_test_pos, v_test_pos = split_half(test_pos)

        # figure out the subset to sample (c)
        u = logistic.vstack([half_neg, half_test_pos])
        pos_sample, unlabeled = logistic.sample_positive(cp, half_pos, u)

        # create validation set the same way
        u = logistic.vstack([v_neg, v_test_pos])
        v_p, v_u = logistic.sample_positive(cp, v_pos, u)

        print "set up data..."

        _, estimators = logistic.calculate_estimators(pos_sample, unlabeled, v_p, v_u)

        t = (
            cp,
            len(half_pos),
            len(half_neg),
            len(half_test_pos),
            estimators,