Example #1
0
        # `split` is defined above this excerpt; its result is unpacked into
        # a working part and a validation ("v_") part.
        half_test_pos, v_test_pos = split(test_pos)

        # figure out the subset to sample (c)
        # The pool handed to sample_positive is half_neg stacked with
        # half_test_pos.
        u = logistic.vstack([half_neg, half_test_pos])
        # presumably cp is the fraction of half_pos that stays labeled --
        # TODO confirm against logistic.sample_positive
        pos_sample, unlabeled = logistic.sample_positive(cp, half_pos, u)

        # create validation set the same way
        u = logistic.vstack([v_neg, v_test_pos])
        v_p, v_u = logistic.sample_positive(cp, v_pos, u)

        print 'set up data...'

        data = (pos_sample, unlabeled, v_p, v_u)
        # Normalization is currently disabled; the data is used as sampled.
        #data, fixers = normalize_pu_nonnegative_data(*data)
        print 'not-normalized...'
        #print 'normalized...'
        # Only the second element of the returned pair (the estimators) is
        # kept here.
        _, estimators = logistic.calculate_estimators(*data, max_iter=100)

        # One result row: cp, the three row counts, the estimators, and a
        # reference ratio.
        t = (
            cp,
            half_pos.shape[0],
            half_neg.shape[0],
            half_test_pos.shape[0],
            estimators,
            # Truncated count half_pos * cp over the combined row count of
            # half_test_pos and half_pos; float() forces true division
            # under Python 2.
            float(int(half_pos.shape[0] * cp)) /
            (half_test_pos.shape[0] + half_pos.shape[0]),
        )
        table.append(t)

        print t
        # Re-wrap both samples as CSR sparse matrices before fitting.
        pos_sample = scipy.sparse.csr_matrix(pos_sample)
        unlabeled = scipy.sparse.csr_matrix(unlabeled)

        # Build a fully labeled set from the true classes: one label 1 per
        # row of pos followed by one label 0 per row of neg.
        testX = np.vstack([pos, neg])
        testy = np.hstack([np.array([1] * pos.shape[0]),
                            np.array([0] * neg.shape[0]),])
        # Fit the scaler on testX and replace testX with its transformed
        # version.
        # NOTE(review): newer scikit-learn releases expose this class as
        # StandardScaler -- confirm the installed version still has Scaler.
        scaler = sklearn.preprocessing.Scaler()
        scaler.fit(testX)
        testX = scaler.transform(testX)




        data = (pos_sample, unlabeled, v_p, v_u)
        # Normalization is currently disabled.
        #data, fixers = logistic.normalize_pu_data(*data)
        params, estimators = logistic.calculate_estimators(*data, max_iter=1000)
        # params unpacks into three values; none of them is read again
        # within this excerpt.
        theta, thetaM, b = params

        # Result row: the labels 'vf:' / 'c:' with their values, followed by
        # the estimators (tuple concatenation, so estimators must be a
        # tuple). vf and c are bound outside this excerpt.
        t = ('vf:', vf, 'c:', c, ) + estimators
        print t
        table.append(t)

        # run the LR on the true data
        # pos and neg are passed where the labeled sample and the unlabeled
        # pool go above; only the first parameter of the result is kept.
        (thetaTrue, _, _), _ = logistic.calculate_estimators(*(pos, neg, v_p, v_u), max_iter=1000)

        # unit area ellipse
        # NOTE(review): no ellipse is drawn in the visible lines; presumably
        # it is added after this excerpt.
        # Scatter the first two columns of pos (blue '+') and of neg (red
        # 'o', no marker outline) on a single-axes figure.
        fig = pyplot.figure()
        ax = fig.add_subplot(111)
        ax.scatter(pos[:,0], pos[:,1], s=6, c='b', marker='+')
        ax.scatter(neg[:,0], neg[:,1], s=6, c='r', marker='o', lw=0)
    # Run the experiment once per cp value (in the listed order); each pass
    # appends one summary tuple to `table` and prints it.
    for cp in [1.0, 0.5, 0.1, 0.7, 0.6, 0.4, 0.3, 0.2, 0.9, 0.8]:
        # split out the validation set separately
        # len(a) / 2 relies on Python 2 integer division (this file uses
        # Python 2 print statements); under Python 3 it would be a float.
        split_half = lambda a: logistic.sample_split(a, len(a) / 2)
        half_pos, v_pos = split_half(pos)
        half_neg, v_neg = split_half(neg)
        half_test_pos, v_test_pos = split_half(test_pos)

        # figure out the subset to sample (c)
        # The pool handed to sample_positive is half_neg stacked with
        # half_test_pos.
        u = logistic.vstack([half_neg, half_test_pos])
        # presumably cp is the fraction of half_pos that stays labeled --
        # TODO confirm against logistic.sample_positive
        pos_sample, unlabeled = logistic.sample_positive(cp, half_pos, u)

        # create validation set the same way
        u = logistic.vstack([v_neg, v_test_pos])
        v_p, v_u = logistic.sample_positive(cp, v_pos, u)

        print "set up data..."

        # Only the second element of the returned pair (the estimators) is
        # kept.
        _, estimators = logistic.calculate_estimators(pos_sample, unlabeled, v_p, v_u)

        # One result row: cp, the three split sizes, the estimators, and a
        # reference ratio.
        t = (
            cp,
            len(half_pos),
            len(half_neg),
            len(half_test_pos),
            estimators,
            # Truncated count len(half_pos) * cp over the combined size of
            # half_test_pos and half_pos; float() forces true division
            # under Python 2.
            float(int(len(half_pos) * cp)) / (len(half_test_pos) + len(half_pos)),
        )
        table.append(t)

        print t
Example #4
0
        # Split helper: asks sample_split for int(0.8 * rows) of `a`; each
        # result is unpacked into a working part and a validation ("v_")
        # part.
        split = lambda a: logistic.sample_split(a, int(0.8 * a.shape[0]))
        half_pos, v_pos = split(pos)
        half_neg, v_neg = split(neg)
        half_test_pos, v_test_pos = split(test_pos)

        # figure out the subset to sample (c)
        # The pool handed to sample_positive is half_neg stacked with
        # half_test_pos. cp is bound outside this excerpt.
        u = logistic.vstack([half_neg, half_test_pos])
        # presumably cp is the fraction of half_pos that stays labeled --
        # TODO confirm against logistic.sample_positive
        pos_sample, unlabeled = logistic.sample_positive(cp, half_pos, u)

        # create validation set the same way
        u = logistic.vstack([v_neg, v_test_pos])
        v_p, v_u = logistic.sample_positive(cp, v_pos, u)

        print 'set up data...'

        data = (pos_sample, unlabeled, v_p, v_u)
        # Normalization is currently disabled; the data is used as sampled.
        #data, fixers = normalize_pu_nonnegative_data(*data)
        print 'not-normalized...'
        #print 'normalized...'
        # Only the second element of the returned pair (the estimators) is
        # kept.
        _, estimators = logistic.calculate_estimators(*data, max_iter=100)

        # One result row: cp, the three row counts, the estimators, and the
        # truncated count half_pos * cp over the combined row count of
        # half_test_pos and half_pos (float() forces true division under
        # Python 2).
        t = (cp, 
         half_pos.shape[0], half_neg.shape[0], half_test_pos.shape[0], 
         estimators,
         float(int(half_pos.shape[0] * cp)) / (half_test_pos.shape[0] + half_pos.shape[0]),
        )
        table.append(t)

        print t

Example #5
0
        # Split helper: asks sample_split for half of `a`. len(a) / 2 relies
        # on Python 2 integer division (this file uses Python 2 print
        # statements); under Python 3 it would be a float.
        split_half = lambda a: logistic.sample_split(a, len(a) / 2)
        half_pos, v_pos = split_half(pos)
        half_neg, v_neg = split_half(neg)
        half_test_pos, v_test_pos = split_half(test_pos)

        # figure out the subset to sample (c)
        # The pool handed to sample_positive is half_neg stacked with
        # half_test_pos. cp is bound outside this excerpt.
        u = logistic.vstack([half_neg, half_test_pos])
        # presumably cp is the fraction of half_pos that stays labeled --
        # TODO confirm against logistic.sample_positive
        pos_sample, unlabeled = logistic.sample_positive(cp, half_pos, u)

        # create validation set the same way
        u = logistic.vstack([v_neg, v_test_pos])
        v_p, v_u = logistic.sample_positive(cp, v_pos, u)

        print 'set up data...'

        # Only the second element of the returned pair (the estimators) is
        # kept.
        _, estimators = logistic.calculate_estimators(pos_sample, unlabeled,
                                                      v_p, v_u)

        # One result row: cp, the three split sizes, the estimators, and a
        # reference ratio.
        t = (
            cp,
            len(half_pos),
            len(half_neg),
            len(half_test_pos),
            estimators,
            # Truncated count len(half_pos) * cp over the combined size of
            # half_test_pos and half_pos; float() forces true division
            # under Python 2.
            float(int(len(half_pos) * cp)) /
            (len(half_test_pos) + len(half_pos)),
        )
        table.append(t)

        print t
        # Re-wrap both samples as CSR sparse matrices before fitting.
        pos_sample = scipy.sparse.csr_matrix(pos_sample)
        unlabeled = scipy.sparse.csr_matrix(unlabeled)

        # Build a fully labeled set from the true classes: one label 1 per
        # row of pos followed by one label 0 per row of neg.
        testX = np.vstack([pos, neg])
        testy = np.hstack([
            np.array([1] * pos.shape[0]),
            np.array([0] * neg.shape[0]),
        ])
        # Fit the scaler on testX and replace testX with its transformed
        # version.
        # NOTE(review): newer scikit-learn releases expose this class as
        # StandardScaler -- confirm the installed version still has Scaler.
        scaler = sklearn.preprocessing.Scaler()
        scaler.fit(testX)
        testX = scaler.transform(testX)

        data = (pos_sample, unlabeled, v_p, v_u)
        # Normalization is currently disabled.
        #data, fixers = logistic.normalize_pu_data(*data)
        params, estimators = logistic.calculate_estimators(*data,
                                                           max_iter=1000)
        # params unpacks into three values; none of them is read again
        # within this excerpt.
        theta, thetaM, b = params

        # Result row: the labels 'vf:' / 'c:' with their values, followed by
        # the estimators (tuple concatenation, so estimators must be a
        # tuple). vf and c are bound outside this excerpt.
        t = (
            'vf:',
            vf,
            'c:',
            c,
        ) + estimators
        print t
        table.append(t)

        # run the LR on the true data
        # pos and neg are passed where the labeled sample and the unlabeled
        # pool go above; only the first parameter of the result is kept.
        (thetaTrue, _, _), _ = logistic.calculate_estimators(*(pos, neg, v_p,
                                                               v_u),
                                                             max_iter=1000)