Python get_features 예제들, galatea.s3c.feature_loading.get_features Python 예제들

예제 #1

0

파일 보기

파일: learning_curve_point.py 프로젝트: vd114/galatea

def main(train_path, test_path, num_examples, **kwargs):
    """Run a learning-curve experiment over and over, printing running statistics.

    Labels are requested with ``cifar10=True``; features are loaded unsplit
    from ``train_path`` and ``test_path``.  The function then loops forever:
    each pass calls ``run_experiment`` once and prints the mean and standard
    deviation of all accuracies gathered so far.  There is no exit condition,
    so the process has to be stopped from outside.

    Parameters
    ----------
    train_path : passed to ``get_features`` for the training features.
    test_path : passed to ``get_features`` for the test features.
    num_examples : forwarded unchanged to ``run_experiment``.
    **kwargs : forwarded unchanged to ``run_experiment``.
    """
    train_y, fold_indices = get_labels_and_fold_indices(cifar10=True, stl10=False)
    # The fold indices are not used here; release the reference immediately.
    del fold_indices
    assert train_y is not None

    train_X = get_features(train_path, split=False)

    # Sanity checks on what was loaded.
    assert str(train_X.dtype) == 'float32'
    assert train_X.shape[0] == 50000
    assert train_y.shape == (50000,)

    test_X = get_features(test_path, split=False)
    test_y = get_test_labels(cifar10=True, stl10=False)

    accuracies = []
    while True:
        latest = run_experiment(train_X, train_y, test_X, test_y,
                                num_examples, **kwargs)
        accuracies.append(latest)
        history = np.asarray(accuracies)
        mean_acc = history.mean()
        std_acc = history.std()
        print('accuracy: %f +- %f' % (mean_acc, std_acc))

예제 #2

0

파일 보기

파일: tlc_cascade_sup.py 프로젝트: cc13ny/galatea

def main(train_path,
        out_path,
        split,
        **kwargs):
    """Train the cascade model with auxiliary CIFAR-100 data and save model + report.

    Steps, in order:
      1. load fine/coarse labels and fold indices, then the training features;
      2. build the 'omnivore' and 'fruit' classifiers;
      3. load auxiliary features (``train_path`` with 'aux' replaced by
         'train') and the CIFAR-100 coarse training labels;
      4. keep only auxiliary examples whose coarse label is one of
         4, 11, 3, 12, 7, 6, then keep the first 300 of those;
      5. call ``train`` and write ``<out_path>.model.pkl`` and
         ``<out_path>.validation_report.txt``.

    Parameters
    ----------
    train_path : path handed to ``get_features`` and ``Report``.
    out_path : prefix for the saved model and validation report.
    split : forwarded to ``get_features`` (training features) and ``Report``.
    **kwargs : forwarded unchanged to ``train``.
    """

    y_fine, y_coarse, fold_indices = get_labels_and_fold_indices()

    gc.collect()

    print('loading training features')

    train_X = get_features(train_path, split)
    gc.collect()

    # Sanity checks on what was loaded.
    assert str(train_X.dtype) == 'float32'
    assert train_X.shape[0] == 120
    assert y_fine.shape == (120,)
    assert y_coarse.shape == (120,)

    report = Report(train_path, split)

    gc.collect()

    print('making omnivore classifiers')
    omnivore_classifiers = get_classifiers('omnivore', train_X, y_fine, y_coarse, fold_indices)
    print('making fruit classifiers')
    fruit_classifiers = get_classifiers('fruit', train_X, y_fine, y_coarse, fold_indices)

    print('loading cifar features')
    aux_features = get_features(train_path.replace('aux', 'train'), False)
    print('loading cifar labels')
    aux_labels = CIFAR100(which_set='train').y_coarse

    print('making masks')
    # The mask has to be boolean.  Indexing with a uint8 array is *integer*
    # (fancy) indexing in NumPy: it would pick rows 0 and 1 repeatedly
    # instead of filtering out the unwanted classes.
    # NOTE(review): assumes aux_labels is 1-D so the mask can index the rows
    # of aux_features -- confirm against CIFAR100.y_coarse.
    mask = np.zeros(aux_labels.shape, dtype=bool)
    for label in [4, 11, 3, 12, 7, 6]:
        mask |= (aux_labels == label)

    print('restricting classes')
    aux_features = aux_features[mask, :]
    aux_labels = aux_labels[mask]

    print('downsampling data')
    aux_features = aux_features[0:300, :]
    aux_labels = aux_labels[0:300]

    print('main train loop')
    model = train(fold_indices, omnivore_classifiers, fruit_classifiers, train_X, y_fine, y_coarse,
            aux_features, aux_labels, report, **kwargs)

    serial.save(out_path+'.model.pkl', model)
    report.write(out_path+'.validation_report.txt')

예제 #3

0

파일 보기

파일: evaluate_svm.py 프로젝트: cc13ny/galatea

def main(model_path, test_path, output_path, dataset, split, **kwargs):
    """Load a saved model and evaluate it on the test set of ``dataset``.

    ``dataset`` must be exactly one of 'cifar10', 'cifar100' or 'stl10'.
    The number of test examples is checked (8000 for stl10, 10000 for the
    CIFAR variants) before ``test`` is called.

    Parameters
    ----------
    model_path : file loaded with ``serial.load``.
    test_path : passed to ``get_features`` for the test features.
    output_path : forwarded to ``test``.
    dataset : dataset name, see above.
    split : forwarded to ``get_features``.
    **kwargs : accepted but not used by this function.

    Raises
    ------
    AssertionError
        If the dataset name is not recognised or the example counts differ
        from what is expected.
    """
    model = serial.load(model_path)

    stl10 = dataset == 'stl10'
    cifar10 = dataset == 'cifar10'
    cifar100 = dataset == 'cifar100'
    # Exactly one flag may be set.
    assert cifar10 + cifar100 + stl10 == 1

    labels = get_test_labels(cifar10, cifar100, stl10)
    features = get_features(test_path, split, False)

    if cifar10 or cifar100:
        num_examples = 10000
    if stl10:
        num_examples = 8000

    found = features.shape[0]
    if found != num_examples:
        raise AssertionError('Expected %d examples but got %d' % (num_examples, found))
    assert labels.shape[0] == num_examples

    test(model, features, labels, output_path)

예제 #4

0

파일 보기

def main(train_path, out_path, split, **kwargs):
    """Train and save one model per training subset ('omnivore', then 'fruit').

    The training features are loaded once.  For each subset a fresh
    ``Report`` is created, the subset is extracted with
    ``get_training_subset``, a model is trained, and both the model and the
    validation report are written next to ``out_path``.

    Parameters
    ----------
    train_path : passed to ``get_features`` and ``Report``.
    out_path : prefix of every output file.
    split : passed to ``get_features`` and ``Report``.
    **kwargs : forwarded unchanged to ``train``.
    """
    print('loading training features')

    train_X = get_features(train_path, split)
    gc.collect()

    # Sanity checks on what was loaded.
    assert str(train_X.dtype) == 'float32'
    assert train_X.shape[0] == 120

    # (subset name, model file suffix, report file suffix), in run order.
    jobs = (
        ('omnivore', '.omnivore.model.pkl', '.omnivore.validation_report.txt'),
        ('fruit', '.fruit.model.pkl', '.fruit.validation_report.txt'),
    )

    for subset_name, model_suffix, report_suffix in jobs:
        report = Report(train_path, split)

        subset_X, subset_y, fold_indices = get_training_subset(
            train_X, subset_name)

        model = train(fold_indices, subset_X, subset_y, report, **kwargs)

        serial.save(out_path + model_suffix, model)
        report.write(out_path + report_suffix)

예제 #5

0

파일 보기

파일: fit_final_model.py 프로젝트: 123fengye741/pylearn2

def main(train_path, out_path, dataset, standardize, C, **kwargs):
    """Fit a final model on the whole training set of ``dataset`` and save it.

    ``dataset`` must be exactly one of 'stl10', 'cifar10' or 'cifar100'.

    Parameters
    ----------
    train_path : passed to ``get_features`` for the training features.
    out_path : destination handed to ``serial.save``.
    dataset : dataset name, see above.
    standardize : forwarded to ``get_features``.
    C : forwarded to ``train``.
    **kwargs : accepted but not used by this function.
    """
    is_stl10 = dataset == 'stl10'
    is_cifar10 = dataset == 'cifar10'
    is_cifar100 = dataset == 'cifar100'
    # Exactly one flag may be set.
    assert is_stl10 + is_cifar10 + is_cifar100 == 1

    print('getting labels and oflds')
    # The fold indices are returned alongside the labels but are not used here.
    train_y, fold_indices = get_labels_and_fold_indices(
        is_cifar10, is_cifar100, is_stl10)
    gc.collect()
    assert train_y is not None

    print('loading training features')
    train_X = get_features(train_path, split=False, standardize=standardize)

    # Sanity checks on what was loaded.
    assert str(train_X.dtype) == 'float32'
    num_rows = train_X.shape[0]
    if is_stl10:
        assert num_rows == 5000
    if is_cifar10 or is_cifar100:
        assert num_rows == 50000
        assert train_y.shape == (50000,)

    print('training model')
    fitted = train(train_X, train_y, C)

    print('saving model')
    serial.save(out_path, fitted)

예제 #6

0

파일 보기

파일: fit_final_model.py 프로젝트: gdesjardins/pylearn2_bio

def main(train_path, out_path, dataset, standardize, C, **kwargs):
    """Train the final model for ``dataset`` on all training data and save it.

    ``dataset`` must be exactly one of 'stl10', 'cifar10' or 'cifar100'.

    Parameters
    ----------
    train_path : passed to ``get_features`` for the training features.
    out_path : destination handed to ``serial.save``.
    dataset : dataset name, see above.
    standardize : forwarded to ``get_features``.
    C : forwarded to ``train``.
    **kwargs : accepted but not used by this function.
    """
    stl10 = dataset == 'stl10'
    cifar10 = dataset == 'cifar10'
    cifar100 = dataset == 'cifar100'
    # Exactly one flag may be set.
    assert stl10 + cifar10 + cifar100 == 1

    print('getting labels and oflds')
    # Fold indices come back with the labels; this function does not use them.
    train_y, fold_indices = get_labels_and_fold_indices(cifar10, cifar100, stl10)
    gc.collect()
    assert train_y is not None

    print('loading training features')
    train_X = get_features(train_path, split=False, standardize=standardize)

    # Sanity checks on what was loaded.
    assert str(train_X.dtype) == 'float32'
    row_count = train_X.shape[0]
    if stl10:
        assert row_count == 5000
    if cifar10 or cifar100:
        assert row_count == 50000
        assert train_y.shape == (50000,)

    print('training model')
    final_model = train(train_X, train_y, C)

    print('saving model')
    serial.save(out_path, final_model)

예제 #7

0

파일 보기

파일: tlcloo_sub.py 프로젝트: cc13ny/galatea

def main(train_path, out_path, split, **kwargs):
    """Fit one model on the 'omnivore' subset and one on the 'fruit' subset.

    Features are loaded once; each subset then gets its own ``Report``, its
    own call to ``train`` and its own pair of output files under
    ``out_path``.

    Parameters
    ----------
    train_path : passed to ``get_features`` and ``Report``.
    out_path : prefix of every output file.
    split : passed to ``get_features`` and ``Report``.
    **kwargs : forwarded unchanged to ``train``.
    """
    print('loading training features')

    train_X = get_features(train_path, split)
    gc.collect()

    # Sanity checks on what was loaded.
    assert str(train_X.dtype) == 'float32'
    assert train_X.shape[0] == 120

    def fit_and_save(subset_name, model_suffix, report_suffix):
        # One complete train/save cycle for a single subset.
        report = Report(train_path, split)
        subset_X, subset_y, fold_indices = get_training_subset(train_X, subset_name)
        model = train(fold_indices, subset_X, subset_y, report, **kwargs)
        serial.save(out_path + model_suffix, model)
        report.write(out_path + report_suffix)

    fit_and_save('omnivore', '.omnivore.model.pkl', '.omnivore.validation_report.txt')
    fit_and_save('fruit', '.fruit.model.pkl', '.fruit.validation_report.txt')

예제 #8

0

파일 보기

def main(train_path, out_path, split, **kwargs):
    """Build the omnivore/fruit classifiers, train the cascade model, save results.

    Writes ``<out_path>.model.pkl`` and ``<out_path>.validation_report.txt``.

    Parameters
    ----------
    train_path : passed to ``get_features`` and ``Report``.
    out_path : prefix of the output files.
    split : passed to ``get_features`` and ``Report``.
    **kwargs : forwarded unchanged to ``train``.
    """
    y_fine, y_coarse, fold_indices = get_labels_and_fold_indices()
    gc.collect()

    print('loading training features')
    train_X = get_features(train_path, split)
    gc.collect()

    # Sanity checks on what was loaded.
    assert str(train_X.dtype) == 'float32'
    assert train_X.shape[0] == 120
    assert y_fine.shape == (120,)
    assert y_coarse.shape == (120,)

    report = Report(train_path, split)
    gc.collect()

    # Both classifier families are built from the same data and folds.
    shared_args = (train_X, y_fine, y_coarse, fold_indices)

    print('making omnivore classifiers')
    omnivore_classifiers = get_classifiers('omnivore', *shared_args)
    print('making fruit classifiers')
    fruit_classifiers = get_classifiers('fruit', *shared_args)

    model = train(fold_indices, omnivore_classifiers, fruit_classifiers,
                  train_X, y_fine, y_coarse, report, **kwargs)

    serial.save(out_path + '.model.pkl', model)
    report.write(out_path + '.validation_report.txt')

예제 #9

0

파일 보기

def main(model_path,
        data_path,
        split,
        **kwargs):

    model =  serial.load(model_path)

    raw_dataset = get_test_data()
    X = get_features(data_path, split, False)
    assert X.shape[0] == 8000

    size = 100
    for start in xrange(0,X.shape[0]-size,size):
        y = raw_dataset.y[start:start+size]
        pred_y = model.predict(X[start:start+size,:])

        wrong_mask = y != pred_y

        raw_X = raw_dataset.X[start:start+size,:]
        pv = make_viewer(raw_X / 127.5, rescale = False, is_color = True, activation = wrong_mask )
        pv.show()

        right = 0
        for i in xrange(y.shape[0]):
            if y[i] == pred_y[i]:
                right += 1
                print str(start+i)+': correct ('+raw_dataset.class_names[y[i]-1]+')'
            else:
                print str(start+i)+': mistook '+raw_dataset.class_names[y[i]-1]+' for '+raw_dataset.class_names[pred_y[i]-1]
        print 'accuracy this batch : ',float(right)/float(size)
        x = raw_input()
        if x == 'q':
            break

예제 #10

0

파일 보기

파일: fewer.py 프로젝트: cc13ny/galatea

def main(model_path,
        data_path,
        split,
        **kwargs):

    model =  serial.load(model_path)

    raw_dataset = get_test_data()
    X = get_features(data_path, split, False)
    assert X.shape[0] == 8000

    size = 25
    for start in xrange(0,X.shape[0]-size,size):
        y = raw_dataset.y[start:start+size]
        pred_y = model.predict(X[start:start+size,:])

        wrong_mask = y != pred_y

        raw_X = raw_dataset.X[start:start+size,:]
        pv = make_viewer(raw_X / 127.5, rescale = False, is_color = True, activation = wrong_mask )
        pv.show()

        right = 0
        for i in xrange(y.shape[0]):
            if y[i] == pred_y[i]:
                right += 1
                print str(start+i)+': correct ('+raw_dataset.class_names[y[i]-1]+')'
            else:
                print str(start+i)+': mistook '+raw_dataset.class_names[y[i]-1]+' for '+raw_dataset.class_names[pred_y[i]-1]
        print 'accuracy this batch : ',float(right)/float(size)
        x = raw_input()
        if x == 'q':
            break

예제 #11

0

파일 보기

파일: coarse_analysis.py 프로젝트: cc13ny/galatea

def main(train_path, out_path, split, **kwargs):
    """Train on the full training features and validate on the 'aux' features.

    Training features come from ``train_path`` with 'aux' replaced by
    'train'; the features at ``train_path`` itself serve as the test set.
    Writes ``<out_path>.model.pkl`` and ``<out_path>.validation_report.txt``.

    Parameters
    ----------
    train_path : passed to ``get_features`` (twice, see above) and ``Report``.
    out_path : prefix of the output files.
    split : passed to ``get_features`` and ``Report``.
    **kwargs : forwarded unchanged to ``train``.
    """

    def report_mem(prefix):
        # Memory usage is printed only when the module-level `mem` is truthy.
        if mem:
            print(prefix + str(mem.usage()))

    report_mem('mem usage before getting labels and folds ')
    train_y, test_y = get_labels()
    report_mem('mem usage after getting labels and folds ')
    gc.collect()
    assert train_y is not None

    print('loading training features')

    report_mem('mem usage before getting features ')
    train_X = get_features(train_path.replace('aux', 'train'), split)
    test_X = get_features(train_path, split)
    gc.collect()
    report_mem('mem usage after getting features ')

    # Sanity checks on what was loaded.
    assert train_X.shape[0] == 50000
    assert train_y.shape == (50000,)

    assert str(train_X.dtype) == 'float32'
    assert test_X.shape[0] == 120
    assert test_y.shape == (120,)

    report = Report(train_path, split)

    gc.collect()

    report_mem('mem usage before calling train: ')
    model = train(train_X, train_y, test_X, test_y, report, **kwargs)

    serial.save(out_path + '.model.pkl', model)
    report.write(out_path + '.validation_report.txt')

예제 #12

0

파일 보기

파일: train_svm.py 프로젝트: vd114/galatea

def main(train_path, out_path, split, dataset, standardize, **kwargs):
    """Train an SVM on ``dataset`` with cross-validation folds; save model and report.

    ``dataset`` must be exactly one of 'stl10', 'cifar10' or 'cifar100'.
    Features that are not C-contiguous are copied into a contiguous array
    before training.  Writes ``<out_path>.model.pkl`` and
    ``<out_path>.validation_report.txt``.

    Parameters
    ----------
    train_path : passed to ``get_features`` and ``Report``.
    out_path : prefix of the output files.
    split : passed to ``get_features`` and ``Report``.
    dataset : dataset name, see above.
    standardize : forwarded to ``get_features``.
    **kwargs : forwarded unchanged to ``train``.
    """

    def report_mem(prefix):
        # Memory usage is printed only when the module-level `mem` is truthy.
        if mem:
            print(prefix + str(mem.usage()))

    stl10 = dataset == 'stl10'
    cifar10 = dataset == 'cifar10'
    cifar100 = dataset == 'cifar100'
    # Exactly one flag may be set.
    assert stl10 + cifar10 + cifar100 == 1

    report_mem('mem usage before getting labels and folds ')
    train_y, fold_indices = get_labels_and_fold_indices(cifar10, cifar100, stl10)
    report_mem('mem usage after getting labels and folds ')
    gc.collect()
    assert train_y is not None

    print('loading training features')

    report_mem('mem usage before getting features ')
    train_X = get_features(train_path, split, standardize)
    if not train_X.flags.c_contiguous:
        print('not C contiguous, reshaping')
        assert len(train_X.shape) == 2
        train_X = np.ascontiguousarray(train_X)
        assert train_X.flags.c_contiguous
        print('success, contiguous now')
    gc.collect()
    report_mem('mem usage after getting features ')

    # A wrong dtype only triggers a warning here, not a failure.
    if str(train_X.dtype) != 'float32':
        warnings.warn('Your features are not float32, you may be wasting memory')
    if stl10:
        assert train_X.shape[0] == 5000
    if cifar10 or cifar100:
        assert train_X.shape[0] == 50000
        assert train_y.shape == (50000,)

    report = Report(train_path, split, stl10, cifar10, cifar100)

    gc.collect()

    report_mem('mem usage before calling train: ')
    model = train(fold_indices, train_X, train_y, report, **kwargs)

    serial.save(out_path + '.model.pkl', model)
    report.write(out_path + '.validation_report.txt')

예제 #13

0

파일 보기

파일: coarse_analysis.py 프로젝트: vd114/galatea

def main(train_path, out_path, split, **kwargs):
    """Fit a model on the 'train' features and evaluate it on the 'aux' features.

    The training features are read from ``train_path`` with 'aux' replaced
    by 'train'; the features stored at ``train_path`` are used as the test
    set.  Writes ``<out_path>.model.pkl`` and
    ``<out_path>.validation_report.txt``.

    Parameters
    ----------
    train_path : passed to ``get_features`` (twice, see above) and ``Report``.
    out_path : prefix of the output files.
    split : passed to ``get_features`` and ``Report``.
    **kwargs : forwarded unchanged to ``train``.
    """

    def show_mem(prefix):
        # Memory usage is printed only when the module-level `mem` is truthy.
        if mem:
            print(prefix + str(mem.usage()))

    show_mem('mem usage before getting labels and folds ')
    train_y, test_y = get_labels()
    show_mem('mem usage after getting labels and folds ')
    gc.collect()
    assert train_y is not None

    print('loading training features')

    show_mem('mem usage before getting features ')
    full_train_path = train_path.replace('aux', 'train')
    train_X = get_features(full_train_path, split)
    test_X = get_features(train_path, split)
    gc.collect()
    show_mem('mem usage after getting features ')

    # Sanity checks on what was loaded.
    assert train_X.shape[0] == 50000
    assert train_y.shape == (50000,)

    assert str(train_X.dtype) == 'float32'
    assert test_X.shape[0] == 120
    assert test_y.shape == (120,)

    report = Report(train_path, split)

    gc.collect()

    show_mem('mem usage before calling train: ')
    model = train(train_X, train_y, test_X, test_y, report, **kwargs)

    serial.save(out_path + '.model.pkl', model)
    report.write(out_path + '.validation_report.txt')

예제 #14

0

파일 보기

파일: fold_point_worker.py 프로젝트: cc13ny/galatea

def main(train_path,
        out_path,
        dataset,
        standardize,
        fold,
        C,
        log,
        **kwargs):
    """Validate one (C, fold) point and write the accuracy to ``out_path``.

    ``dataset`` must be exactly one of 'stl10', 'cifar10' or 'cifar100'.
    Progress markers are written to ``log`` and flushed immediately.

    Parameters
    ----------
    train_path : passed to ``get_features`` for the training features.
    out_path : file that receives the tab-separated result
        (header line plus one line with C, fold and accuracy).
    dataset : dataset name, see above.
    standardize : forwarded to ``get_features``.
    fold : row of ``fold_indices`` to validate on.
    C : forwarded to ``validate`` and recorded in the report.
    log : object with ``write`` and ``flush``; also forwarded to ``validate``.
    **kwargs : forwarded unchanged to ``validate``.
    """

    log.write('in main\n')
    log.flush()

    stl10 = dataset == 'stl10'
    cifar10 = dataset == 'cifar10'
    cifar100 = dataset == 'cifar100'
    # Exactly one flag may be set.
    assert stl10 + cifar10 + cifar100 == 1

    print('getting labels and oflds')
    if mem:
        print('mem usage before getting labels and folds '+str(mem.usage()))
    train_y, fold_indices = get_labels_and_fold_indices(cifar10, cifar100, stl10)
    if mem:
        print('mem usage after getting labels and folds '+str(mem.usage()))
    gc.collect()
    assert train_y is not None
    log.write('got labels and folds')
    log.flush()

    print('loading training features')
    train_X = get_features(train_path, split = False, standardize = standardize)
    log.write('got features')
    log.flush()

    # Sanity checks on what was loaded.
    assert str(train_X.dtype) == 'float32'
    if stl10:
        assert train_X.shape[0] == 5000
    if cifar10 or cifar100:
        assert train_X.shape[0] == 50000
        assert train_y.shape == (50000,)

    print('running validate')
    acc = validate(train_X, train_y, fold_indices[fold,:], C, log, **kwargs)

    # Format first so a formatting error cannot leave a truncated file behind;
    # `with` closes the file even if the write itself raises.
    result_text = 'C\tfold\tvalidation accuracy\n%f\t%d\t%f\n' % (C, fold, acc)
    with open(out_path, 'w') as report:
        report.write(result_text)

예제 #15

0

파일 보기

파일: train_svm_right.py 프로젝트: vd114/galatea

def main(train_path, out_path, split, dataset, standardize, **kwargs):
    """Train models on the folds of ``dataset`` and save them together.

    ``dataset`` must be exactly one of 'stl10', 'cifar10' or 'cifar100'.
    The value returned by ``train`` is saved to ``<out_path>.models.pkl``;
    no validation report is written by this variant.

    Parameters
    ----------
    train_path : passed to ``get_features``.
    out_path : prefix of the output file.
    split : forwarded to ``get_features``.
    dataset : dataset name, see above.
    standardize : forwarded to ``get_features``.
    **kwargs : forwarded unchanged to ``train``.
    """

    def report_mem(prefix):
        # Memory usage is printed only when the module-level `mem` is truthy.
        if mem:
            print(prefix + str(mem.usage()))

    stl10 = dataset == 'stl10'
    cifar10 = dataset == 'cifar10'
    cifar100 = dataset == 'cifar100'
    # Exactly one flag may be set.
    assert stl10 + cifar10 + cifar100 == 1

    report_mem('mem usage before getting labels and folds ')
    train_y, fold_indices = get_labels_and_fold_indices(cifar10, cifar100, stl10)
    report_mem('mem usage after getting labels and folds ')
    gc.collect()
    assert train_y is not None

    print('loading training features')

    report_mem('mem usage before getting features ')
    train_X = get_features(train_path, split, standardize)
    gc.collect()
    report_mem('mem usage after getting features ')

    # Sanity checks on what was loaded.
    assert str(train_X.dtype) == 'float32'
    if stl10:
        assert train_X.shape[0] == 5000
    if cifar10 or cifar100:
        assert train_X.shape[0] == 50000
        assert train_y.shape == (50000,)

    gc.collect()

    report_mem('mem usage before calling train: ')
    models = train(fold_indices, train_X, train_y, **kwargs)

    serial.save(out_path + '.models.pkl', models)

예제 #16

0

파일 보기

def main(train_path, out_path, dataset, standardize, fold, C, log, **kwargs):
    """Validate one (C, fold) point and write the accuracy to ``out_path``.

    ``dataset`` must be exactly one of 'stl10', 'cifar10' or 'cifar100'.
    Progress markers are written to ``log`` and flushed immediately.

    Parameters
    ----------
    train_path : passed to ``get_features`` for the training features.
    out_path : file that receives the tab-separated result
        (header line plus one line with C, fold and accuracy).
    dataset : dataset name, see above.
    standardize : forwarded to ``get_features``.
    fold : row of ``fold_indices`` to validate on.
    C : forwarded to ``validate`` and recorded in the report.
    log : object with ``write`` and ``flush``; also forwarded to ``validate``.
    **kwargs : forwarded unchanged to ``validate``.
    """

    log.write('in main\n')
    log.flush()

    stl10 = dataset == 'stl10'
    cifar10 = dataset == 'cifar10'
    cifar100 = dataset == 'cifar100'
    # Exactly one flag may be set.
    assert stl10 + cifar10 + cifar100 == 1

    print('getting labels and oflds')
    if mem:
        print('mem usage before getting labels and folds ' + str(mem.usage()))
    train_y, fold_indices = get_labels_and_fold_indices(
        cifar10, cifar100, stl10)
    if mem:
        print('mem usage after getting labels and folds ' + str(mem.usage()))
    gc.collect()
    assert train_y is not None
    log.write('got labels and folds')
    log.flush()

    print('loading training features')
    train_X = get_features(train_path, split=False, standardize=standardize)
    log.write('got features')
    log.flush()

    # Sanity checks on what was loaded.
    assert str(train_X.dtype) == 'float32'
    if stl10:
        assert train_X.shape[0] == 5000
    if cifar10 or cifar100:
        assert train_X.shape[0] == 50000
        assert train_y.shape == (50000, )

    print('running validate')
    acc = validate(train_X, train_y, fold_indices[fold, :], C, log, **kwargs)

    # Check and format the values BEFORE opening the file: opening with 'w'
    # truncates it, so a failure afterwards would leave an empty report.
    assert fold is not None
    assert C is not None
    assert acc is not None
    result_text = 'C\tfold\tvalidation accuracy\n%f\t%d\t%f\n' % (C, fold, acc)

    # `with` closes the file even if the write raises.
    with open(out_path, 'w') as report:
        report.write(result_text)

예제 #17

0

파일 보기

파일: evaluate.py 프로젝트: wojzaremba/pylearn2

def main(model_path, test_path, dataset, **kwargs):
    """Load a saved model and run ``test`` on the test set of ``dataset``.

    ``dataset`` must be exactly one of 'cifar10', 'cifar100' or 'stl10'.
    The number of test examples is checked (8000 for stl10, 10000 for the
    CIFAR variants) before ``test`` is called.

    Parameters
    ----------
    model_path : file loaded with ``serial.load``.
    test_path : passed to ``get_features`` for the test features.
    dataset : dataset name, see above.
    **kwargs : accepted but not used by this function.

    Raises
    ------
    AssertionError
        If the dataset name is not recognised or the example counts differ
        from what is expected.
    """
    model = serial.load(model_path)

    stl10 = dataset == 'stl10'
    cifar10 = dataset == 'cifar10'
    cifar100 = dataset == 'cifar100'
    # Exactly one flag may be set.
    assert cifar10 + cifar100 + stl10 == 1

    labels = get_test_labels(cifar10, cifar100, stl10)
    features = get_features(test_path, False, False)

    if cifar10 or cifar100:
        num_examples = 10000
    if stl10:
        num_examples = 8000

    found = features.shape[0]
    if found != num_examples:
        raise AssertionError('Expected %d examples but got %d' %
                             (num_examples, found))
    assert labels.shape[0] == num_examples

    test(model, features, labels)

예제 #18

0

파일 보기

파일: final_logistic.py 프로젝트: cc13ny/galatea

def main(train_path, out_path, dataset, standardize, C, **kwargs):
    """Train the final model for ``dataset`` with ``train_model`` and save it.

    ``dataset`` must be exactly one of 'stl10', 'cifar10' or 'cifar100'.

    Parameters
    ----------
    train_path : passed to ``get_features`` for the training features.
    out_path : destination handed to ``serial.save``.
    dataset : dataset name, see above.
    standardize : forwarded to ``get_features``.
    C : forwarded to ``train_model``.
    **kwargs : forwarded unchanged to ``train_model``.
    """

    def report_mem(prefix):
        # Memory usage is printed only when the module-level `mem` is truthy.
        if mem:
            print(prefix + str(mem.usage()))

    stl10 = dataset == 'stl10'
    cifar10 = dataset == 'cifar10'
    cifar100 = dataset == 'cifar100'
    # Exactly one flag may be set.
    assert stl10 + cifar10 + cifar100 == 1

    print('getting labels and oflds')
    report_mem('mem usage before getting labels and folds ')
    # Fold indices come back with the labels; this function does not use them.
    train_y, fold_indices = get_labels_and_fold_indices(cifar10, cifar100, stl10)
    report_mem('mem usage after getting labels and folds ')
    gc.collect()
    assert train_y is not None

    print('loading training features')
    train_X = get_features(train_path, split=False, standardize=standardize)

    # Sanity checks on what was loaded.
    assert str(train_X.dtype) == 'float32'
    if stl10:
        assert train_X.shape[0] == 5000
    if cifar10 or cifar100:
        assert train_X.shape[0] == 50000
        assert train_y.shape == (50000,)

    fitted = train_model(train_X, train_y, C, **kwargs)

    serial.save(out_path, fitted)

예제 #19

0

파일 보기

파일: tlc_cascade.py 프로젝트: cc13ny/galatea

def main(train_path, out_path, split, **kwargs):
    """Train the cascade model from omnivore and fruit classifiers; save results.

    Writes ``<out_path>.model.pkl`` and ``<out_path>.validation_report.txt``.

    Parameters
    ----------
    train_path : passed to ``get_features`` and ``Report``.
    out_path : prefix of the output files.
    split : passed to ``get_features`` and ``Report``.
    **kwargs : forwarded unchanged to ``train``.
    """
    y_fine, y_coarse, fold_indices = get_labels_and_fold_indices()
    gc.collect()

    print('loading training features')
    train_X = get_features(train_path, split)
    gc.collect()

    # Sanity checks on what was loaded.
    assert str(train_X.dtype) == 'float32'
    assert train_X.shape[0] == 120
    assert y_fine.shape == (120,)
    assert y_coarse.shape == (120,)

    report = Report(train_path, split)
    gc.collect()

    print('making omnivore classifiers')
    omnivore_classifiers = get_classifiers(
        'omnivore', train_X, y_fine, y_coarse, fold_indices)
    print('making fruit classifiers')
    fruit_classifiers = get_classifiers(
        'fruit', train_X, y_fine, y_coarse, fold_indices)

    model = train(
        fold_indices,
        omnivore_classifiers,
        fruit_classifiers,
        train_X,
        y_fine,
        y_coarse,
        report,
        **kwargs
    )

    model_file = out_path + '.model.pkl'
    report_file = out_path + '.validation_report.txt'
    serial.save(model_file, model)
    report.write(report_file)