def get(dataset_name):
        # Datasets that work with the current model
        datasets = ['adult',
                    'binarized_mnist',
                    'connect4',
                    'dna',
                    'mushrooms',
                    'nips',
                    'ocr_letters',
                    'rcv1',
                    'rcv2_russ',
                    'web']

        # Setup dataset env
        if dataset_name not in datasets:
            raise ValueError('Dataset unknown: ' + dataset_name)
        # Dynamically import the mlpython loader module for this dataset
        mldataset = __import__('mlpython.datasets.' + dataset_name, globals(), locals(), [dataset_name], 0)
        datadir = os.path.join(os.getenv("MLPYTHON_DATASET_REPO"), dataset_name)

        # Download the dataset if it is not already present
        if not os.path.exists(datadir):
            dataset_store.download(dataset_name)

        print('### Loading dataset [{0}] ...'.format(dataset_name))
        start_time = t.time()

        all_data = mldataset.load(datadir, load_to_memory=True)
        train_data, train_metadata = all_data['train']

        if dataset_name == 'binarized_mnist' or dataset_name == 'nips':
            trainset = mlpb.MLProblem(train_data, train_metadata)
        else:
            trainset = mlpb.SubsetFieldsProblem(train_data, train_metadata)

        trainset.setup()

        valid_data, valid_metadata = all_data['valid']

        validset = trainset.apply_on(valid_data, valid_metadata)

        test_data, test_metadata = all_data['test']
        testset = trainset.apply_on(test_data, test_metadata)

        # Clean up, package and wrap the splits in Theano shared variables
        full_dataset = {'input_size': trainset.metadata['input_size']}

        trainset_theano = theano.shared(value=Dataset._clean(trainset), borrow=True)
        validset_theano = theano.shared(value=Dataset._clean(validset), borrow=True)
        testset_theano = theano.shared(value=Dataset._clean(testset), borrow=True)

        full_dataset['train'] = {'data': trainset_theano, 'length': all_data['train'][1]['length']}
        full_dataset['valid'] = {'data': validset_theano, 'length': all_data['valid'][1]['length']}
        full_dataset['test'] = {'data': testset_theano, 'length': all_data['test'][1]['length']}

        print("(Dim:{0} Train:{1} Valid:{2} Test:{3})".format(trainset.metadata['input_size'], full_dataset['train']['length'], full_dataset['valid']['length'], full_dataset['test']['length']))
        print(get_done_text(start_time), "###")
        return full_dataset
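A minimal usage sketch for the loader above, assuming it is exposed as a static method of the Dataset class it references and that MLPYTHON_DATASET_REPO points at a writable directory (the path below is hypothetical):

import os

os.environ.setdefault('MLPYTHON_DATASET_REPO', '/tmp/mlpython_data')  # hypothetical repository path

full_dataset = Dataset.get('binarized_mnist')
print(full_dataset['input_size'])              # dimensionality of one example
train_shared = full_dataset['train']['data']   # Theano shared variable holding the training matrix
n_train = full_dataset['train']['length']      # number of training examples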
Example #2
    def get(dataset_name):
        # Datasets that work with the current model
        datasets = ['adult',
                    'binarized_mnist',
                    'connect4',
                    'dna',
                    'mushrooms',
                    'nips',
                    'ocr_letters',
                    'rcv1',
                    'rcv2_russ',
                    'web']

        # Setup dataset env
        if dataset_name not in datasets:
            raise ValueError('Dataset unknown: ' + dataset_name)
        mldataset = __import__('mlpython.datasets.' + dataset_name, globals(), locals(), [dataset_name], -1)
        datadir = os.path.join(os.getenv("MLPYTHON_DATASET_REPO"), dataset_name)

        # Download the dataset if it is not already present
        if not os.path.exists(datadir):
            dataset_store.download(dataset_name)

        print '### Loading dataset [{0}] ...'.format(dataset_name),
        start_time = t.time()

        all_data = mldataset.load(datadir, load_to_memory=True)
        train_data, train_metadata = all_data['train']

        if dataset_name == 'binarized_mnist' or dataset_name == 'nips':
            trainset = mlpb.MLProblem(train_data, train_metadata)
        else:
            trainset = mlpb.SubsetFieldsProblem(train_data, train_metadata)

        trainset.setup()

        valid_data, valid_metadata = all_data['valid']

        validset = trainset.apply_on(valid_data, valid_metadata)

        test_data, test_metadata = all_data['test']
        testset = trainset.apply_on(test_data, test_metadata)

        # Clean up, package and wrap the splits in Theano shared variables
        full_dataset = {'input_size': trainset.metadata['input_size']}

        trainset_theano = theano.shared(value=Dataset._clean(trainset), borrow=True)
        validset_theano = theano.shared(value=Dataset._clean(validset), borrow=True)
        testset_theano = theano.shared(value=Dataset._clean(testset), borrow=True)

        full_dataset['train'] = {'data': trainset_theano, 'length': all_data['train'][1]['length']}
        full_dataset['valid'] = {'data': validset_theano, 'length': all_data['valid'][1]['length']}
        full_dataset['test'] = {'data': testset_theano, 'length': all_data['test'][1]['length']}

        print "(Dim:{0} Train:{1} Valid:{2} Test:{3})".format(trainset.metadata['input_size'], full_dataset['train']['length'], full_dataset['valid']['length'], full_dataset['test']['length']),
        print get_done_text(start_time), "###"
        return full_dataset
Example #3
def sklearn_convex(classifier,
                   algorithm,
                   max_evals=100,
                   seed=1,
                   filename='none',
                   preproc=[],
                   loss=None):

    global suppress_output
    if suppress_output:
        dump_file = None
    else:
        dump_file = filename + '.dump'

    estim = hyperopt_estimator(classifier=classifier,
                               algo=algorithm,
                               preprocessing=preproc,
                               max_evals=max_evals,
                               trial_timeout=240,
                               fit_increment_dump_filename=dump_file,
                               loss_fn=loss)

    filename = filename + '.out'

    dataset_store.download('convex')
    trainset, validset, testset = dataset_store.get_classification_problem(
        'convex')

    X_train = trainset.data.mem_data[0]
    y_train = trainset.data.mem_data[1]

    X_valid = validset.data.mem_data[0]
    y_valid = validset.data.mem_data[1]

    X_test = testset.data.mem_data[0]
    y_test = testset.data.mem_data[1]

    X_fulltrain = np.concatenate((X_train, X_valid))
    y_fulltrain = np.concatenate((y_train, y_valid))

    print(y_train.shape)
    print(y_valid.shape)
    print(y_test.shape)

    #find_model( X_train, y_train, X_test, y_test, estim, filename )
    find_model(X_fulltrain, y_fulltrain, X_test, y_test, estim, filename)
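A hedged usage sketch for sklearn_convex, assuming hyperopt's tpe.suggest and hpsklearn's any_classifier (neither is shown in this snippet):

from hyperopt import tpe
from hpsklearn import any_classifier

# Run a TPE search over any supported classifier for 100 evaluations; results go to
# 'convex_tpe.out' and, unless the suppress_output global is set, trials are dumped
# to 'convex_tpe.dump'.
sklearn_convex(classifier=any_classifier('clf'),
               algorithm=tpe.suggest,
               max_evals=100,
               filename='convex_tpe')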
def convex():

  dataset_store.download('convex')
  trainset,validset,testset = dataset_store.get_classification_problem('convex')

  X_train = trainset.data.mem_data[0]
  y_train = trainset.data.mem_data[1]
  
  X_valid = validset.data.mem_data[0]
  y_valid = validset.data.mem_data[1]
  
  X_test = testset.data.mem_data[0]
  y_test = testset.data.mem_data[1]
  
  X_fulltrain = np.concatenate((X_train, X_valid))
  y_fulltrain = np.concatenate((y_train, y_valid))
  
  pca = PCA()
  X_train_pca = pca.fit_transform( X_fulltrain )
  # Transform the test set with the PCA already fitted on the training data
  X_test_pca = pca.transform( X_test )
  
  clfs = [ MultinomialNB(), SVC(),
           KNeighborsClassifier(),
           SGDClassifier() ]
  
  pca_clfs = [ SVC(),
               KNeighborsClassifier(),
               SGDClassifier() ]
  
  print("Convex\n")
  with open( "convex_baselines.txt", 'w' ) as f:
    for clf in clfs:
      clf.fit( X_fulltrain, y_fulltrain )
      pred = clf.predict( X_test )
      score = metrics.f1_score( y_test, pred )
      print( "Classifier: %s\nScore: %f\n" % (clf, score) )
      f.write("Classifier: %s\nScore: %f\n\n" % (clf, score))
    for clf in pca_clfs:
      clf.fit( X_train_pca, y_fulltrain )
      pred = clf.predict( X_test_pca )
      score = metrics.f1_score( y_test, pred )
      print( "Classifier: PCA + %s\nScore: %f\n" % (clf, score) )
      f.write("Classifier: PCA + %s\nScore: %f\n\n" % (clf, score))
Example #5
def convex():

    dataset_store.download('convex')
    trainset, validset, testset = dataset_store.get_classification_problem(
        'convex')

    X_train = trainset.data.mem_data[0]
    y_train = trainset.data.mem_data[1]

    X_valid = validset.data.mem_data[0]
    y_valid = validset.data.mem_data[1]

    X_test = testset.data.mem_data[0]
    y_test = testset.data.mem_data[1]

    X_fulltrain = np.concatenate((X_train, X_valid))
    y_fulltrain = np.concatenate((y_train, y_valid))

    pca = PCA()
    X_train_pca = pca.fit_transform(X_fulltrain)
    # Transform the test set with the PCA already fitted on the training data
    X_test_pca = pca.transform(X_test)

    clfs = [MultinomialNB(), SVC(), KNeighborsClassifier(), SGDClassifier()]

    pca_clfs = [SVC(), KNeighborsClassifier(), SGDClassifier()]

    print("Convex\n")
    with open("convex_baselines.txt", 'w') as f:
        for clf in clfs:
            clf.fit(X_fulltrain, y_fulltrain)
            pred = clf.predict(X_test)
            score = metrics.f1_score(y_test, pred)
            print("Classifier: %s\nScore: %f\n" % (clf, score))
            f.write("Classifier: %s\nScore: %f\n\n" % (clf, score))
        for clf in pca_clfs:
            clf.fit(X_train_pca, y_fulltrain)
            pred = clf.predict(X_test_pca)
            score = metrics.f1_score(y_test, pred)
            print("Classifier: PCA + %s\nScore: %f\n" % (clf, score))
            f.write("Classifier: PCA + %s\nScore: %f\n\n" % (clf, score))
def sklearn_convex( classifier, algorithm, max_evals=100, seed=1,
                    filename = 'none', preproc=[], loss=None ):

  
  global suppress_output
  if suppress_output:
    dump_file = None
  else:
    dump_file = filename+'.dump'
  
  estim = hyperopt_estimator( classifier=classifier, algo=algorithm,
                              preprocessing=preproc,
                              max_evals=max_evals, trial_timeout=240,
                              fit_increment_dump_filename=dump_file,
                              loss_fn=loss)
  
  filename = filename + '.out'

  dataset_store.download('convex')
  trainset,validset,testset = dataset_store.get_classification_problem('convex')

  X_train = trainset.data.mem_data[0]
  y_train = trainset.data.mem_data[1]
  
  X_valid = validset.data.mem_data[0]
  y_valid = validset.data.mem_data[1]
  
  X_test = testset.data.mem_data[0]
  y_test = testset.data.mem_data[1]

  X_fulltrain = np.concatenate((X_train, X_valid))
  y_fulltrain = np.concatenate((y_train, y_valid))

  print(y_train.shape)
  print(y_valid.shape)
  print(y_test.shape)
  
  #find_model( X_train, y_train, X_test, y_test, estim, filename )
  find_model( X_fulltrain, y_fulltrain, X_test, y_test, estim, filename )
def setUp():
    try:
        dataset_store.download('mnist_rotated_background_images')
    except:
        print 'Could not download the dataset : ', 'mnist_rotated_background_images'
        assert False
def setUp():
    try:
        dataset_store.download('binarized_mnist')
    except:
        print 'Could not download the dataset : ', 'binarized_mnist'
        assert False
Example #9
def setUp():
    try:
        dataset_store.download('letor_mq2007')
    except:
        print 'Could not download the dataset : ', 'letor_mq2007'
        assert False
def setUp():
    try:
        dataset_store.download('face_completion_lfw')
    except:
        print 'Could not download the dataset : ', 'face_completion_lfw'
        assert False
Example #11
def setUp():
    try:
        dataset_store.download("cadata")
    except:
        print "Could not download the dataset : ", "cadata"
        assert False
Example #12
def setUp():
    try:
        dataset_store.download('bibtex')
    except:
        print 'Could not download the dataset : ', 'bibtex'
        assert False
def setUp():
    try:
        dataset_store.download('occluded_faces_lfw')
    except:
        print 'Could not download the dataset : ', 'occluded_faces_lfw'
        assert False
Example #14
def setUp():
    try:
        dataset_store.download('newsgroups')
    except:
        print 'Could not download the dataset : ', 'newsgroups'
        assert False
Example #15
def setUp():
    try:
        dataset_store.download('housing')
    except:
        print 'Could not download the dataset : ', 'housing'
        assert False
Example #16
def setUp():
    try:
        dataset_store.download('medical')
    except:
        print 'Could not download the dataset : ', 'medical'
        assert False
Example #17
def setUp():
    try:
        dataset_store.download('ocr_letters')
    except:
        print 'Could not download the dataset : ', 'ocr_letters'
        assert False
import mlpython.datasets.store as store

store.download('adult')
store.download('connect4')
store.download('dna')
store.download('mushrooms')
store.download('nips')
store.download('ocr_letters')
store.download('rcv1')
store.download('web')
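Once downloaded, a dataset can be turned into ready-made train/valid/test splits; a minimal sketch, assuming the same get_classification_problem API used in the convex examples above:

import mlpython.datasets.store as store

store.download('ocr_letters')
trainset, validset, testset = store.get_classification_problem('ocr_letters')

# As in the convex examples, the in-memory arrays sit behind data.mem_data
X_train, y_train = trainset.data.mem_data[0], trainset.data.mem_data[1]
print(X_train.shape, y_train.shape)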
Example #19
def setUp():
    try:
        dataset_store.download('corrupted_ocr_letters')
    except:
        print 'Could not download the dataset : ', 'corrupted_ocr_letters'
        assert False
Example #20
def setUp():
    try:
        dataset_store.download('mnist_basic')
    except:
        print 'Could not download the dataset : ', 'mnist_basic'
        assert False
def setUp():
    try:
        dataset_store.download('mnist_background_random')
    except:
        print 'Could not download the dataset : ', 'mnist_background_random'
        assert False
Example #22
def setUp():
    try:
        dataset_store.download('connect4')
    except:
        print 'Could not download the dataset : ', 'connect4'
        assert False
Example #23
def setUp():
    try:
        dataset_store.download('housing')
    except:
        print 'Could not download the dataset : ', 'housing'
        assert False
Example #24
def setUp():
    try:
        dataset_store.download('rectangles')
    except:
        print 'Could not download the dataset : ', 'rectangles'
        assert False
Example #25
def setUp():
    try:
        dataset_store.download('newsgroups')
    except:
        print 'Could not download the dataset : ', 'newsgroups'
        assert False
Example #26
def setUp():
    try:
        dataset_store.download('mediamill')
    except:
        print 'Could not download the dataset : ', 'mediamill'
        assert False
Example #27
def setUp():
    try:
        dataset_store.download('rectangles')
    except:
        print 'Could not download the dataset : ', 'rectangles'
        assert False
Example #28
def setUp():
    try:
        dataset_store.download('adult')
    except:
        print 'Could not download the dataset : ', 'adult'
        assert False
Example #29
def setUp():
    try:
        dataset_store.download('occluded_mnist')
    except:
        print 'Could not download the dataset : ', 'occluded_mnist'
        assert False
Example #30
def setUp():
    try:
        dataset_store.download('mushrooms')
    except:
        print 'Could not download the dataset : ', 'mushrooms'
        assert False
Example #31
def setUp():
    try:
        dataset_store.download('connect4')
    except:
        print 'Could not download the dataset : ', 'connect4'
        assert False
Example #32
def setUp():
    try:
        dataset_store.download('letor_mq2008')
    except:
        print 'Could not download the dataset : ', 'letor_mq2008'
        assert False
Example #33
def setUp():
    try:
        dataset_store.download('face_completion_lfw')
    except:
        print 'Could not download the dataset : ', 'face_completion_lfw'
        assert False
Example #34
def setUp():
    try:
        dataset_store.download('mnist_basic')
    except:
        print 'Could not download the dataset : ', 'mnist_basic'
        assert False
Example #35
def setUp():
    try:
        dataset_store.download('abalone')
    except:
        print 'Could not download the dataset : ', 'abalone'
        assert False