Example #1
def experiment_with_parameters(ser_filename,
                               batch_sizes=[64],
                               learning_rates=[0.05],
                               optimizers=['Ftrl', 'RMSProp', 'Adam', 'Adagrad', 'SGD'],
                               class_weights=[[0.4,0.6], [0.6,0.4]]):
    '''
    Calculate and print accuracies for different combinations of hyper-parameters.
    '''
    # Load dataset
    train_data, train_targets, test_data, expected = Helper.unserialize(ser_filename)

    # Build Classifier
    for b_size in batch_sizes:
        for l_rate in learning_rates:
            for optimizer in optimizers:
                for class_weight in class_weights:
                    classifier = skflow.TensorFlowEstimator(model_fn=multilayer_conv_model, n_classes=2,
                                                            steps=500, learning_rate=l_rate, batch_size=b_size,
                                                            optimizer=optimizer, class_weight=class_weight)
                    classifier.fit(train_data, train_targets)

                    # Assess
                    predictions = classifier.predict(test_data)
                    accuracy = metrics.accuracy_score(expected, predictions)
                    confusion_matrix = metrics.confusion_matrix(expected, predictions)
                    print('Accuracy for batch_size %d, learn_rate %.3f, optimizer %s, class_weight %s: %f' % (b_size, l_rate, optimizer, class_weight, accuracy))
                    print("Confusion matrix:\n%s" % confusion_matrix)
Example #2
def run(featureRepresentation='image',
        glcm_distance=1,
        glcm_isMultidirectional=False):
    '''
    Apply a CNN on the grain_images dataset and print test accuracies.
    That is, train it on training data and test it on test data.
    '''
    train_data, train_targets, test_data, expected = Helper.extract_features_from_new_data(
        featureRepresentation,
        glcm_distance,
        glcm_isMultidirectional,
        train_size=0.5)
    Helper.serialize("../Datasets/grain_glcm_d1_a4_2_new.data",
                     (train_data, train_targets, test_data, expected))

    # Build Classifier
    classifier = skflow.TensorFlowEstimator(model_fn=multilayer_conv_model,
                                            n_classes=2,
                                            steps=500,
                                            learning_rate=0.05,
                                            batch_size=128)
    classifier.fit(train_data, train_targets)

    # Assess
    predictions = classifier.predict(test_data)
    accuracy = metrics.accuracy_score(expected, predictions)
    confusion_matrix = metrics.confusion_matrix(expected, predictions)
    print("Confusion matrix:\n%s" % confusion_matrix)
    print('Accuracy: %f' % accuracy)
Example #3
def instantiateModel(hyperparams):
    # We'll copy the same model from above
    def custom_model(X, y):
        #X = learn.ops.batch_normalize(X, scale_after_normalization=True) TODO possibly include this

        layers = learn.ops.dnn(X,
                               hyperparams['HIDDEN_UNITS'],
                               activation=hyperparams['ACTIVATION_FUNCTION'],
                               dropout=hyperparams['KEEP_PROB'])

        return learn.models.logistic_regression(layers, y)

    classifier = learn.TensorFlowEstimator(
        model_fn=custom_model,
        n_classes=y_classes,
        batch_size=hyperparams['BATCH_SIZE'],
        steps=hyperparams['STEPS'],
        optimizer=hyperparams['OPTIMIZER'],
        learning_rate=hyperparams['LEARNING_RATE'],
    )

    # A monitor could implement early stopping based on training accuracy, to reduce overfitting.
    #monitor = learn.monitors.BaseMonitor(early_stopping_rounds=int(hyperparams['MAX_BAD_COUNT']))#, print_steps=100)

    return classifier  # (optionally also return the monitor)
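
A minimal usage sketch for instantiateModel. Every value in the dictionary below is an illustrative assumption, and y_classes, X_train, y_train, and tensorflow-as-tf are assumed to be defined at module level:

hyperparams = {
    'HIDDEN_UNITS': [64, 32],           # two hidden layers (illustrative)
    'ACTIVATION_FUNCTION': tf.nn.relu,  # activation passed to learn.ops.dnn
    'KEEP_PROB': 0.8,                   # dropout keep probability (assumed meaning of the key)
    'BATCH_SIZE': 128,
    'STEPS': 1000,
    'OPTIMIZER': 'Adagrad',
    'LEARNING_RATE': 0.05,
}
classifier = instantiateModel(hyperparams)
classifier.fit(X_train, y_train)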
Example #4
def dnn(nn_lr=0.1, nn_steps=5000, hidden_units=[30, 30]):
    def tanh_dnn(X, y):
        features = skflow.ops.dnn(X, hidden_units=hidden_units,
                                  activation=tf.tanh)
        return skflow.models.linear_regression(features, y)

    regressor = skflow.TensorFlowEstimator(model_fn=tanh_dnn, n_classes=0,
                                           steps=nn_steps, learning_rate=nn_lr,
                                           batch_size=100)
    return regressor
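
A hedged usage sketch on toy data (shapes and values are illustrative; assumes numpy is imported as np):

X = np.random.rand(200, 3).astype(np.float32)  # 200 samples, 3 features
y = X.sum(axis=1)                              # toy regression target
regressor = dnn(nn_lr=0.05, nn_steps=1000, hidden_units=[20, 20])
regressor.fit(X, y)
predictions = regressor.predict(X)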
Example #5
    def get_classifier(self,
                       n_classes,
                       batch_size=128,
                       learning_rate=0.1,
                       training_steps=10):
        self.classifier = learn.TensorFlowEstimator(
            model_fn=self.rnn_model,
            n_classes=n_classes,
            batch_size=batch_size,
            steps=training_steps,
            optimizer='SGD',
            learning_rate=learning_rate)
Example #6
def main():
    images, labels, pokemon = load_images()
    pokemon_test = []
    print(labels)

    # Label encoder
    le = preprocessing.LabelEncoder()
    le.fit(labels)
    print(le.classes_)
    transformed_labels = le.transform(labels)
    print(transformed_labels)

    # 714 samples in total; roughly 80% are selected for training.
    msk = np.random.rand(714) < 0.8
    print(msk)
    true_indexes = []
    false_indexes = []
    training_labels = []
    test_labels = []
    for idx, val in enumerate(msk):
        if val:
            true_indexes.append(idx)
            training_labels.append(transformed_labels[idx])
        else:
            false_indexes.append(idx)
            test_labels.append(transformed_labels[idx])
            pokemon_test.append(pokemon[idx])

    training_set = np.delete(images, false_indexes, 0)
    test_set = np.delete(images, true_indexes, 0)


    # Flatten each image into a 3072-dimensional feature vector (e.g. 32x32 RGB).
    reshaped_dataset = training_set.reshape(len(training_labels), 3072)
    reshaped_testset = test_set.reshape(len(test_labels), 3072)

    # Training and predicting.
    classifier = learn.TensorFlowEstimator(
        model_fn=conv_model, n_classes=17, batch_size=100, steps=20000,
        learning_rate=0.001, verbose=2)
    classifier.fit(reshaped_dataset, training_labels, logdir=os.getcwd() + '/model_20000b_logs')
    classifier.save(os.getcwd() + '/model_20000b')
    score = metrics.accuracy_score(
        test_labels, classifier.predict(reshaped_testset))
    print('Accuracy: {0:f}'.format(score))

    prediction_labels = classifier.predict(reshaped_testset)
    target_names = ['Bug', 'Dark', 'Dragon', 'Electric', 'Fairy', 'Fighting', 'Fire', 'Ghost', 'Grass',
                    'Ground', 'Ice', 'Normal', 'Poison', 'Psychic', 'Rock', 'Steel', 'Water']

    print(metrics.classification_report(test_labels, prediction_labels, target_names=target_names))
    print(test_labels)
    print(prediction_labels)
    print(pokemon_test)
Example #7
def get_model(filename=CLASSIFIER_FILE):
    '''Load a CNN classifier from file, or build and train a new one if filename is None.'''
    if filename is None:
        # Load dataset
        train_data, train_targets, test_data, expected = Helper.unserialize(
            "../Datasets/raw_new_80.data")
        train_data2, train_targets2, test_data2, expected2 = Helper.unserialize(
            "../Datasets/raw.data")

        train_data = np.concatenate((train_data, train_data2), axis=0)
        train_targets = np.concatenate((train_targets, train_targets2), axis=0)
        test_data = np.concatenate((test_data, test_data2), axis=0)
        expected = np.concatenate((expected, expected2), axis=0)
        print(train_data.shape)

        raw_train_data = np.zeros((train_data.shape[0], 20, 20))
        for i, item in enumerate(train_data):
            raw_train_data[i] = item.reshape((20, 20))
            #Display.show_image(raw_train_data[i])

        raw_test_data = np.zeros((test_data.shape[0], 20, 20))
        for i, item in enumerate(test_data):
            raw_test_data[i] = item.reshape((20, 20))
            #Display.show_image(raw_test_data[i])

        # Build Classifier
        # classifier = skflow.TensorFlowEstimator(model_fn=multilayer_conv_model, n_classes=2,
        #                                         steps=500, learning_rate=0.05, batch_size=128)
        classifier = skflow.TensorFlowEstimator(model_fn=conv_model,
                                                n_classes=2,
                                                steps=500,
                                                learning_rate=0.05,
                                                batch_size=128,
                                                optimizer='Ftrl')
        classifier.fit(raw_train_data, train_targets)

        # Assess built classifier
        predictions = classifier.predict(raw_test_data)
        accuracy = metrics.accuracy_score(expected, predictions)
        confusion_matrix = metrics.confusion_matrix(expected, predictions)
        print("Confusion matrix:\n%s" % confusion_matrix)
        print('Accuracy: %f' % accuracy)

        return classifier
    else:
        serialized_classifier = Helper.unserialize(filename)
        return serialized_classifier
Example #8
def predict_nn(hist_data, data_to_predict, cl_data):
    np_hist_data, np_prd_data, np_classes_data = \
        prepare_data_for_nn(hist_data, data_to_predict, cl_data)

    nn = skflow.TensorFlowEstimator(model_fn=nn_model, n_classes=3)
    nn.fit(np_hist_data, np_classes_data, logdir='./log')

    # Note: accuracy is computed on the same data the model was fit on.
    score = metrics.accuracy_score(np_classes_data, nn.predict(np_hist_data))
    print("Accuracy NN: %f" % score)

    prd = nn.predict_proba(np_prd_data)
    return link_perc_to_cl(prd, CLASSES_PRE)
Example #9
def run_with_dataset(ser_filename):
    '''
    Apply a CNN on a dataset and print test accuracies.
    That is, train it on training data and test it on test data.
    '''
    # Load dataset
    train_data, train_targets, test_data, expected = Helper.unserialize(ser_filename)

    # Build Classifier
    classifier = skflow.TensorFlowEstimator(model_fn=multilayer_conv_model, n_classes=2,
                                            steps=500, learning_rate=0.05, batch_size=128)
    classifier.fit(train_data, train_targets)

    # Assess
    predictions = classifier.predict(test_data)
    accuracy = metrics.accuracy_score(expected, predictions)
    confusion_matrix = metrics.confusion_matrix(expected, predictions)
    print("Confusion matrix:\n%s" % confusion_matrix)
    print('Accuracy: %f' % accuracy)
Example #10
def main(fx, scale):
    logdir = '../data/fx/ann/tensorboard_models/%s%s%s' % (
        scale,
        fx,
        time.strftime(time_format, time.localtime()))
    # Load dataset
    path_f_final = ['%s/%s_%s_f.npy' % (FILE_PREX, fx, scale),
                    '%s/%s_%s_t.pkl.npy' % (FILE_PREX, fx, scale)]
    path_f_in = '%s/%s_H.pkl' % (FILE_PREX, fx)
    pd_data = pd.read_pickle(path_f_in)['close']
    fx_max = max(pd_data)
    fx_min = min(pd_data)
    data = np.load(path_f_final[0])
    data_s = np.load(path_f_final[1])
    data_train = data[:data.shape[0] - num_test]
    data_test = data[data.shape[0] - num_test:]
    data_s_train = data_s[:data.shape[0] - num_test]
    data_s_test = data_s[data.shape[0] - num_test:]

    regressor = learn.TensorFlowEstimator(
        model_fn=my_model,
        n_classes=0, optimizer='SGD',
        batch_size=len(data_train), steps=20000,
        learning_rate=0.2)

    # Fit
    regressor.fit(data_train, data_s_train, logdir=logdir)

    # Predict and score
    prediction = regressor.predict(data_test)
    data = {'close_price': [i * (fx_max - fx_min) + fx_min for i in data_s_test],
            'predict': [i * (fx_max - fx_min) + fx_min for i in prediction]}
    frame = pd.DataFrame(data)
    frame.to_pickle('%s/%sprediction.pkl' % (logdir, fx))
    score1 = metrics.explained_variance_score(
        data_s_test, prediction)
    score2 = metrics.mean_absolute_error(
        data_s_test, prediction)
    print(score1, score2)
    return score1, score2
Example #11
def get_fund_classifier():

    sample_x, sample_y = load_training_data()

    MAX_DOCUMENT_LENGTH = 50
    EMBEDDING_SIZE = 200

    vocab_processor = learn.preprocessing.VocabularyProcessor(
        MAX_DOCUMENT_LENGTH)
    sample_x = np.array(list(vocab_processor.fit_transform(sample_x)))
    n_words = len(vocab_processor.vocabulary_)
    logger_fund.info('Size of data')
    logger_fund.info(sample_x.shape)
    logger_fund.info('Total words: %d' % n_words)

    def average_model(X, y):

        word_vectors = learn.ops.categorical_variable(
            X, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words')
        features = tf.reduce_max(word_vectors, reduction_indices=1)
        return learn.models.logistic_regression(features, y)

    def rnn_model(X, y):

        word_vectors = learn.ops.categorical_variable(
            X, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words')
        word_list = tf.unpack(word_vectors, axis=1)
        cell = tf.nn.rnn_cell.GRUCell(EMBEDDING_SIZE)
        _, encoding = tf.nn.rnn(cell, word_list, dtype=tf.float32)
        return learn.models.logistic_regression(encoding, y)

    classifier = learn.TensorFlowEstimator(model_fn=rnn_model,
                                           n_classes=2,
                                           continue_training=True,
                                           steps=1000,
                                           learning_rate=0.1,
                                           optimizer='Adagrad')
    classifier.fit(sample_x, sample_y)
    return vocab_processor, classifier
Example #12
    save = pickle.load(f)
    X_valid = save['X']
    y_valid = save['y']
    X_valid = X_valid.reshape(-1, X_valid.shape[1], X_valid.shape[2], 1)
    print('valid: X =>', X_valid.shape, 'y =>', y_valid.shape)

# Restore model if graph is saved into a folder.
if os.path.exists("%s/graph.pbtxt" % (model_dir)):
    classifier = learn.TensorFlowEstimator.restore(model_dir)
    pred_valid = classifier.predict_proba(X_valid, batch_size=64)
    print(pred_valid)
else:
    # Create a new resnet classifier.
    classifier = learn.TensorFlowEstimator(model_fn=get_image_feature_small,
                                           n_classes=0,
                                           batch_size=100,
                                           steps=100,
                                           learning_rate=0.001,
                                           continue_training=True)

while True:
    # Train model and save summaries into logdir.
    classifier.fit(X, y, logdir=model_dir)
    # Save model graph and checkpoints.
    classifier.save(model_dir)

    # Calculate accuracy.
    pred_valid = classifier.predict_proba(X_valid, batch_size=5)
    print(pred_valid)
    print(y_valid)
    #print('Accuracy: {0:f}'.format(score))
Example #13
    # clf.fit(trainx, trainy)
    # joblib.dump(clf, os.path.join(os.path.split(os.path.realpath(__file__))[0], 'models/category.rnn.model'))

    # scores = cross_validation.cross_val_score(clf, trainx, trainy, scoring='precision', cv=5)
    # print MAX_DOCUMENT_LENGTH, EMBEDDING_SIZE, scores.mean(), scores

    # score = metrics.accuracy_score(trainy, clf.predict(trainx))
    # score = metrics.accuracy_score(testy, clf.predict(testx))
    # print('Accuracy: {0:f}'.format(score))

    parameters = {
        'learning_rate': [0.05, 0.1, 0.15],
        'steps': [500, 1000, 1200, 1500, 2000],
        'optimizer': ["Adam", "Adagrad"]
    }

    keys, values = list(parameters.keys()), list(parameters.values())
    cvscores = []
    for parameter in itertools.product(*values):
        ps = {keys[i]: parameter[i] for i in range(len(keys))}
        clf = learn.TensorFlowEstimator(model_fn=rnn_model,
                                        n_classes=2,
                                        continue_training=True,
                                        **ps)
        clf.fit(trainx, trainy)
        score = metrics.accuracy_score(testy, clf.predict(testx))
        cvscores.append((ps, score))
    for cvscore in cvscores:
        print(cvscore[0], cvscore[1])
    print('best score')
    print(sorted(cvscores, key=lambda x: x[1], reverse=True)[0])
Example #14
def train_cnn():
    steps = 1
    for i in SCALE:
        result_tmp0 = np.empty(0)
        result_tmp1 = np.empty(0)
        result_tmp2 = np.empty(0)
        # df = pd.DataFrame()
        for fx in FX_LIST:
            result_tmp3 = np.empty(0)
            # fs_t_path = ['%s/NFs/%s_%i.npy' % (FILE_PREX, fx, i),
            #              '%s/T/%s_%i.pkl' % (FILE_PREX, fx, i)]
            # fs = np.load(fs_t_path[0])
            # t = pd.read_pickle(fs_t_path[1])
            f_train = np.load('%s/NFs/%s_train_%i.npy' % (FILE_PREX, fx, i))
            f_test = np.load('%s/NFs/%s_test_%i.npy' % (FILE_PREX, fx, i))
            t_train = pd.read_pickle('%s/T/%s_train_%i.pkl' %
                                     (FILE_PREX, fx, i))
            t_test = pd.read_pickle('%s/T/%s_test_%i.pkl' % (FILE_PREX, fx, i))
            for optimizer in optimizers:
                start = time.strftime(time_format, time.localtime())
                print('%s start at %s.' % (fx, start))
                model = learn.TensorFlowEstimator(model_fn=conv_model,
                                                  n_classes=0,
                                                  batch_size=80,
                                                  steps=steps,
                                                  optimizer=optimizer,
                                                  learning_rate=0.001)
                logdir = '%s/tensorboard_models/exam/%s/%s' % (FILE_PREX,
                                                               optimizer, fx)
                # model.fit(fs[:-num_test],
                #           t['change'][:-num_test],
                #           logdir=logdir)
                model.fit(f_train, t_train['change'].values, logdir=logdir)
                model.save('%s/saves/exam/%s/%s' % (FILE_PREX, optimizer, fx))
                # prediction1 = model.predict(fs[-num_test:])
                # prediction2 = (prediction1 / 100 + 1) * \
                #     t['target_open'][-num_test:]
                # score0 = mean_absolute_percentage_error(
                #     t['real_target'][-num_test:].values, prediction2)
                # score1 = metrics.explained_variance_score(
                #     t['change'][-num_test:].values, prediction1)
                # score2 = metrics.mean_squared_error(
                #     t['real_target'][-num_test:].values, prediction2)
                prediction1 = model.predict(f_test)
                prediction2 = (prediction1 / 100 + 1) * \
                    t_test['target_open'].values
                score0 = mean_absolute_percentage_error(
                    t_test['real_target'].values, prediction2)
                score1 = metrics.explained_variance_score(
                    t_test['change'].values, prediction1)
                score2 = metrics.mean_squared_error(
                    t_test['real_target'].values, prediction2)
                result_tmp0 = np.append(result_tmp0, score0)
                print(result_tmp0)
                result_tmp1 = np.append(result_tmp1, score1)
                print(result_tmp1)
                result_tmp2 = np.append(result_tmp2, score2)
                print(result_tmp2)
                result_tmp3 = np.append(result_tmp3, prediction2)
                end = time.strftime(time_format, time.localtime())
                print('%s end at %s.' % (fx, end))
            result_tmp3 = pd.DataFrame(result_tmp3.reshape(-1, len(optimizers)),
                                       columns=optimizers)
            result_tmp3['real'] = t_test['real_target'].values
            result_tmp3.to_pickle('%s/pre_result/%s_%i.pkl' %
                                  (FILE_PREX, fx, i))
        result0 = pd.DataFrame(result_tmp0.reshape(-1, len(optimizers)),
                               index=FX_LIST,
                               columns=optimizers)
        print(result0)
        result1 = pd.DataFrame(result_tmp1.reshape(-1, len(optimizers)),
                               index=FX_LIST,
                               columns=optimizers)
        print(result1)
        result2 = pd.DataFrame(result_tmp2.reshape(-1, len(optimizers)),
                               index=FX_LIST,
                               columns=optimizers)
        print(result2)
        result0.to_pickle('%s/exam_mape_%i.pkl' % (FILE_PREX, i))
        result1.to_pickle('%s/exam_evs_%i.pkl' % (FILE_PREX, i))
        result2.to_pickle('%s/exam_mse_%i.pkl' % (FILE_PREX, i))
Example #15
    net = tf.reshape(net, [-1, net_shape[1] * net_shape[2] * net_shape[3]])

    return learn.models.logistic_regression(net, y)


# Download and load MNIST data.
mnist = input_data.read_data_sets('MNIST_data')

# Restore model if graph is saved into a folder.
if os.path.exists("models/resnet/graph.pbtxt"):
    classifier = learn.TensorFlowEstimator.restore("models/resnet/")
else:
    # Create a new resnet classifier.
    classifier = learn.TensorFlowEstimator(model_fn=res_net,
                                           n_classes=10,
                                           batch_size=100,
                                           steps=100,
                                           learning_rate=0.001,
                                           continue_training=True)

while True:
    # Train model and save summaries into logdir.
    classifier.fit(mnist.train.images,
                   mnist.train.labels,
                   logdir="models/resnet/")

    # Calculate accuracy.
    score = metrics.accuracy_score(
        mnist.test.labels, classifier.predict(mnist.test.images,
                                              batch_size=64))
    print('Accuracy: {0:f}'.format(score))
Example #16
def rnn_model(X, y):
    """Recurrent neural network model to predict from sequence of words
    to a class."""
    # Convert indexes of words into embeddings.
    # This creates embeddings matrix of [n_words, EMBEDDING_SIZE] and then
    # maps word indexes of the sequence into [batch_size, sequence_length,
    # EMBEDDING_SIZE].
    word_vectors = learn.ops.categorical_variable(X, n_classes=n_words,
        embedding_size=EMBEDDING_SIZE, name='words')
    # Split into list of embedding per word, while removing doc length dim.
    # word_list results to be a list of tensors [batch_size, EMBEDDING_SIZE].
    word_list = learn.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, word_vectors)
    # Create a Gated Recurrent Unit cell with hidden size of EMBEDDING_SIZE.
    cell = tf.nn.rnn_cell.GRUCell(EMBEDDING_SIZE)
    # Create an unrolled Recurrent Neural Networks to length of
    # MAX_DOCUMENT_LENGTH and passes word_list as inputs for each unit.
    _, encoding = tf.nn.rnn(cell, word_list, dtype=tf.float32)
    # Given the encoding of the RNN, take the encoding of the last step (i.e.
    # the hidden state after the final step) and pass it as features for
    # logistic regression over output classes.
    return learn.models.logistic_regression(encoding, y)

classifier = learn.TensorFlowEstimator(model_fn=rnn_model, n_classes=15,
    steps=1000, optimizer='Adam', learning_rate=0.01, continue_training=True)

# Continuously train for 1000 steps & predict on test set.
while True:
    classifier.fit(X_train, y_train, logdir='/tmp/tf_examples/word_rnn')
    score = metrics.accuracy_score(y_test, classifier.predict(X_test))
    print('Accuracy: {0:f}'.format(score))
Example #17
def train_all_models():
    columns = np.empty(0)
    steps = 20000  # !
    batch_size = 30
    learning_rate = 0.001
    result_tmp0 = np.empty(0)
    for i in SCALE:
        for fx in FX_LIST:
            result_tmp1 = pd.DataFrame()
            result_tmp2 = pd.DataFrame()
            for name in models:
                if i == SCALE[-1]:
                    columns = np.append(columns, '%s%s_MSE' % (fx, name))
                    columns = np.append(columns, '%s%s_MAPE' % (fx, name))
                    # columns = np.append(columns, '%s%s_EVS' % (fx, name))
                    columns = np.append(columns, '%s%s_R2' % (fx, name))
                    columns = np.append(columns, '%s%s_R2_R' % (fx, name))
                    # columns = np.append(columns, '%s%s_MAPE_C' % (fx, name))
                start = time.strftime(time_format, time.localtime())
                print('%s with %s for h=%i start at %s.' %
                      (fx, name, i, start))
                logdir = '%s/tensorboard_models/exam/%s/%s_%i' % (FILE_PREX,
                                                                  name, fx, i)
                if name == 'CNN':
                    f_train = np.load('%s/NFs/%s_train_%i.npy' %
                                      (FILE_PREX, fx, i))
                    f_test = np.load('%s/NFs/%s_test_%i.npy' %
                                     (FILE_PREX, fx, i))
                    f_plot = np.load('%s/NFs/%s_plot_%i.npy' %
                                     (FILE_PREX, fx, i))
                    t_train = pd.read_pickle('%s/T/%s_train_%i.pkl' %
                                             (FILE_PREX, fx, i))
                    t_test = pd.read_pickle('%s/T/%s_test_%i.pkl' %
                                            (FILE_PREX, fx, i))
                    t_plot = pd.read_pickle('%s/T/%s_plot_%i.pkl' %
                                            (FILE_PREX, fx, i))
                    model = learn.TensorFlowEstimator(
                        model_fn=conv_model,
                        n_classes=0,
                        batch_size=batch_size,
                        steps=steps,
                        optimizer='Adagrad',
                        learning_rate=learning_rate)
                    model.fit(f_train, t_train['change'].values, logdir=logdir)
                    model.save('%s/saves/exam/%s/%s_%i' %
                               (FILE_PREX, name, fx, i))
                else:
                    f_train = np.load('%s/NFs/%s_train_5_%i.npy' %
                                      (FILE_PREX, fx, i))
                    f_test = np.load('%s/NFs/%s_test_5_%i.npy' %
                                     (FILE_PREX, fx, i))
                    f_plot = np.load('%s/NFs/%s_plot_5_%i.npy' %
                                     (FILE_PREX, fx, i))
                    t_train = pd.read_pickle('%s/T/%s_train_5_%i.pkl' %
                                             (FILE_PREX, fx, i))
                    t_test = pd.read_pickle('%s/T/%s_test_5_%i.pkl' %
                                            (FILE_PREX, fx, i))
                    t_plot = pd.read_pickle('%s/T/%s_plot_5_%i.pkl' %
                                            (FILE_PREX, fx, i))
                    if name == 'ANN-10':
                        model = learn.TensorFlowEstimator(
                            model_fn=model10,
                            n_classes=0,
                            optimizer='Adagrad',
                            batch_size=batch_size,
                            steps=steps,
                            learning_rate=learning_rate)
                        model.fit(f_train,
                                  t_train['change'].values,
                                  logdir=logdir)
                        model.save('%s/saves/exam/%s/%s_%i' %
                                   (FILE_PREX, name, fx, i))
                    elif name == 'ANN-15':
                        model = learn.TensorFlowEstimator(
                            model_fn=model15,
                            n_classes=0,
                            optimizer='Adagrad',
                            batch_size=batch_size,
                            steps=steps,
                            learning_rate=learning_rate)
                        model.fit(f_train,
                                  t_train['change'].values,
                                  logdir=logdir)
                        model.save('%s/saves/exam/%s/%s_%i' %
                                   (FILE_PREX, name, fx, i))
                    elif name == 'ANN-20':
                        model = learn.TensorFlowEstimator(
                            model_fn=model20,
                            n_classes=0,
                            optimizer='Adagrad',
                            batch_size=batch_size,
                            steps=steps,
                            learning_rate=learning_rate)
                        model.fit(f_train,
                                  t_train['change'].values,
                                  logdir=logdir)
                        model.save('%s/saves/exam/%s/%s_%i' %
                                   (FILE_PREX, name, fx, i))
                    else:
                        model = svm.SVR()
                        model.fit(f_train, t_train['change'].values)
                prediction1 = model.predict(f_test)
                prediction2 = (prediction1 / 100 + 1) * \
                    t_test['target_open'].values
                prediction3 = model.predict(f_plot)
                prediction4 = (prediction3 / 100 + 1) * \
                    t_plot['target_open'].values
                score0 = metrics.mean_squared_error(
                    t_test['real_target'].values, prediction2)
                score1 = mean_absolute_percentage_error(
                    t_test['real_target'].values, prediction2)
                # score2 = metrics.explained_variance_score(
                #     t_test['real_target'].values, prediction2)
                score2 = metrics.r2_score(t_test['change'].values, prediction1)
                score3 = metrics.r2_score(t_test['real_target'].values,
                                          prediction2)
                result_tmp0 = np.append(result_tmp0, score0)
                result_tmp0 = np.append(result_tmp0, score1)
                result_tmp0 = np.append(result_tmp0, score2)
                result_tmp0 = np.append(result_tmp0, score3)
                result_tmp1['%s' % name] = prediction2
                result_tmp2['%s' % name] = prediction4
                end = time.strftime(time_format, time.localtime())
                print('%s with %s for h=%i end at %s.\n'
                      'MSE: %f\nMAPE: %f\nR2: %f\nR2_R: %f' %
                      (fx, name, i, end, score0, score1, score2, score3))
            # result_tmp1 = pd.DataFrame(
            #     result_tmp1.reshape(len(models), -1), columns=models)
            result_tmp1['real'] = t_test['real_target'].values
            result_tmp1.to_pickle('%s/pre_result/%s_all_%i.pkl' %
                                  (FILE_PREX, fx, i))
            # result_tmp2 = pd.DataFrame(
            #     result_tmp2.reshape(-1, len(models)), columns=models)
            result_tmp2['real'] = t_plot['real_target'].values
            result_tmp2.to_pickle('%s/pre_result/%s_plot_%i.pkl' %
                                  (FILE_PREX, fx, i))
    result0 = pd.DataFrame(result_tmp0.reshape(-1, len(columns)),
                           index=SCALE,
                           columns=columns)
    result0.to_pickle('%s/exam_all.pkl' % FILE_PREX)
Example #18
    decoding, _, sampling_decoding, _ = learn.ops.rnn_seq2seq(
        in_X, in_y, encoder_cell, decoder_cell)
    return learn.ops.sequence_classifier(decoding, out_y, sampling_decoding)


def get_language_model(hidden_size):
    """Returns a language model with given hidden size."""
    def language_model(X, y):
        inputs = learn.ops.one_hot_matrix(X, 256)
        inputs = learn.ops.split_squeeze(1, MAX_DOC_LENGTH, inputs)
        target = learn.ops.split_squeeze(1, MAX_DOC_LENGTH, y)
        encoder_cell = tf.nn.rnn_cell.OutputProjectionWrapper(
            tf.nn.rnn_cell.GRUCell(hidden_size), 256)
        output, _ = tf.nn.rnn(encoder_cell, inputs, dtype=tf.float32)
        return learn.ops.sequence_classifier(output, target)

    return language_model


### Training model.

estimator = learn.TensorFlowEstimator(model_fn=get_language_model(HIDDEN_SIZE),
                                      n_classes=256,
                                      optimizer='Adam',
                                      learning_rate=0.01,
                                      steps=1000,
                                      batch_size=64,
                                      continue_training=True)

estimator.fit(X, y)
Example #19
X_test = np.array(list(char_processor.transform(X_test)))

### Models

HIDDEN_SIZE = 20


def char_rnn_model(X, y):
    byte_list = learn.ops.one_hot_matrix(X, loader.num_alpha)
    byte_list = learn.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, byte_list)
    cell = tf.nn.rnn_cell.GRUCell(HIDDEN_SIZE)
    _, encoding = tf.nn.rnn(cell, byte_list, dtype=tf.float32)
    return learn.models.logistic_regression(encoding, y)


classifier = learn.TensorFlowEstimator(model_fn=char_rnn_model,
                                       n_classes=loader.num_hash,
                                       steps=100,
                                       optimizer='Adam',
                                       learning_rate=0.01,
                                       continue_training=True)

# Continuously train for 100 steps & predict on test set.

print("TRAIN")
while True:
    classifier.fit(X_train, y_train)
    score = metrics.accuracy_score(y_test, classifier.predict(X_test))
    classifier.save('save/')
    print("Accuracy: %f" % score)
Example #20
# hyperparameters: these are adjustable and lead to different results
LOG_DIR = os.path.join(os.getcwd(), datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
TIMESTEPS = 8
RNN_LAYERS = [150]
DENSE_LAYERS = None
TRAINING_STEPS = 5000
BATCH_SIZE = 100
PRINT_STEPS = TRAINING_STEPS // 10
LEARNING_RATE = 0.05

# TensorFlowEstimator does all the training work
regressor = learn.TensorFlowEstimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS,
                                                          DENSE_LAYERS),
                                      n_classes=0,
                                      verbose=1,
                                      steps=TRAINING_STEPS,
                                      optimizer='SGD',
                                      learning_rate=LEARNING_RATE,
                                      batch_size=BATCH_SIZE,
                                      continue_training=True)

# read the data
print("Reading CSV file...")
with open('pub.csv') as f:
    data = list(reader(f.read().splitlines()))

    # get output: standardized impressions are in column 5
    adOps = [float(i[5]) for i in data[1:]]
    tf.to_float(adOps, name='ToFloat')
Example #21
vocab_processor = learn.preprocessing.ByteProcessor(
    max_document_length=MAX_DOCUMENT_LENGTH)

x_iter = vocab_processor.transform(X_train)
y_iter = vocab_processor.transform(y_train)
xpred = np.array(list(vocab_processor.transform(X_test))[:20])
ygold = list(y_test)[:20]

PATH = '/tmp/tf_examples/ntm/'

if os.path.exists(PATH):
    translator = learn.TensorFlowEstimator.restore(PATH)
else:
    translator = learn.TensorFlowEstimator(model_fn=translate_model,
                                           n_classes=256,
                                           optimizer='Adam',
                                           learning_rate=0.01,
                                           batch_size=128,
                                           continue_training=True)

while True:
    translator.fit(x_iter, y_iter, logdir=PATH)
    translator.save(PATH)

    predictions = translator.predict(xpred, axis=2)
    xpred_inp = vocab_processor.reverse(xpred)
    text_outputs = vocab_processor.reverse(predictions)
    for inp_data, input_text, pred, output_text, gold in zip(
            xpred, xpred_inp, predictions, text_outputs, ygold):
        print('English: %s. French (pred): %s, French (gold): %s' %
              (input_text, output_text, gold.decode('utf-8')))
        print(inp_data, pred)
Example #22
                                   bias=True,
                                   activation=tf.nn.relu)
        h_pool1 = max_pool_2x2(h_conv1)
    # second conv layer will compute 64 features for each 5x5 patch
    with tf.variable_scope('conv_layer2'):
        h_conv2 = learn.ops.conv2d(h_pool1,
                                   n_filters=64,
                                   filter_shape=[5, 5],
                                   bias=True,
                                   activation=tf.nn.relu)
        h_pool2 = max_pool_2x2(h_conv2)
        # reshape tensor into a batch of vectors
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    # densely connected layer with 1024 neurons
    h_fc1 = learn.ops.dnn(h_pool2_flat, [1024],
                          activation=tf.nn.relu,
                          dropout=0.5)
    return learn.models.logistic_regression(h_fc1, y)


# Training and predicting
classifier = learn.TensorFlowEstimator(model_fn=conv_model,
                                       n_classes=10,
                                       batch_size=100,
                                       steps=20000,
                                       learning_rate=0.001)
classifier.fit(mnist.train.images, mnist.train.labels)
score = metrics.accuracy_score(mnist.test.labels,
                               classifier.predict(mnist.test.images))
print('Accuracy: {0:f}'.format(score))
Example #23
LOG_DIR = os.path.join(os.getcwd(), datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
TIMESTEPS = 80
RNN_LAYERS = [80]
DENSE_LAYERS = None
TRAINING_STEPS = 30000
BATCH_SIZE = 100
PRINT_STEPS = TRAINING_STEPS // 100

my_dir = os.sep.join([os.path.expanduser('~'), 'Desktop', 'sine'])

regressor = learn.TensorFlowEstimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS,
                                                          DENSE_LAYERS),
                                      n_classes=0,
                                      verbose=2,
                                      steps=TRAINING_STEPS,
                                      optimizer='SGD',
                                      learning_rate=0.001,
                                      batch_size=BATCH_SIZE,
                                      class_weight=[1])

#generate SINE WAVE data
X, y = generate_data(np.sin, np.linspace(0, 100, 5000), TIMESTEPS, seperate=False)
# create a lstm instance and validation monitor
validation_monitor = learn.monitors.ValidationMonitor(X['val'], y['val'],
                                                      every_n_steps=PRINT_STEPS,
                                                      early_stopping_rounds=100000)

regressor.fit(X['train'], y['train'], monitors=[validation_monitor], logdir=LOG_DIR)

# based on training, get the predictions
Example #24
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from sklearn import datasets, metrics, cross_validation
from tensorflow.contrib import learn

iris = datasets.load_iris()
X_train, X_test, y_train, y_test = cross_validation.train_test_split(iris.data, iris.target,
    test_size=0.2, random_state=42)

def my_model(X, y):
    """DNN with hidden layers of 10, 20 and 10 units, and dropout with probability 0.1."""
    layers = learn.ops.dnn(X, [10, 20, 10], dropout=0.1)
    return learn.models.logistic_regression(layers, y)

classifier = learn.TensorFlowEstimator(model_fn=my_model, n_classes=3,
    steps=1000)
classifier.fit(X_train, y_train)
score = metrics.accuracy_score(y_test, classifier.predict(X_test))
print('Accuracy: {0:f}'.format(score))
Example #25
import tensorflow.contrib.learn as skflow
from sklearn import datasets, metrics, preprocessing
import numpy as np
import pandas as pd

df = pd.read_csv("data/CHD.csv", header=0)
print(df.describe())


def my_model(X, y):
    return skflow.models.logistic_regression(X, y)


a = preprocessing.StandardScaler()

X = a.fit_transform(df['age'].astype(float))

print(a.get_params())
classifier = skflow.TensorFlowEstimator(model_fn=my_model, n_classes=1)
classifier.fit(X, df['chd'].astype(float), logdir='/tmp/logistic')
print(classifier.get_tensor_value('logistic_regression/bias:0'))
print(classifier.get_tensor_value('logistic_regression/weight:0'))
score = metrics.accuracy_score(df['chd'].astype(float), classifier.predict(X))
print("Accuracy: %f" % score)
Example #26
### Embeddings

EMBEDDING_SIZE = 3


def categorical_model(X, y):
    features = skflow.ops.categorical_variable(X,
                                               n_classes,
                                               embedding_size=EMBEDDING_SIZE,
                                               name='embarked')
    return skflow.models.logistic_regression(tf.squeeze(features, [1]), y)


# features has shape (712, 1, 3)

classifier = skflow.TensorFlowEstimator(model_fn=categorical_model,
                                        n_classes=2)
classifier.fit(X_train, y_train)

print("Accuracy: {0}".format(
    metrics.accuracy_score(classifier.predict(X_test), y_test)))
print("ROC: {0}".format(
    metrics.roc_auc_score(classifier.predict(X_test), y_test)))

### One Hot


def one_hot_categorical_model(X, y):
    features = skflow.ops.one_hot_matrix(X, n_classes)
    return skflow.models.logistic_regression(tf.squeeze(features, [1]), y)
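
The one-hot variant above is defined but never fitted in this snippet. A minimal hedged sketch of exercising it, reusing X_train/y_train/X_test/y_test from the embeddings section above:

classifier = skflow.TensorFlowEstimator(model_fn=one_hot_categorical_model,
                                        n_classes=2)
classifier.fit(X_train, y_train)
print("Accuracy: {0}".format(
    metrics.accuracy_score(y_test, classifier.predict(X_test))))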

Example #27
                              dropout=0.7)
    with tf.variable_scope('LR_Layer'):
        o_linear = learn.models.linear_regression(h_fc1, y)
    return o_linear


time_format = '%Y%m%d%H%M'
result_tmp = np.empty(0)
num_test = 8496

if __name__ == '__main__':
    for fx in FX_LIST:
        for optimizer in optimizers:
            re = learn.TensorFlowEstimator(model_fn=conv_model,
                                           n_classes=0,
                                           batch_size=200,
                                           steps=20000,
                                           optimizer=optimizer,
                                           learning_rate=0.001)
            path_f_final = [
                '%s/%s_FINAL_M_new100.npy' % (FILE_PREX, fx),
                '%s/%s_FINAL_S_new100.pkl' % (FILE_PREX, fx)
            ]
            data = np.load(path_f_final[0])
            data_s = pd.read_pickle(path_f_final[1])
            range_price = data_s['max_price'] - data_s['min_price']
            data = np.array([
                (data[i] - data_s['min_price'][i]) / range_price[i]
                for i in range(data.shape[0])
            ])
            data_train = data[:data.shape[0] - num_test]
            data_test = data[data.shape[0] - num_test:]
Example #28
import tensorflow.contrib.learn as skflow
from sklearn import datasets, metrics

iris = datasets.load_iris()
classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=3)
classifier.fit(iris.data, iris.target)
score = metrics.accuracy_score(iris.target, classifier.predict(iris.data))
print("Accuracy: %f" % score)
Example #29
# ## Parameter definitions
#
# - LOG_DIR: log directory
# - TIMESTEPS: RNN time steps
# - RNN_LAYERS: RNN layer configuration
# - DENSE_LAYERS: DNN layer sizes; [10, 10] means two dense layers with 10 hidden units each
# - TRAINING_STEPS: number of training steps
# - BATCH_SIZE: mini-batch size
# - PRINT_STEPS: interval for printing intermediate training progress (every 1% of the run)

# In[15]:

regressor = learn.TensorFlowEstimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS,
                                                          DENSE_LAYERS),
                                      n_classes=0,
                                      verbose=1,
                                      steps=TRAINING_STEPS,
                                      optimizer='Adagrad',
                                      learning_rate=0.03,
                                      batch_size=BATCH_SIZE)

# ## Create a regressor with TF Learn
#
# Build the prediction model using the TensorFlowEstimator provided by the TF Learn library.
#
# **Parameters**:
#
# - model_fn: the model to use for training and prediction
# - n_classes: number of label classes (0: prediction/regression, 1 or more: classification) -- to verify
# - verbose: print progress during training
# - steps: number of training steps
# - optimizer: optimization method ("SGD", "Adam", "Adagrad")
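
The comments above document the constructor call at the top of this cell. A hedged sketch of the subsequent training step, in the same style as the other LSTM examples on this page (the X/y dictionaries, LOG_DIR, and numpy-as-np are assumed to come from earlier notebook cells):

regressor.fit(X['train'], y['train'], logdir=LOG_DIR)
predicted = regressor.predict(X['test'])
mse = ((np.asarray(predicted) - np.asarray(y['test'])) ** 2).mean()
print('MSE: %f' % mse)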
Example #30
# Create random dataset.
rng = np.random.RandomState(1)
X = np.sort(200 * rng.rand(100, 1) - 100, axis=0)
y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T

# Fit regression DNN models.
regressors = []
options = [[2], [10, 10], [20, 20]]
for hidden_units in options:
    def tanh_dnn(X, y):
        features = learn.ops.dnn(X, hidden_units=hidden_units,
                                 activation=tf.tanh)
        return learn.models.linear_regression(features, y)

    regressor = learn.TensorFlowEstimator(model_fn=tanh_dnn, n_classes=0,
        steps=500, learning_rate=0.1, batch_size=100)
    regressor.fit(X, y)
    score = mean_squared_error(regressor.predict(X), y)
    print("Mean Squared Error for {0}: {1:f}".format(str(hidden_units), score))
    regressors.append(regressor)

# Predict on new random Xs.
X_test = np.arange(-100.0, 100.0, 0.1)[:, np.newaxis]
y_1 = regressors[0].predict(X_test)
y_2 = regressors[1].predict(X_test)
y_3 = regressors[2].predict(X_test)

# Plot the results
plt.figure()
plt.scatter(y[:, 0], y[:, 1], c="k", label="data")
plt.scatter(y_1[:, 0], y_1[:, 1], c="g",