コード例 #1
0
def preprocess_data():
    """Load the StockTwits SPY sentiment dataset, clean and encode it, and
    split it into train/validation/test sets.

    Returns
    -------
    tuple
        (train_x, val_x, test_x, train_y, val_y, test_y, vocab_to_int)
    """
    # Load the gzipped CSV of messages and sentiment labels.
    data = pd.read_csv("data/StockTwits_SPY_Sentiment_2017.gz",
                       encoding="utf-8",
                       compression="gzip",
                       index_col=0)

    raw_messages = data.message.values
    labels = data.sentiment.values

    # Normalize every raw StockTwits message with the project helper.
    messages = np.array(
        [utl.preprocess_ST_message(m) for m in raw_messages])

    # Build word <-> integer lookup tables from the whole corpus.
    lexicon = " ".join(messages).split()
    vocab_to_int, int_to_vocab = utl.create_lookup_tables(lexicon)

    # Report basic length statistics before dropping empty messages.
    length_counts = Counter(len(m) for m in messages)
    print("Zero-length messages: {}".format(length_counts[0]))
    print("Maximum message length: {}".format(max(length_counts)))
    print("Average message length: {}".format(
        np.mean([len(m) for m in messages])))

    messages, labels = utl.drop_empty_messages(messages, labels)

    # Integer-encode messages and labels, then pad to a fixed length.
    messages = utl.encode_ST_messages(messages, vocab_to_int)
    labels = utl.encode_ST_labels(labels)
    messages = utl.zero_pad_messages(messages, seq_len=244)

    train_x, val_x, test_x, train_y, val_y, test_y = utl.train_val_test_split(
        messages, labels, split_frac=0.80)
    return train_x, val_x, test_x, train_y, val_y, test_y, vocab_to_int
コード例 #2
0
def main(nlp, glove_dir):
    """Filter out sentences that are too short to be meaningful or far longer
    than the rest of our data.

    Parameters
    -----------
    nlp: spacy.lang.en.English
        Spacy parser used for tokenization.
    glove_dir: str
        Location to load glove vectors from.
    """
    # Load the sentence data with compact dtypes, keeping only needed columns.
    dtypes = dict(text=object, sex='category', age=np.int8)
    df = pd.read_csv('data/sentences.csv', dtype=dtypes, usecols=dtypes.keys())

    # Binarize sex: 'male' -> 1, anything else -> 0.
    df['sex'] = (df.sex == 'male') * 1

    # Keep sentences with 5..50 whitespace-delimited tokens.
    n_tokens = df.text.str.split().str.len()
    df = df[(n_tokens >= 5) & (n_tokens <= 50)]

    data = train_val_test_split(df.text,
                                df[['sex', 'age']],
                                train_p=.99,
                                val_p=.005,
                                state=1,
                                shuffle=True)
    # Order: x_train, x_val, x_test, y_train, y_val, y_test
    save_pickle(data, 'split_data')

    # w2count, w2idx, i2w, and w2vec will be pickled for easy access.
    build_word_mappings(data[0], nlp, glove_dir)
コード例 #3
0
def model(labels, data, go_id):
    """Train a 2-D convolutional classifier (legacy Keras API, Python 2) for
    one GO term, save its weights, and return a classification report.

    `labels` and `data` are project array-likes consumed by
    train_val_test_split; `go_id` names the GO term and is used in the
    saved-weights filename.  Returns the sklearn classification_report
    string computed on the test split.
    """
    # set parameters:
    # Embedding
    # Convolution
    nb_conv = 7
    nb_filter = 64
    nb_pool = 2  # NOTE(review): unused in this variant

    # Training
    batch_size = 30
    nb_epoch = 12

    # Project helper returns (label, data) pairs for each split.
    train, val, test = train_val_test_split(labels,
                                            data,
                                            batch_size=batch_size)
    train_label, train_data = train

    val_label, val_data = val
    test_label, test_data = test
    test_label_rep = test_label  # untouched copy for the final report

    # Shape samples as (n, channels=1, height=500, width=20) for Convolution2D
    # (Theano-style channel-first layout).
    train_data = train_data.reshape(train_data.shape[0], 1, 500, 20)
    test_data = test_data.reshape(test_data.shape[0], 1, 500, 20)
    val_data = val_data.reshape(val_data.shape[0], 1, 500, 20)
    # Conv(96, 7x1) -> ReLU -> Conv(64, 3x1) -> ReLU -> dropout -> dense sigmoid.
    model = Sequential()
    model.add(
        Convolution2D(96,
                      nb_conv,
                      1,
                      border_mode='valid',
                      input_shape=(1, 500, 20)))
    model.add(Activation('relu'))
    model.add(Convolution2D(nb_filter, 3, 1, border_mode='valid'))
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  class_mode='binary')

    model.fit(X=train_data,
              y=train_label,
              batch_size=batch_size,
              nb_epoch=nb_epoch,
              show_accuracy=True,
              verbose=1,
              validation_data=(val_data, val_label))
    # # Loading saved weights
    # print 'Loading weights'
    # model.load_weights(DATA_ROOT + go_id + '.hdf5')
    pred_data = model.predict_classes(test_data, batch_size=batch_size)
    # Saving the model
    print 'Saving the model for ' + go_id
    model.save_weights(DATA_ROOT + go_id + '.hdf5', overwrite=True)
    return classification_report(list(test_label_rep), pred_data)
コード例 #4
0
def model(labels, data, go_id):
    """Pre-train a dense autoencoder on the training data, then reuse its
    encoder as the front end of a softmax classifier (legacy Keras API).

    `labels` and `data` are project array-likes consumed by
    train_val_test_split; `go_id` is unused in this variant.  Prints
    validation scores before and after fine-tuning; returns None.
    """
    batch_size = 64
    nb_classes = 2
    nb_epoch = 1

    train, val, test = train_val_test_split(labels,
                                            data,
                                            batch_size=batch_size)
    train_label, train_data = train

    val_label, val_data = val
    test_label, test_data = test
    test_label_rep = test_label

    # Flatten each 500x20 sample into a 10000-dim vector.
    train_data = train_data.reshape(-1, 500 * 20)
    test_data = test_data.reshape(-1, 500 * 20)
    val_data = val_data.reshape(-1, 500 * 20)

    # Autoencoder: 10000 -> 5000 -> 100 encoder with a mirrored decoder.
    # (A dead `enc_wt = []` accumulator was removed here.)
    ae = Sequential()
    encoder = containers.Sequential([Dense(5000, input_dim=10000), Dense(100)])
    decoder = containers.Sequential([Dense(5000, input_dim=100), Dense(10000)])
    ae.add(
        AutoEncoder(encoder=encoder,
                    decoder=decoder,
                    output_reconstruction=True))

    ae.compile(loss='mean_squared_error', optimizer='rmsprop')
    ae.fit(train_data,
           train_data,
           batch_size=batch_size,
           nb_epoch=nb_epoch,
           show_accuracy=False,
           verbose=1,
           validation_data=[val_data, val_data])

    # Classifier: reuse the pre-trained encoder and add a softmax head.
    model = Sequential()
    model.add(encoder)
    model.add(Dense(100, nb_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    # BUG FIX: the second message said "after fine turning" even though this
    # evaluation happens before fine-tuning; also "turning" -> "tuning".
    score = model.evaluate(val_data, val_label, show_accuracy=True, verbose=0)
    print('Test score before fine tuning:', score[0])
    print('Test accuracy before fine tuning:', score[1])

    model.fit(train_data,
              train_label,
              batch_size=batch_size,
              nb_epoch=nb_epoch,
              show_accuracy=True,
              validation_data=(val_data, val_label))
    score = model.evaluate(val_data, val_label, show_accuracy=True, verbose=0)
    print('Test score after fine tuning:', score[0])
    print('Test accuracy after fine tuning:', score[1])
コード例 #5
0
def main():
    """Download the reviews dataset, train an RNN sentiment model, and
    evaluate it on the held-out test split."""
    # Fetch the raw data files.
    utils.download(REVIEWS_URL, REVIEWS_FILE)
    utils.download(LABELS_URL, LABELS_FILE)

    # Preprocess into integer features/labels; N_WORDS is the vocab size.
    features, labels, N_WORDS = utils.preprocess(REVIEWS_FILE, LABELS_FILE)
    splits = utils.train_val_test_split(features, labels, 0.8)
    train_x, train_y, val_x, val_y, test_x, test_y = splits

    # Build, train, and test the model.
    model = RnnSentiment(BATCH_SIZE, EMBED_SIZE, N_WORDS, LSTM_LAYERS,
                         LSTM_SIZE, LEARNING_RATE, EPOCHS)
    model.build()
    model.train(train_x, train_y, val_x, val_y, KEEP_PROB)
    model.test(test_x, test_y)
コード例 #6
0
def split(args):
    """Split the graph named by ``args.name`` into train/val/test edge sets
    and persist the training portion as a sparse .npz archive."""
    graph_file = '../graph2gauss/data/%s.npz' % (args.name)
    # A: adjacency (CSR), X: node attributes (CSR), plus held-out edge sets
    # with their ground-truth labels.
    (A, X, labels,
     val_edges, val_ground_truth,
     test_edges, test_ground_truth) = train_val_test_split(
         graph_file, p_val=args.p_val, p_test=args.p_test)

    out_path = 'data/%s/%s_train.npz' % (args.name, args.name)
    # Store the CSR components explicitly so the matrices can be rebuilt.
    np.savez(out_path,
             adj_data=A.data, adj_indices=A.indices,
             adj_indptr=A.indptr, adj_shape=A.shape,
             attr_data=X.data, attr_indices=X.indices,
             attr_indptr=X.indptr, attr_shape=X.shape,
             labels=labels,
             val_edges=val_edges, val_ground_truth=val_ground_truth,
             test_edges=test_edges, test_ground_truth=test_ground_truth)
    print('%s train data saved' % args.name)
コード例 #7
0
def model(labels, data, go_id):
    # set parameters:
    # Embedding
    # Convolution
    nb_conv = 7
    nb_filter = 64
    nb_pool = 2

  
    # Training
    batch_size = 30
    nb_epoch = 12

    train, val, test = train_val_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train

    val_label, val_data = val
    test_label, test_data = test
    test_label_rep = test_label
    
    train_data = train_data.reshape(train_data.shape[0], 1, 500, 20)
    test_data = test_data.reshape(test_data.shape[0], 1, 500, 20)
    val_data = val_data.reshape(val_data.shape[0], 1, 500, 20)
    model = Sequential()
    model.add(Convolution2D(96, nb_conv, 1,
                        border_mode='valid',
                        input_shape=(1, 500, 20)))
    model.add(Activation('relu'))
    model.add(Convolution2D(nb_filter, 3, 1,
                        border_mode='valid'))
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(
        loss='binary_crossentropy', optimizer='adam', class_mode='binary')

    model.fit(
        X=train_data, y=train_label,
        batch_size=batch_size, nb_epoch=nb_epoch,
        show_accuracy=True, verbose=1,
        validation_data=(val_data, val_label))
    # # Loading saved weights
    # print 'Loading weights'
    # model.load_weights(DATA_ROOT + go_id + '.hdf5')
    pred_data = model.predict_classes(test_data, batch_size=batch_size)
    # Saving the model
    print 'Saving the model for ' + go_id
    model.save_weights(DATA_ROOT + go_id + '.hdf5', overwrite=True)
    return classification_report(list(test_label_rep), pred_data)
コード例 #8
0
def model(labels, data, go_id):
    """Pre-train a dense autoencoder, then reuse its encoder as the front
    end of a softmax classifier (legacy Keras API).

    BUG FIX: the original body mixed tabs and spaces (an IndentationError in
    Python 3 and fragile in Python 2); normalized to 4-space indents.  Also
    fixed the before/after evaluation messages ("after fine turning" was
    printed for the pre-fine-tuning score) and removed the dead `enc_wt`
    accumulator.

    `labels` and `data` are project array-likes consumed by
    train_val_test_split; `go_id` is unused here.  Returns None.
    """
    batch_size = 64
    nb_classes = 2
    nb_epoch = 1

    train, val, test = train_val_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train

    val_label, val_data = val
    test_label, test_data = test
    test_label_rep = test_label

    # Flatten each 500x20 sample into a 10000-dim vector.
    train_data = train_data.reshape(-1, 500 * 20)
    test_data = test_data.reshape(-1, 500 * 20)
    val_data = val_data.reshape(-1, 500 * 20)

    # Autoencoder: 10000 -> 5000 -> 100 encoder with a mirrored decoder.
    ae = Sequential()
    encoder = containers.Sequential([Dense(5000, input_dim=10000), Dense(100)])
    decoder = containers.Sequential([Dense(5000, input_dim=100), Dense(10000)])
    ae.add(AutoEncoder(encoder=encoder, decoder=decoder,
                       output_reconstruction=True))

    ae.compile(loss='mean_squared_error', optimizer='rmsprop')
    ae.fit(train_data, train_data, batch_size=batch_size, nb_epoch=nb_epoch,
           show_accuracy=False, verbose=1,
           validation_data=[val_data, val_data])

    # Classifier: reuse the pre-trained encoder and add a softmax head.
    model = Sequential()
    model.add(encoder)
    model.add(Dense(100, nb_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    score = model.evaluate(val_data, val_label, show_accuracy=True, verbose=0)
    print('Test score before fine tuning:', score[0])
    print('Test accuracy before fine tuning:', score[1])

    model.fit(train_data, train_label, batch_size=batch_size,
              nb_epoch=nb_epoch, show_accuracy=True,
              validation_data=(val_data, val_label))
    score = model.evaluate(val_data, val_label, show_accuracy=True, verbose=0)
    print('Test score after fine tuning:', score[0])
    print('Test accuracy after fine tuning:', score[1])
コード例 #9
0
def model(labels, data, go_id):
    """Train an LSTM classifier (legacy Keras API, Python 2) for one GO
    term, save its weights, and return a classification report.

    `labels` and `data` are project array-likes consumed by
    train_val_test_split; `go_id` names the GO term and is used in the
    saved-weights filename.  Returns the sklearn classification_report
    string computed on the test split.
    """
    # set parameters:
    max_features = 5000  # vocabulary size for the Embedding layer
    batch_size = 16
    nb_epoch = 12
    maxlen = 500  # NOTE(review): unused in this variant
    train, val, test = train_val_test_split(labels,
                                            data,
                                            batch_size=batch_size)
    train_label, train_data = train

    val_label, val_data = val
    test_label, test_data = test

    # Untouched copy of the test labels for the final report.
    test_label_rep = test_label

    # Embedding -> LSTM -> dropout -> single sigmoid output.
    model = Sequential()
    model.add(Embedding(max_features, 8, mask_zero=True))
    model.add(LSTM(8, 8))
    model.add(Dropout(0.5))
    model.add(Dense(8, 1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  class_mode='binary')

    model.fit(X=train_data,
              y=train_label,
              batch_size=batch_size,
              nb_epoch=nb_epoch,
              show_accuracy=True,
              verbose=1,
              validation_data=(val_data, val_label))
    # # Loading saved weights
    # print 'Loading weights'
    # model.load_weights(DATA_ROOT + go_id + '.hdf5')
    pred_data = model.predict_classes(test_data, batch_size=batch_size)
    # Saving the model
    print 'Saving the model for ' + go_id
    model.save_weights(DATA_ROOT + go_id + '.hdf5', overwrite=True)
    return classification_report(list(test_label_rep), pred_data)
コード例 #10
0
def split_dataset(selected_data):
    """Split ``selected_data`` into train/val/test sets and min-max scale
    the features using statistics fitted on the training portion only.

    Returns (X_train, X_val, X_test, y_train, y_val, y_test).
    """
    # Split fractions; must sum to 1.0.
    train_size, val_size, test_size = 0.74, 0.13, 0.13

    (X_train, X_val, X_test,
     y_train, y_val, y_test) = utils.train_val_test_split(
         selected_data,
         train_size,
         val_size,
         test_size,
         random_state=0,
         time_factors=False)

    # Fit the scaler on the training data only (avoids leakage), then apply
    # the same transform to the validation and test sets.
    scaler = MinMaxScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_val = scaler.transform(X_val)
    X_test = scaler.transform(X_test)

    return X_train, X_val, X_test, y_train, y_val, y_test
コード例 #11
0
def model(labels, data, go_id):
    # set parameters:
    max_features = 5000
    batch_size = 16
    nb_epoch = 12
    maxlen = 500
    train, val, test = train_val_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train

    val_label, val_data = val
    test_label, test_data = test

    test_label_rep = test_label

    model = Sequential()
    model.add(Embedding(
        max_features, 8, mask_zero=True))
    model.add(LSTM(8, 8))
    model.add(Dropout(0.5))
    model.add(Dense(8, 1))
    model.add(Activation('sigmoid'))

    model.compile(
        loss='binary_crossentropy', optimizer='adam', class_mode='binary')

    model.fit(
        X=train_data, y=train_label,
        batch_size=batch_size, nb_epoch=nb_epoch,
        show_accuracy=True, verbose=1,
        validation_data=(val_data, val_label))
    # # Loading saved weights
    # print 'Loading weights'
    # model.load_weights(DATA_ROOT + go_id + '.hdf5')
    pred_data = model.predict_classes(test_data, batch_size=batch_size)
    # Saving the model
    print 'Saving the model for ' + go_id
    model.save_weights(DATA_ROOT + go_id + '.hdf5', overwrite=True)
    return classification_report(list(test_label_rep), pred_data)
コード例 #12
0
def main(args):
    """Train a small regression model on approximate synthetic data with
    optionally corrupted inputs, and print the test RMSE achieved at the
    epoch with the lowest validation loss.

    `args` supplies n_samples, val/test sizes, seed, corruption rate p,
    hidden sizes, lr, weight decay, lam, n_epochs, and the adapt /
    noise_free / debug flags.
    """
    X, y = get_appro_syn_data(args.n_samples)
    # Band [mean-std, mean+std] used by the approximation loss.
    mean, std = np.mean(y), np.std(y)
    min_y = mean - std
    max_y = mean + std

    X_train, X_val, X_test, y_train, y_val, y_test = \
        train_val_test_split(X, y, val_size=args.val_size,
                             test_size=args.test_size,
                             random_state=args.seed)

    # Corrupt a fraction p of the training/validation inputs.
    X_train_noise = corrupt_X(X_train, args.p)
    X_val_noise = corrupt_X(X_val, args.p)

    X_train = np2tensor(X_train)
    X_train_noise = np2tensor(X_train_noise)
    X_val = np2tensor(X_val)
    X_val_noise = np2tensor(X_val_noise)
    X_test = np2tensor(X_test)
    y_train = np2tensor(y_train)
    y_val = np2tensor(y_val)
    y_test = np2tensor(y_test)

    model = Model(args.hid1, args.hid2)

    if torch.cuda.is_available():
        model = model.cuda()

    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=args.lr, weight_decay=args.wdc)

    best_val = 1e20
    result = None

    for epoch in tqdm(range(args.n_epochs)):
        # TODO: Mini-batch training
        model.train()
        optimizer.zero_grad()
        y_pred = model(X_train_noise)

        loss = rmse(y_pred, y_train)

        # Optional penalty keeping predictions inside [min_y, max_y].
        if args.adapt:
            adapt_loss = appro_loss(y_pred, min_y, max_y)
        else:
            adapt_loss = 0

        total_loss = loss + args.lam * adapt_loss

        total_loss.backward()
        optimizer.step()

        model.eval()

        # BUG FIX: evaluation previously built autograd graphs every epoch;
        # no_grad avoids that without changing the computed values.
        with torch.no_grad():
            if args.noise_free:
                y_pred = model(X_val)
            else:
                y_pred = model(X_val_noise)

            val_loss = rmse(y_pred, y_val).item()
            if val_loss < best_val:
                best_val = val_loss
                y_pred = model(X_test)
                result = rmse(y_pred, y_test).item()

        if args.debug:
            print("Epoch ", epoch, total_loss.item(), val_loss, result)

    print(result)
コード例 #13
0
def model(labels, data, go_id):
    # set parameters:
    max_features = 60000
    batch_size = 256
    embedding_dims = 100
    nb_filters = 250
    hidden_dims = 250
    nb_epoch = 12

    # pool lengths
    pool_length = 2
    # level of convolution to perform
    filter_length = 3

    # length of APAAC
    maxlen = 20 + 6 * LAMBDA

    train, val, test = train_val_test_split(
        labels, data, batch_size=batch_size)
    train_label, train_data = train

    val_label, val_data = val
    test_label, test_data = test

    test_label_rep = test_label

    model = Sequential()
    model.add(Embedding(max_features, embedding_dims))
    model.add(Dropout(0.25))
    model.add(Convolution1D(
        input_dim=embedding_dims,
        nb_filter=nb_filters,
        filter_length=filter_length,
        border_mode='valid',
        activation='relu',
        subsample_length=1))
    model.add(MaxPooling1D(pool_length=pool_length))
    model.add(Flatten())
    output_size = nb_filters * (((maxlen - filter_length) / 1) + 1) / 2
    model.add(Dense(output_size, hidden_dims))
    model.add(Dropout(0.25))
    model.add(Activation('relu'))
    model.add(Dense(hidden_dims, 1))
    model.add(Activation('sigmoid'))
    model.compile(
        loss='binary_crossentropy', optimizer='adam', class_mode='binary')
    weights_train = [1.0 if y == 1 else 1.0 for y in train_label]
    model.fit(
        X=train_data, y=train_label,
        batch_size=batch_size, nb_epoch=nb_epoch,
        show_accuracy=True, verbose=1,
        validation_data=(val_data, val_label))
    # # Loading saved weights
    # print 'Loading weights'
    # model.load_weights(DATA_ROOT + go_id + '.hdf5')
    score = model.evaluate(
        test_data, test_label,
        batch_size=batch_size, verbose=1, show_accuracy=True)
    print "Loss:", score[0], "Accuracy:", score[1]
    pred_data = model.predict_classes(test_data, batch_size=batch_size)
    # Saving the model
    print 'Saving the model for ' + go_id
    model.save_weights(DATA_ROOT + go_id + '.hdf5', overwrite=True)
    return classification_report(list(test_label_rep), pred_data)
コード例 #14
0
def main():
    """Train a 1-D CNN regression model on time-series data read from raw
    .dat files, logging validation accuracy each epoch and final test
    accuracy.

    Command-line options supply the input directory (-d), timesteps (-t),
    and input vector width (-n).
    """
    # Option Parser
    if (len(sys.argv) <= 1):
        print("memopad.py -h or --help to get guideline of input options")
        exit()
    use = "Usage: %prog [options] filename"
    parser = OptionParser(usage=use)
    parser.add_option("-d", "--input-dir", dest="input_dir", action="store",
                      type="string", help="input data dir")
    parser.add_option("-t", "--timesteps", dest="timesteps", action="store",
                      type="int", help="timesteps")
    parser.add_option("-n", "--num-input", dest="num_input", action="store",
                      type="int",
                      help="number of input (input vector's width)")

    (options, args) = parser.parse_args()
    input_dir = options.input_dir
    timesteps = options.timesteps
    num_input = options.num_input

    # Raw float64 dump reshaped to (samples, timesteps, num_input).
    X = np.fromfile(input_dir + '/X.dat', dtype=float)
    cardinality = int(X.shape[0] / (timesteps * num_input))
    X = X.reshape([cardinality, timesteps, num_input])
    Y = np.fromfile(input_dir + '/Y.dat', dtype=float)
    train_x, val_x, test_x, train_y, val_y, test_y = utl.train_val_test_split(
        X, Y, split_frac=0.80)

    # Training Parameters
    learning_rate = 0.001
    epochs = 30
    batch_size = 20

    # Network Parameters
    num_classes = 1

    print("### Network Parameters ###")
    print("Learning Rate: {}".format(learning_rate))
    print("Batch Size: {}".format(batch_size))
    print("Timestep: {}".format(timesteps))
    print("------------------")
    X_ = tf.placeholder("float", [None, timesteps, num_input])
    Y_ = tf.placeholder("float", [None, num_classes])
    lr = tf.placeholder("float")
    keep_prob_ = tf.placeholder(tf.float32, name='keep')

    # Three conv + max-pool stages; each pooling halves the time dimension.
    # (batch, T, num_input) -> (batch, T/2, 18)
    conv1 = tf.layers.conv1d(inputs=X_,
                             filters=18,
                             kernel_size=4,
                             strides=1,
                             padding='same',
                             activation=tf.nn.relu)
    max_pool_1 = tf.layers.max_pooling1d(inputs=conv1,
                                         pool_size=2,
                                         strides=2,
                                         padding='same')
    # -> (batch, T/4, 36)
    conv2 = tf.layers.conv1d(inputs=max_pool_1,
                             filters=36,
                             kernel_size=2,
                             strides=1,
                             padding='same',
                             activation=tf.nn.relu)
    max_pool_2 = tf.layers.max_pooling1d(inputs=conv2,
                                         pool_size=2,
                                         strides=2,
                                         padding='same')
    # -> (batch, T/8, 24)
    conv3 = tf.layers.conv1d(inputs=max_pool_2,
                             filters=24,
                             kernel_size=2,
                             strides=1,
                             padding='same',
                             activation=tf.nn.relu)
    max_pool_3 = tf.layers.max_pooling1d(inputs=conv3,
                                         pool_size=2,
                                         strides=2,
                                         padding='same')

    # Flatten and dropout.  NOTE(review): 60*24 assumes timesteps == 480
    # (480 -> 240 -> 120 -> 60) -- confirm for other inputs.
    flat = tf.reshape(max_pool_3, (-1, 60 * 24))
    flat = tf.nn.dropout(flat, keep_prob=keep_prob_)

    # Prediction, loss, and optimizer.
    prediction = tf.layers.dense(flat, num_classes)
    loss_op = tf.losses.mean_squared_error(Y_, prediction)
    optimizer = tf.train.AdamOptimizer(lr).minimize(loss_op)

    # NOTE(review): both cast() arguments here are built from `prediction`;
    # the second probably should be (Y_/1.8) - tf.round(Y_/1.8).  Kept
    # as-is (matches the companion inference script) -- confirm intent.
    correct_pred = tf.equal(
        tf.cast((prediction / 1.8) - tf.round(prediction / 1.8), tf.float32),
        tf.cast((prediction / 1.8) - tf.round(Y_ / 1.8), tf.float32))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    init = tf.global_variables_initializer()

    with tf.Session() as sess:

        # Run the initializer
        sess.run(init)

        n_batches = len(train_x) // batch_size

        for e in range(epochs):
            # BUG FIX: was `epochs % 10 == 0`, a constant (30 % 10 == 0)
            # that decayed the learning rate every epoch; decay once every
            # 10 epochs instead.
            if (e + 1) % 10 == 0:
                learning_rate = learning_rate * 0.9
            train_acc = []
            for ii, (x, y) in enumerate(
                    utl.get_batches(train_x, train_y, batch_size), 1):
                x = x.reshape((batch_size, timesteps, num_input))
                feed = {
                    X_: x,
                    Y_: y[:, None],
                    lr: learning_rate,
                    keep_prob_: 0.75
                }
                # BUG FIX: the optimizer op was never run, so no training
                # ever happened; run it together with the metrics.
                loss, acc, _ = sess.run([loss_op, accuracy, optimizer],
                                        feed_dict=feed)
                train_acc.append(acc)

                if (ii + 1) % n_batches == 0:
                    val_acc = []
                    for xx, yy in utl.get_batches(val_x, val_y, batch_size):
                        xx = xx.reshape((batch_size, timesteps, num_input))
                        feed = {
                            X_: xx,
                            Y_: yy[:, None],
                            lr: learning_rate,
                            keep_prob_: 1
                        }
                        val_batch_acc = sess.run([accuracy], feed_dict=feed)
                        val_acc.append(val_batch_acc)

                    # Typo fix in the log message: "Accruacy" -> "Accuracy".
                    print(
                        "Epoch: {}/{}...".format(e + 1, epochs),
                        "Batch: {}/{}...".format(ii + 1, n_batches),
                        "Train Loss: {:.3f}...".format(loss),
                        "Train Accuracy: {:.3f}...".format(np.mean(train_acc)),
                        "Val Accuracy: {:.3f}".format(np.mean(val_acc)))

        # Final evaluation on the held-out test set.
        test_data = test_x.reshape((-1, timesteps, num_input))
        test_label = test_y
        print(
            "Testing Accuracy:",
            sess.run(accuracy,
                     feed_dict={
                         X_: test_data,
                         Y_: test_label[:, None],
                         lr: learning_rate,
                         keep_prob_: 1
                     }))
コード例 #15
0
ファイル: fc_train.py プロジェクト: dongulee/thydys
import pandas as pd
import numpy as np
import subprocess
import tensorflow as tf
import utils as utl
#from collections import Counter

import sys
#sys.stdin.encoding
#tf.disable_v2_behavior()

# In[2]:

# Load the flattened feature matrix: 96 samples of 14400 float64 features
# each, stored in raw numpy .tofile format.
X = np.fromfile('X.dat', dtype=float).reshape([96, 14400])
# One target value per sample.
Y = np.fromfile('Y.dat', dtype=float)
# 80% train; presumably the remaining 20% is divided between validation and
# test by the project helper -- TODO confirm against utils implementation.
train_x, val_x, test_x, train_y, val_y, test_y = utl.train_val_test_split(
    X, Y, split_frac=0.80)
#print("Data Set Size")
#print("Train set: \t\t{}".format(train_x.shape),
#      "\nValidation set: \t{}".format(val_x.shape),
#      "\nTest set: \t\t{}".format(test_x.shape))

# In[ ]:

# Training Parameters
learning_rate = 0.1
epochs = 30
batch_size = 10
display_step = 200

# Network Parameters
num_input = 2
コード例 #16
0
ファイル: infer.py プロジェクト: dongulee/thydys
def main():
    """Restore an RNN regression checkpoint and run inference over the
    whole dataset, writing raw predictions and thresholded diagnoses to
    result files in the checkpoint directory.

    Command-line options supply the data directory (-d), timesteps (-t),
    input width (-n), and checkpoint directory (-c).
    """
    if (len(sys.argv) <= 1):
        print("infer.py -h or --help to get guideline of input options")
        exit()
    use = "Usage: %prog [options] filename"
    parser = OptionParser(usage=use)
    parser.add_option("-d", "--input-dir", dest="input_dir", action="store",
                      type="string", help="input data dir")
    parser.add_option("-t", "--timesteps", dest="timesteps", action="store",
                      type="int", help="timesteps")
    parser.add_option("-n", "--num-input", dest="num_input", action="store",
                      type="int",
                      help="number of input (input vector's width)")
    parser.add_option("-c", "--ckpt-dir", dest="ckpt_dir", action="store",
                      type="string", help="directory of checkpoint")

    (options, args) = parser.parse_args()
    input_dir = options.input_dir
    timesteps = options.timesteps
    num_input = options.num_input

    # Raw float64 dump reshaped to (samples, timesteps, num_input).
    X = np.fromfile(input_dir + '/X.dat', dtype=float)
    cardinality = int(X.shape[0] / (timesteps * num_input))
    X = X.reshape([cardinality, timesteps, num_input])
    Y = np.fromfile(input_dir + '/Y.dat', dtype=float)

    train_x, val_x, test_x, train_y, val_y, test_y = utl.train_val_test_split(
        X, Y, split_frac=0.80)

    # Training Parameters (kept for parity with the training script; only
    # the graph definition below is used at inference time).
    learning_rate = 0.001
    epochs = 800
    batch_size = 40

    # Network Parameters
    num_hidden = 2048
    num_classes = 1

    print("### Network Parameters ###")
    print("Learning Rate: {}".format(learning_rate))
    print("Batch Size: {}".format(batch_size))
    print("Size of Hidden Layer: {}".format(num_hidden))
    print("Timestep: {}".format(timesteps))
    print("------------------")
    X_ = tf.placeholder("float", [None, timesteps, num_input])
    Y_ = tf.placeholder("float", [None, num_classes])
    lr = tf.placeholder("float")

    weights = {
        'out': tf.Variable(tf.random_normal([num_hidden, num_classes])),
    }
    biases = {
        'out': tf.Variable(tf.random_normal([num_classes]))
    }
    prediction = RNN(X_, weights, biases, timesteps, num_hidden)

    loss_op = tf.losses.mean_squared_error(Y_, prediction)
    optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss_op)

    # NOTE(review): both cast() arguments here are built from `prediction`;
    # the second probably should be (Y_/1.8) - tf.round(Y_/1.8).  Kept
    # as-is (matches the training script) -- confirm intent.
    correct_pred = tf.equal(
        tf.cast((prediction / 1.8) - tf.round(prediction / 1.8), tf.float32),
        tf.cast((prediction / 1.8) - tf.round(Y_ / 1.8), tf.float32))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Locate the checkpoint to restore.  (Removed an unused
    # `checkpoint_path` and a never-run `init` op from the original.)
    SAVER_DIR = options.ckpt_dir
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(SAVER_DIR)

    with tf.Session() as sess:
        saver.restore(sess, ckpt.model_checkpoint_path)
        test_norm = utl.minmax_norm(test_x)
        print("loss test: %f" % loss_op.eval(
            feed_dict={X_: test_norm, Y_: test_y[:, None]}))

        # Predict over the entire (normalized) dataset.
        X_norm = utl.minmax_norm(X)
        pred = np.array(prediction.eval(feed_dict={X_: X_norm,
                                                   Y_: Y[:, None]}))

        # Threshold at 1.8 to turn regression outputs into a binary
        # diagnosis, then report agreement with the ground truth.
        pred_diagnosis = [1 if x[0] >= 1.8 else 0 for x in list(pred)]
        y_diagnosis = [1 if x >= 1.8 else 0 for x in list(Y)]
        evaluation = np.equal(pred_diagnosis, y_diagnosis)
        print(np.mean(evaluation))

        # BUG FIX: output files were opened without context managers; use
        # `with` so they are closed even if a write fails.
        with open(SAVER_DIR + '/result.txt', 'w') as f:
            for i in range(0, len(Y)):
                f.write(str(pred[i][0]) + ', ' + str(Y[i]) + '\n')
        with open(SAVER_DIR + '/result_diagnosis.txt', 'w') as f2:
            for i in range(0, len(Y)):
                f2.write(str(pred_diagnosis[i]) + ', ' +
                         str(y_diagnosis[i]) + '\n')
コード例 #17
0
def main():
    """Train a 4-output LSTM regressor on padded sequence data.

    Reads X.dat / Y.dat from --input-dir, splits into train/val/test,
    trains with SGD on an MSE loss over the 4th output channel only,
    and saves the final checkpoint to --output-dir.
    """
    # Option Parser
    if (len(sys.argv) <= 1):
        print("train.py -h or --help to get guideline of input options")
        exit()
    use = "Usage: %prog [options] filename"
    parser = OptionParser(usage = use)
    parser.add_option("-d", "--input-dir", dest="input_dir", action="store", type="string", help="input data dir")
    parser.add_option("-o", "--output-dir", dest="ckpt_dir", action="store", type="string", help="ckpt data dir")
    parser.add_option("-t", "--timesteps", dest="timesteps", action="store", type="int", help="timesteps")
    parser.add_option("-n", "--num-input", dest="num_input", action="store", type="int", help="number of input (input vector's width)")

    (options, args) = parser.parse_args()
    input_dir = options.input_dir
    timesteps = options.timesteps
    num_input = options.num_input
    ckpt_dir = options.ckpt_dir

    # Training Parameters
    learning_rate = 0.001
    epochs = 130
    batch_size = 40

    # Network Parameters
    num_hidden = 2048
    num_classes = 4
    max_length = timesteps * num_classes

    # X.dat holds flat float64 values; each sample spans
    # max_length * num_input entries (padded sequence).
    X = np.fromfile(input_dir + '/X.dat', dtype=float)
    cardinality = int(X.shape[0] / (timesteps * num_input * num_classes))
    X = X.reshape([cardinality, max_length * num_input])

    Y = np.fromfile(input_dir + '/Y.dat', dtype=float)
    Y = Y.reshape([cardinality, num_classes])

    train_x, val_x, test_x, train_y, val_y, test_y = utl.train_val_test_split(X, Y, split_frac=0.80)

    print("### Network Parameters ###")
    print("Learning Rate: {}".format(learning_rate))
    print("Batch Size: {}".format(batch_size))
    print("Size of Hidden Layer: {}".format(num_hidden))
    print("Timestep: {}".format(timesteps))
    print("------------------")
    X_ = tf.placeholder("float", [None, max_length, num_input])
    # Targets are fed per output channel: [num_classes, batch, 1].
    Y_ = tf.placeholder("float", [num_classes, None, 1])
    lr = tf.placeholder("float")

    # One linear read-out head per output channel.
    weights = {
        'out1': tf.Variable(tf.random_normal([num_hidden, 1])),
        'out2': tf.Variable(tf.random_normal([num_hidden, 1])),
        'out3': tf.Variable(tf.random_normal([num_hidden, 1])),
        'out4': tf.Variable(tf.random_normal([num_hidden, 1]))
    }
    biases = {
        'out1': tf.Variable(tf.random_normal([1])),
        'out2': tf.Variable(tf.random_normal([1])),
        'out3': tf.Variable(tf.random_normal([1])),
        'out4': tf.Variable(tf.random_normal([1]))
    }
    LSTM_out, LSTM_states = var_RNN(X_, weights, biases, max_length, num_hidden)
    # All four heads read the LSTM output at step `timesteps`.
    prediction = []
    prediction.append(tf.matmul(LSTM_out[timesteps * 1], weights['out1']) + biases['out1'])
    prediction.append(tf.matmul(LSTM_out[timesteps * 1], weights['out2']) + biases['out2'])
    prediction.append(tf.matmul(LSTM_out[timesteps * 1], weights['out3']) + biases['out3'])
    prediction.append(tf.matmul(LSTM_out[timesteps * 1], weights['out4']) + biases['out4'])
    # BUG FIX: removed dead `tf.reshape(tf.concat(prediction, 1), [-1, 4])`
    # whose result was discarded; the loss below only uses the 4th head.
    loss_op = tf.losses.mean_squared_error(Y_[3], prediction[3])
    optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss_op)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:

        # Run the initializer
        sess.run(init)
        saver = tf.train.Saver()

        n_batches = len(train_x) // batch_size

        for e in range(epochs):
            # BUG FIX: was `epochs % 30 == 0`, which compares the constant
            # epoch count (130 % 30 != 0) so the learning rate never
            # decayed; decay after every 30 completed epochs instead.
            if (e + 1) % 30 == 0:
                learning_rate = learning_rate * 0.95
            for ii, (x, y) in enumerate(utl.get_batches(train_x, train_y, batch_size), 1):
                x = x.reshape([-1, max_length, num_input])
                y = y.reshape([4, -1, 1])
                feed = {X_: x, Y_: y, lr: learning_rate}
                loss, _ = sess.run([loss_op, optimizer], feed_dict=feed)

                # Report validation loss once per epoch, on the last batch.
                if (ii + 1) % n_batches == 0:
                    val_losses = []
                    for xx, yy in utl.get_batches(val_x, val_y, batch_size):
                        xx = xx.reshape([-1, max_length, num_input])
                        yy = yy.reshape([4, -1, 1])
                        feed = {X_: xx, Y_: yy, lr: learning_rate}
                        val_batch_loss = sess.run([loss_op], feed_dict=feed)
                        val_losses.append(val_batch_loss)

                    print("Epoch: {}/{}...".format(e + 1, epochs),
                          "Batch: {}/{}...".format(ii + 1, n_batches),
                          "Train Loss: {:.3f}...".format(loss),
                          "Val Loss: {:.3f}".format(np.mean(val_losses)))

        test_x = test_x.reshape((-1, max_length, num_input))
        test_y = test_y.reshape([4, -1, 1])
        print("Testing Loss:", sess.run(loss_op, feed_dict={X_: test_x, Y_: test_y, lr: learning_rate}))

        # Model Checkpoint
        saver.save(sess, ckpt_dir)
コード例 #18
0
ファイル: tf_lstm.py プロジェクト: einyboy/machine_learning
# Build the vocabulary lookup tables from every token in the corpus.
# NOTE(review): `messages`/`labels` come from earlier in the file — confirm.
full_lexicon = " ".join(messages).split()
vocab_to_int, int_to_vocab = utl.create_lookup_tables(full_lexicon)

# Length statistics: zero-length messages must be dropped before encoding,
# and the maximum informs the padding length chosen below.
messages_lens = Counter([len(x) for x in messages])
print("Zero-length messages: {}".format(messages_lens[0]))
print("Maximum message length: {}".format(max(messages_lens)))
print("Average message length: {}".format(np.mean([len(x) for x in messages])))

messages, labels = utl.drop_empty_messages(messages, labels)
# Map tokens to integer ids and sentiment labels to numeric values.
messages = utl.encode_ST_messages(messages, vocab_to_int)
labels = utl.encode_ST_labels(labels)

# Zero-pad each encoded message to a fixed length of 244 tokens
# (padding side is determined by the utl implementation).
messages = utl.zero_pad_messages(messages, seq_len=244)

# 80% train; the remainder is shared between validation and test.
train_x, val_x, test_x, train_y, val_y, test_y = utl.train_val_test_split(
    messages, labels, split_frac=0.80)
print("Data Set Size")
print("Train set: \t\t{}".format(train_x.shape),
      "\nValidation set: \t{}".format(val_x.shape),
      "\nTest set: \t\t{}".format(test_x.shape))


def model_inputs():
    """Build the graph's input placeholders.

    Returns:
        A 3-tuple of placeholders: int32 token-id matrix ('inputs'),
        int32 label matrix ('labels'), and scalar float32 dropout
        keep probability ('keep_prob').
    """
    token_ids = tf.placeholder(tf.int32, [None, None], name='inputs')
    targets = tf.placeholder(tf.int32, [None, None], name='labels')
    dropout_keep = tf.placeholder(tf.float32, name='keep_prob')
    return token_ids, targets, dropout_keep
コード例 #19
0
def main():
    """Restore a trained 4-output LSTM checkpoint and evaluate it.

    Rebuilds the same graph as the training script, restores variables
    from --ckpt-dir, predicts over the full data set, and writes raw
    predictions plus thresholded binary diagnoses to text files inside
    the checkpoint directory.
    """
    # Option Parser
    if (len(sys.argv) <= 1):
        print("train.py -h or --help to get guideline of input options")
        exit()
    use = "Usage: %prog [options] filename"
    parser = OptionParser(usage=use)
    parser.add_option("-d", "--input-dir", dest="input_dir", action="store", type="string", help="input data dir")
    parser.add_option("-t", "--timesteps", dest="timesteps", action="store", type="int", help="timesteps")
    parser.add_option("-n", "--num-input", dest="num_input", action="store", type="int", help="number of input (input vector's width)")
    parser.add_option("-c", "--ckpt-dir", dest="ckpt_dir", action="store", type="string", help="directory of checkpoint")

    (options, args) = parser.parse_args()
    input_dir = options.input_dir
    timesteps = options.timesteps
    num_input = options.num_input
    ckpt_dir = options.ckpt_dir

    # Parameters kept for parity with the training script (only printed here).
    learning_rate = 0.001
    batch_size = 40

    # Network Parameters
    num_hidden = 2048
    num_classes = 4
    max_length = timesteps * num_classes

    X = np.fromfile(input_dir + '/X.dat',
                    dtype=float)  #padded sequence N x 7,680
    cardinality = int(X.shape[0] / (timesteps * num_input * num_classes))
    X = X.reshape([cardinality, max_length * num_input])

    Y = np.fromfile(input_dir + '/Y.dat', dtype=float)
    Y = Y.reshape([cardinality, num_classes])

    # Split kept for parity with training; evaluation below runs on full X/Y.
    train_x, val_x, test_x, train_y, val_y, test_y = utl.train_val_test_split(
        X, Y, split_frac=0.80)

    print("### Network Parameters ###")
    print("Learning Rate: {}".format(learning_rate))
    print("Batch Size: {}".format(batch_size))
    print("Size of Hidden Layer: {}".format(num_hidden))
    print("Timestep: {}".format(timesteps))
    print("------------------")
    X_ = tf.placeholder("float", [None, max_length, num_input])
    Y_ = tf.placeholder("float", [None, num_classes])
    lr = tf.placeholder("float")

    # One linear read-out head per output channel (must mirror training).
    weights = {
        'out1': tf.Variable(tf.random_normal([num_hidden, 1])),
        'out2': tf.Variable(tf.random_normal([num_hidden, 1])),
        'out3': tf.Variable(tf.random_normal([num_hidden, 1])),
        'out4': tf.Variable(tf.random_normal([num_hidden, 1]))
    }
    biases = {
        'out1': tf.Variable(tf.random_normal([1])),
        'out2': tf.Variable(tf.random_normal([1])),
        'out3': tf.Variable(tf.random_normal([1])),
        'out4': tf.Variable(tf.random_normal([1]))
    }
    LSTM_out, LSTM_states = var_RNN(X_, weights, biases, max_length,
                                    num_hidden)
    prediction = []
    prediction.append(
        tf.matmul(LSTM_out[timesteps * 1], weights['out1']) + biases['out1'])
    prediction.append(
        tf.matmul(LSTM_out[timesteps * 1], weights['out2']) + biases['out2'])
    prediction.append(
        tf.matmul(LSTM_out[timesteps * 1], weights['out3']) + biases['out3'])
    prediction.append(
        tf.matmul(LSTM_out[timesteps * 1], weights['out4']) + biases['out4'])
    prediction = tf.reshape(tf.concat(prediction, 1), [-1, 4])
    loss_op = tf.losses.mean_squared_error(Y_, prediction)
    optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss_op)

    # Restore the ckpt
    # BUG FIX: removed unused `checkpoint_path = os.path.join(SAVER_DIR, SAVER_DIR)`.
    SAVER_DIR = options.ckpt_dir
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(SAVER_DIR)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:

        # Initialize, then overwrite all variables from the checkpoint.
        sess.run(init)
        saver.restore(sess, ckpt.model_checkpoint_path)

        # BUG FIX: the reshape previously hard-coded 2 input channels;
        # use num_input so other input widths work.
        X = X.reshape([-1, max_length, num_input])
        pred = np.array(prediction.eval(feed_dict={X_: X, Y_: Y}))

        # Threshold the 4th output channel at 1.8 for a binary diagnosis.
        pred_diagnosis = [1 if x[3] >= 1.8 else 0 for x in list(pred)]
        y_diagnosis = [1 if x[3] >= 1.8 else 0 for x in list(Y)]
        evaluation = np.equal(pred_diagnosis, y_diagnosis)
        print(np.mean(evaluation))
        # BUG FIX: paths were concatenated without a separator
        # ("<dir>result.txt"); os.path.join is separator-safe. Files are
        # now closed deterministically via context managers.
        with open(os.path.join(SAVER_DIR, 'result.txt'), 'w') as f:
            for i in range(0, len(Y)):
                f.write(str(pred[i]) + ', ' + str(Y[i]) + '\n')
        with open(os.path.join(SAVER_DIR, 'result_diagnosis.txt'), 'w') as f2:
            for i in range(0, len(Y)):
                f2.write(
                    str(pred_diagnosis[i]) + ', ' + str(y_diagnosis[i]) + '\n')
コード例 #20
0
ファイル: train.py プロジェクト: dongulee/thydys
def main():
    """Train a single-output LSTM regressor on padded sequence data.

    Reads X.dat / Y.dat from --input-dir, min-max normalizes inputs,
    trains with SGD on an MSE loss, prints validation loss per epoch,
    and saves the final checkpoint to --output-dir.
    """
    # Option Parser
    if (len(sys.argv) <= 1):
        print("train.py -h or --help to get guideline of input options")
        exit()
    use = "Usage: %prog [options] filename"
    parser = OptionParser(usage=use)
    parser.add_option("-d", "--input-dir", dest="input_dir", action="store", type="string", help="input data dir")
    parser.add_option("-o", "--output-dir", dest="ckpt_dir", action="store", type="string", help="ckpt data dir")
    parser.add_option("-t", "--timesteps", dest="timesteps", action="store", type="int", help="timesteps")
    parser.add_option("-n", "--num-input", dest="num_input", action="store", type="int", help="number of input (input vector's width)")

    (options, args) = parser.parse_args()
    input_dir = options.input_dir
    timesteps = options.timesteps
    num_input = options.num_input
    ckpt_dir = options.ckpt_dir

    X = np.fromfile(input_dir + '/X.dat', dtype=float)
    cardinality = int(X.shape[0] / (timesteps * num_input))
    X = X.reshape([cardinality, timesteps * num_input])
    Y = np.fromfile(input_dir + '/Y.dat', dtype=float)
    train_x, val_x, test_x, train_y, val_y, test_y = utl.train_val_test_split(
        X, Y, split_frac=0.80)

    # Training Parameters
    learning_rate = 0.0015
    epochs = 200
    batch_size = 40

    # Network Parameters
    num_hidden = 2048
    num_classes = 1

    print("### Network Parameters ###")
    print("Learning Rate: {}".format(learning_rate))
    print("Batch Size: {}".format(batch_size))
    print("Size of Hidden Layer: {}".format(num_hidden))
    print("Timestep: {}".format(timesteps))
    print("------------------")
    X_ = tf.placeholder("float", [None, timesteps, num_input])
    Y_ = tf.placeholder("float", [None, num_classes])
    lr = tf.placeholder("float")

    weights = {'out': tf.Variable(tf.random_normal([num_hidden, num_classes]))}
    biases = {'out': tf.Variable(tf.random_normal([num_classes]))}
    prediction = RNN(X_, weights, biases, timesteps, num_hidden)

    loss_op = tf.losses.mean_squared_error(Y_, prediction)
    optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss_op)

    correct_pred = tf.equal(
        tf.cast((prediction / 1.8) - tf.round(prediction / 1.8), tf.float32),
        tf.cast((prediction / 1.8) - tf.round(Y_ / 1.8), tf.float32))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    init = tf.global_variables_initializer()

    with tf.Session() as sess:

        # Run the initializer
        sess.run(init)
        saver = tf.train.Saver()

        n_batches = len(train_x) // batch_size

        for e in range(epochs):
            # BUG FIX: was `epochs % 30 == 0`, which tests the constant
            # epoch count (200 % 30 != 0) so the learning rate never
            # decayed; decay after every 30 completed epochs instead.
            if (e + 1) % 30 == 0:
                learning_rate = learning_rate * 0.95
            train_acc = []
            for ii, (x, y) in enumerate(
                    utl.get_batches(train_x, train_y, batch_size), 1):
                x = x.reshape((batch_size, timesteps, num_input))
                x_norm = utl.minmax_norm(x)

                feed = {X_: x_norm, Y_: y[:, None], lr: learning_rate}
                loss, acc, _ = sess.run([loss_op, accuracy, optimizer],
                                        feed_dict=feed)
                train_acc.append(acc)

                # Report validation loss once per epoch, on the last batch.
                if (ii + 1) % n_batches == 0:
                    val_acc = []
                    for xx, yy in utl.get_batches(val_x, val_y, batch_size):
                        xx = xx.reshape((batch_size, timesteps, num_input))
                        xx_norm = utl.minmax_norm(xx)
                        feed = {
                            X_: xx_norm,
                            Y_: yy[:, None],
                            lr: learning_rate
                        }
                        val_batch_loss = sess.run([loss_op], feed_dict=feed)
                        val_acc.append(val_batch_loss)

                    print(
                        "Epoch: {}/{}...".format(e + 1, epochs),
                        "Batch: {}/{}...".format(ii + 1, n_batches),
                        "Train Loss: {:.3f}...".format(loss),
                        "Val Loss: {:.3f}".format(np.mean(val_acc)))

        test_data = test_x.reshape((-1, timesteps, num_input))
        test_norm = utl.minmax_norm(test_data)
        test_label = test_y
        print(
            "Testing Loss:",
            sess.run(loss_op,
                     feed_dict={
                         X_: test_norm,
                         Y_: test_label[:, None],
                         lr: learning_rate
                     }))

        # Model Checkpoint
        saver.save(sess, ckpt_dir)
コード例 #21
0
def model(labels, data, go_id):
    # set parameters:
    max_features = 60000
    batch_size = 256
    embedding_dims = 100
    nb_filters = 250
    hidden_dims = 250
    nb_epoch = 12

    # pool lengths
    pool_length = 2
    # level of convolution to perform
    filter_length = 3

    # length of APAAC
    maxlen = 20 + 6 * LAMBDA

    train, val, test = train_val_test_split(labels,
                                            data,
                                            batch_size=batch_size)
    train_label, train_data = train

    val_label, val_data = val
    test_label, test_data = test

    test_label_rep = test_label

    model = Sequential()
    model.add(Embedding(max_features, embedding_dims))
    model.add(Dropout(0.25))
    model.add(
        Convolution1D(input_dim=embedding_dims,
                      nb_filter=nb_filters,
                      filter_length=filter_length,
                      border_mode='valid',
                      activation='relu',
                      subsample_length=1))
    model.add(MaxPooling1D(pool_length=pool_length))
    model.add(Flatten())
    output_size = nb_filters * (((maxlen - filter_length) / 1) + 1) / 2
    model.add(Dense(output_size, hidden_dims))
    model.add(Dropout(0.25))
    model.add(Activation('relu'))
    model.add(Dense(hidden_dims, 1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  class_mode='binary')
    weights_train = [1.0 if y == 1 else 1.0 for y in train_label]
    model.fit(X=train_data,
              y=train_label,
              batch_size=batch_size,
              nb_epoch=nb_epoch,
              show_accuracy=True,
              verbose=1,
              validation_data=(val_data, val_label))
    # # Loading saved weights
    # print 'Loading weights'
    # model.load_weights(DATA_ROOT + go_id + '.hdf5')
    score = model.evaluate(test_data,
                           test_label,
                           batch_size=batch_size,
                           verbose=1,
                           show_accuracy=True)
    print "Loss:", score[0], "Accuracy:", score[1]
    pred_data = model.predict_classes(test_data, batch_size=batch_size)
    # Saving the model
    print 'Saving the model for ' + go_id
    model.save_weights(DATA_ROOT + go_id + '.hdf5', overwrite=True)
    return classification_report(list(test_label_rep), pred_data)
コード例 #22
0
def train():
    """
    Training function for UNet11 segmentation ("roof") or income
    determination ("income") on satellite image patches.
    Adapted from https://github.com/jesseniagonzalezv/App_segmentation_water_bodies/
    """

    parser = argparse.ArgumentParser()
    arg = parser.add_argument

    # image-related variables
    arg('--image-patches-dir', type=str, default='./data/dataset/split', help='satellite image patches directory')
    arg('--masks-dir', type=str, default='./data/dataset/labels', help='numPy masks directory')
    arg('--npy-dir', type=str, default='./data/dataset/split_npy', help='numPy preprocessed patches directory')

    # preprocessing-related variables
    arg('--val-percent', type=float, default=0.25, help='Validation percent')
    arg('--test-percent', type=float, default=0.10, help='Test percent')

    # training-related variable
    arg('--batch-size', type=int, default=16, help='HR:4,VHR:8')
    arg('--limit', type=int, default=0, help='number of images in epoch')
    arg('--n-epochs', type=int, default=500)
    arg('--lr', type=float, default=1e-3)
    arg('--step', type=float, default=60)
    arg('--model', type=str, help='roof: roof segmentation / income: income determination')
    arg('--out-path', type=str, default='./trained_models/', help='model output path')
    arg('--pretrained', type=int, default=1, help='0: False; 1: True')

    # CUDA devices
    arg('--device-ids', type=str, default='0,1', help='For example 0,1 to run on two GPUs')

    args = parser.parse_args()

    # Idiom fix: `True if args.pretrained else False` -> bool(...).
    pretrained = bool(args.pretrained)

    if args.model == "roof":
        model = models.UNet11(pretrained=pretrained)
    elif args.model == "income":
        model = models.UNet11(pretrained=pretrained, num_classes=4, input_channels=5)
    else:
        # BUG FIX: raise with a message so the offending value is visible.
        raise ValueError("--model must be 'roof' or 'income', got {!r}".format(args.model))

    if torch.cuda.is_available():
        if args.device_ids:
            device_ids = list(map(int, args.device_ids.split(',')))
        else:
            device_ids = None

        model = torch.nn.DataParallel(model, device_ids=device_ids).cuda()
        cudnn.benchmark = True

    images_filenames = np.array(sorted(glob.glob(args.image_patches_dir + "/*.tif")))

    train_set_indices, val_set_indices, test_set_indices = utils.train_val_test_split(len(images_filenames),
                                                                                      args.val_percent,
                                                                                      args.test_percent)

    images_np_filenames = utils.save_npy(images_filenames, args.npy_dir, args.model, args.masks_dir)

    # "roof" patches carry 4 channels, "income" adds one more.
    channel_num = 4 if args.model == "roof" else 5
    max_value, mean_train, std_train = utils.meanstd(np.array(images_np_filenames)[train_set_indices],
                                                     channel_num=channel_num)

    # Augment only the training split; both splits share normalization stats.
    train_transform = DualCompose([
        HorizontalFlip(),
        VerticalFlip(),
        Rotate(),
        ImageOnly(Normalize(mean=mean_train, std=std_train))
    ])

    val_transform = DualCompose([
        ImageOnly(Normalize(mean=mean_train, std=std_train))
    ])

    limit = args.limit if args.limit > 0 else None

    train_loader = utils.make_loader(filenames=np.array(images_np_filenames)[train_set_indices],
                                     mask_dir=args.masks_dir,
                                     dataset=args.model,
                                     shuffle=False,
                                     transform=train_transform,
                                     mode='train',
                                     batch_size=args.batch_size,
                                     limit=limit)

    valid_loader = utils.make_loader(filenames=np.array(images_np_filenames)[val_set_indices],
                                     mask_dir=args.masks_dir,
                                     dataset=args.model,
                                     shuffle=False,
                                     transform=val_transform,
                                     mode='train',
                                     batch_size=args.batch_size,
                                     limit=None)

    dataloaders = {
        'train': train_loader, 'val': valid_loader
    }

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.step, gamma=0.1)

    name_file = '_' + str(int(args.val_percent * 100)) + '_percent_' + args.model

    utils.train_model(name_file=name_file,
                      model=model,
                      dataset=args.model,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      dataloaders=dataloaders,
                      name_model="Unet11",
                      num_epochs=args.n_epochs)

    # BUG FIX: exists-check + mkdir was racy and could not create nested
    # directories; makedirs(exist_ok=True) handles both.
    os.makedirs(args.out_path, exist_ok=True)

    torch.save(model.module.state_dict(),
               (str(args.out_path) + '/model{}_{}_{}epochs').format(name_file, "Unet11", args.n_epochs))

    find_metrics(train_file_names=np.array(images_np_filenames)[train_set_indices],
                 val_file_names=np.array(images_np_filenames)[val_set_indices],
                 test_file_names=np.array(images_np_filenames)[test_set_indices],
                 mask_dir=args.masks_dir,
                 dataset=args.model,
                 mean_values=mean_train,
                 std_values=std_train,
                 model=model,
                 name_model="Unet11",
                 epochs=args.n_epochs,
                 out_file=args.model,
                 dataset_file=args.model,
                 name_file=name_file)
コード例 #23
0
def main(args):
    """Train a shared regressor on three monotonically related synthetic
    tasks with label noise, select by validation RMSE, and print the
    test RMSE of the best model.
    """
    (X1, y1), (X2, y2), (X3, y3) = get_mono_syn_data(args.n_samples)

    ### Split train val test set — one index split reused for all tasks.
    idx_train, idx_val, idx_test, _, _, _ = \
        train_val_test_split(np.arange(args.n_samples), np.arange(args.n_samples), val_size=args.val_size, test_size=args.test_size, random_state=args.seed)

    X1_train = X1[idx_train]
    X2_train = X2[idx_train]
    X3_train = X3[idx_train]
    # Tasks are stacked: rows [0:n) task 1, [n:2n) task 2, [2n:3n) task 3.
    X_train = np.vstack([X1_train, X2_train, X3_train])
    y1_train = y1[idx_train]
    y2_train = y2[idx_train]
    y3_train = y3[idx_train]

    X1_val = X1[idx_val]
    X2_val = X2[idx_val]
    X3_val = X3[idx_val]
    X_val = np.vstack([X1_val, X2_val, X3_val])
    y1_val = y1[idx_val]
    y2_val = y2[idx_val]
    y3_val = y3[idx_val]
    y_val = np.concatenate([y1_val, y2_val, y3_val])

    X1_test = X1[idx_test]
    X2_test = X2[idx_test]
    X3_test = X3[idx_test]
    X_test = np.vstack([X1_test, X2_test, X3_test])
    y1_test = y1[idx_test]
    y2_test = y2[idx_test]
    y3_test = y3[idx_test]
    y_test = np.concatenate([y1_test, y2_test, y3_test])

    ### Corrupt data (pairwise label noise between adjacent tasks)
    y1_train_noise, y2_train_noise1 = corrupt_Y(y1_train, y2_train, args.p)
    y2_train_noise2, y3_train_noise = corrupt_Y(y2_train, y3_train, args.p)

    y1_val_noise, y2_val_noise1 = corrupt_Y(y1_val, y2_val, args.p)
    y2_val_noise2, y3_val_noise = corrupt_Y(y2_val, y3_val, args.p)

    X_train = np2tensor(X_train)
    X_val = np2tensor(X_val)
    X_test = np2tensor(X_test)

    y1_train_noise = np2tensor(y1_train_noise)
    y2_train_noise1 = np2tensor(y2_train_noise1)
    y2_train_noise2 = np2tensor(y2_train_noise2)
    y3_train_noise = np2tensor(y3_train_noise)

    y1_val_noise = np2tensor(y1_val_noise)
    y2_val_noise1 = np2tensor(y2_val_noise1)
    y2_val_noise2 = np2tensor(y2_val_noise2)
    y3_val_noise = np2tensor(y3_val_noise)

    # BUG FIX: y_val was left as a numpy array, but the --noise-free
    # branch below feeds it to rmse() against a tensor prediction.
    y_val = np2tensor(y_val)
    y_test = np2tensor(y_test)

    model = Model(args.hid1, args.hid2)

    if torch.cuda.is_available():
        model = model.cuda()

    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=args.lr,
                                 weight_decay=args.wdc)

    best_val = 1e20
    result = None

    for epoch in tqdm(range(args.n_epochs)):
        #TODO: Mini-batch training
        model.train()
        optimizer.zero_grad()
        y_pred = model(X_train)

        # Per-task RMSE against the noisy labels; task 2 appears twice
        # because it was corrupted in both pairings.
        loss = rmse(y_pred[:len(idx_train)],y1_train_noise) + \
            rmse(y_pred[len(idx_train):-len(idx_train)],y2_train_noise1) + \
            rmse(y_pred[len(idx_train):-len(idx_train)],y2_train_noise2) + \
            rmse(y_pred[-len(idx_train):],y3_train_noise)

        if args.adapt:
            # Monotonicity regularizer between adjacent tasks.
            adapt_loss = mono_loss(y_pred[:len(idx_train)], y_pred[len(idx_train):-len(idx_train)]) + \
                        mono_loss(y_pred[len(idx_train):-len(idx_train)], y_pred[-len(idx_train):])
        else:
            adapt_loss = 0
        total_loss = loss + args.lam * adapt_loss

        total_loss.backward()
        optimizer.step()

        model.eval()

        # no_grad: evaluation forward passes need no autograd state.
        with torch.no_grad():
            y_pred = model(X_val)
            if args.noise_free:
                val_loss = rmse(y_pred, y_val)
            else:
                # BUG FIX: validation loss previously compared validation
                # predictions against the *training* noise labels
                # (y1_train_noise, ...), which is semantically wrong and a
                # length mismatch; the y*_val_noise tensors computed above
                # were never used. Compare against the validation labels.
                val_loss = rmse(y_pred[:len(idx_val)],y1_val_noise) + \
                        rmse(y_pred[len(idx_val):-len(idx_val)],y2_val_noise1) + \
                        rmse(y_pred[len(idx_val):-len(idx_val)],y2_val_noise2) + \
                        rmse(y_pred[-len(idx_val):],y3_val_noise)
            val_loss = val_loss.item()
            if val_loss < best_val:
                best_val = val_loss
                y_pred = model(X_test)
                result = rmse(y_pred, y_test).item()
        if args.debug:
            print("Epoch ", epoch, total_loss.item(), val_loss, result)

    print(result)