예제 #1
0
def main(type):
    """Evaluate the saved transfer model for ``type`` and log its metrics.

    Ensures ``./fig1/<type>/transfer/results.txt`` exists (a tab-separated
    header is written when the file has to be created).  Then, for the single
    index produced by ``range(1, 2)``, the one-hot train/validation/test
    pickles are loaded, the stored ``.h5`` model is evaluated on the test
    split, and one tab-separated row is appended to the results file.

    Training is disabled in this version, so the train/validation loss and
    accuracy columns hold the placeholder string ``'0'``.

    Args:
        type: Data-set name interpolated into every data, model and results
            path.  The name shadows the builtin; it is kept unchanged because
            it is part of the function's signature.
    """
    # Build the shared path pieces once instead of re-formatting them at
    # every use.
    results_path = './fig1/%s' % type + '/transfer/results.txt'
    data_prefix = './data_train/%s' % type + '/onehot/'

    if os.path.exists(results_path):
        print("The result will be written in the 'results.txt' file")
    else:
        header = ('date', 'type', 'time', 'train loss', 'train acc',
                  'val loss', 'val acc', 'test loss', 'test acc', 'precision',
                  'recall', 'Sn', 'Sp', 'F1', 'mcc', 'auc')
        with open(results_path, 'w') as results_file:
            results_file.write('\t'.join(header) + '\n')

    for i in range(1, 2):
        # The train/validation splits are not consumed while training is
        # disabled, but they are still loaded so a missing or malformed
        # pickle is reported exactly as before.
        trainX, trainY, _, _ = data_processing.load_data(
            data_prefix + 'trainX%d.pickle' % i)
        valX, valY, _, _ = data_processing.load_data(
            data_prefix + 'valX%d.pickle' % i)
        testX, testY, _, _ = data_processing.load_data(
            data_prefix + 'testX%d.pickle' % i)

        # Placeholders for the metrics a (currently disabled) training or
        # fine-tuning step would produce.
        loss1, acc1, loss2, acc2 = '0', '0', '0', '0'

        pre_score, pre, rec, SN, SP, f1, mcc, roc_auc = evaluate_model(
            './fig1/%s' % type + '/transfer/model/' + str(type) + '_%d.h5' % i,
            testX, testY, i, type)

        print('test loss:', pre_score[0], 'test acc:', pre_score[1])

        # Same column order as the header written above.
        row = (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), type, i,
               loss1, acc1, loss2, acc2, pre_score[0], pre_score[1], pre, rec,
               SN, SP, f1, mcc, roc_auc)
        with open(results_path, 'a') as results_file:
            results_file.write('\t'.join(str(value) for value in row) + '\n')
def train_original():
    """Train the convolutional headline classifier and report its accuracy.

    Loads and processes the fake/real data, wraps the first three entries of
    the processed data in ``Headlines`` data sets (used as training,
    validation and testing sets), trains a ``ConvnetClassifier`` on CUDA with
    cross-entropy loss and ``patience=3``, plots both loss curves to
    ``error_orig.png``, saves the weights to ``model_orig.pkl`` and prints
    the accuracy on each of the three sets.
    """
    fake, real = data_processing.load_data()
    splits, keywords = data_processing.process_data(fake, real)
    training_set, validation_set, testing_set = (
        data_processing.Headlines(splits[index]) for index in range(3))
    print('Data Loaded')

    # The second constructor argument is read from the data itself.
    input_width = splits[0][0][0].shape[1]
    model = classifiers.ConvnetClassifier(len(keywords), input_width).cuda()
    loss_fn = torch.nn.CrossEntropyLoss().cuda()
    training_loss, validation_loss = train.train_classifier(
        model, loss_fn, training_set, validation_set, patience=3)

    # Loss curves returned by the training routine.
    for curve in (training_loss, validation_loss):
        plt.plot(curve)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend(('Training Set', 'Validation Set'))
    plt.savefig('error_orig.png')
    plt.show()

    torch.save(model.state_dict(), 'model_orig.pkl')
    model.eval()

    reports = (
        ('Acheived {:%} accuracy on the training set.', training_set),
        ('Acheived {:%} accuracy on the validation set.', validation_set),
        ('Acheived {:%} accuracy on the testing set.', testing_set),
    )
    for template, subset in reports:
        print(template.format(train.get_accuracy(model, subset)))
예제 #3
0
def main():
    """Plot polynomial fits of several degrees over the training points.

    Reads ``pol_regression.csv``, plots the raw ``x``/``y`` points, then for
    each degree in (0, 1, 2, 3, 4, 5, 10) fits the coefficients, predicts on
    the same ``x`` values and draws the curve sorted by ``x``.  The figure is
    saved to ``images/polynomial.png``.
    """
    # Data processing
    frame = dp.load_data('pol_regression.csv')
    x = frame['x']
    y = frame['y']

    # Plot training data
    plt.figure()
    plt.xlim((-5, 5))
    plt.plot(x, y, 'o', color='g')

    colors = ['g', 'r', 'y', 'b', 'c', 'm', 'b']
    degrees = [0, 1, 2, 3, 4, 5, 10]

    # One curve per degree, each drawn in its paired colour.
    for color, degree in zip(colors, degrees):
        # Degree 0 skips the fit and uses the constant weight 1.
        if degree == 0:
            weights = 1
        else:
            weights = pol.pol_regression(x, y, degree)

        y_hat = pol.prediction(x, weights, degree)

        # Sort the (x, prediction) pairs by x so the line is drawn left to
        # right, then split them back into two sequences for plotting.
        curve = zip(*sorted(zip(x, y_hat)))
        plt.plot(*curve, color)

    plt.legend(('training points', '$x^0$', '$x$', '$x^2$', '$x^3$', '$x^4$',
                '$x^5$', '$x^{10}$'),
               loc='lower right')
    plt.savefig(os.path.join('images', 'polynomial.png'))
예제 #4
0
def run():
    """Train the segmentation model and then write test predictions.

    Loads the image lists from ``./data``, builds batch generators for the
    train/validation/test images at the input resolution, and calls
    ``train`` followed by ``test`` with fixed step counts of 5.
    """
    data_root = './data'  # dataset location
    weight_path = data_root + '/weight.h5'  # where the model weights live
    prediction_dir = os.path.join(data_root, 'test')  # predicted results

    input_shape = (256, 512, 3)  # shape of the model's input images
    output_shape = (32, 64)  # shape of the model's output
    batch_size = 1
    epochs = 3

    (train_images, valid_images, test_images, num_classes,
     label_colors) = data_processing.load_data(data_root)

    # Fixed step counts; the list lengths are not used to derive them.
    train_steps = val_steps = test_steps = 5
    print(train_steps, val_steps, test_steps)

    # Only height and width are handed to the batch generators.
    input_size = input_shape[0:2]
    train_batches = data_processing.gen_batch_function(train_images,
                                                       input_size)
    val_batches = data_processing.gen_batch_function(valid_images, input_size)
    test_batches = data_processing.gen_batch_function(test_images,
                                                      input_size,
                                                      test=True)

    train(num_classes, batch_size, epochs, train_steps, val_steps, data_root,
          input_shape, output_shape, train_batches, val_batches)
    test(batch_size, test_steps, output_shape, prediction_dir, weight_path,
         label_colors, test_batches)
def largest_activations():
    """Print the keyword sequences behind the strongest classifier weights.

    Loads the weights saved in ``model_orig.pkl``.  For row 0 (printed under
    "Real sequences") and row 1 (printed under "Fake sequences") of the
    classifier weight matrix, it takes the ten largest entries, maps each
    entry to a convolution filter and prints the keywords with the largest
    filter weight along axis 0.
    """
    fake, real = data_processing.load_data()
    _, keywords = data_processing.process_data(fake, real)
    model = classifiers.ConvnetClassifier(len(keywords), 40)
    model.load_state_dict(torch.load('model_orig.pkl'))
    weights = model.classifier[1].weight.data.numpy()

    def show_top_filters(class_weights):
        # Indices of the ten largest weights, in ascending weight order.
        for unit in np.argsort(class_weights)[-10:]:
            # Units 0-99 index features3, 100-199 features4, the rest
            # features5; the offset converts to an index within that bank.
            if unit < 100:
                bank, offset = model.features3, 0
            elif unit < 200:
                bank, offset = model.features4, 100
            else:
                bank, offset = model.features5, 200
            kernel = bank[0].weight.data.numpy()[unit - offset]
            print(*keywords[np.argmax(kernel, 0)])

    print("Real sequences")
    show_top_filters(weights[0])

    print("Fake sequences")
    show_top_filters(weights[1])
예제 #6
0
def run():
    """Train the model for one epoch and then write test predictions.

    Loads the image lists from ``./data``, builds batch generators for the
    train/validation/test images at the input resolution, and calls
    ``train`` (with no pre-trained weights) followed by ``test``, each with
    fixed step counts of 5.
    """
    data_root = './data'  # dataset location
    pretrained_weights = None  # no pre-trained weights are supplied
    weight_path = data_root + '/sq_weight.h5'  # where the model weights live
    prediction_dir = os.path.join(data_root, 'test')  # predicted results

    input_shape = (128, 256, 3)  # shape of the model's input images
    output_shape = (128, 256)

    batch_size = 5
    epochs = 1

    (train_images, valid_images, test_images, num_classes,
     label_colors) = data_processing.load_data(data_root)

    # Fixed step counts; the list lengths are not used to derive them.
    train_steps = val_steps = test_steps = 5
    print(train_steps, val_steps, test_steps)

    # Only height and width are handed to the batch generators.
    input_size = input_shape[0:2]
    train_batches = data_processing.gen_batch_function(train_images,
                                                       input_size)
    val_batches = data_processing.gen_batch_function(valid_images, input_size)
    test_batches = data_processing.gen_batch_function(test_images,
                                                      input_size,
                                                      test=True)

    train(num_classes, batch_size, epochs, train_steps, val_steps, data_root,
          input_shape, pretrained_weights, train_batches, val_batches)
    test(batch_size, test_steps, output_shape, prediction_dir, weight_path,
         label_colors, test_batches)
예제 #7
0
def main():
    """Cluster the normalised dog-breed data with k-means (k = 2) and plot it.

    Only the ``height`` and ``leg length`` columns are clustered.  The errors
    returned by ``kmeans`` are printed, then the clusters and the errors are
    plotted.
    """
    k = 2
    data = dp.data_norm(dp.load_data('dog_breeds.csv'))

    features = data[['height', 'leg length']].values
    centroids, clusters, errors = kmeans(features, k)

    print(errors)
    plot_data(data, centroids, clusters, k)
    plot_error(errors, k)
예제 #8
0
def task2():
    """Run 3-cluster k-means on the iris data with three different metrics.

    The first run uses ``kmeans``' default arguments; the next two pass
    ``None`` as the third argument and select the "cosine" and "jaccard"
    metrics respectively.  The labels returned by ``load_data`` are unused.
    """
    n_clusters = 3

    # Load the data from the CSV file; the labels are not needed here.
    data, _labels = load_data("iris.data")

    # Default metric.
    kmeans(data, n_clusters)

    # Cosine metric.
    kmeans(data, n_clusters, None, "cosine")

    # Jaccard metric.
    kmeans(data, n_clusters, None, "jaccard")
예제 #9
0
def main():
    """Fit polynomials on a train split and compare train/test errors.

    Reads ``pol_regression.csv``, splits it, and for each degree in
    (0, 1, 2, 3, 4, 5, 10) fits on the training split, predicts on both
    splits, draws the test predictions sorted by ``x`` and records the value
    returned by ``pol.eval_pol_regression`` for each split.  The figure is
    saved to ``images/polynomial_split.png`` and the two error lists are
    passed to ``pol.plot_error_graph``.
    """
    # Data processing
    frame = dp.load_data('pol_regression.csv')
    x_train, y_train, x_test, y_test = dp.split_data(frame)

    # Per-degree errors, in the same order as the degrees below.
    train_error = []
    test_error = []

    # Plot ground truth (the test points).
    plt.figure()
    plt.ylim(0, 1.5)
    plt.plot(x_test, y_test, 'bo')

    colors = ['r', 'y', 'b', 'c', 'k', 'm', 'g']
    degrees = [0, 1, 2, 3, 4, 5, 10]

    for color, degree in zip(colors, degrees):
        # Degree 0 skips the fit and uses the constant weight 1.
        if degree == 0:
            weights = 1
        else:
            weights = pol.pol_regression(x_train, y_train, degree)

        # Predictions for both splits with the same coefficients.
        y_train_hat = pol.prediction(x_train, weights, degree)
        y_test_hat = pol.prediction(x_test, weights, degree)

        # Sort the (x, prediction) pairs by x so the line is drawn left to
        # right, then split them back into two sequences for plotting.
        curve = zip(*sorted(zip(x_test, y_test_hat)))
        plt.plot(*curve, color=color)

        # Error on the training split, then on the testing split.
        train_score = pol.eval_pol_regression(y_train_hat, weights, x_train,
                                              y_train, degree)
        train_error.append(train_score)
        test_score = pol.eval_pol_regression(y_test_hat, weights, x_test,
                                             y_test, degree)
        test_error.append(test_score)

        print("[Degree: {0}] - Train: {1:.4f}, Test: {2:.4f}".format(
            degree, train_score, test_score))

    plt.legend(('ground truth', '$x^0$', '$x$', '$x^2$', '$x^3$', '$x^4$',
                '$x^5$', '$x^{10}$'),
               loc='lower right')
    plt.savefig(os.path.join('images', 'polynomial_split.png'))

    pol.plot_error_graph(train_error, test_error)
예제 #10
0
def tarin_test_model(is_train, is_test):
    """Optionally train and/or evaluate the MNIST network.

    Args:
        is_train: When truthy, load the normalised training data, run the
            network's feed-forward/back-propagation loop for
            ``dnn_model.epoch`` iterations, print the final loss and the
            training accuracy, and save the model as ``"mnist_dnn"``.
        is_test: When truthy, load the normalised test data and the saved
            ``"mnist_dnn"`` model, print the accuracy on the test data and
            save the model again.

    Returns:
        Always ``0``.  Any exception raised along the way is caught and
        printed rather than propagated (best-effort behaviour is kept).
    """
    try:
        if is_train:
            train_data, train_labels = dp.load_data(normalize=True)
            # Convert the one-hot labels to an array once and reuse it.
            train_labels = np.array(get_one_hot_vector_array(train_labels))
            dnn_model = MyNN(train_data, train_labels, 1500)
            for _ in tqdm(range(dnn_model.epoch)):
                dnn_model.feedforward()
                dnn_model.backprop()
            print("Epoch: {0}, Loss: {1}".format(dnn_model.epoch,
                                                 dnn_model.loss[-1]))
            print("Training accuracy : ",
                  get_accuracy(dnn_model, train_data, train_labels))
            dnn_model.save("mnist_dnn")
        if is_test:
            test_data, test_labels = dp.load_data(train_data=False,
                                                  normalize=True)
            # Always evaluate the model stored on disk, even right after
            # training in the same call.
            dnn_model = dp.load_model("mnist_dnn")
            test_labels = np.array(get_one_hot_vector_array(test_labels))
            if dnn_model is not None:
                # This branch scores the test data, so label it as such
                # (the message previously said "Training accuracy").
                print("Testing accuracy : ",
                      get_accuracy(dnn_model, test_data, test_labels, False))
                dnn_model.save("mnist_dnn")
    except Exception as ex:
        print("Error:: {0}".format(ex))

    return 0
def main():
    """Train the CNN with image augmentation and standard Keras callbacks.

    Builds an augmenting ``ImageDataGenerator``, compiles ``CNN()`` with Adam
    (learning rate 0.0001) and categorical cross-entropy, sets up CSV
    logging, early stopping, learning-rate reduction and best-only
    checkpointing (all monitoring ``val_loss``), then loads, preprocesses,
    shuffles and splits the data and trains with ``fit_generator``.
    """
    # Augmentation: small shifts and zooms plus horizontal flips.
    augmenter = ImageDataGenerator(
        featurewise_center=False,
        featurewise_std_normalization=False,
        rotation_range=0,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=.1,
        horizontal_flip=True)

    model = CNN()
    model.compile(optimizers.Adam(lr=0.0001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    # Create or truncate the log file before the logger is attached.
    log_file_path = base_path + 'gender_classification_training.log'
    with open(log_file_path, 'w'):
        pass

    checkpoint_pattern = (
        base_path + 'CNN.{epoch:02d}-{val_loss:.3f}-{val_acc:.2f}.hdf5')
    checkpoint = ModelCheckpoint(checkpoint_pattern, 'val_loss', verbose=1,
                                 save_best_only=True)
    csv_logger = CSVLogger(log_file_path, append=False)
    early_stop = EarlyStopping('val_loss', patience=patience)
    reduce_lr = ReduceLROnPlateau('val_loss', factor=0.1,
                                  patience=int(patience/4), verbose=1)
    callbacks = [checkpoint, csv_logger, early_stop, reduce_lr]

    # Load the data and report how many samples and labels were read.
    faces, labels = load_data(data_path)
    print(len(faces))
    print(len(labels))
    faces = preprocess_input(faces)

    # Shuffle samples and labels with one shared random ordering.
    order = np.argsort(np.random.random(len(faces)))
    faces, labels = faces[order], labels[order]

    train_data, val_data = split_data(faces, labels, validation_split)
    train_faces, train_labels = train_data
    batches = augmenter.flow(train_faces, train_labels, batch_size)
    model.fit_generator(batches,
                        steps_per_epoch=len(train_faces)/batch_size,
                        epochs=num_epochs,
                        verbose=1,
                        callbacks=callbacks,
                        validation_data=val_data)
예제 #12
0
    def stock(self, name, win, days):
        """Roll the price models forward ``days`` steps and collect actions.

        For each step the last-loaded window of ``win`` rows is normalised
        relative to its first row (``value / first_row_value - 1``, or ``0``
        where the first-row value is zero), fed to the four LSTM price
        models, and the de-normalised predictions are appended to the buffer
        as a new row so the next step can use them.  The agent model is
        queried on ``self.rl_data`` of the buffer from the current step on.

        Args:
            name: Passed to ``data_processing.load_data``; the last column of
                the loaded data is dropped.
            win: Number of rows in each model input window.
            days: Number of prediction steps to run.

        Returns:
            ``(action, price)``: the agent outputs and the predicted close
            prices, one entry per step.
        """
        action = []
        price = []
        original_data = data_processing.load_data(name)
        original_data = np.delete(original_data, -1, axis=1)
        buffer_data = original_data[:win]
        n_features = buffer_data.shape[1]

        # The models do not change between steps, so load them once instead
        # of on every iteration.
        lstm_high = load_model('lstm_stock_high.h5')
        lstm_low = load_model('lstm_stock_low.h5')
        lstm_open = load_model('lstm_stock_open.h5')
        lstm_close = load_model('lstm_stock_close.h5')
        agent = load_model('dqn_stock_picking.h5')

        for i in range(days):  # predict prices for `days` steps
            rl_original = buffer_data[i:]
            base_row = buffer_data[i]

            # One sample of shape (1, win, n_features).  The previous buffer
            # was 2-D but written with three subscripts, which cannot work;
            # a single-sample batch is the only shape consistent with
            # storing each prediction as one value below.
            window = np.zeros((1, win, n_features))
            for j in range(win):
                for u in range(n_features):
                    if base_row[u] != 0:
                        ratio = buffer_data[i + j][u] / base_row[u]
                    else:
                        ratio = 1
                    window[0][j][u] = ratio - 1

            pred_high = lstm_high.predict(window)
            pred_low = lstm_low.predict(window)
            pred_open = lstm_open.predict(window)
            # The close price comes from the close model (the open model was
            # being used here by mistake).
            pred_close = lstm_close.predict(window)

            # Undo the normalisation; columns 0-3 are used as
            # open, high, low, close.
            pred_open = (pred_open + 1) * base_row[0]
            pred_high = (pred_high + 1) * base_row[1]
            pred_low = (pred_low + 1) * base_row[2]
            pred_close = (pred_close + 1) * base_row[3]

            # np.zeros takes the shape as one tuple; a second positional
            # argument would be interpreted as the dtype.
            predicted_row = np.zeros((1, n_features))
            predicted_row[0][0] = pred_open
            predicted_row[0][1] = pred_high
            predicted_row[0][2] = pred_low
            predicted_row[0][3] = pred_close

            rldata = self.rl_data(rl_original)
            act = agent.predict(rldata)
            action.append(act)
            price.append(pred_close)
            buffer_data = np.vstack((buffer_data, predicted_row))

        return action, price
예제 #13
0
def validate_model():
    """Evaluate the checkpoint at ``SAVE_MODEL_PATH`` on the validation data.

    Builds a fresh graph, restores the saved variables and runs the loss op
    until the input pipeline raises ``OutOfRangeError``.

    Returns:
        The sum of the per-run loss values divided by the number of runs.
    """
    dataset = prepare_data()
    # Build the evaluation graph.
    with tf.Graph().as_default():
        preprocess_fn = preprocessing_factory.get_preprocessing(
            'resnet_v1_50', is_training=False)
        # NOTE(review): by analogy with train_model's call, 1 is presumably
        # the epoch count and 128 the batch size -- confirm against load_data.
        images, targets = load_data(dataset['val_image_names'],
                                    dataset['val_image_scores'], 1,
                                    preprocess_fn, 128, False)
        targets = tf.reshape(targets, [-1, 1])

        predictions, _ = predict_model(images, is_training=False)
        restorer = tf.train.Saver(slim.get_variables_to_restore())

        with tf.name_scope('loss'):
            # Square root of the mean squared difference.
            squared_error = tf.square(predictions - targets)
            rmse = tf.sqrt(tf.reduce_mean(squared_error))

        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True

        with tf.Session(config=session_config) as session:
            session.run(tf.global_variables_initializer())
            session.run(tf.local_variables_initializer())
            restorer.restore(session, SAVE_MODEL_PATH)

            coordinator = tf.train.Coordinator()
            workers = tf.train.start_queue_runners(coord=coordinator)
            loss_total = 0.0
            batch_count = 0
            try:
                while not coordinator.should_stop():
                    loss_total += session.run(rmse)
                    batch_count += 1

            except tf.errors.OutOfRangeError:
                # The input pipeline is exhausted: report the average.
                print("Validating: mean loss %f" % (loss_total / batch_count))
            finally:
                coordinator.request_stop()
            coordinator.join(workers)
    return loss_total / batch_count
예제 #14
0
def CNN():
    """Train and evaluate a two-convolution-layer classifier with 21 classes.

    Images are reshaped to 32x32x1 and scaled by 1/255; labels are shifted
    by one and one-hot encoded.  The network is
    conv(20) -> pool -> relu -> conv(50) -> pool -> relu -> dense(500) ->
    relu -> dense(21) -> softmax, trained with Adam for 30 epochs using the
    test set as validation data.  Final loss and accuracy are printed for
    both sets.
    """
    train_img, train_label, test_img, test_label = load_data()

    def as_network_input(images):
        # One channel, float32, scaled by 1/255.
        return images.reshape(images.shape[0], 32, 32, 1).astype('float32')/255.

    def as_one_hot(labels):
        # Labels are decremented by one before encoding into 21 classes.
        return to_categorical(np.array(labels)-1, num_classes=21)

    train_X = as_network_input(train_img)
    test_X = as_network_input(test_img)
    train_y = as_one_hot(train_label)
    test_y = as_one_hot(test_label)

    model = Sequential()
    layer_stack = (
        # First convolution block: 20 filters.
        Conv2D(filters=20, kernel_size=5, strides=1, padding="same",
               input_shape=(32, 32, 1)),
        MaxPool2D(pool_size=2, strides=2),
        Activation('relu'),
        # Second convolution block: 50 filters.
        Conv2D(filters=50, kernel_size=5, strides=1, padding='same'),
        MaxPool2D(pool_size=2, strides=2),
        Activation('relu'),
        # Fully connected layer with 500 units.
        Flatten(),
        Dense(500),
        Activation('relu'),
        # Output layer with 21 units.
        Dense(21),
        Activation('softmax'),
    )
    for layer in layer_stack:
        model.add(layer)

    optimizer = tf.keras.optimizers.Adam(lr=1e-3)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Train; the test set doubles as validation data.
    model.fit(train_X, train_y, epochs=30, batch_size=256,
              validation_data=(test_X, test_y),
              shuffle=True, verbose=1)

    # Evaluate on both sets.
    loss_train, acc_train = model.evaluate(train_X, train_y, verbose=0)
    loss_test, acc_test = model.evaluate(test_X, test_y, verbose=0)
    print('Accuracy of training set:', acc_train, 'loss:', loss_train)
    print('Accuracy of testing set:', acc_test, 'loss:', loss_test)
def test_expanded():
    """Report the accuracy of the model in ``model.pkl`` on five data sets.

    The fake and real headlines (labelled with ones and zeros respectively)
    and the train/valid/test TSV files are encoded with the stored keywords
    at length 40, wrapped in ``Headlines`` data sets and scored with
    ``train.get_accuracy``.
    """
    fake, real = data_processing.load_data()
    keywords = data_processing.load_keywords()
    model = classifiers.ConvnetClassifier(len(keywords), 40)
    model.load_state_dict(torch.load('model.pkl'))

    def build_set(texts, labels):
        # Encode the texts with the shared keywords, then pair with labels.
        encoded = data_processing.encode_data(keywords, texts, 40)
        return data_processing.Headlines((encoded, labels))

    fake_data = build_set(fake, np.ones(len(fake)))
    real_data = build_set(real, np.zeros(len(real)))
    print('Data Loaded')

    train_data = data_processing.load_tsv('train.tsv')
    valid_data = data_processing.load_tsv('valid.tsv')
    test_data = data_processing.load_tsv('test.tsv')
    print('Data Loaded')

    # Each TSV result is indexed as [0] for the texts and [1] for the labels.
    training_set = build_set(train_data[0], train_data[1])
    print('Training Set Processed')
    validation_set = build_set(valid_data[0], valid_data[1])
    print('Validation Set Processed')
    testing_set = build_set(test_data[0], test_data[1])
    print('Testing Set Processed')

    reports = (
        ('Acheived {:%} accuracy on the fake set.', fake_data),
        ('Acheived {:%} accuracy on the real set.', real_data),
        ('Acheived {:%} accuracy on the training set.', training_set),
        ('Acheived {:%} accuracy on the validation set.', validation_set),
        ('Acheived {:%} accuracy on the testing set.', testing_set),
    )
    for template, subset in reports:
        print(template.format(train.get_accuracy(model, subset)))
예제 #16
0
파일: model.py 프로젝트: monika113/GBDT
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 14 15:39:44 2019

@author: monika

train and test GBDT model on dataset '20newsgroups'
"""

from data_processing import clean_text
from data_processing import load_data

# Obtain the train/test split of the '20_newsgroups' data (split=0.3),
# together with the number of documents in each part and the class names.
train_num, test_num, x_train, y_train, x_test, y_test, class_names = load_data(
    '20_newsgroups', split=0.3)

# Clean every training document in place.
for i in range(train_num):
    x_train[i] = clean_text(x_train[i])

# Clean every test document in place.
for i in range(test_num):
    x_test[i] = clean_text(x_test[i])

# TF-IDF features
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
# Convert the train/test sets into matrices of token counts; the vocabulary
# is fitted on the training documents only and reused for the test documents.
count_vect = CountVectorizer()
x_train_counts = count_vect.fit_transform(x_train)
x_test_counts = count_vect.transform(x_test)
# Transformer used to turn the counts into TF-IDF values.
tf_trans = TfidfTransformer()
예제 #17
0
                    dest='gpu',
                    help='Use GPU or not for training. The default is True',
                    type=bool,
                    default=True)

# Parse the command line and choose the device: a torch CUDA/CPU device
# (CUDA only when torch reports it as available) if the GPU option is set,
# otherwise the plain string 'cpu'.
args = parser.parse_args()
device = torch.device(
    'cuda:0' if torch.cuda.is_available() else 'cpu') if args.gpu else 'cpu'
# Copy the parsed options into module-level names.
data_dir = args.data_directory
save_dir = args.save_directory
learning_rate = args.lr
hidden_units = args.units
epochs = args.num_epochs
arch = args.model_arch
# Process and load the data
# NOTE(review): "vaild_data" looks like a misspelling of "valid_data"; it is
# not used anywhere in the visible part of the script.
train_data, vaild_data, test_data, trainloader, validloader, testloader = load_data(
    data_dir)
# Loading the pre-trained network selected by name from `models`
model = getattr(models, arch)(pretrained=True)
# Input width of the first classifier layer; this assumes the chosen
# architecture exposes an indexable `classifier` attribute.
input_units = model.classifier[0].in_features
# Creating the model
model = ProjectClassifier(model, input_units, hidden_units)
criterion = nn.NLLLoss()
# Only the classifier's parameters are handed to the optimizer.
optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)
model.to(device)
#Training the model
model, optimizer = train_model(model, trainloader, validloader, criterion,
                               optimizer, epochs, device)
# Testing the model
test_model(model, testloader, criterion, device)
# Saving the model
save_model(model, arch, epochs, optimizer, train_data, save_dir)
import theano.tensor as T
import lasagne
from IPython import display
from tqdm import tqdm
from data_processing import load_data
import pdb

# data params
datapath = '/media/steampunkhd/rafaelvalle/datasets/MIDI/Piano'
glob_file_str = '*.npy'
n_pieces = 8  # 0 is equal to all pieces, unbalanced dataset
crop = None  # (32, 96)
as_dict = True

# load data, takes time depending on dataset size
dataset = load_data(datapath, glob_file_str, n_pieces, crop, as_dict)

# model params
d_batch_size = g_batch_size = 512
n_timesteps = 100  # 100 ms per step
min_len = 50
max_len = 100
single_len = True
# Feature width is read from the first array stored under the first key.
# next(iter(dataset)) yields the same key dataset.keys()[0] did on Python 2
# and also works on Python 3, where key views cannot be indexed.
n_features = dataset[next(iter(dataset))][0].shape[1]
# One condition per key of the dataset mapping.
n_conditions = len(dataset)
temperature = 1.
n_units_d = 8
n_units_g = 16
arch = 1

예제 #19
0
# Experiment settings.
seed = 42
anchor = 50
axis = 0  # NOTE(review): rebound immediately by the loop variable below
repeat = 20
loss_tem = np.zeros(repeat)  # one zero-initialised slot per repetition

for axis in range(repeat):

    # Every repetition re-seeds NumPy and torch (and CUDA when enabled) with
    # the same seed value.
    np.random.seed(seed)
    torch.manual_seed(seed)
    if args.cuda:
        torch.cuda.manual_seed(seed)

    # Load data
    mode_fea, mode_adj, num_anchor, adj, features, labels, delta, degree, fea_original, fea_true, Range_Mat, Range, Dist_Mat, Dist, truncated_noise, idx_train, idx_val, idx_test = load_data(threshold, anchor)

    # Model and optimizer: input and output widths come from the loaded
    # feature and label matrices; the second hidden width is fixed at 2000.
    model = GCN(nfeat=features.shape[1],
                nhid1=args.hidden,
                nhid2=2000,
                nout=labels.shape[1],
                dropout=args.dropout)

    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr, weight_decay=args.weight_decay)

    print(model)

    # Mean-squared-error loss.
    loss_fun = torch.nn.MSELoss()
예제 #20
0
import numpy as np
import tensorflow as tf
# import matplotlib.pyplot as plt
import data_processing
# import random
# Load the data, one-hot encode it and convert it to a float32 array.
data = data_processing.load_data(download=False)
new_data = data_processing.convert2onehot(data)
new_data = new_data.values.astype(np.float32)

# Shuffle the rows in place, then split 70% / 30% into train and test.
np.random.shuffle(new_data)
sep = int(0.7 * len(new_data))
train_data = new_data[:sep]
test_data = new_data[sep:]

# Start building the neural network.
# Each input row has 25 columns: the first 21 are features, the rest labels.
tf_input = tf.placeholder(tf.float32, [None, 25], "input")
tfx = tf_input[:, :21]
tfy = tf_input[:, 21:]
L1 = tf.layers.dense(tfx, 128, tf.nn.relu, name="L1")
L2 = tf.layers.dense(L1, 128, tf.nn.relu, name="L2")
out = tf.layers.dense(L2, 4, name="output")
prediction = tf.nn.softmax(out, name="pred")

loss = tf.losses.softmax_cross_entropy(onehot_labels=tfy, logits=out)
# tf.metrics.accuracy returns a pair; the element at index 1 is kept.
accuracy = tf.metrics.accuracy(
    labels=tf.argmax(tfy, axis=1),
    predictions=tf.argmax(out, axis=1),
)[1]
opt = tf.train.GradientDescentOptimizer(learning_rate=0.1)
train_op = opt.minimize(loss)
sess = tf.Session()
# Global and local variables are initialised in one grouped run.
sess.run(
    tf.group(tf.global_variables_initializer(),
             tf.local_variables_initializer()))
#plt.ion()
#fig,(ax1,ax2)=plt.subplots(1,2,figsize(8,4))
예제 #21
0
import os
import numpy as np
import tensorflow as tf


# Parameters
batch_size = 128
embedding_size = 100
window_size = 3
neg_samples = 20
learn_rate = 1.0

# Data Loading
print("loading data...")
# train: {userID (string) => list_of_movies}, train_sets: {userID => set(movies)}, test: {userID => next movie}
train, train_sets, test = load_data(validation=False)
# Per-user movie lists, without the user ids.
movies_train = list(train.values())
# Vocabulary built from the per-user movie sets; its size is printed below.
movie_vocab = get_movie_vocab(train_sets)
vocab_size = len(movie_vocab)
print(vocab_size)
# Forward and reversed index dictionaries built from the vocabulary.
movie_dictionary, movie_reversed_dictionary = build_index_dictionary(list(movie_vocab))

print("building graph...")
user_index = 0  # where to start generating batch from


def generate_batch(batch_size, window_size):
    # window_size = window_size for products
    global user_index
    # src words (batch) and context words (labels)
    batch_words = np.ndarray(shape=(batch_size), dtype=np.int32)
예제 #22
0
#!/usr/bin/env python3

import sys

sys.path.append(".")
sys.path.append("..")
import numpy as np

from data_processing import load_data, split_train_valid_set, transform_data
from from_scratch.data_processing import normalization
from linear_model import (adaptive_gradient_descent, gradient_descent,
                          pseudo_inverse, stochastic_gradient_descent)

if __name__ == "__main__":
    # Load the raw training CSV, split it into train/validation frames and
    # transform each one (passing the CSV path associated with that split).
    df = load_data("../../Data/train.csv")
    train_df, valid_df = split_train_valid_set(df)
    train_set = transform_data(train_df, data_path="../../Data/train_set.csv")
    valid_set = transform_data(valid_df, data_path="../../Data/valid_set.csv")

    # The last column is the target; all other columns are features.
    train_X = train_set[train_set.columns[:-1]].to_numpy()
    train_Y = train_set[train_set.columns[-1]].to_numpy()

    valid_X = valid_set[valid_set.columns[:-1]].to_numpy()
    valid_Y = valid_set[valid_set.columns[-1]].to_numpy()

    # Pseudo Inverse (called with lambdaL2=0)
    train_loss, general_weights = pseudo_inverse(train_X, train_Y, lambdaL2=0)
    # The first returned weight is used as the bias, the rest as the
    # per-feature weights.
    bias = general_weights[0]
    weights = general_weights[1:]
    print("Pesudo Inverse with train cost: {}".format(train_loss))
    ## loss for validation set
import re

import sys
import os

from keras.layers import Dense, Input
from keras.layers import GRU, Bidirectional, TimeDistributed, Dropout, BatchNormalization
from keras.models import Model
from keras.preprocessing.sequence import TimeseriesGenerator

from keras import backend as K
from keras.engine.topology import Layer, InputSpec
from Attention import AttLayer
# Load the data
from data_processing import load_data
train, train_label, test, test_label, name = load_data()

MAX_SENTS = 8  # number of sentences, i.e. the number of time steps
WORD_LENTTH = 1
MAX_SENT_LENGTH = 196  # i.e. the number of feature values

# Use TimeseriesGenerator to generate the sequence data
time_steps = MAX_SENTS
batch_size = 1024
# First carve part of the training set off as a validation set
# (original comment; the visible lines only truncate the train/test arrays
# to a fixed row count plus `time_steps` and reshape them to
# (-1, WORD_LENTTH, MAX_SENT_LENGTH)).
train = train[:(172032 + time_steps), :]  # 4096 * 42 = 172032
train = train.reshape(-1, WORD_LENTTH, MAX_SENT_LENGTH)
train_label = train_label[:(172032 + time_steps), ]
test = test[:(81920 + time_steps), :]  # 4096 * 20 = 81920
test = test.reshape(-1, WORD_LENTTH, MAX_SENT_LENGTH)
test_label = test_label[:(81920 + time_steps), ]
예제 #24
0
def train_model():
    """Fine-tune the network on the training data with periodic validation.

    Builds the training graph, restores every variable except those under
    ``resnet_v1_50/logits`` from ``RES_v1_50_MODEL_PATH``, and runs the train
    op until the input pipeline raises ``OutOfRangeError``.  Every
    ``batch_num`` steps the model is saved to ``SAVE_MODEL_PATH`` and
    ``validate_model()`` is called; whenever the validation loss improves,
    the model is also saved to ``BEST_MODEL_PATH``.
    """
    data = prepare_data()
    #build graph
    with tf.Graph().as_default():
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            args.model_name, is_training=True)
        processed_image, score = load_data(data['train_image_names'],
                                           data['train_image_scores'],
                                           args.epoch_num,
                                           image_preprocessing_fn,
                                           args.batch_size, True)
        # Scores become a column so they line up with the model output.
        score = tf.reshape(score, [-1, 1])
        print(score.shape)
        logits, _ = predict_model(processed_image, is_training=True)
        print(logits.shape)
        # This saver is created before the optimizer, so it only covers the
        # variables that exist at this point (minus the excluded scope).
        variables_to_restore = slim.get_variables_to_restore(
            exclude=['resnet_v1_50/logits'])
        variables_restorer = tf.train.Saver(variables_to_restore)

        #Loss
        with tf.name_scope('ls'):
            # Square root of the mean squared difference (i.e. RMSE).
            loss = tf.sqrt(tf.reduce_mean(tf.square(logits - score)))
            tf.summary.scalar('loss', loss)

        # Despite its name, this variable is passed as the global step below,
        # so it counts training steps.
        current_epoch = tf.Variable(0, trainable=False)
        decay_step = EPOCHS_PER_LR_DECAY * len(
            data['train_image_names']) // args.batch_size
        learning_rate = tf.train.exponential_decay(args.lr,
                                                   current_epoch,
                                                   decay_step,
                                                   LR_DECAY_FACTORY,
                                                   staircase=True)

        opt = tf.train.MomentumOptimizer(learning_rate, 0.9)
        #opt = tf.train.AdamOptimizer(learning_rate)
        optimizer = slim.learning.create_train_op(loss,
                                                  opt,
                                                  global_step=current_epoch)

        # Created after the train op; used for the checkpoints written below.
        saver = tf.train.Saver()
        summary_op = tf.summary.merge_all()
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            summary_writer = tf.summary.FileWriter(TRAIN_LOG_DIR, sess.graph)
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())
            # Overwrite the freshly initialised values with the pre-trained
            # ones.
            variables_restorer.restore(sess, RES_v1_50_MODEL_PATH)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            sum_ls = 0.0
            # Number of whole batches in the training data.
            batch_num = len(data['train_image_scores']) // args.batch_size
            val_step = 0
            best_val_ls = 100.0
            try:
                while not coord.should_stop():
                    _, ls, step, summary = sess.run(
                        [optimizer, loss, current_epoch, summary_op])
                    sum_ls += ls

                    # Progress line and summary every 50 steps.
                    if step % 50 == 0:
                        print("Epoch %d, loss %f" % (step / batch_num + 1, ls))
                        summary_writer.add_summary(summary, step)
                    # Every batch_num steps: report the mean loss, checkpoint
                    # and validate.
                    if step % batch_num == 0 and step != 0:
                        print("Epoch %d, mean loss %f" %
                              (step / batch_num + 1, sum_ls / batch_num))
                        sum_ls = 0.0
                        # validate_model() restores from SAVE_MODEL_PATH, so
                        # the checkpoint is written first.
                        saver.save(sess, SAVE_MODEL_PATH)
                        val_ls = validate_model()
                        if val_ls < best_val_ls:
                            best_val_ls = val_ls
                            saver.save(sess, BEST_MODEL_PATH)
                        print('best val loss %f' % (best_val_ls))
            except tf.errors.OutOfRangeError:
                # Input pipeline exhausted: save the final state.
                saver.save(sess, SAVE_MODEL_PATH)
            finally:
                coord.request_stop()
            coord.join(threads)
예제 #25
0
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import data_processing

# Load the raw dataset (download=True is forwarded to the project loader) and
# convert it to a one-hot encoded table.
data = data_processing.load_data(download=True)
new_data = data_processing.convert2onehot(data)

# prepare training data
# NOTE(review): `.values` implies a pandas-like object is returned by
# convert2onehot -- confirm against data_processing.
new_data = new_data.values.astype(
    np.float32)  # change to numpy array and float32
np.random.shuffle(new_data)  # in-place shuffle of the rows (first axis)
sep = int(0.7 * len(new_data))  # row index of the 70/30 split point
train_data = new_data[:sep]  # training data (70%)
test_data = new_data[sep:]  # test data (30%)

# build network
# A single placeholder carries features and labels side by side: each row has
# 25 columns, the first 21 are fed to the network and the last 4 are used as
# the one-hot target.
tf_input = tf.placeholder(tf.float32, [None, 25], "input")
tfx = tf_input[:, :21]
tfy = tf_input[:, 21:]

# Two 128-unit ReLU hidden layers followed by a 4-unit linear layer (logits).
l1 = tf.layers.dense(tfx, 128, tf.nn.relu, name="l1")
l2 = tf.layers.dense(l1, 128, tf.nn.relu, name="l2")
out = tf.layers.dense(l2, 4, name="l3")
prediction = tf.nn.softmax(out, name="pred")  # class probabilities

# The loss is computed from the raw logits, not from `prediction`.
loss = tf.losses.softmax_cross_entropy(onehot_labels=tfy, logits=out)
# Only the update op (index 1) is kept. tf.metrics.accuracy creates local
# variables, so tf.local_variables_initializer() must be run before using it.
accuracy = tf.metrics.accuracy(  # return (acc, update_op), and create 2 local variables
    labels=tf.argmax(tfy, axis=1),
    predictions=tf.argmax(out, axis=1),
)[1]
import numpy as np
import theano
import theano.tensor as T
import lasagne

from data_processing import load_data
import pdb

# dataset params and load data
datapath = '/media/steampunkhd/rafaelvalle/datasets/MIDI/Piano'
glob_file_str = '*.npy'
n_pieces = 0  # 0 is equal to all pieces, unbalanced dataset
crop = None  # (32, 96)
as_dict = True
# With as_dict=True the result is used below as a mapping (`.keys()`, key
# lookup). NOTE(review): presumably keyed by condition/class -- confirm
# against data_processing.load_data.
dataset = load_data(datapath, glob_file_str, n_pieces, crop, as_dict)

# model params
c_batch_size = g_batch_size = 512
n_timesteps = 100  # 100 ms per step
min_len = 50
max_len = 100
single_len = True  # single length per mini-batch
# Feature width is read from axis 1 of the first item stored under the first
# key. `next(iter(...))` replaces `dataset.keys()[0]`, which only works on
# Python 2 (dict views are not subscriptable on Python 3); both forms select
# the same key.
n_features = dataset[next(iter(dataset.keys()))][0].shape[1]
n_conditions = len(dataset.keys())  # one condition per dataset key
n_units_d = 8
n_units_g = 16

# declare theano variables
c_in_X = T.ftensor3('ddata')  # float32 3-D tensor
c_in_M = T.imatrix('dismask')  # int32 matrix
예제 #27
0
from models import SpeakerRecognitionModel

# training params
SPEAKER_ID = None
SPEAKER_ID_OTHERS = None  # placeholder; rebound below once N_CLASSES is known
LENGTH = 64  # forwarded as `length=` to iterate_minibatches
BATCH_SIZE = 256
VAL_BATCH_SIZE = 1024
TEST_BATCH_SIZE = 1024
N_ITERS = int(50000)

# model params
init_lr = 1e-4

# load data
# NOTE(review): `load_data` and `iterate_minibatches` are not imported in the
# visible part of this snippet -- confirm they are in scope.
data = load_data('data_16khz', '*.wav')
# Number of entries in the training split; presumably one per speaker/class
# -- confirm against load_data.
N_CLASSES = len(data['train'])
SPEAKER_ID_OTHERS = range(N_CLASSES)
N_TEST_RUNS = 100

# Training batch source. It is created with forever=True, shuffle=False,
# integer (non one-hot) labels and no transform.
data_training = iterate_minibatches(data['train'],
                                    SPEAKER_ID,
                                    SPEAKER_ID_OTHERS,
                                    BATCH_SIZE,
                                    shuffle=False,
                                    forever=True,
                                    length=LENGTH,
                                    one_hot_labels=False,
                                    apply_transform=False)
data_validation = iterate_minibatches(data['valid'],
def main(num_epochs=200, convs=0, batchsize=64, initial_eta=5e-3, add_noise=True):
    """Train a GAN on piano-roll data and save a sample grid after each epoch.

    Loads the dataset, builds a generator and a discriminator, compiles a
    joint Adam training step for both, then trains for ``num_epochs`` epochs.
    After every epoch a 6x7 grid of generated samples is written under
    ``images/dcgan_proll/``. From the halfway point on, the learning rate is
    decayed linearly towards zero.

    Args:
        num_epochs: Number of passes over the training data.
        convs: Forwarded unchanged to ``build_generator`` and
            ``build_discriminator``.
        batchsize: Mini-batch size, used for both the real batches and the
            generator noise.
        initial_eta: Initial Adam learning rate, shared by both networks.
        add_noise: If true, Gaussian noise (std 0.1) is added to the "real"
            target of the discriminator loss; otherwise the target is
            exactly 1.

    Returns:
        None.
    """
    # Load the dataset
    print("Loading data...")
    datapath = '/media/steampunkhd/rafaelvalle/datasets/MIDI/Piano'
    glob_file_str = '*.npy'
    n_pieces = 0  # 0 is equal to all pieces, unbalanced dataset
    crop = None  # (32, 96)
    as_dict = False
    inputs, _ = load_data(datapath, glob_file_str, n_pieces, crop, as_dict)

    # scale to [0, 1]
    # inputs = (inputs + 1) * 0.5

    # Prepare Theano variables for inputs and targets
    noise_var = T.matrix('noise')
    input_var = T.tensor4('inputs')

    # Instantiate a symbolic noise generator to use for training
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
    srng = RandomStreams(seed=np.random.randint(2147462579, size=6))

    # Create neural network model
    print("Building model and compiling functions...")
    generator = build_generator(noise_var, convs)
    discriminator = build_discriminator(input_var, convs)

    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(
        discriminator, lasagne.layers.get_output(generator))

    # Create loss expressions
    # one-sided label smoothing
    lbl_noise = 0.0
    if add_noise:
        lbl_noise = srng.normal(size=(3,), avg=0.0, std=0.1)
    # The losses must be defined whether or not label noise is enabled.
    # They used to live inside the `if` above, which made
    # main(add_noise=False) fail with UnboundLocalError further down.
    generator_loss = lasagne.objectives.binary_crossentropy(
        fake_out, 1).mean()
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1 + lbl_noise) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()

    # Create update expressions for training
    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    # Shared variable so the learning rate can be changed between epochs.
    eta = theano.shared(lasagne.utils.floatX(initial_eta))
    updates = lasagne.updates.adam(
        generator_loss, generator_params, learning_rate=eta, beta1=0.9)
    updates.update(lasagne.updates.adam(
            discriminator_loss, discriminator_params, learning_rate=eta, beta1=0.9))

    # Fresh uniform noise is substituted for noise_var on every training call.
    noise = srng.uniform((batchsize, 100))

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary). It returns the fraction of real samples scored
    # above 0.5 and the fraction of generated samples scored below 0.5:
    train_fn = theano.function([input_var],
                               [(real_out > .5).mean(), (fake_out < .5).mean()],
                               givens={noise_var: noise},
                               updates=updates)


    # Compile another function generating some data
    gen_fn = theano.function([noise_var],
                             lasagne.layers.get_output(generator,
                                                       deterministic=True))
    obs_length = 128
    print("Starting training...")
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(inputs, batchsize, length=obs_length):
            batch = lasagne.utils.floatX(batch)
            # reshape batch to proper dimensions
            # (insert a singleton axis at position 1 to match the 4-D input)
            batch = batch.reshape(
                (batch.shape[0], 1, batch.shape[1], batch.shape[2]))
            train_err += np.array(train_fn(batch))
            train_batches += 1


        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        # Despite the label, these are the two mean values returned by
        # train_fn (real/fake discriminator hit rates), not a loss.
        print("  training loss:\t\t{}".format(train_err / train_batches))

        # And finally, we plot some generated data
        # NOTE(review): `noise_size` is not defined in this function; it is
        # presumably a module-level constant and should equal the noise width
        # of 100 used for training above -- confirm.
        samples = gen_fn(lasagne.utils.floatX(np.random.rand(42, noise_size)))
        # Tile the 42 samples into a 6x7 grid image.
        # NOTE(review): recent matplotlib releases may only accept
        # origin='upper'/'lower' -- confirm 'bottom' with the installed version.
        plt.imsave('images/dcgan_proll/proll_samples_epoch{}.png'.format(epoch),
                    (samples.reshape(6, 7, obs_length, obs_length)
                            .transpose(0, 2, 1, 3)
                            .reshape(6*obs_length, 7*obs_length)).T,
                    cmap='gray',
                    origin='bottom')

        # After half the epochs, start decaying the learning rate towards zero
        if epoch >= num_epochs // 2:
            progress = float(epoch) / num_epochs
            eta.set_value(lasagne.utils.floatX(initial_eta*2*(1 - progress)))
예제 #29
0
import numpy as np
import os
# Append the Graphviz binaries to PATH (presumably so plot_model can find
# them) and set TensorFlow's C++ log level to 2 before Keras is imported.
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# configuring keras
import keras
from keras.models import Model
from keras.layers import Input, BatchNormalization, Conv2D, Dense, Dropout, Flatten, GaussianDropout
from keras.utils import plot_model
import data_processing as dp
from keras import backend as K

# NOTE: comment the next line if running more than once back-to-back
dp.save_data('./dataset/data.csv', binary=1, augment=0)
eeg_data, labels = dp.load_data()

# Functional-API model: each sample is a 178x1 single-channel "image", so the
# (k, 1) kernels below convolve only along the first axis.
inputs = Input(shape=(178, 1, 1))
x = GaussianDropout(0.1)(inputs)
# Three strided conv stages (24 -> 16 -> 8 filters), each followed by batch
# normalisation.
x = Conv2D(24, kernel_size=(5, 1), strides=3, activation="relu")(x)
x = BatchNormalization()(x)
x = Conv2D(16, (3, 1), strides=2, activation="relu")(x)
x = BatchNormalization()(x)
x = Conv2D(8, (3, 1), strides=2, activation="relu")(x)
x = BatchNormalization()(x)
x = Flatten()(x)
x = Dense(20)(x)  # no activation argument is given, so this layer is linear
x = Dropout(0.3)(x)
# Two-way softmax output.
predictions = Dense(units=2, activation="softmax")(x)

예제 #30
0
from data_processing import load_data
from build_model import build_SAE
import numpy as np
from keras.layers import Dense
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.callbacks import TensorBoard
from keras.layers import LSTM, Reshape, Dropout
import os
# load data
print("Load data...")
train, train_label, test, test_label = load_data()  # data:(, 196) label:(, 10)
print("train shape: ", train.shape)
# Force the labels into 2-D arrays with 10 columns.
train_label = train_label.reshape((-1, 10))
test_label = test_label.reshape((-1, 10))
print("train_label shape: ", train_label.shape)

# Persist the reshaped labels to .npy files in the working directory.
np.save('multi_train_label.npy', train_label)
np.save('multi_test_label.npy', test_label)
# build model
print("Build AE model")
# build_SAE returns three (autoencoder, encoder) pairs plus the stacked model
# and its encoder; `rho` is forwarded unchanged.
# NOTE(review): rho is presumably the sparsity target -- confirm in build_model.
autoencoder_1, encoder_1, autoencoder_2, encoder_2, autoencoder_3, encoder_3, sSAE, sSAE_encoder = build_SAE(
    rho=0.04)

print("Start pre-training....")

# fit the first layer: add validation_data=test here together with callbacks;
# val_loss is what gets recorded, and the model with the smallest one is kept
print("First layer training....")
AE_1_dir = os.path.join(os.getcwd(), 'saved_ae_1')  # <cwd>/saved_ae_1
ae_1_filepath = "best_ae_1.hdf5"
예제 #31
0
####
# Train and save a model
####

import numpy as np
import data_processing as dp
import neural_net

if __name__ == "__main__":

    # Run configuration: dataset tag, number of games, training settings.
    NAME = 'all_ratings'
    ngames = int(5 * 1e5)
    batch_size, epochs, val_split = 512, 5, 0.2

    # Load features, targets and per-sample weights, then build the network.
    X, y, weights = dp.load_data(ngames=ngames, use_cache=True, name=NAME)
    nn_mdl = neural_net.NeuralNet(input_dim=(8, 8, 7))

    # Fit with a held-out validation split and per-sample weighting.
    fit_options = {
        'batch_size': batch_size,
        'epochs': epochs,
        'verbose': 1,
        'validation_split': val_split,
        'sample_weight': weights,
    }
    nn_mdl.model.fit(X, y, **fit_options)

    # The output path encodes the dataset tag and the training settings.
    save_path = (
        f'trained_models/{NAME}_{ngames}_bsize{batch_size}_epochs{epochs}')
    nn_mdl.model.save(save_path)
예제 #32
0
파일: customers.py 프로젝트: corcorf/doodl
"""
Module defining customer classes
"""

import numpy as np
from numpy.random import choice
from data_processing import load_data, joe_ipmf, joe_tm

# Module-level data shared by the customer classes. All three values are
# computed once, at import time.
CUSTOMER_DATA = load_data()
# NOTE(review): going by the helper names, these are presumably the initial
# probability mass function and the transition matrix derived from the
# customer data -- confirm in data_processing.
JOE_IPMF = joe_ipmf(CUSTOMER_DATA)
JOE_TM = joe_tm(CUSTOMER_DATA)


class Customer:
    """
    Class representing a customer in the DOODL supermarket!

    Attributes:
        entry_time (datetime.datetime): the time at which the customer enters
                                        the supermarket
        initial_pmf (pandas.Series): probability mass function for the
                                     customer's initial state,
        i.e. which aisle the customer will go to first
        transition_matrix (pandas.DataFrame): transition matrix containing the
                                              probability of where the
        customer will head in the next minute, base on where they are now
        exit_state (string): the state at which the customer exits the
                             simulation
    """
    def __init__(self, number, initial_pmf, transition_matrix):
        assert np.all(initial_pmf.index.isin(transition_matrix.index))