Example #1
def evaluate_autoencoder(model_dir, test_data_dir):
    weights_file = os.path.join(model_dir, "autoencoder_weights.hdf5")
    json_file = os.path.join(model_dir, "autoencoder_model.json")

    # Rebuild the trained autoencoder from its saved architecture and weights
    with open(json_file, 'r') as f:
        json_string = f.read()
    autoencoder = model_from_json(json_string)
    autoencoder.load_weights(weights_file)

    # Load the test data, scale pixels to [0, 1], and drop the extra sequence axis
    originaldata = load_data(test_data_dir)
    data = originaldata.astype(np.float32) / 255
    data = np.reshape(data, (data.shape[0], ) + data.shape[2:])

    # Reconstruct the frames and rescale back to 8-bit pixel values
    decoded = (autoencoder.predict(data) * 255).astype(np.uint8)
    # decoded = decoded.reshape(decoded, (decoded.shape[0], 48,64))

    gs = gridspec.GridSpec(1, 2)
    for i in range(20):
        image = decoded[i, :, :, 0]
        plt.subplot(gs[0])
        plt.imshow(image, interpolation='none')
        plt.subplot(gs[1])
        plt.imshow(originaldata[i, 0, :, :, 0])

        plt.savefig(os.path.join(model_dir, 'plot_' + str(i) + '.png'))
        plt.clf()
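
For reference, a minimal sketch of the imports and call this excerpt relies on; the directory paths are hypothetical placeholders, and load_data is a project-specific helper that is not shown here:

# Imports assumed by the excerpt above (a sketch, not copied from the source file)
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
from keras.models import model_from_json

# Hypothetical invocation; both directories are placeholders
evaluate_autoencoder("models/autoencoder_run1", "data/test")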
Example #2
    def set_train_data(self, raw=False):
        """
		sets train_data attribute to ADL data correpsonding to the dataset for this experiment
		"""

        split_by_vid_or_class = 'Split_by_class'
        vid_class = 'NonFall'

        data = load_data(split_by_vid_or_class=split_by_vid_or_class,
                         raw=raw,
                         img_width=self.img_width,
                         img_height=self.img_height,
                         vid_class=vid_class,
                         dset=self.dset)

        self.train_data = data
Example #3
def train_autoencoder(trainingData, outDir):
    # Load training data
    print("Loading and normalizing data...")
    data = load_data(trainingData)
    data = np.reshape(data, (data.shape[0], ) + data.shape[2:])
    data = data.astype(np.float32) / 255
    print("Finished loading data!!")

    # Initialize and train model
    print("Initlializing model...")
    #data_format = backend.image_data_format()
    input_layer = Input(shape=(48, 64, 1))
    autoencoder_layers = Conv2D(3, 3, padding='same', activation='relu')(input_layer)
    autoencoder_layers = MaxPooling2D()(autoencoder_layers)
    autoencoder_layers = Conv2D(5, 3, padding='same',
                                activation='relu')(autoencoder_layers)
    autoencoder_layers = MaxPooling2D()(autoencoder_layers)
    autoencoder_layers = UpSampling2D()(autoencoder_layers)
    autoencoder_layers = Conv2D(3, 3, padding='same',
                                activation='relu')(autoencoder_layers)
    autoencoder_layers = UpSampling2D()(autoencoder_layers)
    autoencoder_layers = Conv2D(1, 3, padding='same',
                                activation='sigmoid')(autoencoder_layers)
    autoencoder = Model(inputs=input_layer, outputs=autoencoder_layers)
    autoencoder.compile(optimizer='sgd', loss='binary_crossentropy')
    print("Done!")

    batch_size = 256
    nb_epoch = 100

    print("Training model...")
    weights_file = os.path.join(outDir, "autoencoder_weights.hdf5")
    json_file = os.path.join(outDir, "autoencoder_model.json")
    tbLogPath = os.path.join(outDir, "tensorboard_logs")
    callbacks = [
        ModelCheckpoint(filepath=weights_file,
                        monitor='val_loss',
                        save_best_only=True),
        TensorBoard(log_dir=tbLogPath,
                    histogram_freq=nb_epoch // 10,
                    batch_size=batch_size)
    ]
    autoencoder.fit(data,
                    data,
                    batch_size=batch_size,
                    epochs=nb_epoch,
                    verbose=2,
                    callbacks=callbacks,
                    validation_split=0.1)
    json_string = autoencoder.to_json()
    with open(json_file, "w") as f:
        f.write(json_string)
    print("Done!")
Example #4
def train(config):
    # Load training data
    print("Loading and normalizing data...")
    training_data = os.path.join(config.data_dir, "train")
    data_type = np.uint8 if config.model_type != ModelType.STATE_VECTOR else np.float32
    data = load_data(training_data,
                     num_of_samples=config.sequences,
                     dtype=data_type)
    data = normalize_data(data, config.model_type)
    print("Finished loading data!!")

    input_shape = data.shape[2:]

    # Initialize and train model
    print("Initilalizing model...")
    factory = PredNetModelBuilder(config)
    factory.set_input_shape(input_shape)
    factory.set_tensorboard_verbosity(write_grads=True, write_images=True)
    #factory.add_pretrained_autoencoder("C:\\Users\\Alberto\\Projects\\Keras_models\\autoencoder_bb\\autoencoder_model.json",
    #                                   "C:\\Users\\Alberto\\Projects\\Keras_models\\autoencoder_bb\\autoencoder_weights.hdf5")
    factory.trainable_autoencoder = False
    model, callbacks = factory.build_model(config.model_type)
    print("Done!")

    print("Training model...")
    y = np.zeros((data.shape[0], 1), np.float32)
    model.fit(data,
              y,
              batch_size=config.batch_size,
              epochs=config.epochs,
              verbose=2,
              callbacks=callbacks,
              validation_split=0.1)
    print("Done!")

    print("Saving model...")
    json_string = model.to_json()
    if not os.path.isdir(config.model_dir):
        os.mkdir(config.model_dir)
    json_file = os.path.join(config.model_dir, "prednet_model.json")
    with open(json_file, "w") as f:
        f.write(json_string)
    print("Done!")
Example #5
File: MLP.py  Project: akbari59/CSCE670_NCF
    np.save('MLP_WE/mlp_2_weights',
            pretrain_model.get_layer('mlp_2').get_weights())
    np.save('MLP_WE/mlp_3_weights',
            pretrain_model.get_layer('mlp_3').get_weights())
    np.save('MLP_WE/mlp_user_embed_weights',
            pretrain_model.get_layer('MLP_user_embed').get_weights())
    np.save('MLP_WE/mlp_item_embed_weights',
            pretrain_model.get_layer('MLP_item_embed').get_weights())

    hit_rate_accuracy = evaluation.evaluate_integer_input(
        'input/testing_data.npy', pretrain_model, 'hit_rate',
        'input/int_mat.npy')
    print('MLP produces accuracy rate of: ' + str(hit_rate_accuracy))


if __name__ == '__main__':
    try:
        interaction_mx = np.load('input/int_mat.npy')
    except IOError:
        data_management.load_data()
        interaction_mx = np.load('input/int_mat.npy')
    inputs, labels = data_management.training_data_generation(
        'input/training_data.npy', 'input/int_mat.npy', 5)
    data_management.load_data(file_path='../data/movielens/ratings.dat')
    train_mlp(num_predictive_factors=8,
              batch_size=256,
              epochs=2,
              interaction_mx=interaction_mx,
              inputs=inputs,
              labels=labels)
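
Because get_weights() returns a Python list, np.save may store these files as pickled object arrays (and it appends a .npy suffix); a minimal sketch of reading one back under that assumption:

# Sketch: reload one of the weight files saved above; allow_pickle=True covers the
# case where the saved object is a pickled list of weight arrays
import numpy as np
mlp_2_weights = np.load('MLP_WE/mlp_2_weights.npy', allow_pickle=True)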
Example #6
        num_predictive_factors = int(arg)
        print "Number of predictive factors is " + str(num_predictive_factors)
    elif opt in ("-b", "--bsize"):
        batch_size = int(arg)
        print "Batch size is " + str(batch_size)
    elif opt in ("-e", "--epoch"):

        num_pretrain_epochs = int(arg)
        print "number of training epochs for pretrain and full model is " + str(num_pretrain_epochs)


num_final_epochs = num_pretrain_epochs



data_management.load_data()
interaction_mx = np.load('input/int_mat.npy')
inputs, labels = data_management.training_data_generation('input/training_data.npy', 'input/int_mat.npy')
labels = keras.utils.to_categorical(labels, 6)
# pretrain MLP
MLP.train_mlp(num_predictive_factors=num_predictive_factors, batch_size=batch_size, epochs=num_pretrain_epochs,
              interaction_mx=interaction_mx, inputs=inputs, labels=labels)
# pretrain GMF
GMF.train_gmf(num_predictive_factors=num_predictive_factors, batch_size=batch_size, epochs=num_pretrain_epochs,
              interaction_mx=interaction_mx, inputs=inputs, labels=labels)

# check out the shared vision guide at https://keras.io/getting-started/functional-api-guide/
user_input = Input(shape=(1,), name='user_input')
item_input = Input(shape=(1,), name='item_input')

Example #7
    sequences = tokenizer.texts_to_sequences(articles)

    # padding the sequences
    padded = pad_sequences(sequences,
                           maxlen=max_length,
                           padding=padding_type,
                           truncating=trunc_type)

    return padded


if __name__ == "__main__":

    # load data
    my_articles, my_labels = load_data(data_path=DATA_PATH, file_name=FILENAME)

    # clean the data
    my_articles = clean_articles(articles=my_articles, stopwords=STOPWORDS)

    # split articles
    train_articles, validation_articles, test_articles = split_articles(
        articles=my_articles, train_portion=TRAIN_PORTION, test_size=TEST_SIZE)

    # split labels
    train_labels, validation_labels, test_labels = split_labels(
        labels=my_labels, train_portion=TRAIN_PORTION, test_size=TEST_SIZE)

    print("Train Articles:", len(train_articles))
    print("validation Articles:", len(validation_articles))
    print("Test Articles:", len(test_articles))
Example #8
__author__ = 'Rex'

# usage:
#   python inspect_blockdata [height]

import sys
sys.path.append('..')

import data_management as dm

if __name__ == "__main__":
    try:
        height = int(sys.argv[1])
        exchange = dm.load_data(str(height), dm.BLOCKS_FOLDER)
    except:
        exchange = dm.pop_exchange()

    print exchange.exchange
Example #9
def evaluate(config):
    if config.model_type == ModelType.PREDNET or config.model_type == ModelType.SINGLE_PIXEL_ACTIVATION:
        PredNet = PredNetVanilla
    elif config.model_type == ModelType.CONV_PREDNET:
        PredNet = ConvPredNet
    elif config.model_type == ModelType.AMPLIF_ERROR:
        PredNet = AmplifiedErrorPredNet
    elif config.model_type == ModelType.STATE_VECTOR:
        PredNet = RNN
    elif config.model_type == ModelType.CONCAT_PREDNET:
        PredNet = ConcatPredNet

    weights_file = os.path.join(results_folder,
                                str(config) + "\\prednet_weights.hdf5")
    json_file = os.path.join(results_folder,
                             str(config) + "\\prednet_model.json")

    batch_size = 8

    # load test data
    test_set_dir = os.path.join(config.data_dir, "test")
    data_type = 'uint8' if config.model_type != ModelType.STATE_VECTOR else 'float32'
    data = load_data(test_set_dir, dtype=data_type)
    X_test = normalize_data(data, config.model_type)
    nt = X_test.shape[1]

    # Load trained model
    with open(json_file, 'r') as f:
        json_string = f.read()

    # add custom layers definitions
    if config.model_type != ModelType.STATE_VECTOR and config.model_type != ModelType.CONCAT_PREDNET:
        custom_objects = {PredNet.__name__: PredNet}
    elif config.model_type == ModelType.STATE_VECTOR:
        custom_objects = {
            StateVectorPredNetCell.__name__: StateVectorPredNetCell
        }
    elif config.model_type == ModelType.CONCAT_PREDNET:
        custom_objects = {
            PredNetVanilla.__name__: PredNetVanilla,
            ConcatPredNet.__name__: ConcatPredNet
        }

    model = model_from_json(json_string, custom_objects=custom_objects)
    model.load_weights(weights_file)

    # Create testing model (to output predictions)
    layer_config = model.layers[1].get_config()
    if config.model_type == ModelType.STATE_VECTOR:
        cell_config = layer_config['cell']['config']
        cell_config['output_mode'] = 'prediction'
        layer_config['cell'] = StateVectorPredNetCell(**cell_config)
        test_prednet = PredNet(weights=model.layers[1].get_weights(),
                               **layer_config)
    else:
        layer_config['output_mode'] = 'prediction'
        test_prednet = PredNet(weights=model.layers[1].get_weights(),
                               **layer_config)
        test_prednet.extrap_start_time = config.infer_pred_start
        test_prednet.extrap_end_time = config.infer_pred_end
    input_shape = list(model.layers[0].batch_input_shape[1:])
    input_shape[0] = nt
    inputs = Input(shape=tuple(input_shape))
    predictions = test_prednet(inputs)
    test_model = Model(inputs=inputs, outputs=predictions)

    # make predictions
    X_hat = test_model.predict(X_test, batch_size)

    if config.model_type != ModelType.STATE_VECTOR:
        if 'data_format' in layer_config:
            data_format = layer_config['data_format']
        else:
            data_format = layer_config['dim_ordering']
        if data_format == 'channels_first':
            X_test = np.transpose(X_test, (0, 1, 3, 4, 2))
            X_hat = np.transpose(X_hat, (0, 1, 3, 4, 2))
        X_test = (X_test * 255).astype(np.uint8)
        X_hat = (X_hat * 255).astype(np.uint8)
    else:
        X_test = image_from_state_vector(X_test)
        X_hat = image_from_state_vector(X_hat)

    # Compare MSE of PredNet predictions vs. using last frame.  Write results to prediction_scores.txt
    scores_file = os.path.join(results_folder,
                               str(config) + '\\prediction_scores.txt')
    mse_model, mse_prev = compute_and_save_mse(X_test, X_hat, scores_file)

    plots_dir = os.path.join(results_folder,
                             str(config) + '\\prediction_plots\\')
    plot_predictions(X_test, X_hat, plots_dir, config)
Example #10

def __signature__project(dsv, seed, perplexity):
    return hash(dsv[:5].tostring()), seed, perplexity


def project(dsv, seed=1, perplexity=30):
    # Compute the cache key; reuse a cached projection with a matching point count
    signature = __signature__project(dsv, seed, perplexity)
    for item in project.cache.items():
        if item[1][0].size == dsv.shape[0]:
            return item[1]
    temp = TSNE(n_components=2, perplexity=perplexity,
                random_state=seed).fit_transform(dsv)
    project.cache[signature] = (temp.T[0], temp.T[1])
    return project.cache[signature]


project.cache = load_data("projection_cache")


def plot_clusters(projection,
                  clustering):  # cluster = None for simply plotting projection
    try:
        clustering = clustering.labels_
    except AttributeError:
        pass
    plt.scatter(*projection, c=clustering)
    plt.show()


def complement_customers(df, a_clusterings, b_clusterings):
    groups = split_to_groups(df)
    try: