Example No. 1
    def test_compare_accuracy_against_reference(self, tarantella_framework,
                                                model_runners,
                                                micro_batch_size,
                                                number_epochs, nbatches):
        batch_size = micro_batch_size * tarantella_framework.get_size()
        nsamples = nbatches * batch_size

        tnt_model_runner, reference_model_runner = model_runners
        # reuse model with its initial weights
        tnt_model_runner.reset_weights()
        reference_model_runner.reset_weights()

        # verify that both models have identical weights
        tnt_initial_weights = tnt_model_runner.get_weights()
        reference_initial_weights = reference_model_runner.get_weights()
        util.compare_weights(tnt_initial_weights, reference_initial_weights,
                             1e-6)

        # train reference model
        (ref_train_dataset,
         ref_test_dataset) = util.load_dataset(mnist.load_mnist_dataset,
                                               train_size=nsamples,
                                               train_batch_size=batch_size,
                                               test_size=10000,
                                               test_batch_size=batch_size)
        reference_model_runner.train_model(ref_train_dataset, number_epochs)
        reference_loss_accuracy = reference_model_runner.evaluate_model(
            ref_test_dataset)

        # train Tarantella model
        (train_dataset,
         test_dataset) = util.load_dataset(mnist.load_mnist_dataset,
                                           train_size=nsamples,
                                           train_batch_size=batch_size,
                                           test_size=10000,
                                           test_batch_size=batch_size)
        tnt_model_runner.train_model(train_dataset, number_epochs)
        tnt_loss_accuracy = tnt_model_runner.evaluate_model(test_dataset)

        rank = tarantella_framework.get_rank()
        logging.getLogger().info("[Rank %d] Tarantella[loss, accuracy] = %s" %
                                 (rank, str(tnt_loss_accuracy)))
        logging.getLogger().info("[Rank %d] Reference [loss, accuracy] = %s" %
                                 (rank, str(reference_loss_accuracy)))
        assert np.isclose(tnt_loss_accuracy[0],
                          reference_loss_accuracy[0],
                          atol=1e-2)  # losses might not be identical
        assert np.isclose(tnt_loss_accuracy[1],
                          reference_loss_accuracy[1],
                          atol=1e-2)
Example No. 2
def main():
    batch_size = 32  # the number of training examples in one forward/backward pass
    num_classes = 10  # number of cifar-10 dataset classes
    epochs = 3  # number of forward and backward passes of all the training examples
    '''
        dataset contains the hyper parameters for loading data and the dataset:
            dataset = {
                'batch_size': batch_size,
                'num_classes': num_classes,
                'epochs': epochs,
                'x_train': x_train,
                'x_test': x_test,
                'y_train': y_train,
                'y_test': y_test
            }
    '''
    dataset = load_dataset(batch_size, num_classes, epochs)

    num_population = 4
    num_generation = 4
    num_offspring = 2

    # run the genetic algorithm to evolve the best CNN
    optCNN = genetic_algorithm(num_population, num_generation, num_offspring,
                               dataset)

    # retrain the best model and record training/validation loss and accuracy
    num_epoch = 3
    model = optCNN.build_model()
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    history = model.fit(dataset['x_train'],
                        dataset['y_train'],
                        batch_size=dataset['batch_size'],
                        epochs=num_epoch,
                        validation_data=(dataset['x_test'], dataset['y_test']),
                        shuffle=True)
    optCNN.model = model  # store the retrained model
    optCNN.fitness = history.history['val_loss'][-1]  # final validation loss as fitness

    print("\n\n-------------------------------------")
    print("The initial CNN has been evolved successfully in the individual",
          optCNN.name)
    print("-------------------------------------\n")
    daddy = load_network('parent_0')
    model = tf.keras.models.load_model('parent_0.h5')
    print("\n\n-------------------------------------")
    print("Summary of initial CNN")
    print(model.summary())
    print("Fitness of initial CNN:", daddy.fitness)

    print("\n\n-------------------------------------")
    print("Summary of evolved individual")
    print(optCNN.model.summary())
    print("Fitness of the evolved individual:", optCNN.fitness)
    print("-------------------------------------\n")

    plot_training(history)
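The docstring in main() lists the keys of the dictionary returned by load_dataset, but the helper itself is not shown. A minimal sketch of such a helper for CIFAR-10, assuming tf.keras is available (hypothetical, not the project's actual implementation):

import tensorflow as tf

def load_dataset(batch_size, num_classes, epochs):
    # Hypothetical sketch: load CIFAR-10 and package it with the hyperparameters
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
    x_train = x_train.astype('float32') / 255.0  # scale pixels to [0, 1]
    x_test = x_test.astype('float32') / 255.0
    y_train = tf.keras.utils.to_categorical(y_train, num_classes)  # one-hot labels
    y_test = tf.keras.utils.to_categorical(y_test, num_classes)
    return {
        'batch_size': batch_size,
        'num_classes': num_classes,
        'epochs': epochs,
        'x_train': x_train,
        'x_test': x_test,
        'y_train': y_train,
        'y_test': y_test
    }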
Example No. 3
def preprocessing():
    df = load_dataset('ionosphere_csv.csv')  # load data
    X = df.drop(['class'], axis=1)
    X = (X - X.min()) / (X.max() - X.min())  # min-max normalize features
    X = X.replace(np.nan, 0)
    y = df['class'].transform(lambda x: 1 if x == 'g' else 0)  # encode target: 'g' -> 1, 'b' -> 0
    return X, y
Example No. 4
def preprocessing():
    categorical_columns = ['Orientation', 'Glazing Area Distribution']
    categories = {1: 'uniform', 2: 'north', 3: 'east', 4: 'south', 5: 'west'}
    target = 'Heating Load'
    df = load_dataset('EnergyEfficiency_data.csv')  # load data
    df = one_hot_encode(df, categorical_columns,
                        categories)  # one hot encode categorical columns
    df = df.drop('Glazing Area Distribution_0', axis=1)
    df = (df - df.min()) / (df.max() - df.min())  # min-max normalize data
    X = df.drop(target, axis=1)  # separate features from the target
    y = df[target]
    return X, y
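The one_hot_encode helper called above is not included in the snippet. A rough sketch of what it might do with pandas, assuming the categories dict maps integer codes to names (an assumption, not the project's implementation):

import pandas as pd

def one_hot_encode(df, categorical_columns, categories):
    # Hypothetical sketch: map integer codes to category names, then expand
    # each categorical column into 0/1 indicator columns.
    for col in categorical_columns:
        named = df[col].map(categories).fillna(df[col].astype(str))
        dummies = pd.get_dummies(named, prefix=col)
        df = pd.concat([df.drop(col, axis=1), dummies], axis=1)
    return df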
Example No. 5
    def test_metrics_names_after_fit(self):
        tnt_model = tnt.Model(mnist.lenet5_model_generator())
        tnt_model.compile(optimizer=tf.keras.optimizers.Adam(),
                          loss="sparse_categorical_crossentropy",
                          metrics=["sparse_categorical_accuracy"])
        train_dataset, _, _ = util.load_dataset(mnist.load_mnist_dataset,
                                                train_size=24,
                                                train_batch_size=24)
        tnt_model.fit(train_dataset)
        assert tnt_model.metrics_names == [
            "loss", "sparse_categorical_accuracy"
        ]
Example No. 6
def train_val_dataset_generator():
    micro_batch_size = 64
    nbatches = 1
    batch_size = micro_batch_size * tnt.get_size()
    nsamples = nbatches * batch_size
    train_dataset, val_dataset, _ = util.load_dataset(
        mnist.load_mnist_dataset,
        train_size=nsamples,
        train_batch_size=batch_size,
        val_size=nsamples,
        val_batch_size=batch_size)
    return train_dataset, val_dataset
Example No. 7
    def test_reset_metrics(self):
        tnt_model = tnt.Model(mnist.lenet5_model_generator())
        tnt_model.compile(optimizer=tf.keras.optimizers.Adam(),
                          loss="sparse_categorical_crossentropy",
                          metrics=["sparse_categorical_accuracy"])
        train_dataset, _, _ = util.load_dataset(mnist.load_mnist_dataset,
                                                train_size=60,
                                                train_batch_size=60)
        tnt_model.fit(train_dataset)
        assert all(float(m.result()) != 0 for m in tnt_model.metrics)

        tnt_model.reset_metrics()
        assert all(float(m.result()) == 0 for m in tnt_model.metrics)
Example No. 8
def load_reference_datasets(batch_size, num_batches, num_test_batches):
  util.set_tf_random_seed()
  train_size = num_batches * batch_size
  test_size = num_test_batches * batch_size
  train_dataset, val_dataset, test_dataset = util.load_dataset(mnist.load_mnist_dataset,
                                                               train_size = train_size,
                                                               train_batch_size = batch_size,
                                                               val_size = test_size,
                                                               val_batch_size = batch_size,
                                                               test_size = test_size,
                                                               test_batch_size = batch_size,
                                                               shuffle = True)
  return {"train" : train_dataset,
          "val"   : val_dataset,
          "test"  : test_dataset }
Example No. 9
def main():
    (x_train, y_train) = utilities.load_dataset()
    if os.path.isfile('trained_model.json'):
        print("Model found, loading...")
        model_def = load_model('trained_model')
    else:
        input_shape = [384, 256, 3]
        output_shape = 3
        model_def = build_inference_graph(input_shape, utilities.hyper_param,
                                          output_shape)
    model_def.summary()
    model_def.compile(optimizer=Adam(lr=0.0001),
                      loss='mse',
                      metrics=['accuracy'])
    model_def.fit(x_train, y_train, batch_size=16, epochs=10)
    save_model(model_def, 'trained_model')
Example No. 10
    def test_compare_accuracy_optimizers(self, tarantella_framework,
                                         mnist_model_runner, optimizer,
                                         micro_batch_size, nbatches):
        batch_size = micro_batch_size * tarantella_framework.get_size()
        nsamples = nbatches * batch_size
        (number_epochs, lr) = mnist.get_hyperparams(optimizer)
        (train_dataset,
         test_dataset) = util.load_dataset(mnist.load_mnist_dataset,
                                           train_size=nsamples,
                                           train_batch_size=batch_size,
                                           test_size=10000,
                                           test_batch_size=batch_size)
        mnist_model_runner.compile_model(optimizer(learning_rate=lr))
        mnist_model_runner.reset_weights()
        mnist_model_runner.train_model(train_dataset, number_epochs)

        results = mnist_model_runner.evaluate_model(test_dataset)
        util.check_accuracy_greater(results[1], 0.91)
Example No. 11
def main():
    (x_train, y_train) = ut.load_dataset()
    print("Dataset loaded...")
    model_def = tm.load_model('trained_model')
    y_predicted = model_def.predict(x_train)
    np.savetxt("gt", y_train)
    np.savetxt("pred", y_predicted)
    path_input_image = '../../Dataset/GehlerShi_input/'
    path_output = '../../Dataset/Prediction/'
    file_names = []
    for i in range(1, 569):
        file_names.append('00' + ut.zero_string(4 - ut.nr_digits(i)) + str(i))
    for index, file_name in enumerate(file_names):
        image_blob = Image.open(os.path.join(path_input_image, file_name + ".png"))
        gt_luminance = y_train[index]
        pred_luminance = y_predicted[index]
        white_bal_groundtruth = to_pil(white_balance(image_blob, gt_luminance))
        white_bal_prediction = to_pil(white_balance(image_blob, pred_luminance))
        white_bal_groundtruth.save(os.path.join(path_output, file_name + "_gt.png"))
        white_bal_prediction.save(os.path.join(path_output, file_name + "_pred.png"))
Example No. 12
    def test_compare_sgd_momentum(self, tarantella_framework,
                                  mnist_model_runner, lr, nesterov, momentum,
                                  micro_batch_size, nbatches, number_epochs):
        batch_size = micro_batch_size * tarantella_framework.get_size()
        nsamples = nbatches * batch_size
        (train_dataset,
         test_dataset) = util.load_dataset(mnist.load_mnist_dataset,
                                           train_size=nsamples,
                                           train_batch_size=batch_size,
                                           test_size=10000,
                                           test_batch_size=batch_size)
        mnist_model_runner.compile_model(
            keras.optimizers.SGD(learning_rate=lr,
                                 momentum=momentum,
                                 nesterov=nesterov))
        mnist_model_runner.reset_weights()
        mnist_model_runner.train_model(train_dataset, number_epochs)

        results = mnist_model_runner.evaluate_model(test_dataset)
        util.check_accuracy_greater(results[1], 0.91)
Example No. 13
    def test_compare_weights_across_ranks(self, tarantella_framework,
                                          model_runner, micro_batch_size,
                                          nbatches, number_epochs):
        comm_size = tarantella_framework.get_size()
        batch_size = micro_batch_size * comm_size
        nsamples = nbatches * batch_size

        (train_dataset, _) = util.load_dataset(mnist.load_mnist_dataset,
                                               train_size=nsamples,
                                               train_batch_size=batch_size,
                                               test_size=0,
                                               test_batch_size=batch_size)
        model_runner.reset_weights()
        model_runner.train_model(train_dataset, number_epochs)
        final_weights = model_runner.get_weights()

        # broadcast the weights from the master rank to all the participating ranks
        model_runner.model._broadcast_weights()

        reference_rank_weights = model_runner.get_weights()
        util.compare_weights(final_weights, reference_rank_weights, 1e-6)
Example No. 14
    def test_cifar_alexnet(self, tarantella_framework, cifar_model_runner,
                           optimizer, micro_batch_size, nbatches):
        batch_size = micro_batch_size * tarantella_framework.get_size()
        nsamples = nbatches * batch_size
        (number_epochs, lr) = cifar.get_hyperparams(optimizer)
        (train_dataset,
         test_dataset) = util.load_dataset(cifar.load_cifar_dataset,
                                           train_size=nsamples,
                                           train_batch_size=batch_size,
                                           test_size=10000,
                                           test_batch_size=batch_size)
        if optimizer.__name__ == 'SGD':
            cifar_model_runner.compile_model(
                optimizer(learning_rate=lr, momentum=0.9))
        else:
            cifar_model_runner.compile_model(optimizer(learning_rate=lr))

        cifar_model_runner.reset_weights()
        cifar_model_runner.train_model(train_dataset, number_epochs)

        results = cifar_model_runner.evaluate_model(test_dataset)
        util.check_accuracy_greater(results[1], 0.5)
Example No. 15
def main():
    x_train_sel, x_train_hyp, y_train_hyp = ut.load_dataset()
    print(x_train_sel.shape)
    print(y_train_hyp.shape)
    #Training Hypothesis network
    if os.path.isfile('trained_model_hyp.json'):
        print("Model found, loading...")
        model_def_hyp = load_model('trained_model_hyp')
    else:
        input_shape = [44, 44, 2]
        output_shape = 2  #along two branches
        model_def_hyp = build_inference_graph(input_shape, ut.hyper_param_hyp,
                                              output_shape)
    model_def_hyp.summary()
    #Define loss for Hyp-Net
    print(x_train_hyp.shape)
    print(y_train_hyp.shape)
    model_def_hyp.compile(optimizer=Adam(lr=0.0001), loss=hyp_loss)
    model_def_hyp.fit(x_train_hyp, y_train_hyp, batch_size=1, epochs=10)
    save_model(model_def_hyp, 'trained_model_hyp')
    #We need inference output of hypnet to train selnet
    y_train_sel_a, y_train_sel_b = model_def_hyp.predict(x_train_hyp)
    y_train_sel = prepare_sel_data(y_train_sel_a, y_train_sel_b, y_train_hyp)
    #Training Selection network
    if os.path.isfile('trained_model_sel.json'):
        print("Model found, loading...")
        model_def_sel = load_model('trained_model_sel')
    else:
        input_shape = [44, 44, 2]
        output_shape = 2
        model_def_sel = build_inference_graph(input_shape, ut.hyper_param_sel,
                                              output_shape)
    model_def_sel.summary()
    model_def_sel.compile(optimizer=Adam(lr=0.0001),
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])
    model_def_sel.fit(x_train_sel, y_train_sel, batch_size=1, epochs=10)
    save_model(model_def_sel, 'trained_model_sel')
Example No. 16
    data[columns] = data[columns].apply(lambda x: (x - x.min()) /
                                        (x.max() - x.min()))

    # Discretization
    for column in columns:
        data[column] = pd.cut(data[column], m, labels=False)

    data['classes'] = classi

    return data


if __name__ == "__main__":
    #Load original dataset
    print("Uploading dataset..")
    dataset = util.load_dataset()
    print(dataset)

    #Extract features from the dataset
    print("Extracting useful features from the dataset..")
    dataset = features_extraction(dataset)
    print('Dataset with extracted features')
    print(dataset[:100])

    #Sample dataset
    print("Sampling dataset...")
    dataset_sampled = sample_dataset(dataset)
    print(dataset_sampled)

    #Save dataframe as pickle
    print('Saving dataset_sampled to pickle object...')
Example No. 17
                           weights_initializer=tf.random_normal_initializer(stddev=0.02), scope='d_conv4')
        conv4 = utils.lrelu(conv4)
        conv5 = tcl.conv2d(conv4, 1, 4, 1, activation_fn=tf.identity,
                           weights_initializer=tf.random_normal_initializer(stddev=0.02), scope='d_conv5')
        return conv5



# training parameters
batch_size = cfg.batch_size
lr = 0.0002
train_epoch = 2
rotate = False

#load dataset
train_set, train_setY, val_set, val_setY, test_set, test_setY = utils.load_dataset(name='MNIST')

# variables : input
x = tf.placeholder(tf.float32, shape=(cfg.batch_size, 28, 28, 1))
z = tf.placeholder(tf.float32, shape=(cfg.batch_size, 1, 1, 100))
isTrain = tf.placeholder(dtype=tf.bool)
fixed_z_ = np.random.normal(0, 1, (cfg.batch_size, 1, 1, 100))

# networks : generator
G_z = generator(z, isTrain)
flatG_z = tf.reshape(G_z, [batch_size, -1])

# networks : discriminator
D_real = discriminator(x, isTrain)
D_fake = discriminator(G_z, isTrain, reuse=True)
Example No. 18
# get chromosome info
chr_lst = get_chr_info(vp_info['genome'], property='chr_name')
chr_size = get_chr_info(vp_info['genome'], property='chr_size')
n_chr = len(chr_lst)

# load RE positions
print('Loading 1st ResEnz: {:s}'.format(vp_info['res_enzyme']))
re1_pos_lst = get_re_info(re_name=vp_info['res_enzyme'], property='pos', genome=vp_info['genome'])
if ('second_cutter' in vp_info) and (isinstance(vp_info['second_cutter'], str)):
    print('Loading 2nd ResEnz: {:s}'.format(vp_info['second_cutter']))
    re2_pos_lst = get_re_info(re_name=vp_info['second_cutter'], property='pos', genome=vp_info['genome'])
else:
    re2_pos_lst = [np.empty(0, dtype=int)] * n_chr

# load data
data_pd = load_dataset(vp_info, target_field='frg_np', verbose=True, data_path=inp_args.dataset_dir)
data = data_pd[['chr', 'pos', '#read']].values.astype('int32')
del data_pd
vp_info['#rd_all'] = np.sum(data[:, 2])

# Downsampling, if requested
if inp_args.downsample is not None:
    # TODO: Adding other types of downsampling, such as #captures
    assert inp_args.downsample[:4] == 'nmap'
    n_map = int(float(inp_args.downsample[4:]))
    print('Downsampling #mapped: From {:,d} mapped fragment to {:0,.0f} fragment.'.format(np.sum(data[:, 2]), n_map))
    idx_set = np.repeat(np.arange(data.shape[0]), data[:, 2])
    ds_set = np.random.choice(idx_set, size=n_map, replace=False)
    # Note: Here, we are only downsampling covered restriction fragments, empty restriction fragments are not selected
    del idx_set
    rf_uid, rf_frq = np.unique(ds_set, return_counts=True)
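The repeat/choice/unique pattern above draws individual reads without replacement to downsample coverage; a tiny self-contained illustration of the same idea with made-up counts:

import numpy as np

counts = np.array([3, 1, 2])                # reads per covered fragment
idx_set = np.repeat(np.arange(3), counts)   # -> [0, 0, 0, 1, 2, 2]
ds_set = np.random.choice(idx_set, size=4, replace=False)
rf_uid, rf_frq = np.unique(ds_set, return_counts=True)
# rf_uid: surviving fragment indices, rf_frq: their downsampled read counts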
Example No. 19
def train(epochs=100,
          batch_size=512,
          validation_split=0.1,
          drop_probability=0.5,
          extra_training=True,
          save=False):
    # These take some time to load, and need at least 650 MB of memory
    print("Loading dataset...")
    Y, features = load_dataset()

    print("Saving item list...")
    with open("anime_list.txt", 'w') as f:
        for anime in list(Y.columns):
            f.write(anime + "\n")

    print("Creating feature and target data...")
    # This mask drops each rating with probability drop_probability
    mask = np.random.binomial(1, 1 - drop_probability, size=Y.shape)
    X = Y * mask
    # Convert user's item-ratings to user's features
    X = X @ features
    # normalize each user's feature vector by its maximum value
    X = X.apply(lambda x: x / x.max(), axis=1)

    print("Defining model...")
    ## Model definition and training
    inp = Input(shape=(X.shape[1], ))
    x = Dense(64, activation='relu')(inp)
    x = Dense(128, activation='relu')(x)
    x = Dense(256, activation='relu')(x)
    x = Dense(128, activation='relu')(x)
    out = Dense(Y.shape[1], activation='linear')(x)
    model = Model(inp, out)
    print(model.summary())
    model.compile(SGD(lr=0.01, momentum=0.9, decay=1e-2), loss='mse')

    print("Training model...")
    h = model.fit(X,
                  Y,
                  batch_size,
                  epochs=epochs,
                  validation_split=validation_split)

    plt.figure(figsize=(12, 8))
    plt.plot(np.arange(0, epochs),
             h.history['loss'],
             label="train_loss",
             color='blue')
    plt.plot(np.arange(0, epochs),
             h.history['val_loss'],
             label='val_loss',
             color='orange')

    if extra_training:
        print("Extra model training...")
        model.compile(SGD(lr=0.001, momentum=0.9, decay=1e-3), loss='mse')
        h2 = model.fit(X,
                       Y,
                       batch_size // 2,
                       epochs=epochs // 5,
                       validation_split=validation_split)

        plt.plot(np.arange(epochs, epochs + epochs // 5), h2.history['loss'], color='blue')
        plt.plot(np.arange(epochs, epochs + epochs // 5), h2.history['val_loss'], color='orange')

    plt.legend()
    plt.show()

    if save:
        print("Saving model...")
        model.save('weights/' + save)
Example No. 20
# Package imports
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from utilities import load_dataset


##---------------------------------------------------------
# PREPARE DATASET
##---------------------------------------------------------

# Loading the data (cat/non-cat)
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

# Reshape the training and test examples
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

# Scale pixel values to [0, 1]
train_set_x = train_set_x_flatten / 255.
test_set_x = test_set_x_flatten / 255.


##---------------------------------------------------------
# SET UP LOGISTIC REGRESSION ALGORITHM
##---------------------------------------------------------

# Compute the sigmoid function of z
# Arguments:    z -- A scalar or numpy array of any size.
# Return:       s -- sigmoid(z)
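The snippet is cut off right after the comment block describing the sigmoid helper; a minimal implementation consistent with that comment (a sketch, not necessarily the original code; numpy is already imported as np above):

def sigmoid(z):
    # s = 1 / (1 + e^(-z)), applied elementwise to scalars or numpy arrays
    s = 1 / (1 + np.exp(-z))
    return s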
Example No. 21
import argparse
import numpy as np

from cnn_model import CNNModel
from utilities import load_dataset

parser = argparse.ArgumentParser()
parser.add_argument("--use_evaluation_dataset",
                    help="use evaluation dataset",
                    action="store_true")
args = parser.parse_args()

if args.use_evaluation_dataset:
    test_dataset_path = "/tmp/deers_and_trucks_evaluation"
else:
    test_dataset_path = "data/deers_and_trucks_test"

# Load the dataset.
images_test, cls_test = load_dataset(test_dataset_path)
n_classes = 2
cls_names = ["deers", "trucks"]

# Encode the labels as one hot.
cls_test_one_hot_encoded = np.eye(n_classes, dtype=float)[cls_test]

# Create a convolutional neural network.
model = CNNModel(is_training=False)

# Load the saved model.
model.load("model/")

# Create a dictionary for evaluating the network on the full validation data.
testing_dict = model.make_dictionary(images_test, cls_test_one_hot_encoded)
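The np.eye indexing used above is a compact way to one-hot encode integer class labels; a small usage example:

import numpy as np

cls = np.array([0, 1, 1, 0])
one_hot = np.eye(2, dtype=float)[cls]
# one_hot -> [[1., 0.], [0., 1.], [0., 1.], [1., 0.]]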
Example No. 22
def train_on_unified_dataset():
    # Name of the classifier to use in learning process and its path of serialization
    classifierType = "svm"
    modelFilePath = "models/" + classifierType + ".pickle"

    # Defining categories of the classes to use in model
    categories = {'Positive': 'pos', 'Negative': 'neg'}

    counter = 0

    # -------------------------------------------------------------------------------------------------------------------
    # Load the dataset. It is returned as a dictionary in which each
    # element contains the content of one example file
    dataset_labels, dataset = load_dataset('datasets/Twitter')

    # ------------------------------------------------------------------------------------------------------------------
    # Calls the csv_dict_list function, passing the named csv
    dataset_labels2, dataset2, counter = csv_dict_list("datasets/ATT.csv",
                                                       counter)
    dataset_labels += dataset_labels2
    dataset.update(dataset2)

    # ------------------------------------------------------------------------------------------------------------------
    # Calls the csv_dict_list function, passing the named csv
    dataset_labels3, dataset3, counter = csv_dict_list("datasets/HTL.csv",
                                                       counter)
    dataset_labels += dataset_labels3
    dataset.update(dataset3)

    # ------------------------------------------------------------------------------------------------------------------
    # Calls the csv_dict_list function, passing the named csv
    dataset_labels4, dataset4, counter = csv_dict_list("datasets/MOV.csv",
                                                       counter)
    dataset_labels += dataset_labels4
    dataset.update(dataset4)

    # ------------------------------------------------------------------------------------------------------------------
    # Calls the csv_dict_list function, passing the named csv
    dataset_labels5, dataset5, counter = csv_dict_list("datasets/PROD.csv",
                                                       counter)
    dataset_labels += dataset_labels5
    dataset.update(dataset5)

    # ------------------------------------------------------------------------------------------------------------------
    # Calls the csv_dict_list function, passing the named csv
    dataset_labels6, dataset6, counter = csv_dict_list("datasets/RES.csv",
                                                       counter)
    dataset_labels += dataset_labels6
    dataset.update(dataset6)

    # Preprocessing of data-set
    dataset = preprocessing(dataset)

    # feature extraction, tf-idf transformation
    count_vect, X_train_tfidf, tfidf_transformer = tf_idf_features(dataset)

    # an object from sentiment analysis module to use in training and testing
    sent_anal = sentiment_analysis()

    # train a classifier
    print('Training a classifier is in progress ...')
    classifier = sent_anal.sentiment_analysis_train(X_train_tfidf,
                                                    dataset_labels,
                                                    classifierType,
                                                    modelFilePath)

    # Cross validation
    build_pipeline(dataset, dataset_labels, 'accuracy')

    print('Training done')
    print('-------------------------------------------------------------')
    # Load the classifier from disk. Uncomment if a serialized classifier already exists
    classifier = readSerializedClassifier(modelFilePath)

    # testing a new input example
    print('Testing a new data example ...')
    input_text = ['الحياة صعبة شباب']

    filtered_input_text = list()
    filtered_input_text.append(preprocessing(''.join(input_text)))

    sent_anal.sentiment_analysis_test(filtered_input_text, classifier,
                                      count_vect, X_train_tfidf,
                                      tfidf_transformer, categories)
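tf_idf_features is a project helper that returns a vectorizer, the tf-idf matrix, and the transformer; a rough scikit-learn equivalent, assuming the dataset dict maps ids to raw text (an assumption, not the project's code):

from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

def tf_idf_features(dataset):
    # Hypothetical sketch: bag-of-words counts followed by tf-idf weighting
    count_vect = CountVectorizer()
    X_counts = count_vect.fit_transform(dataset.values())
    tfidf_transformer = TfidfTransformer()
    X_train_tfidf = tfidf_transformer.fit_transform(X_counts)
    return count_vect, X_train_tfidf, tfidf_transformer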
Example No. 23
# This exercise has been inspired by Magnus Erik Hvass Pedersen's tutorial on CNN: https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/02_Convolutional_Neural_Network.ipynb

import argparse
import numpy as np
import time

from datetime import timedelta

from batchmaker import Batchmaker
from cnn_model import CNNModel
from utilities import load_dataset, plot_images

DATASET_PATH = "data/deers_and_trucks"

# Load the dataset.
images_train, cls_train = load_dataset(DATASET_PATH)
n_classes = 2
cls_names = ["deers", "trucks"]

# Plot a few samples if not disabled.
parser = argparse.ArgumentParser()
parser.add_argument("--disable_visualization",
                    help="disable image visualization",
                    action="store_true")
args = parser.parse_args()
if not (args.disable_visualization):
    plot_images(images_train[0:9], np.asarray(cls_names)[cls_train[0:9]])

# Encode the labels as one hot.
cls_train_one_hot_encoded = np.eye(n_classes, dtype=float)[cls_train]
Example No. 24
# The main method of the script. The script trains the classifier and tests it on new input data
# -----------------------------------------------------------------------------------------------------------------------
if __name__ == "__main__":
    # Name of the classifier to use in learning process and its path of serialization
    classifierType = "svm"
    modelFilePath = "models/" + classifierType + ".pickle"

    # Defining categories of the classes to use in model
    categories = {'Positive': 'pos', 'Negative': 'neg'}

    counter = 0

    # -------------------------------------------------------------------------------------------------------------------
    # Load the dataset. It is returned as a dictionary in which each
    # element contains the content of one example file
    dataset_labels, dataset = load_dataset('datasets/Twitter')

    # ------------------------------------------------------------------------------------------------------------------
    # Calls the csv_dict_list function, passing the named csv
    dataset_labels2, dataset2, counter = csv_dict_list("datasets/ATT.csv",
                                                       counter)
    dataset_labels += dataset_labels2
    dataset.update(dataset2)

    # Preprocessing of data-set
    dataset = preprocessing(dataset)

    # feature extraction, tf-idf transformation
    count_vect, X_train_tfidf, tfidf_transformer = tf_idf_features(dataset)

    # an object from sentiment analysis module to use in training and testing