Beispiel #1
0
    def process_dataset(self):
        """Load, preprocess and split the configured dataset.

        Uses ``self.dataset`` (single file, split internally) or, failing
        that, ``self.train_path`` / ``self.test_path`` (pre-split files).
        Also sets ``self.num_steps`` to the sequence length (second axis
        of the feature matrix).

        Returns:
            Tuple ``(X_train, X_val, X_test, y_train, y_val, y_test)``.

        Raises:
            ValueError: if neither ``self.dataset`` nor ``self.train_path``
                is set (previously this path crashed with UnboundLocalError
                at the return statement).
        """
        if self.dataset:
            logging.debug("Processing dataset.")
            X, Y = preprocess.read_dataset(self.dataset, balanced=True)
            # Fixed: was a Python 2 print statement (`print X.shape`),
            # a syntax error under Python 3.
            print(X.shape)
            Y = to_categorical(Y)
            logging.debug("X example: %s\ny example: %s" % (X[0], Y[0]))
            X_train, X_val, X_test, y_train, y_val, y_test = preprocess.split_dataset(
                X, Y)
            self.num_steps = X.shape[1]
        elif self.train_path:
            X_train, y_train = preprocess.read_set(self.train_path)
            # 80/20 train/validation split; test set comes from its own file.
            X_train, X_val, y_train, y_val = preprocess.split_dataset(
                X_train, y_train, test_size=0.2, validation=False)
            X_test, y_test = preprocess.read_set(self.test_path)
            self.num_steps = X_train.shape[1]
        else:
            raise ValueError(
                "Either 'dataset' or 'train_path' must be set before "
                "calling process_dataset().")

        return X_train, X_val, X_test, y_train, y_val, y_test
Beispiel #2
0
 def train(self, app, ui, window):
     """Train the model on the loaded data, reporting sizes to the UI."""
     # Guard clause: no training data loaded -> error dialog, bail out.
     if not self.x.size or not self.y.size:
         QtWidgets.QMessageBox.critical(window, "Error",
                                        "No training data loaded")
         return

     x_train, x_val, y_train, y_val = pp.split_dataset(
         self.x, self.y, self.data_split)

     # Report the split sizes in the network-output pane.
     ui.netOutput.setText(
         "Training dataset size: {}".format(x_train.shape[0]))
     ui.netOutput.append(
         "Validation dataset size: {}".format(x_val.shape[0]))
     ui.netOutput.append("")

     self.model.train(x_train, y_train, x_val, y_val,
                      self.learning_rate, self.epochs,
                      self.batch_size, app, ui)
Beispiel #3
0
    # --- Run configuration -------------------------------------------------
    LEARNING_RATE = 1e-3  #1e-3

    USE_CUDA = True
    PRINT_INTERVAL = 100
    LOG_PATH = './logs/log.pkl'
    MODEL_PATH = './checkpoints/'
    COMPARE_PATH = './comparisons/'

    # Fall back to CPU when CUDA was requested but is unavailable.
    use_cuda = USE_CUDA and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print('Using device', device)
    print('num cpus:', multiprocessing.cpu_count())
    print(torch.cuda.is_available())

    # training code
    train_ids, test_ids = prep.split_dataset()
    print('num train_images:', len(train_ids))
    print('num test_images:', len(test_ids))

    data_train = prep.ImageDiskLoader(train_ids)
    data_test = prep.ImageDiskLoader(test_ids)
    print(data_train)

    # DataLoader extras only make sense on GPU (pinned host memory +
    # worker processes for async host->device transfer).
    kwargs = {
        'num_workers': multiprocessing.cpu_count(),
        'pin_memory': True
    } if use_cuda else {}

    #train_loader = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE, shuffle=True, **kwargs)
    # NOTE(review): `x_train_new` is not defined anywhere in view — the
    # commented-out line above passed `data_train`; verify this name exists
    # upstream or this line raises NameError at runtime.
    train_loader = torch.utils.data.DataLoader(x_train_new,
                                               batch_size=BATCH_SIZE,
Beispiel #4
0
# --- LSTM hyperparameters (assumes univariate ECG sequences — TODO confirm)
input_size = 1
hidden_size = 128
num_layers = 1
batch_size = 128

num_classes = 5
out = "./result"

# Prefer the first GPU when available, otherwise run on CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

root_dir = './data'

if __name__ == "__main__":
    # Carve a fixed-size (100-sample), seeded validation set out of the
    # training data; the test set stays untouched.
    train_dataset_dict, test_dataset_dict = load_ECG_dataset(root_dir)
    train_dataset, validation_dataset = split_dataset(train_dataset_dict,
                                                      val_num=100,
                                                      seed=0)

    # Shuffle only the training loader; validation uses a smaller batch.
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(validation_dataset,
                                                 batch_size=16,
                                                 shuffle=False)

    dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}

    model = LSTM(num_classes, input_size, hidden_size, num_layers, device)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss().to(device)
Beispiel #5
0
 def process_dataset(self):
     """Read the configured dataset (class-balanced) and return its split.

     Returns whatever ``preprocess.split_dataset`` produces with
     ``validation=False`` (train/test partitions only).
     """
     features, labels = preprocess.read_dataset(self.dataset, balanced=True)
     return preprocess.split_dataset(features, labels, validation=False)
Beispiel #6
0
# Load dictionaries
#index_to_word= load_obj("data/index_to_word")
#word_to_index = load_obj("data/word_to_index")

# --- Preprocessing configuration ---------------------------------------
FNAME = "../data/spam.csv"  #training set file
MIN_COUNT = 5        # drop words rarer than this
SEQ_LENGTH = 200     # pad/truncate every message to this many tokens
SPLIT_FRAC = 0.8     # train fraction; remainder split into valid/test
BATCH_SIZE = 32

# Get matrix of features.
word_to_index, index_to_word, matrix, labels = csv_to_matrix(
    fname=FNAME, min_count=MIN_COUNT, seq_length=SEQ_LENGTH)

# Get loaders.
train_loader, valid_loader, test_loader, tensor_train, tensor_validation, tensor_test = split_dataset(
    matrix, labels, split_frac=SPLIT_FRAC, batch_size=BATCH_SIZE)

# MODEL PARAMETERS
# Instantiate the model w/ hyperparams
# NOTE(review): `cfg` is not defined in view — presumably a config dict
# loaded elsewhere in this module; verify.
vocab_size = len(index_to_word)
output_size = cfg['output_size']
embedding_dim = cfg['embedding_dim']  #200
hidden_dim = cfg['hidden_dim']  #128
n_layers = cfg['n_layers']

DEVICE = th.device('cuda' if th.cuda.is_available() else 'cpu')

# First checking if GPU is available
train_on_gpu = th.cuda.is_available()
print("Train on GPU", train_on_gpu)
Beispiel #7
0
import pandas as pd

import load_dataset as load
import preprocess
from categorizing_model import CModel
import plotting

# load dataset
FILENAME = 'Dataset.txt'
dataset = load.load_dataset(FILENAME)

# tokenize titles
tensor, vocab_size = preprocess.tokenize(dataset['titles'].to_list())

# split and shuffle dataset
X_train, X_test, Y_train, Y_test = preprocess.split_dataset(
    tensor, dataset['labels'].to_list())

# make dataset pairs (title, labels)
# NOTE(review): `tf` is not imported in the import block above — add
# `import tensorflow as tf` or these lines raise NameError.
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test))

# split dataset to batches
BATCH_SIZE = 64
train_dataset = train_dataset.batch(BATCH_SIZE)
test_dataset = test_dataset.batch(BATCH_SIZE)

# --- Model hyperparameters (model construction not visible in this chunk)
HIDDEN_UNITS = 64
FINAL_OUTPUT_UNITS = 6
DROPOUT_RATE = 0.4
DENSE_LAYERS = 1
L2_RATE = 0.01