def RNN_data_generator(corpus, seq_size, batch_size, dict_len, skip_step):
    '''generate input x and y from the whole corpus

    corpus = whole corpus mapped into integer indices, in order
    seq_size = number of consecutive characters from the corpus that form a sequence
    batch_size = number of sequences to train over
    dict_len = length of the dictionary built from the corpus
    skip_step = gap between consecutive sequences
    '''
    x = np.zeros((batch_size, seq_size, dict_len))
    y = np.zeros((batch_size, dict_len))
    corpus_len = len(corpus)
    idx_start = 0
    idx_end = 0
    for idx in range(batch_size):
        idx_start = idx_start % corpus_len
        idx_end = idx_start + seq_size
        if idx_end + 1 >= corpus_len:
            print('redundant sequences, choose a smaller batch_size')
            idx_start = int(round(skip_step / 2.0))
            idx_end = idx_start + seq_size
        tmp_x = corpus[idx_start:idx_end]
        x[idx, :, :] = to_categorical(tmp_x, num_classes=dict_len)
        tmp_y = corpus[idx_end]
        y[idx, :] = to_categorical(tmp_y, num_classes=dict_len)
        idx_start += skip_step
    return x, y
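# A minimal usage sketch for the generator above (toy values; assumes numpy as np
# and keras.utils.to_categorical are imported, matching the function's own dependencies):
toy_corpus = [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]  # integer-encoded characters
x_demo, y_demo = RNN_data_generator(toy_corpus, seq_size=3, batch_size=3,
                                    dict_len=5, skip_step=2)
print(x_demo.shape, y_demo.shape)  # (3, 3, 5) (3, 5)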
def next_batch(self, batch_size, shuffle=True):
    """Return the next `batch_size` examples from this data set."""
    # shuffle for the first epoch
    start = self._index_in_epoch
    if self._epochs_completed == 0 and start == 0 and shuffle:
        self._shuffle_images_and_labels()
    if start + batch_size > self._num_examples:
        # retrieve the rest of the examples that do not add up to a full batch
        self._epochs_completed += 1
        rest_num_examples = self._num_examples - start
        rest_images = self._images[start:self._num_examples]
        rest_labels = self._labels[start:self._num_examples]
        if shuffle:
            self._shuffle_images_and_labels()
        # complete the batch with examples from the next epoch
        start = 0
        self._index_in_epoch = batch_size - rest_num_examples
        end = self._index_in_epoch
        new_images = self._images[start:end]
        new_labels = self._labels[start:end]
        images = np.concatenate((rest_images, new_images), axis=0)
        labels = np.concatenate((rest_labels, new_labels), axis=0)
        return images, to_categorical(labels, self.num_classes)
    else:
        self._index_in_epoch += batch_size
        end = self._index_in_epoch
        return (self._images[start:end],
                to_categorical(self._labels[start:end], self.num_classes))
def load_data2(path):
    print("[INFO] loading images...")
    data = []
    labels = []
    # grab the image paths and randomly shuffle them
    imagePaths = sorted(list(paths.list_images(path)))
    random.seed(42)
    random.shuffle(imagePaths)
    # loop over the input images
    for imagePath in imagePaths:
        # load the image, pre-process it, and store it in the data list
        image = cv2.imread(imagePath)
        image = cv2.resize(image, (norm_size, norm_size))
        image = img_to_array(image)
        data.append(image)
        # extract the class label from the image path and update the labels list
        label = int(imagePath.split(os.path.sep)[-2])
        labels.append(label)
    # scale the raw pixel intensities to the range [0, 1]
    data = np.array(data, dtype="float") / 255.0
    labels = np.array(labels)
    # partition the data into training and testing splits using 75% of
    # the data for training and the remaining 25% for testing
    (trainX, testX, trainY, testY) = train_test_split(data, labels,
                                                      test_size=0.25, random_state=42)
    # convert the labels from integers to vectors
    trainY = to_categorical(trainY, num_classes=CLASS_NUM)
    testY = to_categorical(testY, num_classes=CLASS_NUM)
    return trainX, trainY, testX, testY
def train(self, epochs, batch_size=128, sample_interval=50):

    # Load the dataset
    (X_train, y_train), (_, _) = mnist.load_data()

    # Rescale MNIST to 32x32
    X_train = np.array([scipy.misc.imresize(x, [self.img_rows, self.img_cols])
                        for x in X_train])

    # Rescale -1 to 1
    X_train = (X_train.astype(np.float32) - 127.5) / 127.5
    X_train = np.expand_dims(X_train, axis=3)
    y_train = y_train.reshape(-1, 1)

    # Adversarial ground truths
    valid = np.ones((batch_size, 4, 4, 1))
    fake = np.zeros((batch_size, 4, 4, 1))

    for epoch in range(epochs):

        # ---------------------
        #  Train Discriminator
        # ---------------------

        # Select a random batch of images
        idx = np.random.randint(0, X_train.shape[0], batch_size)
        imgs = X_train[idx]
        labels = y_train[idx]

        masked_imgs = self.mask_randomly(imgs)

        # Generate a batch of new images from the masked inputs
        gen_imgs = self.generator.predict(masked_imgs)

        # One-hot encoding of labels
        labels = to_categorical(labels, num_classes=self.num_classes + 1)
        fake_labels = to_categorical(np.full((batch_size, 1), self.num_classes),
                                     num_classes=self.num_classes + 1)

        # Train the discriminator
        d_loss_real = self.discriminator.train_on_batch(imgs, [valid, labels])
        d_loss_fake = self.discriminator.train_on_batch(gen_imgs, [fake, fake_labels])
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # ---------------------
        #  Train Generator
        # ---------------------

        g_loss = self.combined.train_on_batch(masked_imgs, valid)

        # Plot the progress
        print("%d [D loss: %f, op_acc: %.2f%%] [G loss: %f]" %
              (epoch, d_loss[0], 100 * d_loss[4], g_loss))

        # If at save interval => save generated image samples
        if epoch % sample_interval == 0:
            # Select a random set of images
            idx = np.random.randint(0, X_train.shape[0], 6)
            imgs = X_train[idx]
            self.sample_images(epoch, imgs)
            self.save_model()
def load_miniplaces_batch():
    x_train, y_train = loader_train.next_batch(10000)
    x_test, y_test = loader_val.next_batch(10000)
    print("\n OG shapes:", x_train.shape, y_train.shape,
          x_test.shape, y_test.shape, len(y_test), y_test[0])
    y_train = to_categorical(y_train.astype('float32'), 100)
    y_test = to_categorical(y_test.astype('float32'), 100)
    print(len(y_test[0]))
    return (x_train, y_train), (x_test, y_test)
def multi_label(self):
    '''For many labels in a single column'''
    self.y_pred = to_categorical(self.y_pred, num_classes=self.classes)
    self.y_val = to_categorical(self.y_val, num_classes=self.classes)
    self.multi_class()
def train(self, epochs, batch_size=128, sample_interval=50):

    # Load the dataset
    (X_train, y_train), (_, _) = mnist.load_data()

    # Rescale -1 to 1
    X_train = (X_train.astype(np.float32) - 127.5) / 127.5
    X_train = np.expand_dims(X_train, axis=3)
    y_train = y_train.reshape(-1, 1)

    half_batch = batch_size // 2  # assumed definition; used only by the class weights below

    # Class weights:
    # To balance the difference in occurrences of digit class labels.
    # 50% of the labels that the discriminator trains on are 'fake'.
    # Weight = 1 / frequency
    cw1 = {0: 1, 1: 1}
    cw2 = {i: self.num_classes / half_batch for i in range(self.num_classes)}
    cw2[self.num_classes] = 1 / half_batch

    # Adversarial ground truths
    valid = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))

    for epoch in range(epochs):

        # ---------------------
        #  Train Discriminator
        # ---------------------

        # Select a random batch of images
        idx = np.random.randint(0, X_train.shape[0], batch_size)
        imgs = X_train[idx]

        # Sample noise and generate a batch of new images
        noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
        gen_imgs = self.generator.predict(noise)

        # One-hot encoding of labels
        labels = to_categorical(y_train[idx], num_classes=self.num_classes + 1)
        fake_labels = to_categorical(np.full((batch_size, 1), self.num_classes),
                                     num_classes=self.num_classes + 1)

        # Train the discriminator
        d_loss_real = self.discriminator.train_on_batch(imgs, [valid, labels],
                                                        class_weight=[cw1, cw2])
        d_loss_fake = self.discriminator.train_on_batch(gen_imgs, [fake, fake_labels],
                                                        class_weight=[cw1, cw2])
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # ---------------------
        #  Train Generator
        # ---------------------

        g_loss = self.combined.train_on_batch(noise, valid, class_weight=[cw1, cw2])

        # Plot the progress
        print("%d [D loss: %f, acc: %.2f%%, op_acc: %.2f%%] [G loss: %f]" %
              (epoch, d_loss[0], 100 * d_loss[3], 100 * d_loss[4], g_loss))

        # If at save interval => save generated image samples
        if epoch % sample_interval == 0:
            self.sample_images(epoch)
def load_data(self):
    x_train, y_train = _.load_train_set()
    x_test, y_test = _.load_test_set()

    # Some simple preprocessing.
    self.x_train = x_train.astype('float32') / 255
    self.x_test = x_test.astype('float32') / 255

    # Convert class vectors to binary class matrices.
    self.y_train = to_categorical(y_train, self.num_classes)
    self.y_test = to_categorical(y_test, self.num_classes)
    print('Data load complete.')
def load_mnist():
    # the data, shuffled and split between train and test sets
    from keras.datasets import mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.
    x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.
    y_train = to_categorical(y_train.astype('float32'))
    y_test = to_categorical(y_test.astype('float32'))
    return (x_train, y_train), (x_test, y_test)
def preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty):
    X, Y = zip(*dataset)
    X = np.array([string_to_int(i, Tx, human_vocab) for i in X])
    Y = [string_to_int(t, Ty, machine_vocab) for t in Y]
    Xoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), X)))
    Yoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(machine_vocab)), Y)))
    return X, np.array(Y), Xoh, Yoh
def fit(self, x_train, y_train, x_val, y_val):
    self.model.summary()
    y_train = to_categorical(y_train, num_classes=2)
    y_val = to_categorical(y_val, num_classes=2)
    weight_file = 'hybrid_{val_acc:.3f}_loss_{loss:.3f}.hdf5'
    model_checkpoint = ModelCheckpoint(WEIGHT_DIR + weight_file, monitor='val_acc',
                                       verbose=1, save_best_only=True)
    tensorboard = TensorBoard(log_dir=LOG_DIR + 'train_log', histogram_freq=0,
                              write_graph=True, write_images=False)
    self.model.fit(x_train, y_train, validation_data=(x_val, y_val),
                   batch_size=self.batch_size, epochs=self.epochs,
                   verbose=1, shuffle=True,
                   callbacks=[model_checkpoint, tensorboard])
def iris():
    df = pd.read_csv(base + 'iris.csv')
    df['species'] = df['species'].factorize()[0]
    df = df.sample(len(df))  # shuffle rows
    y = to_categorical(df['species'])
    x = df.iloc[:, :-1].values
    return x, y
def load_cifar_10():
    from keras.datasets import cifar10
    num_classes = 10
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255.0
    x_test /= 255.0
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)
    return (x_train, y_train), (x_test, y_test)
def load_mnist(normalize=False, one_hot_label=True):
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    if normalize:
        x_train = x_train.reshape(x_train.shape[0], 28 * 28).astype(np.float32) / 255.0
        x_test = x_test.reshape(x_test.shape[0], 28 * 28).astype(np.float32) / 255.0
    if one_hot_label:
        y_train = to_categorical(y_train, num_classes=10)
        y_test = to_categorical(y_test, num_classes=10)
    return (x_train, y_train), (x_test, y_test)
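# Quick usage sketch (assumes keras.datasets.mnist, numpy as np and
# to_categorical are imported, matching the function's own dependencies):
(x_tr, y_tr), (x_te, y_te) = load_mnist(normalize=True, one_hot_label=True)
print(x_tr.shape, y_tr.shape)  # (60000, 784) (60000, 10)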
def cifar10(*args, **kwargs):
    dataset = cx.Dataset()
    from keras.datasets import cifar10
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    inputs = np.concatenate((x_train, x_test))
    x_train, x_test = None, None
    inputs = inputs.astype('float32')
    inputs /= 255
    labels = np.concatenate((y_train, y_test))
    y_train, y_test = None, None
    targets = to_categorical(labels, 10)
    labels = np.array([str(label[0]) for label in labels], dtype=str)
    dataset.name = "CIFAR-10"
    dataset.description = """
Original source: https://www.cs.toronto.edu/~kriz/cifar.html

The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes,
with 6000 images per class. The classes are completely mutually exclusive.
There is no overlap between automobiles and trucks. "Automobile" includes
sedans, SUVs, things of that sort. "Truck" includes only big trucks.
Neither includes pickup trucks.
"""
    dataset.load_direct([inputs], [targets], [labels])
    return dataset
def __init__(self, folder, transforms=None, shuffle=True, batch_size=32, seed=None):
    if transforms is None:
        transforms = []
    paths, labels, label_names = get_paths_with_labels(folder)
    self.n = len(paths)
    self.paths = np.asarray(paths)
    self.labels = to_categorical(labels, num_classes=len(label_names))
    self.label_names = label_names
    self.shuffle = shuffle
    self.seed = seed
    self.transform = get_transform(*transforms)
    self.batch_size = batch_size
    self.batch_idx = 0
    self.num_batches_so_far = -1
    self.indices = np.arange(self.n)
    # calculate output shape by loading an image and
    # passing it through the functions
    img = imread(paths[0])
    img = np.asarray(img, dtype=K.floatx())
    self.output_shape = self.transform(img).shape
    if K.image_data_format() == 'channels_first':
        self.output_shape = (self.output_shape[2],
                             self.output_shape[0],
                             self.output_shape[1])
    self.reset()
def sample_generator_input(self, batch_size):
    # Generator inputs
    sampled_noise = np.random.normal(0, 1, (batch_size, 62))
    sampled_labels = np.random.randint(0, self.num_classes, batch_size).reshape(-1, 1)
    sampled_labels = to_categorical(sampled_labels, num_classes=self.num_classes)
    return sampled_noise, sampled_labels
def process_images(image_path, shape_path):
    shape_y = None
    if shape_path == "crop_images/valid_circle/":
        shape_y = 0
    if shape_path == "crop_images/valid_rectangle/":
        shape_y = 1
    if shape_path == "crop_images/valid_triangle/":
        shape_y = 2
    if shape_path == "crop_images/valid_square/":
        shape_y = 3
    ylabel = to_categorical(shape_y, num_classes=4)
    ylabel = np.reshape(ylabel, (4,))
    childs_lock.acquire()
    img = Image.open(image_path)
    childs_lock.release()
    np_img = np.array(img, dtype=[('img_info', np.float16)])
    # img = img.resize((200, 200), Image.ANTIALIAS)  # ANTIALIAS preserves quality
    # to check that all image inputs are the same shape:
    '''width, height = np_img.shape[0], np_img.shape[1]
    if width == 300 or height == 300:
        print("rm ", image_path)'''
    img.close()
    return np_img['img_info'], ylabel
def get_ori_data(self):
    print('...Load original data begin')
    data = self.load_pkl_data(ORIGINAL)
    data['data'] = data['data'].astype('float32')
    data['label'] = to_categorical(data['label'], n_classes)
    print('......original data shape : {0}'.format(data['data'].shape))
    print('...Load original data done')
    return data
def sentence_to_id(seed, vocab):
    seed_id = []
    dict_len = len(vocab)
    for char in seed:
        char_id = vocab[char]
        tmp = to_categorical(char_id, num_classes=dict_len)
        seed_id.append(tmp)
    return seed_id
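# Toy usage sketch (assumes to_categorical is imported; the vocab here is a
# stand-in for the character dictionary built from the corpus):
toy_vocab = {'a': 0, 'b': 1, 'c': 2}
onehots = sentence_to_id('ab', toy_vocab)
print(len(onehots), onehots[0])  # 2 [1. 0. 0.]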
def load_cifar100():
    (train_data, train_labels), (test_data, test_labels) = cifar100.load_data()
    # train_data = train_data / 255.0
    # test_data = test_data / 255.0
    train_data, test_data = normalize(train_data, test_data)

    train_labels = to_categorical(train_labels, 100)
    test_labels = to_categorical(test_labels, 100)

    seed = 777
    np.random.seed(seed)
    np.random.shuffle(train_data)
    np.random.seed(seed)
    np.random.shuffle(train_labels)

    return train_data, train_labels, test_data, test_labels
def load_fashion():
    (train_data, train_labels), (test_data, test_labels) = fashion_mnist.load_data()
    train_data = np.expand_dims(train_data, axis=-1)
    test_data = np.expand_dims(test_data, axis=-1)
    train_data, test_data = normalize(train_data, test_data)

    train_labels = to_categorical(train_labels, 10)
    test_labels = to_categorical(test_labels, 10)

    seed = 777
    np.random.seed(seed)
    np.random.shuffle(train_data)
    np.random.seed(seed)
    np.random.shuffle(train_labels)

    return train_data, train_labels, test_data, test_labels
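# Note on the shuffle pattern in the two loaders above: re-seeding with the same
# seed before each np.random.shuffle call is what keeps data and labels aligned.
# An equivalent, arguably clearer pattern uses a single permutation index (toy demo):
demo_x = np.arange(10).reshape(5, 2)
demo_y = np.arange(5)
perm = np.random.permutation(len(demo_x))
demo_x, demo_y = demo_x[perm], demo_y[perm]  # rows and labels stay paired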
def get_ori_data(self):
    print('...Load original data begin')
    path = self.root_dir + 'fold{0}/'.format(self.k) + 'original_{0}.pkl'.format(self.train_or_valid)
    data = self.load_pkl_data(path)
    data['data'] = data['data'].astype('float32')
    data['label'] = to_categorical(data['label'], n_classes)
    print('......original data shape : {0}'.format(data['data'].shape))
    print('...Load original data done')
    return data
def load_testing_data(self):
    # testing_dataframe = pandas.read_csv(self.commandline_args.test)
    # values = testing_dataframe.values
    (X_train, y_train), (X_test, y_test) = self.cifar_data
    shaped_labels = to_categorical(y_test, self.num_classes + 1)
    scaled_values = self.scale_values(X_test)
    shaped_values = self.reshape_values(scaled_values)
    return shaped_values, shaped_labels
def trainBatch(model, args):
    """
    Training a CapsuleNet
    :param model: the CapsuleNet model
    :param args: arguments
    :return: The trained model
    """
    # callbacks
    log = callbacks.CSVLogger(args.save_dir + '/log.csv')
    tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs',
                               batch_size=args.batch_size, histogram_freq=args.debug)
    checkpoint = callbacks.ModelCheckpoint(args.save_dir + '/weights-resnet-{epoch:02d}.h5',
                                           save_best_only=True, save_weights_only=True,
                                           verbose=1)
    lr_decay = callbacks.LearningRateScheduler(schedule=lambda epoch: args.lr * (0.9 ** epoch))

    # compile the model
    # model.compile(optimizer=optimizers.Adam(lr=args.lr),
    #               loss=[margin_loss, 'mse'],
    #               loss_weights=[1., args.lam_recon],
    #               metrics={'out_caps': 'accuracy'})
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd,
                  metrics=['mae', 'acc', 'top_k_categorical_accuracy'])

    groups = args.groups
    for i in range(groups):
        print("Training Group: ", i)
        (x_test, y_test, x_train, y_train) = loadMiniplacesBatch(
            train_data_list, val_data_list, images_root,
            group=i, groups=groups, size=[100, 100])
        x_train = x_train.reshape(-1, 100, 100, 3).astype('float32') / 255.
        x_test = x_test.reshape(-1, 100, 100, 3).astype('float32') / 255.
        y_train = to_categorical(y_train.astype('float32'), num_classes=100)
        y_test = to_categorical(y_test.astype('float32'), num_classes=100)
        print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

        # Training without data augmentation:
        model.fit(x_train, y_train, batch_size=args.batch_size, epochs=args.epochs,
                  callbacks=[log, tb, checkpoint, lr_decay],
                  validation_data=(x_test, y_test))

    model.save_weights(args.save_dir + '/trained_model.h5')
    print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir)

    return model
def get_ori_data(self):
    print('...Load original data begin')
    path = self.root_dir + 'fold{0}/'.format(self.k) + 'original_{0}.pkl'.format(self.train_or_valid)
    data = self.load_pkl_data(path)
    if self.train_or_valid == 'train':
        data['data'] = np.array(data['data'])
    if self.train_or_valid == 'valid':
        data['data'] = conduct_fe(data['data'], SPEC)
    data['label'] = to_categorical(data['label'], n_classes)
    print('...Load original data done')
    return data
def train_fold(data):
    i_fold, train, test, nv_cls, X, y, s_info, cachedir = data
    window_qtd = int(sys.argv[1])
    window_qtd_stride = 5
    print(window_qtd)
    print(i_fold)
    print(nv_cls)
    X_train, y_train = lofar2image(X, y, train, window_qtd, window_qtd_stride, s_info)
    X_test, y_test = lofar2image(X, y, test, window_qtd, window_qtd, s_info)
    if verbose:
        print('\t\t Fold %i' % i_fold)

    # remove the novelty class from train and test
    novelty_cls = nv_cls
    X_train = X_train[y_train != novelty_cls]
    X_test = X_test[y_test != novelty_cls]
    y_train = y_train[y_train != novelty_cls]
    y_test = y_test[y_test != novelty_cls]

    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    mask = np.ones(4, dtype=bool)
    mask[novelty_cls] = False
    if y_train.shape[1] == 4:
        y_train = y_train[:, mask]
        y_test = y_test[:, mask]
    elif y_train.shape[1] != 3:
        raise NotImplementedError

    class_mapping = {0: 'ClassA', 1: 'ClassB', 2: 'ClassC', 3: 'ClassD'}
    inner_cachedir = os.path.join(cachedir, class_mapping[nv_cls], '%i_fold' % i_fold)
    estimator.fit(X_train, y_train,
                  validation_split=0.1,
                  # validation_data=(X_test, y_test),
                  n_inits=1,
                  verbose=verbose,
                  cachedir=inner_cachedir)
    scores = estimator.score(X_test, y_test, return_eff=True)
    return i_fold, scores, nv_cls
def prepare_image_dataset(x_data, y_data, img_dim, num_classes, reshape=True):
    """
    Normalizes the images in the datasets and converts the labels to categoricals.
    """
    img_rows, img_cols = img_dim
    if reshape:
        x_data = x_data.reshape(x_data.shape[0], img_rows, img_cols)
    x_data = x_data.astype('float32') / 255

    # convert class vectors to binary class matrices
    y_data = utils.to_categorical(y_data, num_classes)

    return (x_data, y_data)
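# Usage sketch with random data (assumes numpy as np and keras.utils as utils,
# matching the function's own dependencies):
x_demo = np.random.randint(0, 256, size=(8, 28 * 28))
y_demo = np.random.randint(0, 10, size=(8,))
x_demo, y_demo = prepare_image_dataset(x_demo, y_demo, (28, 28), num_classes=10)
print(x_demo.shape, y_demo.shape)  # (8, 28, 28) (8, 10)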
def get_class_one_hot(self, class_str):
    """Given a class as a string, return its number in the classes list.
    This lets us encode and one-hot it for training."""
    # Encode it first.
    label_encoded = self.classes.index(class_str)

    # Now one-hot it.
    label_hot = to_categorical(label_encoded, len(self.classes))

    assert len(label_hot) == len(self.classes)

    return label_hot
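# Minimal stand-in sketch to exercise get_class_one_hot outside its class
# (the _Holder class here is hypothetical; the real object carries more state):
class _Holder:
    classes = ['cat', 'dog', 'bird']
    get_class_one_hot = get_class_one_hot

print(_Holder().get_class_one_hot('dog'))  # [0. 1. 0.]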
def load_training_data(self):
    # training_dataframe = pandas.read_csv(self.commandline_args.train)
    # values = training_dataframe.values[:, 1:]
    # labels = training_dataframe.values[:, 0]
    (X_train, y_train), (X_test, y_test) = self.cifar_data
    # shaped_labels = to_categorical(y_train, self.num_classes+1)
    shaped_labels = to_categorical(np.full((y_train.shape[0], 1), 0), self.num_classes + 1)
    scaled_values = self.scale_values(X_train)
    shaped_values = self.reshape_values(scaled_values)
    return shaped_values, shaped_labels
for x in test_datasets:
    ax = pickle.load(open(x, 'rb'))
    test_sizes.append(len(ax))

_, _, test_dataset, test_labels = merge_datasets(test_datasets, test_sizes, 1)
print('Testing:', test_dataset.shape, test_labels.shape)

pickle_file = os.path.join(local_path, 'FER_traj.pickle')
translate_labels = ['anger', 'disgust', 'fear', 'happiness', 'neutral', 'sadness', 'surprise']
print('Test set', test_dataset.shape, test_labels.shape)

test_dataset = test_dataset.reshape(
    (-1, settings['image_size'][0], settings['image_size'][1],
     settings['num_channels'])).astype(np.float32)
test_labels_oh = to_categorical(test_labels)
print('Test set', test_dataset.shape, test_labels_oh.shape)

input_img = Input(shape=(settings['image_size'][0], settings['image_size'][1],
                         settings['num_channels']))
encode = encoder(input_img)
flat_level = Model(input_img, fcfl(encode))
full_model = Model(input_img, fc(encode))
full_model.load_weights('classification_complete.h5')
# copy the shared-layer weights from the full classifier into the flat model
for l1, l2 in zip(flat_level.layers[:], full_model.layers[:14]):
    l1.set_weights(l2.get_weights())
flat_level.compile(loss=keras.losses.categorical_crossentropy,
                   optimizer=keras.optimizers.Adam(), metrics=['accuracy'])
label = 1 if label == "positive" else 0
labels.append(label)

# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)

# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels,
                                                  test_size=0.25, random_state=42)

# convert the labels from integers to vectors
trainY = to_categorical(trainY, num_classes=2)
testY = to_categorical(testY, num_classes=2)

# construct the image generator for data augmentation
aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1,
                         height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
                         horizontal_flip=True, fill_mode="nearest")

# initialize the model
print("[INFO] compiling model...")
model = LeNet.build(width=28, height=28, depth=3, classes=2)
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
from keras.datasets import cifar10
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

# Data Settings
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train / 255
x_test = x_test / 255
y_cat_train = to_categorical(y_train, 10)
y_cat_test = to_categorical(y_test, 10)

# Training the Model
model = Sequential()
model.add(
    Conv2D(
        filters=32,
        kernel_size=(4, 4),
        input_shape=(32, 32, 3),
        activation='relu',
    ))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(
    Conv2D(
        filters=32,
def encode_categoricals(df, col_names):
    for col in col_names:
        onehots = to_categorical(df[col].values)
        onehot_df = pd.DataFrame(onehots,
                                 columns=[col + "_" + str(i) for i in range(onehots.shape[1])])
        df = pd.concat([df.drop(col, axis=1), onehot_df], axis=1)
    return df
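# Toy usage sketch (assumes pandas as pd and to_categorical are imported):
demo_df = pd.DataFrame({'color': [0, 2, 1, 0], 'value': [1.0, 2.0, 3.0, 4.0]})
demo_df = encode_categoricals(demo_df, ['color'])
print(demo_df.columns.tolist())  # ['value', 'color_0', 'color_1', 'color_2']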
def remember(self, game, action):
    self.memory_data.append(self.collect_memory_fragment(game, action))
    self.memory_label.append(
        to_categorical(list(Direction).index(action),
                       len(Direction)).reshape((1, len(Direction))))
model_path = 'train_test_verified'
if not os.path.exists(model_path):
    os.mkdir(model_path)
# refine_path = 'cnn2d_verified_refine'
# models = [join(model_path, f) for f in listdir(model_path) if isfile(join(model_path, f))]
# if not os.path.exists(refine_path):
#     os.mkdir(refine_path)

df = pd.read_csv('data/train_label.csv')
df['trans'] = df['label'].map(map_dict)
df['onehot'] = df['trans'].apply(lambda x: to_categorical(x, num_classes=41))
Y = df['onehot'].tolist()
Y = np.array(Y)
Y = Y.reshape(-1, 41)

X = np.load('data/mfcc/X_train.npy')
X, Y = shuffle(X, Y, random_state=5)

split = int(X.shape[0] * 9 / 10)
X_train, X_test = X[:split], X[split:]
Y_train, Y_test = Y[:split], Y[split:]
print(X_train.shape)

checkpoint = ModelCheckpoint(os.path.join(model_path,
y_test = y_test.values

"""### Let's take a look at our data"""

show_data(X_train, y_train)

"""### We notice that the digits themselves are always black, and the backgrounds
are a different shade. This implies we could remove the background. Let's do that."""

X_train = preprocess(X_train)
X_test = preprocess(X_test)

display(X_train.shape)
# Now our inputs have been reshaped into a 4D array, with each item representing an image.
display(X_test.shape)

"""### Categorical labels should always be one-hot encoded"""

y_train_enc = to_categorical(y_train, NUM_CLASSES)
y_test_enc = to_categorical(y_test, NUM_CLASSES)

show_data(X_train, y_train)
show_data(X_test, y_test)

"""### Now we define our base model

This is a very simple CNN architecture, similar to what you learned in the lecture.
We have cascading blocks of convolutional layers followed by pooling layers.
"""

def create_base_model():
    model = Sequential()
    model.add(
def get_state(env):
    state = env.get_feature_vec_observation()
    res = state[0:-1]
    res.extend(to_categorical(state[-1], n_object))
    return res
sequences = tokenizer.texts_to_sequences(contents)
sequences_words = tokenizer.texts_to_sequences(word)
data_x = pad_sequences(sequences, maxlen=50)  # average length is ~20, maximum is 200; cap at 50

def getDataY(data_x):
    # find the position of the sentiment keyword within the review text
    data_y = []
    for i in range(data_x.shape[0]):
        try:
            data_y.append(list(data_x[i]).index(sequences_words[i][0]))
        except:
            data_y.append(-1)  # the sentiment keyword is not among the tokens
    return np.array(data_y)

data_y = getDataY(data_x)
onehot_y = to_categorical(data_y[data_y >= 0], num_classes=50)  # convert positions to one-hot
train_x, test_x, train_y, test_y = train_test_split(data_x[data_y >= 0], onehot_y)

def trainModel(train_x, test_x, train_y, test_y):
    # train the model
    model = Sequential()
    model.add(Embedding(len(tokenizer.word_index) + 1, 128))
    model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(50, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    model.fit(train_x, train_y, batch_size=32,
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Next, reshape the data into an array the CNN can take as input.
# For example, train_images.shape goes from (60000, 28, 28) to (60000, 28, 28, 1):
# 60000 -> number of images, 28 -> height, 28 -> width,
# 1 -> number of channels (the data is black and white).
train_images = train_images.reshape((60000, 28, 28, 1))
train_images = train_images.astype('float32') / 255
test_images = test_images.reshape((10000, 28, 28, 1))
test_images = test_images.astype('float32') / 255

train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

## Create the depthwise separable convolution ##
from keras.models import Sequential, Model
from keras import layers, models

# Use the Sequential method to create the model
model = models.Sequential()
model.add(
    layers.SeparableConv2D(32, (3, 3), activation='relu',
                           input_shape=(28, 28, 1)))
def loadData(iData):
    lColumns = getColumns()
    # h5File = h5py.File(iFile)
    # treeArray = h5File['test'][()]
    df = pd.DataFrame(iData, columns=lColumns)
    print("A")
    # map PDG particle IDs onto a compact 0..32 index for one-hot encoding
    idconv = {11.: 1, 12.: 2, 13.: 3, 22.: 4, 130.: 5, 211.: 6, 310.: 7, 321.: 8,
              2112.: 9, 2212.: 10, 3112.: 11, 3122.: 12, 3222.: 13, 3312.: 14,
              3322.: 15, 3334.: 16, -11.: 17, -12.: 18, -13.: 19, -22.: 20,
              -130.: 21, -211.: 22, -310.: 23, -321.: 24, -2112.: 25, -2212.: 26,
              -3112.: 27, -3122.: 28, -3222.: 29, -3312.: 30, -3322.: 31,
              -3334.: 32, 0.: 0}
    nIDs = 33
    print("B")
    for i0 in range(nparts):
        df['j_part_pt_' + str(i0)] = df['j_part_pt_' + str(i0)] / df['j_pt']
        df['j_part_id_' + str(i0)] = df['j_part_id_' + str(i0)].map(idconv)
    features_val = df[lPartfeatures]
    for p in lPartfeatures:
        if df[p].isna().sum() > 0:
            print(p, "found nan!!")
    features_2df = np.zeros((len(df['procid']), nparts, len(lPartvars) + nIDs - 1))
    for ir, row in features_val.iterrows():
        features_row = np.array(np.transpose(row.values.reshape(len(lPartvars), nparts)))
        features_row = np.concatenate(
            (features_row[:, :-1],
             to_categorical(features_row[:, -1], num_classes=nIDs)),
            axis=1)
        features_2df[ir, :, :] = features_row
    features_val = features_2df
    return features_val
num_classes = 10

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

import numpy as np
import matplotlib.pyplot as plt
from keras.utils import to_categorical

x_train = x_train / 255
x_test = x_test / 255
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)
print(y_train.shape)
print(y_train[0])

from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout

model = Sequential()
model.add(Conv2D(32, (3, 3), padding="same",
                 input_shape=x_train.shape[1:], activation="relu"))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512, activation="relu"))
model = model(Tx, Ty, n_a, n_s, len(human_vocab), len(machine_vocab))
model.summary()
model.compile(optimizer=Adam(lr=0.005, beta_1=0.9, beta_2=0.999, decay=0.01),
              loss='categorical_crossentropy', metrics=['accuracy'])

s0 = np.zeros((m, n_s))
c0 = np.zeros((m, n_s))
outputs = list(Yoh.swapaxes(0, 1))

model.fit([Xoh, s0, c0], outputs, epochs=1, batch_size=100)
model.load_weights('models/model.h5')

EXAMPLES = ['3 May 1979', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007',
            'Saturday May 9 2018', 'March 3 2001', 'March 3rd 2001', '1 March 2001']
for example in EXAMPLES:
    source = string_to_int(example, Tx, human_vocab)
    source = np.array(
        list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)),
                 source))).swapaxes(0, 1)
    prediction = model.predict([source, s0, c0])
    prediction = np.argmax(prediction, axis=-1)
    output = [inv_machine_vocab[int(i)] for i in prediction]
    print("source:", example)
    print("output:", ''.join(output))
# develop an mlp for blobs dataset
from sklearn.datasets import make_blobs
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from matplotlib import pyplot

# generate 2d classification dataset
X, y = make_blobs(n_samples=1100, centers=3, n_features=2, cluster_std=2, random_state=2)
# one hot encode output variable
y = to_categorical(y)
# split into train and test
n_train = 100
trainX, testX = X[:n_train, :], X[n_train:, :]
trainy, testy = y[:n_train], y[n_train:]
# define model
model = Sequential()
model.add(Dense(25, input_dim=2, activation='relu'))
model.add(Dense(3, activation='softmax'))
opt = SGD(lr=0.01, momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
# fit model
history = model.fit(trainX, trainy, validation_data=(testX, testy),
def train_model():
    if not os.path.exists('ModelCheckpoint'):
        os.makedirs('ModelCheckpoint')

    MAX_SEQUENCE_LENGTH = 90018
    MAX_NB_WORDS = 170000
    EMBEDDING_DIM = 300
    VALIDATION_SPLIT = 0.1
    BATCH_SIZE = 32

    print('Indexing word vectors.')
    embeddings_index = {}
    f = file_io.FileIO('GoogleNews-vectors-negative300.txt', mode='r')
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()
    print('Found %s word vectors.' % len(embeddings_index))

    print('Processing text dataset')
    sc = SupremeCourt()
    print(sc.info)
    texts = []  # list of text samples
    labels_index = {}  # dictionary mapping label name to numeric id
    labels = []  # list of label ids
    issue_codes = list(sc.issue_area_codes.keys())  # 15 labels
    issue_codes.sort()
    issue_codes = [str(ic) for ic in issue_codes]
    labels_index = dict(zip(issue_codes, np.arange(len(issue_codes))))
    for record in sc.records():
        if record['issue'] is None:  # some cases have None as an issue
            labels.append(labels_index['-1'])
        else:
            labels.append(labels_index[record['issue'][:-4]])
        texts.append(record['text'])
    print('Found %s texts.' % len(texts))
    print('Found %s labels.' % len(labels_index))

    # finally, vectorize the text samples into a 2D integer tensor
    tokenizer = Tokenizer(
        num_words=MAX_NB_WORDS,
        filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    )
    tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)
    word_index = tokenizer.word_index
    print('Found %s unique tokens.' % len(word_index))
    data = pad_sequences(sequences)
    MAX_SEQUENCE_LENGTH = data.shape[1]
    labels = to_categorical(np.asarray(labels))
    print('Shape of data tensor:', data.shape)
    print('Shape of label tensor:', labels.shape)

    # split the data into a training set and a validation set
    x_train, x_test, y_train, y_test = train_test_split(
        data, labels, test_size=VALIDATION_SPLIT, random_state=42)
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=VALIDATION_SPLIT, random_state=42)

    def generator():
        while True:
            indices = list(range(len(x_train)))
            imax = len(indices) // BATCH_SIZE
            for i in range(imax):
                list_IDs_temp = indices[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
                yield x_train[list_IDs_temp], y_train[list_IDs_temp]

    def test_generator():
        while True:
            indices = list(range(len(x_test)))
            imax = len(indices) // BATCH_SIZE
            for i in range(imax):
                list_IDs_temp = indices[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
                yield x_test[list_IDs_temp], y_test[list_IDs_temp]

    def val_generator():
        while True:
            indices = list(range(len(x_val)))
            imax = len(indices) // BATCH_SIZE
            for i in range(imax):
                list_IDs_temp = indices[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
                yield x_val[list_IDs_temp], y_val[list_IDs_temp]

    print('Preparing embedding matrix.')
    # prepare embedding matrix
    num_words = min(MAX_NB_WORDS, len(word_index))
    embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
    for word, i in word_index.items():
        if i >= MAX_NB_WORDS:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector

    print('Training model.')
    model = Sequential()
    model.add(
        Embedding(num_words,
                  EMBEDDING_DIM,
                  weights=[embedding_matrix],
                  input_length=MAX_SEQUENCE_LENGTH,
                  trainable=False))
    model.add(CuDNNLSTM(128))
    model.add(Dropout(0.5))
    model.add(Dense(len(labels_index), activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

    checkpointer = ModelCheckpoint(filepath="ModelCheckpoint/" +
                                   os.path.basename(__file__)[:-3] +
                                   "-{epoch:02d}-{val_acc:.2f}.hdf5",
                                   monitor='val_acc', verbose=2,
                                   save_best_only=True, mode='max')
    earlystopper = EarlyStopping(monitor='val_loss', min_delta=0, patience=0,
                                 verbose=2, mode='auto')
    model.summary()
    model.fit_generator(generator=generator(),
                        steps_per_epoch=len(x_train) // BATCH_SIZE,
                        epochs=50,
                        verbose=2,
                        validation_data=test_generator(),
                        validation_steps=len(x_test) // BATCH_SIZE,
                        callbacks=[checkpointer, earlystopper],
                        shuffle=True)
    score = model.evaluate_generator(val_generator(), steps=len(x_val) // BATCH_SIZE)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    # Save Keras ModelCheckpoints locally
    model.save('model.hdf5')
def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))

label = to_categorical(label)
print(label)
print(label.shape)

X_train, X_test, y_train, y_test = train_test_split(
    data, label, test_size=0.2, random_state=1)

# fit the tokenizer on the training data only
tokenizer.fit_on_texts(X_train)

# convert the training set
seq_x_train = tokenizer.texts_to_sequences(X_train)
X_enc_train = tokenizer.sequences_to_matrix(seq_x_train, mode="tfidf")

# convert the test set
seq_x_test = tokenizer.texts_to_sequences(X_test)
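# Sketch of how the custom metrics above plug into a Keras model (commented out
# because `model` and recall_m are defined elsewhere in this script):
# model.compile(optimizer='adam', loss='categorical_crossentropy',
#               metrics=['accuracy', precision_m, f1_m])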
print(df.shape)
df = df[['Presence_of_Smoke', 'Area', 'ROG', 'Color', 'Severity1']]

## create input and output vectors
X = df.iloc[:, 0:4]
Y = df.iloc[:, 4]
print(X.shape)

X = X.values.reshape(1467, 4, 1)
Y = Y.values.reshape(1467, 1)
Z = Y

from keras.utils import to_categorical
Y = to_categorical(Y)

total_rows = 1467 * 4
max_len = 4

## split training and testing data in one call so X, Y and Z stay aligned
## (two independent train_test_split calls would shuffle them differently)
X_train, X_test, Y_train, Y_test, Z_train, Z_test = train_test_split(
    X, Y, Z, test_size=0.20)

X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# create model (GRU)
# Train the Model
history = model.fit_generator(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator,
    verbose=1,
    callbacks=[metrics_epoch])

# Save the Model
model.save('Dogs_Cats_Finetuning_MobileNet.h5')

predictions = model.predict_generator(validation_generator)
# val_preds = np.argmax(predictions, axis=-1)
val_preds = [1 if x >= 0.5 else 0 for x in predictions]
val_trues = validation_generator.classes
classes_one_hot_encoded = to_categorical(val_trues)

cm = metrics.confusion_matrix(val_trues, val_preds)
print(cm)
precisions, recall, fscore, support = metrics.precision_recall_fscore_support(
    val_trues, val_preds, average=None)

# Plot the accuracy and loss curves
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

# metrics calculated by using sklearn after validating
print('Precision')
print(precisions)
for num_key in range(label_mapping.get(label)):
    labels.append(i)
i += 1

print("Found %s words in our csv file" % len(texts))

# Vectorize the words
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
word_index = tokenizer.word_index
print("Found %s unique words" % len(word_index))
data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)

# Create one-hot vectors for each of the labels
labels = to_categorical(np.asarray(labels))
print("Shape of data: ", data.shape)
print("Shape of label: ", labels.shape)

# Split the data into a training set and a validation set.
# Will later perform 10-fold cross-validation to find the best split.
# Shuffling the len(data) items of data:
indices = np.arange(data.shape[0])
np.random.shuffle(indices)
data = data[indices]
labels = labels[indices]
num_validation_samples = int(VALIDATION_SPLIT * data.shape[0])

# Data (x) and labels (y) are actually split here
def train_models(self):
    if not self.feature_sets:
        print("Error: no input feature sets found!")
        sys.exit(1)

    self.df = pd.concat(self.df_dict.values())

    only_train = self.df[self.df["for_train"] & ~self.df["for_test"]]
    x_otrain = only_train.loc[:, only_train.columns != 'category']
    y_otrain = only_train["category"]

    only_test = self.df[~self.df["for_train"] & self.df["for_test"]]
    x_otest = only_test.loc[:, only_test.columns != 'category']
    y_otest = only_test["category"]

    both = self.df[self.df["for_train"] & self.df["for_test"]]
    x_both = both.loc[:, both.columns != 'category']
    y_both = both["category"]

    train_frac = 0.6

    # if a sample is used for both training and testing, split it
    x_train, x_test, y_train, y_test = train_test_split(
        x_both, y_both, train_size=train_frac,
        test_size=(1 - train_frac), shuffle=True)

    # if only for testing - assign a weight so roc curves don't mess up
    only_test["wgt"] = only_test["wgt"] * (1 - train_frac)

    x_train["category"] = y_train
    x_otrain["category"] = y_otrain
    train = pd.concat([x_train, x_otrain])
    train = train.sample(frac=1)  # shuffle

    x_test["category"] = y_test
    x_otest["category"] = y_otest
    test = pd.concat([x_test, x_otest])

    tr_filter = False
    if tr_filter:
        train = train_filter(train)

    self.x_train = train.loc[:, train.columns != 'category']
    self.y_train = train['category']
    self.x_test = test.loc[:, test.columns != 'category']
    self.y_test = test['category']

    for feature_set_name, feature_set in self.feature_sets.items():
        for model in self.mva_models:
            training_data, testing_data = self.prepare_data(feature_set_name, feature_set)
            if model.binary:
                if len(self.categories) != 2:
                    print("Can't perform binary classification with {0} categories!"
                          .format(len(self.categories)))
                    sys.exit(1)
            elif not self.converted_to_cat:
                self.y_train = to_categorical(self.y_train, len(self.categories))
                self.y_test = to_categorical(self.y_test, len(self.categories))
                # need this to convert only once (for the case when several models are trained)
                self.converted_to_cat = True

            if "resweights" in model.name:
                self.y_train = train[['category', 'resweight']]
                self.y_test = test[['category', 'resweight']]
            else:
                training_data = training_data.loc[:, training_data.columns != 'category']
                testing_data = testing_data.loc[:, testing_data.columns != 'category']
            model.train(training_data, self.y_train, feature_set_name,
                        self.model_dir, self.name)
            print(f"Test shape: {testing_data.shape}")
            prediction = model.predict(testing_data, self.y_test, feature_set_name)

            if model.binary:
                if "resweights" in model.name:
                    roc = roc_curve(self.y_test.iloc[:, 0], prediction,
                                    sample_weight=self.x_test['wgt'] * self.x_test["genweight"])
                    testing_data["category"] = self.y_test.iloc[:, 0]
                else:
                    roc = roc_curve(self.y_test, prediction,
                                    sample_weight=self.x_test['wgt'] * self.x_test["genweight"])
                    testing_data["category"] = self.y_test
                self.print_yields(roc, prediction, 0.01)
                self.plot_hist("dnn_score_{0}_{1}_{2}".format(self.name, model.name,
                                                              feature_set_name),
                               df=testing_data, values=prediction)
                np.save("{0}/{1}_{2}_{3}_roc".format(self.out_dir, self.name,
                                                     model.name, feature_set_name), roc)
                self.roc_curves[model.name + "_" + feature_set_name] = roc
            else:
                vbf_pred = prediction[0]
                ggh_pred = prediction[1]
                dy_pred = prediction[2]
                ewk_pred = prediction[3]
                # cuts = (ewk_pred < 0.7)
                cuts = None
                if cuts:
                    self.x_test = self.x_test[cuts]
                    self.y_test = self.y_test[cuts]
                    vbf_pred = vbf_pred[cuts]
                    ggh_pred = ggh_pred[cuts]
                    dy_pred = dy_pred[cuts]
                    ewk_pred = ewk_pred[cuts]
                pred = vbf_pred
                # pred = np.sum([vbf_pred, ggh_pred], axis=0)
                # pred = np.sum([vbf_pred, (-1) * ewk_pred], axis=0)
                roc = roc_curve(np.logical_or(self.y_test[:, 0], self.y_test[:, 1]), pred,
                                sample_weight=self.x_test['wgt'] * self.x_test["genweight"])
                self.print_yields(roc, pred, 0.01)
                # np.save("{0}/{1}_{2}_{3}_roc".format(self.out_dir, self.name, model.name, feature_set_name), roc)
                # np.save("{0}/{1}_{2}_{3}_vbf-ewk_roc".format(self.out_dir, self.name, model.name, feature_set_name), roc)
                np.save("{0}/{1}_{2}_{3}_ewk<07_roc".format(self.out_dir, self.name,
                                                            model.name, feature_set_name), roc)
def generator(self):
    while True:
        batches = _make_batches(size=self.total_images, batch_size=self.batch_size)
        for start, end in batches:
            arr = []
            labels = []
            cur_batch = self.image_paths[start:end]
            for image_path in cur_batch:
                # print image_path
                img = imread(fname=os.path.join(self.data_path, image_path))

                # if channels are not 3
                ndim = len(img.shape)
                if ndim == 2:
                    img = img[..., np.newaxis]
                    img = np.tile(A=img, reps=(1, 1, 3))
                if ndim == 4:
                    img = img[..., :3]

                # resizing image maintaining aspect ratio
                img = resize_image(img=img, size=self.input_size)

                if self.training:
                    # random cropping while training
                    img = random_crop_image(img=img, size=self.input_size)
                    img = augment(img=img, horizontal_flip=True, vertical_flip=True,
                                  brightness=True, contrast=True, rotation=True,
                                  translation=True, blur=True, noise=True)
                else:
                    # center cropping (integer division so the slice indices are ints)
                    h, w, c = img.shape
                    center_h = h // 2
                    center_w = w // 2
                    center_new_img = self.input_size // 2
                    new_x1 = center_w - center_new_img
                    new_y1 = center_h - center_new_img
                    new_x2 = center_w + center_new_img
                    new_y2 = center_h + center_new_img
                    if self.input_size % 2 == 1:
                        new_x2 += 1
                        new_y2 += 1
                    img = img[new_y1:new_y2, new_x1:new_x2]

                arr.append(img)
                cls = image_path.split('/')[0]
                id_for_cls = self.cls2id[cls]
                labels.append(id_for_cls)

            arr = np.array(arr)
            arr = arr.astype('float32')

            # scale pixels to the zero-centred range [-1, 1]
            arr /= 255.
            arr -= 0.5
            arr *= 2.

            # one hot encoding
            labels = to_categorical(y=labels, num_classes=self.total_classes)

            yield (arr, labels)
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten
import code
import imageResize as ir
import numpy as np

# download mnist data and split into train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# reshape data to fit the model
X_train = X_train.reshape(60000, 28, 28, 1)
X_test = X_test.reshape(10000, 28, 28, 1)

# one-hot encode the target column
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# create model
model = Sequential()
# add model layers
model.add(Conv2D(64, kernel_size=3, activation="relu", input_shape=(28, 28, 1)))
model.add(Conv2D(32, kernel_size=3, activation="relu"))
model.add(Flatten())
model.add(Dense(10, activation="softmax"))

# compile model using accuracy to measure model performance
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras import utils
from keras.optimizers import Adam

np.random.seed(1671)

(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 784)
X_train = X_train.astype('float32')
X_test = X_test.reshape(10000, 784)
X_test = X_test.astype('float32')

# normalize data
X_train /= 255
X_test /= 255

y_train = utils.to_categorical(y_train, 10)
y_test = utils.to_categorical(y_test, 10)

model = Sequential()
model.add(Dense(128, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(y_train.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
model.summary()

history = model.fit(X_train, y_train, batch_size=128, epochs=20, validation_split=0.2)
score = model.evaluate(X_test, y_test)
for imgPath in tqdm(imgPaths):
    image = load_img(imgPath, target_size=(28, 28), grayscale=True)
    image = img_to_array(image)
    data.append(image)
    label = imgPath.split(os.path.sep)[-2]
    label = int(label)
    labels.append(label)

data = np.array(data, dtype=float) / 255.
labels = np.array(labels)

train_input, valid_input, train_target, valid_target = train_test_split(
    data, labels, test_size=0.25, random_state=123)
train_target = to_categorical(train_target, num_classes)
valid_target = to_categorical(valid_target, num_classes)

aug = ImageDataGenerator(
    rotation_range=10,       # randomly rotate images in the range (degrees, 0 to 180)
    zoom_range=0.1,          # randomly zoom image
    width_shift_range=0.1,   # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.1)  # randomly shift images vertically (fraction of total height)

input_shape = (28, 28, 1)
model = build_model(input_shape)
opt = Adam()
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["acc"])
def train_on_VeRi(model, out_name, fix_layer=-1, xml_file='train_label.xml',
                  image_folder='image_train', ismodelfile=True):
    nb_epoch = 70
    num_classes = 776
    batch_size = 32

    labels = []
    train_names = []
    xmlp = ET.XMLParser(encoding="utf-8")
    # train label file - xml
    f = ET.parse(xml_file, parser=xmlp)
    root = f.getroot()
    for child in root.iter('Item'):
        labels.append(child.attrib['vehicleID'])
        train_names.append(os.path.join(image_folder, child.attrib['imageName']))
    labels = utils.to_categorical(labels, num_classes=776)

    X_train = []
    for name in train_names:
        x = load_img(name, target_size=(img_rows, img_cols))
        x = img_to_array(x)
        X_train.append(x)
    X_train = np.array(X_train, ndmin=4)

    train_datagen = ImageDataGenerator(
        rescale=1. / 255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)
    train_generator = train_datagen.flow(x=X_train, y=labels, batch_size=batch_size)

    if ismodelfile:
        base_model = load_model(model)
    else:
        base_model = model

    x = base_model.get_layer(name='block5_pool').output
    x = Flatten()(x)
    x = Dense(4096, activation='relu', name='fc1')(x)
    x = Dense(4096, activation='relu', name='fc2')(x)
    x = Dropout(0.5)(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    if fix_layer > -1:
        for layer in model.layers[:fix_layer]:
            layer.trainable = False

    sgd = SGD(lr=1e-3)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit_generator(
        train_generator,
        epochs=nb_epoch,
        steps_per_epoch=1184,
    )
    model.save(out_name)
    return model
def train_model(name, regression_train_data_path, classification_train_data_path,
                regression_dev_data_path, classification_train_dev_path, word_index,
                use_neural_features=True, use_lexicon_features=True,
                feature_extraction_model=None):
    early_stop = EarlyStopping(monitor='val_loss', patience=3, verbose=1)
    lexicon_feature_extractor = LexiconFeatureExtractor()

    print("Preparing Data for {} model ...".format(name))
    train_x, train_y_labels = data_utils.load_affect_data(classification_train_data_path,
                                                          is_label_numeric=False)
    _, train_y_scores = data_utils.load_affect_data(regression_train_data_path)
    train_y_labels = to_categorical(train_y_labels)
    train_y_scores = np.array(train_y_scores)

    dev_x, dev_y_labels = data_utils.load_affect_data(classification_train_dev_path,
                                                      is_label_numeric=False)
    _, dev_y_scores = data_utils.load_affect_data(regression_dev_data_path)
    dev_y_scores = np.array(dev_y_scores)
    dev_y_labels = to_categorical(dev_y_labels)

    if use_neural_features and use_lexicon_features:
        neural_fetures_train = feature_extraction_model.predict(encode(train_x, word_index))
        lexicon_features_train = [extract_lexicon_features(instance, lexicon_feature_extractor)
                                  for instance in train_x]
        lexicon_features_train = np.array(lexicon_features_train)

        neural_fetures_dev = feature_extraction_model.predict(encode(dev_x, word_index))
        lexicon_features_dev = [extract_lexicon_features(instance, lexicon_feature_extractor)
                                for instance in dev_x]
        lexicon_features_dev = np.array(lexicon_features_dev)

        model = create_prediction_model(len(neural_fetures_train[0]),
                                        len(lexicon_features_train[0]),
                                        len(train_y_labels[0]))
        print(model.summary())
        print("Start training for {} model ...".format(name))
        model.fit(x=[neural_fetures_train, lexicon_features_train],
                  y={"regression_output": train_y_scores,
                     "classification_output": train_y_labels},
                  validation_data=(
                      [neural_fetures_dev, lexicon_features_dev],
                      {"regression_output": dev_y_scores,
                       "classification_output": dev_y_labels}
                  ),
                  epochs=100, callbacks=[early_stop], batch_size=1)
    elif use_lexicon_features and not use_neural_features:
        lexicon_features_train = [extract_lexicon_features(instance, lexicon_feature_extractor)
                                  for instance in train_x]
        lexicon_features_train = np.array(lexicon_features_train)
        lexicon_features_dev = [extract_lexicon_features(instance, lexicon_feature_extractor)
                                for instance in dev_x]
        lexicon_features_dev = np.array(lexicon_features_dev)

        model = create_prediction_model(0, len(lexicon_features_train[0]),
                                        len(train_y_labels[0]))
        print(model.summary())
        print("Start training for {} model ...".format(name))
        model.fit(x=lexicon_features_train,
                  y={"regression_output": train_y_scores,
                     "classification_output": train_y_labels},
                  validation_data=(
                      lexicon_features_dev,
                      {"regression_output": dev_y_scores,
                       "classification_output": dev_y_labels}
                  ),
                  epochs=100, callbacks=[early_stop], batch_size=1)
    elif not use_lexicon_features and use_neural_features:
        neural_fetures_train = feature_extraction_model.predict(encode(train_x, word_index))
        neural_fetures_dev = feature_extraction_model.predict(encode(dev_x, word_index))

        model = create_prediction_model(len(neural_fetures_train[0]), 0,
                                        len(train_y_labels[0]))
        print(model.summary())
        print("Start training for {} model ...".format(name))
        model.fit(x=neural_fetures_train,
                  y={"regression_output": train_y_scores,
                     "classification_output": train_y_labels},
                  validation_data=(
                      neural_fetures_dev,
                      {"regression_output": dev_y_scores,
                       "classification_output": dev_y_labels}
                  ),
                  epochs=100, callbacks=[early_stop], batch_size=1)
    else:
        raise Exception("You must use at least one of the feature sets!")
    # note: the evaluation below assumes both feature sets were used
    predictions = model.predict([neural_fetures_dev, lexicon_features_dev])
    print("pearson correlation={}".format(pearson_correlation(dev_y_scores, predictions[0])))

    print("Saving {} model ...".format(name))
    model.save("resources/saved_models/{}.model".format(name))
    X = Dense(120, activation='tanh', name='fc1')(X)
    X = Dense(84, activation='tanh', name='fc2')(X)
    X = Dense(10, activation='softmax')(X)

    model = Model(inputs=X_input, outputs=X, name='lenet_5')

    return model

model = lenet_5(input_shape=(28, 28, 1), classes=10)
model.summary()
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])

X_train = data_train[:, 1:].reshape((data_train.shape[0], 28, 28, 1))
Y_train = to_categorical(data_train[:, 0])
model.fit(X_train, Y_train, epochs=10, batch_size=16)

X_val = data_val[:, 1:].reshape((data_val.shape[0], 28, 28, 1))
Y_val = to_categorical(data_val[:, 0])
preds = model.evaluate(X_val, Y_val)
print("Validation loss=" + str(preds[0]))
print("Validation accuracy=" + str(preds[1]))

X_test = data_test.reshape((data_test.shape[0], 28, 28, 1))
predicted = np.argmax(model.predict(X_test), axis=1)
with open('data/submission.csv', 'w') as f:
    f.write('ImageId,Label\n')
    for i in range(len(predicted)):
        f.write(str(i + 1) + ',' + str(predicted[i]) + '\n')
print(predicted)
sess.run(tf.global_variables_initializer())
print("Testing started")

scale = 3
BATCH_SIZE = 128
cfar10 = CFAR10.KittiData(path=["KITTI/train_y.npy", "KITTI/label_y.npy"])
accuracy = []

while True:
    xs, ys = cfar10.next_batch(batch_size=BATCH_SIZE, shuffle=False)
    xs = scale * xs
    lo = sess.run(layerout7, {input_real: xs, output_real: ys})
    layerout = np.argmin(lo, axis=1)
    layerout = to_categorical(layerout, 10)

    accurate = 0
    for i in range(len(ys)):
        if (layerout[i] == ys[i]).all():
            accurate = accurate + 1
    accurate = accurate / 128.
    # print(accurate)
    accuracy.append(accurate)

    step = sess.run(step_inc_op)
    if step % 10 == 0:
        accurate1 = tf.reduce_mean(accuracy)
        print('Step: ' + repr(step) + ', ' + 'Accurate: ' + repr(sess.run(accurate1)))
    if step == 78:
        break
def label2vec(labels):
    labels1 = encoder.transform(labels)
    one_hot_labels1 = to_categorical(labels1, num_classes=102)
    return labels1, one_hot_labels1
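# Usage sketch with a toy stand-in for the script's fitted encoder (assumes
# sklearn and to_categorical are available; the real encoder covers 102 classes):
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder().fit(['daisy', 'rose', 'tulip'])
ids, onehots = label2vec(['rose', 'daisy'])
print(ids, onehots.shape)  # [1 0] (2, 102)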
    else:
        test_images.append(image)
        test_labels.append(label)
        to_train = 0

# -----------------keras time --> make the model

# flatten data
dataDim = np.prod(images[0].shape)
train_data = flatten(dataDim, train_images)
test_data = flatten(dataDim, test_images)

# change labels to categorical
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)
train_labels_one_hot = to_categorical(train_labels)
test_labels_one_hot = to_categorical(test_labels)

# determine the number of classes
classes = np.unique(train_labels)
nClasses = len(classes)

# three layers
# activation function: both
# neurons: 256
model = Sequential()
model.add(Dense(256, activation='tanh', input_shape=(dataDim,)))
model.add(Dropout(0.2))
model.add(Dense(256, activation='tanh'))
model.add(Dropout(0.2))