def grab_data(config, examples, labels, is_train=True):
    params = {
        'batch_size': config['batch_size'],
        'num_workers': 4,
        'pin_memory': True,
        'sampler': None,
    }
    # ImageNet channel statistics
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # training pipeline: random crop + horizontal flip for augmentation
    tr_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomResizedCrop(224, (0.08, 1), (0.5, 4.0 / 3)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    # evaluation pipeline: deterministic center crop
    ts_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    if is_train:
        params['shuffle'] = True
        data_set = data.DataLoader(
            DataSet(config, examples, labels, tr_transforms, is_train), **params)
    else:
        params['shuffle'] = False
        data_set = data.DataLoader(
            DataSet(config, examples, labels, ts_transforms, is_train), **params)
    return data_set
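# Usage sketch (not from the original project): assumes the module-level
# imports `from torch.utils import data` and `from torchvision import transforms`,
# plus a DataSet class with the signature DataSet(config, examples, labels, transform, is_train).
# `config`, `examples`, and `labels` below are hypothetical placeholders.
config = {'batch_size': 32}
examples = ['images/cat_001.jpg', 'images/dog_001.jpg']
labels = [0, 1]

train_loader = grab_data(config, examples, labels, is_train=True)   # shuffled + augmented
test_loader = grab_data(config, examples, labels, is_train=False)   # deterministic center crop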
def extractImageFile(path_img, path_labels):
    # Structure is as follows:
    # [offset] [type]          [value]           [description]
    # 0000     32 bit integer  0x00000803(2051)  magic number
    # 0004     32 bit integer  60000             number of images
    # 0008     32 bit integer  28                number of rows
    # 0012     32 bit integer  28                number of columns
    # 0016     unsigned byte   ??                pixel
    # 0017     unsigned byte   ??                pixel
    # xxxx     unsigned byte   ??                pixel
    f = open(path_img, "rb")

    # HEADER
    int.from_bytes(f.read(4), byteorder="big")
    number_of_images = int.from_bytes(f.read(4), byteorder="big")
    row = int.from_bytes(f.read(4), byteorder="big")
    col = int.from_bytes(f.read(4), byteorder="big")
    labels = extractLabelFile(path_labels)

    # DATA
    data = np.zeros(number_of_images * row * col)
    EOF = b''
    byte = f.read(1)
    i = 0
    while byte != EOF:
        data[i] = nomalyzeByte(int.from_bytes(byte, byteorder="big"))
        byte = f.read(1)
        i += 1

    dataset = DataSet(number_of_images, labels, row, col, data)
    f.close()
    return dataset
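# extractLabelFile is called above but not shown here. A minimal sketch,
# assuming the standard MNIST/IDX label file layout:
#   0000  32 bit integer  0x00000801(2049)  magic number
#   0004  32 bit integer  60000             number of items
#   0008  unsigned byte   ??                label
#   xxxx  unsigned byte   ??                label
def extractLabelFile(path_labels):
    with open(path_labels, "rb") as f:
        int.from_bytes(f.read(4), byteorder="big")  # magic number
        number_of_items = int.from_bytes(f.read(4), byteorder="big")
        labels = np.frombuffer(f.read(number_of_items), dtype=np.uint8).astype(int)
    return labels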
def load(path: str = 'dataset/mnist', valid_size: int = 5000,
         mean_subtraction=True, normalization=True):
    images_train_valid = load_images(os.path.join(path, train_images_filename))
    labels_train_valid = load_labels(os.path.join(path, train_labels_filename))

    images_valid = images_train_valid[:valid_size]
    labels_valid = labels_train_valid[:valid_size]
    images_train = images_train_valid[valid_size:]
    labels_train = labels_train_valid[valid_size:]

    images_test = load_images(os.path.join(path, test_images_filename))
    labels_test = load_labels(os.path.join(path, test_labels_filename))

    # mean subtraction
    if mean_subtraction:
        train_mean = np.mean(images_train, axis=0)
        images_train -= train_mean
        images_valid -= train_mean
        images_test -= train_mean

    # normalization
    if normalization:
        train_std = np.std(images_train, axis=0)
        train_std += (train_std == 0).astype(int)  # avoid division by zero for constant pixels
        images_train /= train_std
        images_valid /= train_std
        images_test /= train_std

    return DataSet(
        train=(images_train, labels_train),
        validation=(images_valid, labels_valid),
        test=(images_test, labels_test)
    )
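# Usage sketch (assumes the load_images/load_labels helpers and the
# *_filename constants referenced above are defined elsewhere in this module):
ds = load(path='dataset/mnist', valid_size=5000)
X_train, y_train = ds.train  # attribute name assumed to match the kwargs above
# Because mean and std are computed on the training split only, the validation
# and test images are standardized with training statistics (no leakage).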
class Test:
    def __init__(self):
        self.ds = DataSet()

    def test(self):
        X, Y = self.ds.load_dataset()
        print("dataset size {}".format(X.shape))

        # load architecture and weights saved by Train
        json_file = open(str(model_dir.joinpath('model.json')), 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        model = tf.keras.models.model_from_json(loaded_model_json)
        model.load_weights(str(model_dir.joinpath('model.h5')))
        print("Loaded model from disk")

        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
        results = model.evaluate(X, Y)
        print('test loss, test acc:', results)

        for i, x in enumerate(X):
            pred = model.predict(np.array([x]))
            print("{}: pred: {} max: {}".format(i, np.argmax(pred[0]), np.max(pred[0])))
class Train:
    def __init__(self):
        self.ds = DataSet()

    def train(self, lr=1e-3, epochs=12):
        X, Y = self.ds.load_dataset()
        print("dataset size {}".format(X.shape))
        batch, num_features, num_channel = X.shape

        xception = Xception(num_classes=13, num_features=num_features)

        checkpoint_filepath = str(model_dir.joinpath('model.h5'))
        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=checkpoint_filepath,
            monitor='accuracy',
            mode='max',
            verbose=1,
            save_best_only=True)

        model = xception.get_model()
        adam = tf.keras.optimizers.Adam(learning_rate=lr)
        model.compile(optimizer=adam,
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        history = model.fit(X, Y, epochs=epochs,
                            callbacks=[model_checkpoint_callback], verbose=2)

        model_json = model.to_json()
        with open(str(model_dir.joinpath('model.json')), "w") as json_file:
            json_file.write(model_json)
        # note: this overwrites the best checkpoint saved above with the final weights
        model.save_weights(str(model_dir.joinpath('model.h5')))
        print("Saved model to disk")

        plt.plot(history.history['accuracy'])
        plt.title('Model accuracy')
        plt.ylabel('Accuracy')
        plt.xlabel('Epoch')
        plt.legend(['Train'], loc='upper left')
        plt.savefig(str(model_dir.joinpath('acc.png')))
        plt.close()

        plt.plot(history.history['loss'])
        plt.title('Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(['Train'], loc='upper left')
        plt.savefig(str(model_dir.joinpath('loss.png')))
def load(path: str = 'dataset/cifar10/cifar-10-batches-py', valid_size: int = 5000,
         mean_subtraction=True, normalization=True):
    images_train_valid = np.zeros(
        shape=(train_size, channels, image_height, image_width), dtype=float)
    labels_train_valid = np.zeros(shape=train_size, dtype=int)

    start = 0
    for i in range(train_files):
        images_batch, labels_batch = load_data(
            os.path.join(path, "data_batch_{}".format(i + 1)))
        size = images_batch.shape[0]
        images_train_valid[start:start + size] = images_batch
        labels_train_valid[start:start + size] = labels_batch
        start = start + size

    images_valid = images_train_valid[:valid_size]
    labels_valid = labels_train_valid[:valid_size]
    images_train = images_train_valid[valid_size:]
    labels_train = labels_train_valid[valid_size:]

    images_test, labels_test = load_data(os.path.join(path, "test_batch"))

    # mean subtraction
    if mean_subtraction:
        train_mean = np.mean(images_train, axis=0)
        images_train -= train_mean
        images_valid -= train_mean
        images_test -= train_mean

    # normalization
    if normalization:
        train_std = np.std(images_train, axis=0)
        train_std += (train_std == 0).astype(int)
        images_train /= train_std
        images_valid /= train_std
        images_test /= train_std

    return DataSet(train=(images_train, labels_train),
                   validation=(images_valid, labels_valid),
                   test=(images_test, labels_test))
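# load_data is called above but not shown here. A minimal sketch, assuming the
# standard CIFAR-10 "python version" batch format: a pickled dict whose b'data'
# entry is an N x 3072 uint8 array (rows are the R, G, B planes) and whose
# b'labels' entry is a list of N ints.
import pickle

def load_data(file_path):
    with open(file_path, 'rb') as f:
        batch = pickle.load(f, encoding='bytes')
    images = batch[b'data'].reshape(-1, channels, image_height, image_width).astype(float)
    labels = np.array(batch[b'labels'], dtype=int)
    return images, labels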
class Train:
    def __init__(self):
        self.ds = DataSet()

    def train(self, lr=1e-3, epochs=12):
        X, Y = self.ds.load_dataset()
        batch, num_features, num_channel = X.shape

        xception = Xception(num_classes=4, num_features=num_features)

        mc = tf.keras.callbacks.ModelCheckpoint(
            str(model_dir.joinpath('model.h5')),
            monitor='accuracy',
            mode='auto',
            verbose=1,
            save_best_only=True)

        model = xception.get_model()
        adam = tf.keras.optimizers.Adam(learning_rate=lr)
        model.compile(optimizer=adam,
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        history = model.fit(X, Y, epochs=epochs, callbacks=[mc], verbose=2)
# pca_dim = np.load(osp.join(args.pca_dir, 'pca_dim.npy'))
# pca_dir = np.load(osp.join(args.pca_dir, 'pca_dir.npy')).item()
pca_dim, pca_dir = PCA_Dim_Compute(os.path.join(args.data_dir, 'features'),
                                   args.pca_ratio)

model = VGG(pca_dir=pca_dir, num_classes=args.num_classes, dim=pca_dim, pca=True)
model.initialize_weights(pretrained=True,
                         weight_path=osp.join(args.pretrained_ra),
                         feat_path=os.path.join(args.data_dir, 'features'))
cudnn.benchmark = True

trainloader = data.DataLoader(DataSet(args.data_dir, mean=IMG_MEAN),
                              batch_size=args.batch_size, shuffle=True,
                              num_workers=args.num_workers, pin_memory=True)
trainloader_iter = enumerate(trainloader)

testloader = data.DataLoader(TestDataSet(args.data_dir, mean=IMG_MEAN),
                             batch_size=args.batch_size, shuffle=False,
                             num_workers=args.num_workers, pin_memory=True)
testloader_iter = enumerate(testloader)
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

import tensorflow as tf

from dataset.dataset import DataSet
from train.train import Train
from test.test import Test
from model.xception import Xception

'''
This project uses tensorflow 2.1.0
'''
print('tensorflow version {}'.format(tf.__version__))

ds = DataSet()
xception = Xception()
# xception.plot_model()

train = Train()
train.train(epochs=3)
def train(self, data_set: DataSet, method: str, num_passes: int = 20,
          batch_size: int = 128, verbose: bool = True):
    if verbose:
        print('\ntrain method: {}'.format(method),
              '\nnum_passes: {}'.format(num_passes),
              '\nbatch_size: {}\n'.format(batch_size))

    start_total_time = time.time()

    X_train, y_train = data_set.train_set()
    X_valid, y_valid = data_set.validation_set()

    """ initialize layers """
    input_size = X_train[0].shape
    for layer in self.layers:
        input_size = layer.initialize(input_size, self.num_classes, method)
        layer.reset_params()

    step = 0
    for epoch in range(num_passes):
        """ decay learning rate if necessary """
        if self.lr_decay > 0 and epoch > 0 and (epoch % self.lr_decay_interval) == 0:
            self.optimizer.decay_learning_rate(self.lr_decay)
            if verbose:
                print("Decreased learning rate by {}".format(self.lr_decay))

        for batch in data.mini_batches(X_train, y_train, batch_size):
            X_batch, y_batch = batch

            """ forward pass """
            start_forward_time = time.time()
            for layer in self.layers:
                X_batch = layer.forward(X_batch, mode='train')
            self.statistics['forward_time'] += time.time() - start_forward_time

            """ loss """
            loss, delta = self.loss.calculate(X_batch, y_batch)

            """ backward pass """
            gradients = []
            start_backward_time = time.time()
            if method == 'dfa':
                for layer in self.layers:
                    dW, db = layer.dfa(delta)
                    gradients.append((layer, dW, db))
            elif method == 'bp':
                dX = delta
                for layer in reversed(self.layers):
                    dX, dW, db = layer.back_prob(dX)
                    gradients.append((layer, dW, db))
                gradients.reverse()
            else:
                raise ValueError("Invalid train method '{}'".format(method))
            self.statistics['backward_time'] += time.time() - start_backward_time

            """ regularization (L2) """
            start_regularization_time = time.time()
            if self.regularization > 0:
                reg_term = 0
                for layer, dW, db in gradients:
                    if layer.has_weights():
                        dW += self.regularization * layer.W
                        reg_term += np.sum(np.square(layer.W))
                reg_term *= self.regularization / 2.
                reg_term /= y_batch.shape[0]
                loss += reg_term
            self.statistics['regularization_time'] += time.time() - start_regularization_time

            """ update """
            start_update_time = time.time()
            update = UpdateLayer(self.optimizer)
            self.layers = [update(x) for x in gradients]
            self.statistics['update_time'] += time.time() - start_update_time

            """ log statistics """
            accuracy = (np.argmax(X_batch, axis=1) == y_batch).sum() / y_batch.shape[0]
            self.statistics['train_loss'].append(loss)
            self.statistics['train_accuracy'].append(accuracy)
            if (step % 10) == 0 and verbose:
                print("epoch {}, step {}, loss = {:07.5f}, accuracy = {}".format(
                    epoch, step, loss, accuracy))
            step += 1

        """ log statistics """
        valid_loss, valid_accuracy = self.cost(X_valid, y_valid)
        self.statistics['valid_step'].append(step)
        self.statistics['valid_loss'].append(valid_loss)
        self.statistics['valid_accuracy'].append(valid_accuracy)
        if verbose:
            print("validation after epoch {}: loss = {:07.5f}, accuracy = {}".format(
                epoch, valid_loss, valid_accuracy))

    self.statistics['total_time'] = time.time() - start_total_time
    return self.statistics
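# data.mini_batches is used in the training loop above but not shown here.
# A minimal sketch of a shuffled mini-batch generator with the same call
# signature (the project's real helper may differ, e.g. in how it shuffles
# or handles the final partial batch):
def mini_batches(X, y, batch_size):
    indices = np.random.permutation(X.shape[0])
    for start in range(0, X.shape[0], batch_size):
        batch_idx = indices[start:start + batch_size]
        yield X[batch_idx], y[batch_idx]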
# Load numpy
import numpy as np
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier

# Set random seed
# np.random.seed(3)

good_annot = ["Live_In", "NONE", "Work_For"]


def print_annot_file(annot_sent_dic, label):
    line = 'sent' + str(annot_sent_dic['id']) + '\t' + str(annot_sent_dic['ent1']) + '\t' \
        + good_annot[label] + '\t' + str(annot_sent_dic['ent2']) + '\t' \
        + '( ' + str(annot_sent_dic['sent']) + ' )' + '\n'
    return line


if __name__ == "__main__":
    ds = DataSet('dataset')

    # Create a random forest Classifier. By convention, clf means 'Classifier'
    clf = RandomForestClassifier(n_estimators=14, random_state=3,
                                 class_weight={0: 4, 1: 1, 2: 5}, max_depth=12)
    # clf = LogisticRegression(random_state=0, class_weight={0: 4, 1: 1, 2: 4})

    # Train the Classifier to take the training features and learn how they
    # relate to the training labels
    X, y = ds.train
    clf.fit(X, y)

    X_dev, y_dev = ds.dev
    annot_data_dev = ds.dev_annot_data
    y_pred = clf.predict(X_dev)

    print('f1-WRK-LIV:', metrics.f1_score(y_dev, y_pred, labels=[0, 2], average='weighted'))
    print('f1-NON:', metrics.f1_score(y_dev, y_pred, labels=[1], average='weighted'))
import os
import cv2
import numpy as np

import startup
import config
from dataset.dataset import DataSet
from algorithms.analysisframework import AnalysisFramework
from algorithms.distance import *
from utils import *

# Read data associated with the net
label_file = "/media/blcv/drive_2TB/CODE/FV-Benchmark/DataSets/Felix/labels.txt"
main_folder = "/media/blcv/drive_2TB/CODE/FV-Benchmark/"
config_file = main_folder + "Nets/configs_net/casia_ver_conv52_correct_felix.json"
config_file = parse_deep_config(config_file)
data = DataSet("deep", config_file)
labels = data.labels
labels_set = set(labels)

# After calculating matches outside IPython (using parallel etc.),
# load scores and extract the best images
score_file = "/media/blcv/drive_2TB/CODE/FV-Benchmark/FELIX_data/calculate_matching/divide_pairs/scores_all.txt"
with open(score_file, 'r') as f:
    lines_score = [line.strip() for line in f]
lines_score = sorted(
    lines_score,
    key=lambda x: int(x.split('/')[-1].split('.')[0].split("_")[1]),
    reverse=False)

verification_task = np.load(
    "/media/blcv/drive_2TB/CODE/FV-Benchmark/notebooks/verification_felix.npy")
scores_matrix = np.zeros((verification_task.shape[0]))
def __init__(self):
    self.ds = DataSet()