def getSchema(db: Dataset, version, estm) -> BaseSchema:
    # Resolve the schema class dynamically: 'Schema' + version names a class in the Schema module
    module = __import__('Schema')
    schema = getattr(module, 'Schema' + version)()
    # Build the network variant that matches the requested estimator type
    if estm == Estm.Conventional:
        schema.buildConventional(db.get_shape(), db.info['n_cls'])
    elif estm == Estm.Siamese:
        schema.buildSiamese(db.get_shape(), db.info['n_cls'])
    elif estm == Estm.Triplet:
        schema.buildTriplet(db.get_shape(), db.info['n_cls'])
    return schema
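# Usage sketch (hedged): this assumes a Dataset obtained via getDataset and a Schema module
# that actually exposes a class named 'Schema' + version; the version string 'V1' and the
# dataset name 'MNIST' below are illustrative, not names confirmed by this codebase.
#
#   db = getDataset('MNIST')
#   schema = getSchema(db, 'V1', Estm.Siamese)   # resolves Schema.SchemaV1 and builds a Siamese net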
def reshaping(db: Dataset) -> Dataset:
    # db.info['shape'] is (flat, rows, cols, channels) for image data, shorter for flat data
    shape = db.info['shape']
    flat = shape[0]
    img_rows = shape[1] if len(shape) > 2 else None
    img_cols = shape[2] if len(shape) > 3 else None
    channels = shape[3] if len(shape) > 3 else None  # guard as for img_cols to avoid IndexError on flat shapes
    if not flat:
        db.X_train = db.X_train.reshape(db.X_train.shape[0], img_rows, img_cols, channels)
        db.X_test = db.X_test.reshape(db.X_test.shape[0], img_rows, img_cols, channels)
    return db
def fit(self, db: Dataset, epochs: int = 1000, batch_size: int = 128,
        verbose: int = 2, callbacks: list = []) -> History:
    history = self.model.fit(db.X_train, db.Y_train(),
                             validation_data=(db.X_test, db.Y_test()),
                             epochs=epochs, batch_size=batch_size,
                             verbose=verbose, callbacks=callbacks)
    return History(history.epoch, history.params, history.history)
def prep_data(self):
    training, testing = Dataset.kohonen_dataset(in_features=self.features, team=self.team)

    # Split matches by the label in the last column: 0 = home win, 1 = away win, 2 = tie (optional)
    self.training_0 = [x[:-1] for x in training if x[-1] == 0]  # Home-win matches
    self.training_1 = [x[:-1] for x in training if x[-1] == 1]  # Away-win matches
    self.training_2 = [x[:-1] for x in training if x[-1] == 2] if self.include_ties else []  # Tie matches
    self.testing_0 = [x[:-1] for x in testing if x[-1] == 0]  # Home-win matches
    self.testing_1 = [x[:-1] for x in testing if x[-1] == 1]  # Away-win matches
    self.testing_2 = [x[:-1] for x in testing if x[-1] == 2] if self.include_ties else []  # Tie matches

    self.training_data = self.training_0 + self.training_1 + self.training_2

    if self.data_processing == "Normalize":  # Rescale each sample to unit norm
        self.training_data = preprocessing.normalize(self.training_data)
        self.testing_0 = preprocessing.normalize(self.testing_0)
        self.testing_1 = preprocessing.normalize(self.testing_1)
        self.testing_2 = preprocessing.normalize(self.testing_2)
    elif self.data_processing == "Standardize":  # Standardize each feature to zero mean, unit variance
        self.training_data = preprocessing.scale(self.training_data)
        self.testing_0 = preprocessing.scale(self.testing_0)
        self.testing_1 = preprocessing.scale(self.testing_1)
        self.testing_2 = preprocessing.scale(self.testing_2)
    else:  # Keep the data as-is
        pass

    if self.shuffle_data:
        random.shuffle(self.training_data)
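# Note on the two preprocessing modes used above (standard scikit-learn semantics):
# preprocessing.normalize() rescales each sample (row) to unit L2 norm, while
# preprocessing.scale() standardizes each feature (column) to zero mean and unit
# variance. The toy matrix below is purely illustrative.
#
#   from sklearn import preprocessing
#   X = [[1.0, 2.0], [3.0, 4.0]]
#   preprocessing.normalize(X)   # each row has unit L2 norm
#   preprocessing.scale(X)       # each column has mean 0 and std 1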
def getDataset(name: str = '') -> Dataset:
    if name == '':
        return Dataset()
    X_train = y_train = np.array([])
    for cv in CVNAMES:
        data = np.genfromtxt(DATABASE + '/' + name + '/' + cv + '.txt', delimiter=',')
        if cv == 'E':
            X_test = data[:, 1:].astype('float64')
            y_test = data[:, 0].astype('int')
        else:
            X_train = np.concatenate((X_train, data[:, 1:].astype('float64')), axis=0) \
                if X_train.size else data[:, 1:].astype('float64')
            y_train = np.concatenate((y_train, data[:, 0].astype('int')), axis=0) \
                if y_train.size else data[:, 0].astype('int')
    return Dataset(name, X_train, y_train, X_test, y_test)
def fit_on_batch(self, db: Dataset, gen: BaseGenerator, epochs: int = 1000000,
                 batch_size: int = 128, patience: int = 100, verbose: int = 2,
                 callbacks: list = []) -> History:
    # One history list per metric, for both the training and validation passes
    history = {}
    for item in self.model.metrics_names:
        history.update({item: []})
        history.update({'val_' + item: []})

    def _print_report(ltype, metrics_value):
        for i, item in enumerate(self.model.metrics_names):
            if ltype == 'train':
                print("%s: %.5f - " % (item, metrics_value[i]), end='')
            elif ltype == 'test':
                print("%s: %.5f - " % ('val_' + item, metrics_value[i]), end='')

    def _update_history(ltype, metrics_value):
        for i, item in enumerate(self.model.metrics_names):
            if ltype == 'train':
                history[item].append(metrics_value[i])
            elif ltype == 'test':
                history['val_' + item].append(metrics_value[i])

    epoch = []
    for i in range(epochs):
        # Train on one generated batch, then evaluate on the full test split
        X_data, y_data = gen.get_batch()
        metrics_value = self.model.train_on_batch(
            X_data, to_categorical(y_data, num_classes=db.info['n_cls']))
        _update_history('train', metrics_value)
        val_metrics_value = self.model.test_on_batch(db.X_test, db.Y_test())
        _update_history('test', val_metrics_value)
        epoch.append(i)

        if not i % 100:
            print("Batch %d --> " % i, end='')
            _print_report('train', metrics_value)
            _print_report('test', val_metrics_value)
            print('')

        # Early stopping: lose one unit of patience whenever the validation loss has not
        # improved on the value from 100 batches earlier (guard against indexing before
        # 100 entries exist, which would raise an IndexError)
        if i >= 100 and history['val_loss'][-101] <= val_metrics_value[0]:
            patience -= 1
            if not patience:
                break

    return History(epoch=epoch, history=history)
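# Usage sketch (hedged): 'gen' can be any BaseGenerator whose get_batch() returns an
# (X_data, y_data) pair; the generator class name below is hypothetical, and the History
# object is assumed to expose the 'history' dict it was constructed with.
#
#   gen = SomeBatchGenerator(db, batch_size=128)
#   hist = schema.fit_on_batch(db, gen, epochs=10000, patience=50)
#   print(hist.history['val_loss'][-1])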
def getDataset(name: str = '') -> Dataset:
    if name == '':
        return Dataset()

    # Serve the dataset from the pickled cache if one was written previously
    if os.path.exists(DATABASE + '/' + name + '/Full.cp'):
        with open(DATABASE + '/' + name + '/Full.cp', 'rb') as fileObj:
            X_train, y_train, X_test, y_test = _pickle.load(fileObj)
        return Dataset(name, X_train, y_train, X_test, y_test)

    X_train = y_train = np.array([])
    for cv in CVNAMES:
        data = pd.read_csv(DATABASE + '/' + name + '/' + cv + '.txt').values
        if cv == 'E':
            # Fold 'E' is held out as the test split
            X_test = data[:, 1:].astype('float32')
            y_test = data[:, 0].astype('int')
        else:
            X_train = np.concatenate((X_train, data[:, 1:].astype('float32')), axis=0) \
                if X_train.size else data[:, 1:].astype('float32')
            y_train = np.concatenate((y_train, data[:, 0].astype('int')), axis=0) \
                if y_train.size else data[:, 0].astype('int')

    # Cache the assembled arrays so subsequent calls skip the CSV parsing
    with open(DATABASE + '/' + name + '/Full.cp', 'wb') as fileObj:
        _pickle.dump((X_train, y_train, X_test, y_test), fileObj)
    return Dataset(name, X_train, y_train, X_test, y_test)
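# Usage sketch: the first call for a given name parses the per-fold files listed in CVNAMES
# and writes Full.cp; subsequent calls load that pickle directly. The dataset name 'Iris'
# below is illustrative only.
#
#   db = getDataset('Iris')   # slow path: reads the fold files, then caches Full.cp
#   db = getDataset('Iris')   # fast path: loads Full.cp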