def perform_ridge_regression(self):
    print('*********************************************RIDGE REGRESSION**************************************************')
    model_trainer = ModelTrainer()
    ridge = Ridge(alpha=1.0)
    Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
        ridge, self.X_train, self.X_test, self.Y_train, self.Y_test)
    evl = MetricsCalculator()
    evl.evaluate('root mean square error for ridge regression',
                 y_true_glucose, y_pred_glucose)
    viz = Visualizer()
    viz.visualize('ridge regression', y_true_glucose, y_pred_glucose)
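# A minimal, self-contained sketch of the ModelTrainer.train_model contract
# that the perform_* methods in this file assume: fit the estimator, predict
# on the test split, and return both the raw arrays and glucose-scale arrays
# for RMSE evaluation and plotting. The class name and the target_scaler
# argument are hypothetical illustrations, not the repository's actual
# implementation; the real class may recover glucose units differently.
class ModelTrainerSketch:
    def __init__(self, target_scaler=None):
        self.target_scaler = target_scaler

    def train_model(self, model, X_train, X_test, Y_train, Y_test):
        model.fit(X_train, Y_train)
        Y_pred = model.predict(X_test)
        if self.target_scaler is not None:
            # Hypothetical: map scaled targets back to glucose units.
            y_true_glucose = self.target_scaler.inverse_transform(
                Y_test.reshape(-1, 1)).ravel()
            y_pred_glucose = self.target_scaler.inverse_transform(
                Y_pred.reshape(-1, 1)).ravel()
        else:
            y_true_glucose, y_pred_glucose = Y_test, Y_pred
        return Y_test, Y_pred, y_true_glucose, y_pred_glucose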
def perform_linear_regression(self):
    print('------------------------------------------LINEAR REGRESSION------------------------------------------')
    model_trainer = ModelTrainer()
    linear_reg = LinearRegression()
    Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
        linear_reg, self.X_train, self.X_test, self.Y_train, self.Y_test)
    evl = MetricsCalculator()
    evl.evaluate('root mean square error for linear regression',
                 y_true_glucose, y_pred_glucose)
    viz = Visualizer()
    viz.visualize('linear regression', y_true_glucose, y_pred_glucose)
def perform_lasso_regression(self):
    print('................................... LASSO REGRESSION ............................................')
    model_trainer = ModelTrainer()
    lasso = Lasso()
    Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
        lasso, self.X_train, self.X_test, self.Y_train, self.Y_test)
    evl = MetricsCalculator()
    evl.evaluate('root mean square error for lasso regression',
                 y_true_glucose, y_pred_glucose)
    viz = Visualizer()
    viz.visualize('lasso regression', y_true_glucose, y_pred_glucose)
def calculateMetrics(self, dataset, altered_dataset):
    merged_dataset = self.datasetsProduct(dataset, altered_dataset)
    # Strip the merge suffixes to recover base column names, then build
    # (left, right, output) tuples for each column to compare.
    base_columns = set(
        e.replace('_x', '').replace('_y', '')
        for e in self.datasource_columns
        if e not in self.pass_through_columns)
    columns_tuples = [(col + '_x', col + '_y', col) for col in base_columns]
    pass_through_base = set(
        e.replace('_x', '').replace('_y', '')
        for e in self.pass_through_columns)
    pass_through_columns_tuples = [(col + '_x', col + '_y', col)
                                   for col in pass_through_base]
    calculator = MetricsCalculator(
        metrics=self.text_metrics,
        workers=self.workers,
        dataset=merged_dataset,
        columns=columns_tuples,
        pass_through_columns=pass_through_columns_tuples,
        logging_level=self.logging_level)
    calculated = calculator.calculate()
    return merged_dataset, calculated
def perform_PLS(self):
    print(',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, PARTIAL LEAST SQUARE ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,')
    model_trainer = ModelTrainer()
    pls = PLSRegression(n_components=20, scale=True, max_iter=5000,
                        tol=1e-06, copy=True)
    Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
        pls, self.X_train, self.X_test, self.Y_train, self.Y_test)
    evl = MetricsCalculator()
    evl.evaluate('root mean square error for partial least square',
                 y_true_glucose, y_pred_glucose)
    viz = Visualizer()
    viz.visualize('pls', y_true_glucose, y_pred_glucose)
def test_deploy_metrics():
    start_time = datetime(year=2017, month=12, day=11, hour=8)
    # Successful run, deployed one minute after it finished.
    run1 = PipelineRun(start_time=start_time,
                       end_time=start_time + timedelta(minutes=10),
                       stage_results=[StageRun(StageStatus.ok)],
                       deploy_time=start_time + timedelta(minutes=11))
    # Failed run, never deployed.
    run2 = PipelineRun(start_time=start_time + timedelta(minutes=10),
                       end_time=start_time + timedelta(minutes=20),
                       stage_results=[StageRun(StageStatus.fail)])
    run3 = PipelineRun(start_time=start_time + timedelta(minutes=20),
                       end_time=start_time + timedelta(minutes=30),
                       stage_results=[StageRun(StageStatus.ok)],
                       deploy_time=start_time + timedelta(minutes=31))
    runs = [run1, run2, run3]

    calculator = MetricsCalculator(runs)
    metrics = calculator.metrics()

    # run1 starts at 8:00 and deploys at 8:11 -> 11 minute lead time.
    assert metrics.deployment_lead_time == timedelta(minutes=11)
    assert metrics.deployment_failure_rate == 0
    # Deploys land at 8:11 and 8:31 -> 20 minutes apart.
    assert metrics.deployment_interval == timedelta(minutes=20)
    # No deploy failed, so there is nothing to recover from.
    assert metrics.deployment_recovery_time is None
    assert len(calculator.deploys) == 2
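# A minimal sketch of how the interval assertion above can be derived: take
# the deploy_time of every deployed run and average the gaps between
# consecutive deploys. This mirrors the test's expectations; the real
# MetricsCalculator may compute the metric differently.
def deployment_interval_sketch(runs):
    deploy_times = sorted(r.deploy_time for r in runs
                          if getattr(r, 'deploy_time', None) is not None)
    if len(deploy_times) < 2:
        return None
    gaps = [b - a for a, b in zip(deploy_times, deploy_times[1:])]
    return sum(gaps, timedelta()) / len(gaps)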
def perform_NN(self):
    print('/////////////////////////////////////////////////// NEURAL NETWORK ///////////////////////////////////')
    model_trainer = ModelTrainer()
    nn = MLPRegressor(hidden_layer_sizes=(200,), activation='relu',
                      solver='adam', alpha=0.1, batch_size='auto',
                      learning_rate='constant', learning_rate_init=0.001,
                      power_t=0.5, max_iter=3000, shuffle=True,
                      random_state=None, tol=0.0001, verbose=False,
                      warm_start=False, momentum=0.9,
                      nesterovs_momentum=True, early_stopping=False,
                      validation_fraction=0.1, beta_1=0.9, beta_2=0.999,
                      epsilon=1e-08, n_iter_no_change=10)
    Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
        nn, self.X_train, self.X_test, self.Y_train, self.Y_test)
    evl = MetricsCalculator()
    evl.evaluate('root mean square error for Neural network',
                 y_true_glucose, y_pred_glucose)
    viz = Visualizer()
    viz.visualize('neural network', y_true_glucose, y_pred_glucose)
class WaveNetModule(pl.LightningModule):
    def __init__(self, hparams, loss_fn=F.cross_entropy,
                 log_grads: bool = False, use_sentence_split: bool = True):
        super().__init__()
        """Configuration flags"""
        self.use_sentence_split = use_sentence_split
        self.log_grads = log_grads
        """Dataset"""
        self.batch_size = hparams.batch_size
        self.output_length = hparams.out_len
        self.win_len = hparams.win_len
        self._setup_dataloaders()
        """Training"""
        self.loss_fn = loss_fn
        self.lr = hparams.lr
        """Embedding"""
        self.embedding_dim = hparams.emb_dim
        self.embedding = nn.Embedding(self.num_classes, self.embedding_dim)
        """Metrics"""
        self.metrics = MetricsCalculator(
            ["accuracy", "precision", "recall", "f1"])
        """Model"""
        self.model = WaveNet(num_blocks=hparams.num_blocks,
                             num_layers=hparams.num_layers,
                             num_classes=self.num_classes,
                             output_len=self.output_length,
                             ch_start=self.embedding_dim,
                             ch_residual=hparams.ch_residual,
                             ch_dilation=hparams.ch_dilation,
                             ch_skip=hparams.ch_skip,
                             ch_end=hparams.ch_end,
                             kernel_size=hparams.kernel_size,
                             bias=True)

    def forward(self, x):
        return self.model(x)

    def _forward_batch(self, batch):
        x, y = batch
        x_emb = self._embed(x)
        y_hat = self.forward(x_emb)
        return self.loss_fn(y_hat, y), y, torch.argmax(y_hat, dim=1)

    def _embed(self, x):
        x_emb = self.embedding(x).permute(0, 2, 1)
        return x_emb

    def training_step(self, batch, batch_idx):
        loss, true, preds = self._forward_batch(batch)
        return {
            "loss": loss,
            "log": self.metrics.generate_logs(loss, preds, true, "train")
        }

    def training_step_end(self, training_out):
        tensorboard_logs = self.metrics.generate_mean_metrics(
            training_out["log"], "train")
        return {
            "loss": training_out["loss"],
            "progress_bar": tensorboard_logs,
            "log": tensorboard_logs
        }

    def validation_step(self, batch, batch_idx):
        loss, true, preds = self._forward_batch(batch)
        return self.metrics.generate_logs(loss, preds, true, "val")

    def validation_epoch_end(self, outputs):
        tensorboard_logs = self.metrics.generate_mean_metrics(outputs, "val")
        return {"progress_bar": tensorboard_logs, "log": tensorboard_logs}

    def test_step(self, batch, batch_idx):
        loss, true, preds = self._forward_batch(batch)
        return self.metrics.generate_logs(loss, preds, true, "test")

    def test_epoch_end(self, outputs):
        tensorboard_logs = self.metrics.generate_mean_metrics(outputs, "test")
        return {"progress_bar": tensorboard_logs, "log": tensorboard_logs}

    def configure_optimizers(self):
        # can return multiple optimizers and learning_rate schedulers
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer

    def _setup_dataloaders(self):
        ds = (PennTreeSentenceDataset
              if self.use_sentence_split else PennTreeCharDataset)
        self.dl_train = DataLoader(
            ds(self.win_len, self.output_length, is_train=True),
            self.batch_size, shuffle=True)
        self.dl_valid = DataLoader(
            ds(self.win_len, self.output_length, is_valid=True),
            self.batch_size)
        self.dl_test = DataLoader(
            ds(self.win_len, self.output_length, is_test=True),
            self.batch_size)

    @property
    def num_classes(self):
        return self.dl_train.dataset.num_chars

    def train_dataloader(self):
        return self.dl_train

    def val_dataloader(self):
        return self.dl_valid

    def test_dataloader(self):
        return self.dl_test

    def on_train_start(self):
        input = torch.ones(
            (self.batch_size, self.embedding_dim, self.win_len))
        if torch.cuda.is_available():
            input = input.cuda()
        self.logger.experiment.add_graph(self.model, input)

    def on_after_backward(self):
        # example to inspect gradient information in tensorboard
        if self.log_grads and self.trainer.global_step % 100 == 0:
            # don't make the tf file huge
            params = self.state_dict()
            for k, v in params.items():
                grads = v
                name = k
                self.logger.experiment.add_histogram(
                    tag=name, values=grads,
                    global_step=self.trainer.global_step)
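# Usage sketch for the module above, assuming PyTorch Lightning's standard
# Trainer API. The hparams fields mirror those read in __init__; the concrete
# values here are illustrative placeholders, not the repository's defaults.
if __name__ == "__main__":
    from argparse import Namespace

    hparams = Namespace(batch_size=32, out_len=64, win_len=128, lr=1e-3,
                        emb_dim=16, num_blocks=2, num_layers=5,
                        ch_residual=32, ch_dilation=32, ch_skip=64,
                        ch_end=128, kernel_size=2)
    module = WaveNetModule(hparams)
    trainer = pl.Trainer(max_epochs=10)
    trainer.fit(module)
    trainer.test(module)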
def recognize_data(self, group, folder_name):
    classifier_path = f"classifiers/{self.photos_size}/{folder_name}"
    paths_to_recognize = []
    y_true = []
    photos_ids = []
    for photos_path in group:
        paths_to_recognize.append(photos_path)
        # File names follow a "<student_id>-<photo_id>.<ext>" convention.
        file_name = os.path.split(photos_path)[-1]
        student_id = int(file_name.split("-")[0])
        photo_id = int(file_name.split("-")[1].split(".")[0])
        y_true.append(student_id)
        photos_ids.append(photo_id)
    recognizer = Recognizer(paths_to_recognize, classifier_path)

    eigenfaces_y_pred = recognizer.eigenfaces()
    eigenfaces_metrics = MetricsCalculator(y_true, photos_ids,
                                           eigenfaces_y_pred)
    eigenfaces_metrics.calculate_metrics()
    eigenfaces_metrics.print_metrics()
    self.eigenfaces_metrics.append(eigenfaces_metrics)

    fisherfaces_y_pred = recognizer.fisherfaces()
    fisherfaces_metrics = MetricsCalculator(y_true, photos_ids,
                                            fisherfaces_y_pred)
    fisherfaces_metrics.calculate_metrics()
    fisherfaces_metrics.print_metrics()
    self.fisherfaces_metrics.append(fisherfaces_metrics)

    lbph_y_pred = recognizer.lbph()
    lbph_metrics = MetricsCalculator(y_true, photos_ids, lbph_y_pred)
    lbph_metrics.calculate_metrics()
    lbph_metrics.print_metrics()
    self.lbph_metrics.append(lbph_metrics)
def eval_metrics(self, left=-1, thr_number=10):
    result = run_test(self.test, mode=self.mode, left=left,
                      thr_number=thr_number)
    self.mc = (FROCMetricsCalculator(result, label=self.label)
               if self.mode == 'froc' else MetricsCalculator(result))
def train(args):
    """Train"""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    model = modules.CNN1()

    # Read data
    print('Reading data...', flush=True)
    data_loader = data_loaders.NPZDataLoader(args.dataset_dir)
    data_train = data_loader.get_data_train()
    data_val = data_loader.get_data_val()
    if args.output_dir and not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Prepare to train
    train_acc_history, val_acc_history = [], []
    loss_history = []
    optimizer = optim.SGD(model.parameters(), lr=args.lr)

    # Start training
    print('Start training.', flush=True)
    for epoch in range(args.num_epochs):
        # Train
        start_time = time.time()
        random.shuffle(data_train)
        calculator = MetricsCalculator(10)
        for start in tqdm(range(0, len(data_train), args.train_batch_size),
                          desc='Training epoch %d: ' % epoch):
            end = min(start + args.train_batch_size, len(data_train))
            images = [data_train[idx][0] for idx in range(start, end)]
            actual_labels = [data_train[idx][1] for idx in range(start, end)]
            # forward
            outputs = model(torch.tensor(images, dtype=torch.float32))
            # backward
            batch_labels = torch.tensor(actual_labels, dtype=torch.int64)
            model.zero_grad()
            loss = nn.CrossEntropyLoss()(outputs, batch_labels)
            loss_history.append(float(loss))
            loss.backward()
            optimizer.step()
            pred_labels = outputs.softmax(1).argmax(1).tolist()
            calculator.update(actual_labels, pred_labels)
        acc = calculator.calc_accuracy()
        print('Accuracy:', acc)
        train_acc_history.append(acc)
        end_time = time.time()
        print('Training lasts', end_time - start_time, 's')
        if args.output_dir:
            torch.save(model,
                       os.path.join(args.output_dir,
                                    'epoch_' + str(epoch) + '.pt'))
        if args.not_val:
            continue

        # Validate
        start_time = time.time()
        calculator = MetricsCalculator(10)
        for start in tqdm(range(0, len(data_val), args.val_batch_size),
                          desc='Validating epoch %d: ' % epoch):
            end = min(start + args.val_batch_size, len(data_val))
            images = [data_val[idx][0] for idx in range(start, end)]
            actual_labels = [data_val[idx][1] for idx in range(start, end)]
            # forward
            outputs = model(torch.tensor(images, dtype=torch.float32))
            # Update metrics
            pred_labels = outputs.softmax(1).argmax(1).tolist()
            calculator.update(actual_labels, pred_labels)
        acc = calculator.calc_accuracy()
        print('Accuracy:', acc)
        val_acc_history.append(acc)
        end_time = time.time()
        print('Validating lasts', end_time - start_time, 's')

    # Plot
    if args.output_dir:
        plt.xlabel('Batch')
        plt.ylabel('Loss')
        plt.title('Loss During Training')
        plt.grid(True)
        plt.plot(loss_history, 'r')
        plt.savefig(os.path.join(args.output_dir, 'loss.jpg'))
        plt.close()

        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.title('Accuracy During Training')
        plt.grid(True)
        plt.plot(train_acc_history, 'r')
        plt.plot(val_acc_history, 'b')
        plt.legend(['train', 'val'])
        plt.savefig(os.path.join(args.output_dir, 'acc.jpg'))
        plt.close()
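# A minimal argparse sketch of the CLI surface train() reads from `args`.
# Flag names match the attributes used above; the defaults are illustrative
# assumptions, not the repository's documented values.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Train CNN1 on an NPZ dataset')
    parser.add_argument('--dataset_dir', required=True)
    parser.add_argument('--output_dir', default=None)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--num_epochs', type=int, default=10)
    parser.add_argument('--train_batch_size', type=int, default=32)
    parser.add_argument('--val_batch_size', type=int, default=32)
    parser.add_argument('--not_val', action='store_true',
                        help='skip the validation pass each epoch')
    train(parser.parse_args())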
stages = [ Stage("Commit Stage", duration=timedelta(minutes=3), failure_rate=0.005), Stage("Automated Acceptance Test", duration=timedelta(minutes=20), failure_rate=0.01), Stage("Performance Test", duration=timedelta(minutes=20), failure_rate=0.01), Stage("Internal Release", duration=timedelta(minutes=4), failure_rate=0.01, single_threaded=True), ] start_time = datetime(year=2017, month=12, day=11, hour=8) commits = generate_commits(100, start_time, offset=2000, max_interval=100) deployer = Deployer(duration=timedelta(minutes=4), deploy_policy=DeployPolicy.OnceADay, deploy_hour=17, deploy_day=6) runs = run_simulation(start_time, stages, commits=commits, deployer=deployer) print_runs("simulation_farley", stages, runs) metrics_calc = MetricsCalculator(runs) metrics = metrics_calc.metrics() print_metrics("simulation_farley", metrics) print(metrics.pretty_print())