Example #1
    def __init__(self,
                 hparams,
                 loss_fn=F.cross_entropy,
                 log_grads: bool = False,
                 use_sentence_split: bool = True):
        super().__init__()
        """Configuration flags"""
        self.use_sentence_split = use_sentence_split
        self.log_grads = log_grads
        """Dataset"""
        self.batch_size = hparams.batch_size
        self.output_length = hparams.out_len
        self.win_len = hparams.win_len
        self._setup_dataloaders()
        """Training"""
        self.loss_fn = loss_fn
        self.lr = hparams.lr
        """Embedding"""
        self.embedding_dim = hparams.emb_dim
        self.embedding = nn.Embedding(self.num_classes, self.embedding_dim)
        """Metrics"""
        self.metrics = MetricsCalculator(
            ["accuracy", "precision", "recall", "f1"])
        """Model"""
        self.model = WaveNet(num_blocks=hparams.num_blocks,
                             num_layers=hparams.num_layers,
                             num_classes=self.num_classes,
                             output_len=self.output_length,
                             ch_start=self.embedding_dim,
                             ch_residual=hparams.ch_residual,
                             ch_dilation=hparams.ch_dilation,
                             ch_skip=hparams.ch_skip,
                             ch_end=hparams.ch_end,
                             kernel_size=hparams.kernel_size,
                             bias=True)
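This fragment is the constructor of the WaveNetModule class shown in full in Example #9 below.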
Example #2
    def perform_ridge_regression(self):
        print(
            '*********************************************RIDGE REGRESSION**************************************************'
        )
        model_trainer = ModelTrainer()
        ridge = Ridge(alpha=1.0)
        Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
            ridge, self.X_train, self.X_test, self.Y_train, self.Y_test)
        evl = MetricsCalculator()
        evl.evaluate('root mean square error for ridge regression',
                     y_true_glucose, y_pred_glucose)

        viz = Visualizer()
        viz.visualize('ridge regression', y_true_glucose, y_pred_glucose)
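The perform_* methods in Examples #2, #3, #4, #6, and #8 all call a ModelTrainer helper that is not shown on this page. The following is a minimal sketch consistent with the call sites; only the method name, the scikit-learn estimator interface, and the four return values are taken from the snippets, while the body and the choice of glucose as the first target column are assumptions:

class ModelTrainer:
    """Hypothetical reconstruction; only the signature comes from the call sites."""

    def train_model(self, estimator, X_train, X_test, Y_train, Y_test):
        # Fit the scikit-learn estimator on the training split.
        estimator.fit(X_train, Y_train)
        # Predict on the held-out split.
        Y_pred = estimator.predict(X_test)
        # Assumption: the glucose target is the first output column
        # when the target is multi-dimensional.
        y_true_glucose = Y_test[:, 0] if getattr(Y_test, "ndim", 1) > 1 else Y_test
        y_pred_glucose = Y_pred[:, 0] if getattr(Y_pred, "ndim", 1) > 1 else Y_pred
        return Y_test, Y_pred, y_true_glucose, y_pred_glucose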
Example #3
    def perform_linear_regression(self):

        print(
            '------------------------------------------LINEAR REGRESSION------------------------------------------'
        )
        model_trainer = ModelTrainer()
        linear_reg = LinearRegression()
        Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
            linear_reg, self.X_train, self.X_test, self.Y_train, self.Y_test)
        evl = MetricsCalculator()
        evl.evaluate('root mean square error for linear regression',
                     y_true_glucose, y_pred_glucose)

        viz = Visualizer()
        viz.visualize('linear regression', y_true_glucose, y_pred_glucose)
Example #4
    def perform_lasso_regression(self):

        print(
            '................................... LASSO REGRESSION ............................................'
        )
        model_trainer = ModelTrainer()
        lasso = Lasso()
        Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
            lasso, self.X_train, self.X_test, self.Y_train, self.Y_test)
        evl = MetricsCalculator()
        evl.evaluate('root mean square error for lasso regression',
                     y_true_glucose, y_pred_glucose)

        viz = Visualizer()
        viz.visualize('lasso regression', y_true_glucose, y_pred_glucose)
Example #5
    def calculateMetrics(self, dataset, altered_dataset):
        merged_dataset = self.datasetsProduct(dataset, altered_dataset)
        base_columns = {
            e.replace('_x', '').replace('_y', '')
            for e in self.datasource_columns
            if e not in self.pass_through_columns
        }
        columns_tuples = [(col + '_x', col + '_y', col) for col in base_columns]
        pass_through_base = {
            e.replace('_x', '').replace('_y', '')
            for e in self.pass_through_columns
        }
        pass_through_columns_tuples = [(col + '_x', col + '_y', col)
                                       for col in pass_through_base]
        calculator = MetricsCalculator(
            metrics=self.text_metrics,
            workers=self.workers,
            dataset=merged_dataset,
            columns=columns_tuples,
            pass_through_columns=pass_through_columns_tuples,
            logging_level=self.logging_level)
        calculated = calculator.calculate()
        return merged_dataset, calculated
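The (col + '_x', col + '_y', col) tuples pair the two suffixed variants of each column with its original name; _x and _y are presumably the default suffixes that pandas.merge appends to overlapping column names when datasetsProduct joins the two datasets.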
Example #6
    def perform_PLS(self):
        print(
            ',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, PARTIAL LEAST SQUARE ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,'
        )
        model_trainer = ModelTrainer()

        pls = PLSRegression(n_components=20,
                            scale=True,
                            max_iter=5000,
                            tol=1e-06,
                            copy=True)
        Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
            pls, self.X_train, self.X_test, self.Y_train, self.Y_test)

        evl = MetricsCalculator()
        evl.evaluate('root mean square error for partial least square',
                     y_true_glucose, y_pred_glucose)

        viz = Visualizer()
        viz.visualize('pls', y_true_glucose, y_pred_glucose)
Example #7
def test_deploy_metrics():
    start_time = datetime(year=2017, month=12, day=11, hour=8)

    run1 = PipelineRun(start_time=start_time,
                       end_time=start_time + timedelta(minutes=10),
                       stage_results=[StageRun(StageStatus.ok)],
                       deploy_time=start_time + timedelta(minutes=11))
    run2 = PipelineRun(
        start_time=start_time + timedelta(minutes=10),
        end_time=start_time + timedelta(minutes=20),
        stage_results=[StageRun(StageStatus.fail)],
    )
    run3 = PipelineRun(start_time=start_time + timedelta(minutes=20),
                       end_time=start_time + timedelta(minutes=30),
                       stage_results=[StageRun(StageStatus.ok)],
                       deploy_time=start_time + timedelta(minutes=31))
    runs = [run1, run2, run3]
    calculator = MetricsCalculator(runs)
    metrics = calculator.metrics()
    assert metrics.deployment_lead_time == timedelta(minutes=11)
    assert metrics.deployment_failure_rate == 0
    assert metrics.deployment_interval == timedelta(minutes=20)
    assert metrics.deployment_recovery_time is None
    assert len(calculator.deploys) == 2
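Reading the fixture against the assertions (the metric definitions themselves are inferred from this test, not from the MetricsCalculator source): run1 starts at 08:00 and deploys at 08:11, giving an 11-minute lead time; the two successful deploys land at 08:11 and 08:31, giving a 20-minute deployment interval and a deploy count of 2; run2 fails in its pipeline stage and never deploys, so the deployment failure rate is 0 and, with no failed deploy to recover from, the recovery time is None.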
Example #8
    def perform_NN(self):
        print(
            '/////////////////////////////////////////////////// NEURAL NETWORK ///////////////////////////////////'
        )
        model_trainer = ModelTrainer()
        nn = MLPRegressor(hidden_layer_sizes=(200, ),
                          activation='relu',
                          solver='adam',
                          alpha=0.1,
                          batch_size='auto',
                          learning_rate='constant',
                          learning_rate_init=0.001,
                          power_t=0.5,
                          max_iter=3000,
                          shuffle=True,
                          random_state=None,
                          tol=0.0001,
                          verbose=False,
                          warm_start=False,
                          momentum=0.9,
                          nesterovs_momentum=True,
                          early_stopping=False,
                          validation_fraction=0.1,
                          beta_1=0.9,
                          beta_2=0.999,
                          epsilon=1e-08,
                          n_iter_no_change=10)
        Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
            nn, self.X_train, self.X_test, self.Y_train, self.Y_test)

        evl = MetricsCalculator()
        evl.evaluate('root mean square error for Neural network',
                     y_true_glucose, y_pred_glucose)

        viz = Visualizer()
        viz.visualize('neural network', y_true_glucose, y_pred_glucose)
Example #9
class WaveNetModule(pl.LightningModule):
    def __init__(self,
                 hparams,
                 loss_fn=F.cross_entropy,
                 log_grads: bool = False,
                 use_sentence_split: bool = True):
        super().__init__()
        """Configuration flags"""
        self.use_sentence_split = use_sentence_split
        self.log_grads = log_grads
        """Dataset"""
        self.batch_size = hparams.batch_size
        self.output_length = hparams.out_len
        self.win_len = hparams.win_len
        self._setup_dataloaders()
        """Training"""
        self.loss_fn = loss_fn
        self.lr = hparams.lr
        """Embedding"""
        self.embedding_dim = hparams.emb_dim
        self.embedding = nn.Embedding(self.num_classes, self.embedding_dim)
        """Metrics"""
        self.metrics = MetricsCalculator(
            ["accuracy", "precision", "recall", "f1"])
        """Model"""
        self.model = WaveNet(num_blocks=hparams.num_blocks,
                             num_layers=hparams.num_layers,
                             num_classes=self.num_classes,
                             output_len=self.output_length,
                             ch_start=self.embedding_dim,
                             ch_residual=hparams.ch_residual,
                             ch_dilation=hparams.ch_dilation,
                             ch_skip=hparams.ch_skip,
                             ch_end=hparams.ch_end,
                             kernel_size=hparams.kernel_size,
                             bias=True)

    def forward(self, x):
        return self.model(x)

    def _forward_batch(self, batch):
        x, y = batch
        x_emb = self._embed(x)
        y_hat = self.forward(x_emb)
        return self.loss_fn(y_hat, y), y, torch.argmax(y_hat, dim=1)

    def _embed(self, x):
        x_emb = self.embedding(x).permute(0, 2, 1)
        return x_emb

    def training_step(self, batch, batch_idx):
        loss, true, preds = self._forward_batch(batch)

        return {
            "loss": loss,
            "log": (self.metrics.generate_logs(loss, preds, true, "train"))
        }

    def training_step_end(self, training_out):
        tensorboard_logs = self.metrics.generate_mean_metrics(
            training_out["log"], "train")
        return {
            "loss": training_out["loss"],
            "progress_bar": tensorboard_logs,
            "log": tensorboard_logs
        }

    def validation_step(self, batch, batch_idx):
        loss, true, preds = self._forward_batch(batch)

        return self.metrics.generate_logs(loss, preds, true, "val")

    def validation_epoch_end(self, outputs):
        tensorboard_logs = self.metrics.generate_mean_metrics(outputs, "val")
        return {"progress_bar": tensorboard_logs, "log": tensorboard_logs}

    def test_step(self, batch, batch_idx):
        loss, true, preds = self._forward_batch(batch)

        return self.metrics.generate_logs(loss, preds, true, "test")

    def test_epoch_end(self, outputs):
        tensorboard_logs = self.metrics.generate_mean_metrics(outputs, "test")
        return {"progress_bar": tensorboard_logs, "log": tensorboard_logs}

    def configure_optimizers(self):
        # can return multiple optimizers and learning_rate schedulers
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer

    def _setup_dataloaders(self):

        ds = PennTreeSentenceDataset if self.use_sentence_split else PennTreeCharDataset

        self.dl_train = DataLoader(ds(self.win_len,
                                      self.output_length,
                                      is_train=True),
                                   self.batch_size,
                                   shuffle=True)
        self.dl_valid = DataLoader(
            ds(self.win_len, self.output_length, is_valid=True),
            self.batch_size)
        self.dl_test = DataLoader(
            ds(self.win_len, self.output_length, is_test=True),
            self.batch_size)

    @property
    def num_classes(self):
        return self.dl_train.dataset.num_chars

    def train_dataloader(self):
        return self.dl_train

    def val_dataloader(self):
        return self.dl_valid

    def test_dataloader(self):
        return self.dl_test

    def on_train_start(self):
        input = torch.ones((self.batch_size, self.embedding_dim, self.win_len))
        if torch.cuda.is_available():
            input = input.cuda()

        self.logger.experiment.add_graph(self.model, input)

    def on_after_backward(self):
        # example to inspect gradient information in tensorboard
        if self.log_grads and self.trainer.global_step % 100 == 0:  # don't make the tf file huge
            params = self.state_dict()
            for k, v in params.items():
                grads = v
                name = k
                self.logger.experiment.add_histogram(
                    tag=name,
                    values=grads,
                    global_step=self.trainer.global_step)
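Note that the dict-style returns from training_step ("loss"/"log"/"progress_bar") and the validation_epoch_end/test_epoch_end hooks follow the pre-1.0 PyTorch Lightning API; current Lightning releases replaced them with self.log() and the on_*_epoch_end hooks, so this example needs adaptation on newer versions.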
Example #10
    def recognize_data(self, group, folder_name):
        classifier_path = f"classifiers/{self.photos_size}/{folder_name}"
        paths_to_recognize = []
        y_true = []
        photos_ids = []
        for photos_path in group:
            paths_to_recognize.append(photos_path)
            student_id = int(os.path.split(photos_path)[-1].split("-")[0])
            photo_id = int(
                os.path.split(photos_path)[-1].split("-")[1].split(".")[0])
            y_true.append(student_id)
            photos_ids.append(photo_id)

        recognizer = Recognizer(paths_to_recognize, classifier_path)

        eigenfaces_y_pred = recognizer.eigenfaces()
        eigenfaces_metrics = MetricsCalculator(y_true, photos_ids,
                                               eigenfaces_y_pred)
        eigenfaces_metrics.calculate_metrics()
        eigenfaces_metrics.print_metrics()
        self.eigenfaces_metrics.append(eigenfaces_metrics)

        fisherfaces_y_pred = recognizer.fisherfaces()
        fisherfaces_metrics = MetricsCalculator(y_true, photos_ids,
                                                fisherfaces_y_pred)
        fisherfaces_metrics.calculate_metrics()
        fisherfaces_metrics.print_metrics()
        self.fisherfaces_metrics.append(fisherfaces_metrics)

        lbph_y_pred = recognizer.lbph()
        lbph_metrics = MetricsCalculator(y_true, photos_ids, lbph_y_pred)
        lbph_metrics.calculate_metrics()
        lbph_metrics.print_metrics()
        self.lbph_metrics.append(lbph_metrics)
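The three-argument MetricsCalculator used here takes the true student ids, the photo ids, and the predicted ids; its implementation is not shown. A minimal sketch, under the assumption that calculate_metrics computes plain accuracy, could be:

class MetricsCalculator:
    # Hypothetical sketch: the constructor signature comes from the call
    # sites above; the accuracy-only metric is an assumption.
    def __init__(self, y_true, photos_ids, y_pred):
        self.y_true = list(y_true)
        self.photos_ids = list(photos_ids)
        self.y_pred = list(y_pred)
        self.accuracy = None

    def calculate_metrics(self):
        hits = sum(t == p for t, p in zip(self.y_true, self.y_pred))
        self.accuracy = hits / len(self.y_true) if self.y_true else 0.0

    def print_metrics(self):
        print(f"accuracy: {self.accuracy:.3f}")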
Example #11
    def eval_metrics(self, left=-1, thr_number=10):
        result = run_test(self.test, mode=self.mode, left=left,
                          thr_number=thr_number)
        self.mc = (FROCMetricsCalculator(result, label=self.label)
                   if self.mode == 'froc' else MetricsCalculator(result))
Example #12
def train(args):
    """Train"""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

    model = modules.CNN1()

    # Read data
    print('Reading data...', flush=True)
    data_loader = data_loaders.NPZDataLoader(args.dataset_dir)
    data_train = data_loader.get_data_train()
    data_val = data_loader.get_data_val()

    if args.output_dir and not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Prepare to train
    train_acc_history, val_acc_history = [], []
    loss_history = []
    optimizer = optim.SGD(model.parameters(), lr=args.lr)

    # Start training
    print('Start training.', flush=True)
    for epoch in range(args.num_epochs):

        # Train
        start_time = time.time()

        random.shuffle(data_train)
        calculator = MetricsCalculator(10)
        for start in tqdm(range(0, len(data_train), args.train_batch_size),
                          desc='Training epoch %d: ' % epoch):
            images = [
                data_train[idx][0] for idx in range(
                    start, min(start + args.train_batch_size, len(data_train)))
            ]
            actual_labels = [
                data_train[idx][1] for idx in range(
                    start, min(start + args.train_batch_size, len(data_train)))
            ]

            # forward
            outputs = model(torch.tensor(images, dtype=torch.float32))

            # backward
            batch_labels = torch.tensor(actual_labels, dtype=torch.int64)
            model.zero_grad()
            loss = nn.CrossEntropyLoss()(outputs, batch_labels)
            loss_history.append(float(loss))
            loss.backward()
            optimizer.step()

            pred_labels = outputs.softmax(1).argmax(1).tolist()
            calculator.update(actual_labels, pred_labels)
        acc = calculator.calc_accuracy()
        print('Accuracy:', acc)
        train_acc_history.append(acc)

        end_time = time.time()
        print('Training lasts', end_time - start_time, 's')

        if args.output_dir:
            torch.save(
                model,
                os.path.join(args.output_dir, 'epoch_' + str(epoch) + '.pt'))

        if args.not_val:
            continue

        # Validate
        start_time = time.time()

        calculator = MetricsCalculator(10)
        for start in tqdm(range(0, len(data_val), args.val_batch_size),
                          desc='Validating epoch %d: ' % epoch):
            images = [
                data_val[idx][0] for idx in range(
                    start, min(start + args.val_batch_size, len(data_val)))
            ]
            actual_labels = [
                data_val[idx][1] for idx in range(
                    start, min(start + args.val_batch_size, len(data_val)))
            ]

            # forward
            outputs = model(torch.tensor(images, dtype=torch.float32))

            # Update metrics
            pred_labels = outputs.softmax(1).argmax(1).tolist()
            calculator.update(actual_labels, pred_labels)
        acc = calculator.calc_accuracy()
        print('Accuracy:', acc)
        val_acc_history.append(acc)

        end_time = time.time()
        print('Validating lasts', end_time - start_time, 's')

    # Plot
    if args.output_dir:
        plt.xlabel('Batch')
        plt.ylabel('Loss')
        plt.title('Loss During Training')
        plt.grid(True)
        plt.plot(loss_history, 'r')
        plt.savefig(os.path.join(args.output_dir, 'loss.jpg'))
        plt.close()

        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.title('Accuracy During Training')
        plt.grid(True)
        plt.plot(train_acc_history, 'r')
        plt.plot(val_acc_history, 'b')
        plt.legend(['train', 'val'])
        plt.savefig(os.path.join(args.output_dir, 'acc.jpg'))
        plt.close()
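MetricsCalculator(10) here apparently takes the number of classes and accumulates per-batch results via update; a minimal sketch sufficient for calc_accuracy, the only metric exercised above, might be (the constructor's meaning and the internals are assumptions):

class MetricsCalculator:
    # Hypothetical sketch: the constructor argument is read as the
    # number of classes; only overall accuracy is reconstructed here.
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.correct = 0
        self.total = 0

    def update(self, actual_labels, pred_labels):
        # Accumulate per-batch hit counts.
        for a, p in zip(actual_labels, pred_labels):
            self.correct += int(a == p)
            self.total += 1

    def calc_accuracy(self):
        return self.correct / self.total if self.total else 0.0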
Example #13
stages = [
    Stage("Commit Stage", duration=timedelta(minutes=3), failure_rate=0.005),
    Stage("Automated Acceptance Test",
          duration=timedelta(minutes=20),
          failure_rate=0.01),
    Stage("Performance Test",
          duration=timedelta(minutes=20),
          failure_rate=0.01),
    Stage("Internal Release",
          duration=timedelta(minutes=4),
          failure_rate=0.01,
          single_threaded=True),
]

start_time = datetime(year=2017, month=12, day=11, hour=8)

commits = generate_commits(100, start_time, offset=2000, max_interval=100)

deployer = Deployer(duration=timedelta(minutes=4),
                    deploy_policy=DeployPolicy.OnceADay,
                    deploy_hour=17,
                    deploy_day=6)

runs = run_simulation(start_time, stages, commits=commits, deployer=deployer)
print_runs("simulation_farley", stages, runs)

metrics_calc = MetricsCalculator(runs)
metrics = metrics_calc.metrics()
print_metrics("simulation_farley", metrics)
print(metrics.pretty_print())