Example #1
def Validation(cf, sess, sb):
    val_time = time.time()
    val_writer = sb.tensorBoard.save(
        cf.exp_folder + cf.log_path + 'validation/', sess)
    valid_image_path = os.path.join(cf.valid_dataset_path,
                                    cf.valid_folder_names[0])
    valid_gt_path = os.path.join(cf.valid_dataset_path,
                                 cf.valid_folder_names[1])
    valid_set = Data_loader(cf, valid_image_path, cf.valid_samples,
                            cf.size_image_valid, valid_gt_path)
    valid_set.Load_dataset(cf.valid_batch_size)
    valid_stats = Statistics(cf.valid_batch_size, sb)
    tf.summary.scalar("Mean_IoU/validation",
                      valid_stats.mean_IoU,
                      collections=['validation'])
    tf.summary.scalar("Mean_Acc/validation",
                      valid_stats.accuracy_class,
                      collections=['validation'])
    valid_loss_batch = np.zeros(valid_set.num_batches, dtype=np.float32)
    conf_mat = np.zeros((cf.num_classes, cf.num_classes), dtype=np.float32)
    sess.run(valid_stats.running_vars_initializer)
    prog_bar = ProgressBar(valid_set.num_batches)
    for i in range(valid_set.num_batches):
        batch_x, batch_y = valid_set.Next_batch(cf.valid_batch_size)
        feed_dict = {
            sb.model.simb_image: batch_x,
            sb.model.simb_gt: batch_y,
            sb.model.simb_is_training: False
        }
        simbol_list = [
            sb.loss_fun, sb.model.annotation_pred, valid_stats.update_IoU,
            valid_stats.update_acc_class, valid_stats.conf_matrix_batch
        ]
        sess_return = sess.run(simbol_list, feed_dict)
        valid_loss_batch[i] = sess_return[0]
        pred = sess_return[1]
        # accumulate the per-batch confusion matrix for averaging below;
        # sess_return[3] is the accuracy update op, not a matrix
        conf_mat += sess_return[4]
        prog_bar.update()
    conf_mat = conf_mat / valid_set.num_batches
    img_conf_mat = confm_metrics2image(conf_mat, cf.labels)
    img_conf_mat = tf.expand_dims(img_conf_mat, 0)
    tf.summary.image("conf_mat/validation",
                     img_conf_mat,
                     max_outputs=2,
                     collections=['validation'])
    summary_op_val = sb.tensorBoard.set_up('validation')
    mIoU_valid, mAcc_valid, summary_val = sess.run(
        [valid_stats.mean_IoU, valid_stats.accuracy_class, summary_op_val])
    val_time = time.time() - val_time
    print("\t Loss: %g, mIoU: %g, mAcc: %g, Time: %ds" % (np.mean(
        np.asarray(valid_loss_batch)), mIoU_valid, mAcc_valid, val_time))
    val_writer.add_summary(summary_val)
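A note on the interface these snippets assume: the collection drives several ProgressBar variants, and the one used above (and in Examples #2 and #15) is a plain counter constructed with the total step count and advanced once per batch with update(). A minimal sketch of such a class, as a hypothetical reference only, not the original implementation:

import sys


class ProgressBar(object):
    """Counter-style progress bar: ProgressBar(total), then update() per step."""

    def __init__(self, total, lenBar=20):
        self.total = total    # total number of steps, e.g. batches
        self.lenBar = lenBar  # width of the rendered bar in characters
        self.step = 0

    def update(self, show=True):
        # advance one step; render only when show is True
        self.step += 1
        if not show:
            return
        filled = int(self.lenBar * self.step / float(self.total))
        bar = '#' * filled + '-' * (self.lenBar - filled)
        sys.stdout.write('\r[%s] %d/%d' % (bar, self.step, self.total))
        sys.stdout.flush()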
Example #2
def Predict(cf, sess, sb):
    predict_time = time.time()
    test_image_path = os.path.join(cf.test_dataset_path,
                                   cf.test_folder_names[0])
    test_set = Data_loader(cf, test_image_path, cf.test_samples,
                           cf.resize_image_test)
    test_set.Load_dataset(cf.test_batch_size)
    prog_bar = ProgressBar(test_set.num_batches)
    for i in range(test_set.num_batches):
        batch_x, batch_names = test_set.Next_batch_pred(cf.test_batch_size)
        feed_dict = {
            sb.model.simb_image: batch_x,
            sb.model.simb_is_training: False
        }
        simbol_list = [sb.model.annotation_pred]
        sess_return = sess.run(simbol_list, feed_dict)
        pred = sess_return[0]
        save_prediction(cf.predict_output, pred, batch_names)
        prog_bar.update()
    predict_time = time.time() - predict_time
    print("\t Time: %ds" % (predict_time))
Example #3
 def get_dynamic_datasets_in_timerange(self, datasets, time_min, time_max):
     assert isinstance(time_min, np.datetime64)
     assert isinstance(time_max, np.datetime64)
     # find valid time stamps in range
     mask = np.logical_and(time_min <= self._valid_times,
                           self._valid_times <= time_max)
     idxs = np.argwhere(mask).flatten().astype(np.int32)
     # remove duplicate time stamps
     valid_times = self._valid_times[idxs]
     valid_times, valid_idxs = np.unique(valid_times, return_index=True)
     idxs = idxs[valid_idxs]
     result = []
     # find all grids contained in dynamic variables
     grids_contained = [
         d for d in datasets if d in self._dynamic_datasets.keys()
     ]
     if len(grids_contained) > 0:
         progress = ProgressBar(len(grids_contained))
         print(
             "[INFO]: Load dynamic grids <{}> for <{}> in time range ({} - {}) ..."
             .format(grids_contained, self.area, time_min, time_max))
         progress.proceed(0)
         for i, grid in enumerate(grids_contained):
             # if grid in self._dynamic_datasets.keys():
             selected_grids = self._dynamic_datasets[grid]
             result += [selected_grids[idxs]]
             progress.proceed(i + 1)
     return np.array(result), self._valid_times[idxs], grids_contained
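Example #4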
 def calc_accuracy(self, path_test_file):
     """
     This function calculates both sentence accuracy and word accuracy on a given test set.
     :param path_test_file: path to file containing labeled samples (str)
     :return: sentence_accuracy: percentage on complete sentences (float)
     :return: word_accuracy: percentage on words (float)
     """
     total_words = 0
     total_sentences = 0
     correct_words = 0
     correct_sentences = 0
     num_samples = 0
     for _ in dep_sample_generator(path_test_file):
         num_samples += 1
     progress = ProgressBar(num_samples, fmt=ProgressBar.FULL)
     samp_gen = dep_sample_generator(path_test_file)
     for sample in samp_gen:
         total_sentences += 1
         total_words += sample[-1].idx
         inferred_sample = self.infer(sample)
         correct_parse = True
         for i in range(len(sample)):
             if not i:
                 # skip ROOT
                 continue
             if sample[i].head == inferred_sample[i].head:
                 correct_words += 1
             else:
                 correct_parse = False
         if correct_parse:
             correct_sentences += 1
         progress.current += 1
         progress()
     progress.done()
     print('\n')
     sentence_accuracy = 1.0 * correct_sentences / total_sentences
     word_accuracy = 1.0 * correct_words / total_words
     return sentence_accuracy, word_accuracy
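calc_accuracy above (and perceptron_train in Example #13) drives a different, callable ProgressBar variant: the caller increments .current, renders by calling the object, and closes with .done(). A minimal sketch under that assumed interface, hypothetical and for orientation only:

import sys


class ProgressBar(object):
    """Callable progress bar: set .current, call the object to render."""

    FULL = '%(current)d/%(total)d [%(bar)s] %(percent)3.0f%%'

    def __init__(self, total, width=40, fmt=FULL):
        self.total = total
        self.width = width
        self.fmt = fmt
        self.current = 0

    def __call__(self):
        # render the bar for the current position
        filled = int(self.width * self.current / float(self.total))
        vals = {
            'current': self.current,
            'total': self.total,
            'bar': '#' * filled + '-' * (self.width - filled),
            'percent': 100.0 * self.current / self.total,
        }
        sys.stdout.write('\r' + self.fmt % vals)
        sys.stdout.flush()

    def done(self):
        # jump to 100% and terminate the line
        self.current = self.total
        self()
        sys.stdout.write('\n')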
Example #5
def generate_manifest(url, root_dir, globally, show_progress):
    '''Build the index and compile a manifest.'''
    start_time = time.time()
    manifest = Manifest(url, globally)
    html_path_info = _get_html_path_info(root_dir, url)
    if not html_path_info:
        raise NothingIndexedError()
    num_documents = len(html_path_info)
    if show_progress:
        progress_bar = ProgressBar(start_time=start_time,
                                   num_documents=num_documents)
    else:
        progress_bar = None
    _process_html_files(html_path_info, manifest, progress_bar)
    _summarize_build(num_documents, start_time)
    return manifest.json()
Example #6
        def start(self,
                  valid_set,
                  valid_loader,
                  mode='Validation',
                  epoch=None,
                  global_bar=None,
                  save_folder=None):
            confm_list = np.zeros((self.cf.num_classes, self.cf.num_classes))

            self.val_loss = AverageMeter()

            # Initialize epoch progress bar
            val_num_batches = math.ceil(valid_set.num_images /
                                        float(self.cf.valid_batch_size))
            prev_msg = '\n' + mode + ' estimated time...\n'
            bar = ProgressBar(val_num_batches, lenBar=20)
            bar.set_prev_msg(prev_msg)
            bar.update(show=False)

            # Validate model
            if self.cf.problem_type == 'detection':
                self.validation_loop(epoch, valid_loader, valid_set, bar,
                                     global_bar, save_folder)
            else:
                self.validation_loop(epoch, valid_loader, valid_set, bar,
                                     global_bar, confm_list)

            # Compute stats
            self.compute_stats(np.asarray(self.stats.val.conf_m),
                               self.val_loss)

            # Save stats
            self.save_stats(epoch)
            if mode == 'Epoch Validation':
                self.logger_stats.write_stat(
                    self.stats.train, epoch,
                    os.path.join(self.cf.train_json_path,
                                 'valid_epoch_' + str(epoch) + '.json'))
            elif mode == 'Validation':
                self.logger_stats.write_stat(self.stats.val, epoch,
                                             self.cf.val_json_file)
            elif mode == 'Test':
                self.logger_stats.write_stat(self.stats.val, epoch,
                                             self.cf.test_json_file)
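Example #7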
        def start(self, criterion, valid_set, valid_loader, epoch=None, global_bar=None):
            confm_list = np.zeros((self.cf.num_classes, self.cf.num_classes))

            val_loss = AverageMeter()

            # Initialize epoch progress bar
            val_num_batches = math.ceil(valid_set.num_images / float(self.cf.valid_batch_size))
            prev_msg = '\nValidation estimated time...\n'
            bar = ProgressBar(val_num_batches, lenBar=20)
            bar.set_prev_msg(prev_msg)
            bar.update(show=False)

            # Validate model
            for vi, data in enumerate(valid_loader):
                # Read data
                inputs, gts = data
                n_images, w, h, c = inputs.size()
                inputs = inputs.cuda()
                gts = gts.cuda()

                # Predict model (no gradients needed during validation;
                # torch.no_grad() replaces the deprecated volatile=True)
                with torch.no_grad():
                    outputs = self.model.net(inputs)
                predictions = outputs.data.max(1)[1].cpu().numpy()

                # Compute batch stats
                val_loss.update(criterion(outputs, gts).item() / n_images,
                                n_images)
                confm = compute_confusion_matrix(predictions,
                                                 gts.cpu().data.numpy(),
                                                 self.cf.num_classes,
                                                 self.cf.void_class)
                # plain array addition; map() returns a lazy iterator in
                # Python 3 and breaks the accumulation
                confm_list = confm_list + np.asarray(confm)

                # Save epoch stats
                self.stats.val.conf_m = confm_list
                self.stats.val.loss = val_loss.avg / (w * h * c)

                # Update messages
                self.update_msg(bar, global_bar)

            # Compute stats
            self.compute_stats(np.asarray(self.stats.val.conf_m), val_loss)

            # Save stats
            self.save_stats(epoch)
Example #8
 def testModel(self):
     n_test_samples, max_length = self.data['X_test'].shape
     accuracy_test = []
     preds_test = []
     self.initModel()
     test_bar = ProgressBar('Testing', max=len(self.data['X_test']))
     for batch in minibatches_iter(self.data['X_test'],
                                   self.data['Y_test'],
                                   masks=self.data['mask_test'],
                                   char_inputs=self.data['C_test'],
                                   lexicons=self.lexicons['lexicons_test'],
                                   batch_size=self.batch_size):
         inputs, targets, masks, char_inputs, lexicons = batch
         test_bar.next(len(inputs))
         # pass char_inputs as well; eval_fn/test_fn take five inputs,
         # as in trainingModel (the call here had dropped it)
         corrects = self.model.eval_fn(inputs, targets, masks, char_inputs,
                                       lexicons)
         _, preds = self.model.test_fn(inputs, targets, masks, char_inputs,
                                       lexicons)
         preds_test.append(preds)
         accuracy_test.append(corrects)
     this_test_accuracy = np.concatenate(
         accuracy_test)[0:n_test_samples].sum() / float(n_test_samples)
     test_bar.finish()
     print("Test accuracy: " + str(this_test_accuracy * 100) + "%")
     compute_f1_score(self.data['Y_test'], preds_test)
Example #9
 def get_static_datasets(self, datasets, raw=False):
     assert isinstance(datasets, (list, str))
     if not isinstance(datasets, list):
         datasets = [datasets]
     result = []
     grids_contained = [
         d for d in datasets if d in self._static_datasets.keys()
     ]
     if len(grids_contained) > 0:
         progress = ProgressBar(len(grids_contained))
         print("[INFO]: Load static grids <{}> for <{}> ...".format(
             grids_contained, self.area))
         progress.proceed(0)
         for i, grid_name in enumerate(grids_contained):
             if grid_name == 'seaMask':
                 lsm = self._static_datasets[grid_name]
                 if not raw:
                     lsm = (np.array(lsm) > 0.5).astype(np.float32)
                 result += [lsm]
             else:
                 result += [self._static_datasets[grid_name]]
             progress.proceed(i + 1)
     return np.array(result), grids_contained
Example #10
        def start(self,
                  train_loader,
                  train_set,
                  valid_set=None,
                  valid_loader=None):
            self.train_num_batches = math.ceil(train_set.num_images /
                                               float(self.cf.train_batch_size))
            self.val_num_batches = 0 if valid_set is None else math.ceil(valid_set.num_images / \
                                                                    float(self.cf.valid_batch_size))
            # Define early stopping control
            if self.cf.early_stopping:
                early_Stopping = Early_Stopping(self.cf)
            else:
                early_Stopping = None

            prev_msg = '\nTotal estimated training time...\n'
            self.global_bar = ProgressBar(
                (self.cf.epochs + 1 - self.curr_epoch) *
                (self.train_num_batches + self.val_num_batches),
                lenBar=20)
            self.global_bar.set_prev_msg(prev_msg)

            # Train process
            for epoch in range(self.curr_epoch, self.cf.epochs + 1):
                # Shuffle train data
                train_set.update_indexes()

                # Initialize logger
                epoch_time = time.time()
                self.logger_stats.write('\t ------ Epoch: ' + str(epoch) +
                                        ' ------ \n')

                # Initialize epoch progress bar
                self.msg.accum_str = '\n\nEpoch %d/%d estimated time...\n' % \
                                     (epoch, self.cf.epochs)
                epoch_bar = ProgressBar(self.train_num_batches, lenBar=20)
                epoch_bar.update(show=False)

                # Initialize stats
                self.stats.epoch = epoch
                self.train_loss = AverageMeter()
                self.confm_list = np.zeros(
                    (self.cf.num_classes, self.cf.num_classes))

                # Train epoch
                self.training_loop(epoch, train_loader, epoch_bar)

                # Save stats
                self.stats.train.conf_m = self.confm_list
                self.compute_stats(np.asarray(self.confm_list),
                                   self.train_loss)
                self.save_stats_epoch(epoch)
                self.logger_stats.write_stat(
                    self.stats.train, epoch,
                    os.path.join(self.cf.train_json_path,
                                 'train_epoch_' + str(epoch) + '.json'))

                # Validate epoch
                self.validate_epoch(valid_set, valid_loader, early_Stopping,
                                    epoch, self.global_bar)

                # Update scheduler
                if self.model.scheduler is not None:
                    self.model.scheduler.step(self.stats.val.loss)

                # Saving model if score improvement
                new_best = self.model.save(self.stats)
                if new_best:
                    self.logger_stats.write_best_stats(self.stats, epoch,
                                                       self.cf.best_json_file)

                # Update display values
                self.update_messages(epoch, epoch_time, new_best)

                if self.stop:
                    return

            # Save model without training
            if self.cf.epochs == 0:
                self.model.save_model()
Example #11
    class train(object):
        def __init__(self, logger_stats, model, cf, validator, stats, msg):
            # Initialize training variables
            self.logger_stats = logger_stats
            self.model = model
            self.cf = cf
            self.validator = validator
            self.logger_stats.write('\n- Starting train <--- \n')
            self.curr_epoch = 1 if self.model.best_stats.epoch == 0 else self.model.best_stats.epoch
            self.stop = False
            self.stats = stats
            self.best_acc = 0
            self.msg = msg
            self.loss = None
            self.outputs = None
            self.labels = None
            self.writer = SummaryWriter(
                os.path.join(cf.tensorboard_path, 'train'))

        def start(self,
                  train_loader,
                  train_set,
                  valid_set=None,
                  valid_loader=None):
            self.train_num_batches = math.ceil(train_set.num_images /
                                               float(self.cf.train_batch_size))
            self.val_num_batches = 0 if valid_set is None else math.ceil(valid_set.num_images / \
                                                                    float(self.cf.valid_batch_size))
            # Define early stopping control
            if self.cf.early_stopping:
                early_Stopping = Early_Stopping(self.cf)
            else:
                early_Stopping = None

            prev_msg = '\nTotal estimated training time...\n'
            self.global_bar = ProgressBar(
                (self.cf.epochs + 1 - self.curr_epoch) *
                (self.train_num_batches + self.val_num_batches),
                lenBar=20)
            self.global_bar.set_prev_msg(prev_msg)

            # Train process
            for epoch in range(self.curr_epoch, self.cf.epochs + 1):
                # Shuffle train data
                train_set.update_indexes()

                # Initialize logger
                epoch_time = time.time()
                self.logger_stats.write('\t ------ Epoch: ' + str(epoch) +
                                        ' ------ \n')

                # Initialize epoch progress bar
                self.msg.accum_str = '\n\nEpoch %d/%d estimated time...\n' % \
                                     (epoch, self.cf.epochs)
                epoch_bar = ProgressBar(self.train_num_batches, lenBar=20)
                epoch_bar.update(show=False)

                # Initialize stats
                self.stats.epoch = epoch
                self.train_loss = AverageMeter()
                self.confm_list = np.zeros(
                    (self.cf.num_classes, self.cf.num_classes))

                # Train epoch
                self.training_loop(epoch, train_loader, epoch_bar)

                # Save stats
                self.stats.train.conf_m = self.confm_list
                self.compute_stats(np.asarray(self.confm_list),
                                   self.train_loss)
                self.save_stats_epoch(epoch)
                self.logger_stats.write_stat(
                    self.stats.train, epoch,
                    os.path.join(self.cf.train_json_path,
                                 'train_epoch_' + str(epoch) + '.json'))

                # Validate epoch
                self.validate_epoch(valid_set, valid_loader, early_Stopping,
                                    epoch, self.global_bar)

                # Update scheduler
                if self.model.scheduler is not None:
                    self.model.scheduler.step(self.stats.val.loss)

                # Saving model if score improvement
                new_best = self.model.save(self.stats)
                if new_best:
                    self.logger_stats.write_best_stats(self.stats, epoch,
                                                       self.cf.best_json_file)

                # Update display values
                self.update_messages(epoch, epoch_time, new_best)

                if self.stop:
                    return

            # Save model without training
            if self.cf.epochs == 0:
                self.model.save_model()

        def training_loop(self, epoch, train_loader, epoch_bar):
            # Train epoch
            for i, data in enumerate(train_loader):
                # Read Data
                inputs, labels = data

                N, w, h, c = inputs.size()
                inputs = Variable(inputs).cuda()
                self.inputs = inputs
                self.labels = Variable(labels).cuda()

                # Predict model
                self.model.optimizer.zero_grad()
                self.outputs = self.model.net(inputs)
                predictions = self.outputs.data.max(1)[1].cpu().numpy()

                # Compute gradients
                self.compute_gradients()

                # Compute batch stats
                self.train_loss.update(float(self.loss.cpu().item()), N)
                confm = compute_confusion_matrix(
                    predictions,
                    self.labels.cpu().data.numpy(), self.cf.num_classes,
                    self.cf.void_class)
                # plain array addition; map() returns a lazy iterator in
                # Python 3 and breaks the accumulation (and np.asarray below)
                self.confm_list = self.confm_list + np.asarray(confm)

                # both branches of the normalize_loss check assigned the same
                # value, so a single assignment suffices
                self.stats.train.loss = self.train_loss.avg

                if not self.cf.debug:
                    # Save stats
                    self.save_stats_batch((epoch - 1) *
                                          self.train_num_batches + i)

                    # Update epoch messages
                    self.update_epoch_messages(epoch_bar, self.global_bar,
                                               self.train_num_batches, epoch,
                                               i)

        def save_stats_epoch(self, epoch):
            # Save logger
            if epoch is not None:
                # Epoch loss tensorboard
                self.writer.add_scalar('losses/epoch', self.stats.train.loss,
                                       epoch)
                self.writer.add_scalar('metrics/accuracy',
                                       100. * self.stats.train.acc, epoch)

        def save_stats_batch(self, batch):
            # Save logger
            if batch is not None:
                self.writer.add_scalar('losses/batch', self.stats.train.loss,
                                       batch)

        def compute_gradients(self):
            self.loss = self.model.loss(self.outputs, self.labels)
            self.loss.backward()
            self.model.optimizer.step()

        def compute_stats(self, confm_list, train_loss):
            TP_list, TN_list, FP_list, FN_list = extract_stats_from_confm(
                confm_list)
            mean_accuracy = compute_accuracy(TP_list, TN_list, FP_list,
                                             FN_list)
            self.stats.train.acc = np.nanmean(mean_accuracy)
            # train_loss.avg is already a Python float (see training_loop)
            self.stats.train.loss = float(train_loss.avg)

        def validate_epoch(self, valid_set, valid_loader, early_Stopping,
                           epoch, global_bar):

            if valid_set is not None and valid_loader is not None:
                # Set model in validation mode
                self.model.net.eval()

                self.validator.start(valid_set,
                                     valid_loader,
                                     'Epoch Validation',
                                     epoch,
                                     global_bar=global_bar)

                # Early stopping checking
                if self.cf.early_stopping:
                    early_Stopping.check(self.stats.train.loss,
                                         self.stats.val.loss,
                                         self.stats.val.mIoU,
                                         self.stats.val.acc)
                    if early_Stopping.stop:
                        self.stop = True
                # Set model in training mode
                self.model.net.train()

        def update_messages(self, epoch, epoch_time, new_best):
            # new_best is passed by start(); the signature had omitted it
            # Update logger
            epoch_time = time.time() - epoch_time
            self.logger_stats.write('\t Epoch step finished: %ds \n' %
                                    (epoch_time))

            # Compute best stats
            self.msg.msg_stats_last = '\nLast epoch: acc = %.2f, loss = %.5f\n' % (
                100 * self.stats.val.acc, self.stats.val.loss)
            if self.best_acc < self.stats.val.acc:
                self.msg.msg_stats_best = 'Best case: epoch = %d, acc = %.2f, loss = %.5f\n' % (
                    epoch, 100 * self.stats.val.acc, self.stats.val.loss)

                msg_confm = self.stats.val.get_confm_str()
                self.logger_stats.write(msg_confm)
                self.msg.msg_stats_best = self.msg.msg_stats_best + msg_confm

                self.best_acc = self.stats.val.acc

        def update_epoch_messages(self, epoch_bar, global_bar,
                                  train_num_batches, epoch, batch):
            # Update progress bar
            epoch_bar.set_msg('loss = %.5f' % self.stats.train.loss)
            self.msg.last_str = epoch_bar.get_message(step=True)
            global_bar.set_msg(self.msg.accum_str + self.msg.last_str + self.msg.msg_stats_last + \
                               self.msg.msg_stats_best)
            global_bar.update()

            # writer.add_scalar('train_loss', train_loss.avg, curr_iter)

            # Display progress
            curr_iter = (epoch - 1) * train_num_batches + batch + 1
            if (batch + 1) % math.ceil(train_num_batches / 20.) == 0:
                self.logger_stats.write(
                    '[Global iteration %d], [iter %d / %d], [train loss %.5f] \n'
                    % (curr_iter, batch + 1, train_num_batches,
                       self.stats.train.loss))
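Example #12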
    def trainingModel(self):

        self.initModel()

        best_acc = 0
        best_validation_accuracy = 0
        stop_count = 0
        lr = self.learning_rate
        patience = self.patience
        n_dev_samples, max_length = self.data['X_dev'].shape
        n_test_samples, max_length = self.data['X_test'].shape

        for epoch in range(1, self.num_epochs + 1):
            print('Epoch %d (learning rate=%.4f, decay rate=%.4f): ' %
                  (epoch, lr, self.decay_rate))
            train_err = 0.0
            train_batches = 0
            train_bar = ProgressBar('Training', max=len(self.data['X_train']))
            for batch in minibatches_iter(
                    self.data['X_train'],
                    self.data['Y_train'],
                    masks=self.data['mask_train'],
                    char_inputs=self.data['C_train'],
                    lexicons=self.lexicons['lexicons_train'],
                    batch_size=self.batch_size,
                    shuffle=True):
                inputs, targets, masks, char_inputs, lexicons = batch
                err = self.model.train_fn(inputs, targets, masks, char_inputs,
                                          lexicons)
                train_err += err
                train_bar.next(len(inputs))

                if train_batches > 0 and train_batches % self.valid_freq == 0:
                    accuracy_valid = []
                    for batch in minibatches_iter(
                            self.data['X_dev'],
                            self.data['Y_dev'],
                            masks=self.data['mask_dev'],
                            lexicons=self.lexicons['lexicons_dev'],
                            char_inputs=self.data['C_dev'],
                            batch_size=self.batch_size):
                        inputs, targets, masks, char_inputs, lexicons = batch
                        accuracy_valid.append(
                            self.model.eval_fn(inputs, targets, masks,
                                               char_inputs, lexicons))
                    this_validation_accuracy = np.concatenate(accuracy_valid)[
                        0:n_dev_samples].sum() / float(n_dev_samples)

                    if this_validation_accuracy > best_validation_accuracy:
                        print("\nTrain loss, " + str(
                            (train_err / self.valid_freq)) +
                              ", validation accuracy: " +
                              str(this_validation_accuracy * 100) + "%")
                        best_validation_accuracy = this_validation_accuracy
                        stop_count = 0  # reset the patience counter on improvement
                        preds_test = []
                        accuracy_test = []
                        for batch in minibatches_iter(
                                self.data['X_test'],
                                self.data['Y_test'],
                                masks=self.data['mask_test'],
                                char_inputs=self.data['C_test'],
                                lexicons=self.lexicons['lexicons_test'],
                                batch_size=self.batch_size):
                            inputs, targets, masks, char_inputs, lexicons = batch
                            _, preds = self.model.test_fn(
                                inputs, targets, masks, char_inputs, lexicons)
                            preds_test.append(preds)
                            accuracy_test.append(
                                self.model.eval_fn(inputs, targets, masks,
                                                   char_inputs, lexicons))
                        this_test_accuracy = np.concatenate(accuracy_test)[
                            0:n_test_samples].sum() / float(n_test_samples)
                        # print "F1-score: " + str(compute_f1_score(self.data["Y_test"], preds_test, self.data['label_alphabet']) * 100)
                        print("Test accuracy: " +
                              str(this_test_accuracy * 100) + "%")
                        if best_acc < this_test_accuracy:
                            best_acc = this_test_accuracy
                            write_model_data(self.model.network,
                                             self.model_path + '/best_model')

                    else:
                        # count validation checks without improvement for the
                        # patience-based early stop (stop_count was never
                        # updated before, so the check below could not fire)
                        stop_count += 1
                    train_err = 0
                train_batches += 1

            train_bar.finish()

            # stop if dev accuracy has not improved for `patience`
            # validation checks in a row
            if stop_count >= patience:
                break

            # re-compile a function with new learning rate for training
            if self.update_algo != 'adadelta':
                lr = self.learning_rate / (1.0 + epoch * self.decay_rate)
                updates = utils.create_updates(self.model.loss_train,
                                               self.model.params,
                                               self.update_algo,
                                               lr,
                                               momentum=self.momentum)
                self.model.train_fn = theano.function(
                    [
                        self.model.input_var, self.model.target_var,
                        self.model.mask_var, self.model.char_input_var,
                        self.model.lex_var
                    ],
                    outputs=self.model.loss_train,
                    updates=updates,
                    allow_input_downcast=True)

            print("Epoch " + str(epoch) + " finished.")
        print("The final best acc: " + str(best_acc * 100) + "%")

        if self.output_predict:
            f = codecs.open('./results/10-fold.txt', 'a+', 'utf-8')
            f.write(str(best_acc * 100) + '\n')
            f.close()
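Example #13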
    def perceptron_train(self, num_iterations: int, accuracy_step=10) -> None:
        """
        Given the number of iterations for training, we loop over the
        training file that number of times, performing the perceptron
        algorithm; the result is updated weights in self.w.
        :param num_iterations: number of iterations to perform (int)
        :param accuracy_step: interval between accuracy calculation (int)
        :return: None
        """
        print("training started")
        self.w = np.zeros(self.num_of_features)
        num_samples = 0
        for _ in dep_sample_generator(self.training_file_path):
            num_samples += 1
        st_time = time.time()
        # dep_weights = DepOptimizer(self.w, None, path_to_train_file=self.training_file_path,
        #                            dicts=self.dicts, minimal=self.minimal) # moved to class level
        train_word_accuracies = []
        train_sentence_accuracies = []
        for i in range(num_iterations):
            print("iteration: ", i)
            progress = ProgressBar(num_samples, fmt=ProgressBar.FULL)
            total_sentences = 0
            correct_sentences = 0
            total_words = 0
            correct_words = 0
            it_st_time = time.time()
            for idx, sample in enumerate(
                    dep_sample_generator(self.training_file_path)):
                total_sentences += 1
                sample_len = sample[-1].idx

                successors = self.fc_graphs[
                    sample_len]  # sample_to_full_successors(sample_len)
                # dep_weights = DepOptimizer(self.w, sample, dicts=self.dicts, minimal=self.minimal)
                self.dep_weights.update_sample(sample)
                self.dep_weights.update_weights(self.w)
                graph = Digraph(successors, self.dep_weights.get_score)
                mst_start_time = time.time()
                argmax_tree = graph.mst().successors
                argmax_tree = {k: v for k, v in argmax_tree.items() if v}
                ground_truth_successors = self.gt_trees[
                    idx]  # sample_to_successors(sample)

                # print("mst calc time: %.5f secs" % (time.time() - mst_start_time))
                inferred_sample = successors_to_sample(deepcopy(sample),
                                                       argmax_tree)
                for j in range(len(sample)):
                    if not j:
                        # skip ROOT
                        continue
                    total_words += 1
                    if sample[j].head == inferred_sample[j].head:
                        correct_words += 1

                #  according to the Python docs, dict equality works as
                #  expected here: == returns True only if both dicts have the
                #  same keys mapped to the same values (and dict.values()
                #  order corresponds to dict.keys() order)
                if argmax_tree != ground_truth_successors:
                    # features_ground_truth = self.feature_extractor(sample, self.dicts, self.minimal)
                    #  could also be replaced by a dict
                    features_ground_truth = self.gt_global_features[idx]
                    feat_calc_start_time = time.time()
                    features_argmax = self.feature_extractor(
                        infered_sample,
                        self.dicts,
                        self.minimal,
                        use_mcdonald=self.use_mcdonald)
                    # print("feature extraction time: %.5f" % (time.time() - feat_calc_start_time))
                    self.w[list(features_ground_truth.keys())] += np.array(
                        list(features_ground_truth.values()))
                    self.w[list(features_argmax.keys())] -= np.array(
                        list(features_argmax.values()))

                else:
                    correct_sentences += 1
                progress.current += 1
                progress()
            sen_acc = 1.0 * correct_sentences / total_sentences
            word_acc = 1.0 * correct_words / total_words
            train_sentence_accuracies.append(sen_acc)
            train_word_accuracies.append(word_acc)
            progress.done()
            print('\n')
            print(
                'iteration/epoch ', i, "- iteration time: %.2f min" %
                ((time.time() - it_st_time) / 60),
                ", train accuracy:: sentence: %.3f " % sen_acc,
                " words: %.3f " % word_acc,
                ", total time: %.2f min" % ((time.time() - st_time) / 60))

            if ((i + 1) % accuracy_step == 0
                    and self.path_to_valid_file is not None):
                print("validation accuracy calculation step:")
                valid_sent_acc, valid_word_acc = self.calc_accuracy(
                    self.path_to_valid_file)
                print("valid accuracy:: sentence: %.3f" % valid_sent_acc,
                      " words: %.3f" % valid_word_acc)
                self.w.dump(self.weights_file_name)
                print("saved weights @ ", self.weights_file_name)
                # save checkpoint
                path = self.training_file_path + "_epoch_" + str(
                    i) + ".checkpoint"
                ckpt = {}
                ckpt['weights'] = self.w.tolist()
                ckpt['train_acc'] = (sen_acc, word_acc)
                ckpt['valid_acc'] = (valid_sent_acc, valid_word_acc)
                with open(path, 'wb') as fp:
                    pickle.dump(ckpt, fp)
                print("saved checkpoint @ ", path)

        self.w.dump(self.weights_file_name)
        path = self.training_file_path + "_" + str(i +
                                                   1) + "_epochs" + ".results"
        ckpt = {}
        ckpt['weights'] = self.w.tolist()
        ckpt['train_word_acc'] = train_word_accuracies
        ckpt['train_sen_acc'] = train_sentence_accuracies
        with open(path, 'wb') as fp:
            pickle.dump(ckpt, fp)
        print("saved final results @ ", path)
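Example #14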
        def start(self, criterion, optimizer, train_loader, train_set, valid_set=None, valid_loader=None, scheduler=None):
            train_num_batches = math.ceil(train_set.num_images / float(self.cf.train_batch_size))
            val_num_batches = 0 if valid_set is None else math.ceil(valid_set.num_images / float(self.cf.valid_batch_size))
            # Define early stopping control
            if self.cf.early_stopping:
                early_Stopping = Early_Stopping(self.cf)
            else:
                early_Stopping = None

            prev_msg = '\nTotal estimated training time...\n'
            global_bar = ProgressBar((self.cf.epochs+1-self.curr_epoch)*(train_num_batches+val_num_batches), lenBar=20)
            global_bar.set_prev_msg(prev_msg)


            # Train process
            for epoch in range(self.curr_epoch, self.cf.epochs + 1):
                # Shuffle train data
                train_set.update_indexes()

                # Initialize logger
                epoch_time = time.time()
                self.logger_stats.write('\t ------ Epoch: ' + str(epoch) + ' ------ \n')

                # Initialize epoch progress bar
                self.msg.accum_str = '\n\nEpoch %d/%d estimated time...\n' % \
                                     (epoch, self.cf.epochs)
                epoch_bar = ProgressBar(train_num_batches, lenBar=20)
                epoch_bar.update(show=False)

                # Initialize stats
                train_loss = AverageMeter()
                confm_list = np.zeros((self.cf.num_classes, self.cf.num_classes))

                # Train epoch
                for i, data in enumerate(train_loader):
                    # Read Data
                    inputs, labels = data

                    N, w, h, c = inputs.size()
                    inputs = Variable(inputs).cuda()
                    labels = Variable(labels).cuda()

                    # Predict model
                    optimizer.zero_grad()
                    outputs = self.model.net(inputs)
                    predictions = outputs.data.max(1)[1].cpu().numpy()

                    # Compute gradients
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

                    # Compute batch stats
                    train_loss.update(loss.item(), N)
                    confm = compute_confusion_matrix(predictions,
                                                     labels.cpu().data.numpy(),
                                                     self.cf.num_classes,
                                                     self.cf.void_class)
                    # plain array addition; map() returns a lazy iterator in
                    # Python 3 and breaks the accumulation
                    confm_list = confm_list + np.asarray(confm)
                    self.stats.train.loss = train_loss.avg / (w * h * c)

                    # Save stats
                    self.save_stats_batch((epoch - 1) * train_num_batches + i)

                    # Update epoch messages
                    self.update_epoch_messages(epoch_bar, global_bar,
                                               train_num_batches, epoch, i)

                # Save stats
                self.stats.train.conf_m = confm_list
                self.compute_stats(np.asarray(confm_list), train_loss)
                self.save_stats_epoch(epoch)

                # Validate epoch
                self.validate_epoch(valid_set, valid_loader, criterion, early_Stopping, epoch, global_bar)

                # Update scheduler
                if scheduler is not None:
                    scheduler.step(self.stats.val.loss)

                # Saving model if needed
                self.model.net.save(self.stats)

                # Update display values
                self.update_messages(epoch, epoch_time)

                if self.stop:
                    return

            # Save model without training
            if self.cf.epochs == 0:
                self.model.save_model(self.model.net)
Example #15
def Train(cf, sess, sb):
    # Path definitions
    train_image_path = os.path.join(cf.train_dataset_path,
                                    cf.train_folder_names[0])
    train_gt_path = os.path.join(cf.train_dataset_path,
                                 cf.train_folder_names[1])
    valid_image_path = os.path.join(cf.valid_dataset_path,
                                    cf.valid_folder_names[0])
    valid_gt_path = os.path.join(cf.valid_dataset_path,
                                 cf.valid_folder_names[1])
    trainable_var = tf.trainable_variables()
    # Training dataset set up
    train_set = Data_loader(cf, train_image_path, cf.train_samples,
                            cf.size_image_train, train_gt_path)
    train_set.Load_dataset(cf.train_batch_size)
    # Validation dataset set up
    valid_set = Data_loader(cf, valid_image_path, cf.valid_samples_epoch,
                            cf.size_image_valid, valid_gt_path)
    valid_set.Load_dataset(cf.valid_batch_size)
    # Symbol creation for metrics and statistics
    train_stats = Statistics(cf.train_batch_size, sb)
    valid_stats = Statistics(cf.valid_batch_size, sb)
    # More summary information to add
    #tf.summary.scalar("Mean_loss", train_mLoss)
    #img_conf_mat = tf.placeholder(tf.uint8, shape=[None, 480, 640, 3], name="conf_mat")
    tf.summary.scalar("Mean_IoU/train",
                      train_stats.mean_IoU,
                      collections=['train'])
    tf.summary.scalar("Mean_Acc/train",
                      train_stats.accuracy_class,
                      collections=['train'])
    tf.summary.scalar("Mean_IoU/train_valid",
                      valid_stats.mean_IoU,
                      collections=['train_valid'])
    tf.summary.scalar("Mean_Acc/train_valid",
                      valid_stats.accuracy_class,
                      collections=['train_valid'])

    train_writer = sb.tensorBoard.save(cf.exp_folder + cf.log_path + 'train/',
                                       sess)
    val_writer = sb.tensorBoard.save(
        cf.exp_folder + cf.log_path + 'train_valid/', sess)

    # Early stopping
    if cf.early_stopping:
        e_stop = Early_Stopping(cf.patience)
    # Training
    feed_dict = []
    stop = False
    epoch = 1

    # Epoch loop
    while epoch < cf.epochs + 1 and not stop:
        epoch_time = time.time()
        if cf.shuffle:
            train_set.Shuffle()
            valid_set.Shuffle()
        loss_per_batch = np.zeros(train_set.num_batches, dtype=np.float32)
        conf_mat = np.zeros((cf.num_classes, cf.num_classes), dtype=np.float32)
        # initialize/reset the running variables
        sess.run(train_stats.running_vars_initializer)
        # Progress bar
        prog_bar = ProgressBar(train_set.num_batches)
        # Dataset batch loop
        for i in range(train_set.num_batches):
            batch_x, batch_y = train_set.Next_batch(cf.train_batch_size,
                                                    crop=True)
            feed_dict = {
                sb.model.simb_image: batch_x,
                sb.model.simb_gt: batch_y,
                sb.model.simb_is_training: True
            }
            simbol_list = [
                sb.train_op, sb.loss_fun, sb.model.annotation_pred,
                train_stats.update_IoU, train_stats.update_acc_class,
                train_stats.conf_matrix_batch
            ]
            sess_return = sess.run(simbol_list, feed_dict)
            loss_per_batch[i] = sess_return[1]
            #pred = sess_return[2]
            conf_mat += sess_return[5]
            prog_bar.update()
        # Epoch train summary info
        conf_mat = conf_mat / train_set.num_batches
        img_conf_mat = confm_metrics2image(conf_mat, cf.labels)
        img_conf_mat = tf.expand_dims(img_conf_mat, 0)
        tf.summary.image("conf_mat/train",
                         img_conf_mat,
                         max_outputs=2,
                         collections=['train'])
        train_mLoss = np.mean(np.asarray(loss_per_batch))
        summary_op_train = sb.tensorBoard.set_up('train')
        mIoU_train, mAcc_train, summary_train = sess.run([
            train_stats.mean_IoU, train_stats.accuracy_class, summary_op_train
        ], feed_dict)
        train_set.Reset_Offset()

        # Validation in train
        if cf.valid_samples_epoch > 0:
            conf_mat = np.zeros((cf.num_classes, cf.num_classes),
                                dtype=np.float32)
            valid_loss_batch = np.zeros(valid_set.num_batches,
                                        dtype=np.float32)
            sess.run(valid_stats.running_vars_initializer)
            for i in range(valid_set.num_batches):
                batch_x, batch_y = valid_set.Next_batch(cf.valid_batch_size)
                feed_dict = {
                    sb.model.simb_image: batch_x,
                    sb.model.simb_gt: batch_y,
                    sb.model.simb_is_training: False
                }
                simbol_list = [
                    sb.loss_fun, sb.model.annotation_pred,
                    valid_stats.update_IoU, valid_stats.update_acc_class,
                    valid_stats.conf_matrix_batch
                ]
                sess_return = sess.run(simbol_list, feed_dict)
                valid_loss_batch[i] = sess_return[0]
                pred = sess_return[1]
                conf_mat += sess_return[4]
            # average over the validation batches, not the training batches
            conf_mat = conf_mat / valid_set.num_batches
            img_conf_mat = confm_metrics2image(conf_mat, cf.labels)
            img_conf_mat = tf.expand_dims(img_conf_mat, 0)
            tf.summary.image("conf_mat/train_valid",
                             img_conf_mat,
                             max_outputs=2,
                             collections=['train_valid'])
            summary_op_val = sb.tensorBoard.set_up('train_valid')
            mIoU_valid, mAcc_valid, summary_val = sess.run([
                valid_stats.mean_IoU, valid_stats.accuracy_class,
                summary_op_val
            ])
            valid_mLoss = np.mean(np.asarray(valid_loss_batch))
            valid_set.Reset_Offset()

        # Screen display
        train_writer.add_summary(summary_train, epoch)
        if cf.valid_samples_epoch > 0:
            # summary_val only exists when validation ran this epoch
            val_writer.add_summary(summary_val, epoch)
        epoch_time = time.time() - epoch_time
        print("Epoch: %d, Time: %ds \n\t Train_loss: %g, mIoU: %g, mAcc: %g" %
              (epoch, epoch_time, train_mLoss, mIoU_train, mAcc_train))
        if cf.valid_samples_epoch > 0:
            print("\t Valid_loss: %g, mIoU: %g, mAcc: %g" %
                  (valid_mLoss, mIoU_valid, mAcc_valid))
            sb.model.modelIO.Save(cf, sess, train_mLoss, mIoU_train,
                                  mAcc_train, valid_mLoss, mIoU_valid,
                                  mAcc_valid)
            if cf.early_stopping:
                stop = e_stop.Check(cf.save_condition, train_mLoss, mIoU_train,
                                    mAcc_train, valid_mLoss, mIoU_valid,
                                    mAcc_valid)
        else:
            sb.model.modelIO.Save(cf, sess, train_mLoss, mIoU_train,
                                  mAcc_train)
            if cf.early_stopping:
                stop = e_stop.Check(cf.save_condition, train_mLoss, mIoU_train,
                                    mAcc_train)
        epoch += 1
Example #16
LR = 10e-4
EPOCH = 10

# lambda2 = lambda epoch: LR* (0.9 ** epoch)
optim = torch.optim.SGD(model.parameters(), lr=LR, momentum=0.1)
# scheduler = torch.optim.lr_scheduler.LambdaLR(optim, lr_lambda=lambda2)

model_name = 'fcn'
model_file = model_name + '.pkl'
if os.path.exists(model_file):
    sd = torch.load(model_file)
    model.load_state_dict(sd)

running_loss = 0
for epoch in range(100):
    progress = ProgressBar(len(train_loader))
    for step, (img, target) in enumerate(train_loader):
        img = img.to(device)
        #         img = torch.ceil(img*255)
        target = torch.round(target * 255)
        target[target == 255] = 0
        # print('t:',target[target>0])
        target = target.to(device, dtype=torch.long)

        optim.zero_grad()
        d = model(img)['out']
        #         print('target:',d.size())
        #         print('target:',target.size())
        log_p = F.log_softmax(d, dim=1)
        # assumes the masks arrive as [N, 1, H, W]; squeezing the channel dim
        # generalizes target[0], which only worked for batch size 1
        loss = F.nll_loss(log_p, target.squeeze(1), reduction='mean')
        loss.backward()