Esempio n. 1
0
def validate_classifier(val_loader, model, loss_function, labels_names):
    """Validate the model on all validation batches from one epoch.

    Same validation function is used to validate the base and TCN classifier.
    Same validation function is used to validate unimodal and multimodal models.

    The whole pass runs with gradients disabled. As a side effect,
    ``loss_function.pos_weight`` is overwritten with the validation dataset's
    ``pos_class_weights`` (moved to CUDA). Features and labels are moved to
    CUDA as well.

    Args:
        val_loader: PyTorch validation data loader. Each batch unpacks to ``(features, labels)``, where
            ``features`` is an iterable of tensors that are passed positionally to the model.
        model: PyTorch model returning ``(logits, predictions)``
        loss_function: PyTorch loss function
        labels_names (array/list): Names of labels/targets

    Returns:
        val_metrics (dict of dict of lists): Dictionary that maps class/label/target name and metric name to a
            corresponding validation metric value. Besides the class/label/target names provided in labels_names, it
            also contains a key 'overall' referring to the average over all targets/labels.
    """

    with torch.no_grad():
        # Aggregates from all validation batches
        all_predictions = []
        all_labels = []
        all_losses = []

        # Apply loss weights based on validation dataset class weights
        loss_function.pos_weight = val_loader.dataset.pos_class_weights.cuda()

        for features, labels in tqdm(val_loader, desc='    valid'):
            features = [f.cuda() for f in features]
            labels = labels.cuda()

            # Call the module itself (not ``forward``) so that registered
            # forward hooks are executed.
            logits, predictions = model(*features)

            # Get (already weighted) loss matrix (batch_size x class_num)
            batch_losses = loss_function(logits, labels)

            all_predictions.extend(predictions.tolist())
            all_labels.extend(labels.tolist())
            all_losses.extend(batch_losses.tolist())

        # Calculate validation metrics
        val_metrics = calculate_metrics(
            all_predictions,
            all_labels,
            labels_names,
            losses=all_losses,
            loss_function_pos_weights=loss_function.pos_weight.tolist())

    return val_metrics
Esempio n. 2
0
def test_model(model, data, labels, mc_steps=None):
    """Test a model and return probabilities, predicted labels, entropy, and metrics.

    Args:
        model: Object exposing ``predict(data, ...)`` that returns
            ``(pred, pred_labels, entropy)`` and a ``model.name`` attribute
            used for the printed summary.
        data: Input array; a trailing axis is appended before prediction.
        labels: Ground-truth labels passed to ``calculate_metrics``.
        mc_steps: Optional value forwarded to ``model.predict`` as
            ``mc_steps``. When ``None`` the keyword is not passed at all, so
            the model's own default applies.

    Returns:
        tuple: ``(pred, pred_labels, entropy, metrics)`` where ``metrics`` is
        the result of ``calculate_metrics(pred_labels, labels)`` with the
        elapsed prediction time in seconds appended as the last element.
    """
    # Build the optional keyword once instead of duplicating the predict call.
    predict_kwargs = {} if mc_steps is None else {'mc_steps': mc_steps}

    start_time = time()
    pred, pred_labels, entropy = model.predict(
        np.expand_dims(data, axis=-1), **predict_kwargs)
    elapsed_time = time() - start_time
    metrics = calculate_metrics(pred_labels, labels) + [elapsed_time]

    # Print the measured time directly rather than via a fixed index, so the
    # summary stays correct regardless of how many metrics are returned.
    print('{} ~ Accuracy: {:.4f} ~ Time: {:.2f} seconds'.format(
        model.model.name, metrics[0], elapsed_time))
    return pred, pred_labels, entropy, metrics
Esempio n. 3
0
    def __predict(self, return_df_metrics=True):
        """Load the best saved model and run it on ``self.X_test``.

        Args:
            return_df_metrics: When true, class indices are taken with
                ``argmax`` over axis 1 and the result of
                ``calculate_metrics`` is returned. Otherwise the elapsed time
                is saved to ``test_duration.csv`` in the output folder and the
                raw predictions are returned.

        Returns:
            The ``calculate_metrics`` result, or the raw model predictions.
        """
        # The timer starts before model loading, so the saved duration
        # includes the load as well as inference.
        started_at = time.time()
        best_model = keras.models.load_model(
            self.output_folder + '/best_model.hdf5',
            custom_objects={'TCN': TCN})
        raw_predictions = best_model.predict(self.X_test,
                                             batch_size=self.batch_size)

        if not return_df_metrics:
            elapsed = time.time() - started_at
            save_test_duration(self.output_folder + '/test_duration.csv',
                               elapsed)
            return raw_predictions

        predicted_classes = np.argmax(raw_predictions, axis=1)
        # The duration argument is passed as a fixed 0.0 on this path.
        return calculate_metrics(self.y_true, predicted_classes, 0.0,
                                 self.output_folder)
Esempio n. 4
0
def train():
    """Train a CatBoost classifier on ``train.csv`` and persist model and metrics.

    Reads ``<input_path>/training/train.csv``, uses the
    ``'narrowing-diagnosis'`` column as target, loads hyper-parameters from
    ``params.json`` in the working directory, fits on a random train/test
    split, then writes ``heart.cbm`` and ``metrics.json`` under
    ``model_path``. ``input_path`` and ``model_path`` are module-level names.
    """
    training_path = os.path.join(input_path, 'training')
    features = pd.read_csv(f"{training_path}/train.csv")

    # pop() removes the target column, leaving only feature columns behind
    target = features.pop('narrowing-diagnosis')

    with open("params.json") as params_fh:
        params = json.load(params_fh)

    X_train, X_test, y_train, y_test = train_test_split(features, target)

    data_params = params['data_params']
    train_pool = Pool(data=X_train,
                      cat_features=data_params['cat_features'],
                      label=y_train)
    eval_pool = Pool(data=X_test,
                     cat_features=data_params['cat_features'],
                     label=y_test)

    classifier = CatBoostClassifier(**params['model_params'])
    classifier.fit(train_pool, eval_set=eval_pool, verbose=False, plot=False)
    classifier.save_model(f"{model_path}/heart.cbm")

    print('model has been trained successfully')

    # Score the held-out split with the metrics listed in the params file
    probabilities = classifier.predict_proba(eval_pool)
    model_metrics = calculate_metrics(data_params['metrics'], y_test,
                                      probabilities)

    print('Metrics for trained model:')
    for k, v in model_metrics.items():
        print(f'{k}={v}')

    with open(f"{model_path}/metrics.json", 'w') as metrics_fh:
        serialized = json.dumps(model_metrics)
        metrics_fh.write(serialized)
    def on_step_end(self, episode_step, logs):
        """Record the step loss; every ``interval`` steps log metrics and maybe save.

        On every call the step counter is advanced and the first entry of
        ``logs["metrics"]`` is appended to ``self.loss``. When the step count
        is a multiple of ``self.interval``, validation metrics are computed
        for the target model, written to ``self.writer`` and the target model
        is saved if FN/FP are within their bounds and ``save_after`` steps
        have passed.
        """
        self.step += 1
        self.loss.append(logs.get("metrics")[0])

        # Nothing more to do between reporting intervals
        if self.step % self.interval:
            return

        predictions = make_predictions(self.model.target_model, self.X_val)
        stats = calculate_metrics(self.y_val, predictions)

        # If all entries are NaN (this happens during training) report 0
        stats["loss"] = 0 if np.isnan(self.loss).all() else np.nanmean(self.loss)
        self.loss = []  # Reset loss every `self.interval`

        for tag, value in stats.items():
            entry = Summary.Value(tag=tag, simple_value=value)
            self.writer.add_summary(Summary(value=[entry]), global_step=self.step)

        within_bounds = stats.get("FN") <= self.FN_bound and stats.get("FP") <= self.FP_bound
        if within_bounds and self.step >= self.save_after:
            print(f"Model saved! FN: {stats.get('FN')}; FP: {stats.get('FP')}")
            self.model.target_model.save(f"./models/{datetime.now().strftime('%Y%m%d')}_FN{stats.get('FN')}_FP{stats.get('FP')}.h5")
def test_classifier(test_loader, model, labels_names):
    """Test the model on all test batches.

    Same test function is used to test the base and TCN classifier.
    Same test function is used to test unimodal and multimodal models.

    The whole pass runs with gradients disabled; features and labels are
    moved to CUDA.

    Args:
        test_loader: PyTorch test data loader. Each batch unpacks to ``(features, labels)``, where ``features``
            is an iterable of tensors that are passed positionally to the model.
        model: PyTorch model returning a pair whose second element is the predictions
        labels_names (array/list): Names of labels/targets

    Returns:
        test_metrics (dict of dict of lists): Dictionary that maps class/label/target name and metric name to a
            corresponding test metric value. Besides the class/label/target names provided in labels_names, it also
            contains a key 'overall' referring to the average over all targets/labels.
    """

    with torch.no_grad():
        # Aggregates from all test batches
        all_predictions = []
        all_labels = []

        for features, labels in tqdm(test_loader, desc='    test'):
            features = [f.cuda() for f in features]
            labels = labels.cuda()

            # Call the module itself (not ``forward``) so that registered
            # forward hooks are executed. The first output is not needed here.
            _, predictions = model(*features)

            all_predictions.extend(predictions.tolist())
            all_labels.extend(labels.tolist())

        # Calculate test metrics
        test_metrics = calculate_metrics(all_predictions, all_labels,
                                         labels_names)

    return test_metrics
Esempio n. 7
0
        # Running sums of the two scores returned by calculate_metrics;
        # they are divided by num_experiments after the loops.
        roc = 0.0
        ap = 0.0

        folds = data[experiment_type]

        # Each fold is a 4-tuple; only the train features and the test
        # features/labels are used here.
        for fold_idx, (X_train, _, X_test, y_test) in enumerate(folds):

            for iter_idx in range(cfg["num_iterations"]):
                # Reseed NumPy's global RNG with the iteration index
                np.random.seed(iter_idx)

                params.update({"get_top": get_top})
                model = model_cls(**params)

                # The model is fitted on features only (no labels are passed)
                model.fit(X_train)

                # Column 1 of the predicted probabilities is used as the score
                y_test_pred = model.predict_proba(X_test)[:, 1]

                iter_roc, iter_ap = calculate_metrics(y_test, y_test_pred)

                roc += iter_roc
                ap += iter_ap

        # NOTE(review): num_experiments is defined outside this view;
        # presumably it equals len(folds) * cfg["num_iterations"] - confirm.
        roc = np.round(roc / num_experiments, decimals=4)
        ap = np.round(ap / num_experiments, decimals=4)

        # Append one CSV row: model, dataset, get_top, mean ROC, mean AP
        res += f"{model_name},{data_name},{str(get_top)},{str(roc)},{str(ap)}\n"

    # NOTE(review): the handle is not closed within the visible lines;
    # confirm it is closed later or switch to a `with` block.
    res_f = open(fname, "w+")

    res_f.write(res)
Esempio n. 8
0
    def _train(self):
        """Run ``FLAGS.valid_step_period`` training steps and return a result dict.

        Each step randomly performs either a "struc" update on unsupervised
        data or a "meta" update on regular data, chosen by comparing a random
        draw against a step-dependent threshold. After the loop, metrics are
        attached and a running "best validation score" summary is added.

        Returns:
            The nested ``loss_and_info`` structure of the *last* step with
            ``_func`` applied across it, plus the keys ``best_ahb_metric``,
            ``deviation_after_best`` and ``hpo_metric``.
        """
        FLAGS = self.FLAGS
        for _ in range(FLAGS.valid_step_period):
            # threshold = 1 - 1 / (1 + decay_rate * floor(step / decay_steps));
            # it is 0 while step < decay_steps.
            threshold = (1.0 - 1.0 /
                         (1.0 + self.FLAGS.decay_rate *
                          np.floor(self.step / self.FLAGS.decay_steps)))
            # Rebuilt on every iteration, so only the last step's values
            # survive the loop.
            loss_and_info = {
                "loss": {},
                "info": {
                    "threshold": threshold
                },
                "metrics": {},
                "csv_fieldnames": self.csv_fieldnames
            }
            outputs = {}
            if self.rng.rand() > threshold:
                # "struc" step on unsupervised data
                inputs = self.data_generator.get_unsup_data()
                struc_loss_and_info, struc_outputs = self.train_one_step_struc(
                    inputs)
                outputs.update(struc_outputs)
                loss_and_info["loss"]["struc"] = struc_loss_and_info["loss"]
                loss_and_info["info"]["struc"] = struc_loss_and_info["info"]
                # lr may be a callable schedule (evaluated at the previous
                # iteration count) or a plain variable.
                loss_and_info["info"]["struc_lr"] = (
                    self.optimizer_struc.lr(self.optimizer_struc.iterations -
                                            1).numpy()
                    if callable(self.optimizer_struc.lr) else
                    self.optimizer_struc.lr.numpy())
            else:
                # "meta" step on regular data
                inputs = self.data_generator.get_data()
                meta_loss_and_info, meta_outputs = self.train_one_step_meta(
                    inputs)
                outputs.update(meta_outputs)
                loss_and_info["loss"]["meta"] = meta_loss_and_info["loss"]
                loss_and_info["info"]["meta"] = meta_loss_and_info["info"]
                # Same callable-or-variable handling as for the struc optimizer
                loss_and_info["info"]["meta_lr"] = (
                    self.optimizer_meta.lr(self.optimizer_meta.iterations -
                                           1).numpy()
                    if callable(self.optimizer_meta.lr) else
                    self.optimizer_meta.lr.numpy())

            self._iteration += 1
            self._iterations_since_restore += 1

        # Undo one increment: the net advance is valid_step_period - 1.
        # Presumably the caller adds the final increment itself - TODO confirm.
        self._iteration -= 1
        self._iterations_since_restore -= 1

        # `inputs` / `outputs` here are those of the last loop iteration
        if "meta" in outputs:
            metrics = calculate_metrics(inputs["meta"]["test_labels"].numpy(),
                                        outputs["meta"]["logits"].numpy())
            loss_and_info["metrics"]["metatrain"] = metrics
        if (self.step + 1) % FLAGS.valid_step_period == 0:
            valid_metrics = self._test("valid")
            test_metrics = self._test("test")
            loss_and_info["metrics"]["metaval"] = valid_metrics
            loss_and_info["metrics"]["metatest"] = test_metrics

        # Convert a leaf to float when possible; otherwise keep it unchanged
        def _func(x):
            try:
                y = float(x)
            except (TypeError, ValueError):
                y = x
            return y

        result = nest.map_structure(_func, loss_and_info)
        # NOTE(review): "metaval" is only set when the condition above holds;
        # otherwise this lookup raises KeyError. Confirm self.step guarantees
        # it. The values are read from loss_and_info, not the converted result.
        ahb_metric = (loss_and_info["metrics"]["metaval"]["roc_auc"] +
                      loss_and_info["metrics"]["metaval"]["f1"])

        # First call: seed the best-score tracking state
        if not hasattr(self, "_best_ahb_metric"):
            self._best_ahb_metric = ahb_metric
            self._metric_after_best = 0.0
            self._step_after_best = 0
        if ahb_metric > self._best_ahb_metric:
            # New best: reset the post-best accumulators
            self._best_ahb_metric = ahb_metric
            self._metric_after_best = 0.0
            self._step_after_best = 0
            deviation_after_best = 0.0
        else:
            # Not better (this also covers the first call, where the two are
            # equal): deviation is best minus the mean score since the best.
            self._metric_after_best += ahb_metric
            self._step_after_best += 1
            deviation_after_best = self._best_ahb_metric - self._metric_after_best / self._step_after_best

        result["best_ahb_metric"] = self._best_ahb_metric
        result["deviation_after_best"] = deviation_after_best
        result["hpo_metric"] = self._best_ahb_metric - deviation_after_best
        return result
Esempio n. 9
0
def train_model(model, data_loaders, criterion, optimizer, args):
    """Train ``model`` for ``args.epochs`` epochs and restore the best state.

    Every epoch runs a training phase and a validation phase. Per-epoch loss,
    accuracy and F1 for both phases are appended to ``stats.csv`` in the
    sub-dump directory. The model/optimizer state with the highest validation
    accuracy is kept, loaded back before returning and, if ``args.save`` is
    set, written as a checkpoint under ``cfg.MODEL_DIR``.

    Args:
        model: PyTorch model; ``model(inputs)`` must return per-class scores
            with classes along dimension 1.
        data_loaders: Mapping with ``'train'`` and ``'val'`` data loaders that
            yield ``(inputs, category_ids)`` batches.
        criterion: Loss function called as ``criterion(outputs, category_ids)``.
        optimizer: PyTorch optimizer for ``model``.
        args: Namespace providing ``epochs``, ``device``, ``pretrained``,
            ``t_start`` and ``save``; also forwarded to ``fprint`` and
            ``get_sub_dump_dir``.

    Returns:
        tuple: ``(model, optimizer)`` with the best recorded states loaded.
    """
    # create stats df and csv file
    stats_df = pd.DataFrame(columns=[
        'epoch', 'train_loss', 'train_acc', 'train_f1', 'val_loss', 'val_acc',
        'val_f1'
    ])

    sub_dump_dir = get_sub_dump_dir(args)
    stats_path = os.path.join(sub_dump_dir, 'stats.csv')

    stats_df.to_csv(stats_path, sep=',',
                    index=False)  # write the header row only
    fprint('\nCreated stats file\t-> {}'.format(stats_path), args)
    fprint('\nTRAINING {} EPOCHS...\n'.format(args.epochs), args)

    since = time.time()
    device = torch.device(args.device)

    # initialize best values
    best_model_state_dict = copy.deepcopy(model.state_dict())
    best_opt_state_dict = copy.deepcopy(optimizer.state_dict())
    best_loss = 999999.9
    best_acc = 0.0
    best_epoch = 0

    for epoch in range(args.epochs):
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            phase_loss = 0.0
            # Predictions and targets are accumulated on the CPU
            phase_preds = torch.LongTensor()
            phase_category_ids = torch.LongTensor()

            # Iterate over data
            for inputs, category_ids in data_loaders[phase]:
                inputs = inputs.to(device)
                category_ids = category_ids.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    outputs = model(inputs)
                    loss = criterion(outputs, category_ids)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # stats
                # Weight the batch loss by batch size so that dividing by the
                # dataset length gives a per-sample mean (assumes the
                # criterion returns a batch mean - confirm).
                phase_loss += loss.item() * inputs.size(0)
                # Move batch results to the CPU before concatenating: the
                # accumulators live on the CPU, and mixing devices in
                # torch.cat fails when args.device is a GPU.
                phase_preds = torch.cat((phase_preds, preds.cpu()), 0)
                phase_category_ids = torch.cat(
                    (phase_category_ids, category_ids.cpu()), 0)

            epoch_loss = phase_loss / len(data_loaders[phase].dataset)
            epoch_acc, epoch_f1 = calculate_metrics(phase_preds,
                                                    phase_category_ids)

            # Row 0 is overwritten each epoch and appended to the CSV below
            stats_df.at[0, 'epoch'] = epoch
            stats_df.at[0, phase + '_loss'] = round(epoch_loss, 6)
            stats_df.at[0, phase + '_acc'] = round(epoch_acc, 6)
            stats_df.at[0, phase + '_f1'] = round(epoch_f1, 6)

            # define the new bests
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_state_dict = copy.deepcopy(model.state_dict())
                best_opt_state_dict = copy.deepcopy(optimizer.state_dict())
                best_loss = epoch_loss
                best_epoch = epoch

        # append epoch stats to file
        fprint(
            stats_df.to_string(index=False,
                               header=(epoch == 0),
                               col_space=10,
                               justify='right'), args)
        stats_df.to_csv(stats_path, mode='a', header=False, index=False)

    time_elapsed = time.time() - since
    fprint(
        '\nTraining completed in {:.0f}m {:.0f}s\n'.format(
            time_elapsed // 60, time_elapsed % 60), args)

    # reload best model weights and best optimizer variables
    model.load_state_dict(best_model_state_dict)
    optimizer.load_state_dict(best_opt_state_dict)

    # make sure the checkpoint directory exists (race-free)
    os.makedirs(cfg.MODEL_DIR, exist_ok=True)

    cp_path = os.path.join(
        cfg.MODEL_DIR,
        '{}_{}_{:.6f}.pth'.format('pt' if args.pretrained else 'fs',
                                  args.t_start, best_acc))

    # save best checkpoint
    if args.save:
        torch.save(
            {
                'epoch': best_epoch,
                'model_state_dict': best_model_state_dict,
                'optimizer_state_dict': best_opt_state_dict,
                'loss': best_loss,
                'acc': best_acc
            }, cp_path)
        fprint('Saved best checkpoint\t-> {}'.format(cp_path), args)

    return model, optimizer
Esempio n. 10
0
# Persist the trained model under the project's model directory
model.save_model(f"{project_dir}/model/model.cbm")

# NOTE(review): builtin_metrics is not used in the visible lines
builtin_metrics = model.eval_metrics(train_data,
                                     metrics=['Logloss', 'AUC', 'F1', 'PRAUC'])

# write results

# NOTE(review): hold_out_score is only referenced by the commented-out call below
hold_out_score = model.get_best_score()

# write_eval_summary_file(cv_scores, hold_out_score)

predict_probas = model.predict_proba(test_data)

# Only column 1 of the probabilities is scored; it is the column labelled
# 'True' when the predictions are saved below.
test_data_metrics = calculate_metrics(build_spec['standard_metrics'],
                                      build_spec['custom_metrics'], y_test,
                                      predict_probas[:, 1])

write_eval_summary_file(cv_scores, test_data_metrics)

# Save both probability columns for the test data
predict_probas = pd.DataFrame(predict_probas, columns=['False', 'True'])
predict_probas.to_csv(f"{project_dir}/model/test_data_prediction.csv")

# SHAP values for the test data; the frame below names one column per feature
# plus a trailing 'expected_value' column.
shap_values = model.get_feature_importance(data=test_data,
                                           type=EFstrType.ShapValues,
                                           shap_calc_type='Exact')

shap_values_df = pd.DataFrame(shap_values,
                              columns=features + ['expected_value'])

shap_values_df.to_csv(f"{project_dir}/model/test_data_shap_values.csv")
Esempio n. 11
0
        # for localization, 80, 96, 96 => 84, 108, 108
        # for segmentation, 64, 160, 160 => 76, 196, 196
        # Build an image object from the predicted label array; the last
        # argument is the path "<dataset>_<patient_id>_prediction.nrrd"
        # inside dir_name.
        pred_sitk_obj = generate_sitk_obj_from_npy_array(
            image_sitk_obj, label_prediction, True,
            os.path.join(dir_name,
                         "{}_{}_prediction.nrrd".format(dataset, patient_id)))

        # get arrays from data
        image_arr_org = sitk.GetArrayFromImage(image_sitk_obj)
        label_arr_org = sitk.GetArrayFromImage(label_sitk_obj)
        # get arrays from prediction
        pred_arr_org = sitk.GetArrayFromImage(pred_sitk_obj)

        # metrics: compare the ground-truth label array with the prediction
        result, dice, bbox_metrics = calculate_metrics(
            patient_id, spacing, label_arr_org, pred_arr_org,
            HAUSDORFF_PERCENT, OVERLAP_TOLERANCE, SURFACE_DICE_TOLERANCE)
        # append the per-patient result to the running list
        results.append(result)

        # plot 5x3 views
        plot_images(dataset, patient_id, image_arr_org, label_arr_org,
                    pred_arr_org, dir_name, True, bbox_metrics, dice)
        print("{} done. dice :: {}".format(patient_id, result["dice"]))

        # extract ROI from image_interpolated_resized
        if SAVE_CANDIDATES:
            # create folder
            # NOTE(review): `dir` shadows the Python builtin of the same name
            dir = "{}/{}/{}".format(MASTER_FOLDER, dataset,
                                    IMAGE_INTERPOLATED_ROI_PR_FOLDER)
            if not os.path.exists(dir):
Esempio n. 12
0
    "Recall",
    "Specifity",
]

# Metric identifiers passed to the calculation and plotting calls below.
# NOTE(review): "specifity" is spelled this way on purpose here; presumably it
# must match the key used by the metric implementation - confirm before renaming.
metrics = [
    "g_mean",
    "precision",
    "recall",
    "specifity",
]

experiment_name = "final"

# Compute the metrics for every method/stream pair; recount=True is passed
# explicitly (presumably it forces recomputation - confirm against the helper).
calculate_metrics(method_names,
                  streams,
                  metrics,
                  experiment_name,
                  recount=True)

# NOTE(review): `plt` is bound to a Ploting instance here, not to
# matplotlib.pyplot, despite the conventional alias.
plt = Ploting()
plt.plot_streams_matplotlib(method_names,
                            streams,
                            metrics,
                            experiment_name,
                            gauss=3,
                            methods_alias=methods_alias,
                            metrics_alias=metrics_alias)

rnk = Ranking()
rnk.pairs_metrics(method_names,
                  streams,
Esempio n. 13
0
def train_classifier(train_loader, model, optimizer, loss_function,
                     labels_names):
    """Train the model on all training batches from one epoch.

    Same train function is used to train the base and TCN classifier.
    Same train function is used to train unimodal and multimodal models.

    As a side effect, ``loss_function.pos_weight`` is overwritten with the
    training dataset's ``pos_class_weights`` (moved to CUDA). Features and
    labels are moved to CUDA as well.

    Args:
        train_loader: PyTorch train data loader. Each batch unpacks to ``(features, labels)``, where
            ``features`` is an iterable of tensors that are passed positionally to the model.
        model: PyTorch model returning ``(logits, predictions)``
        optimizer: PyTorch optimizer
        loss_function: PyTorch loss function
        labels_names (array/list): Names of labels/targets

    Returns:
        train_metrics (dict of dict of lists): Dictionary that maps class/label/target name and metric name to a
            corresponding training metric value. Besides the class/label/target names provided in labels_names, it also
            contains a key 'overall' referring to the average over all targets/labels.
    """

    # Aggregates from all training batches from one epoch
    all_predictions = []
    all_labels = []
    all_losses = []

    # Apply loss weights based on the train dataset class weights
    loss_function.pos_weight = train_loader.dataset.pos_class_weights.cuda()

    for features, labels in tqdm(train_loader, desc='    train'):
        optimizer.zero_grad()

        features = [f.cuda() for f in features]
        labels = labels.cuda()

        # Call the module itself (not ``forward``) so that registered
        # forward hooks are executed.
        logits, predictions = model(*features)

        # Get (already weighted) loss matrix (batch_size x class_num)
        batch_losses = loss_function(logits, labels)

        # Reduce/average loss for each class
        # In case of reduction='none', the loss needs to be normalized manually:
        # each class is divided by its total weight in the batch
        # (pos_weight per positive sample, 1 per negative sample).
        pos_labels_cnt = labels.sum(dim=0)
        neg_labels_cnt = len(labels) - pos_labels_cnt
        class_losses = batch_losses.sum(dim=0) / (
            loss_function.pos_weight * pos_labels_cnt + 1. * neg_labels_cnt)

        loss = class_losses.mean()
        loss.backward()
        optimizer.step()

        all_predictions.extend(predictions.tolist())
        all_labels.extend(labels.tolist())
        all_losses.extend(batch_losses.tolist())

    # Calculate training metrics
    train_metrics = calculate_metrics(
        all_predictions,
        all_labels,
        labels_names,
        losses=all_losses,
        loss_function_pos_weights=loss_function.pos_weight.tolist())

    return train_metrics
Esempio n. 14
0
    def train(self,
              X,
              label,
              X_val=None,
              label_val=None,
              X_test=None,
              label_test=None,
              param=None):
        """Fit a linear model ``(W, b)`` with per-sample updates.

        NumPy's global RNG is seeded with 2, weights are drawn from a narrow
        normal distribution, and for ``param.maxium_epoch`` epochs the samples
        are visited in shuffled order. On every visit the weights are shrunk
        by ``(1 - lr)``; if ``l * (x @ W + b) <= 1`` a step towards the sample
        (scaled by ``lr * d * C``) is added as well. Every
        ``param.valid_each`` epochs the validation score is computed and the
        best-scoring weights are remembered.

        Args:
            X: 2-D feature array, one row per sample.
            label: Per-sample labels, indexable by row number.
            X_val: Validation features passed to ``calculate_metrics``.
            label_val: Validation labels passed to ``calculate_metrics``.
            X_test: Unused.
            label_test: Unused.
            param: Object with ``eta_1``, ``C``, ``maxium_epoch``, ``decay``
                and ``valid_each``; an optional ``pos_weight`` scales the
                learning rate for samples whose label equals 1.

        Returns:
            dict: ``"metrics"`` (best validation score, 0.0 if none beat it),
            ``"W"`` and ``"b"`` (best weights) and ``"param"`` (``[W, b]``).
        """
        base_lr = param.eta_1
        reg_c = param.C
        n_epochs = param.maxium_epoch
        use_decay = param.decay

        np.random.seed(2)
        W = np.random.normal(0, 0.001, X.shape[1])
        b = np.random.normal(0, 0.001, (1))

        best_val = 0.0
        W_best, b_best = W, b
        d = 1

        for epoch in range(n_epochs):
            if use_decay == True:
                d = 1 / (1 + epoch)

            # Visit the samples in a fresh random order each epoch
            order = np.arange(X.shape[0])
            np.random.shuffle(order)

            for k in order:
                x = X[k, :]
                l = label[k]

                lr = base_lr
                if hasattr(param, "pos_weight") and l == 1:
                    lr = base_lr * param.pos_weight

                # Evaluate the margin with the weights from before the update
                margin_violated = l * (x @ W + b) <= 1

                # Shrinkage is applied on every visit ...
                W = (1 - lr) * W
                b = (1 - lr) * b
                # ... and the sample term only when the margin is violated
                if margin_violated:
                    W = W + x * l * lr * d * reg_c
                    b = b + l * lr * d * reg_c

            # The training score is computed every epoch; its value is unused
            calculate_metrics(X, label, W, b, param)

            if epoch % param.valid_each == 0:
                val_score = calculate_metrics(X_val, label_val, W, b, param)
                if val_score > best_val:
                    best_val = val_score
                    W_best, b_best = W, b

        return {
            "metrics": best_val,
            "W": W_best,
            "b": b_best,
            "param": [W_best, b_best]
        }
Esempio n. 15
0
		# One label of 1 per row of the "high" feature matrix
		label_high = np.ones([feat_high.shape[0], 1])

		# Stack low and high samples (and their labels) row-wise
		x = np.vstack((feat_low, feat_high))
		y = np.vstack((label_low, label_high))

		logo = LeaveOneGroupOut()
		# NOTE(review): the return values of these two calls are discarded,
		# so they do not influence the split below.
		logo.get_n_splits(x, y, y_subs)
		logo.get_n_splits(groups=y_subs)

		acc_list = []

		# One iteration per group in y_subs: that group is held out as test
		for train_index, test_index in logo.split(x, y, y_subs):

			model = RandomForestClassifier(n_estimators = 20)
			#acc = cross_val_score(model, x[train_index, :], y[train_index, :].ravel(), cv=5, scoring='accuracy')
			#acc_train = max(np.mean(acc), 1-np.mean(acc))
			#results_dict_train[norm].append(np.asarray(acc_train))			

			# Split the non-held-out data again (33% kept aside), stratified by group id
			x_train, x_source_test, y_train, y_source_test = train_test_split(x[train_index, :], y[train_index, :].ravel(), test_size=0.33, stratify=y_subs[train_index])
			
			# Presumably fits the model on x_train and scores it on the held-out
			# group and on the source test split - confirm in utils.
			acc_test, acc_train = utils.calculate_metrics(model, x_train, y_train, x[test_index, :], y[test_index, :].ravel(), x_source_test, y_source_test)
			# Keep the larger of the score and its complement, so the stored value is >= 0.5
			acc_test = max(acc_test, 1-acc_test)
			results_dict_test[norm].append(np.asarray(acc_test))
			acc_train = max(acc_train, 1-acc_train)
			results_dict_train[norm].append(np.asarray(acc_train))

# Write the collected train/test scores as CSV files
pd.DataFrame.from_dict(results_dict_train).to_csv('./Results/all_normalizations_classification_train.csv', index=False)
pd.DataFrame.from_dict(results_dict_test).to_csv('./Results/all_normalizations_classification_test.csv', index=False)


Esempio n. 16
0
def val_full_video(dataloader,
                   dataset,
                   model,
                   device,
                   classify_thresh,
                   pred_size,
                   contacts_out_path=None,
                   viz_out_path=None,
                   fps=30):
    '''
    Evaluates each batch as if it contains the windows for every frame of a full video.
    Visualizes result if desired.

    Args:
        dataloader: iterable of batch dicts. Every batch must contain 'joint2d';
            'contacts' is optional (enables loss/accuracy); 'name' and 'seq_len'
            are read when saving predictions; 'name' and 'frames' when visualizing.
        dataset: only used for visualization (frame paths and joint scaling).
        model: callable network that also exposes eval(), loss(), accuracy(),
            prediction() and window_size.
        device: device the input and label tensors are moved to.
        classify_thresh: threshold forwarded to model.accuracy() for the
            per-target-frame confusion counts.
        pred_size: number of frames predicted per window.
        contacts_out_path: if set, the merged predictions of each video are saved
            with np.save to <contacts_out_path>/<video name>/foot_contacts.
        viz_out_path: if set (and labels are available), a video is rendered to
            <viz_out_path>/<video name with '/' replaced by '-'>.mp4.
        fps: frame rate forwarded to the visualization helper.

    Returns:
        (mean_loss, metrics, merged_metrics). When the last processed batch had
        no 'contacts' entry (or the dataloader was empty) this is (0.0, [], None).
        Otherwise metrics holds one calculate_metrics() result per predicted
        frame and merged_metrics the result for the vote-merged predictions.
    '''
    model.eval()

    loss_sum = 0.0
    loss_count = 0
    confusion_count = np.zeros((pred_size, 4), dtype=int)
    merged_confusion = np.zeros((4), dtype=int)
    # NOTE(review): batch_size and target_idx are taken from the first batch and
    # reused for all later ones -- this assumes every batch has the same number
    # of windows; confirm against the dataset.
    batch_size = -1
    target_idx = -1
    # Defined up front so the summary after the loop also works for an empty
    # dataloader (previously a NameError).
    have_contacts = False
    for batch_idx, batch_data in enumerate(dataloader):
        # prepare the data for this batch
        input_data = batch_data['joint2d']
        if batch_size == -1:
            batch_size = input_data.size()[0]
        input_data = input_data.to(device)
        have_contacts = False
        if 'contacts' in batch_data:
            label_data = batch_data['contacts'].to(device)
            if target_idx == -1:
                target_idx = label_data.size()[1] // 2
            have_contacts = True
        # run model
        output_data = model(input_data)  # B x contact_size x 4
        if have_contacts:
            loss = model.loss(output_data, label_data)
            for target_frame_idx in range(pred_size):
                n_tp, n_fp, n_fn, n_tn = model.accuracy(
                    output_data,
                    label_data,
                    thresh=classify_thresh,
                    tgt_frame=target_frame_idx)
                confusion_count[target_frame_idx] += np.array(
                    [n_tp, n_fp, n_fn, n_tn], dtype=int)
            # save loss
            loss_sum += torch.sum(loss).to('cpu').item()
            loss_count += loss.size()[0] * loss.size()[1] * loss.size()[2]

        # merge together to get full video labels
        model_predictions, _ = model.prediction(output_data)
        model_predictions = model_predictions.to(
            'cpu').numpy()  # B x contact_size x 4

        # sliding window through entire video to aggregate votes
        window_size = input_data.size()[1]
        vote_aggregation = np.zeros(
            (batch_size + 2 * (pred_size // 2),
             4))  # only collect for frames we directly predict
        for window_start_idx in range(model_predictions.shape[0]):
            window_end_idx = window_start_idx + pred_size
            vote_aggregation[
                window_start_idx:window_end_idx] += model_predictions[
                    window_start_idx]
        # define threshold for considering in contact
        # on edges there are less possible votes b/c were never a target frame
        # must account for this
        # don't need majority in order to be considered in contact (this pushes towards more false positives than negatives)
        vote_thresh = np.ones(
            (vote_aggregation.shape[0])) * (((pred_size + 1) / 2))
        for edge_offset in range(pred_size - 1):
            vote_thresh[edge_offset] = (edge_offset // 2) + 1
            vote_thresh[(-1 - edge_offset)] = (edge_offset // 2) + 1
        vote_predictions = vote_aggregation >= vote_thresh.reshape((-1, 1))
        # np.int was removed in NumPy 1.24; the builtin int is the same dtype
        contact_preds = vote_predictions.astype(int)

        # # NOTE: uncomment this to turn off vote merging (majority voting)
        # # want contact predictions
        # contact_preds = model_predictions[:,target_idx,:].copy() # B x 4
        # # still need edges
        # # take as much as we can from predictions
        # leading_preds = model_predictions[0,:target_idx,:].reshape((-1, 4))
        # tailing_preds = model_predictions[batch_size-1,target_idx+1:,:].reshape((-1, 4))
        # contact_preds = np.concatenate([leading_preds, contact_preds, tailing_preds], axis=0)

        # fill in the rest with copies
        contact_offset = (window_size - pred_size) // 2
        leading_pad = np.repeat(contact_preds[0].reshape((1, 4)),
                                contact_offset,
                                axis=0)
        tailing_pad = np.repeat(contact_preds[-1].reshape((1, 4)),
                                contact_offset,
                                axis=0)
        contact_preds = np.concatenate(
            [leading_pad, contact_preds, tailing_pad], axis=0)  # F x 4

        if have_contacts:
            # same thing for labels
            contact_label_data = label_data.to(
                'cpu').numpy()  # B x contact_size x 4
            contact_labels = contact_label_data[:,
                                                target_idx, :].copy()  # B x 4
            leading_labels = contact_label_data[0, :target_idx, :].reshape(
                (-1, 4))
            tailing_labels = contact_label_data[batch_size - 1,
                                                target_idx + 1:, :].reshape(
                                                    (-1, 4))
            contact_labels = np.concatenate(
                [leading_labels, contact_labels, tailing_labels], axis=0)
            # fill in the rest with copies
            leading_pad = np.repeat(contact_labels[0].reshape((1, 4)),
                                    contact_offset,
                                    axis=0)
            tailing_pad = np.repeat(contact_labels[-1].reshape((1, 4)),
                                    contact_offset,
                                    axis=0)
            contact_labels = np.concatenate(
                [leading_pad, contact_labels, tailing_pad], axis=0)  # F x 4

            # evaluate accuracy after merging; predictions are already 0/1 so
            # they are scored as single-frame windows with a 0.5 threshold
            merged_preds = torch.from_numpy(
                contact_preds.reshape((-1, 1, 4))).to(torch.float)
            merged_labels = torch.from_numpy(
                contact_labels.reshape((-1, 1, 4))).to(torch.float)
            n_tp, n_fp, n_fn, n_tn = model.accuracy(merged_preds,
                                                    merged_labels,
                                                    thresh=0.5,
                                                    tgt_frame=0)
            merged_confusion += np.array([n_tp, n_fp, n_fn, n_tn], dtype=int)

        # save predictions
        if contacts_out_path:
            video_name = batch_data['name'][0]
            contact_dir_out = os.path.join(contacts_out_path, video_name)
            os.makedirs(contact_dir_out, exist_ok=True)
            contact_path_out = os.path.join(contact_dir_out, 'foot_contacts')
            true_seq_len = batch_data['seq_len'][0]
            # trim to actual seq_len
            save_contact_preds = contact_preds.astype(int)[:true_seq_len]
            np.save(contact_path_out, save_contact_preds)

        # visualization
        if have_contacts and viz_out_path:
            video_name = batch_data['name'][0]
            result_vid_path = os.path.join(
                viz_out_path,
                video_name.replace('/', '-') + '.mp4')
            print('Saving video to %s...' % (result_vid_path))

            frame_data = batch_data['frames'].to(
                'cpu').numpy()  # B x H x W x 3
            _, H, W, _ = frame_data.shape
            # frame data is only target frames, need leading and trailing frames for whole first and last window
            if isinstance(dataset, RealVideoDataset):
                cur_frame_paths = dataset.frame_paths[batch_idx]
            else:
                # synthetic dataset
                cur_frame_paths = get_frame_paths(
                    dataset.view_dirs[batch_idx])
            leading_frames = []
            trailing_frames = []
            for frame_idx in range(model.window_size // 2):
                cur_lead_frame = io.imread(
                    cur_frame_paths[frame_idx])[:, :, :3]  # remove alpha
                cur_trail_frame = io.imread(
                    cur_frame_paths[-(frame_idx + 1)])[:, :, :3]
                if cur_lead_frame.shape[0] != H or cur_lead_frame.shape[1] != W:
                    cur_lead_frame = transform.resize(
                        cur_lead_frame, (TRAIN_DIM[1], TRAIN_DIM[0]))
                    cur_trail_frame = transform.resize(
                        cur_trail_frame, (TRAIN_DIM[1], TRAIN_DIM[0]))
                leading_frames.append(cur_lead_frame)
                trailing_frames.append(cur_trail_frame)
            leading_frames = np.stack(leading_frames, axis=0)
            # trailing frames were collected from the end backwards
            trailing_frames = np.stack(trailing_frames[::-1], axis=0)
            frame_seq = np.concatenate(
                [leading_frames, frame_data, trailing_frames], axis=0)

            # need 2d joint sequence
            joint_data = input_data.to(
                'cpu').numpy()  # B x window_size x J x 3
            window_tgt = joint_data.shape[1] // 2
            joint2d_seq = joint_data[:, window_tgt, :, :2].copy(
            )  # B x J x 2 (x,y)
            # unnormalize
            root_idx = openpose_dataset.OP_LOWER_JOINTS_MAP['MidHip']
            joint_trans_normalization = joint2d_seq[:, root_idx, :].copy()
            joint2d_seq[:,
                        root_idx, :] -= joint_trans_normalization  # zero it out so it's correct when added back in
            joint_trans_normalization = joint_trans_normalization.reshape(
                (batch_size, 1, 2))
            joint2d_seq += joint_trans_normalization
            # need all frames
            num_joints = input_data.size()[2]
            leading_joint2d = joint_data[0, :window_tgt, :, :2].reshape(
                (-1, num_joints, 2))
            leading_joint2d += joint_data[0, window_tgt, root_idx, :2].reshape(
                (1, 1, 2))  # unnormalize
            tailing_joint2d = joint_data[batch_size - 1,
                                         window_tgt + 1:, :, :2].reshape(
                                             (-1, num_joints, 2))
            tailing_joint2d += joint_data[batch_size - 1, window_tgt,
                                          root_idx, :2].reshape(
                                              (1, 1, 2))  # unnormalize
            joint2d_seq = np.concatenate(
                [leading_joint2d, joint2d_seq, tailing_joint2d], axis=0)
            joint2d_seq *= dataset.get_joint_scaling()

            # now visualize
            viz_full_video_simple(frame_seq,
                                  joint2d_seq,
                                  contact_preds,
                                  contact_labels,
                                  show=False,
                                  save_path=result_vid_path,
                                  fps=fps)
            # viz_full_video_simple(frame_seq, joint2d_seq, contact_preds, contact_labels, show=True, save_path=None, fps=fps)

            # drop the references to the frame arrays and collect before the
            # next video is loaded
            frame_data = None
            frame_seq = None
            gc.collect()

    mean_loss = 0.0
    metrics = []
    merged_metrics = None
    # NOTE: this reflects whether the *last* batch carried labels
    if have_contacts:
        mean_loss = loss_sum / loss_count
        metrics = [
            calculate_metrics(confusion_count[target_frame_idx])
            for target_frame_idx in range(pred_size)
        ]
        merged_metrics = calculate_metrics(merged_confusion)
    return mean_loss, metrics, merged_metrics
		#epoch_loss_list2.append(valid_loss)

		print('Epoch', epoch+1, 'completed out of', epochs, 'loss:', epoch_loss, ' validation loss: ',valid_loss)

	#utils.plot_epoch_loss(epoch_index_list,epoch_loss_list,epoch_loss_list2)


	print("Optimization Finished!")

	#testing

	net_output = tf.round(tf.nn.sigmoid(logit))
	correct_preds = tf.equal(net_output,yplaceholder)
	accuracy = tf.reduce_mean(tf.cast(correct_preds, "float"))
	test_inputs = test_inputs.reshape((-1,input_length,number_of_sequences))

	t_labels = test_labels

	test_labels = test_labels.reshape((-1,n_classes))


	print("Tf-Accuracy: ",accuracy.eval({xplaceholder: test_inputs, yplaceholder: test_labels}))
	predictions = correct_preds.eval({xplaceholder: test_inputs, yplaceholder: test_labels})
	utils.figure_faults_timeseries(predictions,test_asset)

	preds = sess.run([net_output], feed_dict = {xplaceholder: test_inputs})
	preds = np.array(preds)[0]

	utils.calculate_metrics(preds,t_labels)
	
Esempio n. 18
0
def train(flags):
    '''
    Trains the OpenPose-only contact model and periodically validates it.

    Reads all settings from `flags` (data, window_size, pred_size, batch_size,
    out, epochs, val_every, classify_thresh, lr, beta1, beta2, eps, decay,
    use_confidence, joint_set, cpu). Returns early, after printing a message,
    if the data directory does not exist.

    Files written to flags.out:
        op_only_weights.pth        latest checkpoint, saved at every validation
        op_only_weights_BEST.pth   checkpoint with the best validation f1 so far
        op_only_weights_FINAL.pth  weights after the last epoch
        val_confusion_matrix_<i>.png  one confusion matrix per predicted frame
    Training curves are drawn through plot_train_stats().
    '''
    data_root = flags.data
    window_size = flags.window_size
    pred_size = flags.pred_size
    batch_size = flags.batch_size
    out_dir = flags.out
    num_epochs = flags.epochs
    val_every = flags.val_every
    classify_thresh = flags.classify_thresh
    # optim args
    lr = flags.lr
    betas = (flags.beta1, flags.beta2)
    eps = flags.eps
    weight_decay = flags.decay
    use_confidence = flags.use_confidence
    joint_set = flags.joint_set

    if not os.path.exists(data_root):
        print('Could not find training data at ' + data_root)
        return
    # makedirs (unlike mkdir) also creates missing parent directories
    os.makedirs(out_dir, exist_ok=True)
    weights_out_path = os.path.join(out_dir, 'op_only_weights.pth')
    best_weights_out_path = os.path.join(out_dir, 'op_only_weights_BEST.pth')

    # load training and validation data
    train_dataset = OpenPoseDataset(data_root,
                                    split='train',
                                    window_size=window_size,
                                    contact_size=pred_size,
                                    use_confidence=use_confidence,
                                    joint_set=joint_set)
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=2)
    val_dataset = OpenPoseDataset(data_root,
                                  split='val',
                                  window_size=window_size,
                                  contact_size=pred_size,
                                  use_confidence=use_confidence,
                                  joint_set=joint_set)
    val_loader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=2)

    num_joints = len(
        openpose_dataset.OP_JOINT_SUBSETS[train_dataset.joint_set])

    # create the model and optimizer
    device_str = 'cpu' if flags.cpu else None
    device = get_device(device_str)
    op_model = create_model(window_size,
                            num_joints,
                            pred_size,
                            device,
                            use_confidence=use_confidence)
    op_optim = optim.Adam(op_model.parameters(),
                          lr=lr,
                          betas=betas,
                          eps=eps,
                          weight_decay=weight_decay)

    # number of trainable parameters
    params = sum(p.numel() for p in op_model.parameters() if p.requires_grad)
    print('Num model params: ' + str(params))

    # viz stats
    train_steps = []
    train_losses = []
    train_accs = []
    val_steps = []
    val_losses = []
    val_accs = []

    # train
    # loss_sum / loss_count / confusion_count accumulate across epochs and are
    # only reset when the training summary is printed (every 5th epoch)
    loss_sum = 0.0
    loss_count = 0
    best_val_f1 = -float('inf')
    confusion_count = np.zeros((4), dtype=int)
    for epoch_idx in range(num_epochs):
        for batch_idx, batch_data in enumerate(train_loader):
            # prepare the data for this batch
            input_data = batch_data['joint2d'].to(device)
            label_data = batch_data['contacts'].to(device)

            # zero the gradients
            op_optim.zero_grad()
            # forward + backward + optimize
            output_data = op_model(input_data)
            loss = op_model.loss(output_data, label_data)
            n_tp, n_fp, n_fn, n_tn = op_model.accuracy(output_data,
                                                       label_data,
                                                       thresh=classify_thresh)
            loss = torch.mean(loss)
            loss.backward()
            op_optim.step()

            loss_sum += loss.to('cpu').item()
            loss_count += 1
            confusion_count += np.array([n_tp, n_fp, n_fn, n_tn], dtype=int)

        if epoch_idx % 5 == 0:
            print('=================== TRAIN (' + str(epoch_idx + 1) +
                  ' epochs) ================================================')
            mean_loss = loss_sum / loss_count
            print('Mean loss: %0.3f' % (mean_loss))
            loss_sum = 0.0
            loss_count = 0

            metrics = calculate_metrics(confusion_count)
            cur_acc, _, _, _, _ = metrics
            print_metrics(metrics)
            confusion_count = np.zeros((4), dtype=int)
            print(
                '======================================================================================'
            )

            train_steps.append(epoch_idx * len(train_loader) + batch_idx)
            train_losses.append(mean_loss)
            train_accs.append(cur_acc)

            # save plot
            plot_train_stats((train_steps, train_losses, train_accs),
                             (val_steps, val_losses, val_accs),
                             out_dir,
                             accuracy_metrics=metrics)

        if epoch_idx % val_every == 0:
            # run on the validation data
            print('==================== VALIDATION (' + str(epoch_idx + 1) +
                  ' epochs) ===========================================')
            val_loss, val_metrics = val_epoch(val_loader, op_model, device,
                                              classify_thresh, pred_size)
            print('Mean Loss: %0.3f' % (val_loss))

            for tgt_frame_idx in range(pred_size):
                print('----- Pred Frame ' + str(tgt_frame_idx) + ' ------')
                print_metrics(val_metrics[tgt_frame_idx])
            val_acc, _, _, _, _ = val_metrics[
                pred_size // 2]  # only want accuracy for middle target
            print(
                '======================================================================================'
            )
            # back to training mode after validating
            op_model.train()

            val_steps.append(epoch_idx * len(train_loader) + batch_idx)
            val_losses.append(val_loss)
            val_accs.append(val_acc)

            # save confusion matrix
            for tgt_frame_idx in range(pred_size):
                _, _, _, f1, cm = val_metrics[tgt_frame_idx]
                plot_confusion_mat(
                    cm,
                    os.path.join(
                        out_dir,
                        'val_confusion_matrix_%d.png' % (tgt_frame_idx)))

            # also save model weights
            print('Saving checkpoint...')
            torch.save(op_model.state_dict(), weights_out_path)

            # check if this is the best so far and save (in terms of f1 score)
            # NOTE(review): f1 is whatever the loop above left behind, i.e. the
            # score of the LAST predicted frame, whereas val_acc uses the middle
            # frame -- confirm which one should drive model selection.
            if f1 > best_val_f1:
                best_val_f1 = f1
                print('Saving best model so far...')
                torch.save(op_model.state_dict(), best_weights_out_path)

    # save final model
    print('Saving final checkpoint...')
    torch.save(op_model.state_dict(),
               os.path.join(out_dir, 'op_only_weights_FINAL.pth'))
    # save plot
    metrics = calculate_metrics(confusion_count)
    plot_train_stats((train_steps, train_losses, train_accs),
                     (val_steps, val_losses, val_accs),
                     out_dir,
                     accuracy_metrics=metrics)
    print('FINISHED Training!')
Esempio n. 19
0
    steps_in_epoch += 1
    epoch_loss += total_loss
    epoch_slot_loss += slot_loss
    epoch_intent_loss += intent_loss

    # if epoch is finished
    if data_processor.end == 1:

        epochs += 1

        # clean up data_processor
        data_processor.close()
        data_processor = None

        # calculate train metrics
        f1, precision, recall, accuracy, semantic_acc = calculate_metrics(
            pred_intents, correct_intents, slot_outputs_pred, correct_slots)

        log_in_tensorboard(tb_log_writer, epochs, "train",
                           epoch_loss / steps_in_epoch,
                           epoch_intent_loss / steps_in_epoch,
                           epoch_intent_loss / steps_in_epoch, f1, accuracy,
                           semantic_acc)

        # reset steps and epoch loss
        steps_in_epoch = 0
        epoch_loss = 0.0
        epoch_slot_loss = 0.0
        epoch_intent_loss = 0.0
        # clean up epoch variables
        pred_intents = []
        correct_intents = []
Esempio n. 20
0
                close = divide_crypto(cryptos[key], ["BTC-USD", "ETH-USD"],
                                      close, 100)

                # Update wallet, ledger & portfolio
                wallet, ledger, portfolio = update_db(wallet, ledger,
                                                      portfolio, d,
                                                      cryptos[key],
                                                      c_d_df["order_type"],
                                                      close)

        except KeyError as e:
            print(f"KeyError -- {cryptos[key]} -- {e}")

# ROI
portfolio = undivide_crypto(["BTC-USD", "ETH-USD"], portfolio, 100)
portfolio_value, overall_value, roi = calculate_metrics(
    portfolio, df_list, cryptos, wallet, 5000)

display_results(cryptos, 5000, wallet, portfolio, portfolio_value,
                overall_value, roi)
generate_log(ledger)

ledger_df = pd.DataFrame(ledger)
ledger_df.columns = [
    "date", "symbol", "order_type", "close", "qty", "order_value"
]

signals_df = {}
for crypto in cryptos:
    signals_df[crypto] = {}
    buy_df = ledger_df[(ledger_df["symbol"] == crypto)
                       & (ledger_df["order_type"] == "buy")].copy()
                                      nb_steps=EPS_STEPS)

        dqn = DQNAgent(model=model,
                       policy=policy,
                       nb_actions=2,
                       memory=memory,
                       processor=processor,
                       nb_steps_warmup=WARMUP_STEPS,
                       gamma=GAMMA,
                       target_model_update=TARGET_MODEL_UPDATE,
                       train_interval=4,
                       delta_clip=1,
                       batch_size=BATCH_SIZE,
                       enable_double_dqn=DOUBLE_DQN)
        dqn.compile(Adam(lr=LR))

        metrics = Metrics(X_val, y_val)
        dqn.fit(env,
                nb_steps=TRAINING_STEPS,
                log_interval=LOG_INTERVAL,
                callbacks=[metrics],
                verbose=0)
        y_pred = make_predictions(dqn.target_model, X_test)
        stats = calculate_metrics(y_test, y_pred)  # Get stats as dictionairy
        writer.writerow(stats)  # Write dictionairy as row
        f.flush(
        )  # Save results to file inbetween iterations as to not lose results

        if not i % LOG_EVERY:
            print(f"{i}: FN: {stats.get('FN')}, FP: {stats.get('FP')}")
Esempio n. 22
0
def test(weights_path: Optional[str], only_show: bool) -> None:
    """
    Evaluate the classifier on the test set and browse its predictions.

    Unless ``only_show`` is set, the whole test set is scored first (loss,
    accuracy and the report from ``calculate_metrics`` are printed). After that
    the test images are shown one by one with the predicted and true class;
    pressing a key closes all figures and ends the function.

    :param weights_path: path to saved weights or None.
    :param only_show: if True then only show predictions and labels. If False, calculate test metric before showing.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    test_loader = get_data_loader(is_train=False)
    dataset_classes = test_loader.dataset.classes
    # translate the dataset's class keys into display names
    class_names = [CLASS_NAMES[raw_name] for raw_name in dataset_classes]
    testing_samples_num = len(test_loader.dataset)

    model = get_model(weights_path).to(device)
    loss_func = torch.nn.CrossEntropyLoss()

    # Inference only from here on.
    model.eval()
    running_loss = 0.0
    running_corrects = 0

    if not only_show:
        all_labels = []
        all_predictions = []
        for inputs, labels in tqdm(test_loader, desc='Testing. Batch'):
            # keep a host copy of the labels before moving them to the device
            all_labels.append(labels.numpy())
            inputs, labels = inputs.to(device), labels.to(device)

            with torch.no_grad():
                outputs = model(inputs)
                preds = torch.max(outputs, 1)[1]
                loss = loss_func(outputs, labels)
            all_predictions.append(preds.cpu().numpy())

            # Accumulate the summed loss and the number of hits.
            running_loss += loss.item() * inputs.size(0)
            running_corrects += (preds == labels.data).sum()

        epoch_loss = running_loss / testing_samples_num
        epoch_acc = running_corrects.double() / testing_samples_num
        print('Testing: loss = {:.4f}, accuracy = {:.4f}.\n'.format(
            epoch_loss, epoch_acc))
        print(calculate_metrics(all_labels, all_predictions, class_names))

    # Second pass: one image per batch for interactive inspection.
    test_loader = get_data_loader(is_train=False, batch_size=1)
    for inputs, labels in test_loader:
        inputs = inputs.to(device)

        with torch.no_grad():
            outputs = model(inputs)
            preds = torch.max(outputs, 1)[1]
            outputs_softmax = torch.softmax(outputs, dim=-1)

        # channels-first tensor -> channels-last image, then undo normalization
        image = inputs.cpu().numpy()[0].transpose((1, 2, 0))
        image = np.clip(image * NORMALIZE_STD + NORMALIZE_MEAN, 0, 1)

        pred_class_index = preds.cpu().numpy()[0]
        pred_value = outputs_softmax.cpu().numpy()[0, pred_class_index]
        pred_class = class_names[pred_class_index]
        label_class = class_names[labels.numpy()[0]]

        plt.imshow(image)
        plt.title('Prediction: {}, {:.02f}%.\nLabel: {}.'.format(
            pred_class, pred_value * 100, label_class))
        # waitforbuttonpress returns True for a key press: stop browsing
        if plt.waitforbuttonpress(0):
            plt.close('all')
            return
    if (test_mode == 0):

        correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Tf-Accuracy:",
              accuracy.eval({
                  X: test_inputs,
                  Y: test_labels,
                  keep_prob: 1.0
              }))

        y_p = tf.argmax(logits, 1)
        preds = sess.run([y_p], feed_dict={X: test_inputs, keep_prob: 1.0})
        preds = np.array(preds)[0]
        utils.calculate_metrics(preds, np.argmax(test_labels, 1))

        predictions = correct_prediction.eval({
            X: test_inputs,
            Y: test_labels,
            keep_prob: 1.0
        })
        utils.figure_faults_timeseries(predictions, test_asset)

    else:

        Accuracy = 0
        pred_list = []

        for i in range(test_iters):
            next_input = utils.next_batch(test_inputs, i, batch_size)
    def train(self, X, label, X_val=None, label_val=None, param=None):
        """Train a linear classifier with mistake-driven updates and weight averaging.

        For every sample (visited in a freshly shuffled order each epoch) the
        weights are updated only when ``l * (x @ W + b) <= 0``. In parallel an
        exponential moving average of the weights (factor 0.99) is maintained;
        the averaged weights are the ones that get validated and returned.

        Args:
            X: 2-D array of training samples, one row per sample.
            label: per-sample labels; presumably in {-1, +1} given the update
                rule -- confirm against the caller.
            X_val: optional validation samples.
            label_val: optional validation labels.
            param: settings object providing ``eta_1`` (learning rate),
                ``maxium_epoch``, ``decay``, ``verbose``, ``metrics`` (metric
                name used in log messages), ``valid_each`` and optionally
                ``pos_weight`` (multiplies the learning rate for samples whose
                label is 1).

        Returns:
            dict with keys ``"W"``, ``"b"`` (best averaged weights and bias),
            ``"metrics"`` (best validation score, 0.0 if never validated) and
            ``"param"`` (``[W, b]``). When no validation data is supplied the
            final averaged weights are returned.

        Note:
            Reseeds NumPy's global random generator with 2 on every call.
        """
        eta1 = param.eta_1
        maxium_epoch = param.maxium_epoch
        decay = param.decay
        # Validation is skipped when no validation set is given instead of
        # handing None to calculate_metrics.
        has_validation = X_val is not None and label_val is not None
        np.random.seed(2)

        best_val = 0.0
        W = np.random.uniform(-0.01, 0.01, X.shape[1])
        b = np.random.uniform(-0.01, 0.01, (1))
        W_best = W
        b_best = b
        d = 1  # learning-rate decay factor, 1 / (1 + epoch) when decay is on
        running_W = W
        running_b = b
        for j in range(maxium_epoch + 1):
            if decay == True:  # noqa: E712 -- keep the original strict comparison
                d = 1 / (1 + j)
            ind = np.arange(X.shape[0])
            np.random.shuffle(ind)
            for k in ind:
                x = X[k, :]
                l = label[k]
                # the average is updated on every sample, before the step
                running_W = 0.99 * running_W + W * 0.01
                running_b = b * 0.01 + 0.99 * running_b
                if l * (x @ W + b) <= 0:
                    lr = eta1
                    if hasattr(param, "pos_weight") and l == 1:
                        lr = eta1 * param.pos_weight
                    W = W + x * l * lr * d
                    b = b + l * lr * d
            train_acc = calculate_metrics(X, label, W, b, param)
            if param.verbose == 1:
                print("epoch: #", str(j),
                      " current for training" + param.metrics + " is",
                      str(train_acc))

            if has_validation and j % param.valid_each == 0:
                val_acc = calculate_metrics(X_val, label_val, running_W,
                                            running_b, param)
                if param.verbose == 1:
                    print("########################\n epoch: #", str(j),
                          " current validating " + param.metrics + "is",
                          str(val_acc) + "########################\n ")
                if val_acc > best_val:
                    best_val = val_acc
                    W_best = running_W
                    b_best = running_b

        if not has_validation:
            # nothing to select on: fall back to the final averaged weights
            W_best = running_W
            b_best = running_b

        results = {
            "W": W_best,
            "b": b_best,
            "metrics": best_val,
            "param": [W_best, b_best]
        }
        return results
Esempio n. 25
0
                        m.dropout_keep_prob: 1.
                    })
            else:
                cost, b_id = sess.run(
                    [m.classify_loss, m.baseline_sample_id],
                    feed_dict={
                        m.encoder_inputs: X,
                        m.encoder_inputs_length: X_len,
                        m.decoder_inputs: Y,
                        m.decoder_inputs_length: Y_len,
                        m.decoder_targets: Y_t,
                        m.dropout_keep_prob: 1.
                    })

            baseline = calculate_metrics(b_id,
                                         np.transpose(Y_t),
                                         Y_len,
                                         metric="bleu")
            valid_score += np.mean(baseline)

            cost = cost * len(X[0])

            valid_cost += cost
            valid_step += 1
            valid_len += len_

        avg_valid_score = valid_score / valid_step
        avg_cost = valid_cost / valid_step
        valid_perplexity = np.exp(valid_cost / valid_len)

        if valid_perplexity < best_valid_perplexity:
            best_valid_perplexity = valid_perplexity
    if not os.path.exists('../results/'):
        os.makedirs('../results/')

    #save result to prediction file
    prediction_file = os.path.join(
        '../results/',
        'predictions_{}.txt'.format(args.model_name + '_for_test_set_' +
                                    str(args.ini_epochs)))

    with open(prediction_file, "w") as f:
        for pd, gt in zip(predicts, labels):
            f.write("Y {}\nP {}\n\n".format(gt, pd))

    #calculate metrics to assess the model
    evaluate = utils.calculate_metrics(predicts=predicts, ground_truth=labels)

    e_corpus = "\n".join([
        "Total test audios:    {}".format(len(labels)),
        "Total time:           {}\n".format(total_time),
        "Metrics:",
        "Character Error Rate: {}".format(evaluate[0]),
        "Word Error Rate:      {}".format(evaluate[1]),
        "Sequence Error Rate:  {}".format(evaluate[2]),
    ])

    evaluate_file = os.path.join(
        '../results/',
        "evaluate_{}.txt".format(args.model_name + '_for_test_set_' +
                                 str(args.ini_epochs)))
    with open(evaluate_file, "w") as ev_f:
Esempio n. 27
0
def main():
    """Run SqueezeSeg prediction on one frame or a whole split and print metrics.

    Command-line arguments:
        frame_number: Name of the frame to evaluate. Passing ``0`` evaluates
            every ``*.npy`` file found in ``../dataset/lidar_2d/<mode>/``.
        -m/--mode: Dataset split (``train|test``) used to build the dataset
            path and forwarded to ``get_item``. Defaults to ``'test'``.
        --export-image: Additionally export the first image channel, the
            ground truth and the prediction of every processed frame.

    For every frame the per-class IoU, precision and recall returned by
    ``calculate_metrics`` are printed together with the prediction time.
    After all frames are processed the per-class means are printed.
    """
    parser = argparse.ArgumentParser()
    # NOTE: a positional argument without ``nargs`` is required, so argparse
    # never falls back to this default.
    parser.add_argument('frame_number', default=os.getcwd())
    parser.add_argument('-m', '--mode', help='train|test', default='test')
    parser.add_argument('--export-image',
                        help='export predicted image, gt and intensity',
                        action='store_true')
    args = parser.parse_args()
    item_no = args.frame_number
    squeeze_seg = SqueezeSeg()

    # One list of per-frame values for each class, averaged at the end.
    class_names = ('unknown', 'car', 'pedestrian', 'cyclist')
    mean_iou = {cls: [] for cls in class_names}
    mean_precision = {cls: [] for cls in class_names}
    mean_recall = {cls: [] for cls in class_names}

    # argparse delivers positional values as strings, so the "all frames"
    # sentinel has to be matched as '0' (the int form is kept for safety).
    if item_no in (0, '0'):
        pattern = os.path.join('..', 'dataset', 'lidar_2d', args.mode,
                               '*.npy')
        # basename + splitext strips directory and extension portably.
        file_names = [
            os.path.splitext(os.path.basename(path))[0]
            for path in glob(pattern)
        ]
    else:
        file_names = [item_no]

    classifier = tf.estimator.Estimator(
        model_fn=squeeze_seg.squeeze_seg_fn,
        model_dir='./model/',
    )

    total = len(file_names)
    for idx, item_no in enumerate(file_names):
        print('{}/{}'.format(idx, total))
        image, labels = get_item(item_no, mode=args.mode)

        eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": image},
                                                           num_epochs=1,
                                                           shuffle=False)

        # ``predict`` only builds a generator; the model actually runs when
        # the first element is pulled, so that pull must be inside the timed
        # region for the reported time to be meaningful.
        before = time()
        predictions = classifier.predict(input_fn=eval_input_fn)
        pred = next(predictions)['classes']
        after = time()

        ious, precisions, recalls = calculate_metrics(labels, pred, 4)

        print('IOUS: {}'.format(ious))
        print('PRECISIONS: {}'.format(precisions))
        print('RECALL: {}'.format(recalls))
        # Difference scaled by 1000 (milliseconds if ``time`` returns seconds).
        print('Time elapsed: {}'.format((after - before) * 1000))

        # ``items()`` works on both Python 2 and 3 (``iteritems`` is 2-only).
        for cls, iou in ious.items():
            mean_iou[cls].append(iou)

        for cls, precision in precisions.items():
            mean_precision[cls].append(precision)

        for cls, recall in recalls.items():
            mean_recall[cls].append(recall)

        if args.export_image:
            export_image('intensity-{}-pred.png'.format(item_no),
                         image[:, :, 0])
            export_image('gt-{}-pred.png'.format(item_no), labels)
            # Remap class ids to widely spaced values before exporting;
            # presumably so classes are distinguishable in the image.
            pred[pred == 1] = 5e6
            pred[pred == 2] = 10e6
            pred[pred == 3] = 15e6
            pred[pred == 0] = 0
            export_image('predict-{}-pred.png'.format(item_no), pred)

    mean_iou = {cls: mean(iou) for cls, iou in mean_iou.items()}
    mean_precision = {
        cls: mean(precision)
        for cls, precision in mean_precision.items()
    }
    mean_recall = {
        cls: mean(recall)
        for cls, recall in mean_recall.items()
    }

    print('IoU:', mean_iou)
    print('PRECISION:', mean_precision)
    print('RECALL:', mean_recall)
Esempio n. 28
0
    elif args.pretrain == 'txt':
        model.pretrain('txt')
    elif args.pretrain == 'load_all':
        model.load_cpt(args.dm2c_cptpath)
    elif args.pretrain == 'load_ae':
        model.load_pretrain_cpt(args.img_cptpath, 'img', only_weight=True)
        model.load_pretrain_cpt(args.txt_cptpath, 'txt', only_weight=True)

        for epoch in range(args.n_epochs):
            model.train(epoch)
            train_embedding, train_target, train_modality = model.embedding(
                model.train_loader_ordered, unify_modal='img')
            test_embedding, test_target, test_modality = model.embedding(
                model.test_loader, unify_modal='img')
            kmeans = KMeans(config['n_clusters'], max_iter=1000,
                            tol=5e-5, n_init=20).fit(train_embedding)
            train_metrics = calculate_metrics(train_target, kmeans.labels_)
            y_pred = kmeans.predict(test_embedding)
            test_metrics = calculate_metrics(test_target, y_pred)
            print('>Train', METRIC_PRINT.format(*train_metrics))
            print('>Test ', METRIC_PRINT.format(*test_metrics))
            # sio.savemat('result/result_{}.mat'.format(epoch),
            #             {'X_embed_train': train_embedding,
            #              'y_pred_train': kmeans.predict(train_embedding),
            #              'y_true_train': train_target,
            #              'modal_train': train_modality,
            #              'X_embed_test': test_embedding,
            #              'y_pred_test': y_pred,
            #              'y_true_test': test_target,
            #              'modal_test': test_modality})