コード例 #1
0
def accaracy_measures(model,
                      points,
                      conf_mat=False,
                      roc_curve=False,
                      pre_recall_curve=False):
    """Draw the requested evaluation plots for ``model`` and show them.

    Args:
        model: Classifier exposing ``predict`` (and ``predict_proba`` when
            the ROC curve is requested).
        points: Mapping with ``'x-test'`` and ``'y-test'`` entries; only read
            by the precision/recall plot.
        conf_mat: When true, plot the confusion matrix.
        roc_curve: When true, plot the ROC curve.
        pre_recall_curve: When true, plot the precision/recall curve.

    NOTE(review): the confusion-matrix and ROC branches read ``x_set`` and
    ``y_set``, which are not parameters of this function -- presumably
    module-level globals; confirm they are defined before calling.
    """
    # One column per requested plot plus one spare column.
    x_plot_area = int(conf_mat) + int(roc_curve) + 1

    # Visualize the confusion matrix.
    if conf_mat:
        # Function-call form works on both Python 2 and Python 3.
        print(len(y_set))
        conf_matrix = confusion_matrix(y_set, model.predict(x_set))
        viz.plt.subplot(1, x_plot_area, x_plot_area - 1)
        viz.plot_confusion_matrix(conf_matrix,
                                  classes=[0, 1],
                                  title='Confusion matrix')

    # Visualize the ROC curve.
    if roc_curve:
        # Subplot indices are 1-based: without the clamp the index would be
        # 0 (and matplotlib would raise) when only the ROC curve is requested.
        viz.plt.subplot(1, x_plot_area, max(1, x_plot_area - 2))
        x, y, _ = ROC_Cruve(y_set, model.predict_proba(x_set)[:, 1])
        viz.plt.plot(x, y)

    if pre_recall_curve:
        viz.plot_recision_recall(points['y-test'],
                                 model.predict(points['x-test']))
    viz.plt.show()
コード例 #2
0
def confusion_matrix(mlp, epochs, bs):
    """Plot confusion matrices for the test and train predictions of ``mlp``.

    Predictions come from ``test_mlp_model`` with list printing and plotting
    switched off; they are compared against ``y_test`` and ``y_train``, which
    are read from the enclosing scope.

    NOTE(review): ``bs`` is never used and the third positional argument of
    ``test_mlp_model`` is hard-coded to 30 -- confirm whether ``bs`` was
    meant to be forwarded there.
    """
    predictions = test_mlp_model(mlp, epochs, 30,
                                 print_lists=False,
                                 plot=False,
                                 get_predictions=True)
    predicted_test, predicted_train = predictions

    test_title = "Test data confusion_matrix"
    plot_confusion_matrix(y_test, predicted_test, test_title)

    train_title = "Train data confusion_matrix"
    plot_confusion_matrix(y_train, predicted_train, train_title)
コード例 #3
0
ファイル: ml_model.py プロジェクト: Drug1996/DeepNAR
def main(num):
    """Train ten scikit-learn classifiers ``num`` times and report accuracy.

    Each round draws a fresh 60/40 train/test split of the features loaded
    from ``features_vector.pkl``, fits every classifier, records its test
    accuracy and, for the decision tree and naive Bayes models, saves a
    confusion-matrix image named ``<name>_confusion_matrix_<round>.png``.

    Args:
        num: Number of train/evaluate rounds to run.
    """
    # NOTE: unpickling can execute arbitrary code; only load a features file
    # that comes from a trusted source.
    # The context manager closes the file even if unpickling fails.
    with open('features_vector.pkl', 'rb') as pkl_file:
        ts_features, y = pickle.load(pkl_file)

    names = [
        "Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
        "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
        "Naive Bayes", "QDA"
    ]

    # One list of per-round scores for every classifier name.
    accuracy = {name: [] for name in names}

    for i in range(num):
        # Fresh, unfitted estimators every round; order matches ``names``.
        classifiers = [
            KNeighborsClassifier(3),
            SVC(kernel="linear", C=0.025),
            SVC(gamma=2, C=1),
            GaussianProcessClassifier(1.0 * RBF(1.0)),
            DecisionTreeClassifier(max_depth=5),
            RandomForestClassifier(max_depth=5,
                                   n_estimators=10,
                                   max_features=1),
            MLPClassifier(alpha=1, max_iter=1000),
            AdaBoostClassifier(),
            GaussianNB(),
            QuadraticDiscriminantAnalysis()
        ]

        train_x, test_x, train_y, test_y = \
            train_test_split(ts_features, y, test_size=0.4)

        for name, clf in zip(names, classifiers):
            clf.fit(train_x, train_y)
            score = clf.score(test_x, test_y)
            if name in ('Decision Tree', 'Naive Bayes'):
                pred_y = clf.predict(test_x)
                plt.figure()
                # NOTE(review): predictions are passed before the true
                # labels here, while test_model passes the true labels
                # first -- confirm against plot_confusion_matrix's
                # parameter order.
                plot_confusion_matrix([pred_y], [test_y], LABELS)
                save(name + '_confusion_matrix_' + str(i) + '.png')
            accuracy[name].append(score)

    print(accuracy)
    for name in names:
        print(name + ': ', np.array(accuracy[name]).mean())
コード例 #4
0
def save_confusion_matrix(data_root,
                          output_root,
                          segmenter,
                          data_subset="val"):
    """Evaluate ``segmenter`` on a dataset split and save its confusion matrix.

    Every (image, ground-truth mask) pair of
    ``SegmentationDataset(data_root, data_subset)`` is run through
    ``segmenter.get_raw_prediction`` and accumulated into a two-class
    ``ConfusionMatrix`` (``average="precision"``) and an ``Accuracy`` metric.
    The plotted matrix is written to ``output_root`` as both PDF and PNG,
    with the accuracy (six decimals) embedded in the file name.
    """
    dataset = SegmentationDataset(data_root, data_subset)

    cm_metric = ConfusionMatrix(num_classes=2, average="precision")
    acc_metric = Accuracy()

    for image, mask_gt in dataset:
        mask_pred = segmenter.get_raw_prediction(image)

        # Move the ground truth next to the prediction and prepend two
        # singleton dimensions.
        gt_tensor = torch.from_numpy(mask_gt).to(mask_pred.device)
        gt_tensor = gt_tensor.unsqueeze(0).unsqueeze(0)

        pair = (mask_pred, gt_tensor)
        cm_metric.update(output_transform_confusion_matrix(pair))
        acc_metric.update(output_transform_accuracy(pair))

    cm_values = cm_metric.compute()
    accuracy = acc_metric.compute()

    cm_figure = plot_confusion_matrix(cm_values)

    filename_base = f"confusion_matrix_acc={accuracy:.6f}"
    for extension in (".pdf", ".png"):
        cm_figure.savefig(os.path.join(output_root,
                                       filename_base + extension))
コード例 #5
0
ファイル: model.py プロジェクト: Drug1996/DeepNAR
def test_model(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and save a checkpoint.

    Prints the overall accuracy, plots and saves the confusion matrix as
    ``test_confusion_matrix.png`` and writes the model's ``state_dict`` to
    ``DeepNAR_model.ckpt``.

    Args:
        model: Network to evaluate; called on inputs reshaped to
            ``(-1, sequence_length, input_size)``.
        test_loader: Iterable yielding ``(inputs, labels)`` batches of any
            batch size.

    Relies on ``sequence_length``, ``input_size``, ``device``, ``LABELS``,
    ``plot_confusion_matrix`` and ``save`` from the enclosing module.
    """
    # Test the model on the test set; gradients are not needed.
    with torch.no_grad():
        y_true = []
        y_pred = []
        correct = 0
        total = 0
        for inputs, labels in test_loader:
            inputs = inputs.reshape(-1, sequence_length, input_size).to(device)
            labels = labels.reshape(-1).to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # ``.item()`` only works on one-element tensors and would raise
            # for batches larger than one; ``tolist()`` handles any size.
            y_true.extend(labels.tolist())
            y_pred.extend(predicted.tolist())

        print('Final accuracy is {} %'.format((correct / total) * 100))
        plot_confusion_matrix([y_true], [y_pred], LABELS)
        save('test_confusion_matrix' + '.png')

    # Save the model checkpoint
    torch.save(model.state_dict(), 'DeepNAR_model.ckpt')
コード例 #6
0
def _dump_pickle(obj, path):
    """Pickle ``obj`` to ``path``, closing the file even if dumping fails."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)


def _report_on_subplot(position, title, cv_scores, y_test, y_pred,
                       class_names, font):
    """Draw one normalized confusion matrix with its score summary below it.

    Args:
        position: 1-based column in the 1x4 subplot grid.
        title: Title handed to ``plot_confusion_matrix``.
        cv_scores: Cross-validation scores to list under the matrix.
        y_test: True test labels.
        y_pred: Predicted test labels.
        class_names: Label values used to order the confusion matrix.
        font: ``fontdict`` for the annotation text.
    """
    matrix = confusion_matrix(y_test, y_pred, labels=list(class_names))
    plt.subplot(1, 4, position)
    plot_confusion_matrix(cm1=matrix,
                          classes=class_names,
                          normalize=True,
                          gradientbar=False,
                          title=title)
    cv_text = ["{:.2f}".format(x) for x in cv_scores]

    # Precision, recall and F2 for the 'Parasitized' class only.
    p_r_fscore = precision_recall_fscore_support(y_test,
                                                 y_pred,
                                                 beta=2.0,
                                                 labels=['Parasitized'],
                                                 pos_label='Parasitized',
                                                 average='binary')
    print(p_r_fscore[:3])

    # Re-select the subplot so the text lands on it even if the matrix
    # plotting changed the current axes.
    axes = plt.subplot(1, 4, position)
    plt.text(
        0.01,
        -1,
        '\nCV Scores:\n' + str(cv_text) + '\n' +
        'Precision: {d[0]:.2f}\nRecall: {d[1]:.2f} \nF2 score: {d[2]:.2f} \n'.
        format(d=p_r_fscore[:3]),
        ha='left',
        va='bottom',
        fontdict=font,
        transform=axes.transAxes)


def ML_with_BN_feat(bn_feat_file='../data/factors_n_bn_feat.csv',
                    n_comp=100,
                    plotting=False):
    """Compare four classifiers trained on PCA-reduced bottleneck features.

    Loads the feature table, keeps the 'Parasitized' / 'Uninfected' rows,
    reduces columns ``x0`` .. ``x2047`` with PCA, then fits logistic
    regression, one-vs-rest naive Bayes, one-vs-rest random forest and
    AdaBoost. Confusion matrices are drawn side by side and a ROC curve is
    plotted for the logistic regression.

    Args:
        bn_feat_file: CSV file with a ``label`` column and feature columns
            ``x0`` .. ``x2047``.
        n_comp: Number of principal components to keep; values below 50 are
            raised to 50.
        plotting: When true, also draw pairwise plots of the first 11
            components via ``caa_plot_pairs``.

    Side effects:
        Writes figures under ``../plots/``, the fitted PCA and classifiers
        (all except naive Bayes) under ``../models/``, and the test
        predictions and ROC data under ``../data/``. The final
        ``plt.show()`` uses matplotlib's default blocking behaviour.
    """
    plt.close('all')
    if n_comp < 50:
        n_comp = 50

    # Import the bottleneck features for each image.
    feat_df = pd.read_csv(bn_feat_file, index_col=0, dtype='unicode')
    print('Data frame shape:', feat_df.shape)
    mask = feat_df.loc[:, 'label'].isin(['Parasitized', 'Uninfected'])
    feat_df = feat_df.loc[mask, :].drop_duplicates()
    print('Number of bottleneck features:', feat_df.shape[1] - 7)
    y = feat_df.loc[:, ['label']].values  # column vector, shape (n, 1)
    print(type(y), y.shape)

    print('Number of samples for each label \n',
          feat_df.groupby('label')['label'].count())
    X = feat_df.loc[:, 'x0':'x2047'].astype(float).values

    class_names = set(feat_df.loc[:, 'label'])

    # Hold out 20% of the data for testing.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=0)

    # ---- Dimensionality reduction ----
    # Fit a full-rank PCA first to inspect how the variance is spread over
    # the components (n_components=None --> min(n samples, n features)).
    # Only the variance ratios are needed, so no projection is computed.
    print('...running PCA analysis...')
    pca_none = PCA(n_components=None)
    pca_none.fit(X_train)
    plt.figure(0)
    plt.plot(pca_none.explained_variance_ratio_)
    plt.xlabel('n_components')
    plt.ylabel('variance')
    plt.suptitle('Explained Variance of Principle Components')
    plt.savefig('../plots/pca_var_vs_ncomp.png')

    # Project the data onto the requested number of components.
    print('...running PCA with', n_comp, 'components')
    pca = PCA(n_components=n_comp)
    X_train = pca.fit_transform(X_train)
    X_test = pca.transform(X_test)
    plt.figure(1)
    plt.plot(pca.explained_variance_ratio_)
    plt.xlabel('n_components')
    plt.ylabel('variance')
    plt.suptitle('Explained Variance of Principle Components')
    plt.show(block=False)
    plt.savefig('../plots/pca_var_vs_{}_ncomp.png'.format(n_comp))
    # Save the fitted feature-reduction PCA.
    _dump_pickle(pca, '../models/trained_PCA.sav')

    if plotting:
        # Pairwise plots of 11 components; note this only works with two
        # labels.
        feat_df_ploting = pd.DataFrame({'label': y_train[:, 0]})
        caa_plot_pairs(X_train[:, :11], feat_df_ploting, 'PCA')
        plt.figure(figsize=(16, 24))
        plt.show(block=False)

    # Training labels next to the first 11 components, as one data frame.
    pca_DF = pd.DataFrame(X_train[:, :11])
    df_y_train = pd.DataFrame(y_train, columns=['label'])
    df_pca_train = pd.concat([df_y_train, pca_DF], axis=1)

    feature_names = df_pca_train.columns[1:]
    n_comp_pca = pca_DF.shape[1]
    print('n_comp_pca', n_comp_pca)
    print('feature_names', feature_names)
    print('df_pca_train columns', list(df_pca_train.columns))

    plt.close('all')

    # Set up the figure used to compare the confusion matrices.
    # ('axes.titlesize' used to be listed twice; 'large' is the value that
    # took effect, so it is the one kept.)
    params = {
        'axes.labelsize': 'large',
        'axes.titlesize': 'large',
        'xtick.labelsize': 'medium',
        'ytick.labelsize': 'medium'
    }
    plt.rcParams.update(params)

    fig, _ = plt.subplots(1, 4, sharey=True, figsize=(15, 8.5))
    font = {'linespacing': 1.5, 'size': 14}

    # 1-D view of the training labels for the estimators fitted on it.
    train_labels = df_y_train['label'].values

    # ---- Logistic regression ----
    from sklearn import linear_model
    log_reg_classifier = linear_model.LogisticRegression(penalty='l2',
                                                         tol=0.0001,
                                                         C=1.0,
                                                         fit_intercept=True,
                                                         intercept_scaling=1,
                                                         class_weight=None,
                                                         random_state=None,
                                                         solver='liblinear',
                                                         max_iter=100,
                                                         multi_class='ovr',
                                                         n_jobs=1)
    log_r = log_reg_classifier.fit(X_train, train_labels)

    y_test_predictions_log_r = log_r.predict(X_test)
    y_predict_prob_log_r = log_r.predict_proba(X_test)
    # Collect every model's predictions in one data frame.
    results = pd.DataFrame()
    results['y_test'] = y_test[:, 0]
    results['log_r_pred'] = list(y_test_predictions_log_r)
    results['log_r_prob'] = y_predict_prob_log_r[:, 0]

    # Cross-validate with scikit-learn's default number of folds.
    cv_scores_lr = cross_val_score(estimator=log_r, X=X_train, y=y_train)
    print('Logistic regression cv_scores', cv_scores_lr)

    _dump_pickle(log_reg_classifier, '../models/trained_log_reg.sav')

    _report_on_subplot(1, 'Logistic Regression\n', cv_scores_lr, y_test,
                       y_test_predictions_log_r, class_names, font)

    # ---- One-vs-rest naive Bayes ----
    classifier = OneVsRestClassifier(GaussianNB())
    nbclf = classifier.fit(X_train, train_labels)
    y_test_predictions_nbclf = nbclf.predict(X_test)
    y_predict_prob = nbclf.predict_proba(X_test)
    results['NB_pred'] = list(y_test_predictions_nbclf)
    results['NB_r_prob'] = y_predict_prob[:, 0]

    cv_scores = cross_val_score(classifier, X_train, y_train)
    print('NB cv_scores', cv_scores)

    _report_on_subplot(2, 'One vs Rest - Naive Bayes\n', cv_scores, y_test,
                       y_test_predictions_nbclf, class_names, font)

    # ---- Random forest ----
    # max_features may not exceed the number of PCA components.
    f = min(n_comp, 100)
    n = 30
    RFclf = OneVsRestClassifier(
        RandomForestClassifier(n_estimators=n, max_features=f))
    RFclf.fit(X_train, train_labels)
    y_test_predictions_RF = RFclf.predict(X_test)
    y_score_answer_RF = RFclf.predict_proba(X_test)

    results['RF'] = list(y_test_predictions_RF)
    results['RF_prob'] = y_score_answer_RF[:, 0]

    cv_scores_RF = cross_val_score(RFclf, X_train, y_train)
    print('Random Forest cv_scores', cv_scores_RF)
    _dump_pickle(RFclf, '../models/trained_RF.sav')

    _report_on_subplot(
        3,
        'Random Forests\nestimators: {0}\n max_features: {1}\n'.format(n, f),
        cv_scores_RF, y_test, y_test_predictions_RF, class_names, font)

    # ---- Adaptive boosting ----
    AdaBoost = AdaBoostClassifier()
    AdaBoost.fit(X_train, y_train)
    y_predAB = AdaBoost.predict(X_test)
    y_predAB_prob = AdaBoost.predict_proba(X_test)
    results['AB_pred'] = list(y_predAB)
    results['AB_prob'] = y_predAB_prob[:, 0]

    results.to_csv('../data/y_test_predictions')

    cv_scores_AB = cross_val_score(AdaBoost, X_train, y_train)
    print('Adaptive Boosting cv_scores', cv_scores_AB)
    _dump_pickle(AdaBoost, '../models/trained_AdaBoost.sav')

    _report_on_subplot(4, 'AdaBoost\n', cv_scores_AB, y_test, y_predAB,
                       class_names, font)

    # ---- Mean accuracy of each classifier on the held-out test set ----
    print('\nLogistic Regression mean accuracy:',
          round(log_reg_classifier.score(X_test, y_test), 4))
    print('One vs Rest - Naive Bayes mean accuracy:',
          round(classifier.score(X_test, y_test), 4))
    print('Random Forest Classifier mean accuracy:',
          round(RFclf.score(X_test, y_test), 4))
    print('Adaptive Boosting Classifier mean accuracy:',
          round(AdaBoost.score(X_test, y_test), 4))
    plt.tight_layout()
    fig.tight_layout()
    plt.savefig('../plots/confusion_matrix_result_1.png')
    plt.show(block=False)

    # ---- ROC curve and AUC for the logistic regression ----
    plt.figure(12)
    # 'Parasitized' becomes the positive class (1); it is scored with column
    # 0 of predict_proba.
    # NOTE(review): confirm column 0 of predict_proba corresponds to
    # 'Parasitized' (i.e. check ``log_reg_classifier.classes_``).
    y_test_bin = label_binarize(y_test, classes=['Uninfected', 'Parasitized'])

    fpr, tpr, thresholds = roc_curve(y_test_bin, y_predict_prob_log_r[:, 0])
    roc_df = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds})
    roc_df.to_csv('../data/roc_data.csv')
    roc_auc = auc(fpr, tpr)
    plt.title('Receiver Operating Characteristic', fontsize=18)
    plt.plot(fpr,
             tpr,
             lw=2,
             color='#3399ff',
             label='AUC = {0:.2f}'.format(roc_auc))

    # Diagonal reference line for a random classifier.
    plt.plot([0, 1], [0, 1],
             linestyle='--',
             lw=2,
             color='gray',
             label='Chance',
             alpha=.8)

    plt.ylabel('True Positive Rate', fontsize=14)
    plt.xlabel('False Positive Rate', fontsize=14)
    plt.tick_params(axis='both', which='major', labelsize=12)
    plt.legend(loc="lower right")
    plt.tight_layout()
    plt.savefig('../plots/ROC_CNN_log_reg.png')
    plt.show()
    plt.close('all')
    print(
        'If launched from command line use ctrl+z to close all plots and finish'
    )
コード例 #7
0
def main_Core50(conf, run, close_at_the_end=False):
    # Prepare configurations files
    conf['solver_file_first_batch'] = conf['solver_file_first_batch'].replace(
        'X', conf['model'])
    conf['solver_file'] = conf['solver_file'].replace('X', conf['model'])
    conf['init_weights_file'] = conf['init_weights_file'].replace(
        'X', conf['model'])
    conf['tmp_weights_file'] = conf['tmp_weights_file'].replace(
        'X', conf['model'])
    train_filelists = conf['train_filelists'].replace('RUN_X', run)
    test_filelist = conf['test_filelist'].replace('RUN_X', run)
    run_on_the_fly = True  # If True, tells the train_utils.get_data(...) script not to cache batch data on disk

    (Path(conf['exp_path']) / 'CM').mkdir(exist_ok=True, parents=True)
    (Path(conf['exp_path']) / 'EwC').mkdir(exist_ok=True, parents=True)
    (Path(conf['exp_path']) / 'Syn').mkdir(exist_ok=True, parents=True)

    if 'brn_past_weight' not in conf or conf['brn_past_weight'] is None:
        if conf['rehearsal_is_latent']:
            conf['brn_past_weight'] = 20000
        else:
            conf['brn_past_weight'] = 10000

    # To change if needed the network prototxt
    if conf['rehearsal_is_latent']:
        solver_param = caffe_pb2.SolverParameter()
        with open(conf['solver_file']) as f:
            txtf.Merge(str(f.read()), solver_param)
        next_batches_net_prototxt_path = Path(solver_param.net)

        if not next_batches_net_prototxt_path.stem.endswith('b'):
            print(
                'Error dealing with latent rehearsal: invalid net prototxt name!'
            )
            exit(1)

        next_batches_net_prototxt_path_orig = next_batches_net_prototxt_path.parent / (
            next_batches_net_prototxt_path.stem[:-1] +
            next_batches_net_prototxt_path.suffix)
        moving_avg_fraction = 1.0 - (1.0 / conf['brn_past_weight'])
        train_utils.modify_net_prototxt(
            str(next_batches_net_prototxt_path_orig),
            str(next_batches_net_prototxt_path),
            moving_average_fraction=moving_avg_fraction)

        if conf['model'] == 'MobileNetV1':
            rehearsal_layer_mapping_for_mobilenetv1 = {
                'data': ([-1, 3, 128, 128], 'conv1'),
                'conv2_1/dw': ([-1, 32, 64, 64], 'conv2_1/sep'),
                #conv2_1 / dw(128, 32, 64, 64)
                #    conv2_1 / sep(128, 64, 64, 64)
                'conv2_2/dw': ([-1, 64, 32, 32], 'conv2_2/sep'),
                #conv2_2 / dw(128, 64, 32, 32)
                #    conv2_2 / sep(128, 128, 32, 32)
                'conv3_1/dw': ([-1, 128, 32, 32], 'conv3_1/sep'),
                #conv3_1 / dw(128, 128, 32, 32)
                #    conv3_1 / sep(128, 128, 32, 32)
                'conv3_2/dw': ([-1, 128, 16, 16], 'conv3_2/sep'),
                #conv3_2 / dw(128, 128, 16, 16)
                #    conv3_2 / sep(128, 256, 16, 16)
                'conv4_1/dw': ([-1, 256, 16, 16], 'conv4_1/sep'),
                #conv4_1 / dw(128, 256, 16, 16)
                #    conv4_1 / sep(128, 256, 16, 16)
                'conv4_2/dw': ([-1, 256, 8, 8], 'conv4_2/sep'),
                #conv4_2 / dw(128, 256, 8, 8)
                #    conv4_2 / sep(128, 512, 8, 8)
                'conv5_1/dw': ([-1, 512, 8, 8], 'conv5_1/sep'),
                #conv5_1 / dw(512, 1, 3, 3)
                #    conv5_1 / sep(512, 512, 1, 1)
                'conv5_2/dw': ([-1, 512, 8, 8], 'conv5_2/sep'),
                #conv5_2 / dw(512, 1, 3, 3)
                #    conv5_2 / sep(512, 512, 1, 1)
                'conv5_3/dw': ([-1, 512, 8, 8], 'conv5_3/sep'),
                #conv5_3 / dw(512, 1, 3, 3)
                #    conv5_3 / sep(512, 512, 1, 1)
                'conv5_4/dw': ([-1, 512, 8, 8], 'conv5_4/sep'),
                #conv5_4 / dw(512, 1, 3, 3)
                #    conv5_4 / sep(512, 512, 1, 1)
                'conv5_5/dw': ([-1, 512, 8, 8], 'conv5_5/sep'),
                #conv5_5 / dw(512, 1, 3, 3)
                #    conv5_5 / sep(512, 512, 1, 1)
                'conv5_6/dw': ([-1, 512, 4, 4], 'conv5_6/sep'),
                #conv5_6 / dw(512, 1, 3, 3)
                #    conv5_6 / sep(1024, 512, 1, 1)
                'conv6/dw': ([-1, 1024, 4, 4], 'conv6/sep'),
                #conv6 / dw(1024, 1, 3, 3)
                #    conv6 / sep(1024, 1024, 1, 1)
                'pool6': ([-1, 1024, 1, 1], 'mid_fc7')
                #avg_pool(1024)
                #    mid_fc7(50, 1024, 1, 1)(50, )
            }

            current_mapping = rehearsal_layer_mapping_for_mobilenetv1[
                conf['rehearsal_layer']]
            if 'rehearsal_stop_layer' not in conf or conf[
                    'rehearsal_stop_layer'] is None:
                conf['rehearsal_stop_layer'] = current_mapping[1]

            rehe_lat_surgery.create_concat_layer_from_net_template(
                str(next_batches_net_prototxt_path),
                str(next_batches_net_prototxt_path),
                conf['rehearsal_layer'],
                current_mapping[0],
                current_mapping[1],
                original_input=21,
                rehearsal_input=107)
        else:
            raise RuntimeError('Unsupported model for latent rehearsal:',
                               conf['model'])

    # Parse the solver prototxt
    #  for more details see - https://stackoverflow.com/questions/31823898/changing-the-solver-parameters-in-caffe-through-pycaffe
    if conf['initial_batch'] == 0:
        print('Solver proto: ', conf['solver_file_first_batch'])
        solver_param = caffe_pb2.SolverParameter()
        with open(conf['solver_file_first_batch']) as f:
            txtf.Merge(str(f.read()), solver_param)
        net_prototxt = solver_param.net  # Obtains the path to the net prototxt
        print('Net proto: ', net_prototxt)
    else:
        print('Solver proto: ', conf['solver_file'])
        solver_param = caffe_pb2.SolverParameter()
        with open(conf['solver_file']) as f:
            txtf.Merge(str(f.read()), solver_param)
        net_prototxt = solver_param.net  # Obtains the path to the net prototxt
        print('Net proto: ', net_prototxt)

    # Obtain class labels
    if conf['class_labels'] != '':
        # More complex than a simple loadtxt because of the unicode representation in python 3
        label_str = np.loadtxt(conf['class_labels'],
                               dtype=bytes,
                               delimiter="\n").astype(str)

    # Obtain minibatch size from net proto
    train_minibatch_size, test_minibatch_size = train_utils.extract_minibatch_size_from_prototxt_with_input_layers(
        net_prototxt)
    print(' test minibatch size: ', test_minibatch_size)
    print(' train minibatch size: ', train_minibatch_size)

    # Load test set
    print("Recovering Test Set: ", test_filelist, " ...")
    start = time.time()
    test_x, test_y = train_utils.get_data(test_filelist,
                                          conf['db_path'],
                                          conf['exp_path'],
                                          on_the_fly=run_on_the_fly,
                                          verbose=conf['verbose'])
    assert (test_x.shape[0] == test_y.shape[0])
    if conf['num_classes'] < 50:  # Checks if we are doing category-based classification
        test_y = test_y // 5
    test_y = test_y.astype(np.float32)
    test_patterns = test_x.shape[0]
    test_x, test_y, test_iterat = train_utils.pad_data(test_x, test_y,
                                                       test_minibatch_size)
    print(' -> %d patterns of %d classes (%.2f sec.)' %
          (test_patterns, len(np.unique(test_y)), time.time() - start))
    print(' -> %.2f -> %d iterations for full evaluation' %
          (test_patterns / test_minibatch_size, test_iterat))

    # Load training patterns in batches (by now assume the same number in all batches)
    batch_count = conf['num_batches']
    train_patterns = train_utils.count_lines_in_batches(
        batch_count, train_filelists)
    train_iterations_per_epoch = np.zeros(batch_count, int)
    train_iterations = np.zeros(batch_count, int)
    test_interval_epochs = conf['test_interval_epochs']
    test_interval = np.zeros(batch_count, float)
    for batch in range(batch_count):
        if conf["rehearsal"] and batch > 0:
            train_patterns[batch] += conf["rehearsal_memory"]
        train_iterations_per_epoch[batch] = int(
            np.ceil(train_patterns[batch] / train_minibatch_size))
        test_interval[
            batch] = test_interval_epochs * train_iterations_per_epoch[batch]
        if (batch == 0):
            train_iterations[batch] = train_iterations_per_epoch[batch] * conf[
                'num_epochs_first_batch']
        else:
            train_iterations[
                batch] = train_iterations_per_epoch[batch] * conf['num_epochs']
        print("Batch %2d: %d patterns, %d iterations (%d iter. per epochs - test every %.1f iter.)" \
              % (batch, train_patterns[batch], train_iterations[batch], train_iterations_per_epoch[batch], test_interval[batch]))

    # Create evaluation points
    # -> iterations which are boundaries of batches
    batch_iter = [0]
    iter = 0
    for batch in range(batch_count):
        iter += train_iterations[batch]
        batch_iter.append(iter)

    # Calculates the iterations where the network will be evaluated
    eval_iters = [
        1
    ]  # Start with 1 (instead of 0) because the test net is aligned to the train one after solver.step(1)
    for batch in range(batch_count):
        start = batch_iter[batch]
        end = batch_iter[batch + 1]
        start += test_interval[batch]
        while start < end:
            eval_iters.append(int(start))
            start += test_interval[batch]
        eval_iters.append(end)

    # Iterations which are epochs in the evaluation range
    epochs_iter = []
    for batch in range(batch_count):
        start = batch_iter[batch]
        end = batch_iter[batch + 1]
        start += train_iterations_per_epoch[batch]
        while start <= end:
            epochs_iter.append(int(start))
            start += train_iterations_per_epoch[batch]

    prev_train_loss = np.zeros(len(eval_iters))
    prev_test_acc = np.zeros(len(eval_iters))
    prev_train_acc = np.zeros(len(eval_iters))
    prev_exist = filelog.TryLoadPrevTrainingLog(conf['train_log_file'],
                                                prev_train_loss, prev_test_acc,
                                                prev_train_acc)
    train_loss = np.copy(
        prev_train_loss
    )  # Copying allows to correctly visualize the graph in case we start from initial_batch > 0
    test_acc = np.copy(prev_test_acc)
    train_acc = np.copy(prev_train_acc)

    epochs_tick = False if batch_count > 30 else True  # For better visualization
    visualization.Plot_Incremental_Training_Init('Incremental Training',
                                                 eval_iters,
                                                 epochs_iter,
                                                 batch_iter,
                                                 train_loss,
                                                 test_acc,
                                                 5,
                                                 conf['accuracy_max'],
                                                 prev_exist,
                                                 prev_train_loss,
                                                 prev_test_acc,
                                                 show_epochs_tick=epochs_tick)
    filelog.Train_Log_Init(conf['train_log_file'])
    filelog.Train_LogDetails_Init(conf['train_log_file'])

    start_train = time.time()
    eval_idx = 0  # Evaluation iterations counter
    global_eval_iter = 0  # Global iterations counter
    first_round = True
    initial_batch = conf['initial_batch']
    if initial_batch > 0:  # Move forward by skipping unnecessary evaluation
        global_eval_iter = batch_iter[initial_batch]
        while eval_iters[eval_idx] < global_eval_iter:
            eval_idx += 1
        eval_idx += 1

    for batch in range(initial_batch, batch_count):

        print(
            '\nBATCH = {:2d} ----------------------------------------------------'
            .format(batch))

        if batch == 0:
            solver = caffe.get_solver(
                conf['solver_file_first_batch']
            )  # Load the solver for the first batch and create net(s)
            if conf['init_weights_file'] != '':
                solver.net.copy_from(conf['init_weights_file'])
                print('Network created and Weights loaded from: ',
                      conf['init_weights_file'])
                # Test
                solver.share_weights(solver.test_nets[0])
                print('Weights shared with Test Net')

                accuracy, _, pred_y = train_utils.test_network_with_accuracy_layer(
                    solver,
                    test_x,
                    test_y,
                    test_iterat,
                    test_minibatch_size,
                    prediction_level_Model[conf['model']],
                    return_prediction=True)

            if conf['strategy'] in ['cwr+', 'ar1', 'ar1free']:
                cwr.zeros_cwr_layer_bias_lr(solver.net,
                                            cwr_layers_Model[conf['model']])
                class_updates = np.full(conf['num_classes'],
                                        conf['initial_class_updates_value'],
                                        dtype=np.float32)
                cons_w = cwr.init_consolidated_weights(
                    solver.net, cwr_layers_Model[conf['model']],
                    conf['num_classes']
                )  # allocate space for consolidated weights and initialze to 0
                cwr.reset_weights(
                    solver.net, cwr_layers_Model[conf['model']],
                    conf['num_classes']
                )  # reset weights to 0 (done here for the first batch to keep initial stats correct)

            # cwr.reset_weights(solver.net, cwr_layers_Model[conf['model']], conf['num_classes'])   # reset weights to 0 (done here for the first batch to keep initial stats correct)

            if conf['strategy'] in ['ar1', 'ar1free']:
                ewcData, synData = syn.create_syn_data(
                    solver.net
                )  # ewcData stores optimal weights + normalized fisher; trajectory store unnormalized summed grad*deltaW

            if conf['rehearsal_is_latent']:
                reha_data_size = solver.net.blobs[
                    conf['rehearsal_layer']].data[0].size
                rehearsal.allocate_memory(conf['rehearsal_memory'],
                                          reha_data_size, 1)
            else:
                rehearsal.allocate_memory(conf['rehearsal_memory'],
                                          test_x[0].size, 1)

        elif batch == 1:
            solver = caffe.get_solver(
                conf['solver_file'])  # load solver and create net
            if first_round:
                solver.net.copy_from(conf['init_weights_file'])
                print('Network created and Weights loaded from: ',
                      conf['init_weights_file'])
            else:
                solver.net.copy_from(conf['tmp_weights_file'])
                print('Network created and Weights loaded from: ',
                      conf['tmp_weights_file'])

            solver.share_weights(solver.test_nets[0])

            if first_round:
                print('Loading consolidated weights...')
                class_updates = np.full(conf['num_classes'],
                                        conf['initial_class_updates_value'],
                                        dtype=np.float32)
                rand_w, cons_w = cwr.copy_initial_weights(
                    solver.net, cwr_layers_Model[conf['model']],
                    conf['num_classes'])
                if conf['strategy'] in ['ar1']:
                    ewcData, synData = syn.create_syn_data(
                        solver.net
                    )  # ewcData stores optimal weights + normalized fisher; trajectory store unnormalized summed grad*deltaW

            if conf['strategy'] in ['cwr+']:
                cwr.zeros_non_cwr_layers_lr(
                    solver.net,
                    cwr_layers_Model[conf['model']])  # blocca livelli sotto

            if conf['strategy'] in ['cwr+', 'ar1', 'ar1free']:
                if 'cwr_lr_mult' in conf.keys() and conf['cwr_lr_mult'] != 1:
                    cwr.zeros_cwr_layer_bias_lr(
                        solver.net,
                        cwr_layers_Model[conf['model']],
                        force_weights_lr_mult=conf['cwr_lr_mult'])
                else:
                    cwr.zeros_cwr_layer_bias_lr(
                        solver.net, cwr_layers_Model[conf['model']])

            cwr.set_brn_past_weight(solver.net, conf['brn_past_weight'])

        # Initializes some data structures used for reporting stats. Executed once (in the first round)
        if first_round:
            if batch == 1 and (conf['strategy']
                               in ['cwr', 'cwr+', 'ar1', 'ar1free']):
                print('Cannot start from batch 1 in ', conf['strategy'],
                      ' strategy!')
                sys.exit(0)
            visualization.PrintNetworkArchitecture(solver.net)
            # If accuracy layer is defined in the prototxt also in TRAIN mode -> log train accuracy too (not in the plot)
            try:
                report_train_accuracy = True
                err = solver.net.blobs[
                    'accuracy'].num  # Assume this is stable for prototxt of successive batches
            except:
                report_train_accuracy = False
            first_round = False
            if conf['compute_param_stats']:
                param_change = {}
                param_stats = train_utils.stats_initialize_param(solver.net)
                # nonzero_activations = train_utils.stats_activations_initialize(solver.net)

        # Load training data for the current batch
        # Note that the file lists are provided in the batch_filelists folder
        current_train_filelist = train_filelists.replace(
            'XX',
            str(batch).zfill(2))
        print("Recovering training data: ", current_train_filelist, " ...")
        batch_x, batch_y = train_utils.get_data(current_train_filelist,
                                                conf['db_path'],
                                                conf['exp_path'],
                                                on_the_fly=run_on_the_fly,
                                                verbose=conf['verbose'])
        print("Done.")
        if conf['num_classes'] < 50:  # Category based classification
            batch_y = batch_y // 5

        batch_t = train_utils.compute_one_hot_vectors(batch_y,
                                                      conf['num_classes'])

        # Load patterns from Rehearsal Memory
        rehe_x, rehe_y = rehearsal.get_samples()
        rehe_t = train_utils.compute_one_hot_vectors(rehe_y,
                                                     conf['num_classes'])

        # Detects how many patterns per class are present in the current batch
        if batch == 0:
            classes_in_cur_train = batch_y.astype(np.int)
        else:
            classes_in_cur_train = np.concatenate(
                (batch_y.astype(np.int), rehe_y.astype(np.int)))
        unique_y, y_freq = np.unique(classes_in_cur_train, return_counts=True)

        if conf['strategy'] in ['cwr+', 'ar1', 'ar1free'
                                ] and batch > initial_batch:
            cwr.reset_weights(
                solver.net, cwr_layers_Model[conf['model']],
                conf['num_classes'])  # Reset weights of CWR layers to 0

            # Loads previously consolidated weights
            # This procedure, explained in Fine-Grained Continual Learning (https://arxiv.org/pdf/1907.03799.pdf),
            # is necessary in the NIC scenario
            if 'cwr_nic_load_weight' in conf.keys(
            ) and conf['cwr_nic_load_weight']:
                cwr.load_weights_nic(solver.net,
                                     cwr_layers_Model[conf['model']], unique_y,
                                     cons_w)

        if conf['strategy'] in ['ar1'] and batch > initial_batch:
            syn.weight_stats(solver.net, batch, ewcData, conf['ewc_clip_to'])
            solver.net.blobs['ewc'].data[...] = ewcData

        # Convert labels to float32
        batch_y = batch_y.astype(np.float32)
        assert (batch_x.shape[0] == batch_y.shape[0])
        rehe_y = rehe_y.astype(np.float32)

        avg_train_loss = 0
        avg_train_accuracy = 0
        avg_count = 0

        if conf['strategy'] in ['syn', 'ar1']:
            syn.init_batch(solver.net, ewcData, synData)

        reharshal_size = conf[
            "rehearsal_memory"] if batch > initial_batch else 0
        orig_in_minibatch = np.round(
            train_minibatch_size * batch_x.shape[0] /
            (batch_x.shape[0] + reharshal_size)).astype(np.int)
        reha_in_minibatch = train_minibatch_size - orig_in_minibatch

        print(' -> Current Batch: %d patterns, External Memory: %d patterns' %
              (batch_x.shape[0], reharshal_size))
        print(
            ' ->   per minibatch (size %d): %d from current batch and %d from external memory'
            % (train_minibatch_size, orig_in_minibatch, reha_in_minibatch))

        # Padding and shuffling
        batch_x, orig_iters_per_epoch = train_utils.pad_data_single(
            batch_x, orig_in_minibatch)
        batch_y, _ = train_utils.pad_data_single(batch_y, orig_in_minibatch)
        batch_t, _ = train_utils.pad_data_single(batch_t, orig_in_minibatch)
        batch_x, batch_y, batch_t = train_utils.shuffle_in_unison(
            (batch_x, batch_y, batch_t), 0)

        if conf['rehearsal_is_latent']:
            req_shape = (batch_x.shape[0], ) + solver.net.blobs[
                conf['rehearsal_layer']].data.shape[1:]
            latent_batch_x = np.zeros(req_shape, dtype=np.float32)

        # Padding and shuffling of rehasal patterns
        reha_iters_per_epoch = 0
        if reharshal_size > 0:
            rehe_x, reha_iters_per_epoch = train_utils.pad_data_single(
                rehe_x, reha_in_minibatch)
            rehe_y, _ = train_utils.pad_data_single(rehe_y, reha_in_minibatch)
            rehe_t, _ = train_utils.pad_data_single(rehe_t, reha_in_minibatch)
            rehe_x, rehe_y, rehe_t = train_utils.shuffle_in_unison(
                (rehe_x, rehe_y, rehe_t), 0)  # shuffle

        print(
            ' ->   iterations per epoch (with padding): %d, %d (initial %d)' %
            (orig_iters_per_epoch, reha_iters_per_epoch,
             train_iterations_per_epoch[batch]))

        # The main solver loop (per batch)
        it = 0
        while it < train_iterations[batch]:
            # The following part is pretty much straight-forward
            # The current batch is split in minibatches (which size was previously detected by looking at the net prototxt)
            # The minibatch is loaded in blobs 'data', 'data_reha', 'label' and 'target'
            it_mod_orig = it % orig_iters_per_epoch
            orig_start = it_mod_orig * orig_in_minibatch
            orig_end = (it_mod_orig + 1) * orig_in_minibatch

            if conf['rehearsal_is_latent']:
                solver.net.blobs['data'].data[
                    ...] = batch_x[orig_start:orig_end]
            else:
                solver.net.blobs['data'].data[:orig_in_minibatch] = batch_x[
                    orig_start:orig_end]

            # Provide data to input layers (new patterns)
            solver.net.blobs['label'].data[:orig_in_minibatch] = batch_y[
                orig_start:orig_end]
            solver.net.blobs['target'].data[:orig_in_minibatch] = batch_t[
                orig_start:orig_end]

            # Provide data to input layers (reharsal patterns)
            if reharshal_size > 0:
                it_mod_reha = it % reha_iters_per_epoch
                reha_start = it_mod_reha * reha_in_minibatch
                reha_end = (it_mod_reha + 1) * reha_in_minibatch

                if conf['rehearsal_is_latent']:
                    solver.net.blobs['data_reha'].data[
                        ...] = rehe_x[reha_start:reha_end]
                else:
                    solver.net.blobs['data'].data[orig_in_minibatch:] = rehe_x[
                        reha_start:reha_end]

                solver.net.blobs['label'].data[orig_in_minibatch:] = rehe_y[
                    reha_start:reha_end]
                solver.net.blobs['target'].data[orig_in_minibatch:] = rehe_t[
                    reha_start:reha_end]

            if conf['strategy'] in ['ar1']:
                syn.pre_update(solver.net, ewcData, synData)

            # Explicit (net.step(1))
            solver.net.clear_param_diffs()
            solver.net.forward()  # start=None, end=None
            if batch > 0 and conf['strategy'] in ['cwr+', 'cwr']:
                solver.net.backward(
                    end='mid_fc7'
                )  # In CWR+ we stop the backward step at the CWR layer
            else:
                if batch > 0 and 'rehearsal_stop_layer' in conf.keys(
                ) and conf['rehearsal_stop_layer'] is not None:
                    # When using latent replay we stop the backward step at the latent rehearsal layer
                    solver.net.backward(end=conf['rehearsal_stop_layer'])
                else:
                    solver.net.backward()

            if conf['rehearsal_is_latent']:
                # Save latent features of new patterns (only during the first epoch)
                if batch > 0 and it < orig_iters_per_epoch:
                    latent_batch_x[orig_start:orig_end] = solver.net.blobs[
                        conf['rehearsal_layer']].data

            # Weights update
            solver.apply_update()

            if conf['strategy'] == 'ar1':
                syn.post_update(solver.net, ewcData, synData,
                                cwr_layers_Model[conf['model']])

            print('+', end='', flush=True)

            global_eval_iter += 1
            avg_count += 1

            avg_train_loss += solver.net.blobs['loss'].data
            if report_train_accuracy:
                avg_train_accuracy += solver.net.blobs['accuracy'].data

            if global_eval_iter == eval_iters[eval_idx]:
                # Evaluation point
                if avg_count > 0:
                    avg_train_loss /= avg_count
                    avg_train_accuracy /= avg_count
                train_loss[eval_idx] = avg_train_loss
                print('\nIter {:>4}'.format(it + 1),
                      '({:>4})'.format(global_eval_iter),
                      ': Train Loss = {:.5f}'.format(avg_train_loss),
                      end='',
                      flush=True)
                if report_train_accuracy:
                    train_acc[eval_idx] = avg_train_accuracy
                    print('  Train Accuracy = {:.5f}%'.format(
                        avg_train_accuracy * 100),
                          end='',
                          flush=True)

                compute_confusion_matrix = True if (
                    conf['confusion_matrix']
                    and it == train_iterations[batch] -
                    1) else False  # last batch iter

                # The following lines are executed only if this is the last iteration for the current batch
                if conf['strategy'] in [
                        'cwr+', 'ar1', 'ar1free'
                ] and it == train_iterations[batch] - 1:
                    cwr.consolidate_weights_cwr_plus(
                        solver.net, cwr_layers_Model[conf['model']], unique_y,
                        y_freq, class_updates, cons_w)
                    class_updates[unique_y] += y_freq
                    print(class_updates)
                    cwr.load_weights(
                        solver.net, cwr_layers_Model[conf['model']],
                        conf['num_classes'],
                        cons_w)  # Load consolidated weights for testing

                accuracy, _, pred_y = train_utils.test_network_with_accuracy_layer(
                    solver,
                    test_x,
                    test_y,
                    test_iterat,
                    test_minibatch_size,
                    prediction_level_Model[conf['model']],
                    return_prediction=compute_confusion_matrix)
                test_acc[eval_idx] = accuracy * 100
                print('  Test Accuracy = {:.5f}%'.format(accuracy * 100))

                # Batch(Re)Norm Stats
                train_utils.print_bn_stats(solver.net)

                visualization.Plot_Incremental_Training_Update(
                    eval_idx, eval_iters, train_loss, test_acc)

                filelog.Train_Log_Update(conf['train_log_file'],
                                         eval_iters[eval_idx], accuracy,
                                         avg_train_loss, report_train_accuracy,
                                         avg_train_accuracy)

                avg_train_loss = 0
                avg_train_accuracy = 0
                avg_count = 0
                eval_idx += 1  # Next eval

            it += 1  # Next iter

        # Current batch training concluded
        if conf['strategy'] in ['ar1']:
            syn.update_ewc_data(solver.net,
                                ewcData,
                                synData,
                                batch,
                                conf['ewc_clip_to'],
                                c=conf['ewc_w'])
            if conf['save_ewc_histograms']:
                visualization.EwcHistograms(ewcData,
                                            100,
                                            save_as=conf['exp_path'] +
                                            'Syn/F_' + str(batch) + '.png')

        if conf['rehearsal_is_latent']:
            if batch == 0:
                reha_it = 0
                while reha_it < orig_iters_per_epoch:
                    orig_start = reha_it * orig_in_minibatch
                    orig_end = (reha_it + 1) * orig_in_minibatch
                    solver.net.blobs['data'].data[
                        ...] = batch_x[orig_start:orig_end]
                    solver.net.forward()
                    latent_batch_x[orig_start:orig_end] = solver.net.blobs[
                        conf['rehearsal_layer']].data
                    reha_it += 1

            rehearsal.update_memory(latent_batch_x, batch_y.astype(np.int),
                                    batch)
        else:
            rehearsal.update_memory(batch_x, batch_y.astype(np.int), batch)

        if compute_confusion_matrix:
            # Computes the confusion matrix and logs + plots it
            cnf_matrix = confusion_matrix(test_y, pred_y,
                                          range(conf['num_classes']))
            if batch == 0:
                prev_class_accuracies = np.zeros(conf['num_classes'])
            else:
                prev_class_accuracies = current_class_accuracies
            current_class_accuracies = np.diagonal(
                cnf_matrix) / cnf_matrix.sum(axis=1)
            deltas = current_class_accuracies - prev_class_accuracies
            classes_in_batch = set(batch_y.astype(np.int))
            classes_non_in_batch = set(range(
                conf['num_classes'])) - classes_in_batch
            mean_class_in_batch = np.mean(deltas[list(classes_in_batch)])
            std_class_in_batch = np.std(deltas[list(classes_in_batch)])
            mean_class_non_in_batch = np.mean(
                deltas[list(classes_non_in_batch)])
            std_class_non_in_batch = np.std(deltas[list(classes_non_in_batch)])
            print(
                'InBatch -> mean =  %.2f%% std =  %.2f%%, OutBatch -> mean =  %.2f%% std =  %.2f%%'
                %
                (mean_class_in_batch * 100, std_class_in_batch * 100,
                 mean_class_non_in_batch * 100, std_class_non_in_batch * 100))
            filelog.Train_LogDetails_Update(conf['train_log_file'], batch,
                                            mean_class_in_batch,
                                            std_class_in_batch,
                                            mean_class_non_in_batch,
                                            std_class_non_in_batch)
            visualization.plot_confusion_matrix(
                cnf_matrix,
                normalize=True,
                title='CM after batch: ' + str(batch),
                save_as=conf['exp_path'] + 'CM/CM_' + str(batch) + '.png')

        if conf['compute_param_stats']:
            train_utils.stats_compute_param_change_and_update_prev(
                solver.net, param_stats, batch, param_change)

        if batch == 0:
            solver.net.save(conf['tmp_weights_file'])
            print('Weights saved to: ', conf['tmp_weights_file'])
            del solver

    print('Training Time: %.2f sec' % (time.time() - start_train))

    if conf['compute_param_stats']:
        stats_normalization = True
        train_utils.stats_normalize(solver.net, param_stats, batch_count,
                                    param_change, stats_normalization)
        visualization.Plot3d_param_stats(solver.net, param_change, batch_count,
                                         stats_normalization)

    filelog.Train_Log_End(conf['train_log_file'])
    filelog.Train_LogDetails_End(conf['train_log_file'])

    visualization.Plot_Incremental_Training_End(close=close_at_the_end)
コード例 #8
0
def main():
    """Compare a linear SVM on the UCI HAR data with and without RFECV feature selection.

    Steps, in order:

    1. Parse the train/test feature and label files with
       ``commonFunc.parseFile``.
    2. Obtain a boolean feature mask: load it from ``SVM-features-mask.out``
       when that file exists, otherwise run ``RFECV`` on the training set and
       save the resulting mask to that file.
    3. Fit one SVM on the selected features and one on all features, print the
       three scores returned by ``commonFunc.checkAccuracy`` for each, and draw
       the mask plus both confusion matrices in a single matplotlib figure.

    Takes no arguments and returns ``None``; all results are printed or shown
    through ``plt.show()``.
    """
    # Load data
    print('*' * 20, "程序开始-读取数据", '*' * 20)
    X_Train = commonFunc.parseFile('../UCI HAR Dataset/train/X_train.txt')
    Y_Train = commonFunc.parseFile(
        '../UCI HAR Dataset/train/y_train.txt').flatten()
    X_Test = commonFunc.parseFile('../UCI HAR Dataset/test/X_test.txt')
    Y_Test = commonFunc.parseFile(
        '../UCI HAR Dataset/test/y_test.txt').flatten()
    activityLabels = [
        'WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS', 'SITTING',
        'STANDING', 'LAYING'
    ]
    print("数据读取完成~\n")

    # Parameter settings
    print('*' * 20, "设置参数表", '*' * 20)
    KERNELSET = 'linear'
    # SVC requires a non-negative integer degree; the previous float (0.91) is
    # rejected by scikit-learn's parameter validation. The value is ignored by
    # the linear kernel, so the library default is used.
    DEGREE = 3
    CSET = 3
    COEF0 = 0
    GAMMA = 'scale'
    print("SVM:")
    print("kernel:{0} \t C:{1}".format(KERNELSET, CSET))
    print("RFE:")
    STEPSET = 5
    MINFEATURETOSET = 300
    CROSSVALIDATION = 20
    CPUCHANNEL = 6
    print(
        "estimate:SVM \t step:{0} \t min_feature_to_select:{1} \t CrossValidation:{2} \t CPUChannel:{3} \n"
        .format(STEPSET, MINFEATURETOSET, CROSSVALIDATION, CPUCHANNEL))

    def build_svm():
        """Return a fresh, unfitted SVC configured with the settings above."""
        return svm.SVC(kernel=KERNELSET,
                       degree=DEGREE,
                       C=CSET,
                       coef0=COEF0,
                       gamma=GAMMA,
                       probability=False)

    def fit_and_report(train_x, test_x, subplot_index, title):
        """Fit an SVM, print its scores and draw its confusion matrix.

        ``train_x`` / ``test_x`` are the feature matrices to fit and predict
        on; labels are always ``Y_Train`` / ``Y_Test``. The confusion matrix
        is drawn into cell ``subplot_index`` of the 2x2 figure under ``title``.
        """
        clf = build_svm()
        clf.fit(train_x, Y_Train)
        predicted = clf.predict(test_x)
        prec, rec, f_score = commonFunc.checkAccuracy(Y_Test, predicted)
        print("训练结果:")
        print("准确率:{0}\n召回率:{1}\nF1度量:{2}".format(prec, rec, f_score))

        # Confusion matrix
        # NOTE(review): the helper is called as (predicted, true) -- confirm
        # this matches the argument order createConfusionMatrix expects.
        plt.subplot(2, 2, subplot_index)
        cm = commonFunc.createConfusionMatrix(predicted, Y_Test)
        plot_confusion_matrix(cm,
                              activityLabels,
                              normalize=False,
                              title=title)

    # Feature selection (cached on disk between runs)
    print('*' * 20, "读取特征文件", '*' * 20)
    maskSaveName = "SVM-features-mask.out"
    if os.path.exists(maskSaveName):
        print("存在特征文件,开始读取...")
        # The mask is stored as 0/1 integers; turn it back into booleans.
        mask = (np.loadtxt(maskSaveName) == 1)
        selected_count = int(np.count_nonzero(mask))
        print("读取完成,准备显示...")
        print("特征选择数量: {0}".format(selected_count))
    else:
        print("特征文件不存在~")
        print("开始特征选择...")
        start = perf_counter()
        selector = RFECV(build_svm(),
                         step=STEPSET,
                         min_features_to_select=MINFEATURETOSET,
                         cv=CROSSVALIDATION,
                         n_jobs=CPUCHANNEL)
        selector = selector.fit(X_Train, Y_Train)
        mask = selector.get_support()
        selected_count = int(np.count_nonzero(mask))
        print("特征选择完成!")
        print("用时 {0:.2f}mins".format((perf_counter() - start) / 60))
        print("特征选择数量: {0}".format(selected_count))
        np.savetxt(maskSaveName, mask, fmt='%d')

    # Plot the mask across the top row of the 2x2 figure
    plt.figure(figsize=(14, 14))
    plt.subplot(2, 2, (1, 2))
    plt.imshow(mask.reshape(1, -1), cmap='tab20c_r')
    plt.title("Feature Selected: {0}".format(selected_count),
              fontsize=14,
              y=2.5)
    plt.ylim([-5, 5])
    plt.xlabel("Feature Index(Deeper Color means Selected)", fontsize=10)
    print('\n')

    # Results using only the selected features
    print('*' * 20, "特征选择后的数据结果", '*' * 20)
    fit_and_report(X_Train[:, mask], X_Test[:, mask], 3,
                   'Selected_F Confusion matrix')
    print('\n')

    # Results using the original (full) feature set
    print('*' * 20, "特征选择前的数据结果", '*' * 20)
    fit_and_report(X_Train, X_Test, 4, 'All_F Confusion matrix')

    plt.show()
コード例 #9
0
                                        feed_dict = {
                                            model.input_x: test_features,
                                            model.input_y: one_hot_test_labels
                                        }

                                        predictions = sess.run(
                                            model.predictions, feed_dict)

                                        recall, precision, f1, confusion_matrix = visualization.calculate_cm(
                                            pred_vals=predictions,
                                            true_vals=test_labels,
                                            classes=list(range(n_labels)))

                                        visualization.plot_confusion_matrix(
                                            cm=confusion_matrix,
                                            classes=list(range(n_labels)),
                                            path=f'figures/{name}/',
                                            name=f'step-{i}.png',
                                            normalize=True)

                                        metrics[i] = [
                                            confusion_matrix, recall,
                                            precision, f1
                                        ]

                                path = f'metrics/{metric_path}/'

                                if not os.path.exists(path):
                                    os.makedirs(path)

                                with open(f'{path}{metric_name}.p',
                                          'wb') as bin_file:
コード例 #10
0
ファイル: model.py プロジェクト: Drug1996/DeepNAR
def training_model(num):
    """Train ``TEST_NUM`` BiRNN models and return the best one by dev accuracy.

    Every trial builds a fresh ``BiRNN`` and trains it for ``num_epochs``
    epochs with Adam and cross-entropy loss.  On every second epoch the loss
    of the last training mini-batch is recorded and the model is evaluated on
    the dev set.  After all trials, the collected curves are passed to
    ``variance_and_bias_analysis`` and the dev labels/predictions to
    ``plot_confusion_matrix``; ``save`` is called after each plot.

    A ``KeyboardInterrupt`` aborts only the trial that is currently running;
    the remaining trials are still executed.

    Args:
        num: Zero-based index of the current run.  It is only used (as
            ``num + 1``) in the printed message and in the figure file names.

    Returns:
        A tuple ``(max_accuracy, best_model, test_loader)``:
        ``max_accuracy`` is the highest last-recorded dev accuracy (in
        percent) over all trials, ``best_model`` is the model of the first
        trial that reached it (``None`` if that trial was interrupted before
        its model was built), and ``test_loader`` is a ``DataLoader`` over
        the test split.
    """
    # torch.manual_seed(RANDOM_SEED_NUM)

    # Load the dataset
    X_training, Y_training, X_dev, Y_dev, X_test, Y_test = data_load()
    print(len(X_training))

    # The datasets and loaders do not depend on the trial, so build them
    # once.  This also guarantees that ``test_loader`` exists for the return
    # statement even if the first trial is interrupted immediately.
    train_loader = DataLoader(TensorDataset(X_training, Y_training),
                              batch_size=batch_size,
                              shuffle=True)
    # No batch_size given: the dev/test loaders yield one sample per batch,
    # which the ``.item()`` calls in the evaluation loop rely on.
    dev_loader = DataLoader(TensorDataset(X_dev, Y_dev))
    test_loader = DataLoader(TensorDataset(X_test, Y_test))

    modellist = []
    training_losseslist = []
    test_accuracieslist = []  # despite the name, these are dev-set accuracies
    y_truelist = []
    y_predlist = []

    for t in range(TEST_NUM):
        # Reset all per-trial state *before* the try block so that a trial
        # interrupted early never re-reports the previous trial's results.
        model = None
        training_losses = []
        test_accuracies = []
        y_true = []
        y_pred = []

        try:
            # Create the model
            model = BiRNN(input_size, hidden_size, num_layers,
                          num_classes).to(device)

            # Loss and optimizer
            criterion = nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

            # Train the model
            for epoch in range(num_epochs):
                for inputs, labels in train_loader:
                    inputs = inputs.reshape(-1, sequence_length,
                                            input_size).to(device)
                    labels = labels.reshape(-1).to(device)

                    # Forward pass
                    outputs = model(inputs)
                    training_loss = criterion(outputs, labels)

                    # Backward and optimize
                    optimizer.zero_grad()
                    training_loss.backward()
                    optimizer.step()

                # Only report and evaluate on every second epoch.
                if (epoch + 1) % 2 != 0:
                    continue

                print('Test [{}/{}], Epoch [{}/{}], Loss: {:.4f}'.format(
                    t + 1, TEST_NUM, epoch + 1, num_epochs,
                    training_loss.item()))

                # Get the value of loss (last mini-batch of this epoch)
                training_losses.append(training_loss.item())

                # Evaluate the model on the dev set.
                # NOTE(review): the model is not switched to eval() here;
                # confirm BiRNN has no dropout/batch-norm layers.
                with torch.no_grad():
                    epoch_true = []
                    epoch_pred = []
                    correct = 0
                    total = 0
                    for inputs, labels in dev_loader:
                        inputs = inputs.reshape(-1, sequence_length,
                                                input_size).to(device)
                        labels = labels.reshape(-1).to(device)
                        outputs = model(inputs)
                        _, predicted = torch.max(outputs.data, 1)
                        total += labels.size(0)
                        correct += (predicted == labels).sum().item()
                        epoch_true.append(labels.item())
                        epoch_pred.append(predicted.item())
                    test_accuracies.append(correct / total)
                    # Commit labels/predictions only for a completed
                    # evaluation so they always match the last accuracy.
                    y_true, y_pred = epoch_true, epoch_pred

        except KeyboardInterrupt:
            print('Stop!')

        modellist.append(model)
        training_losseslist.append(training_losses)
        test_accuracieslist.append(test_accuracies)
        y_truelist.append(y_true)
        y_predlist.append(y_pred)

    # Final dev accuracy (in percent) of every trial.  A trial that never
    # reached a dev evaluation (interrupted early, or num_epochs < 2) has an
    # empty history and is scored 0.0 instead of raising IndexError.
    accuracy = [
        item[-1] * 100 if item else 0.0 for item in test_accuracieslist
    ]
    max_accuracy = max(accuracy)
    print('Dev accuracy of the No.{} model on dev action samples: {} %'.format(
        num + 1, max_accuracy))

    # Show or save the graph of variance and bias analysis, and confusion matrix graph
    variance_and_bias_analysis(training_losseslist, test_accuracieslist)
    save('trials' + str(num + 1) + '_loss_accuracy' + '.png')
    plot_confusion_matrix(y_truelist, y_predlist, LABELS)
    save('trials_' + str(num + 1) + '_confusion_matrix' + '.png')

    return max_accuracy, modellist[accuracy.index(max_accuracy)], test_loader
コード例 #11
0
def log_confusion_matrix(tb_logger, epoch, data_subset, metrics):
    """Plot the confusion matrix in ``metrics`` and log it as a figure.

    The value stored under ``metrics["confusion matrix"]`` is moved to the
    CPU, converted to a NumPy array and rendered with
    ``plot_confusion_matrix``.  The resulting figure is handed to
    ``tb_logger.writer.add_figure`` under the tag
    ``"confusion matrix/<data_subset>"`` with ``epoch`` as the global step.
    """
    matrix = metrics["confusion matrix"].cpu().numpy()
    rendered = plot_confusion_matrix(matrix)
    tb_logger.writer.add_figure(
        f"confusion matrix/{data_subset}",
        rendered,
        global_step=epoch,
    )
コード例 #12
0
def main_Core50(conf, run, close_at_the_end = False):
    """Run one incremental (batch-by-batch) Caffe training experiment.

    The test set is loaded once.  Then, for every training batch from
    ``conf['initial_batch']`` up to ``conf['num_batches'] - 1``, the batch
    data is loaded, the strategy selected by ``conf['strategy']`` is applied
    (the code tests for 'naive', 'cwr', 'cwr+', 'ar1', 'syn', 'ewc' and
    'lwf'), the Caffe solver is stepped for the planned number of iterations,
    and the network is evaluated at the iterations listed in ``eval_iters``.
    Each evaluation updates the training plot and the training log; at the
    end of a batch a confusion matrix is optionally computed, logged and
    plotted.

    Args:
        conf: Mapping of experiment settings, read by key throughout the
            function.  The entries 'solver_file_first_batch', 'solver_file',
            'init_weights_file' and 'tmp_weights_file' are rewritten in place
            ('X' is replaced by ``conf['model']``).
        run: String substituted for 'RUN_X' in the train/test file-list paths.
        close_at_the_end: Forwarded as ``close`` to
            ``visualization.Plot_Incremental_Training_End``.

    Returns:
        None.
    """

    # Prepare configurations files
    conf['solver_file_first_batch'] = conf['solver_file_first_batch'].replace('X', conf['model'])
    conf['solver_file'] = conf['solver_file'].replace('X', conf['model'])
    conf['init_weights_file'] = conf['init_weights_file'].replace('X', conf['model'])
    conf['tmp_weights_file'] = conf['tmp_weights_file'].replace('X', conf['model'])
    train_filelists = conf['train_filelists'].replace('RUN_X', run)
    test_filelist = conf['test_filelist'].replace('RUN_X', run)

    # For run 0 store/load binary files
    # For the rest of runs read single files (slower, but saves disk space)
    #run_on_the_fly = False if run == 'run0' else True
    run_on_the_fly = True
    # This is the procedure we applied to obtain the reduced test set
    # train_utils.reduce_filelist(test_filelist, test_filelist+"3", 20)

    (Path(conf['exp_path']) / 'CM').mkdir(exist_ok=True, parents=True)
    (Path(conf['exp_path']) / 'EwC').mkdir(exist_ok=True, parents=True)
    (Path(conf['exp_path']) / 'Syn').mkdir(exist_ok=True, parents=True)

    # Parse the solver prototxt
    #  for more details see - https://stackoverflow.com/questions/31823898/changing-the-solver-parameters-in-caffe-through-pycaffe
    print('Solver proto: ', conf['solver_file_first_batch'])
    solver_param = caffe_pb2.SolverParameter()
    with open(conf['solver_file_first_batch']) as f:
        txtf.Merge(str(f.read()), solver_param)
    net_prototxt = solver_param.net  # Obtains the path to the net prototxt
    print('Net proto: ',net_prototxt)

    # Obtain class labels
    if conf['class_labels'] != '':
        # More complex than a simple loadtxt because of the unicode representation in python 3
        # NOTE(review): label_str is not referenced again in this function.
        label_str = np.loadtxt(conf['class_labels'], dtype=bytes, delimiter="\n").astype(str)

    # Obtain minibatch size from net proto
    train_minibatch_size, test_minibatch_size = train_utils.extract_minibatch_size_from_prototxt_with_input_layers(net_prototxt)
    print(' test minibatch size: ', test_minibatch_size)
    print(' train minibatch size: ', train_minibatch_size)

    # Is the network using target vectors (besides the labels)?
    need_target = train_utils.net_use_target_vectors(net_prototxt)

    # Load test set
    print ("Recovering Test Set: ", test_filelist, " ...")
    start = time.time()
    test_x, test_y = train_utils.get_data(test_filelist, conf['db_path'], conf['exp_path'], on_the_fly=run_on_the_fly, verbose = conf['verbose'])
    assert(test_x.shape[0] == test_y.shape[0])
    if conf['num_classes'] == 10:  # Checks if we are doing category-based classification
        test_y = test_y // 5
    test_y = test_y.astype(np.float32)
    test_patterns = test_x.shape[0]
    test_x, test_y, test_iterat = train_utils.pad_data(test_x, test_y, test_minibatch_size)
    print (' -> %d patterns of %d classes (%.2f sec.)' % (test_patterns, len(np.unique(test_y)), time.time() - start))
    print (' -> %.2f -> %d iterations for full evaluation' % (test_patterns / test_minibatch_size, test_iterat))

    # Load training patterns in batches (by now assume the same number in all batches)
    batch_count = conf['num_batches']
    train_patterns = train_utils.count_lines_in_batches(batch_count,train_filelists)
    train_iterations_per_epoch = np.zeros(batch_count, int)
    train_iterations = np.zeros(batch_count, int)
    test_interval_epochs = conf['test_interval_epochs']
    test_interval = np.zeros(batch_count, float)
    for batch in range(batch_count):
        train_iterations_per_epoch[batch] = int(np.ceil(train_patterns[batch] / train_minibatch_size))
        test_interval[batch] = test_interval_epochs * train_iterations_per_epoch[batch]
        if (batch == 0):
            train_iterations[batch] = train_iterations_per_epoch[batch] * conf['num_epochs_first_batch']
        else:
            train_iterations[batch] = train_iterations_per_epoch[batch] * conf['num_epochs']
        print ("Batch %2d: %d patterns, %d iterations (%d iter. per epochs - test every %.1f iter.)" \
             % (batch, train_patterns[batch],  train_iterations[batch], train_iterations_per_epoch[batch], test_interval[batch]))

    # Create evaluation points
    # -> iterations which are boundaries of batches
    batch_iter = [0]
    total_iter = 0  # running sum of iterations (avoids shadowing the builtin ``iter``)
    for batch in range(batch_count):
        total_iter += train_iterations[batch]
        batch_iter.append(total_iter)

    # Calculates the iterations where the network will be evaluated
    eval_iters = [1]   # Start with 1 (instead of 0) because the test net is aligned to the train one after solver.step(1)
    for batch in range(batch_count):
        start = batch_iter[batch]
        end = batch_iter[batch+1]
        start += test_interval[batch]
        while start < end:
            eval_iters.append(int(start))
            start += test_interval[batch]
        eval_iters.append(end)

    # Iterations which are epochs in the evaluation range
    epochs_iter = []
    for batch in range(batch_count):
        start = batch_iter[batch]
        end = batch_iter[batch+1]
        start += train_iterations_per_epoch[batch]
        while start <= end:
            epochs_iter.append(int(start))
            start += train_iterations_per_epoch[batch]

    prev_train_loss = np.zeros(len(eval_iters))
    prev_test_acc = np.zeros(len(eval_iters))
    prev_exist = filelog.TryLoadPrevTrainingLog(conf['train_log_file'], prev_train_loss, prev_test_acc)
    train_loss = np.copy(prev_train_loss)  # Copying allows to correctly visualize the graph in case we start from initial_batch > 0
    test_acc = np.copy(prev_test_acc)
    train_acc = np.zeros(len(eval_iters))

    epochs_tick = batch_count <= 30  # For better visualization
    visualization.Plot_Incremental_Training_Init('Incremental Training', eval_iters, epochs_iter, batch_iter, train_loss, test_acc, 5, conf['accuracy_max'], prev_exist, prev_train_loss, prev_test_acc, show_epochs_tick = epochs_tick)
    filelog.Train_Log_Init(conf['train_log_file'])
    filelog.Train_LogDetails_Init(conf['train_log_file'])

    start_train = time.time()
    eval_idx = 0   # Evaluation iterations counter
    global_eval_iter = 0  # Global iterations counter
    first_round = True
    initial_batch = conf['initial_batch']
    if initial_batch > 0:  # Move forward by skipping unnecessary evaluation
        global_eval_iter = batch_iter[initial_batch]
        while eval_iters[eval_idx] < global_eval_iter:
            eval_idx += 1
        eval_idx += 1

    for batch in range(initial_batch, batch_count):
        print ('\nBATCH = {:2d} ----------------------------------------------------'.format(batch))

        if batch == 0:
            solver = caffe.get_solver(conf['solver_file_first_batch'])   # Load the solver for the first batch and create net(s)
            if conf['init_weights_file'] !='':
                solver.net.copy_from(conf['init_weights_file'])
                print('Network created and Weights loaded from: ', conf['init_weights_file'])
                # Test
                solver.test_nets[0].copy_from(conf['init_weights_file'])
                accuracy, _ , pred_y = train_utils.test_network_with_accuracy_layer(solver, test_x, test_y, test_iterat, test_minibatch_size, prediction_level_Model[conf['model']], return_prediction = True)

                # BatchNorm Stats
                train_utils.print_bn_stats(solver.net)

            if conf['strategy'] in ['cwr+','ar1']:
                cwr.zeros_cwr_layer_bias_lr(solver.net, cwr_layers_Model[conf['model']])
                class_updates = np.zeros(conf['num_classes'], dtype=np.float32)
                cons_w = cwr.init_consolidated_weights(solver.net, cwr_layers_Model[conf['model']], conf['num_classes'])    # Allocate space for consolidated weights and initialize them to 0
                cwr.reset_weights(solver.net, cwr_layers_Model[conf['model']], conf['num_classes'])   # Reset cwr weights to 0 (done here for the first batch to keep initial stats correct)

            if conf['strategy'] == 'cwr' or conf['dynamic_head_expansion'] == True:
                class_updates = np.zeros(conf['num_classes'], dtype=np.float32)
                rand_w, cons_w = cwr.copy_initial_weights(solver.net, cwr_layers_Model[conf['model']], conf['num_classes'])    # Random values for cwr layers (since they do not exist in pretrained models)

            if conf['strategy'] in ['syn','ar1']:
                # ewcData stores optimal weights + normalized fisher; trajectory stores unnormalized summed grad*deltaW
                ewcData, synData = syn.create_syn_data(solver.net)

        elif batch == 1:
            solver = caffe.get_solver(conf['solver_file'])   # Load the solver for the next batches and create net(s)
            solver.net.copy_from(conf['tmp_weights_file'])
            print('Network created and Weights loaded from: ', conf['tmp_weights_file'])

            if conf['strategy'] in ['cwr','cwr+']:
                cwr.zeros_non_cwr_layers_lr(solver.net, cwr_layers_Model[conf['model']])   # In CWR we freeze every layer except the CWR one(s)

            # By providing a cwr_lr_mult multiplier we can use a different Learning Rate for CWR and non-CWR cwr_layers_Model
            # Note that a similar result can be achieved by manually editing the net prototxt
            if conf['strategy'] in ['cwr+', 'ar1']:
                if 'cwr_lr_mult' in conf.keys() and conf['cwr_lr_mult'] != 1:
                    cwr.zeros_cwr_layer_bias_lr(solver.net, cwr_layers_Model[conf['model']], force_weights_lr_mult = conf['cwr_lr_mult'])
                else:
                    cwr.zeros_cwr_layer_bias_lr(solver.net, cwr_layers_Model[conf['model']])

            cwr.set_brn_past_weight(solver.net, 10000)

        # Initializes some data structures used for reporting stats. Executed once (in the first round)
        if first_round:
            if batch == 1 and (conf['strategy'] in ['ewc','cwr', 'cwr+', 'syn', 'ar1']):
                print('Cannot start from batch 1 in ', conf['strategy'], ' strategy!')
                sys.exit(0)
            visualization.PrintNetworkArchitecture(solver.net)
            # if accuracy layer is defined in the prototxt also in TRAIN mode -> log also train accuracy (not in the plot)
            try:
                # Probe only: the lookup fails when the net has no 'accuracy' blob.
                solver.net.blobs['accuracy'].num  # assume this is stable for prototxt of successive batches
                report_train_accuracy = True
            except Exception:  # not a bare except: let KeyboardInterrupt/SystemExit propagate
                report_train_accuracy = False
            first_round = False
            if conf['compute_param_stats']:
                param_change = {}
                param_stats = train_utils.stats_initialize_param(solver.net)

        # Load training data for the current batch
        # Note that the file lists are provided in the batch_filelists folder
        current_train_filelist = train_filelists.replace('XX', str(batch).zfill(2))
        print ("Recovering training data: ", current_train_filelist, " ...")
        load_start = time.time()
        train_x, train_y = train_utils.get_data(current_train_filelist, conf['db_path'], conf['exp_path'], on_the_fly=run_on_the_fly, verbose = conf['verbose'])
        print ("Done.")
        if conf['num_classes'] == 10:  # Category based classification
            train_y = train_y // 5

        # If target values (e.g. one hot vectors) are needed we need to create them from numerical class labels
        if need_target:
            target_y = train_utils.compute_one_hot_vectors(train_y, conf['num_classes'])
            train_x, tmp_iters = train_utils.pad_data_single(train_x, train_minibatch_size)
            train_y, _ = train_utils.pad_data_single(train_y, train_minibatch_size)
            target_y, _ = train_utils.pad_data_single(target_y, train_minibatch_size)

            if batch>0 and conf['strategy'] == 'lwf':
                if conf['lwf_weight'] > 0: weight_old = conf['lwf_weight']
                else:
                    weight_old = 1 - (train_patterns[batch] / np.sum(train_patterns[0:batch+1]))
                    x_min = 2.0/3.0
                    x_max = 0.9
                    y_min = 0.45
                    y_max = 0.60
                    #
                    if weight_old > x_max: weight_old = x_max # Clip weight_old
                    weight_old = y_min + (weight_old - x_min)*(y_max-y_min)/(x_max-x_min)
                print('Lwf Past Weight: %.2f' % (weight_old))
                target_y = lwf.update_target_vectors(solver, train_x, train_y, conf['num_classes'], train_iterations_per_epoch[batch], train_minibatch_size, weight_old)

            if conf['dynamic_head_expansion'] == True:
                train_utils.dynamic_head_expansion(solver.net, cwr_layers_Model[conf['model']], conf['num_classes'], train_y, rand_w)

            if conf['strategy'] == 'cwr' and batch > initial_batch:
                cwr.load_weights(solver.net, cwr_layers_Model[conf['model']], conf['num_classes'], rand_w)  # Reset net weights
            if conf['strategy'] in ['cwr+','ar1'] and batch > initial_batch:
                cwr.reset_weights(solver.net, cwr_layers_Model[conf['model']], conf['num_classes'])  # Reset weights of CWR layers to 0  (maximal head approach!)
                # Loads previously consolidated weights
                # This procedure, explained in the paper, is necessary in the NIC scenario
                if 'cwr_nic_load_weight' in conf.keys() and conf['cwr_nic_load_weight']:
                    cwr.load_weights_nic(solver.net, cwr_layers_Model[conf['model']], train_y, cons_w)

            train_x, train_y, target_y = train_utils.shuffle_in_unison((train_x, train_y, target_y), 0)
            if conf['strategy'] in ['ewc','syn','ar1'] and batch > initial_batch:
                #syn.weight_stats(solver.net, batch, ewcData, conf['ewc_clip_to'])
                # Makes ewc info available to the network for successive training
                # The 'ewc' blob will be used by our C++ code (see the provided custom "sgd_solver.cpp")
                solver.net.blobs['ewc'].data[...] = ewcData
        else:
            #TODO: review branch (is it necessary?)
            train_x, tmp_iters = train_utils.pad_data_single(train_x, train_minibatch_size)
            train_y, _ = train_utils.pad_data_single(train_y, train_minibatch_size)
            train_x, train_y = train_utils.shuffle_in_unison((train_x, train_y), 0)
            # apply temporal coherence strategy to modify labels
            if batch > 0 and conf['strategy'] != 'naive':
                train_x, train_y = train_utils.predict_labels_temporal_coherence(solver, train_x, train_y, conf['num_classes'], train_iterations_per_epoch[batch], train_minibatch_size, conf['strategy'], 0.80)
                # ATTENTION, if patterns have been removed do padding again!

        print (' -> %d patterns (of %d classes) after padding and shuffling (%.2f sec.)' % (train_x.shape[0], len(np.unique(train_y)), time.time()-load_start))
        assert(train_iterations[batch] >= tmp_iters)

        # convert labels to float32
        train_y = train_y.astype(np.float32)
        assert(train_x.shape[0] == train_y.shape[0])

        # training
        avg_train_loss = 0
        avg_train_accuracy = 0
        avg_count = 0

        if conf['strategy'] in ['syn','ar1']:
            syn.init_batch(solver.net, ewcData, synData)

        # The main solver loop (per batch)
        it = 0
        while it < train_iterations[batch]:
            # The following part is pretty much straight-forward
            # The current batch is split in minibatches (which size was previously detected by looking at the net prototxt)
            # The minibatch is loaded in blobs 'data', 'label' and 'target'
            # a step(1) is executed (which executes forward + backward + weights update)
            it_mod = it % train_iterations_per_epoch[batch]
            start = it_mod * train_minibatch_size
            end = (it_mod + 1) * train_minibatch_size

            if conf['verbose']:
                avgl = avga = 0
                if avg_count > 0:
                    avgl = avg_train_loss / avg_count
                print ('Iter {:>4}'.format(it+1), '({:>4})'.format(global_eval_iter), ': Train Loss = {:.5f}'.format(avgl), end='', flush = True)
                if report_train_accuracy:
                    if avg_count > 0:
                        avga = avg_train_accuracy / avg_count
                    print ('  Train Accuracy = {:.5f}%'.format(avga*100), flush = True)
            else:
                print ('+', end = '', flush=True)

            # Provide data to input layers
            solver.net.blobs['data'].data[...] = train_x[start:end]
            solver.net.blobs['label'].data[...] = train_y[start:end]
            if need_target:
                solver.net.blobs['target'].data[...] = target_y[start:end]

            if conf['strategy'] in ['syn','ar1']:
                syn.pre_update(solver.net, ewcData, synData)

            # SGD by Caffe
            if conf['strategy'] in ['cwr+','cwr'] and batch > initial_batch:
                solver.net.clear_param_diffs()
                solver.net.forward()  # start=None, end=None
                solver.net.backward(end='mid_fc7')
                solver.apply_update()
            else:
                solver.step(1)
            #train_utils.print_bn_stats(solver.net)

            # If enabled saves the gradient magnitude of the prediction_level stats on file
            # train_utils.gradient_stats(prediction_level_Model[conf['model']], global_eval_iter, solver.net, train_y, start, end)

            if conf['strategy'] == 'syn':
                syn.post_update(solver.net, ewcData, synData)
            if conf['strategy'] == 'ar1':
                syn.post_update(solver.net, ewcData, synData, cwr_layers_Model[conf['model']])

            global_eval_iter +=1
            avg_count +=1

            avg_train_loss += solver.net.blobs['loss'].data
            if report_train_accuracy:
                avg_train_accuracy += solver.net.blobs['accuracy'].data

            # Early stopping (a.k.a. Limited)
            if conf['strategy'] == '_syn' and avg_count > 0 and avg_train_loss/avg_count < syn.target_train_loss_accuracy_per_batch(batch):    # enable by removing "_" on demand
                it = train_iterations[batch]-1              # skip to last iter
                global_eval_iter = eval_iters[eval_idx]     # enable evaluation point now

            if global_eval_iter == eval_iters[eval_idx]:
                # Evaluation point
                if avg_count > 0:
                    avg_train_loss/= avg_count
                    avg_train_accuracy /= avg_count
                train_loss[eval_idx] = avg_train_loss
                print ('\nIter {:>4}'.format(it+1), '({:>4})'.format(global_eval_iter), ': Train Loss = {:.5f}'.format(avg_train_loss), end='', flush = True)
                if report_train_accuracy:
                    train_acc[eval_idx] = avg_train_accuracy
                    print ('  Train Accuracy = {:.5f}%'.format(avg_train_accuracy*100), end='', flush = True)

                # Predictions are only needed at the last iteration of the batch
                compute_confusion_matrix = bool(conf['confusion_matrix'] and it == train_iterations[batch]-1)

                # The following lines are executed only if this is the last iteration for the current batch
                if conf['strategy'] in ['cwr', 'cwr+', 'ar1'] and it == train_iterations[batch]-1:
                    # ``np.int`` was an alias of the builtin ``int`` and has been
                    # removed from NumPy; the builtin is the exact replacement.
                    if conf['strategy'] == 'cwr':
                        batch_weight = conf['cwr_batch0_weight'] if batch == initial_batch else 1
                        cwr._consolidate_weights_cwr(solver.net, cwr_layers_Model[conf['model']], train_y, cons_w, batch_weight, class_updates = class_updates)
                        class_updates[train_y.astype(int)] += 1  # Increase weights of trained classes
                    else:
                        unique_y, y_freq = np.unique(train_y.astype(int), return_counts=True)
                        cwr.consolidate_weights_cwr_plus(solver.net, cwr_layers_Model[conf['model']], unique_y, y_freq, class_updates, cons_w)
                        class_updates[unique_y] += y_freq

                    # print(class_updates)
                    cwr.load_weights(solver.net, cwr_layers_Model[conf['model']], conf['num_classes'], cons_w)   # Load consolidated weights for testing

                accuracy, _ , pred_y = train_utils.test_network_with_accuracy_layer(solver, test_x, test_y, test_iterat, test_minibatch_size, prediction_level_Model[conf['model']], return_prediction = compute_confusion_matrix)
                test_acc[eval_idx] = accuracy*100
                print ('  Test Accuracy = {:.5f}%'.format(accuracy*100))

                # Batch(Re)Norm Stats
                train_utils.print_bn_stats(solver.net)

                visualization.Plot_Incremental_Training_Update(eval_idx, eval_iters, train_loss, test_acc)

                filelog.Train_Log_Update(conf['train_log_file'], eval_iters[eval_idx], accuracy, avg_train_loss, report_train_accuracy, avg_train_accuracy)

                avg_train_loss = 0
                avg_train_accuracy = 0
                avg_count = 0
                eval_idx+=1   # next eval

            it+=1  # next iter

        # Current batch training concluded
        if conf['strategy'] == 'ewc':
            if batch == initial_batch:
                ewcData, fisher = ewc.create_ewc_data(solver.net)   # ewcData stores optimal weights + normalized fisher; fisher store unnormalized summed fisher
            print ("Computing Fisher Information and Storing Optimal Weights...")
            ewc.update_ewc_data(ewcData, fisher, solver.net, train_x, train_y, target_y, train_iterations_per_epoch[batch], train_minibatch_size, batch, conf['ewc_clip_to'], conf['ewc_w'])
            print ("Done.")
            if conf['save_ewc_histograms']:
                visualization.EwcHistograms(ewcData, 100, save_as = conf['exp_path'] + 'EwC/F_' + str(batch) + '.png')

        if conf['strategy'] in ['syn','ar1']:
            syn.update_ewc_data(solver.net, ewcData, synData, batch, conf['ewc_clip_to'])
            if conf['save_ewc_histograms']:
                visualization.EwcHistograms(ewcData, 100, save_as = conf['exp_path'] + 'Syn/F_' + str(batch) + '.png')

        if compute_confusion_matrix:
            # Computes the confusion matrix and logs + plots it
            cnf_matrix = confusion_matrix(test_y, pred_y)
            # The first *processed* batch has no previous accuracies. Comparing
            # against initial_batch (not 0) keeps this working when training
            # starts from a later batch.
            if batch == initial_batch:
                prev_class_accuracies = np.zeros(conf['num_classes'])
            else:
                prev_class_accuracies = current_class_accuracies
            current_class_accuracies = np.diagonal(cnf_matrix) / cnf_matrix.sum(axis = 1)
            deltas = current_class_accuracies - prev_class_accuracies
            classes_in_batch = set(train_y.astype(int))
            classes_non_in_batch = set(range(conf['num_classes']))-classes_in_batch
            mean_class_in_batch = np.mean(deltas[list(classes_in_batch)])
            std_class_in_batch = np.std(deltas[list(classes_in_batch)])
            mean_class_non_in_batch = np.mean(deltas[list(classes_non_in_batch)])
            std_class_non_in_batch = np.std(deltas[list(classes_non_in_batch)])
            print('InBatch -> mean =  %.2f%% std =  %.2f%%, OutBatch -> mean =  %.2f%% std =  %.2f%%' % (mean_class_in_batch*100, std_class_in_batch*100, mean_class_non_in_batch*100, std_class_non_in_batch*100))
            filelog.Train_LogDetails_Update(conf['train_log_file'], batch, mean_class_in_batch, std_class_in_batch, mean_class_non_in_batch, std_class_non_in_batch)
            visualization.plot_confusion_matrix(cnf_matrix, normalize = True, title='CM after batch: ' + str(batch), save_as = conf['exp_path'] + 'CM/CM_' + str(batch) + '.png')

        if conf['compute_param_stats']:
            train_utils.stats_compute_param_change_and_update_prev(solver.net, param_stats, batch, param_change)

        if batch == 0:
            solver.net.save(conf['tmp_weights_file'])
            print('Weights saved to: ', conf['tmp_weights_file'])
            # Release the first-batch solver only when a later batch will
            # create a new one; with a single batch it is still needed below
            # for the parameter statistics.
            if batch + 1 < batch_count:
                del solver

    print('Training Time: %.2f sec' % (time.time() - start_train))

    if conf['compute_param_stats']:
        stats_normalization = True
        train_utils.stats_normalize(solver.net, param_stats, batch_count, param_change, stats_normalization)
        visualization.Plot3d_param_stats(solver.net, param_change, batch_count, stats_normalization)

    filelog.Train_Log_End(conf['train_log_file'])
    filelog.Train_LogDetails_End(conf['train_log_file'])

    visualization.Plot_Incremental_Training_End(close = close_at_the_end)
コード例 #13
0
            print("Accuracy for split", split, ":", accuracy, "Total Time: ",
                  class_time - start, ". BOW Time: ", bow_time - start,
                  ". Classification Time: ", class_time - bow_time)
            split += 1

        time_list.append(np.average(splits_accuracy))
        accuracy_list.append(np.average(splits_time))

#    test_images_filenames = open_pkl('test_images_filenames.dat')
#    test_labels = open_pkl('test_labels.dat')
#

# Plot Acurracy
    plot_accuracy_vs_time(range_value,
                          accuracy_list,
                          time_list,
                          feature_name='Number of SIFT scales',
                          title="DSIFT")

    unique_labels = list(set(y_test))
    # Compute confusion matrix
    cnf_matrix = confusion_matrix(y_test,
                                  predicted_labels,
                                  labels=unique_labels)
    # Plot normalized confusion matrix
    np.set_printoptions(precision=2)
    plot_confusion_matrix(cnf_matrix,
                          classes=unique_labels,
                          normalize=True,
                          title='Normalized confusion matrix')