Code Example #1
    def train(self):
        self.model = model_setter.get_model_params()
        self.model.build(input_shape=(None, 96, 160, 3))
        self.model.compile(
            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            metrics=['accuracy'])
        self.model.summary()

        log_dir = f"logs/fit/{self.output_path}"
        checkpoint_path = f"{self.output_path}/cp-{{epoch:04d}}.ckpt"

        best_checkpoint = tf.keras.callbacks.ModelCheckpoint(
            filepath=checkpoint_path,
            monitor='val_loss',
            verbose=0,
            mode='min',
            save_best_only=True)
        tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                              histogram_freq=1)

        self.history = self.model.fit(
            self.images, [self.directions_labels, self.speeds_labels],
            epochs=self.nb_epochs,
            validation_split=self.validation_split,
            shuffle=True,
            verbose=1,
            callbacks=[best_checkpoint, tensorboard_callback])
        if self.test_split > 0:
            validation.evaluate(self)
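
The ModelCheckpoint above keeps save_weights_only at its default (False), so each cp-XXXX.ckpt path stores a complete saved model. A minimal sketch of reloading one after training; the epoch number and the output_path variable are placeholders, not values taken from the run:

import tensorflow as tf

output_path = "output"  # placeholder for self.output_path above
# Hypothetical restore step: "0005" is an illustrative epoch number.
restored_model = tf.keras.models.load_model(f"{output_path}/cp-0005.ckpt")
restored_model.summary()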
Code Example #2
File: main.py Project: germaine-wu/2021-capstone
def main():
    args = parseArgs()
    function = args.function
    input_file = args.input
    output_file = args.output
    true_file = args.trueout
    result_file = args.result
    method = args.method

    if function == ("Pred" or "Predict"):
        if input_file == "":
            print("Error - Read inputs")
        elif output_file == "":
            print("Error - Write output")
        else:
            prediction(input_file, output_file, method)
    elif function == ("Eval" or "Evaluate"):
        if input_file == "" or true_file == "":
            print("Error - Read inputs")
        elif output_file == "":
            print("Error - Write output")
        else:
            evaluate(input_file, output_file, true_file, result_file)
    else:
        print("Please choose 'Pred' function or 'Eval' function")
Code Example #3
def execute_experiments(dataset, w2v_model, n_splits, model_option, output_csv_path, score_threshold):
    results = []

    shuffle(dataset)
    folds = KFold(n_splits=n_splits, shuffle=True, random_state=7)  # random_state only takes effect when shuffle=True
    splits = [(train_index, test_index) for train_index, test_index in folds.split(dataset)]

    prep_dataset = np.array([[row[0], row[1], compute_candidates(row, w2v_model)] for row in dataset])
    for train_index, test_index in splits:
        train, test = prep_dataset[train_index], prep_dataset[test_index]
        model = fit_model(model_option, train, w2v_model)
        results.append(evaluate(test, model, score_threshold))

    save_results(results, output_csv_path)
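
save_results is also external to this snippet; one plausible csv-based sketch (hypothetical, assuming each fold's result can be written as a flat row):

import csv

def save_results(results, output_csv_path):
    # Hypothetical helper: one row per fold.
    with open(output_csv_path, "w", newline="") as f:
        writer = csv.writer(f)
        for row in results:
            writer.writerow(row if isinstance(row, (list, tuple)) else [row])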
Code Example #4
def svm_baseline(X, Y, X_test, Y_test, method=None):
    setup_seed(20)
    clf = SVC(gamma='auto', class_weight='balanced').fit(X, Y)
    Y_train_pred = clf.predict(X)  # predict once rather than once per metric
    train_acc = accuracy_score(Y, Y_train_pred)
    train_pre = precision_score(Y, Y_train_pred)
    train_rec = recall_score(Y, Y_train_pred)
    train_fscore = f1_score(Y, Y_train_pred)
    train_mcc = matthews_corrcoef(Y, Y_train_pred)

    Y_pred = clf.predict(X_test)
    precision, recall, fscore, mcc, val_acc = evaluate(Y_test, Y_pred)
    print('T_acc %.3f\tT_pre %.3f\tT_rec %.3f\tT_fscore %.3f\tT_mcc %.3f' %
          (train_acc, train_pre, train_rec, train_fscore, train_mcc))
    print('V_acc  %.3f\tV_pre %.3f\tV_rec %.3f\tV_fscore %.3f\tV_mcc %.3f' %
          (val_acc, precision, recall, fscore, mcc))
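
The shared evaluate helper is not included with these baselines; a minimal sketch that matches how every caller unpacks its return value (hypothetical; assumes binary labels):

from sklearn.metrics import (accuracy_score, f1_score, matthews_corrcoef,
                             precision_score, recall_score)

def evaluate(Y_true, Y_pred):
    # Hypothetical reconstruction; callers unpack:
    # precision, recall, fscore, mcc, val_acc = evaluate(Y_test, Y_pred)
    precision = precision_score(Y_true, Y_pred)
    recall = recall_score(Y_true, Y_pred)
    fscore = f1_score(Y_true, Y_pred)
    mcc = matthews_corrcoef(Y_true, Y_pred)
    acc = accuracy_score(Y_true, Y_pred)
    return precision, recall, fscore, mcc, acc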
Code Example #5
def nn_baseline(X, Y, X_test, Y_test):
    setup_seed(20)
    clf = MLPClassifier(random_state=100).fit(X, Y)
    Y_train_pred = clf.predict(X)  # predict once rather than once per metric
    train_acc = accuracy_score(Y, Y_train_pred)
    train_pre = precision_score(Y, Y_train_pred)
    train_rec = recall_score(Y, Y_train_pred)
    train_fscore = f1_score(Y, Y_train_pred)
    train_mcc = matthews_corrcoef(Y, Y_train_pred)

    Y_pred = clf.predict(X_test)
    precision, recall, fscore, mcc, val_acc = evaluate(Y_test, Y_pred)
    print('T_acc %.3f\tT_pre %.3f\tT_rec %.3f\tT_fscore %.3f\tT_mcc %.3f' %
          (train_acc, train_pre, train_rec, train_fscore, train_mcc))
    print('V_acc  %.3f\tV_pre %.3f\tV_rec %.3f\tV_fscore %.3f\tV_mcc %.3f' %
          (val_acc, precision, recall, fscore, mcc))
Code Example #6
def lr_baseline(X, Y, X_test, Y_test, method=None):
    setup_seed(20)
    clf = linear_model.LogisticRegression().fit(X, Y)
    Y_train_pred = clf.predict(X)  # predict once rather than once per metric
    train_acc = accuracy_score(Y, Y_train_pred)
    train_pre = precision_score(Y, Y_train_pred)
    train_rec = recall_score(Y, Y_train_pred)
    train_fscore = f1_score(Y, Y_train_pred)
    train_mcc = matthews_corrcoef(Y, Y_train_pred)

    Y_pred = clf.predict(X_test)
    precision, recall, fscore, mcc, val_acc = evaluate(Y_test, Y_pred)
    print('T_acc %.3f\tT_pre %.3f\tT_rec %.3f\tT_fscore %.3f\tT_mcc %.3f' %
          (train_acc, train_pre, train_rec, train_fscore, train_mcc))
    print('V_acc  %.3f\tV_pre %.3f\tV_rec %.3f\tV_fscore %.3f\tV_mcc %.3f' %
          (val_acc, precision, recall, fscore, mcc))
Code Example #7
def knn_baseline(X, Y, X_test, Y_test, method=None):
    setup_seed(20)
    clf = neighbors.KNeighborsClassifier().fit(X, Y)
    Y_train_pred = clf.predict(X)  # predict once rather than once per metric
    train_acc = accuracy_score(Y, Y_train_pred)
    train_pre = precision_score(Y, Y_train_pred)
    train_rec = recall_score(Y, Y_train_pred)
    train_fscore = f1_score(Y, Y_train_pred)
    train_mcc = matthews_corrcoef(Y, Y_train_pred)

    Y_pred = clf.predict(X_test)
    precision, recall, fscore, mcc, val_acc = evaluate(Y_test, Y_pred)
    print('T_acc %.3f\tT_pre %.3f\tT_rec %.3f\tT_fscore %.3f\tT_mcc %.3f' %
          (train_acc, train_pre, train_rec, train_fscore, train_mcc))
    print('V_acc  %.3f\tV_pre %.3f\tV_rec %.3f\tV_fscore %.3f\tV_mcc %.3f' %
          (val_acc, precision, recall, fscore, mcc))
Code Example #8
File: main.py Project: FernanOrtega/encoder-decoder
def execute_experiments(dataset, w2v_model, n_splits, model_option, output_csv_path):
    results = []

    shuffle(dataset)
    folds = KFold(n_splits=n_splits, shuffle=True, random_state=7)  # random_state only takes effect when shuffle=True
    splits = [(train_index, test_index) for train_index, test_index in folds.split(dataset)]

    x, y, label_encoder = preprocess_dataset(dataset, w2v_model)
    for train_index, test_index in splits:
        dataset_split = np.array(dataset)[test_index]
        x_train, x_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], [row[1] for row in dataset_split]
        l_sizes = [len(row[0]) for row in dataset_split]
        model = fit_model(model_option, x_train, y_train, w2v_model)
        results.append(evaluate(x_test, y_test, l_sizes, model, label_encoder))

    save_results(results, output_csv_path)
Code Example #9
def rf_baseline(X, Y, X_test, Y_test):
    setup_seed(20)
    clf = ensemble.RandomForestClassifier().fit(X, Y)
    Y_train_pred = clf.predict(X)  # predict once rather than once per metric
    train_acc = accuracy_score(Y, Y_train_pred)
    train_pre = precision_score(Y, Y_train_pred)
    train_rec = recall_score(Y, Y_train_pred)
    train_fscore = f1_score(Y, Y_train_pred)
    train_mcc = matthews_corrcoef(Y, Y_train_pred)

    Y_pred = clf.predict(X_test)
    precision, recall, fscore, mcc, val_acc = evaluate(Y_test, Y_pred)
    print('T_acc %.3f\tT_pre %.3f\tT_rec %.3f\tT_fscore %.3f\tT_mcc %.3f'
          % (train_acc, train_pre, train_rec, train_fscore, train_mcc))
    print('V_acc  %.3f\tV_pre %.3f\tV_rec %.3f\tV_fscore %.3f\tV_mcc %.3f'
          % (val_acc, precision, recall, fscore, mcc))
    outcome = [train_acc, train_pre, train_rec, train_fscore, val_acc, precision, recall, fscore]
    return outcome
Code Example #10
def validate(combined_model, unsupervised_val, retrievable_items):
    batches_per_epoch = len(unsupervised_val)
    kbar = pkbar.Kbar(target=batches_per_epoch, width=8)

    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")

    combined_model.eval()

    image_embeddings = []
    text_embeddings = []

    running_loss = 0.0
    print("Start validation")

    for indx, unsupervised_inputs in enumerate(unsupervised_val):
        unsupervised_image_inputs = unsupervised_inputs[0].to(device)
        unsupervised_text_inputs = unsupervised_inputs[1].to(device)

        with torch.set_grad_enabled(False):
            text_embeddings_unsupervised, image_embeddings_unsupervised = combined_model(
                unsupervised_text_inputs, unsupervised_image_inputs)

            unsupervised_loss = criterion_unsupervised(
                text_embeddings_unsupervised, image_embeddings_unsupervised)

            image_embeddings.append(
                image_embeddings_unsupervised.detach().clone())
            text_embeddings.append(
                text_embeddings_unsupervised.detach().clone())

        # statistics
        running_loss += unsupervised_loss.item()  # accumulate a float so epoch_loss is a plain number
        kbar.update(indx, values=[("unsupervised_loss", unsupervised_loss)])

    recalls = []

    for item_count in retrievable_items:
        recalls.append((evaluate(text_embeddings, image_embeddings, 5,
                                 item_count), item_count))

    epoch_loss = running_loss / len(unsupervised_val)

    return epoch_loss, recalls
Code Example #11
def crossValidate(data_set):
    empty_confusion_matrix = np.zeros((4, 4))
    empty_metrics = (empty_confusion_matrix, 0.0, 0.0, 0.0, 0.0)
    unpruned_results = [[empty_metrics for a in range(9)] for b in range(10)]
    pruned_results = [[empty_metrics for a in range(9)] for b in range(10)]

    # data_set.size counts every element (rows x columns), not rows, so dividing
    # by 80 yields one tenth of the rows of this 8-column dataset (10 folds)
    split_size = int(data_set.size / 80)

    startTime = time.time()
    #split out the testing data
    for i in range(10):

        split_set = np.split(data_set, [i * split_size, (i + 1) * split_size])
        test_set = split_set[1]
        set_without_test = np.concatenate((split_set[0], split_set[2]), axis=0)

        #split out the validation
        for j in range(9):

            split_training_set = np.split(
                set_without_test, [j * split_size, (j + 1) * split_size])
            validation_set = split_training_set[1]
            training_set = np.concatenate(
                (split_training_set[0], split_training_set[2]), axis=0)

            tree = getTree(training_set, 0)
            #print("Depth of tree:")
            #print(tree[1])
            tree = tree[0]
            unpruned_results[i][j] = evaluate(test_set, tree)
            pruned_tree = prune(tree, validation_set)
            pruned_results[i][j] = evaluate(test_set, pruned_tree)

            #stuff for printing nicely
            percent = (float(i * 9) + float(j + 1)) / 0.9
            timeElapsed = time.time() - startTime
            timeLeft = timeElapsed / percent * (100 - percent)
            print("\r\t",
                  round(percent, 2),
                  "%\t Time elapsed: ",
                  int(timeElapsed / 3600),
                  ":",
                  int((timeElapsed / 60) % 60),
                  ":",
                  int(timeElapsed % 60),
                  "\t Time left: ",
                  int(timeLeft / 3600),
                  ":",
                  int((timeLeft / 60) % 60),
                  ":",
                  int(timeLeft % 60),
                  end="      ",
                  sep="")

    average_unpruned_results = average_metrics(unpruned_results)
    average_pruned_results = average_metrics(pruned_results)

    print("Done:")
    print()
    print("\nResults before pruning:\n")
    print_metrics(average_unpruned_results)
    print("\nResults after pruning:\n")
    print_metrics(average_pruned_results)
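
average_metrics is defined elsewhere; a minimal sketch consistent with the empty_metrics shape above, a (4x4 confusion matrix, four scalar metrics) tuple (hypothetical):

import numpy as np

def average_metrics(results):
    # Hypothetical helper: average every component over the 10 x 9 grid of runs.
    flat = [m for fold in results for m in fold]
    avg_matrix = np.mean([m[0] for m in flat], axis=0)
    avg_scalars = [float(np.mean([m[i] for m in flat])) for i in range(1, 5)]
    return (avg_matrix, *avg_scalars)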
Code Example #12
def train_cnn(model, epochs, learning_rate, batch_size, X, Y, X_test, Y_test):
    """
  Training loop for a model utilizing hidden states.

  verify enables sanity checks of the model.
  epochs decides the number of training iterations.
  learning rate decides how much the weights are updated each iteration.
  batch_size decides how many examples are in each mini batch.
  show_attention decides if attention weights are plotted.
  """
    print_interval = 10
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=learning_rate,
                                weight_decay=0.01)

    criterion = torch.nn.CrossEntropyLoss()
    num_of_examples = X.shape[0]
    num_of_batches = math.floor(num_of_examples / batch_size)

    #if verify:
    # verify_model(model, X, Y, batch_size)

    all_losses = []
    all_val_losses = []
    all_accs = []
    all_pres = []
    all_recs = []
    all_fscores = []
    all_mccs = []
    all_val_accs = []

    start_time = time.time()
    for epoch in range(epochs):
        model.train()
        running_loss = 0
        running_acc = 0
        running_pre = 0
        running_pre_total = 0
        running_rec = 0
        running_rec_total = 0
        epoch_fscore = 0
        running_mcc_numerator = 0
        running_mcc_denominator = 0

        #hidden = model.init_hidden(batch_size)

        for count in range(0, num_of_examples - batch_size + 1, batch_size):
            #repackage_hidden(hidden)

            #X_batch = X[:, count:count+batch_size, :]
            X_batch = X[count:count + batch_size, :, :, :]
            Y_batch = Y[count:count + batch_size]

            scores = model(X_batch)
            loss = criterion(scores, Y_batch)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            predictions = predictions_from_output(scores)
            conf_matrix = get_confusion_matrix(Y_batch, predictions)
            TP, FP = conf_matrix[0][0], conf_matrix[0][1]
            FN, TN = conf_matrix[1][0], conf_matrix[1][1]
            running_acc += TP + TN
            running_pre += TP
            running_pre_total += TP + FP
            running_rec += TP
            running_rec_total += TP + FN
            running_mcc_numerator += (TP * TN - FP * FN)
            if ((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)) == 0:
                running_mcc_denominator += 0
            else:
                running_mcc_denominator += math.sqrt(
                    (TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
            running_loss += loss.item()

        elapsed_time = time.time() - start_time
        epoch_acc = running_acc / Y.shape[0]
        all_accs.append(epoch_acc)

        if running_pre_total == 0:
            epoch_pre = 0
        else:
            epoch_pre = running_pre / running_pre_total
        all_pres.append(epoch_pre)

        if running_rec_total == 0:
            epoch_rec = 0
        else:
            epoch_rec = running_rec / running_rec_total
        all_recs.append(epoch_rec)

        if (epoch_pre + epoch_rec) == 0:
            epoch_fscore = 0
        else:
            epoch_fscore = 2 * epoch_pre * epoch_rec / (epoch_pre + epoch_rec)
        all_fscores.append(epoch_fscore)

        if running_mcc_denominator == 0:
            epoch_mcc = 0
        else:
            epoch_mcc = running_mcc_numerator / running_mcc_denominator
        all_mccs.append(epoch_mcc)

        epoch_loss = running_loss / num_of_batches
        all_losses.append(epoch_loss)

        with torch.no_grad():
            model.eval()
            test_scores = model(X_test)
            predictions = predictions_from_output(test_scores)
            predictions = predictions.view_as(Y_test)

            precision, recall, fscore, mcc, val_acc = evaluate(
                Y_test, predictions)

            val_loss = criterion(test_scores, Y_test).item()
            all_val_losses.append(val_loss)
            all_val_accs.append(val_acc)

        if (epoch + 1) % print_interval == 0:
            print('Epoch %d Time %s' % (epoch, get_time_string(elapsed_time)))
            print(
                'T_loss %.3f\tT_acc %.3f\tT_pre %.3f\tT_rec %.3f\tT_fscore %.3f\tT_mcc %.3f'
                % (epoch_loss, epoch_acc, epoch_pre, epoch_rec, epoch_fscore,
                   epoch_mcc))
            print(
                'V_loss %.3f\tV_acc %.3f\tV_pre %.3f\tV_rec %.3f\tV_fscore %.3f\tV_mcc %.3f'
                % (val_loss, val_acc, precision, recall, fscore, mcc))
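
predictions_from_output and get_confusion_matrix are external helpers; minimal sketches consistent with how they are used here (the argmax pattern mirrors torch.max(out, dim=-1)[1] in Code Example #15, and the [[TP, FP], [FN, TN]] layout follows the unpacking above; binary 0/1 labels are an assumption):

import torch

def predictions_from_output(scores):
    # Hypothetical sketch: pick the class with the highest logit.
    return torch.max(scores, dim=-1)[1]

def get_confusion_matrix(Y_true, Y_pred):
    # Hypothetical sketch returning [[TP, FP], [FN, TN]] for binary labels.
    tp = ((Y_pred == 1) & (Y_true == 1)).sum().item()
    fp = ((Y_pred == 1) & (Y_true == 0)).sum().item()
    fn = ((Y_pred == 0) & (Y_true == 1)).sum().item()
    tn = ((Y_pred == 0) & (Y_true == 0)).sum().item()
    return [[tp, fp], [fn, tn]]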
Code Example #13
def train_cnn(model, epochs, learning_rate, batch_size, X, Y, X_test, Y_test,
              subtype):
    """
    Training loop for a model utilizing hidden states.

    verify enables sanity checks of the model.
    epochs decides the number of training iterations.
    learning rate decides how much the weights are updated each iteration.
    batch_size decides how many examples are in each mini batch.
    show_attention decides if attention weights are plotted.
    """
    print_interval = 1
    if subtype == 'PAX' or subtype == 'M2' or subtype == 'NS2':
        Weight_Decay = 0.05
    else:
        Weight_Decay = 0.001

    for i in range(1):  # only SGD (i == 0) runs; raise the range up to 5 to compare the optimizers below

        if i == 0:
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=learning_rate,
                                        weight_decay=Weight_Decay)
            op_title = 'SGD'
            print("SGD")
            Color = 'r'
        elif i == 1:
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=0.0001,
                                         weight_decay=Weight_Decay)
            op_title = 'Adam'
            print("Adam")
            Color = 'b'
        elif i == 2:
            optimizer = torch.optim.RMSprop(model.parameters(),
                                            lr=0.0001,
                                            weight_decay=Weight_Decay)
            op_title = 'RMSprop'
            print("RMSprop")
            Color = 'g'
        elif i == 3:
            optimizer = torch.optim.Adadelta(model.parameters(),
                                             lr=0.005,
                                             weight_decay=Weight_Decay)
            op_title = 'Adadelta'
            print("Adadelta")
            Color = 'c'
        elif i == 4:
            optimizer = torch.optim.Adagrad(model.parameters(),
                                            lr=0.005,
                                            weight_decay=Weight_Decay)
            op_title = 'Adagrad'
            print("Adagrad")
            Color = 'y'

        criterion = torch.nn.CrossEntropyLoss()
        num_of_examples = X.shape[0]
        num_of_batches = math.floor(num_of_examples / batch_size)

        # if verify:
        # verify_model(model, X, Y, batch_size)

        all_losses = []
        all_val_losses = []
        all_accs = []
        all_pres = []
        all_recs = []
        all_fscores = []
        all_mccs = []
        all_val_accs = []

        best_acc = 0
        best_loss = 10
        # defined up front so the post-loop ROC code always has these names bound
        best_epoch, best_pred_prob, best_outcome = None, None, None
        start_time = time.time()
        for epoch in range(epochs):
            model.train()
            running_loss = 0
            running_acc = 0
            running_pre = 0
            running_pre_total = 0
            running_rec = 0
            running_rec_total = 0
            epoch_fscore = 0
            running_mcc_numerator = 0
            running_mcc_denominator = 0

            # hidden = model.init_hidden(batch_size)

            for count in range(0, num_of_examples - batch_size + 1,
                               batch_size):
                # repackage_hidden(hidden)

                # X_batch = X[:, count:count+batch_size, :]
                X_batch = X[count:count + batch_size, :, :, :]
                Y_batch = Y[count:count + batch_size]

                scores = model(X_batch)
                loss = criterion(scores, Y_batch)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                predictions = predictions_from_output(scores)

                conf_matrix = get_confusion_matrix(Y_batch, predictions)
                TP, FP = conf_matrix[0][0], conf_matrix[0][1]
                FN, TN = conf_matrix[1][0], conf_matrix[1][1]
                running_acc += TP + TN
                running_pre += TP
                running_pre_total += TP + FP
                running_rec += TP
                running_rec_total += TP + FN
                running_mcc_numerator += (TP * TN - FP * FN)
                if ((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)) == 0:
                    running_mcc_denominator += 0
                else:
                    running_mcc_denominator += math.sqrt(
                        (TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
                running_loss += loss.item()

            elapsed_time = time.time() - start_time
            epoch_acc = running_acc / Y.shape[0]
            all_accs.append(epoch_acc)
            if running_pre_total == 0:
                epoch_pre = 0
            else:
                epoch_pre = running_pre / running_pre_total
            all_pres.append(epoch_pre)

            if running_rec_total == 0:
                epoch_rec = 0
            else:
                epoch_rec = running_rec / running_rec_total
            all_recs.append(epoch_rec)

            if (epoch_pre + epoch_rec) == 0:
                epoch_fscore = 0
            else:
                epoch_fscore = 2 * epoch_pre * epoch_rec / (epoch_pre +
                                                            epoch_rec)
            all_fscores.append(epoch_fscore)

            if running_mcc_denominator == 0:
                epoch_mcc = 0
            else:
                epoch_mcc = running_mcc_numerator / running_mcc_denominator
            all_mccs.append(epoch_mcc)

            epoch_loss = running_loss / num_of_batches
            all_losses.append(epoch_loss)

            with torch.no_grad():
                model.eval()
                test_scores = model(X_test)
                predictions = predictions_from_output(test_scores)
                predictions = predictions.view_as(Y_test)
                pred_prob = calculate_prob(test_scores)

                precision, recall, fscore, mcc, val_acc = evaluate(
                    Y_test, predictions)

                val_loss = criterion(test_scores, Y_test).item()
                all_val_losses.append(val_loss)
                all_val_accs.append(val_acc)

                if val_acc > best_acc and (val_acc < epoch_acc + 0.01):
                    torch.save(model.state_dict(),
                               str(subtype) + '_params.pkl')
                    print("Higher accuracy, New best ", subtype,
                          " model saved.")
                    best_epoch = epoch
                    best_pred_prob = pred_prob
                    best_acc = val_acc
                    best_loss = val_loss
                    best_outcome = [
                        epoch_acc, epoch_pre, epoch_rec, epoch_fscore, val_acc,
                        precision, recall, fscore
                    ]
                elif (val_acc == best_acc) and (val_loss < best_loss) and (
                        val_acc < epoch_acc + 0.01):
                    torch.save(model.state_dict(),
                               str(subtype) + '_params.pkl')
                    print("Lower loss, New best ", subtype, " model saved.")
                    best_epoch = epoch
                    best_pred_prob = pred_prob
                    best_acc = val_acc
                    best_loss = val_loss
                    best_outcome = [
                        epoch_acc, epoch_pre, epoch_rec, epoch_fscore, val_acc,
                        precision, recall, fscore
                    ]

            if (epoch + 1) % print_interval == 0:
                #print("Epoch: ", epoch)
                print('Epoch %d Time %s' %
                      (epoch, get_time_string(elapsed_time)))
                print(
                    'T_loss %.3f\tT_acc %.3f\tT_pre %.3f\tT_rec %.3f\tT_fscore %.3f\tT_mcc %.3f'
                    % (epoch_loss, epoch_acc, epoch_pre, epoch_rec,
                       epoch_fscore, epoch_mcc))
                print(
                    'V_loss %.3f\tV_acc %.3f\tV_pre %.3f\tV_rec %.3f\tV_fscore %.3f\tV_mcc %.3f'
                    % (val_loss, val_acc, precision, recall, fscore, mcc))
        ############################### save ROC figure ################################
        print(best_epoch)
        fpr_cnn, tpr_cnn, _ = roc_curve(Y_test.cpu(), best_pred_prob.cpu())
        print(subtype, "/'s auc:", auc(fpr_cnn, tpr_cnn))
        '''
        plt.figure(1)
        # plt.xlim(0, 0.8)
        plt.ylim(0, 1)
        plt.plot([0, 1], [0, 1], 'k--')
        plt.plot(fpr_cnn, tpr_cnn, label = "ResNeXt-" + subtype + "(" + str(auc(fpr_cnn, tpr_cnn)).split('.')[0] + '.' + str(auc(fpr_cnn, tpr_cnn)).split('.')[1][:3] + ")")
        '''
        ############################# save optimizer figure #########################
        plt.plot(np.arange(epochs),
                 all_val_accs,
                 lw=0.8,
                 color=Color,
                 label=op_title)
        print(op_title, " is done!")
    plt.xlabel('Epochs')
    plt.ylabel('Validation Accuracy')
    #plt.title("")
    plt.legend()
    plt.ylim((0, 0.75))
    plt.grid()
    filename = "test_" + str(subtype) + ".eps"
    plt.savefig(filename, format="eps", dpi=300)
    print(str(subtype), " is done!")
    plt.clf()
    #plt.show()
    '''
    plt.legend(loc='best')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.savefig("ROC.eps", format="eps", dpi=300)
    '''
    return best_outcome
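
calculate_prob is not shown; since its output feeds sklearn's roc_curve above, a minimal sketch returning the positive-class probability (hypothetical; assumes two-class logits):

import torch.nn.functional as F

def calculate_prob(scores):
    # Hypothetical sketch: softmax the logits and keep the positive-class column.
    return F.softmax(scores, dim=-1)[:, 1]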
Code Example #14
def get_evaluation_metric(data_set, tree):
    metrics = evaluate(data_set, tree)

    # return the fifth element of the metrics tuple (see crossValidate above)
    return metrics[4]
Code Example #15
def main(config, resume):

    # parameters
    batch_size = config.get('batch_size', 32)
    start_epoch = config['epoch']['start']
    max_epoch = config['epoch']['max']
    lr = config.get('lr', 0.0005)
    use_conf = config.get('use_conf', False)

    ## path
    save_path = config['save_path']
    timestamp = datetime.now().strftime(r"%Y-%m-%d_%H-%M-%S")
    save_path = os.path.join(save_path, timestamp)

    result_path = os.path.join(save_path, 'result')
    if not os.path.exists(result_path):
        os.makedirs(result_path)

    model_path = os.path.join(save_path, 'model')
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    dest = shutil.copy('train.py', save_path)
    print("save to: ", dest)

    ## cuda or cpu
    if config['n_gpu'] == 0 or not torch.cuda.is_available():
        device = torch.device("cpu")
        print("using CPU")
    else:
        device = torch.device("cuda:0")

    ## dataloader
    dataset = Dataset(phase='train', do_augmentations=False)
    data_loader = DataLoader(
        dataset,
        batch_size=int(batch_size),
        num_workers=1,
        shuffle=True,
        drop_last=True,
        pin_memory=True,
        # **loader_kwargs,
    )

    val_dataset = Dataset(phase='val', do_augmentations=False)
    val_data_loader = DataLoader(
        val_dataset,
        batch_size=int(batch_size),
        num_workers=1,
        shuffle=True,
        drop_last=True,
        pin_memory=True,
        # **loader_kwargs,
    )

    ## few shot
    do_few_shot = True
    if do_few_shot:
        fs_dataset = Dataset(
            phase='train',
            do_augmentations=False,
            metafile_path='metadata/detection_train_images.json')
        fs_data_loader = DataLoader(
            fs_dataset,
            batch_size=int(128),
            num_workers=1,
            shuffle=True,
            pin_memory=True,
            # **loader_kwargs,
        )

    ## CNN model
    output_dim = 3
    model = MyNet(output_dim)
    model = model.to(device)
    model.train()
    print(model)

    ## loss
    criterion = nn.CrossEntropyLoss(reduction='none')

    ## optimizer
    params = list(filter(lambda p: p.requires_grad, model.parameters()))
    optim_params = {
        'lr': lr,
        'weight_decay': 0,
        'amsgrad': False,
    }
    optimizer = torch.optim.Adam(params, **optim_params)
    lr_params = {
        'milestones': [10],
        'gamma': 0.1,
    }
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, **lr_params)

    loss_avg = AverageMeter()
    acc_avg = AverageMeter()
    fs_loss_avg = AverageMeter()
    fs_acc_avg = AverageMeter()
    logger = SimpleLogger(['train_loss', 'train_acc', 'val_loss', 'val_acc'])

    ## loop
    for epoch in range(start_epoch, max_epoch):
        loss_avg.reset()

        for batch_idx, batch in tqdm(
                enumerate(data_loader),
                total=len(data_loader),
                ncols=80,
                desc=f'training epoch {epoch}',
        ):
            data = batch[0].to(device)
            gt_lbls = batch[1].to(device)
            gt_gt_lbls = batch[2].to(device)

            ## set zerograd
            optimizer.zero_grad()

            ## run forward pass
            out = model(data)  ## logits: [B, NC]; conf: [B, 1]
            preds = torch.max(out, dim=-1)[1]
            # print("out shape: ", out.shape)

            weights = model.compute_entropy_weight(out)
            # print("weights shape: ", weights.shape)

            ## compute loss
            class_loss = criterion(out, gt_lbls)  ## [B, 1]
            # print("class_loss shape: ", class_loss.shape)

            if use_conf:
                loss = (class_loss * (weights**2) + (1 - weights)**2).mean()
            else:
                loss = class_loss.mean()

            ## record
            loss_avg.update(loss.item(), batch_size)
            positive = ((gt_lbls == preds) | (gt_gt_lbls > 2)).sum()  # logical OR of the two boolean masks
            batch_acc = positive.to(torch.float) / batch_size
            acc_avg.update(batch_acc.item(), batch_size)

            ## run backward pass
            loss.backward()
            optimizer.step()  ## update

        ## each epoch
        logger.update(loss_avg.avg, 'train_loss')
        logger.update(acc_avg.avg, 'train_acc')
        print("train loss: ", loss_avg.avg)
        print("train acc: ", acc_avg.avg)

        if do_few_shot and fs_data_loader is not None:
            for batch_idx, batch in tqdm(
                    enumerate(fs_data_loader),
                    total=len(fs_data_loader),
                    ncols=80,
                    desc=f'training epoch {epoch}',
            ):
                data = batch[0].to(device)
                gt_lbls = batch[1].to(device)
                gt_gt_lbls = batch[2].to(device)

                ## set zerograd
                optimizer.zero_grad()

                ## run forward pass
                out = model(data)  ## logits: [B, NC]; conf: [B, 1]
                preds = torch.max(out, dim=-1)[1]
                # print("out shape: ", out.shape)

                weights = model.compute_entropy_weight(out)
                # print("weights shape: ", weights.shape)

                ## compute loss
                class_loss = criterion(out, gt_lbls)  ## [B, 1]
                # print("class_loss shape: ", class_loss.shape)

                if use_conf:
                    loss = (class_loss * (weights**2) +
                            (1 - weights)**2).mean()
                else:
                    loss = class_loss.mean()

                ## record
                positive = ((gt_lbls == preds) | (gt_gt_lbls > 2)).sum()
                batch_acc = positive.to(torch.float) / data.shape[0]
                fs_loss_avg.update(loss.item(), data.shape[0])
                fs_acc_avg.update(batch_acc.item(), data.shape[0])

                ## run backward pass
                loss = loss * 1.0
                loss.backward()
                optimizer.step()  ## update

            # print(f"\nfew-shot: {preds}, {gt_gt_lbls}")
            ## each epoch
            print("fs train loss: ", fs_loss_avg.avg)
            print("fs train acc: ", fs_acc_avg.avg)

        if val_data_loader is not None:
            log = evaluate(model.eval(),
                           val_data_loader,
                           device,
                           use_conf=use_conf)
            model.train()

            logger.update(log['loss'], 'val_loss')
            logger.update(log['acc'], 'val_acc')
            print("val loss: ", log['loss'])
            print("val acc: ", log['acc'])

        best_idx = logger.get_best('val_acc', best='max')
        if best_idx == epoch:
            print('save ckpt')
            ## save ckpt
            _save_checkpoint(model_path, epoch, model)

        lr_scheduler.step()
        print()

    ## save final model
    _save_checkpoint(model_path, epoch, model)
Code Example #16
File: cw1.py Project: alexander3605/C395-ML
        f.write("**** NOISY TREE -> pruned) ****\n")
        f.write(visualPrunedNoisy)
        f.close()

        print("\t------------------------------------")
        print("\tTrees correctly printed to file.")
    except IOError:
        print("\t------------------------------------")
        print(
            "\t*** IOError: Could not write to file! Trees visualization NOT printed! ***"
        )

# STEP 3 - EVALUATION
print("\n------- 3 - EVALUATION -------")
print("\tEvaluating cleanTree on cleanTest...")
metrics = evaluate(cleanTest, cleanTree)
print_metrics(metrics)
print("\t------------------------------------")
print("\tEvaluating cleanTree on noisyTest...")
metrics = evaluate(noisyTest, cleanTree)
print_metrics(metrics)
print("\t------------------------------------")
print("\tEvaluating noisyTree on cleanTest...")
metrics = evaluate(cleanTest, noisyTree)
print_metrics(metrics)
print("\t------------------------------------")
print("\tEvaluating noisyTree on noisyTest...")
metrics = evaluate(noisyTest, noisyTree)
print_metrics(metrics)

# STEP 4 - PRUNING (AND EVALUATION AGAIN)
Code Example #17
def train(combined_model, supervised, unsupervised, optimizer, mmd_weight):
    batches_per_epoch = len(supervised)

    kbar = pkbar.Kbar(target=batches_per_epoch, width=8)

    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")

    combined_model.train()

    images_supervised = []
    text_supervised = []

    images_unsupervised = []
    text_unsupervised = []

    running_loss = 0.0
    running_loss_supervised = 0.0
    running_loss_unsupervised = 0.0

    print("Start training")

    for indx, (supervised_inputs,
               unsupervised_inputs) in enumerate(zip(supervised,
                                                     unsupervised)):
        img_inputs = supervised_inputs[0].to(device)
        text_inputs = supervised_inputs[1].to(device)

        unsupervised_image_inputs = unsupervised_inputs[0].to(device)
        unsupervised_text_inputs = unsupervised_inputs[1].to(device)

        optimizer.zero_grad()

        with torch.set_grad_enabled(True):
            text_embeddings_supervised, image_embeddings_supervised = combined_model(
                text_inputs, img_inputs)

            # unsupervised output
            text_embeddings_unsupervised, image_embeddings_unsupervised = combined_model(
                unsupervised_text_inputs, unsupervised_image_inputs)

            supervised_loss = criterion_supervised(
                text_embeddings_supervised, image_embeddings_supervised)
            unsupervised_loss = criterion_unsupervised(
                text_embeddings_unsupervised, image_embeddings_unsupervised)

            # Scale the unsupervised (MMD) loss by its weight
            unsupervised_loss = unsupervised_loss * mmd_weight

            loss = supervised_loss + unsupervised_loss

            loss.backward()
            optimizer.step()

            images_supervised.append(
                image_embeddings_supervised.detach().clone())
            text_supervised.append(text_embeddings_supervised.detach().clone())

            images_unsupervised.append(
                image_embeddings_unsupervised.detach().clone())
            text_unsupervised.append(
                text_embeddings_unsupervised.detach().clone())

        # statistics
        running_loss += loss.item()
        running_loss_supervised += supervised_loss.item()
        running_loss_unsupervised += unsupervised_loss.item()

        kbar.update(indx,
                    values=[("loss", loss),
                            ("supervised_loss", supervised_loss),
                            ("unsupervised_loss", unsupervised_loss)])

    recall_supervised = evaluate(text_supervised, images_supervised)
    recall_unsupervised = evaluate(text_unsupervised, images_unsupervised)

    epoch_loss = (running_loss / len(supervised),
                  running_loss_supervised / len(supervised),
                  running_loss_unsupervised / len(supervised))

    return epoch_loss, recall_supervised, recall_unsupervised
Code Example #18
File: main.py Project: rsakib15/DeepLearning
def main():
    # load and split raw data in 3 different file for training purposes
    # init_clean(spot_check=20,src="data/deu.txt", dest="data/english-german")
    split_data(dest="data/english-german", data_size=10000)
    train_model(base_filename="data/english-german", model_filename="model/model")
    evaluate(filename="data/english-german", modelname="model/model")