Code example #1
    def on_epoch_end(self, epoch, logs=None):
        self.counter += 1

        # re-optimize the cut points on the training predictions
        train_predictions = self.model.predict(self.X_train, verbose=0)
        cpo = CutPointOptimizer(train_predictions, self.Y_train)
        self.cutPoints = optimize.fmin(cpo.qwk, self.cutPoints)

        # score the validation data and map raw scores to ordinal ratings
        p = self.model.predict(self.X_val, verbose=0)
        p = np.searchsorted(self.cutPoints, p) + 1
        current = quadratic_weighted_kappa.quadratic_weighted_kappa(self.y_val.values.ravel(), p)

        print('Epoch %d Kappa: %f | Best Kappa: %f \n' % (epoch, current, self.best))

        # if improvement over best...
        if current > self.best:
            self.best = current
            self.best_rounds = self.counter
            self.wait = 0
            self.model.save_weights(self.filepath, overwrite=True)
        else:
            if self.wait >= self.patience:  # no more patience, retrieve best model
                self.model.stop_training = True
                print('Best number of rounds: %d \nKappa: %f \n' % (self.best_rounds, self.best))
                self.model.load_weights(self.filepath)
            self.wait += 1  # increment the number of epochs without improvement
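The method above is only part of a Keras callback; its constructor is not shown in this example. A minimal sketch of the surrounding class, assuming the attribute names the method uses (counter, cutPoints, best, wait, patience, filepath) and a hypothetical class name:

import numpy as np
from keras.callbacks import Callback

class KappaEarlyStopping(Callback):  # hypothetical name; the original class is not shown
    def __init__(self, X_train, Y_train, X_val, y_val, cutPoints, filepath, patience=10):
        super().__init__()
        self.X_train, self.Y_train = X_train, Y_train
        self.X_val, self.y_val = X_val, y_val
        self.cutPoints = np.asarray(cutPoints, dtype=float)  # initial guesses for the rating boundaries
        self.filepath = filepath    # where the best weights are saved
        self.patience = patience    # epochs to tolerate without improvement
        self.counter = 0            # epochs seen so far
        self.best = -1.0            # assumed start value; QWK lies in [-1, 1]
        self.best_rounds = 0
        self.wait = 0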
Code example #2
def do(m, dimension, n_components, FIX_INVERTED=True, FIX_RIGHT_LEFT=True, SAVE=True, n_components_min=0):
    # m = 1000
    # dimension = 256
    (images, y) = pre_process.extract(m, dimension, FIX_INVERTED, FIX_RIGHT_LEFT, SAVE)

    # n_components = 100
    # images_reduced = pca.fit_transform(m, dimension, images, n_components, SAVE, n_components_min)

    # (pred, svm_score) = svm.predict(m, dimension, images_reduced, y, SAVE)
    (pred, svm_score) = svm.predict(m, dimension, images, y, SAVE)

    # first half of the data is treated as train, second half as test
    kappa_score_train = quadratic_weighted_kappa(pred[:m // 2], y[:m // 2], min_rating=0, max_rating=4)
    kappa_score_test = quadratic_weighted_kappa(pred[m // 2:], y[m // 2:], min_rating=0, max_rating=4)
    kappa_score_all = quadratic_weighted_kappa(pred, y, min_rating=0, max_rating=4)

    print("kappa score for train: ", kappa_score_train)
    print("kappa score for test: ", kappa_score_test)
    print("kappa score for all data: ", kappa_score_all)
    print("svm score: ", svm_score)
Code example #5
File: util.py  Project: Ankush96/kaggle_diabetic
def kappa(y_true, y_pred):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    # collapse one-hot / probability matrices to a single expected rating per row
    if len(y_true.shape) > 1 and y_true.shape[1] > 1:
        y_true = y_true.dot(range(y_true.shape[1]))
    if len(y_pred.shape) > 1 and y_pred.shape[1] > 1:
        y_pred = y_pred.dot(range(y_pred.shape[1]))
    try:
        return quadratic_weighted_kappa(y_true, y_pred)
    except IndexError:
        return np.nan
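The dot(range(...)) step collapses a one-hot (or probability) matrix into one expected rating per row before scoring. A minimal illustration:

import numpy as np

one_hot = np.array([[1, 0, 0, 0, 0],
                    [0, 0, 1, 0, 0],
                    [0, 0, 0, 0, 1]])
print(one_hot.dot(range(one_hot.shape[1])))  # -> [0 2 4], the encoded ratings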
Code example #6
    def qwk(self, cutPoints):
        # bin the raw predictions into ordinal ratings using the candidate cut points
        transformedPredictions = np.searchsorted(cutPoints, self.predicted) + 1
        # negate kappa so that a scipy.optimize minimizer effectively maximizes it
        return -1 * quadratic_weighted_kappa.quadratic_weighted_kappa(transformedPredictions, self.actual)
Code example #8
import json
import os
import random
import sys

FOLDER = sys.argv[1]

predict = []
true = []
for filename in os.listdir(FOLDER):
    if not filename.startswith("part"):
        continue
    for line in open(os.path.join(FOLDER, filename)):
        data = json.loads(line.strip())
        predict.append(int(float(data[PREDICTION])))
        if data[PREDICTION] != "0.0":
            print("!")
        true.append(int(float(data["label"])))


def get_ans():
    # sample a rating from a fixed prior via its cumulative probabilities
    n = random.random()
    if n <= 0.728:
        return 0
    elif n <= 0.794:
        return 1
    elif n <= 0.953:
        return 2
    elif n <= 0.979:
        return 3
    else:
        return 4


print(quadratic_weighted_kappa(predict, true, 0, 4))
Code example #9
    def qwkerror(self, preds, dtrain):
        # custom XGBoost eval: bin raw predictions into ratings, then score
        labels = dtrain.get_label()
        preds = np.searchsorted(self.cutPoints, preds) + 1
        kappa = quadratic_weighted_kappa.quadratic_weighted_kappa(labels, preds)
        return 'kappa', -1 * kappa  # negated so that lower is better
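This matches the feval(preds, dtrain) -> (name, value) shape that xgboost.train accepts; kappa is negated because XGBoost treats a smaller metric as better by default. A hedged usage sketch, assuming params, dtrain, dval, and an object cpo that holds cutPoints and exposes this method:

import xgboost as xgb

bst = xgb.train(params, dtrain,
                num_boost_round=1000,
                evals=[(dval, 'val')],
                feval=cpo.qwkerror,        # reports ('kappa', -QWK) per round
                early_stopping_rounds=50)  # stops once -QWK stops decreasing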
Code example #10
File: script.py  Project: rrozas/Kaggle
def scorer(estimator, X, y):
    return quadratic_weighted_kappa(y, estimator.predict(X))
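Because the function takes (estimator, X, y), it already satisfies scikit-learn's scorer protocol and can be passed as scoring= directly. A minimal usage sketch, assuming pipeline, X, and y exist:

from sklearn.model_selection import cross_val_score

scores = cross_val_score(pipeline, X, y, cv=5, scoring=scorer)
print(scores.mean(), scores.std())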
Code example #11
File: script.py  Project: rrozas/Kaggle
skf = StratifiedKFold(n_splits=5)
y = train["median_relevance"]
y2 = train["relevance_variance"]
for i, (train_index, test_index) in enumerate(skf.split(train, y)):
    print('fold', i)
    X_train, X_test = train.iloc[train_index], train.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    pipeline.fit(X_train, y_train)

    y_pred = pipeline.predict(X_test)

    print(metrics.classification_report(y_test, y_pred))
    print(quadratic_weighted_kappa(y_test, y_pred))
    print(metrics.confusion_matrix(y_test, y_pred))
    print()

# scores = cross_val_score(pipeline, train, train['median_relevance'], cv=5, scoring=scorer)
# print(scores, scores.mean(), scores.std())

pipeline.fit(train, train["median_relevance"])

predictions = pipeline.predict(test)

submission = pd.DataFrame({"id": test["id"], "prediction": predictions})
submission.to_csv("python_benchmark.csv", index=False)
Code example #12
    def train_model(self, num_epochs, log_nth):
        training_start_time = time.time()

        optimizer = self.optimizer

        self._reset_histories()
        if self.host_device == 'gpu':
            self.model.cuda()
        iter_per_epoch = len(self.train_dataset_loader)
        logging.info("Start training")
        logging.info(
            f"Size of training data: "
            f"{len(self.train_dataset_loader.sampler) * self.train_dataset_loader.batch_size}"
        )

        for i_epoch in range(num_epochs):
            logging.info("Starting new epoch...")
            running_loss = 0.

            all_y = []
            all_y_pred = []

            # scheduler step for exp and step schedulers

            if (not isinstance(self.scheduler,
                               torch.optim.lr_scheduler.ReduceLROnPlateau)):
                self.scheduler.step()
                logging.info(f"Learning rate is {self.scheduler.get_lr()}")

            for i_batch, batch in enumerate(self.train_dataset_loader):
                x, y = batch
                x, y = Variable(x), Variable(y)
                if self.host_device == 'gpu':
                    x, y = x.cuda(), y.cuda()

                optimizer.zero_grad()
                outputs = self.model(x)
                if self.host_device == 'gpu':
                    train_loss = self.loss_func(outputs.cuda(), y)
                else:
                    train_loss = self.loss_func(outputs, y)

                train_loss.backward()
                optimizer.step()

                running_loss += train_loss.item()
                _, y_pred = torch.max(outputs.data, 1)
                all_y.append(y)
                all_y_pred.append(y_pred)

                if log_nth != 0 and i_batch % log_nth == 0:
                    logging.info(
                        f'[Iteration {i_batch}/{iter_per_epoch}] '
                        f'TRAIN loss: {running_loss / sum(curr_y.shape[0] for curr_y in all_y):.3f}'
                    )
                self.train_loss_history.append(running_loss)
            y = torch.cat(all_y)
            y_pred = torch.cat(all_y_pred)
            train_qwk = quadratic_weighted_kappa(y_pred, y.data)

            logging.info(
                f'[Epoch {i_epoch+1}/{num_epochs}] '
                f'TRAIN   QWK: {train_qwk:.3f}; loss: {running_loss / y.shape[0]:.3f}'
            )
            self.train_qwk_history.append(train_qwk)

            running_loss = 0.
            all_y = []
            all_y_pred = []
            for x, y in self.valid_dataset_loader:
                x, y = Variable(x), Variable(y)
                if self.host_device == 'gpu':
                    x, y = x.cuda(), y.cuda()

                outputs = self.model(x)
                if self.host_device == 'gpu':
                    val_loss = self.loss_func(outputs.cuda(), y)
                else:
                    val_loss = self.loss_func(outputs, y)

                running_loss += val_loss.item()
                _, y_pred = torch.max(outputs.data, 1)
                all_y.append(y)
                all_y_pred.append(y_pred)

            y = torch.cat(all_y)
            y_pred = torch.cat(all_y_pred)
            val_qwk = quadratic_weighted_kappa(y_pred, y.data)

            logging.info(
                f'[Epoch {i_epoch+1}/{num_epochs}] '
                f'VAL     QWK: {val_qwk:.3f}; loss: {running_loss / y.shape[0]:.3f}'
            )

            self.val_qwk_history.append(val_qwk)
            self.val_loss_history.append(running_loss)
            training_time = time.time() - training_start_time
            logging.info(
                f"Epoch {i_epoch+1} - Training Time - {training_time} seconds")

            # scheduler step for plateau scheduler
            val_loss_scheduler = running_loss
            if (isinstance(self.scheduler,
                           torch.optim.lr_scheduler.ReduceLROnPlateau)):
                self.scheduler.step(val_loss_scheduler)

            if val_qwk > self.best_qwk:
                logging.info(f'New best validation QWK score: {val_qwk}')
                self.best_qwk = val_qwk
                self.best_model = deepcopy(self.model)
                self.wait = 0
                logging.info('Storing best model...')
                torch.save(self.best_model, self.model_path)
                logging.info('Done storing')
            else:
                self.wait += 1
                if self.wait >= self.patience:
                    logging.info('Stopped after epoch %d' % (i_epoch))
                    break

        training_time = time.time() - training_start_time
        logging.info(f"Full Training Time - {training_time} seconds")
Code example #13
def validate(n_epochs, n_models, n_steps=5, activations=False):
    with h5py.File(constants.train_features_scaled_strat_file, "r") as fi:
        labels_train = fi.get("y_train")[:60000]
        X_train = fi.get("X_train")[:60000]
        y_train, _ = preprocess_labels(labels_train,
                                       categorical=(net_type == 'softmax'))

        labels_test = fi.get("y_test")[()]
        X_test = fi.get("X_test")[()]
        y_test, _ = preprocess_labels(labels_test,
                                      categorical=(net_type == 'softmax'))

        y_train = y_train / 5.0 / 2 + 0.5
        y_test = y_test / 5.0 / 2 + 0.5

        if net_type == 'softmax':
            n_classes = y_train.shape[1]
        elif net_type == 'regression':
            n_classes = 1
        print(n_classes, 'classes')

        n_dims = X_train.shape[1]
        print(n_dims, 'dims')

        cum_blend = 0
        models = range(1, n_models + 1)
        for i in models:
            print("\n-------------- Model %d --------------\n" % i)

            model = model_factory(n_classes, n_dims, net_type)
            for n in range(0, n_epochs, n_steps):
                model.fit(X_train,
                          y_train,
                          nb_epoch=n_steps,
                          batch_size=128,
                          verbose=2)  #, validation_data=(X_test, y_test))

                # validate individual net
                if net_type == 'softmax':
                    y_pred = model.predict_classes(X_test, verbose=0)
                elif net_type == 'regression':
                    y_pred = model.predict(X_test, verbose=0)
                    y_pred = np.floor((y_pred - 0.5) * 2 * 5.0).flatten()
                    y_pred[y_pred < 0] = 0
                    y_pred[y_pred > 4] = 4

                print('Epoch: %d. Accuracy: %0.2f%%. Kappa: %0.2f' %
                      (n + n_steps, 100 * accuracy_score(labels_test, y_pred),
                       quadratic_weighted_kappa.quadratic_weighted_kappa(
                           labels_test, y_pred)))

            # validate ensemble
            if net_type == 'softmax':
                cum_blend += model.predict_proba(X_test, verbose=0)
                y_pred = np.argmax(cum_blend, axis=1)
            elif net_type == 'regression':
                cum_blend += model.predict(X_test, verbose=0)
                y_pred = np.floor((cum_blend / i - 0.5) * 2 * 5.0).flatten()
                y_pred[y_pred < 0] = 0
                y_pred[y_pred > 4] = 4

            print('\nBlend %d. Accuracy: %0.2f%%. Kappa: %0.2f' %
                  (i, 100 * accuracy_score(labels_test, y_pred),
                   quadratic_weighted_kappa.quadratic_weighted_kappa(
                       labels_test, y_pred)))
            print('Confusion matrix:\n', confusion_matrix(labels_test, y_pred))

            fitted = fit2distribution(labels_test, cum_blend)
            print('\nFitted. Accuracy: %0.2f%%. Kappa: %0.2f' %
                  (100 * accuracy_score(labels_test, fitted),
                   quadratic_weighted_kappa.quadratic_weighted_kappa(
                       labels_test, fitted)))
            print('Confusion matrix:\n', confusion_matrix(labels_test, fitted))

            if activations:
                F_train = pick_activations(model, X_train, net_type)
                F_test = pick_activations(model, X_test, net_type)

                fout = os.path.join(
                    constants.features_NN_dir,
                    features_NN_prefix + format(i, '02d') + '.hd5')
                with h5py.File(fout, "w") as fo:
                    fo.create_dataset("X_train", data=F_train)
                    fo.create_dataset("y_train", data=labels_train)
                    fo.create_dataset("X_test", data=F_test)
                    fo.create_dataset("y_test", data=labels_test)

                with h5py.File(fout, "r") as fi:
                    X = fi.get("X_train")
                    y = fi.get("y_train")
                    XX = fi.get("X_test")
                    yy = fi.get("y_test")
                    print(X.shape, y.shape, XX.shape, yy.shape)
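All of the examples above assume a quadratic_weighted_kappa implementation is importable, typically from Kaggle's competition evaluation scripts or the ml_metrics package. As a hedged stand-in, scikit-learn's cohen_kappa_score computes the same metric when given quadratic weights:

import numpy as np
from sklearn.metrics import cohen_kappa_score

def quadratic_weighted_kappa(y_true, y_pred, min_rating=None, max_rating=None):
    """Stand-in for the competition metric: Cohen's kappa with quadratic weights."""
    labels = None
    if min_rating is not None and max_rating is not None:
        labels = list(range(min_rating, max_rating + 1))
    return cohen_kappa_score(np.asarray(y_true, dtype=int),
                             np.asarray(y_pred, dtype=int),
                             labels=labels, weights="quadratic")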