Example #1
    def evaluate(self, dataset, mode="test"):
        # We use the test dataset because SemEval doesn't have a dev set
        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(
            dataset, sampler=eval_sampler, batch_size=self.args.eval_batch_size
        )

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "token_type_ids": batch[2],
                    "labels": batch[3],
                    "e1_mask": batch[4],
                    "e2_mask": batch[5],
                }
                outputs = self.model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs["labels"].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0
                )

        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}
        preds = np.argmax(preds, axis=1)
        write_prediction(
            self.args, os.path.join(self.args.eval_dir, "proposed_answers.txt"), preds
        )

        result = compute_metrics(preds, out_label_ids)
        results.update(result)

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  {:15}: {:.4f}".format(key, results[key]))

        return results
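A hypothetical sketch of the write_prediction helper called above, matching the three-argument call in this example (the real utility in the original repository may differ): it maps predicted label indices back to relation names and writes one "<id>\t<relation>" line per example, the format expected by the official SemEval-2010 Task 8 scorer.

def write_prediction(args, output_file, preds):
    # get_label(args) is an assumed helper returning the relation label list
    relation_labels = get_label(args)
    with open(output_file, "w", encoding="utf-8") as f:
        for idx, pred in enumerate(preds):
            # SemEval-2010 Task 8 test examples are numbered starting at 8001
            f.write("{}\t{}\n".format(8001 + idx, relation_labels[pred]))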
Example #2
 def compute_metrics(p: EvalPrediction):
     preds = p.predictions[0] if isinstance(p.predictions,
                                            tuple) else p.predictions
     preds = np.argmax(preds, axis=1)
     write_prediction(
         label_list,
         os.path.join(training_args.eval_dir, "proposed_answers.txt"),
         preds)
     return {
         "accuracy":
         (preds == p.label_ids).astype(np.float32).mean().item(),
         "f1": official_f1(),
     }
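This variant follows the Hugging Face Trainer callback signature (EvalPrediction). A minimal wiring sketch, assuming model, training_args, and the tokenized datasets are defined elsewhere in the original script:

from transformers import Trainer

trainer = Trainer(
    model=model,                      # assumed: the relation-classification model
    args=training_args,
    train_dataset=train_dataset,      # assumed: datasets prepared elsewhere
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,  # the function shown above
)
metrics = trainer.evaluate()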
Example #3
    def evaluate(self):
        # self.load_model()  # Load model

        eval_sampler = SequentialSampler(self.test_dataset)
        eval_dataloader = DataLoader(self.test_dataset, sampler=eval_sampler, batch_size=self.config.batch_size)

        # Eval!
        logger.info("***** Running evaluation *****")
        logger.info("  Num examples = %d", len(self.test_dataset))
        logger.info("  Batch size = %d", self.config.batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        results = {}

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            self.model.eval()
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {'input_ids': batch[0],
                          'attention_mask': batch[1],
                          'token_type_ids': batch[2],
                          'labels': batch[3],
                          'e1_mask': batch[4],
                          'e2_mask': batch[5]}
                outputs = self.model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs['labels'].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

        eval_loss = eval_loss / nb_eval_steps
        preds = np.argmax(preds, axis=1)
        result = compute_metrics(preds, out_label_ids)
        results.update(result)
        logger.info("***** Eval results *****")
        for key in sorted(result.keys()):
            logger.info("  %s = %s", key, str(result[key]))

        write_prediction(os.path.join(self.config.eval_dir, "proposed_answers.txt"), preds)
        return results
Example #4
def main():
    operation = sys.argv[1]
    if operation == 'train':
        loss = sys.argv[2]
        output_file = sys.argv[5]        

        X, y = utils.load_data(feature_file=sys.argv[3],
                               label_file=sys.argv[4],
                               dtype=np.float64)

        scaler = StandardScaler(with_mean=False)
        # scaler.fit(X)

        X, y = utils.to_single_output(X, y)
        # X = scaler.transform(X)

        with gzip.open(output_file + '.scaler', 'wb') as f:
            cPickle.dump(scaler, f, cPickle.HIGHEST_PROTOCOL)

        print 'training...'
        model = train(X, y, loss)

        with gzip.open(output_file, 'wb') as f:
            cPickle.dump(model, f, cPickle.HIGHEST_PROTOCOL)
            print 'model saved to %s.' % output_file            
    
    elif operation == 'test':
        model_file = sys.argv[2]
        feature_file = sys.argv[3]
        output_file = sys.argv[4]
        X = utils.load_data(feature_file,
                            dtype=np.float64)
        with gzip.open(model_file + '.scaler', 'rb') as f:
            scaler = cPickle.load(f)
        with gzip.open(model_file, 'rb') as f:
            print 'loading model...'
            model = cPickle.load(f)

        print 'testing...'
        with open(output_file, 'wb') as f:
            # X = scaler.transform(X)
            y = model.predict_log_proba(X)
            utils.write_prediction(f, y, model.classes_)
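For reference, the sys.argv indices read above imply the following invocation pattern (placeholders, not actual file names from the original project):

# train: python <script>.py train <loss> <feature_file> <label_file> <output_model_file>
# test:  python <script>.py test  <model_file> <feature_file> <output_prediction_file>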
Example #5
logistic = linear_model.LogisticRegression(max_iter=1000000)
logistic.fit(features, target)
print(logistic.score(features, target))

scores = model_selection.cross_val_score(logistic,
                                         features,
                                         target,
                                         scoring='accuracy',
                                         cv=10)
print(scores)
print(scores.mean())

test_features = test[[
    "Pclass", "Age", "Sex", "Fare", "SibSp", "Parch", "Embarked"
]].values
utils.write_prediction(logistic.predict(test_features),
                       "logistic_regression.csv")

print("\nUse polynomial features")
poly = preprocessing.PolynomialFeatures(degree=2)
features_ = poly.fit_transform(features)

clf = linear_model.LogisticRegression(C=10, max_iter=1000000)
clf.fit(features_, target)
print(clf.score(features_, target))

scores = model_selection.cross_val_score(clf,
                                         features_,
                                         target,
                                         scoring='accuracy',
                                         cv=10)
print(scores)
# grid_search is built outside the shown snippet (a sketch of constructing it with
# GridSearchCV follows below); its summary was printed like this
# (grid_scores_ is cv_results_ in current scikit-learn):
# print(grid_search.grid_scores_, grid_search.best_params_, grid_search.best_score_)
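A hedged sketch of how such a grid_search could be built for the gradient-boosting model that follows; the parameter grid here is illustrative, not the original search space:

from sklearn import ensemble, model_selection

param_grid = {
    'learning_rate': [0.1, 0.01, 0.005],
    'max_depth': [3, 7, 12],
    'n_estimators': [100, 500, 1500],
}
grid_search = model_selection.GridSearchCV(
    ensemble.GradientBoostingClassifier(random_state=1),
    param_grid,
    scoring='accuracy',
    cv=5)
grid_search.fit(features, target)
print(grid_search.best_params_, grid_search.best_score_)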
# now decrease the learning rate and run more boosting iterations (n_estimators=1500)
gbm = ensemble.GradientBoostingClassifier(learning_rate=0.005,
                                          min_samples_split=40,
                                          min_samples_leaf=1,
                                          max_features=2,
                                          max_depth=12,
                                          n_estimators=1500,
                                          subsample=0.75,
                                          random_state=1)
gbm = gbm.fit(features, target)

print(gbm.feature_importances_)
print(gbm.score(features, target))

# this takes quite a long time to run
scores = model_selection.cross_val_score(gbm,
                                         features,
                                         target,
                                         scoring='accuracy',
                                         cv=20)
print(scores)
print(scores.mean())

test_features = test[[
    "Pclass", "Age", "Sex", "Fare", "SibSp", "Parch", "Embarked"
]].values
prediction_gbm = gbm.predict(test_features)
utils.write_prediction(prediction_gbm, "resultsgbm.csv")
Example #7
import pandas as pd
import utils
from sklearn import tree, model_selection
import numpy as np

test = pd.read_csv("test.csv")
utils.clean_data(test)

train = pd.read_csv("train.csv")
utils.clean_data(train)

target = train["Survived"].values
features_names = [
    "Pclass", "Age", "Fare", "Embarked", "Sex", "SibSp", "Parch"
]
features = train[features_names].values

generalized_tree = tree.DecisionTreeClassifier(random_state=1,
                                               max_depth=7,
                                               min_samples_split=2)
generalized_tree = generalized_tree.fit(features, target)

features_test = test[features_names].values
predictions = generalized_tree.predict(features_test)
utils.write_prediction(predictions, "naive_decision_Tree_prediction.csv")
Example #8
    def evaluate(self, mode):
        # We use the test dataset because SemEval doesn't have a dev set
        if mode == "test":
            dataset = self.test_dataset
        elif mode == "dev":
            dataset = self.dev_dataset
        else:
            raise Exception("Only dev and test dataset available")

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset,
                                     sampler=eval_sampler,
                                     batch_size=self.args.eval_batch_size,
                                     drop_last=True)

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    "input_ids": batch[0],
                    "attention_mask": batch[1],
                    "token_type_ids": batch[2],
                    "labels": batch[3],
                    "e1_mask": batch[4],
                    "e2_mask": batch[5],
                }
                outputs = self.model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs["labels"].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids,
                    inputs["labels"].detach().cpu().numpy(),
                    axis=0)

        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}
        preds = np.argmax(preds, axis=1)
        write_prediction(
            self.args, os.path.join(self.args.eval_dir,
                                    "proposed_answers.txt"), preds)
        # result = compute_metrics(preds, out_label_ids)
        # results.update(result)

        # added 03/27: compute metrics with scikit-learn
        precision = precision_score(out_label_ids.tolist(),
                                    preds.tolist(),
                                    average='macro',
                                    zero_division=0)
        recall = recall_score(out_label_ids.tolist(),
                              preds.tolist(),
                              average='macro',
                              zero_division=0)
        f1 = f1_score(out_label_ids.tolist(),
                      preds.tolist(),
                      average='macro',
                      zero_division=0)
        acc = accuracy_score(out_label_ids.tolist(), preds.tolist())
        results["precision"] = precision
        results["recall"] = recall
        results["f1"] = f1
        results["acc"] = acc

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  {} = {:.4f}".format(key, results[key]))

        # also report per-label precision/recall/F1
        logger.info(
            classification_report(out_label_ids.tolist(),
                                  preds.tolist(),
                                  target_names=self.label_lst))
        return results
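The four separate scikit-learn metric calls above can be collapsed into a single pass; a small equivalent sketch:

from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# macro-averaged precision/recall/F1 plus accuracy in one call
precision, recall, f1, _ = precision_recall_fscore_support(
    out_label_ids.tolist(), preds.tolist(), average="macro", zero_division=0)
acc = accuracy_score(out_label_ids.tolist(), preds.tolist())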
Example #9
    def evaluate(self, mode):
        # We use the test dataset because SemEval doesn't have a dev set
        if mode == 'test':
            dataset = self.test_dataset
        elif mode == 'dev':
            dataset = self.dev_dataset
        else:
            raise Exception("Only dev and test dataset available")

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset,
                                     sampler=eval_sampler,
                                     batch_size=self.args.batch_size)

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.args.batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    'input_ids': batch[0],
                    'attention_mask': batch[1],
                    'token_type_ids': batch[2],
                    'labels': batch[3],
                    'e1_mask': batch[4],
                    'e2_mask': batch[5]
                }
                outputs = self.model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs['labels'].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids,
                    inputs['labels'].detach().cpu().numpy(),
                    axis=0)

        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}
        preds = np.argmax(preds, axis=1)
        result = compute_metrics(preds, out_label_ids)
        results.update(result)

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  %s = %s", key, str(results[key]))

        write_prediction(
            self.args, os.path.join(self.args.eval_dir,
                                    "proposed_answers.txt"), preds)
        return results
Example #10
target = train["Survived"].values
features_forest = train[[
    "Pclass", "Age", "Sex", "Fare", "SibSp", "Parch", "Embarked"
]].values

print "\nUse Random Forest classifier"

forest = ensemble.RandomForestClassifier(max_depth=7,
                                         min_samples_split=4,
                                         n_estimators=1000,
                                         random_state=1,
                                         n_jobs=-1)
forest = forest.fit(features_forest, target)

print(forest.feature_importances_)
print(forest.score(features_forest, target))

scores = model_selection.cross_val_score(forest,
                                         features_forest,
                                         target,
                                         scoring='accuracy',
                                         cv=10)
print(scores)
print(scores.mean())

test_features_forest = test[[
    "Pclass", "Age", "Sex", "Fare", "SibSp", "Parch", "Embarked"
]].values
prediction_forest = forest.predict(test_features_forest)
utils.write_prediction(prediction_forest, "results/random_forest.csv")
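A hypothetical sketch of the utils.write_prediction helper the Titanic snippets rely on (the real helper in the original repository may differ): it pairs each prediction with the test set's PassengerId and writes a Kaggle-style submission CSV.

import pandas as pd

def write_prediction(prediction, name):
    # assumes a test.csv with a PassengerId column, as read elsewhere in these snippets,
    # and that `prediction` is aligned with the rows of that file
    passenger_id = pd.read_csv("test.csv")["PassengerId"].values
    solution = pd.DataFrame({"PassengerId": passenger_id, "Survived": prediction})
    solution.to_csv(name, index=False)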
Example #11
print(train.shape)
target = train["Survived"].values
features = train[["Pclass", "Sex", "Age", "Fare"]].values

decision_tree = tree.DecisionTreeClassifier(random_state=1)
decision_tree = decision_tree.fit(features, target)

print(decision_tree.feature_importances_)
print(decision_tree.score(features, target))

print "\nTry on test set"

test_features = test[["Pclass", "Sex", "Age", "Fare"]].values
prediction = decision_tree.predict(test_features)
utils.write_prediction(prediction, "results/decision_tree.csv")

print "\nCorrect overfitting"

feature_names = ["Pclass", "Age", "Sex", "Fare", "SibSp", "Parch", "Embarked"]
features_two = train[feature_names].values
decision_tree_two = tree.DecisionTreeClassifier(max_depth=7,
                                                min_samples_split=2,
                                                random_state=1)
decision_tree_two = decision_tree_two.fit(features_two, target)

print(decision_tree_two.feature_importances_)
print(decision_tree_two.score(features_two, target))
tree.export_graphviz(decision_tree_two,
                     feature_names=feature_names,
                     out_file="./graphs/decision_tree_two.dot")
Example #12
    def evaluate(self, mode):
        # We use the test dataset because SemEval doesn't have a dev set
        if mode == "test":
            dataset = self.test_dataset
        elif mode == "dev":
            dataset = self.dev_dataset
        else:
            raise Exception("Only dev and test dataset available")

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=self.args.eval_batch_size)

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        criterion1 = create_criterion('cross_entropy')
        criterion2 = create_criterion('f1')
        criterion3 = create_criterion('focal')
        criterion4 = create_criterion('label_smoothing')
        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                # inputs = {
                #     "input_ids": batch[0],
                #     "attention_mask": batch[1],
                #     "token_type_ids": batch[2],
                #     "labels": batch[3],
                #     "e1_mask": batch[4],
                #     "e2_mask": batch[5],
                # }
                # outputs = self.model(**inputs)
                # tmp_eval_loss, logits = outputs[:2]

                # print(batch)
                logits = self.model(input_ids=batch[0],
                                     attention_mask=batch[1],
                                     e1_mask=batch[4],
                                     e2_mask=batch[5])

                loss1 = criterion3(logits, batch[3])
                loss2 = criterion4(logits, batch[3])
                tmp_eval_loss = loss1 + loss2

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            if preds is None:
                preds = logits.detach().cpu().numpy()
                # out_label_ids = inputs["labels"].detach().cpu().numpy()
                out_label_ids = batch[3].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                # out_label_ids = np.append(out_label_ids, inputs["labels"].detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(out_label_ids, batch[3].detach().cpu().numpy(), axis=0)

        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}
        preds = np.argmax(preds, axis=1)
        write_prediction(self.args, os.path.join(self.args.eval_dir, "proposed_answers.txt"), preds)

        result = compute_metrics(preds, out_label_ids)
        print(f'evaluate acc:{result}')
        results.update(result)

        logger.info("***** Eval results *****")
        for key in sorted(results.keys()):
            logger.info("  {} = {:.4f}".format(key, results[key]))

        return results
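create_criterion is not defined in this snippet. A hypothetical sketch of such a factory (the project's actual helper may differ; the 'f1' variant requested above is omitted here), with focal loss in its common form and label smoothing via PyTorch's built-in option:

import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    def __init__(self, gamma=2.0):
        super().__init__()
        self.gamma = gamma

    def forward(self, logits, targets):
        ce = F.cross_entropy(logits, targets, reduction="none")
        pt = torch.exp(-ce)                        # probability assigned to the true class
        return ((1.0 - pt) ** self.gamma * ce).mean()

def create_criterion(name):
    if name == "cross_entropy":
        return nn.CrossEntropyLoss()
    if name == "focal":
        return FocalLoss()
    if name == "label_smoothing":
        return nn.CrossEntropyLoss(label_smoothing=0.1)   # requires PyTorch >= 1.10
    raise ValueError("unknown criterion: {}".format(name))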
Example #13
    def evaluate(self, mode):
        '''
        eval process
        :param mode: "dev" or "test"
        :return:
        '''
        # We use the test dataset because SemEval doesn't have a dev set
        if mode == 'test':
            dataset = self.test_dataset
        elif mode == 'dev':
            dataset = self.dev_dataset
        else:
            raise Exception("Only dev and test dataset available")

        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset,
                                     sampler=eval_sampler,
                                     batch_size=self.args.batch_size)

        # Eval!
        logger.info("***** Running evaluation on %s dataset *****", mode)
        logger.info("  Num examples = %d", len(dataset))
        logger.info("  Batch size = %d", self.args.batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None

        self.model.eval()

        for batch in tqdm(eval_dataloader, desc="Evaluating"):
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {
                    'input_ids': batch[0],
                    'attention_mask': batch[1],
                    'token_type_ids': batch[2],
                    'labels': batch[3],
                    'e1_mask': batch[4],
                    'e2_mask': batch[5],
                    'e1_ids': batch[6],
                    'e2_ids': batch[7],
                    'graph': self.graph,
                    'edge_feature': self.edge_feature,
                    'entity_feature': self.entity_feature
                }
                outputs = self.model(**inputs)
                tmp_eval_loss, logits = outputs[:2]

                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1

            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs['labels'].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids,
                    inputs['labels'].detach().cpu().numpy(),
                    axis=0)

        eval_loss = eval_loss / nb_eval_steps
        results = {"loss": eval_loss}
        preds = np.argmax(preds, axis=1)
        write_prediction(
            self.args, os.path.join(self.args.eval_dir,
                                    "proposed_answers.txt"), preds)

        result = compute_metrics(self.args.task, preds, out_label_ids)
        results.update(result)

        # logger.info("***** Eval results *****")
        # for key in sorted(results.keys()):
        # logger.info("  {} = {:.4f}".format(key, results[key]))
        output_eval_file = os.path.join("eval", "eval_results.txt")
        with open(output_eval_file, "a") as writer:
            logger.info("***** Eval results *****")
            for key in sorted(result.keys()):
                logger.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
            for i in range(0, 10):
                writer.write("\n")
        return results
Example #14
def main():
    # -------------- Stage 1: get user dict --------------

    # read all data
    X, y, users_train = read_train_txt(os.path.join(DATA_DIR, TRAIN_TXT))
    X_dev, y_dev, users_dev = read_train_txt(os.path.join(DATA_DIR, DEV_TXT))
    X_test, ids, users_test = read_test_txt(os.path.join(DATA_DIR, TEST_TXT))

    X_train, y_train = X, y

    # merge data according to user id
    X_train_merged, y_train_merged = get_user_dict(X_train, y_train,
                                                   users_train)

    # merge dev data according to user id
    X_dev, y_dev = get_user_dict(X_dev, y_dev, users_dev)

    new_X_train = X_train + X_train_merged + X_dev
    new_y_train = y_train + y_train_merged + y_dev

    users_test, X_test, user_ids_dict = get_user_dict_test(
        X_test, ids, users_test)

    # -------------- Stage 2: Tf-idf --------------

    # compute tf-idf features
    vectorizer = TfidfVectorizer(sublinear_tf=True, ngram_range=(1, 1))
    X_test = vectorizer.fit_transform(X_test)
    X_train = vectorizer.transform(new_X_train)
    X_dev = vectorizer.transform(X_dev)

    # -------------- Stage 3: Training --------------

    print("--- Start training ---")
    svm = OneVsRestClassifier(LinearSVC(C=1.5), n_jobs=-1)
    svm.fit(X_train, new_y_train)

    # nb = MultinomialNB()
    # nb.fit(X_train, y_train)

    # knn = KNeighborsClassifier()
    # knn.fit(X_train, y_train)

    print("--- finish training ---")

    # -------------- Stage 4: Predictions --------------

    # print(svm.score(X_dev, y_dev))
    # print(nb.score(X_dev, y_dev))

    predictions = svm.predict(X_test)
    write_prediction(OUT_CSV, predictions, users_test, user_ids_dict)
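Note that the vectorizer above is fitted on the test texts and only applied to the training texts. If the intent were the more common setup of fitting the vocabulary on the training data, the three calls would instead look like this (a sketch, not the original author's choice):

# alternative: fit the TF-IDF vocabulary on the merged training texts
# and reuse it for dev and test
X_train = vectorizer.fit_transform(new_X_train)
X_dev = vectorizer.transform(X_dev)
X_test = vectorizer.transform(X_test)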
Example #15
features = train[[
    'Pclass', 'Age', 'Fare', 'Embarked', 'Sex', 'SibSp', 'Parch'
]].values

#using logistic regression
classifier = linear_model.LogisticRegression(C=10)
classifier = classifier.fit(features, target)
print(classifier.score(features, target))

scores = model_selection.cross_val_score(classifier,
                                         features,
                                         target,
                                         scoring='accuracy',
                                         cv=50)
# predict on the held-out test features rather than on the cross-validation scores
# (assumes the same cleaned `test` DataFrame used in the other snippets)
test_features = test[[
    'Pclass', 'Age', 'Fare', 'Embarked', 'Sex', 'SibSp', 'Parch'
]].values
lin_predict = classifier.predict(test_features)
utils.write_prediction(lin_predict, 'resultlogistic_regression.csv')

# here we add polynomial features, which fit the training data much better than plain linear regression
poly = preprocessing.PolynomialFeatures(degree=2)
poly_features = poly.fit_transform(features)

classifier_ = classifier.fit(poly_features, target)
print(classifier_.score(poly_features, target))

scores = model_selection.cross_val_score(classifier,
                                         features,
                                         target,
                                         scoring='accuracy',
                                         cv=10)
print(scores)
print(scores.mean())
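The final cross_val_score above re-fits the classifier on the raw features, so it does not actually evaluate the polynomial model. A sketch using a Pipeline keeps the PolynomialFeatures transform inside each fold (same imports as above assumed):

from sklearn import pipeline

poly_model = pipeline.make_pipeline(
    preprocessing.PolynomialFeatures(degree=2),
    linear_model.LogisticRegression(C=10, max_iter=1000000))
scores = model_selection.cross_val_score(poly_model,
                                         features,
                                         target,
                                         scoring='accuracy',
                                         cv=10)
print(scores)
print(scores.mean())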