Esempio n. 1
0
from lib.ProcessEmbeddings import WordEmbeddings
from tools.Blogger import Blogger
import os

# Module-level Blogger instance; the script entry point below uses it to
# report progress via status_update().
logger = Blogger()
# Task-name groups passed to the SentEval wrapper via its `tasks` argument.
# The names are kept verbatim; order is preserved.
CLASSIFICATION_TASKS = [
    "MR",
    "CR",
    "SUBJ",
    "MPQA",
    "SST5",
    "TREC",
    "MRPC",
]

# Second task group (relatedness / STS benchmarks by name).
SIMILARITY_TASKS = [
    "SICKRelatedness",
    "STS12",
    "STS13",
    "STS14",
    "STS15",
    "STS16",
]

if __name__ == "__main__":
    # Build the embedding wrapper from the GloVe vector file, then run the
    # two preprocessing calls in their original order.
    WE = WordEmbeddings(vector_file="embeds/glove.6B.300d.txt")
    WE.sparsify("ckpt/glove3000/ckpt-8000")
    # NOTE(review): the method name is spelled `subract_mean` (not
    # `subtract_mean`); presumably this matches lib.ProcessEmbeddings --
    # confirm before renaming.
    WE.subract_mean()

    logger.status_update("Running SentEval tasks...")

    # Evaluation is requested for the classification task group only, with
    # save_summary enabled and an explicit summary file name.
    senteval_options = {
        "tasks": CLASSIFICATION_TASKS,
        "save_summary": True,
        "summary_file_name": "glove_wta_3000.json",
    }
    WE.SentEval(**senteval_options)
Esempio n. 2
0
        type=str2bool,
        default=False,
        help="Whether to use pytorch as classifier",
    )
    # Classifier mini-batch size; forwarded to the "classifier" config below.
    parser.add_argument("-batch_size",
                        type=int,
                        default=128,
                        help="Batch size for classification")
    # Classifier epoch size; forwarded to the "classifier" config below.
    parser.add_argument("-epoch_size", type=int, default=2, help="Epoch size")
    # Parse the command line and log the resulting namespace.
    values = parser.parse_args()
    logger.green(values)
    # NOTE(review): PATH_TO_VEC is not read again in the visible lines;
    # presumably `prepare`/`batcher` consume it -- confirm it is bound in the
    # scope they actually read from.
    PATH_TO_VEC = values.path
    # Top-level SentEval parameters: dataset location (PATH_TO_DATA, defined
    # outside this fragment), the pytorch toggle from the command line, and a
    # fixed kfold value of 5.
    params_senteval = {
        "task_path": PATH_TO_DATA,
        "usepytorch": values.pytorch,
        "kfold": 5,
    }
    # Classifier settings: batch and epoch size come from the command line,
    # the rest are fixed here.
    # NOTE(review): nhid=0 presumably selects a classifier without a hidden
    # layer (logistic regression per the SentEval README) -- confirm.
    params_senteval["classifier"] = {
        "nhid": 0,
        "optim": "rmsprop",
        "batch_size": values.batch_size,
        "tenacity": 3,
        "epoch_size": values.epoch_size,
    }
    # Create the SentEval engine with the batcher/prepare callbacks and run
    # the tasks requested on the command line.
    se = senteval.engine.SE(params_senteval, batcher, prepare)
    result = se.eval(values.tests)
    # Report the "acc" entry of every task result, followed by an empty line.
    # NOTE(review): a task whose result has no "acc" key would raise KeyError
    # here (SentEval similarity tasks appear to report correlation metrics
    # instead) -- confirm values.tests only contains accuracy-reporting tasks.
    for k in result:
        logger.status_update("{}: {}".format(k, result[k]["acc"]))
        print()