"""Run SentEval classification tasks on sparsified GloVe word embeddings."""
import os

from lib.ProcessEmbeddings import WordEmbeddings
from tools.Blogger import Blogger

logger = Blogger()

# Task-name groups. Only CLASSIFICATION_TASKS is passed to SentEval below;
# SIMILARITY_TASKS is defined but not used in the visible code.
CLASSIFICATION_TASKS = ["MR", "CR", "SUBJ", "MPQA", "SST5", "TREC", "MRPC"]
SIMILARITY_TASKS = [
    "SICKRelatedness",
    "STS12",
    "STS13",
    "STS14",
    "STS15",
    "STS16",
]


def main():
    """Load the GloVe vectors, post-process them, and evaluate them.

    Steps, in order: build a ``WordEmbeddings`` from the GloVe vector file,
    call ``sparsify`` with the ``ckpt/glove3000/ckpt-8000`` path, call
    ``subract_mean``, then run the classification tasks through
    ``WordEmbeddings.SentEval`` with summary saving enabled.
    """
    vector_path = "embeds/glove.6B.300d.txt"
    sparsifier_ckpt = "ckpt/glove3000/ckpt-8000"  # presumably a model checkpoint prefix; verify
    summary_name = "glove_wta_3000.json"

    embeddings = WordEmbeddings(vector_file=vector_path)
    embeddings.sparsify(sparsifier_ckpt)
    # NOTE(review): the method name is spelled `subract_mean` in the original
    # call and is kept as-is; confirm it matches the WordEmbeddings API.
    embeddings.subract_mean()

    logger.status_update("Running SentEval tasks...")
    embeddings.SentEval(
        tasks=CLASSIFICATION_TASKS,
        save_summary=True,
        summary_file_name=summary_name,
    )


if __name__ == "__main__":
    main()
# NOTE(review): this line opens mid-call. `type=str2bool, default=False, help=...)`
# is the tail of an option definition whose opening is not visible in this chunk.
# `parser`, `str2bool`, `PATH_TO_DATA`, `batcher`, `prepare`, `senteval` and
# `logger` are likewise not defined in the visible lines.
# The statements are collapsed onto one physical line, so the original line
# breaks and indentation are lost. In particular, it cannot be determined from
# here whether the trailing `print()` belongs to the `for` loop body or follows it.
#
# Visible flow, in order:
#   1. Register `-batch_size` (int, default 128) and `-epoch_size` (int,
#      default 2), parse the arguments into `values`, and log them with
#      `logger.green`.
#   2. Copy `values.path` into `PATH_TO_VEC`.
#   3. Build `params_senteval` with `task_path`, `usepytorch` (from
#      `values.pytorch`) and `kfold` = 5, then attach a `classifier` dict
#      (`nhid` 0, `optim` "rmsprop", `tenacity` 3, plus the batch and epoch
#      sizes taken from the parsed arguments).
#   4. Construct `senteval.engine.SE(params_senteval, batcher, prepare)` and
#      evaluate `values.tests`.
#   5. For every key of the result, log "<key>: <result[key]['acc']>".
#
# NOTE(review): step 5 indexes "acc" unconditionally, so any result entry
# without that key raises KeyError. Presumably SentEval similarity/relatedness
# tasks report correlation metrics rather than "acc"; confirm before passing
# such tasks through `values.tests`.
type=str2bool, default=False, help="Whether to use pytorch as classifier", ) parser.add_argument("-batch_size", type=int, default=128, help="Batch size for classification") parser.add_argument("-epoch_size", type=int, default=2, help="Epoch size") values = parser.parse_args() logger.green(values) PATH_TO_VEC = values.path # Set params for SentEval params_senteval = { "task_path": PATH_TO_DATA, "usepytorch": values.pytorch, "kfold": 5, } params_senteval["classifier"] = { "nhid": 0, "optim": "rmsprop", "batch_size": values.batch_size, "tenacity": 3, "epoch_size": values.epoch_size, } se = senteval.engine.SE(params_senteval, batcher, prepare) result = se.eval(values.tests) for k in result: logger.status_update("{}: {}".format(k, result[k]["acc"])) print()