Exemple #1
0
class SE(object):
    """Run named evaluation tasks through caller-supplied callbacks.

    ``eval`` builds the evaluation object for a task, then calls its
    ``do_prepare`` with ``prepare`` and its ``run`` with ``batcher``.
    """

    def __init__(self, params, batcher, prepare=None):
        """Normalise the configuration and remember the callbacks.

        Args:
            params: settings mapping; it is wrapped in ``utils.dotdict`` and
                missing ``usepytorch``, ``seed``, ``batch_size``, ``nhid``,
                ``kfold`` and ``classifier`` entries receive defaults.
            batcher: object handed to ``evaluation.run`` for every task.
            prepare: object handed to ``evaluation.do_prepare``; when falsy a
                two-argument no-op is used instead.

        Raises:
            AssertionError: if the classifier config has no ``'nhid'`` key.
        """
        # parameters
        params = utils.dotdict(params)

        # Only fill in the settings the caller left out.
        for key, default in (('usepytorch', True), ('seed', 1111),
                             ('batch_size', 128), ('nhid', 0), ('kfold', 5)):
            if key not in params:
                setattr(params, key, default)

        # A missing or empty classifier config falls back to nhid=0.
        if 'classifier' not in params or not params['classifier']:
            params.classifier = {'nhid': 0}

        assert 'nhid' in params.classifier, 'Set number of hidden units in classifier config!!'

        self.params = params

        # batcher and prepare
        self.batcher = batcher
        self.prepare = prepare if prepare else lambda x, y: None

        self.list_tasks = [
            'CR', 'MR', 'MPQA', 'SUBJ', 'SST2', 'SST5', 'TREC', 'MRPC',
            'SICKRelatedness', 'SICKEntailment', 'STSBenchmark', 'SNLI',
            'ImageCaptionRetrieval', 'STS12', 'STS13', 'STS14', 'STS15',
            'STS16'
        ]

    def eval(self, name):
        """Run one task, or every task in a list, and return the results.

        Args:
            name: a task name from ``self.list_tasks``, or a list of them.

        Returns:
            Whatever ``evaluation.run`` returns for a single task; for a list,
            a dict mapping each task name to its result. The value is also
            stored in ``self.results``.

        Raises:
            AssertionError: if ``name`` is not in ``self.list_tasks``.
            ValueError: if ``name`` is listed but has no evaluation wired up.
        """
        # evaluate on evaluation [name], either takes string or list of strings
        if isinstance(name, list):
            self.results = {task: self.eval(task) for task in name}
            return self.results

        params = self.params
        tpath = params.task_path
        assert name in self.list_tasks, str(name) + ' not in ' + str(
            self.list_tasks)

        # Factories are lambdas so an evaluation class is only looked up when
        # its task is actually requested.
        factories = {
            'CR': lambda: CREval(tpath + '/CR', seed=params.seed),
            'MR': lambda: MREval(tpath + '/MR', seed=params.seed),
            'MPQA': lambda: MPQAEval(tpath + '/MPQA', seed=params.seed),
            'SUBJ': lambda: SUBJEval(tpath + '/SUBJ', seed=params.seed),
            'SST2': lambda: SSTEval(tpath + '/SST/binary', nclasses=2,
                                    seed=params.seed),
            'SST5': lambda: SSTEval(tpath + '/SST/fine', nclasses=5,
                                    seed=params.seed),
            'TREC': lambda: TRECEval(tpath + '/TREC', seed=params.seed),
            'MRPC': lambda: MRPCEval(tpath + '/MRPC', seed=params.seed),
            'SICKRelatedness': lambda: SICKRelatednessEval(
                tpath + '/SICK', seed=params.seed),
            'STSBenchmark': lambda: STSBenchmarkEval(
                tpath + '/STS/STSBenchmark', seed=params.seed),
            'SICKEntailment': lambda: SICKEntailmentEval(
                tpath + '/SICK', seed=params.seed),
            'SNLI': lambda: SNLIEval(tpath + '/SNLI', seed=params.seed),
            'ImageCaptionRetrieval': lambda: ImageCaptionRetrievalEval(
                tpath + '/COCO', seed=params.seed),
        }

        if name in ('STS12', 'STS13', 'STS14', 'STS15', 'STS16'):
            # Look the STS[NN]Eval class up by name in this module's
            # namespace instead of evaluating a code string.
            sts_class = globals()[name + 'Eval']
            self.evaluation = sts_class(tpath + '/STS/' + name + '-en-test',
                                        seed=params.seed)
        elif name in factories:
            self.evaluation = factories[name]()
        else:
            # A task that is listed but never wired up must not fall through
            # and silently reuse a previous self.evaluation.
            raise ValueError('No evaluation defined for task ' + str(name))

        self.params.current_task = name
        self.evaluation.do_prepare(self.params, self.prepare)

        self.results = self.evaluation.run(self.params, self.batcher)

        return self.results
Exemple #2
0
    def eval(self, name):
        """Run one task, or every task in a list, and return the results.

        Args:
            name: a task name from ``self.list_tasks``, or a list of them.

        Returns:
            Whatever ``evaluation.run`` returns for a single task; for a list,
            a dict mapping each task name to its result. The value is also
            stored in ``self.results``.

        Raises:
            AssertionError: if ``name`` is not in ``self.list_tasks``.
            ValueError: if ``name`` is listed but has no evaluation wired up.
        """
        # evaluate on evaluation [name], either takes string or list of strings
        if isinstance(name, list):
            self.results = {task: self.eval(task) for task in name}
            return self.results

        params = self.params
        tpath = params.task_path
        assert name in self.list_tasks, str(name) + ' not in ' + str(
            self.list_tasks)

        # Factories are lambdas so an evaluation class is only looked up when
        # its task is actually requested.
        factories = {
            'CR': lambda: CREval(tpath + '/CR', seed=params.seed),
            'MR': lambda: MREval(tpath + '/MR', seed=params.seed),
            'MPQA': lambda: MPQAEval(tpath + '/MPQA', seed=params.seed),
            'SUBJ': lambda: SUBJEval(tpath + '/SUBJ', seed=params.seed),
            'SST2': lambda: SSTEval(tpath + '/SST/binary', nclasses=2,
                                    seed=params.seed),
            'SST5': lambda: SSTEval(tpath + '/SST/fine', nclasses=5,
                                    seed=params.seed),
            'TREC': lambda: TRECEval(tpath + '/TREC', seed=params.seed),
            'MRPC': lambda: MRPCEval(tpath + '/MRPC', seed=params.seed),
            'SICKRelatedness': lambda: SICKRelatednessEval(
                tpath + '/SICK', seed=params.seed),
            'STSBenchmark': lambda: STSBenchmarkEval(
                tpath + '/STS/STSBenchmark', seed=params.seed),
            'SICKEntailment': lambda: SICKEntailmentEval(
                tpath + '/SICK', seed=params.seed),
            'SNLI': lambda: SNLIEval(tpath + '/SNLI', seed=params.seed),
            'DIS': lambda: DISEval(tpath + '/DIS', seed=params.seed),
            'PDTB': lambda: PDTB_Eval(tpath + '/PDTB', seed=params.seed),
            'PDTB_EX': lambda: PDTB_EX_Eval(tpath + '/PDTB_EX',
                                            seed=params.seed),
            'PDTB_IMEX': lambda: PDTB_IMEX_Eval(tpath + '/PDTB_IMEX',
                                                seed=params.seed),
            'DAT': lambda: DAT_EVAL(tpath + '/DAT', seed=params.seed),
            'ImageCaptionRetrieval': lambda: ImageCaptionRetrievalEval(
                tpath + '/COCO', seed=params.seed),
            'ABSA_CH': lambda: ABSA_CHEval(tpath + '/ABSA_CH',
                                           seed=params.seed),
            'ABSA_SP': lambda: ABSA_SPEval(tpath + '/ABSA_SP',
                                           seed=params.seed),
            'STS_SP': lambda: STS_SPBenchmarkEval(
                tpath + '/STS_SP/STSBenchmark', seed=params.seed),
        }

        if name in ('STS12', 'STS13', 'STS14', 'STS15', 'STS16'):
            # Look the STS[NN]Eval class up by name in this module's
            # namespace instead of evaluating a code string.
            sts_class = globals()[name + 'Eval']
            self.evaluation = sts_class(tpath + '/STS/' + name + '-en-test',
                                        seed=params.seed)
        elif name in factories:
            self.evaluation = factories[name]()
        else:
            # A task that is listed but never wired up must not fall through
            # and silently reuse a previous self.evaluation.
            raise ValueError('No evaluation defined for task ' + str(name))

        self.params.current_task = name
        self.evaluation.do_prepare(self.params, self.prepare)

        self.results = self.evaluation.run(self.params, self.batcher)

        return self.results
Exemple #3
0
    def eval(self, name):
        """Run one task, or every task in a list, and return the results.

        Args:
            name: a task name from ``self.list_tasks``, or a list of them.

        Returns:
            Whatever ``evaluation.run`` returns for a single task; for a list,
            a dict mapping each task name to its result. The value is also
            stored in ``self.results``.

        Raises:
            AssertionError: if ``name`` is not in ``self.list_tasks``.
            ValueError: if ``name`` is listed but has no evaluation wired up.
        """
        # evaluate on evaluation [name], either takes string or list of strings
        if isinstance(name, list):
            self.results = {task: self.eval(task) for task in name}
            return self.results

        params = self.params
        tpath = params.task_path
        assert name in self.list_tasks, str(name) + ' not in ' + str(
            self.list_tasks)

        # Factories are lambdas so an evaluation class is only looked up when
        # its task is actually requested.
        factories = {
            # Original SentEval tasks
            'CR': lambda: CREval(tpath + '/downstream/CR', seed=params.seed),
            'MR': lambda: MREval(tpath + '/downstream/MR', seed=params.seed),
            'MPQA': lambda: MPQAEval(tpath + '/downstream/MPQA',
                                     seed=params.seed),
            'SUBJ': lambda: SUBJEval(tpath + '/downstream/SUBJ',
                                     seed=params.seed),
            'SST2': lambda: SSTEval(tpath + '/downstream/SST/binary',
                                    nclasses=2, seed=params.seed),
            'SST5': lambda: SSTEval(tpath + '/downstream/SST/fine',
                                    nclasses=5, seed=params.seed),
            'TREC': lambda: TRECEval(tpath + '/downstream/TREC',
                                     seed=params.seed),
            'MRPC': lambda: MRPCEval(tpath + '/downstream/MRPC',
                                     seed=params.seed),
            'SICKRelatedness': lambda: SICKRelatednessEval(
                tpath + '/downstream/SICK', seed=params.seed),
            'STSBenchmark': lambda: STSBenchmarkEval(
                tpath + '/downstream/STS/STSBenchmark', seed=params.seed),
            'SICKEntailment': lambda: SICKEntailmentEval(
                tpath + '/downstream/SICK', seed=params.seed),
            'SNLI': lambda: SNLIEval(tpath + '/downstream/SNLI',
                                     seed=params.seed),
            'ImageCaptionRetrieval': lambda: ImageCaptionRetrievalEval(
                tpath + '/downstream/COCO', seed=params.seed),
            # Probing Tasks
            'Length': lambda: LengthEval(tpath + '/probing',
                                         seed=params.seed),
            'WordContent': lambda: WordContentEval(tpath + '/probing',
                                                   seed=params.seed),
            'Depth': lambda: DepthEval(tpath + '/probing', seed=params.seed),
            'TopConstituents': lambda: TopConstituentsEval(
                tpath + '/probing', seed=params.seed),
            'BigramShift': lambda: BigramShiftEval(tpath + '/probing',
                                                   seed=params.seed),
            'Tense': lambda: TenseEval(tpath + '/probing', seed=params.seed),
            'SubjNumber': lambda: SubjNumberEval(tpath + '/probing',
                                                 seed=params.seed),
            'ObjNumber': lambda: ObjNumberEval(tpath + '/probing',
                                               seed=params.seed),
            'OddManOut': lambda: OddManOutEval(tpath + '/probing',
                                               seed=params.seed),
            'CoordinationInversion': lambda: CoordinationInversionEval(
                tpath + '/probing', seed=params.seed),
        }

        if name in ('STS12', 'STS13', 'STS14', 'STS15', 'STS16'):
            # Look the STS[NN]Eval class up by name in this module's
            # namespace instead of evaluating a code string.
            sts_class = globals()[name + 'Eval']
            self.evaluation = sts_class(
                tpath + '/downstream/STS/' + name + '-en-test',
                seed=params.seed)
        elif name in factories:
            self.evaluation = factories[name]()
        else:
            # A task that is listed but never wired up must not fall through
            # and silently reuse a previous self.evaluation.
            raise ValueError('No evaluation defined for task ' + str(name))

        self.params.current_task = name
        self.evaluation.do_prepare(self.params, self.prepare)

        self.results = self.evaluation.run(self.params, self.batcher)

        return self.results
Exemple #4
0
    def eval(self, name):
        """Run one task, or every task in a list, and return the results.

        Args:
            name: a task name from ``self.list_tasks``, or a list of them.

        Returns:
            Whatever ``evaluation.run`` returns for a single task; for a list,
            a dict mapping each task name to its result. The value is also
            stored in ``self.results``.

        Raises:
            AssertionError: if ``name`` is not in ``self.list_tasks``.
            ValueError: if ``name`` is listed but has no evaluation wired up.
        """
        # evaluate on evaluation [name], either takes string or list of strings
        if isinstance(name, list):
            self.results = {task: self.eval(task) for task in name}
            return self.results

        params = self.params
        tpath = params.task_path
        assert name in self.list_tasks, str(name) + ' not in ' + str(
            self.list_tasks)

        # Factories are lambdas so an evaluation class is only looked up when
        # its task is actually requested.
        factories = {
            'CR': lambda: CREval(tpath + '/CR', seed=params.seed),
            'MR': lambda: MREval(tpath + '/MR', seed=params.seed),
            'MPQA': lambda: MPQAEval(tpath + '/MPQA', seed=params.seed),
            'SUBJ': lambda: SUBJEval(tpath + '/SUBJ', seed=params.seed),
            'SST2': lambda: SSTEval(tpath + '/SST/binary', nclasses=2,
                                    seed=params.seed),
            'SST5': lambda: SSTEval(tpath + '/SST/fine', nclasses=5,
                                    seed=params.seed),
            'TREC': lambda: TRECEval(tpath + '/TREC', seed=params.seed),
            'MRPC': lambda: MRPCEval(tpath + '/MRPC', seed=params.seed),
            'SICKRelatedness': lambda: SICKRelatednessEval(
                tpath + '/SICK', seed=params.seed),
            'STSBenchmark': lambda: STSBenchmarkEval(
                tpath + '/STS/STSBenchmark', seed=params.seed),
            'SICKEntailment': lambda: SICKEntailmentEval(
                tpath + '/SICK', seed=params.seed),
            'SNLI': lambda: SNLIEval(tpath + '/SNLI', seed=params.seed),
            'ImageCaptionRetrieval': lambda: ImageCaptionRetrievalEval(
                tpath + '/COCO', seed=params.seed),
        }

        if name in ('STS12', 'STS13', 'STS14', 'STS15', 'STS16'):
            # STSClass will be one of STS[NN]Eval; it is looked up by name in
            # this module's namespace instead of evaluating a code string.
            STSClass = globals()[name + 'Eval']
            self.evaluation = STSClass(tpath + '/STS/' + name + '-en-test',
                                       seed=params.seed)
        elif name in factories:
            self.evaluation = factories[name]()
        else:
            # A task that is listed but never wired up must not fall through
            # and silently reuse a previous self.evaluation.
            raise ValueError('No evaluation defined for task ' + str(name))

        self.params.current_task = name
        self.evaluation.do_prepare(self.params, self.prepare)

        self.results = self.evaluation.run(self.params, self.batcher)

        return self.results
Exemple #5
0
    def eval(self, name):
        """Run one task, or every task in a list, and return the results.

        Args:
            name: a task name from ``self.list_tasks``, or a list of them.

        Returns:
            Whatever ``evaluation.run`` returns for a single task; for a list,
            a dict mapping each task name to its result. The value is also
            stored in ``self.results``.

        Raises:
            AssertionError: if ``name`` is not in ``self.list_tasks``.
            ValueError: if ``name`` is listed but has no evaluation wired up.
        """
        # evaluate on evaluation [name], either takes string or list of strings
        if isinstance(name, list):
            self.results = {task: self.eval(task) for task in name}
            return self.results

        tpath = self.params.task_path
        assert name in self.list_tasks, str(name) + ' not in ' + str(
            self.list_tasks)
        max_seq_len = self.params.max_seq_len
        load_data = self.params.load_data
        seed = self.params.seed

        # Keyword arguments shared by every GLUE evaluation below.
        glue_kwargs = dict(max_seq_len=max_seq_len,
                           load_data=load_data,
                           seed=seed)

        # Factories are lambdas so an evaluation class is only looked up when
        # its task is actually requested.
        factories = {
            # Original SentEval tasks
            'CR': lambda: CREval(tpath + '/downstream/CR', seed=seed),
            'MR': lambda: MREval(tpath + '/downstream/MR', seed=seed),
            'MPQA': lambda: MPQAEval(tpath + '/downstream/MPQA', seed=seed),
            'SUBJ': lambda: SUBJEval(tpath + '/downstream/SUBJ', seed=seed),
            'SST2': lambda: SSTEval(tpath + '/downstream/SST/binary',
                                    nclasses=2, seed=seed),
            'SST5': lambda: SSTEval(tpath + '/downstream/SST/fine',
                                    nclasses=5, seed=seed),
            'TREC': lambda: TRECEval(tpath + '/downstream/TREC', seed=seed),
            'MRPC': lambda: MRPCEval(tpath + '/downstream/MRPC',
                                     load_data=load_data, seed=seed),
            'SICKRelatedness': lambda: SICKRelatednessEval(
                tpath + '/downstream/SICK', seed=seed),
            'STSBenchmark': lambda: STSBenchmarkEval(
                tpath + '/downstream/STS/STSBenchmark',
                load_data=load_data, seed=seed),
            'SICKEntailment': lambda: SICKEntailmentEval(
                tpath + '/downstream/SICK', seed=seed),
            'SNLI': lambda: SNLIEval(tpath + '/downstream/SNLI', seed=seed),
            'ImageCaptionRetrieval': lambda: ImageCaptionRetrievalEval(
                tpath + '/downstream/COCO', seed=seed),
            # additional GLUE tasks; STS-B, SST2, MRPC are above
            # might want to have the same interface for these tasks as above
            'MNLI': lambda: MNLIEval(tpath + '/glue_data/MNLI',
                                     **glue_kwargs),
            'QQP': lambda: QQPEval(tpath + '/glue_data/QQP', **glue_kwargs),
            'RTE': lambda: RTEEval(tpath + '/glue_data/RTE', **glue_kwargs),
            'QNLI': lambda: QNLIEval(tpath + '/glue_data/QNLI',
                                     **glue_kwargs),
            'QNLIv2': lambda: QNLIv2Eval(tpath + '/glue_data/QNLIv2',
                                         **glue_kwargs),
            'WNLI': lambda: WNLIEval(tpath + '/glue_data/WNLI',
                                     **glue_kwargs),
            'CoLA': lambda: CoLAEval(tpath + '/glue_data/CoLA',
                                     **glue_kwargs),
            # diagnostic dataset
            'ANLI': lambda: ANLIEval(tpath + '/glue_data/ANLI',
                                     **glue_kwargs),
            # Probing Tasks
            'Length': lambda: LengthEval(tpath + '/probing', seed=seed),
            'WordContent': lambda: WordContentEval(tpath + '/probing',
                                                   seed=seed),
            'Depth': lambda: DepthEval(tpath + '/probing', seed=seed),
            'TopConstituents': lambda: TopConstituentsEval(
                tpath + '/probing', seed=seed),
            'BigramShift': lambda: BigramShiftEval(tpath + '/probing',
                                                   seed=seed),
            'Tense': lambda: TenseEval(tpath + '/probing', seed=seed),
            'SubjNumber': lambda: SubjNumberEval(tpath + '/probing',
                                                 seed=seed),
            'ObjNumber': lambda: ObjNumberEval(tpath + '/probing',
                                               seed=seed),
            'OddManOut': lambda: OddManOutEval(tpath + '/probing',
                                               seed=seed),
            'CoordinationInversion': lambda: CoordinationInversionEval(
                tpath + '/probing', seed=seed),
        }

        if name in ('STS12', 'STS13', 'STS14', 'STS15', 'STS16'):
            # Look the STS[NN]Eval class up by name in this module's
            # namespace instead of evaluating a code string.
            sts_class = globals()[name + 'Eval']
            self.evaluation = sts_class(
                tpath + '/downstream/STS/' + name + '-en-test', seed=seed)
        elif name in factories:
            self.evaluation = factories[name]()
        else:
            # A task that is listed but never wired up must not fall through
            # and silently reuse a previous self.evaluation.
            raise ValueError('No evaluation defined for task ' + str(name))

        self.params.current_task = name
        self.evaluation.do_prepare(self.params, self.prepare)

        self.results = self.evaluation.run(self.params, self.batcher)

        return self.results
Exemple #6
0
class SentEval(object):
    """Run named evaluation tasks through caller-supplied callbacks.

    ``eval`` builds the evaluation object for a task, then calls its
    ``do_prepare`` with ``prepare`` and its ``run`` with ``batcher``.
    """

    def __init__(self, params, batcher, prepare=None):
        """Fill in default settings, validate them and store the callbacks.

        Args:
            params: settings object supporting ``in`` and attribute access;
                it is modified in place. Missing ``usepytorch``,
                ``classifier``, ``nhid``, ``batch_size``, ``seed`` and
                ``kfold`` entries receive defaults.
            batcher: object handed to ``evaluation.run`` for every task.
            prepare: object handed to ``evaluation.do_prepare``; when falsy a
                two-argument no-op is used instead.

        Raises:
            AssertionError: if the classifier is neither ``'LogReg'`` nor
                ``'MLP'``, if an MLP is requested with ``nhid <= 0``, or if
                an MLP is requested without PyTorch.
        """
        # setting default parameters
        for key, default in (('usepytorch', True), ('classifier', 'LogReg'),
                             ('nhid', 0), ('batch_size', 128),
                             ('seed', 1111), ('kfold', 5)):
            if key not in params:
                setattr(params, key, default)
        self.params = params

        self.batcher = batcher
        self.prepare = prepare if prepare else (lambda x, y: None)

        # sanity check
        assert params.classifier in ['LogReg', 'MLP']
        if params.classifier == 'MLP':
            # Adjacent literals instead of a backslash continuation, which
            # leaked the source indentation into the message.
            assert params.nhid > 0, ('When using an MLP, '
                                     'you need to set params.nhid>0')
        if not params.usepytorch and params.classifier == 'MLP':
            assert False, 'No MLP implemented in scikit-learn'

        self.list_tasks = ['CR', 'MR', 'MPQA', 'SUBJ', 'SST', 'TREC', 'MRPC',
                           'SICKRelatedness', 'SICKEntailment', 'STSBenchmark',
                           'SNLI', 'ImageCaptionRetrieval', 'STS12', 'STS13',
                           'STS14', 'STS15', 'STS16']

    def eval(self, name):
        """Run one task, or every task in a list, and return the results.

        Args:
            name: a task name from ``self.list_tasks``, or a list of them.

        Returns:
            Whatever ``evaluation.run`` returns for a single task; for a list,
            a dict mapping each task name to its result. The value is also
            stored in ``self.results``.

        Raises:
            AssertionError: if ``name`` is not in ``self.list_tasks``.
            ValueError: if ``name`` is listed but has no evaluation wired up.
        """
        # evaluate on evaluation [name], either takes string or list of strings
        if isinstance(name, list):
            self.results = {task: self.eval(task) for task in name}
            return self.results

        params = self.params
        tpath = params.task_path
        assert name in self.list_tasks, str(name) + ' not in ' + str(self.list_tasks)

        # Factories are lambdas so an evaluation class is only looked up when
        # its task is actually requested.
        factories = {
            'CR': lambda: CREval(tpath + '/CR', seed=params.seed),
            'MR': lambda: MREval(tpath + '/MR', seed=params.seed),
            'MPQA': lambda: MPQAEval(tpath + '/MPQA', seed=params.seed),
            'SUBJ': lambda: SUBJEval(tpath + '/SUBJ', seed=params.seed),
            'SST': lambda: SSTBinaryEval(tpath + '/SST/binary',
                                         seed=params.seed),
            'TREC': lambda: TRECEval(tpath + '/TREC', seed=params.seed),
            'MRPC': lambda: MRPCEval(tpath + '/MRPC', seed=params.seed),
            'SICKRelatedness': lambda: SICKRelatednessEval(
                tpath + '/SICK', seed=params.seed),
            'STSBenchmark': lambda: STSBenchmarkEval(
                tpath + '/STS/STSBenchmark', seed=params.seed),
            'SICKEntailment': lambda: SICKEntailmentEval(
                tpath + '/SICK', seed=params.seed),
            'SNLI': lambda: SNLIEval(tpath + '/SNLI', seed=params.seed),
            'ImageCaptionRetrieval': lambda: ImageCaptionRetrievalEval(
                tpath + '/COCO', seed=params.seed),
        }

        if name in ('STS12', 'STS13', 'STS14', 'STS15', 'STS16'):
            # Look the STS[NN]Eval class up by name in this module's
            # namespace instead of evaluating a code string.
            sts_class = globals()[name + 'Eval']
            self.evaluation = sts_class(tpath + '/STS/' + name + '-en-test',
                                        seed=params.seed)
        elif name in factories:
            self.evaluation = factories[name]()
        else:
            # A task that is listed but never wired up must not fall through
            # and silently reuse a previous self.evaluation.
            raise ValueError('No evaluation defined for task ' + str(name))

        self.params.current_task = name
        self.evaluation.do_prepare(self.params, self.prepare)

        self.results = self.evaluation.run(self.params, self.batcher)

        return self.results
Exemple #7
0
    def eval(self, name):
        """Run one task, or every task in a list, and return the results.

        For a single task the elapsed wall-clock time is printed once the
        run has finished.

        Args:
            name: a task name from ``self.list_tasks``, or a list of them.

        Returns:
            Whatever ``evaluation.run`` returns for a single task; for a list,
            a dict mapping each task name to its result. The value is also
            stored in ``self.results``.

        Raises:
            AssertionError: if ``name`` is not in ``self.list_tasks``.
            ValueError: if ``name`` is listed but has no evaluation wired up.
        """
        # evaluate on evaluation [name], either takes string or list of strings
        if isinstance(name, list):
            self.results = {task: self.eval(task) for task in name}
            return self.results

        params = self.params
        tpath = params.task_path
        assert name in self.list_tasks, str(name) + ' not in ' + str(
            self.list_tasks)

        start = time.time()

        # Factories are lambdas so an evaluation class is only looked up when
        # its task is actually requested.
        factories = {
            # Original SentEval tasks
            'CR': lambda: CREval(tpath + '/downstream/CR', seed=params.seed),
            'MR': lambda: MREval(tpath + '/downstream/MR', seed=params.seed),
            'MPQA': lambda: MPQAEval(tpath + '/downstream/MPQA',
                                     seed=params.seed),
            'SUBJ': lambda: SUBJEval(tpath + '/downstream/SUBJ',
                                     seed=params.seed),
            'SST2': lambda: SSTEval(tpath + '/downstream/SST/binary',
                                    nclasses=2, seed=params.seed),
            'SST5': lambda: SSTEval(tpath + '/downstream/SST/fine',
                                    nclasses=5, seed=params.seed),
            'TREC': lambda: TRECEval(tpath + '/downstream/TREC',
                                     seed=params.seed),
            'MRPC': lambda: MRPCEval(tpath + '/downstream/MRPC',
                                     seed=params.seed),
            'SICKRelatedness': lambda: SICKRelatednessEval(
                tpath + '/downstream/SICK', seed=params.seed),
            'STSBenchmark': lambda: STSBenchmarkEval(
                tpath + '/downstream/STS/STSBenchmark', seed=params.seed),
            'SICKEntailment': lambda: SICKEntailmentEval(
                tpath + '/downstream/SICK', seed=params.seed),
            'SNLI': lambda: SNLIEval(tpath + '/downstream/SNLI',
                                     seed=params.seed),
            'ImageCaptionRetrieval': lambda: ImageCaptionRetrievalEval(
                tpath + '/downstream/COCO', seed=params.seed),
            # added tasks
            'AmBrit': lambda: AmBritEval(tpath + '/downstream/AmBrit',
                                         seed=params.seed),
            'AmazonJa': lambda: AmazonJaEval(
                osp.join(tpath, 'downstream', name), seed=params.seed),
            'Rite2JaBC-Entailment': lambda: Rite2JaBCEntailmentEval(
                osp.join(tpath, 'downstream', 'Rite2'), seed=params.seed),
            'FormalityJa': lambda: FormalityJaEval(
                osp.join(tpath, 'downstream', name), seed=params.seed),
            # StyleSimJaEval is built without a seed, as before.
            'StyleSimJa': lambda: StyleSimJaEval(
                osp.join(tpath, 'downstream', name)),
            'WordContentJapanese': lambda: WordContentJapaneseEval(
                tpath + '/probing', seed=params.seed),
            # Probing Tasks
            'Length': lambda: LengthEval(tpath + '/probing',
                                         seed=params.seed),
            'WordContent': lambda: WordContentEval(tpath + '/probing',
                                                   seed=params.seed),
            'Depth': lambda: DepthEval(tpath + '/probing', seed=params.seed),
            'TopConstituents': lambda: TopConstituentsEval(
                tpath + '/probing', seed=params.seed),
            'BigramShift': lambda: BigramShiftEval(tpath + '/probing',
                                                   seed=params.seed),
            'Tense': lambda: TenseEval(tpath + '/probing', seed=params.seed),
            'SubjNumber': lambda: SubjNumberEval(tpath + '/probing',
                                                 seed=params.seed),
            'ObjNumber': lambda: ObjNumberEval(tpath + '/probing',
                                               seed=params.seed),
            'OddManOut': lambda: OddManOutEval(tpath + '/probing',
                                               seed=params.seed),
            'CoordinationInversion': lambda: CoordinationInversionEval(
                tpath + '/probing', seed=params.seed),
        }

        if name in ('STS12', 'STS13', 'STS14', 'STS15', 'STS16'):
            # Look the STS[NN]Eval class up by name in this module's
            # namespace instead of evaluating a code string.
            sts_class = globals()[name + 'Eval']
            self.evaluation = sts_class(
                tpath + '/downstream/STS/' + name + '-en-test',
                seed=params.seed)
        elif name in ('BEAN', 'MASC'):
            # Both tasks share one evaluation class, told apart by name.
            self.evaluation = BeanMascEval(osp.join(tpath, 'downstream', name),
                                           name,
                                           seed=params.seed)
        elif name in factories:
            self.evaluation = factories[name]()
        else:
            # A task that is listed but never wired up must not fall through
            # and silently reuse a previous self.evaluation.
            raise ValueError('No evaluation defined for task ' + str(name))

        self.params.current_task = name
        self.evaluation.do_prepare(self.params, self.prepare)

        self.results = self.evaluation.run(self.params, self.batcher)

        end = time.time()
        print(f'Eval {name} took {end - start} s')

        return self.results