#!/opt/conda/bin/python
# -*- coding: utf-8 -*-
"""Console-script shim that launches TAPE training via run_train()."""
import re
import sys

from tape.main import run_train

if __name__ == '__main__':
    # Strip platform-specific wrapper suffixes (Windows '-script.pyw' /
    # '.exe') from argv[0] so usage/help output shows the bare command name.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    # Propagate run_train()'s return value as the process exit status.
    sys.exit(run_train())
""" This file is used for calling run_train() in main.py """ from tape import main main.run_train()
This takes in a list of outputs from the dataset's __getitem__ method. You can use the `pad_sequences` helper function to pad a list of numpy arrays. """ input_ids, input_mask, ss_label = tuple(zip(*batch)) input_ids = torch.from_numpy(pad_sequences(input_ids, 0)) input_mask = torch.from_numpy(pad_sequences(input_mask, 0)) ss_label = torch.from_numpy(pad_sequences(ss_label, -1)) output = { 'input_ids': input_ids, 'input_mask': input_mask, 'targets': ss_label } return output registry.register_task_model('secondary_structure_8', 'transformer', ProteinBertForSequenceToSequenceClassification) if __name__ == '__main__': """ To actually run the task, you can do one of two things. You can simply import the appropriate run function from tape.main. The possible functions are `run_train`, `run_train_distributed`, and `run_eval`. Alternatively, you can add this dataset directly to tape/datasets.py. """ from tape.main import run_train run_train()