def __init__(self, pretrained_model_path: str, layer_num: int, classes_num: int) -> None:
    """
    Probing classifier over a frozen pretrained model.

    Loads a pretrained main model, freezes its weights, and learns a
    classifier on top of the representations the model produces at
    layer ``layer_num``.

    Parameters
    ----------
    pretrained_model_path : ``str``
        Serialization directory of the main model which you want to probe
        at one of the layers.
    layer_num : ``int``
        Layer number of the pretrained model on which to learn a linear
        classifier probe.
    classes_num : ``int``
        Number of classes that the ProbingClassifier chooses from.
    """
    # NOTE: __init__ returns None per PEP 484; the previous annotation
    # (-> 'ProbingClassifier') was a typing defect.
    super(ProbingClassifier, self).__init__()
    # Freeze the pretrained model so only the probe layer is trained.
    self._pretrained_model = load_pretrained_model(pretrained_model_path)
    self._pretrained_model.trainable = False
    self._layer_num = layer_num
    # TODO(students): start
    # NOTE(review): a 'sigmoid' activation over `classes_num` units is unusual
    # for a linear probe — the sibling ProbingClassifier implementation emits
    # raw logits. Confirm the training loss expects probabilities here.
    self.linear_class = tf.keras.layers.Dense(classes_num, activation='sigmoid')
def __init__(self, pretrained_model_path: str, layer_num: int, classes_num: int) -> None:
    """
    Probing classifier over a frozen pretrained model.

    Loads a pretrained main model, freezes its weights, and learns a linear
    classifier on top of the representations the model produces at layer
    ``layer_num``.

    Parameters
    ----------
    pretrained_model_path : ``str``
        Serialization directory of the main model which you want to probe
        at one of the layers.
    layer_num : ``int``
        Layer number of the pretrained model on which to learn a linear
        classifier probe.
    classes_num : ``int``
        Number of classes that the ProbingClassifier chooses from.
    """
    # NOTE: __init__ returns None per PEP 484; the previous annotation
    # (-> 'ProbingClassifier') was a typing defect.
    super(ProbingClassifier, self).__init__()
    # Freeze the pretrained model so only the probe layer is trained.
    self._pretrained_model = load_pretrained_model(pretrained_model_path)
    self._pretrained_model.trainable = False
    self._layer_num = layer_num
    # TODO(students): start
    # Linear classifier for the probing task at the "layer_num'th" layer.
    # It has `classes_num` output units (raw logits, no activation). For the
    # IMDB sentiment and Bigram Prediction tasks classes_num = 2, as both
    # are binary classification problems.
    self._linear_classifier = tf.keras.layers.Dense(classes_num)
def __init__(
        self,
        pretrained_model_path: str,
        layer_num: int,
) -> None:
    """
    Probing model over a frozen pretrained encoder-decoder.

    Loads a pretrained main model, freezes its weights, and exposes the
    representations it generates at layer ``layer_num`` so a probe can be
    learned on top of these frozen features.

    Parameters
    ----------
    pretrained_model_path : ``str``
        Serialization directory of the main model which you want to probe
        at one of the layers.
    layer_num : ``int``
        Layer number of the pretrained model on which to learn a linear
        classifier probe.
    """
    # NOTE: __init__ returns None per PEP 484; the previous annotation
    # (-> 'ProbingEncoderDecoder') was a typing defect.
    super(ProbingEncoderDecoder, self).__init__()
    # Freeze the pretrained model; only probe parameters (defined elsewhere)
    # should receive gradient updates.
    self._pretrained_model = load_pretrained_model(pretrained_model_path)
    self._pretrained_model.trainable = False
    self._layer_num = layer_num
# Read train/validation data, then either restore a serialized model + vocab
# or build a fresh vocabulary (and, below, a fresh model config).
print("Reading training instances.")
train_instances = read_instances(args.train_data_file_path, MAX_NUM_TOKENS)
print("Reading validation instances.")
validation_instances = read_instances(args.validation_data_file_path, MAX_NUM_TOKENS)

if args.load_serialization_dir:
    # Restoring: model arguments on the command line are intentionally ignored;
    # everything comes from the serialization directory.
    print(f"Ignoring the model arguments and loading the "
          f"model from serialization_dir: {args.load_serialization_dir}")
    # Load Vocab
    vocab_path = os.path.join(args.load_serialization_dir, "vocab.txt")
    vocab_token_to_id, vocab_id_to_token = load_vocabulary(vocab_path)
    # Load Model
    classifier = load_pretrained_model(args.load_serialization_dir)
else:
    # Build Vocabulary, keeping only tokens that also occur in the GloVe
    # common-words list (blank lines are skipped).
    with open(GLOVE_COMMON_WORDS_PATH, encoding='utf8') as file:
        glove_common_words = [line.strip() for line in file.readlines() if line.strip()]
    vocab_token_to_id, vocab_id_to_token = build_vocabulary(
        train_instances, VOCAB_SIZE, glove_common_words)

    # Build Config and Model
    if args.model_name == "main":
        # NOTE(review): this config dict continues beyond the visible chunk.
        config = {
            "seq2vec_choice": args.seq2vec_choice,
            # Vocab may come out smaller than VOCAB_SIZE after filtering.
            "vocab_size": min(VOCAB_SIZE, len(vocab_token_to_id)),
            "embedding_dim": args.embedding_dim,
def main():
    """Train a FitNet student network against a frozen teacher.

    Parses CLI args, loads student/teacher checkpoints, builds CIFAR-10/100
    data loaders, then for each epoch: trains, evaluates, and saves a
    checkpoint (teacher weights are saved only with the first epoch).
    """
    global args
    args = parser.parse_args()
    print(args)

    # Make sure the checkpoint directory exists before training starts.
    if not os.path.exists(os.path.join(args.save_root,'checkpoint')):
        os.makedirs(os.path.join(args.save_root,'checkpoint'))
    if args.cuda:
        cudnn.benchmark = True

    print('----------- Network Initialization --------------')
    # Student network: initialized from args.s_init weights, then trained.
    snet = define_tsnet(name=args.s_name, num_class=args.num_class, cuda=args.cuda)
    checkpoint = torch.load(args.s_init)
    load_pretrained_model(snet, checkpoint['net'])

    # Teacher network: loaded, frozen, and kept in eval mode throughout.
    tnet = define_tsnet(name=args.t_name, num_class=args.num_class, cuda=args.cuda)
    checkpoint = torch.load(args.t_model)
    load_pretrained_model(tnet, checkpoint['net'])
    tnet.eval()
    for param in tnet.parameters():
        param.requires_grad = False
    print('-----------------------------------------------')

    # initialize optimizer (SGD with Nesterov momentum; only the student trains)
    optimizer = torch.optim.SGD(snet.parameters(),
                                lr = args.lr,
                                momentum = args.momentum,
                                weight_decay = args.weight_decay,
                                nesterov = True)

    # define loss functions: CE for classification, MSE for the FitNet
    # hint/guided feature-matching term
    if args.cuda:
        criterionCls = torch.nn.CrossEntropyLoss().cuda()
        criterionFitnet = torch.nn.MSELoss().cuda()
    else:
        criterionCls = torch.nn.CrossEntropyLoss()
        criterionFitnet = torch.nn.MSELoss()

    # define transforms (per-dataset channel statistics)
    if args.data_name == 'cifar10':
        dataset = dst.CIFAR10
        mean = (0.4914, 0.4822, 0.4465)
        std  = (0.2470, 0.2435, 0.2616)
    elif args.data_name == 'cifar100':
        dataset = dst.CIFAR100
        mean = (0.5071, 0.4865, 0.4409)
        std  = (0.2673, 0.2564, 0.2762)
    else:
        raise Exception('invalid dataset name...')

    # Train: reflect-pad + random 32x32 crop + horizontal flip augmentation.
    train_transform = transforms.Compose([
        transforms.Pad(4, padding_mode='reflect'),
        transforms.RandomCrop(32),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean,std=std)
    ])
    # Test: deterministic center crop only (no-op on native 32x32 images).
    test_transform = transforms.Compose([
        transforms.CenterCrop(32),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean,std=std)
    ])

    # define data loader
    train_loader = torch.utils.data.DataLoader(
        dataset(root = args.img_root,
                transform = train_transform,
                train = True,
                download = True),
        batch_size=args.batch_size, shuffle=True, num_workers=4, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        dataset(root = args.img_root,
                transform = test_transform,
                train = False,
                download = True),
        batch_size=args.batch_size, shuffle=False, num_workers=4, pin_memory=True)

    for epoch in range(1, args.epochs+1):
        epoch_start_time = time.time()
        adjust_lr(optimizer, epoch)

        # train one epoch
        nets = {'snet':snet, 'tnet':tnet}
        criterions = {'criterionCls':criterionCls, 'criterionFitnet':criterionFitnet}
        train(train_loader, nets, optimizer, criterions, epoch)
        epoch_time = time.time() - epoch_start_time
        print('one epoch time is {:02}h{:02}m{:02}s'.format(*transform_time(epoch_time)))

        # evaluate on testing set
        print('testing the models......')
        test_start_time = time.time()
        test(test_loader, nets, criterions)
        test_time = time.time() - test_start_time
        print('testing time is {:02}h{:02}m{:02}s'.format(*transform_time(test_time)))

        # save model
        # NOTE(review): name slicing [6:] presumably strips a 'resnet' prefix
        # from the architecture names — verify against define_tsnet's naming.
        print('saving models......')
        save_name = 'fitnet_r{}_r{}_{:>03}.ckp'.format(args.t_name[6:], args.s_name[6:], epoch)
        save_name = os.path.join(args.save_root, 'checkpoint', save_name)
        if epoch == 1:
            # The frozen teacher never changes, so its weights are stored
            # only once, alongside the first-epoch student checkpoint.
            save_checkpoint({
                'epoch': epoch,
                'snet': snet.state_dict(),
                'tnet': tnet.state_dict(),
            }, save_name)
        else:
            save_checkpoint({
                'epoch': epoch,
                'snet': snet.state_dict(),
            }, save_name)
help="Location of output file")
# NOTE(review): the line above is the tail of a parser.add_argument call whose
# opening is outside this chunk.
parser.add_argument('--batch-size', type=int, help="size of batch", default=32)

args = parser.parse_args()

# Cap instance length at tokenization time.
MAX_NUM_TOKENS = 250
test_instances = read_instances(args.data_file_path, MAX_NUM_TOKENS, test=True)

# Index test tokens with the vocabulary saved alongside the trained model.
vocabulary_path = os.path.join(args.load_serialization_dir, "vocab.txt")
vocab_token_to_id, _ = load_vocabulary(vocabulary_path)
test_instances = index_instances(test_instances, vocab_token_to_id)

# load config
config_path = os.path.join(args.load_serialization_dir, "config.json")
with open(config_path, 'r') as f:
    config = json.load(f)

# load model
model = load_pretrained_model(args.load_serialization_dir)

# Run inference; predictions are written to args.prediction_file if given.
predict(model, test_instances, args.batch_size, args.prediction_file)

if args.prediction_file:
    print(f"predictions stored at: {args.prediction_file}")
                               for path in [vocab_path, config_path, weights_path]])
    # NOTE(review): the line above closes an all(...) existence check started
    # outside this chunk; this body runs inside an enclosing loop over
    # seq2vec_name values (the `continue` below confirms the loop).
    if not model_files_present:
        # Model not trained yet: queue the training command instead of loading.
        epochs = 8 if seq2vec_name == "dan" else 4 # gru is slow, use only 4 epochs
        training_command = (f"python train.py main "
                            f"data/imdb_sentiment_train_5k.jsonl "
                            f"data/imdb_sentiment_dev.jsonl "
                            f"--seq2vec-choice {seq2vec_name} "
                            f"--embedding-dim 50 "
                            f"--num-layers 4 "
                            f"--num-epochs {epochs} "
                            f"--suffix-name _{seq2vec_name}_5k_with_emb "
                            f"--pretrained-embedding-file data/glove.6B.50d.txt ")
        training_commands.append(training_command)
        continue
    # All files present: load the model and its vocabulary, keyed by seq2vec name.
    model = load_pretrained_model(serialization_dir)
    models[seq2vec_name] = model
    vocab, _ = load_vocabulary(vocab_path)
    vocabs[seq2vec_name] = vocab

# If anything was missing, tell the user how to train it and stop here.
if training_commands:
    print("\nFirst, please finish the missing model training using the following commands:")
    print("\n".join(training_commands))
    exit()

# Base sentence for the perturbation analysis; `updates` are substitute words.
original_instance = {"text_tokens": "the film performances were awesome".split()}
updates = ["worst", "okay", "cool"]
updated_instances = []