Example #1
    def __init__(self,
                 pretrained_model_path: str,
                 layer_num: int,
                 classes_num: int) -> None:
        """
        Loads a pretrained main model, takes the representations it
        produces at a given layer for the input, and learns a linear
        classifier on top of these frozen features.

        Parameters
        ----------
        pretrained_model_path : ``str``
            Serialization directory of the main model which you
            want to probe at one of the layers.
        layer_num : ``int``
            Layer number of the pretrained model on which to learn
            a linear classifier probe.
        classes_num : ``int``
            Number of classes that the ProbingClassifier chooses from.
        """
        super(ProbingClassifier, self).__init__()
        self._pretrained_model = load_pretrained_model(pretrained_model_path)
        self._pretrained_model.trainable = False
        self._layer_num = layer_num

        # TODO(students): start
        # linear probe (sigmoid outputs) over the frozen features of layer `layer_num`
        self.linear_class = tf.keras.layers.Dense(classes_num, activation='sigmoid')
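A minimal sketch of how this probe might be applied in a `call` method, assuming the frozen model returns a list of per-layer representations of shape (batch, seq_len, hidden_dim); the output format and the mean-pooling are assumptions, not part of the snippet:

    def call(self, inputs, training=False):
        # run the frozen pretrained model; stop gradients so that only
        # the linear probe's weights receive updates
        layer_outputs = self._pretrained_model(inputs, training=False)
        features = tf.stop_gradient(layer_outputs[self._layer_num])
        # mean-pool over the token dimension to get one vector per example
        pooled = tf.reduce_mean(features, axis=1)
        return self.linear_class(pooled)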
    def __init__(self, pretrained_model_path: str, layer_num: int,
                 classes_num: int) -> None:
        """
        Loads a pretrained main model, takes the representations it
        produces at a given layer for the input, and learns a linear
        classifier on top of these frozen features.

        Parameters
        ----------
        pretrained_model_path : ``str``
            Serialization directory of the main model which you
            want to probe at one of the layers.
        layer_num : ``int``
            Layer number of the pretrained model on which to learn
            a linear classifier probe.
        classes_num : ``int``
            Number of classes that the ProbingClassifier chooses from.
        """
        super(ProbingClassifier, self).__init__()
        self._pretrained_model = load_pretrained_model(pretrained_model_path)
        self._pretrained_model.trainable = False
        self._layer_num = layer_num

        # TODO(students): start
        # Define a linear classifier layer for the probing task at the `layer_num`-th layer. Its output
        # dimension is `classes_num`, the number of classes the classifier chooses from. For both the IMDB
        # sentiment analysis task and the bigram prediction task, classes_num = 2, since both are binary.
        self._linear_classifier = tf.keras.layers.Dense(classes_num)
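Since this probe applies no activation, it emits raw logits, so the matching loss should apply the softmax itself. A minimal sketch, assuming standard Keras losses are used (the loss choice is an assumption):

    # pair the logit-emitting probe with a from_logits loss
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)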
    def __init__(
        self,
        pretrained_model_path: str,
        layer_num: int,
    ) -> None:
        """
                It loads a pretrained main model. On the given input,
                it takes the representations it generates on certain layer
                and learns a linear classifier on top of these frozen
                features.

                Parameters
                ----------
                pretrained_model_path : ``str``
                    Serialization directory of the main model which you
                    want to probe at one of the layers.
                layer_num : ``int``
                    Layer number of the pretrained model on which to learn
                    a linear classifier probe.
                    """
        super(ProbingEncoderDecoder, self).__init__()
        self._pretrained_model = load_pretrained_model(pretrained_model_path)
        self._pretrained_model.trainable = False
        self._layer_num = layer_num
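The probe layer itself is not shown in this fragment. One way to expose a given layer's output from a frozen Keras model is a functional sub-model; a minimal sketch, assuming self._pretrained_model is a functional tf.keras.Model (an assumption about the snippet's model class):

        # hypothetical: a feature extractor that stops at layer_num
        self._feature_extractor = tf.keras.Model(
            inputs=self._pretrained_model.input,
            outputs=self._pretrained_model.layers[layer_num].output)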
    print("Reading training instances.")
    train_instances = read_instances(args.train_data_file_path, MAX_NUM_TOKENS)
    print("Reading validation instances.")
    validation_instances = read_instances(args.validation_data_file_path,
                                          MAX_NUM_TOKENS)

    if args.load_serialization_dir:
        print(f"Ignoring the model arguments and loading the "
              f"model from serialization_dir: {args.load_serialization_dir}")

        # Load Vocab
        vocab_path = os.path.join(args.load_serialization_dir, "vocab.txt")
        vocab_token_to_id, vocab_id_to_token = load_vocabulary(vocab_path)

        # Load Model
        classifier = load_pretrained_model(args.load_serialization_dir)
    else:
        # Build Vocabulary
        with open(GLOVE_COMMON_WORDS_PATH, encoding='utf8') as file:
            glove_common_words = [
                line.strip() for line in file.readlines() if line.strip()
            ]
        vocab_token_to_id, vocab_id_to_token = build_vocabulary(
            train_instances, VOCAB_SIZE, glove_common_words)

        # Build Config and Model
        if args.model_name == "main":
            config = {
                "seq2vec_choice": args.seq2vec_choice,
                "vocab_size": min(VOCAB_SIZE, len(vocab_token_to_id)),
                "embedding_dim": args.embedding_dim,
def main():
	global args
	args = parser.parse_args()
	print(args)

	if not os.path.exists(os.path.join(args.save_root,'checkpoint')):
		os.makedirs(os.path.join(args.save_root,'checkpoint'))

	if args.cuda:
		cudnn.benchmark = True

	print('----------- Network Initialization --------------')
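	# student network (snet) is trained; the teacher network (tnet) below is frozen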
	snet = define_tsnet(name=args.s_name, num_class=args.num_class, cuda=args.cuda)
	checkpoint = torch.load(args.s_init)
	load_pretrained_model(snet, checkpoint['net'])

	tnet = define_tsnet(name=args.t_name, num_class=args.num_class, cuda=args.cuda)
	checkpoint = torch.load(args.t_model)
	load_pretrained_model(tnet, checkpoint['net'])
	tnet.eval()
	for param in tnet.parameters():
		param.requires_grad = False
	print('-----------------------------------------------')

	# initialize optimizer
	optimizer = torch.optim.SGD(snet.parameters(),
								lr = args.lr, 
								momentum = args.momentum, 
								weight_decay = args.weight_decay,
								nesterov = True)

	# define loss functions
	if args.cuda:
		criterionCls    = torch.nn.CrossEntropyLoss().cuda()
		criterionFitnet = torch.nn.MSELoss().cuda()
	else:
		criterionCls    = torch.nn.CrossEntropyLoss()
		criterionFitnet = torch.nn.MSELoss()

	# define transforms
	if args.data_name == 'cifar10':
		dataset = dst.CIFAR10
		mean = (0.4914, 0.4822, 0.4465)
		std  = (0.2470, 0.2435, 0.2616)
	elif args.data_name == 'cifar100':
		dataset = dst.CIFAR100
		mean = (0.5071, 0.4865, 0.4409)
		std  = (0.2673, 0.2564, 0.2762)
	else:
		raise Exception('invalid dataset name...')

	train_transform = transforms.Compose([
			transforms.Pad(4, padding_mode='reflect'),
			transforms.RandomCrop(32),
			transforms.RandomHorizontalFlip(),
			transforms.ToTensor(),
			transforms.Normalize(mean=mean,std=std)
		])
	test_transform = transforms.Compose([
			transforms.CenterCrop(32),
			transforms.ToTensor(),
			transforms.Normalize(mean=mean,std=std)
		])

	# define data loader
	train_loader = torch.utils.data.DataLoader(
			dataset(root      = args.img_root,
					transform = train_transform,
					train     = True,
					download  = True),
			batch_size=args.batch_size, shuffle=True, num_workers=4, pin_memory=True)
	test_loader = torch.utils.data.DataLoader(
			dataset(root      = args.img_root,
					transform = test_transform,
					train     = False,
					download  = True),
			batch_size=args.batch_size, shuffle=False, num_workers=4, pin_memory=True)

	for epoch in range(1, args.epochs+1):
		epoch_start_time = time.time()

		adjust_lr(optimizer, epoch)

		# train one epoch
		nets = {'snet':snet, 'tnet':tnet}
		criterions = {'criterionCls':criterionCls, 'criterionFitnet':criterionFitnet}
		train(train_loader, nets, optimizer, criterions, epoch)
		epoch_time = time.time() - epoch_start_time
		print('one epoch time is {:02}h{:02}m{:02}s'.format(*transform_time(epoch_time)))

		# evaluate on testing set
		print('testing the models......')
		test_start_time = time.time()
		test(test_loader, nets, criterions)
		test_time = time.time() - test_start_time
		print('testing time is {:02}h{:02}m{:02}s'.format(*transform_time(test_time)))

		# save model
		print('saving models......')
		save_name = 'fitnet_r{}_r{}_{:>03}.ckp'.format(args.t_name[6:], args.s_name[6:], epoch)
		save_name = os.path.join(args.save_root, 'checkpoint', save_name)
		if epoch == 1:
			save_checkpoint({
				'epoch': epoch,
				'snet': snet.state_dict(),
				'tnet': tnet.state_dict(),
			}, save_name)
		else:
			save_checkpoint({
				'epoch': epoch,
				'snet': snet.state_dict(),
			}, save_name)
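The train helper is not shown above. A minimal sketch of the per-batch update it presumably performs, assuming each network returns a (features, logits) pair as FitNet-style models commonly do; the unpacking, the call signature, and the unit weighting of the hint loss are assumptions:

	def train_step(img, target, nets, optimizer, criterions):
		snet, tnet = nets['snet'], nets['tnet']
		feat_s, out_s = snet(img)      # student features and logits (assumed API)
		with torch.no_grad():
			feat_t, _ = tnet(img)      # teacher features; teacher is frozen
		cls_loss = criterions['criterionCls'](out_s, target)
		# FitNet hint loss: regress student features onto teacher features
		fitnet_loss = criterions['criterionFitnet'](feat_s, feat_t)
		loss = cls_loss + fitnet_loss
		optimizer.zero_grad()
		loss.backward()
		optimizer.step()
		return loss.item()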
Example #6
                        help="Location of output file")
    parser.add_argument('--batch-size',
                        type=int,
                        help="size of batch",
                        default=32)

    args = parser.parse_args()

    MAX_NUM_TOKENS = 250
    test_instances = read_instances(args.data_file_path,
                                    MAX_NUM_TOKENS,
                                    test=True)

    vocabulary_path = os.path.join(args.load_serialization_dir, "vocab.txt")
    vocab_token_to_id, _ = load_vocabulary(vocabulary_path)

    test_instances = index_instances(test_instances, vocab_token_to_id)

    # load config
    config_path = os.path.join(args.load_serialization_dir, "config.json")
    with open(config_path, 'r') as f:
        config = json.load(f)

    # load model
    model = load_pretrained_model(args.load_serialization_dir)

    predict(model, test_instances, args.batch_size, args.prediction_file)

    if args.prediction_file:
        print(f"predictions stored at: {args.prediction_file}")
                                   for path in [vocab_path, config_path, weights_path]])
        if not model_files_present:
            epochs = 8 if seq2vec_name == "dan" else 4 # gru is slow, use only 4 epochs
            training_command = (f"python train.py main "
                                f"data/imdb_sentiment_train_5k.jsonl "
                                f"data/imdb_sentiment_dev.jsonl "
                                f"--seq2vec-choice {seq2vec_name} "
                                f"--embedding-dim 50 "
                                f"--num-layers 4 "
                                f"--num-epochs {epochs} "
                                f"--suffix-name _{seq2vec_name}_5k_with_emb "
                                f"--pretrained-embedding-file data/glove.6B.50d.txt ")
            training_commands.append(training_command)
            continue

        model = load_pretrained_model(serialization_dir)
        models[seq2vec_name] = model

        vocab, _ = load_vocabulary(vocab_path)
        vocabs[seq2vec_name] = vocab

    if training_commands:
        print("\nFirst, please finish the missing model training using the following commands:")
        print("\n".join(training_commands))
        exit()


    original_instance = {"text_tokens": "the film performances were awesome".split()}
    updates = ["worst", "okay", "cool"]

    updated_instances = []