def load_saved_state(self,
                     args: Namespace,
                     unparsed_args: List[str],
                     metadata: Tuple[Tokenizer, Embedding,
                                     List[WordFeature], List[VecFeature]],
                     state: NeuralPredictorState) -> None:
    """Restore this predictor from saved metadata and weights.

    ``metadata`` is unpacked into the tokenizer, the embedding and the two
    feature-function lists.  A model is then built by ``self._get_model``
    from the embedding's and the tokenizer's token counts, moved through
    ``maybe_cuda``, and loaded with ``state.weights``.  The loss, epoch and
    both argument collections are recorded on the instance.
    """
    (tokenizer, embedding,
     word_feature_functions, vec_feature_functions) = metadata
    self._tokenizer = tokenizer
    self._embedding = embedding
    self._word_feature_functions = word_feature_functions
    self._vec_feature_functions = vec_feature_functions

    num_embedding_tokens = self._embedding.num_tokens()
    num_tokenizer_tokens = self._tokenizer.numTokens()
    fresh_model = self._get_model(args, num_embedding_tokens,
                                  num_tokenizer_tokens)
    self._model = maybe_cuda(fresh_model)
    self._model.load_state_dict(state.weights)

    self.training_loss = state.loss
    self.num_epochs = state.epoch
    self.training_args = args
    self.unparsed_args = unparsed_args
def forward(self,
            stems_batch: torch.LongTensor,
            goals_encoded_batch: torch.FloatTensor,
            hyps_batch: torch.LongTensor) -> torch.FloatTensor:
    """Run the hypothesis GRU over every column of ``hyps_batch``.

    The initial hidden state comes from ``self._in_hidden`` applied to the
    embedded stems concatenated with ``goals_encoded_batch``.  Each column of
    ``hyps_batch`` is embedded, passed through a ReLU and fed to
    ``self._hyp_gru``.  The output of the last step is returned after
    ``squeeze()``.
    """
    stems_on_device = maybe_cuda(Variable(stems_batch))
    hyps_on_device = maybe_cuda(Variable(hyps_batch))
    batch_size = stems_batch.size()[0]
    assert goals_encoded_batch.size()[0] == batch_size
    assert hyps_batch.size()[0] == batch_size, \
        "batch_size: {}; hyps_batch_size()[0]: {}"\
        .format(batch_size, hyps_batch.size()[0])

    embedded_stems = self._stem_embedding(stems_on_device)
    stem_encoded = embedded_stems.view(batch_size, self.hidden_size)
    stem_and_goal = torch.cat((stem_encoded, goals_encoded_batch), dim=1)
    hidden = self._in_hidden(stem_and_goal)\
        .view(1, batch_size, self.hidden_size)

    num_columns = hyps_batch.size()[1]
    for column in range(num_columns):
        embedded_tokens = self._token_embedding(hyps_on_device[:, column])
        gru_input = F.relu(
            embedded_tokens.view(1, batch_size, self.hidden_size))
        token_out, hidden = self._hyp_gru(gru_input, hidden)

    # NOTE(review): squeeze() drops *every* size-one dimension, so a batch of
    # one loses its batch axis too, and an empty second dimension leaves
    # token_out unbound.  Confirm callers never hit either case.
    return token_out.squeeze()
def forward(self, stem_batch: torch.LongTensor,
            goal_batch: torch.LongTensor) \
        -> torch.FloatTensor:
    """Score every goal position, plus a trailing end token, with the GRU.

    The GRU's initial hidden state is the embedded stem.  Each column of
    ``goal_batch`` is embedded, passed through a ReLU and the GRU, and the
    GRU output is mapped by ``self._likelyhood_layer``.  After the last
    column, an ``EOS_token`` step is run from the final hidden state and its
    likelihood is placed *first* in the result.

    Returns:
        The per-step likelihoods concatenated along dimension 1, with the
        end-token likelihood at index 0.

    Raises:
        RuntimeError: re-raised from the embedding/GRU step after the
            offending goal column has been dumped through ``eprint``.
    """
    goal_var = maybe_cuda(Variable(goal_batch))
    stem_var = maybe_cuda(Variable(stem_batch))
    batch_size = goal_batch.size()[0]
    assert stem_batch.size()[0] == batch_size
    initial_hidden = self._stem_embedding(stem_var)\
        .view(1, batch_size, self.hidden_size)
    hidden = initial_hidden
    copy_likelyhoods: List[torch.FloatTensor] = []
    for i in range(goal_batch.size()[1]):
        # Reset before each step so the diagnostic handler below never reads
        # an unbound (first iteration) or stale (later iterations) value when
        # the embedding lookup itself is what raised.
        token_batch = None
        try:
            token_batch = self._token_embedding(goal_var[:, i])\
                .view(1, batch_size, self.hidden_size)
            token_batch2 = F.relu(token_batch)
            token_out, hidden = self._gru(token_batch2, hidden)
            copy_likelyhood = self._likelyhood_layer(F.relu(token_out))
            copy_likelyhoods.append(copy_likelyhood[0])
        except RuntimeError:
            # Dump the column that was being processed, then re-raise.
            eprint("Tokenized goal:")
            for j in range(goal_batch.size()[0]):
                eprint(goal_batch[j, i].item(), end=" ")
                # NOTE(review): 123 is a hard-coded bound, presumably the
                # token vocabulary size at the time of debugging -- confirm.
                assert goal_batch[j, i] < 123
            eprint()
            eprint(f"goal_var: {goal_var}")
            eprint("Token batch")
            eprint(token_batch)
            raise
    end_token_embedded = self._token_embedding(LongTensor([EOS_token])
                                               .expand(batch_size))\
        .view(1, batch_size, self.hidden_size)
    final_out, final_hidden = self._gru(F.relu(end_token_embedded), hidden)
    final_likelyhood = self._likelyhood_layer(F.relu(final_out))
    # The end-token likelihood goes first, ahead of the goal positions.
    copy_likelyhoods.insert(0, final_likelyhood[0])
    catted = torch.cat(copy_likelyhoods, dim=1)
    return catted
def forward(self,
            goal_batch: torch.LongTensor,
            hyp_batch: torch.LongTensor,
            vec_features_batch: torch.FloatTensor,
            word_features_batch: torch.LongTensor) -> torch.FloatTensor:
    """Combine the encoded goal, hypothesis and features into one output.

    The three encoder outputs and the vector features are concatenated along
    dimension 1, then passed through ReLU -> ``self._layer`` -> ReLU ->
    ``self._out_layer`` -> ``self._softmax``.
    """
    pieces = (
        self._goal_encoder(goal_batch),
        self._hyp_encoder(hyp_batch),
        self._word_features_encoder(word_features_batch),
        maybe_cuda(vec_features_batch),
    )
    hidden = self._layer(F.relu(torch.cat(pieces, dim=1)))
    scores = self._out_layer(F.relu(hidden))
    return self._softmax(scores)
def __init__(self,
             vec_features_size: int,
             word_feature_vocab_sizes: List[int],
             term_token_vocab_size: int,
             hidden_size: int,
             num_layers: int,
             tactic_vocab_size: int) -> None:
    """Create the goal, hypothesis and word-feature encoders and two layers.

    The first linear layer takes the three ``hidden_size``-wide encoder
    outputs plus ``vec_features_size`` extra inputs.  The second maps
    ``hidden_size`` to ``tactic_vocab_size`` and is followed by a
    log-softmax over dimension 1.
    """
    super().__init__()
    # Sub-modules are created in a fixed order; keep it stable so parameter
    # registration order does not change.
    self._goal_encoder = EncoderRNN(term_token_vocab_size,
                                    hidden_size, hidden_size)
    self._hyp_encoder = EncoderRNN(term_token_vocab_size,
                                   hidden_size, hidden_size)
    word_encoder_layers = num_layers - 1
    self._word_features_encoder = WordFeaturesEncoder(
        word_feature_vocab_sizes, hidden_size,
        word_encoder_layers, hidden_size)
    combined_width = hidden_size * 3 + vec_features_size
    self._layer = nn.Linear(combined_width, hidden_size)
    self._out_layer = nn.Linear(hidden_size, tactic_vocab_size)
    self._softmax = maybe_cuda(nn.LogSoftmax(dim=1))
def load_saved_state(self,
                     args: argparse.Namespace,
                     unparsed_args: List[str],
                     state: FeaturesDNNEvaluatorState) -> None:
    """Restore the evaluator from a saved ``(token map, neural state)`` pair.

    The picklable token map is converted back with ``tmap_from_picklable``,
    the feature vocabulary sizes are derived from it, and a model built from
    those sizes is loaded with the saved weights.  Loss, epoch and both
    argument collections are stored on the instance.
    """
    picklable_tmap, neural_state = state
    self.features_token_map = tmap_from_picklable(picklable_tmap)

    sizes = features_vocab_sizes(self.features_token_map)
    word_features_vocab_sizes, vec_features_size = sizes
    fresh_model = self._get_model(args, word_features_vocab_sizes,
                                  vec_features_size)
    self._model = maybe_cuda(fresh_model)
    self._model.load_state_dict(neural_state.weights)

    self.training_loss = neural_state.loss
    self.num_epochs = neural_state.epoch
    self.training_args = args
    self.unparsed_args = unparsed_args
def load_saved_state(self,
                     args: Namespace,
                     unparsed_args: List[str],
                     metadata: Any,
                     state: NeuralPredictorState) -> None:
    """Rebuild the model described by ``metadata`` and load saved weights.

    The model is only stored on the instance after ``load_state_dict`` has
    succeeded.  Loss, epoch, arguments and the metadata itself are recorded
    afterwards.
    """
    word_feature_vocab_sizes = get_word_feature_vocab_sizes(metadata)
    vec_features_size = get_vec_features_size(metadata)
    num_indices = get_num_indices(metadata)
    num_tokens = get_num_tokens(metadata)
    restored = maybe_cuda(self._get_model(args,
                                          word_feature_vocab_sizes,
                                          vec_features_size,
                                          num_indices,
                                          num_tokens))
    restored.load_state_dict(state.weights)

    self._model = restored
    self.training_loss = state.loss
    self.num_epochs = state.epoch
    self.training_args = args
    self.unparsed_args = unparsed_args
    self._metadata = metadata
def forward(self, input_vec: torch.LongTensor) -> torch.FloatTensor:
    """Embed each word-feature column and run the result through the layers.

    Column ``i`` of ``input_vec`` is embedded by ``self._word_embedding{i}``.
    The embeddings are concatenated along dimension 1 and fed to
    ``self._in_layer``, then to ``num_layers - 1`` ReLU + ``self._layer{i}``
    stages, a final ReLU, and ``self._out_layer``.  The result is viewed as
    ``(batch_size, -1)``.
    """
    batch_size = input_vec.size()[0]

    def embed_column(index: int) -> torch.FloatTensor:
        column = maybe_cuda(Variable(input_vec[:, index]))
        embedding = getattr(self, "_word_embedding{}".format(index))
        return embedding(column).view(batch_size, self.hidden_size)

    embedded_columns = [embed_column(index)
                        for index in range(self.num_word_features)]
    vals = self._in_layer(torch.cat(embedded_columns, dim=1))

    for layer_index in range(self.num_layers - 1):
        hidden_layer = getattr(self, "_layer{}".format(layer_index))
        vals = hidden_layer(F.relu(vals))

    return self._out_layer(F.relu(vals)).view(batch_size, -1)
def train(self,
          samples: List[Tuple[TacticContext, str, float]],
          batch_size: Optional[int] = None,
          num_epochs: int = 1) -> None:
    """Fit the model to ``(context, action, score)`` samples.

    Every score must be below 2000 and be neither infinite nor NaN; this is
    checked with assertions before anything else happens.  Word features are
    the encoded action followed by the state's word features.  With a
    ``batch_size`` the data is shuffled into full-size mini-batches (the
    incomplete last batch is dropped); otherwise one batch holding all
    samples is used.
    """
    for _, _, score in samples:
        assert score < 2000, score
        assert not (score == float("-Inf") or score == float("Inf")
                    or score != score)

    self.optimizer.zero_grad()

    state_features = [self._features(state) for state, _, _ in samples]
    state_word_features, vec_features = zip(*state_features)
    encoded_actions = [self._encode_action(state, action)
                       for state, action, _ in samples]
    all_word_features = [
        list(encoded_action) + word_features
        for encoded_action, word_features
        in zip(encoded_actions, state_word_features)
    ]
    expected_outputs = [expected for _, _, expected in samples]

    tensors = [
        torch.LongTensor(all_word_features),
        torch.FloatTensor(vec_features),
        torch.FloatTensor(expected_outputs),
    ]
    batches: Iterable[Sequence[torch.Tensor]]
    if batch_size:
        batches = data.DataLoader(data.TensorDataset(*tensors),
                                  batch_size=batch_size,
                                  num_workers=0,
                                  shuffle=True,
                                  pin_memory=True,
                                  drop_last=True)
    else:
        batches = [tensors]

    for _ in range(0, num_epochs):
        for word_features_batch, vec_features_batch, \
                expected_outputs_batch in batches:
            self.optimizer.zero_grad()
            outputs = self.model(word_features_batch, vec_features_batch)
            loss = self.criterion(outputs,
                                  maybe_cuda(expected_outputs_batch))
            loss.backward()
            self.optimizer.step()
def train(self,
          samples: List[Tuple[TacticContext, str, float, float]],
          batch_size: Optional[int] = None,
          num_epochs: int = 1,
          show_loss: bool = False) -> None:
    """Fit the model to ``(context, action, certainty, score)`` samples.

    Scores must be neither infinite nor NaN (checked by assertion).  The
    learning-rate adjuster is stepped after every batch.  When ``show_loss``
    is set, the mean epoch loss and the current learning rate are reported
    through ``eprint`` after the last batch of every tenth epoch.
    """
    for _, _, _, score in samples:
        assert not (score == float("-Inf") or score == float("Inf")
                    or score != score)

    input_tensors = list(self.get_input_tensors(samples))
    expected_outputs = torch.FloatTensor(
        [expected for _, _, _, expected in samples])
    all_tensors = input_tensors + [expected_outputs]

    batches: Sequence[Sequence[torch.Tensor]]
    if batch_size:
        batches = data.DataLoader(data.TensorDataset(*all_tensors),
                                  batch_size=batch_size,
                                  num_workers=0,
                                  shuffle=True,
                                  pin_memory=True,
                                  drop_last=True)
    else:
        batches = [all_tensors]

    for epoch in range(0, num_epochs):
        epoch_loss = 0.
        for idx, batch in enumerate(batches):
            self.optimizer.zero_grad()
            word_features_batch, vec_features_batch, \
                expected_outputs_batch = batch
            outputs = self.model(word_features_batch, vec_features_batch)
            loss = self.criterion(outputs,
                                  maybe_cuda(expected_outputs_batch))
            loss.backward()
            self.optimizer.step()
            self.adjuster.step()
            self.total_batches += 1
            epoch_loss += loss.item()

            # Only report on the last batch of every tenth epoch.
            should_report = (show_loss and epoch % 10 == 0
                             and idx == len(batches) - 1)
            eprint(epoch_loss / len(batches), guard=should_report)
            eprint("Batch {}: Learning rate {:.12f}".format(
                self.total_batches,
                self.optimizer.param_groups[0]['lr']),
                   guard=should_report)
def checkpoints(self, inputs: List[List[float]], outputs: List[int]) \
        -> Iterable[NeuralPredictorState]:
    """Train ``self._model`` and yield one state per completed epoch.

    The data is shuffled into full-size batches; an incomplete final batch
    is dropped, so ``num_batches`` counts whole batches only.  Epochs are
    numbered ``1 .. self.num_epochs`` inclusive, which is what the progress
    computation below assumes (it reaches 100% at the end of epoch
    ``num_epochs``).

    Args:
        inputs: One feature vector per sample.
        outputs: The target class index for each sample.

    Yields:
        A ``NeuralPredictorState`` holding the epoch number, the mean batch
        loss of that epoch and the model's ``state_dict``.
    """
    print("Building tensors")
    dataloader = data.DataLoader(
        data.TensorDataset(torch.FloatTensor(inputs),
                           torch.LongTensor(outputs)),
        batch_size=self.batch_size, num_workers=0,
        shuffle=True, pin_memory=True, drop_last=True)
    num_batches = int(len(inputs) / self.batch_size)
    dataset_size = num_batches * self.batch_size

    print("Initializing model...")
    training_start = time.time()
    # Inclusive upper bound: the previous range(1, num_epochs) stopped one
    # epoch short of the requested count.
    for epoch in range(1, self.num_epochs + 1):
        # NOTE(review): the scheduler is stepped before the optimizer's
        # first update; recent PyTorch expects optimizer.step() first.
        # Left as-is to keep the learning-rate schedule unchanged.
        self.adjuster.step()
        print("Epoch {} (learning rate {:.6f})".format(
            epoch, self._optimizer.param_groups[0]['lr']))
        epoch_loss = 0.
        for batch_num, data_batch in enumerate(dataloader, start=1):
            self._optimizer.zero_grad()
            input_batch, output_batch = data_batch
            # with autograd.detect_anomaly():
            predictionDistribution = self._model(input_batch)
            output_var = maybe_cuda(Variable(output_batch))
            loss = self._criterion(predictionDistribution, output_var)
            loss.backward()
            self._optimizer.step()

            epoch_loss += loss.item()
            if batch_num % self.print_every == 0:
                items_processed = batch_num * self.batch_size + \
                    (epoch - 1) * dataset_size
                progress = items_processed / \
                    (dataset_size * self.num_epochs)
                print("{} ({:7} {:5.2f}%) {:.4f}".format(
                    timeSince(training_start, progress),
                    items_processed, progress * 100,
                    epoch_loss / batch_num))
        state = self._model.state_dict()
        mean_loss = epoch_loss / num_batches
        yield NeuralPredictorState(epoch, mean_loss, state)
def predictKTacticsWithLoss_batch(self,
                                  in_data: List[TacticContext],
                                  k: int,
                                  corrects: List[str]) -> \
        Tuple[List[List[Prediction]], float]:
    """Return the top-``k`` predictions per context plus the batch loss.

    The loss compares the predicted distributions with the encoded stems of
    ``corrects``; a stem the embedding does not know is encoded as 0.  ``k``
    is capped at the embedding's token count.  For a context without
    hypotheses, candidates are filtered with ``topk_with_filter`` so that
    stems for which ``tacticTakesHypArgs`` is true are excluded.
    """
    assert self._embedding
    assert self.training_args
    with self._lock:
        prediction_distributions = self._predictDistributions(in_data)

    correct_stems = [serapi_instance.get_stem(correct)
                     for correct in corrects]
    correct_idxs = []
    for correct_stem in correct_stems:
        if self._embedding.has_token(correct_stem):
            correct_idxs.append(self._embedding.encode_token(correct_stem))
        else:
            correct_idxs.append(0)
    output_var = maybe_cuda(Variable(LongTensor(correct_idxs)))
    loss = self._criterion(prediction_distributions, output_var).item()

    if k > self._embedding.num_tokens():
        k = self._embedding.num_tokens()

    def takes_no_hyp_args(certainty, idx):
        stem = cast(Embedding, self._embedding).decode_token(idx)
        return not serapi_instance.tacticTakesHypArgs(stem)

    certainties_and_idxs_list = []
    for single_distribution, context in zip(prediction_distributions,
                                            in_data):
        if len(context.hypotheses) > 0:
            top = single_distribution.view(-1).topk(k)
        else:
            top = topk_with_filter(single_distribution.view(-1), k,
                                   takes_no_hyp_args)
        certainties_and_idxs_list.append(top)

    results = []
    for certainties_and_idxs, in_datum in zip(certainties_and_idxs_list,
                                              in_data):
        predictions = []
        for certainty, stem_idx in zip(*certainties_and_idxs):
            tactic = self.add_arg(
                self._embedding.decode_token(stem_idx.item()),
                in_datum.goal,
                in_datum.hypotheses,
                self.training_args.max_length)
            predictions.append(
                Prediction(tactic, math.exp(certainty.item())))
        results.append(predictions)
    return results, loss
def predictKTacticsWithLoss(
        prediction_distribution: torch.FloatTensor,
        embedding: Embedding,
        k: int,
        correct: str,
        criterion: nn.Module) -> Tuple[List[Prediction], float]:
    """Return the top-``k`` stem predictions and the loss for ``correct``.

    ``k`` is capped at the embedding's token count.  The loss is 0 when the
    stem of ``correct`` is not in the embedding.  Each prediction is the
    decoded stem followed by a period, paired with ``exp`` of its score.
    """
    k = min(k, embedding.num_tokens())

    correct_stem = get_stem(correct)
    if not embedding.has_token(correct_stem):
        loss = 0
    else:
        target = torch.LongTensor([embedding.encode_token(correct_stem)])
        output_var = maybe_cuda(Variable(target))
        loss = criterion(prediction_distribution.view(1, -1),
                         output_var).item()

    certainties_and_idxs = prediction_distribution.view(-1).topk(k)
    results = []
    for certainty, stem_idx in zip(*certainties_and_idxs):
        tactic = embedding.decode_token(stem_idx.item()) + "."
        results.append(Prediction(tactic, math.exp(certainty.item())))
    return results, loss
def train(dataset: SequenceSequenceDataset,
          hidden_size: int, learning_rate: float,
          num_encoder_layers: int, num_decoder_layers: int,
          max_length: int, num_epochs: int, batch_size: int,
          print_every: int,
          context_vocab_size: int, tactic_vocab_size: int) \
        -> Iterable[Checkpoint]:
    """Train an encoder/decoder pair and yield their states after each epoch.

    Each datum's first element is the input sentence and its second the
    target sentence; both are converted with ``inputFromSentence`` using
    ``max_length``.  Batches are shuffled and an incomplete last batch is
    dropped.  The decoder is run with teacher forcing (``run_teach``) and
    the loss is the sum of the per-position NLL losses.

    Yields:
        ``(encoder.state_dict(), decoder.state_dict())`` once per epoch.
    """
    print("Initializing PyTorch...")
    in_stream = [inputFromSentence(datum[0], max_length)
                 for datum in dataset]
    out_stream = [inputFromSentence(datum[1], max_length)
                  for datum in dataset]
    # Note the order: each batch is (target, input).
    data_loader = data.DataLoader(
        data.TensorDataset(torch.LongTensor(out_stream),
                           torch.LongTensor(in_stream)),
        batch_size=batch_size, num_workers=0,
        shuffle=True, pin_memory=True, drop_last=True)

    encoder = EncoderRNN(context_vocab_size, hidden_size,
                         num_encoder_layers, batch_size=batch_size)
    decoder = DecoderRNN(hidden_size, tactic_vocab_size,
                         num_decoder_layers, batch_size=batch_size)
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    optimizers = [encoder_optimizer, decoder_optimizer]
    criterion = maybe_cuda(nn.NLLLoss())

    start = time.time()
    num_items = len(dataset) * num_epochs
    total_loss = 0

    print("Training...")
    for epoch in range(num_epochs):
        print("Epoch {}".format(epoch))
        adjustLearningRates(learning_rate, optimizers, epoch)
        for batch_num, (output_batch, input_batch) \
                in enumerate(data_loader):
            target_length = output_batch.size()[1]

            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            predictor_output = decoder.run_teach(
                encoder.run(cast(SomeLongTensor, input_batch)),
                cast(SomeLongTensor, output_batch))

            # A one-element float accumulator.  The previous LongTensor(0)
            # was an *empty integer* tensor, which cannot accumulate a
            # floating-point loss.
            loss = maybe_cuda(Variable(torch.zeros(1, dtype=torch.float32)))
            output_var = maybe_cuda(Variable(output_batch))
            for i in range(target_length):
                loss += criterion(predictor_output[i], output_var[:, i])
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()

            # .item() replaces the removed `loss.data[0]` scalar access.
            total_loss += (loss.item() / target_length) * batch_size

            if (batch_num + 1) % print_every == 0:
                items_processed = (batch_num + 1) * batch_size + \
                    epoch * len(dataset)
                progress = items_processed / num_items
                print("{} ({} {:.2f}%) {:.4f}".format(
                    timeSince(start, progress),
                    items_processed, progress * 100,
                    total_loss / items_processed))

        yield encoder.state_dict(), decoder.state_dict()
def predictionCertainty(self, context: TacticContext, prediction: str) -> float:
    """Return the model's certainty for ``prediction`` in ``context``.

    The prediction is split into a stem and arguments, both are encoded, and
    the stem is forced into the candidate stem list if the classifier's
    top-``stem_width`` list does not already contain it.  Goal-token scores
    (and hypothesis scores, when there are premises) are combined and handed
    to ``self.predict_args``; the first returned entry matching the encoded
    stem and argument determines the result.

    Returns:
        ``math.exp`` of the matching entry's probability value.

    Raises:
        AssertionError: if training args or the model are missing, or if no
            returned entry matches the prediction.
    """
    assert self.training_args
    assert self._model
    num_stem_poss = get_num_tokens(self.metadata)
    # The beam can never be wider than the number of known stems.
    stem_width = min(self.training_args.max_beam_width, num_stem_poss)
    tokenized_premises, hyp_features, \
        nhyps_batch, tokenized_goal, \
        goal_mask, \
        word_features, vec_features = \
        sample_fpa(extract_dataloader_args(self.training_args),
                   self.metadata,
                   context.relevant_lemmas,
                   context.prev_tactics,
                   context.hypotheses,
                   context.goal)
    prediction_stem, prediction_args = \
        serapi_instance.split_tactic(prediction)
    prediction_stem_idx = encode_fpa_stem(extract_dataloader_args(self.training_args),
                                          self.metadata, prediction_stem)
    stem_distributions = self._model.stem_classifier(
        maybe_cuda(torch.LongTensor(word_features)),
        maybe_cuda(torch.FloatTensor(vec_features)))
    stem_certainties, stem_idxs = stem_distributions.topk(stem_width)
    if prediction_stem_idx in stem_idxs[0]:
        merged_stem_idxs = stem_idxs
        merged_stem_certainties = stem_certainties
    else:
        # The requested stem is not among the top candidates: put it first
        # and drop the last candidate so the width stays at stem_width.
        merged_stem_idxs = torch.cat(
            (maybe_cuda(torch.LongTensor([[prediction_stem_idx]])),
             stem_idxs[:, :stem_width-1]),
            dim=1)
        cother = stem_certainties[:, :stem_width-1]
        val = stem_distributions[0][prediction_stem_idx]
        merged_stem_certainties = \
            torch.cat((val.view(1, 1), cother),dim=1)
    # NOTE(review): this position within the merged list is computed but
    # never read below; the final loop compares against prediction_stem_idx
    # instead.  Confirm which one predict_args' second result corresponds to.
    prediction_stem_idx_idx = list(merged_stem_idxs[0]).index(
        prediction_stem_idx)
    prediction_arg_idx = encode_fpa_arg(
        extract_dataloader_args(self.training_args),
        self.metadata,
        context.hypotheses + context.relevant_lemmas,
        context.goal,
        prediction_args)
    goal_arg_values = self.goal_token_scores(
        merged_stem_idxs, tokenized_goal, goal_mask)
    if len(tokenized_premises[0]) > 0:
        hyp_arg_values = self.hyp_name_scores(
            merged_stem_idxs[0], tokenized_goal[0],
            tokenized_premises[0], hyp_features[0])
        # Hypothesis scores are appended after the goal scores on dim 2.
        total_scores = torch.cat((goal_arg_values, hyp_arg_values), dim=2)
    else:
        total_scores = goal_arg_values
    final_probs, predicted_stem_idxs, predicted_arg_idxs = \
        self.predict_args(total_scores,
                          merged_stem_certainties, merged_stem_idxs)
    for prob, stem_idx_idx, arg_idx in zip(final_probs,
                                           predicted_stem_idxs,
                                           predicted_arg_idxs):
        if stem_idx_idx == prediction_stem_idx and \
           arg_idx == prediction_arg_idx:
            return math.exp(prob.item())
    assert False, "Shouldn't be able to get here"
def _optimize_checkpoints(self, encoded_data: RestrictedDatasetType,
                          arg_values: Namespace,
                          tactic_vocab_size: int, term_vocab_size: int) \
        -> Iterable[NeuralPredictorState]:
    """Train the model and yield one ``NeuralPredictorState`` per epoch.

    When ``arg_values.start_from`` is set, the saved state is loaded and
    training resumes at the saved epoch; the learning-rate scheduler is
    stepped once for every epoch before that so the rate matches.
    Otherwise a fresh model is built and training starts at epoch 1.

    Yields:
        The epoch number, the mean batch loss of that epoch, and the
        model's ``state_dict``.
    """
    dataloader = data.DataLoader(
        data.TensorDataset(*(self._data_tensors(encoded_data, arg_values))),
        batch_size=arg_values.batch_size, num_workers=0,
        shuffle=True, pin_memory=True, drop_last=True)
    # Drop the last batch in the count
    num_batches = int(len(encoded_data) / arg_values.batch_size)

    print("Initializing model...")
    if arg_values.start_from:
        print("Starting from file")
        # NOTE(review): torch.load unpickles the file; only use trusted
        # checkpoint files here.
        with open(arg_values.start_from, 'rb') as f:
            state = torch.load(f)
        self.load_saved_state(*state)  # type: ignore
        model = self._model
        epoch_start = state[2].epoch
    else:
        epoch_start = 1
        model = maybe_cuda(
            self._get_model(arg_values, tactic_vocab_size, term_vocab_size))
    optimizer = optimizers[arg_values.optimizer](
        model.parameters(), lr=arg_values.learning_rate)
    adjuster = scheduler.StepLR(optimizer, arg_values.epoch_step,
                                gamma=arg_values.gamma)

    training_start = time.time()

    print("Training...")
    # Replay the scheduler for the epochs that were already trained.
    for epoch in range(1, epoch_start):
        adjuster.step()
    for epoch in range(epoch_start, arg_values.num_epochs + 1):
        print("Epoch {} (learning rate {:.6f})".format(
            epoch, optimizer.param_groups[0]['lr']))

        # epoch_loss accumulates loss / num_batches, i.e. it already is the
        # mean batch loss once the epoch is complete.
        epoch_loss = 0.

        for batch_num, data_batch in enumerate(dataloader, start=1):
            optimizer.zero_grad()
            loss = self._getBatchPredictionLoss(data_batch, model)
            loss.backward()
            optimizer.step()

            epoch_loss += (loss.item() / num_batches)

            if batch_num % arg_values.print_every == 0:
                items_processed = batch_num * arg_values.batch_size + \
                    (epoch - 1) * len(encoded_data)
                progress = items_processed / \
                    (len(encoded_data) * arg_values.num_epochs)
                # Rescale the partial mean to a mean over batches seen.
                print("{} ({:7} {:5.2f}%) {:.4f}".format(
                    timeSince(training_start, progress),
                    items_processed, progress * 100,
                    epoch_loss * (num_batches / batch_num)))
        adjuster.step()

        # Do not divide by num_batches again: that was already done while
        # accumulating, and a second division under-reported the loss.
        yield NeuralPredictorState(epoch, epoch_loss, model.state_dict())
def supervised_q(args: argparse.Namespace) -> None:
    """Train a Q estimator on the labeled transitions stored in a file.

    Transitions are read as one JSON object per line from ``args.tmp_file``
    and optionally truncated to the last ``args.max_tuples``.  Scores are
    assigned with ``assign_scores``; the estimator is then trained for
    ``args.num_epochs`` epochs, with its weights saved after every epoch.
    Every ``args.score_every`` epochs (except the last) the samples are
    re-scored and the learning rate is reset to a value decayed by
    ``args.rescore_gamma``.
    """
    with open(args.tmp_file, 'r') as f:
        replay_memory = [LabeledTransition.from_dict(json.loads(line))
                         for line in tqdm(f, desc="Loading data")]
    if args.max_tuples is not None:
        replay_memory = replay_memory[-args.max_tuples:]

    # Load the predictor
    predictor = cast(
        features_polyarg_predictor.FeaturesPolyargPredictor,
        predict_tactic.loadPredictorByFile(args.predictor_weights))

    # Create an initial Q Estimator
    use_polyarg = args.estimator == "polyarg"
    q_estimator: QEstimator
    if use_polyarg:
        q_estimator = PolyargQEstimator(args.learning_rate, args.epoch_step,
                                        args.gamma, predictor)
    else:
        q_estimator = FeaturesQEstimator(args.learning_rate,
                                         args.epoch_step, args.gamma)

    if args.start_from:
        q_estimator_name, *saved = \
            torch.load(args.start_from)
        expected_name = ("polyarg evaluator" if use_polyarg
                         else "features evaluator")
        assert q_estimator_name == expected_name, \
            q_estimator_name
        q_estimator.load_saved_state(*saved)

    training_start = time.time()
    training_samples = assign_scores(args, q_estimator, predictor,
                                     replay_memory, progress=True)
    input_tensors = q_estimator.get_input_tensors(training_samples)
    rescore_lr = args.learning_rate
    total_items = len(replay_memory) * args.num_epochs

    for epoch in range(1, args.num_epochs + 1):
        # Scores are rebuilt each epoch because re-scoring may replace
        # training_samples.
        scores = torch.FloatTensor(
            [score for _, _, _, score in training_samples])
        batches: Sequence[Sequence[torch.Tensor]] = data.DataLoader(
            data.TensorDataset(*(input_tensors + [scores])),
            batch_size=args.batch_size, num_workers=0,
            shuffle=True, pin_memory=True, drop_last=True)

        epoch_loss = 0.
        eprint("Epoch {}: Learning rate {:.12f}".format(
            epoch, q_estimator.optimizer.param_groups[0]['lr']),
               guard=args.show_loss)
        for idx, batch in enumerate(batches, start=1):
            q_estimator.optimizer.zero_grad()
            word_features_batch, vec_features_batch, \
                expected_outputs_batch = batch
            outputs = q_estimator.model(word_features_batch,
                                        vec_features_batch)
            loss = q_estimator.criterion(
                outputs, maybe_cuda(expected_outputs_batch))
            loss.backward()
            q_estimator.optimizer.step()
            q_estimator.total_batches += 1
            epoch_loss += loss.item()

            if idx % args.print_every != 0:
                continue
            items_processed = idx * args.batch_size + \
                (epoch - 1) * len(replay_memory)
            progress = items_processed / total_items
            eprint("{} ({:7} {:5.2f}%) {:.4f}".format(
                timeSince(training_start, progress),
                items_processed, progress * 100,
                epoch_loss * (len(batches) / idx)),
                   guard=args.show_loss)

        q_estimator.adjuster.step()
        q_estimator.save_weights(args.out_weights, args)

        rescore_now = (epoch % args.score_every == 0
                       and epoch < args.num_epochs)
        if rescore_now:
            training_samples = assign_scores(args, q_estimator, predictor,
                                             replay_memory, progress=True)
            rescore_lr *= args.rescore_gamma
            q_estimator.optimizer.param_groups[0]['lr'] = rescore_lr
def forward(self, goal_batch: torch.LongTensor):
    """Score each goal from the LSTM output at the last sequence index.

    The goals are embedded, run through ``self._lstm`` with no initial
    state, and the slice ``[:, -1]`` of the LSTM output is passed to
    ``self._scorer``.  The result is flattened to one dimension.
    """
    embedded_goals = self._token_embedding(maybe_cuda(goal_batch))
    lstm_out, _final_states = self._lstm(embedded_goals, None)
    last_step = lstm_out[:, -1]
    return self._scorer(last_step).view(-1)
def initHidden(self) -> SomeLongTensor:
    """Return a zero tensor of shape ``(1, 1, hidden_size)`` in a Variable.

    The ``cast`` only affects static typing; the tensor is whatever
    ``torch.zeros`` produces, passed through ``maybe_cuda``.
    """
    blank = maybe_cuda(torch.zeros(1, 1, self.hidden_size))
    return Variable(cast(torch.LongTensor, blank))
def __init__(self) -> None:
    """Initialise the base class, the NLL criterion and a lock."""
    super().__init__()
    nll_loss = nn.NLLLoss()
    self._criterion = maybe_cuda(nll_loss)
    self._lock = threading.Lock()
def __init__(self) -> None:
    """Set up the mean-squared-error criterion used by this object."""
    mse_loss = nn.MSELoss()
    self._criterion = maybe_cuda(mse_loss)
def __init__(self, num_tactics: int, hidden_size: int,
             num_layers: int) -> None:
    """Record ``num_tactics`` and build the ``DNNScorer`` sub-network."""
    super().__init__()
    self.num_tactics = num_tactics
    scorer = DNNScorer(num_tactics, hidden_size, num_layers)
    self.dnn = maybe_cuda(scorer)
def main(
        lsun_data_dir: ('Base directory for the LSUN data'),
        image_output_prefix: ('Prefix for image output', 'option', 'o')='glo',
        code_dim: ('Dimensionality of latent representation space', 'option', 'd', int)=128,
        epochs: ('Number of epochs to train', 'option', 'e', int)=25,
        use_cuda: ('Use GPU?', 'flag', 'gpu')=False,
        batch_size: ('Batch size', 'option', 'b', int)=128,
        lr_g: ('Learning rate for generator', 'option', None, float)=1.,
        lr_z: ('Learning rate for representation_space', 'option', None, float)=10.,
        max_num_samples: ('Cap on the number of samples from the LSUN dataset', 'option', 'n', int)=-1,
        init: ('Initialization strategy for latent represetation vectors', 'option', 'i', str, ['pca', 'random'])='pca',
        n_pca: ('Number of samples to take for PCA', 'option', None, int)=(64 * 64 * 3 * 2),
        loss: ('Loss type (Laplacian loss as in the paper, or L2 loss)', 'option', 'l', str, ['lap_l1', 'l2'])='lap_l1',
):
    """Train a generator jointly with per-image latent codes on LSUN.

    Latent codes ``Z`` are initialised from a PCA projection or at random,
    projected with ``util.project_l2_ball``, and updated together with the
    generator by SGD.  After each batch the updated codes are projected
    again and written back into ``Z``.  A grid of reconstructions of the
    first 64 images is saved after every epoch.  Wall-clock timings of the
    individual stages are printed as the script runs.
    """
    a = time.time()
    train_set = util.IndexedDataset(
        LSUN(lsun_data_dir, classes=['bedroom_train'],
             transform=transforms.Compose([
                 transforms.Resize(64),
                 transforms.CenterCrop(64),
                 transforms.ToTensor(),
                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
             ]))
    )
    b = time.time()
    print("===train set===\n")
    print(b-a)

    a = time.time()
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size,
        shuffle=True, drop_last=True,
        num_workers=8, pin_memory=use_cuda,
    )
    b = time.time()
    print("===train loader===\n")
    print(b - a)

    # we don't really have a validation set here, but for visualization let us
    # just take the first couple images from the dataset
    val_loader = torch.utils.data.DataLoader(train_set, shuffle=False,
                                             batch_size=8*8)

    if max_num_samples > 0:
        train_set.base.length = max_num_samples
        train_set.base.indices = [max_num_samples]

    # initialize representation space:
    if init == 'pca':
        from sklearn.decomposition import PCA

        a = time.time()
        # first, take a subset of train set to fit the PCA
        X_pca = np.vstack([
            X.cpu().numpy().reshape(len(X), -1)
            for i, (X, _, _)
            in zip(tqdm(range(n_pca // train_loader.batch_size),
                        'collect data for PCA'),
                   train_loader)
        ])
        b = time.time()
        print("===pca loader===\n")
        print(b - a)
        print("perform PCA...")
        pca = PCA(n_components=code_dim)
        pca.fit(X_pca)
        # then, initialize latent vectors to the pca projections of the
        # complete dataset
        a = time.time()
        Z = np.empty((len(train_loader.dataset), code_dim))
        for X, _, idx in tqdm(train_loader, 'pca projection'):
            Z[idx] = pca.transform(X.cpu().numpy().reshape(len(X), -1))
        b = time.time()
        print("===pca projection===\n")
        print(b - a)

    elif init == 'random':
        Z = np.random.randn(len(train_set), code_dim)

    Z = util.project_l2_ball(Z)

    g = util.maybe_cuda(generator.Generator(code_dim), use_cuda)
    # `loss` is the CLI choice; it is only read here.
    loss_fn = laploss.LapLoss(max_levels=3) if loss == 'lap_l1' else nn.MSELoss()
    # One reusable leaf variable for the batch's latent codes; its data is
    # overwritten for every batch.
    zi = util.maybe_cuda(torch.zeros((batch_size, code_dim)),use_cuda)
    zi = Variable(zi, requires_grad=True)
    optimizer = SGD([
        {'params': g.parameters(), 'lr': lr_g},
        {'params': zi, 'lr': lr_z}
    ])

    Xi_val, _, idx_val = next(iter(val_loader))
    util.imsave('target.png',
                make_grid(Xi_val.cpu() / 2. + 0.5, nrow=8).numpy().transpose(1, 2, 0))

    for epoch in range(epochs):
        losses = []
        progress = tqdm(total=len(train_loader), desc='epoch % 3d' % epoch)

        for i, (Xi, yi, idx) in enumerate(train_loader):
            a = time.time()
            Xi = Variable(util.maybe_cuda(Xi, use_cuda))
            zi.data = util.maybe_cuda(torch.FloatTensor(Z[idx.numpy()]), use_cuda)

            optimizer.zero_grad()
            rec = g(zi)
            # Named batch_loss so the `loss` CLI parameter is not shadowed.
            batch_loss = loss_fn(rec, Xi)
            batch_loss.backward()
            optimizer.step()

            Z[idx.numpy()] = util.project_l2_ball(zi.data.cpu().numpy())

            # .item() replaces `loss.data[0]`, which fails on 0-dim tensors
            # in current PyTorch.
            losses.append(batch_loss.item())
            progress.set_postfix({'loss': np.mean(losses[-100:])})
            progress.update()
            b = time.time()
            print("===1 data===\n")
            print(b - a)

        progress.close()

        # visualize reconstructions
        rec = g(Variable(util.maybe_cuda(torch.FloatTensor(Z[idx_val.numpy()]), use_cuda)))
        util.imsave('%s_rec_epoch_%03d.png' % (image_output_prefix, epoch),
                    make_grid(rec.data.cpu() / 2. + 0.5, nrow=8).numpy().transpose(1, 2, 0))
def __init__(self, modelclassObject) -> None:
    """Store the model class object and create the NLL criterion and lock."""
    nll_loss = nn.NLLLoss()
    self._criterion = maybe_cuda(nll_loss)
    self._lock = threading.Lock()
    self._modelclassobject = modelclassObject
def initHidden(self) -> torch.FloatTensor:
    """Return zeros of shape ``(1, batch_size, hidden_size)`` in a Variable."""
    shape = (1, self.batch_size, self.hidden_size)
    blank = maybe_cuda(torch.zeros(*shape))
    return Variable(cast(torch.FloatTensor, blank))
def _getBatchPredictionLoss(
        self, arg_values: Namespace,
        batch: Sequence[torch.Tensor],
        model: FeaturesPolyArgModel) -> torch.FloatTensor:
    """Compute the combined stem and argument loss for one batch.

    The stem classifier's top ``stem_width`` candidates are taken per
    sample, and the correct stem is forced into first position when it is
    missing.  For every candidate stem, scores are computed for each goal
    position (``goal_size + 1`` values) and each hypothesis.  The scores of
    all candidates are flattened into one distribution per sample, and the
    target index is offset to the slot of the correct stem.

    Args:
        arg_values: Reads ``max_beam_width``, ``max_length``, ``hyp_rnn``,
            ``hyp_features`` and ``goal_rnn``.
        batch: The nine tensors unpacked on the first statement.
        model: Provides ``stem_classifier``, ``goal_args_model``,
            ``goal_encoder`` and ``hyp_model``.

    Returns:
        A one-element tensor: stem loss plus argument loss.
    """
    # The parameter is named `batch`; the body previously read an undefined
    # `data_batch`, which raised NameError on every call.
    tokenized_hyp_types_batch, hyp_features_batch, num_hyps_batch, \
        tokenized_goals_batch, goal_masks_batch, \
        word_features_batch, vec_features_batch, \
        stem_idxs_batch, arg_total_idxs_batch = \
        cast(Tuple[torch.LongTensor, torch.FloatTensor, torch.LongTensor,
                   torch.LongTensor, torch.ByteTensor,
                   torch.LongTensor, torch.FloatTensor,
                   torch.LongTensor, torch.LongTensor],
             batch)
    batch_size = tokenized_goals_batch.size()[0]
    goal_size = tokenized_goals_batch.size()[1]
    stemDistributions = model.stem_classifier(word_features_batch,
                                              vec_features_batch)
    num_stem_poss = stemDistributions.size()[1]
    # The beam cannot be wider than the number of stems.
    stem_width = min(arg_values.max_beam_width, num_stem_poss)
    stem_var = maybe_cuda(Variable(stem_idxs_batch))
    predictedProbs, predictedStemIdxs = stemDistributions.topk(stem_width)

    # Make sure the correct stem is among each sample's candidates.
    mergedStemIdxs = []
    for stem_idx, predictedStemIdxList in zip(stem_idxs_batch,
                                              predictedStemIdxs):
        if stem_idx.item() in predictedStemIdxList:
            mergedStemIdxs.append(predictedStemIdxList)
        else:
            # Put the correct stem first, dropping the last candidate.
            mergedStemIdxs.append(
                torch.cat((maybe_cuda(stem_idx.view(1)),
                           predictedStemIdxList[:stem_width - 1])))
    mergedStemIdxsT = torch.stack(mergedStemIdxs)
    # Position of the correct stem inside each sample's candidate list.
    correctPredictionIdxs = torch.LongTensor([
        list(idxList).index(stem_idx)
        for idxList, stem_idx in zip(mergedStemIdxs, stem_var)
    ])

    # Disabled inputs are replaced by zeros of the same shape.
    if arg_values.hyp_rnn:
        tokenized_hyps_var = maybe_cuda(
            Variable(tokenized_hyp_types_batch))
    else:
        tokenized_hyps_var = maybe_cuda(
            Variable(torch.zeros_like(tokenized_hyp_types_batch)))

    if arg_values.hyp_features:
        hyp_features_var = maybe_cuda(Variable(hyp_features_batch))
    else:
        hyp_features_var = maybe_cuda(
            Variable(torch.zeros_like(hyp_features_batch)))

    # Goal scores: each goal is repeated once per candidate stem.
    goal_arg_values = model.goal_args_model(
        mergedStemIdxsT.view(batch_size * stem_width),
        tokenized_goals_batch.view(batch_size, 1, goal_size)
        .expand(-1, stem_width, -1)
        .contiguous().view(batch_size * stem_width, goal_size))\
        .view(batch_size, stem_width, goal_size + 1)
    # Positions where the mask is false get -inf.
    goal_arg_values = torch.where(
        maybe_cuda(
            goal_masks_batch.view(batch_size, 1,
                                  arg_values.max_length + 1))
        .expand(-1, stem_width, -1),
        goal_arg_values,
        maybe_cuda(torch.full_like(goal_arg_values, -float("Inf"))))
    encoded_goals = model.goal_encoder(tokenized_goals_batch)

    hyp_lists_length = tokenized_hyp_types_batch.size()[1]
    hyp_length = tokenized_hyp_types_batch.size()[2]
    hyp_features_size = hyp_features_batch.size()[2]
    encoded_goal_size = encoded_goals.size()[1]

    # Repeat the encoded goal for every (candidate stem, hypothesis) pair.
    encoded_goals_expanded = \
        encoded_goals.view(batch_size, 1, 1, encoded_goal_size)\
        .expand(-1, stem_width, hyp_lists_length, -1).contiguous()\
        .view(batch_size * stem_width * hyp_lists_length,
              encoded_goal_size)
    if not arg_values.goal_rnn:
        encoded_goals_expanded = torch.zeros_like(encoded_goals_expanded)
    stems_expanded = \
        mergedStemIdxsT.view(batch_size, stem_width, 1)\
        .expand(-1, -1, hyp_lists_length).contiguous()\
        .view(batch_size * stem_width * hyp_lists_length)
    hyp_arg_values_concatted = \
        model.hyp_model(stems_expanded,
                        encoded_goals_expanded,
                        tokenized_hyps_var
                        .view(batch_size, 1, hyp_lists_length, hyp_length)
                        .expand(-1, stem_width, -1, -1).contiguous()
                        .view(batch_size * stem_width * hyp_lists_length,
                              hyp_length),
                        hyp_features_var
                        .view(batch_size, 1, hyp_lists_length,
                              hyp_features_size)
                        .expand(-1, stem_width, -1, -1).contiguous()
                        .view(batch_size * stem_width * hyp_lists_length,
                              hyp_features_size))
    assert hyp_arg_values_concatted.size() == torch.Size(
        [batch_size * stem_width * hyp_lists_length, 1]), \
        hyp_arg_values_concatted.size()
    hyp_arg_values = hyp_arg_values_concatted.view(batch_size, stem_width,
                                                   hyp_lists_length)

    # Goal scores come first, hypothesis scores after them.
    total_arg_values = torch.cat((goal_arg_values, hyp_arg_values), dim=2)
    num_probs = hyp_lists_length + goal_size + 1
    total_arg_distribution = \
        self._softmax(total_arg_values.view(batch_size,
                                            stem_width * num_probs))
    # Shift the target argument index into the correct stem's slot.
    total_arg_var = maybe_cuda(Variable(arg_total_idxs_batch +
                                        (correctPredictionIdxs * num_probs)))\
        .view(batch_size)

    loss = FloatTensor([0.])
    loss += self._criterion(stemDistributions, stem_var)
    loss += self._criterion(total_arg_distribution, total_arg_var)
    return loss
def train(dataset: List[Sentence],
          token_vocab_size: int, max_length: int, hidden_size: int,
          learning_rate: float, epoch_step: int, gamma: float,
          num_encoder_layers: int, num_decoder_layers: int,
          num_epochs: int, batch_size: int, print_every: int,
          optimizer_f: Callable[..., Optimizer]) \
        -> Iterable[Checkpoint]:
    """Train an autoencoder on ``dataset`` and yield a checkpoint per epoch.

    Every sentence serves as both input and target.  Batches are shuffled
    and an incomplete last batch is dropped.  Both learning-rate schedulers
    are stepped at the start of each epoch.  The running ``total_loss``
    spans all epochs and is what each checkpoint reports.
    """
    stage_start = time.time()
    print("Building pytorch dataset...", end="")
    sys.stdout.flush()

    inputs = torch.LongTensor(dataset[:])
    targets = torch.LongTensor(dataset[:])
    data_loader = data.DataLoader(data.TensorDataset(inputs, targets),
                                  batch_size=batch_size, num_workers=0,
                                  shuffle=True, pin_memory=True,
                                  drop_last=True)

    print(" {:.2f}s".format(time.time() - stage_start))

    stage_start = time.time()
    print("Initializing model...", end="")
    sys.stdout.flush()

    encoder = maybe_cuda(
        EncoderRNN(token_vocab_size, hidden_size,
                   num_encoder_layers, batch_size=batch_size))
    decoder = maybe_cuda(
        DecoderRNN(hidden_size, token_vocab_size,
                   num_decoder_layers, batch_size=batch_size))
    encoder_optimizer = optimizer_f(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optimizer_f(decoder.parameters(), lr=learning_rate)
    encoder_adjuster = scheduler.StepLR(encoder_optimizer, epoch_step, gamma)
    decoder_adjuster = scheduler.StepLR(decoder_optimizer, epoch_step, gamma)
    criterion = maybe_cuda(nn.NLLLoss())
    print(" {:.2f}s".format(time.time() - stage_start))

    start = time.time()
    num_items = len(dataset) * num_epochs
    total_loss = 0

    print("Training...")
    for epoch in range(num_epochs):
        print("Epoch {}".format(epoch))
        # Adjust learning rates if needed
        encoder_adjuster.step()
        decoder_adjuster.step()

        for batch_num, (input_batch, output_batch) in enumerate(data_loader):
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()

            # Encode, then decode with teacher forcing.
            encoded = encoder.run(cast(torch.LongTensor, input_batch))
            decoded_output = decoder.run_teach(
                encoded, cast(torch.LongTensor, output_batch))

            # Sum the per-position losses into one float accumulator.
            loss = maybe_cuda(Variable(torch.zeros(1, dtype=torch.float32)))
            output_var = maybe_cuda(Variable(output_batch))
            target_length = output_batch.size()[1]
            for position in range(target_length):
                loss += criterion(decoded_output[position],
                                  output_var[:, position])

            total_loss += (loss.data.item() / target_length) * batch_size
            assert total_loss == total_loss
            assert isinstance(total_loss, float)

            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()

            # Print status every once in a while
            batches_done = batch_num + 1
            if batches_done % print_every == 0:
                items_processed = batches_done * batch_size + \
                    epoch * len(dataset)
                progress = items_processed / num_items
                print("{} ({} {:.2f}%) {:.4f}".format(
                    timeSince(start, progress),
                    items_processed, progress * 100,
                    total_loss / items_processed))

        yield Checkpoint(encoder_state=encoder.state_dict(),
                         decoder_state=decoder.state_dict(),
                         training_loss=total_loss)