Example 1
 def load_saved_state(self, args: Namespace, unparsed_args: List[str],
                      metadata: Tuple[Tokenizer, Embedding,
                                      List[WordFeature], List[VecFeature]],
                      state: NeuralPredictorState) -> None:
     self._tokenizer, self._embedding, \
         self._word_feature_functions, self._vec_feature_functions = \
             metadata
     self._model = maybe_cuda(
         self._get_model(args, self._embedding.num_tokens(),
                         self._tokenizer.numTokens()))
     self._model.load_state_dict(state.weights)
     self.training_loss = state.loss
     self.num_epochs = state.epoch
     self.training_args = args
     self.unparsed_args = unparsed_args
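
Every snippet in this collection routes modules and tensors through a maybe_cuda helper whose definition is not included here. A minimal sketch consistent with how it is called, assuming it simply moves its argument to the GPU when one is available (the real helper may instead be gated on a flag), would be:

import torch

def maybe_cuda(component):
    # Hypothetical sketch: move a module or tensor to the GPU only when CUDA is
    # available; otherwise return it unchanged.
    if torch.cuda.is_available():
        return component.cuda()
    return component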
Example 2
    def forward(self,
                stems_batch: torch.LongTensor,
                goals_encoded_batch: torch.FloatTensor,
                hyps_batch: torch.LongTensor) -> torch.FloatTensor:
        stems_var = maybe_cuda(Variable(stems_batch))
        hyps_var = maybe_cuda(Variable(hyps_batch))
        batch_size = stems_batch.size()[0]
        assert goals_encoded_batch.size()[0] == batch_size
        assert hyps_batch.size()[0] == batch_size, \
            "batch_size: {}; hyps_batch.size()[0]: {}"\
            .format(batch_size, hyps_batch.size()[0])
        stem_encoded = self._stem_embedding(stems_var)\
                           .view(batch_size, self.hidden_size)
        initial_hidden = self._in_hidden(torch.cat(
            (stem_encoded, goals_encoded_batch), dim=1))\
            .view(1, batch_size, self.hidden_size)
        hidden = initial_hidden
        for i in range(hyps_batch.size()[1]):
            token_batch = self._token_embedding(hyps_var[:, i])\
                .view(1, batch_size, self.hidden_size)
            token_batch = F.relu(token_batch)
            token_out, hidden = self._hyp_gru(token_batch, hidden)

        return token_out.squeeze()
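
The loop in this forward pass feeds one embedded token column at a time through a single-step GRU, threading the hidden state by hand. The standalone sketch below, with made-up sizes, illustrates that the per-token loop produces the same final output as one call over the whole sequence:

import torch
import torch.nn as nn
import torch.nn.functional as F

# Hypothetical sizes for illustration only.
batch_size, seq_len, hidden_size = 4, 7, 16
embedding = nn.Embedding(100, hidden_size)
gru = nn.GRU(hidden_size, hidden_size)

tokens = torch.randint(0, 100, (batch_size, seq_len))
hidden = torch.zeros(1, batch_size, hidden_size)
for i in range(seq_len):
    # One token column per step, exactly as in the forward pass above.
    step = F.relu(embedding(tokens[:, i])).view(1, batch_size, hidden_size)
    out, hidden = gru(step, hidden)

# The same computation in one call over the (seq, batch, hidden) sequence.
full_out, _ = gru(F.relu(embedding(tokens)).transpose(0, 1),
                  torch.zeros(1, batch_size, hidden_size))
assert torch.allclose(out, full_out[-1:], atol=1e-5)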
Example 3
 def forward(self, stem_batch : torch.LongTensor, goal_batch : torch.LongTensor) \
     -> torch.FloatTensor:
     goal_var = maybe_cuda(Variable(goal_batch))
     stem_var = maybe_cuda(Variable(stem_batch))
     batch_size = goal_batch.size()[0]
     assert stem_batch.size()[0] == batch_size
     initial_hidden = self._stem_embedding(stem_var)\
                          .view(1, batch_size, self.hidden_size)
     hidden = initial_hidden
     copy_likelyhoods : List[torch.FloatTensor] = []
     for i in range(goal_batch.size()[1]):
         try:
             token_batch = self._token_embedding(goal_var[:,i])\
                               .view(1, batch_size, self.hidden_size)
             token_batch2 = F.relu(token_batch)
             token_out, hidden = self._gru(token_batch2, hidden)
             copy_likelyhood = self._likelyhood_layer(F.relu(token_out))
             copy_likelyhoods.append(copy_likelyhood[0])
         except RuntimeError:
             eprint("Tokenized goal:")
             for j in range(goal_batch.size()[0]):
                 eprint(goal_batch[j, i].item(), end=" ")
                 assert goal_batch[j, i] < 123
             eprint()
             eprint(f"goal_var: {goal_var}")
             eprint("Token batch")
             eprint(token_batch)
             raise
     end_token_embedded = self._token_embedding(LongTensor([EOS_token])
                                                .expand(batch_size))\
                                                .view(1, batch_size, self.hidden_size)
     final_out, final_hidden = self._gru(F.relu(end_token_embedded), hidden)
     final_likelyhood = self._likelyhood_layer(F.relu(final_out))
     copy_likelyhoods.insert(0, final_likelyhood[0])
     catted = torch.cat(copy_likelyhoods, dim=1)
     return catted
Example 4
 def forward(self,
             goal_batch : torch.LongTensor,
             hyp_batch : torch.LongTensor,
             vec_features_batch : torch.FloatTensor,
             word_features_batch : torch.LongTensor) -> torch.FloatTensor:
     goal_data = self._goal_encoder(goal_batch)
     hyp_data = self._hyp_encoder(hyp_batch)
     word_features_data = self._word_features_encoder(word_features_batch)
     catted_data = torch.cat((goal_data, hyp_data, word_features_data,
                              maybe_cuda(vec_features_batch)),
                             dim=1)
     full_data = self._layer(F.relu(catted_data))
     full_data = self._out_layer(F.relu(full_data))
     result = self._softmax(full_data)
     return result
Example 5
 def __init__(self, vec_features_size : int,
              word_feature_vocab_sizes : List[int],
              term_token_vocab_size : int,
              hidden_size : int, num_layers : int,
              tactic_vocab_size : int) -> None:
     super().__init__()
     self._goal_encoder = EncoderRNN(term_token_vocab_size, hidden_size, hidden_size)
     self._hyp_encoder = EncoderRNN(term_token_vocab_size, hidden_size, hidden_size)
     self._word_features_encoder = WordFeaturesEncoder(word_feature_vocab_sizes,
                                                       hidden_size, num_layers-1,
                                                       hidden_size)
     self._layer = nn.Linear(hidden_size * 3 + vec_features_size, hidden_size)
     self._out_layer = nn.Linear(hidden_size, tactic_vocab_size)
     self._softmax = maybe_cuda(nn.LogSoftmax(dim=1))
     pass
Example 6
    def load_saved_state(self, args: argparse.Namespace,
                         unparsed_args: List[str],
                         state: FeaturesDNNEvaluatorState) -> None:
        picklable_tmap, neural_state = state
        self.features_token_map = tmap_from_picklable(picklable_tmap)
        word_features_vocab_sizes, vec_features_size = features_vocab_sizes(
            self.features_token_map)
        self._model = maybe_cuda(
            self._get_model(args, word_features_vocab_sizes,
                            vec_features_size))
        self._model.load_state_dict(neural_state.weights)

        self.training_loss = neural_state.loss
        self.num_epochs = neural_state.epoch
        self.training_args = args
        self.unparsed_args = unparsed_args
Example 7
 def load_saved_state(self,
                      args : Namespace,
                      unparsed_args : List[str],
                      metadata : Any,
                      state : NeuralPredictorState) -> None:
     model = maybe_cuda(self._get_model(args,
                                        get_word_feature_vocab_sizes(metadata),
                                        get_vec_features_size(metadata),
                                        get_num_indices(metadata),
                                        get_num_tokens(metadata)))
     model.load_state_dict(state.weights)
     self._model = model
     self.training_loss = state.loss
     self.num_epochs = state.epoch
     self.training_args = args
     self.unparsed_args = unparsed_args
     self._metadata = metadata
Example 8
 def forward(self, input_vec: torch.LongTensor) -> torch.FloatTensor:
     batch_size = input_vec.size()[0]
     word_embedded_features = []
     for i in range(self.num_word_features):
         word_feature_var = maybe_cuda(Variable(input_vec[:, i]))
         embedded = getattr(self, "_word_embedding{}".format(i))(word_feature_var)\
             .view(batch_size, self.hidden_size)
         word_embedded_features.append(embedded)
     word_embedded_features_vec = \
         torch.cat(word_embedded_features, dim=1)
     vals = self._in_layer(word_embedded_features_vec)
     for i in range(self.num_layers - 1):
         vals = F.relu(vals)
         vals = getattr(self, "_layer{}".format(i))(vals)
     vals = F.relu(vals)
     result = self._out_layer(vals).view(batch_size, -1)
     return result
 def train(self,
           samples: List[Tuple[TacticContext, str, float]],
           batch_size: Optional[int] = None,
           num_epochs: int = 1) -> None:
     for context, action, score in samples:
         assert score < 2000, score
         assert score != float("-Inf") and score != float(
             "Inf") and score == score
     self.optimizer.zero_grad()
     state_word_features, vec_features = zip(
         *[self._features(state) for state, _, _ in samples])
     encoded_actions = [
         self._encode_action(state, action) for state, action, _ in samples
     ]
     all_word_features = [
         list(ea) + swf
         for ea, swf in zip(encoded_actions, state_word_features)
     ]
     expected_outputs = [output for _, _, output in samples]
     if batch_size:
         batches: Iterable[Sequence[torch.Tensor]] = data.DataLoader(
             data.TensorDataset(torch.LongTensor(all_word_features),
                                torch.FloatTensor(vec_features),
                                torch.FloatTensor(expected_outputs)),
             batch_size=batch_size,
             num_workers=0,
             shuffle=True,
             pin_memory=True,
             drop_last=True)
     else:
         batches = [[
             torch.LongTensor(all_word_features),
             torch.FloatTensor(vec_features),
             torch.FloatTensor(expected_outputs)
         ]]
     for epoch in range(0, num_epochs):
         for batch in batches:
             self.optimizer.zero_grad()
             word_features_batch, vec_features_batch, \
                 expected_outputs_batch = batch
             outputs = self.model(word_features_batch, vec_features_batch)
             loss = self.criterion(outputs,
                                   maybe_cuda(expected_outputs_batch))
             loss.backward()
             self.optimizer.step()
 def train(self,
           samples: List[Tuple[TacticContext, str, float, float]],
           batch_size: Optional[int] = None,
           num_epochs: int = 1,
           show_loss: bool = False) -> None:
     for context, action, certainty, score in samples:
         assert score != float("-Inf") and score != float(
             "Inf") and score == score
     input_tensors = list(self.get_input_tensors(samples))
     expected_outputs = torch.FloatTensor(
         [output for _, _, certainty, output in samples])
     all_tensors = input_tensors + [expected_outputs]
     if batch_size:
         batches: Sequence[Sequence[torch.Tensor]] = data.DataLoader(
             data.TensorDataset(*all_tensors),
             batch_size=batch_size,
             num_workers=0,
             shuffle=True,
             pin_memory=True,
             drop_last=True)
     else:
         batches = [all_tensors]
     for epoch in range(0, num_epochs):
         epoch_loss = 0.
         for idx, batch in enumerate(batches):
             self.optimizer.zero_grad()
             word_features_batch, vec_features_batch, \
                 expected_outputs_batch = batch
             outputs = self.model(word_features_batch, vec_features_batch)
             loss = self.criterion(outputs,
                                   maybe_cuda(expected_outputs_batch))
             loss.backward()
             self.optimizer.step()
             self.adjuster.step()
             self.total_batches += 1
             epoch_loss += loss.item()
             eprint(epoch_loss / len(batches),
                    guard=show_loss and epoch % 10 == 0
                    and idx == len(batches) - 1)
             eprint("Batch {}: Learning rate {:.12f}".format(
                 self.total_batches, self.optimizer.param_groups[0]['lr']),
                    guard=show_loss and epoch % 10 == 0
                    and idx == len(batches) - 1)
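
Several of these training loops report progress through an eprint helper with a guard keyword, which is not shown in the snippets. A plausible minimal sketch, assuming it simply writes to stderr when the guard condition holds, is:

import sys

def eprint(*args, guard: bool = True, **kwargs):
    # Hypothetical sketch: print to stderr, but only when the guard is true, so
    # callers can thread a verbosity condition straight into the call site.
    if guard:
        print(*args, file=sys.stderr, **kwargs)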
Example 11
    def checkpoints(self, inputs : List[List[float]], outputs : List[int]) \
        -> Iterable[NeuralPredictorState]:
        print("Building tensors")
        dataloader = data.DataLoader(data.TensorDataset(
            torch.FloatTensor(inputs), torch.LongTensor(outputs)),
                                     batch_size=self.batch_size,
                                     num_workers=0,
                                     shuffle=True,
                                     pin_memory=True,
                                     drop_last=True)
        num_batches = int(len(inputs) / self.batch_size)
        dataset_size = num_batches * self.batch_size

        print("Initializing model...")
        training_start = time.time()
        for epoch in range(1, self.num_epochs):
            self.adjuster.step()
            print("Epoch {} (learning rate {:.6f})".format(
                epoch, self._optimizer.param_groups[0]['lr']))
            epoch_loss = 0.
            for batch_num, data_batch in enumerate(dataloader, start=1):
                self._optimizer.zero_grad()
                input_batch, output_batch = data_batch
                # with autograd.detect_anomaly():
                predictionDistribution = self._model(input_batch)
                output_var = maybe_cuda(Variable(output_batch))
                loss = self._criterion(predictionDistribution, output_var)
                loss.backward()
                self._optimizer.step()

                epoch_loss += loss.item()
                if batch_num % self.print_every == 0:
                    items_processed = batch_num * self.batch_size + \
                        (epoch - 1) * dataset_size
                    progress = items_processed / (dataset_size *
                                                  self.num_epochs)
                    print("{} ({:7} {:5.2f}%) {:.4f}".format(
                        timeSince(training_start, progress), items_processed,
                        progress * 100, epoch_loss / batch_num))
            state = self._model.state_dict()
            loss = epoch_loss / num_batches
            checkpoint = NeuralPredictorState(epoch, loss, state)
            yield checkpoint
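
The checkpoints generator above yields one NeuralPredictorState per epoch rather than saving anything itself. A hypothetical caller (the function and file name below are illustrative, not part of the project) might keep the lowest-loss checkpoint and serialize it:

import torch

def train_to_file(trainer, inputs, outputs, path="predictor-weights.dat"):
    # Illustrative driver: assumes each yielded checkpoint exposes a .loss field,
    # as the NeuralPredictorState constructed above does.
    best = None
    for checkpoint in trainer.checkpoints(inputs, outputs):
        if best is None or checkpoint.loss < best.loss:
            best = checkpoint
    torch.save(best, path)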
Example 12
 def predictKTacticsWithLoss_batch(self,
                                   in_data : List[TacticContext],
                                   k : int, corrects : List[str]) -> \
                                   Tuple[List[List[Prediction]], float]:
     assert self._embedding
     assert self.training_args
     with self._lock:
         prediction_distributions = self._predictDistributions(in_data)
     correct_stems = [
         serapi_instance.get_stem(correct) for correct in corrects
     ]
     output_var = maybe_cuda(
         Variable(
             LongTensor([
                 self._embedding.encode_token(correct_stem)
                 if self._embedding.has_token(correct_stem) else 0
                 for correct_stem in correct_stems
             ])))
     loss = self._criterion(prediction_distributions, output_var).item()
     if k > self._embedding.num_tokens():
         k = self._embedding.num_tokens()
     certainties_and_idxs_list = \
         [single_distribution.view(-1).topk(k) if len(context.hypotheses) > 0 else
          topk_with_filter(single_distribution.view(-1), k,
                           lambda certainty, idx:
                           not serapi_instance.tacticTakesHypArgs(
                               cast(Embedding, self._embedding).decode_token(idx)))
          for single_distribution, context in
          zip(prediction_distributions, in_data)]
     results = [[
         Prediction(
             self.add_arg(self._embedding.decode_token(stem_idx.item()),
                          in_datum.goal, in_datum.hypotheses,
                          self.training_args.max_length),
             math.exp(certainty.item()))
         for certainty, stem_idx in zip(*certainties_and_idxs)
     ]
                for certainties_and_idxs, in_datum in zip(
                    certainties_and_idxs_list, in_data)]
     return results, loss
Example 13
def predictKTacticsWithLoss(
        prediction_distribution: torch.FloatTensor, embedding: Embedding,
        k: int, correct: str,
        criterion: nn.Module) -> Tuple[List[Prediction], float]:
    if k > embedding.num_tokens():
        k = embedding.num_tokens()
    correct_stem = get_stem(correct)
    if embedding.has_token(correct_stem):
        output_var = maybe_cuda(
            Variable(torch.LongTensor([embedding.encode_token(correct_stem)])))
        loss = criterion(prediction_distribution.view(1, -1),
                         output_var).item()
    else:
        loss = 0

    certainties_and_idxs = prediction_distribution.view(-1).topk(k)
    results = [
        Prediction(
            embedding.decode_token(stem_idx.item()) + ".",
            math.exp(certainty.item()))
        for certainty, stem_idx in zip(*certainties_and_idxs)
    ]

    return results, loss
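
Both prediction helpers exponentiate the top-k certainties because the model's final layer is a LogSoftmax, so the values in the distribution are log-probabilities. A small self-contained illustration of that pattern, with made-up logits:

import math
import torch
import torch.nn.functional as F

logits = torch.tensor([2.0, 0.5, -1.0, 0.0])
log_probs = F.log_softmax(logits, dim=0)          # what a LogSoftmax head produces
certainties, idxs = log_probs.view(-1).topk(2)
predictions = [(idx.item(), math.exp(certainty.item()))
               for certainty, idx in zip(certainties, idxs)]
print(predictions)  # approximately [(0, 0.71), (1, 0.16)]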
Example 14
def train(dataset: SequenceSequenceDataset, hidden_size: int,
          learning_rate: float, num_encoder_layers: int,
          num_decoder_layers: int, max_length: int, num_epochs: int,
          batch_size: int, print_every: int, context_vocab_size: int,
          tactic_vocab_size: int) -> Iterable[Checkpoint]:
    print("Initializing PyTorch...")
    in_stream = [inputFromSentence(datum[0], max_length) for datum in dataset]
    out_stream = [inputFromSentence(datum[1], max_length) for datum in dataset]
    data_loader = data.DataLoader(data.TensorDataset(
        torch.LongTensor(out_stream), torch.LongTensor(in_stream)),
                                  batch_size=batch_size,
                                  num_workers=0,
                                  shuffle=True,
                                  pin_memory=True,
                                  drop_last=True)

    encoder = EncoderRNN(context_vocab_size,
                         hidden_size,
                         num_encoder_layers,
                         batch_size=batch_size)
    decoder = DecoderRNN(hidden_size,
                         tactic_vocab_size,
                         num_decoder_layers,
                         batch_size=batch_size)
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    optimizers = [encoder_optimizer, decoder_optimizer]
    criterion = maybe_cuda(nn.NLLLoss())

    start = time.time()
    num_items = len(dataset) * num_epochs
    total_loss = 0

    print("Training...")
    for epoch in range(num_epochs):
        print("Epoch {}".format(epoch))
        adjustLearningRates(learning_rate, optimizers, epoch)
        for batch_num, (output_batch, input_batch) in enumerate(data_loader):
            target_length = output_batch.size()[1]

            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            predictor_output = decoder.run_teach(
                encoder.run(cast(SomeLongTensor, input_batch)),
                cast(SomeLongTensor, output_batch))
            loss = maybe_cuda(Variable(torch.zeros(1)))  # float accumulator, not a LongTensor
            output_var = maybe_cuda(Variable(output_batch))
            for i in range(target_length):
                loss += criterion(predictor_output[i], output_var[:, i])
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()

            total_loss += (loss.data[0] / target_length) * batch_size

            if (batch_num + 1) % print_every == 0:
                items_processed = (batch_num +
                                   1) * batch_size + epoch * len(dataset)
                progress = items_processed / num_items
                print("{} ({} {:.2f}%) {:.4f}".format(
                    timeSince(start, progress), items_processed,
                    progress * 100, total_loss / items_processed))

        yield encoder.state_dict(), decoder.state_dict()
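
The adjustLearningRates call above is not defined in this snippet. A hypothetical sketch of such a helper, assuming a simple geometric decay of every optimizer's learning rate with the epoch number (the decay factor here is invented), could look like:

def adjustLearningRates(initial_lr, optimizers, epoch, gamma=0.9):
    # Hypothetical: decay each optimizer's learning rate geometrically per epoch.
    for optimizer in optimizers:
        for param_group in optimizer.param_groups:
            param_group['lr'] = initial_lr * (gamma ** epoch)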
Example 15
    def predictionCertainty(self, context: TacticContext, prediction: str) -> float:

        assert self.training_args
        assert self._model

        num_stem_poss = get_num_tokens(self.metadata)
        stem_width = min(self.training_args.max_beam_width, num_stem_poss)

        tokenized_premises, hyp_features, \
            nhyps_batch, tokenized_goal, \
            goal_mask, \
            word_features, vec_features = \
            sample_fpa(extract_dataloader_args(self.training_args),
                       self.metadata,
                       context.relevant_lemmas,
                       context.prev_tactics,
                       context.hypotheses,
                       context.goal)

        prediction_stem, prediction_args = \
            serapi_instance.split_tactic(prediction)
        prediction_stem_idx = encode_fpa_stem(extract_dataloader_args(self.training_args),
                                              self.metadata, prediction_stem)
        stem_distributions = self._model.stem_classifier(
            maybe_cuda(torch.LongTensor(word_features)),
            maybe_cuda(torch.FloatTensor(vec_features)))
        stem_certainties, stem_idxs = stem_distributions.topk(stem_width)
        if prediction_stem_idx in stem_idxs[0]:
            merged_stem_idxs = stem_idxs
            merged_stem_certainties = stem_certainties
        else:
            merged_stem_idxs = torch.cat(
                (maybe_cuda(torch.LongTensor([[prediction_stem_idx]])),
                 stem_idxs[:, :stem_width-1]),
                dim=1)
            cother = stem_certainties[:, :stem_width-1]
            val = stem_distributions[0][prediction_stem_idx]
            merged_stem_certainties = \
                torch.cat((val.view(1, 1), cother), dim=1)

        prediction_stem_idx_idx = list(merged_stem_idxs[0]).index(
            prediction_stem_idx)
        prediction_arg_idx = encode_fpa_arg(
            extract_dataloader_args(self.training_args),
            self.metadata,
            context.hypotheses + context.relevant_lemmas,
            context.goal,
            prediction_args)

        goal_arg_values = self.goal_token_scores(
            merged_stem_idxs, tokenized_goal, goal_mask)

        if len(tokenized_premises[0]) > 0:
            hyp_arg_values = self.hyp_name_scores(
                merged_stem_idxs[0], tokenized_goal[0],
                tokenized_premises[0], hyp_features[0])

            total_scores = torch.cat((goal_arg_values, hyp_arg_values), dim=2)
        else:
            total_scores = goal_arg_values

        final_probs, predicted_stem_idxs, predicted_arg_idxs = \
            self.predict_args(total_scores, merged_stem_certainties,
                              merged_stem_idxs)

        for prob, stem_idx_idx, arg_idx in zip(final_probs,
                                               predicted_stem_idxs,
                                               predicted_arg_idxs):
            if stem_idx_idx == prediction_stem_idx_idx and \
               arg_idx == prediction_arg_idx:
                return math.exp(prob.item())

        assert False, "Shouldn't be able to get here"
Example 16
    def _optimize_checkpoints(self, encoded_data : RestrictedDatasetType,
                              arg_values : Namespace,
                              tactic_vocab_size : int, term_vocab_size : int) \
        -> Iterable[NeuralPredictorState]:
        dataloader = data.DataLoader(data.TensorDataset(
            *(self._data_tensors(encoded_data, arg_values))),
                                     batch_size=arg_values.batch_size,
                                     num_workers=0,
                                     shuffle=True,
                                     pin_memory=True,
                                     drop_last=True)
        # Drop the last batch in the count
        num_batches = int(len(encoded_data) / arg_values.batch_size)
        dataset_size = num_batches * arg_values.batch_size

        print("Initializing model...")
        if arg_values.start_from:
            print("Starting from file")
            with open(arg_values.start_from, 'rb') as f:
                state = torch.load(f)
                self.load_saved_state(*state)  # type: ignore
            model = self._model
            epoch_start = state[2].epoch
        else:
            epoch_start = 1
            model = maybe_cuda(
                self._get_model(arg_values, tactic_vocab_size,
                                term_vocab_size))
        optimizer = optimizers[arg_values.optimizer](
            model.parameters(), lr=arg_values.learning_rate)
        adjuster = scheduler.StepLR(optimizer,
                                    arg_values.epoch_step,
                                    gamma=arg_values.gamma)

        training_start = time.time()

        print("Training...")
        for epoch in range(1, epoch_start):
            adjuster.step()
        for epoch in range(epoch_start, arg_values.num_epochs + 1):
            print("Epoch {} (learning rate {:.6f})".format(
                epoch, optimizer.param_groups[0]['lr']))

            epoch_loss = 0.

            for batch_num, data_batch in enumerate(dataloader, start=1):
                optimizer.zero_grad()
                loss = self._getBatchPredictionLoss(data_batch, model)
                loss.backward()
                optimizer.step()

                epoch_loss += (loss.item() / num_batches)

                if batch_num % arg_values.print_every == 0:
                    items_processed = batch_num * arg_values.batch_size + \
                        (epoch - 1) * len(encoded_data)
                    progress = items_processed / (len(encoded_data) *
                                                  arg_values.num_epochs)
                    print("{} ({:7} {:5.2f}%) {:.4f}".format(
                        timeSince(training_start, progress),
                        items_processed, progress * 100,
                        epoch_loss * (num_batches / batch_num)))
            adjuster.step()
            yield NeuralPredictorState(epoch, epoch_loss / num_batches,
                                       model.state_dict())
Example 17
def supervised_q(args: argparse.Namespace) -> None:
    replay_memory = []
    with open(args.tmp_file, 'r') as f:
        for idx, line in enumerate(tqdm(f, desc="Loading data")):
            replay_memory.append(LabeledTransition.from_dict(json.loads(line)))
    if args.max_tuples is not None:
        replay_memory = replay_memory[-args.max_tuples:]

    # Load the predictor
    predictor = cast(
        features_polyarg_predictor.FeaturesPolyargPredictor,
        predict_tactic.loadPredictorByFile(args.predictor_weights))

    q_estimator: QEstimator
    # Create an initial Q Estimator
    if args.estimator == "polyarg":
        q_estimator = PolyargQEstimator(args.learning_rate, args.epoch_step,
                                        args.gamma, predictor)
    else:
        q_estimator = FeaturesQEstimator(args.learning_rate, args.epoch_step,
                                         args.gamma)
    if args.start_from:
        q_estimator_name, *saved = \
          torch.load(args.start_from)
        if args.estimator == "polyarg":
            assert q_estimator_name == "polyarg evaluator", \
                q_estimator_name
        else:
            assert q_estimator_name == "features evaluator", \
                q_estimator_name
        q_estimator.load_saved_state(*saved)

    training_start = time.time()
    training_samples = assign_scores(args,
                                     q_estimator,
                                     predictor,
                                     replay_memory,
                                     progress=True)
    input_tensors = q_estimator.get_input_tensors(training_samples)
    rescore_lr = args.learning_rate

    for epoch in range(1, args.num_epochs + 1):
        scores = torch.FloatTensor(
            [score for _, _, _, score in training_samples])
        batches: Sequence[Sequence[torch.Tensor]] = data.DataLoader(
            data.TensorDataset(*(input_tensors + [scores])),
            batch_size=args.batch_size,
            num_workers=0,
            shuffle=True,
            pin_memory=True,
            drop_last=True)

        epoch_loss = 0.
        eprint("Epoch {}: Learning rate {:.12f}".format(
            epoch, q_estimator.optimizer.param_groups[0]['lr']),
               guard=args.show_loss)
        for idx, batch in enumerate(batches, start=1):
            q_estimator.optimizer.zero_grad()
            word_features_batch, vec_features_batch, \
                expected_outputs_batch = batch
            outputs = q_estimator.model(word_features_batch,
                                        vec_features_batch)
            loss = q_estimator.criterion(outputs,
                                         maybe_cuda(expected_outputs_batch))
            loss.backward()
            q_estimator.optimizer.step()
            q_estimator.total_batches += 1
            epoch_loss += loss.item()
            if idx % args.print_every == 0:
                items_processed = idx * args.batch_size + \
                    (epoch - 1) * len(replay_memory)
                progress = items_processed / (len(replay_memory) *
                                              args.num_epochs)
                eprint("{} ({:7} {:5.2f}%) {:.4f}".format(
                    timeSince(training_start, progress), items_processed,
                    progress * 100, epoch_loss * (len(batches) / idx)),
                       guard=args.show_loss)
        q_estimator.adjuster.step()

        q_estimator.save_weights(args.out_weights, args)
        if epoch % args.score_every == 0 and epoch < args.num_epochs:
            training_samples = assign_scores(args,
                                             q_estimator,
                                             predictor,
                                             replay_memory,
                                             progress=True)
            rescore_lr *= args.rescore_gamma
            q_estimator.optimizer.param_groups[0]['lr'] = rescore_lr

        pass

    pass
Example 18
 def forward(self, goal_batch: torch.LongTensor):
     goal_var = maybe_cuda(goal_batch)
     embedded_goals = self._token_embedding(goal_var)
     r_out, (h_n, h_c) = self._lstm(embedded_goals, None)
     scores = self._scorer(r_out[:, -1]).view(-1)
     return scores
Example 19
 def initHidden(self) -> SomeLongTensor:
     zeroes = cast(torch.LongTensor, maybe_cuda(torch.zeros(1, 1, self.hidden_size)))
     return Variable(zeroes)
Example 20
 def __init__(self) -> None:
     super().__init__()
     self._criterion = maybe_cuda(nn.NLLLoss())
     self._lock = threading.Lock()
 def __init__(self) -> None:
     self._criterion = maybe_cuda(nn.MSELoss())
 def __init__(self, num_tactics: int, hidden_size: int,
              num_layers: int) -> None:
     super().__init__()
     self.num_tactics = num_tactics
     # self.embedding = maybe_cuda(nn.Embedding(num_tactics, hidden_size))
     self.dnn = maybe_cuda(DNNScorer(num_tactics, hidden_size, num_layers))
Example 23
def main(
        lsun_data_dir: ('Base directory for the LSUN data'),
        image_output_prefix: ('Prefix for image output',
                              'option', 'o')='glo',
        code_dim: ('Dimensionality of latent representation space',
                   'option', 'd', int)=128,
        epochs: ('Number of epochs to train',
                 'option', 'e', int)=25,
        use_cuda: ('Use GPU?',
                   'flag', 'gpu')=False,
        batch_size: ('Batch size',
                     'option', 'b', int)=128,
        lr_g: ('Learning rate for generator',
               'option', None, float)=1.,
        lr_z: ('Learning rate for representation_space',
               'option', None, float)=10.,
        max_num_samples: ('Cap on the number of samples from the LSUN dataset',
                          'option', 'n', int)=-1,
        init: ('Initialization strategy for latent representation vectors',
               'option', 'i', str, ['pca', 'random'])='pca',
        n_pca: ('Number of samples to take for PCA',
                'option', None, int)=(64 * 64 * 3 * 2),
        loss: ('Loss type (Laplacian loss as in the paper, or L2 loss)',
               'option', 'l', str, ['lap_l1', 'l2'])='lap_l1',
):
    a = time.time()
    train_set = util.IndexedDataset(
        LSUN(lsun_data_dir, classes=['bedroom_train'], 
             transform=transforms.Compose([
                 transforms.Resize(64),
                 transforms.CenterCrop(64),
                 transforms.ToTensor(),
                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
             ]))
    )
    b = time.time()
    print("===train set===\n")
    print(b-a)

    a = time.time()
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, 
        shuffle=True, drop_last=True,
        num_workers=8, pin_memory=use_cuda,
    )
    b = time.time()
    print("===train loader===\n")
    print(b - a)
    # we don't really have a validation set here, but for visualization let us 
    # just take the first couple images from the dataset
    val_loader = torch.utils.data.DataLoader(train_set, shuffle=False, batch_size=8*8)

    if max_num_samples > 0:
        train_set.base.length = max_num_samples
        train_set.base.indices = [max_num_samples]

    # initialize representation space:
    if init == 'pca':
        from sklearn.decomposition import PCA
        a = time.time()
        # first, take a subset of train set to fit the PCA
        X_pca = np.vstack([
            X.cpu().numpy().reshape(len(X), -1)
            for i, (X, _, _)
             in zip(tqdm(range(n_pca // train_loader.batch_size), 'collect data for PCA'), 
                    train_loader)
        ])
        b = time.time()
        print("===pca loader===\n")
        print(b - a)
        print("perform PCA...")
        pca = PCA(n_components=code_dim)
        pca.fit(X_pca)
        # then, initialize latent vectors to the pca projections of the complete dataset
        a = time.time()
        Z = np.empty((len(train_loader.dataset), code_dim))
        for X, _, idx in tqdm(train_loader, 'pca projection'):
            Z[idx] = pca.transform(X.cpu().numpy().reshape(len(X), -1))
        b = time.time()
        print("===pca projection===\n")
        print(b - a)
    elif init == 'random':
        Z = np.random.randn(len(train_set), code_dim)

    Z = util.project_l2_ball(Z)

    g = util.maybe_cuda(generator.Generator(code_dim), use_cuda)
    loss_fn = laploss.LapLoss(max_levels=3) if loss == 'lap_l1' else nn.MSELoss()
    zi = util.maybe_cuda(torch.zeros((batch_size, code_dim)),use_cuda)
    zi = Variable(zi, requires_grad=True)
    optimizer = SGD([
        {'params': g.parameters(), 'lr': lr_g}, 
        {'params': zi, 'lr': lr_z}
    ])

    Xi_val, _, idx_val = next(iter(val_loader))
    util.imsave('target.png',
           make_grid(Xi_val.cpu() / 2. + 0.5, nrow=8).numpy().transpose(1, 2, 0))

    for epoch in range(epochs):
        losses = []
        progress = tqdm(total=len(train_loader), desc='epoch % 3d' % epoch)

        for i, (Xi, yi, idx) in enumerate(train_loader):
            a = time.time()
            Xi = Variable(util.maybe_cuda(Xi, use_cuda))
            zi.data = util.maybe_cuda(torch.FloatTensor(Z[idx.numpy()]), use_cuda)

            optimizer.zero_grad()
            rec = g(zi)
            loss = loss_fn(rec, Xi)
            loss.backward()
            optimizer.step()

            Z[idx.numpy()] = util.project_l2_ball(zi.data.cpu().numpy())

            losses.append(loss.data[0])
            progress.set_postfix({'loss': np.mean(losses[-100:])})
            progress.update()
            b = time.time()
            print("===1 data===\n")
            print(b - a)
        progress.close()

        # visualize reconstructions
        rec = g(Variable(util.maybe_cuda(torch.FloatTensor(Z[idx_val.numpy()]), use_cuda)))
        util.imsave('%s_rec_epoch_%03d.png' % (image_output_prefix, epoch),
               make_grid(rec.data.cpu() / 2. + 0.5, nrow=8).numpy().transpose(1, 2, 0))
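
The latent codes Z are repeatedly passed through util.project_l2_ball, which is not shown here. A minimal sketch of that projection, assuming the standard GLO constraint of keeping each code inside the unit l2 ball, would be:

import numpy as np

def project_l2_ball(z):
    # Hypothetical sketch: rescale each row of z so its l2 norm is at most 1,
    # leaving rows that are already inside the unit ball untouched.
    norms = np.maximum(np.linalg.norm(z, axis=1, keepdims=True), 1.0)
    return z / norms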
Example 24
 def __init__(self, modelclassObject) -> None:
     self._criterion = maybe_cuda(nn.NLLLoss())
     self._lock = threading.Lock()
     self._modelclassobject = modelclassObject
Example 25
 def initHidden(self) -> torch.FloatTensor:
     zeroes = cast(torch.FloatTensor, maybe_cuda(
         torch.zeros(1, self.batch_size, self.hidden_size)))
     return Variable(zeroes)
Example 26
    def _getBatchPredictionLoss(
            self, arg_values: Namespace, batch: Sequence[torch.Tensor],
            model: FeaturesPolyArgModel) -> torch.FloatTensor:
        tokenized_hyp_types_batch, hyp_features_batch, num_hyps_batch, \
            tokenized_goals_batch, goal_masks_batch, \
            word_features_batch, vec_features_batch, \
            stem_idxs_batch, arg_total_idxs_batch = \
                cast(Tuple[torch.LongTensor, torch.FloatTensor, torch.LongTensor,
                           torch.LongTensor, torch.ByteTensor,
                           torch.LongTensor, torch.FloatTensor,
                           torch.LongTensor, torch.LongTensor],
                     batch)
        batch_size = tokenized_goals_batch.size()[0]
        goal_size = tokenized_goals_batch.size()[1]
        stemDistributions = model.stem_classifier(word_features_batch,
                                                  vec_features_batch)
        num_stem_poss = stemDistributions.size()[1]
        stem_width = min(arg_values.max_beam_width, num_stem_poss)
        stem_var = maybe_cuda(Variable(stem_idxs_batch))
        predictedProbs, predictedStemIdxs = stemDistributions.topk(stem_width)
        mergedStemIdxs = []
        for stem_idx, predictedStemIdxList in zip(stem_idxs_batch,
                                                  predictedStemIdxs):
            if stem_idx.item() in predictedStemIdxList:
                mergedStemIdxs.append(predictedStemIdxList)
            else:
                mergedStemIdxs.append(
                    torch.cat((maybe_cuda(stem_idx.view(1)),
                               predictedStemIdxList[:stem_width - 1])))
        mergedStemIdxsT = torch.stack(mergedStemIdxs)
        correctPredictionIdxs = torch.LongTensor([
            list(idxList).index(stem_idx)
            for idxList, stem_idx in zip(mergedStemIdxs, stem_var)
        ])
        if arg_values.hyp_rnn:
            tokenized_hyps_var = maybe_cuda(
                Variable(tokenized_hyp_types_batch))
        else:
            tokenized_hyps_var = maybe_cuda(
                Variable(torch.zeros_like(tokenized_hyp_types_batch)))

        if arg_values.hyp_features:
            hyp_features_var = maybe_cuda(Variable(hyp_features_batch))
        else:
            hyp_features_var = maybe_cuda(
                Variable(torch.zeros_like(hyp_features_batch)))

        goal_arg_values = model.goal_args_model(
            mergedStemIdxsT.view(batch_size * stem_width),
            tokenized_goals_batch.view(batch_size, 1, goal_size).expand(-1, stem_width, -1)
            .contiguous().view(batch_size * stem_width, goal_size))\
            .view(batch_size, stem_width, goal_size + 1)
        goal_arg_values = torch.where(
            maybe_cuda(
                goal_masks_batch.view(batch_size, 1,
                                      arg_values.max_length + 1)).expand(
                                          -1, stem_width, -1), goal_arg_values,
            maybe_cuda(torch.full_like(goal_arg_values, -float("Inf"))))
        encoded_goals = model.goal_encoder(tokenized_goals_batch)

        hyp_lists_length = tokenized_hyp_types_batch.size()[1]
        hyp_length = tokenized_hyp_types_batch.size()[2]
        hyp_features_size = hyp_features_batch.size()[2]
        encoded_goal_size = encoded_goals.size()[1]

        encoded_goals_expanded = \
            encoded_goals.view(batch_size, 1, 1, encoded_goal_size)\
            .expand(-1, stem_width, hyp_lists_length, -1).contiguous()\
            .view(batch_size * stem_width * hyp_lists_length, encoded_goal_size)
        if not arg_values.goal_rnn:
            encoded_goals_expanded = torch.zeros_like(encoded_goals_expanded)
        stems_expanded = \
            mergedStemIdxsT.view(batch_size, stem_width, 1)\
            .expand(-1, -1, hyp_lists_length).contiguous()\
            .view(batch_size * stem_width * hyp_lists_length)
        hyp_arg_values_concatted = \
            model.hyp_model(stems_expanded,
                            encoded_goals_expanded,
                            tokenized_hyps_var
                            .view(batch_size, 1, hyp_lists_length, hyp_length)
                            .expand(-1, stem_width, -1, -1).contiguous()
                            .view(batch_size * stem_width * hyp_lists_length,
                                  hyp_length),
                            hyp_features_var
                            .view(batch_size, 1, hyp_lists_length, hyp_features_size)
                            .expand(-1, stem_width, -1, -1).contiguous()
                            .view(batch_size * stem_width * hyp_lists_length,
                                  hyp_features_size))
        assert hyp_arg_values_concatted.size() == torch.Size(
            [batch_size * stem_width * hyp_lists_length,
             1]), hyp_arg_values_concatted.size()
        hyp_arg_values = hyp_arg_values_concatted.view(batch_size, stem_width,
                                                       hyp_lists_length)
        total_arg_values = torch.cat((goal_arg_values, hyp_arg_values), dim=2)
        num_probs = hyp_lists_length + goal_size + 1
        total_arg_distribution = \
            self._softmax(total_arg_values.view(batch_size, stem_width * num_probs))
        total_arg_var = maybe_cuda(Variable(arg_total_idxs_batch +
                                            (correctPredictionIdxs * num_probs)))\
                                            .view(batch_size)
        loss = FloatTensor([0.])
        loss += self._criterion(stemDistributions, stem_var)
        loss += self._criterion(total_arg_distribution, total_arg_var)
        return loss
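
The stem-merging step in this loss computation splices the ground-truth stem index into the predicted beam whenever it is missing, so the argument loss can always be computed against a beam member. A standalone illustration of that splice, with invented indices:

import torch

def merge_correct_idx(correct_idx: torch.Tensor,
                      predicted_idxs: torch.Tensor) -> torch.Tensor:
    # If the correct index already sits in the beam, keep the beam as-is;
    # otherwise put it in front and drop the last candidate.
    if correct_idx.item() in predicted_idxs:
        return predicted_idxs
    return torch.cat((correct_idx.view(1), predicted_idxs[:-1]))

print(merge_correct_idx(torch.tensor(7), torch.tensor([3, 7, 9])))  # tensor([3, 7, 9])
print(merge_correct_idx(torch.tensor(5), torch.tensor([3, 7, 9])))  # tensor([5, 3, 7])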
Example 27
def train(dataset : List[Sentence],
          token_vocab_size : int, max_length : int, hidden_size : int,
          learning_rate : float, epoch_step : int, gamma : float,
          num_encoder_layers : int, num_decoder_layers : int,
          num_epochs : int, batch_size : int, print_every : int,
          optimizer_f : Callable[..., Optimizer]) \
          -> Iterable[Checkpoint]:
    curtime = time.time()
    print("Building pytorch dataset...", end="")
    sys.stdout.flush()
    data_loader = data.DataLoader(data.TensorDataset(
        torch.LongTensor(dataset[:]), torch.LongTensor(dataset[:])),
                                  batch_size=batch_size,
                                  num_workers=0,
                                  shuffle=True,
                                  pin_memory=True,
                                  drop_last=True)
    print(" {:.2f}s".format(time.time() - curtime))

    curtime = time.time()
    print("Initializing model...", end="")
    sys.stdout.flush()
    encoder = maybe_cuda(
        EncoderRNN(token_vocab_size,
                   hidden_size,
                   num_encoder_layers,
                   batch_size=batch_size))
    decoder = maybe_cuda(
        DecoderRNN(hidden_size,
                   token_vocab_size,
                   num_decoder_layers,
                   batch_size=batch_size))
    encoder_optimizer = optimizer_f(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optimizer_f(decoder.parameters(), lr=learning_rate)
    encoder_adjuster = scheduler.StepLR(encoder_optimizer, epoch_step, gamma)
    decoder_adjuster = scheduler.StepLR(decoder_optimizer, epoch_step, gamma)
    criterion = maybe_cuda(nn.NLLLoss())
    print(" {:.2f}s".format(time.time() - curtime))

    start = time.time()
    num_items = len(dataset) * num_epochs
    total_loss = 0

    print("Training...")
    for epoch in range(num_epochs):
        print("Epoch {}".format(epoch))
        # Adjust learning rates if needed
        encoder_adjuster.step()
        decoder_adjuster.step()

        # Process batches of data
        for batch_num, (input_batch, output_batch) in enumerate(data_loader):
            # Reset the optimizers
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()

            # Run the autoencoder
            decoded_output = \
                decoder.run_teach(
                    encoder.run(cast(torch.LongTensor, input_batch)),
                    cast(torch.LongTensor, output_batch))

            # Gather the losses
            loss = maybe_cuda(Variable(torch.zeros(1, dtype=torch.float32)))
            output_var = maybe_cuda(Variable(output_batch))
            target_length = output_batch.size()[1]
            for i in range(target_length):
                loss += criterion(decoded_output[i], output_var[:, i])
            total_loss += (loss.data.item() / target_length) * batch_size
            assert total_loss == total_loss
            assert isinstance(total_loss, float)

            # Update the weights
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()

            # Print status every once in a while
            if (batch_num + 1) % print_every == 0:
                items_processed = (batch_num +
                                   1) * batch_size + epoch * len(dataset)
                progress = items_processed / num_items
                print("{} ({} {:.2f}%) {:.4f}".format(
                    timeSince(start, progress), items_processed,
                    progress * 100, total_loss / items_processed))

        yield Checkpoint(encoder_state=encoder.state_dict(),
                         decoder_state=decoder.state_dict(),
                         training_loss=total_loss)
    pass