Exemple #1
0
def make_dataset(language: Language, mapping: dict, num_positive: int,
                 num_negative: int):
    """Build an unshuffled ``LanguageDataset`` from strings generated by *language*.

    Requests ``num_positive`` strings with ``truth_value=True`` and
    ``num_negative`` strings with ``truth_value=False`` from
    ``language.generate_batch_data``, encodes both batches with
    ``strings_to_tensor`` using the same maximum length, and concatenates
    them with the ``truth_value=True`` batch first.

    Args:
        language: Source of the strings; must provide ``generate_batch_data``.
        mapping: Forwarded unchanged to ``strings_to_tensor`` (presumably a
            symbol-to-index table -- confirm against that helper).
        num_positive: Number of ``truth_value=True`` strings to request.
        num_negative: Number of ``truth_value=False`` strings to request.

    Returns:
        A ``LanguageDataset`` built from the concatenated tensors, labels and
        lengths. It is NOT shuffled; callers must shuffle at use.

    Raises:
        ValueError: If both generated batches are empty (``max`` of an empty
            sequence).
    """
    accepted = language.generate_batch_data(
        num_datapoints=num_positive, truth_value=True)
    rejected = language.generate_batch_data(
        num_datapoints=num_negative, truth_value=False)

    # TODO: fix language._max_length
    # Until then, the longest generated string decides the shared length
    # handed to strings_to_tensor for both batches.
    longest = max(map(len, accepted + rejected))

    # NOTE(review): truth_value=True strings get label 0 and truth_value=False
    # strings get label 1 -- confirm this is the intended convention.
    per_class = []
    for strings, make_labels in ((accepted, torch.zeros),
                                 (rejected, torch.ones)):
        encoded, lengths = strings_to_tensor(strings, longest, mapping)
        labels = make_labels(len(strings), dtype=torch.long)
        per_class.append((encoded, labels, lengths))

    # Regroup into (tensors, labels, lengths) and join each across classes.
    all_tensors, all_labels, all_lengths = (
        torch.cat(group) for group in zip(*per_class))

    # unshuffled, must be shuffled at use
    return LanguageDataset(all_tensors, all_labels, all_lengths)