Esempio n. 1
0
def transform_dataset(dataset,
                      word2id,
                      class2id,
                      max_sens=40,
                      max_words=80,
                      padding=5):
    """Map the documents and class labels of a dataset to ids.

    Args:
        dataset: A pair ``(train_set, test_set)``; each set is a triple
            ``(docs, pop_classes, type_classes)``.
        word2id: Vocabulary lookup forwarded unchanged to ``split_doc2sen``.
        class2id: Mapping with the keys ``"pop"`` and ``"type"``, each
            holding a label -> id lookup.
        max_sens: Forwarded to ``split_doc2sen`` (presumably the maximum
            number of sentences per document -- confirm against that helper).
        max_words: Forwarded to ``split_doc2sen`` (presumably the maximum
            number of words per sentence -- confirm against that helper).
        padding: Forwarded to ``split_doc2sen``.

    Returns:
        A two-element list ``[train, test]`` where each element is the
        tuple ``(doc_ids, pop_ids, type_ids)``.

    Raises:
        KeyError: If a label is missing from its ``class2id`` table.
    """

    def docs_to_ids(docs):
        # One split_doc2sen call per document, in input order.
        return [
            split_doc2sen(text, word2id, max_sens, max_words, padding)
            for text in docs
        ]

    def labels_to_ids(task, labels):
        # The table is looked up per label, so an empty label list never
        # touches class2id at all.
        return [class2id[task][label] for label in labels]

    train_part, test_part = dataset
    train_docs, train_pop, train_type = train_part
    test_docs, test_pop, test_type = test_part

    train_ids = docs_to_ids(train_docs)
    test_ids = docs_to_ids(test_docs)

    train_pop_ids = labels_to_ids("pop", train_pop)
    test_pop_ids = labels_to_ids("pop", test_pop)

    train_type_ids = labels_to_ids("type", train_type)
    test_type_ids = labels_to_ids("type", test_type)

    train_out = (train_ids, train_pop_ids, train_type_ids)
    test_out = (test_ids, test_pop_ids, test_type_ids)
    return [train_out, test_out]
Esempio n. 2
0
def transform_dataset(dataset, word2id, class2id, max_sens=40, max_words=80, padding=5):
    """Convert a (train, test) dataset of documents and labels into ids.

    ``dataset`` is a pair of splits, each a triple
    ``(docs, pop_classes, type_classes)``.  Every document is handed to
    ``split_doc2sen`` together with ``word2id``, ``max_sens``, ``max_words``
    and ``padding``; every label is looked up in ``class2id["pop"]`` or
    ``class2id["type"]`` (a missing label raises ``KeyError``).

    Returns a list ``[train, test]`` of ``(doc_ids, pop_ids, type_ids)``
    tuples.
    """
    train_part, test_part = dataset
    train_docs, train_pop, train_type = train_part
    test_docs, test_pop, test_type = test_part

    # Documents first: train split, then test split.
    doc_ids = []
    for docs in (train_docs, test_docs):
        converted = []
        for doc in docs:
            converted.append(split_doc2sen(doc, word2id, max_sens, max_words, padding))
        doc_ids.append(converted)

    # Labels next, one task at a time (train before test within each task).
    # class2id is indexed per label so empty label lists never touch it.
    pop_ids = [[class2id["pop"][label] for label in labels] for labels in (train_pop, test_pop)]
    type_ids = [[class2id["type"][label] for label in labels] for labels in (train_type, test_type)]

    # Regroup per split: [(train docs, pop, type), (test docs, pop, type)].
    return list(zip(doc_ids, pop_ids, type_ids))