def generate_star_detect_att_weights(r1, r2, padding_size, embedding_size,
                                     embedding_source, learning_rate, epoch):
    """Export per-word attention weights from a saved StarDetect checkpoint.

    Rebuilds a StarDetect model for the ``r1``-``r2`` review range, restores
    the weights stored for ``epoch``, runs the balanced test subset through
    ``generate_predictions`` and pickles the output of
    ``map_word_with_weights`` into the ``data/`` directory.

    Args:
        r1: First bound of the review range; forwarded to every loader.
        r2: Second bound of the review range; forwarded to every loader.
        padding_size: Length handed to ``model.build``, ``load_sequences``
            and ``pad_sequences``.
        embedding_size: Embedding dimension handed to the embedding loader
            and to ``model.build``.
        embedding_source: Embedding identifier; ``embedding_source[0]`` is
            used to tag the model and the file names.
        learning_rate: Value passed to ``model.compile``.
        epoch: Suffix of the checkpoint name that is loaded and of the
            output file names.

    Returns:
        None. The results are written to disk.
    """
    # Only the third subset (test ids) is needed here.
    subsets = load_train_val_test_subsets(r1, r2)
    test_ids = subsets[2]

    vocab = load_vocabulary(r1, r2)
    embedding_weights = load_embedding_weights(
        vocab, embedding_size, embedding_source, r1, r2)

    model = StarDetect(r1, r2, embedding_source[0])
    # NOTE(review): the trailing True presumably makes the model expose its
    # attention weights as output -- confirm against StarDetect.build.
    model.build(
        padding_size, len(vocab), embedding_size, embedding_weights, True)
    model.compile(learning_rate)
    model.load_weights(f'StarDetect_{r1}_{r2}_{embedding_source[0]}-{epoch}')

    word_to_index, index_to_word = load_word_mappings(vocab, r1, r2)

    # NOTE(review): the trailing False presumably disables padding inside
    # load_sequences, since padding is applied explicitly further down --
    # confirm against load_sequences.
    raw_sequences, raw_labels = load_sequences(
        r1, r2, test_ids, word_to_index, padding_size, False)
    balanced = balance_dataset(raw_sequences, raw_labels, 'star', test_ids)
    test_sequences, _test_labels, test_ids = balanced

    banner = (
        f'Generating StarDetect model attention weights with reviews in range {r1}-{r2} with {embedding_source} '
        f'embedding vectors ...')
    print(banner)

    predictions_file = f'data/StarDetect_{r1}_{r2}_{embedding_source[0]}-{epoch}_att_predictions.pkl'
    # The model receives padded input, while the word mapping below works on
    # the sequences as they were loaded.
    padded_sequences = pad_sequences(test_sequences, padding_size)
    predicted_weights = generate_predictions(
        model, padded_sequences, predictions_file)

    word_weights = map_word_with_weights(
        predicted_weights, test_sequences, test_ids, index_to_word)

    output_path = f'data/StarDetect_{r1}_{r2}_{embedding_source[0]}-{epoch}_word_weights.pkl'
    with open(output_path, 'wb') as out_file:
        pickle.dump(word_weights, out_file)
# Example #2
# 0
def train_star_detect(r1, r2, padding_size, embedding_size, embedding_source, learning_rate, batch_size, num_epochs):
    """Train a StarDetect model on the balanced train/validation subsets.

    Builds and compiles a fresh StarDetect model for the ``r1``-``r2`` review
    range, wraps the balanced training and validation sequences in
    ``star_detect_data_generator`` and calls ``model.train``. The wall-clock
    duration of the training call is printed afterwards.

    Args:
        r1: First bound of the review range; forwarded to every loader.
        r2: Second bound of the review range; forwarded to every loader.
        padding_size: Length handed to ``model.build`` and ``load_sequences``.
        embedding_size: Embedding dimension handed to the embedding loader
            and to ``model.build``.
        embedding_source: Embedding identifier; ``embedding_source[0]`` is
            used to tag the model.
        learning_rate: Value passed to ``model.compile``.
        batch_size: Batch size for both data generators; also the divisor
            used to derive the number of steps per epoch.
        num_epochs: Number of epochs passed to ``model.train``.

    Returns:
        None.
    """
    # The third subset (test ids) is not used for training.
    subsets = load_train_val_test_subsets(r1, r2)
    train_ids, val_ids = subsets[0], subsets[1]

    vocab = load_vocabulary(r1, r2)
    embedding_weights = load_embedding_weights(
        vocab, embedding_size, embedding_source, r1, r2)

    model = StarDetect(r1, r2, embedding_source[0])
    model.build(padding_size, len(vocab), embedding_size, embedding_weights)
    model.compile(learning_rate)
    model.summary()

    word_to_index = load_word_mappings(vocab, r1, r2)[0]

    # Training subset: load, then balance by the 'star' criterion.
    train_sequences, train_labels = load_sequences(
        r1, r2, train_ids, word_to_index, padding_size)
    train_sequences, train_labels, train_ids = balance_dataset(
        train_sequences, train_labels, 'star', train_ids)

    # Validation subset: same treatment.
    val_sequences, val_labels = load_sequences(
        r1, r2, val_ids, word_to_index, padding_size)
    val_sequences, val_labels, val_ids = balance_dataset(
        val_sequences, val_labels, 'star', val_ids)

    train_batches = star_detect_data_generator(
        train_sequences, train_labels, batch_size)
    val_batches = star_detect_data_generator(
        val_sequences, val_labels, batch_size)

    print(f'Training StarDetect model with reviews in range {r1}-{r2} with {embedding_source} embedding vectors ...')

    start_time = time.time()
    # Integer division: a trailing partial batch is not counted as a step.
    steps_per_epoch = len(train_sequences) // batch_size
    model.train(num_epochs, steps_per_epoch, train_batches, val_batches)
    end_time = time.time()

    print(f'Training took {end_time - start_time} seconds')
def evaluate_star_detect(r1, r2, padding_size, embedding_size,
                         embedding_source, learning_rate, epoch):
    """Evaluate a saved StarDetect checkpoint on the balanced test subset.

    Rebuilds a StarDetect model for the ``r1``-``r2`` review range, restores
    the weights stored for ``epoch``, generates predictions for the balanced
    test sequences and passes them, together with the shifted true labels,
    to ``evaluate``.

    Args:
        r1: First bound of the review range; forwarded to every loader.
        r2: Second bound of the review range; forwarded to every loader.
        padding_size: Length handed to ``model.build`` and ``load_sequences``.
        embedding_size: Embedding dimension handed to the embedding loader
            and to ``model.build``.
        embedding_source: Embedding identifier; ``embedding_source[0]`` is
            used to tag the model and the file names.
        learning_rate: Value passed to ``model.compile``.
        epoch: Suffix of the checkpoint name that is loaded and of the
            predictions file name.

    Returns:
        None.
    """
    # Only the third subset (test ids) is needed here.
    subsets = load_train_val_test_subsets(r1, r2)
    test_ids = subsets[2]

    vocab = load_vocabulary(r1, r2)
    embedding_weights = load_embedding_weights(
        vocab, embedding_size, embedding_source, r1, r2)

    model = StarDetect(r1, r2, embedding_source[0])
    model.build(padding_size, len(vocab), embedding_size, embedding_weights)
    model.compile(learning_rate)
    model.load_weights(f'StarDetect_{r1}_{r2}_{embedding_source[0]}-{epoch}')

    word_to_index = load_word_mappings(vocab, r1, r2)[0]

    loaded_sequences, loaded_labels = load_sequences(
        r1, r2, test_ids, word_to_index, padding_size)
    test_sequences, balanced_labels, test_ids = balance_dataset(
        loaded_sequences, loaded_labels, 'star', test_ids)

    print(
        f'Evaluating StarDetect model with reviews in range {r1}-{r2} with {embedding_source} embedding vectors ...'
    )

    # Shift every label down by one and coerce it to int.
    # NOTE(review): presumably maps 1-based star ratings onto 0-based class
    # indices -- confirm against the label encoding used in training.
    test_labels = []
    for label in balanced_labels:
        test_labels.append(int(label - 1))

    predictions_file = f'data/StarDetect_{r1}_{r2}_{embedding_source[0]}-{epoch}_predictions.pkl'
    predicted_labels = generate_predictions(
        model, test_sequences, predictions_file)

    evaluate(test_labels, predicted_labels)