def generate_star_detect_att_weights(r1, r2, padding_size, embedding_size, embedding_source, learning_rate, epoch):
    """Load a trained StarDetect checkpoint, run it over the balanced test split,
    and pickle the per-word attention weights.

    The model is built with the attention-output flag enabled so that
    `generate_predictions` yields attention weights rather than class labels.
    """
    _, _, test_ids = load_train_val_test_subsets(r1, r2)
    vocab = load_vocabulary(r1, r2)
    embedding_weights = load_embedding_weights(vocab, embedding_size, embedding_source, r1, r2)

    # Rebuild the architecture and restore the weights saved at `epoch`.
    model = StarDetect(r1, r2, embedding_source[0])
    model.build(padding_size, len(vocab), embedding_size, embedding_weights, True)
    model.compile(learning_rate)
    model.load_weights(f'StarDetect_{r1}_{r2}_{embedding_source[0]}-{epoch}')
    # model.summary()

    # Sequences are loaded unpadded (final False) so the raw token positions
    # can later be aligned with their attention weights; padding is applied
    # explicitly just before prediction.
    w_to_i, i_to_w = load_word_mappings(vocab, r1, r2)
    test_sequences, test_labels = load_sequences(r1, r2, test_ids, w_to_i, padding_size, False)
    test_sequences, test_labels, test_ids = balance_dataset(
        test_sequences, test_labels, 'star', test_ids)

    print(
        f'Generating StarDetect model attention weights with reviews in range {r1}-{r2} with {embedding_source} '
        f'embedding vectors ...')

    predictions_file = f'data/StarDetect_{r1}_{r2}_{embedding_source[0]}-{epoch}_att_predictions.pkl'
    predicted_weights = generate_predictions(
        model, pad_sequences(test_sequences, padding_size), predictions_file)

    # Map each attention weight back onto its source word and persist the result.
    word_weights = map_word_with_weights(predicted_weights, test_sequences, test_ids, i_to_w)
    with open(
            f'data/StarDetect_{r1}_{r2}_{embedding_source[0]}-{epoch}_word_weights.pkl',
            'wb') as out_handle:
        pickle.dump(word_weights, out_handle)
def train_star_detect(r1, r2, padding_size, embedding_size, embedding_source, learning_rate, batch_size, num_epochs):
    """Build and train a StarDetect model on the balanced train/validation splits.

    Args:
        r1, r2: Review-count range identifying the data subset.
        padding_size: Fixed sequence length used to build the model input layer.
        embedding_size: Dimensionality of the word-embedding vectors.
        embedding_source: Embedding identifier; its first element tags saved artifacts.
        learning_rate: Optimizer learning rate passed to `model.compile`.
        batch_size: Mini-batch size for both data generators.
        num_epochs: Number of training epochs.

    Side effects: prints the model summary and timing; `model.train` is expected
    to persist checkpoints (see the corresponding `load_weights` in evaluation).
    """
    train_ids, val_ids, _ = load_train_val_test_subsets(r1, r2)
    vocab = load_vocabulary(r1, r2)
    embedding_weights = load_embedding_weights(vocab, embedding_size, embedding_source, r1, r2)

    model = StarDetect(r1, r2, embedding_source[0])
    model.build(padding_size, len(vocab), embedding_size, embedding_weights)
    model.compile(learning_rate)
    model.summary()

    w_to_i, _ = load_word_mappings(vocab, r1, r2)
    # FIX: the id lists returned by balance_dataset were previously rebound to
    # train_ids / val_ids but never used afterwards — discard them explicitly.
    train_sequences, train_labels = load_sequences(r1, r2, train_ids, w_to_i, padding_size)
    train_sequences, train_labels, _ = balance_dataset(train_sequences, train_labels, 'star', train_ids)
    val_sequences, val_labels = load_sequences(r1, r2, val_ids, w_to_i, padding_size)
    val_sequences, val_labels, _ = balance_dataset(val_sequences, val_labels, 'star', val_ids)

    train_data_generator = star_detect_data_generator(train_sequences, train_labels, batch_size)
    val_data_generator = star_detect_data_generator(val_sequences, val_labels, batch_size)

    print(f'Training StarDetect model with reviews in range {r1}-{r2} with {embedding_source} embedding vectors ...')
    start_time = time.time()
    # Truncating division: a final partial batch is dropped from each epoch.
    steps_per_epoch = len(train_sequences) // batch_size
    model.train(num_epochs, steps_per_epoch, train_data_generator, val_data_generator)
    end_time = time.time()
    print(f'Training took {end_time - start_time} seconds')
def evaluate_star_detect(r1, r2, padding_size, embedding_size, embedding_source, learning_rate, epoch):
    """Restore a StarDetect checkpoint and score it on the balanced test split.

    Star labels are shifted from 1-based ratings to 0-based class indices
    before being compared with the model's predicted classes.
    """
    _, _, test_ids = load_train_val_test_subsets(r1, r2)
    vocab = load_vocabulary(r1, r2)
    embedding_weights = load_embedding_weights(vocab, embedding_size, embedding_source, r1, r2)

    # Recreate the network and load the weights saved at the requested epoch.
    model = StarDetect(r1, r2, embedding_source[0])
    model.build(padding_size, len(vocab), embedding_size, embedding_weights)
    model.compile(learning_rate)
    model.load_weights(f'StarDetect_{r1}_{r2}_{embedding_source[0]}-{epoch}')
    # model.summary()

    w_to_i, _ = load_word_mappings(vocab, r1, r2)
    test_sequences, test_labels = load_sequences(r1, r2, test_ids, w_to_i, padding_size)
    test_sequences, test_labels, test_ids = balance_dataset(
        test_sequences, test_labels, 'star', test_ids)

    print(
        f'Evaluating StarDetect model with reviews in range {r1}-{r2} with {embedding_source} embedding vectors ...'
    )

    # Star ratings are 1-based; the classifier's output classes are 0-based.
    test_labels = [int(rating - 1) for rating in test_labels]

    predictions_file = f'data/StarDetect_{r1}_{r2}_{embedding_source[0]}-{epoch}_predictions.pkl'
    predicted_labels = generate_predictions(model, test_sequences, predictions_file)
    evaluate(test_labels, predicted_labels)