import os
import pickle

import numpy as np
import tensorflow as tf  # TF 1.x session API is used throughout

# io_helper, text_embeddings, config, and CNN are project-local modules;
# args and dirname are defined by the surrounding scripts.


def load_model(path, embeddings, loss_function, just_predict=True):
    # Deserialize the stored hyperparameters and variable values.
    parameters, model = io_helper.deserialize(path)

    print("Defining and initializing model...")
    classifier = CNN(embeddings=(parameters["embedding_size"], embeddings),
                     num_conv_layers=parameters["num_convolutions"],
                     filters=parameters["filters"],
                     k_max_pools=parameters["k_max_pools"],
                     manual_features_size=parameters["manual_features_size"])
    classifier.define_model(parameters["max_text_length"],
                            parameters["num_classes"],
                            loss_function,
                            -1,
                            l2_reg_factor=parameters["reg_factor"],
                            update_embeddings=parameters["upd_embs"])

    # The optimization graph is only needed if the model will be trained further.
    if not just_predict:
        classifier.define_optimization(learning_rate=parameters["learning_rate"])

    print("Initializing session...", flush=True)
    session = tf.InteractiveSession()
    session.run(tf.global_variables_initializer())

    # Overwrite the freshly initialized variables with the deserialized values.
    classifier.set_variable_values(session, model)
    classifier.set_distinct_labels(parameters["dist_labels"])
    return classifier, session
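# Hypothetical end-to-end usage of load_model (a sketch: the file paths and the
# loss function below are illustrative placeholders, not values from this
# repository):
def _load_model_example():
    t_embs = text_embeddings.Embeddings()
    t_embs.load_embeddings("embeddings/wiki.en.vec", 200000, language='en',
                           print_loading=True, skip_first_line=True)
    classifier, session = load_model("models/cnn_classifier.ser",
                                     t_embs.lang_embeddings["en"],
                                     tf.losses.softmax_cross_entropy,
                                     just_predict=True)
    return classifier, session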
if not os.path.isfile(args.evaldata):
    print("Error: File with the evaluation dataset not found.")
    exit(code=1)
if not os.path.isfile(args.modelpath):
    print("Error: Model file not found.")
    exit(code=1)

embs_path = args.embs
simlex_path = args.evaldata
model_path = args.modelpath

# deserializing the model
hyps, variables = io_helper.deserialize(model_path)
print(hyps)
same_encoder, hidden_layer_sizes, distance_measure = hyps

# loading/merging word embeddings
t_embeddings = text_embeddings.Embeddings()
t_embeddings.load_embeddings(embs_path, 200000, language='en',
                             print_loading=True, skip_first_line=True)
t_embeddings.inverse_vocabularies()

vocabulary_size = len(t_embeddings.lang_vocabularies["en"])
embeddings = t_embeddings.lang_embeddings["en"].astype(np.float64)
embedding_size = t_embeddings.emb_sizes["en"]
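# Illustrative similarity check (a sketch: assumes lang_vocabularies["en"] maps
# a token to its row index in the embedding matrix, as the len() call above
# suggests; the two words and the helper name are arbitrary examples):
def _cosine_similarity_example(word1="car", word2="automobile"):
    vocab = t_embeddings.lang_vocabularies["en"]
    v1, v2 = embeddings[vocab[word1]], embeddings[vocab[word2]]
    return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))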
def load_labels_and_max_length(path):
    parameters, model = io_helper.deserialize(path)
    return parameters["dist_labels"], parameters["max_text_length"]
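# Example call (a sketch, assuming `path` points at the same serialization
# format consumed by load_model above):
#   dist_labels, max_text_length = load_labels_and_max_length(model_path)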
# loading the pre-trained model
########################################################################################
print("Forwarded arguments: ")
model_name = os.path.join(dirname, config.MODEL)
print("Model name: " + str(model_name))

lang_query = config.QUERY_LANG
lang_doc = config.DOCS_LANG
preds_path = config.PREDS_PATH
print("Prediction language pair: " + lang_query + " " + lang_doc)

print("Deserializing the model...")
model_serialization_path = model_name
hyperparams, variables = io_helper.deserialize(model_serialization_path)
print("Hyperparameters: ")
print(hyperparams)

hyp_first_enc, hyp_second_encoder, batch_size, same_encoder, cross_attention, \
    self_attention, share_cross_attention, share_intra_attention, \
    bilinear_product_score = hyperparams
state_size, max_len, forward_cell_type, backward_cell_type = hyp_first_enc
########################################################################################

# loading/merging word embeddings
########################################################################################
vocab_q = pickle.load(
    open(os.path.join(dirname, config.QUERY_LANG_VOCAB), "rb"))
vectors_q = np.load(os.path.join(dirname, config.QUERY_LANG_EMBS))
# L2-normalize each embedding row so that dot products become cosine similarities.
norms_q = vectors_q / np.transpose([np.linalg.norm(vectors_q, 2, 1)])
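# Equivalent, arguably clearer formulation of the row normalization above
# (a sketch; _l2_normalize_rows is a hypothetical helper, not used elsewhere
# in this repository): keepdims keeps the norms as a column vector, so
# broadcasting divides each row by its own L2 norm.
def _l2_normalize_rows(matrix):
    return matrix / np.linalg.norm(matrix, axis=1, keepdims=True)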