# test all vector models
for embedder_model in data_args["models"]:

    # identify prebuilt model if exists
    if isinstance(embedder_model, dict):

        # A dict entry has the shape {model_name: params}; unpack the single
        # name/params pair.  next(iter(...)) replaces the original
        # dict.items().pop(), which is Python-2-only: in Python 3,
        # dict.items() returns a view object that has no .pop() method.
        embedder_model, prebuilt_model_params = next(iter(embedder_model.items()))
        prebuilt_path_model = prebuilt_model_params.get("model", None)
        model_args = prebuilt_model_params.get("args", {})

        # initialize word vector embedder from the prebuilt model file
        embedder = WordVectorEmbedder(
            embedder_model,
            model_fullpath=prebuilt_path_model,
            model_args=model_args,
        )

        # training data (custom path from the config, or the default path
        # derived from the model path)
        if prebuilt_model_params.get("train", None):
            prebuilt_path_train = prebuilt_model_params.get("train")
        else:
            prebuilt_path_train = WordVectorBuilder.filename_train(prebuilt_path_model)

        # testing data (custom or default, same convention as above)
        if prebuilt_model_params.get("test", None):
            prebuilt_path_test = prebuilt_model_params.get("test")
        else:
            prebuilt_path_test = WordVectorBuilder.filename_test(prebuilt_path_model)

        # import pickled data
        # NOTE(review): pickle.load executes arbitrary code on untrusted
        # input — only point these paths at data files you trust.
        with open(prebuilt_path_train, "rb") as f:
            data_train = pickle.load(f)
        with open(prebuilt_path_test, "rb") as f:
            data_test = pickle.load(f)

        # update embedder parameters from the model filename
        # (the original chunk contained two overlapping copies of this
        # passage which resolved/loaded the training data and assigned
        # model_subset twice; deduplicated here into one pass)
        model_path_dir, model_path_filename, model_path_filext = (
            WordVectorBuilder.filename_components(prebuilt_path_model)
        )
        # NOTE(review): both group and subset receive the filename, exactly
        # as in the original — confirm model_group should not be
        # model_path_dir instead.
        embedder.model_group = model_path_filename
        embedder.model_subset = model_path_filename

        # initialize lists (will be converted later into numpy arrays)
        values_train = []
        labels_train = []
        values_test = []
        labels_test = []

        # initialize timer
        seconds_loading = 0

        logger.info(
            "processing {} samples from {}...".format(
                len(data_train) + len(data_test), prebuilt_path_model
            )
        )

        # load training dataset
        profile_results = timed_dataload(data_train, data_args, values_train, labels_train)
        # NOTE(review): these trailing statements reference data_train /
        # data_test / prebuilt_path_model, which are only bound in this
        # isinstance(dict) branch, so they are nested here.  If the original
        # file handles plain (non-dict) model names further down and also
        # binds those variables, move this tail back out to loop level.