Ejemplo n.º 1
0
        # test all vector models
        for embedder_model in data_args["models"]:

            # identify prebuilt model if exists
            if isinstance(embedder_model, dict):

                # initialize word vector embedder
                # A dict-style entry is unpacked into one (model name, parameters)
                # pair.  On Python 3, dict.items() returns a view that has no
                # .pop(), so it is materialized as a list first; this keeps the
                # original behavior (last pair taken, IndexError on an empty dict).
                embedder_model, prebuilt_model_params = list(embedder_model.items()).pop()
                prebuilt_path_model = prebuilt_model_params.get("model", None)
                model_args = prebuilt_model_params.get("args", {})
                embedder = WordVectorEmbedder(embedder_model, model_fullpath=prebuilt_path_model, model_args=model_args)

                # update embedder parameters
                # Only done when a prebuilt model path was configured; the second
                # value returned by filename_components() becomes the model subset.
                if prebuilt_path_model:
                    model_path_dir, model_path_filename, model_path_filext = WordVectorBuilder.filename_components(
                        prebuilt_path_model
                    )
                    embedder.model_subset = model_path_filename

                # training data (custom or default)
                # An explicit "train" entry wins; otherwise the path is derived
                # from the prebuilt model path by WordVectorBuilder.filename_train().
                if prebuilt_model_params.get("train", None):
                    prebuilt_path_train = prebuilt_model_params.get("train")
                else:
                    prebuilt_path_train = WordVectorBuilder.filename_train(prebuilt_path_model)
                # NOTE(review): pickle.load can execute arbitrary code from the
                # file; confirm these paths only ever point at trusted data.
                with open(prebuilt_path_train, "rb") as f:
                    data_train = pickle.load(f)

                # testing data (custom or default)
                if prebuilt_model_params.get("test", None):
                    prebuilt_path_test = prebuilt_model_params.get("test")
                else:
Ejemplo n.º 2
0
                prebuilt_path_train = WordVectorBuilder.filename_train(prebuilt_path_model)

            # testing data (custom or default)
            # An explicit 'test' entry wins; otherwise the path is derived from
            # the prebuilt model path by WordVectorBuilder.filename_test().
            if prebuilt_model_params.get('test', None):
                prebuilt_path_test = prebuilt_model_params.get('test')
            else:
                prebuilt_path_test = WordVectorBuilder.filename_test(prebuilt_path_model)

            # import pickled data
            # The training file is opened, loaded and closed before the testing
            # file is opened.
            # NOTE(review): pickle.load can execute arbitrary code from the file;
            # confirm these paths only ever point at trusted data.
            with open(prebuilt_path_train, 'rb') as f:
                data_train = pickle.load(f)
            with open(prebuilt_path_test, 'rb') as f:
                data_test = pickle.load(f)

            # update embedder parameters
            # Both model_group and model_subset receive the same value: the second
            # element returned by filename_components() (presumably the file name
            # without directory or extension -- TODO confirm against the helper).
            model_path_dir, model_path_filename, model_path_filext = WordVectorBuilder.filename_components(prebuilt_path_model)
            embedder.model_group = model_path_filename
            embedder.model_subset = model_path_filename

            # initialize lists (will be converted later into numpy arrays)
            values_train = []
            labels_train = []
            values_test = []
            labels_test = []

            # initialize timer
            # seconds_loading starts at zero; presumably accumulated from the
            # profiling results further down -- not visible in this fragment.
            seconds_loading = 0
            # Logs the combined number of training and testing samples.
            logger.info("processing {} samples from {}...".format(len(data_train)+len(data_test), prebuilt_path_model))

            # load training dataset
            # timed_dataload() receives the two empty training lists; presumably it
            # fills them in place and returns profiling data -- TODO confirm.
            profile_results = timed_dataload(data_train, data_args, values_train, labels_train)