Beispiel #1
0
    def load_testing(self,secondary_label=""):
        """
        Load the pickled testing set for this object's label.

        The three pickle files (data matrix, labels, reference index) are looked up in
        ``<pickle_dir>/<label.feature_selection>/``. Each file name is built by joining
        ``label.type``, the optional ``secondary_label``, a data-set tag,
        ``label.feature_selection`` and ``"pickle"`` with underscores.

        :param secondary_label: optional extra name component placed right after the
            label type; omitted from the file names when empty.
        :returns: self, with ``_X``, ``_y`` and ``_ref_index`` set to the values
            returned by ``load_X_y_refIndex`` for those three paths.

        """

        data_logger.info("Loading testing set for {}".format(self.label))

        # Leading name components shared by all three pickle files.
        name_prefix=[self.label.type]
        if secondary_label:
            name_prefix.append(secondary_label)

        feature_selection=self.label.feature_selection
        pickle_folder=os.path.join(self.pickle_dir,feature_selection)

        def pickle_path(tag):
            # Only the data-set tag differs between the three file names.
            file_name="_".join(name_prefix+[tag,feature_selection,"pickle"])
            return os.path.join(pickle_folder,file_name)

        # NOTE(review): the reference index is read from a "trainRefIndex" file even though
        # this is the testing set. Kept as-is because the code that writes the pickles is not
        # visible here -- confirm whether "testRefIndex" was intended.
        self._X,self._y,self._ref_index=load_X_y_refIndex(pickle_path("testX"),
                                                          pickle_path("testy"),
                                                          pickle_path("trainRefIndex"))

        return self
Beispiel #2
0
    def load_training(self,stack_per_sample=3000,maybe_load_vectorizer_from_pickle=True,
                      maybe_load_training_from_pickle=True,pickle_training=True,secondary_label=""):
        """
        Load the pickled training set for this object's label.

        The three pickle files (data matrix, labels, reference index) are looked up in
        ``<pickle_dir>/<label.feature_selection>/``. Each file name is built by joining
        ``label.type``, the optional ``secondary_label``, a data-set tag,
        ``label.feature_selection`` and ``"pickle"`` with underscores.

        :param stack_per_sample: not read by this method's body; kept for caller compatibility.
        :param maybe_load_vectorizer_from_pickle: not read by this method's body.
        :param maybe_load_training_from_pickle: not read by this method's body.
        :param pickle_training: not read by this method's body.
        :param secondary_label: optional extra name component placed right after the
            label type; omitted from the file names when empty.
        :returns: self, with ``_X``, ``_y`` and ``_ref_index`` set to the values
            returned by ``load_X_y_refIndex`` for those three paths.

        """

        # Data is already present on this object; it will be overwritten below.
        if self._X is not None:
            data_logger.info("Reloading training samples")

        data_logger.info("Loading training set for {}".format(self.label))

        # Leading name components shared by all three pickle files.
        name_prefix=[self.label.type]
        if secondary_label:
            name_prefix.append(secondary_label)

        feature_selection=self.label.feature_selection
        pickle_folder=os.path.join(self.pickle_dir,feature_selection)

        def pickle_path(tag):
            # Only the data-set tag differs between the three file names.
            file_name="_".join(name_prefix+[tag,feature_selection,"pickle"])
            return os.path.join(pickle_folder,file_name)

        self._X,self._y,self._ref_index=load_X_y_refIndex(pickle_path("trainX"),
                                                          pickle_path("trainy"),
                                                          pickle_path("trainRefIndex"))

        return self