def load_testing(self, secondary_label=""):
    """
    Load the pickled testing set for this object's label.

    Three pickle paths are built under
    ``<self.pickle_dir>/<self.label.feature_selection>/`` and handed to
    ``load_X_y_refIndex``; the three values it returns are stored on
    ``self._X``, ``self._y`` and ``self._ref_index``.

    Each file name is the ``"_"``-joined sequence
    ``<label.type>[_<secondary_label>]_<tag>_<feature_selection>_pickle``.

    :param secondary_label: optional extra name component inserted right
        after the label type; omitted from the file name when empty.
    :returns: ``self``, so calls can be chained.
    """
    data_logger.info("Loading testing set for {}".format(self.label))

    feature_selection = self.label.feature_selection

    # Leading name components; the secondary label only appears when given.
    if secondary_label:
        leading = [self.label.type, secondary_label]
    else:
        leading = [self.label.type]

    pickle_folder = os.path.join(self.pickle_dir, feature_selection)

    def path_for(tag):
        # Same name layout for every file; only the tag component changes.
        file_name = "_".join(leading + [tag, feature_selection, "pickle"])
        return pickle_folder + "/{}".format(file_name)

    # NOTE(review): the reference-index file uses the "trainRefIndex" tag even
    # though this is the testing loader -- confirm against the code that
    # writes these pickles before changing it.
    self._X, self._y, self._ref_index = load_X_y_refIndex(
        path_for("testX"),
        path_for("testy"),
        path_for("trainRefIndex"),
    )
    return self
def load_training(self, stack_per_sample=3000, maybe_load_vectorizer_from_pickle=True,
                  maybe_load_training_from_pickle=True, pickle_training=True, secondary_label=""):
    """
    Load the pickled training set for this object's label.

    Three pickle paths are built under
    ``<self.pickle_dir>/<self.label.feature_selection>/`` and handed to
    ``load_X_y_refIndex``; the three values it returns are stored on
    ``self._X``, ``self._y`` and ``self._ref_index``.

    Each file name is the ``"_"``-joined sequence
    ``<label.type>[_<secondary_label>]_<tag>_<feature_selection>_pickle``.

    :param stack_per_sample: not read by this method; kept so existing
        callers keep working.
    :param maybe_load_vectorizer_from_pickle: not read by this method.
    :param maybe_load_training_from_pickle: not read by this method.
    :param pickle_training: not read by this method.
    :param secondary_label: optional extra name component inserted right
        after the label type; omitted from the file name when empty.
    :returns: ``self``, so calls can be chained.
    """
    # Data is already present on the instance: only note that it is replaced.
    already_loaded = self._X is not None
    if already_loaded:
        data_logger.info("Reloading training samples")

    data_logger.info("Loading training set for {}".format(self.label))

    feature_selection = self.label.feature_selection

    # Leading name components; the secondary label only appears when given.
    if secondary_label:
        leading = [self.label.type, secondary_label]
    else:
        leading = [self.label.type]

    pickle_folder = os.path.join(self.pickle_dir, feature_selection)

    def path_for(tag):
        # Same name layout for every file; only the tag component changes.
        file_name = "_".join(leading + [tag, feature_selection, "pickle"])
        return pickle_folder + "/{}".format(file_name)

    self._X, self._y, self._ref_index = load_X_y_refIndex(
        path_for("trainX"),
        path_for("trainy"),
        path_for("trainRefIndex"),
    )
    return self