def extract_feature_dependent_feature(self, extractor, force_extraction=False, verbose=0, add_args=None,
                                      custom_name=None):
    """
    Extracts a feature that may depend on other, already stored features and saves it in the database.
    The actual work is delegated to extract_feature_dependent_feature_base.

    Parameters
    ----------
    extractor : function, which takes the path of a data point, a dictionary of all other features and *args as
        parameters and returns a feature
    force_extraction : boolean, if True - the feature is re-extracted even if a feature with this name already
        exists in the database; otherwise it is extracted only when it does not exist yet.
        default value: False
    verbose : int, if bigger than 0, the current number of the file being processed is printed
    add_args : optional additional arguments for the extractor (list/dictionary/tuple/whatever).
        default value: None
    custom_name : string, optional name under which the feature is stored in the database. If None, the
        extractor function name is used instead.
        default value: None

    Returns
    -------
    The result of extract_feature_dependent_feature_base (documented as None)

    Raises
    ------
    errors.EmptyDatabase : if the database has not been prepopulated
    """
    # Guard: nothing can be extracted from a database that was never populated.
    if self._prepopulated is False:
        raise errors.EmptyDatabase(self.dbpath)

    return extract_feature_dependent_feature_base(
        self.dbpath,
        self.path_to_set,
        self._set_object,
        extractor,
        force_extraction,
        verbose,
        add_args,
        custom_name,
    )
def return_labels_numpy(self, original=False):
    """
    Returns a 2d numpy array of labels

    Parameters
    ----------
    original : if True, will return original labels, if False, will return transformed labels (as defined by
        label_dict), default value: False

    Returns
    -------
    A numpy array of labels with self.points_amt rows, each row corresponds to a single datapoint
    (rows are filled in order of ascending database id)

    Raises
    ------
    errors.EmptyDatabase : if the database has not been prepopulated
    """
    if self._prepopulated is False:
        raise errors.EmptyDatabase(self.dbpath)

    engine = create_engine('sqlite:////' + self.dbpath)
    trainset.Base.metadata.create_all(engine)
    session_cl = sessionmaker(bind=engine)
    session = session_cl()

    # Only the literal False selects the transformed labels; any other value
    # selects the original ones.
    label_key = 'transformed' if original is False else 'original'

    try:
        # The row with id 1 determines the width of the result. Measure the
        # label set that is actually copied below, so that original and
        # transformed labels of different lengths both fit.
        first_row = session.query(trainset.TrainSet).get(1)
        columns_amt = len(first_row.labels[label_key])
        return_array = np.zeros([self.points_amt, columns_amt])

        ordered_rows = session.query(trainset.TrainSet).order_by(trainset.TrainSet.id)
        for row_idx, row in enumerate(ordered_rows):
            return_array[row_idx, :] = row.labels[label_key]
    finally:
        # Close the session even if a query or an assignment raised.
        session.close()

    return return_array
def return_labels(self, original=False):
    """
    Returns the labels of the dataset

    Parameters
    ----------
    original : if True, will return original labels, if False, will return transformed labels (as defined by
        label_dict), default value: False

    Returns
    -------
    A list of lists, each 'inside list' corresponds to a single data point, each element of the 'inside list'
    is a label. The inside lists are copies of the stored labels and are ordered by ascending database id.

    Raises
    ------
    errors.EmptyDatabase : if the database has not been prepopulated
    """
    if self._prepopulated is False:
        raise errors.EmptyDatabase(self.dbpath)

    engine = create_engine('sqlite:////' + self.dbpath)
    trainset.Base.metadata.create_all(engine)
    session_cl = sessionmaker(bind=engine)
    session = session_cl()

    # Only the literal True selects the original labels; any other value
    # selects the transformed ones.
    label_key = 'original' if original is True else 'transformed'

    try:
        ordered_rows = session.query(trainset.TrainSet).order_by(trainset.TrainSet.id)
        # Slice-copy each label list so callers cannot mutate the mapped objects.
        return_list = [row.labels[label_key][:] for row in ordered_rows]
    finally:
        # Close the session even if the query or a label lookup raised.
        session.close()

    return return_list
def return_real_id(self):
    """
    Returns a list of real_id's. The lookup is delegated to return_real_id_base.

    Parameters
    ----------

    Returns
    -------
    A list of real_id values for the dataset (a real_id is the filename minus the suffix and prefix)

    Raises
    ------
    errors.EmptyDatabase : if the database has not been prepopulated
    """
    # Guard: an unpopulated database has no real_id values to return.
    if self._prepopulated is False:
        raise errors.EmptyDatabase(self.dbpath)

    return return_real_id_base(self.dbpath, self._set_object)
def return_features(self, names='all'):
    """
    Returns a list of extracted features from the database. The lookup is delegated to return_features_base.

    Parameters
    ----------
    names : list of strings, a list of feature names which are to be retrieved from the database, if equal
        to 'all', all features will be returned, default value: 'all'

    Returns
    -------
    A list of lists, each 'inside list' corresponds to a single data point, each element of the 'inside list'
    is a feature (can be of any type)

    Raises
    ------
    errors.EmptyDatabase : if the database has not been prepopulated
    """
    # Guard: an unpopulated database has no features to return.
    if self._prepopulated is False:
        raise errors.EmptyDatabase(self.dbpath)

    return return_features_base(self.dbpath, self._set_object, names)
def return_features_numpy(self, names='all'):
    """
    Returns a 2d numpy array of extracted features. The lookup is delegated to return_features_numpy_base.

    Parameters
    ----------
    names : list of strings, a list of feature names which are to be retrieved from the database, if equal
        to 'all', all features will be returned, default value: 'all'

    Returns
    -------
    A numpy array of features, each row corresponds to a single datapoint. If a single feature is a 1d numpy
    array, then it will be unrolled into the resulting array. Higher-dimensional numpy arrays are not
    supported.

    Raises
    ------
    errors.EmptyDatabase : if the database has not been prepopulated
    """
    # Guard: an unpopulated database has no features to return.
    if self._prepopulated is False:
        raise errors.EmptyDatabase(self.dbpath)

    return return_features_numpy_base(
        self.dbpath,
        self._set_object,
        self.points_amt,
        names,
    )