Beispiel #1
0
    def extract_feature_dependent_feature(self,
                                          extractor,
                                          force_extraction=False,
                                          verbose=0,
                                          add_args=None,
                                          custom_name=None):
        """
        Extract a feature that may depend on other features and store it in the database.

        This method only validates that the database has been populated and then
        delegates all work to ``extract_feature_dependent_feature_base``.

        Parameters
        ----------
        extractor : function, receives the path of a data point, a dictionary of all other features and *args,
        and returns a feature
        force_extraction : boolean, if True the feature is re-extracted even when a feature with the same name
        is already stored; if False extraction only happens when the feature is missing. default value: False
        verbose : int, if bigger than 0 the number of the file currently being processed is printed
        add_args : optional extra arguments handed to the extractor (list/dictionary/tuple/whatever).
        NOTE(review): with None the extractor presumably receives only the path and the feature dictionary -
        confirm against the base implementation. default value: None
        custom_name : string, optional name under which the feature is stored instead of the extractor
        function name. if None, the extractor function name is used. default value: None

        Returns
        -------
        Whatever ``extract_feature_dependent_feature_base`` returns, passed through unchanged
        (previously documented as None)

        Raises
        ------
        errors.EmptyDatabase : if ``self._prepopulated`` is False
        """
        # Guard clause: refuse to work on a database that was never populated.
        if self._prepopulated is False:
            raise errors.EmptyDatabase(self.dbpath)

        return extract_feature_dependent_feature_base(
            self.dbpath,
            self.path_to_set,
            self._set_object,
            extractor,
            force_extraction,
            verbose,
            add_args,
            custom_name)
Beispiel #2
0
    def return_labels_numpy(self, original=False):
        """
        Returns a 2d numpy array of labels

        Parameters
        ----------
        original : if True, will return original labels, if False, will return transformed labels (as defined by
        label_dict), default value: False

        Returns
        -------
        A numpy array of labels with ``self.points_amt`` rows, each row corresponds to a single datapoint.
        Rows are filled in ascending order of the TrainSet id.

        Raises
        ------
        errors.EmptyDatabase : if ``self._prepopulated`` is False
        """
        if self._prepopulated is False:
            raise errors.EmptyDatabase(self.dbpath)

        # Pick the label variant once so the array width and the row contents
        # always come from the same list. Only the literal False selects the
        # transformed labels, exactly as before.
        label_key = 'transformed' if original is False else 'original'

        engine = create_engine('sqlite:////' + self.dbpath)
        trainset.Base.metadata.create_all(engine)
        session = sessionmaker(bind=engine)()
        try:
            # The row with primary key 1 is used to determine the number of columns.
            first_row = session.query(trainset.TrainSet).get(1)
            columns_amt = len(first_row.labels[label_key])
            return_array = np.zeros([self.points_amt, columns_amt])

            ordered_rows = session.query(trainset.TrainSet).order_by(
                trainset.TrainSet.id)
            for row_idx, row in enumerate(ordered_rows):
                return_array[row_idx, :] = row.labels[label_key]
        finally:
            # Release the session even if a query or an assignment fails.
            session.close()
        return return_array
Beispiel #3
0
    def return_labels(self, original=False):
        """
        Returns the labels of the dataset

        Parameters
        ----------
        original : if True, will return original labels, otherwise will return transformed labels (as defined by
        label_dict), default value: False

        Returns
        -------
        A list of lists, each 'inside list' corresponds to a single data point, each element of the 'inside list' is a
        label. The inner lists are shallow copies of the stored labels, ordered by ascending TrainSet id.

        Raises
        ------
        errors.EmptyDatabase : if ``self._prepopulated`` is False
        """
        if self._prepopulated is False:
            raise errors.EmptyDatabase(self.dbpath)

        # Only the literal True selects the original labels, exactly as before.
        label_key = 'original' if original is True else 'transformed'

        engine = create_engine('sqlite:////' + self.dbpath)
        trainset.Base.metadata.create_all(engine)
        session = sessionmaker(bind=engine)()
        try:
            ordered_rows = session.query(trainset.TrainSet).order_by(
                trainset.TrainSet.id)
            # Slice-copy each label list so callers cannot mutate the mapped objects' data.
            return [row.labels[label_key][:] for row in ordered_rows]
        finally:
            # Release the session even if iteration or a key lookup fails.
            session.close()
Beispiel #4
0
    def return_real_id(self):
        """
        Return the real_id values of the dataset.

        Delegates to ``return_real_id_base`` once the database is known to be populated.

        Parameters
        ----------

        Returns
        -------
        A list of real_id values for the dataset (a real_id is the filename minus the suffix and prefix)

        Raises
        ------
        errors.EmptyDatabase : if ``self._prepopulated`` is False
        """
        # Guard clause: an unpopulated database has no ids to return.
        if self._prepopulated is False:
            raise errors.EmptyDatabase(self.dbpath)

        real_ids = return_real_id_base(self.dbpath, self._set_object)
        return real_ids
Beispiel #5
0
    def return_features(self, names='all'):
        """
        Return the extracted features stored in the database as a list.

        Delegates to ``return_features_base`` once the database is known to be populated.

        Parameters
        ----------
        names : list of strings, the feature names to retrieve from the database; if equal to 'all',
        all features will be returned, default value: 'all'

        Returns
        -------
        A list of lists, each 'inside list' corresponds to a single data point, each element of the 'inside list' is a
        feature (can be of any type)

        Raises
        ------
        errors.EmptyDatabase : if ``self._prepopulated`` is False
        """
        # Guard clause: an unpopulated database has no features to return.
        if self._prepopulated is False:
            raise errors.EmptyDatabase(self.dbpath)

        features = return_features_base(self.dbpath, self._set_object, names)
        return features
Beispiel #6
0
    def return_features_numpy(self, names='all'):
        """
        Return the extracted features as a 2d numpy array.

        Delegates to ``return_features_numpy_base`` once the database is known to be populated.

        Parameters
        ----------
        names : list of strings, the feature names to retrieve from the database; if equal to 'all',
        all features will be returned, default value: 'all'

        Returns
        -------
        A numpy array of features, each row corresponds to a single datapoint. If a single feature is a 1d numpy array,
        then it will be unrolled into the resulting array. Higher-dimensional numpy arrays are not supported.

        Raises
        ------
        errors.EmptyDatabase : if ``self._prepopulated`` is False
        """
        # Guard clause: an unpopulated database has no features to return.
        if self._prepopulated is False:
            raise errors.EmptyDatabase(self.dbpath)

        feature_array = return_features_numpy_base(
            self.dbpath, self._set_object, self.points_amt, names)
        return feature_array