Example #1
0
 def __vectorize(self, data):
     """\
     Fit the vectorizer on first use and turn the given data into a
     matrix. Accepts a DataSet or a list of attribute dictionaries.
     """
     if self.vectorizer is None:
         # No vectorizer configured: coerce to a DataSet and return
         # the raw matrix form without any vectorization.
         if not isinstance(data, DataSet):
             converted = DataSet()
             converted.load_from_dict(data)
             data = converted
         data.match_headers(self.data_headers, add_values=True)
         # TODO pre-filtering here?
         bunch = data.as_bunch(target=self.class_attr,
                               select_attrib=self.select_attr)
         return bunch.data
     # A vectorizer is set: first obtain a list of dictionaries,
     # masking out the class attribute and non-selected attributes.
     if isinstance(data, DataSet):
         dicts = data.as_dict(select_attrib=self.select_attr,
                              mask_attrib=self.class_attr)
     else:
         dicts = []
         for inst in data:
             kept = {}
             for key, val in inst.items():
                 if key == self.class_attr or key not in self.select_attr:
                     continue
                 kept[key] = val
             dicts.append(kept)
     # Optional per-attribute pre-filtering via the filter_attr callback.
     if self.filter_attr:
         dicts = [dict((key, val) for key, val in inst.items()
                       if self.filter_attr(key, val))
                  for inst in dicts]
     # Train the vectorizer only once, on the first batch seen.
     if not self.vectorizer_trained:
         self.vectorizer.fit(dicts)
         self.vectorizer_trained = True
     return self.vectorizer.transform(dicts).tocsr()
Example #2
0
 def load_training_set(self, filename, encoding='UTF-8'):
     """\
     Read the training data set from an ARFF file into memory,
     keeping only the leading train_part fraction of the instances
     when train_part is below 1.
     """
     log_info('Loading training data set from ' + str(filename) + '...')
     data_set = DataSet()
     data_set.load_from_arff(filename, encoding)
     if self.train_part >= 1:
         # the whole file is used for training
         return data_set
     # truncate to the configured fraction (no copy is made)
     cutoff = int(round(self.train_part * len(data_set)))
     return data_set.subset(0, cutoff, copy=False)
Example #3
0
 def load_training_set(self, filename, encoding='UTF-8'):
     """\
     Load the given training data set into memory and strip it if
     configured to via the train_part parameter.

     :param filename: path to the ARFF file with the training data
     :param encoding: character encoding of the input file
     :return: the loaded (and possibly truncated) DataSet
     """
     log_info('Loading training data set from ' + str(filename) + '...')
     train = DataSet()
     train.load_from_arff(filename, encoding)
     # train_part < 1 means only that leading fraction of the
     # instances is kept; copy=False avoids duplicating the data
     if self.train_part < 1:
         train = train.subset(0,
                              int(round(self.train_part * len(train))),
                              copy=False)
     return train
Example #4
0
 def __vectorize(self, data):
     """\
     Train vectorization and subsequently vectorize. Accepts a DataSet
     or a list of dictionaries to be vectorized.

     Returns the Bunch data matrix when no vectorizer is configured,
     otherwise a CSR sparse matrix produced by the vectorizer.
     """
     # no vectorization performed, only converted to matrix
     if self.vectorizer is None:
         # coerce a plain list of dictionaries into a DataSet first
         if not isinstance(data, DataSet):
             data_set = DataSet()
             data_set.load_from_dict(data)
             data = data_set
         data.match_headers(self.data_headers, add_values=True)
         # TODO pre-filtering here?
         return data.as_bunch(target=self.class_attr,
                              select_attrib=self.select_attr).data
     # vectorization needed: converted to dictionary
     # and passed to the vectorizer
     if isinstance(data, DataSet):
         # DataSet knows how to mask the class attribute itself
         data = data.as_dict(select_attrib=self.select_attr,
                             mask_attrib=self.class_attr)
     else:
         # drop the class attribute and anything not selected
         data = [{
             key: val
             for key, val in inst.items()
             if key != self.class_attr and key in self.select_attr
         } for inst in data]
     # pre-filter attributes if filter_attr is set
     if self.filter_attr:
         data = [{
             key: val
             for key, val in inst.items() if self.filter_attr(key, val)
         } for inst in data]
     # fit the vectorizer only once, on the first data seen
     if not self.vectorizer_trained:
         self.vectorizer.fit(data)
         self.vectorizer_trained = True
     return self.vectorizer.transform(data).tocsr()
Example #5
0
 def evaluate(self, test_file, encoding='UTF-8', classif_file=None):
     """\
     Classify the given ARFF test file and return the accuracy.
     When classif_file is set, the predictions are also merged into
     the test data and saved to that file.
     """
     test = DataSet()
     test.load_from_arff(test_file, encoding)
     predicted = self.classify(test)
     gold = self.get_classes(test, dtype=None)
     if classif_file is not None:
         # build a one-attribute DataSet holding the predictions,
         # rename it so it does not clash with the gold attribute,
         # then write the merged result out
         predictions = DataSet()
         predictions.load_from_vect(test.get_attrib(self.class_attr),
                                    predicted)
         predictions.rename_attrib(self.class_attr, self.PREDICTED)
         test.merge(predictions)
         test.save_to_arff(classif_file, encoding)
     return zero_one_score(gold, predicted)
Example #6
0
 def evaluate(self, test_file, encoding='UTF-8', classif_file=None):
     """\
     Evaluate on the given test data file. Return accuracy.
     If classif_file is set, save the classification results to this file.

     :param test_file: path to the ARFF file with the test data
     :param encoding: character encoding of the input file
     :param classif_file: optional path for saving the predictions
     :return: accuracy of the predictions against the gold classes
     """
     test = DataSet()
     test.load_from_arff(test_file, encoding)
     values = self.classify(test)
     golden = self.get_classes(test, dtype=None)
     if classif_file is not None:
         # wrap predictions in a DataSet, rename the attribute so it
         # does not clash with the gold class, merge and save
         classif = DataSet()
         classif.load_from_vect(test.get_attrib(self.class_attr), values)
         classif.rename_attrib(self.class_attr, self.PREDICTED)
         test.merge(classif)
         test.save_to_arff(classif_file, encoding)
     return zero_one_score(golden, values)