def __init__(self, train_set, feature_extractor=basic_extractor, format=None, **kwargs):
    """Store the training data and the feature extractor.

    :param train_set: Either a file-like object, which is parsed with
        ``self._read_data``, or an already-loaded list of tuples that is
        stored unchanged.
    :param feature_extractor: Stored as ``self.feature_extractor``.
    :param format: Forwarded to ``self._read_data`` when ``train_set`` is
        file-like; unused otherwise.
    :param kwargs: Extra keyword arguments, kept as ``self.format_kwargs``.
    """
    # Assigned before the data is read, in case ``_read_data`` relies on
    # these attributes -- TODO confirm against ``_read_data``.
    self.format_kwargs = kwargs
    self.feature_extractor = feature_extractor
    # A non-file-like ``train_set`` is taken to be a list of tuples already.
    loaded = self._read_data(train_set, format) if is_filelike(train_set) else train_set
    self.train_set = loaded
    self.train_features = None
def __init__(self, train_set, feature_extractor=basic_extractor, format=None, **kwargs):
    """Store the training data, its unique words and the feature extractor.

    :param train_set: Either a file-like object, which is parsed with
        ``self._read_data``, or an already-loaded list of tuples that is
        stored unchanged.
    :param feature_extractor: Stored as ``self.feature_extractor``.
    :param format: Forwarded to ``self._read_data`` when ``train_set`` is
        file-like; unused otherwise.
    :param kwargs: Extra keyword arguments, kept as ``self.format_kwargs``.
    """
    # Assigned before the data is read, in case ``_read_data`` relies on
    # these attributes -- TODO confirm against ``_read_data``.
    self.format_kwargs = kwargs
    self.feature_extractor = feature_extractor
    if not is_filelike(train_set):
        # Already a list of tuples; keep it as given.
        self.train_set = train_set
    else:
        self.train_set = self._read_data(train_set, format)
    # Hidden set of the unique words found in the training data.
    unique_words = _get_words_from_dataset(self.train_set)
    self._word_set = unique_words
    self.train_features = None
def __init__(self, train_set, feature_extractor=basic_extractor, format=None, **kwargs):
    """Set up the instance from a dataset or a file-like object.

    :param train_set: A list of tuples, or a file-like object whose
        contents are loaded through ``self._read_data``.
    :param feature_extractor: Kept on the instance as
        ``self.feature_extractor``.
    :param format: Handed to ``self._read_data`` together with a file-like
        ``train_set``; ignored when ``train_set`` is not file-like.
    :param kwargs: Remaining keyword arguments, saved as
        ``self.format_kwargs``.
    """
    self.format_kwargs = kwargs
    self.feature_extractor = feature_extractor
    needs_parsing = is_filelike(train_set)
    # When no parsing is needed, train_set is already a list of tuples.
    self.train_set = self._read_data(train_set, format) if needs_parsing else train_set
    # Keep a hidden set of unique words.
    self._word_set = _get_words_from_dataset(self.train_set)
    self.train_features = None
def detect(fp, max_read=1024):
    """Attempt to detect a file's format, trying each of the supported formats.

    Return the format class that was detected. If no format is detected,
    return ``None``.

    :param fp: The object to inspect. If it is not file-like, ``None`` is
        returned without reading anything.
    :param max_read: Upper bound passed to ``fp.read`` when sampling the
        stream.
    """
    if not is_filelike(fp):
        return None
    # Sample the stream once and rewind immediately. Every registered format
    # is then tested against the same data, and the stream is back at its
    # start even if one of the detectors raises.
    sample = fp.read(max_read)
    fp.seek(0)
    for Format in _registry.values():
        if Format.detect(sample):
            return Format
    return None
def accuracy(self, test_set, format=None):
    """Compute the accuracy on a test set.

    :param test_set: A list of tuples of the form ``(text, label)``, or a
        file pointer.
    :param format: If ``test_set`` is a file pointer, the file format, e.g.
        ``"csv"`` or ``"json"``. It is forwarded to ``self._read_data``
        (``None`` by default).
    :returns: The result of ``nltk.classify.accuracy`` for
        ``self.classifier`` on the extracted features.
    """
    if not is_filelike(test_set):
        # Already a list of (text, label) tuples.
        examples = test_set
    else:
        examples = self._read_data(test_set, format)
    labeled_features = []
    for text, label in examples:
        labeled_features.append((self.extract_features(text), label))
    return nltk.classify.accuracy(self.classifier, labeled_features)
def accuracy(self, test_set, format=None):
    """Compute the accuracy on a test set.

    :param test_set: A list of tuples of the form ``(text, label)``, or a
        file pointer.
    :param format: If ``test_set`` is a file pointer, the file format, e.g.
        ``"csv"`` or ``"json"``. It is forwarded to ``self._read_data``
        (``None`` by default).
    :returns: The result of ``nltk.classify.accuracy`` for
        ``self.classifier`` on the extracted features.
    """
    if is_filelike(test_set):
        # Forward the caller's ``format``; previously it was dropped here,
        # so an explicitly requested format was silently ignored.
        test_data = self._read_data(test_set, format)
    else:  # test_set is a list of tuples
        test_data = test_set
    test_features = [(self.extract_features(d), c) for d, c in test_data]
    return nltk.classify.accuracy(self.classifier, test_features)
def test_is_filelike():
    """``is_filelike`` accepts an open file and rejects a string and a float."""
    with open(CSV_FILE) as handle:
        assert_true(is_filelike(handle))
    # Neither a plain string nor a float should be treated as file-like.
    for not_a_file in ('notafile', 12.3):
        assert_false(is_filelike(not_a_file))