Ejemplo n.º 1
0
 def __init__(self, train_set, feature_extractor=basic_extractor, format=None, **kwargs):
     """Store the training data and the feature extractor.

     :param train_set: A file-like object, or a list of tuples.
     :param feature_extractor: Feature extractor, kept on
         ``self.feature_extractor``.
     :param format: Passed to ``_read_data`` when ``train_set`` is
         file-like.
     :param kwargs: Extra keyword arguments, kept on ``self.format_kwargs``.
     """
     self.format_kwargs = kwargs
     self.feature_extractor = feature_extractor
     # File-like input is read through _read_data; anything else is taken
     # to be the data itself (a list of tuples).
     self.train_set = (
         self._read_data(train_set, format) if is_filelike(train_set) else train_set
     )
     self.train_features = None
Ejemplo n.º 2
0
 def __init__(self, train_set, feature_extractor=basic_extractor, format=None, **kwargs):
     """Set up the instance from in-memory data or a file-like object.

     :param train_set: A list of tuples, or a file-like object to be read
         with ``_read_data``.
     :param feature_extractor: Stored as ``self.feature_extractor``.
     :param format: Forwarded to ``_read_data`` for file-like input.
     :param kwargs: Stored as ``self.format_kwargs``.
     """
     self.format_kwargs = kwargs
     self.feature_extractor = feature_extractor
     if not is_filelike(train_set):
         # Already in-memory data: a list of tuples.
         self.train_set = train_set
     else:
         self.train_set = self._read_data(train_set, format)
     self.train_features = None
Ejemplo n.º 3
0
 def __init__(self, train_set, feature_extractor=basic_extractor, format=None, **kwargs):
     """Store the training data, feature extractor and word set.

     :param train_set: A file-like object, or a list of tuples.
     :param feature_extractor: Stored as ``self.feature_extractor``.
     :param format: Forwarded to ``_read_data`` for file-like input.
     :param kwargs: Stored as ``self.format_kwargs``.
     """
     self.format_kwargs, self.feature_extractor = kwargs, feature_extractor
     dataset = train_set
     if is_filelike(train_set):
         dataset = self._read_data(train_set, format)
     self.train_set = dataset
     # Keep a hidden set of unique words.
     self._word_set = _get_words_from_dataset(self.train_set)
     self.train_features = None
 def __init__(self, train_set, feature_extractor=basic_extractor, format=None, **kwargs):
     """Record the training data and derive the word set from it.

     :param train_set: A list of tuples, or a file-like object to be read
         with ``_read_data``.
     :param feature_extractor: Kept on ``self.feature_extractor``.
     :param format: Passed to ``_read_data`` when ``train_set`` is
         file-like.
     :param kwargs: Extra keyword arguments, kept on ``self.format_kwargs``.
     """
     self.format_kwargs = kwargs
     self.feature_extractor = feature_extractor
     self.train_set = (
         self._read_data(train_set, format) if is_filelike(train_set) else train_set
     )
     # Keep a hidden set of unique words.
     self._word_set = _get_words_from_dataset(self.train_set)
     self.train_features = None
Ejemplo n.º 5
0
def detect(fp, max_read=1024):
    """Attempt to detect a file's format, trying each of the supported
    formats. Return the format class that was detected. If no format is
    detected, return ``None``.

    :param fp: Object to inspect. If ``is_filelike(fp)`` is false, ``None``
        is returned and ``fp`` is not read.
    :param max_read: Size argument passed to ``fp.read`` when sampling.
    """
    if not is_filelike(fp):
        return None
    # Sample the stream once and rewind straight away: every registered
    # format is tested against the same data, and the file is left at
    # position 0 regardless of the outcome.
    sample = fp.read(max_read)
    fp.seek(0)
    for Format in _registry.values():
        if Format.detect(sample):
            return Format
    return None
Ejemplo n.º 6
0
def detect(fp, max_read=1024):
    """Attempt to detect a file's format, trying each of the supported
    formats. Return the format class that was detected. If no format is
    detected, return ``None``.

    :param fp: Object to inspect. If ``is_filelike(fp)`` is false, ``None``
        is returned and ``fp`` is not read.
    :param max_read: Size argument passed to ``fp.read`` when sampling.
    """
    if not is_filelike(fp):
        return None
    # One read and one rewind instead of one pair per registered format;
    # all formats are checked against the same sample.
    sample = fp.read(max_read)
    fp.seek(0)
    candidates = (Format for Format in _registry.values() if Format.detect(sample))
    # First matching format in registry order, or None when nothing matches.
    return next(candidates, None)
Ejemplo n.º 7
0
 def accuracy(self, test_set, format=None):
     """Compute the accuracy on a test set.

     :param test_set: A list of tuples of the form ``(text, label)``, or a
         file-like object to be read with ``_read_data``.
     :param format: Forwarded to ``_read_data`` when ``test_set`` is
         file-like, e.g. ``"csv"`` or ``"json"``.
     """
     labeled = self._read_data(test_set, format) if is_filelike(test_set) else test_set
     # Pair each extracted feature set with its label.
     featuresets = []
     for text, label in labeled:
         featuresets.append((self.extract_features(text), label))
     return nltk.classify.accuracy(self.classifier, featuresets)
Ejemplo n.º 8
0
    def accuracy(self, test_set, format=None):
        """Compute the accuracy on a test set.

        :param test_set: A list of tuples of the form ``(text, label)``, or a
            file-like object.
        :param format: If ``test_set`` is file-like, the file format, e.g.
            ``"csv"`` or ``"json"``; passed on to ``_read_data``.
        """
        if is_filelike(test_set):
            # Forward the caller's format; previously it was dropped, so an
            # explicitly requested format was silently ignored.
            test_data = self._read_data(test_set, format)
        else:  # test_set is a list of tuples
            test_data = test_set
        test_features = [(self.extract_features(d), c) for d, c in test_data]
        return nltk.classify.accuracy(self.classifier, test_features)
Ejemplo n.º 9
0
def test_is_filelike():
    """An open file is file-like; a plain string and a float are not."""
    with open(CSV_FILE) as handle:
        assert_true(is_filelike(handle))
    for not_a_file in ('notafile', 12.3):
        assert_false(is_filelike(not_a_file))
Ejemplo n.º 10
0
def test_is_filelike():
    """Check ``is_filelike`` against an open file, a string and a float."""
    with open(CSV_FILE) as opened:
        file_result = is_filelike(opened)
        assert_true(file_result)
    string_result = is_filelike('notafile')
    assert_false(string_result)
    float_result = is_filelike(12.3)
    assert_false(float_result)