def test_stop_word_rebase_functional(self):
        """
        Rebase the French sample text with the 'fr' stop words and check
        the result against the stored solution file.
        """
        corpus_dir = os.path.join(os.path.dirname(__file__), 'corpus')
        file_name = os.path.join(corpus_dir, 'french.txt')
        solution_name = os.path.join(corpus_dir, 'french_solution.txt')

        # Read through context managers so both handles are closed
        # deterministically instead of being left to the garbage collector.
        with open(file_name, 'rb') as source_file:
            file_content = source_file.read().decode('utf-8')
        with open(solution_name, 'rb') as solution_file:
            solution_content = solution_file.read().decode('utf-8')

        factory = StopWordFactory()
        stop_words = factory.get_stop_words('fr')
        file_content_rebased = stop_words.rebase(file_content)
        self.assertEqual(file_content_rebased, solution_content)
Beispiel #2
0
def cmdline(argv=None):
    """
    Script for rebasing a text file.

    ``argv`` is the list of command-line arguments to parse. When it is
    ``None`` (the default), argparse reads ``sys.argv[1:]`` at call time,
    so changes made to ``sys.argv`` after import are honoured.

    The rebased text is printed to standard output.
    """
    parser = ArgumentParser(description='Rebase a text from his stop words')
    parser.add_argument('language', help='The language used to rebase')
    parser.add_argument('source', help='Text file to rebase')
    options = parser.parse_args(argv)

    factory = StopWordFactory()
    language = options.language
    stop_words = factory.get_stop_words(language, fail_safe=True)

    # Close the source file explicitly rather than leaking the handle.
    with open(options.source, 'rb') as source_file:
        content = source_file.read().decode('utf-8')
    print(stop_words.rebase(content))
Beispiel #3
0
def cmdline(argv=None):
    """
    Script for merging different collections of stop words.

    ``argv`` is the list of command-line arguments to parse. When it is
    ``None`` (the default), argparse reads ``sys.argv[1:]`` at call time,
    so changes made to ``sys.argv`` after import are honoured.

    Every source file is read as a collection and merged into the stop
    words of the given language; the merged collection is then written
    to the file the factory designates for that language.
    """
    parser = ArgumentParser(
        description='Create and merge collections of stop words')
    parser.add_argument('language', help='The language used in the collection')
    parser.add_argument('sources',
                        metavar='FILE',
                        nargs='+',
                        help='Source files to parse')
    options = parser.parse_args(argv)

    factory = StopWordFactory()
    language = options.language
    stop_words = factory.get_stop_words(language, fail_safe=True)

    # Fold each source collection into the accumulated stop words.
    for source in options.sources:
        stop_words += StopWord(language, factory.read_collection(source))

    filename = factory.get_collection_filename(stop_words.language)
    factory.write_collection(filename, stop_words.collection)
Beispiel #4
0
"""
Mots-vides
"""
from mots_vides.stop_words import StopWord
from mots_vides.factory import StopWordFactory
from mots_vides.exceptions import StopWordError

# Names exported by ``from <package> import *``.
__all__ = [
    'StopWord',
    'StopWordFactory',
    'StopWordError',
    'default_factory',
    'stop_words',
]

# Module-level factory instance, created once at import time.
default_factory = StopWordFactory()


def stop_words(language, fail_safe=True):
    """
    Return the stop words for ``language`` through the module-level
    ``default_factory``, so callers need not create a factory themselves.

    ``fail_safe`` is forwarded unchanged to ``get_stop_words``.
    """
    lookup = default_factory.get_stop_words
    return lookup(language, fail_safe=fail_safe)