def test_read_stop_words(self):
        """Check that reading a stop-words file changes string selection.

        The string is selected before the stop words are read and is no
        longer selected once a file listing it has been read.
        """
        target = '*****@*****.**'
        subject = Corpus('')

        # Before loading any stop words the string is accepted.
        assert subject._should_select_string(u'translator-credits', target)

        stop_words_source = StringIO('translator-credits')
        subject._read_stop_words(stop_words_source)

        # After loading, the same string must be rejected.
        assert not subject._should_select_string(u'translator-credits', target)
def process_projects(src_directory, glossary_description, glossary_file):
    """Build the developer and user glossary outputs for a source directory.

    Processes the corpus created from *src_directory*, computes its metrics,
    keeps at most ``MAX_TERMS`` top-ranked terms and then produces:

    * a developer report created through ``DevGlossarySerializer`` with the
      file name ``dev-<glossary_file>.html``;
    * user reports rendered through ``process_template`` into
      ``<glossary_file>.html`` and ``<glossary_file>.csv``;
    * a database generated through ``generate_database``.

    Args:
        src_directory: Value handed to ``Corpus`` to locate the sources.
        glossary_description: Description passed to the developer serializer
            and to ``Glossary``.
        glossary_file: Base name (without extension) of the generated files.
    """
    MAX_TERMS = 8000  # Upper bound on the number of terms in each report

    corpus = Corpus(src_directory)
    corpus.process()

    reference_sources = ReferenceSources()
    reference_sources.read_sources()

    metrics = Metrics()
    metrics.create(corpus)

    # Rank the keys of metrics.tfxdf by their associated value, highest
    # first, and keep only the leading MAX_TERMS of them. The slice is taken
    # once and shared by both reports.
    sorted_terms_by_tfxdf = sorted(metrics.tfxdf, key=metrics.tfxdf.get,
                                   reverse=True)
    top_terms = sorted_terms_by_tfxdf[:MAX_TERMS]

    translations = Translations()

    # Developer report: entries are inserted in ranking order, which the
    # OrderedDict preserves.
    dev_entries = OrderedDict()
    for term in top_terms:
        dev_entries[term] = translations.create_for_word_sorted_by_frequency(
            corpus.documents, term, reference_sources)

    dev_glossary_serializer = DevGlossarySerializer()
    dev_glossary_serializer.create(u"dev-" + glossary_file + ".html",
                                   glossary_description, corpus,
                                   dev_entries, reference_sources)

    # User report: the same terms, this time sorted by the term itself.
    # NOTE(review): translations are computed a second time per term here;
    # they could be reused from the developer report if
    # create_for_word_sorted_by_frequency has no side effects - confirm.
    glossary = Glossary(glossary_description)
    for term in sorted(top_terms):
        glossary_entry = GlossaryEntry(
            term,
            translations.create_for_word_sorted_by_frequency(corpus.documents,
                                                             term,
                                                             reference_sources)
        )
        glossary.entries.append(glossary_entry)

    user_glossary = glossary.get_dict()
    process_template('terminology/templates/userglossary-html.mustache',
                     glossary_file + ".html", user_glossary)
    process_template('terminology/templates/userglossary-csv.mustache',
                     glossary_file + ".csv", user_glossary)

    generate_database(glossary, glossary_file)
 def test_should_select_string_nonumericalonly(self):
     """A digits-only string is not selected; text containing digits is."""
     should_select = Corpus('')._should_select_string
     # Purely numerical source and target.
     assert not should_select(u'10', '10')
     # A number embedded in a regular sentence.
     assert should_select(u'10 minutes ago', 'Fa 10 minuts')
 def test_should_select_string_noformatters(self):
     """A string with a ``%s`` placeholder is not selected; a plain one is."""
     should_select = Corpus('')._should_select_string
     # Source and target both carry a format placeholder.
     assert not should_select(u'Usage: %s', 'Ús: %s')
     # Same sentence shape without any placeholder.
     assert should_select(u'Usage: sample', 'Ús: exemple')
 def test_should_select_string_nospaces(self):
     """A semicolon-separated keyword list without spaces is not selected."""
     keywords = u'accessibility;development;test;'
     subject = Corpus('')
     assert not subject._should_select_string(
         keywords, 'accessibility;development;test;')
 def test_should_select_string_notags(self):
     """A string wrapped in ``<b>`` tags is not selected; untagged text is."""
     should_select = Corpus('')._should_select_string
     # Markup present in both source and target.
     assert not should_select(u'<b>_User name</b>',
                              '<b>_Nom d\'usuari</b>')
     # Same text without markup.
     assert should_select(u'User name', '_Nom d\'usuari')
 def test_clean_strings(self):
     """Check the cleaned form ``_clean_string`` returns for sample inputs."""
     clean = Corpus('')._clean_string
     # (raw input, expected cleaned output), checked in this order.
     expectations = (
         (u'_Hard Disk', u'hard disk'),
         (u'Contrasen&ya:', u'contrasenya'),
         (u'All ~Pages', u'all pages'),
         (u'Properties...', u'properties'),
     )
     for raw, expected in expectations:
         assert clean(raw) == expected
Esempio n. 8
0
 def test_should_select_string_noformatters(self):
     """Reject the ``%s`` placeholder sample and accept the plain-text one."""
     subject = Corpus('')
     with_placeholder = (u'Usage: %s', 'Ús: %s')
     without_placeholder = (u'Usage: sample', 'Ús: exemple')
     assert not subject._should_select_string(*with_placeholder)
     assert subject._should_select_string(*without_placeholder)
Esempio n. 9
0
 def test_should_select_string_notags(self):
     """Reject the ``<b>``-tagged sample and accept the untagged one."""
     subject = Corpus('')
     tagged = (u'<b>_User name</b>', '<b>_Nom d\'usuari</b>')
     untagged = (u'User name', '_Nom d\'usuari')
     assert not subject._should_select_string(*tagged)
     assert subject._should_select_string(*untagged)
Esempio n. 10
0
 def test_clean_strings(self):
     """Verify ``_clean_string`` output for four sample strings."""
     subject = Corpus('')
     # Pairs of raw input and the cleaned text it must produce.
     samples = [
         (u'_Hard Disk', u'hard disk'),
         (u'Contrasen&ya:', u'contrasenya'),
         (u'All ~Pages', u'all pages'),
         (u'Properties...', u'properties'),
     ]
     for original, cleaned in samples:
         assert subject._clean_string(original) == cleaned
Esempio n. 11
0
 def test_should_select_string_empty_target(self):
     """Check that a parentheses-only source/target pair is not selected."""
     # NOTE(review): the name mentions an empty target, but the assertion
     # exercises the string '()' - confirm which case was intended.
     should_select = Corpus('')._should_select_string
     assert not should_select(u'()', '()')
Esempio n. 12
0
 def test_clean_localized(self):
     """Check ``_clean_localized`` on accented text and a typographic quote."""
     clean = Corpus('')._clean_localized
     # Accented characters come back unchanged.
     assert clean(u'accès') == u'accès'
     # The trailing typographic apostrophe becomes a plain single quote.
     assert clean(u'àíóè’') == u'àíóè\''
Esempio n. 13
0
 def test_should_not_select_parentesis_only(self):
     """A source/target pair made only of parentheses is not selected."""
     should_select = Corpus('')._should_select_string
     assert not should_select(u'()', '()')
Esempio n. 14
0
 def test_should_select_string_empty_target(self):
     """A translated string is selected; the same source with '' is not."""
     should_select = Corpus('')._should_select_string
     # Non-empty target.
     assert should_select(u'This week', 'Aquesta setmana')
     # Empty target.
     assert not should_select(u'This week', '')
Esempio n. 15
0
 def test_should_select_string_nonumericalonly(self):
     """Reject the digits-only sample and accept the sentence with a number."""
     subject = Corpus('')
     digits_only = (u'10', '10')
     sentence = (u'10 minutes ago', 'Fa 10 minuts')
     assert not subject._should_select_string(*digits_only)
     assert subject._should_select_string(*sentence)
Esempio n. 16
0
 def test_should_select_string_empty_target(self):
     """Selection succeeds with a non-empty target and fails with ''."""
     subject = Corpus('')
     source = u'This week'
     assert subject._should_select_string(source, 'Aquesta setmana')
     assert not subject._should_select_string(source, '')
Esempio n. 17
0
 def test_should_select_string_nospaces(self):
     """The space-free, semicolon-separated sample is not selected."""
     should_select = Corpus('')._should_select_string
     assert not should_select(u'accessibility;development;test;',
                              'accessibility;development;test;')
 def test_should_not_select_parentesis_only(self):
     """The pair ``'()'`` / ``'()'`` must be rejected by the selector."""
     subject = Corpus('')
     parentheses = (u'()', '()')
     assert not subject._should_select_string(*parentheses)