Esempi in Python per IndexCreator.save

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: indexcreator

Classe/tipologia: IndexCreator

Metodo/funzione: save

Esempi su hotexamples.com: 1

IndexCreator.save in Python: 1 esempio trovato. Questo è il miglior esempio reale in Python per indexcreator.IndexCreator.save, estratto da progetti open source. Lo puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

IndexCreator(2)

create(2)

debug_keyword(1)

open(1)

process_files(1)

process_projects(1)

projects_names(1)

save(1)

save_index(1)

write_entry(1)

Esempio n. 1

Mostra file

File: extract-to-json.py Progetto: Softcatala/diccionari-multilingue

def _process_xml():
    """Build index entries from the Catalan Wiktionary XML dump.

    Parses 'cawiktionary-20160701-pages-meta-current.xml'.  A page is kept
    when its revision text contains one of the markers '{{ca-verb',
    '{{lema|ca|adv}}' or '{{ca-adj'; its en/es/fr/de/it translations are then
    looked up with _get_translation().

    A kept page is still discarded when:
      * its lower-cased, stripped title is not in read_english_word_list(),
      * term_exists_in_index() reports it for the title / English label,
      * it was flagged as a verb but its title does not end in 'r',
      * none of the five translations was found.

    Side effects: writes 'words-ca.txt' and 'descriptions-ca.txt', calls
    index.write_entry() once per surviving page, passes the counters to
    _show_statistics() and _save_statistics(), and finally calls
    index.save().

    NOTE: this is Python 2 code (it relies on the ``unicode`` builtin and
    writes UTF-8 encoded byte strings to text-mode files).
    """
    WIKIDICTIONARY = 2  # passed as `source` for every entry written here

    # (language code, translation template prefix), in lookup order.
    translation_markers = (
        ('en', '{{trad|en|'),
        ('es', '{{trad|es|'),
        ('fr', '{{trad|fr|'),
        ('de', '{{trad|de|'),
        ('it', '{{trad|it|'),
    )

    stats = {
        "ca_labels": 0,
        "ca_descs": 0,
        "en_labels": 0,
        "fr_labels": 0,
        "de_labels": 0,
        "es_labels": 0,
        "it_labels": 0,
    }
    words = read_english_word_list()

    index = IndexCreator()
    index.open()
    # NOTE(review): `authors` is filled below but never read in this function.
    authors = set()

    # `with` guarantees both output files are closed even if parsing or
    # index writing raises part-way through.
    with open('words-ca.txt', 'w') as words_file_ca, \
            open('descriptions-ca.txt', 'w') as descriptions_file_ca:

        root = xml.etree.ElementTree.parse(
            'cawiktionary-20160701-pages-meta-current.xml').getroot()

        # Iterate elements directly: Element.getchildren() is deprecated.
        for page in root:
            verb = False
            adverbi = False
            adjectiu = False
            ca_label = u''
            labels = dict.fromkeys(
                [lang for lang, _ in translation_markers], u'')

            for page_element in page:
                if 'title' in page_element.tag:
                    ca_label = unicode(page_element.text)

                if 'revision' not in page_element.tag:
                    continue

                text = _get_revision_text(page_element)
                username = _get_username(page_element)
                if username:
                    authors.add(username)

                if text is None:
                    continue

                if '{{ca-verb' in text:
                    verb = True
                elif '{{lema|ca|adv}}' in text:
                    adverbi = True
                elif '{{ca-adj' in text:
                    adjectiu = True

                if verb or adverbi or adjectiu:
                    for lang, marker in translation_markers:
                        labels[lang] = _get_translation(text, marker)

            if not (verb or adverbi or adjectiu):
                continue

            if ca_label.lower().strip() not in words:
                logging.debug("Discard not in word list: " + ca_label)
                continue

            if term_exists_in_index(index, ca_label, labels['en']):
                logging.debug(
                    "Discard already existing word in index: " + ca_label)
                continue

            # TODO: A better way to determine infinitives
            ca_label_str = to_str(ca_label)
            # endswith() also copes with an empty title, where indexing the
            # last character would raise IndexError.
            if verb and not ca_label_str.endswith('r'):
                logging.debug("Discard verb is not infinitive: " + ca_label)
                continue

            description = TextExtract(text).GetDescription()

            if not any(labels[lang] for lang, _ in translation_markers):
                logging.debug("Discard only ca_label:" + ca_label)
                continue

            ca_desc = u''
            if len(description) > 0:
                ca_desc = description
                stats["ca_descs"] += 1

            stats["ca_labels"] += 1
            for lang, _ in translation_markers:
                if labels[lang]:
                    stats[lang + "_labels"] += 1

            words_file_ca.write(ca_label.encode('utf-8') + '\r\n')

            # A line is written for every entry, even when the description
            # is empty (the former `ca_desc is not None` guard was always
            # true, so this keeps the existing output unchanged).
            descriptions_file_ca.write(
                '{0} - {1}\r\n'.format(ca_label.encode('utf-8'),
                                       ca_desc.encode('utf-8')))

            index.write_entry(word_en=labels['en'],
                              word_ca=ca_label,
                              word_fr=labels['fr'],
                              word_de=labels['de'],
                              word_es=labels['es'],
                              word_it=labels['it'],
                              definition_en=None,
                              definition_ca=ca_desc,
                              definition_fr=None,
                              definition_de=None,
                              definition_es=None,
                              definition_it=None,
                              image=None,
                              permission=None,
                              gec=None,
                              wikidata_id=None,
                              wikiquote_ca=None,
                              wikidictionary_ca=ca_label,
                              source=WIKIDICTIONARY)

    _show_statistics(stats)
    _save_statistics(stats)
    index.save()