Python ArticleParserWithLangnames 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: article_parsers

hotexamples.com에서의 예제들: 3

Python ArticleParserWithLangnames - 3개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python article_parsers.ArticleParserWithLangnames의 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가하면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

ArticleParserWithLangnames(1)

parse_article(1)

write_word_pairs_to_file(1)

예제 #1

파일 보기

파일: wiktionary.py 프로젝트: h4ck3rm1k3/wikt2dict

 def init_parser_of_type(self):
     """Pick the article parser named by the 'parser_type' config entry.

     'default' selects DefaultArticleParser and 'langnames' selects
     ArticleParserWithLangnames; the chosen parser is constructed with this
     object and stored in self.article_parser. Any other value leaves
     self.article_parser untouched.
     """
     parser_kind = self.cfg['parser_type']
     if parser_kind == 'default':
         self.article_parser = DefaultArticleParser(self)
         return
     if parser_kind == 'langnames':
         self.article_parser = ArticleParserWithLangnames(self)

예제 #2

파일 보기

파일: wiktionary.py 프로젝트: onny/wikidict2

 def init_parser_of_type(self):
     """Build the article parser selected by the 'parser_type' config entry.

     Recognised values are 'default', 'langnames' and 'section_level'; the
     matching parser is constructed with this object and stored in
     self.article_parser. Any other value raises NotImplementedError.
     """
     requested = self.cfg['parser_type']
     if requested == 'default':
         parser = DefaultArticleParser(self)
     elif requested == 'langnames':
         parser = ArticleParserWithLangnames(self)
     elif requested == 'section_level':
         parser = SectionAndArticleParser(self)
     else:
         message = "Parser type " + str(requested) + " not implemented\n"
         raise NotImplementedError(message)
     self.article_parser = parser

예제 #3

파일 보기

파일: wiktionary.py 프로젝트: h4ck3rm1k3/wikt2dict

class Wiktionary(object):
    """ A class for handling one edition of Wiktionary """

    def __init__(self, wc, cfg_fn):
        """
        Load the configuration, set up logging, choose the article parser
        and compute the path of the text dump.

        Errors raised while selecting the parser or building the dump path
        are reported through the log handler instead of being raised.
        Errors raised while creating the config or log handler propagate to
        the caller, because no logger exists yet to report them.

        @param wc: Wiktionary code
        @param cfg_fn: name and path of the configuration file
        """
        self.wc = wc
        # Created outside the try block: the handlers below report through
        # self.log_handler, so it must exist before any of them can run.
        self.cfg = ConfigHandler(wc, cfg_fn)
        self.log_handler = LogHandler(self.cfg)
        try:
            self.init_parser_of_type()
            self.dump_path = (self.cfg['dumpdir'] + '/' +
                              self.cfg['fullname'] + '/' +
                              self.wc + 'wiktionary.txt')
        except KeyError as e:
            # e.message does not exist on Python 3; args[0] carries the
            # missing key on both major versions.
            missing = e.args[0] if e.args else e
            self.log_handler.error(str(missing) +
                                   " parameter must be defined in config file ")
        except NoSectionError:
            self.log_handler.error("Section not defined " + str(wc))
        except Exception as e:
            self.log_handler.error("Unknown error " + str(e))

    def init_parser_of_type(self):
        """ Initialize the appropriate parser specified in the configuration file

        'default' selects DefaultArticleParser and 'langnames' selects
        ArticleParserWithLangnames. Any other value leaves
        self.article_parser unset; set_parser() can supply one afterwards.
        """
        type_ = self.cfg['parser_type']
        if type_ == 'default':
            self.article_parser = DefaultArticleParser(self)
        elif type_ == 'langnames':
            self.article_parser = ArticleParserWithLangnames(self)

    def set_parser(self, parser):
        """ Replace the article parser with the given object """
        self.article_parser = parser

    def read_dump(self):
        """ Iterate through dump and yield each article
        as a tuple of its title and text

        The dump is UTF-8 text in which a line starting with '%%#PAGE'
        opens a new article; the rest of that line, stripped, is the title.
        A title with no following text is dropped when the next page line
        arrives. A final tuple is always yielded, so an empty dump yields
        one pair of empty strings.
        """
        import io  # local import: keeps this method self-contained

        page_sep = u'%%#PAGE'
        # Strip ASCII whitespace only, as the byte-string strip() did before
        # decoding was moved to the file layer.
        ascii_space = u' \t\n\r\x0b\x0c'
        title = u''
        body_lines = []
        # newline='\n' keeps line endings untranslated and splits on '\n'
        # only. The with-block closes the file even when the generator is
        # abandoned early or decoding fails.
        with io.open(self.dump_path, encoding='utf8', newline='\n') as dump:
            for line in dump:
                if not line.startswith(page_sep):
                    body_lines.append(line)
                    continue
                next_title = line.split(page_sep)[-1].strip(ascii_space)
                if title and body_lines:
                    # Joining once per article avoids quadratic += growth.
                    yield (title, u''.join(body_lines))
                    body_lines = []
                title = next_title
        yield (title, u''.join(body_lines))

    def parse_all_articles(self):
        """ Calling parse_article for each article """
        for article in self.read_dump():
            self.article_parser.parse_article(article)

    def write_pairs(self):
        """ Writing the extracted translations to file """
        self.article_parser.write_word_pairs_to_file()