예제 #1
0
def cleaner_Both(wikidoc):
    '''Fill in a document's clean text and link metadata using two tools.

    ``WikiExtractor.clean`` produces the cleaned text from
    ``wikidoc.wiki_text``, while a ``WikiTextProcessor`` built from the same
    raw markup supplies the links (the original note calls this the
    "Parser from hell" -- presumably mwparserfromhell-backed; confirm).

    The document is modified in place: ``clean_text`` is set and the links
    are stored under ``wikidoc.meta[WdNames.LINKS]``.  The same ``wikidoc``
    object is returned.
    '''
    wikidoc.clean_text = WikiExtractor.clean(wikidoc.wiki_text)
    # The processor is only needed for its links, so it is not kept around.
    links = WikiTextProcessor(wikidoc.wiki_text).get_links()
    wikidoc.meta[WdNames.LINKS] = links
    return wikidoc
예제 #2
0
def cleaner_Both(wikidoc):
    '''Fill in a document's clean text and link metadata using two tools.

    ``WikiExtractor.clean`` produces the cleaned text from
    ``wikidoc.wiki_text``, while a ``WikiTextProcessor`` built from the same
    raw markup supplies the links (the original note calls this the
    "Parser from hell" -- presumably mwparserfromhell-backed; confirm).

    The document is modified in place: ``clean_text`` is set and the links
    are stored under ``wikidoc.meta[WdNames.LINKS]``.  The same ``wikidoc``
    object is returned.
    '''
    wikidoc.clean_text = WikiExtractor.clean(wikidoc.wiki_text)
    # The processor is only needed for its links, so it is not kept around.
    links = WikiTextProcessor(wikidoc.wiki_text).get_links()
    wikidoc.meta[WdNames.LINKS] = links
    return wikidoc
예제 #3
0
def cleaner_WikiTextProcessor(wikidoc):
    '''Fill in clean text and link metadata using only WikiTextProcessor.

    A single ``WikiTextProcessor`` is built from ``wikidoc.wiki_text`` and
    queried twice: ``get_clean_text()`` for the cleaned text and
    ``get_links()`` for the links.

    The document is modified in place: ``clean_text`` is set and the links
    are stored under ``wikidoc.meta[WdNames.LINKS]``.  The same ``wikidoc``
    object is returned.
    '''
    processor = WikiTextProcessor(wikidoc.wiki_text)
    # NOTE: the processor's get_text_only() is an alternative extraction
    # that was left disabled here in favour of get_clean_text().
    cleaned = processor.get_clean_text()
    wikidoc.clean_text = cleaned
    links = processor.get_links()
    wikidoc.meta[WdNames.LINKS] = links
    return wikidoc
예제 #4
0
def cleaner_WikiTextProcessor(wikidoc):
    '''Fill in clean text and link metadata using only WikiTextProcessor.

    A single ``WikiTextProcessor`` is built from ``wikidoc.wiki_text`` and
    queried twice: ``get_clean_text()`` for the cleaned text and
    ``get_links()`` for the links.

    The document is modified in place: ``clean_text`` is set and the links
    are stored under ``wikidoc.meta[WdNames.LINKS]``.  The same ``wikidoc``
    object is returned.
    '''
    processor = WikiTextProcessor(wikidoc.wiki_text)
    # NOTE: the processor's get_text_only() is an alternative extraction
    # that was left disabled here in favour of get_clean_text().
    cleaned = processor.get_clean_text()
    wikidoc.clean_text = cleaned
    links = processor.get_links()
    wikidoc.meta[WdNames.LINKS] = links
    return wikidoc