Python Parser Examples

Programming Language: Python

Namespace/Package Name: newspaper

Class/Type: Parser

Examples at hotexamples.com: 4

Python Parser - 4 examples found. These are the top-rated real-world Python examples of newspaper.Parser extracted from open-source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

css_select(3)

getText(2)

Parser(1)

Example #1

Show file

File: HEExtractors.py Project: itech001/htmlextractor

    def get_authors(self, doc):
        """Return the author taken from the ``#ownernick`` element of *doc*.

        The text of the first node matching the ``#ownernick`` CSS selector
        is printed and returned as a one-element list. When nothing matches,
        an empty list is returned.
        """
        matches = Parser.css_select(doc, '#ownernick')
        if not matches:
            return []

        author = Parser.getText(matches[0])
        print("authors: " + author)
        return [author]

Example #2

Show file

 def build(self):
     """Fill ``title``, ``article_model`` and ``authors`` from ``self.url``.

     Creates a ``Parser`` for ``self.url`` with ``language='de'``, calls its
     ``download()`` and ``parse()`` methods, and copies the resulting title,
     processed text and authors onto this instance.
     """
     page = Parser(self.url, language='de')
     page.download()
     page.parse()
     self.title = page.title
     # Order preserved: get_readability() is called only after article_model
     # is set (presumably it reads that attribute -- confirm in the class).
     self.article_model = nlp(page.text)
     self.get_readability()
     self.authors = page.authors

Example #3

Show file

File: HEExtractors.py Project: itech001/htmlextractor

    def get_publishing_date(self, url, doc):
        """Return the publication timestamp found in *doc*, or ``None``.

        The text of the first node matching
        ``#articlebody > div.articalTitle > span.time.SG_txtc`` is stripped of
        parentheses and parsed with the format ``%Y-%m-%d %H:%M:%S``.

        Args:
            url: Not used by this method; kept for interface compatibility.
            doc: Document handed to ``Parser.css_select``.

        Returns:
            A ``datetime`` on success; ``None`` when no node matches or the
            text does not match the expected format.
        """
        def parse_date_str(date_str):
            # Surrounding whitespace left after removing the parentheses
            # would otherwise make strptime reject a well-formed timestamp.
            try:
                return datetime.strptime(date_str.strip(), "%Y-%m-%d %H:%M:%S")
            except ValueError as e:
                # Best effort: report the mismatch and signal "no date".
                print(e)
                return None

        nodes = Parser.css_select(doc, '#articlebody > div.articalTitle > span.time.SG_txtc')
        if not nodes:
            return None

        s = Parser.getText(nodes[0])
        # Raw string: parentheses need no escaping inside a character class,
        # and '\(' in a normal string literal is an invalid escape sequence.
        s = re.sub(r'[()]', '', s)
        print("publish_date: " + s)
        return parse_date_str(s)

Example #4

Show file

File: HEExtractors.py Project: itech001/htmlextractor

 def calculate_best_node(self, doc):
     """Return the node of *doc* to use as the main content node.

     The first node matching the ``#sina_keyword_ad_area2`` CSS selector is
     returned when one exists; otherwise the result of
     ``ContentExtractor.calculate_best_node`` for the same document is
     returned.
     """
     candidates = Parser.css_select(doc, '#sina_keyword_ad_area2')
     if candidates:
         return candidates[0]
     # No site-specific container found: defer to ContentExtractor.
     return ContentExtractor.calculate_best_node(self, doc)