def get_authors(self, doc):
    """Return the author text taken from the ``#ownernick`` element of *doc*.

    The document is queried with ``Parser.css_select``; only the first
    matching node is used and its text is read with ``Parser.getText``.
    The extracted text is also echoed to stdout.

    Args:
        doc: Document object handed straight to ``Parser.css_select``.

    Returns:
        A one-element list holding the extracted text, or an empty list
        when the selector matches nothing.
    """
    matches = Parser.css_select(doc, '#ownernick')
    if len(matches) == 0:
        return []

    author = Parser.getText(matches[0])
    print("authors: " + author)
    return [author]
def get_publishing_date(self, url, doc):
    """Extract the publish timestamp from *doc*.

    Takes the first node matching
    ``#articlebody > div.articalTitle > span.time.SG_txtc``, removes any
    parentheses from its text, trims surrounding whitespace and parses the
    remainder with the format ``%Y-%m-%d %H:%M:%S``. The cleaned text is
    also echoed to stdout.

    Args:
        url: Not used by this implementation; kept so the signature stays
            compatible with existing callers.
        doc: Document object handed straight to ``Parser.css_select``.

    Returns:
        A naive ``datetime`` on success, or ``None`` when the selector
        matches nothing or the text does not match the expected format.
    """
    def parse_date_str(date_str):
        """Parse *date_str* as ``%Y-%m-%d %H:%M:%S``; return None on failure."""
        try:
            return datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")
        except (ValueError, TypeError) as e:
            # Best effort: an unparsable date is reported and treated as
            # "no date" rather than aborting the caller.
            print(e)
            return None

    nodes = Parser.css_select(
        doc, '#articlebody > div.articalTitle > span.time.SG_txtc')
    if len(nodes) == 0:
        return None

    s = Parser.getText(nodes[0])
    # Raw-string pattern: drops "(" and ")" without relying on invalid
    # string escapes. Stripping afterwards keeps strptime from failing on
    # whitespace that was sitting inside or around the parentheses.
    s = re.sub(r'[()]', '', s).strip()
    print("publish_date: " + s)
    return parse_date_str(s)