def run_dateguesser(htmlstring):
    '''Try to extract a date with date_guesser.

    ``guess_date`` is called with a fixed placeholder URL together with
    the HTML to analyse.

    Args:
        htmlstring: HTML source of the document.

    Returns:
        The guessed date passed through ``convert_date`` (requested output
        format ``%Y-%m-%d``), or None if no date was guessed.
    '''
    source_format = '%Y-%m-%d %H:%M:%S'
    target_format = '%Y-%m-%d'
    guessed = guess_date(url='https://www.example.org/test/', html=htmlstring).date
    if guessed is not None:
        return convert_date(guessed, source_format, target_format)
    return None
def run_articledateextractor(htmlstring):
    '''Try to extract a date with articleDateExtractor.

    The extractor is called with an empty string as first argument and the
    HTML supplied through the ``html`` keyword.

    Args:
        htmlstring: HTML source of the document.

    Returns:
        The extracted date passed through ``convert_date`` (requested
        output format ``%Y-%m-%d``), or None if nothing was extracted.
    '''
    source_format = '%Y-%m-%d %H:%M:%S'
    target_format = '%Y-%m-%d'
    extracted = extractArticlePublishedDate('', html=htmlstring)
    if extracted is not None:
        return convert_date(extracted, source_format, target_format)
    return None
def run_newsplease(htmlstring):
    '''Try to extract a date with newsplease.

    Any exception raised while parsing or converting is printed and
    treated as "no date found" instead of being propagated.

    Args:
        htmlstring: HTML source of the document.

    Returns:
        The publication date passed through ``convert_date`` (requested
        output format ``%Y-%m-%d``), or None if no date was found or an
        exception occurred.
    '''
    try:
        published = NewsPlease.from_html(htmlstring, url=None).date_publish
        if published is not None:
            return convert_date(published, '%Y-%m-%d %H:%M:%S', '%Y-%m-%d')
    except Exception as err:
        # best effort: report the failure and fall through to None
        print('Exception:', err)
    return None
def run_newspaper(htmlstring):
    '''Try to extract a date with the newspaper module.

    The HTML is assigned to an ``Article`` object whose download state is
    set to ``ArticleDownloadState.SUCCESS`` before ``parse()`` is called.

    Args:
        htmlstring: HTML source of the document.

    Returns:
        The publish date passed through ``convert_date`` (requested output
        format ``%Y-%m-%d``), or None if the ``Article`` could not be
        constructed (TypeError / UnicodeDecodeError) or no publish date
        was found.
    '''
    # NOTE: this function used to be defined twice in a row. The first
    # variant built the article from the placeholder URL
    # 'https://www.example.org/test/' and was flagged "does not work!"; it
    # was always overridden by this definition, so only this one is kept.
    # throws error on the eval_default dataset
    try:
        myarticle = Article(htmlstring)
    except (TypeError, UnicodeDecodeError):
        return None
    myarticle.html = htmlstring
    myarticle.download_state = ArticleDownloadState.SUCCESS
    myarticle.parse()
    # the publish date may be missing (None) or an empty string
    if myarticle.publish_date is None or myarticle.publish_date == '':
        return None
    return convert_date(myarticle.publish_date, '%Y-%m-%d %H:%M:%S', '%Y-%m-%d')
def test_convert_date():
    '''Check that convert_date translates between two formats both ways.'''
    iso_format, long_format = '%Y-%m-%d', '%d %B %Y'
    iso_date, long_date = '2016-11-18', '18 November 2016'
    # ISO date to long form and back again
    assert convert_date(iso_date, iso_format, long_format) == long_date
    assert convert_date(long_date, long_format, iso_format) == iso_date