Beispiel #1
0
def run_dateguesser(htmlstring):
    '''Run date_guesser on an HTML document and return the converted date.

    The document is handed to ``guess_date`` together with a fixed
    placeholder URL. If the resulting guess carries no date, ``None`` is
    returned; otherwise the date goes through ``convert_date`` with
    '%Y-%m-%d %H:%M:%S' as input format and '%Y-%m-%d' as output format.
    '''
    result = guess_date(url='https://www.example.org/test/', html=htmlstring)
    found = result.date
    if found is not None:
        return convert_date(found, '%Y-%m-%d %H:%M:%S', '%Y-%m-%d')
    return None
Beispiel #2
0
def run_articledateextractor(htmlstring):
    '''Run articleDateExtractor on an HTML document and return the converted date.

    ``extractArticlePublishedDate`` is called with an empty string as its
    first argument (presumably the article link -- confirm against the
    library) and the document as ``html``. A ``None`` result is passed
    through unchanged; anything else goes through ``convert_date`` with
    '%Y-%m-%d %H:%M:%S' as input format and '%Y-%m-%d' as output format.
    '''
    published = extractArticlePublishedDate('', html=htmlstring)
    if published is not None:
        return convert_date(published, '%Y-%m-%d %H:%M:%S', '%Y-%m-%d')
    return None
Beispiel #3
0
def run_newsplease(htmlstring):
    '''Run news-please on an HTML document and return the converted date.

    The document is parsed with ``NewsPlease.from_html`` (no URL given).
    When a publication date is present it goes through ``convert_date``
    with '%Y-%m-%d %H:%M:%S' as input format and '%Y-%m-%d' as output
    format. Any exception raised along the way is printed and turned
    into a ``None`` result, as is a missing publication date.
    '''
    try:
        parsed = NewsPlease.from_html(htmlstring, url=None)
        published = parsed.date_publish
        if published is not None:
            return convert_date(published, '%Y-%m-%d %H:%M:%S', '%Y-%m-%d')
    except Exception as err:
        # best-effort helper: report the problem and fall through to None
        print('Exception:', err)
    return None
Beispiel #4
0
def run_newspaper(htmlstring):
    '''Run the newspaper module on an HTML document and return the converted date.

    An ``Article`` is created for a fixed placeholder URL, the document is
    injected as its HTML and the download state is forced to SUCCESS so
    that ``parse()`` can be called without a download step. Returns
    ``None`` when no publish date was found; otherwise the date goes
    through ``convert_date`` with '%Y-%m-%d %H:%M:%S' as input format and
    '%Y-%m-%d' as output format.
    '''
    # NOTE: flagged by the original author as "does not work!"
    article = Article('https://www.example.org/test/')
    article.html = htmlstring
    article.download_state = ArticleDownloadState.SUCCESS
    article.parse()
    published = article.publish_date
    return (
        None
        if published is None
        else convert_date(published, '%Y-%m-%d %H:%M:%S', '%Y-%m-%d')
    )
Beispiel #5
0
def run_newspaper(htmlstring):
    '''Run the newspaper module on an HTML document and return the converted date.

    The document itself is passed to the ``Article`` constructor; a
    ``TypeError`` or ``UnicodeDecodeError`` raised there yields ``None``.
    The document is then injected as the article HTML and the download
    state is forced to SUCCESS so that ``parse()`` can be called without
    a download step. Returns ``None`` when the publish date is ``None``
    or an empty string; otherwise the date goes through ``convert_date``
    with '%Y-%m-%d %H:%M:%S' as input format and '%Y-%m-%d' as output
    format.
    '''
    # NOTE: the original author reports an error on the eval_default dataset
    try:
        article = Article(htmlstring)
    except (TypeError, UnicodeDecodeError):
        return None
    article.html = htmlstring
    article.download_state = ArticleDownloadState.SUCCESS
    article.parse()
    published = article.publish_date
    nothing_found = published is None or published == ''
    if nothing_found:
        return None
    return convert_date(published, '%Y-%m-%d %H:%M:%S', '%Y-%m-%d')
Beispiel #6
0
def test_convert_date():
    '''Check convert_date in both directions between two date formats.'''
    # (input string, input format, output format, expected output)
    cases = (
        ('2016-11-18', '%Y-%m-%d', '%d %B %Y', '18 November 2016'),
        ('18 November 2016', '%d %B %Y', '%Y-%m-%d', '2016-11-18'),
    )
    for datestring, informat, outformat, expected in cases:
        assert convert_date(datestring, informat, outformat) == expected