예제 #1
0
    link = entry.link
    url_hash = hashlib.md5(link).hexdigest()
    date = entry.published_parsed

    published_date = arrow.get(date).to('US/Pacific').date().strftime(
        '%Y-%m-%d')
    published_ts = arrow.get(date).to('US/Pacific').to('UTC').timestamp

    # See if we already have this story
    try:
        NewsItem.get(NewsItem.url_hash == url_hash)
        print 'Item exists. Skipping.'
        continue
    except peewee.DoesNotExist:
        print 'Creating item.'
        item = NewsItem()

    headline = entry.title
    summary = entry.summary

    item.url_hash = url_hash
    item.link = link
    item.source = 'Seattle Times'
    item.title = headline
    item.summary = summary
    item.published_date = published_date
    item.published_ts = published_ts
    item.inserted_ts = arrow.utcnow().timestamp

    item.save()
예제 #2
0
    
    # Build one NewsItem from the current feed entry, unless a row with the
    # same link hash already exists.
    link = entry.link
    # The MD5 hex digest of the link is used as the lookup key below.
    url_hash = hashlib.md5(link).hexdigest()
    date = entry.published_parsed

    # Publication date as a 'YYYY-MM-DD' string taken in US/Pacific, and the
    # same instant as a UTC timestamp.
    published_date = arrow.get(date).to('US/Pacific').date().strftime('%Y-%m-%d')
    published_ts = arrow.get(date).to('US/Pacific').to('UTC').timestamp

    # See if we already have this story
    try:
        NewsItem.get(NewsItem.url_hash==url_hash)
        print 'Item exists. Skipping.'
        # Move on to the next iteration of the enclosing loop (its header is
        # outside this excerpt).
        continue
    except peewee.DoesNotExist:
        # No stored row has this hash, so start a fresh record.
        print 'Creating item.'
        item = NewsItem()

    headline = entry.title
    summary = entry.summary

    # Copy the collected values onto the record.
    item.url_hash = url_hash
    item.link = link
    item.source = 'Seattle Times'
    item.title = headline
    item.summary = summary
    item.published_date = published_date
    item.published_ts = published_ts
    # Time at which this row was written, as a UTC timestamp.
    item.inserted_ts = arrow.utcnow().timestamp

    item.save()
    # Copy the already-collected story fields onto the record.
    item.source = 'Weather Underground'
    item.link = link
    item.url_hash = url_hash
    item.title = headline
    item.summary = summary

    # The author and date are in the same text string. When the split yields
    # a single piece, that piece is the date; otherwise the date is the
    # second piece.
    parts = tr_el.em.text.strip().split('\n\t\t')
    date_text = parts[0] if len(parts) == 1 else parts[1]

    # Rebuild the date as midnight with the fixed literal offset -05:00.
    dt = dateutil.parser.parse(date_text)
    dt = dt.replace(tzinfo=pytz.timezone('US/Eastern')).strftime(
        '%Y-%m-%d') + 'T00:00:00-05:00'
    timestamp = arrow.get(dt).to('UTC').timestamp
    published_date = arrow.get(dt).date().strftime('%Y-%m-%d')
    item.published_ts = timestamp

    # Stories are posted without times, so the current time is assigned to
    # the story instead (the feed is checked every 30 minutes). That is
    # better than showing every story as midnight. This replaces the
    # midnight-based value stored just above.
    item.published_ts = arrow.utcnow().timestamp

    item.published_date = published_date
    item.inserted_ts = arrow.utcnow().timestamp
    item.save()
    # Fill in the story fields that were gathered before this point.
    item.link = link
    item.url_hash = url_hash
    item.title = headline
    item.summary = summary
    item.source = 'Weather Underground'

    # The author and date are in the same text string: a lone piece is the
    # date itself, otherwise the date sits in the second piece.
    parts = tr_el.em.text.strip().split('\n\t\t')
    date_index = 0 if len(parts) == 1 else 1

    # Normalise the parsed date to a midnight string carrying the fixed
    # literal offset -05:00, then derive the timestamp and date string.
    dt = dateutil.parser.parse(parts[date_index])
    eastern = pytz.timezone('US/Eastern')
    dt = dt.replace(tzinfo=eastern).strftime('%Y-%m-%d') + 'T00:00:00-05:00'
    midnight = arrow.get(dt)
    timestamp = midnight.to('UTC').timestamp
    published_date = midnight.date().strftime('%Y-%m-%d')
    item.published_ts = timestamp

    # Stories are posted without times, so we just assign the current time
    # to the story since we check every 30 minutes. It's better than showing
    # every story as midnight. This overrides the value set just above.
    item.published_ts = arrow.utcnow().timestamp

    item.published_date = published_date
    item.inserted_ts = arrow.utcnow().timestamp
    item.save()
    
예제 #5
0
        # Look up the page's <meta> elements whose "property" attribute is
        # og:title, og:description and og:url.
        meta_og_title_el = links_soup.find('meta', {'property': 'og:title'})
        meta_og_desc_el = links_soup.find('meta', {'property': 'og:description'})
        meta_og_url_el = links_soup.find('meta', {'property': 'og:url'})
    except Exception, e:
        # Any exception raised in the try body (which begins before this
        # excerpt) is treated the same as finding none of the three elements.
        meta_og_title_el = None
        meta_og_desc_el = None
        meta_og_url_el = None

    # Each element that was found supplies its "content" attribute.
    # NOTE(review): headline, description and link are used unconditionally
    # below, so they presumably already hold values assigned before this
    # excerpt -- confirm, otherwise a missing element leads to a NameError.
    if meta_og_title_el is not None:
        headline = meta_og_title_el['content'].strip()

    if meta_og_desc_el is not None:
        description = meta_og_desc_el['content'].strip()

    if meta_og_url_el is not None:
        link = meta_og_url_el['content']

    # Copy the collected values onto the record and persist it.
    # NOTE(review): url_hash is not recomputed after link is replaced by the
    # og:url value above -- confirm that the hash of the earlier link is the
    # intended key.
    item.link = link
    item.url_hash = url_hash
    item.title = headline
    item.summary = description
    item.source = "Accuweather"
    item.published_date = published_date
    item.published_ts = utc_dt.timestamp
    # Time at which this row was written, as a UTC timestamp.
    item.inserted_ts = arrow.utcnow().timestamp

    item.save()

    # Sleep between requests
    time.sleep(1)