Example #1
async def livereload_js(request: web.Request) -> web.Response:
    if request.if_modified_since:
        raise HTTPNotModified()

    lr_script = request.app['livereload_script']
    return web.Response(body=lr_script, content_type='application/javascript',
                        headers={LAST_MODIFIED: 'Fri, 01 Jan 2016 00:00:00 GMT'})
Example #2
async def livereload_js(request):
    if request.if_modified_since:
        aux_logger.debug('> %s %s %s 0B', request.method, request.path, 304)
        raise HTTPNotModified()

    script_key = 'livereload_script'
    lr_script = request.app.get(script_key)
    if lr_script is None:
        lr_path = Path(__file__).absolute().parent.joinpath('livereload.js')
        with lr_path.open('rb') as f:
            lr_script = f.read()
            request.app[script_key] = lr_script

    aux_logger.debug('> %s %s %s %s', request.method, request.path, 200, fmt_size(len(lr_script)))
    return web.Response(body=lr_script, content_type='application/javascript',
                        headers={LAST_MODIFIED: 'Fri, 01 Jan 2016 00:00:00 GMT'})
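Examples #1 and #2 assume surrounding wiring: the route has to be registered on the aiohttp application, and (for Example #1) the script body preloaded into the app under the 'livereload_script' key. A minimal wiring sketch under those assumptions, reusing the livereload_js handler from Example #2 (the route path and port are hypothetical):

from pathlib import Path

from aiohttp import web


async def preload_livereload(app):
    # Cache the script body once at startup so the handler in Example #1
    # never has to touch the filesystem.
    lr_path = Path(__file__).absolute().parent.joinpath('livereload.js')
    app['livereload_script'] = lr_path.read_bytes()


def create_app():
    app = web.Application()
    app.on_startup.append(preload_livereload)
    # livereload_js is the handler defined in Example #2 above.
    app.router.add_get('/livereload.js', livereload_js)
    return app


if __name__ == '__main__':
    web.run_app(create_app(), port=8000)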
Example #3
    def send(self, request, filepath):
        """Send filepath to client using request."""
        gzip = False
        # use .get(): the Accept-Encoding header may be absent
        if 'gzip' in request.headers.get(hdrs.ACCEPT_ENCODING, ''):
            gzip_path = filepath.with_name(filepath.name + '.gz')

            if gzip_path.is_file():
                filepath = gzip_path
                gzip = True

        st = filepath.stat()

        modsince = request.if_modified_since
        if modsince is not None and st.st_mtime <= modsince.timestamp():
            raise HTTPNotModified()

        ct, encoding = mimetypes.guess_type(str(filepath))
        if not ct:
            ct = 'application/octet-stream'

        resp = self._response_factory()
        resp.content_type = ct
        if encoding:
            resp.headers[hdrs.CONTENT_ENCODING] = encoding
        if gzip:
            resp.headers[hdrs.VARY] = hdrs.ACCEPT_ENCODING
        resp.last_modified = st.st_mtime

        # CACHE HACK
        if not self.development:
            cache_time = 31 * 86400  # = 1 month
            resp.headers[hdrs.CACHE_CONTROL] = "public, max-age={}".format(
                cache_time)

        file_size = st.st_size

        resp.content_length = file_size
        resp.set_tcp_cork(True)
        try:
            with filepath.open('rb') as f:
                yield from self._sendfile(request, resp, f, file_size)

        finally:
            resp.set_tcp_nodelay(True)

        return resp
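The common pattern in Examples #1–#3 is HTTP conditional GET: when the client sends If-Modified-Since and the resource has not changed since that time, the handler short-circuits with 304 Not Modified instead of resending the body. A stripped-down sketch of the same check for a single file, assuming aiohttp (the file path is hypothetical):

from pathlib import Path

from aiohttp import web

STATIC_FILE = Path('static/app.js')  # hypothetical path


async def static_handler(request: web.Request) -> web.Response:
    st = STATIC_FILE.stat()
    # request.if_modified_since is the parsed If-Modified-Since header, or None.
    modsince = request.if_modified_since
    if modsince is not None and st.st_mtime <= modsince.timestamp():
        # Unchanged since the client's cached copy: answer 304, no body.
        raise web.HTTPNotModified()

    resp = web.Response(body=STATIC_FILE.read_bytes(),
                        content_type='application/javascript')
    resp.last_modified = st.st_mtime  # emits the Last-Modified header
    return resp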
Example #4
def background_check_feed(conn, feed, asyncioloop):
    logger.info(feed + ': Starting up background_check_feed')

    # Try to wait until Discord client has connected, etc:
    yield from client.wait_until_ready()
    # make sure debug output has this check run in the right order...
    yield from asyncio.sleep(1)

    user_agent = config["MAIN"].get("user_agent", USER_AGENT)

    # just a bit easier to use...
    FEED = config[feed]

    # pull config for this feed out:
    feed_url = FEED.get('feed_url')
    rss_refresh_time = FEED.getint('rss_refresh_time', 3600)
    start_skew = FEED.getint('start_skew', rss_refresh_time)
    start_skew_min = FEED.getint('start_skew_min', 1)
    max_age = FEED.getint('max_age', 86400)

    # loop through all the channels this feed is configured to send to
    channels = []
    for key in FEED.get('channels').split(','):
        logger.debug(feed + ': adding channel ' + key)
        # stick a dict in the channels array so we have more to work with
        channels.append(
            {
                'object': discord.Object(id=config['CHANNELS'][key]),
                'name': key,
                'id': config['CHANNELS'][key],
            }
        )

    if start_skew > 0:
        sleep_time = random.uniform(start_skew_min, start_skew)
        logger.info(feed + ':start_skew:sleeping for ' + str(sleep_time))
        yield from asyncio.sleep(sleep_time)

    # Basically run forever
    while not client.is_closed:
        # And tries to catch all the exceptions and just keep going
        # (but see list of except/finally stuff below)
        try:
            logger.info(feed + ': processing feed')

            # If send_typing is on for the feed, send a little "typing ..."
            # whenever a feed is being worked on.  configurable per-room
            if should_send_typing(FEED, feed):
                for channel in channels:
                    # Since this is first attempt to talk to this channel,
                    # be very verbose about failures to talk to channel
                    try:
                        yield from client.send_typing(channel['object'])
                    except discord.errors.Forbidden:
                        logger.exception(
                            "%s:%s:forbidden - is bot allowed in channel?",
                            feed, channel
                        )

            http_headers = {"User-Agent": user_agent}

            # Download the actual feed, if changed since last fetch

            # pull data about history of this *feed* from DB:
            cursor = conn.cursor()
            cursor.execute(
                "select lastmodified,etag from feed_info where feed=? OR url=?", [
                    feed, feed_url])
            data = cursor.fetchone()

            # If we've handled this feed before,
            # and we have etag from last run, add etag to headers.
            # and if we have a last modified time from last run,
            # add "If-Modified-Since" to headers.
            if data is None:  # never handled this feed before...
                logger.info(feed + ':looks like updated version. saving info')
                cursor.execute(
                    "REPLACE INTO feed_info (feed,url) VALUES (?,?)",
                    [feed, feed_url])
                conn.commit()
                logger.debug(feed + ':feed info saved')
            else:
                logger.debug(feed +
                             ':setting up extra headers for HTTP request.')
                logger.debug(data)
                lastmodified = data[0]
                etag = data[1]
                if lastmodified is not None and len(lastmodified):
                    logger.debug(feed +
                                 ':adding header If-Modified-Since: ' +
                                 lastmodified)
                    http_headers['If-Modified-Since'] = lastmodified
                else:
                    logger.debug(feed + ':no stored lastmodified')
                if etag is not None and len(etag):
                    logger.debug(feed + ':adding header ETag: ' + etag)
                    http_headers['ETag'] = etag
                else:
                    logger.debug(feed + ':no stored ETag')

            logger.debug(feed + ':sending http request for ' + feed_url)
            # Send actual request.  yield from can yield control to another
            # instance.
            http_response = yield from httpclient.request('GET',
                                                          feed_url,
                                                          headers=http_headers)
            logger.debug(http_response)

            # First check that we didn't get a "None" response, since that's
            # some sort of internal error thing:
            if http_response.status is None:
                logger.error(feed + ':HTTP response code is NONE')
                raise HTTPError()
            # Some feeds are smart enough to use that if-modified-since or
            # etag info, which gives us a 304 status.  If that happens,
            # assume no new items, fall through rest of this and try again
            # later.
            elif http_response.status == 304:
                logger.debug(feed + ':data is old; moving on')
                http_response.close()
                raise HTTPNotModified()
            # If we get anything but a 200, that's a problem and we don't
            # have good data, so give up and try later.
            # Mostly handled different than 304/not-modified to make logging
            # clearer.
            elif http_response.status != 200:
                logger.debug(feed + ':HTTP error not 200: ' +
                             str(http_response.status))
                # raise HTTPError()
            else:
                logger.debug(feed + ':HTTP success')

            # pull data out of the http response
            logger.debug(feed + ':reading http response')
            http_data = yield from http_response.read()

            # parse the data from the http response with feedparser
            logger.debug(feed + ':parsing http data')
            feed_data = feedparser.parse(http_data)
            logger.debug(feed + ':done fetching')

            # If we got an ETAG back in headers, store that, so we can
            # include on next fetch
            if 'ETAG' in http_response.headers:
                etag = http_response.headers['ETAG']
                logger.debug(feed + ':saving etag: ' + etag)
                cursor.execute(
                    "UPDATE feed_info SET etag=? where feed=? or url=?",
                    [etag, feed, feed_url])
                conn.commit()
                logger.debug(feed + ':etag saved')
            else:
                logger.debug(feed + ':no etag')

            # If we got a Last-Modified header back, store that, so we can
            # include on next fetch
            if 'LAST-MODIFIED' in http_response.headers:
                modified = http_response.headers['LAST-MODIFIED']
                logger.debug(feed + ':saving lastmodified: ' + modified)
                cursor.execute(
                    "UPDATE feed_info SET lastmodified=? where feed=? or url=?", [
                        modified, feed, feed_url])
                conn.commit()
                logger.debug(feed + ':saved lastmodified')
            else:
                logger.debug(feed + ':no last modified date')

            http_response.close()

            # Process all of the entries in the feed
            # Use reversed to start with end, which is usually oldest
            logger.debug(feed + ':processing entries')
            for item in reversed(feed_data.entries):
                logger.debug("%s:item:processing this entry:%r", feed, item)

                # Pull out the unique id, or just give up on this item.
                id = ''
                if 'id' in item:
                    id = item.id
                elif 'guid' in item:
                    id = item.guid
                elif 'link' in item:
                    id = item.link
                else:
                    logger.error(feed + ':item:no id, skipping')
                    continue

                # Get our best date out, in both raw and parsed form
                pubdate = extract_best_item_date(item, TIMEZONE)
                pubdate_fmt = pubdate.strftime("%a %b %d %H:%M:%S %Z %Y")

                logger.debug(feed + ':item:id:' + id)
                logger.debug(feed +
                             ':item:checking database history for this item')
                # Check DB for this item
                cursor.execute(
                    "SELECT published,title,url,reposted FROM feed_items WHERE id=?", [id])
                data = cursor.fetchone()

                # If we've never seen it before, then actually processing
                # this:
                if data is None:
                    logger.info(feed + ':item ' + id + ' unseen, processing:')

                    # Store info about this item, so next time we skip it:
                    cursor.execute(
                        "INSERT INTO feed_items (id,published) VALUES (?,?)",
                        [id, pubdate_fmt])
                    conn.commit()

                    # Doing some crazy date math stuff...
                    # max_age is mostly so that first run doesn't spew too
                    # much stuff into a room, but is also a useful safety
                    # measure in case a feed suddenly reverts to something
                    # ancient or other weird problems...
                    time_since_published = TIMEZONE.localize(
                        datetime.now()) - pubdate.astimezone(TIMEZONE)

                    if time_since_published.total_seconds() < max_age:
                        logger.info(feed + ':item:fresh and ready for parsing')

                        # Loop over all channels for this particular feed
                        # and process appropriately:
                        for channel in channels:
                            include = True
                            filter_field = FEED.get(
                                channel['name'] + '.filter_field',
                                FEED.get('filter_field',
                                         'title'))
                            # Regex if channel exists
                            if (channel['name'] +
                                    '.filter') in FEED or 'filter' in FEED:
                                logger.debug(
                                    feed + ':item:running filter for ' + channel['name'])
                                regexpat = FEED.get(
                                    channel['name'] + '.filter',
                                    FEED.get('filter', '^.*$'))
                                logger.debug(
                                    feed +
                                    ':item:using filter:' +
                                    regexpat +
                                    ' on ' +
                                    item['title'] +
                                    ' field ' +
                                    filter_field)
                                regexmatch = re.search(
                                    regexpat, item[filter_field])
                                if regexmatch is None:
                                    include = False
                                    logger.info(
                                        feed + ':item:failed filter for ' + channel['name'])
                            elif (channel['name'] + '.filter_exclude') in FEED or 'filter_exclude' in FEED:
                                logger.debug(
                                    feed + ':item:running exclude filter for ' + channel['name'])
                                regexpat = FEED.get(
                                    channel['name'] + '.filter_exclude',
                                    FEED.get('filter_exclude',
                                             '^.*$'))
                                logger.debug(
                                    feed +
                                    ':item:using filter_exclude:' +
                                    regexpat +
                                    ' on ' +
                                    item['title'] +
                                    ' field ' +
                                    filter_field)
                                regexmatch = re.search(regexpat, item[filter_field])
                                if regexmatch is None:
                                    include = True
                                    logger.info(
                                        feed + ':item:passed exclude filter for ' + channel['name'])
                                else:
                                    include = False
                                    logger.info(
                                        feed + ':item:failed exclude filter for ' + channel['name'])
                            else:
                                include = True  # redundant safety net
                                logger.debug(
                                    feed + ':item:no filter configured for ' + channel['name'])

                            if include is True:
                                logger.debug(
                                    feed + ':item:building message for ' + channel['name'])
                                message = build_message(FEED, item, channel)
                                logger.debug(
                                    feed + ':item:sending message (eventually) to ' + channel['name'])
                                yield from send_message_wrapper(asyncioloop,
                                                                FEED,
                                                                feed,
                                                                channel,
                                                                client,
                                                                message)
                            else:
                                logger.info(
                                    feed + ':item:skipping item due to not passing filter for ' + channel['name'])

                    else:
                        # Logs of debugging info for date handling stuff...
                        logger.info("%s:too old, skipping", feed)
                        logger.debug("%s:now:now:%s", feed, time.time())
                        logger.debug("%s:now:gmtime:%s", feed, time.gmtime())
                        logger.debug("%s:now:localtime:%s", feed, time.localtime())
                        logger.debug("%s:pubDate:%r", feed, pubdate)
                        logger.debug(item)
                # seen before, move on:
                else:
                    logger.debug(feed + ':item:' + id +
                                 ' seen before, skipping')
        # This is completely expected behavior for a well-behaved feed:
        except HTTPNotModified:
            logger.debug(
                feed + ':Headers indicate feed unchanged since last time fetched:')
            logger.debug(sys.exc_info())
        # Many feeds have random periodic problems that shouldn't cause
        # permanent death:
        except HTTPError:
            logger.warn(feed + ':Unexpected HTTP error:')
            logger.warn(sys.exc_info())
            logger.warn(
                feed + ':Assuming error is transient and trying again later')
        # sqlite3 errors are probably really bad and we should just totally
        # give up on life
        except sqlite3.Error as sqlerr:
            logger.error(feed + ':sqlite3 error: ')
            logger.error(sys.exc_info())
            logger.error(sqlerr)
            raise
        # Ideally we'd remove the specific channel or something...
        # But I guess just throw an error into the log and try again later...
        except discord.errors.Forbidden:
            logger.error(feed + ':discord.errors.Forbidden')
            logger.error(sys.exc_info())
            logger.error(
                feed +
                ":Perhaps bot isn't allowed in one of the channels for this feed?")
            # raise # or not? hmm...
        # unknown error: definitely give up and die and move on
        except Exception:
            logger.exception("Unexpected error - giving up")
            raise
        # No matter what goes wrong, wait same time and try again
        finally:
            logger.debug(feed + ':sleeping for ' +
                         str(rss_refresh_time) + ' seconds')
            yield from asyncio.sleep(rss_refresh_time)
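Examples #4–#7 all assume two SQLite tables: feed_info, holding the per-feed ETag and Last-Modified values used for conditional fetches, and feed_items, the per-item history used to skip already-posted entries. The DDL is not part of the excerpts; a schema sketch inferred from the columns the queries touch might look like this (column types are assumptions):

import sqlite3

conn = sqlite3.connect('feed2discord.db')
# feed_info: one row per feed, keyed by feed name, so REPLACE INTO acts as an upsert.
conn.execute(
    "CREATE TABLE IF NOT EXISTS feed_info"
    " (feed TEXT PRIMARY KEY, url TEXT, lastmodified TEXT, etag TEXT)")
# feed_items: one row per seen item id, used only for dedup in these examples.
conn.execute(
    "CREATE TABLE IF NOT EXISTS feed_items"
    " (id TEXT PRIMARY KEY, published TEXT, title TEXT, url TEXT, reposted TEXT)")
conn.commit()
conn.close()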
Example #5
def background_check_feed(feed):
    logger.info(feed + ': Starting up background_check_feed')
    yield from client.wait_until_ready()
    # make sure debug output has this check run in the right order...
    yield from asyncio.sleep(1)

    FEED = config[feed]

    feed_url = FEED.get('feed_url')
    rss_refresh_time = FEED.getint('rss_refresh_time', 3600)
    max_age = FEED.getint('max_age', 86400)
    channels = []
    for key in FEED.get('channels').split(','):
        logger.debug(feed + ': adding channel ' + key)
        channels.append(discord.Object(id=config['CHANNELS'][key]))

    while not client.is_closed:
        try:
            logger.info(feed + ': processing feed')

            if FEED.getint('send_typing', 0) >= 1:
                for channel in channels:
                    try:
                        yield from client.send_typing(channel)
                    except discord.errors.Forbidden:
                        logger.error(feed + ':discord.errors.Forbidden')
                        logger.error(sys.exc_info())
                        logger.error(
                            feed +
                            ":Perhaps bot isn't allowed in this channel?")
                        logger.error(channel)

            http_headers = {}
            http_headers['User-Agent'] = MAIN.get('UserAgent',
                                                  'feed2discord/1.0')

            ### Download the actual feed, if changed since last fetch
            cursor = conn.cursor()
            cursor.execute(
                "select lastmodified,etag from feed_info where feed=? OR url=?",
                [feed, feed_url])
            data = cursor.fetchone()
            if data is None:
                logger.info(feed + ':looks like updated version. saving info')
                cursor.execute(
                    "REPLACE INTO feed_info (feed,url) VALUES (?,?)",
                    [feed, feed_url])
                conn.commit()
                logger.debug(feed + ':feed info saved')
            else:
                logger.debug(feed +
                             ':setting up extra headers for HTTP request.')
                logger.debug(data)
                lastmodified = data[0]
                etag = data[1]
                if lastmodified is not None and len(lastmodified):
                    logger.debug(feed + ':adding header If-Modified-Since: ' +
                                 lastmodified)
                    http_headers['If-Modified-Since'] = lastmodified
                else:
                    logger.debug(feed + ':no stored lastmodified')
                if etag is not None and len(etag):
                    logger.debug(feed + ':adding header ETag: ' + etag)
                    http_headers['ETag'] = etag
                else:
                    logger.debug(feed + ':no stored ETag')
            logger.debug(feed + ':sending http request for ' + feed_url)
            http_response = yield from httpclient.request('GET',
                                                          feed_url,
                                                          headers=http_headers)
            logger.debug(http_response)
            if http_response.status == 304:
                logger.debug(feed + ':data is old; moving on')
                http_response.close()
                raise HTTPNotModified()
            elif http_response.status != 200:
                logger.debug(feed + ':HTTP error: ' +
                             str(http_response.status))
                http_response.close()
                raise HTTPError()
            else:
                logger.debug(feed + ':HTTP success')

            logger.debug(feed + ':reading http response')
            http_data = yield from http_response.read()

            logger.debug(feed + ':parsing http data')
            feed_data = feedparser.parse(http_data)
            logger.debug(feed + ':done fetching')

            if 'ETAG' in http_response.headers:
                etag = http_response.headers['ETAG']
                logger.debug(feed + ':saving etag: ' + etag)
                cursor.execute(
                    "UPDATE feed_info SET etag=? where feed=? or url=?",
                    [etag, feed, feed_url])
                conn.commit()
                logger.debug(feed + ':etag saved')
            else:
                logger.debug(feed + ':no etag')

            if 'LAST-MODIFIED' in http_response.headers:
                modified = http_response.headers['LAST-MODIFIED']
                logger.debug(feed + ':saving lastmodified: ' + modified)
                cursor.execute(
                    "UPDATE feed_info SET lastmodified=? where feed=? or url=?",
                    [modified, feed, feed_url])
                conn.commit()
                logger.debug(feed + ':saved lastmodified')
            else:
                logger.debug(feed + ':no last modified date')

            http_response.close()

            logger.debug(feed + ':processing entries')
            for item in feed_data.entries:
                logger.debug(feed + ':item:processing this entry')
                # logger.debug(item) # can be very noisy
                id = ''
                if 'id' in item:
                    id = item.id
                elif 'guid' in item:
                    id = item.guid
                else:
                    logger.error(feed + ':item:no id, skipping')
                    continue
                pubDateDict = extract_best_item_date(item)
                pubDate = pubDateDict['date']
                pubDate_parsed = pubDateDict['date_parsed']
                logger.debug(feed + ':item:id:' + id)
                logger.debug(feed +
                             ':item:checking database history for this item')
                cursor.execute(
                    "SELECT published,title,url,reposted FROM feed_items WHERE id=?",
                    [id])
                data = cursor.fetchone()
                if data is None:
                    logger.info(feed + ':item ' + id + ' unseen, processing:')
                    cursor.execute(
                        "INSERT INTO feed_items (id,published) VALUES (?,?)",
                        [id, pubDate])
                    conn.commit()
                    if time.mktime(pubDate_parsed) > (time.time() - max_age):
                        logger.info(feed + ':item:fresh and ready for parsing')
                        logger.debug(feed + ':item:building message')
                        message = build_message(FEED, item)
                        for channel in channels:
                            logger.debug(feed + ':item:sending message')
                            yield from client.send_message(channel, message)
                    else:
                        logger.info(feed + ':too old; skipping')
                        logger.debug(feed + ':now:' + str(time.time()))
                        logger.debug(feed + ':now:gmtime:' +
                                     str(time.gmtime()))
                        logger.debug(feed + ':now:localtime:' +
                                     str(time.localtime()))
                        logger.debug(feed + ':pubDate:' + str(pubDate))
                        logger.debug(feed + ':pubDate_parsed:' +
                                     str(pubDate_parsed))
                        if debug >= 4:
                            logger.debug(item)
                else:
                    logger.debug(feed + ':item:' + id +
                                 ' seen before, skipping')
        except HTTPNotModified:
            logger.debug(
                feed +
                ':Headers indicate feed unchanged since last time fetched:')
            logger.debug(sys.exc_info())
        except HTTPError:
            logger.warn(feed + ':Unexpected HTTP error:')
            logger.warn(sys.exc_info())
            logger.warn(feed +
                        ':Assuming error is transient and trying again later')
        except sqlite3.Error as sqlerr:
            logger.error(feed + ':sqlite3 error: ')
            logger.error(sys.exc_info())
            logger.error(sqlerr)
            raise
        except discord.errors.Forbidden:
            logger.error(feed + ':discord.errors.Forbidden')
            logger.error(sys.exc_info())
            logger.error(
                feed +
                ":Perhaps bot isn't allowed in one of the channels for this feed?"
            )
            # raise # or not? hmm...
        except:
            logger.error(feed + ':Unexpected error:')
            logger.error(sys.exc_info())
            logger.error(feed + ':giving up')
            raise
        finally:
            # No matter what goes wrong, wait same time and try again
            logger.debug(feed + ':sleeping for ' + str(rss_refresh_time) +
                         ' seconds')
            yield from asyncio.sleep(rss_refresh_time)
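The fetch half of Examples #4–#6 is a client-side conditional GET: remember the server's Last-Modified and ETag after a successful fetch, send them back on the next request, and treat a 304 as "nothing new". A stripped-down sketch of that round trip with aiohttp, independent of the Discord and database plumbing (the in-memory cache dict stands in for the feed_info table; note the standard request header for ETag validation is If-None-Match):

import asyncio

import aiohttp

_cache = {}  # {url: {'etag': ..., 'lastmodified': ...}} -- stands in for feed_info


async def fetch_if_changed(url):
    headers = {}
    known = _cache.get(url, {})
    if known.get('lastmodified'):
        headers['If-Modified-Since'] = known['lastmodified']
    if known.get('etag'):
        headers['If-None-Match'] = known['etag']

    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as resp:
            if resp.status == 304:
                return None  # unchanged since last fetch
            resp.raise_for_status()
            _cache[url] = {
                'etag': resp.headers.get('ETag'),
                'lastmodified': resp.headers.get('Last-Modified'),
            }
            return await resp.read()


# e.g. body = asyncio.run(fetch_if_changed('https://example.com/feed.xml'))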
Example #6
async def background_check_feed(feed, asyncioloop):

    logger.info(feed + ": Starting up background_check_feed")

    # Try to wait until Discord client has connected, etc:
    await client.wait_until_ready()
    # make sure debug output has this check run in the right order...
    await asyncio.sleep(1)

    user_agent = config["MAIN"].get("user_agent", USER_AGENT)

    # just a bit easier to use...
    FEED = config[feed]

    # pull config for this feed out:
    feed_url = FEED.get("feed_url")
    rss_refresh_time = FEED.getint("rss_refresh_time", 3600)
    start_skew = FEED.getint("start_skew", rss_refresh_time)
    start_skew_min = FEED.getint("start_skew_min", 1)
    max_age = FEED.getint("max_age", 86400)

    # loop through all the channels this feed is configured to send to
    channels = []
    for key in FEED.get("channels").split(","):
        # stick a dict in the channels array so we have more to work with
        channel_id = config["CHANNELS"].getint(key)
        logger.info(feed + ": adding channel " + key + ":" + str(channel_id))

        channel_obj = client.get_channel(channel_id)
        logger.info(pformat(channel_obj))
        if channel_obj is not None:
            channels.append({
                "object": channel_obj,
                "name": key,
                "id": channel_id
            })
            logger.info(feed + ": added channel " + key)
        else:
            logger.warning(feed + ": did not add channel " + key + "/" +
                           str(channel_id))
            logger.warning(pformat(channel_obj))

    if start_skew > 0:
        sleep_time = random.uniform(start_skew_min, start_skew)
        logger.info(feed + ":start_skew:sleeping for " + str(sleep_time))
        await asyncio.sleep(sleep_time)

    # Basically run forever
    while True:

        # And try to catch all the exceptions and just keep going
        # (but see list of except/finally stuff below)
        try:
            # set current "game played" constantly so that it sticks around
            gameplayed = MAIN.get("gameplayed",
                                  "gitlab.com/ffreiheit/discord_feedbot")
            await client.change_presence(activity=discord.Game(name=gameplayed)
                                         )

            logger.info(feed + ": processing feed")

            # If send_typing is on for the feed, send a little "typing ..."
            # whenever a feed is being worked on.  configurable per-room
            if should_send_typing(FEED, feed):
                for channel in channels:
                    # Since this is first attempt to talk to this channel,
                    # be very verbose about failures to talk to channel
                    try:
                        await client.send_typing(channel["object"])
                    except discord.errors.Forbidden:
                        logger.exception(
                            "%s:%s:forbidden - is bot allowed in channel?",
                            feed,
                            channel,
                        )

            http_headers = {"User-Agent": user_agent}

            db_path = config["MAIN"].get("db_path", "feed2discord.db")

            # Debugging crazy issues
            logger.info(feed + ":db_debug:db_path=" + db_path)

            conn = sqlite3.connect(db_path)

            # Download the actual feed, if changed since last fetch

            # Debugging crazy issues
            logger.info(feed + ":db_debug:conn=" + type(conn).__name__)

            # pull data about history of this *feed* from DB:
            cursor = conn.execute(
                "select lastmodified,etag from feed_info where feed=? OR url=?",
                [feed, feed_url],
            )
            data = cursor.fetchone()

            # If we've handled this feed before,
            # and we have etag from last run, add etag to headers.
            # and if we have a last modified time from last run,
            # add "If-Modified-Since" to headers.
            if data is None:  # never handled this feed before...
                logger.info(feed + ":looks like updated version. saving info")
                conn.execute("REPLACE INTO feed_info (feed,url) VALUES (?,?)",
                             [feed, feed_url])
                conn.commit()
                logger.info(feed + ":feed info saved")
            else:
                logger.info(feed +
                            ":setting up extra headers for HTTP request.")
                logger.info(data)
                lastmodified = data[0]
                etag = data[1]
                if lastmodified is not None and len(lastmodified):
                    logger.info(feed + ":adding header If-Modified-Since: " +
                                lastmodified)
                    http_headers["If-Modified-Since"] = lastmodified
                else:
                    logger.info(feed + ":no stored lastmodified")
                if etag is not None and len(etag):
                    logger.info(feed + ":adding header ETag: " + etag)
                    http_headers["ETag"] = etag
                else:
                    logger.info(feed + ":no stored ETag")

            # Set up httpclient
            httpclient = aiohttp.ClientSession()

            logger.info(feed + ":sending http request for " + feed_url)
            # Send actual request.  await can yield control to another
            # instance.
            http_response = await httpclient.get(feed_url,
                                                 headers=http_headers)

            logger.info(http_response)

            # First check that we didn't get a "None" response, since that's
            # some sort of internal error thing:
            if http_response.status is None:
                logger.error(feed + ":HTTP response code is NONE")
                raise HTTPError()
            # Some feeds are smart enough to use that if-modified-since or
            # etag info, which gives us a 304 status.  If that happens,
            # assume no new items, fall through rest of this and try again
            # later.
            elif http_response.status == 304:
                logger.info(feed + ":data is old; moving on")
                http_response.close()
                raise HTTPNotModified()
            # If we get anything but a 200, that's a problem and we don't
            # have good data, so give up and try later.
            # Mostly handled different than 304/not-modified to make logging
            # clearer.
            elif http_response.status != 200:
                logger.info(feed + ":HTTP error not 200")
                http_response.close()
                raise HTTPNotModified()
            else:
                logger.info(feed + ":HTTP success")

            # pull data out of the http response
            logger.info(feed + ":reading http response")
            http_data = await http_response.read()

            await httpclient.close()

            # parse the data from the http response with feedparser
            logger.info(feed + ":parsing http data")
            feed_data = feedparser.parse(http_data)
            logger.info(feed + ":done fetching")

            # If we got an ETAG back in headers, store that, so we can
            # include on next fetch
            if "ETAG" in http_response.headers:
                etag = http_response.headers["ETAG"]
                logger.info(feed + ":saving etag: " + etag)
                conn.execute(
                    "UPDATE feed_info SET etag=? where feed=? or url=?",
                    [etag, feed, feed_url],
                )
                conn.commit()
                logger.info(feed + ":etag saved")
            else:
                logger.info(feed + ":no etag")

            # If we got a Last-Modified header back, store that, so we can
            # include on next fetch
            if "LAST-MODIFIED" in http_response.headers:
                modified = http_response.headers["LAST-MODIFIED"]
                logger.info(feed + ":saving lastmodified: " + modified)
                conn.execute(
                    "UPDATE feed_info SET lastmodified=? where feed=? or url=?",
                    [modified, feed, feed_url],
                )
                conn.commit()
                logger.info(feed + ":saved lastmodified")
            else:
                logger.info(feed + ":no last modified date")

            http_response.close()

            # Process all of the entries in the feed
            # Use reversed to start with end, which is usually oldest
            logger.info(feed + ":processing entries")
            for item in reversed(feed_data.entries):

                # Pull out the unique id, or just give up on this item.
                id = ""
                if "id" in item:
                    id = item.id
                elif "guid" in item:
                    id = item.guid
                elif "link" in item:
                    id = item.link
                else:
                    logger.error(feed + ":item:no id, skipping")
                    continue

                # Get our best date out, in both raw and parsed form
                pubdate = extract_best_item_date(item, TIMEZONE)
                pubdate_fmt = pubdate.strftime("%a %b %d %H:%M:%S %Z %Y")

                logger.info(
                    "%s:item:processing this entry:%s:%s:%s",
                    feed,
                    id,
                    pubdate_fmt,
                    item.title,
                )

                logger.info(feed + ":item:id:" + id)
                logger.info(feed +
                            ":item:checking database history for this item")
                # Check DB for this item
                cursor = conn.execute(
                    "SELECT published,title,url,reposted FROM feed_items WHERE id=?",
                    [id],
                )
                data = cursor.fetchone()

                # If we've never seen it before, then actually processing
                # this:
                if data is None:
                    logger.info(feed + ":item " + id + " unseen, processing:")

                    # Store info about this item, so next time we skip it:
                    conn.execute(
                        "INSERT INTO feed_items (id,published) VALUES (?,?)",
                        [id, pubdate_fmt],
                    )
                    conn.commit()

                    # Doing some crazy date math stuff...
                    # max_age is mostly so that first run doesn't spew too
                    # much stuff into a room, but is also a useful safety
                    # measure in case a feed suddenly reverts to something
                    # ancient or other weird problems...
                    time_since_published = TIMEZONE.localize(
                        datetime.now()) - pubdate.astimezone(TIMEZONE)

                    logger.debug(
                        '%s:time_since_published.total_seconds:%s,max_age:%s',
                        feed, time_since_published.total_seconds(), max_age)

                    if time_since_published.total_seconds() < max_age:
                        logger.info(feed + ":item:fresh and ready for parsing")

                        # Loop over all channels for this particular feed
                        # and process appropriately:
                        for channel in channels:
                            include = True
                            filter_field = FEED.get(
                                channel["name"] + ".filter_field",
                                FEED.get("filter_field", "title"),
                            )
                            # Regex if channel exists
                            if (channel["name"] +
                                    ".filter") in FEED or "filter" in FEED:
                                logger.info(feed + ":item:running filter for" +
                                            channel["name"])
                                regexpat = FEED.get(
                                    channel["name"] + ".filter",
                                    FEED.get("filter", "^.*$"),
                                )
                                logger.info(feed + ":item:using filter:" +
                                            regexpat + " on " + item["title"] +
                                            " field " + filter_field)
                                regexmatch = re.search(
                                    regexpat,
                                    process_field(filter_field, item, FEED,
                                                  channel),
                                )
                                if regexmatch is None:
                                    include = False
                                    logger.info(feed +
                                                ":item:failed filter for " +
                                                channel["name"])
                            elif (channel["name"] + ".filter_exclude"
                                  ) in FEED or "filter_exclude" in FEED:
                                logger.info(
                                    feed + ":item:running exclude filter for" +
                                    channel["name"])
                                regexpat = FEED.get(
                                    channel["name"] + ".filter_exclude",
                                    FEED.get("filter_exclude", "^.*$"),
                                )
                                logger.info(feed +
                                            ":item:using filter_exclude:" +
                                            regexpat + " on " + item["title"] +
                                            " field " + filter_field)
                                regexmatch = re.search(
                                    regexpat,
                                    process_field(filter_field, item, FEED,
                                                  channel),
                                )
                                if regexmatch is None:
                                    include = True
                                    logger.info(
                                        feed +
                                        ":item:passed exclude filter for " +
                                        channel["name"])
                                else:
                                    include = False
                                    logger.info(
                                        feed +
                                        ":item:failed exclude filter for " +
                                        channel["name"])
                            else:
                                include = True  # redundant safety net
                                logger.info(feed +
                                            ":item:no filter configured for" +
                                            channel["name"])

                            if include is True:
                                logger.info(feed +
                                            ":item:building message for " +
                                            channel["name"])
                                message = build_message(FEED, item, channel)
                                logger.info(
                                    feed +
                                    ":item:sending message (eventually) to " +
                                    channel["name"])
                                await send_message_wrapper(
                                    asyncioloop, FEED, feed, channel, client,
                                    message)
                            else:
                                logger.info(
                                    feed +
                                    ":item:skipping item due to not passing filter for "
                                    + channel["name"])

                    else:
                        # Logs of debugging info for date handling stuff...
                        logger.info("%s:too old, skipping", feed)
                        logger.debug("%s:now:now:%s", feed, time.time())
                        logger.debug("%s:now:gmtime:%s", feed, time.gmtime())
                        logger.debug("%s:now:localtime:%s", feed,
                                     time.localtime())
                        logger.debug("%s:pubDate:%r", feed, pubdate)
                        logger.debug(item)
                # seen before, move on:
                else:
                    logger.info(feed + ":item:" + id +
                                " seen before, skipping")
        # This is completely expected behavior for a well-behaved feed:
        except HTTPNotModified:
            logger.info(
                feed +
                ":Headers indicate feed unchanged since last time fetched:")
            logger.debug(sys.exc_info())
        # Many feeds have random periodic problems that shouldn't cause
        # permanent death:
        except HTTPError:
            logger.warn(feed + ":Unexpected HTTP error:")
            logger.warn(sys.exc_info())
            logger.warn(feed +
                        ":Assuming error is transient and trying again later")
        # sqlite3 errors are probably really bad and we should just totally
        # give up on life
        except sqlite3.Error as sqlerr:
            logger.error(feed + ":sqlite error: ")
            logger.error(sys.exc_info())
            logger.error(sqlerr)
            raise
        # Ideally we'd remove the specific channel or something...
        # But I guess just throw an error into the log and try again later...
        except discord.errors.Forbidden:
            logger.error(feed + ":discord.errors.Forbidden")
            logger.error(sys.exc_info())
            logger.error(
                feed +
                ":Perhaps bot isn't allowed in one of the channels for this feed?"
            )
            # raise # or not? hmm...
        # unknown error: definitely give up and die and move on
        except Exception:
            logger.exception("Unexpected error - giving up")
            raise
        # No matter what goes wrong, wait same time and try again
        finally:
            logger.info(feed + ":sleeping for " + str(rss_refresh_time) +
                        " seconds")
            await asyncio.sleep(rss_refresh_time)
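Each background_check_feed coroutine is meant to run as a long-lived task, one per configured feed, alongside the Discord client. A hedged scheduling sketch under the signature from Example #6, assuming a configparser layout with MAIN and CHANNELS sections plus one section per feed (the token key name is hypothetical):

import asyncio

loop = asyncio.get_event_loop()

# One long-lived task per feed section; config, client and
# background_check_feed are the objects used in the examples above.
feeds = [s for s in config.sections() if s not in ('MAIN', 'CHANNELS')]
for feed in feeds:
    loop.create_task(background_check_feed(feed, loop))

# Run the Discord client on the same loop; 'login_token' is a hypothetical key.
loop.run_until_complete(client.start(config['MAIN'].get('login_token')))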
Example #7
async def background_check_feed(conn, feed, async_loop):
    """
    The main work loop
    One of these is run for each feed.
    It's an asyncio thing. "await" (sleep or I/O) returns to main loop
    and gives other feeds a chance to run.
    """
    logger.info(f'{feed}: Starting up background_check_feed')

    # Try to wait until Discord client has connected, etc:
    await client.wait_until_ready()
    # make sure debug output has this check run in the right order...
    await asyncio.sleep(1)

    user_agent = config["MAIN"].get("user_agent", USER_AGENT)

    # just a bit easier to use...
    _feed = config[feed]

    # pull config for this feed out:
    feed_url = _feed.get('feed_url')
    rss_refresh_time = _feed.getint('rss_refresh_time', 3600)
    start_skew = _feed.getint('start_skew', rss_refresh_time)
    start_skew_min = _feed.getint('start_skew_min', 1)
    max_age = _feed.getint('max_age', 86400)

    # loop through all the channels this feed is configured to send to
    channels = []
    for key in _feed.get('channels').split(','):
        logger.debug(feed + ': adding channel ' + key)
        # stick a dict in the channels array so we have more to work with
        channels.append({
            'channel_obj':
            client.get_channel(int(config['CHANNELS'][key])),
            'name':
            key,
            'id':
            int(config['CHANNELS'][key]),
        })

    if start_skew > 0:
        sleep_time = random.uniform(start_skew_min, start_skew)
        logger.info(f'{feed}:start_skew:sleeping for {str(sleep_time)}')
        await asyncio.sleep(sleep_time)

    # Basically run forever
    while not client.is_closed():
        # And tries to catch all the exceptions and just keep going
        # (but see list of except/finally stuff below)
        try:
            logger.info(f'{feed}: processing feed')
            http_headers = {"User-Agent": user_agent}
            # Download the actual feed, if changed since last fetch

            # pull data about history of this *feed* from DB:
            cursor = conn.cursor()
            cursor.execute(
                "select lastmodified,etag from feed_info where feed=? OR url=?",
                [feed, feed_url])
            data = cursor.fetchone()

            # If we've handled this feed before,
            # and we have etag from last run, add etag to headers.
            # and if we have a last modified time from last run,
            # add "If-Modified-Since" to headers.
            if data is None:  # never handled this feed before...
                logger.info(f"{feed}:looks like updated version. saving info")
                cursor.execute(
                    "REPLACE INTO feed_info (feed,url) VALUES (?,?)",
                    [feed, feed_url])
                conn.commit()
                logger.debug(f"{feed}:feed info saved")
            else:
                logger.debug(
                    f"{feed}:setting up extra headers for HTTP request.")
                logger.debug(data)
                lastmodified = data[0]
                etag = data[1]
                if lastmodified is not None and len(lastmodified):
                    logger.debug(
                        f"{feed}:adding header If-Modified-Since: {lastmodified}"
                    )
                    http_headers['If-Modified-Since'] = lastmodified
                else:
                    logger.debug(f"{feed}:no stored lastmodified")
                if etag is not None and len(etag):
                    logger.debug(f"{feed}:adding header ETag: {etag}")
                    http_headers['ETag'] = etag
                else:
                    logger.debug(f"{feed}:no stored ETag")

            logger.debug(f"{feed}:sending http request for {feed_url}")
            feed_data = None
            # Send actual request.
            async with aiohttp.ClientSession() as sess:
                async with sess.get(feed_url,
                                    headers=http_headers) as http_response:
                    logger.debug(http_response)
                    # First check that we didn't get a "None" response, since that's
                    # some sort of internal error thing:
                    if http_response.status is None:
                        logger.error(f"{feed}:HTTP response code is NONE")
                        http_response.close()
                        # raise not HTTPError because this is giving me NoneType errors
                        raise HTTPForbidden()
                    # Some feeds are smart enough to use that if-modified-since or
                    # etag info, which gives us a 304 status.  If that happens,
                    # assume no new items, fall through rest of this and try again
                    # later.
                    elif http_response.status == 304:
                        logger.debug(f"{feed}:data is old; moving on")
                        http_response.close()
                        raise HTTPNotModified()
                    # If we get anything but a 200, that's a problem and we don't
                    # have good data, so give up and try later.
                    # Mostly handled different than 304/not-modified to make logging
                    # clearer.
                    elif http_response.status != 200:
                        logger.debug(f"{feed}:HTTP error not 200")
                        http_response.close()
                        # raise not HTTPError because this is giving me NoneType errors
                        raise HTTPForbidden()
                    else:
                        logger.debug(f"{feed}:HTTP success")

                    # pull data out of the http response
                    logger.debug(f"{feed}:reading http response")
                    http_data = await http_response.read()

                    # parse the data from the http response with feedparser
                    logger.debug(f"{feed}:parsing http data")
                    feed_data = feedparser.parse(http_data)
                    logger.debug(f"{feed}:done fetching")

                    # If we got an ETAG back in headers, store that, so we can
                    # include on next fetch
                    if 'ETAG' in http_response.headers:
                        etag = http_response.headers['ETAG']
                        logger.debug(f"{feed}:saving etag: {etag}")
                        cursor.execute(
                            "UPDATE feed_info SET etag=? where feed=? or url=?",
                            [etag, feed, feed_url])
                        conn.commit()
                        logger.debug(f"{feed}:etag saved")
                    else:
                        logger.debug(f"{feed}:no etag")

                    # If we got a Last-Modified header back, store that, so we can
                    # include on next fetch
                    if 'LAST-MODIFIED' in http_response.headers:
                        modified = http_response.headers['LAST-MODIFIED']
                        logger.debug(f"{feed}:saving lastmodified: {modified}")
                        cursor.execute(
                            "UPDATE feed_info SET lastmodified=? where feed=? or url=?",
                            [modified, feed, feed_url])
                        conn.commit()
                        logger.debug(f"{feed}:saved lastmodified")
                    else:
                        logger.debug(f"{feed}:no last modified date")

            # Process all of the entries in the feed
            # Use reversed to start with end, which is usually oldest
            logger.debug(f"{feed}:processing entries")
            if feed_data is None:
                logger.error(f"{feed}:no data in feed_data")
                # raise not HTTPError because this is giving me NoneType errors
                raise HTTPForbidden()
            for item in reversed(feed_data.entries):
                logger.debug(f"{feed}:item:processing this entry:{item}")

                # Pull out the unique id, or just give up on this item.
                if 'id' in item:
                    uid = item.id
                elif 'guid' in item:
                    uid = item.guid
                elif 'link' in item:
                    uid = item.link
                else:
                    logger.error(f"{feed}:item:no id, skipping")
                    continue

                # Get our best date out, in both raw and parsed form
                pubdate = extract_best_item_date(item, TIMEZONE)
                pubdate_fmt = pubdate.strftime("%a %b %d %H:%M:%S %Z %Y")

                logger.debug(f"{feed}:item:id:{uid}")
                logger.debug(
                    f"{feed}:item:checking database history for this item")
                # Check DB for this item
                cursor.execute(
                    "SELECT published,title,url,reposted FROM feed_items WHERE id=?",
                    [uid])
                data = cursor.fetchone()

                # If we've never seen it before, then actually processing
                # this:
                if data is None:
                    logger.info(f"{feed}:item {uid} unseen, processing:")

                    # Store info about this item, so next time we skip it:
                    cursor.execute(
                        "INSERT INTO feed_items (id,published) VALUES (?,?)",
                        [uid, pubdate_fmt])
                    conn.commit()

                    # Doing some crazy date math stuff...
                    # max_age is mostly so that first run doesn't spew too
                    # much stuff into a room, but is also a useful safety
                    # measure in case a feed suddenly reverts to something
                    # ancient or other weird problems...
                    time_since_published = TIMEZONE.localize(
                        datetime.now()) - pubdate.astimezone(TIMEZONE)
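                    # Worked example: with max_age set to 86400 (one day), an item
                    # published two days ago yields total_seconds() of about 172800,
                    # fails the check below, and is recorded but never posted.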

                    if time_since_published.total_seconds() < max_age:
                        logger.info(f"{feed}:item:fresh and ready for parsing")

                        # Loop over all channels for this particular feed
                        # and process appropriately:
                        for channel in channels:
                            # just a bit easier to use...
                            _name = channel['name']

                            include = True
                            filter_field = _feed.get(
                                f"{_name}.filter_field",
                                _feed.get('filter_field', 'title'))
                            # Run the include filter if one is configured for this
                            # channel (or feed-wide)
                            if f"{_name}.filter" in _feed or 'filter' in _feed:
                                logger.debug(
                                    f"{feed}:item:running filter for {_name}")
                                re_pat = _feed.get(f"{_name}.filter",
                                                   _feed.get('filter', '^.*$'))
                                logger.debug(
                                    f"{feed}:item:using filter: {re_pat} on "
                                    f"{item['title']} field {filter_field}")
                                re_match = re.search(re_pat,
                                                     item[filter_field])
                                if re_match is None:
                                    include = False
                                    logger.info(
                                        f"{feed}:item:failed filter for {_name}"
                                    )
                            elif f"{_name}.filter_exclude" in _feed or 'filter_exclude' in _feed:
                                logger.debug(
                                    f"{feed}:item:running exclude filter for{_name}"
                                )
                                re_pat = _feed.get(
                                    f"{_name}.filter_exclude",
                                    _feed.get('filter_exclude', '^.*$'))
                                logger.debug(
                                    f"{feed}:item:using filter_exclude: {re_pat} on "
                                    f"{item['title']} field {filter_field}")
                                re_match = re.search(re_pat,
                                                     item[filter_field])
                                if re_match is None:
                                    include = True
                                    logger.info(
                                        f"{feed}:item:passed exclude filter for {_name}"
                                    )
                                else:
                                    include = False
                                    logger.info(
                                        f"{feed}:item:failed exclude filter for {_name}"
                                    )
                            else:
                                include = True  # redundant safety net
                                logger.debug(
                                    f"{feed}:item:no filter configured for {_name}"
                                )
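                            # Note the elif above: when an include filter is configured
                            # for a channel, its exclude filter is never consulted.
                            # An illustrative config fragment (section name and values
                            # are made up; "news" stands for a channel name):
                            #
                            #   [somefeed]
                            #   filter_field = title
                            #   news.filter = ^Release
                            #   news.filter_exclude = beta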

                            if include is True:
                                logger.debug(
                                    f"{feed}:item:building message for {_name}"
                                )
                                message = build_message(_feed, item, channel)
                                logger.debug(
                                    f"{feed}:item:sending message (eventually) to {_name}"
                                )
                                await send_message_wrapper(
                                    async_loop, feed, channel, message)
                            else:
                                logger.info(
                                    f"{feed}:item:skipping item due to not passing filter for {_name}"
                                )

                    else:
                        # Log some debugging info for the date handling here...
                        logger.info(f"{feed}:too old, skipping")
                        logger.debug(f"{feed}:now:now:{time.time()}")
                        logger.debug(f"{feed}:now:gmtime:{time.gmtime()}")
                        logger.debug(
                            f"{feed}:now:localtime:{time.localtime()}")
                        logger.debug(f"{feed}:pubDate:{pubdate}")
                        logger.debug(item)
                # seen before, move on:
                else:
                    logger.debug(f"{feed}:item: {uid} seen before, skipping")

        # This is completely expected behavior for a well-behaved feed:
        except HTTPNotModified:
            logger.debug(
                f"{datetime.today()}:{feed}: Headers indicate feed unchanged since last time fetched:"
            )
            logger.debug(sys.exc_info())
        # Many feeds have random periodic problems that shouldn't cause
        # permanent death:
        except HTTPForbidden:
            logger.warning(f"{datetime.today()}:{feed}: Unexpected HTTPError:")
            logger.warning(sys.exc_info())
            logger.warning(
                f"{datetime.today()}:{feed}: Assuming error is transient and trying again later"
            )
        # sqlite3 errors are probably really bad and we should just totally
        # give up on life
        except sqlite3.Error as sqlerr:
            logger.error(f"{datetime.today()}:{feed}: sqlite3 error: ")
            logger.error(sys.exc_info())
            logger.error(sqlerr)
            raise
        # Ideally we'd remove the specific channel or something...
        # But I guess just throw an error into the log and try again later...
        except discord.errors.Forbidden:
            logger.error(
                f"{datetime.today()}:{feed}: discord.errors.Forbidden")
            logger.error(sys.exc_info())
            logger.error(
                f"{datetime.today()}:{feed}: Perhaps bot isn't allowed in one of the channels for this feed?"
            )
            # raise  # or not? hmm...
        except asyncio.TimeoutError:
            logger.error(f"{datetime.today()}:{feed}: Timeout error")
        except aiohttp.ClientConnectorError:
            logger.error(f"{datetime.today()}:{feed}: Connection failed!")
        except aiohttp.ClientOSError:
            logger.error(
                f"{datetime.today()}:{feed}: Connection not responding!")
        except aiohttp.ServerDisconnectedError:
            logger.error(f"{datetime.today()}:{feed}: Socket closed by peer")
        # unknown error: log a traceback and (since the raise below is commented
        # out) move on to the next cycle rather than letting this feed's task die
        except BaseException:
            logger.exception(
                f"{datetime.today()}:{feed}: Unexpected error - giving up")
            # raise  # or not? hmm...
        # No matter what goes wrong, wait same time and try again
        finally:
            logger.debug(
                f"{feed}:sleeping for {str(rss_refresh_time)} seconds")
            await asyncio.sleep(rss_refresh_time)