Example #1
0
def main():
    """Entry point: log in, open a database session, crawl, and publish.

    Returns:
        True on completion.
    """
    # Reuse a saved login session or perform a fresh login.
    # NOTE(review): presumably a requests.Session — confirm against helper.
    session = load_session_or_login()

    # Build an ORM session bound to the engine returned by db_connect().
    # FIX: the original also called create_engine(db_engine) and discarded
    # the result — dead code, since sessionmaker binds to db_engine directly.
    db_engine = db_connect()
    Session = sessionmaker(bind=db_engine)
    db_session = Session()

    # Run actual spider code non-blockingly

    # Generate ebooks and push to subscribers' kindle

    return True
Example #2
0
    def __init__(self):
        """Initialize crawler bookkeeping, the event loop, and the HTTP session."""
        # Issue index-page links, e.g. 'http://magazine.caixin.com/2012/cw533/':
        # those seen on previous runs vs. those discovered this run.
        self.old_issues, self.new_issues = set(), set()

        # Flag consumed elsewhere; NOTE(review): the original comment said
        # "By default it won't go back to 1998" yet the default is True —
        # confirm the intended default.
        self.fetch_old_articles = True

        # Article links grouped by issue date: {date: [link], ...}
        self.articles = {}

        # Flattened set of article links still to be crawled: {link, ...}
        self.articles_to_fetch = set()

        # Date of the most recent issue; used when generating the RSS feed.
        self.latest_issue_date = None

        # Event loop plus the logged-in session reused across requests.
        self.loop = asyncio.get_event_loop()
        self.session = load_session_or_login()
Example #3
0
    def __init__(self):
        """Set up crawler state: issue/article bookkeeping plus event loop and session."""

        # Issue index-page links: ('http://magazine.caixin.com/2012/cw533/', ...)
        self.old_issues = set()
        self.new_issues = set()

        # NOTE(review): original comment said "By default it won't go back
        # to 1998", but the default here is True — confirm intended default.
        self.fetch_old_articles = True

        # Article links keyed by issue date: {date: [link], ...}
        self.articles = dict()

        # Final set of article links to crawl: {link, ...}
        self.articles_to_fetch = set()

        # Date of the latest issue; used to generate the RSS feed.
        self.latest_issue_date = None

        # Asyncio event loop and the logged-in HTTP session
        # (comment says aiohttp — confirm what load_session_or_login returns).
        self.loop = asyncio.get_event_loop()
        self.session = load_session_or_login()