def download_articles(parameters):
    """Download every article linked from the prepared search pages.

    Builds the list of search-page URLs with ``url.prepare_urls``, loads
    each one as a ``pages.SearchPage``, converts every linked
    ``pages.ArticlePage`` with ``convert.convert_article`` and adds the
    result to a ``models.Session``.  The session is committed after each
    search page and once more at the end.

    Args:
        parameters: Container passed through to ``url.prepare_urls``.  If
            it contains the key ``'start-page'``, that value is used as
            the first page number in the progress messages; otherwise
            numbering starts at 1.

    Progress is reported on standard output.
    """
    # NOTE(review): models.Session is presumably a SQLAlchemy session
    # factory (add/commit are the only methods used here) -- confirm.
    session = models.Session()
    urls = url.prepare_urls(parameters)

    if 'start-page' in parameters:
        page = parameters['start-page']
    else:
        page = 1

    # Number of records added so far.  Kept zero-based so that the
    # "Saved N records" message reports the real count; the previous
    # one-based counter over-reported by one.
    saved = 0

    for u in urls:
        print("Getting page %d: %s ..." % (page, u))
        search_page = pages.SearchPage(u)

        for link in search_page.links:
            # saved + 1 is the one-based index of the article being fetched.
            print("%d: Getting %s ..." % (saved + 1, link))
            article_page = pages.ArticlePage(link)
            db_article = convert.convert_article(article_page)
            session.add(db_article)
            saved += 1

        # Commit once per search page so a later failure keeps earlier pages.
        session.commit()
        print("Saved %d records; page %d finished." % (saved, page))
        page += 1

    # Final commit; effectively a no-op when the loop already committed.
    session.commit()
    print("Saved.")
def download_articles(parameters):
    """Download every article linked from a single search page.

    The search URL comes from ``url.prepare_url(parameters)``.  Each link
    on the resulting ``pages.SearchPage`` is loaded as a
    ``pages.BibliographicPage``, converted with
    ``convert.convert_article`` and added to a ``models.Session``.  The
    session is committed after every 100th record and once more after
    the last one.

    Args:
        parameters: Value handed unchanged to ``url.prepare_url``.

    Progress is reported on standard output.
    """
    db_session = models.Session()

    search_url = url.prepare_url(parameters)
    print("Getting %s ..." % search_url)
    results = pages.SearchPage(search_url)

    # count is the one-based position of the article within the results.
    for count, link in enumerate(results.links, 1):
        print("%d: Getting %s ..." % (count, link))
        record = convert.convert_article(pages.BibliographicPage(link))
        db_session.add(record)

        # Flush to the database in batches of 100 records.
        if count % 100 == 0:
            db_session.commit()
            print("Saved %d records." % count)

    # Commit whatever is left over from the last partial batch.
    db_session.commit()
    print("Saved.")