def test_rss_from_google(self):
    """
    Reads the RSS streams listed in ``data/subscriptions.xml``, checks
    their number and ordering, stores them in table ``blogs`` of a
    temporary database (``temp_rss.db3``), then reopens the database and
    checks the content can be read back as ``StreamRSS`` objects.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    path = os.path.abspath(os.path.split(__file__)[0])
    xml_file = os.path.join(path, "data", "subscriptions.xml")
    assert os.path.exists(xml_file)

    res = list(StreamRSS.enumerate_stream_from_google_list(xml_file))
    if len(res) != TestRSS.nb_rss_blog:
        # Unexpected number of feeds: log every feed and point out the
        # duplicated ones before failing, to make the mismatch diagnosable.
        counts = {}
        for i, r in enumerate(sorted(res)):
            name = str(r)
            counts[name] = counts.get(name, 0) + 1
            fLOG(i, r)
        for name, count in counts.items():
            if count > 1:
                fLOG("--double", name)
        raise Exception(
            "number of expected feed %d != %d" %
            (len(res), TestRSS.nb_rss_blog))

    # Sanity check of the ordering operator: two consecutive items of the
    # sorted list must never compare in the wrong direction.
    li = sorted(res)
    for prev, cur in zip(li, li[1:]):
        if cur < prev:
            raise Exception("bad order {0} < {1}".format(prev, cur))

    fLOG("nb:", len(res))

    # Start from a fresh database file.
    dbfile = os.path.join(path, "temp_rss.db3")
    if os.path.exists(dbfile):
        os.remove(dbfile)

    # Write the streams; the connection is closed even if filling fails,
    # so the file is not left open for the next run.
    db = Database(dbfile, LOG=fLOG)
    db.connect()
    try:
        StreamRSS.fill_table(db, "blogs", res)
    finally:
        db.close()

    # Reopen the database and check what was stored; a failing assertion
    # must not leave the connection open either.
    db = Database(dbfile, LOG=fLOG)
    db.connect()
    try:
        assert db.has_table("blogs")
        assert db.get_table_nb_lines("blogs") == TestRSS.nb_rss_blog

        sql = "SELECT * FROM blogs"
        cur = db.execute(sql)
        # The last column of every row must be distinct across the table.
        val = dict.fromkeys((row[-1] for row in cur), 0)
        assert len(val) == TestRSS.nb_rss_blog
        key, _ = val.popitem()
        assert key is not None

        # iterator on StreamRSS
        obj = list(db.enumerate_objects("blogs", StreamRSS))
        assert len(obj) == TestRSS.nb_rss_blog
    finally:
        db.close()
def rss_download_post_to_database(database="database_rss.db3",
                                  table_blog="blogs",
                                  table_post="posts",
                                  fLOG=fLOG):
    """
    Downloads all posts from a list of blogs stored in a database
    by function @see fn rss_from_xml_to_database
    and stores them in the same database.

    @param      database        database file name (SQLite format)
    @param      table_blog      table name of the blogs
    @param      table_post      table name of the posts
    @param      fLOG            logging function
    @return                     number of posts downloaded

    The returned value is the length of the list of downloaded posts;
    posts are inserted with ``skip_exception=True``.
    """
    db = Database(database, LOG=fLOG)
    db.connect()
    try:
        # Blogs are read back as StreamRSS objects, then every post they
        # expose is collected before being written to *table_post*.
        rss_list = list(db.enumerate_objects(table_blog, StreamRSS))
        list_post = list(
            StreamRSS.enumerate_post_from_rsslist(rss_list, fLOG=fLOG))
        BlogPost.fill_table(db, table_post, list_post, skip_exception=True)
    finally:
        # Always release the connection, even if the download or the
        # insertion raises.
        db.close()
    return len(list_post)