def rss_from_xml_to_database(file, database="database_rss.db3", table="blogs", fLOG=None):
    """
    Parses a list of blogs stored in a :epkg:`XML` file using the
    Google Reader format and saves the result in a :epkg:`SQLite` database.

    @param      file        (str) xml file containing the list of blogs, example:
    @param      database    database file (sqlite)
    @param      table       table name
    @param      fLOG        logging function
    @return                 number of stored blogs

    The XML file should contain entries such as::

        <outline text="XD blog" title="XD blog" type="rss"
            xmlUrl="http://www.xavierdupre.fr/blog/xdbrss.xml"
            htmlUrl="http://www.xavierdupre.fr/blog/xd_blog.html" />

    The function does not check whether or not the blogs were
    already added to the database, they will be added a second time.
    If the table does not exist, it will be created.
    """
    blogs = list(StreamRSS.enumerate_stream_from_google_list(file))
    storage = Database(database, LOG=fLOG)
    storage.connect()
    StreamRSS.fill_table(storage, table, blogs)
    storage.close()
    return len(blogs)
def test_sqllite_sql(self):
    """Import a CSV file with a header into SQLite and check the resulting view."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    folder = os.path.dirname(os.path.abspath(__file__))
    source = os.path.join(folder, "data", "taxi_trip.csv")
    dbf = os.path.join(folder, "temp_taxi.db3")
    if os.path.exists(dbf):
        os.remove(dbf)
    try:
        import_flatfile_into_database(dbf, source, fLOG=fLOG, header=True)
    except NoHeaderException:
        # the import requires a header; nothing more to check
        return
    assert os.path.exists(dbf)
    db = Database(dbf, LOG=fLOG)
    db.connect()
    rows = db.execute_view("SELECT * FROM taxi_trip")
    fLOG(len(rows))
    fLOG(rows)
    expected = ('1B5C0970F2AE8CFFBA8AE4584BEAED29',
                'D961332334524990D1BBD462E2EFB8A4',
                'CMT', '2013-02-08 23:35:14', 'CRD',
                6.0, 0.5, 0.5, 0.0, 0, 7.0)
    assert len(rows) > 0
    assert len(rows[0]) == len(expected)
def test_import_person(self):
    """Import a headerless flat file with explicit column names."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    here = os.path.dirname(os.path.abspath(__file__))
    data_file = os.path.join(here, "data", "person.txt")
    dbf = os.path.join(here, "temp_person.db3")
    if os.path.exists(dbf):
        os.remove(dbf)
    names = ["sequence", "tag", "timestamp", "dateformat",
             "x", "y", "z", "activity"]
    try:
        import_flatfile_into_database(dbf, data_file, fLOG=fLOG,
                                      columns=names, header=False)
    except NoHeaderException:
        return
    assert os.path.exists(dbf)
    db = Database(dbf, LOG=fLOG)
    db.connect()
    rows = db.execute_view("SELECT * FROM ACAPA")
    assert len(rows) > 0
    assert len(rows[0]) == 7
def test_import_person(self):
    """Check that a headerless import with named columns creates table ACAPA."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    folder = os.path.abspath(os.path.split(__file__)[0])
    src = os.path.join(folder, "data", "person.txt")
    db_path = os.path.join(folder, "temp_person.db3")
    if os.path.exists(db_path):
        os.remove(db_path)
    cols = "sequence tag timestamp dateformat x y z activity".split()
    try:
        import_flatfile_into_database(db_path, src, fLOG=fLOG,
                                      columns=cols, header=False)
    except NoHeaderException:
        return
    assert os.path.exists(db_path)
    database = Database(db_path, LOG=fLOG)
    database.connect()
    content = database.execute_view("SELECT * FROM ACAPA")
    assert len(content) > 0
    assert len(content[0]) == 7
def test_custom_server_location(self):
    """Start a CustomDBServer on a fresh database and fetch its index page."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    here = os.path.abspath(os.path.split(__file__)[0])
    dbfile = os.path.join(here, "out_custom_server2.db3")
    if os.path.exists(dbfile):
        os.remove(dbfile)
    # populate the database with a single example row
    db = Database(dbfile, LOG=fLOG)
    frame = pandas.DataFrame([{"name": "xavier", "module": "pyrsslocal"}])
    db.connect()
    db.import_dataframe(frame, "example")
    db.close()
    server = CustomDBServer(('localhost', 8099), dbfile,
                            CustomDBServerHandler,
                            root=os.path.join(here, "data"))
    thread = CustomDBServer.run_server(server, dbfile=dbfile, thread=True,
                                       extra_path=os.path.join(here, "data"))
    content = get_url_content_timeout("http://localhost:8099/index.html")
    assert len(content) > 0
    assert "unittest" in content
    thread.shutdown()
    assert not thread.is_alive()
    assert os.path.exists(dbfile)
def test_sqllite_sql(self):
    """Verify that a header-based CSV import produces rows of the expected width."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    root = os.path.abspath(os.path.split(__file__)[0])
    csv_file = os.path.join(root, "data", "taxi_trip.csv")
    db_file = os.path.join(root, "temp_taxi.db3")
    if os.path.exists(db_file):
        os.remove(db_file)
    try:
        import_flatfile_into_database(db_file, csv_file,
                                      fLOG=fLOG, header=True)
    except NoHeaderException:
        return
    assert os.path.exists(db_file)
    database = Database(db_file, LOG=fLOG)
    database.connect()
    records = database.execute_view("SELECT * FROM taxi_trip")
    fLOG(len(records))
    fLOG(records)
    sample = ('1B5C0970F2AE8CFFBA8AE4584BEAED29',
              'D961332334524990D1BBD462E2EFB8A4',
              'CMT', '2013-02-08 23:35:14', 'CRD',
              6.0, 0.5, 0.5, 0.0, 0, 7.0)
    assert len(records) > 0
    assert len(records[0]) == len(sample)
def test_rss_from_google(self):
    """Parse the Google Reader subscription list, check ordering, store it in SQLite."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    here = os.path.abspath(os.path.split(__file__)[0])
    sub_file = os.path.join(here, "data", "subscriptions.xml")
    assert os.path.exists(sub_file)
    feeds = list(StreamRSS.enumerate_stream_from_google_list(sub_file))
    if len(feeds) != TestRSS.nb_rss_blog:
        # log every feed and report duplicates before failing
        seen = {}
        for pos, feed in enumerate(sorted(feeds)):
            key = str(feed)
            seen[key] = seen.get(key, 0) + 1
            fLOG(pos, feed)
        for key in seen:
            if seen[key] > 1:
                fLOG("--double", key)
        raise Exception(
            "number of expected feed %d != %d" % (len(feeds), TestRSS.nb_rss_blog))
    ordered = sorted(feeds)
    for pos, _ in enumerate(ordered):
        if pos > 0 and ordered[pos] < ordered[pos - 1]:
            raise Exception(
                "bad order {0} < {1}".format(ordered[pos - 1], ordered[pos]))
    fLOG("nb:", len(feeds))
    dbfile = os.path.join(here, "temp_rss.db3")
    if os.path.exists(dbfile):
        os.remove(dbfile)
    db = Database(dbfile, LOG=fLOG)
    db.connect()
    StreamRSS.fill_table(db, "blogs", feeds)
    db.close()
    # reopen and verify the stored content
    db = Database(dbfile, LOG=fLOG)
    db.connect()
    assert db.has_table("blogs")
    assert db.get_table_nb_lines("blogs") == TestRSS.nb_rss_blog
    cursor = db.execute("SELECT * FROM blogs")
    last_values = {}
    for row in cursor:
        last_values[row[-1]] = 0
    assert len(last_values) == TestRSS.nb_rss_blog
    key, _ = last_values.popitem()
    assert key is not None
    # iterator on StreamRSS
    objects = list(db.enumerate_objects("blogs", StreamRSS))
    assert len(objects) == TestRSS.nb_rss_blog
    db.close()
def test_import_index(self):
    """Compare plain and pandas-based flat-file imports, then exercise an index."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    here = os.path.abspath(os.path.split(__file__)[0])
    src = os.path.join(here, "data", "ACA.PA.txt")
    dbf = os.path.join(here, "temp_database_index.db3")
    if os.path.exists(dbf):
        os.remove(dbf)
    fLOG("import flat file")
    table_plain = import_flatfile_into_database(dbf, src, fLOG=fLOG)
    fLOG("import flat file pandas")
    table_pandas = import_flatfile_into_database_pandas(
        dbf, src, fLOG=fLOG, table="pandas_table", chunksize=500, sep="\t")
    fLOG("-----------------")
    db = Database(dbf, LOG=fLOG)
    db.connect()
    frame1 = pandas.read_sql("SELECT * FROM " + table_plain, db._connection)
    frame2 = pandas.read_sql("SELECT * FROM " + table_pandas, db._connection)
    db.close()
    # both import paths must produce the same table layout
    self.assertEqual(frame1.shape, frame2.shape)
    self.assertEqual(list(frame1.columns), list(frame2.columns))
    self.assertEqual(list(frame1.dtypes), list(frame2.dtypes))
    assert os.path.exists(dbf)
    db = Database(dbf, LOG=fLOG)
    db.connect()
    db.create_index("index1", "ACAPA", "Date")
    indexes = db.get_index_list()
    self.assertEqual(
        indexes,
        [('index1', 'ACAPA', 'CREATE INDEX index1 ON ACAPA (Date)', ('Date',))])
    first = db.get_table_nfirst_lines("ACAPA")[0]
    names = [_[0] for _ in db.get_table_columns("ACAPA")]
    record = dict(zip(names, first))
    db.insert("ACAPA", record)
    db.commit()
    db.update("ACAPA", "Date", record["Date"], record)
    db.commit()
    db.close()
def test_import_index(self):
    """Import the same file twice (plain and pandas), then create and use an index."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    folder = os.path.abspath(os.path.split(__file__)[0])
    data_file = os.path.join(folder, "data", "ACA.PA.txt")
    db_path = os.path.join(folder, "temp_database_index.db3")
    if os.path.exists(db_path):
        os.remove(db_path)
    fLOG("import flat file")
    name_plain = import_flatfile_into_database(db_path, data_file, fLOG=fLOG)
    fLOG("import flat file pandas")
    name_pandas = import_flatfile_into_database_pandas(
        db_path, data_file, fLOG=fLOG, table="pandas_table",
        chunksize=500, sep="\t")
    fLOG("-----------------")
    database = Database(db_path, LOG=fLOG)
    database.connect()
    left = pandas.read_sql("SELECT * FROM " + name_plain, database._connection)
    right = pandas.read_sql("SELECT * FROM " + name_pandas, database._connection)
    database.close()
    self.assertEqual(left.shape, right.shape)
    self.assertEqual(list(left.columns), list(right.columns))
    self.assertEqual(list(left.dtypes), list(right.dtypes))
    assert os.path.exists(db_path)
    database = Database(db_path, LOG=fLOG)
    database.connect()
    database.create_index("index1", "ACAPA", "Date")
    self.assertEqual(
        database.get_index_list(),
        [('index1', 'ACAPA', 'CREATE INDEX index1 ON ACAPA (Date)', ('Date',))])
    rows = database.get_table_nfirst_lines("ACAPA")
    headers = [c[0] for c in database.get_table_columns("ACAPA")]
    payload = {k: v for k, v in zip(headers, rows[0])}  # pylint: disable=R1721
    database.insert("ACAPA", payload)
    database.commit()
    database.update("ACAPA", "Date", payload["Date"], payload)
    database.commit()
    database.close()
def test_import_flatflit(self):
    """Import a flat file and check the resulting ACAPA table."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    here = os.path.abspath(os.path.split(__file__)[0])
    src = os.path.join(here, "data", "ACA.PA.txt")
    dbf = os.path.join(here, "temp_database.db3")
    if os.path.exists(dbf):
        os.remove(dbf)
    import_flatfile_into_database(dbf, src, fLOG=fLOG)
    self.assertExists(dbf)
    db = Database(dbf, LOG=fLOG)
    db.connect()
    rows = db.execute_view("SELECT * FROM ACAPA")
    self.assertGreater(len(rows), 0)
    self.assertEqual(len(rows[0]), 7)
def rss_download_post_to_database(database="database_rss.db3", table_blog="blogs", table_post="posts", fLOG=fLOG):
    """
    Downloads all posts from the blogs stored in a database
    filled by function @see fn rss_from_xml_to_database.

    @param      database        database file name (SQLite format)
    @param      table_blog      table name of the blogs
    @param      table_post      table name of the posts
    @param      fLOG            logging function
    @return                     number of downloaded posts
    """
    db = Database(database, LOG=fLOG)
    db.connect()
    blogs = list(db.enumerate_objects(table_blog, StreamRSS))
    posts = list(StreamRSS.enumerate_post_from_rsslist(blogs, fLOG=fLOG))
    # skip_exception: a failing feed should not abort the whole batch
    BlogPost.fill_table(db, table_post, posts, skip_exception=True)
    db.close()
    return len(posts)
def test_import_flatflit(self):
    """Run import_flatfile_into_database on ACA.PA.txt and validate the table."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    folder = os.path.abspath(os.path.split(__file__)[0])
    data_file = os.path.join(folder, "data", "ACA.PA.txt")
    db_path = os.path.join(folder, "temp_database.db3")
    if os.path.exists(db_path):
        os.remove(db_path)
    import_flatfile_into_database(db_path, data_file, fLOG=fLOG)
    self.assertExists(db_path)
    database = Database(db_path, LOG=fLOG)
    database.connect()
    content = database.execute_view("SELECT * FROM ACAPA")
    self.assertGreater(len(content), 0)
    self.assertEqual(len(content[0]), 7)
def test_rss_from_google_arxiv(self):
    """
    Parses a one-feed Google Reader subscription file and stores
    the feed into a fresh SQLite database.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    path = os.path.abspath(os.path.split(__file__)[0])
    file = os.path.join(path, "data", "subscriptions_arxiv.xml")
    assert os.path.exists(file)
    res = list(StreamRSS.enumerate_stream_from_google_list(file))
    if len(res) != 1:
        # log through fLOG for consistency with the other tests
        # (print bypasses the unit-test logging redirection)
        for r in res:
            fLOG(r)
        raise Exception("number of expected feed %d != 1" % (len(res)))
    fLOG("nb:", len(res))
    dbfile = os.path.join(path, "temp_rss_arxiv.db3")
    if os.path.exists(dbfile):
        os.remove(dbfile)
    db = Database(dbfile, LOG=fLOG)
    db.connect()
    StreamRSS.fill_table(db, "blogs", res)
    db.close()
def test_import_flatflitand_copy(self):
    """Import a flat file, copy the database to a file and to memory, compare row counts."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    here = os.path.abspath(os.path.split(__file__)[0])
    src = os.path.join(here, "data", "ACA.PA.txt")
    dbf = os.path.join(here, "temp_database_copy.db3")
    if os.path.exists(dbf):
        os.remove(dbf)
    dbf2 = os.path.join(here, "out_copy.db3")
    if os.path.exists(dbf2):
        os.remove(dbf2)
    import_flatfile_into_database(dbf, src, fLOG=fLOG)
    assert os.path.exists(dbf)
    db = Database(dbf, LOG=fLOG)
    mirror = Database(dbf2, LOG=fLOG)
    db.copy_to(mirror)
    db.connect()
    mirror.connect()
    tables = mirror.get_table_list()
    if len(tables) != 1:
        raise Exception("expects one table not %d" % len(tables))
    rows = db.execute_view("SELECT * FROM ACAPA")
    rows_mirror = mirror.execute_view("SELECT * FROM ACAPA")
    db.close()
    mirror.close()
    assert len(rows) == len(rows_mirror)
    # same check with an in-memory copy
    memdb = Database(":memory:", LOG=fLOG)
    db.copy_to(memdb)
    memdb.connect()
    tables = memdb.get_table_list()
    if len(tables) != 1:
        raise Exception("expects one table not %d" % len(tables))
    rows_mem = memdb.execute_view("SELECT * FROM ACAPA")
    memdb.close()
    assert len(rows) == len(rows_mem)
def test_import_flatflitand_copy(self):
    """Check Database.copy_to for both a file-backed and an in-memory target."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    folder = os.path.abspath(os.path.split(__file__)[0])
    data_file = os.path.join(folder, "data", "ACA.PA.txt")
    target1 = os.path.join(folder, "temp_database_copy.db3")
    if os.path.exists(target1):
        os.remove(target1)
    target2 = os.path.join(folder, "out_copy.db3")
    if os.path.exists(target2):
        os.remove(target2)
    import_flatfile_into_database(target1, data_file, fLOG=fLOG)
    assert os.path.exists(target1)
    source_db = Database(target1, LOG=fLOG)
    copy_db = Database(target2, LOG=fLOG)
    source_db.copy_to(copy_db)
    source_db.connect()
    copy_db.connect()
    table_names = copy_db.get_table_list()
    if len(table_names) != 1:
        raise Exception("expects one table not %d" % len(table_names))
    view_src = source_db.execute_view("SELECT * FROM ACAPA")
    view_copy = copy_db.execute_view("SELECT * FROM ACAPA")
    source_db.close()
    copy_db.close()
    assert len(view_src) == len(view_copy)
    memory_db = Database(":memory:", LOG=fLOG)
    source_db.copy_to(memory_db)
    memory_db.connect()
    table_names = memory_db.get_table_list()
    if len(table_names) != 1:
        raise Exception("expects one table not %d" % len(table_names))
    view_mem = memory_db.execute_view("SELECT * FROM ACAPA")
    memory_db.close()
    assert len(view_src) == len(view_mem)
def test_rss_parse(self):
    """
    Parses an RSS stream (live feed, then local file), stores the posts
    into a database and makes sure inserting the same posts two more
    times does not create duplicates.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    path = os.path.abspath(os.path.split(__file__)[0])
    file = os.path.join(path, "data", "xdbrss.xml")
    assert os.path.exists(file)
    rss = StreamRSS(titleb="XD", type="rss",
                    xmlUrl="http://www.xavierdupre.fr/blog/xdbrss.xml",
                    htmlUrl="http://www.xavierdupre.fr/blog/xd_blog_nojs.html",
                    keywordsb=["python"], id=5)
    # download posts from the live feed
    res = rss.enumerate_post(fLOG=fLOG)
    nb = 0
    for _ in res:
        nb += 1
        assert len(_.title) > 0
    assert nb > 0
    # parse posts from the local file
    res = rss.enumerate_post(file, fLOG=fLOG)
    lres = list(res)
    nb = 0  # the original assigned nb = 0 twice in a row; once is enough
    for _ in lres:
        nb += 1
        assert len(_.title) > 0
    assert nb > 0
    fLOG("nb post=", nb)
    dbfile = os.path.join(path, "temp_rssp.db3")
    if os.path.exists(dbfile):
        os.remove(dbfile)
    db = Database(dbfile, LOG=fLOG)
    db.connect()
    BlogPost.fill_table(db, "posts", lres)
    db.close()
    db = Database(dbfile, LOG=fLOG)
    db.connect()
    assert db.has_table("posts")
    assert db.get_table_nb_lines("posts") == nb

    def distinct_last_column():
        # distinct values found in the last column of table ``posts``;
        # extracted because the same check is run three times below
        values = {}
        for row in db.execute("SELECT * FROM posts"):
            values[row[-1]] = 0
        return values

    val = distinct_last_column()
    assert len(val) == 6
    key, _value = val.popitem()
    assert key is not None
    # we insert the posts a second time: the count must not change
    BlogPost.fill_table(db, "posts", lres)
    assert len(distinct_last_column()) == 6
    # we insert the posts a third time
    BlogPost.fill_table(db, "posts", lres)
    assert len(distinct_last_column()) == 6
    db.close()
class CustomDBServer(ThreadingMixIn, HTTPServer):
    """
    Defines a custom server which includes an access to a database,
    this database will contain the table to store the clicks.

    .. exref::
        :title: create a custom local server

        The following code creates an instance of a local server.
        The server expects to find its content in the same folder.

        ::

            from pyensae import Database
            db = Database(dbfile)
            df = pandas.DataFrame([{"name": "xavier", "module": "pyrsslocal"}])
            db.connect()
            db.import_dataframe(df, "example")
            db.close()

            url = "http://localhost:%d/p_aserver.html" % port
            webbrowser.open(url)
            CustomDBServer.run_server(None, dbfile, port=port,
                                      extra_path=os.path.join("."))

        The main page is the following one and it can contain a Python script
        which will be interpreted by the server. It gives access to
        a variable ``db`` which is a local database in SQLlite.

        ::

            <?xml version="1.0" encoding="utf-8"?>
            <html>
            <head>
            <link type="text/css" href="/p_aserver.css" rel="stylesheet"/>
            <title>Custom DB Server</title>
            <meta content="dupre, pyrsslocal, custom server" name="keywords"/>
            <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
            <link rel="shortcut icon" href="p_aserver.ico" />
            <meta content="CustomServer from pyrsslocal" name="description" />
            <script type="text/javascript" src="/p_aserver.js"></script>
            <script src="/js/run_prettify.js" type="text/javascript"></script>
            </head>

            <body onload="setPositions(['divtable', ])" class="mymainbody">

            <div class="divtop">
            <h1>Custom DB Server unittest</h1>
            </div>

            <div class="divtable" id="divfiles" onscroll="savePosition('divtable')">
            <h2>Content of table example</h2>

            <script type="text/python">
            print("<table>")
            db.connect()
            for row in db.execute_view("SELECT * FROM example") :
                srow = [ str(_) for _ in row ]
                print( "<tr><td>{0}</td></tr>".format("</td><td>".join(srow) ) )
            db.close()
            print("</table>")
            </script>

            <p>end.</p>

            </div>

            </body>
            </html>
    """

    @staticmethod
    def schema_table(table):
        """
        Returns the schema for a specific table.

        @param      table   name (in ["stats", "event"])
        @return             dictionary
        """
        if table == "stats":
            return {0: ("id_post", int),
                    1: ("dtime", datetime.datetime),
                    2: ("status", str),
                    3: ("rate", int),
                    4: ("comment", str),
                    }
        elif table == "event":
            # -1 marks the auto-incremented primary key
            return {-1: ("id_event", int, "PRIMARYKEY", "AUTOINCREMENT"),
                    0: ("dtime", datetime.datetime),
                    1: ("uuid", str),
                    2: ("type1", str),
                    3: ("type2", str),
                    4: ("args", str),
                    }
        else:
            raise Exception("unexpected table name")

    def __init__(self, server_address, dbfile,
                 RequestHandlerClass=CustomDBServerHandler,
                 main_page="index.html", root=None, logfile=None):
        """
        constructor

        @param  server_address          address of the server
        @param  RequestHandlerClass     it should be @see cl CustomServerHandler
        @param  dbfile                  database filename (SQLlite format)
        @param  main_page               main page for the service
                                        (when requested with no specific file)
        @param  root                    folder or list of folders where the server
                                        will look into for files such as the main page
        @param  logfile                 file for the log or "stdout" for the standard output
        """
        HTTPServer.__init__(self, server_address, RequestHandlerClass)
        self._my_database = Database(dbfile, LOG=fLOG)
        self._my_database_ev = Database(dbfile, LOG=fLOG)
        this = os.path.abspath(os.path.split(__file__)[0])
        # the server always falls back on its own folder for static files
        if root is None:
            root = [this]
        elif isinstance(root, str):
            root = [root, this]
        elif isinstance(root, list):
            root = root + [this]
        else:
            raise TypeError("unable to interpret root: " + str(root))
        self._my_root = root
        self._my_main_page = main_page
        self._my_address = server_address
        fLOG("CustomServer.init: root=", root)
        fLOG("CustomServer.init: db=", dbfile)
        self.table_event = "cs_events"
        self.table_stats = "cs_stats"
        self.logfile = logfile
        if self.logfile is not None:
            if self.logfile == "stdout":
                self.flog = sys.stdout
            elif isinstance(self.logfile, str):
                self.flog = open(self.logfile, "a", encoding="utf8")
            else:
                # assumed to be an already-opened file-like object
                self.flog = self.logfile
        else:
            self.flog = None
        # create the tables if they do not exist yet
        self._my_database_ev.connect()
        if not self._my_database_ev.has_table(self.table_stats):
            schema = CustomDBServer.schema_table("stats")
            self._my_database_ev.create_table(self.table_stats, schema)
            self._my_database_ev.commit()
            self._my_database_ev.create_index(
                "id_post_" + self.table_stats, self.table_stats, "id_post", False)
            self._my_database_ev.commit()
        if not self._my_database_ev.has_table(self.table_event):
            schema = CustomDBServer.schema_table("event")
            self._my_database_ev.create_table(self.table_event, schema)
            self._my_database_ev.commit()
        self._my_database_ev.close()

    def __enter__(self):
        """
        What to do when creating the class.
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback):  # pylint: disable=W0221
        """
        What to do when removing the instance (close the log file).
        """
        if self.flog is not None and self.logfile != "stdout":
            self.flog.close()

    def process_event(self, event):
        """
        Processes an event, it expects a format like the following:

        ::

            type1/uuid/type2/args

        @param      event   string to log
        """
        now = datetime.datetime.now()
        if self.flog is not None:
            self.flog.write(str(now) + " " + event)
            self.flog.write("\n")
            self.flog.flush()

        info = event.split("/")
        status = None
        # a status event looks like ``type1/uuid/status/id_post/status_value``:
        # five fields are required before reading info[3] and info[4]
        # (the previous guard ``len(info) >= 4`` let a four-field "status"
        #  event raise IndexError on info[4])
        if len(info) >= 5 and info[2] == "status":
            status = {"status": info[4], "id_post": int(info[3]),
                      "dtime": now, "rate": -1, "comment": ""}
        if len(info) > 4:
            # collapse extra fields into a single args value
            info[3:] = ["/".join(info[3:])]
        if len(info) < 4:
            raise OSError("unable to log event: " + event)
        values = {"type1": info[0], "uuid": info[1],
                  "type2": info[2], "dtime": now, "args": info[3]}

        # to avoid database to collide
        iscon = self._my_database_ev.is_connected()
        if iscon:
            if self.flog is not None:
                self.flog.write("unable to connect the database")
                if status is not None:
                    self.flog.write("unable to update status: " + str(status))
            return

        self._my_database_ev.connect()
        self._my_database_ev.insert(self.table_event, values)
        if status is not None:
            self._my_database_ev.insert(self.table_stats, status)
        self._my_database_ev.commit()
        self._my_database_ev.close()

    @staticmethod
    def run_server(server, dbfile, thread=False, port=8080, logfile=None, extra_path=None):
        """
        Starts the server.

        @param  server      if None, it becomes
                            ``CustomServer(dbfile, ('localhost', 8080), CustomServerHandler)``
        @param  dbfile      file to the RSS database (SQLite)
        @param  thread      if True, the server is run in a thread
                            and the function returns right away,
                            otherwise, it runs the server.
        @param  port        port to use
        @param  logfile     file for the log or "stdout" for the standard output
        @param  extra_path  additional path the server should look into to find a page
        @return             server if thread is False, the thread otherwise
                            (the thread is started)

        @warning If you kill the python program while the thread is still running,
                 python interpreter might be closed completely.
        """
        if server is None:
            server = CustomDBServer(
                ('localhost', port), dbfile, CustomDBServerHandler,
                logfile=logfile, root=extra_path)
        if thread:
            th = ThreadServer(server)
            th.start()
            return th
        else:
            server.serve_forever()
            return server
assert not thread.is_alive() assert os.path.exists(dbfile) if __name__ == "__main__": enabled = False if enabled: import webbrowser port = 8098 fold = os.path.abspath(os.path.split(__file__)[0]) dbfile = os.path.join(fold, "out_custom_server.db3") db = Database(dbfile) df = pandas.DataFrame([{"name": "xavier", "module": "pyrsslocal"}]) db.connect() db.import_dataframe(df, "example") db.close() url = "http://localhost:%d/p_aserver.html" % port fLOG("opening ", url) webbrowser.open(url) CustomDBServer.run_server( None, dbfile, port=port, extra_path=os.path.join( fold, "data")) unittest.main()
import os sys.path.append(r"D:\Dupre\_data\program\python\pyensae\src") import pyensae from time import strftime, strptime import datetime from pyensae.sql.database_main import Database tbl = "stations.txt" if not os.path.exists(tbl): sql = """SELECT DISTINCT address, contract_name,lat,lng,name,number FROM snap ORDER BY name""" db = Database(__file__.replace(".py", ".db3")) db.connect() db.export_view_into_flat_file(sql, tbl, header=True, encoding="utf8") db.close() tbl = "tb8_velib.txt" if True or not os.path.exists(tbl): dt = datetime.datetime.strptime("2013-09-13 11:26:37.738913", "%Y-%m-%d %H:%M:%S.%f") print(dt, type(dt), dt.hour) sql = """SELECT collect_date, --strftime('%Y-%m-%d %H:%M:%S', last_update) AS last_update, available_bike_stands, available_bikes, number , hour(last_update) AS heure
def test_import_flatflit(self):
    """Import a tab-separated file containing a value with an embedded tab."""
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")
    fold = os.path.abspath(os.path.split(__file__)[0])
    temp = os.path.join(fold, "temp_db_bug")
    if not os.path.exists(temp):
        os.mkdir(temp)
    words = ["one", "two", "three", "four", "five",
             "six", "seven", "eight", "nine", "ten"]
    data = [{"name": words[random.randint(0, 9)],
             "number": random.randint(0, 99)}
            for _ in range(0, 10000)]
    filename = os.path.join(temp, "out_flatfile_tab_pos2.txt")
    # sneak a value containing a tab into the second row
    datatab = data[:1] + [{"name": " one\ttab", "number": 100}] + data[1:]
    pandas.DataFrame(datatab).to_csv(
        filename, sep="\t", encoding="utf8", header=True, index=False)
    with open(filename, "r", encoding="utf8") as f:
        content = f.read()
    content = content.replace('"', '')
    with open(filename + ".2.txt", "w", encoding="utf8") as f:
        f.write(content)
    dbfile = os.path.join(fold, "out_db.db3")
    if os.path.exists(dbfile):
        os.remove(dbfile)
    import_flatfile_into_database(
        dbfile, filename + ".2.txt", table="example", fLOG=fLOG)
    db = Database(dbfile, LOG=fLOG)
    db.connect()
    count = db.get_table_nb_lines("example")
    sch = db.get_table_columns("example")
    values = db.execute_view("SELECT * FROM example")
    db.close()
    if count != 10001:
        rows = [str(v) for v in values][:10]
        mes = "\n".join(rows)
        fLOG(datatab[:3])
        raise Exception(
            "expect:10001 not {0}\nROWS:\n{1}".format(count, mes))
    exp = [('name', str), ('number', int)]
    if sch != exp:
        raise Exception("{0}!={1} ({2})".format(sch, exp, len(datatab)))