Exemple #1
0
    def test_sqllite_sql(self):
        """Import ``taxi_trip.csv`` into a fresh SQLite file and check the table."""
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        fold = os.path.abspath(os.path.split(__file__)[0])
        source = os.path.join(fold, "data", "taxi_trip.csv")
        dbf = os.path.join(fold, "temp_taxi.db3")
        # always start from a clean database file
        if os.path.exists(dbf):
            os.remove(dbf)
        try:
            import_flatfile_into_database(dbf, source, fLOG=fLOG, header=True)
        except NoHeaderException:
            # the importer refused the file: nothing more to check
            return

        assert os.path.exists(dbf)
        db = Database(dbf, LOG=fLOG)
        db.connect()
        rows = db.execute_view("SELECT * FROM taxi_trip")
        fLOG(len(rows))
        fLOG(rows)
        # reference row: one value per imported column
        exp = ('1B5C0970F2AE8CFFBA8AE4584BEAED29',
               'D961332334524990D1BBD462E2EFB8A4', 'CMT',
               '2013-02-08 23:35:14', 'CRD', 6.0, 0.5, 0.5, 0.0, 0, 7.0)
        assert len(rows) > 0
        assert len(rows[0]) == len(exp)
    def test_sqllite_sql(self):
        """Import ``taxi_trip.csv`` into a SQLite file and check the imported table."""
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        file = os.path.join(
            os.path.abspath(
                os.path.split(__file__)[0]),
            "data",
            "taxi_trip.csv")
        dbf = os.path.join(
            os.path.abspath(
                os.path.split(__file__)[0]),
            "temp_taxi.db3")
        # always start from a clean database file
        if os.path.exists(dbf):
            os.remove(dbf)
        try:
            import_flatfile_into_database(
                dbf,
                file,
                fLOG=fLOG,
                header=True)
        except NoHeaderException:
            # the importer refused the file: nothing more to check
            return

        assert os.path.exists(dbf)
        db = Database(dbf, LOG=fLOG)
        db.connect()
        view = db.execute_view("SELECT * FROM taxi_trip")
        fLOG(len(view))
        fLOG(view)
        # reference row: one value per imported column
        exp = ('1B5C0970F2AE8CFFBA8AE4584BEAED29', 'D961332334524990D1BBD462E2EFB8A4',
               'CMT', '2013-02-08 23:35:14', 'CRD', 6.0, 0.5, 0.5, 0.0, 0, 7.0)
        assert len(view) > 0
        assert len(view[0]) == len(exp)
Exemple #3
0
    def __init__(self, dbfile,
                 table_blogs="blogs",
                 table_posts="posts",
                 table_stats="posts_stat",
                 table_event="events",
                 LOG=fLOG):
        """
        Opens an existing database, checks the mandatory blog/post tables
        are present and creates the missing auxiliary ones.

        @param    dbfile        file database
        @param    table_blogs   table name for the blogs
        @param    table_posts   table name for the posts
        @param    table_stats   table name for the posts stats
        @param    table_event   table name for the events
        @param    LOG           logging function
        """
        # the database must already exist: this class does not create it
        if not os.path.exists(dbfile):
            raise FileNotFoundError(dbfile)
        Database.__init__(self, dbfile, LOG=LOG)
        self.dbfile = dbfile
        self.table_blogs = table_blogs
        self.table_posts = table_posts
        self.table_stats = table_stats
        self.table_event = table_event
        self.connect()
        # blogs and posts tables must have been filled by a previous import
        for tbl in [table_blogs, table_posts]:
            if not self.has_table(tbl):
                raise Exception("table %s not found in %s" % (tbl, dbfile))

        self.create_missing_table()
        self.close()
Exemple #4
0
    def test_import_person(self):
        """Import a headerless text file, providing the column names explicitly."""
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        file = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                            "data", "person.txt")
        dbf = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                           "temp_person.db3")
        # always start from a clean database file
        if os.path.exists(dbf):
            os.remove(dbf)
        columns = "sequence tag timestamp dateformat x y z activity".split()
        try:
            import_flatfile_into_database(dbf,
                                          file,
                                          fLOG=fLOG,
                                          columns=columns,
                                          header=False)
        except NoHeaderException:
            # the importer raised NoHeaderException: stop the test here
            return

        assert os.path.exists(dbf)
        db = Database(dbf, LOG=fLOG)
        db.connect()
        view = db.execute_view("SELECT * FROM ACAPA")
        assert len(view) > 0
        assert len(view[0]) == 7
Exemple #5
0
    def test_makedirs_badengine(self):
        """An unknown ``engine`` value must make ``Database.fill_sql_table`` raise
        ``DBException`` instead of silently creating a table."""
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        temp = get_temp_folder(__file__, "temp_missing_makedirs2")
        # the intermediate 'tr' folder does not exist yet
        dbf = os.path.join(temp, 'tr', 'td.db3')

        values = [{"name": "A", "age": 10, "score": 34.5},
                  {"name": "B", "age": 20, "score": -34.5}]
        df = pandas.DataFrame(values)
        # 'rty' is not a supported engine: the call must fail with DBException
        # (assertRaises replaces the try/raise-AssertionError/except pattern)
        with self.assertRaises(DBException):
            Database.fill_sql_table(df, dbf, "newtable", engine='rty')
    def test_import_person(self):
        """Import a headerless flat file with an explicit list of column names."""
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        fold = os.path.abspath(os.path.split(__file__)[0])
        source = os.path.join(fold, "data", "person.txt")
        dbf = os.path.join(fold, "temp_person.db3")
        # always start from a clean database file
        if os.path.exists(dbf):
            os.remove(dbf)
        columns = "sequence tag timestamp dateformat x y z activity".split()
        try:
            import_flatfile_into_database(dbf, source, fLOG=fLOG,
                                          columns=columns, header=False)
        except NoHeaderException:
            # the importer raised NoHeaderException: stop the test here
            return

        assert os.path.exists(dbf)
        db = Database(dbf, LOG=fLOG)
        db.connect()
        rows = db.execute_view("SELECT * FROM ACAPA")
        assert len(rows) > 0
        assert len(rows[0]) == 7
Exemple #7
0
def rss_from_xml_to_database(file, database="database_rss.db3",
                             table="blogs", fLOG=None):
    """
    Parses a list of blogs stored in a :epkg:`XML`
    file using Google Reader format,
    stores the results in a :epkg:`SQLite` database.

    @param  file            (str) xml file containing the list of blogs, example:
    @param  database        database file (sqlite)
    @param  table           table name
    @param  fLOG            logging function
    @return                 number of stored blogs

    The XML file should contain entries such as:

    ::

        <outline text="XD blog"
                title="XD blog" type="rss"
                xmlUrl="http://www.xavierdupre.fr/blog/xdbrss.xml"
                htmlUrl="http://www.xavierdupre.fr/blog/xd_blog.html" />

    No duplicate check is performed: blogs already present in the
    database are inserted again. The table is created when missing.
    """
    # parse every <outline> entry of the subscription file
    blogs = list(StreamRSS.enumerate_stream_from_google_list(file))
    db = Database(database, LOG=fLOG)
    db.connect()
    StreamRSS.fill_table(db, table, blogs)
    db.close()
    return len(blogs)
    def test_makedirs_badengine(self):
        """An unknown ``engine`` value must make ``fill_sql_table`` raise."""
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        temp = get_temp_folder(__file__, "temp_missing_makedirs2")
        dbf = os.path.join(temp, 'tr', 'td.db3')

        values = [{"name": "A", "age": 10, "score": 34.5},
                  {"name": "B", "age": 20, "score": -34.5}, ]
        df = pandas.DataFrame(values)
        try:
            Database.fill_sql_table(df, dbf, "newtable", engine='rty')
            raise AssertionError('engine is recognized but should not')
        except DBException:
            # expected: 'rty' is not a supported engine
            pass
Exemple #9
0
    def test_import_df(self):
        """Round-trip a dataframe through ``fill_sql_table`` and ``to_df``."""
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        dbf = os.path.join(
            os.path.abspath(
                os.path.split(__file__)[0]),
            "temp_database_df.db3")
        if os.path.exists(dbf):
            os.remove(dbf)

        values = [{"name": "A", "age": 10, "score": 34.5},
                  {"name": "B", "age": 20, "score": -34.5}, ]
        df = pandas.DataFrame(values)
        db = Database.fill_sql_table(df, dbf, "newtable")
        db.execute_view("SELECT * FROM newtable")
        df2 = db.to_df("SELECT * FROM newtable")
        # realign the read-back columns with the written ordering
        df3 = df2[["age", "name", "score"]]
        self.assertGreater(len(df), 0)
        self.assertEqual(len(df3), len(df))
        # compare written and read values cell by cell
        for a, b in zip(df.values, df3.values):
            self.assertGreater(len(a), 0)
            self.assertEqual(len(a), len(b))
            for c, d in zip(a, b):
                self.assertEqual(c, d)
        db.close()
Exemple #10
0
    def test_import_df(self):
        """Round-trip a dataframe through ``fill_sql_table`` and ``to_df``."""
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        dbf = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                           "temp_database_df.db3")
        if os.path.exists(dbf):
            os.remove(dbf)

        values = [
            {
                "name": "A",
                "age": 10,
                "score": 34.5
            },
            {
                "name": "B",
                "age": 20,
                "score": -34.5
            },
        ]
        df = pandas.DataFrame(values)
        # fix the column order before writing so the comparison below is stable
        df = df[['age', 'name', 'score']]
        db = Database.fill_sql_table(df, dbf, "newtable")
        db.execute_view("SELECT * FROM newtable")
        df2 = db.to_df("SELECT * FROM newtable")
        df3 = df2[["age", "name", "score"]]
        self.assertGreater(len(df), 0)
        self.assertEqual(len(df3), len(df))
        # compare written and read values cell by cell
        for a, b in zip(df.values, df3.values):
            self.assertGreater(len(a), 0)
            self.assertEqual(len(a), len(b))
            for c, d in zip(a, b):
                self.assertEqual(c, d)
        db.close()
Exemple #11
0
 def test_import_flatflit(self):
     """Import ``ACA.PA.txt`` and check the resulting ``ACAPA`` table."""
     fLOG(__file__,
          self._testMethodName,
          OutputPrint=__name__ == "__main__")
     file = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                         "data", "ACA.PA.txt")
     dbf = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                        "temp_database.db3")
     # always start from a clean database file
     if os.path.exists(dbf):
         os.remove(dbf)
     import_flatfile_into_database(dbf, file, fLOG=fLOG)
     self.assertExists(dbf)
     db = Database(dbf, LOG=fLOG)
     db.connect()
     view = db.execute_view("SELECT * FROM ACAPA")
     self.assertGreater(len(view), 0)
     self.assertEqual(len(view[0]), 7)
    def test_custom_server_location(self):
        """Start a ``CustomDBServer`` over a small database and fetch a page."""
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        fold = os.path.abspath(os.path.split(__file__)[0])
        dbfile = os.path.join(fold, "out_custom_server2.db3")
        if os.path.exists(dbfile):
            os.remove(dbfile)

        # populate the database with one table built from a dataframe
        db = Database(dbfile, LOG=fLOG)
        df = pandas.DataFrame([{"name": "xavier", "module": "pyrsslocal"}])
        db.connect()
        db.import_dataframe(df, "example")
        db.close()

        # serve the database from a background thread
        server = CustomDBServer(('localhost', 8099), dbfile, CustomDBServerHandler,
                                root=os.path.join(fold, "data"))
        thread = CustomDBServer.run_server(server, dbfile=dbfile, thread=True,
                                           extra_path=os.path.join(fold, "data"))

        url = "http://localhost:8099/index.html"
        cont = get_url_content_timeout(url)
        assert len(cont) > 0
        assert "unittest" in cont

        # stop the server and check it shut down cleanly
        thread.shutdown()
        assert not thread.is_alive()
        assert os.path.exists(dbfile)
Exemple #13
0
    def test_db_index(self):
        """Exercise index and metadata helpers of ``Database`` on a small table."""
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        temp = get_temp_folder(__file__, "temp_missing_makedirs")
        dbf = os.path.join(temp, 'tr', 'td.db3')

        values = [
            {
                "name": "A",
                "age": 10,
                "score": 34.5
            },
            {
                "name": "B",
                "age": 20,
                "score": -34.5
            },
        ]
        df = pandas.DataFrame(values)
        df = df[['age', 'name', 'score']]
        db = Database.fill_sql_table(df, dbf, "newtable")
        # no index was created yet
        has = db.has_index('ind')
        self.assertFalse(has)
        resi = db.get_index_on_table('newtable', full=True)
        self.assertEqual(resi, [])
        resi = db.get_index_on_table('newtable', full=False)
        self.assertEqual(resi, [])
        resi = db.get_column_type('newtable', 'name')
        self.assertEqual(resi, str)
        # unknown column must raise
        self.assertRaise(lambda: db.get_column_type('newtable', 'name2'),
                         DBException)
        resi = db.get_table_nfirst_lines('newtable', 2)
        self.assertEqual(resi, [(10, 'A', 34.5, 1), (20, 'B', -34.5, 2)])
        # execute_script returns None unless close=False is given
        cur = db.execute_script('SELECT name FROM newtable')
        self.assertEqual(cur, None)
        cur = db.execute_script('SELECT name FROM newtable', close=False)
        self.assertFalse(cur is None)

        # create a table covering every supported column type
        db.create_table("nextable2",
                        columns={
                            -1: ("key", int, "PRIMARYKEY", "AUTOINCREMENT"),
                            0: ("name", str),
                            1: ("number", float),
                            2: ('tint', int),
                            3: ('tint64', numpy.int64),
                            4: ('tfloat64', numpy.float64),
                            5: ('tdt', datetime.datetime),
                            6: ('tdec', decimal.Decimal)
                        })
        cur.close()
        db.close()
Exemple #14
0
    def test_rss_from_google_arxiv(self):
        """Parse the arxiv subscription list and store its single feed."""
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        path = os.path.abspath(os.path.split(__file__)[0])
        file = os.path.join(path, "data", "subscriptions_arxiv.xml")
        assert os.path.exists(file)
        feeds = list(StreamRSS.enumerate_stream_from_google_list(file))
        # exactly one feed is expected in this subscription file
        if len(feeds) != 1:
            for feed in feeds:
                print(feed)
            raise Exception("number of expected feed %d != 1" % (len(feeds)))
        fLOG("nb:", len(feeds))

        dbfile = os.path.join(path, "temp_rss_arxiv.db3")
        if os.path.exists(dbfile):
            os.remove(dbfile)

        # store the parsed feed in a fresh database
        db = Database(dbfile, LOG=fLOG)
        db.connect()
        StreamRSS.fill_table(db, "blogs", feeds)
        db.close()
    def test_makedirs(self):
        """``fill_sql_table`` must create missing parent folders of the db file."""
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        temp = get_temp_folder(__file__, "temp_missing_makedirs")
        # the 'tr' folder does not exist yet
        dbf = os.path.join(temp, 'tr', 'td.db3')

        rows = [{"name": "A", "age": 10, "score": 34.5},
                {"name": "B", "age": 20, "score": -34.5}]
        frame = pandas.DataFrame(rows)
        db = Database.fill_sql_table(frame, dbf, "newtable")
        db.execute_view("SELECT * FROM newtable")
        db.close()
Exemple #16
0
 def test_import_flatflit(self):
     """Import ``ACA.PA.txt`` and check the resulting ``ACAPA`` table."""
     fLOG(
         __file__,
         self._testMethodName,
         OutputPrint=__name__ == "__main__")
     file = os.path.join(
         os.path.abspath(
             os.path.split(__file__)[0]),
         "data",
         "ACA.PA.txt")
     dbf = os.path.join(
         os.path.abspath(
             os.path.split(__file__)[0]),
         "temp_database.db3")
     # always start from a clean database file
     if os.path.exists(dbf):
         os.remove(dbf)
     import_flatfile_into_database(dbf, file, fLOG=fLOG)
     self.assertExists(dbf)
     db = Database(dbf, LOG=fLOG)
     db.connect()
     view = db.execute_view("SELECT * FROM ACAPA")
     self.assertGreater(len(view), 0)
     self.assertEqual(len(view[0]), 7)
    def test_db_index(self):
        """Exercise index and metadata helpers of ``Database`` on a small table."""
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        temp = get_temp_folder(__file__, "temp_missing_makedirs")
        dbf = os.path.join(temp, 'tr', 'td.db3')

        values = [{"name": "A", "age": 10, "score": 34.5},
                  {"name": "B", "age": 20, "score": -34.5}, ]
        df = pandas.DataFrame(values)
        db = Database.fill_sql_table(df, dbf, "newtable")
        # no index was created yet
        has = db.has_index('ind')
        self.assertFalse(has)
        resi = db.get_index_on_table('newtable', full=True)
        self.assertEqual(resi, [])
        resi = db.get_index_on_table('newtable', full=False)
        self.assertEqual(resi, [])
        resi = db.get_column_type('newtable', 'name')
        self.assertEqual(resi, str)
        # unknown column must raise
        self.assertRaise(lambda: db.get_column_type(
            'newtable', 'name2'), DBException)
        resi = db.get_table_nfirst_lines('newtable', 2)
        self.assertEqual(resi, [(10, 'A', 34.5, 1), (20, 'B', -34.5, 2)])
        # execute_script returns None unless close=False is given
        cur = db.execute_script('SELECT name FROM newtable')
        self.assertEqual(cur, None)
        cur = db.execute_script('SELECT name FROM newtable', close=False)
        self.assertFalse(cur is None)

        # create a table covering every supported column type
        db.create_table("nextable2", columns={-1: ("key", int, "PRIMARYKEY", "AUTOINCREMENT"),
                                              0: ("name", str), 1: ("number", float),
                                              2: ('tint', int),
                                              3: ('tint64', numpy.int64),
                                              4: ('tfloat64', numpy.float64),
                                              5: ('tdt', datetime.datetime),
                                              6: ('tdec', decimal.Decimal)})
        cur.close()
        db.close()
Exemple #18
0
    def test_makedirs(self):
        """``fill_sql_table`` must create missing parent folders of the db file."""
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        temp = get_temp_folder(__file__, "temp_missing_makedirs")
        # the 'tr' folder does not exist yet
        dbf = os.path.join(temp, 'tr', 'td.db3')

        values = [
            {
                "name": "A",
                "age": 10,
                "score": 34.5
            },
            {
                "name": "B",
                "age": 20,
                "score": -34.5
            },
        ]
        df = pandas.DataFrame(values)
        db = Database.fill_sql_table(df, dbf, "newtable")
        db.execute_view("SELECT * FROM newtable")
        db.close()
Exemple #19
0
def rss_download_post_to_database(database="database_rss.db3", table_blog="blogs", table_post="posts", fLOG=fLOG):
    """
    Downloads all posts from a list of blogs stored in a database
    by function @see fn rss_from_xml_to_database.

    @param      database        database file name (SQLite format)
    @param      table_blog      table name of the blogs
    @param      table_post      table name of the post
    @param      fLOG            logging function
    @return                     number of posts downloaded
    """
    db = Database(database, LOG=fLOG)
    db.connect()
    # rebuild StreamRSS objects from the stored blogs table
    rss_list = list(db.enumerate_objects(table_blog, StreamRSS))
    # fetch every post of every feed; failures are skipped on insertion
    list_post = list(StreamRSS.enumerate_post_from_rsslist(rss_list, fLOG=fLOG))
    BlogPost.fill_table(db, table_post, list_post, skip_exception=True)
    db.close()

    return len(list_post)
Exemple #20
0
    def test_rss_parse(self):
        """Parse a feed, store its posts, then check repeated insertions
        do not increase the number of distinct rows."""
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        path = os.path.abspath(os.path.split(__file__)[0])
        file = os.path.join(path, "data", "xdbrss.xml")
        assert os.path.exists(file)

        rss = StreamRSS(titleb="XD",
                        type="rss",
                        xmlUrl="http://www.xavierdupre.fr/blog/xdbrss.xml",
                        htmlUrl="http://www.xavierdupre.fr/blog/xd_blog_nojs.html",
                        keywordsb=["python"],
                        id=5)

        # posts enumerated without a local file argument
        res = rss.enumerate_post(fLOG=fLOG)
        nb = 0
        for _ in res:
            nb += 1
            assert len(_.title) > 0
        assert nb > 0

        # posts enumerated from the local copy of the feed
        res = rss.enumerate_post(file, fLOG=fLOG)
        nb = 0
        lres = list(res)

        nb = 0
        for _ in lres:
            nb += 1
            assert len(_.title) > 0
        assert nb > 0
        fLOG("nb post=", nb)

        dbfile = os.path.join(path, "temp_rssp.db3")
        if os.path.exists(dbfile):
            os.remove(dbfile)

        db = Database(dbfile, LOG=fLOG)
        db.connect()
        BlogPost.fill_table(db, "posts", lres)
        db.close()

        # reopen and check the posts were stored
        db = Database(dbfile, LOG=fLOG)
        db.connect()
        assert db.has_table("posts")
        assert db.get_table_nb_lines("posts") == nb

        sql = "SELECT * FROM posts"
        cur = db.execute(sql)
        val = {}
        for row in cur:
            val[row[-1]] = 0
        assert len(val) == 6
        key, value = val.popitem()
        assert key is not None

        # we insert the blog a second time
        BlogPost.fill_table(db, "posts", lres)
        sql = "SELECT * FROM posts"
        cur = db.execute(sql)
        val = {}
        for row in cur:
            val[row[-1]] = 0
        assert len(val) == 6

        # we insert the blog a third time
        BlogPost.fill_table(db, "posts", lres)
        sql = "SELECT * FROM posts"
        cur = db.execute(sql)
        val = {}
        for row in cur:
            val[row[-1]] = 0
        assert len(val) == 6

        db.close()
Exemple #21
0
    def test_rss_from_google(self):
        """Parse a Google Reader subscription list, check feed count and
        ordering, then store and reload the feeds through a database."""
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        path = os.path.abspath(os.path.split(__file__)[0])
        file = os.path.join(path, "data", "subscriptions.xml")
        assert os.path.exists(file)
        res = list(StreamRSS.enumerate_stream_from_google_list(file))
        if len(res) != TestRSS.nb_rss_blog:
            # dump the feeds and flag duplicates before failing
            dic = {}
            for i, r in enumerate(sorted(res)):
                dic[str(r)] = dic.get(str(r), 0) + 1
                fLOG(i, r)
            for k in dic:
                if dic[k] > 1:
                    fLOG("--double", k)
            raise Exception(
                "number of expected feed %d != %d" %
                (len(res), TestRSS.nb_rss_blog))
        # sorted list must be in non-decreasing order
        li = sorted(res)
        for i, r in enumerate(li):
            if i > 0 and li[i] < li[i - 1]:
                raise Exception("bad order {0} < {1}".format(li[i - 1], li[i]))
        fLOG("nb:", len(res))

        dbfile = os.path.join(path, "temp_rss.db3")
        if os.path.exists(dbfile):
            os.remove(dbfile)

        db = Database(dbfile, LOG=fLOG)
        db.connect()
        StreamRSS.fill_table(db, "blogs", res)
        db.close()

        # reopen and check the feeds were stored
        db = Database(dbfile, LOG=fLOG)
        db.connect()
        assert db.has_table("blogs")
        assert db.get_table_nb_lines("blogs") == TestRSS.nb_rss_blog
        sql = "SELECT * FROM blogs"
        cur = db.execute(sql)
        val = {}
        for row in cur:
            val[row[-1]] = 0
        assert len(val) == TestRSS.nb_rss_blog
        key, value = val.popitem()
        assert key is not None

        # iterator on StreamRSS
        obj = list(db.enumerate_objects("blogs", StreamRSS))
        assert len(obj) == TestRSS.nb_rss_blog

        db.close()
    def test_import_index(self):
        """Compare the two flat-file importers and exercise index creation."""
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        file = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                            "data", "ACA.PA.txt")
        dbf = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                           "temp_database_index.db3")
        if os.path.exists(dbf):
            os.remove(dbf)

        fLOG("import flat file")
        ntbl2 = import_flatfile_into_database(dbf, file, fLOG=fLOG)
        fLOG("import flat file pandas")
        ntbl3 = import_flatfile_into_database_pandas(dbf,
                                                     file,
                                                     fLOG=fLOG,
                                                     table="pandas_table",
                                                     chunksize=500,
                                                     sep="\t")
        fLOG("-----------------")
        # both importers must produce tables with identical shape and schema
        db = Database(dbf, LOG=fLOG)
        db.connect()
        tbl1 = pandas.read_sql("SELECT * FROM " + ntbl2, db._connection)
        tbl2 = pandas.read_sql("SELECT * FROM " + ntbl3, db._connection)
        db.close()
        self.assertEqual(tbl1.shape, tbl2.shape)
        self.assertEqual(list(tbl1.columns), list(tbl2.columns))
        self.assertEqual(list(tbl1.dtypes), list(tbl2.dtypes))

        assert os.path.exists(dbf)
        db = Database(dbf, LOG=fLOG)
        db.connect()

        # create an index on the Date column and check it is listed
        db.create_index("index1", "ACAPA", "Date")
        li = db.get_index_list()
        self.assertEqual(
            li, [('index1', 'ACAPA', 'CREATE INDEX index1 ON ACAPA (Date)',
                  ('Date', ))])
        # duplicate the first row, then update it through the indexed column
        line = db.get_table_nfirst_lines("ACAPA")
        col = [_[0] for _ in db.get_table_columns("ACAPA")]
        line = line[0]
        add = {k: v for k, v in zip(col, line)}  # pylint: disable=R1721
        db.insert("ACAPA", add)
        db.commit()
        db.update("ACAPA", "Date", add["Date"], add)
        db.commit()

        db.close()
Exemple #23
0
    def test_import_flatflitand_copy(self):
        """Copy an imported database to another file and to an in-memory
        database, checking the row counts match."""
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")

        file = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                            "data", "ACA.PA.txt")
        dbf = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                           "temp_database_copy.db3")
        if os.path.exists(dbf):
            os.remove(dbf)

        dbf2 = os.path.join(os.path.abspath(os.path.split(__file__)[0]),
                            "out_copy.db3")
        if os.path.exists(dbf2):
            os.remove(dbf2)

        import_flatfile_into_database(dbf, file, fLOG=fLOG)
        assert os.path.exists(dbf)

        # file-to-file copy
        db = Database(dbf, LOG=fLOG)
        dbm = Database(dbf2, LOG=fLOG)
        db.copy_to(dbm)

        db.connect()
        dbm.connect()
        tbls = dbm.get_table_list()
        if len(tbls) != 1:
            raise Exception("expects one table not %d" % len(tbls))
        view = db.execute_view("SELECT * FROM ACAPA")
        viewm = dbm.execute_view("SELECT * FROM ACAPA")
        db.close()
        dbm.close()
        assert len(view) == len(viewm)

        # file-to-memory copy
        dbm2 = Database(":memory:", LOG=fLOG)
        db.copy_to(dbm2)
        dbm2.connect()
        tbls = dbm2.get_table_list()
        if len(tbls) != 1:
            raise Exception("expects one table not %d" % len(tbls))
        viewm2 = dbm2.execute_view("SELECT * FROM ACAPA")
        dbm2.close()
        assert len(view) == len(viewm2)
import sys
import os
sys.path.append(r"D:\Dupre\_data\program\python\pyensae\src")

import pyensae
from time import strftime, strptime
import datetime
from pyensae.sql.database_main import Database

# export the distinct station rows into a flat file unless it already exists
tbl = "stations.txt"
if not os.path.exists(tbl):

    # one row per station, ordered by name
    sql = """SELECT DISTINCT address, contract_name,lat,lng,name,number FROM snap
             ORDER BY name"""
    db = Database(__file__.replace(".py", ".db3"))
    db.connect()
    db.export_view_into_flat_file(sql, tbl, header=True, encoding="utf8")
    db.close()

tbl = "tb8_velib.txt"
if True or not os.path.exists(tbl):

    dt = datetime.datetime.strptime(
        "2013-09-13 11:26:37.738913", "%Y-%m-%d %H:%M:%S.%f")
    print(dt, type(dt), dt.hour)

    sql = """SELECT collect_date,
                --strftime('%Y-%m-%d %H:%M:%S', last_update) AS
                last_update,
                available_bike_stands, available_bikes, number
                , hour(last_update) AS heure
    def test_import_index(self):
        """Compare the two flat-file importers and exercise index creation."""
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        file = os.path.join(
            os.path.abspath(
                os.path.split(__file__)[0]),
            "data",
            "ACA.PA.txt")
        dbf = os.path.join(
            os.path.abspath(
                os.path.split(__file__)[0]),
            "temp_database_index.db3")
        if os.path.exists(dbf):
            os.remove(dbf)

        fLOG("import flat file")
        ntbl2 = import_flatfile_into_database(dbf, file, fLOG=fLOG)
        fLOG("import flat file pandas")
        ntbl3 = import_flatfile_into_database_pandas(dbf, file, fLOG=fLOG, table="pandas_table",
                                                     chunksize=500, sep="\t")
        fLOG("-----------------")
        # both importers must produce tables with identical shape and schema
        db = Database(dbf, LOG=fLOG)
        db.connect()
        tbl1 = pandas.read_sql("SELECT * FROM " + ntbl2, db._connection)
        tbl2 = pandas.read_sql("SELECT * FROM " + ntbl3, db._connection)
        db.close()
        self.assertEqual(tbl1.shape, tbl2.shape)
        self.assertEqual(list(tbl1.columns), list(tbl2.columns))
        self.assertEqual(list(tbl1.dtypes), list(tbl2.dtypes))

        assert os.path.exists(dbf)
        db = Database(dbf, LOG=fLOG)
        db.connect()

        # create an index on the Date column and check it is listed
        db.create_index("index1", "ACAPA", "Date")
        li = db.get_index_list()
        self.assertEqual(
            li, [('index1', 'ACAPA', 'CREATE INDEX index1 ON ACAPA (Date)', ('Date',))])
        # duplicate the first row, then update it through the indexed column
        line = db.get_table_nfirst_lines("ACAPA")
        col = [_[0] for _ in db.get_table_columns("ACAPA")]
        line = line[0]
        add = {k: v for k, v in zip(col, line)}
        db.insert("ACAPA", add)
        db.commit()
        db.update("ACAPA", "Date", add["Date"], add)
        db.commit()

        db.close()
        thread.shutdown()
        assert not thread.is_alive()
        assert os.path.exists(dbfile)


if __name__ == "__main__":

    # manual demo, disabled by default: builds a one-table database from a
    # dataframe, opens the browser on the served page and starts the server
    enabled = False
    if enabled:
        import webbrowser
        port = 8098
        fold = os.path.abspath(os.path.split(__file__)[0])
        dbfile = os.path.join(fold, "out_custom_server.db3")

        db = Database(dbfile)
        df = pandas.DataFrame([{"name": "xavier", "module": "pyrsslocal"}])
        db.connect()
        db.import_dataframe(df, "example")
        db.close()

        url = "http://localhost:%d/p_aserver.html" % port
        fLOG("opening ", url)
        webbrowser.open(url)
        # blocking call: serves until interrupted
        CustomDBServer.run_server(
            None,
            dbfile,
            port=port,
            extra_path=os.path.join(
                fold,
                "data"))
Exemple #27
0
    def test_import_flatflit(self):
        """Import a tab-separated file containing a value with an embedded tab
        and check the row count and schema survive."""
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        fold = os.path.abspath(os.path.split(__file__)[0])
        temp = os.path.join(fold, "temp_db_bug")
        if not os.path.exists(temp):
            os.mkdir(temp)

        text = [
            "one",
            "two",
            "three",
            "four",
            "five",
            "six",
            "seven",
            "eight",
            "nine",
            "ten"]
        # 10000 random rows
        data = [{"name": text[random.randint(0, 9)], "number": random.randint(0, 99)}
                for i in range(0, 10000)]

        filename = os.path.join(temp, "out_flatfile_tab_pos2.txt")

        # insert one extra row whose name contains a tab character
        datatab = data[:1] + [{"name": " one\ttab", "number": 100}] + data[1:]
        df = pandas.DataFrame(datatab)
        df.to_csv(
            filename,
            sep="\t",
            encoding="utf8",
            header=True,
            index=False)
        # strip the quoting pandas added around the tab-containing value
        with open(filename, "r", encoding="utf8") as f:
            content = f.read()
        content = content.replace('"', '')
        with open(filename + ".2.txt", "w", encoding="utf8") as f:
            f.write(content)

        dbfile = os.path.join(fold, "out_db.db3")
        if os.path.exists(dbfile):
            os.remove(dbfile)

        import_flatfile_into_database(
            dbfile,
            filename +
            ".2.txt",
            table="example",
            fLOG=fLOG)

        db = Database(dbfile, LOG=fLOG)
        db.connect()
        count = db.get_table_nb_lines("example")
        sch = db.get_table_columns("example")
        values = db.execute_view("SELECT * FROM example")
        db.close()

        # 10000 generated rows + 1 extra tab row
        if count != 10001:
            rows = [str(v) for v in values][:10]
            mes = "\n".join(rows)
            fLOG(datatab[:3])
            raise Exception(
                "expect:10001 not {0}\nROWS:\n{1}".format(
                    count,
                    mes))

        exp = [('name', str), ('number', int)]
        if sch != exp:
            raise Exception("{0}!={1} ({2})".format(sch, exp, len(datatab)))
Exemple #28
0
class CustomDBServer (ThreadingMixIn, HTTPServer):

    """
    Defines a custom server which includes an access to a database,
    this database will contain the table to store the clicks.

    .. exref::
        :title: create a custom local server

        The following code creates an instance of a local server.
        The server expects to find its content in the same folder.

        ::

            from pyensae import Database

            db = Database(dbfile)
            df = pandas.DataFrame ( [ {"name":"xavier", "module":"pyrsslocal"} ] )
            db.connect()
            db.import_dataframe(df, "example")
            db.close()

            url = "http://localhost:%d/p_aserver.html" % port
            webbrowser.open(url)
            CustomDBServer.run_server(None, dbfile, port = port, extra_path = os.path.join("."))

        The main page is the following one and it can contain a Python script
        which will be interpreted by the server.
        It gives access to a variable ``db`` which is a local database
        in SQLite.

        ::

            <?xml version="1.0" encoding="utf-8"?>
            <html>
            <head>
            <link type="text/css" href="/p_aserver.css" rel="stylesheet"/>
            <title>Custom DB Server</title>
            <meta content="dupre, pyrsslocal, custom server" name="keywords"/>
            <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
            <link rel="shortcut icon" href="p_aserver.ico" />
            <meta content="CustomServer from pyrsslocal" name="description" />
            <script type="text/javascript" src="/p_aserver.js"></script>
            <script src="/js/run_prettify.js" type="text/javascript"></script>

            </head>

            <body onload="setPositions(['divtable', ])" class="mymainbody">

            <div class="divtop">
            <h1>Custom DB Server unittest</h1>
            </div>

            <div class="divtable" id="divfiles" onscroll="savePosition('divtable')">

            <h2>Content of table example</h2>

            <script type="text/python">
            print("<table>")
            db.connect()
            for row in db.execute_view("SELECT * FROM example") :
                srow = [ str(_) for _ in row ]
                print( "<tr><td>{0}</td></tr>".format("</td><td>".join(srow) ) )
            db.close()
            print("</table>")
            </script>

            <p>end.</p>

            </div>
            </body>
            </html>
    """

    @staticmethod
    def schema_table(table):
        """
        Returns the schema for a specific table.

        @param      table   table name (in ["stats", "event"])
        @return     dictionary mapping column position to a tuple
                    (column name, type[, constraints...])
        @raise      Exception if the table name is unknown
        """
        if table == "stats":
            return {0: ("id_post", int),
                    1: ("dtime", datetime.datetime),
                    2: ("status", str),
                    3: ("rate", int),
                    4: ("comment", str),
                    }
        elif table == "event":
            # key -1 declares an auto-incremented primary key column
            return {-1: ("id_event", int, "PRIMARYKEY", "AUTOINCREMENT"),
                    0: ("dtime", datetime.datetime),
                    1: ("uuid", str),
                    2: ("type1", str),
                    3: ("type2", str),
                    4: ("args", str),
                    }
        else:
            # include the offending name in the message to ease debugging
            raise Exception("unexpected table name: " + str(table))

    def __init__(self,
                 server_address,
                 dbfile,
                 RequestHandlerClass=CustomDBServerHandler,
                 main_page="index.html",
                 root=None,
                 logfile=None
                 ):
        """
        Constructor: builds the HTTP server, opens the database and
        creates the event/stats tables if they do not exist yet.

        @param  server_address          address of the server
        @param  dbfile                  database filename (SQLite format)
        @param  RequestHandlerClass     it should be @see cl CustomServerHandler
        @param  main_page               main page for the service (when requested with no specific file)
        @param  root                    folder or list of folders where the server will look into for files such as the main page
        @param  logfile                 log destination: None (no log), "stdout",
                                        a filename (opened in append mode), or an opened stream
        """
        HTTPServer.__init__(self, server_address, RequestHandlerClass)
        self._my_database = Database(dbfile, LOG=fLOG)
        self._my_database_ev = Database(dbfile, LOG=fLOG)

        # the folder containing this module is always appended as a fallback
        this = os.path.abspath(os.path.split(__file__)[0])
        if root is None:
            root = [this]
        elif isinstance(root, str):
            root = [root, this]
        elif isinstance(root, list):
            root = root + [this]
        else:
            raise TypeError("unable to interpret root: " + str(root))

        self._my_root = root
        self._my_main_page = main_page
        self._my_address = server_address
        fLOG("CustomServer.init: root=", root)
        fLOG("CustomServer.init: db=", dbfile)

        self.table_event = "cs_events"
        self.table_stats = "cs_stats"

        # resolve the log destination (see constructor documentation)
        self.logfile = logfile
        if self.logfile is not None:
            if self.logfile == "stdout":
                self.flog = sys.stdout
            elif isinstance(self.logfile, str):
                self.flog = open(self.logfile, "a", encoding="utf8")
            else:
                self.flog = self.logfile
        else:
            self.flog = None

        # create the tables on first use
        self._my_database_ev.connect()
        if not self._my_database_ev.has_table(self.table_stats):
            schema = CustomDBServer.schema_table("stats")
            self._my_database_ev.create_table(self.table_stats, schema)
            self._my_database_ev.commit()
            self._my_database_ev.create_index(
                "id_post_" +
                self.table_stats,
                self.table_stats,
                "id_post",
                False)
            self._my_database_ev.commit()

        if not self._my_database_ev.has_table(self.table_event):
            schema = CustomDBServer.schema_table("event")
            self._my_database_ev.create_table(self.table_event, schema)
            self._my_database_ev.commit()
        self._my_database_ev.close()

    def __enter__(self):
        """
        What to do when creating the class.
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback):  # pylint: disable=W0221
        """
        What to do when removing the instance (close the log file).

        NOTE(review): this also closes a stream supplied by the caller
        through *logfile* — confirm callers do not reuse it afterwards.
        """
        if self.flog is not None and self.logfile != "stdout":
            self.flog.close()

    def process_event(self, event):
        """
        Processes an event, it expects a format like the following:

        ::

            type1/uuid/type2/args

        @param      event   string to log
        """
        now = datetime.datetime.now()
        if self.flog is not None:
            self.flog.write(str(now) + " " + event)
            self.flog.write("\n")
            self.flog.flush()

        info = event.split("/")

        status = None
        # a status event looks like type1/uuid/status/id_post/status_value:
        # info[4] is read below, so 5 segments are required
        # (the former test ``len(info) >= 4`` raised IndexError on exactly 4)
        if len(info) >= 5 and info[2] == "status":
            status = {"status": info[4],
                      "id_post": int(info[3]),
                      "dtime": now,
                      "rate": -1,
                      "comment": ""}

        # everything beyond the third segment is folded back into "args"
        if len(info) > 4:
            info[3:] = ["/".join(info[3:])]
        if len(info) < 4:
            raise OSError("unable to log event: " + event)

        values = {"type1": info[0],
                  "uuid": info[1],
                  "type2": info[2],
                  "dtime": now,
                  "args": info[3]}

        # to avoid concurrent use of the database: if a connection is
        # already open, the event is dropped (best effort logging)
        iscon = self._my_database_ev.is_connected()
        if iscon:
            if self.flog is not None:
                self.flog.write("database already in use, unable to log event")
                if status is not None:
                    self.flog.write("unable to update status: " + str(status))
            return

        self._my_database_ev.connect()
        self._my_database_ev.insert(self.table_event, values)
        if status is not None:
            self._my_database_ev.insert(self.table_stats, status)
        self._my_database_ev.commit()
        self._my_database_ev.close()

    @staticmethod
    def run_server(server, dbfile, thread=False, port=8080, logfile=None,
                   extra_path=None):
        """
        Starts the server.

        @param      server      if None, it becomes ``CustomServer(dbfile, ('localhost', 8080), CustomServerHandler)``
        @param      dbfile      file to the RSS database (SQLite)
        @param      thread      if True, the server is run in a thread
                                and the function returns right away,
                                otherwise, it runs the server.
        @param      port        port to use
        @param      logfile     file for the log or "stdout" for the standard output
        @param      extra_path  additional path the server should look into to find a page
        @return                 server if thread is False, the thread otherwise (the thread is started)

        @warning If you kill the python program while the thread is still running, python interpreter might be closed completely.

        """
        if server is None:
            server = CustomDBServer(
                ('localhost',
                 port),
                dbfile,
                CustomDBServerHandler,
                logfile=logfile,
                root=extra_path)
        if thread:
            th = ThreadServer(server)
            th.start()
            return th
        else:
            server.serve_forever()
            return server
Exemple #29
0
    def __init__(self,
                 server_address,
                 dbfile,
                 RequestHandlerClass=CustomDBServerHandler,
                 main_page="index.html",
                 root=None,
                 logfile=None
                 ):
        """
        Builds the HTTP server, opens the database and makes sure the
        event and stats tables exist.

        @param  server_address          address of the server
        @param  dbfile                  database filename (SQLite format)
        @param  RequestHandlerClass     it should be @see cl CustomServerHandler
        @param  main_page               main page for the service (when requested with no specific file)
        @param  root                    folder or list of folders where the server will look into for files such as the main page
        @param  logfile                 log destination: None, "stdout", a filename or an opened stream
        """
        HTTPServer.__init__(self, server_address, RequestHandlerClass)
        self._my_database = Database(dbfile, LOG=fLOG)
        self._my_database_ev = Database(dbfile, LOG=fLOG)

        # the folder containing this module is always searched last
        here = os.path.abspath(os.path.split(__file__)[0])
        if root is None:
            folders = [here]
        elif isinstance(root, str):
            folders = [root, here]
        elif isinstance(root, list):
            folders = root + [here]
        else:
            raise TypeError("unable to interpret root: " + str(root))

        self._my_root = folders
        self._my_main_page = main_page
        self._my_address = server_address
        fLOG("CustomServer.init: root=", folders)
        fLOG("CustomServer.init: db=", dbfile)

        self.table_event = "cs_events"
        self.table_stats = "cs_stats"

        # resolve the log destination: nothing, stdout, a file opened by
        # name, or a stream handed over by the caller
        self.logfile = logfile
        if self.logfile is None:
            self.flog = None
        elif self.logfile == "stdout":
            self.flog = sys.stdout
        elif isinstance(self.logfile, str):
            self.flog = open(self.logfile, "a", encoding="utf8")
        else:
            self.flog = self.logfile

        # create the tables on first use
        db = self._my_database_ev
        db.connect()
        if not db.has_table(self.table_stats):
            db.create_table(self.table_stats,
                            CustomDBServer.schema_table("stats"))
            db.commit()
            db.create_index("id_post_" + self.table_stats,
                            self.table_stats,
                            "id_post",
                            False)
            db.commit()
        if not db.has_table(self.table_event):
            db.create_table(self.table_event,
                            CustomDBServer.schema_table("event"))
            db.commit()
        db.close()
Exemple #30
0
import sys
import os

sys.path.append(r"D:\Dupre\_data\program\python\pyensae\src")

import pyensae
from time import strftime, strptime
import datetime
from pyensae.sql.database_main import Database

tbl = "stations.txt"
if not os.path.exists(tbl):

    sql = """SELECT DISTINCT address, contract_name,lat,lng,name,number FROM snap
             ORDER BY name"""
    db = Database(__file__.replace(".py", ".db3"))
    db.connect()
    db.export_view_into_flat_file(sql, tbl, header=True, encoding="utf8")
    db.close()

tbl = "tb8_velib.txt"
if True or not os.path.exists(tbl):

    dt = datetime.datetime.strptime("2013-09-13 11:26:37.738913",
                                    "%Y-%m-%d %H:%M:%S.%f")
    print(dt, type(dt), dt.hour)

    sql = """SELECT collect_date,
                --strftime('%Y-%m-%d %H:%M:%S', last_update) AS
                last_update,
                available_bike_stands, available_bikes, number
    def test_import_flatflitand_copy(self):
        """
        Imports a flat file into a database, copies that database to a
        second file and to an in-memory database, and checks every copy
        exposes one table with the same number of rows.
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        here = os.path.abspath(os.path.split(__file__)[0])
        datafile = os.path.join(here, "data", "ACA.PA.txt")

        # remove leftovers from previous runs
        dbf = os.path.join(here, "temp_database_copy.db3")
        if os.path.exists(dbf):
            os.remove(dbf)
        dbf2 = os.path.join(here, "out_copy.db3")
        if os.path.exists(dbf2):
            os.remove(dbf2)

        import_flatfile_into_database(dbf, datafile, fLOG=fLOG)
        assert os.path.exists(dbf)

        db = Database(dbf, LOG=fLOG)
        dbm = Database(dbf2, LOG=fLOG)
        db.copy_to(dbm)

        db.connect()
        dbm.connect()
        tables = dbm.get_table_list()
        if len(tables) != 1:
            raise Exception("expects one table not %d" % len(tables))
        rows_src = db.execute_view("SELECT * FROM ACAPA")
        rows_copy = dbm.execute_view("SELECT * FROM ACAPA")
        db.close()
        dbm.close()
        assert len(rows_src) == len(rows_copy)

        # same copy but into an in-memory database
        # NOTE(review): db was closed above — presumably copy_to reconnects;
        # confirm against the Database implementation
        dbm2 = Database(":memory:", LOG=fLOG)
        db.copy_to(dbm2)
        dbm2.connect()
        tables = dbm2.get_table_list()
        if len(tables) != 1:
            raise Exception("expects one table not %d" % len(tables))
        rows_mem = dbm2.execute_view("SELECT * FROM ACAPA")
        dbm2.close()
        assert len(rows_src) == len(rows_mem)