Exemple #1
0
    def test_read_all_elections(self):
        """Read every election workbook at both levels and export its tables.

        For each year and each level, the workbook is looked up in ``data``,
        then in the temporary folder, and is finally extracted from
        ``data/elections.zip``. ``el.T0`` and ``el.T1`` are written with
        ``to_excel`` into the temporary folder and the result of
        ``vote_transfer`` is logged.

        The test returns early, after emitting a warning, when
        ``is_travis_or_appveyor()`` reports ``"travis"``.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        path = os.path.abspath(os.path.split(__file__)[0])
        temp = os.path.join(path, "temp_read_all_elections")
        if not os.path.exists(temp):
            os.mkdir(temp)
        if is_travis_or_appveyor() == "travis":
            warnings.warn(
                "The test cannot read an excel file using xlrd. Disabling it.")
            return
        for year in [2012, 2002, 2007]:
            for level in ["Départements", "Cantons", ]:
                fLOG("******reading ", year, level)
                file = os.path.join(path, "data", "election_%d.xlsx" % year)
                if not os.path.exists(file):
                    # fall back on a copy extracted by a previous iteration
                    file = os.path.join(temp, "election_%d.xlsx" % year)
                if not os.path.exists(file):
                    # named zip_ so that the builtin zip is not shadowed
                    zip_ = os.path.join(path, "data", "elections.zip")
                    unzip(zip_, temp)
                    file = os.path.join(temp, "election_%d.xlsx" % year)
                assert os.path.exists(file)

                el = ElectionResults(year=year, file=file, level=level)
                fLOG("saving")
                xls = os.path.join(temp, "out_%d_%s_0.xlsx" % (year, level))
                el.T0.to_excel(xls)
                xls = os.path.join(temp, "out_%d_%s_1.xlsx" % (year, level))
                el.T1.to_excel(xls)
                res = el.vote_transfer()
                fLOG(res)
    def test_read_all_elections(self):
        """Load each election workbook for each level and save both tables.

        The workbook of a given year is taken from ``data`` when present,
        otherwise from the temporary folder, extracting
        ``data/elections.zip`` there if needed. ``T0`` and ``T1`` are then
        exported with ``to_excel`` and the vote transfer is logged.

        Nothing is read when ``is_travis_or_appveyor()`` returns
        ``"travis"``: a warning is emitted and the test returns.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        here = os.path.abspath(os.path.dirname(__file__))
        work_dir = os.path.join(here, "temp_read_all_elections")
        if not os.path.exists(work_dir):
            os.mkdir(work_dir)
        if is_travis_or_appveyor() == "travis":
            warnings.warn(
                "The test cannot read an excel file using xlrd. Disabling it.")
            return

        for year in (2012, 2002, 2007):
            for level in ("Départements", "Cantons"):
                fLOG("******reading ", year, level)
                workbook = "election_%d.xlsx" % year
                source = os.path.join(here, "data", workbook)
                if not os.path.exists(source):
                    # not shipped uncompressed: use the extracted copy,
                    # extracting the archive first when it is missing
                    source = os.path.join(work_dir, workbook)
                    if not os.path.exists(source):
                        archive = os.path.join(here, "data", "elections.zip")
                        unzip(archive, work_dir)
                assert os.path.exists(source)

                results = ElectionResults(year=year, file=source, level=level)
                fLOG("saving")
                first_out = os.path.join(
                    work_dir, "out_%d_%s_0.xlsx" % (year, level))
                results.T0.to_excel(first_out)
                second_out = os.path.join(
                    work_dir, "out_%d_%s_1.xlsx" % (year, level))
                results.T1.to_excel(second_out)
                fLOG(results.vote_transfer())
    def test_join_multiple2(self):
        """Join ``profile_QSSH`` and ``url_QSSH`` through a two-node tree.

        Both nodes carry the filter ``bucket == 'bu###1'``. The SQL returned
        by ``inner_joins`` is executed and must give the single expected row
        and contain ``WHERE``.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        archive = os.path.join(
            os.path.dirname(__file__), "data", "database_linked.zip")
        folder = get_temp_folder(__file__, "temp_join_multiple2")
        dbfile = unzip(archive, folder)
        assert os.path.exists(dbfile)

        db = Database(dbfile, LOG=fLOG)
        db.connect()

        # one filter object, shared by both nodes
        bucket_filter = {"bucket": ("==", "bu###1")}
        profile = db.JoinTreeNode(
            "profile_QSSH", where=bucket_filter,
            parent_key="query", key="query")
        urls = db.JoinTreeNode(
            "url_QSSH", where=bucket_filter,
            parent_key=('url', 'pos'), key=('url', 'pos'))
        profile.append(urls)

        # the returned field list is not checked by this test
        sql, _ = db.inner_joins(profile, execute=False, create_index=False)

        digg = 'digg.com/security/Hackers_Put_Social_Networks_In_Crosshairs'
        expected = [('facebbooklogin', 1, 0, 'bu###1', 86, 0, digg, digg,
                     1, 0, 1, 1, 0, 0, 0, 0)]
        rows = db.execute_view(sql)
        assert rows == expected
        assert "WHERE" in sql

        db.close()
    def test_unicode(self):
        """Import ``data/unicode.txt`` into table ``uni`` and check two rows.

        The first two rows of the imported table must equal one of two
        accepted variants. On a mismatch, the differences with the second
        variant are printed and an exception is raised.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__", LogFile="temp_hal_log2.txt")
        here = os.path.dirname(__file__)
        archive = os.path.join(here, "data", "database_linked.zip")
        folder = get_temp_folder(__file__, "temp_unicode")
        dbfile = unzip(archive, folder)
        assert os.path.exists(dbfile)

        db = Database(dbfile, LOG=fLOG)
        db.connect()

        flat_file = os.path.join(here, "data", "unicode.txt")
        assert os.path.exists(flat_file)

        def drop_spaces(s):
            return s.replace(" ", "")

        db.import_table_from_flat_file(
            flat_file, "uni", columns=None, header=True,
            filter_case=drop_spaces)

        rows = db.execute_view("select * from uni limit 2")

        # Two accepted results. The first spells the text with \uXXXX
        # sequences inside bytes literals (where they are not escape
        # sequences), the second with the UTF-8 encoded bytes.
        expected = [
            [(b'.\u904a\u6232\u6a5f\u5730'.decode("utf8"),
              b'.\u904a\u6232\u57fa\u5730'.decode("utf8"),
              0.66666666666700003, 4, 6, 0.0, 0.0, 0, 0.0, 1, 0.0, 0,
              0.25, 1, 999999999, 999999999, 0, 1, 3, 1,
              0.66666666666700003),
             (b'0401\u5f71\u97f3live\u79c0'.decode("utf8"),
              b'0401\u5f71\u97f3\u8996\u8a0a'.decode("utf8"),
              0.41666666666699997, 5, 12, 0.0, 0.45454545454500001,
              5, 0.27272727272699998, 1, 0.5, 4, 0.5, 2, 999999999,
              999999999, 0, 1, 2, 0, 1.0)],
            [(b'.\xe9\x81\x8a\xe6\x88\xb2\xe6\xa9\x9f\xe5\x9c\xb0'.decode("utf8"),
              b'.\xe9\x81\x8a\xe6\x88\xb2\xe5\x9f\xba\xe5\x9c\xb0'.decode("utf8"),
              0.666666666666667, 4, 6, 0.0, 0.0, 0, 0.0, 1, 0.0, 0, 0.25, 1,
              999999999, 999999999, 0, 1, 3, 1, 0.666666666666667),
             (b'0401\xe5\xbd\xb1\xe9\x9f\xb3live\xe7\xa7\x80'.decode("utf8"),
              b'0401\xe5\xbd\xb1\xe9\x9f\xb3\xe8\xa6\x96\xe8\xa8\x8a'.decode("utf8"),
              0.416666666666667, 5, 12, 0.0, 0.454545454545455, 5,
              0.272727272727273, 1, 0.5, 4, 0.5, 2, 999999999, 999999999,
              0, 1, 2, 0, 1.0)],
        ]

        if rows not in expected:
            # dump what was read, then every differing row / cell pair
            print("2", str(rows).encode("utf8"))
            for i, (a, b) in enumerate(zip(expected[1], rows)):
                if a != b:
                    print("i", i)
                    if isinstance(a, tuple):
                        for c, d in zip(a, b):
                            print("1", c)
                            print("2", d)
                    else:
                        print("1", a)
                        print("2", b)
            raise Exception("problem")

        db.close()
Exemple #5
0
    def test_histogram2(self):
        """Check ``db.histogram`` with named groups of ``(pos, co)`` pairs.

        The query built for table ``url_QRW2`` is executed and its rows are
        compared with the expected ``cat1`` / ``cat2`` / ``none`` rows.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

        here = os.path.dirname(__file__)
        archive = os.path.join(here, "data", "database_linked.zip")
        workdir = get_temp_folder(__file__, "temp_histogram2")
        dbfile = unzip(archive, workdir)
        assert os.path.exists(dbfile)

        db = Database(dbfile, LOG=fLOG)
        db.connect()

        groups = {
            "cat1": [(1, 1), (1, 0)],
            "cat2": [(1, 10), (2, 10), (2, 1)],
        }
        query = db.histogram("url_QRW2", values=groups,
                             col_sums=["sum_nb_click"], columns=("pos", "co"))
        rows = db.execute_view(query)
        expected = [
            ('cat1', 1115, 15),
            ('cat2', 3792, 411),
            ('none', 33309, 108894),
        ]
        assert rows == expected

        db.close()
    def test_unicode(self):
        """Load ``data/unicode.txt`` as table ``uni`` and verify its content.

        ``select * from uni limit 2`` must return one of the two accepted
        row lists. Otherwise the rows are printed next to the second
        accepted list and the test fails with an exception.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__", LogFile="temp_hal_log2.txt")
        test_dir = os.path.dirname(__file__)
        zipped_db = os.path.join(test_dir, "data", "database_linked.zip")
        tmp_dir = get_temp_folder(__file__, "temp_unicode")
        db_path = unzip(zipped_db, tmp_dir)
        assert os.path.exists(db_path)

        db = Database(db_path, LOG=fLOG)
        db.connect()

        text_file = os.path.join(test_dir, "data", "unicode.txt")
        assert os.path.exists(text_file)

        def without_spaces(s):
            return s.replace(" ", "")

        db.import_table_from_flat_file(
            text_file, "uni", columns=None, header=True,
            filter_case=without_spaces)

        got = db.execute_view("select * from uni limit 2")

        # first accepted list: text written with \uXXXX sequences in bytes
        # literals (not escape sequences there), rounded floats
        accepted_a = [
            (b'.\u904a\u6232\u6a5f\u5730'.decode("utf8"),
             b'.\u904a\u6232\u57fa\u5730'.decode("utf8"),
             0.66666666666700003, 4, 6, 0.0, 0.0, 0, 0.0, 1, 0.0, 0,
             0.25, 1, 999999999, 999999999, 0, 1, 3, 1,
             0.66666666666700003),
            (b'0401\u5f71\u97f3live\u79c0'.decode("utf8"),
             b'0401\u5f71\u97f3\u8996\u8a0a'.decode("utf8"),
             0.41666666666699997, 5, 12, 0.0, 0.45454545454500001,
             5, 0.27272727272699998, 1, 0.5, 4, 0.5, 2, 999999999,
             999999999, 0, 1, 2, 0, 1.0),
        ]
        # second accepted list: text written as UTF-8 encoded bytes
        accepted_b = [
            (b'.\xe9\x81\x8a\xe6\x88\xb2\xe6\xa9\x9f\xe5\x9c\xb0'.decode("utf8"),
             b'.\xe9\x81\x8a\xe6\x88\xb2\xe5\x9f\xba\xe5\x9c\xb0'.decode("utf8"),
             0.666666666666667, 4, 6, 0.0, 0.0, 0, 0.0, 1, 0.0, 0, 0.25, 1,
             999999999, 999999999, 0, 1, 3, 1, 0.666666666666667),
            (b'0401\xe5\xbd\xb1\xe9\x9f\xb3live\xe7\xa7\x80'.decode("utf8"),
             b'0401\xe5\xbd\xb1\xe9\x9f\xb3\xe8\xa6\x96\xe8\xa8\x8a'.decode("utf8"),
             0.416666666666667, 5, 12, 0.0, 0.454545454545455, 5,
             0.272727272727273, 1, 0.5, 4, 0.5, 2, 999999999, 999999999,
             0, 1, 2, 0, 1.0),
        ]
        exp = [accepted_a, accepted_b]

        if got not in exp:
            print("2", str(got).encode("utf8"))
            for i, (a, b) in enumerate(zip(exp[1], got)):
                if a != b:
                    print("i", i)
                    if isinstance(a, tuple):
                        # show the two rows cell by cell
                        for c, d in zip(a, b):
                            print("1", c)
                            print("2", d)
                    else:
                        print("1", a)
                        print("2", b)
            raise Exception("problem")

        db.close()
    def test_join_multiple3(self):
        """Join three tables through a chained join tree and check the result.

        The tree is ``query_QSSH`` -> ``profile_QSSH`` -> ``url_QSSH``, each
        node filtered on ``bucket == 'bu###1'``. The test checks the single
        returned row, the field list returned by ``inner_joins`` and that
        the SQL contains ``WHERE``.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        archive = os.path.join(
            os.path.dirname(__file__), "data", "database_linked.zip")
        folder = get_temp_folder(__file__, "temp_join_multiple3")
        dbfile = unzip(archive, folder)
        assert os.path.exists(dbfile)

        db = Database(dbfile, LOG=fLOG)
        db.connect()

        # one filter object, shared by the three nodes
        bucket_filter = {"bucket": ("==", "bu###1")}
        root = db.JoinTreeNode("query_QSSH", where=bucket_filter)
        profile = db.JoinTreeNode(
            "profile_QSSH", where=bucket_filter,
            parent_key="query", key="query")
        urls = db.JoinTreeNode(
            "url_QSSH", where=bucket_filter,
            parent_key=('url', 'pos'), key=('url', 'pos'))
        root.append(profile)
        profile.append(urls)

        sql, fields = db.inner_joins(root, execute=False, create_index=False)

        digg = 'digg.com/security/Hackers_Put_Social_Networks_In_Crosshairs'
        expected_rows = [
            ('facebbooklogin', 'bu###1', 86, 157, 520, 0, 63, 0, 503, 0, 619,
             1, 3906365,
             'facebbooklogin', 1, 0, 'bu###1', 86, 0, digg,
             digg, 1, 0, 1, 1, 0, 0, 0, 0)]
        # (alias, table, column) triples, one per selected column
        expected_fields = [
            ('query', 'query_QSSH', 'query'),
            ('bucket', 'query_QSSH', 'bucket'),
            ('nbq', 'query_QSSH', 'nbq'),
            ('sum_num', 'query_QSSH', 'sum_num'),
            ('sum_view_url', 'query_QSSH', 'sum_view_url'),
            ('sum_click_url', 'query_QSSH', 'sum_click_url'),
            ('sum_rewrite', 'query_QSSH', 'sum_rewrite'),
            ('sum_click_ads', 'query_QSSH', 'sum_click_ads'),
            ('sum_max_pos_view', 'query_QSSH', 'sum_max_pos_view'),
            ('sum_max_pos_click', 'query_QSSH', 'sum_max_pos_click'),
            ('sum_duration', 'query_QSSH', 'sum_duration'),
            ('sum_unknown', 'query_QSSH', 'sum_unknown'),
            ('sum_daysec', 'query_QSSH', 'sum_daysec'),
            ('aquery', 'profile_QSSH', 'query'),
            ('apos', 'profile_QSSH', 'pos'),
            ('atype', 'profile_QSSH', 'type'),
            ('abucket', 'profile_QSSH', 'bucket'),
            ('amax_nb', 'profile_QSSH', 'max_nb'),
            ('asum_difftime', 'profile_QSSH', 'sum_difftime'),
            ('aurl', 'profile_QSSH', 'url'),
            ('aaurl', 'url_QSSH', 'url'),
            ('aapos', 'url_QSSH', 'pos'),
            ('aaco', 'url_QSSH', 'co'),
            ('aanb_view', 'url_QSSH', 'nb_view'),
            ('aasum_nb_view', 'url_QSSH', 'sum_nb_view'),
            ('aasum_difftime_view', 'url_QSSH', 'sum_difftime_view'),
            ('aanb_click', 'url_QSSH', 'nb_click'),
            ('aasum_nb_click', 'url_QSSH', 'sum_nb_click'),
            ('aasum_difftime_click', 'url_QSSH', 'sum_difftime_click'),
        ]

        rows = db.execute_view(sql)
        assert rows == expected_rows
        assert fields == expected_fields
        assert "WHERE" in sql
        db.close()
Exemple #8
0
    def test_loading_elections_2007(self):
        """Load the 2007 election workbook and compute the vote transfer.

        ``data/election_2007.xls`` is used when it exists; otherwise
        ``data/elections.zip`` is extracted into a temporary folder and
        ``election_2007.xlsx`` is read from there. The result of
        ``vote_transfer`` must not be empty.

        The test returns early, after emitting a warning, when
        ``is_travis_or_appveyor()`` reports ``"travis"``.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        path = os.path.abspath(os.path.split(__file__)[0])

        if is_travis_or_appveyor() == "travis":
            warnings.warn(
                "The test cannot read an excel file using xlrd. Disabling it.")
            return

        file = os.path.join(path, "data", "election_2007.xls")
        if not os.path.exists(file):
            temp = get_temp_folder(__file__, "temp_loading_elections_2007")
            # named zip_ so that the builtin zip is not shadowed
            zip_ = os.path.join(path, "data", "elections.zip")
            unzip(zip_, temp)
            file = os.path.join(temp, "election_2007.xlsx")
        assert os.path.exists(file)

        el = ElectionResults(year=2007, file=file)
        res = el.vote_transfer()
        fLOG(res)
        assert len(res) > 0
Exemple #9
0
    def test_loading_elections_2007(self):
        """Compute the vote transfer for the 2007 election results.

        The workbook is ``data/election_2007.xls`` when present, otherwise
        ``election_2007.xlsx`` extracted from ``data/elections.zip`` into a
        temporary folder. The transfer result is logged and must have a
        positive length.

        When ``is_travis_or_appveyor()`` returns ``"travis"`` a warning is
        emitted and nothing is loaded.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        here = os.path.abspath(os.path.dirname(__file__))

        if is_travis_or_appveyor() == "travis":
            warnings.warn(
                "The test cannot read an excel file using xlrd. Disabling it.")
            return

        workbook = os.path.join(here, "data", "election_2007.xls")
        if not os.path.exists(workbook):
            # extract the archive and use the .xlsx file it contains
            folder = get_temp_folder(__file__, "temp_loading_elections_2007")
            unzip(os.path.join(here, "data", "elections.zip"), folder)
            workbook = os.path.join(folder, "election_2007.xlsx")
        assert os.path.exists(workbook)

        results = ElectionResults(year=2007, file=workbook)
        transfer = results.vote_transfer()
        fLOG(transfer)
        assert len(transfer) > 0
Exemple #10
0
    def test_join_bis(self):
        """Check the SQL and the field list produced by ``db.inner_join``.

        Two joins between ``profile_QSSH`` and ``url_QSSH`` are generated
        with ``execute=False``: one on ``url`` with ``unique=False``, one on
        ``(url, pos)`` with the filter ``bucket == 'bu###1'``. Each query is
        compared, ignoring spaces, with a reference stored on
        ``TestDatabaseJoin`` and is then run to check the number of rows.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        archive = os.path.join(
            os.path.dirname(__file__), "data", "database_linked.zip")
        folder = get_temp_folder(__file__, "temp_join_bis")
        dbfile = unzip(archive, folder)
        assert os.path.exists(dbfile)

        db = Database(dbfile, LOG=fLOG)
        db.connect()

        count = db.execute_view("SELECT COUNT(*) FROM profile_QSSH")
        assert count[0][0] == 16

        # --- first join: single key column
        sql, fields = db.inner_join(
            "profile_QSSH", "url_QSSH", "url", None,
            execute=False, create_index=False, unique=False)
        sql = sql.strip(" \n\r\t")
        reference = TestDatabaseJoin._memo_SQL1.strip(" \n\r\t")
        if sql.replace(" ", "") != reference.replace(" ", ""):
            print(sql)
            raise Exception("sql queries should be identifical")

        expected_fields = [
            ('query', 'query'),
            ('profile_QSSH.pos', 'profile_QSSH_pos'),
            ('type', 'type'),
            ('bucket', 'bucket'),
            ('max_nb', 'max_nb'),
            ('sum_difftime', 'sum_difftime'),
            ('profile_QSSH.url', 'url'),
            ('url_QSSH.pos', 'url_QSSH_pos'),
            ('co', 'co'),
            ('nb_view', 'nb_view'),
            ('sum_nb_view', 'sum_nb_view'),
            ('sum_difftime_view', 'sum_difftime_view'),
            ('nb_click', 'nb_click'),
            ('sum_nb_click', 'sum_nb_click'),
            ('sum_difftime_click', 'sum_difftime_click'),
        ]
        assert fields == expected_fields
        rows = db.execute_view(sql)
        assert len(rows) == 2

        # --- second join: two key columns and a filter
        sql, fields = db.inner_join(
            "profile_QSSH", "url_QSSH", ("url", "pos"), None,
            execute=False, create_index=False,
            where="bucket == 'bu###1'")
        sql = sql.strip(" \n\r\t")
        reference = TestDatabaseJoin._memo_SQL2.strip(" \n\r\t")
        if sql.replace(" ", "") != reference.replace(" ", ""):
            # show both queries line by line before the assertion fails
            for got, want in zip(sql.split("\n"), reference.split("\n")):
                print("res", got)
                print("exp", want)
                print(got == want)
        assert sql.replace(" ", "") == reference.replace(" ", "")

        expected_fields = [
            ('query', 'query'),
            ('profile_QSSH.pos', 'pos'),
            ('type', 'type'),
            ('bucket', 'bucket'),
            ('max_nb', 'max_nb'),
            ('sum_difftime', 'sum_difftime'),
            ('profile_QSSH.url', 'url'),
            ('co', 'co'),
            ('nb_view', 'nb_view'),
            ('sum_nb_view', 'sum_nb_view'),
            ('sum_difftime_view', 'sum_difftime_view'),
            ('nb_click', 'nb_click'),
            ('sum_nb_click', 'sum_nb_click'),
            ('sum_difftime_click', 'sum_difftime_click'),
        ]
        assert fields == expected_fields
        rows = db.execute_view(sql)
        assert len(rows) == 1

        db.close()
Exemple #11
0
    def test_summary(self):
        """Check that ``db.summary()`` returns a non-empty result.

        The database is extracted from ``data/database_linked.zip`` into a
        temporary folder before being opened.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        archive = os.path.join(
            os.path.dirname(__file__), "data", "database_linked.zip")
        folder = get_temp_folder(__file__, "temp_summary")
        dbfile = unzip(archive, folder)
        assert os.path.exists(dbfile)

        db = Database(dbfile, LOG=fLOG)
        db.connect()

        summary = db.summary()
        assert len(summary) > 0
        db.close()
Exemple #12
0
    def test_summary(self):
        """Open the linked database and verify its summary is not empty.

        ``data/database_linked.zip`` is unzipped into a temporary folder;
        the extracted file is opened and ``summary()`` is called on it.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        test_dir = os.path.dirname(__file__)
        zipped_db = os.path.join(test_dir, "data", "database_linked.zip")
        tmp_dir = get_temp_folder(__file__, "temp_summary")
        db_path = unzip(zipped_db, tmp_dir)
        assert os.path.exists(db_path)

        database = Database(db_path, LOG=fLOG)
        database.connect()

        overview = database.summary()
        assert len(overview) > 0
        database.close()
Exemple #13
0
    def test_histogram(self):
        """Run ``db.histogram`` on ``url_QRW2`` with four configurations.

        The queries cover: two columns, a single column, a single column
        restricted to a list of values, and a single column with named
        groups of values. Row counts are checked for the first two, exact
        rows for the last two.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

        archive = os.path.join(
            os.path.dirname(__file__), "data", "database_linked.zip")
        workdir = get_temp_folder(__file__, "temp_histogram")
        dbfile = unzip(archive, workdir)
        assert os.path.exists(dbfile)

        db = Database(dbfile, LOG=fLOG)
        db.connect()

        # two columns
        query = db.histogram(
            "url_QRW2", col_sums=["sum_nb_click"], columns=("pos", "url"))
        rows = db.execute_view(query)
        assert len(rows) == 38216

        # one column
        query = db.histogram(
            "url_QRW2", col_sums=["sum_nb_click"], columns="url")
        rows = db.execute_view(query)
        assert len(rows) == 28436

        # one column, explicit list of values
        query = db.histogram(
            "url_QRW2", col_sums=["sum_nb_click"], columns="pos",
            values=[1, 2, 3, 4, 5])
        rows = db.execute_view(query)
        expected = [
            (1, 2370, 87049),
            (2, 5734, 11522),
            (3, 4009, 5383),
            (4, 4304, 1778),
            (5, 21799, 3588),
        ]
        assert rows == expected

        # one column, named groups of values
        groups = {
            "pos123": [1, 2, 3],
            "others": [4, 5, 6, 7, 8, 9, 10],
        }
        query = db.histogram(
            "url_QRW2", col_sums=["sum_nb_click"], columns="pos",
            values=groups)
        rows = db.execute_view(query)
        expected = [
            ('none', 21, 0),
            ('others', 26082, 5366),
            ('pos123', 12113, 103954),
        ]
        assert rows == expected

        db.close()
    def test_convert_music_file(self):
        """Convert ``podcasts_example.mp3`` to a wav file.

        ``ffmpeg.zip`` is unzipped from the ``data`` folder next to the
        temporary folder, and the value returned by ``unzip`` is passed as
        ``ffmpeg`` to ``convert_music_file``. The produced file must exist
        and be larger than 90000 bytes.

        When ``is_travis_or_appveyor()`` returns ``"travis"`` a warning is
        emitted and the conversion is not run.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

        workdir = get_temp_folder(__file__, "temp_convert_music_file")
        data_dir = os.path.join(workdir, "..", "data")
        archive = os.path.join(data_dir, "ffmpeg.zip")
        assert os.path.exists(archive)
        ffmpeg_path = unzip(archive, workdir)
        source = os.path.join(data_dir, "podcasts_example.mp3")
        target = os.path.join(workdir, "podcasts_example.wav")

        if is_travis_or_appveyor() == "travis":
            warnings.warn("pydub is not available")
            return

        convert_music_file(source, target, ffmpeg=ffmpeg_path)
        assert os.path.exists(target)
        assert os.path.getsize(target) > 90000
Exemple #15
0
    def test_histogram(self):
        """Exercise ``db.histogram`` on table ``url_QRW2``.

        Four histograms summing ``sum_nb_click`` are built and executed:
        by ``(pos, url)``, by ``url``, by ``pos`` for the values 1 to 5, and
        by ``pos`` with two named groups. The first two are checked on their
        number of rows, the last two on their exact rows.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        test_dir = os.path.dirname(__file__)
        zipped_db = os.path.join(test_dir, "data", "database_linked.zip")
        tmp_dir = get_temp_folder(__file__, "temp_histogram")
        db_path = unzip(zipped_db, tmp_dir)
        assert os.path.exists(db_path)

        db = Database(db_path, LOG=fLOG)
        db.connect()

        by_pos_url = db.execute_view(db.histogram(
            "url_QRW2", col_sums=["sum_nb_click"], columns=("pos", "url")))
        assert len(by_pos_url) == 38216

        by_url = db.execute_view(db.histogram(
            "url_QRW2", col_sums=["sum_nb_click"], columns="url"))
        assert len(by_url) == 28436

        by_pos = db.execute_view(db.histogram(
            "url_QRW2", col_sums=["sum_nb_click"], columns="pos",
            values=[1, 2, 3, 4, 5]))
        assert by_pos == [
            (1, 2370, 87049),
            (2, 5734, 11522),
            (3, 4009, 5383),
            (4, 4304, 1778),
            (5, 21799, 3588),
        ]

        by_group = db.execute_view(db.histogram(
            "url_QRW2", col_sums=["sum_nb_click"], columns="pos",
            values={"pos123": [1, 2, 3],
                    "others": [4, 5, 6, 7, 8, 9, 10]}))
        assert by_group == [
            ('none', 21, 0),
            ('others', 26082, 5366),
            ('pos123', 12113, 103954),
        ]

        db.close()
Exemple #16
0
    def test_convert_music_file(self):
        """Check that an mp3 file can be converted into a wav file.

        The archive ``ffmpeg.zip`` must exist in the ``data`` folder located
        beside the temporary folder; it is unzipped and the value returned
        by ``unzip`` is given to ``convert_music_file`` as ``ffmpeg``. The
        output file must exist and exceed 90000 bytes.

        The conversion is skipped, with a warning, when
        ``is_travis_or_appveyor()`` returns ``"travis"``.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

        tmp_dir = get_temp_folder(__file__, "temp_convert_music_file")
        data_dir = os.path.join(tmp_dir, "..", "data")
        zipped_ffmpeg = os.path.join(data_dir, "ffmpeg.zip")
        assert os.path.exists(zipped_ffmpeg)
        unzipped = unzip(zipped_ffmpeg, tmp_dir)
        mp3_path = os.path.join(data_dir, "podcasts_example.mp3")
        wav_path = os.path.join(tmp_dir, "podcasts_example.wav")

        if is_travis_or_appveyor() == "travis":
            warnings.warn("pydub is not available")
            return

        convert_music_file(mp3_path, wav_path, ffmpeg=unzipped)
        assert os.path.exists(wav_path)
        size = os.stat(wav_path).st_size
        assert size > 90000
Exemple #17
0
    def test_histogram2(self):
        """Build a histogram of ``url_QRW2`` over named ``(pos, co)`` groups.

        Two groups, ``cat1`` and ``cat2``, are given as lists of pairs; the
        executed query must return the expected rows for ``cat1``, ``cat2``
        and ``none``.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")

        test_dir = os.path.dirname(__file__)
        zipped_db = os.path.join(test_dir, "data", "database_linked.zip")
        tmp_dir = get_temp_folder(__file__, "temp_histogram2")
        db_path = unzip(zipped_db, tmp_dir)
        assert os.path.exists(db_path)

        database = Database(db_path, LOG=fLOG)
        database.connect()

        categories = {
            "cat1": [(1, 1), (1, 0)],
            "cat2": [(1, 10), (2, 10), (2, 1)],
        }
        statement = database.histogram(
            "url_QRW2",
            values=categories,
            col_sums=["sum_nb_click"],
            columns=("pos", "co"))
        result = database.execute_view(statement)
        assert result == [
            ('cat1', 1115, 15),
            ('cat2', 3792, 411),
            ('none', 33309, 108894),
        ]

        database.close()
Exemple #18
0
    def test_loading_elections_2012(self):
        """Load the 2012 results, correct them and estimate vote transfers.

        ``data/election_2012.xls`` is used when it exists; otherwise
        ``data/elections.zip`` is extracted into a temporary folder and
        ``election_2012.xlsx`` is read from there. After ``correct("cand")``
        the vote transfer is computed and logged, then ``bootstrap`` is run
        with 10 iterations and two of its outputs are combined into strings;
        the combined result must not be empty.

        The test returns early, after emitting a warning, when
        ``is_travis_or_appveyor()`` reports ``"travis"``.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        path = os.path.abspath(os.path.split(__file__)[0])
        file = os.path.join(path, "data", "election_2012.xls")
        if not os.path.exists(file):
            temp = get_temp_folder(__file__, "temp_loading_elections_2012")
            # named zip_ so that the builtin zip is not shadowed
            zip_ = os.path.join(path, "data", "elections.zip")
            unzip(zip_, temp)
            file = os.path.join(temp, "election_2012.xlsx")
        assert os.path.exists(file)

        if is_travis_or_appveyor() == "travis":
            warnings.warn(
                "The test cannot read an excel file using xlrd. Disabling it.")
            return

        el = ElectionResults(year=2012, file=file)
        # candidate votes are logged before and after the correction
        fLOG(el.get_candidates_votes(0))
        el.correct("cand")
        fLOG(el.get_candidates_votes(0))
        fLOG(sum(el.get_people(0)), sum(el.get_people(1)))
        res = el.vote_transfer()
        fLOG(res)

        def pour(x):
            """Format a ratio as a percentage, empty below 1%."""
            if x < 0.01:
                return ""
            return "%3.0f" % (x * 100) + "%"

        def pour2(x):
            """Format a ratio as a percentage, ``"0%"`` below 1%."""
            if x < 0.01:
                return "0%"
            return "%3.0f" % (x * 100) + "%"

        def agg(x):
            """Merge two formatted bounds into one cell."""
            if x[0] == x[1]:
                # identical bounds: show the value once, nothing for "0%"
                return "" if x[0] == "0%" else x[0]
            return "%s-%s" % tuple(x)

        boot = el.bootstrap(iter=10, fLOG=fLOG)
        comb = el.combine_into_string([boot[2], boot[3]], pour2, agg)
        fLOG(comb)
        assert len(comb) > 0

        fLOG(res.to_html(float_format=pour, index=True))
        fLOG(comb.to_html(index=True))
    def test_cross_1(self):
        """Run three ``CROSS ... PLACE ...`` queries and check their output.

        For each query, ``execute_view(sql, add_column_name=True)`` must
        return one more row than ``execute(sql)`` and its first two rows
        must have the same length. For the first query the header row and
        the rows at index 1 and -1 are also compared with expected values.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ ==
             "__main__", LogFile="temp_hal_log2.txt")
        filename = os.path.join(os.path.split(
            __file__)[0], "data", "database_linked.zip")
        temp = get_temp_folder(__file__, "temp_cross_1")
        filename = unzip(filename, temp)
        assert os.path.exists(filename)

        db = Database(filename, LOG=fLOG)
        db.connect()

        # NOTE(review): CROSS / PLACE is not standard SQL; presumably an
        # extension interpreted by Database -- confirm.
        sql = """CROSS pos PLACE nb_click,url FROM url_QSSH ORDER BY nb_click -- comment
                    DESC LIMIT 100"""
        cur = db.execute(sql)
        mat = list(cur)
        view = db.execute_view(sql, add_column_name=True)
        # the view has one extra row: the column names (view[0])
        self.assertEqual(len(view), len(mat) + 1)
        self.assertEqual(view[0], ['1;nb_click', '1;url', '2;nb_click', '2;url', '3;nb_click', '3;url',
                                   '4;nb_click', '4;url', '5;nb_click', '5;url', '6;nb_click', '6;url', '7;nb_click',
                                   '7;url', '8;nb_click', '8;url', '9;nb_click',
                                   '9;url', '10;nb_click', '10;url', '11;nb_click', '11;url', '12;nb_click',
                                   '12;url', '13;nb_click', '13;url', '14;nb_click',
                                   '14;url', '15;nb_click', '15;url', '16;nb_click', '16;url', '18;nb_click', '18;url'])
        self.assertEqual(len(view[0]), len(view[1]))

        # expected rows, keyed by their index in view (1 = first data row,
        # -1 = last row)
        exp = {
            1: [268.0, 'www.facebook.com/login.php', 97.0, 'www.friendster.com/login.php',
                29.0, 'https://login.facebook.com/login.php', 15.0,
                'https://login.yahoo.com/', 10.0, 'https://login.facebook.com/login.php', 10.0,
                'lite.facebook.com/login/?next=http%3A%2F%2Flite.facebook.com%2Ftheamloong%2Fvideo%2Fof%2FTNT-sanshou',
                13.0, 'lite.facebook.com/login/?next=http%3A%2F%2Flite.facebook.com%2Ftheamloong%2Fvideo%2Fof%2FTNT-sanshou',
                5.0, 'https://login.verizonwireless.com/amserver/UI/Login', 6.0, 'https://login.facebook.com/login.php', 2.0,
                'https://login.comcast.net/', 0.0, 'HottieMatchUp.com/matchcomlogin', 0.0,
                'FreshPCFix.com/loginscreen', 0.0, 'FreshPCFix.com/loginscreen', 0.0,
                'login.marketingblacksmith.com', 0.0, 'sites.managerslogin.com', 0.0,
                'sites.managerslogin.com', 0.0, 'sites.managerslogin.com'],
            -1: [3.0, 'https://sso.uboc.com/obc/forms/login.fcc?user_type=R', 3.0,
                 'https://www.atitesting.com/login.aspx', 1.0, 'askabouttech.com/how-to-bypass-login-password-on-windows-vista/',
                 1.0, 'homegrownfreaks.net/forums/login.html',
                 1.0, 'https://trade.htsc.com.cn/webtrade/login/loginAction1.do?method=preLogin2&opType=TD',
                 1.0, 'moodle.dist113.org/login/in/in.php?p=addicting+games+the+impossible+quiz',
                 1.0, 'www.edweek.org/login.html?source=http://www.edweek.org/ew/articles/2009/08/25/02sat.h29.html&destina',
                 1.0, 'www.gifs.net/image/Holidays/Birthday/Cake/9451?n=login.php3',
                 1.0, 'www.grazeit.com/pages/blackplanet-com-login-1814/', 1.0, 'www.myspace-login.us/',
                 None, None, None, None, None, None, None, None, None, None, None, None, None, None],
        }

        for k, v in exp.items():
            # a whole-row mismatch is not fatal by itself: lengths and then
            # individual values are compared, so a row that differs only by
            # its container type still passes
            if view[k] != v:
                if len(view[k]) != len(v):
                    raise Exception(
                        "exp[%d] and view [%d] have different lengths" % (k, k))
                for a, b in zip(v, view[k]):
                    if a != b:
                        raise Exception(
                            "k=%d, different values\nexp %s\n != %s" % (k, str(a), str(b)))

        self.assertEqual(len(view[0]), len(view[-1]))

        # same checks on the row counts with a single placed column
        sql = """CROSS pos PLACE nb_click FROM url_QSSH ORDER BY nb_click -- comment
                    DESC LIMIT 100"""
        cur = db.execute(sql)
        mat = list(cur)
        view = db.execute_view(sql, add_column_name=True)
        self.assertEqual(len(view), len(mat) + 1)
        self.assertEqual(len(view[0]), len(view[1]))

        # same checks with the crossed column listed twice
        sql = """CROSS pos,pos PLACE nb_click FROM url_QSSH ORDER BY nb_click -- comment
                    DESC LIMIT 100"""
        cur = db.execute(sql)
        mat = list(cur)
        view = db.execute_view(sql, add_column_name=True)
        self.assertEqual(len(view), len(mat) + 1)
        self.assertEqual(len(view[0]), len(view[1]))

        db.close()
Exemple #20
0
    def test_join_bis(self):
        """
        Checks the SQL text and the field list returned by ``inner_join``
        (called with ``execute=False``) for tables *profile_QSSH* and
        *url_QSSH*: first joined on ``url`` only, then on ``(url, pos)``
        with a ``where`` clause. Each generated query is then executed and
        the number of returned rows is verified.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        archive = os.path.join(
            os.path.dirname(__file__), "data", "database_linked.zip")
        folder = get_temp_folder(__file__, "temp_join_bis")
        filename = unzip(archive, folder)
        assert os.path.exists(filename)

        db = Database(filename, LOG=fLOG)
        db.connect()

        count = db.execute_view("SELECT COUNT(*) FROM profile_QSSH")
        assert count[0][0] == 16

        # --- first join: single key, no filter
        query, columns = db.inner_join(
            "profile_QSSH", "url_QSSH", "url", None,
            execute=False, create_index=False, unique=False)
        query = query.strip(" \n\r\t")
        expected = TestDatabaseJoin._memo_SQL1.strip(" \n\r\t")
        # spaces are not significant for the comparison
        if query.replace(" ", "") != expected.replace(" ", ""):
            print(query)
            raise Exception("sql queries should be identifical")

        expected_columns = [
            ('query', 'query'),
            ('profile_QSSH.pos', 'profile_QSSH_pos'),
            ('type', 'type'),
            ('bucket', 'bucket'),
            ('max_nb', 'max_nb'),
            ('sum_difftime', 'sum_difftime'),
            ('profile_QSSH.url', 'url'),
            ('url_QSSH.pos', 'url_QSSH_pos'),
            ('co', 'co'),
            ('nb_view', 'nb_view'),
            ('sum_nb_view', 'sum_nb_view'),
            ('sum_difftime_view', 'sum_difftime_view'),
            ('nb_click', 'nb_click'),
            ('sum_nb_click', 'sum_nb_click'),
            ('sum_difftime_click', 'sum_difftime_click'),
        ]
        assert columns == expected_columns
        rows = db.execute_view(query)
        assert len(rows) == 2

        # --- second join: two keys and a where clause
        query, columns = db.inner_join(
            "profile_QSSH", "url_QSSH", ("url", "pos"), None,
            execute=False, create_index=False,
            where="bucket == 'bu###1'")
        query = query.strip(" \n\r\t")
        expected = TestDatabaseJoin._memo_SQL2.strip(" \n\r\t")
        compact_query = query.replace(" ", "")
        compact_expected = expected.replace(" ", "")
        if compact_query != compact_expected:
            # line by line dump to help locating the difference
            for got, wanted in zip(query.split("\n"), expected.split("\n")):
                print("res", got)
                print("exp", wanted)
                print(got == wanted)
        assert compact_query == compact_expected

        expected_columns = [
            ('query', 'query'),
            ('profile_QSSH.pos', 'pos'),
            ('type', 'type'),
            ('bucket', 'bucket'),
            ('max_nb', 'max_nb'),
            ('sum_difftime', 'sum_difftime'),
            ('profile_QSSH.url', 'url'),
            ('co', 'co'),
            ('nb_view', 'nb_view'),
            ('sum_nb_view', 'sum_nb_view'),
            ('sum_difftime_view', 'sum_difftime_view'),
            ('nb_click', 'nb_click'),
            ('sum_nb_click', 'sum_nb_click'),
            ('sum_difftime_click', 'sum_difftime_click'),
        ]
        assert columns == expected_columns
        rows = db.execute_view(query)
        assert len(rows) == 1

        db.close()
    def test_attach_database(self):
        """
        Opens a database with a second one attached under alias ``seco``,
        in two ways: through the ``attach`` dictionary, then through a
        single string combining main file, alias and attached file.
        Both must give the same answer to the same query; the second
        connection is also used to check the list of attached databases,
        the list of tables and the files reported by ``get_file``.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__",
             LogFile="temp_hal_log2.txt")

        here = os.path.dirname(__file__)

        main_zip = os.path.join(here, "data", "database_linked.zip")
        folder = get_temp_folder(__file__, "temp_attach_database")
        filename = unzip(main_zip, folder)
        assert os.path.exists(filename)

        second_zip = os.path.join(here, "data", "database_linked_cor.zip")
        # NOTE(review): get_temp_folder is called a second time with the
        # same arguments; whether it resets the folder is not visible
        # here, do not merge both calls without checking.
        folder = get_temp_folder(__file__, "temp_attach_database")
        file2 = unzip(second_zip, folder)
        assert os.path.exists(file2)

        count_query = "SELECT COUNT(*) FROM seco.word_QSSH"

        # method 1: attached database given as a dictionary {alias: file}
        db = Database(filename, attach={"seco": file2}, LOG=fLOG)
        db.connect()
        vie = db.execute_view(count_query)
        self.assertEqual(len(vie), 1)
        db.close()

        # method 2: everything in one string
        combined = filename + " ; seco , " + file2
        db = Database(combined, LOG=fLOG)
        db.connect()
        vi2 = db.execute_view(count_query)
        self.assertEqual(len(vi2), 1)
        self.assertEqual(vi2, vie)

        self.assertEqual(db.get_attached_database_list(), ['seco'])

        tables = db.get_table_list(True)
        expected_tables = [
            'seco.query', 'seco.idx_query_query',
            'seco.qtok', 'seco.idx_qtok_qtok',
            'seco.pairs', 'seco.pairs_query___q1',
            'seco.bucket', 'seco.idx_bucket_bucket',
            'seco.url', 'seco.idx_url_url',
            'seco.profile', 'seco.profile_query___',
            'seco.profile_QRW2', 'seco.profile_QRW2_query___',
            'seco.profile_QSSH', 'seco.profile_QSSH_query___',
            'seco.query_QRW2', 'seco.query_QRW2_query___',
            'seco.query_QSSH', 'seco.query_QSSH_query___',
            'seco.url_QRW2', 'seco.url_QRW2_url___',
            'seco.url_QSSH', 'seco.url_QSSH_url___',
            'seco.word', 'seco.idx_word_word',
            'seco.word_QRW2', 'seco.word_QRW2_word___',
            'seco.word_QSSH', 'seco.word_QSSH_word___',
        ]
        self.assertEqual(tables, expected_tables)

        assert "db.attach_database" in db.get_python_code()[1]
        assert db.has_table("seco.word_QSSH_word___")

        # get_file(True) must mention the main file and every attached
        # alias / file
        files = db.get_file(True)
        main_file = db.get_file()
        assert main_file in files
        for alias, attached in db.get_attached_database_list(True):
            assert alias in files
            assert attached in files

        # what get_file(True) returns is given back to the constructor
        db2 = Database(files, LOG=fLOG)
        db2.connect()
        db2.close()

        db.close()
Exemple #22
0
    def test_cross_1(self):
        """
        Runs three ``CROSS ... PLACE ...`` queries on table *url_QSSH*
        and checks that ``execute_view(..., add_column_name=True)``
        returns one more row than ``execute`` and rows of constant
        length. For the first query, the header row, the row at index 1
        and the last row are compared with hard-coded expected values.

        Raises ``Exception`` with a description of the first difference
        when an expected row does not match.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__",
             LogFile="temp_hal_log2.txt")
        filename = os.path.join(
            os.path.dirname(__file__), "data", "database_linked.zip")
        temp = get_temp_folder(__file__, "temp_cross_1")
        filename = unzip(filename, temp)
        assert os.path.exists(filename)

        db = Database(filename, LOG=fLOG)
        db.connect()

        sql = """CROSS pos PLACE nb_click,url FROM url_QSSH ORDER BY nb_click -- comment
                    DESC LIMIT 100"""
        mat = list(db.execute(sql))
        view = db.execute_view(sql, add_column_name=True)
        self.assertEqual(len(view), len(mat) + 1)
        self.assertEqual(view[0], [
            '1;nb_click', '1;url',
            '2;nb_click', '2;url',
            '3;nb_click', '3;url',
            '4;nb_click', '4;url',
            '5;nb_click', '5;url',
            '6;nb_click', '6;url',
            '7;nb_click', '7;url',
            '8;nb_click', '8;url',
            '9;nb_click', '9;url',
            '10;nb_click', '10;url',
            '11;nb_click', '11;url',
            '12;nb_click', '12;url',
            '13;nb_click', '13;url',
            '14;nb_click', '14;url',
            '15;nb_click', '15;url',
            '16;nb_click', '16;url',
            '18;nb_click', '18;url',
        ])
        self.assertEqual(len(view[0]), len(view[1]))

        # expected content, indexed by row position in view
        # (one (nb_click, url) pair per line)
        exp = {
            1: [
                268.0, 'www.facebook.com/login.php',
                97.0, 'www.friendster.com/login.php',
                29.0, 'https://login.facebook.com/login.php',
                15.0, 'https://login.yahoo.com/',
                10.0, 'https://login.facebook.com/login.php',
                10.0, 'lite.facebook.com/login/?next=http%3A%2F%2Flite.facebook.com%2Ftheamloong%2Fvideo%2Fof%2FTNT-sanshou',
                13.0, 'lite.facebook.com/login/?next=http%3A%2F%2Flite.facebook.com%2Ftheamloong%2Fvideo%2Fof%2FTNT-sanshou',
                5.0, 'https://login.verizonwireless.com/amserver/UI/Login',
                6.0, 'https://login.facebook.com/login.php',
                2.0, 'https://login.comcast.net/',
                0.0, 'HottieMatchUp.com/matchcomlogin',
                0.0, 'FreshPCFix.com/loginscreen',
                0.0, 'FreshPCFix.com/loginscreen',
                0.0, 'login.marketingblacksmith.com',
                0.0, 'sites.managerslogin.com',
                0.0, 'sites.managerslogin.com',
                0.0, 'sites.managerslogin.com',
            ],
            -1: [
                3.0, 'https://sso.uboc.com/obc/forms/login.fcc?user_type=R',
                3.0, 'https://www.atitesting.com/login.aspx',
                1.0, 'askabouttech.com/how-to-bypass-login-password-on-windows-vista/',
                1.0, 'homegrownfreaks.net/forums/login.html',
                1.0, 'https://trade.htsc.com.cn/webtrade/login/loginAction1.do?method=preLogin2&opType=TD',
                1.0, 'moodle.dist113.org/login/in/in.php?p=addicting+games+the+impossible+quiz',
                1.0, 'www.edweek.org/login.html?source=http://www.edweek.org/ew/articles/2009/08/25/02sat.h29.html&destina',
                1.0, 'www.gifs.net/image/Holidays/Birthday/Cake/9451?n=login.php3',
                1.0, 'www.grazeit.com/pages/blackplanet-com-login-1814/',
                1.0, 'www.myspace-login.us/',
                # the 14 remaining cells of the last row are expected empty
                None, None, None, None, None, None, None,
                None, None, None, None, None, None, None,
            ],
        }

        for k, v in exp.items():
            if view[k] != v:
                if len(view[k]) != len(v):
                    # two placeholders for the two values of the tuple,
                    # otherwise the formatting itself raises TypeError
                    raise Exception(
                        "exp[%d] and view [%d] have different lengths" % (k, k))
                for a, b in zip(v, view[k]):
                    if a != b:
                        raise Exception(
                            "k=%d, different values\nexp %s\n != %s" % (k, str(a), str(b)))

        self.assertEqual(len(view[0]), len(view[-1]))

        sql = """CROSS pos PLACE nb_click FROM url_QSSH ORDER BY nb_click -- comment
                    DESC LIMIT 100"""
        mat = list(db.execute(sql))
        view = db.execute_view(sql, add_column_name=True)
        self.assertEqual(len(view), len(mat) + 1)
        self.assertEqual(len(view[0]), len(view[1]))

        sql = """CROSS pos,pos PLACE nb_click FROM url_QSSH ORDER BY nb_click -- comment
                    DESC LIMIT 100"""
        mat = list(db.execute(sql))
        view = db.execute_view(sql, add_column_name=True)
        self.assertEqual(len(view), len(mat) + 1)
        self.assertEqual(len(view[0]), len(view[1]))

        db.close()
Exemple #23
0
    def test_attach_database(self):
        """
        Opens a database with a second one attached under alias ``seco``,
        in two ways: through the ``attach`` dictionary, then through a
        single string combining main file, alias and attached file.
        Both must give the same answer to the same query; the second
        connection is also used to check the list of attached databases,
        the list of tables and the files reported by ``get_file``.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__",
             LogFile="temp_hal_log2.txt")

        here = os.path.dirname(__file__)

        main_zip = os.path.join(here, "data", "database_linked.zip")
        folder = get_temp_folder(__file__, "temp_attach_database")
        filename = unzip(main_zip, folder)
        assert os.path.exists(filename)

        second_zip = os.path.join(here, "data", "database_linked_cor.zip")
        # NOTE(review): get_temp_folder is called a second time with the
        # same arguments; whether it resets the folder is not visible
        # here, do not merge both calls without checking.
        folder = get_temp_folder(__file__, "temp_attach_database")
        file2 = unzip(second_zip, folder)
        assert os.path.exists(file2)

        count_query = "SELECT COUNT(*) FROM seco.word_QSSH"

        # method 1: attached database given as a dictionary {alias: file}
        db = Database(filename, attach={"seco": file2}, LOG=fLOG)
        db.connect()
        vie = db.execute_view(count_query)
        self.assertEqual(len(vie), 1)
        db.close()

        # method 2: everything in one string
        combined = filename + " ; seco , " + file2
        db = Database(combined, LOG=fLOG)
        db.connect()
        vi2 = db.execute_view(count_query)
        self.assertEqual(len(vi2), 1)
        self.assertEqual(vi2, vie)

        self.assertEqual(db.get_attached_database_list(), ['seco'])

        tables = db.get_table_list(True)
        expected_tables = [
            'seco.query', 'seco.idx_query_query',
            'seco.qtok', 'seco.idx_qtok_qtok',
            'seco.pairs', 'seco.pairs_query___q1',
            'seco.bucket', 'seco.idx_bucket_bucket',
            'seco.url', 'seco.idx_url_url',
            'seco.profile', 'seco.profile_query___',
            'seco.profile_QRW2', 'seco.profile_QRW2_query___',
            'seco.profile_QSSH', 'seco.profile_QSSH_query___',
            'seco.query_QRW2', 'seco.query_QRW2_query___',
            'seco.query_QSSH', 'seco.query_QSSH_query___',
            'seco.url_QRW2', 'seco.url_QRW2_url___',
            'seco.url_QSSH', 'seco.url_QSSH_url___',
            'seco.word', 'seco.idx_word_word',
            'seco.word_QRW2', 'seco.word_QRW2_word___',
            'seco.word_QSSH', 'seco.word_QSSH_word___',
        ]
        self.assertEqual(tables, expected_tables)

        assert "db.attach_database" in db.get_python_code()[1]
        assert db.has_table("seco.word_QSSH_word___")

        # get_file(True) must mention the main file and every attached
        # alias / file
        files = db.get_file(True)
        main_file = db.get_file()
        assert main_file in files
        for alias, attached in db.get_attached_database_list(True):
            assert alias in files
            assert attached in files

        # what get_file(True) returns is given back to the constructor
        db2 = Database(files, LOG=fLOG)
        db2.connect()
        db2.close()

        db.close()
    def test_loading_elections_2012(self):
        """
        Loads the 2012 election results (from ``data/election_2012.xls``
        or, if that file is missing, from ``election_2012.xlsx`` extracted
        from ``data/elections.zip``), then logs the candidates' votes
        before and after ``correct("cand")``, the vote transfer and a
        string combination of two bootstrap outputs.
        The test returns early with a warning on travis.
        """
        fLOG(__file__, self._testMethodName,
             OutputPrint=__name__ == "__main__")
        path = os.path.abspath(os.path.dirname(__file__))
        file = os.path.join(path, "data", "election_2012.xls")
        if not os.path.exists(file):
            # the excel file is not there, look into the archive
            temp = get_temp_folder(__file__, "temp_loading_elections_2012")
            unzip(os.path.join(path, "data", "elections.zip"), temp)
            file = os.path.join(temp, "election_2012.xlsx")
        assert os.path.exists(file)

        if is_travis_or_appveyor() == "travis":
            warnings.warn(
                "The test cannot read an excel file using xlrd. Disabling it.")
            return

        el = ElectionResults(year=2012, file=file)
        fLOG(el.get_candidates_votes(0))
        el.correct("cand")
        fLOG(el.get_candidates_votes(0))
        fLOG(sum(el.get_people(0)), sum(el.get_people(1)))

        # switch to True to log the first rows of both tables with a total
        more_debug = False
        if more_debug:
            fLOG("----")
            for tab in (el.T0, el.T1):
                tbl = tab.head().copy()
                # sum of every column which is not a level column
                tbl["total"] = tbl.apply(
                    lambda row: sum(
                        [row[_] for _ in tab.columns
                         if _ not in el.LevelCol]),
                    axis=1)
                fLOG(tbl.to_html(float_format=lambda x: "%d" % x,
                                 force_unicode=True,
                                 index=False))
                fLOG("----")

        res = el.vote_transfer()
        fLOG(res)

        def pour(x):
            # percentage, empty string below 1%
            return "" if x < 0.01 else "%3.0f" % (x * 100) + "%"

        def pour2(x):
            # percentage, "0%" below 1%
            return "0%" if x < 0.01 else "%3.0f" % (x * 100) + "%"

        def agg(x):
            # two equal strings collapse into one (nothing for "0%"),
            # otherwise both are shown as "first-second"
            if x[0] == x[1]:
                return "" if x[0] == "0%" else x[0]
            return "%s-%s" % tuple(x)

        boot = el.bootstrap(iter=10, fLOG=fLOG)
        comb = el.combine_into_string([boot[2], boot[3]], pour2, agg)
        fLOG(comb)
        assert len(comb) > 0

        fLOG(res.to_html(float_format=pour, index=True))
        fLOG(comb.to_html(index=True))