Esempio n. 1
0
    def test_spray_file_string(self):
        """Spray a CSV given by its file path and compare the read-back.

        The expected frame is written to a CSV inside a temporary
        directory, the path of that CSV is handed to ``spray_file``, and
        columns ``a`` and ``b`` of the ``get_thor_file`` result must equal
        the expected frame.
        """
        # Spray settings; they are passed positionally to ``spray_file``.
        overwrite, expire, delete_workunit = True, 1, True
        chunk_size, max_workers = 10000, 3
        logical_name = '~thor::test_spray_file_string'
        connection = hpycc.Connection("user", test_conn=False)

        unsorted = pd.DataFrame({
            "a": ['1', '3', '5', '6'],
            "b": ['aa', 'ab', 'ac', 'ad'],
        })
        expected = unsorted.sort_values('a')

        # The CSV only has to exist for the duration of the spray call.
        with TemporaryDirectory() as tmp_dir:
            csv_path = os.path.join(tmp_dir, "test.csv")
            expected.to_csv(csv_path, index=False)
            spray_file(
                connection, csv_path, logical_name, overwrite, expire,
                chunk_size, max_workers, delete_workunit,
            )

        fetched = get_thor_file(connection=connection,
                                thor_file=logical_name)
        # Keep just the two data columns for the comparison.
        actual = fetched[['a', 'b']]

        pd.testing.assert_frame_equal(expected, actual)
Esempio n. 2
0
    def test_spray_file_df(self):
        """Spray an in-memory DataFrame and compare the read-back.

        The DataFrame itself (not a file path) is handed to
        ``spray_file``; columns ``a`` and ``b`` of the ``get_thor_file``
        result must equal the frame that was sprayed.
        """
        # Spray settings; they are passed positionally to ``spray_file``.
        overwrite, expire, delete_workunit = True, 1, True
        chunk_size, max_workers = 10000, 3
        logical_name = '~thor::test_spray_file_df'
        connection = hpycc.Connection("user", test_conn=False)

        unsorted = pd.DataFrame({
            "a": ['1', '3', '5', '6'],
            "b": ['aa', 'ab', 'ac', 'ad'],
        })
        expected = unsorted.sort_values('a')

        spray_file(
            connection, expected, logical_name, overwrite, expire,
            chunk_size, max_workers, delete_workunit,
        )

        fetched = get_thor_file(connection=connection,
                                thor_file=logical_name)
        # Keep just the two data columns for the comparison.
        actual = fetched[['a', 'b']]

        pd.testing.assert_frame_equal(expected, actual)
Esempio n. 3
0
    def test_spray_file_string_smallest_chunks_many_workers(self):
        """Spray a frame with ``chunk_size=1`` and ``max_workers=100``.

        Both the expected frame and the read-back result are sorted on
        ``a`` and re-indexed before they are compared.
        """
        # NOTE(review): despite "string" in the test name, a DataFrame is
        # passed to ``spray_file`` here rather than a file path - confirm
        # this is intended.
        # NOTE(review): unlike the sibling tests, this logical name has no
        # ``thor::`` scope - confirm this is intended.
        logical_name = '~test_spray_file_string_smallest_chunks_many_workers'
        overwrite, expire, delete_workunit = True, 1, True
        chunk_size, max_workers = 1, 100
        connection = hpycc.Connection("user", test_conn=False)

        raw = pd.DataFrame({
            "a": ['1', '3', '5', '7', '9', '11', '13'],
            "b": ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
        })
        expected = raw.sort_values('a').reset_index(drop=True)

        spray_file(
            connection, expected, logical_name, overwrite, expire,
            chunk_size, max_workers, delete_workunit,
        )

        fetched = get_thor_file(connection=connection,
                                thor_file=logical_name)
        # Normalise the result the same way as the expected frame;
        # presumably row order is not guaranteed with many one-row
        # chunks - verify against spray_file.
        actual = fetched[['a', 'b']].sort_values('a')
        actual = actual.reset_index(drop=True)

        pd.testing.assert_frame_equal(expected, actual)