예제 #1
0
    def testLoadShardedCsvFailsOnNonUniformColumns(self):
        """Expect a ValueError when shard header rows disagree.

        Two CSV payloads whose first lines differ are written into the same
        directory. Loading that directory with ``use_given_header=True`` must
        raise a ``ValueError`` whose message matches ``"YYY"``, a column name
        that appears only in the second payload.
        """
        # TemporaryDirectory deletes the fixture directory when the block
        # exits, so the test does not leave files behind (a bare mkdtemp()
        # is never cleaned up).
        with tempfile.TemporaryDirectory() as input_csv_dir:
            # NOTE(review): _write_to_file presumably stores each payload as
            # its own file inside the directory -- confirm against the helper.
            _write_to_file("""col1,col2,col3\nA,B,C""", input_csv_dir)
            _write_to_file("""XXX,YYY,ZZZ\nD,E,F""", input_csv_dir)

            with self.assertRaisesRegex(ValueError, "YYY"):
                model_performance_analysis.load_sharded_df_csvs(
                    input_csv_dir, use_given_header=True)
예제 #2
0
    def testLoadShardedCsvFailsOnConflictingArgumentsGiven(self):
        """Expect a ValueError for argument combinations that conflict.

        ``use_given_header=True`` is combined first with ``column_names`` and
        then with ``ignore_first_line=True``; each call must raise a
        ``ValueError`` whose message matches ``"Cannot pass both"``.
        """
        # Each entry is one full set of keyword arguments that the loader is
        # expected to reject; they are checked in the listed order.
        conflicting_kwargs = (
            {
                "column_names": ["letter_1", "letter_2", "letter_3"],
                "use_given_header": True,
            },
            {
                "ignore_first_line": True,
                "use_given_header": True,
            },
        )

        for kwargs in conflicting_kwargs:
            with self.assertRaisesRegex(ValueError, "Cannot pass both"):
                model_performance_analysis.load_sharded_df_csvs("", **kwargs)
예제 #3
0
    def testLoadShardedCsvUseGivenHeader(self):
        """Check that shards sharing a header row load into one dataframe.

        Two CSV payloads with the same first line (``col1,col2,col3``) are
        written into one directory and loaded with ``use_given_header=True``.
        The result is compared, sorted by ``col1``, against a dataframe parsed
        from the equivalent single CSV.
        """
        expected = pd.read_csv(
            io.StringIO("col1,col2,col3\n"
                        "A,B,C\n"
                        "D,E,F"))

        # TemporaryDirectory deletes the fixture directory when the block
        # exits, so the test does not leave files behind (a bare mkdtemp()
        # is never cleaned up). Loading and comparison stay inside the block
        # so the files exist for as long as the result is in use.
        with tempfile.TemporaryDirectory() as input_csv_dir:
            # NOTE(review): _write_to_file presumably stores each payload as
            # its own file inside the directory -- confirm against the helper.
            _write_to_file("""col1,col2,col3\nA,B,C""", input_csv_dir)
            _write_to_file("""col1,col2,col3\nD,E,F""", input_csv_dir)

            actual = model_performance_analysis.load_sharded_df_csvs(
                input_csv_dir, use_given_header=True)

            test_util.assert_dataframes_equal(self,
                                              actual,
                                              expected,
                                              sort_by_column="col1")
예제 #4
0
    def testLoadShardedCsvTest(self):
        """Check that header-less shards load with caller-supplied column names.

        Two single-row CSV payloads without header lines are written into one
        directory and loaded with ``column_names`` set to three names. The
        result is compared, sorted by ``letter_1``, against a dataframe parsed
        from the equivalent single CSV that carries those names as its header.
        """
        input_columns = ["letter_1", "letter_2", "letter_3"]

        expected = pd.read_csv(
            io.StringIO("letter_1,letter_2,letter_3\n"
                        "A,B,C\n"
                        "D,E,F"))

        # TemporaryDirectory deletes the fixture directory when the block
        # exits, so the test does not leave files behind (a bare mkdtemp()
        # is never cleaned up). Loading and comparison stay inside the block
        # so the files exist for as long as the result is in use.
        with tempfile.TemporaryDirectory() as input_csv_dir:
            # NOTE(review): _write_to_file presumably stores each payload as
            # its own file inside the directory -- confirm against the helper.
            _write_to_file("""A,B,C""", input_csv_dir)
            _write_to_file("""D,E,F""", input_csv_dir)

            actual = model_performance_analysis.load_sharded_df_csvs(
                input_csv_dir, column_names=input_columns)

            test_util.assert_dataframes_equal(self,
                                              actual,
                                              expected,
                                              sort_by_column="letter_1")