def validate_summary(self):
    """Ensure the summary is correct.

    Reads the ``course_enrollment_summary`` partition ``dt=2014-08-07`` from
    the warehouse, loads it into a data frame whose column names come from
    ``EnrollmentSummaryRecord.get_fields()``, and compares it with the
    expected CSV fixture after both frames have been sorted by
    ``first_enrollment_time`` and re-indexed.
    """
    data_path = url_path_join(self.warehouse_path, 'course_enrollment_summary', 'dt=2014-08-07')
    raw_output = self.read_dfs_directory(data_path)

    # Strip the literal backslash-N marker that follows a tab so those fields
    # parse as empty (presumably the warehouse's NULL marker -- confirm).
    output = StringIO(raw_output.replace('\t\\N', '\t'))

    # Materialize the keys so pandas receives a plain list on Python 2 and 3.
    columns = list(EnrollmentSummaryRecord.get_fields().keys())
    data = pandas.read_table(output, header=None, names=columns, parse_dates=True)

    expected_output_csv = os.path.join(self.data_dir, 'output', 'acceptance_expected_d_user_course.csv')
    expected = pandas.read_csv(expected_output_csv, parse_dates=True)

    for frame in (data, expected):
        # DataFrame.sort was removed in pandas 0.20; prefer sort_values and
        # only fall back to the legacy method on very old pandas releases.
        sort_in_place = getattr(frame, 'sort_values', None) or frame.sort
        sort_in_place('first_enrollment_time', inplace=True, ascending=True)
        # Row order now matches, so discard the pre-sort index labels.
        frame.reset_index(drop=True, inplace=True)

    self.assert_data_frames_equal(data, expected)
def validate_summary(self):
    """Ensure the summary is correct.

    Reads the ``course_enrollment_summary`` partition ``dt=2014-08-06`` from
    the warehouse, loads it into a data frame whose column names come from
    ``EnrollmentSummaryRecord.get_fields()``, and compares it with the
    expected CSV fixture after both frames have been sorted by
    ``first_enrollment_time`` and re-indexed.
    """
    data_path = url_path_join(self.warehouse_path, 'course_enrollment_summary', 'dt=2014-08-06')
    raw_output = self.read_dfs_directory(data_path)

    # Strip the literal backslash-N marker that follows a tab so those fields
    # parse as empty (presumably the warehouse's NULL marker -- confirm).
    output = StringIO(raw_output.replace('\t\\N', '\t'))

    # Materialize the keys so pandas receives a plain list on Python 2 and 3.
    columns = list(EnrollmentSummaryRecord.get_fields().keys())
    data = pandas.read_table(output, header=None, names=columns, parse_dates=True)

    expected_output_csv = os.path.join(
        self.data_dir, 'output', 'acceptance_expected_d_user_course.csv')
    expected = pandas.read_csv(expected_output_csv, parse_dates=True)

    for frame in (data, expected):
        # DataFrame.sort was removed in pandas 0.20; prefer sort_values and
        # only fall back to the legacy method on very old pandas releases.
        sort_in_place = getattr(frame, 'sort_values', None) or frame.sort
        sort_in_place('first_enrollment_time', inplace=True, ascending=True)
        # Row order now matches, so discard the pre-sort index labels.
        frame.reset_index(drop=True, inplace=True)

    self.assert_data_frames_equal(data, expected)
def columns(self):
    """Return the SQL schema reported by ``EnrollmentSummaryRecord``."""
    sql_schema = EnrollmentSummaryRecord.get_sql_schema()
    return sql_schema