def populate_source_data(source_data_item: DataSource):
    """Load the source matrix for ``source_data_item`` and store it in cleaned form.

    The matrix is fetched for the item's type, region and year. Each fetched
    row carries a classification system, a raw source value, a raw target
    value and a total. Both raw values are expanded with ``clean_value`` and
    the total is shared equally between every (source, target) pair the row
    expands to. The item's ``system`` is set from the first row, and the
    expanded triples are handed to ``add_data_from_tuple``.
    """
    rows = get_source_matrix_of_type(
        source_data_item.type_,
        source_data_item.region,
        source_data_item.year,
    )

    # Hacky, but it avoids an extra database hit: the classification system
    # travels with every row, so verify there is only one per input table and
    # then copy it from the first row onto the item.
    check_only_one_classification_system([row[2] for row in rows])
    source_data_item.system = rows[0][2]

    entries = []
    for _, _, system, source_value, target_value, total in rows:
        sources = clean_value(system, source_value)
        targets = clean_value(system, target_value)
        # One raw row may expand to several cleaned pairs; each gets an equal share.
        share = total / (len(targets) * len(sources))
        entries.extend(
            (source, target, share) for source in sources for target in targets
        )

    source_data_item.add_data_from_tuple(tuple(entries))
def clean_totals(raw_totals: dict) -> dict:
    """Return totals keyed by cleaned "SIC4" codes.

    Each raw key is expanded with ``clean_value("SIC4", key)`` and its total
    is divided equally between the resulting keys. Entries whose total is the
    ``"c"`` marker are left out of the result.
    """
    result = {}
    for raw_key, raw_total in raw_totals.items():
        expanded = clean_value("SIC4", raw_key)
        n_expanded = len(expanded)
        if raw_total == "c":
            # Marker entries contribute nothing to the cleaned totals.
            continue
        for cleaned_key in expanded:
            result[cleaned_key] = float(raw_total) / n_expanded
    return result
def populate_totals_only_source_data(source_data_item: TotalsOnlyDataSource):
    """Populate a totals-only data source with cleaned cells, totals and constraints.

    Fetches the matrix, its row totals, its column totals and the
    classification system for the item's type, region and year. Every raw
    row/column key is expanded with ``clean_value``; each total is divided
    equally between the keys it expands to, and each cell is divided equally
    between the (row, column) pairs it expands to. A cell equal to ``"c"`` is
    passed through as ``"c"`` for every expanded pair.

    ``clean_value`` is evaluated once per raw key and the result reused,
    instead of being recomputed inside the totals comprehensions and again
    for every row/column combination.
    """
    data, row_totals, column_totals, system = get_source_matrix_of_type(
        source_data_item.type_, source_data_item.region, source_data_item.year
    )

    # TODO clean the row and column keys and update data to reflect.
    # Index the positional matrix by raw row/column key (dict order of the totals).
    data_as_dict = {
        row: {column: data[i][j] for j, column in enumerate(column_totals)}
        for i, row in enumerate(row_totals)
    }

    # Expand each raw key exactly once; everything below reuses these lists.
    cleaned_columns = {
        column: clean_value(system, column) for column in column_totals
    }
    cleaned_rows = {row: clean_value(system, row) for row in row_totals}

    # NOTE(review): totals go through float() unconditionally, so a "c" total
    # would raise here - confirm totals never carry that marker.
    clean_column_totals = {
        clean_column: float(total) / len(cleaned_columns[column])
        for column, total in column_totals.items()
        for clean_column in cleaned_columns[column]
    }
    clean_row_totals = {
        clean_row: float(total) / len(cleaned_rows[row])
        for row, total in row_totals.items()
        for clean_row in cleaned_rows[row]
    }

    clean_data = []
    for row, clean_rows in cleaned_rows.items():
        for column, clean_columns in cleaned_columns.items():
            if not clean_rows or not clean_columns:
                # Nothing to emit for this cell (and avoids dividing by zero).
                continue
            cell = data_as_dict[row][column]
            # The value is the same for every pair this cell expands to.
            if cell == "c":
                value = "c"
            else:
                value = float(cell) / (len(clean_rows) * len(clean_columns))
            clean_data.extend(
                (clean_row, clean_column, value)
                for clean_row in clean_rows
                for clean_column in clean_columns
            )

    source_data_item.add_data_from_tuple(tuple(clean_data))
    source_data_item.system = system
    # NOTE(review): constraints are built from the raw, un-expanded matrix
    # (see the TODO above) - confirm this is intended.
    constraints = make_constraints(data)
    source_data_item.set_row_and_column_totals(clean_row_totals, clean_column_totals)
    source_data_item.set_constraints(constraints)
def test_range(self):
    """A hyphenated range on the last segment expands to every code in the range."""
    expected = ["14_49", "14_50", "14_51", "14_52", "14_53"]
    self.assertListEqual(clean_value("SIC4", "14.49-53"), expected)
def test_with_cpa(self):
    """A "CPA_"-prefixed code is reduced to its bare numeric code."""
    expected = ["02"]
    self.assertListEqual(clean_value("SIC4", "CPA_02"), expected)
def test_top_level_char(self):
    """A single-letter code comes back unchanged."""
    expected = ["B"]
    self.assertListEqual(clean_value("SIC4", "B"), expected)
def test_none(self):
    """Passing ``None`` as the value raises."""
    # NOTE(review): the system here is "SIC" while the sibling tests use
    # "SIC4" - confirm the exception is caused by the None value and not by
    # the system name.
    self.assertRaises(Exception, clean_value, "SIC", None)
def test_bad_range(self):
    """The malformed range "12.5-1" raises instead of being expanded."""
    self.assertRaises(Exception, clean_value, "SIC4", "12.5-1")
def test_leading_chars(self):
    """Leading letters on both range endpoints are dropped before the range expands."""
    expected = ["05", "06", "07", "08", "09", "10"]
    self.assertListEqual(clean_value("SITC4", "A05-A10"), expected)
def test_strip_slash(self):
    """The "/4" suffix is discarded, leaving only the code before the slash."""
    expected = ["23_12"]
    self.assertListEqual(clean_value("SIC4", "23.12/4"), expected)
def test_and_(self):
    """Codes joined by "&" are returned as separate entries."""
    expected = ["1", "2"]
    self.assertListEqual(clean_value("SIC4", "1 & 2"), expected)
def test_not_(self):
    """The "not ..." clause is dropped, leaving only the leading code."""
    expected = ["23_4"]
    self.assertListEqual(clean_value("SIC4", "23.4 not 23.47"), expected)
def test_remove_other(self):
    """A trailing "OTHER" suffix is removed from the code."""
    expected = ["23"]
    self.assertListEqual(clean_value("SIC4", "23OTHER"), expected)
def test_smaller_range(self):
    """A range starting at a zero-padded value keeps two-digit padding throughout."""
    expected = ["10_08", "10_09", "10_10", "10_11"]
    self.assertListEqual(clean_value("SIC4", "10.08-11"), expected)