def populate_source_data(source_data_item: DataSource):
    """Fetch the source matrix for ``source_data_item`` and load it in cleaned form.

    The matrix is looked up by the item's ``type_``, ``region`` and ``year``.
    Every fetched row is unpacked into six fields, of which the last four are
    used: classification system, raw source value, raw target value and total.
    Both raw values are expanded with ``clean_value`` and the row total is
    shared equally between every (source, target) combination that results.

    :param source_data_item: the data source to populate; its ``system`` is
        set and the cleaned rows are passed to ``add_data_from_tuple``.
    """
    rows = get_source_matrix_of_type(source_data_item.type_, source_data_item.region, source_data_item.year)

    # Hacky, but it avoids extra db hits: verify that the whole input table
    # uses a single classification system (third field of each row), then
    # take that system from the first row.
    check_only_one_classification_system([row[2] for row in rows])
    source_data_item.system = rows[0][2]

    cleaned = []
    for _, _, system, source_value, target_value, total in rows:
        sources = clean_value(system, source_value)
        targets = clean_value(system, target_value)

        # One equal share of the total per (source, target) pair.
        share = total / (len(targets) * len(sources))
        cleaned.extend((source, target, share)
                       for source in sources
                       for target in targets)
    source_data_item.add_data_from_tuple(tuple(cleaned))
def clean_totals(raw_totals: dict) -> dict:
    """Return totals keyed by cleaned "SIC4" codes.

    Each raw key is expanded with ``clean_value("SIC4", key)`` and its total,
    converted to ``float``, is divided equally between the resulting codes.
    Entries whose total is the string ``"c"`` are left out of the result.

    :param raw_totals: mapping of raw key to total (number, numeric string or ``"c"``).
    :return: mapping of cleaned code to its share of the total.
    """
    shares = {}
    for raw_key, raw_total in raw_totals.items():
        codes = clean_value("SIC4", raw_key)
        if raw_total == "c":
            # "c" totals are not carried over to the cleaned mapping.
            continue
        code_count = len(codes)
        shares.update({code: float(raw_total) / code_count for code in codes})

    return shares
def populate_totals_only_source_data(source_data_item: TotalsOnlyDataSource):
    """Populate a totals-only data source with cleaned cells, totals and constraints.

    The raw matrix, row totals, column totals and classification system are
    fetched for the item's ``type_``, ``region`` and ``year``. Every raw row
    and column key is expanded with ``clean_value``; a raw total or cell value
    is shared equally between the cleaned keys it expands to, while cells
    holding the string ``"c"`` are passed through as ``"c"``. Constraints are
    built from the raw (uncleaned) matrix.

    :param source_data_item: the data source to populate; it receives the
        cleaned cell tuples, the system, the cleaned row/column totals and the
        constraints.
    """
    data, row_totals, column_totals, system = get_source_matrix_of_type(source_data_item.type_,
                                                                        source_data_item.region,
                                                                        source_data_item.year)

    # TODO clean the row and column keys and update data to reflect.

    # Address raw cells by (row key, column key); relies on the totals dicts
    # being ordered consistently with the matrix indices.
    data_as_dict = {row: {column: data[i][j]
                          for j, column in enumerate(column_totals)}
                    for i, row in enumerate(row_totals)}

    # Clean every raw key exactly once and reuse the result below, instead of
    # calling clean_value again for each total and for each matrix cell.
    cleaned_columns = {column: clean_value(system, column) for column in column_totals}
    clean_column_totals = {clean_column: float(total) / len(cleaned_columns[column])
                           for column, total in column_totals.items()
                           for clean_column in cleaned_columns[column]
                           }

    cleaned_rows = {row: clean_value(system, row) for row in row_totals}
    clean_row_totals = {clean_row: float(total) / len(cleaned_rows[row])
                        for row, total in row_totals.items()
                        for clean_row in cleaned_rows[row]
                        }

    clean_data = list()
    for row, clean_rows in cleaned_rows.items():
        for column, clean_columns in cleaned_columns.items():
            cell_count = len(clean_rows) * len(clean_columns)
            if not cell_count:
                # Nothing to emit for a key that cleans to no codes.
                continue

            raw_cell = data_as_dict[row][column]
            # "c" cells are passed through unchanged; numeric cells are split
            # evenly across the cleaned (row, column) pairs.
            cell_value = "c" if raw_cell == "c" else float(raw_cell) / cell_count
            clean_data.extend((clean_row, clean_column, cell_value)
                              for clean_row in clean_rows
                              for clean_column in clean_columns)
    source_data_item.add_data_from_tuple(tuple(clean_data))
    source_data_item.system = system
    constraints = make_constraints(data)
    source_data_item.set_row_and_column_totals(clean_row_totals, clean_column_totals)
    source_data_item.set_constraints(constraints)
 def test_range(self):
     """"14.49-53" should expand to every code from 14_49 through 14_53."""
     expected = ["14_49", "14_50", "14_51", "14_52", "14_53"]
     self.assertListEqual(clean_value("SIC4", "14.49-53"), expected)
 def test_with_cpa(self):
     """"CPA_02" should clean to the bare code "02"."""
     cleaned = clean_value("SIC4", "CPA_02")
     expected = ["02"]
     self.assertListEqual(cleaned, expected)
 def test_top_level_char(self):
     """A single top-level letter such as "B" should come back unchanged."""
     self.assertListEqual(clean_value("SIC4", "B"), ["B"])
 def test_none(self):
     """Passing ``None`` as the value should raise."""
     # NOTE(review): the system here is "SIC", not the "SIC4" used by the
     # neighbouring tests -- confirm this is intentional.
     self.assertRaises(Exception, clean_value, "SIC", None)
 def test_bad_range(self):
     """The range "12.5-1" should be rejected with an exception."""
     self.assertRaises(Exception, clean_value, "SIC4", "12.5-1")
 def test_leading_chars(self):
     """"A05-A10" under "SITC4" should expand to "05".."10" without the letters."""
     expected = ["05", "06", "07", "08", "09", "10"]
     self.assertListEqual(clean_value("SITC4", "A05-A10"), expected)
 def test_strip_slash(self):
     """"23.12/4" should clean to "23_12", without the "/4" part."""
     cleaned = clean_value("SIC4", "23.12/4")
     expected = ["23_12"]
     self.assertListEqual(cleaned, expected)
 def test_and_(self):
     """"1 & 2" should be split into the two codes "1" and "2"."""
     self.assertListEqual(clean_value("SIC4", "1 & 2"), ["1", "2"])
 def test_not_(self):
     """"23.4 not 23.47" should clean to just "23_4"."""
     cleaned = clean_value("SIC4", "23.4 not 23.47")
     expected = ["23_4"]
     self.assertListEqual(cleaned, expected)
 def test_remove_other(self):
     """"23OTHER" should clean to "23", without the "OTHER" suffix."""
     self.assertListEqual(clean_value("SIC4", "23OTHER"), ["23"])
 def test_smaller_range(self):
     """"10.08-11" should expand to 10_08 through 10_11, keeping the zero padding."""
     expected = ["10_08", "10_09", "10_10", "10_11"]
     self.assertListEqual(clean_value("SIC4", "10.08-11"), expected)