def get_value(self, table, geo_dicts):
    """Return the ACS 2010 values stored for ``table`` in the given geographies.

    ``geo_dicts`` may be a single geography dict or a list of them; each
    dict must provide ``'STUSAB'`` and ``'LOGRECNO'`` keys.

    Rows of file type ``'2010e5'`` populate a Value's ``_value`` and rows of
    the other requested type (``'2010m5'``) populate its ``_moe``.

    Returns a list with one Value per distinct ``logrecno`` found. The list
    order follows the internal dict and is not tied to the order of
    ``geo_dicts``.
    """
    if not isinstance(geo_dicts, list):
        geo_dicts = [geo_dicts]

    acs_info = ACS2010Meta()
    col = acs_info.csv_column_for_matrix(table)
    # NOTE(review): the "- 5" offset presumably skips leading non-data
    # columns of the source file -- confirm against the loader.
    raw_values = Row.objects.filter(
        fileid='ACSSF',
        filetype__in=['2010e5', '2010m5'],
        cifsn=acs_info._file_name_for_matrix(table),
        stusab__in=[g['STUSAB'].lower() for g in geo_dicts],
        logrecno__in=[g['LOGRECNO'] for g in geo_dicts],
    ).values_list('logrecno', 'filetype', "col%s" % str(col - 5))

    # NOTE(review): results are keyed by logrecno only, so geographies from
    # different states that share a logrecno would be merged -- confirm that
    # callers only ever pass geographies from a single state.
    values = {}
    for logrecno, filetype, val in raw_values:
        if logrecno not in values:
            values[logrecno] = Value(0)
        typed = self._type_value(val)
        if filetype == '2010e5':
            values[logrecno]._value = typed
        else:
            values[logrecno]._moe = typed

    # Materialise a list so the result is the same type on Python 2 and 3
    # (dict.values() is a view on Python 3).
    return list(values.values())
def test_census2010(self):
    """
    Test that the data files are read properly, and that operations and
    formula are handled correctly.

    NOTE: It is advisable to download a locally cached copy of RI's files
    (the state these tests are written for) before running, so there are
    no network side-effects.
    """
    # RI total population (2010)
    cmd = load_census.Command()
    cmd.handle('SF1ST', 'ri')

    geo = {
        'FILEID': 'SF1ST',
        'SUMLEV': '040',
        'STUSAB': 'RI',
        'CHARITER': '000',
        'CIFSN': '01',
        'LOGRECNO': '0000001'
    }
    c2010sf1 = Census2010('sf1')

    # assertEqual replaces the deprecated failUnlessEqual alias.
    self.assertEqual(c2010sf1.data('P00010001', geo), Value(1052567))
    self.assertEqual(c2010sf1.data('P00060001', geo), Value(1091043))
    self.assertEqual(
        c2010sf1.data('P00010001-P00060001', geo),
        Value(1052567 - 1091043))
    # Division is compared through str() of the resulting value.
    self.assertEqual(
        str(c2010sf1.data('P00010001/P00060001', geo).value),
        str(Decimal(1052567) / Decimal(1091043)))
def get_value(self, table, geo_dicts):
    """Return the ACS 2010 values stored for ``table`` in the given geographies.

    ``geo_dicts`` may be a single geography dict or a list of them; each
    dict must provide ``'STUSAB'`` and ``'LOGRECNO'`` keys.

    Rows of file type ``'2010e5'`` populate a Value's ``_value`` and rows of
    the other requested type (``'2010m5'``) populate its ``_moe``.

    Returns a list with one Value per distinct ``logrecno`` found. The list
    order follows the internal dict and is not tied to the order of
    ``geo_dicts``.
    """
    if not isinstance(geo_dicts, list):
        geo_dicts = [geo_dicts]

    acs_info = ACS2010Meta()
    col = acs_info.csv_column_for_matrix(table)
    # NOTE(review): the "- 5" offset presumably skips leading non-data
    # columns of the source file -- confirm against the loader.
    raw_values = Row.objects.filter(
        fileid='ACSSF',
        filetype__in=['2010e5', '2010m5'],
        cifsn=acs_info._file_name_for_matrix(table),
        stusab__in=[g['STUSAB'].lower() for g in geo_dicts],
        logrecno__in=[g['LOGRECNO'] for g in geo_dicts],
    ).values_list('logrecno', 'filetype', "col%s" % str(col - 5))

    # NOTE(review): results are keyed by logrecno only, so geographies from
    # different states that share a logrecno would be merged -- confirm that
    # callers only ever pass geographies from a single state.
    values = {}
    for logrecno, filetype, val in raw_values:
        if logrecno not in values:
            values[logrecno] = Value(0)
        typed = self._type_value(val)
        if filetype == '2010e5':
            values[logrecno]._value = typed
        else:
            values[logrecno]._moe = typed

    # Materialise a list so the result is the same type on Python 2 and 3
    # (dict.values() is a view on Python 3).
    return list(values.values())
def check_for_existing_value(self, formula, geo_record):
    """Check whether ``formula`` already has a value for ``geo_record``.

    The lookup is currently bypassed, so this always returns ``None`` and
    callers fall through to computing the value themselves. The disabled
    lookup lives in ``_lookup_existing_value``.
    """
    # TODO: Bypassing this for now. To re-enable, return
    # self._lookup_existing_value(formula, geo_record) instead.
    return None


def _lookup_existing_value(self, formula, geo_record):
    """Look up a value for ``formula`` on indicator or denominator parts.

    Indicator parts registered for this adapter's data source are tried
    first; denominator parts are consulted only when no indicator part
    matches the formula at all. Returns a Value built from the first
    non-``None`` stored value, or ``None`` if nothing usable is found.
    """
    logger.debug("----Checking for existing formula %s------", formula)

    ds = DataSource.objects.get(
        implementation="data_adapters.%s" % self.__class__.__name__)

    # first try indicator_parts
    matching_ind_parts = IndicatorPart.objects.filter(
        data_source=ds, formula=formula)
    if matching_ind_parts:
        logger.debug("Found indicator parts")
        for ind_part in matching_ind_parts:
            value = ind_part.indicator.get_indicator_value(
                geo_record, ind_part.time)
            if value is not None:
                return Value(value=value.number, moe=value.moe)
        # Indicator parts matched but none had a value: denominators are
        # deliberately not consulted in this case.
        return None

    matching_denom_parts = DenominatorPart.objects.filter(
        data_source=ds, formula=formula)
    if matching_denom_parts:
        logger.debug("Found denom part")
        # get this value from the denominator
        for denom_part in matching_denom_parts:
            value = denom_part.denominator.get_value(
                geo_record, denom_part.part.time)
            if value is not None:
                return Value(value=value.number, moe=value.moe)
    return None
def test_value_derived_porportions(self):
    """Dividing two Values yields the expected proportion and its MOE."""
    from census.data import Value

    numerator = Value(4634, 989)
    denominator = Value(31713, 601)
    ratio = numerator / denominator

    expected_value = 0.146123041024
    expected_moe = 0.03118594897991360010090499164

    # Both sides are compared as floats.
    self.assertEqual(float(ratio.value), expected_value)
    self.assertEqual(float(ratio.moe), expected_moe)
def test_moe_times_value(self):
    """Multiplying an estimate by a plain number scales its MOE as well."""
    # test that if a number with an moe is multiplied by a normal
    # non-estimate number, the moe is simply multiplied by that number
    a = Value(10, moe=5)
    b = Value(2)

    # Checked in both operand orders; assertEqual replaces the deprecated
    # failUnlessEqual alias.
    self.assertEqual(Value(20, moe=10), a * b)
    self.assertEqual(Value(20, moe=10), b * a)
def test_operations(self):
    """The four basic arithmetic operators parse and evaluate correctly."""
    from census.parse import FormulaParser

    parser = FormulaParser(MockDatasource())

    # parser.parse() returns a callable; it is invoked with None here.
    # assertEqual replaces the deprecated failUnlessEqual alias.
    self.assertEqual(Value(4), parser.parse('2 + 2')(None))
    self.assertEqual(Value(0), parser.parse('2 - 2')(None))
    self.assertEqual(Value(6), parser.parse('2 * 3')(None))
    self.assertEqual(Value(2), parser.parse('4 / 2')(None))
def test_ratio_moe(self):
    """
    Test that MOE is calculated properly for ratios/proportions
    """
    v1 = Value(4634, moe=989)
    v2 = Value(6440, moe=1328)
    result = v1 / v2

    # http://www.census.gov/acs/www/Downloads/handbooks/ACSResearch.pdf
    # page A-16
    #
    # round() may hand back a Decimal rather than a float (it does on
    # Python 3 when given a Decimal), and a Decimal never compares equal to
    # an inexact float literal. Normalising through float() keeps the
    # comparison meaningful either way.
    self.assertEqual(float(round(result.value, 2)), 0.72)
    self.assertEqual(float(round(result.moe, 4)), 0.2135)
def data(self, formula, geo_record):
    """Return the Value stored in the CSV data file for ``geo_record``.

    Rows are read from ``self.data_file``. The first row whose ``level``
    (compared case-insensitively with ``geo_record.level.name``) and
    ``geo_id`` both match is turned into a Value built from its ``value``
    and ``moe`` columns. ``formula`` is not consulted. Returns ``None``
    when no row matches.
    """
    # NOTE(review): the reader consumes self.data_file from its current
    # position; if the same file object is reused across calls, later calls
    # may see no rows -- confirm how data_file is managed.
    for record in csv.DictReader(self.data_file, delimiter=","):
        same_level = (
            record['level'].lower() == geo_record.level.name.lower())
        if not (same_level and record['geo_id'] == geo_record.geo_id):
            continue
        estimate = self.parse_value(record['value'])
        margin = self.parse_value(record['moe'])
        return Value(estimate, moe=margin)
    return None
def data(self, formula, geo_record, **kwargs):
    """Evaluate ``formula`` for ``geo_record`` and return the result.

    Resolution order:

    1. A blank formula yields ``None``.
    2. A precalculated value, if one exists, is returned as-is.
    3. A value found by ``check_for_existing_value`` is returned.
    4. Otherwise the formula is parsed, each table reference is fetched
       through ``get_api_data`` and the expression is evaluated.

    For datasets whose name contains ``'acs'`` each table reference is
    fetched twice, with ``"E"`` and ``"M"`` suffixes, and combined into a
    single Value (estimate and margin of error). ``**kwargs`` is accepted
    but not used here.
    """
    if formula.strip() == "":
        return None

    # return a precalculated value if found
    precalc = check_for_precalculated_value(self, formula, geo_record)
    # Compare against None explicitly (as done for cached_value below) so a
    # stored value is never mistaken for "not found" by a truthiness test.
    if precalc is not None:
        return precalc

    # Get Data
    # From here we have to make requests to the census api based on
    # what Year we want
    cached_value = self.check_for_existing_value(formula, geo_record)
    if cached_value is not None:
        logger.debug("Using cached value")
        return cached_value

    self.parser.parse_string(formula)

    # Build the sequence of operands and operators, in stack order.
    parts = []
    for part in self.parser.expr_stack:
        if part in self.op_map:
            # this is an op
            parts.append(part)
        elif 'acs' not in self.dataset:
            # This is a table num
            parts.append(Value(self.get_api_data(part, geo_record)))
        else:
            # this is an ACS value
            estimate = self.get_api_data(part + "E", geo_record)
            moe = self.get_api_data(part + "M", geo_record)
            parts.append(Value(estimate, moe))

    # now we need to actually run the formula
    return self.evaluate(parts)
def check_for_precalculated_value(data_adapter, formula, geo_record):
    """Return the precalculated Value for ``formula``, if one is stored.

    The data source is resolved from the adapter's class name
    (``"data_adapters.<ClassName>"``) and the precalculated value is looked
    up with ``formula`` as its ``table`` for ``geo_record``. Returns
    ``None`` when no PrecalculatedValue exists; other lookup errors are not
    handled here.
    """
    adapter_path = "data_adapters.%s" % data_adapter.__class__.__name__
    try:
        source = DataSource.objects.get(implementation=adapter_path)
        stored = PrecalculatedValue.objects.get(
            data_source=source,
            table=formula,
            geo_record=geo_record)
        return Value(stored.value)
    except PrecalculatedValue.DoesNotExist:
        return None
def get_value(self, table, geo_dicts):
    """Return the stored census values for ``table`` in the given geographies.

    ``geo_dicts`` may be a single geography dict or a list of them; each
    dict must provide ``'STUSAB'`` and ``'LOGRECNO'`` keys.

    Returns a list with one Value per matching row, in the order the
    query yields them.
    """
    census_info = Census2010Meta(self.file_type)
    if not isinstance(geo_dicts, list):
        geo_dicts = [geo_dicts]

    col = census_info.csv_column_for_matrix(table)
    # NOTE(review): the "- 4" offset presumably skips leading non-data
    # columns of the source file -- confirm against the loader.
    raw_vals = Row.objects.filter(
        fileid=self.file_id,
        cifsn=census_info._file_name_for_matrix(table),
        stusab__in=[g['STUSAB'].upper() for g in geo_dicts],
        logrecno__in=[g['LOGRECNO'] for g in geo_dicts],
    ).values_list("col%s" % str(col - 4), flat=True)

    # A list comprehension returns a real list on both Python 2 and 3;
    # map() would be a one-shot iterator on Python 3.
    return [Value(self._type_value(v)) for v in raw_vals]
def test_value_agg(self):
    """Adding Values sums the estimates and combines the MOEs."""
    from census.data import Value

    # Four small estimates.
    first, second, third, fourth = (
        Value(10, 2), Value(15, 5), Value(20, 4), Value(5, 2))
    total = first + second + third + fourth
    self.assertEqual(total.value, 50)
    self.assertEqual(total.moe, 7)

    # Three larger estimates; the MOE is compared as a float.
    first, second, third = (
        Value(52354, 3303), Value(19464, 2011), Value(17190, 1854))
    total = first + second + third
    self.assertEqual(total.value, 89008)
    self.assertEqual(float(total.moe), 4288.50160312)
def test_census2000_data(self):
    """
    Test that the data files are read properly

    NOTE: It is advisable to download a locally cached copy of RI's files
    (the state these tests are written for) before running, so there are
    no network side-effects.
    """
    # This test used to be disabled with a bare ``return``, which made it
    # report success without checking anything. Skip it explicitly so the
    # missing coverage stays visible in test reports. Remove the skip to
    # re-enable the checks below.
    self.skipTest("Census 2000 data test is currently disabled")

    # RI total population (2000)
    cmd = load_census.Command()
    cmd.handle('uSF1', 'ri')

    geo = {
        'FILEID': 'uSF1',
        'SUMLEV': '040',
        'STUSAB': 'RI',
        'CHARITER': '000',
        'CIFSN': '01',
        'LOGRECNO': '0000001'
    }
    c2k = Census2000('SF1')
    self.assertEqual(c2k.data('P0001001', geo), Value(1048319))
def test_operator_precedence(self):
    """Multiplication binds tighter than addition; parentheses override."""
    from census.parse import FormulaParser

    parser = FormulaParser(MockDatasource())

    # assertEqual replaces the deprecated failUnlessEqual alias.
    self.assertEqual(Value(7), parser.parse('1 + 2 * 3')(None))
    self.assertEqual(Value(9), parser.parse('(1 + 2) * 3')(None))
def _number_parse_action(self, result):
    """Wrap the first element of ``result`` as a Table over a Value.

    The element is converted to a Value and paired with a fresh
    IdentityDatasource. Presumably used as the parser's action for numeric
    literals -- confirm against the grammar definition.
    """
    literal = Value(result[0])
    source = IdentityDatasource()
    return Table(source, literal)