Beispiel #1
0
def _read_stat_var():
    """Read all the statistical variables and group them by their key.

    The dcids come from ``dc.get_sv_dcids()`` and their triples from
    ``dc.get_triples()``. One ``StatVar`` is built per dcid and appended to
    the list stored under ``StatVar.key``.

    Example of the triples for one statistical variable::

        ('dc/014es05x0d5l', 'measurementMethod', 'CensusACS5yrSurvey')
        ('dc/014es05x0d5l', 'constraintProperties', 'income')
        ('dc/014es05x0d5l', 'income', 'USDollar75000Onwards')
        ('dc/014es05x0d5l', 'age', 'Years15Onwards')
        ('dc/014es05x0d5l', 'statType', 'measuredValue')
        ('dc/014es05x0d5l', 'placeOfBirth', 'BornInStateOfResidence')
        ('dc/014es05x0d5l', 'measuredProperty', 'count')
        ('dc/014es05x0d5l', 'incomeStatus', 'WithIncome')
        ('dc/014es05x0d5l', 'constraintProperties', 'placeOfBirth')
        ('dc/014es05x0d5l', 'typeOf', 'StatisticalVariable')
        ('dc/014es05x0d5l', 'populationType', 'Person')
        ('dc/014es05x0d5l', 'provenance', 'dc/cweckx1')
        ('dc/014es05x0d5l', 'constraintProperties', 'incomeStatus')
        ('dc/014es05x0d5l', 'constraintProperties', 'age')

    Returns:
        Whatever ``removeDuplicateStatsVar`` returns when given the mapping
        from ``StatVar.key`` to the list of ``StatVar`` objects.

    Raises:
        ValueError: If a property listed under ``constraintProperties`` has
            no value triple of its own for that statistical variable.
    """
    sv_dcid = dc.get_sv_dcids()
    sv_triples = dc.get_triples(sv_dcid)

    # crimeType values that are grouped under a common "super enum" value.
    violent_crimes = ('AggravatedAssault', 'ForcibleRape', 'Robbery',
                      'MurderAndNonNegligentManslaughter')
    property_crimes = ('MotorVehicleTheft', 'LarcenyTheft', 'Burglary')

    stat_vars = collections.defaultdict(list)
    for dcid, triples in sv_triples.items():
        constraint_properties = []
        # defaultdict(str): properties missing from the triples read as ''.
        sv_dict = collections.defaultdict(str)
        for subject, prop, val in triples:
            if subject != dcid:
                # The triples include measurementDenominator info of other
                # stat vars, e.g. ("dc/gywfwwmg5gsrg",
                # "measurementDenominator", "Count_Person") shows up in the
                # triples of "Count_Person". Those do not describe this node.
                continue
            if prop == "constraintProperties":
                constraint_properties.append(val)
            else:
                sv_dict[prop] = val

        # Constraint property -> value pairs of this statistical variable.
        prop_val = {}
        for constraint in constraint_properties:
            # `in` does not trigger the defaultdict factory, so this really
            # detects a missing value triple.
            if constraint not in sv_dict:
                raise ValueError(
                    'constraint property:{} not found in statistical '
                    'variable with dcid: {}'.format(constraint, dcid))
            prop_val[constraint] = sv_dict[constraint]
        if "measurementDenominator" in sv_dict:
            prop_val["md"] = sv_dict["measurementDenominator"]

        se = {}  # Super enum
        crime_type = prop_val.get('crimeType')
        if crime_type in violent_crimes:
            se = {'crimeType': 'ViolentCrime'}
        elif crime_type in property_crimes:
            se = {'crimeType': 'PropertyCrime'}

        sv = StatVar(sv_dict["populationType"], sv_dict["measuredProperty"],
                     sv_dict["statType"], prop_val, dcid, se)
        stat_vars[sv.key].append(sv)
    return removeDuplicateStatsVar(stat_vars)
Beispiel #2
0
def _read_stat_var():
    """Read all the statistical variables and group them by their key.

    The dcids come from ``dc.get_sv_dcids()`` and their triples from
    ``dc.get_triples()``. One ``StatVar`` is built per dcid and appended to
    the list stored under ``StatVar.key``.

    Example of the triples for one statistical variable::

        ('dc/014es05x0d5l', 'measurementMethod', 'CensusACS5yrSurvey')
        ('dc/014es05x0d5l', 'constraintProperties', 'income')
        ('dc/014es05x0d5l', 'income', 'USDollar75000Onwards')
        ('dc/014es05x0d5l', 'age', 'Years15Onwards')
        ('dc/014es05x0d5l', 'statType', 'measuredValue')
        ('dc/014es05x0d5l', 'placeOfBirth', 'BornInStateOfResidence')
        ('dc/014es05x0d5l', 'measuredProperty', 'count')
        ('dc/014es05x0d5l', 'incomeStatus', 'WithIncome')
        ('dc/014es05x0d5l', 'constraintProperties', 'placeOfBirth')
        ('dc/014es05x0d5l', 'typeOf', 'StatisticalVariable')
        ('dc/014es05x0d5l', 'populationType', 'Person')
        ('dc/014es05x0d5l', 'provenance', 'dc/cweckx1')
        ('dc/014es05x0d5l', 'constraintProperties', 'incomeStatus')
        ('dc/014es05x0d5l', 'constraintProperties', 'age')

    Returns:
        A ``collections.defaultdict(list)`` mapping ``StatVar.key`` to the
        list of ``StatVar`` objects sharing that key.

    Raises:
        ValueError: If a property listed under ``constraintProperties`` has
            no value triple of its own for that statistical variable.
    """
    sv_dcid = dc.get_sv_dcids()
    sv_triples = dc.get_triples(sv_dcid)

    stat_vars = collections.defaultdict(list)
    for dcid, triples in sv_triples.items():
        constraint_properties = []
        # defaultdict(str): properties missing from the triples read as ''.
        sv_dict = collections.defaultdict(str)
        for subject, prop, val in triples:
            if subject != dcid:
                # Only triples whose subject is this node describe it. A
                # triple about another node (e.g. one that merely references
                # this dcid as its value) must not overwrite our properties.
                continue
            if prop == "constraintProperties":
                constraint_properties.append(val)
            else:
                sv_dict[prop] = val

        # Constraint property -> value pairs of this statistical variable.
        prop_val = {}
        for constraint in constraint_properties:
            # `in` does not trigger the defaultdict factory, so this really
            # detects a missing value triple.
            if constraint not in sv_dict:
                raise ValueError(
                    'constraint property:{} not found in statistical '
                    'variable with dcid: {}'.format(constraint, dcid))
            prop_val[constraint] = sv_dict[constraint]

        sv = StatVar(sv_dict["populationType"], sv_dict["measuredProperty"],
                     sv_dict["statType"], prop_val, dcid)
        stat_vars[sv.key].append(sv)
    return stat_vars
Beispiel #3
0
def _read_placeType_mapping():
    """Map every statistical-variable dcid to ``PLACE_TYPES``.

    The dcids are taken from ``dc.get_sv_dcids()``. Every key is bound to the
    very same ``PLACE_TYPES`` object; no copy is made per entry.

    Returns:
        A dict keyed by dcid whose values are all ``PLACE_TYPES``.
    """
    return {stat_var_id: PLACE_TYPES for stat_var_id in dc.get_sv_dcids()}
Beispiel #4
0
def read_stat_var():
    """Read all the statistical variables and group them by their key.

    The dcids come from ``dc.get_sv_dcids()``. Their triples are fetched in
    chunks through ``dc.get_triples_processed()``. One ``StatsVar`` is built
    per dcid and appended to the list stored under ``StatsVar.key``.

    Example of the triples for one statistical variable::

        ('dc/014es05x0d5l', 'measurementMethod', 'CensusACS5yrSurvey')
        ('dc/014es05x0d5l', 'constraintProperties', 'income')
        ('dc/014es05x0d5l', 'income', 'USDollar75000Onwards')
        ('dc/014es05x0d5l', 'age', 'Years15Onwards')
        ('dc/014es05x0d5l', 'statType', 'measuredValue')
        ('dc/014es05x0d5l', 'placeOfBirth', 'BornInStateOfResidence')
        ('dc/014es05x0d5l', 'measuredProperty', 'count')
        ('dc/014es05x0d5l', 'incomeStatus', 'WithIncome')
        ('dc/014es05x0d5l', 'constraintProperties', 'placeOfBirth')
        ('dc/014es05x0d5l', 'typeOf', 'StatisticalVariable')
        ('dc/014es05x0d5l', 'populationType', 'Person')
        ('dc/014es05x0d5l', 'provenance', 'dc/cweckx1')
        ('dc/014es05x0d5l', 'constraintProperties', 'incomeStatus')
        ('dc/014es05x0d5l', 'constraintProperties', 'age')

    Returns:
        Whatever ``removeDuplicateStatsVar`` returns when given the mapping
        from ``StatsVar.key`` to the list of ``StatsVar`` objects.

    Raises:
        ValueError: If a property listed under ``constraintProperties`` has
            no value triple of its own for that statistical variable.
    """
    sv_dcid = dc.get_sv_dcids()

    # Split the dcids into chunks and fetch the triples chunk by chunk.
    # Stepping through the start offsets never produces an empty trailing
    # chunk, so no request is issued for zero dcids.
    chunk_size = 10000
    sv_triples = {}
    for start in range(0, len(sv_dcid), chunk_size):
        sv_triples.update(
            dc.get_triples_processed(sv_dcid[start:start + chunk_size]))

    # Super enums group stat vars with different p-v pairs (p, v1); (p, v2)
    # by adding a common value (p, v), so that v1 and v2 become leaf nodes of
    # the value node v. Each rule is (property, common value, leaf values).
    super_enum_rules = (
        ('crimeType', 'ViolentCrime',
         ('AggravatedAssault', 'ForcibleRape', 'Robbery',
          'MurderAndNonNegligentManslaughter')),
        ('crimeType', 'PropertyCrime',
         ('MotorVehicleTheft', 'LarcenyTheft', 'Burglary')),
        ('testResult', 'TestResults', ('Negative', 'Positive', 'Ready')),
        ('medicalStatus', 'PatientStatus',
         ('ConfirmedCase', 'ConfirmedOrProbableCase', 'PatientDeceased',
          'PatientHospitalized', 'PatientInICU', 'PatientOnVentilator',
          'PatientRecovered')),
    )

    # Group all the stat vars according to the triples.
    stat_vars = collections.defaultdict(list)
    for dcid, triples in sv_triples.items():
        constraint_properties = []
        # sv_dict keeps all the triples of the stat var. It is a
        # defaultdict(str), so properties missing from the triples (e.g.
        # measurementQualifier) read as ''.
        sv_dict = collections.defaultdict(str)
        for subject, prop, val in triples:
            if subject != dcid:
                # The triples include measurementDenominator info of other
                # stat vars, e.g. ("dc/gywfwwmg5gsrg",
                # "measurementDenominator", "Count_Person") shows up in the
                # triples of "Count_Person". Those do not describe this node.
                continue
            if prop == "constraintProperties":
                constraint_properties.append(val)
            else:
                sv_dict[prop] = val

        # prop_val keeps all the constraint property-value pairs.
        prop_val = {}
        for constraint in constraint_properties:
            # `in` does not trigger the defaultdict factory, so this really
            # detects a missing value triple.
            if constraint not in sv_dict:
                raise ValueError(
                    'constraint property:{} not found in statistical '
                    'variable with dcid: {}'.format(constraint, dcid))
            prop_val[constraint] = sv_dict[constraint]

        # NOTE(review): when several rules match, the last one wins because
        # `se` is replaced rather than merged. This keeps the existing
        # behavior; confirm that it is intended.
        se = {}
        for prop, common_value, leaf_values in super_enum_rules:
            if prop_val.get(prop) in leaf_values:
                se = {prop: common_value}

        # Create the StatsVar object.
        sv = StatsVar(sv_dict["populationType"], sv_dict["measuredProperty"],
                      sv_dict["statType"], sv_dict["measurementQualifier"],
                      sv_dict["measurementDenominator"], prop_val, dcid, se)
        stat_vars[sv.key].append(sv)
    return removeDuplicateStatsVar(stat_vars)