Beispiel #1
0
def _load_data(val_row,
               societies=None,
               sources=None,
               variables=None,
               descriptions=None):
    ext_id = val_row.get('soc_id')
    if ext_id not in societies:
        logging.warn(
            "Attempting to load values for %s but no Society object exists, skipping"
            % ext_id)
        return

    society = societies[ext_id]
    variable_id = val_row['VarID']

    variable = variables.get(
        var_number_to_label(val_row['Dataset'], variable_id))
    if variable is None:
        logging.warn("Could not find variable %s for society %s" %
                     (variable_id, society.name))
        return

    v = dict(variable=variable,
             society=society,
             source=get_source(val_row['Dataset']),
             coded_value=val_row['Code'],
             code=descriptions.get((variable.id, val_row['Code'].strip())),
             focal_year=val_row['Year'],
             comment=val_row['Comment'],
             subcase=val_row['SubCase'])

    if variable.data_type == 'Continuous' and val_row[
            'Code'] and val_row['Code'] != 'NA':
        v['coded_value_float'] = float(val_row['Code'])

    refs = set()
    for r in val_row['EthnoReferences'].split(";"):
        r = r.strip()
        author, year = None, None
        m = BINFORD_REF_PATTERN.match(r)
        if m:
            author, year = m.group('author').strip(), m.group('year')
            if author.endswith(','):
                author = author[:-1].strip()
        else:
            ref_short = r.split(",")
            if len(ref_short) == 2:
                author = ref_short[0].strip()
                year = ref_short[1].strip().split(':')[0]
        if author and year:
            ref = sources.get((author, year))
            if ref:
                refs.add(ref.id)
            else:  # pragma: no cover
                logging.warn(
                    "Could not find reference %s, %s in database, skipping reference"
                    % (author, year))
    return v, refs
Beispiel #2
0
def load_codes(items):
    """Create or refresh a CulturalCodeDescription for each code row.

    Args:
        items: iterable of mappings; the keys 'Dataset', 'VarID', 'Code',
            'CodeDescription' and 'ShortName' are read from each row.

    Returns:
        The number of rows whose code description was saved. Rows whose
        variable is not in the database are logged and not counted.
    """
    count = 0
    for row in items:
        label = var_number_to_label(row['Dataset'], row['VarID'])
        variable = CulturalVariable.objects.filter(label=label).first()
        if not variable:
            logging.warning("Missing variable in database: %s", label)
            continue

        code_description, created = CulturalCodeDescription.objects.get_or_create(
            variable=variable, code=row['Code'])
        code_description.description = row['CodeDescription']
        code_description.short_description = row['ShortName']
        code_description.save()
        # Let logging do the formatting: calling .decode() on the formatted
        # message fails on Python 3 and on non-ASCII unicode under Python 2.
        # Also report whether the row was really created or only updated.
        logging.info("%s CulturalCodeDescription: %s",
                     "Created" if created else "Updated", code_description)
        count += 1
    return count
Beispiel #3
0
def load_codes(items):
    """Create or refresh a CulturalCodeDescription for each code row.

    Args:
        items: iterable of mappings; the keys 'Dataset', 'VarID', 'Code',
            'CodeDescription' and 'ShortName' are read from each row.

    Returns:
        The number of rows whose code description was saved. Rows whose
        variable is not in the database are logged and not counted.
    """
    count = 0
    for row in items:
        label = var_number_to_label(row['Dataset'], row['VarID'])
        variable = CulturalVariable.objects.filter(label=label).first()
        if not variable:
            logging.warning("Missing variable in database: %s", label)
            continue

        code_description, created = CulturalCodeDescription.objects.get_or_create(
            variable=variable, code=row['Code'])
        code_description.description = row['CodeDescription']
        code_description.short_description = row['ShortName']
        code_description.save()
        # Let logging do the formatting: calling .decode() on the formatted
        # message fails on Python 3 and on non-ASCII unicode under Python 2.
        # Also report whether the row was really created or only updated.
        logging.info("%s CulturalCodeDescription: %s",
                     "Created" if created else "Updated", code_description)
        count += 1
    return count
Beispiel #4
0
def load_var(var_dict, categories):
    """Create or update the CulturalVariable described by ``var_dict``.

    Args:
        var_dict: mapping for one variable row. The keys 'Dataset', 'VarID',
            'VarTitle', 'VarDefinition', 'VarType' and 'IndexCategory' are
            read; 'Units' is optional and defaults to an empty string.
        categories: mutable mapping of category name -> CulturalCategory,
            used as a cache; categories created here are added to it.

    Returns:
        True once the variable has been saved.

    Raises:
        AssertionError: if 'VarType' is not one of 'Continuous',
            'Categorical' or 'Ordinal'.
    """
    data_type = var_dict['VarType']
    # Validate before touching the database so an unsupported type does not
    # leave a half-initialised row behind. Raised explicitly so the check
    # survives `python -O`; AssertionError is kept so callers see the same
    # exception type as before.
    if data_type not in ('Continuous', 'Categorical', 'Ordinal'):
        raise AssertionError("Unsupported VarType: %r" % (data_type,))

    label = var_number_to_label(var_dict['Dataset'], var_dict['VarID'])
    variable, created = CulturalVariable.objects.get_or_create(
        label=label, source=get_source(var_dict['Dataset']))
    variable.name = var_dict['VarTitle']
    variable.codebook_info = var_dict['VarDefinition']
    variable.data_type = data_type
    variable.units = var_dict.get('Units', "")

    # Fetch the existing links once instead of re-querying per category.
    linked = list(variable.index_categories.all())
    for name in map(clean_category, var_dict['IndexCategory'].split(',')):
        index_category = categories.get(name)
        if not index_category:
            index_category = categories[name] = CulturalCategory.objects.create(
                name=name)
            logging.info("Created CulturalCategory: %s", name)

        if index_category not in linked:
            variable.index_categories.add(index_category)
            linked.append(index_category)

    variable.save()
    # Report what actually happened rather than always claiming creation.
    logging.info("%s CulturalVariable: %s",
                 "Created" if created else "Updated", label)
    logging.info("Saved variable %s - %s", label, variable.name)
    return True
Beispiel #5
0
def load_var(var_dict, categories):
    """Create or update the CulturalVariable described by ``var_dict``.

    Args:
        var_dict: mapping for one variable row. The keys 'Dataset', 'VarID',
            'VarTitle', 'VarDefinition', 'VarType' and 'IndexCategory' are
            read; 'Units' is optional and defaults to an empty string.
        categories: mutable mapping of category name -> CulturalCategory,
            used as a cache; categories created here are added to it.

    Returns:
        True once the variable has been saved.

    Raises:
        AssertionError: if 'VarType' is not one of 'Continuous',
            'Categorical' or 'Ordinal'.
    """
    data_type = var_dict['VarType']
    # Validate before touching the database so an unsupported type does not
    # leave a half-initialised row behind. Raised explicitly so the check
    # survives `python -O`; AssertionError is kept so callers see the same
    # exception type as before.
    if data_type not in ('Continuous', 'Categorical', 'Ordinal'):
        raise AssertionError("Unsupported VarType: %r" % (data_type,))

    label = var_number_to_label(var_dict['Dataset'], var_dict['VarID'])
    variable, created = CulturalVariable.objects.get_or_create(
        label=label, source=get_source(var_dict['Dataset']))
    variable.name = var_dict['VarTitle']
    variable.codebook_info = var_dict['VarDefinition']
    variable.data_type = data_type
    variable.units = var_dict.get('Units', "")

    # Fetch the existing links once instead of re-querying per category.
    linked = list(variable.index_categories.all())
    for name in map(clean_category, var_dict['IndexCategory'].split(',')):
        index_category = categories.get(name)
        if not index_category:
            index_category = categories[name] = CulturalCategory.objects.create(
                name=name)
            logging.info("Created CulturalCategory: %s", name)

        if index_category not in linked:
            variable.index_categories.add(index_category)
            linked.append(index_category)

    variable.save()
    # Report what actually happened rather than always claiming creation.
    logging.info("%s CulturalVariable: %s",
                 "Created" if created else "Updated", label)
    logging.info("Saved variable %s - %s", label, variable.name)
    return True