def _parse_ethno_reference(text):
    """Split one ``EthnoReferences`` entry into an ``(author, year)`` pair.

    An entry matching ``BINFORD_REF_PATTERN`` takes author and year from the
    pattern's ``author`` and ``year`` groups (a trailing comma on the author
    is dropped).  Any other entry is only understood when it contains exactly
    one comma, i.e. ``"<author>, <year>"``; anything after a ``:`` in the year
    part is discarded.  Returns ``(None, None)`` when neither form applies.
    """
    text = text.strip()
    match = BINFORD_REF_PATTERN.match(text)
    if match:
        author = match.group('author').strip()
        if author.endswith(','):
            author = author[:-1].strip()
        return author, match.group('year')

    parts = text.split(",")
    if len(parts) == 2:
        return parts[0].strip(), parts[1].strip().split(':')[0]
    return None, None


def _load_data(val_row, societies=None, sources=None, variables=None,
               descriptions=None):
    """Turn one data row into keyword values plus a set of reference ids.

    :param val_row: mapping with the keys ``soc_id``, ``VarID``, ``Dataset``,
        ``Code``, ``Year``, ``Comment``, ``SubCase`` and ``EthnoReferences``.
    :param societies: mapping from ``soc_id`` to society objects.
    :param sources: mapping from ``(author, year)`` to reference objects.
    :param variables: mapping from variable label to variable objects.
    :param descriptions: mapping from ``(variable.id, code)`` to code
        description objects.
    :return: ``(values, refs)`` where ``values`` is a dict of field values and
        ``refs`` is a set of reference ids, or ``None`` when the society or
        the variable of the row is unknown (a warning is logged).

    Although the lookup mappings default to ``None`` they are used
    unconditionally, so callers have to supply all of them.
    """
    ext_id = val_row.get('soc_id')
    if ext_id not in societies:
        # logging.warn is a deprecated alias (removed in Python 3.13).
        logging.warning(
            "Attempting to load values for %s but no Society object exists, skipping",
            ext_id)
        return None

    society = societies[ext_id]
    variable_id = val_row['VarID']
    variable = variables.get(
        var_number_to_label(val_row['Dataset'], variable_id))
    if variable is None:
        logging.warning(
            "Could not find variable %s for society %s",
            variable_id, society.name)
        return None

    raw_code = val_row['Code']
    values = dict(
        variable=variable,
        society=society,
        source=get_source(val_row['Dataset']),
        coded_value=raw_code,
        # Code descriptions are looked up by the whitespace-stripped code.
        code=descriptions.get((variable.id, raw_code.strip())),
        focal_year=val_row['Year'],
        comment=val_row['Comment'],
        subcase=val_row['SubCase'])

    # Continuous variables additionally carry a numeric copy of the code,
    # unless the code is empty or the literal 'NA'.
    if variable.data_type == 'Continuous' and raw_code and raw_code != 'NA':
        values['coded_value_float'] = float(raw_code)

    refs = set()
    for entry in val_row['EthnoReferences'].split(";"):
        author, year = _parse_ethno_reference(entry)
        if not (author and year):
            # Unparseable (or empty) entries are skipped silently.
            continue
        ref = sources.get((author, year))
        if ref:
            refs.add(ref.id)
        else:  # pragma: no cover
            logging.warning(
                "Could not find reference %s, %s in database, skipping reference",
                author, year)
    return values, refs
# NOTE(review): load_codes is defined a second time further down in this file
# with the same logic; if both are module-level, the later definition is the
# one that stays bound. Consider removing one of them.
def load_codes(items):
    """Create or update a ``CulturalCodeDescription`` for every row in *items*.

    :param items: iterable of mappings with the keys ``Dataset``, ``VarID``,
        ``Code``, ``CodeDescription`` and ``ShortName``.
    :return: number of rows for which a code description was saved.  Rows
        whose variable label is not found in the database are skipped with a
        warning and are not counted.
    """
    count = 0
    for row in items:
        label = var_number_to_label(row['Dataset'], row['VarID'])
        variable = CulturalVariable.objects.filter(label=label).first()
        if not variable:
            # logging.warn is a deprecated alias (removed in Python 3.13).
            logging.warning("Missing variable in database: %s", label)
            continue

        code_description, created = CulturalCodeDescription.objects.get_or_create(
            variable=variable, code=row['Code'])
        code_description.description = row['CodeDescription']
        code_description.short_description = row['ShortName']
        code_description.save()

        # Report what actually happened instead of always claiming "Created".
        message = "%s CulturalCodeDescription: %s" % (
            "Created" if created else "Updated", code_description)
        # On Python 2 the formatted message is a byte string and is decoded as
        # before; on Python 3 it already is text and str has no .decode().
        if isinstance(message, bytes):
            message = message.decode('utf8')
        logging.info(message)
        count += 1
    return count
# NOTE(review): an earlier definition of load_codes with the same logic exists
# in this file; if both are module-level, this later one replaces it.
# Consider removing one of them.
def load_codes(items):
    """Create or update a ``CulturalCodeDescription`` for every row in *items*.

    :param items: iterable of mappings with the keys ``Dataset``, ``VarID``,
        ``Code``, ``CodeDescription`` and ``ShortName``.
    :return: number of rows for which a code description was saved.  Rows
        whose variable label is not found in the database are skipped with a
        warning and are not counted.
    """
    count = 0
    for row in items:
        label = var_number_to_label(row['Dataset'], row['VarID'])
        variable = CulturalVariable.objects.filter(label=label).first()
        if not variable:
            # logging.warn is a deprecated alias (removed in Python 3.13).
            logging.warning("Missing variable in database: %s", label)
            continue

        code_description, created = CulturalCodeDescription.objects.get_or_create(
            variable=variable, code=row['Code'])
        code_description.description = row['CodeDescription']
        code_description.short_description = row['ShortName']
        code_description.save()

        # Report what actually happened instead of always claiming "Created".
        message = "%s CulturalCodeDescription: %s" % (
            "Created" if created else "Updated", code_description)
        # On Python 2 the formatted message is a byte string and is decoded as
        # before; on Python 3 it already is text and str has no .decode().
        if isinstance(message, bytes):
            message = message.decode('utf8')
        logging.info(message)
        count += 1
    return count
# NOTE(review): load_var is defined a second time further down in this file
# with the same logic; if both are module-level, the later definition is the
# one that stays bound. Consider removing one of them.
def load_var(var_dict, categories):
    """Create or update the ``CulturalVariable`` described by *var_dict*.

    :param var_dict: mapping with the keys ``Dataset``, ``VarID``,
        ``VarTitle``, ``VarDefinition``, ``VarType``, ``IndexCategory``
        (comma-separated category names) and optionally ``Units``.
    :param categories: dict cache of category name -> ``CulturalCategory``;
        categories created here are added to it.
    :return: ``True``.
    :raises AssertionError: if ``VarType`` is not one of ``Continuous``,
        ``Categorical`` or ``Ordinal``.
    """
    # Validate before touching the database, so that a bad row does not leave
    # a half-initialised variable behind.  Raised explicitly (rather than via
    # ``assert``) so the check survives ``python -O``.
    data_type = var_dict['VarType']
    if data_type not in ('Continuous', 'Categorical', 'Ordinal'):
        raise AssertionError("Unsupported VarType: %r" % (data_type,))

    label = var_number_to_label(var_dict['Dataset'], var_dict['VarID'])
    variable, created = CulturalVariable.objects.get_or_create(
        label=label, source=get_source(var_dict['Dataset']))
    variable.name = var_dict['VarTitle']
    variable.codebook_info = var_dict['VarDefinition']
    variable.data_type = data_type
    variable.units = var_dict.get('Units', "")

    # Fetch the already linked categories once instead of once per category.
    linked = list(variable.index_categories.all())
    for name in map(clean_category, var_dict['IndexCategory'].split(',')):
        index_category = categories.get(name)
        if not index_category:
            index_category = categories[name] = CulturalCategory.objects.create(
                name=name)
            logging.info("Created CulturalCategory: %s" % name)

        if index_category not in linked:
            variable.index_categories.add(index_category)
            linked.append(index_category)

    variable.save()
    if created:
        # Only claim creation when get_or_create really inserted the row.
        logging.info("Created CulturalVariable: %s" % label)
    logging.info("Saved variable %s - %s" % (label, variable.name))
    return True
# NOTE(review): an earlier definition of load_var with the same logic exists
# in this file; if both are module-level, this later one replaces it.
# Consider removing one of them.
def load_var(var_dict, categories):
    """Create or update the ``CulturalVariable`` described by *var_dict*.

    :param var_dict: mapping with the keys ``Dataset``, ``VarID``,
        ``VarTitle``, ``VarDefinition``, ``VarType``, ``IndexCategory``
        (comma-separated category names) and optionally ``Units``.
    :param categories: dict cache of category name -> ``CulturalCategory``;
        categories created here are added to it.
    :return: ``True``.
    :raises AssertionError: if ``VarType`` is not one of ``Continuous``,
        ``Categorical`` or ``Ordinal``.
    """
    # Validate before touching the database, so that a bad row does not leave
    # a half-initialised variable behind.  Raised explicitly (rather than via
    # ``assert``) so the check survives ``python -O``.
    data_type = var_dict['VarType']
    if data_type not in ('Continuous', 'Categorical', 'Ordinal'):
        raise AssertionError("Unsupported VarType: %r" % (data_type,))

    label = var_number_to_label(var_dict['Dataset'], var_dict['VarID'])
    variable, created = CulturalVariable.objects.get_or_create(
        label=label, source=get_source(var_dict['Dataset']))
    variable.name = var_dict['VarTitle']
    variable.codebook_info = var_dict['VarDefinition']
    variable.data_type = data_type
    variable.units = var_dict.get('Units', "")

    # Fetch the already linked categories once instead of once per category.
    linked = list(variable.index_categories.all())
    for name in map(clean_category, var_dict['IndexCategory'].split(',')):
        index_category = categories.get(name)
        if not index_category:
            index_category = categories[name] = CulturalCategory.objects.create(
                name=name)
            logging.info("Created CulturalCategory: %s" % name)

        if index_category not in linked:
            variable.index_categories.add(index_category)
            linked.append(index_category)

    variable.save()
    if created:
        # Only claim creation when get_or_create really inserted the row.
        logging.info("Created CulturalVariable: %s" % label)
    logging.info("Saved variable %s - %s" % (label, variable.name))
    return True