def load_var(var_dict, categories):
    """Load one variable row from VariableList.csv.

    Creates or updates the CulturalVariable for the row's dataset (EA or
    LRB) and links it to its index categories, creating CulturalCategory
    rows on demand (the ``categories`` dict is used as a cache and is
    mutated in place).

    Returns True on success, False when the dataset is unrecognized.
    """
    if var_dict['Dataset'] == 'EA':
        label = eavar_number_to_label(var_dict['VarID'])
        source = get_source("EA")
    elif var_dict['Dataset'] == 'LRB':
        label = bfvar_number_to_label(var_dict['VarID'])
        source = get_source("Binford")
    else:
        # logging.warn is deprecated; logging.warning is the supported API.
        logging.warning("Dataset %(Dataset)s not in database, skipping row" % var_dict)
        return False
    variable, created = CulturalVariable.objects.get_or_create(
        label=label, source=source)
    variable.name = var_dict['VarTitle']
    variable.codebook_info = var_dict['VarDefinition']
    variable.data_type = var_dict['VarType']
    for c in map(clean_category, var_dict['IndexCategory'].split(',')):
        index_category = categories.get(c)
        if not index_category:
            index_category = categories[c] = CulturalCategory.objects.create(name=c)
            logging.info("Created CulturalCategory: %s" % c)
        if index_category not in variable.index_categories.all():
            variable.index_categories.add(index_category)
    variable.save()
    if created:
        # Only claim creation when get_or_create actually inserted a row;
        # the original logged this unconditionally.
        logging.info("Created CulturalVariable: %s" % label)
    logging.info("Saved variable %s - %s" % (label, variable.name))
    return True
def connect():
    """Ensure a source object exists for the current MODE, connect it,
    and report the current configuration.

    NOTE(review): the two branches set STREAMING and call send_connect()
    in opposite orders; preserved as-is.
    """
    mode = app.config["MODE"]
    if mode == "DATA_CAPTURE":
        if app.config.get("STREAMING_SOURCE") is None:
            app.config["STREAMING_SOURCE"] = get_source(
                app.config,
                device_id=get_device_id(),
                data_source=app.config["DATA_SOURCE"],
                source_type="DATA_CAPTURE",
            )
        app.config["STREAMING_SOURCE"].send_connect()
        app.config["STREAMING"] = True
    elif mode == "RESULTS":
        if app.config.get("RESULTS_SOURCE") is None:
            app.config["RESULTS_SOURCE"] = get_source(
                app.config,
                device_id=get_device_id(),
                data_source=app.config["DATA_SOURCE"],
                source_type="RESULTS",
            )
        app.config["STREAMING"] = True
        app.config["RESULTS_SOURCE"].send_connect()
    return get_config()
def load_societies(items):
    """Replace all Society rows with those built from ``items``.

    Rows whose 'dataset' is neither 'EA' nor 'LRB' are skipped with a
    warning. Returns the number of societies created.
    """
    delete_all(Society)
    societies = []
    for item in items:
        if item['dataset'] == 'EA':
            source = get_source("EA")
        elif item['dataset'] == 'LRB':
            source = get_source("Binford")
        else:
            # logging.warn is deprecated; logging.warning is the supported API.
            logging.warning(
                "Could not determine source for row %s, skipping" % item
            )
            continue
        societies.append(Society(
            ext_id=item['soc_id'],
            xd_id=item['xd_id'],
            name=item['soc_name'],
            source=source,
            alternate_names=item['alternate_names'],
            focal_year=item['main_focal_year'],
        ))
        logging.info("Saving society %s" % item)
    Society.objects.bulk_create(societies)
    return len(societies)
def _load_data(val_row, societies=None, sources=None, variables=None, descriptions=None):
    """Resolve one value row into CulturalValue kwargs plus reference ids.

    Returns a tuple ``(value_kwargs, ref_ids)`` or None when the society,
    dataset, or variable cannot be resolved.
    """
    ext_id = val_row.get('soc_id')
    if ext_id not in societies:
        # logging.warn is deprecated; logging.warning is the supported API.
        logging.warning(
            "Attempting to load values for %s but no Society object exists, skipping"
            % ext_id)
        return
    society = societies[ext_id]
    variable_id = val_row['VarID']
    if val_row['Dataset'] == 'EA':
        source = get_source("EA")
        label = eavar_number_to_label(variable_id)
    elif val_row['Dataset'] == 'Binford':
        source = get_source("Binford")
        label = bfvar_number_to_label(variable_id)
    else:
        logging.warning(
            "Could not determine dataset source for row %s, skipping" % str(val_row))
        return
    variable = variables.get(label)
    if variable is None:
        logging.warning("Could not find variable %s for society %s"
                        % (variable_id, society.name))
        return
    v = dict(
        variable=variable,
        society=society,
        source=source,
        coded_value=val_row['Code'],
        code=descriptions.get((variable.id, val_row['Code'].strip())),
        focal_year=val_row['Year'],
        comment=val_row['Comment'],
        subcase=val_row['SubCase'])
    refs = set()
    for r in val_row['EthnoReferences'].split(";"):
        r = r.strip()
        author, year = None, None
        # Binford references follow a dedicated pattern; everything else is
        # parsed as a simple "Author, Year[:pages]" pair.
        m = BINFORD_REF_PATTERN.match(r)
        if m:
            author, year = m.group('author').strip(), m.group('year')
            if author.endswith(','):
                author = author[:-1].strip()
        else:
            ref_short = r.split(",")
            if len(ref_short) == 2:
                author = ref_short[0].strip()
                year = ref_short[1].strip().split(':')[0]
        if author and year:
            ref = sources.get((author, year))
            if ref:
                refs.add(ref.id)
            else:  # pragma: no cover
                logging.warning(
                    "Could not find reference %s, %s in database, skipping reference"
                    % (author, year))
    return v, refs
def config():
    """Configure the data-capture source from a posted DeviceConfigureForm.

    On POST: disconnects the current source, builds a new DATA_CAPTURE
    source, pushes the app config to it, connects, and caches the config.
    Always responds with the current parsed configuration as JSON.
    """
    form = DeviceConfigureForm()
    if request.method == "POST":
        disconnect()
        app.config["STREAMING"] = False
        source = get_source(
            app.config,
            data_source=form.data["source"].upper(),
            source_type="DATA_CAPTURE",
            device_id=form.data["device_id"],
        )
        # Debug print() calls removed: they cluttered stdout, and
        # print(app.config) could leak secrets from the configuration.
        source.set_config(app.config)
        source.send_connect()
        app.config["STREAMING"] = True
        app.config["MODE"] = "DATA_CAPTURE"
        cache_config(app.config)
        app.config["STREAMING_SOURCE"] = source
    ret = parse_current_config()
    return Response(dumps(ret), mimetype="application/json")
def _load_environmental(val_row, variables, societies, objs):
    """Append an EnvironmentalValue built from one CSV row to ``objs``.

    Skips rows whose code is 'NA', whose society is unknown, or whose
    variable is unknown (warning once per missing variable id via the
    module-level ``_missing_variables`` set). Returns True when a value
    was appended, None otherwise.
    """
    if val_row['Code'] == 'NA':
        return
    global _missing_variables
    society = societies.get(val_row['soc_id'])
    if society is None:
        # logging.warn is deprecated; logging.warning is the supported API.
        logging.warning("Unable to find a Society with ext_id %s, skipping ..."
                        % val_row['soc_id'])
        return
    variable = variables.get(val_row['VarID'])
    if variable is None:
        # Warn only once per missing variable id.
        if val_row['VarID'] not in _missing_variables:
            logging.warning("Could not find environmental variable %s" % val_row['VarID'])
            _missing_variables.add(val_row['VarID'])
        return
    objs.append(
        EnvironmentalValue(variable=variable,
                           value=float(val_row['Code']),
                           comment=val_row['Comment'],
                           society=society,
                           source=get_source(val_row['Dataset'])))
    return True
def config_results():
    """Configure the results source from a posted DeviceConfigureForm.

    NOTE(review): on POST this returns get_config() directly, so the
    parse_current_config() path below only serves non-POST requests —
    unlike the sibling config() view, which always falls through. Verify
    this asymmetry is intentional.
    """
    form = DeviceConfigureForm()
    if request.method == "POST":
        disconnect()
        app.config["STREAMING"] = False
        results_source = get_source(
            app.config,
            data_source=form.data["source"].upper(),
            device_id=form.data["device_id"],
            source_type="RESULTS",
        )
        results_source.set_config(app.config)
        results_source.send_connect()
        app.config["STREAMING"] = True
        app.config["MODE"] = "RESULTS"
        app.config["RESULT_SOURCE"] = results_source
        cache_config(app.config)
        return get_config()
    ret = parse_current_config()
    return Response(dumps(ret), mimetype="application/json")
def config():
    """Configure the device source for the mode selected in the form.

    On POST: disconnects, builds a source for the requested mode, reads
    and applies configuration, connects, and caches the config. Always
    responds with the current parsed configuration as JSON.
    """
    form = DeviceConfigureForm()
    if request.method == "POST":
        disconnect()
        source = get_source(
            app.config,
            data_source=form.data["source"].upper(),
            source_type=form.data["mode"].upper(),
            device_id=form.data["device_id"],
        )
        source.read_config()
        # Debug print() calls ("SET CONFIG" / "SEND CONNECT") removed;
        # they were leftover tracing noise on stdout.
        source.set_app_config(app.config)
        source.connect()
        app.config["MODE"] = form.data["mode"].upper()
        cache_config(app.config)
        app.config["DEVICE_SOURCE"] = source
    ret = parse_current_config()
    return Response(dumps(ret), mimetype="application/json")
def load_var(ds, var, categories):
    """Create a Variable with its categories and code descriptions.

    Categories are cached in ``categories`` keyed by (dataset type, name)
    and created on demand. Returns 1 so callers can sum created counts.
    """
    variable = Variable.objects.create(
        name=var.title,
        type=ds.type,
        codebook_info=var.definition,
        data_type=var.type,
        units=var.units,
        label=var.id,
        source=get_source(ds))
    for c in var.category:
        index_category = categories.get((ds.type, c))
        if not index_category:
            index_category = categories[(ds.type, c)] = Category.objects.create(
                name=c, type=ds.type)
            logging.info("Created %s category: %s" % (ds.type, c))
        if index_category not in variable.index_categories.all():
            variable.index_categories.add(index_category)
    for code in var.codes:
        code_description, created = CodeDescription.objects.get_or_create(
            variable=variable, code=code.code)
        code_description.description = code.description
        code_description.short_description = code.name
        code_description.save()
        # Bug fix: str has no .decode() in Python 3, so the original
        # ("..." % code_description).decode('utf8') raised AttributeError.
        # The interpolated value is already text; log it directly.
        logging.info("Created CulturalCodeDescription: %s" % code_description)
    variable.save()
    logging.info("Created Variable: %s" % variable.label)
    return 1
def _load_environmental(val_row, variables, societies, objs):
    """Append an EnvironmentalValue built from one CSV row to ``objs``.

    Skips rows whose code is 'NA', whose society is unknown, or whose
    variable is unknown (warning once per missing id via the module-level
    ``_missing_variables`` set). Returns True when a value was appended.
    """
    if val_row['Code'] == 'NA':
        return
    global _missing_variables
    society = societies.get(val_row['soc_id'])
    if society is None:
        # logging.warn is deprecated; logging.warning is the supported API.
        logging.warning(
            "Unable to find a Society with ext_id %s, skipping ..." % val_row['soc_id'])
        return
    variable = variables.get(val_row['VarID'])
    if variable is None:
        # Warn only once per missing variable id.
        if val_row['VarID'] not in _missing_variables:
            logging.warning("Could not find environmental variable %s" % val_row['VarID'])
            _missing_variables.add(val_row['VarID'])
        return
    objs.append(EnvironmentalValue(
        variable=variable,
        value=float(val_row['Code']),
        comment=val_row['Comment'],
        society=society,
        source=get_source(val_row['Dataset'])))
    return True
def index():
    """
    Receives event data from a webhook, checks if the source is authorized,
    checks if the signature is verified, and then sends the data to Pub/Sub.
    """
    # Reject requests from unknown sources outright.
    source = sources.get_source(request.headers)
    if source not in sources.AUTHORIZED_SOURCES:
        raise Exception(f"Source not authorized: {source}")
    auth_source = sources.AUTHORIZED_SOURCES[source]

    # The signature may arrive in a header or a query parameter.
    candidates = {**request.headers, **request.args}
    signature = candidates.get(auth_source.signature, None)
    body = request.data

    # Verify the signature before forwarding anything.
    if not auth_source.verification(signature, body):
        raise Exception("Unverified Signature")

    publish_to_pubsub(source, body, dict(request.headers))

    # Flush stdout to avoid log buffering.
    sys.stdout.flush()
    return "", 204
def load_var(ds, var, categories):
    """Create a Variable with its categories and code descriptions.

    Categories are cached in ``categories`` keyed by (dataset type, name)
    and created on demand. Returns 1 so callers can sum created counts.
    """
    variable = Variable.objects.create(name=var.title,
                                       type=ds.type,
                                       codebook_info=var.definition,
                                       data_type=var.type,
                                       units=var.units,
                                       label=var.id,
                                       source=get_source(ds))
    for c in var.category:
        index_category = categories.get((ds.type, c))
        if not index_category:
            index_category = categories[(ds.type, c)] = Category.objects.create(
                name=c, type=ds.type)
            logging.info("Created %s category: %s" % (ds.type, c))
        if index_category not in variable.index_categories.all():
            variable.index_categories.add(index_category)
    for code in var.codes:
        code_description, created = CodeDescription.objects.get_or_create(
            variable=variable, code=code.code)
        code_description.description = code.description
        code_description.short_description = code.name
        code_description.save()
        # Bug fix: str has no .decode() in Python 3, so the original
        # ("..." % code_description).decode('utf8') raised AttributeError.
        logging.info("Created CulturalCodeDescription: %s" % code_description)
    variable.save()
    logging.info("Created Variable: %s" % variable.label)
    return 1
def _load_data(val_row, societies=None, sources=None, variables=None, descriptions=None):
    """Resolve one value row into CulturalValue kwargs plus reference ids.

    Adds ``coded_value_float`` for continuous variables with a usable
    code. Returns ``(value_kwargs, ref_ids)`` or None when the society or
    variable cannot be resolved.
    """
    ext_id = val_row.get('soc_id')
    if ext_id not in societies:
        # logging.warn is deprecated; logging.warning is the supported API.
        logging.warning(
            "Attempting to load values for %s but no Society object exists, skipping"
            % ext_id)
        return
    society = societies[ext_id]
    variable_id = val_row['VarID']
    variable = variables.get(
        var_number_to_label(val_row['Dataset'], variable_id))
    if variable is None:
        logging.warning("Could not find variable %s for society %s"
                        % (variable_id, society.name))
        return
    v = dict(variable=variable,
             society=society,
             source=get_source(val_row['Dataset']),
             coded_value=val_row['Code'],
             code=descriptions.get((variable.id, val_row['Code'].strip())),
             focal_year=val_row['Year'],
             comment=val_row['Comment'],
             subcase=val_row['SubCase'])
    # Continuous variables also get a numeric copy of the code for
    # range queries; skip empty and 'NA' codes.
    if variable.data_type == 'Continuous' and val_row['Code'] and val_row['Code'] != 'NA':
        v['coded_value_float'] = float(val_row['Code'])
    refs = set()
    for r in val_row['EthnoReferences'].split(";"):
        r = r.strip()
        author, year = None, None
        # Binford references follow a dedicated pattern; everything else is
        # parsed as a simple "Author, Year[:pages]" pair.
        m = BINFORD_REF_PATTERN.match(r)
        if m:
            author, year = m.group('author').strip(), m.group('year')
            if author.endswith(','):
                author = author[:-1].strip()
        else:
            ref_short = r.split(",")
            if len(ref_short) == 2:
                author = ref_short[0].strip()
                year = ref_short[1].strip().split(':')[0]
        if author and year:
            ref = sources.get((author, year))
            if ref:
                refs.add(ref.id)
            else:  # pragma: no cover
                logging.warning(
                    "Could not find reference %s, %s in database, skipping reference"
                    % (author, year))
    return v, refs
def __init__(self, settings):
    """Build the canvas and instantiate every configured source against it."""
    self.canvas = Canvas(settings)
    log.info(settings)
    log.info(settings.get("sources"))
    log.info(type(settings["sources"]))
    # Each entry names a source class; look it up and construct it with
    # the shared settings and canvas.
    self.sources = []
    for source_name in settings.get("sources", []):
        source_cls = sources.get_source(source_name)
        self.sources.append(source_cls(settings, self.canvas))
def load_societies(items):
    """Replace all Society rows with those from ``items`` plus linked equivalents.

    For each item a primary Society is created, then one Society per
    non-empty cross-dataset link column, deduplicated by ext_id against
    everything added so far. Returns the number of societies created.
    """
    society_links = [
        'SCCS_society_equivalent',
        'WNAI_society_equivalent1',
        'WNAI_society_equivalent2',
        'WNAI_society_equivalent3',
        'WNAI_society_equivalent4',
        'WNAI_society_equivalent5',
    ]
    delete_all(Society)
    societies = []
    # Maintain a set of seen ext_ids: the original rescanned the whole
    # list for every linked society (O(n^2)).
    seen_ext_ids = set()

    def _append(society):
        # Record each appended society's ext_id for O(1) dedup checks.
        societies.append(society)
        seen_ext_ids.add(society.ext_id)

    for item in items:
        _append(Society(
            ext_id=item['soc_id'],
            xd_id=item['xd_id'],
            original_name=item['ORIG_name_and_ID_in_this_dataset'],
            name=item['pref_name_for_society'],
            source=get_source(item['dataset']),
            alternate_names=item['alt_names_by_society'],
            focal_year=item['main_focal_year'],
            hraf_link=item['HRAF_name_ID'],
            chirila_link=item['CHIRILA_society_equivalent']
        ))
        for key in society_links:
            value = item.get(key)
            if value:
                # Link values look like "Name (ID)": take the text after
                # the last '(' and drop the trailing ')'.
                ext_id = value.split('(')[-1][:-1]
                society = Society(
                    ext_id=ext_id,
                    xd_id=item['xd_id'],
                    original_name=value,
                    name=item['pref_name_for_society'],
                    alternate_names=item['alt_names_by_society'],
                    focal_year=item['main_focal_year'],
                    source=get_source(key[0:key.find('_')])
                )
                if society.ext_id not in seen_ext_ids:
                    _append(society)
                    logging.info("Saving society %s" % society.ext_id)
        logging.info("Saving society %s" % item)
    Society.objects.bulk_create(societies)
    return len(societies)
def load_data(repos):
    """Bulk-load Value rows (and their value↔source links) for all datasets.

    Keeps a manual ``pk`` counter so the value↔source association table can
    be rebuilt with executemany after bulk_create; this assumes the value id
    sequence starts at 1 (see the commented reset below). Returns the total
    Value count.
    """
    refs = []
    societies = {s.ext_id: s for s in Society.objects.all()}
    kw = dict(sources={(s.author, s.year): s for s in Source.objects.all()},
              descriptions={(vcd.variable_id, vcd.code): vcd
                            for vcd in CodeDescription.objects.all()})
    #
    # To speed up the data load, we first delete all relevant objects, and then recreate
    # them using bulk_create.
    # Note that since we must rebuild the association table between values and sources
    # as well, we must keep control over the inserted primary keys for values. To do so,
    # we reset the id sequence as well.
    #
    #CulturalValue.objects.all().delete()
    #with connection.cursor() as c:
    #    for table in ['culturalvalue_references', 'culturalvalue']:
    #        c.execute("ALTER SEQUENCE dplace_app_%s_id_seq RESTART WITH 1" % table)
    variables = {var.label: var for var in Variable.objects.all()}
    objs = []
    pk = 0
    for ds in repos.datasets:
        for item in ds.data:
            if item.soc_id not in societies:
                # logging.warn is deprecated; logging.warning is supported.
                logging.warning('value for unknown society {0}'.format(item.soc_id))
                continue
            if item.var_id not in variables:
                logging.warning('value for unknown variable {0}'.format(item.var_id))
                continue
            v, _refs = _load_data(
                ds, item, get_source(ds),
                societies[item.soc_id], variables[item.var_id], **kw)
            if v:
                pk += 1
                objs.append(Value(**v))
                refs.extend([(pk, sid) for sid in _refs or []])
    Value.objects.bulk_create(objs, batch_size=1000)
    with connection.cursor() as c:
        c.executemany(
            """\
INSERT INTO dplace_app_value_references (value_id, source_id) VALUES (%s, %s)""",
            refs)
    return Value.objects.count()
def scan():
    """List devices available for the data source selected in the form.

    Builds an unconnected source and returns its device ids as JSON.
    """
    form = DeviceScanForm()
    # Debug print() of the selected source removed (stdout noise).
    source = get_source(
        app.config,
        device_id=None,
        data_source=form.data["source"].upper(),
        connect=False,
    )
    device_id_list = source.list_available_devices()
    return Response(json.dumps(device_id_list), mimetype="application/json")
def connect():
    """Lazily create the device source for the current mode, push the app
    config to it, connect, and return the current configuration."""
    device_source = app.config.get("DEVICE_SOURCE")
    if device_source is None:
        device_source = get_source(
            app.config,
            device_id=get_device_id(),
            data_source=app.config["DATA_SOURCE"],
            source_type=app.config["MODE"],
        )
        app.config["DEVICE_SOURCE"] = device_source
    device_source.update_config(app.config)
    device_source.connect()
    return get_config()
def results():
    """Stream result data, creating the RESULTS source on first use."""
    if app.config.get("RESULT_SOURCE") is None:
        # First request: build the results source and mark streaming active.
        app.config["RESULT_SOURCE"] = get_source(
            app.config,
            device_id=get_device_id(),
            data_source=app.config["DATA_SOURCE"],
            source_type="RESULTS",
        )
        app.config["STREAMING"] = True
    result_iter = app.config["RESULT_SOURCE"].read_result_data()
    return Response(
        stream_with_context(result_iter),
        mimetype="application/octet-stream",
    )
def stream():
    """Stream captured data, creating the DATA_CAPTURE source on first use."""
    if app.config.get("STREAMING_SOURCE", None) is None:
        app.config["STREAMING_SOURCE"] = get_source(
            app.config,
            device_id=get_device_id(),
            data_source=app.config["DATA_SOURCE"],
            source_type="DATA_CAPTURE",
        )
        app.config["STREAMING"] = True
        # Debug print("source was none") removed (stdout noise).
    return Response(
        stream_with_context(app.config["STREAMING_SOURCE"].read_data()),
        mimetype="application/octet-stream",
    )
def _load_environmental(env_dict, variables, societies):
    """Create an Environmental record plus its values for one CSV row.

    Only one Environmental record is kept per society; an existing record
    is returned untouched. Returns the Environmental instance, or None when
    the society cannot be resolved.
    """
    ext_id = env_dict['ID']
    source = get_source(env_dict['Source'])
    # hack for B109 vs. 109
    if source.author == 'Binford' and ext_id.find('B') == -1:
        ext_id = 'B' + ext_id
    society = societies.get((ext_id, source.id))
    if society is None:
        # logging.warn is deprecated; logging.warning is the supported API.
        logging.warning(
            "Unable to find a Society object with ext_id %s and source %s, skipping..."
            % (ext_id, source))
        return
    # This limits the environmental data to one record per society record
    found_environmentals = Environmental.objects.filter(society=society).all()
    if len(found_environmentals) == 0:
        iso_code = iso_from_code(env_dict['iso'])
        # Create the base Environmental
        environmental, created = Environmental.objects.get_or_create(
            society=society,
            source=source,
            iso_code=iso_code
        )
        for k in ENVIRONMENTAL_MAP:  # keys are the columns in the CSV file
            var_dict = ENVIRONMENTAL_MAP[k]
            variable = variables.get(var_dict['name'])
            if variable is None:  # pragma: no cover
                logging.warning("Did not find an EnvironmentalVariable with name %s"
                                % var_dict['name'])
                continue
            if env_dict[k] and env_dict[k] != 'NA':
                value = float(env_dict[k])
                EnvironmentalValue.objects.get_or_create(
                    variable=variable, value=value, environmental=environmental,
                    source=source)
                logging.info(
                    "Created environmental value for variable %s and society %s"
                    % (var_dict['name'], society)
                )
    else:
        environmental = found_environmentals[0]
    return environmental
def _load_environmental(env_dict, variables, societies, objs):
    """Create an Environmental record and append its values to ``objs``.

    Only one Environmental record is kept per society; an existing record
    is returned untouched. Returns the Environmental instance, or None
    when the society cannot be resolved.
    """
    ext_id = env_dict['soc_ID']
    source = get_source(env_dict['Source'])
    # hack for B109 vs. 109
    if source.author == 'Binford' and ext_id.find('B') == -1:
        ext_id = 'B' + ext_id
    society = societies.get((ext_id, source.id))
    if society is None:
        # logging.warn is deprecated; logging.warning is the supported API.
        logging.warning(
            "Unable to find a Society object with ext_id %s and source %s, skipping..."
            % (ext_id, source))
        return
    # This limits the environmental data to one record per society record
    found_environmentals = Environmental.objects.filter(society=society).all()
    if len(found_environmentals) == 0:
        if society.language is not None:
            iso_code = society.language.iso_code
        else:
            iso_code = None
        # Create the base Environmental
        environmental, created = Environmental.objects.get_or_create(
            society=society,
            source=source,
            iso_code=iso_code
        )
        for v in variables:
            # Column names are the variable name with spaces removed.
            key = ''.join(v.name.split(' '))
            if env_dict[key] and env_dict[key] != 'NA':
                value = float(env_dict[key])
                objs.append(EnvironmentalValue(
                    variable=v,
                    value=value,
                    environmental=environmental,
                    source=source
                ))
    else:
        environmental = found_environmentals[0]
    return environmental
def index():
    """
    Receives event data from a webhook, checks if the source is authorized,
    checks if the signature is verified, and then sends the data to Pub/Sub.
    """
    source = sources.get_source(request.headers)
    if source not in sources.AUTHORIZED_SOURCES:
        raise Exception(
            f"EventHandler: Source not authorized: {source}\n request headers: {request.headers}"
        )
    auth_source = sources.AUTHORIZED_SOURCES[source]

    # The signature may arrive in a header or a query parameter.
    candidates = {**request.headers, **request.args}
    signature = candidates.get(auth_source.signature, None)
    body = request.data

    # Enable for debugging else commented checking signature passed in
    # print(f"Request headers carrying signatue {headers}")
    # print(f"Request body carrying {body}")

    # Verify the signature before forwarding anything.
    if not auth_source.verification(signature, body):
        raise Exception("EventHandler: Unverified Signature")

    # Strip the Auth header so it is never published to Pub/Sub.
    pubsub_headers = dict(request.headers)
    pubsub_headers.pop("Authorization", None)

    publish_to_pubsub(source, body, pubsub_headers)

    # Flush the stdout to avoid log buffering.
    sys.stdout.flush()
    return "", 204
def load_societies(repos):
    """Bulk-create Society rows from every dataset in ``repos``.

    Coordinates that fail float conversion are stored as None with a
    warning. Returns the number of societies created.
    """
    societies = []
    for ds in repos.datasets:
        for item in ds.societies:
            lat, lon, olat, olon = None, None, None, None
            try:
                lat, lon = map(float, [item.Lat, item.Long])
            except (TypeError, ValueError):
                # logging.warn is deprecated; logging.warning is supported.
                logging.warning("Unable to create coordinates for %s" % item)
            try:
                olat, olon = map(float, [item.origLat, item.origLong])
            except (TypeError, ValueError):
                logging.warning("Unable to create original coordinates for %s" % item)
            societies.append(
                Society(
                    ext_id=item.id,
                    xd_id=item.xd_id,
                    original_name=item.ORIG_name_and_ID_in_this_dataset,
                    name=item.pref_name_for_society,
                    source=get_source(ds),
                    alternate_names=item.alt_names_by_society,
                    focal_year=item.main_focal_year,
                    hraf_link=item.HRAF_name_ID,
                    #chirila_link=item.CHIRILA_society_equivalent,
                    latitude=lat,
                    longitude=lon,
                    original_latitude=olat,
                    original_longitude=olon,
                ))
            logging.info("Saving society %s" % item.id)
    Society.objects.bulk_create(societies)
    #
    # TODO: load cross-dataset relations!
    #
    return len(societies)
def load_var(var_dict, categories):
    """Load one cultural variable row.

    Creates or updates the CulturalVariable and links it to its index
    categories, creating CulturalCategory rows on demand. Returns True.
    """
    label = var_number_to_label(var_dict['Dataset'], var_dict['VarID'])
    variable, created = CulturalVariable.objects.get_or_create(
        label=label, source=get_source(var_dict['Dataset']))
    variable.name = var_dict['VarTitle']
    variable.codebook_info = var_dict['VarDefinition']
    variable.data_type = var_dict['VarType']
    assert variable.data_type in ['Continuous', 'Categorical', 'Ordinal']
    # dict.get expresses the optional column more directly than the
    # conditional it replaces.
    variable.units = var_dict.get('Units', "")
    for c in map(clean_category, var_dict['IndexCategory'].split(',')):
        index_category = categories.get(c)
        if not index_category:
            index_category = categories[c] = CulturalCategory.objects.create(name=c)
            logging.info("Created CulturalCategory: %s" % c)
        if index_category not in variable.index_categories.all():
            variable.index_categories.add(index_category)
    variable.save()
    if created:
        # Only claim creation when get_or_create actually inserted a row;
        # the original logged this unconditionally.
        logging.info("Created CulturalVariable: %s" % label)
    logging.info("Saved variable %s - %s" % (label, variable.name))
    return True
def load_societies(repos):
    """Bulk-create Society rows from every dataset in ``repos``.

    Coordinates that fail float conversion are stored as None with a
    warning. Returns the number of societies created.
    """
    societies = []
    for ds in repos.datasets:
        for item in ds.societies:
            lat, lon, olat, olon = None, None, None, None
            try:
                lat, lon = map(float, [item.Lat, item.Long])
            except (TypeError, ValueError):
                # logging.warn is deprecated; logging.warning is supported.
                logging.warning("Unable to create coordinates for %s" % item)
            try:
                olat, olon = map(float, [item.origLat, item.origLong])
            except (TypeError, ValueError):
                logging.warning("Unable to create original coordinates for %s" % item)
            societies.append(Society(
                ext_id=item.id,
                xd_id=item.xd_id,
                original_name=item.ORIG_name_and_ID_in_this_dataset,
                name=item.pref_name_for_society,
                source=get_source(ds),
                alternate_names=item.alt_names_by_society,
                focal_year=item.main_focal_year,
                hraf_link=item.HRAF_name_ID,
                #chirila_link=item.CHIRILA_society_equivalent,
                latitude=lat,
                longitude=lon,
                original_latitude=olat,
                original_longitude=olon,
            ))
            logging.info("Saving society %s" % item.id)
    Society.objects.bulk_create(societies)
    #
    # TODO: load cross-dataset relations!
    #
    return len(societies)
def load_var(var_dict, categories):
    """Load one cultural variable row.

    Creates or updates the CulturalVariable and links it to its index
    categories, creating CulturalCategory rows on demand. Returns True.
    """
    label = var_number_to_label(var_dict['Dataset'], var_dict['VarID'])
    variable, created = CulturalVariable.objects.get_or_create(
        label=label, source=get_source(var_dict['Dataset']))
    variable.name = var_dict['VarTitle']
    variable.codebook_info = var_dict['VarDefinition']
    variable.data_type = var_dict['VarType']
    assert variable.data_type in ['Continuous', 'Categorical', 'Ordinal']
    # dict.get expresses the optional column more directly than the
    # conditional it replaces.
    variable.units = var_dict.get('Units', "")
    for c in map(clean_category, var_dict['IndexCategory'].split(',')):
        index_category = categories.get(c)
        if not index_category:
            index_category = categories[c] = CulturalCategory.objects.create(
                name=c)
            logging.info("Created CulturalCategory: %s" % c)
        if index_category not in variable.index_categories.all():
            variable.index_categories.add(index_category)
    variable.save()
    if created:
        # Only claim creation when get_or_create actually inserted a row;
        # the original logged this unconditionally.
        logging.info("Created CulturalVariable: %s" % label)
    logging.info("Saved variable %s - %s" % (label, variable.name))
    return True
def get_sorter_source(source_id):
    """Return the sorter source registered for ``source_id``.

    Raises NotFound when no source is registered under that id.
    """
    found = sources.get_source(source_id)
    if found is None:
        raise NotFound('source ID does not have a corresponding source')
    return found