Ejemplo n.º 1
0
def load_var(var_dict, categories):
    """
    Create or update the CulturalVariable described by one variable-list row.

    :param var_dict: row mapping; reads ``Dataset``, ``VarID``, ``VarTitle``,
        ``VarDefinition``, ``VarType`` and ``IndexCategory`` (comma separated).
    :param categories: dict of category name -> CulturalCategory, used as a
        cache and extended in place whenever a category has to be created.
    :return: ``True`` once the variable is saved, ``False`` when the row is
        skipped because ``Dataset`` is neither ``EA`` nor ``LRB``.
    """
    dataset = var_dict['Dataset']
    if dataset == 'EA':
        label = eavar_number_to_label(var_dict['VarID'])
        source = get_source("EA")
    elif dataset == 'LRB':
        label = bfvar_number_to_label(var_dict['VarID'])
        source = get_source("Binford")
    else:
        # logging.warn is a deprecated alias; logging.warning is the
        # documented call.
        logging.warning(
            "Dataset %(Dataset)s not in database, skipping row" % var_dict)
        return False

    # The variable is looked up by (label, source); its descriptive fields are
    # overwritten from the row whether or not it already existed.
    variable, created = CulturalVariable.objects.get_or_create(
        label=label, source=source)
    variable.name = var_dict['VarTitle']
    variable.codebook_info = var_dict['VarDefinition']
    variable.data_type = var_dict['VarType']

    for c in map(clean_category, var_dict['IndexCategory'].split(',')):
        index_category = categories.get(c)
        if not index_category:
            # Unknown category: create it and remember it for later rows.
            index_category = categories[c] = CulturalCategory.objects.create(name=c)
            logging.info("Created CulturalCategory: %s" % c)

        if index_category not in variable.index_categories.all():
            variable.index_categories.add(index_category)

    variable.save()
    logging.info("Created CulturalVariable: %s" % label)
    logging.info("Saved variable %s - %s" % (label, variable.name))
    return True
Ejemplo n.º 2
0
def connect():
    """
    Lazily create and connect the source that matches the configured mode.

    With ``MODE == "DATA_CAPTURE"`` the source is cached under
    ``STREAMING_SOURCE``; with ``MODE == "RESULTS"`` it is cached under
    ``RESULTS_SOURCE``. Nothing is created when the relevant entry is already
    set or when the mode has any other value.

    :return: the result of ``get_config()``.
    """
    mode = app.config["MODE"]

    if mode == "DATA_CAPTURE":
        if app.config.get("STREAMING_SOURCE") is None:
            capture_source = get_source(
                app.config,
                device_id=get_device_id(),
                data_source=app.config["DATA_SOURCE"],
                source_type="DATA_CAPTURE",
            )
            app.config["STREAMING_SOURCE"] = capture_source

            # Connect first; the streaming flag is raised only afterwards.
            capture_source.send_connect()
            app.config["STREAMING"] = True

    elif mode == "RESULTS":
        if app.config.get("RESULTS_SOURCE") is None:
            results_source = get_source(
                app.config,
                device_id=get_device_id(),
                data_source=app.config["DATA_SOURCE"],
                source_type="RESULTS",
            )
            app.config["RESULTS_SOURCE"] = results_source

            # In this branch the flag is raised before connecting.
            app.config["STREAMING"] = True
            results_source.send_connect()

    return get_config()
Ejemplo n.º 3
0
def load_societies(items):
    """
    Replace all Society rows with the societies described by ``items``.

    Existing societies are removed through ``delete_all(Society)`` and the new
    ones are inserted with a single ``bulk_create``.

    :param items: iterable of row mappings; reads ``dataset``, ``soc_id``,
        ``xd_id``, ``soc_name``, ``alternate_names`` and ``main_focal_year``.
    :return: number of Society objects passed to ``bulk_create``; rows whose
        ``dataset`` is neither ``EA`` nor ``LRB`` are skipped and not counted.
    """
    delete_all(Society)
    societies = []
    for item in items:
        if item['dataset'] == 'EA':
            source = get_source("EA")
        elif item['dataset'] == 'LRB':
            source = get_source("Binford")
        else:
            # logging.warn is a deprecated alias; logging.warning is the
            # documented call.
            logging.warning(
                "Could not determine source for row %s, skipping" % item
            )
            continue

        societies.append(Society(
            ext_id=item['soc_id'],
            xd_id=item['xd_id'],
            name=item['soc_name'],
            source=source,
            alternate_names=item['alternate_names'],
            focal_year=item['main_focal_year'],
        ))
        logging.info("Saving society %s" % item)

    Society.objects.bulk_create(societies)
    return len(societies)
Ejemplo n.º 4
0
def _load_data(val_row, societies=None, sources=None, variables=None, descriptions=None):
    ext_id = val_row.get('soc_id')
    if ext_id not in societies:
        logging.warn(
            "Attempting to load values for %s but no Society object exists, skipping"
            % ext_id)
        return

    society = societies[ext_id]
    variable_id = val_row['VarID']

    if val_row['Dataset'] == 'EA':
        source = get_source("EA")
        label = eavar_number_to_label(variable_id)
    elif val_row['Dataset'] == 'Binford':
        source = get_source("Binford")
        label = bfvar_number_to_label(variable_id)
    else:
        logging.warn("Could not determine dataset source for row %s, skipping" % str(val_row))
        return

    variable = variables.get(label)
    if variable is None:
        logging.warn("Could not find variable %s for society %s" % (variable_id, society.name))
        return

    v = dict(
        variable=variable,
        society=society,
        source=source,
        coded_value=val_row['Code'],
        code=descriptions.get((variable.id, val_row['Code'].strip())),
        focal_year=val_row['Year'],
        comment=val_row['Comment'],
        subcase=val_row['SubCase'])

    refs = set()
    for r in val_row['EthnoReferences'].split(";"):
        r = r.strip()
        author, year = None, None
        m = BINFORD_REF_PATTERN.match(r)
        if m:
            author, year = m.group('author').strip(), m.group('year')
            if author.endswith(','):
                author = author[:-1].strip()
        else:
            ref_short = r.split(",")
            if len(ref_short) == 2:
                author = ref_short[0].strip()
                year = ref_short[1].strip().split(':')[0]
        if author and year:
            ref = sources.get((author, year))
            if ref:
                refs.add(ref.id)
            else:  # pragma: no cover
                logging.warn(
                    "Could not find reference %s, %s in database, skipping reference"
                    % (author, year))
    return v, refs
Ejemplo n.º 5
0
def config():
    """
    Handle the device configuration request.

    On ``POST`` the current source is disconnected, a ``DATA_CAPTURE`` source
    is built from the submitted form, the application config is pushed to it,
    it is connected, and the configuration is cached. For every method the
    current configuration is returned as an ``application/json`` response.
    """
    form = DeviceConfigureForm()
    is_post = request.method == "POST"

    if is_post:
        disconnect()
        app.config["STREAMING"] = False

        capture_source = get_source(
            app.config,
            data_source=form.data["source"].upper(),
            source_type="DATA_CAPTURE",
            device_id=form.data["device_id"],
        )

        print("SET CONFIG")
        capture_source.set_config(app.config)

        print("SEND CONNECT")
        capture_source.send_connect()

        app.config["STREAMING"] = True
        app.config["MODE"] = "DATA_CAPTURE"

        # The configuration is cached before the source itself is stored.
        cache_config(app.config)
        app.config["STREAMING_SOURCE"] = capture_source

        print(app.config)

    current_config = parse_current_config()
    body = dumps(current_config)
    return Response(body, mimetype="application/json")
Ejemplo n.º 6
0
def _load_environmental(val_row, variables, societies, objs):
    if val_row['Code'] == 'NA':
        return
    global _missing_variables

    society = societies.get(val_row['soc_id'])
    if society is None:
        logging.warn("Unable to find a Society with ext_id %s, skipping ..." %
                     val_row['soc_id'])
        return

    variable = variables.get(val_row['VarID'])
    if variable is None:
        if val_row['VarID'] not in _missing_variables:
            logging.warn("Could not find environmental variable %s" %
                         val_row['VarID'])
            _missing_variables.add(val_row['VarID'])
        return

    objs.append(
        EnvironmentalValue(variable=variable,
                           value=float(val_row['Code']),
                           comment=val_row['Comment'],
                           society=society,
                           source=get_source(val_row['Dataset'])))
    return True
Ejemplo n.º 7
0
def config_results():
    """
    Handle the results-mode configuration request.

    On ``POST`` the current source is disconnected, a ``RESULTS`` source is
    built from the submitted form, configured and connected, stored under
    ``RESULT_SOURCE``, and ``get_config()`` is returned. For any other method
    the current configuration is returned as an ``application/json`` response.
    """
    form = DeviceConfigureForm()

    if request.method != "POST":
        current_config = parse_current_config()
        return Response(dumps(current_config), mimetype="application/json")

    disconnect()
    app.config["STREAMING"] = False

    results_source = get_source(
        app.config,
        data_source=form.data["source"].upper(),
        device_id=form.data["device_id"],
        source_type="RESULTS",
    )
    results_source.set_config(app.config)
    results_source.send_connect()

    app.config["STREAMING"] = True
    app.config["MODE"] = "RESULTS"
    app.config["RESULT_SOURCE"] = results_source

    # Unlike the capture handler, the source is stored before caching.
    cache_config(app.config)

    return get_config()
Ejemplo n.º 8
0
def config():
    """
    Handle the device configuration request.

    On ``POST`` the current source is disconnected and a new one is built from
    the submitted ``source``, ``mode`` and ``device_id``; it reads its config,
    receives the application config, connects, and is stored under
    ``DEVICE_SOURCE``. For every method the current configuration is returned
    as an ``application/json`` response.
    """
    form = DeviceConfigureForm()
    is_post = request.method == "POST"

    if is_post:
        disconnect()

        device_source = get_source(
            app.config,
            data_source=form.data["source"].upper(),
            source_type=form.data["mode"].upper(),
            device_id=form.data["device_id"],
        )
        device_source.read_config()

        print("SET CONFIG")
        device_source.set_app_config(app.config)

        print("SEND CONNECT")
        device_source.connect()

        app.config["MODE"] = form.data["mode"].upper()

        # The configuration is cached before the source itself is stored.
        cache_config(app.config)
        app.config["DEVICE_SOURCE"] = device_source

    current_config = parse_current_config()
    body = dumps(current_config)
    return Response(body, mimetype="application/json")
Ejemplo n.º 9
0
def load_var(ds, var, categories):
    """
    Create a Variable for ``var`` together with its categories and codes.

    :param ds: dataset; ``ds.type`` is stored on the variable and on any
        category created here, and ``get_source(ds)`` supplies the source.
    :param var: variable description; reads ``title``, ``definition``,
        ``type``, ``units``, ``id``, ``category`` and ``codes``.
    :param categories: dict keyed by ``(ds.type, name)`` used as a category
        cache; extended in place when a category is created.
    :return: always ``1``.
    """
    fields = dict(
        name=var.title,
        type=ds.type,
        codebook_info=var.definition,
        data_type=var.type,
        units=var.units,
        label=var.id,
        source=get_source(ds),
    )
    variable = Variable.objects.create(**fields)

    for category_name in var.category:
        category = categories.get((ds.type, category_name))
        if not category:
            category = Category.objects.create(name=category_name, type=ds.type)
            categories[(ds.type, category_name)] = category
            logging.info("Created %s category: %s" % (ds.type, category_name))

        if category not in variable.index_categories.all():
            variable.index_categories.add(category)

    for code in var.codes:
        code_description, _created = CodeDescription.objects.get_or_create(
            variable=variable, code=code.code)
        code_description.description = code.description
        code_description.short_description = code.name
        code_description.save()
        message = "Created CulturalCodeDescription: %s" % code_description
        logging.info(message.decode('utf8'))

    variable.save()
    logging.info("Created Variable: %s" % variable.label)
    return 1
Ejemplo n.º 10
0
def _load_environmental(val_row, variables, societies, objs):
    if val_row['Code'] == 'NA':
        return
    global _missing_variables

    society = societies.get(val_row['soc_id'])
    if society is None:
        logging.warn(
            "Unable to find a Society with ext_id %s, skipping ..." % val_row['soc_id'])
        return

    variable = variables.get(val_row['VarID'])
    if variable is None:
        if val_row['VarID'] not in _missing_variables:
            logging.warn("Could not find environmental variable %s" % val_row['VarID'])
            _missing_variables.add(val_row['VarID'])
        return

    objs.append(EnvironmentalValue(
        variable=variable,
        value=float(val_row['Code']),
        comment=val_row['Comment'],
        society=society,
        source=get_source(val_row['Dataset'])))
    return True
Ejemplo n.º 11
0
def index():
    """
    Accept a webhook event and forward it to Pub/Sub.

    The source is derived from the request headers and must be a key of
    ``sources.AUTHORIZED_SOURCES``. The signature is looked up, under the name
    given by the authorized source, among the request headers and query
    arguments (query arguments win on a name clash) and checked against the
    raw body.

    :return: an empty body with status 204.
    :raises Exception: if the source is not authorized or the signature does
        not verify.
    """
    source = sources.get_source(request.headers)
    authorized = sources.AUTHORIZED_SOURCES

    if source not in authorized:
        raise Exception(f"Source not authorized: {source}")

    auth_source = authorized[source]
    signature_candidates = {**request.headers, **request.args}
    signature = signature_candidates.get(auth_source.signature)
    payload = request.data

    verifier = auth_source.verification
    signature_ok = verifier(signature, payload)
    if not signature_ok:
        raise Exception("Unverified Signature")

    publish_to_pubsub(source, payload, dict(request.headers))

    # Flush stdout so log lines are not held back by buffering.
    sys.stdout.flush()
    return "", 204
Ejemplo n.º 12
0
def load_var(ds, var, categories):
    """
    Create a Variable for ``var`` together with its categories and codes.

    :param ds: dataset; ``ds.type`` is stored on the variable and on any
        category created here, and ``get_source(ds)`` supplies the source.
    :param var: variable description; reads ``title``, ``definition``,
        ``type``, ``units``, ``id``, ``category`` and ``codes``.
    :param categories: dict keyed by ``(ds.type, name)`` used as a category
        cache; extended in place when a category is created.
    :return: always ``1``.
    """
    variable = Variable.objects.create(
        name=var.title,
        type=ds.type,
        codebook_info=var.definition,
        data_type=var.type,
        units=var.units,
        label=var.id,
        source=get_source(ds),
    )

    for name in var.category:
        cached = categories.get((ds.type, name))
        if cached:
            index_category = cached
        else:
            index_category = Category.objects.create(name=name, type=ds.type)
            categories[(ds.type, name)] = index_category
            logging.info("Created %s category: %s" % (ds.type, name))

        already_linked = index_category in variable.index_categories.all()
        if not already_linked:
            variable.index_categories.add(index_category)

    for code in var.codes:
        description, _ = CodeDescription.objects.get_or_create(
            variable=variable, code=code.code)
        description.description = code.description
        description.short_description = code.name
        description.save()
        logging.info(
            ("Created CulturalCodeDescription: %s" % description).decode('utf8'))

    variable.save()
    logging.info("Created Variable: %s" % variable.label)
    return 1
Ejemplo n.º 13
0
def _load_data(val_row,
               societies=None,
               sources=None,
               variables=None,
               descriptions=None):
    ext_id = val_row.get('soc_id')
    if ext_id not in societies:
        logging.warn(
            "Attempting to load values for %s but no Society object exists, skipping"
            % ext_id)
        return

    society = societies[ext_id]
    variable_id = val_row['VarID']

    variable = variables.get(
        var_number_to_label(val_row['Dataset'], variable_id))
    if variable is None:
        logging.warn("Could not find variable %s for society %s" %
                     (variable_id, society.name))
        return

    v = dict(variable=variable,
             society=society,
             source=get_source(val_row['Dataset']),
             coded_value=val_row['Code'],
             code=descriptions.get((variable.id, val_row['Code'].strip())),
             focal_year=val_row['Year'],
             comment=val_row['Comment'],
             subcase=val_row['SubCase'])

    if variable.data_type == 'Continuous' and val_row[
            'Code'] and val_row['Code'] != 'NA':
        v['coded_value_float'] = float(val_row['Code'])

    refs = set()
    for r in val_row['EthnoReferences'].split(";"):
        r = r.strip()
        author, year = None, None
        m = BINFORD_REF_PATTERN.match(r)
        if m:
            author, year = m.group('author').strip(), m.group('year')
            if author.endswith(','):
                author = author[:-1].strip()
        else:
            ref_short = r.split(",")
            if len(ref_short) == 2:
                author = ref_short[0].strip()
                year = ref_short[1].strip().split(':')[0]
        if author and year:
            ref = sources.get((author, year))
            if ref:
                refs.add(ref.id)
            else:  # pragma: no cover
                logging.warn(
                    "Could not find reference %s, %s in database, skipping reference"
                    % (author, year))
    return v, refs
Ejemplo n.º 14
0
 def __init__(self, settings):
     """
     Create the canvas and instantiate every configured source.

     :param settings: mapping-like settings object; the optional ``sources``
         entry lists the identifiers passed to ``sources.get_source``. Each
         returned callable is invoked with ``(settings, self.canvas)``.
     """
     self.canvas = Canvas(settings)
     log.info(settings)
     configured_sources = settings.get("sources")
     log.info(configured_sources)
     # Use .get() here as well: indexing settings["sources"] just for a debug
     # line would raise KeyError when the key is absent, although the
     # construction below treats a missing key as "no sources".
     log.info(type(configured_sources))
     self.sources = [
         sources.get_source(s)(settings, self.canvas)
         for s in settings.get("sources", [])
     ]
Ejemplo n.º 15
0
def load_societies(items):
    """
    Replace all Society rows with the societies, and linked societies, in ``items``.

    Every item yields one Society. In addition, each non-empty
    ``*_society_equivalent*`` column yields a further Society unless one with
    the same ``ext_id`` has already been queued. Everything is inserted with a
    single ``bulk_create`` after ``delete_all(Society)``.

    :param items: iterable of row mappings; reads ``soc_id``, ``xd_id``,
        ``ORIG_name_and_ID_in_this_dataset``, ``pref_name_for_society``,
        ``dataset``, ``alt_names_by_society``, ``main_focal_year``,
        ``HRAF_name_ID``, ``CHIRILA_society_equivalent`` and the optional
        link columns listed in ``society_links``.
    :return: number of Society objects passed to ``bulk_create``.
    """
    society_links = [
        'SCCS_society_equivalent',
        'WNAI_society_equivalent1',
        'WNAI_society_equivalent2',
        'WNAI_society_equivalent3',
        'WNAI_society_equivalent4',
        'WNAI_society_equivalent5',
    ]
    delete_all(Society)
    societies = []
    # ext_ids of everything queued so far. A set lookup replaces rebuilding a
    # list of all ext_ids for every link column of every row.
    seen_ext_ids = set()
    for item in items:
        main_society = Society(
            ext_id=item['soc_id'],
            xd_id=item['xd_id'],
            original_name=item['ORIG_name_and_ID_in_this_dataset'],
            name=item['pref_name_for_society'],
            source=get_source(item['dataset']),
            alternate_names=item['alt_names_by_society'],
            focal_year=item['main_focal_year'],
            hraf_link=item['HRAF_name_ID'],
            chirila_link=item['CHIRILA_society_equivalent']
        )
        # Primary societies are always queued, even if the id was seen before.
        societies.append(main_society)
        seen_ext_ids.add(main_society.ext_id)

        for key in society_links:
            value = item.get(key)
            if not value:
                continue
            # The linked id is the text after the last "(" without its final
            # character (presumably the closing parenthesis; verify against
            # the data).
            ext_id = value.split('(')[-1]
            society = Society(
                ext_id=ext_id[:-1],
                xd_id=item['xd_id'],
                original_name=value,
                name=item['pref_name_for_society'],
                alternate_names=item['alt_names_by_society'],
                focal_year=item['main_focal_year'],
                # The source name is the column prefix before the first "_".
                source=get_source(key[:key.find('_')])
            )
            if society.ext_id not in seen_ext_ids:
                societies.append(society)
                seen_ext_ids.add(society.ext_id)
                logging.info("Saving society %s" % society.ext_id)

        logging.info("Saving society %s" % item)

    Society.objects.bulk_create(societies)
    return len(societies)
Ejemplo n.º 16
0
def load_data(repos):
    """
    Bulk-load Value rows and their source references for all datasets.

    :param repos: object exposing ``datasets``; each dataset exposes ``data``,
        an iterable of items with ``soc_id`` and ``var_id``.
    :return: ``Value.objects.count()`` after the insert.
    """
    refs = []
    societies = {s.ext_id: s for s in Society.objects.all()}
    kw = dict(sources={(s.author, s.year): s
                       for s in Source.objects.all()},
              descriptions={(vcd.variable_id, vcd.code): vcd
                            for vcd in CodeDescription.objects.all()})

    #
    # To speed up the data load, values are created using bulk_create.
    # Since the association table between values and sources must be rebuilt
    # as well, the primary keys of the inserted values have to be known in
    # advance: `pk` below counts inserted values starting at 1.
    # NOTE(review): this only matches the database if the value table is empty
    # and its id sequence restarts at 1; the statements that did this are
    # disabled below - confirm it is handled elsewhere.
    #
    #CulturalValue.objects.all().delete()
    #with connection.cursor() as c:
    #    for table in ['culturalvalue_references', 'culturalvalue']:
    #        c.execute("ALTER SEQUENCE dplace_app_%s_id_seq RESTART WITH 1" % table)

    variables = {var.label: var for var in Variable.objects.all()}
    objs = []
    pk = 0
    for ds in repos.datasets:
        for item in ds.data:
            if item.soc_id not in societies:
                # logging.warn is a deprecated alias; logging.warning is the
                # documented call.
                logging.warning('value for unknown society {0}'.format(
                    item.soc_id))
                continue
            if item.var_id not in variables:
                logging.warning('value for unknown variable {0}'.format(
                    item.var_id))
                continue
            v, _refs = _load_data(ds, item, get_source(ds),
                                  societies[item.soc_id],
                                  variables[item.var_id], **kw)
            if v:
                pk += 1
                objs.append(Value(**v))
                # Pair every reference id with the pk this value is expected
                # to receive.
                refs.extend((pk, sid) for sid in _refs or [])

    Value.objects.bulk_create(objs, batch_size=1000)

    with connection.cursor() as c:
        c.executemany(
            """\
INSERT INTO dplace_app_value_references (value_id, source_id) VALUES (%s, %s)""",
            refs)
    return Value.objects.count()
Ejemplo n.º 17
0
def scan():
    """
    List the devices available for the submitted data source.

    A source is obtained for the upper-cased ``source`` form field, with no
    device id and ``connect=False``; its ``list_available_devices()`` result
    is returned as an ``application/json`` response.
    """
    form = DeviceScanForm()

    print(form.data["source"].upper())

    scanner = get_source(
        app.config,
        device_id=None,
        data_source=form.data["source"].upper(),
        connect=False,
    )
    available_devices = scanner.list_available_devices()
    body = json.dumps(available_devices)

    return Response(body, mimetype="application/json")
Ejemplo n.º 18
0
def connect():
    """
    Create and connect the device source if none is cached yet.

    When ``DEVICE_SOURCE`` is unset, a source is built from the configured
    ``DATA_SOURCE`` and ``MODE``, stored under ``DEVICE_SOURCE``, updated with
    the application config and connected.

    :return: the result of ``get_config()``.
    """
    if app.config.get("DEVICE_SOURCE") is not None:
        return get_config()

    device_source = get_source(
        app.config,
        device_id=get_device_id(),
        data_source=app.config["DATA_SOURCE"],
        source_type=app.config["MODE"],
    )
    app.config["DEVICE_SOURCE"] = device_source

    device_source.update_config(app.config)
    device_source.connect()

    return get_config()
Ejemplo n.º 19
0
def results():
    """
    Stream result data from the results source.

    When ``RESULT_SOURCE`` is unset, a ``RESULTS`` source is created, cached
    under that key, and the ``STREAMING`` flag is raised. The source's
    ``read_result_data()`` output is returned as an
    ``application/octet-stream`` response.
    """
    result_source = app.config.get("RESULT_SOURCE")

    if result_source is None:
        result_source = get_source(
            app.config,
            device_id=get_device_id(),
            data_source=app.config["DATA_SOURCE"],
            source_type="RESULTS",
        )
        app.config["RESULT_SOURCE"] = result_source
        app.config["STREAMING"] = True

    result_stream = app.config["RESULT_SOURCE"].read_result_data()
    return Response(
        stream_with_context(result_stream),
        mimetype="application/octet-stream",
    )
Ejemplo n.º 20
0
def stream():
    """
    Stream captured data from the streaming source.

    When ``STREAMING_SOURCE`` is unset, a ``DATA_CAPTURE`` source is created,
    cached under that key, and the ``STREAMING`` flag is raised. The source's
    ``read_data()`` output is returned as an ``application/octet-stream``
    response.
    """
    streaming_source = app.config.get("STREAMING_SOURCE")

    if streaming_source is None:
        streaming_source = get_source(
            app.config,
            device_id=get_device_id(),
            data_source=app.config["DATA_SOURCE"],
            source_type="DATA_CAPTURE",
        )
        app.config["STREAMING_SOURCE"] = streaming_source
        app.config["STREAMING"] = True
        print("source was none")

    data_stream = app.config["STREAMING_SOURCE"].read_data()
    return Response(
        stream_with_context(data_stream),
        mimetype="application/octet-stream",
    )
Ejemplo n.º 21
0
def _load_environmental(env_dict, variables, societies):
    """
    Return the Environmental record for the row's society, creating it if needed.

    If the society has no Environmental record yet, one is created together
    with an EnvironmentalValue for every column in ``ENVIRONMENTAL_MAP`` whose
    value is non-empty and not ``'NA'``. Otherwise the first existing record
    is returned unchanged.

    :param env_dict: row mapping; reads ``ID``, ``Source``, ``iso`` and the
        columns named by the keys of ``ENVIRONMENTAL_MAP``.
    :param variables: mapping of variable name -> environmental variable.
    :param societies: mapping of ``(ext_id, source.id)`` -> society.
    :return: the Environmental record, or ``None`` if no society matches.
    """
    ext_id = env_dict['ID']
    source = get_source(env_dict['Source'])

    # hack for B109 vs. 109: Binford ids lacking a "B" get one prepended
    if source.author == 'Binford' and 'B' not in ext_id:
        ext_id = 'B' + ext_id

    society = societies.get((ext_id, source.id))
    if society is None:
        # logging.warn is a deprecated alias; logging.warning is the
        # documented call.
        logging.warning(
            "Unable to find a Society object with ext_id %s and source %s, skipping..." %
            (ext_id, source))
        return

    # This limits the environmental data to one record per society record
    found_environmentals = Environmental.objects.filter(society=society).all()
    if found_environmentals:
        return found_environmentals[0]

    iso_code = iso_from_code(env_dict['iso'])

    # Create the base Environmental
    environmental, created = Environmental.objects.get_or_create(
        society=society,
        source=source,
        iso_code=iso_code
    )
    for k in ENVIRONMENTAL_MAP:  # keys are the columns in the CSV file
        var_dict = ENVIRONMENTAL_MAP[k]
        variable = variables.get(var_dict['name'])
        if variable is None:  # pragma: no cover
            logging.warning(
                "Did not find an EnvironmentalVariable with name %s" % var_dict['name'])
            continue
        if env_dict[k] and env_dict[k] != 'NA':
            value = float(env_dict[k])
            EnvironmentalValue.objects.get_or_create(
                variable=variable,
                value=value,
                environmental=environmental,
                source=source)
        # Logged for every known variable, whether or not a value was stored.
        logging.info(
            "Created environmental value for variable %s and society %s" % (var_dict['name'], society)
        )
    return environmental
Ejemplo n.º 22
0
def _load_environmental(env_dict, variables, societies, objs):
    """
    Return the Environmental record for the row's society, creating it if needed.

    If the society has no Environmental record yet, one is created and an
    unsaved EnvironmentalValue is appended to ``objs`` for every variable
    whose column is non-empty and not ``'NA'``. Otherwise the first existing
    record is returned and ``objs`` is left untouched.

    :param env_dict: row mapping; reads ``soc_ID``, ``Source`` and one column
        per variable, named after the variable's name with spaces removed.
    :param variables: iterable of environmental variables (each with ``name``).
    :param societies: mapping of ``(ext_id, source.id)`` -> society.
    :param objs: list that receives the new EnvironmentalValue objects.
    :return: the Environmental record, or ``None`` if no society matches.
    """
    ext_id = env_dict['soc_ID']
    source = get_source(env_dict['Source'])

    # hack for B109 vs. 109: Binford ids lacking a "B" get one prepended
    if source.author == 'Binford' and 'B' not in ext_id:
        ext_id = 'B' + ext_id

    society = societies.get((ext_id, source.id))
    if society is None:
        # logging.warn is a deprecated alias; logging.warning is the
        # documented call.
        logging.warning(
            "Unable to find a Society object with ext_id %s and source %s, skipping..." %
            (ext_id, source))
        return

    # This limits the environmental data to one record per society record
    found_environmentals = Environmental.objects.filter(society=society).all()
    if found_environmentals:
        return found_environmentals[0]

    iso_code = society.language.iso_code if society.language is not None else None

    # Create the base Environmental
    environmental, created = Environmental.objects.get_or_create(
        society=society,
        source=source,
        iso_code=iso_code
    )

    for v in variables:
        # Column names are the variable names with all spaces removed.
        key = v.name.replace(' ', '')
        if env_dict[key] and env_dict[key] != 'NA':
            objs.append(EnvironmentalValue(
                variable=v,
                value=float(env_dict[key]),
                environmental=environmental,
                source=source
            ))

    return environmental
Ejemplo n.º 23
0
def index():
    """
    Accept a webhook event and forward it to Pub/Sub.

    The source is derived from the request headers and must be a key of
    ``sources.AUTHORIZED_SOURCES``. The signature is looked up, under the name
    given by the authorized source, among the request headers and query
    arguments (query arguments win on a name clash) and checked against the
    raw body. The ``Authorization`` header is dropped from the headers that
    are published.

    :return: an empty body with status 204.
    :raises Exception: if the source is not authorized or the signature does
        not verify.
    """
    source = sources.get_source(request.headers)
    authorized = sources.AUTHORIZED_SOURCES

    if source not in authorized:
        raise Exception(
            f"EventHandler: Source not authorized: {source}\n request headers: {request.headers}"
        )

    auth_source = authorized[source]
    signature_candidates = {**request.headers, **request.args}
    signature = signature_candidates.get(auth_source.signature)
    payload = request.data

    # For debugging, the signature candidates and the payload can be printed
    # here before verification.

    verifier = auth_source.verification
    signature_ok = verifier(signature, payload)
    if not signature_ok:
        raise Exception("EventHandler: Unverified Signature")

    # The Authorization header must not be published to Pub/Sub.
    pubsub_headers = dict(request.headers)
    pubsub_headers.pop("Authorization", None)

    publish_to_pubsub(source, payload, pubsub_headers)

    # Flush stdout so log lines are not held back by buffering.
    sys.stdout.flush()
    return "", 204
Ejemplo n.º 24
0
def load_societies(repos):
    """
    Bulk-create a Society for every society of every dataset in ``repos``.

    Coordinates that cannot be converted to ``float`` are stored as ``None``
    and a warning is logged; the society is still created.

    :param repos: object exposing ``datasets``; each dataset exposes
        ``societies`` and is passed to ``get_source`` to obtain the source.
    :return: number of Society objects passed to ``bulk_create``.
    """
    societies = []
    for ds in repos.datasets:
        for item in ds.societies:
            lat, lon, olat, olon = None, None, None, None
            try:
                lat, lon = map(float, [item.Lat, item.Long])
            except (TypeError, ValueError):
                # logging.warn is a deprecated alias; logging.warning is the
                # documented call.
                logging.warning("Unable to create coordinates for %s" % item)
            try:
                olat, olon = map(float, [item.origLat, item.origLong])
            except (TypeError, ValueError):
                logging.warning("Unable to create original coordinates for %s" %
                                item)

            societies.append(
                Society(
                    ext_id=item.id,
                    xd_id=item.xd_id,
                    original_name=item.ORIG_name_and_ID_in_this_dataset,
                    name=item.pref_name_for_society,
                    source=get_source(ds),
                    alternate_names=item.alt_names_by_society,
                    focal_year=item.main_focal_year,
                    hraf_link=item.HRAF_name_ID,
                    #chirila_link=item.CHIRILA_society_equivalent,
                    latitude=lat,
                    longitude=lon,
                    original_latitude=olat,
                    original_longitude=olon,
                ))
            logging.info("Saving society %s" % item.id)

    Society.objects.bulk_create(societies)

    #
    # TODO: load cross-dataset relations!
    #
    return len(societies)
Ejemplo n.º 25
0
def load_var(var_dict, categories):
    """
    Create or update the CulturalVariable described by one variable row.

    :param var_dict: row mapping; reads ``Dataset``, ``VarID``, ``VarTitle``,
        ``VarDefinition``, ``VarType``, ``IndexCategory`` (comma separated)
        and the optional ``Units``.
    :param categories: dict of category name -> CulturalCategory, used as a
        cache and extended in place whenever a category has to be created.
    :return: always ``True``.
    """
    label = var_number_to_label(var_dict['Dataset'], var_dict['VarID'])
    source = get_source(var_dict['Dataset'])
    variable, _created = CulturalVariable.objects.get_or_create(
        label=label, source=source)

    variable.name = var_dict['VarTitle']
    variable.codebook_info = var_dict['VarDefinition']
    variable.data_type = var_dict['VarType']
    assert variable.data_type in ['Continuous', 'Categorical', 'Ordinal']

    # Units are optional in the row; default to the empty string.
    if 'Units' in var_dict:
        variable.units = var_dict['Units']
    else:
        variable.units = ""

    raw_names = var_dict['IndexCategory'].split(',')
    for category_name in map(clean_category, raw_names):
        category = categories.get(category_name)
        if not category:
            category = CulturalCategory.objects.create(name=category_name)
            categories[category_name] = category
            logging.info("Created CulturalCategory: %s" % category_name)

        if category not in variable.index_categories.all():
            variable.index_categories.add(category)

    variable.save()
    logging.info("Created CulturalVariable: %s" % label)
    logging.info("Saved variable %s - %s" % (label, variable.name))
    return True
Ejemplo n.º 26
0
def load_societies(repos):
    """
    Bulk-create a Society for every society of every dataset in ``repos``.

    Coordinates that cannot be converted to ``float`` are stored as ``None``
    and a warning is logged; the society is still created.

    :param repos: object exposing ``datasets``; each dataset exposes
        ``societies`` and is passed to ``get_source`` to obtain the source.
    :return: number of Society objects passed to ``bulk_create``.
    """
    societies = []
    for ds in repos.datasets:
        for item in ds.societies:
            lat, lon, olat, olon = None, None, None, None
            try:
                lat, lon = map(float, [item.Lat, item.Long])
            except (TypeError, ValueError):
                # logging.warn is a deprecated alias; logging.warning is the
                # documented call.
                logging.warning("Unable to create coordinates for %s" % item)
            try:
                olat, olon = map(float, [item.origLat, item.origLong])
            except (TypeError, ValueError):
                logging.warning(
                    "Unable to create original coordinates for %s" % item)

            societies.append(Society(
                ext_id=item.id,
                xd_id=item.xd_id,
                original_name=item.ORIG_name_and_ID_in_this_dataset,
                name=item.pref_name_for_society,
                source=get_source(ds),
                alternate_names=item.alt_names_by_society,
                focal_year=item.main_focal_year,
                hraf_link=item.HRAF_name_ID,
                #chirila_link=item.CHIRILA_society_equivalent,
                latitude=lat,
                longitude=lon,
                original_latitude=olat,
                original_longitude=olon,
            ))
            logging.info("Saving society %s" % item.id)

    Society.objects.bulk_create(societies)

    #
    # TODO: load cross-dataset relations!
    #
    return len(societies)
Ejemplo n.º 27
0
def load_var(var_dict, categories):
    """
    Create or update the CulturalVariable described by one variable row.

    :param var_dict: row mapping; reads ``Dataset``, ``VarID``, ``VarTitle``,
        ``VarDefinition``, ``VarType``, ``IndexCategory`` (comma separated)
        and the optional ``Units``.
    :param categories: dict of category name -> CulturalCategory, used as a
        cache and extended in place whenever a category has to be created.
    :return: always ``True``.
    """
    label = var_number_to_label(var_dict['Dataset'], var_dict['VarID'])
    variable, _ = CulturalVariable.objects.get_or_create(
        label=label,
        source=get_source(var_dict['Dataset']),
    )

    variable.name = var_dict['VarTitle']
    variable.codebook_info = var_dict['VarDefinition']
    variable.data_type = var_dict['VarType']
    assert variable.data_type in ['Continuous', 'Categorical', 'Ordinal']
    # Units are optional in the row; default to the empty string.
    variable.units = var_dict['Units'] if 'Units' in var_dict else ""

    for name in map(clean_category, var_dict['IndexCategory'].split(',')):
        cached = categories.get(name)
        if cached:
            index_category = cached
        else:
            index_category = CulturalCategory.objects.create(name=name)
            categories[name] = index_category
            logging.info("Created CulturalCategory: %s" % name)

        already_linked = index_category in variable.index_categories.all()
        if not already_linked:
            variable.index_categories.add(index_category)

    variable.save()
    logging.info("Created CulturalVariable: %s" % label)
    logging.info("Saved variable %s - %s" % (label, variable.name))
    return True
Ejemplo n.º 28
0
def get_sorter_source(source_id):
    """
    Return the source registered for ``source_id``.

    :raises NotFound: if ``sources.get_source`` returns ``None`` for the id.
    """
    found = sources.get_source(source_id)
    if found is not None:
        return found
    raise NotFound('source ID does not have a corresponding source')