def test_Contributor(self):
    """Contributor.last_first() puts the surname before the given name."""
    from clld.db.models.common import Contributor

    # A contributor without a name must not crash last_first().
    nameless = Contributor(id='abc')
    nameless.last_first()

    named = Contributor(id='abc', name='Robert Forkel')
    self.assertTrue(named.last_first().startswith('Forkel'))
def test_Contributor():
    """last_first() yields 'surname, given names', keeping name particles."""
    from clld.db.models.common import Contributor

    # Without a name the call must still succeed (smoke check).
    Contributor(id='abc').last_first()

    simple = Contributor(id='abc', name='Robert Forkel')
    assert simple.last_first() == 'Forkel, Robert'

    # Nobiliary particles ("von") stay attached to the surname.
    with_particle = Contributor(id='abc', name='Hans Robert von Forkel')
    assert with_particle.last_first() == 'von Forkel, Hans Robert'
def survey(request):
    """Build the template context for a survey page.

    Loads the survey's JSON metadata and HTML body; PNG maps whose name
    contains 'figure' are inlined into the HTML as base64 data URIs, all
    other maps are collected and returned separately.

    :param request: pyramid request with the survey id in ``matchdict['id']``.
    :return: dict of template variables.
    """
    id_ = request.matchdict["id"]
    md = jsonlib.load(ppath("Surveys", "%s.json" % id_))
    html = get_html(ppath("Surveys", "%s.html" % id_))
    maps = []
    for fname in sorted(
            ppath("Surveys", processed="maps").glob(
                "%s*.png" % id_.split(".")[1].replace("-", "_")),
            key=lambda fn: fn.stem):
        # BUG FIX: b64encode returns bytes on Python 3, so '%s' % img used to
        # embed the literal "b'...'" repr into the data URI.  Decode to ASCII
        # (base64 output is pure ASCII).  Path.read_bytes also closes the
        # file, which the bare open(...).read() never did.
        img = b64encode(fname.read_bytes()).decode("ascii")
        if "figure" in fname.stem:
            # Inline figure images directly into the HTML placeholder.
            html = html.replace("{%s}" % fname.stem, "data:image/png;base64,%s" % img)
        else:
            maps.append(img)
    return {
        "maps": maps,
        "md": md,
        "authors": [Contributor.get(a["id"]) for a in md["authors"]],
        "html": html,
        "ctx": ApicsContribution.get(id_.split(".")[0]),
    }
def survey(request):
    """Assemble the template context for a survey page.

    Reads the survey metadata (JSON) and body (HTML); map images whose
    name contains 'figure' are inlined into the HTML as base64 data URIs,
    the remaining map images are returned as a separate list.
    """
    id_ = request.matchdict['id']
    md = jsonload(ppath('Surveys', '%s.json' % id_))
    html = get_html(ppath('Surveys', '%s.html' % id_))
    pattern = '%s*.png' % id_.split('.')[1].replace('-', '_')
    maps = []
    map_files = sorted(ppath('Surveys', processed='maps').files(pattern),
                       key=lambda fn: fn.namebase)
    for map_file in map_files:
        stem = map_file.namebase
        encoded = b64encode(open(map_file, 'rb').read())
        if 'figure' in stem:
            html = html.replace('{%s}' % stem, 'data:image/png;base64,%s' % encoded)
        else:
            maps.append(encoded)
    return {
        'maps': maps,
        'md': md,
        'authors': [Contributor.get(a['id']) for a in md['authors']],
        'html': html,
        'ctx': ApicsContribution.get(id_.split('.')[0]),
    }
def import_contribution(path, icons, features, languages, contributors={}, trust=[]):
    """Import one contribution: a coded value table plus its JSON metadata.

    :param path: path to the CSV ("," separated) or TSV value file; metadata
        is read from the sibling file ``<path>-metadata.json``.
    :param icons: not referenced anywhere in this function body.
    :param features: pandas DataFrame of known features, indexed by feature id.
    :param languages: pandas DataFrame of languages -- presumably indexed by
        the metadata's "language" id; verify against caller.
    :param contributors: cache mapping creator name -> Contributor object.
        NOTE(review): mutable default argument -- it is shared across calls,
        apparently on purpose as a cross-contribution cache. Confirm intent
        before replacing with the usual ``=None`` idiom.
    :param trust: collection of file paths whose content is authoritative;
        files not listed here may be rewritten in normalized form.
        NOTE(review): mutable default argument, same caveat as above.
    :return: the (possibly normalized) pandas DataFrame of value rows.
    """
    # look for metadata
    # look for sources
    # then loop over values
    mdpath = path + '-metadata.json'
    with open(mdpath) as mdfile:
        md = json.load(mdfile)

    # Synthesize an abstract when the metadata does not provide one.
    # NOTE(review): `abstract` is assigned but never used; only the KeyError
    # side effect matters here.
    try:
        abstract = md["abstract"]
    except KeyError:
        md["abstract"] = "Typological features of {:s}. Coded by {:s} following {:}.".format(
            md["language"],
            md["creator"][0],
            md["source"] + md["references"])

    contrib = GrambankContribution(
        id=md["id"],
        name=md["name"],
        #sources=sources(md["source"]) + references(md["references"]),
        ## GrambankContribution can't take sources arguments yet.
        ## We expect "source" to stand for primary linguistic data (audio files etc.),
        ## and "references" to point to bibliographic data.
        desc=md["abstract"])

    # Register the (first) creator as contributor, re-using cached objects.
    contributor_name = HumanName(md["creator"][0])
    contributor_id = (contributor_name.last + contributor_name.first)
    try:
        contributor = contributors[md["creator"][0]]
    except KeyError:
        contributors[md["creator"][0]] = contributor = Contributor(
            id=contributor_id,
            name=str(contributor_name))
    DBSession.add(
        ContributionContributor(contribution=contrib, contributor=contributor))

    # Write the (possibly augmented) metadata back unless it is trusted as-is.
    if mdpath not in trust:
        with open(mdpath, "w") as mdfile:
            json.dump(md, mdfile, indent=2)

    data = pandas.io.parsers.read_csv(
        path,
        sep="," if path.endswith(".csv") else "\t",
        encoding='utf-8')

    # Features we have not yet seen a value row for.
    check_features = features.index.tolist()

    # Reconcile the Language_ID column against the metadata language;
    # `trust` membership decides which side wins on mismatch.
    if "Language_ID" not in data.columns:
        data["Language_ID"] = md["language"]
    elif mdpath in trust:
        if path in trust:
            # Both files trusted: they must agree.
            assert (data["Language_ID"] == md["language"]).all()
        else:
            # Metadata wins over the data file.
            data["Language_ID"] = md["language"]
    else:
        if (data["Language_ID"] != md["language"]).any():
            report(
                "Language mismatch:",
                md["language"],
                data["Language_ID"][
                    data["Language_ID"] != md["language"]].to_string())
    language = languages.loc[md["language"]]

    # Ensure optional columns exist and coerce working columns to str.
    if "Source" not in data.columns:
        data["Source"] = ""
    if "Answer" not in data.columns:
        data["Answer"] = ""
    data["Value"] = data["Value"].astype(str)
    data["Source"] = data["Source"].astype(str)
    data["Answer"] = data["Answer"].astype(str)
    for column in copy_from_features:
        if column not in data.columns:
            data[column] = ""
        data[column] = data[column].astype(str)

    features_seen = {}
    for i, row in data.iterrows():
        # NOTE(review): DataFrame.set_value is deprecated/removed in recent
        # pandas; .at[] is the modern equivalent.
        value = possibly_int_string(row['Value'])
        data.set_value(i, 'Value', value)
        feature = row['Feature_ID']
        if pandas.isnull(feature):
            # No Feature_ID: try to recover it from the human-readable name.
            if pandas.isnull(row['Feature']):
                if path in trust:
                    raise AssertionError(
                        "Row {:} without feature found".format(row))
                else:
                    report("Row without feature found, dropping.",
                           row.to_string(),
                           "")
                    # NOTE(review): `del data.loc[i]` is not supported by
                    # pandas and raises at runtime; data.drop(i, inplace=True)
                    # is probably what was meant -- verify.
                    del data.loc[i]
                    continue
            else:
                candidates = features["Feature"] == row["Feature"]
                if candidates.any():
                    # Index label of the first matching feature.
                    feature = candidates.argmax()
                else:
                    report("Row without matching feature found, ignoring.",
                           row.to_string(),
                           "")
                    continue

        # Look the feature up; `trust` decides who wins on a mismatch.
        try:
            parameter = features.loc[feature]
        except (TypeError, KeyError):
            if path in trust:
                if features_path in trust:
                    raise AssertionError("{:s} and {:s} don't match!".format(
                        path, features_path))
                else:
                    parameter = features.loc[feature] = {}
            else:
                report("Feature mismatch:", feature, features.index)
                if features_path in trust:
                    # NOTE(review): same unsupported `del data.loc[i]` as above.
                    del data.loc[i]
                    continue
                else:
                    parameter = {}

        # Synchronize feature-description columns between data and features.
        for column in copy_from_features:
            question = row[column]
            # NOTE(review): `question != ""` looks inverted -- as written the
            # mismatch branch only triggers for non-null *empty* cells; the
            # intent was presumably `question == ""` (i.e. skip blanks).
            # Confirm against the normalization workflow.
            if (question != parameter[column] and
                    not (pandas.isnull(question) or question != "")):
                if path in trust:
                    if features_path in trust:
                        raise AssertionError("{:s} mismatch!".format(column))
                    else:
                        parameter[column] = question
                else:
                    if features_path in trust:
                        data.set_value(i, column, parameter[column])
                    else:
                        report(("{:s} mismatch!".format(column)),
                               question,
                               parameter[column])
            else:
                data.set_value(i, column, parameter[column])

        # One ValueSet per feature for this contribution/language.
        if feature in features_seen:
            vs = features_seen[feature]
        else:
            vs = features_seen[feature] = ValueSet(
                id="{:s}-{:s}".format(md["language"], feature),
                parameter=parameter["db_Object"],
                language=language["db_Object"],
                contribution=contrib,
                source=row['Source'])

        domain = parameter["db_Domain"]
        if value not in domain:
            if path in trust:
                # Extend the domain with a new element for this value.
                deid = max(domain) + 1
                # NOTE(review): this branch looks unfinished -- `desc` is not
                # defined in this scope, and '{:s}'.format() on the non-str
                # `i`/`deid` raises TypeError; it will fail if ever reached.
                domainelement = domain[value] = DomainElement(
                    id='_{:s}-{:s}'.format(i, deid),
                    parameter=parameter['db_Object'],
                    abbr=deid,
                    name='{:s} - {:s}'.format(deid, desc),
                    number=int(deid) if deid != '?' else 999,
                    description=desc,
                    jsondata={'icon': ORDERED_ICONS[int(deid)].name})
            else:
                report("Feature domain mismatch:",
                       list(domain.keys()),
                       value)
                continue
        else:
            domainelement = domain[value]

        # Reconcile the free-text answer with the domain element description.
        answer = row["Answer"]
        if answer != domainelement.description:
            if path in trust:
                if features_path in trust:
                    raise AssertionError("Feature domain element mismatch!")
                else:
                    domainelement.desc = answer
            else:
                if features_path in trust:
                    data.set_value(i, "Answer", domainelement.description)
                else:
                    report("Feature domain element mismatch!",
                           answer,
                           domainelement.description)
                    # NOTE(review): leftover interactive debugger -- remove
                    # before running unattended imports.
                    import pdb
                    pdb.set_trace()

        DBSession.add(
            Value(id="{:s}-{:s}-{:}{:d}".format(
                md["language"],
                feature,
                value if value != '?' else 'unknown',
                i),
                valueset=vs,
                name=str(value),
                description=row['Comment'],
                domainelement=domainelement))
        print(".", end="")  # progress indicator, one dot per row
        if feature in check_features:
            check_features.remove(feature)

    # Append '?' placeholder rows for features that had no value row.
    if features_path in trust:
        i = data.index.max()
        for feature in check_features:
            i += 1
            for column in copy_from_features:
                data.set_value(i, column, features[column][feature])
            data.set_value(i, "Language_ID", md["language"])
            data.set_value(i, "Feature_ID", feature)
            data.set_value(i, "Value", "?")
    print()

    # Write the normalized table back with a canonical column order.
    if path not in trust:
        data.sort_values(by=["Feature_ID", "Value"], inplace=True)
        columns = list(data.columns)
        first_columns = [
            "Feature_ID",
            "Language_ID",
            "Feature",
            "Value",
            "Answer",
            "Comment",
            "Source",
            "Possible Values",
            "Suggested standardised comments"
        ]
        for column in columns:
            if column not in first_columns:
                first_columns.append(column)
        data = data[first_columns]
        data.to_csv(
            path,
            index=False,
            sep="," if path.endswith(".csv") else "\t",
            encoding='utf-8')
    return data
def import_dataset(path, provider):
    """Import one lexibank wordlist dataset from a CSV value file.

    :param path: path to the values CSV; metadata is expected at
        ``<path>-metadata.json`` with "properties" and "parameters" keys.
    :param provider: provider identifier, used as prefix for value ids.

    Creates the Wordlist contribution, a contributor if one is named in
    the metadata, and one Counterpart per data row; languages unknown to
    the database are resolved via Glottolog. Assumes the whole file codes
    a single language (asserted row by row).
    """
    # look for metadata
    # look for sources
    # then loop over values
    dirpath, fname = os.path.split(path)
    basename, ext = os.path.splitext(fname)
    glottolog = Glottolog()

    mdpath = path + "-metadata.json"
    assert os.path.exists(mdpath)
    md = jsonload(mdpath)
    md, parameters = md["properties"], md["parameters"]

    # Contribution name, optionally suffixed with the dataset id.
    cname = md["name"]
    if "id" in md:
        cname = "%s [%s]" % (cname, md["id"])
    contrib = Wordlist(id=basename, name=cname)

    # NOTE(review): despite the plural name, this handles a single name
    # string ("typedby" falling back to "contributors").
    contributors = md.get("typedby", md.get("contributors"))
    if contributors:
        contributor_name = HumanName(contributors)
        contributor_id = slug(contributor_name.last + contributor_name.first)
        contributor = Contributor.get(contributor_id, default=None)
        if not contributor:
            contributor = Contributor(id=contributor_id, name="%s" % contributor_name)
        DBSession.add(ContributionContributor(contribution=contrib, contributor=contributor))

    # bibpath = os.path.join(dirpath, basename + '.bib')
    # if os.path.exists(bibpath):
    #     for rec in Database.from_file(bibpath):
    #         if rec['key'] not in data['Source']:
    #             data.add(Source, rec['key'], _obj=bibtex2source(rec))

    data = Data()
    # Cache all known concepts up front to avoid per-row DB queries.
    concepts = {p.id: p for p in DBSession.query(Concept)}
    language = None
    for i, row in enumerate(reader(path, dicts=True, delimiter=",")):
        # Skip rows lacking either a value or a feature id.
        if not row["Value"] or not row["Feature_ID"]:
            continue
        # Feature_ID is a URL; its last path segment is the concept id.
        fid = row["Feature_ID"].split("/")[-1]

        vsid = "%s-%s-%s" % (basename, row["Language_ID"], fid)
        vid = "%s-%s-%s" % (provider, basename, i + 1)

        # Single-language dataset: resolve the language on the first row,
        # assert that every later row agrees.
        if language:
            assert language.id == row["Language_ID"]
        else:
            language = Language.get(row["Language_ID"], default=None)
            if language is None:
                # query glottolog!
                languoid = glottolog.languoid(row["Language_ID"])
                language = LexibankLanguage(
                    id=row["Language_ID"],
                    name=languoid.name,
                    latitude=languoid.latitude,
                    longitude=languoid.longitude)

        # Get or create the Concept for this row.
        parameter = concepts.get(fid)
        if parameter is None:
            concepts[fid] = parameter = Concept(
                id=fid,
                name=parameters[row["Feature_ID"]],
                concepticon_url=row["Feature_ID"])

        # Get or create the ValueSet keyed by (dataset, language, concept).
        vs = data["ValueSet"].get(vsid)
        if vs is None:
            vs = data.add(
                ValueSet, vsid,
                id=vsid,
                parameter=parameter,
                language=language,
                contribution=contrib,
                source=row.get("Source"))

        # NOTE(review): the counterpart is not added to DBSession explicitly;
        # presumably it is persisted via the valueset relationship cascade --
        # confirm against the ORM configuration.
        counterpart = Counterpart(
            id=vid,
            valueset=vs,
            name=row["Value"],
            description=row.get("Comment"),
            loan=row.get("Loan") == "yes")

        # Attach the first listed cognate set, creating it if necessary.
        if row.get("Cognate_Set"):
            csid = row["Cognate_Set"].split(",")[0].strip()
            cs = Cognateset.get(csid, key="name", default=None)
            if cs is None:
                cs = Cognateset(name=csid)
            counterpart.cognateset = cs

        # for key, src in data['Source'].items():
        #     if key in vs.source:
        #         ValueSetReference(valueset=vs, source=src, key=key)
    contrib.language = language