def test_Blog(env, mocker):
    from wals3.blog import Blog

    vs = ValueSet.first()

    class wp(object):
        def __init__(self, cats=False):
            if cats:
                self.cats = [
                    dict(id=1, name='Languages'),
                    dict(id=2, name='Chapters'),
                    dict(id=3, name=vs.parameter.chapter.area.name),
                ]
            else:
                self.cats = []

        def Client(self, *args, **kw):
            return mocker.Mock(
                get_categories=lambda: self.cats,
                set_categories=lambda c: dict(n=1),
                get_post_id_from_path=lambda p: None)

    mocker.patch('wals3.blog.wordpress', wp())
    blog = Blog(defaultdict(lambda: ''))
    blog.post_url(vs, env['request'], create=True)

    mocker.patch('wals3.blog.wordpress', wp(cats=True))
    blog = Blog(defaultdict(lambda: ''))
    blog.post_url(vs, env['request'], create=True)

def test_Blog(self):
    from wals3.blog import Blog

    vs = ValueSet.first()

    class wp(object):
        def __init__(self, cats=False):
            if cats:
                self.cats = [
                    dict(id=1, name='Languages'),
                    dict(id=2, name='Chapters'),
                    dict(id=3, name=vs.parameter.chapter.area.name),
                ]
            else:
                self.cats = []

        def Client(self, *args, **kw):
            return Mock(
                get_categories=lambda: self.cats,
                set_categories=lambda c: dict(n=1),
                get_post_id_from_path=lambda p: None)

    with patch('wals3.blog.wordpress', wp()):
        blog = Blog(defaultdict(lambda: ''))
        blog.post_url(vs, self.env['request'], create=True)

    with patch('wals3.blog.wordpress', wp(cats=True)):
        blog = Blog(defaultdict(lambda: ''))
        blog.post_url(vs, self.env['request'], create=True)

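# Both test variants above rely on names imported at module level in the
# original test suite. A plausible set of imports (the exact paths are
# assumptions, not necessarily what the wals3 tests use) would be:
from collections import defaultdict
from unittest.mock import Mock, patch

from clld.db.models.common import ValueSet
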
def comment(request):  # pragma: no cover
    """Check whether a blog post for the datapoint exists.

    If not, create one and redirect there.
    """
    vs = ValueSet.get(request.matchdict['id'])
    return HTTPFound(
        request.blog.post_url(vs, request, create=True) + '#comment')

def comment(request):
    """Check whether a blog post for the datapoint exists.

    If not, create one and redirect there.
    """
    vs = ValueSet.get('%(fid)s-%(lid)s' % request.matchdict)
    return HTTPFound(
        request.blog.post_url(vs, request, create=True) + '#comment')

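# The comment() views above are Pyramid view callables. Hypothetical route
# wiring (route name and URL pattern are assumptions, not the actual WALS
# configuration); the pattern supplies the fid/lid matchdict keys used above:
def includeme(config):
    config.add_route('datapoint_comment', '/datapoint/{fid}/{lid}/comment')
    config.add_view(comment, route_name='datapoint_comment')
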
def import_values(values, lang, features, codes, contributors, sources):  # pragma: no cover
    c = Contribution(
        id=lang['ID'],
        name='Dataset for {0}'.format(lang['Name']),
    )
    for i, cid in enumerate(lang['Coders'], start=1):
        DBSession.add(
            ContributionContributor(
                contribution=c,
                contributor_pk=contributors[cid],
                ord=i,
            ))

    l = GrambankLanguage(
        id=lang['ID'],
        name=lang['Name'],
        macroarea=lang['Macroarea'],
        latitude=lang['Latitude'],
        longitude=lang['Longitude'],
    )

    for value in values:
        vs = ValueSet(
            id=value['ID'],
            parameter_pk=features[value['Parameter_ID']],
            language=l,
            contribution=c,
        )
        Value(
            id=value['ID'],
            valueset=vs,
            name=value['Value'],
            description=value['Comment'],
            domainelement_pk=codes[value['Code_ID'] or '{}-NA'.format(value['Parameter_ID'])])
        if value['Source']:
            for ref in value['Source']:
                sid, pages = Sources.parse(ref)
                ValueSetReference(valueset=vs, source_pk=sources[sid], description=pages)
    DBSession.add(c)

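# Illustrative input for import_values(), shaped after the keys the function
# reads from `lang` and `values`; the concrete values are made up, not real
# Grambank data:
example_lang = {
    'ID': 'abcd1234', 'Name': 'Example', 'Macroarea': 'Eurasia',
    'Latitude': 42.0, 'Longitude': 17.5, 'Coders': ['XY'],
}
example_values = [{
    'ID': 'abcd1234-GB020', 'Parameter_ID': 'GB020', 'Value': '1',
    'Comment': None, 'Code_ID': 'GB020-1', 'Source': ['Meier2004[13-15]'],
}]
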
def justifications(args, languages):
    """
    - text goes into ValueSet.description
    - refs go into ValueSetReference objects
    """
    def normalized_pages(s):
        if PAGES_PATTERN.match(s or ""):
            return s or ""

    #
    # create mappings to look up glottolog languoids matching names in justification files
    #
    langs_by_hid = languages
    langs_by_hname = {}
    langs_by_name = {}

    for l in DBSession.query(Languoid).filter(Languoid.active == False):
        langs_by_hname[l.jsondatadict.get("hname")] = l
        langs_by_hid[l.hid] = l
        langs_by_name[l.name] = l

    for l in DBSession.query(Languoid).filter(Languoid.active == True):
        langs_by_hname[l.jsondatadict.get("hname")] = l
        langs_by_hid[l.hid] = l
        langs_by_name[l.name] = l

    for id_, type_ in [("fc", "family"), ("sc", "subclassification")]:
        for i, row in enumerate(dsv.reader(args.data_file("%s_justifications.tab" % type_))):
            name = row[0]
            name = name.replace("_", " ") if not name.startswith("NOCODE") else name
            l = langs_by_hname.get(name, langs_by_hid.get(name, langs_by_name.get(name)))
            if not l:
                args.log.warn("ignoring %s" % name)
                continue

            _r = 3 if type_ == "family" else 2
            comment = (row[_r].strip() or None) if len(row) > _r else None
            if comment and not WORD_PATTERN.search(comment):
                comment = None

            #
            # TODO: look for [NOCODE_ppp] patterns as well!?
            #
            refs = [
                (int(m.group("id")), normalized_pages(m.group("comment")))
                for m in REF_PATTERN.finditer(row[2])]

            vs = None
            for _vs in l.valuesets:
                if _vs.parameter.id == id_:
                    vs = _vs
                    break

            if not vs:
                args.log.info("%s %s ++" % (l.id, type_))
                vs = ValueSet(
                    id="%s%s" % (type_, l.id),
                    description=comment,
                    language=l,
                    parameter=Parameter.get(id_),
                    contribution=Contribution.first(),
                )
                DBSession.add(Value(
                    id="%s%s" % (type_, l.id),
                    name="%s - %s" % (l.level, l.status),
                    valueset=vs))
                DBSession.flush()
            else:
                if vs.description != comment:
                    args.log.info("%s %s ~~ description" % (l.id, type_))
                    vs.description = comment

                for r in vs.references:
                    DBSession.delete(r)

            for r, pages in refs:
                vs.references.append(
                    ValueSetReference(source=Source.get(str(r)), description=pages))

        args.log.info("%s %s" % (i, type_))

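# justifications() uses module-level patterns that are defined elsewhere.
# Plausible sketches (assumptions about their shape, not the actual glottolog
# definitions): REF_PATTERN picks up "**<source id>**:<pages>" style references,
# PAGES_PATTERN validates page specifications, and WORD_PATTERN checks that a
# comment contains at least one word character.
import re

WORD_PATTERN = re.compile(r'\w')
PAGES_PATTERN = re.compile(r'[\d\-, ]+$')
REF_PATTERN = re.compile(r'\*\*(?P<id>\d+)\*\*(:(?P<comment>[^,)]+))?')
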
def import_contribution(path, icons, features, languages, contributors={}, trust=[]):
    # look for metadata
    # look for sources
    # then loop over values
    mdpath = path + '-metadata.json'
    with open(mdpath) as mdfile:
        md = json.load(mdfile)

    try:
        abstract = md["abstract"]
    except KeyError:
        md["abstract"] = "Typological features of {:s}. Coded by {:s} following {:}.".format(
            md["language"], md["creator"][0], md["source"] + md["references"])

    contrib = GrambankContribution(
        id=md["id"],
        name=md["name"],
        #sources=sources(md["source"]) + references(md["references"]),
        ## GrambankContribution can't take sources arguments yet.
        ## We expect "source" to stand for primary linguistic data (audio files etc.),
        ## and "references" to point to bibliographic data.
        desc=md["abstract"])
    contributor_name = HumanName(md["creator"][0])
    contributor_id = (contributor_name.last + contributor_name.first)
    try:
        contributor = contributors[md["creator"][0]]
    except KeyError:
        contributors[md["creator"][0]] = contributor = Contributor(
            id=contributor_id,
            name=str(contributor_name))
    DBSession.add(
        ContributionContributor(contribution=contrib, contributor=contributor))

    if mdpath not in trust:
        with open(mdpath, "w") as mdfile:
            json.dump(md, mdfile, indent=2)

    data = pandas.io.parsers.read_csv(
        path,
        sep="," if path.endswith(".csv") else "\t",
        encoding='utf-8')

    check_features = features.index.tolist()

    if "Language_ID" not in data.columns:
        data["Language_ID"] = md["language"]
    elif mdpath in trust:
        if path in trust:
            assert (data["Language_ID"] == md["language"]).all()
        else:
            data["Language_ID"] = md["language"]
    else:
        if (data["Language_ID"] != md["language"]).any():
            report(
                "Language mismatch:",
                md["language"],
                data["Language_ID"][
                    data["Language_ID"] != md["language"]].to_string())
    language = languages.loc[md["language"]]

    if "Source" not in data.columns:
        data["Source"] = ""
    if "Answer" not in data.columns:
        data["Answer"] = ""

    data["Value"] = data["Value"].astype(str)
    data["Source"] = data["Source"].astype(str)
    data["Answer"] = data["Answer"].astype(str)
    for column in copy_from_features:
        if column not in data.columns:
            data[column] = ""
        data[column] = data[column].astype(str)

    features_seen = {}
    for i, row in data.iterrows():
        value = possibly_int_string(row['Value'])
        data.set_value(i, 'Value', value)
        feature = row['Feature_ID']

        if pandas.isnull(feature):
            if pandas.isnull(row['Feature']):
                if path in trust:
                    raise AssertionError(
                        "Row {:} without feature found".format(row))
                else:
                    report("Row without feature found, dropping.",
                           row.to_string(), "")
                    del data.loc[i]
                    continue
            else:
                candidates = features["Feature"] == row["Feature"]
                if candidates.any():
                    feature = candidates.argmax()
                else:
                    report("Row without matching feature found, ignoring.",
                           row.to_string(), "")
                    continue

        try:
            parameter = features.loc[feature]
        except (TypeError, KeyError):
            if path in trust:
                if features_path in trust:
                    raise AssertionError("{:s} and {:s} don't match!".format(
                        path, features_path))
                else:
                    parameter = features.loc[feature] = {}
            else:
                report("Feature mismatch:", feature, features.index)
                if features_path in trust:
                    del data.loc[i]
                    continue
                else:
                    parameter = {}

        for column in copy_from_features:
            question = row[column]
            if (question != parameter[column]
                    and not (pandas.isnull(question) or question != "")):
                if path in trust:
                    if features_path in trust:
                        raise AssertionError("{:s} mismatch!".format(column))
                    else:
                        parameter[column] = question
                else:
                    if features_path in trust:
                        data.set_value(i, column, parameter[column])
                    else:
                        report(("{:s} mismatch!".format(column)),
                               question, parameter[column])
            else:
                data.set_value(i, column, parameter[column])

        if feature in features_seen:
            vs = features_seen[feature]
        else:
            vs = features_seen[feature] = ValueSet(
                id="{:s}-{:s}".format(md["language"], feature),
                parameter=parameter["db_Object"],
                language=language["db_Object"],
                contribution=contrib,
                source=row['Source'])

        domain = parameter["db_Domain"]
        if value not in domain:
            if path in trust:
                deid = max(domain) + 1
                domainelement = domain[value] = DomainElement(
                    id='_{:s}-{:s}'.format(i, deid),
                    parameter=parameter['db_Object'],
                    abbr=deid,
                    name='{:s} - {:s}'.format(deid, desc),
                    number=int(deid) if deid != '?' else 999,
                    description=desc,
                    jsondata={'icon': ORDERED_ICONS[int(deid)].name})
            else:
                report("Feature domain mismatch:",
                       list(domain.keys()), value)
                continue
        else:
            domainelement = domain[value]

        answer = row["Answer"]
        if answer != domainelement.description:
            if path in trust:
                if features_path in trust:
                    raise AssertionError("Feature domain element mismatch!")
                else:
                    domainelement.desc = answer
            else:
                if features_path in trust:
                    data.set_value(i, "Answer", domainelement.description)
                else:
                    report("Feature domain element mismatch!",
                           answer, domainelement.description)
                    import pdb
                    pdb.set_trace()

        DBSession.add(
            Value(
                id="{:s}-{:s}-{:}{:d}".format(
                    md["language"], feature,
                    value if value != '?' else 'unknown', i),
                valueset=vs,
                name=str(value),
                description=row['Comment'],
                domainelement=domainelement))
        print(".", end="")

        if feature in check_features:
            check_features.remove(feature)

    if features_path in trust:
        i = data.index.max()
        for feature in check_features:
            i += 1
            for column in copy_from_features:
                data.set_value(i, column, features[column][feature])
            data.set_value(i, "Language_ID", md["language"])
            data.set_value(i, "Feature_ID", feature)
            data.set_value(i, "Value", "?")
    print()

    if path not in trust:
        data.sort_values(by=["Feature_ID", "Value"], inplace=True)
        columns = list(data.columns)
        first_columns = [
            "Feature_ID", "Language_ID", "Feature", "Value", "Answer",
            "Comment", "Source", "Possible Values",
            "Suggested standardised comments"]
        for column in columns:
            if column not in first_columns:
                first_columns.append(column)
        data = data[first_columns]
        data.to_csv(
            path,
            index=False,
            sep="," if path.endswith(".csv") else "\t",
            encoding='utf-8')
    return data

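# import_contribution() depends on module-level helpers such as report() and
# possibly_int_string(). Minimal sketches under the assumption that report()
# merely logs a mismatch and possibly_int_string() normalizes float-parsed
# cells (not the actual project code):
def report(problem, *info):
    print(problem)
    for chunk in info:
        print('   ', chunk)

def possibly_int_string(value):
    # Render 2.0 (or "2.0") as "2" so numeric CSV cells compare cleanly.
    try:
        as_float = float(value)
    except (TypeError, ValueError):
        return str(value)
    return str(int(as_float)) if as_float == int(as_float) else str(value)
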
def justifications(args, languages, stats):
    """
    - text goes into ValueSet.description
    - refs go into ValueSetReference objects
    """
    hh_bibkey_to_glottolog_id = {}
    for rec in get_bib(args):
        for provider, bibkeys in get_bibkeys(rec).items():
            if provider == 'hh':
                for bibkey in bibkeys:
                    hh_bibkey_to_glottolog_id[bibkey] = rec['glottolog_ref_id']
                break

    def substitute_hh_bibkeys(m):
        return '**%s**' % hh_bibkey_to_glottolog_id[m.group('bibkey')]

    #
    # create mappings to look up glottolog languoids matching names in justification files
    #
    langs_by_hid = languages
    langs_by_hname = {}
    langs_by_name = {}

    # order by active to make sure active languoids overwrite the data of obsolete ones.
    for l in DBSession.query(Languoid).order_by(Languoid.active):
        langs_by_hname[l.jsondata.get('hname')] = l
        langs_by_hid[l.hid] = l
        langs_by_name[l.name] = l

    def normalize_pages(s):
        return (s or '').strip().rstrip(',') or None

    for id_, type_ in [('fc', 'family'), ('sc', 'subclassification')]:
        for i, row in enumerate(dsv.reader(
                args.data_dir.joinpath(
                    'languoids', 'forkel_%s_justifications-utf8.tab' % type_))):
            name = row[0]
            name = name.replace('_', ' ') if not name.startswith('NOCODE') else name
            l = langs_by_hname.get(name, langs_by_hid.get(name, langs_by_name.get(name)))
            if not l:
                args.log.warn('ignoring %s' % name)
                continue

            _r = 3 if type_ == 'family' else 2
            comment = (row[_r].strip() or None) if len(row) > _r else None
            if comment and not WORD_PATTERN.search(comment):
                comment = None
            if comment:
                comment = re.sub(
                    '\*\*(?P<bibkey>[^\*]+)\*\*', substitute_hh_bibkeys, comment)

            #
            # TODO: look for [NOCODE_ppp] patterns as well!?
            #
            refs = [
                (int(m.group('id')), normalize_pages(m.group('pages')))
                for m in REF_PATTERN.finditer(
                    re.sub('\*\*(?P<bibkey>[^\*]+)\*\*', substitute_hh_bibkeys, row[2]))]

            vs = None
            for _vs in l.valuesets:
                if _vs.parameter.id == id_:
                    vs = _vs
                    break

            if not vs:
                args.log.info('%s %s ++' % (l.id, type_))
                vs = ValueSet(
                    id='%s%s' % (id_, l.pk),
                    description=comment,
                    language=l,
                    parameter=Parameter.get(id_),
                    contribution=Contribution.first())
                DBSession.add(Value(
                    id='%s%s' % (id_, l.pk),
                    name='%s - %s' % (l.level, l.status),
                    valueset=vs))
                DBSession.flush()
            else:
                if vs.description != comment:
                    args.log.info('%s %s ~~ description: %s ---> %s' % (
                        l.id, type_, vs.description, comment))
                    vs.description = comment
                    stats.update(['justifications-%s' % type_])

                for r in vs.references:
                    DBSession.delete(r)

            for r, pages in refs:
                # FIXME: we must make sure not to link sources which will subsequently be
                # replaced!
                vs.references.append(ValueSetReference(
                    source=Source.get(str(r)),
                    description=pages))

        args.log.info('%s %s' % (i, type_))