Example #1
def test_Blog(env, mocker):
    from collections import defaultdict

    from clld.db.models.common import ValueSet
    from wals3.blog import Blog

    vs = ValueSet.first()

    class wp(object):
        def __init__(self, cats=False):
            if cats:
                self.cats = [
                    dict(id=1, name='Languages'),
                    dict(id=2, name='Chapters'),
                    dict(id=3, name=vs.parameter.chapter.area.name),
                ]
            else:
                self.cats = []

        def Client(self, *args, **kw):
            return mocker.Mock(get_categories=lambda: self.cats,
                               set_categories=lambda c: dict(n=1),
                               get_post_id_from_path=lambda p: None)

    mocker.patch('wals3.blog.wordpress', wp())
    blog = Blog(defaultdict(lambda: ''))
    blog.post_url(vs, env['request'], create=True)

    mocker.patch('wals3.blog.wordpress', wp(cats=True))
    blog = Blog(defaultdict(lambda: ''))
    blog.post_url(vs, env['request'], create=True)
Example #2
    def test_Blog(self):
        # assumes module-level imports, e.g.:
        #   from collections import defaultdict
        #   from mock import Mock, patch
        #   from clld.db.models.common import ValueSet
        from wals3.blog import Blog

        vs = ValueSet.first()

        class wp(object):
            def __init__(self, cats=False):
                if cats:
                    self.cats = [
                        dict(id=1, name='Languages'),
                        dict(id=2, name='Chapters'),
                        dict(id=3, name=vs.parameter.chapter.area.name),
                    ]
                else:
                    self.cats = []

            def Client(self, *args, **kw):
                return Mock(
                    get_categories=lambda: self.cats,
                    set_categories=lambda c: dict(n=1),
                    get_post_id_from_path=lambda p: None)

        with patch('wals3.blog.wordpress', wp()):
            blog = Blog(defaultdict(lambda: ''))
            blog.post_url(vs, self.env['request'], create=True)

        with patch('wals3.blog.wordpress', wp(cats=True)):
            blog = Blog(defaultdict(lambda: ''))
            blog.post_url(vs, self.env['request'], create=True)
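
Both tests above drive Blog.post_url against a stubbed wordpress module; Example #1 additionally relies on the pytest fixtures env and mocker (the latter is provided by pytest-mock). The real wals3 test setup is not shown here; a minimal sketch of what an env fixture could provide, with pyramid.testing.DummyRequest standing in for a fully configured request:

import pytest
from pyramid.testing import DummyRequest

@pytest.fixture
def env():
    # The tests above only access env['request'], so a dummy request
    # is enough for this sketch.
    return {'request': DummyRequest()}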
Example #3
def comment(request):  # pragma: no cover
    """Check whether a blog post for the datapoint exists.

    If not, create one and redirect there.
    """
    vs = ValueSet.get(request.matchdict['id'])
    return HTTPFound(
        request.blog.post_url(vs, request, create=True) + '#comment')
Example #4
File: views.py Project: clld/wals3
def comment(request):
    """Check whether a blog post for the datapoint exists.

    If not, create one and redirect there.
    """
    vs = ValueSet.get('%(fid)s-%(lid)s' % request.matchdict)
    return HTTPFound(request.blog.post_url(vs, request, create=True) + '#comment')
Example #5
def comment(request):
    """Check whether a blog post for the datapoint exists.

    If not, create one and redirect there.
    """
    vs = ValueSet.get('%(fid)s-%(lid)s' % request.matchdict)
    return HTTPFound(
        request.blog.post_url(vs, request, create=True) + '#comment')
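
The comment views above expect a route whose matchdict carries fid and lid (Examples #4 and #5) or a combined id (Example #3). A hypothetical Pyramid registration consistent with Examples #4 and #5; the route name and URL pattern are invented, not taken from wals3:

def includeme(config):
    # `config` is a pyramid.config.Configurator. The pattern populates
    # request.matchdict with 'fid' and 'lid', matching the
    # '%(fid)s-%(lid)s' lookup in the view.
    config.add_route('datapoint_comment', '/datapoint/{fid}/{lid}/comment')
    config.add_view(comment, route_name='datapoint_comment')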
Example #6
def import_values(values, lang, features, codes, contributors,
                  sources):  # pragma: no cover
    c = Contribution(
        id=lang['ID'],
        name='Dataset for {0}'.format(lang['Name']),
    )
    for i, cid in enumerate(lang['Coders'], start=1):
        DBSession.add(
            ContributionContributor(
                contribution=c,
                contributor_pk=contributors[cid],
                ord=i,
            ))
    l = GrambankLanguage(
        id=lang['ID'],
        name=lang['Name'],
        macroarea=lang['Macroarea'],
        latitude=lang['Latitude'],
        longitude=lang['Longitude'],
    )
    for value in values:
        vs = ValueSet(
            id=value['ID'],
            parameter_pk=features[value['Parameter_ID']],
            language=l,
            contribution=c,
        )
        Value(id=value['ID'],
              valueset=vs,
              name=value['Value'],
              description=value['Comment'],
              domainelement_pk=codes[value['Code_ID']
                                     or '{}-NA'.format(value['Parameter_ID'])])

        if value['Source']:
            for ref in value['Source']:
                sid, pages = Sources.parse(ref)
                ValueSetReference(valueset=vs,
                                  source_pk=sources[sid],
                                  description=pages)
    # Adding the contribution cascades through the ORM relationships to the
    # ValueSet, Value and ValueSetReference objects created above.
    DBSession.add(c)
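
import_values consumes plain dicts; the keys it reads can be inferred from the accesses above. A hypothetical illustration of the expected input shapes (all values invented):

lang = {
    'ID': 'abcd1234',
    'Name': 'Example Language',
    'Macroarea': 'Eurasia',
    'Latitude': 42.0,
    'Longitude': 11.5,
    'Coders': ['AB'],  # keys into the `contributors` pk mapping
}
values = [{
    'ID': 'abcd1234-GB020',
    'Parameter_ID': 'GB020',  # key into the `features` pk mapping
    'Value': '1',
    'Comment': None,
    'Code_ID': 'GB020-1',  # key into `codes`; falsy -> '<Parameter_ID>-NA'
    'Source': ['Meier2004[13-45]'],  # each ref is parsed by Sources.parse
}]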
Example #7
def justifications(args, languages):
    """
    - text goes into ValueSet.description
    - refs go into ValueSetReference objects
    """

    def normalized_pages(s):
        if PAGES_PATTERN.match(s or ""):
            return s or ""

    #
    # create mappings to look up glottolog languoids matching names in justification files
    #
    langs_by_hid = languages
    langs_by_hname = {}
    langs_by_name = {}

    # Process inactive languoids first, so that active languoids overwrite
    # the data of obsolete ones.
    for l in DBSession.query(Languoid).filter(Languoid.active == False):
        langs_by_hname[l.jsondatadict.get("hname")] = l
        langs_by_hid[l.hid] = l
        langs_by_name[l.name] = l

    for l in DBSession.query(Languoid).filter(Languoid.active == True):
        langs_by_hname[l.jsondatadict.get("hname")] = l
        langs_by_hid[l.hid] = l
        langs_by_name[l.name] = l

    for id_, type_ in [("fc", "family"), ("sc", "subclassification")]:
        for i, row in enumerate(dsv.reader(args.data_file("%s_justifications.tab" % type_))):
            name = row[0]
            name = name.replace("_", " ") if not name.startswith("NOCODE") else name
            l = langs_by_hname.get(name, langs_by_hid.get(name, langs_by_name.get(name)))
            if not l:
                args.log.warn("ignoring %s" % name)
                continue

            _r = 3 if type_ == "family" else 2
            comment = (row[_r].strip() or None) if len(row) > _r else None
            if comment and not WORD_PATTERN.search(comment):
                comment = None

            #
            # TODO: look for [NOCODE_ppp] patterns as well!?
            #

            refs = [(int(m.group("id")), normalized_pages(m.group("comment"))) for m in REF_PATTERN.finditer(row[2])]

            vs = None
            for _vs in l.valuesets:
                if _vs.parameter.id == id_:
                    vs = _vs
                    break

            if not vs:
                args.log.info("%s %s ++" % (l.id, type_))
                vs = ValueSet(
                    id="%s%s" % (type_, l.id),
                    description=comment,
                    language=l,
                    parameter=Parameter.get(id_),
                    contribution=Contribution.first(),
                )
                DBSession.add(Value(id="%s%s" % (type_, l.id), name="%s - %s" % (l.level, l.status), valueset=vs))
                DBSession.flush()
            else:
                if vs.description != comment:
                    args.log.info("%s %s ~~ description" % (l.id, type_))
                    vs.description = comment

            for r in vs.references:
                DBSession.delete(r)

            for r, pages in refs:
                vs.references.append(ValueSetReference(source=Source.get(str(r)), description=pages))

        args.log.info("%s %s" % (i, type_))
Example #8
def import_contribution(path,
                        icons,
                        features,
                        languages,
                        contributors={},
                        trust=[]):
    # NB: the mutable default arguments persist across calls, so
    # `contributors` doubles as a cache of already-created Contributor
    # objects.
    # look for metadata
    # look for sources
    # then loop over values

    mdpath = path + '-metadata.json'
    with open(mdpath) as mdfile:
        md = json.load(mdfile)

    if "abstract" not in md:
        md["abstract"] = "Typological features of {:s}. Coded by {:s} following {:}.".format(
            md["language"], md["creator"][0], md["source"] + md["references"])

    contrib = GrambankContribution(
        id=md["id"],
        name=md["name"],
        #sources=sources(md["source"]) + references(md["references"]),
        ## GrambankContribution can't take sources arguments yet.
        ## We expect "source" to stand for primary linguistic data (audio files etc.),
        ## and "references" to point to bibliographic data.
        desc=md["abstract"])
    contributor_name = HumanName(md["creator"][0])
    contributor_id = (contributor_name.last + contributor_name.first)
    try:
        contributor = contributors[md["creator"][0]]
    except KeyError:
        contributors[md["creator"][0]] = contributor = Contributor(
            id=contributor_id, name=str(contributor_name))
    DBSession.add(
        ContributionContributor(contribution=contrib, contributor=contributor))

    if mdpath not in trust:
        with open(mdpath, "w") as mdfile:
            json.dump(md, mdfile, indent=2)

    data = pandas.io.parsers.read_csv(
        path, sep="," if path.endswith(".csv") else "\t", encoding='utf-8')

    check_features = features.index.tolist()

    if "Language_ID" not in data.columns:
        data["Language_ID"] = md["language"]
    elif mdpath in trust:
        if path in trust:
            assert (data["Language_ID"] == md["language"]).all()
        else:
            data["Language_ID"] = md["language"]
    else:
        if (data["Language_ID"] != md["language"]).any():
            report(
                "Language mismatch:", md["language"], data["Language_ID"][
                    data["Language_ID"] != md["language"]].to_string())
    language = languages.loc[md["language"]]

    if "Source" not in data.columns:
        data["Source"] = ""
    if "Answer" not in data.columns:
        data["Answer"] = ""

    data["Value"] = data["Value"].astype(str)
    data["Source"] = data["Source"].astype(str)
    data["Answer"] = data["Answer"].astype(str)

    for column in copy_from_features:
        if column not in data.columns:
            data[column] = ""
        data[column] = data[column].astype(str)

    features_seen = {}
    for i, row in data.iterrows():
        value = possibly_int_string(row['Value'])
        data.set_value(i, 'Value', value)
        feature = row['Feature_ID']

        if pandas.isnull(feature):
            if pandas.isnull(row['Feature']):
                if path in trust:
                    raise AssertionError(
                        "Row {:} without feature found".format(row))
                else:
                    report("Row without feature found, dropping.",
                           row.to_string(), "")
                    data.drop(i, inplace=True)
                    continue
            else:
                candidates = features["Feature"] == row["Feature"]
                if candidates.any():
                    feature = candidates.argmax()
                else:
                    report("Row without matching feature found, ignoring.",
                           row.to_string(), "")
                    continue

        try:
            parameter = features.loc[feature]
        except (TypeError, KeyError):
            if path in trust:
                if features_path in trust:
                    raise AssertionError("{:s} and {:s} don't match!".format(
                        path, features_path))
                else:
                    parameter = features.loc[feature] = {}
            else:
                report("Feature mismatch:", feature, features.index)
                if features_path in trust:
                    data.drop(i, inplace=True)
                    continue
                else:
                    parameter = {}

        for column in copy_from_features:
            question = row[column]
            # Only treat non-empty, non-null entries as potential mismatches.
            if (question != parameter[column]
                    and not (pandas.isnull(question) or question == "")):
                if path in trust:
                    if features_path in trust:
                        raise AssertionError("{:s} mismatch!".format(column))
                    else:
                        parameter[column] = question
                else:
                    if features_path in trust:
                        data.set_value(i, column, parameter[column])
                    else:
                        report(("{:s} mismatch!".format(column)), question,
                               parameter[column])
            else:
                data.set_value(i, column, parameter[column])

        if feature in features_seen:
            vs = features_seen[feature]
        else:
            vs = features_seen[feature] = ValueSet(
                id="{:s}-{:s}".format(md["language"], feature),
                parameter=parameter["db_Object"],
                language=language["db_Object"],
                contribution=contrib,
                source=row['Source'])

        domain = parameter["db_Domain"]
        if value not in domain:
            if path in trust:
                deid = max(domain) + 1
                domainelement = domain[value] = DomainElement(
                    id='_{:s}-{:s}'.format(i, deid),
                    parameter=parameter['db_Object'],
                    abbr=deid,
                    name='{:s} - {:s}'.format(deid, desc),
                    number=int(deid) if deid != '?' else 999,
                    description=desc,
                    jsondata={'icon': ORDERED_ICONS[int(deid)].name})
            else:
                report("Feature domain mismatch:", list(domain.keys()), value)
                continue
        else:
            domainelement = domain[value]

        answer = row["Answer"]
        if answer != domainelement.description:
            if path in trust:
                if features_path in trust:
                    raise AssertionError("Feature domain element mismatch!")
                else:
                    domainelement.description = answer
            else:
                if features_path in trust:
                    data.set_value(i, "Answer", domainelement.description)
                else:
                    report("Feature domain element mismatch!", answer,
                           domainelement.description)
                    import pdb
                    pdb.set_trace()

        DBSession.add(
            Value(id="{:s}-{:s}-{:}{:d}".format(
                md["language"], feature, value if value != '?' else 'unknown',
                i),
                  valueset=vs,
                  name=str(value),
                  description=row['Comment'],
                  domainelement=domainelement))

        print(".", end="")

        if feature in check_features:
            check_features.remove(feature)

    if features_path in trust:
        i = data.index.max()
        for feature in check_features:
            i += 1
            for column in copy_from_features:
                data.set_value(i, column, features[column][feature])
            data.set_value(i, "Language_ID", md["language"])
            data.set_value(i, "Feature_ID", feature)
            data.set_value(i, "Value", "?")

    print()
    if path not in trust:
        data.sort_values(by=["Feature_ID", "Value"], inplace=True)
        columns = list(data.columns)
        first_columns = [
            "Feature_ID", "Language_ID", "Feature", "Value", "Answer",
            "Comment", "Source", "Possible Values",
            "Suggested standardised comments"
        ]
        for column in columns:
            if column not in first_columns:
                first_columns.append(column)
        data = data[first_columns]
        data.to_csv(path,
                    index=False,
                    sep="," if path.endswith(".csv") else "\t",
                    encoding='utf-8')
    return data
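
Example #8 references several module-level names that the snippet does not define (report, possibly_int_string, copy_from_features, features_path, ORDERED_ICONS). A speculative sketch of the first three, inferred purely from how they are used above; the real definitions may differ:

copy_from_features = [
    "Feature", "Possible Values", "Suggested standardised comments"]

def report(problem, *args):
    # The real helper presumably logs the mismatch; printing is a stand-in.
    print(problem, *args, sep="\n")

def possibly_int_string(value):
    # Normalize float-ish strings such as '1.0' to '1', leaving
    # non-numeric answers like '?' untouched.
    try:
        return str(int(float(value)))
    except ValueError:
        return value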
Example #9
def justifications(args, languages, stats):
    """
    - text goes into ValueSet.description
    - refs go into ValueSetReference objects
    """
    hh_bibkey_to_glottolog_id = {}
    for rec in get_bib(args):
        for provider, bibkeys in get_bibkeys(rec).items():
            if provider == 'hh':
                for bibkey in bibkeys:
                    hh_bibkey_to_glottolog_id[bibkey] = rec['glottolog_ref_id']
                break

    def substitute_hh_bibkeys(m):
        return '**%s**' % hh_bibkey_to_glottolog_id[m.group('bibkey')]

    #
    # create mappings to look up glottolog languoids matching names in justification files
    #
    langs_by_hid = languages
    langs_by_hname = {}
    langs_by_name = {}

    # Order by active so that active languoids overwrite the data of obsolete ones.
    for l in DBSession.query(Languoid).order_by(Languoid.active):
        langs_by_hname[l.jsondata.get('hname')] = l
        langs_by_hid[l.hid] = l
        langs_by_name[l.name] = l

    def normalize_pages(s):
        return (s or '').strip().rstrip(',') or None

    for id_, type_ in [('fc', 'family'), ('sc', 'subclassification')]:
        for i, row in enumerate(dsv.reader(
                args.data_dir.joinpath('languoids', 'forkel_%s_justifications-utf8.tab' % type_))):
            name = row[0]
            name = name.replace('_', ' ') if not name.startswith('NOCODE') else name
            l = langs_by_hname.get(name, langs_by_hid.get(name, langs_by_name.get(name)))
            if not l:
                args.log.warn('ignoring %s' % name)
                continue

            _r = 3 if type_ == 'family' else 2
            comment = (row[_r].strip() or None) if len(row) > _r else None
            if comment and not WORD_PATTERN.search(comment):
                comment = None
            if comment:
                comment = re.sub(r'\*\*(?P<bibkey>[^*]+)\*\*', substitute_hh_bibkeys, comment)

            #
            # TODO: look for [NOCODE_ppp] patterns as well!?
            #

            refs = [(int(m.group('id')), normalize_pages(m.group('pages')))
                    for m in REF_PATTERN.finditer(
                        re.sub(r'\*\*(?P<bibkey>[^*]+)\*\*', substitute_hh_bibkeys, row[2]))]

            vs = None
            for _vs in l.valuesets:
                if _vs.parameter.id == id_:
                    vs = _vs
                    break

            if not vs:
                args.log.info('%s %s ++' % (l.id, type_))
                vs = ValueSet(
                    id='%s%s' % (id_, l.pk),
                    description=comment,
                    language=l,
                    parameter=Parameter.get(id_),
                    contribution=Contribution.first())
                DBSession.add(Value(
                    id='%s%s' % (id_, l.pk),
                    name='%s - %s' % (l.level, l.status),
                    valueset=vs))
                DBSession.flush()
            else:
                if vs.description != comment:
                    args.log.info('%s %s ~~ description: %s ---> %s' % (l.id, type_, vs.description, comment))
                    vs.description = comment
                    stats.update(['justifications-%s' % type_])

            for r in vs.references:
                DBSession.delete(r)

            for r, pages in refs:
                # FIXME: we must make sure not to link sources which will subsequently be
                # replaced!
                vs.references.append(ValueSetReference(
                    source=Source.get(str(r)),
                    description=pages))

        args.log.info('%s %s' % (i, type_))
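
Both justifications variants depend on module-level regular expressions (REF_PATTERN and WORD_PATTERN, plus PAGES_PATTERN in Example #7) that the snippets do not show. The reconstructions below are speculative, derived only from the group names and calls above; the actual glottolog patterns may differ:

import re

# Example #9 reads m.group('id') and m.group('pages'); Example #7 uses
# a group named 'comment' in place of 'pages'.
REF_PATTERN = re.compile(r'\*\*(?P<id>\d+)\*\*(?::(?P<pages>[^;]+))?')

# Used to discard comments that contain no actual words.
WORD_PATTERN = re.compile('[a-z]{2,}')

# Example #7 only keeps page strings matching this shape.
PAGES_PATTERN = re.compile(r'\d+(?:-\d+)?(?:[,;]\s*\d+(?:-\d+)?)*$')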