コード例 #1
0
ファイル: test_db_models.py プロジェクト: mitcho/clld
    def test_Contributor(self):
        """Smoke-test Contributor.last_first with and without a name set."""
        from clld.db.models.common import Contributor

        contrib = Contributor(id='abc')
        # Must not raise even when no name was supplied.
        contrib.last_first()
        contrib = Contributor(id='abc', name='Robert Forkel')
        self.assertTrue(contrib.last_first().startswith('Forkel'))
コード例 #2
0
def test_Contributor():
    """Contributor.last_first handles empty, simple and nobiliary names."""
    from clld.db.models.common import Contributor

    # A contributor without a name must not crash last_first().
    anonymous = Contributor(id='abc')
    anonymous.last_first()

    simple = Contributor(id='abc', name='Robert Forkel')
    assert simple.last_first() == 'Forkel, Robert'

    nobiliary = Contributor(id='abc', name='Hans Robert von Forkel')
    assert nobiliary.last_first() == 'von Forkel, Hans Robert'
コード例 #3
0
ファイル: test_db_models.py プロジェクト: cevmartinez/clld
    def test_Contributor(self):
        """Contributor.last_first: tolerate a missing name, put surname first."""
        from clld.db.models.common import Contributor

        nameless = Contributor(id='abc')
        nameless.last_first()  # should not raise without a name
        named = Contributor(id='abc', name='Robert Forkel')
        self.assertTrue(named.last_first().startswith('Forkel'))
コード例 #4
0
ファイル: test_db_models.py プロジェクト: clld/clld
def test_Contributor():
    """last_first() formats names surname-first, keeping nobiliary particles."""
    from clld.db.models.common import Contributor

    # No name at all: the call merely has to succeed.
    Contributor(id='abc').last_first()

    cases = [
        ('Robert Forkel', 'Forkel, Robert'),
        ('Hans Robert von Forkel', 'von Forkel, Hans Robert'),
    ]
    for full_name, expected in cases:
        assert Contributor(id='abc', name=full_name).last_first() == expected
コード例 #5
0
ファイル: views.py プロジェクト: clld/apics
def survey(request):
    """Pyramid view assembling one survey chapter.

    Loads the chapter's JSON metadata and HTML, inlines its "figure" PNGs
    into the HTML as base64 data URIs (via ``{stem}`` placeholders), and
    collects the remaining PNGs as standalone maps.

    :param request: Pyramid request; ``matchdict['id']`` is the chapter id
        of the form ``<contribution>.<chapter>``.
    :return: template dict with keys maps/md/authors/html/ctx.
    """
    id_ = request.matchdict["id"]
    md = jsonlib.load(ppath("Surveys", "%s.json" % id_))
    html = get_html(ppath("Surveys", "%s.html" % id_))
    maps = []
    # Image files are named after the chapter part of the id,
    # e.g. "12.xyz-abc" -> "xyz_abc*.png".
    pattern = "%s*.png" % id_.split(".")[1].replace("-", "_")
    for fname in sorted(
        ppath("Surveys", processed="maps").glob(pattern), key=lambda fn: fn.stem
    ):
        # read_bytes() closes the file handle (the original leaked it via
        # open(...).read()), and decode('ascii') yields str so the
        # %-interpolation below does not embed a b'...' repr in the URI.
        img = b64encode(fname.read_bytes()).decode("ascii")
        if "figure" in fname.stem:
            html = html.replace("{%s}" % fname.stem, "data:image/png;base64,%s" % img)
        else:
            maps.append(img)

    return {
        "maps": maps,
        "md": md,
        "authors": [Contributor.get(a["id"]) for a in md["authors"]],
        "html": html,
        "ctx": ApicsContribution.get(id_.split(".")[0]),
    }
コード例 #6
0
ファイル: views.py プロジェクト: AnnaLuisaD/apics
def survey(request):
    """Pyramid view assembling one survey chapter.

    Loads the chapter's JSON metadata and HTML, inlines its "figure" PNGs
    into the HTML as base64 data URIs (via ``{namebase}`` placeholders),
    and collects the remaining PNGs as standalone maps.

    :param request: Pyramid request; ``matchdict['id']`` is the chapter id
        of the form ``<contribution>.<chapter>``.
    :return: template dict with keys maps/md/authors/html/ctx.
    """
    id_ = request.matchdict['id']
    md = jsonload(ppath('Surveys', '%s.json' % id_))
    html = get_html(ppath('Surveys', '%s.html' % id_))
    maps = []
    # Image files are named after the chapter part of the id,
    # e.g. "12.xyz-abc" -> "xyz_abc*.png".
    pattern = '%s*.png' % id_.split('.')[1].replace('-', '_')
    for fname in sorted(
            ppath('Surveys', processed='maps').files(pattern),
            key=lambda fn: fn.namebase):
        # Context manager closes the handle promptly; the original left the
        # file object from open(...).read() to the garbage collector.
        with open(fname, 'rb') as fp:
            img = b64encode(fp.read())
        if 'figure' in fname.namebase:
            html = html.replace('{%s}' % fname.namebase, 'data:image/png;base64,%s' % img)
        else:
            maps.append(img)

    return {
        'maps': maps,
        'md': md,
        'authors': [Contributor.get(a['id']) for a in md['authors']],
        'html': html,
        'ctx': ApicsContribution.get(id_.split('.')[0]),
    }
コード例 #7
0
def import_contribution(path,
                        icons,
                        features,
                        languages,
                        contributors={},
                        trust=[]):
    """Import one contribution (a per-language feature-value sheet) into the DB.

    Reads ``<path>-metadata.json`` and the value table at ``path`` (CSV or
    TSV), creates the GrambankContribution, its contributor link and one
    ValueSet/Value per coded feature, reconciling mismatches between the
    sheet and the shared ``features``/``languages`` tables according to
    which file paths appear in ``trust``.  Returns the cleaned pandas
    DataFrame of values.

    NOTE(review): ``contributors`` and ``trust`` are mutable default
    arguments.  ``contributors`` is mutated below, apparently on purpose as
    a cross-call cache of Contributor objects — confirm callers rely on
    this; otherwise switch to ``None`` sentinels.
    NOTE(review): ``features_path`` and ``copy_from_features`` are not
    defined in this function — presumably module-level globals; verify.
    """
    # look for metadata
    # look for sources
    # then loop over values

    # Sidecar metadata file, e.g. "<path>-metadata.json".
    mdpath = path + '-metadata.json'
    with open(mdpath) as mdfile:
        md = json.load(mdfile)

    # Synthesize an abstract when the metadata lacks one; the bound name
    # ``abstract`` itself is never used — the try only probes for the key.
    try:
        abstract = md["abstract"]
    except KeyError:
        md["abstract"] = "Typological features of {:s}. Coded by {:s} following {:}.".format(
            md["language"], md["creator"][0], md["source"] + md["references"])

    contrib = GrambankContribution(
        id=md["id"],
        name=md["name"],
        #sources=sources(md["source"]) + references(md["references"]),
        ## GrambankContribution can't take sources arguments yet.
        ## We expect "source" to stand for primary linguistic data (audio files etc.),
        ## and "references" to point to bibliographic data.
        desc=md["abstract"])
    # Derive a contributor id from the parsed human name.
    # NOTE(review): unlike similar importers, the id is not slug()-ified —
    # it may contain spaces/case differences; confirm this is intended.
    contributor_name = HumanName(md["creator"][0])
    contributor_id = (contributor_name.last + contributor_name.first)
    try:
        contributor = contributors[md["creator"][0]]
    except KeyError:
        # Cache miss: create the Contributor and remember it for later calls.
        contributors[md["creator"][0]] = contributor = Contributor(
            id=contributor_id, name=str(contributor_name))
    DBSession.add(
        ContributionContributor(contribution=contrib, contributor=contributor))

    # Unless the metadata file is trusted, write back the (possibly
    # augmented) metadata so the sidecar reflects what was imported.
    if mdpath not in trust:
        with open(mdpath, "w") as mdfile:
            json.dump(md, mdfile, indent=2)

    # The value table: comma-separated for .csv, otherwise tab-separated.
    data = pandas.io.parsers.read_csv(
        path, sep="," if path.endswith(".csv") else "\t", encoding='utf-8')

    # Features we still expect to see; coded ones are removed as we go.
    check_features = features.index.tolist()

    # Reconcile the sheet's Language_ID column with the metadata,
    # preferring whichever side is listed in ``trust``.
    if "Language_ID" not in data.columns:
        data["Language_ID"] = md["language"]
    elif mdpath in trust:
        if path in trust:
            assert (data["Language_ID"] == md["language"]).all()
        else:
            data["Language_ID"] = md["language"]
    else:
        if (data["Language_ID"] != md["language"]).any():
            report(
                "Language mismatch:", md["language"], data["Language_ID"][
                    data["Language_ID"] != md["language"]].to_string())
    language = languages.loc[md["language"]]

    # Ensure the optional columns exist, then normalize to strings.
    if "Source" not in data.columns:
        data["Source"] = ""
    if "Answer" not in data.columns:
        data["Answer"] = ""

    data["Value"] = data["Value"].astype(str)
    data["Source"] = data["Source"].astype(str)
    data["Answer"] = data["Answer"].astype(str)

    for column in copy_from_features:
        if column not in data.columns:
            data[column] = ""
        data[column] = data[column].astype(str)

    # One ValueSet per feature per contribution; created lazily below.
    features_seen = {}
    for i, row in data.iterrows():
        # NOTE: DataFrame.set_value is deprecated in modern pandas
        # (use .at[] instead) — left as-is here.
        value = possibly_int_string(row['Value'])
        data.set_value(i, 'Value', value)
        feature = row['Feature_ID']

        # Rows without a Feature_ID: try to recover via the Feature name,
        # otherwise drop/ignore the row.
        if pandas.isnull(feature):
            if pandas.isnull(row['Feature']):
                if path in trust:
                    raise AssertionError(
                        "Row {:} without feature found".format(row))
                else:
                    report("Row without feature found, dropping.",
                           row.to_string(), "")
                    # NOTE(review): ``del data.loc[i]`` is dubious pandas —
                    # dropping a row is normally ``data.drop(i, ...)``;
                    # confirm this actually removes the row.
                    del data.loc[i]
                    continue
            else:
                candidates = features["Feature"] == row["Feature"]
                if candidates.any():
                    # argmax on the boolean Series yields the label of the
                    # first matching feature.
                    feature = candidates.argmax()
                else:
                    report("Row without matching feature found, ignoring.",
                           row.to_string(), "")
                    continue

        # Look up the feature's metadata row; mismatches are resolved
        # according to which of the two files is trusted.
        try:
            parameter = features.loc[feature]
        except (TypeError, KeyError):
            if path in trust:
                if features_path in trust:
                    raise AssertionError("{:s} and {:s} don't match!".format(
                        path, features_path))
                else:
                    parameter = features.loc[feature] = {}
            else:
                report("Feature mismatch:", feature, features.index)
                if features_path in trust:
                    del data.loc[i]
                    continue
                else:
                    parameter = {}

        # Synchronize the per-row copies of feature metadata columns.
        for column in copy_from_features:
            question = row[column]
            # NOTE(review): ``or question != ""`` looks inverted — it makes
            # the negated operand true only for question == "", so the
            # mismatch branch fires solely for empty strings; confirm
            # whether ``question == ""`` was intended inside the not(...).
            if (question != parameter[column]
                    and not (pandas.isnull(question) or question != "")):
                if path in trust:
                    if features_path in trust:
                        raise AssertionError("{:s} mismatch!".format(column))
                    else:
                        parameter[column] = question
                else:
                    if features_path in trust:
                        data.set_value(i, column, parameter[column])
                    else:
                        report(("{:s} mismatch!".format(column)), question,
                               parameter[column])
            else:
                data.set_value(i, column, parameter[column])

        # Reuse (or lazily create) the ValueSet for this feature.
        if feature in features_seen:
            vs = features_seen[feature]
        else:
            vs = features_seen[feature] = ValueSet(
                id="{:s}-{:s}".format(md["language"], feature),
                parameter=parameter["db_Object"],
                language=language["db_Object"],
                contribution=contrib,
                source=row['Source'])

        # Map the coded value onto the feature's domain, creating a new
        # DomainElement only when the sheet itself is trusted.
        domain = parameter["db_Domain"]
        if value not in domain:
            if path in trust:
                # NOTE(review): this branch looks broken — ``desc`` is not
                # defined anywhere in this function (NameError if reached),
                # and '{:s}'.format(i) raises for an integer index.  Needs
                # a fix before the trusted path is exercised.
                deid = max(domain) + 1
                domainelement = domain[value] = DomainElement(
                    id='_{:s}-{:s}'.format(i, deid),
                    parameter=parameter['db_Object'],
                    abbr=deid,
                    name='{:s} - {:s}'.format(deid, desc),
                    number=int(deid) if deid != '?' else 999,
                    description=desc,
                    jsondata={'icon': ORDERED_ICONS[int(deid)].name})
            else:
                report("Feature domain mismatch:", list(domain.keys()), value)
                continue
        else:
            domainelement = domain[value]

        # Keep the sheet's human-readable Answer in sync with the domain
        # element's description.
        answer = row["Answer"]
        if answer != domainelement.description:
            if path in trust:
                if features_path in trust:
                    raise AssertionError("Feature domain element mismatch!")
                else:
                    domainelement.desc = answer
            else:
                if features_path in trust:
                    data.set_value(i, "Answer", domainelement.description)
                else:
                    report("Feature domain element mismatch!", answer,
                           domainelement.description)
                    # NOTE(review): leftover interactive debugging — drops
                    # into pdb on every mismatch; remove for batch runs.
                    import pdb
                    pdb.set_trace()

        DBSession.add(
            Value(id="{:s}-{:s}-{:}{:d}".format(
                md["language"], feature, value if value != '?' else 'unknown',
                i),
                  valueset=vs,
                  name=str(value),
                  description=row['Comment'],
                  domainelement=domainelement))

        # Progress dot per imported value.
        print(".", end="")

        if feature in check_features:
            check_features.remove(feature)

    # If the feature list is authoritative, pad the sheet with "?" rows for
    # every feature that was never coded.
    if features_path in trust:
        i = data.index.max()
        for feature in check_features:
            i += 1
            for column in copy_from_features:
                data.set_value(i, column, features[column][feature])
            data.set_value(i, "Language_ID", md["language"])
            data.set_value(i, "Feature_ID", feature)
            data.set_value(i, "Value", "?")

    print()
    # Unless the sheet is trusted verbatim, write it back sorted and with a
    # canonical leading column order.
    if path not in trust:
        data.sort_values(by=["Feature_ID", "Value"], inplace=True)
        columns = list(data.columns)
        first_columns = [
            "Feature_ID", "Language_ID", "Feature", "Value", "Answer",
            "Comment", "Source", "Possible Values",
            "Suggested standardised comments"
        ]
        for column in columns:
            if column not in first_columns:
                first_columns.append(column)
        data = data[first_columns]
        data.to_csv(path,
                    index=False,
                    sep="," if path.endswith(".csv") else "\t",
                    encoding='utf-8')
    return data
コード例 #8
0
ファイル: util.py プロジェクト: pombredanne/lexibank
def import_dataset(path, provider):
    """Import one lexibank wordlist CSV into the DB.

    ``path`` points at the data file; ``<path>-metadata.json`` must exist
    beside it.  Creates the Wordlist contribution, an optional contributor
    link, and one Counterpart per data row, resolving the language via
    Glottolog when it is not already in the DB.

    NOTE(review): assumes all rows share one Language_ID (asserted below),
    and ``contrib.language`` ends up None for a file with no usable rows.
    """
    # look for metadata
    # look for sources
    # then loop over values
    dirpath, fname = os.path.split(path)
    basename, ext = os.path.splitext(fname)
    glottolog = Glottolog()

    mdpath = path + "-metadata.json"
    # NOTE(review): assert is stripped under ``python -O``; an explicit
    # raise would be safer for input validation.
    assert os.path.exists(mdpath)
    md = jsonload(mdpath)
    md, parameters = md["properties"], md["parameters"]

    # Contribution name, optionally suffixed with the dataset id.
    cname = md["name"]
    if "id" in md:
        cname = "%s [%s]" % (cname, md["id"])
    contrib = Wordlist(id=basename, name=cname)
    contributors = md.get("typedby", md.get("contributors"))

    if contributors:
        # Reuse an existing Contributor when the slugged name matches.
        contributor_name = HumanName(contributors)
        contributor_id = slug(contributor_name.last + contributor_name.first)
        contributor = Contributor.get(contributor_id, default=None)
        if not contributor:
            contributor = Contributor(id=contributor_id, name="%s" % contributor_name)

        DBSession.add(ContributionContributor(contribution=contrib, contributor=contributor))

    # bibpath = os.path.join(dirpath, basename + '.bib')
    # if os.path.exists(bibpath):
    #    for rec in Database.from_file(bibpath):
    #        if rec['key'] not in data['Source']:
    #            data.add(Source, rec['key'], _obj=bibtex2source(rec))

    data = Data()
    # Concepts already present in the DB, keyed by id.
    concepts = {p.id: p for p in DBSession.query(Concept)}
    language = None

    for i, row in enumerate(reader(path, dicts=True, delimiter=",")):
        # Skip rows missing either the value or the concept reference.
        if not row["Value"] or not row["Feature_ID"]:
            continue

        # Feature_ID looks like a URL; its last path segment is the concept
        # id — presumably a Concepticon link; verify against the data.
        fid = row["Feature_ID"].split("/")[-1]
        vsid = "%s-%s-%s" % (basename, row["Language_ID"], fid)
        vid = "%s-%s-%s" % (provider, basename, i + 1)

        if language:
            # All rows of one dataset must belong to the same language.
            assert language.id == row["Language_ID"]
        else:
            language = Language.get(row["Language_ID"], default=None)
            if language is None:
                # query glottolog!
                languoid = glottolog.languoid(row["Language_ID"])
                language = LexibankLanguage(
                    id=row["Language_ID"], name=languoid.name, latitude=languoid.latitude, longitude=languoid.longitude
                )

        # Create the Concept on first sight, keeping the cache in sync.
        parameter = concepts.get(fid)
        if parameter is None:
            concepts[fid] = parameter = Concept(
                id=fid, name=parameters[row["Feature_ID"]], concepticon_url=row["Feature_ID"]
            )

        # One ValueSet per (dataset, language, concept), created on demand.
        vs = data["ValueSet"].get(vsid)
        if vs is None:
            vs = data.add(
                ValueSet,
                vsid,
                id=vsid,
                parameter=parameter,
                language=language,
                contribution=contrib,
                source=row.get("Source"),
            )

        # NOTE(review): the Counterpart is linked via valueset=vs only —
        # presumably persisted through the ORM relationship cascade; verify.
        counterpart = Counterpart(
            id=vid, valueset=vs, name=row["Value"], description=row.get("Comment"), loan=row.get("Loan") == "yes"
        )

        # Attach the first listed cognate set, creating it if unseen.
        if row.get("Cognate_Set"):
            csid = row["Cognate_Set"].split(",")[0].strip()
            cs = Cognateset.get(csid, key="name", default=None)
            if cs is None:
                cs = Cognateset(name=csid)
            counterpart.cognateset = cs

        # for key, src in data['Source'].items():
        #    if key in vs.source:
        #        ValueSetReference(valueset=vs, source=src, key=key)

    contrib.language = language