Esempio n. 1
0
    def test_Contributor(self):
        """Check that ``Contributor.last_first`` leads with the family name."""
        from clld.db.models.common import Contributor

        # Smoke check: a contributor created without a name is formatted
        # without any assertion on the result.
        Contributor(id='abc').last_first()

        named = Contributor(id='abc', name='Robert Forkel')
        formatted = named.last_first()
        self.assertTrue(formatted.startswith('Forkel'))
Esempio n. 2
0
def test_Contributor():
    """Check the exact "last, first" rendering of ``Contributor`` names."""
    from clld.db.models.common import Contributor

    # Smoke check: a contributor created without a name is formatted
    # without any assertion on the result.
    Contributor(id='abc').last_first()

    # (full name, expected "last, first" rendering)
    expectations = (
        ('Robert Forkel', 'Forkel, Robert'),
        ('Hans Robert von Forkel', 'von Forkel, Hans Robert'),
    )
    for full_name, expected in expectations:
        person = Contributor(id='abc', name=full_name)
        assert person.last_first() == expected
Esempio n. 3
0
def import_contribution(path,
                        icons,
                        features,
                        languages,
                        contributors={},
                        trust=[]):
    """Import one contribution (a value table plus its metadata) into the DB.

    The metadata is read from ``path + '-metadata.json'`` and the values from
    the CSV/TSV file at ``path`` (comma-separated if the name ends in
    ``.csv``, tab-separated otherwise).  For every usable row a ``Value`` is
    added to ``DBSession``; one ``ValueSet`` is created per feature.

    Files listed in ``trust`` are treated as authoritative: inconsistencies
    involving only trusted files raise ``AssertionError``, whereas untrusted
    files are reported on via ``report`` and then rewritten on disk (the
    metadata file right after the contributor is set up, the data file at
    the end).

    Parameters
    ----------
    path : str
        Path of the data file.
    icons :
        Not used by this function; kept for interface compatibility.
    features : pandas.DataFrame
        Indexed by feature ID.  The code reads the ``"Feature"`` column, the
        columns named in the module-level ``copy_from_features`` and the
        per-feature ``"db_Object"`` / ``"db_Domain"`` entries.
    languages : pandas.DataFrame
        Indexed by language ID; ``"db_Object"`` is read from the matching row.
    contributors : dict
        Maps creator names to ``Contributor`` objects and is updated in place.
        NOTE(review): the mutable default is kept on purpose -- it is shared
        between calls and therefore works as a cache that stops the same
        ``Contributor`` from being created twice.  Confirm callers rely on it.
    trust : container of str
        Paths (data, metadata or ``features_path``) considered authoritative.

    Returns
    -------
    pandas.DataFrame
        The (possibly corrected, extended and re-ordered) value table.

    Raises
    ------
    AssertionError
        When trusted files contradict each other.
    """
    # look for metadata
    # look for sources
    # then loop over values

    mdpath = path + '-metadata.json'
    with open(mdpath) as mdfile:
        md = json.load(mdfile)

    if "abstract" not in md:
        md["abstract"] = "Typological features of {:s}. Coded by {:s} following {:}.".format(
            md["language"], md["creator"][0], md["source"] + md["references"])

    contrib = GrambankContribution(
        id=md["id"],
        name=md["name"],
        # TODO: pass sources(md["source"]) + references(md["references"])
        # once GrambankContribution can take a sources argument.
        # We expect "source" to stand for primary linguistic data (audio
        # files etc.), and "references" to point to bibliographic data.
        desc=md["abstract"])

    creator = md["creator"][0]
    contributor_name = HumanName(creator)
    contributor_id = (contributor_name.last + contributor_name.first)
    try:
        contributor = contributors[creator]
    except KeyError:
        contributors[creator] = contributor = Contributor(
            id=contributor_id, name=str(contributor_name))
    DBSession.add(
        ContributionContributor(contribution=contrib, contributor=contributor))

    # Untrusted metadata is written back, including a generated abstract.
    if mdpath not in trust:
        with open(mdpath, "w") as mdfile:
            json.dump(md, mdfile, indent=2)

    separator = "," if path.endswith(".csv") else "\t"
    data = pandas.io.parsers.read_csv(path, sep=separator, encoding='utf-8')

    # Features still in this list after the loop had no row in the data.
    check_features = features.index.tolist()

    if "Language_ID" not in data.columns:
        data["Language_ID"] = md["language"]
    elif mdpath in trust:
        if path in trust:
            # Explicit raise instead of `assert`, so the check survives -O.
            if not (data["Language_ID"] == md["language"]).all():
                raise AssertionError(
                    "Language_ID in {:} does not match metadata".format(path))
        else:
            data["Language_ID"] = md["language"]
    elif (data["Language_ID"] != md["language"]).any():
        report(
            "Language mismatch:", md["language"], data["Language_ID"][
                data["Language_ID"] != md["language"]].to_string())
    language = languages.loc[md["language"]]

    for optional in ("Source", "Answer"):
        if optional not in data.columns:
            data[optional] = ""

    for column in ("Value", "Source", "Answer"):
        data[column] = data[column].astype(str)

    for column in copy_from_features:
        if column not in data.columns:
            data[column] = ""
        data[column] = data[column].astype(str)

    features_seen = {}
    # Rows are dropped after the loop: `del data.loc[i]` is not supported by
    # pandas, and deferring keeps the frame stable while iterating.
    rows_to_drop = []
    for i, row in data.iterrows():
        value = possibly_int_string(row['Value'])
        data.at[i, 'Value'] = value
        feature = row['Feature_ID']

        if pandas.isnull(feature):
            if pandas.isnull(row['Feature']):
                if path in trust:
                    raise AssertionError(
                        "Row {:} without feature found".format(row))
                report("Row without feature found, dropping.",
                       row.to_string(), "")
                rows_to_drop.append(i)
                continue

            # Fall back to looking the feature up by its description.
            candidates = features["Feature"] == row["Feature"]
            if not candidates.any():
                report("Row without matching feature found, ignoring.",
                       row.to_string(), "")
                continue
            # idxmax() yields the *label* of the first match; argmax()
            # returns a position in current pandas versions.
            feature = candidates.idxmax()

        try:
            parameter = features.loc[feature]
        except (TypeError, KeyError):
            if path in trust:
                if features_path in trust:
                    raise AssertionError("{:s} and {:s} don't match!".format(
                        path, features_path))
                parameter = features.loc[feature] = {}
            else:
                report("Feature mismatch:", feature, features.index)
                if features_path in trust:
                    rows_to_drop.append(i)
                    continue
                # NOTE(review): an empty dict cannot satisfy the
                # `parameter[column]` / `parameter["db_Object"]` lookups
                # below, so this path ends in a KeyError -- confirm intent.
                parameter = {}

        for column in copy_from_features:
            question = row[column]
            # NOTE(review): as written, the mismatch branch is only reached
            # when the cell is the empty string; the second clause looks
            # inverted (probably meant `question == ""`).  Logic kept as is.
            if (question != parameter[column]
                    and not (pandas.isnull(question) or question != "")):
                if path in trust:
                    if features_path in trust:
                        raise AssertionError("{:s} mismatch!".format(column))
                    parameter[column] = question
                elif features_path in trust:
                    data.at[i, column] = parameter[column]
                else:
                    report(("{:s} mismatch!".format(column)), question,
                           parameter[column])
            else:
                data.at[i, column] = parameter[column]

        if feature in features_seen:
            vs = features_seen[feature]
        else:
            vs = features_seen[feature] = ValueSet(
                id="{:s}-{:s}".format(md["language"], feature),
                parameter=parameter["db_Object"],
                language=language["db_Object"],
                contribution=contrib,
                source=row['Source'])

        answer = row["Answer"]

        domain = parameter["db_Domain"]
        if value in domain:
            domainelement = domain[value]
        elif path in trust:
            # A trusted data file may introduce a new domain element.
            deid = max(domain) + 1
            # NOTE(review): the original referenced an undefined name `desc`
            # here; the row's answer text is used as the description, which
            # is what the consistency check below compares against.
            desc = answer
            domainelement = domain[value] = DomainElement(
                # `{}` instead of `{:s}`: `i` and `deid` are integers.
                id='_{}-{}'.format(i, deid),
                parameter=parameter['db_Object'],
                abbr=deid,
                name='{} - {}'.format(deid, desc),
                number=int(deid) if deid != '?' else 999,
                description=desc,
                jsondata={'icon': ORDERED_ICONS[int(deid)].name})
        else:
            report("Feature domain mismatch:", list(domain.keys()), value)
            continue

        if answer != domainelement.description:
            if path in trust:
                if features_path in trust:
                    raise AssertionError("Feature domain element mismatch!")
                # Update the attribute that is actually compared above.
                domainelement.description = answer
            elif features_path in trust:
                data.at[i, "Answer"] = domainelement.description
            else:
                report("Feature domain element mismatch!", answer,
                       domainelement.description)

        DBSession.add(
            Value(id="{:s}-{:s}-{:}{:d}".format(
                md["language"], feature, value if value != '?' else 'unknown',
                i),
                  valueset=vs,
                  name=str(value),
                  description=row['Comment'],
                  domainelement=domainelement))

        print(".", end="")

        if feature in check_features:
            check_features.remove(feature)

    if rows_to_drop:
        data.drop(rows_to_drop, inplace=True)

    if features_path in trust:
        # Append a "?" placeholder row for every feature without any value.
        i = data.index.max()
        for feature in check_features:
            i += 1
            # `.loc` assignment with a new label enlarges the frame in place.
            for column in copy_from_features:
                data.loc[i, column] = features[column][feature]
            data.loc[i, "Language_ID"] = md["language"]
            data.loc[i, "Feature_ID"] = feature
            data.loc[i, "Value"] = "?"

    print()
    if path not in trust:
        # Untrusted data is normalised and written back to disk.
        data.sort_values(by=["Feature_ID", "Value"], inplace=True)
        columns = list(data.columns)
        first_columns = [
            "Feature_ID", "Language_ID", "Feature", "Value", "Answer",
            "Comment", "Source", "Possible Values",
            "Suggested standardised comments"
        ]
        for column in columns:
            if column not in first_columns:
                first_columns.append(column)
        data = data[first_columns]
        data.to_csv(path,
                    index=False,
                    sep=separator,
                    encoding='utf-8')
    return data