예제 #1
0
    def load(self, data, record_cls):
        """Strip the derived ``<key>_start`` / ``<key>_end`` entries.

        :param data: Dictionary being loaded; it is modified in place.
        :param record_cls: Record class being loaded (not used here).
        """
        try:
            parent_data = dict_lookup(data, self.keys, parent=True)
        except KeyError:
            # The parent path is missing (e.g. partially saved data), so
            # there is nothing to strip.
            return

        # The `None` default turns each pop into a no-op when the entry was
        # never written, e.g. because dumping failed for this record.
        parent_data.pop(f"{self.key}_start", None)
        parent_data.pop(f"{self.key}_end", None)
예제 #2
0
    def __call__(self, ids):
        """Build an ``id -> value`` mapping for the given ids.

        Each id is resolved through the relation's PID field and the value
        stored under ``self.lookup_key`` is extracted from the result.

        :param ids: Iterable of identifiers to resolve.
        :returns: Dictionary keyed by id.
        """
        return {
            pid: dict_lookup(
                self.relation.pid_field.resolve(pid), self.lookup_key
            )
            for pid in ids
        }
예제 #3
0
    def load(self, data, record_cls):
        """Drop the entry stored under ``self.key`` from ``data``.

        :param data: Dictionary being loaded; it is modified in place.
        :param record_cls: Record class being loaded (not used here).
        """
        try:
            container = dict_lookup(data, self.keys, parent=True)
            if container is None:
                return
            # Popping with a default is a no-op when the key is absent.
            container.pop(self.key, None)
        except KeyError:
            # The parent path does not exist: nothing to remove.
            return
예제 #4
0
 def load(self, data, record_cls):
     """Remove the entry stored under ``self.range_key`` from ``data``.

     :param data: Dictionary being loaded; it is modified in place.
     :param record_cls: Record class being loaded (not used here).
     :returns: ``data`` when the parent path is missing, otherwise ``None``.
     """
     try:
         # The `None` default keeps the pop silent when the range entry was
         # never written (e.g. dumping raised for this record).
         dict_lookup(data, self.keys, parent=True).pop(self.range_key, None)
     except KeyError:
         # Partially saved drafts may carry no data at all; the leftover
         # empty {} gets removed by `clear_none`.
         return data
     return None
예제 #5
0
    def dump(self, record, data):
        """Write the record's grant tokens into ``data`` under ``self.key``.

        Nothing is written when ``record.access`` is falsy or when a
        ``KeyError`` is raised along the way.

        :param record: Record whose ``access.grants`` are serialised.
        :param data: Dump dictionary; it is modified in place.
        """
        try:
            if not record.access:
                return

            grant_tokens = [g.to_token() for g in record.access.grants]
            dict_lookup(data, self.keys, parent=True)[self.key] = grant_tokens
        except KeyError:
            return
예제 #6
0
    def load(self, data, record_cls):
        """Remove ``self.range_key`` from every item of the looked-up list.

        :param data: Dictionary being loaded; its items are modified in place.
        :param record_cls: Record class being loaded (not used here).
        :returns: ``data`` when a ``KeyError`` is raised, otherwise ``None``.
        """
        try:
            for entry in dict_lookup(data, self.keys, parent=False):
                # The `None` default keeps the pop silent for items that
                # never received a range (e.g. dumping raised for them).
                entry.pop(self.range_key, None)
        except KeyError:
            return data
        return None
예제 #7
0
    def apply(self, stream_entry, **kwargs):
        """Turn an ORCiD record into a name/identifier/affiliation entry.

        The transformed dictionary replaces ``stream_entry.entry``.

        :param stream_entry: Object whose ``entry`` holds the ORCiD record.
        :returns: The same ``stream_entry``, with ``entry`` replaced.
        :raises TransformerError: If the record's person has no ``name``.
        :raises KeyError: If ``person`` or ``orcid-identifier`` is missing.
        """
        source = stream_entry.entry
        person = source["person"]
        orcid_uri = source["orcid-identifier"]["uri"]

        name = person.get("name")
        if name is None:
            raise TransformerError(f"Name not found in ORCiD entry.")

        affiliations = []
        result = {
            "given_name": name.get("given-names"),
            "family_name": name.get("family-name"),
            "identifiers": [{"scheme": "orcid", "identifier": orcid_uri}],
            "affiliations": affiliations,
        }

        # Affiliations are best effort: any failure below keeps whatever was
        # collected so far and never aborts the transformation.
        try:
            groups = dict_lookup(
                source, "activities-summary.employments.affiliation-group"
            )
            # A lone group comes as a dict rather than a list of dicts.
            if isinstance(groups, dict):
                groups = [groups]

            seen = set()
            for group in groups:
                ended = group["employment-summary"].get("end-date")
                organization = dict_lookup(
                    group, "employment-summary.organization.name"
                )
                # Skip duplicates and employments that have an end date.
                if organization in seen or ended:
                    continue
                seen.add(organization)
                affiliations.append({"name": organization})
        except Exception:
            pass

        stream_entry.entry = result
        return stream_entry
예제 #8
0
    def dump(self, record, data):
        """Store a ``gte``/``lte`` range derived from the EDTF value.

        The value under ``self.key`` is parsed and its strict lower and
        upper bounds are written next to it under ``self.range_key``.

        :param record: Record being dumped (not used here).
        :param data: Dump dictionary; it is modified in place.
        :returns: ``data`` when the field is missing or cannot be parsed,
            otherwise ``None``.
        """
        try:
            container = dict_lookup(data, self.keys, parent=True)
            parsed = parse_edtf(container[self.key])
            earliest = _format_date(parsed.lower_strict())
            latest = _format_date(parsed.upper_strict())
            container[self.range_key] = {"gte": earliest, "lte": latest}
        except (KeyError, EDTFParseException):
            # The field does not exist or holds invalid data.
            return data  # FIXME: should log this in debug mode?
        return None
예제 #9
0
    def dump(self, record, data):
        """Store ISO start/end dates derived from the EDTF value.

        The value under ``self.key`` is parsed and its strict lower and
        upper bounds are written next to it as ``<key>_start`` and
        ``<key>_end``.

        :param record: Record being dumped (not used here).
        :param data: Dump dictionary; it is modified in place.
        :returns: ``data`` when the field is missing or cannot be parsed,
            otherwise ``None``.
        """
        try:
            parent_data = dict_lookup(data, self.keys, parent=True)

            pd = parse_edtf(parent_data[self.key])
            # Build the dates straight from the (year, month, day) fields of
            # the time tuples. Going through a UTC epoch and back with
            # `date.fromtimestamp` applies the *local* timezone, which shifts
            # the result by one day on hosts west of UTC.
            start = date(*pd.lower_strict()[:3]).isoformat()
            end = date(*pd.upper_strict()[:3]).isoformat()

            # Write only once both bounds are known, so a failure cannot
            # leave a start without its end.
            parent_data[f"{self.key}_start"] = start
            parent_data[f"{self.key}_end"] = end
        except (KeyError, EDTFParseException):
            # The field does not exist or holds invalid data.
            return data  # FIXME: should log this in debug mode?
예제 #10
0
    def dump(self, record, data):
        """Add a ``gte``/``lte`` range to every item of the looked-up list.

        For each item the EDTF value under ``self.key`` is parsed and its
        strict bounds are stored under ``self.range_key``. Processing stops
        at the first item that is missing the key or fails to parse.

        :param record: Record being dumped (not used here).
        :param data: Dump dictionary; its items are modified in place.
        :returns: ``data`` when a lookup or parse fails, otherwise ``None``.
        """
        try:
            for entry in dict_lookup(data, self.keys, parent=False):
                # parse_edtf (pyparsing based) expects a string.
                parsed = parse_edtf(entry[self.key])
                earliest = _format_date(parsed.lower_strict())
                latest = _format_date(parsed.upper_strict())
                entry[self.range_key] = {"gte": earliest, "lte": latest}
        except (KeyError, EDTFParseException):
            # The field does not exist or holds invalid data.
            return data  # FIXME: should log this in debug mode?
        return None
예제 #11
0
    def _collect_values(self, hits):
        """Gather the values of every configured field, grouped by service.

        Fields that are absent from a hit are skipped.

        :param hits: Iterable of hits to inspect.
        :returns: Dictionary mapping each service to the set of values
            collected for it, e.g. ``service_1: {13, 4}``.
        """
        by_service = {}
        for hit in hits:
            for field in self._fields:
                try:
                    raw = dict_lookup(hit, field.field_name)
                except KeyError:
                    # This hit does not carry the field.
                    continue

                value, service = field.get_value_service(raw)
                field.add_service_value(service, value)
                by_service.setdefault(service, set()).add(value)

        return by_service
예제 #12
0
def test_dict_lookup():
    """Check dotted-key lookups, including list indexes and missing keys."""
    nested = {'a': 1, 'b': {'c': None}, 'd': ['1', '2']}

    # Successful lookups: dict keys, nested keys and list indexes.
    found = [
        ('a', nested['a']),
        ('b', nested['b']),
        ('b.c', nested['b']['c']),
        ('d', nested['d']),
        ('d.0', nested['d'][0]),
        ('d.1', nested['d'][1]),
        ('d.-1', nested['d'][-1]),
    ]
    for key, expected in found:
        assert dict_lookup(nested, key) == expected

    # Every unresolvable path must surface as a KeyError.
    for missing in ('x', 'a.x', 'b.x', 'b.c.0', 'd.3'):
        with pytest.raises(KeyError):
            dict_lookup(nested, missing)
0
    def expand(self, hit):
        """Build the expanded representation of the configured fields.

        Fields that are absent from ``hit``, or whose dereferenced record is
        falsy, are left out of the result.

        :param hit: The hit whose fields should be expanded.
        :returns: Nested dictionary holding the picked output of every
            expanded field.
        """
        expanded = {}
        for field in self._fields:
            try:
                raw = dict_lookup(hit, field.field_name)
            except KeyError:
                # This hit does not carry the field.
                continue

            value, service = field.get_value_service(raw)
            record = field.get_dereferenced_record(service, value)
            if record:
                picked = field.pick(record)
                # The (possibly dotted) field name becomes nested dicts so
                # the field keeps its original structure, then the result is
                # merged into what was expanded so far.
                nested = {}
                dict_set(nested, field.field_name, picked)
                dict_merge(expanded, nested)

        return expanded
예제 #14
0
 def pre_load(self, data, **kwargs):
     """Drop this field's value from ``data`` when ``self._dump`` is set.

     The last key of the parsed ``self.attr_name`` is popped from its parent
     container; nothing happens when ``self._dump`` is falsy.

     :param data: Dictionary being loaded; it is modified in place.
     """
     if not self._dump:
         return

     path = parse_lookup_key(self.attr_name)
     # Popping with a default is a no-op when the key is absent.
     dict_lookup(data, path, parent=True).pop(path[-1], None)