def load(self, data, record_cls):
    """Load the data.

    Strips the search-only ``{key}_start``/``{key}_end`` fields that the
    dump hook added, so they do not leak back into the record.
    """
    try:
        parent_data = dict_lookup(data, self.keys, parent=True)
        # The `None` default covers the cases where exceptions were
        # raised in _dump and the derived fields were never written.
        parent_data.pop(f"{self.key}_start", None)
        parent_data.pop(f"{self.key}_end", None)
    except KeyError:
        # Parent path absent (e.g. drafts partially saved with no data),
        # matching the guarded behavior of the sibling load() hooks.
        return data
def __call__(self, ids):
    """Return the mapping when evaluated."""
    resolve = self.relation.pid_field.resolve
    # Map each id to the label found at the configured lookup key of its
    # resolved record.
    return {id_: dict_lookup(resolve(id_), self.lookup_key) for id_ in ids}
def load(self, data, record_cls):
    """Remove the tokens from the data dictionary."""
    try:
        parent_data = dict_lookup(data, self.keys, parent=True)
    except KeyError:
        # Nothing to clean up if the parent path is absent.
        return
    if parent_data is not None:
        parent_data.pop(self.key, None)
def load(self, data, record_cls):
    """Load the data."""
    try:
        container = dict_lookup(data, self.keys, parent=True)
    except KeyError:
        # Drafts partially saved with no data.
        # The empty {} gets removed by `clear_none`.
        return data
    # The `None` default covers the cases where exceptions were raised
    # in _dump and the range key was never written.
    container.pop(self.range_key, None)
def dump(self, record, data):
    """Dump the grant tokens to the data dictionary."""
    try:
        access = record.access
        if access:
            serialized = [grant.to_token() for grant in access.grants]
            dict_lookup(data, self.keys, parent=True)[self.key] = serialized
    except KeyError:
        # Parent path absent — skip dumping the tokens.
        pass
def load(self, data, record_cls):
    """Load the data."""
    try:
        entries = dict_lookup(data, self.keys, parent=False)
        # The `None` default covers the cases where exceptions were
        # raised in _dump and the range key was never written.
        for entry in entries:
            entry.pop(self.range_key, None)
    except KeyError:
        return data
def apply(self, stream_entry, **kwargs):
    """Applies the transformation to the stream entry.

    Builds a names-style entry (given/family name, ORCiD identifier and
    current affiliations) from a raw ORCiD record and stores it back on
    the stream entry.

    :raises TransformerError: if the ORCiD record has no name.
    """
    record = stream_entry.entry
    person = record["person"]
    orcid_id = record["orcid-identifier"]["uri"]

    name = person.get("name")
    if name is None:
        # Plain string: the previous f-string had no placeholders.
        raise TransformerError("Name not found in ORCiD entry.")

    entry = {
        "given_name": name.get("given-names"),
        "family_name": name.get("family-name"),
        "identifiers": [{"scheme": "orcid", "identifier": orcid_id}],
        "affiliations": [],
    }

    # Affiliations are a deliberate best-effort: any missing/malformed
    # employment data leaves the list (possibly partially) filled.
    try:
        employments = dict_lookup(
            record, "activities-summary.employments.affiliation-group"
        )
        # A single employment is returned as a dict, multiple as a list.
        if isinstance(employments, dict):
            employments = [employments]
        seen = set()
        for employment in employments:
            terminated = employment["employment-summary"].get("end-date")
            affiliation = dict_lookup(
                employment,
                "employment-summary.organization.name",
            )
            # Keep only current (non-terminated) affiliations, once each.
            if affiliation not in seen and not terminated:
                seen.add(affiliation)
                entry["affiliations"].append({"name": affiliation})
    except Exception:  # intentional broad catch for best-effort block
        pass

    stream_entry.entry = entry
    return stream_entry
def dump(self, record, data):
    """Dump the data."""
    try:
        container = dict_lookup(data, self.keys, parent=True)
        parsed = parse_edtf(container[self.key])
        container[self.range_key] = dict(
            gte=_format_date(parsed.lower_strict()),
            lte=_format_date(parsed.upper_strict()),
        )
    except (KeyError, EDTFParseException):
        # The field does not exist or held unparsable data.
        return data  # FIXME: should log this in debug mode?
def dump(self, record, data):
    """Dump the data.

    Parses the EDTF value stored at ``self.key`` and writes its interval
    bounds as ISO dates under ``{key}_start`` and ``{key}_end``.
    """
    try:
        parent_data = dict_lookup(data, self.keys, parent=True)
        pd = parse_edtf(parent_data[self.key])
        # lower_strict()/upper_strict() return UTC struct_time tuples.
        # Build the date directly from (year, month, day): the previous
        # `date.fromtimestamp(calendar.timegm(...))` re-interpreted the
        # UTC timestamp in *local* time, shifting the result one day in
        # timezones west of UTC.
        parent_data[f"{self.key}_start"] = date(
            *pd.lower_strict()[:3]
        ).isoformat()
        parent_data[f"{self.key}_end"] = date(
            *pd.upper_strict()[:3]
        ).isoformat()
    except (KeyError, EDTFParseException):
        # The field does not exist or had wrong data.
        return data  # FIXME: should log this in debug mode?
def dump(self, record, data):
    """Dump the data."""
    try:
        entries = dict_lookup(data, self.keys, parent=False)
        # EDTF parse_edtf (using pyparsing) expects a string input.
        for entry in entries:
            parsed = parse_edtf(entry[self.key])
            entry[self.range_key] = dict(
                gte=_format_date(parsed.lower_strict()),
                lte=_format_date(parsed.upper_strict()),
            )
    except (KeyError, EDTFParseException):
        # The field does not exist or held unparsable data.
        return data  # FIXME: should log this in debug mode?
def _collect_values(self, hits):
    """Collect all field values to be expanded."""
    grouped_values = {}
    for hit in hits:
        for field in self._fields:
            try:
                raw = dict_lookup(hit, field.field_name)
            except KeyError:
                continue
            # raw is not None at this point.
            value, service = field.get_value_service(raw)
            field.add_service_value(service, value)
            # Collect the values (ids) grouped by service, e.g.:
            #   service_1: {13, 4}
            #   service_2: {uuid1, uuid2, ...}
            grouped_values.setdefault(service, set()).add(value)
    return grouped_values
def test_dict_lookup():
    """Test lookup by a key."""
    d = {
        'a': 1,
        'b': {'c': None},
        'd': ['1', '2'],
    }
    # Successful lookups: plain keys, dotted paths, list indices
    # (including negative indices).
    cases = [
        ('a', d['a']),
        ('b', d['b']),
        ('b.c', d['b']['c']),
        ('d', d['d']),
        ('d.0', d['d'][0]),
        ('d.1', d['d'][1]),
        ('d.-1', d['d'][-1]),
    ]
    for key, expected in cases:
        assert dict_lookup(d, key) == expected
    # Missing keys, missing nested keys, indexing into None and
    # out-of-range list indices all raise KeyError.
    for bad_key in ('x', 'a.x', 'b.x', 'b.c.0', 'd.3'):
        with pytest.raises(KeyError):
            dict_lookup(d, bad_key)
def expand(self, hit):
    """Return the expanded fields for the given hit."""
    results = {}
    for field in self._fields:
        try:
            raw = dict_lookup(hit, field.field_name)
        except KeyError:
            continue
        # raw is not None at this point.
        value, service = field.get_value_service(raw)
        resolved = field.get_dereferenced_record(service, value)
        if not resolved:
            continue
        picked = field.pick(resolved)
        # Rebuild the (potentially dotted) field name as nested dicts so
        # the output keeps the field's nested structure, then merge it
        # into the accumulated results.
        nested = {}
        dict_set(nested, field.field_name, picked)
        dict_merge(results, nested)
    return results
def pre_load(self, data, **kwargs):
    """Called before a record is loaded from a secondary storage system.

    Removes the dumped attribute from ``data`` so it does not end up on
    the record. (The original docstring said "dumped", which looks like a
    copy-paste from the dump hook — TODO confirm against the base class.)
    """
    if not self._dump:
        return
    keys = parse_lookup_key(self.attr_name)
    try:
        parent = dict_lookup(data, keys, parent=True)
    except KeyError:
        # Parent path absent (e.g. partial dumps) — nothing to remove.
        # Mirrors the guarded behavior of the other load hooks.
        return
    parent.pop(keys[-1], None)