def resolve_field_value_template(tree, data, data_index):
    """
    Resolve a field value template described by ``tree`` against ``data``.

    Every path listed under ``start.object_path`` is followed in turn: the ids
    found at that path in the current data are looked up with
    ``data_index.get`` and the result (filtered by ``remove_none``) becomes the
    data for the next step. ``None`` is returned as soon as a path gives no ids.

    With exactly one ``start.value_path`` the value found at that path is
    returned directly. Otherwise, for each data object, the values of all value
    paths are combined (cartesian product) into space separated strings; the
    combined strings are passed through ``remove_empty`` and ``distinct`` and
    returned as a list.
    """
    current = data
    for object_path in as_list(resolve_path(tree, ['start', 'object_path'])):
        ids = as_list(resolve_path(current, get_field_path(object_path)))
        if not ids:
            return None
        current = remove_none([data_index.get(identifier) for identifier in ids])

    value_paths = as_list(resolve_path(tree, ['start', 'value_path']))
    if len(value_paths) == 1:
        return resolve_path(current, get_field_path(value_paths[0]))

    def join_parts(parts):
        # A part that starts with the text accumulated so far replaces it,
        # otherwise it is appended after a single space.
        joined = ""
        for part in parts:
            joined = part if part.startswith(joined) else joined + " " + part
        return joined

    collected = []
    for item in as_list(current):
        def values_at(value_path, item=item):
            # Paths without any value give None so that they can be filtered out
            return as_list(resolve_path(item, get_field_path(value_path))) or None

        field_values = remove_empty(map(values_at, value_paths))
        for combination in itertools.product(*field_values):
            collected.append(join_parts(combination))
    return list(distinct(remove_empty(collected)))
def transform_parse_uri(source: dict, entities: dict, entity_line: Tuple[str, str]) -> List[dict]:
    """
    Parse a JSON line of an entity and list the type, URI and identifier of
    each object found in it.

    The objects nested in the line through 'internal-object' links of the
    entity come first in the output, the object of the line itself comes last.
    """
    entity, line = entity_line
    data = json.loads(line)

    def describe(object_entity, object_data):
        # Get or generate the identifier first, then the URI
        identifier = get_identifier(object_entity, object_data)
        uri = get_generate_uri(source, object_entity, object_data)
        return {'@type': object_entity, '@id': uri, 'schema:identifier': identifier}

    output = []

    # Internal objects (if any)
    for link in get_in(entities, [entity, 'links']) or []:
        if link['type'] == 'internal-object':
            nested_entity = link['entity']
            nested_path = remove_empty(link['json-path'].split('.'))
            for nested_data in as_list(get_in(data, nested_path)):
                output.append(describe(nested_entity, nested_data))

    # Current data object
    output.append(describe(entity, data))
    return output
def parse_join_template(template):
    """
    Parse the elements of a '{join}' template.

    :param template: mapping holding the elements to join under its '{join}' key
    :return: the result of ``merge_dict`` applied to the template and its
             '{join}' entry replaced by the parsed elements
    :raises Exception: when the template has no '{join}' elements
    """
    elements = as_list(template.get('{join}'))
    if not elements:
        # Report the offending template: the elements are empty here and would
        # give no information in the message.
        raise Exception("Empty '{{join}}' template '{}'".format(template))
    return merge_dict(template, {'{join}': parse_template(elements)})
def fetch_all_links(source, logger, entities):
    """
    Link the stored objects of each entity to the objects of other entities.

    Supported link types:
     - 'internal': the linked object is referenced by an identifier found
       inside the JSON object (ex: study.locationDbId references a location)
     - 'internal-object': the linked object is contained inside the JSON
       object (ex: study.location contains the location)
     - 'external-object': the linked objects are fetched using a dedicated
       call (ex: /brapi/v1/studies/{id}/observationVariables gives the
       observation variables of a study)

    Raises BrokenLink when a link marked as 'required' has no value in an object.
    """
    for entity_name, entity in entities.items():
        if 'links' not in entity:
            continue

        for link in entity['links']:
            for object_id, record in entity['store'].items():
                target_name = link['entity']
                target = entities[target_name]
                found_by_id = {}
                link_type = link['type']

                if link_type.startswith('internal'):
                    json_path = link['json-path']
                    path_parts = remove_empty(json_path.split('.'))
                    values = remove_none(as_list(get_in(record, path_parts)))

                    if not values:
                        if link.get('required'):
                            raise BrokenLink("Could not find required field '{}' in {} object id '{}'"
                                             .format(json_path, entity_name, object_id))
                        # Nothing to link for this object
                        continue

                    if link_type == 'internal-object':
                        for value in values:
                            found_by_id[get_identifier(target_name, value)] = value
                    elif link_type == 'internal':
                        id_field = target['name'] + 'DbId'
                        name_field = target['name'] + 'Name'
                        for value in values:
                            value_id = value.get(id_field)
                            value_name = value.get(name_field)
                            if value_id:
                                # Only the identifier and the name of the linked object are kept
                                found_by_id[value_id] = {id_field: value_id, name_field: value_name}

                elif link_type == 'external-object':
                    call = get_implemented_call(source, link, context=record)
                    if not call:
                        continue

                    fetched = list(BreedingAPIIterator.fetch_all(source['brapi:endpointUrl'], call, logger))
                    for value in fetched:
                        found_by_id[get_identifier(target_name, value)] = value

                link_objects(entity, record, target, found_by_id)
def resolve_list_template(template, data, data_index):
    """
    Resolve a '{list}' template into a list.

    The '{list}' elements are resolved first, then each transform named under
    '{transform}' (looked up in ``list_transforms``) is applied, in order, to
    the resolved value.
    """
    elements = template.get('{list}')
    transform_keys = template.get('{transform}')

    resolved = resolve(as_list(elements), data, data_index)
    # No transform at all when the '{transform}' entry is missing or empty
    for transform_key in transform_keys or ():
        resolved = list_transforms[transform_key](resolved)
    return list(resolved)
def resolve_join_template(template, data, data_index):
    """
    Resolve a '{join}' template into a string.

    The '{join}' elements are resolved and flattened, filtered with
    ``remove_none`` and joined with the '{separator}' value (empty string by
    default) using ``coll_as_str``.

    When '{accept_none}' is set to False, None is returned instead as soon as
    one of the flattened elements is falsy.
    """
    # Kept as an equality test: only a value equal to False disables accept_none
    reject_falsy = template.get('{accept_none}') == False
    separator = template.get('{separator}') or ''
    elements = template.get('{join}')

    flattened = flatten(resolve(elements, data, data_index))
    if reject_falsy and any(not element for element in as_list(flattened)):
        return None
    return coll_as_str(remove_none(flattened), separator)
def resolve_map_template(template, data, data_index):
    """
    Resolve a '{map}' template.

    The '{map}' elements are resolved into a list, then the '{to}' template is
    resolved once for each value of that list (the value being used as data).
    None is returned when ``remove_empty`` leaves nothing of the resolved list.

    Raises an Exception when the resolved elements are not a list.
    """
    elements = template.get('{map}')
    transform = template.get('{to}')

    resolved = as_list(resolve(elements, data, data_index))
    if not isinstance(resolved, list):
        raise Exception("Map can only work on lists.")

    if remove_empty(resolved):
        return [resolve(transform, value, data_index) for value in resolved]
    return None
def resolve_field_value_template(tree, data, data_index):
    """
    Resolve a field value template described by ``tree`` against ``data``.

    Every path listed under ``start.object_path`` is followed in turn. The
    last segment of the path gives the entity name once its 'URI'/'URIs'
    suffix is removed (ex: 'studyURIs' gives 'study'); the ids found at the
    path are looked up in ``data_index[entity]`` and the result (filtered by
    ``remove_none``) becomes the data for the next step. Indexed values having
    a ``decode`` method are decoded and parsed as JSON, other values are used
    unchanged. The entity index is closed after use when it has a ``close``
    attribute. ``None`` is returned as soon as a path gives no ids.

    With exactly one ``start.value_path`` the value found at that path is
    returned directly. Otherwise, for each data object, the values of all value
    paths are combined (cartesian product) into space separated strings; the
    combined strings are passed through ``remove_empty`` and ``distinct`` and
    returned as a list.
    """
    current = data
    for object_path in as_list(get_in(tree, ['start', 'object_path'])):
        field_path = get_field_path(object_path)
        ids = as_list(get_in(current, field_path))
        if not ids:
            return None

        entity = re.sub(r"(\w+)URIs?", "\\1", field_path[-1])
        entity_index = data_index[entity]
        try:
            decoded = [json.loads(entity_index[identifier].decode()) for identifier in ids]
            current = remove_none(decoded)
        except AttributeError:
            # Values without a decode method are taken as they are
            current = remove_none([entity_index[identifier] for identifier in ids])
        if getattr(entity_index, 'close', False):
            entity_index.close()

    value_paths = as_list(get_in(tree, ['start', 'value_path']))
    if len(value_paths) == 1:
        return get_in(current, get_field_path(value_paths[0]))

    def join_parts(parts):
        # A part that starts with the text accumulated so far replaces it,
        # otherwise it is appended after a single space.
        joined = ""
        for part in parts:
            joined = part if part.startswith(joined) else joined + " " + part
        return joined

    collected = []
    for item in as_list(current):
        def values_at(value_path, item=item):
            # Paths without any value give None so that they can be filtered out
            return as_list(get_in(item, get_field_path(value_path))) or None

        field_values = remove_empty(map(values_at, value_paths))
        for combination in itertools.product(*field_values):
            collected.append(join_parts(combination))
    return list(distinct(remove_empty(collected)))