def from_XML_string(self, xml_string, entrypoint_name=None):
    """
    Loads an OBInstance from an XML string.

    If no entrypoint_name is given, the entrypoint is derived from the facts.  In some
    cases this is not possible because more than one entrypoint could match the list of
    facts, and in these cases an entrypoint_name is required.

    Args:
        xml_string (str): String containing XML.
        entrypoint_name (str): Optional name of the entrypoint.

    Returns:
        OBInstance containing the loaded data.
    """

    # NOTE: Much less coding and testing effort has gone into the XML parser than into
    # the JSON parser.  To some extent this is deliberate, since the expectation is that
    # the bulk of Orange Button data will be transmitted as JSON.  With this in mind you
    # are invited to (a) refactor the XML parsing code and (b) create XML test cases if
    # you believe the XML parser should receive the same effort level as the JSON parser.

    # Create a validation error which can be used to maintain a list of error messages
    validation_errors = ob.OBValidationErrors("Error(s) found in input XML")

    try:
        root = ElementTree.fromstring(xml_string)
    except Exception as e:
        validation_errors.append(e)
        raise validation_errors

    # If no entrypoint was supplied, derive it from the fact names (all elements that
    # are not a schemaRef, context, or unit):
    if not entrypoint_name:
        fact_names = []
        for child in root:
            if child.tag != _xn("link:schemaRef") and child.tag != _xn(
                    "unit") and child.tag != _xn("context"):
                tag = child.tag
                tag = tag.replace(
                    "{http://xbrl.us/Solar/v1.2/2018-03-31/solar}", "solar:")
                tag = tag.replace("{http://fasb.org/us-gaap/2017-01-31}",
                                  "us-gaap:")
                tag = tag.replace("{http://xbrl.sec.gov/dei/2014-01-31}",
                                  "dei:")
                fact_names.append(tag)

        try:
            entrypoint_name = self._entrypoint_name(fact_names)
        except ob.OBValidationError as ve:
            validation_errors.append(ve)
            raise validation_errors

    # Create an entrypoint.
    entrypoint = data_model.OBInstance(entrypoint_name, self._taxonomy,
                                       dev_validation_off=True)

    # Read in units (currently read but not applied to the facts below)
    units = {}
    for unit in root.iter(_xn("unit")):
        units[unit.attrib["id"]] = unit[0].text

    # Read in contexts
    contexts = {}
    for context in root.iter(_xn("context")):
        instant = None
        duration = None
        entity = None
        start_date = None
        end_date = None
        axis = {}
        for elem in context.iter():
            if elem.tag == _xn("period"):
                # Inspect the children of the period element; a duration is expressed
                # as a startDate/endDate pair, an instant as a single instant element.
                for period_elem in elem:
                    if period_elem.tag == _xn("forever"):
                        duration = "forever"
                    elif period_elem.tag == _xn("startDate"):
                        start_date = period_elem.text
                    elif period_elem.tag == _xn("endDate"):
                        end_date = period_elem.text
                    elif period_elem.tag == _xn("instant"):
                        instant = period_elem.text
            elif elem.tag == _xn("entity"):
                for elem2 in elem.iter():
                    if elem2.tag == _xn("identifier"):
                        entity = elem2.text
                    elif elem2.tag == _xn("segment"):
                        for elem3 in elem2.iter():
                            if elem3.tag == _xn("xbrldi:typedMember"):
                                for elem4 in elem3.iter():
                                    if elem4.tag != _xn("xbrldi:typedMember"):
                                        axis[elem3.attrib["dimension"]] = elem4.text

        if duration is None and start_date is not None and end_date is not None:
            duration = {"start": start_date, "end": end_date}

        kwargs = {}
        if instant is not None:
            kwargs["instant"] = instant
        if duration is not None:
            kwargs["duration"] = duration
        if entity is not None:
            kwargs["entity"] = entity
        for a in axis:
            kwargs[a] = axis[a]

        if instant is None and duration is None:
            validation_errors.append(
                ob.OBValidationError(
                    "Context is missing both a duration and instant tag"))
        if entity is None:
            validation_errors.append("Context is missing an entity tag")

        try:
            dm_ctx = data_model.Context(**kwargs)
            contexts[context.attrib["id"]] = dm_ctx
        except Exception as e:
            validation_errors.append(e)

    # Read all elements that are not a schemaRef, context, or unit and set them as
    # facts on the entrypoint:
    for child in root:
        if child.tag != _xn("link:schemaRef") and child.tag != _xn(
                "unit") and child.tag != _xn("context"):
            kwargs = {}
            fact_id = None
            if "id" in child.attrib:
                fact_id = child.attrib["id"]
            if "contextRef" in child.attrib:
                if child.attrib["contextRef"] in contexts:
                    kwargs["context"] = contexts[child.attrib["contextRef"]]
                    kwargs["fact_id"] = fact_id
                    tag = child.tag
                    tag = tag.replace(
                        "{http://xbrl.us/Solar/v1.2/2018-03-31/solar}", "solar:")
                    tag = tag.replace(
                        "{http://fasb.org/us-gaap/2017-01-31}", "us-gaap:")
                    tag = tag.replace(
                        "{http://xbrl.sec.gov/dei/2014-01-31}", "dei:")
                    try:
                        entrypoint.set(tag, child.text, **kwargs)
                    except Exception as e:
                        validation_errors.append(e)
                else:
                    validation_errors.append("referenced context is missing")
            else:
                validation_errors.append("Element is missing a context")

    # Raise the errors if necessary
    if validation_errors.get_errors():
        raise validation_errors

    # Return the populated entrypoint
    return entrypoint
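
# For reference, a minimal sketch of the kind of XBRL instance document the method above
# consumes, inferred from the tags it reads (link:schemaRef, context, unit, and namespaced
# fact elements carrying a contextRef).  This is an illustrative assumption, not output of
# this library: the schemaRef target, entity identifier, concept, and value are made up,
# and the exact element matching depends on the behavior of the _xn() helper.
#
#   <xbrli:xbrl xmlns:xbrli="http://www.xbrl.org/2003/instance"
#               xmlns:link="http://www.xbrl.org/2003/linkbase"
#               xmlns:xlink="http://www.w3.org/1999/xlink"
#               xmlns:solar="http://xbrl.us/Solar/v1.2/2018-03-31/solar">
#     <link:schemaRef xlink:type="simple" xlink:href="solar_taxonomy.xsd"/>
#     <xbrli:context id="ctx_1">
#       <xbrli:entity>
#         <xbrli:identifier scheme="http://example.com">JUPITER</xbrli:identifier>
#       </xbrli:entity>
#       <xbrli:period>
#         <xbrli:forever/>
#       </xbrli:period>
#     </xbrli:context>
#     <solar:TypeOfDevice contextRef="ctx_1">ModuleMember</solar:TypeOfDevice>
#   </xbrli:xbrl>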
def from_JSON_string(self, json_string, entrypoint_name=None):
    """
    Loads an OBInstance from a JSON string.

    If no entrypoint_name is given, the entrypoint is derived from the facts.  In some
    cases this is not possible because more than one entrypoint could match the list of
    facts, and in these cases an entrypoint_name is required.

    Args:
        json_string (str): String containing JSON.
        entrypoint_name (str): Optional name of the entrypoint.

    Returns:
        OBInstance containing the loaded data.
    """

    # Create a validation error which can be used to maintain a list of error messages
    validation_errors = ob.OBValidationErrors("Error(s) found in input JSON")

    # Convert string to JSON data
    try:
        json_data = json.loads(json_string)
    except Exception as e:
        validation_errors.append(e)
        raise validation_errors

    # Perform basic validation that all required parts of the document are present.
    if "documentType" not in json_data:
        validation_errors.append("JSON is missing documentType tag")
        raise validation_errors
    if "prefixes" not in json_data:
        validation_errors.append("JSON is missing prefixes tag")
        raise validation_errors
    if "dtsReferences" not in json_data:
        validation_errors.append("JSON is missing dtsReferences tag")
        raise validation_errors
    if "facts" not in json_data:
        validation_errors.append("JSON is missing facts tag")
        raise validation_errors
    facts = json_data["facts"]

    # Loop through the facts to determine which entrypoint this is.
    if not entrypoint_name:
        fact_names = []
        for id in facts:
            fact = facts[id]
            if "aspects" not in fact:
                validation_errors.append("fact tag is missing aspects tag")
            elif "concept" not in fact["aspects"]:
                validation_errors.append("aspects tag is missing concept tag")
            else:
                fact_names.append(fact["aspects"]["concept"])

        try:
            entrypoint_name = self._entrypoint_name(fact_names)
        except ob.OBValidationError as ve:
            validation_errors.append(ve)
            raise validation_errors

    # If we reach this point, re-initialize the validation errors because all previously
    # found errors will be found again.  Re-initialization reduces duplicate error
    # messages and ensures that errors are reported in the correct order.
    validation_errors = ob.OBValidationErrors("Error(s) found in input JSON")

    # Create an entrypoint.
    ob_instance = data_model.OBInstance(entrypoint_name, self._taxonomy,
                                        dev_validation_off=False)

    # Loop through facts.
    for id in facts:
        fact = facts[id]

        # Track the current number of errors to see if it grows for this fact
        begin_error_count = len(validation_errors.get_errors())

        # Create kwargs and populate with entity.
        kwargs = {}
        if "aspects" not in fact:
            validation_errors.append("fact tag is missing aspects tag")
        else:
            if "concept" not in fact["aspects"]:
                validation_errors.append("aspects tag is missing concept tag")
            if "entity" not in fact["aspects"]:
                validation_errors.append("aspects tag is missing entity tag")
            else:
                kwargs = {"entity": fact["aspects"]["entity"]}

            # TODO: id is not currently supported by Entrypoint.  Uncomment when it is.
            # if "id" in fact:
            #     kwargs["id"] = fact["id"]

            if "period" in fact["aspects"]:
                period = fact["aspects"]["period"]
                if "/" in period:
                    dates = period.split("/")
                    if len(dates) != 2:
                        validation_errors.append(
                            "period component is in an incorrect format "
                            "(yyyy-mm-ddT00:00:00/yyyy-mm-ddT00:00:00 expected)")
                    else:
                        start = util.convert_json_datetime(dates[0])
                        end = util.convert_json_datetime(dates[1])
                        if start is None:
                            validation_errors.append(
                                "period start component is in an incorrect format "
                                "(yyyy-mm-ddT00:00:00 expected)")
                        if end is None:
                            validation_errors.append(
                                "period end component is in an incorrect format "
                                "(yyyy-mm-ddT00:00:00 expected)")
                        kwargs["duration"] = {}
                        kwargs["duration"]["start"] = start
                        kwargs["duration"]["end"] = end
                else:
                    start = util.convert_json_datetime(fact["aspects"]["period"])
                    if start is None:
                        validation_errors.append(
                            "start is in an incorrect format "
                            "(yyyy-mm-ddT00:00:00 expected)")
                    kwargs["instant"] = start
            elif kwargs is not None:
                kwargs["duration"] = "forever"

            # Add each axis found in the aspects to kwargs.
            # TODO: Exception processing
            for axis_chk in fact["aspects"]:
                if "Axis" in axis_chk:
                    kwargs[axis_chk.split(":")[1]] = fact["aspects"][axis_chk]

        if "aspects" in fact and "unit" in fact["aspects"] and kwargs is not None:
            kwargs["unit_name"] = fact["aspects"]["unit"]

        if "value" not in fact:
            validation_errors.append("fact tag is missing value tag")

        kwargs["fact_id"] = id

        # If validation errors were found for this fact, continue to the next fact.
        if len(validation_errors.get_errors()) > begin_error_count:
            continue

        # TODO: Temporary code.  Required to match the behavior of to_JSON; once the two
        # are synchronized it should not be required.
        value = fact["value"]
        if value == "None":
            value = None
        elif value == "True":
            value = True
        elif value == "False":
            value = False
        # Done with temporary code

        try:
            ob_instance.set(fact["aspects"]["concept"], value, **kwargs)
            # entrypoint.set(fact["aspects"]["xbrl:concept"], fact["value"], **kwargs)
        except Exception as e:
            validation_errors.append(e)

    # Raise the errors if necessary
    if validation_errors.get_errors():
        raise validation_errors

    return ob_instance
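
# For reference, a minimal sketch of the JSON document shape the method above consumes,
# inferred from the keys it reads (documentType, prefixes, dtsReferences, and facts keyed
# by id, each carrying "aspects" and "value").  All concrete values shown, including the
# documentType URI, concept, entity, period, and value, are illustrative assumptions and
# not output captured from this library.
#
#   {
#     "documentType": "http://www.xbrl.org/WGWD/YYYY-MM-DD/xbrl-json",
#     "prefixes": {"solar": "http://xbrl.us/Solar/v1.2/2018-03-31/solar"},
#     "dtsReferences": [],
#     "facts": {
#       "fact-001": {
#         "aspects": {
#           "concept": "solar:TypeOfDevice",
#           "entity": "JUPITER",
#           "period": "2018-01-01T00:00:00/2018-02-01T00:00:00"
#         },
#         "value": "ModuleMember"
#       }
#     }
#   }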