def parse_metadata_command(sh: Worksheet, area: AreaTupleType, name: str = None) -> IssuesLabelContentTripleType:
    """
    Most "parse" methods are mostly syntactic (as opposed to semantic) and do not check the existence of names.
    In this case, however, the valid field names are fixed beforehand, so they are checked at parse time.
    Fields with a controlled vocabulary are also validated against their list of allowed values.

    :param sh: Input worksheet
    :param area: Tuple (top, bottom, left, right) representing the rectangular area of the input worksheet where the
                 command is present
    :return: list of issues (issue_type, message), command label, command content
    """
    some_error = False
    issues = []
    controlled = create_dictionary()
    mandatory = create_dictionary()
    keys = create_dictionary()
    for t in metadata_fields:
        controlled[t[4]] = t[3]
        mandatory[t[4]] = t[2]
        keys[t[0]] = t[4]

    # Controlled vocabularies (only consulted for the controllable fields)
    cl = {"dimensions": ["water", "energy", "food", "land", "climate"],
          "subject_topic_keywords": None,
          "geographical_level": ["local", "regional", "region", "country", "europe", "global", "sectoral", "sector"],
          "geographical_situation": None,  # TODO Read the list of all geographical regions (a long list!!)
          "restriction_level": ["internal", "confidential", "public"],
          "language": None  # TODO Read the list of ALL languages (or just "English"??)
          }

    # Scan the sheet; the first column must be one of the keys of "keys", the following
    # columns can contain repeating values
    content = {}  # Dictionary of lists, one per metadata key
    for r in range(area[0], area[1]):
        label = sh.cell(row=r, column=area[2]).value
        if label in keys:
            key = keys[label]
            for c in range(area[2] + 1, area[3]):
                value = sh.cell(row=r, column=c).value
                if value:
                    value = str(value).strip()
                    # Control "value" if the field is controllable
                    if controlled[key] and cl[key] and value.lower() not in cl[key]:
                        issues.append((3, "The key '" + key + "' should be one of: " + ",".join(cl[key])))
                    if key not in content:
                        content[key] = []
                    content[key].append(value)
        else:
            issues.append((2, "Row " + str(r) + ": unknown metadata label '" + label + "'"))

    for key in keys.values():
        if mandatory[key] and key not in content:
            some_error = True
            issues.append((3, "The value '" + key + "' is mandatory in the definition of the metadata"))

    return issues, None, content
def lcia_method(indicator: str, method: str = None, horizon: str = None,
                state: State = None, lcia_methods: PartialRetrievalDictionary = None):
    """
    :param indicator: Indicator name
    :param method: LCIA method weighting
    :param horizon: Time horizon
    :param state: Current values of processor plus parameters
    :param lcia_methods: Where LCIA data is collected
    :return: A dictionary with the computed value of each matching LCIA indicator
    """
    if indicator is None or indicator.strip() == "":
        return None

    k = dict(d=indicator)
    if method:
        k["m"] = method
    if horizon:
        k["h"] = horizon
    ms = lcia_methods.get(key=k, key_and_value=True)

    indices = create_dictionary()
    for k, v in ms:
        idx_name = f'{k["d"]}_{k["m"]}_{k["h"]}'
        if idx_name in indices:
            lst = indices[idx_name]
        else:
            lst = []
            indices[idx_name] = lst
        lst.append((k["i"], v[0], float(v[1])))

    ifaces = create_dictionary()
    for t in state.list_namespace_variables():
        if not t[0].startswith("_"):
            p = t[1]  # * ureg(iface_unit)
            ifaces[t[0]] = p

    res = dict()
    for name, lst in indices.items():
        interfaces = []
        weights = []
        for t in lst:
            if t[0] in ifaces:
                v = ifaces[t[0]]  # TODO .to(t[1])
                interfaces.append(v)
                weights.append(t[2])
        # Calculate the value
        ind = np.sum(np.multiply(interfaces, weights))  # * ureg(indicator_unit)
        res[name] = ind

    return res
def obtain_dataset_source(dset_name, local_datasets=None):
    from nexinfosys.ie_imports.data_sources.ad_hoc_dataset import AdHocDatasets
    # Register AdHocDatasets
    adhoc = None
    if local_datasets:
        if "AdHoc" not in nexinfosys.data_source_manager.registry:
            adhoc = AdHocDatasets(local_datasets)
            nexinfosys.data_source_manager.register_datasource_manager(adhoc)

    # Obtain the list of ALL datasets, find the desired one, then find the source of the dataset
    lst = nexinfosys.data_source_manager.get_datasets(None, None, local_datasets)  # ALL Datasets, (source, dataset)
    ds = create_dictionary(data={d[0]: t[0] for t in lst for d in t[1]})  # Dataset to Source (to obtain the source given the dataset name)

    source = ds[dset_name] if dset_name in ds else None

    # Unregister AdHocDatasets (only if it was registered above)
    if local_datasets and adhoc is not None:
        nexinfosys.data_source_manager.unregister_datasource_manager(adhoc)

    return source
def test_002_many_to_many_1(self):
    # Prepare a many-to-many map from category set to category set
    m = create_dictionary()
    m["cat_o_1"] = ("cat_d_1",
                    {"c11": [{"d": "c21", "w": 0.6},
                             {"d": "c22", "w": 0.4}],
                     "c12": [{"d": "c23", "w": 1.0}],
                     "c13": [{"d": "c23", "w": 1.0}]})
    # Prepare a simple DataFrame to be mapped
    df = pd.DataFrame(data=[["c11", 4], ["c12", 3], ["c13", 1.5]], columns=["cat_o_1", "value"])
    # Call
    df2 = augment_dataframe_with_mapped_columns(df, m, ["value"])
    # Check result: "c11" maps to two destination categories, so the three
    # input rows expand into four output rows
    self.assertEqual(list(df2.columns), ["cat_o_1", "cat_d_1", "value"])
    self.assertEqual(df2.shape, (4, 3))
def execute(self, state: "State"):
    any_error = False
    issues = []
    sheet_name = self._content["command_name"]
    # Obtain global variables in state
    glb_idx, p_sets, hh, datasets, mappings = get_case_study_registry_objects(state)
    scenarios = create_dictionary()
    for r, param in enumerate(self._content["items"]):
        parameter = param["parameter"]
        scenario = param.get("scenario_name")
        p = glb_idx.get(Parameter.partial_key(parameter))
        if len(p) == 0:
            issues.append(Issue(itype=IType.ERROR,
                                description="The parameter '" + parameter + "' has not been declared previously.",
                                location=IssueLocation(sheet_name=sheet_name, row=r, column=None)))
            any_error = True
            continue

        p = p[0]
        name = parameter
        value = param.get("parameter_value")
        check_parameter_value(glb_idx, p, value, issues, sheet_name, r)
        description = param.get("description")  # For readability of the workbook. Not used for solving
        if scenario:
            if scenario in scenarios:
                sp = scenarios[scenario]
            else:
                sp = create_dictionary()
                scenarios[scenario] = sp
            sp[name] = value
        else:
            p.current_value = value
            p.default_value = value

    if not any_error:
        solver_parameters = {}  # {p.name: p.current_value for p in glb_idx.get(Parameter.partial_key()) if p.group and strcmp(p.group, "NISSolverParameters")}
        if len(scenarios) == 0:
            scenarios["default"] = create_dictionary()
        ps = ProblemStatement(solver_parameters, scenarios)
        glb_idx.put(ps.key(), ps)

    return issues, None
def serialize_state(state: State):
    """
    Serialization prepared for a given organization of the state

    :return:
    """
    def serialize_dataframe(df):
        return df.to_json(orient="split", index=False), \
               json.dumps({i[0]: str(i[1]) for i in df.dtypes.to_dict().items()})
        # list(df.index.names), df.to_dict()

    print("  serialize_state IN")

    import copy
    # "_datasets": save and nullify before the deep copy
    ns_ds = {}
    for ns in state.list_namespaces():
        _, _, _, datasets, _ = get_case_study_registry_objects(state, ns)
        ns_ds[ns] = datasets
        state.set("_datasets", create_dictionary(), ns)  # Nullify datasets

    # !!! WARNING: It destroys "state", so a DEEP COPY is performed !!!
    tmp = sys.getrecursionlimit()
    sys.setrecursionlimit(10000)
    state2 = copy.deepcopy(state)
    sys.setrecursionlimit(tmp)

    # Iterate all namespaces
    for ns in state2.list_namespaces():
        glb_idx, p_sets, hh, _, mappings = get_case_study_registry_objects(state2, ns)
        if glb_idx:
            tmp = glb_idx.to_pickable()
            state2.set("_glb_idx", tmp, ns)
        datasets = ns_ds[ns]
        # TODO Serialize other DataFrames
        # Process Datasets
        for ds_name in datasets:
            ds = datasets[ds_name]
            if isinstance(ds.data, pd.DataFrame):
                tmp = serialize_dataframe(ds.data)
            else:
                tmp = None
                # ds.data = None
            # DB-serialize the datasets
            lst2 = serialize(ds.get_objects_list())
            lst2.append(tmp)  # Append the serialized DataFrame
            datasets[ds_name] = lst2
        state2.set("_datasets", datasets, ns)

    tmp = serialize_from_object(state2)  # <<<<<<<< SLOWEST !!!! (when debugging)
    print("  serialize_state length: " + str(len(tmp)) + " OUT")
    tmp = blosc.compress(bytearray(tmp, "utf-8"), cname="zlib", typesize=8)
    print("  serialize_state compressed length: " + str(len(tmp)) + " OUT")

    return tmp
def obtain_dictionary_with_literal_fields(item, asts):
    d = create_dictionary()
    for f in item:
        if not f.startswith("_"):
            ast = asts[f]
            # Keep only fields whose AST is not flagged as a "complex" expression
            if "complex" not in ast or not ast["complex"]:
                d[f] = item[f]
    return d
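
# A minimal usage sketch for "obtain_dictionary_with_literal_fields". The item and
# ASTs below are hypothetical, made up for illustration: fields whose AST is flagged
# "complex" are dropped, as are underscore-prefixed internal fields.
def _example_obtain_literal_fields():
    item = {"name": "p1", "value": "a + b", "_row": 5}  # "_row" is internal, skipped
    asts = {"name": {"complex": False}, "value": {"complex": True}}
    d = obtain_dictionary_with_literal_fields(item, asts)
    assert d["name"] == "p1" and "value" not in d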
def initialize_datasets_registry(self, datasets_list: Dict[str, Dataset]):
    """
    Receive a dictionary of datasets and register a copy of its entries

    :param datasets_list:
    :return: None
    """
    self._registry = create_dictionary()
    for ds_name, ds in datasets_list.items():
        self.register_dataset(ds.code, ds)
def construct(name: str, description: str, levels: List[str], codes: List[CodeImmutable]):
    """
    :param name: Name of the Code List
    :param description: Description of the Code List
    :param levels: Names of the levels
    :param codes: List of codes, each one the following tuple:
                  CodeImmutable = namedtuple("CodeTuple", "code description level children")
    :return: The constructed CodeList
    """
    cl = CodeList()
    cl.code = name
    cl.description = description
    # Levels
    levels_dict = create_dictionary()
    for l in levels:
        cll = CodeListLevel()
        cll.code_list = cl  # Point to the containing CodeList
        cll.code = l
        cll.description = None
        levels_dict[l] = cll
    # Codes
    codes_dict = create_dictionary()
    for ct in codes:
        c = Code()
        c.code = ct.code
        c.description = ct.description
        if ct.level in levels_dict:
            c.level = levels_dict[ct.level]  # Point to the containing CodeListLevel
        else:
            c.level = None
        codes_dict[ct.code] = c
        c.children = []
        c.parents = []
    # Set children & parents
    for ct in codes:
        c = codes_dict[ct.code]  # The code whose children are being linked
        for ch in ct.children:
            if ch in codes_dict:
                c.children.append(codes_dict[ch])
                codes_dict[ch].parents.append(c)
    return cl
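
# A usage sketch for "construct" (assuming it is the CodeList.construct static method
# referenced elsewhere in this file, and that CodeImmutable is the namedtuple shown in
# the docstring). The codes are hypothetical: a two-level list where "A1" is a child of "A".
def _example_construct_code_list():
    cl = CodeList.construct(name="Sectors", description="Example code list",
                            levels=["L1", "L2"],
                            codes=[CodeImmutable("A", "Agriculture", "L1", ["A1"]),
                                   CodeImmutable("A1", "Crops", "L2", [])])
    return cl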
def list_all_names(self):
    """
    Return the names of the registered entities, considering the scopes:
    start from the top level and end in the bottom level (the current one, which takes precedence)

    :return:
    """
    t = create_dictionary()
    for scope in self.__scope:
        t.update(scope._registry)
    return t.keys()
def obtain_problem_statement(dynamic_scenario_parameters: Dict = None) -> ProblemStatement:
    """
    Obtain a ProblemStatement instance:
    the solver parameters plus a list of scenarios

    :param dynamic_scenario_parameters:
    :return:
    """
    if dynamic_scenario_parameters is not None:
        scenarios = create_dictionary()
        # Pass the parameters as "data" (the first positional argument of
        # "create_dictionary" is "multi_dict" elsewhere in this codebase)
        scenarios["dynamic"] = create_dictionary(data=dynamic_scenario_parameters)
        return ProblemStatement(scenarios=scenarios)
    else:
        ps_list: List[ProblemStatement] = glb_idx.get(ProblemStatement.partial_key())
        if len(ps_list) == 0:
            # No scenarios (dummy), and use the default solver
            scenarios = create_dictionary()
            scenarios["default"] = create_dictionary()
            return ProblemStatement(scenarios=scenarios)
        else:
            return ps_list[0]
def get_case_study_registry_objects(state, namespace=None):
    """
    Obtain the main entries of the state

    :param state: Input state (modified also)
    :param namespace: State supports several namespaces. This one serves to specify which one. Default=None
    :return: Tuple: (global index, processor sets, hierarchies, datasets, mappings)
    """
    # Index of ALL objects
    glb_idx = state.get("_glb_idx", namespace)
    if not glb_idx:
        glb_idx = PartialRetrievalDictionary()
        state.set("_glb_idx", glb_idx, namespace)

    # ProcessorSet dict (dict of sets)
    p_sets = state.get("_processor_sets", namespace)
    if not p_sets:
        p_sets = create_dictionary()
        state.set("_processor_sets", p_sets, namespace)

    # Hierarchies dict
    hh = state.get("_hierarchies", namespace)
    if not hh:
        hh = create_dictionary()
        state.set("_hierarchies", hh, namespace)

    # Datasets dict
    datasets = state.get("_datasets", namespace)
    if not datasets:
        datasets = create_dictionary()
        state.set("_datasets", datasets, namespace)

    # Mappings dict
    mappings = state.get("_mappings", namespace)
    if not mappings:
        mappings = create_dictionary()
        state.set("_mappings", mappings, namespace)

    return glb_idx, p_sets, hh, datasets, mappings
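
# A usage sketch for "get_case_study_registry_objects": on first access the five
# registry entries are created and stored in the State, so later calls return the
# same instances (assuming State is importable from nexinfosys.model_services,
# as elsewhere in this codebase).
def _example_registry_objects():
    from nexinfosys.model_services import State
    state = State()
    glb_idx, p_sets, hh, datasets, mappings = get_case_study_registry_objects(state)
    assert state.get("_glb_idx") is glb_idx
    assert state.get("_datasets") is datasets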
def test_003_many_to_many_2(self):
    # Prepare two many-to-many maps, category set to category set
    m = create_dictionary()
    m["cat_o_1"] = ("cat_d_1",
                    {"c11": [{"d": "c21", "w": 0.6},
                             {"d": "c22", "w": 0.4}],
                     "c12": [{"d": "c23", "w": 1.0}],
                     "c13": [{"d": "c23", "w": 1.0}]})
    m["cat_o_2"] = ("cat_d_2",
                    {"c31": [{"d": "c41", "w": 0.3},
                             {"d": "c42", "w": 0.7}],
                     "c32": [{"d": "c43", "w": 1.0}],
                     "c33": [{"d": "c43", "w": 1.0}]})
    # Prepare a simple DataFrame to be mapped
    df = pd.DataFrame(data=[["c11", "c31", 4], ["c12", "c32", 3], ["c13", "c31", 1.5]],
                      columns=["cat_o_1", "cat_o_2", "value"])
    # >>>>> Call Cython ACCELERATED Function <<<<<
    df2 = augment_dataframe_with_mapped_columns2(df, m, ["value"])
    # Check result: rows expand with the cartesian product of the mapped
    # destination categories (2*2 + 1*1 + 1*2 = 7 rows)
    self.assertEqual(list(df2.columns), ["cat_o_1", "cat_o_2", "cat_d_1", "cat_d_2", "value"])
    self.assertEqual(df2.shape, (7, 5))
def execute(self, state: "State"):
    """
    Create a set of linear scale conversions, from factor type to factor type
    """
    some_error = False
    issues = []
    glb_idx, p_sets, hh, datasets, mappings = get_case_study_registry_objects(state)

    origin_factor_types = self._content["origin_factor_types"]
    destination_factor_types = self._content["destination_factor_types"]
    scales = self._content["scales"]

    # Check that we have valid factor type names
    fts = create_dictionary()
    for ft_name in origin_factor_types + destination_factor_types:
        # Obtain (maybe create) the mentioned Factor Types
        p, ft, f = find_or_create_observable(glb_idx, ft_name, Observer.no_observer_specified, None,
                                             proc_external=None, proc_attributes=None, proc_location=None,
                                             fact_roegen_type=None, fact_attributes=None, fact_incoming=None,
                                             fact_external=None, fact_location=None)
        if not ft:
            some_error = True
            issues.append((3, "Could not obtain/create the Factor Type '" + ft_name + "'"))
        fts[ft_name] = ft

    if some_error:
        return issues, None

    for sc in scales:
        origin = fts[sc["origin"]]
        destination = fts[sc["destination"]]
        scale = sc["scale"]
        FactorTypesRelationUnidirectionalLinearTransformObservation.create_and_append(
            origin, destination, scale, Observer.no_observer_specified)

    return None, None
def get_processor_names_to_processors_dictionary(state: PartialRetrievalDictionary):
    """
    Obtain a dictionary with all processor names (a processor may have multiple names)
    and the corresponding Processor object

    :param state:
    :return:
    """
    ps = state.get(Processor.partial_key())
    ps = set(ps)  # Avoid repeating Processor objects
    d = create_dictionary()
    for p in ps:
        for n in p.full_hierarchy_names(state):
            d[n] = p
    return d
def convert_code_list_to_hierarchy(cl, as_list=False):
    """
    Receive a list of codes and convert it into a hierarchy. Codes are sorted
    lexicographically (so numeric codes order correctly). Two coding schemes are
    supported, assuming trailing zeros can be ignored to match parent -> child
    relations: uniformly sized codes, where parents carry trailing zeros, and
    growing-length codes, where a code that is a common prefix of longer codes
    is their parent.

    :param cl:
    :param as_list: if True, return a flat tree (all nodes are siblings, descending from a single root)
    :return:
    """
    def can_be_child(parent_candidate, child_candidate):
        # Strip zeros to the right of parent_candidate, then
        # check if the child starts with the resulting substring
        return child_candidate.startswith(parent_candidate.rstrip("0"))

    root = Node("")
    path = [root]
    code_to_node = create_dictionary()
    for c in sorted(cl):
        if as_list:
            n = Node(c, path[-1])
        else:
            found = False
            while len(path) > 0 and not found:
                if can_be_child(path[-1].name, c):
                    found = True
                else:
                    path.pop()
            if c.rstrip("0") == path[-1].name:
                # Just modify (it may enter here only in the root node)
                path[-1].name = c
                n = path[-1]
            else:
                # Create a node and append it to the active path
                n = Node(c, path[-1])
                path.append(n)
        code_to_node[c] = n  # Map the code to the node
    return root, code_to_node
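
# A usage sketch for "convert_code_list_to_hierarchy" with the trailing-zeros coding
# scheme (hypothetical codes): "1000" becomes the parent of "1100", which in turn is
# the parent of "1110"; "1200" backtracks up the active path to hang from "1000".
def _example_code_hierarchy():
    root, code_to_node = convert_code_list_to_hierarchy(["1000", "1100", "1110", "1200"])
    assert code_to_node["1100"].parent is code_to_node["1000"]
    assert code_to_node["1110"].parent is code_to_node["1100"]
    assert code_to_node["1200"].parent is code_to_node["1000"]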
def generate_dublin_core_xml(content):
    """
    Generate an XML string with a Simple Dublin Core Record from a Case Study Metadata command content

    :param content:
    :return:
    """
    controlled = create_dictionary()
    for t in metadata_fields:
        controlled[t[4]] = t

    s = """<?xml version="1.0"?>
<caseStudyMetadata xmlns="http://magic-nexus.org/dmp/" xmlns:dc="http://purl.org/dc/elements/1.1/">
"""
    for key in content:
        k = controlled[key][1]
        if k:
            for l in content[key]:
                s += "  <dc:" + k + ">" + escape(str(l)) + "</dc:" + k + ">\n"

    s += "</caseStudyMetadata>\n"

    return s
def dictionary_from_key_value_list(kvl, state: State = None):
    """
    From a string containing a list of keys and values, return a dictionary.
    Keys must be literals; values can be expressions, to be evaluated at a later
    moment (the syntactic validity of expressions is not checked here)

    :param kvl: String containing the list of keys and values
    :except Exception: if syntactic problems occur
    :return: A dictionary
    """
    pairs = kvl.split(",")
    d = create_dictionary()
    for p in pairs:
        # Guard against pairs without "=" (a bare split would raise an unpacking error)
        if "=" not in p:
            raise Exception("Each key-value pair must be separated by '=' and key has to be defined, "
                            "value can be empty: " + kvl)
        k, v = p.split("=", maxsplit=1)
        if not k:
            raise Exception("Each key-value pair must be separated by '=' and key has to be defined, "
                            "value can be empty: " + kvl)
        else:
            try:
                k = k.strip()
                v = v.strip()
                string_to_ast(simple_ident, k)
                try:
                    # Simplest case: a quoted string
                    string_to_ast(quotedString, v)
                    v = v[1:-1]
                except:
                    issues = []
                    ast = string_to_ast(expression_with_parameters, v)
                    res, unres = ast_evaluator(ast, state, None, issues)
                    if len(unres) == 0:
                        v = res
                d[k] = v
            except:
                raise Exception("Key must be a string: " + k + " in key-value list: " + kvl)
    return d
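
# A usage sketch for "dictionary_from_key_value_list" (hypothetical input): quoted
# values are stored as plain strings, while unquoted values are parsed as expressions
# and, assuming they do not depend on unresolved parameters, evaluated immediately.
def _example_kv_list():
    d = dictionary_from_key_value_list("a='yes', b=3*2")
    assert d["a"] == "yes"
    assert d["b"] == 6  # 3*2 has no parameters, so it should evaluate at once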
def _process_row(self, fields: Dict[str, Any], subrow=None) -> None:
    """
    Create and register a Benchmark object

    :param fields:
    """
    name = fields["benchmark"]
    benchmark_group = fields["benchmark_group"]
    stakeholders = fields["stakeholders"]
    b = self._glb_idx.get(Benchmark.partial_key(name=name))
    if len(b) == 1:
        b = b[0]
    elif len(b) == 0:
        b = Benchmark(name, benchmark_group, stakeholders.split(",") if stakeholders else [])
        self._glb_idx.put(b.key(), b)
    else:
        self._add_issue(IType.ERROR,
                        f"There are {len(b)} instances of the Benchmark '{name}'" + subrow_issue_message(subrow))
        return

    # Add the range, if not repeated
    category = fields["category"]
    if category not in b.ranges:
        b.ranges[category] = create_dictionary(data=dict(range=fields["range"],
                                                         unit=fields["unit"],
                                                         category=category,
                                                         label=fields["label"],
                                                         description=fields["description"]))
    else:
        self._add_issue(IType.WARNING,
                        f"Range with category '{category}' repeated" + subrow_issue_message(subrow))
def execute(self, state: "State"):
    def process_line(item):
        # Read variables
        mh_src_dataset = item.get("source_dataset", None)
        mh_src_hierarchy = item.get("source_hierarchy", None)
        mh_src_code = item.get("source_code", None)
        mh_dst_hierarchy = item.get("destination_hierarchy", None)
        mh_dst_code = item.get("destination_code", None)
        mh_weight = item.get("weight", 1.0)

        # Mapping name
        name = ((mh_src_dataset + ".") if mh_src_dataset else "") + mh_src_hierarchy + " -> " + mh_dst_hierarchy
        if name in mappings:
            issues.append(Issue(itype=IType.ERROR,
                                description="The mapping '" + name + "' has been declared previously. Skipped.",
                                location=IssueLocation(sheet_name=name, row=r, column=None)))
            return

        if name in local_mappings:
            d = local_mappings[name]
        else:
            d = DottedDict()
            local_mappings[name] = d
            d.name = name
            d.origin_dataset = mh_src_dataset
            d.origin_hierarchy = mh_src_hierarchy
            d.destination_hierarchy = mh_dst_hierarchy
            d.mapping = create_dictionary()

        # Specific code
        if mh_src_code in d.mapping:
            to_dict = d.mapping[mh_src_code]
        else:
            to_dict = create_dictionary()
        if mh_dst_code in to_dict:
            issues.append(Issue(itype=IType.ERROR,
                                description="The mapping of '" + mh_src_code + "' into '" + mh_dst_code + "' has been already defined",
                                location=IssueLocation(sheet_name=name, row=r, column=None)))
            return
        else:
            # NOTE: This could be an object instead of just a FLOAT or expression
            to_dict[mh_dst_code] = (mh_weight, r)
            d.mapping[mh_src_code] = to_dict

    issues = []
    glb_idx, p_sets, hh, datasets, mappings = get_case_study_registry_objects(state)
    name = self._content["command_name"]

    local_mappings = create_dictionary()

    # Process parsed information
    for line in self._content["items"]:
        r = line["_row"]
        # If the line contains a reference to a dataset or hierarchy, expand it;
        # if not, process it directly
        is_expansion = False
        if is_expansion:
            # TODO Iterate through dataset and/or hierarchy elements, producing a list of new items
            pass
        else:
            process_line(line)

    # Mappings post-processing
    for d in local_mappings:
        # Convert the mapping into:
        # [ {o: origin category, to: [{d: destination category, w: weight assigned to destination category}] } ]
        mapping = []
        ds_rows = []  # Rows in which a dataset is mentioned
        for orig in local_mappings[d].mapping:
            lst = []
            for dst in local_mappings[d].mapping[orig]:
                t = local_mappings[d].mapping[orig][dst]
                lst.append(dict(d=dst, w=t[0]))
                if local_mappings[d].origin_dataset:
                    ds_rows.append(t[1])
            mapping.append(dict(o=orig, to=lst))

        from nexinfosys.ie_imports.data_source_manager import DataSourceManager
        if local_mappings[d].origin_dataset:
            if not DataSourceManager.obtain_dataset_source(local_mappings[d].origin_dataset, datasets):
                for r in ds_rows:
                    issues.append(Issue(itype=IType.ERROR,
                                        description=f"The dataset '{local_mappings[d].origin_dataset}' was not found",
                                        location=IssueLocation(sheet_name=name, row=r, column=None)))
                continue
            dims, attrs, meas = obtain_dataset_metadata(local_mappings[d].origin_dataset, None, datasets)
            if local_mappings[d].origin_hierarchy not in dims:
                issues.append(Issue(itype=IType.ERROR,
                                    description="The origin dimension '" + local_mappings[d].origin_hierarchy +
                                                "' does not exist in dataset '" + local_mappings[d].origin_dataset + "'",
                                    location=IssueLocation(sheet_name=name, row=r, column=None)))
                continue
            else:
                dim = dims[local_mappings[d].origin_hierarchy]
                mapping = fill_map_with_all_origin_categories(dim, mapping)

        origin_dataset = local_mappings[d].origin_dataset
        origin_hierarchy = local_mappings[d].origin_hierarchy
        destination_hierarchy = local_mappings[d].destination_hierarchy

        # Create the Mapping and add it to the Case Study "mappings" variable
        mappings[d] = Mapping(d,
                              DataSourceManager.obtain_dataset_source(origin_dataset, datasets),
                              origin_dataset,
                              origin_hierarchy,
                              destination_hierarchy,
                              mapping)

    # TODO
    #  Use the function to perform many-to-many mappings, "augment_dataframe_with_mapped_columns".
    #  Put it to work !!
    #  Could one or more mappings in sequence be specified? The key is "source hierarchy + dest hierarchy"
    #  Read mapping parameters

    return issues, None
import urllib.request

import geojson

from nexinfosys.common.helper import create_dictionary
from nexinfosys.model_services import get_case_study_registry_objects
from nexinfosys.models.musiasem_concepts import Processor, GeographicReference

in_files = create_dictionary()  # URL -> (json, idx)


def read_geojson(url):
    """
    Read a GeoJSON file and index it by ID

    :param url:
    :return: A tuple with the deserialized GeoJSON file and an index of ID to position in the features list
    """
    if url not in in_files:
        f = urllib.request.urlopen(url)
        j = geojson.loads(f.read())
        id_dict = create_dictionary()
        for i, f in enumerate(j["features"]):
            fid = f["id"]
            id_dict[fid] = i
        in_files[url] = (j, id_dict)
    else:
        j, id_dict = in_files[url]
    return j, id_dict
def __init__(self, name=None):
    self._name = name  # A name for the scope itself
    self._registry = create_dictionary()
def __init__(self, d: Dict[str, Any] = None):
    self._default_namespace = ""
    self._namespaces = create_dictionary()
    if d is not None and len(d) > 0:
        self.update(d)
def map_codelists(src, dst, corresp, dst_tree=False) -> (list, set):
    """
    Obtain the map of two code lists.
    If the source is a tree, children of a mapped node are assigned to the same mapped node.
    The same source may be mapped more than once, to different nodes.
    The source codes left without a correspondence are returned in "unmapped".

    :param src: Source full code list
    :param dst: Destination full code list
    :param corresp: List of tuples with the correspondence
    :param dst_tree: Is the dst code list a tree?
    :return: List of tuples (source code, target code), set of unmapped codes
    """
    def assign(n: str, v: str):
        """
        Assign a destination code name to a source code name.
        If the source has children, assign the same destination to the children, recursively

        :param n: Source code name
        :param v: Destination code name
        :return:
        """
        mapped.add(n, v)
        if n in unmapped:
            unmapped.remove(n)
        for c in cn_src[n].children:
            assign(c.name, v)

    unmapped = set(src)
    r_src, cn_src = convert_code_list_to_hierarchy(src, as_list=True)
    if dst_tree:
        r_dst, cn_dst = convert_code_list_to_hierarchy(dst)
    else:
        cn_dst = create_dictionary()
        for i in dst:
            cn_dst[i] = None  # Simply create the entry

    mapped = create_dictionary(multi_dict=True)  # MANY TO MANY
    for t in corresp:
        if t[0] in cn_src and t[1] in cn_dst:
            # Check that t[1] is a leaf node; if not, ERROR
            if isinstance(cn_dst[t[1]], Node) and len(cn_dst[t[1]].children) > 0:
                # TODO ERROR: the target destination code is not a leaf node
                pass
            else:
                # The node and its children (recursively) correspond to t[1]
                assign(t[0], t[1])

    for k in sorted(unmapped):
        print("Unmapped: " + k)

    # Convert "mapped" to a list of tuples
    mapped_lst = []
    for k in mapped:
        for i in mapped.getall(k):
            mapped_lst.append((k, i))

    return mapped_lst, unmapped
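
# A usage sketch for "map_codelists" with hypothetical flat code lists: the multi-dict
# allows one source code to map to several destination codes, and source codes without
# a correspondence are reported as unmapped.
def _example_map_codelists():
    mapped, unmapped = map_codelists(["01", "02"], ["A", "B"], [("01", "A"), ("01", "B")])
    assert set(mapped) == {("01", "A"), ("01", "B")}
    assert unmapped == {"02"}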
def parse_data_input_command(sh: Worksheet, area: AreaTupleType, processors_type: str, state=None) -> IssuesLabelContentTripleType:
    """
    Scan the "area" of the input worksheet "sh" where a "data input" command is assumed to be present.
    It obtains a list of observations, a list of processors, a list of observables and a list of tags.
    All of them are represented in JSON format

    :param sh: Input worksheet
    :param area: Tuple (top, bottom, left, right) representing the rectangular area of the input worksheet where the
                 command is present
    :param processors_type: Name of the type of processors. Also the label of the command
    :param state: Transient state, useful for checking the existence of variables
    :return: list of issues (issue_type, message), command label, command content
    """
    some_error = False
    issues = []

    # Define a set of observations (qualified quantities) of observables.
    # This set can be replicated. So, how to refer to each replica?
    # Columns: (regular expression, internal name, mandatory (True|False))
    known_columns = [(r"Name|Processor[_ ]name", "processor", False),
                     (r"Level", "level", False),
                     (r"Parent", "parent", False),
                     (r"FF[_ ]type", "ff_type", True),
                     (r"Var|Variable", "factor", True),
                     (r"Value|NUSAP\.N", "value", False),  # If a value is not specified, just declare the Factor
                     (r"Unit|NUSAP\.U", "unit", True),  # If blank, a dimensionless amount is assumed
                     (r"Relative[_ ]to", "relative_to", False),
                     (r"Uncertainty|Spread|NUSAP\.S", "uncertainty", False),
                     (r"Assessment|NUSAP\.A", "assessment", False),
                     (r"Pedigree[_ ]matrix|NUSAP\.PM", "pedigree_matrix", False),
                     (r"Pedigree|NUSAP\.P", "pedigree", False),
                     (r"Time|Date", "time", False),
                     (r"Geo|Geolocation", "geolocation", False),
                     (r"Source", "source", False),
                     (r"Comment|Comments", "comments", False)
                     ]

    label = "Processors " + processors_type

    # First, examine the columns, to know which fields are being specified
    # Special cases:
    #   Open columns: the field is specified in the cell together with the value, like "attr1=whatever",
    #                 instead of a header "attr1" and, in a row below, a value "whatever"
    #   Complex values: the value has syntactic rules, like expressions for both quantities AND qualities (like NUSAP)
    #   References: the field refers to additional information in another worksheet. Unique names, or a ref holder
    #               (worksheet name) plus a ref inside the worksheet, would be allowed. A ref type can also disambiguate
    mandatory = {t[1]: t[2] for t in known_columns}
    cre = {}  # Column Regular Expression dictionary (K: regular expression; V: compiled regular expression)
    if not case_sensitive:
        flags = re.IGNORECASE
    else:
        flags = 0
    for kc in known_columns:
        cre[kc[0]] = re.compile(kc[0], flags=flags)

    col_names = {}
    standard_cols = {}  # Internal (standardized) column name to column index in the worksheet (freedom in the order of columns)
    attribute_cols = create_dictionary()  # Not recognized columns are considered freely named categories, attributes or tags
    attributes = []  # List of attributes or tags (keys of the previous dictionary)
    col_allows_dataset = create_dictionary()  # Whether the column allows a reference to a dataset dimension
    for c in range(area[2], area[3]):
        col_name = sh.cell(row=area[0], column=c).value
        if not col_name:
            continue

        col_name = col_name.replace("\n", " ")
        col_names[c] = col_name

        # Match
        found = False
        for kc in known_columns:
            res = cre[kc[0]].search(col_name)
            if res:
                if kc[1] in standard_cols:
                    issues.append((2, "Cannot repeat column name '" + col_name + "' (" + kc[0] +
                                   ") in data input command '" + processors_type + "'"))
                else:
                    standard_cols[kc[1]] = c
                    col_names[c] = kc[1]  # Override the column name with the pseudo-name of the standard column
                    if col_names[c].lower() in ["factor", "value", "time", "geolocation"]:
                        col_allows_dataset[col_names[c]] = True
                    else:
                        col_allows_dataset[col_names[c]] = False
                found = True
                break
        if not found:
            if col_name not in attribute_cols:
                # TODO Check valid col_names. It must be a valid Variable Name
                attribute_cols[col_name] = c
                attributes.append(col_name)
                col_allows_dataset[col_name] = True
            else:
                issues.append((2, "Cannot repeat column name '" + col_name + "' in data input command '" +
                               processors_type + "'"))

    del cre

    # Check if there are mandatory columns missing
    # TODO There could be combinations of columns which change the mandatory character of some columns
    # TODO For instance, if we are only specifying structure, "Value" would not be needed
    for kc in known_columns:
        # "kc[2]" is the flag indicating if the column is mandatory or not
        # "standard_cols" contains the standard column names present in the worksheet
        if kc[2] and kc[1] not in standard_cols:
            some_error = True
            issues.append((3, "Column name '" + kc[0] + "' must be specified in data input command '" +
                           processors_type + "'"))

    # If there are errors, do not continue
    if some_error:
        return issues, label, None

    processor_attribute_exclusions = create_dictionary()
    processor_attribute_exclusions["scale"] = None  # Exclude these attributes when characterizing the processor
    processor_attributes = [t for t in attributes if t not in processor_attribute_exclusions]

    # SCAN rows
    lst_observations = []  # List of ALL observations. -- Main outcome of the parse operation --
    set_pedigree_matrices = create_dictionary()  # List of pedigree templates
    set_processors = create_dictionary()  # List of processor names
    set_factors = create_dictionary()  # List of factors
    set_taxa = create_dictionary()  # Dictionary of taxa with their lists of values. Useful to return CODE LISTS
    set_referenced_datasets = create_dictionary()  # Dictionary of datasets to be embedded into the result (a job of the execution part)
    processors_taxa = create_dictionary()  # Correspondence "processor" -> taxa (to avoid changes in this correspondence)
    dataset_column_rule = parser_field_parsers.dataset_with_column
    values = [None] * area[3]
    # LOOP OVER EACH ROW
    for r in range(area[0] + 1, area[1]):  # Scan rows (observations)
        # Each row can specify: the processor, the factor, the quantity and the qualities about the factor
        # in the processor. It can also specify a "flow+containment hierarchy" relation

        row = {}  # Store the parsed values of the row
        taxa = create_dictionary()  # Store attributes or taxa of the row
        referenced_dataset = None  # Once defined in a row, it cannot change!!

        # Scan the row first, looking for the dataset. The specification is allowed in certain columns:
        # "attribute_cols" and some "standard_cols"
        already_processed = create_dictionary()
        for c in range(area[2], area[3]):
            if c in col_names:
                value = sh.cell(row=r, column=c).value
                if isinstance(value, str) and value.startswith("#"):
                    col_name = col_names[c]
                    if col_allows_dataset[col_name]:
                        if not referenced_dataset:
                            try:
                                ast = parser_field_parsers.string_to_ast(dataset_column_rule, value[1:])
                                if len(ast["parts"]) == 2:
                                    referenced_dataset = ast["parts"][0]
                                    # Remove the dataset variable. It will be stored in "_referenced_dataset"
                                    value = "#" + ast["parts"][1]
                                else:
                                    some_error = True
                                    issues.append((3, "The first dataset reference of the row must contain the "
                                                      "dataset variable name and the dimension name, row " + str(r)))
                                # Mark as processed
                                already_processed[col_name] = None
                            except:
                                some_error = True
                                issues.append((3, "Column '" + col_name + "' has an invalid dataset reference '" +
                                               value + "', in row " + str(r)))
                        else:
                            try:
                                ast = parser_field_parsers.string_to_ast(simple_ident, value[1:])
                                # Mark as processed
                                already_processed[col_name] = None
                            except:
                                some_error = True
                                issues.append((3, "Column '" + col_name + "' has an invalid dataset reference '" +
                                               value + "', in row " + str(r)))
                        if col_name in standard_cols:
                            row[col_name] = value
                        else:
                            taxa[col_name] = value
                values[c] = value

        # TODO If the flow type is decomposed, compose it first
        for c in standard_cols:
            if c in already_processed:
                continue

            value = values[standard_cols[c]]

            if value is None or (value is not None and value == ""):
                if c == "unit":
                    value = "-"
            if not value:
                if mandatory[c]:
                    some_error = True
                    issues.append((3, "Column '" + c + "' is mandatory, row " + str(r)))
                continue  # Skip the rest of the iteration!

            # Parse the value
            if c in ["processor", "factor"]:
                # Check that it is a variable name, and allow hierarchical names
                parser_field_parsers.string_to_ast(parser_field_parsers.simple_h_name, value)
            elif c == "pedigree_matrix":
                parser_field_parsers.string_to_ast(parser_field_parsers.simple_ident, value)
            elif c == "relative_to":
                # Two elements, the first a hierarchical name, the second a unit name
                s = value.split(" ")
                if len(s) != 2:
                    some_error = True
                    issues.append((3, "The Relative To value has to have two parts, factor name and unit, separated "
                                      "by a whitespace (specified '" + value + "'), in row " + str(r)))
                else:
                    try:
                        parser_field_parsers.string_to_ast(parser_field_parsers.simple_h_name, s[0])
                    except:
                        some_error = True
                        issues.append((3, "The name specified for the relative to factor '" + s[0] +
                                       "' is not valid, in row " + str(r)))

                    # It must be a recognized unit. Check with Pint
                    try:
                        ureg(s[1])
                        ureg.parse_unit_name(s[1], case_sensitive)
                    except UndefinedUnitError:
                        some_error = True
                        issues.append((3, "The unit name '" + s[1] + "' is not registered in the units processing "
                                          "package, in row " + str(r)))
            elif c == "level":
                # A valid level name
                try:
                    parser_field_parsers.string_to_ast(parser_field_parsers.level_name, value)
                except:
                    some_error = True
                    issues.append((3, "The level '" + value + "' syntax is not valid, in row " + str(r)))
            elif c == "parent":
                # Check that the value is a valid parent name. It can be either a list of tags OR
                # a processor name, something defining a single processor
                try:
                    parser_field_parsers.string_to_ast(parser_field_parsers.simple_h_name, value)
                except:
                    try:
                        parser_field_parsers.string_to_ast(parser_field_parsers.named_parameters_list, value)
                    except:
                        some_error = True
                        issues.append((3, "Could not parse '" + value + "' as 'parent' in row " + str(r)))
            elif c == "ff_type":
                # The type of flow/fund must be one of a set of possible values
                if value.lower() not in allowed_ff_types:
                    some_error = True
                    issues.append((3, "ff_type must be one of: " + ', '.join(allowed_ff_types) + ", in row " + str(r)))
            elif c == "value":
                if not isinstance(value, str):
                    value = str(value)
                # An expression is allowed. Check the syntax only; it can refer to parameters.
                ast = parser_field_parsers.string_to_ast(parser_field_parsers.expression, value)
                # TODO Check the existence of used variables
                # TODO basic_elements_parser.ast_evaluator(ast, state, None, issues, "static")
            elif c == "unit":
                # It must be a recognized unit. Check with Pint
                try:
                    value = value.replace("€", "Euro").replace("$", "Dollar")
                    if value == "-":
                        value = ""  # Dimensionless
                    ureg(value)
                    ureg.parse_unit_name(value, case_sensitive)
                except:
                    some_error = True
                    issues.append((3, "The unit name '" + value + "' is not registered in the units processing "
                                      "package, in row " + str(r)))
            elif c == "uncertainty":
                # TODO It must be a valid uncertainty specifier
                pass
            elif c == "assessment":
                # See page 135 of Funtowicz S., Ravetz J., "Uncertainty and Quality in Science for Policy"
                # "c" is "cognitive" assessment, "p" is pragmatic assessment
                allowed = ["nil", "low", "medium", "high", "total",
                           "nil_c", "low_c", "medium_c", "high_c", "total_c",
                           "nil_p", "low_p", "medium_p", "high_p", "total_p"]
                if value and value.lower() not in allowed:
                    issues.append((3, "Assessment must be empty or one of: " + ", ".join(allowed)))
            elif c == "pedigree":
                # A valid pedigree specification is just an integer
                try:
                    int(value)
                except:
                    issues.append((3, "The pedigree specification '" + value + "' must be an integer"))
            elif c == "time":
                # A valid time specification. Possibilities: Year, Month-Year / Year-Month, Time span (two dates)
                if not isinstance(value, str):
                    value = str(value)
                ast = parser_field_parsers.string_to_ast(parser_field_parsers.time_expression, value)
            elif c == "geolocation":
                # A reference to a geolocation
                try:
                    parser_field_parsers.string_to_ast(parser_field_parsers.reference, value)
                except:
                    some_error = True
                    issues.append((3, "The geolocation must be a reference"))
            elif c == "source":
                # Who or what provided the information. It can be formal or informal.
                # Formal sources can be references (but evaluated later)
                pass
            elif c == "comments":
                # Free text
                pass

            # Store the parsed value
            row[c] = value

        for c in attribute_cols:
            if c in already_processed:
                continue

            value = values[attribute_cols[c]]
            if not value:
                taxa[c] = None
                continue  # Skip the rest of the iteration!

            # Validate "value": it has to be a simple identifier, no whitespace
            try:
                if not isinstance(value, str):
                    value = str(value)
                parser_field_parsers.simple_ident.parseString(value, parseAll=True)
            except:
                value = None
                some_error = True
                issues.append((3, "The value in column '" + c + "' has to be a simple identifier: start with letter, "
                                  "then letters, numbers and '_', no whitespace, in row " + str(r)))

            taxa[c] = value

            # Disable the registration of taxa. If a Dataset reference is used, there is no way to register
            # taxa at parse time (the dataset is still not obtained). Leave it for the execution
            if c not in set_taxa:
                set_taxa[c] = create_dictionary()
            if value is not None:
                set_taxa[c][value] = None

        # Now that the individual columns have been parsed, do other things

        if referenced_dataset:
            row["_referenced_dataset"] = referenced_dataset

        # If "processor" is not specified, concatenate the taxa columns to generate an automatic name
        # (excluding the processor type)
        p_taxa = taxa.copy()
        for k in processor_attribute_exclusions:
            if k in p_taxa:
                del p_taxa[k]

        if "processor" not in row:
            # TODO Which order? (currently "order of appearance"; maybe "alphabetical order" would be a better option)
            row["processor"] = "_".join([str(taxa[t]) for t in processor_attributes])

        # Add the processor type (an optional input parameter to this function) as a taxon
        if processors_type:
            taxa["_processors_type"] = processors_type

        # Store taxa (attributes and taxa)
        row["taxa"] = taxa
        # Store the taxa if the processor still does not have them
        if row["processor"] not in processors_taxa:
            processors_taxa[row["processor"]] = p_taxa  # "::".join([taxa[t] for t in lst_taxa_cols])
        else:
            # Taxa should be the same for each "processor". Error if different
            t = processors_taxa[row["processor"]]
            if t != p_taxa:
                issues.append((3, "The processor '" + row["processor"] + "' has different taxa assigned, in row " + str(r)))

        # Register new processor names, pedigree templates and variable names
        if "processor" in row:
            set_processors[row["processor"]] = None
        if "pedigree_matrix" in row:
            set_pedigree_matrices[row["pedigree_matrix"]] = None
        if "factor" in row:
            set_factors[row["factor"]] = None
        if referenced_dataset:
            set_referenced_datasets[referenced_dataset] = None

        lst_observations.append(row)

    content = {"factor_observations": lst_observations,
               "processor_attributes": processor_attributes,
               "processors": [k for k in set_processors],
               "pedigree_matrices": [k for k in set_pedigree_matrices],
               "factors": [k for k in set_factors],
               "referenced_datasets": [ds for ds in set_referenced_datasets],
               "code_lists": {k: [k2 for k2 in set_taxa[k]] for k in set_taxa}
               }
    return issues, label, content
def get_interfaces(glb_idx: PartialRetrievalDictionary) -> pd.DataFrame:
    # Used to examine "value" as an expression, and find which variables are interface names vs parameter names
    params = create_dictionary(data={p.name: None for p in glb_idx.get(Parameter.partial_key())})
    s = State()
    procs = glb_idx.get(Processor.partial_key())
    # Elaborate a DAG of Processors (child -> set of parents)
    d = {}
    for p in procs:
        parent_relations = glb_idx.get(ProcessorsRelationPartOfObservation.partial_key(child=p))
        d[p.ident] = set([rel.parent_processor.ident for rel in parent_relations])

    lst = [["Processor", "InterfaceType", "Interface", "Sphere", "RoegenType", "Orientation",
            "OppositeSubsystemType", "GeolocationRef", "GeolocationCode", "InterfaceAttributes",
            "Value", "Unit", "RelativeTo", "Uncertainty", "Assessment", "PedigreeMatrix", "Pedigree",
            "Time", "Source", "NumberAttributes", "Comments"]]

    # Iterate over the Processor DAG
    for ident in list(toposort.toposort_flatten(d)):
        p = glb_idx.get(Processor.partial_key(ident=ident))[0]
        ifaces = glb_idx.get(Factor.partial_key(processor=p))
        iface_names = create_dictionary(data={iface.name: iface for iface in ifaces})
        # Elaborate a DAG of Interfaces because of the Observations
        # ("iface_deps" is distinct from the Processor DAG "d" above)
        iface_deps = {}
        for iface in ifaces:
            if iface.ident not in iface_deps:
                iface_deps[iface.ident] = set()
            for obs in iface.quantitative_observations:
                if obs.relative_factor:
                    iface_deps[iface.ident].add(obs.relative_factor.ident)
                # Consider obs.value and non-linear dependencies
                if isinstance(obs.value, str):
                    ast = string_to_ast(expression_with_parameters, obs.value)
                    evaluation_issues = []
                    value, unresolved_vars = ast_evaluator(exp=ast, state=s, obj=None, issue_lst=evaluation_issues)
                    for unresolved in unresolved_vars:
                        if unresolved not in params:
                            iface_deps[iface.ident].add(iface_names[unresolved].ident)

        for ident2 in list(toposort.toposort_flatten(iface_deps)):
            iface = glb_idx.get(Factor.partial_key(ident=ident2))[0]
            lst1 = [iface.processor.name, iface.taxon.name, iface.name, iface.sphere, iface.roegen_type.name,
                    iface.orientation, iface.opposite_processor_type, "", "", ""]
            observations = iface.quantitative_observations
            if len(observations) > 0:
                for obs in observations:
                    lst2 = [obs.value,
                            obs.attributes.get("unit", ""),
                            obs.relative_factor.name if obs.relative_factor else "",
                            obs.attributes.get("spread", ""),
                            obs.attributes.get("assessment", ""),
                            obs.attributes.get("pedigree_template", ""),
                            obs.attributes.get("pedigree", ""),
                            obs.attributes.get("time", ""),
                            obs.observer.name if obs.observer else "",
                            "",
                            obs.attributes.get("comments", "")]
                    lst.append(lst1 + lst2)
            else:
                lst.append(lst1 + ["", "", "", "", "", "", "", "", "", ""])

    return list_to_dataframe(lst)
def get_dataset_structure(self, database, dataset) -> Dataset:
    """ Obtain the structure of a dataset: concepts, dimensions, attributes and measures """
    refs = dict(references='all')
    dsd_response = estat.datastructure("DSD_" + dataset, params=refs)
    dsd = dsd_response.datastructure["DSD_" + dataset]
    metadata = dsd_response.write()
    # SDMXConcept = collections.namedtuple('Concept', 'type name istime description code_list')

    # DataSource <- Database <- DATASET <- Dimension(s) (including Measures) <- CodeList
    #                               |
    #                               v
    #                            Concept <- CodeList  (NOT CONSIDERED NOW)
    ds = Dataset()
    ds.code = dataset
    ds.description = None  # How to get the description?
    ds.attributes = {}  # Dataset-level attributes? (encode them using a dictionary)
    ds.metadata = None  # Metadata for the dataset SDMX (flow, date of production, etc.)
    ds.database = database  # Reference to the containing database

    dims = {}

    for d in dsd.dimensions:
        istime = str(dsd.dimensions.get(d)).split("|")[0].strip() == "TimeDimension"
        dd = Dimension()
        dd.code = d
        dd.description = None
        dd.attributes = None
        dd.is_time = istime
        dd.is_measure = False
        dd.dataset = ds
        dims[d] = dd
    for m in dsd.measures:
        dd = Dimension()
        dd.code = m
        dd.description = None
        dd.attributes = None
        dd.is_time = False
        dd.is_measure = True
        dd.dataset = ds
        dims[m] = dd
    for a in dsd.attributes:
        ds.attributes[a] = None  # TODO Get the value

    for l in metadata.codelist.index.levels[0]:
        first = True
        # Read the code list (the first entry is skipped)
        cl = create_dictionary()
        for m, v in list(zip(metadata.codelist.loc[l].index, metadata.codelist.loc[l]["name"])):
            if not first:
                cl[m] = v.replace("\n", ";")
            else:
                first = False
        # Attach it to the Dimension or Measure
        if metadata.codelist.loc[l]["dim_or_attr"][0] == "D":
            # Build a Code List from the dictionary
            dims[l].code_list = CodeList.construct(l, None, [""], [CodeImmutable(k, cl[k], "", []) for k in cl])

    return ds
def parse_etl_external_dataset_command(sh: Worksheet, area: AreaTupleType, dataset_name: str, state) -> IssuesLabelContentTripleType:
    """
    Check that the syntax of the input spreadsheet is correct.
    Return the analysis in JSON compatible format, for execution

    :param sh: Input worksheet
    :param area: Area of the input worksheet to be analysed
    :return: The command in a dict-list object (JSON ready)
    """
    def obtain_column(cn, r1, r2):
        """
        Obtain a list with the values of a column, in the range of rows [r1, r2)

        :param cn: Column number
        :param r1: Starting row
        :param r2: End+1 row
        :return: list with the cell values
        """
        lst = []
        for row in range(r1, r2):
            value = sh.cell(row=row, column=cn).value
            if value is None:
                continue
            if isinstance(value, str):
                lst.append(value.strip())
            else:
                lst.append(value)
        return lst

    issues = []
    # Global variables (at parse time they may not be defined, so process carefully...)
    glb_idx, p_sets, hh, datasets, mappings = get_case_study_registry_objects(state)

    # Dataset source
    from nexinfosys.ie_imports.data_source_manager import DataSourceManager
    source = DataSourceManager.obtain_dataset_source(dataset_name, datasets)

    # Obtain metadata
    dims, attrs, meas = obtain_dataset_metadata(dataset_name, source, datasets)

    # Load all code lists in a temporary dictionary of sets.
    # Also check if there is a TIME dimension in the dataset
    cl = create_dictionary()
    we_have_time = False
    for d in dims:
        if dims[d].code_list:
            cl[d] = [k.lower() for k in dims[d].code_list.keys()]  # Attach the code list
        else:
            cl[d] = None  # No code list (TIME_PERIOD, for instance)
        if dims[d].istime:
            we_have_time = True

    # Add matching mappings as more dimensions
    for m in mappings:
        if strcmp(mappings[m].source, source) and \
                strcmp(mappings[m].dataset, dataset_name) and \
                mappings[m].origin in dims:
            # Add a dictionary entry for the new dimension, add also the codes present in the map
            tmp = [to["d"] for o in mappings[m].map for to in o["to"] if to["d"]]
            cl[mappings[m].destination] = set(tmp)  # [t[1] for t in mappings[m].map]

    # Scan columns for Dimensions, Measures and Aggregation.
    # A Pivot Table is a Visualization, so it is not in this command; there will be a separate command for it.
    # TODO The result COULD be an automatic BI cube (with a separate field)
    # TODO - Write into a set of tables in Mondrian
    # TODO - Generate Schema for Mondrian
    # TODO - Write the Schema for Mondrian
    measures = []
    out_dims = []
    agg_funcs = []
    measures_as = []
    filter_ = {}  # Cannot use "create_dictionary()" because CaseInsensitiveDict is NOT serializable (which is a requirement)
    result_name = None  # By default, no name for the result. It will be dynamically obtained
    for c in range(area[2], area[3]):
        col_name = sh.cell(row=1, column=c).value
        if not col_name:
            continue

        if col_name.lower().strip() in ["dimensions_kept", "dims", "dimensions"]:  # "GROUP BY"
            lst = obtain_column(c, area[0] + 1, area[1])
            for d in lst:
                if not d:
                    continue
                if d not in cl:
                    issues.append((3, "The dimension specified for output, '" + d +
                                   "' is neither a dataset dimension nor a mapped dimension. [" +
                                   ', '.join([d2 for d2 in cl]) + "]"))
                else:
                    out_dims.append(d)
        elif col_name.lower().strip() in ["aggregation_function", "aggfunc", "agg_func"]:  # "SELECT AGGREGATORS"
            lst = obtain_column(c, area[0] + 1, area[1])
            for f in lst:
                if f.lower() not in ["sum", "avg", "count", "sumna", "countav", "avgna", "pctna"]:
                    issues.append((3, "The specified aggregation function, '" + f +
                                   "' is not one of the supported ones: 'sum', 'avg', 'count', 'sumna', 'avgna', "
                                   "'countav', 'pctna'"))
                else:
                    agg_funcs.append(f)
        elif col_name.lower().strip() in ["measures"]:  # "SELECT"
            lst = obtain_column(c, area[0] + 1, area[1])
            # Check the measures
            # TODO (and attributes?)
            for m in lst:
                if not m:
                    continue
                if m not in meas:
                    issues.append((3, "The specified measure, '" + m + "' is not a measure available in the dataset. [" +
                                   ', '.join([m2 for m2 in meas]) + "]"))
                else:
                    measures.append(m)
        elif col_name.lower().strip() in ["measuresas"]:  # "AS <name>"
            lst = obtain_column(c, area[0] + 1, area[1])
            for m in lst:
                measures_as.append(m)
        elif col_name in cl:  # A dimension -> "WHERE"
            # Check the codes, and add them to the "filter"
            lst = obtain_column(c, area[0] + 1, area[1])
            for cd in lst:
                if not cd:
                    continue
                if str(cd).lower() not in cl[col_name]:
                    issues.append((3, "The code '" + cd + "' is not present in the codes declared for dimension '" +
                                   col_name + "'. Please, check them."))
                else:
                    if col_name not in filter_:
                        lst2 = []
                        filter_[col_name] = lst2
                    else:
                        lst2 = filter_[col_name]
                    lst2.append(cd)
        elif we_have_time and col_name.lower() in ["startperiod", "endperiod"]:  # SPECIAL "WHERE" FOR TIME
            # TODO Instead, a single column "Time" should be used, with the interval syntax of the Time column
            #  in the Data Input command
            # Interval of time periods
            lst = obtain_column(c, area[0] + 1, area[1])
            if len(lst) > 0:
                filter_[col_name] = lst[0]  # In this case it is not a list, but a number or string !!!!
        elif col_name.lower() in ["result_name", "result name", "resultname"]:
            lst = obtain_column(c, area[0] + 1, area[1])
            if len(lst) > 0:
                result_name = lst[0]
                try:
                    parser_field_parsers.string_to_ast(simple_ident, result_name)
                except:
                    issues.append((3, "Column '" + col_name + "' has an invalid dataset name '" + result_name + "'"))

    if len(measures) == 0:
        issues.append((3, "At least one measure should be specified"))

    if len(agg_funcs) == 0:
        issues.append((2, "No aggregation function specified. Assuming 'average'"))
        agg_funcs.append("average")

    if not result_name:
        result_name = source + "_" + dataset_name
        issues.append((2, "No result name specified. Assuming '" + result_name + "'"))

    content = {"dataset_source": source,
               "dataset_name": dataset_name,
               "dataset_datetime": None,
               "where": filter_,
               "dimensions": [d for d in dims],
               "group_by": out_dims,
               "measures": measures,
               "agg_funcs": agg_funcs,
               "measures_as": measures_as,
               "result_name": result_name
               }
    return issues, None, content