class ReceiverRDFService(object):
    """Service that gets the XML input from the HTML request, generates RDF
    triples and stores them in the Virtuoso triple store.
    """

    def __init__(self, content):
        self.parser = Parser(content)
        self.time = dt.datetime.now()

    def run_service(self, graph, host, api, graph_uri, user_ip):
        """Run the ReceiverRDFService.

        :param graph: RDF graph.
        :param graph_uri: RDF graph URI to be stored in the triple store.
        :param host: Triple store host.
        :param api: Triple store authentication API.
        :param user_ip: IP of the invoking client.
        :returns: RDF graph.
        """
        print "Running RDF service..."
        bind_namespaces(graph)
        # self._add_observations_triples(graph)
        # self._add_indicators_triples(graph)
        # self._add_area_triples_from_observations(graph)
        # self._add_area_triples_from_slices(graph)
        # self._add_computation_triples(graph)
        # self._add_data_source_triples(graph)
        self._add_catalog_triples(graph)
        self._add_dataset_triples(graph)
        self._add_distribution_triples(graph)
        # self._add_licenses_triples(graph)
        # self._add_organizations_triples(graph)
        # self._add_region_triples(graph)
        # self._add_slices_triples(graph)
        # self._add_upload_triples(graph, user_ip)
        # self._add_users_triples(graph)
        # self._add_topics_triples(graph)
        # self._add_dates_triples(graph)
        self._serialize_rdf_xml(graph)
        self._serialize_turtle(graph)
        self._load_data_set(graph_uri=graph_uri, host=host, api=api)
        return graph

    def _add_catalog_triples(self, graph):
        lp_catalog = "landportal-catalog"
        dataset_id = self.parser.get_dataset().id
        graph.add((base.term(lp_catalog), RDF.type, dcat.term("Catalog")))
        graph.add((base.term(lp_catalog), dct.term("title"),
                   Literal("Land Portal Catalog")))
        graph.add((base.term(lp_catalog), RDFS.label,
                   Literal("Land Portal Catalog", lang="en")))
        # foaf:homepage should point to a resource, not to a quoted literal
        graph.add((base.term(lp_catalog), foaf.term("homepage"),
                   URIRef("http://5.9.221.11/book/catalog")))
        graph.add((base.term(lp_catalog), dcat.term("dataset"),
                   base.term(dataset_id)))
        return graph

    def _add_dataset_triples(self, graph):
        dataset = self.parser.get_dataset()
        lic = self.parser.get_license()
        dsource = self.parser.get_datasource()
        graph.add((base.term(dataset.id), RDF.type, qb.term("DataSet")))
        graph.add((base.term(dataset.id), RDF.type, dcat.term("Dataset")))
        graph.add((base.term(dataset.id), sdmx_concept.term("freq"),
                   sdmx_code.term(dataset.sdmx_frequency)))
        graph.add((base.term(dataset.id), dct.term("accrualPeriodicity"),
                   sdmx_code.term(dataset.sdmx_frequency)))
        graph.add((base.term(dataset.id), lb.term("license"), URIRef(lic.url)))
        graph.add((base.term(dataset.id), lb.term("dataSource"),
                   base_dsource.term(dsource.dsource_id)))
        graph.add((base.term(dataset.id), dct.term("issued"),
                   Literal(dt.datetime.now().strftime("%Y-%m-%d"),
                           datatype=XSD.date)))
        return graph

    def _add_distribution_triples(self, graph):
        file_name, file_extension = os.path.splitext(self.parser.get_file_name())
        file_extension = file_extension.replace(".", "")
        dataset_id = self.parser.get_dataset().id
        dist_id = dataset_id + "-" + file_extension
        file_type = guess_type(self.parser.get_file_name())
        file_size = os.path.getsize(config.RAW_FILE)
        graph.add((base.term(dist_id), RDF.type, dcat.term("Distribution")))
        graph.add((base.term(dist_id), dcat.term("downloadURL"),
                   Literal(config.CKAN_INSTANCE + "dataset/" + dataset_id.lower())))
        graph.add((base.term(dist_id), dct.term("title"),
                   Literal(file_extension.upper() + " distribution of dataset " +
                           dataset_id, lang="en")))
        # guess_type() returns a (type, encoding) tuple; keep the MIME type
        graph.add((base.term(dist_id), dcat.term("mediaType"),
                   Literal(file_type[0])))
        graph.add((base.term(dist_id), dcat.term("byteSize"),
                   Literal(file_size, datatype=XSD.decimal)))
        return graph
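    # Taken together, the three methods above emit a small DCAT tree:
    # catalog -> dataset -> distribution. For an illustrative dataset "DS1"
    # uploaded as a CSV file, the output would look roughly like this Turtle
    # (prefixes and exact URIs depend on the namespaces bound elsewhere):
    #
    #   base:landportal-catalog a dcat:Catalog ;
    #       dct:title "Land Portal Catalog" ;
    #       dcat:dataset base:DS1 .
    #   base:DS1 a qb:DataSet, dcat:Dataset ;
    #       lb:license <http://example.org/license> ;
    #       dct:issued "2014-01-01"^^xsd:date .
    #   base:DS1-csv a dcat:Distribution ;
    #       dcat:mediaType "text/csv" ;
    #       dcat:byteSize "1024"^^xsd:decimal .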
dcat.term("byteSize"), Literal(file_size, datatype=XSD.decimal))) return graph def _add_dates_triples(self, graph): for obs in self.parser.get_observations(): time_value = obs.ref_time term_object = base_time.term(time_value.value) graph.add((term_object, RDF.type, time.term('DateTimeInterval'))) if isinstance(time_value, model.YearInterval): self._add_triples_of_year_interval(time_value, graph) elif isinstance(time_value, model.MonthInterval): self._add_triples_of_month_interval(time_value, graph) elif isinstance(time_value, model.Interval): self._add_triples_of_interval(time_value, graph) else: print "Unrecognized type of date: " + type(time_value) return graph def _add_triples_of_year_interval(self, time_object, graph): #base term term_object = base_time.term(time_object.value) #beginning and end graph.add((term_object, time.term("hasBeginning"), self._term_of_an_instant(time_object.year, 1, 1, graph))) graph.add((term_object, time.term("hasEnd"), self._term_of_an_instant(time_object.year, 12, 31, graph))) #DateTimeDescription date_time_desc_term = base_time.term(time_object.value + "_desc") graph.add((term_object, time.term("hasDateTimeDescription"), time.term(date_time_desc_term))) graph.add((date_time_desc_term, time.term("year"), Literal(str(time_object.year), datatype=XSD.gYear))) graph.add((date_time_desc_term, time.term("unitType"), time.term("unitYear"))) #Our ontology properties graph.add((term_object, base_time.term("year"), Literal(str(time_object.year), datatype=XSD.int))) def _add_triples_of_month_interval(self, time_object, graph): #base term term_object = base_time.term(time_object.value) #beginning and end graph.add((term_object, time.term("hasBeginning"), self._term_of_an_instant(time_object.year, time_object.month, 1, graph))) graph.add((term_object, time.term("hasEnd"), self._term_of_an_instant(time_object.year, time_object.month, 31, graph))) #DateTimeDescription date_time_desc_term = base_time.term(time_object.value + "_desc") graph.add((term_object, time.term("hasDateTimeDescription"), time.term(date_time_desc_term))) graph.add((date_time_desc_term, time.term("year"), Literal(str(time_object.year), datatype=XSD.gYear))) graph.add((date_time_desc_term, time.term("month"), Literal("--" + str(time_object.month), datatype=XSD.gMonth))) graph.add((date_time_desc_term, time.term("unitType"), time.term("unitMonth"))) #Our ontology properties graph.add((term_object, base_time.term("year"), Literal(str(time_object.year), datatype=XSD.int))) graph.add((term_object, base_time.term("month"), Literal(str(time_object.month), datatype=XSD.int))) def _add_triples_of_interval(self, time_object, graph): #base term term_object = base_time.term(time_object.value) #beginning and end graph.add((term_object, time.term("hasBeginning"), self._term_of_an_instant(time_object.start_time.year, 1, 1, graph))) graph.add((term_object, time.term("hasEnd"), self._term_of_an_instant(time_object.end_time.year, 12, 31, graph))) @staticmethod def _term_of_an_instant(year, month, day, graph): instant_term = base_time.term("instant_" + str(year) + "_" + str(month) + "_" + str(day)) graph.add((instant_term, RDF.type, time.term("Instant"))) graph.add((instant_term, time.term("inXSDDateTime"), Literal(str(year) + "-" + str(month) + "-" + str(day) + "T00:00:00Z"))) #2011-12-24T14:24:05Z return instant_term def _add_observations_triples(self, graph): """ """ print "Adding observations..." 
    def _add_observations_triples(self, graph):
        """Add one qb:Observation per parsed observation."""
        print "Adding observations..."
        for obs in self.parser.get_observations():
            region = self._get_area_code(obs)
            graph.add((base_obs.term(obs.id), RDF.type, qb.term("Observation")))
            graph.add((base_obs.term(obs.id), cex.term("ref-time"),
                       base_time.term(obs.ref_time.value)))
            graph.add((base_obs.term(obs.id), cex.term("ref-area"),
                       base.term(region)))
            graph.add((base_obs.term(obs.id), cex.term("ref-indicator"),
                       base_ind.term(obs.indicator_id)))
            # Missing observations ("obsStatus-M") carry no value
            if obs.value.obs_status != "obsStatus-M":
                if float(obs.value.value) % 1 != 0:
                    graph.add((base_obs.term(obs.id), cex.term("value"),
                               Literal(obs.value.value, datatype=XSD.double)))
                else:
                    graph.add((base_obs.term(obs.id), cex.term("value"),
                               Literal(int(float(obs.value.value)),
                                       datatype=XSD.integer)))
            graph.add((base_obs.term(obs.id), cex.term("computation"),
                       cex.term(str(obs.computation.uri))))
            graph.add((base_obs.term(obs.id), dct.term("issued"),
                       Literal(obs.issued.timestamp.strftime("%Y-%m-%d"),
                               datatype=XSD.date)))
            graph.add((base_obs.term(obs.id), qb.term("dataSet"),
                       base.term(obs.dataset_id)))
            graph.add((base_obs.term(obs.id), qb.term("slice"),
                       base.term(str(obs.slice_id))))
            graph.add((base_obs.term(obs.id), RDFS.label,
                       Literal("Observation of " + str(region) +
                               " within the period of " + str(obs.ref_time.value) +
                               " for indicator " + str(obs.indicator_id),
                               lang="en")))
            graph.add((base_obs.term(obs.id), sdmx_concept.term("obsStatus"),
                       sdmx_code.term(obs.value.obs_status)))
        return graph

    def _add_indicators_triples(self, graph):
        print "Adding indicators..."
        for ind in self.parser.get_simple_indicators():
            graph.add((base_ind.term(ind.id), RDF.type, cex.term("Indicator")))
            graph.add((base_ind.term(ind.id), lb.term("preferable_tendency"),
                       cex.term(ind.preferable_tendency)))
            graph.add((base_ind.term(ind.id), lb.term("measurement"),
                       Literal(ind.measurement_unit.name)))
            graph.add((base_ind.term(ind.id), lb.term("last_update"),
                       Literal(self.time.strftime("%Y-%m-%d"),
                               datatype=XSD.date)))
            # rdflib has no XSD.Boolean; the datatype is XSD.boolean
            graph.add((base_ind.term(ind.id), lb.term("starred"),
                       Literal(ind.starred, datatype=XSD.boolean)))
            graph.add((base_ind.term(ind.id), lb.term("topic"),
                       base_topic.term(ind.topic_id)))
            graph.add((base_ind.term(ind.id), lb.term("indicatorType"),
                       cex.term(ind.type)))
            graph.add((base_ind.term(ind.id), RDFS.label,
                       Literal(ind.translations[0].name, lang="en")))
            graph.add((base_ind.term(ind.id), RDFS.comment,
                       Literal(ind.translations[0].description, lang="en")))
        return graph

    def _add_slices_triples(self, graph):
        print "Adding slices..."
        for slc in self.parser.get_slices():
            graph.add((base_slice.term(slc.id), RDF.type, qb.term("Slice")))
            graph.add((base_slice.term(slc.id), cex.term("indicator"),
                       base.term(str(slc.indicator_id))))
            for obs in self.parser.get_observations():
                if obs.slice_id == slc.id:
                    graph.add((base_slice.term(slc.id), qb.term("observation"),
                               base.term(str(obs.id))))
            if slc.region_code is not None:
                dimension = slc.region_code
            elif slc.country_code is not None:
                dimension = slc.country_code
            else:
                dimension = slc.dimension.value
            graph.add((base_slice.term(slc.id), lb.term("dimension"),
                       lb.term(dimension)))
            graph.add((base_slice.term(slc.id), qb.term("dataSet"),
                       base.term(slc.dataset_id)))
        return graph
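    # For an illustrative observation "OBS1" of indicator "IND1" for Spain in
    # 2010, _add_observations_triples emits roughly (Turtle, prefixes
    # abbreviated):
    #
    #   base_obs:OBS1 a qb:Observation ;
    #       cex:ref-time base_time:2010 ;
    #       cex:ref-area base:ESP ;
    #       cex:ref-indicator base_ind:IND1 ;
    #       cex:value "42"^^xsd:integer ;
    #       qb:dataSet base:DS1 ;
    #       sdmx-concept:obsStatus sdmx-code:obsStatus-A .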
    def _add_users_triples(self, graph):
        print "Adding users..."
        usr = self.parser.get_user()
        organization = self.parser.get_organization()
        graph.add((base.term(usr.id), RDF.type, FOAF.Person))
        graph.add((base.term(usr.id), RDFS.label, Literal(usr.id, lang="en")))
        graph.add((base.term(usr.id), FOAF.name, Literal(usr.id)))
        graph.add((base.term(usr.id), FOAF.account, Literal(usr.id)))
        graph.add((base.term(usr.id), org.term("memberOf"),
                   base_org.term(str(organization.id))))
        return graph

    def _add_organizations_triples(self, graph):
        print "Adding organizations..."
        organization = self.parser.get_organization()
        graph.add((base_org.term(organization.id), RDF.type,
                   org.term("Organization")))
        graph.add((base_org.term(organization.id), RDFS.label,
                   Literal(organization.name, lang="en")))
        # foaf:homepage expects a resource, not a literal
        graph.add((base_org.term(organization.id), FOAF.homepage,
                   URIRef(organization.url)))
        return graph

    def _add_topics_triples(self, graph):
        for ind in self.parser.get_simple_indicators():
            self._add_topic(graph, ind)
        return graph

    @staticmethod
    def _add_topic(graph, indicator):
        from create_db import MetadataPopulator
        topic_id = indicator.topic_id
        topic_label = ""
        for tp in MetadataPopulator.get_topics():
            if topic_id == tp.id:
                topic_label = tp.translations[0].name
        graph.add((base_topic.term(topic_id), RDF.type, lb.term("Topic")))
        graph.add((base_topic.term(topic_id), RDFS.label,
                   Literal(topic_label, lang="en")))
        return topic_id

    def _add_area_triples_from_slices(self, graph):
        slices = self.parser.get_slices()
        for slc in slices:
            if slc.country_code:
                self._add_country(graph, slc)
            elif slc.region_code:
                self._add_region(graph, slc)
        return graph

    def _add_area_triples_from_observations(self, graph):
        observations = self.parser.get_observations()
        for obs in observations:
            if obs.country_code:
                self._add_country(graph, obs)
            elif obs.region_code:
                self._add_region(graph, obs)
        return graph

    @staticmethod
    def _add_country(graph, arg):
        code = arg.country_code
        country_list_file = config.COUNTRY_LIST_FILE
        country_id = ""
        country_name = ""
        iso2 = ""
        fao_uri = ""  # URL of the page
        region = ""
        fao_semantic_uri = ""  # Semantic URI node
        fao_resolver = FaoUriResolver()
        for country in CountryReader().get_countries(country_list_file):
            if code == country.iso3:
                country_id = country.iso3
                country_name = country.translations[0].name
                iso2 = country.iso2
                fao_uri = country.faoURI
                region = country.is_part_of_id
                fao_semantic_uri = fao_resolver.get_URI_from_iso3(country.iso3)
        graph.add((base.term(country_id), RDF.type, cex.term("Area")))
        graph.add((base.term(country_id), RDFS.label,
                   Literal(country_name, lang="en")))
        graph.add((base.term(country_id), lb.term("iso3"), Literal(code)))
        graph.add((base.term(country_id), lb.term("iso2"), Literal(iso2)))
        graph.add((base.term(country_id), lb.term("faoURI"), URIRef(fao_uri)))
        graph.add((base.term(country_id), lb.term("is_part_of"),
                   base.term(region)))
        if fao_semantic_uri is not None:
            graph.add((base.term(country_id), lb.term("faoReference"),
                       URIRef(fao_semantic_uri)))
        return code

    def _add_region_triples(self, graph):
        self._add_region(graph, None)

    @staticmethod
    def _add_region(graph, arg):
        # UN M49 numeric area codes for the continental regions
        un_codes = {"Americas": 19, "Europe": 150, "Oceania": 9,
                    "Africa": 2, "Asia": 142}
        country_list_file = config.COUNTRY_LIST_FILE
        for region in CountryReader().get_countries(country_list_file):
            region_id = region.is_part_of_id
            graph.add((base.term(region_id), RDF.type, cex.term("Area")))
            graph.add((base.term(region_id), RDFS.label,
                       Literal(region_id, lang="en")))
            un_code = un_codes.get(region_id)
            graph.add((base.term(region_id), lb.term("UNCode"),
                       Literal(un_code)))
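    # For an illustrative country record (Spain, ISO3 "ESP"), _add_country
    # emits roughly (Turtle, prefixes abbreviated; the FAO URIs depend on the
    # resolver and the country list file):
    #
    #   base:ESP a cex:Area ;
    #       rdfs:label "Spain"@en ;
    #       lb:iso3 "ESP" ;
    #       lb:iso2 "ES" ;
    #       lb:is_part_of base:Europe .
    #   base:Europe a cex:Area ;
    #       lb:UNCode 150 .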
    @staticmethod
    def _get_area_code(arg):
        area = None
        if arg.country_code:
            area = arg.country_code
        elif arg.region_code:
            area = arg.region_code
        return area

    def _add_licenses_triples(self, graph):
        lic = self.parser.get_license()
        graph.add((URIRef(lic.url), RDF.type, lb.term("License")))
        graph.add((URIRef(lic.url), lb.term("name"), Literal(lic.name)))
        graph.add((URIRef(lic.url), lb.term("description"),
                   Literal(lic.description)))
        graph.add((URIRef(lic.url), lb.term("url"), Literal(lic.url)))
        graph.add((URIRef(lic.url), lb.term("republish"),
                   Literal(lic.republish, datatype=XSD.boolean)))
        return graph

    def _add_computation_triples(self, graph):
        for obs in self.parser.get_observations():
            graph.add((cex.term(obs.computation.uri), RDF.type,
                       cex.term("Computation")))
            graph.add((cex.term(obs.computation.uri), RDFS.label,
                       Literal(obs.computation.description, lang="en")))
        return graph

    def _add_data_source_triples(self, graph):
        data_source = self.parser.get_datasource()
        organization = self.parser.get_organization()
        user = self.parser.get_user()
        graph.add((base_dsource.term(data_source.dsource_id), RDF.type,
                   lb.term("DataSource")))
        graph.add((base_dsource.term(data_source.dsource_id), RDFS.label,
                   Literal(data_source.name, lang="en")))
        graph.add((base_dsource.term(data_source.dsource_id),
                   lb.term("organization"), base_org.term(organization.id)))
        graph.add((base_dsource.term(data_source.dsource_id),
                   dct.term("creator"), Literal(user.id)))
        return graph

    def _add_upload_triples(self, graph, ip):
        upload = "upload" + str(self.time.strftime("%y%m%d%H%M"))
        user = self.parser.get_user()
        dsource = self.parser.get_datasource()
        observations = self.parser.get_observations()
        graph.add((base_upload.term(upload), RDF.type, lb.term("Upload")))
        graph.add((base_upload.term(upload), lb.term("user"), lb.term(user.id)))
        graph.add((base_upload.term(upload), lb.term("timestamp"),
                   Literal(self.time.strftime("%Y-%m-%d"), datatype=XSD.date)))
        graph.add((base_upload.term(upload), lb.term("ip"), Literal(ip)))
        for obs in observations:
            graph.add((base_upload.term(upload), lb.term("observation"),
                       base.term(obs.id)))
        graph.add((base_upload.term(upload), lb.term("dataSource"),
                   lb.term(dsource.dsource_id)))
        return graph

    @staticmethod
    def _serialize_rdf_xml(graph):
        serialized = graph.serialize(format="application/rdf+xml")
        with open(config.RDF_DATA_SET, "w") as dataset:
            dataset.write(serialized)

    @staticmethod
    def _serialize_turtle(graph):
        serialized = graph.serialize(format="turtle")
        with open(config.TURTLE_DATA_SET, "w") as dataset:
            dataset.write(serialized)

    @staticmethod
    def _load_data_set(host, api, graph_uri):
        sh.curl(host + api + graph_uri,
                digest=True,
                u=config.DBA_USER + ":" + config.DBA_PASSWORD,
                verbose=True,
                X="POST",
                T=config.RDF_DATA_SET)
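# _load_data_set shells out to curl through the `sh` module; the keyword
# arguments map onto curl flags, so the equivalent command line (Virtuoso's
# graph CRUD endpoint with digest auth; host and graph URI values here are
# illustrative) would be roughly:
#
#   curl --digest -u dba:PASSWORD --verbose -X POST -T dataset.rdf \
#        "http://HOST:8890/sparql-graph-crud-auth?graph-uri=GRAPH_URI"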
class ReceiverSQLService(object):
    """Service that gets the XML input from the HTML request, generates SQL
    output and stores it in the configured relational database.
    """

    def __init__(self, content):
        self.parser = Parser(content)
        self.time = datetime.datetime.now()

    def run_service(self, user_ip):
        """Store the data in the XML into the relational database.

        :param user_ip: The IP of the user that calls the service.
        :raises: The exception that made the data storing fail. If an
            exception occurs, nothing will be stored in the database.
        """
        session = app.db.session
        try:
            self._store_data(user_ip, session)
            session.commit()
        except:
            session.rollback()
            raise
        finally:
            session.close()

    def _store_data(self, user_ip, session):
        organization = self._store_organization(session)
        self._store_user(session, organization, user_ip)
        datasource = self._store_datasource(session, organization)
        dataset = self._store_dataset(session, datasource)
        # Store indicators
        self._store_simple_indicators(dataset, session)
        self._store_compound_indicators(dataset, session)
        self._store_indicator_relationships(session)
        self._store_indicator_groups(session)
        # Store slices
        self._store_slices(dataset, session)
        # Store observations
        self._store_observations(dataset, session)

    def _store_simple_indicators(self, dataset, session):
        def enrich_indicator(ind):
            ind.starred = DBHelper.check_indicator_starred(session, ind.id)
            ind.last_update = self.time
            return ind

        for item in self.parser.get_simple_indicators():
            indicator = session.merge(enrich_indicator(item))
            dataset.add_indicator(indicator)
            session.flush()

    def _store_compound_indicators(self, dataset, session):
        def enrich_indicator(ind):
            ind.starred = DBHelper.check_indicator_starred(session, ind.id)
            ind.last_update = self.time
            for rel in DBHelper.find_indicators(session, ind.related_id):
                rel.compound_indicator_id = ind.id
            return ind

        for item in self.parser.get_compound_indicators():
            indicator = session.merge(enrich_indicator(item))
            dataset.add_indicator(indicator)
            session.flush()

    def _store_indicator_groups(self, session):
        for group in self.parser.get_indicator_groups():
            indicator_ref = DBHelper.find_compound_indicator(
                session, group.indicator_ref)
            indicator_ref.indicator_ref_group_id = group.id
            session.merge(group)
            session.flush()

    def _store_indicator_relationships(self, session):
        indicators = self.parser.get_simple_indicators()
        for ind in indicators:
            # Each indicator may be related to others. The related_id field
            # was created in the parser and WILL NOT be persisted; it is only
            # used to create the relationship objects.
            relationships = []
            for rel_id in ind.related_id:
                rel = model.IndicatorRelationship()
                rel.source_id = ind.id
                rel.target_id = rel_id
                relationships.append(rel)
            session.add_all(relationships)
            session.flush()

    def _store_dataset(self, session, datasource):
        dataset = self.parser.get_dataset()
        dataset.datasource_id = datasource.id
        session.add(dataset)
        session.flush()
        return dataset

    def _store_datasource(self, session, organization):
        xml_datasource = self.parser.get_datasource()
        db_datasource = DBHelper.check_datasource(session, xml_datasource.id)
        # The datasource may already exist in the database.
        if db_datasource is not None:
            return db_datasource
        else:
            xml_datasource.organization_id = organization.id
            session.add(xml_datasource)
            session.flush()
            return xml_datasource
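    # _store_datasource above and _store_organization / _store_user below all
    # follow the same get-or-create pattern: look the entity up first and
    # only insert it when it is missing, so re-uploads reuse existing rows.
    # A generic sketch of the idiom (hypothetical helper, not part of this
    # module):
    #
    #   def get_or_create(session, lookup, build):
    #       entity = lookup(session)      # e.g. DBHelper.check_user(...)
    #       if entity is not None:
    #           return entity
    #       entity = build()              # parsed object from the XML
    #       session.add(entity)
    #       session.flush()
    #       return entity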
    def _store_organization(self, session):
        xml_organization = self.parser.get_organization()
        db_organization = DBHelper.check_organization(session,
                                                      xml_organization.url)
        if db_organization is not None:
            return db_organization
        else:
            session.add(xml_organization)
            session.flush()
            return xml_organization

    def _store_user(self, session, organization, user_ip):
        xml_user = self.parser.get_user()
        db_user = DBHelper.check_user(session, xml_user.id)
        if db_user is not None:
            return db_user
        else:
            xml_user.organization_id = organization.id
            xml_user.ip = user_ip
            session.add(xml_user)
            session.flush()
            return xml_user

    def _store_observations(self, dataset, session):
        def enrich_observation(obs):
            obs.dataset_id = dataset.id
            if obs.region_code is not None:
                obs.region_id = DBHelper.find_region_id(session,
                                                        obs.region_code)
            elif obs.country_code is not None:
                obs.region_id = DBHelper.find_country_id(session,
                                                         obs.country_code)
            return obs

        # Flush after a fixed number of elements so that too many pending
        # inserts do not pile up at once, which can crash the database.
        number_inserted = 0
        for observation in self.parser.get_observations():
            session.add(enrich_observation(observation))
            number_inserted += 1
            if number_inserted == 10000:
                session.flush()
                session.expunge_all()
                number_inserted = 0
        session.flush()
        session.expunge_all()

    def _store_slices(self, dataset, session):
        def enrich_slice(sli):
            sli.dataset_id = dataset.id
            if sli.region_code is not None:
                sli.dimension_id = DBHelper.find_region_id(session,
                                                           sli.region_code)
            elif sli.country_code is not None:
                sli.dimension_id = DBHelper.find_country_id(session,
                                                            sli.country_code)
            return sli

        # Same batched-flush strategy as _store_observations, with a smaller
        # batch size; `slc` avoids shadowing the built-in slice()
        number_inserted = 0
        for slc in self.parser.get_slices():
            session.add(enrich_slice(slc))
            number_inserted += 1
            if number_inserted == 5000:
                number_inserted = 0
                session.flush()
                session.expunge_all()
        session.flush()
        session.expunge_all()
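# Usage sketch (hypothetical caller; assumes the Flask-SQLAlchemy session at
# app.db.session used above and the raw XML content of the upload request):
#
#   service = ReceiverSQLService(xml_content)
#   service.run_service(user_ip=request.remote_addr)
#
# run_service commits everything or nothing: any exception raised while
# storing rolls the whole transaction back and is re-raised to the caller.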