import os
import csv

from rdflib import ConjunctiveGraph, Graph

# Project-local helpers; the exact import paths are assumptions based on how the
# loader script references them (ontology.use_ontology, rdf_common, namespaces).
from ontology import use_ontology
from namespaces import OBOLIB
import rdf_common


def main():
    # Directories holding the raw CSV input and the RDF output for this parser.
    csv_data_files = os.path.join('data', 'raw', 'agriculture, forestries, fisheries', 'crops')
    rdf_data_files = os.path.join('data', 'rdf', 'agriculture, forestries, fisheries', 'crops')

    # The file key is this script's own base name; the RDF output is named after it.
    file_key = os.path.splitext(os.path.basename(__file__))[0]
    output_filename = file_key + '.rdf'

    rdf_graph = Graph()
    rdf_graph.namespace_manager.bind("obo", OBOLIB, replace=True)
    use_ontology(rdf_graph)
    rdf_common.serialize_rdf_to_file(rdf_graph, output_filename, "xml")
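
# Hedged sketch (not in the original): main() above computes csv_data_files and
# rdf_data_files but never uses them. If the standalone entry point is meant to
# exercise this parser directly, it could forward those paths to extract() below,
# which builds its own graph when none is passed in. The function name here is
# hypothetical.
def main_standalone():
    csv_dir = os.path.join('data', 'raw', 'agriculture, forestries, fisheries', 'crops')
    rdf_dir = os.path.join('data', 'rdf', 'agriculture, forestries, fisheries', 'crops')
    file_key = os.path.splitext(os.path.basename(__file__))[0]
    extract(file_key, csv_dir, rdf_dir)  # extract() creates and serializes the graph itself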
def extract(file_key, input_path, output_path, rdf_graph=None):
    # When no shared graph is passed in (standalone run), build one and bind the ontology.
    if rdf_graph is None:
        rdf_graph = ConjunctiveGraph()
        use_ontology(rdf_graph)

    # Collect every CSV file in input_path that belongs to this file key.
    data_file_names = []
    for data_file_name in os.listdir(input_path):
        if data_file_name.startswith(file_key) and data_file_name.endswith('.csv'):
            data_file_names.append(data_file_name)

    # Each CSV is expected to start with a title row, a row to skip and the header
    # row; the remaining rows are data (column 0: crop, column 1: place).
    lines = []
    for data_file_name in data_file_names:
        input_filename = os.path.join(input_path, data_file_name)
        print("input_file_name:", input_filename)
        with open(input_filename) as input_file:
            stream = csv.reader(input_file)
            title = next(stream)[0]
            next(stream)
            headers = next(stream)
            lines_partial = list(stream)
            lines += lines_partial

    # Build RDF nodes for the distinct crops.
    CROPS = sorted(set(line[0] for line in lines))
    crops_to_uri_table = {}
    crops_uri_to_nodes_table = {}
    build_crop_rdf(CROPS, crops_to_uri_table, crops_uri_to_nodes_table, rdf_graph)

    # Build the place hierarchy and its RDF nodes.
    places_raw = [line[1] for line in lines]
    place_hierarchy = {}
    get_place_hierarchy(places_raw, 0, 0, place_hierarchy)
    places_to_uri_table = {}
    places_uri_to_nodes_table = {}
    build_hierarchy_rdf(place_hierarchy, places_to_uri_table,
                        places_uri_to_nodes_table, rdf_graph)

    # Link crops, places and production figures, then serialize the graph.
    build_production_rdf(headers, lines, places_to_uri_table, places_uri_to_nodes_table,
                         crops_to_uri_table, crops_uri_to_nodes_table, rdf_graph)
    rdf_common.serialize_rdf_to_file(
        rdf_graph, os.path.join(output_path, file_key + ".rdf"), "xml")
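
# Hedged sketch (assumption, not the project's actual helper): serialize_rdf_to_file
# is called throughout as rdf_common.serialize_rdf_to_file(graph, path, "xml"); a thin
# wrapper over rdflib's Graph.serialize() with that signature could look like this.
def serialize_rdf_to_file(rdf_graph, output_filename, rdf_format):
    # rdflib accepts "xml" as the RDF/XML serializer name.
    rdf_graph.serialize(destination=output_filename, format=rdf_format)
    print("wrote", output_filename)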
import importlib

from rdflib import Graph

from namespaces import common_namespace
import databases
import ontology
import rdf_common

# Alternative persistent store (kept for reference, currently unused):
# identifier = URIRef(common_namespace.common_uri)
# uri = Literal("sqlite://")
# store = plugin.get("SQLAlchemy", Store)(identifier=identifier)
# rdf_graph = Graph(store, identifier=identifier)
# rdf_graph.open(uri, create=True)

# Build one in-memory graph shared by every parser, with the ontology bound to it.
rdf_graph = Graph()
ontology.use_ontology(rdf_graph)

for domain in databases.DOMAINS.values():
    input_path = domain["directories"]["input_path"]
    output_path = domain["directories"]["output_path"]
    for name, path in domain["directories"].items():
        print(path)

    # Import each database's parser module dynamically and let it add its triples
    # to the shared graph.
    for database in domain["databases"]:
        file_key = database["file_key"]
        print("loading parser ({}) for csv database {}".format(database["parser"], file_key))
        parser_module_path = '.'.join([domain["directories"]["parsers_path"], database["parser"]])
        parser = importlib.import_module(parser_module_path, package=None)
        parser.extract(file_key, input_path, output_path, rdf_graph)

rdf_common.serialize_rdf_to_file(rdf_graph, "linked_db.rdf", "xml")
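
# A minimal sketch (assumption) of the shape databases.DOMAINS needs for the loop
# above: each domain carries a "directories" mapping with input_path, output_path
# and a dotted parsers_path, plus a "databases" list whose entries give a file_key
# and a parser module name. The concrete names below are hypothetical, modeled on
# the crops parser's own paths.
DOMAINS_EXAMPLE = {
    "agriculture_forestries_fisheries": {
        "directories": {
            "input_path": "data/raw/agriculture, forestries, fisheries/crops",
            "output_path": "data/rdf/agriculture, forestries, fisheries/crops",
            "parsers_path": "parsers.agriculture_forestries_fisheries.crops",
        },
        "databases": [
            {"file_key": "crops", "parser": "crops"},
        ],
    },
}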
from rdflib import ConjunctiveGraph
from ontology import use_ontology  # project-local helpers; import paths are assumptions
import rdf_common


def main():
    # Serialize the bare ontology (no data triples) to its own RDF/XML file.
    owl_graph = ConjunctiveGraph()
    use_ontology(owl_graph)
    rdf_common.serialize_rdf_to_file(owl_graph, "ontology.xml", "xml")