def getSparqlInstance(QID):
    """Run the "instance of" SPARQL query for ``QID`` and return its result.

    The query selects the items linked to ``wd:QID`` through one or more
    ``wdt:P31`` hops, their English label and, when present, their
    ``wdt:P17`` country.

    Args:
        QID: Wikidata entity id placed right after the ``wd:`` prefix.

    Returns:
        The value produced by ``sparql.query().convert()`` with the JSON
        return format, or ``None`` when any exception is raised.
    """
    try:
        # Q43229 - organization
        sparql_text = (
            "SELECT DISTINCT ?item ?itemLabel ?itemDescription ?country ?countryLabel "
            "WHERE { "
            f"?item (wdt:P31)+ wd:{QID}; "
            "rdfs:label ?itemLabel. "
            "FILTER(lang(?itemLabel) = 'en') "
            "OPTIONAL { ?item wdt:P17 ?country } "
            'SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } '
            "} ORDER BY ?item"
        )
        major, minor = sys.version_info[0], sys.version_info[1]
        agent_name = f"WDQS-example Python/{major}.{minor}"
        # TODO adjust user agent; see https://w.wiki/CX6
        client = SPARQLWrapper(endpoint_url, agent=agent_name)
        client.setQuery(sparql_text)
        client.setReturnFormat(JSON)
        response = client.query()
        return response.convert()
    except Exception as e:
        # Best effort: the failure is only logged and signalled with None.
        logger.debug(f'ERROR: {e}')
        return None
def insert(cls, instance):
    """Execute the class's ``__INSERT`` statement for ``instance``.

    Args:
        instance: Object exposing ``getQID()`` and ``getItemLabel()``; the
            two values are passed, in that order, as query parameters.

    Returns:
        The cursor's ``rowcount`` after the statement ran.
    """
    with CursorPool() as cursor:
        statement = cls.__INSERT
        logger.debug(cursor.mogrify(statement))
        logger.debug(f'instance to insert: {instance}')
        params = (instance.getQID(), instance.getItemLabel())
        cursor.execute(statement, params)
        affected = cursor.rowcount
        return affected
def insert(cls, subClass):
    """Execute the class's ``__INSERT`` statement for ``subClass``.

    Args:
        subClass: Object exposing ``getQID()`` and ``getItemLabel()``; the
            two values are passed, in that order, as query parameters.

    Returns:
        The cursor's ``rowcount`` after the statement ran.
    """
    with CursorPool() as cursor:
        statement = cls.__INSERT
        logger.debug(cursor.mogrify(statement))
        logger.debug(f'subClass to insert: {subClass}')
        params = (subClass.getQID(), subClass.getItemLabel())
        cursor.execute(statement, params)
        affected = cursor.rowcount
        return affected
def collect(QID):
    """Harvest the Cuban instances of ``QID`` and recurse into its subclasses.

    Steps, each remote query being preceded by a random 3-9 second pause:

    1. Query the instances of ``QID`` and hand every binding whose
       ``countryLabel`` value is ``"Cuba"`` to ``instanceCollect``.
    2. Query the subclasses of ``QID``; every subclass whose QID is not yet
       stored is inserted and then collected recursively.

    Args:
        QID: Wikidata entity id of the class to harvest.

    Returns:
        None.
    """
    sleep_time = randint(3, 9)
    print('sleep {0} seconds'.format(sleep_time))
    time.sleep(sleep_time)
    sparqlInstance = getSparqlInstance(QID)
    # print(sparqlInstance)
    if sparqlInstance:
        for sparqlI in sparqlInstance["results"]["bindings"]:
            if "countryLabel" in sparqlI and sparqlI["countryLabel"]["value"] == "Cuba":
                instanceCollect(sparqlI)

    # QIDs already stored; a set gives constant-time membership tests.
    known_qids = {stored.getQID() for stored in Subclass.select()}

    sleep_time = randint(3, 9)
    print('sleep {0} seconds'.format(sleep_time))
    time.sleep(sleep_time)
    sparqlSubclass = getSparqlSubclass(QID)
    if not sparqlSubclass:
        return

    for sparqlS in sparqlSubclass["results"]["bindings"]:
        # The entity id is the last path segment of the item URI.
        subclass_qid = sparqlS["item"]["value"].split('/')[-1]
        if subclass_qid in known_qids:
            logger.info(f'Subclass exist: {sparqlS["itemLabel"]["value"]}')
            continue

        subclass = Subclass(subclass_qid, sparqlS["itemLabel"]["value"])
        subclass_inserted = Subclass.insert(subclass)
        logger.debug(f'Subclass inserted: {subclass_inserted}')
        collect(subclass_qid)
        # The recursive call may have stored further subclasses; reload the
        # known set so the remaining bindings are not inserted (and walked)
        # a second time against a stale snapshot.
        known_qids = {stored.getQID() for stored in Subclass.select()}
def createJson(cls):
    """Run the class's ``__GENERATE_JSON`` statement and return its first row as text.

    Returns:
        ``str()`` of the row returned by ``fetchone()`` with every ``(`` and
        ``)`` character removed.
    """
    with CursorPool() as cursor:
        statement = cls.__GENERATE_JSON
        logger.debug(cursor.mogrify(statement))
        cursor.execute(statement)
        row = cursor.fetchone()
        # NOTE(review): the replace calls strip *all* parentheses, including
        # any that are part of the fetched data - confirm this is intended.
        return str(row).replace("(", "").replace(")", "")
def updateCopy(cls, instance):
    """Execute the class's ``__UPDATE_COPY`` statement for ``instance``.

    Args:
        instance: Object exposing ``getDescription()``, ``getAlias()``,
            ``getStatements()`` and ``getQID()``; the four values are passed,
            in that order, as query parameters.

    Returns:
        The cursor's ``rowcount`` after the statement ran.
    """
    with CursorPool() as cursor:
        statement = cls.__UPDATE_COPY
        logger.debug(cursor.mogrify(statement))
        logger.debug(f'instance to update: {instance.getQID()}')
        params = (
            instance.getDescription(),
            instance.getAlias(),
            instance.getStatements(),
            instance.getQID(),
        )
        cursor.execute(statement, params)
        affected = cursor.rowcount
        return affected
def instanceCollect(sparql):
    """Store one SPARQL result binding as an ``Instance`` unless already stored.

    Args:
        sparql: A result binding providing ``["item"]["value"]`` (the item
            URI) and ``["itemLabel"]["value"]`` (its label).

    Returns:
        None.
    """
    postgresInstance = Instance.select()
    # The entity id is the last path segment of the item URI.
    qid = sparql["item"]["value"].split('/')[-1]

    if any(qid == stored.getQID() for stored in postgresInstance):
        logger.info(f'Instance exist: {sparql["itemLabel"]["value"]}')
        return

    instance = Instance(qid, sparql["itemLabel"]["value"])
    instance_inserted = Instance.insert(instance)
    # The message previously said "Subclass inserted", a copy-paste slip
    # that made the debug log misleading.
    logger.debug(f'Instance inserted: {instance_inserted}')
def select(cls):
    """Run the class's ``__SELECT`` statement and wrap each row in a ``Subclass``.

    Returns:
        A list of ``Subclass`` objects built from the first two columns of
        every fetched row, in fetch order.
    """
    with CursorPool() as cursor:
        statement = cls.__SELECT
        logger.debug(cursor.mogrify(statement))
        cursor.execute(statement)
        rows = cursor.fetchall()
        return [Subclass(row[0], row[1]) for row in rows]
def select(cls):
    """Run the class's ``__SELECT`` statement and wrap each row in an ``Instance``.

    Returns:
        A list of ``Instance`` objects built from the first two columns of
        every fetched row, in fetch order.
    """
    with CursorPool() as cursor:
        statement = cls.__SELECT
        logger.debug(cursor.mogrify(statement))
        cursor.execute(statement)
        rows = cursor.fetchall()
        return [Instance(row[0], row[1]) for row in rows]
def getPool(cls):
    """Return the class-level connection pool, creating it on first use.

    The pool is a ``pool.SimpleConnectionPool`` configured from the class's
    ``__MIN_CON``, ``__MAX_CON``, ``__HOST``, ``__USERNAME``, ``__PASSWORD``,
    ``__DB_PORT`` and ``__DATABASE`` attributes, and is cached in
    ``cls.__pool``.

    Returns:
        The cached pool object.

    Raises:
        SystemExit: With exit status 1 when the pool cannot be created.
    """
    if cls.__pool is not None:
        return cls.__pool

    try:
        cls.__pool = pool.SimpleConnectionPool(
            cls.__MIN_CON,
            cls.__MAX_CON,
            host=cls.__HOST,
            user=cls.__USERNAME,
            password=cls.__PASSWORD,
            port=cls.__DB_PORT,
            database=cls.__DATABASE,
        )
    except Exception as e:
        logger.error(f'Error at pool creation: {e}')
        # A bare sys.exit() reports status 0 (success); a failed pool
        # creation must be visible to the calling shell/supervisor.
        sys.exit(1)

    logger.debug(f'Pool creation successfully: {cls.__pool}')
    return cls.__pool
def getSparqlSubclass(QID):
    """Run the "subclass of" SPARQL query for ``QID`` and return its result.

    The query selects the items linked to ``wd:QID`` through zero or more
    ``wdt:P279`` hops together with their English label.

    Args:
        QID: Wikidata entity id placed right after the ``wd:`` prefix.

    Returns:
        The value produced by ``sparql.query().convert()`` with the JSON
        return format, or ``None`` when any exception is raised.
    """
    try:
        # Q43229 - organization
        sparql_text = (
            "SELECT ?item ?itemLabel "
            "WHERE { "
            f"?item (wdt:P279)* wd:{QID}; "
            "rdfs:label ?itemLabel. "
            "FILTER(lang(?itemLabel) = 'en') "
            "} ORDER BY ?item"
        )
        major, minor = sys.version_info[0], sys.version_info[1]
        agent_name = f"WDQS-example Python/{major}.{minor}"
        # TODO adjust user agent; see https://w.wiki/CX6
        client = SPARQLWrapper(endpoint_url, agent=agent_name)
        client.setQuery(sparql_text)
        client.setReturnFormat(JSON)
        response = client.query()
        return response.convert()
    except Exception as e:
        # Best effort: the failure is only logged and signalled with None.
        logger.debug(f'ERROR: {e}')
        return None
def getInstanceDescription(itemLabel):
    """Query the items whose English ``rdfs:label`` equals ``itemLabel``.

    The query projects ``?item``, ``?itemLabel``, ``?itemDescription`` and
    ``?itemAltLabel`` and asks the label service for language ``"es"``.

    Args:
        itemLabel: Label text to match. It is embedded in a double-quoted
            SPARQL string literal, so backslashes, double quotes and line
            breaks are escaped first.

    Returns:
        The value produced by ``sparql.query().convert()`` with the JSON
        return format, or ``None`` when any exception is raised.
    """
    try:
        # Q43229 - organization
        # Escape the characters that would terminate or corrupt the SPARQL
        # string literal (backslash first, so added escapes stay intact).
        escaped_label = (
            str(itemLabel)
            .replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
            .replace("\r", "\\r")
        )
        query = (
            "SELECT DISTINCT ?item ?itemLabel ?itemDescription ?itemAltLabel "
            "WHERE { "
            f'?item rdfs:label "{escaped_label}" @en. '
            "OPTIONAL { ?item skos:altLabel ?alternative . } "
            'SERVICE wikibase:label { bd:serviceParam wikibase:language "es". } '
            "}"
        )
        user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
        # TODO adjust user agent; see https://w.wiki/CX6
        sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        return sparql.query().convert()
    except Exception as e:
        # Best effort: the failure is only logged and signalled with None.
        logger.debug(f'ERROR: {e}')
        return None
def __exit__(self, exception_type, exception_value, exception_traceback):
    """Finish the transaction and hand the connection back to the pool.

    Rolls back when the ``with`` body raised, commits otherwise. The cursor
    is closed and the connection released even if the commit or rollback
    itself raises, so a failing transaction does not leak a pooled
    connection.

    Args:
        exception_type: Type of the exception raised in the ``with`` body,
            or ``None``.
        exception_value: The exception instance, or ``None``.
        exception_traceback: The associated traceback, or ``None``.

    Returns:
        None, so an exception from the ``with`` body keeps propagating.
    """
    logger.debug('Execute method __exit__()')
    try:
        if exception_value:
            self.__conn.rollback()
            logger.debug(f'Error exception: {exception_value}')
        else:
            self.__conn.commit()
            logger.debug('Transaction commit')
    finally:
        try:
            # Close the cursor
            self.__cursor.close()
        finally:
            # Return the connection to the pool
            Connection.releaseConnection(self.__conn)
self.__firstImport = firstImport def getLastImport(self): return self.__lastImport def setLastImport(self, lastImport): self.__lastImport = lastImport def getFirstUserInImport(self): return self.__firstUserInImport def setFirstUserInImport(self, firstUserInImport): self.__firstUserInImport = firstUserInImport def getLastUserImport(self): return self.__lastUserImport def setLastUserImport(self, lastUserImport): self.__lastUserImport = lastUserImport def getState(self): return self.__state def setState(self, state): self.__state = state if __name__ == '__main__': instance = Instance(QID='Q4564', label='Gomez') logger.debug(instance)
def createTableInstance(cls):
    """Execute the class's ``__CREATE_INSTANCE`` statement.

    Returns:
        The cursor's ``rowcount`` after the statement ran.
    """
    with CursorPool() as cur:
        statement = cls.__CREATE_INSTANCE
        logger.debug(cur.mogrify(statement))
        cur.execute(statement)
        affected = cur.rowcount
        return affected
def createTableSubclass(cls):
    """Execute the class's ``__CREATE_SUBCLASS`` statement.

    Returns:
        The cursor's ``rowcount`` after the statement ran.
    """
    with CursorPool() as cur:
        statement = cls.__CREATE_SUBCLASS
        logger.debug(cur.mogrify(statement))
        cur.execute(statement)
        affected = cur.rowcount
        return affected
def dropFunctions(cls):
    """Execute the class's ``__DROP_FUNCTIONS`` statement.

    Returns:
        The cursor's ``rowcount`` after the statement ran.
    """
    with CursorPool() as cur:
        statement = cls.__DROP_FUNCTIONS
        logger.debug(cur.mogrify(statement))
        cur.execute(statement)
        affected = cur.rowcount
        return affected
def dropTables(cls):
    """Execute the class's ``__DROP_TABLES`` statement.

    Returns:
        The cursor's ``rowcount`` after the statement ran.
    """
    with CursorPool() as cur:
        statement = cls.__DROP_TABLES
        logger.debug(cur.mogrify(statement))
        cur.execute(statement)
        affected = cur.rowcount
        return affected
def createInstanceCopy(cls):
    """Execute the class's ``__CREATE_COPY_INSTANCEOF`` statement.

    Returns:
        The cursor's ``rowcount`` after the statement ran.
    """
    with CursorPool() as cur:
        statement = cls.__CREATE_COPY_INSTANCEOF
        logger.debug(cur.mogrify(statement))
        cur.execute(statement)
        affected = cur.rowcount
        return affected
from cuor.harvester.wikidata.logger_base import logger


class Subclass:
    """Plain value holder pairing a QID with its label."""

    def __init__(self, QID=None, label=None):
        """Store the given QID and label; both default to ``None``."""
        self.__label = label
        self.__QID = QID

    def __str__(self):
        """Return ``'QID: <qid>, label: <label>'``."""
        return 'QID: {0}, label: {1}'.format(self.__QID, self.__label)

    # --- QID accessors ---

    def getQID(self):
        """Return the stored QID."""
        return self.__QID

    def setQID(self, QID):
        """Replace the stored QID."""
        self.__QID = QID

    # --- label accessors ---

    def getItemLabel(self):
        """Return the stored label."""
        return self.__label

    def setItemLabel(self, label):
        """Replace the stored label."""
        self.__label = label


if __name__ == '__main__':
    # Manual smoke check: build one object and log its string form.
    subclass = Subclass(QID='Q4564', label='Gomez')
    logger.debug(subclass)
logger.debug(cursor.mogrify(cls.__SELECT)) cursor.execute(cls.__SELECT) results = cursor.fetchall() subClasses = [] for result in results: subClass = Subclass(result[0], result[1]) subClasses.append(subClass) return subClasses @classmethod def insert(cls, subClass): with CursorPool() as cursor: logger.debug(cursor.mogrify(cls.__INSERT)) logger.debug(f'subClass to insert: {subClass}') values = (subClass.getQID(), subClass.getItemLabel()) cursor.execute(cls.__INSERT, values) return cursor.rowcount if __name__ == '__main__': subClasses = Subclass.select() print(subClasses) for subClass in subClasses: logger.debug(subClass) logger.debug(subClass.getQID()) # Insertamos un nuevo registro # subClass = Subclass(QID='Q525', label='Najera', id_subClass='Q566') # inserted_instances = Instance.insert(subClass) # logger.debug(f'Inserted persons: {inserted_instances}')
# start of the with block
def __enter__(self):
    """Take a connection from the pool and return a cursor opened on it."""
    logger.debug('Start method __enter__')
    self.__conn = Connection.getConnection()
    self.__cursor = self.__conn.cursor()
    return self.__cursor


# end of the with block
def __exit__(self, exception_type, exception_value, exception_traceback):
    """Finish the transaction and hand the connection back to the pool.

    Rolls back when the ``with`` body raised, commits otherwise. The cursor
    is closed and the connection released even if the commit or rollback
    itself raises, so a failing transaction does not leak a pooled
    connection.

    Args:
        exception_type: Type of the exception raised in the ``with`` body,
            or ``None``.
        exception_value: The exception instance, or ``None``.
        exception_traceback: The associated traceback, or ``None``.

    Returns:
        None, so an exception from the ``with`` body keeps propagating.
    """
    logger.debug('Execute method __exit__()')
    try:
        if exception_value:
            self.__conn.rollback()
            logger.debug(f'Error exception: {exception_value}')
        else:
            self.__conn.commit()
            logger.debug('Transaction commit')
    finally:
        try:
            # Close the cursor
            self.__cursor.close()
        finally:
            # Return the connection to the pool
            Connection.releaseConnection(self.__conn)


if __name__ == '__main__':
    # Get a cursor backed by a pooled connection;
    # the with statement runs __enter__ first and finishes with __exit__
    with CursorPool() as cursor:
        cursor.execute('SELECT * FROM "subClass"')
        logger.debug('Listado de personas')
        logger.debug(cursor.fetchall())
def closeConnections(cls):
    """Close the pool returned by ``getPool()`` and all of its connections."""
    # NOTE(review): cls.__pool is not cleared here, so it keeps referring to
    # the closed pool afterwards - confirm that is acceptable for callers.
    active_pool = cls.getPool()
    active_pool.closeall()
    logger.debug(f'Close all connections from pool: {cls.__pool}')
def releaseConnection(cls, connection):
    """Give ``connection`` back to the pool returned by ``getPool()``.

    Args:
        connection: The connection object to return to the pool.
    """
    active_pool = cls.getPool()
    active_pool.putconn(connection)
    logger.debug(f'Release connection to pool: {connection}')
    logger.debug(f'State of pool: {cls.__pool}')
def getConnection(cls):
    """Take one connection from the pool returned by ``getPool()``.

    Returns:
        The connection object handed out by the pool's ``getconn()``.
    """
    active_pool = cls.getPool()
    pooled = active_pool.getconn()
    logger.debug(f'Connection from pool: {pooled}')
    return pooled
def __enter__(self):
    """Take a connection from the pool and return a cursor opened on it.

    The connection and the cursor are kept on the instance so that the
    matching exit step can reach them.
    """
    logger.debug('Start method __enter__')
    pooled = Connection.getConnection()
    self.__conn = pooled
    self.__cursor = pooled.cursor()
    return self.__cursor
values = (instance.getQID(), instance.getItemLabel()) cursor.execute(cls.__INSERT, values) return cursor.rowcount @classmethod def update(cls, instance): with CursorPool() as cursor: logger.debug(cursor.mogrify(cls.__UPDATE)) logger.debug(f'instance to update: {instance.getQID()}') values = (instance.getDescription(), instance.getAlias(), json.dumps(instance.getStatements()), instance.getQID()) cursor.execute(cls.__UPDATE, values) return cursor.rowcount @classmethod def updateCopy(cls, instance): with CursorPool() as cursor: logger.debug(cursor.mogrify(cls.__UPDATE_COPY)) logger.debug(f'instance to update: {instance.getQID()}') values = (instance.getDescription(), instance.getAlias(), instance.getStatements(), instance.getQID()) cursor.execute(cls.__UPDATE_COPY, values) return cursor.rowcount if __name__ == '__main__': instances = Instance.select() for instance in instances: logger.debug(instance) logger.debug(instance.getQID())