def retrievePortalHash(seriesId="", metrics_database=None):
    """
    Retrieves the stored portal hash for a portal series identifier.

    :param seriesId: series identifier of the portal whose hash is wanted
    :param metrics_database: optional already-connected MetricsDatabase;
        a new connection is created when None
    :return: the hash value for the series, or None when no row matches
        or a database error occurs
    """
    logger = getESSyncLogger(name="es_eventlog")
    logger.info("Beginning hash retrieval job")
    if metrics_database is None:
        metrics_database = MetricsDatabase()
        metrics_database.connect()
    csr = metrics_database.getCursor()
    # Parameterized query filtered on the requested series; the previous
    # version ignored seriesId and returned the first row of the table.
    sql = 'SELECT series_id, hash FROM portal_metadata_test WHERE series_id = %s;'
    portal_hash = None
    try:
        csr.execute(sql, (seriesId,))
        if csr.rowcount > 0:
            rows = csr.fetchall()
            portal_hash = rows[0][1]
    # OperationalError subclasses DatabaseError, so it must be caught
    # first or its handler is unreachable.
    except psycopg2.OperationalError as e:
        logger.exception("Operational error!")
        logger.exception(e)
    except psycopg2.DatabaseError as e:
        logger.exception("Database error! %s", str(e))
        logger.exception(e)
    finally:
        logger.info("Commiting changes to DB")
        metrics_database.conn.commit()
    return portal_hash
def getRepositoryCitationPIDs(self, nodeId):
    """
    Returns the citation target identifiers registered for a repository node.

    :param nodeId: repository node identifier to look up
    :return: list of target_id values whose citation metadata lists nodeId
        in its node_id array (empty on error)
    """
    t_0 = time.time()
    self.logger.debug("enter getRepositoryCitationPIDs")
    metrics_database = MetricsDatabase()
    metrics_database.connect()
    csr = metrics_database.getCursor()
    # Parameterized query prevents SQL injection through nodeId (the
    # previous version concatenated it straight into the statement).
    sql = 'SELECT target_id FROM citation_metadata WHERE %s = ANY (node_id);'
    results = []
    try:
        csr.execute(sql, (nodeId,))
        for row in csr.fetchall():
            results.append(row[0])
    except Exception as e:
        # Log through the class logger instead of printing a
        # never-formatted template string to stdout.
        self.logger.exception("Database error!")
        self.logger.exception(e)
    self.logger.debug("exit getRepositoryCitationPIDs, elapsed=%fsec", time.time() - t_0)
    return results
def storePortalHash(seriesId="", hashVal=None, metrics_database=None, updateEntry=False):
    """
    Inserts or updates the stored hash for a portal series.

    :param seriesId: series identifier of the portal
    :param hashVal: hash value to store
    :param metrics_database: optional already-connected MetricsDatabase;
        a new connection is created when None
    :param updateEntry: when True, UPDATE the existing row for seriesId;
        otherwise INSERT a new row
    :return: None
    """
    logger = getESSyncLogger(name="es_eventlog")
    logger.info("Trying to store portal hash")
    if metrics_database is None:
        metrics_database = MetricsDatabase()
        metrics_database.connect()
    csr = metrics_database.getCursor()
    # Parameterized statements prevent SQL injection via seriesId/hashVal
    # (the previous version concatenated both into the SQL text).
    if not updateEntry:
        sql = "INSERT INTO portal_metadata_test (id, series_id, hash) VALUES (DEFAULT, %s, %s);"
        params = (seriesId, hashVal)
    else:
        sql = "UPDATE portal_metadata_test SET hash = %s WHERE series_id = %s;"
        params = (hashVal, seriesId)
    try:
        csr.execute(sql, params)
    # OperationalError subclasses DatabaseError, so it must be caught
    # first or its handler is unreachable.
    except psycopg2.OperationalError as e:
        logger.exception("Operational error!")
        logger.exception(e)
    except psycopg2.DatabaseError as e:
        logger.exception("Database error! %s", str(e))
        logger.exception(e)
    finally:
        logger.info("Commiting changes to DB")
        metrics_database.conn.commit()
    return None
def gatherCitations(self, PIDs, metrics_database=None):
    """
    Collects citation records matching any of the given dataset PIDs.

    :param PIDs: identifiers belonging to the dataset
    :param metrics_database: optional already-connected MetricsDatabase;
        a new connection is created when None
    :return: (citationCount, citations) — the number of matches and a list
        of citation dicts keyed by the citation table's column names
    """
    t_0 = time.time()
    self.logger.debug("enter gatherCitations")
    if metrics_database is None:
        metrics_database = MetricsDatabase()
        metrics_database.connect()
    csr = metrics_database.getCursor()
    columns = ("target_id", "source_id", "source_url", "link_publication_date",
               "origin", "title", "publisher", "journal", "volume", "page",
               "year_of_publishing")
    sql = 'SELECT target_id,source_id,source_url,link_publication_date,origin,title,publisher,journal,volume,page,year_of_publishing FROM citations;'
    citations = []
    citationCount = 0
    try:
        csr.execute(sql)
        for row in csr.fetchall():
            for pid in PIDs:
                # Special use case for Dryad datasets: strip the query
                # suffix and compare against the base identifier.
                if '?' in pid:
                    pid = pid.split("?")[0]
                if row[0].lower() in pid.lower():
                    citationCount += 1
                    citations.append(dict(zip(columns, row)))
                    # Don't add duplicate citations for all the objects
                    # of the same dataset.
                    break
    except Exception as e:
        # Log the failure instead of printing a never-formatted template.
        self.logger.exception("Database error!")
        self.logger.exception(e)
    self.logger.debug("exit gatherCitations, elapsed=%fsec", time.time() - t_0)
    return (citationCount, citations)
def formatDataPerCatalog(self, data, catalogPIDs):
    """
    Shapes aggregated event data into parallel per-catalog metric lists.

    :param data: aggregation response containing pid_list buckets
    :param catalogPIDs: mapping of catalog entry identifier -> its PIDs
    :return: (results, {}) where results holds parallel "downloads",
        "views", "citations", and "datasets" lists, one entry per
        catalog identifier
    """
    results = {
        "downloads": [],
        "views": [],
        "citations": [],
        "datasets": [],
        "country": [],
        "months": []
        # "tempDict" : []
    }
    db = MetricsDatabase()
    db.connect()
    # One citation count per catalog entry, sharing a single DB connection.
    for catalog_id in catalogPIDs:
        citation_count, _ = self.gatherCitations(
            catalogPIDs[catalog_id], metrics_database=db)
        results["citations"].append(citation_count)
    # Pick out the DATA and METADATA buckets; a later matching bucket
    # replaces an earlier one.
    data_bucket = {}
    metadata_bucket = {}
    for bucket in data["aggregations"]["pid_list"]["buckets"]:
        if bucket["key"]["format"] == "DATA":
            data_bucket = bucket
        if bucket["key"]["format"] == "METADATA":
            metadata_bucket = bucket
    # Per-catalog download/view counts, defaulting to 0 when absent.
    for catalog_id in catalogPIDs:
        if catalog_id in data_bucket:
            results["downloads"].append(
                data_bucket[catalog_id]["buckets"]["pid.key"]["doc_count"])
        else:
            results["downloads"].append(0)
        if catalog_id in metadata_bucket:
            results["views"].append(
                metadata_bucket[catalog_id]["buckets"]["pid.key"]["doc_count"])
        else:
            results["views"].append(0)
        results["datasets"].append(catalog_id)
    return results, {}
def run(self):
    """
    Scheduler entry point: registers the periodic sync jobs, then
    services the schedule forever.
    """
    metrics_db = MetricsDatabase()
    reporter = MetricsReporter()
    # Register the sync job at minute, hourly, and daily cadences.
    schedule.every(1).minute.do(job_func=self.job)
    schedule.every().hour.do(job_func=self.job)
    schedule.every().day.at("00:30").do(job_func=self.job)
    # schedule.every().day.at("01:30").do(job_func=metrics_reporter.scheduler())
    # schedule.every().day.at("02:30").do(job_func=metrics_database.getCitations())
    # Poll the scheduler once a second, forever.
    while True:
        schedule.run_pending()
        time.sleep(1)
def queue_citation_object(self, citations_request):
    """
    Queues a citation request for asynchronous processing.

    :param citations_request: the citation request payload to enqueue
    :return: dict with "message" and "status_code" keys — "202" on
        success, "500" with the error text on failure
    """
    response = {"message": "", "status_code": ""}
    try:
        metrics_database = MetricsDatabase()
        metrics_database.queueCitationRequest(citations_request)
        response["message"] = "Successful"
        response["status_code"] = "202"
    except Exception as e:
        # Store the error text, not the exception object, so the
        # response stays serializable (e.g. for JSON encoding).
        response["message"] = str(e)
        response["status_code"] = "500"
    return response
def updateCitationsDatabase(seriesId, PID_List):
    """
    Tags citation metadata rows with a portal series identifier.

    For every citation target_id that appears as a substring of an entry
    in PID_List, adds seriesId to that row's portal_id array in
    citation_metadata, skipping rows that already contain it.

    :param seriesId: portal series identifier to record
    :param PID_List: identifiers belonging to the portal
    :return: None
    """
    logger = getESSyncLogger(name="es_eventlog")
    t_start = time.time()
    logger.info("Beginning citations database table udpates")
    # establish a connection
    metrics_database = MetricsDatabase()
    metrics_database.connect()
    csr = metrics_database.getCursor()
    # get the list of all identifiers that have citations
    sql = 'SELECT target_id FROM citations;'
    # Initialized up front so a query failure cannot leave the name
    # undefined for the matching loop below (previously a NameError).
    citation_pid_set = set()
    try:
        csr.execute(sql)
        if csr.rowcount > 0:
            for cit_tup in csr.fetchall():
                citation_pid_set.add(cit_tup[0])
            logger.debug("retrieved citations successfully from the DB")
    # OperationalError subclasses DatabaseError and must be caught first.
    except psycopg2.OperationalError as e:
        logger.exception("Operational error!")
        logger.exception(e)
    except psycopg2.DatabaseError as e:
        logger.exception("Database error! %s", str(e))
        logger.exception(e)
    finally:
        logger.info("Commiting changes to DB")
        metrics_database.conn.commit()

    # check if the identifier exists in the PID_List
    portal_citation_identifiers = set()
    for identifier in citation_pid_set:
        for portal_PID in PID_List:
            if identifier in portal_PID:
                portal_citation_identifiers.add(identifier)

    # store the seriesId for each matched identifier
    updateCount = 0
    for identifier in portal_citation_identifiers:
        try:
            # Parameterized queries prevent SQL injection through the
            # identifiers; psycopg2 adapts Python lists to SQL arrays.
            csr.execute("SELECT portal_id FROM citation_metadata WHERE target_id = %s;",
                        (identifier,))
            rows = csr.fetchall()
            if not rows or rows[0][0] is None:
                sql = "UPDATE citation_metadata SET portal_id = %s WHERE target_id = %s;"
                params = ([seriesId], identifier)
            else:
                # append only if the given seriesId does not already exist
                # in the 'portal_id' array
                sql = ("UPDATE citation_metadata SET portal_id = "
                       "(select array_agg(distinct e) from unnest(portal_id || %s) e) "
                       "where not portal_id @> %s AND target_id = %s;")
                params = ([seriesId], [seriesId], identifier)
            csr.execute(sql, params)
            csrUpdateCount = csr.rowcount
            updateCount += csrUpdateCount
            if csrUpdateCount:
                logger.info(
                    "Added citation for target id - %s with series ID - %s " % (identifier, seriesId))
            else:
                logger.info("Target id - %s already has series ID - %s " % (identifier, seriesId))
        except psycopg2.OperationalError as e:
            logger.exception("Operational error!")
            logger.exception(e)
        except psycopg2.DatabaseError as e:
            logger.exception("Database error! %s", str(e))
            logger.exception(e)
        except Exception as e:
            logger.exception("Other error!")
            logger.exception(e)
        finally:
            logger.debug("Commiting changes to DB")
            # commit
            metrics_database.conn.commit()

    logger.info(
        "Successfully updated %s the citations pids for seriesId : %s" % (updateCount, seriesId))