def retrievePortalHash(seriesId="", metrics_database=None):
    """
    Retrieve the stored portal hash for a series identifier.

    :param seriesId: portal series identifier used to filter the lookup
    :param metrics_database: optional already-connected MetricsDatabase;
        a new connection is created when None
    :return: the hash of the first matching row, or None when no row
        matches or a database error occurs
    """
    logger = getESSyncLogger(name="es_eventlog")
    logger.info("Beginning hash retrieval job")
    if metrics_database is None:
        metrics_database = MetricsDatabase()
        metrics_database.connect()
    csr = metrics_database.getCursor()
    # Parameterized query: the original selected every row and ignored
    # seriesId entirely; this also avoids SQL injection.
    sql = 'SELECT series_id, hash FROM portal_metadata_test WHERE series_id = %s;'

    portal_hash = None
    try:
        csr.execute(sql, (seriesId,))
        if csr.rowcount > 0:
            portal_hash = csr.fetchall()[0][1]
    # OperationalError subclasses DatabaseError, so it must be caught
    # first to be reachable.
    except psycopg2.OperationalError as e:
        logger.exception('Operational error!')
        logger.exception(e)
    except psycopg2.DatabaseError as e:
        logger.exception('Database error!')
        logger.exception(e)
    finally:
        logger.info("Commiting changes to DB")
        metrics_database.conn.commit()
    return portal_hash
    def getRepositoryCitationPIDs(self, nodeId):
        """
        Return the target identifiers of all citations recorded for a node.

        :param nodeId: member node identifier matched against the
            citation_metadata.node_id array column
        :return: list of target_id values (empty on error or no match)
        """
        t_0 = time.time()
        self.logger.debug("enter getRepositoryCitationPIDs")
        metrics_database = MetricsDatabase()
        metrics_database.connect()
        csr = metrics_database.getCursor()
        # Parameterized query instead of string concatenation (avoids SQL
        # injection and quoting bugs in nodeId).
        sql = 'SELECT target_id FROM citation_metadata WHERE %s = ANY (node_id);'

        results = []
        try:
            csr.execute(sql, (nodeId,))
            for row in csr.fetchall():
                results.append(row[0])
        except Exception as e:
            # Log through the instance logger instead of print() so the
            # failure reaches the log stream.
            self.logger.exception('Database error!')
            self.logger.exception(e)
        self.logger.debug("exit getRepositoryCitationPIDs, elapsed=%fsec",
                          time.time() - t_0)
        return results
def storePortalHash(seriesId="",
                    hashVal=None,
                    metrics_database=None,
                    updateEntry=False):
    """
    Insert or update the hash stored for a portal series identifier.

    :param seriesId: portal series identifier
    :param hashVal: hash value to persist
    :param metrics_database: optional already-connected MetricsDatabase;
        a new connection is created when None
    :param updateEntry: when True, UPDATE the existing row for seriesId
        instead of INSERTing a new one
    :return: None
    """
    logger = getESSyncLogger(name="es_eventlog")
    logger.info("Trying to store portal hash")
    if metrics_database is None:
        metrics_database = MetricsDatabase()
        metrics_database.connect()
    csr = metrics_database.getCursor()
    # Parameterized statements instead of string concatenation: avoids SQL
    # injection and breakage when seriesId/hashVal contain quotes.
    if not updateEntry:
        sql = ("INSERT INTO portal_metadata_test (id, series_id, hash) "
               "VALUES (DEFAULT, %s, %s);")
        params = (seriesId, hashVal)
    else:
        sql = "UPDATE portal_metadata_test SET hash = %s WHERE series_id = %s;"
        params = (hashVal, seriesId)

    try:
        csr.execute(sql, params)
    # OperationalError subclasses DatabaseError; catch it first so the
    # handler is reachable.
    except psycopg2.OperationalError as e:
        logger.exception('Operational error!')
        logger.exception(e)
    except psycopg2.DatabaseError as e:
        logger.exception('Database error!')
        logger.exception(e)
    finally:
        logger.info("Commiting changes to DB")
        metrics_database.conn.commit()
    return None
    def gatherCitations(self, PIDs, metrics_database=None):
        """
        Collect citation records whose target matches any of the given PIDs.

        :param PIDs: iterable of dataset identifiers; Dryad-style
            identifiers carrying a query string are truncated at '?'
            before matching
        :param metrics_database: optional already-connected
            MetricsDatabase; a new connection is created when None
        :return: tuple (citationCount, citations) where citations is a
            list of dicts keyed by citation column name
        """
        t_0 = time.time()
        self.logger.debug("enter gatherCitations")
        if metrics_database is None:
            metrics_database = MetricsDatabase()
            metrics_database.connect()
        csr = metrics_database.getCursor()
        # Column order here defines both the SELECT list and the keys of
        # each citation dict below.
        columns = ("target_id", "source_id", "source_url",
                   "link_publication_date", "origin", "title", "publisher",
                   "journal", "volume", "page", "year_of_publishing")
        sql = 'SELECT ' + ','.join(columns) + ' FROM citations;'

        citations = []
        try:
            csr.execute(sql)
            for row in csr.fetchall():
                for pid in PIDs:
                    # Special use case for Dryad datasets: drop the query
                    # string before comparing.
                    if '?' in pid.lower():
                        pid = pid.split("?")[0]
                    if row[0].lower() in pid.lower():
                        citations.append(dict(zip(columns, row)))
                        # We don't want to add duplicate citations for all
                        # the objects of the dataset.
                        break
        except Exception as e:
            # Log through the instance logger instead of print() so the
            # failure reaches the log stream.
            self.logger.exception('Database error!')
            self.logger.exception(e)
        self.logger.debug("exit gatherCitations, elapsed=%fsec",
                          time.time() - t_0)
        return len(citations), citations
    def formatDataPerCatalog(self, data, catalogPIDs):
        """
        Assemble per-dataset download, view and citation counts for a catalog.

        :param data: search-aggregation response; buckets are read from
            data["aggregations"]["pid_list"]["buckets"]
        :param catalogPIDs: mapping of dataset identifier -> PID list
        :return: tuple (results dict, empty dict)
        """
        results = {
            "downloads": [],
            "views": [],
            "citations": [],
            "datasets": [],
            "country": [],
            "months": []
            # "tempDict" : []
        }

        db = MetricsDatabase()
        db.connect()
        # One citation count per dataset, reusing a single DB connection.
        for dataset_id in catalogPIDs:
            citation_count, _ = self.gatherCitations(
                catalogPIDs[dataset_id], metrics_database=db)
            results["citations"].append(citation_count)

        # Pick out the DATA and METADATA buckets from the aggregation.
        data_counts, metadata_counts = {}, {}
        for bucket in data["aggregations"]["pid_list"]["buckets"]:
            fmt = bucket["key"]["format"]
            if fmt == "DATA":
                data_counts = bucket
            if fmt == "METADATA":
                metadata_counts = bucket

        for dataset_id in catalogPIDs:
            downloads = (
                data_counts[dataset_id]["buckets"]["pid.key"]["doc_count"]
                if dataset_id in data_counts else 0)
            views = (
                metadata_counts[dataset_id]["buckets"]["pid.key"]["doc_count"]
                if dataset_id in metadata_counts else 0)
            results["downloads"].append(downloads)
            results["views"].append(views)
            results["datasets"].append(dataset_id)

        return results, {}
 def run(self):
     """
     Register the periodic jobs and poll the scheduler forever.

     NOTE(review): metrics_database and metrics_reporter are referenced
     only by the commented-out schedules below and are otherwise unused.
     This method never returns.
     """
     metrics_database = MetricsDatabase()
     metrics_reporter = MetricsReporter()
     # self.job runs at three cadences: every minute, every hour, and
     # daily at 00:30.
     schedule.every(1).minute.do(job_func=self.job)
     schedule.every().hour.do(job_func=self.job)
     schedule.every().day.at("00:30").do(job_func=self.job)
     # schedule.every().day.at("01:30").do(job_func=metrics_reporter.scheduler())
     # schedule.every().day.at("02:30").do(job_func=metrics_database.getCitations())
     # Poll pending jobs once per second, forever.
     while True:
         schedule.run_pending()
         time.sleep(1)
# Esempio n. 7 ("Example No. 7") — stray non-code text from the snippet
# source, commented out so the file stays parseable.
# 0
    def queue_citation_object(self, citations_request):
        """
        Queue a citation request for later processing.

        :param citations_request: citation request payload handed to
            MetricsDatabase.queueCitationRequest
        :return: dict with "message" and "status_code" — "202" on
            success, "500" on any failure
        """
        response = {"message": "", "status_code": ""}

        try:
            metrics_database = MetricsDatabase()
            metrics_database.queueCitationRequest(citations_request)
            response["message"] = "Successful"
            response["status_code"] = "202"
        except Exception as e:
            # str(e): the original stored the exception object itself,
            # which is not serializable in a JSON response.
            response["message"] = str(e)
            response["status_code"] = "500"

        return response
def updateCitationsDatabase(seriesId, PID_List):
    """
    Tag citation_metadata rows with a portal series identifier.

    For every citation target_id that occurs as a substring of one of the
    portal's PIDs, add seriesId to that row's portal_id array (skipping
    rows that already contain it).

    :param seriesId: portal series identifier to record
    :param PID_List: list of PIDs belonging to the portal
    :return: None
    """
    logger = getESSyncLogger(name="es_eventlog")
    t_start = time.time()

    logger.info("Beginning citations database table updates")

    # establish a connection
    metrics_database = MetricsDatabase()
    metrics_database.connect()
    csr = metrics_database.getCursor()

    # Initialized up front: the original left this undefined (NameError
    # below) when the query failed or returned zero rows.
    citation_pid_set = set()

    # get the list of all identifiers that have citations
    sql = 'SELECT target_id FROM citations;'
    try:
        csr.execute(sql)
        if csr.rowcount > 0:
            for cit_tup in csr.fetchall():
                citation_pid_set.add(cit_tup[0])
        logger.debug("retrieved citations successfully from the DB")
    # OperationalError subclasses DatabaseError; catch it first so the
    # handler is reachable.
    except psycopg2.OperationalError as e:
        logger.exception('Operational error!')
        logger.exception(e)
    except psycopg2.DatabaseError as e:
        logger.exception('Database error!')
        logger.exception(e)
    finally:
        logger.info("Commiting changes to DB")
        metrics_database.conn.commit()

    # keep only the citation identifiers that belong to this portal
    portal_citation_identifiers = set()
    for identifier in citation_pid_set:
        for portal_PID in PID_List:
            if identifier in portal_PID:
                portal_citation_identifiers.add(identifier)

    # store the seriesId for each matching identifier
    updateCount = 0
    for identifier in portal_citation_identifiers:
        try:
            # Parameterized queries replace string-built SQL throughout.
            # psycopg2 adapts a Python list to a Postgres ARRAY literal.
            csr.execute(
                "SELECT portal_id FROM citation_metadata WHERE target_id = %s;",
                (identifier,))
            rows = csr.fetchall()

            if not rows or rows[0][0] is None:
                sql = ("UPDATE citation_metadata SET portal_id = %s "
                       "WHERE target_id = %s;")
                params = ([seriesId], identifier)
            else:
                # append only if the given seriesId does not already exist
                # in the 'portal_id' array
                sql = ("UPDATE citation_metadata SET portal_id = "
                       "(select array_agg(distinct e) from unnest(portal_id || %s) e) "
                       "where not portal_id @> %s AND target_id = %s;")
                params = ([seriesId], [seriesId], identifier)

            csr.execute(sql, params)
            csrUpdateCount = csr.rowcount
            updateCount += csrUpdateCount

            if csrUpdateCount:
                logger.info(
                    "Added citation for target id - %s with series ID - %s",
                    identifier, seriesId)
            else:
                logger.info("Target id - %s already has series ID - %s",
                            identifier, seriesId)

        # Narrowest first: OperationalError subclasses DatabaseError.
        except psycopg2.OperationalError as e:
            logger.exception('Operational error!')
            logger.exception(e)
        except psycopg2.DatabaseError as e:
            logger.exception('Database error!')
            logger.exception(e)
        except Exception as e:
            logger.exception('Other error!')
            logger.exception(e)
        finally:
            logger.debug("Commiting changes to DB")
            # commit per identifier so one failure doesn't roll back the rest
            metrics_database.conn.commit()
    logger.info(
        "Successfully updated %s the citations pids for seriesId : %s, "
        "elapsed=%fsec",
        updateCount, seriesId, time.time() - t_start)