def _getNextArticleBatch(self):
        """
        request the next page of articles for the event specified in the query parameters
        and append the obtained articles to the internal article list.
        nothing is requested once the page counter exceeds the known number of pages
        """
        eventUri = self.queryParams["eventUri"]
        # advance the page counter first - it is incremented even when there is nothing left to download
        self._articlePage += 1
        if self._totalPages is not None and self._articlePage > self._totalPages:
            # all available pages were already obtained
            return
        if self._er._verboseOutput:
            message = "Downloading article page %d from event %s" % (self._articlePage, eventUri)
            logger.debug(message)

        request = RequestEventArticles(
            page=self._articlePage,
            sortBy=self._articlesSortBy,
            sortByAsc=self._articlesSortByAsc,
            returnInfo=self._returnInfo,
            **self.queryParams)
        self.setRequestedResult(request)
        response = self._er.execQuery(self)

        failed = "error" in response
        if failed:
            logger.error(response["error"])
        # the articles are stored under the uri of the event
        articleInfo = response.get(eventUri, {}).get("articles", {})
        if not failed:
            # the number of pages is only updated from a response without an error
            self._totalPages = articleInfo.get("pages", 0)
        self._articleList.extend(articleInfo.get("results", []))
Beispiel #2
0
 def count(self, eventRegistry):
     """
     return the number of events that match the criteria.
     an error reported in the response is logged and 0 is returned when no total is available
     @param eventRegistry: object whose execQuery() method is used to execute this query
     """
     self.setRequestedResult(RequestEventsInfo())
     response = eventRegistry.execQuery(self)
     if "error" in response:
         logger.error(response["error"])
     eventsInfo = response.get("events", {})
     return eventsInfo.get("totalResults", 0)
 def count(self, eventRegistry):
     """
     return the number of articles that match the criteria.
     an error reported in the response is logged and 0 is returned when no total is available
     @param eventRegistry: instance of EventRegistry class. used to obtain the necessary data
     """
     request = RequestEventArticles(**self.queryParams)
     self.setRequestedResult(request)
     response = eventRegistry.execQuery(self)
     if "error" in response:
         logger.error(response["error"])
     # the article information is stored under the uri of the event
     eventInfo = response.get(self.queryParams["eventUri"], {})
     articleInfo = eventInfo.get("articles", {})
     return articleInfo.get("totalResults", 0)
Beispiel #4
0
    def jsonRequestAnalytics(self, methodUrl, paramDict):
        """
        call the analytics service to execute a method like annotation, categorization, etc.
        a failed request is repeated until _repeatFailedRequestCount attempts were made
        (indefinitely if _repeatFailedRequestCount is negative)
        @param methodUrl: api endpoint url to call
        @param paramDict: a dictionary with values to send to the api endpoint.
            if an api key is set, it is added to this dictionary under the "apiKey" key
        @returns: the json decoded content of the response
        @raises Exception: the last exception that occurred, or a generic exception if no valid data was obtained
        """
        # number of seconds to wait before repeating a failed request
        retryDelaySec = 5
        if self._apiKey:
            paramDict["apiKey"] = self._apiKey
        self._lock.acquire()
        # release the lock in a finally clause - otherwise any exception that escapes
        # the retry loop would leave the lock held and block all further requests
        try:
            returnData = None
            self._lastException = None
            self._headers = {}  # reset any past data
            # compute the url before the retry loop so that it is always defined when an error is reported
            url = self._hostAnalytics + methodUrl
            tryCount = 0
            while self._repeatFailedRequestCount < 0 or tryCount < self._repeatFailedRequestCount:
                tryCount += 1
                # forget the response of any previous attempt
                respInfo = None
                try:
                    # make the request
                    respInfo = self._reqSession.post(url, json=paramDict, timeout=60)
                    # remember the returned headers
                    self._headers = respInfo.headers
                    # treat any status code other than 200 as a failure
                    if respInfo.status_code != 200:
                        raise Exception(respInfo.text)

                    returnData = respInfo.json()
                    break
                except Exception as ex:
                    self._lastException = ex
                    if self._verboseOutput:
                        logger.error("Event Registry Analytics exception while executing the request:")
                        logger.error("endpoint: %s\nParams: %s" % (url, json.dumps(paramDict, indent=4)))
                        self.printLastException()
                    # in case of invalid input parameters, don't try to repeat the request - the exception is raised below
                    if respInfo is not None and respInfo.status_code in self._stopStatusCodes:
                        break
                    # if this was the last allowed attempt, don't announce a repeat and don't wait for nothing
                    if self._repeatFailedRequestCount >= 0 and tryCount >= self._repeatFailedRequestCount:
                        break
                    logger.info("The request will be automatically repeated in %d seconds...", retryDelaySec)
                    time.sleep(retryDelaySec)
        finally:
            self._lock.release()
        if returnData is None:
            raise self._lastException or Exception("No valid return data provided")
        return returnData
Beispiel #5
0
 def _getNextEventBatch(self):
     """
     request the next page of events and append the obtained events to the internal event list.
     nothing is requested once the page counter exceeds the known number of pages
     """
     # advance the page counter first - it is incremented even when there is nothing left to download
     self._eventPage += 1
     if self._totalPages is not None and self._eventPage > self._totalPages:
         # all available pages were already obtained
         return
     request = RequestEventsInfo(
         page=self._eventPage,
         count=self._eventBatchSize,
         sortBy=self._sortBy,
         sortByAsc=self._sortByAsc,
         returnInfo=self._returnInfo)
     self.setRequestedResult(request)
     if self._er._verboseOutput:
         logger.debug("Downloading event page %d..." % (self._eventPage))
     response = self._er.execQuery(self)

     failed = "error" in response
     if failed:
         logger.error("Error while obtaining a list of events: " + response["error"])
     eventsInfo = response.get("events", {})
     if not failed:
         # the number of pages is only updated from a response without an error
         self._totalPages = eventsInfo.get("pages", 0)
     self._eventList.extend(eventsInfo.get("results", []))
Beispiel #6
0
 def _getNextArticleBatch(self):
     """
     request the next page of articles and append the obtained articles to the internal article list.
     nothing is requested once the page counter exceeds the known number of pages
     """
     # advance the page counter first - it is incremented even when there is nothing left to download
     self._articlePage += 1
     if self._totalPages is not None and self._articlePage > self._totalPages:
         # all available pages were already obtained
         return
     request = RequestArticlesInfo(
         page=self._articlePage,
         sortBy=self._sortBy,
         sortByAsc=self._sortByAsc,
         returnInfo=self._returnInfo)
     self.setRequestedResult(request)
     if self._er._verboseOutput:
         logger.debug("Downloading article page %d..." % (self._articlePage))
     response = self._er.execQuery(self)

     failed = "error" in response
     if failed:
         logger.error("Error while obtaining a list of articles: " + response["error"])
     articlesInfo = response.get("articles", {})
     if not failed:
         # the number of pages is only updated from a response without an error
         self._totalPages = articlesInfo.get("pages", 0)
     self._articleList.extend(articlesInfo.get("results", []))
Beispiel #7
0
    def jsonRequest(self, methodUrl, paramDict, customLogFName = None, allowUseOfArchive = None):
        """
        make a request for json data. repeat it _repeatFailedRequestCount times, if they fail (indefinitely if _repeatFailedRequestCount = -1)
        @param methodUrl: url on er (e.g. "/api/v1/article")
        @param paramDict: optional object containing the parameters to include in the request (e.g. { "articleUri": "123412342" }).
            the api key, the archive flag and any extra parameters are added to this dictionary
        @param customLogFName: potentially a file name where the request information can be logged into
        @param allowUseOfArchive: potentially override the value set when constructing EventRegistry class.
            If not None set it to boolean to determine if the request can be executed on the archive data or not
            If left to None then the value set in the EventRegistry constructor will be used
        @returns: the json decoded content of the response
        @raises Exception: the last exception that occurred, or a generic exception if no valid data was obtained
        """
        # number of seconds to wait before repeating a failed request
        retryDelaySec = 5
        self._sleepIfNecessary()
        self._lastException = None

        self._lock.acquire()
        # release the lock in a finally clause - otherwise any exception that escapes
        # (e.g. while building the url) would leave the lock held and block all further requests
        try:
            if self._logRequests:
                # logging of the request is best-effort: a failure is remembered but does not prevent the request
                try:
                    with open(customLogFName or self._requestLogFName, "a") as log:
                        if paramDict is not None:
                            log.write("# " + json.dumps(paramDict) + "\n")
                        log.write(methodUrl + "\n\n")
                except Exception as ex:
                    self._lastException = ex

            if paramDict is None:
                paramDict = {}
            # if we have api key then add it to the paramDict
            if self._apiKey:
                paramDict["apiKey"] = self._apiKey
            # if we want to ignore the archive, set the flag
            if allowUseOfArchive is not None:
                if not allowUseOfArchive:
                    paramDict["forceMaxDataTimeWindow"] = 31
            # if we didn't override the parameter then check what we've set when constructing the EventRegistry class
            elif self._allowUseOfArchive == False:
                paramDict["forceMaxDataTimeWindow"] = 31
            # if we also have some extra parameters, then set those too
            if self._extraParams:
                paramDict.update(self._extraParams)

            tryCount = 0
            self._headers = {}  # reset any past data
            returnData = None
            url = self._host + methodUrl
            while self._repeatFailedRequestCount < 0 or tryCount < self._repeatFailedRequestCount:
                tryCount += 1
                # forget the response of any previous attempt
                respInfo = None
                try:
                    # make the request
                    respInfo = self._reqSession.post(url, json=paramDict, timeout=60)
                    # remember the returned headers
                    self._headers = respInfo.headers
                    # treat any status code other than 200 as a failure
                    if respInfo.status_code != 200:
                        raise Exception(respInfo.text)
                    # did we get a warning. if yes, print it
                    warning = self.getLastHeader("warning")
                    if warning:
                        logger.warning("=========== WARNING ===========\n%s\n===============================" % (warning))
                    # remember the available requests
                    self._dailyAvailableRequests = tryParseInt(self.getLastHeader("x-ratelimit-limit", ""), val = -1)
                    self._remainingAvailableRequests = tryParseInt(self.getLastHeader("x-ratelimit-remaining", ""), val = -1)

                    returnData = respInfo.json()
                    break
                except Exception as ex:
                    self._lastException = ex
                    if self._verboseOutput:
                        logger.error("Event Registry exception while executing the request:")
                        logger.error("endpoint: %s\nParams: %s" % (url, json.dumps(paramDict, indent=4)))
                        self.printLastException()
                    # in case of invalid input parameters, don't try to repeat the search - the exception is raised below
                    if respInfo is not None and respInfo.status_code in self._stopStatusCodes:
                        break
                    # if this was the last allowed attempt, don't announce a repeat and don't wait for nothing
                    if self._repeatFailedRequestCount >= 0 and tryCount >= self._repeatFailedRequestCount:
                        break
                    # in case of the other exceptions (maybe the service is temporarily unavailable) we try to repeat the query
                    logger.info("The request will be automatically repeated in %d seconds...", retryDelaySec)
                    time.sleep(retryDelaySec)
        finally:
            self._lock.release()
        if returnData is None:
            raise self._lastException or Exception("No valid return data provided")
        return returnData
Beispiel #8
0
 def printLastException(self):
     """write the string form of the last remembered exception to the log as an error"""
     message = str(self._lastException)
     logger.error(message)