def _getNextArticleBatch(self):
    """
    Download the next page of articles for the event identified by
    self.queryParams["eventUri"] and append the results to self._articleList.
    Updates self._totalPages from the response so iteration knows when to stop.
    """
    eventUri = self.queryParams["eventUri"]
    # move to the next page to download
    self._articlePage += 1
    # if we have already obtained all pages, then exit
    if self._totalPages is not None and self._articlePage > self._totalPages:
        return
    if self._er._verboseOutput:
        # lazy %-args: the message is only formatted if debug logging is enabled
        logger.debug("Downloading article page %d from event %s", self._articlePage, eventUri)
    self.setRequestedResult(RequestEventArticles(
        page=self._articlePage,
        sortBy=self._articlesSortBy, sortByAsc=self._articlesSortByAsc,
        returnInfo=self._returnInfo, **self.queryParams))
    res = self._er.execQuery(self)
    if "error" in res:
        logger.error(res["error"])
    else:
        # the response nests the article list under the event uri
        articles = res.get(eventUri, {}).get("articles", {})
        self._totalPages = articles.get("pages", 0)
        self._articleList.extend(articles.get("results", []))
def count(self, eventRegistry):
    """
    return the number of events that match the criteria
    @param eventRegistry: instance of EventRegistry class used to execute the query
    """
    self.setRequestedResult(RequestEventsInfo())
    res = eventRegistry.execQuery(self)
    if "error" in res:
        logger.error(res["error"])
    # on error the nested lookup falls through to the default of 0
    return res.get("events", {}).get("totalResults", 0)
def count(self, eventRegistry):
    """
    return the number of articles that match the criteria
    @param eventRegistry: instance of EventRegistry class. used to obtain the necessary data
    """
    self.setRequestedResult(RequestEventArticles(**self.queryParams))
    res = eventRegistry.execQuery(self)
    if "error" in res:
        logger.error(res["error"])
    # the article info is nested under the event uri; missing keys default to 0
    eventData = res.get(self.queryParams["eventUri"], {})
    return eventData.get("articles", {}).get("totalResults", 0)
def jsonRequestAnalytics(self, methodUrl, paramDict):
    """
    call the analytics service to execute a method like annotation, categorization, etc.
    Failed requests are retried up to self._repeatFailedRequestCount times
    (indefinitely if the count is negative).
    @param methodUrl: api endpoint url to call
    @param paramDict: a dictionary with values to send to the api endpoint
    @returns: parsed json body of the first successful response
    @raises: the last stored exception if no attempt succeeded
    """
    if self._apiKey:
        paramDict["apiKey"] = self._apiKey
    self._lock.acquire()
    # try/finally guarantees the lock is released even if an unexpected
    # exception (e.g. KeyboardInterrupt during sleep) escapes the retry loop
    try:
        returnData = None
        respInfo = None
        self._lastException = None
        self._headers = {}      # reset any past data
        tryCount = 0
        # loop-invariant: compute the endpoint url once
        url = self._hostAnalytics + methodUrl
        while self._repeatFailedRequestCount < 0 or tryCount < self._repeatFailedRequestCount:
            tryCount += 1
            try:
                # make the request
                respInfo = self._reqSession.post(url, json=paramDict, timeout=60)
                # remember the returned headers
                self._headers = respInfo.headers
                # non-200 responses are treated as errors and may be retried
                if respInfo.status_code != 200:
                    raise Exception(respInfo.text)
                returnData = respInfo.json()
                break
            except Exception as ex:
                self._lastException = ex
                if self._verboseOutput:
                    logger.error("Event Registry Analytics exception while executing the request:")
                    logger.error("endpoint: %s\nParams: %s" % (url, json.dumps(paramDict, indent=4)))
                    self.printLastException()
                # in case of invalid input parameters, don't try to repeat the search
                # but simply re-raise the same exception below
                if respInfo is not None and respInfo.status_code in self._stopStatusCodes:
                    break
                # fix: message now matches the actual sleep duration below
                logger.info("The request will be automatically repeated in 5 seconds...")
                time.sleep(5)   # sleep on error before retrying
    finally:
        self._lock.release()
    if returnData is None:
        raise self._lastException or Exception("No valid return data provided")
    return returnData
def _getNextEventBatch(self):
    """
    Download the next page of events and append the results to self._eventList.
    Updates self._totalPages from the response so iteration knows when to stop.
    """
    self._eventPage += 1
    # if we have already obtained all pages, then exit
    if self._totalPages is not None and self._eventPage > self._totalPages:
        return
    self.setRequestedResult(RequestEventsInfo(
        page=self._eventPage, count=self._eventBatchSize,
        sortBy=self._sortBy, sortByAsc=self._sortByAsc,
        returnInfo=self._returnInfo))
    if self._er._verboseOutput:
        # lazy %-args: only formatted when debug logging is enabled
        logger.debug("Downloading event page %d...", self._eventPage)
    res = self._er.execQuery(self)
    if "error" in res:
        logger.error("Error while obtaining a list of events: " + res["error"])
    else:
        events = res.get("events", {})
        self._totalPages = events.get("pages", 0)
        self._eventList.extend(events.get("results", []))
def _getNextArticleBatch(self):
    """
    Download the next page of articles and append the results to self._articleList.
    Updates self._totalPages from the response so iteration knows when to stop.
    """
    self._articlePage += 1
    # if we have already obtained all pages, then exit
    if self._totalPages is not None and self._articlePage > self._totalPages:
        return
    self.setRequestedResult(RequestArticlesInfo(
        page=self._articlePage,
        sortBy=self._sortBy, sortByAsc=self._sortByAsc,
        returnInfo=self._returnInfo))
    if self._er._verboseOutput:
        # lazy %-args: only formatted when debug logging is enabled
        logger.debug("Downloading article page %d...", self._articlePage)
    res = self._er.execQuery(self)
    if "error" in res:
        logger.error("Error while obtaining a list of articles: " + res["error"])
    else:
        articles = res.get("articles", {})
        self._totalPages = articles.get("pages", 0)
        self._articleList.extend(articles.get("results", []))
def jsonRequest(self, methodUrl, paramDict, customLogFName = None, allowUseOfArchive = None):
    """
    make a request for json data. repeat it _repeatFailedRequestCount times, if they fail (indefinitely if _repeatFailedRequestCount = -1)
    @param methodUrl: url on er (e.g. "/api/v1/article")
    @param paramDict: optional object containing the parameters to include in the request (e.g. { "articleUri": "123412342" }).
    @param customLogFName: potentially a file name where the request information can be logged into
    @param allowUseOfArchive: potentially override the value set when constructing EventRegistry class.
        If not None set it to boolean to determine if the request can be executed on the archive data or not
        If left to None then the value set in the EventRegistry constructor will be used
    @returns: parsed json body of the first successful response
    @raises: the last stored exception if no attempt succeeded
    """
    self._sleepIfNecessary()
    self._lastException = None
    self._lock.acquire()
    # try/finally guarantees the lock is released even if an unexpected
    # exception (e.g. KeyboardInterrupt during sleep) escapes the retry loop
    try:
        # optionally log the request (best-effort: failures are remembered but do not abort the request)
        if self._logRequests:
            try:
                with open(customLogFName or self._requestLogFName, "a") as log:
                    if paramDict is not None:
                        log.write("# " + json.dumps(paramDict) + "\n")
                    log.write(methodUrl + "\n\n")
            except Exception as ex:
                self._lastException = ex
        if paramDict is None:
            paramDict = {}
        # if we have api key then add it to the paramDict
        if self._apiKey:
            paramDict["apiKey"] = self._apiKey
        # if we want to ignore the archive, set the flag
        if allowUseOfArchive is not None:
            if not allowUseOfArchive:
                paramDict["forceMaxDataTimeWindow"] = 31
        # if we didn't override the parameter then check what we've set when constructing the EventRegistry class
        elif self._allowUseOfArchive == False:
            paramDict["forceMaxDataTimeWindow"] = 31
        # if we also have some extra parameters, then set those too
        if self._extraParams:
            paramDict.update(self._extraParams)
        tryCount = 0
        self._headers = {}      # reset any past data
        returnData = None
        respInfo = None
        url = self._host + methodUrl
        while self._repeatFailedRequestCount < 0 or tryCount < self._repeatFailedRequestCount:
            tryCount += 1
            try:
                # make the request
                respInfo = self._reqSession.post(url, json=paramDict, timeout=60)
                # remember the returned headers
                self._headers = respInfo.headers
                # non-200 responses are treated as errors and may be retried
                if respInfo.status_code != 200:
                    raise Exception(respInfo.text)
                # did we get a warning. if yes, print it
                if self.getLastHeader("warning"):
                    logger.warning("=========== WARNING ===========\n%s\n===============================" % (self.getLastHeader("warning")))
                # remember the available requests
                self._dailyAvailableRequests = tryParseInt(self.getLastHeader("x-ratelimit-limit", ""), val = -1)
                self._remainingAvailableRequests = tryParseInt(self.getLastHeader("x-ratelimit-remaining", ""), val = -1)
                returnData = respInfo.json()
                break
            except Exception as ex:
                self._lastException = ex
                if self._verboseOutput:
                    logger.error("Event Registry exception while executing the request:")
                    logger.error("endpoint: %s\nParams: %s" % (url, json.dumps(paramDict, indent=4)))
                    self.printLastException()
                # in case of invalid input parameters, don't try to repeat the search
                # but simply re-raise the same exception below
                if respInfo is not None and respInfo.status_code in self._stopStatusCodes:
                    break
                # in case of the other exceptions (maybe the service is temporarily unavailable) we try to repeat the query
                # fix: message now matches the actual sleep duration below
                logger.info("The request will be automatically repeated in 5 seconds...")
                time.sleep(5)   # sleep on error before retrying
    finally:
        self._lock.release()
    if returnData is None:
        raise self._lastException or Exception("No valid return data provided")
    return returnData
def printLastException(self):
    """Log the most recently stored exception (self._lastException) at error level."""
    logger.error("%s" % self._lastException)