def wiki_request(params, action="query", max_retries=10):
    """Send a GET request to the module-level ``URL`` and return the decoded JSON.

    The standard request fields (``format``, ``formatversion``, ``action`` and
    ``utf8``) are written into ``params`` in place, so the caller's dict is
    modified.

    Args:
        params: Dict of query parameters; updated in place with the standard
            fields listed above.
        action: Value sent as the ``action`` parameter.
        max_retries: How many times to retry after a connection error before
            giving up and re-raising it.

    Returns:
        The decoded JSON response, or ``None`` if the response body is not
        valid JSON.

    Raises:
        requests.ConnectionError: If the request still fails after
            ``max_retries`` retries.
    """
    params['format'] = 'json'
    params['formatversion'] = 2
    params['action'] = action
    params['utf8'] = ''

    failures = 0
    # Retry in a loop rather than by recursion: the requested ``action`` is
    # kept on every attempt and a long outage cannot exhaust the call stack.
    while True:
        try:
            # The context manager closes the session's connections on exit.
            with Session() as session:
                # Ignore proxy/auth settings taken from the environment.
                session.trust_env = False
                return session.get(URL, params=params, headers=HEADER).json()
        except requests.ConnectionError as cer:
            print("Connection Error")
            print(cer)
            failures += 1
            if failures > max_retries:
                raise
        except JSONDecodeError:
            return None
def annotate(self, text, annotators="tokenize,ssplit,pos", pattern=None, runnr=1):
    """POST ``text`` to ``self.server_url`` for annotation and return the JSON reply.

    Any failure (connection problem, HTTP 500, undecodable response) triggers
    a retry with the same arguments. Once ``runnr`` exceeds 10 the text is
    printed and an empty dict is returned instead.

    Args:
        text: The string to annotate; sent UTF-8 encoded as the request body.
        annotators: Comma-separated annotator names placed in the request
            properties.
        pattern: Optional value sent as the ``pattern`` query parameter.
        runnr: 1-based attempt counter used internally by the retry logic.

    Returns:
        The decoded JSON response, or ``{}`` when every attempt failed.
    """
    assert isinstance(text, str)
    properties = {
        "annotators": annotators,
        # Setting enforceRequirements to skip some annotators and make the process faster
        "enforceRequirements": "true",
        'timeout': 6000000000000,
        'tokenize.options': 'untokenizable=noneDelete',
    }
    # The properties are sent as the string form of the dict.
    params = {'properties': str(properties)}
    if pattern is not None:
        params['pattern'] = pattern

    try:
        # GET the server URL first; if that fails we fall into the retry
        # path below without posting (presumably a reachability probe).
        with self.session.get(self.server_url):
            data = text.encode('utf8')
            # A fresh session per request; the context manager closes it.
            with Session() as rs:
                # Ignore proxy/auth settings taken from the environment.
                rs.trust_env = False
                r = rs.post(
                    self.server_url,
                    params=params,
                    data=data,
                    headers={'Connection': 'close'},
                )
                if r.status_code == 500:
                    print(r.content)
                    raise Exception500
                output = r.json()
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # interpreter exit are not swallowed by the retry loop.
        # NOTE(review): assumes Exception500 derives from Exception - confirm.
        print("Caught Exception")
        if runnr > 10:
            print(text.encode('utf8'))
            return {}
        # Retry with the caller's original annotators and pattern.
        return self.annotate(text, annotators, pattern=pattern, runnr=runnr + 1)
    return output