Exemple #1
0
def wiki_request(params, action="query", max_retries=10):
    """Send a GET request to ``URL`` and return the decoded JSON response.

    The caller's ``params`` are copied and extended with the fixed
    ``format``/``formatversion``/``action``/``utf8`` fields before sending,
    so the dictionary passed in is left untouched.

    Args:
        params: Mapping of request parameters.
        action: Value sent as the ``action`` parameter.
        max_retries: How many times to retry after a connection error
            before giving up.

    Returns:
        The decoded JSON body, or ``None`` if the body is not valid JSON.

    Raises:
        requests.ConnectionError: If the connection still fails after
            ``max_retries`` retries.
    """
    query = dict(params)
    query.update({
        'format': 'json',
        'formatversion': 2,
        'action': action,
        'utf8': '',
    })

    retries_left = max_retries
    while True:
        try:
            # The context manager closes the session's connections even
            # when the request raises.
            with Session() as s:
                # Ignore proxy and other settings from the environment.
                s.trust_env = False
                return s.get(URL, params=query, headers=HEADER).json()
        except requests.ConnectionError as cer:
            print("Connection Error")
            print(cer)
            if retries_left <= 0:
                raise
            # Retry with the same query, including the requested action.
            retries_left -= 1
        except JSONDecodeError:
            return None
    def annotate(self,
                 text,
                 annotators="tokenize,ssplit,pos",
                 pattern=None,
                 runnr=1):
        """POST ``text`` to ``self.server_url`` and return the parsed JSON.

        Any failure (connection problem, HTTP 500, undecodable body, ...)
        triggers a retry with the same arguments. Once ``runnr`` exceeds 10
        the text is printed and an empty dict is returned instead.

        Args:
            text: The string to annotate; sent UTF-8 encoded as the body.
            annotators: Comma-separated annotator names placed in the
                ``annotators`` property.
            pattern: Optional value sent as the ``pattern`` query parameter.
            runnr: Number of the current attempt (1-based); callers
                normally leave the default.

        Returns:
            The decoded JSON response, or ``{}`` when all attempts failed.
        """
        assert isinstance(text, str)

        properties = {
            "annotators": annotators,
            # Setting enforceRequirements to skip some annotators and make the process faster
            "enforceRequirements": "true",
            'timeout': 6000000000000,
            'tokenize.options': 'untokenizable=noneDelete'
        }
        params = {'properties': str(properties)}
        if pattern is not None:
            params['pattern'] = pattern

        try:
            # Preliminary GET on the shared session; presumably a check that
            # the server is reachable before posting -- TODO confirm.
            with self.session.get(self.server_url):
                data = text.encode('utf8')
                # A fresh session per request, closed on exit so its
                # connections are not leaked.
                with Session() as rs:
                    # Ignore proxy and other settings from the environment.
                    rs.trust_env = False
                    r = rs.post(self.server_url,
                                params=params,
                                data=data,
                                headers={'Connection': 'close'})

                    if r.status_code == 500:
                        print(r.content)
                        raise Exception500
                    output = r.json()
        except Exception:
            # Deliberately broad: every failure is retried. KeyboardInterrupt
            # and SystemExit are not caught, so the user can still abort.
            print("Caught Exception")
            if runnr > 10:
                print(text.encode('utf8'))
                return {}
            # Retry with the caller's original annotators and pattern.
            return self.annotate(text,
                                 annotators=annotators,
                                 pattern=pattern,
                                 runnr=runnr + 1)
        return output