Esempio n. 1
0
class CoreNlpClient:
    """Wrapper around a local Stanford CoreNLP installation.

    Text can be annotated in two ways: by running the CoreNLP command-line
    pipeline once per call, or by sending HTTP requests to a CoreNLP server
    process previously launched with :meth:`start`.
    """

    # Address the launched server is polled and queried on.
    SERVER_URL = 'http://localhost:9000'
    # Shell command templates. Placeholders: (max heap size, extra arguments).
    COMMAND_PATTERN = 'java -Xmx{} -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLP {}'
    # Placeholders: (max heap size, server timeout).
    SERVER_COMMAND_PATTERN = 'java -Xmx{} -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -timeout {}'

    def __init__(self, cwd, memory='4g', timeout=15000, verbose=False):
        """Store the configuration; no process is started here.

        Args:
            cwd: Directory the java commands are run in. Because the class
                path is ``"*"``, it has to contain the CoreNLP jars; result
                files of command-line runs are also read from it.
            memory: Value passed to java's ``-Xmx`` option.
            timeout: Value passed to the server's ``-timeout`` option
                (presumably milliseconds -- confirm against CoreNLP docs).
            verbose: When true, the java processes inherit this process's
                stdout/stderr; otherwise their output is discarded.
        """
        self._cwd = cwd
        self._memory = memory
        self._timeout = timeout
        self._verbose = verbose

        self._process = None
        self._http_client = None

    def start(self):
        """Launch the CoreNLP server and block until it reports ready."""
        command = self.SERVER_COMMAND_PATTERN.format(self._memory,
                                                     self._timeout)
        self._process = self._open_process(command, wait=False)
        self._wait_for_server()
        self._http_client = StanfordCoreNLP(self.SERVER_URL)

    def stop(self):
        """Kill the server process launched by :meth:`start`, if any.

        Calling this before :meth:`start`, or more than once, is a no-op.
        """
        if self._process is None:
            return
        self._process.kill()
        # Reap the killed process so it does not linger as a zombie.
        self._process.wait()
        self._process = None

    def annotate(self, text, annotators, properties=None, http=False):
        """Annotate ``text`` and return CoreNLP's JSON output as a dict.

        Args:
            text: The text to annotate.
            annotators: Iterable of annotator names; joined with commas.
            properties: Optional dict of additional CoreNLP properties.
                ``outputFormat`` and ``annotators`` are always overridden.
            http: When true, use the server started by :meth:`start`;
                otherwise run a one-off command-line process.

        Raises:
            Exception: If ``http`` is true but :meth:`start` has not been
                called, or if CoreNLP returned something other than a JSON
                object (the raw output is the exception's argument).
        """
        # Caller-supplied properties first so the two forced keys win.
        properties = {
            **(properties or {}),
            'outputFormat': 'json',
            'annotators': ','.join(annotators),
        }

        # Run annotators via HTTP request or command line execution
        if http:
            if self._http_client is None:
                raise Exception('CoreNLP client is not running!')
            result = self._http_client.annotate(text, properties)
        else:
            result = self._annotate_cmd(text, properties)

        # Raise exceptions thrown by CoreNLP
        if not isinstance(result, dict):
            raise Exception(result)

        return result

    def _annotate_cmd(self, text, properties):
        """Annotate ``text`` with a one-off CoreNLP command-line run.

        Returns the parsed JSON output, or the raw output text when it is
        not valid JSON (the caller turns that into an exception).
        """
        with NamedTemporaryFile(mode='w') as text_file:
            # Write text to a temporary file so that it can be processed by
            # the CoreNLP process
            text_file.write(text)
            text_file.flush()

            # Run CoreNLP as subprocess
            self._run_cmd({**properties, 'file': text_file.name})

            # The output is expected at <cwd>/<input file name>.json
            result_file_name = os.path.join(
                self._cwd,
                os.path.basename(text_file.name) + '.json')
            # Read the whole file once: parsing straight from the handle
            # would consume the stream and leave nothing to report when the
            # content turns out not to be JSON.
            with open(result_file_name) as result_file:
                raw_output = result_file.read()
            os.remove(result_file_name)

            try:
                return json.loads(raw_output)
            except json.decoder.JSONDecodeError:
                return raw_output

    def semgrex(self, text, pattern, filter=False):
        """Run a Semgrex query through the server started by :meth:`start`.

        Raises:
            Exception: If :meth:`start` has not been called.
        """
        if self._http_client is None:
            raise Exception('CoreNLP client is not running!')

        return self._http_client.semgrex(text, pattern=pattern, filter=filter)

    def _run_cmd(self, properties):
        """Run the command-line pipeline with ``-key value`` arguments."""
        arguments = ' '.join(
            '-{} {}'.format(key, value) for key, value in properties.items())
        command = self.COMMAND_PATTERN.format(self._memory, arguments)
        return self._open_process(command)

    def _open_process(self, command, wait=True):
        """Run ``command`` through the shell inside ``cwd``.

        Returns ``None`` after the process has finished when ``wait`` is
        true, otherwise the still-running ``subprocess.Popen`` object.
        """
        output = None if self._verbose else subprocess.DEVNULL
        # NOTE: the command is a single shell string assembled from property
        # values; do not feed untrusted values into it.
        process = subprocess.Popen(command,
                                   cwd=self._cwd,
                                   shell=True,
                                   stdout=output,
                                   stderr=output)
        if not wait:
            return process
        process.wait()
        return None

    def _wait_for_server(self):
        """Poll ``/ready`` once per second until the server reports ready.

        Raises:
            Exception: If the process launched by :meth:`start` exits before
                the server becomes ready.
        """
        while True:
            try:
                response = requests.get(self.SERVER_URL + '/ready')
            # The bare name is the builtin unless the file imports requests'
            # class under it; requests' own class is listed explicitly so a
            # refused connection is retried either way.
            except (ConnectionError, requests.exceptions.ConnectionError):
                pass
            else:
                if (response.status_code == 200
                        and response.text.startswith('ready')):
                    return

            # Stop waiting once the launched process is gone: nothing will
            # ever answer in that case.
            if self._process is not None and self._process.poll() is not None:
                raise Exception(
                    'CoreNLP server process exited before becoming ready '
                    '(exit code {})'.format(self._process.returncode))

            time.sleep(1)
    corenlp_jars=["E:/software/stanford-corenlp-full-2017-06-09/to/stanford-corenlp-full-2015-04-20/*", "E:/software/to/stanford-srparser-2014-10-23-models.jar"])
'''

# NOTE: `nlp` is not created in this section; it must already be bound to a
# CoreNLP client exposing annotate / tokensregex / semgrex.

# Sentence-level sentiment for a handful of short sentences.
res = nlp.annotate("""I love you. I hate him. You are nice. He is dumb.
                   The best way to hope for any chance of enjoying 
                   this film is by lowering your expectations.The show starts out as competent 
                   but unremarkable and gradually grows in to a considerable power. will kill you. don't come. fool""",
                   properties={
                       'annotators': 'sentiment',
                       'outputFormat': 'json'
                   })
# sentimentValue ranges from 0 (VeryNegative) to 4 (VeryPositive). Per the
# original author's note, tweets average between Negative (1) and Neutral (2)
# and the two extremes appear to be quite rare.
for s in res["sentences"]:
    sentence_text = " ".join(t["word"] for t in s["tokens"])
    print("%d: '%s': %s %s" % (s["index"], sentence_text,
                               s["sentimentValue"], s["sentiment"]))

# Constituency parse plus TokensRegex / Semgrex queries over one text.
# The first literal ends with a space so the two pieces do not run together
# as "surf,but".
text = ('Pusheen and Smitha walked along the beach. Pusheen wanted to surf, '
        'but fell off the surfboard.')
output = nlp.annotate(text,
                      properties={
                          'annotators': 'tokenize,ssplit,pos,depparse,parse',
                          'outputFormat': 'json'
                      })
print(output['sentences'][0]['parse'])
# The pattern has to use the spelling that occurs in `text` ("Smitha"),
# otherwise its second alternative can never match.
output = nlp.tokensregex(text, pattern='/Pusheen|Smitha/', filter=False)
print(output)
output = nlp.semgrex(text, pattern='{tag: VBD}', filter=False)
print(output)
Esempio n. 3
0
from pycorenlp import StanfordCoreNLP

if __name__ == '__main__':
    # Talks to a CoreNLP server that is expected to be listening already.
    nlp = StanfordCoreNLP('http://localhost:9000')
    # The first literal ends with a space so the two pieces do not run
    # together as "surf,but".
    text = (
        'Pusheen and Smitha walked along the beach. Pusheen wanted to surf, '
        'but fell off the surfboard.')

    # Full pipeline up to the constituency parse; print the first sentence's
    # parse tree.
    output = nlp.annotate(text, properties={
        'annotators': 'tokenize,ssplit,pos,depparse,parse',
        'outputFormat': 'json'
    })
    print(output['sentences'][0]['parse'])

    # TokensRegex: tokens equal to either name.
    output = nlp.tokensregex(text, pattern='/Pusheen|Smitha/', filter=False)
    print(output)

    # Semgrex: nodes tagged VBD.
    output = nlp.semgrex(text, pattern='{tag: VBD}', filter=False)
    print(output)