Example #1
def dependency_parse(raw_data):
    import os

    from nltk.parse.corenlp import CoreNLPServer

    # The server needs to know the location of the following files:
    #   - stanford-corenlp-X.X.X.jar
    #   - stanford-corenlp-X.X.X-models.jar
    STANFORD = os.path.join("..", "stanford-corenlp-full-2020-04-20")

    # Create the server
    server = CoreNLPServer(
        os.path.join(STANFORD, "stanford-corenlp-4.0.0.jar"),
        os.path.join(STANFORD, "stanford-corenlp-4.0.0-models.jar"),
    )

    # Start the server in the background
    server.start()
    from nltk.parse import CoreNLPParser
    parser = CoreNLPParser()

    new_data = []
    for example in raw_data:
        sentence, features_seq = example[0], example[-1]
        parse = next(parser.raw_parse(sentence))
        # get a few "important" neighboring words

    server.stop()
    return new_data
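Note that the example stops the server only on the success path; if raw_parse raises, the background Java process is never stopped. A minimal sketch of the same flow guarded with try/finally, reusing the jar paths from the example (the sample sentence is illustrative):

import os

from nltk.parse import CoreNLPParser
from nltk.parse.corenlp import CoreNLPServer

STANFORD = os.path.join("..", "stanford-corenlp-full-2020-04-20")
server = CoreNLPServer(
    os.path.join(STANFORD, "stanford-corenlp-4.0.0.jar"),
    os.path.join(STANFORD, "stanford-corenlp-4.0.0-models.jar"),
)
server.start()
try:
    parser = CoreNLPParser()
    # raw_parse returns an iterator of Tree objects; take the first parse.
    tree = next(parser.raw_parse("The quick brown fox jumps over the lazy dog."))
    print(tree)
finally:
    server.stop()  # always reached, even if parsing raises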
Example #2
    def start_core_nlp_server(self):
        home = os.path.expanduser("~")
        if os.name == 'nt':
            java_path = "C:\\Program Files\\Java\\jdk1.8.0_201\\bin\\java.exe"
            download_path = os.path.join(home, "Downloads")
            STANFORD_HOME = os.path.join(download_path, "stanford-corenlp-full-2018-10-05")
        else:  # 'posix'
            java_path = "/usr/lib/jvm/java-8-oracle/"
            download_path = os.path.join(home, "ttp_sense_python")
            STANFORD_HOME = os.path.join(download_path, "lib")

        print('Stanford_Directory: ', STANFORD_HOME)
        os.environ['JAVAHOME'] = java_path

        # The server needs to know the location of the following files:
        #   - stanford-corenlp-X.X.X.jar
        #   - stanford-corenlp-X.X.X-models.jar
        # Create the server. CoreNLPServer takes the main jar first and the
        # models jar second; the original code swapped them and passed a third
        # jar into the verbose parameter. The extra English models jar is not
        # a constructor argument and has to be made visible another way (e.g.
        # on the Java classpath, as Example #8 does).
        server = CoreNLPServer(
            path_to_jar=os.path.join(STANFORD_HOME, "stanford-corenlp-3.9.2.jar"),
            path_to_models_jar=os.path.join(STANFORD_HOME,
                                            "stanford-corenlp-3.9.2-models.jar"),
        )
        # Start the server in the background
        server.start()
        print("Server Started")
Example #3
def setup(manageServerInternally=False):
    global server

    config['isManagingServer'] = manageServerInternally

    if manageServerInternally:
        print("Starting CoreNLP server...")

        server = CoreNLPServer(
            os.path.join(STANFORD, "stanford-corenlp-3.9.2.jar"),
            os.path.join(STANFORD, "stanford-corenlp-3.9.2-models.jar"),
        )
        server.start()
    else:
        try:
            print("Checking connection to CoreNLP server...")

            requests.get(f'{config["coreNLPServerURL"]}/live')
        except requests.exceptions.RequestException:
            print(
                "Error connecting to CoreNLP instance! Make sure the server is running in the background."
            )
            print("The relevant command can be found in the README.")

            exit(1)

    setupQANet()
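The /live probe above only shows the process is up; CoreNLP also exposes /ready, which reports whether the models have finished loading. A small hedged helper (the function name and timeout are illustrative):

import requests

def corenlp_is_ready(base_url: str, timeout: float = 2.0) -> bool:
    """Return True if the CoreNLP server at base_url passes its readiness probe."""
    try:
        return requests.get(f"{base_url}/ready", timeout=timeout).ok
    except requests.exceptions.RequestException:
        return False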
Example #4
class CorenlpSubprocWordSplitter(CorenlpRemoteWordSplitter):
    """
    A ``WordSplitter`` that uses CoreNLP's tokenizer.
    It starts ``corenlp-server`` as a sub-process and calls its web API.
    """
    def __init__(
        self,
        path_to_jar: str = None,
        path_to_models_jar: str = None,
        verbose: bool = False,
        java_options: str = None,
        corenlp_options: str = None,
        port: int = None,
        encoding: str = 'utf8',
    ):
        """
        Parameters
        ----------

        * For parameters from ``path_to_jar`` to ``port``, see https://www.nltk.org/api/nltk.parse.html#nltk.parse.corenlp.
        * For parameter ``encoding``,  see https://www.nltk.org/api/nltk.parse.html#nltk.parse.corenlp.CoreNLPParser
        """
        self._server = CoreNLPServer(path_to_jar, path_to_models_jar, verbose,
                                     java_options, corenlp_options, port)
        self._server.start()
        super().__init__(self._server.url, encoding)

    def __del__(self):
        self._server.stop()
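A usage sketch for the class above. The jar paths are placeholders, and split_words is assumed from the (unshown) WordSplitter base class. Note that relying on __del__ for cleanup is fragile: finalizers are not guaranteed to run at interpreter shutdown, so the server may outlive the process.

splitter = CorenlpSubprocWordSplitter(
    path_to_jar="corenlp/stanford-corenlp-3.9.2.jar",              # placeholder path
    path_to_models_jar="corenlp/stanford-corenlp-3.9.2-models.jar",  # placeholder path
)
tokens = splitter.split_words("CoreNLP tokenizes this sentence.")  # assumed base-class API
del splitter  # triggers __del__, which stops the background server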
Example #5
    def startServer(self):
        java_path = "C:\\Program Files\\Java\\jdk1.8.0_201\\bin\\java.exe"
        os.environ['JAVAHOME'] = java_path

        home = os.path.expanduser("~")
        download_path = os.path.join(home, "Downloads")
        print(download_path)
        # The server needs to know the location of the following files:
        #   - stanford-corenlp-X.X.X.jar
        #   - stanford-corenlp-X.X.X-models.jar
        STANFORD = os.path.join(download_path,
                                "stanford-corenlp-full-2018-10-05")

        # Create the server (main jar first, models jar second; the extra
        # English models jar is not a constructor argument, cf. Example #2)
        server = CoreNLPServer(
            path_to_jar=os.path.join(STANFORD, "stanford-corenlp-3.9.2.jar"),
            path_to_models_jar=os.path.join(STANFORD,
                                            "stanford-corenlp-3.9.2-models.jar"),
        )

        # Start the server in the background
        server.start()
        print("Server Started")

        self.stanfordCoreNLP = StanfordCoreNLP('http://localhost:9000')

        return self.stanfordCoreNLP
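The method returns a client bound to http://localhost:9000. The snippet does not show where StanfordCoreNLP is imported from; assuming it is pycorenlp's client class, a call against the started server might look like this:

# Assumption: StanfordCoreNLP is pycorenlp's client class.
nlp = StanfordCoreNLP('http://localhost:9000')
output = nlp.annotate(
    "The quick brown fox jumped.",
    properties={"annotators": "tokenize,ssplit,pos", "outputFormat": "json"},
)
# With outputFormat "json", pycorenlp returns the parsed response as a dict.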
Example #6
def server():
    print('Starting CoreNLP server...')
    serv = CoreNLPServer(path_to_jar=config.CORENLP_JAR,
                         path_to_models_jar=config.CORENLP_MODELS_JAR)
    try:
        serv.start()
        print('Server started.')
        while True:
            time.sleep(1)  # idle until interrupted instead of busy-waiting (needs `import time`)
    except KeyboardInterrupt:
        pass
    except Exception as e:
        print(e)
    finally:
        print('Stopping server...')
        serv.stop()
Example #7
class CoreNLP:
    def __init__(self, args):
        self.context = dict()
        self.server = None
        self.set_system_env(*args)

    def set_system_env(self, *args):
        idx = 1
        while idx < len(args):
            if args[idx] == '--stanford':
                idx += 1
                stanford_path = args[idx]
                self.context['path_to_jar'] = os.path.join(stanford_path, 'stanford-corenlp-3.9.2.jar')
                self.context['path_to_models_jar'] = os.path.join(stanford_path, 'stanford-corenlp-3.9.2-models.jar')
                print('corenlp jar:', self.context['path_to_jar'])
                print('corenlp models jar:', self.context['path_to_models_jar'])

            elif args[idx] == '--java':
                idx += 1
                java_path = args[idx]
                os.environ['JAVAHOME'] = java_path
                print('java path:', java_path)

            idx += 1

    def start_server(self):
        self.server = CoreNLPServer(**self.context)
        self.server.start()

    def stop_server(self):
        self.server.stop()

    def parse_tree(self, s):
        parser = CoreNLPParser()

        parse = next(parser.raw_parse(s))
        # parse.draw()

        return parse

    def dependency_parse_tree(self, s):
        parser = CoreNLPDependencyParser()

        parse = next(parser.raw_parse(s))

        return parse
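A hypothetical invocation mirroring the --stanford/--java flags parsed by set_system_env (the paths and args[0] are placeholders):

nlp = CoreNLP(["prog", "--stanford", "/opt/corenlp", "--java", "/usr/bin/java"])
nlp.start_server()
try:
    tree = nlp.parse_tree("The quick brown fox jumps over the lazy dog.")
    graph = nlp.dependency_parse_tree("The quick brown fox jumps over the lazy dog.")
    print(tree, list(graph.triples()))
finally:
    nlp.stop_server()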
Example #8
    def start_CoreNLPServer(self):
        url = 'http://localhost:9000'
        status_code = 0
        try:
            status_code = urllib.request.urlopen(url).getcode()
        except urllib.error.URLError:
            # No server is listening yet.
            pass

        if status_code != 200:
            print('CoreNLPServer is starting {}'.format(url))
            try:
                os.environ['CLASSPATH'] = self.model_path
                server = CoreNLPServer(port=9000)
                server.start()

                status_code = urllib.request.urlopen(url).getcode()
                print('server started {}'.format(status_code))

            except Exception as e:
                print(url, e)
                raise  # re-raise to preserve the original traceback
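The status probe at the top of the method is performed twice and is worth factoring out. A small sketch of the same check (the helper name is illustrative):

import urllib.error
import urllib.request

def http_status(url: str) -> int:
    """Return the HTTP status code for url, or 0 if nothing is listening."""
    try:
        return urllib.request.urlopen(url).getcode()
    except urllib.error.URLError:
        return 0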
Example #9
    def start_core_nlp_server(self):
        os.environ['JAVAHOME'] = self.JAVA_HOME
        HOMEDIR = os.path.expanduser("~")
        DOWNLOAD_HOME = os.path.join(HOMEDIR, self.DOWNLOAD_HOME)
        STANFORD_HOME = os.path.join(DOWNLOAD_HOME, self.STANFORD_HOME)

        print('Stanford_Directory: ', STANFORD_HOME)

        # The server needs to know the location of the following files:
        #   - stanford-corenlp-X.X.X.jar
        #   - stanford-corenlp-X.X.X-models.jar
        # Create the server (main jar first, models jar second; the extra
        # English models jar is not a constructor argument, cf. Example #2)
        server = CoreNLPServer(
            path_to_jar=os.path.join(STANFORD_HOME, "stanford-corenlp-3.9.2.jar"),
            path_to_models_jar=os.path.join(STANFORD_HOME,
                                            "stanford-corenlp-3.9.2-models.jar"),
        )
        # Start the server in the background
        server.start()
        print("Server Started")
Example #10
    def setup(self):
        url = settings.CORENLP_URL

        if url is None:
            server = CoreNLPServer(
                settings.CORENLP_PATH,
                settings.CORENLP_MODEL_PATH,
            )
            server.start()

            self.server = server
            url = server.url

        else:
            print("[TreeParser] Using existing CoreNLP Server...")

        self.parser = CoreNLPParser(url=url)

        # maybe separated with another class...
        self.dependency_parser = CoreNLPDependencyParser(url=url)

        return self.parser
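For reference, a hypothetical settings module that would drive the setup() above; every value here is a placeholder:

class settings:
    # Set to an existing server's URL to skip spawning one locally.
    CORENLP_URL = None  # e.g. "http://localhost:9000"
    CORENLP_PATH = "corenlp/stanford-corenlp-3.9.2.jar"
    CORENLP_MODEL_PATH = "corenlp/stanford-corenlp-3.9.2-models.jar"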
Example #11
from clean_data import process_sentence
from nltk.stem import SnowballStemmer

# 2017-12-03: using a different parser to parse sentences
'''
from nltk.parse.stanford import StanfordDependencyParser
path_to_jar = '/Users/collin/stanford/stanford-parser-full-2017-06-09/stanford-parser.jar'
path_to_models_jar = '/Users/collin/stanford/stanford-parser-full-2017-06-09/stanford-parser-3.8.0-models.jar'
dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
'''

from nltk.parse.corenlp import CoreNLPServer, CoreNLPDependencyParser
path_to_jar = '/Users/collin/stanford/stanford-corenlp-full-2017-06-09/stanford-corenlp-3.8.0.jar'
path_to_models_jar = '/Users/collin/stanford/stanford-corenlp-full-2017-06-09/stanford-corenlp-3.8.0-models.jar'
server = CoreNLPServer(path_to_jar=path_to_jar,
                       path_to_models_jar=path_to_models_jar)
server.start()
dependency_parser = CoreNLPDependencyParser()

stemmer = SnowballStemmer('english')


def stem(w):
    return stemmer.stem(w)


DR_one = ['nsubj', 'dobj', 'xsubj', 'csubj', 'nmod', 'iobj', 'xcomp']
DR_two = ['amod']
#DR_two = ['nsubj','dobj','xsubj','csubj','nsubjpass','nmod','iobj']
DR_three = ['conj']
DR = DR_one + DR_three
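The DR lists pick out the dependency relations of interest; a hedged sketch of how they might filter the parser's triples (the sentence is illustrative):

parse = next(dependency_parser.raw_parse("The movie has a great plot."))
pairs = [
    (stem(gov[0]), rel, stem(dep[0]))
    for gov, rel, dep in parse.triples()
    if rel in DR
]
print(pairs)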
Example #12
class CoreNLPSentenceAnalyzer():
    """
    A sentence analyzer based on Stanford CoreNLP.

    References:
        The CoreNLP Syntax Parser
            https://bbengfort.github.io/snippets/2018/06/22/corenlp-nltk-parses.html
        Penn Treebank II Tags
            https://gist.github.com/nlothian/9240750
    """
    def __init__(self):
        self.lab_set = set()

    def init_server(self):
        STANFORD = os.path.join("stanford-corenlp-full-2018-10-05")
        self.server = CoreNLPServer(
            os.path.join(STANFORD, "stanford-corenlp-3.9.2.jar"),
            os.path.join(STANFORD, "stanford-corenlp-3.9.2-models.jar"))
        self.server.start()
        self.parser = CoreNLPParser()

    def stop_server(self):
        self.server.stop()

    def parse_syntax(self, sent):
        return next(self.parser.raw_parse(sent))

    def _collect_labels(self, node):
        """
        Collect labels from the given node recursively. Do not invoke this method directly; use collect_labels instead.
        """
        try:
            self.lab_result.append(node.label())
        except AttributeError:
            return
        for nn in node:
            self._collect_labels(nn)
        return

    def collect_labels(self, node):
        """
        Collect all labels in a tree starting from the given node.
        """
        self.lab_result = []  # used to collect labels in the recursion
        self._collect_labels(node)
        lab_counter = Counter(self.lab_result)

        # Keep the tags we have seen so far
        self.lab_set = self.lab_set.union(lab_counter.keys())

        return lab_counter

    def get_lab_series(self, lab_counter_list):
        """
        Convert and merge all lab_counters in the given list (the result of "collect_labels") into a series, using the tags seen so far (self.lab_set).
        """
        # DataFrame.append was removed in pandas 2.0; build the frame in one go.
        # sorted() gives the columns a deterministic order (lab_set is a set).
        cols = sorted(self.lab_set)
        rt = pd.DataFrame(
            [pd.Series(lab_counter, index=cols) for lab_counter in lab_counter_list]
        )
        rt = rt.add_prefix('penn_')
        return rt.sum()
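An end-to-end sketch of the analyzer above, assuming the 2018-10-05 CoreNLP distribution sits in the working directory and that the snippet's omitted imports (os, pandas as pd, collections.Counter, the nltk parser classes) are in place; the sentences are illustrative:

analyzer = CoreNLPSentenceAnalyzer()
analyzer.init_server()
try:
    counters = [
        analyzer.collect_labels(analyzer.parse_syntax(s))
        for s in ["The cat sat on the mat.", "Dogs bark loudly."]
    ]
    print(analyzer.get_lab_series(counters))  # summed Penn-tag counts, prefixed "penn_"
finally:
    analyzer.stop_server()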
Example #13
class Summarizer:
    """
    Summarizer class implementing opinion-feature extraction. Uses Stanford CoreNLP dependency parser.

    Attributes:
    server (CoreNLPServer): CoreNLP server for accessing Stanford CoreNLP services.
    parser (CoreNLPDependencyParser): CoreNLP dependency parser.

    """
    def __init__(self, jar_path, models_jar_path):
        """
        The constructor for Summarizer class.

        Parameters:
        jar_path (str): Filepath to Stanford CoreNLP .jar file.
        models_jar_path (str): Filepath to Stanford CoreNLP models .jar file.

        """
        logging.info('Starting CoreNLP server...')
        self.server = CoreNLPServer(path_to_jar=jar_path,
                                    path_to_models_jar=models_jar_path)
        try:
            self.server.start()
            logging.info('CoreNLP server started.')
        # CoreNLPServerError is thrown when a server is already running
        except CoreNLPServerError:
            logging.warning('CoreNLP server is already running.')
        self.parser = CoreNLPDependencyParser()

    def summarize(self, text):
        """
        Summarizes a review. Extracts opinion-feature pairs from it.

        Parameters:
        text (str): Review text.

        Returns:
        Summary: List of opinion-feature pairs extracted from the review text.

        """
        try:
            parse = next(self.parser.raw_parse(text))
        # An HTTPError raised by the CoreNLP server is related to unrecognized characters in the review text
        except HTTPError:
            logging.warning(f'Review skipped: {text}')
            return []

        # Search dependency parsing result to find "nsubj" or "amod" tags
        summary = list()
        for governor, dep, dependent in parse.triples():
            if dep == 'nsubj':
                # Check whether the nominal subject is a noun modified by an adjective
                if governor[1] == 'JJ' and dependent[1] in {'NN', 'NNS'}:
                    summary.append((governor[0].lower(), dependent[0].lower()))
            elif dep == 'amod':
                # Check whether the adjective modifies a noun
                if dependent[1] == 'JJ' and governor[1] in {'NN', 'NNS'}:
                    summary.append((dependent[0].lower(), governor[0].lower()))
        return summary

    def stop(self):
        """
        Stops the CoreNLP server of the summarizer object.

        """
        self.server.stop()
        logging.info('CoreNLP server stopped.')
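A usage sketch for the Summarizer (jar paths are placeholders; the expected output shape follows from the nsubj/amod rules above and is not verified):

summarizer = Summarizer(
    "corenlp/stanford-corenlp-3.9.2.jar",          # placeholder path
    "corenlp/stanford-corenlp-3.9.2-models.jar",   # placeholder path
)
try:
    pairs = summarizer.summarize("The battery life is great and the screen is dim.")
    print(pairs)  # expected shape: [(opinion, feature), ...]
finally:
    summarizer.stop()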