Beispiel #1
0
    def __init__(self):
        """
        Launch the CoreNLP process and block until its shell has started.

        The command line comes from the ``start_corenlp`` name, which is not
        defined inside this method. It is always printed, and additionally
        logged at info level when VERBOSE is set. A five-step progress bar
        advances once for each "done." marker read from the child process.
        """
        print(start_corenlp)
        if VERBOSE:
            logger.info(start_corenlp)
        self.corenlp = pexpect.spawn(start_corenlp)

        # Seconds to wait for each "done." marker, in loading order.
        load_timeouts = (
            20,   # pos tagger model (~5sec)
            200,  # NER-all classifier (~33sec)
            600,  # NER-muc classifier (~60sec)
            600,  # CoNLL classifier (~50sec)
            200,  # PCFG (~3sec)
        )

        # show progress bar while loading the models
        progress = ProgressBar(widgets=['Loading Models: ', Fraction()],
                               maxval=5, force_update=True).start()
        for step, seconds in enumerate(load_timeouts, 1):
            self.corenlp.expect("done.", timeout=seconds)
            progress.update(step)
        self.corenlp.expect("Entering interactive shell.")
        progress.finish()
Beispiel #2
0
    def _spawn_corenlp(self):
        """
        Spawn the CoreNLP process and wait for its interactive prompt.

        Runs ``self.start_corenlp`` through pexpect and stores the child in
        ``self.corenlp``.

        The wait for the five "done." markers is unconditional, so the
        generous per-model timeouts apply whether or not VERBOSE is set.
        VERBOSE only controls whether the command is printed and whether a
        progress bar is drawn. (Previously the per-model waits were skipped
        when VERBOSE was off, leaving only the 60 second default timeout for
        the whole start-up.)
        """
        if VERBOSE:
            print(self.start_corenlp)
        self.corenlp = pexpect.spawn(self.start_corenlp,
                                     timeout=60,
                                     maxread=8192,
                                     searchwindowsize=80)

        # show progress bar while loading the models (VERBOSE only)
        pbar = None
        if VERBOSE:
            widgets = ['Loading Models: ', Fraction()]
            pbar = ProgressBar(widgets=widgets, maxval=5,
                               force_update=True).start()

        # Model timeouts:
        # pos tagger model (~5sec)
        # NER-all classifier (~33sec)
        # NER-muc classifier (~60sec)
        # CoNLL classifier (~50sec)
        # PCFG (~3sec)
        timeouts = [20, 200, 600, 600, 20]
        for step, seconds in enumerate(timeouts, 1):
            self.corenlp.expect("done.", timeout=seconds)  # Load model
            if pbar is not None:
                pbar.update(step)
        self.corenlp.expect("Entering interactive shell.")
        if pbar is not None:
            pbar.finish()

        # interactive shell
        self.corenlp.expect("\nNLP> ")
    def __init__(self,
                 corenlp_path=DIRECTORY,
                 memory="3g",
                 properties='default.properties'):
        """
        Spawn the CoreNLP process and wait for its interactive prompt.

        corenlp_path, memory and properties are handed unchanged to
        init_corenlp_command(), whose return value is the command to spawn.

        The wait for the five "done." markers is unconditional; VERBOSE only
        controls whether the command is printed and whether a progress bar
        is drawn. (Previously the waits were skipped when VERBOSE was off,
        so the only wait left was the 3 second one for the prompt and
        construction timed out before the models had loaded.)
        """

        # spawn the server
        start_corenlp = init_corenlp_command(corenlp_path, memory, properties)
        if VERBOSE:
            print(start_corenlp)
        self.corenlp = pexpect.spawn(start_corenlp)

        # show progress bar while loading the models (VERBOSE only)
        pbar = None
        if VERBOSE:
            widgets = ['Loading Models: ', Fraction()]
            pbar = ProgressBar(widgets=widgets, maxval=5,
                               force_update=True).start()

        # Model timeouts:
        # pos tagger model (~5sec)
        # NER-all classifier (~33sec)
        # NER-muc classifier (~60sec)
        # CoNLL classifier (~50sec)
        # PCFG (~3sec)
        timeouts = [20, 200, 600, 600, 20]
        for step, seconds in enumerate(timeouts, 1):
            self.corenlp.expect("done.", timeout=seconds)  # Load model
            if pbar is not None:
                pbar.update(step)
        self.corenlp.expect("Entering interactive shell.")
        if pbar is not None:
            pbar.finish()

        # interactive shell; the prompt follows the banner above, so a short
        # timeout is enough once loading has been waited for
        self.corenlp.expect("\nNLP> ", timeout=3)
 def __init__(self, corenlp_path=None):
     """
     Locate the CoreNLP jars and spawn the Chinese pipeline as a process.

     corenlp_path -- directory that holds the jars, with or without a
         trailing slash; a falsy value selects the hard-coded default.

     Logs an error and calls sys.exit(1) when a required jar is missing.
     Blocks until the child has printed five "done." markers followed by
     "Entering interactive shell.".
     """
     jar_names = [
         "stanford-corenlp-3.4.1.jar",
         "stanford-chinese-corenlp-2014-02-24-models.jar",  # chinese models
     ]

     # if CoreNLP libraries are in a different directory,
     # pass corenlp_path to point to them
     if not corenlp_path:
         # NOTE(review): machine-specific default kept for compatibility
         corenlp_path = "/home/kqc/tools/stanford-corenlp-full-2014-08-27/"

     java_path = "java"
     classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
     # include the properties file, so you can change defaults
     # but any changes in output format will break parse_parser_results()
     props = "-props StanfordCoreNLP-chinese.properties"  # for chinese

     # add and check classpaths; os.path.join also accepts a corenlp_path
     # that was given without its trailing slash
     jars = [os.path.join(corenlp_path, jar) for jar in jar_names]
     for jar in jars:
         if not os.path.exists(jar):
             logger.error("Error! Cannot locate %s" % jar)
             sys.exit(1)

     # spawn the server
     start_corenlp = "%s -Xmx3g -cp %s %s %s" % (
         java_path, ':'.join(jars), classname, props)  # for chinese

     if VERBOSE:
         logger.debug(start_corenlp)
     self.corenlp = pexpect.spawnu(start_corenlp, encoding='utf8')

     # Seconds to wait for each "done." marker; increased for chinese.
     timeouts = (
         2000,  # pos tagger model (~5sec)
         2000,  # NER-all classifier (~33sec)
         6000,  # NER-muc classifier (~60sec)
         6000,  # CoNLL classifier (~50sec)
         2000,  # PCFG (~3sec)
     )

     # show progress bar while loading the models
     widgets = ['Loading Models: ', Fraction()]
     pbar = ProgressBar(widgets=widgets, maxval=5, force_update=True).start()
     for step, seconds in enumerate(timeouts, 1):
         self.corenlp.expect(u"done.", timeout=seconds)
         pbar.update(step)
     self.corenlp.expect(u"Entering interactive shell.")
     pbar.finish()
Beispiel #5
0
    def setup(self):
        """
        Locate the CoreNLP jars and start the pipeline as a child process.

        The jar directory and the properties file are both looked up under
        the first component of this file's path relative to the working
        directory. Prints an error and calls sys.exit(1) as soon as one jar
        is missing. Blocks until the child has printed four "done." markers
        followed by "Entering interactive shell.".
        """
        # First '/'-separated component of this file's relative path.
        base_dir = os.path.relpath(__file__).split('/')[0]
        corenlp_path = base_dir + "/stanford-corenlp-full-2013-06-20/"
        # include the properties file, so you can change defaults
        # but any changes in output format will break parse_parser_results()
        props = "-props " + base_dir + "/default.properties"

        # build the classpath, checking every jar on the way
        classpath = []
        for jar_name in ("stanford-corenlp-3.2.0.jar",
                         "stanford-corenlp-3.2.0-models.jar",
                         "joda-time.jar", "xom.jar", "jollyday.jar"):
            jar = corenlp_path + jar_name
            if not os.path.exists(jar):
                print("Error! Cannot locate %s" % jar)
                sys.exit(1)
            classpath.append(jar)

        # spawn the server
        java_path = "java"
        classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
        start_corenlp = "%s -Xmx2500m -cp %s %s %s" % (
            java_path, ':'.join(classpath), classname, props)
        if VERBOSE:
            print(start_corenlp)
        self.corenlp = pexpect.spawn(start_corenlp)

        # Seconds to wait for each "done." marker; the PCFG stage is not
        # waited for here, so the bar has four steps.
        load_timeouts = (
            20,   # pos tagger model (~5sec)
            200,  # NER-all classifier (~33sec)
            600,  # NER-muc classifier (~60sec)
            600,  # CoNLL classifier (~50sec)
        )

        # show progress bar while loading the models
        progress = ProgressBar(widgets=['Loading Models: ', Fraction()],
                               maxval=4, force_update=True).start()
        for step, seconds in enumerate(load_timeouts, 1):
            self.corenlp.expect("done.", timeout=seconds)
            progress.update(step)
        self.corenlp.expect("Entering interactive shell.")
        progress.finish()
    def __init__(self, corenlp_path=None):
        """
        Locate the CoreNLP 3.6.0 jars and spawn the pipeline as a process.

        corenlp_path -- directory that holds the jars, with or without a
            trailing slash; a falsy value selects
            "./stanford-corenlp-full-2015-12-09/".

        Logs an error and calls sys.exit(1) when a required jar is missing.
        Blocks until the child has printed five "done." markers followed by
        "Entering interactive shell.".
        """
        jar_names = [
            "stanford-corenlp-3.6.0.jar", "stanford-corenlp-3.6.0-models.jar",
            "joda-time.jar", "xom.jar", "jollyday.jar", "ejml-0.23.jar",
            "slf4j-api.jar", "slf4j-simple.jar"
        ]

        # if CoreNLP libraries are in a different directory,
        # pass corenlp_path to point to them
        if not corenlp_path:
            corenlp_path = "./stanford-corenlp-full-2015-12-09/"

        java_path = "java"
        classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
        # include the properties file, so you can change defaults
        # but any changes in output format will break parse_parser_results()
        props = "-props default.properties"

        # add and check classpaths; os.path.join also accepts a corenlp_path
        # that was given without its trailing slash
        jars = [os.path.join(corenlp_path, jar) for jar in jar_names]
        for jar in jars:
            if not os.path.exists(jar):
                logger.error("Error! Cannot locate %s" % jar)
                sys.exit(1)

        # spawn the server
        start_corenlp = "%s -Xmx3600m -cp %s %s %s" % (
            java_path, ':'.join(jars), classname, props)
        if VERBOSE:
            logger.debug(start_corenlp)
        self.corenlp = pexpect.spawn(start_corenlp)

        # Seconds to wait for each "done." marker, in loading order.
        timeouts = (
            20,   # pos tagger model (~5sec)
            200,  # NER-all classifier (~33sec)
            600,  # NER-muc classifier (~60sec)
            600,  # CoNLL classifier (~50sec)
            200,  # PCFG (~3sec)
        )

        # show progress bar while loading the models
        widgets = ['Loading Models: ', Fraction()]
        pbar = ProgressBar(widgets=widgets, maxval=5,
                           force_update=True).start()
        for step, seconds in enumerate(timeouts, 1):
            self.corenlp.expect("done.", timeout=seconds)
            pbar.update(step)
        self.corenlp.expect("Entering interactive shell.")
        pbar.finish()
    def __init__(self,
                 corenlp_path="stanford-corenlp-full-2013-04-04/",
                 memory="3g"):
        """
        Start the CoreNLP process and wait until its prompt is available.

        corenlp_path and memory are handed unchanged to
        init_corenlp_command(), whose return value is the command to spawn.
        The waits on the child's output always happen; the command echo and
        the progress bar appear only when VERBOSE is set.
        """

        # spawn the server
        command = init_corenlp_command(corenlp_path, memory)
        if VERBOSE:
            print(command)
        self.corenlp = pexpect.spawn(command)

        # the progress bar exists only in verbose mode
        progress = None
        if VERBOSE:
            progress = ProgressBar(widgets=['Loading Models: ', Fraction()],
                                   maxval=5, force_update=True).start()

        # Seconds to wait for each "done." marker, in loading order.
        load_timeouts = (
            20,   # pos tagger model (~5sec)
            200,  # NER-all classifier (~33sec)
            600,  # NER-muc classifier (~60sec)
            600,  # CoNLL classifier (~50sec)
            200,  # PCFG (~3sec)
        )
        for step, seconds in enumerate(load_timeouts, 1):
            self.corenlp.expect("done.", timeout=seconds)
            if progress is not None:
                progress.update(step)
        self.corenlp.expect("Entering interactive shell.")
        if progress is not None:
            progress.finish()

        # interactive shell
        self.corenlp.expect("\nNLP> ", timeout=3)
Beispiel #8
0
    def __init__(self, corenlp_path=None):
        """
        Locate the CoreNLP jars and spawn the Chinese pipeline as a process.

        corenlp_path -- directory that holds the jars, with or without a
            trailing slash; a falsy value selects "./CoreNLP/".

        Logs an error and calls sys.exit(1) when a required jar is missing.

        Every wait also accepts pexpect.EOF, so a child that exits early
        does not raise here. That case is now reported with an error log
        instead of passing silently; construction still completes.
        """
        jar_names = [
            "stanford-corenlp-2017-04-14-build.jar",
            "stanford-corenlp-models-current.jar",
            "stanford-chinese-corenlp-models-current.jar",
            "joda-time.jar",
            "xom.jar",
            "jollyday.jar"
        ]

        # if CoreNLP libraries are in a different directory,
        # pass corenlp_path to point to them
        if not corenlp_path:
            corenlp_path = "./CoreNLP/"

        java_path = "java"
        classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
        # include the properties file, so you can change defaults
        # but any changes in output format will break parse_parser_results()
        props = "-props StanfordCoreNLP-chinese.properties"

        # add and check classpaths; os.path.join also accepts a corenlp_path
        # that was given without its trailing slash
        jars = [os.path.join(corenlp_path, jar) for jar in jar_names]
        for jar in jars:
            if not os.path.exists(jar):
                logger.error("Error! Cannot locate %s" % jar)
                sys.exit(1)

        # spawn the server
        start_corenlp = "%s -Xmx5g -cp %s %s %s" % (java_path, ':'.join(jars),
                                                    classname, props)
        if VERBOSE:
            logger.debug(start_corenlp)
        self.corenlp = pexpect.spawnu(start_corenlp)

        # Seconds to wait for each "done." marker; the PCFG stage is not
        # waited for here, so the bar has four steps.
        timeouts = (
            200,   # pos tagger model (~5sec)
            2000,  # NER-all classifier (~33sec)
            6000,  # NER-muc classifier (~60sec)
            6000,  # CoNLL classifier (~50sec)
        )

        # show progress bar while loading the models
        widgets = ['Loading Models: ', Fraction()]
        pbar = ProgressBar(widgets=widgets, maxval=4,
                           force_update=True).start()
        child_exited = False
        for step, seconds in enumerate(timeouts, 1):
            # expect() returns the index of the pattern that matched;
            # index 1 is pexpect.EOF, i.e. the child has gone away
            matched = self.corenlp.expect([u"done.", pexpect.EOF],
                                          timeout=seconds)
            child_exited = child_exited or matched == 1
            pbar.update(step)
        matched = self.corenlp.expect(
            [u"Entering interactive shell.", pexpect.EOF])
        child_exited = child_exited or matched == 1
        pbar.finish()

        if child_exited:
            logger.error(
                "CoreNLP process exited before reaching its interactive shell")