Example #1
def config(parsers=None):
    """
        Configure the API key for ncbo and/or
        install CoreNLP and start the server.
        Input: - parsers is a dictionary mapping each parser name to
                 its API key or installation location
        Example: - config(parsers={'ncbo':'fc5d5241-1e8e-4b44-b401-310ca39573f6', 'coreNLP':'~/corenlp'})
                 - config() --> configuration with the default values
    """
    # avoid a mutable default argument: build the default dictionary per call
    if parsers is None:
        parsers = {'ncbo': 'fc5d5241-1e8e-4b44-b401-310ca39573f6',
                   'coreNLP': '~/corenlp'}
    import json
    import os.path
    currentPath = os.path.dirname(os.path.realpath(__file__))
    configFile = os.path.join(currentPath,'config.txt')

    # setup coreNLP server if the installation location is available
    if 'coreNLP' in parsers:

        # make sure the input is an absolute path; if not, resolve it under the home directory
        from os.path import expanduser
        parsers['coreNLP'] = expanduser(parsers['coreNLP'])
        if not os.path.isabs(parsers['coreNLP']):
            parsers['coreNLP'] = expanduser('~/' + parsers['coreNLP'])

        # install only when it is not yet installed
        if not os.path.isdir(parsers['coreNLP']):
            print('Relax ... installing CoreNLP takes a few minutes')
            import stanza
            stanza.install_corenlp(dir=parsers['coreNLP'])
            parsers['coreNLP_EP'] = 'http://localhost:9001'  # default local server endpoint
        else:
            print('CoreNLP already installed')

    with open(configFile, 'w') as fp:
        json.dump(parsers, fp)
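
For reference, a minimal companion sketch (not part of the original example) showing how downstream code might read the saved config.txt back; the file name and keys mirror the snippet above, everything else is an assumption:

# Hypothetical loader for the config.txt written by config() above.
import json
import os.path

def load_config():
    currentPath = os.path.dirname(os.path.realpath(__file__))
    configFile = os.path.join(currentPath, 'config.txt')
    with open(configFile) as fp:
        return json.load(fp)

# e.g. load_config().get('coreNLP') -> the CoreNLP installation directory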
Example #2
def preprocess():
    import os
    try:
        import stanza
    except ImportError:
        # stanza is missing: install it on the fly, then retry the import
        os.system('pip install stanza -q')
        import stanza

    stanza.install_corenlp(dir=get_path_corenlp())
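
The helper get_path_corenlp() is not shown in this snippet; a hypothetical stand-in, with the default location purely an assumption chosen to match the other examples on this page:

# Hypothetical implementation of the helper the snippet assumes.
import os

def get_path_corenlp():
    return os.path.expanduser('~/corenlp')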
Example #3
import stanza
from stanza.server import CoreNLPClient


def start_server() -> CoreNLPClient:
    """Starts a CoreNLP server through Stanza and returns it."""
    stanza.install_corenlp(dir="./stanza_corenlp")
    return CoreNLPClient(annotators=[
        'tokenize', 'ssplit', 'pos', 'lemma', 'ner', 'parse', 'depparse',
        'coref', 'kbp', 'natlog', 'openie'
    ],
                         timeout=30000,
                         memory='16G')
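
A hedged usage sketch of the client returned by start_server(); CoreNLPClient.annotate() and stop() are real stanza.server methods, but the sample text is an assumption, and CORENLP_HOME is assumed to point at the "./stanza_corenlp" install directory:

# Illustrative only: annotate one sentence and print token attributes.
client = start_server()
doc = client.annotate("Stanza wraps the Java CoreNLP server.")
for sentence in doc.sentence:   # protobuf Document -> Sentence messages
    for token in sentence.token:
        print(token.word, token.pos)
client.stop()                   # shut the Java server down when finished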
Example #4
import os
import shutil
import tempfile

import stanza


def test_install_corenlp():
    # we do not reset the CORENLP_HOME variable since this may impact the
    # client tests
    with tempfile.TemporaryDirectory(dir=".") as test_dir:

        # the download method doesn't install over existing directories
        shutil.rmtree(test_dir)
        stanza.install_corenlp(dir=test_dir)

        assert os.path.isdir(
            test_dir), "Installation destination directory not found."
        jar_files = [f for f in os.listdir(test_dir) \
                     if f.endswith('.jar') and f.startswith('stanford-corenlp')]
        assert len(jar_files) > 0, \
            "Cannot find stanford-corenlp jar files in the installation directory."
        assert not os.path.exists(os.path.join(test_dir, 'corenlp.zip')), \
            "Downloaded zip file was not removed."
Example #5
    def init_stanza(self):
        if self.stanza_initialized:
            return

        corenlp_dir = './corenlp'
        if not os.path.isdir(corenlp_dir):
            stanza.install_corenlp(dir=corenlp_dir)

        try:
            # construct the client without a with-block so the server stays
            # alive after this method returns (a with-block would stop it)
            self.client = CoreNLPClient(annotators=[
                'tokenize', 'ssplit', 'pos', 'lemma', 'ner', 'parse',
                'depparse', 'coref'
            ],
                                        timeout=30000,
                                        memory='16G',
                                        be_quiet=True)
            self.client.start()
            self.stanza_initialized = True
        except AssertionError as e:
            print(
                "You probably have not set $CORENLP_HOME. Run this:")
            print("export CORENLP_HOME={}".format(
                os.path.abspath(corenlp_dir)))
            raise e
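
The error path above prints a shell export; equivalently, a minimal sketch of setting the variable from Python before init_stanza() runs (stanza reads CORENLP_HOME to locate the CoreNLP jars; the path mirrors corenlp_dir above):

# Assumed alternative to the shell export suggested by the error message.
import os
os.environ['CORENLP_HOME'] = os.path.abspath('./corenlp')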
Example #6
File: nlp.py Project: Yifan-G/EvalCraft
# stanfordnlp client
import json
from .params import *
import stanza
stanza.install_corenlp()
def ies_of(sentence):
  # collect OpenIE (subject, relation, object) token-span triples
  if not NLPclient: return
  ts=[]
  for triple in sentence['openie']:
    s1,s2 = triple['subjectSpan']
    v1,v2 = triple['relationSpan']
    o1,o2 = triple['objectSpan']
    #t=( (s1-1,s2-1),(v1-1,v2-1),(o1-1,o2-1))
    t = ((s1, s2), (v1, v2), (o1, o2))
    ts.append( t )
  yield ts

def deps_of(sentence):
  # flatten enhanced++ dependencies into 0-based
  # (dependent, relation, governor) triples
  deps = []
  # print('SENT',[x for x in sentence['entitymentions']])
  for x in sentence['enhancedPlusPlusDependencies']:
    r = x['dep']
    t = x['governor']
    f = x['dependent']
    deps.append((f - 1, r, t - 1))
  return deps


def lexs_of(sentence):
  toks = sentence['tokens']
  for tok in toks:
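
These helpers consume CoreNLP's JSON output, where each sentence is a plain dict with 'openie' and 'enhancedPlusPlusDependencies' keys. A hedged driver sketch (the annotator list and sample text are assumptions, and NLPclient from .params is assumed truthy):

# Illustrative driver producing the sentence dicts ies_of()/deps_of() expect.
from stanza.server import CoreNLPClient

with CoreNLPClient(annotators=['tokenize', 'ssplit', 'pos', 'lemma',
                               'depparse', 'natlog', 'openie'],
                   output_format='json') as client:
    ann = client.annotate("Stanza talks to the CoreNLP server.")
    for sentence in ann['sentences']:
        print(list(ies_of(sentence)))
        print(deps_of(sentence))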
Example #7
# stanfordnlp client
import json
from .params import *
import stanza
'''
corenlp_dir = '/root/corenlp'
stanza.install_corenlp(dir=corenlp_dir)
import os
os.environ["CORENLP_HOME"] = corenlp_dir
'''

def ies_of(sentence):
  if not NLPclient: return
  ts=[]
  for triple in sentence['openie']:
    s1,s2 = triple['subjectSpan']
    v1,v2 = triple['relationSpan']
    o1,o2 = triple['objectSpan']
    #t=( (s1-1,s2-1),(v1-1,v2-1),(o1-1,o2-1))
    t = ((s1, s2), (v1, v2), (o1, o2))
    ts.append( t )
  yield ts

def deps_of(sentence):
  deps = []
  # print('SENT',[x for x in sentence['entitymentions']])
  for x in sentence['enhancedPlusPlusDependencies']:
    r = x['dep']
    t = x['governor']
    f = x['dependent']
    deps.append((f - 1, r, t - 1))
  return deps
Example #8
        if not os.path.exists(stanford_dir):
            os.makedirs(stanford_dir)
        download(url=stanford_srparser_url, local_dir=str(stanford_dir))
        log.info(
            f"Downloaded Stanford Shift-Reduce Constituency Parser library to '{stanford_dir}'."
        )

    # MongoDB Java Driver.
    if input_params['download_mongodb']:
        mongodb_url = 'https://repo1.maven.org/maven2/org/mongodb/mongo-java-driver/3.12.7/mongo-java-driver-3.12.7.jar'
        mongodb_dir = Path(ROOT_DIR) / 'lib' / 'mongodb'
        if not os.path.exists(mongodb_dir):
            os.makedirs(mongodb_dir)
        download(url=mongodb_url, local_dir=str(mongodb_dir))
        log.info(f"Downloaded MongoDB Java Driver to '{mongodb_dir}'.")

    # Word embeddings.
    for model_name in ['glove', 'word2vec', 'fasttext']:
        if input_params[f"download_{model_name}"]:
            local_dir = Path(ROOT_DIR) / os.environ[
                'DATA_PATH'] / 'word_embeddings' / model_name
            if not os.path.exists(local_dir):
                os.makedirs(local_dir)
            vectors = download_gensim_vectors(model_name=model_name,
                                              local_dir=str(local_dir))

    # CoreNLP for Stanza use.
    if input_params['install_corenlp']:
        stanford_corenlp_dir = os.environ['CORENLP_HOME']
        stanza.install_corenlp(dir=stanford_corenlp_dir)
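
This fragment requires CORENLP_HOME to already exist in the environment; a minimal sketch of providing it before the code runs (the fallback path is an assumption):

# Assumed setup for the snippet above: make sure CORENLP_HOME is defined
# before stanza.install_corenlp(dir=...) is reached.
import os
os.environ.setdefault('CORENLP_HOME', os.path.expanduser('~/corenlp'))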