def config(parsers=None):
    """
    Configure the apikey for ncbo and/or install coreNLP and start the server.

    Input:
        - parsers is a dictionary consisting of pairs of parser name and its
          apikey or installation location
    Example:
        - config(parsers={'ncbo': 'fc5d5241-1e8e-4b44-b401-310ca39573f6',
                          'coreNLP': '~/corenlp'})
        - config() --> configuration with default values
    """
    import json
    import os.path

    # Avoid a mutable default argument: the dict is modified below, so a
    # shared default would leak state between calls.
    if parsers is None:
        parsers = {'ncbo': 'fc5d5241-1e8e-4b44-b401-310ca39573f6',
                   'coreNLP': '~/corenlp'}

    currentPath = os.path.dirname(os.path.realpath(__file__))
    configFile = os.path.join(currentPath, 'config.txt')

    # Set up the coreNLP server if the installation location is available.
    if 'coreNLP' in parsers:
        # Make sure the input is an absolute path; if not, convert it.
        from os.path import expanduser
        parsers['coreNLP'] = expanduser(parsers['coreNLP'])
        if not os.path.isabs(parsers['coreNLP']):
            parsers['coreNLP'] = expanduser('~/' + parsers['coreNLP'])
        # Install only when it is not yet installed.
        if not os.path.isdir(parsers['coreNLP']):
            print('Relax ... installing CoreNLP takes a few minutes')
            import stanza
            stanza.install_corenlp(dir=parsers['coreNLP'])
            parsers['coreNLP_EP'] = 'http://localhost:9001'
        else:
            print('CoreNLP already installed')

    with open(configFile, 'w') as fp:
        json.dump(parsers, fp)
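# A minimal usage sketch for config() above; the module name
# `parser_config` is an assumption for illustration only.
from parser_config import config

config(parsers={'coreNLP': '~/corenlp'})  # installs CoreNLP on first run
config()                                  # writes the default settings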
def preprocess():
    try:
        import stanza
    except ImportError:
        # Fall back to installing stanza on the fly if it is missing.
        import os
        os.system('pip install stanza -q')
        import stanza
    stanza.install_corenlp(dir=get_path_corenlp())
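# get_path_corenlp() is defined elsewhere in that project; a hedged sketch
# of what it plausibly does (the env-var fallback and the default path are
# assumptions, not the project's actual implementation):
import os


def get_path_corenlp() -> str:
    return os.environ.get('CORENLP_HOME', os.path.expanduser('~/corenlp'))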
import stanza
from stanza.server import CoreNLPClient


def start_server() -> CoreNLPClient:
    """Starts a CoreNLP server through Stanza and returns it."""
    stanza.install_corenlp(dir="./stanza_corenlp")
    return CoreNLPClient(
        annotators=['tokenize', 'ssplit', 'pos', 'lemma', 'ner', 'parse',
                    'depparse', 'coref', 'kbp', 'natlog', 'openie'],
        timeout=30000,
        memory='16G')
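# Hedged usage sketch for start_server(): annotate one sentence, print its
# OpenIE triples, and shut the server down. The sample text is arbitrary;
# field names follow the CoreNLP protobuf objects that stanza's
# CoreNLPClient returns by default.
client = start_server()
try:
    ann = client.annotate("Barack Obama was born in Hawaii.")
    for triple in ann.sentence[0].openieTriple:
        print(triple.subject, triple.relation, triple.object)
finally:
    client.stop()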
import os
import shutil
import tempfile

import stanza


def test_install_corenlp():
    # We do not reset the CORENLP_HOME variable since this may impact the
    # client tests.
    with tempfile.TemporaryDirectory(dir=".") as test_dir:
        # The download method doesn't install over existing directories.
        shutil.rmtree(test_dir)
        stanza.install_corenlp(dir=test_dir)

        assert os.path.isdir(test_dir), \
            "Installation destination directory not found."
        jar_files = [f for f in os.listdir(test_dir)
                     if f.endswith('.jar') and f.startswith('stanford-corenlp')]
        assert len(jar_files) > 0, \
            "Cannot find stanford-corenlp jar files in the installation directory."
        assert not os.path.exists(os.path.join(test_dir, 'corenlp.zip')), \
            "Downloaded zip file was not removed."
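# The test above can be run on its own with pytest; the file path below is
# an assumption about where the test module lives:
#
#   pytest -q tests/test_installation.py::test_install_corenlp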
def init_stanza(self):
    if self.stanza_initialized:
        return
    corenlp_dir = './corenlp'
    if not os.path.isdir(corenlp_dir):
        stanza.install_corenlp(dir=corenlp_dir)
    try:
        # Keep the client alive after this method returns; a `with` block
        # would shut the server down as soon as it exits.
        self.client = CoreNLPClient(
            annotators=['tokenize', 'ssplit', 'pos', 'lemma', 'ner',
                        'parse', 'depparse', 'coref'],
            timeout=30000,
            memory='16G',
            be_quiet=True)
        self.stanza_initialized = True
    except AssertionError as e:
        print("You have probably not set $CORENLP_HOME. Run this:")
        print("export CORENLP_HOME={}".format(
            os.path.abspath(corenlp_dir)))
        raise e
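# Minimal hypothetical host class for init_stanza() above, showing the
# imports and the two attributes the method relies on (the class name and
# attribute defaults are assumptions for illustration):
import os

import stanza
from stanza.server import CoreNLPClient


class StanzaWrapper:
    def __init__(self):
        self.stanza_initialized = False
        self.client = None

    # init_stanza(self), as defined above, would live here as a method.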
# stanfordnlp client
import json

from .params import *

import stanza

stanza.install_corenlp()


def ies_of(sentence):
    if not NLPclient:
        return
    ts = []
    for triple in sentence['openie']:
        s1, s2 = triple['subjectSpan']
        v1, v2 = triple['relationSpan']
        o1, o2 = triple['objectSpan']
        # t = ((s1 - 1, s2 - 1), (v1 - 1, v2 - 1), (o1 - 1, o2 - 1))
        t = ((s1, s2), (v1, v2), (o1, o2))
        ts.append(t)
    yield ts


def deps_of(sentence):
    deps = []
    # print('SENT', [x for x in sentence['entitymentions']])
    for x in sentence['enhancedPlusPlusDependencies']:
        r = x['dep']
        t = x['governor']
        f = x['dependent']
        deps.append((f - 1, r, t - 1))
    return deps


def lexs_of(sentence):
    toks = sentence['tokens']
    for tok in toks:
# stanfordnlp client
import json

from .params import *

import stanza

'''
corenlp_dir = '/root/corenlp'
stanza.install_corenlp(dir=corenlp_dir)

import os
os.environ["CORENLP_HOME"] = corenlp_dir
'''


def ies_of(sentence):
    if not NLPclient:
        return
    ts = []
    for triple in sentence['openie']:
        s1, s2 = triple['subjectSpan']
        v1, v2 = triple['relationSpan']
        o1, o2 = triple['objectSpan']
        # t = ((s1 - 1, s2 - 1), (v1 - 1, v2 - 1), (o1 - 1, o2 - 1))
        t = ((s1, s2), (v1, v2), (o1, o2))
        ts.append(t)
    yield ts


def deps_of(sentence):
    deps = []
    # print('SENT', [x for x in sentence['entitymentions']])
    for x in sentence['enhancedPlusPlusDependencies']:
        r = x['dep']
        t = x['governor']
        f = x['dependent']
        deps.append((f - 1, r, t - 1))
    return deps
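# Hedged driver for ies_of()/deps_of() above: both expect the dict-style
# sentences produced when CoreNLPClient is asked for JSON output. The
# sample sentence and annotator list are assumptions ('openie' needs
# 'pos', 'lemma', 'depparse', and 'natlog' in the pipeline); note that
# ies_of() yields nothing unless NLPclient (from .params) is truthy.
from stanza.server import CoreNLPClient

with CoreNLPClient(annotators=['tokenize', 'ssplit', 'pos', 'lemma',
                               'depparse', 'natlog', 'openie'],
                   output_format='json', memory='8G') as client:
    doc = client.annotate("Barack Obama was born in Hawaii.")
    for sentence in doc['sentences']:
        print(list(ies_of(sentence)))  # OpenIE (subject, relation, object) spans
        print(deps_of(sentence))       # 0-based enhanced++ dependency edges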
if not os.path.exists(stanford_dir):
    os.makedirs(stanford_dir)
download(url=stanford_srparser_url, local_dir=str(stanford_dir))
log.info(
    f"Downloaded Stanford Shift-Reduce Constituency Parser library to '{stanford_dir}'."
)

# MongoDB Java Driver.
if input_params['download_mongodb']:
    mongodb_url = 'https://repo1.maven.org/maven2/org/mongodb/mongo-java-driver/3.12.7/mongo-java-driver-3.12.7.jar'
    mongodb_dir = Path(ROOT_DIR) / 'lib' / 'mongodb'
    if not os.path.exists(mongodb_dir):
        os.makedirs(mongodb_dir)
    download(url=mongodb_url, local_dir=str(mongodb_dir))
    log.info(f"Downloaded MongoDB Java Driver to '{mongodb_dir}'.")

# Word embeddings.
for model_name in ['glove', 'word2vec', 'fasttext']:
    if input_params[f"download_{model_name}"]:
        local_dir = (Path(ROOT_DIR) / os.environ['DATA_PATH'] /
                     'word_embeddings' / model_name)
        if not os.path.exists(local_dir):
            os.makedirs(local_dir)
        vectors = download_gensim_vectors(model_name=model_name,
                                          local_dir=str(local_dir))

# CoreNLP for Stanza use.
if input_params['install_corenlp']:
    stanford_corenlp_dir = os.environ['CORENLP_HOME']
    stanza.install_corenlp(dir=stanford_corenlp_dir)
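# Hedged sketch of the download() helper the script above relies on; the
# real project's implementation may differ (this minimal version streams a
# file into local_dir with requests). download_gensim_vectors() is not
# sketched here.
import os

import requests


def download(url: str, local_dir: str) -> str:
    os.makedirs(local_dir, exist_ok=True)
    target = os.path.join(local_dir, url.rsplit('/', 1)[-1])
    with requests.get(url, stream=True, timeout=60) as resp:
        resp.raise_for_status()
        with open(target, 'wb') as fh:
            for chunk in resp.iter_content(chunk_size=1 << 20):
                fh.write(chunk)
    return target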