def get_authenticated_spark_HC(HC_LICENSE, HC_SECRET, AWS_ACCESS_KEY, AWS_SECRET_KEY, gpu=False):
    """Authenticate for Spark NLP for Healthcare and return a Spark session.

    Installs/imports the licensed healthcare library, exports the license
    credentials to the environment, then starts Spark with the jar matching
    the installed PySpark major/minor version.

    :param HC_LICENSE: Spark NLP for Healthcare license string.
    :param HC_SECRET: secret used to install and start the licensed library.
    :param AWS_ACCESS_KEY: AWS access key id for pretrained-model downloads.
    :param AWS_SECRET_KEY: AWS secret access key.
    :param gpu: start Spark with GPU jars when True. Defaults to False for
        consistency with the sibling ``get_authenticated_spark`` helpers
        (the original had no default, forcing every caller to pass it).
    :return: the SparkSession created by ``sparknlp_jsl.start()``.
    :raises ValueError: if the detected PySpark version is unsupported.
    """
    import_or_install_licensed_lib(HC_SECRET, 'healthcare')
    authenticate_enviroment_HC(HC_LICENSE, AWS_ACCESS_KEY, AWS_SECRET_KEY)
    import sparknlp
    import sparknlp_jsl

    # Driver-side memory/serializer settings forwarded to sparknlp_jsl.start().
    params = {
        "spark.driver.memory": "16G",
        "spark.kryoserializer.buffer.max": "2000M",
        "spark.driver.maxResultSize": "2000M"
    }
    if is_env_pyspark_2_3():
        return sparknlp_jsl.start(HC_SECRET, spark23=True, gpu=gpu,
                                  public=sparknlp.version(), params=params)
    if is_env_pyspark_2_4():
        return sparknlp_jsl.start(HC_SECRET, spark24=True, gpu=gpu,
                                  public=sparknlp.version(), params=params)
    if is_env_pyspark_3_0() or is_env_pyspark_3_1():
        return sparknlp_jsl.start(HC_SECRET, gpu=gpu,
                                  public=sparknlp.version(), params=params)
    raise ValueError(
        f"Current Spark version {get_pyspark_version()} not supported!")
def get_authenticated_spark(
        SPARK_NLP_LICENSE,
        AWS_ACCESS_KEY_ID,
        AWS_SECRET_ACCESS_KEY,
        JSL_SECRET,
        gpu=False,
):
    """Authenticate environment if not already done so and return a Spark
    Context with the Healthcare jar loaded.

    0. If no Spark-NLP-Healthcare, install it via PyPI.
    1. If not authenticated, run ``authenticate_enviroment()``.

    :param SPARK_NLP_LICENSE: Spark NLP for Healthcare license string.
    :param AWS_ACCESS_KEY_ID: AWS access key id for pretrained-model downloads.
    :param AWS_SECRET_ACCESS_KEY: AWS secret access key.
    :param JSL_SECRET: secret used to install and start the licensed library.
    :param gpu: start Spark with GPU jars when True.
    :return: the SparkSession created by ``sparknlp_jsl.start()``.
    :raises ValueError: if the detected PySpark version is unsupported.
    """
    authenticate_enviroment(SPARK_NLP_LICENSE, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    install_and_import_healthcare(JSL_SECRET)
    import sparknlp_jsl

    if is_env_pyspark_2_3():
        return sparknlp_jsl.start(JSL_SECRET, spark23=True, gpu=gpu)
    if is_env_pyspark_2_4():
        return sparknlp_jsl.start(JSL_SECRET, spark24=True, gpu=gpu)
    if is_env_pyspark_3_0() or is_env_pyspark_3_1():
        return sparknlp_jsl.start(JSL_SECRET, gpu=gpu)
    # Original printed the diagnostic and raised a bare ValueError, losing the
    # detail for any caller that catches/logs the exception; raise with the
    # message instead, matching the sibling helpers in this file.
    raise ValueError(
        f"Current Spark version {get_pyspark_version()} not supported!")
def get_authenticated_spark(
        SPARK_NLP_LICENSE,
        AWS_ACCESS_KEY_ID,
        AWS_SECRET_ACCESS_KEY,
        JSL_SECRET,
        gpu=False,
):
    """Authenticate environment if not already done so and return a Spark
    Context with the Healthcare jar loaded.

    0. If no Spark-NLP-Healthcare, install it via PyPI.
    1. If not authenticated, run ``authenticate_enviroment()``.

    :param SPARK_NLP_LICENSE: Spark NLP for Healthcare license string.
    :param AWS_ACCESS_KEY_ID: AWS access key id for pretrained-model downloads.
    :param AWS_SECRET_ACCESS_KEY: AWS secret access key.
    :param JSL_SECRET: secret used to install and start the licensed library.
    :param gpu: start Spark with GPU jars when True.
    :return: the SparkSession created by ``sparknlp_jsl.start()``.
    :raises ValueError: if the detected PySpark version is unsupported.
    """
    import sparknlp
    authenticate_enviroment_HC(SPARK_NLP_LICENSE, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    import_or_install_licensed_lib(JSL_SECRET)
    import sparknlp_jsl

    # Driver-side memory/serializer settings forwarded to sparknlp_jsl.start().
    params = {
        "spark.driver.memory": "16G",
        "spark.kryoserializer.buffer.max": "2000M",
        "spark.driver.maxResultSize": "2000M"
    }
    # Pin the public Spark NLP version on every branch: the original passed
    # public=sparknlp.version() only on the 3.0/3.1 branch, while the HC
    # variant of this helper passes it on all branches.
    if is_env_pyspark_2_3():
        return sparknlp_jsl.start(JSL_SECRET, spark23=True, gpu=gpu,
                                  public=sparknlp.version(), params=params)
    if is_env_pyspark_2_4():
        return sparknlp_jsl.start(JSL_SECRET, spark24=True, gpu=gpu,
                                  public=sparknlp.version(), params=params)
    if is_env_pyspark_3_0() or is_env_pyspark_3_1():
        return sparknlp_jsl.start(JSL_SECRET, gpu=gpu,
                                  public=sparknlp.version(), params=params)
    raise ValueError(
        f"Current Spark version {get_pyspark_version()} not supported!")
async def startup_event():
    """Startup hook: load the license, start Spark NLP (JSL) and preload models.

    Records timing milestones in the module-level ``event_list`` dict and
    publishes the Spark session through the module-level ``spark`` global.
    """
    event_list['0_start_up'] = datetime.now()
    print(f'startup has been started at {datetime.now()}...', )

    with open('license.json', 'r') as f:
        license_keys = json.load(f)

    # Export the license key/value pairs as environment variables so
    # sparknlp_jsl can pick them up.
    # (The original also called locals().update(license_keys) intending to
    # create local variables; updating locals() inside a function body is a
    # documented no-op in CPython, so that line has been removed.)
    os.environ.update(license_keys)

    print("Spark NLP Version :", sparknlp.version())
    print("Spark NLP_JSL Version :", sparknlp_jsl.version())

    global spark
    spark = sparknlp_jsl.start(license_keys['SECRET'])

    print(
        f'****** spark nlp healthcare version fired up {datetime.now()} ******'
    )
    event_list['1_sparknlp_fired'] = datetime.now()

    ner_models_clinical, ner_models_biobert = get_models_list()
    print(
        f'***** NER clinical and biobert models are listed {datetime.now()} .....'
    )
    event_list['2_models_listed'] = datetime.now()

    # load NER clinical and biobert models
    print(f'***** Running with GLoVe Embeddings {datetime.now()} *****')
    model_dict = load_sparknlp_models()
    event_list['3_glove_embeddings'] = datetime.now()

    print(f'***** Running with BioBert Embeddings {datetime.now()} *****')
    # NOTE(review): this rebinding discards the GLoVe model_dict loaded above;
    # presumably both loaders cache via side effects — confirm intent.
    model_dict = load_sparknlp_models_biobert()
    event_list['4_biobert_embeddings'] = datetime.now()

    print(event_list)
# Bootstrap a licensed Spark NLP session and run the "ner_model_finder"
# pretrained pipeline once as a smoke test.
from pyspark.sql import functions as F
from sparknlp.annotator import *
from sparknlp_jsl.annotator import *
from sparknlp.base import *
import sparknlp_jsl
import sparknlp
import warnings

warnings.filterwarnings('ignore')

# Driver-side memory/serializer settings forwarded to sparknlp_jsl.start().
params = {
    "spark.driver.memory": "16G",
    "spark.kryoserializer.buffer.max": "2000M",
    "spark.driver.maxResultSize": "2000M",
}

print("Spark NLP Version :", sparknlp.version())
print("Spark NLP_JSL Version :", sparknlp_jsl.version())

# NOTE(review): SECRET is expected to be defined earlier in the full file
# (e.g. read from a license json) — confirm against the complete source.
spark = sparknlp_jsl.start(SECRET, params=params)

from sparknlp.pretrained import PretrainedPipeline

# Annotate a single term and dump the result framed by separator rules.
ner_pipeline = PretrainedPipeline("ner_model_finder", "en", "clinical/models")
result = ner_pipeline.annotate("medication")

rule = 100 * '-'
print(rule)
print(result)
print(rule)
# NOTE(review): this chunk assumes `sparknlp`, `sparknlp_jsl`, `os`, `st`
# (streamlit) and the annotator classes were imported earlier in the file.
print("Spark NLP Version :", sparknlp.version())
print("Spark NLP_JSL Version :", sparknlp_jsl.version())

import json

# Load the JSL license bundle shipped alongside the app (Colab path).
with open('/content/sparknlp_keys.json', 'r') as f:
    license_keys = json.load(f)

# Alternate (EC2) license path kept for reference:
# with open('/home/ubuntu/hasham/jsl_keys.json', 'r') as f:
#     license_keys = json.load(f)

secret = license_keys['SECRET']
# Export the credentials that sparknlp_jsl reads from the environment.
os.environ['SPARK_NLP_LICENSE'] = license_keys['SPARK_NLP_LICENSE']
os.environ['AWS_ACCESS_KEY_ID'] = license_keys['AWS_ACCESS_KEY_ID']
os.environ['AWS_SECRET_ACCESS_KEY'] = license_keys['AWS_SECRET_ACCESS_KEY']

spark = sparknlp_jsl.start(license_keys['SECRET'])


@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def load_sparknlp_models():
    """Download and build the pretrained healthcare pipeline stages.

    Streamlit-cached so the (slow) model downloads happen once per session.
    NOTE(review): only the first stages are visible in this chunk; the
    function may continue beyond this view — confirm against the full file.
    """
    print('loading pretrained models')
    sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models") \
        .setInputCols(["document"]) \
        .setOutputCol("sentence")
    embeddings_clinical = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models") \
        .setInputCols(["sentence", "token"]) \
        .setOutputCol("embeddings")
# Streamlit page title for the clinical NER demo.
st.title("Spark NLP Clinical NER Playground")

import json
import os

from pyspark.ml import Pipeline, PipelineModel
from pyspark.sql import SparkSession
from sparknlp.annotator import *
from sparknlp_jsl.annotator import *
from sparknlp.base import *
import sparknlp_jsl
import sparknlp
import json  # NOTE(review): duplicate of the import above — harmless, kept as-is.

# Start the licensed Spark session; SECRET must be present in the environment.
spark = sparknlp_jsl.start(os.environ['SECRET'])

print("Spark NLP Version :", sparknlp.version())
print("Spark NLP_JSL Version :", sparknlp_jsl.version())


@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def load_sparknlp_models():
    """Download and build pretrained healthcare pipeline stages.

    Streamlit-cached so the (slow) model downloads happen once per session.
    NOTE(review): this definition is cut off at the end of the visible chunk;
    the trailing line continues beyond this view.
    """
    print('loading pretrained models')
    sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models") \
        .setInputCols(["document"]) \
        .setOutputCol("sentence")
    embeddings_clinical = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models") \
from sparknlp_display import RelationExtractionVisualizer
import warnings

warnings.filterwarnings('ignore')

# Driver-side memory/serializer settings forwarded to sparknlp_jsl.start().
params = {
    "spark.driver.memory": "16G",
    "spark.kryoserializer.buffer.max": "2000M",
    "spark.driver.maxResultSize": "2000M"
}

# Uploading license keys.
# NOTE(review): `json` and `sparknlp_jsl` are assumed imported earlier in the file.
with open('/content/spark_nlp_for_healthcare 4.json') as f:
    license_keys = json.load(f)

spark = sparknlp_jsl.start(license_keys['SECRET'], params=params)

import streamlit as st

# App page configuration (optional).
st.set_page_config(page_title="aemintek app", page_icon=":shark:", layout="centered")

# Adding images in a three-column layout.
# NOTE(review): col2/col3 are unused in this chunk; presumably filled later — confirm.
col1, col2, col3 = st.columns(3)
with col1:
    st.image(
        "https://repository-images.githubusercontent.com/104670986/2e728700-ace4-11ea-9cfc-f3e060b25ddf"
    )