Example #1
def get_authenticated_spark_HC(HC_LICENSE, HC_SECRET, AWS_ACCESS_KEY,
                               AWS_SECRET_KEY, gpu):
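    """Install/authenticate the licensed Healthcare library and start a Spark
    session matching the detected PySpark version."""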
    import_or_install_licensed_lib(HC_SECRET, 'healthcare')
    authenticate_enviroment_HC(HC_LICENSE, AWS_ACCESS_KEY, AWS_SECRET_KEY)
    import sparknlp
    import sparknlp_jsl
    params = {
        "spark.driver.memory": "16G",
        "spark.kryoserializer.buffer.max": "2000M",
        "spark.driver.maxResultSize": "2000M"
    }

    if is_env_pyspark_2_3():
        return sparknlp_jsl.start(HC_SECRET,
                                  spark23=True,
                                  gpu=gpu,
                                  public=sparknlp.version(),
                                  params=params)
    if is_env_pyspark_2_4():
        return sparknlp_jsl.start(HC_SECRET,
                                  spark24=True,
                                  gpu=gpu,
                                  public=sparknlp.version(),
                                  params=params)
    if is_env_pyspark_3_0() or is_env_pyspark_3_1():
        return sparknlp_jsl.start(HC_SECRET,
                                  gpu=gpu,
                                  public=sparknlp.version(),
                                  params=params)
    raise ValueError(
        f"Current Spark version {get_pyspark_version()} not supported!")
Example #2
def read_spark():
    session = dict()
    session["Spark NLP Version"] = sparknlp.version()
    session["Spark NLP_JSL Version"] = sparknlp_jsl.version()
    session["App Name"] = spark.sparkContext.getConf().getAll()[6][1]
    print(session)

    return session
Example #3
def get_authenticated_spark_OCR(OCR_LICENSE, OCR_SECRET, AWS_ACCESS_KEY,
                                AWS_SECRET_KEY, gpu):
    import_or_install_licensed_lib(OCR_SECRET, 'ocr')
    authenticate_enviroment_OCR(OCR_LICENSE, AWS_ACCESS_KEY, AWS_SECRET_KEY)
    import sparkocr
    import sparknlp
    params = {  # note: defined but not passed to sparkocr.start() below
        "spark.driver.memory": "16G",
        "spark.kryoserializer.buffer.max": "2000M",
        "spark.driver.maxResultSize": "2000M"
    }
    OS_version = sparknlp.version()
    spark = sparkocr.start(
        secret=OCR_SECRET,
        nlp_version=OS_version,
    )
    spark.sparkContext.setLogLevel('ERROR')
    return spark
Example #4
async def startup_event():

    event_list['0_start_up'] = datetime.now()
    print(f'startup began at {datetime.now()}...')

    with open('license.json', 'r') as f:
        license_keys = json.load(f)

    # Note: updating locals() has no effect inside a function in CPython;
    # the os.environ.update() below is what actually exposes the keys.
    locals().update(license_keys)

    # Adding license key-value pairs to environment variables
    os.environ.update(license_keys)

    print("Spark NLP Version :", sparknlp.version())
    print("Spark NLP_JSL Version :", sparknlp_jsl.version())

    global spark

    spark = sparknlp_jsl.start(license_keys['SECRET'])
    print(
        f'****** Spark NLP for Healthcare fired up at {datetime.now()} ******'
    )
    event_list['1_sparknlp_fired'] = datetime.now()

    ner_models_clinical, ner_models_biobert = get_models_list()
    print(
        f'***** NER clinical and biobert models listed at {datetime.now()} *****'
    )
    event_list['2_models_listed'] = datetime.now()

    # load NER clinical and biobert models
    print(f'***** Running with GLoVe Embeddings  {datetime.now()} *****')
    model_dict = load_sparknlp_models()
    event_list['3_glove_embeddings'] = datetime.now()

    print(f'***** Running with BioBert Embeddings {datetime.now()} *****')
    model_dict = load_sparknlp_models_biobert()  # note: overwrites the GLoVe model dict loaded above
    event_list['4_biobert_embeddings'] = datetime.now()

    print(event_list)
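The coroutine above looks like a FastAPI startup hook; a minimal sketch of the wiring, assuming a FastAPI app object (the app itself is not shown in the original):

from fastapi import FastAPI

app = FastAPI()

# Register the coroutine above to run once when the server boots.
app.add_event_handler("startup", startup_event)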
Example #5
def get_authenticated_spark(
    SPARK_NLP_LICENSE,
    AWS_ACCESS_KEY_ID,
    AWS_SECRET_ACCESS_KEY,
    JSL_SECRET,
    gpu=False,
):
    """
    Authenticates environment if not already done so and returns Spark Context with Healthcare Jar loaded
    0. If no Spark-NLP-Healthcare, install it via PyPi
    1. If not auth, run authenticate_enviroment()

    """
    import sparknlp
    authenticate_enviroment_HC(SPARK_NLP_LICENSE, AWS_ACCESS_KEY_ID,
                               AWS_SECRET_ACCESS_KEY)
    import_or_install_licensed_lib(JSL_SECRET)
    import sparknlp_jsl
    params = {
        "spark.driver.memory": "16G",
        "spark.kryoserializer.buffer.max": "2000M",
        "spark.driver.maxResultSize": "2000M"
    }
    if is_env_pyspark_2_3():
        return sparknlp_jsl.start(JSL_SECRET,
                                  spark23=True,
                                  gpu=gpu,
                                  params=params)
    if is_env_pyspark_2_4():
        return sparknlp_jsl.start(JSL_SECRET,
                                  spark24=True,
                                  gpu=gpu,
                                  params=params)
    if is_env_pyspark_3_0() or is_env_pyspark_3_1():
        return sparknlp_jsl.start(JSL_SECRET,
                                  gpu=gpu,
                                  public=sparknlp.version(),
                                  params=params)
    raise ValueError(
        f"Current Spark version {get_pyspark_version()} not supported!")
Example #6
def get_authenticated_spark_HC_and_OCR(HC_LICENSE, HC_SECRET, OCR_LICENSE,
                                       OCR_SECRET, AWS_ACCESS_KEY,
                                       AWS_SECRET_KEY, gpu):
    import_or_install_licensed_lib(HC_SECRET, 'healthcare')
    import_or_install_licensed_lib(OCR_SECRET, 'ocr')
    authenticate_enviroment_HC_and_OCR(HC_LICENSE, OCR_LICENSE, AWS_ACCESS_KEY,
                                       AWS_SECRET_KEY)
    import sparkocr
    import sparknlp
    params = {
        "spark.driver.memory": "16G",
        "spark.kryoserializer.buffer.max": "2000M",
        "spark.driver.maxResultSize": "2000M"
    }

    HC_version = HC_SECRET.split('-')[0]
    OS_version = sparknlp.version()
    spark = sparkocr.start(secret=OCR_SECRET,
                           nlp_secret=HC_SECRET,
                           nlp_version=OS_version,
                           nlp_internal=HC_version)
    spark.sparkContext.setLogLevel('ERROR')
    return spark
Example #7
import sparknlp
from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .appName("Global DEMO - Spark NLP Enterprise 2.3.4") \
    .master("local[*]") \
    .config("spark.rdd.compress","true") \
    .config("spark.driver.memory","8G") \
    .config("spark.driver.maxResultSize", "2G") \
    .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") \
    .config("spark.kryoserializer.buffer.max", "600M") \
    .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:2.3.4") \
    .getOrCreate()
#spark = sparknlp.start()

print("Spark NLP version: ", sparknlp.version())
print("Apache Spark version: ", spark.version)

"""Create some data for testing purposes"""

from pyspark.sql import Row
R = Row('sentence', 'start', 'end')
test_data = spark.createDataFrame([R('Peter is a good person, and he was working at IBM', 0, 1)])

"""Create a custom pipeline"""

!ls
from sparknlp.training import CoNLL
training_data = CoNLL().readDataset(spark, 'con_rest_train.bio')
training_data.show()
from sparknlp.annotator import *
from sparknlp.common import RegexRule
from sparknlp.base import DocumentAssembler, Finisher

Example #8

# COMMAND ----------

# MAGIC %md #### 2. Load SparkSession if not already there

# COMMAND ----------

import sparknlp 

spark = sparknlp.start()

print("Spark NLP version")
sparknlp.version()
print("Apache Spark version")
spark.version

# COMMAND ----------

! rm /tmp/sentiment.parquet.zip
! rm -rf /tmp/sentiment.parquet
! wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment.parquet.zip -P /tmp
! wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/lemma-corpus-small/lemmas_small.txt -P /tmp
! wget -N https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/sentiment-corpus/default-sentiment-dict.txt -P /tmp    

# COMMAND ----------

! unzip /tmp/sentiment.parquet.zip -d /tmp/
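A plausible follow-up cell, assuming the notebook goes on to read the dataset just unzipped:

# COMMAND ----------

# Peek at a few rows of the downloaded sentiment dataset.
data = spark.read.parquet("/tmp/sentiment.parquet")
data.show(5, truncate=False)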
Example #9

import pandas as pd
import streamlit as st
import sparknlp

# The original excerpt begins mid-loop; the lines down to the second append are a
# plausible reconstruction, assuming annotated_text is a LightPipeline.fullAnnotate() dict.
if 'entities' in annotated_text.keys():
    chunks = []
    entities = []
    for n in annotated_text['entities']:
        chunks.append(n.result)
        entities.append(n.metadata['entity'])

    st.write('')
    st.write('Entities')
    st.dataframe(pd.DataFrame({'chunks': chunks, 'entities': entities}))
    #st.write(annotated_text['entities'])

if 'sentence' in annotated_text.keys():
    st.write('')
    st.write('Sentences')
    st.write('')
    st.write(annotated_text['sentence'])
    #st.dataframe(pd.DataFrame({'sentences':annotated_text['sentence']}))

if 'sentiment' in annotated_text.keys():

    st.write('')
    st.write('Sentiment')
    st.write('')
    st.dataframe(
        pd.DataFrame({
            'sentence': annotated_text['sentence'],
            'sentiment': annotated_text['sentiment']
        }))

st.subheader('Model Output')
st.write(annotated_text)

st.sidebar.markdown("Spark NLP version: {}".format(sparknlp.version()))
st.sidebar.markdown("Apache Spark version: {}".format(spark.version))
Example #10
import sys
import zipfile

import tensorflow as tf

# %%
print("Tensorflow: " + tf.__version__)
print("Keras: " + tf.keras.__version__)
sys.path.append('./tflow/ner/')
sys.path.append('./tflow/lib/ner/')

# %%
from sparknlp.annotator import *
from sparknlp.common import *
from sparknlp.base import *
from sparknlp.embeddings import *
import sparknlp

print("SparkNLP: " + sparknlp.version())

# %%
from embeddings_resolver import BertEmbeddingsResolver
from ner_model_saver import NerModelSaver

# %%
CORPUS_PATH = "/home/rcuesta/TFM/es.rcs.tfm/es.rcs.tfm.corpus/"
DATASET_PATH = CORPUS_PATH + "datasets/"
BERT_PATH = DATASET_PATH + 'bert/'
BIOBERT_PATH = DATASET_PATH + 'biobert/'

SPARKNLP_BERT_MODEL_PATH = CORPUS_PATH + "models/bert"

# %%
spark = sparknlp.start()
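A plausible next step given the paths above (a sketch; it assumes a Spark NLP BERT model was previously saved under SPARKNLP_BERT_MODEL_PATH):

from sparknlp.annotator import BertEmbeddings

# Load a previously exported Spark NLP BERT model from disk.
bert = BertEmbeddings.load(SPARKNLP_BERT_MODEL_PATH) \
    .setInputCols(["sentence", "token"]) \
    .setOutputCol("embeddings")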