Example no. 1
import logging

import numpy

import main


def model_predict(encoder, ohe, model, text):

    # Transform characters
    chars = list(text)
    chars, encoded_chars, new_encoder, X = main.transform(chars)

    # Infer y hat, keeping the prediction for the last timestep
    pred = model.predict(X)[-1]

    # Rescale y hat so the probabilities sum to just under 1,
    # as numpy.random.multinomial requires
    scaled_pred = pred / (float(sum(pred)) * (1 + 1e-5))
    logging.debug('Sum of pred: {}'.format(float(sum(pred))))

    # Pick the most likely character by index
    most_likely_index = numpy.argmax(scaled_pred)

    # Sample a character index from the multinomial distribution
    next_char_index = numpy.argmax(numpy.random.multinomial(1, scaled_pred, 1))

    # Convert both indices back to characters
    # (inverse_transform expects an array-like, not a scalar)
    most_likely_char = encoder.inverse_transform([most_likely_index])[0]
    next_char = encoder.inverse_transform([next_char_index])[0]
    logging.info('Most likely char, p: {}, {}. Next char, p: {}, {}'.format(
        most_likely_char, pred[most_likely_index], next_char,
        pred[next_char_index]))

    # Return the randomly sampled character
    return next_char
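
A minimal sketch of how model_predict might be driven to generate text one character at a time. The seed string and generation length are illustrative, and encoder, ohe, and model are assumed to come from a prior training run; none of this is in the original example:

# Hypothetical driver: extend a seed string one sampled character at a time.
# `encoder`, `ohe`, and `model` are assumed to exist from a training step.
seed = 'the quick brown fox '
generated = seed
for _ in range(100):
    generated += model_predict(encoder, ohe, model, generated)
print(generated)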
Example no. 2
from pyspark.sql import SparkSession
from pyspark.sql.types import StringType

import main


def test_transform(spark_session: SparkSession) -> None:
    # Single-column DataFrame of whitespace-separated words, including edge
    # cases: leading/trailing spaces, a single word, and an empty string
    df = spark_session.createDataFrame(["a b cc", " a b ", "a", ""],
                                       schema=StringType())
    expected_transformed_df = spark_session.createDataFrame(
        [("a", 3), ("b", 2), ("cc", 1)], schema=["value", "count"])
    transformed_df = main.transform(df)
    assert transformed_df.collect() == expected_transformed_df.collect()
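
The test relies on a spark_session fixture that is not shown. A minimal sketch of one, assuming pytest and a local-mode Spark; only the fixture's name is fixed by the test's parameter, the rest is an assumption:

import pytest
from pyspark.sql import SparkSession


@pytest.fixture(scope='session')
def spark_session():
    # Local SparkSession shared across the test session, stopped at teardown.
    spark = (SparkSession.builder
             .master('local[2]')
             .appName('transform-tests')
             .getOrCreate())
    yield spark
    spark.stop()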
Example no. 3
import logging

import spacy

import main


def UploadManager(filename):
    if get_name_of_filepath(filename) != 'NOT A WORD DOCUMENT':
        print(filename)

        main.blob_download()

        logging.getLogger().setLevel(logging.INFO)

        # Extract data from upstream
        observations = main.extract()

        # Load the spaCy English model (the 'en' shortcut requires spaCy 2.x)
        nlp = spacy.load('en')

        # Transform data to have appropriate fields
        observations, nlp = main.transform(observations, nlp)

        # Load data for downstream consumption
        main.load(observations, nlp)
        main.load_to_json(observations, nlp)

        # Send data to CosmosDB
        main.send_to_Cosmos()
        print('Data has been sent to Cosmos')

        # Create the Azure Search data source, index, and indexer
        main.implement_Azure_search()
    else:
        print('error: ' + filename)
        return 'NOT A WORD DOCUMENT'
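
Both this example and the next gate on a get_name_of_filepath helper that is not shown. A minimal sketch consistent with how it is called; the extension check is an assumption, and the next example additionally implies the helper returns a path on success:

def get_name_of_filepath(filepath):
    # Hypothetical sketch: return the path for Word documents, otherwise
    # the sentinel string the callers compare against.
    if filepath.lower().endswith(('.doc', '.docx')):
        return filepath
    return 'NOT A WORD DOCUMENT'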
Example no. 4
import logging
import os

import spacy

import main

# BlobManager and config_blob are project-local modules; their import paths
# are not shown in the original example.


def UploadManager_path(filepath, output_directorypath):
    blob = BlobManager(config_blob.BLOB_NAME, config_blob.BLOB_KEY)

    if get_name_of_filepath(filepath) != 'NOT A WORD DOCUMENT':
        print(filepath)
        path, filename = os.path.split(get_name_of_filepath(filepath))
        blob.upload(path, filename, config_blob.BLOB_CONTAINER)
        print('blob connection')
        root_output, directory_output = os.path.split(output_directorypath)
        print(directory_output)
        print(root_output)
        blob.download_all_blobs(root_output, directory_output, config_blob.BLOB_CONTAINER)

        logging.getLogger().setLevel(logging.INFO)

        # Extract data from upstream
        observations = main.extract()

        # Load the spaCy English model (the 'en' shortcut requires spaCy 2.x)
        nlp = spacy.load('en')

        # Transform data to have appropriate fields
        observations, nlp = main.transform(observations, nlp)

        # Load data for downstream consumption
        main.load(observations, nlp)
        main.load_to_json(observations, nlp)

        # Send data to CosmosDB (qualified as main.send_to_Cosmos, matching
        # the previous example)
        main.send_to_Cosmos()
        print('sent to cosmos')
    else:
        print('error: ' + filepath)
        return 'NOT A WORD DOCUMENT'
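
A hypothetical invocation of the example above; both paths are made up for illustration:

# Hypothetical driver; the input document and output directory are made up.
result = UploadManager_path('docs/report.docx', 'output/processed')
if result == 'NOT A WORD DOCUMENT':
    print('skipped: not a Word document')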
Example no. 5
class TestTransform:
    def test_one(self):
        assert transform(1) == 'Один'

    def test_null(self):
        assert transform(0) == 'Ноль'

    def test_hex(self):
        assert transform('0x15') == '15'

    def test_bin(self):
        assert transform('0b00100101') == '00100101'

    def test_three(self):
        assert transform(3) == 'Три'

    def test_nine(self):
        assert transform(9) == 'Девять'

    def test_eight(self):
        assert transform(8) == 'Восемь'

    def test_seven(self):
        assert transform(7) == 'Семь'

    def test_six(self):
        assert transform(6) == 'Шесть'

    def test_five(self):
        assert transform(5) == 'Пять'

    def test_four(self):
        assert transform(4) == 'Четыре'

    def test_oct(self):
        assert transform('0o25') == '25'

    def test_two(self):
        assert transform(2) == 'Два'
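
The transform under test is not shown. A minimal sketch that would satisfy these assertions; the digit-to-Russian-word table and the prefix stripping are inferred from the tests themselves, and its behavior for any other input is an assumption:

def transform(value):
    # Inferred contract: integers 0-9 map to their Russian names; strings
    # with a 0x/0b/0o prefix are returned with the prefix stripped.
    words = ['Ноль', 'Один', 'Два', 'Три', 'Четыре',
             'Пять', 'Шесть', 'Семь', 'Восемь', 'Девять']
    if isinstance(value, int):
        return words[value]
    if value[:2] in ('0x', '0b', '0o'):
        return value[2:]
    return value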