def model_predict(encoder, ohe, model, text):
    """Predict the next character for *text* by sampling from the model's output.

    Args:
        encoder: Label encoder mapping class indices back to characters.
        ohe: One-hot encoder (unused here; kept for interface compatibility).
        model: Trained model exposing ``predict``.
        text: Input string whose next character is to be predicted.

    Returns:
        A single character sampled from the multinomial distribution over
        the model's predicted probabilities (NOT necessarily the argmax).
    """
    # Transform characters via the shared pipeline; only the design
    # matrix X is needed here, the other outputs are discarded.
    chars = list(text)
    _, _, _, X = main.transform(chars)
    # Infer the y-hat vector for the last timestep.
    pred = model.predict(X)[-1]
    # Scale y-hat so probabilities sum to slightly less than 1:
    # numpy.random.multinomial requires sum(pvals) <= 1.
    # Hoist the sum — the original computed it twice.
    pred_sum = float(sum(pred))
    scaled_pred = pred / (pred_sum * (1 + 1e-5))
    logging.debug('Sum of pred: %s', pred_sum)
    # Most likely character by index (logged for comparison only).
    most_likely_index = numpy.argmax(scaled_pred)
    # Sample the next character's index from one multinomial draw.
    next_char_index = numpy.argmax(numpy.random.multinomial(1, scaled_pred, 1))
    # Convert both indices back to characters.
    most_likely_char = encoder.inverse_transform(most_likely_index)
    next_char = encoder.inverse_transform(next_char_index)
    # Lazy %-style args avoid formatting when the level is disabled.
    logging.info('Most likely char, p: %s, %s. Next char, p: %s, %s',
                 most_likely_char, pred[most_likely_index],
                 next_char, pred[next_char_index])
    # Return the multinomial-sampled character.
    return next_char
def test_transform(spark_session: SparkSession) -> None:
    """main.transform should yield per-token counts, ignoring empty strings.

    Fix: Spark makes no row-ordering guarantee, so comparing raw
    ``collect()`` lists is flaky — sort both sides before comparing.
    """
    df = spark_session.createDataFrame(
        ["a b cc", " a b ", "a", ""], schema=StringType())
    expected_transformed_df = spark_session.createDataFrame(
        [("a", 3), ("b", 2), ("cc", 1)], schema=["value", "count"])

    transformed_df = main.transform(df)

    # Sort rows to make the comparison order-independent.
    assert (sorted(transformed_df.collect())
            == sorted(expected_transformed_df.collect()))
def UploadManager(filename):
    """Run the full ingestion pipeline for *filename* if it is a Word document.

    Pipeline: download blobs, extract observations, run Spacy NLP
    transforms, load results, push to CosmosDB, and set up Azure Search.

    Returns:
        'NOT A WORD DOCUMENT' when the filename fails the
        get_name_of_filepath check; otherwise None (side effects only).
    """
    # NOTE: removed a large block of commented-out blob upload/download
    # experiments that obscured the live pipeline below.
    if get_name_of_filepath(filename) != 'NOT A WORD DOCUMENT':
        print(filename)
        main.blob_download()
        logging.getLogger().setLevel(logging.INFO)
        # Extract data from upstream.
        observations = main.extract()
        # Spacy NLP model — assumes the 'en' model is installed.
        nlp = spacy.load('en')
        # Transform data to have appropriate fields.
        observations, nlp = main.transform(observations, nlp)
        # Load data for downstream consumption.
        main.load(observations, nlp)
        main.load_to_json(observations, nlp)
        # Send data to CosmosDB.
        main.send_to_Cosmos()
        print('sent to cosmos')
        print("Data is ready to be send to cosmos")
        # Create Azure Search datasource, index and indexer.
        main.implement_Azure_search()
    else:
        print('error')
        return 'NOT A WORD DOCUMENT'
def UploadManager_path(filepath, output_directorypath):
    """Upload a Word document to blob storage, sync blobs locally, then run
    the ingestion pipeline (extract → NLP transform → load → CosmosDB).

    Args:
        filepath: Path to the candidate Word document.
        output_directorypath: Local directory into which all blobs are
            downloaded (split into root + leaf directory for the client).

    Returns:
        'NOT A WORD DOCUMENT' when the filepath fails the
        get_name_of_filepath check; otherwise None (side effects only).
    """
    blob = BlobManager(config_blob.BLOB_NAME, config_blob.BLOB_KEY)
    if get_name_of_filepath(filepath) != 'NOT A WORD DOCUMENT':
        print(filepath)
        # Hoist the duplicated get_name_of_filepath/os.path.split calls.
        path, filename = os.path.split(get_name_of_filepath(filepath))
        blob.upload(path, filename, config_blob.BLOB_CONTAINER)
        print('blob connection')
        root_output, directory_output = os.path.split(output_directorypath)
        print(directory_output)
        print(root_output)
        blob.download_all_blobs(root_output, directory_output, config_blob.BLOB_CONTAINER)
        logging.getLogger().setLevel(logging.INFO)
        # Extract data from upstream.
        observations = main.extract()
        # Spacy NLP model — assumes the 'en' model is installed.
        nlp = spacy.load('en')
        # Transform data to have appropriate fields.
        observations, nlp = main.transform(observations, nlp)
        # Load data for downstream consumption.
        main.load(observations, nlp)
        main.load_to_json(observations, nlp)
        # Send data to CosmosDB. FIX: was a bare send_to_Cosmos(); qualified
        # via main to match how every other pipeline step is invoked here.
        main.send_to_Cosmos()
        print('sent to cosmos')
    else:
        print('error' + filepath)
        return 'NOT A WORD DOCUMENT'
def test_one(self):
    """1 converts to the Russian word for one."""
    result = transform(1)
    assert result == 'Один'
def test_null(self):
    """0 converts to the Russian word for zero."""
    result = transform(0)
    assert result == 'Ноль'
def test_hex(self):
    """A '0x'-prefixed string has its hex prefix stripped."""
    result = transform('0x15')
    assert result == '15'
def test_bin(self):
    """A '0b'-prefixed string has its binary prefix stripped."""
    result = transform('0b00100101')
    assert result == '00100101'
def test_three(self):
    """3 converts to the Russian word for three."""
    result = transform(3)
    assert result == 'Три'
def test_nine(self):
    """9 converts to the Russian word for nine."""
    result = transform(9)
    assert result == 'Девять'
def test_eight(self):
    """8 converts to the Russian word for eight."""
    result = transform(8)
    assert result == 'Восемь'
def test_seven(self):
    """7 converts to the Russian word for seven."""
    result = transform(7)
    assert result == 'Семь'
def test_six(self):
    """6 converts to the Russian word for six."""
    result = transform(6)
    assert result == 'Шесть'
def test_five(self):
    """5 converts to the Russian word for five."""
    result = transform(5)
    assert result == 'Пять'
def test_four(self):
    """4 converts to the Russian word for four."""
    result = transform(4)
    assert result == 'Четыре'
def test_oct(self):
    """A '0o'-prefixed string has its octal prefix stripped."""
    result = transform('0o25')
    assert result == '25'
def test_two(self):
    """2 converts to the Russian word for two."""
    result = transform(2)
    assert result == 'Два'