コード例 #1
0
def train_classifier(conn_str, feature_table, training_table, testing_table,
                     score_table, model_table, DNNDimension):
    levels = get_label_levels(feature_table, conn_str)  # get levels of Label
    colInfo = {"Label": {"type": "factor", "levels": levels}}
    train_query = "SELECT * FROM {} WHERE image IN (SELECT image FROM {})".format(
        feature_table, training_table)
    train_data = RxSqlServerData(sql_query=train_query,
                                 connection_string=conn_str,
                                 column_info=colInfo)

    test_query = "SELECT * FROM {} WHERE image IN (SELECT image FROM {})".format(
        feature_table, testing_table)
    test_data = RxSqlServerData(sql_query=test_query,
                                connection_string=conn_str,
                                column_info=colInfo)

    featureSet = ["feature." + str(i) for i in range(DNNDimension)]
    label = "Label"
    cols = featureSet + [label]
    classifier = rx_neural_network(
        "Label ~ feature",
        data=train_data,
        method="multiClass",
        optimizer=sgd_optimizer(learning_rate=0.011,
                                l_rate_red_ratio=0.9,
                                l_rate_red_freq=3,
                                momentum=0.2),
        num_iterations=300,
        ml_transforms=[concat(cols={"feature": featureSet})])

    test_score = rx_predict(classifier,
                            test_data,
                            extra_vars_to_write=["image", "Label"])
    train_score = rx_predict(classifier,
                             train_data,
                             extra_vars_to_write=["image", "Label"])
    testACC = float(
        len(test_score[test_score["Label"] ==
                       test_score["PredictedLabel"]])) / len(test_score)
    trainACC = float(
        len(train_score[train_score["Label"] ==
                        train_score["PredictedLabel"]])) / len(train_score)
    print("The train accuracy of the neural network model is {}".format(
        trainACC))
    print(
        "The test accuracy of the neural network model is {}".format(testACC))

    print("Saving the predictive results of all the images into SQL table...")
    outputScore = RxSqlServerData(connection_string=conn_str,
                                  table=score_table)
    rx_data_step(pd.concat([test_score, train_score]),
                 outputScore,
                 overwrite=True)
    print("Saving model into SQL table...")
    save_model(model_table, conn_str, classifier, MODEL_NAME)
コード例 #2
0
def test_step3_check_output():
    # Check that all RTS models have been created
    RTS_odbc = RxOdbcData(connection_string, table="RTS")
    forest_serialized = rx_read_object(RTS_odbc,
                                       key="RF",
                                       deserialize=False,
                                       decompress=None)
    boosted_serialized = rx_read_object(RTS_odbc,
                                        key="GBT",
                                        deserialize=False,
                                        decompress=None)
    fast_serialized = rx_read_object(RTS_odbc,
                                     key="FT",
                                     deserialize=False,
                                     decompress=None)
    NN_serialized = rx_read_object(RTS_odbc,
                                   key="NN",
                                   deserialize=False,
                                   decompress=None)

    assert forest_serialized.__str__()[:6] == "b'blob"
    assert boosted_serialized.__str__()[:6] == "b'blob"
    assert fast_serialized.__str__()[:6] == "b'blob"
    assert NN_serialized.__str__()[:6] == "b'blob"

    # Check that predictions have been made for for all models
    forest_prediction_sql = RxSqlServerData(
        sql_query=
        "SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = N'Forest_Prediction'",
        connection_string=connection_string)
    forest_prediction = rx_import(input_data=forest_prediction_sql)
    boosted_prediction_sql = RxSqlServerData(
        sql_query=
        "SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = N'Boosted_Prediction'",
        connection_string=connection_string)
    boosted_prediction = rx_import(input_data=boosted_prediction_sql)
    fast_prediction_sql = RxSqlServerData(
        sql_query=
        "SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = N'Fast_Prediction'",
        connection_string=connection_string)
    fast_prediction = rx_import(input_data=fast_prediction_sql)
    NN_prediction_sql = RxSqlServerData(
        sql_query=
        "SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = N'NN_Prediction'",
        connection_string=connection_string)
    NN_prediction = rx_import(input_data=NN_prediction_sql)

    assert isinstance(forest_prediction, DataFrame)
    assert isinstance(boosted_prediction, DataFrame)
    assert isinstance(fast_prediction, DataFrame)
    assert isinstance(NN_prediction, DataFrame)
コード例 #3
0
 def detect_table(table_name, connection_string):
     from revoscalepy import RxSqlServerData, rx_import
     detect_sql = RxSqlServerData(sql_query="IF EXISTS (select 1 from information_schema.tables where table_name = '{}') SELECT 1 AS ret ELSE SELECT 0 AS ret".format(table_name),
                              connection_string=connection_string)
     does_exist = rx_import(detect_sql)
     if does_exist.iloc[0,0] == 1: return True
     else: return False
コード例 #4
0
def get_num_rows(table, connection_string):
    count_sql = RxSqlServerData(
        sql_query="SELECT COUNT(*) FROM {};".format(table),
        connection_string=connection_string)
    count = rx_import(count_sql)
    count = count.iloc[0, 0]
    return count
コード例 #5
0
def rank_candidates(conn_str, query_table, model_table, results_table):
    print("Loading the classifier output for all the candidate images...")
    candidateImageVector = load_predicted_scores(conn_str, TABLE_SCORE)

    print("Loading query images...")
    queryImagesSql = "SELECT * FROM " + query_table
    queryImages = RxSqlServerData(connection_string=conn_str,
                                  sql_query=queryImagesSql,
                                  strings_as_factors=False)

    print("Embedding query images...")
    query_image_features = rx_data_step(input_data=queryImages,
                                        overwrite=True,
                                        transform_function=featurize_transform,
                                        report_progress=2)

    print("Loading classifier...")
    classifier = load_model(model_table, conn_str, MODEL_NAME)

    print("Getting classifier output for query images...")
    queryImageScores = rx_predict(classifier,
                                  query_image_features,
                                  extra_vars_to_write=["image", "Label"])
    queryImageVector = map_images_to_predictedscores(queryImageScores)

    print("Calculating cosine similarity between each image pair...")
    rank_candiate_images(conn_str, queryImageVector, candidateImageVector,
                         TOP_K_CANDIDATES, results_table)
コード例 #6
0
def select_top_value_of_column(table_name, connection_string, column_name):
    query = "SELECT TOP(1) " + column_name + " FROM " + table_name
    print(query)
    query_sql = RxSqlServerData(sql_query=query, connection_string=connection_string)
    data = rx_import(query_sql)
    print(data)
    return data.iloc[0, 0]
コード例 #7
0
def gather_image_paths(images_folder, connection_string):
    root = os.path.dirname(images_folder)
    query = 'SELECT ([file_stream].GetFileNamespacePath()) as image FROM [MriData] WHERE [is_directory] = 0'
    filetable_sql = RxSqlServerData(sql_query=query, connection_string=connection_string)
    data = rx_import(filetable_sql)
    data["image"] = data["image"].apply(lambda x: os.path.join(root, x[1:]))    # TODO: assert to confirm paths exist
    data["patient_id"] = data["image"].map(lambda x: os.path.basename(os.path.dirname(x)))
    return data
コード例 #8
0
def train_test_split(id, table, train_table, p, connection_string):
    from revoscalepy import rx_set_temp_compute_context, RxInSqlServer, rx_data_step, RxLocalSeq

    def transform(data, compute_context):
        return data

    data = RxSqlServerData(
        sql_query=
        "SELECT {} FROM {} WHERE ABS(CAST(CAST(HashBytes('MD5', CAST({} AS varchar(10))) AS VARBINARY(64)) AS BIGINT) % 100) < {};"
        .format(id, table, id, p),
        connection_string=connection_string)
    train_table_dest = RxSqlServerData(table='Train_Id',
                                       connection_string=connection_string)
    rx_data_step(input_data=data,
                 output_file=train_table_dest,
                 overwrite=True,
                 transform_function=transform)
    print("Created {}".format(train_table))
コード例 #9
0
def manage_prediction(patient_index):
    query = "SELECT TOP(1) * FROM {} AS t1 INNER JOIN {} AS t2 ON t1.patient_id = t2.patient_id WHERE t2.idx = {}".format(
        TABLE_PCA_FEATURES, TABLE_PATIENTS, patient_index)
    patient_sql = RxSqlServerData(sql_query=query,
                                  connection_string=connection_string)
    predictions = ml_predict(model,
                             data=patient_sql,
                             extra_vars_to_write=["label", "patient_id"])
    prob = float(predictions["Probability"].iloc[0]) * 100
    return prob
コード例 #10
0
def evaluate_model(conn_str, candidate_table, image_pairs_table):
    distMethods = ["L2", "cosine"]
    image_scores = load_predicted_scores(conn_str, TABLE_SCORE)
    imagePairQuery = "SELECT * FROM " + image_pairs_table
    imagePairData_sql = RxSqlServerData(connection_string=conn_str,
                                        sql_query=imagePairQuery,
                                        strings_as_factors=False)
    imagePairData = rx_import(imagePairData_sql)
    print("Calculating ranking metrics...")
    calculate_ranking_metrics(image_scores, imagePairData, distMethods)
コード例 #11
0
def transform(dataset, context):
    from pandas import DataFrame
    table_name = None
    connection_string = "Driver=ODBC Driver 13 for SQL Server;Server=13.91.49.253;Database=Hospital;Uid=revotester;Pwd=T3sterPwd"

    def detect_table(table_name, connection_string):
        from revoscalepy import RxSqlServerData, rx_import
        detect_sql = RxSqlServerData(sql_query="IF EXISTS (select 1 from information_schema.tables where table_name = '{}') SELECT 1 AS ret ELSE SELECT 0 AS ret".format(table_name),
                                 connection_string=connection_string)
        does_exist = rx_import(detect_sql)
        if does_exist.iloc[0,0] == 1: return True
        else: return False

    missing = detect_table("LoS0", connection_string)
    if missing is False:
        table_name = "LengthOfStay"
    else:
        table_name = "LoS0"

    LengthOfStay_cleaned_sql = RxSqlServerData(table=table_name, connection_string=connection_string)

    # Get the mean and standard deviation of those variables.

    col_list = rx_get_var_names(LengthOfStay_cleaned_sql)
    f = "+".join(col_list)
    summary = rx_summary(formula=f, data=LengthOfStay_cleaned_sql, by_term=True).summary_data_frame

    names = ["hematocrit", "neutrophils", "sodium", "glucose", "bloodureanitro", "creatinine", "bmi", "pulse", "respiration"]
    statistics = summary[summary["Name"].isin(names)]
    statistics = statistics[["Name", "Mean", "StdDev"]]
    
    # standardization transform function
    def standardize(data, context):
        for n, row in statistics.iterrows():
            data[[row["Name"]]] = (data[[row["Name"]]] - row["Mean"])/row["StdDev"]
        return data

    # number_of_issues transform function
    def calculate_number_of_issues(data, context):
        data["number_of_issues"] = to_numeric(data["hemo"]) + to_numeric(data["dialysisrenalendstage"]) + to_numeric(data["asthma"])\
                                  + to_numeric(data["irondef"]) + to_numeric(data["pneum"]) + to_numeric(data["substancedependence"])\
                                  + to_numeric(data["psychologicaldisordermajor"]) + to_numeric(data["depress"])\
                                  + to_numeric(data["psychother"]) + to_numeric(data["fibrosisandother"]) + to_numeric(data["malnutrition"])
        return data

    data = DataFrame(dataset)
    data = standardize(data, context)
    data = calculate_number_of_issues(data, context)
    return data
コード例 #12
0
def prepare_data(conn_str, feature_table, train_table, test_table,
                 evaluation_table, numQueryImagePerCat, numNegImgsPerQueryImg,
                 ratioTrainTest):
    image_data_sql = RxSqlServerData(connection_string=conn_str,
                                     sql_query="SELECT image, Label FROM " +
                                     feature_table,
                                     string_as_factors=False)
    image_data_df = rx_import(image_data_sql)
    print("Split image data into training and testing set...")
    get_training_testing_images(image_data_df, ratioTrainTest, train_table,
                                test_table, conn_str)
    print("Getting ranking set from the testing image set...")
    prepare_evaluation_set(conn_str, feature_table, test_table,
                           evaluation_table, numQueryImagePerCat,
                           numNegImgsPerQueryImg)
コード例 #13
0
def test_step2_check_output():
    LoS_sql = RxSqlServerData(
        sql_query="SELECT TOP (5) * FROM [Hospital].[dbo].[LoS] ORDER BY eid",
        connection_string=connection_string)
    LoS = rx_import(input_data=LoS_sql)
    LoS[["number_of_issues"]] = LoS[["number_of_issues"]].apply(to_numeric)

    # bmi = [0.312740, -0.671356, -0.48006, -0.921639, 0.226158]
    # lengthofstay = [3.0, 7.0, 3.0, 1.0, 4.0]
    # number_of_issues = [0, 0, 0, 0, 2]
    # d = {"bmi": bmi, "lengthofstay": lengthofstay, "number_of_issues": number_of_issues}
    # df = DataFrame(d, index = [0,1,2,3,4])

    assert (LoS.loc[4, 'number_of_issues'] == 2)
    assert (round(LoS.loc[1, 'bmi'], 6) == round(-0.67135640398687824, 6))
    assert (LoS.loc[3, 'lengthofstay'] == 1.0)
コード例 #14
0
    def loaddata(self):
        #9.2.0 sqlQuery-> sql_query; connectionString-> connection_string
        #dataSource = RxSqlServerData(sqlQuery = "select * from dbo.trainingdata", verbose=True, reportProgress =True,
        #                             connectionString = self.__connectionstring)
        dataSource = RxSqlServerData(
            sql_query="select * from dbo.trainingdata",
            verbose=True,
            reportProgress=True,
            connection_string=self.__connectionstring)

        #9.2.0 connectionString->connection_string; autoCleanup-> auto_cleanup
        #self.__computeContext = RxInSqlServer(connectionString = self.__connectionstring, autoCleanup = True)
        self.__computeContext = RxInSqlServer(
            connection_string=self.__connectionstring, auto_cleanup=True)
        #9.2.0 rx_import_datasource -> rx_import
        #data = rx_import_datasource(dataSource)
        data = rx_import(dataSource)

        return data
# Set the Compute Context to local.
rx_set_compute_context(local)

##########################################################################################################################################

## Upload the data set to to SQL

##########################################################################################################################################

# Point to the input data set while specifying the classes.
file_path = "..\\Data"
LoS_text = RxTextData(file=os.path.join(file_path, "LengthOfStay.csv"),
                      column_info=col_type_info)

# Upload the table to SQL.
LengthOfStay_sql = RxSqlServerData(table="LengthOfStay",
                                   connection_string=connection_string)
rx_data_step(input_data=LoS_text, output_file=LengthOfStay_sql, overwrite=True)

##########################################################################################################################################

## Determine if LengthOfStay has missing values

##########################################################################################################################################

# First, get the names and types of the variables to be treated.
# For rxSummary to give correct info on characters, stringsAsFactors = True should be used.
LengthOfStay_sql2 = RxSqlServerData(table="LengthOfStay",
                                    connection_string=connection_string,
                                    stringsAsFactors=True)

#col = rxCreateColInfo(LengthOfStay_sql2)    # Not yet available
コード例 #16
0

##########################################################################################################################################

## Input: Point to the SQL table with the cleaned raw data set

##########################################################################################################################################

table_name = None
missing = detect_table("LoS0", connection_string)
if missing is False:
    table_name = "LengthOfStay"
else:
    table_name = "LoS0"

LengthOfStay_cleaned_sql = RxSqlServerData(table=table_name, connection_string=connection_string)


##########################################################################################################################################

## Feature Engineering:
## 1- Standardization: hematocrit, neutrophils, sodium, glucose, bloodureanitro, creatinine, bmi, pulse, respiration.
## 2- Number of preidentified medical conditions: number_of_issues.

##########################################################################################################################################

# Combine transform functions into one overarching transform
def transform(dataset, context):
    from pandas import DataFrame
    table_name = None
    connection_string = "Driver=ODBC Driver 13 for SQL Server;Server=13.91.49.253;Database=Hospital;Uid=revotester;Pwd=T3sterPwd"
コード例 #17
0
from lung_cancer.connection_settings import get_connection_string, TABLE_CLASSIFIERS, TABLE_FEATURES, TABLE_PATIENTS, FASTTREE_MODEL_NAME
from lung_cancer.lung_cancer_utils import retrieve_model

# Connect to SQL Server and set compute context
connection_string = get_connection_string()
sql = RxInSqlServer(connection_string=connection_string)
local = RxLocalSeq()
rx_set_compute_context(local)

# Specify patient to make prediction for
PatientIndex = 20

# Select patient data
query = "SELECT TOP(1) * FROM {} AS t1 INNER JOIN {} AS t2 ON t1.patient_id = t2.patient_id WHERE t2.idx = {}".format(
    TABLE_FEATURES, TABLE_PATIENTS, PatientIndex)
patient_sql = RxSqlServerData(sql_query=query,
                              connection_string=connection_string)

# Get classifier
classifier = retrieve_model(TABLE_CLASSIFIERS, connection_string,
                            FASTTREE_MODEL_NAME)

# Make Prediction on a single patient
predictions = ml_predict(classifier,
                         data=patient_sql,
                         extra_vars_to_write=["label", "patient_id"])

print("The probability of cancer for patient {} with patient_id {} is {}%".
      format(PatientIndex, predictions["patient_id"].iloc[0],
             predictions["Probability"].iloc[0] * 100))
if predictions["label"].iloc[0] == 0:
    print("Ground Truth: This patient does not have cancer")
         "Month" : { "type" : "integer" },
         "Day" : { "type" : "integer" },
         "RentalCount" : { "type" : "integer" },
         "WeekDay" : {
             "type" : "factor",
             "levels" : ["1", "2", "3", "4", "5", "6", "7"]
         },
         "Holiday" : {
             "type" : "factor",
             "levels" : ["1", "0"]
         },
         "Snow" : {
             "type" : "factor",
             "levels" : ["1", "0"]
         }
     }
data_source = RxSqlServerData(sql_query=inputsql, connection_string=conn_str, column_info=column_info)

linmod_model = rx_lin_mod("RentalCount ~ Month + Day + WeekDay + Snow + Holiday", data = data_source, computeContext = cc)
trained_model = rx_serialize_model(linmod_model, realtime_scoring_only = True)

with open(r'c:\temp\trained_model.pickle', mode='wb') as f:
    f.write(trained_model)

print(rx_summary("RentalCount ~ Month + Day + WeekDay + Snow + Holiday", data_source))


cnxn = pyodbc.connect(conn_str)
cursor=cnxn.cursor()
cursor.execute("INSERT INTO rental_models(model_name, lang, native_model) VALUES(?, ?, ?)", ("linear_model", "Python", trained_model))
cnxn.commit()
コード例 #19
0
def submit_img(conn_str, table, img):
    inp = {'image': img, 'Label': 0}
    df = pd.DataFrame([inp], columns=inp.keys())
    out = RxSqlServerData(table=table, connection_string=conn_str)
    rx_data_step(input_data=df, output_file=out, overwrite=True)
    print("{} uploaded to {}".format(img, table))
コード例 #20
0
#from microsoftml import rx_predict as ml_predict

from length_of_stay_utils import train_test_split, evaluate_model, create_formula, get_num_rows, write_rts_model
from SQLConnection import *

# Set the Compute Context to SQL.
rx_set_compute_context(local)

##########################################################################################################################################

## Input: Point to the SQL table with the data set for modeling

##########################################################################################################################################

LoS = RxSqlServerData(table="LoS",
                      connection_string=connection_string,
                      strings_as_factors=True)

##########################################################################################################################################

##	Split the data set into a training and a testing set

##########################################################################################################################################

# Randomly split the data into a training set and a testing set, with a splitting % p.
# p % goes to the training set, and the rest goes to the testing set. Default is 70%.

p = 70

## Create the Train_Id table containing Lead_Id of training set.
train_test_split("eid", "LoS", "Train_Id", p, connection_string)
コード例 #21
0
##	Export the data to SQL Server
##########################################################################################################################################

# Set the compute context to local to export the data to SQL Server.
rx_set_compute_context(local)

# Point to the txt data sets.
News_Train_text = RxTextData(file=os.path.join(file_path, "News_Train"),
                             delimiter="\t")
News_Test_text = RxTextData(file=os.path.join(file_path, "News_Test"),
                            delimiter="\t")
Label_Names_text = RxTextData(file=os.path.join(file_path, "Label_Names"),
                              delimiter="\t")

# Point to the SQL tables where they will be written.
News_Train_sql = RxSqlServerData(table="News_Train",
                                 connection_string=connection_string)
News_Test_sql = RxSqlServerData(table="News_Test",
                                connection_string=connection_string)
Label_Names_sql = RxSqlServerData(table="Label_Names",
                                  connection_string=connection_string)

# Export them to SQL Server.
rx_data_step(input_data=News_Train_text,
             output_file=News_Train_sql,
             overwrite=True)
rx_data_step(input_data=News_Test_text,
             output_file=News_Test_sql,
             overwrite=True)
rx_data_step(input_data=Label_Names_text,
             output_file=Label_Names_sql,
             overwrite=True)
コード例 #22
0
#Connection string to connect to SQL Server named instance
conn_str = 'Driver=SQL Server;Server=DESKTOP-NLR1KT5;Database=Apple_sql;Trusted_Connection=True;'

#Define the columns we wish to import
column_info = {"Date" : { "type" : "str" },
         "Open" : { "type" : "str" },
         "High" : { "type" : "str" },
         "Low" : { "type" : "str" },
         "Close" : { "type" : "str" },
         "Adj_Close" : { "type" : "str" },
         "Volume" : { "type" : "str" }
     }

#Get the data from SQL Server Table
data_source = RxSqlServerData(table="dbo.Apple_sql",
                               connection_string=conn_str, column_info=column_info)
computeContext = RxInSqlServer(
     connection_string = conn_str,
     num_tasks = 1,
     auto_cleanup = False
)

RxInSqlServer(connection_string=conn_str, num_tasks=1, auto_cleanup=False)

#testing dataset
#df = pd.read_csv('H:\\JSDev\\Python_SQL_Server_Value_At_Risk\\AAPL.csv', parse_dates=["Date"], index_col="Date")
#df.Close.resample('M').mean()
df = pd.DataFrame(rx_import(input_data = data_source))
print("Data frame:", df)

returns = df.pct_change()
コード例 #23
0
# Train Test Split
resample = True
if resample:
    print("Performing Train Test Split")
    p = 80
    train_test_split(TABLE_TRAIN_ID,
                     TABLE_PATIENTS,
                     p,
                     connection_string=connection_string)

# Point to the SQL table with the training data
column_info = {"label": {"type": "numeric"}}
query = "SELECT * FROM {} WHERE patient_id IN (SELECT patient_id FROM {})".format(
    TABLE_FEATURES, TABLE_TRAIN_ID)
train_sql = RxSqlServerData(sql_query=query,
                            connection_string=connection_string,
                            column_info=column_info)

# Create formula
formula = create_formula(train_sql)
print("Formula:", formula)

# Fit a classification model
rx_set_compute_context(
    local)  # TODO: change this to sql when sql starts working
classifier = rx_fast_trees(formula=formula,
                           data=train_sql,
                           num_trees=1000,
                           method="binary",
                           random_seed=5)
rx_set_compute_context(local)
コード例 #24
0
    "WeekDay": {
        "type": "factor",
        "levels": ["1", "2", "3", "4", "5", "6", "7"]
    },
    "Holiday": {
        "type": "factor",
        "levels": ["1", "0"]
    },
    "Snow": {
        "type": "factor",
        "levels": ["1", "0"]
    }
}

#Get the data from SQL Server Table
data_source = RxSqlServerData(table="dbo.rental_data",
                              connection_string=conn_str,
                              column_info=column_info)
computeContext = RxInSqlServer(connection_string=conn_str,
                               num_tasks=1,
                               auto_cleanup=False)

RxInSqlServer(connection_string=conn_str, num_tasks=1, auto_cleanup=False)

# import data source and convert to pandas dataframe
df = pd.DataFrame(rx_import(input_data=data_source))
print("Data frame:", df)
# Get all the columns from the dataframe.
columns = df.columns.tolist()
# Filter the columns to remove ones we don't want to use in the training
columns = [c for c in columns if c not in ["Year"]]
コード例 #25
0
def select_entry_where_column_equals_value(table_name, connection_string, column_name, value):
    query = "SELECT TOP (1) * FROM {} WHERE {} = '{}'".format(table_name, column_name, value)
    print(query)
    query_sql = RxSqlServerData(sql_query=query, connection_string=connection_string)
    data = rx_import(query_sql)
    return data
コード例 #26
0
def display_head(table_name, n_rows):
    table_sql = RxSqlServerData(sql_query="SELECT TOP({}}) * FROM {}}".format(
        n_rows, table_name),
                                connection_string=connection_string)
    table = rx_import(table_sql)
    print(table)
コード例 #27
0
def get_patients_id(table_name, connection_string):
    query = "SELECT DISTINCT patient_id FROM {}".format(table_name)
    sqldata = RxSqlServerData(sql_query=query, connection_string=connection_string)
    data = rx_import(sqldata)
    return data["patient_id"].tolist()
コード例 #28
0
from revoscalepy import rx_import
from revoscalepy import rx_dforest
from revoscalepy import rx_dtree
from revoscalepy import RxSqlServerData
from revoscalepy import rx_serialize_model
import pyodbc

# Connection string to connect to SQL Server named instance
conn_str = 'Driver=SQL Server;Server=.;Database=SQLML;Trusted_Connection=True;'
sql_query ='SELECT "Sepal.Length","Sepal.Width","Petal.Length","Petal.Width",Species ' \
    'FROM dbo.vw_iris_training'

data_source = RxSqlServerData(sql_query=sql_query, connection_string=conn_str)
iris_train_data = rx_import(input_data=data_source)
iris_train_data["Species"] = iris_train_data["Species"].astype("category")

model = rx_dforest(
    "Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width",
    data=iris_train_data)
trained_rx_dforest_model = rx_serialize_model(model,
                                              realtime_scoring_only=True)
#Save model
conn = pyodbc.connect(conn_str)
cursor = conn.cursor()
cursor.execute(
    "DELETE FROM dbo.iris_models WHERE model_name ='rxDForest' AND model_language ='Python'"
)
cursor.execute(
    "INSERT INTO dbo.iris_models(model,model_name,model_language) values (?,?, ?)",
    trained_rx_dforest_model, 'rxDForest', 'Python')
conn.commit()
コード例 #29
0
     
DECLARE @PythonScript NVARCHAR(MAX) =CONCAT('
import pandas as pd
import os
import glob
from revoscalepy import RxSqlServerData, rx_data_step
sqlConnString = "Driver=SQL Server;Server=Serv; ',@DBConnectionString,'"
Filefolderepath = ImportFilePath+"*.xlsx"
if ImportAll ==1 and CombineTarget==0:
    for FullFilePath in glob.glob(Filefolderepath):
       Filename = os.path.basename(FullFilePath).replace(".xlsx","")
       xl = pd.ExcelFile(FullFilePath)
       for sheetname in xl.sheet_names:
          Output = pd.read_excel(FullFilePath, sheetname=sheetname, na_filter=False).astype(str)
       if not Output.empty:
          sqlDS = RxSqlServerData(connection_string = sqlConnString,table = "".join(fl for fl in Filename if fl.isalnum())+"_"+"".join(sh for sh in sheetname if sh.isalnum()))
          rx_data_step(input_data = Output, output_file = sqlDS,overwrite = True)
if ImportAll ==1 and CombineTarget==1:
   df2=pd.DataFrame()
   for FullFilePath in glob.glob(Filefolderepath):
       Filename = os.path.basename(FullFilePath).replace(".xlsx","")
       xl = pd.ExcelFile(FullFilePath)
       for sheetname in xl.sheet_names:
           Output = pd.read_excel(FullFilePath, sheetname=sheetname).columns.astype(str)
           Output = ",".join(list(Output))
           df1 = pd.DataFrame([[Filename,sheetname,FullFilePath,Output]],columns=["Filename","sheetname","FullFilePath","Headers"])
           df2=df2.append(df1,ignore_index=True)
   sqlDS = RxSqlServerData(connection_string = sqlConnString,table = "Tbl_PyImpExp1")
   rx_data_step(input_data = df2, output_file = sqlDS,overwrite = True)
if ImportAll ==0:
   Filename =ImportFilePath+ExcelFileName+".xlsx"
コード例 #30
0
def test_step1_check_output():
    col_info = {
        "eid": {
            'type': 'integer'
        },
        "vdate": {
            'type': 'character'
        },
        "rcount": {
            'type': 'character'
        },
        "gender": {
            'type': 'factor'
        },
        "dialysisrenalendstage": {
            'type': 'factor'
        },
        "asthma": {
            'type': 'factor'
        },
        "irondef": {
            'type': 'factor'
        },
        "pneum": {
            'type': 'factor'
        },
        "substancedependence": {
            'type': 'factor'
        },
        "psychologicaldisordermajor": {
            'type': 'factor'
        },
        "depress": {
            'type': 'factor'
        },
        "psychother": {
            'type': 'factor'
        },
        "fibrosisandother": {
            'type': 'factor'
        },
        "malnutrition": {
            'type': 'factor'
        },
        "hemo": {
            'type': 'factor'
        },
        "hematocrit": {
            'type': 'numeric'
        },
        "neutrophils": {
            'type': 'numeric'
        },
        "sodium": {
            'type': 'numeric'
        },
        "glucose": {
            'type': 'numeric'
        },
        "bloodureanitro": {
            'type': 'numeric'
        },
        "creatinine": {
            'type': 'numeric'
        },
        "bmi": {
            'type': 'numeric'
        },
        "pulse": {
            'type': 'numeric'
        },
        "respiration": {
            'type': 'numeric'
        },
        "secondarydiagnosisnonicd9": {
            'type': 'factor'
        },
        "discharged": {
            'type': 'character'
        },
        "facid": {
            'type': 'factor'
        },
        "lengthofstay": {
            'type': 'integer'
        }
    }

    # Point to the input data set while specifying the classes.
    file_path = "..\\Data"
    LoS_text = RxTextData(file=os.path.join(file_path, "LengthOfStay.csv"),
                          column_info=col_info)
    table_text = rx_import(LoS_text)

    LengthOfStay_sql = RxSqlServerData(
        sql_query="SELECT * FROM [Hospital].[dbo].[LengthOfStay] ORDER BY eid",
        connection_string=connection_string,
        column_info=col_info)
    table_sql = rx_import(LengthOfStay_sql)

    assert table_text.equals(table_sql)