loss_function=distribution, n_tree=n_trees, learning_rate=shrinkage, sample_rate=bag_fraction, max_depth=interaction_depth, min_bucket=n_min_obs_in_node, seed=seed, replace=False, max_num_bins=200) probArray = rv.rx_predict(rx_btrees_model, data=ipo_test) fpr, tpr, thresholds = roc_curve(ipo_test["underpriced"], probArray) aucResult = auc(fpr, tpr) print("rx-btrees AUC: " + str(aucResult)) # MicrosoftML Logistic Regression ml_lreg_model = rx_logistic_regression(formula=formula, data=ipo_train) ml_lreg_score = ml.rx_predict(ml_lreg_model, data=ipo_test, extra_vars_to_write=["underpriced"]) prob_pred = [ml_lreg_score.loc[i, "Probability"] if ml_lreg_score.loc[i, "PredictedLabel"] \ else (1 - ml_lreg_score.loc[i, "Probability"]) for i in range(0, ml_lreg_score.shape[0])] good = ml_lreg_score["PredictedLabel"].as_matrix() == ( ipo_test["underpriced"] == 1).as_matrix() fpr, tpr, th = roc_curve(good.ravel(), prob_pred) aucResult = auc(fpr, tpr) print("ml-logistic-reg AUC: " + str(aucResult)) # Microsoftml Fast Forest ml_ff_model = rx_fast_forest(formula=formula, data=ipo_train) ml_ff_pred = ml.rx_predict(ml_ff_model, data=ipo_test,
################################ # We define this column as a category. data["cat"] = data["cat"].astype("category") print("problem dimension:", data.shape) print(data.head()) ################################################### # Let's train a logistic regression. formula = "Label ~ {0}".format(" + ".join(data.columns[1:])) print(formula) from microsoftml import rx_logistic_regression logregml = rx_logistic_regression(formula, data=data) ######################################### # Let's predict now. from microsoftml import rx_predict scores = rx_predict(logregml, data=data) print(scores.head()) ######################################### # Let's change the type of the category into numerical # and predict again. data["cat"] = data["cat"].astype(float) try: scores = rx_predict(logregml, data=data)
import pandas

# The two features go into a dataframe, the label is added as a float column.
data = pandas.DataFrame(data=X, columns=["X1", "X2"]).assign(
    Label=Y.astype(float))

##########################
#
# Geometrically, a binary classification problem
# comes down to finding the best boundary between
# two clouds of points. The simplest assumption is that
# this boundary is a straight line, which is what
# a logistic regression model fits.
from microsoftml import rx_logistic_regression, rx_predict

logreg = rx_logistic_regression("Label ~ X1 + X2", data=data)

##############################
# The boundary found by the model is a line
# with the following coefficients:
print(logreg.coef_)

###############################
# Drawing this line is possible but such a graph
# would only make sense for a linear model.
# The background of the graph is colored instead with
# the color of the class the model predicts.
import numpy
import pandas

df = pandas.DataFrame(data=X, columns=["X1", "X2"])
df["Label"] = Y.astype(float)

###########################################################################
# :epkg:`microsoftml` must be told it is a multi-class classification problem.
# Compared to :epkg:`scikit-learn`, this may look like a regression.
# However, because :epkg:`microsoftml` can deal with out-of-memory datasets,
# the third class could appear at the end of the training dataset.
# The parameter *verbose* can take one of the values 0, 1, 2.
# If > 0, :epkg:`microsoftml` displays information about the training
# on the standard output.
from microsoftml import rx_logistic_regression, rx_predict

logregml = rx_logistic_regression("Label ~ X1 + X2", data=df,
                                  method="multiClass", verbose=1)

###################################
# We convert the grid (numpy array) into a dataframe.
dfgrid = pandas.DataFrame(data=gridX, columns=["X1", "X2"])
gridml = rx_predict(logregml, dfgrid)

##################################
# :epkg:`microsoftml` returns three scores.
print(gridml.head(n=3))

##################################
# We need to pick the best one.
# ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
predicted_classes = np.argmax(gridml.values, axis=1)

#####################
fig, ax = plt.subplots(1, 1)
ax.imshow(Image.open(test_df.loc[0, "image"]))

########################################################
# We train a multiclass classifier using the :epkg:`microsoftml:rx_logistic_regression`
# algorithm. The images are loaded, resized to 227x227 pixels, converted
# into pixels and featurized with the Alexnet model
# (``dnn_model="Alexnet"``).
from microsoftml import rx_featurize, load_image, resize_image, extract_pixels, featurize_image
from microsoftml import rx_logistic_regression

image_model = rx_logistic_regression(
    formula="Label~Features", data=train_df, method="multiClass",
    ml_transforms=[
        load_image(cols=dict(Features="image")),
        resize_image(cols="Features", width=227, height=227),
        extract_pixels(cols="Features"),
        featurize_image(cols="Features", dnn_model="Alexnet")])

############################
# Note that ``method="multiClass"`` indicates that this is a multiclass training task.
# Finally, let's give it an image and its feature vector to classify.
# Note that this image was not part of the original training set.
# See the actual code for details.
# Now use the model to predict the type of the image.
from microsoftml import rx_predict

prediction = rx_predict(image_model, data=test_df)
print(prediction)

###############################
char_feature_extractor=n_gram_hash(hash_bits=17, ngram_length=3, seed=4), vector_normalizer="L2") ] # Point to the training set. News_Train_sql = RxSqlServerData(table="News_Train", connection_string=connection_string, column_info=factor_info) # Train the model. logistic_model = rx_logistic_regression(formula=training_formula, data=News_Train_sql, method="multiClass", l2_weight=1, l1_weight=1, ml_transforms=text_transform_list, train_threads=4) # Serialize and save the model to SQL Server. rx_set_compute_context(local) models_odbc = RxOdbcData(connection_string, table="Model") rx_write_object(models_odbc, key="LR", value=logistic_model, serialize=True, overwrite=True) # Set the Compute Context back to SQL. rx_set_compute_context(sql)
fig, ax = plt.subplots(1, 1)
ax.scatter(data[labels == 0, 0], data[labels == 0, 1], label="class 0")
ax.scatter(data[labels == 1, 0], data[labels == 1, 1], label="class 1")

#############################
# We put the data into a dataframe.
import pandas

df = pandas.DataFrame(data=data, columns=["X1", "X2"])
df["Label"] = labels

#################################
# We train a logistic regression.
from microsoftml import rx_logistic_regression, rx_predict

logreg = rx_logistic_regression("Label ~ X1 + X2", data=df)

#################################
# And we display the results.
import numpy


def colorie(X, model, ax, fig, additional_columns=None, additional_names=None):
    # A dataframe is converted into a numpy array so that the positional
    # slicing below works. ``.values`` replaces ``as_matrix()``, which was
    # removed in pandas 1.0.
    if isinstance(X, pandas.DataFrame):
        X = X.values
    # Range of the first two columns and a step of 1/100 of each range.
    xmin, xmax = numpy.min(X[:, 0]), numpy.max(X[:, 0])
    ymin, ymax = numpy.min(X[:, 1]), numpy.max(X[:, 1])
    hx = (xmax - xmin) / 100
    hy = (ymax - ymin) / 100
    # Regular grid covering both ranges.
    xx, yy = numpy.mgrid[xmin:xmax:hx, ymin:ymax:hy]