def _load_unnormalized_h2b(num_str):
    """Return the unnormalized H2b ``Traj_data`` object.

    If ``H2b_data.csv`` exists in the working directory it is loaded directly.
    Otherwise the data is rebuilt from the raw files and then written to
    ``H2b_data.csv`` so that later calls can take the fast path.
    """
    cache_csv = "H2b_data.csv"
    if os.path.isfile(cache_csv):
        print("The file existed so I loaded it.")
        return Traj_data(file_name=cache_csv, pkl_traj_file="./Pkl_file")

    # NOTE(review): the hdf5 and xml file names below are hard-coded to "0015"
    # even though `num_str` is a parameter -- confirm whether they should be
    # derived from `num_str`.
    h2b = Traj_data()
    # Extracting the hdf5 file for the primary channel (H2b).
    h2b.extracting(num_str, "both_channels_0015.hdf5", 'primary')
    # Adding Alice's work on tracking to have trajectories
    # (pass num_traj=... to reduce the number of trajectories).
    h2b.Add_traj(normalize=False)
    # Finding associated labels by minimizing distance by click and distance
    # of cell.
    h2b.label_finder("0015_PCNA.xml")
    # Renaming the labels to have G1=="1", S=="S", G2=="2" and M=="M".
    h2b.renaming_and_merge()
    # The procedure above may take a long time, so cache the result on disk.
    h2b.data.to_csv(cache_csv, index=False, header=True)
    return h2b


def _train_mitosis_classifier():
    """Train the "mitosis or not" random forest from ``MitoseClassif.arff``.

    Shows the tuning plot and the normalized confusion matrix, and returns
    ``(train_1, model_1)``: the ``Reader`` holding the training data and the
    tuned ``RandomForest_Autotunner``.
    """
    train_1 = Reader()
    train_1.arrf_read("MitoseClassif.arff")
    train_1.renaming_for_mitosis()

    kfold = 3
    if train_1.Var_missing[0] in train_1.data.columns:
        train_1.missing_features_data()

    # Candidate values handed to the auto-tuner: 100, 110, ..., 240.
    values = list(range(100, 250, 10))
    model_1 = RandomForest_Autotunner(values)
    model_1.tunning(train_1.data[train_1.names], train_1.data["label"], kfold,
                    plot=True, fit_new_model=True)
    plt.show()

    # Divide each row of the confusion matrix by its row sum.
    row_totals = model_1.cm.sum(axis=1)[:, np.newaxis]
    model_1.cm_normalized = model_1.cm.astype('float') / row_totals
    plot_matrix(model_1.cm_normalized, title="Normalized confusion matrix",
                names=["M", "O", "S"])
    plt.show()
    return train_1, model_1


def MitoseClassif(obj_norm, y_name_3state="Type", classif_Mitose="MitoseOrNot",
                  num_str="0015"):
    """Train the mitosis classifier and store its predictions in ``obj_norm``.

    Steps:
      1. Load (or rebuild and cache) the unnormalized H2b data.
      2. Train a random forest on ``MitoseClassif.arff`` and plot its
         diagnostics.
      3. Drop rows with a null ``"traj"`` from both the normalized and the
         unnormalized data, then write the predictions into column
         ``classif_Mitose`` of ``obj_norm.data``.

    Parameters
    ----------
    obj_norm : object exposing ``data`` (a DataFrame with a ``"traj"``
        column), ``names`` and ``update()``. It is modified in place.
    y_name_3state : unused by this function; kept so existing callers
        continue to work.
    classif_Mitose : name of the column of ``obj_norm.data`` that receives
        the predictions.
    num_str : forwarded to ``Traj_data.extracting`` when the cached
        ``H2b_data.csv`` is absent.

    Returns
    -------
    tuple
        ``(obj_norm, model_1)`` where ``model_1`` is the tuned classifier,
        with ``names_to_give`` set to the training feature names.
    """
    print("\n We first load the unnormalized data: \n")
    obj_unnorm = _load_unnormalized_h2b(num_str)

    print("\n We train a classifier for mitosis or not: \n")
    train_1, model_1 = _train_mitosis_classifier()

    # To reduce computation, remove instances that do not belong to
    # trajectories.
    obj_norm.data = obj_norm.data.loc[
        pd.notnull(obj_norm.data["traj"]), obj_norm.data.columns]
    obj_unnorm.data = obj_unnorm.data.loc[
        pd.notnull(obj_unnorm.data["traj"]), obj_unnorm.data.columns]
    obj_norm.update()
    obj_unnorm.update()

    # Predicting model 1. Rows are selected where the *normalized* features
    # have no missing value, but -- careful -- the *unnormalized* feature
    # values are what is fed to the classifier.
    index_no_missing = (
        obj_norm.data[obj_norm.names].dropna(axis=0, how='any').index)
    features = obj_unnorm.data.loc[index_no_missing, train_1.names]
    obj_norm.data.loc[index_no_missing, classif_Mitose] = model_1.predict(features)

    print("\n A bit of statistics on the overall predictions: \n")
    print("Frequency of predicted values for the Mitosis or not classifier: \n")
    print(obj_norm.data[classif_Mitose].value_counts())
    n_unpredicted = obj_norm.data.shape[0] - len(index_no_missing)
    print("\n We were however not able to predict %d instances because of missing values" % n_unpredicted)

    obj_norm.update()

    # Giving priority to the first classifier: record the feature names it
    # was trained on.
    model_1.names_to_give = train_1.names
    return obj_norm, model_1