def train_ada_boost(X, Y, estimators, classes, features, train_size=3600):
    """Fit an AdaBoost classifier on a head split and score it on the tail.

    The first ``train_size`` rows of ``X``/``Y`` are used for training and
    every remaining row is used for validation.

    Args:
        X: 2-D array of samples; it is sliced as ``X[a:b, :]``.
        Y: Labels aligned row-for-row with ``X``.
        estimators: Forwarded as ``n_estimators`` to ``AdaBoostClassifier``.
        classes: Stored on the classifier as ``n_classes_`` before fitting.
        features: Stored on the classifier as ``n_features_`` before fitting.
        train_size: Number of leading rows used for training. Defaults to
            3600, the split that was previously hard-coded.

    Returns:
        The validation accuracy as a percentage (``correct / total * 100``).

    Raises:
        ValueError: If the split leaves no rows for validation.
    """
    import numpy as np

    train_x = X[0:train_size, :]
    train_y = Y[0:train_size]
    validate_x = X[train_size:, :]
    validate_y = Y[train_size:]

    # Flatten the labels so that (n,) and (n, 1) inputs are compared the same
    # way, and so the validation size is no longer pinned to 400 rows.
    expected = np.asarray(validate_y).reshape(-1)
    total = expected.shape[0]
    if total == 0:
        raise ValueError(
            "no validation rows left after taking train_size=%d rows" % train_size
        )

    clf = AdaBoostClassifier(n_estimators=estimators)
    # NOTE(review): these attributes are assigned before fit(); presumably
    # fit() recomputes n_classes_ itself -- confirm whether they are needed.
    clf.n_classes_ = classes
    clf.n_features_ = features
    clf.fit(train_x, train_y)

    predicted = np.asarray(clf.predict(validate_x)).reshape(-1)
    correct = int(np.count_nonzero(predicted == expected))
    return correct / total * 100
def _load_generator_sample(csv_path):
    """Read one adversary log, keep the training rows and scale the inputs.

    Rows are kept when ``Class == 0``, ``generator != 2`` and ``nTags == 2``.
    The columns listed in the module-level ``variables`` are then replaced by
    the output of ``scale``.
    """
    frame = pd.read_csv(csv_path, index_col=0)
    keep = (
        (frame['Class'] == 0)
        & (frame['generator'] != 2)
        & (frame['nTags'] == 2)
    )
    # Copy the selection so the column assignment below writes to an
    # independent frame instead of a slice of the CSV frame
    # (avoids pandas' SettingWithCopyWarning).
    frame = frame.loc[keep].copy()
    frame[variables] = scale(frame[variables])
    return frame


def _make_bdt(rate):
    """Build an unfitted SAMME AdaBoost BDT with the given learning rate."""
    bdt = AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=max_depth, min_samples_leaf=0.01),
        learning_rate=rate,
        algorithm="SAMME",
        n_estimators=n_estimators,
    )
    bdt.n_classes_ = 2
    return bdt


def main():
    """Train one BDT per even/odd sample to separate generators and score it.

    Loads the even and odd adversary logs, fits an AdaBoost BDT on each with
    ``generator`` as the target and ``EventWeight`` as the sample weight,
    stores each BDT's decision-function output in a ``bdt_outcome`` column,
    prints the weighted accuracy of the odd BDT and computes the ROC points
    on the combined sample.
    """
    # Prepare data
    df_even = _load_generator_sample('DNN/adversary_even_log.csv')
    df_odd = _load_generator_sample('DNN/adversary_odd_log.csv')

    # construction of bdt
    bdt_even = _make_bdt(learning_rate)
    # NOTE(review): the odd BDT uses a literal 0.15 while the even BDT uses
    # the module-level ``learning_rate`` -- confirm the asymmetry is intended.
    bdt_odd = _make_bdt(0.15)

    # fitting to generators
    bdt_even.fit(df_even[variables], df_even['generator'],
                 sample_weight=df_even['EventWeight'])
    bdt_odd.fit(df_odd[variables], df_odd['generator'],
                sample_weight=df_odd['EventWeight'])

    # Scoring
    # NOTE(review): each BDT is evaluated on the same sample it was fitted
    # on -- confirm that swapping even/odd is not what was intended.
    df_odd['bdt_outcome'] = bdt_odd.decision_function(df_odd[variables]).tolist()
    df_even['bdt_outcome'] = bdt_even.decision_function(df_even[variables]).tolist()
    # score() needs the features and the true labels; calling it without
    # arguments raises TypeError.
    print(bdt_odd.score(df_odd[variables], df_odd['generator'],
                        sample_weight=df_odd['EventWeight']))

    df = pd.concat([df_odd, df_even])

    # plotting BDT outcome for different generators
    # (gen1/gen2 are only consumed by the disabled histogram code below)
    gen1 = df.loc[df['generator'] == 0]
    gen2 = df.loc[df['generator'] == 1]
    # plt.hist(gen1['bdt_outcome'],bins=70,color='red',alpha=0.5,density=True)
    # plt.hist(gen2['bdt_outcome'],bins=70,color='blue',alpha=0.5,density=True)
    #
    # plt.show()

    # calculating fpr, tpr and auc for roc curve
    fpr = dict()
    tpr = dict()
    area = dict()  # only filled by the disabled auc() call below

    z_all = to_categorical(df['generator'], num_classes=2)
    # NOTE(review): column 0 of z_all marks generator == 0 rows; verify that
    # this matches the sign convention of ``bdt_outcome``.
    fpr[0], tpr[0], _ = roc(z_all[:, 0], df['bdt_outcome'],
                            sample_weight=df['EventWeight'])
    # area[0] = auc(fpr[0], tpr[0])

    # plotting the roc curve
    # plt.plot(fpr[0], tpr[0], color='darkorange',lw=1,label='PYTHIA, ROC curve (area = %0.2f)' % area[0])
    # plt.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--')
    # plt.legend()
    # plt.savefig('roc.png', bbox_inches='tight')
    # plt.show(block=True)
    print('It runs. ')