예제 #1
0
def train_ada_boost(X, Y, estimators, classes, features, train_size=3600):
    """Fit an AdaBoost classifier and return its hold-out accuracy in percent.

    The first ``train_size`` rows of ``X``/``Y`` are used for training and
    every remaining row is used for validation.

    Args:
        X: 2-D array sliceable as ``X[a:b, :]`` holding one sample per row.
        Y: Array of labels aligned with the rows of ``X``.
        estimators: Passed to ``AdaBoostClassifier`` as ``n_estimators``.
        classes: Stored on the classifier as ``n_classes_`` before fitting.
        features: Stored on the classifier as ``n_features_`` before fitting.
        train_size: Number of leading rows used for training. Defaults to
            3600, the split that was previously hard-coded.

    Returns:
        Percentage (0-100) of validation rows whose predicted label equals
        the true label.

    Raises:
        ValueError: If no rows are left for validation.
    """
    train_x = X[0:train_size, :]
    train_y = Y[0:train_size]
    validate_x = X[train_size:, :]
    validate_y = Y[train_size:]

    n_validate = validate_y.shape[0]
    if n_validate == 0:
        raise ValueError(
            "no validation rows left after taking %d training rows" % train_size
        )

    clf = AdaBoostClassifier(n_estimators=estimators)
    # NOTE(review): these attributes are set before fit(); presumably fit()
    # derives its own values from the training data - confirm whether the
    # assignments (and the `classes`/`features` parameters) are still needed.
    clf.n_classes_ = classes
    clf.n_features_ = features
    clf.fit(train_x, train_y)

    # Flatten both sides so the comparison is element-wise regardless of the
    # validation-set size (previously a fixed reshape(400, 1)) and of whether
    # Y was supplied as shape (n,) or (n, 1).
    predictions = clf.predict(validate_x).reshape(-1)
    truth = validate_y.reshape(-1)
    correct = int((predictions == truth).sum())
    return correct / n_validate * 100
예제 #2
0
def _load_selected_events(csv_path):
    """Read one adversary log CSV and return only the rows used for training.

    Keeps rows with ``Class == 0``, ``generator != 2`` and ``nTags == 2``.
    A copy is returned so that later column assignments operate on an
    independent frame rather than on a slice of the frame read from disk.
    """
    df = pd.read_csv(csv_path, index_col=0)
    keep = (df['Class'] == 0) & (df['generator'] != 2) & (df['nTags'] == 2)
    return df.loc[keep].copy()


def main():
    """Train one BDT per even/odd sample to separate generators and score it.

    Loads the even and odd adversary logs, scales the input columns, fits an
    AdaBoost BDT on each sample with ``generator`` as the target and
    ``EventWeight`` as the sample weight, stores each BDT's decision-function
    output in a ``bdt_outcome`` column, prints the odd BDT's weighted accuracy
    on the odd sample, and computes ROC points over the combined sample.

    ``variables``, ``max_depth``, ``learning_rate`` and ``n_estimators`` are
    not defined here and are expected to exist at module level.
    """
    # Prepare data
    df_even = _load_selected_events('DNN/adversary_even_log.csv')
    df_odd = _load_selected_events('DNN/adversary_odd_log.csv')

    df_odd[variables] = scale(df_odd[variables])
    df_even[variables] = scale(df_even[variables])

    # construction of bdt
    bdt_even = AdaBoostClassifier(DecisionTreeClassifier(max_depth=max_depth, min_samples_leaf=0.01),
                                  learning_rate=learning_rate,
                                  algorithm="SAMME",
                                  n_estimators=n_estimators
                                  )
    bdt_even.n_classes_ = 2
    # NOTE(review): the odd BDT uses a hard-coded learning_rate of 0.15 while
    # the even BDT uses the module-level `learning_rate` - confirm whether
    # this asymmetry is intended.
    bdt_odd = AdaBoostClassifier(DecisionTreeClassifier(max_depth=max_depth, min_samples_leaf=0.01),
                                 learning_rate=0.15,
                                 algorithm="SAMME",
                                 n_estimators=n_estimators
                                 )
    bdt_odd.n_classes_ = 2

    # fitting to generators
    bdt_even.fit(df_even[variables], df_even['generator'], sample_weight=df_even['EventWeight'])
    bdt_odd.fit(df_odd[variables], df_odd['generator'], sample_weight=df_odd['EventWeight'])

    # Scoring
    df_odd['bdt_outcome'] = bdt_odd.decision_function(df_odd[variables]).tolist()
    df_even['bdt_outcome'] = bdt_even.decision_function(df_even[variables]).tolist()

    # score() needs the samples and their true labels; it was previously
    # called with no arguments, which raises TypeError.
    print(bdt_odd.score(df_odd[variables], df_odd['generator'],
                        sample_weight=df_odd['EventWeight']))
    df = pd.concat([df_odd, df_even])

    # plotting BDT outcome for different generators
    # (gen1/gen2 are only consumed by the disabled histogram code below)
    gen1 = df.loc[df['generator'] == 0]
    gen2 = df.loc[df['generator'] == 1]

#    plt.hist(gen1['bdt_outcome'],bins=70,color='red',alpha=0.5,density=True)
#    plt.hist(gen2['bdt_outcome'],bins=70,color='blue',alpha=0.5,density=True)
#
#    plt.show()

    # calculating fpr, tpr and auc for roc curve
    fpr = dict()
    tpr = dict()
    area = dict()

    z_all = to_categorical(df['generator'], num_classes=2)

    # NOTE(review): column 0 of z_all flags generator == 0 as the positive
    # class; verify that this matches the sign convention of `bdt_outcome`
    # expected by `roc`.
    fpr[0], tpr[0], _ = roc(z_all[:, 0], df['bdt_outcome'], sample_weight=df['EventWeight'])
#    area[0] = auc(fpr[0], tpr[0])

    # plotting the roc curve
#    plt.plot(fpr[0], tpr[0], color='darkorange',lw=1,label='PYTHIA, ROC curve (area = %0.2f)' % area[0])
#    plt.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--')
#    plt.legend()
#    plt.savefig('roc.png', bbox_inches='tight')
#    plt.show(block=True)

    print('It runs. ')