# Train a multi-class XGBoost booster on (xTr, yTr) and evaluate it on (xTe, yTe).
xg_train = xgb.DMatrix(xTr, label=yTr)
xg_test = xgb.DMatrix(xTe, label=yTe)
param = {
    'max_depth': 50,
    'eta': 0.4,
    'silent': 1,
    'objective': 'multi:softprob',
    'max_delta_step': 1,
    # One output class per label known to the encoder `le`.
    'num_class': len(le.classes_),
    'eval_metric': 'mlogloss',
    'seed': app_random_state_value,
    'nthread': 4,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'min_child_weight': 3,
    'lambda': 8,
    'alpha': 3,
    'gamma': 1,
}
# Both the training and the test matrix are handed to xgb.train as evaluation sets.
watchlist = [(xg_train, 'train'), (xg_test, 'test')]
num_round = 40
bst = xgb.train(param, xg_train, num_round, watchlist)
# get prediction
yhP = bst.predict(xg_test)
# The column with the highest score in each row is taken as the predicted class.
yhTe = np.argmax(yhP, axis=1)
# Encoded ids of every class known to `le`; used as the label set for the metrics below.
classes = le.transform(le.classes_).tolist()
##

# Report the metrics explicitly so they are visible when this runs as a script.
# `cross_entropy` is not defined in this section of the file.
print('accuracy:' + str(accuracy_score(yTe, yhTe)))
print('log-loss:' + str(cross_entropy(yhP, yTe, classes)))
cfmat = confusion_matrix(yTe, yhTe, labels=classes)
np.savetxt('cfmat.5.4.csv', cfmat, delimiter=',', fmt='%i')
plt.matshow(cfmat)
plt.colorbar()
# average=None makes the call return per-class values instead of a single aggregate.
print('precision/recall/f-score/support:'
      + str(precision_recall_fscore_support(yTe, yhTe, average=None, labels=classes)))

#MLP
from sklearn.neural_network import MLPClassifier

# Temporary code: pairwise cosine distance between the DD's.
# DD
# Keep only columns 47..114 of XD (68 columns) and store them as a LIL matrix.
x_new = sparse.lil_matrix(
    sparse.csr_matrix(XD)[:, list(range(47, 115))]
)
# Transpose so that each selected column becomes one row, i.e. one item to compare.
x_new_T = x_new.transpose()
dist = pairwise_distances(x_new_T, metric='cosine')
# NOTE(review): the file name says "manhattan" but the metric computed above is
# cosine -- confirm which one is intended before relying on this file.
np.savetxt(
    'dist_manhattan_train(68).csv',
    dist,
    fmt='%f',
    delimiter=',',
)
# Final layer of a model whose earlier layers are defined above this section.
model.add(Activation('softmax'))

#sgd = SGD(lr=0.5, decay=1e-5, momentum=0.9, nesterov=True)
# NOTE(review): `sgd` is built here but compile() below uses 'adagrad', so this
# optimizer is not used in this section -- confirm whether that is intended.
sgd = SGD(lr=0.1, decay=1e-5, momentum=0.9, nesterov=True)
#model.compile(loss='mean_squared_error', optimizer=sgd)
model.compile(loss='categorical_crossentropy', optimizer='adagrad')#, class_mode='categorical')

# Fit the scaler on the training data only, then apply the same scaling to the test data.
sc = preprocessing.StandardScaler(with_mean=False)
xTr = sc.fit_transform(xTr)
xTe = sc.transform(xTe)

model.fit(xTr.astype(np.float32), yTr.astype(np.int32), nb_epoch=50, batch_size=256, show_accuracy=True, validation_split=0.03,
          verbose=2, shuffle=True)
proba = model.predict_proba(xTe.astype(np.float32), batch_size=256)

print(proba)
#print(proba.shape())

yhP = proba
# The column with the highest score in each row is taken as the predicted class.
yhTe = np.argmax(yhP, axis=1)
# Encoded ids of every class known to `le`; passed to cross_entropy below.
classes = le.transform(le.classes_).tolist()
##
# Timestamp used in the output file names, e.g. 2016-01-31_23-59-59
# (no spaces or colons so it is safe inside a file name).
ts = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
# Persist the model, the predicted probabilities and the predicted classes.
# Each file is opened in a `with` block so its handle is closed (and the data
# flushed) as soon as the dump finishes, even if pickling raises.
with open('model_' + ts + '.pickle.dump', 'wb') as fh:
    pickle.dump(model, fh)
with open('yhP_proba_' + ts + '.pickle.dump', 'wb') as fh:
    pickle.dump(proba, fh)
with open('yhTe_' + ts + '.pickle.dump', 'wb') as fh:
    pickle.dump(yhTe, fh)
##
# `cross_entropy` is not defined in this section of the file.
print('accuracy:' + str(accuracy_score(yTe, yhTe)))
print('log-loss:' + str(cross_entropy(yhP, yTe, classes)))
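# NOTE(review): the next two lines look like non-code residue from a snippet
# listing; they are kept as comments so the file parses.
# Example no. 3
# 0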
#sgd = SGD(lr=0.5, decay=1e-5, momentum=0.9, nesterov=True)
# NOTE(review): `sgd` is built here but compile() below uses 'adagrad', so this
# optimizer is not used in this section -- confirm whether that is intended.
sgd = SGD(lr=0.1, decay=1e-5, momentum=0.9, nesterov=True)
#model.compile(loss='mean_squared_error', optimizer=sgd)
model.compile(loss='categorical_crossentropy',
              optimizer='adagrad')  #, class_mode='categorical')

# Train on xTr/yTr, holding out 3% of the training data for validation.
model.fit(xTr.astype(np.float32),
          yTr.astype(np.int32),
          nb_epoch=50,
          batch_size=256,
          show_accuracy=True,
          validation_split=0.03,
          shuffle=True)
proba = model.predict_proba(xTe.astype(np.float32), batch_size=256)

print(proba)
#print(proba.shape())

yhP = proba
# The column with the highest score in each row is taken as the predicted class.
yhTe = np.argmax(yhP, axis=1)
# Encoded ids of every class known to `le`; passed to cross_entropy below.
classes = le.transform(le.classes_).tolist()
##
# Timestamp used in the output file names, e.g. 2016-01-31_23-59-59
# (no spaces or colons so it is safe inside a file name).
ts = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
# Persist the model, the predicted probabilities and the predicted classes.
# Each file is opened in a `with` block so its handle is closed (and the data
# flushed) as soon as the dump finishes, even if pickling raises.
with open('model_' + ts + '.pickle.dump', 'wb') as fh:
    pickle.dump(model, fh)
with open('yhP_proba_' + ts + '.pickle.dump', 'wb') as fh:
    pickle.dump(proba, fh)
with open('yhTe_' + ts + '.pickle.dump', 'wb') as fh:
    pickle.dump(yhTe, fh)
##
# `cross_entropy` is not defined in this section of the file.
print('accuracy:' + str(accuracy_score(yTe, yhTe)))
print('log-loss:' + str(cross_entropy(yhP, yTe, classes)))