def __CVD( data, exp ):
    """Grid-search the ANN model over 10 stratified folds and save the results.

    For every fold the raw samples are turned into feature vectors with
    ``Features.getCVvectors``, a ``GridSearchCV`` over batch size, dense-layer
    size and dropout is fitted around ``Models.create_ann_model``, and the best
    estimator's underlying model is scored on the held-out fold through
    ``kn.evaluateModel``. After the last fold ``kn.saveResults(exp)`` is called.

    Args:
        data: Dataset object handed to ``Features.getSamples``,
            ``Features.getCVvectors`` and ``kn.evaluateModel``. Its
            ``maxWords`` attribute is overwritten with 10000.
        exp: Experiment identifier forwarded to ``kn.saveResults``.
    """
    kn = Evaluation.Evaluator()
    X, Y, y_raw = Features.getSamples(kn, data)
    data.maxWords = 10000

    kf = StratifiedKFold(n_splits=10, shuffle=True)
    for k, (train, test) in enumerate(kf.split(X, y_raw)):
        print("K-Fold: " + str(k + 1))
        x_train_raw, x_test_raw = X[train], X[test]
        y_train, y_test = Y[train], Y[test]
        x_train, x_test = Features.getCVvectors(x_train_raw, x_test_raw, data)

        # Candidate hyper-parameters; input/output sizes are fixed per fold
        # and therefore given as single-element candidate lists.
        param_grid = dict(
            batch_size=[64, 128],
            denseSize=[512, 1024],
            dropout=[2, 3],
            input_length=[len(x_train[0])],
            output_length=[len(y_train[0])],
        )
        model = KerasClassifier(build_fn=Models.create_ann_model, epochs=30, verbose=2)

        # Labels are one-hot rows, so argmax recovers the integer class.
        # scikit-learn takes `classes`/`y` by keyword, and Keras documents
        # `class_weight` as a {class_index: weight} mapping, not an array.
        y_ints = np.asarray([y.argmax() for y in y_train])
        classes = np.unique(y_ints)
        weights = class_weight.compute_class_weight('balanced', classes=classes, y=y_ints)
        cweights = {int(c): float(w) for c, w in zip(classes, weights)}

        grid = GridSearchCV(estimator=model, param_grid=param_grid)
        grid_result = grid.fit(x_train, y_train, class_weight=cweights)
        print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

        # Score the refitted best Keras model on the held-out fold.
        kn.evaluateModel(x_test, y_test, grid.best_estimator_.model, data, k)

    kn.saveResults(exp)
def __EmbeddedRNN( data, exp, filepath, network ):
    """Grid-search an embedded LSTM or GRU model over 10 stratified folds.

    For every fold ``Features.getEmbedded`` produces the embedding (stored on
    ``Models.embedding``) and the train/test inputs, a ``GridSearchCV`` over
    batch size, neuron count and dropout is fitted, and the best estimator's
    underlying model is scored on the held-out fold through
    ``kn.evaluateModel``. After the last fold ``kn.saveResults(exp)`` is called.

    Args:
        data: Dataset object handed to ``Features.getSamples`` and
            ``kn.evaluateModel``. Its ``maxWords`` attribute is overwritten
            with 10000.
        exp: Experiment identifier forwarded to ``kn.saveResults``.
        filepath: Passed through to ``Features.getEmbedded``
            (presumably the pretrained embedding file - confirm).
        network: ``'lstm'`` selects ``Models.create_lstm_model``; any other
            value selects ``Models.create_gru_model``.
    """
    kn = Evaluation.Evaluator()
    X, Y, y_raw = Features.getSamples(kn, data)
    data.maxWords = 10000

    # The builder depends only on `network`, so choose it once up front.
    if network == 'lstm':
        build_fn = Models.create_lstm_model
    else:
        build_fn = Models.create_gru_model

    kf = StratifiedKFold(n_splits=10, shuffle=True)
    for k, (train, test) in enumerate(kf.split(X, y_raw)):
        print("K-Fold: " + str(k + 1))
        x_train_raw, x_test_raw = X[train], X[test]
        y_train, y_test = Y[train], Y[test]

        # The model builders read the embedding from the Models module.
        Models.embedding, x_train, x_test = Features.getEmbedded(
            x_train_raw, x_test_raw, y_train, y_test, y_raw, filepath, kn
        )

        # NOTE(review): 218 looks like a typo for 128 (the other grid search
        # in this file uses [64, 128]); left unchanged - confirm with the author.
        param_grid = dict(
            batch_size=[64, 218],
            neuron=[100, 200],
            dropout=[2, 3],
            output_size=[len(y_train[0])],
        )
        model = KerasClassifier(build_fn=build_fn, epochs=30, verbose=2)

        # Labels are one-hot rows, so argmax recovers the integer class.
        # scikit-learn takes `classes`/`y` by keyword, and Keras documents
        # `class_weight` as a {class_index: weight} mapping, not an array.
        y_ints = np.asarray([y.argmax() for y in y_train])
        classes = np.unique(y_ints)
        weights = class_weight.compute_class_weight('balanced', classes=classes, y=y_ints)
        cweights = {int(c): float(w) for c, w in zip(classes, weights)}

        grid = GridSearchCV(estimator=model, param_grid=param_grid)
        grid_result = grid.fit(x_train, y_train, class_weight=cweights)
        print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

        # Score the refitted best Keras model on the held-out fold.
        kn.evaluateModel(x_test, y_test, grid.best_estimator_.model, data, k)

    kn.saveResults(exp)
def WStack():
    """Train and evaluate a stacked ensemble on the Wesleyan dataset.

    Runs 10 stratified folds. In each fold three base models are trained (a
    CNN and an ANN on the inputs from ``Features.getEmbedded``, and an ANN on
    the vectors from ``Features.getCVvectors``). Their train/test predictions
    are stacked along a new last axis and used to train the model from
    ``Models.create_stack_model``, which is then scored on the held-out fold
    through ``kn.evaluateModel``. After the last fold the results are saved
    under the experiment name ``"WStack"``.
    """
    import Wesleyan

    data = Wesleyan.Wesleyan()
    kn = Evaluation.Evaluator()
    exp = "WStack"
    filepath = 'enwiki_20180420_300d.txt'

    X, Y, y_raw = Features.getSamples(kn, data)
    data.maxWords = 10000

    def fit_and_predict(base_model, x_tr, x_te, y_tr, **fit_kwargs):
        """Fit ``base_model`` and return its (train, test) predictions.

        The predictions are returned rather than assigned inside a nested
        scope, so the caller actually receives them. The model reference goes
        out of scope when this helper returns.
        """
        base_model.fit(x_tr, y_tr, **fit_kwargs)
        return (
            base_model.predict(x_tr, verbose=0),
            base_model.predict(x_te, verbose=0),
        )

    kf = StratifiedKFold(n_splits=10, shuffle=True)
    for k, (train, test) in enumerate(kf.split(X, y_raw)):
        print("K-Fold: " + str(k + 1))
        x_train_raw, x_test_raw = X[train], X[test]
        y_train, y_test = Y[train], Y[test]
        n_classes = len(y_train[0])

        # Labels are one-hot rows, so argmax recovers the integer class.
        # scikit-learn takes `classes`/`y` by keyword, and Keras documents
        # `class_weight` as a {class_index: weight} mapping, not an array.
        y_ints = np.asarray([y.argmax() for y in y_train])
        classes = np.unique(y_ints)
        weights = class_weight.compute_class_weight('balanced', classes=classes, y=y_ints)
        cweights = {int(c): float(w) for c, w in zip(classes, weights)}

        # The model builders read the embedding from the Models module.
        Models.embedding, x_train, x_test = Features.getEmbedded(
            x_train_raw, x_test_raw, y_train, y_test, y_raw, filepath, kn
        )

        # Base model 1: CNN on the embedded inputs.
        y_pred_train, y_pred_test = fit_and_predict(
            Models.create_cnn_model(pool_size=3, layer_size=128, output_size=n_classes),
            x_train, x_test, y_train,
            epochs=15, verbose=2, batch_size=32, class_weight=cweights,
        )

        # Base model 2: ANN on the embedded inputs.
        # NOTE(review): input_length is now passed explicitly so the input
        # layer matches x_train; the original relied on the builder's default.
        y_pred_train2, y_pred_test2 = fit_and_predict(
            Models.create_ann_model(
                dropout=3, denseSize=512,
                input_length=len(x_train[0]), output_length=n_classes,
            ),
            x_train, x_test, y_train,
            batch_size=64, verbose=2, epochs=30, class_weight=cweights,
        )

        # Base model 3: ANN on the vectors from getCVvectors. Sizes are plain
        # ints here (the one-element lists were grid-search candidates), and
        # batch_size is a fit() argument, as for base model 2.
        x_train, x_test = Features.getCVvectors(x_train_raw, x_test_raw, data)
        y_pred_train3, y_pred_test3 = fit_and_predict(
            Models.create_ann_model(
                denseSize=1024, dropout=3,
                input_length=len(x_train[0]), output_length=n_classes,
            ),
            x_train, x_test, y_train,
            batch_size=64, epochs=100, class_weight=cweights,
        )

        # Stack the three prediction arrays along a new last axis.
        new_x_train = np.stack((y_pred_train, y_pred_train2, y_pred_train3), axis=-1)
        new_x_test = np.stack((y_pred_test, y_pred_test2, y_pred_test3), axis=-1)

        model = Models.create_stack_model(
            input_size=len(new_x_train[0]), output_size=n_classes
        )
        model.fit(
            new_x_train, y_train,
            epochs=100, verbose=2, batch_size=128, class_weight=cweights,
        )
        kn.evaluateModel(new_x_test, y_test, model, data, k)

    kn.saveResults(exp)