예제 #1
0
def main():
    """Load the movie-review polarity data, train a Keras model, and report test accuracy."""
    # Paths to the negative and positive review files of the rt-polaritydata corpus.
    file_names = ["rt-polaritydata/rt-polarity.neg", "rt-polaritydata/rt-polarity.pos"]
    print('Loading data...')
    phrases, labels = load_data(file_names)
    print('Preprocessing data...')
    data = data_to_embedding(phrases, sent_len=51)

    # Hold out 20% for testing, then split a validation set off the remainder,
    # giving roughly 64% train / 16% validation / 20% test.
    x_first_split, x_test, y_first_split, y_test = train_test_split(data, labels, test_size=0.2)
    x_train, x_val, y_train, y_val = train_test_split(x_first_split, y_first_split, test_size=0.2)

    # --------------- build, train and evaluate a single model ------------------
    print('Training the model...')
    model = KerasClassifier(build_fn=create_model, epochs=4, dropout=0.2, input_dim=5100, verbose=0)
    model.fit(x_train, y_train)

    # --------------- 10-fold cross validation on train+validation --------------
    seed = 7
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
    results = cross_val_score(model, x_first_split, y_first_split, cv=kfold)
    print("average result:{0} , std: {1}".format(results.mean(), results.std()))

    # --------------- predictions on the held-out test set ----------------------
    preds = model.predict(x_test)
    acc = accuracy_score(y_test, preds)
    print(acc * 100)
예제 #2
0
def stacking_NN(S_train, y_train, cv=5, epochs=20, deep=False):
    """Fit a small neural-network meta-model on stacked base-model predictions.

    When ``deep`` is true a two-hidden-layer network is used, otherwise a
    single 16-unit hidden layer.  Returns the fitted KerasClassifier.
    """
    n_inputs = len(S_train[0])

    def shallow_builder(num_models=n_inputs):
        # 16-unit hidden layer -> 2-way softmax output.
        net = Sequential()
        net.add(Dense(16, input_dim=num_models, activation='relu'))
        net.add(Dense(2, activation='softmax'))
        net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        return net

    def deep_builder(num_models=n_inputs):
        # Two 8-unit hidden layers -> 2-way softmax output.
        net = Sequential()
        net.add(Dense(8, input_dim=num_models, activation='relu'))
        net.add(Dense(8, input_dim=8, activation='relu'))
        net.add(Dense(2, activation='softmax'))
        net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        return net

    builder = deep_builder if deep else shallow_builder
    meta_model = KerasClassifier(build_fn=builder)
    meta_model.fit(S_train, y_train, epochs=epochs)
    return meta_model
 def param_tune(self):
     """
     Exhaustively fits and scores the model for every hyperparameter
     combination listed below, then writes the best accuracy and its
     parameter tuple to ``self.results``.
     """
     best_params = ()
     best_acc = 0
     grid = product(
         [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],  # epochs
         [1, 3, 5, 7, 9],                                      # batch_size
         ['glorot_uniform', 'normal', 'uniform'],              # init
         ['rmsprop', 'adam'],                                  # optimizer
     )
     for epochs, batch_size, init, optimizer in grid:
         candidate = KerasClassifier(build_fn=self.model_build,
                                     epochs=epochs,
                                     batch_size=batch_size,
                                     init=init,
                                     optimizer=optimizer)
         candidate.fit(self.X_train, self.y_train)
         predictions = np_utils.to_categorical(candidate.predict(self.X_test))
         # Element-wise match rate against the one-hot test labels.
         accu_test = np.sum(self.y_test == predictions) / self.y_test.size
         if accu_test > best_acc:
             best_params = (epochs, batch_size, init, optimizer)
             best_acc = accu_test
     self.results.write("Param Tune Results\n")
     self.results.write(str(best_params) + "\n")
     self.results.write(str(best_acc) + "\n")
    def random_param_tune(self):
        """
        Fits and scores the model on up to 250 randomly sampled hyperparameter
        combinations from the grid below, then writes the best accuracy and its
        parameter tuple to ``self.results``.
        """
        best_params = ()
        best_acc = 0

        all_comb = list(
            product(
                [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],  # epochs
                [1, 3, 5, 7, 9],                                      # batch_size
                ['glorot_uniform', 'normal', 'uniform'],              # init
                ['rmsprop', 'adam']))                                 # optimizer
        # Cap the search at 250 randomly chosen combinations.
        if len(all_comb) > 250:
            all_comb = sample(all_comb, 250)

        for epochs, batch_size, init, optimizer in all_comb:
            candidate = KerasClassifier(build_fn=self.model_build,
                                        epochs=epochs,
                                        batch_size=batch_size,
                                        init=init,
                                        optimizer=optimizer)
            candidate.fit(self.X_train, self.y_train)
            predictions = np_utils.to_categorical(candidate.predict(self.X_test))
            # Element-wise match rate against the one-hot test labels.
            accu_test = np.sum(self.y_test == predictions) / self.y_test.size
            if accu_test > best_acc:
                best_params = (epochs, batch_size, init, optimizer)
                best_acc = accu_test
        self.results.write("Random Param Tune Results\n")
        self.results.write(str(best_params) + "\n")
        self.results.write(str(best_acc) + "\n")
예제 #5
0
def train(X, y, alg, scaler, pca, features, seed=7):
    """Train a new model using the training data.

    Applies the optional ``scaler`` and ``pca`` transforms to ``X``, builds a
    Keras classifier when ``alg == "deep"``, reports a 3-fold cross-validation
    score, then fits the model on all of ``X``/``y`` and returns it.

    Args:
        X, y: training features and labels.
        alg: algorithm name; only "deep" is supported.
        scaler, pca: optional fitted transformers applied to X (may be None).
        features: unused here (kept for interface compatibility).
        seed: numpy RNG seed.
    """
    np.random.seed(seed)

    if scaler is not None:
        X = scaler.transform(X)

    if pca is not None:
        X = pca.transform(X)

    if alg == "deep":
        model = KerasClassifier(build_fn=create_deepmodel,
                                nb_epoch=2,
                                batch_size=2,
                                verbose=1)
    else:
        # Fixed: was a Python 2 print statement, a syntax error under Python 3
        # (the rest of this function already uses the print() function).
        print('No model defined for ' + alg)
        exit()

    # Cross-validate first; this does not fit `model` itself.
    # NOTE(review): StratifiedKFold(y=..., n_folds=...) is the pre-0.18
    # scikit-learn API — confirm the installed sklearn version supports it.
    kfold = StratifiedKFold(y=y, n_folds=3, shuffle=True, random_state=seed)
    results = cross_val_score(model, X, y, cv=kfold)
    print(results.mean())

    # Fit the final model on the complete data set.
    model.fit(X, y, verbose=1, batch_size=1)

    return model
예제 #6
0
class Keras(BaseEstimator):
    """Scikit-learn compatible wrapper that builds a KerasClassifier lazily.

    The network-construction function is called at fit time with the number of
    input features and the number of classes, so the architecture can adapt to
    the data.
    """

    def __init__(self, build_function, multi_class=False, keras_params=None):
        if not callable(build_function):
            raise ValueError('Model construction function must be callable.')

        self.multi_class = multi_class
        self.build_function = build_function
        if keras_params is None:
            keras_params = {}

        self.keras_params = keras_params

    def fit(self, X, y):
        """Build and fit the underlying KerasClassifier on X, y; return self."""
        if self.multi_class:
            self.n_classes_ = len(set(y))
        else:
            self.n_classes_ = 1

        # Defer construction so input dimensionality comes from the data.
        build_callable = lambda: self.build_function(X.shape[1], self.n_classes_)
        keras_params = copy(self.keras_params)
        keras_params['build_fn'] = build_callable

        self.classifier_ = KerasClassifier(**keras_params)
        self.classifier_.fit(X, y)
        # Fixed: scikit-learn estimators must return self from fit() so they
        # compose with pipelines and chained calls.
        return self

    def predict(self, X):
        """Predict class labels for X with the fitted classifier."""
        return self.classifier_.predict(X)
예제 #7
0
class Keras(BaseEstimator):
    """Scikit-learn compatible wrapper that builds a KerasClassifier lazily.

    The network-construction function is called at fit time with the number of
    input features and the number of classes, so the architecture can adapt to
    the data.
    """

    def __init__(self, build_function, multi_class=False, keras_params=None):
        if not callable(build_function):
            raise ValueError('Model construction function must be callable.')

        self.multi_class = multi_class
        self.build_function = build_function
        if keras_params is None:
            keras_params = {}

        self.keras_params = keras_params

    def fit(self, X, y):
        """Build and fit the underlying KerasClassifier on X, y; return self."""
        if self.multi_class:
            self.n_classes_ = len(set(y))
        else:
            self.n_classes_ = 1

        # Defer construction so input dimensionality comes from the data.
        # (Also repaired the original's attribute access split across lines.)
        build_callable = lambda: self.build_function(X.shape[1], self.n_classes_)
        keras_params = copy(self.keras_params)
        keras_params['build_fn'] = build_callable

        self.classifier_ = KerasClassifier(**keras_params)
        self.classifier_.fit(X, y)
        # Fixed: scikit-learn estimators must return self from fit() so they
        # compose with pipelines and chained calls.
        return self

    def predict(self, X):
        """Predict class labels for X with the fitted classifier."""
        return self.classifier_.predict(X)
예제 #8
0
파일: Rec.py 프로젝트: mr3m/dac
class neural_estimator():
    """Wraps a small fully-connected Keras network in a KerasClassifier."""

    def __init__(self, x, y, size=64):
        self.x = x
        # Labels are one-hot encoded up front.
        self.y = np_utils.to_categorical(y)
        self.size = size
        # 50 epochs, batch size 512, silent training.
        self.model = KerasClassifier(build_fn=self.create_model,
                                     epochs=50,
                                     batch_size=512,
                                     verbose=0)

    def create_model(self):
        """Build a two-hidden-layer softmax network with batch normalisation."""
        net = keras.Sequential([
            keras.layers.Flatten(input_shape=(self.x.shape[1], )),
            keras.layers.BatchNormalization(),
            keras.layers.Dense(self.size, activation=tf.nn.relu),
            keras.layers.BatchNormalization(),
            keras.layers.Dense(self.size, activation=tf.nn.relu),
            keras.layers.BatchNormalization(),
            keras.layers.Dense(self.y.shape[1], activation=tf.nn.softmax),
        ])
        net.compile(tf.keras.optimizers.Adam(),
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])
        return net

    def fit(self):
        """Train the wrapped classifier on the stored data and return it."""
        self.model.fit(self.x, self.y)
        return self.model
예제 #9
0
    def add_candidate_feat(self,
                           X_train,
                           X_test,
                           y_train,
                           y_test,
                           constructor_kwargs,
                           scorer=mcc):
        """
        Build, fit, and score a model using a subset of input features plus one candidate feature

        @params:
            X_train            - Required : Pandas dataframe containing training set input data (Dataframe)
            X_test             - Required : Pandas dataframe containing test set input data (Dataframe)
            y_train            - Required : Pandas dataframe containing training set labels (Dataframe)
            y_test             - Required : Pandas dataframe containing test set labels (Dataframe)
            constructor_kwargs - Required : kwargs parameterizing for the model constructor function, except for n_features
            scorer             - Optional : Metric which accepts true and predicted labels as inputs; used to score model

        @returns:
            (score, cm) - the scorer value and the confusion matrix on the test set
        """

        # Create compatibility-wrapped model with dim(X_train) input features, then fit it
        model = KerasClassifier(build_fn=construct_network,
                                n_features=len(X_train.columns.values),
                                **constructor_kwargs)
        model.fit(X_train, y_train)

        # Fixed: predict once instead of twice — the original called
        # model.predict(X_test) separately for the score and for the confusion
        # matrix, doubling inference time for no benefit.
        y_pred = model.predict(X_test)
        score = scorer(y_test, y_pred)
        cm = confusion_matrix(y_test, y_pred)

        return score, cm
예제 #10
0
def automl_basic(X_train,
                 X_test,
                 y_train,
                 y_test,
                 baseline,
                 min_neurons,
                 max_neurons,
                 max_layers,
                 num_runs=3):
    """Grid-search hidden-layer count and width for the `baseline` builder.

    For every (layers, neurons) pair the model is trained `num_runs` times and
    each test-set accuracy is appended to the returned dict, keyed by the pair.
    """
    accuracy_scores = defaultdict(list)
    for config in itertools.product(range(max_layers),
                                    range(min_neurons, max_neurons)):
        n_layers, n_neurons = config
        print("Number of hidden layers", n_layers)
        for _ in range(num_runs):
            # Bind the architecture parameters into the model builder.
            build_fn = partial(baseline, n_neurons, n_layers)
            estimator = KerasClassifier(build_fn=build_fn,
                                        epochs=100,
                                        batch_size=5,
                                        verbose=0)
            estimator.fit(X_train, y_train)
            y_pred = estimator.predict(X_test)
            accuracy_scores[config].append(
                metrics.accuracy_score(y_test, y_pred))
    return accuracy_scores
예제 #11
0
def start_fit(dataSet):
    """Shuffle the data set, train the DBN classifier, and print its metrics.

    Columns 0-147 of `dataSet` are features and column 148 is the label.
    """
    # Shuffle the rows once up front.
    index = [i for i in range(len(dataSet))]
    random.shuffle(index)
    data = dataSet[index]
    # Fixed: the original sliced the *unshuffled* `dataSet` here, so the
    # shuffle above had no effect on training.
    X = data[:, 0:148]
    Y = data[:, 148]
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.2,
                                                        random_state=0)
    # Normalise features using statistics from the training split only.
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    dbn_model = KerasClassifier(model_init,
                                epochs=500,
                                batch_size=64,
                                verbose=0)
    dbn_model.fit(X_train, y_train)
    y_ped = dbn_model.predict(X_test)
    acc, precision, npv, sensitivity, specificity, mcc, f1 = calculate_performace(
        len(y_ped), y_ped, y_test)
    # Fixed: the original formatted `roc_auc`, a name never defined in this
    # function (NameError at runtime); report the f1 score that is computed.
    print(
        'DBN:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,mcc=%f,f1=%f'
        % (acc, precision, npv, sensitivity, specificity, mcc, f1))
    def train(self, features, answers):
        """Train the MLP classifier.

        Args:
            features: array of input samples.
            answers:  array of labels (one-hot encoded internally).

        Returns:
            The fitted KerasClassifier.
        """
        print("1.Training")

        mlpPerf = [['Epoch', 'Batch Size', 'Accuracy']]
        model = KerasClassifier(build_fn=self.create_model, verbose=0)

        # One-hot encode the labels before fitting.
        answers = np_utils.to_categorical(answers)
        model.fit(features, answers)

        # Record the training-set accuracy for the results table.
        mlpPerf.append([
            self.epoch, self.batch_size,
            "{0:.2f}".format(model.score(features, answers) * 100)
        ])
        self.precision.append(model.score(features, answers))
        self.best_score = self.precision[0]

        # Display the performance table.
        print(Tabulate(mlpPerf, headers='firstrow'))
        print()

        return model
예제 #13
0
def explain_row_eli5():
    """Compute (and memoize) per-class feature importances via eli5.

    Wraps the saved Keras model in a KerasClassifier so eli5's
    PermutationImportance can score it on the test set; results are cached in
    the module-level ``map_values_eli5`` dict.
    """
    global map_values_eli5

    # compute explanations only once
    if bool(map_values_eli5):
        return map_values_eli5

    copy_model = tf.keras.models.load_model('{}/{}.h5'.format(name, name),
                                            custom_objects={"f1": kr.f1})

    def base_model():
        # Builder handed to KerasClassifier: returns the already-trained model.
        return copy_model

    my_model = KerasClassifier(build_fn=base_model)
    my_model.fit(X_test.copy(), y_test.copy())

    perm = PermutationImportance(my_model).fit(X_test.copy(), y_test.copy())
    # eli5.show_weights(perm, feature_names=list(df.drop('loan_repaid', axis=1).columns))

    # Feature indices sorted by importance: positive importances first ...
    s = perm.feature_importances_
    sorted_indices = sorted(range(len(s)), key=lambda k: s[k], reverse=True)
    class_1 = [(a, s[a]) for a in sorted_indices if s[a] > 0]
    # ... then non-positive importances with the sign flipped.
    sorted_indices = sorted(range(len(s)), key=lambda k: s[k])
    class_0 = [(a, s[a] * -1) for a in sorted_indices if s[a] <= 0]

    # NOTE(review): every class value is mapped to class_1 while class_0 is
    # never used, and range(max(y_test)) skips the highest class label —
    # confirm whether both are intentional.
    for class_value in range(max(y_test)):
        map_values_eli5[class_value] = class_1

    return map_values_eli5
예제 #14
0
 def fit(self, model, c_m):
     """Cross-validate and fit a KerasClassifier built from `c_m`.

     NOTE(review): the `model` parameter is immediately overwritten by the
     KerasClassifier built below, so the argument passed in is never used —
     confirm whether callers rely on it.

     Args:
         model: unused (shadowed; see note above).
         c_m:   model-building callable passed to KerasClassifier as build_fn.

     Returns:
         Predictions of the fitted model on self.X.
     """
     #class_weight = CW.compute_class_weight('balanced', np.unique(self.y[0]), self.y[0])
     model = KerasClassifier(build_fn=c_m,
                             epochs=200,
                             batch_size=10,
                             verbose=0)  #class_weight = class_weight,
     # optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
     # batch_size = [100]
     # epochs = [50]
     # learn_rate = [0.001]
     # activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
     # momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
     # init_mode = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal',
     #             'he_uniform']
     # 10-fold cross-validation score of the (unfitted) classifier.
     kfold = KFold(n_splits=10, shuffle=True, random_state=1234)
     neurons = [1, 5, 10, 15, 20, 25, 30]
     #
     #param_grid = dict(neurons=neurons)
     #grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
     #grid_result = grid.fit(self.X, self.y)
     results = cross_val_score(model, self.X, self.y, cv=kfold)
     # Fit on the full data set, then predict on the same data.
     model.fit(self.X, self.y)
     y_pred = model.predict(self.X, batch_size=128, verbose=1)
     # pdb.set_trace()
     # y_pred = model.predict_classes(self.X, verbose=1)
     #y_pred = cross_val_score.score
     #y_true = self.y
     print("Baseline: %.2f%% (%.2f%%)" %
           (results.mean() * 100, results.std() * 100))
     print(y_pred)
     return y_pred
예제 #15
0
def train(self, filepath, target, feature_columns, seed):
    """Train a Keras classifier on `feature_columns` of the file at `filepath`.

    Args:
        filepath:        path of the data file to load.
        target:          name of the label column in the data.
        feature_columns: columns to use as model inputs.
        seed:            seed for numpy's RNG.

    The fitted estimator is pickled to MODEL_SAVE_FILE, and a fresh estimator
    is k-fold cross-validated on the training split.
    """
    data = get_data_from_file(filepath)
    target_data = data[target]
    # Keep only the requested feature columns.
    data = data[feature_columns]
    train, test = split(data, 0.8)
    train_target, test_target = split(target_data, 0.8)

    # Set the seed for reproducibility.
    np.random.seed(seed)

    # Get one hot encoded data_set.
    # NOTE(review): the one-hot targets are computed but baseline_model is fit
    # on the raw labels below — confirm which encoding the model expects.
    one_hot_encoded_target = get_one_hot_encoded_target_columns(train_target)

    a, b = get_num_unique_targets(train_target), get_num_feature(train)

    estimator = KerasClassifier(build_fn=baseline_model,
                                nb_epoch=200,
                                batch_size=5,
                                verbose=0)

    # Fixed: the original passed `target` (the label *column name*, a string)
    # as y; the actual training labels are in `train_target`.
    estimator.fit(train, train_target)
    pickle.dump(estimator, open(MODEL_SAVE_FILE, 'wb'))

    estimator = KerasClassifier(build_fn=baseline_model,
                                nb_epoch=200,
                                batch_size=5,
                                verbose=0)
    # Do K fold cross validation on a fresh, unfitted estimator.
    # Fixed: same label bug as above — cross-validate against train_target.
    kcross_validation(estimator, train, train_target, seed)
예제 #16
0
    def DeepLearningClassifer_(self):
        """Run deep-learning classification and return the final estimator.

        Builds a Keras estimator — grid-searched or fixed-parameter depending
        on ``self.do_analysis_gridsearch`` — then wraps it in the class's
        bagging/AdaBoost estimator.
        """

        from keras.callbacks import EarlyStopping
        from keras.wrappers.scikit_learn import KerasClassifier
        import tensorflow as tf

        # GPU memory configuration: allow up to 100% of GPU memory.
        # NOTE(review): tf.ConfigProto/tf.Session are TensorFlow 1.x APIs, and
        # `session` is created but never referenced again — confirm whether
        # creating it has the intended global side effect.
        """GPU使用率の設定"""
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 1.0
        session = tf.Session(config=config)

        estimator = None

        # Build the estimator: grid search when the analysis flag is set,
        # otherwise fit directly with the configured hyperparameters.
        """分析グリッドサーチ実行フラグに応じて推定器作成"""
        if True == self.do_analysis_gridsearch:
            estimator = KerasClassifier(build_fn=self._make_cls_deepleaning_model)
            estimator = self.make_grid_search_estimator(estimator, scoring='accuracy')

        else:
            estimator = KerasClassifier(build_fn=self._make_cls_deepleaning_model,
                                        n_hidden=self.params[PARAM_NHIDDEN][0],
                                        n_unit=self.params[PARAM_NUNIT][0],
                                        keep_drop=self.params[PARAM_KEEPDROP][0])
            # Train until early stopping (patience=3) on the held-out data.
            estimator.fit(np.array(self.X_train), np.array(self.y_train),
                          batch_size=self.params[PARAM_BATCHSIZE][0],
                          epochs=100000, shuffle=False,
                          validation_data=(np.array(self.X_test), np.array(self.y_test)),
                          callbacks=[EarlyStopping(patience=3)])

        # Wrap in the bagging/AdaBoost classification estimator.
        """バギング/アダブースト推定器作成"""
        estimator = self.make_bagada_cls_estimator(estimator)

        return estimator
예제 #17
0
def fit_neural_network(y_train=None,
                       X_train=None,
                       data=None,
                       layers=None,
                       n_epochs=100,
                       n_batch_size=20):
    """Fit a neural network classifier.

    Args:
        y_train: Training labels; overridden when *data* is given.
        X_train: Training features; overridden when *data* is given.
        data (dict): Optional mapping with "y_train" and "X_train" entries
            that takes precedence over the individual arguments.
        layers (str): str specifying the number of hidden layers and hidden nodes in the
            neural network. Example: "100-100-100".
        n_epochs (int): Number of epochs used for model fitting.
        n_batch_size (int): Batch size used for model fitting.

    Returns:
        nnet: The fitted neural network (a KerasClassifier).

    """
    if data is not None:
        y_train = data["y_train"]
        X_train = data["X_train"]

    # Build-function closure over the input dimensionality and layer spec.
    build_model = _get_build_model_func(input_dim=X_train.shape[1],
                                        layers=layers)

    nnet = KerasClassifier(build_fn=build_model,
                           batch_size=n_batch_size,
                           epochs=n_epochs)
    # Mark the wrapper explicitly as a classifier for scikit-learn tooling.
    nnet._estimator_type = "classifier"
    nnet.fit(X_train, y_train, verbose=False)
    return nnet
예제 #18
0
def cross_eval_dnn(dataset_name,
                   outfolder,
                   model_descriptor: str,
                   cpus,
                   nfold,
                   X_data,
                   y_data,
                   embedding_layer_max_index,
                   pretrained_embedding_matrix=None,
                   instance_data_source_tags=None,
                   accepted_ds_tags: list = None):
    """Fit a DNN on the full data, n-fold cross-predict, and save the scores.

    Args:
        dataset_name: name used when saving scores.
        outfolder: output folder; models go in `<outfolder>/models`.
        model_descriptor: textual description passed to the model builder.
        cpus: unused here (kept for interface compatibility).
        nfold: number of cross-validation folds.
        X_data, y_data: training inputs and labels.
        embedding_layer_max_index: vocabulary size for the embedding layer.
        pretrained_embedding_matrix: optional pre-trained embedding weights.
        instance_data_source_tags, accepted_ds_tags: forwarded to util.save_scores.
    """
    print("== Perform ANN ...")
    subfolder = outfolder + "/models"
    # Fixed: replaced the os.stat / bare-except / os.mkdir sequence (which
    # swallowed every error, including mkdir failures) with an idempotent call.
    os.makedirs(subfolder, exist_ok=True)

    create_model_with_args = \
        functools.partial(create_model, max_index=embedding_layer_max_index,
                          wemb_matrix=pretrained_embedding_matrix,
                          model_descriptor=model_descriptor)
    model = KerasClassifier(build_fn=create_model_with_args,
                            verbose=0,
                            batch_size=100)
    model.fit(X_data, y_data)

    # Out-of-fold predictions for scoring.
    nfold_predictions = cross_val_predict(model, X_data, y_data, cv=nfold)

    util.save_scores(nfold_predictions, y_data, None, None, model_descriptor,
                     dataset_name, 3, outfolder, instance_data_source_tags,
                     accepted_ds_tags)
예제 #19
0
def run_exp_nn(X_train, y_train, X_val, y_val, param_name, param_range, other_params):
    """Sweep one hyperparameter of a small Keras MLP and record accuracies.

    Args:
        X_train, y_train: training data and labels.
        X_val, y_val: validation data and labels.
        param_name: name of the swept KerasClassifier parameter.
        param_range: values to sweep for `param_name`.
        other_params: fixed keyword parameters merged with the swept one.

    Returns:
        defaultdict(list) with per-value params, accuracies and timings.
    """

    result = defaultdict(list)

    '''
    ########## BEST FOUND PARAMETERS from HW1 #####
    n1 = 75
    n2 = 14
    mid_act = 'relu'  # useleakyrelu is enabled...
    num_layers = 3
    optimizer = 'adam'
    activation = 'sigmoid'
    epo = 100  # 10
    bat = 44  # 18
    ##############################################
    '''

    for param in param_range:
        # Reset Keras graph state between runs.
        clear_session()
        result['param'].append(param)
        params = {param_name: param}
        params.update(other_params)
        result['params'].append(params)
        result['metrics'].append('accuracy')
        t0 = time.time()

        num_features = X_train.shape[1]
        print('num_features = {}'.format(num_features))

        def classification_model(n1=75, n2=14, n3=14, num_layers=3, input_dim=num_features,
                                 optimizer='adam', activation='sigmoid', epo=100, bat=44):
            model = Sequential()
            # Fixed: the input layer hard-coded input_dim=64, silently ignoring
            # the input_dim argument computed from the data above.
            model.add(Dense(n1, input_dim=input_dim))
            model.add(LeakyReLU())
            model.add(Dense(n2))
            model.add(LeakyReLU())
            for i in range(num_layers - 2):
                model.add(Dense(n3))
                model.add(LeakyReLU())
            model.add(Dense(4, activation=activation))
            model.compile(optimizer=optimizer,
                          loss='sparse_categorical_crossentropy',
                          metrics=['accuracy'])
            return model

        model = KerasClassifier(build_fn=classification_model, verbose=0, **params)

        model.fit(X_train, y_train.values.ravel('C'))

        y_pred = model.predict(X_train)
        y_val_pred = model.predict(X_val)
        # Fixed: training accuracy was computed against y_val even though
        # y_pred holds predictions for X_train; compare with y_train.
        result['accuracy_m'].append(accuracy_score(y_train, y_pred))
        result['accuracy_val_m'].append(accuracy_score(y_val, y_val_pred))
        print("took {} seconds".format(time.time() - t0))
        result['time'].append(time.time() - t0)

    # NOTE(review): result['param_range'] is never populated, so these plot
    # arguments are empty lists — confirm whether result['param'] was intended.
    plot_lines1(result['param_range'], result['time'], result['param'], result['param_range'], label='Motions', col='blue')

    return result
예제 #20
0
class NeuralNetworkClassificationModelFast(object):
    """ Accepts SingleStockDataSet object as input and
        trains the benchmark model on train set and
        evaluate on test set. """

    def __init__(self, dset, random_state=16):
        self.dset = dset
        self.random_state = random_state
        # 80/10/10 train/validation/test split.
        self.trainX, self.valX, self.testX, self.trainY, self.valY, self.testY = self.dset.get_train_val_test_sets(
            0.8, 0.1, 0.1)
        self.predictions = None
        self.num_targets = self.trainY.shape[1]

    def build_nn_arch(self):
        """Build the 80-40-6 feed-forward network compiled with SGD."""
        input_dim = self.trainX.shape[1]
        num_classes = 6
        model = Sequential()
        model.add(
            Dense(80, input_dim=input_dim, init='normal', activation='relu'))
        model.add(Dense(40, init='normal', activation='sigmoid'))
        model.add(Dense(num_classes, init='normal', activation='sigmoid'))
        sgd = SGD(lr=0.4)
        # Compile model
        model.compile(loss='binary_crossentropy', optimizer=sgd)
        return model

    def build_classifier(self):
        """Create the KerasClassifier wrapper with a fixed random seed."""
        # fix random seed for reproducibility
        np.random.seed(self.random_state)
        self.classifier = KerasClassifier(build_fn=self.build_nn_arch,
                                          nb_epoch=100,
                                          batch_size=8,
                                          verbose=0)

    def fit(self):
        """Build the classifier and fit it on column 1 of the training targets."""
        self.build_classifier()
        self.classifier.fit(self.trainX, self.trainY[:, 1])

    def predict(self):
        """Predict on the test set, caching and returning the predictions."""
        self.predictions = self.classifier.predict(self.testX)
        return self.predictions

    def score(self):
        """Return the f1 score of the cached predictions."""
        scr = f1_score(self.testY[:, 1], self.predictions)
        # Fixed: was a Python 2 print statement (syntax error on Python 3).
        print('f1 score = %f' % scr)
        return scr

    def evaluate(self):
        """ fits the model, predicts the targets and returns evaluation score """
        self.fit()
        self.predict()
        return self.score()

    def to_categorical(self, y):
        # convert integers to dummy variables (i.e. one hot encoded)
        return np_utils.to_categorical(y)
def train(model_dir, data_dir, train_steps):
    """Train a diabetes classifier on the Pima Indians data set and save it.

    Args:
        model_dir: directory where model.h5 is written.
        data_dir: directory containing pima-indians-diabetes.csv.
        train_steps: unused (kept for interface compatibility).
    """
    # fix random seed for reproducibility
    seed = 7
    numpy.random.seed(seed)
    # load pima indians dataset, skipping the header row
    dataset_path = os.path.join(data_dir, 'pima-indians-diabetes.csv')
    dataset = numpy.loadtxt(dataset_path, delimiter=",", skiprows=1)
    # split into input (X) and output (Y) variables
    X = dataset[:, 0:8]
    Y = dataset[:, 8]
    # create model
    model = KerasClassifier(build_fn=create_model,
                            epochs=150,
                            batch_size=10,
                            verbose=1)

    model.fit(X, Y)
    model.model.save(os.path.join(model_dir, 'model.h5'))
    # Fixed: dropped the pointless f-prefix on a literal with no placeholders.
    print("Model summary after training:")
    print(model.model.summary())
def trainNNmodel(X, Y):
    """Train a three-hidden-layer binary classifier on a 70/30 split of X, Y.

    Returns [accuracy, auc, precision, recall, f1, y_test, y_pred,
    y_pred_proba, fitted keras model] with all metrics stringified.
    """
    # 70/30 train/test split.
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)

    def create_keras_model():
        # 64 -> 128 -> 16 -> 1 network with batch-norm and dropout.
        net = Sequential()
        net.add(
            Dense(64,
                  input_dim=X.shape[1],
                  kernel_initializer='glorot_normal',
                  activation='relu'))
        net.add(BatchNormalization())
        net.add(Dropout(0.5))
        net.add(
            Dense(128, kernel_initializer='glorot_normal', activation='relu'))
        net.add(BatchNormalization())
        net.add(Dropout(0.5))
        net.add(
            Dense(16, kernel_initializer='glorot_normal', activation='relu'))
        net.add(Dense(1, activation='sigmoid'))
        net.compile(optimizer="adam",
                    loss='binary_crossentropy',
                    metrics=['accuracy'])
        return net

    # Stop early once training accuracy plateaus for 50 epochs.
    early_stop = callbacks.EarlyStopping(monitor="accuracy",
                                         patience=50,
                                         mode='max')
    callbacks_list = [early_stop]

    estimator = KerasClassifier(build_fn=create_keras_model,
                                epochs=200,
                                batch_size=12,
                                verbose=0,
                                callbacks=callbacks_list)
    estimator.fit(X_train,
                  y_train,
                  batch_size=12,
                  epochs=200,
                  verbose=1,
                  callbacks=callbacks_list)

    # Flatten the (n, 1) class predictions and grab positive-class scores.
    y_pred = [label for row in estimator.predict(X_test) for label in row]
    y_pred_rt = estimator.predict_proba(X_test)[:, 1]

    accuracy = str(accuracy_score(y_test, y_pred))
    fpr, tpr, thresholds = roc_curve(y_test, y_pred_rt)
    auc_value = str(auc(fpr, tpr))
    precision = str(precision_score(y_test, y_pred))
    recall = str(recall_score(y_test, y_pred))
    f1score = str(f1_score(y_test, y_pred, average="weighted"))

    return [
        accuracy, auc_value, precision, recall, f1score, y_test, y_pred,
        y_pred_rt, estimator.model
    ]
예제 #23
0
def feature_importance(create_model):
    """Fit a classifier and display eli5 permutation importances for it."""
    wrapped = KerasClassifier(build_fn=create_model,
                              epochs=EPOCHS,
                              batch_size=5,
                              verbose=0)
    wrapped.fit(X_train, Y_train)
    # Permutation importance over the training data with a fixed seed.
    perm = PermutationImportance(wrapped, random_state=1).fit(X_train, Y_train)
    display(eli5.show_weights(perm, feature_names=featureNames))
예제 #24
0
def train_model(X_train, y_train):
    """Cross-validate and fit the classifier; return [classifier, accuracies]."""
    classifier = KerasClassifier(build_fn=build_classifier, batch_size=171, epochs=94)

    # Ten shuffle-split folds with a 20% validation fraction each.
    splitter = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    accuracies = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=splitter, verbose=2)

    classifier.fit(X_train, y_train, batch_size=171, epochs=94)
    return [classifier, accuracies]
예제 #25
0
 def model_probs(self, classifier=None):
     """Return class-probability predictions for self.X_test.

     When no classifier is supplied, a fresh KerasClassifier is built from
     self.model_build and fitted on the training data first.
     """
     if not classifier:
         classifier = KerasClassifier(build_fn=self.model_build,
                                      epochs=200,
                                      batch_size=5)
         classifier.fit(self.X_train, self.y_train)
     return classifier.predict_proba(self.X_test)
예제 #26
0
class r_lgb_model(object):
    """K-fold Keras binary classifier with an AUROC metric and custom eval loss."""

    def __init__(self, k, params):
        self.k = k
        self.skf = StratifiedKFold(n_splits=self.k, shuffle=True, random_state=42)
        self.params = params
        # One slot per fold for fitted models.
        self.model_list = [0] * self.k

    def auroc(self, y_true, y_pred):
        """ROC-AUC exposed as a TensorFlow metric via py_func."""
        return tf.py_func(roc_auc_score, (y_true, y_pred), tf.double)

    def custom_loss(self, y_pred, y_true):
        """Recall-based eval metric in (name, value, is_higher_better) form.

        NOTE(review): the (y_pred, y_true) argument order is the reverse of
        sklearn convention — confirm against the caller before reusing.
        """
        rs = recall_score(np.where(y_true >= 0.5, 1, 0), np.where(y_pred >= 0.5, 1, 0))
        return 'custom loss', 1/np.e**(np.log(rs)**2), True

    def create_baseline(self):
        """Build the 64-256-1 sigmoid network; requires self.dim to be set."""
        model = Sequential()
        model.add(Dense(64, input_dim=self.dim, kernel_initializer='normal', activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(256, kernel_initializer='normal', activation='relu'))
        model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[self.auroc])
        return model

    def _fit(self, X, y, verbose, esr, weights):
        """Fit the Keras classifier on X, y with early stopping on val AUROC.

        `verbose`, `esr` and `weights` are currently unused (kept for
        interface compatibility).
        """
        # Record input dimensionality for create_baseline().
        self.dim = X.shape[1]

        callbacks = [EarlyStopping(monitor='val_auroc',
                           min_delta=0.0,
                           patience=1,
                           verbose=0,
                           mode='max',
                           restore_best_weights=True)]

        self.lgb_model = KerasClassifier(build_fn=self.create_baseline,
                                    epochs=30, batch_size=1024,
                                    verbose=1, validation_split=0.20,
                                    callbacks=callbacks)

        # Fixed: the original fitted on X_train/y_train, names that only
        # existed in the commented-out train/test split and therefore raised
        # NameError at runtime. Fit on the provided X, y — validation_split
        # already carves out 20% for the early-stopping metric.
        self.lgb_model.fit(X, y)
예제 #27
0
def perform_keras(name, mongo_host):
    """Run 5-fold stratified CV of a Keras classifier over a training sample.

    Loads (a 10k-row head of) the training data, cleans it, then per fold:
    oversamples, standardizes, fits a KerasClassifier and logs the log-loss.
    `name` and `mongo_host` are accepted but unused in this body.
    """
    df = load_train_all_xgb().head(10000)  # small sample for speed
    del_trash_cols(df)
    blja_nan(df)  # NaN-handling helper -- semantics defined elsewhere
    # add_kur_combinations(df)
    folds = 5
    seed = 42

    skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=seed)
    losses = []
    n_est = []  # NOTE(review): never appended to -- appears unused
    counter = 0  # NOTE(review): never incremented -- appears unused
    for big_ind, small_ind in skf.split(np.zeros(len(df)), df[TARGET]):
        big = df.iloc[big_ind]      # training fold
        small = df.iloc[small_ind]  # held-out fold

        # Print class balance before and after oversampling, for eyeballing.
        print explore_target_ratio(big)
        print explore_target_ratio(small)

        big, small = oversample(big, small, seed)

        print explore_target_ratio(big)
        print explore_target_ratio(small)

        # Split each fold into features and target (TARGET column removed
        # in place).
        train_target = big[TARGET]
        del big[TARGET]
        train_arr = big

        test_target = small[TARGET]
        del small[TARGET]
        test_arr = small

        # create_baseline presumably reads NUM_FETURES at build time,
        # hence the global rebind.
        global NUM_FETURES
        NUM_FETURES = len(train_arr.columns)

        # Standardize using training-fold statistics only.
        scaller = StandardScaler()
        train_arr = scaller.fit_transform(train_arr)
        test_arr = scaller.transform(test_arr)

        estimator = KerasClassifier(build_fn=create_baseline,
                                    epochs=100,
                                    batch_size=5,
                                    verbose=10)

        print len(train_arr)
        print len(test_arr)
        # NOTE(review): eval_set is built but never passed to fit().
        eval_set = [(train_arr, train_target), (test_arr, test_target)]
        estimator.fit(train_arr, train_target)

        proba = estimator.predict_proba(test_arr)

        # Held-out log-loss for this fold.
        loss = log_loss(test_target, proba)
        out_loss(loss)
        losses.append(loss)
        # xgb.plot_importance(estimator)
        # plot_errors(stats)

    out_loss('avg = {}'.format(np.mean(losses)))
예제 #28
0
def evaluate(model, seed, x, y, t, ep, bz):
    """Fit *model* on (x, y), then report 10-fold CV mean/std accuracy.

    model: a builder callable passed to KerasClassifier as build_fn.
    t, seed: forwarded to the project's k_fold() splitter.
    ep, bz: epochs and batch size for the wrapper.
    """
    print('K-fold validation ... ', end='', flush=True)
    clf = KerasClassifier(build_fn=model, epochs=ep, batch_size=bz, verbose=0)
    clf.fit(x, y)
    scores = cross_val_score(clf, x, y, cv=k_fold(10, t, seed))
    print('%.2f%% (%.2f%%)' % (scores.mean() * 100, scores.std() * 100))
예제 #29
0
def testing(X_train=[],
            X_test=[],
            V_train=[],
            V_test=[],
            t_train=[],
            t_test=[],
            Y_train=[],
            Y_test=[],
            top_words=9444,
            max_review_length=1000,
            embedding_length=300,
            batch_size=128,
            nb_epoch=100,
            preset={},
            option='lstm'):
    """Train a CNN or LSTM text classifier and return acc / F1 / AUC on the test set.

    NOTE(review): the mutable defaults ([] and {}) are shared across calls;
    in particular `preset` is mutated in place below.
    NOTE(review): for option='classic' (and any unknown option) `model` is
    never assigned, so the later model.fit / model.predict raise NameError.
    """
    # Pad/truncate every sequence to a fixed length for the embedding layer.
    X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
    X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
    if option == 'cnn':
        # Forward the hyper-parameters to the CNN builder via the wrapper.
        preset.update({
            'build_fn': cnn_train,
            'top_words': top_words,
            'max_length': max_review_length,
            'embedding_length': embedding_length,
            'batch_size': batch_size,
            'nb_epoch': nb_epoch,
            'verbose': 1
        })
        model = KerasClassifier(**preset)
    elif option == 'lstm':
        # Same settings, but with the LSTM builder.
        preset.update({
            'build_fn': lstm_train,
            'top_words': top_words,
            'max_length': max_review_length,
            'embedding_length': embedding_length,
            'batch_size': batch_size,
            'nb_epoch': nb_epoch,
            'verbose': 1
        })
        model = KerasClassifier(**preset)

    else:
        print("ERROR AT TRAINING PHASE OF TESTING.")

    if option == 'cnn' or option == 'lstm':
        model.fit(X_train, Y_train)
    elif option == 'classic':
        # Time-decay-normalized dense features instead of padded sequences.
        model.fit(
            decay_norm(x=np.array(V_train),
                       t_stamps=t_train,
                       embedding_length=embedding_length,
                       max_review_length=max_review_length)[0], Y_train)

    # Evaluate on the padded test sequences regardless of option.
    predict = model.predict(X_test)
    acc = accuracy_score(Y_test, predict)
    f1 = f1_score(Y_test, predict)
    auc = roc_auc_score(Y_test, predict)
    return ({'acc': acc, 'f1': f1, 'auc': auc})
예제 #30
0
    def _model_build(self, *arg):
        """Grid-search Keras hyper-parameters, then retrain the best
        configuration from scratch and score it on the test split.

        Sets self.y_pred, self.y_true and self.prob, then delegates to
        self._analyse_result().
        NOTE(review): uses the legacy Keras kwargs 'init' and 'nb_epoch';
        modern Keras expects 'kernel_initializer' and 'epochs' -- confirm
        the installed version.
        """
        self._prepare_test_data()
        model = KerasClassifier(
            build_fn=self.create_model, verbose=0)
        # Grid: 1 optimizer x 2 initializers x 2 epoch counts x 2 batch sizes.
        optimizers = [
            'adam']
        init = [
            'normal', 'uniform']
        epochs = [
            100, 150]
        batches = [
            5, 10]
        param_grid = dict(
            optimizer=optimizers, nb_epoch=epochs, batch_size=batches, init=init)
        grid = GridSearchCV(
            estimator=model, param_grid=param_grid, cv=5)
        grid_result = grid.fit(
            self.x_train, self.y_train)
        print("Best: %f using %s" % (
            grid_result.best_score_, grid_result.best_params_))
        # means = grid_result.cv_results_[
        #     'mean_test_score']
        # stds = grid_result.cv_results_[
        #     'std_test_score']
        # params = grid_result.cv_results_[
        #     'params']
        # for mean, stdev, param in zip(means, stds, params):
        #     print("%f (%f) with: %r" % (
        # mean,
        # stdev,
        # param))

        # Training
        # with Best
        # Parameter
        # NOTE(review): input_dim=8 is hard-coded here -- confirm it matches
        # the width of self.x_train.
        model = Sequential()
        model.add(Dense(
            12, input_dim=8, init=grid_result.best_params_['init'], activation='relu'))
        model.add(Dense(
            8, init=grid_result.best_params_['init'], activation='relu'))
        model.add(Dense(
            1, init=grid_result.best_params_['init'], activation='sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer=grid_result.best_params_['optimizer'], metrics=['accuracy'])
        # Compile
        # model
        model.fit(
            self.x_train, self.y_train, nb_epoch=grid_result.best_params_['nb_epoch'], batch_size=grid_result.best_params_['batch_size'])
        yy_pred = model.predict(
            self.x_test)
        # Threshold the sigmoid outputs at 0.5 via rounding.
        self.y_pred = [np.round(
            x) for x in yy_pred]
        self.y_true = self.y_test
        self.prob = model.predict_proba(
            self.x_test)
        self._analyse_result()
def SimpleLoss(individual, data, labels, layers, activation, *_):
	"""Train a network configured by *individual* and return 1 - test accuracy.

	individual: sequence of [epochs, learning rate, dropout]; a lower return
	value means a better individual (loss-style fitness for the optimizer).
	data / labels: dicts with 'train' and 'test' splits.
	"""
	# Binary targets arrive as a 1-D vector; multi-class as a one-hot matrix.
	n_outputs = 2 if len(labels['train'].shape) < 2 else labels['train'].shape[1]
	clf = KerasClassifier(build_fn=CreateNeuralNetwork,
		input_size=data['train'].shape[1],
		output_size=n_outputs,
		layers=layers, activation=activation, lr=individual[1],
		dropout=individual[2], epochs=int(individual[0]), verbose=0)
	clf.fit(data['train'], labels['train'])
	return 1 - clf.score(data['test'], labels['test'])
def test_keras_classifier():
    """Smoke-test the scikit-learn KerasClassifier wrapper on a softmax MLP.

    Relies on module-level globals: input_dim, nb_class, optim, loss,
    batch_size, nb_epoch and the X/y train/test splits.
    NOTE(review): passing the model positionally plus optimizer/loss/
    train_batch_size kwargs matches a very old wrapper API -- confirm the
    installed Keras version supports it.
    """
    model = Sequential()
    model.add(Dense(input_dim, input_shape=(input_dim,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_class))
    model.add(Activation('softmax'))

    sklearn_clf = KerasClassifier(model, optimizer=optim, loss=loss,
                                  train_batch_size=batch_size,
                                  test_batch_size=batch_size,
                                  nb_epoch=nb_epoch)
    sklearn_clf.fit(X_train, y_train)
    # Score is computed but not asserted -- this only checks it runs.
    sklearn_clf.score(X_test, y_test)
예제 #33
0
def main():
    """Train a Keras classifier on the Otto data and bag it into an ensemble."""
    code_dir = '/home/john/git/kaggle/OttoGroup/'
    data_dir = '/home/john/data/otto/'
    training_file = 'train.csv'

    os.chdir(code_dir)
    np.random.seed(1337)  # reproducibility

    print('Starting script...')

    print('Loading data...')
    X, labels = load_training_data(data_dir, training_file)

    print('Pre-processing...')
    scaler = create_scaler(X)
    X = apply_scaler(X, scaler)
    # y: integer labels; y_onehot: one-hot matrix for the softmax output.
    y, y_onehot, encoder = preprocess_labels(labels)
    num_features = X.shape[1]
    num_classes = y_onehot.shape[1]
    print('Features = ' + str(num_features))
    print('Classes = ' + str(num_classes))

    print('Building model...')
    model = define_model(num_features, num_classes)
    print('Complete.')

    print('Training model...')
    # Legacy wrapper API: model passed positionally, nb_epoch at fit time.
    wrapper = KerasClassifier(model)
    wrapper.fit(X, y_onehot, nb_epoch=20)
    print('Complete.')

    # Training-set score only -- there is no held-out evaluation here.
    print('Training score = ' + str(wrapper.score(X, y_onehot)))

    preds = wrapper.predict(X)
    print('Predictions shape = ' + str(preds.shape))

    proba = wrapper.predict_proba(X)
    print('Probabilities shape = ' + str(proba.shape))

    print('Building ensemble...')
    ensemble = BaggingClassifier(wrapper, n_estimators=3, max_samples=1.0, max_features=1.0)
    print('Complete.')

    print('Training ensemble...')
    # NOTE(review): the ensemble is fit on integer labels y while the wrapper
    # itself was fit on one-hot y_onehot -- confirm this is intentional.
    ensemble.fit(X, y)
    print('Complete.')

    print('Ensemble score = ' + str(ensemble.score(X, y)))

    print('Script complete.')
예제 #34
0
파일: Baseline.py 프로젝트: Ewen2015/Kaggle
class Baseline(object):
    """Provide general machine learning models as baselines.

    Wraps four baselines (logistic regression, random forest, LightGBM GBDT,
    and a one-layer Keras NN) behind a common train/valid/report interface.

    Args:
        train: training DataFrame containing target and feature columns.
        valid: validation DataFrame with the same columns.
        target: name of the target column.
        features: list of feature column names.
        impute: whether to mean-impute missing values for LR/RF.
    """
    def __init__(self, train, valid, target, features, impute=True):
        super(Baseline, self).__init__()
        self.target = target
        self.features = features

        self.train = train
        self.valid = valid

        if impute:
            import pandas as pd
            from sklearn.preprocessing import Imputer

            # LR/RF cannot handle NaNs, so keep mean-imputed copies;
            # GBDT/NN train on the raw frames.
            self.train_prep = pd.DataFrame(Imputer(strategy='mean').fit_transform(self.train), columns=self.train.columns)
            self.valid_prep = pd.DataFrame(Imputer(strategy='mean').fit_transform(self.valid), columns=self.valid.columns)
        else:
            self.train_prep = self.train
            self.valid_prep = self.valid

    def LR(self, report=False):
        """Logistic Regression.

        Args:
            report: whether print out the model analysis report.
        Returns:
            Fitted logistic regression model."""
        from sklearn.linear_model import LogisticRegression

        self.lr = LogisticRegression(n_jobs=-1)
        self.lr.fit(self.train_prep[self.features], self.train_prep[self.target])

        if report:
            from Report import Report
            rpt = Report(self.lr, self.train_prep, self.valid_prep, self.target, self.features)
            rpt.ALL()

        return self.lr

    def RF(self, report=False):
        """Random Forest.

        Args:
            report: whether print out the model analysis report.
        Returns:
            Fitted random forest model."""
        from sklearn.ensemble import RandomForestClassifier

        self.rf = RandomForestClassifier(n_estimators=1000,
                                        max_features='sqrt',
                                        max_depth=10,
                                        random_state=0,
                                        n_jobs=-1)
        self.rf.fit(self.train_prep[self.features], self.train_prep[self.target])

        if report:
            from Report import Report
            rpt = Report(self.rf, self.train_prep, self.valid_prep, self.target, self.features)
            rpt.ALL()

        return self.rf

    def GBDT(self, report=False):
        """Gradient Boosting Decision Tree (LightGBM).

        Args:
            report: whether print out the model analysis report.
        Returns:
            Booster trained with early stopping on an internal 80/20 split."""
        import lightgbm as lgb
        from sklearn.model_selection import train_test_split

        # Carve an internal validation split out of the raw training frame
        # for early stopping.
        train, test = train_test_split(self.train, test_size=0.2, random_state=0)

        lgb_train = lgb.Dataset(train[self.features], train[self.target], free_raw_data=False)
        lgb_valid = lgb.Dataset(test[self.features], test[self.target], reference=lgb_train, free_raw_data=False)

        params = {
            'boosting_type': 'gbdt',
            # BUG FIX: was misspelled 'bianry', which LightGBM rejects.
            'objective': 'binary',
            'metric': 'auc',
            'num_leaves': 64,
            'learning_rate': 0.01,
            'feature_fraction': 0.75,
            'bagging_fraction': 0.75,
            'bagging_freq': 5,
            'verbose': 0
        }

        # BUG FIX: lgb.train expects 'valid_sets' (a list) and
        # 'early_stopping_rounds'; the original 'valid_set' /
        # 'early_stopping_round' keywords raise TypeError.
        self.gbdt = lgb.train(params,
                        lgb_train,
                        num_boost_round=10000,
                        valid_sets=[lgb_valid],
                        early_stopping_rounds=200,
                        verbose_eval=100)
        if report:
            from Report import Report
            rpt = Report(self.gbdt, self.train, self.valid, self.target, self.features)
            rpt.ALL()

        return self.gbdt

    def NN(self, report=False):
        """Neural Network.

        Args:
            report: whether print out the model analysis report.
        Returns:
            One-hidden-layer Keras model wrapped as a sklearn classifier."""
        from keras.models import Sequential
        from keras.layers import Dense
        from keras.wrappers.scikit_learn import KerasClassifier

        def baseline_model():
            # 8-unit relu hidden layer over the feature columns, sigmoid out.
            model = Sequential()
            model.add(Dense(8, input_dim=len(self.features), activation='relu'))
            model.add(Dense(1, activation='sigmoid'))
            model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
            return model

        self.nn = KerasClassifier(build_fn=baseline_model, epochs=5, batch_size=5, verbose=1)
        self.nn.fit(self.train[self.features], self.train[self.target])

        if report:
            from Report import Report
            rpt = Report(self.nn, self.train, self.valid, self.target, self.features)
            rpt.ALL()

        return self.nn
예제 #35
0
    # create model
    model = Sequential()
    model.add(Dense(12, input_dim=34, init='uniform', activation='relu'))
    model.add(Dense(8, init='uniform', activation='relu'))
    model.add(Dense(1, init='uniform', activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
 
# create model
# Wrap the Keras builder as a scikit-learn estimator (legacy 'nb_epoch'
# spelling -- modern wrappers use 'epochs').
model = KerasClassifier(build_fn=create_model, nb_epoch=20, batch_size=32)
# evaluate using 10-fold cross validation
# kfold = KFold(n=len(features_train), n_folds=10, shuffle=True, random_state=seed)
# results = cross_val_score(model, features_train.values, labels_train.values, cv=kfold)
# print "Cross validation results:", (results.mean()*100), (results.std()*100)
model.fit(features_train.values, labels_train.values)

# Python 2 print statement; t0 was set before this chunk.
print "Model building complete:",round((time()-t0)/60,3),"m"

# print len(np.unique(train.user_id)), len(np.unique(test.user_id))

# features_train, features_test, labels_train, labels_test = cross_validation.train_test_split(features_train, labels_train, test_size=0.60)

# # neigh = neighbors.KNeighborsClassifier(weights='distance', n_jobs=-1).fit(train[features], train['hotel_cluster'])
# forest = ensemble.RandomForestClassifier(n_estimators=10, n_jobs=-1).fit(train[features], train['hotel_cluster'])
# # bayes = naive_bayes.GaussianNB().fit(train[features], train['hotel_cluster'])

t0 = time()
print "Predicting probabilities..."
# Class-probability matrix for the test features (rows = samples).
probs = pd.DataFrame(model.predict_proba(features_test.values, batch_size=32))
예제 #36
0
# scikit-learn wrapper test #
#############################
# Exercises fit/score/predict/predict_proba/get_params on the wrapper.
print('Beginning scikit-learn wrapper test')

print('Defining model')
# NOTE(review): Dense(784, 50) is the ancient Keras 0.x two-positional-arg
# API (in_dim, out_dim) -- confirm the installed version accepts it.
model = Sequential()
model.add(Dense(784, 50))
model.add(Activation('relu'))
model.add(Dense(50, 10))
model.add(Activation('softmax'))

print('Creating wrapper')
classifier = KerasClassifier(model)

print('Fitting model')
classifier.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch)

print('Testing score function')
score = classifier.score(X_train, Y_train)
print('Score: ', score)

print('Testing predict function')
preds = classifier.predict(X_test)
print('Preds.shape: ', preds.shape)

print('Testing predict proba function')
proba = classifier.predict_proba(X_test)
print('Proba.shape: ', proba.shape)

print('Testing get params')
print(classifier.get_params())
예제 #37
0
파일: pmrct_v5.py 프로젝트: fzhurd/fzwork
    model.add(Dense(9, init='normal', activation="softmax"))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder

# Integer-encode the string labels, then one-hot them for the softmax head.
encoder = LabelEncoder()
encoder.fit(train_y)
encoded_y = encoder.transform(train_y)

dummy_y = np_utils.to_categorical(encoded_y)
print(dummy_y.shape)

# NOTE(review): 'nb_epochs' matches neither the legacy 'nb_epoch' nor the
# modern 'epochs' kwarg -- the intended epoch count is likely not applied.
estimator = KerasClassifier(build_fn=baseline_model, nb_epochs=10, batch_size=64)
# First 3321 rows are the labelled training set; the rest is the test set.
estimator.fit(sentence_vectors[0:3321], dummy_y, validation_split=0.05)

y_pred = estimator.predict_proba(sentence_vectors[3321:])

""" Submission """
# Assemble the 9-class probability submission keyed by test id.
submission = pd.DataFrame(y_pred)
submission['id'] = test_index
submission.columns = ['class1', 'class2', 'class3', 'class4', 'class5', 'class6', 'class7', 'class8', 'class9', 'id']
submission.to_csv("submission_keras_classify.csv",index=False)





   
예제 #38
0
class BaseKerasSklearnModel(base_model.BaseModel):
    '''
    Base Keras model exposed through the scikit-learn wrapper
    (KerasClassifier), with checkpoint and loss-history callbacks.
    '''
##    def __init__(self, data_file, delimiter, lst_x_keys, lst_y_keys, log_filename=DEFAULT_LOG_FILENAME, model_path=DEFAULT_MODEL_PATH, create_model_func=create_model_demo):
##        '''
##        init
##        '''
##        import framework.tools.log as log
##        loger = log.init_log(log_filename)
##        self.load_data(data_file, delimiter, lst_x_keys, lst_y_keys)
##        self.model_path = model_path
##        self.create_model_func=create_model_func

    def __init__(self, **kargs):
        '''
        Initialize from a nested kargs dict:
          kargs["basic_params"]: log_filename, model_path
          kargs["load_data"]: {"method", "params"} -> returns
              (dataset, X, Y, X_evaluation, Y_evaluation)
          kargs["create_model"]: {"method", "params"} -> build_fn for the
              KerasClassifier created in init_model()
        '''
        import framework.tools.log as log
        self.kargs = kargs
        log_filename = self.kargs["basic_params"]["log_filename"]
        model_path = self.kargs["basic_params"]["model_path"]
        self.load_data_func = self.kargs["load_data"]["method"]
        self.create_model_func = self.kargs["create_model"]["method"]
        loger = log.init_log(log_filename)
        (self.dataset, self.X, self.Y, self.X_evaluation, self.Y_evaluation) = self.load_data_func(**self.kargs["load_data"]["params"])
        self.model_path = model_path
        # Accumulates wrapper params (callbacks, epochs, ...) for set_params.
        self.dic_params = {}


    def load_data(self, data_file, delimiter, lst_x_keys, lst_y_keys):
        '''
        Load a CSV file into self.dataset and split columns into X / Y.
        NOTE(review): the 'delimiter' argument is ignored -- "," is
        hard-coded in the loadtxt call below.
        '''
        # Load the dataset
        self.dataset = numpy.loadtxt(data_file, delimiter=",") 
        self.X = self.dataset[:, lst_x_keys] 
        self.Y = self.dataset[:, lst_y_keys]

    def init_callbacks(self):
        '''
        Register per-epoch checkpointing and loss-history callbacks.
        '''
        os.system("mkdir -p %s" % (self.model_path))
        # Checkpoint every epoch (save_best_only=False), named by epoch/acc.
        checkpoint_callback = ModelCheckpoint(self.model_path + '/weights.{epoch:02d}-{acc:.2f}.hdf5', \
                monitor='acc', save_best_only=False)
        history_callback = LossHistory()
        callbacks_list = [checkpoint_callback, history_callback]
        self.dic_params["callbacks"] = callbacks_list

    def init_model(self):
        '''
        Build the KerasClassifier and apply the accumulated parameters.
        '''
        train_params = {"nb_epoch": 10, "batch_size": 10}
        self.dic_params.update(train_params)
        self.model = KerasClassifier(build_fn=self.create_model_func, **self.kargs["create_model"]["params"])
#        self.model = KerasClassifier(build_fn=self.create_model_func)
        self.model.set_params(**self.dic_params)

    def train_model(self):
        '''
        Fit the wrapped model on (X, Y) and log the training score/history.
        '''
        X = self.X
        Y = self.Y
        X_evaluation = self.X_evaluation
        Y_evaluation = self.Y_evaluation
        seed = 7
        numpy.random.seed(seed) # Load the dataset

        history = self.model.fit(X, Y)
        # Training-set accuracy only; the evaluation split is not used here.
        scores = self.model.score(X, Y)
#history_callback = self.dic_params["callbacks"][1]
#        print dir(history_callback)
#        logging.info(str(history_callback.losses))
        logging.info("final : %.2f%%" % (scores * 100))
        logging.info(str(history.history))

    def process(self):
        '''
        Full pipeline: callbacks -> model -> training.
        '''
        self.init_callbacks()
        self.init_model()
        self.train_model()
예제 #39
0
# return the best three results
def top_n(matrix_prob, label_map):
	"""Return, for each probability row, the labels of its three highest scores.

	matrix_prob: iterable of per-class probability rows.
	label_map: sequence mapping class index -> label.
	"""
	best = []
	for row in matrix_prob:
		# Sort (index, prob) pairs by probability, highest first; the sort
		# is stable, so ties keep their original class order.
		ranked = sorted(enumerate(row), key=lambda pair: pair[1], reverse=True)
		best.append([label_map[idx] for idx, _ in ranked[:3]])
	return best
# basic neural network model
def basic_model():
	"""Compile a 100-in / 42-class softmax MLP with one 500-unit relu layer."""
	net = Sequential()
	net.add(Dense(output_dim=500, input_dim=100, activation='relu'))
	# Light dropout to regularise the single hidden layer.
	net.add(Dropout(0.2))
	net.add(Dense(output_dim=42, input_dim=500, activation='softmax'))
	net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
	return net

if __name__ == '__main__':
	# Mean-pooled triple embeddings for train/test, plus integer labels.
	X = pd.read_csv('./data/triple_train_x_mean.txt', header=None, encoding='utf-8')
	Y = pd.read_csv('./data/triple_train_y.txt', header=None, encoding='utf-8')
	X_test = pd.read_csv('./data/triple_test_x_mean.txt', header=None, encoding='utf-8')
	# NOTE(review): matrix_y is computed but never used -- fit() below takes
	# the raw integer labels Y, not the one-hot matrix.
	matrix_y = np_utils.to_categorical(Y,42)
	# KerasClassifier analysis
	classifier = KerasClassifier(build_fn=basic_model, nb_epoch=10, batch_size=500)
	classifier.fit(X, Y)

	pred_prob = classifier.predict_proba(X_test)

	# Map class indices back to label strings and keep the top-3 per row.
	with open('./model/task2_label_space.txt', encoding='utf-8') as flabel:
		label_map = flabel.read().split()
	pd.DataFrame(top_n(pred_prob, label_map)).to_csv('./data/task2_ans_int_index.txt', index=None, header=None, encoding='utf-8')
the number of epochs and the batch size.
We pass the number of training epochs to the KerasClassifier, again using
reasonable default values. Verbose output is also turned off given that the
model will be created 10 times for the 10-fold cross validation being
performed.
"""
# Rescale our data
# evaluate baseline model with standardized dataset
estimator =  KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=1)


"""
We are going to use scikit-learn to evaluate the model using stratified k-fold
cross validation. This is a resampling technique that will provide an estimate
of the performance of the model. It does this by splitting the data into
k-parts, training the model on all parts except one which is held out as a test
set to evaluate the performance of the model. This process is repeated k-times
and the average score across all constructed models is used as a robust
estimate of performance. It is stratified, meaning that it will look at the
output values and attempt to balance the number of instances that belong to
each class in the k-splits of the data.
"""
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

# NOTE(review): the CV above used encoded_Y, but the final fit/predict use
# raw Y -- confirm both carry the same encoding.
estimator.fit(X, Y)
prediction = estimator.predict(X)
print("Real: {}".format(Y))
print("Predicted: {}".format(prediction))
예제 #41
0
    print(LRscore)
    # plt.plot(c,LRscore,'bx-')
    # plt.xlabel('penalty')
    # plt.ylabel('validation score')
    # plt.title('LR Model selection')
    # plt.show()
    # #logisticModel = LogisticRegression(penalty='l2')
    # #scores[1] = cross_val_score(logisticModel,train_data,label_data,cv=5)
    #
    #test model 3 : Neutral network
    #NNModel = MLPClassifier(solver='adam', alpha=1e-5,hidden_layer_sizes=(5000,100), random_state=1,max_iter=500)
    tbCallback = TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True)
    NNModel = KerasClassifier(build_fn=create_model,epochs=1200, batch_size=150,verbose=0)
    cv = ShuffleSplit(n_splits=1, test_size=0.3, random_state=0)
    #NNscore = cross_val_score(NNModel,train_data,label_data,fit_params={'callbacks': [tbCallback]},cv=cv)
    NNModel.fit(train_data,label_data)
    prediction = NNModel.predict(test_data)
    prediction = np.array(prediction)
    print(prediction)
    np.savetxt("prediction.csv", prediction, delimiter=",")
    #print('MLPClassifier validation score : ',NNscore)


    #test model 4 : SVM
    # c = [1]
    # SVMscore = np.zeros(len(c))
    # j = 0
    # for i in c:
    #     svmModel = SVC(C=i,kernel='linear')
    #     SVMscore[j] = np.mean(cross_val_score(svmModel,train_data,label_data,cv=5))
    #     j = j+1
def baseline_model():
    """Compile a softmax MLP for 10249-dim inputs with heavy dropout.

    Reads the module-level nb_classes for the output width.
    """
    net = Sequential()
    net.add(Dense(100, input_shape=(10249,)))
    net.add(Activation('relu'))
    # Aggressive dropout (0.7) against overfitting on the wide sparse input.
    net.add(Dropout(0.7))
    net.add(Dense(nb_classes))
    net.add(Activation('softmax'))
    # Compile for multi-class classification.
    net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return net


# In[43]:

# Legacy 'nb_epoch' spelling; values come from notebook globals.
estimator = KerasClassifier(build_fn=baseline_model, nb_epoch=nb_epoch, batch_size=batch_size, verbose=1)
estimator.fit(X_train_mat, y_train_cat)


# In[44]:

predictions = estimator.predict(X_test_mat)
print(set(predictions))  # which class indices were actually predicted
print(encoder.inverse_transform(predictions))  # back to original label names


# In[45]:

# Python 2 print statement (this dump mixes py2/py3 snippets).
print 'macro f1:', f1_score(encoded_Y_test, predictions, average='macro')


# In[ ]:
예제 #43
0
파일: Baseline.py 프로젝트: Ewen2015/Kaggle
class Baseline(object):
    """Provide general machine learning models as baseline.

    Wraps four baselines (logistic regression, random forest, XGBoost GBDT,
    and a one-layer Keras NN) behind a common train/valid/report interface.
    """
    def __init__(self, train, valid, target, features, impute=True):
        super(Baseline, self).__init__()
        self.target = target
        self.features = features

        self.train = train
        self.valid = valid

        if impute:
            import pandas as pd
            from sklearn.preprocessing import Imputer

            # LR/RF below cannot handle NaNs, so keep mean-imputed copies;
            # GBDT/NN train on the raw frames.
            self.train_prep = pd.DataFrame(Imputer(strategy='mean').fit_transform(self.train), columns=self.train.columns)
            self.valid_prep = pd.DataFrame(Imputer(strategy='mean').fit_transform(self.valid), columns=self.valid.columns)
        else:
            self.train_prep = self.train
            self.valid_prep = self.valid

    def LR(self, report=False):
        """Logistic Regression.

        Args:
            report: whether print out the model analysis report.
        Returns:
            Fitted logistic regression model."""
        from sklearn.linear_model import LogisticRegression

        self.lr = LogisticRegression(n_jobs=-1)
        self.lr.fit(self.train_prep[self.features], self.train_prep[self.target])

        if report:
            from Report import Report
            rpt = Report(self.lr, self.train_prep, self.valid_prep, self.target, self.features)
            rpt.ALL()

        return self.lr

    def RF(self, report=False):
        """Random Forest.

        Args:
            report: whether print out the model analysis report.
        Returns:
            Fitted random forest model."""
        from sklearn.ensemble import RandomForestClassifier

        self.rf = RandomForestClassifier(n_estimators=1000, 
                                        max_features='sqrt',
                                        max_depth=10,
                                        random_state=0, 
                                        n_jobs=-1)
        self.rf.fit(self.train_prep[self.features], self.train_prep[self.target])

        if report:
            from Report import Report
            rpt = Report(self.rf, self.train_prep, self.valid_prep, self.target, self.features)
            rpt.ALL()

        return self.rf

    def GBDT(self, report=False):
        """Gradient Boosting Decision Tree (XGBoost).

        Args:
            report: whether print out the model analysis report.
        Returns:
            Fitted XGBoost classifier."""
        from xgboost.sklearn import XGBClassifier

        self.gbdt = XGBClassifier(objective='binary:logistic',
                                  booster='gbtree',
                                  learning_rate=0.01,
                                  n_estimators=5000,
                                  max_depth=3,
                                  subsample=0.75,
                                  colsample_bytree=0.75,
                                  n_jobs=4,
                                  random_state=2018)

        # Unlike LR/RF, this trains on the un-imputed frame (XGBoost
        # handles missing values natively).
        self.gbdt.fit(self.train_prep[self.features], self.train_prep[self.target])

        if report:
            from Report import Report
            rpt = Report(self.gbdt, self.train, self.valid, self.target, self.features)
            rpt.ALL()

        return self.gbdt

    def NN(self, report=False):
        """Neural Network.

        Args:
            report: whether print out the model analysis report.
        Returns:
            One-hidden-layer Keras model wrapped as a sklearn classifier."""
        from keras.models import Sequential
        from keras.layers import Dense
        from keras.wrappers.scikit_learn import KerasClassifier

        def baseline_model():
            # 8-unit relu hidden layer over the feature columns, sigmoid out.
            model = Sequential()
            model.add(Dense(8, input_dim=len(self.features), activation='relu'))
            model.add(Dense(1, activation='sigmoid'))
            model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
            return model    

        self.nn = KerasClassifier(build_fn=baseline_model, epochs=5, batch_size=5, verbose=1)
        self.nn.fit(self.train[self.features], self.train[self.target])

        if report:
            from Report import Report
            rpt = Report(self.nn, self.train, self.valid, self.target, self.features)
            rpt.ALL()

        return self.nn
예제 #44
0
        optimizer='adam',
        metrics=['accuracy'])
    return model

# Load the data --- (*2)
data = json.load(open("./newstext/data-mini.json"))
#data = json.load(open("./newstext/data.json"))
X = data["X"] # vectors representing the texts
Y = data["Y"] # category labels
# Fix the maximum word count (input width) from the first row
max_words = len(X[0])

# Training --- (*3)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)
# One-hot the training labels for the softmax output; Y_test stays integer
# because accuracy_score below compares against integer predictions.
Y_train = np_utils.to_categorical(Y_train, nb_classes)
print(len(X_train),len(Y_train))
model = KerasClassifier(
    build_fn=build_model, 
    nb_epoch=nb_epoch, 
    batch_size=batch_size)
model.fit(X_train, Y_train)

# Prediction --- (*4)
y = model.predict(X_test)
ac_score = metrics.accuracy_score(Y_test, y)
cl_report = metrics.classification_report(Y_test, y)
print("正解率=", ac_score)
print("レポート=\n", cl_report)


예제 #45
0
reasonable default values. Verbose output is also turned off given that the
model will be created 10 times for the 10-fold cross validation being
performed.
"""
# Rescale our data
# evaluate baseline model with standardized dataset
estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=1)


"""
We are going to use scikit-learn to evaluate the model using stratified k-fold
cross validation. This is a resampling technique that will provide an estimate
of the performance of the model. It does this by splitting the data into
k-parts, training the model on all parts except one which is held out as a test
set to evaluate the performance of the model. This process is repeated k-times
and the average score across all constructed models is used as a robust
estimate of performance. It is stratified, meaning that it will look at the
output values and attempt to balance the number of instances that belong to
each class in the k-splits of the data.
"""
# BUG FIX: n_splits was 1000, but the narrative above describes 10-fold CV;
# 1000 folds would require >=1000 samples per class and retrain the model
# 1000 times.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

"""
estimator.fit(X, Y)
prediction = estimator.predict(X)
print("Real: {}".format(Y))
print("Predicted: {}".format(prediction))
"""
예제 #46
0
	model.add(Dense(1))
	model.add(BatchNormalization())
	model.add(Dropout(0.5))
	model.add(Activation('sigmoid'))
	model.compile(loss='binary_crossentropy', optimizer="adam", metrics=['accuracy'])
	return model

#thank god for wrappers

def nn_model():
	"""Return a fresh KerasClassifier around create_baseline (20 epochs, batch 50)."""
	return KerasClassifier(build_fn=create_baseline, nb_epoch=20, batch_size=50, verbose = 1)


# NOTE(review): this second wrapper (10 epochs, batch 80) is the one actually
# used below; nn_model() is never called here.
model = KerasClassifier(build_fn=create_baseline, nb_epoch=10, batch_size=80, verbose = 0)

# NOTE(review): fit-time kwargs (nb_epoch, show_accuracy) are legacy Keras
# API and override the construction-time settings above.
model.fit(X_train, y_train, nb_epoch=7, batch_size=300, validation_split=0.1, show_accuracy=True)
scores = cross_validation.cross_val_score(model, X, y, cv = 5, scoring = "accuracy", n_jobs = -1, verbose = 1)
# Refit after CV (cross_val_score clones internally, so the CV did not
# train this instance on the final data).
model.fit(X_train, y_train, verbose=2)

y_pred = model.predict(X_test)
'''
print y_pred
print y_test

print mean_squared_error(y_test, y_pred)


'''
#scores = roc_auc_score(y_test,y_pred)
print scores
#print f1_score(y_test, y_pred, average='macro')
예제 #47
0
        optimizer='adam',
        metrics=['accuracy'])
    print(model.summary())
    return model

# Build the model (scikit-learn wrapper around build_model)
model = KerasClassifier(
    build_fn=build_model, 
    nb_epoch=nb_epoch, 
    batch_size=batch_size)

# Load the test data
data = json.load(open("./newstext/data-mini.json"))
X = data["X"]
Y = data["Y"]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)
# One-hot only the training labels; Y_test stays integer for accuracy_score.
Y_train = np_utils.to_categorical(Y_train, nb_classes)
print(len(X_train),len(Y_train))

# Train
model.fit(X_train, Y_train, verbose=1)

# Predict and report accuracy / per-class metrics
y = model.predict(X_test)
print(y)
ac_score = metrics.accuracy_score(Y_test, y)
cl_report = metrics.classification_report(Y_test, y)
print("正解率=", ac_score)
print("レポート=\n", cl_report)