Code Example #1
def create_model(model_num, model_type='sda', data_norm_type='area', overwrite=False,
                 layer_sizes=[50, 50, 50], corruption_levels=[0.3, 0.3, 0.3],
                 learning_rate=0.01):
    specs = get_spectrograms(norm_type=data_norm_type)
    
    output_folder = 'lung-sound-deep-learning-models/{}_{}'.format(model_type,model_num)
    if isdir(output_folder) and not overwrite:  # Skip if the model already exists
        print('Model already exists: ' + output_folder)
        return

    if model_type == 'sda':
        specs = np.expand_dims(specs, 1)
        autoencoder.train_autoencoder(specs, output_folder, batch_size=20, learning_rate=learning_rate,
                                      num_epochs=10000, momentum_flag=True, momentum=0.9,
                                      layer_sizes=layer_sizes, corruption_levels=corruption_levels,
                                      nonlinearity='sigmoid')
    elif model_type == 'conv':
        specs = np.expand_dims(specs, 1)
        conv_autoencoder.train_conv_autoencoder(specs, output_folder, (20, 49), learning_rate=learning_rate)
    else:
        print('Model type not implemented: ' + model_type)
        return
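
A minimal invocation sketch; the model number and hyperparameter values below are illustrative, not taken from the original project:

# Train a stacked denoising autoencoder on the normalised spectrograms.
create_model(0, model_type='sda', layer_sizes=[50, 50, 50],
             corruption_levels=[0.3, 0.3, 0.3], learning_rate=0.01)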
Code Example #2
def train_mnist_sda_model():
    train_set_x = load_data()
    train_set_x = np.reshape(train_set_x, (train_set_x.shape[0], 28, 28))
    train_set_x = np.expand_dims(train_set_x, 1)
    output_folder = 'lung-sound-deep-learning-models/mnist/sda0'
    autoencoder.train_autoencoder(train_set_x, output_folder)
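
load_data is not shown in this excerpt. One plausible stand-in, assuming it returns MNIST images as a (num_samples, 784) float array; fetch_openml is my substitution, not the project's own loader:

from sklearn.datasets import fetch_openml

def load_data():
    # Hypothetical loader: the 70000x784 MNIST pixel matrix, scaled to [0, 1].
    X, _ = fetch_openml('mnist_784', as_frame=False, return_X_y=True)
    return X.astype('float32') / 255.0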
Code Example #3
def trainStages():
    for stage in STAGES:
        with open("data/training/stages/stage%s" % stage, "rb") as file:
            stageFrames = pickle.load(file)
        autoencoder.train_autoencoder(stageFrames, stage)
Code Example #4
############# Helper functions


path_to_model = './models/autoencoder'
x = np.array([example.flatten() for example in allData])
x, y = unison_shuffle(x, stringLabels)

## Split into test/train
num_training_samples = int(0.7 * len(x))
x_train = x[:num_training_samples]
y_train = y[:num_training_samples]
x_test = x[num_training_samples:]
y_test = y[num_training_samples:]

train_autoencoder(x_train, x_test, path_to_model)


## Calculate encodings
model = keras.models.load_model(path_to_model+'.h5')
# Map inputs to the bottleneck layer's activations; the trailing 0 in each
# call below sets the Keras learning phase to test mode.
embedding_fn = keras.backend.function([model.layers[0].input], [model.layers[2].output])
all_sample_encodings = embedding_fn([x, 0])[0]
train_samples_encodings = embedding_fn([x_train, 0])[0]
test_sample_encodings = embedding_fn([x_test, 0])[0]
letter_encodings = compute_letter_encodings(train_samples_encodings, y_train)

## Calculate train/test accuracy
train_accuracy = get_overall_accuracy(letter_encodings, train_samples_encodings, y_train)
test_accuracy = get_overall_accuracy(letter_encodings, test_sample_encodings, y_test)

print('')
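
compute_letter_encodings and get_overall_accuracy are project helpers that do not appear in this excerpt. A minimal sketch of one plausible reading, stated as an assumption: average the embeddings per label, then classify each sample by its nearest label centroid:

import numpy as np

def compute_letter_encodings(encodings, labels):
    # Assumed behaviour: mean embedding for each distinct label.
    labels = np.asarray(labels)
    return {label: encodings[labels == label].mean(axis=0)
            for label in set(labels)}

def get_overall_accuracy(letter_encodings, encodings, labels):
    # Assumed behaviour: nearest-centroid classification, Euclidean distance.
    letters = list(letter_encodings)
    centroids = np.stack([letter_encodings[l] for l in letters])
    predicted = [letters[np.argmin(np.linalg.norm(centroids - e, axis=1))]
                 for e in encodings]
    return float(np.mean([p == t for p, t in zip(predicted, labels)]))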
Code Example #5
File: train.py  Project: HuangNa2/kaggle_safe_driver
import numpy as np
import pandas as pd
import autoencoder

pd.set_option('display.max_columns', None)

train = pd.read_csv('./train.csv')
test = pd.read_csv('./test.csv')

all_cols = list(train.columns.values)
feat_cols = [x for x in all_cols if x not in ['id', 'target']]

x_train = train[feat_cols]
x_test = test[feat_cols]
y_train = train['target']

encoder = autoencoder.train_autoencoder(x_test, x_train)
x_test_encode = autoencoder.encode_features(x_test, encoder)
x_train_encode = autoencoder.encode_features(x_train, encoder)

x_train = pd.concat([x_train, x_train_encode], axis=1)
x_test = pd.concat([x_test, x_test_encode], axis=1)

params = {
    'max_depth': 7,
    'eta': 0.05,
    'silent': 1,
    'objective': 'binary:logistic',
    'alpha': 4,
    'lambda': 10,
    'min_child_weight': 1,
    'gamma': 2,
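
The autoencoder module imported above is not included in this excerpt (which is also cut off mid-dict). A minimal Keras sketch of the interface the calls imply, where train_autoencoder fits on one frame and validates on the other, and encode_features returns the bottleneck activations as a DataFrame; the layer size and epoch count are placeholders:

import pandas as pd
from tensorflow import keras

def train_autoencoder(x_fit, x_val, encoding_dim=16, epochs=10):
    # Hypothetical: a dense autoencoder trained to reconstruct the features.
    n_features = x_fit.shape[1]
    inputs = keras.Input(shape=(n_features,))
    encoded = keras.layers.Dense(encoding_dim, activation='relu')(inputs)
    decoded = keras.layers.Dense(n_features)(encoded)
    model = keras.Model(inputs, decoded)
    model.compile(optimizer='adam', loss='mse')
    model.fit(x_fit, x_fit, epochs=epochs, batch_size=256,
              validation_data=(x_val, x_val), verbose=0)
    return keras.Model(inputs, encoded)  # encoder half only

def encode_features(x, encoder):
    # Hypothetical: encoded columns aligned with x's index for pd.concat.
    cols = ['ae_%d' % i for i in range(encoder.output_shape[1])]
    return pd.DataFrame(encoder.predict(x), index=x.index, columns=cols)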
Code Example #6
def preprocess(X,
               y,
               X_val,
               test_data,
               verbose=True,
               scale=True,
               autoencoder=True,
               qda=True,
               knn=False,
               xgb=False):
    """Preprocess the data by adding features and scaling it.

    For each method, we train the model on the training data using the
    corresponding labels, then apply the same transformation to
    validation and test data.

    Args:
        X (numpy ndarray): Training data
        y (numpy ndarray): Training labels
        X_val (numpy ndarray): Validation data
        test_data (numpy ndarray): Test data for submission
        verbose (bool): print progress messages
        scale (bool): scale the data
        autoencoder (bool): use autoencoder feature
        qda (bool): use Quadratic Discriminant Analysis feature
        knn (bool): use k-nearest neighbours feature
        xgb (bool): use XGBoost feature

    Returns:
        X, y, X_val and test_data, transformed by the selected methods.
    """
    if autoencoder:
        if verbose:
            print("## Autoencoder")
            print("### Train...", end=" ", flush=True)
        ae = train_autoencoder(X, size=32, epochs=20, verbose=1 if verbose else 0)
        if verbose:
            print("done.")
            print("### Evaluate...", end=" ", flush=True)
        ae.eval()
        X_ae = ae.layer1(Variable(torch.Tensor(X))).data
        X = np.c_[X, X_ae]
        X_val_ae = ae.layer1(Variable(torch.Tensor(X_val))).data
        X_val = np.c_[X_val, X_val_ae]
        test_data_ae = ae.layer1(Variable(torch.Tensor(test_data))).data
        test_data = np.c_[test_data, test_data_ae]
        if verbose:
            print("done.")

    if qda:
        if verbose:
            print("## Quadratic Discriminant Analysis...", end=" ", flush=True)
        qdaclf = QuadraticDiscriminantAnalysis(reg_param=0.02)
        qdaclf.fit(X, y)
        X_qda = qdaclf.predict_proba(X)
        X = np.c_[X, X_qda[:, 1]]
        X_val_qda = qdaclf.predict_proba(X_val)
        X_val = np.c_[X_val, X_val_qda[:, 1]]
        test_data_qda = qdaclf.predict_proba(test_data)
        test_data = np.c_[test_data, test_data_qda[:, 1]]
        if verbose:
            print("done.")

    if knn:
        print("## K-Nearest Neighbours...", end=" ", flush=True)
        knnclf = KNeighborsClassifier(n_neighbors=10, p=2, n_jobs=-1)
        knnclf.fit(X, y)
        X_knn = knnclf.predict_proba(X)
        X = np.c_[X, X_knn[:, 1]]
        X_val_knn = knnclf.predict_proba(X_val)
        X_val = np.c_[X_val, X_val_knn[:, 1]]
        test_data_knn = knnclf.predict_proba(test_data)
        test_data = np.c_[test_data, test_data_knn[:, 1]]
        print("done.")

    if xgb:
        print("## XGBoost...", end=" ", flush=True)
        xgbclf = XGBClassifier(max_depth=3,
                               learning_rate=0.1,
                               n_estimators=1000,
                               gamma=10,
                               min_child_weight=10,
                               objective='binary:logistic',
                               n_jobs=4)
        xgbclf.fit(X, y)
        X_xgb = xgbclf.predict_proba(X)
        X_val_xgb = xgbclf.predict_proba(X_val)
        X = np.c_[X, X_xgb[:, 1]]
        X_val = np.c_[X_val, X_val_xgb[:, 1]]
        test_data_xgb = xgbclf.predict_proba(test_data)
        test_data = np.c_[test_data, test_data_xgb[:, 1]]
        print("done.")

    if scale:
        if verbose:
            print("## Scaling...", end=" ", flush=True)
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
        X_val = scaler.transform(X_val)
        test_data = scaler.transform(test_data)
        if verbose:
            print("done.")

    return X, y, X_val, test_data
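
A usage sketch with synthetic data, just to show the calling convention; the shapes are arbitrary, and train_autoencoder, torch, and the scikit-learn classifiers are assumed to be importable as in the function above:

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(1000, 32).astype('float32')
y = rng.randint(0, 2, size=1000)
X_val = rng.randn(200, 32).astype('float32')
test_data = rng.randn(500, 32).astype('float32')

# Appends an autoencoder embedding and a QDA probability column, then scales.
X, y, X_val, test_data = preprocess(X, y, X_val, test_data, verbose=False)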
Code Example #7
                predictions.append(output_indices)

                explainer = BertExplainer(model)
                relevance, attentions, self_attentions = explainer.explain(
                    input_ids, segment_ids, input_mask,
                    [o["span"] for o in output_indices.values()])
                input_tensor = torch.stack([
                    r.sum(-1).unsqueeze(-1) *
                    explainer.layer_values_global["bert.encoder"]["input"][0]
                    for r in relevance
                ], 0)
                target_tensor = torch.stack(relevance, 0).sum(-1)
                loss = train_autoencoder(input_tensor,
                                         target_tensor,
                                         encoder1,
                                         decoder1,
                                         encoder_optimizer,
                                         decoder_optimizer,
                                         criterion,
                                         max_length=13)

                print('Encoder loss: %.4f' % loss)

            # Print the results
            index = None
            for example in examples:
                if index != example.example_id:
                    pp.pprint(example.para_text)
                    index = example.example_id
                    print('\n')
                    print(
                        colored(