def create_model(model_num, model_type='sda', data_norm_type='area', overwrite=False,
                 layer_sizes=[50, 50, 50], corruption_levels=[0.3, 0.3, 0.3],
                 learning_rate=0.01):
    specs = get_spectrograms(norm_type=data_norm_type)
    output_folder = 'lung-sound-deep-learning-models/{}_{}'.format(model_type, model_num)
    if isdir(output_folder) and not overwrite:
        # Bail out if the model already exists and overwriting is disabled
        print('Model already exists: ' + output_folder)
        return
    if model_type == 'sda':
        # Add a channel dimension, then train a stacked denoising autoencoder
        specs = np.expand_dims(specs, 1)
        autoencoder.train_autoencoder(specs, output_folder, batch_size=20,
                                      learning_rate=learning_rate, num_epochs=10000,
                                      momentum_flag=True, momentum=0.9,
                                      layer_sizes=layer_sizes,
                                      corruption_levels=corruption_levels,
                                      nonlinearity='sigmoid')
    elif model_type == 'conv':
        specs = np.expand_dims(specs, 1)
        conv_autoencoder.train_conv_autoencoder(specs, output_folder, (20, 49),
                                                learning_rate=learning_rate)
    else:
        print('Model type not implemented: ' + model_type)
        return
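# Example invocation (an illustrative sketch; the model number and
# hyperparameters below are placeholders, not values from the original runs):
create_model(0, model_type='sda', data_norm_type='area',
             layer_sizes=[100, 50, 25], corruption_levels=[0.1, 0.2, 0.3],
             learning_rate=0.01)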
def train_mnist_sda_model():
    # Reshape flat MNIST vectors to 28x28 images and add a channel dimension
    train_set_x = load_data()
    train_set_x = np.reshape(train_set_x, (train_set_x.shape[0], 28, 28))
    train_set_x = np.expand_dims(train_set_x, 1)
    output_folder = 'lung-sound-deep-learning-models/mnist/sda0'
    autoencoder.train_autoencoder(train_set_x, output_folder)
def trainStages():
    for stage in STAGES:
        # Load the pickled frames for this stage and train an autoencoder on them
        with open("data/training/stages/stage%s" % stage, "rb") as file:
            stageFrames = pickle.load(file)
        autoencoder.train_autoencoder(stageFrames, stage)
############# Helper functions
path_to_model = './models/autoencoder'
x = np.array([example.flatten() for example in allData])
x, y = unison_shuffle(x, stringLabels)

## Split into test/train
num_training_samples = int(.7 * len(x))
x_train = x[:num_training_samples]
y_train = y[:num_training_samples]
x_test = x[num_training_samples:]
y_test = y[num_training_samples:]
train_autoencoder(x_train, x_test, path_to_model)

## Calculate encodings
model = keras.models.load_model(path_to_model + '.h5')
# Map inputs to the activations of the bottleneck layer (layer 2)
embedding_fn = keras.backend.function([model.layers[0].input], [model.layers[2].output])
all_sample_encodings = embedding_fn([x, 0])[0]
train_samples_encodings = embedding_fn([x_train, 0])[0]
test_sample_encodings = embedding_fn([x_test, 0])[0]
letter_encodings = compute_letter_encodings(train_samples_encodings, y_train)

## Calculate train/test accuracy
train_accuracy = get_overall_accuracy(letter_encodings, train_samples_encodings, y_train)
test_accuracy = get_overall_accuracy(letter_encodings, test_sample_encodings, y_test)
print('')
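# A minimal sketch of the two helpers called above, assuming a nearest-centroid
# scheme: compute_letter_encodings averages the embeddings of the training
# samples sharing each label, and get_overall_accuracy classifies a sample by
# its closest centroid. The original implementations may differ.
def compute_letter_encodings(encodings, labels):
    letters = sorted(set(labels))
    return {letter: np.mean([e for e, l in zip(encodings, labels) if l == letter],
                            axis=0)
            for letter in letters}

def get_overall_accuracy(letter_encodings, encodings, labels):
    letters = list(letter_encodings.keys())
    centroids = np.stack([letter_encodings[letter] for letter in letters])
    correct = sum(1 for encoding, label in zip(encodings, labels)
                  if letters[int(np.argmin(np.linalg.norm(centroids - encoding,
                                                          axis=1)))] == label)
    return correct / len(labels)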
import numpy as np
import pandas as pd

import autoencoder

pd.set_option('display.max_columns', None)

train = pd.read_csv('./train.csv')
test = pd.read_csv('./test.csv')

all_cols = list(train.columns.values)
feat_cols = [x for x in all_cols if x not in ['id', 'target']]
x_train = train[feat_cols]
x_test = test[feat_cols]
y_train = train['target']

# Train an autoencoder on the feature columns and append its encodings as new features
encoder = autoencoder.train_autoencoder(x_test, x_train)
x_test_encode = autoencoder.encode_features(x_test, encoder)
x_train_encode = autoencoder.encode_features(x_train, encoder)
x_train = pd.concat([x_train, x_train_encode], axis=1)
x_test = pd.concat([x_test, x_test_encode], axis=1)

params = {
    'max_depth': 7,
    'eta': 0.05,
    'silent': 1,
    'objective': 'binary:logistic',
    'alpha': 4,
    'lambda': 10,
    'min_child_weight': 1,
    'gamma': 2,
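# A hedged sketch of what the autoencoder module used above might look like
# (not the original implementation): a single-bottleneck Keras autoencoder
# trained unsupervised on both frames, returning the encoder half, plus a
# helper that turns bottleneck activations into a DataFrame. The names mirror
# the calls above; the layer sizes and epochs are illustrative.
import pandas as pd
from tensorflow import keras

def train_autoencoder(x_a, x_b, encoding_dim=16, epochs=10):
    x = pd.concat([x_a, x_b]).values
    inputs = keras.Input(shape=(x.shape[1],))
    encoded = keras.layers.Dense(encoding_dim, activation='relu')(inputs)
    decoded = keras.layers.Dense(x.shape[1], activation='linear')(encoded)
    model = keras.Model(inputs, decoded)
    model.compile(optimizer='adam', loss='mse')
    model.fit(x, x, epochs=epochs, batch_size=256, verbose=0)
    return keras.Model(inputs, encoded)  # keep only the encoder half

def encode_features(df, encoder):
    encoded = encoder.predict(df.values)
    cols = ['ae_%d' % i for i in range(encoded.shape[1])]
    return pd.DataFrame(encoded, columns=cols, index=df.index)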
def preprocess(X, y, X_val, test_data, verbose=True, scale=True,
               autoencoder=True, qda=True, knn=False, xgb=False):
    """Preprocess the data by adding features and scaling it.

    For each method, we train the model on the training data using the
    corresponding labels, then apply the same transformation to the
    validation and test data.

    Args:
        X (numpy ndarray): Training data
        y (numpy ndarray): Training labels
        X_val (numpy ndarray): Validation data
        test_data (numpy ndarray): Test data for submission
        verbose (bool): log level
        scale (bool): scale the data
        autoencoder (bool): use autoencoder feature
        qda (bool): use Quadratic Discriminant Analysis feature
        knn (bool): use k-nearest neighbours feature
        xgb (bool): use XGBoost feature

    Returns:
        The dataset appropriately transformed by the selected methods.
    """
    if autoencoder:
        if verbose:
            print("## Autoencoder")
            print("### Train...", end=" ", flush=True)
            ae = train_autoencoder(X, size=32, epochs=20, verbose=1)
        else:
            ae = train_autoencoder(X, size=32, epochs=20, verbose=0)
        if verbose:
            print("done.")
            print("### Evaluate...", end=" ", flush=True)
        # Append the first-layer activations of the autoencoder as extra features
        ae.eval()
        X_ae = ae.layer1(Variable(torch.Tensor(X))).data
        X = np.c_[X, X_ae]
        X_val_ae = ae.layer1(Variable(torch.Tensor(X_val))).data
        X_val = np.c_[X_val, X_val_ae]
        test_data_ae = ae.layer1(Variable(torch.Tensor(test_data))).data
        test_data = np.c_[test_data, test_data_ae]
        if verbose:
            print("done.")

    if qda:
        if verbose:
            print("## Quadratic Discriminant Analysis...", end=" ", flush=True)
        # Append the QDA positive-class probability as an extra feature
        qdaclf = QuadraticDiscriminantAnalysis(reg_param=0.02)
        qdaclf.fit(X, y)
        X_qda = qdaclf.predict_proba(X)
        X = np.c_[X, X_qda[:, 1]]
        X_val_qda = qdaclf.predict_proba(X_val)
        X_val = np.c_[X_val, X_val_qda[:, 1]]
        test_data_qda = qdaclf.predict_proba(test_data)
        test_data = np.c_[test_data, test_data_qda[:, 1]]
        if verbose:
            print("done.")

    if knn:
        print("## K-Nearest Neighbours...", end=" ", flush=True)
        knnclf = KNeighborsClassifier(n_neighbors=10, p=2, n_jobs=-1)
        knnclf.fit(X, y)
        X_knn = knnclf.predict_proba(X)
        X = np.c_[X, X_knn[:, 1]]
        X_val_knn = knnclf.predict_proba(X_val)
        X_val = np.c_[X_val, X_val_knn[:, 1]]
        test_data_knn = knnclf.predict_proba(test_data)
        test_data = np.c_[test_data, test_data_knn[:, 1]]
        print("done.")

    if xgb:
        print("## XGBoost...", end=" ", flush=True)
        xgbclf = XGBClassifier(max_depth=3, learning_rate=0.1, n_estimators=1000,
                               gamma=10, min_child_weight=10,
                               objective='binary:logistic', n_jobs=4)
        xgbclf.fit(X, y)
        X_xgb = xgbclf.predict_proba(X)
        X_val_xgb = xgbclf.predict_proba(X_val)
        X = np.c_[X, X_xgb[:, 1]]
        X_val = np.c_[X_val, X_val_xgb[:, 1]]
        test_data_xgb = xgbclf.predict_proba(test_data)
        test_data = np.c_[test_data, test_data_xgb[:, 1]]
        print("done.")

    if scale:
        if verbose:
            print("## Scaling...", end=" ", flush=True)
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
        X_val = scaler.transform(X_val)
        test_data = scaler.transform(test_data)
        if verbose:
            print("done.")

    return X, y, X_val, test_data
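# Example call (illustrative; assumes X, y, X_val, and test_data are already
# loaded as numpy arrays): add the autoencoder and QDA features, then
# standard-scale everything before fitting a downstream classifier.
X, y, X_val, test_data = preprocess(X, y, X_val, test_data,
                                    verbose=True, scale=True,
                                    autoencoder=True, qda=True,
                                    knn=False, xgb=False)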
predictions.append(output_indices)

# Explain the predicted spans, then train an encoder/decoder on the
# relevance-weighted BERT encoder inputs
explainer = BertExplainer(model)
relevance, attentions, self_attentions = explainer.explain(
    input_ids, segment_ids, input_mask,
    [o["span"] for o in output_indices.values()])
# Weight the stored encoder inputs by each token's summed relevance
input_tensor = torch.stack(
    [r.sum(-1).unsqueeze(-1) *
     explainer.layer_values_global["bert.encoder"]["input"][0]
     for r in relevance], 0)
target_tensor = torch.stack(relevance, 0).sum(-1)
loss = train_autoencoder(input_tensor, target_tensor, encoder1, decoder1,
                         encoder_optimizer, decoder_optimizer, criterion,
                         max_length=13)
print('Encoder loss: %.4f' % loss)

# For printing the results ####
index = None
for example in examples:
    if index != example.example_id:
        pp.pprint(example.para_text)
        index = example.example_id
        print('\n')
    print(
        colored(