def main():
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    # make directory to save train history and model
    os.makedirs(args.result_dir, exist_ok=True)
    args2json(args, args.result_dir)

    # load dataset
    D = LoadDataset(args.data_dir, args.batch_size_train, args.batch_size_test)
    train_loader, test_loader = D()

    # model, loss function, optimizer
    model = Net().to(device)
    loss_function = CrossEntropy()
    optimizer = torch.optim.Adam(model.parameters(), weight_decay=0.01)

    # train and test
    history = []
    for e in range(args.epochs):
        train_loss = train(model, device, train_loader, optimizer, loss_function)
        test_loss, acc = test(model, device, test_loader, loss_function)
        history.append([train_loss, test_loss, acc])
        show_progress(e + 1, args.epochs, train_loss, test_loss, acc)

    # save train history and model
    save_history(history, args.result_dir)
    save_model(model, args.result_dir)
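# The helpers below are used by main() but defined elsewhere in the repo.
# What follows is a minimal sketch of plausible implementations, assuming
# history rows are [train_loss, test_loss, acc]; the real code may differ.
import csv
import json
import os

import torch


def args2json(args, result_dir):
    # Persist the run configuration alongside the results (sketch).
    with open(os.path.join(result_dir, "args.json"), "w") as f:
        json.dump(vars(args), f, indent=2)


def save_history(history, result_dir):
    # One CSV row per epoch: train_loss, test_loss, acc (sketch).
    with open(os.path.join(result_dir, "history.csv"), "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["train_loss", "test_loss", "acc"])
        writer.writerows(history)


def save_model(model, result_dir):
    # Save the state dict only, the usual PyTorch convention (sketch).
    torch.save(model.state_dict(), os.path.join(result_dir, "model.pth"))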
def getDECNetworkResults(dec, enc):
    # Load test dataset
    test_data = LoadDataset("dataset/kaggle_original_train/", 0)
    test_data, test_label, val, val_label = test_data.load_data()

    big_data = LoadDataset("dataset/kaggle_augmented_train_new/", 0)
    big_data, _, _, _ = big_data.load_data()

    # Make save directory
    os.makedirs("dec", exist_ok=True)
    os.chdir("dec")

    # Predict cluster assignments on the test set
    encoded = enc.predict(test_data)
    q, _ = dec.predict(test_data, verbose=0)
    y_pred = q.argmax(1)
    print(y_pred)
    print(confusion_matrix(test_label.astype(np.int64), y_pred))

    # Measure prediction time on subsets of increasing size
    for i in range(20):
        iterate = 5000 * (i + 1)
        data = big_data[0:iterate, :]
        print(data.shape)
        print("DEC")
        start = time.time()
        q_big, _ = dec.predict(data, verbose=0)
        y_big = q_big.argmax(1)
        end = time.time()
        print(end - start)

    # Plot predicted and true labels with t-SNE
    train_x = np.reshape(test_data, (3720, 64, 64))
    TSNE = TSNEAlgo()
    TSNE.tsne_fit(encoded, perplexity=35)
    TSNE.tsne_plot(train_x, y_pred.astype(int), save_name="Pred", save_data_dir="dec")
    TSNE.tsne_plot(train_x, test_label.astype(int), save_name="True", save_data_dir="dec")
def anomaly():
    """Test the model's ability to find data anomalies.

    NOTE: not working in the current version.
    """
    # Load the regular test dataset
    test_data = LoadDataset("dataset/kaggle_original_train/", 0)
    test_data, test_label, val, val_label = test_data.load_data()

    # Load anomaly dataset
    anomaly_data = LoadDataset("dataset/kaggle_anomalies/", 0)
    anomaly_data, anomaly_label, val, val_label = anomaly_data.load_data()

    # Shift anomaly labels past the regular classes
    for i in range(len(anomaly_label)):
        anomaly_label[i] = anomaly_label[i] + 5

    # Concatenate test and anomaly data
    test_anomaly_data = np.vstack((test_data, anomaly_data))
    test_anomaly_label = np.hstack((test_label, anomaly_label))

    # Get k-means cluster distance (remainder of this function is disabled)
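# The disabled section above suggests flagging anomalies by their k-means
# cluster distance. A minimal sketch of that idea, assuming encoded feature
# vectors; this is not the authors' implementation.
from sklearn.cluster import KMeans


def cluster_distance_scores(encoded, n_clusters=5):
    # Fit k-means on the encoded features
    km = KMeans(n_clusters=n_clusters, random_state=0).fit(encoded)
    # Distance from each sample to its nearest centroid; large values are
    # candidate anomalies.
    return km.transform(encoded).min(axis=1)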
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors

from explainer_tabular import LimeTabularExplainer
from load_dataset import LoadDataset

test = LoadDataset(which='ildp')
X = test.data.data
feature_names = test.data.feature_names
target_names = test.data.target_names

# train, test, labels_train, labels_test = train_test_split(test.data.data, test.data.target, train_size=0.80)
# np.save("X_train_ildp.npy", train)
# np.save("X_test_ildp.npy", test)
# np.save("y_train_ildp.npy", labels_train)
# np.save("y_test_ildp.npy", labels_test)

train = np.load("data/X_train_ildp.npy")
test = np.load("data/X_test_ildp.npy")
labels_train = np.load("data/y_train_ildp.npy")
labels_test = np.load("data/y_test_ildp.npy")

rf = RandomForestClassifier(n_estimators=10, random_state=0)
rf.fit(train, labels_train)

i = np.random.randint(0, test.shape[0])
def train(net, device, epochs=1000, batch_size=1, lr=0.001):
    dataset = LoadDataset(dir_img, dir_mask, mask_suffix='_mask')
    n_val = int(len(dataset) * 0.2)
    n_train = len(dataset) - n_val
    train, val = random_split(dataset, [n_train, n_val])
    train_loader = DataLoader(train, batch_size=batch_size, shuffle=True,
                              num_workers=8, pin_memory=True)
    val_loader = DataLoader(val, batch_size=batch_size, shuffle=False,
                            num_workers=8, pin_memory=True, drop_last=True)
    writer = SummaryWriter(comment=f'Lr:{lr}___BS:{batch_size}')
    global_step = 0

    optimizer = optim.RMSprop(net.parameters(), lr=lr, weight_decay=1e-8)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'min' if net.n_classes > 1 else 'max', patience=2)
    if net.n_classes > 1:
        loss_func = nn.CrossEntropyLoss()
    else:
        loss_func = nn.BCEWithLogitsLoss()

    for epoch in range(epochs):
        net.train()
        epoch_loss = 0
        with tqdm(total=n_train,
                  desc='Epoch: {}/{}'.format(epoch + 1, epochs),
                  unit='img') as pbar:
            for batch in train_loader:
                imgs = batch['image']
                true_mask = batch['mask']
                imgs = imgs.to(device=device, dtype=torch.float32)
                assert imgs.shape[1] == net.n_channels, \
                    f'Network has been defined with {net.n_channels} input channels, ' \
                    f'but loaded images have {imgs.shape[1]} channels. Please check that ' \
                    'the images are loaded correctly.'
                mask_type = torch.float32 if net.n_classes == 1 else torch.long
                true_mask = true_mask.to(device=device, dtype=mask_type)

                mask_pred = net(imgs)
                loss = loss_func(mask_pred, true_mask)
                epoch_loss += loss.item()
                writer.add_scalar('Loss/train', loss.item(), global_step)
                pbar.set_postfix(**{'loss (batch)': loss.item()})

                optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_value_(net.parameters(), 0.1)
                optimizer.step()

                pbar.update(imgs.shape[0])
                global_step += 1
                if global_step % (n_train // (10 * batch_size)) == 0:
                    for tag, value in net.named_parameters():
                        tag = tag.replace('.', '/')
                        writer.add_histogram('weights/' + tag,
                                             value.data.cpu().numpy(), global_step)
                        writer.add_histogram('grads/' + tag,
                                             value.grad.data.cpu().numpy(), global_step)
                    val_score = eval_net(net, val_loader, device)
                    scheduler.step(val_score)
                    writer.add_scalar('learning_rate',
                                      optimizer.param_groups[0]['lr'], global_step)
                    if net.n_classes > 1:
                        logging.info('Validation cross entropy: {}'.format(val_score))
                        writer.add_scalar('Loss/test', val_score, global_step)
                    else:
                        logging.info('Validation Dice Coeff: {}'.format(val_score))
                        writer.add_scalar('Dice/test', val_score, global_step)
                    writer.add_images('images', imgs, global_step)
                    if net.n_classes == 1:
                        writer.add_images('masks/true', true_mask, global_step)
                        writer.add_images('masks/pred',
                                          torch.sigmoid(mask_pred) > 0.5, global_step)

        os.makedirs(dir_checkpoint, exist_ok=True)
        torch.save(net.state_dict(),
                   dir_checkpoint + 'Epoch {}.pth'.format(epoch + 1))
    writer.close()
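# eval_net() is called above but not defined in this file. A minimal sketch,
# assuming it returns mean cross entropy for multi-class nets and a mean Dice
# coefficient for single-class nets (matching how the scheduler and logging
# above interpret the score); the real implementation may differ.
def eval_net(net, loader, device):
    net.eval()
    score, n_batches = 0.0, 0
    with torch.no_grad():
        for batch in loader:
            imgs = batch['image'].to(device=device, dtype=torch.float32)
            mask_type = torch.float32 if net.n_classes == 1 else torch.long
            true_mask = batch['mask'].to(device=device, dtype=mask_type)
            pred = net(imgs)
            if net.n_classes > 1:
                score += nn.functional.cross_entropy(pred, true_mask).item()
            else:
                # Dice coefficient on the thresholded sigmoid output
                pred = (torch.sigmoid(pred) > 0.5).float()
                inter = (pred * true_mask).sum()
                score += (2 * inter / (pred.sum() + true_mask.sum() + 1e-8)).item()
            n_batches += 1
    net.train()
    return score / max(n_batches, 1)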
def getNetworkResults(model, model_type):
    """Main result function."""
    # Select which results to generate
    plot_input_output = False
    plot_data = False
    plot_class_activation_map = False
    plot_activation_map = False
    plot_kernel_inspection = False
    hierarchical_clustering = False
    kmean_cluster = True
    spectral_cluster = True
    pred_time = False

    # Load test dataset
    test_data = LoadDataset("dataset/kaggle_original_train/", 0)
    test_data, test_label, val, val_label = test_data.load_data()

    # Make save directory
    os.makedirs(model_type, exist_ok=True)
    os.chdir(model_type)

    # Get autoencoder, encoder and decoder
    AUTO, ENC, DEC = model.getModel()

    # Predict encoder output
    encoded = ENC.predict(test_data)

    # Fit the t-SNE algorithm
    TSNE = TSNEAlgo()
    TSNE.tsne_fit(encoded, perplexity=35)

    if plot_input_output:
        # Visualize input and output from autoencoder
        visualize_input_output(AUTO, test_data, model_type)
    if model_type == "globalAverage" and plot_class_activation_map:
        visualize_class_activation_map(model, test_data)
    if plot_activation_map:
        visualize_activation_map(model, test_data)
    if plot_data:
        # Plot results using t-SNE
        TSNE.tsne_plot(test_data, test_label, model_type, model_type)
    if plot_kernel_inspection:
        kernel_inspection(model, test_data)
    if hierarchical_clustering:
        hierarchical(ENC, TSNE, test_data, test_label, save_name="hierarchical.png")
    if kmean_cluster:
        kmean(ENC, TSNE, test_data, test_label)
    if spectral_cluster:
        spectral(ENC, TSNE, test_data, test_label)
    if pred_time:
        os.chdir("..")
        data = LoadDataset("dataset/kaggle_augmented_train_new/", 0)
        data, test_label, val, val_label = data.load_data()
        os.chdir(model_type)
        predictionTime(ENC, data)
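# kmean() is called above but defined elsewhere. A minimal sketch, assuming it
# clusters the encoder output with k-means into five classes and plots the
# assignments with the fitted t-SNE embedding (the tsne_plot signature matches
# its use in getDECNetworkResults); the real helper may differ.
import numpy as np
from sklearn.cluster import KMeans


def kmean(encoder, tsne, data, labels, n_clusters=5):
    # Cluster in the learned latent space
    encoded = encoder.predict(data)
    y_pred = KMeans(n_clusters=n_clusters, random_state=0).fit_predict(encoded)
    # Reshape flat samples back to 64x64 images for plotting
    images = np.reshape(data, (len(data), 64, 64))
    tsne.tsne_plot(images, y_pred.astype(int), save_name="kmeans", save_data_dir=".")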
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as shc
from sklearn.datasets import load_breast_cancer

from load_dataset import LoadDataset

bc_data = LoadDataset(which='bc')
ildp_data = LoadDataset(which='ildp')
hp_data = LoadDataset(which='hp')

plt.figure(figsize=(5, 4))
# plt.title("Dendrograms")
clust = shc.linkage(bc_data.data.data, method='ward')
dend = shc.dendrogram(clust)
filename = 'results/dendrogram_bc.pdf'
plt.savefig(filename, bbox_inches='tight')
plt.show()

plt.figure(figsize=(5, 4))
# plt.title("Dendrograms")
clust = shc.linkage(hp_data.data.data, method='ward')
dend = shc.dendrogram(clust)
filename = 'results/dendrogram_hp.pdf'
plt.savefig(filename, bbox_inches='tight')
plt.show()

plt.figure(figsize=(5, 4))
# plt.title("Dendrograms")
clust = shc.linkage(ildp_data.data.data, method='ward')
dend = shc.dendrogram(clust)
filename = 'results/dendrogram_ildp.pdf'
plt.savefig(filename, bbox_inches='tight')
plt.show()
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from sklearn.neural_network import MLPClassifier

from explainer_tabular import LimeTabularExplainer
from load_dataset import LoadDataset

test = LoadDataset(which='bc')
X = test.data.data
feature_names = test.data.feature_names
target_names = test.data.target_names

# train, test, labels_train, labels_test = train_test_split(test.data.data, test.data.target, train_size=0.80)
# np.save("X_train.npy", train)
# np.save("X_test.npy", test)
# np.save("y_train.npy", labels_train)
# np.save("y_test.npy", labels_test)

train = np.load("data/X_train.npy")
test = np.load("data/X_test.npy")
labels_train = np.load("data/y_train.npy")
labels_test = np.load("data/y_test.npy")

nn = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2))
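# The script stops mid-setup. A minimal sketch of the typical next steps,
# assuming the local explainer_tabular module mirrors the standard
# lime.lime_tabular API; the names and arguments below are illustrative,
# not the authors' code.
nn.fit(train, labels_train)
i = np.random.randint(0, test.shape[0])
explainer = LimeTabularExplainer(train,
                                 feature_names=feature_names,
                                 class_names=target_names,
                                 discretize_continuous=True)
# Explain a single prediction as a list of weighted feature contributions
exp = explainer.explain_instance(test[i], nn.predict_proba, num_features=5)
print(exp.as_list())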
if __name__ == "__main__":
    tf.keras.backend.clear_session()

    # Select which pipeline to run: "auto" or "dec". (The original reused the
    # `model` variable for this, which made the "dec" branch unreachable.)
    run_mode = "auto"

    if run_mode == "auto":
        model_type = "COAPNET"
        latent_vector = "globalAverage"
        model = buildNetwork(model_type, latent_vector, latent_dim=64,
                             epochs=50, train=True, noisy=False)
        auto, enc, pre = model.getModel()
        getNetworkResults(model, latent_vector)

    if run_mode == "dec":
        train_data = LoadDataset("dataset/kaggle_original_train/", 0)
        train_data, train_label, val, val_label = train_data.load_data()
        encoded_x = np.reshape(train_data, (3720, 64 * 64))

        model_type = "COAPNET"
        latent_vector = "globalAverage"
        model = buildNetwork(model_type, latent_vector, latent_dim=64,
                             epochs=5, train=True, noisy=False)
        auto, enc, pre = model.getModel()

        from DEC import DeepEmbeddedClustering
        from results import getDECNetworkResults
        dec = DeepEmbeddedClustering(auto, enc, train_data, train_label, 5)
        dec.buildModel()
        dec.trainModel()
def loadData(self):
    train_data = LoadDataset(self.load_data_dir, 0.1)
    self.train_data, self.train_label, self.validation_data, self.validation_label = \
        train_data.load_data()
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors

from explainer_tabular import LimeTabularExplainer
from load_dataset import LoadDataset

test = LoadDataset(which='hp')
X = test.data.data
feature_names = test.data.feature_names
target_names = test.data.target_names

# train, test, labels_train, labels_test = train_test_split(test.data.data, test.data.target, train_size=0.80)
# np.save("X_train_hp.npy", train)
# np.save("X_test_hp.npy", test)
# np.save("y_train_hp.npy", labels_train)
# np.save("y_test_hp.npy", labels_test)

train = np.load("data/X_train_hp.npy")
test = np.load("data/X_test_hp.npy")
labels_train = np.load("data/y_train_hp.npy")
labels_test = np.load("data/y_test_hp.npy")

rf = RandomForestClassifier(n_estimators=10, random_state=0)
rf.fit(train, labels_train)

i = np.random.randint(0, test.shape[0])