def __init__(self, train=True, transform=None, target_transform=None,
             noise_rate=0.2, split_percentage=0.9, seed=1, num_classes=10,
             feature_size=28 * 28, norm_std=0.1):
    """Load the MNIST arrays, corrupt the labels and split train/validation.

    The images and labels are read from ``data/mnist/train_images.npy`` and
    ``data/mnist/train_labels.npy``.  Noisy labels are produced by
    ``tools.get_instance_noisy_label`` and the (unmodified) image array is
    then partitioned together with those labels by ``tools.data_split``.

    Args:
        train: Stored on ``self.train``; not otherwise used in this method.
        transform: Stored on ``self.transform``; not used in this method.
        target_transform: Stored on ``self.target_transform``; not used in
            this method.
        noise_rate: Forwarded to ``tools.get_instance_noisy_label``.
        split_percentage: Forwarded to ``tools.data_split``.
        seed: Forwarded to both ``tools`` helpers.
        num_classes: Forwarded to ``tools.get_instance_noisy_label``.
        feature_size: Forwarded to ``tools.get_instance_noisy_label``.
        norm_std: Forwarded to ``tools.get_instance_noisy_label``.
    """
    self.transform = transform
    self.target_transform = target_transform
    self.train = train

    raw_images = np.load('data/mnist/train_images.npy')
    raw_labels = np.load('data/mnist/train_labels.npy')

    # The noise generator receives tensors: float images paired with labels.
    image_tensor = torch.from_numpy(raw_images).float()
    label_tensor = torch.from_numpy(raw_labels)
    image_label_pairs = zip(image_tensor, label_tensor)

    noisy_labels = tools.get_instance_noisy_label(
        noise_rate, image_label_pairs, label_tensor,
        num_classes, feature_size, norm_std, seed)

    # The split works on the original numpy images, not the float tensor.
    split = tools.data_split(raw_images, noisy_labels, split_percentage, seed)
    self.train_data, self.val_data, self.train_labels, self.val_labels = split
def __init__(self, train=True, transform=None, target_transform=None,
             noise_rate=0.2, split_percentage=0.9, seed=1, num_classes=10,
             feature_size=3 * 32 * 32, norm_std=0.1):
    """Load the SVHN arrays, corrupt the labels and split train/validation.

    The images and labels are read from ``data/svhn/train_images.npy`` and
    ``data/svhn/train_labels.npy``.  Noisy labels are produced by
    ``tools.get_instance_noisy_label`` and the (unmodified) image array is
    then partitioned together with those labels by ``tools.data_split``.
    Finally, only the split selected by ``train`` is reshaped to
    ``(-1, 3, 32, 32)`` and has its axes reordered to ``(0, 2, 3, 1)``.

    Args:
        train: Stored on ``self.train``.  When true the training split is
            reshaped; otherwise the validation split is.
        transform: Stored on ``self.transform``; not used in this method.
        target_transform: Stored on ``self.target_transform``; not used in
            this method.
        noise_rate: Forwarded to ``tools.get_instance_noisy_label``.
        split_percentage: Forwarded to ``tools.data_split``.
        seed: Forwarded to both ``tools`` helpers.
        num_classes: Forwarded to ``tools.get_instance_noisy_label``.
        feature_size: Forwarded to ``tools.get_instance_noisy_label``.
        norm_std: Forwarded to ``tools.get_instance_noisy_label``.
    """

    def rearrange(flat):
        # Reshape to (-1, 3, 32, 32), then move axis 1 to the last position.
        return flat.reshape((-1, 3, 32, 32)).transpose((0, 2, 3, 1))

    self.transform = transform
    self.target_transform = target_transform
    self.train = train

    raw_images = np.load('data/svhn/train_images.npy')
    raw_labels = np.load('data/svhn/train_labels.npy')

    # The noise generator receives tensors: float images paired with labels.
    image_tensor = torch.from_numpy(raw_images).float()
    label_tensor = torch.from_numpy(raw_labels)
    image_label_pairs = zip(image_tensor, label_tensor)

    noisy_labels = tools.get_instance_noisy_label(
        noise_rate, image_label_pairs, label_tensor,
        num_classes, feature_size, norm_std, seed)

    # The split works on the original numpy images, not the float tensor.
    split = tools.data_split(raw_images, noisy_labels, split_percentage, seed)
    self.train_data, self.val_data, self.train_labels, self.val_labels = split

    # The split that is not selected is left exactly as data_split returned it.
    if self.train:
        self.train_data = rearrange(self.train_data)
    else:
        self.val_data = rearrange(self.val_data)
""" print __doc__ import matplotlib.pyplot as plt import numpy as np import sys sys.path.append("Kaggle Competetions/House Prices") from tools import data_format train_data = data_format("House Prices/House Prices - Data/train.csv") test_data = data_format("House Prices/House Prices - Data/test.csv") from tools import data_split from sklearn.model_selection import train_test_split targets, features, features_test_final = data_split(train_data, test_data) features_train, features_test, labels_train, labels_test = train_test_split( features, targets, test_size=0.9, random_state=42) # Remove Outliers import matplotlib.pyplot as plt plt.subplot(1, 2, 1) boxplot = plt.boxplot(labels_train, notch=True) outliers = boxplot["fliers"][0].get_data()[1] indices = [i for i, x in enumerate(labels_train) if x in outliers] i = 0 for num in indices: del labels_train[num - i] del features_train[num - i] i += 1