def _build_model(self): """Function that creates a model instance based on the model name. Here we only support MLP, Linear and SDT. Returns: model: An instance of the model. """ if self.args.model == 'MLP': model = MLP(self.args.input_dim, self.args.output_dim, self.args.d_model, self.args.layers).float() elif self.args.model == 'Linear': model = Linear( self.args.output_dim, self.args.input_dim, ).float() elif self.args.model == 'SDT': model = SDT(self.args.input_dim, self.args.output_dim, depth=self.args.depth, device=self.device).float() else: raise NotImplementedError # if multiple GPU are to be used parralize model if self.args.use_multi_gpu and self.args.use_gpu: model = nn.DataParallel(model, device_ids=self.args.device_ids) return model
def _build_model(self): """Function that creates a model instance based on the model name. Here we only support LSTM, Linear and ARNet. Returns: model: An instance of the model. """ if self.args.model == 'LSTM': model = LSTM(self.args.input_dim, self.args.pred_len, self.args.d_model, self.args.layers, self.args.dropout, self.device).float() elif self.args.model == 'Linear': model = Linear( self.args.pred_len * self.args.input_dim, self.args.seq_len, ).float() elif self.args.model == ' ARNet': model = ARNet(n_forecasts=self.args.pred_len * self.args.input_dim, n_lags=self.args.seq_len, device=self.device).float() else: raise NotImplementedError # if multiple GPU are to be used parralize model if self.args.use_multi_gpu and self.args.use_gpu: model = nn.DataParallel(model, device_ids=self.args.device_ids) return model
def main(num_tasks=10,
         dataset='MNIST',
         num_epochs=100,
         batch_size=64,
         component_update_frequency=100,
         ewc_lambda=1e-5,
         replay_size=-1,
         layer_size=64,
         num_layers=4,
         num_init_tasks=4,
         init_mode='random_onehot',
         architecture='mlp',
         algorithm='er_compositional',
         num_seeds=1,
         results_root='./tmp/results',
         save_frequency=1,
         initial_seed=0):
    '''
    TODOS: Add module addition step
    '''
    # raise ValueError('Read TODO above for next steps')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    for seed in range(initial_seed, initial_seed + num_seeds):
        torch.manual_seed(seed * SEED_SCALE)
        np.random.seed(seed * SEED_SCALE)
        if dataset == 'MNIST':
            torch_dataset = datasets.BinaryMNIST(num_tasks)
            freeze_encoder = True
        elif dataset == 'Fashion':
            torch_dataset = datasets.BinaryFashionMNIST(num_tasks)
            freeze_encoder = True
        elif dataset == 'CIFAR':
            padding = 1
            torch_dataset = datasets.SplitCIFAR100(num_tasks)
        elif dataset == 'CUB':
            torch_dataset = datasets.SplitCUB200(num_tasks)
            freeze_encoder = False
        elif dataset == 'Omniglot':
            padding = 0
            torch_dataset = datasets.Omniglot(num_tasks)
        elif dataset == 'Landmine':
            torch_dataset = datasets.Landmine(num_tasks)
        elif dataset == 'FacialRecognition':
            torch_dataset = datasets.FacialRecognition(num_tasks)
        elif dataset == 'LondonSchool':
            torch_dataset = datasets.LondonSchool(num_tasks)
        else:
            raise NotImplementedError(
                '{} dataset is not supported'.format(dataset))

        if architecture == 'mlp':
            if 'dynamic' in algorithm:
                net = MLPSoftLLDynamic(torch_dataset.features,
                                       size=layer_size,
                                       depth=num_layers,
                                       num_classes=torch_dataset.num_classes,
                                       num_tasks=num_tasks,
                                       num_init_tasks=num_init_tasks,
                                       max_components=-1,
                                       init_ordering_mode=init_mode,
                                       device=device,
                                       freeze_encoder=freeze_encoder)
            elif 'nocomponents' in algorithm:
                net = MLP(torch_dataset.features,
                          size=layer_size,
                          depth=num_layers,
                          num_classes=torch_dataset.num_classes,
                          num_tasks=num_tasks,
                          num_init_tasks=num_init_tasks,
                          device=device,
                          freeze_encoder=freeze_encoder)
            else:
                net = MLPSoftLL(torch_dataset.features,
                                size=layer_size,
                                depth=num_layers,
                                num_classes=torch_dataset.num_classes,
                                num_tasks=num_tasks,
                                num_init_tasks=num_init_tasks,
                                init_ordering_mode=init_mode,
                                device=device,
                                freeze_encoder=freeze_encoder)
        elif architecture == 'mlp_gated':
            net = MLPSoftGatedLL(torch_dataset.features,
                                 size=layer_size,
                                 depth=num_layers,
                                 num_classes=torch_dataset.num_classes,
                                 num_tasks=num_tasks,
                                 num_init_tasks=num_init_tasks,
                                 init_ordering_mode=init_mode,
                                 device=device,
                                 freeze_encoder=freeze_encoder)
        elif architecture == 'linear':
            if 'nocomponents' in algorithm:
                net = Linear(torch_dataset.features,
                             num_tasks=num_tasks,
                             num_init_tasks=num_init_tasks,
                             regression=dataset == 'LondonSchool',
                             device=device)
            else:
                net = LinearFactored(torch_dataset.features,
                                     depth=num_layers,
                                     num_tasks=num_tasks,
                                     num_init_tasks=num_init_tasks,
                                     init_ordering_mode=init_mode,
                                     regression=dataset == 'LondonSchool',
                                     device=device)
            # Ignore the batch_size in the arguments (batch learning)
            batch_size = torch_dataset.max_batch_size
        elif architecture == 'cnn':
            if 'dynamic' in algorithm:
                net = CNNSoftLLDynamic(torch_dataset.features,
                                       channels=layer_size,
                                       depth=num_layers,
                                       num_classes=torch_dataset.num_classes,
                                       num_tasks=num_tasks,
                                       conv_kernel=3,
                                       maxpool_kernel=2,
                                       padding=padding,
                                       num_init_tasks=num_init_tasks,
                                       max_components=-1,
                                       init_ordering_mode=init_mode,
                                       device=device)
            elif 'nocomponents' in algorithm:
                net = CNN(torch_dataset.features,
                          channels=layer_size,
                          depth=num_layers,
                          num_classes=torch_dataset.num_classes,
                          num_tasks=num_tasks,
                          conv_kernel=3,
                          maxpool_kernel=2,
                          padding=padding,
                          num_init_tasks=num_init_tasks,
                          device=device)
            else:
                net = CNNSoftLL(torch_dataset.features,
                                channels=layer_size,
                                depth=num_layers,
                                num_classes=torch_dataset.num_classes,
                                num_tasks=num_tasks,
                                conv_kernel=3,
                                maxpool_kernel=2,
                                padding=padding,
                                num_init_tasks=num_init_tasks,
                                init_ordering_mode=init_mode,
                                device=device)
        elif architecture == 'cnn_gated':
            if 'dynamic' in algorithm:
                net = CNNSoftGatedLLDynamic(torch_dataset.features,
                                            channels=layer_size,
                                            depth=num_layers,
                                            num_classes=torch_dataset.num_classes,
                                            num_tasks=num_tasks,
                                            conv_kernel=3,
                                            maxpool_kernel=2,
                                            padding=padding,
                                            num_init_tasks=num_init_tasks,
                                            max_components=-1,
                                            init_ordering_mode=init_mode,
                                            device=device)
            else:
                net = CNNSoftGatedLL(torch_dataset.features,
                                     channels=layer_size,
                                     depth=num_layers,
                                     num_classes=torch_dataset.num_classes,
                                     num_tasks=num_tasks,
                                     conv_kernel=3,
                                     maxpool_kernel=2,
                                     padding=padding,
                                     num_init_tasks=num_init_tasks,
                                     init_ordering_mode=init_mode,
                                     device=device)
        else:
            raise NotImplementedError(
                '{} architecture is not supported'.format(architecture))

        net.train()  # training mode
        kwargs = {}
        results_dir = os.path.join(results_root, dataset, algorithm,
                                   'seed_{}'.format(seed))
        if algorithm == 'er_compositional':
            if replay_size == -1:
                replay_size = batch_size
            agent = CompositionalER(net, replay_size, results_dir=results_dir)
        elif algorithm == 'ewc_compositional':
            agent = CompositionalEWC(net, ewc_lambda, results_dir=results_dir)
        elif algorithm == 'van_compositional':
            agent = CompositionalVAN(net, results_dir=results_dir)
        elif algorithm == 'fm_compositional':
            agent = CompositionalFM(net, results_dir=results_dir)
        elif algorithm == 'er_joint':
            if replay_size == -1:
                replay_size = batch_size
            agent = JointER(net, replay_size, results_dir=results_dir)
        elif algorithm == 'ewc_joint':
            agent = JointEWC(net, ewc_lambda, results_dir=results_dir)
        elif algorithm == 'van_joint':
            agent = JointVAN(net, results_dir=results_dir)
        elif algorithm == 'er_nocomponents':
            if replay_size == -1:
                replay_size = batch_size
            agent = NoComponentsER(net, replay_size, results_dir=results_dir)
        elif algorithm == 'ewc_nocomponents':
            agent = NoComponentsEWC(net, ewc_lambda, results_dir=results_dir)
        elif algorithm == 'van_nocomponents':
            agent = NoComponentsVAN(net, results_dir=results_dir)
        elif algorithm == 'er_dynamic':
            if replay_size == -1:
                replay_size = batch_size
            agent = CompositionalDynamicER(net, replay_size,
                                           results_dir=results_dir)
        elif algorithm == 'ewc_dynamic':
            agent = CompositionalDynamicEWC(net, ewc_lambda,
                                            results_dir=results_dir)
        elif algorithm == 'van_dynamic':
            agent = CompositionalDynamicVAN(net, results_dir=results_dir)
        elif algorithm == 'fm_dynamic':
            agent = CompositionalDynamicFM(net, results_dir=results_dir)
        else:
            raise NotImplementedError(
                '{} algorithm is not supported'.format(algorithm))

        for task_id, trainset in enumerate(torch_dataset.trainset):
            trainloader = torch.utils.data.DataLoader(
                trainset,
                batch_size=batch_size,
                shuffle=True,
                num_workers=0,
                pin_memory=True,
            )
            testloaders = {
                task: torch.utils.data.DataLoader(
                    testset,
                    batch_size=torch_dataset.max_batch_size,
                    shuffle=False,
                    num_workers=0,
                    pin_memory=True,
                )
                for task, testset in enumerate(
                    torch_dataset.testset[:(task_id + 1)])
            }
            if 'dynamic' in algorithm:
                valloader = torch.utils.data.DataLoader(
                    torch_dataset.valset[task_id],
                    batch_size=torch_dataset.max_batch_size,
                    shuffle=False,
                    num_workers=0,
                    pin_memory=True,
                )
                kwargs = {'valloader': valloader}
            agent.train(trainloader,
                        task_id,
                        component_update_freq=component_update_frequency,
                        num_epochs=num_epochs,
                        testloaders=testloaders,
                        save_freq=save_frequency,
                        **kwargs)
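# --- Illustrative invocation (not part of the original script) -------------
# A minimal sketch of launching main() for a quick smoke run; the argument
# values below are arbitrary examples, only the keyword names come from the
# signature above.
if __name__ == '__main__':
    main(num_tasks=5,
         dataset='MNIST',
         num_epochs=2,
         architecture='mlp',
         algorithm='er_compositional',
         num_seeds=1,
         results_root='./tmp/results')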
class ModelTests(unittest.TestCase):

    def setUp(self):
        self.train_df, self.test_df = get_train_test_split()
        self.classes = constants["classes"]
        self.KNN = KNN(k=4, classes=self.classes)
        self.KNN.fit(self.train_df)
        self.NaiveBayes = NaiveBayes(n=3, classes=self.classes)
        self.NaiveBayes.fit(self.train_df)
        self.Linear = Linear(classes=self.classes, max_len=40)
        self.Linear.fit(self.train_df, epochs=1)
        self.W2V = W2V(classes=self.classes)

    def test_knn_io(self):
        """Test that the KNN model takes the right inputs and outputs a
        dictionary with all possible classes."""
        pred, output = self.KNN("BREST")
        self.assertIsInstance(output, dict)
        self.assertIn(pred, self.classes)
        for label in self.classes:
            self.assertIn(label, output.keys())

    def test_knn_output_probabilities(self):
        """Test that the KNN model returns probabilities for each possible class."""
        _, output = self.KNN("RADE DE BREST")
        # sums up to one
        self.assertLess(abs(sum(output.values()) - 1), 1e-3)
        # all values between 0 and 1
        for value in output.values():
            self.assertGreaterEqual(value, 0)
            self.assertLessEqual(value, 1)

    def test_knn_case_insensitive(self):
        pred_upper, output_upper = self.KNN("BREST")
        pred_lower, output_lower = self.KNN("brest")
        self.assertEqual(pred_upper, pred_lower)
        self.assertListEqual(list(output_upper.items()),
                             list(output_lower.items()))

    def test_naive_bayes_io(self):
        """Test that the Naive Bayes model takes the right inputs and outputs a
        dictionary with all possible classes."""
        pred, output = self.NaiveBayes("BREST")
        self.assertIn(pred, self.classes)
        self.assertIsInstance(output, dict)

    # def test_naive_bayes_output_probabilities(self):
    #     _, output = self.NaiveBayes("BREST")
    #     self.assertLess(abs(sum(output.values()) - 1), 1e-3)
    #     for label in self.classes:
    #         self.assertIn(label, output.keys())

    def test_linear_io(self):
        """Test that the Linear model takes the right inputs and outputs a
        dictionary with all possible classes."""
        pred, output = self.Linear("BREST")
        self.assertIn(pred, self.classes)
        self.assertIsInstance(output, dict)

    def test_linear_output_probabilities(self):
        _, output = self.Linear("BREST")
        self.assertLess(abs(sum(output.values()) - 1), 1e-3)
        for label in self.classes:
            self.assertIn(label, output.keys())

    def test_w2v_io(self):
        """Test that the Word2Vec model takes the right inputs and outputs a
        dictionary with all possible classes."""
        pred, output = self.W2V("BREST")
        self.assertIn(pred, self.classes)
        self.assertIsInstance(output, dict)

    def test_w2v_output_probabilities(self):
        _, output = self.W2V("BREST")
        self.assertLess(abs(sum(output.values()) - 1), 1e-3)
        for label in self.classes:
            self.assertIn(label, output.keys())
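# --- Illustrative test runner (not part of the original module) ------------
# A minimal sketch so the suite above can be executed directly with
# `python <test_file>.py`; it can equally be run via `python -m unittest`.
if __name__ == "__main__":
    unittest.main()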
def __init__(
    self,
    num_experts=3,
    n_forecasts=1,
    n_lags=0,
    input_features=1,
    gate_type="Linear",
    expert_type="Linear",
    d_model=512,
    layers=3,
    dropout=0.0,
    device="cpu",
):
    """Initializes an IME instance.

    Args:
        num_experts: Number of experts.
        n_forecasts: Number of time steps to forecast.
        n_lags: Lags (past time steps) used to make the forecast.
        input_features: Input feature dimension.
        gate_type: Assignment module type; can be "Linear" or "LSTM".
        expert_type: Interpretable expert type; can be "Linear" or "ARNet".
        d_model: Hidden layer dimension for the LSTM.
        layers: Number of LSTM layers.
        dropout: Fraction of neurons affected by dropout in the LSTM.
        device: Device used by the model.

    Inputs:
        original_inputs: A tensor of shape `(batch_size, sequence_length, input_size)`.
        true_output: Actual forecast, used for teacher forcing during training.
        past_values: Expert errors in the last time step.

    Returns:
        expert_outputs: A tensor containing the forecast produced by each expert,
            with size `(batch_size, num_experts, n_forecasts)`.
        weights: Weights assigned to each expert by the assignment module.
        reg_out: Regression output (the forecast), a tensor of shape `(batch_size, out_len)`.
    """
    super(IME, self).__init__()
    self.num_experts = num_experts
    self.device = device
    self.n_forecasts = n_forecasts
    self.n_lags = n_lags
    self.expert_type = expert_type
    self.gate_type = gate_type

    # Only Linear and ARNet experts are supported
    assert self.expert_type == "Linear" or self.expert_type == "ARNet"
    # Only Linear and LSTM assignment modules are supported
    assert self.gate_type == "Linear" or self.gate_type == "LSTM"

    if self.expert_type == "Linear":
        self.experts = nn.ModuleList([
            Linear(n_forecasts=n_forecasts, n_lags=n_lags)
            for i in range(self.num_experts)
        ])
    elif self.expert_type == "ARNet":
        self.experts = nn.ModuleList([
            ARNet(n_forecasts=n_forecasts, n_lags=n_lags, device=device)
            for i in range(self.num_experts)
        ])
    else:
        raise NotImplementedError

    # The gate network takes the lags and the past values as inputs and
    # produces an output forecast and a prediction for each expert
    if self.gate_type == "Linear":
        self.gate_network = nn.Linear((self.num_experts) + n_lags,
                                      self.num_experts + self.n_forecasts)
    elif self.gate_type == "LSTM":
        self.gate_network = LSTMime(input_features + self.num_experts,
                                    self.num_experts,
                                    n_forecasts,
                                    d_model=d_model,
                                    layers=layers,
                                    dropout=dropout,
                                    device=device)
    else:
        raise NotImplementedError
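# --- Illustrative instantiation (not from the original repo) ---------------
# A minimal sketch of constructing the module defined above; the expert count,
# forecast horizon and lag window are arbitrary example values, only the
# keyword names come from the constructor signature.
model = IME(num_experts=3,
            n_forecasts=24,
            n_lags=96,
            input_features=1,
            gate_type="Linear",
            expert_type="Linear",
            device="cpu")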
from models.linear import Linear
from file_handling.load_datasets import load_mnist

(X_train, Y_train), (X_test, Y_test) = load_mnist()

linear_model = Linear(X_train.shape[1], Y_train.shape[1])

train_accuracy = linear_model.train_model(X_train, Y_train, .1)
print(f"Train Accuracy: {train_accuracy}")

test_accuracy = linear_model.test_model(X_test, Y_test)
print(f"Test Accuracy: {test_accuracy}")