def train():
    """Grid-search training of ``finalModel`` on FashionMNIST.

    Builds every hyper-parameter combination from ``params`` via
    ``RunBuilder``, trains each configuration for 10 epochs while
    ``RunManager`` tracks loss and accuracy, then writes all run data
    to 'results'.
    """
    train_set = torchvision.datasets.FashionMNIST(
        root='./data/FashionMNIST',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor()
        ])
    )

    params = OrderedDict(
        lr=[0.01, 0.001],
        batch_size=[100, 1000],
        shuffle=[True, False],
        num_workers=[2]
    )

    runManager = RunManager()
    for run in RunBuilder.get_runs(params):
        network = finalModel()
        # BUG FIX: the batches below are moved to get_device_type(), but the
        # model previously stayed on the CPU, which raises a device-mismatch
        # error whenever the device is not 'cpu'.  .to() is a no-op when the
        # model is already on the target device.
        network = network.to(get_device_type())
        loader = DataLoader(
            train_set,
            batch_size=run.batch_size,
            shuffle=run.shuffle,
            num_workers=run.num_workers)
        optimizer = optim.Adam(network.parameters(), lr=run.lr)

        runManager.begin_run(run, network, loader)
        for epoch in range(10):
            runManager.begin_epoch()
            for batch in loader:
                images, labels = batch
                # support computation based on device type
                images = images.to(get_device_type())
                labels = labels.to(get_device_type())

                preds = network(images)
                loss = F.cross_entropy(preds, labels)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                runManager.track_loss(loss)
                runManager.track_num_correct(preds, labels)
            runManager.end_epoch()
        runManager.end_run()
    runManager.save('results')
# Report the requested split sizes, then carve the initial training data
# into train/validation subsets.
print(f"{splits}")
train_set, val_set = dataset.random_split(init_train_data, splits)

# One full 50-epoch training pass per hyper-parameter combination.
for run in RunBuilder.get_runs(params):
    network = MyConvNet()
    loader = DataLoader(train_set,
                        batch_size=run.batch_size,
                        num_workers=run.num_workers,
                        shuffle=run.shuffle)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)

    m.begin_run(run, network, loader)
    for epoch in range(50):
        m.begin_epoch()
        for images, labels in loader:
            print(m.epoch_count)

            preds = network(images)
            loss = F.cross_entropy(preds, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            m.track_loss(loss)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()

# Persist every run's metrics once the sweep completes.
m.save('results')
# Show the split configuration, then divide the raw training data into
# a training subset and a held-out validation subset.
print(f"{splits}")
train_set, val_set = dataset.random_split(init_train_data, splits)

for run in RunBuilder.get_runs(params):
    network = MyConvNet()
    loader = DataLoader(train_set,
                        batch_size=run.batch_size,
                        num_workers=run.num_workers,
                        shuffle=run.shuffle)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)

    m.begin_run(run, network, loader)
    for epoch in range(50):
        m.begin_epoch()
        for images, labels in loader:
            print(m.epoch_count)

            preds = network(images)
            loss = F.cross_entropy(preds, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            m.track_loss(loss)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()

# Output file name is taken from the command line (first argument).
m.save(sys.argv[1])
# Sweep every hyper-parameter combination, training a fresh Network for
# two epochs per run and recording metrics through the RunManager.
m = RunManager()
for run in RunBuilder.get_runs(parameters):
    network = Network()
    loader = torch.utils.data.DataLoader(train_set, batch_size=run.batch_size)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)

    m.begin_run(run, network, loader)
    for epoch in range(2):
        m.begin_epoch()
        for images, labels in loader:
            preds = network(images)
            loss = F.cross_entropy(preds, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            m.track_loss(loss)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()

# Dump all collected run data to disk.
m.save('CNN_image_prediction_results')
def main():
    """Run the LeNet / BiggerLeNet / VggLikeNet experiment sweeps.

    Creates the datasets, builds one hyper-parameter grid per
    architecture, trains every combination through
    ``runManager_train``, and writes per-architecture results plus the
    five best models to disk.

    Returns:
        dict mapping network name -> sorted list of that network's best
        models (best first).
    """
    print(torch.__version__)
    print(torchvision.__version__)

    train_set, valid_set, test_set = create_datasets()

    # Choose the compute device once; every run record carries it.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # NOTE(review): currently unused — kept as a registry of the earlier
    # '2conv*' experiments; confirm before deleting.
    networks = {
        '2conv2fc': lambda: Network(),
        '2conv_24_3fc': lambda: Network2(),
        '2conv_48_3fc': lambda: Network3()
    }

    parametersLeNet = OrderedDict(network=['LeNet'],
                                  lr=[.005, .01],
                                  batch_size=[1000],
                                  epochs=[10],
                                  device=[device],
                                  nw=[2],
                                  conv_out=[[16, 32], [24, 48], [32, 64]],
                                  conv_ks=[[3, 3], [3, 5], [5, 5]],
                                  dropout=[0.0, 0.2, 0.5],
                                  lin_out=[[200, 84], [500, 200, 84]],
                                  in_size=[(28, 28)],
                                  out_size=[10]
                                  # , batch_norm=[True]
                                  )
    parametersBiggerLeNet = OrderedDict(network=['BiggerLeNet'],
                                        lr=[.001, .005, .01],
                                        batch_size=[1000],
                                        epochs=[10],
                                        device=[device],
                                        nw=[2],
                                        conv_out=[[16, 32, 64], [32, 64, 128], [48, 96, 192]],
                                        conv_ks=[[3, 3, 3]],
                                        dropout=[0.0, 0.2, 0.5],
                                        lin_out=[[200, 84], [512], [200]],
                                        in_size=[(28, 28)],
                                        out_size=[10]
                                        # , batch_norm=[True]
                                        )
    parametersVggLikeNet = OrderedDict(network=['VggLikeNet'],
                                       lr=[.001, .005, .01],
                                       batch_size=[1000],
                                       epochs=[10],
                                       device=[device],
                                       nw=[10],
                                       conv_out=[[[16, 16], [32, 32]], [[24, 24], [48, 48]], [[32, 32], [64, 64]]],
                                       conv_ks=[[[3, 3], [3, 3]]],
                                       dropout=[0.0, 0.2, 0.5],
                                       lin_out=[[200, 84], [512], [200]],
                                       in_size=[(28, 28)],
                                       out_size=[10]
                                       # , batch_norm=[True]
                                       )

    runs_data = {}
    best_models = None
    experiments = [('LeNet', parametersLeNet),
                   ('BiggerLeNet', parametersBiggerLeNet),
                   ('VggLikeNet', parametersVggLikeNet)]

    use_batch_norm = True  # loop-invariant; hoisted out of the experiment loop

    # reversed() is the idiomatic spelling of experiments.__reversed__()
    for networkName, parameters in reversed(experiments):
        m = RunManager()
        for run in RunBuilder.get_runs(parameters):
            print("Run starting:", run)
            # BUG FIX: random.seed() seeds the RNG and returns None, so the
            # old `random_seed = random.seed()` always passed None downstream.
            # Draw an explicit seed so the split is reproducible/loggable.
            random_seed = random.randrange(2 ** 32)
            random.seed(random_seed)

            valid_split = 0.1
            pin_memory = (run.device != 'cpu')  # pinned host memory only helps CUDA
            train_loader, valid_loader, test_loader = get_train_valid_test_loader(
                train_set, valid_set, test_set, run.batch_size, random_seed,
                valid_split, True, run.nw, pin_memory)

            network = construct_network(run, use_batch_norm)
            print('network.name: :', networkName, ' chosen')
            print("network architecture: \n", network)
            optimizer = optim.Adam(network.parameters(), lr=run.lr)

            runManager_train(runManager=m, run=run, network=network,
                             optimizer=optimizer, train_loader=train_loader,
                             valid_loader=valid_loader, test_loader=None,
                             valid_split=valid_split, names=train_set.classes)

        # NOTE(review): best-model aggregation reconstructed as running once per
        # architecture (after the run loop) — confirm against RunManager usage.
        best_models = sorted(m.best_models, reverse=True)
        best_models_str = "\n".join(
            str(model) for model in best_models[:5])
        runs_data[networkName] = best_models
        m.save(f'results_{networkName}')
        with open(f'best_models_{networkName}.txt', 'w', encoding='utf-8') as f:
            f.write(best_models_str)
    return runs_data
loss.backward() # print(torch.max(network.conv1.weight.grad)) # Update weights optimizer.step() # calculate each batch's total loss m.track_loss(loss, batch) # accumulate the num of correct prediction m.track_num_correct(preds, labels) # calculate accuracy and write run_data to file m.end_epoch() # # show information # string = 'epoch:%d loss = %f accuracy = %f' % (epoch,loss.item()) # print(string) m.end_run() # input file name to save hyper-parameters results m.save(str(time.time()) + 'results') # 四、评价模型 from sklearn.metrics import confusion_matrix from plotcm import plot_confusion_matrix from matplotlib import pyplot as plt def get_all_preds(model, data_loader): all_preds = torch.tensor([]) with torch.no_grad(): for batch in data_loader: images, labels = batch preds = model(images) all_preds = torch.cat((all_preds, preds), dim=0) return all_preds
def __train_network(self, model, train_set, run, save_logistics_file_path, epochs, type_of_model, show_plot):
    """Train ``model`` on ``train_set`` for one hyper-parameter run.

    Args:
        model: network to train (returned at the end, trained in place).
        train_set: dataset fed to the DataLoader.
        run: hyper-parameter bundle providing batch_size, lr and shuffle.
        save_logistics_file_path: directory/prefix for the results file.
        epochs: number of training epochs.
        type_of_model: tag used for file naming and the TensorBoard title.
        show_plot: when True, plot per-epoch loss and accuracy curves.

    Returns:
        The trained model.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("-------------------------------------------------------------------", device)

    # BUG FIX: batches are moved to `device` below, but the model itself was
    # never moved, which fails with a device mismatch on CUDA machines.
    # .to() is idempotent, so this is safe even if the caller already moved it.
    model = model.to(device)

    loss_val = []
    acc_val = []
    batch_size = run.batch_size
    lr = run.lr
    shuffle = run.shuffle

    # set batch size
    data_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=shuffle,
                                              num_workers=1, pin_memory=True)
    save_file_name = save_logistics_file_path + self.__get_file_name(type_of_model, shuffle, lr, batch_size)
    tb_summary = self.__get_tb_summary_title(type_of_model)

    # set optimizer - Adam
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # initialise summary writer
    run_manager = RunManager()
    run_manager.begin_run(run, model, data_loader, device, tb_summary)
    # NOTE(review): process-wide side effect — disables cuDNN for everything
    # that runs after this call, not just this training loop.
    torch.backends.cudnn.enabled = False

    # start training
    for epoch in range(epochs):
        run_manager.begin_epoch()
        for batch in data_loader:
            images, labels = batch
            images = images.to(device)
            labels = labels.to(device)

            # forward propagation
            predictions = model(images)
            loss = F.cross_entropy(predictions, labels)

            # zero out grads for every new iteration
            optimizer.zero_grad()
            # back propagation
            loss.backward()
            # update weights: w = w - lr * grad_dw
            optimizer.step()

            run_manager.track_loss(loss)
            run_manager.track_total_correct_per_epoch(predictions, labels)
        run_manager.end_epoch()
        loss_val.append(run_manager.get_final_loss_val())
        acc_val.append(run_manager.get_final_accuracy())
    run_manager.end_run()
    run_manager.save(save_file_name)

    if show_plot:
        self.plot_loss_val(loss_val, run)
        self.plot_accuracy_val(acc_val, run)
    return model