def main():
    """Train an MLP on MNIST with randomized labels for seeds 42-44.

    For each seed: build the model/optimizer, overwrite the MNIST training
    labels with pre-generated random labels (memorization-style experiment),
    train for 150 epochs, then save and reload the final weights.

    Side effects: downloads MNIST into ``../data`` if absent, reads
    ``./random_labels_mnist.pth``, and writes one checkpoint per seed into
    ``./model_weights/``.
    """
    for seed in range(42, 45):
        torch.manual_seed(seed)
        model = MLP().cuda()
        # momentum=0 -> plain SGD
        optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=0)

        train_ds = datasets.MNIST(
            '../data', train=True, download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,)),
            ]))
        # Replace ground-truth labels with pre-generated random ones.
        # FIX: `train_labels` is a deprecated read-only property on modern
        # torchvision MNIST — assigning it does not change the labels the
        # loader actually serves. `targets` is the canonical attribute.
        train_ds.targets = torch.load('./random_labels_mnist.pth').long()
        train_loader = torch.utils.data.DataLoader(
            train_ds, batch_size=64, shuffle=True)

        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST(
                '../data', train=False,
                transform=transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((0.1307,), (0.3081,)),
                ])),
            batch_size=1000, shuffle=True)

        # Train for 150 epochs (the original "# 10 epoches" comment was stale),
        # evaluating on the test set after every epoch.
        for epoch in range(1, 150 + 1):
            train(model, train_loader, optimizer, epoch)
            test(model, test_loader)

        # Save the final weights, then reload them (round-trip sanity check).
        ckpt_path = './model_weights/mlp_random_weights_{}.pth'.format(seed)
        torch.save(model.state_dict(), ckpt_path)
        model.load_state_dict(torch.load(ckpt_path))
def main():
    """Entry point for cross-validated training on gene-expression data.

    Reads all settings from ``config.args``: selects the dataset, loss and
    metrics (``--data``), builds the requested network (``--net_type``), then
    runs ``--cross_valid``-fold training. For every split the model parameters
    are reset, a validation subset is carved out of the training split, and
    the trained model plus train/test metric tables are written under
    ``config.save_dir``.

    Raises:
        ValueError: if ``--data``, ``--loss_type`` or ``--net_type`` has an
            unrecognized value (previously this surfaced later as a
            confusing NameError).
    """
    # ----- select dataset, loss and metrics according to --data -----
    if config.args.data == 'brca':
        rna = RnaData.predicted_data(config.brca_cli, config.brca_rna,
                                     {'PAM50Call_RNAseq': 'pam50'})
        rna.transform(tf.LabelMapper(config.brca_label_mapper))
        out_shape = len(config.brca_label_mapper)
        criterion = nn.CrossEntropyLoss()
        scorings = (mm.Loss(), mm.Accuracy(), mm.BalancedAccuracy(),
                    mm.F1Score(average='macro'),
                    mm.Precision(average='macro'),
                    mm.Recall(average='macro'),
                    mm.ROCAUC(average='macro'))
    elif config.args.data == 'survival':
        if os.path.exists('./DATA/temp_pan.pth'):
            rna = RnaData.load('./DATA/temp_pan.pth')
        else:
            # NOTE(review): the freshly built dataset is never written back
            # to ./DATA/temp_pan.pth here, so this cache can only be
            # populated externally — confirm whether a save step is missing.
            rna = RnaData.survival_data(config.pan_cli, config.pan_rna,
                                        '_OS_IND', '_OS')
        out_shape = 1
        if config.args.loss_type == 'cox':
            criterion = NegativeLogLikelihood()
        elif config.args.loss_type == 'svm':
            criterion = SvmLoss(rank_ratio=config.args.svm_rankratio)
        else:
            raise ValueError('unknown loss_type: %s' % config.args.loss_type)
        scorings = (mm.Loss(), mm.CIndex())
    else:
        raise ValueError('unknown data: %s' % config.args.data)

    # Generic preprocessing: drop uninformative gene columns, then normalize.
    rna.transform(tf.ZeroFilterCol(0.8))
    rna.transform(tf.MeanFilterCol(1))
    rna.transform(tf.StdFilterCol(0.5))
    norm = tf.Normalization()
    rna.transform(norm)

    # ----- build the network according to --net_type -----
    inpt_shape = rna.X.shape[1]
    if config.args.net_type == 'mlp':
        net = MLP(inpt_shape, out_shape, config.args.hidden_num,
                  config.args.block_num).cuda()
    elif config.args.net_type == 'atten':
        net = SelfAttentionNet(inpt_shape, out_shape, config.args.hidden_num,
                               config.args.bottle_num, config.args.block_num,
                               config.args.no_res, config.act,
                               config.args.no_head, config.args.no_bottle,
                               config.args.no_atten,
                               config.args.dropout_rate).cuda()
    elif config.args.net_type == 'resnet':
        net = ResidualNet(inpt_shape, out_shape, config.args.hidden_num,
                          config.args.bottle_num,
                          config.args.block_num).cuda()
    else:
        raise ValueError('unknown net_type: %s' % config.args.net_type)

    # ----- cross-validated training -----
    split_iterator = rna.split_cv(config.args.test_size,
                                  config.args.cross_valid)
    train_hists = []
    test_hists = []
    for split_index, (train_rna, test_rna) in enumerate(split_iterator):
        print('##### save: %s, split: %d #####' %
              (config.args.save, split_index))
        # Carve a validation set out of the training split; it drives
        # early stopping during training.
        train_rna, valid_rna = train_rna.split(0.1)
        dats = {
            'train': train_rna.to_torchdat(),
            'valid': valid_rna.to_torchdat(),
        }
        dataloaders = {
            k: data.DataLoader(v, batch_size=config.args.batch_size)
            for k, v in dats.items()
        }
        test_dataloader = data.DataLoader(test_rna.to_torchdat(),
                                          batch_size=config.args.batch_size)
        # Reset parameters before every split so earlier training runs
        # cannot leak into this one.
        net.reset_parameters()
        optimizer = optim.Adamax(net.parameters(),
                                 lr=config.args.learning_rate)
        lrs = config.lrs(optimizer)
        net, hist = train(
            net, criterion, optimizer, dataloaders,
            epoch=config.args.epoch, metrics=scorings, l2=config.args.l2,
            standard_metric_index=config.args.standard_metric_index,
            scheduler=lrs)
        # Evaluate the trained model on the held-out test split.
        test_res = evaluate(net, criterion, test_dataloader, metrics=scorings)
        # Collect per-split training history into one DataFrame.
        hist = pd.DataFrame(hist)
        hist['split_index'] = split_index
        train_hists.append(hist)
        # Collect per-split test results.
        test_res['split_index'] = split_index
        test_hists.append(test_res)
        # Persist this split's model as its own checkpoint file.
        torch.save(net.state_dict(),
                   os.path.join(config.save_dir, 'model%d.pth' % split_index))

    # Save aggregated training histories and test results.
    train_hists = pd.concat(train_hists)
    train_hists.to_csv(os.path.join(config.save_dir, 'train.csv'))
    test_hists = pd.DataFrame(test_hists)
    test_hists.to_csv(os.path.join(config.save_dir, 'test.csv'))