def freeze_model_parameters(self):
    # make every parameter trainable; despite the name, nothing is frozen
    # unless the embedding block below is re-enabled
    for param in self.model.parameters():
        param.requires_grad = True
    # for param in self.model.embedding_layer.parameters():
    #     param.requires_grad = False
    params_info = get_parameter_number(self.model)
    self.log.logger.info(f'*** Parameters: {params_info}')
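# ---------------------------------------------------------------------------
# Every snippet in this file depends on a get_parameter_number helper that is
# never shown. Below is a minimal sketch (an assumption, not the repo's actual
# definition) consistent with the dict-style call sites further down
# (params['Total'], params['Trainable']). Note that the CoNLL-2003 snippet
# instead unpacks a (total, trainable) tuple, so its utils module presumably
# returns a tuple rather than a dict.
def get_parameter_number(model):
    """Count a torch.nn.Module's total and trainable parameters (sketch)."""
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return {'Total': total, 'Trainable': trainable}
# ---------------------------------------------------------------------------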
    return criterion(logit, real_label)


dataset = utils.CONLL2003('./data')
dataloader = DataLoader(dataset, batch_size=1, shuffle=False,
                        num_workers=0, drop_last=False)
net = Lstm(dataset.wn)
device = torch.device('cuda')
net.to(device)
optimizer = optim.SGD(net.parameters(), net.config.lr)
scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.96)
test = dataset.testa
total_sum, trainable_sum = utils.get_parameter_number(net)
print("total params, trainable params")
print(total_sum, trainable_sum)
f = open('predict.txt', 'w')
acc_list = []
loss_list = []


def eval(net):
    net.eval()
    # evaluation below
    taglist = []
    hit = 0
    a = 0
    loss = 0.
def __init__(self, num_classes=100, size=4, sigmoid_size=3,
             model_config='all', data_flag='cifar100'):
    super(GoogleNet, self).__init__(
        size=size, sigmoid_size=sigmoid_size,
        teacher_channels=[512, 512, 528, 832, 832],
        student_channels=[256, 256, 384, 512, 512])
    if model_config.count('simple'):
        for parameter in super(GoogleNet, self).parameters():
            parameter.requires_grad = False

    def acc(total, part, sign=1):
        # accumulate (or, with sign=-1, remove) a sub-module's counts
        total['Total'] += sign * part['Total']
        total['Trainable'] += sign * part['Trainable']

    ############################
    #         Teacher          #
    ############################
    self.prelayer = t.nn.Sequential(
        t.nn.Conv2d(3, 192, kernel_size=3, padding=1),
        t.nn.BatchNorm2d(192),
        t.nn.ReLU(inplace=True))
    pre_params = get_parameter_number(self.prelayer)
    googlenet_params = dict(pre_params)  # running teacher total

    # although we only use 1 conv layer as the prelayer,
    # we still use the names a3, b3, ...
    self.a3 = Inception_Module(192, 64, 96, 128, 16, 32, 32)      # 256
    a3_params = get_parameter_number(self.a3)
    acc(googlenet_params, a3_params)
    self.b3 = Inception_Module(256, 128, 128, 192, 32, 96, 64)    # 480
    b3_params = get_parameter_number(self.b3)
    acc(googlenet_params, b3_params)

    # """In general, an Inception network is a network consisting of
    # modules of the above type stacked upon each other, with occasional
    # max-pooling layers with stride 2 to halve the resolution of the
    # grid"""
    self.maxpool = t.nn.MaxPool2d(3, stride=2, padding=1)

    self.a4 = Inception_Module(480, 192, 96, 208, 16, 48, 64)     # 512
    a4_params = get_parameter_number(self.a4)
    acc(googlenet_params, a4_params)
    self.b4 = Inception_Module(512, 160, 112, 224, 24, 64, 64)    # 512
    b4_params = get_parameter_number(self.b4)
    acc(googlenet_params, b4_params)
    self.c4 = Inception_Module(512, 128, 128, 256, 24, 64, 64)    # 512
    c4_params = get_parameter_number(self.c4)
    acc(googlenet_params, c4_params)
    self.d4 = Inception_Module(512, 112, 144, 288, 32, 64, 64)    # 528
    d4_params = get_parameter_number(self.d4)
    acc(googlenet_params, d4_params)
    self.e4 = Inception_Module(528, 256, 160, 320, 32, 128, 128)  # 832
    e4_params = get_parameter_number(self.e4)
    acc(googlenet_params, e4_params)

    self.a5 = Inception_Module(832, 256, 160, 320, 32, 128, 128)  # 832
    a5_params = get_parameter_number(self.a5)
    acc(googlenet_params, a5_params)
    self.b5 = Inception_Module(832, 384, 192, 384, 48, 128, 128)  # 1024
    b5_params = get_parameter_number(self.b5)
    acc(googlenet_params, b5_params)

    # input feature size: 8*8*1024
    self.avgpool = t.nn.AdaptiveAvgPool2d((1, 1))
    self.dropout = t.nn.Dropout2d(p=0.4)
    self.linear = t.nn.Linear(1024, num_classes)
    linear_params = get_parameter_number(self.linear)
    acc(googlenet_params, linear_params)
    print('GoogLeNet total : ', googlenet_params)

    string = data_flag + '---' + model_config + ':\n'
    string += 'Teacher params : ' + str(googlenet_params) + '\n'

    ############################
    #         Student          #
    ############################
    self.additional_classifiers = t.nn.ModuleList([
        t.nn.Linear(256, num_classes),
        t.nn.Linear(256, num_classes),
        t.nn.Linear(384, num_classes),
        t.nn.Linear(512, num_classes),
        t.nn.Linear(512, num_classes)])

    self.student_model_conv1 = Fire_Module(in_channels=3, squeeze_ratio=0.125,
                                           ratio_3x3=0.5, expand_filters=128)
    student_params = get_parameter_number(self.student_model_conv1)
    self.student_model_conv2 = Fire_Module(in_channels=128, squeeze_ratio=0.125,
                                           ratio_3x3=0.5, expand_filters=256)
    conv2_params = get_parameter_number(self.student_model_conv2)
    acc(student_params, conv2_params)
    # classifier 1 also reuses the teacher's prelayer and a3/b3/a4/b4 blocks
    acc(student_params, pre_params)
    acc(student_params, a3_params)
    acc(student_params, b3_params)
    acc(student_params, a4_params)
    acc(student_params, b4_params)
    se_params = get_parameter_number(self.ses_t[0])
    acc(student_params, se_params)
    addi_params = get_parameter_number(self.additional_classifiers[0])
    acc(student_params, addi_params)
    print('Classifier 1 has params : ', student_params)
    string += 'Classifier 1 params : ' + str(student_params) + '\n'

    self.student_model_conv3 = Fire_Module(in_channels=256, squeeze_ratio=0.125,
                                           ratio_3x3=0.5, expand_filters=256)
    conv3_params = get_parameter_number(self.student_model_conv3)
    acc(student_params, addi_params, sign=-1)  # swap out the previous head
    acc(student_params, conv3_params)
    acc(student_params, c4_params)
    se_params = get_parameter_number(self.ses_t[1])
    acc(student_params, se_params)
    addi_params = get_parameter_number(self.additional_classifiers[1])
    acc(student_params, addi_params)
    print('Classifier 2 has params : ', student_params)
    string += 'Classifier 2 params : ' + str(student_params) + '\n'

    self.student_model_conv4 = Fire_Module(in_channels=256, squeeze_ratio=0.125,
                                           ratio_3x3=0.5, expand_filters=384)
    conv4_params = get_parameter_number(self.student_model_conv4)
    acc(student_params, addi_params, sign=-1)
    acc(student_params, conv4_params)
    acc(student_params, d4_params)
    se_params = get_parameter_number(self.ses_t[2])
    acc(student_params, se_params)
    addi_params = get_parameter_number(self.additional_classifiers[2])
    acc(student_params, addi_params)
    print('Classifier 3 has params : ', student_params)
    string += 'Classifier 3 params : ' + str(student_params) + '\n'

    self.student_model_conv5 = Fire_Module(in_channels=384, squeeze_ratio=0.125,
                                           ratio_3x3=0.5, expand_filters=512)
    conv5_params = get_parameter_number(self.student_model_conv5)
    acc(student_params, addi_params, sign=-1)
    acc(student_params, conv5_params)
    acc(student_params, e4_params)
    se_params = get_parameter_number(self.ses_t[3])
    acc(student_params, se_params)
    addi_params = get_parameter_number(self.additional_classifiers[3])
    acc(student_params, addi_params)
    print('Classifier 4 has params : ', student_params)
    string += 'Classifier 4 params : ' + str(student_params) + '\n'

    self.student_model_conv6 = Fire_Module(in_channels=512, squeeze_ratio=0.125,
                                           ratio_3x3=0.5, expand_filters=512)
    conv6_params = get_parameter_number(self.student_model_conv6)
    acc(student_params, addi_params, sign=-1)
    acc(student_params, conv6_params)
    acc(student_params, a5_params)
    se_params = get_parameter_number(self.ses_t[4])
    acc(student_params, se_params)
    addi_params = get_parameter_number(self.additional_classifiers[4])
    acc(student_params, addi_params)
    print('Classifier 5 has params : ', student_params)
    string += 'Classifier 5 params : ' + str(student_params) + '\n'

    with open('./models_def/googlenet_params.txt', 'a+') as f:
        # save parameter counts
        f.write(string)
        f.flush()
    self.model_config = model_config
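# Hypothetical usage of the bookkeeping above (a sketch: building GoogleNet
# requires this repo's Inception_Module, Fire_Module, and the distillation
# base class, none of which are shown here):
#
#     net = GoogleNet(num_classes=100, model_config='all')
#     print(get_parameter_number(net))   # teacher + student + all heads,
#                                        # so larger than any per-classifier total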
}, {
    'params': [p for n, p in param_optimizer
               if any(nd in n for nd in no_decay)],
    'weight_decay': 0.0
}]
# if train_loader is a DataLoader, len(train_loader) already counts batches,
# so the extra division by batch_size undercounts; only the commented-out
# BertAdam below consumes this value
num_train_steps = len(train_loader) // para.batch_size * para.epoch
# optimizer = BertAdam(optimizer_grouped_parameters,
#                      lr=para.lr,
#                      warmup=para.warmup_proportion,
#                      t_total=num_train_steps)
optimizer = optim.SGD(model.parameters(), lr=para.lr)
evaluator = Evaluator(model, tokenizer, para)
print(utils.get_parameter_number(model))
# result = model([text_ids, text_mask])
# print(result)
# print(np.shape(result))
print(evaluator.get_embedding(text1))

for epoch in range(para.epoch):
    loss_list = []
    for step, data in tqdm(enumerate(train_loader)):
        X1, X2 = data
        X1ids, X2ids, X1mask, X2mask = [], [], [], []
        for i in range(len(X1)):
            x1token = ['[CLS]'] + tokenizer.tokenize(X1[i]) + ['[SEP]']
            x2token = ['[CLS]'] + tokenizer.tokenize(X2[i]) + ['[SEP]']
            X1ids.append(tokenizer.convert_tokens_to_ids(x1token))
    else:
        raise NotImplementedError
    return prediction, x


if __name__ == '__main__':
    import os
    import time
    import anyconfig
    from utils import parse_config, load, get_parameter_number

    config = anyconfig.load(open("config/imagedataset_None_VGG_RNN_CTC.yaml", 'rb'))
    if 'base' in config:
        config = parse_config(config)
    if os.path.isfile(config['dataset']['alphabet']):
        config['dataset']['alphabet'] = load(config['dataset']['alphabet'])
    device = torch.device('cpu')
    net = Model(3, 95, config['arch']['args']).to(device)
    print(net.model_name, len(config['dataset']['alphabet']))

    a = torch.randn(2, 3, 32, 320).to(device)
    text_for_pred = torch.LongTensor(2, 25 + 1).fill_(0)
    tic = time.time()
    for i in range(1):
        b = net(a, text_for_pred)[0]
        print(b.shape)
    print((time.time() - tic) / 1)
    print(get_parameter_number(net))
                      hparams)
train_loader = DataLoader(train_dataset, 1, True,
                          num_workers=4, pin_memory=False)
test_loader = DataLoader(test_dataset, 1, True,
                         num_workers=4, pin_memory=True)

gc.collect()
torch.cuda.empty_cache()

model = VAD(hparams).to(DEVICE)
get_parameter_number(model)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = Scheduler(optimizer, init_lr=1e-3, final_lr=1e-5,
                      decay_rate=hparams.vad_decay_rate,
                      start_decay=hparams.vad_start_decay,
                      decay_steps=hparams.vad_decay_steps)
loss_fn = add_loss

if not RESUME:
    print('{} New Training...'.format(datetime.now().strftime(_format)[:-3]))
    train_losses, test_losses = train(model, train_loader, test_loader,
from loss import FocalLoss, ExpandMSELoss, CombinedLoss
from utils import get_parameter_number, save_result

TASK = "classification"

if __name__ == "__main__":
    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
    print("begin to load data")
    data = load_data("data", "merge.npy")
    if TASK == "classification":
        train_dataset, valid_dataset = data.filter_na().filter_type().\
            transformX().transformY().split()
        model = SimpleCNNClassification().to(device)
        print("model %s param number: %s"
              % (repr(model), get_parameter_number(model)))
        criterion = nn.CrossEntropyLoss().to(device)
        # criterion = FocalLoss(classes=2,
        #                       alpha=torch.FloatTensor([1, 1]).to(device),
        #                       size_average=False).to(device)
        optimizer = opt.Adam(model.parameters(), lr=1e-4)
    elif TASK == "regression":
        train_dataset, valid_dataset = data.filter_na().filter_type().\
            filter_value().transformX().split()
        model = SimpleCNNRegression().to(device)
        print("model %s param number: %s"
              % (repr(model), get_parameter_number(model)))
        # criterion = MSELoss().to(device)
        criterion = ExpandMSELoss().to(device)
        optimizer = opt.Adam(model.parameters(), lr=1e-4, weight_decay=5e-7)

    train_dataloader = DataLoader(train_dataset, batch_size=500,