def __init__(self, users, problem):
    """Set up a user group and pre-compute its reference optima.

    Args:
        users: the group members; each must expose ``preference_weights``.
        problem: problem description supplying the weight dimensions
            (``types``, ``areas``, ``roomtypes``, ``max_rooms``,
            ``features``, ``options``).
    """
    self.users = users
    self.problem = problem
    self.size = len(users)

    # Group optima: get_x_star(False) feeds the "min" slot,
    # get_x_star(True) the "avg" slot.
    self.x_star_min = self.get_x_star(False)
    self.x_star_avg = self.get_x_star(True)

    # Cache on every member the utility they get from each group optimum.
    for member in self.users:
        member.utility_star_group_x_star_min = self.x_star_min.get_utility(
            member.preference_weights)
        member.utility_star_group_x_star_avg = self.x_star_avg.get_utility(
            member.preference_weights)

    # State used only by the average-perceptron strategy: four zeroed
    # weight blocks shaped after the problem description.
    block_shapes = (
        problem.types,
        problem.areas,
        (problem.roomtypes, problem.max_rooms),
        (problem.features, problem.options),
    )
    self.weights = [np.zeros(shape) for shape in block_shapes]
    self.avg_weights_star = self.get_avg_weights()
    self.x_star_average_perceptron = utils.model(self.avg_weights_star,
                                                 problem)
    self.utility_star_x_star = self.x_star_average_perceptron.get_utility(
        self.avg_weights_star)
def eval_epoch(model, data_loader, loss_fn, device, epoch_num, tensorboard_writer):
    """Evaluate ``model`` over ``data_loader`` and report the mean loss.

    Only the first element of the model output is scored. The result is
    the sample-weighted mean of the per-batch losses.

    Args:
        model: module whose call result is indexable; element 0 is scored.
        data_loader: iterable of ``(batch, labels)`` tensor pairs.
        loss_fn: callable ``(prediction, labels) -> scalar tensor``.
        device: device the tensors are moved to.
        epoch_num: epoch index used in the log message and scalar step.
        tensorboard_writer: object with ``add_scalar`` or ``None``.

    Returns:
        float: accumulated loss divided by the number of samples seen.

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no samples.
    """
    model.eval()
    loss_total = 0.0
    sample_count = 0
    # Gradients are never needed during evaluation.
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            main_output = model(inputs)[0]
            n_samples = inputs.size(0)
            loss_total += loss_fn(main_output, targets).item() * n_samples
            sample_count += n_samples

    loss = loss_total / sample_count
    message = f"epoch {epoch_num}: loss value={loss}"
    logging.info(message)
    print(message)
    if tensorboard_writer is not None:
        tensorboard_writer.add_scalar('Loss', loss, epoch_num)
    return loss
def fit_epoch(model, data_loader, loss_fn, optimizer, device):
    """Train ``model`` for one epoch using a main and two auxiliary outputs.

    The model is called once per batch; its first three outputs are each
    scored against the labels and the summed loss is back-propagated in a
    single pass. (Calling the model separately for every output would
    triple the forward cost and, with dropout or batch-norm, score three
    different stochastic passes.)

    Args:
        model: module whose call result is indexable with at least three
            entries: main prediction, auxiliary 1, auxiliary 2.
        data_loader: iterable of ``(batch, labels)`` tensor pairs.
        loss_fn: callable ``(prediction, labels) -> scalar tensor``.
        optimizer: optimizer over the model parameters.
        device: device the tensors are moved to.

    Returns:
        float: sample-weighted mean of the main-output loss.

    Raises:
        ZeroDivisionError: if ``data_loader`` yields no samples.
    """
    model.train(True)
    running_loss = 0.0
    processed_data = 0
    for batch, labels in data_loader:
        batch = batch.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()

        # Single forward pass shared by all three heads.
        outputs = model(batch)
        loss_x = loss_fn(outputs[0], labels)
        loss_aux1 = loss_fn(outputs[1], labels)
        loss_aux2 = loss_fn(outputs[2], labels)

        # Gradient of the sum equals the sum of the three gradients.
        (loss_x + loss_aux1 + loss_aux2).backward()
        optimizer.step()

        # Only the main head is reported.
        running_loss += loss_x.item() * batch.size(0)
        processed_data += batch.size(0)
    train_loss = running_loss / processed_data
    return train_loss
def main():
    """Load saved weights, predict on the test set and print a report.

    Command-line options (all optional, string-valued) select the test
    images, the test labels and the saved model file.
    """
    # NOTE(review): the option strings end with '='. They are kept exactly
    # as-is; `--x_test_dir=PATH` presumably resolves through argparse's
    # prefix matching -- confirm before renaming them.
    option_specs = (
        ('--x_test_dir=', 'x_test_dir', 'dataset/t10k-images.idx3-ubyte'),
        ('--y_test_dir=', 'y_test_dir', 'dataset/t10k-labels.idx1-ubyte'),
        ('--model_input_dir=', 'model_input_dir', 'result.npy'),
    )
    parser = argparse.ArgumentParser(description='predict.py')
    for flag, dest, default in option_specs:
        parser.add_argument(flag, dest=dest, default=default, type=str)
    options = parser.parse_args()

    weights = np.load(options.model_input_dir)
    features, labels = prepare_data(options.x_test_dir, options.y_test_dir)
    predictions = model(features, weights)
    # Second argument is the literal 10 from the original call.
    prediction_matrix = y_to_matrix(predictions, 10)
    print(classification_report(labels, prediction_matrix))
def evaluate(data_source):
    """Return the length-weighted average loss of the global model.

    Reads the module-level ``model``, ``corpus``, ``args``, ``criterion``
    and ``eval_batch_size``.

    Args:
        data_source: batched evaluation tensor; it is walked along
            dimension 0 in steps of ``args.bptt``.

    Returns:
        float: summed ``len(data) * loss`` over all chunks divided by
        ``len(data_source) - 1``.
    """
    # Evaluation mode disables dropout.
    model.eval()
    vocab_size = len(corpus.dictionary)
    is_transformer = args.model == 'Transformer'
    if not is_transformer:
        # Recurrent models carry a hidden state across chunks.
        hidden = model.init_hidden(eval_batch_size)

    loss_sum = 0.
    with torch.no_grad():
        for start in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, start)
            if is_transformer:
                output = model(data)
            else:
                output, hidden = model(data, hidden)
                hidden = repackage_hidden(hidden)
            chunk_loss = criterion(output.view(-1, vocab_size), targets).item()
            loss_sum += len(data) * chunk_loss
    return loss_sum / (len(data_source) - 1)
def add_phrase(self, phrase):
    """Append ``phrase`` to the corpus and to the persisted HNSW index.

    The index is rebuilt from the file ``"phrase_index"`` on every call
    (with room for one more element), extended with the embedding of
    ``phrase`` and saved back to the same file.

    Args:
        phrase: the phrase to store; it is embedded with ``utils.model``.

    Returns:
        tuple: ``(phrase_id, phrase)`` where ``phrase_id`` is the position
        of the phrase in ``self.phrase_corpus``.
    """
    # Embed first so a failing encoder leaves the corpus untouched.
    embedding = utils.model([phrase])

    phrase_id = len(self.phrase_corpus)
    self.phrase_corpus.append(phrase)
    capacity = len(self.phrase_corpus)

    self.phrase_index = hnswlib.Index('cosine', 512)
    if capacity > 1:
        # load_index initialises the index from disk by itself; calling
        # init_index beforehand only allocates a structure that is thrown
        # away immediately.
        self.phrase_index.load_index("phrase_index", max_elements=capacity)
    else:
        # Very first phrase: nothing persisted yet, start a fresh index.
        self.phrase_index.init_index(capacity,
                                     ef_construction=200,
                                     M=48,
                                     random_seed=36)
    self.phrase_index.add_items(embedding)
    self.phrase_index.save_index("phrase_index")
    return phrase_id, phrase
def test(model, metric_fc, test_loader):
    """Compute the mean batch accuracy of ``model`` on the test loader.

    The model is switched to evaluation mode for the duration of the call
    and always switched back to training mode afterwards, even when a
    batch raises.

    Args:
        model: feature extractor, called on the spectrogram batch.
        metric_fc: classification head, called as
            ``metric_fc(feature, label)``.
        test_loader: callable returning an iterable of
            ``(spec_mag, label)`` batches.

    Returns:
        float: unweighted mean of the per-batch accuracies.

    Raises:
        ZeroDivisionError: if the loader yields no batches.
    """
    model.eval()
    accuracies = []
    try:
        # No gradients are needed for evaluation; skipping the autograd
        # graph saves memory.
        with paddle.no_grad():
            for spec_mag, label in test_loader():
                feature = model(spec_mag)
                output = metric_fc(feature, label)
                label = paddle.reshape(label, shape=(-1, 1))
                acc = accuracy(input=output, label=label)
                accuracies.append(acc.numpy()[0])
    finally:
        model.train()
    return float(sum(accuracies) / len(accuracies))
def calc_sentence_logprob(model, sentence):
    """Calculate the log-probability of a sentence under ``model``.

    Args:
        model: callable mapping the sentence to a 2-D tensor of logits,
            one row per predicted position and one column per vocabulary
            id (the shape is implied by the ``gather`` along dim 1).
        sentence: sequence of integer token ids; ``sentence[1:]`` are the
            tokens being scored.

    Returns:
        float: sum of the log-probabilities of ``sentence[1:]``, or
        ``-inf`` for an empty sentence.
    """
    if len(sentence) < 1:
        return -float('inf')
    logits = model(sentence)
    # Normalise over the vocabulary axis (dim 1), the axis gather() reads.
    # log_softmax is also numerically safer than log(softmax(...)).
    log_probs = F.log_softmax(logits, dim=1)
    ids = torch.tensor(sentence[1:], dtype=torch.long,
                       device=log_probs.device)
    sentence_log_prob = torch.sum(log_probs.gather(1, ids.view(-1, 1)))
    return sentence_log_prob.item()
def __init__(self, problem, weight_range, nb_optimal_preferences):
    """Create a user with generated preferences and zeroed learnt weights.

    Args:
        problem: problem description supplying the weight dimensions.
        weight_range: forwarded to ``generate_weights``.
        nb_optimal_preferences: forwarded to ``generate_weights``.
    """
    self.problem = problem
    self.preference_weights = self.generate_weights(nb_optimal_preferences,
                                                    weight_range)

    # The user's own optimum and the utility it yields.
    self.x_star = utils.model(self.preference_weights, problem)
    self.utility_star_x_star = self.get_utility_star(self.x_star.phi)

    # Left unset here; nothing in this constructor assigns them a value.
    self.utility_star_group_x_star_min = None
    self.utility_star_group_x_star_avg = None

    # Current weight estimate: four zeroed blocks.
    block_shapes = (
        problem.types,
        problem.areas,
        (problem.roomtypes, problem.max_rooms),
        (problem.features, problem.options),
    )
    self.current_weights = [np.zeros(shape) for shape in block_shapes]
def deduplicate(self, phrase):
    """Return the stored phrase closest to ``phrase``, adding it if new.

    The phrase is added through ``add_phrase`` when the corpus is empty,
    when the index returns no neighbour, or when the nearest neighbour is
    farther away than the module-level ``distance_threshold``.

    Args:
        phrase: the phrase to look up.

    Returns:
        tuple: ``(index, phrase)`` of the matching (or newly added) entry
        in ``self.phrase_corpus``.
    """
    if len(self.phrase_corpus) == 0:
        return self.add_phrase(phrase)

    neighbors = self.phrase_index.knn_query(utils.model([phrase]))
    # Expected shape is a (labels, distances) pair; anything else is
    # treated as "no neighbour found".
    labels, distances = neighbors if len(neighbors) == 2 else ((), ())

    # len() works for lists and arrays alike, unlike `array == []`, whose
    # truth value is ambiguous.
    if len(labels) == 0 or len(labels[0]) == 0:
        return self.add_phrase(phrase)
    if distances[0][0] > distance_threshold:
        return self.add_phrase(phrase)

    return_phrase = self.phrase_corpus[labels[0][0]]
    return self.phrase_corpus.index(return_phrase), return_phrase
def train():
    """Run one training epoch of the global model with manual SGD.

    Reads the module-level ``model``, ``corpus``, ``args``, ``train_data``,
    ``criterion``, ``lr`` and ``epoch``. Progress is printed every
    ``args.log_interval`` batches.
    """
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was
        # previously produced. If we didn't, the model would try
        # backpropagating all the way to start of the dataset.
        model.zero_grad()
        if args.model == 'Transformer':
            output = model(data)
        else:
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in
        # RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            if p.grad is None:
                # Parameter did not take part in this step.
                continue
            # Keyword form replaces the deprecated add_(scalar, tensor).
            p.data.add_(p.grad.data, alpha=-lr)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch,
                    len(train_data) // args.bptt, lr,
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
def evaluate(args, model, tokenizer, prefix=""):
    """Evaluate a language model and write its perplexity to disk.

    Each batch is used as both input and label. The mean of the per-batch
    losses is exponentiated into a perplexity, which is logged and written
    to ``<args.output_dir>/<prefix>/eval_results.txt``.

    Args:
        args: run configuration (``output_dir``, ``local_rank``, ``n_gpu``,
            ``device``, ``eval_batch_size``).
        model: model called as ``model(inputs, labels=labels)``; element 0
            of its output is the loss.
        tokenizer: forwarded to the dataset / dataloader helpers.
        prefix: sub-directory for the results file, also shown in logs.

    Returns:
        dict: ``{"perplexity": tensor}``.

    Raises:
        ZeroDivisionError: if the evaluation dataloader is empty.
    """
    eval_output_dir = args.output_dir

    eval_dataset = load_and_cache_examples(args, tokenizer, evaluate=True)

    if args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir, exist_ok=True)

    # Prepare dataloader
    eval_dataloader, args = get_dataloader(eval_dataset, tokenizer, args,
                                           split='eval')

    # multi-gpu evaluate; do not wrap a model that is already wrapped
    if args.n_gpu > 1 and not isinstance(model, torch.nn.DataParallel):
        model = torch.nn.DataParallel(model)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info(" Num examples = {}".format(len(eval_dataset)))
    logger.info(" Batch size = {}".format(args.eval_batch_size))
    eval_loss = 0.0
    nb_eval_steps = 0
    model.eval()

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        inputs, labels = (batch, batch)
        inputs = inputs.to(args.device)
        labels = labels.to(args.device)

        with torch.no_grad():
            outputs = model(inputs, labels=labels)
            lm_loss = outputs[0]
            eval_loss += lm_loss.mean().item()
        nb_eval_steps += 1

    eval_loss = eval_loss / nb_eval_steps
    perplexity = torch.exp(torch.tensor(eval_loss))

    result = {"perplexity": perplexity}

    output_eval_file = os.path.join(eval_output_dir, prefix,
                                    "eval_results.txt")
    # With a non-empty prefix the parent directory may not exist yet.
    os.makedirs(os.path.dirname(output_eval_file), exist_ok=True)
    with open(output_eval_file, "w") as writer:
        logger.info("***** Eval results {} *****".format(prefix))
        for key in sorted(result.keys()):
            logger.info(" %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))

    return result
def add_node(self, phrase):
    """Append ``phrase`` to the corpus and to the persisted node index.

    The index is stored in the file named ``self.name``. Once
    ``self.phrase_corpus_length`` exceeds 2 it is reloaded from that file
    (with room for the new element) before the phrase is added; below
    that a fresh index is started.

    Args:
        phrase: the phrase to store; it is embedded with ``utils.model``.
    """
    # NOTE(review): the `> 2` threshold is kept from the original; whether
    # it lines up index labels with corpus positions depends on the
    # initial value of `phrase_corpus_length`, not visible here -- confirm.

    # Embed first so a failing encoder leaves the bookkeeping untouched.
    embedding = utils.model([phrase])

    self.phrase_corpus_length += 1
    self.phrase_corpus.append(phrase)

    reload_saved = self.phrase_corpus_length > 2
    if reload_saved:
        # Drop the old index before building its replacement.
        self.node_index = None
    self.node_index = hnswlib.Index('cosine', 512)
    if reload_saved:
        # load_index initialises the index from disk by itself; a prior
        # init_index would only be discarded.
        self.node_index.load_index(self.name,
                                   max_elements=self.phrase_corpus_length)
    else:
        self.node_index.init_index(self.phrase_corpus_length,
                                   ef_construction=200,
                                   M=48,
                                   random_seed=36)
    self.node_index.add_items(embedding)
    self.node_index.save_index(self.name)
def return_node(self, phrase):
    """Return the stored phrase closest to ``phrase``, adding it if new.

    The phrase is first re-joined from the tokens produced by
    ``utils.sp``. If no index exists yet the phrase is added before the
    lookup. It is added (and returned as-is) when the index yields no
    neighbour or the nearest one is farther than
    ``self.distance_threshold``.

    Args:
        phrase: the phrase to look up.

    Returns:
        The matching phrase from ``self.phrase_corpus``, or the
        (re-joined) input phrase when it was added as a new node.
    """
    rejoined = ' '.join([token.text for token in utils.sp(phrase)])
    if len(rejoined) > 1:
        phrase = rejoined

    if self.node_index is None:
        self.add_node(phrase)

    neighbors = self.node_index.knn_query(utils.model([phrase]))
    # Expected shape is a (labels, distances) pair; anything else is
    # treated as "no neighbour found".
    labels, distances = neighbors if len(neighbors) == 2 else ((), ())

    # len() works for lists and arrays alike, unlike `array == []`, whose
    # truth value is ambiguous.
    if len(labels) == 0 or len(labels[0]) == 0:
        self.add_node(phrase)
        return phrase
    if distances[0][0] > self.distance_threshold:
        self.add_node(phrase)
        return phrase
    return self.phrase_corpus[labels[0][0]]
def calc_sent_loss(sent, model, criterion, lang_ids=None):
    """
    Calculate the loss value for the entire sentence.

    Args:
        sent: token sequence; ``sent[1:]`` are the prediction targets.
        model: callable ``(sent, lang_ids) -> (logits, lang_ids_pred)``
            that also exposes a ``vocab`` mapping tokens to ids.
        criterion: loss callable ``(prediction, target) -> tensor``.
        lang_ids: optional per-token language tags. 'eng', 'engspa' and
            '<s>' map to class 1, every other tag to class 0.

    Returns:
        The token loss, plus the language-id loss when ``lang_ids`` is
        given.
    """
    if lang_ids is not None:
        class_one_tags = ('eng', 'engspa', '<s>')
        binary_tags = [1 if tag in class_one_tags else 0 for tag in lang_ids]
        lang_ids = torch.LongTensor(binary_tags).to(DEVICE)

    target_ids = [model.vocab[tok] for tok in sent[1:]]
    targets = torch.LongTensor(target_ids).to(DEVICE)

    logits, lang_ids_pred = model(sent, lang_ids)
    loss = criterion(logits, targets)
    if lang_ids is not None:
        # The first tag has no prediction, mirroring sent[1:] above.
        loss += criterion(lang_ids_pred, lang_ids[1:])
    return loss
def __init__(self):
    """Create a particle at a random position in hyper-parameter space.

    One value is drawn at random for every key of the hyper-parameter
    dictionary (keys taken in sorted order) to form the position vector.
    """
    # NOTE: this is the module-level dictionary itself, not a copy, so the
    # 'activation' override below is visible to every other user of it.
    self.hp_dict = utils.hp_dictionary
    self.hp_dict['activation'] = [1., 2., 3.]

    drawn = []
    for name in sorted(self.hp_dict):
        candidates = list(self.hp_dict[name])
        drawn.append(random.sample(candidates, 1)[0])
    self.position = np.array(drawn)

    # Personal best starts at the initial position (same array object).
    self.pbest_position = self.position
    self.pbest_value = float('inf')
    self.velocity = np.zeros(len(self.position), dtype='float')
    self.low = [0., 0., 0., 0.]
    self.high = [0., 0., 0., 0.]
    self.overal = 0.
    self.model = utils.model()
    self.update_model()
def generate_sent(model, max_len):
    """
    Generate a sentence by sampling one word at a time.

    Args:
        model: callable on a token list whose first output holds the
            logits; exposes ``vocab`` (token -> id) and ``vocab.itos``
            (id -> token).
        max_len: maximum number of words to generate.

    Returns:
        list: the generated words, without the '<s>' boundary token.
    """
    history = ['<s>']
    # '<s>' doubles as the end-of-sentence marker.
    stop_id = model.vocab['<s>']
    words = []
    while len(words) < max_len:
        logits = model(history + ['<s>'])[0]
        if logits.dim() > 1:
            # Keep only the last row of the logits.
            logits = logits[-1]
        next_id = gumbel_argmax(logits, dim=0)
        if next_id == stop_id:
            break
        token = model.vocab.itos[next_id]
        words.append(token)
        history.append(token)
    return words
def init():
    """Build the multi-scale graph and restore the checkpoint.

    Populates the module-level ``tf_img`` (one input placeholder per
    scale), ``outputs`` (the matching model outputs) and ``sess`` (a
    session with the checkpoint variables restored).
    """
    global tf_img, outputs, sess

    model_dir = '/var/azureml-app/azureml-models/personlabV1/2/personlab/'
    scales = [1.0, 1.5, 2.0]

    tf_img = []
    outputs = []
    for scale in scales:
        # Square input whose side is the base size 401 times the scale.
        side = int(scale * 401)
        placeholder = tf.placeholder(tf.float32, shape=[1, side, side, 3])
        tf_img.append(placeholder)
        outputs.append(utils.model(placeholder))

    sess = tf.Session()
    saver = tf.train.Saver(var_list=tf.global_variables())
    saver.restore(sess, model_dir + 'model.ckpt')
def compute_correlations(query: ExplainerQuery):
    """
    Correlate every token of the sequence with each candidate label.

    For each label the hypothesis template is filled in and paired with
    the sequence. One forward pass yields the encoder hidden states; all
    layers are concatenated per token and the correlation between each
    token and the label tokens is computed. Contributions of multi-token
    labels are summed, then normalised by the label's average
    self-correlation.

    Returns a dataframe and an array:

    - The dataframe has one row per sequence token; the first column is
      "word" (text of the token) and each further column is a label
      holding that label's correlation with the token.
    - The array holds one score per label: the softmax weight of the
      entailment logit against the contradiction logit.
    """
    template = query.hypothesis_template
    labels = query.labels
    sequence = query.sequence

    queries = [
        SingleQuery(hypothesis=template.format(label), sequence=sequence)
        for label in labels
    ]
    tokens = utils.tokenize(queries)

    # Single forward pass that also returns the encoder hidden states.
    with torch.no_grad():
        outputs = utils.model(
            input_ids=tokens["input_ids"],
            attention_mask=tokens["attention_mask"],
            output_hidden_states=True,
            return_dict=True,
        )

    # Keep only the contradiction (0) and entailment (2) logits and take
    # the softmax weight of entailment.
    contradiction_id, entailment_id = 0, 2
    logits = outputs["logits"].detach().cpu().numpy()
    logits = logits[..., [contradiction_id, entailment_id]]
    exp_logits = np.exp(logits)
    scores = (exp_logits / exp_logits.sum(-1, keepdims=True))[..., -1]

    # (layer, label, token, embedding) -> (label, token, layer, embedding)
    # -> (label, token, layer*embedding)
    hidden_layers = outputs["encoder_hidden_states"]
    state = np.array([h.detach().cpu().numpy() for h in hidden_layers])
    state = state.transpose(1, 2, 0, 3)
    state = state.reshape((*state.shape[:-2], -1))

    # Token lengths of the hypothesis prefix and of the response.
    prefix = template.split("{}")[0]
    n_prefix = _measure_token_length(prefix)
    n_response = _measure_token_length(sequence)

    df = pd.DataFrame()
    for label, vectors in zip(labels, state):
        # Token span of the label inside the encoded pair.
        # NOTE(review): the +2 presumably skips separator tokens -- confirm.
        start = n_response + n_prefix + 2
        stop = start + _measure_token_length(label)

        # (tokens, label_length) correlations, summed over label tokens.
        corr = 1 - cdist(vectors, vectors[start:stop], metric="correlation")
        corr = np.sum(corr, axis=1)

        # Normalise to the label's self-correlation.
        corr /= np.average(corr[start:stop])

        # Keep the response tokens only.
        df[label] = corr[1:n_response + 1]

    df.insert(0, "word", _token_chunks(sequence))
    return df, scores
def run(group, problem, alpha, file_prefix, total_time, simulation_time):
    """Run the average-perceptron strategy for a group.

    Each iteration recommends an object from the current group weights,
    records the group regret and every user's individual regret, lets
    every user propose an improvement and updates the group weights with
    the averaged improvement. The run stops early when the regret is
    exactly 0. The results are pickled through ``utils.save_to_pickle``.

    Args:
        group: the user group (``weights``, ``users``, ``size``,
            ``avg_weights_star``, ``utility_star_x_star``).
        problem: problem description; ``iterations`` bounds the loop.
        alpha: forwarded to each user's ``step``.
        file_prefix: forwarded to ``utils.save_to_pickle``.
        total_time: time already spent, added to the reported runtime.
        simulation_time: accumulated user-simulation time; the time spent
            in ``step`` calls is added to it.

    Returns:
        tuple: ``(recommended object, simulation_time, pickle file)``.
    """
    regret_min = []
    regret_avg = []
    r = []
    individual_regret = np.zeros([group.size, problem.iterations])
    start_time = time.time()
    # Keeps `counter + 1` valid below when the loop body never runs.
    counter = -1

    ##########RUN PERCEPTRON AVERAGE###############
    for counter in range(problem.iterations - 1):
        x = utils.model(group.weights, problem)
        print('System recommendation using the average perceptron: ',
              x.object)
        regret = group.utility_star_x_star - x.get_utility(
            group.avg_weights_star)
        r.append(regret)

        # Calculate the regret for every individual user
        for i in range(group.size):
            individual_regret[i, counter] = group.users[i].get_regret(x.phi)

        if regret == 0.0:
            final_time = (time.time() - start_time) + total_time
            file = utils.save_to_pickle(x.object, r, regret_min, regret_avg,
                                        regret_min, 'average', file_prefix,
                                        problem, group, simulation_time,
                                        final_time, counter,
                                        individual_regret)
            return x.object, simulation_time, file

        improvements = []
        for u in group.users:
            sim_start_time = time.time()
            x_bar = u.step(x.phi, x.object, alpha)
            improvements.append([np.array(x_bar.phi[k]) for k in range(4)])
            simulation_time += time.time() - sim_start_time
            print('Improvement user ', group.get_index(u), ': ',
                  x_bar.object)

        phi_x_bar = utils.avg_phi_list(improvements, group.problem)
        group.update_weights(phi_x_bar, x.phi)

    # Final recommendation. Read the weights from the group again so the
    # last update is used even if update_weights rebinds `group.weights`;
    # this also works when the loop above ran zero times.
    x = utils.model(group.weights, problem)
    regret = group.utility_star_x_star - x.get_utility(group.avg_weights_star)
    r.append(regret)

    # Calculate the regret for every individual user
    for i in range(group.size):
        individual_regret[i, counter + 1] = group.users[i].get_regret(x.phi)

    final_time = (time.time() - start_time) + total_time
    # save_to_pickle argument order: y, regret_avg, regret_min,
    # regret_su_avg, regret_su_min, strategy, file_prefix, problem, group,
    # sim_time, runtime, counter
    file = utils.save_to_pickle(x.object, r, regret_min, regret_avg,
                                regret_min, 'average', file_prefix, problem,
                                group, simulation_time, final_time,
                                problem.iterations, individual_regret)
    return x.object, simulation_time, file
train_set_x_flatten = train_set_x_orig.reshape( train_set_x_orig.shape[0], -1).T # reshape training set to (nx, m) test_set_x_flatten = test_set_x_orig.reshape( test_set_x_orig.shape[0], -1).T # reshape testing set to (nx, m) train_set_x = train_set_x_flatten / 255 test_set_x = test_set_x_flatten / 255 print("Started modeling") tic = time.process_time() d = utils.model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations=2000, learning_rate=0.005, print_cost=True) toc = time.process_time() print("Ended modeling. Time elapsed: " + str(1000 * (toc - tic)) + "ms") index = 19 print("y = " + str(test_set_y[0, index]) + ", the prediction is that it is a \"" + classes[d["Y_prediction_test"][0, index]].decode("utf-8") + "\" picture.") plt.figure() plt.imshow(test_set_x[:, index].reshape((num_px, num_px, 3))) plt.show()
def run(strategy,
        group,
        problem,
        alpha,
        file_prefix,
        total_time,
        simulation_time,
        avg=False):
    """Run a single-user selection strategy for a group.

    Each iteration picks one user according to ``strategy``, recommends
    an object from that user's current weights, records the regrets, lets
    the user propose an improvement and updates that user's weights. The
    run stops early when the tracked regret is exactly 0. The results are
    pickled through ``utils.save_to_pickle``.

    Args:
        strategy: enum member; ``LEAST_MISERY`` and ``RANDOM`` are
            handled.
        group: the user group.
        problem: problem description; ``iterations`` bounds the loop.
        alpha: forwarded to the chosen user's ``step``.
        file_prefix: forwarded to ``utils.save_to_pickle``.
        total_time: time already spent, added to the reported runtime.
        simulation_time: accumulated user-simulation time; the time spent
            in ``step`` calls is added to it.
        avg: stop on the avg regret instead of the min regret; also
            forwarded to the selection / aggregation helpers.

    Returns:
        tuple: ``(object, simulation_time, pickle file)``.
        NOTE(review): the early exits return ``x_group.object`` while the
        final exit returns ``x_final`` itself -- kept as in the original,
        confirm which one callers expect.

    Raises:
        ValueError: if ``strategy`` is neither LEAST_MISERY nor RANDOM.
    """
    regret_min = []
    regret_avg = []
    regret_su_min = []
    regret_su_avg = []
    individual_regret = np.zeros([group.size, problem.iterations])
    start_time = time.time()
    extra_time = 0
    # Keeps `counter + 1` valid below when the loop body never runs.
    counter = -1

    for counter in range(problem.iterations - 1):
        print('Iteration: (', counter, '/', problem.iterations, ')')

        ######USER SELECTION#######
        if strategy == strategy.LEAST_MISERY:
            user, x_group = utils.get_least_misery_user(group, problem, avg)
        elif strategy == strategy.RANDOM:
            user, x_group, extra_t = utils.get_random_user(
                group, problem, avg)
            extra_time += extra_t
        else:
            # Previously this fell through to an unbound `user`.
            raise ValueError('Unsupported strategy: {!r}'.format(strategy))

        # Find object to present to this user
        x = utils.model(user.current_weights, problem)
        print('System recommendation using the ', strategy.value,
              ' strategy: ', x.object)

        # Group-wide regret uses x_group; x comes from individual weights.
        rgrt_avg = utils.get_regret(x_group, group,
                                    aggregation_function='avg')
        regret_avg.append(rgrt_avg)
        rgrt_min = utils.get_regret(x_group, group,
                                    aggregation_function='min')
        regret_min.append(rgrt_min)

        # Regret for the picked user's object. NOTE: rgrt_avg / rgrt_min
        # are overwritten here, so the stop test below looks at these
        # values, as in the original.
        rgrt_avg = utils.get_regret(x, group, aggregation_function='avg')
        regret_su_avg.append(rgrt_avg)
        rgrt_min = utils.get_regret(x, group, aggregation_function='min')
        regret_su_min.append(rgrt_min)

        # Calculate the regret for every individual user
        for i in range(group.size):
            individual_regret[i, counter] = group.users[i].get_regret(
                x_group.phi)

        # Algorithm ends when the ideal object is found
        if (rgrt_min == 0.0 and not avg) or (rgrt_avg == 0.0 and avg):
            final_time = ((time.time() - start_time) +
                          total_time) - extra_time
            file = utils.save_to_pickle(x_group.object, regret_avg,
                                        regret_min, regret_su_avg,
                                        regret_su_min, strategy.value,
                                        file_prefix, problem, group,
                                        simulation_time, final_time, counter,
                                        individual_regret)
            return x_group.object, simulation_time, file

        # Simulate a step from the chosen user
        start_sim_time = time.time()
        x_bar = user.step(x.phi, x.object, alpha)
        simulation_time += time.time() - start_sim_time
        # NOTE(review): the middle argument was garbled in the source;
        # reconstructed as group.get_index(user) -- confirm.
        print('Improvement user: ', group.get_index(user), ': ',
              x_bar.object)

        # update weights for the chosen user
        user.update_weights(x_bar.phi, x.phi)

    x_final = utils.get_aggregation_object(group, problem, avg)
    regret_avg.append(
        utils.get_regret(x_final, group, aggregation_function='avg'))
    regret_min.append(
        utils.get_regret(x_final, group, aggregation_function='min'))
    regret_su_avg.append(
        utils.get_regret(x_final, group, aggregation_function='avg'))
    regret_su_min.append(
        utils.get_regret(x_final, group, aggregation_function='min'))

    for i in range(group.size):
        individual_regret[i, counter + 1] = group.users[i].get_regret(
            x_final.phi)

    final_time = ((time.time() - start_time) + total_time) - extra_time
    file = utils.save_to_pickle(x_final, regret_avg, regret_min,
                                regret_su_avg, regret_su_min, strategy.value,
                                file_prefix, problem, group, simulation_time,
                                final_time, problem.iterations,
                                individual_regret)
    return x_final, simulation_time, file
def train_epoch(model, tokenizer, optimizer, scheduler, train_dataloader,
                tr_loss, logging_loss, global_step,
                steps_trained_in_current_epoch, tb_writer, args):
    """Train one epoch.

    Each batch serves as both input and label. Gradients are accumulated
    over ``args.gradient_accumulation_steps`` batches, clipped to
    ``args.max_grad_norm`` and applied; the scheduler is stepped after
    every optimizer step. Metrics are written to ``tb_writer`` every
    ``args.logging_steps`` optimizer steps.

    Args:
        model, tokenizer, optimizer, scheduler: training objects.
        train_dataloader: iterable of batches.
        tr_loss: running total of the (accumulation-scaled) losses.
        logging_loss: value of ``tr_loss`` at the previous log point.
        global_step: number of optimizer steps taken so far.
        steps_trained_in_current_epoch: batches to skip when resuming.
            The decremented value is local and is not returned.
        tb_writer: object with ``add_scalar``.
        args: run configuration.

    Returns:
        tuple: ``(model, optimizer, scheduler, global_step, tr_loss,
        logging_loss)``.

    Raises:
        ImportError: if ``args.fp16`` is set and apex is not installed.
    """
    if args.fp16:
        try:
            from apex import amp
        except ImportError as err:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            ) from err

    epoch_iterator = tqdm(train_dataloader,
                          desc="Iteration",
                          disable=args.local_rank not in [-1, 0])
    for step, batch in enumerate(epoch_iterator):

        # Skip past any already trained steps if resuming training
        if steps_trained_in_current_epoch > 0:
            steps_trained_in_current_epoch -= 1
            continue

        inputs, labels = (batch, batch)
        inputs = inputs.to(args.device)
        labels = labels.to(args.device)
        model.train()
        outputs = model(inputs, labels=labels)
        # model outputs are always tuple in transformers (see doc)
        loss = outputs[0]

        if args.n_gpu > 1:
            # mean() to average on multi-gpu parallel training
            loss = loss.mean()
        if args.gradient_accumulation_steps > 1:
            loss = loss / args.gradient_accumulation_steps

        if args.fp16:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        tr_loss += loss.item()
        if (step + 1) % args.gradient_accumulation_steps == 0:
            if args.fp16:
                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer),
                                               args.max_grad_norm)
            else:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               args.max_grad_norm)
            optimizer.step()
            scheduler.step()  # Update learning rate schedule
            model.zero_grad()
            global_step += 1

            # Log metrics
            if (args.local_rank in [-1, 0] and args.logging_steps > 0
                    and global_step % args.logging_steps == 0):
                # Only evaluate when single GPU otherwise metrics may not
                # average well
                if args.local_rank == -1 and args.evaluate_during_training:
                    results = evaluate(args, model, tokenizer)
                    for key, value in results.items():
                        tb_writer.add_scalar("eval_{}".format(key), value,
                                             global_step)
                # get_lr() is deprecated outside scheduler.step(); prefer
                # get_last_lr() and fall back for old PyTorch versions.
                read_lr = getattr(scheduler, "get_last_lr", scheduler.get_lr)
                tb_writer.add_scalar("lr", read_lr()[0], global_step)
                tb_writer.add_scalar("loss", (tr_loss - logging_loss) /
                                     args.logging_steps, global_step)
                logging_loss = tr_loss

            # save checkpoint
            if (args.local_rank in [-1, 0] and args.save_steps > 0
                    and global_step % args.save_steps == 0):
                # NOTE(review): saving is gated on
                # evaluate_during_training, as in the original -- confirm
                # this is intended.
                if args.evaluate_during_training:
                    save_checkpoint(model, optimizer, scheduler, tokenizer,
                                    args, global_step)

        if args.max_steps > 0 and global_step > args.max_steps:
            epoch_iterator.close()
            break

    return model, optimizer, scheduler, global_step, tr_loss, logging_loss
def train(args):
    """Train the feature model with its margin head, evaluating every epoch.

    Initialises the distributed environment, builds the train / test
    loaders, wraps the model and ``ArcMarginProduct`` head for data-parallel
    training, optionally loads pretrained weights or resumes from a
    checkpoint, then trains for ``args.num_epoch`` epochs. Rank 0 logs,
    evaluates with ``test`` and saves the model after every epoch.

    Args:
        args: run configuration. Attributes read here: ``input_shape``,
            ``train_list_path``, ``test_list_path``, ``batch_size``,
            ``num_workers``, ``num_classes``, ``easy_margin``,
            ``learning_rate``, ``pretrained_model``, ``resume``, ``gamma``,
            ``num_epoch``.
    """
    # Set up multi-card (distributed) training
    dist.init_parallel_env()
    if dist.get_rank() == 0:
        shutil.rmtree('log', ignore_errors=True)
        # Log writer (created on rank 0 only)
        writer = LogWriter(logdir='log')
    # Shape of the input data
    # SECURITY NOTE(review): eval() executes arbitrary code from
    # args.input_shape; consider ast.literal_eval if this value can come
    # from an untrusted source.
    input_shape = eval(args.input_shape)
    # Build the datasets and loaders
    train_dataset = CustomDataset(args.train_list_path, model='train', spec_len=input_shape[3])
    train_loader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers)
    test_dataset = CustomDataset(args.test_list_path, model='test', spec_len=input_shape[3])
    test_loader = DataLoader(dataset=test_dataset, batch_size=args.batch_size, num_workers=args.num_workers)
    # Build the model
    model = resnet34()
    metric_fc = ArcMarginProduct(feature_dim=512, class_dim=args.num_classes, easy_margin=args.easy_margin)
    if dist.get_rank() == 0:
        paddle.summary(model, input_size=input_shape)
    # Wrap for multi-card training
    model = paddle.DataParallel(model)
    metric_fc = paddle.DataParallel(metric_fc)
    # Piecewise learning rate: multiplied by 0.1 after each boundary
    boundaries = [10, 30, 70, 100]
    lr = [0.1 ** l * args.learning_rate for l in range(len(boundaries) + 1)]
    scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=boundaries, values=lr, verbose=True)
    # Set up the optimizer over both the model and the margin head
    optimizer = paddle.optimizer.Adam(parameters=model.parameters() + metric_fc.parameters(),
                                      learning_rate=scheduler,
                                      weight_decay=paddle.regularizer.L2Decay(1e-4))
    # Load a pretrained model
    if args.pretrained_model is not None:
        model_dict = model.state_dict()
        param_state_dict = paddle.load(os.path.join(args.pretrained_model, 'model.pdparams'))
        for name, weight in model_dict.items():
            if name in param_state_dict.keys():
                # Drop pretrained tensors whose shape does not match the model.
                if weight.shape != list(param_state_dict[name].shape):
                    print('{} not used, shape {} unmatched with {} in model.'.
                          format(name, list(param_state_dict[name].shape), weight.shape))
                    param_state_dict.pop(name, None)
            else:
                print('Lack weight: {}'.format(name))
        model.set_dict(param_state_dict)
        print('成功加载预训练模型参数')
    # Resume training
    if args.resume is not None:
        model.set_state_dict(paddle.load(os.path.join(args.resume, 'model.pdparams')))
        optimizer.set_state_dict(paddle.load(os.path.join(args.resume, 'optimizer.pdopt')))
        print('成功加载模型参数和优化方法参数')
    # Loss function
    loss = FocalLoss(gamma=args.gamma)
    train_step = 0
    test_step = 0
    # Start training
    for epoch in range(args.num_epoch):
        loss_sum = []
        for batch_id, (spec_mag, label) in enumerate(train_loader()):
            feature = model(spec_mag)
            output = metric_fc(feature, label)
            # Compute the loss
            los = loss(output, label)
            loss_sum.append(los)
            los.backward()
            optimizer.step()
            optimizer.clear_grad()
            # With multi-card training only one process prints
            if batch_id % 100 == 0 and dist.get_rank() == 0:
                print('[%s] Train epoch %d, batch_id: %d, loss: %f' % (
                    datetime.now(), epoch, batch_id, sum(loss_sum) / len(loss_sum)))
                writer.add_scalar('Train loss', los, train_step)
                train_step += 1
                # Restart the running-loss window after each report.
                loss_sum = []
        # With multi-card training only one process evaluates and saves
        if dist.get_rank() == 0:
            acc = test(model, metric_fc, test_loader)
            print('[%s] Train epoch %d, accuracy: %f' % (datetime.now(), epoch, acc))
            writer.add_scalar('Test acc', acc, test_step)
            # Record the learning rate
            writer.add_scalar('Learning rate', scheduler.last_lr, epoch)
            test_step += 1
            save_model(args, model, optimizer)
        scheduler.step()
nhid553=588, lrf551=5, lrf552=5, lrf553=7) else: paras = init(nhid1=37, nhid2=12, nhid3=355, lrf1=2, lrf2=2, lrf3=9) if modelname == 'cnn': trainenergy, accuracy, di, testenergy, predarg = cnnmodel(X, Y, paras) elif modelname == 'cnnat': trainenergy, accuracy, di, testenergy, predarg, advloss = cnnatmodel( X, Y, paras, atepsilon) elif modelname == 'crfat': trainenergy, accuracy, di, testenergy, predarg, advloss = crfatmodel( X, Y, k1, k2, paras, atepsilon) elif modelname == 'crf': trainenergy, accuracy, di, testenergy, predarg = model( X, Y, k1, k2, paras) elif modelname == 'cnncomb': trainenergy, accuracy, di, testenergy, predarg = cnnmodel( X, Y, paras, flag='combine') elif modelname == 'cnncombat': trainenergy, accuracy, di, testenergy, predarg, advloss = cnnatmodel( X, Y, paras, atepsilon, flag='combine') elif modelname == 'crfcomb': trainenergy, accuracy, di, testenergy, predarg = model(X, Y, k1, k2, paras, flag='combine') elif modelname == 'crfcombat': trainenergy, accuracy, di, testenergy, predarg, advloss = crfatmodel(