def _get_Q(self, model, model_input): model.reset_noise() if not self.config.use_categorical: return model(model_input) model_output = model(model_input, ApplySoftmax.NORMAL) return torch.sum(model_output * self.support, dim=2)
def get_model_info(year):
    '''Takes in a year, and prints out each model, brand_name, and brand
    headquarters for that year using only ONE database query.

    The original body was non-executable pseudocode (it called
    `Model.query.get(year)` — a primary-key lookup — and contained bare
    prose where the loop body should be). This version issues a single
    join query and iterates its rows.
    '''
    # Single query: join Model to Brand on the brand name, filter by year.
    # NOTE(review): column names (Brand.name, Model.year) are assumed from
    # the schema implied here — confirm against the model definitions.
    rows = (db.session.query(Model.name, Model.brand_name, Brand.headquarters)
            .join(Brand, Model.brand_name == Brand.name)
            .filter(Model.year == year)
            .all())
    for model_name, brand_name, headquarters in rows:
        print(model_name, brand_name, headquarters)
def dev(model, dev_loader, decoder, logger):
    """Evaluate `model` on the dev set and return accuracy as a percentage.

    Args:
        model: network to evaluate (switched to eval mode here).
        dev_loader: yields (inputs, targets, input_sizes, input_sizes_list, target_sizes).
        decoder: decoder object; `space_idx == -1` selects index [1] of
            phone_word_error's result, otherwise index [0].
        logger: accepted but unused in this function.

    Returns:
        (1 - error_rate) * 100.
    """
    model.eval()
    total_cer = 0
    total_tokens = 0
    for data in dev_loader:
        inputs, targets, input_sizes, input_sizes_list, target_sizes = data
        # NOTE(review): batch_size is computed but never used.
        batch_size = inputs.size(1)
        # Reorder to time-major before packing — assumes loader yields batch-major; confirm.
        inputs = inputs.transpose(0, 1)
        # Legacy pre-0.4 PyTorch: volatile=True disables autograd for inference.
        inputs = Variable(inputs, volatile=True, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()
        inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_sizes_list)
        probs = model(inputs)
        probs = probs.data.cpu()
        if decoder.space_idx == -1:
            total_cer += decoder.phone_word_error(probs, input_sizes_list, targets, target_sizes)[1]
        else:
            total_cer += decoder.phone_word_error(probs, input_sizes_list, targets, target_sizes)[0]
        total_tokens += sum(target_sizes)
    acc = 1 - float(total_cer) / total_tokens
    return acc*100
def get_stats():
    """Endpoint: return URL stats when a 'url' form field is present, else an error payload."""
    if 'url' not in request.form:
        return jsonify({'message': errors.HACK, 'response': {}, 'status': '0'})
    return model().get_url_stats(request.form['url'])
def model(model_name, decorator=[]):
    """Resolve a dotted model name into a (possibly decorated) model instance.

    Args:
        model_name: dotted path, resolved attribute-by-attribute on the
            `model` module, then instantiated.
        decorator: list of (decorator_name, arguments) pairs; only allowed
            in the test environment.

    Returns:
        The instantiated model; undecorated instances are cached by name.

    NOTE(review): the mutable default `decorator=[]` is a classic Python
    pitfall — it is never mutated here, but worth replacing with None.
    """
    assert isinstance(model_name, (str, unicode))
    cache_key = model_name
    # Runtime assertion message intentionally left untranslated (user-facing string).
    assert not decorator or config.IS_TEST, 'decorator仅能用于测试环境'
    if not decorator and CACHED_MODELS.has_key(cache_key):
        return CACHED_MODELS[cache_key]
    else:
        # These imports must stay inside this function: a module-level import
        # would form a circular dependency with site_helper and deadlock.
        import model
        import modeldecorator
        try:
            # Walk the dotted path, then instantiate the resolved class.
            for name in model_name.split('.'):
                assert(hasattr(model, name))
                model = getattr(model, name)
            model = model()
        except:
            print 'the name is', name
            print 'the model name is', model_name
            raise
        # Outside tests, always use the model's own decorator list.
        decorator = model.decorator if not config.IS_TEST else decorator
        # Tests force test_decorator and forbid passing an explicit decorator.
        if config.IS_TEST and hasattr(model, 'test_decorator'):
            assert decorator == [], u'使用test_decorator时,不再允许指定decorator'
            decorator = model.test_decorator
        # Apply each decorator in order.
        for d,arguments in decorator:
            model = getattr(modeldecorator, d)(model, arguments)
        # Only undecorated instances are cached.
        if not decorator:
            CACHED_MODELS[cache_key] = model
        return model
def train():
    """Train the language model for one epoch with manual SGD.

    Relies on module globals: model, corpus, args, train_data, criterion,
    lr, epoch, and helpers get_batch / repackage_hidden.
    """
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        # Manual SGD update: p <- p - lr * grad.
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)
        total_loss += loss.data
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
def get_orders(self, sql, model=model.Model_OrderSub):
    """Run `sql` and map every result row onto a fresh order-sub model.

    Columns 0..16 are stringified and assigned in declaration order.
    """
    rows = self.get_all(sql)
    if not rows:
        return []
    # Column order of the query result, one attribute per column.
    fields = ('order_id', 'uid', 'account', 'p_info', 'depart_date',
              'train_no', 'depart_name', 'arrive_name', 'name', 'card_type',
              'card_no', 'phone', 'seat_name', 'ticket_type', 'status',
              'price', 'create_time')
    orders = []
    for row in rows:
        entry = model()
        for col, attr in enumerate(fields):
            setattr(entry, attr, str(row[col]))
        orders.append(entry)
    return orders
def test(model, quesfeaShu, labelShu, lengthShu):
    """Evaluate `model` on the whole test split in one batch and print top-1 precision.

    The examples are sorted by length, descending — presumably required by a
    packed-sequence RNN inside the model; confirm.
    """
    model.eval()
    # Indices that sort the examples by length, longest first.
    idx = sorted(range(len(lengthShu)), key=lambda x: lengthShu[x], reverse=True)
    _quesfeaShu = []
    _labelShu = []
    _lengthShu = []
    for j in range(len(idx)):
        _quesfeaShu.append(quesfeaShu[idx[j]])
        _labelShu.append(labelShu[idx[j]])
        _lengthShu.append(lengthShu[idx[j]])
    questrainarray = np.asarray(_quesfeaShu)
    labeltrainarray = np.asarray(_labelShu)
    lengthtrainarray = np.asarray(_lengthShu)
    tmp = [questrainarray, labeltrainarray, lengthtrainarray]
    tmp = [Variable(torch.from_numpy(_), requires_grad=False) for _ in tmp]
    trques, trlabel, length = tmp
    if args.cuda:
        # NOTE(review): .cuda() is not in-place; this result is discarded.
        trlabel.cuda()
    output = model(trques, length)
    print("precesion 1 : %s" % accuracy(output.data, trlabel.data, topk=(1,), ori_label=labeltrainarray))
def get_or_create(session, model, **kwargs):
    """Fetch the first `model` row matching `kwargs`, creating one if absent.

    Returns:
        (instance, created): `created` is True only when a new row was
        added to the session (not yet committed).
    """
    found = session.query(model).filter_by(**kwargs).first()
    if found:
        return found, False
    fresh = model(**kwargs)
    session.add(fresh)
    return fresh, True
def setSiteConfig(name, value):
    """Create or update the SiteConfig row called `name`; returns the row id."""
    conf = model('SiteConfig')
    assert(name.strip())
    row = conf.getOneByWhere('name=%s', [name])
    if not row:
        # No existing entry: insert and return the new id.
        return conf.insert(dict(name=name, value=str(value)))
    conf.update(row.id, dict(value=str(value)))
    return row.id
def initialization(self):
    """Resolve $CONV_ROOT into self.link, then run the model's initial setup."""
    # `echo` output carries a trailing newline — drop it.
    root = os.popen('echo $CONV_ROOT').read()
    self.link = root[:-1]
    print(self.link)
    mdl = model()
    mdl.initialization()
    mdl.modelAdaptation()
def de(model, baseline_min,baseline_max, max = 100, f = 0.75, cf = 0.3, epsilon = 0.01): curr_candidate_sol = model() # print "FROM DE-->", curr_candidate_sol np = curr_candidate_sol.numOfDec * 10 frontier = [candidate(curr_candidate_sol) for _ in xrange(np)] # for x in frontier: # print "id:", x.id, " have:", x.have, " score:", x.score # print "length of frontier:", len(frontier) # Pending : should you use else if here? for each_thing in frontier: if(each_thing.score < 0): BaseLine.baseline_min = 0 print "--------" if(each_thing.score < BaseLine.baseline_min): BaseLine.baseline_min = each_thing.score print "--------------" if(each_thing.score > BaseLine.baseline_max): BaseLine.baseline_max = each_thing.score print "---------" #Normalize the scores of each thing now # for each_thing in frontier: # prev_each_thing_score = each_thing.score # each_thing.score = float(each_thing.score - BaseLine.baseline_min)/(BaseLine.baseline_max - BaseLine.baseline_min) #total = total score of all the candidates found so far for k in xrange(max): total,n = update(f,cf,frontier,curr_candidate_sol,BaseLine.baseline_min,BaseLine.baseline_max) # print "BASELINE: MIN=", BaseLine.baseline_min," MAX=", BaseLine.baseline_max # if total/n > (1 - epsilon): # print "break: value of k=", k, " total=",total, "n=",n # break # for x in frontier: # print "print --x:",x.id," ",x.have, x.score #Now baseline everything again for each_thing in frontier: each_thing.score = (each_thing.score - BaseLine.baseline_min) / ( BaseLine.baseline_max - BaseLine.baseline_min + 0.001) score_have_dict = { obj.score:obj.have for obj in frontier} print "===================" # for key in sorted(score_have_dict.keys(),reverse = True): # print "%s: %s" % (key, score_have_dict[key]) print "BASELINE: MIN=", BaseLine.baseline_min," MAX=", BaseLine.baseline_max sorted_keys = sorted(score_have_dict.keys(),reverse = True) print "%s: %s" % (sorted_keys[0], score_have_dict[sorted_keys[0]]) return frontier
def getOrCreate(self, model, defaults=None, **kwargs):
    """Return an existing `model` row matching `kwargs`, or add and return a new one.

    NOTE: `defaults` is accepted but currently unused.
    """
    existing = self.session.query(model).filter_by(**kwargs).first()
    if existing:
        return existing  #, False
    # Drop SQL clause elements so they are not handed to the constructor.
    params = dict((k, v) for k, v in kwargs.iteritems()
                  if not isinstance(v, ClauseElement))
    created = model(**params)
    self.session.add(created)
    return created  #, True
def setSiteConfig(name, value):
    """Insert or update the SiteConfig entry `name`; returns the affected row id.

    Unicode values are converted to str before storage.
    """
    conf_model = model('SiteConfig')
    assert(name.strip())
    # Pass bind parameters as a list, consistent with the sibling
    # setSiteConfig variant in this file (getOneByWhere('name=%s', [name])).
    exists = conf_model.getOneByWhere('name=%s', [name])
    if isinstance(value, unicode):
        value = unicodeToStr(value)
    if exists:
        conf_model.update(exists.id, dict(value=str(value)))
        return exists.id
    else:
        return conf_model.insert(dict(name=name, value=str(value)))
def evaluate(data_source, batch_size=10, window=args.window):
    """Evaluate the LM with a neural-cache pointer over the last `window` tokens.

    Mixes the softmax distribution with a pointer distribution built from the
    recent RNN outputs, and returns the average per-token loss.
    """
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if i > 0: print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history: one-hot targets and the matching RNN outputs.
        start_idx = len(next_word_history) if next_word_history is not None else 0
        next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])])
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        ###
        # (The plain cross-entropy variants that this replaces were removed;
        # this is the pointer-augmented manual cross entropy.)
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            p = vocab_loss
            # Only use the pointer once enough history has accumulated.
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                # Attention of the current hidden state over the cached states.
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                # Interpolate pointer and vocabulary distributions.
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        # Keep only the last `window` entries of the caches.
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)
def eval(data_source):
    """Average per-token loss of `model` over `data_source`, carrying hidden
    state across consecutive bptt windows."""
    hidden = model.begin_state(func=mx.nd.zeros, batch_size=args.batch_size, ctx=context)
    loss_sum = 0.0
    token_count = 0
    for offset in range(0, data_source.shape[0] - 1, args.bptt):
        batch, labels = get_batch(data_source, offset)
        output, hidden = model(batch, hidden)
        batch_loss = loss(output, labels)
        loss_sum += mx.nd.sum(batch_loss).asscalar()
        token_count += batch_loss.size
    return loss_sum / token_count
def get_orders(self, sql, model=model.Model_Order):
    """Run `sql` and build one order model per row via setVale(...)."""
    rows = self.get_all(sql)
    if not rows:
        return []
    orders = []
    for row in rows:
        entry = model()
        # setVale expects the first 12 columns, stringified, in order.
        entry.setVale(*[str(row[col]) for col in range(12)])
        orders.append(entry)
    return orders
def valid(epoch, quesfeaShu, labelShu, lengthShu):
    """Run one validation epoch; checkpoint the model when top-1 improves.

    Uses module globals: model, criterion, args, best_score, accuracy,
    AverageMeter. Prints per-batch and per-epoch stats (Python 2 prints).
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    model.eval()
    start_time = time.time()
    # Integer division (Python 2): the final chunk absorbs the remainder.
    for i in range(0, len(quesfeaShu) / args.batch_size):
        if i == len(quesfeaShu) / args.batch_size - 1:
            batchend = len(quesfeaShu)
        else:
            batchend = (i + 1) * (args.batch_size)
        batchstart = i * (args.batch_size)
        batch_size = batchend - batchstart
        quesfeabatch = []
        labelbatch = []
        lengthbatch = []
        quesfeaOri = quesfeaShu[batchstart:batchend]
        labelOri = labelShu[batchstart:batchend]
        lengthOri = lengthShu[batchstart:batchend]
        # Sort the batch by length, descending — presumably for packed sequences; confirm.
        idxbatch = sorted(range(len(lengthOri)), key=lambda x: lengthOri[x], reverse=True)
        for j in range(len(idxbatch)):
            quesfeabatch.append(quesfeaOri[idxbatch[j]])
            labelbatch.append(labelOri[idxbatch[j]])
            lengthbatch.append(lengthOri[idxbatch[j]])
        questrainarray = np.asarray(quesfeabatch)
        labeltrainarray = np.asarray(labelbatch)
        lengthtrainarray = np.asarray(lengthbatch)
        tmp = [questrainarray, labeltrainarray, lengthtrainarray]
        tmp = [Variable(torch.from_numpy(_), requires_grad=False) for _ in tmp]
        trques, trlabel, length = tmp
        if args.cuda:
            # NOTE(review): .cuda() is not in-place; this result is discarded.
            trlabel.cuda()
        output = model(trques, length)
        loss = criterion(output, trlabel) / (batch_size)
        prec1, = accuracy(output.data, trlabel.data, topk=(1,), ori_label=labeltrainarray)  # label 0 or 1
        losses.update(loss.data[0], batch_size)
        top1.update(prec1[0], batch_size)
        # NOTE(review): no backward pass happens here, so this clip is a no-op
        # for this validation loop.
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        print str(top1.avg) + ' ' + str(loss.data[0]) + ' ' + 'batch_valid ' + str(i)
    # Save a checkpoint whenever validation precision improves.
    global best_score
    if top1.avg > best_score:
        torch.save(model, args.save)
        print 'save model'
        best_score = top1.avg
    print str(top1.avg) + ' ' + str(loss.data[0]) + ' ' + 'epoch_valid ' + str(epoch)
def pairingStep(self,N,basis_trunc):
    """Run the mode-pairing step: generate couples, run each 'dance round'
    through CONVIV, and extract the optimal contractions per round.

    NOTE(review): indentation reconstructed from a collapsed source — confirm
    the 'optimal contractions' section really executes inside the round loop.
    """
    ######################## GENERATE THE COUPLES
    X = pairs()
    X.initialization()
    # A holds every couple of every round, in sequence.
    A = X.generatePairs(N)
    # Number of rounds (round-robin pairing).
    if N%2==0:
        tour = N-1
    else:
        tour = N
    print 'tours : ', tour
    # Execute each dance round.
    for i in range(1,tour+1):
        print 'tour', i
        # Couples of the current round.
        B = X.getPairs(N,A,i)
        ######################## WRITE MODELE.INP
        Y = model()
        Y.initialization()
        Y.modesWriting(basis_trunc, B)
        ######################## CONVIV
        self.pairingConvivExecution()
        ######################## SORT ZPE.OUT
        os.system('sort -k6 ' + self.link + '/src/ui/python/work/zpe.out > ' + self.link + '/src/ui/python/work/zpe_trie.out')
        # Deleting zpe.out would stop optimal-contraction energies being
        # appended after the previous pairs' energies:
        # os.system('rm ' + self.link + '/src/ui/python/work/zpe.out')
        ######################## OPTIMAL CONTRACTIONS
        # C holds the optimal contractions.
        C = X.getOptimalContractions(N)
        # Write modele.inp for the contractions and rerun CONVIV.
        Y.contractionsModelWriting()
        Y.modesWriting(basis_trunc, C)
        self.pairingConvivExecution()
        os.system('sort -k6 ' + self.link + '/src/ui/python/work/zpe.out > ' + self.link + '/src/ui/python/work/zpe_trie.out' + str(self.getStep()))
        os.system('rm ' + self.link + '/src/ui/python/work/zpe.out')
        os.system('rm ' + self.link + '/src/ui/python/work/zpe_trie.out')
        os.system('cp ' + self.link + '/src/ui/python/work/out ' + self.link + '/src/ui/python/work/out' + str(self.getStep()))
def get_min_max(model):
    """Sample 2000 random candidate solutions from `model` and return
    (min_score, max_score) of their scores.

    Sentinel bounds 999999 / -999999 are returned unchanged if no sampled
    score falls inside them (preserves the original behaviour).
    """
    # Renamed locals: the original shadowed the builtins `min` and `max`.
    lowest = 999999
    highest = -999999
    for _ in xrange(2000):
        candidate_score = score(model())
        if candidate_score > highest:
            highest = candidate_score
        if candidate_score < lowest:
            lowest = candidate_score
    return (lowest, highest)
def train(model, train_loader, loss_fn, optimizer, logger, print_every=20, USE_CUDA=True):
    '''Train for one epoch, i.e. one full pass over the training set.

    Args:
        model : the network to train
        train_loader : training-set loader object
        loss_fn : loss function, CTCLoss here
        optimizer : optimizer object
        logger : logger object
        print_every : print the loss every `print_every` batches (default 20)
        USE_CUDA : whether to run on GPU
    Returns:
        average_loss : mean loss over the epoch
    '''
    model.train()
    total_loss = 0
    print_loss = 0
    i = 0
    for data in train_loader:
        inputs, targets, input_sizes, input_sizes_list, target_sizes = data
        batch_size = inputs.size(0)
        # Reorder to time-major before packing — assumes loader yields batch-major; confirm.
        inputs = inputs.transpose(0, 1)
        inputs = Variable(inputs, requires_grad=False)
        input_sizes = Variable(input_sizes, requires_grad=False)
        targets = Variable(targets, requires_grad=False)
        target_sizes = Variable(target_sizes, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()
        inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_sizes_list)
        out = model(inputs)
        loss = loss_fn(out, targets, input_sizes, target_sizes)
        # Normalise the CTC loss by batch size.
        loss /= batch_size
        print_loss += loss.data[0]
        if (i + 1) % print_every == 0:
            print('batch = %d, loss = %.4f' % (i+1, print_loss / print_every))
            logger.debug('batch = %d, loss = %.4f' % (i+1, print_loss / print_every))
            print_loss = 0
        total_loss += loss.data[0]
        optimizer.zero_grad()
        loss.backward()
        # Clip gradients to tame exploding gradients in the RNN.
        nn.utils.clip_grad_norm(model.parameters(), 400)
        optimizer.step()
        i += 1
    average_loss = total_loss / i
    print("Epoch done, average loss: %.4f" % average_loss)
    logger.info("Epoch done, average loss: %.4f" % average_loss)
    return average_loss
def train(epoch, optimizer, quesfeaShu, labelShu, lengthShu):
    """Train one epoch over pre-shuffled question features/labels/lengths.

    Uses module globals: model, criterion, args, accuracy, AverageMeter.
    Prints running top-1 precision and loss per batch (Python 2 prints).
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    model.train()
    # Integer division (Python 2): the final chunk absorbs the remainder.
    for i in range(0, len(quesfeaShu) / args.batch_size):
        if i == len(quesfeaShu) / args.batch_size - 1:
            batchend = len(quesfeaShu)
        else:
            batchend = (i + 1) * (args.batch_size)
        batchstart = i * (args.batch_size)
        batch_size = batchend - batchstart
        quesfeabatch = []
        labelbatch = []
        lengthbatch = []
        quesfeaOri = quesfeaShu[batchstart:batchend]
        labelOri = labelShu[batchstart:batchend]
        lengthOri = lengthShu[batchstart:batchend]
        # Sort the batch by length, descending — presumably for packed sequences; confirm.
        idxbatch = sorted(range(len(lengthOri)), key=lambda x: lengthOri[x], reverse=True)
        for j in range(len(idxbatch)):
            quesfeabatch.append(quesfeaOri[idxbatch[j]])
            labelbatch.append(labelOri[idxbatch[j]])
            lengthbatch.append(lengthOri[idxbatch[j]])
        questrainarray = np.asarray(quesfeabatch)
        labeltrainarray = np.asarray(labelbatch)
        lengthtrainarray = np.asarray(lengthbatch)
        tmp = [questrainarray, labeltrainarray, lengthtrainarray]
        tmp = [Variable(torch.from_numpy(_), requires_grad=False) for _ in tmp]
        trques, trlabel, length = tmp
        if args.cuda:
            # NOTE(review): .cuda() is not in-place; this result is discarded.
            trlabel.cuda()
        output = model(trques, length)
        loss = criterion(output, trlabel) / (batch_size)
        prec1, = accuracy(output.data, trlabel.data, topk=(1,))
        losses.update(loss.data[0], batch_size)
        top1.update(prec1[0], batch_size)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        # NOTE(review): clipping after optimizer.step() cannot affect this update.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        print str(top1.avg) + ' ' + str(top1.val) + ' ' + str(loss.data[0]) + ' ' + 'batch ' + str(i)
    print str(top1.avg) + ' ' + str(top1.val) + ' ' + str(loss.data[0]) + ' ' + 'epoch ' + str(epoch)
def train():
    """Train the AWD-LSTM language model for one epoch with a randomised
    BPTT length and LR rescaled proportionally to the sampled length.

    Uses module globals: model, args, corpus, train_data, optimizer,
    criterion, params, epoch.
    """
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        # 95% of the time use args.bptt, otherwise half of it.
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)
        # Rescale the LR to the sampled window; restored after the step.
        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)
        loss = raw_loss
        # Activiation Regularization
        if args.alpha: loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta: loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()
        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
def eval(data_source):
    """Average per-token loss over an iterable of (data, target) batches."""
    hidden = model.begin_state(func=mx.nd.zeros, batch_size=args.batch_size, ctx=context)
    loss_sum = 0.0
    token_count = 0
    for batch, labels in data_source:
        # Move to the compute context; transpose to time-major, and flatten
        # the targets to a single column.
        batch = batch.as_in_context(context).T
        labels = labels.as_in_context(context).T.reshape((-1, 1))
        output, hidden = model(batch, hidden)
        batch_loss = loss(output, labels)
        loss_sum += mx.nd.sum(batch_loss).asscalar()
        token_count += batch_loss.size
    return loss_sum / token_count
def dev(model, dev_loader, loss_fn, decoder, logger, USE_CUDA=True):
    '''Validation pass. Unlike train(), no backward pass is run, and the
    character error rate is computed in addition to the loss.

    Args:
        model : the network
        dev_loader : validation-set loader object
        loss_fn : loss function
        decoder : decoder object that turns network output into text
        logger : logger object
        USE_CUDA : whether to run on GPU
    Returns:
        acc * 100 : character accuracy (word accuracy if space is not a label)
        average_loss : mean validation loss
    '''
    model.eval()
    total_cer = 0
    total_tokens = 0
    total_loss = 0
    i = 0
    for data in dev_loader:
        inputs, targets, input_sizes, input_sizes_list, target_sizes = data
        batch_size = inputs.size(0)
        # Reorder to time-major before packing.
        inputs = inputs.transpose(0, 1)
        inputs = Variable(inputs, requires_grad=False)
        input_sizes = Variable(input_sizes, requires_grad=False)
        targets = Variable(targets, requires_grad=False)
        target_sizes = Variable(target_sizes, requires_grad=False)
        if USE_CUDA:
            inputs = inputs.cuda()
        inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_sizes_list)
        # dev=True makes the model also return per-frame probabilities.
        out, probs = model(inputs, dev=True)
        loss = loss_fn(out, targets, input_sizes, target_sizes)
        loss /= batch_size
        total_loss += loss.data[0]
        probs = probs.data.cpu()
        targets = targets.data
        target_sizes = target_sizes.data
        if decoder.space_idx == -1:
            total_cer += decoder.phone_word_error(probs, input_sizes_list, targets, target_sizes)[1]
        else:
            total_cer += decoder.phone_word_error(probs, input_sizes_list, targets, target_sizes)[0]
        total_tokens += sum(target_sizes)
        i += 1
    acc = 1 - float(total_cer) / total_tokens
    average_loss = total_loss / i
    return acc * 100, average_loss
def add_models(self, line_array):
    """Parse comma-separated parameter columns 3..11 of `line_array` into
    model objects appended to self.models; update self.OKAY accordingly."""
    def _floats(col):
        # Each column is a comma-separated list of floats, one per model.
        return [float(tok) for tok in line_array[col].split(",")]
    mus = _floats(3)
    sis = _floats(4)
    lams = _floats(5)
    pis = _floats(6)
    wsEMG = _floats(7)
    wsF = _floats(8)
    wsR = _floats(9)
    bF = _floats(10)
    bR = _floats(11)
    for i, mu in enumerate(mus):
        self.models.append(model(mu, sis[i], lams[i], pis[i],
                                 wsEMG[i], wsF[i], wsR[i], bF[i], bR[i]))
    # The container is OKAY only if every parsed model reports OKAY.
    self.OKAY = all(m.OKAY for m in self.models)
def evaluate(data_source):
    """Return the average per-token loss over `data_source` with dropout disabled."""
    model.eval()
    vocab = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    running = 0
    for start in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, start, evaluation=True)
        output, hidden = model(data, hidden)
        flat = output.view(-1, vocab)
        # Weight each window's loss by its token count.
        running = running + len(data) * criterion(flat, targets).data
        # Detach so evaluation graphs don't chain across windows.
        hidden = repackage_hidden(hidden)
    return running[0] / len(data_source)
def evaluate(data_source, batch_size=10):
    """Return the average per-token loss; resets QRNN state first when applicable."""
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    running = 0
    for start in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, start, args, evaluation=True)
        output, hidden = model(data, hidden)
        # Weight each window's loss by its token count.
        running = running + len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        # Detach so evaluation graphs don't chain across windows.
        hidden = repackage_hidden(hidden)
    return running.item() / len(data_source)
def get_orders(self, sql, model=model.Model_Order):
    """Run `sql` and map each row onto a fresh model via setValue(columns)."""
    rows = self.get_all(sql)
    if not rows:
        return []
    orders = []
    for row in rows:
        entry = model()
        # Hand the whole row to the model as a plain list of column values.
        entry.setValue(list(row))
        orders.append(entry)
    return orders
def valid(epoch, quesfeaShu, labelShu, lengthShu):
    """Run one validation epoch (top-1 only); checkpoint the model on improvement.

    Uses module globals: model, criterion, args, best_score, accuracy,
    AverageMeter. Prints per-batch and per-epoch stats (Python 2 prints).
    """
    top1 = AverageMeter()
    model.eval()
    # Integer division (Python 2): the final chunk absorbs the remainder.
    for i in range(0, len(quesfeaShu) / args.batch_size):
        if i == len(quesfeaShu) / args.batch_size - 1:
            batchend = len(quesfeaShu)
        else:
            batchend = (i + 1) * (args.batch_size)
        batchstart = i * (args.batch_size)
        batch_size = batchend - batchstart
        quesfeabatch = []
        labelbatch = []
        lengthbatch = []
        quesfeaOri = quesfeaShu[batchstart:batchend]
        labelOri = labelShu[batchstart:batchend]
        lengthOri = lengthShu[batchstart:batchend]
        # Sort the batch by length, descending — presumably for packed sequences; confirm.
        idxbatch = sorted(range(len(lengthOri)), key=lambda x: lengthOri[x], reverse=True)
        for j in range(len(idxbatch)):
            quesfeabatch.append(quesfeaOri[idxbatch[j]])
            labelbatch.append(labelOri[idxbatch[j]])
            lengthbatch.append(lengthOri[idxbatch[j]])
        questrainarray = np.asarray(quesfeabatch)
        labeltrainarray = np.asarray(labelbatch)
        lengthtrainarray = np.asarray(lengthbatch)
        tmp = [questrainarray, labeltrainarray, lengthtrainarray]
        tmp = [Variable(torch.from_numpy(_), requires_grad=False) for _ in tmp]
        trques, trlabel, length = tmp
        if args.cuda:
            # NOTE(review): .cuda() is not in-place; this result is discarded.
            trlabel.cuda()
        output = model(trques, length)
        loss = criterion(output, trlabel) / (batch_size)
        prec1, = accuracy(output.data, trlabel.data, topk=(1,))
        top1.update(prec1[0], batch_size)
        print str(top1.avg) + ' ' + str(loss.data[0]) + ' ' + 'batch_valid ' + str(i)
    # Save a checkpoint whenever validation precision improves.
    global best_score
    if top1.avg > best_score:
        torch.save(model, args.save)
        print 'save model'
        best_score = top1.avg
    print str(top1.avg) + ' ' + str(loss.data[0]) + ' ' + 'epoch_valid ' + str(epoch)
pad_paths(paths_train[batch_start:batch_end], max_paths, max_edges)).to(device) counts = torch.DoubleTensor( pad_counts(counts_train[batch_start:batch_end], max_paths)).to(device) edgecounts = torch.LongTensor( pad_edgecounts(num_edges_all[batch_start:batch_end], max_paths)).to(device) targets = torch.LongTensor( targets_train[batch_start:batch_end]).to(device) # Backprop and perform Adam optimisation optimizer.zero_grad() # Run the forward pass outputs = model(nodes, paths, counts, edgecounts, max_paths, max_edges) loss = criterion(outputs, targets) loss.backward() optimizer.step() all_losses.append(loss.item()) print("Epoch: {}/{} Mean Loss: {}".format(epoch, num_epochs, np.mean(all_losses))) print("Training Complete!") model_dict = model.state_dict() model_dict = {
shape=[FLAGS.batch_size, FLAGS.noise_size], name='z1') z2 = tf.placeholder(tf.float32, shape=[FLAGS.batch_size, FLAGS.noise_size], name='z2') with tf.variable_scope('embedding', reuse=False): embeding_matrix = tf.Variable(tf.random_normal( [FLAGS.class_num, FLAGS.noise_size], stddev=0.35), trainable=True) onehot = tf.one_hot(input_label, FLAGS.class_num) label_embeding = tf.matmul(onehot, embeding_matrix) embedded_z1 = tf.multiply(z1, label_embeding) embedded_z2 = tf.multiply(z2, label_embeding) Net = model(embedded_z1, embedded_z2, input_spe, input_spa, input_label, FLAGS) # Add scalar summary tf.summary.scalar('discriminator_loss', Net.discrim_loss) # tf.summary.scalar('spectral_loss', Net.spectral_loss) tf.summary.scalar('learning_rate_dis', Net.learning_rate_dis) tf.summary.scalar('learning_rate_gen', Net.learning_rate_gen) tf.summary.scalar('gen_loss', Net.gen_loss) print('Finish building the network!!!') # Define the saver and weight initiallizer saver = tf.train.Saver(max_to_keep=10) # Start the session config = tf.ConfigProto(log_device_placement=True) config.gpu_options.allow_growth = True
def test( args ):
    """Evaluate a fine-tuned ABSA BERT tagger on the test split and write
    predictions.json / pre.txt into args.output_dir, then score them.
    """
    # Load a trained model that you have fine-tuned (we assume evaluate on cpu)
    processor = data_utils.ABSAProcessor()
    label_list = processor.get_labels(args.task_type)
    # Map label index -> label string.
    label_list_map = dict(zip([i for i in range(len(label_list))], label_list))
    tokenizer = ABSATokenizer.from_pretrained(
        modelconfig.MODEL_ARCHIVE_MAP[args.bert_model])
    eval_examples = processor.get_test_examples(args.data_dir, args.task_type)
    eval_features = data_utils.convert_examples_to_features(
        eval_examples, label_list, args.max_seq_length, tokenizer)
    logger.info("***** Running evaluation *****")
    logger.info(" Num examples = %d", len(eval_examples))
    logger.info(" Batch size = %d", args.eval_batch_size)
    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
    all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
    # all_tag_ids = torch.tensor([f.tag_id for f in eval_features], dtype=torch.long)
    eval_data = TensorDataset(all_input_ids, all_segment_ids, all_input_mask, all_label_ids)
    # Run prediction for full data and get a prediction file
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)
    model = ABSABert.from_pretrained(
        modelconfig.MODEL_ARCHIVE_MAP[args.bert_model], num_labels=len(label_list))
    model.load_state_dict(torch.load(os.path.join(args.output_dir, "model.pt")))
    model.cuda()
    model.eval()
    preds = None
    out_label_ids = None
    all_mask = []
    for step, batch in enumerate(eval_dataloader):
        batch = tuple(t.cuda() for t in batch)
        input_ids, segment_ids, input_mask, label_ids = batch
        with torch.no_grad():
            logits = model(input_ids, token_type_ids=segment_ids, attention_mask=input_mask)
        all_mask.append(input_mask)
        # Argmax over the label dimension, per token, per sentence.
        logits = [[np.argmax(i) for i in l.detach().cpu().numpy()]
                  for l in logits]
        if preds is None:
            if type(logits) == list:
                preds = logits
            else:
                preds = logits.detach().cpu().numpy()
            out_label_ids = label_ids.detach().cpu().numpy()
        else:
            if type(logits) == list:
                preds = np.append(preds, np.asarray(logits), axis=0)
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids,
                                      label_ids.detach().cpu().numpy(), axis=0)
    out_label_ids = out_label_ids.tolist()
    preds = preds.tolist()
    all_mask = torch.cat(all_mask, dim=0)
    all_mask = all_mask.tolist()
    # get rid of padding: keep only the first sum(mask) positions per sentence.
    new_label_ids, new_preds = [], []
    for i in range(len(all_mask)):
        l = sum(all_mask[i])
        new_preds.append(preds[i][:l])
        new_label_ids.append(out_label_ids[i][:l])
    # Drop the [CLS] and [SEP] positions.
    new_label_ids = [t[1:-1] for t in new_label_ids]
    new_preds = [t[1:-1] for t in new_preds]
    preds, out_label_ids = new_preds, new_label_ids
    output_eval_json = os.path.join(args.output_dir, "predictions.json")
    with open(output_eval_json, "w") as fw:
        assert len(preds) == len(eval_examples)
        recs = {}
        for qx, ex in enumerate(eval_examples):
            recs[int(ex.guid.split("-")[1])] = {
                "sentence": ex.text_a,
                "idx_map": ex.idx_map,
                "logit": preds[qx]
            }  # skip the [CLS] tag.
    raw_X = [
        recs[qx]["sentence"] for qx in range(len(eval_examples)) if qx in recs
    ]
    idx_map = [
        recs[qx]["idx_map"] for qx in range(len(eval_examples)) if qx in recs
    ]
    for i in range(len(preds)):
        assert len(preds[i]) == len(out_label_ids[i]), print(
            len(preds[i]), len(out_label_ids[i]), idx_map[i])
    # Re-tokenize each raw sentence with the wordpiece tokenizer so the token
    # count lines up with the prediction sequences.
    tokens_list = []
    for text_a in raw_X:
        tokens_a = []
        for t in [token.lower() for token in text_a]:
            tokens_a.extend(tokenizer.wordpiece_tokenizer.tokenize(t))
        tokens_list.append(tokens_a[:args.max_seq_length - 2])
    pre = [
        ' '.join([
            label_list_map.get(p, -1) for p in l[:args.max_seq_length - 2]
        ]) for l in preds
    ]
    true = [
        ' '.join([
            label_list_map.get(p, -1) for p in l[:args.max_seq_length - 2]
        ]) for l in out_label_ids
    ]
    for i in range(len(true)):
        assert len(tokens_list[i]) == len(true[i].split()), print(
            len(tokens_list[i]), len(true[i].split()), tokens_list[i], true[i])
    # One line per sentence: tokens***predicted-tags***gold-tags.
    lines = [
        ' '.join([str(t) for t in tokens_list[i]]) + '***' + pre[i] + '***' + true[i]
        for i in range(len(pre))
    ]
    with open(os.path.join(args.output_dir, 'pre.txt'), 'w') as fp:
        fp.write('\n'.join(lines))
    logger.info("Train data from: {}".format(args.data_dir))
    logger.info("Out dir: {}".format(args.output_dir))
    if args.task_type == 'ae':
        eval_result(args.output_dir)
    else:
        eval_ts_result(args.output_dir)
def train_mdnet():
    """Offline (multi-domain) training of MDNet.

    Loads the pickled sequence metadata, builds one RegionDataset per video
    sequence (domain), and trains the shared layers with a per-domain
    branch index `k`. The best-precision snapshot of the shared layers is
    saved to opts['model_path'].

    NOTE: uses legacy PyTorch APIs (`Variable`, `loss.data[0]`,
    `clip_grad_norm`) — consistent with the rest of this file; do not
    modernize in isolation.
    """
    ## Init dataset ##
    with open(data_path, 'rb') as fp:
        data = pickle.load(fp)
    K = len(data)  # number of training sequences == number of domains
    dataset = [None] * K
    for k, (seqname, seq) in enumerate(data.items()):
        img_list = seq['images']
        gt = seq['gt']
        img_dir = os.path.join(img_home, seqname)
        dataset[k] = RegionDataset(img_dir, img_list, gt, opts)
    ## Init model ##
    model = MDNet(opts['init_model_path'], K)
    if opts['use_gpu']:
        model = model.cuda()
    model.set_learnable_params(opts['ft_layers'])
    ## Init criterion and optimizer ##
    criterion = BinaryLoss()
    evaluator = Precision()
    optimizer = set_optimizer(model, opts['lr'])
    best_prec = 0.
    for i in range(opts['n_cycles']):
        print("==== Start Cycle %d ====" % (i))
        # Visit every domain once per cycle, in random order.
        k_list = np.random.permutation(K)
        prec = np.zeros(K)
        for j, k in enumerate(k_list):
            tic = time.time()
            # RegionDataset is an iterator yielding (pos, neg) region batches.
            pos_regions, neg_regions = next(dataset[k])
            pos_regions = Variable(pos_regions)
            neg_regions = Variable(neg_regions)
            if opts['use_gpu']:
                pos_regions = pos_regions.cuda()
                neg_regions = neg_regions.cuda()
            # Forward through the branch belonging to domain k.
            pos_score = model(pos_regions, k)
            neg_score = model(neg_regions, k)
            loss = criterion(pos_score, neg_score)
            model.zero_grad()
            loss.backward()
            # Gradient clipping (legacy, pre-1.0 spelling without trailing underscore).
            torch.nn.utils.clip_grad_norm(model.parameters(), opts['grad_clip'])
            optimizer.step()
            prec[k] = evaluator(pos_score, neg_score)
            toc = time.time() - tic
            print("Cycle %2d, K %2d (%2d), Loss %.3f, Prec %.3f, Time %.3f" % \
                  (i, j, k, loss.data[0], prec[k], toc))
        cur_prec = prec.mean()
        print("Mean Precision: %.3f" % (cur_prec))
        # Checkpoint only when the mean precision over all domains improves.
        if cur_prec > best_prec:
            best_prec = cur_prec
            if opts['use_gpu']:
                model = model.cpu()  # save CPU tensors for portability
            states = {'shared_layers': model.layers.state_dict()}
            print("Save model to %s" % opts['model_path'])
            torch.save(states, opts['model_path'])
            if opts['use_gpu']:
                model = model.cuda()
# --- Qualitative evaluation script for a trained R2U-Net segmentation model ---
# Loads a checkpoint, draws 10 batches from the loader, and shows
# (input image | ground-truth mask | argmax prediction) side by side.
data_loader = torch.utils.data.DataLoader(dst, batch_size=2, shuffle=True)
model = R2U_Net(img_ch=3, output_ch=34, t=2)  # 34 output classes — presumably Cityscapes-style labels, TODO confirm
model.load_state_dict(torch.load('epochs-4.pt'))
model.eval()
model.cuda()
torch.manual_seed(10)  # reproducible batch selection from the shuffled loader
val = iter(data_loader)
for e in range(10):
    fig = plt.figure(figsize=(10, 10))
    image, mask = next(val)
    image = image.cuda()
    # FIX: the original did `mask.cuda().detach().cpu()` — a pointless
    # round-trip to the GPU; the mask is only used for plotting on the CPU.
    mask = mask.detach().cpu()
    # FIX: run inference without autograd. model.eval() alone does NOT
    # disable gradient tracking, so the original built a useless graph
    # and wasted GPU memory on every forward pass.
    with torch.no_grad():
        preds = model(image)
    preds = preds.detach().cpu()
    image = image.cpu()
    # Left panel: input image (CHW -> HWC for imshow).
    fig1 = fig.add_subplot(131)
    plt.imshow(image[0].transpose(0, 2).transpose(0, 1).numpy())
    fig1.title.set_text("Image")
    fig1.axis("off")
    # Middle panel: ground-truth mask.
    fig2 = fig.add_subplot(132)
    plt.imshow(mask[0].transpose(0, 2).transpose(0, 1).numpy())
    fig2.title.set_text("Ground_Truth")
    fig2.axis("off")
    # Right panel: per-pixel argmax over class logits (first item of the batch).
    fig3 = fig.add_subplot(133)
    plt.imshow(preds.argmax(1)[0].numpy())
    fig3.title.set_text("Prediction")
    fig3.axis("off")
    plt.show()
def check_condition(self, period):
    """Evaluate NDCG@50 for test batches whose condition label equals `period`.

    Rebuilds the VAE_RNN_rec model (optionally loading saved weights),
    streams the test fold-in/hold-out rating matrices batch by batch, and
    for every batch matching `period` scores the recommendations once per
    class-conditional hidden vector h_1..h_{class_num}.

    Returns a dict {"ndcg_list50_<k>": np.ndarray} of concatenated
    per-user NDCG@50 scores, one entry per condition class.
    """
    model = VAE_RNN_rec(self.args.dims, self.args.input_size_rnn, self.args.embedding_size,\
        self.args.num_layer, self.args.dropout_rate, self.args.bidirectional, self.args.class_num,\
        self.args.hidden_size_rnn, self.args.condition, self.args.dataset, self.args.activation,\
        self.args.freeze, self.args.attn)
    model = model.to(self.args.device)
    if self.args.load_model:
        model.load_state_dict(torch.load(self.args.log_dir + '/' + self.args.load_model + '/' + 'model.pt'))
    # Pick the rating loader matching the dataset directory.
    if self.args.data_dir == './data/ml-1m':
        dataloader = ItemRatingLoader(self.args.data_dir)
    elif self.args.data_dir == './data/amazon':
        dataloader = AmazonRatingLoader(self.args.data_dir, self.args.dims[0])
    # tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'fixed2_test_tr.csv'), os.path.join(self.args.data_dir, 'fixed2_test_te.csv'))
    # tr = fold-in items, te = held-out items to rank against.
    tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'test_tr.csv'), os.path.join(self.args.data_dir, 'test_te.csv'))
    N = tr_data_rating.shape[0]
    idxlist = np.array(range(N))
    # print(N)
    # Fixed seed so the permutation matches the one used when the sequence
    # generator's ordering was produced — presumably must stay in sync with
    # training-side code; verify before changing.
    np.random.seed(98764)
    idx_pe = np.random.permutation(len(idxlist))
    idxlist = idxlist[idx_pe]
    # print(idxlist[:self.args.batch_size])
    # if self.args.condition:
    valid_data_item = dataloader.load_sequence_data_generator(int(N/self.args.batch_size)+1, 'test', self.args.batch_size, idx_pe)
    with torch.no_grad():
        for i, st_idx in enumerate(range(0, N, self.args.batch_size)):
            # `order` re-sorts the batch to match the sequence generator's output.
            order, item_feature, label = next(valid_data_item)
            end_idx = min(st_idx + self.args.batch_size, N)
            x_tr_unorder = tr_data_rating[idxlist[st_idx:end_idx]]
            X_tr = x_tr_unorder[order]
            x_te_unorder = te_data_rating[idxlist[st_idx:end_idx]]
            X_te = x_te_unorder[order]
            # print(label.item())
            # Only evaluate batches whose condition label equals `period`.
            if not label.item() == period:
                continue
            else:
                if sparse.isspmatrix(X_tr):
                    X_tr = X_tr.toarray()
                X_tr = X_tr.astype('float32')
                # Load one saved hidden vector per condition class.
                for k in range(self.args.class_num):
                    hidden = "h_{}".format(k+1)
                    self.hidden_vecs[hidden]= \
                        torch.load(f"{self.args.hiddenvec_dir}/{hidden}.pt")
                # Replicate each hidden vector across the batch dimension.
                hs = {}
                for _ in range(self.args.batch_size):
                    for k in range(self.args.class_num):
                        hidden = "h_{}".format(k+1)
                        hs.setdefault(hidden, []).append(self.hidden_vecs[hidden].unsqueeze(0))
                # NOTE(review): ndcg_result is re-created for every matching
                # batch, so the concatenation below only sees the last
                # matching batch's scores — confirm this is intended.
                ndcg_result={}
                for j in range(self.args.class_num):
                    hidden = "h_{}".format(j+1)
                    hv = torch.cat(hs[hidden],0)
                    # Condition the reconstruction on class-j's hidden state.
                    model_input = (torch.FloatTensor(X_tr).to(self.args.device), hv)
                    recon, _, _ = model(model_input)
                    topk = show_recommended_items(recon.cpu().detach().numpy(), k=50)
                    with open(f'./result/amazon/qual/topk{j}.pkl', 'wb') as f:
                        pickle.dump(topk, f)
                    # Mask already-seen (fold-in) items so they can't be recommended.
                    recon[X_tr.nonzero()] = -np.inf
                    nd_name = "ndcg_list50_{}".format(j+1)
                    ndcg_result.setdefault(nd_name, []).append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=50))
    ndcg_final_result={}
    for j in range(self.args.class_num):
        nd_name = "ndcg_list50_{}".format(j+1)
        ndcg_final_result[nd_name] = np.concatenate(ndcg_result[nd_name])
    return ndcg_final_result
def predictFull(img, model):
    """Run `model` on a single image and return class probabilities.

    The image is converted via the project's `imgTensor` helper, given a
    leading batch dimension, forwarded through the model, and the raw
    scores are normalized with a softmax over dim 1 (the class axis).
    """
    batched_input = imgTensor(img)[None]  # add batch dim of size 1
    raw_scores = model(batched_input)
    normalize = torch.nn.Softmax(dim=1)
    return normalize(raw_scores)
def run_mdnet(img_list_v, img_list_i, init_bbox, gt=None, seq='seq_name ex)Basketball', savefig_dir='', display=False):
    """Online RGB-T tracking with a fused (visible + infrared) MDNet.

    Args:
        img_list_v: paths to the visible-spectrum frames.
        img_list_i: paths to the infrared frames (same length/order).
        init_bbox: first-frame target box [x, y, w, h].
        gt: optional ground-truth boxes, one row per frame (used for IoU/display).
        seq: sequence name, used to locate precomputed attention maps.
        savefig_dir: when non-empty, per-frame visualizations are written here.
        display: show live matplotlib visualization.

    Returns:
        (iou_result, result_bb, fps, result): per-frame IoU vs gt, regressed
        boxes, overall frames-per-second, and raw (un-regressed) boxes.
    """
    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list_v), 4))
    result_bb = np.zeros((len(img_list_v), 4))
    result[0] = np.copy(target_bbox)
    result_bb[0] = np.copy(target_bbox)
    iou_result = np.zeros((len(img_list_v), 1))
    # execution time array
    exec_time_result = np.zeros((len(img_list_v), 1))
    # Init model
    model = MDNet(opts['model_path'])
    if opts['adaptive_align']:
        align_h = model.roi_align_model.aligned_height
        align_w = model.roi_align_model.aligned_width
        spatial_s = model.roi_align_model.spatial_scale
        model.roi_align_model = RoIAlignAdaMax(align_h, align_w, spatial_s)
    if opts['use_gpu']:
        model = model.cuda()
    model.set_learnable_params(opts['ft_layers'])
    # Init image crop model
    img_crop_model = imgCropper(1.)
    if opts['use_gpu']:
        img_crop_model.gpuEnable()
    # Init criterion and optimizer
    criterion = BinaryLoss()
    init_optimizer = set_optimizer(model, opts['lr_init'])
    update_optimizer = set_optimizer(model, opts['lr_update'])
    tic = time.time()
    # Load first image (both modalities)
    cur_image_v = Image.open(img_list_v[0]).convert('RGB')
    cur_image_v = np.asarray(cur_image_v)
    cur_image_i = Image.open(img_list_i[0]).convert('RGB')
    cur_image_i = np.asarray(cur_image_i)
    init_targetObject_v = cur_image_v[int(init_bbox[0]):int(init_bbox[0] + init_bbox[2]), int(init_bbox[1]):int(init_bbox[1] + init_bbox[3]), :]
    init_targetObject_i = cur_image_i[int(init_bbox[0]):int(init_bbox[0] + init_bbox[2]), int(init_bbox[1]):int(init_bbox[1] + init_bbox[3]), :]
    # Draw pos/neg samples around the first-frame target
    ishape = cur_image_v.shape
    pos_examples = gen_samples(
        SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),
        target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
    neg_examples = gen_samples(
        SampleGenerator('uniform', (ishape[1], ishape[0]), 1, 2, 1.1),
        target_bbox, opts['n_neg_init'], opts['overlap_neg_init'])
    neg_examples = np.random.permutation(neg_examples)
    cur_bbreg_examples = gen_samples(
        SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5, 1.1),
        target_bbox, opts['n_bbreg'], opts['overlap_bbreg'], opts['scale_bbreg'])
    # compute padded sample: scene box enclosing all padded negatives
    padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] * (opts['padding'] - 1.) / 2.).min()
    padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] * (opts['padding'] - 1.) / 2.).min()
    padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] * (opts['padding'] + 1.) / 2.).max()
    padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] * (opts['padding'] + 1.) / 2.).max()
    padded_scene_box = np.reshape(
        np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1)), (1, 4))
    scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
    # Optional jittered copies of the scene box (shift + scale variants).
    if opts['jitter']:
        ## horizontal shift
        jittered_scene_box_horizon = np.copy(padded_scene_box)
        jittered_scene_box_horizon[0, 0] -= 4.
        jitter_scale_horizon = 1.
        ## vertical shift
        jittered_scene_box_vertical = np.copy(padded_scene_box)
        jittered_scene_box_vertical[0, 1] -= 4.
        jitter_scale_vertical = 1.
        jittered_scene_box_reduce1 = np.copy(padded_scene_box)
        jitter_scale_reduce1 = 1.1**(-1)
        ## vertical shift
        jittered_scene_box_enlarge1 = np.copy(padded_scene_box)
        jitter_scale_enlarge1 = 1.1**(1)
        ## scale reduction
        jittered_scene_box_reduce2 = np.copy(padded_scene_box)
        jitter_scale_reduce2 = 1.1**(-2)
        ## scale enlarge
        jittered_scene_box_enlarge2 = np.copy(padded_scene_box)
        jitter_scale_enlarge2 = 1.1**(2)
        scene_boxes = np.concatenate([
            scene_boxes, jittered_scene_box_horizon, jittered_scene_box_vertical,
            jittered_scene_box_reduce1, jittered_scene_box_enlarge1,
            jittered_scene_box_reduce2, jittered_scene_box_enlarge2
        ], axis=0)
        jitter_scale = [
            1., jitter_scale_horizon, jitter_scale_vertical, jitter_scale_reduce1,
            jitter_scale_enlarge1, jitter_scale_reduce2, jitter_scale_enlarge2
        ]
    else:
        jitter_scale = [1.]
    model.eval()
    # Extract pos/neg/bbreg RoI features from each (possibly jittered) scene crop.
    for bidx in range(0, scene_boxes.shape[0]):
        crop_img_size = (scene_boxes[bidx, 2:4] * (
            (opts['img_size'], opts['img_size']) / target_bbox[2:4])
        ).astype('int64') * jitter_scale[bidx]
        cropped_image_v, cur_image_var_v = img_crop_model.crop_image(
            cur_image_v, np.reshape(scene_boxes[bidx], (1, 4)), crop_img_size)
        cropped_image_v = cropped_image_v - 128.
        cropped_image_i, cur_image_var_i = img_crop_model.crop_image(
            cur_image_i, np.reshape(scene_boxes[bidx], (1, 4)), crop_img_size)
        cropped_image_i = cropped_image_i - 128.
        # Forward both modalities; fused_feats is the RGB-T fused conv3 map.
        feat_map_v, feat_map_i, fused_feats = model(cropped_image_v, cropped_image_i, out_layer='conv3')
        rel_target_bbox = np.copy(target_bbox)
        rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]
        # Positive RoIs, expressed relative to the scene box.
        batch_num = np.zeros((pos_examples.shape[0], 1))
        cur_pos_rois = np.copy(pos_examples)
        cur_pos_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                                          cur_pos_rois.shape[0], axis=0)
        scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
        cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field,
                                       (scaled_obj_size, scaled_obj_size),
                                       target_bbox[2:4], opts['padding'])
        cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
        cur_pos_rois = Variable(
            torch.from_numpy(cur_pos_rois.astype('float32'))).cuda()
        # pdb.set_trace()
        cur_pos_feats = model.roi_align_model(fused_feats, cur_pos_rois)
        cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0), -1).data.clone()
        # cur_pos_feats_i = model.roi_align_model(feat_map_i, cur_pos_rois)
        # cur_pos_feats_i = cur_pos_feats_i.view(cur_pos_feats_i.size(0), -1).data.clone()
        # Negative RoIs.
        batch_num = np.zeros((neg_examples.shape[0], 1))
        cur_neg_rois = np.copy(neg_examples)
        cur_neg_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                                          cur_neg_rois.shape[0], axis=0)
        cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field,
                                       (scaled_obj_size, scaled_obj_size),
                                       target_bbox[2:4], opts['padding'])
        cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
        cur_neg_rois = Variable(
            torch.from_numpy(cur_neg_rois.astype('float32'))).cuda()
        cur_neg_feats = model.roi_align_model(fused_feats, cur_neg_rois)
        cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0), -1).data.clone()
        # cur_neg_feats_i = model.roi_align_model(feat_map_i, cur_neg_rois)
        # cur_neg_feats_i = cur_neg_feats_i.view(cur_neg_feats_i.size(0), -1).data.clone()
        ## bbreg rois
        batch_num = np.zeros((cur_bbreg_examples.shape[0], 1))
        cur_bbreg_rois = np.copy(cur_bbreg_examples)
        cur_bbreg_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                                            cur_bbreg_rois.shape[0], axis=0)
        scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
        cur_bbreg_rois = samples2maskroi(cur_bbreg_rois, model.receptive_field,
                                         (scaled_obj_size, scaled_obj_size),
                                         target_bbox[2:4], opts['padding'])
        cur_bbreg_rois = np.concatenate((batch_num, cur_bbreg_rois), axis=1)
        cur_bbreg_rois = Variable(
            torch.from_numpy(cur_bbreg_rois.astype('float32'))).cuda()
        cur_bbreg_feats = model.roi_align_model(fused_feats, cur_bbreg_rois)
        cur_bbreg_feats = cur_bbreg_feats.view(cur_bbreg_feats.size(0), -1).data.clone()
        # cur_bbreg_feats_i = model.roi_align_model(feat_map_i, cur_bbreg_rois)
        # cur_bbreg_feats_i = cur_bbreg_feats_i.view(cur_bbreg_feats_i.size(0), -1).data.clone()
        feat_dim = cur_pos_feats.size(-1)
        # Accumulate features over all scene-box variants.
        if bidx == 0:
            pos_feats = cur_pos_feats
            neg_feats = cur_neg_feats
            ##bbreg feature
            bbreg_feats = cur_bbreg_feats
            bbreg_examples = cur_bbreg_examples
        else:
            pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
            neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)
            ##bbreg feature
            bbreg_feats = torch.cat((bbreg_feats, cur_bbreg_feats), dim=0)
            bbreg_examples = np.concatenate(
                (bbreg_examples, cur_bbreg_examples), axis=0)
    # Randomly subsample down to the configured initial budgets.
    if pos_feats.size(0) > opts['n_pos_init']:
        pos_idx = np.asarray(range(pos_feats.size(0)))
        np.random.shuffle(pos_idx)
        pos_feats = pos_feats[pos_idx[0:opts['n_pos_init']], :]
    if neg_feats.size(0) > opts['n_neg_init']:
        neg_idx = np.asarray(range(neg_feats.size(0)))
        np.random.shuffle(neg_idx)
        neg_feats = neg_feats[neg_idx[0:opts['n_neg_init']], :]
    ##bbreg
    if bbreg_feats.size(0) > opts['n_bbreg']:
        bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
        np.random.shuffle(bbreg_idx)
        bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
        bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :]
    #print bbreg_examples.shape
    # init_target_feats = pos_feats[:400]
    ## open images and crop patch from obj — augment with jittered extra crops
    extra_obj_size = np.array((opts['img_size'], opts['img_size']))
    extra_crop_img_size = extra_obj_size * (opts['padding'] + 0.6)
    replicateNum = 100
    for iidx in range(replicateNum):
        extra_target_bbox = np.copy(target_bbox)
        extra_scene_box = np.copy(extra_target_bbox)
        extra_scene_box_center = extra_scene_box[
            0:2] + extra_scene_box[2:4] / 2.
        extra_scene_box_size = extra_scene_box[2:4] * (opts['padding'] + 0.6)
        extra_scene_box[
            0:2] = extra_scene_box_center - extra_scene_box_size / 2.
        extra_scene_box[2:4] = extra_scene_box_size
        # Random shift (clipped to +-4 px) and random scale (1.1^[-2,2]).
        extra_shift_offset = np.clip(2. * np.random.randn(2), -4, 4)
        cur_extra_scale = 1.1**np.clip(np.random.randn(1), -2, 2)
        extra_scene_box[0] += extra_shift_offset[0]
        extra_scene_box[1] += extra_shift_offset[1]
        extra_scene_box[2:4] *= cur_extra_scale[0]
        scaled_obj_size = float(opts['img_size']) / cur_extra_scale[0]
        cur_extra_cropped_image_v, _ = img_crop_model.crop_image(
            cur_image_v, np.reshape(extra_scene_box, (1, 4)), extra_crop_img_size)
        cur_extra_cropped_image_v = cur_extra_cropped_image_v.detach()
        cur_extra_cropped_image_i, _ = img_crop_model.crop_image(
            cur_image_i, np.reshape(extra_scene_box, (1, 4)), extra_crop_img_size)
        cur_extra_cropped_image_i = cur_extra_cropped_image_i.detach()
        # extra_target_bbox = np.array(list(map(int, extra_target_bbox)))
        cur_extra_pos_examples = gen_samples(
            SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),
            extra_target_bbox, opts['n_pos_init'] // replicateNum,
            opts['overlap_pos_init'])
        cur_extra_neg_examples = gen_samples(
            SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 2, 1.1),
            extra_target_bbox, opts['n_neg_init'] / replicateNum // 4,
            opts['overlap_neg_init'])
        ##bbreg sample
        cur_extra_bbreg_examples = gen_samples(
            SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5, 1.1),
            extra_target_bbox, opts['n_bbreg'] / replicateNum // 4,
            opts['overlap_bbreg'], opts['scale_bbreg'])
        # batch_num carries the crop index so RoIAlign picks the right map.
        batch_num = iidx * np.ones((cur_extra_pos_examples.shape[0], 1))
        cur_extra_pos_rois = np.copy(cur_extra_pos_examples)
        cur_extra_pos_rois[:, 0:2] -= np.repeat(np.reshape(
            extra_scene_box[0:2], (1, 2)),
            cur_extra_pos_rois.shape[0], axis=0)
        cur_extra_pos_rois = samples2maskroi(
            cur_extra_pos_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_pos_rois = np.concatenate((batch_num, cur_extra_pos_rois), axis=1)
        batch_num = iidx * np.ones((cur_extra_neg_examples.shape[0], 1))
        cur_extra_neg_rois = np.copy(cur_extra_neg_examples)
        cur_extra_neg_rois[:, 0:2] -= np.repeat(np.reshape(
            extra_scene_box[0:2], (1, 2)),
            cur_extra_neg_rois.shape[0], axis=0)
        cur_extra_neg_rois = samples2maskroi(
            cur_extra_neg_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_neg_rois = np.concatenate((batch_num, cur_extra_neg_rois), axis=1)
        ## bbreg rois
        batch_num = iidx * np.ones((cur_extra_bbreg_examples.shape[0], 1))
        cur_extra_bbreg_rois = np.copy(cur_extra_bbreg_examples)
        cur_extra_bbreg_rois[:, 0:2] -= np.repeat(np.reshape(
            extra_scene_box[0:2], (1, 2)),
            cur_extra_bbreg_rois.shape[0], axis=0)
        cur_extra_bbreg_rois = samples2maskroi(
            cur_extra_bbreg_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_bbreg_rois = np.concatenate(
            (batch_num, cur_extra_bbreg_rois), axis=1)
        if iidx == 0:
            extra_cropped_image_v = cur_extra_cropped_image_v
            extra_cropped_image_i = cur_extra_cropped_image_i
            extra_pos_rois = np.copy(cur_extra_pos_rois)
            extra_neg_rois = np.copy(cur_extra_neg_rois)
            ##bbreg rois
            extra_bbreg_rois = np.copy(cur_extra_bbreg_rois)
            extra_bbreg_examples = np.copy(cur_extra_bbreg_examples)
        else:
            extra_cropped_image_v = torch.cat(
                (extra_cropped_image_v, cur_extra_cropped_image_v), dim=0)
            extra_cropped_image_i = torch.cat(
                (extra_cropped_image_i, cur_extra_cropped_image_i), dim=0)
            extra_pos_rois = np.concatenate(
                (extra_pos_rois, np.copy(cur_extra_pos_rois)), axis=0)
            extra_neg_rois = np.concatenate(
                (extra_neg_rois, np.copy(cur_extra_neg_rois)), axis=0)
            ##bbreg rois
            extra_bbreg_rois = np.concatenate(
                (extra_bbreg_rois, np.copy(cur_extra_bbreg_rois)), axis=0)
            extra_bbreg_examples = np.concatenate(
                (extra_bbreg_examples, np.copy(cur_extra_bbreg_examples)), axis=0)
    extra_pos_rois = Variable(
        torch.from_numpy(extra_pos_rois.astype('float32'))).cuda()
    extra_neg_rois = Variable(
        torch.from_numpy(extra_neg_rois.astype('float32'))).cuda()
    ##bbreg rois
    extra_bbreg_rois = Variable(
        torch.from_numpy(extra_bbreg_rois.astype('float32'))).cuda()
    extra_cropped_image_v -= 128.
    extra_cropped_image_i -= 128.
    # pdb.set_trace()
    # Forward each extra crop one at a time (memory-friendly), stacking conv3 maps.
    for iidxxx in range(replicateNum):
        temp_extra_cropped_image_v = torch.unsqueeze(
            extra_cropped_image_v[iidxxx], dim=0)
        temp_extra_cropped_image_i = torch.unsqueeze(
            extra_cropped_image_i[iidxxx], dim=0)
        temp_extra_feat_maps_v, temp_extra_feat_maps_i, temp_extra_feat_maps = model(
            temp_extra_cropped_image_v, temp_extra_cropped_image_i, out_layer='conv3')
        temp_extra_feat_maps = torch.squeeze(temp_extra_feat_maps, dim=0)
        # temp_extra_feat_maps_i = torch.squeeze(temp_extra_feat_maps_i, dim=0)
        if iidxxx == 0:
            extra_feat_maps = torch.zeros(replicateNum, temp_extra_feat_maps.shape[0],
                                          temp_extra_feat_maps.shape[1],
                                          temp_extra_feat_maps.shape[2])
            # extra_feat_maps_i = torch.zeros(replicateNum, temp_extra_feat_maps_i.shape[0], temp_extra_feat_maps_i.shape[1], temp_extra_feat_maps_i.shape[2])
        extra_feat_maps[iidxxx] = temp_extra_feat_maps
        # extra_feat_maps_i[iidxxx] = temp_extra_feat_maps_i
    extra_feat_maps = extra_feat_maps.cuda()
    # Draw pos/neg samples
    ishape = cur_image_v.shape
    # pdb.set_trace()
    extra_pos_feats = model.roi_align_model(extra_feat_maps, extra_pos_rois)
    extra_pos_feats = extra_pos_feats.view(extra_pos_feats.size(0), -1).data.clone()
    extra_neg_feats = model.roi_align_model(extra_feat_maps, extra_neg_rois)
    extra_neg_feats = extra_neg_feats.view(extra_neg_feats.size(0), -1).data.clone()
    ##bbreg feat
    extra_bbreg_feats = model.roi_align_model(extra_feat_maps, extra_bbreg_rois)
    extra_bbreg_feats = extra_bbreg_feats.view(extra_bbreg_feats.size(0), -1).data.clone()
    ## concatenate extra features to original_features
    pos_feats = torch.cat((pos_feats, extra_pos_feats), dim=0)
    neg_feats = torch.cat((neg_feats, extra_neg_feats), dim=0)
    ## concatenate extra bbreg feats to original_bbreg_feats
    bbreg_feats = torch.cat((bbreg_feats, extra_bbreg_feats), dim=0)
    bbreg_examples = np.concatenate((bbreg_examples, extra_bbreg_examples), axis=0)
    torch.cuda.empty_cache()
    model.zero_grad()
    # Initial training of the fc layers on the first frame.
    train(model, criterion, init_optimizer, pos_feats,
          neg_feats, opts['maxiter_init'])
    ##bbreg train
    if bbreg_feats.size(0) > opts['n_bbreg']:
        bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
        np.random.shuffle(bbreg_idx)
        bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
        bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :]
    bbreg = BBRegressor((ishape[1], ishape[0]))
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
    # Seed the long/short-term feature memories.
    # NOTE(review): pos_feats_all/neg_feats_all are only created inside these
    # conditionals; if the budgets are not exceeded they are undefined below.
    if pos_feats.size(0) > opts['n_pos_update']:
        pos_idx = np.asarray(range(pos_feats.size(0)))
        np.random.shuffle(pos_idx)
        pos_feats_all = [
            pos_feats.index_select(
                0, torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())
        ]
    if neg_feats.size(0) > opts['n_neg_update']:
        neg_idx = np.asarray(range(neg_feats.size(0)))
        np.random.shuffle(neg_idx)
        neg_feats_all = [
            neg_feats.index_select(
                0, torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())
        ]
    spf_total = time.time() - tic
    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (cur_image_v.shape[1] / dpi, cur_image_v.shape[0] / dpi)
        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(cur_image_v)
        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]), gt[0, 2], gt[0, 3],
                                    linewidth=3, edgecolor="#00ff00",
                                    zorder=1, fill=False)
            ax.add_patch(gt_rect)
        rect = plt.Rectangle(tuple(result_bb[0, :2]), result_bb[0, 2],
                             result_bb[0, 3], linewidth=3,
                             edgecolor="#ff0000", zorder=1, fill=False)
        ax.add_patch(rect)
        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)
    #####################################################################
    #### Main loop
    #####################################################################
    failure_count = 0
    trans_f = opts['trans_f']
    for i in range(1, len(img_list_v)):
        tic = time.time()
        # Load image (both modalities)
        cur_image_v = Image.open(img_list_v[i]).convert('RGB')
        cur_image_v = np.asarray(cur_image_v)
        cur_image_i = Image.open(img_list_i[i]).convert('RGB')
        cur_image_i = np.asarray(cur_image_i)
        # Estimate target bbox
        ishape = cur_image_v.shape
        samples = gen_samples(
            SampleGenerator('gaussian', (ishape[1], ishape[0]), trans_f,
                            opts['scale_f'], valid=True), target_bbox,
            opts['n_samples'])
        #########################################################################
        #### Target-Aware Attention Prediction
        #########################################################################
        # NOTE(review): absolute path — looks machine-specific; confirm it is
        # intended to be configurable.
        attention_path = "/daTANet_rgbt_234_Attention/" + seq + "/"
        attentionImage_name = str(i + 1) + "_attentionMap.jpg"
        # pdb.set_trace()
        attentionFlag = os.path.exists(attention_path + attentionImage_name)
        # print("==>> attentionFlag ", attentionFlag)
        # After 6 consecutive failures, re-detect using the precomputed attention map.
        if failure_count >= 6 and attentionFlag:
            attentionMap = Image.open(attention_path + attentionImage_name).convert('RGB')
            attentionMap = np.asarray(attentionMap)
            # pdb.set_trace()
            dynamic_atttentonMAP = cv2.resize(
                attentionMap, (cur_image_v.shape[1], cur_image_v.shape[0]),
                interpolation=cv2.INTER_LINEAR)
            ret, static_atttentonMAP = cv2.threshold(dynamic_atttentonMAP, 100, 255,
                                                     cv2.THRESH_BINARY)
            # cv2.imwrite('static_atttentonMAP.png', static_atttentonMAP)
            # pdb.set_trace()
            # Connected components of the binarized attention map.
            label_image = measure.label(static_atttentonMAP)
            props = measure.regionprops(label_image)
            atttenton_BBox = []
            attention_centerLoc = []
            similarity_glob_target_max = 0
            global_samples = []
            #### for each candidate search region
            # for iii in range(len(props)):
            if len(props) > 1:
                attNum = 1
            else:
                attNum = len(props)
            for iii in range(attNum):
                center_position = props[iii].centroid
                center_position = [
                    int(center_position[1]),
                    int(center_position[0])
                ]
                centerPos_prev_x = target_bbox[0] + target_bbox[2] / 2
                centerPos_prev_y = target_bbox[1] + target_bbox[3] / 2
                # Only accept attention regions near the previous target center.
                if math.fabs(center_position[0] - centerPos_prev_x) < 30 and math.fabs(
                        center_position[1] - centerPos_prev_y) < 30:
                    bbox = props[iii].bbox
                    new_bbox2 = np.zeros((4))
                    new_bbox2[0] = center_position[0] - target_bbox[2] / 2
                    new_bbox2[1] = center_position[1] - target_bbox[3] / 2
                    new_bbox2[2] = target_bbox[2]
                    new_bbox2[3] = target_bbox[3]
                    # if new_bbox[2] > 10 and new_bbox[3] > 10:
                    # switch_candidate_samples2 = sample_generator(new_bbox2, 100)
                    switch_samples2 = gen_samples(
                        SampleGenerator('gaussian', (ishape[1], ishape[0]),
                                        trans_f, opts['scale_f'], valid=True),
                        new_bbox2, 256)
                    # global_samples.append(switch_samples2)
                    # pdb.set_trace()
                    # samples = np.concatenate((switch_samples2, samples))
                    # Replace (not augment) the candidate samples with attention-guided ones.
                    samples = switch_samples2
                    # print("==>> Using Global Proposals and samples: ", samples.shape[0])
                    # samples = np.concatenate((switch_samples2, samples))
        # Scene box enclosing all padded candidate samples.
        padded_x1 = (samples[:, 0] - samples[:, 2] * (opts['padding'] - 1.) / 2.).min()
        padded_y1 = (samples[:, 1] - samples[:, 3] * (opts['padding'] - 1.) / 2.).min()
        padded_x2 = (samples[:, 0] + samples[:, 2] * (opts['padding'] + 1.) / 2.).max()
        padded_y2 = (samples[:, 1] + samples[:, 3] * (opts['padding'] + 1.) / 2.).max()
        padded_scene_box = np.asarray(
            (padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1))
        # Clamp degenerate scene boxes back inside the image.
        if padded_scene_box[0] > cur_image_v.shape[1]:
            padded_scene_box[0] = cur_image_v.shape[1] - 1
        if padded_scene_box[1] > cur_image_v.shape[0]:
            padded_scene_box[1] = cur_image_v.shape[0] - 1
        if padded_scene_box[0] + padded_scene_box[2] < 0:
            padded_scene_box[2] = -padded_scene_box[0] + 1
        if padded_scene_box[1] + padded_scene_box[3] < 0:
            padded_scene_box[3] = -padded_scene_box[1] + 1
        crop_img_size = (padded_scene_box[2:4] *
                         ((opts['img_size'], opts['img_size']) /
                          target_bbox[2:4])).astype('int64')
        cropped_image_v, cur_image_var_v = img_crop_model.crop_image(
            cur_image_v, np.reshape(padded_scene_box, (1, 4)), crop_img_size)
        cropped_image_v = cropped_image_v - 128.
        cropped_image_i, cur_image_var_i = img_crop_model.crop_image(
            cur_image_i, np.reshape(padded_scene_box, (1, 4)), crop_img_size)
        cropped_image_i = cropped_image_i - 128.
        model.eval()
        feat_map_v, feat_map_i, feat_map = model(cropped_image_v, cropped_image_i,
                                                 out_layer='conv3')
        # relative target bbox with padded_scene_box
        rel_target_bbox = np.copy(target_bbox)
        rel_target_bbox[0:2] -= padded_scene_box[0:2]
        # Extract sample features and get target location
        batch_num = np.zeros((samples.shape[0], 1))
        sample_rois = np.copy(samples)
        sample_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2], (1, 2)),
                                         sample_rois.shape[0], axis=0)
        sample_rois = samples2maskroi(sample_rois, model.receptive_field,
                                      (opts['img_size'], opts['img_size']),
                                      target_bbox[2:4], opts['padding'])
        sample_rois = np.concatenate((batch_num, sample_rois), axis=1)
        sample_rois = Variable(torch.from_numpy(
            sample_rois.astype('float32'))).cuda()
        sample_feats = model.roi_align_model(feat_map, sample_rois)
        sample_feats = sample_feats.view(sample_feats.size(0), -1).clone()
        sample_scores = model(sample_feats, sample_feats, in_layer='fc4')
        # Average the top-5 scoring candidates for the new target estimate.
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.data.cpu().numpy()
        target_score = top_scores.data.mean()
        target_bbox = samples[top_idx].mean(axis=0)
        success = target_score > opts['success_thr']
        # # Expand search area at failure
        if success:
            trans_f = opts['trans_f']
        else:
            trans_f = opts['trans_f_expand']
        ## Bbox regression (only when tracking succeeded)
        if success:
            bbreg_feats = sample_feats[top_idx, :]
            bbreg_samples = samples[top_idx]
            bbreg_samples = bbreg.predict(bbreg_feats.data, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)
            # Decay (not reset) the failure counter on recovery.
            if failure_count >= 3:
                failure_count = failure_count - 3
            else:
                failure_count = 0
        else:
            bbreg_bbox = target_bbox
            failure_count = failure_count + 1
        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox
        iou_result[i] = 1.
        # Data collect — harvest new training samples on success.
        if success:
            # Draw pos/neg samples
            pos_examples = gen_samples(
                SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),
                target_bbox, opts['n_pos_update'], opts['overlap_pos_update'])
            neg_examples = gen_samples(
                SampleGenerator('uniform', (ishape[1], ishape[0]), 1.5, 1.2),
                target_bbox, opts['n_neg_update'], opts['overlap_neg_update'])
            padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] *
                         (opts['padding'] - 1.) / 2.).min()
            padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] *
                         (opts['padding'] - 1.) / 2.).min()
            padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] *
                         (opts['padding'] + 1.) / 2.).max()
            padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] *
                         (opts['padding'] + 1.) / 2.).max()
            padded_scene_box = np.reshape(
                np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                            padded_y2 - padded_y1)), (1, 4))
            scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
            jitter_scale = [1.]
            for bidx in range(0, scene_boxes.shape[0]):
                crop_img_size = (scene_boxes[bidx, 2:4] * (
                    (opts['img_size'], opts['img_size']) / target_bbox[2:4])
                ).astype('int64') * jitter_scale[bidx]
                cropped_image_v, cur_image_var_v = img_crop_model.crop_image(
                    cur_image_v, np.reshape(scene_boxes[bidx], (1, 4)),
                    crop_img_size)
                cropped_image_v = cropped_image_v - 128.
                cropped_image_i, cur_image_var_i = img_crop_model.crop_image(
                    cur_image_i, np.reshape(scene_boxes[bidx], (1, 4)),
                    crop_img_size)
                cropped_image_i = cropped_image_i - 128.
                feat_map_v, feat_map_i, feat_map = model(cropped_image_v,
                                                         cropped_image_i,
                                                         out_layer='conv3')
                rel_target_bbox = np.copy(target_bbox)
                rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]
                batch_num = np.zeros((pos_examples.shape[0], 1))
                cur_pos_rois = np.copy(pos_examples)
                cur_pos_rois[:, 0:2] -= np.repeat(np.reshape(
                    scene_boxes[bidx, 0:2], (1, 2)),
                    cur_pos_rois.shape[0], axis=0)
                scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
                cur_pos_rois = samples2maskroi(
                    cur_pos_rois, model.receptive_field,
                    (scaled_obj_size, scaled_obj_size), target_bbox[2:4],
                    opts['padding'])
                cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
                cur_pos_rois = Variable(
                    torch.from_numpy(cur_pos_rois.astype('float32'))).cuda()
                cur_pos_feats = model.roi_align_model(feat_map, cur_pos_rois)
                cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0),
                                                   -1).data.clone()
                batch_num = np.zeros((neg_examples.shape[0], 1))
                cur_neg_rois = np.copy(neg_examples)
                cur_neg_rois[:, 0:2] -= np.repeat(np.reshape(
                    scene_boxes[bidx, 0:2], (1, 2)),
                    cur_neg_rois.shape[0], axis=0)
                cur_neg_rois = samples2maskroi(
                    cur_neg_rois, model.receptive_field,
                    (scaled_obj_size, scaled_obj_size), target_bbox[2:4],
                    opts['padding'])
                cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
                cur_neg_rois = Variable(
                    torch.from_numpy(cur_neg_rois.astype('float32'))).cuda()
                cur_neg_feats = model.roi_align_model(feat_map, cur_neg_rois)
                cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0),
                                                   -1).data.clone()
                feat_dim = cur_pos_feats.size(-1)
                if bidx == 0:
                    pos_feats = cur_pos_feats
                    ##index select
                    neg_feats = cur_neg_feats
                else:
                    pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
                    neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)
            if pos_feats.size(0) > opts['n_pos_update']:
                pos_idx = np.asarray(range(pos_feats.size(0)))
                np.random.shuffle(pos_idx)
                pos_feats = pos_feats.index_select(
                    0, torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())
            if neg_feats.size(0) > opts['n_neg_update']:
                neg_idx = np.asarray(range(neg_feats.size(0)))
                np.random.shuffle(neg_idx)
                neg_feats = neg_feats.index_select(
                    0, torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())
            # FIFO memories: long-term for positives, short-term for negatives.
            pos_feats_all.append(pos_feats)
            neg_feats_all.append(neg_feats)
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]
        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.stack(pos_feats_all[-nframes:], 0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])
        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.stack(pos_feats_all, 0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])
        spf = time.time() - tic
        spf_total += spf
        # Display
        if display or savefig:
            im.set_data(cur_image_v)
            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])
            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])
            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '%04d.jpg' % (i)), dpi=dpi)
        if opts['visual_log']:
            # NOTE(review): `img_list` is not defined in this function
            # (parameters are img_list_v / img_list_i) — this branch would
            # raise NameError if enabled; confirm and fix upstream.
            if gt is None:
                print("Frame %d/%d, Score %.3f, Time %.3f" % \
                    (i, len(img_list), target_score, spf))
            else:
                print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
                    (i, len(img_list), overlap_ratio(gt[i],result_bb[i])[0], target_score, spf))
        print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
            (i, len(img_list_v), overlap_ratio(gt[i], result_bb[i])[0], target_score, spf))
        iou_result[i] = overlap_ratio(gt[i], result_bb[i])[0]
    fps = len(img_list_v) / spf_total
    # pdb.set_trace()
    # print("==>> epochID %d, L1-Loss %.4f, Time %.3f" % (epochID, total_l1_Loss/len(img_list_v), spf_total))
    return iou_result, result_bb, fps, result
def test(model, graph, idx, labels): model.eval() pred = model(graph, 'paper')[idx].max(1)[1].cpu() acc = (pred == labels[idx]).float().mean() return acc
def train_model(dataloaders, model, criterion, optimizer, scheduler, num_epochs, save_epoch, save_name='model', save_path='./pkl'):
    """Train `model`, validating each epoch, and keep the lowest-val-loss weights.

    Args:
        dataloaders: dict with 'train' and 'val' DataLoaders.
        model: network to train (moved batches go to the module-level `device`).
        criterion: loss function.
        optimizer: optimizer whose param_groups[0]['lr'] is logged each epoch.
        scheduler: LR scheduler or falsy; ReduceLROnPlateau gets the val loss.
        num_epochs: number of epochs to run.
        save_epoch: NOTE(review): accepted but never used in this body.
        save_name/save_path: where checkpoint .pkl files are written.

    Returns:
        The model with the best (lowest validation loss) weights loaded.
    """
    isReduceLROnPlateau = False
    if isinstance(scheduler, lr_scheduler.ReduceLROnPlateau):
        isReduceLROnPlateau = True
    since = time.time()
    best_model_wts = None
    best_loss = float("inf")
    trainLoss = []
    valLoss = []
    lrs = []
    epochs = []
    plt.ion()  # interactive plotting on; actual plotting call below is commented out
    for epoch in range(1, num_epochs + 1):
        epochs += [epoch]
        lrs += [optimizer.param_groups[0]['lr']]
        # train:
        model.train()
        running_loss = 0.0
        data_size = 0
        for inputs, labels in dataloaders['train']:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            torch.set_grad_enabled(True)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # statistics
            data_size += inputs.size(0)
            running_loss += loss.item() * inputs.size(
                0)  # batch mean loss * batch size = total loss for this batch (guards against a smaller last batch, or train/val size differences)
        epoch_loss = running_loss / data_size  # mean loss over the epoch
        trainLoss += [epoch_loss]
        # validation:
        model.eval()
        running_loss = 0.0
        data_size = 0
        for inputs, labels in dataloaders['val']:
            inputs = inputs.to(device)
            labels = labels.to(device)
            torch.set_grad_enabled(False)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # statistics
            data_size += inputs.size(0)
            running_loss += loss.item() * inputs.size(
                0)  # batch mean loss * batch size = total loss for this batch (guards against a smaller last batch, or train/val size differences)
        epoch_loss = running_loss / data_size  # mean loss over the epoch
        valLoss += [epoch_loss]
        # auto update lr
        if scheduler:
            if isReduceLROnPlateau:
                scheduler.step(epoch_loss)  # plateau scheduler needs the monitored metric
            else:
                scheduler.step()
        # show each epoch
        if args.show_each_epoch:
            print('Epoch {}/{}\n{}'.format(epoch, num_epochs, '-' * 10))
            print(
                'train_loss: {:.4f}\n val_loss: {:.4f}\nlearning_rate: {:.4f}\n'
                .format(trainLoss[-1], valLoss[-1],
                        optimizer.param_groups[0]['lr']))
        # per-epoch update: deep-copy the model weights on a new lowest val loss
        if valLoss[-1] < best_loss:
            best_loss = valLoss[-1]
            best_model_wts = copy.deepcopy(model.state_dict())
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            # checkpoint each improvement; filename records both losses
            torch.save(
                model,
                '{}/{}_{}-trainLoss_{:.4f}-valLoss_{:.4f}.pkl'.format(
                    save_path, save_name, epoch, trainLoss[-1], valLoss[-1]))
    # printHistory(epochs,trainLoss,valLoss,lrs)
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Loss: {:4f}'.format(best_loss))
    # load best model weights
    model.load_state_dict(best_model_wts)
    if not os.path.exists('{}/best/'.format(save_path)):
        os.makedirs('{}/best/'.format(save_path))
    torch.save(model, '{}/best/{}.pkl'.format(save_path, save_name))
    return model
def train(total_iters=0):
    """Run one epoch of RNN language-model training over the module-level
    `train_data`, logging every `args.log_interval` batches and checkpointing
    every `args.save_iters` iterations (rank-0 worker only).

    Relies on module-level globals: model, train_data, criterion, optim, LR,
    args, epoch, init_hidden, get_batch.

    Returns:
        The last logged average loss (`cur_loss`).
        NOTE(review): `cur_loss` is unbound if the loop runs fewer than
        `args.log_interval + 1` batches — this would raise UnboundLocalError.
    """
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = args.data_size
    hidden = init_hidden(args.batch_size)
    curr_loss = 0.  # NOTE(review): dead variable — never updated; `cur_loss` below is the real accumulator
    for i, batch in enumerate(train_data):
        data, targets, reset_mask = get_batch(batch)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        output, hidden = model(data, reset_mask=reset_mask)
        loss = criterion(output.view(-1, ntokens).contiguous().float(),
                         targets.view(-1).contiguous())
        optim.zero_grad()
        if args.fp16:
            # fp16 optimizer wraps backward (loss scaling)
            optim.backward(loss)
        else:
            loss.backward()
        total_loss += loss.data.float()
        # clipping gradients helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip > 0:
            if not args.fp16:
                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
            else:
                optim.clip_fp32_grads(clip=args.clip)
        optim.step()
        # step learning rate and log training progress
        lr = LR.get_lr()[0]
        if not args.fp16:
            LR.step()
        else:
            # if fp16 optimizer skips gradient step due to explosion do not step lr
            if not optim.overflow:
                LR.step()
        if i % args.log_interval == 0 and i > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            # ppl capped via exp(min(loss, 20)) to avoid overflow in the printout
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:.2E} | ms/batch {:.3E} | \
loss {:.2E} | ppl {:8.2f} | loss scale {:8.2f}'.format(
                epoch, i, len(train_data), lr,
                elapsed * 1000 / args.log_interval, cur_loss,
                math.exp(min(cur_loss, 20)),
                args.loss_scale if not args.fp16 else optim.loss_scale
            )
            )
            total_loss = 0
            start_time = time.time()
            sys.stdout.flush()
        # save current model progress. If distributed only save from worker 0
        if args.save_iters and total_iters % (args.save_iters) == 0 and total_iters > 0 and args.rank < 1:
            if args.rank < 1:
                with open(os.path.join(os.path.splitext(args.save)[0], 'e%s.pt'%(str(total_iters),)), 'wb') as f:
                    torch.save(model.state_dict(), f)
            torch.cuda.synchronize()
        total_iters += 1
    return cur_loss
def test_testset(self):
    """Evaluate a freshly-loaded checkpoint on the held-out split, printing
    NDCG@{5,10,50,100,150}, Recall@{5,10,20,50,100} and AUC.

    Builds the model named by self.args.baseline (DAE / HPrior / MF, else the
    conditional VAE_RNN_rec), restores weights from self.args.log_dir, then
    streams batches and scores them with the project metric helpers.
    NOTE(review): despite the name, the active (uncommented) paths load the
    *valid* csv files — the test-file loads are commented out below.
    """
    if self.args.condition:
        # preload one trained hidden vector per class, saved by train()
        for k in range(self.args.class_num):
            hidden = "h_{}".format(k+1)
            self.hidden_vecs[hidden] = torch.load(f"{self.args.hiddenvec_dir}/{hidden}.pt")
    if self.args.baseline == 'DAE':
        p_dims = self.args.dims
        # p_dims = [self.args.latent_size, self.args.hidden_size_vae, self.args.input_size_vae]
        model = MultiDAE(p_dims)
    elif self.args.baseline == 'HPrior':
        model = HPrior_VAE(self.args.dims, self.args.hidden_size_rnn, self.args.dataset,\
            self.args.input_size_rnn, self.args.activation)
    elif self.args.baseline == 'MF':
        # hard-coded (n_users, n_items, n_factors) — presumably ML-1M sizes; TODO confirm
        model = MF(6040, 3355, 100)
    else:
        model = VAE_RNN_rec(self.args.dims, self.args.input_size_rnn, self.args.embedding_size,\
            self.args.num_layer, self.args.dropout_rate, self.args.bidirectional, self.args.class_num,\
            self.args.hidden_size_rnn, self.args.condition, self.args.dataset, self.args.activation, self.args.freeze, self.args.attn, self.args.condition_size)
    model = model.to(self.args.device)
    model.load_state_dict(torch.load(self.args.log_dir + '/' + self.args.load_model + '/' + 'model.pt'))
    model.eval()
    if self.args.data_dir == './data/ml-1m':
        dataloader = ItemRatingLoader(self.args.data_dir)
        if self.args.baseline == 'MF':
            tr_data_rating = dataloader.load_tr_te_data_mf(os.path.join(self.args.data_dir, 'fixed2_test_te.csv'), 1)
            N = 177 * self.args.batch_size  # MF loader is a generator, so N is fixed explicitly
        else:
            # fixed_valid / fixed_test
            # tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'fixed2_test_tr.csv'), \
            #     os.path.join(self.args.data_dir, 'fixed2_test_te.csv'))
            tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'fixed_valid_tr.csv'), \
                os.path.join(self.args.data_dir, 'fixed_valid_te.csv'))
    elif self.args.data_dir == './data/amazon' or self.args.data_dir == './data/amazon_min20_woman' or self.args.data_dir =='./data/amazon_min20_woman_fix' or self.args.dataset=='amazon_min10_woman':
        dataloader = AmazonRatingLoader(self.args.data_dir, self.args.dims[0])
        # tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'test_tr.csv'),\
        #     os.path.join(self.args.data_dir, 'test_te.csv'))
        tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'valid_tr.csv'),\
            os.path.join(self.args.data_dir, 'valid_te.csv'))
    if not self.args.baseline == 'MF':
        N = tr_data_rating.shape[0]
    # fixed seed so the evaluation permutation is reproducible across runs
    idxlist = np.array(range(N))
    np.random.seed(98765)
    idx_pe = np.random.permutation(len(idxlist))
    idxlist = idxlist[idx_pe]
    if self.args.condition or self.args.baseline == 'HPrior':
        valid_data_item = dataloader.load_sequence_data_generator(int(N/self.args.batch_size)+1, 'valid', self.args.batch_size, idx_pe)
        # valid_data_item = dataloader.load_sequence_data_generator(int(N/self.args.batch_size)+1, 'test', self.args.batch_size, idx_pe)
    if self.args.test_hidden == 'fixed':
        tr_data_hidden = dataloader.fixed_hidden(int(N/self.args.batch_size)+1, 'valid', self.args.batch_size, idx_pe)
    with torch.no_grad():
        r20_list, r50_list, r5_list, r10_list, r100_list, ndcg_list50, ndcg_list5, ndcg_list10, ndcg_list100, ndcg_list150, auc_list = [], [], [], [], [], [], [], [], [], [], []
        for i, st_idx in enumerate(range(0, N, self.args.batch_size)):
            if self.args.condition or self.args.baseline == 'HPrior':
                # generator yields (order, item_feature, label); batches must be
                # re-ordered with `order` to line up with the item features
                order, item_feature, label = next(valid_data_item)
                end_idx = min(st_idx + self.args.batch_size, N)
                x_tr_unorder = tr_data_rating[idxlist[st_idx:end_idx]]
                X_tr = x_tr_unorder[order]
                x_te_unorder = te_data_rating[idxlist[st_idx:end_idx]]
                X_te = x_te_unorder[order]
            else:
                if not self.args.baseline == 'MF':
                    end_idx = min(st_idx + self.args.batch_size, N)
                    X_tr = tr_data_rating[idxlist[st_idx:end_idx]]
                    X_te = te_data_rating[idxlist[st_idx:end_idx]]
                else:
                    dd = next(tr_data_rating)
                    X_te = dd[0]
                    X_pre = dd[1]
            if not self.args.baseline == 'MF':
                if sparse.isspmatrix(X_tr):
                    X_tr = X_tr.toarray()
                X_tr = X_tr.astype('float32')
            if self.args.condition:
                # choose how the per-class condition vector is obtained
                if self.args.test_hidden == 'trained':
                    print('use trained hidden vector')
                    h = []
                    for b_c in label:
                        for j in range(self.args.class_num):
                            hidden = "h_{}".format(j+1)
                            if b_c == j:
                                h.append(self.hidden_vecs[hidden].unsqueeze(0))
                    hidden = torch.cat(h, 0)
                    # _, hidden = model.RNNEncoder(item_feature.to(self.args.device))
                elif self.args.test_hidden == 'onehot':
                    hidden = self.tooh(label, self.args.class_num).to(self.args.device)
                elif self.args.test_hidden == 'fixed':
                    hidden = next(tr_data_hidden)
                else:
                    _, hidden = model.RNNEncoder(item_feature.to(self.args.device))
                if len(hidden.shape) == 1:
                    hidden = hidden.unsqueeze(0)
                model_input = (torch.FloatTensor(X_tr).to(self.args.device), F.sigmoid(hidden))
                recon, _, _ = model(model_input)
            elif self.args.baseline == 'HPrior':
                recon, _, _ = model(torch.FloatTensor(X_tr).to(self.args.device), item_feature.to(self.args.device))
            else:
                if not self.args.baseline:
                    recon, _, _ = model(torch.FloatTensor(X_tr).to(self.args.device))
                elif self.args.baseline == 'MF':
                    pre = [list(t) for t in zip(*X_pre)]
                    user = torch.LongTensor(list(set(pre[0]))).to(self.args.device)
                    item = torch.LongTensor(pre[1]).to(self.args.device)
                    recon = model(user, item, None, None)
                    recon = recon.view(1,-1)
                else:
                    recon = model(torch.FloatTensor(X_tr).to(self.args.device))
            # if not self.args.baseline:
            #     recon_loss, kld = loss_function(torch.FloatTensor(X_tr).to(self.args.device), recon, mu, logvar, self.args.dist)
            if not self.args.baseline == 'MF':
                # mask already-seen items so they cannot be recommended
                recon[X_tr.nonzero()] = -np.inf
            ndcg_list50.append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=50))
            ndcg_list100.append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=100))
            ndcg_list10.append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=10))
            ndcg_list5.append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=5))
            ndcg_list150.append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=150))
            r20_list.append(Recall_at_k_batch(recon.cpu().detach().numpy(), X_te, k=20))
            r50_list.append(Recall_at_k_batch(recon.cpu().detach().numpy(), X_te, k=50))
            r10_list.append(Recall_at_k_batch(recon.cpu().detach().numpy(), X_te, k=10))
            r5_list.append(Recall_at_k_batch(recon.cpu().detach().numpy(), X_te, k=5))
            r100_list.append(Recall_at_k_batch(recon.cpu().detach().numpy(), X_te, k=100))
            auc_list.append(AUC_score(recon.cpu().detach().numpy(), X_te))
        # flatten per-batch metric arrays into per-user arrays — presumably the
        # metric helpers return one value per user; TODO confirm
        ndcg_list50 = np.concatenate(ndcg_list50)
        ndcg_list100 = np.concatenate(ndcg_list100)
        ndcg_list150 = np.concatenate(ndcg_list150)
        ndcg_list10 = np.concatenate(ndcg_list10)
        ndcg_list5 = np.concatenate(ndcg_list5)
        r20_list = np.concatenate(r20_list)
        r10_list = np.concatenate(r10_list)
        r50_list = np.concatenate(r50_list)
        r5_list = np.concatenate(r5_list)
        r100_list = np.concatenate(r100_list)
        auc_list = np.asarray(auc_list)
        # if not self.args.baseline:
        #     print(f"test loss : {test_loss / (N/self.args.batch_size):.3}")
        # mean metric with its standard error in parentheses
        print("Test NDCG@5=%.5f (%.5f)" % (ndcg_list5.mean(), np.std(ndcg_list5) / np.sqrt(len(ndcg_list5))))
        print("Test NDCG@10=%.5f (%.5f)" % (ndcg_list10.mean(), np.std(ndcg_list10) / np.sqrt(len(ndcg_list10))))
        print("Test NDCG@50=%.5f (%.5f)" % (ndcg_list50.mean(), np.std(ndcg_list50) / np.sqrt(len(ndcg_list50))))
        print("Test NDCG@100=%.5f (%.5f)" % (ndcg_list100.mean(), np.std(ndcg_list100) / np.sqrt(len(ndcg_list100))))
        print("Test NDCG@150=%.5f (%.5f)" % (ndcg_list150.mean(), np.std(ndcg_list150) / np.sqrt(len(ndcg_list150))))
        print("Test Recall@5=%.5f (%.5f)" % (r5_list.mean(), np.std(r5_list) / np.sqrt(len(r5_list))))
        print("Test Recall@10=%.5f (%.5f)" % (r10_list.mean(), np.std(r10_list) / np.sqrt(len(r10_list))))
        print("Test Recall@20=%.5f (%.5f)" % (r20_list.mean(), np.std(r20_list) / np.sqrt(len(r20_list))))
        print("Test Recall@50=%.5f (%.5f)" % (r50_list.mean(), np.std(r50_list) / np.sqrt(len(r50_list))))
        print("Test Recall@100=%.5f (%.5f)" % (r100_list.mean(), np.std(r100_list) / np.sqrt(len(r100_list))))
        print("Test AUC=%.5f (%.5f)" % (auc_list.mean(), np.std(auc_list) / np.sqrt(len(auc_list))))
def train(self):
    """Full training loop for the graph-regression runner.

    Builds train/dev loaders and the model named by self.model_conf.name,
    trains with SGD or Adam under a MultiStepLR schedule, validates every
    `valid_epoch` epochs (and at epoch 0), snapshots the best/periodic
    checkpoints, supports early stopping, and dumps per-step statistics to
    train_stats.p.

    Returns:
        Best validation MSE observed.
    """
    # create data loader
    # NOTE(review): eval() on a config string — safe only if loader_name is trusted
    train_dataset = eval(self.dataset_conf.loader_name)(self.config, split='train')
    dev_dataset = eval(self.dataset_conf.loader_name)(self.config, split='dev')
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=self.train_conf.batch_size,
        shuffle=self.train_conf.shuffle,
        num_workers=self.train_conf.num_workers,
        collate_fn=train_dataset.collate_fn,
        drop_last=False)
    dev_loader = torch.utils.data.DataLoader(
        dev_dataset,
        batch_size=self.train_conf.batch_size,
        shuffle=False,
        num_workers=self.train_conf.num_workers,
        collate_fn=dev_dataset.collate_fn,
        drop_last=False)
    # create models
    model = eval(self.model_conf.name)(self.config)
    if self.use_gpu:
        model = nn.DataParallel(model, device_ids=self.gpus).cuda()
    # create optimizer (only over trainable parameters)
    params = filter(lambda p: p.requires_grad, model.parameters())
    if self.train_conf.optimizer == 'SGD':
        optimizer = optim.SGD(
            params,
            lr=self.train_conf.lr,
            momentum=self.train_conf.momentum,
            weight_decay=self.train_conf.wd)
    elif self.train_conf.optimizer == 'Adam':
        optimizer = optim.Adam(
            params, lr=self.train_conf.lr, weight_decay=self.train_conf.wd)
    else:
        raise ValueError("Non-supported optimizer!")
    early_stop = EarlyStopper([0.0], win_size=10, is_decrease=False)
    lr_scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=self.train_conf.lr_decay_steps,
        gamma=self.train_conf.lr_decay)
    # reset gradient
    optimizer.zero_grad()
    # resume training
    if self.train_conf.is_resume:
        load_model(model, self.train_conf.resume_model, optimizer=optimizer)
    # Training Loop
    iter_count = 0
    best_val_loss = np.inf
    results = defaultdict(list)
    for epoch in range(self.train_conf.max_epoch):
        # validation (every valid_epoch epochs, and once before training starts)
        if (epoch + 1) % self.train_conf.valid_epoch == 0 or epoch == 0:
            model.eval()
            val_loss = []
            for data in tqdm(dev_loader):
                if self.use_gpu:
                    # common tensors first, then model-specific extras
                    data['node_feat'], data['node_mask'], data[
                        'label'] = data_to_gpu(data['node_feat'],
                                               data['node_mask'], data['label'])
                    if self.model_conf.name == 'LanczosNetGeneral':
                        data['L'], data['D'], data['V'] = data_to_gpu(
                            data['L'], data['D'], data['V'])
                    elif self.model_conf.name == 'GraphSAGE':
                        data['nn_idx'], data['nonempty_mask'] = data_to_gpu(
                            data['nn_idx'], data['nonempty_mask'])
                    elif self.model_conf.name == 'GPNN':
                        data['L'], data['L_cluster'], data[
                            'L_cut'] = data_to_gpu(data['L'],
                                                   data['L_cluster'],
                                                   data['L_cut'])
                    else:
                        data['L'] = data_to_gpu(data['L'])[0]
                with torch.no_grad():
                    # dispatch on model name: each variant takes different graph operands
                    if self.model_conf.name == 'AdaLanczosNet':
                        pred, _ = model(
                            data['node_feat'],
                            data['L'],
                            label=data['label'],
                            mask=data['node_mask'])
                    elif self.model_conf.name == 'LanczosNetGeneral':
                        pred, _ = model(
                            data['node_feat'],
                            data['L'],
                            data['D'],
                            data['V'],
                            label=data['label'],
                            mask=data['node_mask'])
                    elif self.model_conf.name == 'GraphSAGE':
                        pred, _ = model(
                            data['node_feat'],
                            data['nn_idx'],
                            data['nonempty_mask'],
                            label=data['label'],
                            mask=data['node_mask'])
                    elif self.model_conf.name == 'GPNN':
                        pred, _ = model(
                            data['node_feat'],
                            data['L'],
                            data['L_cluster'],
                            data['L_cut'],
                            label=data['label'],
                            mask=data['node_mask'])
                    else:
                        pred, _ = model(
                            data['node_feat'],
                            data['L'],
                            label=data['label'],
                            mask=data['node_mask'])
                # per-element squared error; averaged over the whole dev set below
                curr_loss = (pred - data['label']).pow(2).cpu().numpy()
                val_loss += [curr_loss]
            val_loss = float(np.mean(np.concatenate(val_loss)))
            logger.info("Avg. Validation MSE = {}".format(val_loss))
            self.writer.add_scalar('val_loss', val_loss, iter_count)
            results['val_loss'] += [val_loss]
            # save best model
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                snapshot(
                    model.module if self.use_gpu else model,
                    optimizer,
                    self.config,
                    epoch + 1,
                    tag='best')
            logger.info(
                "Current Best Validation MSE = {}".format(best_val_loss))
            # check early stop
            if early_stop.tick([val_loss]):
                snapshot(
                    model.module if self.use_gpu else model,
                    optimizer,
                    self.config,
                    epoch + 1,
                    tag='last')
                self.writer.close()
                break
        # training
        model.train()
        # NOTE(review): scheduler stepped before optimizer.step() each epoch —
        # fine on older PyTorch; newer versions warn about this ordering
        lr_scheduler.step()
        for data in train_loader:
            optimizer.zero_grad()
            if self.use_gpu:
                data['node_feat'], data['node_mask'], data[
                    'label'] = data_to_gpu(data['node_feat'],
                                           data['node_mask'], data['label'])
                if self.model_conf.name == 'LanczosNetGeneral':
                    data['L'], data['D'], data['V'] = data_to_gpu(
                        data['L'], data['D'], data['V'])
                elif self.model_conf.name == 'GraphSAGE':
                    data['nn_idx'], data['nonempty_mask'] = data_to_gpu(
                        data['nn_idx'], data['nonempty_mask'])
                elif self.model_conf.name == 'GPNN':
                    data['L'], data['L_cluster'], data[
                        'L_cut'] = data_to_gpu(data['L'], data['L_cluster'],
                                               data['L_cut'])
                else:
                    data['L'] = data_to_gpu(data['L'])[0]
            # same per-model dispatch as validation, but the model returns the loss
            if self.model_conf.name == 'AdaLanczosNet':
                _, train_loss = model(
                    data['node_feat'],
                    data['L'],
                    label=data['label'],
                    mask=data['node_mask'])
            elif self.model_conf.name == 'LanczosNetGeneral':
                _, train_loss = model(
                    data['node_feat'],
                    data['L'],
                    data['D'],
                    data['V'],
                    label=data['label'],
                    mask=data['node_mask'])
            elif self.model_conf.name == 'GraphSAGE':
                _, train_loss = model(
                    data['node_feat'],
                    data['nn_idx'],
                    data['nonempty_mask'],
                    label=data['label'],
                    mask=data['node_mask'])
            elif self.model_conf.name == 'GPNN':
                _, train_loss = model(
                    data['node_feat'],
                    data['L'],
                    data['L_cluster'],
                    data['L_cut'],
                    label=data['label'],
                    mask=data['node_mask'])
            else:
                _, train_loss = model(
                    data['node_feat'],
                    data['L'],
                    label=data['label'],
                    mask=data['node_mask'])
            # assign gradient
            train_loss.backward()
            optimizer.step()
            train_loss = float(train_loss.data.cpu().numpy())
            self.writer.add_scalar('train_loss', train_loss, iter_count)
            results['train_loss'] += [train_loss]
            results['train_step'] += [iter_count]
            # display loss
            if (iter_count + 1) % self.train_conf.display_iter == 0:
                logger.info(
                    "Loss @ epoch {:04d} iteration {:08d} = {}".format(
                        epoch + 1, iter_count + 1, train_loss))
            iter_count += 1
        # snapshot model
        if (epoch + 1) % self.train_conf.snapshot_epoch == 0:
            logger.info("Saving Snapshot @ epoch {:04d}".format(epoch + 1))
            snapshot(model.module if self.use_gpu else model, optimizer,
                     self.config, epoch + 1)
    results['best_val_loss'] += [best_val_loss]
    pickle.dump(
        results,
        open(os.path.join(self.config.save_dir, 'train_stats.p'), 'wb'))
    self.writer.close()
    logger.info("Best Validation MSE = {}".format(best_val_loss))
    return best_val_loss
def test(self, model, anneal):
    """Evaluate `model` on the validation split and print ranking metrics.

    Args:
        model: trained recommender (VAE variant or baseline per self.args.baseline).
        anneal: KL annealing weight used when accumulating the ELBO test loss.

    Returns:
        Tuple of means: (r10, r20, r50, r100, ndcg10, ndcg50, ndcg100,
        ndcg150, auc).
    """
    model.eval()
    if self.args.data_dir == './data/ml-1m':
        dataloader = ItemRatingLoader(self.args.data_dir)
        if self.args.baseline == 'MF':
            tr_data_rating = dataloader.load_tr_te_data_mf(os.path.join(self.args.data_dir, 'fixed2_valid_te.csv'), 1)
            N = 177 * self.args.batch_size  # MF loader is a generator, so N is fixed explicitly
        else:
            tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'fixed2_valid_tr.csv'),
                os.path.join(self.args.data_dir, 'fixed2_valid_te.csv'))
    elif self.args.data_dir == './data/amazon' or self.args.data_dir =='./data/amazon_min20_woman' \
        or self.args.data_dir =='./data/amazon_min20_woman_fix' or self.args.dataset=='amazon_min10_woman':
        dataloader = AmazonRatingLoader(self.args.data_dir, self.args.dims[0])
        if self.args.baseline == 'MF':
            tr_data_rating = dataloader.load_tr_te_data_mf(os.path.join(self.args.data_dir, 'valid_te.csv'), 1)
            N = 177 * self.args.batch_size
        else:
            tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'valid_tr.csv'),
                os.path.join(self.args.data_dir, 'valid_te.csv'))
    # print(tr_data_rating.shape[0])
    # if self.args.baseline == 'MF':
    #     tr_data_rating = dataloader.load_tr_te_data_mf(os.path.join(self.args.data_dir, 'fixed_valid_te.csv'), 1)
    #     N = 177 * self.args.batch_size
    if not self.args.baseline == 'MF':
        N = tr_data_rating.shape[0]
    # fixed seed so the evaluation permutation is reproducible across runs
    idxlist = np.array(range(N))
    np.random.seed(98765)
    idx_pe = np.random.permutation(len(idxlist))
    idxlist = idxlist[idx_pe]
    if self.args.condition or self.args.baseline == 'HPrior':
        valid_data_item = dataloader.load_sequence_data_generator(int(N/self.args.batch_size)+1, 'valid', self.args.batch_size, idx_pe)
    test_loss = 0
    with torch.no_grad():
        r20_list, r50_list, r10_list, r100_list, ndcg_list50, ndcg_list10, ndcg_list100, ndcg_list150, auc_list \
            = [], [], [], [], [], [], [], [], []
        for i, st_idx in enumerate(range(0, N, self.args.batch_size)):
            if self.args.condition or self.args.baseline == 'HPrior':
                # generator yields (order, item_feature, label); re-order ratings to match
                order, item_feature, label = next(valid_data_item)
                end_idx = min(st_idx + self.args.batch_size, N)
                x_tr_unorder = tr_data_rating[idxlist[st_idx:end_idx]]
                X_tr = x_tr_unorder[order]
                x_te_unorder = te_data_rating[idxlist[st_idx:end_idx]]
                X_te = x_te_unorder[order]
            else:
                if not self.args.baseline == 'MF':
                    end_idx = min(st_idx + self.args.batch_size, N)
                    X_tr = tr_data_rating[idxlist[st_idx:end_idx]]
                    X_te = te_data_rating[idxlist[st_idx:end_idx]]
                else:
                    dd = next(tr_data_rating)
                    X_te = dd[0]
                    X_pre = dd[1]
            if not self.args.baseline == 'MF':
                if sparse.isspmatrix(X_tr):
                    X_tr = X_tr.toarray()
                X_tr = X_tr.astype('float32')
            if self.args.condition:
                # choose how the per-class condition vector is obtained
                if self.args.test_hidden == 'trained':
                    h = []
                    for b_c in label:
                        for j in range(self.args.class_num):
                            hidden = "h_{}".format(j+1)
                            if b_c == j:
                                h.append(self.hidden_vecs[hidden].unsqueeze(0))
                    hidden = torch.cat(h, 0)
                elif self.args.test_hidden == 'onehot':
                    hidden = self.tooh(label, self.args.class_num).to(self.args.device)
                else:
                    _, hidden = model.RNNEncoder(item_feature.to(self.args.device))
                model_input = (torch.FloatTensor(X_tr).to(self.args.device), F.sigmoid(hidden))
                recon, mu, logvar = model(model_input)
            elif self.args.baseline == 'HPrior':
                recon, mu, logvar = model(torch.FloatTensor(X_tr).to(self.args.device), item_feature.to(self.args.device))
            else:
                if not self.args.baseline:
                    recon, mu, logvar = model(torch.FloatTensor(X_tr).to(self.args.device))
                elif self.args.baseline == 'MF':
                    pre = [list(t) for t in zip(*X_pre)]
                    user = torch.LongTensor(list(set(pre[0]))).to(self.args.device)
                    item = torch.LongTensor(pre[1]).to(self.args.device)
                    recon = model(user, item, None, None)
                    recon = recon.view(1,-1)
                else:
                    recon = model(torch.FloatTensor(X_tr).to(self.args.device))
            if not self.args.baseline or self.args.baseline == 'HPrior':
                # accumulate the annealed ELBO for reporting only
                recon_loss, kld = loss_function(torch.FloatTensor(X_tr).to(self.args.device), recon, mu, logvar, self.args.dist, \
                    self.args.negsample, self.args.device, self.args.neg_num, self.args.bpr_weight)
                loss = recon_loss + anneal * kld
                test_loss += loss
            if not self.args.baseline == 'MF':
                # mask already-seen items so they cannot be recommended
                recon[X_tr.nonzero()] = -np.inf
            # print(X_te.shape)
            ndcg_list50.append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=50))
            ndcg_list100.append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=100))
            ndcg_list10.append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=10))
            ndcg_list150.append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=150))
            r20_list.append(Recall_at_k_batch(recon.cpu().detach().numpy(), X_te, k=20))
            r50_list.append(Recall_at_k_batch(recon.cpu().detach().numpy(), X_te, k=50))
            r10_list.append(Recall_at_k_batch(recon.cpu().detach().numpy(), X_te, k=10))
            r100_list.append(Recall_at_k_batch(recon.cpu().detach().numpy(), X_te, k=100))
            auc_list.append(AUC_score(recon.cpu().detach().numpy(), X_te))
            # print(ndcg_list100)
            # print(AUC_score(recon.cpu().detach().numpy(), X_te))
        # flatten per-batch metric arrays — presumably one value per user; TODO confirm
        ndcg_list50 = np.concatenate(ndcg_list50)
        ndcg_list100 = np.concatenate(ndcg_list100)
        ndcg_list150 = np.concatenate(ndcg_list150)
        ndcg_list10 = np.concatenate(ndcg_list10)
        r20_list = np.concatenate(r20_list)
        r10_list = np.concatenate(r10_list)
        r50_list = np.concatenate(r50_list)
        r100_list = np.concatenate(r100_list)
        auc_list = np.asarray(auc_list)
        if not self.args.baseline:
            print(f"test loss : {test_loss / (N/self.args.batch_size):.3}")
        # mean metric with its standard error in parentheses
        print("Test NDCG@10=%.5f (%.5f)" % (ndcg_list10.mean(), np.std(ndcg_list10) / np.sqrt(len(ndcg_list10))))
        print("Test NDCG@50=%.5f (%.5f)" % (ndcg_list50.mean(), np.std(ndcg_list50) / np.sqrt(len(ndcg_list50))))
        print("Test NDCG@100=%.5f (%.5f)" % (ndcg_list100.mean(), np.std(ndcg_list100) / np.sqrt(len(ndcg_list100))))
        print("Test NDCG@150=%.5f (%.5f)" % (ndcg_list150.mean(), np.std(ndcg_list150) / np.sqrt(len(ndcg_list150))))
        print("Test Recall@10=%.5f (%.5f)" % (r10_list.mean(), np.std(r10_list) / np.sqrt(len(r10_list))))
        print("Test Recall@20=%.5f (%.5f)" % (r20_list.mean(), np.std(r20_list) / np.sqrt(len(r20_list))))
        print("Test Recall@50=%.5f (%.5f)" % (r50_list.mean(), np.std(r50_list) / np.sqrt(len(r50_list))))
        print("Test Recall@100=%.5f (%.5f)" % (r100_list.mean(), np.std(r100_list) / np.sqrt(len(r100_list))))
        print("Test AUC=%.5f (%.5f)" % (auc_list.mean(), np.std(auc_list) / np.sqrt(len(auc_list))))
    return np.mean(r10_list), np.mean(r20_list), np.mean(r50_list), np.mean(r100_list), \
        np.mean(ndcg_list10), np.mean(ndcg_list50), np.mean(ndcg_list100), np.mean(ndcg_list150), np.mean(auc_list)
def train(self):
    """Train the selected recommender (DAE / MF / HPrior baselines, or the
    conditional VAE_RNN_rec) with per-epoch validation via self.test().

    Side effects: logs to TensorBoard, saves per-class hidden vectors when
    conditioning, and checkpoints the model whenever NDCG@50 improves.
    """
    ### DAE
    if self.args.baseline == 'DAE':
        p_dims = self.args.dims
        # p_dims = [self.args.latent_size, self.args.hidden_size_vae, self.args.input_size_vae]
        model = MultiDAE(p_dims)
        optimizer = optim.Adam(model.parameters(), lr = self.args.lr_vae, weight_decay=0.0)
    elif self.args.baseline == 'MF':
        # hard-coded (n_users, n_items, n_factors) — presumably ML-1M sizes; TODO confirm
        model = MF(6040, 3355, 100)
        optimizer = optim.SparseAdam(model.parameters(), lr = self.args.lr_mf)
        # optimizer = optim.SGD(model.parameters(), lr=1e-6, weight_decay=1e-5)
    elif self.args.baseline == 'HPrior':
        model = HPrior_VAE(self.args.dims, self.args.hidden_size_rnn, self.args.dataset,\
            self.args.input_size_rnn, self.args.activation)
        optimizer = optim.Adam(model.parameters(), lr = self.args.lr_vae, weight_decay=0.0)
    else:
        model = VAE_RNN_rec(self.args.dims, self.args.input_size_rnn, self.args.embedding_size,\
            self.args.num_layer, self.args.dropout_rate, self.args.bidirectional, self.args.class_num,\
            self.args.hidden_size_rnn, self.args.condition, self.args.dataset, self.args.activation, self.args.freeze, self.args.attn, self.args.condition_size)
        # separate optimizers so encoder/decoder (and later the RNN encoder) can step independently
        optimizer = {
            'encoder' : optim.Adam(model.encoder.parameters(), lr=self.args.lr_vae, weight_decay=0.0),
            'decoder' : optim.Adam(model.decoder.parameters(), lr=self.args.lr_vae, weight_decay=0.0)
        }
    model = model.to(self.args.device)
    if self.args.data_dir == './data/ml-1m':
        dataloader = ItemRatingLoader(self.args.data_dir)
    elif self.args.data_dir == './data/amazon' or self.args.data_dir == './data/amazon_min20_woman'\
        or self.args.data_dir == './data/amazon_min20_woman_fix' or self.args.dataset=='amazon_min10_woman':
        dataloader = AmazonRatingLoader(self.args.data_dir, self.args.dims[0])
    if self.args.condition:
        optimizer['RNNEncoder'] = optim.Adam(model.RNNEncoder.parameters(), lr=self.args.lr_rnn, weight_decay=0.0)
        # weight = torch.FloatTensor([0.18, 0.28, 0.54]).to(args.device)
        # CEloss = nn.CrossEntropyLoss(weight = weight)
        CEloss = nn.CrossEntropyLoss()
    if self.args.load_model:
        # resume: restore weights and reuse the original run's timestamp for logging
        model.load_state_dict(torch.load(self.args.log_dir + '/' + self.args.load_model + '/' + 'model.pt'))
        self.args.timestamp = self.args.load_model[:10]
    if self.args.condition and self.args.load_pretrained:
        model.RNNEncoder.load_state_dict(torch.load(self.args.pretrained_dir + '/' + self.args.load_pretrained + '/' + 'model.pt'))
        print("loaded pretrained model")
    writer = SummaryWriter(self.args.log_dir + "/" + self.args.timestamp + "_" + self.args.config)
    # if self.args.baseline == 'MF':
    #     train_data_rating = dataloader.load_train_data_mf(os.path.join(self.args.data_dir, 'train.csv'), os.path.join(self.args.data_dir, 'valid_tr.csv'),\
    #         self.args.batch_size, int(374215/self.args.batch_size)+1)
    #     N = 374215
    if not self.args.baseline == 'MF':
        train_data_rating = dataloader.load_train_data(os.path.join(self.args.data_dir, 'train.csv'))
        N = train_data_rating.shape[0]
        idxlist = np.array(range(N))
        idx_pe = np.random.permutation(len(idxlist))
        idxlist = idxlist[idx_pe]
    b_ndcg100 = 0.0  # best NDCG@50 so far (name is historical)
    update_count = 0.0  # counts gradient updates for KL annealing
    for e in range(self.args.epoch):
        model.train()
        total_loss = 0
        if self.args.baseline == 'MF':
            # MF loader is a generator and must be recreated each epoch
            train_data_rating = dataloader.load_train_data_mf(os.path.join(self.args.data_dir, 'train.csv'), os.path.join(self.args.data_dir, 'valid_tr.csv'),os.path.join(self.args.data_dir, 'test_tr.csv'), self.args.batch_size, int(374215/self.args.batch_size)+1)
            N = 374215
        if self.args.condition or self.args.baseline == 'HPrior':
            train_data_item = dataloader.load_sequence_data_generator(int(N/self.args.batch_size)+1, 'train', self.args.batch_size, idx_pe)
        for i, st_idx in enumerate(range(0, N, self.args.batch_size)):
            if self.args.condition or self.args.baseline == 'HPrior':
                # generator yields (order, item_feature, label); re-order ratings to match
                order, item_feature, label = next(train_data_item)
                end_idx = min(st_idx + self.args.batch_size, N)
                x_unorder = train_data_rating[idxlist[st_idx:end_idx]]
                X = x_unorder[order]
            else:
                end_idx = min(st_idx + self.args.batch_size, N)
                if self.args.baseline == 'MF':
                    d = next(train_data_rating)
                    pos = d[0]
                    neg = d[1]
                else:
                    X = train_data_rating[idxlist[st_idx:end_idx]]
            if not self.args.baseline == 'MF':
                if sparse.isspmatrix(X):
                    X = X.toarray()
                X = X.astype('float32')
            if self.args.condition:
                # train the RNN class-encoder with CE loss, keeping the graph for the VAE step
                optimizer["RNNEncoder"].zero_grad()
                output, h = model.RNNEncoder(item_feature.to(self.args.device))
                rnn_loss = CEloss(output, label.to(self.args.device))
                rnn_loss.backward(retain_graph=True)
                # rnn_loss.backward()
                # optimizer["RNNEncoder"].step()
                # self.make_condition(h, label.data)
                self.make_condition(h, label.data)
            if self.args.baseline:
                optimizer.zero_grad()
            else:
                optimizer["encoder"].zero_grad()
                optimizer["decoder"].zero_grad()
            if self.args.condition:
                if self.args.test_hidden == 'onehot':
                    h = self.tooh(label, self.args.class_num).to(self.args.device)
                model_input = (torch.FloatTensor(X).to(self.args.device), F.sigmoid(h))
                recon, mu, logvar = model(model_input)
            elif self.args.baseline == 'HPrior':
                recon, mu, logvar = model(torch.FloatTensor(X).to(self.args.device), item_feature.to(self.args.device))
            else:
                if self.args.baseline == 'DAE':
                    recon = model(torch.FloatTensor(X).to(self.args.device))
                elif self.args.baseline == 'MF':
                    # BPR-style positive/negative (user, item) pairs
                    pos = list(zip(*pos))
                    user = torch.LongTensor(pos[0]).to(self.args.device)
                    item = torch.LongTensor(pos[1]).to(self.args.device)
                    neg = list(zip(*neg))
                    user_neg = torch.LongTensor(neg[0]).to(self.args.device)
                    item_neg = torch.LongTensor(neg[1]).to(self.args.device)
                    ps, ns = model(user, item, user_neg, item_neg)
                    mfloss = mf_loss(ps, ns, 30)
                else:
                    recon, mu, logvar = model(torch.FloatTensor(X).to(self.args.device))
            if self.args.baseline == 'DAE':
                # multinomial likelihood: negative mean log-softmax over observed items
                log_softmax_var = F.log_softmax(recon, dim=-1)
                recon_loss = - torch.mean(torch.sum(log_softmax_var * torch.FloatTensor(X).to(self.args.device), dim=-1))
            if not self.args.baseline or self.args.baseline == 'HPrior':
                recon_loss, kld = loss_function(torch.FloatTensor(X).to(self.args.device), recon, mu, logvar, self.args.dist, \
                    self.args.negsample, self.args.device, self.args.neg_num, self.args.bpr_weight)
            # linear KL annealing up to anneal_cap
            if self.args.anneal_steps > 0:
                anneal = min(self.args.anneal_cap, 1. * update_count / self.args.anneal_steps)
                update_count += 1
            else:
                anneal = self.args.anneal_cap
            if self.args.baseline == 'DAE':
                vae_loss = recon_loss
            elif self.args.baseline == 'MF':
                vae_loss = mfloss
            else:
                vae_loss = recon_loss + anneal * kld
            vae_loss.backward()
            if self.args.baseline:
                optimizer.step()
            else:
                optimizer["encoder"].step()
                optimizer["decoder"].step()
            if self.args.condition:
                optimizer["RNNEncoder"].step()
            # r10, r20, r50, r100, ndcg10, ndcg50, ndcg100, ndcg150, auc = self.test(model, anneal)
            # tensorboard
            if self.args.condition:
                writer.add_scalar("Train rnn loss", rnn_loss, i + e*N/self.args.batch_size)
            writer.add_scalar("Train vae loss", vae_loss, i + e*N/self.args.batch_size)
            if not self.args.baseline =='MF':
                writer.add_scalar("Recon loss", recon_loss, i + e*N/self.args.batch_size)
            if not self.args.baseline:
                writer.add_scalar("KLD", kld, i + e*N/self.args.batch_size)
            if i % 20 == 0:
                if not self.args.baseline:
                    print(f"recon : {recon_loss.item():.3} | kld : {kld.item():.3}")
                if self.args.condition:
                    print(f"epoch : {e} | train_vae_loss : {vae_loss.item():.3} | train_rnn_loss : {rnn_loss.item():.3}", f"[{i*self.args.batch_size} / {N}","(",f"{(i/N*self.args.batch_size)*100:.3} %", ")]")
                else:
                    print(f"epoch : {e} | train_vae_loss : {vae_loss.item():.3}", f"[{i*self.args.batch_size} / {N}","(",f"{(i/N*self.args.batch_size)*100:.3} %", ")]")
            total_loss += vae_loss
        # save model
        # torch.save(model.state_dict(), self.args.log_dir + '/' + self.args.timestamp + '_' + self.args.config + '/model.pt')
        # print("model saved!")
        print(f"epoch : {e} | train vae loss : {total_loss / (N/self.args.batch_size):.3} ")
        if self.args.condition:
            # save condition per epoch for evaluation
            for j in range(self.args.class_num):
                hidden = "h_{}".format(j+1)
                torch.save(self.hidden_vecs[hidden], f"{self.args.hiddenvec_dir}/{hidden}.pt")
            print("hidden vector saved!")
        # test per epoch
        r10, r20, r50, r100, ndcg10, ndcg50, ndcg100, ndcg150, auc = self.test(model, anneal)
        # checkpoint on NDCG@50 improvement
        if ndcg50 > b_ndcg100 :
            torch.save(model.state_dict(), self.args.log_dir + '/' + self.args.timestamp + '_' + self.args.config + '/model.pt')
            print("model saved!")
            b_ndcg100 = ndcg50
        # tensorboard
        # writer.add_scalar("Test_loss", test_loss, e)
        writer.add_scalar("Test_Recall10", r10, e)
        writer.add_scalar("Test_Recall20", r20, e)
        writer.add_scalar("Test_Recall50", r50, e)
        writer.add_scalar("Test_Recall100", r100, e)
        writer.add_scalar("Test_NDCG10", ndcg10, e)
        writer.add_scalar("Test_NDCG50", ndcg50, e)
        writer.add_scalar("Test_NDCG100", ndcg100, e)
        writer.add_scalar("Test_NDCG150", ndcg150, e)
        writer.add_scalar("Test_AUC", auc, e)
# Evaluate on the DiLiGenT benchmark: run every test sample through the
# network, report the per-image error, and dump the masked output images.
test_loader = torch.utils.data.DataLoader(test_set, batch_size=args.test_batch,
        num_workers=args.workers, pin_memory=args.cuda, shuffle=False)
model.eval()
print('---- Testing for %d images - DiLiGent Dataset ----' % (len(test_loader)))
err_mean = 0
with torch.no_grad():
    for img_idx, sample in enumerate(test_loader):
        parsed = utils.parseData(args, sample, 'test')
        net_input = [parsed['input']]
        if args.in_light:
            net_input.append(parsed['l'])
        output = model(net_input)
        err = utils.errorPred(parsed['tar'].data, output.data, parsed['m'].data)
        err_mean += err
        print('error: %.3f' % (err))
        # map the output from [-1, 1] to [0, 1], then zero pixels outside the mask
        rescaled = (output.data + 1) / 2
        masked = rescaled * parsed['m'].data.expand_as(output.data)
        tv.utils.save_image(masked, './Results/' + 'img8_mask_%d.png' % (img_idx + 1))
        print('saved image %d' % (img_idx + 1))
print('------------ mean error: %.3f ------------' % (err_mean / len(test_loader)))
def train(model, optim, db):
    """Train and evaluate `model` with one-hot targets.

    Args:
        model: torch module producing per-class scores of width `size_output`.
        optim: optimizer stepping `model`'s parameters.
        db: dict with 'train' and 'eval' datasets (each yielding (data, int_label)).

    Relies on module-level globals: epochs, batch_size, size_output, criterion,
    device, report_every. Appends each epoch's eval accuracy to results/one_hot.dat.
    """

    def _one_hot(target):
        # Integer class labels -> (N, size_output) one-hot float matrix.
        labels = target.numpy()
        onehot = np.zeros((len(labels), size_output))
        onehot[np.arange(len(labels)), labels] = 1.0
        return torch.tensor(onehot)

    for epoch in range(1, epochs + 1):
        train_loader = torch.utils.data.DataLoader(db['train'], batch_size=batch_size, shuffle=True)

        # ---- Train ----
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            target = _one_hot(target)
            data = data.to(device)
            target = target.float().to(device)
            # BUG FIX: the original called a global `optimizer`, silently ignoring
            # the `optim` argument passed by the caller — use the parameter.
            optim.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            pred = output.data.max(1, keepdim=True)[1]  # index of the max score
            target_idx = target.data.max(1, keepdim=True)[1]  # recover class index from one-hot
            correct = pred.eq(target_idx.view_as(pred)).cpu().sum()
            loss.backward()
            optim.step()
            if batch_idx % report_every == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}, Accuracy: {}/{} ({:.6f})'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100.0 * batch_idx / len(train_loader), loss.item(),
                    correct, len(data), float(correct) / float(len(data))))

        # ---- Evaluate ----
        model.eval()
        eval_loss = 0.0
        correct = 0
        batch_count = 0
        eval_loader = torch.utils.data.DataLoader(db['eval'], batch_size=batch_size, shuffle=True)
        with torch.no_grad():
            for data, target in eval_loader:
                target = _one_hot(target)
                data = data.to(device)
                target = target.float().to(device)
                output = model(data)
                eval_loss += criterion(output, target).item()  # sum up batch loss
                pred = output.data.max(1, keepdim=True)[1]
                target_idx = target.data.max(1, keepdim=True)[1]
                correct += pred.eq(target_idx.view_as(pred)).cpu().sum()
                batch_count += 1
        eval_loss /= batch_count
        accuracy = float(correct) / len(eval_loader.dataset)
        with open('results/one_hot.dat', 'a+') as file:
            file.write(str(accuracy) + "\n")
        print('Eval set: Average loss: {:.4f}, Accuracy: {}/{} ({:.6f})\n'.format(
            eval_loss, correct, len(eval_loader.dataset), accuracy))
def train(args):
    """Fine-tune an ABSA BERT model with an auxiliary domain classifier.

    Builds the labelled ABSA training set plus a pregenerated unlabelled domain
    dataset, trains the shared BERT encoder + task classifier with BertAdam,
    interleaves domain-classifier updates each step, optionally validates per
    epoch (saving the best model), and writes model/valid artifacts to
    args.output_dir. Relies on module-level names: data_utils, ABSATokenizer,
    modelconfig, PregeneratedDataset, ABSABert, BertAdam, logger, torch, os, json.
    """
    processor = data_utils.ABSAProcessor()
    label_list = processor.get_labels(args.task_type)
    tokenizer = ABSATokenizer.from_pretrained(
        modelconfig.MODEL_ARCHIVE_MAP[args.bert_model])
    train_examples = processor.get_train_examples(args.data_dir, args.task_type)
    # Total optimizer steps = ceil(examples / batch) * epochs (used for LR warmup schedule).
    num_train_steps = int(
        math.ceil(len(train_examples) / args.train_batch_size)) * args.num_train_epochs
    train_features = data_utils.convert_examples_to_features(
        train_examples, label_list, args.max_seq_length, tokenizer)
    logger.info("***** Running training *****")
    logger.info(" Num examples = %d", len(train_examples))
    logger.info(" Batch size = %d", args.train_batch_size)
    logger.info(" Num steps = %d", num_train_steps)
    all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
    all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)
    # Unlabelled in-domain data for the adversarial/auxiliary domain objective.
    domain_dataset = PregeneratedDataset(epoch=0, training_path=args.domain_dataset, tokenizer=tokenizer, num_data_epochs=1)
    domain_train_sampler = RandomSampler(domain_dataset)
    domain_train_dataloader = DataLoader(domain_dataset, sampler=domain_train_sampler, batch_size=16)
    train_data = TensorDataset(all_input_ids, all_segment_ids, all_input_mask, all_label_ids)
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size)
    # >>>>> validation set construction (only when --do_valid)
    if args.do_valid:
        valid_examples = processor.get_dev_examples(args.data_dir, args.task_type)
        valid_features = data_utils.convert_examples_to_features(
            valid_examples, label_list, args.max_seq_length, tokenizer)
        valid_all_input_ids = torch.tensor(
            [f.input_ids for f in valid_features], dtype=torch.long)
        valid_all_segment_ids = torch.tensor(
            [f.segment_ids for f in valid_features], dtype=torch.long)
        valid_all_input_mask = torch.tensor(
            [f.input_mask for f in valid_features], dtype=torch.long)
        valid_all_label_ids = torch.tensor(
            [f.label_id for f in valid_features], dtype=torch.long)
        # valid_all_tag_ids = torch.tensor([f.tag_id for f in valid_features], dtype=torch.long)
        valid_data = TensorDataset(valid_all_input_ids, valid_all_segment_ids,
                                   valid_all_input_mask, valid_all_label_ids)
        logger.info("***** Running validations *****")
        logger.info(" Num orig examples = %d", len(valid_examples))
        logger.info(" Num split examples = %d", len(valid_features))
        logger.info(" Batch size = %d", args.train_batch_size)
        valid_sampler = SequentialSampler(valid_data)
        valid_dataloader = DataLoader(valid_data, sampler=valid_sampler, batch_size=args.train_batch_size)
        best_valid_loss = float('inf')
        valid_losses = []
    # <<<<< end of validation declaration
    model = ABSABert.from_pretrained(
        modelconfig.MODEL_ARCHIVE_MAP[args.bert_model], num_labels=len(label_list))
    if args.features_model != 'none':
        # Warm-start from a fine-tuned checkpoint, dropping its task head so
        # the freshly initialised classifier is used instead.
        state_dict = torch.load(args.features_model)
        del state_dict['classifier.weight']
        del state_dict['classifier.bias']
        model.load_state_dict(state_dict, strict=False)
        logger.info('load fine-tuned model from : {}'.format(args.features_model))
    model.cuda()
    flag = True  # True: separate optimizers for encoder/classifier/domain head
    if flag:
        # bert-base: optimize encoder (minus pooler) with weight decay except on bias/LayerNorm
        shared_param_optimizer = [(k, v) for k, v in model.bert.named_parameters() if v.requires_grad == True]
        shared_param_optimizer = [n for n in shared_param_optimizer if 'pooler' not in n[0]]
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        shared_optimizer_grouped_parameters = [{
            'params': [p for n, p in shared_param_optimizer if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.01
        }, {
            'params': [p for n, p in shared_param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }]
        t_total = num_train_steps  # num_train_steps
        supervised_param_optimizer = model.classifier.parameters()
        domain_classifier_param_optimizer = model.domain_cls.parameters()
        shared_optimizer = BertAdam(shared_optimizer_grouped_parameters, lr=args.learning_rate, warmup=args.warmup_proportion, t_total=t_total)
        supervised_optimizer = BertAdam(supervised_param_optimizer, lr=args.learning_rate, warmup=args.warmup_proportion, t_total=t_total)
        # Domain head uses its own fixed LR and no step budget (t_total=-1 -> no warmup decay horizon).
        domain_optimizer = BertAdam(domain_classifier_param_optimizer, lr=3e-5, warmup=args.warmup_proportion, t_total=-1)
    else:
        # Single optimizer over all trainable parameters.
        param_optimizer = [(k, v) for k, v in model.named_parameters() if v.requires_grad == True]
        param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]]
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
            'weight_decay': 0.01
        }, {
            'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }]
        t_total = num_train_steps  # num_train_steps
        optimizer = BertAdam(optimizer_grouped_parameters, lr=args.learning_rate, warmup=args.warmup_proportion, t_total=t_total)
    global_step = 0
    model.train()
    train_steps = len(train_dataloader)
    total_domain_loss = 0
    for e_ in range(args.num_train_epochs):
        train_iter = iter(train_dataloader)
        domain_iter = iter(domain_train_dataloader)
        for step in range(train_steps):
            # NOTE(review): `.next()` is the Python 2 iterator protocol; under
            # Python 3 this relies on the iterator class providing a `next`
            # method — `next(train_iter)` would be portable. TODO confirm runtime.
            batch = train_iter.next()
            batch = tuple(t.cuda() for t in batch)
            input_ids, segment_ids, input_mask, label_ids = batch
            # all_input_ids, all_segment_ids, all_input_mask, all_label_ids, all_tag_ids
            loss, _ = model(input_ids, token_type_ids=segment_ids, attention_mask=input_mask, labels=label_ids)
            loss.backward()
            if flag:
                shared_optimizer.step()
                shared_optimizer.zero_grad()
                supervised_optimizer.step()
                supervised_optimizer.zero_grad()
            else:
                optimizer.step()
                optimizer.zero_grad()
            dirt_n = 1  # 1 or 2: number of domain-classifier updates per supervised step
            for _ in range(dirt_n):
                try:
                    batch = domain_iter.next()
                except:
                    # Domain loader exhausted: restart it (it is smaller than the task loader).
                    domain_iter = iter(domain_train_dataloader)
                    batch = domain_iter.next()
                batch = tuple(t.cuda() for t in batch)
                # presumably: batch[0]=input_ids, batch[4]=attention mask, batch[-1]=domain label — verify against PregeneratedDataset
                input_ids, input_mask, domain_labels = batch[0], batch[4], batch[-1]
                d_loss = model(input_ids, attention_mask=input_mask, domain_label=domain_labels)
                d_loss.backward()
                total_domain_loss += d_loss.item()
                domain_optimizer.step()
                domain_optimizer.zero_grad()
                shared_optimizer.zero_grad()  # make sure to clear the gradients of encoder.
            if step % 50 == 0:
                logger.info('in step {} domain loss: {}'.format(
                    dirt_n * (e_ * train_steps + step + 1),
                    total_domain_loss / (dirt_n * (e_ * train_steps + step + 1))))
            global_step += 1
        # >>>> perform validation at the end of each epoch; keep the best checkpoint.
        if args.do_valid:
            model.eval()
            with torch.no_grad():
                losses = []
                valid_size = 0
                for step, batch in enumerate(valid_dataloader):
                    batch = tuple(t.cuda() for t in batch)  # multi-gpu does scattering it-self
                    input_ids, segment_ids, input_mask, label_ids = batch
                    loss, _ = model(input_ids, token_type_ids=segment_ids, attention_mask=input_mask, labels=label_ids)
                    loss = torch.mean(loss)
                    # Weight by batch size so the mean below is per-example.
                    losses.append(loss.data.item() * input_ids.size(0))
                    valid_size += input_ids.size(0)
                valid_loss = sum(losses) / valid_size
                logger.info("validation loss: %f", valid_loss)
                valid_losses.append(valid_loss)
            if valid_loss < best_valid_loss:
                torch.save(model.state_dict(), os.path.join(args.output_dir, "model.pt"))
                best_valid_loss = valid_loss
            model.train()
    if args.do_valid:
        # Record the validation curve; the best model was already saved above.
        with open(os.path.join(args.output_dir, "valid.json"), "w") as fw:
            json.dump({"valid_losses": valid_losses}, fw)
    else:
        # No validation: save the final model unconditionally.
        torch.save(model.state_dict(), os.path.join(args.output_dir, "model.pt"))
def predict(model, input_str):
    """Run `model` on `input_str` in inference mode and return its raw output.

    Puts the model in eval mode and disables autograd for the forward pass.
    """
    # `eval()` returns the module itself, so this keeps the same object.
    net = model.eval()
    with torch.no_grad():
        return net(input_str)
def model_fn(features, labels, mode, params):
    """Estimator model_fn for the DKT model: train, evaluate and predict.

    Args:
        features: dict with 'inputs', 'seq_len', and (train/eval only)
            'target_id', 'target_correct', 'ids', 'correct'.
        labels: unused here; targets come from `features`.
        mode: a tf.estimator.ModeKeys value.
        params: hyperparameter dict forwarded to DKT and the train-op builder.
    Returns:
        tf.estimator.EstimatorSpec for the given mode.
    """
    inputs = features['inputs']
    seq_steps = features['seq_len']
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    # Accept rank-1 (single sequence) or rank-2 (batch, length) inputs.
    try:
        batch_size, length = get_shape_list(inputs, expected_rank=2)
    except ValueError:
        batch_size = 1
        length = get_shape_list(inputs, expected_rank=1)[0]
    inputs = tf.reshape(inputs, [batch_size, length])
    with tf.variable_scope('model'):
        # Build model
        model = DKT(params, is_training)
        logits = model(batch_size, inputs, seq_steps)  # [batch, length, vocab_size]
    # When in prediction mode the label/target is None; the model output is the prediction.
    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            'predict_output':
                tf.estimator.export.PredictOutput({"predict": tf.sigmoid(logits)})
        }
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={'predict': tf.sigmoid(logits)},
            export_outputs=export_outputs)
    else:
        # Calculate model loss from the target/label features.
        target_ids = features['target_id']
        target_correct = features['target_correct']
        ids = features['ids']
        correct = features['correct']
        loss = dkt_loss(logits, target_correct, target_ids, correct, ids, seq_steps)
        record_dict = {}
        record_dict['minibatch_loss'] = loss
        # Save loss as named tensor so it will be picked up by the logging hook.
        tf.identity(loss, 'cross_entropy')
        if mode == tf.estimator.ModeKeys.EVAL:
            metric_dict = get_eval_metrics(logits, target_correct, target_ids, seq_steps)
            record_dict['accuracy'] = metric_dict['accuracy']
            record_scalars(record_dict)
            output_spec = tf.estimator.EstimatorSpec(
                mode=tf.estimator.ModeKeys.EVAL,
                loss=loss,
                predictions={'predict': tf.sigmoid(logits)},
                eval_metric_ops=metric_dict)
        else:  # train
            # Log trainable variables, marking any restored from a checkpoint.
            tvars = tf.trainable_variables()
            initialized_variable_names = {}
            tf.logging.info("**** Trainable Variables ****")
            for var in tvars:
                init_string = ""
                if var.name in initialized_variable_names:
                    init_string = ", *INIT_FROM_CKPT*"
                tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string)
            train_op, metric_dict = get_train_op_and_metrics(loss, params)
            acc_metric = get_eval_metrics(logits, target_correct, target_ids, seq_steps)
            record_dict['accuracy'] = acc_metric['accuracy']
            record_dict['learning_rate'] = metric_dict['learning_rate']
            record_scalars(record_dict)
            output_spec = tf.estimator.EstimatorSpec(
                mode=tf.estimator.ModeKeys.TRAIN, loss=loss, train_op=train_op)
    return output_spec
# YOLO-style video inference loop fragment: reads frames, pads/resizes to the
# network input, runs detection + NMS, then rescales boxes back to frame size.
# Relies on names defined elsewhere: opt, vid, model, NMS, pad_to_square, resize,
# xywh2xyxy, cv2, Image/ImageFont, transforms, np, torch, video_* properties.
isOutput = True if opt.video_out != "" else False
if isOutput:
    print("!!! TYPE:", type(opt.video_out), type(video_FourCC), type(video_fps), type(video_size))
    out = cv2.VideoWriter(opt.video_out, video_FourCC, video_fps, video_size)
while True:
    return_value, frame = vid.read()
    h, w, c = frame.shape
    # OpenCV frames are BGR; reverse the channel axis to get RGB for PIL.
    PIL_img = Image.fromarray(frame[:, :, ::-1])
    tensor_img = transforms.ToTensor()(PIL_img)
    img, _ = pad_to_square(tensor_img, 0)
    # Resize to the square network input and add a batch dimension.
    img = resize(img, (opt.img_size, opt.img_size)).cuda().unsqueeze(0)
    with torch.no_grad():
        detections = model(img)
        detections = NMS(detections, opt.conf_thres, opt.nms_thres)
    # current_time = time.time()
    # inference_time = current_time - prev_time
    # prev_time = current_time
    font = ImageFont.truetype(font='font/FiraMono-Medium.otf', size=np.floor(3e-2 * h + 0.5).astype('int32'))
    thickness = (w + h) // 300
    if detections[0] is not None:
        # Rescale boxes to original image
        detections = xywh2xyxy(detections[0])
        # First convert the xyxy coordinates from the img_size scale back to the
        # max(h, w) (padded-square) scale; then subtract the pad offset on x or y.
        detections[:, :4] *= (max(h, w) / opt.img_size)
        # NOTE(review): this chunk is truncated here — the body of the branch
        # below (and the rest of the loop) is not visible in this view.
        if max(h - w, 0) == 0:
def train(model, criterion, optimizer, pos_feats, neg_feats, maxiter, in_layer='fc4'):
    """Online fine-tuning loop (MDNet-style) with hard negative mining.

    Args:
        model: tracking network; called as model(x, x, in_layer=...) and assumed
            to return per-sample 2-class scores (column 1 = positive score).
        criterion: binary loss over (pos_score, neg_score).
        optimizer: optimizer over model parameters.
        pos_feats, neg_feats: pools of positive/negative feature tensors.
        maxiter: number of SGD iterations.
        in_layer: layer at which the features enter the network.

    Relies on the module-level `opts` dict for batch sizes, grad clip and logging.
    """
    model.train()
    batch_pos = opts['batch_pos']
    batch_neg = opts['batch_neg']
    batch_test = opts['batch_test']
    # Candidate pool for hard negative mining (at least batch_neg).
    batch_neg_cand = max(opts['batch_neg_cand'], batch_neg)
    pos_idx = np.random.permutation(pos_feats.size(0))
    neg_idx = np.random.permutation(neg_feats.size(0))
    # Extend the shuffled index streams so every iteration gets a full batch.
    while (len(pos_idx) < batch_pos * maxiter):
        pos_idx = np.concatenate([pos_idx, np.random.permutation(pos_feats.size(0))])
    while (len(neg_idx) < batch_neg_cand * maxiter):
        neg_idx = np.concatenate([neg_idx, np.random.permutation(neg_feats.size(0))])
    pos_pointer = 0
    neg_pointer = 0
    # NOTE(review): `iter` shadows the builtin within this loop body.
    for iter in range(maxiter):
        # select pos idx
        pos_next = pos_pointer + batch_pos
        pos_cur_idx = pos_idx[pos_pointer:pos_next]
        pos_cur_idx = pos_feats.new(pos_cur_idx).long()
        pos_pointer = pos_next
        # select neg idx
        neg_next = neg_pointer + batch_neg_cand
        neg_cur_idx = neg_idx[neg_pointer:neg_next]
        neg_cur_idx = neg_feats.new(neg_cur_idx).long()
        neg_pointer = neg_next
        # create batch
        batch_pos_feats = Variable(pos_feats.index_select(0, pos_cur_idx))
        batch_neg_feats = Variable(neg_feats.index_select(0, neg_cur_idx))
        # hard negative mining: score all candidates, keep the top-batch_neg hardest
        if batch_neg_cand > batch_neg:
            model.eval()  ## model transfer into evaluation mode
            for start in range(0, batch_neg_cand, batch_test):
                end = min(start + batch_test, batch_neg_cand)
                # Features narrower than 9216 are duplicated along dim 1 to match
                # the expected input width — assumes 9216 is the full feature size; TODO confirm.
                if batch_neg_feats[start:end].shape[1] == 9216:
                    temp_neg_feats = batch_neg_feats[start:end]
                else:
                    temp_neg_feats = torch.cat((batch_neg_feats[start:end], batch_neg_feats[start:end]), dim=1)
                score = model(temp_neg_feats, temp_neg_feats, in_layer=in_layer)
                if start == 0:
                    neg_cand_score = score.data[:, 1].clone()
                else:
                    neg_cand_score = torch.cat((neg_cand_score, score.data[:, 1].clone()), 0)
            _, top_idx = neg_cand_score.topk(batch_neg)
            batch_neg_feats = batch_neg_feats.index_select(0, Variable(top_idx))
            model.train()  ## model transfer into train mode
        # forward (same width-doubling convention as above)
        if batch_pos_feats.shape[1] == 9216:
            temp_pos_feats = batch_pos_feats
        else:
            temp_pos_feats = torch.cat((batch_pos_feats, batch_pos_feats), dim=1)
        if batch_neg_feats.shape[1] == 9216:
            temp_neg_feats = batch_neg_feats
        else:
            temp_neg_feats = torch.cat((batch_neg_feats, batch_neg_feats), dim=1)
        # pdb.set_trace()
        pos_score = model(temp_pos_feats, temp_pos_feats, in_layer=in_layer)
        neg_score = model(temp_neg_feats, temp_neg_feats, in_layer=in_layer)
        # optimize
        loss = criterion(pos_score, neg_score)
        model.zero_grad()
        loss.backward()
        # NOTE(review): clip_grad_norm and loss.data[0] are pre-0.4 PyTorch APIs
        # (now clip_grad_norm_ / loss.item()); kept as-is for the legacy runtime.
        torch.nn.utils.clip_grad_norm(model.parameters(), opts['grad_clip'])
        optimizer.step()
        if opts['visual_log']:
            print("Iter %d, Loss %.4f" % (iter, loss.data[0]))
def test(self):
    """Evaluate the configured model on the test split and return mean MSE.

    Builds the test loader and model from config names, moves batch fields to
    GPU as required by the chosen architecture, runs inference without grad,
    and accumulates per-sample squared error scaled by self.const_factor.
    Returns:
        float: mean squared error over all test samples.
    """
    # SECURITY NOTE(review): eval() on config-provided class names executes
    # arbitrary code if the config is untrusted; a registry lookup would be safer.
    test_dataset = eval(self.dataset_conf.loader_name)(self.config, split='test')
    # create data loader
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=self.test_conf.batch_size,
        shuffle=False,
        num_workers=self.test_conf.num_workers,
        collate_fn=test_dataset.collate_fn,
        drop_last=False)
    # create models
    model = eval(self.model_conf.name)(self.config)
    load_model(model, self.test_conf.test_model)
    if self.use_gpu:
        model = nn.DataParallel(model, device_ids=self.gpus).cuda()
    model.eval()
    test_loss = []
    for data in tqdm(test_loader):
        if self.use_gpu:
            # Common fields first; then the architecture-specific extras.
            data['node_feat'], data['node_mask'], data['label'] = data_to_gpu(
                data['node_feat'], data['node_mask'], data['label'])
            if self.model_conf.name == 'LanczosNetGeneral':
                data['D'], data['V'] = data_to_gpu(data['D'], data['V'])
            elif self.model_conf.name == 'GraphSAGE':
                data['nn_idx'], data['nonempty_mask'] = data_to_gpu(
                    data['nn_idx'], data['nonempty_mask'])
            elif self.model_conf.name == 'GPNN':
                data['L'], data['L_cluster'], data['L_cut'] = data_to_gpu(
                    data['L'], data['L_cluster'], data['L_cut'])
            else:
                data['L'] = data_to_gpu(data['L'])[0]
        with torch.no_grad():
            # Each architecture takes a different set of graph operators.
            if self.model_conf.name == 'AdaLanczosNet':
                pred, _ = model(data['node_feat'], data['L'], label=data['label'], mask=data['node_mask'])
            elif self.model_conf.name == 'LanczosNetGeneral':
                pred, _ = model(data['node_feat'], data['L'], data['D'], data['V'], label=data['label'], mask=data['node_mask'])
            elif self.model_conf.name == 'GraphSAGE':
                pred, _ = model(data['node_feat'], data['nn_idx'], data['nonempty_mask'], label=data['label'], mask=data['node_mask'])
            elif self.model_conf.name == 'GPNN':
                pred, _ = model(data['node_feat'], data['L'], data['L_cluster'], data['L_cut'], label=data['label'], mask=data['node_mask'])
            else:
                pred, _ = model(data['node_feat'], data['L'], label=data['label'], mask=data['node_mask'])
        # Squared error, rescaled by const_factor (e.g. to undo label normalisation).
        curr_loss = (pred - data['label']).pow(2).cpu().numpy() * self.const_factor
        test_loss += [curr_loss]
    test_loss = float(np.mean(np.concatenate(test_loss)))
    logger.info("Test MSE = {}".format(test_loss))
    return test_loss
def train(epoch, optimizer, compression_scheduler=None):
    """Train the word-language model for one epoch over `train_data` in BPTT chunks.

    Args:
        epoch: current epoch number (for logging and scheduler callbacks).
        optimizer: optimizer stepping the global `model`'s parameters.
        compression_scheduler: optional distiller-style scheduler whose
            minibatch-begin/end and before-backward hooks are invoked each step.

    Relies on module-level globals: model, train_data, args, corpus, criterion,
    get_batch, repackage_hidden.
    """
    # Turn on training mode which enables dropout.
    model.train()
    total_samples = train_data.size(0)
    steps_per_epoch = math.ceil(total_samples / args.bptt)
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0), args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(
                epoch, minibatch_id=batch, minibatches_per_epoch=steps_per_epoch)
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        if compression_scheduler:
            # Before running the backward phase, we allow the scheduler to modify the loss
            # (e.g. add regularization loss)
            loss = compression_scheduler.before_backward_pass(
                epoch, minibatch_id=batch, minibatches_per_epoch=steps_per_epoch,
                loss=loss, return_loss_components=False)
        optimizer.zero_grad()
        loss.backward()
        # Clip gradients to mitigate exploding gradients in the RNN.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        total_loss += loss.item()
        if compression_scheduler:
            compression_scheduler.on_minibatch_end(
                epoch, minibatch_id=batch, minibatches_per_epoch=steps_per_epoch)
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            lr = optimizer.param_groups[0]['lr']
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.4f} | ms/batch {:5.2f} '
                '| loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch, len(train_data) // args.bptt, lr,
                    elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
            # NOTE(review): `stats` and `steps_completed` are unused in this
            # visible chunk — the stats-logging call that consumes them appears
            # to have been truncated from this view.
            stats = ('Performance/Training/',
                     OrderedDict([('Loss', cur_loss),
                                  ('Perplexity', math.exp(cur_loss)),
                                  ('LR', lr),
                                  ('Batch Time', elapsed * 1000)]))
            steps_completed = batch + 1
def get_filtered_relations_and_tails_filter1_filter2_hybrid(head_id, id2entity, type2relationType2frequency, topNfilters, column, fact, model, arity, device, type_head_tail_entity_matrix, tailType_relation_headType_tensor, entityName2entityTypes, type2id, relation2id, atLeast, sparsifier, typeId2frequency, entityId2entityTypes, unk_type_id, id2type, entity2sparsifiedTypes, entitiesEvaluated):
    """Score type-compatible (relation, tail) candidates for a given head entity.

    Uses the type co-occurrence tensors to find (relation, tail) pairs whose
    frequency is at least `atLeast`, tiles `fact` over those candidates
    (writing the candidate tail into `column` and the relation into slots 0
    and 2), appends sparsified head/tail type pairs, then scores the tiled
    facts in chunks of 256 through `model`.

    Returns:
        (list_of_scores_per_head_with_filter, idx2relation_tail_in_scores):
        the numpy score arrays (one entry when candidates exist) and the
        mapping from score index to its (relation, tail) pair.
    """
    list_of_scores_per_head_with_filter = []
    idx2relation_tail_in_scores = {}
    h = id2entity[head_id]  # unused, but raises early if head_id is unknown
    tiled_fact = np.array([])
    # Type profile of the head, used to project to compatible (tailType, relation) pairs.
    all_head_types = type_head_tail_entity_matrix[:, head_id]
    tailType_relation_matrix = torch.matmul(tailType_relation_headType_tensor, all_head_types)
    tailType_relation_matrix = torch.transpose(tailType_relation_matrix, 0, 1)
    relation_entity_matrix = torch.matmul(tailType_relation_matrix, type_head_tail_entity_matrix)
    # Frequency filter: drop (relation, tail) pairs seen fewer than `atLeast` times.
    relation_entity_matrix[relation_entity_matrix < atLeast] = 0
    if torch.nonzero(relation_entity_matrix).shape[0] != 0:
        filtered_relation_tail_pairs = torch.nonzero(relation_entity_matrix)
        all_relations = filtered_relation_tail_pairs[:, 0].tolist()
        entities_without_duplicates = filtered_relation_tail_pairs[:, 1].tolist()
        r_e_tuples = list(zip(all_relations, entities_without_duplicates))
        idx_iterator = range(0, len(r_e_tuples))
        relation_tail_in_scores2idx = dict(zip(r_e_tuples, idx_iterator))
        idx2relation_tail_in_scores = dict(zip(idx_iterator, r_e_tuples))
        if len(entities_without_duplicates) > 0:
            # One copy of `fact` per candidate; overwrite tail slot and relation slots.
            tiled_fact = np.array(fact * len(entities_without_duplicates)).reshape(len(entities_without_duplicates), -1)
            tiled_fact[:, column] = entities_without_duplicates
            tiled_fact[:, 0] = all_relations
            tiled_fact[:, 2] = all_relations
            head_sparsified_types = []
            current_head_entity = tiled_fact[0][1]
            if current_head_entity in entity2sparsifiedTypes:
                head_sparsified_types = entity2sparsifiedTypes[current_head_entity]
    if len(tiled_fact) > 0:
        new_tiled_fact = []
        for current_fact in tiled_fact:
            current_head_entity = current_fact[1]
            current_tail_entity = current_fact[3]
            tail_sparsified_types = []
            if current_tail_entity in entity2sparsifiedTypes:
                tail_sparsified_types = entity2sparsifiedTypes[current_tail_entity]
            if entitiesEvaluated == "one":
                # Back off to the UNK type when an entity has no sparsified types.
                if len(head_sparsified_types) == 0:  # h has no types
                    head_sparsified_types = [unk_type_id]
                if len(tail_sparsified_types) == 0:  # t has no types
                    tail_sparsified_types = [unk_type_id]
            # Cartesian product of head/tail types, flattened as alternating ids.
            headType_tailType_pairs = []
            for h_t in head_sparsified_types:
                for t_t in tail_sparsified_types:
                    headType_tailType_pairs.append(h_t)
                    headType_tailType_pairs.append(t_t)
            current_fact = current_fact[:4]
            current_fact = np.append(current_fact, headType_tailType_pairs)
            new_tiled_fact.append(current_fact)
        new_tiled_fact, relation_tail_in_scores2idx, idx2relation_tail_in_scores = sort_testing_facts_according_to_arity(new_tiled_fact, relation_tail_in_scores2idx, idx2relation_tail_in_scores)
        # new_tiled_fact size: (num of mini batches, num of facts, arity)
        pred = None
        for facts_with_same_arities in new_tiled_fact:
            batch_of_facts_with_same_arities = list(chunks(facts_with_same_arities, 256))
            arity = len(batch_of_facts_with_same_arities[0][0]) // 2
            # BUG FIX: was `if pred == None:` — `==` against a tensor is
            # elementwise/unreliable; identity check is the correct test here.
            if pred is None:
                pred = model(batch_of_facts_with_same_arities[0], arity, "testing", device)
            else:
                pred_tmp = model(batch_of_facts_with_same_arities[0], arity, "testing", device)
                pred = torch.cat((pred, pred_tmp))
            for batch_it in range(1, len(batch_of_facts_with_same_arities)):
                pred_tmp = model(batch_of_facts_with_same_arities[batch_it], arity, "testing", device)
                pred = torch.cat((pred, pred_tmp))
        score_with_filter = pred.view(-1).detach().cpu().numpy()
        list_of_scores_per_head_with_filter.append(score_with_filter)
    return list_of_scores_per_head_with_filter, idx2relation_tail_in_scores
def train(model, train_dataloader, test_dataloader, vocab_size=336):
    """Train a seq2seq model with cross-entropy, validating and saving per epoch.

    Args:
        model: torch module called as model(src, tgt) returning per-token logits.
        train_dataloader: yields (batch_src, batch_tgt, batch_out) tensors.
        test_dataloader: passed to `evaluate` for per-epoch validation.
        vocab_size: width of the output logits (generalized from the previously
            hard-coded 336; the default keeps existing callers unchanged).

    Relies on module-level globals: NB_EPOCHS, PRINT_INV, GEN_INV, device,
    test_primer, generate, evaluate. Saves checkpoints under models/.
    """
    run_id = int(time.time())  # timestamp used to namespace checkpoint files
    nb_batches = len(train_dataloader)
    crit = nn.CrossEntropyLoss()
    optim = torch.optim.Adam(model.parameters(), lr=0.0001)
    model.train()
    for ei in range(NB_EPOCHS):
        epoch_loss = 0.0
        total_loss = 0.0  # running loss, reset every PRINT_INV batches
        for i, (batch_src, batch_tgt, batch_out) in enumerate(train_dataloader):
            stime_batch = time.time()
            batch_src = batch_src.type(torch.LongTensor).to(device)
            batch_tgt = batch_tgt.type(torch.LongTensor).to(device)
            batch_out = batch_out.type(torch.LongTensor).to(device)
            optim.zero_grad()
            out = model(batch_src, batch_tgt)
            # Flatten to (tokens, vocab) / (tokens,) for CrossEntropyLoss.
            out = out.reshape(-1, vocab_size)
            batch_out = batch_out.reshape(-1)
            loss = crit(out, batch_out)
            total_loss += loss.item()
            epoch_loss += loss.item()
            loss.backward()
            optim.step()
            etime_batch = time.time()
            if not i % PRINT_INV:
                # Rough ETA: last batch duration times the batches remaining.
                estimated_time = timedelta(
                    seconds=math.floor((etime_batch - stime_batch) * (nb_batches - i)))
                print(f"> Epoch {ei+1}/{NB_EPOCHS} - Batch {i+1}/{nb_batches}")
                print(
                    f"> Batch finished in {etime_batch - stime_batch:1.2f} seconds"
                )
                print(
                    f"> Estimated time to end of epoch: {str(estimated_time)}")
                print(f"> Loss: {total_loss / PRINT_INV}\n")
                total_loss = 0.0
        # Periodically sample generations from the model (skipping epoch 0).
        if ei and not ei % GEN_INV:
            generate(model, f"{ei}-sample.csv", src=test_primer)
        val_loss = evaluate(model, test_dataloader, optim, crit)
        print(f"> Epoch {ei+1}/{NB_EPOCHS}")
        print(f"> Validation Loss: {val_loss}\n")
        torch.save(model, f"models/{run_id}-{ei}-model.pt")
    torch.save(model, f"models/{run_id}-final-model.pt")
def main():
    """RETA entry point: parse CLI args, then either evaluate a saved model
    (--load=True) or train a new/preloaded one and save the final checkpoint.

    Relies on module-level names: pickle, mp, torch, np, os, sys, TicToc,
    RETA, RETA_NO_TYPES, Batch_Loader and the build_*/add_*/evaluate_* helpers.
    """
    # parse input arguments
    parser = argparse.ArgumentParser(description="Model's hyperparameters")
    parser.add_argument('--indir', type=str, help='Input dir of train, test and valid data')
    parser.add_argument('--withTypes', type=str, default="True")
    parser.add_argument('--epochs', default=1000, help='Number of epochs (default: 10)')
    parser.add_argument('--batchsize', type=int, default=128, help='Batch size (default: 128)')
    parser.add_argument('--num_filters', type=int, default=100, help='number of filters CNN')
    parser.add_argument('--embsize', default=100, help='Embedding size (default: 100)')
    parser.add_argument('--learningrate', default=0.0001, help='Learning rate (default: 0.00005)')
    parser.add_argument('--outdir', type=str, help='Output dir of model')
    parser.add_argument('--load', default='False', help='If true, it loads a saved model in dir outdir and evaluate it (default: False). If preload, it load an existing model and keep training it')
    parser.add_argument('--modelToBeTrained', default='', help='path of the pretrained model to be loaded. It works with --load=preload')
    parser.add_argument('--gpu_ids', default='1', help='Comma-separated gpu id used to paralellize the evaluation')
    parser.add_argument('--num_negative_samples', type=int, default=1, help='number of negative samples for each positive sample')
    parser.add_argument('--atLeast', type=int, help='2')
    parser.add_argument('--topNfilters', type=int, help='2')
    parser.add_argument('--buildTypeDictionaries', type=str, default='False', help='True OR False')
    parser.add_argument('--sparsifier', type=int, default=-1, help='if type frequency is less than K in ranking, set its entry to 0 in the img. If its value is <=0 then it will not sparsify the matrix')
    parser.add_argument('--entitiesEvaluated', default='both', type=str, help='both, one, none')
    args = parser.parse_args()
    print("\n\n************************")
    for e in vars(args):
        print(e, getattr(args, e))
    print("************************\n\n")
    if args.load == 'True':
        # ---------- Evaluation-only path: load dictionaries, rebuild type
        # ---------- structures, load the saved model and score the test set.
        with open(args.indir + "/dictionaries_and_facts.bin", 'rb') as fin:
            data_info = pickle.load(fin)
        test = data_info['test_facts']
        relation2id = data_info['roles_indexes']
        entity2id = data_info['values_indexes']
        key_val = data_info['role_val']
        id2entity = {}
        for tmpkey in entity2id:
            id2entity[entity2id[tmpkey]] = tmpkey
        id2relation = {}
        for tmpkey in relation2id:
            id2relation[relation2id[tmpkey]] = tmpkey
        n_entities = len(entity2id)
        n_relations = len(relation2id)
        print("Unique number of relations and head types:", n_relations)
        print("Unique number of entities and tail types:", n_entities)
        with open(args.indir + "/dictionaries_and_facts.bin", 'rb') as fin:
            data_info1 = pickle.load(fin)
        whole_train = data_info1["train_facts"]
        whole_valid = data_info1["valid_facts"]
        whole_test = data_info1['test_facts']
        type2id, id2type = build_type2id_v2(args.indir)
        # NOTE(review): "UNK" is given id len(type2id), but id2type is then keyed
        # by the *new* len(type2id) (one larger) — the two maps look off by one
        # for UNK; verify against build_type2id_v2 and the training path below.
        type2id["UNK"] = len(type2id)
        id2type[len(type2id)] = "UNK"
        unk_type_id = type2id["UNK"]
        entityName2entityTypes, entityId2entityTypes, entityType2entityNames, entityType2entityIds = build_entity2types_dictionaries(args.indir, entity2id)
        head2relation2tails = build_head2relation2tails(args.indir, entity2id, relation2id, entityId2entityTypes, args.entitiesEvaluated)
        print("unique testing heads:", len(head2relation2tails))
        typeId2frequency = build_typeId2frequency(args.indir, type2id)
        headTail2hTypetType, entityId2typeIds_with_sparsifier = build_headTail2hTypetType(args.indir, entity2id, type2id, entityName2entityTypes, args.sparsifier, typeId2frequency, args.buildTypeDictionaries)
        # Attach head/tail type pairs to each test fact (train/valid unused here).
        _, test, _ = add_type_pair_to_fact([], test, [], headTail2hTypetType, entityId2typeIds_with_sparsifier, unk_type_id)
        whole_train, whole_test, whole_valid = add_type_pair_to_fact(whole_train, whole_test, whole_valid, headTail2hTypetType, entityId2typeIds_with_sparsifier, unk_type_id)
        device = "cuda:" + str(args.gpu_ids)
        type2relationType2frequency = build_type2relationType2frequency(args.indir, args.buildTypeDictionaries)
        type_head_tail_entity_matrix, tailType_relation_headType_tensor = build_tensor_matrix(args.indir, entity2id, relation2id, entityName2entityTypes, args.topNfilters, type2relationType2frequency, entityId2typeIds_with_sparsifier, type2id, id2type, device, args.entitiesEvaluated)
        entity2sparsifiedTypes = build_entity2sparsifiedTypes(typeId2frequency, entityId2entityTypes, type2id, args.sparsifier, unk_type_id, id2type)
        # The epoch number is encoded in the checkpoint file name (third "_" field).
        epoch = args.outdir.split("/")[-1].split("_")[2]
        model = torch.load(args.outdir, map_location=device)
        t2 = TicToc()
        t2.tic()
        print("model.emb_types:", model.emb_types)
        evaluate_model_v2(model, test, id2entity, type2relationType2frequency, args.topNfilters, args.atLeast, device, type2id, id2type, type_head_tail_entity_matrix, tailType_relation_headType_tensor, entityName2entityTypes, relation2id, head2relation2tails, id2relation, args.sparsifier, typeId2frequency, entityId2entityTypes, unk_type_id, entity2sparsifiedTypes, args.indir, args.entitiesEvaluated)
        t2.toc()
        print("Evaluation last epoch ", epoch, "- running time (seconds):", t2.elapsed)
        print("END OF SCRIPT!")
        sys.stdout.flush()
    else:
        # ---------- Training path (--load=False builds a new model;
        # ---------- --load=preload resumes from an existing checkpoint).
        with open(args.indir + "/dictionaries_and_facts.bin", 'rb') as fin:
            data_info = pickle.load(fin)
        train = data_info["train_facts"]
        valid = data_info["valid_facts"]
        test = data_info['test_facts']
        relation2id = data_info['roles_indexes']
        entity2id = data_info['values_indexes']
        key_val = data_info['role_val']
        id2entity = {}
        for tmpkey in entity2id:
            id2entity[entity2id[tmpkey]] = tmpkey
        id2relation = {}
        for tmpkey in relation2id:
            id2relation[relation2id[tmpkey]] = tmpkey
        n_entities = len(entity2id)
        n_relations = len(relation2id)
        print("Unique number of relations:", n_relations)
        print("Unique number of entities:", n_entities)
        with open(args.indir + "/dictionaries_and_facts.bin", 'rb') as fin:
            data_info1 = pickle.load(fin)
        whole_train = data_info1["train_facts"]
        whole_valid = data_info1["valid_facts"]
        whole_test = data_info1['test_facts']
        mp.set_start_method('spawn')
        t1 = TicToc()
        t2 = TicToc()
        entityName2entityTypes, entityId2entityTypes, entityType2entityNames, entityType2entityIds = build_entity2types_dictionaries(args.indir, entity2id)
        ## img matrix
        type2id, id2type = build_type2id_v2(args.indir)
        unk_type_id = len(type2id)
        typeId2frequency = build_typeId2frequency(args.indir, type2id)
        headTail2hTypetType, entityId2typeIds_with_sparsifier = build_headTail2hTypetType(args.indir, entity2id, type2id, entityName2entityTypes, args.sparsifier, typeId2frequency, args.buildTypeDictionaries)
        train, test, valid = add_type_pair_to_fact(train, test, valid, headTail2hTypetType, entityId2typeIds_with_sparsifier, unk_type_id)
        whole_train, whole_test, whole_valid = add_type_pair_to_fact(whole_train, whole_test, whole_valid, headTail2hTypetType, entityId2typeIds_with_sparsifier, unk_type_id)
        # Batches per arity bucket: ceil(len / batchsize), 0 for empty buckets.
        n_batches_per_epoch = []
        for i in train:
            ll = len(i)
            if ll == 0:
                n_batches_per_epoch.append(0)
            else:
                n_batches_per_epoch.append(int((ll - 1) / args.batchsize) + 1)
        device = "cuda:" + str(args.gpu_ids.split(",")[0])
        print("device:", device)
        if args.load == "preload":
            model = torch.load(args.modelToBeTrained, map_location=device)
            # Resume epoch is parsed from the checkpoint file name (field 7, "epochN").
            starting_epoch = int(args.modelToBeTrained.rsplit('/', 1)[-1].split("_")[7].replace("epoch", "")) + 1
            print("Model pre-loaded. The training will start at epoch", starting_epoch)
        elif args.load == "False":
            if args.withTypes == "True":
                model = RETA(len(relation2id), len(entity2id), len(type2id) + 1, int(args.embsize), int(args.num_filters)).cuda()
            elif args.withTypes == "False":
                model = RETA_NO_TYPES(len(relation2id), len(entity2id), len(type2id) + 1, int(args.embsize), int(args.num_filters)).cuda()
            model.init()
            starting_epoch = 1
        for name, param in model.named_parameters():
            if param.requires_grad:
                print("param:", name, param.size())
        opt = torch.optim.Adam(model.parameters(), lr=float(args.learningrate))
        for epoch in range(starting_epoch, int(args.epochs) + 1):
            t1.tic()
            model.train()
            model.to(device)
            train_loss = 0
            rel = 0
            arity2numOfPos = {}
            arity2numOfNeg = {}
            # Facts are bucketed by arity; iterate buckets, then minibatches.
            for i in range(len(train)):
                train_i_indexes = np.array(list(train[i].keys())).astype(np.int32)
                train_i_values = np.array(list(train[i].values())).astype(np.float32)
                for batch_num in range(n_batches_per_epoch[i]):
                    # Each fact stores (value, type) pairs, hence arity = width // 2.
                    arity = len(train_i_indexes[0]) // 2
                    if arity < 3:
                        print("ERROR: arity < 3")
                    x_batch, y_batch, new_positive_facts_indexes_with_different_arity, new_negative_facts_indexes_with_different_arity = Batch_Loader(train_i_indexes, train_i_values, n_entities, n_relations, key_val, args.batchsize, arity, whole_train[i], id2entity, id2relation, args.num_negative_samples, type2id, args.sparsifier, typeId2frequency, entityId2entityTypes, id2entity, unk_type_id, id2type, entityName2entityTypes)
                    x_by_arities, y_by_arities = sort_new_batch_according_to_arity_2(new_positive_facts_indexes_with_different_arity, new_negative_facts_indexes_with_different_arity)
                    loss = 0
                    for j in range(len(x_by_arities)):
                        arity = len(x_by_arities[j][0]) // 2
                        if arity < 3:
                            print("ERROR: arity < 3")
                        pred = model(x_by_arities[j], arity, "training", device, id2relation, id2entity)
                        # y = +1 / -1 labels; multiply by -1 so positives get low loss.
                        pred = pred * torch.FloatTensor(y_by_arities[j]).cuda(device) * (-1)
                        loss += model.loss(pred).mean()
                    opt.zero_grad()
                    loss.backward()
                    opt.step()
                    train_loss += loss.item()
            t1.toc()
            print("End of epoch", epoch, "- train_loss:", train_loss, "- training time (seconds):", t1.elapsed)
            sys.stdout.flush()
        print("END OF EPOCHS")
        # SAVE THE LAST MODEL (hyperparameters are encoded in the file name).
        if args.withTypes == "True":
            file_name = "RETA_batchSize" + str(args.batchsize) + "_epoch" + str(epoch) + "_embSize" + args.embsize + "_lr" + args.learningrate + "_sparsifier" + str(args.sparsifier) + "_numFilters" + str(args.num_filters)
        elif args.withTypes == "False":
            file_name = "RETA_with_NO_types_batchSize" + str(args.batchsize) + "_epoch" + str(epoch) + "_embSize" + args.embsize + "_lr" + args.learningrate + "_sparsifier" + str(args.sparsifier) + "_numFilters" + str(args.num_filters)
        print("Saving the model trained at epoch", epoch, "in:", args.outdir + '/' + file_name)
        if not os.path.exists(args.outdir):
            os.makedirs(args.outdir)
        torch.save(model, args.outdir + '/' + file_name)
        print("Model saved")
        print("END OF SCRIPT!")
        sys.stdout.flush()
# min_val_precision = 0.6
min_loss_val = 10
min_epoch = 100
for epoch in range(opt.epochs):
    model.train()
    start_time = time.time()
    for batch_i, (_, imgs, targets) in enumerate(dataloader):
        # Global batch counter across epochs; drives gradient accumulation.
        batches_done = len(dataloader) * epoch + batch_i
        imgs = Variable(imgs.to(device))
        targets = Variable(targets.to(device), requires_grad=False)
        try:
            loss, outputs = model(imgs, targets)
            loss.backward()
        except RuntimeError as exception:
            if "out of memory" in str(exception):
                print("WARNING: out of memory")
                if hasattr(torch.cuda, 'empty_cache'):
                    torch.cuda.empty_cache()
                # BUGFIX: skip the optimizer step for this batch — no
                # (complete) gradient was produced, so stepping here would
                # apply stale/partial gradients.
                continue
            raise exception
        # BUGFIX: the original condition `if batches_done % opt.gradient_accumulations:`
        # stepped on every batch EXCEPT multiples of the accumulation period —
        # the inverse of gradient accumulation. Step once every
        # `opt.gradient_accumulations` batches, then clear the accumulated
        # gradients (the original never called zero_grad(), so gradients
        # accumulated across the whole run).
        if batches_done % opt.gradient_accumulations == 0:
            optimizer.step()
            optimizer.zero_grad()
def train(model, data_loader, device, num_epochs, optimizer=None, lr_scheduler=None):
    """Train a detection model and save one checkpoint per epoch.

    Args:
        model: detection model that, in train mode, returns a dict of losses
            with keys ``loss_classifier``, ``loss_box_reg``, ``loss_objectness``
            and ``loss_rpn_box_reg`` (e.g. a torchvision Faster R-CNN).
        data_loader: yields ``(images, targets)`` batches; ``targets`` is a
            list of dicts of tensors.
        device: torch device the model and data are moved to.
        num_epochs: number of epochs to run.
        optimizer: optional; defaults to SGD(lr=0.001, momentum=0.9,
            weight_decay=0.0005) over trainable parameters.
        lr_scheduler: optional; defaults to StepLR(step_size=40, gamma=0.1).

    Exits the process (sys.exit(1)) if the reduced loss becomes non-finite.
    """
    # Checkpoint directory; exist_ok replaces the original bare try/except
    # around os.mkdir, which silently swallowed *every* error.
    os.makedirs('./weights', exist_ok=True)
    model.to(device)

    # Build default optimizer / scheduler when the caller supplies none.
    if optimizer is None:
        params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)
    if lr_scheduler is None:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.1)

    for epoch in range(num_epochs):
        print(epoch)
        model.train()
        count = 0
        for images, targets in data_loader:
            count += len(images)
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            # Reduce losses over all ranks for logging purposes only; the
            # backward pass below still uses the local (un-reduced) losses.
            # BUGFIX: `average` was referenced but never defined, raising a
            # NameError whenever world_size >= 2.
            average = True
            world_size = 1
            if dist.is_available() and dist.is_initialized():
                world_size = dist.get_world_size()
            if world_size >= 2:
                with torch.no_grad():
                    names = []
                    values = []
                    # Sort the keys so the reduction order is identical on
                    # every process.
                    for k in sorted(loss_dict.keys()):
                        names.append(k)
                        values.append(loss_dict[k])
                    values = torch.stack(values, dim=0)
                    dist.all_reduce(values)
                    if average:
                        values /= world_size
                    loss_dict = {k: v for k, v in zip(names, values)}
            losses_reduced = sum(loss for loss in loss_dict.values())
            loss_value = losses_reduced.item()

            print('epoch {} [{}/{}]loss_classifier : {} loss_box_reg : {} loss_objectness : {} loss_rpn_box_reg : {}'.format(
                epoch, count, len(data_loader)*data_loader.batch_size,
                loss_dict['loss_classifier'],
                loss_dict['loss_box_reg'],
                loss_dict['loss_objectness'],
                loss_dict['loss_rpn_box_reg']
            ))

            if not math.isfinite(loss_value):
                print("Loss is {}, stopping training".format(loss_value))
                print(targets)
                print(loss_dict)
                sys.exit(1)

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

        # One checkpoint and one LR-scheduler step per epoch (StepLR with
        # step_size=40 counts epochs, not batches).
        save_model('./weights/{}'.format(epoch), model, optimizer, lr_scheduler)
        lr_scheduler.step()
def train(model, train_dataloader, epoch):
    """Run one training epoch and print epoch-level loss/accuracy statistics.

    Relies on module-level globals: ``args``, ``optimizer``, ``criterion``,
    ``params`` (for gradient clipping), ``repackage_hidden`` and ``tqdm``.

    Args:
        model: language model exposing ``init_hidden``, ``distance`` and
            (for QRNN) ``reset``.
        train_dataloader: yields ``(input_ids, cand_ids, target)`` batches.
        epoch: current epoch number (used for progress display and logging).
    """
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN':
        model.reset()
    model.train()
    total_loss = 0
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)
    hidd = model.init_hidden(args.batch_size * 19)
    hidd_cand = model.init_hidden(args.batch_size * 4)
    acc_list = []
    total_var = 0
    it = tqdm(range(len(train_dataloader)), desc="Epoch {}/{}".format(epoch, args.epochs), ncols=0)
    data_iter = iter(train_dataloader)
    for niter in it:
        # BUGFIX: `data_iter.next()` is Python-2-only; Python 3 requires the
        # builtin next().
        input_ids, cand_ids, target = next(data_iter)
        if args.cuda:
            input_ids = input_ids.cuda()
            cand_ids = cand_ids.cuda()
            target = target.cuda()
        # BUGFIX: `targets` was only bound inside the args.cuda branch, so
        # CPU runs crashed with NameError.
        targets = target
        # Detach hidden states so gradients do not flow across batches.
        hidden = repackage_hidden(hidden)
        hidd = repackage_hidden(hidd)
        hidd_cand = repackage_hidden(hidd_cand)
        optimizer.zero_grad()
        output, result_prob, hidden, rnn_hs, dropped_rnn_hs, cand_emb = model(
            input_ids, cand_ids, hidden, hidd, hidd_cand)
        distance_1 = model.distance[0][2]
        dis_var = torch.var(distance_1)
        raw_loss = criterion(result_prob, targets)
        _, predict = result_prob.max(dim=-1)
        acc = float(torch.sum(predict == targets)) / float(targets.size(0))
        acc_list.append(acc)
        loss = raw_loss
        # Activation Regularization (AR)
        if args.alpha:
            loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean()
                              for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (TAR, slowness penalty)
        if args.beta:
            loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean()
                              for rnn_h in rnn_hs[-1:])
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip:
            torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()
        total_loss += raw_loss.data
        total_var += dis_var.data
    # Epoch-level summary.
    cur_loss = total_loss / len(train_dataloader)
    cur_var = total_var / len(train_dataloader)
    elapsed = time.time() - start_time
    print('| epoch {:3d} | lr {:05.5f} | ms/batch {:5.2f} | '
          'loss {:5.2f} | acc {:8.4f} | var {:8.4f}'.format(
              epoch, optimizer.param_groups[0]['lr'],
              elapsed * 1000 / len(train_dataloader),
              cur_loss, np.mean(acc_list), cur_var))