Example #1
    def _get_Q(self, model, model_input):
        model.reset_noise()

        if not self.config.use_categorical:
            return model(model_input)

        model_output = model(model_input, ApplySoftmax.NORMAL)

        return torch.sum(model_output * self.support, dim=2)
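For reference, a minimal sketch of the expectation computed on the last line for a categorical (C51-style) head; the shapes and support values below are assumptions:

import torch

probs = torch.softmax(torch.randn(4, 6, 51), dim=2)  # (batch, actions, atoms)
support = torch.linspace(-10.0, 10.0, 51)            # atom values, like self.support
q_values = torch.sum(probs * support, dim=2)         # expected Q per action, (4, 6)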
Example #2
def get_model_info(year):
    '''Takes in a year, and prints out each model, brand_name, and brand
    headquarters for that year using only ONE database query.'''

    # One query: SELECT model, brand_name, headquarters, joining Model to
    # Brand (join and filter column names assumed from the docstring hints)
    results = (db.session.query(Model.name, Model.brand_name, Brand.headquarters)
                         .join(Brand, Brand.name == Model.brand_name)
                         .filter(Model.year == year)
                         .all())

    for name, brand_name, headquarters in results:
        print(name, brand_name, headquarters)
Example #3
def dev(model, dev_loader, decoder, logger):
    model.eval()
    total_cer = 0
    total_tokens = 0

    for data in dev_loader:
        inputs, targets, input_sizes, input_sizes_list, target_sizes = data
        batch_size = inputs.size(1)
        inputs = inputs.transpose(0, 1)
        
        inputs = Variable(inputs, volatile=True, requires_grad=False)

        if USE_CUDA:
            inputs = inputs.cuda()

        inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_sizes_list)
        probs = model(inputs)
        
        probs = probs.data.cpu()
        if decoder.space_idx == -1:
            total_cer += decoder.phone_word_error(probs, input_sizes_list, targets, target_sizes)[1]
        else:
            total_cer += decoder.phone_word_error(probs, input_sizes_list, targets, target_sizes)[0]
        total_tokens += sum(target_sizes)
    acc = 1 - float(total_cer) / total_tokens
    return acc*100
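phone_word_error presumably returns edit-distance error counts; a self-contained sketch of the character error rate that the acc formula above inverts (plain Levenshtein distance, not this repo's decoder):

def cer(ref, hyp):
    # dynamic-programming edit distance (insert/delete/substitute), rolling row
    dp = list(range(len(hyp) + 1))
    for i, r in enumerate(ref, 1):
        prev, dp[0] = dp[0], i
        for j, h in enumerate(hyp, 1):
            prev, dp[j] = dp[j], min(dp[j] + 1, dp[j - 1] + 1, prev + (r != h))
    return dp[-1] / float(len(ref))

# cer('hello', 'helo') == 0.2, i.e. acc = (1 - 0.2) * 100 = 80.0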
Example #4
def get_stats():

    if 'url' in request.form:

        api_model = model()
        return api_model.get_url_stats(request.form['url'])
    return jsonify({'message': errors.HACK, 'response': {}, 'status': '0'})
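A hypothetical client for this endpoint (the route path and port are assumptions):

import requests

resp = requests.post('http://localhost:5000/get_stats',
                     data={'url': 'http://example.com'})
print(resp.json())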
Example #5
def model(model_name, decorator=[]):
    assert isinstance(model_name, (str, unicode))
    cache_key = model_name
    assert not decorator or config.IS_TEST, 'decorator can only be used in the test environment'

    if not decorator and CACHED_MODELS.has_key(cache_key):
        return CACHED_MODELS[cache_key]
    else:
        # This import must not be moved outside the model() function;
        # otherwise it forms a circular dependency with `import site_helper` inside model, causing an infinite loop.
        import model
        import modeldecorator 
        try:
            for name in model_name.split('.'):
                assert(hasattr(model, name))
                model = getattr(model, name)
            model = model()
        except:
            print 'the name is', name
            print 'the model name is', model_name
            raise
        # Use model.decorator only when not in the test environment
        decorator = model.decorator if not config.IS_TEST else decorator
        # When testing, force the use of test_decorator
        if config.IS_TEST and hasattr(model, 'test_decorator'):
            assert decorator == [], u'when test_decorator is used, specifying decorator is no longer allowed'
            decorator = model.test_decorator
        # Apply the decorators
        for d, arguments in decorator:
            model = getattr(modeldecorator, d)(model, arguments)
        if not decorator:
            CACHED_MODELS[cache_key] = model
        return model
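Hypothetical usage of this registry (the model and decorator names are invented for illustration):

Member = model('Member')  # resolved from model.py, instantiated, and cached
# In the test environment only, decorators can be forced:
# TestMember = model('Member', decorator=[('mock_db', {})])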
Example #6
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
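This snippet targets a legacy PyTorch API. In current PyTorch the deprecated calls above are written as follows (behavior-equivalent sketch):

torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
for p in model.parameters():
    p.data.add_(p.grad, alpha=-lr)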
Example #7
    def get_orders(self, sql, model=model.Model_OrderSub):
        results = self.get_all(sql)
        model_list = []

        if not results:
            return []
        for row in results:
            o_model = model()
            o_model.order_id = str(row[0])
            o_model.uid = str(row[1])
            o_model.account = str(row[2])
            o_model.p_info = str(row[3])
            o_model.depart_date = str(row[4])
            o_model.train_no = str(row[5])
            o_model.depart_name = str(row[6])
            o_model.arrive_name = str(row[7])
            o_model.name = str(row[8])
            o_model.card_type = str(row[9])
            o_model.card_no = str(row[10])
            o_model.phone = str(row[11])
            o_model.seat_name = str(row[12])
            o_model.ticket_type = str(row[13])
            o_model.status = str(row[14])
            o_model.price = str(row[15])
            o_model.create_time = str(row[16])

            model_list.append(o_model)
        return model_list
Example #8
def test(model, quesfeaShu, labelShu, lengthShu):

    model.eval()

    idx = sorted(range(len(lengthShu)), key=lambda x: lengthShu[x], reverse=True)

    _quesfeaShu = []
    _labelShu = []
    _lengthShu = []

    for j in range(len(idx)):
        _quesfeaShu.append(quesfeaShu[idx[j]])
        _labelShu.append(labelShu[idx[j]])
        _lengthShu.append(lengthShu[idx[j]])

    questrainarray = np.asarray(_quesfeaShu)
    labeltrainarray = np.asarray(_labelShu)
    lengthtrainarray = np.asarray(_lengthShu)

    tmp = [questrainarray, labeltrainarray, lengthtrainarray]
    tmp = [Variable(torch.from_numpy(_), requires_grad=False) for _ in tmp]
    trques, trlabel, length = tmp
    if args.cuda:
        trlabel = trlabel.cuda()  # assignment added: .cuda() returns a copy, it is not in-place
    output = model(trques, length)
    # st(context=27)
    print("precesion 1 : %s" % accuracy(output.data, trlabel.data, topk=(1,), ori_label=labeltrainarray))
Example #9
def get_or_create(session, model, **kwargs):
    instance = session.query(model).filter_by(**kwargs).first()
    if instance:
        return instance, False
    else:
        instance = model(**kwargs)
        session.add(instance)
        return instance, True
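A usage sketch, assuming a SQLAlchemy session and a mapped User model (both hypothetical here):

user, created = get_or_create(session, User, name='alice')
if created:
    session.commit()  # persist the newly created row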
Example #10
def setSiteConfig(name, value):
    conf_model = model('SiteConfig')
    assert(name.strip())
    exists = conf_model.getOneByWhere('name=%s', [name])
    if exists:
        conf_model.update(exists.id, dict(value=str(value)))
        return exists.id
    else:
        return conf_model.insert(dict(name=name, value=str(value)))
Example #11
	def initialization(self):

		self.link = os.popen('echo $CONV_ROOT').read()
		self.link = self.link[:-1]
		print(self.link)

		P = model()
		P.initialization()
		P.modelAdaptation()
Example #12
def de(model, baseline_min, baseline_max, max=100, f=0.75, cf=0.3, epsilon=0.01):
	curr_candidate_sol = model()
	# print "FROM DE-->", curr_candidate_sol
	np = curr_candidate_sol.numOfDec * 10
	frontier = [candidate(curr_candidate_sol) for _ in xrange(np)]

	# for x in frontier:
	# 	print "id:", x.id, " have:", x.have, " score:", x.score

	# print "length of frontier:", len(frontier)

	# Pending : should you use else if here?

	for each_thing in frontier:
		if(each_thing.score < 0):
			BaseLine.baseline_min = 0
			print "--------"
		if(each_thing.score < BaseLine.baseline_min):
			BaseLine.baseline_min = each_thing.score
			print "--------------"
		if(each_thing.score > BaseLine.baseline_max):
			BaseLine.baseline_max = each_thing.score
			print "---------"



	#Normalize the scores of each thing now

	# for each_thing in frontier:
	# 	prev_each_thing_score = each_thing.score
	# 	each_thing.score = float(each_thing.score - BaseLine.baseline_min)/(BaseLine.baseline_max - BaseLine.baseline_min)
	
	#total = total score of all the candidates found so far
	for k in xrange(max):
		total, n = update(f, cf, frontier, curr_candidate_sol, BaseLine.baseline_min, BaseLine.baseline_max)
		# print "BASELINE: MIN=", BaseLine.baseline_min," MAX=", BaseLine.baseline_max
		# if total/n > (1 - epsilon):
		# 	print "break: value of k=", k, " total=",total, "n=",n 
		# 	break
	# for x in frontier:
	# 	print "print --x:",x.id," ",x.have, x.score

	#Now baseline everything again 

	for each_thing in frontier:
		each_thing.score = (each_thing.score - BaseLine.baseline_min) / ( BaseLine.baseline_max - BaseLine.baseline_min + 0.001)

	score_have_dict = {obj.score: obj.have for obj in frontier}
	print "==================="
	# for key in sorted(score_have_dict.keys(),reverse = True):
 #  		print "%s: %s" % (key, score_have_dict[key])

	print "BASELINE: MIN=", BaseLine.baseline_min," MAX=", BaseLine.baseline_max
  	sorted_keys = sorted(score_have_dict.keys(),reverse = True)
  	print "%s: %s" % (sorted_keys[0], score_have_dict[sorted_keys[0]])
	
	return frontier
Example #13
	def getOrCreate(self, model, defaults=None, **kwargs):
		instance = self.session.query(model).filter_by(**kwargs).first()

		if instance:
			return instance	#, False
		else:
			params = dict((k, v) for k, v in kwargs.iteritems() if not isinstance(v, ClauseElement))
			instance = model(**params)
			self.session.add(instance)
			return instance	#, True
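The defaults argument is accepted but never applied; in the usual Django-style variant of this pattern it is merged in before instantiation (sketch, keeping this snippet's Python 2 style):

params = dict((k, v) for k, v in kwargs.iteritems()
              if not isinstance(v, ClauseElement))
params.update(defaults or {})
instance = model(**params)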
Example #14
def setSiteConfig(name, value):
    conf_model = model('SiteConfig')
    assert(name.strip())
    exists = conf_model.getOneByWhere('name=%s', name)
    if isinstance(value, unicode): value = unicodeToStr(value)
    if exists:
        conf_model.update(exists.id, dict(value=str(value)))
        return exists.id
    else:
        return conf_model.insert(dict(name=name, value=str(value)))
Example #15
def evaluate(data_source, batch_size=10, window=args.window):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN': model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if i > 0: print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        start_idx = len(next_word_history) if next_word_history is not None else 0
        next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])])
        #print(next_word_history)
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        #print(pointer_history)
        ###
        # Built-in cross entropy
        # total_loss += len(data) * criterion(output_flat, targets).data[0]
        ###
        # Manual cross entropy
        # softmax_output_flat = torch.nn.functional.softmax(output_flat)
        # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1))
        # entropy = -torch.log(soft)
        # total_loss += len(data) * entropy.mean().data[0]
        ###
        # Pointer manual cross entropy
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            p = vocab_loss
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)
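A toy, self-contained sketch of the pointer/vocabulary mixture computed in the inner loop above (all sizes and values hypothetical):

import torch

vocab_p = torch.softmax(torch.randn(10), dim=0)    # LM distribution over vocab
ptr_attn = torch.softmax(torch.randn(5), dim=0)    # attention over history window
hist = torch.eye(10)[torch.randint(0, 10, (5,))]   # one-hot history next-words
ptr_dist = (ptr_attn.unsqueeze(1) * hist).sum(0)   # pointer distribution
lambdah = 0.1
p = lambdah * ptr_dist + (1 - lambdah) * vocab_p   # mixture, still sums to 1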
Example #16
def eval(data_source):
    total_L = 0.0
    ntotal = 0
    hidden = model.begin_state(func=mx.nd.zeros, batch_size=args.batch_size, ctx=context)
    for i in range(0, data_source.shape[0] - 1, args.bptt):
        data, target = get_batch(data_source, i)
        output, hidden = model(data, hidden)
        L = loss(output, target)
        total_L += mx.nd.sum(L).asscalar()
        ntotal += L.size
    return total_L / ntotal
Example #17
    def get_orders(self, sql, model=model.Model_Order):
        results = self.get_all(sql)
        model_list = []

        if not results:
            return []
        for row in results:
            o_model = model()
            o_model.setVale(str(row[0]), str(row[1]), str(row[2]), str(row[3]), str(row[4]), str(row[5]), str(row[6]), str(row[7]), str(row[8]), str(row[9]), str(row[10]), str(row[11]))
            model_list.append(o_model)
        return model_list
Example #18
def valid(epoch, quesfeaShu, labelShu, lengthShu):
    losses = AverageMeter()
    top1 = AverageMeter()
    model.eval()

    start_time = time.time()
    for i in range(0, len(quesfeaShu) / args.batch_size):
        if i == len(quesfeaShu) / args.batch_size - 1:
            batchend = len(quesfeaShu)
        else:
            batchend = (i + 1) * (args.batch_size)
        # print batchend
        batchstart = i * (args.batch_size)
        batch_size = batchend - batchstart
        quesfeabatch = []
        labelbatch = []
        lengthbatch = []
        quesfeaOri = quesfeaShu[batchstart:batchend]
        labelOri = labelShu[batchstart:batchend]
        lengthOri = lengthShu[batchstart:batchend]
        idxbatch = sorted(range(len(lengthOri)), key=lambda x: lengthOri[x], reverse=True)
        for j in range(len(idxbatch)):
            quesfeabatch.append(quesfeaOri[idxbatch[j]])
            labelbatch.append(labelOri[idxbatch[j]])
            lengthbatch.append(lengthOri[idxbatch[j]])

        questrainarray = np.asarray(quesfeabatch)
        labeltrainarray = np.asarray(labelbatch)
        lengthtrainarray = np.asarray(lengthbatch)

        tmp = [questrainarray, labeltrainarray, lengthtrainarray]
        tmp = [Variable(torch.from_numpy(_), requires_grad=False) for _ in tmp]
        trques, trlabel, length = tmp
        if args.cuda:
            trlabel = trlabel.cuda()  # assignment added: .cuda() is not in-place
        output = model(trques, length)
        # print output
        loss = criterion(output, trlabel) / (batch_size)
        prec1, = accuracy(output.data, trlabel.data, topk=(1,), ori_label=labeltrainarray)
        # label 0 or 1
        losses.update(loss.data[0], batch_size)
        top1.update(prec1[0], batch_size)

        # loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        print str(top1.avg) + ' ' + str(loss.data[0]) + ' ' + 'batch_valid ' + str(i)
    # update better performance model
    global best_score
    if top1.avg > best_score:
        torch.save(model, args.save)
        print 'save model'
        best_score = top1.avg
    print str(top1.avg) + ' ' + str(loss.data[0]) + ' ' + 'epoch_valid ' + str(epoch)
Example #19
	def pairingStep(self, N, basis_trunc):

		######################## GENERATE THE PAIRS
		X = pairs()
		X.initialization()

		# A: list containing all the pairs of every round, in order
		A = X.generatePairs(N)

		# Compute the number of rounds
		if N % 2 == 0:
			tour = N - 1

		else:
			tour = N

		print 'rounds: ', tour

		# Run each dance round
		for i in range(1, tour+1):
			print 'round', i

			# Get the pairs for the current round
			B = X.getPairs(N,A,i)

			######################## WRITE MODELE.INP
			Y = model()
			Y.initialization()
			Y.modesWriting(basis_trunc, B)

			########################CONVIV
			self.pairingConvivExecution()

		######################## SORT ZPE.OUT
		os.system('sort -k6 ' + self.link + '/src/ui/python/work/zpe.out > ' + self.link + '/src/ui/python/work/zpe_trie.out')

		# Delete zpe.out, otherwise the energies of the optimal contractions would be appended after those of the previous pairs
#		os.system('rm ' + self.link + '/src/ui/python/work/zpe.out')

		######################## OPTIMAL CONTRACTIONS

		# C: list containing the optimal contractions
		C = X.getOptimalContractions(N)

		# Write modele.inp
		Y.contractionsModelWriting()
		Y.modesWriting(basis_trunc, C)

		self.pairingConvivExecution()

		os.system('sort -k6 ' + self.link + '/src/ui/python/work/zpe.out > ' + self.link + '/src/ui/python/work/zpe_trie.out' + str(self.getStep()))
		os.system('rm ' + self.link + '/src/ui/python/work/zpe.out')
		os.system('rm ' + self.link + '/src/ui/python/work/zpe_trie.out')
		os.system('cp ' + self.link + '/src/ui/python/work/out ' + self.link + '/src/ui/python/work/out' + str(self.getStep()))
Example #20
def get_min_max(model):
	min = 999999
	max = -999999
	for x in xrange(2000):
		temp_candidate_sol = model()
		temp_score = score(temp_candidate_sol)
		if(temp_score > max):
			max = temp_score
		if(temp_score < min):
			min = temp_score
	return (min,max)
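de() above uses these sampled bounds to normalize candidate scores; the same normalization in isolation (sketch; candidate is a hypothetical solution object, and the +0.001 guard matches the one used in de()):

lo, hi = get_min_max(model)
normalized = (score(candidate) - lo) / (hi - lo + 0.001)  # guard against hi == lo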
Example #21
def train(model, train_loader, loss_fn, optimizer, logger, print_every=20, USE_CUDA=True):
    '''Train one epoch, i.e. run once over the whole training set.
    Args:
        model         :  the network model
        train_loader  :  loader object for the training set
        loss_fn       :  loss function, here CTCLoss
        optimizer     :  optimizer object
        logger        :  logger object
        print_every   :  print the loss every 20 batches
        USE_CUDA      :  whether to use the GPU
    Returns:
        average_loss  :  average loss over the epoch
    '''
    model.train()
    
    total_loss = 0
    print_loss = 0
    i = 0
    for data in train_loader:
        inputs, targets, input_sizes, input_sizes_list, target_sizes = data
        batch_size = inputs.size(0)
        inputs = inputs.transpose(0, 1)
        
        inputs = Variable(inputs, requires_grad=False)
        input_sizes = Variable(input_sizes, requires_grad=False)
        targets = Variable(targets, requires_grad=False)
        target_sizes = Variable(target_sizes, requires_grad=False)

        if USE_CUDA:
            inputs = inputs.cuda()
        
        inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_sizes_list)
        
        out = model(inputs)
        loss = loss_fn(out, targets, input_sizes, target_sizes)
        loss /= batch_size
        print_loss += loss.data[0]

        if (i + 1) % print_every == 0:
            print('batch = %d, loss = %.4f' % (i+1, print_loss / print_every))
            logger.debug('batch = %d, loss = %.4f' % (i+1, print_loss / print_every))
            print_loss = 0
        
        total_loss += loss.data[0]
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm(model.parameters(), 400)
        optimizer.step()
        i += 1
    average_loss = total_loss / i
    print("Epoch done, average loss: %.4f" % average_loss)
    logger.info("Epoch done, average loss: %.4f" % average_loss)
    return average_loss
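pack_padded_sequence, as called above, requires the batch to be sorted by decreasing length; a minimal standalone sketch (shapes assumed):

import torch
import torch.nn as nn

padded = torch.randn(50, 4, 40)   # (T, batch, feat), batch sorted by length
lengths = [50, 42, 30, 12]        # decreasing sequence lengths
packed = nn.utils.rnn.pack_padded_sequence(padded, lengths)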
Example #22
def train(epoch, optimizer, quesfeaShu, labelShu, lengthShu):
    losses = AverageMeter()
    top1 = AverageMeter()

    model.train()

    for i in range(0, len(quesfeaShu) / args.batch_size):
        if i == len(quesfeaShu) / args.batch_size - 1:
            batchend = len(quesfeaShu)
        else:
            batchend = (i + 1) * (args.batch_size)
        batchstart = i * (args.batch_size)
        batch_size = batchend - batchstart

        quesfeabatch = []
        labelbatch = []
        lengthbatch = []

        quesfeaOri = quesfeaShu[batchstart:batchend]
        labelOri = labelShu[batchstart:batchend]
        lengthOri = lengthShu[batchstart:batchend]

        idxbatch = sorted(range(len(lengthOri)), key=lambda x: lengthOri[x], reverse=True)
        for j in range(len(idxbatch)):
            quesfeabatch.append(quesfeaOri[idxbatch[j]])
            labelbatch.append(labelOri[idxbatch[j]])
            lengthbatch.append(lengthOri[idxbatch[j]])

        questrainarray = np.asarray(quesfeabatch)
        labeltrainarray = np.asarray(labelbatch)
        lengthtrainarray = np.asarray(lengthbatch)

        tmp = [questrainarray, labeltrainarray, lengthtrainarray]
        tmp = [Variable(torch.from_numpy(_), requires_grad=False) for _ in tmp]
        trques, trlabel, length = tmp
        if args.cuda:
            trlabel = trlabel.cuda()  # assignment added: .cuda() is not in-place

        output = model(trques, length)
        loss = criterion(output, trlabel) / (batch_size)
        prec1, = accuracy(output.data, trlabel.data, topk=(1,))

        losses.update(loss.data[0], batch_size)
        top1.update(prec1[0], batch_size)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        print str(top1.avg) + ' ' + str(top1.val) + ' ' + str(loss.data[0]) + ' ' + 'batch ' + str(i)
    print str(top1.avg) + ' ' + str(top1.val) + ' ' + str(loss.data[0]) + ' ' + 'epoch ' + str(epoch)
Example #23
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)

        loss = raw_loss
        # Activiation Regularization
        if args.alpha: loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta: loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len
Example #24
def eval(data_source):
    total_L = 0.0
    ntotal = 0
    hidden = model.begin_state(func=mx.nd.zeros, batch_size=args.batch_size, ctx=context)
    for i, (data, target) in enumerate(data_source):
        data = data.as_in_context(context).T
        target = target.as_in_context(context).T.reshape((-1, 1))
        output, hidden = model(data, hidden)
        L = loss(output, target)
        total_L += mx.nd.sum(L).asscalar()
        ntotal += L.size
    return total_L / ntotal
Example #25
def dev(model, dev_loader, loss_fn, decoder, logger, USE_CUDA=True):
    '''Evaluation pass over the dev set. Unlike train(), no backward pass is
    needed, and the character accuracy is computed as well.
    Args:
        model       :   the model
        dev_loader  :   loader object for the dev set
        loss_fn     :   loss function
        decoder     :   decoder object that turns the network output into text
        logger      :   logger object
        USE_CUDA    :   whether to use the GPU
    Returns:
        acc * 100    :   character accuracy (word accuracy if space is not a label)
        average_loss :   average loss on the dev set
    '''
    model.eval()
    total_cer = 0
    total_tokens = 0
    total_loss = 0
    i = 0

    for data in dev_loader:
        inputs, targets, input_sizes, input_sizes_list, target_sizes = data
        batch_size = inputs.size(0)
        inputs = inputs.transpose(0, 1)

        inputs = Variable(inputs, requires_grad=False)
        input_sizes = Variable(input_sizes, requires_grad=False)
        targets = Variable(targets, requires_grad=False)
        target_sizes = Variable(target_sizes, requires_grad=False)

        if USE_CUDA:
            inputs = inputs.cuda()
        
        inputs = nn.utils.rnn.pack_padded_sequence(inputs, input_sizes_list)
        out, probs = model(inputs, dev=True)
        
        loss = loss_fn(out, targets, input_sizes, target_sizes)
        loss /= batch_size
        total_loss += loss.data[0]
        
        probs = probs.data.cpu()
        targets = targets.data
        target_sizes = target_sizes.data

        if decoder.space_idx == -1:
            total_cer += decoder.phone_word_error(probs, input_sizes_list, targets, target_sizes)[1]
        else:
            total_cer += decoder.phone_word_error(probs, input_sizes_list, targets, target_sizes)[0]
        total_tokens += sum(target_sizes)
        i += 1
    acc = 1 - float(total_cer) / total_tokens
    average_loss = total_loss / i
    return acc * 100, average_loss
Example #26
File: load.py Project: dmalmer/EMG
	def add_models(self, line_array):
		mus 	= [float(m) for m in line_array[3].split(",")]
		sis 	= [float(m) for m in line_array[4].split(",")]
		lams 	= [float(m) for m in line_array[5].split(",")]
		pis 	= [float(m) for m in line_array[6].split(",")]
		wsEMG 	= [float(m) for m in line_array[7].split(",")]
		wsF 	= [float(m) for m in line_array[8].split(",")]
		wsR 	= [float(m) for m in line_array[9].split(",")]
		bF 		= [float(m) for m in line_array[10].split(",")]
		bR 		= [float(m) for m in line_array[11].split(",")]
		for i, mu in enumerate(mus):
			self.models.append(model(mu, sis[i], lams[i], pis[i], wsEMG[i], wsF[i], wsR[i], bF[i], bR[i] ))
		self.OKAY 	= bool(sum([1 for m in self.models if not m.OKAY  ]) == 0)
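The final OKAY flag is equivalent to the more direct all():

self.OKAY = all(m.OKAY for m in self.models)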
Example #27
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
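The returned value is the mean cross-entropy per token, so validation perplexity follows by exponentiation:

import math

val_loss = 4.6                           # hypothetical value returned by evaluate()
print('ppl %.2f' % math.exp(val_loss))   # ~99.48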
Example #28
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN': model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #29
    def get_orders(self, sql, model=model.Model_Order):
        results = self.get_all(sql)
        model_list = []

        if not results:
            return []
        for row in results:
            value_list = []
            for value in row:
                value_list.append(value)
            o_model = model()
            o_model.setValue(value_list)
            model_list.append(o_model)
        return model_list
Example #30
def valid(epoch, quesfeaShu, labelShu, lengthShu):
    top1 = AverageMeter()

    model.eval()

    for i in range(0, len(quesfeaShu) / args.batch_size):
        if i == len(quesfeaShu) / args.batch_size - 1:
            batchend = len(quesfeaShu)
        else:
            batchend = (i + 1) * (args.batch_size)
        # print batchend
        batchstart = i * (args.batch_size)
        batch_size = batchend - batchstart

        quesfeabatch = []
        labelbatch = []
        lengthbatch = []

        quesfeaOri = quesfeaShu[batchstart:batchend]
        labelOri = labelShu[batchstart:batchend]
        lengthOri = lengthShu[batchstart:batchend]
        idxbatch = sorted(range(len(lengthOri)), key=lambda x: lengthOri[x], reverse=True)
        for j in range(len(idxbatch)):
            quesfeabatch.append(quesfeaOri[idxbatch[j]])
            labelbatch.append(labelOri[idxbatch[j]])
            lengthbatch.append(lengthOri[idxbatch[j]])

        questrainarray = np.asarray(quesfeabatch)
        labeltrainarray = np.asarray(labelbatch)
        lengthtrainarray = np.asarray(lengthbatch)

        tmp = [questrainarray, labeltrainarray, lengthtrainarray]
        tmp = [Variable(torch.from_numpy(_), requires_grad=False) for _ in tmp]
        trques, trlabel, length = tmp
        if args.cuda:
            trlabel = trlabel.cuda()  # assignment added: .cuda() is not in-place

        output = model(trques, length)
        loss = criterion(output, trlabel) / (batch_size)

        prec1, = accuracy(output.data, trlabel.data, topk=(1,))
        top1.update(prec1[0], batch_size)
        print str(top1.avg) + ' ' + str(loss.data[0]) + ' ' + 'batch_valid ' + str(i)
    global best_score
    if top1.avg > best_score:
        torch.save(model, args.save)
        print 'save model'
        best_score = top1.avg
    print str(top1.avg) + ' ' + str(loss.data[0]) + ' ' + 'epoch_valid ' + str(epoch)
Example #31
            pad_paths(paths_train[batch_start:batch_end], max_paths,
                      max_edges)).to(device)
        counts = torch.DoubleTensor(
            pad_counts(counts_train[batch_start:batch_end],
                       max_paths)).to(device)
        edgecounts = torch.LongTensor(
            pad_edgecounts(num_edges_all[batch_start:batch_end],
                           max_paths)).to(device)
        targets = torch.LongTensor(
            targets_train[batch_start:batch_end]).to(device)

        # Backprop and perform Adam optimisation
        optimizer.zero_grad()

        # Run the forward pass
        outputs = model(nodes, paths, counts, edgecounts, max_paths, max_edges)

        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        all_losses.append(loss.item())

    print("Epoch: {}/{} Mean Loss: {}".format(epoch, num_epochs,
                                              np.mean(all_losses)))

print("Training Complete!")

model_dict = model.state_dict()
model_dict = {
Example #32
                        shape=[FLAGS.batch_size, FLAGS.noise_size],
                        name='z1')
    z2 = tf.placeholder(tf.float32,
                        shape=[FLAGS.batch_size, FLAGS.noise_size],
                        name='z2')

    with tf.variable_scope('embedding', reuse=False):
        embeding_matrix = tf.Variable(tf.random_normal(
            [FLAGS.class_num, FLAGS.noise_size], stddev=0.35),
                                      trainable=True)
        onehot = tf.one_hot(input_label, FLAGS.class_num)
        label_embeding = tf.matmul(onehot, embeding_matrix)
        embedded_z1 = tf.multiply(z1, label_embeding)
        embedded_z2 = tf.multiply(z2, label_embeding)

    Net = model(embedded_z1, embedded_z2, input_spe, input_spa, input_label,
                FLAGS)
    # Add scalar summary
    tf.summary.scalar('discriminator_loss', Net.discrim_loss)
    # tf.summary.scalar('spectral_loss', Net.spectral_loss)
    tf.summary.scalar('learning_rate_dis', Net.learning_rate_dis)
    tf.summary.scalar('learning_rate_gen', Net.learning_rate_gen)
    tf.summary.scalar('gen_loss', Net.gen_loss)

    print('Finish building the network!!!')
    # Define the saver and weight initializer
    saver = tf.train.Saver(max_to_keep=10)

    # Start the session
    config = tf.ConfigProto(log_device_placement=True)
    config.gpu_options.allow_growth = True
Example #33
def test(
    args
):  # Load a trained model that you have fine-tuned (we assume evaluate on cpu)
    processor = data_utils.ABSAProcessor()
    label_list = processor.get_labels(args.task_type)
    label_list_map = dict(zip([i for i in range(len(label_list))], label_list))
    tokenizer = ABSATokenizer.from_pretrained(
        modelconfig.MODEL_ARCHIVE_MAP[args.bert_model])
    eval_examples = processor.get_test_examples(args.data_dir, args.task_type)
    eval_features = data_utils.convert_examples_to_features(
        eval_examples, label_list, args.max_seq_length, tokenizer)

    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(eval_examples))
    logger.info("  Batch size = %d", args.eval_batch_size)
    all_input_ids = torch.tensor([f.input_ids for f in eval_features],
                                 dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features],
                                   dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features],
                                  dtype=torch.long)
    all_label_ids = torch.tensor([f.label_id for f in eval_features],
                                 dtype=torch.long)
    # all_tag_ids = torch.tensor([f.tag_id for f in eval_features], dtype=torch.long)
    eval_data = TensorDataset(all_input_ids, all_segment_ids, all_input_mask,
                              all_label_ids)
    # Run prediction for full data and get a prediction file
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    model = ABSABert.from_pretrained(
        modelconfig.MODEL_ARCHIVE_MAP[args.bert_model],
        num_labels=len(label_list))
    model.load_state_dict(torch.load(os.path.join(args.output_dir,
                                                  "model.pt")))
    model.cuda()
    model.eval()

    preds = None
    out_label_ids = None
    all_mask = []
    for step, batch in enumerate(eval_dataloader):
        batch = tuple(t.cuda() for t in batch)
        input_ids, segment_ids, input_mask, label_ids = batch

        with torch.no_grad():
            logits = model(input_ids,
                           token_type_ids=segment_ids,
                           attention_mask=input_mask)

        all_mask.append(input_mask)
        logits = [[np.argmax(i) for i in l.detach().cpu().numpy()]
                  for l in logits]
        if preds is None:
            if type(logits) == list:
                preds = logits
            else:
                preds = logits.detach().cpu().numpy()
            out_label_ids = label_ids.detach().cpu().numpy()
        else:
            if type(logits) == list:
                preds = np.append(preds, np.asarray(logits), axis=0)
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids,
                                      label_ids.detach().cpu().numpy(),
                                      axis=0)

    out_label_ids = out_label_ids.tolist()
    preds = preds.tolist()

    all_mask = torch.cat(all_mask, dim=0)
    all_mask = all_mask.tolist()

    # get rid of padding
    new_label_ids, new_preds = [], []
    for i in range(len(all_mask)):
        l = sum(all_mask[i])
        new_preds.append(preds[i][:l])
        new_label_ids.append(out_label_ids[i][:l])
    new_label_ids = [t[1:-1] for t in new_label_ids]
    new_preds = [t[1:-1] for t in new_preds]
    preds, out_label_ids = new_preds, new_label_ids

    output_eval_json = os.path.join(args.output_dir, "predictions.json")
    with open(output_eval_json, "w") as fw:
        assert len(preds) == len(eval_examples)
        recs = {}
        for qx, ex in enumerate(eval_examples):
            recs[int(ex.guid.split("-")[1])] = {
                "sentence": ex.text_a,
                "idx_map": ex.idx_map,
                "logit": preds[qx]
            }  # skip the [CLS] tag.

        raw_X = [
            recs[qx]["sentence"] for qx in range(len(eval_examples))
            if qx in recs
        ]
        idx_map = [
            recs[qx]["idx_map"] for qx in range(len(eval_examples))
            if qx in recs
        ]

        for i in range(len(preds)):
            assert len(preds[i]) == len(out_label_ids[i]), print(
                len(preds[i]), len(out_label_ids[i]), idx_map[i])

        tokens_list = []
        for text_a in raw_X:
            tokens_a = []
            for t in [token.lower() for token in text_a]:
                tokens_a.extend(tokenizer.wordpiece_tokenizer.tokenize(t))
            tokens_list.append(tokens_a[:args.max_seq_length - 2])

        pre = [
            ' '.join([
                label_list_map.get(p, -1) for p in l[:args.max_seq_length - 2]
            ]) for l in preds
        ]
        true = [
            ' '.join([
                label_list_map.get(p, -1) for p in l[:args.max_seq_length - 2]
            ]) for l in out_label_ids
        ]

        for i in range(len(true)):
            assert len(tokens_list[i]) == len(true[i].split()), print(
                len(tokens_list[i]), len(true[i].split()), tokens_list[i],
                true[i])

        lines = [
            ' '.join([str(t) for t in tokens_list[i]]) + '***' + pre[i] +
            '***' + true[i] for i in range(len(pre))
        ]
        with open(os.path.join(args.output_dir, 'pre.txt'), 'w') as fp:
            fp.write('\n'.join(lines))

        logger.info("Train data from: {}".format(args.data_dir))
        logger.info("Out dir: {}".format(args.output_dir))

        if args.task_type == 'ae':
            eval_result(args.output_dir)
        else:
            eval_ts_result(args.output_dir)
Example #34
def train_mdnet():

    ## Init dataset ##
    with open(data_path, 'rb') as fp:
        data = pickle.load(fp)

    K = len(data)
    dataset = [None] * K
    for k, (seqname, seq) in enumerate(data.items()):
        img_list = seq['images']
        gt = seq['gt']
        img_dir = os.path.join(img_home, seqname)
        dataset[k] = RegionDataset(img_dir, img_list, gt, opts)

    ## Init model ##
    model = MDNet(opts['init_model_path'], K)
    if opts['use_gpu']:
        model = model.cuda()
    model.set_learnable_params(opts['ft_layers'])

    ## Init criterion and optimizer ##
    criterion = BinaryLoss()
    evaluator = Precision()
    optimizer = set_optimizer(model, opts['lr'])

    best_prec = 0.
    for i in range(opts['n_cycles']):
        print("==== Start Cycle %d ====" % (i))
        k_list = np.random.permutation(K)
        prec = np.zeros(K)
        for j, k in enumerate(k_list):
            tic = time.time()
            pos_regions, neg_regions = next(dataset[k])

            pos_regions = Variable(pos_regions)
            neg_regions = Variable(neg_regions)

            if opts['use_gpu']:
                pos_regions = pos_regions.cuda()
                neg_regions = neg_regions.cuda()

            pos_score = model(pos_regions, k)
            neg_score = model(neg_regions, k)

            loss = criterion(pos_score, neg_score)
            model.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm(model.parameters(),
                                          opts['grad_clip'])
            optimizer.step()

            prec[k] = evaluator(pos_score, neg_score)

            toc = time.time() - tic
            print("Cycle %2d, K %2d (%2d), Loss %.3f, Prec %.3f, Time %.3f" % \
                    (i, j, k, loss.data[0], prec[k], toc))

        cur_prec = prec.mean()
        print("Mean Precision: %.3f" % (cur_prec))
        if cur_prec > best_prec:
            best_prec = cur_prec
            if opts['use_gpu']:
                model = model.cpu()
            states = {'shared_layers': model.layers.state_dict()}
            print("Save model to %s" % opts['model_path'])
            torch.save(states, opts['model_path'])
            if opts['use_gpu']:
                model = model.cuda()
Example #35
data_loader = torch.utils.data.DataLoader(dst, batch_size=2, shuffle=True)

model = R2U_Net(img_ch=3, output_ch=34, t=2)
model.load_state_dict(torch.load('epochs-4.pt'))
model.eval()
model.cuda()

torch.manual_seed(10)
val = iter(data_loader)
for e in range(10):
    fig = plt.figure(figsize=(10, 10))
    image, mask = next(val)
    image = image.cuda()
    mask = mask.cuda().detach().cpu()
    preds = model(image)
    preds = preds.detach().cpu()
    image = image.cpu()
    fig1 = fig.add_subplot(131)
    plt.imshow(image[0].transpose(0, 2).transpose(0, 1).numpy())
    fig1.title.set_text("Image")
    fig1.axis("off")
    fig2 = fig.add_subplot(132)
    plt.imshow(mask[0].transpose(0, 2).transpose(0, 1).numpy())
    fig2.title.set_text("Ground_Truth")
    fig2.axis("off")
    fig3 = fig.add_subplot(133)
    plt.imshow(preds.argmax(1)[0].numpy())
    fig3.title.set_text("Prediction")
    fig3.axis("off")
    plt.show()
Example #36
    def check_condition(self, period):
        model = VAE_RNN_rec(self.args.dims, self.args.input_size_rnn, self.args.embedding_size,\
                                self.args.num_layer, self.args.dropout_rate, self.args.bidirectional, self.args.class_num,\
                                self.args.hidden_size_rnn, self.args.condition, self.args.dataset, self.args.activation, self.args.freeze, self.args.attn)
        model = model.to(self.args.device)

        if self.args.load_model:
            model.load_state_dict(torch.load(self.args.log_dir + '/' + self.args.load_model + '/' + 'model.pt'))

        if self.args.data_dir == './data/ml-1m':
            dataloader = ItemRatingLoader(self.args.data_dir)
        elif self.args.data_dir == './data/amazon':
            dataloader = AmazonRatingLoader(self.args.data_dir, self.args.dims[0])
        # tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'fixed2_test_tr.csv'), os.path.join(self.args.data_dir, 'fixed2_test_te.csv'))
        tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'test_tr.csv'), os.path.join(self.args.data_dir, 'test_te.csv'))
        N = tr_data_rating.shape[0]
        idxlist = np.array(range(N))
        # print(N)
        np.random.seed(98764)
        idx_pe = np.random.permutation(len(idxlist))

        idxlist = idxlist[idx_pe]
        # print(idxlist[:self.args.batch_size])
        # if self.args.condition:  
        valid_data_item = dataloader.load_sequence_data_generator(int(N/self.args.batch_size)+1, 'test', self.args.batch_size, idx_pe)
        with torch.no_grad():
            for i, st_idx in enumerate(range(0, N, self.args.batch_size)):
                order, item_feature, label = next(valid_data_item)
                end_idx = min(st_idx + self.args.batch_size, N)
                x_tr_unorder = tr_data_rating[idxlist[st_idx:end_idx]]
                X_tr = x_tr_unorder[order]
                x_te_unorder = te_data_rating[idxlist[st_idx:end_idx]]
                X_te = x_te_unorder[order]
                # print(label.item())
                if not label.item() == period:
                    continue
                else:
                    if sparse.isspmatrix(X_tr):
                        X_tr = X_tr.toarray()
                    X_tr = X_tr.astype('float32')
                    for k in range(self.args.class_num):
                        hidden = "h_{}".format(k+1)
                        self.hidden_vecs[hidden]= torch.load(f"{self.args.hiddenvec_dir}/{hidden}.pt")
                    hs = {}
                    for _ in range(self.args.batch_size):
                        for k in range(self.args.class_num):
                            hidden = "h_{}".format(k+1)
                            hs.setdefault(hidden, []).append(self.hidden_vecs[hidden].unsqueeze(0))
                    ndcg_result={}
                    for j in range(self.args.class_num):
                        hidden = "h_{}".format(j+1)
                        hv = torch.cat(hs[hidden],0)
                        model_input = (torch.FloatTensor(X_tr).to(self.args.device), hv)
                        recon, _, _ = model(model_input)
                        topk = show_recommended_items(recon.cpu().detach().numpy(), k=50)
                        with open(f'./result/amazon/qual/topk{j}.pkl', 'wb') as f:
                            pickle.dump(topk, f)   
                        recon[X_tr.nonzero()] = -np.inf
                        nd_name = "ndcg_list50_{}".format(j+1)
                        ndcg_result.setdefault(nd_name, []).append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=50))
            ndcg_final_result={}
            for j in range(self.args.class_num): 
                nd_name = "ndcg_list50_{}".format(j+1)
                ndcg_final_result[nd_name] = np.concatenate(ndcg_result[nd_name])
            return ndcg_final_result
Example #37
def predictFull(img, model):
    model_out = model(imgTensor(img)[None])
    softmax = torch.nn.Softmax(dim=1)
    soft_out = softmax(model_out)
    return soft_out
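Hypothetical usage, assuming imgTensor returns a (C, H, W) float tensor:

probs = predictFull(img, model)        # (1, num_classes) softmax scores
pred_class = int(probs.argmax(dim=1))  # index of the most likely class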
Example #38
def run_mdnet(img_list_v,
              img_list_i,
              init_bbox,
              gt=None,
              seq='seq_name ex)Basketball',
              savefig_dir='',
              display=False):

    # Init bbox
    target_bbox = np.array(init_bbox)
    result = np.zeros((len(img_list_v), 4))
    result_bb = np.zeros((len(img_list_v), 4))
    result[0] = np.copy(target_bbox)
    result_bb[0] = np.copy(target_bbox)

    iou_result = np.zeros((len(img_list_v), 1))

    # execution time array
    exec_time_result = np.zeros((len(img_list_v), 1))

    # Init model
    model = MDNet(opts['model_path'])
    if opts['adaptive_align']:
        align_h = model.roi_align_model.aligned_height
        align_w = model.roi_align_model.aligned_width
        spatial_s = model.roi_align_model.spatial_scale
        model.roi_align_model = RoIAlignAdaMax(align_h, align_w, spatial_s)
    if opts['use_gpu']:
        model = model.cuda()

    model.set_learnable_params(opts['ft_layers'])

    # Init image crop model
    img_crop_model = imgCropper(1.)
    if opts['use_gpu']:
        img_crop_model.gpuEnable()

    # Init criterion and optimizer
    criterion = BinaryLoss()
    init_optimizer = set_optimizer(model, opts['lr_init'])
    update_optimizer = set_optimizer(model, opts['lr_update'])

    tic = time.time()
    # Load first image
    cur_image_v = Image.open(img_list_v[0]).convert('RGB')
    cur_image_v = np.asarray(cur_image_v)

    cur_image_i = Image.open(img_list_i[0]).convert('RGB')
    cur_image_i = np.asarray(cur_image_i)

    init_targetObject_v = cur_image_v[int(init_bbox[0]):int(init_bbox[0] +
                                                            init_bbox[2]),
                                      int(init_bbox[1]):int(init_bbox[1] +
                                                            init_bbox[3]), :]
    init_targetObject_i = cur_image_i[int(init_bbox[0]):int(init_bbox[0] +
                                                            init_bbox[2]),
                                      int(init_bbox[1]):int(init_bbox[1] +
                                                            init_bbox[3]), :]

    # Draw pos/neg samples
    ishape = cur_image_v.shape
    pos_examples = gen_samples(
        SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),
        target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
    neg_examples = gen_samples(
        SampleGenerator('uniform', (ishape[1], ishape[0]), 1, 2, 1.1),
        target_bbox, opts['n_neg_init'], opts['overlap_neg_init'])
    neg_examples = np.random.permutation(neg_examples)

    cur_bbreg_examples = gen_samples(
        SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5,
                        1.1), target_bbox, opts['n_bbreg'],
        opts['overlap_bbreg'], opts['scale_bbreg'])

    # compute padded sample
    padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] *
                 (opts['padding'] - 1.) / 2.).min()
    padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] *
                 (opts['padding'] - 1.) / 2.).min()
    padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] *
                 (opts['padding'] + 1.) / 2.).max()
    padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] *
                 (opts['padding'] + 1.) / 2.).max()
    padded_scene_box = np.reshape(
        np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                    padded_y2 - padded_y1)), (1, 4))

    scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
    if opts['jitter']:
        ## horizontal shift
        jittered_scene_box_horizon = np.copy(padded_scene_box)
        jittered_scene_box_horizon[0, 0] -= 4.
        jitter_scale_horizon = 1.

        ## vertical shift
        jittered_scene_box_vertical = np.copy(padded_scene_box)
        jittered_scene_box_vertical[0, 1] -= 4.
        jitter_scale_vertical = 1.

        jittered_scene_box_reduce1 = np.copy(padded_scene_box)
        jitter_scale_reduce1 = 1.1**(-1)

        ## vertical shift
        jittered_scene_box_enlarge1 = np.copy(padded_scene_box)
        jitter_scale_enlarge1 = 1.1**(1)

        ## scale reduction
        jittered_scene_box_reduce2 = np.copy(padded_scene_box)
        jitter_scale_reduce2 = 1.1**(-2)
        ## scale enlarge
        jittered_scene_box_enlarge2 = np.copy(padded_scene_box)
        jitter_scale_enlarge2 = 1.1**(2)

        scene_boxes = np.concatenate([
            scene_boxes, jittered_scene_box_horizon,
            jittered_scene_box_vertical, jittered_scene_box_reduce1,
            jittered_scene_box_enlarge1, jittered_scene_box_reduce2,
            jittered_scene_box_enlarge2
        ],
                                     axis=0)
        jitter_scale = [
            1., jitter_scale_horizon, jitter_scale_vertical,
            jitter_scale_reduce1, jitter_scale_enlarge1, jitter_scale_reduce2,
            jitter_scale_enlarge2
        ]
    else:
        jitter_scale = [1.]

    model.eval()
    for bidx in range(0, scene_boxes.shape[0]):
        crop_img_size = (scene_boxes[bidx, 2:4] * (
            (opts['img_size'], opts['img_size']) / target_bbox[2:4])
                         ).astype('int64') * jitter_scale[bidx]
        cropped_image_v, cur_image_var_v = img_crop_model.crop_image(
            cur_image_v, np.reshape(scene_boxes[bidx], (1, 4)), crop_img_size)
        cropped_image_v = cropped_image_v - 128.

        cropped_image_i, cur_image_var_i = img_crop_model.crop_image(
            cur_image_i, np.reshape(scene_boxes[bidx], (1, 4)), crop_img_size)
        cropped_image_i = cropped_image_i - 128.

        feat_map_v, feat_map_i, fused_feats = model(cropped_image_v,
                                                    cropped_image_i,
                                                    out_layer='conv3')

        rel_target_bbox = np.copy(target_bbox)
        rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]

        batch_num = np.zeros((pos_examples.shape[0], 1))
        cur_pos_rois = np.copy(pos_examples)
        cur_pos_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2],
                                                     (1, 2)),
                                          cur_pos_rois.shape[0],
                                          axis=0)
        scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
        cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field,
                                       (scaled_obj_size, scaled_obj_size),
                                       target_bbox[2:4], opts['padding'])
        cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
        cur_pos_rois = Variable(
            torch.from_numpy(cur_pos_rois.astype('float32'))).cuda()

        # pdb.set_trace()
        cur_pos_feats = model.roi_align_model(fused_feats, cur_pos_rois)
        cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0),
                                           -1).data.clone()

        # cur_pos_feats_i = model.roi_align_model(feat_map_i, cur_pos_rois)
        # cur_pos_feats_i = cur_pos_feats_i.view(cur_pos_feats_i.size(0), -1).data.clone()

        batch_num = np.zeros((neg_examples.shape[0], 1))
        cur_neg_rois = np.copy(neg_examples)
        cur_neg_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2],
                                                     (1, 2)),
                                          cur_neg_rois.shape[0],
                                          axis=0)
        cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field,
                                       (scaled_obj_size, scaled_obj_size),
                                       target_bbox[2:4], opts['padding'])
        cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
        cur_neg_rois = Variable(
            torch.from_numpy(cur_neg_rois.astype('float32'))).cuda()

        cur_neg_feats = model.roi_align_model(fused_feats, cur_neg_rois)
        cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0),
                                           -1).data.clone()

        # cur_neg_feats_i = model.roi_align_model(feat_map_i, cur_neg_rois)
        # cur_neg_feats_i = cur_neg_feats_i.view(cur_neg_feats_i.size(0), -1).data.clone()

        ## bbreg rois
        batch_num = np.zeros((cur_bbreg_examples.shape[0], 1))
        cur_bbreg_rois = np.copy(cur_bbreg_examples)
        cur_bbreg_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2],
                                                       (1, 2)),
                                            cur_bbreg_rois.shape[0],
                                            axis=0)
        scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
        cur_bbreg_rois = samples2maskroi(cur_bbreg_rois, model.receptive_field,
                                         (scaled_obj_size, scaled_obj_size),
                                         target_bbox[2:4], opts['padding'])
        cur_bbreg_rois = np.concatenate((batch_num, cur_bbreg_rois), axis=1)
        cur_bbreg_rois = Variable(
            torch.from_numpy(cur_bbreg_rois.astype('float32'))).cuda()

        cur_bbreg_feats = model.roi_align_model(fused_feats, cur_bbreg_rois)
        cur_bbreg_feats = cur_bbreg_feats.view(cur_bbreg_feats.size(0),
                                               -1).data.clone()

        # cur_bbreg_feats_i = model.roi_align_model(feat_map_i, cur_bbreg_rois)
        # cur_bbreg_feats_i = cur_bbreg_feats_i.view(cur_bbreg_feats_i.size(0), -1).data.clone()

        feat_dim = cur_pos_feats.size(-1)

        if bidx == 0:
            pos_feats = cur_pos_feats
            neg_feats = cur_neg_feats
            ##bbreg feature
            bbreg_feats = cur_bbreg_feats
            bbreg_examples = cur_bbreg_examples
        else:
            pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
            neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)
            ##bbreg feature
            bbreg_feats = torch.cat((bbreg_feats, cur_bbreg_feats), dim=0)
            bbreg_examples = np.concatenate(
                (bbreg_examples, cur_bbreg_examples), axis=0)

    if pos_feats.size(0) > opts['n_pos_init']:
        pos_idx = np.asarray(range(pos_feats.size(0)))
        np.random.shuffle(pos_idx)
        pos_feats = pos_feats[pos_idx[0:opts['n_pos_init']], :]
    if neg_feats.size(0) > opts['n_neg_init']:
        neg_idx = np.asarray(range(neg_feats.size(0)))
        np.random.shuffle(neg_idx)
        neg_feats = neg_feats[neg_idx[0:opts['n_neg_init']], :]

    ##bbreg
    if bbreg_feats.size(0) > opts['n_bbreg']:
        bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
        np.random.shuffle(bbreg_idx)
        bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
        bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :]
        #print bbreg_examples.shape

    # init_target_feats = pos_feats[:400]

    ## open images and crop patch from obj
    extra_obj_size = np.array((opts['img_size'], opts['img_size']))
    extra_crop_img_size = extra_obj_size * (opts['padding'] + 0.6)
    replicateNum = 100
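    # build extra first-frame data: each replica re-crops the target under a random shift/scale jitter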
    for iidx in range(replicateNum):
        extra_target_bbox = np.copy(target_bbox)

        extra_scene_box = np.copy(extra_target_bbox)
        extra_scene_box_center = extra_scene_box[
            0:2] + extra_scene_box[2:4] / 2.
        extra_scene_box_size = extra_scene_box[2:4] * (opts['padding'] + 0.6)
        extra_scene_box[
            0:2] = extra_scene_box_center - extra_scene_box_size / 2.
        extra_scene_box[2:4] = extra_scene_box_size

        extra_shift_offset = np.clip(2. * np.random.randn(2), -4, 4)
        cur_extra_scale = 1.1**np.clip(np.random.randn(1), -2, 2)

        extra_scene_box[0] += extra_shift_offset[0]
        extra_scene_box[1] += extra_shift_offset[1]
        extra_scene_box[2:4] *= cur_extra_scale[0]

        scaled_obj_size = float(opts['img_size']) / cur_extra_scale[0]

        cur_extra_cropped_image_v, _ = img_crop_model.crop_image(
            cur_image_v, np.reshape(extra_scene_box, (1, 4)),
            extra_crop_img_size)
        cur_extra_cropped_image_v = cur_extra_cropped_image_v.detach()

        cur_extra_cropped_image_i, _ = img_crop_model.crop_image(
            cur_image_i, np.reshape(extra_scene_box, (1, 4)),
            extra_crop_img_size)
        cur_extra_cropped_image_i = cur_extra_cropped_image_i.detach()

        # extra_target_bbox = np.array(list(map(int, extra_target_bbox)))
        cur_extra_pos_examples = gen_samples(
            SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1,
                            1.2), extra_target_bbox,
            opts['n_pos_init'] // replicateNum, opts['overlap_pos_init'])
        cur_extra_neg_examples = gen_samples(
            SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 2,
                            1.1), extra_target_bbox,
            opts['n_neg_init'] // replicateNum // 4, opts['overlap_neg_init'])

        ##bbreg sample
        cur_extra_bbreg_examples = gen_samples(
            SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5, 1.1),
            extra_target_bbox, opts['n_bbreg'] // replicateNum // 4,
            opts['overlap_bbreg'], opts['scale_bbreg'])

        batch_num = iidx * np.ones((cur_extra_pos_examples.shape[0], 1))
        cur_extra_pos_rois = np.copy(cur_extra_pos_examples)
        cur_extra_pos_rois[:, 0:2] -= np.repeat(np.reshape(
            extra_scene_box[0:2], (1, 2)),
                                                cur_extra_pos_rois.shape[0],
                                                axis=0)
        cur_extra_pos_rois = samples2maskroi(
            cur_extra_pos_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_pos_rois = np.concatenate((batch_num, cur_extra_pos_rois),
                                            axis=1)

        batch_num = iidx * np.ones((cur_extra_neg_examples.shape[0], 1))
        cur_extra_neg_rois = np.copy(cur_extra_neg_examples)
        cur_extra_neg_rois[:, 0:2] -= np.repeat(np.reshape(
            extra_scene_box[0:2], (1, 2)),
                                                cur_extra_neg_rois.shape[0],
                                                axis=0)
        cur_extra_neg_rois = samples2maskroi(
            cur_extra_neg_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_neg_rois = np.concatenate((batch_num, cur_extra_neg_rois),
                                            axis=1)

        ## bbreg rois
        batch_num = iidx * np.ones((cur_extra_bbreg_examples.shape[0], 1))
        cur_extra_bbreg_rois = np.copy(cur_extra_bbreg_examples)
        cur_extra_bbreg_rois[:, 0:2] -= np.repeat(
            np.reshape(extra_scene_box[0:2], (1, 2)),
            cur_extra_bbreg_rois.shape[0],
            axis=0)
        cur_extra_bbreg_rois = samples2maskroi(
            cur_extra_bbreg_rois, model.receptive_field,
            (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4],
            opts['padding'])
        cur_extra_bbreg_rois = np.concatenate(
            (batch_num, cur_extra_bbreg_rois), axis=1)

        if iidx == 0:
            extra_cropped_image_v = cur_extra_cropped_image_v
            extra_cropped_image_i = cur_extra_cropped_image_i

            extra_pos_rois = np.copy(cur_extra_pos_rois)
            extra_neg_rois = np.copy(cur_extra_neg_rois)
            ##bbreg rois
            extra_bbreg_rois = np.copy(cur_extra_bbreg_rois)
            extra_bbreg_examples = np.copy(cur_extra_bbreg_examples)
        else:
            extra_cropped_image_v = torch.cat(
                (extra_cropped_image_v, cur_extra_cropped_image_v), dim=0)
            extra_cropped_image_i = torch.cat(
                (extra_cropped_image_i, cur_extra_cropped_image_i), dim=0)

            extra_pos_rois = np.concatenate(
                (extra_pos_rois, np.copy(cur_extra_pos_rois)), axis=0)
            extra_neg_rois = np.concatenate(
                (extra_neg_rois, np.copy(cur_extra_neg_rois)), axis=0)
            ##bbreg rois
            extra_bbreg_rois = np.concatenate(
                (extra_bbreg_rois, np.copy(cur_extra_bbreg_rois)), axis=0)
            extra_bbreg_examples = np.concatenate(
                (extra_bbreg_examples, np.copy(cur_extra_bbreg_examples)),
                axis=0)

    extra_pos_rois = Variable(
        torch.from_numpy(extra_pos_rois.astype('float32'))).cuda()
    extra_neg_rois = Variable(
        torch.from_numpy(extra_neg_rois.astype('float32'))).cuda()
    ##bbreg rois
    extra_bbreg_rois = Variable(
        torch.from_numpy(extra_bbreg_rois.astype('float32'))).cuda()

    extra_cropped_image_v -= 128.
    extra_cropped_image_i -= 128.

    # pdb.set_trace()

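    # run the two-stream backbone on one replica at a time to bound peak GPU memory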
    for iidxxx in range(replicateNum):
        temp_extra_cropped_image_v = torch.unsqueeze(
            extra_cropped_image_v[iidxxx], dim=0)
        temp_extra_cropped_image_i = torch.unsqueeze(
            extra_cropped_image_i[iidxxx], dim=0)
        temp_extra_feat_maps_v, temp_extra_feat_maps_i, temp_extra_feat_maps = model(
            temp_extra_cropped_image_v,
            temp_extra_cropped_image_i,
            out_layer='conv3')
        temp_extra_feat_maps = torch.squeeze(temp_extra_feat_maps, dim=0)
        # temp_extra_feat_maps_i = torch.squeeze(temp_extra_feat_maps_i, dim=0)

        if iidxxx == 0:
            extra_feat_maps = torch.zeros(replicateNum,
                                          temp_extra_feat_maps.shape[0],
                                          temp_extra_feat_maps.shape[1],
                                          temp_extra_feat_maps.shape[2])
            # extra_feat_maps_i = torch.zeros(replicateNum, temp_extra_feat_maps_i.shape[0], temp_extra_feat_maps_i.shape[1], temp_extra_feat_maps_i.shape[2])

        extra_feat_maps[iidxxx] = temp_extra_feat_maps
        # extra_feat_maps_i[iidxxx] = temp_extra_feat_maps_i

    extra_feat_maps = extra_feat_maps.cuda()

    # Draw pos/neg samples
    ishape = cur_image_v.shape

    # pdb.set_trace()
    extra_pos_feats = model.roi_align_model(extra_feat_maps, extra_pos_rois)
    extra_pos_feats = extra_pos_feats.view(extra_pos_feats.size(0),
                                           -1).data.clone()

    extra_neg_feats = model.roi_align_model(extra_feat_maps, extra_neg_rois)
    extra_neg_feats = extra_neg_feats.view(extra_neg_feats.size(0),
                                           -1).data.clone()

    ##bbreg feat
    extra_bbreg_feats = model.roi_align_model(extra_feat_maps,
                                              extra_bbreg_rois)
    extra_bbreg_feats = extra_bbreg_feats.view(extra_bbreg_feats.size(0),
                                               -1).data.clone()

    ## concatenate extra features to original_features
    pos_feats = torch.cat((pos_feats, extra_pos_feats), dim=0)
    neg_feats = torch.cat((neg_feats, extra_neg_feats), dim=0)
    ## concatenate extra bbreg feats to original_bbreg_feats
    bbreg_feats = torch.cat((bbreg_feats, extra_bbreg_feats), dim=0)
    bbreg_examples = np.concatenate((bbreg_examples, extra_bbreg_examples),
                                    axis=0)

    torch.cuda.empty_cache()
    model.zero_grad()

    # Initial training
    train(model, criterion, init_optimizer, pos_feats, neg_feats,
          opts['maxiter_init'])

    ##bbreg train
    if bbreg_feats.size(0) > opts['n_bbreg']:
        bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
        np.random.shuffle(bbreg_idx)
        bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
        bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :]

    bbreg = BBRegressor((ishape[1], ishape[0]))
    bbreg.train(bbreg_feats, bbreg_examples, target_bbox)

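    # seed the long-term (positive) and short-term (negative) feature memories with shuffled subsets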
    if pos_feats.size(0) > opts['n_pos_update']:
        pos_idx = np.asarray(range(pos_feats.size(0)))
        np.random.shuffle(pos_idx)
        pos_feats_all = [
            pos_feats.index_select(
                0,
                torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())
        ]
    if neg_feats.size(0) > opts['n_neg_update']:
        neg_idx = np.asarray(range(neg_feats.size(0)))
        np.random.shuffle(neg_idx)
        neg_feats_all = [
            neg_feats.index_select(
                0,
                torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())
        ]

    spf_total = time.time() - tic

    # Display
    savefig = savefig_dir != ''
    if display or savefig:
        dpi = 80.0
        figsize = (cur_image_v.shape[1] / dpi, cur_image_v.shape[0] / dpi)

        fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        im = ax.imshow(cur_image_v)

        if gt is not None:
            gt_rect = plt.Rectangle(tuple(gt[0, :2]),
                                    gt[0, 2],
                                    gt[0, 3],
                                    linewidth=3,
                                    edgecolor="#00ff00",
                                    zorder=1,
                                    fill=False)
            ax.add_patch(gt_rect)

        rect = plt.Rectangle(tuple(result_bb[0, :2]),
                             result_bb[0, 2],
                             result_bb[0, 3],
                             linewidth=3,
                             edgecolor="#ff0000",
                             zorder=1,
                             fill=False)
        ax.add_patch(rect)

        if display:
            plt.pause(.01)
            plt.draw()
        if savefig:
            fig.savefig(os.path.join(savefig_dir, '0000.jpg'), dpi=dpi)

    #####################################################################
    ####                        Main loop
    #####################################################################
    failure_count = 0
    trans_f = opts['trans_f']

    for i in range(1, len(img_list_v)):

        tic = time.time()
        # Load image
        cur_image_v = Image.open(img_list_v[i]).convert('RGB')
        cur_image_v = np.asarray(cur_image_v)
        cur_image_i = Image.open(img_list_i[i]).convert('RGB')
        cur_image_i = np.asarray(cur_image_i)

        # Estimate target bbox
        ishape = cur_image_v.shape
        samples = gen_samples(
            SampleGenerator('gaussian', (ishape[1], ishape[0]),
                            trans_f,
                            opts['scale_f'],
                            valid=True), target_bbox, opts['n_samples'])

        #########################################################################
        ####                Target-Aware Attention Prediction
        #########################################################################

        attention_path = "/daTANet_rgbt_234_Attention/" + seq + "/"
        attentionImage_name = str(i + 1) + "_attentionMap.jpg"

        # pdb.set_trace()
        attentionFlag = os.path.exists(attention_path + attentionImage_name)
        # print("==>> attentionFlag ", attentionFlag)

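        # after several consecutive failures, fall back to the offline attention map to re-seed the search region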
        if failure_count >= 6 and attentionFlag:

            attentionMap = Image.open(attention_path +
                                      attentionImage_name).convert('RGB')
            attentionMap = np.asarray(attentionMap)
            # pdb.set_trace()

            dynamic_attentionMAP = cv2.resize(
                attentionMap, (cur_image_v.shape[1], cur_image_v.shape[0]),
                interpolation=cv2.INTER_LINEAR)
            ret, static_attentionMAP = cv2.threshold(dynamic_attentionMAP, 100,
                                                     255, cv2.THRESH_BINARY)
            # cv2.imwrite('static_attentionMAP.png', static_attentionMAP)

            # pdb.set_trace()

            label_image = measure.label(static_attentionMAP)
            props = measure.regionprops(label_image)

            attention_BBox = []
            attention_centerLoc = []
            similarity_glob_target_max = 0
            global_samples = []

            #### for each candidate search region
            # for iii in range(len(props)):

            # consider at most one connected component of the attention map
            attNum = min(1, len(props))

            for iii in range(attNum):
                center_position = props[iii].centroid
                center_position = [
                    int(center_position[1]),
                    int(center_position[0])
                ]

                centerPos_prev_x = target_bbox[0] + target_bbox[2] / 2
                centerPos_prev_y = target_bbox[1] + target_bbox[3] / 2

                if math.fabs(center_position[0] -
                             centerPos_prev_x) < 30 and math.fabs(
                                 center_position[1] - centerPos_prev_y) < 30:

                    bbox = props[iii].bbox

                    new_bbox2 = np.zeros((4))
                    new_bbox2[0] = center_position[0] - target_bbox[2] / 2
                    new_bbox2[1] = center_position[1] - target_bbox[3] / 2
                    new_bbox2[2] = target_bbox[2]
                    new_bbox2[3] = target_bbox[3]

                    # if new_bbox[2] > 10 and new_bbox[3] > 10:
                    # switch_candidate_samples2 = sample_generator(new_bbox2, 100)
                    switch_samples2 = gen_samples(
                        SampleGenerator('gaussian', (ishape[1], ishape[0]),
                                        trans_f,
                                        opts['scale_f'],
                                        valid=True), new_bbox2, 256)
                    # global_samples.append(switch_samples2)
                    # pdb.set_trace()
                    # samples = np.concatenate((switch_samples2, samples))
                    samples = switch_samples2

            # print("==>> Using Global Proposals and samples: ", samples.shape[0])
            # samples = np.concatenate((switch_samples2, samples))

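        # enclose every candidate sample in one padded scene box so a single crop serves all ROIs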
        padded_x1 = (samples[:, 0] - samples[:, 2] *
                     (opts['padding'] - 1.) / 2.).min()
        padded_y1 = (samples[:, 1] - samples[:, 3] *
                     (opts['padding'] - 1.) / 2.).min()
        padded_x2 = (samples[:, 0] + samples[:, 2] *
                     (opts['padding'] + 1.) / 2.).max()
        padded_y2 = (samples[:, 1] + samples[:, 3] *
                     (opts['padding'] + 1.) / 2.).max()
        padded_scene_box = np.asarray(
            (padded_x1, padded_y1, padded_x2 - padded_x1,
             padded_y2 - padded_y1))

        if padded_scene_box[0] > cur_image_v.shape[1]:
            padded_scene_box[0] = cur_image_v.shape[1] - 1
        if padded_scene_box[1] > cur_image_v.shape[0]:
            padded_scene_box[1] = cur_image_v.shape[0] - 1
        if padded_scene_box[0] + padded_scene_box[2] < 0:
            padded_scene_box[2] = -padded_scene_box[0] + 1
        if padded_scene_box[1] + padded_scene_box[3] < 0:
            padded_scene_box[3] = -padded_scene_box[1] + 1

        crop_img_size = (padded_scene_box[2:4] *
                         ((opts['img_size'], opts['img_size']) /
                          target_bbox[2:4])).astype('int64')
        cropped_image_v, cur_image_var_v = img_crop_model.crop_image(
            cur_image_v, np.reshape(padded_scene_box, (1, 4)), crop_img_size)
        cropped_image_v = cropped_image_v - 128.
        cropped_image_i, cur_image_var_i = img_crop_model.crop_image(
            cur_image_i, np.reshape(padded_scene_box, (1, 4)), crop_img_size)
        cropped_image_i = cropped_image_i - 128.

        model.eval()
        feat_map_v, feat_map_i, feat_map = model(cropped_image_v,
                                                 cropped_image_i,
                                                 out_layer='conv3')

        # relative target bbox with padded_scene_box
        rel_target_bbox = np.copy(target_bbox)
        rel_target_bbox[0:2] -= padded_scene_box[0:2]

        # Extract sample features and get target location
        batch_num = np.zeros((samples.shape[0], 1))
        sample_rois = np.copy(samples)
        sample_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2],
                                                    (1, 2)),
                                         sample_rois.shape[0],
                                         axis=0)
        sample_rois = samples2maskroi(sample_rois, model.receptive_field,
                                      (opts['img_size'], opts['img_size']),
                                      target_bbox[2:4], opts['padding'])
        sample_rois = np.concatenate((batch_num, sample_rois), axis=1)
        sample_rois = Variable(torch.from_numpy(
            sample_rois.astype('float32'))).cuda()

        sample_feats = model.roi_align_model(feat_map, sample_rois)
        sample_feats = sample_feats.view(sample_feats.size(0), -1).clone()

        sample_scores = model(sample_feats, sample_feats, in_layer='fc4')
        top_scores, top_idx = sample_scores[:, 1].topk(5)
        top_idx = top_idx.data.cpu().numpy()
        target_score = top_scores.data.mean()
        target_bbox = samples[top_idx].mean(axis=0)

        success = target_score > opts['success_thr']

        # # Expand search area at failure
        if success:
            trans_f = opts['trans_f']
        else:
            trans_f = opts['trans_f_expand']

        ## Bbox regression
        if success:
            bbreg_feats = sample_feats[top_idx, :]
            bbreg_samples = samples[top_idx]
            bbreg_samples = bbreg.predict(bbreg_feats.data, bbreg_samples)
            bbreg_bbox = bbreg_samples.mean(axis=0)

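            # hysteresis: each success pays the failure counter down by 3; each failure adds 1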
            if failure_count >= 3:
                failure_count = failure_count - 3
            else:
                failure_count = 0
        else:
            bbreg_bbox = target_bbox
            failure_count = failure_count + 1

        # Save result
        result[i] = target_bbox
        result_bb[i] = bbreg_bbox
        iou_result[i] = 1.  # placeholder, overwritten with the true overlap below

        # Data collect
        if success:

            # Draw pos/neg samples
            pos_examples = gen_samples(
                SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),
                target_bbox, opts['n_pos_update'], opts['overlap_pos_update'])
            neg_examples = gen_samples(
                SampleGenerator('uniform', (ishape[1], ishape[0]), 1.5, 1.2),
                target_bbox, opts['n_neg_update'], opts['overlap_neg_update'])

            padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] *
                         (opts['padding'] - 1.) / 2.).min()
            padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] *
                         (opts['padding'] - 1.) / 2.).min()
            padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] *
                         (opts['padding'] + 1.) / 2.).max()
            padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] *
                         (opts['padding'] + 1.) / 2.).max()
            padded_scene_box = np.reshape(
                np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1,
                            padded_y2 - padded_y1)), (1, 4))

            scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
            jitter_scale = [1.]

            for bidx in range(0, scene_boxes.shape[0]):
                crop_img_size = (scene_boxes[bidx, 2:4] * (
                    (opts['img_size'], opts['img_size']) / target_bbox[2:4])
                                 ).astype('int64') * jitter_scale[bidx]
                cropped_image_v, cur_image_var_v = img_crop_model.crop_image(
                    cur_image_v, np.reshape(scene_boxes[bidx], (1, 4)),
                    crop_img_size)
                cropped_image_v = cropped_image_v - 128.
                cropped_image_i, cur_image_var_i = img_crop_model.crop_image(
                    cur_image_i, np.reshape(scene_boxes[bidx], (1, 4)),
                    crop_img_size)
                cropped_image_i = cropped_image_i - 128.

                feat_map_v, feat_map_i, feat_map = model(cropped_image_v,
                                                         cropped_image_i,
                                                         out_layer='conv3')

                rel_target_bbox = np.copy(target_bbox)
                rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]

                batch_num = np.zeros((pos_examples.shape[0], 1))
                cur_pos_rois = np.copy(pos_examples)
                cur_pos_rois[:, 0:2] -= np.repeat(np.reshape(
                    scene_boxes[bidx, 0:2], (1, 2)),
                                                  cur_pos_rois.shape[0],
                                                  axis=0)
                scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
                cur_pos_rois = samples2maskroi(
                    cur_pos_rois, model.receptive_field,
                    (scaled_obj_size, scaled_obj_size), target_bbox[2:4],
                    opts['padding'])
                cur_pos_rois = np.concatenate((batch_num, cur_pos_rois),
                                              axis=1)
                cur_pos_rois = Variable(
                    torch.from_numpy(cur_pos_rois.astype('float32'))).cuda()

                cur_pos_feats = model.roi_align_model(feat_map, cur_pos_rois)
                cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0),
                                                   -1).data.clone()

                batch_num = np.zeros((neg_examples.shape[0], 1))
                cur_neg_rois = np.copy(neg_examples)
                cur_neg_rois[:, 0:2] -= np.repeat(np.reshape(
                    scene_boxes[bidx, 0:2], (1, 2)),
                                                  cur_neg_rois.shape[0],
                                                  axis=0)
                cur_neg_rois = samples2maskroi(
                    cur_neg_rois, model.receptive_field,
                    (scaled_obj_size, scaled_obj_size), target_bbox[2:4],
                    opts['padding'])
                cur_neg_rois = np.concatenate((batch_num, cur_neg_rois),
                                              axis=1)
                cur_neg_rois = Variable(
                    torch.from_numpy(cur_neg_rois.astype('float32'))).cuda()

                cur_neg_feats = model.roi_align_model(feat_map, cur_neg_rois)
                cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0),
                                                   -1).data.clone()

                feat_dim = cur_pos_feats.size(-1)

                if bidx == 0:
                    pos_feats = cur_pos_feats  ##index select
                    neg_feats = cur_neg_feats
                else:
                    pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
                    neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)

            if pos_feats.size(0) > opts['n_pos_update']:
                pos_idx = np.asarray(range(pos_feats.size(0)))
                np.random.shuffle(pos_idx)
                pos_feats = pos_feats.index_select(
                    0,
                    torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())
            if neg_feats.size(0) > opts['n_neg_update']:
                neg_idx = np.asarray(range(neg_feats.size(0)))
                np.random.shuffle(neg_idx)
                neg_feats = neg_feats.index_select(
                    0,
                    torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())

            pos_feats_all.append(pos_feats)
            neg_feats_all.append(neg_feats)

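            # bound the memories: positives act as long-term memory, negatives as short-term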
            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.stack(pos_feats_all[-nframes:],
                                   0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.stack(pos_feats_all, 0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)
            train(model, criterion, update_optimizer, pos_data, neg_data,
                  opts['maxiter_update'])

        spf = time.time() - tic
        spf_total += spf

        # Display
        if display or savefig:
            im.set_data(cur_image_v)

            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])

            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])

            if display:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(savefig_dir, '%04d.jpg' % (i)),
                            dpi=dpi)

        if opts['visual_log']:
            if gt is None:
                print("Frame %d/%d, Score %.3f, Time %.3f" % \
                    (i, len(img_list_v), target_score, spf))
            else:
                print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
                    (i, len(img_list_v), overlap_ratio(gt[i], result_bb[i])[0], target_score, spf))

        print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
            (i, len(img_list_v), overlap_ratio(gt[i], result_bb[i])[0], target_score, spf))

        iou_result[i] = overlap_ratio(gt[i], result_bb[i])[0]

    fps = len(img_list_v) / spf_total

    # pdb.set_trace()
    # print("==>> epochID %d, L1-Loss %.4f, Time %.3f" % (epochID, total_l1_Loss/len(img_list_v), spf_total))

    return iou_result, result_bb, fps, result
Esempio n. 39
def test(model, graph, idx, labels):
    model.eval()
    pred = model(graph, 'paper')[idx].max(1)[1].cpu()
    acc = (pred == labels[idx]).float().mean()
    return acc
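A hypothetical invocation of the helper above, to show the expected shapes; `graph`, `labels`, and the index range are placeholders, and the 'paper' node type is carried over from the snippet:

# sketch only: assumes a trained heterograph model whose forward pass
# returns per-node logits for the 'paper' node type, and CPU labels
test_idx = torch.arange(1000, 1200)   # hypothetical held-out node indices
acc = test(model, graph, test_idx, labels)
print('test accuracy: {:.4f}'.format(acc.item()))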
Esempio n. 40
def train_model(dataloaders,
                model,
                criterion,
                optimizer,
                scheduler,
                num_epochs,
                save_epoch,
                save_name='model',
                save_path='./pkl'):
    isReduceLROnPlateau = False
    if isinstance(scheduler, lr_scheduler.ReduceLROnPlateau):
        isReduceLROnPlateau = True
    since = time.time()

    best_model_wts = None
    best_loss = float("inf")

    trainLoss = []
    valLoss = []
    lrs = []
    epochs = []
    plt.ion()
    for epoch in range(1, num_epochs + 1):
        epochs += [epoch]
        lrs += [optimizer.param_groups[0]['lr']]

        # train:
        model.train()
        running_loss = 0.0
        data_size = 0
        for inputs, labels in dataloaders['train']:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            torch.set_grad_enabled(True)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # statistics
            data_size += inputs.size(0)
            running_loss += loss.item() * inputs.size(
                0)  # mean batch loss * batch size = summed loss for this batch (robust to a smaller final batch and to train/val size differences)

        epoch_loss = running_loss / data_size  # mean loss over the epoch
        trainLoss += [epoch_loss]

        # validation:
        model.eval()
        running_loss = 0.0
        data_size = 0
        for inputs, labels in dataloaders['val']:
            inputs = inputs.to(device)
            labels = labels.to(device)
            torch.set_grad_enabled(False)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # statistics
            data_size += inputs.size(0)
            running_loss += loss.item() * inputs.size(
                0)  # mean batch loss * batch size = summed loss for this batch (robust to a smaller final batch and to train/val size differences)

        epoch_loss = running_loss / data_size  # mean loss over the epoch
        valLoss += [epoch_loss]

        # auto update lr
        if scheduler:
            if isReduceLROnPlateau:
                scheduler.step(epoch_loss)
            else:
                scheduler.step()

        # show each epoch
        if args.show_each_epoch:
            print('Epoch {}/{}\n{}'.format(epoch, num_epochs, '-' * 10))
            print(
                'train_loss: {:.4f}\n  val_loss: {:.4f}\nlearning_rate: {:.4f}\n'
                .format(trainLoss[-1], valLoss[-1],
                        optimizer.param_groups[0]['lr']))  # printed once per epoch

        # deep-copy the model whenever the validation loss improves
        if valLoss[-1] < best_loss:
            best_loss = valLoss[-1]
            best_model_wts = copy.deepcopy(model.state_dict())
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            torch.save(
                model, '{}/{}_{}-trainLoss_{:.4f}-valLoss_{:.4f}.pkl'.format(
                    save_path, save_name, epoch, trainLoss[-1], valLoss[-1]))

    # printHistory(epochs,trainLoss,valLoss,lrs)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Loss: {:4f}'.format(best_loss))

    # load best model weights (guard for the case where val loss never improved)
    if best_model_wts is not None:
        model.load_state_dict(best_model_wts)
    if not os.path.exists('{}/best/'.format(save_path)):
        os.makedirs('{}/best/'.format(save_path))
    torch.save(model, '{}/best/{}.pkl'.format(save_path, save_name))
    return model
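A minimal smoke-test sketch for `train_model` on synthetic data. Everything below is illustrative; the function also reads module-level `device` and `args.show_each_epoch`, so the sketch provides stand-ins for both:

import types
import torch
from torch import nn
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, TensorDataset

# stand-ins for the module-level globals train_model expects
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
args = types.SimpleNamespace(show_each_epoch=True)

# tiny synthetic regression problem so every piece of the loop runs
X, y = torch.randn(64, 8), torch.randn(64, 1)
loaders = {split: DataLoader(TensorDataset(X, y), batch_size=16)
           for split in ('train', 'val')}
net = nn.Linear(8, 1)
opt = torch.optim.Adam(net.parameters(), lr=1e-3)
sched = lr_scheduler.ReduceLROnPlateau(opt, patience=3)
best = train_model(loaders, net, nn.MSELoss(), opt, sched,
                   num_epochs=5, save_epoch=1)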
Esempio n. 41
def train(total_iters=0):
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = args.data_size
    hidden = init_hidden(args.batch_size)
    curr_loss = 0.
    for i, batch in enumerate(train_data):

        data, targets, reset_mask = get_batch(batch)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        output, hidden = model(data, reset_mask=reset_mask)
        loss = criterion(output.view(-1, ntokens).contiguous().float(), targets.view(-1).contiguous())

        optim.zero_grad()

        if args.fp16:
            optim.backward(loss)
        else:
            loss.backward()
        total_loss += loss.data.float()

        # clipping gradients helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip > 0:
            if not args.fp16:
                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
            else:
                optim.clip_fp32_grads(clip=args.clip)
        optim.step()

        # step learning rate and log training progress
        lr = LR.get_lr()[0]
        if not args.fp16:
            LR.step()
        else:
            # if fp16 optimizer skips gradient step due to explosion do not step lr
            if not optim.overflow:
                LR.step()

        if i % args.log_interval == 0 and i > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:.2E} | ms/batch {:.3E} | \
                  loss {:.2E} | ppl {:8.2f} | loss scale {:8.2f}'.format(
                      epoch, i, len(train_data), lr,
                      elapsed * 1000 / args.log_interval, cur_loss, math.exp(min(cur_loss, 20)),
                      args.loss_scale if not args.fp16 else optim.loss_scale 
                  )
            )
            total_loss = 0
            start_time = time.time()
            sys.stdout.flush()

        # save current model progress. If distributed only save from worker 0
        if args.save_iters and total_iters % (args.save_iters) == 0 and total_iters > 0 and args.rank < 1:
            if args.rank < 1:
                with open(os.path.join(os.path.splitext(args.save)[0], 'e%s.pt'%(str(total_iters),)), 'wb') as f:
                    torch.save(model.state_dict(), f)
            torch.cuda.synchronize()
        total_iters += 1

    return cur_loss
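The comment above mentions detaching the hidden state between batches; this model evidently handles that internally via `reset_mask`, but the standard helper for doing it explicitly looks like this (a sketch, not taken from the source):

import torch

def repackage_hidden(h):
    # detach hidden-state tensors from the graph that produced them,
    # so backpropagation stops at the current batch boundary
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)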
Esempio n. 42
    def test_testset(self):
        if self.args.condition:
            for k in range(self.args.class_num):
                hidden = "h_{}".format(k+1)
                self.hidden_vecs[hidden]= torch.load(f"{self.args.hiddenvec_dir}/{hidden}.pt")
        if self.args.baseline == 'DAE':
            p_dims = self.args.dims
            # p_dims = [self.args.latent_size, self.args.hidden_size_vae, self.args.input_size_vae]
            model = MultiDAE(p_dims)
        elif self.args.baseline == 'HPrior':
            model = HPrior_VAE(self.args.dims, self.args.hidden_size_rnn, self.args.dataset,\
                                    self.args.input_size_rnn, self.args.activation)
        elif self.args.baseline == 'MF':
            model = MF(6040, 3355, 100)
        else:
            model = VAE_RNN_rec(self.args.dims, self.args.input_size_rnn, self.args.embedding_size,\
                                self.args.num_layer, self.args.dropout_rate, self.args.bidirectional, self.args.class_num,\
                                self.args.hidden_size_rnn, self.args.condition, self.args.dataset, self.args.activation, self.args.freeze, self.args.attn, self.args.condition_size)
        model = model.to(self.args.device)

        model.load_state_dict(torch.load(self.args.log_dir + '/' + self.args.load_model + '/' + 'model.pt'))
        model.eval()
        if self.args.data_dir == './data/ml-1m':
            dataloader = ItemRatingLoader(self.args.data_dir)
            if self.args.baseline == 'MF':
                tr_data_rating = dataloader.load_tr_te_data_mf(os.path.join(self.args.data_dir, 'fixed2_test_te.csv'), 1)
                N = 177 * self.args.batch_size
            else:#fixed_valid / fixed_test
                # tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'fixed2_test_tr.csv'), \
                # os.path.join(self.args.data_dir, 'fixed2_test_te.csv'))
                tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'fixed_valid_tr.csv'), \
                os.path.join(self.args.data_dir, 'fixed_valid_te.csv'))
        elif self.args.data_dir == './data/amazon' or self.args.data_dir == './data/amazon_min20_woman' or self.args.data_dir =='./data/amazon_min20_woman_fix' or self.args.dataset=='amazon_min10_woman':
            dataloader = AmazonRatingLoader(self.args.data_dir, self.args.dims[0])
            # tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'test_tr.csv'),\
                                                                    #  os.path.join(self.args.data_dir, 'test_te.csv'))
            tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'valid_tr.csv'),\
                                                                     os.path.join(self.args.data_dir, 'valid_te.csv'))

        if not self.args.baseline == 'MF':
            N = tr_data_rating.shape[0]
            idxlist = np.array(range(N))
            np.random.seed(98765)
            idx_pe = np.random.permutation(len(idxlist))
            idxlist = idxlist[idx_pe]
        if self.args.condition or self.args.baseline == 'HPrior':  
            valid_data_item = dataloader.load_sequence_data_generator(int(N/self.args.batch_size)+1, 'valid', self.args.batch_size, idx_pe)
            # valid_data_item = dataloader.load_sequence_data_generator(int(N/self.args.batch_size)+1, 'test', self.args.batch_size, idx_pe)
            if self.args.test_hidden == 'fixed':
                tr_data_hidden = dataloader.fixed_hidden(int(N/self.args.batch_size)+1, 'valid', self.args.batch_size, idx_pe)
            
        with torch.no_grad():
            r20_list, r50_list, r5_list, r10_list, r100_list = [], [], [], [], []
            ndcg_list50, ndcg_list5, ndcg_list10, ndcg_list100, ndcg_list150 = [], [], [], [], []
            auc_list = []
            for i, st_idx in enumerate(range(0, N, self.args.batch_size)):
                if self.args.condition or self.args.baseline == 'HPrior':
                    order, item_feature, label = next(valid_data_item)
                    end_idx = min(st_idx + self.args.batch_size, N)
                    x_tr_unorder = tr_data_rating[idxlist[st_idx:end_idx]]
                    X_tr = x_tr_unorder[order]
                    x_te_unorder = te_data_rating[idxlist[st_idx:end_idx]]
                    X_te = x_te_unorder[order]
                else:
                    if not self.args.baseline == 'MF':
                        end_idx = min(st_idx + self.args.batch_size, N)
                        X_tr = tr_data_rating[idxlist[st_idx:end_idx]]
                        X_te = te_data_rating[idxlist[st_idx:end_idx]]

                    else:
                        dd = next(tr_data_rating)
                        X_te = dd[0]
                        X_pre = dd[1]
                if not self.args.baseline == 'MF':
                    if sparse.isspmatrix(X_tr):
                        X_tr = X_tr.toarray()
                    X_tr = X_tr.astype('float32')

                if self.args.condition:
                    if self.args.test_hidden == 'trained':
                        print('use trained hidden vector')
                        h = []
                        for b_c in label:
                            for j in range(self.args.class_num):
                                hidden = "h_{}".format(j+1)
                                if b_c == j:
                                    h.append(self.hidden_vecs[hidden].unsqueeze(0))
                        hidden = torch.cat(h, 0)
                        # _, hidden = model.RNNEncoder(item_feature.to(self.args.device))
                    elif self.args.test_hidden == 'onehot':
                        hidden = self.tooh(label, self.args.class_num).to(self.args.device)
                    elif self.args.test_hidden == 'fixed':
                        hidden = next(tr_data_hidden)
                    else:
                        _, hidden = model.RNNEncoder(item_feature.to(self.args.device))
                    if len(hidden.shape) == 1:
                        hidden = hidden.unsqueeze(0)
                    model_input = (torch.FloatTensor(X_tr).to(self.args.device), F.sigmoid(hidden))
                    recon, _, _ = model(model_input)
                elif self.args.baseline == 'HPrior':
                    recon, _, _ = model(torch.FloatTensor(X_tr).to(self.args.device), item_feature.to(self.args.device))
                else:
                    if not self.args.baseline:
                        recon, _, _ = model(torch.FloatTensor(X_tr).to(self.args.device))
                    elif self.args.baseline == 'MF':
                        pre = [list(t) for t in zip(*X_pre)]
                        user = torch.LongTensor(list(set(pre[0]))).to(self.args.device)
                        item = torch.LongTensor(pre[1]).to(self.args.device)
                        recon = model(user, item, None, None)
                        recon = recon.view(1,-1)
                    else:
                        recon = model(torch.FloatTensor(X_tr).to(self.args.device))
                # if not self.args.baseline: 
                    # recon_loss, kld = loss_function(torch.FloatTensor(X_tr).to(self.args.device), recon, mu, logvar, self.args.dist)
                if not self.args.baseline == 'MF':
                    recon[X_tr.nonzero()] = -np.inf
                ndcg_list50.append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=50))
                ndcg_list100.append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=100))
                ndcg_list10.append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=10))
                ndcg_list5.append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=5))
                ndcg_list150.append(NDCG_binary_at_k_batch(recon.cpu().detach().numpy(), X_te, k=150))
                r20_list.append(Recall_at_k_batch(recon.cpu().detach().numpy(), X_te, k=20))
                r50_list.append(Recall_at_k_batch(recon.cpu().detach().numpy(), X_te, k=50))
                r10_list.append(Recall_at_k_batch(recon.cpu().detach().numpy(), X_te, k=10))
                r5_list.append(Recall_at_k_batch(recon.cpu().detach().numpy(), X_te, k=5))
                r100_list.append(Recall_at_k_batch(recon.cpu().detach().numpy(), X_te, k=100))
                auc_list.append(AUC_score(recon.cpu().detach().numpy(), X_te))

            ndcg_list50 = np.concatenate(ndcg_list50)
            ndcg_list100 = np.concatenate(ndcg_list100)
            ndcg_list150 = np.concatenate(ndcg_list150)
            ndcg_list10 = np.concatenate(ndcg_list10)
            ndcg_list5 = np.concatenate(ndcg_list5)
            r20_list = np.concatenate(r20_list)
            r10_list = np.concatenate(r10_list)
            r50_list = np.concatenate(r50_list)       
            r5_list = np.concatenate(r5_list)       
            r100_list = np.concatenate(r100_list)       
            auc_list = np.asarray(auc_list)    
            # if not self.args.baseline:
                # print(f"test loss : {test_loss / (N/self.args.batch_size):.3}")
            print("Test NDCG@5=%.5f (%.5f)" % (ndcg_list5.mean(), np.std(ndcg_list5) / np.sqrt(len(ndcg_list5))))
            print("Test NDCG@10=%.5f (%.5f)" % (ndcg_list10.mean(), np.std(ndcg_list10) / np.sqrt(len(ndcg_list10))))
            print("Test NDCG@50=%.5f (%.5f)" % (ndcg_list50.mean(), np.std(ndcg_list50) / np.sqrt(len(ndcg_list50))))
            print("Test NDCG@100=%.5f (%.5f)" % (ndcg_list100.mean(), np.std(ndcg_list100) / np.sqrt(len(ndcg_list100))))
            print("Test NDCG@150=%.5f (%.5f)" % (ndcg_list150.mean(), np.std(ndcg_list150) / np.sqrt(len(ndcg_list150))))
            print("Test Recall@5=%.5f (%.5f)" % (r5_list.mean(), np.std(r5_list) / np.sqrt(len(r5_list))))
            print("Test Recall@10=%.5f (%.5f)" % (r10_list.mean(), np.std(r10_list) / np.sqrt(len(r10_list))))
            print("Test Recall@20=%.5f (%.5f)" % (r20_list.mean(), np.std(r20_list) / np.sqrt(len(r20_list))))
            print("Test Recall@50=%.5f (%.5f)" % (r50_list.mean(), np.std(r50_list) / np.sqrt(len(r50_list))))
            print("Test Recall@100=%.5f (%.5f)" % (r100_list.mean(), np.std(r100_list) / np.sqrt(len(r100_list))))
            print("Test AUC=%.5f (%.5f)" % (auc_list.mean(), np.std(auc_list) / np.sqrt(len(auc_list))))
Esempio n. 43
    def train(self):
        # create data loader
        train_dataset = eval(self.dataset_conf.loader_name)(self.config,
                                                            split='train')
        dev_dataset = eval(self.dataset_conf.loader_name)(self.config,
                                                          split='dev')
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=self.train_conf.batch_size,
            shuffle=self.train_conf.shuffle,
            num_workers=self.train_conf.num_workers,
            collate_fn=train_dataset.collate_fn,
            drop_last=False)
        dev_loader = torch.utils.data.DataLoader(
            dev_dataset,
            batch_size=self.train_conf.batch_size,
            shuffle=False,
            num_workers=self.train_conf.num_workers,
            collate_fn=dev_dataset.collate_fn,
            drop_last=False)

        # create models
        model = eval(self.model_conf.name)(self.config)

        if self.use_gpu:
            model = nn.DataParallel(model, device_ids=self.gpus).cuda()

        # create optimizer
        params = filter(lambda p: p.requires_grad, model.parameters())
        if self.train_conf.optimizer == 'SGD':
            optimizer = optim.SGD(params,
                                  lr=self.train_conf.lr,
                                  momentum=self.train_conf.momentum,
                                  weight_decay=self.train_conf.wd)
        elif self.train_conf.optimizer == 'Adam':
            optimizer = optim.Adam(params,
                                   lr=self.train_conf.lr,
                                   weight_decay=self.train_conf.wd)
        else:
            raise ValueError("Non-supported optimizer!")

        early_stop = EarlyStopper([0.0], win_size=10, is_decrease=False)

        lr_scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer,
            milestones=self.train_conf.lr_decay_steps,
            gamma=self.train_conf.lr_decay)

        # reset gradient
        optimizer.zero_grad()

        # resume training
        if self.train_conf.is_resume:
            load_model(model,
                       self.train_conf.resume_model,
                       optimizer=optimizer)

        # Training Loop
        iter_count = 0
        best_val_loss = np.inf
        results = defaultdict(list)
        for epoch in range(self.train_conf.max_epoch):
            # validation
            if (epoch + 1) % self.train_conf.valid_epoch == 0 or epoch == 0:
                model.eval()
                val_loss = []

                for data in tqdm(dev_loader):
                    if self.use_gpu:
                        data['node_feat'], data['node_mask'], data[
                            'label'] = data_to_gpu(data['node_feat'],
                                                   data['node_mask'],
                                                   data['label'])

                        if self.model_conf.name == 'LanczosNetGeneral':
                            data['L'], data['D'], data['V'] = data_to_gpu(
                                data['L'], data['D'], data['V'])
                        elif self.model_conf.name == 'GraphSAGE':
                            data['nn_idx'], data[
                                'nonempty_mask'] = data_to_gpu(
                                    data['nn_idx'], data['nonempty_mask'])
                        elif self.model_conf.name == 'GPNN':
                            data['L'], data['L_cluster'], data[
                                'L_cut'] = data_to_gpu(data['L'],
                                                       data['L_cluster'],
                                                       data['L_cut'])
                        else:
                            data['L'] = data_to_gpu(data['L'])[0]

                    with torch.no_grad():
                        if self.model_conf.name == 'AdaLanczosNet':
                            pred, _ = model(data['node_feat'],
                                            data['L'],
                                            label=data['label'],
                                            mask=data['node_mask'])
                        elif self.model_conf.name == 'LanczosNetGeneral':
                            pred, _ = model(data['node_feat'],
                                            data['L'],
                                            data['D'],
                                            data['V'],
                                            label=data['label'],
                                            mask=data['node_mask'])
                        elif self.model_conf.name == 'GraphSAGE':
                            pred, _ = model(data['node_feat'],
                                            data['nn_idx'],
                                            data['nonempty_mask'],
                                            label=data['label'],
                                            mask=data['node_mask'])
                        elif self.model_conf.name == 'GPNN':
                            pred, _ = model(data['node_feat'],
                                            data['L'],
                                            data['L_cluster'],
                                            data['L_cut'],
                                            label=data['label'],
                                            mask=data['node_mask'])
                        else:
                            pred, _ = model(data['node_feat'],
                                            data['L'],
                                            label=data['label'],
                                            mask=data['node_mask'])

                    curr_loss = (pred - data['label']).pow(2).cpu().numpy()
                    val_loss += [curr_loss]

                val_loss = float(np.mean(np.concatenate(val_loss)))
                logger.info("Avg. Validation MSE = {}".format(val_loss))
                self.writer.add_scalar('val_loss', val_loss, iter_count)
                results['val_loss'] += [val_loss]

                # save best model
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    snapshot(model.module if self.use_gpu else model,
                             optimizer,
                             self.config,
                             epoch + 1,
                             tag='best')

                logger.info(
                    "Current Best Validation MSE = {}".format(best_val_loss))

                # check early stop
                if early_stop.tick([val_loss]):
                    snapshot(model.module if self.use_gpu else model,
                             optimizer,
                             self.config,
                             epoch + 1,
                             tag='last')
                    self.writer.close()
                    break

            # training
            model.train()
            lr_scheduler.step()
            for data in train_loader:
                optimizer.zero_grad()

                if self.use_gpu:
                    data['node_feat'], data['node_mask'], data[
                        'label'] = data_to_gpu(data['node_feat'],
                                               data['node_mask'],
                                               data['label'])

                    if self.model_conf.name == 'LanczosNetGeneral':
                        data['L'], data['D'], data['V'] = data_to_gpu(
                            data['L'], data['D'], data['V'])
                    elif self.model_conf.name == 'GraphSAGE':
                        data['nn_idx'], data['nonempty_mask'] = data_to_gpu(
                            data['nn_idx'], data['nonempty_mask'])
                    elif self.model_conf.name == 'GPNN':
                        data['L'], data['L_cluster'], data[
                            'L_cut'] = data_to_gpu(data['L'],
                                                   data['L_cluster'],
                                                   data['L_cut'])
                    else:
                        data['L'] = data_to_gpu(data['L'])[0]

                if self.model_conf.name == 'AdaLanczosNet':
                    _, train_loss = model(data['node_feat'],
                                          data['L'],
                                          label=data['label'],
                                          mask=data['node_mask'])
                elif self.model_conf.name == 'LanczosNetGeneral':
                    _, train_loss = model(data['node_feat'],
                                          data['L'],
                                          data['D'],
                                          data['V'],
                                          label=data['label'],
                                          mask=data['node_mask'])
                elif self.model_conf.name == 'GraphSAGE':
                    _, train_loss = model(data['node_feat'],
                                          data['nn_idx'],
                                          data['nonempty_mask'],
                                          label=data['label'],
                                          mask=data['node_mask'])
                elif self.model_conf.name == 'GPNN':
                    _, train_loss = model(data['node_feat'],
                                          data['L'],
                                          data['L_cluster'],
                                          data['L_cut'],
                                          label=data['label'],
                                          mask=data['node_mask'])
                else:
                    _, train_loss = model(data['node_feat'],
                                          data['L'],
                                          label=data['label'],
                                          mask=data['node_mask'])

                # assign gradient
                train_loss.backward()
                optimizer.step()
                train_loss = float(train_loss.data.cpu().numpy())
                self.writer.add_scalar('train_loss', train_loss, iter_count)
                results['train_loss'] += [train_loss]
                results['train_step'] += [iter_count]

                # display loss
                if (iter_count + 1) % self.train_conf.display_iter == 0:
                    logger.info(
                        "Loss @ epoch {:04d} iteration {:08d} = {}".format(
                            epoch + 1, iter_count + 1, train_loss))

                iter_count += 1

            # snapshot model
            if (epoch + 1) % self.train_conf.snapshot_epoch == 0:
                logger.info("Saving Snapshot @ epoch {:04d}".format(epoch + 1))
                snapshot(model.module if self.use_gpu else model, optimizer,
                         self.config, epoch + 1)

        results['best_val_loss'] += [best_val_loss]
        pickle.dump(
            results,
            open(os.path.join(self.config.save_dir, 'train_stats.p'), 'wb'))
        self.writer.close()
        logger.info("Best Validation MSE = {}".format(best_val_loss))

        return best_val_loss
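The loop above (and the companion test routine later in this listing, Esempio n. 53) relies on a `data_to_gpu` helper that is not shown in this excerpt. A minimal sketch of such a helper, assuming it simply moves each tensor to the default CUDA device and returns them as a list so callers can unpack one or several values:

import torch

def data_to_gpu(*tensors):
    # Hypothetical helper: move every tensor to the default CUDA device.
    # Returning a list lets callers write data['L'] = data_to_gpu(data['L'])[0].
    return [t.cuda() for t in tensors]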
Esempio n. 44
0
    def test(self, model, anneal):
        model.eval()
        if self.args.data_dir == './data/ml-1m':
            dataloader = ItemRatingLoader(self.args.data_dir)
            if self.args.baseline == 'MF':
                tr_data_rating = dataloader.load_tr_te_data_mf(os.path.join(self.args.data_dir, 'fixed2_valid_te.csv'), 1)
                N = 177 * self.args.batch_size
            else:
                tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'fixed2_valid_tr.csv'), os.path.join(self.args.data_dir, 'fixed2_valid_te.csv'))
        elif self.args.data_dir == './data/amazon' or self.args.data_dir =='./data/amazon_min20_woman' \
                            or self.args.data_dir =='./data/amazon_min20_woman_fix' or self.args.dataset=='amazon_min10_woman':
            dataloader = AmazonRatingLoader(self.args.data_dir, self.args.dims[0])
            if self.args.baseline == 'MF':
                tr_data_rating = dataloader.load_tr_te_data_mf(os.path.join(self.args.data_dir, 'valid_te.csv'), 1)
                N = 177 * self.args.batch_size
            else:
                tr_data_rating, te_data_rating = dataloader.load_tr_te_data(os.path.join(self.args.data_dir, 'valid_tr.csv'), os.path.join(self.args.data_dir, 'valid_te.csv'))
                # print(tr_data_rating.shape[0])
        # if self.args.baseline == 'MF':
            # tr_data_rating = dataloader.load_tr_te_data_mf(os.path.join(self.args.data_dir, 'fixed_valid_te.csv'), 1)
            # N = 177 * self.args.batch_size
        if not self.args.baseline == 'MF':
            N = tr_data_rating.shape[0]
            idxlist = np.array(range(N))
            np.random.seed(98765)
            idx_pe = np.random.permutation(len(idxlist))
            idxlist = idxlist[idx_pe]
        if self.args.condition or self.args.baseline == 'HPrior':  
            valid_data_item = dataloader.load_sequence_data_generator(int(N/self.args.batch_size)+1, 'valid', self.args.batch_size, idx_pe)
        
        test_loss = 0 
        with torch.no_grad():
            r10_list, r20_list, r50_list, r100_list = [], [], [], []
            ndcg_list10, ndcg_list50, ndcg_list100, ndcg_list150 = [], [], [], []
            auc_list = []
            for i, st_idx in enumerate(range(0, N, self.args.batch_size)):
                if self.args.condition or self.args.baseline == 'HPrior':
                    order, item_feature, label = next(valid_data_item)
                    end_idx = min(st_idx + self.args.batch_size, N)
                    x_tr_unorder = tr_data_rating[idxlist[st_idx:end_idx]]
                    X_tr = x_tr_unorder[order]
                    x_te_unorder = te_data_rating[idxlist[st_idx:end_idx]]
                    X_te = x_te_unorder[order]
                else:
                    if not self.args.baseline == 'MF':
                        end_idx = min(st_idx + self.args.batch_size, N)
                        X_tr = tr_data_rating[idxlist[st_idx:end_idx]]
                        X_te = te_data_rating[idxlist[st_idx:end_idx]]
                    
                    else:
                        dd = next(tr_data_rating)
                        X_te = dd[0]
                        X_pre = dd[1]

                if not self.args.baseline == 'MF':
                    if sparse.isspmatrix(X_tr):
                        X_tr = X_tr.toarray()
                    X_tr = X_tr.astype('float32') 
                if self.args.condition:
                    if self.args.test_hidden == 'trained':
                        h = []
                        for b_c in label:
                            for j in range(self.args.class_num):
                                hidden = "h_{}".format(j+1)
                                if b_c == j:
                                    h.append(self.hidden_vecs[hidden].unsqueeze(0))
                        hidden = torch.cat(h, 0)
                    elif self.args.test_hidden == 'onehot':
                        hidden = self.tooh(label, self.args.class_num).to(self.args.device)
                    else:
                        _, hidden = model.RNNEncoder(item_feature.to(self.args.device))
                    model_input = (torch.FloatTensor(X_tr).to(self.args.device), torch.sigmoid(hidden))
                    recon, mu, logvar = model(model_input)
                elif self.args.baseline == 'HPrior':
                    recon, mu, logvar = model(torch.FloatTensor(X_tr).to(self.args.device), item_feature.to(self.args.device))
                else:
                    if not self.args.baseline:
                        recon, mu, logvar = model(torch.FloatTensor(X_tr).to(self.args.device))
                    elif self.args.baseline == 'MF':
                        pre = [list(t) for t in zip(*X_pre)]
                        user = torch.LongTensor(list(set(pre[0]))).to(self.args.device)
                        item = torch.LongTensor(pre[1]).to(self.args.device)
                        recon = model(user, item, None, None)
                        recon = recon.view(1,-1)
                    else:
                        recon = model(torch.FloatTensor(X_tr).to(self.args.device))
                if not self.args.baseline or self.args.baseline == 'HPrior': 
                    recon_loss, kld = loss_function(torch.FloatTensor(X_tr).to(self.args.device), recon, mu, logvar, self.args.dist, \
                                                    self.args.negsample, self.args.device, self.args.neg_num, self.args.bpr_weight)
                    loss = recon_loss + anneal * kld
                    test_loss += loss.item()

                if not self.args.baseline == 'MF':
                    recon[X_tr.nonzero()] = -np.inf
                recon_np = recon.cpu().detach().numpy()
                ndcg_list10.append(NDCG_binary_at_k_batch(recon_np, X_te, k=10))
                ndcg_list50.append(NDCG_binary_at_k_batch(recon_np, X_te, k=50))
                ndcg_list100.append(NDCG_binary_at_k_batch(recon_np, X_te, k=100))
                ndcg_list150.append(NDCG_binary_at_k_batch(recon_np, X_te, k=150))
                r10_list.append(Recall_at_k_batch(recon_np, X_te, k=10))
                r20_list.append(Recall_at_k_batch(recon_np, X_te, k=20))
                r50_list.append(Recall_at_k_batch(recon_np, X_te, k=50))
                r100_list.append(Recall_at_k_batch(recon_np, X_te, k=100))
                auc_list.append(AUC_score(recon_np, X_te))
                

            ndcg_list50 = np.concatenate(ndcg_list50)
            ndcg_list100 = np.concatenate(ndcg_list100)
            ndcg_list150 = np.concatenate(ndcg_list150)
            ndcg_list10 = np.concatenate(ndcg_list10)
            r20_list = np.concatenate(r20_list)
            r10_list = np.concatenate(r10_list)
            r50_list = np.concatenate(r50_list)       
            r100_list = np.concatenate(r100_list) 
            auc_list = np.asarray(auc_list)      

            if not self.args.baseline:
                print(f"test loss : {test_loss / (N/self.args.batch_size):.3}")
            print("Test NDCG@10=%.5f (%.5f)" % (ndcg_list10.mean(), np.std(ndcg_list10) / np.sqrt(len(ndcg_list10))))
            print("Test NDCG@50=%.5f (%.5f)" % (ndcg_list50.mean(), np.std(ndcg_list50) / np.sqrt(len(ndcg_list50))))
            print("Test NDCG@100=%.5f (%.5f)" % (ndcg_list100.mean(), np.std(ndcg_list100) / np.sqrt(len(ndcg_list100))))
            print("Test NDCG@150=%.5f (%.5f)" % (ndcg_list150.mean(), np.std(ndcg_list150) / np.sqrt(len(ndcg_list150))))
            print("Test Recall@10=%.5f (%.5f)" % (r10_list.mean(), np.std(r10_list) / np.sqrt(len(r10_list))))
            print("Test Recall@20=%.5f (%.5f)" % (r20_list.mean(), np.std(r20_list) / np.sqrt(len(r20_list))))
            print("Test Recall@50=%.5f (%.5f)" % (r50_list.mean(), np.std(r50_list) / np.sqrt(len(r50_list))))
            print("Test Recall@100=%.5f (%.5f)" % (r100_list.mean(), np.std(r100_list) / np.sqrt(len(r100_list))))
            print("Test AUC=%.5f (%.5f)" % (auc_list.mean(), np.std(auc_list) / np.sqrt(len(auc_list))))
        return np.mean(r10_list), np.mean(r20_list), np.mean(r50_list), np.mean(r100_list), \
                    np.mean(ndcg_list10), np.mean(ndcg_list50),  np.mean(ndcg_list100), np.mean(ndcg_list150), np.mean(auc_list)
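`NDCG_binary_at_k_batch`, `Recall_at_k_batch` and `AUC_score` are imported metric helpers. For reference, here is a minimal NDCG@k for binary relevance, written to be consistent with how the function is called above; this is a sketch, not necessarily the project's exact implementation:

import numpy as np
from scipy import sparse

def ndcg_binary_at_k(pred_scores, heldout, k=100):
    # One NDCG@k value per user (row); heldout holds binary relevance.
    n_users = pred_scores.shape[0]
    idx_topk = np.argsort(-pred_scores, axis=1)[:, :k]   # top-k item indices
    if sparse.isspmatrix(heldout):
        heldout = heldout.toarray()
    discount = 1.0 / np.log2(np.arange(2, k + 2))        # position discounts
    dcg = (heldout[np.arange(n_users)[:, None], idx_topk] * discount).sum(axis=1)
    # Ideal DCG: all held-out items ranked at the top.
    idcg = np.array([discount[:min(int(n), k)].sum() for n in heldout.sum(axis=1)])
    return dcg / np.maximum(idcg, 1e-12)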
Esempio n. 45
0
    def train(self):
        ### DAE
        if self.args.baseline == 'DAE':
            p_dims = self.args.dims
            # p_dims = [self.args.latent_size, self.args.hidden_size_vae, self.args.input_size_vae]
            model = MultiDAE(p_dims)
            optimizer = optim.Adam(model.parameters(), lr = self.args.lr_vae, weight_decay=0.0)
        elif self.args.baseline == 'MF':
            model = MF(6040, 3355, 100)
            optimizer = optim.SparseAdam(model.parameters(), lr = self.args.lr_mf)
            # optimizer = optim.SGD(model.parameters(), lr=1e-6, weight_decay=1e-5)
        elif self.args.baseline == 'HPrior':
            model = HPrior_VAE(self.args.dims, self.args.hidden_size_rnn, self.args.dataset,\
                                    self.args.input_size_rnn, self.args.activation)
            optimizer = optim.Adam(model.parameters(), lr = self.args.lr_vae, weight_decay=0.0)
        else:
            model = VAE_RNN_rec(self.args.dims, self.args.input_size_rnn, self.args.embedding_size,\
                                self.args.num_layer, self.args.dropout_rate, self.args.bidirectional, self.args.class_num,\
                                self.args.hidden_size_rnn, self.args.condition, self.args.dataset, self.args.activation, self.args.freeze, self.args.attn, self.args.condition_size)
            optimizer = {
            'encoder' : optim.Adam(model.encoder.parameters(), lr=self.args.lr_vae, weight_decay=0.0),
            'decoder' : optim.Adam(model.decoder.parameters(), lr=self.args.lr_vae, weight_decay=0.0)
            }
        
        model = model.to(self.args.device)
        if self.args.data_dir == './data/ml-1m':
            dataloader = ItemRatingLoader(self.args.data_dir)
        elif self.args.data_dir == './data/amazon' or self.args.data_dir == './data/amazon_min20_woman'\
                or self.args.data_dir == './data/amazon_min20_woman_fix' or self.args.dataset=='amazon_min10_woman':
            dataloader = AmazonRatingLoader(self.args.data_dir, self.args.dims[0])

        if self.args.condition:
            optimizer['RNNEncoder'] = optim.Adam(model.RNNEncoder.parameters(), lr=self.args.lr_rnn, weight_decay=0.0)
            # weight = torch.FloatTensor([0.18, 0.28, 0.54]).to(args.device)
            # CEloss = nn.CrossEntropyLoss(weight = weight)
            CEloss = nn.CrossEntropyLoss()

        if self.args.load_model:
            model.load_state_dict(torch.load(self.args.log_dir + '/' + self.args.load_model + '/' + 'model.pt'))
            self.args.timestamp = self.args.load_model[:10]
        if self.args.condition and self.args.load_pretrained:
            model.RNNEncoder.load_state_dict(torch.load(self.args.pretrained_dir + '/' + self.args.load_pretrained + '/' + 'model.pt'))
            print("loaded pretrained model")
        
        writer = SummaryWriter(self.args.log_dir + "/" + self.args.timestamp + "_" + self.args.config)
        # if self.args.baseline == 'MF':
        #     train_data_rating = dataloader.load_train_data_mf(os.path.join(self.args.data_dir, 'train.csv'), os.path.join(self.args.data_dir, 'valid_tr.csv'),\
        #                                                             self.args.batch_size, int(374215/self.args.batch_size)+1)
        #     N = 374215
        if not self.args.baseline == 'MF':
            train_data_rating = dataloader.load_train_data(os.path.join(self.args.data_dir, 'train.csv'))
            N = train_data_rating.shape[0]
        
            idxlist = np.array(range(N))
            idx_pe = np.random.permutation(len(idxlist))
            idxlist = idxlist[idx_pe]
        best_ndcg50 = 0.0
        update_count = 0.0
        for e in range(self.args.epoch):
            model.train()
            total_loss = 0
            if self.args.baseline == 'MF':
                train_data_rating = dataloader.load_train_data_mf(os.path.join(self.args.data_dir, 'train.csv'), os.path.join(self.args.data_dir, 'valid_tr.csv'),os.path.join(self.args.data_dir, 'test_tr.csv'),
                                                                        self.args.batch_size, int(374215/self.args.batch_size)+1)
                N = 374215
            if self.args.condition or self.args.baseline == 'HPrior':
                train_data_item = dataloader.load_sequence_data_generator(int(N/self.args.batch_size)+1, 'train', self.args.batch_size, idx_pe)
            
            for i, st_idx in enumerate(range(0, N, self.args.batch_size)):
                if self.args.condition or self.args.baseline == 'HPrior':
                    order, item_feature, label = next(train_data_item)
                    end_idx = min(st_idx + self.args.batch_size, N)
                    x_unorder = train_data_rating[idxlist[st_idx:end_idx]]
                    X = x_unorder[order]
                else:
                    end_idx = min(st_idx + self.args.batch_size, N)
                    if self.args.baseline == 'MF':
                        d = next(train_data_rating)
                        pos = d[0]
                        neg = d[1]
                    else:
                        X = train_data_rating[idxlist[st_idx:end_idx]]
                if not self.args.baseline == 'MF':
                    if sparse.isspmatrix(X):
                        X = X.toarray()
                    X = X.astype('float32')   

                if self.args.condition:
                    optimizer["RNNEncoder"].zero_grad()
                    output, h = model.RNNEncoder(item_feature.to(self.args.device))
                    rnn_loss = CEloss(output, label.to(self.args.device))
                    rnn_loss.backward(retain_graph=True)
                    # rnn_loss.backward()
                    # optimizer["RNNEncoder"].step()
                    # self.make_condition(h, label.data)
                    self.make_condition(h, label.data)


                if self.args.baseline:
                    optimizer.zero_grad()
                else:
                    optimizer["encoder"].zero_grad()
                    optimizer["decoder"].zero_grad()
                
                if self.args.condition:
                    if self.args.test_hidden == 'onehot':
                        h = self.tooh(label, self.args.class_num).to(self.args.device)
                    model_input = (torch.FloatTensor(X).to(self.args.device), torch.sigmoid(h))
                    recon, mu, logvar = model(model_input)
                elif self.args.baseline == 'HPrior':
                    recon, mu, logvar = model(torch.FloatTensor(X).to(self.args.device), item_feature.to(self.args.device))
                else:
                    if self.args.baseline == 'DAE':
                        recon = model(torch.FloatTensor(X).to(self.args.device))
                    elif self.args.baseline == 'MF':
                        pos = list(zip(*pos))
                        user = torch.LongTensor(pos[0]).to(self.args.device)
                        item = torch.LongTensor(pos[1]).to(self.args.device)
                        neg = list(zip(*neg))
                        user_neg = torch.LongTensor(neg[0]).to(self.args.device)
                        item_neg = torch.LongTensor(neg[1]).to(self.args.device)
                        ps, ns = model(user, item, user_neg, item_neg)
                        mfloss = mf_loss(ps, ns, 30)
                    else:
                        recon, mu, logvar = model(torch.FloatTensor(X).to(self.args.device))
                
                if self.args.baseline == 'DAE':
                    log_softmax_var = F.log_softmax(recon, dim=-1)
                    recon_loss = - torch.mean(torch.sum(log_softmax_var * torch.FloatTensor(X).to(self.args.device), dim=-1))
                if not self.args.baseline or self.args.baseline == 'HPrior':
                    recon_loss, kld = loss_function(torch.FloatTensor(X).to(self.args.device), recon, mu, logvar, self.args.dist, \
                                                                self.args.negsample, self.args.device, self.args.neg_num, self.args.bpr_weight)
                if self.args.anneal_steps > 0:
                    anneal = min(self.args.anneal_cap, 1. * update_count / self.args.anneal_steps)
                    update_count  += 1
                else:
                    anneal = self.args.anneal_cap
                if self.args.baseline == 'DAE':
                    vae_loss = recon_loss
                elif self.args.baseline == 'MF':
                    vae_loss = mfloss
                else:
                    vae_loss = recon_loss + anneal * kld
                vae_loss.backward()

                if self.args.baseline:
                    optimizer.step()
                else:
                    optimizer["encoder"].step()
                    optimizer["decoder"].step()

                if self.args.condition:
                    optimizer["RNNEncoder"].step()
                # r10, r20, r50, r100, ndcg10, ndcg50, ndcg100, ndcg150, auc = self.test(model, anneal)
                # tensorboard
                if self.args.condition:
                    writer.add_scalar("Train rnn loss", rnn_loss, i + e*N/self.args.batch_size)
                writer.add_scalar("Train vae loss", vae_loss, i + e*N/self.args.batch_size)
                if not self.args.baseline =='MF':
                    writer.add_scalar("Recon loss", recon_loss, i + e*N/self.args.batch_size)
                if not self.args.baseline:
                    writer.add_scalar("KLD", kld, i + e*N/self.args.batch_size)
                
                if i % 20 == 0:
                    if not self.args.baseline:
                        print(f"recon : {recon_loss.item():.3} | kld : {kld.item():.3}")
                    if self.args.condition:
                        print(f"epoch : {e} | train_vae_loss : {vae_loss.item():.3} | train_rnn_loss : {rnn_loss.item():.3}", 
                        f"[{i*self.args.batch_size} / {N}","(",f"{(i/N*self.args.batch_size)*100:.3} %", ")]")
                    else:
                        print(f"epoch : {e} | train_vae_loss : {vae_loss.item():.3}", 
                        f"[{i*self.args.batch_size} / {N}","(",f"{(i/N*self.args.batch_size)*100:.3} %", ")]")
                total_loss += vae_loss.item()
            # save model
            # torch.save(model.state_dict(), self.args.log_dir + '/' + self.args.timestamp + '_' + self.args.config + '/model.pt')
            # print("model saved!")
            print(f"epoch : {e} | train vae loss : {total_loss / (N/self.args.batch_size):.3} ")
            if self.args.condition:
                #save condition per epoch for evaluation
                for j in range(self.args.class_num):
                    hidden = "h_{}".format(j+1)
                    torch.save(self.hidden_vecs[hidden], f"{self.args.hiddenvec_dir}/{hidden}.pt")
                print("hidden vector saved!")
            # test per epoch
            r10, r20, r50, r100, ndcg10, ndcg50, ndcg100, ndcg150, auc = self.test(model, anneal)
            if ndcg50 > best_ndcg50:
                torch.save(model.state_dict(), self.args.log_dir + '/' + self.args.timestamp + '_' + self.args.config + '/model.pt')
                print("model saved!")
                best_ndcg50 = ndcg50
            # tensorboard
            # writer.add_scalar("Test_loss", test_loss, e)
            writer.add_scalar("Test_Recall10", r10, e)
            writer.add_scalar("Test_Recall20", r20, e)
            writer.add_scalar("Test_Recall50", r50, e)
            writer.add_scalar("Test_Recall100", r100, e)
            writer.add_scalar("Test_NDCG10", ndcg10, e)
            writer.add_scalar("Test_NDCG50", ndcg50, e)
            writer.add_scalar("Test_NDCG100", ndcg100, e)
            writer.add_scalar("Test_NDCG150", ndcg150, e)
            writer.add_scalar("Test_AUC", auc, e)
Esempio n. 46
0
test_loader = torch.utils.data.DataLoader(test_set,
                                          batch_size=args.test_batch,
                                          num_workers=args.workers,
                                          pin_memory=args.cuda,
                                          shuffle=False)

model.eval()
print('---- Testing for %d images - DiLiGenT Dataset ----' %
      (len(test_loader)))

err_mean = 0
with torch.no_grad():
    for i, sample in enumerate(test_loader):
        data = utils.parseData(args, sample, 'test')
        input = [data['input']]
        if args.in_light:
            input.append(data['l'])
        output = model(input)
        acc = utils.errorPred(data['tar'].data, output.data, data['m'].data)
        err_mean = err_mean + acc
        print('error: %.3f' % (acc))
        result = (output.data + 1) / 2
        result_masked = result * data['m'].data.expand_as(output.data)

        save_path = './Results/' + 'img8_mask_%d.png' % (i + 1)
        tv.utils.save_image(result_masked, save_path)
        print('saved image %d' % (i + 1))

print('------------ mean error: %.3f ------------' %
      (err_mean / len(test_loader)))
Esempio n. 47
0
def train(model, optimizer, db):
	# Relies on module-level settings: epochs, batch_size, size_output,
	# device, criterion and report_every.

	for epoch in range(1, epochs+1):

		train_loader = torch.utils.data.DataLoader(db['train'],batch_size=batch_size, shuffle=True)

		# Update (Train)
		model.train()
		for batch_idx, (data, target) in enumerate(train_loader):

			target = target.numpy()
			tgx = np.zeros((len(target), size_output))
			idx = [(i, target[i]) for i in range(len(target))]
			for i in idx:
				tgx[i]=1.0
			target = torch.tensor(tgx)

			data, target = data.to(device), target.float().to(device)
			optimizer.zero_grad()
			output = model(data)
			loss = criterion(output,target)
			pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
			target = target.data.max(1, keepdim=True)[1]
			correct = pred.eq(target.data.view_as(pred)).cpu().sum()
			loss.backward()
			optimizer.step()

			if batch_idx % report_every == 0:
				print('Train Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}, Accuracy: {}/{} ({:.6f})'.format(
					epoch, batch_idx * len(data), len(train_loader.dataset),
					100.0 * batch_idx / len(train_loader), loss.item(), correct, len(data), float(correct)/float(len(data))))


		# Evaluate
		model.eval()
		eval_loss = float(0)
		correct = 0
		batch_count = 0
		eval_loader = torch.utils.data.DataLoader(db['eval'], batch_size=batch_size, shuffle=True)
		with torch.no_grad():
			for data, target in eval_loader:

				target = target.numpy()
				tgx = np.zeros((len(target), size_output))
				idx = [(i, target[i]) for i in range(len(target))]
				for i in idx:
					tgx[i]=1.0
				target = torch.tensor(tgx)

				data, target = data.to(device), target.float().to(device)
				output = model(data)
				eval_loss += criterion(output, target).item() # sum up batch loss
				pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
				target = target.data.max(1, keepdim=True)[1]
				correct += pred.eq(target.data.view_as(pred)).cpu().sum()
				batch_count += 1

		eval_loss /= batch_count
		accuracy = float(correct) / len(eval_loader.dataset)

		with open('results/one_hot.dat', 'a+') as file:
			file.write(str(accuracy)+"\n")
		print('Eval set: Average loss: {:.4f}, Accuracy: {}/{} ({:.6f})\n'.format(
			eval_loss, correct, len(eval_loader.dataset),
			accuracy))					
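The index loop that fills `tgx` builds a one-hot target matrix one entry at a time. An equivalent vectorized sketch, assuming integer class labels in [0, size_output):

import numpy as np

def one_hot(target, size_output):
    # Row i gets a 1.0 at column target[i]; same result as the tgx loop above.
    return np.eye(size_output, dtype=np.float32)[target]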
Esempio n. 48
0
def train(args):
    processor = data_utils.ABSAProcessor()
    label_list = processor.get_labels(args.task_type)

    tokenizer = ABSATokenizer.from_pretrained(
        modelconfig.MODEL_ARCHIVE_MAP[args.bert_model])
    train_examples = processor.get_train_examples(args.data_dir,
                                                  args.task_type)
    num_train_steps = int(
        math.ceil(len(train_examples) /
                  args.train_batch_size)) * args.num_train_epochs

    train_features = data_utils.convert_examples_to_features(
        train_examples, label_list, args.max_seq_length, tokenizer)
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_examples))
    logger.info("  Batch size = %d", args.train_batch_size)
    logger.info("  Num steps = %d", num_train_steps)

    all_input_ids = torch.tensor([f.input_ids for f in train_features],
                                 dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                   dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in train_features],
                                  dtype=torch.long)
    all_label_ids = torch.tensor([f.label_id for f in train_features],
                                 dtype=torch.long)

    domain_dataset = PregeneratedDataset(epoch=0,
                                         training_path=args.domain_dataset,
                                         tokenizer=tokenizer,
                                         num_data_epochs=1)

    domain_train_sampler = RandomSampler(domain_dataset)
    domain_train_dataloader = DataLoader(domain_dataset,
                                         sampler=domain_train_sampler,
                                         batch_size=16)

    train_data = TensorDataset(all_input_ids, all_segment_ids, all_input_mask,
                               all_label_ids)

    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size)

    # >>>>> validation
    if args.do_valid:
        valid_examples = processor.get_dev_examples(args.data_dir,
                                                    args.task_type)
        valid_features = data_utils.convert_examples_to_features(
            valid_examples, label_list, args.max_seq_length, tokenizer)
        valid_all_input_ids = torch.tensor(
            [f.input_ids for f in valid_features], dtype=torch.long)
        valid_all_segment_ids = torch.tensor(
            [f.segment_ids for f in valid_features], dtype=torch.long)
        valid_all_input_mask = torch.tensor(
            [f.input_mask for f in valid_features], dtype=torch.long)
        valid_all_label_ids = torch.tensor(
            [f.label_id for f in valid_features], dtype=torch.long)
        # valid_all_tag_ids = torch.tensor([f.tag_id for f in valid_features], dtype=torch.long)
        valid_data = TensorDataset(valid_all_input_ids, valid_all_segment_ids,
                                   valid_all_input_mask, valid_all_label_ids)

        logger.info("***** Running validations *****")
        logger.info("  Num orig examples = %d", len(valid_examples))
        logger.info("  Num split examples = %d", len(valid_features))
        logger.info("  Batch size = %d", args.train_batch_size)

        valid_sampler = SequentialSampler(valid_data)
        valid_dataloader = DataLoader(valid_data,
                                      sampler=valid_sampler,
                                      batch_size=args.train_batch_size)

        best_valid_loss = float('inf')
        valid_losses = []

    # <<<<< end of validation declaration
    model = ABSABert.from_pretrained(
        modelconfig.MODEL_ARCHIVE_MAP[args.bert_model],
        num_labels=len(label_list))

    if args.features_model != 'none':
        state_dict = torch.load(args.features_model)
        del state_dict['classifier.weight']
        del state_dict['classifier.bias']
        model.load_state_dict(state_dict, strict=False)
        logger.info('load fine-tuned model from : {}'.format(
            args.features_model))

    model.cuda()

    flag = True
    if flag:
        # bert-base
        shared_param_optimizer = [(k, v)
                                  for k, v in model.bert.named_parameters()
                                  if v.requires_grad]
        shared_param_optimizer = [
            n for n in shared_param_optimizer if 'pooler' not in n[0]
        ]
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        shared_optimizer_grouped_parameters = [{
            'params': [
                p for n, p in shared_param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.01
        }, {
            'params': [
                p for n, p in shared_param_optimizer
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.0
        }]

        t_total = num_train_steps  # num_train_steps
        supervised_param_optimizer = model.classifier.parameters()

        domain_classifier_param_optimizer = model.domain_cls.parameters()

        shared_optimizer = BertAdam(shared_optimizer_grouped_parameters,
                                    lr=args.learning_rate,
                                    warmup=args.warmup_proportion,
                                    t_total=t_total)

        supervised_optimizer = BertAdam(supervised_param_optimizer,
                                        lr=args.learning_rate,
                                        warmup=args.warmup_proportion,
                                        t_total=t_total)

        domain_optimizer = BertAdam(domain_classifier_param_optimizer,
                                    lr=3e-5,
                                    warmup=args.warmup_proportion,
                                    t_total=-1)
    else:
        param_optimizer = [(k, v) for k, v in model.named_parameters()
                           if v.requires_grad]
        param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]]
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.01
        }, {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        }]
        t_total = num_train_steps  # num_train_steps
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=args.learning_rate,
                             warmup=args.warmup_proportion,
                             t_total=t_total)

    global_step = 0
    model.train()

    train_steps = len(train_dataloader)
    total_domain_loss = 0
    for e_ in range(args.num_train_epochs):
        train_iter = iter(train_dataloader)
        domain_iter = iter(domain_train_dataloader)
        for step in range(train_steps):
            batch = next(train_iter)
            batch = tuple(t.cuda() for t in batch)
            input_ids, segment_ids, input_mask, label_ids = batch  # all_input_ids, all_segment_ids, all_input_mask, all_label_ids, all_tag_ids
            loss, _ = model(input_ids,
                            token_type_ids=segment_ids,
                            attention_mask=input_mask,
                            labels=label_ids)

            loss.backward()
            if flag:
                shared_optimizer.step()
                shared_optimizer.zero_grad()
                supervised_optimizer.step()
                supervised_optimizer.zero_grad()
            else:
                optimizer.step()
                optimizer.zero_grad()

            dirt_n = 1  # 1 or 2
            for _ in range(dirt_n):
                try:
                    batch = next(domain_iter)
                except StopIteration:
                    domain_iter = iter(domain_train_dataloader)
                    batch = next(domain_iter)
                batch = tuple(t.cuda() for t in batch)
                input_ids, input_mask, domain_labels = batch[0], batch[
                    4], batch[-1]
                d_loss = model(input_ids,
                               attention_mask=input_mask,
                               domain_label=domain_labels)
                d_loss.backward()
                total_domain_loss += d_loss.item()

                domain_optimizer.step()
                domain_optimizer.zero_grad()
                shared_optimizer.zero_grad(
                )  # make sure to clear the gradients of encoder.

            if step % 50 == 0:
                logger.info('in step {} domain loss: {}'.format(
                    dirt_n * (e_ * train_steps + step + 1), total_domain_loss /
                    (dirt_n * (e_ * train_steps + step + 1))))

            global_step += 1
            # >>>> perform validation at the end of each epoch .

        if args.do_valid:
            model.eval()
            with torch.no_grad():
                losses = []
                valid_size = 0
                for step, batch in enumerate(valid_dataloader):
                    batch = tuple(
                        t.cuda()
                        for t in batch)  # multi-gpu does scattering itself
                    input_ids, segment_ids, input_mask, label_ids = batch
                    loss, _ = model(input_ids,
                                    token_type_ids=segment_ids,
                                    attention_mask=input_mask,
                                    labels=label_ids)
                    loss = torch.mean(loss)
                    losses.append(loss.data.item() * input_ids.size(0))
                    valid_size += input_ids.size(0)
                valid_loss = sum(losses) / valid_size
                logger.info("validation loss: %f", valid_loss)
                valid_losses.append(valid_loss)

            if valid_loss < best_valid_loss:
                torch.save(model.state_dict(),
                           os.path.join(args.output_dir, "model.pt"))
                best_valid_loss = valid_loss
            model.train()

    if args.do_valid:
        with open(os.path.join(args.output_dir, "valid.json"), "w") as fw:
            json.dump({"valid_losses": valid_losses}, fw)
    else:
        torch.save(model.state_dict(), os.path.join(args.output_dir,
                                                    "model.pt"))
Esempio n. 49
0
def predict(model, input_str):
    model = model.eval()
    with torch.no_grad():
        output = model(input_str)
    
    return output
Esempio n. 50
0
    def model_fn(features, labels, mode, params):
        """
        define how to train, evaluate and predict from the transfomer model.
        Args:

            mode:
            params:

        Returns:

        """
        inputs = features['inputs']
        seq_steps = features['seq_len']

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        try:
            batch_size, length = get_shape_list(inputs, expected_rank=2)
        except ValueError:
            batch_size = 1
            length = get_shape_list(inputs, expected_rank=1)[0]
            inputs = tf.reshape(inputs, [batch_size, length])

        with tf.variable_scope('model'):
            # Build model
            model = DKT(params, is_training)
            logits = model(batch_size, inputs,
                           seq_steps)  # [batch, length, vocab_size]

            # In prediction mode the label/target is None; the model output is the prediction.
            if mode == tf.estimator.ModeKeys.PREDICT:
                export_outputs = {
                    'predict_output':
                    tf.estimator.export.PredictOutput(
                        {"predict": tf.sigmoid(logits)})
                }
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    predictions={'predict': tf.sigmoid(logits)},
                    export_outputs=export_outputs)
            else:
                # Calculate model loss
                target_ids = features['target_id']
                target_correct = features['target_correct']
                ids = features['ids']
                correct = features['correct']
                loss = dkt_loss(logits, target_correct, target_ids, correct,
                                ids, seq_steps)
                record_dict = {}
                record_dict['minibatch_loss'] = loss
                # Save loss as named tensor will be logged with the logging hook
                tf.identity(loss, 'cross_entropy')

                if mode == tf.estimator.ModeKeys.EVAL:
                    metric_dict = get_eval_metrics(logits, target_correct,
                                                   target_ids, seq_steps)
                    record_dict['accuracy'] = metric_dict['accuracy']
                    record_scalars(record_dict)
                    output_spec = tf.estimator.EstimatorSpec(
                        mode=tf.estimator.ModeKeys.EVAL,
                        loss=loss,
                        predictions={'predict': tf.sigmoid(logits)},
                        eval_metric_ops=metric_dict)
                else:  # train
                    # check whether restore from checkpoint
                    tvars = tf.trainable_variables()
                    initialized_variable_names = {}

                    tf.logging.info("**** Trainable Variables ****")
                    for var in tvars:
                        init_string = ""
                        if var.name in initialized_variable_names:
                            init_string = ", *INIT_FROM_CKPT*"
                        tf.logging.info("  name = %s, shape = %s%s", var.name,
                                        var.shape, init_string)

                    train_op, metric_dict = get_train_op_and_metrics(
                        loss, params)
                    acc_metric = get_eval_metrics(logits, target_correct,
                                                  target_ids, seq_steps)
                    record_dict['accuracy'] = acc_metric['accuracy']
                    record_dict['learning_rate'] = metric_dict['learning_rate']
                    record_scalars(record_dict)
                    output_spec = tf.estimator.EstimatorSpec(
                        mode=tf.estimator.ModeKeys.TRAIN,
                        loss=loss,
                        train_op=train_op)
        return output_spec
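This `model_fn` is meant to be handed to a TF 1.x Estimator. A hypothetical call site (model directory, params and input function are assumptions, not from the original source):

import tensorflow as tf

estimator = tf.estimator.Estimator(
    model_fn=model_fn,             # the function defined above
    model_dir='./dkt_model',       # hypothetical checkpoint directory
    params={'hidden_size': 128})   # hypothetical hyperparameters

estimator.train(input_fn=train_input_fn)  # train_input_fn assumed defined elsewhere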
Esempio n. 51
0
    isOutput = opt.video_out != ""
    if isOutput:
        print("!!! TYPE:", type(opt.video_out), type(video_FourCC),
              type(video_fps), type(video_size))
        out = cv2.VideoWriter(opt.video_out, video_FourCC, video_fps,
                              video_size)
    while True:
        return_value, frame = vid.read()
        if not return_value:
            break
        h, w, c = frame.shape
        PIL_img = Image.fromarray(frame[:, :, ::-1])
        tensor_img = transforms.ToTensor()(PIL_img)
        img, _ = pad_to_square(tensor_img, 0)
        # Resize
        img = resize(img, (opt.img_size, opt.img_size)).cuda().unsqueeze(0)
        with torch.no_grad():
            detections = model(img)
            detections = NMS(detections, opt.conf_thres, opt.nms_thres)

        # current_time = time.time()
        # inference_time = current_time - prev_time
        # prev_time = current_time
        font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                                  size=np.floor(3e-2 * h +
                                                0.5).astype('int32'))
        thickness = (w + h) // 300
        if detections[0] is not None:
            # Rescale boxes to original image
            detections = xywh2xyxy(detections[0])
            # First convert the xyxy coordinates from the 320*320 scale to the max(h, w)
            # scale, then just shift the x or y coordinates to remove the padding offset.
            detections[:, :4] *= (max(h, w) / opt.img_size)
            if max(h - w, 0) == 0:
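`pad_to_square` comes from the YOLOv3 utilities; a minimal sketch of its usual behavior, padding the shorter spatial side with a constant value so the image becomes square and returning both the tensor and the applied padding:

import torch.nn.functional as F

def pad_to_square(img, pad_value):
    # img: C x H x W tensor; pad the shorter side so that H == W.
    c, h, w = img.shape
    dim_diff = abs(h - w)
    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
    # (left, right, top, bottom) padding over the last two dimensions.
    pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
    return F.pad(img, pad, value=pad_value), pad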
Esempio n. 52
0
def train(model,
          criterion,
          optimizer,
          pos_feats,
          neg_feats,
          maxiter,
          in_layer='fc4'):
    model.train()

    batch_pos = opts['batch_pos']
    batch_neg = opts['batch_neg']
    batch_test = opts['batch_test']
    batch_neg_cand = max(opts['batch_neg_cand'], batch_neg)

    pos_idx = np.random.permutation(pos_feats.size(0))
    neg_idx = np.random.permutation(neg_feats.size(0))
    while (len(pos_idx) < batch_pos * maxiter):
        pos_idx = np.concatenate(
            [pos_idx, np.random.permutation(pos_feats.size(0))])
    while (len(neg_idx) < batch_neg_cand * maxiter):
        neg_idx = np.concatenate(
            [neg_idx, np.random.permutation(neg_feats.size(0))])
    pos_pointer = 0
    neg_pointer = 0

    for it in range(maxiter):

        # select pos idx
        pos_next = pos_pointer + batch_pos
        pos_cur_idx = pos_idx[pos_pointer:pos_next]
        pos_cur_idx = pos_feats.new(pos_cur_idx).long()
        pos_pointer = pos_next

        # select neg idx
        neg_next = neg_pointer + batch_neg_cand
        neg_cur_idx = neg_idx[neg_pointer:neg_next]
        neg_cur_idx = neg_feats.new(neg_cur_idx).long()
        neg_pointer = neg_next

        # create batch
        batch_pos_feats = Variable(pos_feats.index_select(0, pos_cur_idx))
        batch_neg_feats = Variable(neg_feats.index_select(0, neg_cur_idx))

        # hard negative mining
        if batch_neg_cand > batch_neg:
            model.eval()  # switch to evaluation mode while scoring negative candidates
            for start in range(0, batch_neg_cand, batch_test):
                end = min(start + batch_test, batch_neg_cand)

                if batch_neg_feats[start:end].shape[1] == 9216:
                    temp_neg_feats = batch_neg_feats[start:end]
                else:
                    temp_neg_feats = torch.cat((batch_neg_feats[start:end],
                                                batch_neg_feats[start:end]),
                                               dim=1)

                score = model(temp_neg_feats,
                              temp_neg_feats,
                              in_layer=in_layer)
                if start == 0:
                    neg_cand_score = score.data[:, 1].clone()
                else:
                    neg_cand_score = torch.cat(
                        (neg_cand_score, score.data[:, 1].clone()), 0)

            _, top_idx = neg_cand_score.topk(batch_neg)
            batch_neg_feats = batch_neg_feats.index_select(
                0, Variable(top_idx))
            model.train()  # switch back to training mode

        # forward
        if batch_pos_feats.shape[1] == 9216:
            temp_pos_feats = batch_pos_feats
        else:
            temp_pos_feats = torch.cat((batch_pos_feats, batch_pos_feats),
                                       dim=1)

        if batch_neg_feats.shape[1] == 9216:
            temp_neg_feats = batch_neg_feats
        else:
            temp_neg_feats = torch.cat((batch_neg_feats, batch_neg_feats),
                                       dim=1)

        # pdb.set_trace()
        pos_score = model(temp_pos_feats, temp_pos_feats, in_layer=in_layer)
        neg_score = model(temp_neg_feats, temp_neg_feats, in_layer=in_layer)

        # optimize
        loss = criterion(pos_score, neg_score)
        model.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opts['grad_clip'])
        optimizer.step()

        if opts['visual_log']:
            print("Iter %d, Loss %.4f" % (it, loss.item()))
Esempio n. 53
0
    def test(self):
        test_dataset = eval(self.dataset_conf.loader_name)(self.config,
                                                           split='test')
        # create data loader
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=self.test_conf.batch_size,
            shuffle=False,
            num_workers=self.test_conf.num_workers,
            collate_fn=test_dataset.collate_fn,
            drop_last=False)

        # create models
        model = eval(self.model_conf.name)(self.config)
        load_model(model, self.test_conf.test_model)

        if self.use_gpu:
            model = nn.DataParallel(model, device_ids=self.gpus).cuda()

        model.eval()
        test_loss = []
        for data in tqdm(test_loader):
            if self.use_gpu:
                data['node_feat'], data['node_mask'], data[
                    'label'] = data_to_gpu(data['node_feat'],
                                           data['node_mask'], data['label'])

                if self.model_conf.name == 'LanczosNetGeneral':
                    data['D'], data['V'] = data_to_gpu(data['D'], data['V'])
                elif self.model_conf.name == 'GraphSAGE':
                    data['nn_idx'], data['nonempty_mask'] = data_to_gpu(
                        data['nn_idx'], data['nonempty_mask'])
                elif self.model_conf.name == 'GPNN':
                    data['L'], data['L_cluster'], data['L_cut'] = data_to_gpu(
                        data['L'], data['L_cluster'], data['L_cut'])
                else:
                    data['L'] = data_to_gpu(data['L'])[0]

            with torch.no_grad():
                if self.model_conf.name == 'AdaLanczosNet':
                    pred, _ = model(data['node_feat'],
                                    data['L'],
                                    label=data['label'],
                                    mask=data['node_mask'])
                elif self.model_conf.name == 'LanczosNetGeneral':
                    pred, _ = model(data['node_feat'],
                                    data['L'],
                                    data['D'],
                                    data['V'],
                                    label=data['label'],
                                    mask=data['node_mask'])
                elif self.model_conf.name == 'GraphSAGE':
                    pred, _ = model(data['node_feat'],
                                    data['nn_idx'],
                                    data['nonempty_mask'],
                                    label=data['label'],
                                    mask=data['node_mask'])
                elif self.model_conf.name == 'GPNN':
                    pred, _ = model(data['node_feat'],
                                    data['L'],
                                    data['L_cluster'],
                                    data['L_cut'],
                                    label=data['label'],
                                    mask=data['node_mask'])
                else:
                    pred, _ = model(data['node_feat'],
                                    data['L'],
                                    label=data['label'],
                                    mask=data['node_mask'])

                curr_loss = (pred - data['label']
                             ).pow(2).cpu().numpy() * self.const_factor
                test_loss += [curr_loss]

        test_loss = float(np.mean(np.concatenate(test_loss)))
        logger.info("Test MSE = {}".format(test_loss))

        return test_loss
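`load_model` is a project helper that is not shown here; a minimal sketch under the assumption that it simply restores a saved state_dict:

import torch

def load_model(model, checkpoint_path):
    # Hypothetical helper: restore weights saved with torch.save(state_dict).
    state = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(state)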
Esempio n. 54
0
def train(epoch, optimizer, compression_scheduler=None):
    # Turn on training mode which enables dropout.
    model.train()

    total_samples = train_data.size(0)
    steps_per_epoch = math.ceil(total_samples / args.bptt)

    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0), args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)

        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(
                epoch,
                minibatch_id=batch,
                minibatches_per_epoch=steps_per_epoch)
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)

        if compression_scheduler:
            # Before running the backward phase, we allow the scheduler to modify the loss
            # (e.g. add regularization loss)
            loss = compression_scheduler.before_backward_pass(
                epoch,
                minibatch_id=batch,
                minibatches_per_epoch=steps_per_epoch,
                loss=loss,
                return_loss_components=False)

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        total_loss += loss.item()

        if compression_scheduler:
            compression_scheduler.on_minibatch_end(
                epoch,
                minibatch_id=batch,
                minibatches_per_epoch=steps_per_epoch)

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            lr = optimizer.param_groups[0]['lr']
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.4f} | ms/batch {:5.2f} '
                '| loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch,
                    len(train_data) // args.bptt, lr,
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
            stats = ('Performance/Training/',
                     OrderedDict([('Loss', cur_loss),
                                  ('Perplexity', math.exp(cur_loss)),
                                  ('LR', lr), ('Batch Time', elapsed * 1000)]))
            steps_completed = batch + 1
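`repackage_hidden` is the usual word-language-model helper that detaches the recurrent hidden state from the previous graph; the canonical implementation from the PyTorch examples is:

import torch

def repackage_hidden(h):
    """Wrap hidden states in new Tensors, detaching them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)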
Esempio n. 55
0
def get_filtered_relations_and_tails_filter1_filter2_hybrid (head_id, id2entity, type2relationType2frequency, topNfilters, column, fact, model, arity, device, type_head_tail_entity_matrix, tailType_relation_headType_tensor, entityName2entityTypes, type2id, relation2id, atLeast, sparsifier, typeId2frequency, entityId2entityTypes, unk_type_id, id2type, entity2sparsifiedTypes, entitiesEvaluated):

	list_of_scores_per_head_with_filter = []
	idx2relation_tail_in_scores = {}

	h = id2entity[head_id]

	tiled_fact = np.array([])

	all_head_types = type_head_tail_entity_matrix[:,head_id]
	tailType_relation_matrix = torch.matmul(tailType_relation_headType_tensor, all_head_types)
	tailType_relation_matrix = torch.transpose(tailType_relation_matrix, 0, 1)
	relation_entity_matrix = torch.matmul(tailType_relation_matrix, type_head_tail_entity_matrix)

	relation_entity_matrix[relation_entity_matrix < atLeast] = 0

	if torch.nonzero(relation_entity_matrix).shape[0] != 0:
		filtered_relation_tail_pairs = torch.nonzero(relation_entity_matrix)

		all_relations = filtered_relation_tail_pairs[:,0].tolist()
		entities_without_duplicates = filtered_relation_tail_pairs[:,1].tolist()
		r_e_tuples = list(zip(all_relations, entities_without_duplicates))
		idx_iterator = range(0, len(r_e_tuples))
		relation_tail_in_scores2idx = dict(zip(r_e_tuples, idx_iterator))
		idx2relation_tail_in_scores = dict(zip(idx_iterator, r_e_tuples))

		if len(entities_without_duplicates) > 0:

			tiled_fact = np.array(fact*len(entities_without_duplicates)).reshape(len(entities_without_duplicates),-1)
			tiled_fact[:,column] = entities_without_duplicates

			tiled_fact[:,0] = all_relations
			tiled_fact[:,2] = all_relations

			head_sparsified_types = []
			current_head_entity = tiled_fact[0][1]
			if current_head_entity in entity2sparsifiedTypes:
				head_sparsified_types = entity2sparsifiedTypes[current_head_entity]

			if len(tiled_fact) > 0:

				new_tiled_fact = []
				for current_fact in tiled_fact:
					current_head_entity = current_fact[1]
					current_tail_entity = current_fact[3]

					tail_sparsified_types = []
					if current_tail_entity in entity2sparsifiedTypes:
						tail_sparsified_types = entity2sparsifiedTypes[current_tail_entity]

					if entitiesEvaluated == "one":
						if len(head_sparsified_types)==0: #h has no types
							head_sparsified_types = [unk_type_id]
						if len(tail_sparsified_types)==0: #t has no types
							tail_sparsified_types = [unk_type_id]

					headType_tailType_pairs = []
					for h_t in head_sparsified_types:
						for t_t in tail_sparsified_types:
							headType_tailType_pairs.append(h_t)
							headType_tailType_pairs.append(t_t)

					current_fact = current_fact[:4]
					current_fact = np.append(current_fact, headType_tailType_pairs)
					new_tiled_fact.append(current_fact)


				new_tiled_fact, relation_tail_in_scores2idx, idx2relation_tail_in_scores = sort_testing_facts_according_to_arity(new_tiled_fact, relation_tail_in_scores2idx, idx2relation_tail_in_scores) # new_tiled_fact size: (num of mini batches, num of facts, arity)

				pred = None
				for facts_with_same_arities in new_tiled_fact:
					batch_of_facts_with_same_arities = list(chunks(facts_with_same_arities, 256))
					arity = len(batch_of_facts_with_same_arities[0][0])//2
					if pred is None:
						pred = model(batch_of_facts_with_same_arities[0], arity, "testing", device)
					else:
						pred_tmp = model(batch_of_facts_with_same_arities[0], arity, "testing", device)
						pred = torch.cat((pred, pred_tmp))
					for batch_it in range(1, len(batch_of_facts_with_same_arities)):
						pred_tmp = model(batch_of_facts_with_same_arities[batch_it], arity, "testing", device)
						pred = torch.cat((pred, pred_tmp))

				score_with_filter = pred.view(-1).detach().cpu().numpy()
				list_of_scores_per_head_with_filter.append(score_with_filter)

	return list_of_scores_per_head_with_filter, idx2relation_tail_in_scores
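`chunks` is the small utility used above to split the candidate facts into mini-batches of 256; a minimal sketch consistent with how it is called:

def chunks(lst, n):
    # Split lst into consecutive pieces of at most n elements each.
    return [lst[i:i + n] for i in range(0, len(lst), n)]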
Esempio n. 56
0
def train(model, train_dataloader, test_dataloader):
    run_id = int(time.time())
    nb_batches = len(train_dataloader)

    crit = nn.CrossEntropyLoss()
    # crit = nn.NLLLoss()
    optim = torch.optim.Adam(model.parameters(), lr=0.0001)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim, factor=0.25, patience=2)
    # scheduler = torch.optim.lr_scheduler.StepLR(optim, step_size=50, gamma=0.1)

    model.train()

    for ei in range(NB_EPOCHS):
        epoch_loss = 0.0
        total_loss = 0.0
        for i, (batch_src, batch_tgt,
                batch_out) in enumerate(train_dataloader):

            stime_batch = time.time()

            batch_src = batch_src.type(torch.LongTensor).to(device)
            batch_tgt = batch_tgt.type(torch.LongTensor).to(device)
            batch_out = batch_out.type(torch.LongTensor).to(device)

            optim.zero_grad()
            out = model(batch_src, batch_tgt)

            # print(batch_src[0], batch_tgt[0])
            # print(batch_out[0], out[0])

            out = out.reshape(-1, 336)
            batch_out = batch_out.reshape(-1)

            loss = crit(out, batch_out)
            total_loss += loss.item()
            epoch_loss += loss.item()
            loss.backward()

            optim.step()
            # scheduler.step()

            etime_batch = time.time()

            if not i % PRINT_INV:
                estimated_time = timedelta(
                    seconds=math.floor((etime_batch - stime_batch) *
                                       (nb_batches - i)))
                print(f"> Epoch {ei+1}/{NB_EPOCHS} - Batch {i+1}/{nb_batches}")
                print(
                    f"> Batch finished in {etime_batch - stime_batch:1.2f} seconds"
                )
                print(
                    f"> Estimated time to end of epoch: {str(estimated_time)}")
                print(f"> Loss: {total_loss / PRINT_INV}\n")
                total_loss = 0.0

        if ei and not ei % GEN_INV:
            generate(model, f"{ei}-sample.csv", src=test_primer)

        val_loss = evaluate(model, test_dataloader, optim, crit)
        # scheduler.step(val_loss)
        print(f"> Epoch {ei+1}/{NB_EPOCHS}")
        print(f"> Validation Loss: {val_loss}\n")

        torch.save(model, f"models/{run_id}-{ei}-model.pt")
    torch.save(model, f"models/{run_id}-final-model.pt")
Esempio n. 57
0
def main():

	#parse input arguments
	parser = argparse.ArgumentParser(description="Model's hyperparameters")
	parser.add_argument('--indir', type=str, help='Input dir of train, test and valid data')
	parser.add_argument('--withTypes', type=str, default="True")
	parser.add_argument('--epochs', type=int, default=1000, help='Number of epochs (default: 1000)' )
	parser.add_argument('--batchsize', type=int, default=128, help='Batch size (default: 128)' )
	parser.add_argument('--num_filters', type=int, default=100, help='number of filters CNN' )
	parser.add_argument('--embsize', default=100, help='Embedding size (default: 100)' )
	parser.add_argument('--learningrate', type=float, default=0.0001, help='Learning rate (default: 0.0001)' )
	parser.add_argument('--outdir', type=str, help='Output dir of model')
	parser.add_argument('--load', default='False', help='If True, it loads a saved model from outdir and evaluates it (default: False). If preload, it loads an existing model and keeps training it' )
	parser.add_argument('--modelToBeTrained', default='', help='path of the pretrained model to be loaded. It works with --load=preload' )
	parser.add_argument('--gpu_ids', default='1', help='Comma-separated gpu ids used to parallelize the evaluation' )
	parser.add_argument('--num_negative_samples', type=int, default=1, help='number of negative samples for each positive sample' )
	parser.add_argument('--atLeast', type=int, help='2' )
	parser.add_argument('--topNfilters', type=int, help='2' )
	parser.add_argument('--buildTypeDictionaries', type=str, default='False', help='True OR False' )
	parser.add_argument('--sparsifier', type=int, default=-1, help='if type frequency is less than K in ranking, set its entry to 0 in the img. If its value is <=0 then it will not sparsify the matrix' )
	parser.add_argument('--entitiesEvaluated', default='both', type=str, help='both, one, none' )
	args = parser.parse_args()
	print("\n\n************************")
	for e in vars(args):
		print (e, getattr(args, e))
	print("************************\n\n")

	if args.load == 'True':

		with open(args.indir + "/dictionaries_and_facts.bin", 'rb') as fin:
			data_info = pickle.load(fin)
		test = data_info['test_facts']

		relation2id = data_info['roles_indexes']
		entity2id = data_info['values_indexes']
		key_val = data_info['role_val']

		id2entity = {v: k for k, v in entity2id.items()}
		id2relation = {v: k for k, v in relation2id.items()}

		n_entities = len(entity2id)
		n_relations = len(relation2id)
		print("Unique number of relations and head types:", n_relations)
		print("Unique number of entities and tail types:", n_entities)

		# the pickle is re-read so that whole_* are independent copies of the facts loaded above
		with open(args.indir + "/dictionaries_and_facts.bin", 'rb') as fin:
			data_info1 = pickle.load(fin)
		whole_train = data_info1["train_facts"]
		whole_valid = data_info1["valid_facts"]
		whole_test = data_info1['test_facts']

		type2id, id2type = build_type2id_v2(args.indir)
		unk_type_id = len(type2id)  # capture the id before insertion (the original wrote id2type under the wrong key)
		type2id["UNK"] = unk_type_id
		id2type[unk_type_id] = "UNK"

		entityName2entityTypes, entityId2entityTypes, entityType2entityNames, entityType2entityIds = build_entity2types_dictionaries(args.indir, entity2id)

		head2relation2tails = build_head2relation2tails(args.indir, entity2id, relation2id, entityId2entityTypes, args.entitiesEvaluated)
		print("unique testing heads:", len(head2relation2tails))

		typeId2frequency = build_typeId2frequency (args.indir, type2id)
		headTail2hTypetType, entityId2typeIds_with_sparsifier = build_headTail2hTypetType(args.indir, entity2id, type2id, entityName2entityTypes, args.sparsifier, typeId2frequency, args.buildTypeDictionaries)

		_, test, _ = add_type_pair_to_fact ([], test, [], headTail2hTypetType, entityId2typeIds_with_sparsifier, unk_type_id)
		whole_train, whole_test, whole_valid = add_type_pair_to_fact (whole_train, whole_test, whole_valid, headTail2hTypetType, entityId2typeIds_with_sparsifier, unk_type_id)

		device = "cuda:"+str(args.gpu_ids)

		type2relationType2frequency = build_type2relationType2frequency(args.indir, args.buildTypeDictionaries)
		type_head_tail_entity_matrix, tailType_relation_headType_tensor = build_tensor_matrix(args.indir, entity2id, relation2id, entityName2entityTypes, args.topNfilters, type2relationType2frequency, entityId2typeIds_with_sparsifier, type2id, id2type, device, args.entitiesEvaluated)

		entity2sparsifiedTypes = build_entity2sparsifiedTypes (typeId2frequency, entityId2entityTypes, type2id, args.sparsifier, unk_type_id, id2type)

		epoch = args.outdir.split("/")[-1].split("_")[2]
		model = torch.load(args.outdir,map_location=device)
		t2 = TicToc()
		t2.tic()

		print("model.emb_types:", model.emb_types)

		evaluate_model_v2 (model, test, id2entity, type2relationType2frequency, args.topNfilters, args.atLeast, device, type2id, id2type, type_head_tail_entity_matrix, tailType_relation_headType_tensor, entityName2entityTypes, relation2id, head2relation2tails, id2relation, args.sparsifier, typeId2frequency, entityId2entityTypes, unk_type_id, entity2sparsifiedTypes, args.indir, args.entitiesEvaluated)
		t2.toc()
		print("Evaluation last epoch ", epoch, "- running time (seconds):", t2.elapsed)


		print("END OF SCRIPT!")

		sys.stdout.flush()

	else:

		with open(args.indir + "/dictionaries_and_facts.bin", 'rb') as fin:
			data_info = pickle.load(fin)
		train = data_info["train_facts"]
		valid = data_info["valid_facts"]
		test = data_info['test_facts']
		relation2id = data_info['roles_indexes']
		entity2id = data_info['values_indexes']

		key_val = data_info['role_val']

		id2entity = {v: k for k, v in entity2id.items()}
		id2relation = {v: k for k, v in relation2id.items()}

		n_entities = len(entity2id)
		n_relations = len(relation2id)
		print("Unique number of relations:", n_relations)
		print("Unique number of entities:", n_entities)

		# the pickle is re-read so that whole_* are independent copies of the facts loaded above
		with open(args.indir + "/dictionaries_and_facts.bin", 'rb') as fin:
			data_info1 = pickle.load(fin)
		whole_train = data_info1["train_facts"]
		whole_valid = data_info1["valid_facts"]
		whole_test = data_info1['test_facts']

		mp.set_start_method('spawn')
		t1 = TicToc()
		t2 = TicToc()

		entityName2entityTypes, entityId2entityTypes, entityType2entityNames, entityType2entityIds = build_entity2types_dictionaries(args.indir, entity2id)

		## img matrix
		type2id, id2type = build_type2id_v2(args.indir)
		unk_type_id = len(type2id)
		typeId2frequency = build_typeId2frequency (args.indir, type2id)
		headTail2hTypetType, entityId2typeIds_with_sparsifier = build_headTail2hTypetType(args.indir, entity2id, type2id, entityName2entityTypes, args.sparsifier, typeId2frequency, args.buildTypeDictionaries)

		train, test, valid = add_type_pair_to_fact (train, test, valid, headTail2hTypetType, entityId2typeIds_with_sparsifier, unk_type_id)
		whole_train, whole_test, whole_valid = add_type_pair_to_fact (whole_train, whole_test, whole_valid, headTail2hTypetType, entityId2typeIds_with_sparsifier, unk_type_id)


		n_batches_per_epoch = []
		for i in train:
			ll = len(i)
			if ll == 0:
				n_batches_per_epoch.append(0)
			else:
				n_batches_per_epoch.append(int((ll - 1) / args.batchsize) + 1)

		device = "cuda:"+str(args.gpu_ids.split(",")[0])
		print("device:", device)

		if args.load == "preload":
			model = torch.load(args.modelToBeTrained, map_location=device)
			starting_epoch = int(args.modelToBeTrained.rsplit('/', 1)[-1].split("_")[7].replace("epoch", "")) + 1
			print("Model pre-loaded. The training will start at epoch", starting_epoch)
		elif args.load == "False":
			if args.withTypes == "True":
				model = RETA(len(relation2id), len(entity2id), len(type2id)+1, int(args.embsize), int(args.num_filters)).cuda()
			elif args.withTypes == "False":
				model = RETA_NO_TYPES(len(relation2id), len(entity2id), len(type2id)+1, int(args.embsize), int(args.num_filters)).cuda()
			model.init()
			starting_epoch = 1

		for name, param in model.named_parameters():
			if param.requires_grad:
				print("param:", name, param.size())

		opt = torch.optim.Adam(model.parameters(), lr=float(args.learningrate))

		for epoch in range(starting_epoch, int(args.epochs)+1):
			t1.tic()
			model.train()
			model.to(device)
			train_loss = 0
			rel = 0

			arity2numOfPos = {}
			arity2numOfNeg = {}

			for i in range(len(train)):

				train_i_indexes = np.array(list(train[i].keys())).astype(np.int32)
				train_i_values = np.array(list(train[i].values())).astype(np.float32)

				for batch_num in range(n_batches_per_epoch[i]):

					arity = len(train_i_indexes[0])//2

					if arity < 3:
						print("ERROR: arity < 3")

					# Batch_Loader (defined elsewhere) is assumed to build the positive facts plus sampled negatives for this batch
					x_batch, y_batch, new_positive_facts_indexes_with_different_arity, new_negative_facts_indexes_with_different_arity = Batch_Loader(train_i_indexes, train_i_values, n_entities, n_relations, key_val, args.batchsize, arity, whole_train[i], id2entity, id2relation, args.num_negative_samples, type2id, args.sparsifier, typeId2frequency, entityId2entityTypes, id2entity, unk_type_id, id2type, entityName2entityTypes)

					x_by_arities, y_by_arities = sort_new_batch_according_to_arity_2 (new_positive_facts_indexes_with_different_arity, new_negative_facts_indexes_with_different_arity)

					loss = 0
					for j in range(len(x_by_arities)):
						arity = len(x_by_arities[j][0])//2
						if arity < 3:
							print("ERROR: arity < 3")
						pred = model(x_by_arities[j], arity, "training", device, id2relation, id2entity)
						pred = pred * torch.FloatTensor(y_by_arities[j]).cuda(device) * (-1)
						loss += model.loss(pred).mean()

					opt.zero_grad()
					loss.backward()
					opt.step()
					train_loss += loss.item()

			t1.toc()
			print("End of epoch", epoch, "- train_loss:", train_loss, "- training time (seconds):", t1.elapsed)

			sys.stdout.flush()

		print("END OF EPOCHS")

		#SAVE THE LAST MODEL
		if args.withTypes == "True":
			file_name = "RETA_batchSize" + str(args.batchsize) + "_epoch" + str(epoch) + "_embSize" + args.embsize + "_lr" + args.learningrate + "_sparsifier" + str(args.sparsifier) + "_numFilters" + str(args.num_filters)
		elif args.withTypes == "False":
			file_name = "RETA_with_NO_types_batchSize" + str(args.batchsize) + "_epoch" + str(epoch) + "_embSize" + args.embsize + "_lr" + args.learningrate + "_sparsifier" + str(args.sparsifier) + "_numFilters" + str(args.num_filters)
		print("Saving the model trained at epoch", epoch, "in:", args.outdir + '/' + file_name)
		if not os.path.exists(args.outdir):
			os.makedirs(args.outdir)
		torch.save(model, args.outdir + '/' + file_name)
		print("Model saved")

		print("END OF SCRIPT!")

		sys.stdout.flush()
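# Example invocations (script name, dataset path and flag values are
# assumptions, not taken from the source):
#   python main.py --indir data/JF17K --outdir models/out --epochs 100 --gpu_ids 0
#   python main.py --indir data/JF17K --outdir models/out/RETA_batchSize128_... --load True --gpu_ids 0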
Esempio n. 58
0
    # min_val_precision = 0.6
    min_loss_val = 10
    min_epoch = 100

    for epoch in range(opt.epochs):
        model.train()
        start_time = time.time()
        for batch_i, (_, imgs, targets) in enumerate(dataloader):
            batches_done = len(dataloader) * epoch + batch_i

            imgs = Variable(imgs.to(device))
            targets = Variable(targets.to(device), requires_grad=False)

            try:
                loss, outputs = model(imgs, targets)
                loss.backward()
            except RuntimeError as exception:
                if "out of memory" in str(exception):
                    print("WARNING: out of memory")
                    if hasattr(torch.cuda, 'empty_cache'):
                        torch.cuda.empty_cache()
                    # skip this batch: falling through would step with stale or partial gradients
                    continue
                else:
                    raise

            if batches_done % opt.gradient_accumulations:
                # optimizer step on all but every N-th batch (the accumulation rule this snippet uses)
                optimizer.step()
                optimizer.zero_grad()  # restored: the loop as shown never clears gradients otherwise
Esempio n. 59
0
def train(model, data_loader, device, num_epochs, optimizer=None, lr_scheduler=None):
    # make sure the checkpoint directory exists
    os.makedirs('./weights', exist_ok=True)

    # move the model to the GPU or CPU
    model.to(device)

    # build an optimizer if none was supplied
    if optimizer is None:
        params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.SGD(params, lr=0.001,
                                    momentum=0.9, weight_decay=0.0005)

    # build a learning-rate scheduler if none was supplied
    if lr_scheduler is None:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.1)

    for epoch in range(num_epochs):
        
        print(epoch)
        
        model.train()
        count = 0
        
        for images, targets in data_loader:
            count += len(images)
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)

            losses = sum(loss for loss in loss_dict.values())


            # reduce losses over all GPUs for logging purposes
            world_size = 1
            if dist.is_available() and dist.is_initialized():
                world_size = dist.get_world_size()

            if world_size >= 2:
                with torch.no_grad():
                    names = []
                    values = []
                    # sort the keys so that they are consistent across processes
                    for k in sorted(loss_dict.keys()):
                        names.append(k)
                        values.append(loss_dict[k])
                    values = torch.stack(values, dim=0)
                    dist.all_reduce(values)
                    # average across processes (`average` was an undefined name in the original)
                    values /= world_size
                    loss_dict = {k: v for k, v in zip(names, values)}


            losses_reduced = sum(loss for loss in loss_dict.values())

            loss_value = losses_reduced.item()

            print('epoch {} [{}/{}]loss_classifier : {} loss_box_reg : {} loss_objectness : {} loss_rpn_box_reg : {}'.format(
                epoch, count, len(data_loader)*data_loader.batch_size, loss_dict['loss_classifier'], loss_dict['loss_box_reg'], loss_dict['loss_objectness'], loss_dict['loss_rpn_box_reg']
            ))

            if not math.isfinite(loss_value):
                print("Loss is {}, stopping training".format(loss_value))
                print(targets)
                print(loss_dict)
                sys.exit(1)

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
        save_model('./weights/{}'.format(epoch), model, optimizer, lr_scheduler)
        lr_scheduler.step()
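# `save_model` is not defined in this snippet; a minimal sketch, assuming it
# checkpoints model, optimizer and scheduler state under the given path:
def save_model(path, model, optimizer, lr_scheduler):
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'lr_scheduler_state_dict': lr_scheduler.state_dict(),
    }, path)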
Esempio n. 60
0
def train(model, train_dataloader, epoch):
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN': model.reset()
    model.train()
    total_loss = 0
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)
    hidd = model.init_hidden(args.batch_size * 19)
    hidd_cand = model.init_hidden(args.batch_size * 4)
    acc_list = []
    total_var = 0

    it = tqdm(range(len(train_dataloader)),
              desc="Epoch {}/{}".format(epoch, args.epochs),
              ncols=0)
    data_iter = iter(train_dataloader)
    for niter in it:
        input_ids, cand_ids, targets = next(data_iter)  # next(...) builtin; .next() is Python 2 only
        if args.cuda:
            input_ids = input_ids.cuda()
            cand_ids = cand_ids.cuda()
            targets = targets.cuda()

        hidden = repackage_hidden(hidden)
        hidd = repackage_hidden(hidd)
        hidd_cand = repackage_hidden(hidd_cand)
        optimizer.zero_grad()

        output, result_prob, hidden, rnn_hs, dropped_rnn_hs, cand_emb = model(
            input_ids, cand_ids, hidden, hidd, hidd_cand)
        distance_1 = model.distance[0][2]
        dis_var = torch.var(distance_1)

        raw_loss = criterion(result_prob, targets)

        _, predict = result_prob.max(dim=-1)
        acc = float(torch.sum(predict == targets)) / float(targets.size(0))
        acc_list.append(acc)

        loss = raw_loss
        # Activation Regularization
        if args.alpha:
            loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean()
                              for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta:
            loss = loss + sum(args.beta *
                              (rnn_h[1:] - rnn_h[:-1]).pow(2).mean()
                              for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip: torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        total_var += dis_var.data

    cur_loss = total_loss / len(train_dataloader)
    cur_var = total_var / len(train_dataloader)
    elapsed = time.time() - start_time
    print('| epoch {:3d} | lr {:05.5f} | ms/batch {:5.2f} | '
          'loss {:5.2f} | acc {:8.4f} | var {:8.4f}'.format(
              epoch, optimizer.param_groups[0]['lr'],
              elapsed * 1000 / len(train_dataloader), cur_loss,
              np.mean(acc_list), cur_var))
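# `repackage_hidden` is not shown; presumably the standard helper from the
# PyTorch word-language-model example, which detaches hidden states from
# their autograd history between batches:
def repackage_hidden(h):
    # detach a tensor, or recurse into a tuple of hidden states
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)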