Example #1
def run(args):
	parameters = RnnParameterData(loc_emb_size=args.loc_emb_size, uid_emb_size=args.uid_emb_size, tim_emb_size=args.tim_emb_size,
								  app_size=args.app_size, app_encoder_size=args.app_encoder_size,acc_threshold=args.acc_threshold,
								  hidden_size=args.hidden_size, dropout_p=args.dropout_p, top_size=args.top_size,
								  data_name=args.data_name, lr=args.learning_rate,users_start=args.users_start,users_end=args.users_end,
								  lr_step=args.lr_step, lr_decay=args.lr_decay, L2=args.L2, rnn_type=args.rnn_type, loss_alpha=args.loss_alpha,
								  optim=args.optim, lr_schedule=args.lr_schedule, attn_type=args.attn_type, app_emb_mode=args.app_emb_mode,
								  clip=args.clip, epoch_max=args.epoch_max, history_mode=args.history_mode, loss_beta=args.loss_beta,
								  model_mode=args.model_mode, data_path=args.data_path, save_path=args.save_path, baseline_mode=args.baseline_mode,
								  loc_emb_negative=args.loc_emb_negative, loc_emb_batchsize=args.loc_emb_batchsize, input_mode=args.input_mode, test_start = args.test_start)
	"""metric"""
	T = 10
	T0 = T
	lr = parameters.lr
	metrics = {'train_loss': [], 'valid_loss': [], 'avg_app_auc': [], 'avg_app_map': [], 'avg_app_precision': [], 'avg_app_recall': [], 'avg_loc_top1': [], 'avg_loc_top5': [],'avg_loc_top10': [], 'avg_uid_top1': [], 'avg_uid_top10': [], 'valid_acc': {}}
	if args.candidate == 'loc_entropy':
		candidate = candidate_entropy(parameters.data_neural, start=parameters.users_start, end=parameters.users_end, mode=args.reverse_mode)
	elif args.candidate == 'app_number':
		candidate = candidate_number(parameters.data_neural, start=parameters.users_start, end=parameters.users_end, mode=args.reverse_mode)
	else:
		candidate_tmp = [u for u in parameters.data_neural.keys()]
		candidate = candidate_tmp[parameters.users_start:parameters.users_end]
	parameters.uid_size = max(candidate) + 1
	print('Candidate:',candidate[:5])
	print('Candidate:{} max user_id:{} min user_id:{}'.format(args.candidate, max(candidate), min(candidate)))
	app_number = static_app(parameters,candidate)
	print('App: max app number used:{} min app number used:{} average app number used:{}'.format(max(app_number), min(app_number), np.mean(app_number)))
	
	"""baseline"""
	if parameters.baseline_mode == 'App':
		print("======Run baseline: beyasian_prediction==========")
		auc, users_auc, f1, users_f1, precision, users_precision, recall, users_recall = beyasian_prediction(parameters,candidate=candidate)
		print('==> NB auc: {:.4f} map: {:.4f} precision: {:.4f} recall: {:.4f}'.format(float(auc), float(f1),float(precision), float(recall)))
		print("======Run baseline: history average==========")
		auc, users_auc, f1, users_f1, precision, users_precision, recall, users_recall = history_average(parameters,candidate=candidate)
		print('==> HA auc: {:.4f} map: {:.4f} precision: {:.4f} recall: {:.4f}'.format(float(auc), float(f1),float(precision), float(recall)))
		print("======Run baseline: most recent==========")
		auc, users_auc, f1, users_f1, precision, users_precision, recall, users_recall = most_recently(parameters,candidate=candidate)
		json.dump(users_f1, open("users_f1_MRU_App.json","w"))	
		print('==> MR auc: {:.4f} map: {:.4f} precision: {:.4f} recall: {:.4f}'.format(float(auc), float(f1),float(precision), float(recall)))
		print("======Run baseline: most popular==========")
		auc, users_auc, f1, users_f1, precision, users_precision, recall, users_recall = most_popular(parameters,candidate=candidate)
		print('==> MF auc: {:.4f} map: {:.4f} precision: {:.4f} recall: {:.4f}'.format(float(auc), float(f1),float(precision), float(recall)))
	elif parameters.baseline_mode == 'Loc':
		print("======Run Loc baseline: one order markov==========")
		avg_acc_top1,avg_acc_top5,avg_acc_top10, user_acc = markov(parameters,candidate=candidate)
		print('==> Markov acc@1: {:.4f} acc@5: {:.4f} acc@10: {:.4f}'.format(float(avg_acc_top1), float(avg_acc_top5),float(avg_acc_top10)))
		print("======Run Loc baseline: most recently==========")
		avg_acc_top1,user_acc  = most_recently_loc(parameters,candidate=candidate)
		json.dump(user_acc, open("avg_acc_top1_MRU_Loc.json","w"))	
		print('==> MR acc@1: {:.4f} '.format(float(avg_acc_top1)))
		print("======Run Loc baseline: most popular==========")
		avg_acc_top1,avg_acc_top5,avg_acc_top10, user_acc  = most_popular_loc(parameters,candidate=candidate)
		print('==> MF acc@1: {:.4f} acc@5: {:.4f} acc@10: {:.4f}'.format(float(avg_acc_top1), float(avg_acc_top5),float(avg_acc_top10)))
	elif parameters.baseline_mode is None:
		print("================Run models=============")
		"""get loc embedding graph"""
		loc_old2newid = {}
		loc_old2newid,loc_graph = get_loc_graph(parameters, candidate)
		parameters.loc_size = len(loc_old2newid)
		parameters.uid_size = max(candidate)+1
		#Model Training
		print('Split training and testing data', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
		if 'Topk' in parameters.model_mode:
			print('using topk model')
			user_topk = generate_input_topk(parameters,'train',loc_old2newid, mode2=None, candidate=candidate)
		else:
			user_topk = None
		if parameters.input_mode == 'short':
			data_train, train_idx = generate_input(parameters, 'train', loc_old2newid, user_topk, mode2=parameters.history_mode,candidate=candidate)
			data_test, test_idx = generate_input(parameters, 'test', loc_old2newid, user_topk, mode2=parameters.history_mode,candidate=candidate)
		elif parameters.input_mode == 'short_history':
			data_train, train_idx = generate_input_history(parameters, 'train', loc_old2newid, user_topk, mode2=parameters.history_mode,candidate=candidate)
			data_test, test_idx = generate_input_history(parameters, 'test', loc_old2newid, user_topk, mode2=parameters.history_mode,candidate=candidate)
		elif parameters.input_mode == 'long':
			data_train, train_idx = generate_input_long_history(parameters, 'train', loc_old2newid, user_topk, mode2=parameters.history_mode,candidate=candidate)
			data_test, test_idx = generate_input_long_history(parameters, 'test', loc_old2newid, user_topk, mode2=parameters.history_mode,candidate=candidate)
		#print('Generating Line first order similarity')
		#loc_emb_data_1 = SmapleDataset(loc_graph, 0)  #the edge with link, return one pair with label=1
		#loc_emb_data_loaer_1 = DataLoader(loc_emb_data_1, shuffle=True, batch_size=parameters.loc_emb_batchsize, num_workers=4)
		#print('Generating Line second order similarity')
		#loc_emb_data_2 = SmapleDataset(loc_graph, parameters.loc_emb_negative) #negative sample, return 5 pairs with label=-1
		#loc_emb_data_loaer_2 = DataLoader(loc_emb_data_2, shuffle=True, batch_size=parameters.loc_emb_batchsize//6, num_workers=4)
		#print('data len:', len(loc_emb_data_1))
		#T1 = int(len(loc_emb_data_1)//parameters.loc_emb_batchsize)
		
		"""Model Init"""
		print('Model Init!', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
		for mi in range(1):
			#"""Pre App"""
			if parameters.model_mode in ['AppPre']:	
				model = AppPre(parameters=parameters).cuda()
				criterion = AppLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreGtr']:	
				model = AppPreGtr(parameters=parameters).cuda()
				criterion = AppLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreUser']:
				model = AppPreUser(parameters=parameters).cuda()
				criterion = AppLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreUserCGtr']:
				model = AppPreUserCGtr(parameters=parameters).cuda()
				criterion = AppLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreUserRGtr']:
				model = AppPreUserRGtr(parameters=parameters).cuda()
				criterion = AppLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreUserPGtr']:
				model = AppPreUserPGtr(parameters=parameters).cuda()
				criterion = AppLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreUserCGtrTopk']:
				model = AppPreUserCGtrTopk(parameters=parameters).cuda()
				criterion = AppLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreUserCGtrHis']:
				model = AppPreUserCGtrHis(parameters=parameters).cuda()
				criterion = AppLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreUserCGtrHisAttn']:
				model = AppPreUserCGtrHisAttn(parameters=parameters).cuda()
				criterion = AppLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreUserPGtrTopk']:
				model = AppPreUserPGtrTopk(parameters=parameters).cuda()
				criterion = AppLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreLocRUserCGtrHis']:
				model = AppPreLocRUserCGtrHis(parameters=parameters).cuda()
				criterion = AppLoss(parameters=parameters).cuda()	
			elif parameters.model_mode in ['AppPreLocCUserCGtrHis']:
				model = AppPreLocCUserCGtrHis(parameters=parameters).cuda()
				criterion = AppLoss(parameters=parameters).cuda()	
			elif parameters.model_mode in ['AppPreLocRUserCGtr']:
				model = AppPreLocRUserCGtr(parameters=parameters).cuda()
				criterion = AppLoss(parameters=parameters).cuda()	
			elif parameters.model_mode in ['AppPreLocRUserCGtrTopk']:
				model = AppPreLocRUserCGtrTopk(parameters=parameters).cuda()
				criterion = AppLoss(parameters=parameters).cuda()	
			#"""Pre Loc"""	
			elif parameters.model_mode in ['LocPre']:
				model = LocPre(parameters=parameters).cuda()
				criterion = LocLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['LocPreGt','LocPreGtr']:
				model = LocPreGtr(parameters=parameters).cuda()
				criterion = LocLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['LocPreUser']:
				model = LocPreUser(parameters=parameters).cuda()
				criterion = LocLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['LocPreUserGt', 'LocPreUserGtr']: 
				model = LocPreUserGtr(parameters=parameters).cuda()
				criterion = LocLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['LocPreUserGtTopk', 'LocPreUserGtrTopk']:
				model = LocPreUserGtrTopk(parameters=parameters).cuda()
				criterion = LocLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['LocPreUserGtRec', 'LocPreUserGtrRec']:
				model = LocPreUserGtrRec(parameters=parameters).cuda()
				criterion = LocLoss(parameters=parameters).cuda()
			#"""Iden user"""		
			elif parameters.model_mode in ['UserIden']:
				model = UserIden(parameters=parameters).cuda()
				criterion = nn.NLLLoss().cuda()
			#"""Pre App, Loc and user"""		
			elif parameters.model_mode in ['AppPreUserIden']:
				model = AppPreUserIden(parameters=parameters).cuda()
				criterion = AppUserLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreLocPreGtr']:
				model = AppPreLocPreGtr(parameters=parameters).cuda()
				criterion = AppLocLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreUserIdenGt','AppPreUserIdenGtr']:
				model = AppPreUserIdenGtr(parameters=parameters).cuda()
				criterion = AppUserLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreLocUserIdenGtr']:
				model = AppPreLocUserIdenGtr(parameters=parameters).cuda()
				criterion = AppUserLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreLocPreUserIden']:
				model = AppPreLocPreUserIden(parameters=parameters).cuda()
				criterion = AppLocUserLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreLocPreUserIdenGt','AppPreLocPreUserIdenGtr']:
				model = AppPreLocPreUserIdenGtr(parameters=parameters).cuda()
				criterion = AppLocUserLoss(parameters=parameters).cuda()
			elif parameters.model_mode in ['AppPreLocPreUserIdenGtrLinear']:
				model = AppPreLocPreUserIdenGtrLinear(parameters=parameters).cuda()
				criterion = AppLocUserLoss(parameters=parameters).cuda()
			#"""For embedding"""		
			elif parameters.model_mode in ['LocEmbed', 'LocPreUserGtrLocEmb']:	
				line_1st = Line_1st(parameters.loc_size, parameters.loc_emb_size).cuda()
				line_2nd = Line_2nd(parameters.loc_size, parameters.loc_emb_size).cuda()
				model = LocPreUserGtrLocEmb(parameters=parameters,line_1st=line_1st,line_2nd=line_2nd,alpha=1).cuda()
				criterion = nn.NLLLoss().cuda()
				T0 = T-1

		print(model)
		params = list(model.parameters())
		k = 0
		for i in params:
			l = 1
			for j in i.size():
				l *= j
			k = k + l
		print("The number of parameters:" + str(k))

		"Forward network with randomly initialization  "
		for epoch in range(1):
			optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr, weight_decay=parameters.L2)
			#scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=parameters.lr_step, factor=parameters.lr_decay, threshold=1e-3)	
			scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=parameters.lr_step,factor=parameters.lr_decay, threshold=1e-3)
			#scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [10,20,30], gamma=parameters.lr_decay)
			#prediction_all[epoch] = {}
			#train dataset but test model without feedback
			model, avg_train_loss, prediction = run_simple(data_train, train_idx, 'train_test', parameters.input_mode, lr, parameters.clip, model, optimizer, criterion, parameters.model_mode, parameters.test_start, parameters.acc_threshold)
			print('========================>Epoch:{:0>2d} lr:{}<=================='.format(epoch,lr))
			print('==>Train Epoch:{:0>2d} Loss:{:.4f} lr:{}'.format(epoch, avg_train_loss, lr))
			metrics['train_loss'].append(avg_train_loss)
			#prediction_all[epoch]['train'] = prediction
			avg_loss, avg_acc, users_acc, prediction = run_simple(data_test, test_idx, 'test', parameters.input_mode, lr, parameters.clip, model,optimizer, criterion, parameters.model_mode, parameters.test_start, parameters.acc_threshold)
			print('==>Test Loss:{:.4f}'.format(avg_loss))
			print('==>Test Acc App_AUC:{:.4f}   App_map:{:.4f}    App_Precision:{:.4f}   App_Recall:{:.4f} '.format(avg_acc['app_auc'], avg_acc['app_map'], avg_acc['app_precision'], avg_acc['app_recall']))
			print('            Loc_top1:{:.4f}  Loc_top5:{:.4f}  Loc_top10:{:.4f}'.format(avg_acc['loc_top1'],avg_acc['loc_top5'], avg_acc['loc_top10']))
			print('            Uid_top1:{:.4f}  Uid_top10:{:.4f}'.format(avg_acc['uid_top1'], avg_acc['uid_top10']))
			metrics['valid_loss'].append(avg_loss) #total average loss
			metrics['valid_acc'][epoch] = users_acc #accuracy for each user
			metrics['avg_app_auc'].append(0) #total average accuracy
			metrics['avg_app_map'].append(0)
			metrics['avg_app_precision'].append(0)
			metrics['avg_app_recall'].append(0)
			metrics['avg_loc_top1'].append(0)
			metrics['avg_loc_top5'].append(0)
			metrics['avg_loc_top10'].append(0)
			metrics['avg_uid_top1'].append(0)
			metrics['avg_uid_top10'].append(0)
			#prediction_all[epoch]['test'] = prediction

		st = time.time()
		start_time = time.time()
		for epoch in range(1, parameters.epoch):
			#prediction_all[epoch] = {}
			if epoch%T < T0:
				#optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr, weight_decay=parameters.L2) 		
				#scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=parameters.lr_step, factor=parameters.lr_decay, threshold=1e-3)
				model, avg_train_loss, prediction = run_simple(data_train, train_idx, 'train', parameters.input_mode, lr, parameters.clip, model, optimizer, criterion, parameters.model_mode, parameters.test_start, parameters.acc_threshold)
				print('========================>Epoch:{:0>2d} lr:{}<=================='.format(epoch,lr))
				print('==>Train Epoch:{:0>2d} Loss:{:.4f} lr:{}'.format(epoch, avg_train_loss, lr))
				metrics['train_loss'].append(avg_train_loss)
				#prediction_all[epoch]['train'] = prediction
				avg_loss, avg_acc, users_acc, prediction = run_simple(data_test, test_idx, 'test', parameters.input_mode, lr, parameters.clip, model,optimizer, criterion, parameters.model_mode, parameters.test_start, parameters.acc_threshold)
				print('==>Test Loss:{:.4f}'.format(avg_loss))
				print('==>Test Acc App_AUC:{:.4f}   App_map:{:.4f}    App_Precision:{:.4f}   App_Recall:{:.4f} '.format(avg_acc['app_auc'], avg_acc['app_map'], avg_acc['app_precision'], avg_acc['app_recall']))
				print('            Loc_top1:{:.4f}  Loc_top5:{:.4f}  Loc_top10:{:.4f}'.format(avg_acc['loc_top1'],avg_acc['loc_top5'], avg_acc['loc_top10']))
				print('            Uid_top1:{:.4f}  Uid_top10:{:.4f}'.format(avg_acc['uid_top1'], avg_acc['uid_top10']))
				metrics['valid_loss'].append(avg_loss) #total average loss
				metrics['valid_acc'][epoch] = users_acc #accuracy for each user
				metrics['avg_app_auc'].append(avg_acc['app_auc']) #total average accuracy
				metrics['avg_app_map'].append(avg_acc['app_map'])
				metrics['avg_app_precision'].append(avg_acc['app_precision'])
				metrics['avg_app_recall'].append(avg_acc['app_recall'])
				metrics['avg_loc_top1'].append(avg_acc['loc_top1'])
				metrics['avg_loc_top5'].append(avg_acc['loc_top5'])
				metrics['avg_loc_top10'].append(avg_acc['loc_top10'])
				metrics['avg_uid_top1'].append(avg_acc['uid_top1'])
				metrics['avg_uid_top10'].append(avg_acc['uid_top10'])
				#prediction_all[epoch]['test'] = prediction

				save_name_tmp = 'ep_' + str(epoch) + '_' + str(start_time) + '.m'
				torch.save(model.state_dict(), parameters.save_path + 'tmp/' + save_name_tmp)

				if parameters.lr_schedule == 'Loss':
					if 'AppPre' in parameters.model_mode:
						scheduler.step(avg_acc['app_map'])
					elif 'LocPre' in parameters.model_mode:
						scheduler.step(avg_acc['loc_top1'])
					elif 'UserIden' in parameters.model_mode:
						scheduler.step(avg_acc['uid_top1'])
					lr_last = lr
					lr = optimizer.param_groups[0]['lr']
					if lr_last > lr:
						if 'AppPre' in parameters.model_mode:
							load_epoch = np.argmax(metrics['avg_app_map'])
						elif 'LocPre' in parameters.model_mode:
							load_epoch = np.argmax(metrics['avg_loc_top1'])
						else:
							load_epoch = np.argmax(metrics['avg_uid_top1'])          
						load_name_tmp = 'ep_' + str(load_epoch) + '_' + str(start_time) + '.m'
						model.load_state_dict(torch.load(parameters.save_path + 'tmp/' + load_name_tmp))
						print('load epoch={} model state'.format(load_epoch)) #lr decreased
				if epoch == 1:
					print('single epoch time cost:{}'.format(time.time() - start_time))
				if lr <= 0.9 * 1e-6:
					break

			else:
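				# Note: this branch assumes the commented-out LINE sampling block above has been
				# re-enabled, so that loc_emb_data_loaer_1 / loc_emb_data_loaer_2 actually exist.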
				optimizer1 = optim.Adam(filter(lambda p: p.requires_grad, line_1st.parameters()), lr=1e-3)
				run_embedding(line_1st, loc_emb_data_loaer_1, 1, optimizer1, 0, epoch, parameters.epoch)
				optimizer2 = optim.Adam(filter(lambda p: p.requires_grad, line_2nd.parameters()), lr=1e-3)
				run_embedding(line_2nd, loc_emb_data_loaer_2, 2, optimizer2, parameters.loc_emb_negative, epoch, parameters.epoch)
				line = Line(line_1st, line_2nd, alpha=1, name='epoch'+str(epoch))  # the ratio of 1st- and 2nd-order similarity
				line.save_emb()
		
		overhead = time.time() - start_time
		if 'AppPre' in parameters.model_mode:
			load_epoch = np.argmax(metrics['avg_app_map'])
			print('==>Test Best Epoch:{:0>2d}   App_AUC:{:.4f}   app_map:{:.4f}   App_Precision:{:.4f}   App_Recall:{:.4f} '.format(load_epoch, metrics['avg_app_auc'][load_epoch], metrics['avg_app_map'][load_epoch], metrics['avg_app_precision'][load_epoch], metrics['avg_app_recall'][load_epoch]))
		elif 'LocPre' in parameters.model_mode: 
			load_epoch = np.argmax(metrics['avg_loc_top1'])
			print('==>Test Best Epoch:{:0>2d}   Loc_Top1:{:.4f}   Loc_top10:{:.4f}'.format(load_epoch, metrics['avg_loc_top1'][load_epoch], metrics['avg_loc_top10'][load_epoch]))
		else:
			load_epoch = np.argmax(metrics['avg_uid_top1'])
			print('==>Test Best Epoch:{:0>2d}   Uid_Top1:{:.4f}   Uid_top10:{:.4f}'.format(load_epoch, metrics['avg_uid_top1'][load_epoch], metrics['avg_uid_top10'][load_epoch]))
		load_name_tmp = 'ep_' + str(load_epoch) + '_' + str(start_time) + '.m'
		model.load_state_dict(torch.load(parameters.save_path + 'tmp/' + load_name_tmp))
		save_name = args.model_mode + '_' + str(args.users_start) + '-' + str(args.users_end) + '_' + str(args.uid_emb_size) + '_' + \
					str(args.hidden_size) + '_' + str(args.top_size)+ '_' + \
					str(metrics['avg_app_auc'][load_epoch])[:6] + '_' + str(metrics['avg_app_map'][load_epoch])[:6] + '_' + \
					str(metrics['avg_app_precision'][load_epoch])[:6]+ '_' + str(metrics['avg_app_recall'][load_epoch])[:6] + '_' + \
					str(args.process_name) + '_' + str(args.loss_alpha)[:4] + '_' + str(args.loss_beta)[:4] + '_' + str(overhead/60)[:5]
		#save_name = args.model_mode + '_' + str(args.users_start) + '-' + str(args.users_end) + '_' + str(args.app_encoder_size) + '_' + \
		#			str(args.hidden_size) + '_' + str(metrics['avg_app_map'][load_epoch])[:6] + '_' + \
		#			str(metrics['avg_loc_top1'][load_epoch])[:6]+ '_' + str(metrics['avg_uid_top1'][load_epoch])[:6] + '_' + \
		#			str(args.process_name) + '_' + str(args.loss_alpha)[:4] + '_' + str(args.loss_beta)[:4] + '_' + str(overhead/60)[:5]
		json.dump(metrics['valid_acc'][load_epoch], open("users_acc_"+save_name+".json","w"))	

		"""saving embedding"""
		if parameters.model_mode in ['LocPreUserGtrLocEmb','LocPreUserGtr']:
			model.save_emb()
		
		"""precess visualization"""
		for p in range(1):
			fig = plt.figure(dpi=300)
			if args.plot_mode == 'both':
				ax1 = plt.subplot(221)
				plt.plot(metrics['train_loss'],'r-',label='train_loss')
				plt.plot(metrics['valid_loss'],'b-',label='test_loss')
				plt.legend(loc='best')
				ax2 = plt.subplot(222)
				plt.plot(metrics['avg_app_auc'],'g-',label='test_app_auc')
				plt.plot(metrics['avg_app_map'],'y-',label='test_app_map')
				plt.legend(loc='best')
				ax3 = plt.subplot(223)
				plt.plot(metrics['avg_loc_top1'],'g-',label='test_loc_top1')
				plt.plot(metrics['avg_loc_top10'],'y-',label='test_loc_top10')
				plt.legend(loc='best')
				ax4 = plt.subplot(224)
				plt.plot(metrics['avg_uid_top1'],'g-',label='test_uid_top1')
				plt.plot(metrics['avg_uid_top10'],'y-',label='test_uid_top10')
				plt.legend(loc='best')
			else:
				ax1 = plt.subplot(211)
				plt.plot(metrics['train_loss'],'r-',label='train_loss')
				plt.plot(metrics['valid_loss'],'b-',label='test_loss')
				plt.legend(loc='best')
				ax2 = plt.subplot(212)
				if args.plot_mode == 'App':
					plt.plot(metrics['avg_app_auc'],'g-',label='test_app_auc')
					plt.plot(metrics['avg_app_map'],'y-',label='test_app_map')
				elif args.plot_mode == 'Loc':
					plt.plot(metrics['avg_loc_top1'],'g-',label='test_loc_top1')
					plt.plot(metrics['avg_loc_top10'],'y-',label='test_loc_top10')
				elif args.plot_mode == 'User':
					plt.plot(metrics['avg_uid_top1'],'g-',label='test_uid_top1')
					plt.plot(metrics['avg_uid_top10'],'y-',label='test_uid_top10')
				elif args.plot_mode == 'Loc_emb':
					plt.plot(metrics['avg_loc_emb_P'],'g-',label='avg_loc_emb_P')
					plt.plot(metrics['avg_loc_emb_R'],'y-',label='avg_loc_emb_R')
				plt.legend(loc='best')
			plt.savefig(save_name + '.png')
			precess = np.zeros([10,len(metrics['train_loss'])])
			precess[0,:]=np.array(metrics['train_loss'])
			precess[1,:]=np.array(metrics['valid_loss'])
			precess[2,:]=np.array(metrics['avg_app_auc'])
			precess[3,:]=np.array(metrics['avg_app_map'])
			precess[4,:]=np.array(metrics['avg_loc_top1'])
			precess[5,:]=np.array(metrics['avg_loc_top10'])
Example #2
def run(args):
    parameters = RnnParameterData(loc_emb_size=args.loc_emb_size,
                                  uid_emb_size=args.uid_emb_size,
                                  voc_emb_size=args.voc_emb_size,
                                  tim_emb_size=args.tim_emb_size,
                                  hidden_size=args.hidden_size,
                                  dropout_p=args.dropout_p,
                                  data_name=args.data_name,
                                  lr=args.learning_rate,
                                  lr_step=args.lr_step,
                                  lr_decay=args.lr_decay,
                                  L2=args.L2,
                                  rnn_type=args.rnn_type,
                                  optim=args.optim,
                                  attn_type=args.attn_type,
                                  clip=args.clip,
                                  epoch_max=args.epoch_max,
                                  history_mode=args.history_mode,
                                  model_mode=args.model_mode,
                                  data_path=args.data_path,
                                  save_path=args.save_path)
    argv = {
        'loc_emb_size': args.loc_emb_size,
        'uid_emb_size': args.uid_emb_size,
        'voc_emb_size': args.voc_emb_size,
        'tim_emb_size': args.tim_emb_size,
        'hidden_size': args.hidden_size,
        'dropout_p': args.dropout_p,
        'data_name': args.data_name,
        'learning_rate': args.learning_rate,
        'lr_step': args.lr_step,
        'lr_decay': args.lr_decay,
        'L2': args.L2,
        'act_type': 'selu',
        'optim': args.optim,
        'attn_type': args.attn_type,
        'clip': args.clip,
        'rnn_type': args.rnn_type,
        'epoch_max': args.epoch_max,
        'history_mode': args.history_mode,
        'model_mode': args.model_mode
    }
    print('*' * 15 + 'start training' + '*' * 15)
    print('model_mode:{} history_mode:{} users:{}'.format(
        parameters.model_mode, parameters.history_mode, parameters.uid_size))

    if parameters.model_mode in ['simple', 'simple_long']:
        model = TrajPreSimple(parameters=parameters).cuda()
    elif parameters.model_mode == 'attn_avg_long_user':
        model = TrajPreAttnAvgLongUser(parameters=parameters).cuda()
    elif parameters.model_mode == 'attn_local_long':
        model = TrajPreLocalAttnLong(parameters=parameters).cuda()
    if args.pretrain == 1:
        model.load_state_dict(
            torch.load("../pretrain/" + args.model_mode + "/res.m"))

    if 'max' in parameters.model_mode:
        parameters.history_mode = 'max'
    elif 'avg' in parameters.model_mode:
        parameters.history_mode = 'avg'
    else:
        parameters.history_mode = 'whole'

    criterion = nn.NLLLoss().cuda()
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=parameters.lr,
                           weight_decay=parameters.L2)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        'max',
        patience=parameters.lr_step,
        factor=parameters.lr_decay,
        threshold=1e-3)

    lr = parameters.lr
    metrics = {
        'train_loss': [],
        'valid_loss': [],
        'accuracy': [],
        'valid_acc': {}
    }

    candidate = parameters.data_neural.keys()
    avg_acc_markov, users_acc_markov = markov(parameters, candidate)
    metrics['markov_acc'] = users_acc_markov

    if 'long' in parameters.model_mode:
        long_history = True
    else:
        long_history = False

    if long_history is False:
        data_train, train_idx = generate_input_history(
            parameters.data_neural,
            'train',
            mode2=parameters.history_mode,
            candidate=candidate)
        data_test, test_idx = generate_input_history(
            parameters.data_neural,
            'test',
            mode2=parameters.history_mode,
            candidate=candidate)
    elif long_history is True:
        if parameters.model_mode == 'simple_long':
            data_train, train_idx = generate_input_long_history2(
                parameters.data_neural, 'train', candidate=candidate)
            data_test, test_idx = generate_input_long_history2(
                parameters.data_neural, 'test', candidate=candidate)
        else:
            data_train, train_idx = generate_input_long_history(
                parameters.data_neural, 'train', candidate=candidate)
            data_test, test_idx = generate_input_long_history(
                parameters.data_neural, 'test', candidate=candidate)

    print('users:{} markov:{} train:{} test:{}'.format(
        len(candidate), avg_acc_markov,
        len([y for x in train_idx for y in train_idx[x]]),
        len([y for x in test_idx for y in test_idx[x]])))
    SAVE_PATH = args.save_path
    tmp_path = 'checkpoint/'
    os.mkdir(SAVE_PATH + tmp_path)
    for epoch in range(parameters.epoch):
        st = time.time()
        if args.pretrain == 0:
            model, avg_loss = run_simple(data_train, train_idx, 'train', lr,
                                         parameters.clip, model, optimizer,
                                         criterion, parameters.model_mode)
            print('==>Train Epoch:{:0>2d} Loss:{:.4f} lr:{}'.format(
                epoch, avg_loss, lr))
            metrics['train_loss'].append(avg_loss)

        avg_loss, avg_acc, users_acc = run_simple(data_test, test_idx, 'test',
                                                  lr, parameters.clip, model,
                                                  optimizer, criterion,
                                                  parameters.model_mode)
        print('==>Test Acc:{:.4f} Loss:{:.4f}'.format(avg_acc, avg_loss))

        metrics['valid_loss'].append(avg_loss)
        metrics['accuracy'].append(avg_acc)
        metrics['valid_acc'][epoch] = users_acc

        save_name_tmp = 'ep_' + str(epoch) + '.m'
        torch.save(model.state_dict(), SAVE_PATH + tmp_path + save_name_tmp)

        scheduler.step(avg_acc)
        lr_last = lr
        lr = optimizer.param_groups[0]['lr']
        if lr_last > lr:
            load_epoch = np.argmax(metrics['accuracy'])
            load_name_tmp = 'ep_' + str(load_epoch) + '.m'
            model.load_state_dict(
                torch.load(SAVE_PATH + tmp_path + load_name_tmp))
            print('load epoch={} model state'.format(load_epoch))
        if epoch == 0:
            print('single epoch time cost:{}'.format(time.time() - st))
        if lr <= 0.9 * 1e-5:
            break
        if args.pretrain == 1:
            break

    mid = np.argmax(metrics['accuracy'])
    avg_acc = metrics['accuracy'][mid]
    load_name_tmp = 'ep_' + str(mid) + '.m'
    model.load_state_dict(torch.load(SAVE_PATH + tmp_path + load_name_tmp))
    save_name = 'res'
    json.dump({
        'args': argv,
        'metrics': metrics
    },
              fp=open(SAVE_PATH + save_name + '.rs', 'w'),
              indent=4)
    metrics_view = {'train_loss': [], 'valid_loss': [], 'accuracy': []}
    for key in metrics_view:
        metrics_view[key] = metrics[key]
    json.dump({
        'args': argv,
        'metrics': metrics_view
    },
              fp=open(SAVE_PATH + save_name + '.txt', 'w'),
              indent=4)
    torch.save(model.state_dict(), SAVE_PATH + save_name + '.m')

    for rt, dirs, files in os.walk(SAVE_PATH + tmp_path):
        for name in files:
            remove_path = os.path.join(rt, name)
            os.remove(remove_path)
    os.rmdir(SAVE_PATH + tmp_path)

    return avg_acc
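
Each run(args) above pulls its hyperparameters from an argparse namespace. Below is a minimal driver sketch for Example #2: the flag names mirror the attributes its code reads from args, but the default values and the __main__ wrapper are illustrative assumptions rather than the authors' settings.

import argparse

def build_parser():
    # Flag names mirror what Example #2's run(args) accesses; defaults are placeholders.
    parser = argparse.ArgumentParser()
    parser.add_argument('--loc_emb_size', type=int, default=500)
    parser.add_argument('--uid_emb_size', type=int, default=40)
    parser.add_argument('--voc_emb_size', type=int, default=25)
    parser.add_argument('--tim_emb_size', type=int, default=10)
    parser.add_argument('--hidden_size', type=int, default=500)
    parser.add_argument('--dropout_p', type=float, default=0.3)
    parser.add_argument('--data_name', type=str, default='foursquare')
    parser.add_argument('--learning_rate', type=float, default=5e-4)
    parser.add_argument('--lr_step', type=int, default=2)
    parser.add_argument('--lr_decay', type=float, default=0.1)
    parser.add_argument('--L2', type=float, default=1e-5)
    parser.add_argument('--rnn_type', type=str, default='LSTM')
    parser.add_argument('--optim', type=str, default='Adam')
    parser.add_argument('--attn_type', type=str, default='dot')
    parser.add_argument('--clip', type=float, default=5.0)
    parser.add_argument('--epoch_max', type=int, default=30)
    parser.add_argument('--history_mode', type=str, default='avg')
    parser.add_argument('--model_mode', type=str, default='attn_local_long')
    parser.add_argument('--data_path', type=str, default='../data/')
    parser.add_argument('--save_path', type=str, default='../results/')
    parser.add_argument('--pretrain', type=int, default=0)
    return parser

if __name__ == '__main__':
    args = build_parser().parse_args()
    run(args)
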
Example #3
def run(args):
    parameters = RnnParameterData(
        loc_emb_size=args.loc_emb_size,
        uid_emb_size=args.uid_emb_size,  #500,40
        cid_emb_size=args.cid_emb_size,
        tim_emb_size=args.tim_emb_size,  #50,10
        hidden_size=args.hidden_size,
        dropout_p=args.dropout_p,  #500,0.3
        data_name=args.data_name,
        lr=args.learning_rate,
        lr_step=args.lr_step,
        lr_decay=args.lr_decay,
        L2=args.L2,
        optim=args.optim,
        clip=args.clip,
        epoch_max=args.epoch_max,
        data_path=args.data_path,
        save_path=args.save_path)

    argv = {
        'loc_emb_size': args.loc_emb_size,
        'uid_emb_size': args.uid_emb_size,
        'cid_emb_size': args.cid_emb_size,
        'tim_emb_size': args.tim_emb_size,
        'hidden_size': args.hidden_size,
        'dropout_p': args.dropout_p,
        'data_name': args.data_name,
        'learning_rate': args.learning_rate,
        'lr_step': args.lr_step,
        'lr_decay': args.lr_decay,
        'L2': args.L2,
        'act_type': 'selu',
        'optim': args.optim,
        'clip': args.clip,
        'epoch_max': args.epoch_max
    }

    auxiliary_rate = 0.05
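    # Note: `weight`, `weight_cid` (used just below) and `avg_acc_markov` (used further
    # down) are assumed to be computed elsewhere at module level; this snippet does not
    # define them.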
    model = PG2Net(parameters=parameters, weight=weight,
                   weight_cid=weight_cid).cuda()
    if args.pretrain == 1:
        model.load_state_dict(
            torch.load("../pretrain/" + args.model_mode + "/res.m"))

    criterion = nn.NLLLoss().cuda()
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=parameters.lr,
                           weight_decay=parameters.L2)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        'max',
        patience=parameters.lr_step,
        factor=parameters.lr_decay,
        threshold=1e-3)
    lr = parameters.lr
    #Metrics
    metrics = {
        'train_loss': [],
        'valid_loss': [],
        'accuracy': [],
        'valid_acc': {}
    }
    candidate = parameters.data_neural.keys()  #937 users

    data_train, train_idx = generate_input_long_history(parameters.data_neural,
                                                        'train',
                                                        candidate=candidate)
    data_test, test_idx = generate_input_long_history(parameters.data_neural,
                                                      'test',
                                                      candidate=candidate)

    print('users:{} markov:{} train:{} test:{}'.format(
        len(candidate), avg_acc_markov,
        len([y for x in train_idx for y in train_idx[x]]),
        len([y for x in test_idx for y in test_idx[x]])))
    SAVE_PATH = args.save_path
    msg = 'users:{} markov:{} train:{} test:{}'.format(
        len(candidate), avg_acc_markov,
        len([y for x in train_idx for y in train_idx[x]]),
        len([y for x in test_idx for y in test_idx[x]]))
    with open(SAVE_PATH + "result.txt", "a") as file:
        file.write(msg + "\n")
    tmp_path = 'checkpoint/'
    if not os.path.exists(SAVE_PATH + tmp_path):
        os.mkdir(SAVE_PATH + tmp_path)

    # Compute the time-similarity matrix
    time_sim_matrix = caculate_time_sim(parameters.data_neural)  #(48,48)
    # Load the time-category relationship
    poi_cid_tim = pickle.load(open('cid_time.pkl', 'rb'),
                              encoding='iso-8859-1')
    # Load the pairwise spatial distances between locations
    poi_distance_matrix = pickle.load(open('distance.pkl', 'rb'),
                                      encoding='iso-8859-1')

    for epoch in range(parameters.epoch):
        pred = []
        st = time.time()
        if args.pretrain == 0:
            model, avg_loss, pred = run_simple(
                pred, data_train, train_idx, auxiliary_rate, 'train', lr,
                parameters.clip, model, optimizer, criterion,
                parameters.model_mode, time_sim_matrix, poi_distance_matrix,
                poi_cid_tim)

            print('auxiliary_rate:{}'.format(auxiliary_rate))
            msg = 'auxiliary_rate:{}'.format(auxiliary_rate)
            with open(SAVE_PATH + "result.txt", "a") as file:
                file.write(msg + "\n")
            print('==>Train Epoch:{:0>2d} Loss:{:.4f} lr:{}'.format(
                epoch, avg_loss, lr))
            msg = '==>Train Epoch:{:0>2d} Loss:{:.4f} lr:{}'.format(
                epoch, avg_loss, lr)
            with open(SAVE_PATH + "result.txt", "a") as file:
                file.write(msg + "\n")
            metrics['train_loss'].append(avg_loss)
        avg_loss, avg_acc, users_acc, pred = run_simple(
            pred, data_test, test_idx, auxiliary_rate, 'test', lr,
            parameters.clip, model, optimizer, criterion,
            parameters.model_mode, time_sim_matrix, poi_distance_matrix,
            poi_cid_tim)
        #print('==>Test Acc:{:.4f} Loss:{:.4f}'.format(avg_acc, avg_loss))
        print(
            '==>Rec@1:{:.4f} Rec@5:{:.4f} Rec@10:{:.4f} NDCG@1:{:.4f} NDCG@5:{:.4f} NDCG@10:{:.4f} Loss:{:.4f}'
            .format(avg_acc[0], avg_acc[1], avg_acc[2], avg_acc[3], avg_acc[4],
                    avg_acc[5], avg_loss))
        msg = '==>Rec@1:{:.4f} Rec@5:{:.4f} Rec@10:{:.4f} NDCG@1:{:.4f} NDCG@5:{:.4f} NDCG@10:{:.4f} Loss:{:.4f}'.format(
            avg_acc[0], avg_acc[1], avg_acc[2], avg_acc[3], avg_acc[4],
            avg_acc[5], avg_loss)
        with open(SAVE_PATH + "result.txt", "a") as file:
            file.write(msg + "\n")

        pickle.dump(pred, open("{}_our_nyc_loc.pkl".format(epoch), 'wb'))
        metrics['valid_loss'].append(avg_loss)
        metrics['accuracy'].append(avg_acc[0])
        metrics['valid_acc'][epoch] = users_acc
        save_name_tmp = 'ep_' + str(epoch) + '.m'
        torch.save(model.state_dict(), SAVE_PATH + tmp_path + save_name_tmp)

        scheduler.step(avg_acc[0])
        lr_last = lr
        lr = optimizer.param_groups[0]['lr']
        if lr_last > lr:
            load_epoch = np.argmax(metrics['accuracy'])
            load_name_tmp = 'ep_' + str(load_epoch) + '.m'
            model.load_state_dict(
                torch.load(SAVE_PATH + tmp_path + load_name_tmp))
            auxiliary_rate += 0.05
            print('load epoch={} model state'.format(load_epoch))

            msg = 'load epoch={} model state'.format(load_epoch)
            with open(SAVE_PATH + "result.txt", "a") as file:
                file.write(msg + "\n")

        if epoch == 0:
            print('single epoch time cost:{}'.format(time.time() - st))
            msg = 'single epoch time cost:{}'.format(time.time() - st)
            with open(SAVE_PATH + "result.txt", "a") as file:
                file.write(msg + "\n")
        if lr <= 0.9 * 1e-7:
            break
        if args.pretrain == 1:
            break

    mid = np.argmax(metrics['accuracy'])
    avg_acc = metrics['accuracy'][mid]
    load_name_tmp = 'ep_' + str(mid) + '.m'
    print("Best model:", SAVE_PATH + tmp_path + load_name_tmp)
    return avg_acc
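
All three examples share the same training-loop skeleton: an Adam optimizer with ReduceLROnPlateau driven by a validation metric, a checkpoint saved every epoch, and a rollback to the best checkpoint whenever the learning rate drops. A condensed, self-contained sketch of that pattern follows; the helper names (train_one_epoch, evaluate, save_dir) and the default hyperparameters are placeholders, not part of the code above.

import os
import numpy as np
import torch
import torch.optim as optim

def fit_with_plateau_rollback(model, train_one_epoch, evaluate, save_dir,
                              lr=1e-3, l2=1e-5, lr_step=3, lr_decay=0.1,
                              epoch_max=30, lr_floor=0.9 * 1e-6):
    # train_one_epoch(model, optimizer) runs one pass over the training data;
    # evaluate(model) returns the validation metric to maximize (e.g. acc@1).
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           lr=lr, weight_decay=l2)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'max', patience=lr_step, factor=lr_decay, threshold=1e-3)
    history = []
    for epoch in range(epoch_max):
        train_one_epoch(model, optimizer)
        history.append(evaluate(model))
        torch.save(model.state_dict(), os.path.join(save_dir, 'ep_%d.m' % epoch))
        lr_last = optimizer.param_groups[0]['lr']
        scheduler.step(history[-1])            # reduce lr when the metric plateaus
        lr_now = optimizer.param_groups[0]['lr']
        if lr_now < lr_last:                   # lr was just reduced:
            best = int(np.argmax(history))     # roll back to the best epoch so far
            model.load_state_dict(torch.load(os.path.join(save_dir, 'ep_%d.m' % best)))
        if lr_now <= lr_floor:                 # stop once the lr has collapsed
            break
    best = int(np.argmax(history))             # reload the overall best checkpoint
    model.load_state_dict(torch.load(os.path.join(save_dir, 'ep_%d.m' % best)))
    return history[best]
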