def train_eval_model(graph_hyper_params): # global pos_train_data, neg_train_data, dev_data, predict_data, relevant_user_data, no_relevant_user_data, ad_data, feature_conf_dict all_train_data, dev_data, predict_data, relevant_user_data, no_relevant_user_data, ad_data, feature_conf_dict, re_uid_map, re_aid_map = get_prod_dataset(graph_hyper_params['formal']) print graph_hyper_params # 重新 split train dev o_dev_size = graph_hyper_params['o_dev_size'] atd = pd.concat([all_train_data, dev_data]) pos_atd, neg_atd = atd[atd['label'] == 1], atd[atd['label'] == 0] dev_data = pd.concat([pos_atd[:o_dev_size], neg_atd[:o_dev_size]]) pos_train_data, neg_train_data = pos_atd[o_dev_size:], neg_atd[o_dev_size:] print 'dev_size', len(dev_data) print 'pos-neg-all', len(pos_train_data), len(neg_train_data), len(all_train_data) del all_train_data gc.collect() # ********************************** print 'map row start' uid_map_row, aid_map_row = dict(zip(relevant_user_data['uid'].values, np.arange(len(relevant_user_data)))), dict(zip(ad_data['aid'].values, np.arange(len(ad_data)))) print 'map row end' # 对 creativeSize 这一个连续特征的处理 if graph_hyper_params['creativeSize_pro'] == 'min_max': print 'min-max norm creativeSize', ad_data['creativeSize'].max(), ad_data['creativeSize'].min() norm_cs = (ad_data['creativeSize'] * 1.0 - ad_data['creativeSize'].min()) / (ad_data['creativeSize'].max() - ad_data['creativeSize'].min()) ad_data = ad_data.drop(['creativeSize'], axis=1) ad_data['creativeSize'] = norm_cs creativesize_p = tf.placeholder(tf.float32, [None, 1], name="creativeSize") elif graph_hyper_params['creativeSize_pro'] == 'li_san': print '离散化 creativeSize' sh = ShrinkSep() ad_data['creativeSize'] = ad_data['creativeSize'].apply(sh) feature_conf_dict['creativeSize'] = len(sh.d) + 1 creativesize_p = tf.placeholder(tf.int32, [None, 1], name="creativeSize") else: print 'no process creativeSize' print feature_conf_dict # ****************************************************************** place holder start uid_p = tf.placeholder(tf.int32, [None, 1], name="uid") lbs_p = tf.placeholder(tf.int32, [None, 1], name="LBS") age_p = tf.placeholder(tf.int32, [None, 1], name="age") carrier_p = tf.placeholder(tf.int32, [None, 1], name="carrier") consumptionability_p = tf.placeholder(tf.int32, [None, 1], name="consumptionAbility") education_p = tf.placeholder(tf.int32, [None, 1], name="education") gender_p = tf.placeholder(tf.int32, [None, 1], name="gender") house_p = tf.placeholder(tf.int32, [None, 1], name="house") os_p = tf.placeholder(tf.int32, [None, 1], name="os") ct_p = tf.placeholder(tf.int32, [None, 1], name="ct") marriagestatus_p = tf.placeholder(tf.int32, [None, 1], name="marriageStatus") appidaction_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['appIdAction'][1]], name="appidaction_index") appidaction_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['appIdAction'][1]], name="appidaction_val") appIdInstall_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['appIdInstall'][1]], name="appIdInstall_index") appIdInstall_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['appIdInstall'][1]], name="appIdInstall_val") interest1_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['interest1'][1]], name="interest1_index") interest1_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['interest1'][1]], name="interest1_val") interest2_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['interest2'][1]], name="interest2_index") interest2_val_p = 
tf.placeholder(tf.float32, [None, 1, feature_conf_dict['interest2'][1]], name="interest2_val") interest3_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['interest3'][1]], name="interest3_index") interest3_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['interest3'][1]], name="interest3_val") interest4_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['interest4'][1]], name="interest4_index") interest4_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['interest4'][1]], name="interest4_val") interest5_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['interest5'][1]], name="interest5_index") interest5_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['interest5'][1]], name="interest5_val") kw1_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['kw1'][1]], name="kw1_index") kw1_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['kw1'][1]], name="kw1_val") kw2_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['kw2'][1]], name="kw2_index") kw2_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['kw2'][1]], name="kw2_val") kw3_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['kw3'][1]], name="kw3_index") kw3_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['kw3'][1]], name="kw3_val") topic1_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['topic1'][1]], name="topic1_index") topic1_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['topic1'][1]], name="topic1_val") topic2_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['topic2'][1]], name="topic2_index") topic2_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['topic2'][1]], name="topic2_val") topic3_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['topic3'][1]], name="topic3_index") topic3_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['topic3'][1]], name="topic3_val") aid_p = tf.placeholder(tf.int32, [None, 1], name="aid") advertiserid_p = tf.placeholder(tf.int32, [None, 1], name="advertiserId") campaignid_p = tf.placeholder(tf.int32, [None, 1], name="campaignId") creativeid_p = tf.placeholder(tf.int32, [None, 1], name="creativeId") adcategoryid_p = tf.placeholder(tf.int32, [None, 1], name="adCategoryId") productid_p = tf.placeholder(tf.int32, [None, 1], name="productId") producttype_p = tf.placeholder(tf.int32, [None, 1], name="productType") true_label = tf.placeholder(tf.float32, [None, 1], name="true_label") train_p = tf.placeholder(tf.bool, name="train_p") dropout_p = tf.placeholder(tf.float32, shape=[None], name="dropout_p") # ****************************************************************** place holder end pred_val, model_loss, network_params = inference(uid_p, lbs_p, age_p, carrier_p, consumptionability_p, education_p, gender_p, house_p, os_p, ct_p, marriagestatus_p, appidaction_index_p, appidaction_val_p, appIdInstall_index_p, appIdInstall_val_p, interest1_index_p, interest1_val_p, interest2_index_p, interest2_val_p, interest3_index_p, interest3_val_p, interest4_index_p, interest4_val_p, interest5_index_p, interest5_val_p, kw1_index_p, kw1_val_p, kw2_index_p, kw2_val_p, kw3_index_p, kw3_val_p, topic1_index_p, topic1_val_p, topic2_index_p, topic2_val_p, topic3_index_p, topic3_val_p, aid_p, advertiserid_p, campaignid_p, creativeid_p, adcategoryid_p, productid_p, producttype_p, creativesize_p, true_label, feature_conf_dict, graph_hyper_params, train_p, dropout_p) # pred_val_for_pre, _, __ = inference(uid_p, lbs_p, age_p, 
carrier_p, consumptionability_p, education_p, # gender_p, house_p, os_p, ct_p, marriagestatus_p, appidaction_index_p, appidaction_val_p, appIdInstall_index_p, # appIdInstall_val_p, interest1_index_p, interest1_val_p, interest2_index_p, interest2_val_p, interest3_index_p, interest3_val_p, interest4_index_p, # interest4_val_p, interest5_index_p, interest5_val_p, kw1_index_p, kw1_val_p, kw2_index_p, kw2_val_p, # kw3_index_p, kw3_val_p, topic1_index_p, topic1_val_p, topic2_index_p, topic2_val_p, topic3_index_p, # topic3_val_p, aid_p, advertiserid_p, campaignid_p, creativeid_p, adcategoryid_p, productid_p, producttype_p, creativesize_p, true_label, feature_conf_dict, graph_hyper_params, istrain=False) global_step = tf.Variable(0, name="global_step", trainable=False) train_step = None if graph_hyper_params['opt'] == 'adam': train_step = tf.train.AdamOptimizer(graph_hyper_params['learn_rate']).minimize(model_loss, global_step=global_step) elif graph_hyper_params['opt'] == 'adgrad': train_step = tf.train.AdagradOptimizer(graph_hyper_params['learn_rate']).minimize(model_loss, global_step=global_step) elif graph_hyper_params['opt'] == 'adadelta': train_step = tf.train.AdadeltaOptimizer(graph_hyper_params['learn_rate']).minimize(model_loss, global_step=global_step) else: print 'No optimizer !' time_now = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") checkpoint_dir = os.path.abspath("./checkpoints/dmf_tencent/" + time_now) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=1) # config = tf.ConfigProto() # config.gpu_options.per_process_gpu_memory_fraction = 0.5 # sess = tf.Session(config=config) sess = tf.Session() sess.run(tf.global_variables_initializer()) def get_fed_dict(b_data, split_vector_data): if graph_hyper_params['formal']: aid_list = b_data['aid'].values uid_list = b_data['uid'].values else: if len(b_data) == 4: aid_list, uid_list = [11, 11, 11, 11], [11, 190, 191, 11] elif len(b_data) == 3: aid_list, uid_list = [11, 11, 11], [11, 190, 191] else: aid_list, uid_list = [11], [11] # print 11 # d1 = datetime.now() b_u_d, b_a_d = [], [] for b_uid in uid_list: b_u_d.append(relevant_user_data.iloc[uid_map_row[b_uid]]) for b_aid in aid_list: b_a_d.append(ad_data.iloc[aid_map_row[b_aid]]) b_u_d = pd.concat(b_u_d, axis=1).transpose() b_a_d = pd.concat(b_a_d, axis=1).transpose() # d3 = datetime.now() # print 12 # pd.concat([data.iloc[1].to_frame(), data.iloc[2].to_frame()], axis=1).transpose() fed_dict = {} fed_dict[uid_p] = np.expand_dims(b_u_d['uid'], axis=1) fed_dict[lbs_p] = np.expand_dims(b_u_d['LBS'], axis=1) fed_dict[age_p] = np.expand_dims(b_u_d['age'], axis=1) fed_dict[carrier_p] = np.expand_dims(b_u_d['carrier'], axis=1) fed_dict[consumptionability_p] = np.expand_dims(b_u_d['consumptionAbility'], axis=1) fed_dict[education_p] = np.expand_dims(b_u_d['education'], axis=1) fed_dict[gender_p] = np.expand_dims(b_u_d['gender'], axis=1) fed_dict[house_p] = np.expand_dims(b_u_d['house'], axis=1) fed_dict[os_p] = np.expand_dims(b_u_d['os'], axis=1) fed_dict[ct_p] = np.expand_dims(b_u_d['ct'], axis=1) fed_dict[marriagestatus_p] = np.expand_dims(b_u_d['marriageStatus'], axis=1) # print 121 appidaction_li = split_vector_data(b_u_d['appIdAction']) # print 1212 fed_dict[appidaction_index_p], fed_dict[appidaction_val_p] = appidaction_li[0], appidaction_li[1] appIdInstall_li = split_vector_data(b_u_d['appIdInstall']) fed_dict[appIdInstall_index_p], fed_dict[appIdInstall_val_p] 
= appIdInstall_li[0], appIdInstall_li[1] # print 122 interest1_li = split_vector_data(b_u_d['interest1']) fed_dict[interest1_index_p], fed_dict[interest1_val_p] = interest1_li[0], interest1_li[1] interest2_li = split_vector_data(b_u_d['interest2']) fed_dict[interest2_index_p], fed_dict[interest2_val_p] = interest2_li[0], interest2_li[1] interest3_li = split_vector_data(b_u_d['interest3']) fed_dict[interest3_index_p], fed_dict[interest3_val_p] = interest3_li[0], interest3_li[1] interest4_li = split_vector_data(b_u_d['interest4']) fed_dict[interest4_index_p], fed_dict[interest4_val_p] = interest4_li[0], interest4_li[1] interest5_li = split_vector_data(b_u_d['interest5']) fed_dict[interest5_index_p], fed_dict[interest5_val_p] = interest5_li[0], interest5_li[1] # print 123 kw1_li = split_vector_data(b_u_d['kw1']) fed_dict[kw1_index_p], fed_dict[kw1_val_p] = kw1_li[0], kw1_li[1] kw2_li = split_vector_data(b_u_d['kw2']) fed_dict[kw2_index_p], fed_dict[kw2_val_p] = kw2_li[0], kw2_li[1] kw3_li = split_vector_data(b_u_d['kw3']) fed_dict[kw3_index_p], fed_dict[kw3_val_p] = kw3_li[0], kw3_li[1] # print 124 topic1_li = split_vector_data(b_u_d['topic1']) fed_dict[topic1_index_p], fed_dict[topic1_val_p] = topic1_li[0], topic1_li[1] topic2_li = split_vector_data(b_u_d['topic2']) fed_dict[topic2_index_p], fed_dict[topic2_val_p] = topic2_li[0], topic2_li[1] topic3_li = split_vector_data(b_u_d['topic3']) fed_dict[topic3_index_p], fed_dict[topic3_val_p] = topic3_li[0], topic3_li[1] # print 125 # # ad fed_dict[aid_p] = np.expand_dims(b_a_d['aid'], axis=1) fed_dict[advertiserid_p] = np.expand_dims(b_a_d['advertiserId'], axis=1) fed_dict[campaignid_p] = np.expand_dims(b_a_d['campaignId'], axis=1) fed_dict[creativeid_p] = np.expand_dims(b_a_d['creativeId'], axis=1) fed_dict[adcategoryid_p] = np.expand_dims(b_a_d['adCategoryId'], axis=1) fed_dict[productid_p] = np.expand_dims(b_a_d['productId'], axis=1) fed_dict[producttype_p] = np.expand_dims(b_a_d['productType'], axis=1) # print 13 # fed_dict[creativesize_p] = np.expand_dims(b_a_d['creativeSize'], axis=1) if graph_hyper_params['creativeSize_pro'] == 'min_max': fed_dict[creativesize_p] = np.expand_dims(b_a_d['creativeSize'], axis=1).astype(np.float32) elif graph_hyper_params['creativeSize_pro'] == 'li_san': fed_dict[creativesize_p] = np.expand_dims(b_a_d['creativeSize'], axis=1) else: print 'wrong feed' # label # print 14 fed_dict[true_label] = np.expand_dims(b_data['label'].values, axis=1).astype(np.float32) # print 15 # d4 = datetime.now() # print d2-d1, d3-d2, d4-d3 # print fed_dict[true_label] # print len(fed_dict[true_label]), len(fed_dict[aid_p]), len(fed_dict[uid_p]), return fed_dict def eval_on_dev(split_vector_data): e_b_s = len(dev_data) / graph_hyper_params['batch_size'] auc_true, auc_pre = [], [] # auc = [] for index in tqdm(range(e_b_s)): start = index * graph_hyper_params['batch_size'] end = (index + 1) * graph_hyper_params['batch_size'] if (index + 1) * graph_hyper_params['batch_size'] < len(dev_data) else len(dev_data) b_dev_data = dev_data[start:end] fed_dict = get_fed_dict(b_dev_data, split_vector_data) fed_dict[train_p] = False fed_dict[dropout_p] = np.array([1.0]) pred_value = sess.run([pred_val], feed_dict=fed_dict) pre_real_val = np.array(pred_value).reshape((-1)) auc_true = auc_true + list(b_dev_data['label'].values) auc_pre = auc_pre + pre_real_val.tolist() # auc.append() # auc_pre = np.array(auc_pre) # auc_pre = np.exp(auc_pre) / np.exp(auc_pre).sum() # print auc_true # print auc_pre fpr, tpr, thresholds = metrics.roc_curve(auc_true, 
auc_pre, pos_label=1) # >> > metrics.auc(fpr, tpr) return metrics.auc(fpr, tpr), gini_norm(auc_true, auc_pre) # def predict_csv(split_vector_data): # e_b_s = len(predict_data) / graph_hyper_params['batch_size'] if len(predict_data) % graph_hyper_params['batch_size']==0 else len(predict_data) / graph_hyper_params['batch_size'] + 1 # pred = [] # for index in tqdm(range(e_b_s)): # start = index * graph_hyper_params['batch_size'] # end = (index + 1) * graph_hyper_params['batch_size'] if (index + 1) * graph_hyper_params['batch_size'] < len(predict_data) else len(predict_data)+1 # b_predict_data = predict_data[start:end] # # print len(b_predict_data), start, end # fed_dict = get_fed_dict(b_predict_data, split_vector_data) # pred_value = sess.run([pred_val], feed_dict=fed_dict) # pre_real_val = np.array(pred_value).reshape((-1)) # pred = pred + pre_real_val.tolist() # # print len(pred), len(predict_data) # predict_data['pred_label'] = pred # csv_data = predict_data[['ori_aid', 'ori_uid', 'pred_label']] # csv_data.columns = ['aid', 'uid', 'score'] # csv_path = checkpoint_dir+'/submission.csv' # csv_data.to_csv(csv_path, index=False) # return csv_path def save_predict_material(user_data, ad_data): user_data_file = os.path.join(checkpoint_dir, 'user_data_file.csv') ad_data_file = os.path.join(checkpoint_dir, 'ad_data_file.csv') graph_hyper_params_file = os.path.join(checkpoint_dir, 'graph_hyper_params_file.pic') user_data.to_csv(user_data_file, index=False) ad_data.to_csv(ad_data_file, index=False) pickle.dump(graph_hyper_params, open(graph_hyper_params_file, 'w')) pass def construct_train_data(start_neg, pos_train_data, neg_train_data, graph_hyper_params): # global pos_train_data, neg_train_data, start_neg pos_len, neg_len = len(pos_train_data), len(neg_train_data) # print start_neg, pos_len, neg_len if start_neg + pos_len < neg_len: this_neg_train_data = neg_train_data[start_neg : start_neg + graph_hyper_params['neg_size']*pos_len] start_neg += pos_len*graph_hyper_params['neg_size'] else: this_neg_train_data = pd.concat([neg_train_data[start_neg : neg_len], neg_train_data[0 : graph_hyper_params['neg_size']*pos_len - (neg_len-start_neg)]]) start_neg = graph_hyper_params['neg_size']*pos_len - (neg_len-start_neg) train_data = pd.concat([pos_train_data, this_neg_train_data]) return shuffle(train_data), start_neg best_auc = 0.0 start_neg = 0 split_vector_data = SplitClass() save_data_for_predict = False for epoch in range(graph_hyper_params['epoch']): train_data, start_neg = construct_train_data(start_neg, pos_train_data, neg_train_data, graph_hyper_params) if start_neg < graph_hyper_params['neg_size'] * len(pos_train_data): neg_train_data = shuffle(neg_train_data) e_b_s = len(train_data) / graph_hyper_params['batch_size'] one_epoch_loss, one_epoch_batchnum = 0.0, 0.0 early_stop_hit = 0 split_vector_data.clean() for index in tqdm(range(e_b_s)): # print 0 start = index * graph_hyper_params['batch_size'] end = (index + 1) * graph_hyper_params['batch_size'] if (index + 1) * graph_hyper_params['batch_size'] < len(train_data) else len(train_data) b_data = train_data[start:end] # print 1 # d1 = datetime.now() fed_dict = get_fed_dict(b_data, split_vector_data) fed_dict[train_p] = True fed_dict[dropout_p] = np.array([graph_hyper_params['dropout_keep']]) # d2 = datetime.now() # print 2 _, loss_val = sess.run([train_step, model_loss], feed_dict=fed_dict) # print 3 # d3 = datetime.now() # print d2-d1, d3-d2 one_epoch_loss += loss_val one_epoch_batchnum += 1.0 if graph_hyper_params['debug']: print datetime.now(), 
index, loss_val
            if index != 0 and index % ((e_b_s - 1) / graph_hyper_params['show_peroid']) == 0:
                auc, gn = eval_on_dev(split_vector_data)
                best_auc = max(auc, best_auc)
                format_str = '%s epoch=%.2f avg_loss=%.4f auc=%.4f best_auc=%.4f gn=%.4f'
                print (format_str % (datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                                     (epoch + 1.0 * (index + 1) / e_b_s),
                                     one_epoch_loss / one_epoch_batchnum, auc, best_auc, gn))
                one_epoch_loss = one_epoch_batchnum = 0.0
                # global split_cache, split_cache_rem_size
                # if len(split_cache) > 10000000:
                #     keys = split_cache.keys()
                #     for key in keys:
                #         if split_cache_rem_size[key] < 2:
                #             del split_cache_rem_size[key], split_cache[key]
                # save a checkpoint when the dev AUC ties/beats the best, late enough in training
                if (auc >= best_auc and (epoch + 1.0 * (index + 1) / e_b_s) >= 0.6 and auc > 0.72) or (auc >= best_auc and auc > 0.75):
                    current_step = tf.train.global_step(sess, global_step)
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("saved model to: %s" % path)
                    if not save_data_for_predict:
                        udp = pd.concat([relevant_user_data, no_relevant_user_data])
                        save_predict_material(udp, ad_data)
                        save_data_for_predict = True
                    early_stop_hit = 0
                elif auc < best_auc and abs(auc - best_auc) > 0.02:
                    # early stopping: three evaluations well below the best AUC in a row
                    early_stop_hit += 1
                    if early_stop_hit >= 3:
                        print 'early_stop_best:', best_auc
                        import sys
                        sys.exit(0)

    # csv_path = predict_csv(split_vector_data)
    # print 'save csv to: ', csv_path
    pass
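
# ---------------------------------------------------------------------------
# Illustrative sketch (assumption): ShrinkSep is defined elsewhere in this
# repo, so the class below is only a guess at its behaviour, reconstructed
# from how it is used above -- it is applied once per raw creativeSize value
# and exposes a dict `d` whose size sets the embedding vocabulary
# (`feature_conf_dict['creativeSize'] = len(sh.d) + 1`).
class ShrinkSepSketch(object):
    """Map each distinct raw value to a compact 1-based integer id."""

    def __init__(self):
        self.d = {}

    def __call__(self, v):
        # ids start at 1 so that 0 stays free for padding / unknown values
        if v not in self.d:
            self.d[v] = len(self.d) + 1
        return self.d[v]
# Usage would mirror the 'li_san' branch above, e.g.:
#   sh = ShrinkSepSketch()
#   ad_data['creativeSize'] = ad_data['creativeSize'].apply(sh)
#   feature_conf_dict['creativeSize'] = len(sh.d) + 1
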
def train_eval_model(graph_hyper_params): # global pos_train_data, neg_train_data, dev_data, predict_data, relevant_user_data, no_relevant_user_data, ad_data, feature_conf_dict all_train_data, dev_data, predict_data, relevant_user_data, no_relevant_user_data, ad_data, feature_conf_dict, re_uid_map, re_aid_map = get_prod_dataset( graph_hyper_params['formal']) print graph_hyper_params var_name_val = {} if graph_hyper_params['mp_w'] is not None and graph_hyper_params[ 'mp_d'] is not None: print 'reload model start !' print '\t reload wide' graph_wide = tf.Graph() with graph_wide.as_default(): checkpoint_file = tf.train.latest_checkpoint( graph_hyper_params['mp_w']) wide_saver = tf.train.import_meta_graph( "{}.meta".format(checkpoint_file)) sess_wide = tf.Session() wide_saver.restore(sess_wide, checkpoint_file) for na in graph_wide.get_collection('trainable_variables'): if na in var_name_val: print "wrong 1!" var_name_val[na.name] = np.array(sess_wide.run(na)).astype( np.float) sess_wide.close() # print '1', var_name_val.keys() print '\t reload deep' graph_deep = tf.Graph() with graph_deep.as_default(): checkpoint_file = tf.train.latest_checkpoint( graph_hyper_params['mp_d']) deep_saver = tf.train.import_meta_graph( "{}.meta".format(checkpoint_file)) sess_deep = tf.Session(graph=graph_deep) deep_saver.restore(sess_deep, checkpoint_file) for na in graph_deep.get_collection('trainable_variables'): if na in var_name_val: print "wrong 2!" var_name_val[na.name] = np.array(sess_deep.run(na)).astype( np.float32) sess_deep.close() print 'reload model done !' graph = tf.Graph() with graph.as_default(): # 重新 split train dev o_dev_size = graph_hyper_params['o_dev_size'] atd = pd.concat([all_train_data, dev_data]) pos_atd, neg_atd = atd[atd['label'] == 1], atd[atd['label'] == 0] dev_data = pd.concat([pos_atd[:o_dev_size], neg_atd[:o_dev_size]]) pos_train_data, neg_train_data = pos_atd[o_dev_size:], neg_atd[ o_dev_size:] print 'dev_size', len(dev_data) print 'pos-neg-all', len(pos_train_data), len(neg_train_data), len( all_train_data) del all_train_data gc.collect() # ********************************** print 'map row start' uid_map_row, aid_map_row = dict( zip(relevant_user_data['uid'].values, np.arange(len(relevant_user_data)))), dict( zip(ad_data['aid'].values, np.arange(len(ad_data)))) print 'map row end' # 对 creativeSize 这一个连续特征的处理 if graph_hyper_params['creativeSize_pro'] == 'min_max': print 'min-max norm creativeSize', ad_data['creativeSize'].max( ), ad_data['creativeSize'].min() norm_cs = ( ad_data['creativeSize'] * 1.0 - ad_data['creativeSize'].min() ) / (ad_data['creativeSize'].max() - ad_data['creativeSize'].min()) ad_data = ad_data.drop(['creativeSize'], axis=1) ad_data['creativeSize'] = norm_cs creativesize_p = tf.placeholder(tf.float32, [None, 1], name="creativeSize") elif graph_hyper_params['creativeSize_pro'] == 'li_san': print '离散化 creativeSize' sh = ShrinkSep() ad_data['creativeSize'] = ad_data['creativeSize'].apply(sh) feature_conf_dict['creativeSize'] = len(sh.d) + 1 creativesize_p = tf.placeholder(tf.int32, [None, 1], name="creativeSize") else: print 'no process creativeSize' print 'for cross feature' sh2 = ShrinkSep() ad_data['creativeSize_cross'] = ad_data['creativeSize'].apply(sh2) feature_conf_dict['creativeSize_cross'] = len(sh2.d) + 1 print feature_conf_dict # ****************************************************************** place holder start uid_p = tf.placeholder(tf.int32, [None, 1], name="uid") lbs_p = tf.placeholder(tf.int32, [None, 1], name="LBS") age_p = 
tf.placeholder(tf.int32, [None, 1], name="age") carrier_p = tf.placeholder(tf.int32, [None, 1], name="carrier") consumptionability_p = tf.placeholder(tf.int32, [None, 1], name="consumptionAbility") education_p = tf.placeholder(tf.int32, [None, 1], name="education") gender_p = tf.placeholder(tf.int32, [None, 1], name="gender") house_p = tf.placeholder(tf.int32, [None, 1], name="house") os_p = tf.placeholder(tf.int32, [None, 1], name="os") ct_p = tf.placeholder(tf.int32, [None, 1], name="ct") marriagestatus_p = tf.placeholder(tf.int32, [None, 1], name="marriageStatus") appidaction_index_p = tf.placeholder( tf.int32, [None, feature_conf_dict['appIdAction'][1]], name="appidaction_index") appidaction_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['appIdAction'][1]], name="appidaction_val") appIdInstall_index_p = tf.placeholder( tf.int32, [None, feature_conf_dict['appIdInstall'][1]], name="appIdInstall_index") appIdInstall_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['appIdInstall'][1]], name="appIdInstall_val") interest1_index_p = tf.placeholder( tf.int32, [None, feature_conf_dict['interest1'][0]], name="interest1_index") interest1_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['interest1'][0]], name="interest1_val") interest2_index_p = tf.placeholder( tf.int32, [None, feature_conf_dict['interest2'][0]], name="interest2_index") interest2_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['interest2'][0]], name="interest2_val") interest3_index_p = tf.placeholder( tf.int32, [None, feature_conf_dict['interest3'][0]], name="interest3_index") interest3_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['interest3'][0]], name="interest3_val") interest4_index_p = tf.placeholder( tf.int32, [None, feature_conf_dict['interest4'][0]], name="interest4_index") interest4_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['interest4'][0]], name="interest4_val") interest5_index_p = tf.placeholder( tf.int32, [None, feature_conf_dict['interest5'][0]], name="interest5_index") interest5_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['interest5'][0]], name="interest5_val") kw1_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['kw1'][1]], name="kw1_index") kw1_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['kw1'][1]], name="kw1_val") kw2_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['kw2'][1]], name="kw2_index") kw2_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['kw2'][1]], name="kw2_val") kw3_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['kw3'][1]], name="kw3_index") kw3_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['kw3'][1]], name="kw3_val") topic1_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['topic1'][1]], name="topic1_index") topic1_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['topic1'][1]], name="topic1_val") topic2_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['topic2'][1]], name="topic2_index") topic2_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['topic2'][1]], name="topic2_val") topic3_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['topic3'][1]], name="topic3_index") topic3_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['topic3'][1]], name="topic3_val") aid_p = tf.placeholder(tf.int32, [None, 1], name="aid") advertiserid_p = tf.placeholder(tf.int32, [None, 1], name="advertiserId") campaignid_p = tf.placeholder(tf.int32, 
[None, 1], name="campaignId") creativeid_p = tf.placeholder(tf.int32, [None, 1], name="creativeId") adcategoryid_p = tf.placeholder(tf.int32, [None, 1], name="adCategoryId") productid_p = tf.placeholder(tf.int32, [None, 1], name="productId") producttype_p = tf.placeholder(tf.int32, [None, 1], name="productType") # for cross part user_input_len, user_all_len, user_feature_start = 0, 0, {} ad_input_len, ad_all_len, ad_feature_start = 0, 0, {} for fea in user_features: if fea == 'uid': continue user_feature_start[fea] = user_all_len if type(feature_conf_dict[fea]) is int: user_input_len += 1 user_all_len += feature_conf_dict[fea] elif 'interest' in fea: user_input_len += feature_conf_dict[fea][0] user_all_len += feature_conf_dict[fea][0] else: user_input_len += feature_conf_dict[fea][1] user_all_len += feature_conf_dict[fea][0] for fea in ad_features_for_cross: if fea == 'aid': continue ad_feature_start[fea] = ad_all_len if type(feature_conf_dict[fea]) is int: ad_input_len += 1 ad_all_len += feature_conf_dict[fea] else: ad_input_len += feature_conf_dict[fea][1] ad_all_len += feature_conf_dict[fea][0] # cross_ind_p = tf.placeholder(tf.int32, [None, user_input_len*ad_input_len], name="productType") # cross_val_p = tf.placeholder(tf.float32, [None, 1, user_input_len*ad_input_len], name="productType") feature_conf_dict['cross_len_for_emb'] = user_all_len * ad_all_len print '-------cross-info-start-------' print 'user_input_len_all_len', user_input_len, user_all_len print 'ad_input_len_all_len', ad_input_len, ad_all_len print '-------cross-info-end---------' true_label = tf.placeholder(tf.float32, [None, 1], name="true_label") train_p = tf.placeholder(tf.bool, name="train_p") dropout_p = tf.placeholder(tf.float32, shape=[None], name="dropout_p") # ****************************************************************** place holder end pred_val, model_loss, network_params = inference( uid_p, lbs_p, age_p, carrier_p, consumptionability_p, education_p, gender_p, house_p, os_p, ct_p, marriagestatus_p, appidaction_index_p, appidaction_val_p, appIdInstall_index_p, appIdInstall_val_p, interest1_index_p, interest1_val_p, interest2_index_p, interest2_val_p, interest3_index_p, interest3_val_p, interest4_index_p, interest4_val_p, interest5_index_p, interest5_val_p, kw1_index_p, kw1_val_p, kw2_index_p, kw2_val_p, kw3_index_p, kw3_val_p, topic1_index_p, topic1_val_p, topic2_index_p, topic2_val_p, topic3_index_p, topic3_val_p, aid_p, advertiserid_p, campaignid_p, creativeid_p, adcategoryid_p, productid_p, producttype_p, creativesize_p, true_label, feature_conf_dict, graph_hyper_params, train_p, dropout_p, user_feature_start, ad_feature_start, user_input_len, user_all_len, ad_all_len) # pred_val_for_pre, _, __ = inference(uid_p, lbs_p, age_p, carrier_p, consumptionability_p, education_p, # gender_p, house_p, os_p, ct_p, marriagestatus_p, appidaction_index_p, appidaction_val_p, appIdInstall_index_p, # appIdInstall_val_p, interest1_index_p, interest1_val_p, interest2_index_p, interest2_val_p, interest3_index_p, interest3_val_p, interest4_index_p, # interest4_val_p, interest5_index_p, interest5_val_p, kw1_index_p, kw1_val_p, kw2_index_p, kw2_val_p, # kw3_index_p, kw3_val_p, topic1_index_p, topic1_val_p, topic2_index_p, topic2_val_p, topic3_index_p, # topic3_val_p, aid_p, advertiserid_p, campaignid_p, creativeid_p, adcategoryid_p, productid_p, producttype_p, creativesize_p, true_label, feature_conf_dict, graph_hyper_params, istrain=False) global_step = tf.Variable(0, name="global_step", trainable=False) train_step = None 
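        # Note on the cross-feature bookkeeping above (assumption -- the actual
        # combination happens inside inference(), which is not shown here):
        # every user feature and every ad feature is assigned an offset into a
        # flat user_all_len / ad_all_len id space, so a (user value, ad value)
        # pair can be folded into a single index of the cross embedding table
        # of size user_all_len * ad_all_len ('cross_len_for_emb'), e.g.:
        #   cross_index = (user_feature_start[u_fea] + u_val) * ad_all_len \
        #                 + (ad_feature_start[a_fea] + a_val)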
learning_rate = tf.Variable(float(graph_hyper_params['learn_rate']), trainable=False, dtype=tf.float32) learning_rate_decay_op = learning_rate.assign(learning_rate * 0.5) if graph_hyper_params['opt'] == 'adam': train_step = tf.train.AdamOptimizer(learning_rate).minimize( model_loss, global_step=global_step) elif graph_hyper_params['opt'] == 'adgrad': train_step = tf.train.AdagradOptimizer(learning_rate).minimize( model_loss, global_step=global_step) elif graph_hyper_params['opt'] == 'adadelta': train_step = tf.train.AdadeltaOptimizer(learning_rate).minimize( model_loss, global_step=global_step) elif graph_hyper_params['opt'] == 'ftrl': train_step = tf.train.FtrlOptimizer(learning_rate).minimize( model_loss, global_step=global_step) elif graph_hyper_params['opt'] == 'sgd': train_step = tf.train.GradientDescentOptimizer( learning_rate).minimize(model_loss, global_step=global_step) else: print 'No optimizer !' time_now = 'mtyp' + str(graph_hyper_params['mtyp']) + datetime.now( ).strftime("-%Y-%m-%d-%H-%M-%S") checkpoint_dir = os.path.abspath("./checkpoints/dmf_tencent/" + time_now) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=1) # config = tf.ConfigProto() # config.gpu_options.per_process_gpu_memory_fraction = 0.5 # sess = tf.Session(config=config) sess = tf.Session() sess.run(tf.global_variables_initializer()) if graph_hyper_params['mtyp'] == 4: for na in graph.get_collection('trainable_variables'): if na.name in var_name_val: print 'assign: ', na.name sess.run( tf.assign(graph.get_tensor_by_name(na.name), var_name_val[na.name])) del var_name_val[na.name] else: print 'not in: ', na.name def get_fed_dict(b_data, split_vector_data, feature_conf_dict): if graph_hyper_params['formal']: aid_list = b_data['aid'].values uid_list = b_data['uid'].values else: if len(b_data) == 4: aid_list, uid_list = [11, 11, 11, 11], [11, 190, 191, 11] elif len(b_data) == 3: aid_list, uid_list = [11, 11, 11], [11, 190, 191] else: aid_list, uid_list = [11], [11] # print 11 # d1 = datetime.now() b_u_d, b_a_d = [], [] for b_uid in uid_list: b_u_d.append(relevant_user_data.iloc[uid_map_row[b_uid]]) for b_aid in aid_list: b_a_d.append(ad_data.iloc[aid_map_row[b_aid]]) b_u_d = pd.concat(b_u_d, axis=1).transpose() b_a_d = pd.concat(b_a_d, axis=1).transpose() # d3 = datetime.now() # print 12 # pd.concat([data.iloc[1].to_frame(), data.iloc[2].to_frame()], axis=1).transpose() fed_dict = {} fed_dict[uid_p] = np.expand_dims(b_u_d['uid'], axis=1) fed_dict[lbs_p] = np.expand_dims(b_u_d['LBS'], axis=1) fed_dict[age_p] = np.expand_dims(b_u_d['age'], axis=1) fed_dict[carrier_p] = np.expand_dims(b_u_d['carrier'], axis=1) fed_dict[consumptionability_p] = np.expand_dims( b_u_d['consumptionAbility'], axis=1) fed_dict[education_p] = np.expand_dims(b_u_d['education'], axis=1) fed_dict[gender_p] = np.expand_dims(b_u_d['gender'], axis=1) fed_dict[house_p] = np.expand_dims(b_u_d['house'], axis=1) fed_dict[os_p] = np.expand_dims(b_u_d['os'], axis=1) fed_dict[ct_p] = np.expand_dims(b_u_d['ct'], axis=1) fed_dict[marriagestatus_p] = np.expand_dims( b_u_d['marriageStatus'], axis=1) # print 121 appidaction_li = split_vector_data(b_u_d['appIdAction']) # print 1212 fed_dict[appidaction_index_p], fed_dict[ appidaction_val_p] = appidaction_li[0], appidaction_li[1] appIdInstall_li = split_vector_data(b_u_d['appIdInstall']) fed_dict[appIdInstall_index_p], fed_dict[ appIdInstall_val_p] = appIdInstall_li[0], 
appIdInstall_li[1] # print 122 interest1_li = split_vector_data(b_u_d['interest1'], interest='interest1', feature_config=feature_conf_dict) fed_dict[interest1_index_p], fed_dict[ interest1_val_p] = interest1_li[0], interest1_li[1] interest2_li = split_vector_data(b_u_d['interest2'], interest='interest2', feature_config=feature_conf_dict) fed_dict[interest2_index_p], fed_dict[ interest2_val_p] = interest2_li[0], interest2_li[1] interest3_li = split_vector_data(b_u_d['interest3'], interest='interest3', feature_config=feature_conf_dict) fed_dict[interest3_index_p], fed_dict[ interest3_val_p] = interest3_li[0], interest3_li[1] interest4_li = split_vector_data(b_u_d['interest4'], interest='interest4', feature_config=feature_conf_dict) fed_dict[interest4_index_p], fed_dict[ interest4_val_p] = interest4_li[0], interest4_li[1] interest5_li = split_vector_data(b_u_d['interest5'], interest='interest5', feature_config=feature_conf_dict) fed_dict[interest5_index_p], fed_dict[ interest5_val_p] = interest5_li[0], interest5_li[1] # print 123 kw1_li = split_vector_data(b_u_d['kw1']) fed_dict[kw1_index_p], fed_dict[kw1_val_p] = kw1_li[0], kw1_li[1] kw2_li = split_vector_data(b_u_d['kw2']) fed_dict[kw2_index_p], fed_dict[kw2_val_p] = kw2_li[0], kw2_li[1] kw3_li = split_vector_data(b_u_d['kw3']) fed_dict[kw3_index_p], fed_dict[kw3_val_p] = kw3_li[0], kw3_li[1] # print 124 topic1_li = split_vector_data(b_u_d['topic1']) fed_dict[topic1_index_p], fed_dict[topic1_val_p] = topic1_li[ 0], topic1_li[1] topic2_li = split_vector_data(b_u_d['topic2']) fed_dict[topic2_index_p], fed_dict[topic2_val_p] = topic2_li[ 0], topic2_li[1] topic3_li = split_vector_data(b_u_d['topic3']) fed_dict[topic3_index_p], fed_dict[topic3_val_p] = topic3_li[ 0], topic3_li[1] # print 125 # cross user # user_vec_fed_index = np.hstack([ # fed_dict[lbs_p] + user_feature_start['LBS'], # fed_dict[age_p] + user_feature_start['age'], # fed_dict[carrier_p] + user_feature_start['carrier'], # fed_dict[consumptionability_p] + user_feature_start['consumptionAbility'], # fed_dict[education_p] + user_feature_start['education'], # fed_dict[gender_p] + user_feature_start['gender'], # fed_dict[house_p] + user_feature_start['house'], # fed_dict[os_p] + user_feature_start['os'], # fed_dict[ct_p] + user_feature_start['ct'], # fed_dict[marriagestatus_p] + user_feature_start['marriageStatus'], # fed_dict[appidaction_index_p] + user_feature_start['appIdAction'], # fed_dict[appIdInstall_index_p] + user_feature_start['appIdInstall'], # fed_dict[interest1_index_p] + user_feature_start['interest1'], # fed_dict[interest2_index_p] + user_feature_start['interest2'], # fed_dict[interest3_index_p] + user_feature_start['interest3'], # fed_dict[interest4_index_p] + user_feature_start['interest4'], # fed_dict[interest5_index_p] + user_feature_start['interest5'], # fed_dict[kw1_index_p] + user_feature_start['kw1'], # fed_dict[kw2_index_p] + user_feature_start['kw2'], # fed_dict[kw3_index_p] + user_feature_start['kw3'], # fed_dict[topic1_index_p] + user_feature_start['topic1'], # fed_dict[topic2_index_p] + user_feature_start['topic2'], # fed_dict[topic3_index_p] + user_feature_start['topic3'], # ]) # user_vec_fed_val = np.hstack([ # # ]) # # ad fed_dict[aid_p] = np.expand_dims(b_a_d['aid'], axis=1) fed_dict[advertiserid_p] = np.expand_dims(b_a_d['advertiserId'], axis=1) fed_dict[campaignid_p] = np.expand_dims(b_a_d['campaignId'], axis=1) fed_dict[creativeid_p] = np.expand_dims(b_a_d['creativeId'], axis=1) fed_dict[adcategoryid_p] = np.expand_dims(b_a_d['adCategoryId'], axis=1) 
fed_dict[productid_p] = np.expand_dims(b_a_d['productId'], axis=1) fed_dict[producttype_p] = np.expand_dims(b_a_d['productType'], axis=1) # print 13 # fed_dict[creativesize_p] = np.expand_dims(b_a_d['creativeSize'], axis=1) if graph_hyper_params['creativeSize_pro'] == 'min_max': fed_dict[creativesize_p] = np.expand_dims( b_a_d['creativeSize'], axis=1).astype(np.float32) elif graph_hyper_params['creativeSize_pro'] == 'li_san': fed_dict[creativesize_p] = np.expand_dims( b_a_d['creativeSize'], axis=1) else: print 'wrong feed' # cross ad # advec_fed = np.hstack([ fed_dict[advertiserid_p] + ad_feature_start['advertiserId'], # fed_dict[campaignid_p] + ad_feature_start['campaignId'], # fed_dict[creativeid_p] + ad_feature_start['creativeId'], # fed_dict[adcategoryid_p] + ad_feature_start['adCategoryId'], # fed_dict[productid_p] + ad_feature_start['productId'], # fed_dict[producttype_p] + ad_feature_start['productType'], # fed_dict[creativesize_p] + ad_feature_start['creativeSize_cross']]) # label # print 14 fed_dict[true_label] = np.expand_dims(b_data['label'].values, axis=1).astype(np.float32) # print 15 # d4 = datetime.now() # print d2-d1, d3-d2, d4-d3 # print fed_dict[true_label] # print len(fed_dict[true_label]), len(fed_dict[aid_p]), len(fed_dict[uid_p]), return fed_dict def eval_on_dev(split_vector_data): e_b_s = len(dev_data) / graph_hyper_params['batch_size'] auc_true, auc_pre = [], [] # auc = [] for index in tqdm(range(e_b_s)): start = index * graph_hyper_params['batch_size'] end = (index + 1) * graph_hyper_params['batch_size'] if ( index + 1) * graph_hyper_params['batch_size'] < len( dev_data) else len(dev_data) b_dev_data = dev_data[start:end] fed_dict = get_fed_dict(b_dev_data, split_vector_data, feature_conf_dict) fed_dict[train_p] = False fed_dict[dropout_p] = np.array([1.0]) pred_value, pre_pred_value, final_vec, uu, vv = sess.run( [ pred_val, network_params[0], network_params[1], network_params[2], network_params[3] ], feed_dict=fed_dict) pre_real_val = np.array(pred_value).reshape((-1)) auc_true = auc_true + list(b_dev_data['label'].values) auc_pre = auc_pre + pre_real_val.tolist() if True in np.isnan(pre_real_val): print 'contain nan: ', np.array(pre_pred_value).reshape( (-1)) print np.array(final_vec).reshape((-1)) print np.array(uu).reshape((-1)) print np.array(vv).reshape((-1)) # auc.append() # auc_pre = np.array(auc_pre) # auc_pre = np.exp(auc_pre) / np.exp(auc_pre).sum() # print auc_true # print auc_pre fpr, tpr, thresholds = metrics.roc_curve(auc_true, auc_pre, pos_label=1) auc_v, gni = metrics.auc(fpr, tpr), gini_norm(auc_true, auc_pre) auc_pre_2 = np.array(auc_pre) auc_pre_2.sort() print('dev_pre_top2=%.4f %.4f min2=%.4f %.4f' % (auc_pre_2.tolist()[-1], auc_pre_2.tolist()[-2], auc_pre_2.tolist()[0], auc_pre_2.tolist()[1])) return auc_v, gni def save_predict_material(user_data, ad_data): user_data_file = os.path.join(checkpoint_dir, 'user_data_file.csv') ad_data_file = os.path.join(checkpoint_dir, 'ad_data_file.csv') graph_hyper_params_file = os.path.join( checkpoint_dir, 'graph_hyper_params_file.pic') feature_conf_dict_file = os.path.join(checkpoint_dir, 'feature_conf_dict.pic') user_data.to_csv(user_data_file, index=False) ad_data.to_csv(ad_data_file, index=False) pickle.dump(graph_hyper_params, open(graph_hyper_params_file, 'w')) pickle.dump(feature_conf_dict, open(feature_conf_dict_file, 'w')) pass def construct_train_data(start_neg, pos_train_data, neg_train_data, graph_hyper_params): # global pos_train_data, neg_train_data, start_neg pos_len, neg_len = 
len(pos_train_data), len(neg_train_data) # print start_neg, pos_len, neg_len if start_neg + pos_len < neg_len: this_neg_train_data = neg_train_data[ start_neg:start_neg + graph_hyper_params['neg_size'] * pos_len] start_neg += pos_len * graph_hyper_params['neg_size'] else: this_neg_train_data = pd.concat([ neg_train_data[start_neg:neg_len], neg_train_data[0:graph_hyper_params['neg_size'] * pos_len - (neg_len - start_neg)] ]) start_neg = graph_hyper_params['neg_size'] * pos_len - ( neg_len - start_neg) train_data = pd.concat([pos_train_data, this_neg_train_data]) return shuffle(train_data), start_neg best_auc = 0.0 start_neg = 0 split_vector_data = SplitClass() save_data_for_predict = False cut_lr = True for epoch in range(graph_hyper_params['epoch']): train_data, start_neg = construct_train_data( start_neg, pos_train_data, neg_train_data, graph_hyper_params) if start_neg < graph_hyper_params['neg_size'] * len( pos_train_data): neg_train_data = shuffle(neg_train_data) e_b_s = len(train_data) / graph_hyper_params['batch_size'] one_epoch_loss, one_epoch_batchnum = 0.0, 0.0 early_stop_hit = 0 split_vector_data.clean() for index in tqdm(range(e_b_s)): # print 0 start = index * graph_hyper_params['batch_size'] end = (index + 1) * graph_hyper_params['batch_size'] if ( index + 1) * graph_hyper_params['batch_size'] < len( train_data) else len(train_data) b_data = train_data[start:end] # print 1 # d1 = datetime.now() fed_dict = get_fed_dict(b_data, split_vector_data, feature_conf_dict) fed_dict[train_p] = True fed_dict[dropout_p] = np.array( [graph_hyper_params['dropout_keep']]) # d2 = datetime.now() # print 2 _, loss_val, pre_tr_val = sess.run( [train_step, model_loss, network_params[0]], feed_dict=fed_dict) # print 3 # d3 = datetime.now() # print d2-d1, d3-d2 one_epoch_loss += loss_val one_epoch_batchnum += 1. if graph_hyper_params['debug']: print datetime.now(), index, loss_val pre_tr_val = np.array(pre_tr_val).reshape((-1)) if graph_hyper_params['debug'] or True in np.isnan(pre_tr_val): print pre_tr_val if (graph_hyper_params['mtyp'] == 4 or index != 0 ) and index % ( (e_b_s - 1) / graph_hyper_params['show_peroid']) == 0: auc, gn = eval_on_dev(split_vector_data) best_auc = max(auc, best_auc) format_str = '%s epoch=%.2f avg_loss=%.4f auc=%.4f best_auc=%.4f gn=%.4f' print(format_str % (datetime.now().strftime("%Y-%m-%d %H:%M:%S"), (epoch + 1.0 * (index + 1) / e_b_s), one_epoch_loss / one_epoch_batchnum, auc, best_auc, gn)) one_epoch_loss = one_epoch_batchnum = 0.0 if (auc >= best_auc and (epoch + 1.0 * (index + 1) / e_b_s) >= 0.6) or (auc >= best_auc and auc > 0.74): current_step = tf.train.global_step(sess, global_step) path = saver.save(sess, checkpoint_prefix, global_step=current_step) print("saved model to: %s" % path) if not save_data_for_predict: udp = pd.concat( [relevant_user_data, no_relevant_user_data]) save_predict_material(udp, ad_data) save_data_for_predict = True early_stop_hit = 0 elif auc < best_auc and abs(auc - best_auc) > 0.02: early_stop_hit += 1 if early_stop_hit >= 3: if cut_lr: print 'cut_lr_ori:', sess.run(learning_rate) sess.run(learning_rate_decay_op) print 'cut_lr_now:', sess.run(learning_rate) cut_lr = False early_stop_hit = -5 else: print 'eary_stop_best:', best_auc import sys sys.exit(0) # csv_path = predict_csv(split_vector_data) # print 'save csv to: ', csv_path pass
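
# ---------------------------------------------------------------------------
# Illustrative sketch (assumption): gini_norm() used by eval_on_dev() above is
# imported from elsewhere.  A common implementation of the normalised Gini
# coefficient (for binary labels it equals 2*AUC - 1) looks like this:
def _gini_sketch(actual, pred):
    actual = np.asarray(actual, dtype=np.float64)
    pred = np.asarray(pred, dtype=np.float64)
    # sort the true labels by descending prediction score (stable on ties)
    order = np.lexsort((np.arange(len(actual)), -pred))
    a = actual[order]
    gini_sum = a.cumsum().sum() / a.sum()
    gini_sum -= (len(a) + 1) / 2.0
    return gini_sum / len(a)


def gini_norm_sketch(actual, pred):
    """Normalised Gini: model Gini divided by the Gini of a perfect ranking."""
    return _gini_sketch(actual, pred) / _gini_sketch(actual, actual)
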
def train_eval_model(graph_hyper_params): def construct_train_data(pos_train_data, neg_train_data, graph_hyper_params): # global pos_train_data, neg_train_data, start_neg pos_len, neg_len = len(pos_train_data), len(neg_train_data) # print start_neg, pos_len, neg_len if graph_hyper_params['neg_start'] * pos_len + graph_hyper_params[ 'neg_size'] * pos_len < neg_len: this_neg_train_data = neg_train_data[graph_hyper_params['neg_start'] * pos_len: \ graph_hyper_params['neg_start'] * pos_len + graph_hyper_params[ 'neg_size'] * pos_len] else: print 'fianl ! fianl ! fianl ! fianl !' this_neg_train_data = pd.concat([ neg_train_data[graph_hyper_params['neg_start'] * pos_len:], neg_train_data[:pos_len - max( 0, neg_len - graph_hyper_params['neg_start'] * pos_len)] ]) train_data = pd.concat([pos_train_data, this_neg_train_data]) return shuffle(train_data) print graph_hyper_params print 'read data start !' pos_train_data, neg_train_data, predict_data1, predict_data2, user_data, ad_data, feature_conf_dict, uid_map, aid_map = get_prod_dataset( graph_hyper_params['formal']) print 'read data done !' # 重新 split train dev # o_dev_size = graph_hyper_params['o_dev_size'] # dev_data = pd.concat([pos_train_data[:o_dev_size], neg_train_data[:o_dev_size]]) # pos_train_data, neg_train_data = pos_train_data[o_dev_size:], neg_train_data[o_dev_size:] # print 'dev_size:', len(dev_data) # print 'pos-neg-len:', len(pos_train_data), len(neg_train_data) train_data = construct_train_data(pos_train_data, neg_train_data, graph_hyper_params) # if graph_hyper_params['only_train']: # if graph_hyper_params['formal']: # formal_set = set(list(train_data['uid']) + list(dev_data['uid'])) # else: # formal_set = set(list(train_data['uid']) + list(dev_data['uid']) + [1, 2, 3, 4]) # user_data = user_data[user_data['uid'].isin(formal_set)] print 'map row start' user_data_train = user_data[user_data['uid'].isin(train_data['uid'])] user_data_predict1 = user_data[user_data['uid'].isin(predict_data1['uid'])] user_data_predict2 = user_data[user_data['uid'].isin(predict_data2['uid'])] del user_data gc.collect() uid_map_row_train, aid_map_row = dict( zip(user_data_train['uid'].values, np.arange(len(user_data_train)))), dict( zip(ad_data['aid'].values, np.arange(len(ad_data)))) uid_map_row_predict_1 = dict( zip(user_data_predict1['uid'].values, np.arange(len(user_data_predict1)))) uid_map_row_predict_2 = dict( zip(user_data_predict2['uid'].values, np.arange(len(user_data_predict2)))) print 'map row end' print feature_conf_dict # graph = tf.Graph() # with graph.as_default(): # 对 creativeSize 这一个连续特征的处理 if graph_hyper_params['creativeSize_pro'] == 'min_max': print 'min-max norm creativeSize', ad_data['creativeSize'].max( ), ad_data['creativeSize'].min() norm_cs = ( ad_data['creativeSize'] * 1.0 - ad_data['creativeSize'].min()) / ( ad_data['creativeSize'].max() - ad_data['creativeSize'].min()) ad_data = ad_data.drop(['creativeSize'], axis=1) ad_data['creativeSize'] = norm_cs creativesize_p = tf.placeholder(tf.float32, [None, 1], name="creativeSize") elif graph_hyper_params['creativeSize_pro'] == 'li_san': print '离散化 creativeSize' sh = ShrinkSep() ad_data['creativeSize'] = ad_data['creativeSize'].apply(sh) feature_conf_dict['creativeSize'] = len(sh.d) + 1 creativesize_p = tf.placeholder(tf.int32, [None, 1], name="creativeSize") else: print 'no process creativeSize' # ****************************************************************** place holder start uid_p = tf.placeholder(tf.int32, [None, 1], name="uid") lbs_p = tf.placeholder(tf.int32, [None, 1], 
name="LBS") age_p = tf.placeholder(tf.int32, [None, 1], name="age") carrier_p = tf.placeholder(tf.int32, [None, 1], name="carrier") consumptionability_p = tf.placeholder(tf.int32, [None, 1], name="consumptionAbility") education_p = tf.placeholder(tf.int32, [None, 1], name="education") gender_p = tf.placeholder(tf.int32, [None, 1], name="gender") house_p = tf.placeholder(tf.int32, [None, 1], name="house") os_p = tf.placeholder(tf.int32, [None, 1], name="os") ct_p = tf.placeholder(tf.int32, [None, 1], name="ct") # marriagestatus_p = tf.placeholder(tf.int32, [None, 1], name="marriageStatus") appidaction_index_p = tf.placeholder( tf.int32, [None, feature_conf_dict['appIdAction'][1]], name="appidaction_index") appidaction_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['appIdAction'][1]], name="appidaction_val") appIdInstall_index_p = tf.placeholder( tf.int32, [None, feature_conf_dict['appIdInstall'][1]], name="appIdInstall_index") appIdInstall_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['appIdInstall'][1]], name="appIdInstall_val") marriagestatus_index_p = tf.placeholder( tf.int32, [None, feature_conf_dict['marriageStatus'][0]], name="marriageStatus_index") marriagestatus_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['marriageStatus'][0]], name="marriageStatus_val") interest1_index_p = tf.placeholder( tf.int32, [None, feature_conf_dict['interest1'][0]], name="interest1_index") interest1_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['interest1'][0]], name="interest1_val") interest2_index_p = tf.placeholder( tf.int32, [None, feature_conf_dict['interest2'][0]], name="interest2_index") interest2_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['interest2'][0]], name="interest2_val") interest3_index_p = tf.placeholder( tf.int32, [None, feature_conf_dict['interest3'][0]], name="interest3_index") interest3_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['interest3'][0]], name="interest3_val") interest4_index_p = tf.placeholder( tf.int32, [None, feature_conf_dict['interest4'][0]], name="interest4_index") interest4_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['interest4'][0]], name="interest4_val") interest5_index_p = tf.placeholder( tf.int32, [None, feature_conf_dict['interest5'][0]], name="interest5_index") interest5_val_p = tf.placeholder( tf.float32, [None, 1, feature_conf_dict['interest5'][0]], name="interest5_val") # kmeans type # clu_200_p = tf.placeholder(tf.int32, [None, 1], name="clu_200_p") # clu_400_p = tf.placeholder(tf.int32, [None, 1], name="clu_400_p") kw1_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['kw1'][1]], name="kw1_index") kw1_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['kw1'][1]], name="kw1_val") kw2_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['kw2'][1]], name="kw2_index") kw2_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['kw2'][1]], name="kw2_val") kw3_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['kw3'][1]], name="kw3_index") kw3_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['kw3'][1]], name="kw3_val") topic1_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['topic1'][1]], name="topic1_index") topic1_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['topic1'][1]], name="topic1_val") topic2_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['topic2'][1]], name="topic2_index") topic2_val_p = tf.placeholder(tf.float32, [None, 1, 
feature_conf_dict['topic2'][1]], name="topic2_val") topic3_index_p = tf.placeholder(tf.int32, [None, feature_conf_dict['topic3'][1]], name="topic3_index") topic3_val_p = tf.placeholder(tf.float32, [None, 1, feature_conf_dict['topic3'][1]], name="topic3_val") aid_p = tf.placeholder(tf.int32, [None, 1], name="aid") advertiserid_p = tf.placeholder(tf.int32, [None, 1], name="advertiserId") campaignid_p = tf.placeholder(tf.int32, [None, 1], name="campaignId") creativeid_p = tf.placeholder(tf.int32, [None, 1], name="creativeId") adcategoryid_p = tf.placeholder(tf.int32, [None, 1], name="adCategoryId") productid_p = tf.placeholder(tf.int32, [None, 1], name="productId") producttype_p = tf.placeholder(tf.int32, [None, 1], name="productType") true_label = tf.placeholder(tf.float32, [None, 1], name="true_label") # ****************************************************************** place holder end pred_val, model_loss, network_params = inference( uid_p, lbs_p, age_p, carrier_p, consumptionability_p, education_p, gender_p, house_p, os_p, ct_p, marriagestatus_index_p, marriagestatus_val_p, appidaction_index_p, appidaction_val_p, appIdInstall_index_p, appIdInstall_val_p, interest1_index_p, interest1_val_p, interest2_index_p, interest2_val_p, interest3_index_p, interest3_val_p, interest4_index_p, interest4_val_p, interest5_index_p, interest5_val_p, kw1_index_p, kw1_val_p, kw2_index_p, kw2_val_p, kw3_index_p, kw3_val_p, topic1_index_p, topic1_val_p, topic2_index_p, topic2_val_p, topic3_index_p, topic3_val_p, aid_p, advertiserid_p, campaignid_p, creativeid_p, adcategoryid_p, productid_p, producttype_p, creativesize_p, true_label, feature_conf_dict, graph_hyper_params) global_step = tf.Variable(0, name="global_step", trainable=False) train_step = None learning_rate = tf.Variable(float(graph_hyper_params['learn_rate']), trainable=False, dtype=tf.float32) learning_rate_decay_op = learning_rate.assign(learning_rate * 0.5) if graph_hyper_params['opt'] == 'adam': train_step = tf.train.AdamOptimizer(learning_rate).minimize( model_loss, global_step=global_step) elif graph_hyper_params['opt'] == 'adgrad': train_step = tf.train.AdagradOptimizer(learning_rate).minimize( model_loss, global_step=global_step) elif graph_hyper_params['opt'] == 'adadelta': train_step = tf.train.AdadeltaOptimizer(learning_rate).minimize( model_loss, global_step=global_step) elif graph_hyper_params['opt'] == 'ftrl': train_step = tf.train.FtrlOptimizer(learning_rate).minimize( model_loss, global_step=global_step) elif graph_hyper_params['opt'] == 'sgd': train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize( model_loss, global_step=global_step) else: print 'No optimizer !' 
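    # The learning rate above is a TF variable rather than a Python constant so
    # it can be decayed at run time; the halving op defined next to it
    # (learning_rate_decay_op) is intended to be run from the training loop when
    # the dev AUC stops improving, as the previous version of this function
    # does, e.g. (sketch):
    #   if early_stop_hit >= 3 and cut_lr:
    #       sess.run(learning_rate_decay_op)   # halve the learning rate once
    #       cut_lr, early_stop_hit = False, -5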
time_now = 'model_' + str(graph_hyper_params['model']) + datetime.now( ).strftime("_%Y_%m_%d_%H_%M_%S") checkpoint_dir = os.path.abspath("./checkpoints/dmf_tencent/" + time_now) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=1) def get_fed_dict(b_data, split_vector_data, feature_conf_dict, user_data_in, user_data_in_map_row, predict=False): if graph_hyper_params['formal']: aid_list = b_data['aid'].values uid_list = b_data['uid'].values else: if len(b_data) == 4: aid_list, uid_list = [1, 2, 3, 4], [1, 2, 3, 4] elif len(b_data) == 3: aid_list, uid_list = [1, 2, 3], [1, 2, 3] elif len(b_data) == 2: aid_list, uid_list = [1, 2], [1, 2] else: aid_list, uid_list = [1], [1] # print 11 # d1 = datetime.now() b_u_d, b_a_d = [], [] for b_uid in uid_list: b_u_d.append(user_data_in.iloc[user_data_in_map_row[b_uid]]) # if predict == 0: # b_u_d.append(user_data_train.iloc[uid_map_row_train[b_uid]]) # elif predict == 1: # b_u_d.append(user_data_predict1.iloc[uid_map_row_predict_1[b_uid]]) # elif predict == 2: # b_u_d.append(user_data_predict2.iloc[uid_map_row_predict_2[b_uid]]) # else: # print 'fed wrong!' for b_aid in aid_list: b_a_d.append(ad_data.iloc[aid_map_row[b_aid]]) b_u_d = pd.concat(b_u_d, axis=1).transpose() b_a_d = pd.concat(b_a_d, axis=1).transpose() # d3 = datetime.now() # print 12 # pd.concat([data.iloc[1].to_frame(), data.iloc[2].to_frame()], axis=1).transpose() fed_dict = {} fed_dict[uid_p] = np.expand_dims(b_u_d['uid'], axis=1) fed_dict[lbs_p] = np.expand_dims(b_u_d['LBS'], axis=1) fed_dict[age_p] = np.expand_dims(b_u_d['age'], axis=1) fed_dict[carrier_p] = np.expand_dims(b_u_d['carrier'], axis=1) fed_dict[consumptionability_p] = np.expand_dims( b_u_d['consumptionAbility'], axis=1) fed_dict[education_p] = np.expand_dims(b_u_d['education'], axis=1) fed_dict[gender_p] = np.expand_dims(b_u_d['gender'], axis=1) fed_dict[house_p] = np.expand_dims(b_u_d['house'], axis=1) fed_dict[os_p] = np.expand_dims(b_u_d['os'], axis=1) fed_dict[ct_p] = np.expand_dims(b_u_d['ct'], axis=1) # fed_dict[marriagestatus_p] = np.expand_dims(b_u_d['marriageStatus'], axis=1) # print 121 appidaction_li = split_vector_data(b_u_d['appIdAction']) # print 1212 fed_dict[appidaction_index_p], fed_dict[ appidaction_val_p] = appidaction_li[0], appidaction_li[1] appIdInstall_li = split_vector_data(b_u_d['appIdInstall']) fed_dict[appIdInstall_index_p], fed_dict[ appIdInstall_val_p] = appIdInstall_li[0], appIdInstall_li[1] # print 122 marriagestatus_li = split_vector_data(b_u_d['marriageStatus'], interest='marriageStatus', feature_config=feature_conf_dict) fed_dict[marriagestatus_index_p], fed_dict[ marriagestatus_val_p] = marriagestatus_li[0], marriagestatus_li[1] interest1_li = split_vector_data(b_u_d['interest1'], interest='interest1', feature_config=feature_conf_dict) fed_dict[interest1_index_p], fed_dict[interest1_val_p] = interest1_li[ 0], interest1_li[1] interest2_li = split_vector_data(b_u_d['interest2'], interest='interest2', feature_config=feature_conf_dict) fed_dict[interest2_index_p], fed_dict[interest2_val_p] = interest2_li[ 0], interest2_li[1] interest3_li = split_vector_data(b_u_d['interest3'], interest='interest3', feature_config=feature_conf_dict) fed_dict[interest3_index_p], fed_dict[interest3_val_p] = interest3_li[ 0], interest3_li[1] interest4_li = split_vector_data(b_u_d['interest4'], interest='interest4', feature_config=feature_conf_dict) fed_dict[interest4_index_p], 
        interest5_li = split_vector_data(b_u_d['interest5'], interest='interest5', feature_config=feature_conf_dict)
        fed_dict[interest5_index_p], fed_dict[interest5_val_p] = interest5_li[0], interest5_li[1]
        # print 123
        kw1_li = split_vector_data(b_u_d['kw1'])
        fed_dict[kw1_index_p], fed_dict[kw1_val_p] = kw1_li[0], kw1_li[1]
        kw2_li = split_vector_data(b_u_d['kw2'])
        fed_dict[kw2_index_p], fed_dict[kw2_val_p] = kw2_li[0], kw2_li[1]
        kw3_li = split_vector_data(b_u_d['kw3'])
        fed_dict[kw3_index_p], fed_dict[kw3_val_p] = kw3_li[0], kw3_li[1]
        # print 124
        topic1_li = split_vector_data(b_u_d['topic1'])
        fed_dict[topic1_index_p], fed_dict[topic1_val_p] = topic1_li[0], topic1_li[1]
        topic2_li = split_vector_data(b_u_d['topic2'])
        fed_dict[topic2_index_p], fed_dict[topic2_val_p] = topic2_li[0], topic2_li[1]
        topic3_li = split_vector_data(b_u_d['topic3'])
        fed_dict[topic3_index_p], fed_dict[topic3_val_p] = topic3_li[0], topic3_li[1]
        # print 125

        # ad
        fed_dict[aid_p] = np.expand_dims(b_a_d['aid'], axis=1)
        fed_dict[advertiserid_p] = np.expand_dims(b_a_d['advertiserId'], axis=1)
        fed_dict[campaignid_p] = np.expand_dims(b_a_d['campaignId'], axis=1)
        fed_dict[creativeid_p] = np.expand_dims(b_a_d['creativeId'], axis=1)
        fed_dict[adcategoryid_p] = np.expand_dims(b_a_d['adCategoryId'], axis=1)
        fed_dict[productid_p] = np.expand_dims(b_a_d['productId'], axis=1)
        fed_dict[producttype_p] = np.expand_dims(b_a_d['productType'], axis=1)
        # print 13
        # fed_dict[creativesize_p] = np.expand_dims(b_a_d['creativeSize'], axis=1)
        if graph_hyper_params['creativeSize_pro'] == 'min_max':
            fed_dict[creativesize_p] = np.expand_dims(b_a_d['creativeSize'], axis=1).astype(np.float32)
        elif graph_hyper_params['creativeSize_pro'] == 'li_san':
            fed_dict[creativesize_p] = np.expand_dims(b_a_d['creativeSize'], axis=1)
        else:
            print 'wrong feed'

        # label
        # print 14
        if not predict:
            fed_dict[true_label] = np.expand_dims(b_data['label'].values, axis=1).astype(np.float32)
        # print 15
        # d4 = datetime.now()
        # print d2-d1, d3-d2, d4-d3
        # print fed_dict[true_label]
        # print len(fed_dict[true_label]), len(fed_dict[aid_p]), len(fed_dict[uid_p]),
        return fed_dict

    # def eval_on_dev(split_vector_data):
    #     e_b_s = len(dev_data) / graph_hyper_params['batch_size']
    #     auc_true, auc_pre = [], []
    #     # auc = []
    #     for index in tqdm(range(e_b_s)):
    #         start = index * graph_hyper_params['batch_size']
    #         end = (index + 1) * graph_hyper_params['batch_size'] if (index + 1) * graph_hyper_params['batch_size'] < len(dev_data) else len(dev_data)
    #         b_dev_data = dev_data[start:end]
    #         fed_dict = get_fed_dict(b_dev_data, split_vector_data, feature_conf_dict)
    #         pred_value, pre_pred_value, final_vec, uu, vv = sess.run([pred_val, network_params[0], network_params[1], network_params[2], network_params[3]], feed_dict=fed_dict)
    #         pre_real_val = np.array(pred_value).reshape((-1))
    #         auc_true = auc_true + list(b_dev_data['label'].values)
    #         auc_pre = auc_pre + pre_real_val.tolist()
    #         if True in np.isnan(pre_real_val):
    #             print 'contain nan: ', np.array(pre_pred_value).reshape((-1))
    #             print np.array(final_vec).reshape((-1))
    #             print np.array(uu).reshape((-1))
    #             print np.array(vv).reshape((-1))
    #         # auc.append()
    #     # auc_pre = np.array(auc_pre)
    #     # auc_pre = np.exp(auc_pre) / np.exp(auc_pre).sum()
    #     # print auc_true
    #     # print auc_pre
    #     fpr, tpr, thresholds = metrics.roc_curve(auc_true, auc_pre, pos_label=1)
    #     auc_v, gni = metrics.auc(fpr, tpr), gini_norm(auc_true, auc_pre)
    #     auc_pre_2 = np.array(auc_pre)
    #     auc_pre_2.sort()
    #     print('dev_pre_top2=%.4f %.4f min2=%.4f %.4f' %
    #           (auc_pre_2.tolist()[-1], auc_pre_2.tolist()[-2], auc_pre_2.tolist()[0], auc_pre_2.tolist()[1]))
    #     return auc_v, gni

    best_auc = 0.0
    split_vector_data = SplitClass()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for epoch in range(graph_hyper_params['epoch']):  # train for only one epoch
        e_b_s = len(train_data) / graph_hyper_params['batch_size']
        one_epoch_loss, one_epoch_batchnum = 0.0, 0.0
        for index in tqdm(range(e_b_s)):
            # print 0
            start = index * graph_hyper_params['batch_size']
            end = (index + 1) * graph_hyper_params['batch_size'] if (index + 1) * graph_hyper_params['batch_size'] < len(train_data) else len(train_data)
            b_data = train_data[start:end]
            # print 1
            # d1 = datetime.now()
            fed_dict = get_fed_dict(b_data, split_vector_data, feature_conf_dict, user_data_train, uid_map_row_train)
            # d2 = datetime.now()
            # print 2
            _, loss_val, pre_tr_val = sess.run([train_step, model_loss, network_params[0]], feed_dict=fed_dict)
            # print 3
            # d3 = datetime.now()
            # print d2-d1, d3-d2
            one_epoch_loss += loss_val
            one_epoch_batchnum += 1.
            if graph_hyper_params['debug']:
                print datetime.now(), index, loss_val
            pre_tr_val = np.array(pre_tr_val).reshape((-1))
            if graph_hyper_params['debug'] or True in np.isnan(pre_tr_val):
                print pre_tr_val
            if index != 0 and index % ((e_b_s - 1) / graph_hyper_params['show_peroid']) == 0:
                split_vector_data.clean()
                # auc, gn = eval_on_dev(split_vector_data)
                # best_auc = max(auc, best_auc)
                # format_str = '%s epoch=%.2f avg_loss=%.4f auc=%.4f best_auc=%.4f gn=%.4f'
                # print (format_str % (datetime.now().strftime("%Y-%m-%d %H:%M:%S"), (epoch + 1.0 * (index + 1) / e_b_s), one_epoch_loss / one_epoch_batchnum, auc, best_auc, gn))
                # one_epoch_loss = one_epoch_batchnum = 0.0
                # pass

    del split_vector_data, user_data_train, train_data
    gc.collect()
    split_vector_data = 1

    if graph_hyper_params['test1']:
        predict_data = predict_data1.sort_values(by='uid')
        if graph_hyper_params['formal']:
            graph_hyper_params['batch_size'] = 512
        e_b_s = len(predict_data) / graph_hyper_params['batch_size'] if len(predict_data) % graph_hyper_params['batch_size'] == 0 else len(predict_data) / graph_hyper_params['batch_size'] + 1
        split_vector_data = SplitClass()
        pred = []
        for index in tqdm(range(e_b_s)):
            start = index * graph_hyper_params['batch_size']
            end = (index + 1) * graph_hyper_params['batch_size'] if (index + 1) * graph_hyper_params['batch_size'] <= len(predict_data) else len(predict_data) + 1
            b_predict_data = predict_data[start:end]
            # print len(b_predict_data), start, end
            # fed_dict = get_fed_dict(b_dev_data, split_vector_data, feature_conf_dict)
            fed_dict = get_fed_dict(b_predict_data, split_vector_data, feature_conf_dict, user_data_predict1, uid_map_row_predict_1, predict=True)
            pred_value = sess.run([pred_val], feed_dict=fed_dict)
            # print pred_value
            pre_real_val = np.array(pred_value).reshape((-1))
            pred = pred + pre_real_val.tolist()
            if graph_hyper_params['formal'] and index != 0 and index % ((e_b_s - 1) / graph_hyper_params['show_peroid']) == 0:
                split_vector_data.clean()
        print len(predict_data), len(pred)
        predict_data['pred_label'] = pred
        csv_data = predict_data[['ori_aid', 'ori_uid', 'pred_label']]
        csv_data.columns = ['aid', 'uid', 'score']
        csv_path = os.path.join(checkpoint_dir, 'test1_' + 'n' + str(graph_hyper_params['neg_start']) + '_submission.csv')
        csv_data.to_csv(csv_path, index=False)
        print 'submission_path:', csv_path
        del split_vector_data, user_data_predict1, predict_data1
        gc.collect()

    if graph_hyper_params['test2']:
        predict_data = predict_data2.sort_values(by='uid')
        if graph_hyper_params['formal']:
            graph_hyper_params['batch_size'] = 512
        e_b_s = len(predict_data) / graph_hyper_params['batch_size'] if len(predict_data) % graph_hyper_params['batch_size'] == 0 else len(predict_data) / graph_hyper_params['batch_size'] + 1
        split_vector_data = SplitClass()
        # split_vector_data.clean()
        pred = []
        for index in tqdm(range(e_b_s)):
            start = index * graph_hyper_params['batch_size']
            end = (index + 1) * graph_hyper_params['batch_size'] if (index + 1) * graph_hyper_params['batch_size'] <= len(predict_data) else len(predict_data) + 1
            b_predict_data = predict_data[start:end]
            # print len(b_predict_data), start, end
            # fed_dict = get_fed_dict(b_dev_data, split_vector_data, feature_conf_dict)
            fed_dict = get_fed_dict(b_predict_data, split_vector_data, feature_conf_dict, user_data_predict2, uid_map_row_predict_2, predict=True)
            pred_value = sess.run([pred_val], feed_dict=fed_dict)
            # print pred_value
            pre_real_val = np.array(pred_value).reshape((-1))
            pred = pred + pre_real_val.tolist()
            if graph_hyper_params['formal'] and index != 0 and index % ((e_b_s - 1) / graph_hyper_params['show_peroid']) == 0:
                split_vector_data.clean()
        print len(predict_data), len(pred)
        predict_data['pred_label'] = pred
        csv_data = predict_data[['ori_aid', 'ori_uid', 'pred_label']]
        csv_data.columns = ['aid', 'uid', 'score']
        csv_path = os.path.join(checkpoint_dir, 'test2_' + 'n' + str(graph_hyper_params['neg_start']) + '_submission.csv')
        csv_data.to_csv(csv_path, index=False)
        print 'submission_path:', csv_path
    pass
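# Illustrative sketch (not the original SplitClass): the *_index_p / *_val_p
# placeholder pairs fed in get_fed_dict above are assumed to carry a fixed-width,
# zero-padded encoding of multi-valued features. `pad_multi_valued` is a
# hypothetical helper showing one way such an (index, value) pair could be built;
# it relies only on the module-level numpy import (np).
def pad_multi_valued(id_lists, max_len):
    """Pad variable-length id lists to [batch, max_len] indices and [batch, 1, max_len] 0/1 values."""
    batch = len(id_lists)
    index = np.zeros((batch, max_len), dtype=np.int32)
    value = np.zeros((batch, 1, max_len), dtype=np.float32)
    for row, ids in enumerate(id_lists):
        ids = ids[:max_len]  # truncate ids beyond the configured width
        index[row, :len(ids)] = ids
        value[row, 0, :len(ids)] = 1.0  # mark real entries; padding stays 0
    return index, value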
def super_resolve_MCdropout(dt_lowres,
                            method='mlp_h=3',
                            n_h1=500, n_h2=200, n_h3=100,
                            n=2, m=2, us=2,
                            dropout_rate=0.25,
                            no_samples=10,
                            network_dir='/Users/ryutarotanno/DeepLearning/nsampler/models/linear'):
    """Perform a patch-based super-resolution on a given low-res image.

    Args:
        dt_lowres (numpy array): a low-res diffusion tensor image volume
        n (int): the width of an input patch is 2*n + 1
        m (int): the width of an output patch is m
        us (int): the upsampling factor

    Returns:
        the estimated high-res volume
    """
    # Specify the network:
    print('... defining the network model %s .' % method)
    n_in, n_out = 6 * (2 * n + 1)**3, 6 * m**3  # dimensions of input and output
    x_scaled = tf.placeholder(tf.float32, shape=[None, n_in])
    y_scaled = tf.placeholder(tf.float32, shape=[None, n_out])
    keep_prob = tf.placeholder(tf.float32)  # keep probability for dropout
    y_pred_scaled, L2_sqr, L1 = models.inference(method, x_scaled, keep_prob, n_in, n_out,
                                                 n_h1=n_h1, n_h2=n_h2, n_h3=n_h3)

    # load the transforms used for normalisation of the training data:
    transform_file = os.path.join(network_dir, 'transforms.pkl')
    transform = cPickle.load(open(transform_file, 'rb'))
    train_set_x_mean = transform['input_mean'].reshape((1, n_in))  # row vector representing the mean
    train_set_x_std = transform['input_std'].reshape((1, n_in))
    train_set_y_mean = transform['output_mean'].reshape((1, n_out))
    train_set_y_std = transform['output_std'].reshape((1, n_out))
    del transform

    # load the weights with the best performance:
    settings_file = os.path.join(network_dir, 'settings.pkl')
    details = cPickle.load(open(settings_file, 'rb'))
    best_step = details['best step']

    # Restore all the variables and perform reconstruction:
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Restore variables from disk.
        saver.restore(sess, os.path.join(network_dir, "model-" + str(best_step)))
        print("Model restored.")

        # reconstruct
        dt_lowres = dt_lowres[0::us, 0::us, 0::us, :]  # take every us-th entry to reduce it to the original resolution.
        (xsize, ysize, zsize, comp) = dt_lowres.shape
        dt_hires = np.zeros((xsize * us, ysize * us, zsize * us, comp))  # the base array for the output high-res volume.
        dt_hires[:, :, :, 0] = -1  # initialise all the voxels as 'background'
        dt_std = np.zeros((xsize * us, ysize * us, zsize * us, comp))  # the base array for the output uncertainty.
        dt_std[:, :, :, 0] = -1  # initialise all the voxels as 'background'.

        for k in np.arange(n + 1, zsize - n + 1):
            print('Slice %i of %i.' % (k, zsize))
            for j in np.arange(n + 1, ysize - n + 1):
                for i in np.arange(n + 1, xsize - n + 1):
                    ipatch = dt_lowres[(i - n - 1):(i + n),
                                       (j - n - 1):(j + n),
                                       (k - n - 1):(k + n), 2:comp]  # input patch

                    # Process only if the whole patch is foreground
                    if np.min(dt_lowres[(i - n - 1):(i + n),
                                        (j - n - 1):(j + n),
                                        (k - n - 1):(k + n), 0]) >= 0:
                        opatch_MCsamples = np.zeros((no_samples, 6 * m**3))

                        for sample_idx in np.arange(no_samples):
                            # Vectorise input patch (following 'Fortran' reshape ordering) and normalise:
                            ipatch_row = ipatch.reshape((1, ipatch.size), order='F')
                            ipatch_row_scaled = (ipatch_row - train_set_x_mean) / train_set_x_std

                            # Predict the corresponding high-res output patch in the normalised space:
                            opatch_row_scaled = y_pred_scaled.eval(
                                feed_dict={x_scaled: ipatch_row_scaled,
                                           keep_prob: (1.0 - dropout_rate)})

                            # Send back into the original space and reshape into a cubic patch:
                            opatch_row = train_set_y_std * opatch_row_scaled + train_set_y_mean

                            # Store each predicted row-vector high-res patch:
                            opatch_MCsamples[sample_idx, :] = opatch_row

                        opatch_row_mean = opatch_MCsamples.mean(axis=0)
                        opatch_row_std = opatch_MCsamples.std(axis=0)
                        opatch_mean = opatch_row_mean.reshape((m, m, m, comp - 2), order='F')
                        opatch_std = opatch_row_std.reshape((m, m, m, comp - 2), order='F')

                        # Select the correct location of the output patch in the brain and store:
                        x_temp_1, x_temp_2 = (us * (i - 1) + 1 - (m - us) / 2) - 1, (us * i + (m - us) / 2)
                        y_temp_1, y_temp_2 = (us * (j - 1) + 1 - (m - us) / 2) - 1, (us * j + (m - us) / 2)
                        z_temp_1, z_temp_2 = (us * (k - 1) + 1 - (m - us) / 2) - 1, (us * k + (m - us) / 2)

                        dt_hires[x_temp_1:x_temp_2, y_temp_1:y_temp_2, z_temp_1:z_temp_2, 2:comp] \
                            = dt_hires[x_temp_1:x_temp_2, y_temp_1:y_temp_2, z_temp_1:z_temp_2, 2:comp] + opatch_mean
                        dt_std[x_temp_1:x_temp_2, y_temp_1:y_temp_2, z_temp_1:z_temp_2, 2:comp] \
                            = dt_std[x_temp_1:x_temp_2, y_temp_1:y_temp_2, z_temp_1:z_temp_2, 2:comp] + opatch_std

                        # Label only reconstructed voxels as foreground.
                        dt_hires[x_temp_1:x_temp_2, y_temp_1:y_temp_2, z_temp_1:z_temp_2, 0] = 0
                        dt_std[x_temp_1:x_temp_2, y_temp_1:y_temp_2, z_temp_1:z_temp_2, 0] = 0

    return dt_hires, dt_std
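# Illustrative sketch of the MC-dropout estimate used above, factored out for a
# single normalised input row. `predict_op`, `x_ph` and `keep_ph` stand for the
# graph tensors (y_pred_scaled, x_scaled, keep_prob); the helper itself is
# hypothetical and not part of the original pipeline.
def mc_dropout_estimate(sess, predict_op, x_ph, keep_ph, x_row, keep_prob_value, no_samples=10):
    """Run `no_samples` stochastic forward passes and return per-output (mean, std)."""
    samples = np.stack([
        sess.run(predict_op, feed_dict={x_ph: x_row, keep_ph: keep_prob_value})
        for _ in range(no_samples)
    ], axis=0)
    return samples.mean(axis=0), samples.std(axis=0)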
def sr_train(method='linear',
             n_h1=500, n_h2=200, n_h3=100,
             data_dir='/Users/ryutarotanno/DeepLearning/Test_1/data/',
             cohort='Diverse',
             no_subjects=8,
             sample_rate=32,
             us=2, n=2, m=2,
             optimisation_method='adam',
             dropout_rate=0.0,
             learning_rate=1e-4,
             L1_reg=0.00,
             L2_reg=1e-5,
             n_epochs=1000,
             batch_size=25,
             save_dir='/Users/ryutarotanno/DeepLearning/nsampler/models'):
    # -------------------------- Load the training data ---------------------------:
    # get the full path to the training set:
    dataset = data_dir + 'PatchLibs%sDS%02i_%ix%i_%ix%i_TS%i_SRi%03i_0001.mat' \
              % (cohort, us, 2 * n + 1, 2 * n + 1, m, m, no_subjects, sample_rate)
    data_dir, data_file = os.path.split(dataset)

    # load
    print('... loading the training dataset %s' % data_file)
    patchlib = sr_utility.load_patchlib(patchlib=dataset)
    train_set_x, valid_set_x, train_set_y, valid_set_y = patchlib  # load the original patch libs

    # normalise the data and keep the transforms:
    (train_set_x_scaled, train_set_x_mean, train_set_x_std,
     train_set_y_scaled, train_set_y_mean, train_set_y_std) \
        = sr_utility.standardise_data(train_set_x, train_set_y, option='default')  # normalise the data

    # normalise the validation sets into the same space as training sets:
    valid_set_x_scaled = (valid_set_x - train_set_x_mean) / train_set_x_std
    valid_set_y_scaled = (valid_set_y - train_set_y_mean) / train_set_y_std
    del train_set_x, valid_set_x, train_set_y, valid_set_y, patchlib  # clear original data as you don't need them.

    # --------------------------- Define the model --------------------------:
    # clear the graph
    tf.reset_default_graph()

    # define input and output:
    n_in, n_out = 6 * (2 * n + 1)**3, 6 * m**3  # dimensions of input and output
    x_scaled = tf.placeholder(tf.float32, shape=[None, n_in])  # normalised input low-res patch
    y_scaled = tf.placeholder(tf.float32, shape=[None, n_out])  # normalised output high-res patch
    keep_prob = tf.placeholder(tf.float32)  # keep probability for dropout
    global_step = tf.Variable(0, name="global_step", trainable=False)

    y_pred_scaled, L2_sqr, L1 = models.inference(method, x_scaled, keep_prob, n_in, n_out,
                                                 n_h1=n_h1, n_h2=n_h2, n_h3=n_h3)
    cost = models.cost(y_scaled, y_pred_scaled, L2_sqr, L1, L2_reg, L1_reg)
    train_step = models.training(cost, learning_rate, global_step=global_step, option=optimisation_method)
    mse = tf.reduce_mean(tf.square(train_set_y_std * (y_scaled - y_pred_scaled)))

    # -------------------------- Start training -----------------------------:
    # Add the variable initializer Op.
    init = tf.initialize_all_variables()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Set the directory for saving checkpoints:
    nn_file = sr_utility.name_network(method=method, n_h1=n_h1, n_h2=n_h2, n_h3=n_h3,
                                      cohort=cohort, no_subjects=no_subjects,
                                      sample_rate=sample_rate, us=us, n=n, m=m,
                                      optimisation_method=optimisation_method,
                                      dropout_rate=dropout_rate)
    checkpoint_dir = os.path.join(save_dir, nn_file)
    if not os.path.exists(checkpoint_dir):  # create a subdirectory to save the model.
        os.makedirs(checkpoint_dir)

    # Save the transforms used for data normalisation:
    print('... saving the transforms used for data normalisation for the test time')
    transform = {'input_mean': train_set_x_mean, 'input_std': train_set_x_std,
                 'output_mean': train_set_y_mean, 'output_std': train_set_y_std}
    f = file(os.path.join(checkpoint_dir, 'transforms.pkl'), 'wb')
    cPickle.dump(transform, f, protocol=cPickle.HIGHEST_PROTOCOL)

    # Create a session for running Ops on the Graph.
    print('... training')
    with tf.Session() as sess:
        # Run the Op to initialize the variables.
        sess.run(init)

        # Compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x_scaled.shape[0] // batch_size
        n_valid_batches = valid_set_x_scaled.shape[0] // batch_size

        # early-stopping parameters
        patience = 10000  # look at this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is found
        improvement_threshold = 0.995  # a relative improvement of this much is considered significant
        validation_frequency = min(n_train_batches, patience // 2)
        # go through this many minibatches before checking the network on the
        # validation set; in this case we check every epoch

        best_validation_loss = np.inf
        best_iter = 0
        test_score = 0

        start_time = timeit.default_timer()

        epoch = 0
        done_looping = False
        iter_valid = 0
        total_validation_loss_epoch = 0
        total_training_loss_epoch = 0

        while (epoch < n_epochs) and (not done_looping):
            epoch += 1
            start_time_epoch = timeit.default_timer()

            for minibatch_index in range(n_train_batches):
                # Select batches:
                x_batch_train = train_set_x_scaled[minibatch_index * batch_size:(minibatch_index + 1) * batch_size, :]
                y_batch_train = train_set_y_scaled[minibatch_index * batch_size:(minibatch_index + 1) * batch_size, :]
                x_batch_valid = valid_set_x_scaled[minibatch_index * batch_size:(minibatch_index + 1) * batch_size, :]
                y_batch_valid = valid_set_y_scaled[minibatch_index * batch_size:(minibatch_index + 1) * batch_size, :]

                # track the number of steps
                current_step = tf.train.global_step(sess, global_step)

                # perform gradient descent:
                train_step.run(feed_dict={x_scaled: x_batch_train,
                                          y_scaled: y_batch_train,
                                          keep_prob: (1.0 - dropout_rate)})

                # Accumulate validation/training errors for each epoch:
                total_validation_loss_epoch += mse.eval(feed_dict={x_scaled: x_batch_valid,
                                                                   y_scaled: y_batch_valid,
                                                                   keep_prob: (1.0 - dropout_rate)})
                total_training_loss_epoch += mse.eval(feed_dict={x_scaled: x_batch_train,
                                                                 y_scaled: y_batch_train,
                                                                 keep_prob: (1.0 - dropout_rate)})

                # iteration number
                iter = (epoch - 1) * n_train_batches + minibatch_index
                iter_valid += 1

                if (iter + 1) % validation_frequency == 0:
                    # Print out the errors for each epoch:
                    this_validation_loss = total_validation_loss_epoch / iter_valid
                    this_training_loss = total_training_loss_epoch / iter_valid
                    end_time_epoch = timeit.default_timer()

                    print('\nEpoch %i, minibatch %i/%i:\n'
                          '    training error (rmse) %f times 1E-5\n'
                          '    validation error (rmse) %f times 1E-5\n'
                          '    took %f secs'
                          % (epoch, minibatch_index + 1, n_train_batches,
                             np.sqrt(this_training_loss * 10**10),
                             np.sqrt(this_validation_loss * 10**10),
                             end_time_epoch - start_time_epoch))
                    print('    number of minibatches = %i and patience = %i' % (iter + 1, patience))
                    print('    validation frequency = %i, iter_valid = %i' % (validation_frequency, iter_valid))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iter * patience_increase)
                            print('    reduces the previous error by more than %f %%'
                                  % ((1 - improvement_threshold) * 100.))

                        best_validation_loss = this_validation_loss
                        best_training_loss = this_training_loss
                        best_iter = iter
                        best_step = current_step + 1

                        # Save the model:
                        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
                        save_path = saver.save(sess, checkpoint_prefix, global_step=global_step)
                        print("Model saved in file: %s" % save_path)
                        # Save the model details:
                        print('... saving the model details')
                        model_details = {'method': method, 'cohort': cohort,
                                         'no of subjects': no_subjects,
                                         'sample rate': sample_rate,
                                         'upsampling factor': us,
                                         'n': n, 'm': m,
                                         'optimisation': optimisation_method,
                                         'dropout rate': dropout_rate,
                                         'learning rate': learning_rate,
                                         'L1 coefficient': L1_reg,
                                         'L2 coefficient': L2_reg,
                                         'max no of epochs': n_epochs,
                                         'batch size': batch_size,
                                         'training length': end_time_epoch - start_time,
                                         'best validation rmse': np.sqrt(best_validation_loss),
                                         'best training rmse': np.sqrt(best_training_loss),
                                         'best step': best_step}
                        cPickle.dump(model_details,
                                     file(os.path.join(checkpoint_dir, 'settings.pkl'), 'wb'),
                                     protocol=cPickle.HIGHEST_PROTOCOL)

                    # Terminate training when the validation loss starts increasing.
                    if this_validation_loss > best_validation_loss:
                        patience = 0
                        print('Validation error increases - terminate training ...')
                        break

                    # Start counting again:
                    total_validation_loss_epoch = 0
                    total_training_loss_epoch = 0
                    iter_valid = 0
                    start_time_epoch = timeit.default_timer()

                if patience <= iter:
                    done_looping = True
                    break

        # Display the best results:
        print(('\nOptimization complete. Best validation score of %f '
               'obtained at iteration %i') % (np.sqrt(best_validation_loss * 10**10), best_step))

        end_time = timeit.default_timer()
        time_train = end_time - start_time
        print('Training done!!! It took %f secs.' % time_train)

    # clear the graph
    tf.reset_default_graph()
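# Minimal sketch of the normalisation convention assumed throughout this file:
# inputs and targets are standardised with training-set statistics (the values
# saved to transforms.pkl above), and predictions are mapped back with the same
# statistics. `standardise` / `unstandardise` are hypothetical helper names
# introduced only for illustration.
def standardise(x, mean, std):
    # forward transform used on inputs/targets before feeding the network
    return (x - mean) / std

def unstandardise(y_scaled_value, mean, std):
    # inverse transform applied to network outputs at test time
    return std * y_scaled_value + mean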
# (opening of this call inferred from the commented-out training-data variant below)
valData, numValExamples, valIterator = preprocessing.inputValFlows(
    batch_size, preprocessing.valSetPath, preprocessing.valLabelPath)
# trainData, numTrainExamples, trainIterator = preprocessing.inputValFlows(batch_size, preprocessing.trainSetPath, preprocessing.trainLabelPath)

perGPUValData = [list([]) for i in range(numGpus)]
for tD in valData[:-1]:
    split = tf.split(tD, numGpus, axis=0)
    for gpu in range(numGpus):
        perGPUValData[gpu].append(split[gpu])

netOut = []
for gpu in range(numGpus):
    with tf.name_scope('tower_%d' % (gpu)) as scope:
        with tf.device('/gpu:%d' % gpu):
            print(perGPUValData[gpu][0].get_shape())
            print(len(perGPUValData[gpu]))
            valCode = models.inference(perGPUValData[gpu], first=(gpu == 0), useType="test", modelType=modelType)
            print(valCode.get_shape())
            gpuValPredictions = models.predictForces(valCode, 5 * batch_size // numGpus, log, useType="test", first=(gpu == 0))
            netOut.append(gpuValPredictions)

# netOut = []
# for gpu in range(numGpus):
#     with tf.name_scope('tower_%d' % (gpu)) as scope:
#         with tf.device('/gpu:%d' % gpu):
#             print(perGPUValData[gpu][0].get_shape())
#             print(len(perGPUValData[gpu]))
        perGPUValData[gpu].append(split[gpu])

perGPUTrainData = [list([]) for i in range(numGpus)]
for tD in trainData[:-1]:
    split = tf.split(tD, numGpus, axis=0)
    for gpu in range(numGpus):
        perGPUTrainData[gpu].append(split[gpu])

netOut = []
for gpu in range(numGpus):
    with tf.name_scope('tower_%d' % (gpu)) as scope:
        with tf.device('/gpu:%d' % gpu):
            print(perGPUValData[gpu][0].get_shape())
            print(len(perGPUValData[gpu]))
            valCode = models.inference(perGPUValData[gpu], first=(gpu == 0), useType="test", modelType=modelType)
            print(valCode.get_shape())
            gpuValPredictions = models.predictForces(valCode, 5 * batch_size // numGpus, log, useType="test", first=(gpu == 0))
            netOut.append(gpuValPredictions)

trainNetOut = []
for gpu in range(numGpus):
    with tf.name_scope('tower_%d' % (gpu)) as scope:
        with tf.device('/gpu:%d' % gpu):
            print(perGPUTrainData[gpu][0].get_shape())
            print(len(perGPUTrainData[gpu]))
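# Illustrative sketch of the per-tower splitting done above: every input tensor is
# split along the batch dimension so tower `g` sees the g-th slice.
# `split_across_gpus` is a hypothetical helper name; the original code performs the
# same double loop inline for valData and trainData.
def split_across_gpus(tensors, num_gpus):
    """Return a list with one sub-list of batch slices per GPU tower."""
    per_gpu = [[] for _ in range(num_gpus)]
    for t in tensors:
        for gpu, part in enumerate(tf.split(t, num_gpus, axis=0)):
            per_gpu[gpu].append(part)
    return per_gpu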
def sr_train(method='mlp_h=1_kingma',
             n_h1=500, n_h2=200,
             data_dir='./data/',
             cohort='Diverse',
             no_subjects=8,
             sample_rate=32,
             us=2, n=2, m=2,
             optimisation_method='adam',
             dropout_rate=0.5,
             learning_rate=1e-4,
             L1_reg=0.00,
             L2_reg=1e-5,
             n_epochs=1000,
             batch_size=25,
             save_dir='./models'):
    ##########################
    # Load the training data:
    ##########################
    # get the full path to the training set:
    dataset = data_dir + 'PatchLibs%sDS%02i_%ix%i_%ix%i_TS%i_SRi%03i_0001.mat' \
              % (cohort, us, 2 * n + 1, 2 * n + 1, m, m, no_subjects, sample_rate)
    data_dir, data_file = os.path.split(dataset)

    # load
    print('... loading the training dataset %s' % data_file)
    patchlib = sr_utility.load_patchlib(patchlib=dataset)
    train_set_x, valid_set_x, train_set_y, valid_set_y = patchlib  # load the original patch libs

    # normalise the data and keep the transforms:
    (train_set_x_scaled, train_set_x_mean, train_set_x_std,
     train_set_y_scaled, train_set_y_mean, train_set_y_std) \
        = sr_utility.standardise_data(train_set_x, train_set_y, option='default')  # normalise the data

    # normalise the validation sets into the same space as training sets:
    valid_set_x_scaled = (valid_set_x - train_set_x_mean) / train_set_x_std
    valid_set_y_scaled = (valid_set_y - train_set_y_mean) / train_set_y_std
    del train_set_x, valid_set_x, train_set_y, valid_set_y  # clear original data as you don't need them.

    ####################
    # Define the model:
    ####################
    print('... defining the model')

    # clear the graph
    tf.reset_default_graph()

    # define input and output:
    n_in, n_out = 6 * (2 * n + 1)**3, 6 * m**3  # dimensions of input and output
    x_scaled = tf.placeholder(tf.float32, shape=[None, n_in])  # normalised input low-res patch
    y_scaled = tf.placeholder(tf.float32, shape=[None, n_out])  # normalised output high-res patch
    keep_prob = tf.placeholder(tf.float32)  # keep probability for dropout

    y_pred_scaled, L2_sqr, L1, reg = models.inference(method, x_scaled, keep_prob, n_in, n_out, n_h1, n_h2)
    cost = models.cost(y_scaled, y_pred_scaled, L2_sqr, L1, L2_reg, L1_reg)
    train_step = models.training(cost, learning_rate, option=optimisation_method)
    mse = tf.reduce_mean(tf.square(train_set_y_std * (y_scaled - y_pred_scaled)))
    # note: train_step above was built from the un-regularised cost tensor, so this
    # re-binding only changes the local `cost` variable, not the optimisation target.
    cost += tf.add_n(reg) / 3.

    #######################
    # Start training:
    #######################
    # Add the variable initializer Op.
    init = tf.initialize_all_variables()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Create a session for running Ops on the Graph.
    print('... training')

    with tf.Session() as sess:
        # Run the Op to initialize the variables.
        sess.run(init)

        # Compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x_scaled.shape[0] // batch_size
        n_valid_batches = valid_set_x_scaled.shape[0] // batch_size

        # early-stopping parameters
        patience = 10000  # look at this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is found
        improvement_threshold = 0.995  # a relative improvement of this much is considered significant
        validation_frequency = min(n_train_batches, patience // 2)
        # go through this many minibatches before checking the network on the
        # validation set; in this case we check every epoch

        best_validation_loss = np.inf
        best_iter = 0
        test_score = 0.
        start_time = timeit.default_timer()

        epoch = 0
        done_looping = False

        while (epoch < n_epochs) and (not done_looping):
            epoch += 1
            for minibatch_index in range(n_train_batches):
                # perform gradient descent:
                train_step.run(feed_dict={
                    x_scaled: train_set_x_scaled[minibatch_index * batch_size:(minibatch_index + 1) * batch_size, :],
                    y_scaled: train_set_y_scaled[minibatch_index * batch_size:(minibatch_index + 1) * batch_size, :],
                    keep_prob: (1.0 - dropout_rate)})

                # iteration number
                iter = (epoch - 1) * n_train_batches + minibatch_index

                if (iter + 1) % validation_frequency == 0:
                    # compute zero-one loss on validation set
                    validation_losses = [
                        mse.eval(feed_dict={
                            x_scaled: valid_set_x_scaled[index * batch_size:(index + 1) * batch_size, :],
                            y_scaled: valid_set_y_scaled[index * batch_size:(index + 1) * batch_size, :],
                            keep_prob: (1.0 - dropout_rate)})
                        for index in range(n_valid_batches)
                    ]
                    this_validation_loss = np.mean(validation_losses)

                    training_losses = [
                        mse.eval(feed_dict={
                            x_scaled: train_set_x_scaled[index * batch_size:(index + 1) * batch_size, :],
                            y_scaled: train_set_y_scaled[index * batch_size:(index + 1) * batch_size, :],
                            keep_prob: (1.0 - dropout_rate)})
                        for index in range(n_valid_batches)
                    ]
                    this_training_loss = np.mean(training_losses)

                    print('\nEpoch %i, minibatch %i/%i:\n'
                          '    training error (rmse) %f times 1E-5\n'
                          '    validation error (rmse) %f times 1E-5'
                          % (epoch, minibatch_index + 1, n_train_batches,
                             np.sqrt(this_training_loss * 10**10),
                             np.sqrt(this_validation_loss * 10**10)))
                    print('    number of minibatches = %i and patience = %i' % (iter, patience))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iter * patience_increase)
                            print('    reduces the previous error by more than %f %%'
                                  % ((1 - improvement_threshold) * 100.))

                        best_validation_loss = this_validation_loss
                        best_iter = iter

                if patience <= iter:
                    done_looping = True
                    break

        end_time = timeit.default_timer()
        print(('\nOptimization complete. Best validation score of %f '
               'obtained at iteration %i') % (np.sqrt(best_validation_loss * 10**10), best_iter + 1))
        print('Training done!!! It took %f secs.' % (end_time - start_time))

        # Save the model:
        nn_file = sr_utility.name_network(method=method, n_h1=n_h1, n_h2=n_h2,
                                          cohort=cohort, no_subjects=no_subjects,
                                          sample_rate=sample_rate, us=us, n=n, m=m,
                                          optimisation_method=optimisation_method,
                                          dropout_rate=dropout_rate)
        save_subdir = os.path.join(save_dir, nn_file)
        if not os.path.exists(save_subdir):  # create a subdirectory to save the model.
            os.makedirs(save_subdir)
        save_path = saver.save(sess, os.path.join(save_subdir, "model.ckpt"))
        print("Model saved in file: %s" % save_path)

        # Save the model details:
        print('... saving the model details')
        model_details = {'method': method, 'cohort': cohort,
                         'no of subjects': no_subjects,
                         'sample rate': sample_rate,
                         'upsampling factor': us,
                         'n': n, 'm': m,
                         'optimisation': optimisation_method,
                         'dropout rate': dropout_rate,
                         'learning rate': learning_rate,
                         'L1 coefficient': L1_reg,
                         'L2 coefficient': L2_reg,
                         'max no of epochs': n_epochs,
                         'batch size': batch_size}
        cPickle.dump(model_details,
                     file(os.path.join(save_subdir, 'settings.pkl'), 'wb'),
                     protocol=cPickle.HIGHEST_PROTOCOL)

        print('... saving the transforms used for data normalisation for the test time')
        transform = {'input_mean': train_set_x_mean, 'input_std': train_set_x_std,
                     'output_mean': train_set_y_mean, 'output_std': train_set_y_std}
        f = file(os.path.join(save_subdir, 'transforms.pkl'), 'wb')
        cPickle.dump(transform, f, protocol=cPickle.HIGHEST_PROTOCOL)

    # clear the graph
    tf.reset_default_graph()
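# Minimal sketch of reading the normalisation transforms back at test time, the way
# super_resolve_MCdropout does above. `load_transforms` is a hypothetical helper
# introduced only for illustration; it uses the same os / cPickle modules already
# imported in this file.
def load_transforms(checkpoint_dir):
    """Return the dict with 'input_mean', 'input_std', 'output_mean', 'output_std'."""
    with open(os.path.join(checkpoint_dir, 'transforms.pkl'), 'rb') as f:
        return cPickle.load(f)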