def test(model, data, te_starts=3):
    model.eval()
    with torch.no_grad():
        zs, preds = model(data, TData.TR)

        ps, ns = get_sample(data, TData.TE, nsize=1)
        sscore = model.static.score(ps[-te_starts:], ns[-te_starts:], zs[-te_starts:])
        dscore = model.dynamic.score(ps[-te_starts - 1:], ns[-te_starts - 1:],
                                     preds[-te_starts - 1:])

        dnp, dnn, _ = dynamic_new_link_prediction(data, data.te, preds)
        dnscore = model.dynamic.score(dnp[-te_starts - 1:], dnn[-te_starts - 1:],
                                      preds[-te_starts - 1:])

        sscore = get_score(*sscore)
        dscore = get_score(*dscore)
        dnscore = get_score(*dnscore)

        print('''Scores:
Static  AUC: %0.6f\tAP: %0.6f
Dynamic AUC: %0.6f\tAP: %0.6f
Dyn New AUC: %0.6f\tAP: %0.6f
''' % (sscore[0], sscore[1], dscore[0], dscore[1], dnscore[0], dnscore[1]))

        return {
            'static_auc': sscore[0], 'static_ap': sscore[1],
            'dyn_auc': dscore[0], 'dyn_ap': dscore[1],
            'new_auc': dnscore[0], 'new_ap': dnscore[1]
        }
def global_alignment(v, w, score_matrix):
    n = len(v) + 1
    m = len(w) + 1
    s = [[0] * m for i in range(0, n)]
    b = [['?'] * m for i in range(0, n)]

    for i in range(1, n):
        s[i][0] = s[i - 1][0] - FORFEIT
    for j in range(1, m):
        s[0][j] = s[0][j - 1] - FORFEIT

    for i in range(1, n):
        for j in range(1, m):
            vi = v[i - 1]
            wi = w[j - 1]
            s[i][j] = max(
                s[i - 1][j] - FORFEIT,
                s[i][j - 1] - FORFEIT,
                s[i - 1][j - 1] + utils.get_score(vi, wi, score_matrix)
            )
            if s[i][j] == s[i - 1][j - 1] + utils.get_score(vi, wi, score_matrix):
                b[i][j] = "↖"
            elif s[i][j] == s[i - 1][j] - FORFEIT:
                b[i][j] = "↑"
            else:
                b[i][j] = "←"

    words = {"v": '', "w": ''}
    restore_words(words, b, v, w, n - 1, m - 1)
    words["v"] = words["v"][::-1]
    words["w"] = words["w"][::-1]

    score = s[n - 1][m - 1]
    return score, words
def fit_model(X, labels):
    sgd = SGDClassifier(loss="log", max_iter=300, tol=1e-3, class_weight="balanced")
    model = OneVsRestClassifier(sgd, n_jobs=1)
    model.fit(X, labels)

    train_pred = model.predict_proba(X)
    train_pred = train_pred > 0.4
    # train_pred = model.predict(X)
    utils.get_score(train_pred, labels)
    return model
def test_get_score():
    logger.debug('Testing get_score()')

    state = [['X', 'X', 'O'],
             ['O', 'O', 'X'],
             ['X', 'O', 'X']]
    assert get_score(state, True) == 0
    assert get_score(state, False) == 0

    state = [['X', None, None],
             ['O', 'X', None],
             [None, 'O', 'X']]
    assert get_score(state, True) == 10

    state = [[None, 'X', 'O'],
             [None, 'X', None],
             [None, 'O', 'X']]
    assert get_score(state, True) == 0
    assert get_score(state, False) == 0

    state = [['O', None, 'X'],
             [None, None, 'X'],
             [None, 'O', 'X']]
    assert get_score(state, True) == 10
    assert get_score(state, False) == 0

    state = [['X', None, 'O'],
             [None, None, 'O'],
             [None, 'X', 'O']]
    assert get_score(state, True) == 0
    assert get_score(state, False) == -10

    logger.debug('Passed')
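# Not part of the original snippet: a minimal get_score() sketch that would
# satisfy the assertions above, assuming a 3x3 board of 'X'/'O'/None and a
# boolean flag selecting whose win to check (True -> +10 on an X win,
# False -> -10 on an O win, 0 otherwise).
def get_score(state, maximizing):
    lines = (
        [list(row) for row in state]                 # rows
        + [list(col) for col in zip(*state)]         # columns
        + [[state[i][i] for i in range(3)],          # main diagonal
           [state[i][2 - i] for i in range(3)]]      # anti-diagonal
    )
    mark, value = ('X', 10) if maximizing else ('O', -10)
    return value if any(all(cell == mark for cell in line) for line in lines) else 0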
def direction(direction):
    direction = direction.lower()
    if direction not in ["left", "right"]:
        return "Unknown direction {0}".format(direction), 404

    if request.method == "GET":
        if direction == "left":
            return jsonify({"left": get_score("left")})
        else:
            return jsonify({"right": get_score("right")})
    else:
        incr_score(str(direction))
        return "ok"
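# Not part of the original snippet: a minimal in-memory backing for the
# endpoint above. The real app presumably persists the counters elsewhere
# (e.g. Redis or a database), so treat these helpers as a hypothetical stand-in.
_scores = {"left": 0, "right": 0}

def get_score(side):
    return _scores[side]

def incr_score(side):
    _scores[side] += 1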
def train(model, data):
    opt = Adam(model.parameters(), lr=TR_PARAMS['lr'])

    val_best = (0, None)
    no_change = 0

    for e in range(TR_PARAMS['epochs']):
        model.train()
        opt.zero_grad()

        zs, preds = model.forward(data, TData.TR)
        ps, ns = get_sample(data, TData.TR)

        loss = model.loss_fn(ps, ns, zs, preds)
        print("[%d] Loss: %0.6f" % (e, loss.item()), end='')

        loss.backward()
        opt.step()

        with torch.no_grad():
            zs, preds = model.forward(data, TData.VA)
            ps, ns = get_sample(data, TData.VA)
            sscore, dscore = model.score(ps, ns, zs, preds)
            #loss = model.loss_fn(ps, ns, zs, preds).item()

            sscore = get_score(*sscore)
            dscore = get_score(*dscore)
            score = sum(sscore + dscore) / 4.0
            star = '*' if score > val_best[0] else ''

            #print('''\tEval loss: %0.6f%s
#Static:
#    AUC: %0.6f\tAP: %0.6f
#Dynamic Eval:
#    AUC: %0.6f\tAP: %0.6f
#
#''' % (loss, star, sscore[0], sscore[1], dscore[0], dscore[1]))
            print("\tEval score: %0.6f%s" % (score, star))

            if score < val_best[0]:
                no_change += 1
                if no_change > TR_PARAMS['patience']:
                    print("Early stopping!")
                    break
            else:
                no_change = 0
                val_best = (score, deepcopy(model))

    return val_best[1]
def test_default(**kwargs):
    import utils
    result = None
    testfile = test_theory
    options = [clingo, bk, ex, testfile, exmpl_constr, '0 --asp09']
    command = ' '.join(options)
    out = os.popen(command).read().split('. ')

    if out[0].strip() == 'UNSATISFIABLE':
        result = False
    else:
        out = set(x.strip() for x in out if x.strip() != '')
        # Materialize the filter as a list so it can be iterated more than once below
        out = [x for x in out if 'OPTIMUM FOUND' not in x]
        if out == []:
            result = True
        elif all('posCovered' in x or 'negsCovered' in x for x in out):
            (_, _, score) = utils.get_score(out)
            if score == gl.current_example_object.positive_count:
                return True
            else:
                return False
        else:
            raise excps.HypothesisTestingException(
                'ASP reasoner returned %s' % (' '.join(out)), gl.logger)

    if 'last_seen' in kwargs:
        if result:
            print(gl.current_example, 'ok')
        else:
            print(gl.current_example, 'Not ok!')
        i = kwargs['last_seen']
        utils.get_example(i)

    return result
def option_V():
    """
    This option allows the user to record the votes for the project.
    """
    project_name = str(input("\nEnter the project name: "))
    project = project_dict[project_name]
    print("There are {} team members".format(project.n_members))

    for member in project.team_members.values():
        while True:
            print("\n\tEnter {}'s votes, points must add up to 100:".format(member.name))
            for team_member in project.get_members_name():
                if team_member != member.name:
                    points = utils.get_score(member.name, team_member)
                    member.votes[team_member] = points
            if sum(member.votes.values()) != 100:
                print("\t\tError!!! The points must add up to 100.")
                continue
            else:
                break

    project.calculate_point_allocation()
    main()
def target(args):
    w1, w2, w3 = args
    r = a + b * w1 + c * w2 + d * w3
    result = r.topk(5, 1)[1]
    predict_label_and_marked_label_list = [
        [_1, _2] for _1, _2 in zip(result, true_labels)
    ]
    score, _, _, _ = get_score(predict_label_and_marked_label_list)
    print(args, score, _)
    # list_space = [hp.uniform('a', 0, 1), hp.uniform('b', 0, 1)]
    return -score
def train(model: nn.Module, data: TData):
    opt = Adam(model.parameters(), lr=0.001)

    best = (None, 0)
    no_progress = 0
    times = []

    for e in range(EPOCHS):
        model.train()
        opt.zero_grad()
        start_t = time()

        zs, _ = model.forward(data, TData.TRAIN)
        p = [data.ei_masked(TData.TRAIN, i) for i in range(data.T)]
        n = data.get_negative_edges(TData.TRAIN, nratio=10)

        loss = model.calc_loss(p, n, zs)
        loss.backward()
        opt.step()

        elapsed = time() - start_t
        times.append(elapsed)
        print("[%d] Loss: %0.4f\t%0.4fs" % (e, loss.item(), elapsed))

        model.eval()
        with torch.no_grad():
            zs, _ = model.forward(data, TData.TRAIN)
            p = [data.ei_masked(TData.VAL, i) for i in range(data.T)]
            n = data.get_negative_edges(TData.VAL, nratio=10)
            p, n = model.calc_scores(p, n, zs)
            auc, ap = get_score(p, n)

        print("\tVal AUC: %0.4f AP: %0.4f" % (auc, ap), end='')

        tot = auc + ap
        if tot > best[1]:
            best = (deepcopy(model), tot)
            print("*")
        else:
            print()
            if e >= MIN:
                no_progress += 1
                if no_progress == PATIENCE:
                    print("Early stopping!")
                    break

    model = best[0]
    _, h0 = model.forward(data, TData.ALL)

    tpe = sum(times) / len(times)
    print("TPE: %0.4f" % tpe)
    return model, h0, tpe
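# Not part of the original snippets: a plausible get_score(pos, neg) for the
# link-prediction loops in this file, assuming `pos` and `neg` are 1-D tensors
# of scores for positive and negative edges and that scikit-learn is available.
import torch
from sklearn.metrics import average_precision_score, roc_auc_score

def get_score(pos, neg):
    scores = torch.cat([pos, neg]).detach().cpu().numpy()
    labels = torch.cat([torch.ones(pos.size(0)),
                        torch.zeros(neg.size(0))]).numpy()
    return roc_auc_score(labels, scores), average_precision_score(labels, scores)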
def validation_epoch_end(self, outputs):
    pred = np.concatenate([out['pred'] for out in outputs])
    target = np.concatenate([out['target'] for out in outputs])

    with open("pred.txt", "w") as f:
        for s in pred:
            f.write(s + '\n')

    score = get_score(target, pred)
    print("\n", score)
    self.log('LD', score, prog_bar=True)
def target(args):
    r = 0
    for r_, k_ in zip(args, probs):
        r = r + r_ * k_
    result = r.topk(5, 1)[1]
    predict_label_and_marked_label_list = [
        [_1, _2] for _1, _2 in zip(result, true_labels)
    ]
    score, _, _, _ = get_score(predict_label_and_marked_label_list)
    print(args, score, _)
    # list_space = [hp.uniform('a', 0, 1), hp.uniform('b', 0, 1)]
    return -score
def local_alignment(v, w, score_matrix):
    n = len(v) + 1
    m = len(w) + 1
    s = [[0] * m for i in range(0, n)]
    b = [["↯"] * m for i in range(0, n)]

    max_elem = {"score": 0, "i": 0, "j": 0}

    for i in range(1, n):
        for j in range(1, m):
            if i == n - 1 and j == m - 1:
                s[i][j] = max_elem["score"]
            vi = v[i - 1]
            wi = w[j - 1]
            s[i][j] = max(
                0,
                s[i - 1][j] - FORFEIT,
                s[i][j - 1] - FORFEIT,
                s[i - 1][j - 1] + utils.get_score(vi, wi, score_matrix)
            )
            if s[i][j] >= max_elem["score"]:
                max_elem["score"] = s[i][j]
                max_elem["i"] = i
                max_elem["j"] = j
            if s[i][j] == s[i - 1][j - 1] + utils.get_score(vi, wi, score_matrix):
                b[i][j] = "↖"
            elif s[i][j] == s[i - 1][j] - FORFEIT:
                b[i][j] = "↑"
            elif s[i][j] == s[i][j - 1] - FORFEIT:
                b[i][j] = "←"
            else:
                b[i][j] = "↯"

    words = {"v": '', "w": ''}
    restore_words(words, b, v, w, max_elem["i"], max_elem["j"])
    words["v"] = words["v"][::-1]
    words["w"] = words["w"][::-1]

    return max_elem["score"], words
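# Not part of the original snippets: a minimal sketch of the helpers that
# global_alignment and local_alignment above assume. FORFEIT is a linear gap
# penalty, utils.get_score is a substitution-matrix lookup, and restore_words
# walks the backtrack matrix (characters are collected end-to-start; the
# callers reverse them afterwards). Names and values are assumptions.
FORFEIT = 5

def get_score(a, c, score_matrix):
    # score_matrix is assumed to be a dict of dicts, e.g. a loaded BLOSUM62
    return score_matrix[a][c]

def restore_words(words, b, v, w, i, j):
    while i > 0 or j > 0:
        if b[i][j] == "↯":                       # local alignment: free start, stop
            break
        if i > 0 and (j == 0 or b[i][j] == "↑"):  # gap in w
            words["v"] += v[i - 1]
            words["w"] += "-"
            i -= 1
        elif j > 0 and (i == 0 or b[i][j] == "←"):  # gap in v
            words["v"] += "-"
            words["w"] += w[j - 1]
            j -= 1
        else:                                     # "↖": match / mismatch
            words["v"] += v[i - 1]
            words["w"] += w[j - 1]
            i, j = i - 1, j - 1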
def do_versus(event):
    splitted = event.message.text.split(' vs. ')
    if len(splitted) < 2:
        return

    ranked = ((word, get_score(word.strip())) for word in splitted)
    rank_sorted = sorted(ranked, key=lambda m: m[1], reverse=True)

    message = ''
    for i, (word, score) in enumerate(rank_sorted):
        winner = '' if i > 0 else ' (winner)'
        message += '{}. {} ({}){}\n'.format(i + 1, word, score, winner)

    line_bot_api.reply_message(event.reply_token, TextSendMessage(text=message))
def lstm_test(sents, tags):
    model = LSTM()
    model.load_state_dict(torch.load("blstm.pkl"))

    tags = vec_flat(tags)
    tags_p = []
    for s in sents:
        out = model(torch.unsqueeze(a2ft(s), 0))
        out = out.data.numpy()
        for out_i in out:
            max_idx = np.argmax(out_i)
            tags_p.append(max_idx)

    utils.print_score(tags, tags_p)
    return utils.get_score(tags, tags_p)
def target(args):
    r = 0
    for r_, k_ in enumerate(args):
        if r_ < model_num:
            r += k_ * probs[r_]
        else:
            tmp = t.load(files_path[r_]).cuda().float()
            r = r + k_ * tmp.cpu()
    result = r.topk(5, 1)[1]
    predict_label_and_marked_label_list = [
        [_1, _2] for _1, _2 in zip(result, true_labels)
    ]
    score, _, _, _ = get_score(predict_label_and_marked_label_list)
    print(args, score, _)
    # list_space = [hp.uniform('a', 0, 1), hp.uniform('b', 0, 1)]
    return -score
def main(): with open("04/input.txt", encoding="UTF-8") as file: content = file.read() numbers = [int(number) for number in content.split("\n\n")[0].split(",")] boards = get_boards(content.split("\n\n")[1:]) for number in numbers: boards = mark_boards(boards, number) winner = check_winner(boards) if (winner is not None): score = get_score(winner, number) print(score) break
def start_message(message):
    # Greets the user: 'Hi, you sent me /start'
    bot.send_message(message.chat.id, 'Привет, ты написал мне /start')
    room_id = message.chat.id
    # bot.send_message(message.chat.id, 'Привет, ты написал мне /start')
    # bot.send_photo(chat_id=room_id, photo=open('images/resized-2C.png', 'rb'))

    player_cards = get_random_cards()
    score = get_score(player_cards)
    # 'Вы набрали: %s!' -> 'You scored: %s!'
    bot.send_message(message.chat.id, 'Вы набрали: %s!' % score, reply_markup=markup)

    combine_images(player_cards, 'player.png')
    bot.send_photo(chat_id=room_id, photo=open('player.png', 'rb'))

    TEST = 'ffffff'
    print(TEST)
def main(**kwargs):
    model = getattr(models, opt.model)(opt).cuda()
    pre_loss = 1.0
    lr, lr2 = opt.lr, opt.lr2
    loss_function = getattr(models, opt.loss)()
    best_score = 0

    dataset = ZhihuData(opt.train_data_path, opt.labels_path,
                        type_=opt.type_, augument=opt.augument)
    dataloader = data.DataLoader(dataset=dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=opt.shuffle,
                                 num_workers=opt.num_workers,
                                 pin_memory=True)
    optimizer = model.get_optimizer(lr, opt.lr2, opt.weight_decay)

    for epoch in range(opt.max_epoch):
        for i, ((title, content), label) in tqdm.tqdm(enumerate(dataloader)):
            title, content, label = title.cuda(), content.cuda(), label.cuda()
            optimizer.zero_grad()
            score = model(title, content)
            loss = loss_function(score, opt.weight * label.float())
            loss.backward()
            optimizer.step()

            predict = score.data.topk(5, dim=1)[1].cpu().tolist()
            true_target = label.data.float().cpu().topk(5, dim=1)
            true_label = true_target[0][:, :5]
            true_index = true_target[1][:, :5]
            predict_label_and_marked_label_list = []
            for j in range(label.size(0)):
                true_index_ = true_index[j]
                true_label_ = true_label[j]
                true = true_index_[true_label_ > 0]
                # append expects a single item: store the (prediction, truth) pair
                predict_label_and_marked_label_list.append((predict[j], true.tolist()))
            score_, prec_, recall_, ss = get_score(predict_label_and_marked_label_list)

        scores, prec_, recall_, _ss = val(model, dataset)
        if scores > best_score:
            best_score = scores
            best_path = model.save(name=str(scores), new=True)
        if scores < best_score:
            model.load(best_path, change_opt=False)
            lr = lr * opt.lr_decay
            lr2 = 2e-4 if lr2 == 0 else lr2 * 0.8
            optimizer = model.get_optimizer(lr, lr2, 0)
def max_exp_shot(env, network, device, topk_coors, turn, x):
    number_of_sim = 16
    max_v = -99
    virtual_e = 0
    s = time.time()

    if turn != 15:
        # make input
        # virtual_inputs = torch.empty(0)
        list_for_cat = []
        for i, v_action in enumerate(topk_coors):
            for _ in range(number_of_sim):
                v_state, _ = env.virtual_step(v_action, 0.145)
                list_for_cat.append(utils.to_input(v_state, turn + 1).unsqueeze(0))
                # more time-consuming when cat in each iteration
                # virtual_inputs = torch.cat((virtual_inputs, utils.to_input(v_state, turn + 1).unsqueeze(0)), 0)
        virtual_inputs = torch.cat(list_for_cat, dim=0)

        # forward
        with torch.no_grad():
            _, virtual_v_out = network(virtual_inputs.to(device))
        virtual_v_out_prob = to_prob(virtual_v_out)

        # -x: because it is the opponent's turn value
        # 128x17 -> 128 (#sim x #topk)
        virtual_e = expectation(-x, virtual_v_out_prob)
        virtual_e = virtual_e.split(number_of_sim)
        virtual_e = [x.mean() for x in virtual_e]

        highlight = virtual_e.index(max(virtual_e))
        max_action = topk_coors[highlight]
    else:
        for i, v_action in enumerate(topk_coors):
            virtual_e = 0  # reset the running total for each candidate action
            for _ in range(number_of_sim):
                v_state, _ = env.virtual_step(v_action, 0.145)
                # order is 1 because of last shot
                virtual_e += utils.get_score(v_state, 1)
            virtual_e = virtual_e / number_of_sim
            print(v_action, virtual_e)
            if max_v < virtual_e:
                max_v = virtual_e
                max_action = v_action
                highlight = i

    print(time.time() - s)
    return max_action, highlight
def main(**kwargs):
    print("@@@@@@@@@@@@@@@@@@@@@@@")
    inpath = 'result'
    outfile = 'score.csv'
    label_path = 'labels_enhanceA.json'
    test_data_path = 'test_enhanceA.npz'

    for k, v in kwargs.items():
        print(k)
        print(v)
        if k == 'inpath':
            inpath = v
        if k == 'outfile':
            outfile = v
        if k == 'label_path':
            label_path = v
        if k == 'test_data_path':
            test_data_path = v

    print(label_path)
    if inpath[-1] == '/':
        inpath = inpath[0:-1]

    index2qid = np.load(test_data_path)['index2qid'].item()
    with open(label_path) as f:
        labels_info = json.load(f)
    qid2label = labels_info['d']
    label2qid = labels_info['id2label']

    files = glob.glob(inpath + '/*.pth')
    print(files)
    for file in files:
        f = open(outfile, 'a', encoding='utf-8')
        print(file)
        if not os.path.isfile(file):
            print('is path')
            continue
        r = t.load(file)
        true_labels = []
        for ii in range(len(r)):
            true_labels.append(qid2label[index2qid[ii]])
        tmp = r
        result = (tmp).topk(5, 1)[1]
        predict_label_and_marked_label_list = [
            [_1, _2] for _1, _2 in zip(result, true_labels)
        ]
        score, _, _, ss = get_score(predict_label_and_marked_label_list)
        print(score)
        print(ss)
        writer = csv.writer(f)
        writer.writerow([file, str(score)])
        f.close()
def val(model, dataset):
    '''
    Compute the model's score on the validation set.
    '''
    dataset.train(False)
    model.eval()

    dataloader = data.DataLoader(dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=False,
                                 num_workers=opt.num_workers,
                                 pin_memory=True)

    predict_label_and_marked_label_list = []
    for ii, ((title, content), label) in tqdm.tqdm(enumerate(dataloader)):
        title, content, label = Variable(title.cuda(), volatile=True), \
                                Variable(content.cuda(), volatile=True), \
                                Variable(label.cuda(), volatile=True)
        score = model(title, content)

        # !TODO: optimize this block
        # 1. append
        # 2. for loop
        # 3. topk instead of sort
        predict = score.data.topk(5, dim=1)[1].cpu().tolist()
        true_target = label.data.float().topk(5, dim=1)
        true_index = true_target[1][:, :5]
        true_label = true_target[0][:, :5]
        tmp = []
        for jj in range(label.size(0)):
            true_index_ = true_index[jj]
            true_label_ = true_label[jj]
            true = true_index_[true_label_ > 0]
            tmp.append((predict[jj], true.tolist()))
        predict_label_and_marked_label_list.extend(tmp)
        del score

    dataset.train(True)
    model.train()

    scores, prec_, recall_, _ss = get_score(predict_label_and_marked_label_list)
    return (scores, prec_, recall_, _ss)
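# Not part of the original snippets: a sketch of the get_score() the Zhihu
# tag-prediction snippets in this file assume. It takes a list of
# (predicted top-5 labels, marked labels) pairs and returns
# (score, precision, recall, per-position correct counts). The position-weighted
# formula follows the commonly published implementation of the competition
# metric; treat the details as an assumption rather than the exact original.
import math

def get_score(predict_label_and_marked_label_list):
    right_label_num = 0                 # total correctly predicted labels
    right_label_at_pos_num = [0] * 5    # correct predictions per rank position
    sample_num = 0
    all_marked_label_num = 0
    for predict_labels, marked_labels in predict_label_and_marked_label_list:
        sample_num += 1
        marked_label_set = set(marked_labels)
        all_marked_label_num += len(marked_label_set)
        for pos, label in enumerate(predict_labels[:5]):
            if label in marked_label_set:
                right_label_num += 1
                right_label_at_pos_num[pos] += 1
    precision = sum(right_num / float(sample_num) / math.log(2.0 + pos)
                    for pos, right_num in enumerate(right_label_at_pos_num))
    recall = float(right_label_num) / all_marked_label_num
    score = (precision * recall) / (precision + recall)
    return score, precision, recall, right_label_at_pos_num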
def test(model: nn.Module, data: TData, h0: torch.Tensor):
    pred = 1 if model.pred else 0

    model.eval()
    with torch.no_grad():
        zs, _ = model.forward(data, TData.ALL, h0=h0)

    scores = torch.cat(
        [
            model.decode(data.eis[i + pred][0], data.eis[i + pred][1], zs[i])
            for i in range(data.T - pred)
        ],
        dim=0
    )
    y = torch.cat(data.ys[pred:])
    y_hat = torch.zeros(scores.size(0))
    y_hat[scores <= model.cutoff] = 1

    tpr = y_hat[y == 1].mean() * 100
    fpr = y_hat[y == 0].mean() * 100

    tp = y_hat[y == 1].sum()
    fp = y_hat[y == 0].sum()
    fn = (y == 1).sum() - tp
    f1 = tp / (tp + 0.5 * (fp + fn))

    pscore = scores[y == 0]
    nscore = scores[y == 1]
    auc, ap = get_score(pscore, nscore)

    print("TPR: %0.2f, FPR: %0.2f" % (tpr, fpr))
    print("TP: %d FP: %d" % (tp, fp))
    print("F1: %0.8f" % f1)
    print("AUC: %0.4f" % auc)
    print("AP: %0.8f" % ap)

    return {
        'tpr': tpr, 'fpr': fpr,
        'tp': tp, 'fp': fp,
        'auc': auc, 'ap': ap,
        'f1': f1
    }
def target(args):
    weight = args
    aaa = t.ones(1999)
    for ii, _ in enumerate(large_index):
        aaa[_] = args[ii]
    # aaa[0], aaa[1], aaa[2], aaa[3], aaa[4] = args
    weight = aaa.view(1, -1).expand(200000, 1999)
    r2 = weight * (r.float())
    result = r2.topk(5, 1)[1]
    predict_label_and_marked_label_list = [
        [_1, _2] for _1, _2 in zip(result, true_labels)
    ]
    score, _, _, _ = get_score(predict_label_and_marked_label_list)
    # if score > previous_best_score:
    print(score)
    #     previous_best_score = score
    #     with open(str(score), 'wb') as f:
    #         pickle.dump(args, f)
    return -score
def get_score(self, pos, time):
    """
    >>> w = RectangularWorld('../test/')
    >>> w.get_score(np.array([2.1, 2.9]), 0)
    0.0
    """
    if self.full_noise_file:
        return utils.get_score(pos, time, self.noise_location,
                               self.pos_limits, self.noise_line_width)
    else:
        if self.edge_goal:
            return utils.wall_score(pos, self.centers[time], self.center_radius,
                                    self.pos_limits, self.shape)
        else:
            return utils.calculate_score(pos, self.centers[time], self.center_radius,
                                         self.pos_limits, self.shape)
def main(vocab_file, embeddings_file, pretrained_file,
         max_length=50, gpu_index=0, batch_size=128):
    device = torch.device(
        "cuda:{}".format(gpu_index) if torch.cuda.is_available() else "cpu")
    print(20 * "=", " Preparing for testing ", 20 * "=")

    if platform == "linux" or platform == "linux2":
        checkpoint = torch.load(pretrained_file)
    else:
        checkpoint = torch.load(pretrained_file, map_location=device)

    # Retrieving model parameters from checkpoint.
    embeddings = load_embeddings(embeddings_file)

    print("\t* Loading test data...")
    # test_data = LCQMC_Dataset(test_file, vocab_file, max_length)
    # test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)

    print("\t* Building model...")
    model = SiaGRU(embeddings, device=device).to(device)
    model.load_state_dict(checkpoint["model"])

    print(20 * "=", " Testing SiaGRU model on device: {} ".format(device), 20 * "=")

    database = [line for line in open('./data/rumors.txt', 'r', encoding='utf-8')]
    while True:
        input("enter to continue")
        inputs = [line for line in open('./data/input.txt', 'r', encoding='utf-8')]
        init_csv(inputs, database, './data/work_data.csv')
        dataset = LCQMC_Dataset('./data/work_data.csv', vocab_file, max_length)
        dataloader = DataLoader(dataset, shuffle=False, batch_size=batch_size)
        prob = get_score(model, dataloader)
        for i, p in enumerate(prob):
            if p > 0.5:
                print("text:", inputs[i // len(database)])
                print("rumor:", database[i % len(database)])
                print("prob:", p)
def on_epoch_end(self, epoch, logs):
    # Validation
    start_proba, end_proba = get_proba_prediction(self.model, self.word_ids,
                                                  self.mask, self.segm_ids)
    current = get_score(start_proba, end_proba, self.df,
                        self.sample_ind2new_ind2old_ind)

    # Save best model
    if current > self.best:
        self.best = current
        self.model.save_weights(self.best_weights_path, overwrite=True)

    # Log score info
    abs_epoch = self.start_epoch + epoch
    with open(self.log_path, 'a') as f:
        f.write(
            f'\n[fold: {self.n_fold}, epoch: {abs_epoch}] Val Score : {current:.5f} '
            f'(time: {(time.time() - self.checkpoint) // 60} min.)'
        )
    self.checkpoint = time.time()
def match(args):
    num_of_game, n, model_file_name, order, opponent = args

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = ResNet(ResidualBlock, [2, 2, 2, 2]).to(device)
    # model_file_name = "zero_final_0"
    load_model(model, model_file_name)

    if opponent is not None:
        model2 = ResNet(ResidualBlock, [2, 2, 2, 2]).to(device)
        load_model(model2, opponent)

    # num_of_game = 1000
    num_of_win = 0
    # order = 0
    p_bar = trange(num_of_game, position=n)
    for i in p_bar:
        state = np.zeros((1, 32))
        for turn in range(8):
            if turn % 2 == order:
                state_plane = coordinates_to_plane(state, turn, order).to(device)
                prob, _ = model(state_plane)
                action = best_shot_parm(prob)
            elif opponent is not None:
                state_plane = coordinates_to_plane(state, turn, (order + 1) % 2).to(device)
                prob, _ = model2(state_plane)
                action = best_shot_parm(prob)
            else:
                # action = (2.375, 4.88, random.randint(0, 1))
                action = (random.random() * 4.75, random.random() * 11.28,
                          random.randint(0, 1))
            state = sim.simulate(state, turn, action[0], action[1], action[2], 0.145)[0]

        if get_score(state, order) > 0:
            num_of_win += 1
        p_bar.set_description("%.3f" % (num_of_win / (i + 1)))

    return num_of_win / num_of_game
def minimax(state, max_player, depth):
    if is_end_state(state) or depth == 0:
        # base case: get_score with previous player
        score = get_score(state, not max_player)
        return score
    else:
        states = []
        scores = []

        # populate moves and scores
        player = get_player(max_player)
        for child_state in get_possible_states(state, player):
            score = minimax(child_state, not max_player, depth - 1)
            scores.append(score)
            states.append(child_state)

        # based on the player, choose the best move
        if max_player:
            max_score_index = scores.index(max(scores))
            choice['choice'] = states[max_score_index]
            return scores[max_score_index]
        else:
            min_score_index = scores.index(min(scores))
            choice['choice'] = states[min_score_index]
            return scores[min_score_index]
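# Not part of the original snippet: a hypothetical driver for the minimax
# above, assuming the module-level `choice` dict and the tic-tac-toe helpers
# it relies on (is_end_state, get_player, get_possible_states, get_score).
choice = {}  # assumed to exist at module level in the original

def best_move(board, depth=9):
    minimax(board, max_player=True, depth=depth)
    return choice['choice']  # board state after the move chosen for the maximizer

# e.g. best_move([[None] * 3 for _ in range(3)])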
def main(): with open("04/input.txt", encoding="UTF-8") as file: content = file.read() numbers = [int(number) for number in content.split("\n\n")[0].split(",")] boards = get_boards(content.split("\n\n")[1:]) winners = list() winning_number = 0 for number in numbers: boards = mark_boards(boards, number) current_winners = check_winners(boards) if (len(current_winners) > 0): winning_number = number winners.extend(current_winners) # Remove winners from boards for winner in current_winners: boards = list(filter(lambda x: not np.array_equal(x, winner), boards)) last_winner = winners[-1] score = get_score(last_winner, winning_number) print(score)
def run_model():
    """ Builds and runs the MLP. """
    logging.info("Running model...")

    # One independent model per output
    target_name = ['arriendo', 'profesional', 'tipo_construccion_id']
    perceptron_clf_dict = {}
    for target in target_name:
        logging.info("Training the model for output {}...".format(target))
        perceptron_clf_dict[target] = MLPClassifier(activation='logistic',
                                                    solver='lbfgs',
                                                    hidden_layer_sizes=(100, ),
                                                    max_iter=1000)
        perceptron_clf_dict[target].fit(train_tfidf, train_data[target])
        print('Model accuracy for output {}: {:.3f}'.format(
            target,
            perceptron_clf_dict[target].score(test_tfidf, test_data[target])))

    joint_accuracy = get_score(
        test_tfidf,
        test_data[['arriendo', 'profesional', 'tipo_construccion_id']],
        perceptron_clf_dict)
    print('Model accuracy: {:.3f}\n'.format(joint_accuracy))

    # Run the predictions
    if os.path.isfile(args.predict):
        predict(classifiers=perceptron_clf_dict,
                filename=args.predict,
                data_object=data_object)
    elif args.predict != '':
        logging.error('The file {} does not exist.'.format(args.predict))
def train(**kwargs):
    # ---------------------- Update parameters ----------------------
    opt = DefaultConfig()
    opt.update(**kwargs)
    opt.printf()

    # ---------------------- Data processing ----------------------
    # Load data
    train1, train2 = get_train_data(opt)
    # Build samples
    # train_sample = get_sample(train1, train2, load=True)
    # Build features
    # train_feat = get_feat(train1, train_sample)
    # Build labels
    # train_all = get_label(train_feat, opt)
    # gc.collect()
    # train_all.to_hdf('/home/xuwenchao/dyj-storage/all-feat/feat_23_24_label.hdf', 'w', complib='blosc', complevel=5)
    train_all = pd.read_hdf('/home/xuwenchao/dyj-storage/all-feat/feat_23_24_label.hdf')
    print(train_all.shape)

    # Select the features to use
    # opt['model_name'] = 'lgb_1_2017-09-15#19:50:48_0.58820.pkl'
    # gbm, use_feat = load_model(opt)
    # predictors_100 = pd.DataFrame(data={'feature_name': gbm.feature_name(), 'feature_importance': gbm.feature_importance()})
    # predictors_100 = predictors_100.sort_values(by=['feature_importance'], ascending=False)['feature_name'].values[:100]
    # use_feat = list(predictors_100) + ['orderid', 'geohashed_end_loc', 'label'] + ['sloc_eloc_common_eloc_count', 'sloc_eloc_common_sloc_count', 'sloc_eloc_common_conn1_count', 'sloc_eloc_common_conn2_count', 'sloc_eloc_common_eloc_rate', 'sloc_eloc_common_sloc_rate', 'sloc_eloc_common_conn1_rate', 'sloc_eloc_common_conn2_rate', 'user_sloc_eloc_common_eloc_count', 'user_sloc_eloc_common_sloc_count', 'user_sloc_eloc_common_conn1_count', 'user_sloc_eloc_common_conn2_count', 'user_sloc_eloc_common_eloc_rate', 'user_sloc_eloc_common_sloc_rate', 'user_sloc_eloc_common_conn1_rate', 'user_sloc_eloc_common_conn2_rate']
    # train_all = train_all[use_feat]
    # gc.collect()

    # -------------------- Train the first layer ------------------------
    # ********* Prepare the data **********
    # Split off a validation set
    train, val = train_test_split(train_all, test_size=0.1)
    # Define which features to use
    # opt['model_name'] = 'lgb_1_2017-09-15#19:50:48_0.58820.pkl'
    # gbm, use_feat = load_model(opt)
    filters = set([
        'orderid', 'userid', 'biketype', 'geohashed_start_loc', 'bikeid',
        'starttime', 'geohashed_end_loc', 'label'
    ])
    predictors = list(filter(lambda x: x not in filters, train_all.columns.tolist()))
    # predictors = pd.DataFrame(data={'feature_name': gbm.feature_name(), 'feature_importance': gbm.feature_importance()})
    # predictors = predictors.sort_values(by=['feature_importance'], ascending=False)['feature_name'].values[:100]
    # use_feat = list(predictors) + ['orderid', 'geohashed_end_loc'] + ['sloc_eloc_common_eloc_count', 'sloc_eloc_common_sloc_count', 'sloc_eloc_common_conn1_count', 'sloc_eloc_common_conn2_count', 'sloc_eloc_common_eloc_rate', 'sloc_eloc_common_sloc_rate', 'sloc_eloc_common_conn1_rate', 'sloc_eloc_common_conn2_rate', 'user_sloc_eloc_common_eloc_count', 'user_sloc_eloc_common_sloc_count', 'user_sloc_eloc_common_conn1_count', 'user_sloc_eloc_common_conn2_count', 'user_sloc_eloc_common_eloc_rate', 'user_sloc_eloc_common_sloc_rate', 'user_sloc_eloc_common_conn1_rate', 'user_sloc_eloc_common_conn2_rate']
    # predictors = list(predictors_100) + ['sloc_eloc_common_eloc_count', 'sloc_eloc_common_sloc_count', 'sloc_eloc_common_conn1_count', 'sloc_eloc_common_conn2_count', 'sloc_eloc_common_eloc_rate', 'sloc_eloc_common_sloc_rate', 'sloc_eloc_common_conn1_rate', 'sloc_eloc_common_conn2_rate', 'user_sloc_eloc_common_eloc_count', 'user_sloc_eloc_common_sloc_count', 'user_sloc_eloc_common_conn1_count', 'user_sloc_eloc_common_conn2_count', 'user_sloc_eloc_common_eloc_rate', 'user_sloc_eloc_common_sloc_rate', 'user_sloc_eloc_common_conn1_rate', 'user_sloc_eloc_common_conn2_rate']
    print('Features used: {} dims\n'.format(len(predictors)), predictors)

    # Define the datasets
    X_train = train[predictors]
    y_train = train['label']
    X_val = val[predictors]
    y_val = val['label']
    del train, val
    gc.collect()

    # ********* LightGBM *********
    # Datasets
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)
    # Parameters
    params = {
        'objective': 'binary',
        'metric': {'auc', 'binary_logloss'},
        'is_unbalance': True,
        'num_leaves': opt['lgb_leaves'],
        'learning_rate': opt['lgb_lr'],
        'feature_fraction': 0.886,
        'bagging_fraction': 0.886,
        'bagging_freq': 5
    }
    gc.collect()

    # ********** Start training *********
    gbm1 = lgb.train(params, lgb_train,
                     num_boost_round=1200,
                     valid_sets=[lgb_train, lgb_val],
                     early_stopping_rounds=5)
    gc.collect()

    # ********* Save the model *********
    cur_time = datetime.datetime.now().strftime('%Y-%m-%d#%H:%M:%S')
    # save_path = '{}/{}_{}_{:.5f}.pkl'.format(opt['model_dir'], 'lgb', cur_time, score[0])
    save_path = '{}/{}_{}.pkl'.format(opt['model_dir'], 'lgb', cur_time)
    with open(save_path, 'wb') as fout:
        pickle.dump(gbm1, fout)
    print('Model saved:', save_path)
    gc.collect()

    # ********* Evaluate *********
    # Check performance on the training set
    del X_train, y_train, X_val, y_val
    gc.collect()
    score = get_score(train_all, predictors, gbm1, opt)
    print('Training-set score: {}'.format(score))
    import sys
    sys.exit(0)

    # save_path = '{}/{}.pkl'.format(opt['model_dir'], 'lgb_1_300_top25')
    # with open(save_path, 'wb') as fout:
    #     pickle.dump(gbm1, fout)
    # print('Model saved (layer 1):', save_path)

    # ********* save predict *****
    # train_all[['orderid', 'geohashed_end_loc', 'pred']].to_hdf('/home/xuwenchao/dyj-storage/train2324_80_pred_res.hdf', 'w', complib='blosc', complevel=5)
    # print('Save train_pred_res.hdf successful!!!')
    # import sys
    # sys.exit(0)

    # -------------------- Train the second layer ------------------------
    # opt['model_name'] = 'lgb_1_300_top25.pkl'
    # gbm1, use_feat1 = load_model(opt)
    # train_all.loc[:, 'pred'] = gbm1.predict(train_all[use_feat1])
    # Drop the low-importance features, keep the top-ten ranked candidates per order and retrain.
    # (The model can later be reloaded for finetuning, especially when samples are scarce;
    # even the top 5 could be kept, but 15 covers 99.5% of the original labels and 10 covers
    # 98%, so those two are likely better. Alternatives: 5(+finetune), 10(+finetune), 15(+finetune).)
    predictors = pd.DataFrame(data={
        'feature_name': gbm1.feature_name(),
        'feature_importance': gbm1.feature_importance()
    })
    predictors = predictors[predictors['feature_importance'] > 0]['feature_name'].values
    print('Features used in layer 2: {} dims\n'.format(len(predictors)), predictors)
    train_all = train_all.sort_values(by=['orderid', 'pred'],
                                      ascending=False).groupby('orderid').head(15)
    # train_all = rank(train_all, 'orderid', 'pred', ascending=False)
    del train_all['pred']
    print('Layer-2 data:', train_all.shape)

    # ********* Prepare the data **********
    # Split off a validation set
    train, val = train_test_split(train_all, test_size=0.1)
    # Define the datasets
    X_train = train[predictors]
    y_train = train['label']
    X_val = val[predictors]
    y_val = val['label']
    del train, val
    gc.collect()
    # Datasets
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)

    # ********** Start training *********
    gbm2 = lgb.train(params, lgb_train,
                     num_boost_round=1200,
                     valid_sets=[lgb_train, lgb_val],
                     early_stopping_rounds=5
                     # init_model=gbm1  # finetune
                     )

    # ********* Evaluate *********
    # Check performance on the training set
    score = get_score(train_all, predictors, gbm2, opt)
    print('Training-set score (layer 2): {}'.format(score))

    # ********* Save the model *********
    cur_time = datetime.datetime.now().strftime('%Y-%m-%d#%H:%M:%S')
    save_path = '{}/{}_{}_{:.5f}.pkl'.format(opt['model_dir'], 'lgb_2', cur_time, score[0])
    with open(save_path, 'wb') as fout:
        pickle.dump(gbm2, fout)
    print('Model saved (layer 2):', save_path)
    # save_path = '{}/{}.pkl'.format(opt['model_dir'], 'lgb_2_300_top15')
    # with open(save_path, 'wb') as fout:
    #     pickle.dump(gbm2, fout)
    # print('Model saved (layer 2):', save_path)
    import sys
    sys.exit(0)

    # -------------------- Train the third layer ------------------------
    # Keep the top-five ranked candidates per order
    predictors = pd.DataFrame(data={
        'feature_name': gbm2.feature_name(),
        'feature_importance': gbm2.feature_importance()
    })
    predictors = predictors[predictors['feature_importance'] > 0]['feature_name'].values
    print('Features used in layer 3: {} dims\n'.format(len(predictors)), predictors)
    train_all = train_all.sort_values(by=['orderid', 'pred'],
                                      ascending=False).groupby('orderid').head(10)
    # train_all = rank(train_all, 'orderid', 'pred', ascending=False)
    del train_all['pred']
    print('Layer-3 data:', train_all.shape)

    # ********* Prepare the data **********
    # Split off a validation set
    train, val = train_test_split(train_all, test_size=0.1)
    # Define the datasets
    X_train = train[predictors]
    y_train = train['label']
    X_val = val[predictors]
    y_val = val['label']
    del train, val
    gc.collect()
    # Datasets
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)

    # ********** Start training *********
    gbm3 = lgb.train(params, lgb_train,
                     num_boost_round=1200,
                     valid_sets=[lgb_train, lgb_val],
                     early_stopping_rounds=5
                     # init_model=gbm2  # finetune
                     )

    # ********* Evaluate *********
    # Check performance on the training set
    score = get_score(train_all, predictors, gbm3, opt)
    print('Training-set score (layer 3): {}'.format(score))

    # ********* Save the model *********
    cur_time = datetime.datetime.now().strftime('%Y-%m-%d#%H:%M:%S')
    save_path = '{}/{}_{}_{:.5f}.pkl'.format(opt['model_dir'], 'lgb_3', cur_time, score[0])
    with open(save_path, 'wb') as fout:
        pickle.dump(gbm3, fout)
    print('Model saved (layer 3):', save_path)
    save_path = '{}/{}.pkl'.format(opt['model_dir'], 'lgb_3_300_top10')
    with open(save_path, 'wb') as fout:
        pickle.dump(gbm3, fout)
    print('Model saved (layer 3):', save_path)

    # -------------------- Train the fourth layer ------------------------
    # Keep the top-three ranked candidates per order
    predictors = pd.DataFrame(data={
        'feature_name': gbm3.feature_name(),
        'feature_importance': gbm3.feature_importance()
    })
    predictors = predictors[predictors['feature_importance'] > 0]['feature_name'].values
    print('Features used in layer 4: {} dims\n'.format(len(predictors)), predictors)
    train_all = train_all.sort_values(by=['orderid', 'pred'],
                                      ascending=False).groupby('orderid').head(5)
    # train_all = rank(train_all, 'orderid', 'pred', ascending=False)
    del train_all['pred']
    print('Layer-4 data:', train_all.shape)

    # ********* Prepare the data **********
    # Split off a validation set
    train, val = train_test_split(train_all, test_size=0.1)
    # Define the datasets
    X_train = train[predictors]
    y_train = train['label']
    X_val = val[predictors]
    y_val = val['label']
    del train, val
    gc.collect()
    # Datasets
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)

    # ********** Start training *********
    gbm4 = lgb.train(params, lgb_train,
                     num_boost_round=1200,
                     valid_sets=[lgb_train, lgb_val],
                     early_stopping_rounds=5
                     # init_model=gbm3  # finetune
                     )

    # ********* Evaluate *********
    # Check performance on the training set
    score = get_score(train_all, predictors, gbm4, opt)
    print('Training-set score (layer 4): {}'.format(score))

    # ********* Save the model *********
    cur_time = datetime.datetime.now().strftime('%Y-%m-%d#%H:%M:%S')
    save_path = '{}/{}_{}_{:.5f}.pkl'.format(opt['model_dir'], 'lgb_4', cur_time, score[0])
    with open(save_path, 'wb') as fout:
        pickle.dump(gbm4, fout)
    print('Model saved (layer 4):', save_path)
    save_path = '{}/{}.pkl'.format(opt['model_dir'], 'lgb_4_300_top5')
    with open(save_path, 'wb') as fout:
        pickle.dump(gbm4, fout)
    print('Model saved (layer 4):', save_path)
def roc_score(clf, data, labels):
    predictions = get_score(clf, data)
    return metrics.roc_auc_score(labels, predictions)
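# Not part of the original snippet: one plausible get_score() for roc_score
# above, assuming binary scikit-learn classifiers that expose either
# predict_proba or decision_function.
def get_score(clf, data):
    if hasattr(clf, "predict_proba"):
        return clf.predict_proba(data)[:, 1]
    return clf.decision_function(data)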
def index():
    return render_template("index.html",
                           left=get_score("left"),
                           right=get_score("right"))