Example #1
def test(model, data, te_starts=3):
    model.eval()
    with torch.no_grad():
        zs, preds = model(data, TData.TR)

    ps, ns = get_sample(data, TData.TE, nsize=1)
    sscore = model.static.score(ps[-te_starts:], ns[-te_starts:],
                                zs[-te_starts:])

    dscore = model.dynamic.score(ps[-te_starts - 1:], ns[-te_starts - 1:],
                                 preds[-te_starts - 1:])

    dnp, dnn, _ = dynamic_new_link_prediction(data, data.te, preds)
    dnscore = model.dynamic.score(dnp[-te_starts - 1:], dnn[-te_starts - 1:],
                                  preds[-te_starts - 1:])

    sscore = get_score(*sscore)
    dscore = get_score(*dscore)
    dnscore = get_score(*dnscore)

    print('''Scores:
        Static AUC: %0.6f\tAP: %0.6f
        Dynamic AUC: %0.6f\tAP: %0.6f
        Dyn New AUC: %0.6f\tAP: %0.6f
    ''' % (sscore[0], sscore[1], dscore[0], dscore[1], dnscore[0], dnscore[1]))

    return {
        'static_auc': sscore[0],
        'static_ap': sscore[1],
        'dyn_auc': dscore[0],
        'dyn_ap': dscore[1],
        'new_auc': dnscore[0],
        'new_ap': dnscore[1]
    }
Example #2
def global_alignment(v, w, score_matrix):
    n = len(v) + 1
    m = len(w) + 1
    s = [[0] * m for i in range(0, n)]
    b = [['?'] * m for i in range(0, n)]

    for i in range(1, n):
        s[i][0] = s[i - 1][0] - FORFEIT
    for j in range(1, m):
        s[0][j] = s[0][j - 1] - FORFEIT

    for i in range(1, n):
        for j in range(1, m):
            vi = v[i - 1]
            wi = w[j - 1]
            s[i][j] = max(
                s[i - 1][j] - FORFEIT,
                s[i][j - 1] - FORFEIT,
                s[i - 1][j - 1] + utils.get_score(vi, wi, score_matrix)
            )
            if s[i][j] == s[i - 1][j - 1] + utils.get_score(vi, wi, score_matrix):
                b[i][j] = "↖"
            elif s[i][j] == s[i - 1][j] - FORFEIT:
                b[i][j] = "↑"
            else:
                b[i][j] = "←"

    words = {"v": '', "w": ''}
    restore_words(words, b, v, w, n - 1, m - 1)
    words["v"] = words["v"][::-1]
    words["w"] = words["w"][::-1]
    score = s[n - 1][m - 1]
    return score, words
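This example and the local_alignment one below (Example #15) delegate the traceback to a restore_words helper whose definition is not included on this page. A minimal sketch consistent with the arrow conventions used above (hypothetical, not the original implementation) could look like:

def restore_words(words, b, v, w, i, j):
    # Walk back from (i, j), appending aligned characters in reverse;
    # the callers reverse words["v"] and words["w"] afterwards.
    while i > 0 or j > 0:
        if b[i][j] == "↯":  # local alignment: free start, stop the traceback
            break
        if i > 0 and j > 0 and b[i][j] == "↖":
            words["v"] += v[i - 1]
            words["w"] += w[j - 1]
            i, j = i - 1, j - 1
        elif i > 0 and b[i][j] in ("↑", "?"):  # "?" covers the first column
            words["v"] += v[i - 1]
            words["w"] += "-"
            i -= 1
        else:  # "←", or the first row
            words["v"] += "-"
            words["w"] += w[j - 1]
            j -= 1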
Example #3
def fit_model(X, labels):
    sgd = SGDClassifier(loss="log", max_iter=300, tol=1e-3, class_weight="balanced")
    model = OneVsRestClassifier(sgd, n_jobs=1)
    model.fit(X, labels)
    train_pred = model.predict_proba(X)
    train_pred = train_pred > 0.4
    #train_pred = model.predict(X)
    utils.get_score(train_pred, labels)
    return model
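The utils.get_score call above just reports training metrics for the thresholded multilabel predictions; its source is not shown here. A hypothetical stand-in using scikit-learn (an assumption, not the project's actual helper) might be:

from sklearn.metrics import f1_score, precision_score, recall_score

def get_score(pred, labels):
    # Micro-averaged multilabel metrics for 0/1 prediction arrays.
    p = precision_score(labels, pred, average="micro", zero_division=0)
    r = recall_score(labels, pred, average="micro", zero_division=0)
    f1 = f1_score(labels, pred, average="micro", zero_division=0)
    print("precision=%.4f recall=%.4f f1=%.4f" % (p, r, f1))
    return p, r, f1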
Example #4
def test_get_score():
    logger.debug('Testing get_score()')
    state = [['X', 'X', 'O'],
             ['O', 'O', 'X'],
             ['X', 'O', 'X']]
    assert get_score(state, True) == 0
    assert get_score(state, False) == 0
    state = [['X', None, None],
             ['O', 'X', None],
             [None, 'O', 'X']]
    assert get_score(state, True) == 10
    state = [[None, 'X', 'O'],
             [None, 'X', None],
             [None, 'O', 'X']]
    assert get_score(state, True) == 0
    assert get_score(state, False) == 0
    state = [['O', None, 'X'],
             [None, None, 'X'],
             [None, 'O', 'X']]
    assert get_score(state, True) == 10
    assert get_score(state, False) == 0
    state = [['X', None, 'O'],
             [None, None, 'O'],
             [None, 'X', 'O']]
    assert get_score(state, True) == 0
    assert get_score(state, False) == -10
    logger.debug('Passed')
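The assertions pin down the scoring convention: +10 when the maximizing player's mark ('X') has three in a row, -10 when the minimizing player's mark ('O') does, and 0 otherwise. A minimal get_score satisfying exactly these tests (a sketch, not necessarily the implementation under test) is:

def get_score(state, max_player):
    mark = 'X' if max_player else 'O'
    lines = [state[i] for i in range(3)]                          # rows
    lines += [[state[i][j] for i in range(3)] for j in range(3)]  # columns
    lines += [[state[i][i] for i in range(3)],
              [state[i][2 - i] for i in range(3)]]                # diagonals
    if any(all(cell == mark for cell in line) for line in lines):
        return 10 if max_player else -10
    return 0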
Example #5
def direction(direction):
    direction = direction.lower()
    if direction not in ["left", "right"]:
        return "Unknown direction {0}".format(direction), 404

    if request.method == "GET":
        if direction == "left":
            return jsonify({"left": get_score("left")})
        else:
            return jsonify({"right": get_score("right")})
    else:
        incr_score(str(direction))

    return "ok"
Example #7
def train(model, data):
    opt = Adam(model.parameters(), lr=TR_PARAMS['lr'])
    val_best = (0, None)
    no_change = 0

    for e in range(TR_PARAMS['epochs']):
        model.train()
        opt.zero_grad()
        zs, preds = model.forward(data, TData.TR)
        ps, ns = get_sample(data, TData.TR)

        loss = model.loss_fn(ps, ns, zs, preds)
        print("[%d] Loss: %0.6f" % (e, loss.item()), end='')

        loss.backward()
        opt.step()

        with torch.no_grad():
            zs, preds = model.forward(data, TData.VA)
            ps, ns = get_sample(data, TData.VA)

            sscore, dscore = model.score(ps, ns, zs, preds)
            #loss = model.loss_fn(ps, ns, zs, preds).item()
            sscore = get_score(*sscore)
            dscore = get_score(*dscore)

            score = sum(sscore + dscore) / 4.0
            star = '*' if score > val_best[0] else ''

            #print('''\tEval loss: %0.6f%s
            #Static:
            #    AUC: %0.6f\tAP: %0.6f
            #Dynamic Eval:
            #    AUC: %0.6f\tAP: %0.6f
            #
            #''' % (loss, star, sscore[0], sscore[1], dscore[0], dscore[1]))
            print("\tEval score: %0.6f%s" % (score, star))

            if score < val_best[0]:
                no_change += 1
                if no_change > TR_PARAMS['patience']:
                    print("Early stopping!")
                    break

            else:
                no_change = 0
                val_best = (score, deepcopy(model))

    return val_best[1]
Example #8
def test_default(**kwargs):
    import utils
    result = None
    testfile = test_theory
    options = [clingo, bk, ex, testfile, exmpl_constr, '0 --asp09']
    command = ' '.join(options)
    out = os.popen(command).read().split('. ')
    if out[0].strip() == 'UNSATISFIABLE':
        result = False
    else:
        out = set(x.strip() for x in out if x.strip() != '')
        # Materialize the filter once: in Python 3, filter() returns a
        # one-shot iterator, so the original list()/all() calls after this
        # line would silently consume it.
        out = [x for x in out if 'OPTIMUM FOUND' not in x]
        if not out:
            result = True
        elif all('posCovered' in x or 'negsCovered' in x for x in out):
            (_, _, score) = utils.get_score(out)
            return score == gl.current_example_object.positive_count
        else:
            raise excps.HypothesisTestingException(
                'ASP reasoner returned %s' % ' '.join(out), gl.logger)
    if 'last_seen' in kwargs:
        if result:
            print(gl.current_example,'ok')
        else:
            print(gl.current_example,'Not ok!')
        i = kwargs['last_seen']
        utils.get_example(i)   
    return result
Example #10
def option_V():
    """
    This option allows the user to record the votes for the project.
    """
    project_name = str(input("\nEnter the project name: "))
    project = project_dict[project_name]
    print("There are {} team members".format(project.n_members))
    for member in project.team_members.values():
        while True:
            print("\n\tEnter {}'s votes, points must add up to 100:".format(
                member.name))
            for team_member in project.get_members_name():
                if team_member != member.name:
                    points = utils.get_score(member.name, team_member)
                    member.votes[team_member] = points

            if sum(member.votes.values()) != 100:
                print("\t\tError!!! The points must add up to 100.")
                continue
            else:
                break

    project.calculate_point_allocation()

    main()
Example #11
def target(args):
    w1, w2, w3 = args
    r = a + b * w1 + c * w2 + d * w3
    result = r.topk(5, 1)[1]
    predict_label_and_marked_label_list = [[_1, _2] for _1, _2 in zip(result, true_labels)]
    score, _, _, _ = get_score(predict_label_and_marked_label_list)
    print(args, score, _)  # list_space = [hp.uniform('a',0,1),hp.uniform('b',0,1)]
    return -score
Example #12
def train(model: nn.Module, data: TData):
    opt = Adam(model.parameters(), lr=0.001)

    best = (None, 0)
    no_progress = 0
    times = []

    for e in range(EPOCHS):
        model.train()
        opt.zero_grad()

        start_t = time()
        zs, _ = model.forward(data, TData.TRAIN)

        p = [data.ei_masked(TData.TRAIN, i) for i in range(data.T)]
        n = data.get_negative_edges(TData.TRAIN, nratio=10)
        loss = model.calc_loss(p, n, zs)

        loss.backward()
        opt.step()
        elapsed = time() - start_t
        times.append(elapsed)

        print("[%d] Loss: %0.4f\t%0.4fs" % (e, loss.item(), elapsed))

        model.eval()
        with torch.no_grad():
            zs, _ = model.forward(data, TData.TRAIN)

            p = [data.ei_masked(TData.VAL, i) for i in range(data.T)]
            n = data.get_negative_edges(TData.VAL, nratio=10)
            p, n = model.calc_scores(p, n, zs)

            auc, ap = get_score(p, n)
            print("\tVal  AUC: %0.4f  AP: %0.4f" % (auc, ap), end='')

            tot = auc + ap
            if tot > best[1]:
                best = (deepcopy(model), tot)
                print("*")
            else:
                print()
                if e >= MIN:
                    no_progress += 1

            if no_progress == PATIENCE:
                print("Early stopping!")
                break

    model = best[0]
    _, h0 = model.forward(data, TData.ALL)
    tpe = sum(times) / len(times)
    print("TPE: %0.4f" % tpe)

    return model, h0, tpe
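Across the temporal-graph examples (#1, #7, #12, #25), get_score(p, n) takes likelihood scores for positive and negative edges and returns an (AUC, AP) pair. A plausible sketch of such a helper (an assumption; the utils module itself is not shown) is:

import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score

def get_score(pos_scores, neg_scores):
    # Label positive-edge scores 1 and negative-edge scores 0, then
    # compute ranking metrics over the combined score vector.
    scores = np.concatenate([np.asarray(pos_scores), np.asarray(neg_scores)])
    labels = np.concatenate([np.ones(len(pos_scores)), np.zeros(len(neg_scores))])
    return roc_auc_score(labels, scores), average_precision_score(labels, scores)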
Example #13
    def validation_epoch_end(self, outputs):

        pred = np.concatenate([out['pred'] for out in outputs])
        target = np.concatenate([out['target'] for out in outputs])

        with open("pred.txt", "w") as f:
            for s in pred:
                f.write(s + '\n')
        score = get_score(target, pred)
        print("\n", score)
        self.log('LD', score, prog_bar=True)
Example #14
def target(args):
    r = 0
    for r_, k_ in zip(args, probs):
        r = r + r_ * k_
    result = r.topk(5, 1)[1]
    predict_label_and_marked_label_list = [
        [_1, _2] for _1, _2 in zip(result, true_labels)
    ]
    score, _, _, _ = get_score(predict_label_and_marked_label_list)
    print(args, score, _)  # list_space = [hp.uniform('a',0,1),hp.uniform('b',0,1)]
    return -score
Example #15
def local_alignment(v, w, score_matrix):
    n = len(v) + 1
    m = len(w) + 1
    s = [[0] * m for i in range(0, n)]
    b = [["↯"] * m for i in range(0, n)]

    max_elem = {"score": 0, "i": 0, "j": 0}

    for i in range(1, n):
        for j in range(1, m):
            vi = v[i - 1]
            wi = w[j - 1]
            s[i][j] = max(
                0,
                s[i - 1][j] - FORFEIT,
                s[i][j - 1] - FORFEIT,
                s[i - 1][j - 1] + utils.get_score(vi, wi, score_matrix)
            )

            if s[i][j] >= max_elem["score"]:
                max_elem["score"] = s[i][j]
                max_elem["i"] = i
                max_elem["j"] = j

            if s[i][j] == s[i - 1][j - 1] + utils.get_score(vi, wi, score_matrix):
                b[i][j] = "↖"
            elif s[i][j] == s[i - 1][j] - FORFEIT:
                b[i][j] = "↑"
            elif s[i][j] == s[i][j - 1] - FORFEIT:
                b[i][j] = "←"
            else:
                b[i][j] = "↯"

    words = {"v": '', "w": ''}
    restore_words(words, b, v, w, max_elem["i"], max_elem["j"])
    words["v"] = words["v"][::-1]
    words["w"] = words["w"][::-1]
    return max_elem["score"], words
Example #16
def do_versus(event):
    splitted = event.message.text.split(' vs. ')
    if len(splitted) < 2:
        return

    ranked = ((word, get_score(word.strip())) for word in splitted)
    rank_sorted = sorted(ranked, key=lambda m: m[1], reverse=True)

    message = ''
    for i, (word, score) in enumerate(rank_sorted):
        winner = '' if i > 0 else ' (winner)'
        message += '{}. {} ({}){}\n'.format(i+1, word, score, winner)
    line_bot_api.reply_message(event.reply_token, TextSendMessage(text=message))
Example #17
def lstm_test(sents, tags):
    model = LSTM()
    model.load_state_dict(torch.load("blstm.pkl"))
    tags = vec_flat(tags)
    tags_p = []
    for s in sents:
        out = model(torch.unsqueeze(a2ft(s), 0))
        out = out.data.numpy()
        for out_i in out:
            max_idx = np.argmax(out_i)
            tags_p.append(max_idx)
    utils.print_score(tags, tags_p)
    return utils.get_score(tags, tags_p)
Example #18
def target(args):
    r = 0
    for r_, k_ in enumerate(args):
        if r_ < model_num:
            r += k_ * probs[r_]
        else:
            tmp = t.load(files_path[r_]).cuda().float()
            r = r + k_ * tmp.cpu()
    result = r.topk(5, 1)[1]
    predict_label_and_marked_label_list = [[_1, _2] for _1, _2 in zip(result, true_labels)]
    score, _, _, _ = get_score(predict_label_and_marked_label_list)
    print(args, score, _)  # list_space = [hp.uniform('a',0,1),hp.uniform('b',0,1)]
    return -score
Example #19
def main():
    with open("04/input.txt", encoding="UTF-8") as file:
        content = file.read()

    numbers = [int(number) for number in content.split("\n\n")[0].split(",")]
    boards = get_boards(content.split("\n\n")[1:])

    for number in numbers:
        boards = mark_boards(boards, number)
        winner = check_winner(boards)
        if winner is not None:
            score = get_score(winner, number)
            print(score)
            break
Example #20
def start_message(message):
    bot.send_message(message.chat.id, 'Hi, you sent me /start')
    room_id = message.chat.id
    #bot.send_message(message.chat.id, 'Hi, you sent me /start')
    #bot.send_photo(chat_id=room_id, photo=open('images/resized-2C.png', 'rb'))
    player_cards = get_random_cards()
    score = get_score(player_cards)
    bot.send_message(message.chat.id,
                     'You scored: %s!' % score,
                     reply_markup=markup)
    combine_images(player_cards, 'player.png')
    bot.send_photo(chat_id=room_id, photo=open('player.png', 'rb'))
    TEST = 'ffffff'
    print(TEST)
Example #21
def main(**kwargs):
    model = getattr(models, opt.model)(opt).cuda()
    pre_loss = 1.0
    lr, lr2 = opt.lr, opt.lr2
    loss_function = getattr(models, opt.loss)()
    best_score = 0
    dataset = ZhihuData(opt.train_data_path,
                        opt.labels_path,
                        type_=opt.type_,
                        augument=opt.augument)
    dataloader = data.DataLoader(dataset=dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=opt.shuffle,
                                 num_workers=opt.num_workers,
                                 pin_memory=True)
    optimizer = model.get_optimizer(lr, opt.lr2, opt.weight_decay)

    for epoch in range(opt.max_epoch):
        for i, ((title, content), label) in tqdm.tqdm(enumerate(dataloader)):
            title, content, label = title.cuda(), content.cuda(), label.cuda()
            optimizer.zero_grad()
            score = model(title, content)
            loss = loss_function(score, opt.weight * label.float())
            loss.backward()
            optimizer.step()

            predict = score.data.topk(5, dim=1)[1].cpu().tolist()
            true_target = label.data.float().cpu().topk(5, dim=1)
            true_label = true_target[0][:, :5]
            true_index = true_target[1][:, :5]
            predict_label_and_marked_label_list = []
            for j in range(label.size(0)):
                true_index_ = true_index[j]
                true_label_ = true_label[j]
                true = true_index_[true_label_ > 0]
                predict_label_and_marked_label_list.append(
                    (predict[j], true.tolist()))
        score_, prec_, recall_, ss = get_score(
            predict_label_and_marked_label_list)

    scores, prec_, recall_, _ss = val(model, dataset)
    if scores > best_score:
        best_score = scores
        best_path = model.save(name=str(scores), new=True)

    if scores < best_score:
        model.load(best_path, change_opt=False)
        lr = lr * opt.lr_decay
        lr2 = 2e-4 if lr2 == 0 else lr2 * 0.8
        optimizer = model.get_optimizer(lr, lr2, 0)
Example #22
def max_exp_shot(env, network, device, topk_coors, turn, x):
    number_of_sim = 16

    max_v = -99
    virtual_e = 0
    s = time.time()
    if turn != 15:

        # make input
        # virtual_inputs = torch.empty(0)
        list_for_cat = []
        for i, v_action in enumerate(topk_coors):
            for _ in range(number_of_sim):
                v_state, _ = env.virtual_step(v_action, 0.145)
                list_for_cat.append(
                    utils.to_input(v_state, turn + 1).unsqueeze(0))
                # more time-consuming when cat in each iteration
                # virtual_inputs = torch.cat((virtual_inputs, utils.to_input(v_state, turn + 1).unsqueeze(0)), 0)
        virtual_inputs = torch.cat(list_for_cat, dim=0)

        # forward
        with torch.no_grad():
            _, virtual_v_out = network(virtual_inputs.to(device))
        virtual_v_out_prob = to_prob(virtual_v_out)
        # -x: because of op turn's value
        # 128x17 -> 128 (#sim x #topk)
        virtual_e = expectation(-x, virtual_v_out_prob)
        virtual_e = virtual_e.split(number_of_sim)
        virtual_e = [x.mean() for x in virtual_e]

        # hum..
        highlight = virtual_e.index(max(virtual_e))
        max_action = topk_coors[highlight]

    else:
        for i, v_action in enumerate(topk_coors):
            for _ in range(number_of_sim):
                v_state, _ = env.virtual_step(v_action, 0.145)
                # order is 1 because of last shot
                virtual_e += utils.get_score(v_state, 1)

            virtual_e = virtual_e / number_of_sim
            print(v_action, virtual_e)
            if max_v < virtual_e:
                max_v = virtual_e
                max_action = v_action
                highlight = i
    print(time.time() - s)
    return max_action, highlight
Example #23
def main(**kwargs):
    print("@@@@@@@@@@@@@@@@@@@@@@@")
    inpath = 'result'
    outfile = 'score.csv'
    label_path = 'labels_enhanceA.json'
    test_data_path = 'test_enhanceA.npz'
    for k, v in kwargs.items():
        print(k)
        print(v)
        if k == 'inpath':
            inpath = v
        if k == 'outfile':
            outfile = v
        if k == 'label_path':
            label_path = v
        if k == 'test_data_path':
            test_data_path = v
    print(label_path)
    if inpath[-1] == '/':
        inpath = inpath[0:-1]
    index2qid = np.load(test_data_path)['index2qid'].item()
    with open(label_path) as f:
        labels_info = json.load(f)
    qid2label = labels_info['d']
    label2qid = labels_info['id2label']
    files = glob.glob(inpath + '/*.pth')
    print(files)
    for file in files:
        f = open(outfile, 'a', encoding='utf-8')
        print(file)
        if not os.path.isfile(file):
            print('is path')
            continue
        r = t.load(file)
        true_labels = []
        for ii in range(len(r)):
            true_labels.append(qid2label[index2qid[ii]])
        tmp = r
        result = (tmp).topk(5, 1)[1]
        predict_label_and_marked_label_list = [
            [_1, _2] for _1, _2 in zip(result, true_labels)
        ]
        score, _, _, ss = get_score(predict_label_and_marked_label_list)
        print(score)
        print(ss)
        writer = csv.writer(f)
        writer.writerow([file, str(score)])
        f.close()
Example #24
def val(model, dataset):
    '''
    Compute the model's score on the validation set
    '''

    dataset.train(False)
    model.eval()

    dataloader = data.DataLoader(dataset,
                                 batch_size=opt.batch_size,
                                 shuffle=False,
                                 num_workers=opt.num_workers,
                                 pin_memory=True)

    predict_label_and_marked_label_list = []
    for ii, ((title, content), label) in tqdm.tqdm(enumerate(dataloader)):
        title,content,label = Variable(title.cuda(),volatile=True),\
                              Variable(content.cuda(),volatile=True),\
                              Variable(label.cuda(),volatile=True)
        score = model(title, content)
        # !TODO: optimize this code
        #       1. append
        #       2. for loop
        #       3. topk instead of sort

        predict = score.data.topk(5, dim=1)[1].cpu().tolist()
        true_target = label.data.float().topk(5, dim=1)
        true_index = true_target[1][:, :5]
        true_label = true_target[0][:, :5]
        tmp = []

        for jj in range(label.size(0)):
            true_index_ = true_index[jj]
            true_label_ = true_label[jj]
            true = true_index_[true_label_ > 0]
            tmp.append((predict[jj], true.tolist()))

        predict_label_and_marked_label_list.extend(tmp)
    del score

    dataset.train(True)
    model.train()

    scores, prec_, recall_, _ss = get_score(
        predict_label_and_marked_label_list)
    return (scores, prec_, recall_, _ss)
Example #25
def test(model: nn.Module, data: TData, h0: torch.Tensor):
    pred = 1 if model.pred else 0

    model.eval()
    with torch.no_grad():
        zs, _ = model.forward(data, TData.ALL, h0=h0)

        scores = torch.cat([
            model.decode(data.eis[i + pred][0], data.eis[i + pred][1], zs[i])
            for i in range(data.T - pred)
        ], dim=0)

    y = torch.cat(data.ys[pred:])
    y_hat = torch.zeros(scores.size(0))
    y_hat[scores <= model.cutoff] = 1

    tpr = y_hat[y == 1].mean() * 100
    fpr = y_hat[y == 0].mean() * 100

    tp = y_hat[y == 1].sum()
    fp = y_hat[y == 0].sum()

    fn = (y == 1).sum() - tp
    f1 = tp / (tp + 0.5 * (fp + fn))

    pscore = scores[y == 0]
    nscore = scores[y == 1]
    auc, ap = get_score(pscore, nscore)

    print("TPR: %0.2f, FPR: %0.2f" % (tpr, fpr))
    print("TP: %d  FP: %d" % (tp, fp))
    print("F1: %0.8f" % f1)
    print("AUC: %0.4f" % auc)
    print("AP: %0.8f" % ap)

    return {
        'tpr': tpr,
        'fpr': fpr,
        'tp': tp,
        'fp': fp,
        'auc': auc,
        'ap': ap,
        'f1': f1
    }
Example #26
def target(args):
    weight = args
    aaa = t.ones(1999)
    for ii, _ in enumerate(large_index):
        aaa[_] = args[ii]
    # aaa[0], aaa[1], aaa[2], aaa[3], aaa[4] = args
    weight = aaa.view(1, -1).expand(200000, 1999)
    r2 = weight * r.float()
    result = r2.topk(5, 1)[1]
    predict_label_and_marked_label_list = [[_1, _2] for _1, _2 in zip(result, true_labels)]
    score, _, _, _ = get_score(predict_label_and_marked_label_list)
    # if score > previous_best_score:
    print(score)
    #     previous_best_score = score
    #     with open(str(score), 'wb') as f:
    #         pickle.dump(args, f)

    return -score
Example #27
    def get_score(self, pos, time):
        """
        >>> w = RectangularWorld('../test/')
        >>> w.get_score(np.array([2.1,2.9]), 0)
        0.0
        """

        if self.full_noise_file:
            return utils.get_score(pos, time, self.noise_location,
                                   self.pos_limits, self.noise_line_width)
        else:
            if self.edge_goal:
                return utils.wall_score(pos, self.centers[time],
                                        self.center_radius, self.pos_limits,
                                        self.shape)
            else:
                return utils.calculate_score(pos, self.centers[time],
                                             self.center_radius,
                                             self.pos_limits, self.shape)
Example #28
def main(vocab_file,
         embeddings_file,
         pretrained_file,
         max_length=50,
         gpu_index=0,
         batch_size=128):
    device = torch.device(
        "cuda:{}".format(gpu_index) if torch.cuda.is_available() else "cpu")
    print(20 * "=", " Preparing for testing ", 20 * "=")
    if platform == "linux" or platform == "linux2":
        checkpoint = torch.load(pretrained_file)
    else:
        checkpoint = torch.load(pretrained_file, map_location=device)
    # Retrieving model parameters from checkpoint.
    embeddings = load_embeddings(embeddings_file)
    print("\t* Loading test data...")
    # test_data = LCQMC_Dataset(test_file, vocab_file, max_length)
    # test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)
    print("\t* Building model...")
    model = SiaGRU(embeddings, device=device).to(device)
    model.load_state_dict(checkpoint["model"])
    print(20 * "=", " Testing SiaGRU model on device: {} ".format(device),
          20 * "=")

    database = [
        line for line in open('./data/rumors.txt', 'r', encoding='utf-8')
    ]

    while True:
        input("enter to continue")
        inputs = [
            line for line in open('./data/input.txt', 'r', encoding='utf-8')
        ]
        init_csv(inputs, database, './data/work_data.csv')
        dataset = LCQMC_Dataset('./data/work_data.csv', vocab_file, max_length)
        dataloader = DataLoader(dataset, shuffle=False, batch_size=batch_size)
        prob = get_score(model, dataloader)
        for i, p in enumerate(prob):
            if p > 0.5:
                print("text:", inputs[i // len(database)])
                print("rumor:", database[i % len(database)])
                print("prob:", p)
Example #29
    def on_epoch_end(self, epoch, logs):
        # Validation
        start_proba, end_proba = get_proba_prediction(self.model,
                                                      self.word_ids, self.mask,
                                                      self.segm_ids)
        current = get_score(start_proba, end_proba, self.df,
                            self.sample_ind2new_ind2old_ind)

        # Save best model
        if current > self.best:
            self.best = current
            self.model.save_weights(self.best_weights_path, overwrite=True)

        # Log score info
        abs_epoch = self.start_epoch + epoch
        with open(self.log_path, 'a') as f:
            f.write(
                f'\n[fold: {self.n_fold}, epoch: {abs_epoch}] Val Score : {current:.5f} (time: {(time.time() - self.checkpoint)// 60 } min.)'
            )
        self.checkpoint = time.time()
Example #30
def match(args):
    num_of_game, n, model_file_name, order, opponent = args
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = ResNet(ResidualBlock, [2, 2, 2, 2]).to(device)

    # model_file_name = "zero_final_0"
    load_model(model, model_file_name)

    if opponent is not None:
        model2 = ResNet(ResidualBlock, [2, 2, 2, 2]).to(device)
        load_model(model2, opponent)

    # num_of_game = 1000
    num_of_win = 0
    # order = 0
    p_bar = trange(num_of_game, position=n)
    for i in p_bar:
        state = np.zeros((1, 32))
        for turn in range(8):
            if turn % 2 == order:
                state_plane = coordinates_to_plane(state, turn, order).to(device)

                prob, _ = model(state_plane)
                action = best_shot_parm(prob)
            elif opponent is not None:
                state_plane = coordinates_to_plane(state, turn, (order+1) % 2).to(device)

                prob, _ = model2(state_plane)
                action = best_shot_parm(prob)
            else:
                # action = (2.375, 4.88, random.randint(0, 1))
                action = (random.random() * 4.75, random.random() * 11.28, random.randint(0, 1))

            state = sim.simulate(state, turn, action[0], action[1], action[2], 0.145)[0]
        if get_score(state, order) > 0:
            num_of_win += 1
        p_bar.set_description("%.3f" % (num_of_win/(i+1)))

    return num_of_win / num_of_game
Example #31
def minimax(state, max_player, depth):
    if is_end_state(state) or depth == 0:  # base case
        score = get_score(state, not max_player)  # get_score with previous player
        return score
    else:
        states = []
        scores = []
        # populate moves and scores
        player = get_player(max_player)
        for child_state in get_possible_states(state, player):
            score = minimax(child_state, not max_player, depth - 1)
            scores.append(score)
            states.append(child_state)
        # based on the player, choose the best move
        if max_player:
            max_score_index = scores.index(max(scores))
            choice['choice'] = states[max_score_index]
            return scores[max_score_index]
        else:
            min_score_index = scores.index(min(scores))
            choice['choice'] = states[min_score_index]
            return scores[min_score_index]
Example #32
def main():
    with open("04/input.txt", encoding="UTF-8") as file:
        content = file.read()

    numbers = [int(number) for number in content.split("\n\n")[0].split(",")]
    boards = get_boards(content.split("\n\n")[1:])
    winners = list()
    winning_number = 0

    for number in numbers:
        boards = mark_boards(boards, number)
        current_winners = check_winners(boards)

        if len(current_winners) > 0:
            winning_number = number
            winners.extend(current_winners)

            # Remove winners from boards
            for winner in current_winners:
                boards = list(filter(lambda x: not np.array_equal(x, winner), boards))

    last_winner = winners[-1]
    score = get_score(last_winner, winning_number)
    print(score)
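Both bingo solutions (this one and Example #19) score a winning board as in Advent of Code 2021 day 4: the sum of the unmarked cells times the number just drawn. A sketch under the assumption that mark_boards flags a drawn cell by setting it to -1 (the marking helpers are not shown here):

def get_score(board, number):
    # board is a NumPy array; cells equal to -1 are considered marked.
    unmarked = board[board != -1].sum()
    return int(unmarked * number)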
Example #33
def run_model():
    """
    Implements and runs the MLP
    """
    logging.info("Running model...")

    # An independent model for each output
    target_name = ['arriendo', 'profesional', 'tipo_construccion_id']
    perceptron_clf_dict = {}

    for target in target_name:
        logging.info(
            "Training the model for output {}...".format(target))

        perceptron_clf_dict[target] = MLPClassifier(activation='logistic',
                                                    solver='lbfgs',
                                                    hidden_layer_sizes=(100, ),
                                                    max_iter=1000)
        perceptron_clf_dict[target].fit(train_tfidf, train_data[target])
        print('Model accuracy for output {}: {:.3f}'.format(
            target, perceptron_clf_dict[target].score(test_tfidf,
                                                      test_data[target])))

    joint_accuracy = get_score(
        test_tfidf,
        test_data[['arriendo', 'profesional',
                   'tipo_construccion_id']], perceptron_clf_dict)
    print('Model accuracy: {:.3f}\n'.format(joint_accuracy))

    # Run the predictions
    if os.path.isfile(args.predict):
        predict(classifiers=perceptron_clf_dict,
                filename=args.predict,
                data_object=data_object)
    elif args.predict != '':
        logging.error('The file {} does not exist.'.format(args.predict))
Example #34
def train(**kwargs):

    # ---------------------- Update parameters ----------------------
    opt = DefaultConfig()
    opt.update(**kwargs)
    opt.printf()

    # ---------------------- Data processing ----------------------

    # Load the data
    train1, train2 = get_train_data(opt)
    # Get the samples
    # train_sample = get_sample(train1, train2, load=True)
    # Get the features
    # train_feat = get_feat(train1, train_sample)
    # Get the labels
    # train_all = get_label(train_feat, opt)
    # gc.collect()

    # train_all.to_hdf('/home/xuwenchao/dyj-storage/all-feat/feat_23_24_label.hdf', 'w', complib='blosc', complevel=5)
    train_all = pd.read_hdf(
        '/home/xuwenchao/dyj-storage/all-feat/feat_23_24_label.hdf')
    print(train_all.shape)

    # Select the features to use
    # opt['model_name'] = 'lgb_1_2017-09-15#19:50:48_0.58820.pkl'
    # gbm, use_feat = load_model(opt)
    # predictors_100 = pd.DataFrame(data={'feature_name': gbm.feature_name(), 'feature_importance': gbm.feature_importance()})
    # predictors_100 = predictors_100.sort_values(by=['feature_importance'], ascending=False)['feature_name'].values[:100]
    # use_feat = list(predictors_100) + ['orderid', 'geohashed_end_loc', 'label'] + ['sloc_eloc_common_eloc_count', 'sloc_eloc_common_sloc_count', 'sloc_eloc_common_conn1_count', 'sloc_eloc_common_conn2_count', 'sloc_eloc_common_eloc_rate', 'sloc_eloc_common_sloc_rate', 'sloc_eloc_common_conn1_rate', 'sloc_eloc_common_conn2_rate', 'user_sloc_eloc_common_eloc_count', 'user_sloc_eloc_common_sloc_count', 'user_sloc_eloc_common_conn1_count', 'user_sloc_eloc_common_conn2_count', 'user_sloc_eloc_common_eloc_rate', 'user_sloc_eloc_common_sloc_rate', 'user_sloc_eloc_common_conn1_rate', 'user_sloc_eloc_common_conn2_rate']
    # train_all = train_all[use_feat]
    # gc.collect()

    # -------------------- Train the first layer ------------------------

    # ********* Prepare the data **********
    # Split off a validation set
    train, val = train_test_split(train_all, test_size=0.1)
    # Define which features to use
    # opt['model_name'] = 'lgb_1_2017-09-15#19:50:48_0.58820.pkl'
    # gbm, use_feat = load_model(opt)
    filters = set([
        'orderid', 'userid', 'biketype', 'geohashed_start_loc', 'bikeid',
        'starttime', 'geohashed_end_loc', 'label'
    ])
    predictors = list(
        filter(lambda x: x not in filters, train_all.columns.tolist()))
    # predictors = pd.DataFrame(data={'feature_name': gbm.feature_name(), 'feature_importance': gbm.feature_importance()})
    # predictors = predictors.sort_values(by=['feature_importance'], ascending=False)['feature_name'].values[:100]
    # use_feat = list(predictors) + ['orderid', 'geohashed_end_loc'] + ['sloc_eloc_common_eloc_count', 'sloc_eloc_common_sloc_count', 'sloc_eloc_common_conn1_count', 'sloc_eloc_common_conn2_count', 'sloc_eloc_common_eloc_rate', 'sloc_eloc_common_sloc_rate', 'sloc_eloc_common_conn1_rate', 'sloc_eloc_common_conn2_rate', 'user_sloc_eloc_common_eloc_count', 'user_sloc_eloc_common_sloc_count', 'user_sloc_eloc_common_conn1_count', 'user_sloc_eloc_common_conn2_count', 'user_sloc_eloc_common_eloc_rate', 'user_sloc_eloc_common_sloc_rate', 'user_sloc_eloc_common_conn1_rate', 'user_sloc_eloc_common_conn2_rate']
    # predictors = list(predictors_100) + ['sloc_eloc_common_eloc_count', 'sloc_eloc_common_sloc_count', 'sloc_eloc_common_conn1_count', 'sloc_eloc_common_conn2_count', 'sloc_eloc_common_eloc_rate', 'sloc_eloc_common_sloc_rate', 'sloc_eloc_common_conn1_rate', 'sloc_eloc_common_conn2_rate', 'user_sloc_eloc_common_eloc_count', 'user_sloc_eloc_common_sloc_count', 'user_sloc_eloc_common_conn1_count', 'user_sloc_eloc_common_conn2_count', 'user_sloc_eloc_common_eloc_rate', 'user_sloc_eloc_common_sloc_rate', 'user_sloc_eloc_common_conn1_rate', 'user_sloc_eloc_common_conn2_rate']
    print('Features used: {} dims\n'.format(len(predictors)), predictors)
    # Define the datasets
    X_train = train[predictors]
    y_train = train['label']
    X_val = val[predictors]
    y_val = val['label']
    del train, val
    gc.collect()

    # ********* LightGBM *********
    # Datasets
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)
    # Parameters
    params = {
        'objective': 'binary',
        'metric': {'auc', 'binary_logloss'},
        'is_unbalance': True,
        'num_leaves': opt['lgb_leaves'],
        'learning_rate': opt['lgb_lr'],
        'feature_fraction': 0.886,
        'bagging_fraction': 0.886,
        'bagging_freq': 5
    }
    gc.collect()
    # ********** Start training *********
    gbm1 = lgb.train(params,
                     lgb_train,
                     num_boost_round=1200,
                     valid_sets=[lgb_train, lgb_val],
                     early_stopping_rounds=5)
    gc.collect()

    # #  ********* Save the model *********

    cur_time = datetime.datetime.now().strftime('%Y-%m-%d#%H:%M:%S')
    # save_path = '{}/{}_{}_{:.5f}.pkl'.format(opt['model_dir'], 'lgb', cur_time, score[0])
    save_path = '{}/{}_{}.pkl'.format(opt['model_dir'], 'lgb', cur_time)
    with open(save_path, 'wb') as fout:
        pickle.dump(gbm1, fout)
    print('Saved model:', save_path)
    gc.collect()

    # #  ********* Evaluate *********

    # # Check performance on the training set
    del X_train, y_train, X_val, y_val
    gc.collect()
    score = get_score(train_all, predictors, gbm1, opt)
    print('Training-set score: {}'.format(score))

    import sys
    sys.exit(0)

    # save_path = '{}/{}.pkl'.format(opt['model_dir'], 'lgb_1_300_top25')
    # with open(save_path, 'wb') as fout:
    #     pickle.dump(gbm1, fout)
    # print('Saved model (layer 1):', save_path)

    # ********* save predict *****

    # train_all[['orderid', 'geohashed_end_loc', 'pred']].to_hdf('/home/xuwenchao/dyj-storage/train2324_80_pred_res.hdf', 'w', complib='blosc', complevel=5)
    # print('Save train_pred_res.hdf successful!!!')

    # import sys
    # sys.exit(0)

    # -------------------- Train the second layer ------------------------

    # opt['model_name'] = 'lgb_1_300_top25.pkl'
    # gbm1, use_feat1 = load_model(opt)
    # train_all.loc[:, 'pred'] = gbm1.predict(train_all[use_feat1])

    # Drop the low-importance features, keep the top-ten ranked candidate samples,
    # and retrain the model (later the model can be loaded and fine-tuned,
    # especially when samples are scarce; we could even keep only the top 5,
    # but 15 covers 99.5% of the original labels and 10 covers 98%, so those two
    # may work better; alternatives: 5 (+finetune), 10 (+finetune), 15 (+finetune))
    predictors = pd.DataFrame(
        data={
            'feature_name': gbm1.feature_name(),
            'feature_importance': gbm1.feature_importance()
        })
    predictors = predictors[
        predictors['feature_importance'] > 0]['feature_name'].values
    print('Features used in layer 2: {} dims\n'.format(len(predictors)), predictors)
    train_all = train_all.sort_values(
        by=['orderid', 'pred'], ascending=False).groupby('orderid').head(15)
    # train_all = rank(train_all, 'orderid', 'pred', ascending=False)
    del train_all['pred']
    print('Layer-2 data:', train_all.shape)

    # ********* Prepare the data **********
    # Split off a validation set
    train, val = train_test_split(train_all, test_size=0.1)

    # Define the datasets
    X_train = train[predictors]
    y_train = train['label']
    X_val = val[predictors]
    y_val = val['label']
    del train, val
    gc.collect()

    # Datasets
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)

    # ********** Start training *********
    gbm2 = lgb.train(params,
                     lgb_train,
                     num_boost_round=1200,
                     valid_sets=[lgb_train, lgb_val],
                     early_stopping_rounds=5
                     # init_model=gbm1 # finetune
                     )

    #  ********* Evaluate *********

    # Check performance on the training set
    score = get_score(train_all, predictors, gbm2, opt)
    print('Training-set score (layer 2): {}'.format(score))

    #  ********* Save the model *********

    cur_time = datetime.datetime.now().strftime('%Y-%m-%d#%H:%M:%S')
    save_path = '{}/{}_{}_{:.5f}.pkl'.format(opt['model_dir'], 'lgb_2',
                                             cur_time, score[0])
    with open(save_path, 'wb') as fout:
        pickle.dump(gbm2, fout)
    print('Saved model (layer 2):', save_path)
    # save_path = '{}/{}.pkl'.format(opt['model_dir'], 'lgb_2_300_top15')
    # with open(save_path, 'wb') as fout:
    #     pickle.dump(gbm2, fout)
    # print('Saved model (layer 2):', save_path)

    import sys
    sys.exit(0)

    # -------------------- Train the third layer ------------------------

    # Keep the top-five candidate samples
    predictors = pd.DataFrame(
        data={
            'feature_name': gbm2.feature_name(),
            'feature_importance': gbm2.feature_importance()
        })
    predictors = predictors[
        predictors['feature_importance'] > 0]['feature_name'].values
    print('Features used in layer 3: {} dims\n'.format(len(predictors)), predictors)
    train_all = train_all.sort_values(
        by=['orderid', 'pred'], ascending=False).groupby('orderid').head(10)
    # train_all = rank(train_all, 'orderid', 'pred', ascending=False)
    del train_all['pred']
    print('Layer-3 data:', train_all.shape)

    # ********* Prepare the data **********
    # Split off a validation set
    train, val = train_test_split(train_all, test_size=0.1)

    # Define the datasets
    X_train = train[predictors]
    y_train = train['label']
    X_val = val[predictors]
    y_val = val['label']
    del train, val
    gc.collect()

    # Datasets
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)

    # ********** Start training *********
    gbm3 = lgb.train(params,
                     lgb_train,
                     num_boost_round=1200,
                     valid_sets=[lgb_train, lgb_val],
                     early_stopping_rounds=5
                     # init_model=gbm2 # finetune
                     )

    #  ********* Evaluate *********

    # Check performance on the training set
    score = get_score(train_all, predictors, gbm3, opt)
    print('Training-set score (layer 3): {}'.format(score))

    #  ********* Save the model *********

    cur_time = datetime.datetime.now().strftime('%Y-%m-%d#%H:%M:%S')
    save_path = '{}/{}_{}_{:.5f}.pkl'.format(opt['model_dir'], 'lgb_3',
                                             cur_time, score[0])
    with open(save_path, 'wb') as fout:
        pickle.dump(gbm3, fout)
    print('Saved model (layer 3):', save_path)
    save_path = '{}/{}.pkl'.format(opt['model_dir'], 'lgb_3_300_top10')
    with open(save_path, 'wb') as fout:
        pickle.dump(gbm3, fout)
    print('Saved model (layer 3):', save_path)

    # -------------------- Train the fourth layer ------------------------

    # Keep the top-three candidate samples
    predictors = pd.DataFrame(
        data={
            'feature_name': gbm3.feature_name(),
            'feature_importance': gbm3.feature_importance()
        })
    predictors = predictors[
        predictors['feature_importance'] > 0]['feature_name'].values
    print('Features used in layer 4: {} dims\n'.format(len(predictors)), predictors)
    train_all = train_all.sort_values(
        by=['orderid', 'pred'], ascending=False).groupby('orderid').head(5)
    # train_all = rank(train_all, 'orderid', 'pred', ascending=False)
    del train_all['pred']
    print('Layer-4 data:', train_all.shape)

    # ********* Prepare the data **********
    # Split off a validation set
    train, val = train_test_split(train_all, test_size=0.1)

    # Define the datasets
    X_train = train[predictors]
    y_train = train['label']
    X_val = val[predictors]
    y_val = val['label']
    del train, val
    gc.collect()

    # Datasets
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)

    # ********** Start training *********
    gbm4 = lgb.train(params,
                     lgb_train,
                     num_boost_round=1200,
                     valid_sets=[lgb_train, lgb_val],
                     early_stopping_rounds=5
                     # init_model=gbm3 # finetune
                     )

    #  ********* Evaluate *********

    # Check performance on the training set
    score = get_score(train_all, predictors, gbm4, opt)
    print('Training-set score (layer 4): {}'.format(score))

    #  ********* Save the model *********

    cur_time = datetime.datetime.now().strftime('%Y-%m-%d#%H:%M:%S')
    save_path = '{}/{}_{}_{:.5f}.pkl'.format(opt['model_dir'], 'lgb_4',
                                             cur_time, score[0])
    with open(save_path, 'wb') as fout:
        pickle.dump(gbm4, fout)
    print('Saved model (layer 4):', save_path)
    save_path = '{}/{}.pkl'.format(opt['model_dir'], 'lgb_4_300_top5')
    with open(save_path, 'wb') as fout:
        pickle.dump(gbm4, fout)
    print('Saved model (layer 4):', save_path)
Example #35
def roc_score(clf, data, labels):
    predictions = get_score(clf, data)
    return metrics.roc_auc_score(labels, predictions)
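For roc_auc_score the helper must return continuous scores rather than hard labels. A hypothetical get_score consistent with that usage (the original is not shown) could be:

def get_score(clf, data):
    # Prefer the decision margin; fall back to the positive-class probability.
    if hasattr(clf, "decision_function"):
        return clf.decision_function(data)
    return clf.predict_proba(data)[:, 1]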
Example #37
def index():
    return render_template("index.html", left=get_score("left"), right=get_score("right"))