def evaluation(args):
    source = pickle_load(os.path.join(args.model_path, 'source.pkl'))
    target = pickle_load(os.path.join(args.model_path, 'target.pkl'))
    target_test = pickle_load(os.path.join(args.model_path, 'target_test.pkl'))
    setting = load_setting(os.path.join(args.model_path, 'setting.yaml'))
    start_id, end_id = setting['start_id'], setting['end_id']
    type_size = setting['type_size']
    player_size = setting['player_size']
    team_size = setting['team_size']
    detail_size = setting['detail_size']
    detail_dim = setting['detail_dim']
    src_embed = setting['src_embed']
    event_size = setting['event_size']
    vocab_size = setting['vocab_size']
    trg_embed = setting['trg_embed']
    hidden = setting['hidden']
    class_weight = None
    mlp_layers = setting['mlp_layers']
    max_length = setting['max_length']
    dropout = setting['dropout']
    loss_weight = None
    disc_loss = setting['disc_loss']
    loss_func = setting['loss_func']
    net = setting['net']
    dataset = setting['dataset']
    numbering = setting['numbering']
    reverse_decode = setting['reverse_decode']
    home_player_tag = target.word_to_id.get(target.home_player_tag)
    away_player_tag = target.word_to_id.get(target.away_player_tag)
    home_team_tag = target.word_to_id.get(target.home_team_tag)
    away_team_tag = target.word_to_id.get(target.away_team_tag)
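    # Build the full test set plus three length-capped variants
    # (limit_length of 20/15/10) so BLEU can be reported per length bucket.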
    test = OptaDataset(path=dataset + '.test',
                       fields={
                           'source': source,
                           'target': target_test
                       })
    test20 = OptaDataset(path=dataset + '.test',
                         fields={
                             'source': source,
                             'target': target_test
                         },
                         limit_length=20)
    test15 = OptaDataset(path=dataset + '.test',
                         fields={
                             'source': source,
                             'target': target_test
                         },
                         limit_length=15)
    test10 = OptaDataset(path=dataset + '.test',
                         fields={
                             'source': source,
                             'target': target_test
                         },
                         limit_length=10)

    if 'disc' in net:
        content_word_size = len(target.content_word_to_id)
    print('vocab size: {}'.format(vocab_size))
    if net == 'plain':
        model = MLPEncoder2AttentionDecoder(type_size,
                                            player_size,
                                            team_size,
                                            detail_size,
                                            detail_dim,
                                            src_embed,
                                            event_size,
                                            vocab_size,
                                            trg_embed,
                                            hidden,
                                            start_id,
                                            end_id,
                                            class_weight,
                                            mlp_layers,
                                            max_length,
                                            dropout,
                                            IGNORE_LABEL,
                                            reverse_decode=reverse_decode)
    elif net == 'tmpl':
        model = MLPEncoder2AttentionDecoder(type_size,
                                            player_size,
                                            team_size,
                                            detail_size,
                                            detail_dim,
                                            src_embed,
                                            event_size,
                                            vocab_size,
                                            trg_embed,
                                            hidden,
                                            start_id,
                                            end_id,
                                            class_weight,
                                            mlp_layers,
                                            max_length,
                                            dropout,
                                            IGNORE_LABEL,
                                            source.id_to_player,
                                            home_player_tag,
                                            away_player_tag,
                                            source.id_to_team,
                                            home_team_tag,
                                            away_team_tag,
                                            target.player_to_id,
                                            target.players,
                                            reverse_decode=reverse_decode)
    elif net == 'gate':
        model = MLPEncoder2GatedAttentionDecoder(type_size,
                                                 player_size,
                                                 team_size,
                                                 detail_size,
                                                 detail_dim,
                                                 src_embed,
                                                 event_size,
                                                 vocab_size,
                                                 trg_embed,
                                                 hidden,
                                                 start_id,
                                                 end_id,
                                                 class_weight,
                                                 mlp_layers,
                                                 max_length,
                                                 dropout,
                                                 IGNORE_LABEL,
                                                 reverse_decode=reverse_decode)
    elif net == 'gate-tmpl':
        model = MLPEncoder2GatedAttentionDecoder(type_size,
                                                 player_size,
                                                 team_size,
                                                 detail_size,
                                                 detail_dim,
                                                 src_embed,
                                                 event_size,
                                                 vocab_size,
                                                 trg_embed,
                                                 hidden,
                                                 start_id,
                                                 end_id,
                                                 class_weight,
                                                 mlp_layers,
                                                 max_length,
                                                 dropout,
                                                 IGNORE_LABEL,
                                                 source.id_to_player,
                                                 home_player_tag,
                                                 away_player_tag,
                                                 source.id_to_team,
                                                 home_team_tag,
                                                 away_team_tag,
                                                 target.player_to_id,
                                                 target.players,
                                                 reverse_decode=reverse_decode)
    elif net == 'disc':
        model = DiscriminativeMLPEncoder2AttentionDecoder(
            type_size,
            player_size,
            team_size,
            detail_size,
            detail_dim,
            src_embed,
            event_size,
            vocab_size,
            content_word_size,
            trg_embed,
            hidden,
            start_id,
            end_id,
            class_weight,
            loss_weight,
            disc_loss,
            loss_func,
            mlp_layers,
            max_length,
            dropout,
            IGNORE_LABEL,
            reverse_decode=reverse_decode)
    elif net == 'disc-tmpl':
        model = DiscriminativeMLPEncoder2AttentionDecoder(
            type_size,
            player_size,
            team_size,
            detail_size,
            detail_dim,
            src_embed,
            event_size,
            vocab_size,
            content_word_size,
            trg_embed,
            hidden,
            start_id,
            end_id,
            class_weight,
            loss_weight,
            disc_loss,
            loss_func,
            mlp_layers,
            max_length,
            dropout,
            IGNORE_LABEL,
            source.id_to_player,
            home_player_tag,
            away_player_tag,
            source.id_to_team,
            home_team_tag,
            away_team_tag,
            target.player_to_id,
            target.players,
            reverse_decode=reverse_decode)
    elif net == 'gate-disc':
        model = DiscriminativeMLPEncoder2GatedAttentionDecoder(
            type_size,
            player_size,
            team_size,
            detail_size,
            detail_dim,
            src_embed,
            event_size,
            vocab_size,
            content_word_size,
            trg_embed,
            hidden,
            start_id,
            end_id,
            class_weight,
            loss_weight,
            disc_loss,
            loss_func,
            mlp_layers,
            max_length,
            dropout,
            IGNORE_LABEL,
            reverse_decode=reverse_decode)
    elif net == 'gate-disc-tmpl':
        model = DiscriminativeMLPEncoder2GatedAttentionDecoder(
            type_size,
            player_size,
            team_size,
            detail_size,
            detail_dim,
            src_embed,
            event_size,
            vocab_size,
            content_word_size,
            trg_embed,
            hidden,
            start_id,
            end_id,
            class_weight,
            loss_weight,
            disc_loss,
            loss_func,
            mlp_layers,
            max_length,
            dropout,
            IGNORE_LABEL,
            source.id_to_player,
            home_player_tag,
            away_player_tag,
            source.id_to_team,
            home_team_tag,
            away_team_tag,
            target.player_to_id,
            target.players,
            reverse_decode=reverse_decode)
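    else:
        # Fail fast on an unrecognised `net` instead of a NameError on `model`.
        raise ValueError('unknown net type: {}'.format(net))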
    if numbering:
        model.player_id = target.player_id
        model.team_id = target.team_id
    # load best model
    if args.gpu is not None:
        model.use_gpu(args.gpu)
    model.id_to_word = target.id_to_word
    model.load_model(os.path.join(args.model_path, 'best.model'))
    batch_size = args.batch
    src_test_iter = SequentialIterator(test.source,
                                       batch_size,
                                       None,
                                       event_size,
                                       source.fillvalue,
                                       gpu=args.gpu)
    src_test20_iter = SequentialIterator(test20.source,
                                         batch_size,
                                         None,
                                         event_size,
                                         source.fillvalue,
                                         gpu=args.gpu)
    src_test15_iter = SequentialIterator(test15.source,
                                         batch_size,
                                         None,
                                         event_size,
                                         source.fillvalue,
                                         gpu=args.gpu)
    src_test10_iter = SequentialIterator(test10.source,
                                         batch_size,
                                         None,
                                         event_size,
                                         source.fillvalue,
                                         gpu=args.gpu)
    trg_test_iter = Iterator(test.target,
                             batch_size,
                             wrapper=EndTokenIdRemoval(end_id),
                             gpu=None)
    trg_test20_iter = Iterator(test20.target,
                               batch_size,
                               wrapper=EndTokenIdRemoval(end_id),
                               gpu=None)
    trg_test15_iter = Iterator(test15.target,
                               batch_size,
                               wrapper=EndTokenIdRemoval(end_id),
                               gpu=None)
    trg_test10_iter = Iterator(test10.target,
                               batch_size,
                               wrapper=EndTokenIdRemoval(end_id),
                               gpu=None)

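    # Human-readable player/team names used to convert generated ID tokens.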
    with open('./dataset/player_list.json.new') as f:
        id_to_player = json.load(f)
    with open('./dataset/team_list.json.new') as f:
        id_to_team = json.load(f)

    def convert(ind, no_tag=False):
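        """Replace a 'player<ID>'/'team<ID>' token with its real name when
        no_tag is True; otherwise return the token unchanged."""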
        if 'player' in ind:
            if no_tag:
                i = ind.replace('player', '')
                return id_to_player.get(i, ind)
            else:
                return ind
        elif 'team' in ind:
            if no_tag:
                i = ind.replace('team', '')
                return id_to_team.get(i, ind)
            else:
                return ind
        else:
            return ind

    if 'disc' in net:
        bleu_score, accuracy, hypotheses = evaluate_bleu_and_accuracy(
            model, src_test_iter, trg_test_iter)
        bleu_score20, _, hypotheses20 = evaluate_bleu_and_accuracy(
            model, src_test20_iter, trg_test20_iter)
        bleu_score15, _, hypotheses15 = evaluate_bleu_and_accuracy(
            model, src_test15_iter, trg_test15_iter)
        bleu_score10, _, hypotheses10 = evaluate_bleu_and_accuracy(
            model, src_test10_iter, trg_test10_iter)
    else:
        bleu_score, hypotheses = evaluate_bleu(model, src_test_iter,
                                               trg_test_iter)
        bleu_score20, hypotheses20 = evaluate_bleu(model, src_test20_iter,
                                                   trg_test20_iter)
        bleu_score15, hypotheses15 = evaluate_bleu(model, src_test15_iter,
                                                   trg_test15_iter)
        bleu_score10, hypotheses10 = evaluate_bleu(model, src_test10_iter,
                                                   trg_test10_iter)

    print('best score: {}'.format(bleu_score))
    print('best score20: {}'.format(bleu_score20))
    print('best score15: {}'.format(bleu_score15))
    print('best score10: {}'.format(bleu_score10))
    # save hypothesis
    hypotheses_for_save = [
        ' '.join([convert(y, True) for y in h]) for h in hypotheses
    ]
    hypotheses20_for_save = [
        ' '.join([convert(y, True) for y in h]) for h in hypotheses20
    ]
    hypotheses15_for_save = [
        ' '.join([convert(y, True) for y in h]) for h in hypotheses15
    ]
    hypotheses10_for_save = [
        ' '.join([convert(y, True) for y in h]) for h in hypotheses10
    ]
    references_for_save = [
        ' '.join(convert(y, True) for y in r[0]) for r in test.target
    ]
    references20_for_save = [
        ' '.join(convert(y, True) for y in r[0]) for r in test20.target
    ]
    references15_for_save = [
        ' '.join(convert(y, True) for y in r[0]) for r in test15.target
    ]
    references10_for_save = [
        ' '.join(convert(y, True) for y in r[0]) for r in test10.target
    ]
    TextFile(os.path.join(args.model_path, 'hypo'), hypotheses_for_save).save()
    TextFile(os.path.join(args.model_path, 'hypo_len20'),
             hypotheses20_for_save).save()
    TextFile(os.path.join(args.model_path, 'hypo_len15'),
             hypotheses15_for_save).save()
    TextFile(os.path.join(args.model_path, 'hypo_len10'),
             hypotheses10_for_save).save()
    TextFile(os.path.join('./dataset', 'ref'), references_for_save).save()
    TextFile(os.path.join('./dataset', 'ref_len20'),
             references20_for_save).save()
    TextFile(os.path.join('./dataset', 'ref_len15'),
             references15_for_save).save()
    TextFile(os.path.join('./dataset', 'ref_len10'),
             references10_for_save).save()
    # generate readable text
    result = []
    for ref, hyp in zip(test.target.data, hypotheses):
        if isinstance(ref, tuple):
            ref = ref[0]
        ref = ' '.join([convert(y) for y in ref]).split()
        try:
            sent_bleu = sentence_bleu(
                [ref], hyp, smoothing_function=SmoothingFunction().method1)
        except Exception:  # fall back to 0 if BLEU computation fails
            sent_bleu = 0
        ref = ' '.join([convert(y, True) for y in ref]).split()
        hyp = ' '.join([convert(y, True) for y in hyp]).split()
        result.append((' '.join(ref), ' '.join(hyp), sent_bleu))
    inputs = []
    for xs in test20.source.data:
        data = []
        for x in xs[:5]:
            event = event_type_mapper.get(x[0], x[0])
            player = id_to_player.get(str(x[1]), x[1])
            team = id_to_team.get(str(x[2]), x[2])
            detail = ','.join(
                [qualifier_type_mapper.get(i[-1], i[-1]) for i in x[-1]])
            data.append('event: {} player: {} team: {} detail: {}'.format(
                event, player, team, detail))
        inputs.append('\n'.join(data))
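    # NOTE: `result` was built from the full test set while `inputs` comes
    # from test20; zip() pairs them positionally and truncates to the shorter.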
    result = [[x, *y] for x, y in zip(inputs, result)]
    result = sorted(result, key=lambda x: -x[-1])
    TextFile(os.path.join(args.model_path, 'test20_gate_disc_tmpl.txt'), [
        'src:\n{}\nref: {}\nhyp: {}\nbleu: {}\n##\n'.format(*x) for x in result
    ]).save()


def main():
    parser = ArgumentParser(description='train a seq2seq model',
                            formatter_class=RawTextHelpFormatter)
    parser.add_argument('CONFIG',
                        default=None,
                        type=str,
                        help='path to config file')
    parser.add_argument('--gpu',
                        '-g',
                        default=None,
                        type=str,
                        help='gpu numbers\nto specify')
    parser.add_argument('--debug',
                        default=False,
                        action='store_true',
                        help='switch to debug mode')
    args = parser.parse_args()

    with open(args.CONFIG, "r") as f:
        config = json.load(f)

    os.makedirs(os.path.dirname(config['arguments']['save_path']),
                exist_ok=True)

    (source_id_to_word, target_id_to_word, model, device, train_data_loader,
     valid_data_loader, optimizer) = load_setting(config, args)

    n_pred = 5  # number of validation examples to print each epoch
    n_sample = 1 if isinstance(model, Seq2seq) else 5  # deterministic model: one sample
    threshold = 5  # epoch around which the sigmoid annealing is centred
    clip = 1e-2  # gradient-norm clipping threshold

    bar = ProgressBar(0, len(train_data_loader))
    for epoch in range(1, config['arguments']['epoch'] + 1):
        print(f'*** epoch {epoch} ***')
        # train
        model.train()
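        # `annealing` ramps from ~0 to ~1 around epoch `threshold`, presumably
        # scaling the model's regularisation terms in the loss below.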
        annealing = sigmoid(epoch - threshold)
        total_loss = 0
        total_rec_loss = 0
        total_reg_loss = 0
        total_c_loss = 0
        for batch_idx, (source, source_mask, target_inputs, target_outputs, target_mask) \
                in enumerate(train_data_loader):
            bar.update(batch_idx)
            source = source.to(device)
            source_mask = source_mask.to(device)
            target = target_inputs.to(device)
            target_mask = target_mask.to(device)
            label = target_outputs.to(device)

            # Forward pass
            loss, details = model(source, source_mask, target, target_mask,
                                  label, annealing)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()

            # .item() extracts Python scalars so the autograd graph is freed
            # each step (assuming `details` already holds plain numbers).
            total_loss += loss.item()
            total_rec_loss += details[0]
            total_reg_loss += details[1]
            total_c_loss += details[2]
        else:
            print('')
            print(f'train_loss={total_loss / (batch_idx + 1):.3f}'
                  f'/rec:{total_rec_loss / (batch_idx + 1):.3f}'
                  f'/reg:{total_reg_loss / (batch_idx + 1):.3f}'
                  f'/c:{total_c_loss / (batch_idx + 1):.3f}')

        # validation
        model.eval()
        with torch.no_grad():
            total_loss = 0
            total_rec_loss = 0
            total_reg_loss = 0
            total_c_loss = 0
            for batch_idx, (source, source_mask, target_inputs, target_outputs, target_mask) \
                    in enumerate(valid_data_loader):
                source = source.to(device)
                source_mask = source_mask.to(device)
                target = target_inputs.to(device)
                target_mask = target_mask.to(device)
                label = target_outputs.to(device)

                loss, details = model(source, source_mask, target, target_mask,
                                      label, annealing)
                total_loss += loss.item()
                total_rec_loss += details[0]
                total_reg_loss += details[1]
                total_c_loss += details[2]
            else:
                print(f'valid_loss={total_loss / (batch_idx + 1):.3f}'
                      f'/rec:{total_rec_loss / (batch_idx + 1):.3f}'
                      f'/reg:{total_reg_loss / (batch_idx + 1):.3f}'
                      f'/c:{total_c_loss / (batch_idx + 1):.3f}')
                random_indices = choice(np.arange(len(source)),
                                        n_pred,
                                        replace=False)
                print(random_indices)
                s_translation = translate(source[random_indices],
                                          source_id_to_word,
                                          is_target=False)
                t_translation = translate(target[random_indices],
                                          target_id_to_word,
                                          is_target=True)
                p_translation = [
                    translate(model.predict(source, source_mask)[random_indices],
                              target_id_to_word,
                              is_target=True)
                    for _ in range(n_sample)
                ]
                p_translation = list(zip(*p_translation))
                for s, t, ps in zip(s_translation, t_translation,
                                    p_translation):
                    print(f'source:{" ".join(s)} / target:{" ".join(t)}')
                    for i, p in enumerate(ps):
                        print(f'predict{i+1}:{" ".join(p)}')

    # TODO: add metrics
    torch.save(model.state_dict(),
               os.path.join(config['arguments']['save_path'], 'sample.pth'))
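

# Standard entry-point guard so the script can be run directly.
if __name__ == '__main__':
    main()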
Example #3
#!/usr/bin/env python
# @Time    : 2019-08-29 17:50
# @Author  : [email protected]
# @Site    :
# @File    : common.py
import asyncio

from sanic import Sanic
from jinja2 import Environment, FileSystemLoader, select_autoescape

from utils import load_setting
from connections import RedisConnectionPool

__all__ = ["app"]

app = Sanic(__name__)
app.config = load_setting()
app.template = Environment(
    loader=FileSystemLoader(app.config["TEMPLATES_PATH"]),
    autoescape=select_autoescape(['html', 'xml']),
    enable_async=False)
app.static("/static", "./static")


@app.listener('before_server_start')
async def before_server_start(app, loop):
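    # Create a shared in-process queue and a Redis connection pool before the
    # server begins handling requests.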
    queue = asyncio.Queue()
    app.queue = queue
    app.redis = await RedisConnectionPool(loop=loop).init(
        app.config['REDIS_CONFIG'])
Example #4
from sqlalchemy import create_engine, INT, VARCHAR, Column, DATETIME, FLOAT
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from utils import load_setting, json
setting = load_setting()


def new_db(name):
    if not setting['db_status']:
        url = setting['url']
        engine = create_engine(url)  # pass echo=True to log emitted SQL
        conn = engine.connect()
        conn.execute("commit")
        try:
            conn.execute(f"CREATE DATABASE {name}")
        except Exception as e:
            print(e)
        conn.close()
    setting['db_status'] = 1
    with open('setting.json', 'w') as f:
        json.dump(setting, f)


# Comment this call out once the database has been created
new_db('jd_price')

# For initialisation
url = setting['url'] + 'jd_price'
engine = create_engine(url)  # pass echo=True to log emitted SQL
Base = declarative_base(bind=engine)
# Create a session factory
Session = sessionmaker(bind=engine)
Example #5
# Imports needed to make this snippet self-contained (PySimpleGUI's
# conventional alias is sg; utils, config and checkin are assumed to be
# project-local modules).
import shlex
import webbrowser
from threading import Thread

import PySimpleGUI as sg

import checkin
import config
import utils


def Cloud189SignGUI():
    utils.load_setting()
    sg.theme(config.DEFAULT_THEME)
    layout = [[sg.Text('Cloud189 Signer', size=(40, 1), font=('Any 15'))],
              [
                  sg.T('账号,多个账号用#号分割:',  # "Accounts, split multiple accounts with #:"
                       font=config.default_font),
                  sg.Input(default_text=config.username,
                           size=(40, 1),
                           key='username')
              ],
              [
                  sg.T('密码,多个密码用#号分割:',  # "Passwords, split multiple passwords with #:"
                       font=config.default_font),
                  sg.Input(default_text=config.password,
                           size=(40, 1),
                           key='password')
              ], [sg.Button('签到', key='Sign')],  # '签到' = "Sign in"
              [sg.Output(size=(90, 20), font=config.default_font)],
              [
                  sg.Button('退出',  # "Exit"
                            key='Exit',
                            button_color=('white', 'firebrick3')),
                  sg.Button('源码',  # "Source code"
                            key='home_page',
                            button_color=('white', 'springgreen4'))
              ]]

    window = sg.Window('天翼云签到',  # "Cloud189 check-in"
                       layout,
                       text_justification='r',
                       default_element_size=(15, 1),
                       font=('Any 14'))

    while True:
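        # Block on window.read() until a button is clicked or the window is
        # closed.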
        event, values = window.read()
        if event in ('Exit', None):
            utils.save_setting()
            break  # exit button clicked
        if event == 'Sign':
            window.refresh()
            username = values['username']
            password = values['password']
            err = False
            if username == "":
                err = True
                print("账号不能为空!")  # "Account must not be empty!"

            if password == "":
                err = True
                print("密码不能为空!")  # "Password must not be empty!"
            if not err:
                user_list = username.split("#")
                pass_list = password.split("#")
                if len(user_list) != len(pass_list):
                    print("账号和密码个数不对应!")  # "Account/password counts do not match!"
                else:
                    config.username = username
                    config.password = password
                    print('开始签到....')  # "Starting sign-in..."
                    try:
                        for i in range(len(user_list)):
                            str_paras = utils.to_command_paras(
                                user_list[i], pass_list[i])
                            thread_download = Thread(
                                target=checkin.main,
                                args=[shlex.split(str_paras)])
                            thread_download.start()
                    except Exception as e:
                        print(e)
        elif event == 'home_page':
            webbrowser.open_new('https://github.com/xiaogouxo/cloud189-signer')
    window.close()