Example #1
    async def on_message_delete(self, msg):
        """Log each deleted message. The log message provides information
        about the author, content, and date of the message.
        """
        log_msg = (f"{msg.author} has deleted their message: "
                   f"{msg.content!r} sent at {msg.created_at}")
        logger.info(log_msg)
Example #2
def tag_mapping(sentences):
    """
    Build the tag-to-id and id-to-tag dictionaries, sorted by frequency in descending order.
    Because a CRF is used, <start> and <end> would normally be added around the tags,
    but the torchcrf package handles that automatically, so those two markers
    would not need to be added to the dictionary.
    """
    tags = [[x[-1] for x in s] for s in sentences]

    dico = create_dico(tags)
    dico["<pad>"] = 100000002
    # Add START and END so the my_pytorch_crf model does not have to change.
    dico["<start>"] = 100000003
    dico["<stop>"] = 100000004

    tag_to_id, id_to_tag = create_mapping(dico)

    logger.info("Found %i unique named entity tags" % len(dico))
    # Use context managers so the mapping files are always closed.
    with open('data/tag_to_id.txt', 'w', encoding='utf8') as f, \
            open('data/id_to_tag.txt', 'w', encoding='utf8') as f1:
        for k, v in tag_to_id.items():
            f.write(k + ":" + str(v) + "\n")
        for k, v in id_to_tag.items():
            f1.write(str(k) + ":" + str(v) + "\n")
    return dico, tag_to_id, id_to_tag
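
For reference, the create_dico and create_mapping helpers used throughout these examples are not shown on this page. A minimal sketch consistent with how they are called above (an assumption, not necessarily the original implementation):

def create_dico(item_list):
    """Assumed helper: count how often each item occurs across all sub-lists."""
    dico = {}
    for items in item_list:
        for item in items:
            dico[item] = dico.get(item, 0) + 1
    return dico


def create_mapping(dico):
    """Assumed helper: sort by frequency (descending), then alphabetically,
    and assign consecutive ids."""
    sorted_items = sorted(dico.items(), key=lambda x: (-x[1], x[0]))
    id_to_item = {i: v[0] for i, v in enumerate(sorted_items)}
    item_to_id = {v: k for k, v in id_to_item.items()}
    return item_to_id, id_to_item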
Example #3
async def login(request: Request) -> json_response:
    try:
        user = await request.json()
        # The original chained checks (8 > len(...) > 20) could never be
        # true; the intent is to reject lengths outside 8-20.
        if not 8 <= len(user['email']) <= 20:
            return failure_response(400, 'Invalid email length')
        if not 8 <= len(user['password']) <= 20:
            return failure_response(400, 'Invalid password length')
        pool = request.app['pool']
        async with pool.acquire() as conn:
            # Parameterized query instead of string formatting, to avoid
            # SQL injection.
            users = await conn.fetch(
                'SELECT * FROM users WHERE email=$1', user['email'])
            if len(users) == 0:
                return failure_response(401, 'Invalid email or password')
            if users[0]['password'] != encode(
                    str(user['password']).encode('utf-8')):
                return failure_response(401, 'Invalid email or password')
            token = get_token({
                'user': user_record_to_json(users[0]),
                'key': users[0]['key']
            })
            logger.info('User {} logged in'.format(user['email']))
            return success_response(200, 'ok', token=token)
    except Exception as e:
        return server_error_response(e)
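
The encode() helper used for password hashing in these handlers is not shown. A hypothetical stand-in (the real implementation may differ):

import hashlib

def encode(data: bytes) -> str:
    # Hypothetical stand-in for the encode() helper used above: hash the
    # raw bytes and return a hex digest.
    return hashlib.sha256(data).hexdigest()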
Example #4
    async def on_command(self, msg):
        """Log each command submitted. The log message provides information
        about the name, author, and arguments of the command.
        """
        args = msg.args[2:]

        # Avoid logging a dangling "with args ." when there are no arguments.
        args_info = (f"args {', '.join(repr(arg) for arg in args)}"
                     if args else "no args")

        log_msg = f"{msg.command.name} called by {msg.author} with {args_info}."
        logger.info(log_msg)
Example #5
def char_mapping(sentences, lower):
    """
    Build the character-to-id dictionary, sorted by frequency in descending order.
    """
    chars = [[x[0].lower() if lower else x[0] for x in s] for s in sentences]
    dico = create_dico(chars)
    dico["<pad>"] = 100000003
    dico['<unk>'] = 100000002

    char_to_id, id_to_char = create_mapping(dico)
    logger.info("Found %i unique words (%i in total)" % (len(dico), sum(len(x) for x in chars)))

    return dico, char_to_id, id_to_char
Example #6
def evaluate(model, data, id_to_tag, test=False):
    """Get the predicted tags (not ids) and the loss."""
    ner_results, aver_loss = evaluate_helper(model, data, id_to_tag)
    # Compute F1 with the CoNLL-2000 entity-recognition evaluation script.
    eval_lines = test_ner(ner_results, config.save_dir)

    if test:
        # When testing, print the evaluation results.
        for line in eval_lines:
            logger.info(line)

    f1 = float(eval_lines[1].strip().split()[-1]) / 100

    return f1, aver_loss
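
The F1 extraction above assumes the standard conlleval summary format, where the second output line ends with the FB1 score:

# A typical second line of conlleval output (values are illustrative):
line = "accuracy:  98.04%; precision:  84.50%; recall:  85.37%; FB1:  84.93"
f1 = float(line.strip().split()[-1]) / 100  # -> 0.8493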
Example #7
async def check(request: Request) -> json_response:
    try:
        pool = request.app['pool']
        async with pool.acquire() as conn:
            tr = await conn.fetch('SELECT * FROM transfers')
            for i in range(1, len(tr)):
                # Each transfer should store the hash of the previous record.
                prev_hash = encode(
                    transfer_record_to_json_string(tr[i - 1]).encode('utf-8'))
                if tr[i]['transfer_hash'] == prev_hash:
                    logger.info(str(tr[i - 1]['id']) + ' : OK')
                else:
                    logger.info(str(tr[i - 1]['id']) + ' : WRONG')
        return success_response(200, 'ok')
    except Exception as e:
        return server_error_response(e)
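
The check above walks a simple hash chain: each transfer stores the hash of the previous record's JSON serialization. A self-contained toy illustration, using sha256 and json.dumps as stand-ins for the encode and transfer_record_to_json_string helpers (which are not shown):

import hashlib
import json

def record_hash(record: dict) -> str:
    # Stand-in for encode(transfer_record_to_json_string(...).encode('utf-8')).
    return hashlib.sha256(
        json.dumps(record, sort_keys=True).encode('utf-8')).hexdigest()

chain = [{'id': 1, 'transfer_hash': ''}]
chain.append({'id': 2, 'transfer_hash': record_hash(chain[0])})
assert chain[1]['transfer_hash'] == record_hash(chain[0])  # id 1 : OK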
Example #8
def char_mapping(sentences, lower):
    """
    Build the character-to-id dictionary, sorted by frequency in descending order.
    Because a CRF is used, <start> and <end> are added around each sentence,
    so these two markers are also added to the dictionary.
    """
    chars = [[x[0].lower() if lower else x[0] for x in s] for s in sentences]
    dico = create_dico(chars)
    dico["<pad>"] = 100000003
    dico['<unk>'] = 100000002
    dico["<start>"] = 100000001
    dico["<end>"] = 100000000
    char_to_id, id_to_char = create_mapping(dico)
    logger.info("Found %i unique words (%i in total)" % (len(dico), sum(len(x) for x in chars)))

    return dico, char_to_id, id_to_char
Example #9
def augment_with_pretrained(dictionary, ext_emb_path):
    """
    Extend the dictionary with any character from the pretrained embeddings
    that is not already in the training-set dictionary.
    """
    logger.info('Loading pretrained embeddings from %s...' % ext_emb_path)
    assert os.path.isfile(ext_emb_path)
    # Load the pretrained character embeddings. The original filtered on
    # len(ext_emb_path) > 0 (the path, always non-empty); the intent is to
    # skip empty lines.
    with codecs.open(ext_emb_path, 'r', 'utf-8') as f:
        pretrained = set(
            line.rstrip().split()[0].strip()
            for line in f
            if len(line.strip()) > 0
        )

    for char in pretrained:
        if char not in dictionary:
            dictionary[char] = 0

    char_to_id, id_to_char = create_mapping(dictionary)
    return dictionary, char_to_id, id_to_char
Example #10
async def register(request: Request) -> json_response:
    try:
        user = await request.json()
        # As in login(), the original 8 > len(...) > 20 checks could never
        # fire; reject values outside the intended ranges instead.
        if not 8 <= len(user['name']) <= 20:
            return failure_response(400, 'Invalid name length')
        if not 8 <= len(user['email']) <= 20:
            return failure_response(400, 'Invalid email length')
        if not 8 <= len(user['password']) <= 20:
            return failure_response(400, 'Invalid password length')
        if not 3 <= len(user['country']) <= 15:
            return failure_response(400, 'Invalid country')
        if user['age'] is None or not 6 <= int(user['age']) <= 65:
            return failure_response(400, 'Invalid age')
        pool = request.app['pool']
        async with pool.acquire() as conn:
            async with conn.transaction():
                # Parameterized queries avoid SQL injection.
                usr = await conn.fetch(
                    'SELECT * FROM users WHERE email=$1', user['email'])
                if len(usr) == 0:
                    password_hash = encode(
                        str(user['password']).encode('utf-8'))
                    u = User(name=user['name'],
                             email=user['email'],
                             password=password_hash,
                             age=user['age'],
                             country=user['country'])
                    await conn.execute(
                        '''INSERT INTO users (name,email,password,age,country,balance,key)
                           VALUES ($1, $2, $3, $4, $5, $6, $7)''',
                        u.name, u.email, u.password, u.age, u.country,
                        u.balance, u.key)
                    token = get_token({'user': u.to_json(), 'key': u.key})
                    logger.info(
                        'Registered new User(name={}, email={}, age={}, country={})'
                        .format(u.name, u.email, u.age, u.country))
                    return success_response(200, 'ok', token=token)
                else:
                    return failure_response(200, 'This login is already taken')
    except Exception as e:
        return server_error_response(e)
Example #11
async def delete_user(request: Request) -> json_response:
    try:
        data = await request.json()
        if data['key'] is None or len(data['key']) < 10:
            return failure_response(401, 'Authorize please')
        pool = request.app['pool']
        async with pool.acquire() as conn:
            async with conn.transaction():
                users = await conn.fetch(
                    'SELECT * FROM users WHERE key=$1', data['key'])
                if len(users) == 0:
                    return failure_response(401, 'Authorize please')
                # asyncpg does not allow multiple statements in a single
                # parameterized query, so run the two deletes separately
                # (still inside the same transaction).
                await conn.execute('DELETE FROM users WHERE email=$1',
                                   users[0]['email'])
                await conn.execute('DELETE FROM transfers WHERE master=$1',
                                   users[0]['email'])
                logger.info('User {} was deleted (key={})'.format(
                    users[0]['email'], data['key']))
                return success_response(200, 'ok')
    except Exception as e:
        return server_error_response(e)
Example #12
async def check_csgo_rss(channel):
    global title_in_memory
    await asyncio.sleep(120)
    logger.info("Checking CSGO blog for update...")
    latest_title, update_text = get_latest_update()
    logger.info(latest_title)
    if title_in_memory != latest_title:
        logger.info(
            "New CSGO update found titled \"%s\". Sending message to %s...",
            latest_title, channel)
        await send_message(update_text, channel)
        write_latest_title(latest_title)
        title_in_memory = latest_title
    else:
        logger.info("No new update found.")
Example #13
def train():

    # 1: Load the dataset and convert both samples and tags to ids.
    if os.path.isfile(config.data_proc_file):

        with open(config.data_proc_file, "rb") as f:
            train_data, dev_data, test_data = pickle.load(f)
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
            emb_matrix = pickle.load(f)

        logger.info("%i / %i / %i sentences in train / dev / test." % (len(train_data), len(dev_data), len(test_data)))

    else:

        train_data, dev_data, test_data, char_to_id, tag_to_id, id_to_tag, emb_matrix = build_dataset()

    # 2: Produce batches of training data.
    train_manager = BatchManager(train_data, config.batch_size)
    dev_manager = BatchManager(dev_data, config.batch_size)
    test_manager = BatchManager(test_data, config.batch_size)

    model = NERLSTM_CRF(config, char_to_id, tag_to_id, emb_matrix)
    model.train()
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=config.lr, weight_decay=config.weight_decay)

    # 3: Use early stopping to prevent overfitting.
    total_batch = 0
    dev_best_f1 = float('-inf')
    last_improve = 0
    flag = False

    start_time = time.time()
    logger.info(" Start training the model ...... ")
    for epoch in range(config.max_epoch):

        logger.info('Epoch [{}/{}]'.format(epoch + 1, config.max_epoch))

        for index, batch in enumerate(train_manager.iter_batch(shuffle=True)):

            optimizer.zero_grad()

            # Compute the loss and back-propagate.
            _, char_ids, seg_ids, tag_ids, mask = batch
            loss = model.log_likelihood(char_ids, seg_ids, tag_ids, mask)
            loss.backward()

            # Gradient clipping, with a maximum norm of 5.
            nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=config.clip)
            optimizer.step()

            if total_batch % config.steps_check == 0:

                model.eval()
                dev_f1, dev_loss = evaluate(model, dev_manager, id_to_tag)

                # Monitor f1 for early stopping.
                if dev_f1 > dev_best_f1:

                    evaluate(model, test_manager, id_to_tag, test=True)
                    dev_best_f1 = dev_f1
                    torch.save(model, os.path.join(config.save_dir, "medical_ner.ckpt"))
                    improve = '*'
                    last_improve = total_batch
                else:
                    improve = ''

                time_dif = get_time_dif(start_time)
                msg = 'Iter: {} | Dev Loss: {:.4f} | Dev F1-macro: {:.4f} | Time: {} | {}'
                logger.info(msg.format(total_batch, dev_loss, dev_f1, time_dif, improve))

                model.train()

            total_batch += 1
            if total_batch - last_improve > config.require_improve:
                # If dev f1 has not improved for more than 5000 batches, stop training.
                logger.info("No optimization for a long time, auto-stopping...")
                flag = True
                break
        if flag:
            break
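
The get_time_dif helper is not defined in these examples; a sketch of the usual implementation, assumed from how its result is logged above:

import time
from datetime import timedelta

def get_time_dif(start_time):
    # Assumed helper: elapsed wall-clock time since start_time, rounded
    # to whole seconds for readable logging.
    return timedelta(seconds=int(round(time.time() - start_time)))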
Example #14
def toAbspath(file):
    # Reconstructed head (an assumption): the snippet begins mid-function,
    # and the call below implies it resolves a relative path against the
    # script's own directory.
    if os.path.isabs(file):
        return file
    else:
        return os.path.join(os.path.dirname(os.path.abspath(__file__)), file)


if __name__ == '__main__':
    if len(sys.argv) > 1:
        cfgFile = sys.argv[1]  # sys.argv[0] is the script name, not the argument
    else:
        cfgFile = 'merge.json'

    path = toAbspath(cfgFile)
    if not os.path.isfile(path):
        logger.error('%s is not a file' % path)
        sys.exit(1)

    with open(path, 'r', encoding='utf-8') as f:
        cfg = json.load(f)

    logger.info('merge excel')
    xls = ExcelOps(toAbspath(cfg['dstFile']),
                   sheetNameOrIndex=cfg.get('dtsFileSheet', 0),
                   keyName=cfg['keyName'],
                   startCell=cfg.get('dtsFileStartCell', 'A1'))
    print(xls.dataDict)
    xls.merge(toAbspath(cfg['srcFile']),
              sheetNameOrIndex=cfg.get('srcFileSheet', 0),
              startCell=cfg.get('srcFileStartCell', 'A1'),
              forceOverWriteCols=cfg.get('forceOverWriteCols'))
    print(xls.dataDict)
    xls.flush()
Example #15
async def new_transfer(request: Request) -> json_response:
    try:
        transfer = await request.json()
        # len(...) < 0 is never true; the intent is to reject an empty key.
        if len(transfer['master']) == 0:
            return failure_response(403, 'Authorize please')
        if not 8 <= len(transfer['whom']) <= 20:
            return failure_response(400, 'Invalid length of destination email')
        if transfer['amount'] is None or int(transfer['amount']) < 0:
            return failure_response(400, 'Invalid amount')
        pool = request.app['pool']
        async with pool.acquire() as conn:
            async with conn.transaction():
                users = await conn.fetch(
                    'SELECT * FROM users WHERE key=$1', transfer['master'])
                if len(users) == 0:
                    return failure_response(403, 'Authorize please')
                if users[0]['email'] == transfer['whom']:
                    return failure_response(400, 'Cannot transfer to yourself')
                if int(users[0]['balance']) < 0:
                    return failure_response(
                        402, 'There is no money in your account')
                if users[0]['balance'] < int(transfer['amount']):
                    return failure_response(
                        400, 'There is not enough money in your account')
                u_dests = await conn.fetch(
                    'SELECT * FROM users WHERE email=$1', transfer['whom'])
                if len(u_dests) == 0:
                    return failure_response(400, 'No such destination email')
                dest = u_dests[0]
                # Fetch all rows to get the last element (MAX & LIMIT didn't work).
                transfers = await conn.fetch('SELECT * FROM transfers')
                t = Transfer(master=transfer['master'],
                             amount=transfer['amount'],
                             whom=transfer['whom'],
                             prev=transfer_record_to_json_string(transfers[-1]))
                await conn.execute(
                    '''INSERT INTO transfers (transfer_hash, master, whom, amount, time)
                       VALUES ($1, $2, $3, $4, $5)''',
                    t.transfer_hash, users[0]['email'], t.whom, t.amount,
                    t.time)
                await conn.execute(
                    'UPDATE users SET balance=$1 WHERE email=$2',
                    users[0]['balance'] - int(transfer['amount']),
                    users[0]['email'])
                await conn.execute(
                    'UPDATE users SET balance=$1 WHERE email=$2',
                    dest['balance'] + int(transfer['amount']),
                    dest['email'])
                logger.info('New Transfer: {} sent {} to {}'.format(
                    users[0]['email'], transfer['amount'],
                    transfer['whom']))
                return success_response(200, 'ok')
    except Exception as e:
        return server_error_response(e)
Example #16
def cpu_predict_file_content():
    def predict_text(model, input_str):
        if not input_str:
            input_str = input("Please enter text: ")

        _, char_ids, seg_ids, _ = prepare_dataset([input_str],
                                                  char_to_id,
                                                  tag_to_id,
                                                  test=True)[0]
        char_tensor = torch.LongTensor(char_ids).view(1, -1)
        seg_tensor = torch.LongTensor(seg_ids).view(1, -1)

        with torch.no_grad():
            # Get the Viterbi-decoded path and convert it to tags.
            paths = model(char_tensor, seg_tensor)
            tags = [id_to_tag[idx] for idx in paths[0]]

        return result_to_json(input_str, tags)

    with open(config.data_proc_file, "rb") as f:
        train_data, dev_data, test_data = pickle.load(f)
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
        emb_matrix = pickle.load(f)

    with open(config.map_file, "rb") as f:
        char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)

    # Predict on CPU (the original `"cuda" if None else "cpu"` always
    # resolved to "cpu" anyway).
    device = torch.device("cpu")
    model = NERLSTM_CRF(config, char_to_id, tag_to_id, emb_matrix, device)
    state_dict = torch.load(os.path.join(config.save_dir, "medical_ner.ckpt"),
                            map_location="cpu")
    model.load_state_dict(state_dict)

    model.eval()

    file_all = []
    c_root = "/home/demo1/guoxin/chinese/"
    for file in os.listdir(c_root):
        #if "txtoriginal.txt" in file:
        file_all.append(file)

    path = "/home/demo1/ner1.csv"
    i = 1
    word_set = set()
    for file in file_all:
        with open(c_root + file, 'r', encoding='utf8') as fp:
            i += 1
            print(i)

            for line in fp:
                # Split on the Chinese full stop and skip very short fragments.
                for ele in line.split("。"):
                    if len(ele) < 3:
                        continue

                    results = predict_text(model, ele)
                    for result in results["entities"]:
                        word_set.add(result["word"])

        if i > 10000:
            break

        logger.info("i-{0}".format(i))

    import pandas as pd
    df = pd.DataFrame({"key": list(word_set)})
    df.to_csv(path, header=False, encoding='utf_8_sig', index=False)
Example #17
def train():

    # Load the dataset.
    train_data, dev_data, test_data, char_to_id, id_to_char, tag_to_id, id_to_tag, emb_matrix = build_dataset()
    logger.info("%i / %i / %i sentences in train / dev / test." %
                (len(train_data), len(dev_data), len(test_data)))

    # Batch the data.
    train_manager = BatchManager(train_data, config.batch_size)
    dev_manager = BatchManager(dev_data, config.batch_size)
    test_manager = BatchManager(test_data, config.batch_size)

    model = NERLSTM_CRF(config, char_to_id, tag_to_id, emb_matrix)
    model.train()
    model.to(device)

    optimizer = optim.Adam(model.parameters(),
                           lr=config.lr,
                           weight_decay=config.weight_decay)
    # lr: learning rate; weight_decay: L2 penalty

    # Early stopping state.
    total_batch = 0
    dev_best_f1 = float('-inf')
    last_improve = 0
    flag = False

    start_time = time.time()
    logger.info(" Start Training ...... ")
    for epoch in range(config.max_epoch):

        logger.info('Epoch [{}/{}]'.format(epoch + 1, config.max_epoch))

        for index, batch in enumerate(train_manager.iter_batch(shuffle=True)):

            optimizer.zero_grad()  # reset the gradients

            # Loss and backward propagation.
            _, char_ids, len_ids, tag_ids, mask = batch
            loss = model.log_likelihood(char_ids, len_ids, tag_ids, mask)
            loss.backward()

            # Gradient clipping (maximum norm: 5).
            nn.utils.clip_grad_norm_(parameters=model.parameters(),
                                     max_norm=config.clip)
            optimizer.step()

            # Check the model after a certain number of batches.
            if total_batch % config.steps_check == 0:

                model.eval()
                dev_f1, dev_loss = evaluate(model, dev_manager, id_to_tag)

                # Check the f1 value for early stopping.
                if dev_f1 > dev_best_f1:

                    evaluate(model, test_manager, id_to_tag, test=True)
                    dev_best_f1 = dev_f1
                    torch.save(
                        model, os.path.join(config.save_dir,
                                            "medical_ner.ckpt"))
                    improve = '*'
                    last_improve = total_batch
                else:
                    improve = ''

                time_used = training_timer(start_time)
                msg = 'Iter: {} | Dev Loss: {:.4f} | Dev F1-macro: {:.4f} | Time: {} | {}'
                logger.info(
                    msg.format(total_batch, dev_loss, dev_f1, time_used,
                               improve))

                model.train()

            total_batch += 1
            if total_batch - last_improve > config.require_improve:
                # If f1 on the dev set does not improve for more than 5000 batches, stop training.
                logger.info(
                    "No optimization for a long time, auto-stopping...")
                flag = True
                break
        if flag:
            break
Example #18
def build_dataset():
    train_sentences = load_sentences(config.train_file, config.lower,
                                     config.zero)
    dev_sentences = load_sentences(config.dev_file, config.lower, config.zero)
    test_sentences = load_sentences(config.test_file, config.lower,
                                    config.zero)
    logger.info("成功读取标注好的数据")

    update_tag_scheme(train_sentences, config.tag_schema)
    update_tag_scheme(test_sentences, config.tag_schema)
    update_tag_scheme(dev_sentences, config.tag_schema)
    logger.info("成功将IOB格式转化为IOBES格式")

    if not os.path.isfile(config.map_file):
        char_to_id, id_to_char, tag_to_id, id_to_tag = create_maps(
            train_sentences)
        logger.info("根据训练集建立字典完毕")
    else:
        with open(config.map_file, "rb") as f:
            char_to_id, id_to_char, tag_to_id, id_to_tag = pickle.load(f)
        logger.info("已有字典文件,加载完毕")

    emb_matrix = load_emb_matrix(char_to_id)
    logger.info("加载预训练的字向量完毕")

    train_data = prepare_dataset(train_sentences, char_to_id, tag_to_id,
                                 config.lower)
    dev_data = prepare_dataset(dev_sentences, char_to_id, tag_to_id,
                               config.lower)
    test_data = prepare_dataset(test_sentences, char_to_id, tag_to_id,
                                config.lower)
    logger.info("把样本和标签处理为id完毕")
    logger.info("%i / %i / %i sentences in train / dev / test." %
                (len(train_data), len(dev_data), len(test_data)))

    with open(config.data_proc_file, "wb") as f:
        pickle.dump([train_data, dev_data, test_data], f)
        pickle.dump([char_to_id, id_to_char, tag_to_id, id_to_tag], f)
        pickle.dump(emb_matrix, f)

    return train_data, dev_data, test_data, char_to_id, tag_to_id, id_to_tag, emb_matrix
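
Note that the three pickle.dump calls above must be matched by pickle.load calls in the same order, as the train() functions earlier do. A minimal self-contained illustration:

import pickle

with open('demo.pkl', 'wb') as f:
    pickle.dump([1, 2, 3], f)
    pickle.dump({'a': 0}, f)

with open('demo.pkl', 'rb') as f:
    first = pickle.load(f)   # [1, 2, 3]
    second = pickle.load(f)  # {'a': 0}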
Example #19
    async def on_ready(self):
        """Log information about bot launching."""
        logger.info("Bot %s connected on %s servers",
                    self.user.name,
                    len(self.guilds))
Example #20
import asyncio
from aiohttp import web
from app import init_app
from logs.logger import logger, log_format

if __name__ == '__main__':
    try:
        loop = asyncio.get_event_loop()
        app = loop.run_until_complete(init_app())
        logger.info('Server started')
        web.run_app(app, access_log=logger, access_log_format=log_format)
        logger.info('Server was stopped')
    except Exception as e:
        logger.warning(e)
Example #21
async def on_ready():
    logger.info('Bot logged in as {0.user}'.format(client))
    channel = client.get_channel(700066521317113886)
    logger.info("Connected to %s", channel)
    while True:
        await check_csgo_rss(channel)