コード例 #1
0
ファイル: main.py プロジェクト: mottled233/MRC_FastFrame
from dataset.dataset_for_mrc_squad import DatasetForMrcSquad as Dataset
from preprocess.preprocess_for_mrc import PreprocessForMRCChinese as Preprocess
from model.network.mrc_net import MRCNet as Net
from engine.mrc_train_engine import MRCTrainEngine as TrainEngine
from engine.mrc_predict_engine import MRCPredictEngine as PredictEngine
from util.util_parameter import UtilParameter as UParam
from util.util_logging import UtilLogging as ULog

if __name__ == "__main__":
    # `sys.argv` is read below, but the import block above does not bring
    # `sys` into scope; importing it here keeps this entry point runnable
    # and is harmless if the module also imports it elsewhere.
    import sys

    # Set up parameters: read the "config_ernie" config file, then hand the
    # command-line arguments (everything after the script name) to set_config.
    param = UParam()
    param.read_config_file("config_ernie")
    param.set_config(sys.argv[1:])
    args = param.get_config(param.GLOBAL)
    # Initialize logging.
    logger = ULog(args, params=param)
    app_name = args["app_name"]
    dataset_args = param.get_config(param.DATASET)

    # Training-data preprocessing: read the examples from the source file,
    # then run the preprocessor over them. Both steps receive a `cache`
    # name taken from the dataset configuration.
    train_dataset = Dataset(dataset_args)
    train_dataset.read_from_srcfile(
        dataset_args['train_file_path'],
        cache=dataset_args['train_example_file_name'],
        is_training=True)
    train_preprocess = Preprocess(
        args=dataset_args,
        examples=train_dataset.get_examples(),
        cache=dataset_args['train_feature_file_name'])
    train_data_generator = train_preprocess.do_preprocess()
コード例 #2
0
    sents = re.split('(。|,|,|!|\!|\.|?|\?)', paragraph)
    res = []
    for sent in sents:
        if len(sent) != 0 and not re.match('(。|,|,|!|\!|\.|?|\?)', sent):
            res.append(sent)
    return res


# Script entry point: loads the prediction configuration and derives the
# names and numeric settings used by the re-prediction steps that follow.
if __name__ == "__main__":

    # Set up parameters: read the "config_roberta_large" config file and
    # fetch the PREDICT section of the configuration.
    param = UParam()
    param.read_config_file("config_roberta_large")
    args = param.get_config(param.PREDICT)
    # Initialize logging.
    logger = ULog(param)

    app_name = args["app_name"]
    # Constant definitions (the bare string below is the original
    # Chinese section marker and is kept as-is).
    '''
    常数定义
    '''
    # Result file path and artifact names, all derived from app_name.
    file_name = "File_Directory/results/{}.json".format(app_name)
    new_data_name = "{}_re_predict_data".format(app_name)
    new_result_name = "{}_re_predict_out".format(app_name)
    final_result_name = "{}_final_out".format(app_name)
    # Re-prediction settings read from the PREDICT config section.
    # NOTE(review): their exact meaning is defined by the code that consumes
    # them further down — confirm there before changing values.
    threshold = args["re_predict_threshold"]
    mix_rate = args['re_predict_mix_rate']
    decay_rate = args['re_predict_decay_rate']
    select_threshold = args['re_predict_select_threshold']
    '''
    预测过程
コード例 #3
0
ファイル: dataset.py プロジェクト: mottled233/MRC_FastFrame
 def __init__(self, args):
     """Keep the configuration, create a logger, and start with no examples.

     :param args: configuration object; stored on the instance and also
         passed to ``ULog`` together with this module's ``__name__``.
     """
     self.args = args
     self.logger = ULog(args, __name__)
     # Examples are filled in later; begin with an empty list.
     self.examples = list()