Ejemplo n.º 1
0
def decoder_wrapper(
        model_file_path: str = "http://dmserv4.cs.illinois.edu/pner0.th",
        configs: dict = None):
    """
    Wrapper for different decode functions.

    Loads a checkpoint and dispatches to the decoder implementation that
    matches the ``model_type`` recorded in the checkpoint's config.

    Parameters
    ----------
    model_file_path: ``str``, optional, (default = "http://dmserv4.cs.illinois.edu/pner0.th").
        Path to loaded checkpoint.
    configs: ``dict``, optional, (default = None).
        Additional configs; ``None`` is treated as an empty dict.

    Raises
    ------
    KeyError
        If the checkpoint's ``model_type`` has no registered decoder.
    """
    # Fix for the mutable-default-argument pitfall: a `{}` default is a
    # single shared object and would leak state between calls.
    if configs is None:
        configs = {}

    pw = wrapper(configs.get("log_path", None))

    logger.info(
        "Loading model from {} (might download from source if not cached).".
        format(model_file_path))
    model_file = wrapper.restore_checkpoint(model_file_path)

    # The checkpoint itself records which decoder architecture it was
    # trained with; fall back to the char-lstm-crf decoder if absent.
    model_type = model_file['config'].get("model_type", 'char-lstm-crf')
    logger.info('Preparing the pre-trained {} model.'.format(model_type))
    model_type_dict = {
        "char-lstm-crf": decoder_wc,
        "char-lstm-two-level": decoder_tl}
    return model_type_dict[model_type](model_file, pw, configs)
Ejemplo n.º 2
0
def decoder_wrapper(model_file_path: str, configs: dict = None):
    """
    Wrapper for different decode functions.

    Loads a checkpoint and dispatches to the decoder implementation that
    matches the ``model_type`` given in ``configs``.

    Parameters
    ----------
    model_file_path: ``str``, required.
        Path to loaded checkpoint.
    configs: ``dict``, optional, (default = None).
        Additional configs; ``None`` is treated as an empty dict.

    Raises
    ------
    KeyError
        If the requested ``model_type`` has no registered decoder.
    """
    # Fix for the mutable-default-argument pitfall: a `{}` default is a
    # single shared object and would leak state between calls.
    if configs is None:
        configs = {}

    pw = wrapper(configs.get("log_path", None))
    pw.set_level(configs.get("log_level", 'info'))

    pw.info(
        "Loading model from {} (might download from source if not cached).".
        format(model_file_path))
    model_file = wrapper.restore_checkpoint(model_file_path)

    # Unlike the checkpoint-driven variant, the model type here comes from
    # the caller-supplied configs, defaulting to char-lstm-crf.
    model_type = configs.get("model_type", 'char-lstm-crf')
    pw.info('Preparing the pre-trained {} model.'.format(model_type))
    model_type_dict = {"char-lstm-crf": decoder_wc}
    return model_type_dict[model_type](model_file, pw, configs)
Ejemplo n.º 3
0
    # NOTE(review): this excerpt starts mid-script — `parser`, `args.cp_root`,
    # `args.checkpoint_name`, etc. are defined above this chunk.
    # Model / optimizer hyper-parameters.
    parser.add_argument('--add_relu', action='store_true')
    parser.add_argument('--layer_drop', type=float, default=0.5)
    parser.add_argument('--epoch', type=int, default=400)
    parser.add_argument('--clip', type=float, default=5)
    parser.add_argument('--update', choices=['Adam', 'Adagrad', 'Adadelta'], default='Adam', help='adam is the best')
    parser.add_argument('--rnn_layer', choices=['Basic', 'DenseNet', 'LDNet'], default='LDNet')
    parser.add_argument('--rnn_unit', choices=['gru', 'lstm', 'rnn'], default='lstm')
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--lr_decay', type=float, default=0.1)
    # Vocabulary cut-off thresholds (presumably for an adaptive softmax —
    # TODO confirm against the model code).
    parser.add_argument('--cut_off', nargs='+', default=[4000,40000,200000])
    parser.add_argument('--interval', type=int, default=100)
    parser.add_argument('--epoch_size', type=int, default=4000)
    parser.add_argument('--patience', type=float, default=10)
    args = parser.parse_args()

    # Checkpoint/logging wrapper rooted at <cp_root>/<checkpoint_name>.
    pw = wrapper(os.path.join(args.cp_root, args.checkpoint_name), args.checkpoint_name, enable_git_track=args.git_tracking)
    pw.set_level('info')

    # 'auto' lets the wrapper pick a GPU; otherwise use the given index.
    # A negative index means CPU.
    gpu_index = pw.auto_device() if 'auto' == args.gpu else int(args.gpu)
    device = torch.device("cuda:" + str(gpu_index) if gpu_index >= 0 else "cpu")
    if gpu_index >= 0:
        torch.cuda.set_device(gpu_index)

    pw.info('Loading dataset.')
    # The pickle holds the word map, held-out test data, and index ranges
    # used to shard the training data on disk.
    dataset = pickle.load(open(args.dataset_folder + 'test.pk', 'rb'))
    w_map, test_data, range_idx = dataset['w_map'], dataset['test_data'], dataset['range']
    train_loader = LargeDataset(args.dataset_folder, range_idx, args.batch_size, args.sequence_length)
    test_loader = EvalDataset(test_data, args.batch_size)

    pw.info('Building models.')
    # Map the --rnn_layer choice to its constructor; LDNet additionally
    # binds the layer-drop probability.
    rnn_map = {'Basic': BasicRNN, 'DenseNet': DenseRNN, 'LDNet': functools.partial(LDRNN, layer_drop = args.layer_drop)}
Ejemplo n.º 4
0
from abnlp.optim import Nadam

logger = logging.getLogger(__name__)

if __name__ == "__main__":
    # NOTE(review): the script body continues past this excerpt; only the
    # argument parsing / data loading / device setup is visible here.

    parser = argparse.ArgumentParser()
    parser.add_argument('--cp_root', default="./checkpoint")
    parser.add_argument('--config', default="./config/ner0.json")
    parser.add_argument('--train_file', default="./config/ner0.json")
    conf = parser.parse_args()

    # Most settings live in a JSON config file; the CLI only points to it.
    with open(conf.config, 'r') as fin:
        args = json.load(fin)

    # Checkpoint/logging wrapper rooted at <cp_root>/<checkpoint_name>.
    pw = wrapper(os.path.join(conf.cp_root, args["checkpoint_name"]),
                 args["checkpoint_name"])

    logger.info('Loading the data...')
    train_data = strFromFileEncoderWrapper(args,
                                           processed_file=conf.train_file)

    logger.info('Checking the device...')
    # 'auto' lets the wrapper pick a GPU; otherwise use the given index.
    # A negative index means CPU.
    gpu_index = pw.auto_device() if 'auto' == args["gpu"] else int(args["gpu"])
    device = torch.device("cuda:" +
                          str(gpu_index) if gpu_index >= 0 else "cpu")
    if gpu_index >= 0:
        torch.cuda.set_device(gpu_index)

    logger.info("Exp: {}".format(args['checkpoint_name']))
    logger.info("Config: {}".format(args))
Ejemplo n.º 5
0
from betanlp.model import seqLabel, seqLabelEvaluator
from betanlp.encoder import strFromFileEncoderWrapper
from betanlp.common.utils import adjust_learning_rate
from betanlp.optim import Nadam

logger = logging.getLogger(__name__)

if __name__ == "__main__":
    # NOTE(review): the script body continues past this excerpt; only the
    # config loading / device setup / model construction is visible here.

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default="./config/twitter_ner.json")
    conf = parser.parse_args()
    # All experiment settings come from the JSON config file.
    with open(conf.config, 'r') as fin:
        args = json.load(fin)

    # Checkpoint/logging wrapper rooted at <cp_root>/<checkpoint_name>.
    pw = wrapper(os.path.join(args["cp_root"], args["checkpoint_name"]), args["checkpoint_name"], enable_git_track=args["git_tracking"])

    # 'auto' lets the wrapper pick a GPU; otherwise use the given index.
    # A negative index means CPU.
    gpu_index = pw.auto_device() if 'auto' == args["gpu"] else int(args["gpu"])
    device = torch.device("cuda:" + str(gpu_index) if gpu_index >= 0 else "cpu")
    if gpu_index >= 0:
        torch.cuda.set_device(gpu_index)

    logger.info("Exp: {}".format(args['checkpoint_name']))
    logger.info("Config: {}".format(args))

    logger.info('Saving the configure...')
    pw.save_configue(args)

    logger.info('Building the model...')
    model = seqLabel(args)
    # Evaluator scores predictions via the decoder's span conversion.
    evaluator = seqLabelEvaluator(model.spDecoder.to_spans)