예제 #1
0
def create_arg_parser(args=None):
    """Create the ``argparse.ArgumentParser`` described as 'CRF'.

    The fresh parser is stored on ``add_arg.arg_parser`` first; afterwards
    every option is handed to ``add_arg`` or ``add_arg_to_L`` (presumably
    these register the option on that parser -- confirm against their
    definitions). Call order is: file I/O options, ``TRAIN_PARAM`` options,
    ``TOPO_PARAM`` options, then the debug option.

    Args:
        args: Not read anywhere in this function.

    Returns:
        The ``ArgumentParser`` instance created here.
    """
    parser = argparse.ArgumentParser(description='CRF')
    add_arg.arg_parser = parser

    def help_kwargs(spec):
        # Forward ``help`` only when the table entry carries one, so entries
        # without help text are passed without any keyword argument.
        return {'help': spec[2]} if len(spec) == 3 else {}

    # Each entry is (flag, default) or (flag, default, help text).
    file_io_options = (
        ('--only_test', False, 'Only do the test'),
        ('--save_model_param', 'best-parameters',
         'The best model will be saved there'),
        ('--training_data', 'train-weiboner.crfsuite.txt',
         'training file name'),
        ('--valid_data', 'dev-weiboner.crfsuite.txt', 'develop file name'),
        ('--test_data', 'test-weiboner.crfsuite.txt', 'test file name'),
        ('--output_dir', '/export/projects/npeng/weiboNER_data/',
         'the output dir that stores the prediction'),
        ('--eval_test', True,
         'Whether evaluate the test data: test data may not have annotations.'
         ),
        ('--emb_type', 'char',
         'The embedding type, choose from (char, word, charpos)'),
        ('--emb_file',
         '/export/projects/npeng/weiboNER_data/weibo_char_vectors',
         'The initial embedding file name'),
        ('--emb_init', 'RANDOM', 'The initial embedding type for cws'),
        ('--ner_feature_thresh', 0,
         "The minimum count (upto and including) OOV threshold for NER"),
    )
    training_options = (
        ('--use_features', True),
        ('--lr', 0.05),
        ('--use_emb', True),
        ('--fine_tuning', True),
        ('--nepochs', 200),
        ('--neval_epochs', 5),
        ('--optimizer', 'sgd'),
        ('--seed', 1),
        ('--decay', True, 'whether learning rate decay'),
        ('--decay_epochs', 10),
        ('--minimum_lr', 1e-5),
    )
    topology_options = (
        ('--circuit', 'plainOrderOneCRF',
         "the conbination of different models"),
        ('--emb_output_transform_out_dim', 500),
        ('--wemb1_out_dim', 100),
        ('--in_dim', -1),
        ('--emission_trans_out_dim', -1),
        ('--L2Reg_reg_weight', 0.0),
        ('--win', 1),
    )

    ## File IO
    for spec in file_io_options:
        add_arg(spec[0], spec[1], **help_kwargs(spec))
    ## Training
    for spec in training_options:
        add_arg_to_L(TRAIN_PARAM, spec[0], spec[1], **help_kwargs(spec))
    ## Topology
    for spec in topology_options:
        add_arg_to_L(TOPO_PARAM, spec[0], spec[1], **help_kwargs(spec))
    ## DEBUG
    add_arg('--verbose', 2)

    return parser
예제 #2
0
def create_arg_parser(args=None):
    """Create the ``argparse.ArgumentParser`` described as 'LSTM'.

    The fresh parser is stored on ``add_arg.arg_parser`` first; afterwards
    every option is handed to ``add_arg`` or ``add_arg_to_L`` (presumably
    these register the option on that parser -- confirm against their
    definitions). Call order is: file I/O options, task options,
    ``TRAIN_PARAM`` options, ``TOPO_PARAM`` options, then the debug options.

    Args:
        args: Not read anywhere in this function.

    Returns:
        The ``ArgumentParser`` instance created here.
    """
    parser = argparse.ArgumentParser(description='LSTM')
    add_arg.arg_parser = parser

    def help_kwargs(spec):
        # Forward ``help`` only when the table entry carries one, so entries
        # without help text are passed without any keyword argument.
        return {'help': spec[2]} if len(spec) == 3 else {}

    # Each entry is (flag, default) or (flag, default, help text).
    use_emb_help = ('cws always use embeddings. so this always true. '
                    'Just need to set it.')
    file_io_options = (
        ('--cws_train_path', '.'),
        ('--cws_valid_path', '.'),
        ('--cws_test_path', '.'),
        ('--ner_train_path', '.'),
        ('--ner_valid_path', '.'),
        ('--ner_test_path', '.'),
        ('--cws_use_features', False),
        ('--ner_use_features', True),
        ('--cws_circuit', 'LSTMOrderOneCRF'),
        ('--ner_circuit', 'plainOrderOneCRF'),
        # Disabled per-task variants of --emb_init:
        # ('--cws_emb_init', 'RANDOM', 'The initial embedding type for cws'),
        # ('--ner_emb_init', 'FILE', 'The initial embedding type for ner'),
        ('--emb_init', 'RANDOM', 'The initial embedding type for cws'),
        ('--emb_file', '', 'The initial embedding file name'),
        ('--m1_wemb1_dropout_rate', 0.2,
         'Dropout rate for the input embedding layer'),
        ('--use_emb', True, use_emb_help),
        ('--cws_use_emb', True, use_emb_help),
        ('--ner_use_emb', True),
        ('--cws_fine_tuning', True),
        ('--ner_fine_tuning', True),
        ('--ner_eval_test', True,
         'Whether evaluate the test data: test data may not have annotations.'
         ),
        ('--ner_feature_thresh', 0),
    )
    task_options = (
        # Maybe 1 ?
        ('--ner_oovthresh', 0,
         "The minimum count (upto and including) OOV threshold for NER"),
        ('--chunking_oovthresh', 0),
        ('--pos_oovthresh', 2),
    )
    training_options = (
        ('--train_mode', 'alternative',
         'possible train mode including joint, alternative, cws and ner'),
        ('--lr', 0.01),
        ('--cws_lr', 0.01),
        ('--ner_lr', 0.05),
        ('--sample_coef', 10),
        ('--nepochs', 30),
        ('--ner_nepochs', 30),
        ('--cws_nepochs', 10),
        ('--cws_joint_weight', 0.1),
        ('--optimizer', 'sgd', 'sgd or adadelta'),
        ('--seed', 1),  # int(random.getrandbits(10)))
        ('--decay', True, 'whether learning rate decay'),
        ('--cws_decay_epochs', 5),
        ('--ner_decay_epochs', 10),
        ('--minimum_lr', 1e-5),
        ('--lower_case_input', 0),
        ('--digit_to_zero', 1),
    )
    topology_options = (
        ('--emission_trans_out_dim', -1),
        ('--crf_viterbi', False),
        ('--m1_wemb1_out_dim', 100),
        ('--m1_lstm_out_dim', 150),
        # Disabled topology options:
        # ('--emb_output_transform_out_dim', 500),
        # ('--lstm_activation_activation_fn', RELU_FN),
        ('--L2Reg_reg_weight', 0.0),
        ('--cws_win_l', 0),
        ('--ner_win_l', 0),
        ('--cws_win_r', 2),
        ('--ner_win_r', 0),
    )
    debug_options = (
        ('--verbose', 2),
        ('--debugtopo', False),
    )

    ## File IO, then Task
    for spec in file_io_options + task_options:
        add_arg(spec[0], spec[1], **help_kwargs(spec))
    ## Training
    for spec in training_options:
        add_arg_to_L(TRAIN_PARAM, spec[0], spec[1], **help_kwargs(spec))
    ## Topology
    for spec in topology_options:
        add_arg_to_L(TOPO_PARAM, spec[0], spec[1], **help_kwargs(spec))
    ## DEBUG
    for spec in debug_options:
        add_arg(spec[0], spec[1], **help_kwargs(spec))

    return parser
예제 #3
0
def create_arg_parser(args=None):
    """Create the ``argparse.ArgumentParser`` described as 'LSTM'.

    The fresh parser is stored on ``add_arg.arg_parser`` first; afterwards
    every option is handed to ``add_arg`` or ``add_arg_to_L`` (presumably
    these register the option on that parser -- confirm against their
    definitions). Call order is: the ``--setting`` option, file I/O options,
    task options, ``TRAIN_PARAM`` options, ``TOPO_PARAM`` options, then the
    debug option.

    Args:
        args: Not read anywhere in this function.

    Returns:
        The ``ArgumentParser`` instance created here.
    """
    parser = argparse.ArgumentParser(description='LSTM')
    add_arg.arg_parser = parser

    def help_kwargs(spec):
        # Forward ``help`` only when the table entry carries one, so entries
        # without help text are passed without any keyword argument.
        return {'help': spec[2]} if len(spec) == 3 else {}

    # Each entry is (flag, default) or (flag, default, help text).
    general_options = (
        ('--setting', 'run_single_corpus', 'running single corpus'),
        ## File IO
        # For single task
        ('--data_dir', '.'),
        # For wild prediction
        ('--train_path', '.'),
        ('--valid_path', '.'),
        ('--train_graph', '.'),
        ('--valid_graph', '.'),
        ('--content_file', 'sentences'),
        ('--dependent_file', 'graph_arcs'),
        ('--parameters_file', 'best_parameters'),
        ('--prediction_file', 'prediction'),
        ('--drug_gene_prediction_file', '.'),
        ('--drug_var_prediction_file', '.'),
        ('--triple_prediction_file', '.'),
        ('--num_entity', 2),
        ('--total_fold', 10),
        ('--dev_fold', 0),
        ('--test_fold', 1),
        ('--circuit', 'LSTMRelation'),
        ('--emb_dir', '../treelstm/data',
         'The initial embedding file name for cws'),
        ('--wemb1_dropout_rate', 0.0,
         'Dropout rate for the input embedding layer'),
        ('--lstm_dropout_rate', 0.0,
         'Dropout rate for the lstm output embedding layer'),
        ('--representation', 'charpos', 'Use which representation'),
        ('--fine_tuning', True),
        ('--feature_thresh', 0),
        ('--graph', False),
        ('--weighted', False),
        ('--add', False),
        ('--print_prediction', True),
        ('--factor_set', 3),
        ('--additional', False),
        ## Task
        ('--task', 'news_cws'),
        # Maybe 1 ?
        ('--oovthresh', 0,
         "The minimum count (upto and including) OOV threshold for NER"),
    )
    training_options = (
        ('--cost_coef', 0.0),
        ('--sample_coef', 0.0),
        ('--batch_size', 1),
        ('--train_mode', 'alternative'),
        ('--lr', 0.01),
        ('--dg_lr', 0.005),
        ('--dv_lr', 0.005),
        ('--dgv_lr', 0.005),
        ('--nepochs', 30),
        ('--optimizer', 'sgd', 'sgd or adadelta'),
        ('--seed', 1),  # int(random.getrandbits(10)))
        ('--decay', True, 'whether learning rate decay'),
        ('--decay_epochs', 5),
        ('--minimum_lr', 1e-5),
    )
    topology_options = (
        ('--emission_trans_out_dim', -1),
        ('--crf_viterbi', False),
        ('--lstm_win_size', 5),
        ('--wemb1_out_dim', 300),
        ('--lstm_out_dim', 150),
        ('--CNN_out_dim', 500),
        ('--lstm_type_dim', 50),
        ('--MLP_hidden_out_dim', 1000),
        ('--MLP_activation_fn', 'tanh'),
        ('--L2Reg_reg_weight', 0.0),
        ('--win_l', 0),
        ('--win_r', 0),
    )

    ## Setting, File IO, Task
    for spec in general_options:
        add_arg(spec[0], spec[1], **help_kwargs(spec))
    ## Training
    for spec in training_options:
        add_arg_to_L(TRAIN_PARAM, spec[0], spec[1], **help_kwargs(spec))
    ## Topology
    for spec in topology_options:
        add_arg_to_L(TOPO_PARAM, spec[0], spec[1], **help_kwargs(spec))
    ## DEBUG
    add_arg('--verbose', 2)

    return parser