def create_arg_parser(args=None):
    """Build the command-line parser for the CRF configuration.

    Options are registered, in a fixed order, through the ``add_arg`` and
    ``add_arg_to_L`` helpers (both defined outside this block). Training
    options are registered against ``TRAIN_PARAM`` and topology options
    against ``TOPO_PARAM``.

    Args:
        args: Accepted for interface compatibility; not used here.

    Returns:
        The ``argparse.ArgumentParser`` created by this call.
    """
    parser = argparse.ArgumentParser(description='CRF')
    # The parser is attached to add_arg before any option is registered;
    # presumably add_arg reads this attribute to find its target parser.
    add_arg.arg_parser = parser

    # Each entry is (flag, default, extra keyword arguments). An empty dict
    # means the helper is called without a ``help`` keyword at all.
    file_io_options = (
        ('--only_test', False, {'help': 'Only do the test'}),
        ('--save_model_param', 'best-parameters',
         {'help': 'The best model will be saved there'}),
        ('--training_data', 'train-weiboner.crfsuite.txt',
         {'help': 'training file name'}),
        ('--valid_data', 'dev-weiboner.crfsuite.txt',
         {'help': 'develop file name'}),
        ('--test_data', 'test-weiboner.crfsuite.txt',
         {'help': 'test file name'}),
        ('--output_dir', '/export/projects/npeng/weiboNER_data/',
         {'help': 'the output dir that stores the prediction'}),
        ('--eval_test', True,
         {'help': 'Whether evaluate the test data: '
                  'test data may not have annotations.'}),
        ('--emb_type', 'char',
         {'help': 'The embedding type, choose from (char, word, charpos)'}),
        ('--emb_file',
         '/export/projects/npeng/weiboNER_data/weibo_char_vectors',
         {'help': 'The initial embedding file name'}),
        ('--emb_init', 'RANDOM',
         {'help': 'The initial embedding type for cws'}),
        ('--ner_feature_thresh', 0,
         {'help': 'The minimum count (upto and including) '
                  'OOV threshold for NER'}),
    )
    training_options = (
        ('--use_features', True, {}),
        ('--lr', 0.05, {}),
        ('--use_emb', True, {}),
        ('--fine_tuning', True, {}),
        ('--nepochs', 200, {}),
        ('--neval_epochs', 5, {}),
        ('--optimizer', 'sgd', {}),
        ('--seed', 1, {}),
        ('--decay', True, {'help': 'whether learning rate decay'}),
        ('--decay_epochs', 10, {}),
        ('--minimum_lr', 1e-5, {}),
    )
    topology_options = (
        ('--circuit', 'plainOrderOneCRF',
         {'help': 'the conbination of different models'}),
        ('--emb_output_transform_out_dim', 500, {}),
        ('--wemb1_out_dim', 100, {}),
        ('--in_dim', -1, {}),
        ('--emission_trans_out_dim', -1, {}),
        ('--L2Reg_reg_weight', 0.0, {}),
        ('--win', 1, {}),
    )
    debug_options = (
        ('--verbose', 2, {}),
    )

    # Registration order matches the original: file IO, training,
    # topology, then debug.
    for flag, default, extra in file_io_options:
        add_arg(flag, default, **extra)
    for flag, default, extra in training_options:
        add_arg_to_L(TRAIN_PARAM, flag, default, **extra)
    for flag, default, extra in topology_options:
        add_arg_to_L(TOPO_PARAM, flag, default, **extra)
    for flag, default, extra in debug_options:
        add_arg(flag, default, **extra)

    return parser
def create_arg_parser(args=None):
    """Build the command-line parser for the joint cws/ner LSTM configuration.

    Options are registered, in a fixed order, through the ``add_arg`` and
    ``add_arg_to_L`` helpers (both defined outside this block). Training
    options are registered against ``TRAIN_PARAM`` and topology options
    against ``TOPO_PARAM``.

    Args:
        args: Accepted for interface compatibility; not used here.

    Returns:
        The ``argparse.ArgumentParser`` created by this call.
    """
    parser = argparse.ArgumentParser(description='LSTM')
    # The parser is attached to add_arg before any option is registered;
    # presumably add_arg reads this attribute to find its target parser.
    add_arg.arg_parser = parser

    # Each entry is (flag, default, extra keyword arguments). An empty dict
    # means the helper is called without a ``help`` keyword at all.
    file_io_options = (
        ('--cws_train_path', '.', {}),
        ('--cws_valid_path', '.', {}),
        ('--cws_test_path', '.', {}),
        ('--ner_train_path', '.', {}),
        ('--ner_valid_path', '.', {}),
        ('--ner_test_path', '.', {}),
        ('--cws_use_features', False, {}),
        ('--ner_use_features', True, {}),
        ('--cws_circuit', 'LSTMOrderOneCRF', {}),
        ('--ner_circuit', 'plainOrderOneCRF', {}),
        # NOTE: per-task '--cws_emb_init' / '--ner_emb_init' options were
        # commented out in the original; only the shared '--emb_init' exists.
        ('--emb_init', 'RANDOM',
         {'help': 'The initial embedding type for cws'}),
        ('--emb_file', '', {'help': 'The initial embedding file name'}),
        ('--m1_wemb1_dropout_rate', 0.2,
         {'help': 'Dropout rate for the input embedding layer'}),
        ('--use_emb', True,
         {'help': 'cws always use embeddings. so this always true. '
                  'Just need to set it.'}),
        ('--cws_use_emb', True,
         {'help': 'cws always use embeddings. so this always true. '
                  'Just need to set it.'}),
        ('--ner_use_emb', True, {}),
        ('--cws_fine_tuning', True, {}),
        ('--ner_fine_tuning', True, {}),
        ('--ner_eval_test', True,
         {'help': 'Whether evaluate the test data: '
                  'test data may not have annotations.'}),
        ('--ner_feature_thresh', 0, {}),
    )
    task_options = (
        # The original carried a "Maybe 1 ?" remark on this default.
        ('--ner_oovthresh', 0,
         {'help': 'The minimum count (upto and including) '
                  'OOV threshold for NER'}),
        ('--chunking_oovthresh', 0, {}),
        ('--pos_oovthresh', 2, {}),
    )
    training_options = (
        ('--train_mode', 'alternative',
         {'help': 'possible train mode including joint, alternative, '
                  'cws and ner'}),
        ('--lr', 0.01, {}),
        ('--cws_lr', 0.01, {}),
        ('--ner_lr', 0.05, {}),
        ('--sample_coef', 10, {}),
        ('--nepochs', 30, {}),
        ('--ner_nepochs', 30, {}),
        ('--cws_nepochs', 10, {}),
        ('--cws_joint_weight', 0.1, {}),
        ('--optimizer', 'sgd', {'help': 'sgd or adadelta'}),
        # Fixed seed; the original noted int(random.getrandbits(10)) as an
        # alternative but left it disabled.
        ('--seed', 1, {}),
        ('--decay', True, {'help': 'whether learning rate decay'}),
        ('--cws_decay_epochs', 5, {}),
        ('--ner_decay_epochs', 10, {}),
        ('--minimum_lr', 1e-5, {}),
        ('--lower_case_input', 0, {}),
        ('--digit_to_zero', 1, {}),
    )
    topology_options = (
        ('--emission_trans_out_dim', -1, {}),
        ('--crf_viterbi', False, {}),
        ('--m1_wemb1_out_dim', 100, {}),
        ('--m1_lstm_out_dim', 150, {}),
        # NOTE: '--emb_output_transform_out_dim' and
        # '--lstm_activation_activation_fn' were commented out in the
        # original and are not registered.
        ('--L2Reg_reg_weight', 0.0, {}),
        ('--cws_win_l', 0, {}),
        ('--ner_win_l', 0, {}),
        ('--cws_win_r', 2, {}),
        ('--ner_win_r', 0, {}),
    )
    debug_options = (
        ('--verbose', 2, {}),
        ('--debugtopo', False, {}),
    )

    # Registration order matches the original: file IO, task, training,
    # topology, then debug.
    for flag, default, extra in file_io_options:
        add_arg(flag, default, **extra)
    for flag, default, extra in task_options:
        add_arg(flag, default, **extra)
    for flag, default, extra in training_options:
        add_arg_to_L(TRAIN_PARAM, flag, default, **extra)
    for flag, default, extra in topology_options:
        add_arg_to_L(TOPO_PARAM, flag, default, **extra)
    for flag, default, extra in debug_options:
        add_arg(flag, default, **extra)

    return parser
def create_arg_parser(args=None):
    """Build the command-line parser for the LSTM relation configuration.

    Options are registered, in a fixed order, through the ``add_arg`` and
    ``add_arg_to_L`` helpers (both defined outside this block). Training
    options are registered against ``TRAIN_PARAM`` and topology options
    against ``TOPO_PARAM``.

    Args:
        args: Accepted for interface compatibility; not used here.

    Returns:
        The ``argparse.ArgumentParser`` created by this call.
    """
    parser = argparse.ArgumentParser(description='LSTM')
    # The parser is attached to add_arg before any option is registered;
    # presumably add_arg reads this attribute to find its target parser.
    add_arg.arg_parser = parser

    # Each entry is (flag, default, extra keyword arguments). An empty dict
    # means the helper is called without a ``help`` keyword at all.
    general_options = (
        ('--setting', 'run_single_corpus', {'help': 'running single corpus'}),
    )
    file_io_options = (
        # For single task
        ('--data_dir', '.', {}),
        # For wild prediction
        ('--train_path', '.', {}),
        ('--valid_path', '.', {}),
        ('--train_graph', '.', {}),
        ('--valid_graph', '.', {}),
        ('--content_file', 'sentences', {}),
        ('--dependent_file', 'graph_arcs', {}),
        ('--parameters_file', 'best_parameters', {}),
        ('--prediction_file', 'prediction', {}),
        ('--drug_gene_prediction_file', '.', {}),
        ('--drug_var_prediction_file', '.', {}),
        ('--triple_prediction_file', '.', {}),
        ('--num_entity', 2, {}),
        ('--total_fold', 10, {}),
        ('--dev_fold', 0, {}),
        ('--test_fold', 1, {}),
        ('--circuit', 'LSTMRelation', {}),
        ('--emb_dir', '../treelstm/data',
         {'help': 'The initial embedding file name for cws'}),
        ('--wemb1_dropout_rate', 0.0,
         {'help': 'Dropout rate for the input embedding layer'}),
        ('--lstm_dropout_rate', 0.0,
         {'help': 'Dropout rate for the lstm output embedding layer'}),
        ('--representation', 'charpos',
         {'help': 'Use which representation'}),
        ('--fine_tuning', True, {}),
        ('--feature_thresh', 0, {}),
        ('--graph', False, {}),
        ('--weighted', False, {}),
        ('--add', False, {}),
        ('--print_prediction', True, {}),
        ('--factor_set', 3, {}),
        ('--additional', False, {}),
    )
    task_options = (
        ('--task', 'news_cws', {}),
        # The original carried a "Maybe 1 ?" remark on this default.
        ('--oovthresh', 0,
         {'help': 'The minimum count (upto and including) '
                  'OOV threshold for NER'}),
    )
    training_options = (
        ('--cost_coef', 0.0, {}),
        ('--sample_coef', 0.0, {}),
        ('--batch_size', 1, {}),
        ('--train_mode', 'alternative', {}),
        ('--lr', 0.01, {}),
        ('--dg_lr', 0.005, {}),
        ('--dv_lr', 0.005, {}),
        ('--dgv_lr', 0.005, {}),
        ('--nepochs', 30, {}),
        ('--optimizer', 'sgd', {'help': 'sgd or adadelta'}),
        # Fixed seed; the original noted int(random.getrandbits(10)) as an
        # alternative but left it disabled.
        ('--seed', 1, {}),
        ('--decay', True, {'help': 'whether learning rate decay'}),
        ('--decay_epochs', 5, {}),
        ('--minimum_lr', 1e-5, {}),
    )
    topology_options = (
        ('--emission_trans_out_dim', -1, {}),
        ('--crf_viterbi', False, {}),
        ('--lstm_win_size', 5, {}),
        ('--wemb1_out_dim', 300, {}),
        ('--lstm_out_dim', 150, {}),
        ('--CNN_out_dim', 500, {}),
        ('--lstm_type_dim', 50, {}),
        ('--MLP_hidden_out_dim', 1000, {}),
        ('--MLP_activation_fn', 'tanh', {}),
        ('--L2Reg_reg_weight', 0.0, {}),
        ('--win_l', 0, {}),
        ('--win_r', 0, {}),
    )
    debug_options = (
        ('--verbose', 2, {}),
    )

    # Registration order matches the original: setting, file IO, task,
    # training, topology, then debug.
    for flag, default, extra in general_options:
        add_arg(flag, default, **extra)
    for flag, default, extra in file_io_options:
        add_arg(flag, default, **extra)
    for flag, default, extra in task_options:
        add_arg(flag, default, **extra)
    for flag, default, extra in training_options:
        add_arg_to_L(TRAIN_PARAM, flag, default, **extra)
    for flag, default, extra in topology_options:
        add_arg_to_L(TOPO_PARAM, flag, default, **extra)
    for flag, default, extra in debug_options:
        add_arg(flag, default, **extra)

    return parser