""" service = Service(service_name) api.req_options.auto_parse_form_urlencoded = True api.add_route('/{}'.format(service_name), service) # api.router_options path = os.path.abspath( os.path.join(os.path.dirname(__file__), "web_service/visualizer/displacy")) api.add_static_route('/{}'.format(service_name), path) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--name', help="the name of the service you want to upload", type=str, required=True, action=check_size(1, 30)) args = parser.parse_args() app = application = falcon.API(middleware=[MultipartMiddleware()]) if not is_valid_input(args.name): logger.error('ERROR: Invalid argument input for the server.') sys.exit(0) # init and load service set_server_properties(app, args.name) # run server: port = 8080 server = make_server('0.0.0.0', port, app) print('starting the server at port {0}'.format(port)) server.serve_forever()
    # load existing model
    mlp_clf.load(args.model)
    results = mlp_clf.get_outputs(test_set['X'])
    return results


if __name__ == "__main__":
    # parse the command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--max_num_of_senses_to_search', default=3, type=int,
                        action=check_size(0, 100),
                        help='maximum number of senses that are tested')
    parser.add_argument('--input_inference_examples_file',
                        type=validate_existing_filepath,
                        default='data/input_inference_examples.csv',
                        help='input data file')
    parser.add_argument(
        '--model', default='data/mcs_model.h5', type=validate_existing_filepath,
        help='path to the file where the trained model has been stored')
    parser.add_argument(
        '--word_embedding_model_file', type=validate_existing_filepath,
        default='pretrained_models/GoogleNews-vectors-negative300.bin',
        help='path to the word embedding\'s model')
import logging
import sys

from nlp_architect.models.np2vec import NP2vec
from nlp_architect.utils.io import check_size, validate_existing_filepath

logger = logging.getLogger(__name__)

if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--corpus',
        default='train.txt',
        type=str,
        action=check_size(min_size=1),
        help='path to the corpus. By default, '
        'it is the training set of the CONLL2000 shared task dataset.')
    arg_parser.add_argument(
        '--corpus_format',
        default='conll2000',
        type=str,
        choices=['json', 'txt', 'conll2000'],
        help='format of the input marked corpus; txt, conll2000 and json formats are supported. '
        'For json format, the file should contain an iterable of sentences. '
        'Each sentence is a list of terms (unicode strings) that will be used for training.')
    arg_parser.add_argument(
        '--mark_char',
        help='boolean indicating whether the model to load has been stored in binary format.',
        action='store_true')
    arg_parser.add_argument(
        '--word_ngrams',
        default=0,
        type=int,
        choices=[0, 1],
        help='If 0, the model to load stores word information. If 1, the model to load stores '
        'subword (ngrams) information; note that subword information is relevant only to '
        'fasttext models.')
    arg_parser.add_argument('--topn', default=500, type=int, action=check_size(min_size=1),
                            help='maximal number of expanded terms to return')
    arg_parser.add_argument('--grouping', action='store_true', default=False,
                            help='grouping mode')
    args = arg_parser.parse_args()

    se = SetExpand(np2vec_model_file=args.np2vec_model_file, binary=args.binary,
                   word_ngrams=args.word_ngrams, grouping=args.grouping)
    enter_seed_str = 'Enter the seed (comma-separated seed terms):'
    logger.info(enter_seed_str)
    for seed_str in sys.stdin:
    num_iterations = int(n_train * n_epochs * 1.0 / batch_size)
    results_dir = os.path.abspath(args.results_dir)

    adding_dataset = Adding(seq_len=seq_len, n_train=n_train, n_test=n_val)

    model = TCNForAdding(seq_len, n_features, hidden_sizes, kernel_size=kernel_size,
                         dropout=dropout)
    model.build_train_graph(args.lr, max_gradient_norm=args.grad_clip_value)
    model.run(adding_dataset, num_iterations=num_iterations,
              log_interval=args.log_interval, result_dir=results_dir)


PARSER = argparse.ArgumentParser()
PARSER.add_argument('--seq_len', type=int, action=check_size(0, 1000),
                    help="Number of time points in each input sequence", default=200)
PARSER.add_argument('--log_interval', type=int, default=100, action=check_size(0, 10000),
                    help="frequency, in number of iterations, after which loss is evaluated")
PARSER.add_argument('--results_dir', type=validate_parent_exists,
                    help="Directory to write results to",
                    default=os.path.expanduser('~/results'))
PARSER.add_argument('--dropout', type=float, default=0.0, action=check_size(0, 1),
                    help='dropout applied to layers, between 0 and 1 (default: 0.0)')
PARSER.add_argument('--ksize', type=int, default=6, action=check_size(0, 10),
                    help='kernel size (default: 6)')
PARSER.add_argument('--levels', type=int, default=7, action=check_size(0, 10),
                    help='# of levels (default: 7)')
PARSER.add_argument('--lr', type=float, default=2e-3, action=check_size(0, 1),
                    help='initial learning rate (default: 2e-3)')
PARSER.add_argument('--nhid', type=int, default=27, action=check_size(0, 1000),
from nlp_architect.models.crossling_emb import WordTranslator
from nlp_architect.utils.io import validate_existing_directory, validate_parent_exists, check_size

if __name__ == "__main__":
    print("\t\t" + 40 * "=")
    print("\t\t= Unsupervised Crosslingual Embeddings =")
    print("\t\t" + 40 * "=")

    # Parsing arguments for model parameters
    parser = argparse.ArgumentParser()
    parser.add_argument("--emb_dim", type=int, default=300,
                        help="Embedding Dimensions", action=check_size(1, 1024))
    parser.add_argument("--vocab_size", type=int, default=200000,
                        help="Vocabulary Size", action=check_size(1, 1000000))
    parser.add_argument("--lr", type=float, default=0.1,
                        help="Learning Rate", action=check_size(0.00001, 2.0))
    parser.add_argument("--beta", type=float, default=0.001,
                        help="Beta for W orthogonalization", action=check_size(0.0000001, 5.0))
                           verbose=1, validation_data=(X_test, Y_test), callbacks=[es])
    lstm_acc = model_hist.history['val_acc'][-1]
    print("LSTM model accuracy ", lstm_acc)
    # The optimizer minimizes this objective, so to maximize accuracy we return the inverse :)
    return 1 - lstm_acc


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--file_path', type=validate_existing_filepath, default='./',
                        help='file_path where the files to parse are located')
    parser.add_argument('--data_type', type=str, default='amazon', choices=['amazon'])
    parser.add_argument('--output_file', type=validate_parent_exists, default='./opt_trials.pkl',
                        help='file_path where the output of the trials will be located')
    parser.add_argument('--new_trials', type=int, default=20, action=check_size(1, 20000))
    args_in = parser.parse_args()

    # Check inputs
    if args_in.file_path:
        validate_existing_filepath(args_in.file_path)
    if args_in.output_file:
        validate_parent_exists(args_in.output_file)

    if args_in.data_type == 'amazon':
        data_in = Amazon_Reviews(args_in.file_path)
    try:
        if args_in.output_file.endswith('.pkl'):
            with open(args_in.output_file, 'rb') as read_f:
                trials_to_keep = pickle.load(read_f)
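# Aside: the objective above returns 1 - accuracy because the search library minimizes.
# Assuming hyperopt is the optimizer (suggested by the pickled trials object, the
# opt_trials.pkl default and --new_trials), a minimal sketch of wiring such an objective
# into fmin could look like this. The names `objective` and the search space below are
# made up for illustration; they are not taken from the script.
from hyperopt import Trials, fmin, hp, tpe

space = {
    'lstm_hidden_size': hp.choice('lstm_hidden_size', [64, 128, 256]),
    'dropout': hp.uniform('dropout', 0.0, 0.5),
}
trials = Trials()
best = fmin(
    fn=objective,        # hypothetical name for the training function whose tail is shown above
    space=space,
    algo=tpe.suggest,
    max_evals=20,
    trials=trials,
)
print(best)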
parser.add_argument(
    '--use_oov',
    default=False,
    action='store_true',
    help='use OOV test set')
parser.add_argument(
    '--lr',
    type=float,
    default=0.001,
    help='learning rate')
parser.add_argument(
    '--grad_clip_norm',
    type=float,
    default=40.0,
    help='Clip gradients such that norm is below this value.',
    action=check_size(0, 100))
parser.add_argument(
    '--eps',
    type=float,
    default=1e-8,
    help='epsilon used to avoid divide by zero in softmax renormalization.',
    action=check_size(1e-100, 1e-2))
parser.add_argument(
    '--save_log',
    action='store_true',
    default=False,
    help='Save evaluation results to log file.')
parser.add_argument(
    '--log_file',
    type=str,
    default='memn2n_dialgoue_results.txt',
import argparse

import tensorflow as tf

from nlp_architect.utils.io import validate_existing_directory, check_size, validate_parent_exists

# Parse the command line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--data_path', default='data', type=validate_existing_directory,
                    help='enter path for training data')
parser.add_argument('--gpu_id', default="0", type=str,
                    help='enter gpu id', action=check_size(0, 8))
parser.add_argument('--max_para_req', default=300, type=int,
                    help='enter the max length of paragraph',
                    action=check_size(30, 300))
parser.add_argument('--epochs', default=15, type=int,
                    help='enter the number of epochs',
                    action=check_size(1, 30))
parser.add_argument('--select_device', default='GPU',
if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('--gold_standard_file', default='data/goldStd.csv', type=validate_existing_filepath, help='path to gold standard file') parser.add_argument( '--word_embedding_model_file', type=validate_existing_filepath, default='pretrained_models/GoogleNews-vectors-negative300.bin', help='path to the word embedding\'s model') parser.add_argument('--training_to_validation_size_ratio', default=0.8, type=float, action=check_size(0, 1), help='ratio between training and validation size') parser.add_argument( '--data_set_file', default='data/data_set.pkl', type=validate_parent_exists, help='path the file where the train, valid and test sets will be stored' ) args = parser.parse_args() # training set X_train = [] y_train = [] # validation set X_valid = []
        hidden_sizes, kernel_size=kernel_size, dropout=dropout)
    model.build_train_graph(args.lr, max_gradient_norm=args.grad_clip_value)
    model.run(adding_dataset, num_iterations=num_iterations,
              log_interval=args.log_interval, result_dir=results_dir)


PARSER = argparse.ArgumentParser()
PARSER.add_argument('--seq_len', type=int, action=check_size(0, 1000),
                    help="Number of time points in each input sequence", default=200)
PARSER.add_argument(
    '--log_interval', type=int, default=100, action=check_size(0, 10000),
    help="frequency, in number of iterations, after which loss is evaluated")
PARSER.add_argument('--results_dir', type=validate_parent_exists,
                    help="Directory to write results to",
                    default=os.path.expanduser('~/results'))
PARSER.add_argument(
    '--dropout',
    type=float,
from ngraph.frontends.neon import Saver
import ngraph.transformers as ngt

from nlp_architect.models.kvmemn2n import KVMemN2N
from nlp_architect.data.wikimovies import WIKIMOVIES
from nlp_architect.utils.io import validate_parent_exists, check_size
from examples.kvmemn2n.interactive_util import interactive_loop

# parse the command line arguments
parser = NgraphArgparser(__doc__)
parser.add_argument(
    '--emb_size',
    type=int,
    default=50,
    help='Size of the word-embedding used in the model. (default 50)',
    action=check_size(1, 20000))
parser.add_argument('--nhops', type=int, default=3,
                    help='Number of memory hops in the network',
                    action=check_size(1, 20))
parser.add_argument('--lr', type=float, default=0.01,
                    help='learning rate', action=check_size(0, 5))
parser.add_argument('--subset', type=str, default='wiki-entities',
                    choices=['full', 'wiki-entities'],
                    help='wikiMovies dataset to use for training examples.')
    create_data_dict,
)
from .matchlstm_ansptr import MatchLSTMAnswerPointer
from nlp_architect.utils.io import validate_existing_directory, check_size, validate_parent_exists

# Parse the command line arguments
parser = argparse.ArgumentParser()
parser.add_argument(
    "--data_path",
    default="data",
    type=validate_existing_directory,
    help="enter path for training data",
)
parser.add_argument("--gpu_id", default="0", type=str, help="enter gpu id",
                    action=check_size(0, 8))
parser.add_argument(
    "--max_para_req",
    default=300,
    type=int,
    help="enter the max length of paragraph",
    action=check_size(30, 300),
)
parser.add_argument(
    "--epochs", default=15, type=int, help="enter the number of epochs", action=check_size(1, 30)
)
parser.add_argument(
    "--select_device",
    return target_word_vec1


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--gold_standard_file', default='data/goldStd.csv',
                        type=validate_existing_filepath,
                        help='path to gold standard file')
    parser.add_argument('--word_embedding_model_file', type=validate_existing_filepath,
                        default='pretrained_models/GoogleNews-vectors-negative300.bin',
                        help='path to the word embedding\'s model')
    parser.add_argument('--training_to_validation_size_ratio', default=0.8, type=float,
                        action=check_size(0, 1),
                        help='ratio between training and validation size')
    parser.add_argument('--data_set_file', default='data/data_set.pkl',
                        type=validate_parent_exists,
                        help='path to the file where the train, valid and test sets will be stored')
    args = parser.parse_args()

    # training set
    X_train = []
    y_train = []
    # validation set
    X_valid = []
    y_valid = []

    # 1. read GS file
    [target_word_vec, definition_vec, hypernym_vec, label_vec] = \
print("Ensembling with weights: ") for na in norm_accuracies: print(na) ensembled_predictions = simple_ensembler([lstm_predictions, one_hot_cnn_predictions], norm_accuracies) final_preds = np.argmax(ensembled_predictions, axis=1) # Get the final accuracy print(classification_report(np.argmax(Y_test, axis=1), final_preds, target_names=data.labels_0.columns.values)) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--file_path', type=str, default='./', help='file_path where the files to parse are located') parser.add_argument('--data_type', type=str, default='amazon', choices=['amazon'], help='dataset source') parser.add_argument('--epochs', type=int, default=10, help='Number of epochs for both models', action=check_size(1, 20000)) args_in = parser.parse_args() # Check file path if args_in.file_path: validate_existing_filepath(args_in.file_path) if args_in.data_type == 'amazon': data_in = Amazon_Reviews(args_in.file_path) ensemble_models(data_in, args_in)
def clean_group(phrase_group):
    """
    Returns the shortest element in a group of phrases

    Args:
        phrase_group (String): a group of phrases separated by ';'

    Returns:
        The shortest phrase in the group (String)
    """
    text = [x.lstrip() for x in phrase_group.split(';')]
    return min(text, key=len)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog='trend_analysis.py')
    parser.add_argument('target_topics', metavar='target_topics',
                        type=validate_existing_filepath,
                        help='a path to a csv topic-list extracted from the '
                             'target corpus')
    parser.add_argument('ref_topics', metavar='ref_topics',
                        type=validate_existing_filepath,
                        help='a path to a csv topic-list extracted from the '
                             'reference corpus')
    parser.add_argument('--top_n', type=int, action=check_size(0, 100000), default=10000,
                        help='compare only top N topics (default: 10000)')
    parser.add_argument('--top_vectors', type=int, action=check_size(0, 100000), default=500,
                        help='include only top N vectors in the scatter graph (default: 500)')
    args = parser.parse_args()
    analyze(args.target_topics, args.ref_topics, args.target_topics, args.ref_topics,
            args.top_n, args.top_vectors)
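# Quick illustration of clean_group (the phrase group below is made up): splitting on ';'
# and taking the shortest left-stripped variant keeps the most compact label.
assert clean_group('deep neural networks; deep nets; DNNs') == 'DNNs'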
        np_list.append(np)
    logger.info("np_list=%s", str(np_list))
    return se.similarity(np_list, seed, args.similarity)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(prog='expand_server.py')
    parser.add_argument('model_path', metavar='model_path', type=validate_existing_filepath,
                        help='a path to the w2v model file')
    parser.add_argument('--host', type=str, default='localhost',
                        help='set host for the server', action=check_size(1, 20))
    parser.add_argument('--port', type=int, default=1234,
                        help='set port for the server', action=check_size(0, 65535))
    parser.add_argument('--grouping', action='store_true', default=False,
                        help='grouping mode')
    parser.add_argument('--similarity', default=0.5, type=float, action=check_size(0, 1),
                        help='similarity threshold')
    parser.add_argument(
    Args:
        api (:obj:`falcon.api`): the Falcon API
        service_name (str): the name of the service to init and load
    """
    service = Service(service_name)
    api.req_options.auto_parse_form_urlencoded = True
    api.add_route('/{}'.format(service_name), service)
    # api.router_options
    path = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                        "web_service/visualizer/displacy"))
    api.add_static_route('/{}'.format(service_name), path)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--name', help="the name of the service you want to upload",
                        type=str, required=True, action=check_size(1, 30))
    args = parser.parse_args()
    app = application = falcon.API(middleware=[MultipartMiddleware()])
    if not is_valid_input(args.name):
        logger.error('ERROR: Invalid argument input for the server.')
        sys.exit(0)
    # init and load service
    set_server_properties(app, args.name)

    # run server:
    port = 8080
    server = make_server('0.0.0.0', port, app)
    print('starting the server at port {0}'.format(port))
    server.serve_forever()
from nlp_architect.utils.io import validate, validate_existing_directory, \
    validate_existing_filepath, validate_parent_exists, check_size

"""
Training script for reading comprehension model
"""

# parse the command line arguments
parser = NgraphArgparser(__doc__)
parser.add_argument('--data_path', help='enter path for training data',
                    type=str)
parser.add_argument('--gpu_id', default="0", help='enter gpu id',
                    type=str, action=check_size(0, 10))
parser.add_argument('--max_para_req', default=100, help='enter the max length of paragraph',
                    type=int, action=check_size(30, 300))
parser.add_argument('--batch_size_squad', default=16, help='enter the batch size',
                    type=int, action=check_size(1, 256))
parser.set_defaults()

args = parser.parse_args()

os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

hidden_size = 150
gradient_clip_value = 15
embed_size = 300
import os

import numpy as np

from nlp_architect.utils.mrc_utils import (
    create_squad_training, max_values_squad, get_data_array_squad, create_data_dict)
from nlp_architect.models.matchlstm_ansptr import MatchLSTM_AnswerPointer
import argparse
import tensorflow as tf
from nlp_architect.utils.io import validate_existing_directory, check_size, validate_parent_exists

# Parse the command line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--data_path', default='data', type=validate_existing_directory,
                    help='enter path for training data')
parser.add_argument('--gpu_id', default="0", type=str,
                    help='enter gpu id', action=check_size(0, 8))
parser.add_argument('--max_para_req', default=300, type=int,
                    help='enter the max length of paragraph',
                    action=check_size(30, 300))
parser.add_argument('--epochs', default=15, type=int,
                    help='enter the number of epochs',
                    action=check_size(1, 30))
parser.add_argument('--select_device', default='GPU', type=str,
                    help='enter the device to execute on',
                    action=check_size(3, 9))
parser.add_argument('--train_set_size', default=None, type=int,
                    help='enter the size of the training set',
                    action=check_size(200, 90000))
parser.add_argument('--hidden_size', default=150, type=int,
                    help='enter the number of hidden units',
                    action=check_size(30, 300))
    help='boolean indicating whether the model to load has been stored in binary '
    'format.',
    action='store_true')
arg_parser.add_argument(
    '--word_ngrams',
    default=0,
    type=int,
    choices=[0, 1],
    help='If 0, the model to load stores word information. If 1, the model to load stores '
    'subword (ngrams) information; note that subword information is relevant only to '
    'fasttext models.')
arg_parser.add_argument(
    '--mark_char',
    default='_',
    type=str,
    action=check_size(1, 2),
    help='special character that marks word separator and NP suffix.')
arg_parser.add_argument(
    '--np',
    default='Intel Corp.',
    type=str,
    action=check_size(min_size=1),
    help='NP whose word vector will be printed.')

args = arg_parser.parse_args()

np2vec_model = NP2vec.load(
    args.np2vec_model_file,
    binary=args.binary,
    word_ngrams=args.word_ngrams)
                                target_names=data.labels_0.columns.values))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--file_path", type=str, default="./",
                        help="file_path where the files to parse are located")
    parser.add_argument("--data_type", type=str, default="amazon", choices=["amazon"],
                        help="dataset source")
    parser.add_argument(
        "--epochs",
        type=int,
        default=10,
        help="Number of epochs for both models",
        action=check_size(1, 20000),
    )
    args_in = parser.parse_args()

    # Check file path
    if args_in.file_path:
        validate_existing_filepath(args_in.file_path)

    if args_in.data_type == "amazon":
        data_in = Amazon_Reviews(args_in.file_path)
    ensemble_models(data_in, args_in)
12 1:puppet 1:sequences 1:were done by <NULL> Paska .  1:puppet 1:sequences 1:were Roman
13 1:done 1:by 1:Roman Paska . <NULL> for the  1:done 1:by 1:Roman Music

***** Misc *****
Notice that you don't get anything from the third line of data.txt, as there are no
recognized entities. The final ngram in each output line is preceded by a tab in the
actual output, though it's hard to discern above.
'''
parser = argparse.ArgumentParser(
    description='Generates windowed examples for wikipedia files. By default,' +
                ' creates pairs of window<TAB>entity when used with entities.'
)
parser.add_argument('data_dir', type=str, help='name of root directory for files')
parser.add_argument('-n', type=int, help='Max number of examples to process.',
                    action=check_size(1, 100000000))
parser.add_argument('-e', '--entities', type=str,
                    help='entities file (each line specifies ngrams to always chunk together)')
parser.add_argument('-a', '--all_windows', action='store_true',
                    help='if set, keeps all windows (not just ones with entities). defaults to ' +
                         ' True if entities file not present, False if it is present.')
parser.add_argument('-m', '--movie_in_all', action='store_true',
                    help='if set, prepends movie to every line in example')
parser.add_argument('-i', '--inverse', action='store_true',
                    help='if set, also write "inversed" version of each fact to the kb')
parser.add_argument('-r', '--replace_centroids', action='store_true',
                    help='specifies whether to remove the center words of windows from ' +
                         'their windows (defaults false, if true replaces word with <NULL>)')
parser.add_argument('-dm', '--dontmerge', action='store_true',
                    help='default behavior merges lines from the same example--set this flag ' +
                         'to disable and only consider windows from the same line in the file')
metavar="target_topics", type=validate_existing_filepath, help="a path to a csv topic-list extracted from the " "target corpus", ) parser.add_argument( "ref_topics", metavar="ref_topics", type=validate_existing_filepath, help="a path to a csv topic-list extracted from the " "reference corpus", ) parser.add_argument( "--top_n", type=int, action=check_size(0, 100000), default=10000, help="compare only top N topics (default: 10000)", ) parser.add_argument( "--top_vectors", type=int, action=check_size(0, 100000), default=500, help="include only top N vectors in the scatter graph (default: 500)", ) args = parser.parse_args() analyze( args.target_topics, args.ref_topics, args.target_topics,
if __name__ == '__main__':
    # read input args and validate
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input_file', type=validate_existing_filepath, required=True,
        help='Input texts file path (samples to pass for inference)')
    parser.add_argument('--model_name', default='chunker_model', type=str, required=True,
                        help='Model name (used for saving the model)')
    parser.add_argument('-b', type=int, action=check_size(1, 9999), default=1,
                        help='inference batch size')
    args = parser.parse_args()

    model_path = path.join(path.dirname(path.realpath(__file__)),
                           '{}.h5'.format(str(args.model_name)))
    settings_path = path.join(path.dirname(path.realpath(__file__)),
                              '{}.params'.format(str(args.model_name)))
    validate_existing_filepath(model_path)
    validate_existing_filepath(settings_path)

    # load model and parameters
    model = SequenceChunker()
    model.load(model_path)
    word_length = model.max_word_len
    with open(settings_path, 'rb') as fp:
                  args.lr, num_iterations=num_iterations, log_interval=n_per_epoch,
                  result_dir=results_dir, ckpt=None)
    else:
        sequences = model.run_inference(args.ckpt, num_samples=args.num_samples,
                                        sos=ptb_dict.sos_symbol, eos=ptb_dict.eos_symbol)
        for seq in sequences:
            sentence = []
            for idx in seq:
                # skip start-of-sequence symbols instead of spinning on them
                if idx == ptb_dict.sos_symbol:
                    continue
                sentence.append(ptb_dict.idx2word[idx])
            print(" ".join(sentence) + "\n")


PARSER = argparse.ArgumentParser()
PARSER.add_argument('--seq_len', type=int, action=check_size(0, 1000),
                    help="Number of time points in each input sequence", default=60)
PARSER.add_argument('--results_dir', type=validate_parent_exists,
                    help="Directory to write results to",
                    default=os.path.expanduser('~/results'))
PARSER.add_argument('--dropout', type=float, default=0.45, action=check_size(0, 1),
                    help='dropout applied to layers, value in [0, 1] (default: 0.45)')
PARSER.add_argument('--ksize', type=int, default=3, action=check_size(0, 10),
                    help='kernel size (default: 3)')
PARSER.add_argument('--levels', type=int, default=4, action=check_size(0, 10),
                    help='# of levels (default: 4)')
PARSER.add_argument('--lr', type=float, default=4, action=check_size(0, 100),
                    help='initial learning rate (default: 4)')
PARSER.add_argument('--nhid', type=int, default=600, action=check_size(0, 1000),
                    help='number of hidden units per layer (default: 600)')
***** Misc *****
Notice that you don't get anything from the third line of data.txt, as there are no
recognized entities. The final ngram in each output line is preceded by a tab in the
actual output, though it's hard to discern above.
'''
parser = argparse.ArgumentParser(
    description='Generates windowed examples for wikipedia files. By default,' +
                ' creates pairs of window<TAB>entity when used with entities.')
parser.add_argument('data_dir', type=str, help='name of root directory for files')
parser.add_argument('-n', type=int, help='Max number of examples to process.',
                    action=check_size(1, 100000000))
parser.add_argument(
    '-e',
    '--entities',
    type=str,
    help='entities file (each line specifies ngrams to always chunk together)')
parser.add_argument(
    '-a',
    '--all_windows',
    action='store_true',
    help='if set, keeps all windows (not just ones with entities). defaults to ' +
    ' True if entities file not present, False if it is present.')
parser.add_argument('-m', '--movie_in_all', action='store_true',
                    help='if set, prepends movie to every line in example')
    '--corpus',
    help='path to the input corpus. Compressed files (gz) are also supported. By default, '
    'it is a subset of English Wikipedia. '
    'Get the subset of English Wikipedia from '
    'https://github.com/NervanaSystems/nlp-architect/raw/'
    'master/datasets/wikipedia/enwiki-20171201_subset.txt.gz')
arg_parser.add_argument(
    '--marked_corpus',
    default='enwiki-20171201_subset_marked.txt',
    type=validate_parent_exists,
    help='path to the marked corpus.')
arg_parser.add_argument(
    '--mark_char',
    default='_',
    type=str,
    action=check_size(1, 2),
    help='special character that marks NP\'s in the corpus (word separator and NP suffix). '
    'Default value is _.')
arg_parser.add_argument(
    '--grouping',
    action='store_true',
    default=False,
    help='perform noun-phrase grouping')
arg_parser.add_argument(
    '--chunker',
    type=str,
    choices=['spacy', 'nlp_arch'],
    default='spacy',
    help='chunker to use for detecting noun phrases. \'spacy\' for using spacy built-in '
    'chunker or \'nlp_arch\' for NLP Architect NP Extractor')
args = arg_parser.parse_args()
""" Training script for reading comprehension model """ # parse the command line arguments parser = NgraphArgparser(__doc__) parser.add_argument('--data_path', help='enter path for training data', type=str) parser.add_argument('--gpu_id', default="0", help='enter gpu id', type=str, action=check_size(0, 10)) parser.add_argument('--max_para_req', default=100, help='enter the max length of paragraph', type=int, action=check_size(30, 300)) parser.add_argument('--batch_size_squad', default=16, help='enter the batch size', type=int, action=check_size(1, 256)) parser.set_defaults()