Code example #1
File: serve.py  Project: Asteur/NervanaNlpApch
    """
    service = Service(service_name)
    api.req_options.auto_parse_form_urlencoded = True
    api.add_route('/{}'.format(service_name), service)
    # api.router_options
    path = os.path.abspath(
        os.path.join(os.path.dirname(__file__),
                     "web_service/visualizer/displacy"))
    api.add_static_route('/{}'.format(service_name), path)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--name',
                        help="the name of the service you want to upload",
                        type=str,
                        required=True,
                        action=check_size(1, 30))
    args = parser.parse_args()
    app = application = falcon.API(middleware=[MultipartMiddleware()])
    if not is_valid_input(args.name):
        logger.error('ERROR: Invalid argument input for the server.')
        sys.exit(1)  # non-zero exit status to signal the invalid input
    # init and load service
    set_server_properties(app, args.name)
    # run server:
    port = 8080
    server = make_server('0.0.0.0', port, app)
    print('starting the server at port {0}'.format(port))
    server.serve_forever()
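Nearly every snippet in this listing passes action=check_size(...) to argparse. As a rough mental model only, and not the actual nlp_architect.utils.io source, check_size can be read as a factory that returns a custom argparse.Action rejecting values outside a given range; the sketch below makes that assumption explicit (string arguments checked by length, numeric arguments by value).

import argparse


def check_size(min_size=None, max_size=None):
    """Hedged sketch: build an argparse.Action enforcing a value/length range."""
    class CheckSize(argparse.Action):
        def __call__(self, parser, namespace, values, option_string=None):
            # Assumption: strings are validated by length, numbers by value.
            size = len(values) if isinstance(values, str) else values
            if min_size is not None and size < min_size:
                parser.error('{} must be at least {}'.format(option_string, min_size))
            if max_size is not None and size > max_size:
                parser.error('{} must be at most {}'.format(option_string, max_size))
            setattr(namespace, self.dest, values)
    return CheckSize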
Code example #2
    # load existing model
    mlp_clf.load(args.model)

    results = mlp_clf.get_outputs(test_set['X'])

    return results


if __name__ == "__main__":
    # parse the command line arguments
    parser = argparse.ArgumentParser()

    parser.add_argument('--max_num_of_senses_to_search',
                        default=3,
                        type=int,
                        action=check_size(0, 100),
                        help='maximum number of senses that are tested')
    parser.add_argument('--input_inference_examples_file',
                        type=validate_existing_filepath,
                        default='data/input_inference_examples.csv',
                        help='input_data_file')
    parser.add_argument(
        '--model',
        default='data/mcs_model.h5',
        type=validate_existing_filepath,
        help='path to the file where the trained model has been stored')
    parser.add_argument(
        '--word_embedding_model_file',
        type=validate_existing_filepath,
        default='pretrained_models/GoogleNews-vectors-negative300.bin',
        help='path to the word embedding\'s model')
Code example #3
File: train.py  Project: xiaming9880/nlp-architect
import logging
import sys

from nlp_architect.models.np2vec import NP2vec
from nlp_architect.utils.io import check_size, validate_existing_filepath

logger = logging.getLogger(__name__)


if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--corpus',
        default='train.txt',
        type=str,
        action=check_size(min_size=1),
        help='path to the corpus. By default, '
             'it is the training set of CONLL2000 shared task dataset.')
    arg_parser.add_argument(
        '--corpus_format',
        default='conll2000',
        type=str,
        choices=[
            'json',
            'txt',
            'conll2000'],
        help='format of the input marked corpus; txt, conll2000 and json formats are supported. '
        'For json format, the file should contain an iterable of sentences. '
        'Each sentence is a list of terms (unicode strings) that will be used for training.')
    arg_parser.add_argument(
        '--mark_char',
Code example #4
        help=
        'boolean indicating whether the model to load has been stored in binary format.',
        action='store_true')
    arg_parser.add_argument(
        '--word_ngrams',
        default=0,
        type=int,
        choices=[0, 1],
        help=
        'If 0, the model to load stores word information. If 1, the model to load stores '
        'subword (ngrams) information; note that subword information is relevant only to '
        'fasttext models.')
    arg_parser.add_argument('--topn',
                            default=500,
                            type=int,
                            action=check_size(min_size=1),
                            help='maximal number of expanded terms to return')
    arg_parser.add_argument('--grouping',
                            action='store_true',
                            default=False,
                            help='grouping mode')

    args = arg_parser.parse_args()

    se = SetExpand(np2vec_model_file=args.np2vec_model_file,
                   binary=args.binary,
                   word_ngrams=args.word_ngrams,
                   grouping=args.grouping)
    enter_seed_str = 'Enter the seed (comma-separated seed terms):'
    logger.info(enter_seed_str)
    for seed_str in sys.stdin:
Code example #5
    num_iterations = int(n_train * n_epochs * 1.0 / batch_size)
    results_dir = os.path.abspath(args.results_dir)

    adding_dataset = Adding(seq_len=seq_len, n_train=n_train, n_test=n_val)

    model = TCNForAdding(seq_len, n_features, hidden_sizes, kernel_size=kernel_size,
                         dropout=dropout)

    model.build_train_graph(args.lr, max_gradient_norm=args.grad_clip_value)

    model.run(adding_dataset, num_iterations=num_iterations, log_interval=args.log_interval,
              result_dir=results_dir)


PARSER = argparse.ArgumentParser()
PARSER.add_argument('--seq_len', type=int, action=check_size(0, 1000),
                    help="Number of time points in each input sequence",
                    default=200)
PARSER.add_argument('--log_interval', type=int, default=100, action=check_size(0, 10000),
                    help="frequency, in number of iterations, after which loss is evaluated")
PARSER.add_argument('--results_dir', type=validate_parent_exists,
                    help="Directory to write results to", default=os.path.expanduser('~/results'))
PARSER.add_argument('--dropout', type=float, default=0.0, action=check_size(0, 1),
                    help='dropout applied to layers, between 0 and 1 (default: 0.0)')
PARSER.add_argument('--ksize', type=int, default=6, action=check_size(0, 10),
                    help='kernel size (default: 6)')
PARSER.add_argument('--levels', type=int, default=7, action=check_size(0, 10),
                    help='# of levels (default: 7)')
PARSER.add_argument('--lr', type=float, default=2e-3, action=check_size(0, 1),
                    help='initial learning rate (default: 2e-3)')
PARSER.add_argument('--nhid', type=int, default=27, action=check_size(0, 1000),
Code example #6
File: train.py  Project: neuroph12/intel_nlp
from nlp_architect.models.crossling_emb import WordTranslator
from nlp_architect.utils.io import validate_existing_directory, validate_parent_exists, check_size

if __name__ == "__main__":

    print("\t\t" + 40 * "=")
    print("\t\t= Unsupervised Crosslingual Embeddings =")
    print("\t\t" + 40 * "=")

    # Parsing arguments for model parameters
    parser = argparse.ArgumentParser()
    parser.add_argument("--emb_dim",
                        type=int,
                        default=300,
                        help="Embedding Dimensions",
                        action=check_size(1, 1024))
    parser.add_argument("--vocab_size",
                        type=int,
                        default=200000,
                        help="Vocabulary Size",
                        action=check_size(1, 1000000))
    parser.add_argument("--lr",
                        type=float,
                        default=0.1,
                        help="Learning Rate",
                        action=check_size(0.00001, 2.0))
    parser.add_argument("--beta",
                        type=float,
                        default=0.001,
                        help="Beta for W orthogornaliztion",
                        action=check_size(0.0000001, 5.0))
Code example #7
                                verbose=1, validation_data=(X_test, Y_test), callbacks=[es])
    lstm_acc = model_hist.history['val_acc'][-1]
    print("LSTM model accuracy ", lstm_acc)
    # This minimizes, so to maximize we have to take the inverse :)
    return 1 - lstm_acc


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--file_path', type=validate_existing_filepath, default='./',
                        help='file_path where the files to parse are located')
    parser.add_argument('--data_type', type=str, default='amazon',
                        choices=['amazon'])
    parser.add_argument('--output_file', type=validate_parent_exists, default='./opt_trials.pkl',
                        help='file_path where the output of the trials will be located')
    parser.add_argument('--new_trials', type=int, default=20, action=check_size(1, 20000))
    args_in = parser.parse_args()

    # Check inputs
    if args_in.file_path:
        validate_existing_filepath(args_in.file_path)
    if args_in.output_file:
        validate_parent_exists(args_in.output_file)

    if args_in.data_type == 'amazon':
        data_in = Amazon_Reviews(args_in.file_path)

    try:
        if args_in.output_file.endswith('.pkl'):
            with open(args_in.output_file, 'rb') as read_f:
                trials_to_keep = pickle.load(read_f)
Code example #8
File: train_model.py  Project: cdj0311/nlp-architect
parser.add_argument(
    '--use_oov',
    default=False,
    action='store_true',
    help='use OOV test set')
parser.add_argument(
    '--lr',
    type=float,
    default=0.001,
    help='learning rate')
parser.add_argument(
    '--grad_clip_norm',
    type=float,
    default=40.0,
    help='Clip gradients such that norm is below this value.',
    action=check_size(0, 100))
parser.add_argument(
    '--eps',
    type=float,
    default=1e-8,
    help='epsilon used to avoid divide by zero in softmax renormalization.',
    action=check_size(1e-100, 1e-2))
parser.add_argument(
    '--save_log',
    action='store_true',
    default=False,
    help='Save evaluation results to log file.')
parser.add_argument(
    '--log_file',
    type=str,
    default='memn2n_dialgoue_results.txt',
Code example #9
import argparse
import tensorflow as tf
from nlp_architect.utils.io import validate_existing_directory, check_size, validate_parent_exists

# Parse the command line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--data_path',
                    default='data',
                    type=validate_existing_directory,
                    help='enter path for training data')

parser.add_argument('--gpu_id',
                    default="0",
                    type=str,
                    help='enter gpu id',
                    action=check_size(0, 8))

parser.add_argument('--max_para_req',
                    default=300,
                    type=int,
                    help='enter the max length of paragraph',
                    action=check_size(30, 300))

parser.add_argument('--epochs',
                    default=15,
                    type=int,
                    help='enter the number of epochs',
                    action=check_size(1, 30))

parser.add_argument('--select_device',
                    default='GPU',
Code example #10
if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('--gold_standard_file',
                        default='data/goldStd.csv',
                        type=validate_existing_filepath,
                        help='path to gold standard file')
    parser.add_argument(
        '--word_embedding_model_file',
        type=validate_existing_filepath,
        default='pretrained_models/GoogleNews-vectors-negative300.bin',
        help='path to the word embedding\'s model')
    parser.add_argument('--training_to_validation_size_ratio',
                        default=0.8,
                        type=float,
                        action=check_size(0, 1),
                        help='ratio between training and validation size')
    parser.add_argument(
        '--data_set_file',
        default='data/data_set.pkl',
        type=validate_parent_exists,
        help='path to the file where the train, valid and test sets will be stored'
    )

    args = parser.parse_args()
    # training set
    X_train = []
    y_train = []

    # validation set
    X_valid = []
Code example #11
                         hidden_sizes,
                         kernel_size=kernel_size,
                         dropout=dropout)

    model.build_train_graph(args.lr, max_gradient_norm=args.grad_clip_value)

    model.run(adding_dataset,
              num_iterations=num_iterations,
              log_interval=args.log_interval,
              result_dir=results_dir)


PARSER = argparse.ArgumentParser()
PARSER.add_argument('--seq_len',
                    type=int,
                    action=check_size(0, 1000),
                    help="Number of time points in each input sequence",
                    default=200)
PARSER.add_argument(
    '--log_interval',
    type=int,
    default=100,
    action=check_size(0, 10000),
    help="frequency, in number of iterations, after which loss is evaluated")
PARSER.add_argument('--results_dir',
                    type=validate_parent_exists,
                    help="Directory to write results to",
                    default=os.path.expanduser('~/results'))
PARSER.add_argument(
    '--dropout',
    type=float,
Code example #12
from ngraph.frontends.neon import Saver
import ngraph.transformers as ngt

from nlp_architect.models.kvmemn2n import KVMemN2N
from nlp_architect.data.wikimovies import WIKIMOVIES
from nlp_architect.utils.io import validate_parent_exists, check_size
from examples.kvmemn2n.interactive_util import interactive_loop

# parse the command line arguments
parser = NgraphArgparser(__doc__)
parser.add_argument(
    '--emb_size',
    type=int,
    default='50',
    help='Size of the word-embedding used in the model. (default 50)',
    action=check_size(1, 20000))
parser.add_argument('--nhops',
                    type=int,
                    default='3',
                    help='Number of memory hops in the network',
                    action=check_size(1, 20))
parser.add_argument('--lr',
                    type=float,
                    default=0.01,
                    help='learning rate',
                    action=check_size(0, 5))
parser.add_argument('--subset',
                    type=str,
                    default='wiki-entities',
                    choices=['full', 'wiki-entities'],
                    help='wikiMovies dataset to use for training examples.')
Code example #13
File: train.py  Project: yyzreal/nlp-architect
    create_data_dict,
)

from .matchlstm_ansptr import MatchLSTMAnswerPointer
from nlp_architect.utils.io import validate_existing_directory, check_size, validate_parent_exists

# Parse the command line arguments
parser = argparse.ArgumentParser()
parser.add_argument(
    "--data_path",
    default="data",
    type=validate_existing_directory,
    help="enter path for training data",
)

parser.add_argument("--gpu_id", default="0", type=str, help="enter gpu id", action=check_size(0, 8))

parser.add_argument(
    "--max_para_req",
    default=300,
    type=int,
    help="enter the max length of paragraph",
    action=check_size(30, 300),
)

parser.add_argument(
    "--epochs", default=15, type=int, help="enter the number of epochs", action=check_size(1, 30)
)

parser.add_argument(
    "--select_device",
Code example #14
File: prepare_data.py  Project: xcgfth/nlp-architect
    return target_word_vec1


if __name__ == "__main__":

    parser = argparse.ArgumentParser()

    parser.add_argument('--gold_standard_file', default='data/goldStd.csv',
                        type=validate_existing_filepath,
                        help='path to gold standard file')
    parser.add_argument('--word_embedding_model_file',
                        type=validate_existing_filepath,
                        default='pretrained_models/GoogleNews-vectors-negative300.bin',
                        help='path to the word embedding\'s model')
    parser.add_argument('--training_to_validation_size_ratio', default=0.8, type=float,
                        action=check_size(0, 1), help='ratio between training and validation size')
    parser.add_argument('--data_set_file', default='data/data_set.pkl',
                        type=validate_parent_exists,
                        help='path to the file where the train, valid and test sets will be stored')

    args = parser.parse_args()
    # training set
    X_train = []
    y_train = []

    # validation set
    X_valid = []
    y_valid = []

    # 1. read GS file
    [target_word_vec, definition_vec, hypernym_vec, label_vec] = \
Code example #15
    print("Ensembling with weights: ")
    for na in norm_accuracies:
        print(na)
    ensembled_predictions = simple_ensembler([lstm_predictions, one_hot_cnn_predictions],
                                             norm_accuracies)
    final_preds = np.argmax(ensembled_predictions, axis=1)

    # Get the final accuracy
    print(classification_report(np.argmax(Y_test, axis=1), final_preds,
                                target_names=data.labels_0.columns.values))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--file_path', type=str, default='./',
                        help='file_path where the files to parse are located')
    parser.add_argument('--data_type', type=str, default='amazon',
                        choices=['amazon'],
                        help='dataset source')
    parser.add_argument('--epochs', type=int, default=10,
                        help='Number of epochs for both models', action=check_size(1, 20000))
    args_in = parser.parse_args()

    # Check file path
    if args_in.file_path:
        validate_existing_filepath(args_in.file_path)

    if args_in.data_type == 'amazon':
        data_in = Amazon_Reviews(args_in.file_path)
    ensemble_models(data_in, args_in)
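Example #15 merges the LSTM and one-hot CNN outputs through simple_ensembler. As a hedged sketch only, and not necessarily how the project's helper is written, a weighted average of the per-class probability matrices, weighted by the normalized validation accuracies, produces the [n_samples, n_classes] array that the np.argmax(..., axis=1) call above expects:

import numpy as np


def simple_ensembler(prediction_matrices, normalized_weights):
    """Hedged sketch: weighted average of [n_samples, n_classes] prediction matrices."""
    stacked = np.stack(prediction_matrices, axis=0)   # (n_models, n_samples, n_classes)
    weights = np.asarray(normalized_weights).reshape(-1, 1, 1)
    return (stacked * weights).sum(axis=0)            # (n_samples, n_classes)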
Code example #16
def clean_group(phrase_group):
    """
    Returns the shortest element in a group of phrases

    Args:
        phrase_group (String): a group of phrases separated by ';'

    Returns:
        The shortest phrase in the group (String)
    """
    text = [x.lstrip() for x in phrase_group.split(';')]
    return min(text, key=len)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog='trend_analysis.py')
    parser.add_argument('target_topics', metavar='target_topics', type=validate_existing_filepath,
                        help='a path to a csv topic-list extracted from the '
                             'target corpus')
    parser.add_argument('ref_topics', metavar='ref_topics', type=validate_existing_filepath,
                        help='a path to a csv topic-list extracted from the '
                             'reference corpus')
    parser.add_argument('--top_n', type=int, action=check_size(0, 100000), default=10000,
                        help='compare only top N topics (default: 10000)')
    parser.add_argument('--top_vectors', type=int, action=check_size(0, 100000), default=500,
                        help='include only top N vectors in the scatter graph (default: 500)')
    args = parser.parse_args()
    analyze(args.target_topics, args.ref_topics, args.target_topics,
            args.ref_topics, args.top_n, args.top_vectors)
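Since clean_group in example #16 is shown in full, a quick usage check illustrates its behavior; the phrase group below is made up for illustration:

# Made-up input: three phrasings of the same topic, separated by ';'.
print(clean_group('deep neural networks; deep nets; dnns'))  # prints 'dnns'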
Code example #17
File: expand_server.py  Project: neuroph12/intel_nlp
                np_list.append(np)
        logger.info("np_list=%s", str(np_list))
        return se.similarity(np_list, seed, args.similarity)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(prog='expand_server.py')
    parser.add_argument('model_path',
                        metavar='model_path',
                        type=validate_existing_filepath,
                        help='a path to the w2v model file')
    parser.add_argument('--host',
                        type=str,
                        default='localhost',
                        help='set host for the server',
                        action=check_size(1, 20))
    parser.add_argument('--port',
                        type=int,
                        default=1234,
                        help='set port for the server',
                        action=check_size(0, 65535))
    parser.add_argument('--grouping',
                        action='store_true',
                        default=False,
                        help='grouping mode')
    parser.add_argument('--similarity',
                        default=0.5,
                        type=float,
                        action=check_size(0, 1),
                        help='similarity threshold')
    parser.add_argument(
Code example #18
File: serve.py  Project: cdj0311/nlp-architect
    Args:
        api (:obj:`falcon.api`): the Falcon API
        service_name (str): the name of the service to init and load
    """
    service = Service(service_name)
    api.req_options.auto_parse_form_urlencoded = True
    api.add_route('/{}'.format(service_name), service)
    # api.router_options
    path = os.path.abspath(os.path.join(os.path.dirname(__file__),
                                        "web_service/visualizer/displacy"))
    api.add_static_route('/{}'.format(service_name), path)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--name', help="the name of the service you want to upload", type=str,
                        required=True, action=check_size(1, 30))
    args = parser.parse_args()
    app = application = falcon.API(middleware=[MultipartMiddleware()])
    if not is_valid_input(args.name):
        logger.error('ERROR: Invalid argument input for the server.')
        sys.exit(1)  # non-zero exit status to signal the invalid input
    # init and load service
    set_server_properties(app, args.name)
    # run server:
    port = 8080
    server = make_server('0.0.0.0', port, app)
    print('starting the server at port {0}'.format(port))
    server.serve_forever()
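Many of these scripts also pass validate_existing_filepath, validate_parent_exists and validate_existing_directory as argparse type= callables. The sketch below only illustrates the contract such validators appear to follow in these snippets (take the raw string, raise argparse.ArgumentTypeError on bad input, return the value otherwise); the real nlp_architect.utils.io implementations may differ.

import argparse
import os


def validate_existing_filepath(value):
    # Assumption: reject anything that is not an existing regular file.
    if not os.path.isfile(value):
        raise argparse.ArgumentTypeError('{} is not an existing file'.format(value))
    return value


def validate_parent_exists(value):
    # Assumption: the file itself may not exist yet, but its parent directory must.
    parent = os.path.dirname(os.path.abspath(value))
    if not os.path.isdir(parent):
        raise argparse.ArgumentTypeError('parent directory of {} does not exist'.format(value))
    return value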
Code example #19
File: prepare_data.py  Project: cdj0311/nlp-architect
    return target_word_vec1


if __name__ == "__main__":

    parser = argparse.ArgumentParser()

    parser.add_argument('--gold_standard_file', default='data/goldStd.csv',
                        type=validate_existing_filepath,
                        help='path to gold standard file')
    parser.add_argument('--word_embedding_model_file',
                        type=validate_existing_filepath,
                        default='pretrained_models/GoogleNews-vectors-negative300.bin',
                        help='path to the word embedding\'s model')
    parser.add_argument('--training_to_validation_size_ratio', default=0.8, type=float,
                        action=check_size(0, 1), help='ratio between training and validation size')
    parser.add_argument('--data_set_file', default='data/data_set.pkl',
                        type=validate_parent_exists,
                        help='path to the file where the train, valid and test sets will be stored')

    args = parser.parse_args()
    # training set
    X_train = []
    y_train = []

    # validation set
    X_valid = []
    y_valid = []

    # 1. read GS file
    [target_word_vec, definition_vec, hypernym_vec, label_vec] = \
Code example #20
File: train.py  Project: cdj0311/nlp-architect
from nlp_architect.utils.io import validate, validate_existing_directory, \
    validate_existing_filepath, validate_parent_exists, check_size


"""
Training script for reading comprehension model

"""
# parse the command line arguments
parser = NgraphArgparser(__doc__)

parser.add_argument('--data_path', help='enter path for training data',
                    type=str)

parser.add_argument('--gpu_id', default="0", help='enter gpu id',
                    type=str, action=check_size(0, 10))

parser.add_argument('--max_para_req', default=100, help='enter the max length of paragraph',
                    type=int, action=check_size(30, 300))

parser.add_argument('--batch_size_squad', default=16, help='enter the batch size',
                    type=int, action=check_size(1, 256))

parser.set_defaults()

args = parser.parse_args()
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

hidden_size = 150
gradient_clip_value = 15
embed_size = 300
Code example #21
File: train.py  Project: tony32769/nlp-architect
import os
import numpy as np
from nlp_architect.utils.mrc_utils import (
    create_squad_training, max_values_squad, get_data_array_squad, create_data_dict)
from nlp_architect.models.matchlstm_ansptr import MatchLSTM_AnswerPointer
import argparse
import tensorflow as tf
from nlp_architect.utils.io import validate_existing_directory, check_size, validate_parent_exists

# Parse the command line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--data_path', default='data', type=validate_existing_directory,
                    help='enter path for training data')

parser.add_argument('--gpu_id', default="0", type=str,
                    help='enter gpu id', action=check_size(0, 8))

parser.add_argument('--max_para_req', default=300, type=int,
                    help='enter the max length of paragraph', action=check_size(30, 300))

parser.add_argument('--epochs', default=15, type=int,
                    help='enter the number of epochs', action=check_size(1, 30))

parser.add_argument('--select_device', default='GPU', type=str,
                    help='enter the device to execute on', action=check_size(3, 9))

parser.add_argument('--train_set_size', default=None, type=int,
                    help='enter the size of the training set', action=check_size(200, 90000))

parser.add_argument('--hidden_size', default=150, type=int,
                    help='enter the number of hidden units', action=check_size(30, 300))
Code example #22
File: inference.py  Project: Asteur/NervanaNlpApch
        help='boolean indicating whether the model to load has been stored in binary '
        'format.',
        action='store_true')
    arg_parser.add_argument(
        '--word_ngrams',
        default=0,
        type=int,
        choices=[0, 1],
        help='If 0, the model to load stores word information. If 1, the model to load stores '
        'subword (ngrams) information; note that subword information is relevant only to '
        'fasttext models.')
    arg_parser.add_argument(
        '--mark_char',
        default='_',
        type=str,
        action=check_size(1, 2),
        help='special character that marks word separator and NP suffix.')
    arg_parser.add_argument(
        '--np',
        default='Intel Corp.',
        type=str,
        action=check_size(min=1),
        help='NP to print its word vector.')

    args = arg_parser.parse_args()

    np2vec_model = NP2vec.load(
        args.np2vec_model_file,
        binary=args.binary,
        word_ngrams=args.word_ngrams)
Code example #23
                              target_names=data.labels_0.columns.values))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--file_path",
                        type=str,
                        default="./",
                        help="file_path where the files to parse are located")
    parser.add_argument("--data_type",
                        type=str,
                        default="amazon",
                        choices=["amazon"],
                        help="dataset source")
    parser.add_argument(
        "--epochs",
        type=int,
        default=10,
        help="Number of epochs for both models",
        action=check_size(1, 20000),
    )
    args_in = parser.parse_args()

    # Check file path
    if args_in.file_path:
        validate_existing_filepath(args_in.file_path)

    if args_in.data_type == "amazon":
        data_in = Amazon_Reviews(args_in.file_path)
    ensemble_models(data_in, args_in)
Code example #24
File: wikiwindows.py  Project: cdj0311/nlp-architect
12 1:puppet 1:sequences 1:were done by <NULL> Paska . 1:puppet 1:sequences
    1:were   Roman
13 1:done 1:by 1:Roman Paska . <NULL> for the 1:done 1:by 1:Roman   Music

***** Misc *****
Notice that you don't get anything from the third line of data.txt, as there
are no recognized entities. The final ngram in each output line is preceded
by a tab in the actual output, though it's hard to discern above.
'''
parser = argparse.ArgumentParser(
    description='Generates windowed examples for wikipedia files. By default,' +
    ' creates pairs of window<TAB>entity when used with entities.'
)
parser.add_argument('data_dir', type=str, help='name of root directory for files')
parser.add_argument('-n', type=int, help='Max number of examples to process.',
                    action=check_size(1, 100000000))
parser.add_argument('-e', '--entities', type=str,
                    help='entities file (each line specifies ngrams to always chunk together)')
parser.add_argument('-a', '--all_windows', action='store_true',
                    help='if set, keeps all windows (not just ones with entities). defaults to ' +
                    ' True if entities file not present, False if it is present.')
parser.add_argument('-m', '--movie_in_all', action='store_true',
                    help='if set, prepends movie to every line in example')
parser.add_argument('-i', '--inverse', action='store_true',
                    help='if set, also write "inversed" version of each fact to the kb')
parser.add_argument('-r', '--replace_centroids', action='store_true',
                    help='specifies whether to remove the center words of windows from ' +
                    'their windows (defaults false, if true replaces word with <NULL>)')
parser.add_argument('-dm', '--dontmerge', action='store_true',
                    help='default behavior merges lines from the same example--set this flag ' +
                    'to disable and only consider windows from the same line in the file')
Code example #25
        metavar="target_topics",
        type=validate_existing_filepath,
        help="a path to a csv topic-list extracted from the "
        "target corpus",
    )
    parser.add_argument(
        "ref_topics",
        metavar="ref_topics",
        type=validate_existing_filepath,
        help="a path to a csv topic-list extracted from the "
        "reference corpus",
    )
    parser.add_argument(
        "--top_n",
        type=int,
        action=check_size(0, 100000),
        default=10000,
        help="compare only top N topics (default: 10000)",
    )
    parser.add_argument(
        "--top_vectors",
        type=int,
        action=check_size(0, 100000),
        default=500,
        help="include only top N vectors in the scatter graph (default: 500)",
    )
    args = parser.parse_args()
    analyze(
        args.target_topics,
        args.ref_topics,
        args.target_topics,
Code example #26
File: inference.py  Project: neuroph12/intel_nlp
if __name__ == '__main__':
    # read input args and validate
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input_file',
        type=validate_existing_filepath,
        required=True,
        help='Input texts file path (samples to pass for inference)')
    parser.add_argument('--model_name',
                        default='chunker_model',
                        type=str,
                        required=True,
                        help='Model name (used for saving the model)')
    parser.add_argument('-b',
                        type=int,
                        action=check_size(1, 9999),
                        default=1,
                        help='inference batch size')
    args = parser.parse_args()
    model_path = path.join(path.dirname(path.realpath(__file__)),
                           '{}.h5'.format(str(args.model_name)))
    settings_path = path.join(path.dirname(path.realpath(__file__)),
                              '{}.params'.format(str(args.model_name)))
    validate_existing_filepath(model_path)
    validate_existing_filepath(settings_path)

    # load model and parameters
    model = SequenceChunker()
    model.load(model_path)
    word_length = model.max_word_len
    with open(settings_path, 'rb') as fp:
Code example #27
            args.lr, num_iterations=num_iterations, log_interval=n_per_epoch,
            result_dir=results_dir, ckpt=None)
    else:
        sequences = model.run_inference(args.ckpt, num_samples=args.num_samples,
                                        sos=ptb_dict.sos_symbol, eos=ptb_dict.eos_symbol)
        for seq in sequences:
            sentence = []
            for idx in seq:
                if idx == ptb_dict.sos_symbol:
                    # skip start-of-sequence symbols (a `while` here would never terminate)
                    continue
                sentence.append(ptb_dict.idx2word[idx])
            print(" ".join(sentence) + "\n")


PARSER = argparse.ArgumentParser()
PARSER.add_argument('--seq_len', type=int, action=check_size(0, 1000),
                    help="Number of time points in each input sequence",
                    default=60)
PARSER.add_argument('--results_dir', type=validate_parent_exists,
                    help="Directory to write results to",
                    default=os.path.expanduser('~/results'))
PARSER.add_argument('--dropout', type=float, default=0.45, action=check_size(0, 1),
                    help='dropout applied to layers, value in [0, 1] (default: 0.45)')
PARSER.add_argument('--ksize', type=int, default=3, action=check_size(0, 10),
                    help='kernel size (default: 3)')
PARSER.add_argument('--levels', type=int, default=4, action=check_size(0, 10),
                    help='# of levels (default: 4)')
PARSER.add_argument('--lr', type=float, default=4, action=check_size(0, 100),
                    help='initial learning rate (default: 4)')
PARSER.add_argument('--nhid', type=int, default=600, action=check_size(0, 1000),
                    help='number of hidden units per layer (default: 600)')
Code example #28
***** Misc *****
Notice that you don't get anything from the third line of data.txt, as there
are no recognized entities. The final ngram in each output line is preceded
by a tab in the actual output, though it's hard to discern above.
'''
parser = argparse.ArgumentParser(
    description='Generates windowed examples for wikipedia files. By default,'
    + ' creates pairs of window<TAB>entity when used with entities.')
parser.add_argument('data_dir',
                    type=str,
                    help='name of root directory for files')
parser.add_argument('-n',
                    type=int,
                    help='Max number of examples to process.',
                    action=check_size(1, 100000000))
parser.add_argument(
    '-e',
    '--entities',
    type=str,
    help='entities file (each line specifies ngrams to always chunk together)')
parser.add_argument(
    '-a',
    '--all_windows',
    action='store_true',
    help='if set, keeps all windows (not just ones with entities). defaults to ' +
    ' True if entities file not present, False if it is present.')
parser.add_argument('-m',
                    '--movie_in_all',
                    action='store_true',
                    help='if set, prepends movie to every line in example')
Code example #29
        '--corpus',
        help='path to the input corpus. Compressed files (gz) are also supported. By default, '
             'it is a subset of English Wikipedia. '
             'get subset of English wikipedia from '
             'https://github.com/NervanaSystems/nlp-architect/raw/'
             'master/datasets/wikipedia/enwiki-20171201_subset.txt.gz')
    arg_parser.add_argument(
        '--marked_corpus',
        default='enwiki-20171201_subset_marked.txt',
        type=validate_parent_exists,
        help='path to the marked corpus.')
    arg_parser.add_argument(
        '--mark_char',
        default='_',
        type=str,
        action=check_size(1, 2),
        help='special character that marks NP\'s in the corpus (word separator and NP suffix). '
             'Default value is _.')
    arg_parser.add_argument(
        '--grouping',
        action='store_true',
        default=False,
        help='perform noun-phrase grouping')
    arg_parser.add_argument(
        '--chunker', type=str,
        choices=['spacy', 'nlp_arch'],
        default='spacy',
        help='chunker to use for detecting noun phrases. \'spacy\' for using spacy built-in '
             'chunker or \'nlp_arch\' for NLP Architect NP Extractor')

    args = arg_parser.parse_args()
Code example #30
File: train.py  Project: Asteur/NervanaNlpApch
"""
Training script for reading comprehension model

"""
# parse the command line arguments
parser = NgraphArgparser(__doc__)

parser.add_argument('--data_path',
                    help='enter path for training data',
                    type=str)

parser.add_argument('--gpu_id',
                    default="0",
                    help='enter gpu id',
                    type=str,
                    action=check_size(0, 10))

parser.add_argument('--max_para_req',
                    default=100,
                    help='enter the max length of paragraph',
                    type=int,
                    action=check_size(30, 300))

parser.add_argument('--batch_size_squad',
                    default=16,
                    help='enter the batch size',
                    type=int,
                    action=check_size(1, 256))

parser.set_defaults()