Ejemplo n.º 1
0
    # Optional vocabulary path. The help text is built from two adjacent
    # string literals, so the first one must end with a space to avoid
    # rendering "numpyembedding" in the --help output.
    parser.add_argument('--vocab',
                        help='Vocabulary file (only needed if numpy '
                        'embedding file is given)')
    # Boolean switch stored as args.attention (False unless -a is given).
    parser.add_argument('-a',
                        help='Plot attention values graph',
                        dest='attention',
                        action='store_true')
    # Boolean switch stored as args.inference (False unless -i is given).
    parser.add_argument('-i',
                        help='Run inference classifier',
                        dest='inference',
                        action='store_true')
    args = parser.parse_args()

    # Set up logging (verbose output disabled) and load the parameters stored
    # alongside the model at args.load.
    utils.config_logger(verbose=False)
    logger = utils.get_logger()
    params = ioutils.load_params(args.load)
    if args.inference:
        # Only bound when -i was given: the label dictionary and its inverse
        # (value -> key), so code further down must guard on args.inference
        # before using either name.
        label_dict = ioutils.load_label_dict(args.load)
        number_to_label = {v: k for (k, v) in label_dict.items()}

    logger.info('Reading model')
    sess = tf.InteractiveSession()
    # The concrete model class is chosen from the loaded parameters, then the
    # model is restored from args.load into the session.
    model_class = utils.get_model_class(params)
    model = model_class.load(args.load, sess)
    # generate=False with load_extra_from=args.load: presumably reuses the
    # extra embeddings saved with the model instead of creating new ones --
    # TODO confirm against ioutils.load_embeddings.
    word_dict, embeddings = ioutils.load_embeddings(args.embeddings,
                                                    args.vocab,
                                                    generate=False,
                                                    load_extra_from=args.load,
                                                    normalize=True)
    model.initialize_embeddings(sess, embeddings)
Ejemplo n.º 2
0
                        help='JSONL or TSV file with data to evaluate on')
    # Positional argument: path to the embeddings file.
    parser.add_argument('embeddings', help='Numpy embeddings file')
    # Optional path to the vocabulary that accompanies the embeddings.
    parser.add_argument('--vocabulary',
                        help='Text file with embeddings vocabulary')
    # Boolean switch stored as args.verbose (False unless -v is given).
    parser.add_argument('-v',
                        help='Verbose',
                        action='store_true',
                        dest='verbose')
    # Boolean switch stored as args.errors (False unless -e is given).
    parser.add_argument('-e',
                        help='Print pairs and labels that got a wrong answer',
                        action='store_true',
                        dest='errors')
    args = parser.parse_args()

    # Configure logging from the -v switch and load the parameters stored
    # alongside the model at args.model.
    utils.config_logger(verbose=args.verbose)
    params = ioutils.load_params(args.model)
    sess = tf.InteractiveSession()

    # The concrete model class is chosen from the loaded parameters, then the
    # model is restored from args.model into the session.
    model_class = utils.get_model_class(params)
    model = model_class.load(args.model, sess)
    # generate=False with load_extra_from=args.model: presumably reuses the
    # extra embeddings saved with the model instead of creating new ones --
    # TODO confirm against ioutils.load_embeddings.
    word_dict, embeddings = ioutils.load_embeddings(args.embeddings,
                                                    args.vocabulary,
                                                    generate=False,
                                                    load_extra_from=args.model,
                                                    normalize=True)
    model.initialize_embeddings(sess, embeddings)
    label_dict = ioutils.load_label_dict(args.model)
    # NOTE(review): looks like a leftover debug print; it assumes the label
    # dictionary has a 'Y' key and will fail on a lookup miss otherwise --
    # confirm whether this should stay.
    print('Label dict[Y] : ', label_dict['Y'])
    # Disabled corpus-reading / dataset-creation code, kept for reference:
    #    pairs = ioutils.read_corpus(args.dataset, params['lowercase'],
    #           params['language'])
    #dataset = utils.create_dataset(pairs, word_dict, label_dict)
Ejemplo n.º 3
0
    # Decide whether to resume from an existing model: either a warm-up model
    # was passed in args.warm, or args.cont is set and a "model.meta" file
    # already exists in args.save. The path check stays inside the
    # short-circuited expression so it only runs when args.warm is None.
    is_really_cont = args.warm is not None or (args.cont and os.path.exists(
        os.path.join(args.save, "model.meta")))
    warmup_model = args.warm

    if is_really_cont:
        logger.info('Found a model. Fine-tuning...')

        # NOTE(review): if this branch was reached through args.cont alone,
        # warmup_model is None here and is passed on as-is -- confirm that
        # load_embeddings/load_params accept None, or whether args.save was
        # the intended source.
        # generate=False: extra embeddings are taken from the existing model
        # (presumably those for unknown, padding and null -- TODO confirm).
        word_dict, embeddings = ioutils.load_embeddings(
            args.embeddings,
            args.vocab,
            generate=False,
            normalize=True,
            load_extra_from=warmup_model)
        params = ioutils.load_params(warmup_model)

    else:
        # Fresh run: generate the extra embeddings, then persist both the
        # run parameters and those embeddings under args.save.
        word_dict, embeddings = ioutils.load_embeddings(args.embeddings,
                                                        args.vocab,
                                                        generate=True,
                                                        normalize=True)
        ioutils.write_params(args.save,
                             lowercase=args.lower,
                             language=args.lang,
                             model=args.model)
        ioutils.write_extra_embeddings(embeddings, args.save)

    logger.info('Converting words to indices')
    # find out which labels are there in the data
    # (more flexible to different datasets)