Ejemplo n.º 1
0
        help=
        "precomputed features for the input data, i.e. the values of \phi(x,_) without y"
    )
    # Number of passes over the training set. type="int" lets optparse convert
    # and validate the value, so a non-integer argument produces a usage error
    # instead of a ValueError traceback later on.
    optparser.add_option(
        "-e",
        "--numepochs",
        dest="numepochs",
        type="int",
        default=10,
        help=
        "number of epochs of training; in each epoch we iterate over over all the training examples"
    )
    # Path handed to perc.perc_write_to_file once training has finished.
    optparser.add_option("-m",
                         "--modelfile",
                         dest="modelfile",
                         default=os.path.join("data", "default.model"),
                         help="weights for all features stored on disk")
    (opts, _) = optparser.parse_args()

    tagset = perc.read_tagset(opts.tagsetfile)
    # sys.stderr.write behaves the same on Python 2 and Python 3, whereas the
    # `print >> stream` statement raises TypeError on Python 3.
    sys.stderr.write("reading data ...\n")
    train_data = perc.read_labeled_data(opts.trainfile, opts.featfile)
    sys.stderr.write("done.\n")

    # each element in the feat_vec dictionary is:
    # key=feature_id value=weight
    feat_vec = perc_train(train_data, tagset, opts.numepochs)
    perc.perc_write_to_file(feat_vec, opts.modelfile)
Ejemplo n.º 2
0
from collections import defaultdict

def perc_train(train_data, tagset, numepochs):
    """Return the feature-weight mapping produced by training.

    Training is not implemented yet: the three arguments are currently
    unused and the result is a fresh, empty ``defaultdict(int)``, so any
    feature looked up in it reads as weight 0.
    """
    # insert your code here
    # please limit the number of iterations of training to n iterations
    weights = defaultdict(int)
    return weights

if __name__ == '__main__':
    # Command-line interface; every path option defaults to a file under data/.
    # Backslashes in the help text are escaped explicitly: "\p" is not a valid
    # escape sequence and triggers a warning on recent Python versions.
    optparser = optparse.OptionParser()
    optparser.add_option(
        "-t", "--tagsetfile", dest="tagsetfile",
        default=os.path.join("data", "tagset.txt"),
        help="tagset that contains all the labels produced in the output, i.e. the y in \\phi(x,y)")
    optparser.add_option(
        "-i", "--trainfile", dest="trainfile",
        default=os.path.join("data", "train.txt.gz"),
        help="input data, i.e. the x in \\phi(x,y)")
    optparser.add_option(
        "-f", "--featfile", dest="featfile",
        default=os.path.join("data", "train.feats.gz"),
        help="precomputed features for the input data, i.e. the values of \\phi(x,_) without y")
    # type="int" lets optparse convert and validate the value, so a
    # non-integer argument produces a usage error instead of a traceback.
    optparser.add_option(
        "-e", "--numepochs", dest="numepochs", type="int", default=10,
        help="number of epochs of training; in each epoch we iterate over over all the training examples")
    optparser.add_option(
        "-m", "--modelfile", dest="modelfile",
        default=os.path.join("data", "default.model"),
        help="weights for all features stored on disk")
    (opts, _) = optparser.parse_args()

    tagset = perc.read_tagset(opts.tagsetfile)
    # sys.stderr.write behaves the same on Python 2 and Python 3, whereas the
    # `print >>stream` statement raises TypeError on Python 3.
    sys.stderr.write("reading data ...\n")
    train_data = perc.read_labeled_data(opts.trainfile, opts.featfile)
    sys.stderr.write("done.\n")

    # each element in the feat_vec dictionary is:
    # key=feature_id value=weight
    feat_vec = perc_train(train_data, tagset, opts.numepochs)
    perc.perc_write_to_file(feat_vec, opts.modelfile)

Ejemplo n.º 3
0
from collections import defaultdict

def perc_train(train_data, tagset, numepochs):
    """Train and return the feature weights (placeholder implementation).

    No training happens yet; ``train_data``, ``tagset`` and ``numepochs`` are
    ignored and a new empty ``defaultdict(int)`` is returned, in which a
    missing feature reads as 0.
    """
    # insert your code here
    # please limit the number of iterations of training to n iterations
    return defaultdict(int)

if __name__ == '__main__':
    # Command-line interface; every path option defaults to a file under data/.
    # Backslashes in the help text are escaped explicitly: "\p" is not a valid
    # escape sequence and triggers a warning on recent Python versions.
    optparser = optparse.OptionParser()
    optparser.add_option(
        "-t", "--tagsetfile", dest="tagsetfile",
        default=os.path.join("data", "tagset.txt"),
        help="tagset that contains all the labels produced in the output, i.e. the y in \\phi(x,y)")
    optparser.add_option(
        "-i", "--trainfile", dest="trainfile",
        default=os.path.join("data", "train.txt.gz"),
        help="input data, i.e. the x in \\phi(x,y)")
    optparser.add_option(
        "-f", "--featfile", dest="featfile",
        default=os.path.join("data", "train.feats.gz"),
        help="precomputed features for the input data, i.e. the values of \\phi(x,_) without y")
    # type="int" lets optparse convert and validate the value, so a
    # non-integer argument produces a usage error instead of a traceback.
    optparser.add_option(
        "-e", "--numepochs", dest="numepochs", type="int", default=10,
        help="number of epochs of training; in each epoch we iterate over over all the training examples")
    optparser.add_option(
        "-m", "--modelfile", dest="modelfile",
        default=os.path.join("data", "default.model"),
        help="weights for all features stored on disk")
    (opts, _) = optparser.parse_args()

    tagset = perc.read_tagset(opts.tagsetfile)
    print("reading data ...", file=sys.stderr)
    train_data = perc.read_labeled_data(opts.trainfile, opts.featfile, verbose=False)
    print("done.", file=sys.stderr)

    # each element in the feat_vec dictionary is:
    # key=feature_id value=weight
    feat_vec = perc_train(train_data, tagset, opts.numepochs)
    perc.perc_write_to_file(feat_vec, opts.modelfile)

Ejemplo n.º 4
0
    )
    # Number of passes over the training set. type="int" lets optparse convert
    # and validate the value, so a non-integer argument produces a usage error
    # instead of a ValueError traceback later on.
    optparser.add_option(
        "-e",
        "--numepochs",
        dest="numepochs",
        type="int",
        default=10,
        help=
        "number of epochs of training; in each epoch we iterate over over all the training examples"
    )
    # Path handed to perc.perc_write_to_file once training has finished.
    optparser.add_option("-m",
                         "--modelfile",
                         dest="modelfile",
                         default=os.path.join("data", "default.model"),
                         help="weights for all features stored on disk")
    (opts, _) = optparser.parse_args()

    tagset = perc.read_tagset(opts.tagsetfile)
    print("reading data ...", file=sys.stderr)
    train_data = perc.read_labeled_data(opts.trainfile,
                                        opts.featfile,
                                        verbose=False)
    print("done.", file=sys.stderr)

    # each element in the feat_vec dictionary is:
    # key=feature_id value=weight
    feat_vec = perc_train(train_data, tagset, opts.numepochs)
    perc.perc_write_to_file(feat_vec, opts.modelfile)
Ejemplo n.º 5
0
                           help='POS tag embedding dimension')
    # Optional value; left as None when the flag is not given.
    argparser.add_argument('-r',
                           '--resume',
                           help='resume training from saved model')
    # Boolean switch: when set, only a small slice of the data is used (below).
    argparser.add_argument('--prototype',
                           default=False,
                           action='store_true',
                           help='prototyping mode')
    args = argparser.parse_args()

    # Use the first CUDA device when torch reports one, otherwise the CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    tagset = perc.read_tagset(args.tagsetfile)
    print("reading data ...", file=sys.stderr)

    test_data = perc.read_labeled_data(args.inputfile,
                                       args.featfile,
                                       verbose=False)
    print("done.", file=sys.stderr)
    if args.prototype:
        # Prototype mode: keep at most the first 8 entries.
        test_data = test_data[0:8]

    print('loading model...', file=sys.stderr)
    model_data = load_model(args.modelfile)

    # Lookup tables stored alongside the model; presumably the same indices
    # that were built at training time -- verify against the training script.
    word_idx = model_data['word_index']
    speech_tag_idx = model_data['speech_tag_index']
    tag2idx = model_data['tag_index']
    idx2tag = model_data['reverse_tag_index']

    # The model is sized from the number of entries in the speech-tag and tag
    # indices; the remaining hyper-parameters come from the command line.
    model = BiLSTM_Enc_Dec_CRF(len(speech_tag_idx), len(tag2idx), device,
                               args.layer, args.hidden, args.pos_dim)
Ejemplo n.º 6
0
                           default=False,
                           action='store_true',
                           help='prototyping mode')
    args = argparser.parse_args()

    # Use the first CUDA device when torch reports one, otherwise the CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Make sure the output directories exist. makedirs(exist_ok=True) also
    # creates missing parent directories and, unlike an exists()-then-mkdir()
    # pair, does not fail if the directory appears between check and creation.
    os.makedirs(args.ckpt, exist_ok=True)
    os.makedirs('models', exist_ok=True)

    tagset = perc.read_tagset(args.tagsetfile)
    print("reading data ...", file=sys.stderr)
    train_data = perc.read_labeled_data(args.trainfile,
                                        args.featfile,
                                        verbose=False)
    test_data = perc.read_labeled_data(args.valfile,
                                       args.valfeatfile,
                                       verbose=False)
    print("done.", file=sys.stderr)

    # The vocabulary is built before the prototype truncation below, so it is
    # derived from the full training set even in prototype mode.
    word_idx, speech_tag_idx = build_vocab(train_data)
    tag2idx, idx2tag = build_tag_index(tagset)
    if args.prototype:
        # Prototype mode: keep a handful of examples from each split.
        # NOTE(review): the slices start at index 1, so the first example of
        # each split is skipped -- confirm this is intended.
        train_data = train_data[1:8]
        test_data = test_data[1:8]

    print("preparing training data...", file=sys.stderr)
    training_tuples = prepare_training_data(train_data, speech_tag_idx,
                                            tag2idx)