help= "precomputed features for the input data, i.e. the values of \phi(x,_) without y" ) optparser.add_option( "-e", "--numepochs", dest="numepochs", default=int(10), help= "number of epochs of training; in each epoch we iterate over over all the training examples" ) optparser.add_option("-m", "--modelfile", dest="modelfile", default=os.path.join("data", "default.model"), help="weights for all features stored on disk") (opts, _) = optparser.parse_args() # each element in the feat_vec dictionary is: # key=feature_id value=weight feat_vec = {} tagset = [] train_data = [] tagset = perc.read_tagset(opts.tagsetfile) print >> sys.stderr, "reading data ..." train_data = perc.read_labeled_data(opts.trainfile, opts.featfile) print >> sys.stderr, "done." feat_vec = perc_train(train_data, tagset, int(opts.numepochs)) perc.perc_write_to_file(feat_vec, opts.modelfile)
from collections import defaultdict


def perc_train(train_data, tagset, numepochs):
    """Train a weight vector over features (currently a stub).

    Args:
        train_data: labeled training examples (presumably the pairs produced
            by perc.read_labeled_data — confirm against that helper).
        tagset: list of output labels (the y in \\phi(x, y)).
        numepochs: maximum number of passes over train_data.

    Returns:
        feat_vec: defaultdict(int) mapping feature_id -> weight, so unseen
        features read as weight 0. The training loop is left to implement.
    """
    feat_vec = defaultdict(int)
    # insert your code here
    # please limit the number of iterations of training to n iterations
    return feat_vec


if __name__ == '__main__':
    optparser = optparse.OptionParser()
    optparser.add_option("-t", "--tagsetfile", dest="tagsetfile",
                         default=os.path.join("data", "tagset.txt"),
                         help="tagset that contains all the labels produced in the output, i.e. the y in \phi(x,y)")
    optparser.add_option("-i", "--trainfile", dest="trainfile",
                         default=os.path.join("data", "train.txt.gz"),
                         help="input data, i.e. the x in \phi(x,y)")
    optparser.add_option("-f", "--featfile", dest="featfile",
                         default=os.path.join("data", "train.feats.gz"),
                         help="precomputed features for the input data, i.e. the values of \phi(x,_) without y")
    optparser.add_option("-e", "--numepochs", dest="numepochs", default=int(10),
                         help="number of epochs of training; in each epoch we iterate over over all the training examples")
    optparser.add_option("-m", "--modelfile", dest="modelfile",
                         default=os.path.join("data", "default.model"),
                         help="weights for all features stored on disk")
    (opts, _) = optparser.parse_args()

    # each element in the feat_vec dictionary is:
    # key=feature_id value=weight
    feat_vec = {}
    tagset = []
    train_data = []
    tagset = perc.read_tagset(opts.tagsetfile)
    # Bug fix: the original used the Python 2 statement `print >>sys.stderr, ...`,
    # which is a SyntaxError on Python 3; use the print() function instead.
    print("reading data ...", file=sys.stderr)
    train_data = perc.read_labeled_data(opts.trainfile, opts.featfile)
    print("done.", file=sys.stderr)
    feat_vec = perc_train(train_data, tagset, int(opts.numepochs))
    perc.perc_write_to_file(feat_vec, opts.modelfile)
from collections import defaultdict


def perc_train(train_data, tagset, numepochs):
    """Stub trainer: produce a weight vector over features.

    Returns a defaultdict(int) mapping feature_id -> weight, so any feature
    not seen during training reads as weight 0. The actual training loop
    (at most `numepochs` passes over `train_data`) remains to be written.
    """
    feat_vec = defaultdict(int)
    # insert your code here
    # please limit the number of iterations of training to n iterations
    return feat_vec


if __name__ == '__main__':
    optparser = optparse.OptionParser()
    # (flags, keyword arguments) for each command-line option.
    option_specs = [
        (("-t", "--tagsetfile"),
         dict(dest="tagsetfile", default=os.path.join("data", "tagset.txt"),
              help="tagset that contains all the labels produced in the output, i.e. the y in \phi(x,y)")),
        (("-i", "--trainfile"),
         dict(dest="trainfile", default=os.path.join("data", "train.txt.gz"),
              help="input data, i.e. the x in \phi(x,y)")),
        (("-f", "--featfile"),
         dict(dest="featfile", default=os.path.join("data", "train.feats.gz"),
              help="precomputed features for the input data, i.e. the values of \phi(x,_) without y")),
        (("-e", "--numepochs"),
         dict(dest="numepochs", default=int(10),
              help="number of epochs of training; in each epoch we iterate over over all the training examples")),
        (("-m", "--modelfile"),
         dict(dest="modelfile", default=os.path.join("data", "default.model"),
              help="weights for all features stored on disk")),
    ]
    for flags, kwargs in option_specs:
        optparser.add_option(*flags, **kwargs)
    (opts, _) = optparser.parse_args()

    # feat_vec maps feature_id -> weight.
    feat_vec = {}
    tagset = []
    train_data = []
    tagset = perc.read_tagset(opts.tagsetfile)
    print("reading data ...", file=sys.stderr)
    train_data = perc.read_labeled_data(opts.trainfile, opts.featfile, verbose=False)
    print("done.", file=sys.stderr)
    feat_vec = perc_train(train_data, tagset, int(opts.numepochs))
    perc.perc_write_to_file(feat_vec, opts.modelfile)
# NOTE(review): this chunk begins mid-call — the paren below closes an
# optparser.add_option("-f", ...) invocation whose opening lies above this view.
)
optparser.add_option(
    "-e", "--numepochs",
    dest="numepochs",
    default=int(10),
    help= "number of epochs of training; in each epoch we iterate over over all the training examples"
)
optparser.add_option("-m", "--modelfile", dest="modelfile",
                     default=os.path.join("data", "default.model"),
                     help="weights for all features stored on disk")
(opts, _) = optparser.parse_args()

# each element in the feat_vec dictionary is:
# key=feature_id value=weight
feat_vec = {}
tagset = []      # placeholder; replaced immediately below by perc.read_tagset
train_data = []  # placeholder; replaced below by perc.read_labeled_data

tagset = perc.read_tagset(opts.tagsetfile)
print("reading data ...", file=sys.stderr)
train_data = perc.read_labeled_data(opts.trainfile, opts.featfile, verbose=False)
print("done.", file=sys.stderr)
# Train for opts.numepochs epochs, then persist the learned weights to disk.
feat_vec = perc_train(train_data, tagset, int(opts.numepochs))
perc.perc_write_to_file(feat_vec, opts.modelfile)
# NOTE(review): this chunk begins mid-call — the line below finishes an
# argparser.add_argument(...) invocation whose opening lies above this view.
help='POS tag embedding dimension')
argparser.add_argument('-r', '--resume', help='resume training from saved model')
argparser.add_argument('--prototype', default=False, action='store_true', help='prototyping mode')
args = argparser.parse_args()

# Run on the first CUDA device when available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

tagset = perc.read_tagset(args.tagsetfile)
print("reading data ...", file=sys.stderr)
test_data = perc.read_labeled_data(args.inputfile, args.featfile, verbose=False)
print("done.", file=sys.stderr)

# Prototyping mode: evaluate on only the first 8 examples.
if args.prototype:
    test_data = test_data[0:8]

print('loading model...', file=sys.stderr)
# load_model is a project helper; model_data is presumably a dict-like
# checkpoint holding the indices saved at training time — verify its schema.
model_data = load_model(args.modelfile)
word_idx = model_data['word_index']  # NOTE(review): unused in this chunk — confirm downstream use
speech_tag_idx = model_data['speech_tag_index']
tag2idx = model_data['tag_index']
idx2tag = model_data['reverse_tag_index']

# Rebuild the network with the same vocabulary sizes and hyperparameters
# that were used at training time.
model = BiLSTM_Enc_Dec_CRF(len(speech_tag_idx), len(tag2idx), device, args.layer, args.hidden, args.pos_dim)
default=False, action='store_true', help='prototyping mode') args = argparser.parse_args() device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") if not osp.exists(args.ckpt): os.mkdir(args.ckpt) if not osp.exists('models'): os.mkdir('models') tagset = perc.read_tagset(args.tagsetfile) print("reading data ...", file=sys.stderr) train_data = perc.read_labeled_data(args.trainfile, args.featfile, verbose=False) test_data = perc.read_labeled_data(args.valfile, args.valfeatfile, verbose=False) print("done.", file=sys.stderr) word_idx, speech_tag_idx = build_vocab(train_data) tag2idx, idx2tag = build_tag_index(tagset) if args.prototype: train_data = train_data[1:8] test_data = test_data[1:8] print("preparing training data...", file=sys.stderr) training_tuples = prepare_training_data(train_data, speech_tag_idx, tag2idx)