# Build the word (f_map) and label (l_map) dictionaries, either restored from
# a checkpoint or constructed from the raw training corpus.
if args.load_check_point:
    if os.path.isfile(args.load_check_point):
        print("loading checkpoint: '{}'".format(args.load_check_point))
        checkpoint_file = torch.load(args.load_check_point)
        args.start_epoch = checkpoint_file['epoch']
        f_map = checkpoint_file['f_map']
        l_map = checkpoint_file['l_map']
        train_features, train_labels = utils.read_corpus(lines)
    else:
        print("no checkpoint found at: '{}'".format(args.load_check_point))
else:
    print('constructing coding table')

    # converting format
    train_features, train_labels, f_map, l_map = utils.generate_corpus(
        lines, if_shrink_feature=True, thresholds=0)

    f_set = {v for v in f_map}
    # drop word types rarer than args.mini_count from the feature map
    f_map = utils.shrink_features(f_map, train_features, args.mini_count)

    # union of the training vocabulary with every token seen in the dev set
    dt_f_set = functools.reduce(lambda x, y: x | y,
                                map(lambda t: set(t), dev_features), f_set)

    if not args.rand_embedding:
        print("feature size: '{}'".format(len(f_map)))
        print('loading embedding')
        if args.fine_tune:  # which means does not do fine-tune
            f_map = {'<eof>': 0}
        f_map, embedding_tensor, in_doc_words = utils.load_embedding_wlm(
            args.emb_file, ' ', f_map, dt_f_set, args.caseless, args.unk,
            args.embedding_dim)
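# Illustrative sketch only: utils.shrink_features is defined in this project's
# utils module and its exact behaviour is not shown here. Judging from the call
# above, it keeps word types that occur at least mini_count times in the
# training sentences and re-indexes them; the stand-alone helper below (a
# hypothetical name, not the repo's implementation) makes that assumed
# behaviour concrete.
from collections import Counter

def shrink_features_sketch(f_map, train_features, mini_count):
    # count occurrences of each word type over the training sentences
    counts = Counter(tok for sentence in train_features for tok in sentence)
    # keep only the types seen at least mini_count times, preserving f_map order
    kept = [tok for tok in f_map if counts[tok] >= mini_count]
    # rebuild a dense index for the surviving types
    return {tok: idx for idx, tok in enumerate(kept)}

# toy usage: 'bar' never occurs in the toy sentences, so it is pruned
toy_map = {'foo': 0, 'bar': 1, 'baz': 2}
toy_sentences = [['foo', 'baz'], ['foo', 'baz', 'foo']]
print(shrink_features_sketch(toy_map, toy_sentences, mini_count=2))  # {'foo': 0, 'baz': 1}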
# Variant that also handles character-bigram (bichar) and lexicon features in
# addition to the word and label dictionaries.
if args.load_check_point:
    if os.path.isfile(args.load_check_point):
        print("loading checkpoint: '{}'".format(args.load_check_point))
        checkpoint_file = torch.load(args.load_check_point)
        args.start_epoch = checkpoint_file['epoch']
        f_map = checkpoint_file['f_map']
        l_map = checkpoint_file['l_map']
        lexicon_f_map = checkpoint_file['lexicon_f_map']
        train_features, train_labels = utils.read_corpus(lines)
        # re-read the lexicon features against the map restored from the checkpoint
        lexicon_train_features, lexicon_f_map = utils.read_corpus_lexicon(
            lexicon_train_lines, train_features, lexicon_f_map)
    else:
        print("no checkpoint found at: '{}'".format(args.load_check_point))
else:
    print('constructing coding table')

    # converting format
    train_features, train_labels, f_map, l_map, train_bichar_features, bichar_f_map = utils.generate_corpus(
        lines, if_shrink_feature=False, thresholds=0)

    f_set = {v for v in f_map}
    f_map = utils.shrink_features(f_map, train_features, args.mini_count)

    bichar_f_set = {v for v in bichar_f_map}
    bichar_f_map = utils.shrink_features(bichar_f_map, train_bichar_features,
                                         args.mini_count)

    lexicon_train_features, lexicon_f_map = utils.generous_corpus_lexicon(
        lexicon_train_lines, train_features, lexicon_feature_map)
    lexicon_f_set = {v for v in lexicon_f_map}

    dt_f_set = functools.reduce(lambda x, y: x | y,
                                map(lambda t: set(t), dev_features), f_set)
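# Illustrative sketch only: the checkpoint branch above expects torch.load() to
# return a plain dict containing at least the keys it reads ('epoch', 'f_map',
# 'l_map', 'lexicon_f_map'). A checkpoint compatible with that branch could be
# written as below; the function name, the 'state_dict' entry and the file name
# are assumptions for illustration, not taken from the repo.
import torch

def save_checkpoint_sketch(model, epoch, f_map, l_map, lexicon_f_map,
                           path='checkpoint_example.pth'):
    torch.save({
        'epoch': epoch,                    # restored into args.start_epoch
        'f_map': f_map,                    # word -> index
        'l_map': l_map,                    # label -> index
        'lexicon_f_map': lexicon_f_map,    # lexicon feature -> index
        'state_dict': model.state_dict(),  # assumed: the model weights
    }, path)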
# Variant that additionally restores/builds an action map (a_map), an entity
# type map (ner_map), a character map (char_map) and the singletons.
if args.load_check_point:
    if os.path.isfile(args.load_check_point):
        print("loading checkpoint: '{}'".format(args.load_check_point))
        checkpoint_file = torch.load(args.load_check_point)
        f_map = checkpoint_file['f_map']
        l_map = checkpoint_file['l_map']
        a_map = checkpoint_file['a_map']
        ner_map = checkpoint_file['ner_map']
        char_map = checkpoint_file['char_map']
        singleton = checkpoint_file['singleton']
        train_features, train_labels, train_actions, word_count = utils.read_corpus_ner(
            lines, word_count)
    else:
        print("no checkpoint found at: '{}'".format(args.load_check_point))
else:
    print('constructing coding table')

    train_features, train_labels, train_actions, f_map, l_map, a_map, ner_map, singleton, char_map = utils.generate_corpus(
        lines, word_count, args.spelling, if_shrink_feature=True, thresholds=0)

    f_set = {v for v in f_map}

    # add words seen in dev, test and train into the feature set
    dt_f_set = functools.reduce(lambda x, y: x | y,
                                map(lambda t: set(t), dev_features), f_set)
    dt_f_set = functools.reduce(lambda x, y: x | y,
                                map(lambda t: set(t), test_features), dt_f_set)
    dt_f_set = functools.reduce(lambda x, y: x | y,
                                map(lambda t: set(t), train_features), dt_f_set)

    if not args.rand_embedding:
if args.load_check_point:
    if os.path.isfile(args.load_check_point):
        print("loading checkpoint: '{}'".format(args.load_check_point))
        checkpoint_file = torch.load(args.load_check_point)
        args.start_epoch = checkpoint_file['epoch']
        f_map = checkpoint_file['f_map']
        l_map = checkpoint_file['l_map']
        train_features, train_labels = utils.read_corpus(lines)
    else:
        print("no checkpoint found at: '{}'".format(args.load_check_point))
else:
    print('constructing coding table')

    # converting format
    train_features, train_labels, f_map, l_map = utils.generate_corpus(
        lines, if_shrink_feature=True, thresholds=0)

    f_set = {v for v in f_map}
    f_map = utils.shrink_features(f_map, train_features, args.mini_count)

    dt_f_set = functools.reduce(lambda x, y: x | y,
                                map(lambda t: set(t), dev_features), f_set)
    dt_f_set = functools.reduce(lambda x, y: x | y,
                                map(lambda t: set(t), test_features), dt_f_set)

    if not args.rand_embedding:
        print("feature size: '{}'".format(len(f_map)))
        print('loading embedding')
        if args.fine_tune:  # which means does not do fine-tune
            f_map = {'<eof>': 0}
        f_map, embedding_tensor, in_doc_words = utils.load_embedding_wlm(
            args.emb_file, ' ', f_map, dt_f_set, args.caseless, args.unk,
            args.embedding_dim, shrink_to_corpus=args.shrink_embedding)
        print("embedding size: '{}'".format(len(f_map)))
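# Illustrative sketch only: the functools.reduce calls above simply union the
# training vocabulary (f_set) with every token appearing in the dev and test
# sentences, so the embedding loader can also cover words unseen in training.
# A self-contained toy run of the same idiom (toy_* names are made up):
import functools

toy_f_set = {'the', 'cat'}                           # training vocabulary
toy_dev_features = [['the', 'dog'], ['a', 'cat']]    # dev sentences as token lists
toy_test_features = [['the', 'bird']]                # test sentences as token lists

toy_dt_f_set = functools.reduce(lambda x, y: x | y, map(set, toy_dev_features), toy_f_set)
toy_dt_f_set = functools.reduce(lambda x, y: x | y, map(set, toy_test_features), toy_dt_f_set)
print(sorted(toy_dt_f_set))  # ['a', 'bird', 'cat', 'dog', 'the']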