def __init__(self, expdir):
  self.expdir = expdir
  # In 'eval' mode, reload the hyperparameters that were saved at training
  # time (the original passed the char vocab pickle here, which is not a
  # params file; point it at the saved params instead).
  self.params = helper.GetParams(
      os.path.join(expdir, 'params.json'), 'eval', expdir)

  self.char_vocab = Vocab.Load(os.path.join(expdir, 'char_vocab.pickle'))
  self.user_vocab = Vocab.Load(os.path.join(expdir, 'user_vocab.pickle'))
  self.params.vocab_size = len(self.char_vocab)
  self.params.user_vocab_size = len(self.user_vocab)

  # construct the tensorflow graph
  self.graph = tf.Graph()
  with self.graph.as_default():
    self.model = Model(self.params, training_mode=False)
    self.char_tensor = tf.constant(self.char_vocab.GetWords(),
                                   name='char_tensor')
    # Map the ids chosen by the beam search back to character strings.
    self.beam_chars = tf.nn.embedding_lookup(self.char_tensor,
                                             self.model.selected)
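# Usage sketch (not part of the original file). It assumes the constructor
# above belongs to a class named MetaModel and that training left a TF1-style
# checkpoint at expdir/model.bin; both names are assumptions, adjust to match
# the repo.
def _load_for_eval(expdir, threads=1):
  mm = MetaModel(expdir)  # builds the eval graph defined above
  with mm.graph.as_default():
    saver = tf.compat.v1.train.Saver(tf.compat.v1.trainable_variables())
  config = tf.compat.v1.ConfigProto(inter_op_parallelism_threads=threads,
                                    intra_op_parallelism_threads=threads)
  sess = tf.compat.v1.Session(graph=mm.graph, config=config)
  saver.restore(sess, os.path.join(expdir, 'model.bin'))
  return mm, sess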
expdir = args.expdir
if not os.path.exists(expdir):
  os.mkdir(expdir)
else:
  # An earlier run left files behind; clear them so training starts from an
  # empty directory (the original deleted the directory itself but never
  # recreated it, which broke the logging setup below).
  print('WARNING: expdir already exists; clearing old files')
  for filename in os.listdir(expdir):
    os.remove(os.path.join(expdir, filename))

# Seed from the clock so repeated runs get different initializations.
tf.compat.v1.set_random_seed(int(time.time() * 1000))

params = helper.GetParams(args.params, 'train', args.expdir)

logging.basicConfig(filename=os.path.join(expdir, 'logfile.txt'),
                    level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler())

df = LoadData(args.data)
char_vocab = Vocab.MakeFromData(df.query_, min_count=10)
char_vocab.Save(os.path.join(args.expdir, 'char_vocab.pickle'))
params.vocab_size = len(char_vocab)
user_vocab = Vocab.MakeFromData([[u] for u in df.user], min_count=15)
user_vocab.Save(os.path.join(args.expdir, 'user_vocab.pickle'))
params.user_vocab_size = len(user_vocab)
dataset = Dataset(df, char_vocab, user_vocab, max_len=params.max_len)
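# Sketch of the min_count semantics used by Vocab.MakeFromData above. The real
# implementation lives elsewhere in the repo; this only illustrates how rare
# tokens are dropped, it is not the repo's Vocab class.
import collections

def _count_filtered_tokens(sequences, min_count):
  counts = collections.Counter(tok for seq in sequences for tok in seq)
  # Tokens seen fewer than min_count times are excluded and must be treated
  # as out-of-vocabulary at lookup time.
  return sorted(tok for tok, c in counts.items() if c >= min_count)

# _count_filtered_tokens(['hello', 'help'], min_count=2) -> ['e', 'h', 'l']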
parser.add_argument('--data', type=str, action='append', dest='data',
                    help='where to load the data')
parser.add_argument('--valdata', type=str, action='append', dest='valdata',
                    help='where to load validation data', default=[])
parser.add_argument('--threads', type=int, default=12,
                    help='how many threads to use in tensorflow')
args = parser.parse_args()

expdir = args.expdir

params = helper.GetParams(None, 'eval', args.expdir)

logging.basicConfig(filename=os.path.join(expdir, 'logfile.more.txt'),
                    level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler())

df = LoadData(args.data)
char_vocab = Vocab.Load(os.path.join(args.expdir, 'char_vocab.pickle'))
params.vocab_size = len(char_vocab)
user_vocab = Vocab.Load(os.path.join(args.expdir, 'user_vocab.pickle'))
params.user_vocab_size = len(user_vocab)
dataset = Dataset(df, char_vocab, user_vocab, max_len=params.max_len)

val_df = LoadData(args.valdata)
valdata = Dataset(val_df, char_vocab, user_vocab, max_len=params.max_len)
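# Usage note (sketch): because --valdata uses action='append', the flag can be
# repeated on the command line and every path lands in one list, which LoadData
# is then expected to accept. A self-contained demo of that argparse behavior:
import argparse

_demo = argparse.ArgumentParser()
_demo.add_argument('--valdata', type=str, action='append', dest='valdata',
                   default=[])
assert _demo.parse_args(
    ['--valdata', 'a.tsv', '--valdata', 'b.tsv']).valdata == ['a.tsv', 'b.tsv']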
# argparse's type=bool treats any non-empty string as True, so expose this as
# a flag instead.
parser.add_argument('--reverse', action='store_true', default=False)
parser.add_argument('--threads', type=int, default=12,
                    help='how many threads to use in tensorflow')
args = parser.parse_args()

if not os.path.exists(args.expdir):
  os.mkdir(args.expdir)
elif args.mode == 'train':
  print('ERROR: expdir already exists')
  exit()

tf.compat.v1.set_random_seed(int(time.time() * 1000))

params = helper.GetParams(args.params, args.mode, args.expdir)

config = tf.compat.v1.ConfigProto(inter_op_parallelism_threads=args.threads,
                                  intra_op_parallelism_threads=args.threads)

# Older experiment configs may predate context_var_types; default every
# context variable to categorical for backwards compatibility.
if not hasattr(params, 'context_var_types'):
  params.context_var_types = ['categorical'] * len(params.context_vars)

# Small batches are enough outside of training.
if args.mode != 'train':
  params.batch_size = 5
if args.mode == 'debug':
  params.batch_size = 1

SEPERATOR = ' '
if params.splitter == 'char':
  SEPERATOR = ''
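# Sketch of how the separator is typically used downstream: model output
# tokens are joined back into a single string, with no space between tokens
# in char mode (the token list here is made up for illustration).
tokens = ['h', 'e', 'l', 'l', 'o']
completion = SEPERATOR.join(tokens)  # 'hello' when params.splitter == 'char'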