Example #1
    # __init__ of a model-loading helper class (the enclosing class and its
    # imports are not shown in this snippet)
    def __init__(self, expdir):
        self.expdir = expdir
        # NOTE: in 'eval' mode the first argument to GetParams appears to be
        # ignored (a later example on this page passes None), so the pickle
        # path here is harmless; the parameters are presumably read from a
        # file saved in expdir instead.
        self.params = helper.GetParams(
            os.path.join(expdir, 'char_vocab.pickle'), 'eval', expdir)
        self.char_vocab = Vocab.Load(os.path.join(expdir, 'char_vocab.pickle'))
        self.user_vocab = Vocab.Load(os.path.join(expdir, 'user_vocab.pickle'))
        self.params.vocab_size = len(self.char_vocab)
        self.params.user_vocab_size = len(self.user_vocab)

        # construct the tensorflow graph
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.model = Model(self.params, training_mode=False)
            # map the beam search's selected ids back to character strings
            self.char_tensor = tf.constant(self.char_vocab.GetWords(),
                                           name='char_tensor')
            self.beam_chars = tf.nn.embedding_lookup(self.char_tensor,
                                                     self.model.selected)
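
The id-to-character mapping at the end of this example can be tried in isolation. Below is a minimal, self-contained sketch (the vocabulary and ids are made up; nothing from the repo is assumed) showing how tf.nn.embedding_lookup on a constant string tensor turns beam-search ids back into characters:

import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # build a graph, as in the snippet

chars = ['<S>', '</S>', 'a', 'b', 'c']       # toy character vocabulary
char_tensor = tf.constant(chars, name='char_tensor')
selected = tf.constant([[2, 3], [4, 1]])     # stand-in for model.selected
beam_chars = tf.nn.embedding_lookup(char_tensor, selected)

with tf.compat.v1.Session() as sess:
    print(sess.run(beam_chars))  # [[b'a' b'b'] [b'c' b'</S>']]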
Example #2
# expdir = args.expdir
expdir = dir  # 'dir' is assumed to be defined earlier; note it shadows the builtin
if not os.path.exists(expdir):
    os.mkdir(expdir)
else:
    # expdir already exists: clear out its files so the directory can be
    # reused (removing the directory itself would break the logging setup
    # and file writes below)
    for file in os.listdir(expdir):
        file = expdir + "/" + file
        os.remove(file)

    print('ERROR: expdir already exists')
    # exit(-1)

# seed TensorFlow's RNG; kept at top level so it runs for fresh and
# reused directories alike
# tf.set_random_seed(int(time.time() * 1000))
tf.compat.v1.set_random_seed(int(time.time() * 1000))
params = helper.GetParams(args.params, 'train', args.expdir)

# log to a file inside expdir and also echo messages to the console
logging.basicConfig(filename=os.path.join(expdir, 'logfile.txt'),
                    level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler())

df = LoadData(args.data)
# character vocabulary built from the query strings; rare chars are dropped
char_vocab = Vocab.MakeFromData(df.query_, min_count=10)
char_vocab.Save(os.path.join(args.expdir, 'char_vocab.pickle'))
params.vocab_size = len(char_vocab)
# user vocabulary built from user ids (one singleton "sentence" per user)
user_vocab = Vocab.MakeFromData([[u] for u in df.user], min_count=15)
user_vocab.Save(os.path.join(args.expdir, 'user_vocab.pickle'))
params.user_vocab_size = len(user_vocab)
dataset = Dataset(df,
                  char_vocab,
                  user_vocab,
                  max_len=params.max_len)  # trailing args assumed to mirror the eval-mode call below
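
Vocab.MakeFromData is repo-specific, but a min_count vocabulary builder typically just counts tokens and keeps the frequent ones. A minimal sketch with made-up data (assumed behavior; the repo's actual class may differ):

import collections

def make_vocab(sequences, min_count):
    # count every token across all sequences, keep those seen often enough
    counts = collections.Counter(tok for seq in sequences for tok in seq)
    return sorted(w for w, c in counts.items() if c >= min_count)

queries = ['cats', 'cat', 'car']         # strings iterate as characters
print(make_vocab(queries, min_count=2))  # ['a', 'c', 't']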
Example #3
parser.add_argument('--data',
                    type=str,
                    action='append',
                    dest='data',
                    help='where to load the data')
parser.add_argument('--valdata',
                    type=str,
                    action='append',
                    dest='valdata',
                    help='where to load validation data',
                    default=[])
parser.add_argument('--threads',
                    type=int,
                    default=12,
                    help='how many threads to use in tensorflow')
args = parser.parse_args()

expdir = args.expdir

params = helper.GetParams(None, 'eval', args.expdir)

logging.basicConfig(filename=os.path.join(expdir, 'logfile.more.txt'),
                    level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler())

df = LoadData(args.data)
char_vocab = Vocab.Load(os.path.join(args.expdir, 'char_vocab.pickle'))
params.vocab_size = len(char_vocab)
user_vocab = Vocab.Load(os.path.join(args.expdir, 'user_vocab.pickle'))
params.user_vocab_size = len(user_vocab)
dataset = Dataset(df, char_vocab, user_vocab, max_len=params.max_len)

val_df = LoadData(args.valdata)
valdata = Dataset(val_df,
                  char_vocab,
                  user_vocab,
                  max_len=params.max_len)  # closing args assumed to mirror the Dataset call above
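
Vocab.Save and Vocab.Load are not shown on this page. Given the .pickle filenames and the len()/GetWords() usage in the examples, a plausible minimal implementation is a pickle round-trip like this sketch (an assumption, not the repo's actual code):

import pickle

class Vocab(object):
    # minimal pickle-backed vocabulary; interface guessed from the examples

    def __init__(self, words):
        self.words = list(words)
        self.ids = {w: i for i, w in enumerate(self.words)}

    def __len__(self):
        return len(self.words)

    def GetWords(self):
        return self.words

    def Save(self, filename):
        with open(filename, 'wb') as f:
            pickle.dump(self.words, f)

    @classmethod
    def Load(cls, filename):
        with open(filename, 'rb') as f:
            return cls(pickle.load(f))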
Example #4
# caution: argparse's type=bool converts any non-empty string to True,
# so '--reverse False' still yields True
parser.add_argument('--reverse', type=bool, default=False)
parser.add_argument('--threads',
                    type=int,
                    default=12,
                    help='how many threads to use in tensorflow')
args = parser.parse_args()

if not os.path.exists(args.expdir):
    os.mkdir(args.expdir)
elif args.mode == 'train':
    print('ERROR: expdir already exists!!!!')
    exit()

tf.set_random_seed(int(time.time() * 1000))

params = helper.GetParams(args.params, args.mode, args.expdir)
config = tf.ConfigProto(inter_op_parallelism_threads=args.threads,
                        intra_op_parallelism_threads=args.threads)

# default every context variable to the categorical type when not specified
if not hasattr(params, 'context_var_types'):
    params.context_var_types = ['categorical'] * len(params.context_vars)

if args.mode != 'train':
    params.batch_size = 5
if args.mode == 'debug':
    params.batch_size = 1

# join word-level tokens with spaces; char-level tokens concatenate directly
SEPERATOR = ' '
if params.splitter == 'char':
    SEPERATOR = ''
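
To see what the separator choice does downstream, here is a tiny hypothetical helper (the detokenize name and data are made up; the example above only defines the constant):

def detokenize(tokens, splitter):
    # char-level models concatenate tokens directly; word-level adds spaces
    sep = '' if splitter == 'char' else ' '
    return sep.join(tokens)

print(detokenize(list('query'), 'char'))    # -> query
print(detokenize(['san', 'jose'], 'word'))  # -> san jose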