def __init__(self, actions_num, img_shape, misc_len=0,
             conv_filters_num=(32, 64, 64), conv_filters_sizes=(8, 4, 3),
             conv_strides=(4, 2, 1), activation_fn="tf.nn.relu",
             fc_units_num=256, **ignored):
    self.actions_num = actions_num
    self.conv_filters_num = conv_filters_num
    self.conv_filters_sizes = conv_filters_sizes
    self.conv_strides = conv_strides
    # activation_fn is passed in as a string (e.g. "tf.nn.relu") and resolved here.
    self.activation_fn = eval(activation_fn)
    self.fc_units_num = fc_units_num

    self.ops = Record()
    self.vars = Record()

    self.use_misc = misc_len > 0
    self.params = None

    self.vars.state_img = tf.placeholder(tf.float32, [None] + list(img_shape),
                                         name="state_img")
    if self.use_misc:
        self.vars.state_misc = tf.placeholder(tf.float32, [None, misc_len],
                                              name="state_misc")
    else:
        self.vars.state_misc = None

class RevisionStore(Record("fs", "root")):
    def __contains__(self, entry):
        full_path = self.get_full_revision_path(entry)
        return self.fs.stat_eq(full_path, entry.size, entry.mtime)

    def move_in(self, source_path, dest_entry):
        dest_path = self.get_full_revision_path(dest_entry)
        self.fs.move(source_path, dest_path, mtime=dest_entry.mtime)

    def copy_out(self, source_entry, dest_path):
        source_path = self.get_full_revision_path(source_entry)
        self.fs.copy(source_path, dest_path, mtime=source_entry.mtime)

    def get_full_revision_path(self, entry):
        return join_paths(self.root, self.get_revision_path(entry))

    # TODO: Use the entry.groupid in the revision_path.  Right now,
    # all of the groups are kind of merged together, which is kind of
    # messy.
    def get_revision_path(self, entry):
        path, ext = os.path.splitext(entry.path)
        if entry.hash:
            return "{path}_{hash}{ext}".format(
                path=path, hash=entry.hash, ext=ext)
        else:
            return "{path}_{size}_{mtime}{ext}".format(
                path=path, size=entry.size, mtime=entry.mtime, ext=ext)

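# Naming sketch (added for illustration; `entry` stands for any record with
# the path/size/mtime/hash fields RevisionStore reads, and join_paths is
# assumed to join with "/"):
#
#     store = RevisionStore(fs, u"/backup/revisions")
#     # entry.path=u"docs/note.txt", size=1024, mtime=1300000000, no hash:
#     store.get_revision_path(entry)       # u"docs/note_1024_1300000000.txt"
#     store.get_full_revision_path(entry)  # u"/backup/revisions/docs/note_1024_1300000000.txt"
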
class HistoryStore(Record("db", "slog", "cache_by_peerid")):
    def __new__(cls, db, slog):
        db.create(TABLE_NAME, TABLE_FIELD_TYPES)
        return cls.new(db, slog, {})

    # return [entry]
    def read_entries(self, peerid):
        cached = setdefault(self.cache_by_peerid, peerid, lambda:
                            HistoryCache(list(self.select_entries(peerid))))
        # copy list for thread safety
        return list(cached.entries)

    # Reads 100,000/sec on my 2008 Macbook.  If you sort by utime, it goes
    # down to 40,000/sec, so that doesn't seem like a good idea.
    def select_entries(self, peerid):
        # TODO: use a where clause when selecting by peerid.  It will
        # be a lot faster!
        return (entry for entry in self.db.select(
            TABLE_NAME, TABLE_FIELDS, into=HistoryEntry)
            if entry.peerid == peerid)

    def add_entries(self, new_entries):
        self.db.insert(TABLE_NAME, TABLE_FIELDS, new_entries)
        self.slog.inserted_history(new_entries)
        for peerid, peer_entries in \
                group_history_by_peerid(new_entries).iteritems():
            cache = self.cache_by_peerid.get(peerid)
            if cache is not None:
                cache.add_entries(peer_entries)

class FileStat(Record("rpath", "size", "mtime")):
    @classmethod
    def from_deleted(cls, rpath):
        return cls.new(rpath, DELETED_SIZE, DELETED_MTIME)

    @property
    def deleted(entry):
        return entry.mtime == DELETED_MTIME

class RootedPath(Record("root", "rel")):
    """Represents a path (rel) that is relative to another path (root).
    For example, when scanning a large directory, it is convenient to
    know the paths relative to the directory passed in.  In code a
    RootedPath is often called an "rpath"."""

    @property
    def full(self):
        return join_paths(*self)

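# Usage sketch (added for illustration; assumes join_paths joins with "/"):
#
#     rpath = RootedPath(u"/home/alice/photos", u"2021/cat.jpg")
#     rpath.root   # u"/home/alice/photos"
#     rpath.rel    # u"2021/cat.jpg"
#     rpath.full   # u"/home/alice/photos/2021/cat.jpg"
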
class UnixPathEncoder(Record("encoding", "decoding")):
    def encode_path(self, path):
        if self.encoding:
            return path.encode(self.encoding)
        else:
            return path

    def decode_path(self, path):
        return path.decode(self.decoding)

class History(Record("entries", "latest")):
    def __new__(cls, entries):
        # Using max(entries) is faster than
        # max(history, key=HistoryEntry.get_utime)
        # by .2 secs for 150,000 entries.
        latest = max(entries)
        return cls.new(entries, latest)

    def __iter__(self):
        return iter(self.entries)

class Groupids(Record("root_by_groupid", "groupid_by_root")):
    def __new__(cls, root_by_groupid):
        groupid_by_root = flip_dict(root_by_groupid)
        return cls.new(root_by_groupid, groupid_by_root)

    def to_root(self, groupid):
        return self.root_by_groupid.get(groupid, None)

    def from_root(self, root):
        return self.groupid_by_root.get(root, None)

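# Usage sketch (added for illustration; flip_dict is assumed to invert a
# one-to-one mapping, which is how Groupids uses it above):
#
#     groupids = Groupids({u"docs": u"/home/alice/docs",
#                          u"pics": u"/home/alice/pics"})
#     groupids.to_root(u"docs")                # u"/home/alice/docs"
#     groupids.from_root(u"/home/alice/pics")  # u"pics"
#     groupids.to_root(u"missing")             # None
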
class WindowsPathEncoder(Record("encoding", "decoding")):
    def encode_path(self, path):
        # The \\?\ prefix lets Windows handle long absolute paths.
        win_path = "\\\\?\\" + os.path.abspath(path.replace(PATH_SEP, os.sep))
        if self.encoding:
            return win_path.encode(self.encoding)
        else:
            return win_path

    def decode_path(self, win_path):
        return win_path.replace(os.sep, PATH_SEP).decode(self.decoding)

class HistoryEntry(Record(*TABLE_FIELDS)):
    @property
    def deleted(entry):
        return entry.mtime == DELETED_MTIME

    def get_gpath(entry):
        return GroupedPath(entry.groupid, entry.path)

    @property
    def gpath(entry):
        return GroupedPath(entry.groupid, entry.path)

class MergeLog(Record("db", "clock")):
    def __new__(cls, db, clock):
        db.create(TABLE_NAME, TABLE_FIELD_TYPES)
        return cls.new(db, clock)

    def read_actions(self):
        return list(
            self.db.select(TABLE_NAME, TABLE_FIELDS, into=MergeLogEntry))

    def add_action(self, action):
        utime = self.clock.unix()
        peerid = action.newer.peerid
        action_type = str(action.type)
        (groupid, path) = action.gpath
        details = str(action.details) if action.details else ""
        author_peerid = action.newer.author_peerid
        entry = MergeLogEntry(utime, peerid, action_type, groupid, path,
                              details, author_peerid)
        self.db.insert(TABLE_NAME, TABLE_FIELDS, [entry])

class PathFilter(
        Record("patterns_to_ignore", "names_to_ignore", "paths_to_ignore")):
    """Controls whether to ignore a path or not, which is mostly used
    for scanning and comparing files.  ignore_path will be called a
    lot, and should memoize.  Also, names_to_ignore is used directly
    as a set in FileSystem scans.  For convenience, globs_to_ignore
    are converted to regular expressions to ignore."""

    def __new__(cls, globs_to_ignore, names_to_ignore):
        names_to_ignore = frozenset(names_to_ignore)
        patterns_to_ignore = compile_globs(globs_to_ignore)
        return cls.new(patterns_to_ignore, names_to_ignore, set())

    def ignore_path(self, path):
        if path in self.paths_to_ignore:
            return True
        elif (matches_any_name(path, self.names_to_ignore)
              or matches_any_pattern(path, self.patterns_to_ignore)):
            self.paths_to_ignore.add(path)
            return True
        else:
            return False

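# Usage sketch (added for illustration; compile_globs, matches_any_name, and
# matches_any_pattern are the helpers referenced above and are assumed to do
# fnmatch-style matching against the path):
#
#     path_filter = PathFilter([u"*.pyc", u"*~"], [u".git", u".DS_Store"])
#     path_filter.ignore_path(u"src/util.pyc")   # True, and memoized
#     path_filter.ignore_path(u"src/util.py")    # False
#
# Matches are memoized in paths_to_ignore, so re-scanning the same path
# skips the glob matching.
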
def train(net_enn, input_, target, feature_name=''):
    dstb_y = lamuda.Lamuda(target, NE, ERROR_PER)
    train_losses = Record()
    losses = Record()
    lamuda_history = Record()
    std_history = Record()
    pred_history = Record()

    initial_parameters = net_enn.initial_parameters
    initial_pred = net_enn.output(input_)
    train_losses.update(criterion(initial_pred.mean(0), target).tolist())
    losses.update(criterion(initial_pred.mean(0), target).tolist())
    std_history.update(dstb_y.std(initial_pred))
    pred_history.update(initial_pred)
    lamuda_history.update(dstb_y.lamuda(initial_pred))

    for j in range(T):
        torch.cuda.empty_cache()
        params = net_enn.get_parameter()
        dstb_y.update()
        time_ = time.strftime('%Y%m%d_%H_%M_%S')
        delta = enrml.EnRML(pred_history.get_latest(mean=False), params,
                            initial_parameters,
                            lamuda_history.get_latest(mean=False),
                            dstb_y.dstb, ERROR_PER)
        params_raw = net_enn.update_parameter(delta)
        torch.cuda.empty_cache()
        pred = net_enn.output(input_)
        loss_new = criterion(pred.mean(0), target).tolist()
        bigger = train_losses.check(loss_new)
        record_while = 0
        while bigger:
            record_while += 1
            lamuda_history.update(
                lamuda_history.get_latest(mean=False) * GAMMA)
            if lamuda_history.get_latest(mean=False) > GAMMA**10:
                lamuda_history.update(lamuda_history.data[0])
                print('abandon current iteration')
                net_enn.set_parameter(params)
                loss_new = train_losses.get_latest()
                dstb_y.update()
                params_raw = params
                break
            dstb_y.update()
            net_enn.set_parameter(params)
            delta = enrml.EnRML(pred_history.get_latest(mean=False), params,
                                initial_parameters,
                                lamuda_history.get_latest(mean=False),
                                dstb_y.dstb, ERROR_PER)
            params_raw = net_enn.update_parameter(delta)
            torch.cuda.empty_cache()
            pred = net_enn.output(input_)
            loss_new = criterion(pred.mean(0), target).tolist()
            print('update losses, new loss:{}'.format(loss_new))
            bigger = train_losses.check(loss_new)
        train_losses.update(loss_new)
        save_var(params_raw,
                 '{}/{}_{}_params'.format(PATH, time_, feature_name))
        print("iteration:{} \t current train losses:{}".format(
            j, train_losses.get_latest(mean=True)))
        save_txt(
            '{}/loss_{}.txt'.format(PATH, feature_name),
            time.strftime('%Y%m%d_%H_%M_%S') + ',' +
            str(train_losses.get_latest(mean=True)) + ',\n')
        pred_history.update(pred)
        std_history.update(dstb_y.std(pred))
        if std_history.bigger():
            lamuda_history.update(lamuda_history.get_latest(mean=False))
        else:
            lamuda_tmp = lamuda_history.get_latest(mean=False) / GAMMA
            if lamuda_tmp < 0.005:
                lamuda_tmp = 0.005
            lamuda_history.update(lamuda_tmp)

    return (net_enn,
            train_losses.get_latest(mean=True),
            pred_history.get_latest(mean=False))

def model(mode, src_dwh, tgt_dwh,
          src_idx=None, len_src=None,
          tgt_img=None, tgt_idx=None, len_tgt=None,
          num_layers=3, num_units=512,
          learn_rate=1e-3, decay_rate=1e-2, dropout=0.1):
    assert mode in ('train', 'valid', 'infer')
    self = Record()
    src_d, src_w, src_h = src_dwh
    tgt_d, tgt_w, tgt_h = tgt_dwh

    with scope('source'):
        # input nodes
        src_idx = self.src_idx = placeholder(tf.int32, (None, None), src_idx, 'src_idx')  # n s
        len_src = self.len_src = placeholder(tf.int32, (None,), len_src, 'len_src')  # n
        # time major order
        src_idx = tf.transpose(src_idx, (1, 0))  # s n
        emb_src = tf.one_hot(src_idx, src_d)  # s n v
        for i in range(num_layers):
            with scope("rnn{}".format(i + 1)):
                emb_fwd, _ = tf.contrib.cudnn_rnn.CudnnGRU(
                    1, num_units, dropout=dropout,
                    name='fwd')(emb_src, training='train' == mode)
                emb_bwd, _ = tf.contrib.cudnn_rnn.CudnnGRU(
                    1, num_units, dropout=dropout,
                    name='bwd')(tf.reverse_sequence(emb_src, len_src,
                                                    seq_axis=0, batch_axis=1),
                                training='train' == mode)
                emb_src = tf.concat(
                    (emb_fwd, tf.reverse_sequence(emb_bwd, len_src,
                                                  seq_axis=0, batch_axis=1)),
                    axis=-1)
        # emb_src = tf.layers.dense(emb_src, num_units, name='reduce_concat')  # s n d
        emb_src = self.emb_src = tf.transpose(emb_src, (1, 2, 0))  # n d s

    with scope('target'):
        # input nodes
        tgt_img = self.tgt_img = placeholder(tf.uint8, (None, None, tgt_h, tgt_w), tgt_img, 'tgt_img')  # n t h w
        tgt_idx = self.tgt_idx = placeholder(tf.int32, (None, None), tgt_idx, 'tgt_idx')  # n t
        len_tgt = self.len_tgt = placeholder(tf.int32, (None,), len_tgt, 'len_tgt')  # n
        # time major order
        tgt_idx = tf.transpose(tgt_idx)  # t n
        tgt_img = tf.transpose(tgt_img, (1, 0, 2, 3))  # t n h w
        tgt_img = flatten(tgt_img, 2, 3)  # t n hw
        # normalize pixels to binary
        tgt_img = tf.to_float(tgt_img) / 255.0
        # tgt_img = tf.round(tgt_img)
        # todo consider adding noise
        # causal padding
        fire = self.fire = tf.pad(tgt_img, ((1, 0), (0, 0), (0, 0)), constant_values=0.0)
        true = self.true = tf.pad(tgt_img, ((0, 1), (0, 0), (0, 0)), constant_values=1.0)
        tidx = self.tidx = tf.pad(tgt_idx, ((0, 1), (0, 0)), constant_values=1)
        mask_tgt = tf.transpose(tf.sequence_mask(len_tgt + 1))  # t n

    with scope('decode'):
        # needs to get input from latent space to do attention or some shit
        decoder = self.decoder = tf.contrib.cudnn_rnn.CudnnGRU(num_layers, num_units, dropout=dropout)
        state_in = self.state_in = tf.zeros((num_layers, tf.shape(fire)[1], num_units))
        x, _ = _, (self.state_ex,) = decoder(fire, initial_state=(state_in,),
                                             training='train' == mode)
        # transform mask to -inf and 0 in order to simply sum for whatever the f**k happens next
        mask = tf.log(tf.sequence_mask(len_src, dtype=tf.float32))  # n s
        mask = tf.expand_dims(mask, 1)  # n 1 s
        # multi-head scaled dot-product attention
        x = tf.transpose(x, (1, 2, 0))  # t n d ---> n d t
        attn = Attention(num_units, num_units, 2 * num_units)(x, emb_src, mask)
        if 'train' == mode:
            attn = tf.nn.dropout(attn, 1 - dropout)
        x = Normalize(num_units)(x + attn)
        x = tf.transpose(x, (2, 0, 1))  # n d t ---> t n d

    if 'infer' != mode:
        x = tf.boolean_mask(x, mask_tgt)
        true = tf.boolean_mask(true, mask_tgt)
        tidx = tf.boolean_mask(tidx, mask_tgt)

    with scope('output'):
        y = tf.layers.dense(x, tgt_h * tgt_w, name='dense_img')
        z = tf.layers.dense(x, tgt_d, name='logit_idx')
        pred = self.pred = tf.clip_by_value(y, 0.0, 1.0)
        prob = self.prob = tf.nn.softmax(z)
        pidx = self.pidx = tf.argmax(z, axis=-1, output_type=tf.int32)

    with scope('losses'):
        diff = true - pred
        mae = self.mae = tf.reduce_mean(tf.abs(diff), axis=-1)
        mse = self.mse = tf.reduce_mean(tf.square(diff), axis=-1)
        xid = self.xid = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=z, labels=tidx)
        err = self.err = tf.not_equal(tidx, pidx)
        loss = tf.reduce_mean(xid)

    with scope('update'):
        step = self.step = tf.train.get_or_create_global_step()
        lr = self.lr = learn_rate / (1.0 + decay_rate * tf.sqrt(tf.to_float(step)))
        if 'train' == mode:
            down = self.down = tf.train.AdamOptimizer(lr).minimize(loss, step)

    return self

class FileSystem(Record("slog", "path_encoder")):
    """Encapsulates all of the operations we need on the FileSystem.
    The most important part is probably listing/stating."""

    READ_MODE = "rb"
    NEW_WRITE_MODE = "wb"
    EXISTING_WRITE_MODE = "r+b"

    # slog needs to have
    def __new__(cls, slog):
        return cls.new(slog, PathEncoder())

    def encode_path(fs, path):
        return fs.path_encoder.encode_path(path)

    def decode_path(fs, path):
        return fs.path_encoder.decode_path(path)

    def exists(fs, path):
        encoded_path = fs.encode_path(path)
        return os.path.exists(encoded_path)

    def isdir(fs, path):
        encoded_path = fs.encode_path(path)
        return os.path.isdir(encoded_path)

    def isfile(fs, path):
        encoded_path = fs.encode_path(path)
        return os.path.isfile(encoded_path)

    def isempty(fs, path):
        # fs.list encodes the path itself, so pass the unencoded path here.
        for _ in fs.list(path):
            return False
        return True

    # yields FileStat, with same "root marker" rules as self.list(...)
    #
    # On my 2008 Macbook, reads about 10,000 files/sec when doing small
    # groups (5,000 files), and 4,000 files/sec when doing large
    # (200,000).  This means it can take anywhere from .1 sec to 1
    # minute.  Caching seems to improve performance by about 30%.
    # While running, the CPU is pegged :(.  Oh well, 60,000 files in 8
    # sec isn't too bad.  That's my whole home directory.
    #
    # On my faster linux desktop machine, it's about 30,000 files/sec
    # when cached, even for 200,000 files, which is a big improvement.
    def list_stats(fs, root, root_marker=None, names_to_ignore=frozenset()):
        return fs.stats(fs.list(root, root_marker=root_marker,
                                names_to_ignore=names_to_ignore))

    # Yields a RootedPath for each file found in the root.  The initial
    # root is the given root.  Deeper in, if there is a "root_marker"
    # file in a directory, that directory becomes a new root.
    def list(fs, root, root_marker=None, names_to_ignore=frozenset()):
        listdir = os.listdir
        join = os.path.join
        isdir = os.path.isdir
        islink = os.path.islink

        def decode(encoded_path):
            try:
                return fs.decode_path(encoded_path)
            except Exception as err:
                fs.slog.path_error(
                    "Could not decode file path {0}".format(
                        repr(encoded_path)), err)
                return None

        # We pass root around so that we only have to decode it once.
        def walk(root, encoded_root, encoded_parent):
            child_names = listdir(encoded_parent)
            if root_marker is not None:
                if root_marker in child_names:
                    encoded_root = encoded_parent
                    root = decode(encoded_root)

            # If decoding root fails, no point in traversing any further.
            if root is not None:
                for child_name in child_names:
                    if child_name not in names_to_ignore:
                        encoded_full = join(encoded_parent, child_name)
                        if isdir(encoded_full):
                            if not islink(encoded_full):
                                for child in \
                                        walk(root, encoded_root, encoded_full):
                                    yield child
                        else:
                            rel = decode(encoded_full[len(encoded_root) + 1:])
                            if rel:
                                yield RootedPath(root, rel)

        encoded_root = fs.encode_path(root)
        return walk(root, encoded_root, encoded_root)

    # yields FileStats
    def stats(fs, rpaths):
        stat = os.stat
        for rpath in rpaths:
            try:
                encoded_path = fs.encode_path(rpath.full)
                stats = stat(encoded_path)
                size = stats[STAT_SIZE_INDEX]
                mtime = stats[STAT_MTIME_INDEX]
                yield FileStat(rpath, size, mtime)
            except OSError:
                pass  # Probably a link

    # returns (size, mtime)
    def stat(fs, path):
        encoded_path = fs.encode_path(path)
        stats = os.stat(encoded_path)
        return stats[STAT_SIZE_INDEX], stats[STAT_MTIME_INDEX]

    # Will not throw OSError if there is no path.  Returns False in that case.
    def stat_eq(fs, path, size, mtime):
        try:
            (current_size, current_mtime) = fs.stat(path)
            return (current_size == size
                    and mtimes_eq(current_mtime, mtime))
        except OSError:
            return False

    def read(fs, path, start=0, size=None):
        encoded_path = fs.encode_path(path)
        with open(encoded_path, fs.READ_MODE) as file:
            if start > 0:
                file.seek(start, 0)
            if size:
                return file.read(size)
            else:
                return file.read()

    # On my 2008 Macbook, with SHA1, it can hash 50,000 files
    # totalling 145GB (about 3MB each file) in 48min, which is 17
    # files totalling 50MB/sec.  So, if you scan 30GB of new files, it
    # will take 10min.  During that time, CPU usage is ~80%.
    def hash(fs, path, hash_type=hashlib.sha1, chunk_size=100000):
        if hash_type is None:
            return ""
        hasher = hash_type()
        for chunk_data in fs._iter_chunks(path, chunk_size):
            hasher.update(chunk_data)
        return hasher.digest()

    def _iter_chunks(fs, path, chunk_size):
        encoded_path = fs.encode_path(path)
        with open(encoded_path, fs.READ_MODE) as file:
            chunk = file.read(chunk_size)
            while chunk:
                yield chunk
                chunk = file.read(chunk_size)

    def write(fs, path, contents, start=None, mtime=None):
        encoded_path = fs.encode_path(path)
        fs.create_parent_dirs(path)
        if (start is not None) and fs.exists(encoded_path):
            mode = fs.EXISTING_WRITE_MODE
        else:
            mode = fs.NEW_WRITE_MODE
        with open(encoded_path, mode) as file:
            if start is not None:
                file.seek(start, 0)
                assert start == file.tell(), \
                    "Failed to seek to proper location in file"
            file.write(contents)
        if mtime is not None:
            fs.touch(path, mtime)

    def touch(fs, path, mtime):
        encoded_path = fs.encode_path(path)
        os.utime(encoded_path, (mtime, mtime))

    def create_parent_dirs(fs, path):
        fs.create_dir(parent_path(path))

    def create_dir(fs, path):
        encoded_path = fs.encode_path(path)
        if not os.path.exists(encoded_path):
            os.makedirs(encoded_path)

    # Blows up if existing stuff is "in the way".
    def move(fs, from_path, to_path, mtime=None):
        encoded_from_path = fs.encode_path(from_path)
        encoded_to_path = fs.encode_path(to_path)
        fs.create_parent_dirs(to_path)
        os.rename(encoded_from_path, encoded_to_path)
        if mtime is not None:
            fs.touch(to_path, mtime)

    # Blows up if existing stuff is "in the way".
    def copy(fs, from_path, to_path, mtime=None):
        encoded_from_path = fs.encode_path(from_path)
        encoded_to_path = fs.encode_path(to_path)
        fs.create_parent_dirs(to_path)
        shutil.copyfile(encoded_from_path, encoded_to_path)
        if mtime is not None:
            fs.touch(to_path, mtime)

    # Blows up if given a non-empty directory.
    def delete(fs, path):
        encoded_path = fs.encode_path(path)
        if os.path.exists(encoded_path):
            os.remove(encoded_path)

    def remove_empty_parent_dirs(fs, path):
        encoded_parent_path = fs.encode_path(parent_path(path))
        try:
            os.removedirs(encoded_parent_path)
        except OSError:
            pass  # Not empty

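# Usage sketch (added for illustration; slog here stands for any logger
# object providing the callbacks FileSystem calls, e.g. path_error):
#
#     fs = FileSystem(slog)
#     for file_stat in fs.list_stats(u"/home/alice/photos",
#                                    names_to_ignore=frozenset([u".git"])):
#         print file_stat.rpath.rel, file_stat.size, file_stat.mtime
#
# list_stats only stats what list yields, so ignored names are never touched.
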
from util import Record

config = Record(
      trial = None
    , ckpt = None
    , seed = 0
    ### data
    , unk = 0
    , eos = 1
    , bos = 2
    , cap = 64
    ### model
    , dim_voc = 8192
    , dim_emb = 512
    , dim_mid = 2048
    ### batch
    , batch_train = 300
    , batch_infer = 256
    , batch_valid = 512
)

paths = Record(
      raw = "../data/raw"
    , data = "../data/master"
    , pred = "../pred"
    , ckpt = "../ckpt"
    , log = "../log"
)

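# Usage sketch (added for illustration): Record gives attribute access to the
# keyword arguments above, e.g.
#
#     config.dim_emb   # 512
#     paths.ckpt       # "../ckpt"
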
class FileDiff(Record("type", "gpath", "rpath", "size", "mtime", "hash")):
    @property
    def was_deleted(entry):
        return entry.mtime == DELETED_MTIME

def vAe(mode,
        src=None,
        tgt=None,
        # model spec
        dim_tgt=8192,
        dim_emb=512,
        dim_rep=1024,
        rnn_layers=3,
        bidirectional=True,
        bidir_stacked=True,
        attentive=False,
        logit_use_embed=True,
        # training spec
        accelerate=1e-4,
        learn_rate=1e-3,
        bos=2,
        eos=1):
    # dim_tgt : vocab size
    # dim_emb : model dimension
    # dim_rep : representation dimension
    #
    # unk=0 for word dropout

    assert mode in ('train', 'valid', 'infer')
    self = Record(bos=bos, eos=eos)

    with scope('step'):
        step = self.step = tf.train.get_or_create_global_step()
        rate = accelerate * tf.to_float(step)
        rate_keepwd = self.rate_keepwd = tf.sigmoid(rate)
        rate_anneal = self.rate_anneal = tf.tanh(rate)
        rate_update = self.rate_update = learn_rate / (tf.sqrt(rate) + 1.0)

    with scope('src'):
        src = self.src = placeholder(tf.int32, (None, None), src, 'src')
        src = tf.transpose(src)  # time major order
        src, msk_src, len_src = trim(src, eos)

    with scope('tgt'):
        tgt = self.tgt = placeholder(tf.int32, (None, None), tgt, 'tgt')
        tgt = tf.transpose(tgt)  # time major order
        tgt, msk_tgt, len_tgt = trim(tgt, eos)
        msk_tgt = tf.pad(msk_tgt, ((1, 0), (0, 0)), constant_values=True)
        # pads for decoder : lead=[bos]+tgt -> gold=tgt+[eos]
        lead, gold = tgt, tf.pad(tgt, paddings=((0, 1), (0, 0)),
                                 constant_values=eos)
        if 'train' == mode:
            lead *= tf.to_int32(
                tf.random_uniform(tf.shape(lead)) < rate_keepwd)
        lead = self.lead = tf.pad(lead, paddings=((1, 0), (0, 0)),
                                  constant_values=bos)

    # s : src length
    # t : tgt length plus one padding, either eos or bos
    # b : batch size
    #
    # len_src : b aka s
    # msk_src : sb without padding
    # msk_tgt : tb with eos
    #
    # lead : tb with bos
    # gold : tb with eos

    with scope('embed'):
        b = (6 / (dim_tgt / dim_emb + 1))**0.5
        embedding = tf.get_variable(
            'embedding', (dim_tgt, dim_emb),
            initializer=tf.random_uniform_initializer(-b, b))
        emb_tgt = tf.gather(embedding, lead, name='emb_tgt')  # (t, b) -> (t, b, dim_emb)
        emb_src = tf.gather(embedding, src, name='emb_src')  # (s, b) -> (s, b, dim_emb)

    with scope('encode'):
        # (s, b, dim_emb) -> (b, dim_emb)
        reverse = partial(tf.reverse_sequence, seq_lengths=len_src,
                          seq_axis=0, batch_axis=1)
        if bidirectional and bidir_stacked:
            for i in range(rnn_layers):
                with scope("rnn{}".format(i + 1)):
                    emb_fwd, _ = layer_rnn(1, dim_emb, name='fwd')(emb_src)
                    emb_bwd, _ = layer_rnn(1, dim_emb, name='bwd')(reverse(emb_src))
                    hs = emb_src = tf.concat((emb_fwd, reverse(emb_bwd)), axis=-1)
        elif bidirectional:
            with scope("rnn"):
                emb_fwd, _ = layer_rnn(rnn_layers, dim_emb, name='fwd')(emb_src)
                emb_bwd, _ = layer_rnn(rnn_layers, dim_emb, name='bwd')(reverse(emb_src))
            hs = tf.concat((emb_fwd, reverse(emb_bwd)), axis=-1)
        else:
            hs, _ = layer_rnn(rnn_layers, dim_emb, name='rnn')(emb_src)
        with scope('cata'):
            # extract the final states from the outputs: bd <- sbd, b2
            h = tf.gather_nd(
                hs,
                tf.stack((len_src - 1,
                          tf.range(tf.size(len_src), dtype=tf.int32)),
                         axis=1))
            if attentive:  # todo fixme
                # the values are the outputs from all non-padding steps;
                # the queries are the final states;
                h = layer_nrm(h + tf.squeeze(  # bd <- bd1
                    attention(  # bd1 <- bd1, bds, b1s
                        tf.expand_dims(h, axis=2),  # query: bd1 <- bd
                        tf.transpose(hs, (1, 2, 0)),  # value: bds <- sbd
                        tf.log(tf.to_float(  # -inf,0 mask: b1s <- sb <- bs
                            tf.expand_dims(tf.transpose(msk_src), axis=1))),
                        int(h.shape[-1])), 2))

    with scope('latent'):
        # (b, dim_emb) -> (b, dim_rep) -> (b, dim_emb)
        # h = layer_aff(h, dim_emb, name='in')
        mu = self.mu = layer_aff(h, dim_rep, name='mu')
        lv = self.lv = layer_aff(h, dim_rep, name='lv')
        with scope('z'):
            h = mu
            if 'train' == mode:
                h += tf.exp(0.5 * lv) * tf.random_normal(shape=tf.shape(lv))
            self.z = h
        h = layer_aff(h, dim_emb, name='ex')

    with scope('decode'):
        # (b, dim_emb) -> (t, b, dim_emb) -> (?, dim_emb)
        h = self.state_in = tf.stack((h,) * rnn_layers)
        h, _ = _, (self.state_ex,) = layer_rnn(rnn_layers, dim_emb, name='rnn')(
            emb_tgt, initial_state=(h,))
        if 'infer' != mode:
            h = tf.boolean_mask(h, msk_tgt)
        h = layer_aff(h, dim_emb, name='out')

    with scope('logits'):
        # (?, dim_emb) -> (?, dim_tgt)
        if logit_use_embed:
            logits = self.logits = tf.tensordot(
                h, (dim_emb**-0.5) * tf.transpose(embedding), 1)
        else:
            logits = self.logits = layer_aff(h, dim_tgt)

    with scope('prob'):
        prob = self.prob = tf.nn.softmax(logits)
    with scope('pred'):
        pred = self.pred = tf.argmax(logits, -1, output_type=tf.int32)

    if 'infer' != mode:
        labels = tf.boolean_mask(gold, msk_tgt, name='labels')
        with scope('errt'):
            errt_samp = self.errt_samp = tf.to_float(tf.not_equal(labels, pred))
            errt = self.errt = tf.reduce_mean(errt_samp)
        with scope('loss'):
            with scope('loss_gen'):
                loss_gen_samp = self.loss_gen_samp = \
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        labels=labels, logits=logits)
                loss_gen = self.loss_gen = tf.reduce_mean(loss_gen_samp)
            with scope('loss_kld'):
                loss_kld_samp = self.loss_kld_samp = 0.5 * (
                    tf.square(mu) + tf.exp(lv) - lv - 1.0)
                loss_kld = self.loss_kld = tf.reduce_mean(loss_kld_samp)
            loss = self.loss = rate_anneal * loss_kld + loss_gen

    if 'train' == mode:
        with scope('train'):
            train_step = self.train_step = tf.train.AdamOptimizer(
                rate_update).minimize(loss, step)

    return self

def fnn_policy_train(env, policy, value_network, policy_optimizer,
                     value_optimizer, value_loss_function, args,
                     convergence_threshold=0.1):
    """
    Parameters
    ----------
    env: gym.wrappers.time_limit.TimeLimit
        unwrapped gym simulated environment
    policy: nn.Module
        model that learns a categorical distribution for p(a|s)
    value_network: nn.Module
        model that learns the state value function V(s)
    policy_optimizer: torch.optim.Optimizer
        optimizer that optimizes the parameters of the policy network
    value_optimizer: torch.optim.Optimizer
        optimizer that optimizes the parameters of the value network
    value_loss_function: torch.nn.modules.loss
        loss function for the value network
    args: argparse.Namespace
        args that specify the setting for an experiment
    convergence_threshold: float
        threshold that decides whether convergence of the policy has been
        attained

    Returns
    -------
    time_str: str
        string of the format YearMonthDay-HourMinuteSeconds, used in naming
        files to distinguish between different experiments
    policy_trained: nn.Module
        policy model that has been trained
    """
    time_str = time.strftime("%Y%m%d-%H%M%S")
    if args.record:
        # Store results from different runs of the model separately.
        results_directory = ''.join([
            '/tmp', args.directory_name, '/training/', time_str,
            '_discounting_', str(args.gamma),
            '_update_frequency_', str(args.update_frequency),
            '_value_update_times_', str(args.value_update_times)
        ])
        env = gym.wrappers.Monitor(env, results_directory)
    if args.cuda:
        th.cuda.manual_seed(args.seed)
        FloatTensor = th.cuda.FloatTensor
        policy.cuda()
        value_network.cuda()
    else:
        th.manual_seed(args.seed)
        FloatTensor = th.FloatTensor
        import matplotlib.pyplot as plt
        plt.ion()
    try:
        os.makedirs('fnn/training')
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise
    Tensor = FloatTensor

    # Create a list for storing Records.
    records = Records(args.update_frequency)
    value_losses = []
    policy_losses = []
    returns = []

    for episode in range(1, args.episodes + 1):
        state_ = env.reset()
        done_ = False
        episode_record = []
        for timestep in range(1, 501):
            if not done_:
                if not args.cuda:
                    env.render()
                state_ = th.from_numpy(state_.reshape(1, -1))
                state = Variable(state_, requires_grad=False).type(Tensor)
                p = policy.forward(state)
                selection_ = th.multinomial(p, 1)
                action = selection_.data[0]
                p_action = p[0][action].view(1, -1)
                next_state_, reward_, done_, info_ = env.step(action[0])
                value = Tensor([reward_]).view(1, -1)
                episode_record.append(
                    Record(state_, action, value, p_action, p.view(1, -1)))
                # Update state values
                for i in range(0, len(episode_record) - 1):
                    episode_record[i].value.add_(
                        (args.gamma**(len(episode_record) - 1 - i)) * value)
                if done_:
                    returns.append(episode_record[0].value[0][0])
                state_ = next_state_
        records.push(episode_record)

        if episode % args.update_frequency == 0:
            history = records.pull()
            state_history = Variable(th.cat(history.state),
                                     requires_grad=False).type(Tensor)
            value_history = Variable(th.cat(history.value),
                                     requires_grad=False).type(Tensor)
            prob_action_history = th.cat(
                history.probability_action).type(Tensor)
            prob_history = th.cat(history.probabilities).type(Tensor)

            # Update the value network first.
            for _ in range(args.value_update_times):
                value_optimizer.zero_grad()
                value = value_network.forward(state_history)
                value_loss = value_loss_function(value, value_history)
                value_losses.append(value_loss.data[0])
                value_loss.backward()
                value_optimizer.step()

            # Now update the policy network.
            policy_optimizer.zero_grad()
            value_estimated_ = value_network.forward(state_history)
            value_estimated = value_estimated_.detach()
            policy_loss = fnn_policy_loss(value_history, value_estimated,
                                          prob_action_history.view(-1, 1),
                                          prob_history.view(-1, 3))
            policy_loss = th.div(policy_loss, state_history.size()[0])
            policy_losses.append(policy_loss.data[0])
            if len(policy_losses) >= 2:
                if abs(policy_losses[-1] -
                       policy_losses[-2]) < convergence_threshold:
                    if not args.cuda:
                        plt.ioff()
                        plt.close()
                    env.close()
                    return time_str, policy
            policy_loss.backward()
            policy_optimizer.step()

            print(
                '====> Episode: {} value_loss: {:.4f} policy_loss: {:.4f} return: {}'
                .format(episode, value_losses[-1], policy_losses[-1],
                        returns[-1]))

            if not args.cuda:
                plt.clf()
                plt.figure(1)
                plt.subplot(311)
                plt.xlabel('episodes')
                plt.ylabel('cumulative rewards')
                plt.plot(returns)
                plt.subplot(312)
                plt.xlabel('update every ' + str(args.update_frequency) +
                           ' episodes')
                plt.ylabel('value network losses')
                plt.plot(value_losses)
                plt.subplot(313)
                plt.xlabel('update every ' + str(args.update_frequency) +
                           ' episodes')
                plt.ylabel('policy losses')
                plt.plot(policy_losses)
                plt.show()
                plt.savefig(''.join([
                    'fnn/training/', time_str,
                    '_discounting_', str(args.gamma),
                    '_update_frequency_', str(args.update_frequency),
                    '_value_update_times_', str(args.value_update_times)
                ]) + '.png')

    if not args.cuda:
        plt.ioff()
        plt.close()
    env.close()
    return time_str, policy

class GroupedPath(Record("groupid", "path")):
    """Like a FileSystem RootedPath, but relative to a group, not a
    FileSystem path.  Often known as a "gpath"."""
    pass

class MergeAction(Record("type", "gpath", "older", "newer", "details")):
    def __new__(cls, type, older, newer, details=None):
        gpath = get_gpath(older, newer)
        return cls.new(type, gpath, older, newer, details)

class ActorCall(Record("name", "args", "kargs", "future")):
    pass

class HistoryCache(Record("entries")):
    def add_entries(self, entries):
        self.entries.extend(entries)

class HistoryDiff(Record("type", "gpath", "latest1", "latest2")):
    """Compares the latest of the histories of a given gpath (GroupedPath)."""

    def __new__(cls, type, latest1, latest2):
        gpath = get_gpath(latest1, latest2)
        return cls.new(type, gpath, latest1, latest2)

from util import Record

config = Record(
    trial='m', ckpt=None, seed=0
    ### data
    , unk=0, eos=1, bos=2, cap=64
    ### model
    , dim_src=8192, dim_tgt=8192, dim_emb=512, dim_mid=2048, depth=2
    ### batch
    , batch_train=64, batch_valid=256, total_valid=4096)

paths = Record(log="~/cache/tensorboard-logdir/eti",
               raw="../data",
               pred="../trial/pred",
               ckpt="../trial/ckpt",
               data="../trial/data")

os.environ['CUDA_VISIBLE_DEVICES'] = A.gpu

###############
# preparation #
###############

from util import Record, comp
from model import vAe as vae
from tqdm import tqdm
from util_io import pform, load_txt, load_json
from util_np import np, vpack, sample, partition
from util_sp import load_spm, encode_capped, encode_capped_sample_pair
from util_tf import tf, pipe

config = load_json(A.config)
P = Record(config['paths'])
C = Record(config['model'])
T = Record(config['train'])

tf.set_random_seed(A.seed)

#############
# load data #
#############

vocab = load_spm(P.vocab)
valid = np.load(P.valid)

def batch(size=T.batch_train, path=P.train,

class MergeLogEntry(Record(*TABLE_FIELDS)):
    pass