def __init__(self, train, device):
    max_movie_id = 0
    self.device = device
    self.batch_size = 200
    self.data_count = 0
    self.window_size = 2

    filename = 'temp/train.cooc_dnn.data'
    self.train_data = TrainData(filename)

    write_progress = tqdm.tqdm(train)
    for uid, views in write_progress:
        clicks = map(lambda x: int(x[0]), filter(lambda x: x[1] == 1, views))
        if len(clicks) == 0:
            continue
        max_movie_id = max(max_movie_id, max(clicks))
        # emit (x, y) pairs for each click and its next window_size clicks.
        for idx, x in enumerate(clicks):
            for offset in range(self.window_size):
                if idx + offset + 1 >= len(clicks):
                    continue
                y = clicks[idx + offset + 1]
                self.train_data.write(x, y)
                self.data_count += 1
    self.train_data.write_over()

    self.movie_count = max_movie_id + 1
    pydev.log('movie_count=%d' % self.movie_count)
    pydev.log('data_count=%d' % self.data_count)

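# A minimal, self-contained sketch of the sliding-window pairing above
# (illustrative helper; not part of the original TrainData API):
def iter_cooc_pairs(clicks, window_size=2):
    # yield (x, y) for each click and each of its next `window_size` clicks.
    for idx, x in enumerate(clicks):
        for offset in range(window_size):
            if idx + offset + 1 >= len(clicks):
                continue
            yield x, clicks[idx + offset + 1]

# list(iter_cooc_pairs([3, 7, 9, 2])) == [(3, 7), (3, 9), (7, 9), (7, 2), (9, 2)]
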
def __init__(self, train, device):
    max_movie_id = 0
    self.batch_size = 200
    self.data_count = 0
    self.device = device

    filename = 'temp/train.data'
    self.train_data = TrainData(filename, 'r')
    # self.train_data = TrainData(filename)

    write_progress = tqdm.tqdm(train)
    for uid, views in write_progress:
        clicks = map(lambda x: int(x[0]), filter(lambda x: x[1] == 1, views))
        if len(clicks) == 0:
            continue
        max_movie_id = max(max_movie_id, max(clicks))
        for idx, click in enumerate(clicks):
            x = clicks[:idx]
            y = clicks[idx]
            if len(x) < 3:
                continue
            # self.train_data.write(x, y)
            self.data_count += 1
    # self.train_data.write_over()

    self.movie_count = max_movie_id + 1
    pydev.log('movie_count=%d' % self.movie_count)
    pydev.log('data_count=%d' % self.data_count)

def __init__(self, train, device, epoch_count, batch_size):
    max_movie_id = 0
    self.epoch_count = epoch_count
    self.batch_size = batch_size
    self.data_count = 0
    self.device = device
    self.train = train

    write_progress = tqdm.tqdm(train)
    for uid, views in write_progress:
        clicks = map(lambda x: int(x[0]), filter(lambda x: x[1] == 1, views))
        if len(clicks) == 0:
            continue
        max_movie_id = max(max_movie_id, max(clicks))
        self.data_count += len(views)

    self.train_iter_count = self.epoch_count * self.data_count / self.batch_size

    self.movie_count = max_movie_id + 1
    pydev.log('movie_count=%d' % self.movie_count)
    pydev.log('data_count=%d' % self.data_count)

def test_validation():
    y = []
    y_ = []
    batch_size = 2048
    for begin in range(0, len(valid), batch_size):
        batch = valid[begin:begin + batch_size]
        output = model.forward(
            torch.tensor(map(lambda x: x[0], batch)).to(device),
            torch.tensor(map(lambda x: x[1], batch)).to(device),
        )
        y += map(lambda x: x[2], batch)
        y_ += output.view(-1).tolist()

    auc = metrics.roc_auc_score(y, y_)
    pydev.log('Valid AUC: %.3f' % auc)

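# Batching note for the loop above: Python slicing clamps at the end of the
# list, so the final slice is simply a smaller batch (sizes illustrative).
valid_size, bs = 5000, 2048
starts = range(0, valid_size, bs)                    # [0, 2048, 4096]
sizes = [min(bs, valid_size - b) for b in starts]    # [2048, 2048, 904]
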
def test_ins_data(self, model, slot_info):
    autoarg = pydev.AutoArg()
    input_filename = autoarg.option('test')
    batch_size = int(autoarg.option('batch', 20000))

    reader = easy.slot_file.SlotFileReader(input_filename)
    y = []
    y_ = []
    reading_count = 0
    while reader.epoch() < 1:
        labels, slots = reader.next(batch_size)

        # make pytorch data.
        clicks = torch.Tensor(labels).to(self.device)
        dct = {}
        for item in slots:
            for slot, ids in item:
                if slot not in dct:
                    # id_list, offset
                    dct[slot] = [[], []]
                lst = dct[slot][0]
                idx = dct[slot][1]
                idx.append(len(lst))
                lst += ids

        x = []
        for slot, _ in slot_info:
            id_list, offset = dct.get(slot, [[], []])
            emb_pair = (torch.tensor(id_list).to(self.device),
                        torch.tensor(offset).to(self.device))
            x.append(emb_pair)

        clicks_ = model.forward(x)

        y += clicks.view(-1).tolist()
        y_ += clicks_.view(-1).tolist()

        pydev.log('reading_count : %d' % reading_count)
        reading_count += 1

    auc = metrics.roc_auc_score(y, y_)
    pydev.log('Valid AUC: %.3f' % auc)

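# The per-slot (id_list, offset) pairs built above follow the input convention
# of torch.nn.EmbeddingBag: a flat 1-D id tensor plus the start offset of each
# example's bag (hedged: the model's forward is assumed to consume the pairs
# this way). A self-contained illustration with arbitrary sizes:
import torch

bag = torch.nn.EmbeddingBag(num_embeddings=100, embedding_dim=8, mode='sum')
ids = torch.tensor([3, 7, 9, 2, 5])   # two examples flattened: [3, 7, 9] and [2, 5]
offsets = torch.tensor([0, 3])        # each example's start position in `ids`
pooled = bag(ids, offsets)            # shape (2, 8): one pooled vector per example
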
def __init__(self, inputs, config_reader=None):
    self.stacks = []
    self.shape = map(int, config_reader('shape').split(','))
    self.stack_count = int(config_reader('stack_count', 1))
    self.use_residual = int(config_reader('use_residual', 0))
    self.pooling_size = int(config_reader('pool_size', 2))
    self.pooling_strides = int(config_reader('pool_strides', 2))
    self.pooling_type = config_reader('pool_type')
    pydev.log('StackCount : %d' % self.stack_count)
    pydev.log('use_residual : %d' % self.use_residual)

    src = inputs
    for i in range(0, self.stack_count):
        layer = Layer_Conv2D(src, self.__fake_config_reader(i, config_reader))
        self.stacks.append(layer)
        dest = layer.outputs
        src = dest

    self.inputs = inputs
    self.outputs = []
    for idx, x in enumerate(inputs):
        if self.use_residual:
            pydev.log('use_residual!')
            if self.shape[2] != self.shape[3]:
                pydev.log('input shape differs from output shape, so the residual starts from the second stack.')
                y = self.stacks[0].outputs[idx] + dest[idx]
            else:
                y = self.inputs[idx] + dest[idx]
        else:
            y = dest[idx]

        if self.pooling_type == 'max':
            y = tf.nn.max_pool(y,
                               ksize=[1, self.pooling_size, self.pooling_size, 1],
                               strides=[1, self.pooling_strides, self.pooling_strides, 1],
                               padding='SAME')
        elif self.pooling_type == 'avg':
            y = tf.nn.avg_pool(y,
                               ksize=[1, self.pooling_size, self.pooling_size, 1],
                               strides=[1, self.pooling_strides, self.pooling_strides, 1],
                               padding='SAME')
        self.outputs.append(y)

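# Usage sketch for the stacked-conv layer above. config_reader is assumed to
# be a callable (key, default=None) -> value, as the calls above imply; this
# dict-backed stand-in is illustrative only.
def make_config_reader(dct):
    def reader(key, default=None):
        return dct.get(key, default)
    return reader

conv_reader = make_config_reader({
    'shape': '3,3,64,64',    # kernel_h, kernel_w, in_channels, out_channels
    'stack_count': '2',
    'use_residual': '1',
    'pool_size': '2',
    'pool_strides': '2',
    'pool_type': 'max',
})
# layer = Layer_StackConv2D(inputs, conv_reader)
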
def measure(predictor, test, debug=False):
    progress = tqdm.tqdm(test)
    y = []
    y_ = []
    debug_fd = None
    if debug:
        debug_fd = file('log/debug.log', 'w')
    for uid, iid, score in progress:
        pred_score = predictor(uid, iid, debug_fd)
        if debug:
            print >> debug_fd, '%s\t%s\t%d\t%.3f' % (uid, iid, score, pred_score)
        y.append(score)
        y_.append(pred_score)

    pydev.info('Predict over')
    auc = metrics.roc_auc_score(y, y_)
    pydev.log('Test AUC: %.3f' % auc)

def test_uid_iid_model(self, model):
    self.load_uid_iid_data()

    y = []
    y_ = []
    batch_size = 2048
    for begin in tqdm.tqdm(range(0, len(self.test), batch_size)):
        batch = self.test[begin:begin + batch_size]
        output = model.forward(
            torch.tensor(map(lambda x: x[0], batch)).to(self.device),
            torch.tensor(map(lambda x: x[1], batch)).to(self.device),
        )
        y += map(lambda x: x[2], batch)
        y_ += output.view(-1).tolist()

    auc = metrics.roc_auc_score(y, y_)
    pydev.log('Valid AUC: %.3f' % auc)

def __init__(self, inputs, config_reader=None):
    n_in = int(config_reader('n_in'))
    n_out = int(config_reader('n_out'))
    op = config_reader('op')
    self.l2wd = float(config_reader('l2wd', 0.0))
    self.bias_init = float(config_reader('bias_init', 0.0))
    self.weight_stddev = float(config_reader('weight_stddev', 0.01))
    pydev.log('l2 weight : %f' % self.l2wd)
    pydev.log('weight_stddev : %f' % self.weight_stddev)
    pydev.log('bias_init : %f' % self.bias_init)

    # dispatch from config op name to activation function.
    Fdict = {
        'sigmoid': tf.sigmoid,
        'tanh': tf.tanh,
        'softmax': tf.nn.softmax,
        'relu': tf.nn.relu
    }
    F = Fdict.get(op, None)

    self.w = weight_variable([n_in, n_out], l2_weight=self.l2wd, stddev=self.weight_stddev)
    self.b = bias_variable([n_out], init=self.bias_init)

    # apply linear transform, then the activation function if configured.
    self.inputs = inputs
    self.outputs = []
    for x in self.inputs:
        if F is None:
            print >> sys.stderr, 'Warning: FullConnectOp with no OP. [%s]' % op
            y = tf.matmul(x, self.w) + self.b
        else:
            y = F(tf.matmul(x, self.w) + self.b)
        self.outputs.append(y)

def __init__(self, train, device, epoch_count, batch_size):
    max_movie_id = 0
    max_user_id = 0
    self.epoch_count = epoch_count
    self.batch_size = batch_size
    self.data_count = 0
    self.device = device
    self.train = train
    self.current_epoch = 0

    write_progress = tqdm.tqdm(train)
    for uid, iid, click in write_progress:
        max_movie_id = max(max_movie_id, iid)
        max_user_id = max(max_user_id, uid)
        self.data_count += 1

    self.train_iter_count = self.epoch_count * self.data_count / self.batch_size

    self.user_count = max_user_id + 1
    self.movie_count = max_movie_id + 1
    pydev.log('user_count=%d' % self.user_count)
    pydev.log('movie_count=%d' % self.movie_count)
    pydev.log('data_count=%d' % self.data_count)

def read(self, filename):
    self.__fd = file(filename)
    self.__record_count = 0
    while 1:
        ret = self.__read_buffers(3)
        # load over.
        if ret is None:
            break

        buf_label, buf_image_a, buf_image_b = ret
        #print len(buf_label), len(buf_image_a), len(buf_image_b)
        label = struct.unpack('i', buf_label)[0]
        image_a = cv2.imdecode(np.asarray(bytearray(buf_image_a), dtype=np.uint8), 1)
        image_b = cv2.imdecode(np.asarray(bytearray(buf_image_b), dtype=np.uint8), 1)
        yield label, image_a, image_b
        self.__record_count += 1

    pydev.log('%d image(s) loaded' % self.__record_count)

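# Usage sketch for read() above (hedged: the owning class name is not shown in
# this snippet, so PairImageReader is a placeholder). Note read() is a
# generator, so the final count log only fires after the stream is exhausted.
reader = PairImageReader()
for label, image_a, image_b in reader.read('temp/pairs.bin'):
    pass  # e.g. collect (image_a, image_b, label) into a training batch
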
def __init__(self, train):
    self.x = []
    self.y = []
    max_movie_id = 0
    for uid, views in train:
        clicks = map(lambda x: int(x[0]), filter(lambda x: x[1] == 1, views))
        if len(clicks) == 0:
            continue
        max_movie_id = max(max_movie_id, max(clicks))
        for idx, click in enumerate(clicks):
            x = clicks[:idx]
            y = clicks[idx]
            if len(x) < 3:
                continue
            self.x.append(x)
            self.y.append(y)

    self.movie_count = max_movie_id + 1
    pydev.log('movie_count=%d' % self.movie_count)
    pydev.log('data_count=%d' % len(self.x))

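# Worked example of the prefix/target split above (illustrative helper):
def iter_prefix_samples(clicks, min_prefix=3):
    # yield (prefix, target) pairs, skipping prefixes shorter than min_prefix.
    for idx in range(len(clicks)):
        if idx < min_prefix:
            continue
        yield clicks[:idx], clicks[idx]

# list(iter_prefix_samples([5, 1, 8, 2])) == [([5, 1, 8], 2)]
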
def __init__(self, inputs, config_reader=None):
    n_in = int(config_reader('n_in'))
    n_out = int(config_reader('n_out'))
    self.l2wd = float(config_reader('l2wd', 0.0))
    self.bias_init = float(config_reader('bias_init', 0.0))
    self.weight_stddev = float(config_reader('weight_stddev', 0.01))
    pydev.log('l2 weight : %f' % self.l2wd)
    pydev.log('weight_stddev : %f' % self.weight_stddev)
    pydev.log('bias_init : %f' % self.bias_init)

    self.w = weight_variable([n_in, n_out], l2_weight=self.l2wd, stddev=self.weight_stddev)
    self.b = bias_variable([n_out], init=self.bias_init)

    # plain linear transform; no activation in this layer.
    self.inputs = inputs
    self.outputs = []
    for x in self.inputs:
        y = tf.matmul(x, self.w) + self.b
        self.outputs.append(y)

def __init__(self, train, device, epoch_count, batch_size, movie_dir):
    max_movie_id = 0
    max_user_id = 0

    pydev.info('load movies')
    self.movies = utils.load_movies(movie_dir, ignore_tags=True)

    self.epoch_count = epoch_count
    self.batch_size = batch_size
    self.data_count = 0
    self.device = device
    self.data = []

    write_progress = tqdm.tqdm(train)
    self.slot_coder = easy_train.SlotIndexCoder()

    # feature extracting.
    for uid, iid, click in write_progress:
        max_movie_id = max(max_movie_id, iid)
        max_user_id = max(max_user_id, uid)
        self.data_count += 1

        movie_id = int(iid)
        movie = self.movies.get(movie_id, utils.MovieInfo())

        # cross feature: user id x movie genre.
        user_genres = []
        for genres in movie.genres:
            key = '%s_%s' % (uid, genres)
            idx = self.slot_coder.alloc('uid_genres', key)
            user_genres.append(idx)

        self.data.append((uid, iid, user_genres, click))

    self.train_iter_count = self.epoch_count * self.data_count / self.batch_size

    self.user_count = max_user_id + 1
    self.movie_count = max_movie_id + 1
    pydev.log('user_count=%d' % self.user_count)
    pydev.log('movie_count=%d' % self.movie_count)
    pydev.log('data_count=%d' % self.data_count)

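# The uid-x-genre cross feature above relies on SlotIndexCoder.alloc mapping a
# (slot, key) pair to a stable integer index. A dict-backed stand-in with that
# behavior (an assumption; not the real easy_train implementation):
class SlotIndexCoderSketch(object):
    def __init__(self):
        self.__slot_maps = {}

    def alloc(self, slot, key):
        idx_map = self.__slot_maps.setdefault(slot, {})
        if key not in idx_map:
            idx_map[key] = len(idx_map)   # dense indices, first-seen order per slot
        return idx_map[key]
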
    sys.exit(-1)

emb_dict = ItemIndex(sys.argv[2])
cooc_dict = CoocDict()
for line in file(sys.argv[1]).readlines():
    terms = line.strip().split(' ')
    for idx in range(len(terms) - window_size):
        a = terms[idx]
        for j in range(window_size):
            b = terms[idx + j + 1]
            cooc_dict.add(a, b)
            cooc_dict.add(b, a)
pydev.log('load cooc over.')

hit = 0
total = 0
for key in cooc_dict.cooc_dict:
    if key == '':
        continue
    values = sorted(cooc_dict.cooc_dict[key].iteritems(), key=lambda x: -x[1])[:20]
    recalls, dis = emb_dict.index.get_nns_by_item(int(key), n=50, include_distances=True)

    total += len(values)
    for cooc, count in values:
        if int(cooc) in set(recalls):
            hit += 1

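# Natural summary for the loop above (an assumption: the original fragment
# ends before reporting, but hit/total is exactly what it accumulates).
if total > 0:
    pydev.log('cooc pairs covered by ANN recall: %.3f (%d/%d)' % (1. * hit / total, hit, total))
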
def __init__(self, config_file, network_name, output_01=False):
    self.__LayerCreator__ = {
        'full_connect': Layer_FullConnect,
        'full_connect_op': Layer_OpFullConnect,
        'dot': Layer_Dot,
        'norm2': Layer_Norm2Cost,
        'softmax_entropy': Layer_SoftmaxEntropyCost,
        'sigmoid': Layer_Sigmoid,
        'softmax': Layer_Softmax,
        'tanh': Layer_Tanh,
        'relu': Layer_Relu,
        'pooling': Layer_Pooling,
        # the following 4 layer types share the same implementation.
        'conv2d': Layer_StackConv2D,
        'conv2d_pool': Layer_StackConv2D,
        'stack_conv2d': Layer_StackConv2D,
        'stack_conv2d_pool': Layer_StackConv2D,
        'reshape': Layer_Reshape,
        'dropout': Layer_DropOut,
        'local_norm': Layer_LocalResponseNormalization,
    }

    self.__output_01 = output_01
    self.__layers = []
    self.__layers_info = {}

    cp = ConfigParser.ConfigParser()
    self.__config_parser = cp
    self.__network_name = network_name
    cp.read(config_file)

    # read inputs and label.
    self.__inputs = self.__placeholders_read(
        pydev.config_default_get(cp, network_name, 'input_def', 'f:2'))
    self.__label = self.__single_placeholder_read(
        pydev.config_default_get(cp, network_name, 'label_def', 'f:2'))
    pydev.log('Inputs: %s' % self.__inputs)
    pydev.log('Labels: %s' % self.__label)

    layer_names = cp.get(network_name, 'layers').split(',')
    active_name = cp.get(network_name, 'active').strip()
    cost_name = cp.get(network_name, 'cost').strip()

    global_step = tf.Variable(0, trainable=False)

    self.__batch_size = int(pydev.config_default_get(cp, network_name, 'batch_size', 50))
    self.__epoch = int(pydev.config_default_get(cp, network_name, 'epoch', 10))
    print >> sys.stderr, 'Epoch     : %d' % self.__epoch
    print >> sys.stderr, 'BatchSize : %d' % self.__batch_size

    for layer_name in layer_names:
        layer_type = cp.get(network_name, '%s.type' % layer_name)
        input_names = cp.get(network_name, '%s.input' % layer_name).split(',')
        # type, inputs, layer_refer.
        self.__layers_info[layer_name] = [layer_type, input_names, None]

    self.__layer_has_receiver = set()
    for name in layer_names:
        self.__init_layer(name)

    # check receivers.
    warning_layers = []
    for name in layer_names:
        if name not in self.__layer_has_receiver:
            warning_layers.append(name)
    if len(warning_layers) > 0:
        pydev.err('===[ Layers with no receivers, check them : %s ]===' % ','.join(warning_layers))

    # by default, use outputs[0] as y and loss.
    # make the network's activation function.
    self.active = self.__get_layer(active_name).outputs[0]

    # loss function:
    #   main_loss : main target loss.
    #   penalized_loss = wd_loss
    #   loss = main_loss + penalized_loss
    self.main_loss = self.__get_layer(cost_name).outputs[0]
    tf.add_to_collection('losses', self.main_loss)
    self.loss = tf.add_n(tf.get_collection('losses'), name='total_loss')

    # record losses.
    tf.scalar_summary('train/total_loss', self.loss)
    tf.scalar_summary('train/main_loss', self.main_loss)

    # learning method and learning rate.
    self.__learner = pydev.config_dict_get(cp, network_name, 'learner', {
        'gradient': tf.train.GradientDescentOptimizer,
        'movingGradient': MovingGradientDescentOptimizer,
        'adam': tf.train.AdamOptimizer,
    }, default_key='gradient')

    self.__lr_value = float(pydev.config_default_get(cp, network_name, 'learning_rate', 1e-3))
    self.__lr_decay_ratio = float(pydev.config_default_get(cp, network_name, 'learning_decay_ratio', 0.96))
    self.__lr_decay_step = float(pydev.config_default_get(cp, network_name, 'learning_decay_step', 300))
    self.__lr_tensor = pydev.config_dict_get(cp, network_name, 'learning_rate_type', {
        'fixed': tf.Variable(self.__lr_value),
        'exponential_decay': tf.train.exponential_decay(
            self.__lr_value, global_step,
            self.__lr_decay_step, self.__lr_decay_ratio, staircase=True)
    }, default_key='fixed')

    pydev.log('learner : %s' % self.__learner)
    pydev.log('lr_type : %s' % self.__lr_tensor)
    pydev.log('lr_value: %s' % self.__lr_value)
    pydev.log('lr_step : %s' % self.__lr_decay_step)
    pydev.log('lr_ratio: %s' % self.__lr_decay_ratio)
    tf.scalar_summary('train/learning_rate', self.__lr_tensor)

    # generate training function.
    self.train = self.__learner(self.__lr_tensor).minimize(self.loss, global_step=global_step)

    self.__train_summary_merged = tf.merge_all_summaries()

    # setup session config.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    #session_config.gpu_options.per_process_gpu_fraction = 0.4
    self.session = tf.Session(config=session_config)

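# An illustrative config section for the constructor above (hedged: only keys
# visible in the cp.get / config_default_get calls are grounded; the per-layer
# parameter scheme and the first layer's input name are assumptions):
#
#   [mlp]
#   input_def = f:2
#   label_def = f:2
#   batch_size = 50
#   epoch = 10
#   layers = fc1,act1,cost1
#   active = act1
#   cost = cost1
#   learner = adam
#   learning_rate = 0.001
#   learning_rate_type = exponential_decay
#   learning_decay_ratio = 0.96
#   learning_decay_step = 300
#   fc1.type = full_connect
#   fc1.input = input
#   act1.type = sigmoid
#   act1.input = fc1
#   cost1.type = norm2
#   cost1.input = act1
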
def fit(self, X, Y, callback=None, callback_iteration=100, preprocessor=None, first_run=True):
    '''
    X : training X
    Y : training Y
    callback : callback invoked during training, as
               callback(predict_function, summary_writer, iteration)
    callback_iteration : run the callback every N iterations.
    preprocessor : preprocessing op applied to each (x, ..., x, y) tuple.
    '''
    # uniform input:
    #   X can be [tensors ...] or a single tensor;
    #   make X into [tensors ...].
    if not isinstance(X, tuple) and not isinstance(X, list):
        X = [X]
        pydev.log('fit X to be [X] %s' % type(X))

    # make shuffle and preprocess graph.
    holders = []
    for x in X:
        holders.append(tf.constant(x))
    holders.append(tf.constant(Y))
    queues = tf.train.slice_input_producer(holders)
    if preprocessor is not None:
        queues = preprocessor(*queues)
    batchs = tf.train.batch(queues, batch_size=self.__batch_size, num_threads=4)

    # init all variables.
    if first_run:
        # initialize the training summary.
        ts = time.asctime().replace(' ', '_')
        self.__train_writer = tf.train.SummaryWriter(
            './tensorboard_logs/%s/%s' % (self.__network_name, ts), self.session.graph)
        self.session.run(tf.initialize_all_variables())

    # simple train.
    data_size = len(X[-1])
    iteration_count = (self.__epoch * data_size) // self.__batch_size
    print >> sys.stderr, 'Iteration=%d (batchsize=%d, epoch=%d, datasize=%d)' % (
        iteration_count, self.__batch_size, self.__epoch, data_size)

    self.__current_iteration = 0
    last_callback_iteration = 0
    begin_time = time.time()
    with self.session.as_default():
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        for it in xrange(1, iteration_count + 1):
            # training code.
            self.__current_iteration = it

            # fetch a batch and fit it.
            subs = self.session.run(batchs)
            loss, main_loss, summary_info, lr = self.fit_one_batch(*subs)
            self.__train_writer.add_summary(summary_info, self.__current_iteration)

            # report progress.
            percentage = it * 1. / iteration_count
            cost_time = time.time() - begin_time
            remain_time = cost_time / percentage - cost_time
            sys.stderr.write(
                '%cProgress: %3.1f%% [%s/%s] [iter=%7d loss=%.4f(%.4f+%.4f) lr=%f ips=%.2f]' % (
                    13, percentage * 100.,
                    pydev.format_time(cost_time), pydev.format_time(remain_time),
                    it, loss, main_loss, loss - main_loss, lr,
                    it / (time.time() - begin_time)))

            # call back the reporter and tester.
            if callback:
                if it - last_callback_iteration >= callback_iteration:
                    callback(self.predict, self.__train_writer, self.__current_iteration)
                    last_callback_iteration = it

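# Usage sketch for fit() above (hedged: the owning network class and the data
# names are illustrative). The callback signature matches the call site above:
# callback(predict_function, summary_writer, iteration).
def validation_report(predict, writer, iteration):
    pydev.log('validation hook at iteration %d' % iteration)

# net = Network('network.conf', 'mlp')   # hypothetical constructor, cf. above
# net.fit(train_X, train_Y, callback=validation_report, callback_iteration=500)
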