Ejemplo n.º 1
0
    def __init__(self, train, device):
        """Write skip-gram style (x, y) co-occurrence pairs of clicked movies.

        train: iterable of (uid, views), where views is a sequence of
            (movie_id, click_flag) pairs.
        device: device handle stored for later batch construction.

        Side effects: writes pairs to 'temp/train.cooc_dnn.data' and logs
        counts via pydev.
        """
        max_movie_id = 0

        self.device = device
        self.batch_size = 200
        self.data_count = 0
        self.window_size = 2

        filename = 'temp/train.cooc_dnn.data'
        self.train_data = TrainData(filename)

        write_progress = tqdm.tqdm(train)
        for uid, views in write_progress:
            # Materialize the clicked ids: len() and indexing below require
            # a list (map/filter are lazy iterators on Python 3).
            clicks = [int(v[0]) for v in views if v[1] == 1]
            if not clicks:
                continue

            max_movie_id = max(max_movie_id, max(clicks))

            for idx, x in enumerate(clicks):
                for offset in range(self.window_size):
                    if idx + offset + 1 >= len(clicks):
                        # Larger offsets only move further past the end,
                        # so stop instead of re-testing each one.
                        break
                    y = clicks[idx + offset + 1]

                    self.train_data.write(x, y)
                    self.data_count += 1

        self.train_data.write_over()
        self.movie_count = max_movie_id + 1
        # NOTE(review): this logs movie_count (max id + 1) under the
        # 'max_movie_id' label, matching the other loaders in this file.
        pydev.log('max_movie_id=%d' % self.movie_count)
        pydev.log('data_count=%d' % self.data_count)
Ejemplo n.º 2
0
    def __init__(self, train, device):
        """Count next-click samples for an already-written data file.

        train: iterable of (uid, views), views being (movie_id, click) pairs.
        device: device handle stored for later use.

        Only counts samples; the write calls are deliberately disabled
        because the file is opened in read mode.
        """
        max_movie_id = 0

        self.batch_size = 200
        self.data_count = 0
        self.device = device

        filename = 'temp/train.data'

        self.train_data = TrainData(filename, 'r')
        # self.train_data = TrainData(filename)

        write_progress = tqdm.tqdm(train)
        for uid, views in write_progress:
            # Materialize the clicked ids: len() and slicing below require
            # a list (map/filter are lazy iterators on Python 3).
            clicks = [int(v[0]) for v in views if v[1] == 1]
            if not clicks:
                continue

            max_movie_id = max(max_movie_id, max(clicks))

            # x = the history before position idx, y = the click at idx.
            for idx, y in enumerate(clicks):
                x = clicks[:idx]
                if len(x) < 3:
                    # require at least 3 history items per sample.
                    continue

                #self.train_data.write(x, y)
                self.data_count += 1

        # self.train_data.write_over()
        self.movie_count = max_movie_id + 1
        pydev.log('max_movie_id=%d' % self.movie_count)
        pydev.log('data_count=%d' % self.data_count)
Ejemplo n.º 3
0
    def __init__(self, train, device, epoch_count, batch_size):
        """Scan `train` once to size the id space and the iteration budget.

        train: iterable of (uid, views), views being (movie_id, click) pairs.
        device: device handle stored for later use.
        epoch_count, batch_size: used to derive train_iter_count.
        """
        max_movie_id = 0

        self.epoch_count = epoch_count
        self.batch_size = batch_size
        self.data_count = 0
        self.device = device
        self.train = train

        write_progress = tqdm.tqdm(train)
        for uid, views in write_progress:
            # Materialize so len()/max() work on Python 3 too (map/filter
            # return lazy iterators there).
            clicks = [int(v[0]) for v in views if v[1] == 1]
            if not clicks:
                continue

            max_movie_id = max(max_movie_id, max(clicks))
            # NOTE(review): counts all views, not only clicks -- presumably
            # intentional since training consumes whole view lists.
            self.data_count += len(views)

        # Floor division: the iteration count must stay an integer
        # (true division would produce a float on Python 3).
        self.train_iter_count = self.epoch_count * self.data_count // self.batch_size

        # self.train_data.write_over()
        self.movie_count = max_movie_id + 1
        pydev.log('max_movie_id=%d' % self.movie_count)
        pydev.log('data_count=%d' % self.data_count)
Ejemplo n.º 4
0
    def test_validation():
        """Score the closure's `valid` set with `model` and log the AUC.

        Relies on enclosing-scope names: valid (list of (x0, x1, label)
        triples), model, device, torch, metrics, pydev.
        """
        y = []
        y_ = []

        batch_size = 2048
        for begin in range(0, len(valid)-1, batch_size):
            # Slice once per step instead of three times.
            batch = valid[begin:begin + batch_size]
            # torch.tensor needs a materialized sequence -- map() is a lazy
            # iterator on Python 3 and would raise.
            output = model.forward(
                    torch.tensor([x[0] for x in batch]).to(device),
                    torch.tensor([x[1] for x in batch]).to(device),
                    )
            y += [x[2] for x in batch]
            y_ += output.view(-1).tolist()

        auc = metrics.roc_auc_score(y, y_)
        pydev.log('Valid AUC: %.3f' % auc)
Ejemplo n.º 5
0
    def test_ins_data(self, model, slot_info):
        """Evaluate `model` on a slot-format test file and log its AUC.

        model: object whose forward(x) returns predicted click scores.
        slot_info: iterable of (slot, ...) pairs fixing the slot order the
            model expects.

        Input path and batch size come from command-line options ('test',
        'batch'). Makes a single pass over the file.
        """
        autoarg = pydev.AutoArg()
        input_filename = autoarg.option('test')
        batch_size = int(autoarg.option('batch', 20000))
        reader = easy.slot_file.SlotFileReader(input_filename)

        # y: ground-truth labels, y_: model scores, accumulated per batch.
        y = []
        y_ = []
        reading_count = 0
        while reader.epoch() < 1:
            labels, slots = reader.next(batch_size)

            # make pytorch data.
            clicks = torch.Tensor(labels).to(self.device)
            # Bucket ids per slot into (id_list, offsets) pairs, the layout
            # torch EmbeddingBag-style lookups expect: offsets records where
            # each instance's ids start inside the flat id_list.
            dct = {}
            for item in slots:
                for slot, ids in item:
                    if slot not in dct:
                        # id_list, offset
                        dct[slot] = [[], []]

                    lst = dct[slot][0]
                    idx = dct[slot][1]
                    idx.append(len(lst))
                    lst += ids

            # NOTE(review): instances missing a slot add no offset entry for
            # it, so offsets may not align one-to-one with instances --
            # confirm the model tolerates this.
            x = []
            for slot, _ in slot_info:
                # Slots absent from this batch get empty id/offset tensors.
                id_list, offset = dct.get(slot, [[], []])
                emb_pair = torch.tensor(id_list).to(
                    self.device), torch.tensor(offset).to(self.device)
                x.append(emb_pair)

            clicks_ = model.forward(x)

            y += clicks.view(-1).tolist()
            y_ += clicks_.view(-1).tolist()

            # NOTE(review): log13 presumably logs with a carriage return
            # (ASCII 13) for in-place progress -- confirm in pydev.
            pydev.log13('reading_count : %d' % reading_count)
            reading_count += 1

        auc = metrics.roc_auc_score(y, y_)
        # Python 2 bare print statement: emits a newline after the \r log.
        print
        pydev.log('Valid AUC: %.3f' % auc)
Ejemplo n.º 6
0
    def __init__(self, inputs, config_reader=None):
        """Stack of Conv2D layers with optional residual shortcut and pooling.

        inputs: list of input tensors; one output tensor is produced per
            input in self.outputs.
        config_reader: callable(key[, default]) returning config strings;
            keys used: shape, stack_count, use_residual, pool_size,
            pool_strides, pool_type.
        """
        self.stacks = []
        # list() so shape stays indexable below (map is lazy on Python 3).
        self.shape = list(map(int, config_reader('shape').split(',')))
        self.stack_count = int(config_reader('stack_count', 1))
        self.use_residual = int(config_reader('use_residual', 0))

        self.pooling_size = int(config_reader('pool_size', 2))
        self.pooling_strides = int(config_reader('pool_strides', 2))
        self.pooling_type = config_reader('pool_type')

        pydev.log('StackCount : %d' % self.stack_count)
        pydev.log('use_residual : %d' % self.use_residual)

        # Chain the conv layers: each stack consumes the previous outputs.
        # NOTE(review): assumes stack_count >= 1, otherwise `dest` below
        # is unbound.
        src = inputs
        for i in range(0, self.stack_count):
            layer = Layer_Conv2D(src,
                                 self.__fake_config_reader(i, config_reader))
            self.stacks.append(layer)

            dest = layer.outputs
            src = dest

        self.inputs = inputs
        self.outputs = []
        for idx, x in enumerate(inputs):
            if self.use_residual:
                pydev.log('use_residual!')
                if self.shape[2] != self.shape[3]:
                    pydev.log(
                        'input shape is different with output shape, so residual by second stack.'
                    )
                    # Channel counts differ: shortcut from the first stack's
                    # output instead of the raw input.
                    y = self.stacks[0].outputs[idx] + dest[idx]
                else:
                    y = self.inputs[idx] + dest[idx]
            else:
                y = dest[idx]

            # Optional spatial pooling on the stacked result.
            if self.pooling_type == 'max':
                y = tf.nn.max_pool(
                    y,
                    ksize=[1, self.pooling_size, self.pooling_size, 1],
                    strides=[1, self.pooling_strides, self.pooling_strides, 1],
                    padding='SAME')
            elif self.pooling_type == 'avg':
                y = tf.nn.avg_pool(
                    y,
                    ksize=[1, self.pooling_size, self.pooling_size, 1],
                    strides=[1, self.pooling_strides, self.pooling_strides, 1],
                    padding='SAME')

            self.outputs.append(y)
Ejemplo n.º 7
0
def measure(predictor, test, debug=False):
    """Score every test triple with `predictor` and log the resulting AUC.

    predictor: callable(uid, iid, debug_fd) -> float score.
    test: iterable of (uid, iid, score) triples; score is the 0/1 label.
    debug: when True, dump per-sample predictions to log/debug.log.
    """
    progress = tqdm.tqdm(test)
    y = []
    y_ = []

    debug_fd = None
    if debug:
        # open() replaces the Python-2-only file() builtin.
        debug_fd = open('log/debug.log', 'w')
    try:
        for uid, iid, score in progress:
            pred_score = predictor(uid, iid, debug_fd)
            if debug:
                # write() replaces the py2 `print >> fd` statement;
                # same output, explicit newline.
                debug_fd.write('%s\t%s\t%d\t%.3f\n' % (uid, iid, score,
                                                       pred_score))

            y.append(score)
            y_.append(pred_score)
    finally:
        # The original leaked the debug file handle.
        if debug_fd is not None:
            debug_fd.close()

    pydev.info('Predict over')

    auc = metrics.roc_auc_score(y, y_)
    pydev.log('Test AUC: %.3f' % auc)
Ejemplo n.º 8
0
    def test_uid_iid_model(self, model):
        """Evaluate `model` on the (uid, iid, label) test split and log AUC.

        model: object whose forward(uids, iids) returns predicted scores.
        Loads the test split via self.load_uid_iid_data() first.
        """
        self.load_uid_iid_data()
        y = []
        y_ = []

        batch_size = 2048
        for begin in tqdm.tqdm(range(0, len(self.test) - 1, batch_size)):
            # Slice once per step instead of three times.
            batch = self.test[begin:begin + batch_size]
            # torch.tensor needs a materialized sequence -- map() is a lazy
            # iterator on Python 3 and would raise.
            output = model.forward(
                torch.tensor([x[0] for x in batch]).to(self.device),
                torch.tensor([x[1] for x in batch]).to(self.device),
            )
            y += [x[2] for x in batch]
            y_ += output.view(-1).tolist()

        auc = metrics.roc_auc_score(y, y_)
        # print() call: the original bare `print` is a no-op on Python 3.
        print()
        pydev.log('Valid AUC: %.3f' % auc)
Ejemplo n.º 9
0
    def __init__(self, inputs, config_reader=None):
        """Fully-connected layer with a configurable activation op.

        inputs: list of input tensors; produces one output per input.
        config_reader: callable(key[, default]) -> config string; must
            provide n_in, n_out and op (sigmoid/tanh/softmax/relu).
            Unknown ops fall back to a plain affine transform with a
            warning.
        """
        n_in = int(config_reader('n_in'))
        n_out = int(config_reader('n_out'))
        op = config_reader('op')
        self.l2wd = float(config_reader('l2wd', 0.0))
        self.bias_init = float(config_reader('bias_init', 0.0))
        self.weight_stddev = float(config_reader('weight_stddev', 0.01))

        pydev.log('l2 weight : %f' % self.l2wd)
        pydev.log('weight_stddev : %f' % self.weight_stddev)
        pydev.log('bias_init : %f' % self.bias_init)

        # Map the op name to its TF activation; None means no activation.
        Fdict = {
            'sigmoid': tf.sigmoid,
            'tanh': tf.tanh,
            'softmax': tf.nn.softmax,
            'relu': tf.nn.relu
        }
        F = Fdict.get(op, None)

        self.w = weight_variable([n_in, n_out],
                                 l2_weight=self.l2wd,
                                 stddev=self.weight_stddev)
        self.b = bias_variable([n_out], init=self.bias_init)

        # active function.
        self.inputs = inputs
        self.outputs = []
        for x in self.inputs:
            if F is None:
                # stderr write replaces the py2-only `print >>` statement.
                sys.stderr.write(
                    'Warning: FullConnectOp with no OP. [%s]\n' % op)
                y = tf.matmul(x, self.w) + self.b
            else:
                y = F(tf.matmul(x, self.w) + self.b)
            self.outputs.append(y)
Ejemplo n.º 10
0
    def __init__(self, inputs, config_reader=None):
        """Fully-connected layer with a configurable activation op.

        inputs: list of input tensors; produces one output per input.
        config_reader: callable(key[, default]) -> config string; must
            provide n_in, n_out and op (sigmoid/tanh/softmax/relu).
            Unknown ops fall back to a plain affine transform with a
            warning on stderr (Python 2 print syntax).
        """
        n_in = int( config_reader('n_in') )
        n_out = int( config_reader('n_out') )
        op = config_reader('op')
        self.l2wd = float(config_reader('l2wd', 0.0))
        self.bias_init = float(config_reader('bias_init', 0.0))
        self.weight_stddev = float(config_reader('weight_stddev', 0.01))
        
        pydev.log('l2 weight : %f' % self.l2wd )
        pydev.log('weight_stddev : %f' % self.weight_stddev)
        pydev.log('bias_init : %f' % self.bias_init)

        # Map the op name to its TF activation; None means no activation.
        Fdict = {
            'sigmoid'   : tf.sigmoid,
            'tanh'      : tf.tanh,
            'softmax'   : tf.nn.softmax,
            'relu'      : tf.nn.relu
                }
        F = Fdict.get(op, None)

        self.w = weight_variable([n_in, n_out], l2_weight=self.l2wd, stddev=self.weight_stddev)
        self.b = bias_variable([n_out], init=self.bias_init)

        # active function.
        self.inputs = inputs
        self.outputs = []
        for x in self.inputs:
            if F is None:
                # Python 2 `print >>` statement; warns and falls back to
                # the bare affine transform.
                print >> sys.stderr, 'Warning: FullConnectOp with no OP. [%s]' % op
                y = tf.matmul(x, self.w) + self.b
            else:
                y = F( tf.matmul(x, self.w) + self.b )
            self.outputs.append( y )
Ejemplo n.º 11
0
    def __init__(self, train, device, epoch_count, batch_size):
        """Scan (uid, iid, click) triples to size id spaces and iterations.

        train: iterable of (uid, iid, click) triples.
        device: device handle stored for later use.
        epoch_count, batch_size: used to derive train_iter_count.
        """
        max_movie_id = 0
        max_user_id = 0

        self.epoch_count = epoch_count
        self.batch_size = batch_size
        self.data_count = 0
        self.device = device
        self.train = train
        self.current_epoch = 0

        write_progress = tqdm.tqdm(train)
        for uid, iid, click in write_progress:
            max_movie_id = max(max_movie_id, iid)
            max_user_id = max(max_user_id, uid)
            self.data_count += 1

        # Floor division: the iteration count must stay an integer
        # (true division would produce a float on Python 3).
        self.train_iter_count = self.epoch_count * self.data_count // self.batch_size

        # +1: ids are used directly as embedding indices.
        self.user_count = max_user_id + 1
        self.movie_count = max_movie_id + 1

        pydev.log('user_count=%d' % self.user_count)
        pydev.log('movie_count=%d' % self.movie_count)
        pydev.log('data_count=%d' % self.data_count)
Ejemplo n.º 12
0
    def __init__(self, inputs, config_reader=None):
        """Stack of Conv2D layers with optional residual shortcut and pooling.

        inputs: list of input tensors; one output tensor is produced per
            input in self.outputs.
        config_reader: callable(key[, default]) returning config strings;
            keys used: shape, stack_count, use_residual, pool_size,
            pool_strides, pool_type.
        """
        self.stacks = []
        # NOTE(review): on Python 3 map() is lazy, so self.shape[2] below
        # would fail; this code assumes Python 2 list-returning map.
        self.shape = map(int, config_reader('shape').split(','))
        self.stack_count = int( config_reader('stack_count', 1) )
        self.use_residual = int( config_reader('use_residual', 0) )

        self.pooling_size = int( config_reader('pool_size', 2) )
        self.pooling_strides = int( config_reader('pool_strides', 2) )
        self.pooling_type = config_reader('pool_type')

        pydev.log('StackCount : %d' % self.stack_count)
        pydev.log('use_residual : %d' % self.use_residual)

        # Chain the conv layers: each stack consumes the previous outputs.
        # NOTE(review): assumes stack_count >= 1, else `dest` is unbound.
        src = inputs
        for i in range(0, self.stack_count):
            layer = Layer_Conv2D(src, self.__fake_config_reader(i, config_reader))
            self.stacks.append(layer)
            
            dest = layer.outputs
            src = dest

        self.inputs = inputs
        self.outputs = []
        for idx, x in enumerate(inputs):
            if self.use_residual:
                pydev.log('use_residual!')
                if self.shape[2] != self.shape[3]:
                    # Channel counts differ: shortcut from the first
                    # stack's output instead of the raw input.
                    pydev.log('input shape is different with output shape, so residual by second stack.')
                    y = self.stacks[0].outputs[idx] + dest[idx]
                else:
                    y = self.inputs[idx] + dest[idx]
            else:
                y = dest[idx]

            # Optional spatial pooling on the stacked result.
            if self.pooling_type == 'max':
                y = tf.nn.max_pool(y, 
                            ksize=[1, self.pooling_size, self.pooling_size, 1],
                            strides=[1, self.pooling_strides, self.pooling_strides, 1], 
                            padding='SAME')
            elif self.pooling_type == 'avg':
                y = tf.nn.avg_pool(y, 
                            ksize=[1, self.pooling_size, self.pooling_size, 1],
                            strides=[1, self.pooling_strides, self.pooling_strides, 1], 
                            padding='SAME')

            self.outputs.append(y)
Ejemplo n.º 13
0
    def read(self, filename):
        """Yield (label, image_a, image_b) records decoded from `filename`.

        Generator: each record is a length-prefixed triple read by
        __read_buffers (implementation not shown here); the label is a
        packed int32 and the two images are compressed buffers decoded
        with OpenCV.
        """
        # NOTE(review): file() is the Python-2-only builtin and opens in
        # text mode while the payload is parsed as binary -- fine on
        # py2/Unix, would need open(filename, 'rb') elsewhere.
        self.__fd = file(filename)
        self.__record_count = 0

        while 1:
            # Read the next 3 buffers: label, image A, image B.
            ret = self.__read_buffers(3)

            # load over.
            if ret is None:
                break
            else:
                buf_label, buf_image_a, buf_image_b = ret
                #print len(buf_label), len(buf_image_a), len(buf_image_b)

                # 'i' = native int32 label.
                label = struct.unpack('i', buf_label)[0]
                # imdecode with flag 1: decode as 3-channel color image.
                image_a = cv2.imdecode(
                    np.asarray(bytearray(buf_image_a), dtype=np.uint8), 1)
                image_b = cv2.imdecode(
                    np.asarray(bytearray(buf_image_b), dtype=np.uint8), 1)

                yield label, image_a, image_b
                self.__record_count += 1

        pydev.log('%d image(s) loaded' % self.__record_count)
Ejemplo n.º 14
0
    def __init__(self, train):
        """Build next-click prediction samples from user view histories.

        For each user, every clicked movie becomes a target y with the
        preceding clicked ids as input x; histories shorter than 3 are
        skipped.

        train: iterable of (uid, views), views being (movie_id, click)
            pairs.
        """
        self.x = []
        self.y = []
        max_movie_id = 0

        for uid, views in train:
            # Materialize the clicked ids: len() and slicing below require
            # a list (map/filter are lazy iterators on Python 3).
            clicks = [int(v[0]) for v in views if v[1] == 1]
            if not clicks:
                continue

            max_movie_id = max(max_movie_id, max(clicks))

            # x = the history before position idx, y = the click at idx.
            for idx, y in enumerate(clicks):
                x = clicks[:idx]
                if len(x) < 3:
                    continue
                self.x.append(x)
                self.y.append(y)

        self.movie_count = max_movie_id + 1
        pydev.log('max_movie_id=%d' % self.movie_count)
        pydev.log('data_count=%d' % len(self.x))
Ejemplo n.º 15
0
    def __init__(self, inputs, config_reader=None):
        """Linear fully-connected layer (no activation).

        Produces one output tensor x*W + b in self.outputs for every
        tensor in `inputs`, reading n_in/n_out and regularization
        settings from config_reader.
        """
        n_in = int(config_reader('n_in'))
        n_out = int(config_reader('n_out'))
        self.l2wd = float(config_reader('l2wd', 0.0))
        self.bias_init = float(config_reader('bias_init', 0.0))
        self.weight_stddev = float(config_reader('weight_stddev', 0.01))

        pydev.log('l2 weight : %f' % self.l2wd )
        pydev.log('weight_stddev : %f' % self.weight_stddev)
        pydev.log('bias_init : %f' % self.bias_init)

        # shared weight and bias across all inputs.
        self.w = weight_variable([n_in, n_out],
                                 l2_weight=self.l2wd,
                                 stddev=self.weight_stddev)
        self.b = bias_variable([n_out], init=self.bias_init)

        # affine transform applied to every input tensor.
        self.inputs = inputs
        self.outputs = [tf.matmul(x, self.w) + self.b for x in self.inputs]
Ejemplo n.º 16
0
    def __init__(self, inputs, config_reader=None):
        """Plain fully-connected layer: y = x * W + b, no activation.

        One output tensor is appended to self.outputs for each input
        tensor; W and b are shared across all inputs.
        """
        n_in = int(config_reader('n_in'))
        n_out = int(config_reader('n_out'))
        self.l2wd = float(config_reader('l2wd', 0.0))
        self.bias_init = float(config_reader('bias_init', 0.0))
        self.weight_stddev = float(config_reader('weight_stddev', 0.01))

        pydev.log('l2 weight : %f' % self.l2wd)
        pydev.log('weight_stddev : %f' % self.weight_stddev)
        pydev.log('bias_init : %f' % self.bias_init)

        # parameters shared by every input tensor.
        self.w = weight_variable(
            [n_in, n_out], l2_weight=self.l2wd, stddev=self.weight_stddev)
        self.b = bias_variable([n_out], init=self.bias_init)

        # apply the affine transform to each input in turn.
        self.inputs = inputs
        self.outputs = []
        for tensor in self.inputs:
            affine = tf.matmul(tensor, self.w) + self.b
            self.outputs.append(affine)
Ejemplo n.º 17
0
    def __init__(self, train, device, epoch_count, batch_size, movie_dir):
        """Build (uid, iid, user-genre-cross, click) training records.

        Crosses each user id with the genres of the viewed movie and
        indexes the cross keys through a slot coder so they can feed an
        embedding table.

        train: iterable of (uid, iid, click) triples.
        movie_dir: directory passed to utils.load_movies.
        """
        max_movie_id = 0
        max_user_id = 0

        pydev.info('load movies')
        self.movies = utils.load_movies(movie_dir, ignore_tags=True)

        self.epoch_count = epoch_count
        self.batch_size = batch_size
        self.data_count = 0
        self.device = device
        self.data = []

        write_progress = tqdm.tqdm(train)
        self.slot_coder = easy_train.SlotIndexCoder()
        # feature extracting.
        for uid, iid, click in write_progress:
            max_movie_id = max(max_movie_id, iid)
            max_user_id = max(max_user_id, uid)
            self.data_count += 1

            movie_id = int(iid)
            # Unknown movies fall back to an empty MovieInfo (no genres).
            movie = self.movies.get(movie_id, utils.MovieInfo())
            user_genres = []
            for genres in movie.genres:
                # Cross feature: user id x movie genre.
                key = '%s_%s' % (uid, genres)
                idx = self.slot_coder.alloc('uid_genres', key)
                user_genres.append(idx)

            self.data.append((uid, iid, user_genres, click))

        # Floor division: the iteration count must stay an integer
        # (true division would produce a float on Python 3).
        self.train_iter_count = self.epoch_count * self.data_count // self.batch_size

        # +1: ids are used directly as embedding indices.
        self.user_count = max_user_id + 1
        self.movie_count = max_movie_id + 1

        pydev.log('user_count=%d' % self.user_count)
        pydev.log('movie_count=%d' % self.movie_count)
        pydev.log('data_count=%d' % self.data_count)
Ejemplo n.º 18
0
        sys.exit(-1)
    emb_dict = ItemIndex(sys.argv[2])

    cooc_dict = CoocDict()
    for line in file(sys.argv[1]).readlines():
        terms = line.strip().split(' ')
        
        for idx in range(len(terms)-window_size):
            a = terms[idx]
            for j in range(window_size):
                b = terms[idx + j + 1]

                cooc_dict.add(a, b)
                cooc_dict.add(b, a)
    
    pydev.log('load cooc over.')

    hit = 0
    total = 0
    for key in cooc_dict.cooc_dict:
        if key == '':
            continue

        values = sorted(cooc_dict.cooc_dict[key].iteritems(), key=lambda x:-x[1])[:20]
        recalls, dis = emb_dict.index.get_nns_by_item(int(key), n=50, include_distances=True)

        total += len(values)
        for cooc, count in values:
            if int(cooc) in set(recalls):
                hit += 1
        
Ejemplo n.º 19
0
    def __init__(self, config_file, network_name, output_01=False):
        """Build a TensorFlow (pre-1.0 API) network from an INI config.

        config_file: ConfigParser file path describing the network.
        network_name: section name to read layers/active/cost from.
        output_01: stored flag; presumably binarizes the output --
            confirm where self.__output_01 is consumed.

        Constructs all layers, the loss (main + weight-decay collection),
        the learning-rate schedule and the train op, then opens a session.
        Uses legacy TF names (scalar_summary, merge_all_summaries) and
        Python 2 print statements.
        """
        # Maps each config 'type' string to the layer constructor.
        self.__LayerCreator__ = {
                'full_connect'      : Layer_FullConnect,
                'full_connect_op'   : Layer_OpFullConnect,
                'dot'               : Layer_Dot,
                'norm2'             : Layer_Norm2Cost,
                'softmax_entropy'   : Layer_SoftmaxEntropyCost,
                'sigmoid'           : Layer_Sigmoid,
                'softmax'           : Layer_Softmax,
                'tanh'              : Layer_Tanh,
                'relu'              : Layer_Relu,
                'pooling'           : Layer_Pooling,
                # all following 4 layers has same implemention.
                'conv2d'            : Layer_StackConv2D,
                'conv2d_pool'       : Layer_StackConv2D,
                'stack_conv2d'      : Layer_StackConv2D,
                'stack_conv2d_pool' : Layer_StackConv2D,
                'reshape'           : Layer_Reshape,
                'dropout'           : Layer_DropOut,
                'local_norm'        : Layer_LocalResponseNormalization,
            }

        self.__output_01 = output_01

        self.__layers = []
        self.__layers_info = {}

        cp = ConfigParser.ConfigParser()
        self.__config_parser = cp
        self.__network_name = network_name
        cp.read(config_file)

        # read inputs and label.
        self.__inputs = self.__placeholders_read( 
                pydev.config_default_get(cp, network_name, 'input_def', 'f:2') )
        self.__label = self.__single_placeholder_read( 
                pydev.config_default_get(cp, network_name, 'label_def', 'f:2') )
        pydev.log('Inputs: %s' % self.__inputs)
        pydev.log('Labels  %s' % self.__label)

        # batch_size.
        self.__batch_size = int(pydev.config_default_get(cp, network_name, 'batch_size', 50))

        layer_names = cp.get(network_name, 'layers').split(',')
        active_name = cp.get(network_name, 'active').strip()
        cost_name = cp.get(network_name, 'cost').strip()

        # global_step drives the learning-rate decay schedule below.
        global_step = tf.Variable(0, trainable=False)

        # NOTE(review): batch_size is read twice (also above) -- harmless
        # but redundant.
        self.__batch_size = int( pydev.config_default_get(cp, network_name, 'batch_size', 50) )
        self.__epoch = int( pydev.config_default_get(cp, network_name, 'epoch', 10) )
        print >> sys.stderr, 'Epoch     : %d' % self.__epoch
        print >> sys.stderr, 'BatchSize : %d' % self.__batch_size
   
        # First pass: record each layer's type and input names.
        for layer_name in layer_names:
            layer_type = cp.get(network_name, '%s.type' % layer_name)
            input_names = cp.get(network_name, '%s.input' % layer_name).split(',')

            # type, inputs, layer_refer.
            self.__layers_info[layer_name] = [layer_type, input_names, None]
        
        # Second pass: instantiate layers (recursively resolves inputs).
        self.__layer_has_receiver = set()
        for name in layer_names:
            self.__init_layer(name)

        # check receivers.
        warning_layers = []
        for name in layer_names:
            if name not in self.__layer_has_receiver:
                warning_layers.append(name)
        if len(warning_layers)>0:
            pydev.err('===[ Layers of no receivers, check it : %s ]===' % ','.join(warning_layers))

        # default use outputs[0] as y and loss.
        # make network-active function.
        self.active = self.__get_layer(active_name).outputs[0]

        # loss function.
        # main_loss : main target loss.
        # pernalized_loss = wd_loss
        # loss = main_loss + pernalize_loss
        self.main_loss = self.__get_layer(cost_name).outputs[0]
        tf.add_to_collection('losses', self.main_loss)
        self.loss = tf.add_n(tf.get_collection('losses'), name='total_loss')

        # record losses.
        tf.scalar_summary('train/total_loss', self.loss)
        tf.scalar_summary('train/main_loss', self.main_loss)

        # learning method and learning rate.
        self.__learner = pydev.config_dict_get(cp, network_name, 'learner', 
                {
                    'gradient'          : tf.train.GradientDescentOptimizer,
                    'movingGradient'    : MovingGradientDescentOptimizer,
                    'adam'              : tf.train.AdamOptimizer,
                }, default_key='gradient'
                )
        
        self.__lr_value = float( pydev.config_default_get(cp, network_name, 'learning_rate', 1e-3) )
        self.__lr_decay_ratio = float( pydev.config_default_get(cp, network_name, 'learning_decay_ratio', 0.96) )
        self.__lr_decay_step = float( pydev.config_default_get(cp, network_name, 'learning_decay_step', 300) )
        self.__lr_tensor = pydev.config_dict_get(cp, network_name, 'learning_rate_type', 
                {
                    'fixed'             : tf.Variable(self.__lr_value),
                    'exponential_decay' : tf.train.exponential_decay(self.__lr_value, global_step, 
                        self.__lr_decay_step, self.__lr_decay_ratio, staircase=True)
                }, default_key = 'fixed'
            )

        pydev.log('learner : %s' % self.__learner)
        pydev.log('lr_type : %s' % self.__lr_tensor)
        pydev.log('lr_value: %s' % self.__lr_value)
        pydev.log('lr_step : %s' % self.__lr_decay_step)
        pydev.log('lr_ratio: %s' % self.__lr_decay_ratio)

        tf.scalar_summary('train/learning_rate', self.__lr_tensor)

        # generate training function.
        self.train = self.__learner( self.__lr_tensor ).minimize(self.loss, global_step=global_step)

        self.__train_summary_merged = tf.merge_all_summaries()

        # setup session config.
        # allow_growth: grab GPU memory on demand instead of all at once.
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        #session_config.gpu_options.per_process_gpu_fraction = 0.4
        self.session = tf.Session(config = session_config)
Ejemplo n.º 20
0
    def fit(self,
            X,
            Y,
            callback=None,
            callback_iteration=100,
            preprocessor=None,
            first_run=True):
        '''
            X : training X
            Y : training Y
            callback : callback when training. callback type: callback(predict_function)
            callback_interval : interval to call back (total 1.0)
            callback_iteration : callback each N iterations.
            preprocess : preprocess tensor for each (x, ..., x, y)
        '''

        # uniform input:
        #   X can be [tensors ...] or tensor
        #   make X as [tensors ...]
        if not isinstance(X, tuple) and not isinstance(X, list):
            X = [X]
            pydev.log('fit X to be [X] %s' % type(X))

        # make shuffle and preprocess graph.
        # Feed the data through TF's (legacy) input-producer queue so
        # shuffling/batching happen inside the graph.
        holders = []
        for x in X:
            holders.append(tf.constant(x))
        holders.append(tf.constant(Y))

        queues = tf.train.slice_input_producer(holders)
        if preprocessor is not None:
            queues = preprocessor(*queues)
        batchs = tf.train.batch(queues,
                                batch_size=self.__batch_size,
                                num_threads=4)

        # init all variables.
        if first_run:
            # initialize the training summary.
            ts = time.asctime().replace(' ', '_')
            self.__train_writer = tf.train.SummaryWriter(
                './tensorboard_logs/%s/%s' % (self.__network_name, ts),
                self.session.graph)

            self.session.run(tf.initialize_all_variables())

        # simple train.
        # Total iterations = epochs * samples / batch (floor).
        data_size = len(X[-1])
        iteration_count = (self.__epoch * data_size) // self.__batch_size
        print >> sys.stderr, 'Iteration=%d (batchsize=%d, epoch=%d, datasize=%d)' % (
            iteration_count, self.__batch_size, self.__epoch, data_size)
        self.__current_iteration = 0

        last_percentage = 0
        last_callback_iteration = 0
        begin_time = time.time()
        with self.session.as_default():
            # Queue runners feed `batchs` asynchronously.
            # NOTE(review): coord/threads are never joined or stopped here.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            for it in xrange(1, iteration_count + 1):
                # training code.
                self.__current_iteration = it

                # run back data and fit one batch.
                subs = self.session.run(batchs)

                loss, main_loss, summary_info, lr = self.fit_one_batch(*subs)
                self.__train_writer.add_summary(summary_info,
                                                self.__current_iteration)

                # Report code.
                percentage = it * 1. / iteration_count
                cost_time = time.time() - begin_time
                remain_time = cost_time / percentage - cost_time

                # %c with 13 emits a carriage return: the progress line
                # overwrites itself in place.
                sys.stderr.write(
                    '%cProgress: %3.1f%% [%s/%s] [iter=%7d loss=%.4f(%.4f+%.4f) lr=%f ips=%.2f]'
                    % (13, percentage * 100., pydev.format_time(cost_time),
                       pydev.format_time(remain_time), it, loss, main_loss,
                       loss - main_loss, lr, it / (time.time() - begin_time)))

                # call back the reporter and tester.
                if callback:
                    if it - last_callback_iteration >= callback_iteration:
                        callback(self.predict, self.__train_writer,
                                 self.__current_iteration)
                        last_callback_iteration = it
Ejemplo n.º 21
0
    def __init__(self, config_file, network_name, output_01=False):
        self.__LayerCreator__ = {
            'full_connect': Layer_FullConnect,
            'full_connect_op': Layer_OpFullConnect,
            'dot': Layer_Dot,
            'norm2': Layer_Norm2Cost,
            'softmax_entropy': Layer_SoftmaxEntropyCost,
            'sigmoid': Layer_Sigmoid,
            'softmax': Layer_Softmax,
            'tanh': Layer_Tanh,
            'relu': Layer_Relu,
            'pooling': Layer_Pooling,
            # all following 4 layers has same implemention.
            'conv2d': Layer_StackConv2D,
            'conv2d_pool': Layer_StackConv2D,
            'stack_conv2d': Layer_StackConv2D,
            'stack_conv2d_pool': Layer_StackConv2D,
            'reshape': Layer_Reshape,
            'dropout': Layer_DropOut,
            'local_norm': Layer_LocalResponseNormalization,
        }

        self.__output_01 = output_01

        self.__layers = []
        self.__layers_info = {}

        cp = ConfigParser.ConfigParser()
        self.__config_parser = cp
        self.__network_name = network_name
        cp.read(config_file)

        # read inputs and label.
        self.__inputs = self.__placeholders_read(
            pydev.config_default_get(cp, network_name, 'input_def', 'f:2'))
        self.__label = self.__single_placeholder_read(
            pydev.config_default_get(cp, network_name, 'label_def', 'f:2'))
        pydev.log('Inputs: %s' % self.__inputs)
        pydev.log('Labels  %s' % self.__label)

        # batch_size.
        self.__batch_size = int(
            pydev.config_default_get(cp, network_name, 'batch_size', 50))

        layer_names = cp.get(network_name, 'layers').split(',')
        active_name = cp.get(network_name, 'active').strip()
        cost_name = cp.get(network_name, 'cost').strip()

        global_step = tf.Variable(0, trainable=False)

        self.__batch_size = int(
            pydev.config_default_get(cp, network_name, 'batch_size', 50))
        self.__epoch = int(
            pydev.config_default_get(cp, network_name, 'epoch', 10))
        print >> sys.stderr, 'Epoch     : %d' % self.__epoch
        print >> sys.stderr, 'BatchSize : %d' % self.__batch_size

        for layer_name in layer_names:
            layer_type = cp.get(network_name, '%s.type' % layer_name)
            input_names = cp.get(network_name,
                                 '%s.input' % layer_name).split(',')

            # type, inputs, layer_refer.
            self.__layers_info[layer_name] = [layer_type, input_names, None]

        self.__layer_has_receiver = set()
        for name in layer_names:
            self.__init_layer(name)

        # check receivers.
        warning_layers = []
        for name in layer_names:
            if name not in self.__layer_has_receiver:
                warning_layers.append(name)
        if len(warning_layers) > 0:
            pydev.err('===[ Layers of no receivers, check it : %s ]===' %
                      ','.join(warning_layers))

        # default use outputs[0] as y and loss.
        # make network-active function.
        self.active = self.__get_layer(active_name).outputs[0]

        # loss function.
        # main_loss : main target loss.
        # pernalized_loss = wd_loss
        # loss = main_loss + pernalize_loss
        self.main_loss = self.__get_layer(cost_name).outputs[0]
        tf.add_to_collection('losses', self.main_loss)
        self.loss = tf.add_n(tf.get_collection('losses'), name='total_loss')

        # record losses.
        tf.scalar_summary('train/total_loss', self.loss)
        tf.scalar_summary('train/main_loss', self.main_loss)

        # learning method and learning rate.
        self.__learner = pydev.config_dict_get(
            cp,
            network_name,
            'learner', {
                'gradient': tf.train.GradientDescentOptimizer,
                'movingGradient': MovingGradientDescentOptimizer,
                'adam': tf.train.AdamOptimizer,
            },
            default_key='gradient')

        self.__lr_value = float(
            pydev.config_default_get(cp, network_name, 'learning_rate', 1e-3))
        self.__lr_decay_ratio = float(
            pydev.config_default_get(cp, network_name, 'learning_decay_ratio',
                                     0.96))
        self.__lr_decay_step = float(
            pydev.config_default_get(cp, network_name, 'learning_decay_step',
                                     300))
        self.__lr_tensor = pydev.config_dict_get(
            cp,
            network_name,
            'learning_rate_type', {
                'fixed':
                tf.Variable(self.__lr_value),
                'exponential_decay':
                tf.train.exponential_decay(self.__lr_value,
                                           global_step,
                                           self.__lr_decay_step,
                                           self.__lr_decay_ratio,
                                           staircase=True)
            },
            default_key='fixed')

        pydev.log('learner : %s' % self.__learner)
        pydev.log('lr_type : %s' % self.__lr_tensor)
        pydev.log('lr_value: %s' % self.__lr_value)
        pydev.log('lr_step : %s' % self.__lr_decay_step)
        pydev.log('lr_ratio: %s' % self.__lr_decay_ratio)

        tf.scalar_summary('train/learning_rate', self.__lr_tensor)

        # generate training function.
        self.train = self.__learner(self.__lr_tensor).minimize(
            self.loss, global_step=global_step)

        self.__train_summary_merged = tf.merge_all_summaries()

        # setup session config.
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        #session_config.gpu_options.per_process_gpu_fraction = 0.4
        self.session = tf.Session(config=session_config)
# Ejemplo n.º 22
# 0
    def fit(self, X, Y, callback=None, callback_iteration=100, preprocessor=None, first_run=True):
        '''
        Train the network on in-memory arrays via a TF input queue.

        Parameters:
            X : a single array/tensor, or a list/tuple of them (one per
                network input placeholder).
            Y : label array/tensor; assumed to align with each X along
                axis 0 -- TODO confirm against the config's label_def.
            callback : optional hook invoked during training as
                callback(predict_function, summary_writer, iteration).
            callback_iteration : invoke `callback` every N iterations.
            preprocessor : optional function applied to each sliced
                example tuple (x, ..., x, y) before batching.
            first_run : when True, create the TensorBoard summary writer
                and initialize all variables; pass False to continue
                training in an already-initialized session.

        NOTE(review): written for legacy (pre-1.0) TensorFlow
        (slice_input_producer / SummaryWriter / initialize_all_variables)
        and Python 2 syntax (`print >>`, `xrange`).
        '''

        # Normalize input: X may be a single tensor or a [tensors...]
        # list; always work with a list from here on.
        if not isinstance(X, tuple) and not isinstance(X, list):
            X = [ X ]
            pydev.log('fit X to be [X] %s' % type(X) )

        # Build the shuffle + preprocess input pipeline: one constant per
        # input array (plus the labels), sliced example-by-example.
        holders = []
        for x in X:
            holders.append( tf.constant(x) )
        holders.append( tf.constant(Y) )

        queues = tf.train.slice_input_producer(holders)
        if preprocessor is not None:
            # Per-example preprocessing on the (x, ..., x, y) slice tuple.
            queues = preprocessor( *queues )
        batchs = tf.train.batch(queues, batch_size=self.__batch_size, num_threads=4)

        # First run only: set up TensorBoard logging and init variables.
        if first_run:
            # Timestamped log dir so each run gets its own summary stream.
            ts = time.asctime().replace(' ', '_')
            self.__train_writer = tf.train.SummaryWriter(
                                './tensorboard_logs/%s/%s' % (self.__network_name, ts),
                                self.session.graph)

            self.session.run( tf.initialize_all_variables() )

        # Total iterations derived from epoch count and batch size.
        # NOTE(review): data_size is taken from the last input, not Y --
        # presumably all inputs and Y share the same length; confirm.
        data_size = len(X[-1])
        iteration_count = (self.__epoch * data_size) // self.__batch_size
        print >> sys.stderr, 'Iteration=%d (batchsize=%d, epoch=%d, datasize=%d)' % (
                iteration_count, self.__batch_size, self.__epoch, data_size)
        self.__current_iteration = 0

        last_percentage = 0
        last_callback_iteration = 0
        begin_time = time.time()
        with self.session.as_default():
            # Queue runners feed the batch pipeline in background threads.
            # NOTE(review): no coord.request_stop()/join is visible in
            # this span -- confirm thread cleanup happens after training.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord = coord)

            for it in xrange( 1, iteration_count+1 ):
                # training code.
                self.__current_iteration = it
                
                # Pull one prepared batch back and fit it.
                subs = self.session.run(batchs)

                loss, main_loss, summary_info, lr = self.fit_one_batch(*subs)
                self.__train_writer.add_summary(summary_info, self.__current_iteration)

                # Progress report: %c with 13 emits a carriage return, so
                # the status line is rewritten in place on stderr.
                percentage = it * 1. / iteration_count
                cost_time = time.time() - begin_time
                remain_time = cost_time / percentage - cost_time

                sys.stderr.write('%cProgress: %3.1f%% [%s/%s] [iter=%7d loss=%.4f(%.4f+%.4f) lr=%f ips=%.2f]' % (
                    13, 
                    percentage * 100., 
                    pydev.format_time(cost_time),
                    pydev.format_time(remain_time),
                    it, loss, main_loss, loss-main_loss, lr, 
                    it / (time.time()-begin_time) 
                    ))

                # Periodically hand the predict function to the caller's
                # reporter/tester hook.
                if callback:
                    if it - last_callback_iteration >= callback_iteration:
                        callback(self.predict, self.__train_writer, self.__current_iteration)
                        last_callback_iteration = it