コード例 #1
0
    def train(self, data_root, out_dir):
        """Train the TextOnly model and save training artifacts.

        Parameters
        ----------
        data_root : str
            Directory containing 'data.h5py' (with 'train'/'dev' groups)
            and a pickled 'meta' file holding 'y_vocab'.
        out_dir : str
            Output directory (created if missing); receives 'weights',
            'model.json' and 'model.h5'.
        """
        data_path = os.path.join(data_root, 'data.h5py')
        meta_path = os.path.join(data_root, 'meta')
        # Context manager closes the meta file deterministically (the
        # handle was previously leaked via open(...).read()).
        with open(meta_path, 'rb') as meta_file:
            meta = cPickle.loads(meta_file.read())
        self.weight_fname = os.path.join(out_dir, 'weights')
        self.model_fname = os.path.join(out_dir, 'model')
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        self.logger.info('# of classes: %s' % len(meta['y_vocab']))
        self.num_classes = len(meta['y_vocab'])

        # The sample generators read batches lazily from the HDF5 file,
        # so it must stay open for the whole fit; the context manager
        # also closes it afterwards (it was never closed before).
        with h5py.File(data_path, 'r') as data:
            train = data['train']
            dev = data['dev']

            self.logger.info('# of train samples: %s' % train['cate'].shape[0])
            self.logger.info('# of dev samples: %s' % dev['cate'].shape[0])

            # NOTE(review): period=10 with save_best_only means no
            # checkpoint is written unless training runs >= 10 epochs;
            # load_weights() below would then fail -- confirm
            # opt.num_epochs >= 10.
            checkpoint = ModelCheckpoint(self.weight_fname, monitor='val_loss',
                                         save_best_only=True, mode='min',
                                         period=10)

            textonly = TextOnly()
            model = textonly.get_model(self.num_classes)

            total_train_samples = train['uni'].shape[0]
            train_gen = self.get_sample_generator(train,
                                                  batch_size=opt.batch_size)
            self.steps_per_epoch = int(
                np.ceil(total_train_samples / float(opt.batch_size)))

            total_dev_samples = dev['uni'].shape[0]
            dev_gen = self.get_sample_generator(dev,
                                                batch_size=opt.batch_size)
            self.validation_steps = int(
                np.ceil(total_dev_samples / float(opt.batch_size)))

            # Resume from a previously saved model if one exists,
            # recompiling so the configured learning rate applies.
            model_full_fname = self.model_fname + '.h5'
            print(model_full_fname)
            if os.path.isfile(model_full_fname):
                print('previous model file exist')
                model = load_model(model_full_fname,
                                   custom_objects={'top1_acc': top1_acc})
                optm = keras.optimizers.Nadam(opt.lr)
                model.compile(loss='binary_crossentropy',
                              optimizer=optm,
                              metrics=[top1_acc])

            model.fit_generator(generator=train_gen,
                                steps_per_epoch=self.steps_per_epoch,
                                epochs=opt.num_epochs,
                                validation_data=dev_gen,
                                validation_steps=self.validation_steps,
                                shuffle=True,
                                callbacks=[checkpoint])

        # Restore the best checkpointed weights and persist the model;
        # the json file is now closed deterministically.
        model.load_weights(self.weight_fname)
        with open(self.model_fname + '.json', 'w') as json_file:
            json_file.write(model.to_json())
        model.save(self.model_fname + '.h5')
コード例 #2
0
    def train(self, data_root, out_dir):
        """Train the TextOnly model with early stopping and save the best
        weights, a JSON architecture dump and a full .h5 model.

        Parameters
        ----------
        data_root : str
            Directory containing 'data.h5py' (with 'train'/'dev' groups)
            and a pickled 'meta' file holding 'y_vocab'.
        out_dir : str
            Output directory (created if missing).
        """
        data_path = os.path.join(data_root, 'data.h5py')
        meta_path = os.path.join(data_root, 'meta')
        # Context manager closes the meta file deterministically (the
        # handle was previously leaked via open(...).read()).
        with open(meta_path, 'rb') as meta_file:
            meta = cPickle.loads(meta_file.read())
        self.weight_fname = os.path.join(out_dir, 'weights')
        self.model_fname = os.path.join(out_dir, 'model')
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        self.logger.info('# of classes: %s' % len(meta['y_vocab']))
        self.num_classes = len(meta['y_vocab'])

        # The sample generators read batches lazily from the HDF5 file,
        # so it must stay open for the whole fit; the context manager
        # also closes it afterwards (it was never closed before).
        with h5py.File(data_path, 'r') as data:
            train = data['train']
            dev = data['dev']

            self.logger.info('# of train samples: %s' % train['cate'].shape[0])
            self.logger.info('# of dev samples: %s' % dev['cate'].shape[0])

            # Checkpoint every epoch, keeping only the best val_loss.
            checkpoint = ModelCheckpoint(self.weight_fname,
                                         monitor='val_loss',
                                         save_best_only=True,
                                         mode='min',
                                         period=1)
            earlystopper = EarlyStopping(monitor='val_loss',
                                         patience=opt.early_stop,
                                         verbose=1)

            textonly = TextOnly()
            model = textonly.get_model(self.num_classes)

            total_train_samples = train['uni'].shape[0]
            train_gen = self.get_sample_generator(train,
                                                  batch_size=opt.batch_size)
            self.steps_per_epoch = int(
                np.ceil(total_train_samples / float(opt.batch_size)))

            total_dev_samples = dev['uni'].shape[0]
            dev_gen = self.get_sample_generator(dev, batch_size=opt.batch_size)
            self.validation_steps = int(
                np.ceil(total_dev_samples / float(opt.batch_size)))

            model.fit_generator(generator=train_gen,
                                steps_per_epoch=self.steps_per_epoch,
                                epochs=opt.num_epochs,
                                validation_data=dev_gen,
                                validation_steps=self.validation_steps,
                                shuffle=True,
                                callbacks=[checkpoint, earlystopper])

        # Rebuild a fresh model and load the best checkpointed weights
        # into it -- presumably to drop training-time state from the
        # (possibly worse) final epoch; confirm with the author.
        model = textonly.get_model(self.num_classes)
        model.load_weights(self.weight_fname)
        with open(self.model_fname + '.json', 'w') as json_file:
            json_file.write(model.to_json())
        model.save(self.model_fname + '.h5')
コード例 #3
0
    def train(self, data_root, out_dir):
        """Train the TextOnly model and save training artifacts.

        Parameters
        ----------
        data_root : str
            Directory containing 'data.h5py' (with 'train'/'dev' groups)
            and a pickled 'meta' file holding 'y_vocab'.
        out_dir : str
            Output directory (created if missing); receives 'weights',
            'model.json' and 'model.h5'.
        """
        data_path = os.path.join(data_root, 'data.h5py')
        meta_path = os.path.join(data_root, 'meta')
        # Context manager closes the meta file deterministically (the
        # handle was previously leaked via open(...).read()).
        with open(meta_path, 'rb') as meta_file:
            meta = cPickle.loads(meta_file.read())
        self.weight_fname = os.path.join(out_dir, 'weights')
        self.model_fname = os.path.join(out_dir, 'model')
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        self.logger.info('# of classes: %s' % len(meta['y_vocab']))
        self.num_classes = len(meta['y_vocab'])

        # The sample generators read batches lazily from the HDF5 file,
        # so it must stay open for the whole fit; the context manager
        # also closes it afterwards (it was never closed before).
        with h5py.File(data_path, 'r') as data:
            train = data['train']
            dev = data['dev']

            self.logger.info('# of train samples: %s' % train['cate'].shape[0])
            self.logger.info('# of dev samples: %s' % dev['cate'].shape[0])

            # NOTE(review): period=10 with save_best_only means no
            # checkpoint is written unless training runs >= 10 epochs;
            # load_weights() below would then fail -- confirm
            # opt.num_epochs >= 10.
            checkpoint = ModelCheckpoint(self.weight_fname, monitor='val_loss',
                                         save_best_only=True, mode='min',
                                         period=10)

            textonly = TextOnly()
            model = textonly.get_model(self.num_classes)

            total_train_samples = train['uni'].shape[0]
            # train_gen yields ((uni, w_uni), cate) batches, i.e. the
            # (inputs, targets) pairs fit_generator expects.
            train_gen = self.get_sample_generator(train,
                                                  batch_size=opt.batch_size)
            self.steps_per_epoch = int(
                np.ceil(total_train_samples / float(opt.batch_size)))

            total_dev_samples = dev['uni'].shape[0]
            dev_gen = self.get_sample_generator(dev,
                                                batch_size=opt.batch_size)
            self.validation_steps = int(
                np.ceil(total_dev_samples / float(opt.batch_size)))

            # shuffle=True has no effect with a plain generator: Keras
            # only shuffles when given a Sequence (with
            # steps_per_epoch=None).  use_multiprocessing could improve
            # throughput.
            model.fit_generator(generator=train_gen,
                                steps_per_epoch=self.steps_per_epoch,
                                epochs=opt.num_epochs,
                                validation_data=dev_gen,
                                validation_steps=self.validation_steps,
                                shuffle=True,
                                callbacks=[checkpoint])

        # Restore the best checkpointed weights and persist the model;
        # the json file is now closed deterministically.
        model.load_weights(self.weight_fname)
        with open(self.model_fname + '.json', 'w') as json_file:
            json_file.write(model.to_json())
        model.save(self.model_fname + '.h5')
コード例 #4
0
    def train(self, data_root, out_dir, model_name=None):
        """Train the TextOnly model with TensorBoard logging and optional
        multi-GPU data parallelism.

        Parameters
        ----------
        data_root : str
            Directory containing 'data.h5py' (with 'train'/'dev' groups)
            and a pickled 'meta' file holding 'y_vocab'.
        out_dir : str
            Output directory (created if missing).
        model_name : str, optional
            TensorBoard run name under ./graph; defaults to a timestamp.
        """
        data_path = os.path.join(data_root, 'data.h5py')
        meta_path = os.path.join(data_root, 'meta')
        # Context manager closes the meta file deterministically (the
        # handle was previously leaked via open(...).read()).
        with open(meta_path, 'rb') as meta_file:
            meta = cPickle.loads(meta_file.read())
        self.weight_fname = os.path.join(out_dir, 'weights')
        self.model_fname = os.path.join(out_dir, 'model')
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        self.logger.info('# of classes: %s' % len(meta['y_vocab']))
        self.num_classes = len(meta['y_vocab'])

        # The sample generators read batches lazily from the HDF5 file,
        # so it must stay open for the whole fit; the context manager
        # also closes it afterwards (it was never closed before).
        with h5py.File(data_path, 'r') as data:
            train = data['train']
            dev = data['dev']

            self.logger.info('# of train samples: %s' % train['cate'].shape[0])
            self.logger.info('# of dev samples: %s' % dev['cate'].shape[0])

            # NOTE(review): period=10 with save_best_only means no
            # checkpoint is written unless training runs >= 10 epochs;
            # load_weights() below would then fail -- confirm
            # opt.num_epochs >= 10.
            checkpoint = ModelCheckpoint(self.weight_fname,
                                         monitor='val_loss',
                                         save_best_only=True,
                                         mode='min',
                                         period=10)

            textonly = TextOnly()
            model = textonly.get_model(self.num_classes)

            total_train_samples = train['uni'].shape[0]
            train_gen = self.get_sample_generator(train,
                                                  batch_size=opt.batch_size)
            self.steps_per_epoch = int(
                np.ceil(total_train_samples / float(opt.batch_size)))

            total_dev_samples = dev['uni'].shape[0]
            dev_gen = self.get_sample_generator(dev, batch_size=opt.batch_size)
            self.validation_steps = int(
                np.ceil(total_dev_samples / float(opt.batch_size)))

            if not model_name:
                now = datetime.now()
                # NOTE(review): fields are unpadded and the name contains
                # ':' -- an invalid path character on Windows; confirm
                # this only runs on POSIX.
                model_name = '{}-{}-{} {}:{}:{}'.format(now.year, now.month,
                                                        now.day, now.hour,
                                                        now.minute, now.second)
            tb_hist = keras.callbacks.TensorBoard(
                log_dir='./graph/{0}'.format(model_name),
                histogram_freq=0,
                write_graph=True,
                write_images=True)
            if opt.num_gpus > 1:
                # NOTE(review): multi_gpu_model returns a new wrapper
                # model -- confirm it is compiled before fit and that
                # loading/saving the wrapper (not the template model)
                # below is intended.
                model = multi_gpu_model(model, gpus=opt.num_gpus)
            model.fit_generator(generator=train_gen,
                                steps_per_epoch=self.steps_per_epoch,
                                epochs=opt.num_epochs,
                                validation_data=dev_gen,
                                validation_steps=self.validation_steps,
                                shuffle=True,
                                callbacks=[checkpoint, tb_hist])

        # Restore the best checkpointed weights and persist the model;
        # the json file is now closed deterministically.
        model.load_weights(self.weight_fname)
        with open(self.model_fname + '.json', 'w') as json_file:
            json_file.write(model.to_json())
        model.save(self.model_fname + '.h5')
コード例 #5
0
    def train(self, data_root, out_dir, cate_type_='bm', is_validation=True):
        """Train a model for one category level ('bm', 's' or 'd').

        Parameters
        ----------
        data_root : str
            Directory containing 'data.h5py' and a pickled 'meta' file.
        out_dir : str
            Output directory (created if missing); receives 'weights',
            'model.json' and 'model.h5'.
        cate_type_ : str
            Category level; selects the epoch count and LR schedule.
        is_validation : bool
            When True, validate on the 'dev' split and checkpoint only
            the best val_loss; otherwise checkpoint every epoch.

        Raises
        ------
        ValueError
            If ``cate_type_`` is not one of 'bm', 's', 'd'.
        """
        # The schedule() closure below reads the module-level cate_type,
        # hence the global.
        global cate_type
        cate_type = cate_type_

        if cate_type == 'bm':
            num_epochs = opt.bm_num_epochs
        elif cate_type == 's':
            num_epochs = opt.s_num_epochs
        elif cate_type == 'd':
            num_epochs = opt.d_num_epochs
        else:
            # Raise instead of `assert False` so the validation is not
            # stripped under `python -O`.
            raise ValueError('%s is not valid data name' % cate_type)

        data_path = os.path.join(data_root, 'data.h5py')
        meta_path = os.path.join(data_root, 'meta')
        # Open in binary mode: cPickle.loads() requires bytes (the
        # previous text-mode read breaks on Python 3), and the handle is
        # now closed deterministically.  `meta` itself is unused below;
        # the load is kept as a fail-fast check that the file exists and
        # unpickles cleanly.
        with open(meta_path, 'rb') as meta_file:
            meta = cPickle.loads(meta_file.read())
        self.weight_fname = os.path.join(out_dir, 'weights')
        self.model_fname = os.path.join(out_dir, 'model')
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        # The sample generators read batches lazily from the HDF5 file,
        # so it must stay open for the whole fit; the context manager
        # also closes it afterwards (it was never closed before).
        with h5py.File(data_path, 'r') as data:
            train = data['train']
            dev = data['dev']

            self.logger.info('# of train samples: %s' % train['uni'].shape[0])
            self.logger.info('# of dev samples: %s' % dev['uni'].shape[0])

            textonly = TextOnly()
            model = textonly.get_model(cate_type=cate_type)

            total_train_samples = train['uni'].shape[0]
            train_gen = self.get_sample_generator(train,
                                                  batch_size=opt.batch_size,
                                                  div='train')
            self.steps_per_epoch = int(
                np.ceil(total_train_samples / float(opt.batch_size)))

            def schedule(epoch, lr):
                # Drop the learning rate once at the configured epoch for
                # the current category level; otherwise leave it as-is.
                if cate_type == 'bm':
                    if epoch == opt.bm_lr_change_epoch:
                        lr = 1e-5
                elif cate_type == 's':
                    if epoch == opt.s_lr_change_epoch:
                        lr = 3e-5
                elif cate_type == 'd':
                    if epoch == opt.d_lr_change_epoch:
                        lr = 3e-5
                return lr

            lrSchedule = LearningRateScheduler(schedule)

            if is_validation is True:
                checkpoint = ModelCheckpoint(self.weight_fname,
                                             monitor='val_loss',
                                             save_best_only=True,
                                             mode='min',
                                             period=1)

                total_dev_samples = dev['uni'].shape[0]
                dev_gen = self.get_sample_generator(dev,
                                                    batch_size=opt.batch_size,
                                                    div='train')
                self.validation_steps = int(
                    np.ceil(total_dev_samples / float(opt.batch_size)))

                model.fit_generator(generator=train_gen,
                                    steps_per_epoch=self.steps_per_epoch,
                                    epochs=num_epochs,
                                    validation_data=dev_gen,
                                    validation_steps=self.validation_steps,
                                    shuffle=True,
                                    callbacks=[checkpoint, lrSchedule])
            else:
                # save_best_only defaults to False here, so weights are
                # written every epoch even though the default 'val_loss'
                # monitor does not exist without validation data.
                checkpoint = ModelCheckpoint(self.weight_fname,
                                             mode='min',
                                             period=1)

                model.fit_generator(generator=train_gen,
                                    steps_per_epoch=self.steps_per_epoch,
                                    epochs=num_epochs,
                                    validation_data=None,
                                    shuffle=True,
                                    callbacks=[checkpoint, lrSchedule])

        # Restore the checkpointed weights and persist the final model;
        # the json file is now closed deterministically.
        model.load_weights(self.weight_fname)
        with open(self.model_fname + '.json', 'w') as json_file:
            json_file.write(model.to_json())
        model.save(self.model_fname + '.h5')