Example #1
def _test_preprocess_true_boxes():
    dataset = Dataset('train')
    # anno = '/home/ylqi007/work/DATA/VOC2007/test/JPEGImages/004538.jpg 215,35,308,224,14 41,77,403,367,12'
    anno = '/home/ylqi007/work/DATA/VOC2007/test/JPEGImages/004538.jpg 215,35,308,224,14'
    image, bboxes = dataset.parse_annotation(annotation=anno)
    print("bboxes after dataset.parse_annotation()\n", bboxes)
    dataset.preprocess_true_boxes(bboxes)
Example #2
 def init_dataset(self):
     self.train_dataset = Dataset(self.sess, self.config.batch_size,
                                  self.config.use_train_shuffle, True,
                                  self.config, self.input_size)
     self.validation_dataset = Dataset(self.sess,
                                       self.config.validation_batch_size,
                                       False, False, self.config,
                                       self.input_size)
Example #3
    def __init__(self, config_filename):
        # configuration
        config = Config()
        config_file = "{}/{}".format(config.config_dir, config_filename)
        config.update_config(config_file)

        # word embedding
        print("setting word embedding...")
        word_embedding = Embedding()

        word_embedding_file = "{}/word_embedding_{}.pkl".format(
            config.cache_dir, config.config_name)
        print("loading word embedding from {}...".format(word_embedding_file))
        word_embedding.load_word_embedding(word_embedding_file)

        # demo dataset
        print("setting demo dataset...")
        self.demo_dataset = Dataset(config.data_config)

        self.demo_dataset.set_word_to_index(word_embedding.word2index)

        label_mapping_file = "{}/label_mapping_{}.pkl".format(
            config.cache_dir, config.config_name)
        print("loading label mapping from {}...".format(label_mapping_file))
        self.demo_dataset.load_label_mapping(label_mapping_file)

        # model
        new_model_config = {
            "vocab_size": word_embedding.vocab_size,
            "word_dim": word_embedding.word_dim,
            "document_length": self.demo_dataset.document_length,
            "sentence_length": self.demo_dataset.sentence_length,
            "num_labels": self.demo_dataset.num_labels
        }
        config.update_model_config(new_model_config)

        model = Model(config.model_config)

        # model factory
        self.network = Factory(model)

        self.network.set_test_module()
        print("number of GPUs: {}".format(self.network.num_gpus))
        print("device: {}".format(self.network.device))

        # load model
        model_file = "{}/model_{}.pkl".format(config.cache_dir,
                                              config.config_name)
        print("loading model from {}...".format(model_file))
        self.network.load_model(model_file)

        self.network.model_to_device()
        self.network.eval_mode()
Example #4
    def predict_proba(self, *args):
        # predict_proba conforms to the sklearn estimator interface

        # predict_proba is only available for classifiers
        m = self.root.data[NODE_DATASET_KEY].metadata
        assert m.is_categorical(m.target)
        assert self._estimator_type == ESTIMATOR_TYPE_CLASSIFIER

        # we are being called with a dataframe, so convert it to a Dataset
        # before calling transform on it
        assert len(args) == 1 and isinstance(args[0], pd.DataFrame)
        X = args[0]
        m = self.root.data[NODE_DATASET_KEY].metadata
        dataset = Dataset.from_pandas_X(X, m)

        # gather predictions and probabilities in the format sklearn expects
        all_datapoints_class_probabilities = []
        for transformed in self.transform(dataset):
            targets_reduced, total_datapoints_count, prediction_datatype = (
                transformed)
            # for each class in order, get the proba
            class_probabilities = []
            for klass in self.classes_:
                fraction_of_class = targets_reduced[klass] / sum(
                    targets_reduced.values())
                class_probabilities.append(fraction_of_class)
            all_datapoints_class_probabilities.append(class_probabilities)

        return np.array(all_datapoints_class_probabilities)
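The probabilities returned above are just leaf class counts normalized to sum to one. A minimal, self-contained sketch of that normalization, assuming the reduced targets behave like a collections.Counter of class counts (made-up data, not the project's API):

from collections import Counter

import numpy as np

# hypothetical reduced targets for two datapoints: class -> count at the leaf
targets_per_datapoint = [Counter({"cat": 3, "dog": 1}), Counter({"cat": 1, "dog": 4})]
classes_ = sorted({k for c in targets_per_datapoint for k in c})  # fixed class order

all_datapoints_class_probabilities = []
for targets_reduced in targets_per_datapoint:
    total = sum(targets_reduced.values())
    # fraction of each class, in classes_ order, mirroring the loop above
    all_datapoints_class_probabilities.append(
        [targets_reduced[klass] / total for klass in classes_])

print(np.array(all_datapoints_class_probabilities))  # each row sums to 1.0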
Example #5
    def predict(self, *args):
        assert len(args) == 1

        if isinstance(args[0], pd.DataFrame):
            X = args[0]
            m = self.root.data[NODE_DATASET_KEY].metadata
            dataset = Dataset.from_pandas_X(X, m)
        elif isinstance(args[0], Dataset):
            dataset = args[0]
        else:
            raise ValueError(
                "Arg to predict() must be a Pandas DataFrame or Arboreal Dataset"
            )

        # return predictions in the format sklearn expects
        predictions = []
        for transformed in self.transform(dataset):
            targets_reduced, total_datapoints_count, prediction_datatype = (
                transformed)
            if prediction_datatype == Datatype.numerical:
                prediction = targets_reduced
            elif prediction_datatype == Datatype.categorical:
                prediction = targets_reduced.most_common()[0][0]
            predictions.append(prediction)

        return predictions
Example #6
	def fit(self):
		history = self.candidate_generation.fit([tf.keras.preprocessing.sequence.pad_sequences(self.trainset['movie_id']),
			tf.keras.preprocessing.sequence.pad_sequences(self.trainset['watch_hist_time'], dtype=float),
			tf.keras.preprocessing.sequence.pad_sequences(self.trainset['search_hist'], dtype=float) + 1e-10,
			tf.keras.preprocessing.sequence.pad_sequences(self.trainset['example_age'], dtype=float),
			],self.trainset['predict_labels'].values,
			steps_per_epoch=1, epochs=50)

		pred = self.candidate_generation.predict([tf.keras.preprocessing.sequence.pad_sequences(self.trainset['movie_id']),
			tf.keras.preprocessing.sequence.pad_sequences(self.trainset['watch_hist_time'], dtype=float),
			tf.keras.preprocessing.sequence.pad_sequences(self.trainset['search_hist'], dtype=float) + 1e-10,
			tf.keras.preprocessing.sequence.pad_sequences(self.trainset['example_age'], dtype=float),
			])

		print(pred)
		# candidate generation:
		###### extract the top-7 recommended items for each user
		# movies = utils.get_topk(6)
		N = 6
		k = np.sort((-pred).argsort()[:,:N])
		print(k)
		k = k.flatten()
		# k[k>data["movie"].max()]=0
		k = np.unique(k)

		print(k)

		Dataset('train').preprocess_ranking(k)
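As a rough, self-contained illustration of the candidate-selection step, (-pred).argsort()[:, :N] picks the indices of the N highest scores per row; the scores below are made up, not the model's output:

import numpy as np

pred = np.array([[0.1, 0.7, 0.2, 0.9],
                 [0.5, 0.3, 0.8, 0.1]])    # fake per-user scores over 4 items
N = 2

top_n = np.sort((-pred).argsort()[:, :N])  # indices of the N largest scores per row
candidates = np.unique(top_n.flatten())    # merged, deduplicated candidate ids

print(top_n)       # [[1 3] [0 2]]
print(candidates)  # [0 1 2 3]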
Example #7
    def training(self):

        self.__getDataset()

        trainset = Dataset('train')
        logdir = "./data/log"
        steps_per_epoch = len(trainset)
        global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
        warmup_steps = cfg.TRAIN.WARMUP_EPOCHS * steps_per_epoch
        total_steps = cfg.TRAIN.EPOCHS * steps_per_epoch

        input_tensor = tf.keras.layers.Input([416, 416, 3])
        conv_tensors = YOLOv3(input_tensor)

        output_tensors = []
        for i, conv_tensor in enumerate(conv_tensors):
            pred_tensor = decode(conv_tensor, i)
            output_tensors.append(conv_tensor)
            output_tensors.append(pred_tensor)

        model = tf.keras.Model(input_tensor, output_tensors)
        optimizer = tf.keras.optimizers.Adam()
        if os.path.exists(logdir): shutil.rmtree(logdir)
        writer = tf.summary.create_file_writer(logdir)

        self._tb.start()
        for epoch in range(cfg.TRAIN.EPOCHS):
            print(epoch)
            for image_data, target in trainset:
                self.__train_step(image_data, target, model, global_steps,
                                  writer, optimizer, warmup_steps, total_steps)
            model.save_weights(self._args.ckpt_path)
        self._tb.stop()
        model.save(f"./models")

        zipFolder("check.zip", "checkpoint")
        zipFolder("log.zip", "data/log")
        zipFolder("model.zip", "models")

        self._run.upload_file(name='check.zip', path_or_stream="check.zip")
        print(
            f"Uploaded the checkpoints to experiment {self._run.experiment.name}"
        )
        self._run.upload_file(name='log.zip', path_or_stream="log.zip")
        print(f"Uploaded the tfruns to experiment {self._run.experiment.name}")
        self._run.upload_file(name='model.zip', path_or_stream="model.zip")
        print(f"Uploaded the model to experiment {self._run.experiment.name}")

        print("Following files are uploaded")
        print(self._run.get_file_names())

        self._run.add_properties({
            "release_id": self._args.release_id,
            "run_type": "train"
        })
        print(f"added properties: {self._run.properties}")

        self._run.complete()
Example #8
        def __init__(self,
                    name, 
                    model_fn,
                    config, 
                    model_config,
                    env_config, 
                    replay_config):
            cpu_affinity('Learner')
            silence_tf_logs()
            configure_threads(config['n_cpus'], config['n_cpus'])
            configure_gpu()
            configure_precision(config['precision'])
            self._dtype = global_policy().compute_dtype

            self._envs_per_worker = env_config['n_envs']
            env_config['n_envs'] = 1
            env = create_env(env_config)
            assert env.obs_dtype == np.uint8, \
                f'Expect image observation of type uint8, but get {env.obs_dtype}'
            self._action_shape = env.action_shape
            self._action_dim = env.action_dim
            self._frame_skip = getattr(env, 'frame_skip', 1)

            self.models = Ensemble(
                model_fn=model_fn,
                config=model_config, 
                obs_shape=env.obs_shape,
                action_dim=env.action_dim, 
                is_action_discrete=env.is_action_discrete
            )

            super().__init__(
                name=name, 
                config=config, 
                models=self.models,
                dataset=None,
                env=env)

            replay_config['dir'] = config['root_dir'].replace('logs', 'data')
            self.replay = create_replay(replay_config)
            data_format = get_data_format(env, replay_config)
            process = functools.partial(process_with_env, env=env)
            self.dataset = Dataset(self.replay, data_format, process, prefetch=10)

            self._env_step = self.env_step()
Example #9
def annotation_flip_test():
    dataset = Dataset('train')
    anno = '/home/ylqi007/work/DATA/VOC2007/test/JPEGImages/004538.jpg 215,35,308,224,14 41,77,403,367,12'
    # anno = '/home/ylqi007/work/DATA/VOC2007/test/JPEGImages/007741.jpg 45,254,67,289,2 87,124,119,152,2 66,108,100,121,2 242,66,295,113,2 150,144,192,192,2 221,178,316,227,2 316,198,416,253,2 354,149,390,174,2 253,174,299,199,2 312,75,338,91,2 207,78,235,95,2'
    image, bboxes = dataset.parse_annotation(annotation=anno)
    image = image.copy()  # https://stackoverflow.com/questions/30249053/python-opencv-drawing-errors-after-manipulating-array-with-numpy
    flip_img, flip_bboxes = np.copy(image), np.copy(bboxes)
    _, w, _ = flip_img.shape
    flip_img = flip_img[:, ::-1, :]
    flip_bboxes[:, [0, 2]] = w - flip_bboxes[:, [2, 0]]
    flip_img = flip_img.copy()

    print("========== original box ==========")
    # show_image('original', image)
    show_image('original_with_bbox', image, bboxes)

    print("========== flipped box ==========")
    show_image('flipped_with_bbox', flip_img, flip_bboxes)
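The flip above mirrors the image along its width and remaps each box as x_min' = w - x_max, x_max' = w - x_min; a tiny numpy check with one made-up box (w assumed to be 416):

import numpy as np

w = 416                                       # assumed image width
bboxes = np.array([[215, 35, 308, 224, 14]])  # [x_min, y_min, x_max, y_max, class_id]

flip_bboxes = np.copy(bboxes)
flip_bboxes[:, [0, 2]] = w - flip_bboxes[:, [2, 0]]  # mirror and swap the x coordinates

print(flip_bboxes)  # [[108  35 201 224  14]] -- the box width (93 px) is preserved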
Example #10
def train(Model):
    """Train the model"""

    train_set, target, categoricals = utils.load_data(args.dataset,
                                                      args.file_name)
    dataset_train = Dataset(dataset=train_set,
                            categorical_indices=categoricals)
    target_values = dataset_train[target]
    dataset_train.drop(target, axis=1, inplace=True)
    (
        categorical_variables,
        non_categorical_variables,
    ) = dataset_train.get_variables_names()

    print("Creating cross products dataset")
    cross_products = dataset_train.cross_categorical_dataset()

    model = Model(categorical_variables, non_categorical_variables)
    model = model.build_model(cross_products)

    print("Training model")
    model.fit(
        [
            [
                train_set[categorical_variables],
                train_set[non_categorical_variables]
            ],
            cross_products,
        ],
        target_values,
        epochs=config.EPOCHS,
        validation_split=config.VALIDATION_SPLIT,
    )
Example #11
def main_worker(gpu, ngpus_per_node, config):
  torch.cuda.set_device(gpu)
  set_seed(config['seed'])

  # Model and version
  net = importlib.import_module('model.'+args.model_name)
  model = set_device(net.InpaintGenerator())
  latest_epoch = open(os.path.join(config['save_dir'], 'latest.ckpt'), 'r').read().splitlines()[-1]
  path = os.path.join(config['save_dir'], 'gen_{}.pth'.format(latest_epoch))
  data = torch.load(path, map_location = lambda storage, loc: set_device(storage)) 
  model.load_state_dict(data['netG'])
  model.eval()

  # prepare dataset
  dataset = Dataset(config['data_loader'], debug=False, split='test', level=args.level)
  step = math.ceil(len(dataset) / ngpus_per_node)
  dataset.set_subset(gpu*step, min(gpu*step+step, len(dataset)))
  dataloader = DataLoader(dataset, batch_size= BATCH_SIZE, shuffle=False, num_workers=config['trainer']['num_workers'], pin_memory=True)

  path = os.path.join(config['save_dir'], 'results_{}_level_{}'.format(str(latest_epoch).zfill(5), str(args.level).zfill(2)))
  os.makedirs(path, exist_ok=True)
  # iteration through datasets
  for idx, (images, masks, names) in enumerate(dataloader):
    print('[{}] GPU{} {}/{}: {}'.format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
      gpu, idx, len(dataloader), names[0]))
    images, masks = set_device([images, masks])
    images_masked = images*(1-masks) + masks
    with torch.no_grad():
      _, output = model(torch.cat((images_masked, masks), dim=1), masks)
    orig_imgs = postprocess(images)
    mask_imgs = postprocess(images_masked)
    comp_imgs = postprocess((1-masks)*images+masks*output)
    pred_imgs = postprocess(output)
    for i in range(len(orig_imgs)):
      Image.fromarray(pred_imgs[i]).save(os.path.join(path, '{}_pred.png'.format(names[i].split('.')[0])))
      Image.fromarray(orig_imgs[i]).save(os.path.join(path, '{}_orig.png'.format(names[i].split('.')[0])))
      Image.fromarray(comp_imgs[i]).save(os.path.join(path, '{}_comp.png'.format(names[i].split('.')[0])))
      Image.fromarray(mask_imgs[i]).save(os.path.join(path, '{}_mask.png'.format(names[i].split('.')[0])))
  print('Finish in {}'.format(path))
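The compositing expressions above keep original pixels where the mask is 0 and model output where it is 1; a small numpy sketch of the same blend with fake tensors (no model involved):

import numpy as np

images = np.full((1, 1, 4, 4), 0.2)  # fake batch of original pixels
output = np.full((1, 1, 4, 4), 0.9)  # fake batch of predicted pixels
masks = np.zeros((1, 1, 4, 4))
masks[..., 1:3, 1:3] = 1.0           # hole to be inpainted

images_masked = images * (1 - masks) + masks   # hole filled with 1s, as fed to the model
comp = (1 - masks) * images + masks * output   # prediction used only inside the hole

print(comp[0, 0])  # 0.2 outside the hole, 0.9 inside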
Example #12
    def predict(self, image, annotation):

        org_image = np.copy(image)
        org_h, org_w, _ = org_image.shape

        image_data = utils.image_preporcess(image, [self.input_size, self.input_size])
        image_data = image_data[np.newaxis, ...]

        _, gt_bboxes = Dataset('train').parse_annotation(annotation)
        label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes = Dataset('train').preprocess_true_boxes(gt_bboxes)
        label_sbbox = label_sbbox[np.newaxis, ...]
        label_mbbox = label_mbbox[np.newaxis, ...]
        label_lbbox = label_lbbox[np.newaxis, ...]
        sbboxes = sbboxes[np.newaxis, ...]
        mbboxes = mbboxes[np.newaxis, ...]
        lbboxes = lbboxes[np.newaxis, ...]

        pred_sbbox, pred_mbbox, pred_lbbox, loss = self.sess.run(
            [self.pred_sbbox, self.pred_mbbox, self.pred_lbbox, self.loss],
            feed_dict={
                self.input_data: image_data,
                self.label_sbbox: label_sbbox,
                self.label_mbbox: label_mbbox,
                self.label_lbbox: label_lbbox,
                self.sbboxes: sbboxes,
                self.mbboxes: mbboxes,
                self.lbboxes: lbboxes,
                self.trainable: False
            }
        )

        pred_bbox = np.concatenate([np.reshape(pred_sbbox, (-1, 5 + self.num_classes)),
                                    np.reshape(pred_mbbox, (-1, 5 + self.num_classes)),
                                    np.reshape(pred_lbbox, (-1, 5 + self.num_classes))], axis=0)
        bboxes = utils.postprocess_boxes(pred_bbox, (org_h, org_w), self.input_size, self.score_threshold)
        bboxes = utils.nms(bboxes, self.iou_threshold)

        return bboxes, loss
Example #13
    def fit(self, *args):

        logger.debug("ArborealTree fit() called")

        # to be compatible with sklearn interface, fit() must accept an (X, y)
        # function call signature. to be nice with Arboreal Datasets, we also
        # want to accept a (Dataset) signature. this fit() function is mostly
        # about handling incoming data of different types and preparing it for
        # the _fit() function which does the actual fitting work on a Dataset.
        # determine whether we're using a scikit-learn or Arboreal signature:
        if (len(args) == 2 and isinstance(args[0], pd.DataFrame)
                and isinstance(args[1], pd.Series)):
            # we are using X, y dataframe function signature
            # we'll construct a Dataset from these X, y dataframes
            dataset = Dataset.from_pandas_Xy(args[0], args[1])

        elif len(args) == 1 and isinstance(args[0], Dataset):
            # we are using Dataset function signature, good to pass directly to _fit()
            dataset = args[0]

        else:
            # we are using an unknown signature
            raise ValueError(
                "fit() requires either X,y paired dataframes(/series) or a Dataset"
            )

        # sklearn parameters:
        # set _estimator_type for sklearn compatibility; use the target metadata
        # to determine estimator type
        if dataset.metadata.is_categorical(dataset.metadata.target):
            self._estimator_type = ESTIMATOR_TYPE_CLASSIFIER
        elif dataset.metadata.is_numerical(dataset.metadata.target):
            self._estimator_type = ESTIMATOR_TYPE_REGRESSOR
        else:
            raise ValueError(
                "target must have type set, and must be either categorical or numerical"
            )

        # set classes_ for sklearn compatibility (if classifier); also used
        # as class ordering in predict and predict_proba
        if dataset.metadata.is_categorical(dataset.metadata.target):
            self.classes_ = sorted(set(dataset.targets.keys()))

        return self._fit(dataset)
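The dual (X, y) / (Dataset,) signature above is plain isinstance dispatch; a self-contained sketch of the same pattern with a stand-in Dataset class (the real Arboreal Dataset and its from_pandas_Xy are only mimicked here):

import pandas as pd


class Dataset:  # stand-in for the Arboreal Dataset used above
    def __init__(self, frame):
        self.frame = frame

    @classmethod
    def from_pandas_Xy(cls, X, y):
        return cls(X.assign(target=y))


def fit(*args):
    if (len(args) == 2 and isinstance(args[0], pd.DataFrame)
            and isinstance(args[1], pd.Series)):
        dataset = Dataset.from_pandas_Xy(args[0], args[1])  # sklearn-style call
    elif len(args) == 1 and isinstance(args[0], Dataset):
        dataset = args[0]                                   # Dataset-style call
    else:
        raise ValueError("fit() requires X,y paired dataframes(/series) or a Dataset")
    return dataset


X = pd.DataFrame({"a": [1, 2]})
y = pd.Series([0, 1])
print(fit(X, y).frame)        # dispatched through from_pandas_Xy
print(fit(Dataset(X)).frame)  # passed through unchanged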
Example #14
File: train.py  Project: xlnwel/d2rl
def main(env_config, model_config, agent_config, replay_config):
    silence_tf_logs()
    configure_gpu()
    configure_precision(agent_config['precision'])

    use_ray = env_config.get('n_workers', 0) > 1
    if use_ray:
        import ray
        ray.init()
        sigint_shutdown_ray()

    env = create_env(env_config, make_env, force_envvec=True)
    eval_env_config = env_config.copy()
    eval_env_config['n_envs'] = 1
    eval_env_config['n_workers'] = 1
    eval_env = create_env(eval_env_config, make_env)

    replay_config['dir'] = agent_config['root_dir'].replace('logs', 'data')
    replay = create_replay(replay_config)
    replay.load_data()
    dtype = global_policy().compute_dtype
    data_format = pkg.import_module(
        'agent', config=agent_config).get_data_format(
            env=env,
            batch_size=agent_config['batch_size'],
            sample_size=agent_config['sample_size'],
            dtype=dtype)
    process = functools.partial(process_with_env,
                                env=env,
                                obs_range=[-.5, .5],
                                one_hot_action=True,
                                dtype=dtype)
    dataset = Dataset(replay, data_format, process)

    create_model, Agent = pkg.import_agent(config=agent_config)
    models = create_model(model_config, env)

    agent = Agent(config=agent_config, models=models, dataset=dataset, env=env)

    agent.save_config(
        dict(env=env_config,
             model=model_config,
             agent=agent_config,
             replay=replay_config))

    train(agent, env, eval_env, replay)
Example #15
def deduplication(dataset: Dataset, context: DictConfig) -> Dataset:
    """
    This function deals with duplications
    """
    from collections import defaultdict

    elements_grouped_by_iid = defaultdict(list)

    for element in dataset:
        cid = element.cid
        elements_grouped_by_iid[cid].append(element)

    dt = dataset.from_items(
        name=dataset.name,
        items=[item[0] for key, item in list(elements_grouped_by_iid.items())])

    return dt
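For reference, the deduplication above groups elements by cid and keeps the first element of each group; the same idea as a self-contained sketch over plain (cid, payload) tuples instead of dataset elements:

from collections import defaultdict

elements = [("a", 1), ("b", 2), ("a", 3), ("c", 4), ("b", 5)]  # fake (cid, payload) pairs

grouped = defaultdict(list)
for element in elements:
    grouped[element[0]].append(element)

deduplicated = [items[0] for items in grouped.values()]  # first occurrence per cid
print(deduplicated)  # [('a', 1), ('b', 2), ('c', 4)]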
Example #16
    def predict(self, image, annotation):
        dataset_obj = Dataset('test')
        dataset_obj.train_input_size = cfg.TEST.INPUT_SIZE
        dataset_obj.strides = np.array(cfg.YOLO.STRIDES)
        dataset_obj.train_output_sizes = dataset_obj.train_input_size // dataset_obj.strides

        org_image = np.copy(image)
        org_h, org_w, _ = org_image.shape

        image_data = utils.image_preporcess(image,
                                            [self.input_size, self.input_size])
        image_data = image_data[np.newaxis, ...]

        _, gt_bboxes = dataset_obj.parse_annotation(annotation)
        label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes = dataset_obj.preprocess_true_boxes(
            gt_bboxes)
        label_sbbox = label_sbbox[np.newaxis, ...]
        label_mbbox = label_mbbox[np.newaxis, ...]
        label_lbbox = label_lbbox[np.newaxis, ...]
        sbboxes = sbboxes[np.newaxis, ...]
        mbboxes = mbboxes[np.newaxis, ...]
        lbboxes = lbboxes[np.newaxis, ...]

        pred_sbbox, pred_mbbox, pred_lbbox, loss = self.sess.run(
            [self.pred_sbbox, self.pred_mbbox, self.pred_lbbox, self.loss],
            feed_dict={
                self.input_data: image_data,
                self.label_sbbox: label_sbbox,
                self.label_mbbox: label_mbbox,
                self.label_lbbox: label_lbbox,
                self.true_sbboxes: sbboxes,
                self.true_mbboxes: mbboxes,
                self.true_lbboxes: lbboxes,
                self.trainable: False
            })

        pred_bbox = np.concatenate([
            np.reshape(pred_sbbox, (-1, 5 + self.num_classes)),
            np.reshape(pred_mbbox, (-1, 5 + self.num_classes)),
            np.reshape(pred_lbbox, (-1, 5 + self.num_classes))
        ],
                                   axis=0)
        bboxes = utils.postprocess_boxes(pred_bbox, (org_h, org_w),
                                         self.input_size, self.score_threshold)
        bboxes = utils.nms(bboxes, self.iou_threshold)

        return bboxes, loss
Example #17
def train_model(mirna_fasta_file, mrna_fasta_file, train_file, model=None, cts_size=30, seed_match='offset-9-mer-m7', level='gene', batch_size=32, epochs=10, save_file=None, device='cpu'):
    if not isinstance(model, deepTarget):
        raise ValueError("'model' expected <nn.Module 'deepTarget'>, got {}".format(type(model)))
    
    print("\n[TRAIN] {}".format(model.name))
    
    if train_file.split('/')[-1] == 'train_set.csv':
        train_set = TrainDataset(train_file)
    else:
        train_set = Dataset(mirna_fasta_file, mrna_fasta_file, train_file, seed_match=seed_match, header=True, train=True)
    train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
    
    class_weight = torch.Tensor(compute_class_weight('balanced', classes=np.unique(train_set.labels), y=train_set.labels)).to(device)
    criterion = nn.CrossEntropyLoss(weight=class_weight)
    optimizer = optim.Adam(model.parameters())
    
    model = model.to(device)
    for epoch in range(epochs):
        epoch_loss, corrects = 0, 0

        with tqdm(train_loader, desc="Epoch {}/{}".format(epoch+1, epochs), bar_format=bar_format) as tqdm_loader:
            for i, ((mirna, mrna), label) in enumerate(tqdm_loader):
                mirna, mrna, label = mirna.to(device, dtype=torch.float), mrna.to(device, dtype=torch.float), label.to(device)
                
                outputs = model(mirna, mrna)
                loss = criterion(outputs, label)
                
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                epoch_loss += loss.item() * outputs.size(0)
                corrects += (torch.max(outputs, 1)[1] == label).sum().item()
                
                if (i+1) == len(train_loader):
                    tqdm_loader.set_postfix(dict(loss=(epoch_loss/len(train_set)), acc=(corrects/len(train_set))))
                else:
                    tqdm_loader.set_postfix(loss=loss.item())
    
    if save_file is None:
        time = datetime.now()
        save_file = "{}.pt".format(time.strftime('%Y%m%d_%H%M%S_weights'))
    torch.save(model.state_dict(), save_file)
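The compute_class_weight('balanced', ...) call above weights each class by n_samples / (n_classes * class_count); a tiny sketch of what it returns, assuming scikit-learn is installed:

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

labels = np.array([0, 0, 0, 0, 1, 1])  # imbalanced toy labels
weights = compute_class_weight('balanced', classes=np.unique(labels), y=labels)
print(weights)  # [0.75 1.5] -> 6 / (2 * 4) and 6 / (2 * 2)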
Example #18
 def __init__(self, opts):
     """Create a training class
     The constructor init all needed parameters
     """
     self.opts = opts
     self.learn_rate_init = opts["train"]["learn_rate_init"]
     self.learn_rate_end = opts["train"]["learn_rate_end"]
     self.epochs = opts["train"]["epochs"]
     self.warmup_epochs = opts["train"]["warmup_epochs"]
     self.initial_weight = opts["train"]["initial_weight"]
     self.moving_avg_decay = opts["yolo"]["moving_avg_decay"]
     self.trainset = Dataset("train", self.opts)
     self.steps_per_epoch = len(
         self.trainset) / opts["train"]["total_replicas"]
     self.precision = tf.float16 if opts["yolo"][
         "precision"] == "fp16" else tf.float32
     self.model = YOLOV3(opts["train"]["bn_trainable"], opts)
     self.batch_size = opts["train"]["batch_size"]
     self.data_threads_number = opts["train"]["data_threads_number"]
     self.loss_scaling = opts["train"]["loss_scaling"]
     self.repeat_count = opts["train"]["repeat_count"]
     self.for_speed_test = opts["train"]["for_speed_test"]
Example #19
    def transform(self, *args):
        # This function aims to be compatible with both arboreal Datasets
        # directly as well as Pandas dataframes, to implement the sklearn
        # interface.  If a DataFrame is passed in, we create a Dataset, and
        # then continue handling as usual.  Note, if we need to create a
        # dataset, rather than do inference on the types of the passed-in
        # dataframe, we use the metadata determined in fit() (called before
        # transform()) to determine which columns are of which types.
        if len(args) == 1 and isinstance(args[0], Dataset):
            dataset = args[0]
        elif len(args) == 1 and isinstance(args[0], pd.DataFrame):
            X = args[0]
            # use the fit metadata rather than re-inferring (and potentially
            # being inconsistent with the previous inference)
            m = self.root.data[NODE_DATASET_KEY].metadata
            dataset = Dataset.from_pandas_X(X, m)
        else:
            raise ValueError(
                "transform requires either an Arboreal Dataset or Pandas DataFrame"
            )

        return self._transform(dataset)
Example #20
def predict_result(mirna_fasta_file, mrna_fasta_file, query_file, model=None, weight_file=None, seed_match='offset-9-mer-m7', level='gene', batch_size=32, output_file=None, device='cpu'):
    if not isinstance(model, deepTarget):
        raise ValueError("'model' expected <nn.Module 'deepTarget'>, got {}".format(type(model)))
    
    if not weight_file.endswith('.pt'):
        raise ValueError("'weight_file' expected '*.pt', got {}".format(weight_file))
    
    model.load_state_dict(torch.load(weight_file))
    
    test_set = Dataset(mirna_fasta_file, mrna_fasta_file, query_file, seed_match=seed_match, header=True, train=False)
    test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)
    
    y_probs = []
    y_predicts = []
    y_truth = []

    model = model.to(device)
    with torch.no_grad():
        model.eval()
        
        with tqdm(test_loader, bar_format=bar_format) as tqdm_loader:
            for i, ((mirna, mrna), label) in enumerate(tqdm_loader):
                mirna, mrna, label = mirna.to(device, dtype=torch.float), mrna.to(device, dtype=torch.float), label.to(device)
                
                outputs = model(mirna, mrna)
                _, predicts = torch.max(outputs.data, 1)
                probabilities = F.softmax(outputs, dim=1)
                
                y_probs.extend(probabilities.cpu().numpy()[:, 1])
                y_predicts.extend(predicts.cpu().numpy())
                y_truth.extend(label.cpu().numpy())
        
        if output_file is None:
            time = datetime.now()
            output_file = "{}.csv".format(time.strftime('%Y%m%d_%H%M%S_results'))
        results = postprocess_result(test_set.dataset, y_probs, y_predicts,
                                     seed_match=seed_match, level=level, output_file=output_file)
        
        print(results)
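torch.max(outputs.data, 1) and F.softmax(outputs, dim=1) above yield the predicted class and per-class probabilities; the same post-processing in plain numpy with made-up logits, just to show the math:

import numpy as np

outputs = np.array([[2.0, 0.5],
                    [0.1, 1.2]])  # fake logits, shape (batch, n_classes)

exp = np.exp(outputs - outputs.max(axis=1, keepdims=True))
probabilities = exp / exp.sum(axis=1, keepdims=True)  # row-wise softmax
predicts = outputs.argmax(axis=1)                     # predicted class per row

print(probabilities[:, 1])  # probability of class 1, as collected into y_probs
print(predicts)             # [0 1]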
Example #21
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    # if len(physical_devices) > 0:
    if physical_devices:
        # tf.config.experimental.set_memory_growth(physical_devices[0], True)
        tf.config.experimental.set_visible_devices(physical_devices[0], "GPU")

    trainset = Dataset(FLAGS, is_training=True)
    testset = Dataset(FLAGS, is_training=False)
    logdir = "./data/log"
    isfreeze = False
    steps_per_epoch = len(trainset)
    first_stage_epochs = cfg.TRAIN.FISRT_STAGE_EPOCHS
    second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
    global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
    warmup_steps = cfg.TRAIN.WARMUP_EPOCHS * steps_per_epoch
    total_steps = (first_stage_epochs + second_stage_epochs) * steps_per_epoch
    # train_steps = (first_stage_epochs + second_stage_epochs) * steps_per_period

    input_layer = tf.keras.layers.Input([cfg.TRAIN.INPUT_SIZE, cfg.TRAIN.INPUT_SIZE, 3])
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH

    freeze_layers = utils.load_freeze_layer(FLAGS.model, FLAGS.tiny)

    feature_maps = YOLO(input_layer, NUM_CLASS, FLAGS.model, FLAGS.tiny)
    if FLAGS.tiny:
        bbox_tensors = []
        for i, fm in enumerate(feature_maps):
            if i == 0:
                bbox_tensor = decode_train(fm, cfg.TRAIN.INPUT_SIZE // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
            else:
                bbox_tensor = decode_train(fm, cfg.TRAIN.INPUT_SIZE // 32, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
            bbox_tensors.append(fm)
            bbox_tensors.append(bbox_tensor)
    else:
        bbox_tensors = []
        for i, fm in enumerate(feature_maps):
            if i == 0:
                bbox_tensor = decode_train(fm, cfg.TRAIN.INPUT_SIZE // 8, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
            elif i == 1:
                bbox_tensor = decode_train(fm, cfg.TRAIN.INPUT_SIZE // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
            else:
                bbox_tensor = decode_train(fm, cfg.TRAIN.INPUT_SIZE // 32, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
            bbox_tensors.append(fm)
            bbox_tensors.append(bbox_tensor)

    model = tf.keras.Model(input_layer, bbox_tensors)
    model.summary()

    if FLAGS.weights is None:
        print("Training from scratch")
    else:
        if FLAGS.weights.split(".")[-1] == "weights":
            utils.load_weights(model, FLAGS.weights, FLAGS.model, FLAGS.tiny)
        else:
            model.load_weights(FLAGS.weights)
        print('Restoring weights from: %s ... ' % FLAGS.weights)


    optimizer = tf.keras.optimizers.Adam()
    if os.path.exists(logdir): shutil.rmtree(logdir)
    writer = tf.summary.create_file_writer(logdir)

    # define training step function
    # @tf.function
    def train_step(image_data, target):
        with tf.GradientTape() as tape:
            pred_result = model(image_data, training=True)
            giou_loss = conf_loss = prob_loss = 0

            # optimizing process
            for i in range(len(freeze_layers)):
                conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
                loss_items = compute_loss(pred, conv, target[i][0], target[i][1], STRIDES=STRIDES, NUM_CLASS=NUM_CLASS, IOU_LOSS_THRESH=IOU_LOSS_THRESH, i=i)
                giou_loss += loss_items[0]
                conf_loss += loss_items[1]
                prob_loss += loss_items[2]

            total_loss = giou_loss + conf_loss + prob_loss

            gradients = tape.gradient(total_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))
            tf.print("=> STEP %4d/%4d   lr: %.6f   giou_loss: %4.2f   conf_loss: %4.2f   "
                     "prob_loss: %4.2f   total_loss: %4.2f" % (global_steps, total_steps, optimizer.lr.numpy(),
                                                               giou_loss, conf_loss,
                                                               prob_loss, total_loss))
            # update learning rate
            global_steps.assign_add(1)
            if global_steps < warmup_steps:
                lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT
            else:
                lr = cfg.TRAIN.LR_END + 0.5 * (cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END) * (
                    (1 + tf.cos((global_steps - warmup_steps) / (total_steps - warmup_steps) * np.pi))
                )
            optimizer.lr.assign(lr.numpy())

            # writing summary data
            with writer.as_default():
                tf.summary.scalar("lr", optimizer.lr, step=global_steps)
                tf.summary.scalar("loss/total_loss", total_loss, step=global_steps)
                tf.summary.scalar("loss/giou_loss", giou_loss, step=global_steps)
                tf.summary.scalar("loss/conf_loss", conf_loss, step=global_steps)
                tf.summary.scalar("loss/prob_loss", prob_loss, step=global_steps)
            writer.flush()
    def test_step(image_data, target):
        with tf.GradientTape() as tape:
            pred_result = model(image_data, training=True)
            giou_loss = conf_loss = prob_loss = 0

            # optimizing process
            for i in range(len(freeze_layers)):
                conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
                loss_items = compute_loss(pred, conv, target[i][0], target[i][1], STRIDES=STRIDES, NUM_CLASS=NUM_CLASS, IOU_LOSS_THRESH=IOU_LOSS_THRESH, i=i)
                giou_loss += loss_items[0]
                conf_loss += loss_items[1]
                prob_loss += loss_items[2]

            total_loss = giou_loss + conf_loss + prob_loss

            tf.print("=> TEST STEP %4d   giou_loss: %4.2f   conf_loss: %4.2f   "
                     "prob_loss: %4.2f   total_loss: %4.2f" % (global_steps, giou_loss, conf_loss,
                                                               prob_loss, total_loss))

    for epoch in range(first_stage_epochs + second_stage_epochs):
        if epoch < first_stage_epochs:
            if not isfreeze:
                isfreeze = True
                for name in freeze_layers:
                    freeze = model.get_layer(name)
                    freeze_all(freeze)
        elif epoch >= first_stage_epochs:
            if isfreeze:
                isfreeze = False
                for name in freeze_layers:
                    freeze = model.get_layer(name)
                    unfreeze_all(freeze)
        for image_data, target in trainset:
            train_step(image_data, target)
        for image_data, target in testset:
            test_step(image_data, target)
        model.save_weights("./checkpoints/yolov4")
Example #22
# Author: Jerry Xia <*****@*****.**>

import time
import tensorflow as tf
from config import CFG
from core.model import MobileNetv3_small
from core.dataset import Dataset

tf.keras.backend.set_learning_phase(True)

####################################
#          Generate Dataset        #
####################################
train_set = Dataset(CFG.train_file, CFG.batch_size, CFG.batch_per_epoch)
val_set = Dataset(CFG.val_file, CFG.batch_size, 1)

####################################
#           Create Model           #
####################################
tf.print("Start creating model.")
input_tensor = tf.keras.layers.Input(shape=(224, 224, 3))
output_tensor = MobileNetv3_small(CFG.num_classes)(input_tensor)
model = tf.keras.Model(inputs=input_tensor, outputs=output_tensor)

optimizer = tf.keras.optimizers.RMSprop(lr=CFG.lr_init, momentum=0.9)
cce = tf.keras.losses.CategoricalCrossentropy()
avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)
# ema = tf.train.ExponentialMovingAverage(decay=0.9999)

ckpt = tf.train.Checkpoint(model=model)
Example #23
    def __init__(self):
    
        # Initialize Horovod
        hvd.init()
        config=tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.visible_device_list = str(hvd.local_rank())
        
        self.anchor_per_scale    = cfg.YOLO.ANCHOR_PER_SCALE
        self.classes             = utils.read_class_names(cfg.YOLO.CLASSES)
        self.num_classes         = len(self.classes)
        self.learn_rate_init     = cfg.TRAIN.LEARN_RATE_INIT
        self.learn_rate_end      = cfg.TRAIN.LEARN_RATE_END
        self.first_stage_epochs  = cfg.TRAIN.FISRT_STAGE_EPOCHS
        self.second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
        self.warmup_periods      = cfg.TRAIN.WARMUP_EPOCHS
        self.initial_weight      = cfg.TRAIN.INITIAL_WEIGHT
        self.time                = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
        self.moving_ave_decay    = cfg.YOLO.MOVING_AVE_DECAY
        self.max_bbox_per_scale  = 150
        self.train_logdir        = "./data/log/train"
        self.trainset            = Dataset('train')
        self.testset             = Dataset('test')
        self.steps_per_period    = len(self.trainset)
        self.sess                = tf.Session(config=config)
        
        
        with tf.name_scope('define_input'):
            self.input_data   = tf.placeholder(dtype=tf.float32, name='input_data')
            self.label_sbbox  = tf.placeholder(dtype=tf.float32, name='label_sbbox')
            self.label_mbbox  = tf.placeholder(dtype=tf.float32, name='label_mbbox')
            self.label_lbbox  = tf.placeholder(dtype=tf.float32, name='label_lbbox')
            self.true_sbboxes = tf.placeholder(dtype=tf.float32, name='sbboxes')
            self.true_mbboxes = tf.placeholder(dtype=tf.float32, name='mbboxes')
            self.true_lbboxes = tf.placeholder(dtype=tf.float32, name='lbboxes')
            self.trainable     = tf.placeholder(dtype=tf.bool, name='training')

        with tf.name_scope("define_loss"):
            self.model = YOLOV3(self.input_data, self.trainable)
            self.net_var = tf.global_variables()
            self.giou_loss, self.conf_loss, self.prob_loss = self.model.compute_loss(
                                                    self.label_sbbox,  self.label_mbbox,  self.label_lbbox,
                                                    self.true_sbboxes, self.true_mbboxes, self.true_lbboxes)
            self.loss = self.giou_loss + self.conf_loss + self.prob_loss

        with tf.name_scope('learn_rate'):
            self.global_step = tf.Variable(1.0, dtype=tf.float64, trainable=False, name='global_step')
            warmup_steps = tf.constant(self.warmup_periods * self.steps_per_period,
                                        dtype=tf.float64, name='warmup_steps')
            train_steps = tf.constant( (self.first_stage_epochs + self.second_stage_epochs)* self.steps_per_period,
                                        dtype=tf.float64, name='train_steps')
            self.learn_rate = tf.cond(
                pred=self.global_step < warmup_steps,
                true_fn=lambda: self.global_step / warmup_steps * self.learn_rate_init,
                false_fn=lambda: self.learn_rate_end + 0.5 * (self.learn_rate_init - self.learn_rate_end) *
                                    (1 + tf.cos(
                                        (self.global_step - warmup_steps) / (train_steps - warmup_steps) * np.pi))
            )
            global_step_update = tf.assign_add(self.global_step, 1.0)
            #for Horovod expand learning rate
            self.learn_rate = self.learn_rate * hvd.size()

        with tf.name_scope("define_weight_decay"):
            moving_ave = tf.train.ExponentialMovingAverage(self.moving_ave_decay).apply(tf.trainable_variables())

        with tf.name_scope("define_first_stage_train"):
            self.first_stage_trainable_var_list = []
            for var in tf.trainable_variables():
                var_name = var.op.name
                var_name_mess = str(var_name).split('/')
                if var_name_mess[0] in ['conv_sbbox', 'conv_mbbox', 'conv_lbbox']:
                    self.first_stage_trainable_var_list.append(var)
            
            first_opt = tf.train.AdamOptimizer(self.learn_rate)
            #for Horovod
            first_opt = hvd.DistributedOptimizer(first_opt)
            #for Horovod
            hooks = [hvd.BroadcastGlobalVariablesHook(0)]
            first_stage_optimizer = first_opt.minimize(self.loss,
                                                      var_list=self.first_stage_trainable_var_list)
                                                    
            with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                with tf.control_dependencies([first_stage_optimizer, global_step_update]):
                    with tf.control_dependencies([moving_ave]):
                        self.train_op_with_frozen_variables = tf.no_op()

        with tf.name_scope("define_second_stage_train"):
            second_stage_trainable_var_list = tf.trainable_variables()
            second_opt = tf.train.AdamOptimizer(self.learn_rate)
            #for Horovod
            second_opt = hvd.DistributedOptimizer(second_opt)
            second_stage_optimizer = second_opt.minimize(self.loss,
                                                      var_list=second_stage_trainable_var_list)

            with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                with tf.control_dependencies([second_stage_optimizer, global_step_update]):
                    with tf.control_dependencies([moving_ave]):
                        self.train_op_with_all_variables = tf.no_op()

        with tf.name_scope('loader_and_saver'):
            self.loader = tf.train.Saver(self.net_var)
            self.saver  = tf.train.Saver(tf.global_variables(), max_to_keep=10)

        with tf.name_scope('summary'):
            tf.summary.scalar("learn_rate",      self.learn_rate)
            tf.summary.scalar("giou_loss",  self.giou_loss)
            tf.summary.scalar("conf_loss",  self.conf_loss)
            tf.summary.scalar("prob_loss",  self.prob_loss)
            tf.summary.scalar("total_loss", self.loss)

            logdir = "./data/log/"
            if os.path.exists(logdir): shutil.rmtree(logdir)
            os.mkdir(logdir)
            self.write_op = tf.summary.merge_all()
            self.summary_writer  = tf.summary.FileWriter(logdir, graph=self.sess.graph)
Example #24
def main(_argv):
    input_channel = 3
    patience = 30
    steps_in_epoch = 0
    epoch_loss = np.inf
    prev_minloss = np.inf

    trainset = Dataset(FLAGS,input_channel, is_training=True)
    # testset = Dataset(FLAGS, input_channel, is_training=False)
    logdir = "./data/log"
    isfreeze = False
    steps_per_epoch = len(trainset)
    print("steps_per_epoch:{}".format(steps_per_epoch))

    first_stage_epochs = cfg.TRAIN.FISRT_STAGE_EPOCHS
    second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
    global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
    warmup_steps = cfg.TRAIN.WARMUP_EPOCHS * steps_per_epoch
    total_steps = (first_stage_epochs + second_stage_epochs) * steps_per_epoch
    # train_steps = (first_stage_epochs + second_stage_epochs) * steps_per_period

    loss_tracker = []
    losses_in_epoch = 0.
    input_layer = tf.keras.layers.Input([cfg.TRAIN.INPUT_SIZE, cfg.TRAIN.INPUT_SIZE, 3])
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH

    freeze_layers = utils.load_freeze_layer(FLAGS.model, FLAGS.num_detection_layer)
    # feature_maps = YOLO(input_layer, NUM_CLASS, FLAGS.model, FLAGS.tiny)
    feature_maps = YOLOv4_more_tiny(input_layer, NUM_CLASS)

    bbox_tensors = []
    for i, fm in enumerate(feature_maps):        # fm shape: (None, featw, feath, filters)
        bbox_tensor = decode_train(fm, cfg.TRAIN.INPUT_SIZE // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
        bbox_tensors.append(fm)
        bbox_tensors.append(bbox_tensor)

    if cfg.YOLO.NUM_YOLOLAYERS == 3:  # yolov4
        bbox_tensors = []
        for i, fm in enumerate(feature_maps):
            if i == 0:
                bbox_tensor = decode_train(fm, cfg.TRAIN.INPUT_SIZE // 8, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
            elif i == 1:
                bbox_tensor = decode_train(fm, cfg.TRAIN.INPUT_SIZE // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
            else:
                bbox_tensor = decode_train(fm, cfg.TRAIN.INPUT_SIZE // 32, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
            bbox_tensors.append(fm)
            bbox_tensors.append(bbox_tensor)
    elif cfg.YOLO.NUM_YOLOLAYERS == 2:  # yolo tiny
        bbox_tensors = []
        for i, fm in enumerate(feature_maps):
            if i == 0:
                bbox_tensor = decode_train(fm, cfg.TRAIN.INPUT_SIZE // 16, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
            else:
                bbox_tensor = decode_train(fm, cfg.TRAIN.INPUT_SIZE // 32, NUM_CLASS, STRIDES, ANCHORS, i, XYSCALE)
            bbox_tensors.append(fm)
            bbox_tensors.append(bbox_tensor)
    elif cfg.YOLO.NUM_YOLOLAYERS == 1: # custom yolo
        bbox_tensors = []
        bbox_tensor = decode_train(feature_maps[0], cfg.TRAIN.INPUT_SIZE // cfg.YOLO.STRIDES_CUSTOM[0], NUM_CLASS, STRIDES, ANCHORS, 0, XYSCALE)
        bbox_tensors.append(feature_maps[0])
        bbox_tensors.append(bbox_tensor)

    model = tf.keras.Model(input_layer, bbox_tensors)
    model.summary()


    if FLAGS.weights is None:
        print("Training from scratch")
    else:
        if FLAGS.weights.split(".")[-1] == "weights":
            utils.load_weights(model, FLAGS.weights, FLAGS.model, FLAGS.num_detection_layer)
        else:
            model.load_weights(FLAGS.weights)

        print('Restoring weights from: %s ... ' % FLAGS.weights)

    optimizer = tf.keras.optimizers.Adam()

    if os.path.exists(logdir): shutil.rmtree(logdir)
    writer = tf.summary.create_file_writer(logdir)

    # define training step function
    # @tf.function
    def train_step(image_data, target):
        with tf.GradientTape() as tape:
            pred_result = model(image_data, training=True)
            ciou_loss = conf_loss = prob_loss = 0

            # optimizing process
            for i in range(len(freeze_layers)):
                conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]  # target[i][1]:(32, 150, 4)
                # print("conv shape:{} pred shape:{}".format(tf.keras.backend.int_shape(conv), tf.keras.backend.int_shape(pred)))
                # print("target[i][0]:{} target[i][1]:{}".format(np.array(target[i][0]).shape, np.array(target[i][1]).shape))
                loss_items = compute_loss(pred, conv, target[i][0], target[i][1], STRIDES=STRIDES, NUM_CLASS=NUM_CLASS, IOU_LOSS_THRESH=IOU_LOSS_THRESH, i=i)
                ciou_loss += loss_items[0]
                conf_loss += loss_items[1]
                prob_loss += loss_items[2]

            total_loss = ciou_loss + conf_loss + prob_loss

            gradients = tape.gradient(total_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))
            tf.print("=> STEP %4d/%4d   lr: %.6f   ciou_loss: %4.2f   conf_loss: %4.2f   "
                     "prob_loss: %4.2f   total_loss: %4.2f" % (global_steps, total_steps, optimizer.lr.numpy(),
                                                               ciou_loss, conf_loss,
                                                               prob_loss, total_loss))
            # update learning rate
            global_steps.assign_add(1)
            if global_steps < warmup_steps:
                lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT
            else:
                lr = cfg.TRAIN.LR_END + 0.5 * (cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END) * (
                    (1 + tf.cos((global_steps - warmup_steps) / (total_steps - warmup_steps) * np.pi))
                )
            optimizer.lr.assign(lr.numpy())

            # writing summary data
            with writer.as_default():
                tf.summary.scalar("lr", optimizer.lr, step=global_steps)
                tf.summary.scalar("loss/total_loss", total_loss, step=global_steps)
                tf.summary.scalar("loss/ciou_loss", ciou_loss, step=global_steps)
                tf.summary.scalar("loss/conf_loss", conf_loss, step=global_steps)
                tf.summary.scalar("loss/prob_loss", prob_loss, step=global_steps)
            writer.flush()
            # return total_loss

    @tf.function
    def test_step(image_data, target):
        with tf.GradientTape() as tape:
            pred_result = model(image_data, training=True)
            ciou_loss = conf_loss = prob_loss = 0

            preds = None
            gt_infos = None
            gt_bboxes = None
            # optimizing process
            for i in range(len(freeze_layers)):
                conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
                loss_items = compute_loss(pred, conv, target[i][0], target[i][1], STRIDES=STRIDES, NUM_CLASS=NUM_CLASS, IOU_LOSS_THRESH=IOU_LOSS_THRESH, i=i)

                ciou_loss += loss_items[0]
                conf_loss += loss_items[1]
                prob_loss += loss_items[2]

                if FLAGS.show_map:
                    batch_, _, _, _, ch_ = tf.keras.backend.int_shape(pred)
                    if preds is None:
                        preds = tf.reshape(pred, (batch_, -1, ch_))
                    else:
                        preds = tf.concat([preds, tf.reshape(pred, (batch_, -1, ch_))], axis=1)

                    if gt_infos is None:
                        gt_infos = tf.reshape(target[i][0], (batch_, -1, ch_))
                        gt_bboxes = target[i][1]
                    else:
                        gt_infos = tf.concat([gt_infos, tf.reshape(target[i][0], (batch_, -1, ch_))], axis=1)
                        gt_bboxes = tf.concat([gt_bboxes, tf.reshape(target[i][1], (batch_, -1, 4))], axis=1)


            total_loss = ciou_loss + conf_loss + prob_loss

            tf.print("=> TEST STEP %4d   ciou_loss: %4.2f   conf_loss: %4.2f   "
                     "prob_loss: %4.2f   total_loss: %4.2f" % (global_steps, ciou_loss, conf_loss,
                                                               prob_loss, total_loss))

            if FLAGS.show_map:
                total_map = compute_map(preds, gt_bboxes, gt_infos, NUM_CLASS)
                tf.print("=> TEST STEP %4d   map: %4.2f" % (global_steps, total_map))

    for epoch in range(first_stage_epochs + second_stage_epochs):
        start_ch = time.time()
        train_loss = 0.
        if epoch < first_stage_epochs:
            if not isfreeze:
                isfreeze = True
                for name in freeze_layers:
                    try:
                        freeze = model.get_layer(name)
                        freeze_all(freeze)
                    except ValueError:
                        pass
        elif epoch >= first_stage_epochs:
            if isfreeze:
                isfreeze = False
                for name in freeze_layers:
                    try:  # added try block
                        freeze = model.get_layer(name)
                        unfreeze_all(freeze)
                    except ValueError:
                        pass


        for image_data, target in trainset:
            # bboxes_list = target[0][1]
            # label_data = target[0][0]
            #
            # for batch_idx, bboxes in enumerate(bboxes_list):
            #     # print("bboxes:{}".format(bboxes))
            #     class_inds = []
            #     check = np.array(image_data[batch_idx]).reshape(cfg.TRAIN.INPUT_SIZE, cfg.TRAIN.INPUT_SIZE, 3)
            #     # print(r"D:\tf_data_not\preprocessed_{}.npy".format(batch_idx))
            #     # np.save(r"D:\tf_data_not\image_{}.npy".format(batch_idx), check)
            #     # np.save(r"D:\tf_data_not\preprocessed_{}.npy".format(batch_idx), label_data[batch_idx])
            #     # np.save(r"D:\tf_data_not\bboxes_{}.npy".format(batch_idx), bboxes)
            #     label = label_data[batch_idx]
            #     label_class = label[...,5:]
            #     # print("label shape:{}".format(np.array(label).shape))
            #
            #     for line_label in label_class:
            #         if np.sum(line_label) > 0:
            #             print(line_label)
            #     class_ = np.array(label[..., 5:]).flatten()
            #     class_ = np.where(class_>0.1, class_, 0)
            #     for class_idx, class_label in enumerate(class_):
            #         if class_label > 0:
            #             class_inds.append(class_idx % cfg.YOLO.NUM_CLASSES)
            #
            #     for bbox_idx, bbox in enumerate(bboxes):
            #         half_h = bbox[3] / 2
            #         half_w = bbox[2] / 2
            #
            #         if np.sum(bbox) > 0:
            #             # print("class:{}".format(class_inds))
            #             cv2.rectangle(check, (int(bbox[0] - half_w), int(bbox[1] - half_h)),
            #                           (int(bbox[0] + half_w), int(bbox[1] + half_h)), color=(0, 255, 0), thickness=3)
            #             cv2.putText(check, text="{}".format(class_inds[bbox_idx]),
            #                         org=(int(bbox[0] + half_w)-10, int(bbox[1] + half_h)-30), thickness=2, color=(0, 255, 0),
            #                         fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.7)
            #
            #     cv2.imshow("check_aug", check)
            #     cv2.waitKey(0)
            #     cv2.destroyAllWindows()
            #     cv2.imwrite(os.path.join(r"D:\Public\JHS\SIMPLE_DATA_INPUT_CHECK", "{}.jpg".format(batch_idx)), check*255)

            train_step(image_data, target)

        # for image_data, target in testset:
        #     test_step(image_data, target)

        ### for loss graph
        """
        if steps_in_epoch >= steps_per_epoch:
            loss_tracker.append(losses_in_epoch / steps_per_epoch)

            plt.plot(loss_tracker)
            plt.xlabel('epoch')
            plt.ylabel('loss')
            # plt.show()
            plt.savefig("D:/checkpoint/loss.png")
        """

        # print("steps_in_epoch:{}, steps_per_epoch:{}".format(global_steps, steps_per_epoch))
        # print("prev_minloss:{}, epoch_loss:{}".format(prev_minloss, epoch_loss))
        # # early stopping
        # if len(losses_in_epoch) >= steps_per_epoch:
        #
        #     epoch_loss = losses_in_epoch / steps_per_epoch
        #     if prev_minloss > epoch_loss:   # save best weight
        #         prev_minloss = epoch_loss
        #
        #         if epoch > 800:  # minimum training epochs
        #             model.save("D:\ckpt_best")
        #             print("{} epoch save best weights".format(epoch))
        #
        #     if len(loss_tracker) > patience:
        #         print("check loss_tracker len:{}".format(len(loss_tracker)))
        #         if loss_tracker[0] > np.min(loss_tracker[1:]):
        #             loss_tracker.pop(0)
        #         else:
        #             print("total loss didn't decrease for {} epochs; stopping training".format(patience))
        #             return
        #     steps_in_epoch = 0
        #     epoch_loss = train_loss
        # else:
        #     epoch_loss += train_loss

        if (epoch+1) % 500 == 0:
            model.save(r"D:\notf_ckpt-epoch{}".format(epoch))
            print("{} epoch model saved".format(epoch))
    model.save(r"D:\notf_ckpt-last")
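The commented-out block above sketches patience-based early stopping: per-epoch losses are appended to loss_tracker, and training stops once the oldest tracked loss has not been beaten within the patience window. A minimal standalone sketch of that idea (the function name should_stop is illustrative, not from the original code):

import numpy as np

def should_stop(loss_tracker, patience):
    """Return True when no loss in the last `patience` epochs improved on the oldest tracked loss."""
    if len(loss_tracker) <= patience:
        return False
    if loss_tracker[0] > np.min(loss_tracker[1:]):
        # The loss improved somewhere in the window: slide the window forward.
        loss_tracker.pop(0)
        return False
    return True

# Usage inside the epoch loop (hypothetical):
#     loss_tracker.append(epoch_loss)
#     if should_stop(loss_tracker, patience):
#         print("total loss didn't decrease for {} epochs; stopping training".format(patience))
#         return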
Ejemplo n.º 25
0
#! /usr/bin/env python
# coding=utf-8
import os
import time
import shutil
import numpy as np
import tensorflow as tf
import core.utils as utils
from tqdm import tqdm
from tensorflow.keras import optimizers
from core.dataset import Dataset
from core.yolov3 import YOLOv3, decode, compute_loss
from core.config import cfg
from core.LLmodel import LLYOLO
trainset = Dataset('train')
logdir = "./data/log"
steps_per_epoch = len(trainset)
global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
warmup_steps = cfg.TRAIN.WARMUP_EPOCHS * steps_per_epoch
total_steps = cfg.TRAIN.EPOCHS * steps_per_epoch
model = LLYOLO(input_shape=(None, 416, 416, 3))
optimizer = tf.keras.optimizers.Adam()

def train_step(image_data, target):
    with tf.GradientTape() as tape:
        pred_result = model(image_data, training=True)
        giou_loss = conf_loss = prob_loss = 0

        # optimizing process
        for i in range(3):
            conv, pred = pred_result[i*2], pred_result[i*2+1]
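The train_step above is cut off mid-loop. A hedged sketch of how this kind of custom TF2 training step is typically completed; the compute_loss signature and return value assumed below are not confirmed by the truncated snippet:

import tensorflow as tf

def train_step_sketch(model, optimizer, compute_loss, image_data, target):
    with tf.GradientTape() as tape:
        pred_result = model(image_data, training=True)
        giou_loss = conf_loss = prob_loss = 0.0
        for i in range(3):
            conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
            # Assumption: compute_loss returns (giou, conf, prob) for output scale i.
            loss_items = compute_loss(pred, conv, target[i][0], target[i][1], i)
            giou_loss += loss_items[0]
            conf_loss += loss_items[1]
            prob_loss += loss_items[2]
        total_loss = giou_loss + conf_loss + prob_loss
    gradients = tape.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return total_loss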
Ejemplo n.º 26
0
    # Metadata
    m = Metadata()
    m.identifier = "_id"
    m.numericals = [
        "fit_rating",
        "fitted",
        "buttonup",
        "buttondown",
        "previous_purchases",
    ]
    m.categoricals = ["summer", "winter"]
    m.target = "fit_rating"

    # Dataset
    dataset = Dataset(metadata=m, datapoints=[dp1, dp2, dp3, dp4, dp5])

    # Fit Split
    s = Split()
    s.fit(dataset)
    print(f"Split: {s}")
    print(f"Has reward: {s.reward}")

    # Decision Tree
    t = DecisionTree(name="A Simple Tree")
    t.fit(dataset)
    print(f"Fit tree:")
    print(t)

    # Split fitting scratch
    vsss_fitted = create_val_set_set_set(
Ejemplo n.º 27
0
    def __init__(self):                                 # values are taken from the config file
        self.anchor_per_scale    = cfg.YOLO.ANCHOR_PER_SCALE
        self.classes             = utils.read_class_names(cfg.YOLO.CLASSES)
        self.num_classes         = len(self.classes)
        self.learn_rate_init     = cfg.TRAIN.LEARN_RATE_INIT
        self.learn_rate_end      = cfg.TRAIN.LEARN_RATE_END
        self.first_stage_epochs  = cfg.TRAIN.FISRT_STAGE_EPOCHS
        self.second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
        self.warmup_periods      = cfg.TRAIN.WARMUP_EPOCHS
        self.initial_weight      = cfg.TRAIN.INITIAL_WEIGHT
        self.time                = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
        self.moving_ave_decay    = cfg.YOLO.MOVING_AVE_DECAY
        self.max_bbox_per_scale  = 150
        self.train_logdir        = "./data/log/train"
        self.trainset            = Dataset('train')
        self.testset             = Dataset('test')
        self.steps_per_period    = len(self.trainset)
        self.sess                = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

        with tf.name_scope('define_input'):
            self.input_data   = tf.placeholder(dtype=tf.float32, name='input_data')
            self.label_sbbox  = tf.placeholder(dtype=tf.float32, name='label_sbbox')
            self.label_mbbox  = tf.placeholder(dtype=tf.float32, name='label_mbbox')
            self.label_lbbox  = tf.placeholder(dtype=tf.float32, name='label_lbbox')
            self.true_sbboxes = tf.placeholder(dtype=tf.float32, name='sbboxes')
            self.true_mbboxes = tf.placeholder(dtype=tf.float32, name='mbboxes')
            self.true_lbboxes = tf.placeholder(dtype=tf.float32, name='lbboxes')
            self.trainable     = tf.placeholder(dtype=tf.bool, name='training')

        with tf.name_scope("define_loss"):
            self.model = YOLOV3(self.input_data, self.trainable)
            self.net_var = tf.global_variables()
            self.giou_loss, self.conf_loss, self.prob_loss = self.model.compute_loss(
                                                    self.label_sbbox,  self.label_mbbox,  self.label_lbbox,
                                                    self.true_sbboxes, self.true_mbboxes, self.true_lbboxes)
            self.loss = self.giou_loss + self.conf_loss + self.prob_loss

        with tf.name_scope('learn_rate'):
            self.global_step = tf.Variable(1.0, dtype=tf.float64, trainable=False, name='global_step')
            warmup_steps = tf.constant(self.warmup_periods * self.steps_per_period,
                                        dtype=tf.float64, name='warmup_steps')
            train_steps = tf.constant( (self.first_stage_epochs + self.second_stage_epochs)* self.steps_per_period,
                                        dtype=tf.float64, name='train_steps')
            self.learn_rate = tf.cond(
                pred=self.global_step < warmup_steps,
                true_fn=lambda: self.global_step / warmup_steps * self.learn_rate_init,
                false_fn=lambda: self.learn_rate_end + 0.5 * (self.learn_rate_init - self.learn_rate_end) *
                                    (1 + tf.cos(
                                        (self.global_step - warmup_steps) / (train_steps - warmup_steps) * np.pi))
            )
            global_step_update = tf.assign_add(self.global_step, 1.0)

        '''
        Purpose of warmup_steps:
        Early in training the network can easily produce loss = NaN, so the initial learning rate is set very low
        to reduce the chance of that happening. A very low rate, however, slows training down, so the learning rate
        is increased gradually: NaN losses are largely avoided at the start, and the rate can still be raised once
        training has stabilized.
        '''

        # with tf.name_scope('loader_and_saver'):
        #     self.loader = tf.train.Saver(self.net_var)
        #     self.saver  = tf.train.Saver(tf.global_variables(), max_to_keep=10)

        with tf.name_scope("define_weight_decay"):
            moving_ave = tf.train.ExponentialMovingAverage(self.moving_ave_decay).apply(tf.trainable_variables())

        # Specify the variables to restore (layer names, etc.); defining the loader earlier keeps the saved model small.
        with tf.name_scope('loader_and_saver'):
            variables_to_restore = [v for v in self.net_var if v.name.split('/')[0] not in ['conv_sbbox', 'conv_mbbox', 'conv_lbbox']]
            self.loader = tf.train.Saver(variables_to_restore)
            self.saver  = tf.train.Saver(tf.global_variables(), max_to_keep=1)

        with tf.name_scope("define_first_stage_train"):
            self.first_stage_trainable_var_list = []
            for var in tf.trainable_variables():
                var_name = var.op.name
                var_name_mess = str(var_name).split('/')
                if var_name_mess[0] in ['conv_sbbox', 'conv_mbbox', 'conv_lbbox']:
                    self.first_stage_trainable_var_list.append(var)

            first_stage_optimizer = tf.train.AdamOptimizer(self.learn_rate).minimize(self.loss,
                                                      var_list=self.first_stage_trainable_var_list)
            with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                with tf.control_dependencies([first_stage_optimizer, global_step_update]):
                    with tf.control_dependencies([moving_ave]):
                        self.train_op_with_frozen_variables = tf.no_op()

        with tf.name_scope("define_second_stage_train"):
            second_stage_trainable_var_list = tf.trainable_variables()
            second_stage_optimizer = tf.train.AdamOptimizer(self.learn_rate).minimize(self.loss,
                                                      var_list=second_stage_trainable_var_list)

            with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                with tf.control_dependencies([second_stage_optimizer, global_step_update]):
                    with tf.control_dependencies([moving_ave]):
                        self.train_op_with_all_variables = tf.no_op()

        with tf.name_scope('summary'):
            tf.summary.scalar("learn_rate",      self.learn_rate)
            tf.summary.scalar("giou_loss",  self.giou_loss)
            tf.summary.scalar("conf_loss",  self.conf_loss)
            tf.summary.scalar("prob_loss",  self.prob_loss)
            tf.summary.scalar("total_loss", self.loss)

            logdir = "./data/log/"
            if os.path.exists(logdir): shutil.rmtree(logdir)
            os.mkdir(logdir)
            self.write_op = tf.summary.merge_all()
            self.summary_writer  = tf.summary.FileWriter(logdir, graph=self.sess.graph)
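The learn_rate defined in the example above combines a linear warm-up with cosine annealing, which is what the warmup_steps note explains. The same formula rewritten in plain NumPy, with made-up numbers, just to show the shape of the schedule (a sketch, not part of the trainer):

import numpy as np

def learn_rate_at(step, warmup_steps, train_steps, lr_init, lr_end):
    if step < warmup_steps:
        # Linear warm-up from ~0 up to lr_init, to avoid NaN losses early on.
        return step / warmup_steps * lr_init
    # Cosine decay from lr_init down to lr_end over the remaining steps.
    progress = (step - warmup_steps) / (train_steps - warmup_steps)
    return lr_end + 0.5 * (lr_init - lr_end) * (1 + np.cos(progress * np.pi))

# e.g. with warmup_steps=100, train_steps=1000, lr_init=1e-4, lr_end=1e-6:
#     learn_rate_at(50, 100, 1000, 1e-4, 1e-6)   -> 5e-05   (halfway through warm-up)
#     learn_rate_at(1000, 100, 1000, 1e-4, 1e-6) -> ~1e-06  (end of training)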
Ejemplo n.º 28
0
def main(_argv):
    print('Config File From:', FLAGS.config)
    cfg = decode_cfg(FLAGS.config)

    model_type = cfg['yolo']['type']
    if model_type == 'yolov3':
        from core.model.one_stage.yolov3 import YOLOv3 as Model
        from core.model.one_stage.yolov3 import YOLOLoss as Loss
        num = 186
        epochs = 270
    elif model_type == 'yolov3_tiny':
        from core.model.one_stage.yolov3 import YOLOv3_Tiny as Model
        from core.model.one_stage.yolov3 import YOLOLoss as Loss
        num = 29
        epochs = 30
    elif model_type == 'yolov4':
        from core.model.one_stage.yolov4 import YOLOv4 as Model
        from core.model.one_stage.yolov4 import YOLOLoss as Loss
        num = 251
        epochs = 270
    elif model_type == 'yolov4_tiny':
        from core.model.one_stage.yolov4 import YOLOv4_Tiny as Model
        from core.model.one_stage.yolov4 import YOLOLoss as Loss
        num = 29
        epochs = 30
    else:
        raise NotImplementedError()

    model, eval_model = Model(cfg)
    model.summary()
    train_dataset = Dataset(cfg)

    init_weight = cfg["train"]["init_weight_path"]
    anchors = cfg['yolo']['anchors']
    mask = cfg['yolo']['mask']
    strides = cfg['yolo']['strides']
    ignore_threshold = cfg['train']['ignore_threshold']
    loss_type = cfg['train']['loss_type']

    if init_weight:
        load_weights(model, init_weight)
    else:
        print("Training from scratch")
        num = 0

    loss = [
        Loss(anchors[mask[i]], strides[i], train_dataset.num_classes,
             ignore_threshold, loss_type) for i in range(len(mask))
    ]

    ckpt_path = os.path.join(cfg["train"]["save_weight_path"], 'tmp',
                             cfg["train"]["label"],
                             time.strftime("%Y%m%d%H%M", time.localtime()))

    warmup_epochs = 3
    warmup_callback = [
        WarmUpScheduler(learning_rate=1e-3,
                        warmup_step=warmup_epochs * len(train_dataset),
                        verbose=1)
    ]

    eval_callback = [
        COCOEvalCheckpoint(save_path=os.path.join(ckpt_path,
                                                  "mAP-{mAP:.4f}.h5"),
                           eval_model=eval_model,
                           model_cfg=cfg,
                           eval_n_samples=None,
                           eval_per_batch=32000,
                           verbose=1)
    ]
    lr_callback = [
        CosineAnnealingScheduler(learning_rate=1e-3,
                                 eta_min=1e-6,
                                 T_max=epochs * len(train_dataset),
                                 verbose=1)
    ]

    if not os.path.isdir(ckpt_path):
        os.makedirs(ckpt_path)
        os.makedirs(os.path.join(ckpt_path, 'train', 'plugins', 'profile'))

    # opt = optimizers.Adam(lr=0.)
    opt = Accumulative(optimizers.Adam(lr=0.), 32)
    # warm-up phase: freeze the first num layers before the short warm-up fit below
    for i in range(num):
        model.layers[i].trainable = False
        print(model.layers[i].name)
    print('Freeze the first {} layers of total {} layers.'.format(
        num, len(model.layers)))

    model.compile(loss=loss, optimizer=opt, run_eagerly=False)
    model.fit(train_dataset,
              steps_per_epoch=len(train_dataset),
              epochs=warmup_epochs,
              callbacks=warmup_callback)

    for i in range(len(model.layers)):
        model.layers[i].trainable = True
    print('Unfreeze all layers.')

    model.compile(loss=loss, optimizer=opt, run_eagerly=False)
    model.fit(train_dataset,
              steps_per_epoch=len(train_dataset),
              epochs=epochs,
              callbacks=eval_callback + lr_callback)

    # reset sample rate
    model.compile(loss=loss,
                  optimizer=optimizers.Adam(lr=1e-7),
                  run_eagerly=False)
    model.fit(train_dataset,
              steps_per_epoch=len(train_dataset),
              epochs=10,
              callbacks=eval_callback)
Ejemplo n.º 29
0
def main(_argv):
    cfg = decode_cfg("cfgs/voc_yolov4_tiny.yaml")
    model, eval_model = YOLOv4_Tiny(cfg)
    model.summary()
    train_dataset = Dataset(cfg)

    init_weight = cfg["train"]["init_weight_path"]
    anchors = cfg['yolo']['anchors']
    mask = cfg['yolo']['mask']
    strides = cfg['yolo']['strides']
    ignore_threshold = cfg['train']['ignore_threshold']
    loss_type = cfg['train']['loss_type']

    if init_weight:
        load_weights(model, init_weight)
    else:
        print("Training from scratch")

    loss = [
        YOLOLoss(anchors[mask[i]], strides[i], train_dataset.num_classes,
                 ignore_threshold, loss_type) for i in range(len(mask))
    ]

    ckpt_path = os.path.join(cfg["train"]["save_weight_path"], 'tmp',
                             cfg["train"]["label"],
                             time.strftime("%Y%m%d%H%M", time.localtime()))
    if not os.path.isdir(ckpt_path):
        os.makedirs(ckpt_path)
        os.makedirs(
            os.path.join(ckpt_path, 'log', 'train', 'plugins', 'profile'))

    _cfg = copy.deepcopy(cfg)
    _cfg['test']['anno_path'] = "./data/pascal_voc/voc2007_val.txt"
    callback = [
        COCOEvalCheckpoint(save_path=os.path.join(ckpt_path,
                                                  "mAP-{mAP:.4f}.h5"),
                           eval_model=eval_model,
                           model_cfg=cfg,
                           sample_rate=5,
                           verbose=1),
        COCOEvalCheckpoint(save_path=None,
                           eval_model=eval_model,
                           model_cfg=_cfg,
                           sample_rate=5,
                           verbose=1)
    ]

    num = 29
    for i in range(num):
        model.layers[i].trainable = False
    print('Freeze the first {} layers of total {} layers.'.format(
        num, len(model.layers)))

    model.compile(loss=loss,
                  optimizer=optimizers.Adam(lr=1e-4),
                  run_eagerly=False)
    model.fit(train_dataset,
              steps_per_epoch=len(train_dataset),
              epochs=30,
              callbacks=callback)

    for i in range(len(model.layers)):
        model.layers[i].trainable = True

    model.compile(loss=loss,
                  optimizer=optimizers.Adam(lr=1e-5),
                  run_eagerly=False)
    model.fit(train_dataset,
              steps_per_epoch=len(train_dataset),
              epochs=50,
              callbacks=callback)

    callback = [
        COCOEvalCheckpoint(save_path=os.path.join(ckpt_path,
                                                  "mAP-{mAP:.4f}.h5"),
                           eval_model=eval_model,
                           model_cfg=cfg,
                           sample_rate=1,
                           verbose=1),
    ]

    model.compile(loss=loss,
                  optimizer=optimizers.Adam(lr=1e-6),
                  run_eagerly=False)
    model.fit(train_dataset,
              steps_per_epoch=len(train_dataset),
              epochs=10,
              callbacks=callback)
Ejemplo n.º 30
0
Archivo: train.py Proyecto: shifop/yolo
        #     print("%d:%s %s N" % (i, x.name, str(x.shape)))
    for i, x in enumerate(model.trainable_variables):
        name = x.name[:-2]
        name = name.replace('/', '|')
        if name not in n2v:
            print("%d:%s %s N" % (i, x.name, str(x.shape)))
    return model


if __name__=='__main__':
    # Select training vs. inference mode: 0 = inference, 1 = training
    tf.keras.backend.set_learning_phase(1)
    os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    save_f = 'test_ep10_pretrain_aug_random_size'
    train = Dataset('train')
    # i2n = ["有框表格","无框表格","页眉","页脚","图片","图表","注脚","公式","目录"]
    i2n = ["有框表格","无框表格","页眉","图片","图表","公式","目录"]

    size = train.num_batchs
    train = get_dataset_by_iter(train, cfg.TRAIN.BATCH_SIZE)
    train_w, init_call = get_data(train, [608, 76, 38, 19])

    test = Dataset('test')
    dev_size = test.num_batchs

    # Read the annotation data
    raw_label = {}
    with open(cfg.TEST.ANNOT_PATH,'r',encoding='utf-8') as f:
        for line in f:
            line = line.strip().split(' ')