def _generate_train_luminoth_config(self, **kwargs):
        """Write the Luminoth training config to disk and return it loaded.

        The config is built from ``self.name`` (run name), ``self.algorithm``
        (model type) and ``len(self.classes)`` (number of classes), dumped as
        YAML to ``datasets/luminoth.yml`` and then read back through
        ``get_config``. ``**kwargs`` is accepted but not used.

        Returns:
            Whatever ``get_config`` returns for the written YAML file.
        """
        dataset_dir = "datasets"
        job_dir = "datasets/object_recognition"
        config_path = "datasets/luminoth.yml"

        config = {
            "train": {
                "run_name": self.name,
                "job_dir": job_dir
            },
            "dataset": {
                "type": "object_detection",
                "dir": dataset_dir
            },
            "model": {
                "type": self.algorithm,
                "network": {
                    "num_classes": len(self.classes)
                }
            }
        }

        # makedirs (instead of mkdir) also creates the parent "datasets"
        # directory, which the YAML file below is written into.
        if not os.path.exists(job_dir):
            os.makedirs(job_dir)

        with open(config_path, "w") as f:
            f.write(yaml.dump(config))

        return get_config(config_path)
Exemple #2
0
def train(config_files, job_dir, override_params):
    """
    Read TF_CONFIG, build the config and start a local run.

    Args:
        config_files: Config file(s) forwarded to ``get_config``.
        job_dir: If truthy, added as a ``train.job_dir=<job_dir>`` override.
        override_params: Sequence of ``key=value`` override strings.

    Returns:
        The result of ``run_local`` when TF_CONFIG carries no task type or
        task index; ``None`` otherwise.

    Raises:
        KeyError: If ``get_config`` raises ``KeyError`` (re-raised with a
            message about ``model.type``).
    """
    # TF_CONFIG environment variable is available when running using gcloud
    # either locally or on cloud. It has all the information required to create
    # a ClusterSpec which is important for running distributed code.
    tf_config_val = os.environ.get('TF_CONFIG')
    tf_config = json.loads(tf_config_val) if tf_config_val else {}

    task = tf_config.get('task', {})
    job_name = task.get('type')
    task_index = task.get('index')
    environment = tf_config.get('environment', 'local')

    # The job_dir override must be appended *before* the config is built;
    # appending it afterwards (as was done previously) has no effect because
    # override_params is never read again.
    if job_dir:
        override_params = tuple(override_params or ()) + (
            'train.job_dir={}'.format(job_dir), )

    # Get the user config and the model type from it.
    try:
        config = get_config(config_files, override_params=override_params)
    except KeyError:
        # Without mode type defined we can't use the default config settings.
        raise KeyError('model.type should be set on the custom config.')

    # If cluster information is empty or TF_CONFIG is not available, run local
    if job_name is None or task_index is None:
        return run_local(config, environment=environment)
def main(input_image):
    """Detect, crop and classify objects in one image and dump the result.

    The image is decoded as RGB, run through a ``PredictorNetwork`` built
    from ``CONFIG`` (with ``MAX_DET`` / ``MIN_PROB`` applied), the detections
    are cropped via ``classify_cellscropping.load_json`` and classified via
    ``classify_main.predict``. The classification result is written as JSON
    into ``OUTPUT_JSON_DIR`` under the input file's base name.

    Args:
        input_image: Path of the image to process.

    Returns:
        None. Returns early (after printing ``Exception!``) when the image
        cannot be decoded.
    """
    # *************************************************************************

    with tf.gfile.Open(input_image, 'rb') as f:
        try:
            image = Image.open(f).convert('RGB')
        except (tf.errors.OutOfRangeError, OSError):
            # Nothing below can run without a decoded image; previously
            # execution continued and failed with a NameError on `image`.
            print('Exception!')
            return

    # *************************************************************************

    config = get_config(CONFIG)
    config.model.rcnn.proposals.total_max_detections = MAX_DET
    config.model.rcnn.proposals.min_prob_threshold = MIN_PROB

    network = PredictorNetwork(config)
    objects = network.predict_image(image)

    # Single-argument print() with a pre-formatted string produces the same
    # output on Python 2 and Python 3.
    print('************************* Num of Objects :  {}'.format(
        len(objects)))
    # *************************************************************************

    ref_image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    _, images, ids = classify_cellscropping.load_json(ref_image,
                                                      preds=objects)
    print('************************* Num of Cropped Objects :  {}'.format(
        np.shape(ids)))

    # *************************************************************************

    final_json = classify_main.predict(images, ids, CKPT)
    # splitext drops the real extension whatever its length; slicing off the
    # last four characters only worked for three-letter extensions.
    image_stem = os.path.splitext(os.path.basename(input_image))[0]
    output_json_path = os.path.join(OUTPUT_JSON_DIR, '%s.json' % image_stem)
    with open(output_json_path, 'w') as fp:
        json.dump(final_json, fp)
Exemple #4
0
def web(config_files, checkpoint, override_params, host, port, debug):
    """Start the web app with a model loaded from a checkpoint or config.

    The config comes from ``checkpoint`` when given, otherwise from
    ``config_files``; when neither is given a message is echoed and the
    function returns without starting anything. ``override_params``, if any,
    are applied on top. The network is started on a background thread stored
    in the ``NETWORK_START_THREAD`` global, then the app is run.
    """
    global NETWORK_START_THREAD

    log_level = tf.logging.DEBUG if debug else tf.logging.INFO
    tf.logging.set_verbosity(log_level)

    if not checkpoint and not config_files:
        click.echo('You must specify either a checkpoint or a config file.')
        return

    if checkpoint:
        config = get_checkpoint_config(checkpoint)
    else:
        config = get_config(config_files)

    if override_params:
        config = override_config_params(config, override_params)

    # The frontend filters bounding boxes itself (with a slider), so keep
    # the threshold low here.
    config.model.rcnn.proposals.min_prob_threshold = 0.01

    # Kick off model initialization in the background.
    NETWORK_START_THREAD = Thread(target=start_network, args=(config,))
    NETWORK_START_THREAD.start()

    app.run(host=host, port=port, debug=debug)
Exemple #5
0
def web(config_files, checkpoint, override_params, host, port, debug):
    """Start the web app, defaulting to the `accurate` checkpoint.

    The config comes from ``checkpoint`` when given, otherwise from
    ``config_files``, otherwise from the ``accurate`` checkpoint (after
    echoing a notice). ``override_params``, if any, are applied on top.
    Only the ``fasterrcnn`` and ``ssd`` model types are accepted; anything
    else raises ``ValueError``. The network is started on a background
    thread stored in the ``NETWORK_START_THREAD`` global, then the app runs.
    """
    global NETWORK_START_THREAD

    tf.logging.set_verbosity(
        tf.logging.DEBUG if debug else tf.logging.INFO)

    if checkpoint:
        config = get_checkpoint_config(checkpoint)
    elif config_files:
        config = get_config(config_files)
    else:
        click.echo(
            'Neither checkpoint not config specified, assuming `accurate`.')
        config = get_checkpoint_config('accurate')

    if override_params:
        config = override_config_params(config, override_params)

    # The frontend filters bounding boxes itself (with a slider), so keep
    # the threshold low here. Where the threshold lives depends on the model.
    model_type = config.model.type
    if model_type == 'fasterrcnn':
        proposals = config.model.rcnn.proposals
    elif model_type == 'ssd':
        proposals = config.model.proposals
    else:
        raise ValueError("Model type '{}' not supported".format(
            config.model.type))
    proposals.min_prob_threshold = 0.01

    # Kick off model initialization in the background.
    NETWORK_START_THREAD = Thread(target=start_network, args=(config, ))
    NETWORK_START_THREAD.start()

    app.run(host=host, port=port, debug=debug)
    def _load_model(self):
        """Return a ``PredictorNetwork`` built from the predict config.

        The config is read from ``<self.model_path>/luminoth.predict.yml``;
        when that file does not exist,
        ``self._generate_predict_luminoth_config()`` is called first.
        """
        predict_config = "{}/luminoth.predict.yml".format(self.model_path)

        config_missing = not os.path.exists(predict_config)
        if config_missing:
            self._generate_predict_luminoth_config()

        return PredictorNetwork(get_config(predict_config))
Exemple #7
0
def get_predictions(image_paths, config_files):
    """
    Get predictions for multiple images.

    When predicting many images we don't want to load the checkpoint each time.
    We load the checkpoint in the first iteration and then use the same
    session and graph for subsequent images.

    Args:
        image_paths: Iterable of image paths to predict on.
        config_files: Config file(s) forwarded to ``get_config``.

    Yields:
        One dict per image. On success it holds ``objects``,
        ``objects_labels``, ``objects_labels_prob``, ``inference_time`` and
        ``image_path``; when reading the image raises
        ``tf.errors.OutOfRangeError`` it holds ``error`` and ``image_path``.
    """
    config = get_config(config_files)

    # Default to no labels so the name is always bound, even when the config
    # has no dataset directory (previously that case raised a NameError).
    class_labels = None
    if config.dataset.dir:
        # Gets the names of the classes
        classes_file = os.path.join(config.dataset.dir, 'classes.json')
        if tf.gfile.Exists(classes_file):
            # Context manager so the file handle is closed after reading.
            with tf.gfile.GFile(classes_file) as labels_file:
                class_labels = json.load(labels_file)

    session = None
    fetches = None
    image_tensor = None

    for image_path in image_paths:
        with tf.gfile.Open(image_path, 'rb') as im_file:
            try:
                image = Image.open(im_file).convert('RGB')
            except tf.errors.OutOfRangeError as e:
                yield {
                    'error': '{}'.format(e),
                    'image_path': image_path,
                }
                continue

        preds = get_prediction(image,
                               config,
                               session=session,
                               fetches=fetches,
                               image_tensor=image_tensor,
                               class_labels=class_labels,
                               return_tf_vars=True)

        if session is None:
            # After first loop: keep the TF objects for the remaining images.
            session = preds['session']
            fetches = preds['fetches']
            image_tensor = preds['image_tensor']

        yield {
            'objects': preds['objects'],
            'objects_labels': preds['objects_labels'],
            'objects_labels_prob': preds['objects_labels_prob'],
            'inference_time': preds['inference_time'],
            'image_path': image_path,
        }
Exemple #8
0
def train(config_files, job_dir, override_params):
    """
    Parse TF_CONFIG to cluster_spec and call run() function

    Args:
        config_files: Config file(s) forwarded to ``get_config``.
        job_dir: If truthy, added as a ``train.job_dir=<job_dir>`` override.
        override_params: Sequence of ``key=value`` override strings.

    Returns:
        The result of ``run`` for local runs and for "master"/"worker"
        tasks; ``None`` for "ps" tasks (after ``server.join()``) and for any
        other job name.

    Raises:
        KeyError: If ``get_config`` raises ``KeyError`` (re-raised with a
            message about ``model.type``).
    """
    # TF_CONFIG environment variable is available when running using gcloud
    # either locally or on cloud. It has all the information required to create
    # a ClusterSpec which is important for running distributed code.
    tf_config_val = os.environ.get("TF_CONFIG")
    tf_config = json.loads(tf_config_val) if tf_config_val else {}

    cluster = tf_config.get("cluster")
    task = tf_config.get("task", {})
    job_name = task.get("type")
    task_index = task.get("index")
    environment = tf_config.get("environment", "local")

    # The job_dir override must be appended *before* the config is built;
    # appending it afterwards (as was done previously) has no effect because
    # override_params is never read again.
    if job_dir:
        override_params = tuple(override_params or ()) + (
            "train.job_dir={}".format(job_dir),
        )

    # Get the user config and the model type from it.
    try:
        config = get_config(config_files, override_params=override_params)
    except KeyError:
        # Without mode type defined we can't use the default config settings.
        raise KeyError("model.type should be set on the custom config.")

    # If cluster information is empty or TF_CONFIG is not available, run local
    if job_name is None or task_index is None:
        return run(config, environment=environment)

    cluster_spec = tf.train.ClusterSpec(cluster)
    server = tf.train.Server(cluster_spec, job_name=job_name, task_index=task_index)

    # Wait for incoming connections forever
    # Worker ships the graph to the ps server
    # The ps server manages the parameters of the model.
    if job_name == "ps":
        server.join()
        return
    elif job_name in ["master", "worker"]:
        is_chief = job_name == "master"
        return run(
            config,
            target=server.target,
            cluster_spec=cluster_spec,
            is_chief=is_chief,
            job_name=job_name,
            task_index=task_index,
            environment=environment,
        )
Exemple #9
0
def train(config_files, job_dir, override_params):
    """
    Parse TF_CONFIG to cluster_spec and call run() function

    Args:
        config_files: Config file(s) forwarded to ``get_config``.
        job_dir: If truthy, added as a ``train.job_dir=<job_dir>`` override.
        override_params: Sequence of ``key=value`` override strings.

    Returns:
        The result of ``run`` for local runs and for 'master'/'worker'
        tasks; ``None`` for 'ps' tasks (after ``server.join()``) and for any
        other job name.

    Raises:
        KeyError: If ``get_config`` raises ``KeyError`` (re-raised with a
            message about ``model.type``).
    """
    # TF_CONFIG environment variable is available when running using gcloud
    # either locally or on cloud. It has all the information required to create
    # a ClusterSpec which is important for running distributed code.
    tf_config_val = os.environ.get('TF_CONFIG')
    tf_config = json.loads(tf_config_val) if tf_config_val else {}

    cluster = tf_config.get('cluster')
    task = tf_config.get('task', {})
    job_name = task.get('type')
    task_index = task.get('index')
    environment = tf_config.get('environment', 'local')

    # The job_dir override must be appended *before* the config is built;
    # appending it afterwards (as was done previously) has no effect because
    # override_params is never read again.
    if job_dir:
        override_params = tuple(override_params or ()) + (
            'train.job_dir={}'.format(job_dir), )

    # Get the user config and the model type from it.
    try:
        config = get_config(config_files, override_params=override_params)
    except KeyError:
        # Without mode type defined we can't use the default config settings.
        raise KeyError('model.type should be set on the custom config.')

    # If cluster information is empty or TF_CONFIG is not available, run local
    if job_name is None or task_index is None:
        return run(
            config, environment=environment
        )

    cluster_spec = tf.train.ClusterSpec(cluster)
    server = tf.train.Server(
        cluster_spec, job_name=job_name, task_index=task_index)

    # Wait for incoming connections forever
    # Worker ships the graph to the ps server
    # The ps server manages the parameters of the model.
    if job_name == 'ps':
        server.join()
        return
    elif job_name in ['master', 'worker']:
        is_chief = job_name == 'master'
        return run(
            config, target=server.target, cluster_spec=cluster_spec,
            is_chief=is_chief, job_name=job_name, task_index=task_index,
            environment=environment
        )
    def _generate_train_config(self, **kwargs):
        """Write the training config for ``self.algorithm`` and load it.

        Delegates to ``_generate_train_fasterrcnn_config`` or
        ``_generate_train_ssd_config`` (forwarding ``**kwargs``), dumps the
        resulting config as YAML to ``data/luminoth/luminoth.yml`` and reads
        it back through ``get_config``.

        Returns:
            A ``(config, hyperparams)`` tuple: the loaded config and the
            hyperparameters returned by the algorithm-specific generator.

        Raises:
            ValueError: If ``self.algorithm`` is neither ``"fasterrcnn"`` nor
                ``"ssd"``.
        """
        if self.algorithm == "fasterrcnn":
            train_config, hyperparams = self._generate_train_fasterrcnn_config(**kwargs)
        elif self.algorithm == "ssd":
            train_config, hyperparams = self._generate_train_ssd_config(**kwargs)
        else:
            # Fail with a clear message instead of an unbound-variable error
            # further down.
            raise ValueError(
                "Unsupported algorithm '{}'".format(self.algorithm))

        config_dir = "data/luminoth"
        config_path = "data/luminoth/luminoth.yml"

        # makedirs (instead of mkdir) also creates the parent "data"
        # directory when it is missing.
        if not os.path.exists(config_dir):
            os.makedirs(config_dir)

        with open(config_path, "w") as f:
            f.write(yaml.dump(train_config))

        return get_config(config_path), hyperparams
Exemple #11
0
def web(config_files, checkpoint, override_params, host, port, debug, min_prob, save_path):
    """Start the web app with a configurable threshold and save directory.

    Args:
        config_files: Config file(s) used when no checkpoint is given.
        checkpoint: Checkpoint id/alias; takes precedence over config files.
        override_params: Optional overrides applied on top of the config.
        host: Host passed to ``app.run``.
        port: Port passed to ``app.run``.
        debug: Selects DEBUG logging and ``config.DebugConfig`` when truthy,
            INFO logging and ``config.ProductionConfig`` otherwise.
        min_prob: Value assigned to the proposals' ``min_prob_threshold``.
        save_path: When truthy, replaces the ``SAVE_PATH_GLOBAL`` global.

    Raises:
        ValueError: If neither ``checkpoint`` nor ``config_files`` is given,
            or the model type is not ``fasterrcnn`` / ``ssd``.
    """
    global SAVE_PATH_GLOBAL
    global NETWORK_START_THREAD

    if save_path:
        SAVE_PATH_GLOBAL = save_path
    if debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    if checkpoint:
        config = get_checkpoint_config(checkpoint)
    elif config_files:
        config = get_config(config_files)
    else:
        # The previous message claimed a fallback to `accurate`, but this
        # branch raises; the message now says what actually happens.
        raise ValueError(
            'Neither checkpoint nor config specified.'
        )

    if override_params:
        config = override_config_params(config, override_params)

    # Where the threshold lives depends on the model type.
    if config.model.type == 'fasterrcnn':
        config.model.rcnn.proposals.min_prob_threshold = min_prob
    elif config.model.type == 'ssd':
        config.model.proposals.min_prob_threshold = min_prob
    else:
        raise ValueError(
            "Model type '{}' not supported".format(config.model.type)
        )

    # Verify the save folder exists, creating it (and any missing parents)
    # otherwise. An explicit existence check replaces the former bare
    # `except:` around os.stat, which hid every kind of error.
    if not os.path.exists(SAVE_PATH_GLOBAL):
        os.makedirs(SAVE_PATH_GLOBAL)

    # Initialize model
    NETWORK_START_THREAD = Thread(target=start_network, args=(config,))
    NETWORK_START_THREAD.start()

    if debug:
        app.config.from_object('config.DebugConfig')
    else:
        app.config.from_object('config.ProductionConfig')

    app.run(host=host, port=port, debug=debug)
Exemple #12
0
def get_checkpoint_config(id_or_alias, prompt=True):
    """Load and return the config of the checkpoint `id_or_alias`.

    With `prompt` set to ``True``, a checkpoint missing from the index
    triggers a confirmation to refresh the remote index, and a checkpoint
    that is known but not downloaded triggers a confirmation to download it.
    With `prompt` set to ``False`` both situations raise ``ValueError``
    instead. A checkpoint still missing after the refresh also raises
    ``ValueError``.
    """
    db = read_checkpoint_db()
    checkpoint = get_checkpoint(db, id_or_alias)

    if not checkpoint:
        if not prompt:
            # Unknown checkpoint and we may not ask the user.
            raise ValueError('Checkpoint not found.')

        # Unknown checkpoint: offer to refresh the index, then look again.
        click.confirm(
            'Checkpoint not found. Check remote repository?', abort=True
        )
        db = refresh_remote_index()
        checkpoint = get_checkpoint(db, id_or_alias)
        if not checkpoint:
            # Refreshing did not help, give up.
            click.echo(
                "Checkpoint isn't available in remote repository either."
            )
            raise ValueError('Checkpoint not found.')

    if checkpoint['status'] == 'NOT_DOWNLOADED':
        if not prompt:
            # Known but not local, and we may not ask the user.
            raise ValueError('Checkpoint not downloaded.')

        # Known but not local: offer to download before continuing.
        click.confirm(
            'Checkpoint not present locally. Want to download it?', abort=True
        )
        download_remote_checkpoint(db, checkpoint)

    checkpoint_dir = get_checkpoint_path(checkpoint['id'])
    config = get_config(os.path.join(checkpoint_dir, 'config.yml'))

    # Point the config's paths at the location of the checkpoint files.
    config.dataset.dir = checkpoint_dir
    config.train.job_dir = get_checkpoints_directory()

    return config
def web(config_files, debug):
    """Load the config (and class labels, if present) and run the app.

    The loaded config is stored in ``app.config['config']``. When the config
    names a dataset directory containing ``classes.json``, its parsed content
    is stored in ``app.config['class_labels']``.

    Args:
        config_files: Config file(s) forwarded to ``get_config``.
        debug: Selects DEBUG vs INFO logging and is passed to ``app.run``.
    """
    if debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    config = get_config(config_files)
    app.config['config'] = config
    if config.dataset.dir:
        # Gets the names of the classes
        classes_file = os.path.join(config.dataset.dir, 'classes.json')
        if tf.gfile.Exists(classes_file):
            # Context manager so the file handle is closed after reading.
            with tf.gfile.GFile(classes_file) as labels_file:
                app.config['class_labels'] = json.load(labels_file)

    app.run(debug=debug)
Exemple #14
0
def main(input_image):
    """Detect objects in one image and save each cropped object as a PNG.

    The default TF graph is reset, the image is decoded as RGB and run
    through a ``PredictorNetwork`` built from ``CONFIG`` (with ``MAX_DET`` /
    ``MIN_PROB`` applied). Detections are cropped via
    ``cell_croppings.load_json`` and every crop is written to
    ``CROP_CELLS_DIR`` as ``<image stem>_<id>_<index>.png``.

    Args:
        input_image: Path of the image to process.

    Returns:
        None. Returns early (after printing ``Exception!``) when the image
        cannot be decoded.
    """
    tf.reset_default_graph()

    # **********************************************************************

    with tf.gfile.Open(input_image, 'rb') as f:
        try:
            image = Image.open(f).convert('RGB')
        except (tf.errors.OutOfRangeError, OSError):
            # Nothing below can run without a decoded image; previously
            # execution continued and failed with a NameError on `image`.
            print('Exception!')
            return

    # **********************************************************************

    config = get_config(CONFIG)
    config.model.rcnn.proposals.total_max_detections = MAX_DET
    config.model.rcnn.proposals.min_prob_threshold = MIN_PROB

    network = PredictorNetwork(config)
    objects = network.predict_image(image)

    # Single-argument print() with a pre-formatted string produces the same
    # output on Python 2 and Python 3.
    print('************************************** Num of Objects :  {}'.format(
        len(objects)))
    # **********************************************************************

    ref_image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    _, images, ids = cell_croppings.load_json(ref_image, preds=objects)
    print(
        '************************************** Num of Cropped Objects :  {}'
        .format(np.shape(ids)))

    # **********************************************************************

    if not os.path.exists(CROP_CELLS_DIR):
        os.makedirs(CROP_CELLS_DIR)

    # splitext drops the real extension whatever its length; slicing off the
    # last four characters only worked for three-letter extensions.
    image_stem = os.path.splitext(os.path.basename(input_image))[0]

    for i in range(ids.shape[0]):

        # SRC_DIR / IMAGE_ID _ COORDS . LABEL

        dst_filename = '%s_%s_%s.png' % (image_stem, ids[i], i)

        dst_img_name = os.path.join(CROP_CELLS_DIR, dst_filename)

        cv2.imwrite(dst_img_name, images[i, ...])
Exemple #15
0
def web(config_files, host, port, debug):
    """Start the network loader thread and run the web app.

    Args:
        config_files: Config file(s); loaded via ``get_config`` and also
            handed to ``start_network`` as its single argument.
        host: Host passed to ``app.run``.
        port: Port passed to ``app.run``.
        debug: Selects DEBUG vs INFO logging and is passed to ``app.run``.
    """
    if debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    config = get_config(config_files)

    # Bounding boxes will be filtered by frontend (using slider), so we set
    # a low threshold.
    # NOTE(review): this `config` object is not forwarded to start_network,
    # which receives `config_files` instead - confirm the threshold set here
    # actually reaches the model.
    config.model.rcnn.proposals.min_prob_threshold = 0.01

    # Initialize model
    # `args` must be a tuple of positional arguments. `(config_files)` is
    # just `config_files`, so Thread would unpack its elements as separate
    # arguments; the trailing comma passes it as one argument.
    global NETWORK_START_THREAD
    NETWORK_START_THREAD = Thread(target=start_network, args=(config_files,))
    NETWORK_START_THREAD.start()

    app.run(host=host, port=port, debug=debug)
Exemple #16
0
def web(config_files, host, port, debug):
    """Load the config (and class labels, if present) and run the app.

    The loaded config is stored in ``app.config['config']`` with the RCNN
    proposals threshold lowered to 0.01. When the config names a dataset
    directory containing ``classes.json``, its parsed content is stored in
    ``app.config['class_labels']``.

    Args:
        config_files: Config file(s) forwarded to ``get_config``.
        host: Host passed to ``app.run``.
        port: Port passed to ``app.run``.
        debug: Selects DEBUG vs INFO logging and is passed to ``app.run``.
    """
    if debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    config = get_config(config_files)
    app.config['config'] = config

    # Bounding boxes will be filtered by frontend (using slider), so we set
    # a low threshold.
    config.model.rcnn.proposals.min_prob_threshold = 0.01

    if config.dataset.dir:
        # Gets the names of the classes
        classes_file = os.path.join(config.dataset.dir, 'classes.json')
        if tf.gfile.Exists(classes_file):
            # Context manager so the file handle is closed after reading.
            with tf.gfile.GFile(classes_file) as labels_file:
                app.config['class_labels'] = json.load(labels_file)

    app.run(host=host, port=port, debug=debug)
Exemple #17
0
def web(config_files, host, port, debug):
    """Load the config (and class labels, if present) and run the app.

    The loaded config is stored in ``app.config['config']`` with the RCNN
    proposals threshold lowered to 0.01. When the config names a dataset
    directory containing ``classes.json``, its parsed content is stored in
    ``app.config['class_labels']``.

    Args:
        config_files: Config file(s) forwarded to ``get_config``.
        host: Host passed to ``app.run``.
        port: Port passed to ``app.run``.
        debug: Selects DEBUG vs INFO logging and is passed to ``app.run``.
    """
    if debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    config = get_config(config_files)
    app.config['config'] = config

    # Bounding boxes will be filtered by frontend (using slider), so we set
    # a low threshold.
    config.model.rcnn.proposals.min_prob_threshold = 0.01

    if config.dataset.dir:
        # Gets the names of the classes
        classes_file = os.path.join(config.dataset.dir, 'classes.json')
        if tf.gfile.Exists(classes_file):
            # Context manager so the file handle is closed after reading.
            with tf.gfile.GFile(classes_file) as labels_file:
                app.config['class_labels'] = json.load(labels_file)

    app.run(host=host, port=port, debug=debug)
Exemple #18
0
    def __init__(self, config_files):
        """Build the prediction graph and load (or initialize) the model.

        Sets ``self.class_labels`` (parsed ``classes.json`` from the dataset
        directory, or ``None``), ``self.session``, ``self.image_placeholder``
        and ``self.fetches``.

        Args:
            config_files: Config file(s) forwarded to ``get_config``.

        Raises:
            ValueError: If ``config.train.job_dir`` is set but no checkpoint
                is found in the resolved job directory.
        """
        config = get_config(config_files)

        # Default to no labels so the attribute always exists, even when the
        # config has no dataset directory (previously it was left unset).
        self.class_labels = None
        if config.dataset.dir:
            # Gets the names of the classes
            classes_file = os.path.join(config.dataset.dir, 'classes.json')
            if tf.gfile.Exists(classes_file):
                # Context manager so the file handle is closed after reading.
                with tf.gfile.GFile(classes_file) as labels_file:
                    self.class_labels = json.load(labels_file)

        # Don't use data augmentation in predictions
        config.dataset.data_augmentation = None

        dataset_class = get_dataset(config.dataset.type)
        model_class = get_model(config.model.type)
        dataset = dataset_class(config)
        model = model_class(config)

        graph = tf.Graph()
        self.session = tf.Session(graph=graph)

        with graph.as_default():
            self.image_placeholder = tf.placeholder(tf.float32,
                                                    (None, None, 3))
            image_tf, _, process_meta = dataset.preprocess(
                self.image_placeholder)
            pred_dict = model(image_tf)

            # Restore checkpoint
            if config.train.job_dir:
                job_dir = config.train.job_dir
                if config.train.run_name:
                    job_dir = os.path.join(job_dir, config.train.run_name)
                ckpt = tf.train.get_checkpoint_state(job_dir)
                if not ckpt or not ckpt.all_model_checkpoint_paths:
                    raise ValueError(
                        'Could not find checkpoint in {}.'.format(job_dir))
                # Use the last entry of the recorded checkpoint paths.
                ckpt = ckpt.all_model_checkpoint_paths[-1]
                saver = tf.train.Saver(sharded=True, allow_empty=True)
                saver.restore(self.session, ckpt)
                tf.logging.info('Loaded checkpoint.')
            else:
                # A prediction without checkpoint is just used for testing
                tf.logging.warning(
                    'Could not load checkpoint. Using initialized model.')
                init_op = tf.group(tf.global_variables_initializer(),
                                   tf.local_variables_initializer())
                self.session.run(init_op)

            if config.model.network.with_rcnn:
                cls_prediction = pred_dict['classification_prediction']
                objects_tf = cls_prediction['objects']
                objects_labels_tf = cls_prediction['labels']
                objects_labels_prob_tf = cls_prediction['probs']
            else:
                rpn_prediction = pred_dict['rpn_prediction']
                objects_tf = rpn_prediction['proposals']
                objects_labels_prob_tf = rpn_prediction['scores']
                # All labels without RCNN are zero
                objects_labels_tf = tf.zeros(tf.shape(objects_labels_prob_tf),
                                             dtype=tf.int32)

            self.fetches = {
                'objects': objects_tf,
                'labels': objects_labels_tf,
                'probs': objects_labels_prob_tf,
                'scale_factor': process_meta['scale_factor']
            }

            # If in debug mode, return the full prediction dictionary.
            if config.train.debug:
                self.fetches['_debug'] = pred_dict
Exemple #19
0
def train(
    job_id,
    resume_job_id,
    bucket_name,
    region,
    config_files,
    dataset,
    scale_tier,
    master_type,
    worker_type,
    worker_count,
    parameter_server_type,
    parameter_server_count,
):
    account = ServiceAccount()
    account.validate_region(region)

    if bucket_name is None:
        bucket_name = "luminoth-{}".format(account.client_id)
        click.echo(
            'Bucket name not specified. Using "{}".'.format(bucket_name))

    # Creates bucket for logs and models if it doesn't exist
    bucket = account.get_bucket(bucket_name)

    if not job_id:
        job_id = "train_{}".format(datetime.now().strftime("%Y%m%d_%H%M%S"))

    # Path in bucket to store job's config, logs, etc.
    # If we are resuming a previous job, then we will use the same path
    # that job used, so Luminoth will load the checkpoint from there.
    base_path = "lumi_{}".format(resume_job_id if resume_job_id else job_id)

    package_path = build_package(bucket, base_path)
    job_dir = "gs://{}/{}".format(bucket_name, base_path)

    override_params = [
        "train.job_dir={}".format(job_dir),
    ]

    if dataset:
        # Check if absolute or relative dataset path
        if not dataset.startswith("gs://"):
            dataset = "gs://{}".format(dataset)
        override_params.append("dataset.dir={}".format(dataset))

    # Even if we are resuming job, we will use a new config. Thus, we will
    # overwrite the config in the old job's dir if it existed.
    config = get_config(config_files, override_params=override_params)

    # Update final config file to job bucket
    config_path = "{}/{}".format(base_path, DEFAULT_CONFIG_FILENAME)
    upload_data(bucket, config_path, dump_config(config))

    args = ["--config", "{}/{}".format(job_dir, DEFAULT_CONFIG_FILENAME)]

    cloudml = account.cloud_service("ml")

    training_inputs = {
        "scaleTier": scale_tier,
        "packageUris": ["gs://{}/{}".format(bucket_name, package_path)],
        "pythonModule": "luminoth.train",
        "args": args,
        "region": region,
        "jobDir": job_dir,
        "runtimeVersion": RUNTIME_VERSION,
        "pythonVersion": PYTHON_VERSION,
    }

    if scale_tier == "CUSTOM":
        training_inputs["masterType"] = master_type
        if worker_count > 0:
            training_inputs["workerCount"] = worker_count
            training_inputs["workerType"] = worker_type

        if parameter_server_count > 0:
            training_inputs["parameterServerCount"] = parameter_server_count
            training_inputs["parameterServerType"] = parameter_server_type

    job_spec = {"jobId": job_id, "trainingInput": training_inputs}

    jobrequest = (cloudml.projects().jobs().create(body=job_spec,
                                                   parent="projects/{}".format(
                                                       account.project_id)))

    try:
        click.echo("Submitting training job.")
        res = jobrequest.execute()
        click.echo("Job submitted successfully.")
        click.echo("state = {}, createTime = {}".format(
            res.get("state"), res.get("createTime")))
        if resume_job_id:
            click.echo(
                "\nNote: this job is resuming job {}.\n".format(resume_job_id))
        click.echo("Job id: {}".format(job_id))
        click.echo("Job directory: {}".format(job_dir))

        save_run(config, environment="gcloud", extra_config=job_spec)

    except Exception as err:
        click.echo("There was an error creating the training job. "
                   "Check the details: \n{}".format(err._get_reason()))
def detect_tile_cell(slide_path, tile_position, csv_dict, args, it_kwargs):
    """Detect, crop and classify cells in one slide tile, timing each stage.

    Args:
        slide_path: Path handed to ``large_image.getTileSource``.
        tile_position: Tile position handed to ``getSingleTile``.
        csv_dict: Dict of lists; one entry is appended per call to each of
            'Image Loading', 'Cell Detection', 'Cell Cropping',
            'Number of Cells', 'Cell Classification' and
            'Annotation Writing' (stage durations are in seconds, rounded to
            three decimals).
        args: Options object; ``max_det``, ``min_prob`` and
            ``cell_annotation_format`` are read from it.
        it_kwargs: Extra keyword arguments forwarded to ``getSingleTile``.

    Returns:
        A ``(cell_annot_list, csv_dict)`` tuple: the annotations produced by
        ``cli_utils.create_tile_cell_annotations`` and the updated
        ``csv_dict``.
    """
    start_t = time.time()

    print('--- Loading Image...')
    # get slide tile source
    ts = large_image.getTileSource(slide_path)

    # get requested tile
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)

    # get tile image, keeping only the first three channels
    im_tile = tile_info['tile'][:, :, :3]
    t1 = time.time() - start_t
    csv_dict['Image Loading'].append(round(t1, 3))
    print('--- Finished Loading Image')

    # NOTE(review): looks like a leftover debug dump that overwrites the same
    # file in the working directory on every call - confirm it is wanted.
    cv2.imwrite('hey_im_tile.png', im_tile)

    # *******************************************************
    #
    # Perform cell detections
    #
    # ########################### DETECTION #################
    print('--- Performing cell detections...')

    config = get_config(CONFIG)
    proposals = config.model.rcnn.proposals
    # Command-line values win over the module-level defaults.
    proposals.total_max_detections = (
        args.max_det if args.max_det is not None else MAX_DET)
    proposals.min_prob_threshold = (
        args.min_prob if args.min_prob is not None else MIN_PROB)

    print('--- Currently Analysing Input Image Size : ', im_tile.shape)

    network = PredictorNetwork(config)
    objects = network.predict_image(im_tile)
    print('--- Finished Cell Detections')
    t2 = time.time() - start_t
    csv_dict['Cell Detection'].append(round(t2 - t1, 3))

    print('***** Number of Detected Cells ****** : ', len(objects))

    #
    # Perform JSON loading
    #
    print('--- Performing Cell Crops loading...')
    im_tile_rgb = cv2.cvtColor(im_tile, cv2.COLOR_BGR2RGB)
    # Both branches of the former `args.inputImageFile` check made this same
    # call, so the check was dropped.
    _, images, ids = classify_cellscropping.load_json(
        im_tile_rgb, preds=objects)
    print('--- Finished Cell Crops loading')
    t3 = time.time() - start_t
    csv_dict['Cell Cropping'].append(round(t3 - t2, 3))
    csv_dict['Number of Cells'].append(len(ids))

    #       ########################### CLASSIFICATION #######################
    print('--- Performing Cell Classification...')
    try:
        final_json = classify_main.predict(images, ids, CKPT)
    except ValueError:
        # Treated as "no cells detected": continue with an empty result.
        final_json = []
        print(
            '!!!!! Can not Conduct Classification on 0 Number of Cells Detected !!!!!'
        )
    print('--- Finished Cell Classification')
    t4 = time.time() - start_t
    csv_dict['Cell Classification'].append(round(t4 - t3, 3))

    # generate cell annotations
    cell_annot_list = cli_utils.create_tile_cell_annotations(
        final_json, tile_info, args.cell_annotation_format)
    t5 = time.time() - start_t
    csv_dict['Annotation Writing'].append(round(t5 - t4, 3))

    return cell_annot_list, csv_dict
def eval(dataset_split, config_files, watch, from_global_step,
         override_params, files_per_class, iou_threshold, min_probability):
    """Evaluate models using dataset.

    Builds the model and dataset graph described by the configuration and
    runs `evaluate_once` for the checkpoints returned by `get_checkpoints`
    for the run directory (`train.job_dir` joined with `train.run_name`).

    Args:
        dataset_split: Stored in `config.dataset.split`; also used as the
            prefix of the loss and metric names.
        config_files: Configuration file(s) handed to `get_config`.
        watch: When true, keep polling for new checkpoints every 5 seconds
            instead of returning after the first pass. When false only the
            last checkpoint is requested (`last_only=not watch`).
        from_global_step: Initial global step passed to `get_checkpoints`.
        override_params: Configuration overrides handed to `get_config`.
        files_per_class: Forwarded to `evaluate_once`.
        iou_threshold: Forwarded to `evaluate_once`.
        min_probability: Written to the proposals' `min_prob_threshold`
            (RCNN or RPN, depending on `with_rcnn`) and forwarded to
            `evaluate_once`.

    Raises:
        KeyError: If `get_config` raises `KeyError`, or if `job_dir` or
            `run_name` is not set in the configuration.
        ValueError: Re-raised from `get_checkpoints` when not watching.
    """

    # If the config file is empty, our config will be the base_config for the
    # default model.
    try:
        config = get_config(config_files, override_params=override_params)
    except KeyError:
        raise KeyError('model.type should be set on the custom config.')

    if not config.train.job_dir:
        raise KeyError('`job_dir` should be set.')
    if not config.train.run_name:
        raise KeyError('`run_name` should be set.')

    # `run_dir` is where the actual checkpoint and logs are located.
    run_dir = os.path.join(config.train.job_dir, config.train.run_name)

    # Only activate debug for if needed for debug visualization mode.
    if not config.train.debug:
        config.train.debug = config.eval.image_vis == 'debug'

    if config.train.debug or config.train.tf_debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Build the dataset tensors, overriding the default dataset split.
    config.dataset.split = dataset_split
    # Disable data augmentation.
    config.dataset.data_augmentation = []

    # Only a single run over the dataset to calculate metrics.
    config.train.num_epochs = 1

    if config.model.network.with_rcnn:
        config.model.rcnn.proposals.min_prob_threshold = min_probability
    else:
        config.model.rpn.proposals.min_prob_threshold = min_probability

    # Seed setup
    if config.train.seed:
        tf.set_random_seed(config.train.seed)

    # Set pretrained as not training
    config.model.base_network.trainable = False

    model_class = get_model(config.model.type)
    model = model_class(config)
    dataset_class = get_dataset(config.dataset.type)
    dataset = dataset_class(config)
    train_dataset = dataset()

    train_image = train_dataset['image']
    train_objects = train_dataset['bboxes']
    train_filename = train_dataset['filename']

    # Build the graph of the model to evaluate, retrieving required
    # intermediate tensors.
    prediction_dict = model(train_image, train_objects)

    if config.model.network.with_rcnn:
        pred = prediction_dict['classification_prediction']
        pred_objects = pred['objects']
        pred_objects_classes = pred['labels']
        pred_objects_scores = pred['probs']
    else:
        # Force the num_classes to 1
        config.model.network.num_classes = 1

        pred = prediction_dict['rpn_prediction']
        pred_objects = pred['proposals']
        pred_objects_scores = pred['scores']
        # When using only RPN all classes are 0.
        pred_objects_classes = tf.zeros(
            (tf.shape(pred_objects_scores)[0],), dtype=tf.int32
        )

    # Retrieve *all* the losses from the model and calculate their streaming
    # means, so we get the loss over the whole dataset.
    batch_losses = model.loss(prediction_dict, return_all=True)
    losses = {}
    for loss_name, loss_tensor in batch_losses.items():
        loss_mean, _ = tf.metrics.mean(
            loss_tensor, name=loss_name,
            metrics_collections='metrics',
            updates_collections='metric_ops',
        )
        full_loss_name = '{}_losses/{}'.format(dataset_split, loss_name)
        losses[full_loss_name] = loss_mean

    metric_ops = tf.get_collection('metric_ops')

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    # Using a global saver instead of the one for the model.
    saver = tf.train.Saver(sharded=True, allow_empty=True)

    # Aggregate the required ops to evaluate into a dict.
    ops = {
        'init_op': init_op,
        'metric_ops': metric_ops,
        'pred_objects': pred_objects,
        'pred_objects_classes': pred_objects_classes,
        'pred_objects_scores': pred_objects_scores,
        'train_objects': train_objects,
        'losses': losses,
        'prediction_dict': prediction_dict,
        'filename': train_filename,
        'train_image': train_image
    }

    metrics_scope = '{}_metrics'.format(dataset_split)

    # Use global writer for all checkpoints. We don't want to write different
    # files for each checkpoint.
    writer = tf.summary.FileWriter(run_dir)

    # The writer is closed on every exit path (normal return or exception) so
    # the event file is flushed and its handle released.
    try:
        files_to_visualize = {}

        last_global_step = from_global_step
        while True:
            # Get the checkpoint files to evaluate.
            try:
                checkpoints = get_checkpoints(
                    run_dir, last_global_step, last_only=not watch
                )
            except ValueError:
                if not watch:
                    tf.logging.error('Missing checkpoint.')
                    # Bare `raise` keeps the original traceback.
                    raise

                tf.logging.warning(
                    'Missing checkpoint; Checking again in a moment')
                time.sleep(5)
                continue

            for checkpoint in checkpoints:
                # Always returned in order, so it's safe to assign directly.
                tf.logging.info(
                    'Evaluating global_step {} using checkpoint \'{}\''.format(
                        checkpoint['global_step'], checkpoint['file']
                    )
                )
                try:
                    start = time.time()
                    evaluate_once(
                        config, writer, saver, ops, checkpoint,
                        metrics_scope=metrics_scope,
                        image_vis=config.eval.image_vis,
                        files_per_class=files_per_class,
                        files_to_visualize=files_to_visualize,
                        iou_threshold=iou_threshold,
                        min_probability=min_probability
                    )
                    last_global_step = checkpoint['global_step']
                    tf.logging.info('Evaluated in {:.2f}s'.format(
                        time.time() - start
                    ))
                except tf.errors.NotFoundError:
                    # The checkpoint is not ready yet. It was written in the
                    # checkpoints file, but it still hasn't been completely
                    # saved.
                    tf.logging.info(
                        'Checkpoint {} is not ready yet. '
                        'Checking again in a moment.'.format(
                            checkpoint['file']
                        )
                    )
                    time.sleep(5)
                    continue

            # If no watching was requested, finish the execution.
            if not watch:
                return

            # Sleep for a moment and check for new checkpoints.
            tf.logging.info('All checkpoints evaluated; sleeping for a moment')
            time.sleep(5)
    finally:
        writer.close()
Exemple #22
0
def create(config_files, override_params, entries):
    """Package the last checkpoint of a training run as a local checkpoint.

    Copies the files of the checkpoint with the highest global step found in
    the run directory, together with the configuration and (when present)
    the dataset's `classes.json`, into the directory returned by
    `get_checkpoint_path`, and appends a metadata record to the checkpoint
    database.

    Args:
        config_files: Configuration file(s) handed to `get_config`.
        override_params: Configuration overrides handed to `get_config`.
        entries: Raw entries handed to `parse_entries`. The parsed result is
            read for the keys `name`, `description`, `alias`,
            `dataset.name` and `dataset.num_classes`.

    Returns:
        None. Returns early, without creating anything, when the entries
        cannot be parsed or when no checkpoint is found in the run directory.
    """
    # Parse the entries passed as options.
    entries = parse_entries(entries)
    if entries is None:
        return

    click.echo('Creating checkpoint for given configuration...')
    # Get and build the configuration file for the model.
    config = get_config(config_files, override_params=override_params)

    # Retrieve the files for the last checkpoint available.
    run_dir = os.path.join(config.train.job_dir, config.train.run_name)
    ckpt = tf.train.get_checkpoint_state(run_dir)
    if not ckpt or not ckpt.all_model_checkpoint_paths:
        click.echo("Couldn't find checkpoint in '{}'.".format(run_dir))
        return

    # The global step is the number after the last dash of the checkpoint
    # path; pick the path with the highest one.
    last_checkpoint = sorted(
        ckpt.all_model_checkpoint_paths,
        key=lambda ckpt_path: int(ckpt_path.split('-')[-1])
    )[-1]

    checkpoint_prefix = os.path.basename(last_checkpoint)
    prefix_length = len(checkpoint_prefix)

    def belongs_to_checkpoint(filename):
        """Tell whether `filename` is a file of the selected checkpoint."""
        if not filename.startswith(checkpoint_prefix):
            return False
        # A digit right after the prefix means the file belongs to another
        # checkpoint whose step number merely starts with the same digits
        # (prefix `model.ckpt-100` vs. file `model.ckpt-1000.index`).
        return not filename[prefix_length:prefix_length + 1].isdigit()

    checkpoint_paths = [
        os.path.join(run_dir, filename) for filename in os.listdir(run_dir)
        if belongs_to_checkpoint(filename)
    ]

    # Find the `classes.json` file.
    classes_path = os.path.join(config.dataset.dir, 'classes.json')
    if not os.path.exists(classes_path):
        classes_path = None

    # Create a checkpoint_id to identify the checkpoint.
    checkpoint_id = str(uuid.uuid4()).replace('-', '')[:12]

    # Update the directory paths for the configuration file. Since it's going
    # to be packed into a single tar file, we set them to the current
    # directory.
    config.dataset.dir = '.'
    config.train.job_dir = '.'
    config.train.run_name = checkpoint_id

    # Create the directory that will contain the model.
    path = get_checkpoint_path(checkpoint_id)
    tf.gfile.MakeDirs(path)

    # NOTE(review): the configuration is serialized as JSON into a `.yml`
    # file; presumably the reader relies on JSON being loadable as YAML --
    # confirm against the code that loads it.
    with open(os.path.join(path, 'config.yml'), 'w') as f:
        json.dump(config, f)

    # Add the checkpoint files.
    for checkpoint_path in checkpoint_paths:
        shutil.copy2(checkpoint_path, path)

    # Add `checkpoint` file to indicate where the checkpoint is located. We
    # need to create it manually instead of just copying as it may contain
    # absolute paths.
    with open(os.path.join(path, 'checkpoint'), 'w') as f:
        f.write("""
            model_checkpoint_path: "{0}"
            all_model_checkpoint_paths: "{0}"
            """.format(checkpoint_prefix))

    # Add the `classes.json` file. Also get the number of classes, if
    # available.
    num_classes = None
    if classes_path:
        shutil.copy2(classes_path, path)
        with open(classes_path) as f:
            num_classes = len(json.load(f))

    # Store the new checkpoint into the checkpoint index.
    metadata = {
        'id': checkpoint_id,
        'name': entries.get('name', ''),
        'description': entries.get('description', ''),
        'alias': entries.get('alias', ''),
        'model': config.model.type,
        'dataset': {
            'name':
            entries.get('dataset.name', ''),
            # The count read from `classes.json` wins over the user entry.
            'num_classes': (num_classes
                            or entries.get('dataset.num_classes', None)),
        },
        'luminoth_version': lumi_version,
        'created_at': datetime.utcnow().isoformat(),
        'status': 'LOCAL',
        'source': 'local',
        'url': None,  # Only for remotes.
    }

    db = read_checkpoint_db()
    db['checkpoints'].append(metadata)
    save_checkpoint_db(db)

    click.echo('Checkpoint {} created successfully.'.format(checkpoint_id))
Exemple #23
0
def eval(
    dataset_split,
    config_files,
    watch,
    from_global_step,
    override_params,
    files_per_class,
    max_detections,
):
    """Evaluate models using dataset.

    Builds the model and dataset graph described by the configuration and
    runs `evaluate_once` for the checkpoints returned by `get_checkpoints`
    for the run directory (`train.job_dir` joined with `train.run_name`).

    Args:
        dataset_split: Stored in `config.dataset.split`; also used as the
            prefix of the loss and metric names.
        config_files: Configuration file(s) handed to `get_config`.
        watch: When true, keep polling for new checkpoints every 5 seconds
            instead of returning after the first pass. When false only the
            last checkpoint is requested (`last_only=not watch`).
        from_global_step: Initial global step passed to `get_checkpoints`.
        override_params: Configuration overrides handed to `get_config`.
        files_per_class: Forwarded to `evaluate_once`.
        max_detections: Written to the model's detection limit
            (`total_max_detections`, or `post_nms_top_n` for an RPN-only
            Faster R-CNN).

    Raises:
        KeyError: If `get_config` raises `KeyError`, or if `job_dir` or
            `run_name` is not set in the configuration.
        ValueError: If the model type is neither "fasterrcnn" nor "ssd";
            also re-raised from `get_checkpoints` when not watching.
    """

    # If the config file is empty, our config will be the base_config for the
    # default model.
    try:
        config = get_config(config_files, override_params=override_params)
    except KeyError:
        raise KeyError("model.type should be set on the custom config.")

    if not config.train.job_dir:
        raise KeyError("`job_dir` should be set.")
    if not config.train.run_name:
        raise KeyError("`run_name` should be set.")

    # `run_dir` is where the actual checkpoint and logs are located.
    run_dir = os.path.join(config.train.job_dir, config.train.run_name)

    # Only activate debug for if needed for debug visualization mode.
    if not config.train.debug:
        config.train.debug = config.eval.image_vis == "debug"

    if config.train.debug or config.train.tf_debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Build the dataset tensors, overriding the default dataset split.
    config.dataset.split = dataset_split

    # Disable data augmentation.
    config.dataset.data_augmentation = []

    # Attempt to get class names, if available. The file is opened in a
    # `with` block so the handle is released right after parsing.
    classes_file = os.path.join(config.dataset.dir, "classes.json")
    if tf.gfile.Exists(classes_file):
        with tf.gfile.GFile(classes_file) as classes_fp:
            class_labels = json.load(classes_fp)
    else:
        class_labels = None

    if config.model.type == "fasterrcnn":
        # Override max detections with specified value.
        if config.model.network.with_rcnn:
            config.model.rcnn.proposals.total_max_detections = max_detections
        else:
            config.model.rpn.proposals.post_nms_top_n = max_detections

        # Also overwrite `min_prob_threshold` in order to use all detections.
        config.model.rcnn.proposals.min_prob_threshold = 0.0
    elif config.model.type == "ssd":
        config.model.proposals.total_max_detections = max_detections
        config.model.proposals.min_prob_threshold = 0.0
    else:
        raise ValueError("Model type '{}' not supported".format(
            config.model.type))

    # Only a single run over the dataset to calculate metrics.
    config.train.num_epochs = 1

    # Seed setup.
    if config.train.seed:
        tf.set_random_seed(config.train.seed)

    # Set pretrained as not training.
    config.model.base_network.trainable = False

    model_class = get_model(config.model.type)
    model = model_class(config)
    dataset_class = get_dataset(config.dataset.type)
    dataset = dataset_class(config)
    train_dataset = dataset()

    train_image = train_dataset["image"]
    train_objects = train_dataset["bboxes"]
    train_filename = train_dataset["filename"]

    # Build the graph of the model to evaluate, retrieving required
    # intermediate tensors.
    prediction_dict = model(train_image, train_objects)

    if config.model.type == "ssd" or config.model.network.with_rcnn:
        pred = prediction_dict["classification_prediction"]
        pred_objects = pred["objects"]
        pred_objects_classes = pred["labels"]
        pred_objects_scores = pred["probs"]
    else:
        # Force the num_classes to 1.
        config.model.network.num_classes = 1

        pred = prediction_dict["rpn_prediction"]
        pred_objects = pred["proposals"]
        pred_objects_scores = pred["scores"]
        # When using only RPN all classes are 0.
        pred_objects_classes = tf.zeros((tf.shape(pred_objects_scores)[0], ),
                                        dtype=tf.int32)

    # Retrieve *all* the losses from the model and calculate their streaming
    # means, so we get the loss over the whole dataset.
    batch_losses = model.loss(prediction_dict, return_all=True)
    losses = {}
    for loss_name, loss_tensor in batch_losses.items():
        loss_mean, _ = tf.metrics.mean(
            loss_tensor,
            name=loss_name,
            metrics_collections="metrics",
            updates_collections="metric_ops",
        )
        full_loss_name = "{}_losses/{}".format(dataset_split, loss_name)
        losses[full_loss_name] = loss_mean

    metric_ops = tf.get_collection("metric_ops")

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # Using a global saver instead of the one for the model.
    saver = tf.train.Saver(sharded=True, allow_empty=True)

    # Aggregate the required ops to evaluate into a dict.
    ops = {
        "init_op": init_op,
        "metric_ops": metric_ops,
        "pred_objects": pred_objects,
        "pred_objects_classes": pred_objects_classes,
        "pred_objects_scores": pred_objects_scores,
        "train_objects": train_objects,
        "losses": losses,
        "prediction_dict": prediction_dict,
        "filename": train_filename,
        "train_image": train_image,
    }

    metrics_scope = "{}_metrics".format(dataset_split)

    # Use global writer for all checkpoints. We don't want to write different
    # files for each checkpoint.
    writer = tf.summary.FileWriter(run_dir)

    # The writer is closed on every exit path (normal return or exception) so
    # the event file is flushed and its handle released.
    try:
        files_to_visualize = {}

        last_global_step = from_global_step
        while True:
            # Get the checkpoint files to evaluate.
            try:
                checkpoints = get_checkpoints(run_dir,
                                              last_global_step,
                                              last_only=not watch)
            except ValueError:
                if not watch:
                    tf.logging.error("Missing checkpoint.")
                    # Bare `raise` keeps the original traceback.
                    raise

                tf.logging.warning(
                    "Missing checkpoint; Checking again in a moment")
                time.sleep(5)
                continue

            for checkpoint in checkpoints:
                # Always returned in order, so it's safe to assign directly.
                tf.logging.info(
                    "Evaluating global_step {} using checkpoint '{}'".format(
                        checkpoint["global_step"], checkpoint["file"]))
                try:
                    start = time.time()
                    evaluate_once(
                        config,
                        writer,
                        saver,
                        ops,
                        checkpoint,
                        class_labels=class_labels,
                        metrics_scope=metrics_scope,
                        image_vis=config.eval.image_vis,
                        files_per_class=files_per_class,
                        files_to_visualize=files_to_visualize,
                    )
                    last_global_step = checkpoint["global_step"]
                    tf.logging.info("Evaluated in {:.2f}s".format(
                        time.time() - start))
                except tf.errors.NotFoundError:
                    # The checkpoint is not ready yet. It was written in the
                    # checkpoints file, but it still hasn't been completely
                    # saved.
                    tf.logging.info("Checkpoint {} is not ready yet. "
                                    "Checking again in a moment.".format(
                                        checkpoint["file"]))
                    time.sleep(5)
                    continue

            # If no watching was requested, finish the execution.
            if not watch:
                return

            # Sleep for a moment and check for new checkpoints.
            tf.logging.info("All checkpoints evaluated; sleeping for a moment")
            time.sleep(5)
    finally:
        writer.close()
Exemple #24
0
def detect_tile_nuclei(slide_path, tile_position, args, it_kwargs,
                       src_mu_lab=None, src_sigma_lab=None, debug=False):
    """Detect nuclei in one slide tile and collect results and timings.

    The tile is loaded, Reinhard-normalized and color-deconvolved; the
    per-pixel minimum of the two stain channels is replicated to three
    channels and fed to a Faster R-CNN model restored from `CKPT_DIR`. The
    detections are then converted into annotations through `cli_utils`.

    Args:
        slide_path: Path handed to `large_image.getTileSource`.
        tile_position: Tile position handed to `getSingleTile`.
        args: Options object. The attributes read here are `deconv_method`,
            `stain_1`, `stain_2`, `max_det`, `min_prob`,
            `ignore_border_nuclei`, `analysis_tile_size` and
            `nuclei_annotation_format`; it is also passed whole to
            `cli_utils.get_stain_matrix`.
        it_kwargs: Extra keyword arguments for `getSingleTile`.
        src_mu_lab: Forwarded to `htk_cnorm.reinhard` as `src_mu`.
        src_sigma_lab: Forwarded to `htk_cnorm.reinhard` as `src_sigma`.
        debug: Not used by this function.

    Returns:
        A dict with the keys `PreparationTime`, `ColorDeconvTime`,
        `TotalTileLoadingTime`, `CKPTLoadingTime`, `ModelInfernceTime`,
        `DetectionTime`, `ROIShape`, `ObjectsDict`, `NumObjects`,
        `AnnotationWritingTime`, `AnnotationDict` and `AnalysisDict`.
        Timings are in seconds, rounded to 3 decimals.

    Raises:
        ValueError: If `args.deconv_method` is neither 'ruifrok' nor
            'macenko'.
    """

    # =========================================================================
    # ======================= Tile Loading ====================================
    # =========================================================================
    print('\n>> Loading Tile ... \n')

    # Register every result key up front, in a fixed order; each one is
    # overwritten with its actual value further down.
    csv_dict = {}
    for key in (
            'PreparationTime', 'ColorDeconvTime', 'TotalTileLoadingTime',
            'CKPTLoadingTime', 'ModelInfernceTime', 'DetectionTime',
            'ROIShape', 'ObjectsDict', 'NumObjects',
            'AnnotationWritingTime',
            'AnnotationDict', 'AnalysisDict'):
        csv_dict[key] = []

    start_time = time.time()
    total_tileloading_start_time = time.time()

    ts = large_image.getTileSource(slide_path)
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)
    # Keep only the first three channels of the tile.
    im_tile = tile_info['tile'][:, :, :3]
    csv_dict['ROIShape'] = im_tile.shape[:2]

    prep_time = time.time() - start_time
    csv_dict['PreparationTime'] = round(prep_time, 3)

    # =========================================================================
    # =================Img Normalization & Color Deconv========================
    # =========================================================================
    print('\n>> Color Deconvolving ... \n')
    start_time = time.time()

    im_nmzd = htk_cnorm.reinhard(
        im_tile,
        REFERENCE_MU_LAB,
        REFERENCE_STD_LAB,
        src_mu=src_mu_lab,
        src_sigma=src_sigma_lab
    )

    # Perform color deconvolution. The builtin `float` is used as dtype: the
    # `np.float` alias it replaces no longer exists in current NumPy.
    if args.deconv_method == 'ruifrok':

        w = cli_utils.get_stain_matrix(args)
        im_stains = htk_cdeconv.color_deconvolution(
            im_nmzd, w).Stains.astype(float)[:, :, :2]

    elif args.deconv_method == 'macenko':

        # NOTE(review): this branch works on the raw tile rather than on the
        # normalized image -- confirm that this is intended.
        w_est = htk_cdeconv.rgb_separate_stains_macenko_pca(im_tile, 255)
        im_stains = htk_cdeconv.color_deconvolution(
            im_tile, w_est, 255).Stains.astype(float)
        ch1 = htk_cdeconv.find_stain_index(
            htk_cdeconv.stain_color_map[args.stain_1], w_est)
        ch2 = htk_cdeconv.find_stain_index(
            htk_cdeconv.stain_color_map[args.stain_2], w_est)
        im_stains = im_stains[:, :, [ch1, ch2]]

    else:

        raise ValueError('Invalid deconvolution method parameter.')

    # =========================================================================
    # ====================== Fuse the stain1 & stain2 pix======================
    # =========================================================================

    # The detector input is the per-pixel minimum of the two stain channels.
    im_nuc_det_input = np.squeeze(np.min(im_stains[:, :, :2], axis=2))
    print('---> Fusing 2 Stains')
    deconv_time = time.time() - start_time
    csv_dict['ColorDeconvTime'] = round(deconv_time, 3)

    # =========================================================================
    # ================= Nuclei Detection Deep Learning Block ==================
    # =========================================================================

    total_tileloading_time = time.time() - total_tileloading_start_time
    csv_dict['TotalTileLoadingTime'] = round(total_tileloading_time, 3)

    start_time = time.time()

    config = get_config(CONFIG)
    config.model.rcnn.proposals.total_max_detections = args.max_det
    config.model.rcnn.proposals.min_prob_threshold = args.min_prob
    # Replicate the single channel three times to match the 3-channel
    # placeholder defined below.
    im_nuc_det_input = np.stack((im_nuc_det_input,) * 3, axis=-1)

    tf.reset_default_graph()

    model_class = get_model('fasterrcnn')
    model = model_class(config)

    graph = tf.Graph()
    session = tf.Session(graph=graph)

    # The session is closed on every exit path so its resources are released
    # once the predictions have been fetched.
    try:
        with graph.as_default():
            image_placeholder = tf.placeholder(
                tf.float32, (None, None, 3), name='Input_Placeholder'
            )
            pred_dict = model(image_placeholder)

            ckpt_loading_start_time = time.time()

            saver = tf.train.Saver(sharded=True, allow_empty=True)
            saver.restore(session, CKPT_DIR)
            tf.logging.info('Loaded checkpoint.')

            ckpt_loading_time = time.time() - ckpt_loading_start_time
            csv_dict['CKPTLoadingTime'] = round(ckpt_loading_time, 3)

            inference_start_time = time.time()

            cls_prediction = pred_dict['classification_prediction']
            fetches = {
                'objects': cls_prediction['objects'],
                'labels': cls_prediction['labels'],
                'probs': cls_prediction['probs'],
            }

            fetched = session.run(fetches, feed_dict={
                image_placeholder: np.array(im_nuc_det_input)
            })

            inference_time = time.time() - inference_start_time
            csv_dict['ModelInfernceTime'] = round(inference_time, 3)
    finally:
        session.close()

    labels = fetched['labels'].tolist()
    probs = fetched['probs'].tolist()

    # Cast to int to consistently return the same type in Python 2 and 3
    objects = [
        [int(round(coord)) for coord in obj]
        for obj in fetched['objects'].tolist()
    ]

    # Most confident detections first. `objects` and `probs` themselves keep
    # the order in which the model returned them.
    predictions = sorted([
        {
            'bbox': obj,
            'label': label,
            'prob': round(prob, 4),
        } for obj, label, prob in zip(objects, labels, probs)
    ], key=lambda x: x['prob'], reverse=True)

    print('\n>> Finishing Detection ... \n')
    print('***** Number of Detected Cells ****** : ', len(predictions))
    detection_time = time.time() - start_time
    csv_dict['DetectionTime'] = round(detection_time, 3)
    csv_dict['NumObjects'] = len(predictions)
    csv_dict['ObjectsDict'] = predictions

    # =========================================================================
    # ======================= TODO: Implement border deletion =================
    # =========================================================================

    # =========================================================================
    # ======================= Write Annotations ===============================
    # =========================================================================

    start_time = time.time()

    objects_df = pd.DataFrame(objects)
    formatted_annot_list,\
        formatter_analysis_list = cli_utils.convert_preds_to_utilformat(
            objects_df,
            probs,
            args.ignore_border_nuclei,
            im_tile_size=args.analysis_tile_size)

    nuclei_annot_list = cli_utils.create_tile_nuclei_annotations(
        formatted_annot_list, tile_info, args.nuclei_annotation_format)
    csv_dict['AnnotationDict'] = nuclei_annot_list

    csv_dict['AnalysisDict'] = formatter_analysis_list

    anot_time = time.time() - start_time
    csv_dict['AnnotationWritingTime'] = round(anot_time, 3)

    return csv_dict
Exemple #25
0
def evaluate(dataset_split, config_files, job_dir, watch,
             from_global_step, override_params, files_per_class):
    """
    Evaluate models using dataset.

    Builds the model and dataset graph described by the configuration and
    runs `evaluate_once` for every checkpoint returned by `get_checkpoints`.

    Args:
        dataset_split: Stored in `config.dataset.split`; also used as the
            prefix of the loss and metric names.
        config_files: Configuration file(s) handed to `get_config`.
        job_dir: When truthy, replaces `config.train.job_dir`. Summaries are
            written to the resulting directory.
        watch: When true, keep polling for new checkpoints every 60 seconds
            instead of returning after the first pass.
        from_global_step: Initial global step passed to `get_checkpoints`.
        override_params: Configuration overrides handed to `get_config`.
        files_per_class: Forwarded to `evaluate_once`.

    Raises:
        KeyError: If `get_config` raises `KeyError`.
        ValueError: Re-raised from `get_checkpoints` when not watching.
    """
    # If the config file is empty, our config will be the base_config for the
    # default model.
    try:
        config = get_config(config_files, override_params=override_params)
    except KeyError:
        raise KeyError('model.type should be set on the custom config.')

    config.train.job_dir = job_dir or config.train.job_dir

    # Only activate debug for if needed for debug visualization mode.
    if not config.train.debug:
        config.train.debug = config.eval.image_vis == 'debug'

    if config.train.debug or config.train.tf_debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Build the dataset tensors, overriding the default dataset split.
    config.dataset.split = dataset_split
    # Disable data augmentation.
    config.dataset.data_augmentation = []

    # Only a single run over the dataset to calculate metrics.
    config.train.num_epochs = 1

    # Seed setup
    if config.train.seed:
        tf.set_random_seed(config.train.seed)

    # Set pretrained as not training
    config.model.base_network.trainable = False

    model_class = get_model(config.model.type)
    model = model_class(config)
    dataset_class = get_dataset(config.dataset.type)
    dataset = dataset_class(config)
    train_dataset = dataset()

    train_image = train_dataset['image']
    train_objects = train_dataset['bboxes']
    train_filename = train_dataset['filename']

    # Build the graph of the model to evaluate, retrieving required
    # intermediate tensors.
    prediction_dict = model(
        train_image, train_objects
    )

    if config.model.network.with_rcnn:
        pred = prediction_dict['classification_prediction']
        pred_objects = pred['objects']
        pred_objects_classes = pred['labels']
        pred_objects_scores = pred['probs']
    else:
        # Force the num_classes to 1
        config.model.network.num_classes = 1

        pred = prediction_dict['rpn_prediction']
        pred_objects = pred['proposals']
        pred_objects_scores = pred['scores']
        # When using only RPN all classes are 0.
        pred_objects_classes = tf.zeros(
            (tf.shape(pred_objects_scores)[0],), dtype=tf.int32
        )

    # Retrieve *all* the losses from the model and calculate their streaming
    # means, so we get the loss over the whole dataset.
    batch_losses = model.loss(prediction_dict, return_all=True)
    losses = {}
    for loss_name, loss_tensor in batch_losses.items():
        loss_mean, _ = tf.metrics.mean(
            loss_tensor, name=loss_name,
            metrics_collections='metrics',
            updates_collections='metric_ops',
        )
        full_loss_name = '{}_losses/{}'.format(dataset_split, loss_name)
        losses[full_loss_name] = loss_mean

    metric_ops = tf.get_collection('metric_ops')

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    # Using a global saver instead of the one for the model.
    saver = tf.train.Saver(sharded=True, allow_empty=True)

    # Aggregate the required ops to evaluate into a dict.
    ops = {
        'init_op': init_op,
        'metric_ops': metric_ops,
        'pred_objects': pred_objects,
        'pred_objects_classes': pred_objects_classes,
        'pred_objects_scores': pred_objects_scores,
        'train_objects': train_objects,
        'losses': losses,
        'prediction_dict': prediction_dict,
        'filename': train_filename,
        'train_image': train_image
    }

    metrics_scope = '{}_metrics'.format(dataset_split)

    # Use global writer for all checkpoints. We don't want to write different
    # files for each checkpoint.
    writer = tf.summary.FileWriter(config.train.job_dir)

    # The writer is closed on every exit path (normal return or exception) so
    # the event file is flushed and its handle released.
    try:
        files_to_visualize = {}

        last_global_step = from_global_step
        while True:
            # Get the checkpoint files to evaluate.
            try:
                checkpoints = get_checkpoints(config, last_global_step)
            except ValueError:
                if not watch:
                    tf.logging.error('Missing checkpoint.')
                    # Bare `raise` keeps the original traceback.
                    raise

                tf.logging.warning(
                    'Missing checkpoint; Checking again in a minute')
                time.sleep(60)
                continue

            for checkpoint in checkpoints:
                # Always returned in order, so it's safe to assign directly.
                tf.logging.info(
                    'Evaluating global_step {} using checkpoint \'{}\''.format(
                        checkpoint['global_step'], checkpoint['file']
                    )
                )
                try:
                    start = time.time()
                    evaluate_once(
                        config, writer, saver, ops, checkpoint,
                        metrics_scope=metrics_scope,
                        image_vis=config.eval.image_vis,
                        files_per_class=files_per_class,
                        files_to_visualize=files_to_visualize
                    )
                    last_global_step = checkpoint['global_step']
                    tf.logging.info('Evaluated in {:.2f}s'.format(
                        time.time() - start
                    ))
                except tf.errors.NotFoundError:
                    # The checkpoint is not ready yet. It was written in the
                    # checkpoints file, but it still hasn't been completely
                    # saved.
                    tf.logging.info(
                        'Checkpoint {} is not ready yet. '
                        'Checking again in a minute.'.format(
                            checkpoint['file']
                        )
                    )
                    time.sleep(60)
                    continue

            # If no watching was requested, finish the execution.
            if not watch:
                return

            # Sleep for a minute and check for new checkpoints.
            tf.logging.info('All checkpoints evaluated; sleeping for a minute')
            time.sleep(60)
    finally:
        writer.close()
Exemple #26
0
def train(job_id, service_account_json, bucket_name, region, config_files,
          dataset, scale_tier, master_type, worker_type, worker_count,
          parameter_server_type, parameter_server_count):
    """Package Luminoth and submit a training job to the cloud `ml` service.

    Builds and uploads the package and the final config to a bucket, then
    creates a job whose `pythonModule` is `luminoth.train`. On success the
    run is recorded through `save_run`; failures while submitting are
    reported with `click.echo` instead of being raised.

    Args:
        job_id: Job identifier. When falsy, `train_<YYYYmmdd_HHMMSS>` is
            generated from the current local time.
        service_account_json: Service account file, used to resolve the
            project id, client id, credentials and bucket.
        bucket_name: Bucket for the job's files. When None, defaults to
            `luminoth-<client_id>`.
        region: Region to run in; checked with `validate_region`.
        config_files: Config files handed to `get_config`.
        dataset: Optional dataset location. A missing `gs://` prefix is
            added; the value overrides `dataset.dir`.
        scale_tier: Sent as `scaleTier`. The machine settings below are
            only sent when it equals `'CUSTOM'`.
        master_type: Sent as `masterType`.
        worker_type: Sent as `workerType` when `worker_count > 0`.
        worker_count: Sent as `workerCount` when greater than zero.
        parameter_server_type: Sent as `parameterServerType` when
            `parameter_server_count > 0`.
        parameter_server_count: Sent as `parameterServerCount` when
            greater than zero.

    Raises:
        ValueError: If no project id can be read from
            `service_account_json`.
    """
    project_id = get_project_id(service_account_json)
    if project_id is None:
        raise ValueError(
            'Missing "project_id" in service_account_json "{}"'.format(
                service_account_json))

    if bucket_name is None:
        client_id = get_client_id(service_account_json)
        bucket_name = 'luminoth-{}'.format(client_id)
        click.echo(
            'Bucket name not specified. Using "{}".'.format(bucket_name))

    credentials = get_credentials(service_account_json)
    validate_region(region, project_id, credentials)

    # Creates bucket for logs and models if it doesn't exist
    bucket = get_bucket(service_account_json, bucket_name)

    if not job_id:
        job_id = 'train_{}'.format(datetime.now().strftime("%Y%m%d_%H%M%S"))

    # Define path in bucket to store job's config, logs, etc.
    base_path = 'lumi_{}'.format(job_id)

    package_path = build_package(bucket, base_path)
    # NOTE: `job_dir` deliberately ends with a slash; the config URI below
    # relies on it.
    job_dir = 'gs://{}/{}/'.format(bucket_name, base_path)

    override_params = [
        'train.job_dir={}'.format(job_dir),
    ]

    if dataset:
        # Check if absolute or relative dataset path
        if not dataset.startswith('gs://'):
            dataset = 'gs://{}'.format(dataset)
        override_params.append('dataset.dir={}'.format(dataset))

    config = get_config(config_files, override_params=override_params)
    # We should validate config before submitting job

    # Upload final config file to job bucket. Bucket object names always use
    # '/' as separator, so they are built explicitly instead of through
    # `os.path.join`, which would insert a backslash on Windows and make the
    # uploaded object differ from the URI passed to the job.
    config_path = '{}/{}'.format(base_path, DEFAULT_CONFIG_FILENAME)
    upload_data(bucket, config_path, dump_config(config))

    args = ['--config', '{}{}'.format(job_dir, DEFAULT_CONFIG_FILENAME)]

    cloudml = cloud_service(credentials, 'ml')

    training_inputs = {
        'scaleTier': scale_tier,
        'packageUris': [
            'gs://{}/{}'.format(bucket_name, package_path)
        ],
        'pythonModule': 'luminoth.train',
        'args': args,
        'region': region,
        'jobDir': job_dir,
        'runtimeVersion': RUNTIME_VERSION,
    }

    if scale_tier == 'CUSTOM':
        training_inputs['masterType'] = master_type
        if worker_count > 0:
            training_inputs['workerCount'] = worker_count
            training_inputs['workerType'] = worker_type

        if parameter_server_count > 0:
            training_inputs['parameterServerCount'] = parameter_server_count
            training_inputs['parameterServerType'] = parameter_server_type

    job_spec = {
        'jobId': job_id,
        'trainingInput': training_inputs
    }

    jobrequest = cloudml.projects().jobs().create(
        body=job_spec, parent='projects/{}'.format(project_id))

    try:
        click.echo('Submitting training job.')
        res = jobrequest.execute()
        click.echo('Job {} submitted successfully.'.format(job_id))
        click.echo('state = {}, createTime = {}'.format(
            res.get('state'), res.get('createTime')))

        save_run(config, environment='gcloud', extra_config=job_spec)

    except Exception as err:
        # `_get_reason()` is not part of the base Exception API (presumably
        # it comes from the API client's HTTP error type -- confirm). Fall
        # back to the exception itself so that other failures are reported
        # instead of raising AttributeError from inside this handler.
        get_reason = getattr(err, '_get_reason', None)
        reason = get_reason() if callable(get_reason) else err
        click.echo(
            'There was an error creating the training job. '
            'Check the details: \n{}'.format(reason)
        )
Exemple #27
0
def create(config_files, override_params, entries):
    """Package the latest checkpoint of a run as a local checkpoint.

    Locates the checkpoint with the highest global step in the run
    directory (`train.job_dir`/`train.run_name`), copies its files, the
    config and (when present) the dataset's `classes.json` into a new
    checkpoint directory, and registers the checkpoint's metadata in the
    checkpoint index.

    Args:
        config_files: Config files handed to `get_config`.
        override_params: Config overrides handed to `get_config`.
        entries: Raw metadata entries; parsed with `parse_entries`. The keys
            read are `name`, `description`, `alias`, `dataset.name` and
            `dataset.num_classes`.

    Returns:
        None. Returns early, without creating anything, when the entries
        cannot be parsed or no checkpoint is found in the run directory.
    """
    # Parse the entries passed as options.
    entries = parse_entries(entries)
    if entries is None:
        return

    click.echo("Creating checkpoint for given configuration...")
    # Get and build the configuration file for the model.
    config = get_config(config_files, override_params=override_params)

    # Retrieve the files for the last checkpoint available.
    run_dir = os.path.join(config.train.job_dir, config.train.run_name)
    ckpt = tf.train.get_checkpoint_state(run_dir)
    if not ckpt or not ckpt.all_model_checkpoint_paths:
        click.echo("Couldn't find checkpoint in '{}'.".format(run_dir))
        return

    # The global step is the numeric suffix after the last dash of the
    # checkpoint path; pick the path with the highest one.
    last_checkpoint = sorted(
        ckpt.all_model_checkpoint_paths,
        key=lambda ckpt_path: int(ckpt_path.split("-")[-1]),
    )[-1]

    # Collect only the files of that checkpoint: either the bare prefix or
    # the prefix followed by an extension. A plain `startswith(prefix)`
    # would also pick up files of other checkpoints sharing the same leading
    # digits (e.g. prefix `model.ckpt-10` matching `model.ckpt-100.index`).
    checkpoint_prefix = os.path.basename(last_checkpoint)
    checkpoint_paths = [
        os.path.join(run_dir, filename)
        for filename in os.listdir(run_dir)
        if filename == checkpoint_prefix
        or filename.startswith(checkpoint_prefix + ".")
    ]

    # Find the `classes.json` file.
    classes_path = os.path.join(config.dataset.dir, "classes.json")
    if not os.path.exists(classes_path):
        classes_path = None

    # Create a checkpoint_id to identify the checkpoint.
    checkpoint_id = str(uuid.uuid4()).replace("-", "")[:12]

    # Update the directory paths for the configuration file. Since it's going
    # to be packed into a single tar file, we set them to the current
    # directory.
    config.dataset.dir = "."
    config.train.job_dir = "."
    config.train.run_name = checkpoint_id

    # Create the directory that will contain the model.
    path = get_checkpoint_path(checkpoint_id)
    tf.gfile.MakeDirs(path)

    # NOTE(review): the config is serialized as JSON into a `.yml` file;
    # presumably the loader accepts it -- confirm.
    with open(os.path.join(path, "config.yml"), "w") as f:
        json.dump(config, f)

    # Add the checkpoint files.
    for checkpoint_path in checkpoint_paths:
        shutil.copy2(checkpoint_path, path)

    # Add `checkpoint` file to indicate where the checkpoint is located. We
    # need to create it manually instead of just copying as it may contain
    # absolute paths.
    with open(os.path.join(path, "checkpoint"), "w") as f:
        f.write("""
            model_checkpoint_path: "{0}"
            all_model_checkpoint_paths: "{0}"
            """.format(checkpoint_prefix))

    # Add the `classes.json` file. Also get the number of classes, if
    # available.
    num_classes = None
    if classes_path:
        shutil.copy2(classes_path, path)
        with open(classes_path) as f:
            num_classes = len(json.load(f))

    # Store the new checkpoint into the checkpoint index.
    metadata = {
        "id": checkpoint_id,
        "name": entries.get("name", ""),
        "description": entries.get("description", ""),
        "alias": entries.get("alias", ""),
        "model": config.model.type,
        "dataset": {
            "name":
            entries.get("dataset.name", ""),
            # The count read from `classes.json` wins over the entry value.
            "num_classes": (num_classes
                            or entries.get("dataset.num_classes", None)),
        },
        "luminoth_version": lumi_version,
        "created_at": datetime.utcnow().isoformat(),
        "status": "LOCAL",
        "source": "local",
        "url": None,  # Only for remotes.
    }

    db = read_checkpoint_db()
    db["checkpoints"].append(metadata)
    save_checkpoint_db(db)

    click.echo("Checkpoint {} created successfully.".format(checkpoint_id))
def predict(path_or_dir, config_files, checkpoint, override_params,
            output_path, save_media_to, min_prob, max_detections, only_class,
            ignore_class, debug):
    """Obtain a model's predictions.

    Receives either `config_files` or `checkpoint` in order to load the correct
    model. Afterwards, runs the model through the inputs specified by
    `path-or-dir`, returning predictions according to the format specified by
    `output`.

    Additional model behavior may be modified with `min-prob`, `only-class` and
    `ignore-class`.
    """
    # Parameter notes (kept out of the docstring, which reads like
    # user-facing help text):
    #   path_or_dir:     input handed to `resolve_files`.
    #   config_files:    config files, used only when `checkpoint` is falsy.
    #   checkpoint:      checkpoint id/alias; falls back to 'accurate' when
    #                    neither it nor `config_files` is given.
    #   override_params: optional config overrides.
    #   output_path:     file receiving one JSON line per image; '-' selects
    #                    standard output.
    #   save_media_to:   optional directory for `pred_<basename>` media.
    #   min_prob / max_detections: written into the model's proposal config.
    #   only_class / ignore_class: mutually exclusive class filters.
    #   debug:           DEBUG logging when truthy, ERROR otherwise.
    # Raises ValueError when the model type is neither 'fasterrcnn' nor
    # 'ssd'.
    if debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.ERROR)

    if only_class and ignore_class:
        click.echo(
            "Only one of `only-class` or `ignore-class` may be specified.")
        return

    # Process the input and get the actual files to predict.
    files = resolve_files(path_or_dir)
    if not files:
        error = 'No files to predict found. Accepted formats are: {}.'.format(
            ', '.join(IMAGE_FORMATS + VIDEO_FORMATS))
        click.echo(error)
        return
    else:
        click.echo('Found {} files to predict.'.format(len(files)))

    # Build the `Formatter` based on the outputs, which automatically writes
    # the formatted output to all the requested output files.
    if output_path == '-':
        output = sys.stdout
    else:
        output = open(output_path, 'w')

    # Everything below may raise (config loading, unsupported model type,
    # prediction errors); the `finally` guarantees the output file is
    # released in every case.
    try:
        # Create `save_media_to` if specified and it doesn't exist.
        if save_media_to:
            tf.gfile.MakeDirs(save_media_to)

        # Resolve the config to use and initialize the model.
        if checkpoint:
            config = get_checkpoint_config(checkpoint)
        elif config_files:
            config = get_config(config_files)
        else:
            click.echo(
                'Neither checkpoint not config specified, '
                'assuming `accurate`.')
            config = get_checkpoint_config('accurate')

        if override_params:
            config = override_config_params(config, override_params)

        # Filter bounding boxes according to `min_prob` and `max_detections`.
        if config.model.type == 'fasterrcnn':
            if config.model.network.with_rcnn:
                config.model.rcnn.proposals.total_max_detections = (
                    max_detections)
            else:
                config.model.rpn.proposals.post_nms_top_n = max_detections
            config.model.rcnn.proposals.min_prob_threshold = min_prob
        elif config.model.type == 'ssd':
            config.model.proposals.total_max_detections = max_detections
            config.model.proposals.min_prob_threshold = min_prob
        else:
            raise ValueError("Model type '{}' not supported".format(
                config.model.type))

        # Instantiate the model indicated by the config.
        network = PredictorNetwork(config)

        # Iterate over files and run the model on each.
        for file in files:

            # Get the media output path, if media storage is requested.
            save_path = os.path.join(save_media_to, 'pred_{}'.format(
                os.path.basename(file))) if save_media_to else None

            file_type = get_file_type(file)
            predictor = (
                predict_image if file_type == 'image' else predict_video)

            objects = predictor(
                network,
                file,
                only_classes=only_class,
                ignore_classes=ignore_class,
                save_path=save_path,
            )

            # TODO: Not writing jsons for video files for now.
            if objects is not None and file_type == 'image':
                output.write(
                    json.dumps({
                        'file': file,
                        'objects': objects,
                    }) + '\n')
    finally:
        # Never close the interpreter's stdout: it is shared with the rest
        # of the process. Only files opened by this function are closed.
        if output is not sys.stdout:
            output.close()
Exemple #29
0
def train(job_id, bucket_name, region, config_files, dataset, scale_tier,
          master_type, worker_type, worker_count, parameter_server_type,
          parameter_server_count):
    """Package Luminoth and submit a training job to the cloud `ml` service.

    Uses a `ServiceAccount` to validate the region, get the bucket and build
    the API client. The package and the final config are uploaded to the
    bucket and a job running `luminoth.train` is created. On success the run
    is recorded through `save_run`; failures while submitting are reported
    with `click.echo` instead of being raised.

    Args:
        job_id: Job identifier. When falsy, `train_<YYYYmmdd_HHMMSS>` is
            generated from the current local time.
        bucket_name: Bucket for the job's files. When None, defaults to
            `luminoth-<client_id>` of the service account.
        region: Region to run in; checked with `account.validate_region`.
        config_files: Config files handed to `get_config`.
        dataset: Optional dataset location. A missing `gs://` prefix is
            added; the value overrides `dataset.dir`.
        scale_tier: Sent as `scaleTier`. The machine settings below are
            only sent when it equals `'CUSTOM'`.
        master_type: Sent as `masterType`.
        worker_type: Sent as `workerType` when `worker_count > 0`.
        worker_count: Sent as `workerCount` when greater than zero.
        parameter_server_type: Sent as `parameterServerType` when
            `parameter_server_count > 0`.
        parameter_server_count: Sent as `parameterServerCount` when
            greater than zero.
    """
    account = ServiceAccount()
    account.validate_region(region)

    if bucket_name is None:
        # Fixed: this used to call the non-existent `str.formata`, raising
        # AttributeError whenever no bucket name was supplied.
        bucket_name = 'luminoth-{}'.format(account.client_id)
        click.echo(
            'Bucket name not specified. Using "{}".'.format(bucket_name))

    # Creates bucket for logs and models if it doesn't exist
    bucket = account.get_bucket(bucket_name)

    if not job_id:
        job_id = 'train_{}'.format(datetime.now().strftime("%Y%m%d_%H%M%S"))

    # Define path in bucket to store job's config, logs, etc.
    base_path = 'lumi_{}'.format(job_id)

    package_path = build_package(bucket, base_path)
    job_dir = 'gs://{}/{}'.format(bucket_name, base_path)

    override_params = [
        'train.job_dir={}'.format(job_dir),
    ]

    if dataset:
        # Check if absolute or relative dataset path
        if not dataset.startswith('gs://'):
            dataset = 'gs://{}'.format(dataset)
        override_params.append('dataset.dir={}'.format(dataset))

    config = get_config(config_files, override_params=override_params)

    # Upload final config file to job bucket
    config_path = '{}/{}'.format(base_path, DEFAULT_CONFIG_FILENAME)
    upload_data(bucket, config_path, dump_config(config))

    args = ['--config', '{}/{}'.format(job_dir, DEFAULT_CONFIG_FILENAME)]

    cloudml = account.cloud_service('ml')

    training_inputs = {
        'scaleTier': scale_tier,
        'packageUris': ['gs://{}/{}'.format(bucket_name, package_path)],
        'pythonModule': 'luminoth.train',
        'args': args,
        'region': region,
        'jobDir': job_dir,
        'runtimeVersion': RUNTIME_VERSION
    }

    if scale_tier == 'CUSTOM':
        training_inputs['masterType'] = master_type
        if worker_count > 0:
            training_inputs['workerCount'] = worker_count
            training_inputs['workerType'] = worker_type

        if parameter_server_count > 0:
            training_inputs['parameterServerCount'] = parameter_server_count
            training_inputs['parameterServerType'] = parameter_server_type

    job_spec = {'jobId': job_id, 'trainingInput': training_inputs}

    jobrequest = cloudml.projects().jobs().create(body=job_spec,
                                                  parent='projects/{}'.format(
                                                      account.project_id))

    try:
        click.echo('Submitting training job.')
        res = jobrequest.execute()
        click.echo('Job submitted successfully.')
        click.echo('state = {}, createTime = {}'.format(
            res.get('state'), res.get('createTime')))
        click.echo('\nJob id: {}'.format(job_id))

        save_run(config, environment='gcloud', extra_config=job_spec)

    except Exception as err:
        # `_get_reason()` is not part of the base Exception API (presumably
        # it comes from the API client's HTTP error type -- confirm). Fall
        # back to the exception itself so that other failures are reported
        # instead of raising AttributeError from inside this handler.
        get_reason = getattr(err, '_get_reason', None)
        reason = get_reason() if callable(get_reason) else err
        click.echo('There was an error creating the training job. '
                   'Check the details: \n{}'.format(reason))
Exemple #30
0
def predict(path_or_dir, config_files, checkpoint, override_params, output_dir,
            save, min_prob, ignore_classes, debug):
    """Run a model over images/videos and write the predictions to disk.

    Collects the image and video files at `path_or_dir`, builds a
    `PredictorNetwork` from `checkpoint` or `config_files`, and processes
    each file:

    * image: the prediction is dumped to `pred_<basename>.json` and, when
      `save` is truthy, the image with bounding boxes drawn is saved as
      `pred_<basename>`.
    * video: every frame is predicted and written, with boxes drawn, to
      `pred_<basename without extension>.mp4`.

    Args:
        path_or_dir: A single file, or a directory whose direct entries are
            scanned (no recursion).
        config_files: Config files, used only when `checkpoint` is falsy.
        checkpoint: Checkpoint handed to `get_checkpoint_config`; takes
            precedence over `config_files`.
        override_params: Optional overrides applied to the resolved config.
        output_dir: Optional directory for the generated files; created if
            needed. When falsy, outputs use paths relative to the current
            working directory.
        save: Whether to also save images with the boxes drawn.
        min_prob: Forwarded to `draw_bboxes_on_image` (presumably the
            minimum probability for a box to be drawn -- confirm).
        ignore_classes: Optional value forwarded to `filter_classes` to
            filter each prediction.
        debug: DEBUG logging verbosity when truthy, INFO otherwise.

    Note:
        Calls `exit()` when no image/video is found, when neither a
        checkpoint nor a config is given, or when the video writer cannot
        be created.
    """
    if debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Get file paths
    if tf.gfile.IsDirectory(path_or_dir):
        # Only the directory's direct entries are considered.
        file_paths = [
            os.path.join(path_or_dir, f)
            for f in tf.gfile.ListDirectory(path_or_dir)
            if get_filetype(f) in ('image', 'video')
        ]
    else:
        if get_filetype(path_or_dir) in ('image', 'video'):
            file_paths = [path_or_dir]
        else:
            file_paths = []

    errors = 0
    # NOTE(review): `successes` is incremented below but never read in the
    # visible code.
    successes = 0
    created_files_paths = []
    total_files = len(file_paths)
    if total_files == 0:
        no_files_message = ("No images or videos found. "
                            "Accepted formats -> Image: {} - Video: {}")
        tf.logging.error(no_files_message.format(IMAGE_FORMATS, VIDEO_FORMATS))
        exit()

    # Resolve the config to use and initialize the model.
    if checkpoint:
        config = get_checkpoint_config(checkpoint)
    elif config_files:
        config = get_config(config_files)
    else:
        click.echo('You must specify either a checkpoint or a config file.')
        exit()

    if override_params:
        config = override_config_params(config, override_params)

    network = PredictorNetwork(config)

    # Create output_dir if it doesn't exist
    if output_dir:
        tf.gfile.MakeDirs(output_dir)

    tf.logging.info('Getting predictions for {} files'.format(total_files))

    # Iterate over file paths
    for file_path in file_paths:

        # Outputs are named after the input file, prefixed with `pred_`.
        save_path = 'pred_' + os.path.basename(file_path)
        if output_dir:
            save_path = os.path.join(output_dir, save_path)

        if get_filetype(file_path) == 'image':
            click.echo('Predicting {}...'.format(file_path))
            with tf.gfile.Open(file_path, 'rb') as f:
                try:
                    image = Image.open(f).convert('RGB')
                except (tf.errors.OutOfRangeError, OSError) as e:
                    # Unreadable image: count the error and move on to the
                    # next file.
                    tf.logging.warning('Error: {}'.format(e))
                    tf.logging.warning("Couldn't open: {}".format(file_path))
                    errors += 1
                    continue

            # Run image through network
            prediction = network.predict_image(image)
            successes += 1

            # Filter results if required by user
            if ignore_classes:
                prediction = filter_classes(prediction, ignore_classes)

            # Save prediction json file
            with open(save_path + '.json', 'w') as outfile:
                json.dump(prediction, outfile)
            created_files_paths.append(save_path + '.json')

            # Save predicted image
            if save:
                # The file is opened again (this time without the RGB
                # conversion) and the boxes are drawn on that copy.
                with tf.gfile.Open(file_path, 'rb') as im_file:
                    image = Image.open(im_file)
                    draw_bboxes_on_image(image, prediction, min_prob)
                    image.save(save_path)
                created_files_paths.append(save_path)

        elif get_filetype(file_path) == 'video':
            # NOTE: We'll hardcode the video output to mp4 for the time being
            save_path = os.path.splitext(save_path)[0] + '.mp4'
            try:
                writer = skvideo.io.FFmpegWriter(save_path)
            except AssertionError as e:
                tf.logging.error(e)
                tf.logging.error(
                    "Please install ffmpeg before making video predictions.")
                exit()
            # The frame count reported by ffprobe is only used as the
            # progress bar's length.
            num_of_frames = int(
                skvideo.io.ffprobe(file_path)['video']['@nb_frames'])
            video_progress_bar = click.progressbar(
                skvideo.io.vreader(file_path),
                length=num_of_frames,
                label='Predicting {}'.format(file_path))
            with video_progress_bar as bar:
                try:
                    for frame in bar:
                        # Run image through network
                        prediction = network.predict_image(frame)

                        # Filter results if required by user
                        if ignore_classes:
                            prediction = filter_classes(
                                prediction, ignore_classes)

                        image = Image.fromarray(frame)
                        draw_bboxes_on_image(image, prediction, min_prob)
                        writer.writeFrame(np.array(image))
                except RuntimeError as e:
                    # Processing stops at the failing frame; the frames
                    # written so far are kept in the output file.
                    click.echo()  # Error prints next to progress-bar if not
                    tf.logging.error('Error: {}'.format(e))
                    tf.logging.error('Corrupt videofile: {}'.format(file_path))
                    tf.logging.error(
                        'Partially processed video file saved in {}'.format(
                            save_path))
                    errors += 1

            # Runs on both the success and the RuntimeError path, so a
            # partially processed video is also listed as created.
            writer.close()
            created_files_paths.append(save_path)

        else:
            tf.logging.warning("{} isn't an image/video".format(file_path))

    # Generate logs
    tf.logging.info("Created the following files: {}".format(
        ', '.join(created_files_paths)))

    if errors:
        tf.logging.warning('{} errors.'.format(errors))