Example #1
def run(sources, threads, prefetch, batch, steps, epochs, verbose, seed):
    """
    sources: list of tf-record files
    threads: number of concurrent read pipelines
    prefetch: reader prefetch count
    batch: batch size
    steps: number of steps per epoch
    epochs: number of epochs
    verbose: verbose output
    seed: random seed
    """

    reset_tensorflow(seed)

    datasets = [
        make_tfr_dataset(source,
                         threads=threads,
                         read_rgb=False,
                         read_mask=False,
                         read_weight=False) for source in sources
    ]

    items = list()
    with tf.device('/cpu:0'):
        for epoch in range(epochs):
            for dataset in datasets:
                input_fn = make_tfr_input_fn(dataset,
                                             threads=threads,
                                             offset=max(
                                                 0, epoch * steps * batch),
                                             limit=max(0, steps * batch),
                                             shuffle=max(0, steps * batch),
                                             prefetch=prefetch,
                                             batch=batch,
                                             repeat=-1)
                with tf.Session() as session:
                    try:
                        it = input_fn()
                        batch_counter = 0
                        while batch_counter < steps:
                            features, label = session.run(it)
                            names = features['name']
                            variants = features['variant']
                            for i in range(names.shape[0]):
                                key = '%s-%s' % (str(names[i], 'utf-8'),
                                                 str(variants[i], 'utf-8'))
                                items.append(key)
                            batch_counter += 1
                    except tf.errors.DataLossError as e:
                        logger.warning(e)
                    except tf.errors.OutOfRangeError:
                        pass

    logger.info("ORDER: %d items", len(items))
    for i, item in enumerate(items):
        print("%05d. %s" % (i, item))
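
A minimal invocation sketch for this ordering-inspection task, assuming the helper functions used above (reset_tensorflow, make_tfr_dataset, make_tfr_input_fn) and the module-level logger are importable; the file name and argument values are hypothetical:

run(sources=['train-256x256.tfr.gz'],
    threads=4,
    prefetch=2,
    batch=8,
    steps=100,
    epochs=2,
    verbose=True,
    seed=1234)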
Example #2
def run(sources):
    """
    sources: list of tf-record files
    """

    key = None
    for source in sources:
        logger.info(source)
        dataset = make_tfr_dataset(source)

        reset_tensorflow()
        step = 0
        with tf.Session() as session:
            with tf.device('/cpu:0'):
                it = dataset().make_one_shot_iterator().get_next()
                while key != ord('q'):
                    item = session.run(it)
                    step += 1

                    name = str(item['name'], 'utf-8')
                    rgb = cv2.cvtColor(item['rgb'], cv2.COLOR_RGB2BGR)
                    mask = item['mask']
                    weight = item['weight']
                    result = resizemax(
                        np.concatenate(
                            [
                                rgb,
                                cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR),
                                tobytes(cv2.cvtColor(weight, cv2.COLOR_GRAY2BGR) / 255)
                                # tobytes(mix(tofloats(cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)), 0.5 + 0.5 * tofloats(rgb))),
                                # tobytes(mix(cv2.cvtColor(weight, cv2.COLOR_GRAY2BGR).astype(np.float32) / 65535.0, 0.5 + 0.5 * tofloats(rgb)))
                            ],
                            axis=0
                        ),
                        1024
                    )
                    cv2.putText(result, '%d' % step, (result.shape[1]-50, result.shape[0]-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0))
                    cv2.putText(result, name, (5, result.shape[0]-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

                    cv2.imshow('cv2: preview', result)

                    key = cv2.waitKey()
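                    # block until the user presses 'c' (next sample) or 'q' (quit)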
                    while key != ord('q') and key != ord('c'):
                        key = cv2.waitKey()
Example #3
def run(device, model, sources, threads, prefetch, batch, steps, verbose,
        seed):
    """
    Evaluation task implementation.
    """

    logger.info("Running graph on %s", device)

    # reset tensorflow
    reset_tensorflow(seed)

    # read parameters
    paramsfile = Path(model) / "parameters.json"
    if paramsfile.exists():
        params = json.loads(paramsfile.read_text())
    else:
        raise Exception("Model parameters.json not found")
    classes = params.get('classes', [0.0, 1.0])

    # read previous evals
    evalsfile = Path(model) / "evals.json"
    if evalsfile.exists():
        evalslogs = json.loads(evalsfile.read_text())
    else:
        evalslogs = list()

    # read topology file
    logger.info("Creating model topology V%d (%s)",
                params.get('topology_version', 1), params.get('topology'))
    topologyfile = Path(model) / "topology.yaml"
    if not topologyfile.exists():
        raise Exception("Model topology.yaml not found")
    topology = resolve_topology(params.get('topology_version', 1))(
        topologyfile, params.get("initializer", 'none'),
        params.get("regularizer", 'none'), params.get("constraint", 'none'),
        params.get("activation", 'none'),
        params.get("local_response_normalization", 0),
        params.get("batch_normalization", False),
        params.get("dropout_rate", 0.0))

    # create tensorflow estimator
    estimator = tf.estimator.Estimator(
        model_dir=model,
        model_fn=topology_cnn_model(topology, verbose),
        params=params,
        config=tf.estimator.RunConfig(tf_random_seed=seed,
                                      device_fn=lambda op: device))
    logger.debug("Using checkpoint: %s", estimator.latest_checkpoint())

    # evaluate datasets using model
    for source in sources:
        eval_start_time = time.time()

        logger.info("Starting evaluation: %s (steps=%d, batch=%d)", source,
                    steps, batch)

        results = estimator.evaluate(input_fn=make_tfr_input_fn(
            make_tfr_dataset(source, threads=threads),
            threads=threads,
            limit=max(0, steps * batch),
            prefetch=prefetch,
            batch=batch))

        for i in range(len(classes)):
            logger.info("Class %d: precision=%02.02f%%, recall=%02.02f%%", i,
                        results['precision_c%d' % i] * 100,
                        results['recall_c%d' % i] * 100)
        logger.info("Evaluation completed: loss=%f, jaccard=%f (%s)",
                    results['loss'], results['jaccard'], source)

        # write eval logs
        evalslogs.append({
            'dataset': source,
            'checkpoint': Path(estimator.latest_checkpoint()).name,
            'duration': int(time.time() - eval_start_time),
            'loss': float(results['loss']),
            'jaccard': float(results['jaccard']),
            'precision': [
                float(results['precision_c%d' % i])
                for i in range(len(classes))
            ],
            'recall': [
                float(results['recall_c%d' % i])
                for i in range(len(classes))
            ],
            'f1': [
                f1_score(results['precision_c%d' % i],
                         results['recall_c%d' % i])
                for i in range(len(classes))
            ],
        })
        evalsfile.write_text(json.dumps(evalslogs, indent=' '))
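
The eval log above stores an F1 value per class via an f1_score helper that is not shown in this example; a minimal sketch of what such a helper presumably computes (an assumption, not the repository's actual implementation) is the harmonic mean of precision and recall with a guard for the all-zero case:

def f1_score(precision, recall):
    # harmonic mean of precision and recall; 0.0 when both are zero
    if precision + recall == 0.0:
        return 0.0
    return 2.0 * precision * recall / (precision + recall)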
Example #4
def run(device, name, sources, compression, batch, test_ratio, eval_ratio,
        size, blur_radius, blur_scale, center_crop, scales, flips, rotations,
        crops, brightness, contrast, gaussian_noise, uniform_noise, seed):
    """
    device: tensorflow device
    name: tf-record prefix
    sources: list of input image folders
    compression: tf-record compression
    batch: image pipeline size
    test_ratio: percentage of final test instances in the total set
    eval_ratio: percentage of evaluation instances in the training set
    size: target image size (width and height)
    blur_radius: gaussian blur radius for mask/weight images
    blur_scale: gaussian blur scale factor for mask/weight images
    center_crop: source images center crop percentage
    scales: list of scale factors
    flips: list of flip modes
    rotations: list of rotations
    crops: number of crops/variations to create for each configuration
    brightness: random brightness factor
    contrast: random contrast factor
    gaussian_noise: gaussian noise standard deviation
    uniform_noise: uniform noise magnitude
    seed: random seed
    """

    if compression == 'none':
        options = tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.NONE)
        ext = 'tfr'
    elif compression == 'zlib':
        options = tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.ZLIB)
        ext = 'tfr.zlib'
    elif compression == 'gzip':
        options = tf.python_io.TFRecordOptions(
            tf.python_io.TFRecordCompressionType.GZIP)
        ext = 'tfr.gz'
    else:
        raise Exception("Unsupported compression format (%s)" % compression)
    batch = max(1, batch)
    test_ratio = max(0.0, min(1.0, test_ratio))
    eval_ratio = max(0.0, min(1.0, eval_ratio))
    size = int(max(1, size))
    blur_radius = int(max(0, min(size / 2, blur_radius)))
    center_crop = max(0.0, min(1.0, center_crop))
    blur_scale = max(0.0, min(1.0, blur_scale))
    scales = [max(0.0, min(1.0, scale)) for scale in scales]
    rotations = [max(-180, min(180, rotation)) for rotation in rotations]
    crops = int(max(1, crops))
    brightness = max(0.0, min(1.0, brightness))
    contrast = max(0.0, min(1.0, contrast))
    gaussian_noise = max(0.0, gaussian_noise)
    uniform_noise = max(0.0, min(1.0, uniform_noise))

    # find samples
    items = list()
    logger.info('Finding input files...')
    for source in sources:
        path = Path(source)
        bakemeta = path / "bake.json"
        if not bakemeta.exists():
            meta_items = list()
            listing = path / "items.json"
            for (filename, meta) in json.loads(listing.read_text()).items():
                if not meta['active']:
                    continue

                rgb = path / filename
                mask = rgb.parent / (rgb.stem + '.mask')
                weight = rgb.parent / (rgb.stem + '.weight')

                width, height = image_size(rgb)
                assert width >= size and height >= size
                assert image_channels(rgb) == 3

                if not mask.exists():
                    imask = np.zeros([height, width], dtype=np.uint8)
                    cv2.imwrite(str(mask) + '.png', imask)
                    Path(str(mask) + '.png').replace(mask)
                assert (width, height) == image_size(mask)
                assert image_channels(mask) == 1

                if not weight.exists():
                    iweight = 255 * np.ones([height, width], dtype=np.uint8)
                    cv2.imwrite(str(weight) + '.png', iweight)
                    Path(str(weight) + '.png').replace(weight)
                assert (width, height) == image_size(weight)
                assert image_channels(weight) == 1

                meta_items.append({
                    'name': rgb.stem,
                    'orientation': image_orientation(rgb),
                    'width': width,
                    'height': height,
                    'rgbpath': str(rgb),
                    'maskpath': str(mask),
                    'weightpath': str(weight),
                })
            random.shuffle(meta_items)
            bakemeta.write_text(json.dumps(meta_items, indent=' '))
        items += json.loads(bakemeta.read_text())

    # select test set
    i = int(len(items) * (1.0 - test_ratio))
    remaining_items = items[0:i]
    test_items = items[i:]

    # select train/eval sets
    i = int(len(remaining_items) * (1.0 - eval_ratio))
    train_items = remaining_items[0:i]
    eval_items = remaining_items[i:]
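    # e.g. with 100 items, test_ratio=0.2 and eval_ratio=0.1 this yields
    # 20 test items and 80 remaining items, split into 72 train and 8 eval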

    # save partitioning
    if len(train_items) > 0:
        with io.open('%s-%dx%d-train.json' % (name, size, size), "w") as f:
            json.dump(train_items, f, indent=' ')
    if len(eval_items) > 0:
        with io.open('%s-%dx%d-eval.json' % (name, size, size), "w") as f:
            json.dump(eval_items, f, indent=' ')
    if len(test_items) > 0:
        with io.open('%s-%dx%d-test.json' % (name, size, size), "w") as f:
            json.dump(test_items, f, indent=' ')

    # generate tf-records
    def bake_records(session, writer, inputs, graphs):
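        # feed the placeholder graphs in groups of `batch`: each full group is
        # run and flushed to the tf-record writer inside the loop, and the
        # final (possibly partial) group is flushed after the loop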
        feeds = dict()
        for i in range(len(inputs)):
            if i > 0 and i % batch == 0:
                logger.debug('%.02f %%', 100.0 * i / len(inputs))
                write_records(writer, session.run(graphs, feeds))
            k = i % batch
            for (key, value) in inputs[i].items():
                feeds[graphs[k][key]] = value
        k = len(inputs) % batch
        if k == 0:
            k = batch
        logger.debug('%.02f %%', 100)
        write_records(writer, session.run(graphs[0:k], feeds))

    # show generation statistics
    start_time = time.time()
    n = len(scales) * len(flips) * len(rotations)
    m = crops

    logger.info('Generating %d variants (%d train, %d eval, %d test)...',
                n * m * len(items), n * m * len(train_items),
                n * m * len(eval_items), n * m * len(test_items))

    train_writer = tf.python_io.TFRecordWriter(
        '%s-%dx%d-train.%s' %
        (name, size, size, ext), options) if len(train_items) > 0 else None
    eval_writer = tf.python_io.TFRecordWriter(
        '%s-%dx%d-eval.%s' %
        (name, size, size, ext), options) if len(eval_items) > 0 else None
    test_writer = tf.python_io.TFRecordWriter(
        '%s-%dx%d-test.%s' %
        (name, size, size, ext), options) if len(test_items) > 0 else None
    try:
        i = 0
        for scale in scales:
            for flip in flips:
                for rotation in rotations:
                    if i > 0:
                        elapsed_time = (time.time() - start_time) / 60
                        total_time = elapsed_time * (n / i)
                        remaining_time = total_time - elapsed_time
                        logger.info(
                            '[%d / %d] scale=%f, flip=%s, rotation=%f (remaining=%d[min], total=%d[min])',
                            i + 1, n, scale, flip, rotation, remaining_time,
                            total_time)
                    else:
                        logger.info('[%d / %d] scale=%f, flip=%s, rotation=%f',
                                    i + 1, n, scale, flip, rotation)
                    i += 1

                    reset_tensorflow(seed)
                    with tf.Session() as session:
                        with tf.device(device):
                            # build tf graph
                            logger.info('building tf graphs...')
                            graphs = build_graphs(batch, size, blur_radius,
                                                  blur_scale, center_crop,
                                                  scale, flip, rotation, crops,
                                                  brightness, contrast,
                                                  gaussian_noise,
                                                  uniform_noise)
                            # with io.open('%s-%dx%d.graph' % (name, size, size), 'w') as f:
                            #     f.write(str(tf.get_default_graph().as_graph_def()))

                            # run tf graph
                            if len(train_items) > 0:
                                logger.info('processing samples (train)...')
                                bake_records(session, train_writer,
                                             train_items, graphs)
                            if len(eval_items) > 0:
                                logger.info('processing samples (eval)...')
                                bake_records(session, eval_writer, eval_items,
                                             graphs)
                            if len(test_items) > 0:
                                logger.info('processing samples (test)...')
                                bake_records(session, test_writer, test_items,
                                             graphs)
                    seed = seed * 11113 + seed
    finally:
        if train_writer is not None:
            train_writer.close()
        if eval_writer is not None:
            eval_writer.close()
        if test_writer is not None:
            test_writer.close()
Example #5
def run(device, model, topology, topology_version, eval_source, train_sources,
        threads, prefetch, batch, steps, epochs, ram_cache, initializer,
        regularizer, constraint, activation, batch_normalization,
        local_response_normalization, dropout_rate, label_weighting, loss,
        optimizer, learning_rate, learning_rate_decay, verbose, seed):
    """
    Training task implementation.
    """

    classes = [0.0, 1.0]
    delta_loss_threshold = 0.01
    max_stable_loss_counter = 5
    max_overfit_loss_counter = 2
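    # early-stop policy: an epoch counts as 'learning' only when the eval loss
    # improves by at least 1% of its value; training stops after 5 consecutive
    # epochs without such an improvement, or after 2 epochs whose loss exceeds
    # the best loss so far by more than 10%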
    start_time = time.time()

    logger.info("Running graph on %s", device)

    # reset tensorflow
    reset_tensorflow(seed)

    # create model directory
    Path(model).mkdir(parents=True, exist_ok=True)

    # copy topology
    topologyfile = Path(model) / "topology.yaml"
    if not topologyfile.exists():
        logger.info("Topology copied from %s", topology)
        topologyfile.write_text(Path(topology).read_text())

    # read/update/store parameters
    paramsfile = Path(model) / "parameters.json"
    params = dict({
        "batch_size": batch,
        "steps": steps,
        "epochs": epochs,
        "seed": seed,
        "classes": classes,
        "topology": topology,
        "topology_version": topology_version,
        "optimizer": optimizer,
        "learning_rate": learning_rate,
        "learning_rate_decay": learning_rate_decay,
        "loss": loss,
        "activation": activation,
        "local_response_normalization": local_response_normalization,
        "batch_normalization": batch_normalization,
        "dropout_rate": dropout_rate,
        "initializer": initializer,
        "regularizer": regularizer,
        "constraint": constraint,
        "label_weighting": label_weighting,
    })
    if paramsfile.exists():
        oldparams = json.loads(paramsfile.read_text())
        for (key, value) in params.items():
            if key not in oldparams:
                logger.info("Parameter added: %s=%s", key, str(value))
            elif oldparams[key] != value:
                logger.info("Parameter changed: %s=%s (initial=%s)", key,
                            value, oldparams[key])
        for (key, value) in oldparams.items():
            if key not in params:
                logger.info("Parameter obsolete: %s=%s", key, value)
    else:
        for (key, value) in params.items():
            logger.debug("Parameter added: %s=%s", key, str(value))
    paramsfile.write_text(json.dumps(params, indent=' '))

    # read previous sessions
    epochsfile = Path(model) / "epochs.json"
    if epochsfile.exists():
        epochlogs = json.loads(epochsfile.read_text())
    else:
        epochlogs = list()
    epoch_offset = len(epochlogs)

    # read topology file
    logger.info("Creating model topology V%d (%s)", topology_version, topology)
    topology = resolve_topology(topology_version)(
        topologyfile, params.get("initializer", 'none'),
        params.get("regularizer", 'none'), params.get("constraint", 'none'),
        params.get("activation", 'none'),
        params.get("local_response_normalization", 0),
        params.get("batch_normalization", False),
        params.get("dropout_rate", 0.0))

    # create tensorflow estimator
    estimator = tf.estimator.Estimator(
        model_dir=model,
        model_fn=topology_cnn_model(topology, verbose),
        params=params,
        config=tf.estimator.RunConfig(log_step_count_steps=10,
                                      save_summary_steps=10,
                                      save_checkpoints_secs=900,
                                      keep_checkpoint_max=epochs,
                                      tf_random_seed=seed,
                                      device_fn=lambda op: device))
    logger.debug("Starting checkpoint: %s", estimator.latest_checkpoint())

    # prepare inputs
    train_datasets = [
        make_tfr_dataset(train_source, threads=threads, cache=ram_cache)
        for train_source in train_sources
    ]
    eval_dataset = make_tfr_dataset(eval_source,
                                    threads=threads,
                                    cache=ram_cache)

    # evaluate model
    eval_start_time = time.time()
    logger.info("Initial evaluation (batch=%d)", batch)

    results = estimator.evaluate(input_fn=make_tfr_input_fn(eval_dataset,
                                                            threads=threads,
                                                            prefetch=prefetch,
                                                            batch=batch), )

    for i in range(len(classes)):
        logger.info("Class %d: precision=%02.02f%%, recall=%02.02f%%", i,
                    results['precision_c%d' % i] * 100,
                    results['recall_c%d' % i] * 100)
    logger.info("Evaluation completed: loss=%f, jaccard=%f (%d[min])",
                results['loss'], results['jaccard'],
                (time.time() - eval_start_time) / 60)

    # run epochs
    train_time = time.time()
    best_loss = results['loss']
    best_epoch = 0
    best_checkpoint = estimator.latest_checkpoint()
    stable_loss_counter = 0
    overfit_loss_counter = 0
    for epoch in range(epochs):
        # abort training if eval getting worse
        if stable_loss_counter >= max_stable_loss_counter or overfit_loss_counter >= max_overfit_loss_counter:
            logger.info("Early-stop at epoch %d / %d (best=%d, checkpoint=%s)",
                        epoch, epochs, best_epoch + 1, best_checkpoint)
            break

        # start epoch
        epoch_start_time = time.time()
        if epoch > 0:
            elapsed_time = (time.time() - train_time) / 60
            total_time = elapsed_time * (epochs / epoch)
            remaining_time = total_time - elapsed_time
            logger.info(
                "Epoch started: %d / %d (batch=%d, remaining=%d[min], total=%d[min])",
                epoch + 1, epochs, batch, remaining_time, total_time)
        else:
            logger.info("Epoch started: %d / %d (batch=%d)", epoch + 1, epochs,
                        batch)

        # train model
        for train_dataset in train_datasets:
            train_start_time = time.time()
            logger.info("Training epoch started (%d steps)", steps)

            estimator.train(input_fn=make_tfr_input_fn(
                train_dataset,
                threads=threads,
                offset=max(0, epoch * steps * batch),
                limit=max(0, steps * batch),
                shuffle=max(0, steps * batch),
                prefetch=prefetch,
                batch=batch,
                repeat=-1,
                seed=seed),
                            steps=(steps if steps >= 0 else None))

            logger.info("Training epoch completed (%d[min])",
                        (time.time() - train_start_time) / 60)

        seed += seed * 11113 + seed

        # evaluate model
        eval_start_time = time.time()
        logger.info("Evaluation epoch started")

        results = estimator.evaluate(input_fn=make_tfr_input_fn(
            eval_dataset,
            threads=threads,
            prefetch=prefetch,
            batch=batch,
        ))
        if results['loss'] < best_loss:
            delta_loss = results['loss'] - best_loss
            best_loss = results['loss']
            best_epoch = epoch
            best_checkpoint = estimator.latest_checkpoint()
            if abs(delta_loss) >= delta_loss_threshold * abs(results['loss']):
                stable_loss_counter = 0
                status = 'learn(%.01e)' % delta_loss
            else:
                stable_loss_counter += 1
                status = 'stable(%d/%d)' % (stable_loss_counter,
                                            max_stable_loss_counter)
            overfit_loss_counter = 0
        elif results['loss'] > (best_loss * 1.1):
            stable_loss_counter += 1
            overfit_loss_counter += 1
            status = 'overfit(%d/%d)' % (overfit_loss_counter,
                                         max_overfit_loss_counter)
        else:
            stable_loss_counter += 1
            status = 'stable(%d/%d)' % (stable_loss_counter,
                                        max_stable_loss_counter)

        for i in range(len(classes)):
            logger.info("Class %d: precision=%02.02f%%, recall=%02.02f%%", i,
                        results['precision_c%d' % i] * 100,
                        results['recall_c%d' % i] * 100)
        logger.info(
            "Evaluation epoch completed: status=%s, loss=%.03e, jaccard=%.03f (%d[min])",
            status, results['loss'], results['jaccard'],
            (time.time() - eval_start_time) / 60)

        # write epoch logs
        epochlogs.append({
            'epoch': epoch_offset + epoch + 1,
            'checkpoint': Path(estimator.latest_checkpoint()).name,
            'duration': int(time.time() - epoch_start_time),
            'datasets': {
                'train': train_sources,
                'eval': eval_source,
            },
            'loss': float(results['loss']),
            'learning': status.startswith('learn'),
            'status': status,
            'jaccard': float(results['jaccard']),
            'precision': [
                float(results['precision_c%d' % i])
                for i in range(len(classes))
            ],
            'recall': [
                float(results['recall_c%d' % i])
                for i in range(len(classes))
            ],
            'f1': [
                f1_score(results['precision_c%d' % i],
                         results['recall_c%d' % i])
                for i in range(len(classes))
            ],
        })
        epochsfile.write_text(json.dumps(epochlogs, indent=' '))

        logger.info("Epoch completed (%d[min])",
                    (time.time() - epoch_start_time) / 60)

    logger.info("Training completed: best epoch=%d (%d[min])", best_epoch + 1,
                (time.time() - train_time) / 60)
    logger.info("Total elapsed time: %d[min]", (time.time() - start_time) / 60)
Example #6
def run(sources, threads, prefetch, batch, steps, epochs, verbose, seed):
    """
    sources: list of tf-record files
    threads: number of concurrent read pipelines
    prefetch: reader prefetch count
    batch: batch size
    steps: number of steps per epoch
    epochs: number of epochs
    verbose: verbose output
    seed: random seed
    """

    reset_tensorflow(seed)

    datasets = [ make_tfr_dataset(source, threads=threads, read_rgb=False, read_mask=False, read_weight=False) for source in sources ]

    global_stats = dict()
    global_batch_counter = 0
    global_instance_counter = 0
    with tf.device('/cpu:0'):
        for epoch in range(epochs):
            for dataset in datasets:
                input_fn = make_tfr_input_fn(
                    dataset,
                    threads=threads,
                    offset=max(0, epoch*steps*batch),
                    limit=max(0, steps*batch),
                    shuffle=max(0, steps*batch),
                    prefetch=prefetch,
                    batch=batch,
                    repeat=-1
                )
                stats = dict()
                batch_counter = 0
                instance_counter = 0
                with tf.Session() as session:
                    try:
                        it = input_fn()
                        while batch_counter < steps:
                            features, label = session.run(it)
                            names = features['name']
                            variants = features['variant']
                            for i in range(names.shape[0]):
                                key = '%s-%s' % (str(names[i], 'utf-8'), str(variants[i], 'utf-8'))
                                if key not in stats:
                                    stats[key] = list()
                                stats[key].append(instance_counter)
                                instance_counter += 1
                            batch_counter += 1
                    except tf.errors.DataLossError as e:
                        logger.warning(e)
                    except tf.errors.OutOfRangeError:
                        pass
                logger.info(
                    "EPOCH[%d]: %d / %d batches, %d / %d instances",
                    epoch,
                    batch_counter,
                    steps,
                    instance_counter,
                    steps * batch
                )
                for key in sorted(stats.keys()):
                    if verbose:
                        logger.debug("  %20s: %4d %s", key, len(stats[key]), stats[key])
                    if key not in global_stats:
                        global_stats[key] = 0
                    global_stats[key] += len(stats[key])
                global_batch_counter += batch_counter
                global_instance_counter += instance_counter

    logger.info(
        "FINAL: %d / %d batches, %d / %d instances",
        global_batch_counter,
        epochs * len(datasets) * steps,
        global_instance_counter,
        epochs * len(datasets) * steps * batch
    )
    min_p = 1.0
    max_p = 0.0
    sum_p = 0.0
    for key in sorted(global_stats.keys()):
        p = global_stats[key] / global_instance_counter
        min_p = min(min_p, p)
        max_p = max(max_p, p)
        sum_p += p
        if verbose:
            logger.info("  %20s: %4d (%02.01f %%)", key, global_stats[key], 100 * p)
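    # C: min/max number of occurrences of any single variant, P: min/max
    # observed sampling frequency, E: expected frequency under uniform
    # sampling, dE: expected frequency minus the mean observed frequency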
    logger.info(
        "  %d instances, %d variants seen (C=[%d ; %d], P=[%.03e ; %.03e], E=%.03e, dE=%.03e)",
        global_instance_counter,
        len(global_stats),
        min_p * global_instance_counter,
        max_p * global_instance_counter,
        min_p,
        max_p,
        1.0 / len(global_stats),
        1.0 / len(global_stats) - sum_p / len(global_stats)
    )
Example #7
def run(sources, threads, verbose, seed):
    """
    sources: list of tf-record files
    threads: number of concurrent read pipelines
    verbose: verbose output
    seed: random seed
    """

    reset_tensorflow(seed)

    global_instances = 0
    global_pixels = 0
    global_positives = 0
    global_weighted_positives = 0
    global_negatives = 0
    global_weighted_negatives = 0
    with tf.device('/cpu:0'):
        for source in sources:
            input_fn = make_tfr_input_fn(
                make_tfr_dataset(source, threads=threads, read_rgb=False, read_mask=True, read_weight=True),
                threads=threads,
                batch=10
            )
            instances = 0
            pixels = 0
            positives = 0
            weighted_positives = 0
            negatives = 0
            weighted_negatives = 0
            with tf.Session() as session:
                try:
                    it = input_fn()
                    while True:
                        features, label = session.run(it)
                        z = np.sum(1.0 - label)
                        wz = np.sum((1.0 - label) * features['weight'])
                        nz = np.sum(label)
                        wnz = np.sum(label * features['weight'])
                        instances += label.shape[0]
                        pixels += label.size
                        positives += nz
                        weighted_positives += wnz
                        negatives += z
                        weighted_negatives += wz
                except tf.errors.DataLossError as e:
                    logger.warning(e)
                except tf.errors.OutOfRangeError:
                    pass
            global_instances += instances
            global_pixels += pixels
            global_positives += positives
            global_weighted_positives += weighted_positives
            global_negatives += negatives
            global_weighted_negatives += weighted_negatives
            logger.info(
                "%s: %d instances, %d pixels, %d:%d (%.02f:%.02f%%) std:weighted positives, %d:%d (%.02f:%.02f%%) std:weighted negatives",
                source,
                instances,
                pixels,
                positives,
                weighted_positives,
                100 * positives / max(1, positives + negatives),
                100 * weighted_positives / max(1, weighted_positives + weighted_negatives),
                negatives,
                weighted_negatives,
                100 * negatives / max(1, positives + negatives),
                100 * weighted_negatives / max(1, weighted_positives + weighted_negatives)
            )
    logger.info(
        "TOTAL: %d instances, %d pixels, %d:%d (%.02f:%.02f%%) std:weighted positives, %d:%d (%.02f:%.02f%%) std:weighted negatives",
        global_instances,
        global_pixels,
        global_positives,
        global_weighted_positives,
        100 * global_positives / max(1, global_positives + global_negatives),
        100 * global_weighted_positives / max(1, global_weighted_positives + global_weighted_negatives),
        global_negatives,
        global_weighted_negatives,
        100 * global_negatives / max(1, global_positives + global_negatives),
        100 * global_weighted_negatives / max(1, global_weighted_positives + global_weighted_negatives)
    )
Example #8
def run(device, model, sources, threads, prefetch, batch, epochs, verbose,
        seed):
    """
    Prediction task implementation.
    """

    logger.info("Running graph on %s", device)

    # reset tensorflow
    reset_tensorflow(seed)

    # read parameters
    paramsfile = Path(model) / "parameters.json"
    if paramsfile.exists():
        params = json.loads(paramsfile.read_text())
    else:
        raise Exception("Model parameters.json not found")

    # read topology file
    logger.info("Creating model topology V%d (%s)",
                params.get('topology_version', 1), params.get('topology'))
    topologyfile = Path(model) / "topology.yaml"
    if not topologyfile.exists():
        raise Exception("Model topology.yaml not found")
    topology = resolve_topology(params.get('topology_version', 1))(
        topologyfile, params.get("initializer", 'none'),
        params.get("regularizer", 'none'), params.get("constraint", 'none'),
        params.get("activation", 'none'),
        params.get("local_response_normalization", 0),
        params.get("batch_normalization", False),
        params.get("dropout_rate", 0.0))

    # create tensorflow estimator
    estimator = tf.estimator.Estimator(
        model_dir=model,
        model_fn=topology_cnn_model(topology, verbose),
        params=params,
        config=tf.estimator.RunConfig(tf_random_seed=seed,
                                      device_fn=lambda op: device))
    logger.debug("Using checkpoint: %s", estimator.latest_checkpoint())

    # predict datasets using model
    for source in sources:
        logger.info("Starting prediction: %s (batch=%d)", source, batch)

        if source.endswith(('.tfr', '.tfr.zlib', '.tfr.gz')):
            results = estimator.predict(input_fn=make_tfr_input_fn(
                make_tfr_dataset(source, threads=threads),
                threads=threads,
                prefetch=1,
                batch=batch))
        else:
            results = estimator.predict(input_fn=make_predict_input_fn(
                make_predict_dataset(source, threads=threads),
                topology.config['input_size'],
                threads=threads,
                prefetch=1,
                batch=batch,
                repeat=epochs - 1))
        if not show_results(params.get('topology'), results,
                            topology.config['input_size'], verbose):
            logger.info("Aborted prediction: %s", source)
            break

        logger.info("Finished prediction: %s", source)
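
A hedged invocation sketch for this prediction task; the model directory and source paths are hypothetical, and the extension check above decides whether the tf-record pipeline or the image-folder pipeline is used:

# predict from a baked tf-record file
run(device='/gpu:0', model='models/my-model', sources=['test-256x256.tfr.gz'],
    threads=4, prefetch=1, batch=8, epochs=1, verbose=False, seed=1234)

# predict directly from a folder of images
run(device='/cpu:0', model='models/my-model', sources=['samples/'],
    threads=4, prefetch=1, batch=1, epochs=1, verbose=True, seed=1234)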