Code example #1
def test_packages():
    from os import path

    from contentai_metadata_flatten import generators, parsers

    list_gen = generators.get_by_name('TimeTaggedMetadata')
    assert len(list_gen) > 0  # at least one member

    instance_gen = list_gen[0]['obj']('junk')  # instantiate with a dummy content path
    assert path.exists(instance_gen.schema_path)  # the template/schema path must exist
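
For context, a minimal hedged sketch of enumerating everything discovery finds; it assumes, as flatten() in example #3 does, that passing None to get_by_name() returns all modules, and that each entry is a dict carrying 'name' and 'obj' keys as the test above implies.

from contentai_metadata_flatten import generators

# list the names of all auto-discovered generator modules (None = no filter, assumed)
for entry in generators.get_by_name(None):
    print(entry['name'])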
Code example #2
def test_discovery():
    from contentai_metadata_flatten import generators, parsers

    list_gen = generators.get_by_type('csv')
    assert len(list_gen) > 0  # at least one member

    list_gen = generators.get_by_type(['csv', 'json'])
    assert len(list_gen) > 1  # at least two members

    list_gen = generators.get_by_name('csv')
    assert len(list_gen) > 0  # at least one member

    list_parser = parsers.get_by_type('moderation')
    assert len(list_parser) > 0  # at least one member

    list_parser = parsers.get_by_type(['shot', 'scene'])
    assert len(list_parser) > 1  # at least two members

    list_parser = parsers.get_by_name('aws')
    assert len(list_parser) > 1  # at least two members
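
Similarly, a small sketch that tabulates parser names for a few of the types exercised above; it reuses the same assumed dict shape ('name'/'obj' keys) for each discovered entry.

from contentai_metadata_flatten import parsers

# group auto-discovered parser names by the type strings used in the test above
for type_name in ['shot', 'scene', 'moderation']:
    names = [entry['name'] for entry in parsers.get_by_type(type_name)]
    print(f"{type_name}: {names}")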
Code example #3
import argparse
import logging
import sys
from pathlib import Path

import contentai
from contentai_metadata_flatten import generators, parsers


def flatten(input_params=None, args=None, logger=None):
    if logger is None:
        logger = logging.getLogger()
    logging.basicConfig(level=logging.WARNING)

    parser = argparse.ArgumentParser(
        description="""A script to perform metadata parsing""",
        epilog="""
        Launch to parse a set of downloaded and flattened assets... 
            python main.py --path_content=path/to/dir --path_result results
    """,
        formatter_class=argparse.RawTextHelpFormatter)
    submain = parser.add_argument_group(
        'main execution and evaluation functionality')
    submain.add_argument('--path_content',
                         dest='path_content',
                         type=str,
                         default=contentai.content_path,
                         help='input video path for files to label')
    submain.add_argument('--path_result',
                         dest='path_result',
                         type=str,
                         default=contentai.result_path,
                         help='output path for samples')
    submain.add_argument('--verbose',
                         dest='verbose',
                         default=False,
                         action='store_true',
                         help='verbosely print operations')
    submain = parser.add_argument_group('input and parsing options')
    submain.add_argument(
        '--extractor',
        dest='extractor',
        type=str,
        default="",
        help=
        'specify one extractor to flatten, skipping nested module import (*default=all*, e.g. ``dsai_metadata``)'
    )
    submain.add_argument(
        '--time_offset',
        dest='time_offset',
        type=int,
        default=0,
        help=
        'when merging events for an asset split into multiple parts, time in seconds (*default=0*); negative numbers will cause a truncation (skip) of events happening before the zero time mark *(added v0.7.1)*'
    )
    submain.add_argument(
        '--all_frames',
        dest='all_frames',
        default=False,
        action='store_true',
        help=
        'for video-based events, log all instances in box or just the center')
    submain = parser.add_argument_group('output modulation')
    submain.add_argument(
        '--generator',
        dest='generator',
        type=str,
        default="",
        help=
        'specify one generator for output, skipping nested module import (*default=all*)'
    )
    submain.add_argument(
        '--no_compression',
        dest='compressed',
        default=True,
        action='store_false',
        help=
        "disable compression of output CSVs, which otherwise appends '.gz' (*default=True*)"
    )
    submain.add_argument(
        '--force_overwrite',
        dest='force_overwrite',
        default=False,
        action='store_true',
        help="compforce existing files to be overwritten (*default=False*)")

    if args is not None:
        config = vars(parser.parse_args(args))
    else:
        config = vars(parser.parse_args())
    if input_params is not None:
        config.update(input_params)

    # allow injection of parameters from environment
    contentai_metadata = contentai.metadata()
    if contentai_metadata is not None:  # see README.md for more info
        config.update(contentai_metadata)
    logger.info(f"Run arguments: {config}")
    if not config['path_content'] or not config['path_result']:
        logger.critical(
            f"Missing content path ({config['path_content']}) or result path ({config['path_result']})"
        )
        parser.print_help(sys.stderr)
        return []

    path_result = Path(config['path_result'])
    if not path_result.exists():
        path_result.mkdir(parents=True)

    list_parser_modules = parsers.get_by_name(
        config['extractor'] if len(config['extractor']) else None)
    list_generator_modules = generators.get_by_name(
        config['generator'] if len(config['generator']) else None)
    path_source = Path(config['path_content'])
    if not path_source.is_dir():
        path_source = path_source.parent
    path_source = str(path_source.resolve())

    need_generation = False
    map_outputs = {}
    set_results = set()
    for parser_obj in list_parser_modules:  # iterate through auto-discovered packages
        for generator_obj in list_generator_modules:  # iterate through auto-discovered packages
            generator_instance = generator_obj['obj'](
                str(path_result), logger=logger)  # create instance
            generator_name = generator_obj['name']
            map_outputs[generator_name] = {
                'module': generator_instance,
                'path': generator_instance.get_output_path(parser_obj['name'])
            }
            if "compressed" in config and config[
                    "compressed"]:  # allow compressed version
                map_outputs[generator_name]["path"] += ".gz"
            need_generation |= (
                generator_instance.is_universal
                or not Path(map_outputs[generator_name]["path"]).exists())

        df = None
        if not need_generation and not config['force_overwrite']:
            logger.info(f"Skipping re-process of {config['path_result']}...")
        else:
            parser_instance = parser_obj['obj'](
                path_source, logger=logger)  # create instance

            if config["verbose"]:
                logger.info(f"ContentAI arguments: {config}")
            df = parser_instance.parse(config)  # attempt to process

            if df is None:  # skip bad results
                if len(config['extractor']):
                    logger.warning(f"Specified extractor `{config['extractor']}` failed to find data. "
                                   f"Verify that the input path {path_source} points directly to the data file...")

        if df is not None:
            if config['time_offset'] != 0:  # need offset?
                logger.info(
                    f"Applying time offset of {config['time_offset']} seconds to {len(df)} events ('{parser_obj['name']}')..."
                )
                for col_name in ['time_begin', 'time_end', 'time_event']:
                    df[col_name] += config['time_offset']
            df.drop(df[df["time_begin"] < 0].index,
                    inplace=True)  # drop rows if trimmed from front

            for generator_name in map_outputs:  # iterate through auto-discovered packages
                if map_outputs[generator_name][
                        'module'].is_universal or not Path(
                            map_outputs[generator_name]["path"]).exists():
                    num_items = map_outputs[generator_name]['module'].generate(
                        map_outputs[generator_name]["path"], config,
                        df)  # attempt to process
                    logger.info(
                        f"Wrote {num_items} items as '{generator_name}' to result file '{map_outputs[generator_name]['path']}'"
                    )
                else:
                    logger.info(
                        f"Skipping re-generation of {generator_name} to file '{map_outputs[generator_name]['path']}'..."
                    )
                set_results.add(map_outputs[generator_name]["path"])

    # resolve and return fully qualified path
    return [
        str(Path(config['path_result']).joinpath(k).resolve())
        for k in set_results
    ]
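
To round out the example, a minimal sketch of invoking flatten() programmatically rather than from the CLI; the paths are hypothetical placeholders mirroring the epilog above, and the input_params dict overrides parsed arguments via the config.update() call shown in the function.

# hypothetical paths, mirroring `python main.py --path_content=path/to/dir --path_result results`
list_results = flatten(args=['--path_content', 'path/to/dir',
                             '--path_result', 'results'],
                       input_params={'verbose': True})
for path_output in list_results:
    print(path_output)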