def grow_taxonomy(max_depth=1):
    # Load some aggregation function to manipulate arrays of arrays
    # (to materialize paths to the roots in the taxonomy)
    pg.check_run_if_undef(join(SQL_SCRIPTS_FOLDER, "array_aggregate.sql"))

    # Aggregate categories info and find semantic grounding by trying to
    # match with Wikipedia articles
    pg.check_run_if_undef(
        join(SQL_SCRIPTS_FOLDER, "build_grounded_categories.sql"))
    pg.run_file(join(SQL_SCRIPTS_FOLDER, "init_taxonomy.sql"))

    current_depth = int(pg.select("SELECT max(depth) FROM taxonomy_dag"))
    if current_depth < max_depth:
        for depth in range(current_depth + 1, max_depth + 1):
            logging.info("Growing taxonomy to depth=%d", depth)
            pg.run_file(join(SQL_SCRIPTS_FOLDER, "grow_taxonomy.sql"))
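# Note on the `pg` helper used above (inferred from usage, not from a
# documented API): it is assumed to be the project's thin PostgreSQL wrapper,
# where run_file() executes a SQL script, check_run_if_undef() executes it
# only if the objects it defines are not yet present in the database, and
# select() returns the scalar result of a single-value query (hence the
# int() cast on the max(depth) lookup).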
parser.add_argument(
    "--taxonomy-file",
    default="dbpedia-taxonomy.tsv",
    help="Filename to store the TSV export of the taxonomy.",
)
parser.add_argument(
    "--examples-file",
    default="dbpedia-examples.tsv.bz2",
    help="Filename to store the TSV export of the examples text"
    " categorized using the taxonomy.",
)
parser.add_argument(
    "--max-depth",
    default=1,
    type=int,
    help="Limit the depth of subcategories to follow from the roots.",
)
parser.add_argument(
    "--max-items",
    default=None,
    type=int,
    help="Limit the number of rows to load from the DBpedia archives"
    " (for debugging purposes only).",
)
args = parser.parse_args()

for operation in args.operations:
    if operation == "build_taxonomy":
        check_load_taxonomy_data(args.max_items)
        grow_taxonomy(args.max_depth)
    elif operation == "build_examples":
        check_load_examples_data(args.max_items)
        pg.run_file(join(SQL_SCRIPTS_FOLDER, "build_dataset.sql"))
    elif operation == "dump_taxonomy":
        dump_taxonomy(args.taxonomy_file)
    elif operation == "dump_examples":
        dump_examples(args.examples_file)
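# Example invocation (a sketch: the script filename is hypothetical, and the
# `operations` positional argument is assumed to be defined earlier in this
# file, accepting the operation names dispatched above):
#
#   python dbpedia_dataset.py build_taxonomy build_examples \
#       dump_taxonomy dump_examples --max-depth 2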