def __init__(self, mapping=None, **kwargs):
    """Set up the namespace mapping and register the fixed prefix overrides.

    ``mapping`` and ``kwargs`` are forwarded unchanged to the parent
    initialiser. When the instance holds no data afterwards, the default
    namespaces are loaded through ``load_default_namespaces()``.
    """
    super().__init__(mapping, **kwargs)
    # Locations of the default namespace declarations; presumably consumed
    # by load_default_namespaces() -- confirm against that method.
    self.default_namespaces_url = 'http://dbpedia.org/sparql?nsdecl'
    self.default_namespaces_file = base_path('default-namespaces.json')
    # Only fall back to the defaults when no mapping data was supplied.
    if not self.data:
        self.load_default_namespaces()
    # overrides: these two namespaces are assigned fixed prefixes.
    # NOTE(review): confirm the overrides are meant to run unconditionally
    # and not only after the defaults have been loaded.
    self['http://id.dbpedia.org/global/'] = 'dbg'
    self['http://www.wikidata.org/entity/'] = 'wde'
def test_skip_to_global_left_binary():
    """Check binary-search partitioning of the skip-to-left sample.

    Runs ``compute_parts`` on ``samples/skip-to-left-test.nt`` with a target
    size of 500 and compares everything after the first element of each
    yielded item with the expected pairs (presumably start/end positions).
    """
    expected_tails = [
        (314, 1703),
        (1703, 3587),
    ]
    test_args = get_test_args(
        input_path=base_path('samples/skip-to-left-test.nt'),
        target_size=500,
        search_type='binary',
    )
    parts = list(compute_parts(test_args))

    # The number of parts is checked first, then each part in order.
    assert len(expected_tails) == len(parts)
    for expected, part in zip(expected_tails, parts):
        assert expected == part[1:]
def test_skip_to_global_right_binary():
    """Check binary-search partitioning of the skip-to-right sample.

    Runs ``compute_parts`` on ``samples/skip-to-right-test.nt`` with a target
    size of 500 and compares everything after the first element of each
    yielded item with the expected pairs (presumably start/end positions).
    """
    expected_tails = [
        (4615, 7243),
        (7243, 7743),
    ]
    test_args = get_test_args(
        input_path=base_path('samples/skip-to-right-test.nt'),
        target_size=500,
        search_type='binary',
    )
    parts = list(compute_parts(test_args))

    # The number of parts is checked first, then each part in order.
    assert len(expected_tails) == len(parts)
    for expected, part in zip(expected_tails, parts):
        assert expected == part[1:]
def test_skip_to_global_middle_binary():
    """Check binary-search partitioning of the skip-to-middle sample.

    Runs ``compute_parts`` on ``samples/skip-to-middle-test.nt`` with a
    target size of 500 and compares everything after the first element of
    each yielded item with the expected pairs (presumably start/end
    positions).
    """
    expected_tails = [
        (1834, 6236),
        (6236, 6949),
        (6949, 7676),
    ]
    test_args = get_test_args(
        input_path=base_path('samples/skip-to-middle-test.nt'),
        target_size=500,
        search_type='binary',
    )
    parts = list(compute_parts(test_args))

    # The number of parts is checked first, then each part in order.
    assert len(expected_tails) == len(parts)
    for expected, part in zip(expected_tails, parts):
        assert expected == part[1:]
def get_test_args(**kwargs):
    """Build parsed arguments for a test run.

    An empty argument list is handed to ``parse_arguments`` so the
    command-line parser itself is not under test; the output directory is
    fixed to ``tests/output/`` and all other settings come from ``kwargs``.
    """
    test_output_dir = base_path('tests/output/')
    return parse_arguments([], output_dir=test_output_dir, **kwargs)
def get_timed_output_path(prefix='output'):
    """Return a base-path directory named after the prefix and current time.

    The directory name is ``<prefix>_<stamp>/``, where ``<stamp>`` is the
    current time in whole seconds written in hexadecimal without the
    leading ``0x``.
    """
    whole_seconds = int(time.time())
    # Slice off the two-character '0x' marker that hex() prepends.
    stamp = hex(whole_seconds)[2:]
    return base_path(f'{prefix}_{stamp}/')
def cast_int(str_or_number): return int(float(str_or_number)) def get_timed_output_path(prefix='output'): this_second_hex = hex(int(time.time())) return base_path(f'{prefix}_{this_second_hex[2:]}/') arg_parser.add_argument('input_path', nargs='?', type=os.path.abspath, default=os.environ.get('INPUT_PATH', base_path('sorted.nt')), help='the Databus NTriples input file path') arg_parser.add_argument('output_dir', nargs='?', type=os.path.abspath, default=os.environ.get('OUTPUT_DIR', get_timed_output_path()), help='the JSON output directory path') arg_parser.add_argument( '--parallel', action='store_true', help='transform parts in parallel using a multiprocessing pool') arg_parser.add_argument( '--shorten-uris', action='store_true', help=