for m in re.finditer(r'(?<=/)([A-Z]+\.?\s*\d+)', text): text += ' # {}'.format(m.group(0)) for m in re.finditer(r'(?<=/)([0-9]+\.\s*\w+)', text): text += ' # {}'.format(m.group(0)) text = text.strip() text = re.sub(r'\s+', ' ', text) return text ignore = ( 'Puolukka', 'Otava', 'Vaaka', 'Varsa', 'Neito', 'Voima', 'RPr', # Ratsuväkiprikaati talvisodassa, rannikkoprikaati jatkosodassa ) if __name__ == '__main__': if sys.argv[1] == 'test': import doctest doctest.testmod() exit() process_stage(sys.argv, preprocessor=preprocessor, ignore=ignore)
def pruner(candidate):
    """Return *candidate* if it fully matches ``name_re_compiled``, else None.

    ``name_re_compiled`` is defined elsewhere in this file.
    """
    if name_re_compiled.fullmatch(candidate):
        return candidate
    return None


def set_dataset(dataset_name):
    """Record which dataset is being linked on ``ValidationContext.dataset``.

    Accepts ``'event'`` or ``'photo'`` and prints a short notice for each.

    Raises:
        ValueError: if *dataset_name* is neither of the two accepted names.
    """
    if dataset_name == 'event':
        print('Handling as events')
        ValidationContext.dataset = 'event'
    elif dataset_name == 'photo':
        print('Handling as photos')
        ValidationContext.dataset = 'photo'
    else:
        raise ValueError('Invalid dataset: {}'.format(dataset_name))


if __name__ == '__main__':
    if sys.argv[1] == 'test':
        # "test" mode: run this module's doctests and stop.
        import doctest
        doctest.testmod()
        # sys.exit() instead of exit(): the latter is injected by the site
        # module for interactive use and is not guaranteed to exist.
        sys.exit()

    set_dataset(sys.argv[1])

    # Hand on every argument except the dataset name (argv[1]).
    args = sys.argv[0:1] + sys.argv[2:]

    process_stage(args, ignore=ignore, validator_class=Validator,
                  preprocessor=preprocessor, pruner=pruner, log_level='INFO')
pruner_fun = pruner elif sys.argv[1] == 'photo': print('Handling as photos') pruner_fun = None else: raise ValueError('Invalid dataset') args = sys.argv[0:1] + sys.argv[2:] no_duplicates = [ 'http://www.yso.fi/onto/suo/kunta', 'http://ldf.fi/schema/warsa/Town', 'http://ldf.fi/schema/warsa/Village', 'http://ldf.fi/schema/warsa/Body_of_water', 'http://ldf.fi/schema/warsa/Hypsographic_feature', 'http://ldf.fi/pnr-schema#place_type_540', 'http://ldf.fi/pnr-schema#place_type_550', 'http://ldf.fi/pnr-schema#place_type_560', ISLAND_TYPE, # Selected islands ] prep = preprocessor if args[-1] == 'naive': prep = None ignore = None no_duplicates = None args.pop() process_stage(args, ignore=ignore, pruner=pruner_fun, validator_class=Validator, preprocessor=preprocessor, remove_duplicates=no_duplicates)
] events_only_ignore = [ 'turtola', # only for events! 'pajari' # only for events, remove for photos ] if sys.argv[1] == 'event': print('Handling as events') ignore = ignore + events_only_ignore elif sys.argv[1] == 'photo': print('Handling as photos') else: raise ValueError('Invalid dataset') args = sys.argv[0:1] + sys.argv[2:] no_duplicates = [ 'http://www.yso.fi/onto/suo/kunta', 'http://ldf.fi/warsa/places/place_types/Kirkonkyla_kaupunki', 'http://ldf.fi/warsa/places/place_types/Kyla', 'http://ldf.fi/warsa/places/place_types/Vesimuodostuma', 'http://ldf.fi/warsa/places/place_types/Maastokohde', 'http://ldf.fi/pnr-schema#place_type_540', 'http://ldf.fi/pnr-schema#place_type_550', 'http://ldf.fi/pnr-schema#place_type_560', ] process_stage(args, ignore=ignore, validator_class=Validator, preprocessor=preprocessor, remove_duplicates=no_duplicates)
import logging
import sys

from arpa_linker.link_helper import process_stage

# Logger registered under the name 'arpa_linker.arpa'.
logger = logging.getLogger('arpa_linker.arpa')


if __name__ == '__main__':
    # Run the linking stage with the raw command line, logging at INFO level.
    process_stage(sys.argv, log_level='INFO')
) if __name__ == '__main__': if sys.argv[1] == 'test': import doctest doctest.testmod() exit() special_args = sys.argv[-2:] if 'no_cover' in special_args: Validator.accept_cover = False sys.argv.remove('no_cover') if 'no_length_filter' in special_args: Validator.filter_by_length = False sys.argv.remove('no_length_filter') prep = preprocessor if sys.argv[-1] == 'naive': prep = None ignore = None sys.argv.pop() if sys.argv[4] == 'battle_unit_linked.ttl': ignore = None process_stage(sys.argv, preprocessor=prep, ignore=ignore, validator_class=Validator, log_level='INFO')
ValidationContext.dataset = 'event' elif dataset_name == 'photo': print('Handling as photos') ValidationContext.dataset = 'photo' else: raise ValueError('Invalid dataset: {}'.format(dataset_name)) if __name__ == '__main__': if sys.argv[1] == 'test': import doctest doctest.testmod() exit() set_dataset(sys.argv[1]) args = sys.argv[0:1] + sys.argv[2:] prep = preprocessor if args[-1] == 'naive': prep = None ignore = None args.pop() process_stage(args, ignore=ignore, validator_class=Validator, preprocessor=prep, pruner=pruner, log_level='DEBUG')