def test_pre_save(self):
    """Saving a court must populate its slug from court type and city name."""
    default_state_id = State.DEFAULT_ID

    town = City(name='City', state_id=default_state_id)
    town.save()

    court = Court(city=town, court_type='AG', state_id=default_state_id)
    court.save()

    self.assertEqual(
        court.slug,
        'ag-city',
        'Slug not correctly generated in pre_save',
    )
def empty(self):
    """
    Reset court data to its initial state.

    Re-points every case to the default court, deletes all non-default courts,
    all cities and all non-default states, then (re-)saves the default state,
    the default court and the standard courts.
    """
    # Detach cases first so they reference the default court before courts are removed
    Case.objects.all().update(court_id=Court.DEFAULT_ID)

    Court.objects.exclude(pk=Court.DEFAULT_ID).delete()
    City.objects.all().delete()
    State.objects.exclude(pk=State.DEFAULT_ID).delete()

    # Add default court?
    fallback_state = State(
        pk=State.DEFAULT_ID,
        name='Unknown state',
        country=self.country,
    )
    fallback_state.save()

    court_definitions = (
        # Default court
        {'pk': Court.DEFAULT_ID, 'name': 'Unknown court', 'code': 'unknown'},
        # Add standard courts TODO more courts?
        {'name': 'Europäischer Gerichtshof', 'code': 'EuGH'},
    )
    for definition in court_definitions:
        Court(state=fallback_state, **definition).save()
def process(self, court: Court) -> Court:
    """
    Generate all possible aliases for a court name and store them on the court.

    Examples of generated forms:

        AG Aachen
        Aachen AG
        Aachener AG

    Courts without a court type are returned unchanged (a warning is logged).

    :param court: Court to generate aliases for
    :return: The same court instance, with ``aliases`` set to the joined alias string
    """
    if court.court_type is None:
        logger.warning('No court type: %s' % court)
        return court

    # Name is always as well an alias
    collected = [court.name]

    type_info = settings.COURT_TYPES.get_type(court.court_type)
    levels = type_info['levels']

    # Both the type code and the full type name are combined with locations
    type_names = [court.court_type, type_info['name']]

    if CourtLocationLevel.CITY in levels:
        # Frankfurt (Oder)
        # ... an der Oder
        # Frankfurt am Main
        city_name = court.city.name
        collected.extend(self.combine_type_location(type_names, city_name))

        connector_pattern = r'\s(a\.d\.|an der|am|im|unter|in der)\s(.*?)$'
        for hit in re.finditer(connector_pattern, city_name):
            # Frankfurt an der Oder -> Frankfurt (Oder)
            base = city_name[:hit.start(0)]
            bracketed = '%s (%s)' % (base, hit.group(2))
            collected.extend(self.combine_type_location(type_names, bracketed))

    if CourtLocationLevel.STATE in levels:
        state_name = court.state.name
        collected.extend(self.combine_type_location(type_names, state_name))

        # Add variations, e.g. Hamburg_er, Holstein_isches
        collected.extend(
            state_name + suffix + ' ' + type_name
            for type_name in type_names
            for suffix in ('es', 'er', 'isches')
        )

    if CourtLocationLevel.COUNTRY in levels:
        # TODO Handle federal courts (BGH, ...)
        pass

    # Stored as a single separator-joined string
    court.aliases = Court.ALIAS_SEPARATOR.join(collected)

    return court
def process(self, court: Court) -> Court:
    """
    Assign jurisdiction and level_of_appeal with regex on court name.

    For each of the two settings mappings the keys are visited in iteration
    order; the first key whose pattern matches the court name
    (case-insensitively) is stored on the court. If nothing matches, the
    corresponding attribute is left untouched.

    :param court: Court whose name is inspected
    :return: The same court instance
    """

    def first_matching_key(patterns):
        # Returns (found, key) so that "no match" never overwrites the attribute
        for key in patterns:
            if re.search(patterns[key], court.name, re.IGNORECASE):
                return True, key
        return False, None

    found, jurisdiction = first_matching_key(settings.COURT_JURISDICTIONS)
    if found:
        court.jurisdiction = jurisdiction

    found, level = first_matching_key(settings.COURT_LEVELS_OF_APPEAL)
    if found:
        court.level_of_appeal = level

    return court
def test_type_extraction(self):
    """Type codes are extracted from court names using an overridden COURT_TYPES setting."""

    class TestCourtTypes(CourtTypes):
        """Type registry restricted to two city-level court types."""

        def get_types(self):
            return dict(
                AG=dict(name='Amtsgericht', levels=[CourtLocationLevel.CITY]),
                ARBG=dict(name='Arbeitsgericht', levels=[CourtLocationLevel.CITY]),
            )

    expectations = (
        ('Amtsgericht Aalen', 'AG'),
        ('Arbeitsgericht Aalen', 'ARBG'),
    )

    with self.settings(COURT_TYPES=TestCourtTypes()):
        for court_name, expected_code in expectations:
            self.assertEqual(
                Court.extract_type_code_from_name(court_name),
                expected_code,
            )
def process(self, court: Court):
    """
    Enrich a court with Wikipedia data: title, description and image.

    The Wikipedia title is looked up from the court name only when the court
    has none yet. The description is then fetched for that title, and the
    image is downloaded and stored as ``<court.code>.jpg``, replacing any
    previously stored image.

    :param court: Court to enrich
    :return: The same court instance
    """
    if court.wikipedia_title is None:
        court.wikipedia_title = self.get_wikipedia_field(court.name, 'title')

    logger.info('Title: %s' % court.wikipedia_title)

    # Description
    court.description = self.get_wikipedia_extract(court.wikipedia_title)
    logger.info('Description: %s' % court.description)

    # Image
    image_url = self.get_wikipedia_image(court.wikipedia_title)
    logger.info('Downloading image from: %s' % image_url)

    # Use the response as a context manager so the HTTP connection is always
    # closed, also when deleting or saving the image raises.
    with urllib.request.urlopen(image_url) as response:
        # The old image is only removed once the download has been opened
        court.image.delete(False)  # delete old image
        court.image.save(court.code + '.jpg', response)  # save new image

    return court
def test_type_extraction(self):
    """Type codes are extracted from full court names."""
    expectations = (
        ('Amtsgericht Aalen', 'AG'),
        ('Arbeitsgericht Aalen', 'ARBG'),
    )
    for court_name, expected_code in expectations:
        self.assertEqual(
            Court.extract_type_code_from_name(court_name),
            expected_code,
        )
def find_court(self, query) -> Court:
    """
    Resolve a court from a query dict.

    Lookups are tried in this order and the first hit is returned: exact
    ``code``, exact ``name`` (only for names without whitespace), state plus
    court type, city plus court type, and finally a case-insensitive alias
    search that must yield exactly one candidate.

    Example court names:

    - Oberverwaltungsgericht für das Land Schleswig-Holstein
    - VG Magdeburg
    - {"name": "OVG L\u00fcneburg 5. Senat"}

    :param query: Dict(name, code)
    :return: The matching court
    :raises ProcessingError: If ``name`` is missing or no court type can be extracted
    :raises Court.DoesNotExist: If none of the lookups finds a court
    """
    if 'code' in query:
        # Find based on code (EuGH, ...)
        try:
            return Court.objects.get(code=query['code'])
        except Court.DoesNotExist:
            pass

    if 'name' not in query:
        raise ProcessingError('Field name not in query')

    name = query['name']

    if ' ' not in name:
        # Find based on name if name does not contain whitespaces
        try:
            return Court.objects.get(name=name)
        except Court.DoesNotExist:
            pass

    # Determine type
    court_type = Court.extract_type_code_from_name(name)
    if court_type is None:
        raise ProcessingError('Court type not found')

    location_levels = CourtTypes().get_type(court_type)['levels']

    # Look for states
    if CourtLocationLevel.STATE in location_levels:
        # Plain state name first, then variations, e.g. Hamburg_er, Holstein_isches
        states_by_name = {
            state_name + suffix: state_pk
            for state_pk, state_name in State.objects.values_list('id', 'name')
            if state_name != ''
            for suffix in ('', 'es', 'er', 'isches')
        }

        state_id = find_from_mapping(name, states_by_name)
        if state_id is not None:
            logger.debug('Look for state=%i, type=%s' % (state_id, court_type))
            try:
                return Court.objects.get(state_id=state_id, court_type=court_type)
            except Court.DoesNotExist:
                pass

    # Look for cities
    if CourtLocationLevel.CITY in location_levels:
        cities_by_name = {
            city_name: city_pk
            for city_pk, city_name in City.objects.values_list('id', 'name')
            if city_name != ''
        }

        city_id = find_from_mapping(name, cities_by_name)
        if city_id is not None:
            logger.debug('Look for city=%i, type=%s' % (city_id, court_type))
            try:
                return Court.objects.get(city_id=city_id, court_type=court_type)
            except Court.DoesNotExist:
                pass

    # Search by alias (use case-insensitive filter for umlauts)
    matches = Court.objects.filter(aliases__icontains=name)
    match_count = len(matches)

    if match_count == 1:
        return matches.first()

    if match_count > 1:
        # Multiple candidates found: fuzzy string matching?
        logger.warning('Multiple candidates found')

    # Nothing found
    raise Court.DoesNotExist
def handle(self, *args, **options):
    """
    Import courts, and the states and cities they reference, from a
    tab-separated file.

    Each data row must have exactly three columns: state name, court name and
    court code. The first line, rows with a different column count and rows
    with an empty court name are skipped.

    Options read:

    - ``empty``: if truthy, ``self.empty()`` is called before importing
    - ``input``: path of the file to read
    - ``limit``: if positive, reading stops once the reader's line number
      reaches this value

    :raises SystemExit: With status 1 if ``options['input']`` is not a file
    """
    import sys  # function-scope import; only needed for the early exit below

    # Country identical for all courts
    self.country = get_instance_or_create(Country, 'Deutschland')

    # Delete all courts
    if options['empty']:
        self.empty()

    # Consecutive rows often share a state; remember the last one to skip a query
    previous_state_name = None
    previous_state = None

    without_type_counter = 0
    court_counter = 0
    city_counter = 0
    state_counter = 0

    if not os.path.isfile(options['input']):
        logger.error('Cannot read from: %s' % options['input'])
        # The bare ``exit`` helper is injected by the ``site`` module for
        # interactive use and may be missing; ``sys.exit`` raises the same
        # SystemExit reliably.
        sys.exit(1)

    logger.debug('Reading from: %s' % options['input'])

    # NOTE(review): the file is opened with the platform default encoding —
    # confirm this matches the encoding of the input file.
    with open(options['input']) as f:
        reader = csv.reader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
        for row in reader:
            if len(row) != 3 or reader.line_num == 1 or row[1] == '':
                continue

            if 0 < options['limit'] <= reader.line_num:
                logger.debug('Limit reached')
                break

            name = row[1].replace('_', '').strip()
            # Codes are reduced to ASCII letters and digits
            code = re.sub('[^0-9a-zA-Z]+', '', row[2])

            # Fetch state
            state_name = row[0]
            if previous_state is not None and previous_state_name == state_name:
                state = previous_state
            else:
                try:
                    state = State.objects.get(name=state_name)
                except State.DoesNotExist:
                    state = State(name=state_name, country=self.country)
                    state.save()
                    state_counter += 1

            # Fetch city and court type
            city = None
            court_type = Court.extract_type_code_from_name(name)

            if court_type is None:
                logger.debug('Court type is none: %s' % row)
                without_type_counter += 1
            else:
                type_info = CourtTypes().get_type(court_type)
                if CourtLocationLevel.CITY in type_info['levels']:
                    # Remove court type (left over is city name): first the
                    # type code, then the full type name
                    city_name = name.replace(court_type, '').strip()
                    city_name = city_name.replace(type_info['name'], '').strip()

                    try:
                        city = City.objects.get(name=city_name, state=state)
                    except City.DoesNotExist:
                        city = City(name=city_name, state=state)
                        city.save()
                        city_counter += 1

            # Save court
            court = Court(name=name, code=code, state=state, city=city, court_type=court_type)
            court.save()
            court_counter += 1

            logger.debug('Saved court: %s' % court)

            previous_state = state
            previous_state_name = state_name

    logger.info(
        'Done. Courts: %i; Without types: %i; Cities: %i, States: %i'
        % (court_counter, without_type_counter, city_counter, state_counter))
def find_court(query) -> Court:
    """
    Resolve a court from a query dict.

    Lookups are tried in this order and the first hit is returned: exact
    ``code``, exact ``name`` (only for names without whitespace), state plus
    court type, and city plus court type.

    NOTE(review): declared without ``self``; presumably a module-level
    function or a static method — confirm against the enclosing scope.

    Example court names:

    - Oberverwaltungsgericht für das Land Schleswig-Holstein
    - VG Magdeburg
    - {"name": "OVG L\u00fcneburg 5. Senat"}

    :param query: Dict(name, code)
    :return: The matching court
    :raises ProcessingError: If ``name`` is missing or no court type can be extracted
    :raises Court.DoesNotExist: If none of the lookups finds a court
    """
    if 'code' in query:
        # Find based on code (EuGH, ...)
        try:
            return Court.objects.get(code=query['code'])
        except Court.DoesNotExist:
            pass

    if 'name' not in query:
        raise ProcessingError('Field name not in query')

    name = query['name']

    if ' ' not in name:
        # Find based on name if name does not contain whitespaces
        try:
            return Court.objects.get(name=name)
        except Court.DoesNotExist:
            pass

    # Determine type
    court_type = Court.extract_type_code_from_name(name)
    if court_type is None:
        raise ProcessingError('Court type not found')

    location_levels = CourtTypes().get_type(court_type)['levels']

    # Look for states
    if CourtLocationLevel.STATE in location_levels:
        state_id_mapping = {}
        for state_pk, state_name in State.objects.values_list('id', 'name'):
            if state_name != '':
                state_id_mapping[state_name] = state_pk

                # Add variations, e.g. Hamburg_er, Holstein_isches
                for suffix in ('es', 'er', 'isches'):
                    state_id_mapping[state_name + suffix] = state_pk

        state_id = find_from_mapping(name, state_id_mapping)
        if state_id is not None:
            try:
                logger.debug('Look for state=%i, type=%s' % (state_id, court_type))
                return Court.objects.get(state_id=state_id, court_type=court_type)
            except Court.DoesNotExist:
                pass

    # Look for cities
    if CourtLocationLevel.CITY in location_levels:
        city_id_mapping = {}
        for city_pk, city_name in City.objects.values_list('id', 'name'):
            if city_name != '':
                city_id_mapping[city_name] = city_pk

        city_id = find_from_mapping(name, city_id_mapping)
        if city_id is not None:
            try:
                logger.debug('Look for city=%i, type=%s' % (city_id, court_type))
                return Court.objects.get(city_id=city_id, court_type=court_type)
            except Court.DoesNotExist:
                pass

    # Nothing found. The former trailing ``return instance`` was unreachable
    # and referenced an undefined name, so it has been removed.
    raise Court.DoesNotExist