def import_units(self):
    """Import units from the source API and sync them into the Unit table.

    Ensures the organization and department synchers exist (noop imports
    only build the synchers), then sets up the bounding box and coordinate
    transforms used by ``_import_unit``, and finally syncs each fetched
    unit record.
    """
    # Build the org/dept synchers lazily; noop=True only constructs them
    # without importing data.
    if not getattr(self, 'org_syncher', None):
        self.import_organizations(noop=True)
    if not getattr(self, 'dept_syncher', None):
        self.import_departments(noop=True)
    if self.options['single']:
        # Single-unit mode: fetch and sync only the requested id.
        obj_id = self.options['single']
        obj_list = [self.pk_get('unit', obj_id)]
        queryset = Unit.objects.filter(id=obj_id)
    else:
        obj_list = self.pk_get('unit')
        # NOTE(review): select_related('services') — if 'services' is a
        # many-to-many relation this should be prefetch_related; verify
        # against the Unit model (the sibling importer uses prefetch_related).
        queryset = Unit.objects.all().select_related('services')
    self.target_srid = settings.PROJECTION_SRID
    # BOUNDING_BOX appears to be given in the target projection; it is
    # tagged 4326 and then converted target->GPS so the stored bbox is in
    # lat/lon. TODO confirm against settings.
    self.bounding_box = Polygon.from_bbox(settings.BOUNDING_BOX)
    self.bounding_box.set_srid(4326)
    gps_srs = SpatialReference(4326)
    target_srs = SpatialReference(self.target_srid)
    target_to_gps_ct = CoordTransform(target_srs, gps_srs)
    self.bounding_box.transform(target_to_gps_ct)
    # Kept for _import_unit: converts incoming GPS coords to target SRID.
    self.gps_to_target_ct = CoordTransform(gps_srs, target_srs)
    syncher = ModelSyncher(queryset, lambda obj: obj.id)
    for idx, info in enumerate(obj_list):
        self._import_unit(syncher, info)
    # Deletes/deactivates DB rows that were not marked during the run.
    syncher.finish()
def import_units(self):
    """Import tprek units: fetch connections and accessibility properties,
    attach them to each unit record, and sync everything into the DB.
    """
    self._load_postcodes()
    # Lookup table for resolving municipality references by Finnish name.
    self.muni_by_name = {muni.name_fi.lower(): muni for muni in Municipality.objects.all()}
    if self.existing_service_ids == None or len(self.existing_service_ids) < 1:
        self.existing_service_ids = set(Service.objects.values_list('id', flat=True))
    # Build org/dept synchers lazily (noop only constructs them).
    if not getattr(self, 'org_syncher', None):
        self.import_organizations(noop=True)
    if not getattr(self, 'dept_syncher', None):
        self.import_departments(noop=True)
    if self.verbosity:
        self.logger.info("Fetching unit connections")
    # Group connection records by their owning unit id.
    connections = self.pk_get('connection')
    conn_by_unit = {}
    for conn in connections:
        unit_id = conn['unit_id']
        if unit_id not in conn_by_unit:
            conn_by_unit[unit_id] = []
        conn_by_unit[unit_id].append(conn)
    self.accessibility_variables = {x.id: x for x in AccessibilityVariable.objects.all()}
    if self.verbosity:
        self.logger.info("Fetching accessibility properties")
    # Group accessibility property records by unit id.
    acc_properties = self.pk_get('accessibility_property')
    acc_by_unit = {}
    for ap in acc_properties:
        unit_id = ap['unit_id']
        if unit_id not in acc_by_unit:
            acc_by_unit[unit_id] = []
        acc_by_unit[unit_id].append(ap)
    self.target_srid = PROJECTION_SRID
    # BOUNDING_BOX is tagged 4326, then converted target->GPS, so the
    # stored bbox ends up in lat/lon. TODO confirm BOUNDING_BOX units.
    self.bounding_box = Polygon.from_bbox(settings.BOUNDING_BOX)
    self.bounding_box.set_srid(4326)
    gps_srs = SpatialReference(4326)
    target_srs = SpatialReference(self.target_srid)
    target_to_gps_ct = CoordTransform(target_srs, gps_srs)
    self.bounding_box.transform(target_to_gps_ct)
    # Used by _import_unit to project incoming GPS coordinates.
    self.gps_to_target_ct = CoordTransform(gps_srs, target_srs)
    if self.options['single']:
        # Single-unit mode: only sync the requested id.
        obj_id = self.options['single']
        obj_list = [self.pk_get('unit', obj_id)]
        queryset = Unit.objects.filter(id=obj_id)
    else:
        obj_list = self._fetch_units()
        queryset = Unit.objects.filter(data_source='tprek').prefetch_related('services', 'keywords')
    syncher = ModelSyncher(queryset, lambda obj: obj.id)
    for idx, info in enumerate(obj_list):
        # Attach pre-grouped related records before the per-unit import.
        conn_list = conn_by_unit.get(info['id'], [])
        info['connections'] = conn_list
        acp_list = acc_by_unit.get(info['id'], [])
        info['accessibility_properties'] = acp_list
        self._import_unit(syncher, info)
    syncher.finish()
def syncher(municipalities):
    """Build a ModelSyncher over *municipalities* keyed by id, pre-marking
    every third cached object as found and the rest as not found.
    """
    instance = ModelSyncher(municipalities, lambda obj: obj.id)
    for index, municipality in enumerate(instance.obj_dict.values()):
        # Equivalent to: True for indices 0, 3, 6, ...; False otherwise.
        municipality._found = (index % 3 == 0)
    return instance
def import_departments(self, noop=False):
    """Sync Department rows against the remote 'department' resource.

    Stores the built syncher on ``self.dept_syncher`` so later importers
    can reuse it. With ``noop=True`` only the syncher is built and no data
    is imported.
    """
    obj_list = self.pk_get('department')
    syncher = ModelSyncher(Department.objects.all(), lambda obj: obj.id)
    self.dept_syncher = syncher
    if noop:
        return
    for d in obj_list:
        obj = syncher.get(d['id'])
        if not obj:
            obj = Department(id=d['id'])
            obj._changed = True
        self._save_translated_field(obj, 'name', d, 'name')
        if obj.abbr != d['abbr']:
            obj._changed = True
            obj.abbr = d['abbr']
        # Prefer the cached organization syncher when available; fall back
        # to a direct DB lookup. getattr guards against the attribute not
        # being set yet (plain self.org_syncher raised AttributeError when
        # import_organizations had not run).
        if getattr(self, 'org_syncher', None):
            org_obj = self.org_syncher.get(d['org_id'])
        else:
            org_obj = Organization.objects.get(id=d['org_id'])
        assert org_obj
        if obj.organization_id != d['org_id']:
            obj._changed = True
            obj.organization = org_obj
        # Existing objects with no changes never had _changed set; default
        # to False instead of raising AttributeError.
        if getattr(obj, '_changed', False):
            obj.save()
        syncher.mark(obj)
    syncher.finish()
def import_organizations(self, noop=False):
    """Sync Organization rows against the remote 'organization' resource.

    Stores the built syncher on ``self.org_syncher``. With ``noop=True``
    only the syncher is built and no data is imported.
    """
    obj_list = self.pk_get('organization')
    syncher = ModelSyncher(Organization.objects.all(), lambda obj: obj.id)
    self.org_syncher = syncher
    if noop:
        return
    for d in obj_list:
        obj = syncher.get(d['id'])
        if not obj:
            obj = Organization(id=d['id'])
            # Mark newly created objects as changed so they are always
            # saved (was missing; matches the sibling importers' pattern).
            obj._changed = True
        self._save_translated_field(obj, 'name', d, 'name')
        # Normalize the source URL to include a scheme.
        url = d['data_source_url']
        if not url.startswith('http'):
            url = 'http://%s' % url
        if obj.data_source_url != url:
            obj._changed = True
            obj.data_source_url = url
        # Existing objects with no changes never had _changed set; default
        # to False instead of raising AttributeError.
        if getattr(obj, '_changed', False):
            obj.save()
        syncher.mark(obj)
    syncher.finish()
def _import_one_division_type(self, muni, div):
    """Import one administrative-division layer (config section *div*) for
    municipality *muni*, reading features from a local file or a WFS URL.
    """
    def make_div_id(obj):
        # Child divisions are keyed by "<parent origin>-<own origin>" so
        # the same origin_id may repeat under different parents.
        if 'parent' in div:
            return "%s-%s" % (obj.parent.origin_id, obj.origin_id)
        else:
            return obj.origin_id
    self.logger.info(div['name'])
    if not 'origin_id' in div['fields']:
        raise Exception(
            "Field 'origin_id' not defined in config section '%s'" % div['name'])
    # Get or create the division type referenced by the config.
    try:
        type_obj = AdministrativeDivisionType.objects.get(type=div['type'])
    except AdministrativeDivisionType.DoesNotExist:
        type_obj = AdministrativeDivisionType(type=div['type'])
        type_obj.name = div['name']
        type_obj.save()
    div_qs = AdministrativeDivision.objects.filter(type=type_obj)
    if not div.get('no_parent_division', False):
        # Restrict the syncher to divisions under this municipality.
        div_qs = div_qs.by_ancestor(muni.division).select_related('parent')
    syncher = ModelSyncher(div_qs, make_div_id)
    # Cache the list of possible parents. Assumes parents are imported
    # first.
    if 'parent' in div:
        parent_list = AdministrativeDivision.objects.\
            filter(type__type=div['parent']).by_ancestor(muni.division)
        parent_dict = {}
        for o in parent_list:
            # origin_id must be unique within the parent layer.
            assert o.origin_id not in parent_dict
            parent_dict[o.origin_id] = o
    else:
        parent_dict = None
    if 'file' in div:
        # Local file source (legacy data dumps are Latin-1 encoded).
        path = self.find_data_file(
            os.path.join(self.division_data_path, div['file']))
        ds = DataSource(path, encoding='iso8859-1')
    else:
        # WFS source: build a GetFeature URL returning GeoJSON in the
        # project SRID.
        wfs_url = 'WFS:' + div['wfs_url']
        if '?' in wfs_url:
            sep = '&'
        else:
            sep = '?'
        url = wfs_url + sep + 'typeName=' + div[
            'wfs_layer'] + '&' + "srsName=EPSG:%d" % PROJECTION_SRID + '&' + "outputFormat=application/json"
        ds = DataSource(url)
    lyr = ds[0]
    assert len(ds) == 1
    # Batch the MPTT tree updates until all features are imported.
    with AdministrativeDivision.objects.delay_mptt_updates():
        for feat in lyr:
            self._import_division(muni, div, type_obj, syncher, parent_dict, feat)
def import_entrances(fetch_resource=pk_get):
    """Import unit entrances from the 'entrance' resource.

    Builds the lat/lon bounding box and GPS-to-target coordinate transform
    needed by ``_import_unit_entrance``, imports each entrance ordered by
    unit id, and returns the finished syncher.
    """
    entrances = fetch_resource("entrance")
    entrance_syncher = ModelSyncher(UnitEntrance.objects.all(), lambda obj: obj.id)
    srid = PROJECTION_SRID
    # The configured bbox is tagged WGS84 and converted target->GPS so the
    # comparison box is in lat/lon.
    bbox = Polygon.from_bbox(settings.BOUNDING_BOX)
    bbox.srid = 4326
    wgs84 = SpatialReference(4326)
    projected = SpatialReference(srid)
    projected_to_wgs84 = CoordTransform(projected, wgs84)
    bbox.transform(projected_to_wgs84)
    # Converts incoming GPS coordinates into the target projection.
    wgs84_to_projected = CoordTransform(wgs84, projected)
    for entrance in sorted(entrances, key=lambda item: item["unit_id"]):
        _import_unit_entrance(
            entrance_syncher,
            entrance.copy(),
            bbox,
            wgs84_to_projected,
            srid,
        )
    entrance_syncher.finish()
    return entrance_syncher
def import_municipalities(self):
    """Load municipality boundaries from the source dataset and sync them
    as AdministrativeDivision rows of type 'muni', parallelizing the
    per-feature processing when a thread pool is available.
    """
    # self._setup_land_area()
    self.logger.info("Loading municipality boundaries")
    path = self.find_muni_data()
    ds = DataSource(path)
    lyr = ds[0]
    assert lyr.name == "AdministrativeUnit"
    defaults = {'name': 'Municipality'}
    muni_type, _ = AdministrativeDivisionType.objects.get_or_create(
        type='muni', defaults=defaults)
    self.muni_type = muni_type
    syncher = ModelSyncher(
        AdministrativeDivision.objects.filter(type=muni_type),
        lambda obj: obj.origin_id)
    # If running under Python 3, parallelize the heavy lifting.
    # (ThreadPoolExecutor is presumably None when the import failed.)
    if ThreadPoolExecutor:
        executor = ThreadPoolExecutor(max_workers=8)
        futures = []
    else:
        executor = None
    with db.transaction.atomic():
        # Suspend MPTT bookkeeping while bulk-inserting; the tree is
        # rebuilt once at the end.
        with AdministrativeDivision.objects.disable_mptt_updates():
            for idx, feat in enumerate(lyr):
                # Only municipality-level ('4thOrder') features are wanted.
                if feat.get('nationalLevel') != '4thOrder':
                    continue
                # Process the first in a single-threaded way to catch
                # possible exceptions early.
                if executor and idx > 0:
                    futures.append(
                        executor.submit(self._process_muni, syncher, feat))
                else:
                    self._process_muni(syncher, feat)
            if executor:
                # Drain the futures so worker exceptions propagate here.
                for f in futures:
                    res = f.result()
                executor.shutdown()
    AdministrativeDivision.objects.rebuild()
def import_services(self):
    """Sync Service rows against the remote 'service' resource, including
    parent links (parents are assumed to appear in the same feed).
    """
    obj_list = self.pk_get('service')
    syncher = ModelSyncher(Service.objects.all(), lambda obj: obj.id)
    for d in obj_list:
        obj = syncher.get(d['id'])
        if not obj:
            obj = Service(id=d['id'])
            obj._changed = True
        self._save_translated_field(obj, 'name', d, 'name')
        if 'parent_id' in d:
            # The parent must already be known to the syncher.
            parent = syncher.get(d['parent_id'])
            assert parent
        else:
            parent = None
        if obj.parent != parent:
            obj.parent = parent
            obj._changed = True
        # Existing objects with no changes never had _changed set; default
        # to False instead of raising AttributeError.
        if getattr(obj, '_changed', False):
            obj.save()
        syncher.mark(obj)
    syncher.finish()
def import_units(dept_syncher=None, fetch_only_id=None, verbosity=True,
                 logger=None, fetch_units=_fetch_units, fetch_resource=pk_get):
    """Import units at module level: fetch connections, accessibility
    properties and ontologyword details, attach them to each unit record,
    and sync everything. Returns ``(dept_syncher, unit_syncher)``.
    """
    # Module-level caches are reset for each run.
    global VERBOSITY, LOGGER, EXISTING_SERVICE_NODE_IDS, EXISTING_SERVICE_IDS
    EXISTING_SERVICE_NODE_IDS = None
    EXISTING_SERVICE_IDS = None
    VERBOSITY = verbosity
    LOGGER = logger
    keyword_handler = KeywordHandler(verbosity=verbosity, logger=logger)
    if VERBOSITY and not LOGGER:
        LOGGER = logging.getLogger(__name__)
    # Lookup table for resolving municipalities by Finnish name.
    muni_by_name = {
        muni.name_fi.lower(): muni
        for muni in Municipality.objects.all()
    }
    if not dept_syncher:
        dept_syncher = import_departments(noop=True)
    # Departments are keyed by uuid in the syncher; map source ids to them.
    department_id_to_uuid = dict(
        ((k, str(v))
         for k, v in Department.objects.all().values_list('id', 'uuid')))
    VERBOSITY and LOGGER.info("Fetching unit connections %s" % dept_syncher)
    connections = fetch_resource('connection')
    conn_by_unit = defaultdict(list)
    for conn in connections:
        unit_id = conn['unit_id']
        conn_by_unit[unit_id].append(conn)
    VERBOSITY and LOGGER.info("Fetching accessibility properties")
    # acc_properties = self.fetch_resource('accessibility_property', v3=True)
    acc_properties = fetch_resource('accessibility_property')
    acc_by_unit = defaultdict(list)
    for ap in acc_properties:
        unit_id = ap['unit_id']
        acc_by_unit[unit_id].append(ap)
    VERBOSITY and LOGGER.info("Fetching ontologyword details")
    details = fetch_resource('ontologyword_details')
    ontologyword_details_by_unit = defaultdict(list)
    for detail in details:
        unit_id = detail['unit_id']
        ontologyword_details_by_unit[unit_id].append(detail)
    target_srid = PROJECTION_SRID
    # BOUNDING_BOX is tagged 4326 and converted target->GPS, leaving the
    # comparison bbox in lat/lon. TODO confirm BOUNDING_BOX units.
    bounding_box = Polygon.from_bbox(settings.BOUNDING_BOX)
    bounding_box.set_srid(4326)
    gps_srs = SpatialReference(4326)
    target_srs = SpatialReference(target_srid)
    target_to_gps_ct = CoordTransform(target_srs, gps_srs)
    bounding_box.transform(target_to_gps_ct)
    gps_to_target_ct = CoordTransform(gps_srs, target_srs)
    if fetch_only_id:
        # Single-unit mode: fetch only the requested id.
        obj_id = fetch_only_id
        obj_list = [fetch_resource('unit', obj_id, params={'official': 'yes'})]
        queryset = Unit.objects.filter(id=obj_id)
    else:
        obj_list = fetch_units()
        queryset = Unit.objects.all().prefetch_related(
            'services', 'keywords', 'service_details')
    syncher = ModelSyncher(queryset, lambda obj: obj.id)
    for idx, info in enumerate(obj_list):
        # Attach pre-grouped related records before the per-unit import.
        uid = info['id']
        info['connections'] = conn_by_unit.get(uid, [])
        info['accessibility_properties'] = acc_by_unit.get(uid, [])
        info['service_details'] = ontologyword_details_by_unit.get(uid, [])
        _import_unit(syncher, keyword_handler, info.copy(), dept_syncher,
                     muni_by_name, bounding_box, gps_to_target_ct,
                     target_srid, department_id_to_uuid)
    syncher.finish()
    return dept_syncher, syncher
def import_departments(noop=False, logger=None, fetch_resource=pk_get):
    """Sync Department rows (keyed by uuid) against the remote 'department'
    resource and return the syncher.

    Departments are processed parents-first (sorted by hierarchy_level) so
    parent lookups by uuid can succeed. With ``noop=True`` only the syncher
    is built.
    """
    obj_list = fetch_resource('department')
    syncher = ModelSyncher(Department.objects.all(), lambda obj: str(obj.uuid))
    if noop:
        return syncher
    for d in sorted(obj_list, key=lambda x: x['hierarchy_level']):
        obj = syncher.get(d['id'])
        obj_has_changed = False
        if not obj:
            obj = Department(uuid=d['id'])
            obj_has_changed = True
        fields = ('phone', 'address_zip', 'oid', 'organization_type',
                  'business_id')
        fields_that_need_translation = ('name', 'abbr', 'street_address',
                                        'address_city', 'address_postal_full',
                                        'www')
        obj.uuid = d['id']
        # Copy plain fields, tracking whether anything actually changed.
        for field in fields:
            if d.get(field):
                if d[field] != getattr(obj, field):
                    obj_has_changed = True
                setattr(obj, field, d.get(field))
        parent_id = d.get('parent_id')
        if parent_id != obj.parent_id:
            obj_has_changed = True
            if parent_id is None:
                obj.parent_id = None
            else:
                try:
                    parent = Department.objects.get(uuid=parent_id)
                    obj.parent_id = parent.id
                except Department.DoesNotExist:
                    logger and logger.error(
                        "Department import: no parent with uuid {} found for {}".format(
                            parent_id, d['id'])
                    )
        for field in fields_that_need_translation:
            if save_translated_field(obj, field, d, field):
                obj_has_changed = True
        # Resolve the municipality by code. Initialize to None up front:
        # previously a failed lookup left `municipality` unbound and the
        # comparison below raised NameError.
        muni_code = d.get('municipality_code')
        municipality = None
        if muni_code is not None:
            try:
                municipality = Municipality.objects.get(division__origin_id=str(muni_code))
            except Municipality.DoesNotExist:
                logger and logger.error(
                    "No municipality with code {} for department {}".format(
                        muni_code, d['id']))
        if obj.municipality != municipality:
            obj.municipality = municipality
            obj_has_changed = True
        if obj_has_changed:
            obj.save()
        syncher.mark(obj)
    syncher.finish()
    return syncher
def import_services(self):
    """Sync Service rows against the remote 'service' resource.

    Adds four hard-coded additional root services, re-parents selected
    source services under them, handles 'identical_to' duplicate links
    (deferring links whose master has not been saved yet), and updates
    unit counts and timestamps on changed objects.
    """
    srv_list = self.pk_get('service')
    syncher = ModelSyncher(Service.objects.exclude(pk__in=SERVICE_IDS_TO_SKIP),
                           lambda obj: obj.id)
    self.detect_duplicate_services(srv_list)
    # Synthetic top-level categories not present in the source feed.
    additional_root_services = [
        {
            'name_fi': 'Asuminen ja kaupunkiympäristö',
            'name_en': 'Housing and urban environment',
            'name_sv': 'Boende och stadsmiljö',
            'id': 50000
        },
        {
            'name_fi': 'Työ, talous ja hallinto',
            'name_en': 'Employment, economy and administration',
            'name_sv': 'Arbete, ekonomi och förvaltning',
            'id': 50001
        },
        {
            'name_fi': 'Kulttuuri, liikunta ja vapaa-aika',
            'name_en': 'Culture, sports and leisure',
            'name_sv': 'Kultur, motion och fritid',
            'id': 50002
        },
        {
            'name_fi': 'Liikenne ja kartat',
            'name_en': 'Traffic and maps',
            'name_sv': 'Trafik och kartor',
            'id': 50003
        },
    ]
    # Maps selected source service ids to the synthetic roots above.
    service_to_new_root = {
        25298: 50000, 25142: 50000,
        26098: 50001, 26300: 50001, 26244: 50001,
        25622: 50002, 28128: 50002, 25954: 50002,
        25554: 50003, 25476: 50003
    }
    # (obj, master_id) pairs whose master was not yet saved when seen.
    dupes = []

    def handle_service(d):
        # Create or update one service record from source dict *d*.
        if d['id'] in SERVICE_IDS_TO_SKIP:
            return
        obj = syncher.get(d['id'])
        if not obj:
            obj = Service(id=d['id'])
            obj._changed = True
        self._save_translated_field(obj, 'name', d, 'name')
        if 'identical_to' in d:
            master = syncher.get(d['identical_to'])
            # If the master entry hasn't been saved yet, we save the
            # duplicate information later.
            if not master:
                dupes.append((obj, d['identical_to']))
                d['identical_to'] = None
        else:
            d['identical_to'] = None
        self._set_field(obj, 'identical_to_id', d['identical_to'])
        # Re-parent selected services under the synthetic roots.
        new_root = service_to_new_root.get(d['id'])
        if new_root:
            d['parent_id'] = new_root
        if 'parent_id' in d:
            parent = syncher.get(d['parent_id'])
            assert parent
        else:
            parent = None
        if obj.parent != parent:
            obj.parent = parent
            obj._changed = True
        self._sync_searchwords(obj, d)
        if obj._changed:
            obj.unit_count = obj.get_unit_count()
            obj.last_modified_time = datetime.now(UTC_TIMEZONE)
            obj.save()
            self.services_changed = True
        syncher.mark(obj)

    # Roots first so re-parented services can resolve them via the syncher.
    for d in additional_root_services:
        handle_service(d)
    for d in srv_list:
        handle_service(d)
    # Second pass: now that everything is saved, fill deferred duplicate links.
    for obj, master_id in dupes:
        obj.identical_to_id = master_id
        obj.save(update_fields=['identical_to'])
    syncher.finish()
def import_services(self):
    """Sync Service rows against the remote 'service' resource.

    Adds four hard-coded additional root services, re-parents selected
    source services under them, handles 'identical_to' duplicate links
    (deferring links whose master has not been saved yet), and updates
    unit counts and timestamps on changed objects.
    """
    srv_list = self.pk_get('service')
    syncher = ModelSyncher(
        Service.objects.exclude(pk__in=SERVICE_IDS_TO_SKIP),
        lambda obj: obj.id)
    self.detect_duplicate_services(srv_list)
    # Synthetic top-level categories not present in the source feed.
    additional_root_services = [
        {
            'name_fi': 'Asuminen ja kaupunkiympäristö',
            'name_en': 'Housing and urban environment',
            'name_sv': 'Boende och stadsmiljö',
            'id': 50000
        },
        {
            'name_fi': 'Työ, talous ja hallinto',
            'name_en': 'Employment, economy and administration',
            'name_sv': 'Arbete, ekonomi och förvaltning',
            'id': 50001
        },
        {
            'name_fi': 'Kulttuuri, liikunta ja vapaa-aika',
            'name_en': 'Culture, sports and leisure',
            'name_sv': 'Kultur, motion och fritid',
            'id': 50002
        },
        {
            'name_fi': 'Liikenne ja kartat',
            'name_en': 'Traffic and maps',
            'name_sv': 'Trafik och kartor',
            'id': 50003
        },
    ]
    # Maps selected source service ids to the synthetic roots above.
    service_to_new_root = {
        25298: 50000, 25142: 50000,
        26098: 50001, 26300: 50001, 26244: 50001,
        25622: 50002, 28128: 50002, 25954: 50002,
        25554: 50003, 25476: 50003
    }
    # (obj, master_id) pairs whose master was not yet saved when seen.
    dupes = []

    def handle_service(d):
        # Create or update one service record from source dict *d*.
        if d['id'] in SERVICE_IDS_TO_SKIP:
            return
        obj = syncher.get(d['id'])
        if not obj:
            obj = Service(id=d['id'])
            obj._changed = True
        self._save_translated_field(obj, 'name', d, 'name')
        if 'identical_to' in d:
            master = syncher.get(d['identical_to'])
            # If the master entry hasn't been saved yet, we save the
            # duplicate information later.
            if not master:
                dupes.append((obj, d['identical_to']))
                d['identical_to'] = None
        else:
            d['identical_to'] = None
        self._set_field(obj, 'identical_to_id', d['identical_to'])
        # Re-parent selected services under the synthetic roots.
        new_root = service_to_new_root.get(d['id'])
        if new_root:
            d['parent_id'] = new_root
        if 'parent_id' in d:
            parent = syncher.get(d['parent_id'])
            assert parent
        else:
            parent = None
        if obj.parent != parent:
            obj.parent = parent
            obj._changed = True
        self._sync_searchwords(obj, d)
        if obj._changed:
            obj.unit_count = obj.get_unit_count()
            obj.last_modified_time = datetime.now(UTC_TIMEZONE)
            obj.save()
            self.services_changed = True
        syncher.mark(obj)

    # Roots first so re-parented services can resolve them via the syncher.
    for d in additional_root_services:
        handle_service(d)
    for d in srv_list:
        handle_service(d)
    # Second pass: now that everything is saved, fill deferred duplicate links.
    for obj, master_id in dupes:
        obj.identical_to_id = master_id
        obj.save(update_fields=['identical_to'])
    syncher.finish()
class UnitImporter:
    """Imports Turku service points ('palvelupisteet') as Unit objects,
    including location, contact info, services, opening hours and
    accessibility data.
    """

    # NOTE(review): this class attribute runs a DB query at class-definition
    # (import) time and is shared by all instances — consider moving it into
    # __init__; verify before changing.
    unitsyncher = ModelSyncher(Unit.objects.all(), lambda obj: obj.id)

    def __init__(self, logger=None, importer=None):
        self.logger = logger
        self.importer = importer

    def import_units(self):
        """Fetch all service points and sync them; prune service-node data
        afterwards."""
        units = get_turku_resource("palvelupisteet")
        for unit in units:
            self._handle_unit(unit)
        self.unitsyncher.finish()
        update_service_node_counts()
        remove_empty_service_nodes(self.logger)

    def _handle_unit(self, unit_data):
        """Create/update one Unit from a source record; skips non-active
        (state != "1") service points."""
        unit_id = int(unit_data["koodi"])
        state = unit_data["tila"].get("koodi")
        if state != "1":
            self.logger.debug('Skipping service point "{}" state "{}".'.format(
                unit_id, state))
            return
        obj = self.unitsyncher.get(unit_id)
        if not obj:
            obj = Unit(id=unit_id)
            obj._changed = True
        self._handle_root_fields(obj, unit_data)
        self._handle_location(obj, unit_data)
        self._handle_extra_info(obj, unit_data)
        self._handle_ptv_id(obj, unit_data)
        self._handle_service_descriptions(obj, unit_data)
        # First save so the object has a PK before related rows are written.
        self._save_object(obj)
        self._handle_opening_hours(obj, unit_data)
        self._handle_email_and_phone_numbers(obj, unit_data)
        self._handle_services_and_service_nodes(obj, unit_data)
        self._handle_accessibility_shortcomings(obj)
        self._save_object(obj)
        self.unitsyncher.mark(obj)

    def _save_object(self, obj):
        # Persist only if something changed; stamps the modification time.
        if obj._changed:
            obj.last_modified_time = datetime.now(UTC_TIMEZONE)
            obj.save()
            if self.importer:
                self.importer.services_changed = True

    def _handle_root_fields(self, obj, unit_data):
        self._update_fields(obj, unit_data, ROOT_FIELD_MAPPING)

    def _handle_location(self, obj, unit_data):
        """Set the unit's point location (when inside the bounding box) and
        address/municipality fields from 'fyysinenPaikka'."""
        location_data = unit_data.get("fyysinenPaikka")
        location = None
        if location_data:
            latitude = location_data.get("leveysaste")
            longitude = location_data.get("pituusaste")
            if latitude and longitude:
                point = Point(float(longitude), float(latitude),
                              srid=SOURCE_DATA_SRID)
                # Points outside BOUNDING_BOX are dropped (location stays None).
                if point.within(BOUNDING_BOX):
                    point.transform(settings.DEFAULT_SRID)
                    location = point
        set_syncher_object_field(obj, "location", location)
        if not location_data:
            return
        address_data_list = location_data.get("osoitteet")
        if address_data_list:
            # TODO what if there are multiple addresses
            address_data = address_data_list[0]
            full_postal_address = {}
            street = {"fi": address_data.get("katuosoite_fi")}
            zip = address_data.get("postinumero")
            post_office_fi = address_data.get("postitoimipaikka_fi")
            full_postal_address["fi"] = "{} {} {}".format(
                street["fi"], zip, post_office_fi)
            # Fall back to the Finnish values for missing translations.
            for language in ("sv", "en"):
                street[language] = (address_data.get(
                    "katuosoite_{}".format(language)) or street["fi"])
                post_office = (address_data.get(
                    "postitoimipaikka_{}".format(language)) or post_office_fi)
                full_postal_address[language] = "{} {} {}".format(
                    street[language], zip, post_office)
            set_syncher_tku_translated_field(obj, "address_postal_full",
                                             full_postal_address)
            set_syncher_tku_translated_field(obj, "street_address", street)
            set_syncher_object_field(obj, "address_zip", zip)
            # Resolve municipality by name, falling back to the post office.
            municipality = get_municipality(
                address_data.get("kunta", {}).get("nimi_fi"))
            if not municipality:
                municipality = get_municipality(post_office_fi)
            set_syncher_object_field(obj, "municipality", municipality)

    def _handle_extra_info(self, obj, unit_data):
        # TODO handle existing extra data erasing when needed
        location_data = unit_data.get("fyysinenPaikka")
        if not location_data:
            return
        for extra_info_data in location_data.get("lisatiedot", []):
            # Unknown extra-info types are silently skipped.
            try:
                koodi = extra_info_data["lisatietotyyppi"].get("koodi")
                field_mapping = EXTRA_INFO_FIELD_MAPPING[koodi]
            except KeyError:
                continue
            self._update_fields(obj, extra_info_data, field_mapping)

    def _handle_ptv_id(self, obj, unit_data):
        """Create or remove the unit's PTV identifier row."""
        ptv_id = unit_data.get("ptv_id")
        if ptv_id:
            # NOTE(review): get_or_create returns (object, created) — this
            # unpacking looks swapped, so `if created:` tests the object
            # (always truthy). Verify intended behavior before relying on it.
            created, _ = UnitIdentifier.objects.get_or_create(namespace="ptv",
                                                              value=ptv_id,
                                                              unit=obj)
            if created:
                obj._changed = True
        else:
            num_of_deleted, _ = UnitIdentifier.objects.filter(
                namespace="ptv", unit=obj).delete()
            if num_of_deleted:
                obj._changed = True

    def _handle_services_and_service_nodes(self, obj, unit_data):
        """Rebuild the unit's service and service-node relations from the
        'palvelutarjoukset' data, flagging the object changed if the sets
        differ from before."""
        old_service_ids = set(obj.services.values_list("id", flat=True))
        old_service_node_ids = set(
            obj.service_nodes.values_list("id", flat=True))
        obj.services.clear()
        obj.service_nodes.clear()
        for service_offer in unit_data.get("palvelutarjoukset", []):
            for service_data in service_offer.get("palvelut", []):
                service_id = int(service_data.get("koodi"))
                try:
                    service = Service.objects.get(id=service_id)
                except Service.DoesNotExist:
                    # TODO fail the unit node completely here?
                    self.logger.warning(
                        'Service "{}" does not exist!'.format(service_id))
                    continue
                UnitServiceDetails.objects.get_or_create(unit=obj,
                                                         service=service)
                service_nodes = ServiceNode.objects.filter(
                    related_services=service)
                obj.service_nodes.add(*service_nodes)
        new_service_ids = set(obj.services.values_list("id", flat=True))
        new_service_node_ids = set(
            obj.service_nodes.values_list("id", flat=True))
        if (old_service_ids != new_service_ids
                or old_service_node_ids != new_service_node_ids):
            obj._changed = True
        # Store root service node ids as a comma-separated string.
        set_syncher_object_field(
            obj,
            "root_service_nodes",
            ",".join(str(x) for x in obj.get_root_service_nodes()),
        )

    def _handle_accessibility_shortcomings(self, obj):
        description, count = AccessibilityShortcomingCalculator().calculate(
            obj)
        UnitAccessibilityShortcomings.objects.update_or_create(
            unit=obj,
            defaults={
                "accessibility_shortcoming_count": count,
                "accessibility_description": description,
            },
        )

    def _handle_service_descriptions(self, obj, unit_data):
        description_data = unit_data.get("kuvaus_kieliversiot", {})
        descriptions = {
            lang: description_data.get(lang, "")
            for lang in ("fi", "sv", "en")
        }
        set_syncher_tku_translated_field(obj, "description", descriptions,
                                         clean=False)

    def _handle_opening_hours(self, obj, unit_data):
        """Rebuild the unit's opening-hours UnitConnection rows from
        'fyysinenPaikka'/'aukioloajat'."""
        obj.connections.filter(
            section_type=OPENING_HOURS_SECTION_TYPE).delete()
        try:
            opening_hours_data = unit_data["fyysinenPaikka"]["aukioloajat"]
        except KeyError:
            self.logger.debug("Cannot find opening hours for unit {}".format(
                unit_data.get("koodi")))
            return
        # Opening hours data will be stored in a complex structure where
        # entries are first grouped by type and then by Finnish name/title.
        # Each entry holds the localized names and a list of localized
        # value dicts. Example:
        #
        # {
        #     'normaali': {
        #         'Avoinna': (
        #             {'fi': 'Avoinna', 'sv': 'Öppna', 'en': 'Open'},
        #             [{'fi': 'ma-pe 10:00-12:00', 'sv': 'mån-fre 10:00-12:00',
        #               'en': 'Mon-Fri 10:00-12:00'}]
        #         ),
        #         '10.10.2020 Avoinna': (
        #             {'fi': ...},
        #             [{'fi': ...}]
        #         )
        #     },
        #     'erityinen': {
        #         ...
        #     }
        # }
        all_opening_hours = defaultdict(OrderedDict)
        for opening_hours_datum in sorted(
                opening_hours_data,
                key=lambda x: x.get("voimassaoloAlkamishetki")):
            opening_hours_type = opening_hours_datum["aukiolotyyppi"]
            start = parse_date(opening_hours_datum["voimassaoloAlkamishetki"])
            end = parse_date(opening_hours_datum["voimassaoloPaattymishetki"])
            today = date.today()
            # Skip periods that have fully ended.
            if start and start < today and end and end < today:
                continue
            opening_time = self._format_time(
                opening_hours_datum["avaamisaika"])
            closing_time = self._format_time(
                opening_hours_datum["sulkemisaika"])
            # Entries without any times are kept only for "exception closed".
            if (not opening_time and not closing_time
                    and not opening_hours_type == EXCEPTION_CLOSED):
                continue
            names = self._generate_name_for_opening_hours(opening_hours_datum)
            weekday = opening_hours_datum["viikonpaiva"]
            opening_hours_value = {}
            for language in LANGUAGES:
                # "1-5" style weekday ranges become e.g. "Mon–Fri".
                weekday_str = "–".join([
                    get_weekday_str(int(wd), language) if wd else ""
                    for wd in weekday.split("-")
                ])
                if opening_hours_type == EXCEPTION_CLOSED:
                    opening_hours_value[language] = " ".join(
                        (weekday_str, CLOSED_STR[language]))
                else:
                    opening_hours_value[language] = "{} {}–{}".format(
                        weekday_str, opening_time, closing_time)
            # map exception open and exception closed to the same slot to get them
            # sorted by start dates rather than first all open and then all closed
            if EXCEPTION in opening_hours_type:
                opening_hours_type = EXCEPTION
            # append new opening hours name and value to the complex structure
            all_of_type = all_opening_hours.get(opening_hours_type, {})
            data = all_of_type.get(names["fi"], ())
            if not data:
                data = (names, [opening_hours_value])
            else:
                if opening_hours_value not in data[1]:
                    data[1].append(opening_hours_value)
            all_opening_hours[opening_hours_type][names["fi"]] = data
        # Flatten the structure into ordered UnitConnection rows.
        index = 0
        for opening_hours_type in (NORMAL, NORMAL_EXTRA, SPECIAL, EXCEPTION):
            for description, value in all_opening_hours[
                    opening_hours_type].items():
                names = {}
                for language in LANGUAGES:
                    first_part = value[0][language]
                    if opening_hours_type in (NORMAL, NORMAL_EXTRA, SPECIAL):
                        first_part = "{}".format(first_part)
                    second_part = " ".join(v[language] for v in value[1])
                    names["name_{}".format(language)] = "{} {}".format(
                        first_part, second_part)
                UnitConnection.objects.create(
                    unit=obj,
                    section_type=OPENING_HOURS_SECTION_TYPE,
                    order=index,
                    **names)
                index += 1

    def _handle_email_and_phone_numbers(self, obj, unit_data):
        """Rebuild the unit's email/phone UnitConnection rows."""
        UnitConnection.objects.filter(
            unit=obj, section_type=PHONE_OR_EMAIL_SECTION_TYPE).delete()
        index = 0
        email = unit_data.get("sahkoposti")
        if email:
            UnitConnection.objects.get_or_create(
                unit=obj,
                section_type=PHONE_OR_EMAIL_SECTION_TYPE,
                email=email,
                name_fi="Sähköposti",
                name_sv="E-post",
                name_en="Email",
                order=index,
            )
            index += 1
        phone_number_data = unit_data.get("puhelinnumerot", [])
        if not phone_number_data:
            return
        for phone_number_datum in phone_number_data:
            number_type = phone_number_datum.get("numerotyyppi")
            descriptions = phone_number_datum.get("kuvaus_kieliversiot", {})
            type_names = {
                "fi": number_type.get("teksti_fi"),
                "sv": number_type.get("teksti_sv"),
                "en": number_type.get("teksti_en"),
            }
            # Prefer the free-form description; fall back to the type name.
            names = {
                "name_{}".format(language):
                get_localized_value(descriptions, language) or
                get_localized_value(type_names, language)  # NOQA
                for language in LANGUAGES
            }
            UnitConnection.objects.get_or_create(
                unit=obj,
                section_type=PHONE_OR_EMAIL_SECTION_TYPE,
                phone=self._generate_phone_number(phone_number_datum),
                order=index,
                **names)
            index += 1

    def _generate_phone_number(self, phone_number_datum):
        # Returns "+<country code><number>" or the bare number when no code.
        if not phone_number_datum:
            return ""
        code = phone_number_datum["maakoodi"]
        number = phone_number_datum["numero"]
        return "+{}{}".format(code, number) if code else number

    def _generate_name_for_opening_hours(self, opening_hours_datum):
        """Build per-language display names for an opening-hours entry,
        prefixed with a (possibly shortened) validity date range."""
        opening_hours_type = opening_hours_datum["aukiolotyyppi"]
        names = defaultdict(str)
        for language in LANGUAGES:
            names[language] = get_localized_value(
                opening_hours_datum.get("kuvaus_kieliversiot", {}), language)  # NOQA
        # Default names by type when no description is provided.
        for language in LANGUAGES:
            if not names[language]:
                if opening_hours_type == SPECIAL:
                    names[language] = SPECIAL_STR[language]
                elif opening_hours_type in (NORMAL, NORMAL_EXTRA):
                    names[language] = OPEN_STR[language]
        start = parse_date(opening_hours_datum["voimassaoloAlkamishetki"])
        end = parse_date(opening_hours_datum["voimassaoloPaattymishetki"])
        if not start and not end:
            return names
        # if end < start assume it means just one day (start)
        if end and start and end < start:
            end = start
        for language in LANGUAGES:
            with translation.override(language):
                start_str = (formats.date_format(
                    start, format="SHORT_DATE_FORMAT") if start else None)
                end_str = (formats.date_format(end, format="SHORT_DATE_FORMAT")
                           if end else None)
                # shorten start date string if it has the same year and/or month as end date,
                # for example 5.7.2018 - 9.7.2018 becomes 5. - 9.7.2018
                if (language in ("fi", "sv") and start_str and end_str
                        and start_str != end_str):
                    original_start_str = start_str
                    if start.year == end.year:
                        if start.month == end.month:
                            start_str = "{}.".format(
                                original_start_str.split(".")[0])
                        else:
                            start_str = ".".join(
                                original_start_str.split(".")[:-1])
                if start and end:
                    dates = ("{}–{}".format(start_str, end_str)
                             if start != end else start_str)
                else:
                    dates = start_str or end_str
                names[language] = ("{} {}".format(dates, names[language])
                                   if names[language] else dates)
        return names

    def _format_time(self, time_str):
        # "08:30:00" -> "8:30"; empty input stays empty.
        if not time_str:
            return ""
        parts = time_str.split(":")[:2]
        parts[0] = str(int(parts[0]))
        return ":".join(parts)

    @staticmethod
    def _update_fields(obj, imported_data, field_mapping):
        # Copy mapped source fields onto the object; *_kieliversiot keys
        # carry per-language dicts and go through the translated setter.
        for data_field, model_field in field_mapping.items():
            value = imported_data.get(data_field)
            if data_field.endswith("_kieliversiot"):
                set_syncher_tku_translated_field(obj, model_field, value)
            else:
                set_syncher_object_field(obj, model_field, value)
def import_units(self):
    """Import tprek units: fetch connections and accessibility properties,
    attach them to each unit record, and sync everything into the DB.
    """
    self._load_postcodes()
    # Lookup table for resolving municipality references by Finnish name.
    self.muni_by_name = {
        muni.name_fi.lower(): muni
        for muni in Municipality.objects.all()
    }
    if self.existing_service_ids == None or len(
            self.existing_service_ids) < 1:
        self.existing_service_ids = set(
            Service.objects.values_list('id', flat=True))
    # Build org/dept synchers lazily (noop only constructs them).
    if not getattr(self, 'org_syncher', None):
        self.import_organizations(noop=True)
    if not getattr(self, 'dept_syncher', None):
        self.import_departments(noop=True)
    if self.verbosity:
        self.logger.info("Fetching unit connections")
    # Group connection records by their owning unit id.
    connections = self.pk_get('connection')
    conn_by_unit = {}
    for conn in connections:
        unit_id = conn['unit_id']
        if unit_id not in conn_by_unit:
            conn_by_unit[unit_id] = []
        conn_by_unit[unit_id].append(conn)
    self.accessibility_variables = {
        x.id: x for x in AccessibilityVariable.objects.all()
    }
    if self.verbosity:
        self.logger.info("Fetching accessibility properties")
    # Group accessibility property records by unit id.
    acc_properties = self.pk_get('accessibility_property')
    acc_by_unit = {}
    for ap in acc_properties:
        unit_id = ap['unit_id']
        if unit_id not in acc_by_unit:
            acc_by_unit[unit_id] = []
        acc_by_unit[unit_id].append(ap)
    self.target_srid = PROJECTION_SRID
    # BOUNDING_BOX is tagged 4326 and converted target->GPS, so the stored
    # bbox ends up in lat/lon. TODO confirm BOUNDING_BOX units.
    self.bounding_box = Polygon.from_bbox(settings.BOUNDING_BOX)
    self.bounding_box.set_srid(4326)
    gps_srs = SpatialReference(4326)
    target_srs = SpatialReference(self.target_srid)
    target_to_gps_ct = CoordTransform(target_srs, gps_srs)
    self.bounding_box.transform(target_to_gps_ct)
    # Used by _import_unit to project incoming GPS coordinates.
    self.gps_to_target_ct = CoordTransform(gps_srs, target_srs)
    if self.options['single']:
        # Single-unit mode: only sync the requested id.
        obj_id = self.options['single']
        obj_list = [self.pk_get('unit', obj_id)]
        queryset = Unit.objects.filter(id=obj_id)
    else:
        obj_list = self._fetch_units()
        queryset = Unit.objects.filter(
            data_source='tprek').prefetch_related('services', 'keywords')
    syncher = ModelSyncher(queryset, lambda obj: obj.id)
    for idx, info in enumerate(obj_list):
        # Attach pre-grouped related records before the per-unit import.
        conn_list = conn_by_unit.get(info['id'], [])
        info['connections'] = conn_list
        acp_list = acc_by_unit.get(info['id'], [])
        info['accessibility_properties'] = acp_list
        self._import_unit(syncher, info)
    syncher.finish()
def import_departments(noop=False, logger=None, fetch_resource=pk_get):
    """Sync Department rows (keyed by uuid) against the remote 'department'
    resource and return the syncher.

    Departments are processed parents-first (sorted by hierarchy_level) so
    parent lookups by uuid can succeed. With ``noop=True`` only the syncher
    is built.
    """
    obj_list = fetch_resource("department")
    syncher = ModelSyncher(Department.objects.all(), lambda obj: str(obj.uuid))
    if noop:
        return syncher
    for d in sorted(obj_list, key=lambda x: x["hierarchy_level"]):
        obj = syncher.get(d["id"])
        obj_has_changed = False
        if not obj:
            obj = Department(uuid=d["id"])
            obj_has_changed = True
        fields = ("phone", "address_zip", "oid", "organization_type",
                  "business_id")
        fields_that_need_translation = (
            "name",
            "abbr",
            "street_address",
            "address_city",
            "address_postal_full",
            "www",
        )
        obj.uuid = d["id"]
        # Copy plain fields, tracking whether anything actually changed.
        for field in fields:
            if d.get(field):
                if d[field] != getattr(obj, field):
                    obj_has_changed = True
                setattr(obj, field, d.get(field))
        parent_id = d.get("parent_id")
        if parent_id != obj.parent_id:
            obj_has_changed = True
            if parent_id is None:
                obj.parent_id = None
            else:
                try:
                    parent = Department.objects.get(uuid=parent_id)
                    obj.parent_id = parent.id
                except Department.DoesNotExist:
                    logger and logger.error(
                        "Department import: no parent with uuid {} found for {}"
                        .format(parent_id, d["id"]))
        for field in fields_that_need_translation:
            if save_translated_field(obj, field, d, field):
                obj_has_changed = True
        # Resolve the municipality by code. Initialize to None up front:
        # previously a failed lookup left `municipality` unbound and the
        # comparison below raised NameError.
        muni_code = d.get("municipality_code")
        municipality = None
        if muni_code is not None:
            try:
                municipality = Municipality.objects.get(
                    division__origin_id=str(muni_code))
            except Municipality.DoesNotExist:
                logger and logger.error(
                    "No municipality with code {} for department {}".format(
                        muni_code, d["id"]))
        if obj.municipality != municipality:
            obj.municipality = municipality
            obj_has_changed = True
        if obj_has_changed:
            obj.save()
        syncher.mark(obj)
    syncher.finish()
    return syncher
def _import_unit_accessibility_properties(self): property_syncher = ModelSyncher( UnitAccessibilityProperty.objects.all(), lambda obj: obj.id) num_of_imports = 0 # For caching unit ids that are not present in the database unit_skip_list = set([]) accessibility_properties = get_ar_servicepoint_accessibility_resource( "properties") for accessibility_property in accessibility_properties: # Make sure that we have all the necessary property attributes ptv_id = accessibility_property.get("servicePointId") accessibility_variable_id = accessibility_property.get( "variableId") accessibility_variable_value = accessibility_property.get("value") if not (ptv_id and accessibility_variable_id and accessibility_variable_value): continue # No need to check further if the unit has already been marked as non-existing if ptv_id in unit_skip_list: continue # Make sure that the unit exists try: # TODO: Optimize this if it gets too slow # One way is to get all unit ids in one go and make a lookup table unit_identifier = UnitIdentifier.objects.get(namespace="ptv", value=ptv_id) unit = unit_identifier.unit except UnitIdentifier.DoesNotExist: self.logger.info( "Unit {} does not exist, skipping".format(ptv_id)) unit_skip_list.add(ptv_id) continue # Make sure that the variable exists if accessibility_variable_id not in self._accessibility_variables: self.logger.info("No variable {}, skipping".format(ptv_id)) continue # Create or update the property including its associated value uap, created = UnitAccessibilityProperty.objects.update_or_create( unit=unit, variable_id=accessibility_variable_id, defaults={"value": accessibility_variable_value}, ) # If an entry was updated if not created: # Mark it as synced sync_uap = property_syncher.get(uap.id) if sync_uap: property_syncher.mark(sync_uap) if created: num_of_imports += 1 property_syncher.finish() self.logger.info( "Imported {} accessibility properties.".format(num_of_imports))
def import_units(dept_syncher=None, fetch_only_id=None, verbosity=True,
                 logger=None, fetch_units=_fetch_units, fetch_resource=pk_get):
    """Import units, attaching connections, accessibility properties and
    ontologyword details fetched in bulk beforehand.

    With ``fetch_only_id`` set, only that one unit is fetched and synced.
    Returns a ``(dept_syncher, unit_syncher)`` tuple.
    """
    global VERBOSITY, LOGGER, EXISTING_SERVICE_NODE_IDS, EXISTING_SERVICE_IDS
    EXISTING_SERVICE_NODE_IDS = None
    EXISTING_SERVICE_IDS = None
    VERBOSITY = verbosity
    LOGGER = logger
    # NOTE(review): keyword_handler receives the raw `logger` argument, which
    # may still be None here even when verbosity is on — the fallback LOGGER
    # assigned just below does not apply to it.  Preserved as-is; confirm
    # this ordering is intended.
    keyword_handler = KeywordHandler(verbosity=verbosity, logger=logger)
    if VERBOSITY and not LOGGER:
        LOGGER = logging.getLogger(__name__)

    muni_by_name = {muni.name_fi.lower(): muni
                    for muni in Municipality.objects.all()}

    if not dept_syncher:
        dept_syncher = import_departments(noop=True)
    # Map database Department pk -> uuid string for fast lookups.
    department_id_to_uuid = {
        dept_id: str(dept_uuid)
        for dept_id, dept_uuid in Department.objects.all().values_list(
            'id', 'uuid')
    }

    if VERBOSITY:
        LOGGER.info("Fetching unit connections %s", dept_syncher)
    connections = fetch_resource('connection')
    conn_by_unit = defaultdict(list)
    for conn in connections:
        conn_by_unit[conn['unit_id']].append(conn)

    if VERBOSITY:
        LOGGER.info("Fetching accessibility properties")
    acc_properties = fetch_resource('accessibility_property')
    acc_by_unit = defaultdict(list)
    for ap in acc_properties:
        acc_by_unit[ap['unit_id']].append(ap)

    if VERBOSITY:
        LOGGER.info("Fetching ontologyword details")
    details = fetch_resource('ontologyword_details')
    ontologyword_details_by_unit = defaultdict(list)
    for detail in details:
        ontologyword_details_by_unit[detail['unit_id']].append(detail)

    # The bounding box is configured in the target SRID; transform it to
    # WGS84 once, and build the WGS84 -> target transform used per unit.
    target_srid = PROJECTION_SRID
    bounding_box = Polygon.from_bbox(settings.BOUNDING_BOX)
    bounding_box.set_srid(4326)
    gps_srs = SpatialReference(4326)
    target_srs = SpatialReference(target_srid)
    target_to_gps_ct = CoordTransform(target_srs, gps_srs)
    bounding_box.transform(target_to_gps_ct)
    gps_to_target_ct = CoordTransform(gps_srs, target_srs)

    if fetch_only_id:
        obj_id = fetch_only_id
        obj_list = [fetch_resource('unit', obj_id, params={'official': 'yes'})]
        queryset = Unit.objects.filter(id=obj_id)
    else:
        obj_list = fetch_units()
        queryset = Unit.objects.all().prefetch_related(
            'services', 'keywords', 'service_details')

    syncher = ModelSyncher(queryset, lambda obj: obj.id)
    for info in obj_list:
        uid = info['id']
        info['connections'] = conn_by_unit.get(uid, [])
        info['accessibility_properties'] = acc_by_unit.get(uid, [])
        info['service_details'] = ontologyword_details_by_unit.get(uid, [])
        _import_unit(syncher, keyword_handler, info.copy(), dept_syncher,
                     muni_by_name, bounding_box, gps_to_target_ct,
                     target_srid, department_id_to_uuid)
    syncher.finish()
    return dept_syncher, syncher
def import_services(syncher=None, noop=False, logger=None, importer=None,
                    ontologytrees=None, ontologywords=None):
    """Import the service-node tree and the services (ontologywords).

    BUGFIX: the defaults were ``ontologytrees=pk_get('ontologytree')`` and
    ``ontologywords=pk_get('ontologyword')``, which executed the resource
    fetch once at module import time and reused the same result for every
    call.  They now default to None and are fetched lazily per call;
    callers that pass explicit values are unaffected.
    """
    if ontologytrees is None:
        ontologytrees = pk_get('ontologytree')
    if ontologywords is None:
        ontologywords = pk_get('ontologyword')

    nodesyncher = ModelSyncher(ServiceNode.objects.all(), lambda obj: obj.id)
    servicesyncher = ModelSyncher(Service.objects.all(), lambda obj: obj.id)

    def save_object(obj):
        # Persist only when something changed; flag the importer so callers
        # know the service data was touched.
        if obj._changed:
            obj.last_modified_time = datetime.now(UTC_TIMEZONE)
            obj.save()
            if importer:
                importer.services_changed = True

    def _build_servicetree(ontologytrees):
        # Roots are entries without a parent_id; children are attached
        # recursively below.
        tree = [ot for ot in ontologytrees if not ot.get('parent_id')]
        for parent_ot in tree:
            _add_ot_children(parent_ot, ontologytrees)
        return tree

    def _add_ot_children(parent_ot, ontologytrees):
        parent_ot['children'] = [
            ot for ot in ontologytrees
            if ot.get('parent_id') == parent_ot['id']
        ]
        for child_ot in parent_ot['children']:
            _add_ot_children(child_ot, ontologytrees)

    def handle_service_node(d, keyword_handler):
        # Create-or-update a ServiceNode and recurse into its children.
        obj = nodesyncher.get(d['id'])
        if not obj:
            obj = ServiceNode(id=d['id'])
            obj._changed = True
        if save_translated_field(obj, 'name', d, 'name'):
            obj._changed = True
        if 'parent_id' in d:
            parent = nodesyncher.get(d['parent_id'])
            assert parent
        else:
            parent = None
        if obj.parent != parent:
            obj.parent = parent
            obj._changed = True
        related_services_changed = False
        if obj.service_reference != d.get('ontologyword_reference', None):
            obj.service_reference = d.get('ontologyword_reference')
            related_services_changed = True
            obj._changed = True
        save_object(obj)
        obj._changed = keyword_handler.sync_searchwords(obj, d, obj._changed)
        save_object(obj)
        nodesyncher.mark(obj)
        # Rebuild the related-services m2m when the reference changed or the
        # relation is still empty.
        if ((related_services_changed or obj.related_services.count() == 0)
                and obj.service_reference is not None):
            related_service_ids = {
                service_id for service_id in
                SERVICE_REFERENCE_SEPARATOR.split(obj.service_reference)
            }
            obj.related_services.set(related_service_ids)
        for child_node in d['children']:
            handle_service_node(child_node, keyword_handler)

    def handle_service(d, keyword_handler):
        # Create-or-update a Service from an ontologyword entry.
        obj = servicesyncher.get(d['id'])
        if not obj:
            obj = Service(id=d['id'])
            obj._changed = True
        obj._changed |= save_translated_field(obj, 'name', d, 'ontologyword')
        period_enabled = d['can_add_schoolyear']
        clarification_enabled = d['can_add_clarification']
        obj._changed |= period_enabled != obj.period_enabled
        obj._changed |= clarification_enabled != obj.clarification_enabled
        obj.period_enabled = period_enabled
        obj.clarification_enabled = clarification_enabled
        obj._changed = keyword_handler.sync_searchwords(obj, d, obj._changed)
        if obj._changed:
            obj.last_modified_time = datetime.now(UTC_TIMEZONE)
            obj.save()
            if importer:
                importer.services_changed = True
        servicesyncher.mark(obj)
        return obj

    tree = _build_servicetree(ontologytrees)
    keyword_handler = KeywordHandler(logger=logger)
    for d in tree:
        handle_service_node(d, keyword_handler)
    nodesyncher.finish()
    for d in ontologywords:
        handle_service(d, keyword_handler)
    servicesyncher.finish()
class ServiceImporter:
    """Imports Turku services ('palvelut') and the service-node tree
    ('palveluluokat'), syncing both against the database."""

    def __init__(self, logger=None, importer=None):
        self.logger = logger
        self.importer = importer
        # BUGFIX: these synchers were class attributes, so the querysets
        # were evaluated once at class-definition (module import) time and
        # the syncher state was shared between all instances — a second
        # import run would operate on stale, already-marked state.  Build
        # them per instance instead.
        self.nodesyncher = ModelSyncher(ServiceNode.objects.all(),
                                        lambda obj: obj.id)
        self.servicesyncher = ModelSyncher(Service.objects.all(),
                                           lambda obj: obj.id)

    def import_services(self):
        """Entry point: import services first, then the node tree that
        references them."""
        keyword_handler = KeywordHandler(logger=self.logger)
        self._import_services(keyword_handler)
        self._import_service_nodes(keyword_handler)

    def _import_service_nodes(self, keyword_handler):
        # Build the class tree and import each non-blacklisted root.
        service_classes = get_turku_resource("palveluluokat")
        tree = self._build_servicetree(service_classes)
        for parent_node in tree:
            if parent_node["koodi"] in BLACKLISTED_SERVICE_NODES:
                continue
            self._handle_service_node(parent_node, keyword_handler)
        self.nodesyncher.finish()

    def _import_services(self, keyword_handler):
        services = get_turku_resource("palvelut")
        for service in services:
            self._handle_service(service, keyword_handler)
        self.servicesyncher.finish()

    def _save_object(self, obj):
        # Persist only when something changed; flag the importer so callers
        # know the service data was touched.
        if obj._changed:
            obj.last_modified_time = datetime.now(UTC_TIMEZONE)
            obj.save()
            if self.importer:
                self.importer.services_changed = True

    def _build_servicetree(self, service_classes):
        # Roots are classes without an 'ylatason_koodi' (parent code).
        tree = [
            s_cls for s_cls in service_classes
            if "ylatason_koodi" not in s_cls
        ]
        for parent in tree:
            self._add_service_tree_children(parent, service_classes)
        return tree

    def _add_service_tree_children(self, parent_classes, service_classes):
        # Attach children whose parent code matches this node's code,
        # comparing as integers since codes arrive as strings.
        parent_classes["children"] = [
            s_cls for s_cls in service_classes
            if convert_code_to_int(s_cls.get("ylatason_koodi")) ==
            convert_code_to_int(parent_classes["koodi"])
        ]
        for child_ot in parent_classes["children"]:
            self._add_service_tree_children(child_ot, service_classes)

    def _handle_service_node(self, node, keyword_handler):
        # Create-or-update a ServiceNode and recurse into its children.
        node_id = convert_code_to_int(node["koodi"])
        obj = self.nodesyncher.get(node_id)
        if not obj:
            obj = ServiceNode(id=node_id)
            obj._changed = True
        if "nimi_kieliversiot" in node:
            set_syncher_tku_translated_field(obj, "name",
                                             node.get("nimi_kieliversiot"))
        else:
            name = node.get("nimi")
            set_syncher_object_field(obj, "name", name)
            set_syncher_object_field(obj, "name_fi", name)
        if "ylatason_koodi" in node:
            parent_id = convert_code_to_int(node["ylatason_koodi"])
            parent = self.nodesyncher.get(parent_id)
            assert parent
        else:
            parent = None
        if obj.parent != parent:
            obj.parent = parent
            obj._changed = True
        self._save_object(obj)
        if not node["koodi"].startswith(SERVICE_AS_SERVICE_NODE_PREFIX):
            self._handle_related_services(obj, node)
        else:
            # Prefixed codes represent a service acting as a node; store
            # the reference instead of relating actual services.
            set_syncher_object_field(obj, "service_reference",
                                     convert_code_to_int(node["koodi"]))
        self.nodesyncher.mark(obj)
        for child_node in node["children"]:
            self._handle_service_node(child_node, keyword_handler)

    def _handle_related_services(self, obj, node):
        # Rebuild the related-services m2m from the source data and flag a
        # change if the resulting id set differs from the old one.
        old_service_ids = set(obj.related_services.values_list("id",
                                                               flat=True))
        obj.related_services.clear()
        for service_data in node.get("palvelut", []):
            service_id = int(service_data.get("koodi"))
            try:
                service = Service.objects.get(id=service_id)
            except Service.DoesNotExist:
                # TODO fail the service node completely here?
                self.logger.warning(
                    'Service "{}" does not exist!'.format(service_id))
                continue
            obj.related_services.add(service)
        new_service_ids = set(obj.related_services.values_list("id",
                                                               flat=True))
        if old_service_ids != new_service_ids:
            obj._changed = True

    def _handle_service(self, service, keyword_handler):
        koodi = int(
            service["koodi"]
        )  # Cast to int as koodi should always be a stringified integer
        obj = self.servicesyncher.get(koodi)
        if not obj:
            obj = Service(id=koodi,
                          clarification_enabled=False,
                          period_enabled=False)
            obj._changed = True
        set_syncher_tku_translated_field(obj, "name",
                                         service.get("nimi_kieliversiot"))
        obj._changed = keyword_handler.sync_searchwords(
            obj, service, obj._changed)
        self._save_object(obj)
        self.servicesyncher.mark(obj)