def get_protocols_per_category_data():
    categories_qs = Attack.objects.values('category').distinct()
    categories = [qe['category'].strip() for qe in categories_qs]
    categories = sorted(list(set(categories)))

    protocols_qs = Attack.objects.values('protocol').distinct()
    protocols = [qe['protocol'].strip().upper() for qe in protocols_qs]
    protocols = sorted(list(set(protocols)))

    categ_proto_qs = Attack.objects.annotate(
        p_category=Trim('category'),
        p_protocol=Upper(Trim('protocol')))\
        .values('p_category', 'p_protocol')\
        .annotate(counter=Count('protocol'))

    categ_proto = {category: dict.fromkeys(protocols, 0) for category in categories}
    for qe in categ_proto_qs:
        categ_proto[qe['p_category']][qe['p_protocol']] += qe['counter']

    series = []
    for category in categories:
        element = {}
        element['name'] = category
        element['data'] = []
        for protocol in protocols:
            element['data'].append(categ_proto[category][protocol])
        series.append(element)

    return protocols, series
def get_individuals_affected_data():
    years_qs = Breach.objects.annotate(year=ExtractYear('date_created'))\
        .values('year').distinct()
    years = sorted([qe['year'] for qe in years_qs])

    breach_types_qs = Breach.objects.values('breach_type').distinct()
    breach_types = [breach_type.strip()
                    for qe in breach_types_qs
                    for breach_type in qe['breach_type'].split(',')]
    breach_types = sorted(list(set(breach_types)))

    indiv_affec_qs = Breach.objects.annotate(
        p_breach_type=Trim('breach_type'),
        p_year=ExtractYear('date_created'),)\
        .values('p_breach_type', 'p_year')\
        .annotate(total=Sum('individuals_affected'))

    indiv_affec = {breach_type: dict.fromkeys(years, 0) for breach_type in breach_types}
    for qe in indiv_affec_qs:
        for breach_type in qe['p_breach_type'].split(','):
            indiv_affec[breach_type.strip()][qe['p_year']] += qe['total']

    series = []
    for breach_type in breach_types:
        element = {}
        element['name'] = breach_type
        element['data'] = []
        for year in years:
            element['data'].append(indiv_affec[breach_type][year])
        series.append(element)

    return years, series
def tag_count(blog_user, topn=0):
    # TODO Move to model manager
    raw_tags = (Blog.blog.filter(user=blog_user)
                .order_by("tag")
                .values("tag")
                .annotate(count=Count("tag"), tag_new=Trim(Lower("tag"))))
    count_tags = dict()
    # TODO Split by tags with "," and those without
    for record in raw_tags:
        for tag in record["tag_new"].split(","):
            k = tag.strip()
            if len(k) > 0:
                count_tags[k] = count_tags.get(k, 0) + record["count"]
    # TODO Sort by value (desc) and then key (ascend) for common values
    if topn == 0:
        return {
            k: count_tags[k]
            for k in sorted(count_tags, key=count_tags.get, reverse=True)
        }
    else:
        return {
            k: count_tags[k]
            for k in sorted(count_tags, key=count_tags.get, reverse=True)[:topn]
        }
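A hedged usage sketch for the helper above. It assumes a Blog model with a custom manager named blog, a user foreign key, and a comma-separated tag CharField; none of that is shown in the snippet itself, so treat the call site as illustrative only.

# Hypothetical call site; the Blog model/manager are assumptions, only tag_count() comes from above.
# Trim(Lower("tag")) makes "  Django , ORM" and "django,orm" count toward the same keys.
from django.contrib.auth import get_user_model

user = get_user_model().objects.first()
top_five = tag_count(user, topn=5)   # e.g. a dict of the five most used tags, highest count first
all_tags = tag_count(user)           # full tag -> count mapping, sorted by count descending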
def get_location_breached_info_data():
    loc_bre_info_qs = Breach.objects.annotate(
        p_location=Lower(Trim('location_of_breached_info')))\
        .values('p_location')\
        .annotate(total=Count('location_of_breached_info'))

    loc_bre_info_keys = [location.strip().capitalize()
                         for qe in loc_bre_info_qs
                         for location in qe['p_location'].split(',')]
    loc_bre_info_keys = list(set(loc_bre_info_keys))

    loc_bre_info = {key: 0 for key in loc_bre_info_keys}
    for qe in loc_bre_info_qs:
        for key in qe['p_location'].split(','):
            loc_bre_info[key.strip().capitalize()] += qe['total']

    series = {
        'name': 'Brands',
        'colorByPoint': True,
        'data': []
    }
    for key in sorted(loc_bre_info_keys):
        series['data'].append({
            'name': key,
            'y': loc_bre_info[key]
        })

    return [series]
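For context, a minimal sketch of how a chart helper like this might be exposed to the front end. The view and URL name are assumptions; only get_location_breached_info_data() and its list-of-series return shape come from the snippet above.

# Hypothetical JSON endpoint; the function above already returns JSON-serialisable dicts.
from django.http import JsonResponse

def location_breached_info_chart(request):
    series = get_location_breached_info_data()
    return JsonResponse({'series': series})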
def node(self, n):
    self.nodes_current += 1
    # TODO: add 'shop' in n.tags also, more info in https://github.com/gravitystorm/openstreetmap-carto/blob/96c64fa5b0449e79c17e39626f3b8f38c96a12bb/project.mml#L1504
    if 'amenity' in n.tags and 'name' in n.tags and len(n.tags['name']) > 2:
        try:
            point = Point([float(n.location.x) / 10000000, float(n.location.y) / 10000000], srid=4326)
            # this is a little slow, but it uses indexes :)
            # TODO: improve this by using in-memory pandas queries like we did with the 'cross'
            q = Recorrido.objects \
                .order_by() \
                .annotate(cond=RawSQL("ST_Intersects(ST_Buffer(%s::geography, 400, 2)::geometry, ruta)", (point.ewkb,), output_field=BooleanField())) \
                .filter(cond=True) \
                .only('id') \
                .exists()
            if q:
                defaults = {
                    'tags': {k: v for k, v in n.tags},
                    'nom': n.tags['name'][:200],
                    'nom_normal': Substr(Trim(Upper(Unaccent(Value(n.tags['name'])))), 1, 200),
                    'latlng': point,
                    'country_code': king['country_code'],
                }
                Poi.objects.update_or_create(
                    osm_id=n.id,
                    osm_type='n',
                    defaults=defaults
                )
                self.nodes_added += 1
                print(f'[{self.nodes_current*100/nodes_count:7.3f}%] / Nodes added: {self.nodes_added} | processed: {self.nodes_current} / {nodes_count}')
        except Exception as e:
            print(f'Could not save, exception {e}')
def yesno_annotate(queryset, field):
    return queryset.annotate(lowered=Lower(Trim(field))).annotate(
        yesno=Case(
            When(lowered='yes', then=Value('yes')),
            default=Value('no'),
            output_field=CharField(),
        ),
    )
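A small usage sketch, assuming a hypothetical Response model with a free-text attended field; the helper collapses anything other than a trimmed, lower-cased "yes" into "no", which makes grouping straightforward:

# Hypothetical model and field names; only the Trim/Lower/Case pattern comes from the helper above.
from django.db.models import Count

qs = yesno_annotate(Response.objects.all(), 'attended')
summary = qs.values('yesno').annotate(total=Count('id'))
# e.g. one row per 'yes'/'no' bucket, regardless of stray whitespace or capitalisation in the data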
def test_expressions(self):
    author = Author.objects.annotate(backward=Reverse(Trim('name'))).get(pk=self.john.pk)
    self.assertEqual(author.backward, self.john.name[::-1])
    with register_lookup(CharField, Reverse), register_lookup(CharField, Length):
        authors = Author.objects.all()
        self.assertCountEqual(authors.filter(name__reverse__length__gt=7), [self.john, self.elena])
        self.assertCountEqual(authors.exclude(name__reverse__length__gt=7), [self.python])
def test_trim(self):
    Author.objects.create(name=' John ', alias='j')
    Author.objects.create(name='Rhonda', alias='r')
    authors = Author.objects.annotate(
        ltrim=LTrim('name'),
        rtrim=RTrim('name'),
        trim=Trim('name'),
    )
    self.assertQuerysetEqual(authors.order_by('alias'), [
        ('John ', ' John', 'John'),
        ('Rhonda', 'Rhonda', 'Rhonda'),
    ], lambda a: (a.ltrim, a.rtrim, a.trim))
def natural_sort(qs, field):
    return qs.annotate(
        ns_len=Length(field),
        ns_split_index=StrIndex(field, Value(' ')),
        ns_suffix=Trim(Substr(field, F('ns_split_index'), output_field=CharField())),
    ).annotate(
        ns_code=Trim(Substr(field, Value(1), 'ns_split_index', output_field=CharField())),
        ns_weight=Case(
            When(ns_split_index=0, then=Value(0)),
            default=Case(
                When(ns_suffix__regex=r'^\d+$', then=Cast(
                    Substr(field, F('ns_split_index'), output_field=CharField()),
                    output_field=IntegerField(),
                )),
                default=Value(1230),
                output_field=IntegerField()),
            output_field=IntegerField(),
        ),
    ).order_by('ns_code', 'ns_weight', 'ns_len', field)
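A usage sketch, assuming a hypothetical Room model whose name field holds values like 'A 1', 'A 10', 'A 2'. The annotations split each value on the first space and cast purely numeric suffixes to integers, so ordering becomes natural rather than lexicographic:

# Hypothetical model; natural_sort itself is the function shown above.
rooms = natural_sort(Room.objects.all(), 'name')
print(list(rooms.values_list('name', flat=True)))
# expected ordering: 'A 1', 'A 2', 'A 10' rather than 'A 1', 'A 10', 'A 2'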
def get_queryset(self):
    """ """
    current_user = self.request.user
    # Todo: guard this API so only admin or scheduler can call it.
    querying_attendee_id = self.kwargs.get('pk')
    querying_term = self.request.query_params.get('searchValue')

    if querying_attendee_id:
        qs = Attendee.objects.annotate(
            organization_slug=F('division__organization__slug'),
            attendingmeets=JSONBAgg(  # used by datagrid_assembly_data_attendees.js & datagrid_attendee_update_view.js
                Func(
                    Value('attending_id'), 'attendings__id',
                    Value('registration_assembly'), 'attendings__registration__assembly__display_name',
                    Value('registrant'),
                    Trim(
                        Concat(
                            Trim(
                                Concat(
                                    'attendings__registration__registrant__first_name',
                                    Value(' '),
                                    'attendings__registration__registrant__last_name'
                                )),
                            Value(' '),
                            Trim(
                                Concat(
                                    'attendings__registration__registrant__last_name2',
                                    'attendings__registration__registrant__first_name2'
                                )))),
                    function='jsonb_build_object'),
            ),
            # contacts=ArrayAgg('attendings__meets__slug', distinct=True),
        ).filter(division__organization=current_user.organization,
                 pk=querying_attendee_id)

    elif querying_term:
        qs = Attendee.objects.filter(infos__icontains=querying_term)

    return qs.filter(division__organization=current_user.organization)
def get_vulnerabilities_score_data():
    vuln_score_qs = Vulnerability.objects.annotate(
        p_name=Trim('name'))\
        .values('p_name')\
        .annotate(average=Avg('score'))

    series = {}
    series['name'] = "Vulnerabilities"
    series['colorByPoint'] = True
    series['data'] = [{'name': qe['p_name'], 'y': qe['average']} for qe in vuln_score_qs]

    return [series]
def populate_user_name_field(apps, field_name, model_class):
    """
    Populate related user's full name to equivalent char field prefixed
    with single underscore. The char field is expected to be in the same
    table as the foreign key.
    """
    User = apps.get_model('users', 'User')
    char_field_name = '_%s' % field_name
    full_name = (User.objects.filter(pk=OuterRef(field_name))
                 .annotate(full_name=Trim(Concat('first_name', V(' '), 'last_name')))
                 .values_list('full_name')[:1])
    model_class.objects.exclude(**{
        field_name: None
    }).update(**{char_field_name: Subquery(full_name)})
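A sketch of how a helper like this is typically invoked from a data migration. The app label, model, and field names below are assumptions: a hypothetical orders app whose Order model has a created_by foreign key to users.User plus a _created_by char column to denormalise into, with the helper defined in (or imported into) the migration module.

# Hypothetical migration wiring; the Subquery/Trim/Concat logic lives in the helper above.
from django.db import migrations


def forwards(apps, schema_editor):
    Order = apps.get_model('orders', 'Order')
    populate_user_name_field(apps, 'created_by', Order)


class Migration(migrations.Migration):
    dependencies = [('orders', '0001_initial')]
    operations = [migrations.RunPython(forwards, migrations.RunPython.noop)]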
def test_trim(self):
    Author.objects.create(name=" John ", alias="j")
    Author.objects.create(name="Rhonda", alias="r")
    authors = Author.objects.annotate(
        ltrim=LTrim("name"),
        rtrim=RTrim("name"),
        trim=Trim("name"),
    )
    self.assertQuerysetEqual(
        authors.order_by("alias"),
        [
            ("John ", " John", "John"),
            ("Rhonda", "Rhonda", "Rhonda"),
        ],
        lambda a: (a.ltrim, a.rtrim, a.trim),
    )
class DematSumRankView(ListView):
    # model = DematSum
    # if pagination is desired
    # paginate_by = 300
    # amfi_qset = Amfi.objects.filter(comp_isin=OuterRef('pk'))
    # queryset = DematSum.objects.annotate(comp_rank=Subquery(amfi_qset.values('comp_rank'))).order_by('comp_rank')
    # queryset = DematSum.objects.annotate(comp_rank=Subquery(amfi_qset.values('comp_rank')))
    amfi_qs = Amfi.objects.filter(comp_isin=OuterRef("isin_code_id"))
    queryset = DematSum.objects.all(). \
        annotate(comp_rank=Subquery(amfi_qs.values('comp_rank')[:1])). \
        annotate(cap_type=Lower(Trim(Subquery(amfi_qs.values('cap_type')[:1])))). \
        values('stock_symbol', 'comp_name', 'value_cost', 'comp_rank', 'cap_type'). \
        order_by('comp_rank')

    def get_context_data(self, **kwargs):
        context = super().get_context_data(**kwargs)
        return context
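For completeness, a hedged sketch of wiring such a ListView into a URLconf. The route, URL name, and template name are assumptions; only the queryset above comes from the source, and the view would still need a matching template to render.

# Hypothetical URL wiring for the view above.
from django.urls import path

urlpatterns = [
    path('demat/rank/',
         DematSumRankView.as_view(template_name='demat_rank.html'),
         name='demat-sum-rank'),
]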
def post(self, request, *args, **kwargs):
    messages_error = []
    messages_info = []
    imported_ids = []
    counter_records_updated = 0
    counter_records_created = 0
    filter_type = 'iexact'

    # get advanced options
    language = request.POST.get('language',
                                translation.get_supported_language_variant(settings.LANGUAGE_CODE))
    start_row = int(request.POST.get('start_row', config.START_ROW))
    header_row = int(request.POST.get('header_row', config.HEADER_ROW))
    template = request.POST.get('template', config.DEFAULT_TEMPLATE)
    date_format = request.POST.get('date_format', formats.get_format("SHORT_DATE_FORMAT"))

    # read excel file and sheet
    sheet = request.POST.get('sheet', _('data'))
    tmp_excel = request.POST.get('excel_file')
    excel_file = default_storage.open('{}/{}'.format('tmp', tmp_excel))
    uploaded_wb = load_workbook(excel_file)
    uploaded_ws = uploaded_wb[sheet]

    # check headers
    template_obj = Template.objects.get(id=template)
    headers = {cell.value: cell.col_idx - 1 for cell in uploaded_ws[header_row]}
    mapping = getattr(template_obj, __('mapping', language))

    # map headers to columns
    for model, fields in mapping.items():
        for field_name, field_data in fields.items():
            column_header = field_data['name']
            mapping[model][field_name]['column'] = headers[column_header]

    # create log event
    content = "Excel import using '{}' and template '{}'".format(tmp_excel, template)
    log = Log.objects.create(module='excel import', user=request.user.username, content=content)

    # import
    uploaded_ws.delete_rows(0, amount=start_row - 1)
    for row in uploaded_ws.iter_rows():
        # skips empty row, based on first 3 values
        if xstr(row[0].value) == '' and xstr(row[1].value) == '' and xstr(row[2].value) == '':
            continue

        # quick data validation
        error_message = validate_data(row, mapping)
        if error_message:
            messages_error.append(error_message)
            continue

        # create or update contact
        model = Contact
        model_fields = mapping['contact']
        row_dict = {}
        row_dict['source_id'] = 'excel'
        for field_name, field_data in model_fields.items():
            value = row[field_data['column']].value
            if value:
                if model._meta.get_field(field_name).get_internal_type() == 'PointField':
                    (lat, lng) = value.split(' ')[:2]
                    value = Point(float(lng), float(lat))
                if model._meta.get_field(field_name).get_internal_type() == 'CharField':
                    value = str(value)
                if model._meta.get_field(field_name).get_internal_type() == 'DateField':
                    if not row[field_data['column']].is_date:
                        value = parse_date(value, date_format)
            else:
                value = None
            # removes extra spaces if string
            if isinstance(value, str):
                value = xstr(value)
            row_dict[field_name] = value

        # there are two ways to look up a contact: name+doc and firstname+lastname+doc
        if {'name', 'document'} <= set(model_fields):
            contact = Contact.objects.filter(name__iexact=row_dict['name'],
                                             document__iexact=row_dict['document']).first()
        elif {'first_name', 'last_name', 'document'} <= set(model_fields):
            name = "{} {}".format(xstr(row_dict['first_name']), xstr(row_dict['last_name']))
            row_dict['name'] = xstr(name)
            contact = Contact.objects.filter(
                Q(name__iexact=row_dict['name'], document__iexact=row_dict['document']) |
                Q(first_name__iexact=row_dict['name'], last_name__iexact=row_dict['last_name'],
                  document__iexact=row_dict['document'])).first()
        else:
            raise Exception('Mapping needs more contact data fields')

        # create new organization if needed
        contact_organization = Organization.objects.filter(
            name__iexact=row_dict['organization']).first()
        if not contact_organization and row_dict['organization']:
            contact_organization = Organization.objects.filter(
                varname__iexact=row_dict['organization']).first()
        if not contact_organization and row_dict['organization']:
            messages_info.append('Create organization: {}'.format(row_dict['organization']))
            contact_organization = Organization()
            contact_organization.name = row_dict['organization']
            contact_organization.created_user = request.user.username
            contact_organization.log = log
            contact_organization.save()

        # create contact if needed
        if not contact:
            messages_info.append('Create contact: {}'.format(contact))
            contact = Contact()
            if contact_organization:
                contact.organization = contact_organization
            counter_records_created += 1
        else:
            messages_info.append('Update contact: {}'.format(contact))
            counter_records_updated += 1
        row_dict['log'] = log
        update_contact(request, contact, row_dict)
        imported_ids.append(contact.id)

        # create or update project contact
        model = ProjectContact
        model_fields = mapping['project_contact']
        row_dict = {}
        row_dict['source_id'] = 'excel'
        for field_name, field_data in model_fields.items():
            value = row[field_data['column']].value
            # removes extra spaces if string
            if isinstance(value, str):
                value = xstr(value)
            if value:
                if model._meta.get_field(field_name).get_internal_type() == 'ForeignKey':
                    related_model = model._meta.get_field(field_name).related_model
                    value = try_to_find(related_model, value)
                if model._meta.get_field(field_name).get_internal_type() == 'DateField':
                    if not row[field_data['column']].is_date:
                        value = parse_date(value, date_format)
            else:
                value = None
            row_dict[field_name] = value

        subproject = row_dict['subproject']
        project_contact = ProjectContact.objects.filter(subproject=subproject, contact=contact).first()
        if not project_contact:
            messages_info.append('Create participant: {} {}'.format(subproject, contact))
            project_contact = ProjectContact()
            project_contact.contact = contact
        else:
            messages_info.append('Update participant: {} {}'.format(subproject, contact))
        row_dict['log'] = log
        update_project_contact(request, project_contact, row_dict)

    # gets dupes
    contacts = Contact.objects.all()
    if self.request.user:
        contacts = contacts.for_user(self.request.user)
    contacts = contacts.annotate(
        name_uc=Trim(Upper(RegexpReplace(F('name'), r'\s+', ' ', 'g'))))
    # manually (python) counts dupes, because count messed up the distinct() filter
    names = {}
    for row in contacts:
        if row.name_uc not in names:
            names[row.name_uc] = 0
        names[row.name_uc] += 1
    names_uc = list(k_v[0] for k_v in names.items() if k_v[1] > 1)
    contacts_names_ids = contacts.values_list('id', flat=True).filter(name_uc__in=names_uc)

    # manually (python) counts dupes, because count messed up the distinct() filter
    contacts = contacts.filter(document__isnull=False).exclude(document='')
    docs = {}
    for row in contacts:
        if row.document not in docs:
            docs[row.document] = 0
        docs[row.document] += 1
    documents = list(k_v[0] for k_v in docs.items() if k_v[1] > 1)

    contacts = Contact.objects.filter(id__in=imported_ids) \
        .filter(Q(id__in=contacts_names_ids) | Q(document__in=documents)) \
        .values(contact_id=F('id'),
                contact_name=Coalesce('name', Value('')),
                contact_sex=Coalesce('sex_id', Value('')),
                contact_document=Coalesce('document', Value('')),
                contact_organization=Coalesce('organization__name', Value('')),
                )

    context = {}
    context['excel_file'] = tmp_excel
    context['messages_error'] = messages_error
    context['messages_info'] = messages_info
    context['quantity_records_updated'] = counter_records_updated
    context['quantity_records_created'] = counter_records_created
    context['model'] = list(contacts)
    return render(request, self.template_name, context)
def handle(self, *args, **options):
    groups = Kontgrp.objects.filter(
        kont__kurs__in=(1, 2, 3),
        kont__id__in=Ownres.objects.filter(ownerid__in=(
            9, 18, 21, 17, 16, 24, 13, 7, 12, 10, 41, 8, 20, 11, 14, 43, 28,
            36, 35, 34, 40, 32, 26, 31, 29, 42, 27, 38, 30, 33,
        )).values("objid"),
    ).order_by("title")

    raspis = Raspis.objects.annotate(
        kont_id=F("raspnagr__kontid"),
        prep=Trim(F("raspnagr__prep__short_name")),
    ).filter(
        Q(aud__in=Auditory.objects.filter(
            Q(korp=19) | Q(id__in=[489, 413, 367, 369, 64, 634])).values("id")) |
        Q(aud__in=AudList.objects.filter(
            aud__in=[489, 413, 367, 369, 64, 634]).values("auds")),
        raspnagr__kontid__in=groups.values("id"),
    ).order_by("raspnagr__kontid__title", "everyweek", "day", "para")

    raspis = {
        kont_id: list(items)
        for kont_id, items in groupby(raspis, lambda x: x.kont_id)
    }

    document = Document()
    auditories = {i.id: i for i in Auditory.objects.all()}
    auditories_list = {i.id: i for i in AudSps.objects.all()}

    for group in groups:
        raspis_items = raspis.get(group.id, [])
        if raspis_items:
            p = document.add_paragraph()
            r = p.add_run(group.title)
            r.font.bold = True
            for item in raspis_items:
                every_week = "еженедельная" if item.everyweek == 2 else "нечетная" if item.day <= 7 else "четная"
                day = {
                    1: "понедельник",
                    2: "вторник",
                    3: "среда",
                    4: "четверг",
                    5: "пятница",
                    6: "суббота",
                }.get(item.day % 7)
                para = "{}-ая пара".format(item.para)
                aud_title = auditories.get(item.aud, auditories_list.get(item.aud)).obozn
                p.add_run("\n{}".format(" ".join([
                    str(aud_title.strip()),
                    str(every_week),
                    str(day),
                    str(para),
                    str(item.prep),
                ])))
        else:
            print(group.title)
def _handle(self, *args, **options):
    king = kings[options['king']]
    run_timestamp = datetime.now()
    inputfile = f'/tmp/osm-{king["name"]}.pbf'

    if options['download']:
        self.out1(f'Descargando mapa de {king["name"]} de geofabrik')
        # url = 'http://download.geofabrik.de/south-america-latest.osm.pbf'
        url = king['url']
        self.out2(url)
        f, d = request.urlretrieve(url, inputfile, lambda nb, bs, fs, url=url: self.reporthook(nb, bs, fs, url))

    if 'clip_country' in king and king['clip_country'] == True:
        self.out1('Clip big pbf file to the country adminarea')
        self.out2('get full country admin area polygon')
        KING_ADMIN_AREA = get_admin_area(inputfile, king['id'], self.out2)
        self.out2('make poly file for osmconvert')
        filename = make_poly_file(KING_ADMIN_AREA['geometry'].buffer(0.001))
        self.out2(f'generated temp poly file at {filename}')
        self.out2('run osmconvert to clip pbf file')
        # result = subprocess.run(['osmconvert', inputfile, f'-B={filename}', '--complete-ways', '--complete-multipolygons', f'-o={inputfile}-cropped.pbf'], stdout=subprocess.PIPE)
        result = subprocess.run(['osmium', 'extract', f'-p{filename}', inputfile, f'-o{inputfile}-cropped.pbf'], stdout=subprocess.PIPE)
        inputfile = f'{inputfile}-cropped.pbf'
        self.out2(f'pbf clipped at {inputfile}')

    #########################
    #  Adminareas from OSM  #
    #########################
    # 1. ./manage.py update_osm --download --admin_areas
    # 2. ./manage.py update_osm -f /tmp/part-all.o5m --admin_areas
    if options['admin_areas']:
        self.out1('Admin Areas')

        KING_ID = king['id']  # osm_id king
        OLD_KING = list(AdministrativeArea.objects.filter(osm_id=KING_ID))
        admin_areas, KING = get_admin_areas(run_timestamp, inputfile, king['country_code'], KING_ID, self.out2)
        KING_GEOM_BUFF = KING['geometry_simple'].buffer(0.01)

        def fuzzy_contains(out_geom, in_geom, buffer=0, attempts=3):
            try:
                return (
                    out_geom.intersects(in_geom) and  # optimization
                    out_geom.buffer(buffer).contains(in_geom)
                )
            except GEOSException as e:
                if attempts > 0:
                    self.out2(f'''
                        out_geom.valid: {out_geom.valid}
                        out_geom.valid_reason: {out_geom.valid_reason}
                        in_geom.valid: {in_geom.valid}
                        in_geom.valid_reason: {in_geom.valid_reason}
                    ''')
                    return fuzzy_contains(maybe_make_valid(out_geom), maybe_make_valid(in_geom), buffer, attempts - 1)
                else:
                    raise

        def get_parent_aa(node, geometry):
            try:
                if (
                    node['data']['osm_id'] is KING_ID or
                    fuzzy_contains(node['data']['geometry_simple'], geometry, 0.01)
                ):
                    parent_aa = None
                    for child in node['children']:
                        parent_aa = get_parent_aa(child, geometry)
                        if parent_aa is not None:
                            break
                    if parent_aa is None:
                        return node
                    else:
                        return parent_aa
                else:
                    return None
            except Exception:
                # print('node.geometry', node['data']['geometry'])
                print('node.data', node['data']['name'])
                print('node.osm_id', node['data']['osm_id'])
                print('node.osm_type', node['data']['osm_type'])
                # traceback.print_exc()
                raise

        tree = {
            'children': [],
            'data': {
                'import_timestamp': run_timestamp,
                'geometry': KING['geometry_simple'],
                'geometry_simple': KING['geometry_simple'],
                'osm_id': KING['osm_id'],
                'osm_type': KING['osm_type'],
                'name': KING['name'],
                'tags': KING['tags'],
                'country_code': king['country_code'],
            }
        }

        for li in admin_areas:
            # aa = admin area
            for aa in li:
                try:
                    if not aa['geometry'].intersects(KING_GEOM_BUFF):
                        continue
                except GEOSException as e:
                    self.out2(f'{str(e)}\n{aa["osm_id"]} {aa["name"]}')
                try:
                    parent_aa = get_parent_aa(tree, aa['geometry'])
                    aa.pop('admin_level')
                    if 'ways' in aa:
                        aa.pop('ways')
                    else:
                        self.out2(f" {aa['osm_id']}: {aa['name']}, does not have 'ways' attribute")
                    if parent_aa is None:
                        tree['children'].append({'children': [], 'data': aa})
                    else:
                        parent_aa['children'].append({'children': [], 'data': aa})
                except GEOSException as e:
                    self.out2(f'{str(e)}\n{tree["data"]["osm_id"]} {tree["data"]["name"]}\n{aa["osm_id"]} {aa["name"]}')

        def print_tree(node, level=0):
            print(f'{" " * level} {level} {node["data"]["name"].decode("utf-8")}')
            for node in node['children']:
                print_tree(node, level + 1)

        # print_tree(tree)

        AdministrativeArea.load_bulk([tree])
        for K in OLD_KING:
            K.delete()

        # fix invalid geometries
        # TODO: I think these should be makeValid(ated) earlier in the process, not here but ASAP
        # that way we would avoid some issues around intersections that fail earlier in the process of creating the adminareas tree
        # the makevalid function is only available in postgis (is not in a library like GEOS)
        # in ~4000 shapes we had 10 not valid, so we can use something like `if not geom.valid: cursor.exec('SELECT ST_MAKEVALID(POLYGON('WKT text here'));')`
        AdministrativeArea.objects.filter(geometry_simple__isvalid=False).update(geometry_simple=MakeValid(F('geometry_simple')))
        AdministrativeArea.objects.filter(geometry__isvalid=False).update(geometry_simple=MakeValid(F('geometry')))

    #########################
    #  Recorridos from OSM  #
    #########################
    if options['cross']:
        crs = {'init': 'epsg:4326'}
        self.out1('Cross osm recorridos')
        self.out2('Obteniendo bus routes de osm planet_osm_line')
        bus_routes = gpd.read_postgis(
            """
            # esto cambiarlo para no usar mas planet_osm_line (osm2pgsql), usar osmosis para construir las bus_routes
            # SELECT
            #     @osm_id AS osm_id, -- @=modulus operator
            #     name,
            #     ref,
            #     st_linemerge(st_union(way)) AS way
            # FROM
            #     planet_osm_line
            # WHERE
            #     route = 'bus'
            # GROUP BY
            #     osm_id,
            #     name,
            #     ref
            """,
            connection,
            geom_col='way',
            crs=crs
        )
        bus_routes.set_index('osm_id', inplace=True)

        self.out2('Creando geodataframe')
        bus_routes_buffer = gpd.GeoDataFrame({
            'osm_id': bus_routes.index,
            'way': bus_routes.way,
            'way_buffer_40': bus_routes.way.buffer(0.0004),
            'way_buffer_40_simplify': bus_routes.way.simplify(0.0001).buffer(0.0004),
            'name': bus_routes.name
        }, crs=crs).set_geometry('way_buffer_40_simplify')

        self.out2('Obteniendo recorridos de cualbondi core_recorridos')
        core_recorrido = gpd.read_postgis(
            """
            SELECT
                cr.id,
                cr.nombre,
                cr.linea_id,
                cr.ruta,
                cl.nombre AS linea_nombre
            FROM
                core_recorrido cr
                JOIN core_linea cl ON (cr.linea_id = cl.id)
                -- JOIN catastro_ciudad_recorridos ccr ON (ccr.recorrido_id = cr.id)
            --WHERE
            --    ccr.ciudad_id = 1
            ;
            """,
            connection,
            geom_col='ruta',
            crs=crs
        )
        core_recorrido.set_index('id', inplace=True)

        self.out2('Creando geodataframe')
        core_recorrido_buffer = gpd.GeoDataFrame({
            'id': core_recorrido.index,
            'ruta': core_recorrido.ruta.simplify(0.0001),
            'ruta_buffer_40_simplify': core_recorrido.ruta.simplify(0.0001).buffer(0.0004),
            'nombre': core_recorrido.nombre,
            'linea_id': core_recorrido.linea_id,
        }, crs=crs).set_geometry('ruta')

        self.out2('Generando intersecciones')
        intersections = gpd.sjoin(core_recorrido_buffer, bus_routes_buffer, how='inner', op='intersects')

        self.out2('Copiando indice, id')
        intersections['id'] = intersections.index
        self.out2('Copiando indice, osm_id')
        intersections['osm_id'] = intersections.index_right
        self.out2('Drop indice, osm_id')
        intersections.drop('index_right', inplace=True, axis=1)
        self.out2('Generando match [id, osm_id]')
        intersections = intersections[['id', 'osm_id']]
        self.out2('Generando indice de match [id, osm_id]')
        intersections.index = range(len(intersections))

        self.out2('Generando way_buffer_40_simplify')
        way_buffer_40_simplify = gpd.GeoSeries(
            bus_routes_buffer.loc[intersections.osm_id].way_buffer_40_simplify.values, crs=crs)

        self.out2('Generando ruta_buffer_40_simplify')
        ruta_buffer_40_simplify = gpd.GeoSeries(
            core_recorrido_buffer.loc[intersections.id].ruta_buffer_40_simplify.values, crs=crs)

        self.out2('Generando symmetric_difference')
        diffs = ruta_buffer_40_simplify.symmetric_difference(way_buffer_40_simplify).area.values

        self.out2('Generando norm_factor')
        norm_factor = ruta_buffer_40_simplify.area.values + way_buffer_40_simplify.area.values

        self.out2('Generando diffs')
        diffs = (diffs / norm_factor).tolist()

        self.out2('Pasando osm_ids a lista')
        osm_ids = intersections.osm_id.values.tolist()

        self.out2('Pasando osm_names a lista')
        osm_names = bus_routes.loc[osm_ids].name.values.tolist()
        # ways = bus_routes.loc[osm_ids].way.map(lambda x: x.wkb).values.tolist()

        self.out2('Pasando recorrido_ids de intersections a lista')
        recorrido_ids = intersections['id'].values.tolist()

        self.out2('Pasando linea_ids a lista')
        linea_ids = core_recorrido.loc[recorrido_ids].linea_id.values.tolist()
        # rutas = core_recorrido.loc[recorrido_ids].ruta.map(lambda x: x.wkb).values.tolist()

        self.out2('Pasando recorrido_nombres a lista')
        recorrido_nombres = core_recorrido.loc[recorrido_ids].nombre.values.tolist()
        # ruta_buffer_40_simplifys = ruta_buffer_40_simplify.map(lambda x: x.wkb).values.tolist()
        # way_buffer_40_simplifys = way_buffer_40_simplify.map(lambda x: x.wkb).values.tolist()

        self.out2('Pasando linea_nombres a lista')
        linea_nombres = core_recorrido.loc[recorrido_ids].linea_nombre.values.tolist()

        self.out2('DROP TABLE crossed_areas')
        cu = connection.cursor()
        cu.execute("DROP TABLE IF EXISTS crossed_areas;")
        cu.execute('DROP INDEX IF EXISTS crossed_areas_recorrido_id;')
        cu.execute('DROP INDEX IF EXISTS crossed_areas_area;')

        self.out2('CREATE TABLE crossed_areas')
        cu.execute(
            """
            CREATE TABLE crossed_areas (
                area FLOAT,
                linea_id INTEGER,
                recorrido_id INTEGER,
                osm_id BIGINT,
                linea_nombre VARCHAR(100),
                recorrido_nombre VARCHAR(100),
                osm_name TEXT
            );
            """
        )

        self.out2('Preparando lista de values')
        data = list(zip(diffs, linea_ids, recorrido_ids, osm_ids, linea_nombres, recorrido_nombres, osm_names))

        self.out2('Ejecutando insert query')
        insert_query = """
            INSERT INTO crossed_areas (
                area,
                linea_id,
                recorrido_id,
                osm_id,
                linea_nombre,
                recorrido_nombre,
                osm_name
            )
            VALUES %s
        """
        execute_values(cu, insert_query, data)

        self.out2('Commit insert query')
        connection.commit()

        self.out2('Generando indice crossed_areas_recorrido_id')
        cu.execute('CREATE INDEX crossed_areas_recorrido_id ON crossed_areas (recorrido_id);')
        cu.execute('CREATE INDEX crossed_areas_area ON crossed_areas (area);')

        self.out2('LISTO!')

    if options['update_routes']:
        # TODO: consider also trains / trams / things that have fixed stops
        self.out1('UPDATE ROUTES FROM OSM')
        self.out2('process .osm input file')
        # we can see if all tags are ok and give hints to mappers according to spec:
        # https://wiki.openstreetmap.org/w/index.php?oldid=625726
        # https://wiki.openstreetmap.org/wiki/Buses
        p = pyosmptparser.Parser(inputfile)
        pts = p.get_public_transports(500)
        routetypes_stops = ['train', 'subway', 'monorail', 'tram', 'light_rail']
        routetypes = routetypes_stops + ['bus', 'trolleybus']
        buses = {}
        counters = {}
        for pt in pts:
            self.out2(pt.id, end=': ')
            buses[pt.id] = {
                'pt': pt,
                'way': LineString(pt.geometry[0]) if pt.status.code < 500 and len(pt.geometry) == 1 else None,
                'paradas_completas': (pt.tags['route'] in routetypes_stops or len(pt.stops) > 20) and king['paradas_completas'],
            }
            counters.setdefault(pt.status.code, 0)
            counters[pt.status.code] += 1
            self.out2('{}: {} > {}'.format(pt.status.code, pt.status.detail, pt.tags['name'], start=''))

        self.out2('status | count')
        for key, counter in sorted(counters.items(), key=lambda e: e[1], reverse=True):
            self.out2('{} | {}'.format(key, counter))

        # HINT: run migrations in order to have osmbot in the db
        user_bot_osm = get_user_model().objects.get(username='******')

        self.out2('fixer routine')
        # add all as new routes
        if options['add_routes']:
            for bus_osm_id, bus in buses.items():
                # try to fix way, returns None if it can't
                way = bus['way']
                # recorrido proposed creation checks
                if bus['way'] is None:
                    self.out2('{} : SKIP {}'.format(bus['pt'].id, bus['pt'].status.code))
                    continue
                # set proposal fields
                # rp.paradas_completas = len(bus['stops']) > options['paradas_completas_threshold'] o usar una config en el pais KINGs
                rp = RecorridoProposed(nombre=bus['pt'].tags['name'][:199])
                rp.osm_id = bus['pt'].id
                rp.ruta = bus['way']
                rp.ruta_last_updated = datetime.utcfromtimestamp(int(bus['pt'].info['timestamp'])).replace(tzinfo=pytz.utc)
                rp.osm_version = int(bus['pt'].info['version'])
                rp.import_timestamp = run_timestamp
                rp.paradas_completas = bus['paradas_completas']
                rp.type = bus['pt'].tags['route']
                rp.king = king['id']
                rp.country_code = king['country_code']
                if not options['dry-run']:
                    rp.save(user=user_bot_osm)
                self.out2('{} | AUTO ACCEPTED!'.format(bus['pt'].id))
                if not options['dry-run']:
                    rp.aprobar(user_bot_osm)

                # add stops!
                if not options['dry-run'] and len(bus['pt'].stops) > 0:
                    self.out2(f'ADDing STOPS {len(bus["pt"].stops)}', end=' > ')
                    count_created = 0
                    count_associated = 0
                    for s in bus['pt'].stops:
                        parada, created = Parada.objects.update_or_create(
                            osm_id=s.id,
                            defaults={
                                'import_timestamp': run_timestamp,
                                'nombre': s.tags['name'] if 'name' in s.tags else f'{s.lon}, {s.lat}',
                                'latlng': Point(s.lon, s.lat),
                                'tags': s.tags,
                                'country_code': king['country_code'],
                            }
                        )
                        if created:
                            count_created = count_created + 1
                        horario, created = Horario.objects.update_or_create(
                            recorrido=rp.recorrido,
                            parada=parada,
                        )
                        if created:
                            count_associated = count_associated + 1
                    self.out2(f'CREATED STOPS {count_created}, ASSOCIATED {count_associated}')
        else:
            for rec in Recorrido.objects.filter(osm_id__isnull=False, ruta__intersects=AdministrativeArea.objects.get(osm_id=king['id']).geometry):
                # try to fix way, returns None if it can't
                adminareas_str = ', '.join(AdministrativeArea.objects.filter(geometry__intersects=rec.ruta).order_by('depth').values_list('name', flat=True))
                osm_id = rec.osm_id
                if osm_id in buses:
                    way = buses[osm_id]['way']
                    status = buses[osm_id]['pt'].status.code
                    osm_osm_version = buses[osm_id]['pt'].info['version']
                    osm_last_updated = buses[osm_id]['pt'].info['timestamp']
                    name = buses[osm_id]['pt'].tags['name']
                    paradas_completas = buses[osm_id]['paradas_completas']
                    routetype = buses[osm_id]['pt'].tags['route']
                else:
                    way = None
                    status = None
                    osm_osm_version = -1
                    osm_last_updated = None
                    name = None
                    paradas_completas = None
                    routetype = None

                ilog = ImporterLog(
                    osm_id=osm_id,
                    osm_version=osm_osm_version,
                    osm_timestamp=osm_last_updated,
                    run_timestamp=run_timestamp,
                    proposed=False,
                    accepted=False,
                    status=status,
                    proposed_reason='',
                    accepted_reason='',
                    osm_administrative=adminareas_str,
                    osm_name=name,
                    type=routetype,
                    king=king['name'],
                )
                ilog.save()

                # recorrido proposed creation checks
                if way is None:
                    self.out2('{} | {} : SKIP {}'.format(rec.id, osm_id, status))
                    ilog.proposed_reason = 'broken'
                    ilog.save()
                    continue
                if rec.ruta_last_updated >= osm_last_updated:
                    self.out2('{} | {} : SKIP, older than current recorrido {}, ({} >= {})'.format(rec.id, osm_id, status, rec.ruta_last_updated, osm_last_updated))
                    ilog.proposed_reason = 'older than current recorrido'
                    ilog.save()
                    continue
                # check if there is another proposal already submitted (with same timestamp or greater)
                if RecorridoProposed.objects.filter(osm_id=osm_id, parent=rec.uuid, ruta_last_updated__gte=osm_last_updated).exists():
                    self.out2('{} | {} : SKIP, older than prev proposal {}'.format(rec.id, osm_id, status))
                    ilog.proposed_reason = 'older than previous proposal'
                    ilog.save()
                    continue

                # update previous proposal if any
                previous_proposals = RecorridoProposed.objects.filter(
                    osm_id=osm_id,
                    parent=rec.uuid,
                    ruta_last_updated__lt=osm_last_updated,
                    logmoderacion__newStatus='E'
                ).order_by('-ruta_last_updated')
                if len(previous_proposals) > 0:
                    proposal_info = 'UPDATE prev proposal'
                    rp = previous_proposals[0]
                # else create a new proposal
                else:
                    proposal_info = 'NEW prev proposal'
                    rp = RecorridoProposed.from_recorrido(rec)

                # set proposal fields
                rp.ruta = way
                rp.ruta_last_updated = osm_last_updated
                rp.osm_version = osm_osm_version  # to not be confused with Recorrido.osm_version
                rp.import_timestamp = run_timestamp
                rp.paradas_completas = paradas_completas if paradas_completas is not None else king['paradas_completas']
                rp.type = routetype
                if not options['dry-run']:
                    rp.save(user=user_bot_osm)
                ilog.proposed = True
                ilog.save()

                # AUTO ACCEPT CHECKS
                if rec.osm_version is None:
                    self.out2('{} | {} : {} | NOT auto accepted: previous accepted proposal does not come from osm'.format(rec.id, osm_id, proposal_info))
                    ilog.accepted_reason = 'previous accepted proposal does not come from osm'
                    ilog.save()
                    continue
                if RecorridoProposed.objects.filter(parent=rec.uuid).count() > 1:
                    self.out2('{} | {} : {} | NOT auto accepted: another not accepted recorridoproposed exists for this recorrido'.format(rec.id, osm_id, proposal_info))
                    ilog.accepted_reason = 'another not accepted proposal exists for this recorrido'
                    ilog.save()
                    continue

                self.out2('{} | {} : {} | AUTO ACCEPTED!'.format(rec.id, osm_id, proposal_info))
                if not options['dry-run']:
                    rp.aprobar(user_bot_osm)
                ilog.accepted = True
                ilog.save()

    #
    # TODO: think how to do "stops" change proposals
    # the recorrido could just hold a list of stops, a list of latlngs and nothing more
    # changing a stop also changes the recorrido. The problem is that stops are shared
    #

    #########################
    #     POIs from OSM     #
    #########################
    if options['pois']:
        self.out2('Eliminando indices viejos de la base de datos')
        cu = connection.cursor()
        # cu.execute('DROP INDEX IF EXISTS catastrocalle_nomnormal_gin;')
        cu.execute('DROP INDEX IF EXISTS catastropoi_nomnormal_gin;')

        self.out2('counting')
        result = subprocess.run(f'osmium fileinfo -g data.count.nodes -e {inputfile}'.split(' '), stdout=subprocess.PIPE)
        nodes_count = int(result.stdout)
        self.out2(f'end counting = {nodes_count}')

        class Unaccent(Func):
            function = 'UNACCENT'
            arity = 1

        class RegexpReplace(Func):
            function = 'REGEXP_REPLACE'
            arity = 3

        class POIsHandler(osmium.SimpleHandler):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)
                self.nodes_current = 0
                self.nodes_added = 0

            # # TODO: do we need the streets for now? there are a lot of them in Spain
            # def way(self, w):
            #     if 'highway' in w.tags and 'route' not in w.tags and 'name' in w.tags:
            #         points = []
            #         for node in w.nodes:
            #             points.append([float(node.x) / 10000000, float(node.y) / 10000000])
            #         linestring = LineString(points, srid=4326)
            #         if Recorrido.objects.filter(ruta__intersects=linestring).exists():
            #             print('|', end='')
            #             Calle.objects.update_or_create(
            #                 osm_id=w.id,
            #                 defaults={
            #                     'nom': w.tags['name'][:200],
            #                     'nom_normal': Substr(Trim(RegexpReplace(Upper(Unaccent(Value(w.tags['name']))), r'AV\.|AVENIDA|CALLE|DIAGONAL|BOULEVARD', '')), 1, 199),
            #                     'way': linestring,
            #                 }
            #             )

            def node(self, n):
                self.nodes_current += 1
                # TODO: add 'shop' in n.tags also, more info in https://github.com/gravitystorm/openstreetmap-carto/blob/96c64fa5b0449e79c17e39626f3b8f38c96a12bb/project.mml#L1504
                if 'amenity' in n.tags and 'name' in n.tags and len(n.tags['name']) > 2:
                    try:
                        point = Point([float(n.location.x) / 10000000, float(n.location.y) / 10000000], srid=4326)
                        # this is a little slow, but it uses indexes :)
                        # TODO: improve this by using in-memory pandas queries like we did with the 'cross'
                        q = Recorrido.objects \
                            .order_by() \
                            .annotate(cond=RawSQL("ST_Intersects(ST_Buffer(%s::geography, 400, 2)::geometry, ruta)", (point.ewkb,), output_field=BooleanField())) \
                            .filter(cond=True) \
                            .only('id') \
                            .exists()
                        if q:
                            defaults = {
                                'tags': {k: v for k, v in n.tags},
                                'nom': n.tags['name'][:200],
                                'nom_normal': Substr(Trim(Upper(Unaccent(Value(n.tags['name'])))), 1, 200),
                                'latlng': point,
                                'country_code': king['country_code'],
                            }
                            Poi.objects.update_or_create(
                                osm_id=n.id,
                                osm_type='n',
                                defaults=defaults
                            )
                            self.nodes_added += 1
                            print(f'[{self.nodes_current*100/nodes_count:7.3f}%] / Nodes added: {self.nodes_added} | processed: {self.nodes_current} / {nodes_count}')
                    except Exception as e:
                        print(f'Could not save, exception {e}')

        self.out2('POIS from ways and nodes with osmosis')
        h = POIsHandler()
        with transaction.atomic():
            h.apply_file(inputfile, locations=True)

        self.out2('POIS from AdministrativeAreas in database')
        adminareas = AdministrativeArea.objects.get(osm_id=king['id']).get_descendants()
        adminareascount = adminareas.count()
        i = 0
        for aa in adminareas:
            i = i + 1
            self.out2(f' [{i*100/adminareascount:7.3f}%] {aa.name}')
            Poi.objects.update_or_create(
                osm_id=aa.osm_id,
                osm_type=aa.osm_type,
                defaults={
                    'tags': aa.tags,
                    'nom': aa.name,
                    'nom_normal': Substr(Trim(Upper(Unaccent(Value(aa.name)))), 1, 200),
                    'latlng': aa.geometry.centroid,
                    'country_code': king['country_code'],
                }
            )

        self.out2('Generando slugs')
        total = Poi.objects.filter(slug__isnull=True).count()
        i = 0
        start = time.time()
        for o in Poi.objects.filter(slug__isnull=True):
            o.save()
            i = i + 1
            if i % 50 == 0 and time.time() - start > 1:
                start = time.time()
                self.out2('{}/{} ({:2.0f}%)'.format(i, total, i * 100.0 / total))

        # merge catastro_poicb (13 y 60, 13 y 66, 13 y 44) with catastro_poi (osm_pois)
        # self.out2('Mergeando POIs propios de cualbondi')
        # for poicb in Poicb.objects.all():
        #     Poi.objects.create(nom_normal = poicb.nom_normal.upper(), nom = poicb.nom, latlng = poicb.latlng)
        # self.out2('Purgando nombres repetidos')
        # cu.execute('delete from catastro_poi where id not in (select min(id) from catastro_poi group by nom_normal)')

        self.out1('Regenerando indices')
        # self.out2('Generando catastro_calle')
        # cu.execute('CREATE INDEX catastrocalle_nomnormal_gin ON catastro_calle USING gin (nom_normal gin_trgm_ops);')
        self.out2('Generando catastro_poi')
        cu.execute('CREATE INDEX catastropoi_nomnormal_gin ON catastro_poi USING gin (nom_normal gin_trgm_ops);')

    ############################
    #  Intersections from OSM  #
    ############################
    if options['intersections']:
        self.out1('Generando Intersecciones')
        cu = connection.cursor()
        cu.execute('delete from catastro_interseccion')
        cu.execute('''
            SELECT
                SEL1.nom || ' y ' || SEL2.nom as nom,
                upper(translate(SEL1.nom || ' y ' || SEL2.nom, 'áéíóúÁÉÍÓÚäëïöüÄËÏÖÜñÑàèìòùÀÈÌÒÙ', 'AEIOUAEIOUAEIOUAEIOUNNAEIOUAEIOU')) as nom_normal,
                ST_Intersection(SEL1.way, SEL2.way) as latlng
            FROM
                catastro_calle AS SEL1
                join catastro_calle as SEL2 on (ST_Intersects(SEL1.way, SEL2.way) and ST_GeometryType(ST_Intersection(SEL1.way, SEL2.way)::Geometry)='ST_Point')
        ''')
        self.out2('Generando slugs')
        intersections = cu.fetchall()
        total = len(intersections)
        i = 0
        for inter in intersections:
            i = i + 1
            Interseccion.objects.create(nom=inter[0], nom_normal=inter[1], latlng=inter[2])
            if (i * 100.0 / total) % 1 == 0:
                self.out2('{:2.0f}%'.format(i * 100.0 / total))

    # self.out1('Eliminando tablas no usadas')
    # cu.execute('drop table planet_osm_roads;')
    # cu.execute('drop table planet_osm_polygon;')
    # cx.commit()
    # cx.close()

    self.out1('LISTO!')