def handle(self, *args, **options):
    """Scrape location ids from the remote site and dump them to JSON.

    Stage 1 reads one region per line from ``data/regions.txt`` and follows
    two levels of ``<option>`` lists (table rows 2 and 3) starting at
    ``URL + region_id`` to collect second level ids.

    Stage 2 fetches the page of every distinct collected id and stores, for
    each row of the ``list`` table, the 8-character id from the second cell
    mapped to the text of the third cell.

    The mapping is written to ``data/locations.json`` as UTF-8.
    """
    table_xpath = "//table[@width='100%' and @cellspacing='2' and @cellpadding='5']"

    def option_values(url, row):
        # Values of all <option> elements found in the given table row.
        options_xpath = table_xpath + "//tr[" + str(row) + "]//option"
        selector = HtmlXPathSelector(text=read_url(url))
        return [option.select("@value").extract()[0]
                for option in selector.select(options_xpath)]

    print("Downloading second level ids")
    ids = []
    regions_path = os.path.join(settings.PROJECT_PATH, 'data', 'regions.txt')
    with open(regions_path) as regions_file:
        for i, line in enumerate(regions_file, 1):
            # Each line is "<region_id> <name>"; only the id is needed.
            region_id, _name = line.strip().split(' ', 1)
            for first_level_id in option_values(URL + region_id, 2):
                ids.extend(option_values(URL + first_level_id, 3))
            # 80 is a hard-coded progress total -- presumably the number of
            # lines in regions.txt; TODO confirm.
            print_progress(i, 80)

    print("Downloading locations hierarchy")
    # Pages are fetched once per distinct id, so progress is measured against
    # the number of distinct ids rather than the raw list length.
    unique_ids = set(ids)
    data = {}
    for i, location_id in enumerate(unique_ids, 1):
        selector = HtmlXPathSelector(text=read_url(URL + location_id))
        # The first row of the table is skipped.
        for tr in selector.select("//table[@class='list']/tr")[1:]:
            okato_id = tr.select(".//td[2]//b/text()").extract()[0].replace(' ', '')
            assert len(okato_id) == 8, "unexpected id %r" % okato_id
            data[okato_id] = tr.select("./td[3]/text()").extract()[0]
        print_progress(i, len(unique_ids))

    output_path = os.path.join(settings.PROJECT_PATH, 'data', 'locations.json')
    with open(output_path, 'w') as f:
        f.write(json.dumps(data, indent=4, ensure_ascii=False).encode('utf8'))
def handle(self, *args, **options):
    """Create Location rows from ``data/regions-gosduma.json``.

    ``iterate_struct`` yields name paths of length 1, 2 or 3.  A path of
    length 1 creates a top-level Location, length 2 creates a Location whose
    ``region`` is the first element's Location, and length 3 creates a
    Location with both ``region`` and ``tik`` set from its ancestors.

    Only the first 101 paths are processed (see the break at the end of the
    loop).
    """
    from locations.models import Location
    from navigation.models import Page  # only referenced by the disabled step below

    # Disabled step, kept for reference:
    #print "initializing static pages"
    #pages_data = open(os.path.join(settings.PROJECT_PATH, 'data', 'pages_data.json')).read()
    #data = json.loads(pages_data)
    #for name, html in data.iteritems():
    #    Page.objects.create(name=name, content=html)

    print("loading the regions hierarchy")
    regions_path = os.path.join(settings.PROJECT_PATH, 'data', 'regions-gosduma.json')
    with open(regions_path) as regions_file:
        data = json.loads(regions_file.read())

    # name of top-level location -> {'entry': Location, 'sub': {name: Location}}
    db_entries = {}
    for i, location in enumerate(iterate_struct(data, [])):
        print_progress(i, 100)

        depth = len(location)
        if depth == 1:
            db_entries[location[0]] = {
                'entry': Location.objects.create(name=location[0], **default_location_fields),
                'sub': {},
            }
        elif depth == 2:
            db_entries[location[0]]['sub'][location[1]] = Location.objects.create(
                name=location[1],
                region=db_entries[location[0]]['entry'],
                **default_location_fields)
        elif depth == 3:
            Location.objects.create(
                name=location[2],
                region=db_entries[location[0]]['entry'],
                tik=db_entries[location[0]]['sub'][location[1]],
                **default_location_fields)

        if i >= 100:
            break  # artificial break to speed up data loading
def handle(self, *args, **options):
    """Download result pages and store them as 'cik' protocols.

    Every Location that has a ``tik`` and no protocol from the 'cik'
    organization yet is fetched via ``location.results_url()``.  The 23
    numeric cells of the results table become fields ``p1`` .. ``p23`` of a
    verified Protocol attached to that location.
    """
    from locations.models import Location
    from organizations.models import Organization
    from protocols.models import Protocol

    rows_xpath = ("//table[@width='100%' and @cellspacing='1' and @cellpadding='2' "
                  "and @bgcolor='#ffffff']//tr")
    # Alternative selector that was tried before:
    #   "//body//table[3]//tr[4]//td//table[6]//tr"

    cik = Organization.objects.get(name='cik')
    content_type = ContentType.objects.get_for_model(Organization)
    cik_protocols = Protocol.objects.filter(content_type=content_type, object_id=cik.id)
    locations_processed = cik_protocols.values_list('location', flat=True)

    uiks_count = Location.objects.exclude(tik=None).count()
    # Progress resumes from the number of locations that already have a protocol.
    j = len(locations_processed)

    pending = Location.objects.exclude(tik=None).exclude(id__in=locations_processed)
    for location in pending:
        page = HtmlXPathSelector(text=read_url(location.results_url()))
        trs = page.select(rows_xpath)

        # Row 18 carries no value; dropping it must leave exactly 23 rows.
        del trs[18]
        assert len(trs) == 23, "incorrect number of rows"

        data = {}
        for number in range(1, 24):
            cell_text = trs[number - 1].select(".//b/text()").extract()[0]
            data['p%d' % number] = int(cell_text)
        data['location'] = location
        data['verified'] = True

        Protocol.objects.get_or_create(
            content_type=content_type, object_id=cik.id,
            protocol_id=location.id, defaults=data)

        print_progress(j, uiks_count)
        j += 1
def handle(self, *args, **options):
    """Recompute the JSON ``data`` field of every Location.

    For each location a Counter of role types is built, uik counters are
    added to their tik, tik counters are added to their region, and selected
    fields of the CIK protocols (p9, p19..p23) are merged in.  A location is
    saved only when its serialized data actually changed.
    """
    from locations.models import Location
    from loginza.models import UserMap
    from protocols.models import Protocol
    from users.models import Role

    # location id -> (region id, tik id)
    locations_by_id = dict(
        (loc_id, (region_id, tik_id))
        for loc_id, region_id, tik_id in Location.objects.values_list('id', 'region', 'tik'))

    inactive_ids = UserMap.objects.filter(verified=False).values_list('user', flat=True)
    # NOTE(review): exclude() with several conditions only drops rows matching
    # ALL of them at once -- confirm this is intended rather than "any of".
    role_rows = Role.objects.exclude(
        user__user__email='', user__user__is_active=False,
        user__in=inactive_ids).values_list('location', 'type')

    # Calculate roles distribution
    roles_by_location = {}
    for loc_id, role_type in role_rows:
        roles_by_location.setdefault(loc_id, []).append(role_type)

    data_by_location = dict(
        (loc_id, Counter(roles_by_location.get(loc_id, [])))
        for loc_id in locations_by_id)

    # Add uiks counts to tiks
    for loc_id, (region_id, tik_id) in locations_by_id.items():
        if tik_id:  # only process uiks
            data_by_location[tik_id] += data_by_location[loc_id]

    # Add tiks counts to regions
    for loc_id, (region_id, tik_id) in locations_by_id.items():
        if region_id and tik_id is None:  # only process tiks
            data_by_location[region_id] += data_by_location[loc_id]

    # Add cik data (one protocol per location; the last one seen is used)
    protocols_by_location = dict(
        (protocol.location_id, protocol) for protocol in Protocol.objects.from_cik())
    for loc_id, pr in protocols_by_location.items():
        # Counter.update adds the values to the counts already present.
        data_by_location[loc_id].update(
            p9=pr.p9, p19=pr.p19, p20=pr.p20, p21=pr.p21, p22=pr.p22, p23=pr.p23)

    # Recalculate uiks
    count = Location.objects.count()
    for i, location in enumerate(Location.objects.all(), 1):
        serialized = json.dumps(data_by_location[location.id])
        changed = serialized != location.data
        location.data = serialized
        if changed:
            location.save()
        print_progress(i, count)
def handle(self, *args, **options):
    """Fill coordinates, address and phone of Locations from OSM files.

    Every file directly inside ``data/osm`` is parsed as XML.  For each
    ``node`` element carrying the tags ``addr:city``, ``addr:street``,
    ``addr:housenumber`` and ``ref``, the Location whose ``region_name`` is
    the file name without its last four characters and whose ``name`` equals
    ``ref`` is updated.  Nodes lacking a tag or a matching Location are
    counted and reported as skipped.
    """
    from locations.models import Location

    required_tags = ('addr:city', 'addr:street', 'addr:housenumber', 'ref')
    osm_dir_path = os.path.join(settings.PROJECT_PATH, 'data', 'osm')
    # Only the files of the top directory are wanted; a missing directory
    # simply yields no files.
    files = next(os.walk(osm_dir_path), (osm_dir_path, [], []))[2]

    skip = 0
    for filename in files:
        with open(os.path.join(osm_dir_path, filename)) as osm_file:
            xml = fromstring(osm_file.read())

        total = len(xml)
        # Progress counts every child element, including the ones that are
        # skipped, so it actually reaches the total.
        for i, node in enumerate(xml):
            print_progress(i, total)
            if node.tag != 'node':
                continue

            attrs = dict((tag.get('k'), tag.get('v')) for tag in node)
            if not all(name in attrs for name in required_tags):
                skip += 1
                continue

            try:
                # filename[:-4] strips a four character extension.
                location = Location.objects.get(region_name=filename[:-4], name=attrs['ref'])
            except Location.DoesNotExist:
                skip += 1
                continue

            location.x_coord = float(node.get('lon'))
            location.y_coord = float(node.get('lat'))
            location.address = '%s, %s %s' % (
                attrs['addr:city'], attrs['addr:street'], attrs['addr:housenumber'])
            phone = attrs.get('phone', '')
            if phone:
                location.telephone = phone
            location.save()

    print("skipped %s" % skip)
def handle(self, *args, **options):
    """Create Boundary rows from the district GeoJSON files.

    Files ``1s.json`` .. ``89s.json`` under ``grakon/static/districts`` are
    read; for each feature the first ring of the first polygon is stored as
    JSON together with the feature properties and its bounding box.
    """
    from locations.models import Boundary

    districts_dir = os.path.join(settings.PROJECT_PATH, 'grakon', 'static', 'districts')
    # NOTE(review): range(1, 90) stops at 89 while progress is reported out
    # of 90 -- confirm whether a 90th file exists.
    for i in range(1, 90):
        with open(os.path.join(districts_dir, str(i) + 's.json')) as districts_file:
            data = json.loads(districts_file.read())

        for feature in data['features']:
            properties = feature['properties']
            geometry = feature['geometry']['coordinates'][0][0]
            # Split the coordinates once instead of scanning the ring four times.
            xs = [point[0] for point in geometry]
            ys = [point[1] for point in geometry]
            Boundary(
                data=json.dumps({'properties': properties, 'geometry': geometry}),
                x_min=min(xs), x_max=max(xs),
                y_min=min(ys), y_max=max(ys),
            ).save()

        print_progress(i, 90)
def handle(self, *args, **options):
    """Save every Location into the ``default1`` database.

    Rows are written in three passes so that referenced rows are saved
    first: locations without a region, then locations with a region but no
    tik, then locations with a tik.
    """
    from locations.models import Location

    def copy_with_progress(queryset):
        # Save each row of the queryset to 'default1', reporting progress.
        total = queryset.count()
        for position, location in enumerate(queryset):
            print_progress(position, total)
            location.save(using='default1')

    print("creating regions")
    for region in Location.objects.filter(region=None):
        region.save(using='default1')

    print("creating tiks")
    copy_with_progress(Location.objects.filter(tik=None).exclude(region=None))

    print("creating uiks")
    copy_with_progress(Location.objects.exclude(tik=None))
def handle(self, *args, **options):
    """Create Locations for the foreign UIKs.

    ``data/foreign_uiks.csv`` supplies, per UIK number, the id of the country
    Location (used as ``tik``) and the address.  The ``root`` and ``tvd``
    values are taken from the query string of the matching ``<option>`` on
    the page at ``FOREIGN_UIKS_URL``.  Option numbers missing from the CSV
    are printed and skipped.
    """
    from locations.models import FOREIGN_CODE, FOREIGN_NAME, Location

    uiks = {}
    csv_path = os.path.join(settings.PROJECT_PATH, 'data', 'foreign_uiks.csv')
    with open(csv_path, 'r') as csv_file:
        for line in csv_file:
            line = line.strip()
            if not line:
                continue
            # The address is the last column and may itself contain commas,
            # so split on the first three separators only.
            uik_no, country_id, country_name, address = line.split(',', 3)
            uiks[uik_no] = {'tik': int(country_id), 'address': address}

    foreign_tiks = Location.objects.exclude(region=None) \
        .filter(tik=None).filter(region_code=FOREIGN_CODE)
    countries_by_id = dict((location.id, location) for location in foreign_tiks)
    foreign_countries = Location.objects.get(region=None, region_code=FOREIGN_CODE)

    i = 0
    options_page = HtmlXPathSelector(text=read_url(FOREIGN_UIKS_URL))
    for uik_option in options_page.select("//select[@name='gs']//option"):
        # The UIK number is the first four characters of the option label.
        uik_no = uik_option.select("text()").extract()[0].strip()[:4]
        if uik_no not in uiks:
            print(uik_no)
            continue

        uik = uiks[uik_no]
        url = uik_option.select("@value").extract()[0]
        for param in url.split('?')[1].split('&'):
            # partition() tolerates values that contain '=' themselves.
            param_name, _sep, param_value = param.partition('=')
            if param_name in ('root', 'tvd'):
                uik[param_name] = int(param_value)

        location = Location(
            region=foreign_countries, tik=countries_by_id[uik['tik']],
            name=uik_no, region_name=FOREIGN_NAME, region_code=FOREIGN_CODE,
            address=uik['address'], tvd=uik['tvd'], root=uik['root'], data='{}')
        location.save()

        i += 1
        # 350 is a hard-coded progress total.
        print_progress(i, 350)
def handle(self, *args, **options):
    """Create the region / district / location hierarchy from ``data/struct.txt``.

    The file is an indented tree: every four leading spaces add one nesting
    level, and each line holds ``|``-separated fields (unpacked below as a
    name and an OKATO id).  Level 0 entries become regions, level 1 entries
    districts and level 2 entries locations, all attached to the country row.

    Raises:
        ValueError: if a line is indented more than one level deeper than
            the line before it.
    """
    from locations.models import Location

    struct_path = os.path.join(settings.PROJECT_PATH, 'data', 'struct.txt')
    with open(struct_path) as struct_file:
        txt = struct_file.read().decode("utf-8-sig")

    data = {}   # nested dicts keyed by the tuple of fields of each line
    path = []   # keys of the ancestors of the line being parsed
    for line in txt.splitlines():
        if not line.strip():
            # A blank line would otherwise become a malformed tree entry.
            continue

        level = (len(line) - len(line.lstrip())) // 4
        loc = tuple(line.strip().split('|'))

        if len(path) > level:
            path = path[:level]

        if len(path) == level:
            dt = data
            for p in path:
                dt = dt[p]
            dt[loc] = {}
            path.append(loc)
        else:
            raise ValueError('incorrect file format')

    # NOTE: a disabled alternative of the import below matched existing rows
    # by okato_id, printed "Mismatch:" for differing names, shortened district
    # names containing u'Таймырский Долгано-Ненецкий район' to exactly that
    # string ("name is too long") and bulk-created the leaf locations.

    # Get or create Russia
    country, created = Location.objects.get_or_create(country=None, defaults={'name': u'Россия'})

    for i, ((region_name, region_id), region_data) in enumerate(data.items()):
        region = Location.objects.create(country=country, okato_id=region_id, name=region_name)

        for (district_name, district_id), district_data in region_data.iteritems():
            district = Location.objects.create(
                country=country, region=region,
                okato_id=district_id, name=district_name)

            # Iterating the dict yields its (name, okato_id) keys.
            for location_name, location_id in district_data:
                Location.objects.create(
                    country=country, region=region, district=district,
                    okato_id=location_id, name=location_name)

        print_progress(i, len(data))
def import_uiks_file(path):
    """Import one election description file and its UIKs.

    The JSON file at ``path`` must provide ``date`` (``dd.mm.YYYY``), ``vrn``,
    ``prver``, ``election_name``, ``region_id`` and a ``merge`` list whose
    items carry ``merge_id``, ``name`` and ``tvd``.

    Country, region and TIK Locations are looked up for the election date;
    a region or TIK missing for that date is cloned from the undated row
    (``date=None``).  The election is linked to its locations through
    ElectionLocation rows and one Location per UIK is fetched or created.

    Raises:
        ValueError: if an Election with the same ``vrn`` and ``prver`` exists.
    """
    from elections.models import Election, ElectionLocation
    from locations.models import Location

    with open(path, 'r') as source_file:
        data = json.loads(source_file.read().decode('utf8'))

    election_date = datetime.strptime(data['date'], r'%d.%m.%Y')
    # A vrn starting with '2' marks region-level elections.
    is_regional = data['vrn'][0] == '2'

    # Create country location for the date
    country, created = Location.objects.get_or_create(
        name=u'Россия', region_code=0, date=election_date)

    election, created = Election.objects.get_or_create(
        vrn=int(data['vrn']), prver=int(data['prver']),
        defaults={'title': data['election_name'], 'date': election_date, 'location': country})
    if not created:
        raise ValueError("Election has been imported already")

    try:
        region = Location.objects.get(
            region=None, region_code=data['region_id'], date=election_date)
    except Location.DoesNotExist:
        # Clone the undated region: clearing the id makes save() insert a new row.
        region = Location.objects.get(region=None, region_code=data['region_id'], date=None)
        region.id = None
        region.country = country
        region.date = election_date
        region.save()

    if is_regional:
        ElectionLocation(location=region, election=election).save()

    merge_ids = set(uik_data['merge_id'] for uik_data in data['merge'])
    tiks = Location.objects.filter(
        tik=None, merge_id__in=merge_ids, date=election_date).exclude(region=None)
    tiks_by_merge_id = dict((tik.merge_id, tik) for tik in tiks)

    for merge_id in merge_ids:
        if merge_id not in tiks_by_merge_id:
            # Clone the undated TIK for this election date.
            tik = Location.objects.get(merge_id=merge_id, date=None)
            tik.id = None
            tik.date = election_date
            tik.country = country
            tik.region = region
            tik.save()
            tiks_by_merge_id[merge_id] = tik

        ElectionLocation(location=tiks_by_merge_id[merge_id], election=election).save()

    # The election is attached to its single TIK when there is exactly one
    # and the election is not region-level; otherwise to the region.
    if not is_regional and len(merge_ids) == 1:
        election.location = tiks_by_merge_id[next(iter(merge_ids))]
    else:
        election.location = region
    election.save()

    total = len(data['merge'])
    for i, uik_data in enumerate(data['merge'], 1):
        tik = tiks_by_merge_id[uik_data['merge_id']]
        # The first five characters of the name are dropped.
        uik, created = Location.objects.get_or_create(
            tik=tik, date=election_date, name=uik_data['name'][5:],
            defaults={'country': region.country, 'region': region,
                      'region_name': region.region_name,
                      'region_code': region.region_code})

        ElectionLocation(location=uik, election=election, tvd=int(uik_data['tvd'])).save()
        print_progress(i, total)
def handle(self, *args, **options):
    """Populate the database with random test users, links and contacts.

    Creates ``USER_COUNT`` users with random names, one or two links per
    Location, a 'voter' role plus up to three contacts per created profile,
    and finally adds every superuser profile as a contact of a few random
    test profiles.
    """
    from django.contrib.auth.models import User
    from grakon.models import Profile
    from links.models import Link
    from locations.models import Location
    from users.models import Contact, Role

    USER_COUNT = 40
    alphabet = 'abcdefghijklmnopqrstvuwz'
    profiles_db = []

    print("creating users")
    for i in xrange(USER_COUNT):
        print_progress(i, USER_COUNT)

        is_male = choice([True, False])
        if is_male:
            first_name, username = choice(male_names)
            last_name = choice(male_surnames)
        else:
            first_name, username = choice(female_names)
            last_name = choice(female_surnames)

        # Retry with a new random postfix until the username is unique.
        while True:
            postfix = '_' + choice(alphabet) + choice(['_', '']) + choice(alphabet)
            try:
                user = User.objects.create(
                    username=username + postfix,
                    first_name=first_name, last_name=last_name)
            except IntegrityError:
                continue

            profile = user.get_profile()
            profile.about = u"Этот пользователь создан в тестовых целях и не является настоящим человеком"
            profile.save()
            profiles_db.append(profile)
            break

    print("creating links")
    locations_db = list(Location.objects.all())
    for i, location in enumerate(locations_db):
        print_progress(i, len(locations_db))
        for _ in range(choice([1, 2])):
            link_owner = choice(profiles_db)
            link_data = choice(links)
            try:
                Link.objects.create(
                    location=location, user=link_owner,
                    name=link_data[0], url=link_data[1])
            except IntegrityError:
                # Duplicate link: simply skip it.
                continue

    print("creating contacts")
    for i in range(USER_COUNT):
        print_progress(i, USER_COUNT)
        owner = profiles_db[i]
        Role.objects.create(location=choice(locations_db), user=owner, type='voter')

        # The inner loop uses its own variable so the contacts are attached
        # to the profile of this iteration, and a profile is never added as
        # its own contact.
        for _ in range(choice([1, 2, 3])):
            contact = choice(profiles_db)
            if contact != owner:
                try:
                    Contact.objects.create(user=owner, contact=contact)
                except IntegrityError:
                    # Contact already exists.
                    continue

    # Add superuser as a contact to a few users
    for profile in Profile.objects.filter(user__is_superuser=True):
        for _ in range(4):
            try:
                Contact.objects.create(user=choice(profiles_db), contact=profile)
            except IntegrityError:
                # The same test profile was picked twice.
                pass

        if not profile.first_name and not profile.last_name:
            profile.first_name = profile.username
            # Persist the fallback name; without this the assignment is lost.
            profile.save()
def handle(self, *args, **options):
    """Aggregate protocols into per-TIK and per-region protocols.

    ``args[0]`` selects the source: ``'cik'`` sums the CIK protocols and
    stores the result for the 'cik' organization; ``'other'`` sums the
    verified user protocols, rescales them so that ``p10`` matches the CIK
    protocol of the same location, and stores the result for the 'grakon'
    organization.

    Raises:
        ValueError: if the first argument is neither 'cik' nor 'other'.
    """
    from locations.models import Location
    from organizations.models import Organization
    from protocols.models import Protocol

    cik = Organization.objects.get(name='cik')
    content_type = ContentType.objects.get_for_model(Organization)

    mode = args[0] if args else None
    if mode == 'cik':
        protocol_queryset = Protocol.objects.from_cik()
        organization = cik
    elif mode == 'other':
        protocol_queryset = Protocol.objects.from_users().filter(verified=True)
        organization = Organization.objects.get(name='grakon')
    else:
        raise ValueError("expected 'cik' or 'other' as the first argument, got %r" % (mode,))

    cik_protocols_by_location = dict((p.location_id, p) for p in Protocol.objects.from_cik())
    field_names = ['p%d' % number for number in range(1, 24)]

    # TODO: take average if there are few protocols from one uik

    def store_aggregate(location, protocols):
        # Sum the protocols field by field and create/update the protocol
        # that ``organization`` holds for ``location``.
        protocols = list(protocols)
        data = {'location': location, 'verified': True}
        for field in field_names:
            data[field] = sum(getattr(protocol, field) for protocol in protocols)

        # a fix to renormalize weight of protocols
        if mode == 'other':
            cik_protocol = cik_protocols_by_location[location.id]
            if data['p10'] != 0:
                factor = float(cik_protocol.p10) / data['p10']
                for field in field_names:
                    data[field] = int(factor * data[field])

        protocol, created = Protocol.objects.get_or_create(
            content_type=content_type, object_id=organization.id,
            protocol_id=location.id, defaults=data)
        if not created:
            for field in field_names:
                setattr(protocol, field, data[field])
            protocol.save()

    # Generate data for TIKs
    tiks = Location.objects.exclude(region=None).filter(tik=None)
    tiks_count = tiks.count()
    for j, tik in enumerate(tiks):
        store_aggregate(tik, protocol_queryset.filter(location__tik=tik))
        print_progress(j, tiks_count)

    # Generate data for regions
    # NOTE(review): this filter runs after the TIK-level protocols above were
    # stored; if they also match location__region the regional sums would
    # count them in addition to the UIK protocols -- verify.
    for region in Location.objects.filter(region=None):
        store_aggregate(region, protocol_queryset.filter(location__region=region))
def handle(self, *args, **options):
    """Synchronize TIK and UIK Locations with ``data/regions/<region>.json``.

    ``data/region_ids.txt`` lists ``title, name, id`` per region.  For every
    region the TIKs of its JSON file are looked up by ``tvd`` (and created
    when missing); the UIKs under ``sub`` are created or get their ``tvd``
    and ``root`` refreshed.  UIKs present in the database but absent from
    the file are only reported, not deleted.

    Raises:
        ValueError: if a UIK name does not start with the expected prefix.
    """
    from locations.models import Location

    uik_prefix = u'УИК №'

    REGIONS = {}
    region_ids_path = os.path.join(settings.PROJECT_PATH, 'data', 'region_ids.txt')
    with open(region_ids_path) as region_ids_file:
        for line in region_ids_file:
            region_title, region_name, region_id = line.split(', ')
            REGIONS[region_name] = (region_title, int(region_id.strip()))

    for i, region in enumerate(REGIONS):
        print_progress(i, len(REGIONS))
        region_code = REGIONS[region][1]
        region_location = Location.objects.get(region=None, region_name=region)

        # Update TIKs
        region_path = os.path.join(settings.PROJECT_PATH, 'data', 'regions', '%s.json' % region)
        with open(region_path) as region_file:
            info_list = json.loads(region_file.read().decode('utf8'))

        for tik_info in info_list:
            try:
                tik = Location.objects.filter(region_name=region, tik=None).get(tvd=tik_info['tvd'])
            except Location.DoesNotExist:
                tik = location_from_info(tik_info)
                tik.region = region_location
                tik.region_name = region
                tik.region_code = region_code
                tik.save()
                print("new tik %s" % tik.id)

            uiks_by_number = dict((uik.name, uik) for uik in Location.objects.filter(tik=tik))

            number_list = []
            for uik_data in tik_info['sub']:
                if not uik_data['name'].startswith(uik_prefix):
                    raise ValueError("unexpected UIK name %r in region %r"
                                     % (uik_data['name'], region))
                # Keep only the number; location_from_info() below reads the
                # stripped name from the same dict.
                uik_data['name'] = uik_data['name'][len(uik_prefix):]
                number_list.append(uik_data['name'])

                if uik_data['name'] in uiks_by_number:
                    uik = uiks_by_number[uik_data['name']]
                    # Compare and store the values as integers.
                    tvd = int(uik_data['tvd'])
                    root = int(uik_data['root'])
                    if uik.tvd != tvd or uik.root != root:
                        uik.tvd = tvd
                        uik.root = root
                        uik.save()
                else:
                    uik = location_from_info(uik_data)
                    uik.region = region_location
                    uik.tik = tik
                    uik.region_name = region
                    uik.region_code = region_code
                    uik.save()

            # Report (but do not remove) UIKs that are no longer listed
            for stale_uik in Location.objects.filter(tik=tik).exclude(name__in=number_list):
                print("UIK to delete %s" % stale_uik.id)
def handle(self, *args, **options):
    """Create UIK Locations for every region listed in ``data/region_ids.txt``.

    For each region, ``data/regions/<region>.json`` lists TIKs with their
    UIKs under ``sub``.  Each TIK is looked up by ``tvd`` within the region;
    TIKs that cannot be found are printed and skipped.  Every UIK is turned
    into a Location via ``location_from_info`` and attached to its region
    and TIK.

    Raises:
        ValueError: if a UIK name does not start with the expected prefix.
    """
    from locations.models import Location

    uik_prefix = u'УИК №'

    REGIONS = {}
    region_ids_path = os.path.join(settings.PROJECT_PATH, 'data', 'region_ids.txt')
    with open(region_ids_path) as region_ids_file:
        for line in region_ids_file:
            region_title, region_name, region_id = line.split(', ')
            REGIONS[region_name] = (region_title, int(region_id.strip()))

    # NOTE: earlier, now disabled, initialisation steps created one Location
    # per region from data_path(region, 'center'), the TIKs of each region
    # from data_path(region, 'merge'), and a foreign-territories Location
    # (FOREIGN_TERRITORIES / FOREIGN_NAME / FOREIGN_CODE) with one child per
    # line of data/countries.txt.

    # init uiks
    for region in REGIONS:
        print(region)
        region_code = REGIONS[region][1]
        region_location = Location.objects.get(region_name=region, region=None)

        uiks_path = os.path.join(settings.PROJECT_PATH, 'data', 'regions', region + '.json')
        with open(uiks_path) as uiks_file:
            tiks_list = json.loads(uiks_file.read().decode('utf8'))

        # enumerate() keeps the progress counter moving for skipped TIKs too.
        for j, tik in enumerate(tiks_list):
            try:
                tik_location = Location.objects.get(region=region_location, tvd=tik['tvd'])
            except Location.DoesNotExist:
                print("%s %s" % (region, tik['tvd']))
                continue

            print_progress(j, len(tiks_list))

            for uik_data in tik['sub']:
                if not uik_data['name'].startswith(uik_prefix):
                    raise ValueError("unexpected UIK name %r in region %r"
                                     % (uik_data['name'], region))
                # location_from_info() reads the stripped name from the dict.
                uik_data['name'] = uik_data['name'][len(uik_prefix):]

                uik = location_from_info(uik_data)
                uik.region = region_location
                uik.tik = tik_location
                uik.region_name = region
                uik.region_code = region_code
                uik.save()