def main():
    LOAD_ = True
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)

    if LOAD_:
        GeoUKRSubject.objects.all().delete()
        yplib.get('http://en.wikipedia.org/wiki/ISO_3166-2:UA')
        soup = yplib.soup()
        tbl = soup.find('table', {'class': "wikitable sortable"})
        rows = tbl.findAll('tr')
        for row in rows:
            cells = row.findAll('td')
            if cells:
                subject = GeoUKRSubject(country_iso='UA', geoname_id=0)
                cell = cells[1]
                a = cell.find('a')
                if a:
                    subject.ascii_name = a.text
                subject.name = ''
                fullcode = cells[0].text.split('-')
                subject.code = fullcode[1]
                subject.save()

    elapsed = time() - start
    print "Elapsed time -->", elapsed
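
# A minimal standalone sketch of the same table scrape done with requests and
# BeautifulSoup 4 directly (both assumed available; yplib above is the
# project's own browser wrapper). It isolates the row-parsing logic: skip
# header rows without <td>, take the region code from the first cell and the
# linked name from the second.
import requests
from bs4 import BeautifulSoup

def fetch_ua_subjects():
    html = requests.get('http://en.wikipedia.org/wiki/ISO_3166-2:UA').text
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('table', {'class': 'wikitable'})
    subjects = []
    for row in table.find_all('tr'):
        cells = row.find_all('td')
        if not cells:
            continue  # header row
        code = cells[0].text.split('-')[1]       # 'UA-71' -> '71'
        link = cells[1].find('a')
        name = link.text if link else cells[1].text.strip()
        subjects.append({'code': code, 'ascii_name': name})
    return subjects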
def main():
    LOAD_ = True
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)

    if LOAD_:
        GeoRUSSubject.objects.all().delete()
        yplib.get('http://ru.wikipedia.org/wiki/%D0%9A%D0%BE%D0%B4%D1%8B_%D1%81%D1%83%D0%B1%D1%8A%D0%B5%D0%BA%D1%82%D0%BE%D0%B2_%D0%A0%D0%BE%D1%81%D1%81%D0%B8%D0%B9%D1%81%D0%BA%D0%BE%D0%B9_%D0%A4%D0%B5%D0%B4%D0%B5%D1%80%D0%B0%D1%86%D0%B8%D0%B8')
        soup = yplib.soup()
        tbl = soup.find('table', {'class': "sortable standard"})
        rows = tbl.findAll('tr')
        for row in rows:
            cells = row.findAll('td')
            if cells:
                subject = GeoRUSSubject(country_iso='RU', geoname_id=0)
                cell = cells[0]
                a = cell.find('a')
                if a:
                    subject.name = a.text
                subject.ascii_name = cells[1].text
                subject.code = cells[2].text
                subject.gai_code = cells[3].text
                subject.iso_3166_2_code = cells[4].text
                subject.save()

    elapsed = time() - start
    print "Elapsed time -->", elapsed
def main():
    if not switch_off_status_updated():
        return False
    LOAD_CACHES = True
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))
    soup = yplib.soup()
    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False

    if LOAD_CACHES:
        #Cach.objects.all().delete()
        cntr_list = []
        t = re.compile(r'<td>(\w\w\d+)</td>')
        for p in range(120):
            item_list = []
            r = yplib.post2('http://www.geocaching.su/?pn=101',
                            (('sort', '1'), ('page', str(p)),
                             ('in_page', '100'), ('finded', '1'),
                             ('y', '0'), ('x', '0'), ('updown', '1')))
            html = yplib.show()
            code_list = t.findall(html)
            for code in code_list:
                pid = code[2:]
                item_list.append({'id': pid, 'code': code})
            if item_list == cntr_list:
                break
            else:
                cntr_list = item_list
                check_cach_list(item_list)

    switch_on_status_updated()
    log('gcsu_caches', 'OK')
    elapsed = time() - start
    print "Elapsed time -->", elapsed
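
# Sketch of the pagination idiom used above: geocaching.su keeps serving the
# last page for out-of-range page numbers, so the loop stops when a page
# repeats instead of waiting for an empty result. fetch_page is a hypothetical
# stand-in for the yplib.post2() + regex step.
def collect_paged_items(fetch_page, max_pages=120):
    previous = None
    collected = []
    for page in range(max_pages):
        items = fetch_page(page)
        if items == previous:  # the site repeated the final page
            break
        previous = items
        collected.extend(items)
    return collected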
def main():
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))
    soup = yplib.soup()
    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False

    excluded_id = [118575, 111821, 109578, 96417]
    all_id = []
    for k in range(10):
        r = yplib.post2('http://www.geocaching.su/?pn=108',
                        (('sort', '2'), ('page', str(k)),
                         ('in_page', '1000'), ('updown', '2')))
        soup = yplib.soup()
        a_list = soup.findAll('a', {'class': "profilelink"})
        t = re.compile(r'\?pid=(\d+)')
        for a in a_list[:-1]:
            if a.get('onclick'):
                # findall() returns strings; cast to int so the comparison
                # against the integer ids in excluded_id actually matches
                user_id = int(t.findall(a['onclick'])[0])
                #login = a.text.encode('utf8')
                if user_id not in all_id and user_id not in excluded_id:
                    all_id.append(user_id)

    check_id_list(all_id)
    elapsed = time() - start
    print "Elapsed time -->", elapsed
    log('upd_gcsu_cachers', 'OK')
def main():
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))
    soup = yplib.soup()
    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False

    #all_updated = False
    t = re.compile(r'<td>(\w\w\d+)</td>')
    for p in range(30):
        item_list = []
        r = yplib.post2('http://www.geocaching.su/?pn=101',
                        (('sort', '1'), ('page', str(p)), ('in_page', '1000'),
                         ('finded', '1'), ('y', '0'), ('x', '0'),
                         ('updown', '1')))
        html = yplib.show()
        code_list = t.findall(html)
        for code in code_list:
            pid = code[2:]
            item_list.append({'id': pid, 'code': code})
        print 'count %s' % len(item_list)
        check_cach_list(item_list)

    log('upd_gcsu_caches', 'OK')
    elapsed = time() - start
    print "Elapsed time -->", elapsed
def main():
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)
    geosite = Geosite.objects.get(code='OCDE')
    countries = GeoCountry.objects.all()
    countries = countries.values_list('iso', flat=True)

    sql = """
    SELECT `value` FROM variables WHERE `name`='last_ocde_updated'
    """
    lastdate = sql2val(sql)
    if not lastdate:
        lastdate = '20000101000000'

    statuses = []
    types = []
    oc_count = 0
    gc_count = 0
    nc_count = 0
    k = 0
    uc = 0
    nc = 0
    for country in countries:
        url = 'http://opencaching.de/xml/ocxml11.php?modifiedsince=%s&cache=1&country=%s' % \
              (lastdate, country)
        response = urllib2.urlopen(url)
        xml = response.read()
        try:
            root = ET.XML(xml)
        except Exception as e:
            print 'PARSING ERROR', country, e
            continue

        # session id and record count come first in the export
        current_session = root[0]
        session_id = current_session.text
        records = root[1]
        caches_count = int(records.get("cache") or 0)
        if caches_count:
            page_count = int(round(caches_count * 1.0 / CACHES_PER_PAGE, 0)) + 1
            for p in range(page_count):
                page_url = 'http://www.opencaching.de/xml/ocxml11.php?sessionid=%s&file=%s' % \
                           (session_id, p + 1)
                page_response = urllib2.urlopen(page_url).read()
                zipdata = StringIO()
                zipdata.write(page_response)
                try:
                    zf = zipfile.ZipFile(zipdata)
                except zipfile.BadZipfile:
                    continue
                for name in zf.namelist():
                    uncompressed = zf.read(name)
                    cache_root = ET.XML(uncompressed)
                    latitude = None
                    longitude = None
                    status = None
                    created_date_str = ''
                    for cache in cache_root.getchildren():
                        k += 1
                        if cache.tag == 'cache':
                            the_geothing = TheGeothing()
                            the_location = TheLocation()
                            for param in cache:
                                if param.tag == 'id':
                                    the_geothing.pid = param.get('id')
                                if param.tag == 'userid':
                                    the_geothing.author = param.text
                                if param.tag == 'name':
                                    the_geothing.name = param.text
                                if param.tag == 'longitude':
                                    longitude = param.text
                                if param.tag == 'latitude':
                                    latitude = param.text
                                if param.tag == 'type':
                                    cache_type = param.get('short')
                                    the_geothing.type_code = OCDE_TYPES.get(cache_type)
                                    type_ = (param.get('id'), param.get('short'))
                                    if type_ not in types:
                                        types.append(type_)
                                if param.tag == 'status':
                                    status = int(param.get('id') or 0)
                                    status_ = (status, param.text)
                                    if status_ not in statuses:
                                        statuses.append(status_)
                                if param.tag == 'waypoints':
                                    the_geothing.code = param.get('oc')
                                    if the_geothing.code:
                                        oc_count += 1
                                    gccode = param.get('gccom')
                                    if gccode:
                                        gc_count += 1
                                    nccode = param.get('nccom')
                                    if nccode:
                                        nc_count += 1
                                if param.tag == 'datecreated':
                                    created_date_str = param.text
                                    parts = strptime(created_date_str,
                                                     '%Y-%m-%d %H:%M:%S')
                                    dt = datetime(parts[0], parts[1], parts[2],
                                                  parts[3], parts[4], parts[5])
                                    the_geothing.created_date = dt
                            if latitude and longitude and status == 1:
                                the_location.NS_degree = float(latitude)
                                the_location.EW_degree = float(longitude)
                                if the_geothing.code and \
                                   the_geothing.type_code in GEOCACHING_ONMAP_TYPES:
                                    geothing = get_object_or_none(
                                        Geothing,
                                        pid=the_geothing.pid,
                                        geosite=geosite)
                                    if geothing is not None:
                                        uc += update_geothing(
                                            geothing, the_geothing,
                                            the_location) or 0
                                    else:
                                        create_new_geothing(
                                            the_geothing, the_location,
                                            geosite)
                                        nc += 1

    message = 'OK. updated %s, new %s' % (uc, nc)
    log('map_ocde_caches', message)
    print message

    sql = """
    UPDATE `variables` SET `value`='%s' WHERE `name`='last_ocde_updated'
    """ % ocde_timestamp()
    execute_query(sql)

    elapsed = time() - start
    print "Elapsed time -->", elapsed
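
# Helper sketch for the zipped-XML pages above: each opencaching.de export
# page arrives as a ZIP archive that is unpacked in memory before parsing.
# Python 2 idiom matching the script's urllib2/StringIO usage; ET is assumed
# to be xml.etree.cElementTree, as elsewhere in these scripts.
import urllib2
import zipfile
from StringIO import StringIO
from xml.etree import cElementTree as ET

def iter_zipped_xml(url):
    raw = urllib2.urlopen(url).read()
    zf = zipfile.ZipFile(StringIO(raw))
    for name in zf.namelist():
        yield ET.XML(zf.read(name))  # one parsed root per archive member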
def main():
    if not switch_off_status_updated():
        return False
    LOAD_GEOCACHERS = False
    LOAD_ABSENT_GEOCACHERS = False
    start = time()
    cursor = connection.cursor()
    cursor.execute('select * from geocacher')
    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))
    soup = yplib.soup()
    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False

    if LOAD_GEOCACHERS:
        Geocacher.objects.all().delete()
        cntr_list = []
        all_id = []
        for p in range(2500):
            print 'page', p + 1
            user_list = []
            r = yplib.post2('http://www.geocaching.su/?pn=108',
                            (('sort', '1'), ('page', str(p)),
                             ('in_page', '100'), ('updown', '1')))
            soup = yplib.soup()
            a_list = soup.findAll('a', {'class': "profilelink"})
            t = re.compile(r'\?pid=(\d+)')
            for a in a_list[:-1]:
                if a.get('onclick'):
                    user_id = t.findall(a['onclick'])[0]
                    login = a.text.encode('utf8')
                    if user_id not in all_id:
                        user_list.append({'id': user_id, 'login': login})
                        all_id.append(user_id)
            if user_list == cntr_list:
                break
            else:
                cntr_list = user_list
                check_id_list(user_list)

    if LOAD_ABSENT_GEOCACHERS:
        pid_list = (469, 406, 1224, 4400, 11910, 4456, 13439, 7707, 8887,
                    3156, 8094)
        user_list = [{'id': pid, 'login': u''} for pid in pid_list]
        check_id_list(user_list)

    elapsed = time() - start
    print "Elapsed time -->", elapsed
    switch_on_status_updated()
    log('gcsu_geocachers', 'OK')
def main():
    LOAD_CACHES = True
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)

    url = 'http://www.geocaching.su/rss/geokrety/api.php?interval=1y&ctypes=1,2,3,7&changed=1'
    f = urllib2.urlopen(url)
    xml = f.read()
    try:
        sxml = ET.XML(xml)
    except Exception as e:
        print type(e)
        print e
        return

    cnt_new = 0
    cnt_upd = 0
    caches = sxml.getchildren()
    geosite = Geosite.objects.get(code='GC_SU')
    for cache in caches:
        if cache.tag == 'cache':
            the_geothing = TheGeothing()
            the_location = TheLocation()
            for tag_ in cache.getchildren():
                if tag_.tag == 'code':
                    the_geothing.code = tag_.text
                if tag_.tag == 'autor':
                    the_geothing.author = tag_.text
                if tag_.tag == 'name':
                    the_geothing.name = tag_.text
                if tag_.tag == 'position':
                    lat_degree = float(tag_.get('lat'))
                    the_location.NS_degree = lat_degree
                    lon_degree = float(tag_.get('lon'))
                    the_location.EW_degree = lon_degree
                if tag_.tag == 'cdate':
                    date_str = tag_.text
                    date_ = date_str.split('-')
                    if len(date_) == 3:
                        the_geothing.created_date = datetime(
                            int(date_[0]), int(date_[1]), int(date_[2]))
            if the_geothing.code:
                p = re.compile(r'(\D+)(\d+)')
                dgs = p.findall(the_geothing.code)
                if dgs:
                    code_data = dgs[0]
                    the_geothing.pid = int(code_data[1])
                    the_geothing.type_code = code_data[0]
                    if the_geothing.type_code in GEOCACHING_ONMAP_TYPES:
                        geothing = get_object_or_none(Geothing,
                                                      pid=the_geothing.pid,
                                                      geosite=geosite)
                        if geothing is not None:
                            cnt_upd += update_geothing(
                                geothing, the_geothing, the_location) or 0
                        else:
                            create_new_geothing(the_geothing, the_location,
                                                geosite)
                            cnt_new += 1

    message = 'OK %s/%s' % (cnt_new, cnt_upd)
    log('map_gcsu_caches', message)
    print message
    elapsed = time() - start
    print "Elapsed time -->", elapsed
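
# Sketch of the waypoint-code split used above: a geocaching.su code such as
# 'TR1234' carries the cache type in its letter prefix and the numeric pid in
# the trailing digits.
import re

CODE_RE = re.compile(r'(\D+)(\d+)')

def split_code(code):
    match = CODE_RE.match(code)
    if not match:
        return None, None
    return match.group(1), int(match.group(2))  # e.g. ('TR', 1234)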
def main():
    #if not switch_off_status_updated():
    #    return False
    LOAD_CACHES = True
    LOAD_GEO_LOCATION = False
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))
    soup = yplib.soup()
    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False
    print 'OK'

    if LOAD_CACHES:
        r = yplib.get('http://www.geocaching.su/site/popup/selex.php')
        soup = yplib.soup()
        chbox_list = soup.findAll('input', type='checkbox')
        regions = []
        for chbox in chbox_list:
            v = chbox.get('value')
            if v and chbox.get('name', '') == 'point[]':
                regions.append(v)
        print regions

        data = [
            ('translit', '0'),
            ('fmt', 'wpt'),
            ('code_to_name', '1'),
            ('finded', '2'),
        ]
        for r in regions:
            data.append(('point[]', r))
        print data

        r = yplib.post2('http://www.geocaching.su/site/popup/export.php', data)
        soup = yplib.soup()
        txt = soup.text
        print txt
        return  # debugging stop: everything below in this branch never runs

        Cach.objects.all().delete()
        cntr_list = []
        t = re.compile(r'<td>(\w\w\d+)</td>')
        for p in range(100):
            item_list = []
            r = yplib.post2('http://www.geocaching.su/?pn=101',
                            (('sort', '1'), ('page', str(p)),
                             ('in_page', '100'), ('finded', '1'), ('y', '0'),
                             ('x', '0'), ('updown', '1')))
            html = yplib.show()
            code_list = t.findall(html)
            for code in code_list:
                pid = code[2:]
                item_list.append({'id': pid, 'code': code})
            if item_list == cntr_list:
                break
            else:
                cntr_list = item_list
                check_cach_list(item_list)
        #check_cach_list([{'id': 2746, 'code': 'EX2746'}])

    if LOAD_GEO_LOCATION:
        #.filter(pid=5408)
        for cach in Cach.objects.all():
            lat = cach.latitude_degree
            lng = cach.longitude_degree
            if lat is not None and lng is not None:
                url = 'http://ws.geonames.org/countrySubdivision?lat=%s&lng=%s&lang=ru' % (lat, lng)
                print cach.pid, url
                yplib.get(url)
                try:
                    soup = yplib.soup()
                except:
                    # fall back to the English service if the ru reply
                    # cannot be parsed
                    url = 'http://ws.geonames.org/countrySubdivision?lat=%s&lng=%s&lang=en' % (lat, lng)
                    yplib.get(url)
                    soup = yplib.soup()
                item = soup.find('countrycode')
                if item:
                    cach.country_code = item.text.encode('utf8')
                if soup.admincode1:
                    cach.admin_code = soup.admincode1.text
                item = soup.find('code', {'type': 'FIPS10-4'})
                if item:
                    cach.code_fips10_4 = item.text
                item = soup.find('code', {'type': 'ISO3166-2'})
                if item:
                    cach.code_iso3166_2 = item.text
                item = soup.find('countryname')
                if item:
                    cach.country_name = item.text.encode('cp1251')
                if soup.adminname1:
                    cach.oblast_name = soup.adminname1.text.encode('cp1251')
                print cach.pid, cach.country_name, cach.oblast_name
                cach.save()
            else:
                print cach.pid, lat, lng, cach.loc_NS, cach.loc_NS_degree, \
                      cach.loc_NS_minute, cach.loc_EW, cach.loc_EW_degree, \
                      cach.loc_EW_minute

    switch_on_status_updated()
    log('gcsu_caches', 'OK')
    elapsed = time() - start
    print "Elapsed time -->", elapsed
def main():
    if not switch_off_status_updated():
        return False
    LOAD_CREATED_CACHE_LOGS = False
    LOAD_SEEK_CACHE_LOGS = False
    LOAD_RECOMMEND_CACHE_LOGS = False
    LOAD_PHOTOALBUM_LOGS = False
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))
    soup = yplib.soup()
    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False
    print 'BEGIN'

    if LOAD_CREATED_CACHE_LOGS:
        LogCreateCach.objects.all().delete()
        print 'delete create logs'
        cachers = Geocacher.objects.all()
        print cachers.count()
        t = re.compile(r'\?pn=101&cid=(\d+)')
        t1 = re.compile(u'создан\s+(\d\d\.\d\d\.\d\d\d\d)')
        for cacher in cachers:
            if cacher.uid:
                print cacher.pid, cacher.uid
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=1&uid=%s' % cacher.uid
                try:
                    yplib.get(url)
                except BrowserStateError:
                    continue
                soup = yplib.soup()
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = created_date = None
                        coauthor = False
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])
                                txt = cell.text
                                print cacher.pid, cach_pid, txt.encode('utf8')
                                if u'(соавтор)' in txt:
                                    coauthor = True
                                found = t1.findall(txt)
                                if found:
                                    created_date = found[0]
                                    created_date = date_or_none(created_date)
                                if cach_pid:
                                    the_log = LogCreateCach(
                                        author_pid=cacher.pid,
                                        cach_pid=cach_pid)
                                    the_log.created_date = created_date
                                    the_log.coauthor = coauthor
                                    the_log.save()
                                    print 'saved'

    if LOAD_SEEK_CACHE_LOGS:
        LogSeekCach.objects.all().delete()
        cachers = Geocacher.objects.all()
        t = re.compile(r'\?pn=101&cid=(\d+)')
        t1 = re.compile(u'создан\s+(\d\d\.\d\d\.\d\d\d\d)')
        t2 = re.compile(u'найден\s+(\d\d\.\d\d\.\d\d\d\d)')
        t3 = re.compile(u'оценен\s+на\s+(\d)')
        fh = open('cant_open_userstat.txt', 'w')
        for cacher in cachers:
            if cacher.uid:
                print cacher.pid, cacher.uid
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=2&uid=%s' % cacher.uid
                loaded = False
                cnter = 0
                while not loaded and cnter < 100:
                    try:
                        yplib.get(url)
                        soup = yplib.soup()
                        loaded = True
                    except BrowserStateError:
                        cnter += 1
                if not loaded:
                    print 'cannot go to %s' % url
                    fh.write(url)
                    continue  # avoid re-parsing the previous user's page
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])
                                txt = cell.text
                                found = t3.findall(txt)
                                if found:
                                    g = found[0]
                                    grade = int_or_none(g)
                                print cacher.pid, cach_pid, txt.encode('utf8')
                                found = t2.findall(txt)
                                if found:
                                    found_date = found[0]
                                    found_date = date_or_none(found_date)
                                if cach_pid:
                                    the_log = LogSeekCach(
                                        cacher_pid=cacher.pid,
                                        cach_pid=cach_pid)
                                    the_log.found_date = found_date
                                    the_log.grade = grade
                                    the_log.save()
                                    print 'saved'
        fh.close()

    if LOAD_RECOMMEND_CACHE_LOGS:
        LogRecommendCach.objects.all().delete()
        cachers = Geocacher.objects.all()
        t = re.compile(r'\?pn=101&cid=(\d+)')
        for cacher in cachers:
            if cacher.uid:
                print cacher.pid, cacher.uid
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=3&uid=%s' % cacher.uid
                yplib.get(url)
                soup = yplib.soup()
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])
                                txt = cell.text
                                print cacher.pid, cach_pid, txt.encode('utf8')
                                if cach_pid:
                                    the_log = LogRecommendCach(
                                        cacher_pid=cacher.pid,
                                        cach_pid=cach_pid)
                                    the_log.save()
                                    print 'saved'

    if LOAD_PHOTOALBUM_LOGS:
        LogPhotoAlbum.objects.all().delete()
        cachers = Geocacher.objects.all()
        t = re.compile(r'showmemphotos\.php\?cid=(\d+)')
        for cacher in cachers:
            if cacher.uid:
                print cacher.pid, cacher.uid
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=4&uid=%s' % cacher.uid
                yplib.get(url)
                soup = yplib.soup()
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])
                                txt = cell.text
                                print cacher.pid, cach_pid, txt.encode('utf8')
                                if cach_pid:
                                    the_log = LogPhotoAlbum(
                                        cacher_pid=cacher.pid,
                                        cach_pid=cach_pid)
                                    the_log.save()
                                    print 'saved'

    elapsed = time() - start
    print "Elapsed time -->", elapsed
    switch_on_status_updated()
    log('gcsu_logs', 'OK')
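
# date_or_none() is used by all of these log loaders; a sketch of the presumed
# helper, assuming the dd.mm.yyyy format that the regexes above capture.
from datetime import datetime

def date_or_none(text):
    try:
        return datetime.strptime(text, '%d.%m.%Y').date()
    except (TypeError, ValueError):
        return None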
def main():
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)
    geosite = Geosite.objects.get(code='OCCZ')

    statuses = []
    types = []
    oc_count = 0
    k = 0
    uc = 0
    nc = 0
    url = 'http://www.opencaching.cz/search.php?searchto=searchbydistance&showresult=1&output=XML&sort=byname&latNS=N&lat_h=50&lat_min=5.123&lonEW=E&lon_h=14&lon_min=20.123&distance=1500&unit=km&count=500&startat=0'
    response = urllib2.urlopen(url).read()
    cache_root = ET.XML(response)

    docinfo = cache_root.getchildren()[0]
    result_count = 0
    for tag in docinfo.getchildren():
        if tag.tag == 'results':
            result_count = int(tag.text or 0)

    if result_count:
        for cache in cache_root.getchildren()[1:]:
            latitude = None
            longitude = None
            status = None
            created_date_str = ''
            k += 1
            if cache.tag == 'cache':
                the_geothing = TheGeothing()
                the_location = TheLocation()
                for param in cache:
                    if param.tag == 'id':
                        the_geothing.pid = param.text
                    if param.tag == 'owner':
                        the_geothing.author = param.text
                    if param.tag == 'name':
                        the_geothing.name = param.text
                    if param.tag == 'lon':
                        longitude = param.text
                    if param.tag == 'lat':
                        latitude = param.text
                    if param.tag == 'type':
                        cache_type = param.text
                        the_geothing.type_code = OCCZ_TYPES.get(cache_type)
                        if cache_type not in types:
                            types.append(cache_type)
                    if param.tag == 'status':
                        status = param.text
                        if status not in statuses:
                            statuses.append(status)
                    if param.tag == 'waypoint':
                        the_geothing.code = param.text
                        if the_geothing.code:
                            oc_count += 1
                    if param.tag == 'hidden':
                        created_date_str = param.text
                        parts = strptime(created_date_str, '%d.%m.%Y')
                        dt = datetime(parts[0], parts[1], parts[2],
                                      parts[3], parts[4], parts[5])
                        the_geothing.created_date = dt
                if latitude and longitude:
                    the_location.NS_degree = get_degree(latitude)
                    the_location.EW_degree = get_degree(longitude)
                if the_geothing.code and the_geothing.pid and \
                   the_geothing.type_code in GEOCACHING_ONMAP_TYPES:
                    geothing = get_object_or_none(Geothing,
                                                  pid=the_geothing.pid,
                                                  geosite=geosite)
                    if geothing is not None:
                        uc += update_geothing(geothing, the_geothing,
                                              the_location) or 0
                    else:
                        create_new_geothing(the_geothing, the_location,
                                            geosite)
                        nc += 1

    message = 'OK. updated %s, new %s' % (uc, nc)
    log('map_occz_caches', message)
    print message
    print types
    print statuses
    elapsed = time() - start
    print "Elapsed time -->", elapsed
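
# The struct_time -> datetime conversion is done inline above; time.strptime()
# returns a struct_time whose first six fields map directly onto the datetime
# constructor, so the dance collapses to one line.
from time import strptime
from datetime import datetime

def parse_hidden_date(text, fmt='%d.%m.%Y'):
    return datetime(*strptime(text, fmt)[:6])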
def main():
    LOAD_CACHES = True
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))
    soup = yplib.soup()
    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False

    r = yplib.get('http://www.geocaching.su/site/popup/selex.php')
    soup = yplib.soup()
    chbox_list = soup.findAll('input', type='checkbox')
    regions = []
    for chbox in chbox_list:
        v = chbox.get('value')
        if v and chbox.get('name', '') == 'point[]':
            regions.append(v)

    data = [
        ('translit', '0'),
        ('fmt', 'wpt'),
        ('code_to_name', '1'),
        ('finded', '2'),
    ]
    for r in regions:
        data.append(('point[]', r))

    r = yplib.post2('http://www.geocaching.su/site/popup/export.php', data)
    soup = yplib.soup()
    wpt = soup.text.split('\n')

    # field positions in the exported .wpt lines
    WPT_CODE = 1
    WPT_LAT = 2
    WPT_LON = 3
    WPT_TITLE = 10
    WPT_DATE = 4

    geosite = Geosite.objects.get(code='GC_SU')
    print len(wpt), 'points'
    k = 0
    for point in wpt:
        k += 1
        fields = point.split(',')
        if fields[0].isdigit():
            the_geothing = TheGeothing()
            the_location = TheLocation()
            lat_degree = float(fields[WPT_LAT])
            the_location.NS_degree = lat_degree
            lon_degree = float(fields[WPT_LON])
            the_location.EW_degree = lon_degree
            p = re.compile(r'(\D+)(\d+)')
            dgs = p.findall(fields[WPT_CODE])
            if dgs:
                code_data = dgs[0]
                the_geothing.code = fields[WPT_CODE]
                the_geothing.pid = int(code_data[1])
                the_geothing.type_code = code_data[0]
            p = re.compile(u'(.+)от(.+)')
            dgs = p.findall(fields[WPT_TITLE])
            if dgs:
                title = dgs[0]
                the_geothing.name = title[0]
                the_geothing.author = title[1]
            d = float(fields[WPT_DATE])
            the_geothing.created_date = Dephi_date_to_python_date(d)
            if the_geothing.type_code in GEOCACHING_ONMAP_TYPES:
                geothing = get_object_or_none(Geothing, pid=the_geothing.pid,
                                              geosite=geosite)
                if geothing is not None:
                    update_geothing(geothing, the_geothing, the_location)
                else:
                    create_new_geothing(the_geothing, the_location, geosite)

    log('map_gcsu_caches', 'OK')
    elapsed = time() - start
    print "Elapsed time -->", elapsed
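
# The WPT date field is a Delphi/OLE serial date: days since 1899-12-30, with
# the fractional part as time of day. A sketch of what
# Dephi_date_to_python_date() presumably does, assuming that standard
# encoding.
from datetime import datetime, timedelta

def delphi_date_to_python_date(value):
    return datetime(1899, 12, 30) + timedelta(days=value)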
def main():
    LOAD_CACHES = True
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)

    # log in
    r = yplib.post2('http://opencaching.pl/login.php',
                    (('LogMeIn', 'zaloguj'), ('email', 'kurianin'),
                     ('password', 'gjhjkjy'), ('action', 'login'),
                     ('target', 'index.php')))
    soup = yplib.soup()
    a = soup.find('a', text='kurianin')
    if not a:
        print 'Authorization failed'
        return False
    print 'OK'

    ## search page
    #r = yplib.get('http://opencaching.pl/search.php')
    #soup = yplib.soup()

    # get wpt file
    r = yplib.get(
        'http://opencaching.pl/search.php?searchto=searchbyname&showresult=1&expert=0&output=HTML&sort=bycreated&f_inactive=1&f_ignored=1&f_userfound=1&f_userowner=1&f_watched=0&f_geokret=0&country=PL&region=&cachetype=1111111110&cache_attribs=&cache_attribs_not=&cachesize_1=1&cachesize_2=1&cachesize_3=1&cachesize_4=1&cachesize_5=1&cachesize_6=1&cachesize_7=1&cachevote_1=-3&cachevote_2=3.000&cachenovote=1&cachedifficulty_1=1&cachedifficulty_2=5&cacheterrain_1=1&cacheterrain_2=5&cacherating=0&cachename=%25&cachename='
    )
    soup = yplib.soup(cp='utf8')
    link_to_wpt = ''
    wpt_link = re.compile(r'ocpl\d+\.wpt\?.+count=max.*')
    a_list = soup.findAll('a', {'class': "links", 'title': "Oziexplorer .wpt"})
    if a_list:
        for a in a_list:
            if a.get('href') and wpt_link.match(a.get('href')):
                link_to_wpt = a.get('href')
                break
    print link_to_wpt
    if link_to_wpt:
        r = yplib.get(link_to_wpt)
        soup = yplib.soup(cp='utf8')
        wpt = soup.text.split('\n')
    else:
        print 'oblom'  # no .wpt link found
        return

    # field positions in the exported .wpt lines
    WPT_CODE = 10
    WPT_LAT = 2
    WPT_LON = 3
    WPT_TITLE = 1
    WPT_DATE = 4

    MY_CONSUMER_KEY = 'fky3LF9xvWz9y7Gs3tZ6'
    FIELDS = 'code|name|location|type|status|url|owner|date_created'
    geocach_api_request = 'http://opencaching.pl/okapi/services/caches/geocache?cache_code=%s&consumer_key=%s&fields=%s'
    geosite = Geosite.objects.get(code='OCPL')
    print geosite
    print len(wpt), 'points'
    k = 0
    uc = 0
    nc = 0
    for point in wpt:
        k += 1
        fields = point.split(',')
        if fields[0] == '-1':
            the_geothing = TheGeothing()
            the_geothing.pid = 1
            the_location = TheLocation()
            lat_degree = float(fields[WPT_LAT])
            the_location.NS_degree = lat_degree
            lon_degree = float(fields[WPT_LON])
            the_location.EW_degree = lon_degree
            code_str = fields[WPT_CODE]
            parts = code_str.split('/')
            if len(parts) == 4:
                cache_code = parts[0]
                the_geothing.code = cache_code
                the_geothing.name = fields[WPT_TITLE]
                geothing_items = Geothing.objects.filter(
                    code=the_geothing.code, geosite=geosite)
                if geothing_items.count() > 0:
                    geothing = geothing_items[0]
                    if the_geothing.name == geothing.name and \
                       not location_was_changed(geothing.location,
                                                the_location):
                        continue
                url = geocach_api_request % (cache_code, MY_CONSUMER_KEY,
                                             FIELDS)
                try:
                    response = urllib2.urlopen(url)
                    json_str = response.read()
                    cache_data = json.loads(json_str)
                    if cache_data.get('status') != 'Available':
                        continue
                    the_geothing.type_code = OCPL_TYPES.get(
                        cache_data.get('type'))
                    cache_url = cache_data.get('url')
                    if not cache_url:
                        continue
                    p = re.compile(u'OP([\dA-F]+)$')
                    dgs = p.findall(cache_url)
                    the_geothing.pid = int(dgs[0], 16)
                    owner_name = ''
                    if cache_data.get('owner'):
                        owner_name = cache_data.get('owner').get('username')
                    the_geothing.author = owner_name
                    date_created = cache_data.get('date_created')
                    if date_created:
                        date_created = date_created[:10]
                        parts = date_created.split('-')
                        if parts and len(parts) == 3:
                            dt = datetime(int(parts[0]), int(parts[1]),
                                          int(parts[2]))
                            the_geothing.created_date = dt
                except:
                    # cache_data may be unbound here, so only the URL is
                    # reported
                    print 'exception.'
                    print url
                    continue
                if the_geothing.type_code in GEOCACHING_ONMAP_TYPES:
                    geothing = get_object_or_none(Geothing,
                                                  pid=the_geothing.pid,
                                                  geosite=geosite)
                    if geothing is not None:
                        update_geothing(geothing, the_geothing, the_location)
                        uc += 1
                    else:
                        create_new_geothing(the_geothing, the_location,
                                            geosite)
                        nc += 1

    sql = """
    select COUNT(*) FROM (
        select g.code as code, count(id) as cnt
        from geothing g
        group by g.code
        having cnt > 1
    ) as tbl
    """
    dc = sql2val(sql)
    message = 'OK. updated %s, new %s, doubles %s' % (uc, nc, dc)
    log('map_ocpl_caches', message)
    elapsed = time() - start
    print "Elapsed time -->", elapsed
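
# Standalone sketch of the OKAPI lookup performed above (Python 2, urllib2 +
# json). Endpoint and field list come from the script; the consumer key is a
# placeholder.
import json
import urllib2

OKAPI_URL = ('http://opencaching.pl/okapi/services/caches/geocache'
             '?cache_code=%s&consumer_key=%s&fields=%s')
OKAPI_FIELDS = 'code|name|location|type|status|url|owner|date_created'

def fetch_cache(code, consumer_key, fields=OKAPI_FIELDS):
    response = urllib2.urlopen(OKAPI_URL % (code, consumer_key, fields))
    return json.loads(response.read())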
def main():
    # field positions in the exported .wpt lines
    WPT_CODE = 1
    WPT_LAT = 2
    WPT_LON = 3
    WPT_TITLE = 10
    WPT_DATE = 4
    start = time()
    geosite = Geosite.objects.get(code='SHUKACH')
    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.shukach.com/ru/karta?destination=karta',
                    (('form_build_id', 'form-ce43c02c68d4d8db1cb0e91745797d06'),
                     ('name', 'gps-fun'), ('pass', 'vjlthybpfwbzwbz'),
                     ('form_id', 'user_login_block')))

    sql = """
    DELETE FROM _temp_geothing
    """
    execute_query(sql)

    all_points_count = 0
    for k in range(50):
        ids = range(k * 1000, (k + 1) * 1000)
        ids_str = ','.join([str(id) for id in ids])
        r = yplib.post2('http://www.shukach.com/export_wpt',
                        (('wptnids', ids_str), ))
        wpt = yplib.cmd.show()
        wpt = wpt.split('\n')
        if len(wpt) < 6:
            continue
        for point in wpt:
            point = point.decode('cp1251').encode('utf-8')
            pid = code = None
            name = ''
            created_date = None
            author = type_code = ''
            NS_degree = EW_degree = None
            fields = point.split(',')
            if fields[0].isdigit():
                all_points_count += 1
                p = re.compile(r'(\D+)(\d+)')
                code = fields[WPT_CODE]
                dgs = p.findall(code)
                if dgs:
                    type_code = dgs[0][0]
                    pid = int(dgs[0][1])
                if type_code in GEOCACHING_ONMAP_TYPES:
                    NS_degree = float(fields[WPT_LAT])
                    EW_degree = float(fields[WPT_LON])
                    p = re.compile(r'(.+)от(.+)')
                    dgs = p.findall(fields[WPT_TITLE])
                    if dgs:
                        title = dgs[0]
                        name = title[0].strip()
                        author = title[1].strip()
                    else:
                        name = fields[WPT_TITLE]
                    d = float(fields[WPT_DATE])
                    created_date = Dephi_date_to_python_date(d)
                    date_str = created_date.strftime('%Y-%m-%d %H:%M')
                    ns_str = '{0:.9}'.format(NS_degree)
                    ew_str = '{0:.9}'.format(EW_degree)
                    sql = """
                    INSERT INTO _temp_geothing
                    (pid, code, name, created_date, author,
                     type_code, NS_degree, EW_degree)
                    VALUES ({},'{}','{}','{}', '{}', '{}', {}, {})
                    """.format(pid, code, name.replace("'", "\\'"), date_str,
                               author, type_code, ns_str, ew_str)
                    execute_query(sql)

    sql = "SELECT id FROM geosite WHERE code='SHUKACH'"
    shukach_id = sql2val(sql)

    # update existent geothings
    sql = """
    UPDATE geothing gt
    LEFT JOIN _temp_geothing as t ON gt.pid=t.pid
    SET gt.created_date=t.created_date,
        gt.name=t.name,
        gt.author=t.author,
        gt.type_code=t.type_code
    WHERE gt.geosite_id={} AND t.code IS NOT NULL
      AND (gt.name != t.name OR gt.author != t.author
           OR gt.type_code != t.type_code)
    """.format(shukach_id)
    updated_things = exec_sql(sql)

    sql = """
    UPDATE location as l
    LEFT JOIN geothing as gt ON l.id=gt.location_id
    LEFT JOIN _temp_geothing as t ON gt.pid=t.pid
    SET l.NS_degree=t.NS_degree,
        l.EW_degree=t.EW_degree
    WHERE gt.geosite_id={} AND t.code IS NOT NULL
      AND ((ABS(l.NS_degree - t.NS_degree) > 0.00001)
           OR (ABS(l.EW_degree - t.EW_degree) > 0.00001))
    """.format(shukach_id)
    updated_points = exec_sql(sql)

    # list of id of removed geothings
    sql = """
    SELECT gt.id
    FROM geothing gt
    LEFT JOIN _temp_geothing as t ON gt.pid=t.pid
    WHERE gt.geosite_id={} AND t.code IS NULL
    """.format(shukach_id)
    removed = sql2table(sql)

    new_count = 0
    # insert new geothings
    sql = """
    SELECT t.pid, t.code, t.name, t.created_date, t.author,
           t.country_code, t.type_code, t.NS_degree, t.EW_degree
    FROM _temp_geothing as t
    LEFT JOIN geothing gt ON gt.pid=t.pid AND gt.geosite_id={}
    WHERE gt.pid IS NULL
    """.format(shukach_id)
    cursor = get_cursor(sql)
    while True:
        row = cursor.fetchone()
        if row is None:
            break
        else:
            sql = """
            INSERT INTO location (NS_degree, EW_degree)
            VALUES ({}, {})
            """.format(row[7], row[8])
            execute_query(sql)
            sql = "SELECT LAST_INSERT_ID()"
            location_id = sql2val(sql)
            sql = """
            INSERT INTO geothing
            (geosite_id, pid, code, name, created_date, author,
             type_code, location_id, admin_code)
            SELECT {}, t.pid, t.code, t.name, t.created_date, t.author,
                   t.type_code, {}, '777'
            FROM _temp_geothing as t
            WHERE t.pid={}
            """.format(shukach_id, location_id, row[0])
            execute_query(sql)
            new_count += 1

    message = 'OK. %s waypoints, updated %s waypoints, updated %s locations, new %s, removed %s' % (
        all_points_count, updated_things or 0, updated_points or 0,
        new_count, len(removed))
    print message
    log('map_shukach', message)
    elapsed = time() - start
    print "Elapsed time -->", elapsed
    return True
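
# The INSERTs above interpolate values into SQL by hand and only escape single
# quotes. With Django's connection already in use in these scripts, a
# parameterized query is the safer equivalent; a sketch (lastrowid relies on
# the MySQLdb cursor that Django wraps on a MySQL backend):
from django.db import connection

def insert_location(ns_degree, ew_degree):
    cursor = connection.cursor()
    cursor.execute(
        'INSERT INTO location (NS_degree, EW_degree) VALUES (%s, %s)',
        [ns_degree, ew_degree])
    return cursor.lastrowid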
def main():
    #if not switch_off_status_updated():
    #    return False
    LOAD_CREATED_CACHE_LOGS = True
    LOAD_SEEK_CACHE_LOGS = True
    LOAD_RECOMMEND_CACHE_LOGS = True
    LOAD_PHOTOALBUM_LOGS = True
    start = time()
    yplib.setUp()
    yplib.set_debugging(False)

    r = yplib.post2('http://www.geocaching.su/?pn=108',
                    (('Log_In', 'Log_In'), ('email', '*****@*****.**'),
                     ('passwd', 'zaebalixakeryvas'), ('longterm', '1')))
    soup = yplib.soup()
    a = soup.find('a', attrs={'class': "profilelink"}, text='galdor')
    if not a:
        print 'Authorization failed'
        return False
    print 'BEGIN'

    fh = open('cant_open_user_profile.txt', 'w')

    if LOAD_CREATED_CACHE_LOGS:
        LogCreateCach.objects.all().update(updated=False)
        print 'updating of creating logs'
        cachers = Geocacher.objects.all().values_list('pid', 'uid')
        t = re.compile(r'\?pn=101&cid=(\d+)')
        t1 = re.compile(u'создан\s+(\d\d\.\d\d\.\d\d\d\d)')
        for cacher in cachers:
            if cacher[1]:
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=1&uid=%s' % cacher[1]
                try:
                    yplib.get(url)
                except BrowserStateError:
                    log_error(fh, cacher[1], 'bse')
                    continue
                soup = yplib.soup()
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = created_date = None
                        coauthor = False
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])
                                txt = cell.text
                                if u'(соавтор)' in txt:
                                    coauthor = True
                                found = t1.findall(txt)
                                if found:
                                    created_date = found[0]
                                    created_date = date_or_none(created_date)
                                if cach_pid:
                                    print cacher[0], cach_pid, txt.encode('utf8')
                                    the_log, created = LogCreateCach.objects.\
                                        get_or_create(author_pid=cacher[0],
                                                      cach_pid=cach_pid)
                                    the_log.created_date = created_date
                                    the_log.coauthor = coauthor
                                    the_log.updated = True
                                    the_log.save()
                else:
                    log_error(fh, cacher[1], 'npc')
        LogCreateCach.objects.filter(updated=False).delete()

    if LOAD_SEEK_CACHE_LOGS:
        LogSeekCach.objects.all().update(updated=False)
        cachers = Geocacher.objects.all().values_list('pid', 'uid')  #.filter(pid=18849)
        t = re.compile(r'\?pn=101&cid=(\d+)')
        t1 = re.compile(u'создан\s+(\d\d\.\d\d\.\d\d\d\d)')
        t2 = re.compile(u'найден\s+(\d\d\.\d\d\.\d\d\d\d)')
        t3 = re.compile(u'оценен\s+на\s+(\d)')
        for cacher in cachers:
            if cacher[1]:
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=2&uid=%s' % cacher[1]
                try:
                    yplib.get(url)
                    soup = yplib.soup()
                except BrowserStateError:
                    log_error(fh, cacher[1], 'bse')
                    continue
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])
                                txt = cell.text
                                found = t3.findall(txt)
                                if found:
                                    g = found[0]
                                    grade = int_or_none(g)
                                found = t2.findall(txt)
                                if found:
                                    found_date = found[0]
                                    found_date = date_or_none(found_date)
                                if cach_pid:
                                    print cacher[0], cach_pid, txt.encode('utf8')
                                    the_log, created = LogSeekCach.objects.\
                                        get_or_create(cacher_pid=cacher[0],
                                                      cach_pid=cach_pid)
                                    the_log.found_date = found_date
                                    the_log.grade = grade
                                    the_log.updated = True
                                    the_log.save()
                else:
                    log_error(fh, cacher[1], 'npf')
        LogSeekCach.objects.filter(updated=False).delete()

    if LOAD_RECOMMEND_CACHE_LOGS:
        LogRecommendCach.objects.all().update(updated=False)
        cachers = Geocacher.objects.all().values_list('pid', 'uid')
        t = re.compile(r'\?pn=101&cid=(\d+)')
        for cacher in cachers:
            if cacher[1]:
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=3&uid=%s' % cacher[1]
                try:
                    yplib.get(url)
                    soup = yplib.soup()
                except BrowserStateError:
                    log_error(fh, cacher[1], 'bse')
                    continue
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])
                                txt = cell.text
                                if cach_pid:
                                    print cacher[0], cach_pid, txt.encode('utf8')
                                    the_log, created = LogRecommendCach.\
                                        objects.get_or_create(
                                            cacher_pid=cacher[0],
                                            cach_pid=cach_pid)
                                    the_log.updated = True
                                    the_log.save()
                else:
                    log_error(fh, cacher[1], 'npr')
        LogRecommendCach.objects.filter(updated=False).delete()

    if LOAD_PHOTOALBUM_LOGS:
        LogPhotoAlbum.objects.all().update(updated=False)
        cachers = Geocacher.objects.all().values_list('pid', 'uid')
        t = re.compile(r'showmemphotos\.php\?cid=(\d+)')
        for cacher in cachers:
            if cacher[1]:
                url = 'http://www.geocaching.su/site/popup/userstat.php?s=4&uid=%s' % cacher[1]
                try:
                    yplib.get(url)
                    soup = yplib.soup()
                except BrowserStateError:
                    log_error(fh, cacher[1], 'bse')
                    continue
                tbl = soup.find('table', attrs={'class': 'pages'})
                if tbl:
                    rows = tbl.findAll('tr')
                    for row in rows:
                        cach_pid = found_date = grade = None
                        cell = row.find('td')
                        if cell:
                            a_list = cell.findAll('a')
                            for a in a_list:
                                cach_pid = None
                                parts = t.findall(a['href'])
                                if len(parts):
                                    cach_pid = int(parts[0])
                                txt = cell.text
                                if cach_pid:
                                    print cacher[0], cach_pid, txt.encode('utf8')
                                    the_log, created = LogPhotoAlbum.\
                                        objects.get_or_create(
                                            cacher_pid=cacher[0],
                                            cach_pid=cach_pid)
                                    the_log.updated = True
                                    the_log.save()
                else:
                    log_error(fh, cacher[1], 'npp')
        LogPhotoAlbum.objects.filter(updated=False).delete()

    elapsed = time() - start
    print "Elapsed time -->", elapsed
    #switch_on_status_updated()
    log('gcsu_logs', 'OK')
    fh.close()
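
# All four branches above follow the same mark-and-sweep pattern: flag every
# row as stale, upsert what the site still reports, then purge what was never
# touched. A condensed sketch with a generic Django model:
def sync_logs(model, keys):
    model.objects.all().update(updated=False)
    for key in keys:  # key: dict of identifying fields, e.g. cacher/cach pids
        the_log, _ = model.objects.get_or_create(**key)
        the_log.updated = True
        the_log.save()
    model.objects.filter(updated=False).delete()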