def _open_wiki_conn(op, name):
    """Query the English Wikipedia API for one of three operations.

    op is one of 'current_revision', 'recent_revisions' or 'backlinks';
    name is the article title (spaces are converted to underscores).

    Returns:
      - 'backlinks': the JSON list of backlink entries
      - 'recent_revisions': the page's revision list
      - 'current_revision': the page dict itself
      - None on unknown op or any missing piece of the response.
    """
    wikiurls = {
        "current_revision": u"http://en.wikipedia.org/w/api.php?action=query&redirects&prop=revisions&titles=%s&rvprop=content&format=json",
        "recent_revisions": u"http://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=%s&rvlimit=500&rvprop=user|flags&rvdir=newer&rvstart=%s&format=json",
        "backlinks": u"http://en.wikipedia.org/w/api.php?action=query&list=backlinks&bltitle=%s&bllimit=500&blnamespace=0&blfilterredir=nonredirects&format=json",
    }
    if op not in wikiurls:
        return None
    wiki_base_url = wikiurls[op]
    if op == 'recent_revisions':
        # fixed history-window start -- TODO confirm this date is still intended
        tstamp = '20081001000000'
        wiki_url = wiki_base_url % (name.replace(" ", "_"), tstamp)
    else:
        wiki_url = wiki_base_url % name.replace(" ", "_")
    httpsched = get_http_scheduler()
    httpwhat = httpsched.urlopen(service="wikipedia",
                                 url=wiki_url.encode('utf-8'), json=True)
    if 'query' not in httpwhat:
        return None
    if op == 'backlinks':
        # FIX: backlinks is a JSON *list*; return it directly.  The
        # dict-only checks below (keys()/values()/'revisions') crashed on
        # a list, and the final "return json_set" was unreachable.
        if 'backlinks' not in httpwhat['query']:
            return None
        return httpwhat['query']['backlinks']
    if 'pages' not in httpwhat['query']:
        return None
    pages = httpwhat['query']['pages']
    # the API reports a missing page with the single key '-1'
    if list(pages.keys())[0] == '-1':
        return None
    page = list(pages.values())[0]
    if 'revisions' not in page:
        return None
    if op == 'recent_revisions':
        return page['revisions']
    return page
def yahootravel_ranking(city_slug):
    """Return the Yahoo! Travel trip-search result count for a city.

    The query is built from the lowercased city name, the quoted
    region/country names and the tokens of the city's Wikipedia article
    name (punctuation stripped).  Returns None when the response lacks
    a 'ResultSet'/'totalResultsAvailable' entry.
    """
    cobj = City.find(slug=city_slug)
    terms = cobj.name.lower().split(" ")
    if cobj.region is not None:
        terms.append("\"" + cobj.region.name.lower() + "\"")
    terms.append("\"" + cobj.country.name.lower() + "\"")
    # strip punctuation from the wiki article name, collapse double blanks
    cleaned = re.sub(r"[,\.:();]", " ", cobj.wikiname)
    cleaned = re.sub(r" {2,}", " ", cleaned)
    terms.extend(cleaned.split(" "))
    yt_query = " ".join(set(terms))
    yt_url = ("http://travel.yahooapis.com/TripService/V1.1/"
              "tripSearch?appid=%s&query=%s&results=%d&output=json") % (
                  YAHOO_DEVAPP_ID,
                  urllib.quote(yt_query.encode('utf-8')),
                  10)
    httpsched = get_http_scheduler()
    httpwhat = httpsched.urlopen(service="yahootravel", url=yt_url,
                                 json=True, alt_delta=2)
    if ('ResultSet' in httpwhat
            and 'totalResultsAvailable' in httpwhat['ResultSet']):
        return int(httpwhat['ResultSet']['totalResultsAvailable'])
    return None
def _open_commons_image(image, width=450):
    """Look up a Wikimedia Commons image via Magnus Manske's commonsapi.

    Returns a dict with 'src' (thumbnail URL), 'href' (description page
    URL), 'width' and 'height' (as strings), or None when the response
    is missing or cannot be parsed.
    See http://toolserver.org/~magnus/commonsapi.php for details.
    """
    resp = {'src': None, 'href': None, 'width': None, 'height': None}
    url = ("http://toolserver.org/~magnus/commonsapi.php"
           "?image=%s&thumbwidth=%d&languages=en") % (
               urllib.quote(image.encode('utf-8')), width)
    httpsched = get_http_scheduler()
    # NOTE(review): xml=False looks inconsistent -- the code below expects
    # a DOM, and other XML endpoints in this file pass xml=True; confirm
    # against get_http_scheduler().
    xmlresp = httpsched.urlopen(service="commons", url=url, xml=False)
    # FIX: a premature "return xmlresp" (debug leftover) made all of the
    # parsing below dead code; removed so the parsed dict is returned.
    if xmlresp is None:
        return None
    try:
        # layout: <response><file> ... <urls>...</urls> </file></response>
        # FIX: getElementsByTagName was being called on NodeLists instead
        # of Elements, which always raised and fell into the except.
        file_el = xmlresp.getElementsByTagName(
            'response')[0].getElementsByTagName('file')[0]
        urls_el = file_el.getElementsByTagName('urls')[0]
        resp['href'] = urls_el.getElementsByTagName(
            'description')[0].firstChild.nodeValue
        resp['src'] = urls_el.getElementsByTagName(
            'thumbnail')[0].firstChild.nodeValue
        resp['width'] = file_el.getElementsByTagName(
            'width')[0].firstChild.nodeValue
        resp['height'] = file_el.getElementsByTagName(
            'height')[0].firstChild.nodeValue
    except (IndexError, AttributeError):
        # malformed or error response from the tool
        resp = None
    return resp
def yahoolocal_coordinates(city, country):
    """Geocode "<city>, <country>" through the Yahoo! Maps geocode API.

    Returns {'latitude', 'longitude', 'city'} with string values taken
    from the XML response, or None when any expected tag is missing.
    """
    query = "%s, %s" % (city, country)
    url = ("http://local.yahooapis.com/MapsService/V1/geocode"
           "?appid=%s&city=%s") % (
               YAHOO_DEVAPP_ID, urllib.quote(query.encode('utf-8')))
    httpsched = get_http_scheduler()
    # NOTE(review): service tag is "yahootravel" although this is the Maps
    # API -- presumably shares that scheduler slot; confirm.
    doc = httpsched.urlopen(service="yahootravel", url=url, xml=True,
                            alt_delta=2)
    result = {}
    for key, tag in (('latitude', 'Latitude'),
                     ('longitude', 'Longitude'),
                     ('city', 'City')):
        nodes = doc.getElementsByTagName(tag)
        if not nodes:
            return None
        result[key] = nodes[0].childNodes[0].data
    return result
def yahootravel_ranking(city_slug):
    """Return how many Yahoo! Travel trip-search results a city has.

    Query terms: the city name tokens, quoted region and country names,
    and the (de-punctuated) Wikipedia article name tokens.  Returns None
    when the JSON response has no result count.
    """
    cobj = City.find(slug=city_slug)
    words = cobj.name.lower().split(" ")
    if cobj.region is not None:
        words.append("\"" + cobj.region.name.lower() + "\"")
    words.append("\"" + cobj.country.name.lower() + "\"")
    # remove punctuation from the wiki name and collapse repeated blanks
    wiki_tokens = re.sub(r" {2,}", " ",
                         re.sub(r"[,\.:();]", " ", cobj.wikiname))
    words.extend(wiki_tokens.split(" "))
    yt_query = " ".join(set(words))
    yt_url = ("http://travel.yahooapis.com/TripService/V1.1/"
              "tripSearch?appid=%s&query=%s&results=%d&output=json") % (
                  YAHOO_DEVAPP_ID,
                  urllib.quote(yt_query.encode('utf-8')),
                  10)
    httpsched = get_http_scheduler()
    reply = httpsched.urlopen(service="yahootravel", url=yt_url,
                              json=True, alt_delta=2)
    if 'ResultSet' not in reply:
        return None
    if 'totalResultsAvailable' not in reply['ResultSet']:
        return None
    return int(reply['ResultSet']['totalResultsAvailable'])
def city_timezone(city_id):
    """Look up a city's UTC offset from the earthtools.org timezone API.

    Returns {'offset', 'city', 'id'} or None when the lookup fails.
    """
    city = City.objects.get(pk=city_id)
    if city is None:
        # NOTE(review): Django-style .get() normally raises instead of
        # returning None; guard kept from the original -- confirm.
        return None
    tz_url = "http://www.earthtools.org/timezone/%f/%f" % (
        city.coordinates.x, city.coordinates.y)
    httpsched = get_http_scheduler()
    doc = httpsched.urlopen(service="earthtools", url=tz_url,
                            xml=True, alt_delta=1)
    offsets = doc.getElementsByTagName('offset')
    if not offsets:
        return None
    return {'offset': offsets[0].childNodes[0].data,
            'city': city.name,
            'id': city_id}
def delicious_popular(city_name):
    """Fetch the del.icio.us 'popular' feed for a city's name tokens.

    Each whitespace-separated token of city_name becomes a tag; the feed
    is keyed on the '+'-joined tag list.  Returns whatever the scheduler's
    urlopen() yields for the feed URL.
    """
    # FIX: removed two dead guards -- str.split(" ") can never return
    # None or an empty list -- and a commented-out urllib2 call.
    tagset = "+".join(city_name.split(" "))
    base_url = "http://feeds.delicious.com/v2/json/popular/%s" % tagset
    httpsched = get_http_scheduler()
    return httpsched.urlopen(url=base_url)
def delicious_popular(city_name):
    """Fetch the del.icio.us 'popular' feed for a city's name tokens.

    The city name is split on blanks and the tokens are '+'-joined into
    the feed URL.  Returns the scheduler's response object.
    """
    tags = city_name.split(" ")
    if not tags:
        return None
    feed_url = ("http://feeds.delicious.com/v2/json/popular/%s"
                % "+".join(tags))
    httpsched = get_http_scheduler()
    return httpsched.urlopen(url=feed_url)
def _open_panoramio_conn(coords, eps_factor=1, howmany=10):
    """Fetch up to `howmany` popular public Panoramio photos near coords.

    coords is (lat, lon); the search area is a square bounding box of
    half-width eps_factor * 0.016 degrees.  Returns the parsed JSON
    response, or None when coords is None or no 'photos' key is present.
    """
    if coords is None:
        return None
    lat, lon = coords
    eps = eps_factor * 0.016  # half-width of the bounding box, in degrees
    url = ("http://www.panoramio.com/map/get_panoramas.php"
           "?order=popularity&set=public&size=square&from=0"
           "&to=%d&minx=%f&miny=%f&maxx=%f&maxy=%f") % (
               howmany, lon - eps, lat - eps, lon + eps, lat + eps)
    httpsched = get_http_scheduler()
    reply = httpsched.urlopen(service="panoramio", url=url, json=True)
    if 'photos' not in reply:
        return None
    return reply
def gmaplocal_coordinates(city, country):
    """Geocode a city through the Google Maps (v2) XML geocoder.

    Only Placemarks with town/city accuracy ('4' or '5') and a non-empty
    Locality are considered.  Returns a dict with 'latitude',
    'longitude', 'city' and 'ranking' for the best candidate, or None
    when the geocoder errors out or no candidate survives the filters.
    """
    query = "\"%s\", \"%s\"" % (city, country)
    url = ("http://maps.google.com/maps/geo?key=%s&q=%s"
           "&output=xml&hl=en-gb") % (
               GOOGLEMAPS_APIKEY, urllib.quote(query.encode('utf-8')))
    httpsched = get_http_scheduler()
    doc = httpsched.urlopen(service="yahootravel", url=url, xml=True,
                            alt_delta=2)
    # bail out unless the geocoder reported HTTP-style status 200
    # (FIX: also guard against a missing <code> tag, which raised)
    codes = doc.getElementsByTagName('code')
    if not codes or codes[0].childNodes[0].data != '200':
        return None
    candidates = []
    for p in doc.getElementsByTagName('Placemark'):
        xmlobj = {'ranking': 1}
        # accuracy check: 4 = town, 5 = ZIP/city; skip everything else
        details = p.getElementsByTagName('AddressDetails')
        if len(details) == 0:
            continue
        # getAttribute returns '' when absent, covering the has_key check
        if details[0].getAttribute('Accuracy') not in ('4', '5'):
            continue
        # locality check
        loc = p.getElementsByTagName('Locality')
        if len(loc) == 0:
            continue
        xmlobj['ranking'] = len(loc[0].childNodes)
        if xmlobj['ranking'] == 0:
            continue
        # coordinates arrive as "lon,lat[,alt]"
        coords = p.getElementsByTagName('coordinates')
        if len(coords) == 0:
            continue
        parts = coords[0].childNodes[0].data.split(',')
        # locality name
        lname = p.getElementsByTagName('LocalityName')
        if len(lname) == 0:
            continue
        xmlobj.update({'latitude': parts[1],
                       'longitude': parts[0],
                       'city': lname[0].childNodes[0].data})
        candidates.append(xmlobj)
    # FIX: the original indexed xmlset[0] unconditionally and raised
    # IndexError whenever every Placemark was filtered out.
    if not candidates:
        return None
    # lowest ranking first, take the best candidate
    # (key= replaces the Python-2-only cmp lambda; same stable order)
    candidates.sort(key=lambda c: c['ranking'])
    return candidates[0]
def yahoolocal_coordinates(city, country):
    """Geocode "<city>, <country>" via the Yahoo! Maps geocode API.

    Returns a dict {'latitude', 'longitude', 'city'} of string values
    from the XML reply, or None when any of the tags is absent.
    """
    geo_query = "%s, %s" % (city, country)
    geo_url = ("http://local.yahooapis.com/MapsService/V1/geocode"
               "?appid=%s&city=%s") % (
                   YAHOO_DEVAPP_ID,
                   urllib.quote(geo_query.encode('utf-8')))
    httpsched = get_http_scheduler()
    # NOTE(review): service "yahootravel" is reused for the Maps API,
    # presumably to share the same rate limit -- confirm.
    doc = httpsched.urlopen(service="yahootravel", url=geo_url, xml=True,
                            alt_delta=2)
    fields = {}
    for key, tag in (('latitude', 'Latitude'),
                     ('longitude', 'Longitude'),
                     ('city', 'City')):
        found = doc.getElementsByTagName(tag)
        if len(found) == 0:
            return None
        fields[key] = found[0].childNodes[0].data
    return fields
def city_timezone(city_id):
    """Return the earthtools.org UTC offset for the city with city_id.

    Result is {'offset', 'city', 'id'}; None when the city or the
    <offset> tag cannot be found.
    """
    city = City.objects.get(pk=city_id)
    if city is None:
        # NOTE(review): .get() usually raises rather than returning None;
        # guard preserved from the original -- confirm.
        return None
    url = "http://www.earthtools.org/timezone/%f/%f" % (
        city.coordinates.x, city.coordinates.y)
    httpsched = get_http_scheduler()
    reply = httpsched.urlopen(service="earthtools", url=url,
                              xml=True, alt_delta=1)
    offset_nodes = reply.getElementsByTagName('offset')
    if len(offset_nodes) == 0:
        return None
    return {
        'offset': offset_nodes[0].childNodes[0].data,
        'city': city.name,
        'id': city_id,
    }
def _open_panoramio_conn(coords, eps_factor=1, howmany=10):
    """Query Panoramio for popular public photos around (lat, lon) coords.

    The bounding box is a square of half-width eps_factor * 0.016
    degrees; at most `howmany` photos are requested.  Returns the parsed
    JSON or None (falsy coords, or a reply without 'photos').
    """
    if not coords:
        return None
    half = eps_factor * 0.016  # degrees; bounding-box half-width
    lo = (coords[0] - half, coords[1] - half)
    hi = (coords[0] + half, coords[1] + half)
    url = ("http://www.panoramio.com/map/get_panoramas.php"
           "?order=popularity&set=public&size=square&from=0"
           "&to=%d&minx=%f&miny=%f&maxx=%f&maxy=%f") % (
               howmany, lo[1], lo[0], hi[1], hi[0])
    httpsched = get_http_scheduler()
    payload = httpsched.urlopen(service="panoramio", url=url, json=True)
    if "photos" not in payload:
        return None
    return payload
def _wikipedia_article(city_id, more='City'):
    """Resolve the Wikipedia article for a City or Country record.

    more selects the record type ('City' or 'Country'); city_id is its
    primary key.  Lookup order: the stored wikiname, the plain name,
    the local name (when the first hit is a {{disambig}} page), and
    finally a Wikipedia full-text search on name + country name.
    Returns the article dict from _open_wiki_url(), or None.
    """
    if more == 'City':
        cobj = getCityObject(id=city_id)
    elif more == 'Country':
        cobj = Country.objects.get(id=city_id)
    else:
        cobj = None
    if cobj is None:
        return None
    dab_re = "{{disambig}}"
    # step 1: try to access the direct page article
    httpwhat = None
    if cobj.wikiname is not None:
        httpwhat = _open_wiki_url(cobj.wikiname)
    if httpwhat is None:
        httpwhat = _open_wiki_url(cobj.name)
    # (FIX: removed unused local httpwikiname and a commented-out line)
    if httpwhat is not None and re.search(dab_re, httpwhat['content']) is None:
        pass  # apparently found a real, non-disambiguation article
    else:
        # step 2: disambiguation page or miss -- try the local name,
        # then fall back to a full-text search
        httpwhat = None
        if cobj.local_name is not None:
            httpwhat = _open_wiki_url(cobj.local_name)
        if httpwhat is None:
            # NOTE(review): for more == 'Country' this reads cobj.country,
            # which a Country record may not have -- confirm.
            query_array = []
            query_array += cobj.name.split(" ")
            query_array += cobj.country.name.split(" ")
            base_url = ("http://en.wikipedia.org/w/api.php?action=query"
                        "&redirects&list=search&srsearch=%s&format=json"
                        ) % "+".join(query_array)
            httpsched = get_http_scheduler()
            jsonresp = httpsched.urlopen(service="wikipedia",
                                         url=base_url.encode('utf-8'),
                                         json=True)
            if 'query' not in jsonresp:
                return None
            if 'search' not in jsonresp['query']:
                return None
            if len(jsonresp['query']['search']) == 0:
                return None
            httpwhat = _open_wiki_url(jsonresp['query']['search'][0]['title'])
    return httpwhat
def _wikipedia_article(obj):
    """Resolve the Wikipedia article for obj (a city/country-like record).

    Lookup order: obj.wikiname, obj.name, obj.local_name (when the first
    hit is a {{disambig}} page), then a full-text search built from the
    name plus the country name.  Returns the _open_wiki_url() article
    dict, or None.
    """
    dab_re = "{{disambig}}"
    # step 1: direct page lookup
    article = _open_wiki_url(obj.wikiname) if obj.wikiname else None
    if article is None:
        article = _open_wiki_url(obj.name)
    if not article or re.search(dab_re, article['content']):
        # disambiguation page (or nothing found): fall back
        article = _open_wiki_url(obj.local_name) if obj.local_name else None
        if not article:
            terms = obj.name.split(" ") + obj.country.name.split(" ")
            search_url = ("http://en.wikipedia.org/w/api.php?action=query"
                          "&redirects&list=search&srsearch=%s&format=json"
                          ) % "+".join(terms)
            sched = get_http_scheduler()
            reply = sched.urlopen(service="wikipedia",
                                  url=search_url.encode('utf-8'),
                                  json=True)
            if ('query' not in reply
                    or 'search' not in reply['query']
                    or not reply['query']['search']):
                return None
            article = _open_wiki_url(reply['query']['search'][0]['title'])
    return article
def _open_wiki_conn(op, name):
    """Run one of three queries against the English Wikipedia API.

    op: 'current_revision', 'recent_revisions' or 'backlinks'.
    name: the article title; spaces become underscores in the URL.

    Returns the backlinks list, the revisions list, or the page dict
    respectively; None for an unknown op or an incomplete response.
    """
    wikiurls = {
        "current_revision": u"http://en.wikipedia.org/w/api.php?action=query&redirects&prop=revisions&titles=%s&rvprop=content&format=json",
        "recent_revisions": u"http://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=%s&rvlimit=500&rvprop=user|flags&rvdir=newer&rvstart=%s&format=json",
        "backlinks": u"http://en.wikipedia.org/w/api.php?action=query&list=backlinks&bltitle=%s&bllimit=500&blnamespace=0&blfilterredir=nonredirects&format=json",
    }
    if op not in wikiurls:
        return None
    wiki_base_url = wikiurls[op]
    if op == 'recent_revisions':
        # fixed history-window start -- TODO confirm this date is intended
        tstamp = '20081001000000'
        wiki_url = wiki_base_url % (name.replace(" ", "_"), tstamp)
    else:
        wiki_url = wiki_base_url % name.replace(" ", "_")
    httpsched = get_http_scheduler()
    httpwhat = httpsched.urlopen(service="wikipedia",
                                 url=wiki_url.encode('utf-8'),
                                 json=True)
    if 'query' not in httpwhat:
        return None
    if op == 'backlinks':
        # FIX: backlinks is a JSON *list*, so return it immediately.
        # The original fell through to dict-only checks (keys(),
        # 'revisions') that crash on a list, leaving the trailing
        # "return json_set" unreachable.
        if 'backlinks' not in httpwhat['query']:
            return None
        return httpwhat['query']['backlinks']
    if 'pages' not in httpwhat['query']:
        return None
    pages = httpwhat['query']['pages']
    # the API marks a missing page with the single key '-1'
    if list(pages.keys())[0] == '-1':
        return None
    page = list(pages.values())[0]
    if 'revisions' not in page:
        return None
    if op == 'recent_revisions':
        return page['revisions']
    return page
def _wikipedia_article(obj):
    """Find the Wikipedia article that describes obj.

    Attempts, in order: obj.wikiname, obj.name, obj.local_name (used
    when the first hit is a {{disambig}} page), and finally a Wikipedia
    full-text search over the name and country-name tokens.  Returns
    what _open_wiki_url() yields, or None.
    """
    dab_marker = "{{disambig}}"
    # step 1: direct article lookup by stored wiki name, then plain name
    found = None
    if obj.wikiname:
        found = _open_wiki_url(obj.wikiname)
    if found is None:
        found = _open_wiki_url(obj.name)
    hit_is_good = bool(found) and re.search(dab_marker,
                                            found['content']) is None
    if not hit_is_good:
        # step 2: local name, then search fallback
        found = None
        if obj.local_name:
            found = _open_wiki_url(obj.local_name)
        if not found:
            tokens = obj.name.split(" ") + obj.country.name.split(" ")
            url = ("http://en.wikipedia.org/w/api.php?action=query"
                   "&redirects&list=search&srsearch=%s&format=json"
                   ) % "+".join(tokens)
            httpsched = get_http_scheduler()
            resp = httpsched.urlopen(service="wikipedia",
                                     url=url.encode('utf-8'),
                                     json=True)
            if 'query' not in resp:
                return None
            if 'search' not in resp['query']:
                return None
            if len(resp['query']['search']) == 0:
                return None
            found = _open_wiki_url(resp['query']['search'][0]['title'])
    return found
def _open_commons_image(image, width=450):
    """Fetch metadata for a Wikimedia Commons image via the commonsapi tool.

    Returns {'src': thumbnail URL, 'href': description-page URL,
    'width': ..., 'height': ...} (string values), or None when the
    response is missing or unparsable.
    See http://toolserver.org/~magnus/commonsapi.php for details.
    """
    resp = {'src': None, 'href': None, 'width': None, 'height': None}
    url = ("http://toolserver.org/~magnus/commonsapi.php"
           "?image=%s&thumbwidth=%d&languages=en") % (
               urllib.quote(image.encode('utf-8')), width)
    httpsched = get_http_scheduler()
    # NOTE(review): xml=False is suspicious -- the parsing below needs a
    # DOM and the file's other XML endpoints pass xml=True; confirm.
    xmlresp = httpsched.urlopen(service="commons", url=url, xml=False)
    # FIX: dropped a debug "return xmlresp" that made the parsing below
    # unreachable dead code.
    if xmlresp is None:
        return None
    try:
        # layout: <response><file> ... <urls>...</urls> </file></response>
        # FIX: index into the NodeLists before calling
        # getElementsByTagName again (the original called it on lists,
        # always raising into the bare except).
        file_el = xmlresp.getElementsByTagName(
            'response')[0].getElementsByTagName('file')[0]
        urls_el = file_el.getElementsByTagName('urls')[0]
        resp['href'] = urls_el.getElementsByTagName(
            'description')[0].firstChild.nodeValue
        resp['src'] = urls_el.getElementsByTagName(
            'thumbnail')[0].firstChild.nodeValue
        resp['width'] = file_el.getElementsByTagName(
            'width')[0].firstChild.nodeValue
        resp['height'] = file_el.getElementsByTagName(
            'height')[0].firstChild.nodeValue
    except (IndexError, AttributeError):
        # malformed or error response
        resp = None
    return resp
def gmaplocal_coordinates(city, country):
    """Geocode "city, country" with the Google Maps (v2) XML geocoder.

    Keeps only Placemarks whose AddressDetails accuracy is '4' (town) or
    '5' (ZIP/city) and which carry a non-empty Locality.  Returns the
    best candidate as {'latitude', 'longitude', 'city', 'ranking'}, or
    None on a geocoder error or when no candidate passes the filters.
    """
    query = "\"%s\", \"%s\"" % (city, country)
    url = ("http://maps.google.com/maps/geo?key=%s&q=%s"
           "&output=xml&hl=en-gb") % (
               GOOGLEMAPS_APIKEY, urllib.quote(query.encode('utf-8')))
    httpsched = get_http_scheduler()
    doc = httpsched.urlopen(service="yahootravel", url=url, xml=True,
                            alt_delta=2)
    # check the geocoder's HTTP-style status code
    # (FIX: guard against a missing <code> tag, which raised IndexError)
    codes = doc.getElementsByTagName('code')
    if not codes or codes[0].childNodes[0].data != '200':
        return None
    candidates = []
    for placemark in doc.getElementsByTagName('Placemark'):
        entry = {'ranking': 1}
        # accuracy filter: only '4' or '5' (town / ZIP-city level);
        # getAttribute returns '' when absent, covering the old
        # NamedNodeMap containment check
        details = placemark.getElementsByTagName('AddressDetails')
        if len(details) == 0:
            continue
        if details[0].getAttribute('Accuracy') not in ('4', '5'):
            continue
        # locality check
        loc = placemark.getElementsByTagName('Locality')
        if len(loc) == 0:
            continue
        entry['ranking'] = len(loc[0].childNodes)
        if entry['ranking'] == 0:
            continue
        # coordinates arrive as "lon,lat[,alt]"
        coords = placemark.getElementsByTagName('coordinates')
        if len(coords) == 0:
            continue
        parts = coords[0].childNodes[0].data.split(',')
        # locality name
        lname = placemark.getElementsByTagName('LocalityName')
        if len(lname) == 0:
            continue
        entry.update({
            'latitude': parts[1],
            'longitude': parts[0],
            'city': lname[0].childNodes[0].data,
        })
        candidates.append(entry)
    # FIX: the original indexed xmlset[0] unconditionally and raised
    # IndexError when every Placemark was filtered out.
    if not candidates:
        return None
    # lowest ranking first; key= replaces the Python-2-only cmp lambda
    candidates.sort(key=lambda c: c['ranking'])
    return candidates[0]