def fetch_webpage_text(url, use_cache=True):
    if use_cache and cache.contains(url):
        return cache.get(url)
    # cache miss, download it
    content = requests.get(url).text
    cache.put(url, content)
    return content

def _get_id_memcache(dbo, table):
    cache_key = "db:%s:as:%s:tb:%s" % (dbo.database, dbo.alias, table)
    nextid = cache.increment(cache_key)
    if nextid is None:
        nextid = query_int(dbo, "SELECT MAX(ID) FROM %s" % table) + 1
        cache.put(cache_key, nextid, 600)
    return nextid

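# A hedged usage sketch for _get_id_memcache: `dbo` and the memcache-backed
# `cache` module come from the surrounding code; the "animal" table name is
# purely illustrative.
def _example_next_animal_id(dbo):
    # cache.increment is atomic, so concurrent callers receive distinct IDs;
    # only a cache miss falls back to SELECT MAX(ID) to re-seed the counter,
    # which then lives for 600 seconds.
    return _get_id_memcache(dbo, "animal")
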
def fetch_webpage(url, use_cache=True):
    key = cache.md5_key(url)
    if use_cache and cache.contains(key):
        return cache.get(key)
    content = requests.get(url).text
    cache.put(key, content)
    return content

def beautify(text, lang, options):
    """
    Process input `text` according to the specified `mode`.
    Adds comments if needed, according to the `lang` rules.
    Caches the results.
    The whole work (except caching) is done by _beautify().
    """
    options = options or {}
    beauty_options = dict((k, v) for k, v in options.items()
                          if k in ['add_comments', 'remove_text'])
    mode = ''
    if beauty_options.get('add_comments'):
        mode += 'c'
    if beauty_options.get('remove_text'):
        mode += 'q'

    if beauty_options == {}:
        # if mode is unknown, just don't transform the text at all
        return text

    digest = "t:%s:%s:%s" % (hashlib.md5(text).hexdigest(), lang, mode)
    answer = cache.get(digest)
    if answer:
        return answer
    answer = _beautify(text, lang, **beauty_options)
    cache.put(digest, answer)
    return answer

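# A hedged usage sketch for beautify(); the snippet and language below are
# illustrative, and _beautify/cache come from this module.
def _example_beautify():
    # Same text with different options yields a different mode string, and
    # hence a separate cache entry, because the digest keys on md5(text),
    # lang and mode together.
    return beautify("print('hi')", "python", {"add_comments": True})
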
def on_btnPost_clicked(self, widget, *args):  #context Window.on_btnPost_clicked {
    buffer = self.textview.get_buffer()
    start, end = buffer.get_bounds()
    text = buffer.get_text(start, end)
    subject = self.subj.get_text()
    tags = self.taglist.get_text()
    if subject == '':
        self.msg_box("Subject can not be empty", gtk.MESSAGE_WARNING)
        return
    # actual submitting
    try:
        if lj.last_event:
            result = lj.edit(lj.last_event['itemid'],
                             lj.last_event['eventtime'],
                             {'subj': subject,
                              'text': format.post(text),
                              'tags': tags})
        else:
            result = lj.post({'subj': subject,
                              'text': format.post(text),
                              'tags': tags})
        # store unaltered text
        cache.put(conf.server, result['itemid'], text)
        lj.last_event = {}
    except xmlrpclib.Fault, inst:
        self.msg_box(str(inst), gtk.MESSAGE_WARNING)

def get_lat_long(dbo, address, town, county, postcode, country = None):
    """
    Looks up a latitude and longitude from an address using GEOCODE_URL
    and returns them as lat,long,(first 3 chars of address)
    Returns None if no results were found.
    NB: dbo is only used for contextual reference in logging, no database
    calls are made by any of this code.
    """
    if address.strip() == "":
        return None
    try:
        # Synchronise this process to a single thread to prevent
        # abusing our geo provider and concurrent requests for the
        # same address when opening an animal with the same
        # original/brought in by owner, etc.
        lat_long_lock.acquire()
        url = ""
        if country is None:
            country = LOCALE_COUNTRY_NAME_MAP[dbo.locale]
        if BULK_GEO_PROVIDER == "cloudmade":
            q = normalise_cloudmade(address, town, county, postcode, country)
            url = CLOUDMADE_URL.replace("{key}", BULK_GEO_PROVIDER_KEY).replace("{q}", q)
        elif BULK_GEO_PROVIDER == "nominatim":
            q = normalise_nominatim(address, town, county, postcode, country)
            url = NOMINATIM_URL.replace("{q}", q)
        else:
            al.error("unrecognised geo provider: %s" % BULK_GEO_PROVIDER, "geo.get_lat_long", dbo)
        al.debug("looking up geocode for address: %s" % q, "geo.get_lat_long", dbo)
        key = "nom:" + q
        if cache.available():
            v = cache.get(key)
            if v is not None:
                al.debug("cache hit for address: %s = %s" % (q, v), "geo.get_lat_long", dbo)
                return v
        jr = urllib2.urlopen(url, timeout = GEO_LOOKUP_TIMEOUT).read()
        j = json.loads(jr)
        latlon = None
        if BULK_GEO_PROVIDER == "cloudmade":
            latlon = parse_cloudmade(dbo, jr, j, q)
        elif BULK_GEO_PROVIDER == "nominatim":
            latlon = parse_nominatim(dbo, jr, j, q)
        # Cache this address/geocode response for an hour
        if cache.available() and latlon is not None:
            cache.put(key, latlon, 3600)
        return latlon
    except Exception, err:
        al.error(str(err), "geo.get_lat_long", dbo)
        return None
    finally:
        # always release the lock, including on early return or error
        lat_long_lock.release()

def fetch_webpage_text(url, use_cache=True):
    if use_cache and cache.contains(url):
        return cache.get(url)
    # if cache miss, download it and sleep one second to prevent too-frequent calls
    content = requests.get(url).text
    cache.put(url, content)
    time.sleep(1)
    return content

def get_answer_dict(self, topic, request_options=None):
    """
    Find cheat sheet for the topic.

    Args:
        `topic` (str):  the name of the topic of the cheat sheet

    Returns:
        answer_dict:    the answer dictionary
    """
    topic = self.handle_if_random_request(topic)
    topic_type = self.get_topic_type(topic)

    # 'question' queries are pretty expensive, that's why they should be handled
    # in a special way:
    # we do not drop the old style cache entries and try to reuse them if possible
    if topic_type == 'question':
        answer = cache.get('q:' + topic)
        if answer:
            if isinstance(answer, dict):
                return answer
            return {
                'topic': topic,
                'topic_type': 'question',
                'answer': answer,
                'format': 'text+code',
            }
        answer = self._get_page_dict(topic, topic_type, request_options=request_options)
        if answer.get("cache", True):
            cache.put('q:' + topic, answer)
        return answer

    # Try to find cacheable queries in the cache.
    # If answer was not found in the cache, resolve it in a normal way and save in the cache
    cache_needed = self._adapter[topic_type].is_cache_needed()
    if cache_needed:
        answer = cache.get(topic)
        if not isinstance(answer, dict):
            answer = None
        if answer:
            return answer

    answer = self._get_page_dict(topic, topic_type, request_options=request_options)
    if isinstance(answer, dict):
        if "cache" in answer:
            cache_needed = answer["cache"]

    if cache_needed and answer:
        cache.put(topic, answer)
    return answer

def set_cached_response(cache_key, mime, maxage, content):
    """
    Sets a service call response in the cache and returns the response
    so methods can use this as a passthrough to return the response.
    """
    response = (mime, maxage, content)
    if not CACHE_SERVICE_RESPONSES:
        return response
    cache.put(cache_key, response, maxage)
    return response

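# A sketch of the passthrough pattern set_cached_response enables. The
# handler name, key and payload below are hypothetical; only the
# (mime, maxage, content) contract comes from the function above.
def _example_service_handler(dbo):
    content = '{"animals": []}'  # build the real payload here
    # The caller gets the response tuple back whether or not
    # CACHE_SERVICE_RESPONSES is enabled, so it can just return this.
    return set_cached_response("animals:%s" % dbo.database,
                               "application/json", 120, content)
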
def update_sysprop(key, value, modifier_id):
    if strutil.is_empty(key):
        raise CoreError("key can't be empty.")
    if strutil.is_empty(value):
        raise CoreError("p_value can't be empty.")
    if has_sysprop(key) is None:
        raise CoreError("the key %s of sysprop does not exist.", key)
    value = strutil.to_str(value)
    conn = __get_conn()
    conn.update(SYSPROP_TABLENAME,
                where="p_key = '%s'" % key,
                p_value=value,
                modified_time=dtutil.utcnow(),
                modifier_id=modifier_id)
    cache.put(CACHESPACE_SYSPROP, key, value)

def __build_or_load_routes(self):
    hash_key = cache.get_hash(self.links, self.nodes)
    cached = cache.get(hash_key)
    if cached:
        (self.depart_routing_table, self.arrival_routing_table) = cached
        self.logger.debug("Cached routing table is loaded")
    else:
        # Builds the routes
        self.__build_routes()
        cache.put(hash_key, (self.depart_routing_table, self.arrival_routing_table))

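# The method above keys the cache on a hash of its inputs (links and nodes),
# so any change to the surface layout invalidates the cached routing tables
# automatically. A hedged sketch of the same build-or-load idea as a plain
# function; the get_hash/get/put signatures are assumed from the code above:
def build_or_load(inputs, build_fn):
    key = cache.get_hash([], inputs)
    cached = cache.get(key)
    if cached is not None:
        return cached          # warm cache: skip the expensive build
    result = build_fn(inputs)  # cold cache: build, then store for next time
    cache.put(key, result)
    return result
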
def add_sysprop(key, value, modifier_id):
    if strutil.is_empty(key):
        raise CoreError("key can't be empty.")
    if strutil.is_empty(value):
        raise CoreError("value can't be empty.")
    conn = __get_conn()
    value1 = strutil.to_str(value)
    if not has_sysprop(key):
        conn.insert(SYSPROP_TABLENAME,
                    p_key=key,
                    p_value=value1,
                    created_time=dtutil.utcnow(),
                    modified_time=dtutil.utcnow(),
                    modifier_id=modifier_id)
    else:
        conn.update(SYSPROP_TABLENAME,
                    where="p_key = '%s'" % key,
                    p_value=value1,
                    modified_time=dtutil.utcnow(),
                    modifier_id=modifier_id)
    cache.put(CACHESPACE_SYSPROP, key, value)
    return key

def execute_sparql_query(query):
    # first try the cache
    result = cache.get(query)
    if result is not None:
        return result
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.addDefaultGraph("http://dbpedia.org")
    sparql.setReturnFormat(JSON)
    sparql.setQuery(query)
    result = sparql.query().convert()["results"]["bindings"]
    cache.put(query, result)
    return result

def prepare(study, portfolio, remote):
    studyParams = util.load_json_file("study/%s.json" % study)
    search = build_search(study, portfolio, studyParams)
    logging.info("Caching %s-%s/search" % (study, portfolio))
    cache.put("batch/%s-%s/search" % (study, portfolio), search, remote)
    batch_ = search['batch_']
    target_ = search['target_']
    value__ = search['value__']
    for batch, value_ in zip(batch_, value__):
        params = copy.deepcopy(studyParams)
        del params['shift']
        params['portfolioKey'] = "portfolio/%s" % portfolio
        apply_search(params, target_, value_)
        params['episodes'].update(epi.build_episodes(params['episodes']))
        logging.info("Caching %s" % batch)
        cache.put("batch/%s/params" % batch, params, remote)

def execute_sparql_query(query, cached=True):
    # first try the cache
    if cached:
        result = cache.get(query)
        if result is not None:
            return result
    # if no result in cache
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    # sparql = SPARQLWrapper("http://35.196.96.177:8890/sparql")
    sparql.addDefaultGraph("http://dbpedia.org")
    sparql.setReturnFormat(JSON)
    sparql.setQuery(query)
    result = sparql.query().convert()["results"]["bindings"]
    cache.put(query, result)
    return result if result else None

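# A hedged usage sketch for execute_sparql_query; the query string is
# illustrative only. Note that the raw query string is the cache key, so
# even a whitespace change produces a distinct cache entry.
def _example_dbpedia_labels():
    query = ("SELECT ?label WHERE "
             "{ <http://dbpedia.org/resource/Berlin> rdfs:label ?label } "
             "LIMIT 5")
    return execute_sparql_query(query)
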
def get_sysprop_value(key, default=None, strict=True, fmt=None):
    if strutil.is_empty(key):
        raise CoreError("key can't be empty.")
    value = cache.get(CACHESPACE_SYSPROP, key)
    if value is not None:
        return value
    conn = __get_conn()
    sql = "SELECT p_value from %s where p_key = $mn" % SYSPROP_TABLENAME
    results = conn.query(sql, vars={'mn': key})
    if len(results) > 0:
        value = results[0].p_value
        value = strutil.to_object(value, default=default, strict=strict, fmt=fmt)
        cache.put(CACHESPACE_SYSPROP, key, value)
    else:
        from model import SYSTEM_UID
        add_sysprop(key, default, SYSTEM_UID)
        value = get_sysprop_value(key, default)
    return value

def query_cache(dbo, sql, age = 60):
    """
    Runs the query given and caches the result for age seconds.
    If there's already a valid cached entry for the query,
    returns the cached result instead.
    If CACHE_COMMON_QUERIES is set to false, just runs the query
    without doing any caching and is equivalent to db.query()
    """
    if not CACHE_COMMON_QUERIES or not cache.available():
        return query(dbo, sql)
    cache_key = "%s:%s:%s" % (dbo.alias, dbo.database, sql.replace(" ", "_"))
    m = hashlib.md5()
    m.update(cache_key)
    cache_key = "q:%s" % m.hexdigest()
    results = cache.get(cache_key)
    if results is not None:
        return results
    results = query(dbo, sql)
    cache.put(cache_key, results, age)
    return results

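# A hedged usage sketch for query_cache; the table and column names are
# hypothetical, and `dbo` comes from the caller as in the function above.
def _example_recent_animals(dbo):
    # Keyed on alias + database + the SQL itself, so the same statement run
    # against a different database gets its own entry; cached for 120 seconds.
    return query_cache(dbo, "SELECT ID, AnimalName FROM animal", age=120)
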
def report(batch, params, remote, debug):
    logging.info("--------------")
    result = {'success': False, 'error': 'none'}
    try:
        numEpisodes = params['episodes']['num']
        testParams = params['test']
        objective = testParams['objective']
        SBlend_ = []
        for i in range(numEpisodes):
            testResult = cache.get("batch/%s/test/%s" % (batch, i), remote)
            success = testResult['success']
            if success:
                SBlend_.append(testResult['SBlend'])
            else:
                SBlend_.append(0)
        SBlend_ = np.array(SBlend_)
        SBlendMean = np.mean(SBlend_)
        SBlendMin = np.min(SBlend_)
        SBlendMax = np.max(SBlend_)
        logging.info("SBlendMean : %s" % SBlendMean)
        logging.info("SBlendMin : %s" % SBlendMin)
        logging.info("SBlendMax : %s" % SBlendMax)
        result = {'success': True, 'error': 'none', 'SBlend_': list(SBlend_),
                  'SBlendMean': SBlendMean, 'SBlendMin': SBlendMin, 'SBlendMax': SBlendMax}
    except (KeyboardInterrupt):
        raise
    except Exception:
        result['error'] = sys.exc_info()[0]
        logging.info("error %s", result['error'])
    cache.put("batch/%s/report" % batch, result, remote)
    logging.info("--------------")
    return result

def beautify(text, lang, options):
    """
    Process input `text` according to the specified `mode`.
    Adds comments if needed, according to the `lang` rules.
    Caches the results.
    The whole work (except caching) is done by _beautify().
    """
    options = options or {}
    beauty_options = dict((k, v) for k, v in options.items()
                          if k in ['add_comments', 'remove_text'])
    mode = ''
    if beauty_options.get('add_comments'):
        mode += 'c'
    if beauty_options.get('remove_text'):
        mode += 'q'

    if beauty_options == {}:
        # if mode is unknown, just don't transform the text at all
        return text

    if isinstance(text, str):
        text = text.encode('utf-8')
    digest = "t:%s:%s:%s" % (hashlib.md5(text).hexdigest(), lang, mode)

    # temporary added line that removes invalid cache entries
    # that used wrong commenting methods
    if lang in ["git", "django", "flask", "cmake"]:
        cache.delete(digest)

    answer = cache.get(digest)
    if answer:
        return answer
    answer = _beautify(text, lang, **beauty_options)
    cache.put(digest, answer)
    return answer

def break_links(self):
    """
    A node that connects to the middle of a link is not actually connected;
    break_links therefore cuts each link at that middle point and builds new
    links that connect nodes to links.
    """
    self.logger.info("Starts to break links")
    cache_enabled = Config.params["simulation"]["cache"]

    # Loads all_nodes from cache if it exists
    if cache_enabled:
        hash_key = cache.get_hash([], self.nodes)
        cached = cache.get(hash_key)
        if cached:
            self.runways, self.taxiways, self.pushback_ways = cached
            self.logger.debug("Done breaking links using cache")
            return

    # Retrieve all nodes and links
    all_nodes = deepcopy(self.break_node)
    for link in self.links:
        all_nodes.append(link.start)
        all_nodes.append(link.end)
        self.__add_break_node(link.start)
        self.__add_break_node(link.end)

    index = 0
    while index < len(all_nodes):
        index = self.__break_next_link(all_nodes, index)

    self.logger.info("Done breaking links")
    self.__get_break_nodes()

    # Stores the result into cache for future usage
    if cache_enabled:
        to_cache = [self.runways, self.taxiways, self.pushback_ways]
        cache.put(hash_key, to_cache)

def main(wf):
    args = wf.args
    film_dict = json.loads(str(args[0]))
    film_id = str(film_dict['id'])
    url = get_thumbnail_url(film_dict)
    if url is not None:
        try:
            filepath = cache.put(film_id, url)
            log.info('cached in path ' + filepath)
        except Exception as e:
            log.error(e)
    else:
        log.info('OMG!! url is None!!')
        return  # problem, there is no url for image!

def changeColor(colorTuple, isHSV=False):
    rgb = __HSVtoRGB__(*colorTuple) if isHSV else colorTuple
    for x in range(count):
        __setColor__(x, rgb)
    cache.put(cache.COLOR, rgb)

print "dump : display cache item(s)" print "export : copy cache item(s) to clipboard" print "clear : clear cache item(s)" print "quit : quit" print "? : display help" elif (action == "portfolio"): portfolio = util.get_str_input("portfolio (%s) : " % portfolio, portfolio) elif (action == "study"): study = util.get_str_input("study (%s) : " % study, study) elif (action == "batches"): batches = util.get_str_input("batches (%s) : " % batches, batches) elif (action == "create"): portfolioParams = util.load_json_file("portfolio/%s.json" % portfolio) aPortfolio = ptf.Portfolio(portfolioParams) print "caching %s" % portfolio cache.put('portfolio/%s' % portfolio, aPortfolio, remote) elif (action == "remote"): remote = not remote elif (action == "debug"): debug = not debug elif (action == "pvdebug"): pvdebug = not pvdebug print pvdebug logging.getLogger().setLevel(level = logging.DEBUG if pvdebug else logging.INFO) elif (action == "prepare"): batcher.prepare(study, portfolio, remote) elif (action == "train"): batch_ = batcher.interpret_batches(study, portfolio, batches, remote) for batch in batch_: print batch batcher.train(batch, remote, debug)
def validate(batch, params, i, remote, debug):
    logging.info("--------------")
    logging.info("episode %s" % i)
    result = {'success': False, 'error': 'none'}
    try:
        portfolio = cache.get(params['portfolioKey'], remote)
        episodes = params['episodes']
        validateParams = params['validate']
        logging.debug("validateParams : %s" % validateParams)
        trainParams = params['train']
        wParams = params['w']
        fromDate = episodes['validate'][i][0]
        toDate = episodes['validate'][i][1]
        logging.info("fromDate, toDate : %s, %s" % (date.to_yyyymmdd(fromDate), date.to_yyyymmdd(toDate)))
        nFromDate = episodes['train'][i][0]
        nToDate = episodes['train'][i][1]
        logging.info("nFromDate, nToDate : %s, %s" % (date.to_yyyymmdd(nFromDate), date.to_yyyymmdd(nToDate)))
        portfolio.instantiate(fromDate, toDate, True, nFromDate, nToDate)
        numTrainIters = trainParams['iters']
        accumulate = validateParams['accumulate']
        objective = validateParams['objective']
        threshold = validateParams['threshold']
        trainLibrary = []
        winner_ = []
        loser_ = []
        numCandidates = 0
        iTrainFrom = 0 if accumulate else i
        iTrainTo = i + 1
        for iTrain in range(iTrainFrom, iTrainTo):
            for j in range(numTrainIters):
                logging.info("train %s.%s : " % (iTrain, j))
                trainResult = cache.get("batch/%s/train/%s.%s" % (batch, iTrain, j), remote)
                trainWinner_ = trainResult['winner_']
                numCandidates += len(trainWinner_)
                for trainWinner in trainWinner_:
                    W_ = trainWinner['W_']
                    F__ = w.run_W(portfolio, W_, wParams)
                    S = obj.score(objective, portfolio, F__)
                    outcome = {'W_': W_, 'S': S, 'provenance': iTrain}
                    if (debug):
                        outcome.update({'F__': F__})
                    if (S >= threshold):
                        winner_.append(outcome)
                    else:
                        loser_.append(outcome)
        logging.info("candidates : %s" % numCandidates)
        logging.info("winners : %s" % len(winner_))
        logging.info("losers : %s" % len(loser_))
        result = {'success': True, 'error': 'none', 'winner_': winner_, 'loser_': loser_}
    except (KeyboardInterrupt):
        raise
    except:
        result['error'] = sys.exc_info()[0]
        logging.info("error %s", result['error'])
    cache.put("batch/%s/validate/%s" % (batch, i), result, remote)
    logging.info("--------------")
    return result

def run(batch, remote, debug):
    k_Train = train(batch, remote, debug)
    k_Validate = validate(batch, remote, debug, dependency=k_Train)
    k_Test = test(batch, remote, debug, dependency=k_Validate)
    kReport = report(batch, remote, debug, dependency=k_Test)
    cache.put("batch/%s/jobs" % batch,
              {'train': list(k_Train),
               'validate': list(k_Validate),
               'test': list(k_Test),
               'report': kReport},
              remote)

def turnOff():
    print('Turning off...')
    blank()
    show()
    cache.put(cache.STATE, False)

print "Finding largest cities in country:" alpha3_to_city = {} for country in iso3166.countries: print " "+country.name cache_key = country.alpha3+"-geocode" results_text = None if cache.contains(cache_key): results_text = cache.get(cache_key) else: response = requests.get(GEONAMES_API_URL, params={ 'country':country.alpha2, 'q':country.name.split(",")[0], 'username':geonames_username}) results_text = response.content cache.put(cache_key,results_text) results = json.loads(results_text) try: cities = sorted([place for place in results['geonames'] if "PPL" in place['fcode']], key=itemgetter('population'),reverse=True) except KeyError: print "Error! Couldn't find an fcodes" continue if len(cities)>0: print " biggest city = "+cities[0]['name'] alpha3_to_city[country.alpha3] = cities[0] else: print "Error! No cities found!" print "Computing Distances" distances = {} for alpha31,city1 in alpha3_to_city.iteritems():
def test(batch, params, i, remote, debug):
    logging.info("--------------")
    logging.info("episode %s" % i)
    result = {'success': False, 'error': 'none'}
    try:
        portfolio = cache.get(params['portfolioKey'], remote)
        episodes = params['episodes']
        testParams = params['test']
        wParams = params['w']
        logging.debug("testParams : %s" % testParams)
        fromDate = episodes['test'][i][0]
        toDate = episodes['test'][i][1]
        logging.info("fromDate, toDate : %s, %s" % (date.to_yyyymmdd(fromDate), date.to_yyyymmdd(toDate)))
        nFromDate = episodes['train'][i][0]
        nToDate = episodes['train'][i][1]
        logging.info("nFromDate, nToDate : %s, %s" % (date.to_yyyymmdd(nFromDate), date.to_yyyymmdd(nToDate)))
        portfolio.instantiate(fromDate, toDate, True, nFromDate, nToDate)
        objective = testParams['objective']
        validateResult = cache.get("batch/%s/validate/%s" % (batch, i), remote)
        validateWinner_ = validateResult['winner_']
        numValidateWinners = len(validateWinner_)
        logging.info("candidates : %s" % numValidateWinners)
        F__Total = np.zeros([portfolio.tMax, portfolio.iMax])
        S_ = []
        provenance_ = []
        for validateWinner in validateWinner_:
            W_ = validateWinner['W_']
            provenance = validateWinner['provenance']
            F__ = w.run_W(portfolio, W_, wParams)
            S = obj.score(objective, portfolio, F__)
            logging.info("S : %s" % S)
            S_.append(S)
            provenance_.append(provenance)
            F__Total += F__
        F__Blend = np.zeros([portfolio.tMax, portfolio.iMax])
        SBlend = 0
        if (numValidateWinners > 0):
            F__Blend = F__Total / numValidateWinners
            SBlend = obj.score(objective, portfolio, F__Blend)
        logging.info("SBlend : %s" % SBlend)
        #points = [util.Point(validateWinner['W_'].tolist()) for validateWinner in validateWinner_]
        #k = max(2, int(0.25 * len(points)))
        #cutoff = 0.5
        #clusters = util.kmeans(points, k, cutoff)
        #
        #validateWinner_ = [{'W_' : np.array(c.centroid.coords, dtype=float), 'provenance' : -1} for c in clusters]
        header_0 = ["t", "date"]
        header_1 = ["F[%s]" % k for k in range(portfolio.iMax)]
        header_2 = ["r[%s]" % k for k in range(portfolio.iMax)]
        header__ = [header_0 + header_1 + header_2]
        body__ = [[j] + [date.to_yyyymmdd(portfolio.date_[j])] + list(F__Blend[j]) + list(portfolio.r__[j])
                  for j in range(portfolio.tMax)]
        excel__ = header__ + body__
        excelStr = "\n".join([",".join(str(val) for val in row) for row in excel__])
        #excelStr = "\n".join(str(excel__))
        result = {'success': True, 'error': 'none', 'S_': S_, 'SBlend': SBlend,
                  'provenance': provenance_, 'F__Blend': F__Blend, 'excel': excelStr}
    except (KeyboardInterrupt):
        raise
    except Exception:
        result['error'] = sys.exc_info()[0]
        logging.info("error %s", result['error'])
    cache.put("batch/%s/test/%s" % (batch, i), result, remote)
    logging.info("--------------")
    return result

def get_answers(
        self, topic: str,
        request_options: Dict[str, str] = None) -> List[Dict[str, Any]]:
    """
    Find cheat sheets for the topic.

    Args:
        `topic` (str):  the name of the topic of the cheat sheet

    Returns:
        [answer_dict]:  list of answers (dictionaries)
    """

    # if topic specified as <topic_type>:<topic>,
    # cut <topic_type> off
    topic_type = ""
    if re.match("[^/]+:", topic):
        topic_type, topic = topic.split(":", 1)

    topic = self.handle_if_random_request(topic)
    topic_types = self.get_topic_type(topic)

    # if topic_type is specified explicitly,
    # show pages only of that type
    if topic_type and topic_type in topic_types:
        topic_types = [topic_type]

    # 'question' queries are pretty expensive, that's why they should be handled
    # in a special way:
    # we do not drop the old style cache entries and try to reuse them if possible
    if topic_types == ['question']:
        answer = cache.get('q:' + topic)
        if answer:
            if isinstance(answer, dict):
                return [answer]
            return [{
                'topic': topic,
                'topic_type': 'question',
                'answer': answer,
                'format': 'text+code',
            }]
        answer = self._get_page_dict(topic, topic_types[0], request_options=request_options)
        if answer.get("cache", True):
            cache.put('q:' + topic, answer)
        return [answer]

    # Try to find cacheable queries in the cache.
    # If answer was not found in the cache, resolve it in a normal way and save in the cache
    answers = []
    for topic_type in topic_types:
        cache_entry_name = f"{topic_type}:{topic}"
        cache_needed = self._adapter[topic_type].is_cache_needed()
        if cache_needed:
            answer = cache.get(cache_entry_name)
            if not isinstance(answer, dict):
                answer = None
            if answer:
                answers.append(answer)
                continue

        answer = self._get_page_dict(topic, topic_type, request_options=request_options)
        if isinstance(answer, dict):
            if "cache" in answer:
                cache_needed = answer["cache"]

        if cache_needed and answer:
            cache.put(cache_entry_name, answer)
        answers.append(answer)
    return answers

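# A hedged usage sketch for get_answers; `router` stands in for whatever
# object this method is bound to, and the topic string is illustrative.
#
#   answers = router.get_answers("python:copy file")
#
# The explicit "python:" prefix pins topic_types to ["python"], and the
# answer is cached under the combined key "python:copy file" rather than the
# bare topic, so different page types of the same topic do not collide.
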
}.get(x, 0)

# set up logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# let's scrape
url = BASE_URL + START_PAGE
logger.info("Scraping UFO reports from %s" % url)

# first grab the index page
if not cache.contains(url):
    index_page = requests.get(url)
    logger.debug("\tadded to cache from %s" % url)
    cache.put(url, index_page.text)
content = cache.get(url)

# now pull out all the links to songs
dom = BeautifulSoup(content)
#/html/body/p/table/tbody/tr[1]/td[1]/font/a
link_tags = dom.select("td a")
logger.debug("\tfound %d link tags" % len(link_tags))
links = set([tag['href'] for tag in link_tags])  # get all the unique urls
logger.info("\tfound %d links to UFO shapes" % len(links))

# now scrape ufo data from each page that lists reports
tr_count = 0
for ufo_shape_link in links:

def changeBrightness(brightness):
    __pixels__.brightness = brightness / 100.0
    cache.put(cache.BRIGHTNESS, brightness)

def save_query(client_id, query):
    """
    Save the last query `query` for the client `client_id`
    """
    cache.put("l:%s" % client_id, query)

def train(batch, params, i, j, remote, debug):
    logging.info("--------------")
    logging.info("episode %s" % i)
    logging.info("iter %s" % j)
    result = {'success': False, 'error': 'none'}
    try:
        portfolio = cache.get(params['portfolioKey'], remote)
        episodes = params['episodes']
        trainParams = params['train']
        logging.debug("trainParams : %s" % trainParams)
        wParams = params['w']
        fromDate = episodes['train'][i][0]
        toDate = episodes['train'][i][1]
        logging.info("fromDate, toDate : %s, %s" % (date.to_yyyymmdd(fromDate), date.to_yyyymmdd(toDate)))
        nFromDate = episodes['train'][i][0]
        nToDate = episodes['train'][i][1]
        logging.info("nFromDate, nToDate : %s, %s" % (date.to_yyyymmdd(nFromDate), date.to_yyyymmdd(nToDate)))
        portfolio.instantiate(fromDate, toDate, True, nFromDate, nToDate)
        iters = trainParams['iters']
        draws = trainParams['draws']
        epochs = trainParams['epochs']
        alpha = trainParams['alpha']
        objective = trainParams['objective']
        threshold = trainParams['threshold']
        winner_ = []
        loser_ = []
        for k in range(draws):
            try:
                logging.info("draw %s" % k)
                W_ = w.init(portfolio.jLen, wParams)
                for e in range(epochs + 1):
                    if (e > 0):
                        W_ = moo.run_epoch(portfolio, W_, alpha, wParams)
                    F__ = w.run_W(portfolio, W_, wParams)
                    logging.debug(F__)
                    logging.debug(portfolio.x___)
                    S = obj.score(objective, portfolio, F__)
                    if (e == 0):
                        logging.info("SInit : %s", S)
                    if (S >= threshold):
                        break
                logging.info("SFinal : %s", S)
                outcome = {'W_': W_, 'S': S, 'provenance': i}
                if (debug):
                    outcome.update({'F__': F__})
                if (S >= threshold):
                    winner_.append(outcome)
                else:
                    loser_.append(outcome)
            except (KeyboardInterrupt):
                raise
            except:
                result['error'] = sys.exc_info()[0]
                logging.info("error %s", result['error'])
        logging.info("winners : %s" % len(winner_))
        logging.info("losers : %s" % len(loser_))
        result = {'success': True, 'error': 'none', 'winner_': winner_, 'loser_': loser_}
    except (KeyboardInterrupt):
        raise
    except:
        result['error'] = sys.exc_info()[0]
        logging.info("error %s", result['error'])
    cache.put("batch/%s/train/%s.%s" % (batch, i, j), result, remote)
    logging.info("--------------")
    return result