def homepage():
    """Return cities matching the requested postal code as JSON.

    Reads the ``postal_code`` query parameter (defaulting to "84510") and
    responds with ``{"results": [...]}`` where each entry carries the city's
    INSEE code, name, postal code and label.
    """
    postal_code = request.args.get("postal_code", "84510")
    matches = City.select().where(City.postal_code == postal_code)
    results = [
        {
            "insee": c.insee,
            "name": c.name,
            "postal_code": c.postal_code,
            "label": c.label,
        }
        for c in matches
    ]
    return jsonify(results=results)
def main():
    """Group cities by their root (parent-less) city and print the result as JSON.

    Builds a mapping from a root city's id to the list of ``{'id', 'name'}``
    entries for that root and all of its children, then dumps it to stdout
    with non-ASCII characters preserved.
    """
    city_dict = {}
    for city in City.select():
        entry = {'id': city.id, 'name': city.name}
        if city.parent is None:
            # A root city starts (or joins) its own group.
            city_dict.setdefault(city.id, []).append(entry)
        else:
            # setdefault guards against a child row being iterated before its
            # parent — the original indexed city_dict[city.parent.id] directly
            # and raised KeyError when the parent had not been seen yet.
            city_dict.setdefault(city.parent.id, []).append(entry)
    print(json.dumps(city_dict, ensure_ascii=False))
def save_to_db(self, dic):
    """Validate a scraped deal dict and persist it as a Deal row.

    Expects `dic` to contain 'title', 'original_price', 'price', 'detail'
    and 'url'; price/detail values look like parsed-HTML nodes (they expose
    `.text` / `.renderContents`) — presumably BeautifulSoup tags, verify
    against the caller.  On any validation failure the record is logged and
    silently skipped.  Python 2 code: mixes `str` and `unicode` explicitly.
    """
    # Python 2 `has_key`: fail fast if any required field is missing.
    assert all(map(dic.has_key, ['title', 'original_price', 'price', 'detail', 'url'])),\
        "Information incomplete."
    url = dic['url']
    # Prices come out of the parser as unicode text; work on utf-8 bytes.
    original_price = dic['original_price'].text.encode('utf8')
    price = dic['price'].text.encode('utf8')
    title = dic['title'].text # title is unicode
    detail = dic['detail'].renderContents(encoding='utf8')
    # Strip site-specific markup/links relative to the deal site's home URL.
    detail = utils.clean_detail(detail, self.home_url)
    # Data formatting & validation.
    try:
        # Pull the first digit run out of each price string and make it an int.
        original_price, price = map(lambda s: int(re.search(r'(\d+)', s).group()),
                                    [original_price, price])
    except TypeError:
        # re.search received a non-string (e.g. None) — skip this record.
        logging.error("Price conversion failed. Detailed info: %s",
                      [original_price, price])
        return
    except AttributeError:
        # No digits found: re.search returned None and .group() blew up.
        logging.error("Regex failed on %s", [original_price, price])
        return
    # Sanity bounds on title/detail length before hitting the database.
    if len(title) > 500 or len(title) < 10:
        logging.error("Title length too short or too long : %s", title)
        return
    if len(detail) < 20:
        logging.error("Detail too short. %s", detail)
        return
    # Save to db.
    try:
        site = Site.select(Site.q.url == self.home_url)
        # NOTE(review): assert is stripped under `python -O`; these existence
        # checks would then be skipped — consider raising instead.
        assert(site.count() == 1), "%s not found or dups." % self.home_url
        # Drop boilerplate prefixes ("today's deal" labels) from the title.
        title = utils.lstrip(title, [s.decode('utf8') for s in ('今日团购', '今日精选', ':')])
        title = title.strip()
        # Prefix the title with the source site's name.
        title='[%s] %s' % (site[0].name, title)
        # index_urls maps deal URL -> city name captured at crawl time.
        city_name = self.index_urls[url]
        city = City.select(City.q.name == city_name.decode('utf8'))
        assert city.count() == 1, "%s not found or dups." % city_name
        cityID = city[0].id
        # Dedup on (title, city): the same deal may be crawled repeatedly.
        if Deal.select(AND(Deal.q.title == title,
                           Deal.q.cityID == cityID)).count() > 0:
            logging.info("Title dups %s" % title)
            return
        # SQLObject-style constructor persists the row on creation.
        deal = Deal(url=url, title=title, price=price,
                    originalPrice=original_price,
                    detail=detail.decode('utf8'),cityID=cityID,
                    siteID=site[0].id)
        logging.info('%s OK', url)
    except:
        # Simple handling for the moment: log and swallow any DB/lookup error.
        logging.error("Error occured while saving data : %s", sys.exc_info())
def homepage():
    """JSON endpoint: list cities whose postal code matches the query.

    The ``postal_code`` request argument defaults to '84510'; each match is
    serialized with its insee, name, postal_code and label fields.
    """
    code = request.args.get('postal_code', '84510')
    results = []
    query = City.select().where(City.postal_code == code)
    for record in query:
        results.append({
            "insee": record.insee,
            "name": record.name,
            "postal_code": record.postal_code,
            "label": record.label,
        })
    return jsonify(results=results)
def create_word_dicts(): big_list_of_shit = {} for city in City.select(): try: list_of_photos = [] town = re.sub(' ','_',city.city.encode("utf-8").lower()) with open('new_cities_photos_words/{}.json'.format(town),'r') as inputfile: my_file = json.loads(inputfile.read()) for k,v in my_file.iteritems(): for word in v: list_of_photos.append(word) print 'Done!' big_list_of_shit.update({ town: list(set(list_of_photos)) }) except: pass with open('all_words.json', 'w') as outfile: json.dump(big_list_of_shit, outfile, indent=4, sort_keys=True, separators=(',', ':'))
def city_exist(form, field):
    """WTForms validator: fail unless the submitted city name is in the DB.

    Lowercases ``field.data`` and checks it against City.city_name; raises
    ValidationError when no matching row exists.
    """
    name = str(field.data).lower()
    # `not ...exists()` replaces the un-idiomatic `...exists() == False`
    # comparison; the query and the error message are unchanged.
    if not City.select().where(City.city_name == name).exists():
        raise ValidationError("There is no such city")
# -*- coding: utf-8 -*- from selenium import webdriver from models import City import time import requests import os import json from selenium.webdriver.support.ui import WebDriverWait chromedriver_path = '/Users/sp41mer/PycharmProjects/parcer/chromedriver' ghostdriver_path = '/Users/sp41mer/HiGuys/imaged_cluster/instagram/phantomjs' for city in City.select().where(City.id > 11): print('Parsing city {} with ID = {}'.format(city.city, city.id)) driver_for_page = webdriver.Chrome(chromedriver_path) # driver_for_page = webdriver.PhantomJS(ghostdriver_path) url = city.ig_link name_of_town = url.split('.com/')[1].split('/')[0] driver_for_page.get(url) driver_for_page.execute_script( 'window.scrollTo(0,document.body.scrollHeight);') try: button_more = driver_for_page.find_element_by_css_selector('a._oidfu') except: print 'Couldnt find a._oidfu' button_more = None try: button_more = driver_for_page.find_element_by_xpath( u"//*[contains(text(), 'Загрузить еще')]") except: print 'Couldnt find by russian text' try: button_more = driver_for_page.find_element_by_xpath(
def city_list():
    """Render the city_list.html template with all cities ordered by name."""
    cities = City.select().order_by('name')
    return object_list('city_list.html', cities, "obj_list")