def generate_fake_data():
    session = new_session()
    # Wipe existing data; posts go first to satisfy the foreign key on author_id.
    for post in session.query(Post).all():
        session.delete(post)
    for author in session.query(Author).all():
        session.delete(author)
    session.flush()

    count = 10
    faker = Faker(locale='ru_RU')
    for _ in range(count):
        profile = faker.simple_profile()
        author = Author(login=profile['username'],
                        full_name=profile['name'],
                        password=faker.password(length=12))
        session.add(author)
        session.flush()  # populate author.id before creating this author's posts
        for _ in range(count):
            post = Post(author_id=author.id,
                        title=faker.sentence(),
                        text=faker.paragraph(nb_sentences=5),
                        is_published=False,
                        published_at=faker.date_time())
            session.add(post)
    session.commit()
    return redirect(url_for('index'))

def register():
    session = new_session()
    author = Author(login=request.form['login'],
                    password=request.form['password'])
    session.add(author)
    session.commit()
    return redirect(url_for('index'))

def POST(self):
    # Save session info for this call.
    session_info = json.loads(web.data())['session']
    tropo_call_id = session_info['callId']
    caller_network = session_info['from']['network']
    caller_channel = session_info['from']['channel']
    caller_id = session_info['from']['id']
    models.new_session(
        tropo_call_id, caller_network, caller_channel, caller_id)
    if caller_channel == 'VOICE':
        return self.do_voice()
    elif caller_channel == 'TEXT':
        return self.do_text()
    else:
        raise Exception("unexpected caller channel %s" % caller_channel)

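# A minimal sketch of the webhook payload the handler above parses. The key
# names come straight from the lookups in POST(); the values are invented
# examples, not documented Tropo output.
sample_session = {
    "session": {
        "callId": "abc123",        # -> tropo_call_id
        "from": {
            "network": "PSTN",     # -> caller_network (invented value)
            "channel": "VOICE",    # routes to do_voice(); 'TEXT' -> do_text()
            "id": "+15551234567",  # -> caller_id
        },
    },
}
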
def data_onboard(data):
    s = new_session()
    getOrElse = lambda o, k, d: o[k] if k in o else d

    violation_code = Collection(ViolationCode, lambda vc: vc.code)
    borough = Collection(Borough, lambda b: b.borough)
    restaurant = Collection(Restaurant, lambda r: r.camis)
    inspection = []

    for row in data:
        if 'boro' in row:
            borough.insert(Borough(borough=row['boro']))
        if 'violation_code' in row:
            violation_code.insert(ViolationCode(code=row['violation_code']))
        if 'camis' in row:
            restaurant.insert(Restaurant(
                camis=getOrElse(row, 'camis', None),
                dba=getOrElse(row, 'dba', None),
                street=getOrElse(row, 'street', None),
                phone=getOrElse(row, 'phone', None),
                cuisine_description=getOrElse(row, 'cuisine_description', None),
                borough=getOrElse(row, 'boro', None),
                building=getOrElse(row, 'building', None),
                zipcode=getOrElse(row, 'zipcode', None)))
        inspection.append(Inspection(
            restaurant=getOrElse(row, 'camis', None),
            record_date=getOrElse(row, 'record_date', None),
            violation_code=getOrElse(row, 'violation_code', None),
            violation_description=getOrElse(row, 'violation_description', None),
            score=int(row['score']) if 'score' in row else None,
            inspection_date=getOrElse(row, 'inspection_date', None),
            inspection_type=getOrElse(row, 'inspection_type', None),
            critical_flag=getOrElse(row, 'critical_flag', None)))

    # Save the lookup tables first so their generated ids can be resolved.
    s.bulk_save_objects(borough.getall())
    s.bulk_save_objects(violation_code.getall())
    s.commit()

    # Replace the natural borough key on each restaurant with its foreign key.
    boroughs = {b[1]: b[0] for b in
                s.query(Borough.__table__.c.id, Borough.__table__.c.borough)}
    for r in restaurant.getall():
        if r.borough is not None:
            r.borough = boroughs[r.borough]
    bulk_save(s, restaurant.getall(), 1500)

    # Likewise resolve violation codes and restaurant ids on the inspections.
    codes = {c[1]: c[0] for c in
             s.query(ViolationCode.__table__.c.id, ViolationCode.__table__.c.code)}
    camis = {c[1]: c[0] for c in
             s.query(Restaurant.__table__.c.id, Restaurant.__table__.c.camis)}
    for i in inspection:
        if i.restaurant is not None:
            i.restaurant = camis[i.restaurant]
        if i.violation_code is not None:
            i.violation_code = codes[i.violation_code]
    bulk_save(s, inspection, 1500)
    s.close()

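# Collection and bulk_save() are used above but not defined in this snippet.
# A minimal sketch of what they are assumed to do: Collection de-duplicates
# model instances by a key function, and bulk_save() writes objects in fixed
# chunks so a large import is not one enormous flush.
def bulk_save(session, objects, chunk_size):
    # Persist objects chunk by chunk, committing after each chunk.
    for start in range(0, len(objects), chunk_size):
        session.bulk_save_objects(objects[start:start + chunk_size])
        session.commit()

class Collection:
    # Keeps at most one instance per key(obj); later duplicates are dropped.
    def __init__(self, model, key):
        self.model = model  # kept only for parity with the call sites above
        self.key = key
        self.items = {}

    def insert(self, obj):
        self.items.setdefault(self.key(obj), obj)

    def getall(self):
        return list(self.items.values())
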
@gen.coroutine  # needed for the `yield futures` below (Tornado coroutine)
def start_crawler():
    test_logger()
    session = new_session()
    logger.info(u"Crawling patents for year %s" % year)
    url_maker = FullIndexURLMaker(year, skip)
    futures = []
    # Tornado's kwarg is max_clients (plural), passed directly to configure().
    httpclient.AsyncHTTPClient.configure(None, max_clients=100)
    logger.info(u"Crawler starting, creating %s threads" % index_num)
    for i in range(index_num):
        worker = FullIndexWorker(name="default-%s" % i,
                                 url_maker=url_maker,
                                 session=session)
        futures.append(worker.go())
    # yield gen.sleep(10000)
    yield futures
    logger.info(u"Crawler finished\n\n\n")

@gen.coroutine  # needed for the `yield searcher.go()` below (Tornado coroutine)
def crawler_start():
    # cookies = login()
    # print cookies
    # url = make_url("CN", 2010, 0)
    # res = requests.get(url, cookies=cookies)
    # if echo_to_file:
    #     with open(os.path.join(os.path.abspath(os.path.dirname(__file__)), "log.html"), "w") as f:
    #         f.write(res.content)
    #     return res
    load_country_code()
    session = new_session()
    countries = session.query(Country).all()
    searcher = SearchWorker(name="default", countries=countries, session=session)
    yield searcher.go()
    print "Search finished"

from models import Restaurant, new_session
import geohash
import requests as req
import json, yaml
import asyncio
import re

with open('config.yaml') as f:
    config = yaml.safe_load(f)  # safe_load avoids executing arbitrary YAML tags
api_keys = config['keys']

s = new_session()
print(len(s.query(Restaurant).all()))

# (id, dba) -> (street, zipcode) for not-yet-geocoded restaurants in borough 4.
restaurants = {(r[0], r[3]): (r[1], r[2])
               for r in s.query(Restaurant.__table__.c.id,
                                Restaurant.__table__.c.street,
                                Restaurant.__table__.c.zipcode,
                                Restaurant.__table__.c.dba)
                         .filter(Restaurant.geohash == None)
                         .filter(Restaurant.borough == 4)
                         .all()}

GEOCODING_URL = "https://maps.googleapis.com/maps/api/geocode/json"

def get_found_geolocations():
    try:
        with open(config['files']['geolocation']) as f:
            return json.load(f)
    except json.decoder.JSONDecodeError:
        return []

found_addrs = [addr['key'] for addr in get_found_geolocations()]

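# The snippet above only prepares inputs; a minimal sketch of the lookup it is
# presumably building toward, using the Google Geocoding API endpoint already
# in GEOCODING_URL. The helper name and the "street, zipcode" address format
# are assumptions, not part of the original script.
def geocode(street, zipcode, api_key):
    # One forward-geocoding request; returns (lat, lng) or None if no match.
    resp = req.get(GEOCODING_URL, params={
        'address': '%s, %s' % (street, zipcode),
        'key': api_key,
    })
    results = resp.json().get('results', [])
    if not results:
        return None
    loc = results[0]['geometry']['location']
    return loc['lat'], loc['lng']
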
def post(pk):
    session = new_session()
    post = session.query(Post).filter(Post.id == pk).one()
    return render_template('post.html', post=post)

def posts():
    session = new_session()
    all_posts = (session.query(Post)
                 .options(defer('text'))
                 .options(joinedload('author'))
                 .all())
    return render_template('posts.html', posts=all_posts)

def clear_all_countries():
    session = new_session()
    session.query(Country).delete()
    session.commit()

def country_for_name(name):
    session = new_session()
    return session.query(Country).filter(Country.name == name).first()