def random(out_file, n):
    """
    Write a CSV with plaintext for N random docs.

    Args:
        out_file: File-like object handed to ``csv.DictWriter``.
        n: Upper bound on the number of rows selected (used as the
            query's LIMIT).
    """
    fieldnames = ['id', 'title', 'text']

    # Header row goes out before any data row.
    csv_out = csv.DictWriter(out_file, fieldnames)
    csv_out.writeheader()

    # Document texts joined to their document, in database-random order.
    selection = Document_Text.select(Document_Text.text, Document.path)
    sample = selection.join(Document).order_by(fn.random()).limit(n)

    for doc in query_bar(sample):
        # The document path is written as both the id and the title.
        record = dict(id=doc.path, title=doc.path, text=doc.text)
        csv_out.writerow(record)
def proxy_crawler(item):
    """
    Perform a request through a randomly selected proxy, retrying on failure.

    A proxy row with ``status == '1'`` is picked at random for every attempt.
    When the request raises, ``update_ip`` is called on that proxy and a new
    attempt is made with a freshly selected proxy.  When no proxy row can be
    selected, the request is delegated to ``crawler`` instead.

    Args:
        item: Mapping read via ``.get()``.  Keys used: ``'req'`` (callable
            invoked as ``req(url, headers=..., data=..., timeout=20,
            proxies=...)``), ``'url'``, ``'headers'``, ``'data'`` and ``'r'``
            (forwarded to ``crawler`` on fallback).

    Returns:
        Whatever ``item['req']`` returns on the first successful attempt, or
        the result of ``crawler(url, r)`` when proxy selection fails.
    """
    # Iterate instead of recursing: a long run of failing proxies must not
    # grow the call stack.  As before, the loop only ends once a request
    # succeeds or proxy selection raises.
    # NOTE(review): presumably update_ip() takes the proxy out of the
    # status == '1' pool so this terminates -- confirm.
    while True:
        try:
            proxyip = (
                Proxy_IP.select()
                .where(Proxy_IP.status == '1')
                .order_by(fn.random())
                .get()
            )
        except Exception as err:
            print(err)
            return crawler(item.get('url'), item.get('r'))

        # Use the stripped scheme for both the dict key and the proxy URL;
        # stray whitespace would otherwise yield e.g. "http ://1.2.3.4".
        scheme = proxyip.http_s.strip()
        proxies = {scheme: scheme + '://' + proxyip.ip.strip()}

        try:
            response = item.get('req')(
                item.get('url'),
                headers=item.get('headers'),
                data=item.get('data'),
                timeout=20,
                proxies=proxies,
            )
        except Exception:
            update_ip(proxyip)
            continue

        print('proxies ip: %s, status_code: %s'
              % (proxyip.ip, response.status_code))
        # Short pause after each successful request.
        time.sleep(0.3)
        return response
def get_proxy_ip():
    """
    Pick one random ``Proxy_IP`` row whose status equals ``'1'``.

    Returns:
        The selected row, or ``None`` when the lookup raises (the error is
        printed rather than propagated).
    """
    try:
        candidates = Proxy_IP.select().where(Proxy_IP.status == '1')
        picked = candidates.order_by(fn.random()).get()
    except Exception as err:
        print(err)
        return None
    else:
        return picked
async def opening_var(request, id):
    """
    Render the ``opening_var.jinja`` page for one opening id.

    Args:
        request: Incoming request object (not read by this handler).
        id: Value matched against ``openingid`` on both models.

    Returns:
        The ``_no_results()`` response when no ``OpeningVar3Agg`` row
        matches, otherwise an HTML response built from the template.
    """
    matches = OpeningVar3Agg.select().where(OpeningVar3Agg.openingid == id)
    opening = matches.first()
    if not opening:
        return _no_results()

    # Up to 50 games for this opening, in database-random order.
    for_opening = GameState.select().where(GameState.openingid == id)
    games = for_opening.order_by(fn.random()).limit(50)

    template = env.get_template("opening_var.jinja")
    page = template.render(opening=opening, games=games)
    return response.html(page)
def task_for_today():
    """
    Print today's task: a review of today's rows or a batch of new words.

    When ``get_data_of_day`` yields more than 30 rows for today, those rows
    are shuffled and passed to ``print_ws(..., False)``.  Otherwise up to 65
    words with a NULL ``last_modify`` are passed to ``print_ws(..., True)``,
    ordered by ``(count - 0.8) * id`` descending, then ``id`` descending.
    """
    # Read the date once so both range arguments always agree.
    today = datetime.date.today()
    ws = get_data_of_day(today, today)

    # Count once: the branch decision and the printed number then always
    # match, and the COUNT query is not issued twice.
    total = ws.count()
    if total > 30:
        print('==== review today ({}) ===='.format(total))
        print_ws(ws.order_by(fn.random()), False)
    else:
        print('==== new today ====')
        ws = (
            Word.select()
            .where(Word.last_modify.is_null())
            .order_by(((Word.count - 0.8) * Word.id).desc(), Word.id.desc())
            .limit(65)
        )
        print_ws(ws, True)
def task_for_review():
    """
    Print the words last modified 1, 2, 3 or 5 days ago.

    Dates are compared as integers of the form YYYYMMDD, computed both for
    the target days and for each row's ``last_modify`` column.  Results are
    ordered by ``last_modify`` descending, then randomly, and handed to
    ``print_ws(..., False)``.
    """
    day_offsets = (1, 2, 3, 5)
    past_days = [
        datetime.datetime.now().date() - datetime.timedelta(days=offset)
        for offset in day_offsets
    ]
    # Fold each date into a single integer: year*10000 + month*100 + day.
    date_keys = [d.year * 10000 + d.month * 100 + d.day for d in past_days]

    # how does peewee support |(col1, col2) << ((a,b), (c,d))| ?
    modified = Word.last_modify
    row_key = modified.year * 10000 + modified.month * 100 + modified.day

    # peewee's `<<` builds an SQL IN test against the list of keys.
    ws = (
        Word.select()
        .where(~modified.is_null())
        .where(row_key << date_keys)
        .order_by(modified.desc(), fn.random())
    )
    print_ws(ws, False)
def task_for_today():
    """
    Print today's task in blue: a review of today's rows or new words.

    When ``get_data_of_day`` yields more than 30 rows for today, those rows
    are shuffled and passed to ``print_ws(..., False)``.  Otherwise up to 80
    words with a NULL ``last_modify`` are passed to ``print_ws(..., True)``,
    ordered by ``lid`` ascending, then ``count`` descending.
    """
    # Read the date once so both range arguments always agree.
    today = datetime.date.today()
    ws = get_data_of_day(today, today)

    # Count once: the branch decision and the printed number then always
    # match, and the COUNT query is not issued twice.
    total = ws.count()
    if total > 30:
        print(Fore.BLUE + '==== review today ({}) ===='.format(total))
        print(Fore.BLUE + '<q> to quit')
        print_ws(ws.order_by(fn.random()), False)
    else:
        print(Fore.BLUE + '==== new today ====')
        print(Fore.BLUE + '<q> to quit')
        ws = (
            Word.select()
            .where(Word.last_modify.is_null())
            .order_by(Word.lid, Word.count.desc())
            .limit(80)
        )
        print_ws(ws, True)
def _kmode_id(request, id, kmode, kmodeagg, eco_name, eco_var1, title):
    """
    Render the ``kmode_id.jinja`` page for one ``kclass`` value.

    Args:
        request: Incoming request object (not read by this helper).
        id: Value matched against ``kclass`` on every model passed in.
        kmode: Model queried for up to 20 random games.
        kmodeagg: Model queried for the single summary row.
        eco_name: Model queried for the ``eco`` template variable.
        eco_var1: Model queried for the ``var1`` template variable.
        title: Passed through to the template unchanged.

    Returns:
        The ``_no_results()`` response when ``kmodeagg`` has no row for
        ``id``, otherwise an HTML response built from the template.
    """
    k = kmodeagg.select().where(kmodeagg.kclass == id).first()
    # Guard on the fetched row.  Testing the `kmode` argument (a model,
    # always truthy) could never trigger, so a missing row used to render
    # the template with kmode=None.
    if not k:
        return _no_results()

    eco = eco_name.select().where(eco_name.kclass == id)
    var1 = eco_var1.select().where(eco_var1.kclass == id)
    # Up to 20 games for this class, in database-random order.
    games = (
        kmode.select()
        .where(kmode.kclass == id)
        .order_by(fn.random())
        .limit(20)
    )

    t = env.get_template('kmode_id.jinja')
    return response.html(
        t.render(kmode=k, eco=eco, var1=var1, games=games, title=title))
def random(out_file, n):
    """
    Write a CSV with plaintext for N random docs.

    Args:
        out_file: File-like object handed to ``csv.DictWriter``.
        n: Upper bound on the number of rows selected (used as the
            query's LIMIT).
    """
    # CSV writer; the header is written before any data row.
    columns = ['id', 'title', 'text']
    sink = csv.DictWriter(out_file, columns)
    sink.writeheader()

    # Text plus owning document path, shuffled by the database.
    texts = Document_Text.select(Document_Text.text, Document.path)
    joined = texts.join(Document)
    picked = joined.order_by(fn.random()).limit(n)

    for entry in query_bar(picked):
        # The path doubles as both id and title.
        line = {'id': entry.path, 'title': entry.path, 'text': entry.text}
        sink.writerow(line)
def ran(cnt):
    """
    Build a query for ``cnt`` words in count-weighted random order.

    Rows are sorted by ``random() * count`` descending and the query is
    limited to ``cnt`` rows.  The query object is returned as built.
    """
    weighted_shuffle = fn.random() * Word.count
    shuffled = Word.select().order_by(weighted_shuffle.desc())
    return shuffled.limit(cnt)
def get_dataset(cls):
    """
    Return a query over ``Actor`` rows with ``crawled == False``, shuffled.

    ``cls`` is accepted but not used by the body.
    """
    # `== False` is intentional: the comparison builds a query expression,
    # so it must not be rewritten as `not ...` or `is False`.
    not_crawled = Actor.crawled == False  # noqa: E712
    pending = Actor.select().where(not_crawled)
    return pending.order_by(fn.random())
def get_dataset(cls):
    """
    Return a shuffled query over uncrawled ``Movie`` rows of normal type.

    Rows must satisfy both ``crawled == False`` and
    ``type == Movie.TYPE_NORMAL``.  ``cls`` is accepted but not used by the
    body.
    """
    # `== False` is intentional: the comparison builds a query expression,
    # so it must not be rewritten as `not ...` or `is False`.
    not_crawled = Movie.crawled == False  # noqa: E712
    normal_type = Movie.type == Movie.TYPE_NORMAL
    pending = Movie.select().where(not_crawled, normal_type)
    return pending.order_by(fn.random())
cwd = Path(__file__).resolve().parent colorjs = cwd / Path("./web/src/color.js") if config.sentryDSN: client = sentry_sdk.init(dsn=config.sentryDSN) if len(sys.argv) > 1: limit = int(sys.argv[1]) else: limit = 50 query = Song.select().where((Song.show == 0) & (Song.spotify_data == 1) & (Song.image_large.is_null(False)) & (Song.background_color.is_null())) for song in query.order_by(fn.random()).limit(limit): print(song.title) url = song.image_large print(url) if not url: continue r = requests.get(url) if r.status_code == 404: song.spotify_data = None song.save() continue with tempfile.TemporaryDirectory() as tmpdirname: tmpdir = Path(tmpdirname) image = tmpdir / "image.jpg" with open(image, 'wb') as fd: