def random(out_file, n):

    """
    Dump the plaintext of up to N randomly ordered documents as CSV.

    The output has the columns ``id``, ``title`` and ``text``; the document
    path is written to both ``id`` and ``title``.

    Args:
        out_file: Writable file-like object handed to ``csv.DictWriter``.
        n: Row limit applied to the query.
    """

    fieldnames = ['id', 'title', 'text']
    writer = csv.DictWriter(out_file, fieldnames)
    writer.writeheader()

    # Order randomly in the database and keep only the first N rows.
    texts = Document_Text.select(Document_Text.text, Document.path)
    sample = texts.join(Document).order_by(fn.random()).limit(n)

    for doc in query_bar(sample):
        writer.writerow(dict(id=doc.path, title=doc.path, text=doc.text))
Example #2
0
def proxy_crawler(item):
    """
    Perform the request described by ``item`` through a random proxy.

    ``item`` is read with ``.get`` for the keys ``req`` (the callable that
    performs the request), ``url``, ``headers``, ``data`` and ``r``.

    For each attempt a ``Proxy_IP`` row with ``status == '1'`` is picked at
    random.  If the request raises, the proxy is handed to ``update_ip`` and
    another one is tried.  If no proxy can be fetched, the error is printed
    and the call falls back to ``crawler(url, r)``.

    Returns:
        The response of the first proxied request that does not raise, or
        whatever ``crawler`` returns on fallback.
    """
    # Retry in a loop rather than by recursion so that a long run of failing
    # proxies cannot exhaust the interpreter's recursion limit.
    while True:
        try:
            proxyip = Proxy_IP.select().where(
                Proxy_IP.status == '1').order_by(fn.random()).get()
        except Exception as err:
            print(err)
            return crawler(item.get('url'), item.get('r'))

        # Strip the scheme once so the mapping key and the proxy URL agree
        # (previously only the key was stripped).
        scheme = proxyip.http_s.strip()
        proxies = {scheme: scheme + '://' + proxyip.ip.strip()}

        try:
            response = item.get('req')(item.get('url'),
                                       headers=item.get('headers'),
                                       data=item.get('data'),
                                       timeout=20,
                                       proxies=proxies)
        except Exception:
            # Report the failing proxy, then try again with a new pick.
            update_ip(proxyip)
            continue

        print('proxies ip: %s, status_code: %s' %
              (proxyip.ip, response.status_code))
        time.sleep(0.3)
        return response
Example #3
0
def get_proxy_ip():
    """
    Fetch one randomly ordered ``Proxy_IP`` row whose status is ``'1'``.

    Returns:
        The selected row, or ``None`` if the lookup raised (the error is
        printed).
    """
    try:
        active = Proxy_IP.select().where(Proxy_IP.status == '1')
        picked = active.order_by(fn.random()).get()
    except Exception as err:
        print(err)
        return None
    return picked
Example #4
0
async def opening_var(request, id):
    """
    Render the ``opening_var.jinja`` page for one opening id.

    Looks up the first ``OpeningVar3Agg`` row for ``id``; when there is none
    the result of ``_no_results()`` is returned.  Otherwise up to 50 randomly
    ordered ``GameState`` rows for the same id are passed to the template.

    Args:
        request: Incoming request (not used in the body).
        id: Value matched against the ``openingid`` columns.
    """
    agg_query = OpeningVar3Agg.select().where(OpeningVar3Agg.openingid == id)
    opening = agg_query.first()
    if not opening:
        return _no_results()

    sample_games = (
        GameState.select()
        .where(GameState.openingid == id)
        .order_by(fn.random())
        .limit(50)
    )

    template = env.get_template("opening_var.jinja")
    html = template.render(opening=opening, games=sample_games)
    return response.html(html)
Example #5
0
def task_for_today():
    """
    Print today's word list: either a review pass or a batch of new words.

    If ``get_data_of_day`` yields more than 30 entries for today, they are
    printed in random order via ``print_ws(ws, False)``.  Otherwise up to 65
    words whose ``last_modify`` is null are selected and printed via
    ``print_ws(ws, True)``.
    """
    # Evaluate "today" once so both range bounds are guaranteed to match.
    today = datetime.date.today()
    ws = get_data_of_day(today, today)

    # Count once: the same number drives the branch and the header, and a
    # second count() call is presumably a second database query.
    seen_today = ws.count()
    if seen_today > 30:
        print('==== review today ({}) ===='.format(seen_today))
        print_ws(ws.order_by(fn.random()), False)
        return

    print('==== new today ====')
    ws = (
        Word.select()
        .where(Word.last_modify.is_null())
        .order_by(((Word.count - 0.8) * Word.id).desc(), Word.id.desc())
        .limit(65)
    )
    print_ws(ws, True)
Example #6
0
def task_for_review():
    """
    Print the words last modified 1, 2, 3 or 5 days ago.

    Each target date is encoded as the integer ``YYYYMMDD`` and compared with
    the same encoding built from ``Word.last_modify``.  Results are ordered
    by ``last_modify`` descending, then randomly, and passed to
    ``print_ws(ws, False)``.
    """
    day_offsets = (1, 2, 3, 5)
    review_keys = []
    for days_back in day_offsets:
        day = (datetime.datetime.now().date()
               - datetime.timedelta(days=days_back))
        review_keys.append(day.year * 10000 + day.month * 100 + day.day)

    # how does peewee support |(col1, col2) << ((a,b), (c,d))| ?
    modified_key = (Word.last_modify.year * 10000
                    + Word.last_modify.month * 100
                    + Word.last_modify.day)
    ws = (
        Word.select()
        .where(~Word.last_modify.is_null())
        .where(modified_key << review_keys)
        .order_by(Word.last_modify.desc(), fn.random())
    )

    print_ws(ws, False)
Example #7
0
def task_for_today():
    """
    Print today's word list: either a review pass or a batch of new words.

    If ``get_data_of_day`` yields more than 30 entries for today, they are
    printed in random order via ``print_ws(ws, False)``.  Otherwise up to 80
    words whose ``last_modify`` is null are selected, ordered by ``lid`` and
    then by ``count`` descending, and printed via ``print_ws(ws, True)``.
    Headers are printed with the ``Fore.BLUE`` prefix.
    """
    # Evaluate "today" once so both range bounds are guaranteed to match.
    today = datetime.date.today()
    ws = get_data_of_day(today, today)

    # Count once: the same number drives the branch and the header, and a
    # second count() call is presumably a second database query.
    seen_today = ws.count()
    if seen_today > 30:
        print(Fore.BLUE + '==== review today ({}) ===='.format(seen_today))
        print(Fore.BLUE + '<q> to quit')
        print_ws(ws.order_by(fn.random()), False)
        return

    print(Fore.BLUE + '==== new today ====')
    print(Fore.BLUE + '<q> to quit')
    ws = (
        Word.select()
        .where(Word.last_modify.is_null())
        .order_by(Word.lid, Word.count.desc())
        .limit(80)
    )
    print_ws(ws, True)
Example #8
0
def _kmode_id(request, id, kmode, kmodeagg, eco_name, eco_var1, title):
    """
    Render the ``kmode_id.jinja`` page for one ``kclass`` value.

    Args:
        request: Incoming request (not used in the body).
        id: Value matched against the ``kclass`` column of every model.
        kmode: Model queried for up to 20 randomly ordered games.
        kmodeagg: Model queried for the first aggregate row of the class.
        eco_name: Model whose matching rows reach the template as ``eco``.
        eco_var1: Model whose matching rows reach the template as ``var1``.
        title: Passed through to the template unchanged.

    Returns:
        The result of ``_no_results()`` when no aggregate row exists,
        otherwise ``response.html`` of the rendered template.
    """

    k = kmodeagg.select().where(kmodeagg.kclass == id).first()
    # Guard on the fetched row.  The previous check tested the ``kmode``
    # argument (the model being queried), so it did not reflect whether the
    # lookup above found anything.
    if not k:
        return _no_results()

    eco = eco_name.select().where(eco_name.kclass == id)
    var1 = eco_var1.select().where(eco_var1.kclass == id)

    games = kmode.select().where(kmode.kclass == id).\
        order_by(fn.random()).limit(20)

    t = env.get_template('kmode_id.jinja')
    return response.html(
        t.render(kmode=k, eco=eco, var1=var1, games=games, title=title))
Example #9
0
def random(out_file, n):
    """
    Export the plaintext of up to N randomly ordered documents as CSV.

    Columns are ``id``, ``title`` and ``text``; both ``id`` and ``title``
    receive the document path.

    Args:
        out_file: Writable file-like object handed to ``csv.DictWriter``.
        n: Row limit applied to the query.
    """

    header = ['id', 'title', 'text']
    writer = csv.DictWriter(out_file, header)
    writer.writeheader()

    # Random ordering is applied by the database before the limit.
    query = (
        Document_Text
        .select(Document_Text.text, Document.path)
        .join(Document)
        .order_by(fn.random())
        .limit(n)
    )

    # Rows are produced lazily, so each one is written as it is fetched.
    writer.writerows(
        {'id': row.path, 'title': row.path, 'text': row.text}
        for row in query_bar(query)
    )
Example #10
0
 def ran(cnt):
     """
     Select up to ``cnt`` words, ordered by ``random() * count`` descending.

     Args:
         cnt: Row limit applied to the query.

     Returns:
         The ``Word`` select query (not evaluated here).
     """
     weight = fn.random() * Word.count
     return Word.select().order_by(weight.desc()).limit(cnt)
Example #11
0
 def get_dataset(cls):
     """
     Return the not-yet-crawled actors in random order.

     Returns:
         An ``Actor`` select query filtered on ``crawled == False``.
     """
     # ``== False`` builds a column expression for ``where``; it must not be
     # rewritten as ``not ...`` or ``is False``.
     not_crawled = Actor.crawled == False  # noqa: E712
     return Actor.select().where(not_crawled).order_by(fn.random())
Example #12
0
 def get_dataset(cls):
     """
     Return the not-yet-crawled movies of the normal type in random order.

     Returns:
         A ``Movie`` select query filtered on ``crawled == False`` and
         ``type == Movie.TYPE_NORMAL``.
     """
     # ``== False`` builds a column expression for ``where``; it must not be
     # rewritten as ``not ...`` or ``is False``.
     not_crawled = Movie.crawled == False  # noqa: E712
     is_normal = Movie.type == Movie.TYPE_NORMAL
     candidates = Movie.select().where(not_crawled, is_normal)
     return candidates.order_by(fn.random())
Example #13
0
# Directory containing this script, and web/src/color.js beneath it.
cwd = Path(__file__).resolve().parent
colorjs = cwd / "web" / "src" / "color.js"

# Sentry is only initialised when a DSN is configured.
if config.sentryDSN:
    client = sentry_sdk.init(dsn=config.sentryDSN)

# Optional first command-line argument: row limit for the query (default 50).
limit = int(sys.argv[1]) if len(sys.argv) > 1 else 50

# Songs with show == 0 and spotify_data == 1 that have a large image set but
# no background colour yet; the conditions are AND-ed together.
query = Song.select().where(
    Song.show == 0,
    Song.spotify_data == 1,
    Song.image_large.is_null(False),
    Song.background_color.is_null(),
)
for song in query.order_by(fn.random()).limit(limit):
    print(song.title)
    url = song.image_large
    print(url)
    if not url:
        continue

    r = requests.get(url)
    if r.status_code == 404:
        song.spotify_data = None
        song.save()
        continue
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdir = Path(tmpdirname)
        image = tmpdir / "image.jpg"
        with open(image, 'wb') as fd: