Esempio n. 1
0
def get_si2_sg2() -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Return the (si2, sg2) indexed-game DataFrames, rebuilding and caching on a miss.

    Returns:
        Tuple of (si2, sg2) as produced by label.index_games / parse.index_games.
    """
    try:
        si2 = pkg.load_dataset('si2')
        sg2 = pkg.load_dataset('sg2')
    # Narrowed from a bare `except:` (which would also swallow
    # KeyboardInterrupt/SystemExit); any load failure triggers a rebuild.
    except Exception:
        st2 = get_st2()
        si2, sg2 = label.index_games(parse.index_games(st2))
        # Persist so the next call hits the cache.
        pkg.save_dataset(si2, 'si2')
        pkg.save_dataset(sg2, 'sg2')
    return si2, sg2
Esempio n. 2
0
def get_setups(
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Return (db_setups, game_setups, free_setups, null_setups), rebuilding and caching on a miss.

    game_setups splits each 100-character field_content string into the red
    setup (chars 0:40), the blue setup reversed (chars 60:100), and the dmz
    strip (chars 40:60); db_setups reshapes that to one row per (game, player)
    with win/draw/loss labels and decoded setup objects.
    """
    try:
        db_setups = pkg.load_dataset('db_setups')
        game_setups = pkg.load_dataset('game_setups')
        free_setups = pkg.load_dataset('free_setups')
        null_setups = pkg.load_dataset('null_setups')
    # Narrowed from a bare `except:`; any load failure triggers a rebuild.
    except Exception:
        st2 = games.get_st2()
        si2, _ = games.get_si2_sg2()
        free_setups = si2.query('type == "free"')
        null_setups = si2.query('field_content.isnull()')
        game_setups = (
            si2.query('type != "free" & field_content.notnull()').assign(
                # Blue's half is stored from blue's point of view, hence the
                # string reversal.
                setup_str_red=lambda r: r.field_content.str[:40],
                setup_str_blue=lambda r: r.field_content.str[60:].str[::-1],
                dmz=lambda r: r.field_content.str[40:60]))
        # Sanity check: the middle strip must always be the fixed dmz pattern.
        assert (game_setups.dmz == 'AA__AA__AAAA__AA__AA').all()
        db_setups = (pd.wide_to_long(
            st2.merge(game_setups).drop(
                columns=['player_red', 'player_blue', 'dmz']),
            stubnames=['setup_str'],
            i='gid',
            j='player',
            sep='_',
            suffix='(red|blue)').reset_index().assign(
                # Per-player result label and numeric score (win=1, draw=0.5).
                result=lambda r: np.where(
                    r.player == r.winner, 'win',
                    np.where(r.winner == 'draw', 'draw', 'loss')),
                score=lambda r: np.where(
                    r.result == 'win', 1.0,
                    np.where(r.result == 'draw', 0.5, 0.0)),
                setup_str=lambda r: r.setup_str.apply(strados2.decode_setup),
                setup_obj=lambda r: r.apply(
                    lambda x: Setup(x.setup_str, x.type), axis=1)
            ).astype(
                dtype={
                    'result':
                    pd.CategoricalDtype(categories=['win', 'draw', 'loss'])
                }
            ).pipe(lambda df: pd.concat(
                # One-hot win/draw/loss columns alongside the categorical.
                [df, pd.get_dummies(df.result, prefix='', prefix_sep='')],
                axis=1
            )).loc[:, [
                'gid', 'filename', 'period', 'freq', 'ext', 'type', 'player',
                'result', 'win', 'draw', 'loss', 'score', 'ending',
                'num_moves', 'num_turns', 'next_move', 'setup_str', 'setup_obj'
            ]].pipe(label.setups).sort_values(['gid', 'player'
                                               ]).reset_index(drop=True))
        assert all(db_setups.setup_obj.apply(lambda x: x.ok()))
        pkg.save_dataset(db_setups, 'db_setups')
        pkg.save_dataset(game_setups, 'game_setups')
        pkg.save_dataset(free_setups, 'free_setups')
        pkg.save_dataset(null_setups, 'null_setups')
    return db_setups, game_setups, free_setups, null_setups
Esempio n. 3
0
def get_daily():
    """Incrementally scrape daily result files and extend the cached results dataset.

    Returns:
        (results, update): the full results DataFrame and the newly scraped
        rows (an empty DataFrame when nothing new was found).
    """
    try:
        # Resume from the day after the newest daily file on disk; the
        # filename encodes the date after the first '-' (TODO confirm format).
        last = dt.date(*map(
            int,
            sorted(os.listdir(pkg.daily_dir))[-1].split('.')[0].split('-')
            [1:]))
        results = pkg.load_dataset('results')
        # Cached dataset and files on disk must agree on the latest date.
        assert last == dt.date(*map(int, results.date.max().split('-')))
        start = last + dt.timedelta(days=1)
    # Narrowed from a bare `except:`; missing/inconsistent cache -> full rescrape.
    except Exception:
        results = pd.DataFrame()
        start = dt.date(2003, 6, 1)
    filenames = scrape.results(start,
                               pd.to_datetime('today').date(), pkg.daily_dir)
    try:
        update = (pd.concat(
            [
                pd.read_csv(os.path.join(pkg.daily_dir, filename))
                for filename in tqdm(filenames)
            ],
            ignore_index=True).rename(columns=lambda c: c.lower()).rename(
                columns=lambda c: c.replace(' ', '_')).rename(
                    columns={
                        '#_of_turns': 'num_turns'
                    }).loc[:, [
                        'date', 'time', 'type', 'player_red', 'player_blue',
                        'result', 'ending', 'num_turns', 'duration'
                    ]])
    # pd.concat raises on an empty list when no new files were scraped.
    except Exception:
        update = pd.DataFrame()
    if not update.empty:
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # forward-compatible equivalent.
        results = pd.concat([results, update], ignore_index=True)
        pkg.save_dataset(results, 'results')
    return results, update
Esempio n. 4
0
def get_txt_files() -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Unpack any newly downloaded zip archives and return the txt-file inventory.

    Returns:
        (txt_files, unpacked, repaired): the full inventory of packed txt
        files, the filenames extracted this run, and the repair report.
    """
    zip_files, _ = get_zip_files()
    txt_files_packed = unpack.infolist(pkg.zip_dir, zip_files)
    try:
        txt_files_cached = pkg.load_dataset('txt_files')
    # Narrowed from a bare `except:`; no cached inventory -> start empty.
    except Exception:
        # exist_ok avoids a crash when the directory survived a deleted cache.
        os.makedirs(pkg.txt_dir, exist_ok=True)
        txt_files_cached = pd.DataFrame(columns=txt_files_packed.columns.values)
    assert sorted(os.listdir(pkg.txt_dir)) == sorted(txt_files_cached.filename)
    # Outer merge + indicator: rows only present on the packed side are the
    # files we still have to extract.
    txt_queue = (pd
        .merge(
            txt_files_packed, txt_files_cached,
            how='outer', indicator=True, validate='one_to_one'
        )
        .query('_merge == "left_only"')
        .drop(columns='_merge')
    )
    zip_queue = txt_queue.drop_duplicates('name')
    unpack.extract(pkg.zip_dir, zip_queue, pkg.txt_dir)
    unpacked, txt_files = txt_queue.loc[:, ['filename']], txt_files_packed
    if not unpacked.empty:
        pkg.save_dataset(txt_files, 'txt_files')
    assert sorted(os.listdir(pkg.txt_dir)) == sorted(txt_files.filename)
    repaired = repair.directory(pkg.txt_dir, unpacked)
    return txt_files, unpacked, repaired
Esempio n. 5
0
def get_st2() -> pd.DataFrame:
    """Return the st2 parsed-games DataFrame, rebuilding and caching on a miss."""
    try:
        st2 = pkg.load_dataset('st2')
    # Narrowed from a bare `except:`; any load failure triggers a rebuild
    # from the raw txt files.
    except Exception:
        st2 = (games
            .get_txt_files()[0]
            .pipe(parse.txt_files)
            .pipe(label.txt_files)
        )
        pkg.save_dataset(st2, 'st2')
    return st2
Esempio n. 6
0
def get_zip_files() -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Mirror any new remote zip archives and return the zip-file inventory.

    Returns:
        (zip_files, scraped): the full remote inventory and the archives
        downloaded this run.
    """
    zip_files_remote = scrape.list_directory_contents_recursive(pkg.strados2_url)
    try:
        zip_files_cached = pkg.load_dataset('zip_files')
    # Narrowed from a bare `except:`; no cached inventory -> start empty.
    except Exception:
        # exist_ok avoids a crash when the directory survived a deleted cache.
        os.makedirs(pkg.zip_dir, exist_ok=True)
        zip_files_cached = pd.DataFrame(columns=zip_files_remote.columns.values)
    assert sorted(os.listdir(pkg.zip_dir)) == sorted(zip_files_cached.name)
    # Outer merge + indicator: rows only present remotely still need mirroring.
    zip_queue = (pd
        .merge(
            zip_files_remote, zip_files_cached,
            how='outer', indicator=True, validate='one_to_one'
        )
        .query('_merge == "left_only"')
        .drop(columns='_merge')
    )
    scraped, zip_files = scrape.mirror_no_directories(pkg.zip_dir, '*.zip', zip_queue), zip_files_remote
    if not scraped.empty:
        pkg.save_dataset(zip_files, 'zip_files')
    assert sorted(os.listdir(pkg.zip_dir)) == sorted(zip_files.name)
    return zip_files, scraped
Esempio n. 7
0
#          Copyright Rein Halbersma 2018-2021.
# Distributed under the Boost Software License, Version 1.0.
#    (See accompanying file LICENSE_1_0.txt or copy at
#          http://www.boost.org/LICENSE_1_0.txt)

import datetime as dt
import os

import pandas as pd
from tqdm import tqdm

import gravon.package as pkg
import gravon.transform.label as label
import gravon.transform.tidy as tidy

# Label the cached results and save the derived sr2 dataset.
results = pkg.load_dataset('results')
sr2 = label.results(results)
pkg.save_dataset(sr2, 'sr2')

# NOTE(review): the original referenced an undefined name `raw_results`
# (NameError at runtime); the only results DataFrame in scope is `results`
# loaded above — confirm this was the intent.
all_results = (tidy
    .results(results)
    .query('player_red.notnull() & player_blue.notnull()')
)

# There have been 798K matches.
all_results.shape[0]

# There have been almost 70K players.
all_players = tidy.players(all_results)
all_players.player.unique().shape[0]
Esempio n. 8
0
#          Copyright Rein Halbersma 2018-2021.
# Distributed under the Boost Software License, Version 1.0.
#    (See accompanying file LICENSE_1_0.txt or copy at
#          http://www.boost.org/LICENSE_1_0.txt)

import pandas as pd

import gravon.package as pkg
import gravon.pattern as pattern
import gravon.archive as archive

# Restrict the setups dataset to classic games from 2006-09 onwards.
ss2 = pkg.load_dataset('ss2').query('type == "classic" & period >= "2006-09"')

# Setup template handed to pattern.match; presumably '.' is a wildcard
# square — TODO confirm against gravon.pattern.
setup = ("""
    55X.......
    ..........
    ..........
    ..........
    """)

# Find games whose setups match the template, tabulate matches per player
# and match type, and archive the matching game files.
matched = pattern.match(ss2, setup)
pd.crosstab(matched.player, matched.match_type, margins=True)
archive.make_zip(matched.filename.tolist(), 'constrictor')