import datetime as dt
from io import StringIO

import pandas as pd
from requests import Request
from requests_cache import CachedSession

# Project-internal names (Updatable, MOEX, Platforms, WeeklyActives, Active,
# remove_from_cache, reduce_ranges, active_iterator) are assumed to be
# imported from the package's own modules; their paths are not shown here.


class MoexUpdatable(Updatable):
    """ Fetches open-positions data from the Moscow Exchange (MOEX)
    derivatives section and stores it per active. """

    def __init__(self, model_launcher, flavor):
        self.model_launcher = model_launcher
        self.session = CachedSession()

    def download_date(self, date):
        """ Fetch the open-positions CSV for a single date and pivot it into
        one row per (Date, name) pair. """
        request = Request(
            method='GET',
            url='https://www.moex.com/ru/derivatives/open-positions-csv.aspx',
            params={
                'd': dt.datetime(date.year, date.month, date.day).strftime("%Y%m%d")
            },
            headers={'User-Agent': 'Mozilla/5.0'})
        request = self.session.prepare_request(request)
        response = self.session.send(request)

        df = pd.read_csv(StringIO(response.text), parse_dates=['moment'])

        if df.empty:
            # No data for this date: drop the cached response so that a later
            # run retries the request instead of reusing the empty answer.
            remove_from_cache(self.session, request)
            return pd.DataFrame()

        df = df.rename(columns={'moment': 'Date', 'isin': 'code'})

        # Append the contract type to the instrument name.
        df['name'] = df.apply(lambda row: '{} ({})'.format(
            row['name'], MOEX['ct_mapping'][row['contract_type']]), axis=1)

        df['iz_fiz'] = df['iz_fiz'].fillna(0)

        keys = ('Date', 'name')
        groups = df.groupby(keys)

        # Pivot: for every (Date, name) group, spread the physical/legal-entity
        # rows into prefixed columns according to the configured mappings.
        df = pd.DataFrame([{
            **{
                '{}{}'.format(MOEX['phys_mapping'][row['iz_fiz']], value): row[key]
                for i, row in content.iterrows()
                for key, value in MOEX['csv_mapping'].items()
            },
            **dict(zip(keys, group))
        } for group, content in groups])

        return df

    def initial_fill(self):
        return self.fill(MOEX['first_date'], dt.date.today())

    def fill(self, first, last):
        return pd.concat(
            [self.download_date(date) for date in pd.date_range(first, last)])

    def range(self):
        return reduce_ranges([
            active.range()
            for active in active_iterator(MOEX, self.model_launcher)
        ])

    def write_update(self, data):
        if not data.empty:
            Platforms(self.model_launcher, MOEX['name']).write_single(
                MOEX['platform_code'], 'Moscow exchange')

            actives_table = WeeklyActives(self.model_launcher, MOEX['name'])

            groups = data.groupby('name')

            actives_table.write_df(
                pd.DataFrame([(MOEX['platform_code'], active)
                              for active in groups.groups.keys()],
                             columns=("PlatformCode", "ActiveName")))

            for group, content in groups:
                Active(self.model_launcher, MOEX, MOEX['platform_code'], group,
                       content.drop('name', axis=1)).update()
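
# --- Hypothetical usage sketch (not part of the original source). ---
# The surrounding framework is assumed to supply a concrete model_launcher;
# its construction is project-specific, so the calls are only illustrated:
#
#     updater = MoexUpdatable(model_launcher, flavor=MOEX)
#     updater.write_update(updater.initial_fill())                      # first run
#     updater.write_update(updater.fill(some_start_date, dt.date.today()))  # later updates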
from glob import glob
from json import load
from os import path
from sqlite3 import connect
from sys import exit

from requests import Request
from requests_cache import CachedSession

# GoogleAnswerWordsSolver and GoogleResultsCountSolver are assumed to be
# imported from the project's own solver modules; their paths are not shown.


class Cache:
    """ Cache class for operating on the local SQLite cache """

    def __init__(self):
        self.session = CachedSession('games/db/cache',
                                     allowable_codes=(200, 302, 304))
        self.solvers = [
            GoogleAnswerWordsSolver(),
            GoogleResultsCountSolver()
        ]

    def prune(self):
        """ Prune stale entries from the local cache """
        # Collect every URL any solver would request for the saved games.
        urls = []
        for solver in self.solvers:
            for filename in sorted(glob('games/json/*.json')):
                game = load(open(filename))
                for turn in game.get('questions'):
                    urls.extend(solver.build_urls(turn.get('question'),
                                                  turn.get('answers')).values())

        # Anything cached that is not referenced by those URLs, directly or
        # via a redirect step, is considered stale.
        stale_entries = []
        for key, (resp, _) in self.session.cache.responses.items():
            if resp.url not in urls and \
                    not any(step.url in urls for step in resp.history):
                stale_entries.append((key, resp))

        print('Found %s/%s stale entries' %
              (len(stale_entries), len(self.session.cache.responses.keys())))
        for key, resp in stale_entries:
            print('Deleting stale entry: %s' % resp.url)
            self.session.cache.delete(key)

    def refresh(self):
        """ Refresh the local cache with unsaved questions """
        urls = []
        for solver in self.solvers:
            for filename in sorted(glob('games/json/*.json')):
                game = load(open(filename))
                for turn in game.get('questions'):
                    urls.extend(solver.build_urls(turn.get('question'),
                                                  turn.get('answers')).values())

        # URLs whose cache key is not present in the cache still need fetching.
        cache_misses = [
            url for url in urls
            if self.session.cache.create_key(
                self.session.prepare_request(Request('GET', url))
            ) not in self.session.cache.responses
        ]

        print('Found %s/%s URLs not in cache' % (len(cache_misses), len(urls)))
        for idx, url in enumerate(cache_misses):
            print('Adding cached entry: %s' % url)
            response = self.session.get(url)
            if '/sorry/index?continue=' in response.url:
                exit('ERROR: Google rate limiting detected. Cached %s pages.' % idx)

    @staticmethod
    def vacuum():
        """ Perform an SQL vacuum on the local cache to save space """
        conn = connect("games/db/cache.sqlite")
        conn.execute("VACUUM")
        conn.close()

    @staticmethod
    def import_sql():
        """ Import saved SQL dumps into a local SQLite cache """
        conn = connect("games/db/cache.sqlite")
        for filename in sorted(glob('games/db/*.sql')):
            print('Importing SQL %s' % filename)
            sql = open(filename, 'r').read()
            cur = conn.cursor()
            cur.executescript(sql)
        conn.close()

    def export(self):
        """ Export the local cache to SQL dump files """
        for filename in sorted(glob('games/json/*.json')):
            game = load(open(filename))
            show_id = path.basename(filename).split('.')[0]
            if not path.isfile('./games/db/%s.sql' % show_id):
                print('Exporting SQL %s' % show_id)

                # Cache keys for every URL this game's questions would hit.
                urls = []
                for solver in self.solvers:
                    for turn in game.get('questions'):
                        urls.extend(solver.build_urls(turn.get('question'),
                                                      turn.get('answers')).values())
                url_keys = [self.session.cache.create_key(
                    self.session.prepare_request(Request('GET', url))
                ) for url in urls]

                # Copy just those rows into an in-memory database, then dump it
                # as idempotent SQL (CREATE TABLE IF NOT EXISTS / INSERT OR IGNORE).
                conn = connect(':memory:')
                cur = conn.cursor()
                cur.execute("attach database 'games/db/cache.sqlite' as cache")
                cur.execute("select sql from cache.sqlite_master "
                            "where type='table' and name='urls'")
                cur.execute(cur.fetchone()[0])
                cur.execute("select sql from cache.sqlite_master "
                            "where type='table' and name='responses'")
                cur.execute(cur.fetchone()[0])
                for key in list(set(url_keys)):
                    cur.execute("insert into urls select * from cache.urls "
                                "where key = '%s'" % key)
                    cur.execute("insert into responses select * from cache.responses "
                                "where key = '%s'" % key)
                conn.commit()
                cur.execute("detach database cache")

                with open('games/db/%s.sql' % show_id, 'w') as file:
                    for line in conn.iterdump():
                        file.write('%s\n' % line.replace(
                            'TABLE', 'TABLE IF NOT EXISTS'
                        ).replace(
                            'INSERT', 'INSERT OR IGNORE'
                        ))
                conn.close()
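
# --- Hypothetical maintenance sketch (not part of the original source). ---
# A full pass might import any saved SQL dumps, fetch URLs that are missing
# from the cache, drop entries that no saved game references any more, compact
# the SQLite file, and re-export per-game dumps.
if __name__ == '__main__':
    cache = Cache()
    Cache.import_sql()
    cache.refresh()
    cache.prune()
    Cache.vacuum()
    cache.export()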