Example #1
A data updater that fetches daily open-positions CSVs from the Moscow Exchange through a caching HTTP session. The imports below are implied by the code; MOEX (a settings dict), Updatable, Platforms, WeeklyActives, Active, active_iterator, reduce_ranges and remove_from_cache are project-specific helpers assumed to come from the surrounding codebase.

import datetime as dt
from io import StringIO

import pandas as pd
from pandas import concat
from requests import Request
from requests_cache import CachedSession
class MoexUpdatable(Updatable):
    """ Download MOEX open-positions data day by day through a caching HTTP session """

    def __init__(self, model_launcher, flavor):
        self.model_launcher = model_launcher
        self.session = CachedSession()

    def download_date(self, date):
        request = Request(
            method='GET',
            url='https://www.moex.com/ru/derivatives/open-positions-csv.aspx',
            params={'d': date.strftime('%Y%m%d')},
            headers={'User-Agent': 'Mozilla/5.0'})

        request = self.session.prepare_request(request)
        response = self.session.send(request)

        df = pd.read_csv(StringIO(response.text), parse_dates=['moment'])

        if df.empty:
            remove_from_cache(self.session, request)

            return pd.DataFrame()

        df = df.rename(columns={'moment': 'Date', 'isin': 'code'})

        # Append the mapped contract type to each instrument name. Assign with
        # df['name'] rather than df.name, and reassign instead of using chained
        # inplace fillna: both attribute tricks are fragile across pandas versions.
        df['name'] = df.apply(lambda row: '{} ({})'.format(
            row['name'], MOEX['ct_mapping'][row['contract_type']]), axis=1)
        df['iz_fiz'] = df['iz_fiz'].fillna(0)

        keys = ('Date', 'name')

        groups = df.groupby(keys)

        # One output row per (Date, name) group; each source row contributes its
        # csv_mapping fields, prefixed by the physical/juridical marker for iz_fiz.
        df = pd.DataFrame([{
            **{
                '{}{}'.format(MOEX['phys_mapping'][row['iz_fiz']], value): row[key]
                for _, row in content.iterrows()
                for key, value in MOEX['csv_mapping'].items()
            },
            **dict(zip(keys, group))
        } for group, content in groups])

        return df

    def initial_fill(self):
        return self.fill(MOEX['first_date'], dt.date.today())

    def fill(self, first, last):
        return concat(
            [self.download_date(date) for date in pd.date_range(first, last)])

    def range(self):
        return reduce_ranges([
            active.range()
            for active in active_iterator(MOEX, self.model_launcher)
        ])

    def write_update(self, data):
        if not data.empty:
            Platforms(self.model_launcher, MOEX['name']).write_single(
                MOEX['platform_code'], 'Moscow exchange')

            actives_table = WeeklyActives(self.model_launcher, MOEX['name'])

            groups = data.groupby('name')

            actives_table.write_df(pd.DataFrame(
                [(MOEX['platform_code'], active) for active in groups.groups.keys()],
                columns=('PlatformCode', 'ActiveName')))

            for group, content in groups:
                Active(self.model_launcher, MOEX, MOEX['platform_code'], group,
                       content.drop('name', axis=1)).update()
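
For reference, the fetch-and-parse pattern inside download_date can be exercised on its own. A minimal sketch, assuming only requests, requests_cache and pandas; the URL, headers and the 'moment' column come from the code above, while the sample date is arbitrary:

import datetime as dt
from io import StringIO

import pandas as pd
from requests import Request
from requests_cache import CachedSession

session = CachedSession()

# Build and send a cached GET request for one trading day's open-positions CSV.
request = session.prepare_request(Request(
    method='GET',
    url='https://www.moex.com/ru/derivatives/open-positions-csv.aspx',
    params={'d': dt.date(2018, 1, 15).strftime('%Y%m%d')},  # arbitrary sample date
    headers={'User-Agent': 'Mozilla/5.0'}))
response = session.send(request)

# A second call for the same date would be served from the local cache.
df = pd.read_csv(StringIO(response.text), parse_dates=['moment'])
print(df.head())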
Example #2
A maintenance class for a requests_cache SQLite backend: it prunes stale entries, prefetches missing URLs, vacuums the database, and imports/exports SQL dumps. The imports below are implied by the code; GoogleAnswerWordsSolver and GoogleResultsCountSolver are project-specific solver classes assumed to come from the surrounding codebase.

from glob import glob
from json import load
from os import path
from sqlite3 import connect

from requests import Request
from requests_cache import CachedSession
class Cache:
    """ Cache class for operating on the local SQLite cache """

    def __init__(self):
        self.session = CachedSession('games/db/cache', allowable_codes=(200, 302, 304))
        self.solvers = [
            GoogleAnswerWordsSolver(),
            GoogleResultsCountSolver()
        ]

    def _saved_urls(self):
        """ Collect every URL the solvers would request for the saved games """
        urls = []
        for solver in self.solvers:
            for filename in sorted(glob('games/json/*.json')):
                with open(filename) as game_file:
                    game = load(game_file)
                for turn in game.get('questions'):
                    urls.extend(solver.build_urls(turn.get('question'), turn.get('answers')).values())
        return urls

    def prune(self):
        """ Prune stale entries from the local cache """
        urls = self._saved_urls()
        stale_entries = []
        # In this requests_cache version, cache.responses maps key -> (response, timestamp).
        for key, (resp, _) in self.session.cache.responses.items():
            if resp.url not in urls and not any(step.url in urls for step in resp.history):
                stale_entries.append((key, resp))
        print('Found %s/%s stale entries' % (len(stale_entries), len(self.session.cache.responses)))
        for key, resp in stale_entries:
            print('Deleting stale entry: %s' % resp.url)
            self.session.cache.delete(key)

    def refresh(self):
        """ Refresh the local cache with unsaved questions """
        urls = self._saved_urls()
        cache_misses = [
            url for url in urls
            if self.session.cache.create_key(
                self.session.prepare_request(Request('GET', url))
            ) not in self.session.cache.responses
        ]
        print('Found %s/%s URLs not in cache' % (len(cache_misses), len(urls)))
        for idx, url in enumerate(cache_misses):
            print('Adding cached entry: %s' % url)
            response = self.session.get(url)
            if '/sorry/index?continue=' in response.url:
                exit('ERROR: Google rate limiting detected. Cached %s pages.' % idx)

    @staticmethod
    def vacuum():
        """ Perform an SQL vacuum on the local cache to save space """
        conn = connect("games/db/cache.sqlite")
        conn.execute("VACUUM")
        conn.close()

    @staticmethod
    def import_sql():
        """ Import saved SQL dumps into a local SQLite cache """
        conn = connect("games/db/cache.sqlite")
        for filename in sorted(glob('games/db/*.sql')):
            print('Importing SQL %s' % filename)
            with open(filename, 'r') as sql_file:
                sql = sql_file.read()
            cur = conn.cursor()
            cur.executescript(sql)
        conn.close()

    def export(self):
        """ Export the local cache to SQL dump files """
        for filename in sorted(glob('games/json/*.json')):
            with open(filename) as game_file:
                game = load(game_file)
            show_id = path.basename(filename).split('.')[0]
            if not path.isfile('./games/db/%s.sql' % show_id):
                print('Exporting SQL %s' % show_id)
                urls = []
                for solver in self.solvers:
                    for turn in game.get('questions'):
                        urls.extend(solver.build_urls(turn.get('question'), turn.get('answers')).values())
                url_keys = [self.session.cache.create_key(
                    self.session.prepare_request(Request('GET', url))
                ) for url in urls]
                # Rebuild the urls/responses tables in an in-memory database using
                # the schema stored in the on-disk cache, then copy over only the
                # rows this game needs.
                conn = connect(':memory:')
                cur = conn.cursor()
                cur.execute("attach database 'games/db/cache.sqlite' as cache")
                cur.execute("select sql from cache.sqlite_master where type='table' and name='urls'")
                cur.execute(cur.fetchone()[0])
                cur.execute("select sql from cache.sqlite_master where type='table' and name='responses'")
                cur.execute(cur.fetchone()[0])
                # Bind each key as a parameter instead of interpolating it into the SQL.
                for key in set(url_keys):
                    cur.execute("insert into urls select * from cache.urls where key = ?", (key,))
                    cur.execute("insert into responses select * from cache.responses where key = ?", (key,))
                conn.commit()
                cur.execute("detach database cache")
                with open('games/db/%s.sql' % show_id, 'w') as file:
                    for line in conn.iterdump():
                        file.write('%s\n' % line.replace(
                            'TABLE', 'TABLE IF NOT EXISTS'
                        ).replace(
                            'INSERT', 'INSERT OR IGNORE'
                        ))
                conn.close()
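
Both prune and refresh above hinge on the same two backend operations: deriving the cache key for a prepared request and testing membership in cache.responses. A minimal round-trip sketch, assuming the same older requests_cache API used above (the query URL is purely illustrative):

from requests import Request
from requests_cache import CachedSession

session = CachedSession('games/db/cache', allowable_codes=(200, 302, 304))
url = 'https://www.google.com/search?q=example'  # illustrative URL

# The key must be derived exactly as CachedSession derived it when storing.
key = session.cache.create_key(session.prepare_request(Request('GET', url)))

if key in session.cache.responses:
    print('cached: %s' % url)
    session.cache.delete(key)  # evict it, as prune() does
else:
    print('not cached: %s' % url)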