def _get_token(self):
    """Return a valid access token, refreshing or resetting the session if expired."""
    with dbopen() as cur:
        cur.execute(
            "SELECT token1 FROM tokens WHERE provider = '%s';" % self.SHORTNAME)
        serialized_auth = cur.fetchone()[0]
    if not serialized_auth:
        return self._reset_token()

    auth = json.loads(serialized_auth)
    now = int(round(time.time() * 1000))  # epoch milliseconds
    refresh_expiry = auth['session_handle']['refresh_token_expire_time']
    token_expiry = auth['session_handle']['token_expire_time']
    assert refresh_expiry > token_expiry, (
        "Token expiry happens after refresh expiry! The logic below may be incorrect.")

    if now > refresh_expiry:
        # Both tokens are stale; start a brand new session.
        return self._reset_token()
    elif now > token_expiry:
        # Access token expired, but the refresh token is still valid.
        token = auth['session_handle']['refresh_token']
        return self._refresh_token(token)
    return auth['session_handle']['access_token']
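# All of these helpers lean on a `dbopen` context manager from the local
# `database` module, whose implementation is not shown here. The sketch below
# is an assumption based only on how it is called (yields a cursor by default,
# yields the connection when return_conn=True, commits on exit); the path and
# details are hypothetical.
import sqlite3
from contextlib import contextmanager

DB_PATH = 'data.db'  # hypothetical database location


@contextmanager
def dbopen(return_conn=False):
    """Yield a cursor (or the connection when return_conn=True) and commit on exit."""
    conn = sqlite3.connect(DB_PATH)
    try:
        yield conn if return_conn else conn.cursor()
        conn.commit()
    finally:
        conn.close()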
def _reset_tokens(self):
    logger.info('getting a brand new set of tokens!')
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Connection': 'keep-alive',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36',
    }
    response = requests.get(self.START_URL, headers=headers)
    logger.info('init response=%s' % response)
    logger.debug('init response.headers=%s' % (response.headers, ))
    response.raise_for_status()

    # Pull the csrf cookie key/value pair out of the Set-Cookie header.
    cookie_str = response.headers['Set-Cookie']
    matches = re.findall(r'([^,;\s]*csrf[^=]*)=([^,;\s]+)', cookie_str)
    key, val = matches[0]
    keyval = '%s=%s' % (key, val)

    with dbopen() as cur:
        query = """
            UPDATE tokens
            SET token1 = ?, token2 = ?
            WHERE provider = '%s';
        """ % self.SHORTNAME
        cur.execute(query, [keyval, val])
    return (keyval, val)
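# Quick illustration of the csrf-cookie regex used in _reset_tokens above.
# The Set-Cookie value below is made up for demonstration; only the regex
# itself comes from the code.
import re

sample_cookie_str = (
    'session=abc123; Path=/; HttpOnly, '
    'csrf_token=deadbeef42; Path=/; Secure'
)
matches = re.findall(r'([^,;\s]*csrf[^=]*)=([^,;\s]+)', sample_cookie_str)
print(matches)  # [('csrf_token', 'deadbeef42')]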
def _save_auth(self, auth):
    serialized = json.dumps(auth)
    with dbopen() as cur:
        query = f"""
            UPDATE tokens
            SET token1 = ?
            WHERE provider = '{self.SHORTNAME}';
        """
        cur.execute(query, [serialized])
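# The auth payload passed to _save_auth has the same shape that _get_token
# reads back out of the database. The key names below come from _get_token;
# the values are placeholders for illustration only.
sample_auth = {
    'session_handle': {
        'access_token': 'ACCESS_TOKEN_PLACEHOLDER',
        'refresh_token': 'REFRESH_TOKEN_PLACEHOLDER',
        'token_expire_time': 1520000000000,          # epoch milliseconds
        'refresh_token_expire_time': 1522600000000,  # must be later than token_expire_time
    }
}
# provider._save_auth(sample_auth)  # hypothetical usage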
def _get_tokens(self):
    """Get the stored tokens, resetting them if either is missing."""
    with dbopen() as cur:
        cur.execute(
            "SELECT token1, token2 FROM tokens WHERE provider = '%s';"
            % self.SHORTNAME)
        token1, token2 = cur.fetchone()
    if not token1 or not token2:
        return self._reset_tokens()
    return (token1, token2)
def _get_tokens(self):
    """Get stored tokens."""
    with dbopen() as cur:
        cur.execute(
            "SELECT token1, token2 FROM tokens WHERE provider = '%s';"
            % self.SHORTNAME)
        cookie, csrf_token = cur.fetchone()
    if not cookie or not csrf_token:
        return self._reset_tokens()
    return (cookie, csrf_token)
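# The token helpers above all read and write a `tokens` table with `provider`,
# `token1`, and `token2` columns. The table is created elsewhere; the schema
# below is an assumption inferred from the queries, not the actual migration.
# Note that _reset_tokens uses UPDATE, so a row per provider is presumably
# seeded in advance.
with dbopen() as cur:
    cur.execute("""
        CREATE TABLE IF NOT EXISTS tokens (
            provider TEXT PRIMARY KEY,
            token1 TEXT,
            token2 TEXT
        );
    """)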
def _upsert_to_master_table(self):
    with dbopen() as cur:
        # Create an empty master table with the same shape as the temp table.
        cur.execute(
            "CREATE TABLE IF NOT EXISTS %s AS SELECT * FROM %s WHERE 1=2;"
            % (self.master, self.tmp))
        cur.execute(
            f"CREATE UNIQUE INDEX IF NOT EXISTS {self.master}_uix "
            f"ON {self.master}({self.unique_on});")

        # Restrict the insert to columns present in both the DataFrame and the master table.
        cur.execute(f"SELECT * FROM {self.master} WHERE 1=2;")
        master_columns = [description[0] for description in cur.description]
        df_columns = [x for x in self.df.columns if x in master_columns]
        columns_csv_fmt = ', '.join(df_columns)
        columns_tuple_fmt = '(%s)' % columns_csv_fmt

        # Upsert: rows that collide on the unique index replace the existing ones.
        cur.execute(f"""
            INSERT OR REPLACE INTO {self.master} {columns_tuple_fmt}
            SELECT {columns_csv_fmt} FROM {self.tmp};
        """)

        cur.execute("SELECT * FROM %s;" % self.master)
        rows = cur.fetchall()
        cols = [description[0] for description in cur.description]
        logger.debug('first 3 rows: %s' % (rows[:3], ))
        logger.debug('cols: %s' % (cols, ))
        logger.info('row count: %s' % (len(rows), ))
        logger.info('col count: %s' % (len(cols), ))
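# Standalone illustration of the upsert pattern used in _upsert_to_master_table:
# a unique index plus INSERT OR REPLACE ... SELECT. Table and column names here
# are made up for the demo; only the pattern mirrors the method above.
import sqlite3

conn = sqlite3.connect(':memory:')
cur = conn.cursor()
cur.execute("CREATE TABLE tmp_demo (store_id TEXT, name TEXT);")
cur.execute("CREATE TABLE master_demo AS SELECT * FROM tmp_demo WHERE 1=2;")
cur.execute("CREATE UNIQUE INDEX master_demo_uix ON master_demo(store_id);")

cur.executemany("INSERT INTO tmp_demo VALUES (?, ?);",
                [('a1', 'Old Name'), ('b2', 'Other Store')])
cur.execute("INSERT OR REPLACE INTO master_demo (store_id, name) "
            "SELECT store_id, name FROM tmp_demo;")

# Re-scraped data for the same store_id replaces the old row instead of duplicating it.
cur.execute("DELETE FROM tmp_demo;")
cur.execute("INSERT INTO tmp_demo VALUES (?, ?);", ('a1', 'New Name'))
cur.execute("INSERT OR REPLACE INTO master_demo (store_id, name) "
            "SELECT store_id, name FROM tmp_demo;")

print(cur.execute("SELECT * FROM master_demo ORDER BY store_id;").fetchall())
# [('a1', 'New Name'), ('b2', 'Other Store')]
conn.close()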
combined_data = pd.merge(
    df_cbsa, df_zips, left_on='zip_code', right_on='zip')[['lat', 'lng']]
df = combined_data if df is None else df.append(combined_data)
assert not df.isnull().values.any(), \
    "Sanity check failed: one or more zip codes could not be mapped to a (lat, lng)"
logger.info('running total # of coords: %s' % (df.shape, ))

df['progress'] = 0
logger.info('head:\n%s' % (df.head(), ))
logger.info('types:\n%s' % (df.dtypes, ))

setup()
with dbopen() as cur:
    cur.execute("""
        CREATE TABLE IF NOT EXISTS progress
        (lat REAL, lng REAL, progress INT, UNIQUE(lat, lng) ON CONFLICT IGNORE);
    """)
with dbopen(return_conn=True) as conn:
    df.to_sql('progress', conn, if_exists='append', index=False)
# import pdb; pdb.set_trace()
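# Why UNIQUE(lat, lng) ON CONFLICT IGNORE matters above: re-running the loader
# appends the same coordinates again, and the table-level conflict clause
# silently skips the duplicates instead of raising. Demo values are made up.
import sqlite3

conn = sqlite3.connect(':memory:')
cur = conn.cursor()
cur.execute("""
    CREATE TABLE progress
    (lat REAL, lng REAL, progress INT, UNIQUE(lat, lng) ON CONFLICT IGNORE);
""")
rows = [(40.7128, -74.0060, 0), (34.0522, -118.2437, 0)]
cur.executemany("INSERT INTO progress VALUES (?, ?, ?);", rows)
cur.executemany("INSERT INTO progress VALUES (?, ?, ?);", rows)  # second load is a no-op
print(cur.execute("SELECT count(*) FROM progress;").fetchone()[0])  # 2
conn.close()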
def _clean_temp_table(self):
    with dbopen() as cur:
        cur.execute(f"DROP TABLE IF EXISTS {self.tmp};")
def _insert_to_temp_table(self):
    with dbopen(return_conn=True) as conn:
        if self.debug:
            self._test_columns_seq(conn)
        self.df.to_sql(self.tmp, conn, if_exists='replace', index=False)
from config import PROVIDERS
from database import dbopen
from logzero import logger

name_mapping = {
    'uber_eats': 'payload_storePayload_stateMapDisplayInfo_available_title_text'
}

with dbopen() as cur:
    query = """
        SELECT count(*) from progress WHERE progress = 1;
    """
    cur.execute(query)
    logger.info(f"total coordinates searched: {cur.fetchone()[0]}")

    for provider in PROVIDERS:
        query = f"""
            SELECT count(*) from {provider};
        """
        cur.execute(query)
        logger.info(f"{provider}: {cur.fetchone()[0]}")

    for provider in PROVIDERS:
        name_column = name_mapping.get(provider, 'name')
        query = f"""
            SELECT {name_column}, count({name_column}) as count
            FROM {provider}
            GROUP BY {name_column}
            ORDER BY count DESC;
        """
        cur.execute(query)
        logger.info(f"{provider} most frequent names: {cur.fetchall()[:10]}")
def clean():
    """Clean the DB."""
    with dbopen() as cur:
        for p in PROVIDERS:
            cur.execute("DROP TABLE IF EXISTS %s;" % p)