def drop_views(session: Session) -> None: tables_to_drop = [ 'twitch_live_view', 'twitter_live_view', 'youtube_live_view', 'unified_minute_view', 'unified_hour_view', 'unified_day_view' ] for table_to_drop in tables_to_drop: session.execute("DROP MATERIALIZED VIEW " + table_to_drop + ' ;')
def cassandra_ingest( session: Session, prepared_stmt: PreparedStatement, parameters, concurrency: int = 100, ) -> None: """Concurrent ingest into Apache Cassandra.""" while True: try: results = execute_concurrent_with_args( session=session, statement=prepared_stmt, parameters=parameters, concurrency=concurrency, ) for (i, (success, _)) in enumerate(results): if not success: while True: try: session.execute(prepared_stmt, parameters[i]) except Exception as exception: print(exception) continue break break except Exception as exception: print(exception) time.sleep(1) continue
def ensure_keyspace_exists(cluster: Cluster, session: Session, keyspace: str) -> None: if keyspace not in cluster.metadata.keyspaces.keys(): cql = SimpleStatement( f"CREATE KEYSPACE \"{keyspace}\" WITH REPLICATION = {{'class': 'SingleRegionStrategy'}}" ) session.execute(cql) time.sleep(5)
def save(self, session: Session, prep_insert_statement: PreparedStatement, data): if data is None: raise ValueError("Cannot save None data to Cassandra.") for pulse_data in data: session.execute(prep_insert_statement, pulse_data) return 0
def write_accounts(session: Session, accounts: dict) -> None: games_prep = session.prepare( "insert into accounts (streamer, language, game) values (?,?,?)") for streamer, attributes in accounts.items(): session.execute(games_prep, [streamer, attributes['language'], attributes['game']]) print('ACCOUNTS DONE.')
def write_games(session: Session, games: dict) -> None: games_prep = session.prepare( "insert into games (game, genre, console) values (?,?,?)") for game, attributes in games.items(): session.execute(games_prep, [game, attributes['genre'], attributes['console']]) print('GAMES DONE.')
def write_to_cassandra(session: Session, random_step_data: dict, table: str) -> None: session.execute( "INSERT INTO insight." + table + " (streamer, timestamp, twitch_count, twitter_count, youtube_count, total_count) VALUES ('" + random_step_data.get('streamer') + "', " + "'" + random_step_data.get('timestamp') + "', " + str(random_step_data.get('twitch_count')) + ", " + str(random_step_data.get('twitter_count')) + ", " + str(random_step_data.get('youtube_count')) + ", " + str(random_step_data.get('total_count')) + ");")
def insert_exchange_rates(session: Session, keyspace: str, table: str, exchange_rates: pd.DataFrame) -> None: """Insert exchange rates into Cassandra table.""" colnames = ",".join(exchange_rates.columns) values = ",".join(["?" for i in range(len(exchange_rates.columns))]) query = f"""INSERT INTO {keyspace}.{table}({colnames}) VALUES ({values})""" prepared = session.prepare(query) for _, row in exchange_rates.iterrows(): session.execute(prepared, row)
def drop_table(table: str, session: Session) -> None: """ Drop table. Parameters ---------- table : str Table name. session : cassandra.cluster.Session Cassandra session. """ session.execute(f'DROP TABLE IF EXISTS {table:s};')
def ensure_table_exists( from_cluster: Cluster, to_cluster: Cluster, to_session: Session, keyspace: str, table: str, ) -> None: if table not in get_all_tables_from_keyspace(to_cluster, keyspace): click.echo(f"Table {keyspace}.{table} does not exists. Creating it...") table_scheme = get_table_description_as_cql(from_cluster, keyspace, table) to_session.execute(SimpleStatement(table_scheme)) time.sleep(1)
def create_keyspace(self, session: Session, keyspace: str): """ :param keyspace: Name of the keyspace :param session: Session object (obtained from cassandra.cluster) :return: """ print("Creating keyspace if not present: " + keyspace) try: session.execute(keyspace_query) print("Keyspace query executed successfully") except Exception as ex: print("Error running keyspace query") raise ex
def create_tables(self, session: Session): """ Function to create tables :param session: session object (obtained through cassandra-driver) :return: """ print("Creating tables session_songs, user_songs, app_history") try: for query in table_create_queries: session.execute(query) print("All the table creation queries ran successfully") except Exception as ex: print("Error executing query") raise ex
def ingest_configuration( session: Session, keyspace: str, block_bucket_size: int, tx_hash_prefix_len: int, ) -> None: """Store configuration details in Cassandra table.""" cql_str = """INSERT INTO configuration (id, block_bucket_size, tx_prefix_length) VALUES (%s, %s, %s)""" session.execute( cql_str, (keyspace, int(block_bucket_size), int(tx_hash_prefix_len)), )
def create_table(table_name: str, table_info: str, session: Session) -> None: """ Drop and create table. Parameters ---------- table_name : str Table name. table_info : str Table information passed to SQL CREATE TABLE command. session : cassandra.cluster.Session Cassandra session. """ drop_table(table_name, session) create_command = f'CREATE TABLE IF NOT EXISTS {table_name:s}\n{table_info:s};' session.execute(create_command)
def get_table_iterator(session: Session, keyspace: str, table: str, fetch_size: int = 100) -> t.Iterator: cql = SimpleStatement(f"select * from {keyspace}.{table};", fetch_size=fetch_size) for row in session.execute(cql): yield row
def insert_batch_app_history(self, rows: list, session: Session): """ Function to insert records into app_history table :param rows: list of records :param session: session object (obtained from cassandra-driver) :return: """ batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM) statement = session.prepare(insert_app_history) try: for row in rows: if row[13] != "": if row[16] == "": userId = 0 else: userId = int(row[16]) batch.add(statement, (row[13], row[2], row[5], userId)) session.execute(batch) except Exception as ex: print("Error inserting records in app_history") raise ex
def get_last_ingested_block(session: Session, table="block") -> Optional[int]: """Return last ingested block ID from block table.""" cql_str = f"""SELECT block_id_group FROM {session.keyspace}.{table} PER PARTITION LIMIT 1""" simple_stmt = SimpleStatement(cql_str, fetch_size=None) result = session.execute(simple_stmt) groups = [row.block_id_group for row in result.current_rows] if len(groups) == 0: return None max_block_group = max(groups) result = session.execute( f"""SELECT MAX(block_id) AS max_block FROM {session.keyspace}.{table} WHERE block_id_group={max_block_group}""" ) max_block = result.current_rows[0].max_block return max_block
def insert_batch_session(self, rows: list, session: Session): """ Function to insert data into cassandra in batches in session_songs :param rows: :param session: :return: """ batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM) statement = session.prepare(insert_session_songs) try: for row in rows: if row[6] == "": length = 0.0 else: length = float(row[6]) batch.add(statement, (int(row[12]), int(row[4]), row[0], row[13], length)) session.execute(batch) except Exception as ex: print("Error inserting data into session_songs") raise ex
def get_prepared_statement( session: Session, keyspace: str, table: str ) -> PreparedStatement: """Build prepared CQL INSERT statement for specified table.""" cql_str = f"""SELECT column_name FROM system_schema.columns WHERE keyspace_name = \'{keyspace}\' AND table_name = \'{table}\';""" result_set = session.execute(cql_str) columns = [elem.column_name for elem in result_set._current_rows] cql_str = build_cql_insert_stmt(columns, table) prepared_stmt = session.prepare(cql_str) return prepared_stmt
def insert_rows(table_name: str, table_info: str, row_fn: Callable[[tuple], tuple], csv_file: str, session: Session) -> None: """ Insert rows from CSV file into table. Parameters ---------- table_name : str Table name. table_info : str Table information passed to SQL CREATE TABLE command. row_fn : callable[tuple, tuple] csv_file : str session : cassandra.cluster.Session Cassandra session. """ query = create_insert_command(table_name, table_info) with open(csv_file, encoding='utf8') as f: csvreader = csv.reader(f) next(csvreader) # skip header for line in csvreader: session.execute(query, row_fn(line))
def run_query(query: str, session: Session) -> None: """ Run SQL query and print rows. Parameters ---------- query : str SQL query. session : cassandra.cluster.Session Cassandra session. """ rows = session.execute(query) for row in rows: print(row)
def insert_exchange_rates(session: Session, keyspace: str, table: str, exchange_rates: pd.DataFrame) -> None: """Insert exchange rates into Cassandra table. Parameters ---------- session Cassandra session. keyspace Target Cassandra keyspace. table Cassandra table. exchange_rates pandas DataFrame with columns 'date', 'USD', 'EUR' etc. """ colnames = ",".join(exchange_rates.columns) values = ",".join(["?" for i in range(len(exchange_rates.columns))]) query = f"""INSERT INTO {keyspace}.{table}({colnames}) VALUES ({values})""" prepared_stmt = session.prepare(query) for _, row in exchange_rates.iterrows(): session.execute(prepared_stmt, row)
def query_most_recent_date(session: Session, keyspace: str, table: str) -> Optional[str]: """Fetch most recent entry from exchange rates table.""" def pandas_factory(colnames, rows): return pd.DataFrame(rows, columns=colnames) session.row_factory = pandas_factory query = f"""SELECT date FROM {keyspace}.{table};""" result = session.execute(query) rates = result._current_rows if rates.empty: max_date = None else: rates["date"] = rates["date"].astype("datetime64") max_date = (rates.nlargest( 1, "date").iloc[0]["date"].strftime("%Y-%m-%d")) return max_date
def create_keyspace(self, session: Session) -> None: session.execute(""" CREATE KEYSPACE IF NOT EXISTS """ + self.KEYSPACE + """ WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': '1' } """)
def get_table_size(session: Session, keyspace: str, table: str) -> int: cql = SimpleStatement(f"select count(*) from {keyspace}.{table};") return session.execute(cql, timeout=30).one()["count"]
def get_data_frame_no_parameter(session: Session, prepared_query: str) -> pd.DataFrame: return session.execute(prepared_query)._current_rows
def create_tables(tables: list, session: Session) -> None: for table in tables: session.execute(table)
def create_keyspace(session: Session, keyspace: str = 'insight') -> None: session.execute( "CREATE KEYSPACE IF NOT EXISTS " + keyspace + " WITH replication = {'class':'SimpleStrategy', 'replication_factor' : 3};" )
def get_data_frame(session: Session, prepared_query: PreparedStatement, user: str) -> pd.DataFrame: return session.execute(prepared_query, (user, ))._current_rows
def get_values_from_band(session: Session, band: str) -> ResultSet: return session.execute("select * from " + config.KEY_SPACE + "." + band)