コード例 #1
0
def drop_views(session: Session) -> None:
    tables_to_drop = [
        'twitch_live_view', 'twitter_live_view', 'youtube_live_view',
        'unified_minute_view', 'unified_hour_view', 'unified_day_view'
    ]
    for table_to_drop in tables_to_drop:
        session.execute("DROP MATERIALIZED VIEW " + table_to_drop + ' ;')
コード例 #2
0
def cassandra_ingest(
    session: Session,
    prepared_stmt: PreparedStatement,
    parameters,
    concurrency: int = 100,
) -> None:
    """Concurrent ingest into Apache Cassandra."""

    while True:
        try:
            results = execute_concurrent_with_args(
                session=session,
                statement=prepared_stmt,
                parameters=parameters,
                concurrency=concurrency,
            )

            for (i, (success, _)) in enumerate(results):
                if not success:
                    while True:
                        try:
                            session.execute(prepared_stmt, parameters[i])
                        except Exception as exception:
                            print(exception)
                            continue
                        break
            break

        except Exception as exception:
            print(exception)
            time.sleep(1)
            continue
コード例 #3
0
def ensure_keyspace_exists(cluster: Cluster, session: Session,
                           keyspace: str) -> None:
    if keyspace not in cluster.metadata.keyspaces.keys():
        cql = SimpleStatement(
            f"CREATE KEYSPACE \"{keyspace}\" WITH REPLICATION = {{'class': 'SingleRegionStrategy'}}"
        )
        session.execute(cql)
        time.sleep(5)
コード例 #4
0
    def save(self, session: Session, prep_insert_statement: PreparedStatement, data):
        if data is None:
            raise ValueError("Cannot save None data to Cassandra.")

        for pulse_data in data:
            session.execute(prep_insert_statement, pulse_data)

        return 0
コード例 #5
0
def write_accounts(session: Session, accounts: dict) -> None:

    games_prep = session.prepare(
        "insert into accounts (streamer, language, game) values (?,?,?)")

    for streamer, attributes in accounts.items():
        session.execute(games_prep,
                        [streamer, attributes['language'], attributes['game']])

    print('ACCOUNTS DONE.')
コード例 #6
0
def write_games(session: Session, games: dict) -> None:

    games_prep = session.prepare(
        "insert into games (game, genre, console) values (?,?,?)")

    for game, attributes in games.items():
        session.execute(games_prep,
                        [game, attributes['genre'], attributes['console']])

    print('GAMES DONE.')
コード例 #7
0
def write_to_cassandra(session: Session, random_step_data: dict,
                       table: str) -> None:
    session.execute(
        "INSERT INTO insight." + table +
        " (streamer, timestamp, twitch_count, twitter_count, youtube_count, total_count) VALUES ('"
        + random_step_data.get('streamer') + "', " + "'" +
        random_step_data.get('timestamp') + "', " +
        str(random_step_data.get('twitch_count')) + ", " +
        str(random_step_data.get('twitter_count')) + ", " +
        str(random_step_data.get('youtube_count')) + ", " +
        str(random_step_data.get('total_count')) + ");")
コード例 #8
0
def insert_exchange_rates(session: Session, keyspace: str, table: str,
                          exchange_rates: pd.DataFrame) -> None:
    """Insert exchange rates into Cassandra table."""

    colnames = ",".join(exchange_rates.columns)
    values = ",".join(["?" for i in range(len(exchange_rates.columns))])
    query = f"""INSERT INTO {keyspace}.{table}({colnames}) VALUES ({values})"""
    prepared = session.prepare(query)

    for _, row in exchange_rates.iterrows():
        session.execute(prepared, row)
コード例 #9
0
def drop_table(table: str, session: Session) -> None:
    """
    Drop table.

    Parameters
    ----------
    table : str
        Table name.
    session : cassandra.cluster.Session
        Cassandra session.
    """
    session.execute(f'DROP TABLE IF EXISTS {table:s};')
コード例 #10
0
def ensure_table_exists(
    from_cluster: Cluster,
    to_cluster: Cluster,
    to_session: Session,
    keyspace: str,
    table: str,
) -> None:
    if table not in get_all_tables_from_keyspace(to_cluster, keyspace):
        click.echo(f"Table {keyspace}.{table} does not exists. Creating it...")
        table_scheme = get_table_description_as_cql(from_cluster, keyspace,
                                                    table)
        to_session.execute(SimpleStatement(table_scheme))
        time.sleep(1)
コード例 #11
0
    def create_keyspace(self, session: Session, keyspace: str):
        """

        :param keyspace: Name of the keyspace
        :param session: Session object (obtained from cassandra.cluster)
        :return:
        """
        print("Creating keyspace if not present: " + keyspace)
        try:
            session.execute(keyspace_query)
            print("Keyspace query executed successfully")
        except Exception as ex:
            print("Error running keyspace query")
            raise ex
コード例 #12
0
 def create_tables(self, session: Session):
     """
     Function to create tables
     :param session: session object (obtained through cassandra-driver)
     :return:
     """
     print("Creating tables session_songs, user_songs, app_history")
     try:
         for query in table_create_queries:
             session.execute(query)
         print("All the table creation queries ran successfully")
     except Exception as ex:
         print("Error executing query")
         raise ex
コード例 #13
0
def ingest_configuration(
    session: Session,
    keyspace: str,
    block_bucket_size: int,
    tx_hash_prefix_len: int,
) -> None:
    """Store configuration details in Cassandra table."""

    cql_str = """INSERT INTO configuration
                 (id, block_bucket_size, tx_prefix_length)
                 VALUES (%s, %s, %s)"""
    session.execute(
        cql_str,
        (keyspace, int(block_bucket_size), int(tx_hash_prefix_len)),
    )
コード例 #14
0
def create_table(table_name: str, table_info: str, session: Session) -> None:
    """
    Drop and create table.

    Parameters
    ----------
    table_name : str
        Table name.
    table_info : str
        Table information passed to SQL CREATE TABLE command.
    session : cassandra.cluster.Session
        Cassandra session.
    """
    drop_table(table_name, session)
    create_command = f'CREATE TABLE IF NOT EXISTS {table_name:s}\n{table_info:s};'
    session.execute(create_command)
コード例 #15
0
def get_table_iterator(session: Session,
                       keyspace: str,
                       table: str,
                       fetch_size: int = 100) -> t.Iterator:
    cql = SimpleStatement(f"select * from {keyspace}.{table};",
                          fetch_size=fetch_size)
    for row in session.execute(cql):
        yield row
コード例 #16
0
 def insert_batch_app_history(self, rows: list, session: Session):
     """
     Function to insert records into app_history table
     :param rows: list of records
     :param session: session object (obtained from cassandra-driver)
     :return:
     """
     batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
     statement = session.prepare(insert_app_history)
     try:
         for row in rows:
             if row[13] != "":
                 if row[16] == "":
                     userId = 0
                 else:
                     userId = int(row[16])
                 batch.add(statement, (row[13], row[2], row[5], userId))
         session.execute(batch)
     except Exception as ex:
         print("Error inserting records in app_history")
         raise ex
コード例 #17
0
def get_last_ingested_block(session: Session, table="block") -> Optional[int]:
    """Return last ingested block ID from block table."""

    cql_str = f"""SELECT block_id_group FROM {session.keyspace}.{table}
                  PER PARTITION LIMIT 1"""
    simple_stmt = SimpleStatement(cql_str, fetch_size=None)
    result = session.execute(simple_stmt)
    groups = [row.block_id_group for row in result.current_rows]

    if len(groups) == 0:
        return None

    max_block_group = max(groups)

    result = session.execute(
        f"""SELECT MAX(block_id) AS max_block FROM {session.keyspace}.{table}
            WHERE block_id_group={max_block_group}"""
    )
    max_block = result.current_rows[0].max_block

    return max_block
コード例 #18
0
 def insert_batch_session(self, rows: list, session: Session):
     """
     Function to insert data into cassandra in batches in session_songs
     :param rows:
     :param session:
     :return:
     """
     batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM)
     statement = session.prepare(insert_session_songs)
     try:
         for row in rows:
             if row[6] == "":
                 length = 0.0
             else:
                 length = float(row[6])
             batch.add(statement,
                       (int(row[12]), int(row[4]), row[0], row[13], length))
         session.execute(batch)
     except Exception as ex:
         print("Error inserting data into session_songs")
         raise ex
コード例 #19
0
def get_prepared_statement(
    session: Session, keyspace: str, table: str
) -> PreparedStatement:
    """Build prepared CQL INSERT statement for specified table."""

    cql_str = f"""SELECT column_name FROM system_schema.columns
                  WHERE keyspace_name = \'{keyspace}\'
                  AND table_name = \'{table}\';"""
    result_set = session.execute(cql_str)
    columns = [elem.column_name for elem in result_set._current_rows]
    cql_str = build_cql_insert_stmt(columns, table)
    prepared_stmt = session.prepare(cql_str)
    return prepared_stmt
コード例 #20
0
def insert_rows(table_name: str, table_info: str, row_fn: Callable[[tuple],
                                                                   tuple],
                csv_file: str, session: Session) -> None:
    """
    Insert rows from CSV file into table.

    Parameters
    ----------
    table_name : str
        Table name.
    table_info : str
        Table information passed to SQL CREATE TABLE command.
    row_fn : callable[tuple, tuple]
    csv_file : str
    session : cassandra.cluster.Session
        Cassandra session.
    """
    query = create_insert_command(table_name, table_info)
    with open(csv_file, encoding='utf8') as f:
        csvreader = csv.reader(f)
        next(csvreader)  # skip header
        for line in csvreader:
            session.execute(query, row_fn(line))
コード例 #21
0
def run_query(query: str, session: Session) -> None:
    """
    Run SQL query and print rows.

    Parameters
    ----------
    query : str
        SQL query.
    session : cassandra.cluster.Session
        Cassandra session.
    """
    rows = session.execute(query)
    for row in rows:
        print(row)
コード例 #22
0
def insert_exchange_rates(session: Session, keyspace: str, table: str,
                          exchange_rates: pd.DataFrame) -> None:
    """Insert exchange rates into Cassandra table.

    Parameters
    ----------
    session
        Cassandra session.
    keyspace
        Target Cassandra keyspace.
    table
        Cassandra table.
    exchange_rates
        pandas DataFrame with columns 'date', 'USD', 'EUR' etc.
    """

    colnames = ",".join(exchange_rates.columns)
    values = ",".join(["?" for i in range(len(exchange_rates.columns))])
    query = f"""INSERT INTO {keyspace}.{table}({colnames}) VALUES ({values})"""
    prepared_stmt = session.prepare(query)

    for _, row in exchange_rates.iterrows():
        session.execute(prepared_stmt, row)
コード例 #23
0
def query_most_recent_date(session: Session, keyspace: str,
                           table: str) -> Optional[str]:
    """Fetch most recent entry from exchange rates table."""
    def pandas_factory(colnames, rows):
        return pd.DataFrame(rows, columns=colnames)

    session.row_factory = pandas_factory

    query = f"""SELECT date FROM {keyspace}.{table};"""

    result = session.execute(query)
    rates = result._current_rows

    if rates.empty:
        max_date = None
    else:
        rates["date"] = rates["date"].astype("datetime64")
        max_date = (rates.nlargest(
            1, "date").iloc[0]["date"].strftime("%Y-%m-%d"))
    return max_date
コード例 #24
0
 def create_keyspace(self, session: Session) -> None:
     session.execute("""
     CREATE KEYSPACE IF NOT EXISTS """ + self.KEYSPACE + """
     WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': '1' }
     """)
コード例 #25
0
def get_table_size(session: Session, keyspace: str, table: str) -> int:
    cql = SimpleStatement(f"select count(*) from {keyspace}.{table};")

    return session.execute(cql, timeout=30).one()["count"]
コード例 #26
0
ファイル: ranking.py プロジェクト: KNIGHTTH0R/FollowMe
def get_data_frame_no_parameter(session: Session,
                                prepared_query: str) -> pd.DataFrame:
    return session.execute(prepared_query)._current_rows
コード例 #27
0
def create_tables(tables: list, session: Session) -> None:
    for table in tables:
        session.execute(table)
コード例 #28
0
def create_keyspace(session: Session, keyspace: str = 'insight') -> None:
    session.execute(
        "CREATE KEYSPACE IF NOT EXISTS " + keyspace +
        " WITH replication = {'class':'SimpleStrategy', 'replication_factor' : 3};"
    )
コード例 #29
0
def get_data_frame(session: Session, prepared_query: PreparedStatement,
                   user: str) -> pd.DataFrame:
    return session.execute(prepared_query, (user, ))._current_rows
コード例 #30
0
def get_values_from_band(session: Session, band: str) -> ResultSet:
    return session.execute("select * from " + config.KEY_SPACE + "." + band)