Example #1
def check_cql(ip, ssl_context=None):
    # Try a single CQL connection; translate the driver's
    # NoHostAvailable into the caller's NotYetUp sentinel.
    try:
        cluster = get_cql_cluster(ip, ssl_context)
        cluster.connect()
        cluster.shutdown()
    except cassandra.cluster.NoHostAvailable:
        raise NotYetUp
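A sketch of how check_cql might drive a readiness wait; wait_for_cql, the timeout, and the one-second poll interval are illustrative additions, not part of the snippet above:

import time

def wait_for_cql(ip, ssl_context=None, timeout=120):
    # Poll check_cql() until the node accepts CQL connections or the
    # deadline passes; NotYetUp is the sentinel raised above.
    deadline = time.time() + timeout
    while True:
        try:
            check_cql(ip, ssl_context)
            return
        except NotYetUp:
            if time.time() > deadline:
                raise
            time.sleep(1)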
def main():
    '''
    Script entry point:
    - Queries the data in the sparkify Cassandra keyspace, and prints the results

    Parameters:
        None

    Returns:
        None
    '''
    # connect to cluster
    cluster = cassandra.cluster.Cluster(['127.0.0.1'])
    session = cluster.connect()

    # set current keyspace for session
    session.set_keyspace('sparkify')

    # query data
    ## sessions for query 1
    print_query(session, 'Query 1')

    ## user_sessions for query 2
    print_query(session, 'Query 2')

    ## song_listeners for query 3
    print_query(session, 'Query 3')

    # shut down session and cluster
    session.shutdown()
    cluster.shutdown()
def run(dt):
    # Query all trips starting within the hour of datetime dt,
    # connecting to the cluster for the duration of this call only.
    query = textwrap.dedent(f"""
        SELECT starttime, stoptime, start_lon, start_lat,
               stop_lon, stop_lat, bikeid
        FROM trip_by_startdayhour
        WHERE starttime_year={dt.year}
          AND starttime_month={dt.month}
          AND starttime_day={dt.day}
          AND starttime_hour={dt.hour};
        """)
    cluster = cassandra.cluster.Cluster()
    session = cluster.connect("paroisem")
    for r in session.execute(query):
        yield Trip(
            r.starttime,
            r.stoptime,
            r.start_lon,
            r.start_lat,
            r.stop_lon,
            r.stop_lat,
            r.bikeid,
        )
    # Or connect only once: make a class holding the connection as an attribute.
    cluster.shutdown()
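A sketch of the class suggested by that comment, holding one connection as an attribute; TripStore is a hypothetical name, Trip and textwrap are as used above:

import textwrap
import cassandra.cluster

class TripStore:
    """Hold one Cluster/Session pair as attributes and reuse it across queries."""

    def __init__(self, keyspace="paroisem"):
        self.cluster = cassandra.cluster.Cluster()
        self.session = self.cluster.connect(keyspace)

    def run(self, dt):
        # Same query as above, executed over the long-lived session.
        query = textwrap.dedent(f"""
            SELECT starttime, stoptime, start_lon, start_lat,
                   stop_lon, stop_lat, bikeid
            FROM trip_by_startdayhour
            WHERE starttime_year={dt.year}
              AND starttime_month={dt.month}
              AND starttime_day={dt.day}
              AND starttime_hour={dt.hour};
            """)
        for r in self.session.execute(query):
            yield Trip(r.starttime, r.stoptime, r.start_lon, r.start_lat,
                       r.stop_lon, r.stop_lat, r.bikeid)

    def shutdown(self):
        self.cluster.shutdown()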
Example #4
def connect(username=None, password=None, timeout=CONNECT_TIMEOUT):
    # We pull the currently configured listen address and port from the
    # yaml, rather than the service configuration, as it may have been
    # overridden.
    cassandra_yaml = read_cassandra_yaml()
    address = cassandra_yaml['rpc_address']
    if address == '0.0.0.0':
        address = 'localhost'
    port = cassandra_yaml['native_transport_port']

    auth_provider = get_auth_provider(username, password)

    addresses = set([address])
    cluster_ep = reactive.endpoint_from_name('cluster')
    addresses.update(cluster_ep.get_bootstrapped_ips())

    # Although we specify a reconnection_policy, it does not apply to
    # the initial connection so we retry in a loop.
    start = time.time()
    until = start + timeout
    while True:
        cluster = cassandra.cluster.Cluster(list(addresses), port=port, auth_provider=auth_provider)
        try:
            session = cluster.connect()
            session.default_timeout = timeout
            break
        except cassandra.cluster.NoHostAvailable:
            cluster.shutdown()
            if time.time() > until:
                raise
        time.sleep(1)
    try:
        yield session
    finally:
        cluster.shutdown()
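Since the function yields the session and tears the cluster down in a finally block, it is presumably decorated with contextlib.contextmanager at its definition site (the decorator is not shown above); usage would then look like:

with connect(timeout=30) as session:
    # The session is valid inside the block; the cluster is shut
    # down automatically on exit.
    row = session.execute("SELECT release_version FROM system.local").one()
    print(row.release_version)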
def main():
    # Process command line arguments
    options = docopt.docopt(__doc__)
    cassandra_ip_list = options['<cassandra_ip_list>']

    # Open connection to Cassandra
    cluster = cassandra.cluster.Cluster(cassandra_ip_list,
                                        control_connection_timeout=60,
                                        protocol_version=3)
    session = cluster.connect('ooi')

    # Extract Stream Metadata from Cassandra
    stream_metadata = execute_query(session, ALL_STREAM_METADATA,
                                    STREAM_METADATA_COLUMNS)

    # Extract Partition Metadata from Cassandra
    partition_metadata = execute_query(session, ALL_PARTITION_METADATA,
                                       PARTITION_METADATA_COLUMNS)

    # Close connection to Cassandra
    cluster.shutdown()

    # Write Stream Metadata to CSV file
    write_csv_file(STREAM_METADATA_FILEPATH, stream_metadata,
                   STREAM_METADATA_COLUMNS)

    # Write Partition Metadata to CSV file
    write_csv_file(PARTITION_METADATA_FILEPATH, partition_metadata,
                   PARTITION_METADATA_COLUMNS)
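execute_query is referenced but not defined here; one plausible shape, pairing each row's values with the expected column names (an assumption, not the project's actual helper):

def execute_query(session, query, columns):
    # Driver rows are named tuples, so zipping each one with the
    # expected column list yields dicts usable by the CSV writer.
    return [dict(zip(columns, row)) for row in session.execute(query)]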
    def test_prepared_metadata_generation(self):
        """
        Test to validate that result metadata is appropriately populated across protocol versions

        In protocol version 1, result metadata is retrieved every time the statement is issued. In all
        other protocol versions it is set once upon prepare, then re-used. This test ensures that it
        manifests itself the same across multiple protocol versions.

        @since 3.6.0
        @jira_ticket PYTHON-71
        @expected_result result metadata is consistent.
        """

        base_line = None
        for proto_version in get_supported_protocol_versions():
            cluster = Cluster(protocol_version=proto_version)
            session = cluster.connect()
            select_statement = session.prepare("SELECT * FROM system.local")
            if proto_version == 1:
                self.assertEqual(select_statement.result_metadata, None)
            else:
                self.assertNotEqual(select_statement.result_metadata, None)
            future = session.execute_async(select_statement)
            results = future.result()
            if base_line is None:
                base_line = results[0].__dict__.keys()
            else:
                self.assertEqual(base_line, results[0].__dict__.keys())
            cluster.shutdown()
def connect(username=None,
            password=None,
            timeout=CONNECT_TIMEOUT,
            auth_timeout=CONNECT_TIMEOUT):
    # We pull the currently configured listen address and port from the
    # yaml, rather than the service configuration, as it may have been
    # overridden.
    cassandra_yaml = read_cassandra_yaml()
    address = cassandra_yaml['rpc_address']
    if address == '0.0.0.0':
        address = 'localhost'
    port = cassandra_yaml['native_transport_port']

    if username is None or password is None:
        username, password = superuser_credentials()

    auth = hookenv.config()['authenticator']
    if auth == 'AllowAllAuthenticator':
        auth_provider = None
    else:
        auth_provider = cassandra.auth.PlainTextAuthProvider(username=username,
                                                             password=password)

    # Although we specify a reconnection_policy, it does not apply to
    # the initial connection so we retry in a loop.
    start = time.time()
    until = start + timeout
    auth_until = start + auth_timeout
    while True:
        cluster = cassandra.cluster.Cluster([address],
                                            port=port,
                                            auth_provider=auth_provider)
        try:
            session = cluster.connect()
            session.default_timeout = timeout
            break
        except cassandra.cluster.NoHostAvailable as x:
            cluster.shutdown()
            now = time.time()
            # If every node failed auth, reraise one of the
            # AuthenticationFailed exceptions. Unwrapping the exception
            # means call sites don't have to sniff the exception bundle.
            # We don't retry on auth fails; this method should not be
            # called if the system_auth data is inconsistent.
            auth_fails = [
                af for af in x.errors.values()
                if isinstance(af, cassandra.AuthenticationFailed)
            ]
            if auth_fails:
                if now > auth_until:
                    raise auth_fails[0]
            if now > until:
                raise
        time.sleep(1)
    try:
        yield session
    finally:
        cluster.shutdown()
Example #9
def try_connect(orig_cluster, ssl_version):
    # Clone the original cluster's connection settings, forcing a
    # specific SSL/TLS protocol version for the new context.
    cluster = cassandra.cluster.Cluster(
        contact_points=orig_cluster.contact_points,
        port=orig_cluster.port,
        protocol_version=orig_cluster.protocol_version,
        auth_provider=orig_cluster.auth_provider,
        ssl_context=ssl.SSLContext(ssl_version))
    cluster.connect()
    cluster.shutdown()
Example #11
def try_connect(orig_cluster, ssl_version):
    # Pin both ends of the negotiable TLS range to a single version.
    ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS)
    ssl_context.minimum_version = ssl_version
    ssl_context.maximum_version = ssl_version
    cluster = cassandra.cluster.Cluster(
        contact_points=orig_cluster.contact_points,
        port=orig_cluster.port,
        protocol_version=orig_cluster.protocol_version,
        auth_provider=orig_cluster.auth_provider,
        ssl_context=ssl_context)
    cluster.connect()
    cluster.shutdown()
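One way this helper might be driven, probing which TLS versions the server accepts; the version list and the reporting are illustrative:

import ssl
import cassandra.cluster

for version in (ssl.TLSVersion.TLSv1_2, ssl.TLSVersion.TLSv1_3):
    try:
        # try_connect raises NoHostAvailable when the handshake fails.
        try_connect(orig_cluster, version)
        print(f"{version.name}: accepted")
    except cassandra.cluster.NoHostAvailable:
        print(f"{version.name}: rejected")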
Example #12
def test_non_tls_on_tls(cql):
    if not cql.cluster.ssl_context:
        pytest.skip("SSL-specific tests are skipped without the '--ssl' option")
    # Copy the configuration of the existing "cql", just not the ssl_context
    cluster = cassandra.cluster.Cluster(
        contact_points=cql.cluster.contact_points,
        port=cql.cluster.port,
        protocol_version=cql.cluster.protocol_version,
        auth_provider=cql.cluster.auth_provider)
    with pytest.raises(cassandra.cluster.NoHostAvailable, match="ProtocolError"):
        cluster.connect()
    cluster.shutdown() # can't be reached
    def test_prepared_statement(self):
        """
        Highlight the difference between Prepared and Bound statements
        """

        cluster = Cluster(protocol_version=PROTOCOL_VERSION)
        session = cluster.connect()

        prepared = session.prepare('INSERT INTO test3rf.test (k, v) VALUES (?, ?)')
        prepared.consistency_level = ConsistencyLevel.ONE

        self.assertEqual(str(prepared),
                         '<PreparedStatement query="INSERT INTO test3rf.test (k, v) VALUES (?, ?)", consistency=ONE>')

        bound = prepared.bind((1, 2))
        self.assertEqual(str(bound),
                         '<BoundStatement query="INSERT INTO test3rf.test (k, v) VALUES (?, ?)", values=(1, 2), consistency=ONE>')

        cluster.shutdown()
Example #15
def main():
    '''
    Script entry point:
    - Extracts, transforms and loads data from `event_data` CSV files
    into tables in the sparkify Cassandra keyspace.

    Parameters:
        None

    Returns:
        None
    '''
    # connect to cluster
    cluster = cassandra.cluster.Cluster(['127.0.0.1'])
    session = cluster.connect()

    # set current keyspace for session
    session.set_keyspace('sparkify')

    # create denormalized CSV
    denormalized_csv_filename = create_denormalized_csv()

    # insert data
    insert_query_tuples = [
        (csql_queries.sessions_table_insert, lambda line:
         (int(line[8]), int(line[3]), line[0], line[9], float(line[5]))),
        (csql_queries.user_sessions_table_insert, lambda line:
         (int(line[10]), int(line[8]), int(line[3]), line[0], line[9], line[1],
          line[4])),
        (csql_queries.song_listeners_table_insert, lambda line:
         (line[9], int(line[10]), line[1], line[4]))
    ]
    insert_rows(session, denormalized_csv_filename, insert_query_tuples)

    # delete denormalized CSV
    delete_denormalized_csv(denormalized_csv_filename)

    # shut down session and cluster
    session.shutdown()
    cluster.shutdown()
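insert_rows is referenced but not shown; a minimal sketch, assuming the denormalized CSV carries a header row (the file layout is an assumption):

import csv

def insert_rows(session, csv_filename, insert_query_tuples):
    # Run every (INSERT statement, line -> values extractor) pair
    # against each data line of the denormalized CSV.
    with open(csv_filename, encoding='utf8') as f:
        reader = csv.reader(f)
        next(reader)  # skip the header row
        for line in reader:
            for query, extract in insert_query_tuples:
                session.execute(query, extract(line))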
def main():
    '''
    Script entry point:
    - Establishes a connection to the local Cassandra cluster's sparkify keyspace
    - Drops all the tables
    - Creates all tables needed
    - Finally, closes the connection

    Parameters:
        None

    Returns:
        None
    '''
    cluster, session = create_database()
    
    drop_tables(session)
    create_tables(session)

    # shut down session and cluster
    session.shutdown()
    cluster.shutdown()
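create_database is not shown above; a minimal sketch consistent with how main() uses it (the keyspace name follows the docstring, the replication settings are assumptions):

import cassandra.cluster

def create_database():
    # Connect locally, (re)create the sparkify keyspace, and return
    # both handles so the caller can shut them down.
    cluster = cassandra.cluster.Cluster(['127.0.0.1'])
    session = cluster.connect()
    session.execute("""
        CREATE KEYSPACE IF NOT EXISTS sparkify
        WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': 1}
    """)
    session.set_keyspace('sparkify')
    return cluster, session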
    def load_provenance(self):
        cluster = cassandra.cluster.Cluster(self.cassandra_ip_list,
                                            control_connection_timeout=60,
                                            protocol_version=3)
        session = cluster.connect('ooi')
        provenance = self.execute_query(session, ALL_DATASET_L0_PROVENANCE,
                                        DATASET_L0_PROVENANCE_COLUMNS)
        cluster.shutdown()

        with sqlite3.connect(self.database) as conn:

            def get_streamed_start(method, filename):
                if not streamed(method):
                    return None
                date = time_util.parse_basic_iso8601_date(filename)
                if not date:
                    return None
                return time_util.java_time_from_basic_iso8601_date(date)

            def get_streamed_stop(start):
                if not start:
                    return None
                return start + time_util.MILLIS_PER_DAY - 1

            conn.create_function("get_streamed_start", 2, get_streamed_start)
            conn.create_function("get_streamed_stop", 1, get_streamed_stop)

            conn.execute("DROP TABLE IF EXISTS PROVENANCE")
            conn.execute("""CREATE TABLE PROVENANCE (subsite TEXT, node TEXT,
                 sensor TEXT, method TEXT, deployment INTEGER,
                 rowid TEXT, filename TEXT, parsername TEXT,
                 parserversion TEXT)""")

            for row in provenance:
                subsite = row['subsite']
                node = row['node']
                sensor = row['sensor']
                method = row['method']
                deployment = row['deployment']
                rowid = str(row['id'])
                filename = row['fileName']
                parsername = row['parserName']
                parserversion = row['parserVersion']

                record = [
                    subsite, node, sensor, method, deployment, rowid, filename,
                    parsername, parserversion
                ]

                conn.execute(
                    """INSERT INTO PROVENANCE (subsite, node, sensor,
                    method, deployment, rowid, filename, parsername,
                    parserversion) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);""",
                    record)

            conn.execute("""ALTER TABLE PROVENANCE ADD COLUMN
                         start INTEGER""")
            conn.execute("""UPDATE PROVENANCE SET
                         start=get_streamed_start(method, filename)""")
            conn.execute("""ALTER TABLE PROVENANCE ADD COLUMN
                         stop INTEGER""")
            conn.execute("""UPDATE PROVENANCE SET
                         stop=get_streamed_stop(start)""")

            conn.commit()
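A hypothetical follow-up query against the PROVENANCE table built above, reading back the rows that received a streamed start/stop window:

import sqlite3

def streamed_provenance(database):
    # database is the same sqlite path used by load_provenance().
    with sqlite3.connect(database) as conn:
        return conn.execute(
            """SELECT subsite, node, sensor, filename, start, stop
               FROM PROVENANCE
               WHERE start IS NOT NULL
               ORDER BY start""").fetchall()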