def try_connecting(username='', password=''):
    """
    Wait until can connect to cluster.
    When cluster starts up there is some time while it is not possible to connect to it even though
    Cassandra is listening on port 7000. Here we wait until we can actually issue successful Cluster.connect()
     method.
    :param username: optional user name for connection
    :param password: optional password for connection
    :return: True if can successfully connect to cluster within 2 minutes, False otherwise
    """

    if username and password:
        ap = AuthenticationTests.get_authentication_provider(username, password)
    else:
        ap = None

    maxwait = 120  # in seconds
    sleeptime = 1

    wait_time = 0
    while wait_time < maxwait:
        try:
            cluster = Cluster(protocol_version=tests.integration.PROTOCOL_VERSION, auth_provider=ap)
            cluster.connect()
            log.debug("Can connect after %d seconds" % wait_time)
            return True
        except Exception:
            wait_time += sleeptime
            time.sleep(sleeptime)

    return False
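
# Hedged usage sketch (an assumption, not part of the original helper): bounce
# the whole ccm-managed test cluster with the same get_cluster() helper used
# elsewhere in these tests, then block until the driver can really connect with
# the default superuser credentials.
def _example_restart_and_wait():
    get_cluster().stop()
    get_cluster().start()
    assert try_connecting('cassandra', 'cassandra')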
    def test_pool_management(self):
        # Ensure that in_flight and request_ids quiesce after cluster operations
        cluster = Cluster(protocol_version=PROTOCOL_VERSION, idle_heartbeat_interval=0)  # no idle heartbeat here, pool management is tested in test_idle_heartbeat
        session = cluster.connect()
        session2 = cluster.connect()

        # prepare
        p = session.prepare("SELECT * FROM system.local WHERE key=?")
        self.assertTrue(session.execute(p, ('local',)))

        # simple
        self.assertTrue(session.execute("SELECT * FROM system.local WHERE key='local'"))

        # set keyspace
        session.set_keyspace('system')
        session.set_keyspace('system_traces')

        # use keyspace
        session.execute('USE system')
        session.execute('USE system_traces')

        # refresh schema
        cluster.refresh_schema_metadata()
        cluster.refresh_schema_metadata(max_schema_agreement_wait=0)

        # submit schema refresh
        future = cluster.submit_schema_refresh()
        future.result()

        assert_quiescent_pool_state(self, cluster)

        cluster.shutdown()
    def test_submit_schema_refresh(self):
        """
        Ensure new schema is refreshed after submit_schema_refresh()
        """

        cluster = Cluster(protocol_version=PROTOCOL_VERSION)
        cluster.connect()
        self.assertNotIn("newkeyspace", cluster.metadata.keyspaces)

        other_cluster = Cluster(protocol_version=PROTOCOL_VERSION)
        session = other_cluster.connect()
        session.execute(
            """
            CREATE KEYSPACE newkeyspace
            WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}
            """)

        future = cluster.submit_schema_refresh()
        future.result()

        self.assertIn("newkeyspace", cluster.metadata.keyspaces)

        session.execute("DROP KEYSPACE newkeyspace")
        cluster.shutdown()
        other_cluster.shutdown()
def connect_cassandra():
    error = False
    cluster = Cluster([config.get('cassandra', 'db_host')], port=config.get('cassandra', 'db_port'),
                      protocol_version=3, idle_heartbeat_interval=120)
    try:
        LOG.info("Connecting to Cassandra..")
        return cluster.connect(config.get('cassandra', 'keyspace'))
    except NoHostAvailable:
        error = True
        LOG.info("ERROR: Check Cassandra connection settings in conf")
    except InvalidRequest:
        LOG.info("ERROR: Could not find existing Cassandra keyspace. will create new one")
        try:
            db_connection = cluster.connect()
            CREATE_KEYSPACE = """
                              CREATE KEYSPACE %s WITH replication = {'class': '%s', 'replication_factor': %s }
                              """ % (config.get('cassandra', 'keyspace'),
                                     config.get('cassandra', 'replication_strategy'),
                                     config.get('cassandra', 'replication_factor'))
            db_connection.execute(CREATE_KEYSPACE)
            db_connection.set_keyspace(config.get('cassandra', 'keyspace'))
            LOG.info("Created and session set to new keyspace:  %s" % config.get('cassandra', 'keyspace'))
            return db_connection
        except SyntaxException:
            error = True
            LOG.info("ERROR: couldn't create new keyspace. check keyspace settings in conf. Exiting now.")
            raise
    except Exception:
        error = True
        LOG.info("ERROR: something wrong with Cassandra connection")
    finally:
        if error:
            LOG.info("Exiting..")
            sys.exit(0)
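
# A hypothetical sketch (not from the original source) of the [cassandra]
# section that connect_cassandra() expects `config` to provide; the option
# names mirror the config.get() calls above, the values are placeholders.
EXAMPLE_CASSANDRA_CONF = """
[cassandra]
db_host = 127.0.0.1
db_port = 9042
keyspace = mykeyspace
replication_strategy = SimpleStrategy
replication_factor = 1
"""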
def getEntrys(page,keyspaceid,columnfamilyid):
	cluster = Cluster()
	session = cluster.connect('system')
	rows = session.execute('select * from schema_keyspaces')
	info = []
	if int(keyspaceid) < (len(rows) + 1) and int(keyspaceid) > 0:
		info = {}
		info['name'] = rows[int(keyspaceid) -1][0]
		keyspacename = rows[int(keyspaceid) -1][0]
		rows = session.execute("SELECT * FROM schema_columnfamilies where keyspace_name='" + info['name'] + "'")
		columnfamilyname = rows[int(columnfamilyid)-1][1]
		session = cluster.connect(rows[int(keyspaceid)-1][0])
		rows = session.execute("SELECT * FROM " + columnfamilyname)
		info = rows
		rows = session.execute("SELECT * FROM system.schema_columns WHERE keyspace_name = '" + keyspacename + "' AND columnfamily_name = '" + columnfamilyname + "'")
		fields = []
		for i in rows:
			fields.append(i)
		temp = fields[len(rows) - 1]
		fields[len(rows) - 1] = fields[0]
		fields[0] = temp
		temp = fields[1]
		fields[1] = fields[2]
		fields[2] = temp
	else:
		return render_template('error.html',error="Not a valid keyspaceid")
	pages = info[(page-1)*PER_PAGE:PER_PAGE*page]
	if not pages and page != 1:
		abort(404)
	pagination = Pagination(page, PER_PAGE, len(info))
	return render_template('listentrys.html', pagination=pagination, keyspaceid=keyspaceid, columnfamilyid=columnfamilyid, pages=pages, fields=fields, section='getEntrys')
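
# A safer variant of the string-concatenated lookups above (a sketch, assuming
# the same legacy system.schema_columns table): let the driver bind the WHERE
# values instead of splicing them into the CQL text. Keyspace and table names
# used in a FROM clause still cannot be bound; only values can.
def get_column_fields(session, keyspacename, columnfamilyname):
	rows = session.execute(
		"SELECT * FROM system.schema_columns "
		"WHERE keyspace_name = %s AND columnfamily_name = %s",
		(keyspacename, columnfamilyname))
	return list(rows)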
    def test_raise_error_on_control_connection_timeout(self):
        """
        Test for initial control connection timeout

        test_raise_error_on_control_connection_timeout tests that the driver times out after the configured initial
        connection timeout. It first pauses node1, essentially making it unreachable. It then attempts to connect a
        Cluster object to node1 with a timeout of 1 second, and ensures that a NoHostAvailable is raised, wrapping an
        OperationTimedOut for the 1 second timeout.

        @expected_errors NoHostAvailable When node1 is paused, and a connection attempt is made.
        @since 2.6.0
        @jira_ticket PYTHON-206
        @expected_result NoHostAvailable exception should be raised after 1 second.

        @test_category connection
        """

        get_node(1).pause()
        cluster = Cluster(contact_points=['127.0.0.1'], protocol_version=PROTOCOL_VERSION, connect_timeout=1)

        with self.assertRaisesRegexp(NoHostAvailable, r"OperationTimedOut\('errors=Timed out creating connection \(1 seconds\)"):
            cluster.connect()
        cluster.shutdown()

        get_node(1).resume()
def getEntrysInfo(keyspaceid,columnfamilyid,entryname):
	cluster = Cluster()
	session = cluster.connect('system')
	rows = session.execute('select * from schema_keyspaces')
	info = []
	if int(keyspaceid) < (len(rows) + 1) and int(keyspaceid) > 0:
		info = {}
		info['name'] = rows[int(keyspaceid) -1][0]
		keyspacename = rows[int(keyspaceid) -1][0]
		rows = session.execute("SELECT * FROM schema_columnfamilies where keyspace_name='" + info['name'] + "'")
		columnfamilyname = rows[int(columnfamilyid)-1][1]
		primarykey = rows[int(columnfamilyid)-1]
		session = cluster.connect(rows[int(keyspaceid)-1][0])
		primarykey = primarykey[17][2:]
		primarykey = primarykey[:-2]
		query = "SELECT * FROM " + columnfamilyname + " WHERE " + primarykey + "='" + entryname + "'"
		rows = session.execute(query)
		info = rows
		query = "SELECT * FROM system.schema_columns WHERE keyspace_name='" + keyspacename + "' AND columnfamily_name = '" + columnfamilyname + "'"
		rows = session.execute(query)
		fields = []
		for i in rows:
			fields.append(i)
		temp = fields[len(rows) - 1]
		fields[len(rows) - 1] = fields[0]
		fields[0] = temp
		temp = fields[1]
		fields[1] = fields[2]
		fields[2] = temp
		return render_template('entryinfo.html',info=info,fields=fields,keyspaceid=keyspaceid,columnfamilyid=columnfamilyid,entryname=entryname)
    def test_pool_with_host_down(self):
        """
        Test to ensure that cluster.connect() doesn't return prior to pools being initialized.

        This test will figure out which host our pool logic will connect to first. It then shuts that server down.
        Previously the cluster.connect() would return prior to the pools being initialized, and the first queries would
        raise a no-host exception.

        @since 3.7.0
        @jira_ticket PYTHON-617
        @expected_result query should complete successfully

        @test_category connection
        """

        # Find the first node we will try to create connections to, and shut it down.
        cluster = Cluster(protocol_version=PROTOCOL_VERSION)
        cluster.connect()
        hosts = cluster.metadata.all_hosts()
        address = hosts[0].address
        node_to_stop = int(address.split('.')[-1:][0])
        try:
            force_stop(node_to_stop)
            wait_for_down(cluster, node_to_stop)
            # Attempt a query against that node. It should complete
            cluster2 = Cluster(protocol_version=PROTOCOL_VERSION)
            session2 = cluster2.connect()
            session2.execute("SELECT * FROM system.local")
            cluster2.shutdown()
        finally:
            start(node_to_stop)
            wait_for_up(cluster, node_to_stop)
            cluster.shutdown()
    def test_invalid_protocol_negotiation(self):
        """
        Test for protocol negotiation when explicit versions are set

        If an explicit protocol version that is not compatible with the server version is set,
        an exception should be thrown. The driver should not attempt to negotiate.

        For reference, the mapping of server version to supported protocol versions is as follows:

        1.2 -> 1
        2.0 -> 2, 1
        2.1 -> 3, 2, 1
        2.2 -> 4, 3, 2, 1
        3.X -> 4, 3

        @since 3.6.0
        @jira_ticket PYTHON-537
        @expected_result downgrading should not be allowed when explicit protocol versions are set.

        @test_category connection
        """

        upper_bound = get_unsupported_upper_protocol()
        if upper_bound is not None:
            cluster = Cluster(protocol_version=upper_bound)
            with self.assertRaises(NoHostAvailable):
                cluster.connect()
            cluster.shutdown()

        lower_bound = get_unsupported_lower_protocol()
        if lower_bound is not None:
            cluster = Cluster(protocol_version=lower_bound)
            with self.assertRaises(NoHostAvailable):
                cluster.connect()
            cluster.shutdown()
def makeConnection():
    ip_address = findIP()
    notResolved = True
    while notResolved:
        notResolved=False
        try:
            userpass = findUserPass()
            ap = PlainTextAuthProvider(username=userpass[0], password=userpass[1])
            bCluster=Cluster([ip_address],connection_class=AsyncoreConnection,auth_provider=ap)
            bSpace = bCluster.connect()
        except Exception as er:
            redFlag = ['AuthenticationFailed','username','password','incorrect']
            test = filter(lambda x: x.lower() in str(er).lower(), redFlag)
            if len(test)==len(redFlag):  # all redFlag words exist in the message
                print "provided username doesn't work, trying default:"
                ap = PlainTextAuthProvider(username='******', password='******')
                try:
                    bCluster=Cluster([ip_address],connection_class=AsyncoreConnection,auth_provider=ap)
                    bSpace=bCluster.connect()
                    bSpace.execute("ALTER USER cassandra with password 'merogharanuwakotmaparchhatimrokahaparchha'")
                except Exception as er:
                    print er
                    ap = PlainTextAuthProvider(username='******', password='******')
                    bCluster=Cluster([ip_address],connection_class=AsyncoreConnection,auth_provider=ap)
                    bSpace=bCluster.connect()

                bSpace.execute("CREATE USER %s with password '%s' SUPERUSER" % (userpass[0],userpass[1]))
                print ('The username and password were created. Now trying to log in again')
                bCluster.shutdown()
                notResolved=True
            else:
                raise

    return bCluster, bSpace
    def test_can_register_udt_before_connecting(self):
        """
        Test the registration of UDTs before session creation
        """

        c = Cluster(protocol_version=PROTOCOL_VERSION)
        s = c.connect()

        s.execute(
            """
            CREATE KEYSPACE udt_test_register_before_connecting
            WITH replication = { 'class' : 'SimpleStrategy', 'replication_factor': '1' }
            """
        )
        s.set_keyspace("udt_test_register_before_connecting")
        s.execute("CREATE TYPE user (age int, name text)")
        s.execute("CREATE TABLE mytable (a int PRIMARY KEY, b frozen<user>)")

        s.execute(
            """
            CREATE KEYSPACE udt_test_register_before_connecting2
            WITH replication = { 'class' : 'SimpleStrategy', 'replication_factor': '1' }
            """
        )
        s.set_keyspace("udt_test_register_before_connecting2")
        s.execute("CREATE TYPE user (state text, is_cool boolean)")
        s.execute("CREATE TABLE mytable (a int PRIMARY KEY, b frozen<user>)")

        # now that types are defined, shutdown and re-create Cluster
        c.shutdown()
        c = Cluster(protocol_version=PROTOCOL_VERSION)

        User1 = namedtuple("user", ("age", "name"))
        User2 = namedtuple("user", ("state", "is_cool"))

        c.register_user_type("udt_test_register_before_connecting", "user", User1)
        c.register_user_type("udt_test_register_before_connecting2", "user", User2)

        s = c.connect()

        s.set_keyspace("udt_test_register_before_connecting")
        s.execute("INSERT INTO mytable (a, b) VALUES (%s, %s)", (0, User1(42, "bob")))
        result = s.execute("SELECT b FROM mytable WHERE a=0")
        self.assertEqual(1, len(result))
        row = result[0]
        self.assertEqual(42, row.b.age)
        self.assertEqual("bob", row.b.name)
        self.assertTrue(type(row.b) is User1)

        # use the same UDT name in a different keyspace
        s.set_keyspace("udt_test_register_before_connecting2")
        s.execute("INSERT INTO mytable (a, b) VALUES (%s, %s)", (0, User2("Texas", True)))
        result = s.execute("SELECT b FROM mytable WHERE a=0")
        self.assertEqual(1, len(result))
        row = result[0]
        self.assertEqual("Texas", row.b.state)
        self.assertEqual(True, row.b.is_cool)
        self.assertTrue(type(row.b) is User2)

        c.shutdown()
    def test_cannot_connect_with_bad_client_auth(self):
        """
        Test to validate that we cannot connect with invalid client auth.

        This test will use bad keys/certs to perform client authentication. It will then attempt to connect
        to a server that has client authentication enabled.

        @since 2.7.0
        @expected_result The client will throw an exception on connect

        @test_category connection:ssl
        """

        # Setup absolute paths to key/cert files
        abs_path_ca_cert_path = os.path.abspath(CLIENT_CA_CERTS)
        abs_driver_keyfile = os.path.abspath(DRIVER_KEYFILE)
        abs_driver_certfile = os.path.abspath(DRIVER_CERTFILE_BAD)

        cluster = Cluster(protocol_version=PROTOCOL_VERSION, ssl_options={'ca_certs': abs_path_ca_cert_path,
                                                                          'ssl_version': ssl.PROTOCOL_TLSv1,
                                                                          'keyfile': abs_driver_keyfile,
                                                                          'certfile': abs_driver_certfile})
        with self.assertRaises(NoHostAvailable) as context:
            cluster.connect()
        cluster.shutdown()
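
    # A hedged companion sketch (an assumption, not part of the original test
    # suite): the same ssl_options, but with a certificate the server actually
    # trusts (DRIVER_CERTFILE is a hypothetical name here) and verification
    # required, should let cluster.connect() succeed.
    def example_connect_with_good_client_auth(self):
        cluster = Cluster(protocol_version=PROTOCOL_VERSION,
                          ssl_options={'ca_certs': os.path.abspath(CLIENT_CA_CERTS),
                                       'ssl_version': ssl.PROTOCOL_TLSv1,
                                       'keyfile': os.path.abspath(DRIVER_KEYFILE),
                                       'certfile': os.path.abspath(DRIVER_CERTFILE),
                                       'cert_reqs': ssl.CERT_REQUIRED})
        session = cluster.connect()
        self.assertTrue(session.execute("SELECT release_version FROM system.local")[0])
        cluster.shutdown()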
def copy_model(**kwargs):
    conf = Configuration('global').configuration
    cluster_source = Cluster(conf['cassandra']['hosts'])
    source = cluster_source.connect(conf['cassandra']['keyspace'])
    source.row_factory = dict_factory
    cluster_dest = Cluster(conf['new_cassandra']['hosts'])
    dest = cluster_dest.connect(conf['new_cassandra']['keyspace'])

    table = kwargs['model'].lower()
    fetch_size = kwargs.get('fetch_size', 100)
    query = "SELECT * FROM {0}".format(table)
    if 'where' in kwargs and kwargs['where']:
        query = "{0} WHERE {1} ALLOW FILTERING".format(query, kwargs['where'])
    statement = SimpleStatement(query, fetch_size=fetch_size)
    insert_query = "INSERT INTO {0} ({1}) VALUES ({2})"
    cpt = 0
    insert = None
    for row in source.execute(statement):
        if cpt == 0:
            columns = ['"{}"'.format(x) for x in row.keys()]
            binds = ['?' for x in range(0, len(columns))]
            insert_str = insert_query.format(table,
                                             ','.join(columns),
                                             ','.join(binds))
            insert = dest.prepare(insert_str)
        bound = insert.bind(row.values())
        dest.execute(bound)
        cpt += 1
    print('Copied {} records from {}'.format(cpt, table))
    return cpt
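
# Hypothetical invocation sketch (the model name and filter below are
# placeholders, not from the original source): copy matching rows of the
# 'user' table from the old cluster to the new one, 500 rows per page.
if __name__ == '__main__':
    copy_model(model='user', where="changed_at > '2016-01-01'", fetch_size=500)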
    def test_session_no_cluster(self):
        """
        Test session context without cluster context.

        @since 3.4
        @jira_ticket PYTHON-521
        @expected_result session should be created correctly. Session should shutdown correctly outside of context

        @test_category configuration
        """
        cluster = Cluster(**self.cluster_kwargs)
        unmanaged_session = cluster.connect()
        with cluster.connect() as session:
            self.assertFalse(cluster.is_shutdown)
            self.assertFalse(session.is_shutdown)
            self.assertFalse(unmanaged_session.is_shutdown)
            self.assertTrue(session.execute('select release_version from system.local')[0])
        self.assertTrue(session.is_shutdown)
        self.assertFalse(cluster.is_shutdown)
        self.assertFalse(unmanaged_session.is_shutdown)
        unmanaged_session.shutdown()
        self.assertTrue(unmanaged_session.is_shutdown)
        self.assertFalse(cluster.is_shutdown)
        cluster.shutdown()
        self.assertTrue(cluster.is_shutdown)
    def test_for_schema_disagreement_attribute(self):
        """
        Tests to ensure that schema disagreement is properly surfaced on the response future.

        Creates and destroys keyspaces/tables with various schema agreement timeouts set.
        The first part runs CQL create/drop commands with the schema agreement timeout set in such a way that it will be impossible for agreement to occur before the timeout.
        It then validates that the correct value is set on the result.
        The second part ensures that when schema agreement occurs, the result set reflects that appropriately.

        @since 3.1.0
        @jira_ticket PYTHON-458
        @expected_result is_schema_agreed is set appropriately on the response future

        @test_category schema
        """
        # This should yield a schema disagreement
        cluster = Cluster(protocol_version=PROTOCOL_VERSION, max_schema_agreement_wait=0.001)
        session = cluster.connect(wait_for_all_pools=True)

        rs = session.execute("CREATE KEYSPACE test_schema_disagreement WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}")
        self.check_and_wait_for_agreement(session, rs, False)
        rs = session.execute("CREATE TABLE test_schema_disagreement.cf (key int PRIMARY KEY, value int)")
        self.check_and_wait_for_agreement(session, rs, False)
        rs = session.execute("DROP KEYSPACE test_schema_disagreement")
        self.check_and_wait_for_agreement(session, rs, False)

        # These should have schema agreement
        cluster = Cluster(protocol_version=PROTOCOL_VERSION, max_schema_agreement_wait=100)
        session = cluster.connect()
        rs = session.execute("CREATE KEYSPACE test_schema_disagreement WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}")
        self.check_and_wait_for_agreement(session, rs, True)
        rs = session.execute("CREATE TABLE test_schema_disagreement.cf (key int PRIMARY KEY, value int)")
        self.check_and_wait_for_agreement(session, rs, True)
        rs = session.execute("DROP KEYSPACE test_schema_disagreement")
        self.check_and_wait_for_agreement(session, rs, True)
class ConnectionTest(BaseCassEngTestCase):

    @classmethod
    def setUpClass(cls):
        cls.original_cluster = connection.get_cluster()
        cls.keyspace1 = 'ctest1'
        cls.keyspace2 = 'ctest2'
        super(ConnectionTest, cls).setUpClass()
        cls.setup_cluster = Cluster(protocol_version=PROTOCOL_VERSION)
        cls.setup_session = cls.setup_cluster.connect()
        ddl = "CREATE KEYSPACE {0} WITH replication = {{'class': 'SimpleStrategy', 'replication_factor': '{1}'}}".format(cls.keyspace1, 1)
        execute_with_long_wait_retry(cls.setup_session, ddl)
        ddl = "CREATE KEYSPACE {0} WITH replication = {{'class': 'SimpleStrategy', 'replication_factor': '{1}'}}".format(cls.keyspace2, 1)
        execute_with_long_wait_retry(cls.setup_session, ddl)

    @classmethod
    def tearDownClass(cls):
        execute_with_long_wait_retry(cls.setup_session, "DROP KEYSPACE {0}".format(cls.keyspace1))
        execute_with_long_wait_retry(cls.setup_session, "DROP KEYSPACE {0}".format(cls.keyspace2))
        models.DEFAULT_KEYSPACE = DEFAULT_KEYSPACE
        cls.original_cluster.shutdown()
        cls.setup_cluster.shutdown()
        setup_connection(DEFAULT_KEYSPACE)

    def setUp(self):
        self.c = Cluster(protocol_version=PROTOCOL_VERSION)
        self.session1 = self.c.connect(keyspace=self.keyspace1)
        self.session1.row_factory = dict_factory
        self.session2 = self.c.connect(keyspace=self.keyspace2)
        self.session2.row_factory = dict_factory

    def tearDown(self):
        self.c.shutdown()

    def test_connection_session_switch(self):
        """
        Test to ensure that when the default keyspace is changed in a session, and that session
        is set in the connection class, the new default keyspace is honored.

        @since 3.1
        @jira_ticket PYTHON-486
        @expected_result CQLENGINE adopts whatever keyspace is passed in via the set_session method as the default

        @test_category object_mapper
        """

        connection.set_session(self.session1)
        sync_table(TestConnectModel)
        TCM1 = TestConnectModel.create(id=1, keyspace=self.keyspace1)
        connection.set_session(self.session2)
        sync_table(TestConnectModel)
        TCM2 = TestConnectModel.create(id=1, keyspace=self.keyspace2)
        connection.set_session(self.session1)
        self.assertEqual(1, TestConnectModel.objects.count())
        self.assertEqual(TestConnectModel.objects.first(), TCM1)
        connection.set_session(self.session2)
        self.assertEqual(1, TestConnectModel.objects.count())
        self.assertEqual(TestConnectModel.objects.first(), TCM2)
 def connect(self):
     """
     Connect to Cassandra cluster
     :return:
     """
     cluster = Cluster()
     self.sensor_data_session = cluster.connect(SENSOR_DATA_KEYSPACE)
     self.analytics_session = cluster.connect(ANALYTICS_KEYSPACE)
    def test_non_existing_types(self):
        c = Cluster(protocol_version=PROTOCOL_VERSION)
        c.connect()
        User = namedtuple('user', ('age', 'name'))
        self.assertRaises(UserTypeDoesNotExist, c.register_user_type, "some_bad_keyspace", "user", User)
        self.assertRaises(UserTypeDoesNotExist, c.register_user_type, "system", "user", User)

        c.shutdown()
    def test_idle_heartbeat(self):
        interval = 1
        cluster = Cluster(protocol_version=PROTOCOL_VERSION, idle_heartbeat_interval=interval)
        if PROTOCOL_VERSION < 3:
            cluster.set_core_connections_per_host(HostDistance.LOCAL, 1)
        session = cluster.connect()

        # This test relies on impl details of connection req id management to see if heartbeats 
        # are being sent. May need update if impl is changed
        connection_request_ids = {}
        for h in cluster.get_connection_holders():
            for c in h.get_connections():
                # make sure none are idle (should have startup messages)
                self.assertFalse(c.is_idle)
                with c.lock:
                    connection_request_ids[id(c)] = deque(c.request_ids)  # copy of request ids

        # let two heartbeat intervals pass (the first one had startup messages in it)
        time.sleep(2 * interval + interval/10.)

        connections = [c for holders in cluster.get_connection_holders() for c in holders.get_connections()]

        # make sure requests were sent on all connections
        for c in connections:
            expected_ids = connection_request_ids[id(c)]
            expected_ids.rotate(-1)
            with c.lock:
                self.assertListEqual(list(c.request_ids), list(expected_ids))

        # assert idle status
        self.assertTrue(all(c.is_idle for c in connections))

        # send messages on all connections
        statements_and_params = [("SELECT release_version FROM system.local", ())] * len(cluster.metadata.all_hosts())
        results = execute_concurrent(session, statements_and_params)
        for success, result in results:
            self.assertTrue(success)

        # assert not idle status
        self.assertFalse(any(c.is_idle if not c.is_control_connection else False for c in connections))

        # holders include session pools and cc
        holders = cluster.get_connection_holders()
        self.assertIn(cluster.control_connection, holders)
        self.assertEqual(len(holders), len(cluster.metadata.all_hosts()) + 1)  # hosts pools, 1 for cc

        # include additional sessions
        session2 = cluster.connect()

        holders = cluster.get_connection_holders()
        self.assertIn(cluster.control_connection, holders)
        self.assertEqual(len(holders), 2 * len(cluster.metadata.all_hosts()) + 1)  # 2 sessions' hosts pools, 1 for cc

        cluster._idle_heartbeat.stop()
        cluster._idle_heartbeat.join()
        assert_quiescent_pool_state(self, cluster)

        cluster.shutdown()
    def test_can_shutdown_asyncoreconnection_subclass(self):
        start_and_prime_singledc()
        class ExtendedConnection(AsyncoreConnection):
            pass

        cluster = Cluster(contact_points=["127.0.0.2"],
                          connection_class=ExtendedConnection)
        cluster.connect()
        cluster.shutdown()
 def getclustsess(self, keyspace=None):
     """ Return a Cluster instance and a session object """
     cluster = Cluster([self.chost])  # Cluster(['192.168.1.1', '192.168.1.2'])
     if keyspace:
         session = cluster.connect()
         session.set_keyspace(keyspace)
     else:
         session = cluster.connect()
     return cluster, session
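
 # A brief usage sketch (an assumption, not from the original class): obtain a
 # session bound to the system keyspace, run a query, and shut the cluster
 # down when finished.
 def example_usage(self):
     cluster, session = self.getclustsess('system')
     session.execute("SELECT * FROM local")
     cluster.shutdown()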
    def test_export_schema(self):
        """
        Test export schema functionality
        """

        cluster = Cluster()
        cluster.connect()

        self.assertIsInstance(cluster.metadata.export_schema_as_string(), unicode)
    def test_export_schema(self):
        """
        Test export schema functionality
        """

        cluster = Cluster(protocol_version=PROTOCOL_VERSION)
        cluster.connect()

        self.assertIsInstance(cluster.metadata.export_schema_as_string(), six.string_types)
    def test_token(self):
        expected_node_count = len(get_cluster().nodes)

        cluster = Cluster(protocol_version=PROTOCOL_VERSION)
        cluster.connect()
        tmap = cluster.metadata.token_map
        self.assertTrue(issubclass(tmap.token_class, Token))
        self.assertEqual(expected_node_count, len(tmap.ring))
        cluster.shutdown()
    def test_token(self):
        expected_node_count = len(get_cluster().nodes)

        cluster = Cluster()
        cluster.connect()
        tmap = cluster.metadata.token_map
        self.assertTrue(issubclass(tmap.token_class, Token))
        self.assertEqual(expected_node_count, len(tmap.ring))
        self.assertEqual(expected_node_count, len(tmap.tokens_to_hosts))
        cluster.shutdown()
    def __init__(self):
        cluster = Cluster(["panoptes-cassandra.zooniverse.org"])

        try:
            self.cassandra_session = cluster.connect("active_weather")
        except InvalidRequest as e:
            print(e)
            self.cassandra_session = cluster.connect()
            self.cassandra_session.execute("CREATE KEYSPACE active_weather WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 2 }")
            self.cassandra_session = cluster.connect('active_weather')
 def _create_session(self):
     cassandra_ip = os.environ['CASSANDRA_PORT_9042_TCP_ADDR']
     cassandra_port = os.environ['CASSANDRA_PORT_9042_TCP_PORT']
     # Sometimes Cassandra is still starting, so we might need to wait
     for _ in range(10):
         try:
             cluster = Cluster([cassandra_ip], port=cassandra_port)
             return cluster.connect()
         except cassandra.cluster.NoHostAvailable:
             time.sleep(20)
     cluster = Cluster([cassandra_ip], port=cassandra_port)
     return cluster.connect()
    def test_export_keyspace_schema(self):
        """
        Test export keyspace schema functionality
        """

        cluster = Cluster(protocol_version=PROTOCOL_VERSION)
        cluster.connect()

        for keyspace in cluster.metadata.keyspaces:
            keyspace_metadata = cluster.metadata.keyspaces[keyspace]
            self.assertIsInstance(keyspace_metadata.export_as_string(), six.string_types)
            self.assertIsInstance(keyspace_metadata.as_cql_query(), six.string_types)
    def test_export_keyspace_schema(self):
        """
        Test export keyspace schema functionality
        """

        cluster = Cluster()
        cluster.connect()

        for keyspace in cluster.metadata.keyspaces:
            keyspace_metadata = cluster.metadata.keyspaces[keyspace]
            self.assertIsInstance(keyspace_metadata.export_as_string(), unicode)
            self.assertIsInstance(keyspace_metadata.as_cql_query(), unicode)
    def test_metrics_per_cluster(self):
        """
        Test to validate that metrics can be scoped to individual clusters
        @since 3.6.0
        @jira_ticket PYTHON-561
        @expected_result metrics should be scoped to a cluster level

        @test_category metrics
        """

        cluster2 = Cluster(metrics_enabled=True, protocol_version=PROTOCOL_VERSION,
                           default_retry_policy=FallthroughRetryPolicy())
        cluster2.connect(self.ks_name, wait_for_all_pools=True)

        self.assertEqual(len(cluster2.metadata.all_hosts()), 3)

        query = SimpleStatement("SELECT * FROM {0}.{0}".format(self.ks_name), consistency_level=ConsistencyLevel.ALL)
        self.session.execute(query)

        # Pause node so it shows as unreachable to coordinator
        get_node(1).pause()

        try:
            # Test write
            query = SimpleStatement("INSERT INTO {0}.{0} (k, v) VALUES (2, 2)".format(self.ks_name), consistency_level=ConsistencyLevel.ALL)
            with self.assertRaises(WriteTimeout):
                self.session.execute(query, timeout=None)
        finally:
            get_node(1).resume()

        # Change the scales stats_name of the cluster2
        cluster2.metrics.set_stats_name('cluster2-metrics')

        stats_cluster1 = self.cluster.metrics.get_stats()
        stats_cluster2 = cluster2.metrics.get_stats()

        # Test direct access to stats
        self.assertEqual(1, self.cluster.metrics.stats.write_timeouts)
        self.assertEqual(0, cluster2.metrics.stats.write_timeouts)

        # Test direct access to a child stats
        self.assertNotEqual(0.0, self.cluster.metrics.request_timer['mean'])
        self.assertEqual(0.0, cluster2.metrics.request_timer['mean'])

        # Test access via metrics.get_stats()
        self.assertNotEqual(0.0, stats_cluster1['request_timer']['mean'])
        self.assertEqual(0.0, stats_cluster2['request_timer']['mean'])

        # Test access by stats_name
        self.assertEqual(0.0, scales.getStats()['cluster2-metrics']['request_timer']['mean'])

        cluster2.shutdown()
    #newRating = dataOutput[YColumn]
    #join updated rating
    updatedEmailList = pd.concat([centralEmailList, dataOutput[["responseRating"]]], axis=1)
    if updatedEmailList["responseRating"].isnull().any():
        raise ValueError("we have missed some ratings, which is not expected")
    if updatedEmailList.shape[0] != centralEmailList.shape[0]:
        raise ValueError("we have some missing values, which is not expected")
    #print updatedEmailList.head(10)
    updatedEmailList = updatedEmailList.sort([YColumn],ascending=[False])
    #print updatedEmailList.head(5)
    return(updatedEmailList)

#connect to cassandra
print "connecting to cassandra for local mode"
cluster = Cluster()
session = cluster.connect('marketingApp')
session.row_factory = dict_factory

# define the email rating parameters
ratingParameters = {"no":0,"open":1,"click":2,"sold":3}
minValidResponse = 10

#load the sent email list (the most recent) from cassandra
print "retrieving the most recent sent email list as training data from cassandra"
rawEmailList = session.execute("""
select * from "sentEmailList"
""")

#convert paged results to a list then a dataframe
sentEmailList = pd.DataFrame(list(rawEmailList))
#pre-check and summarize all responses
from cassandra.cluster import Cluster
from hdt import HDTDocument
from cassandra.policies import DCAwareRoundRobinPolicy
from cassandra.query import BatchStatement
from cassandra.query import SimpleStatement
import time
import datetime
from cassandra.util import uuid_from_time, datetime_from_uuid1

cluster = Cluster(
    ['172.16.134.144', '172.16.134.142', '172.16.134.143'],
    load_balancing_policy=DCAwareRoundRobinPolicy(local_dc='dc1'))
# cluster = Cluster()


session = cluster.connect()


# In this script we insert on a composite primary key so that we can later run queries against our tokens

# Creating keyspace
session.execute(
    """
    CREATE KEYSPACE IF NOT EXISTS pkspo WITH REPLICATION = {
        'class' : 'SimpleStrategy',
        'replication_factor' : 1
    }
    """
)

# switch to the right keyspace
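# (Assumed continuation; the original snippet is cut off after the comment above.)
session.set_keyspace('pkspo')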
""" Real-time processing """

from pyleus.storm import SimpleBolt
import simplejson as json
import datetime
import time
from cassandra.cluster import Cluster
from cassandra.query import BatchStatement, PreparedStatement

cluster = Cluster(['172.31.1.44', '172.31.1.45', '172.31.1.46'])
session = cluster.connect('flashback')

cql_query = "INSERT INTO rt_reddit (secslot,subreddit,author,created_utc,body) VALUES (?,?,?,?,?)"
cql_reddit_stmt = session.prepare(cql_query)


def extract_json(json_line):
    """ simple json is slightly faster to use to load jsons than the default json """
    try:
        item = json.loads(json_line)
    except:
        return None

    reddit = {}
    reddit['author'] = item['author']
    reddit['subreddit'] = item['subreddit']
    reddit['body'] = item['body']
    reddit['created_utc'] = item['created_utc']

    return reddit
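
# Hypothetical usage sketch (not part of the original bolt): bind one parsed
# comment to the prepared statement above and write it to the rt_reddit table;
# secslot stands for whatever time-bucket key the bolt computes.
def store_reddit(secslot, reddit):
    session.execute(cql_reddit_stmt, (
        secslot,
        reddit['subreddit'],
        reddit['author'],
        reddit['created_utc'],
        reddit['body'],
    ))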
class QueryPagingTests(unittest.TestCase):
    def setUp(self):
        if PROTOCOL_VERSION < 2:
            raise unittest.SkipTest(
                "Protocol 2.0+ is required for Paging state, currently testing against %r"
                % (PROTOCOL_VERSION, ))

        self.cluster = Cluster(protocol_version=PROTOCOL_VERSION)
        if PROTOCOL_VERSION < 3:
            self.cluster.set_core_connections_per_host(HostDistance.LOCAL, 1)
        self.session = self.cluster.connect(wait_for_all_pools=True)
        self.session.execute("TRUNCATE test3rf.test")

    def tearDown(self):
        self.cluster.shutdown()

    def test_paging(self):
        statements_and_params = zip(
            cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
            [(i, ) for i in range(100)])
        execute_concurrent(self.session, list(statements_and_params))

        prepared = self.session.prepare("SELECT * FROM test3rf.test")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            self.assertEqual(
                100,
                len(list(self.session.execute("SELECT * FROM test3rf.test"))))

            statement = SimpleStatement("SELECT * FROM test3rf.test")
            self.assertEqual(100, len(list(self.session.execute(statement))))

            self.assertEqual(100, len(list(self.session.execute(prepared))))

    def test_paging_state(self):
        """
        Test to validate paging state api
        @since 3.7.0
        @jira_ticket PYTHON-200
        @expected_result the paging state returned should be accurate, and allow for queries to be resumed.

        @test_category queries
        """
        statements_and_params = zip(
            cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
            [(i, ) for i in range(100)])
        execute_concurrent(self.session, list(statements_and_params))

        list_all_results = []
        self.session.default_fetch_size = 3

        result_set = self.session.execute("SELECT * FROM test3rf.test")
        while (result_set.has_more_pages):
            for row in result_set.current_rows:
                self.assertNotIn(row, list_all_results)
            list_all_results.extend(result_set.current_rows)
            page_state = result_set.paging_state
            result_set = self.session.execute("SELECT * FROM test3rf.test",
                                              paging_state=page_state)

        if (len(result_set.current_rows) > 0):
            list_all_results.append(result_set.current_rows)
        self.assertEqual(len(list_all_results), 100)

    def test_paging_verify_writes(self):
        statements_and_params = zip(
            cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
            [(i, ) for i in range(100)])
        execute_concurrent(self.session, statements_and_params)

        prepared = self.session.prepare("SELECT * FROM test3rf.test")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            results = self.session.execute("SELECT * FROM test3rf.test")
            result_array = set()
            result_set = set()
            for result in results:
                result_array.add(result.k)
                result_set.add(result.v)

            self.assertEqual(set(range(100)), result_array)
            self.assertEqual(set([0]), result_set)

            statement = SimpleStatement("SELECT * FROM test3rf.test")
            results = self.session.execute(statement)
            result_array = set()
            result_set = set()
            for result in results:
                result_array.add(result.k)
                result_set.add(result.v)

            self.assertEqual(set(range(100)), result_array)
            self.assertEqual(set([0]), result_set)

            results = self.session.execute(prepared)
            result_array = set()
            result_set = set()
            for result in results:
                result_array.add(result.k)
                result_set.add(result.v)

            self.assertEqual(set(range(100)), result_array)
            self.assertEqual(set([0]), result_set)

    def test_paging_verify_with_composite_keys(self):
        ddl = '''
            CREATE TABLE test3rf.test_paging_verify_2 (
                k1 int,
                k2 int,
                v int,
                PRIMARY KEY(k1, k2)
            )'''
        self.session.execute(ddl)

        statements_and_params = zip(
            cycle([
                "INSERT INTO test3rf.test_paging_verify_2 "
                "(k1, k2, v) VALUES (0, %s, %s)"
            ]), [(i, i + 1) for i in range(100)])
        execute_concurrent(self.session, statements_and_params)

        prepared = self.session.prepare(
            "SELECT * FROM test3rf.test_paging_verify_2")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            results = self.session.execute(
                "SELECT * FROM test3rf.test_paging_verify_2")
            result_array = []
            value_array = []
            for result in results:
                result_array.append(result.k2)
                value_array.append(result.v)

            self.assertSequenceEqual(range(100), result_array)
            self.assertSequenceEqual(range(1, 101), value_array)

            statement = SimpleStatement(
                "SELECT * FROM test3rf.test_paging_verify_2")
            results = self.session.execute(statement)
            result_array = []
            value_array = []
            for result in results:
                result_array.append(result.k2)
                value_array.append(result.v)

            self.assertSequenceEqual(range(100), result_array)
            self.assertSequenceEqual(range(1, 101), value_array)

            results = self.session.execute(prepared)
            result_array = []
            value_array = []
            for result in results:
                result_array.append(result.k2)
                value_array.append(result.v)

            self.assertSequenceEqual(range(100), result_array)
            self.assertSequenceEqual(range(1, 101), value_array)

    def test_async_paging(self):
        statements_and_params = zip(
            cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
            [(i, ) for i in range(100)])
        execute_concurrent(self.session, list(statements_and_params))

        prepared = self.session.prepare("SELECT * FROM test3rf.test")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            self.assertEqual(
                100,
                len(
                    list(
                        self.session.execute_async(
                            "SELECT * FROM test3rf.test").result())))

            statement = SimpleStatement("SELECT * FROM test3rf.test")
            self.assertEqual(
                100, len(list(self.session.execute_async(statement).result())))

            self.assertEqual(
                100, len(list(self.session.execute_async(prepared).result())))

    def test_async_paging_verify_writes(self):
        ddl = '''
            CREATE TABLE test3rf.test_async_paging_verify (
                k1 int,
                k2 int,
                v int,
                PRIMARY KEY(k1, k2)
            )'''
        self.session.execute(ddl)

        statements_and_params = zip(
            cycle([
                "INSERT INTO test3rf.test_async_paging_verify "
                "(k1, k2, v) VALUES (0, %s, %s)"
            ]), [(i, i + 1) for i in range(100)])
        execute_concurrent(self.session, statements_and_params)

        prepared = self.session.prepare(
            "SELECT * FROM test3rf.test_async_paging_verify")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            results = self.session.execute_async(
                "SELECT * FROM test3rf.test_async_paging_verify").result()
            result_array = []
            value_array = []
            for result in results:
                result_array.append(result.k2)
                value_array.append(result.v)

            self.assertSequenceEqual(range(100), result_array)
            self.assertSequenceEqual(range(1, 101), value_array)

            statement = SimpleStatement(
                "SELECT * FROM test3rf.test_async_paging_verify")
            results = self.session.execute_async(statement).result()
            result_array = []
            value_array = []
            for result in results:
                result_array.append(result.k2)
                value_array.append(result.v)

            self.assertSequenceEqual(range(100), result_array)
            self.assertSequenceEqual(range(1, 101), value_array)

            results = self.session.execute_async(prepared).result()
            result_array = []
            value_array = []
            for result in results:
                result_array.append(result.k2)
                value_array.append(result.v)

            self.assertSequenceEqual(range(100), result_array)
            self.assertSequenceEqual(range(1, 101), value_array)

    def test_paging_callbacks(self):
        """
        Test to validate callback api
        @since 3.9.0
        @jira_ticket PYTHON-733
        @expected_result callbacks shouldn't be called twice per message,
        and the fetch_size should be handled transparently for the user

        @test_category queries
        """
        statements_and_params = zip(
            cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
            [(i, ) for i in range(100)])
        execute_concurrent(self.session, list(statements_and_params))

        prepared = self.session.prepare("SELECT * FROM test3rf.test")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            future = self.session.execute_async("SELECT * FROM test3rf.test",
                                                timeout=20)

            event = Event()
            counter = count()
            number_of_calls = count()

            def handle_page(rows, future, counter, number_of_calls):
                next(number_of_calls)
                for row in rows:
                    next(counter)

                if future.has_more_pages:
                    future.start_fetching_next_page()
                else:
                    event.set()

            def handle_error(err):
                event.set()
                self.fail(err)

            future.add_callbacks(callback=handle_page,
                                 callback_args=(future, counter,
                                                number_of_calls),
                                 errback=handle_error)
            event.wait()
            self.assertEqual(next(number_of_calls), 100 // fetch_size + 1)
            self.assertEqual(next(counter), 100)

            # simple statement
            future = self.session.execute_async(
                SimpleStatement("SELECT * FROM test3rf.test"), timeout=20)
            event.clear()
            counter = count()
            number_of_calls = count()

            future.add_callbacks(callback=handle_page,
                                 callback_args=(future, counter,
                                                number_of_calls),
                                 errback=handle_error)
            event.wait()
            self.assertEqual(next(number_of_calls), 100 // fetch_size + 1)
            self.assertEqual(next(counter), 100)

            # prepared statement
            future = self.session.execute_async(prepared, timeout=20)
            event.clear()
            counter = count()
            number_of_calls = count()

            future.add_callbacks(callback=handle_page,
                                 callback_args=(future, counter,
                                                number_of_calls),
                                 errback=handle_error)
            event.wait()
            self.assertEqual(next(number_of_calls), 100 // fetch_size + 1)
            self.assertEqual(next(counter), 100)

    def test_concurrent_with_paging(self):
        statements_and_params = zip(
            cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
            [(i, ) for i in range(100)])
        execute_concurrent(self.session, list(statements_and_params))

        prepared = self.session.prepare("SELECT * FROM test3rf.test")

        for fetch_size in (2, 3, 7, 10, 99, 100, 101, 10000):
            self.session.default_fetch_size = fetch_size
            results = execute_concurrent_with_args(self.session, prepared,
                                                   [None] * 10)
            self.assertEqual(10, len(results))
            for (success, result) in results:
                self.assertTrue(success)
                self.assertEqual(100, len(list(result)))

    def test_fetch_size(self):
        """
        Ensure per-statement fetch_sizes override the default fetch size.
        """
        statements_and_params = zip(
            cycle(["INSERT INTO test3rf.test (k, v) VALUES (%s, 0)"]),
            [(i, ) for i in range(100)])
        execute_concurrent(self.session, list(statements_and_params))

        prepared = self.session.prepare("SELECT * FROM test3rf.test")

        self.session.default_fetch_size = 10
        result = self.session.execute(prepared, [])
        self.assertTrue(result.has_more_pages)

        self.session.default_fetch_size = 2000
        result = self.session.execute(prepared, [])
        self.assertFalse(result.has_more_pages)

        self.session.default_fetch_size = None
        result = self.session.execute(prepared, [])
        self.assertFalse(result.has_more_pages)

        self.session.default_fetch_size = 10

        prepared.fetch_size = 2000
        result = self.session.execute(prepared, [])
        self.assertFalse(result.has_more_pages)

        prepared.fetch_size = None
        result = self.session.execute(prepared, [])
        self.assertFalse(result.has_more_pages)

        prepared.fetch_size = 10
        result = self.session.execute(prepared, [])
        self.assertTrue(result.has_more_pages)

        prepared.fetch_size = 2000
        bound = prepared.bind([])
        result = self.session.execute(bound, [])
        self.assertFalse(result.has_more_pages)

        prepared.fetch_size = None
        bound = prepared.bind([])
        result = self.session.execute(bound, [])
        self.assertFalse(result.has_more_pages)

        prepared.fetch_size = 10
        bound = prepared.bind([])
        result = self.session.execute(bound, [])
        self.assertTrue(result.has_more_pages)

        bound.fetch_size = 2000
        result = self.session.execute(bound, [])
        self.assertFalse(result.has_more_pages)

        bound.fetch_size = None
        result = self.session.execute(bound, [])
        self.assertFalse(result.has_more_pages)

        bound.fetch_size = 10
        result = self.session.execute(bound, [])
        self.assertTrue(result.has_more_pages)

        s = SimpleStatement("SELECT * FROM test3rf.test", fetch_size=None)
        result = self.session.execute(s, [])
        self.assertFalse(result.has_more_pages)

        s = SimpleStatement("SELECT * FROM test3rf.test")
        result = self.session.execute(s, [])
        self.assertTrue(result.has_more_pages)

        s = SimpleStatement("SELECT * FROM test3rf.test")
        s.fetch_size = None
        result = self.session.execute(s, [])
        self.assertFalse(result.has_more_pages)
class DataProcess(Process):
    """
        This process handles all data submissions
        is_database_raw is a bool; if True, data is written to the raw db, otherwise to the decoded db
    """
    def __init__(self, is_database_raw, verbosity=0):
        """
            Starts up the Data handling Process
        """
        super(DataProcess, self).__init__()

        if is_database_raw:
            self.input_exchange = 'data-pipeline-in'
            self.queue = 'db-raw'
            self.statement = "INSERT INTO    sensor_data_raw   (node_id, date, plugin_name, plugin_version, plugin_instance, timestamp, parameter, data) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
            self.function_ExtractValuesFromMessage = self.ExtractValuesFromMessage_raw
        else:
            self.input_exchange = 'plugins-out'
            self.queue = 'db-decoded'
            self.statement = "INSERT INTO    sensor_data_decoded   (node_id, date, ingest_id, meta_id, timestamp, data_set, sensor, parameter, data, unit) VALUES (?, ?, ?, ?, ?,   ?, ?, ?, ?, ?)"
            self.function_ExtractValuesFromMessage = self.ExtractValuesFromMessage_decoded

        logger.info("Initializing DataProcess")

        # Set up the Rabbit connection
        #self.connection = pika.BlockingConnection(pika.ConnectionParameters(host='localhost'))
        #Connect to rabbitMQ
        while True:
            try:
                self.connection = pika.BlockingConnection(pika_params)
            except Exception as e:
                logger.error(
                    "QueueToDb: Could not connect to RabbitMQ server \"%s\": %s"
                    % (pika_params.host, e))
                time.sleep(1)
                continue
            break

        logger.info("Connected to RabbitMQ server \"%s\"" % (pika_params.host))
        self.verbosity = verbosity
        self.numInserted = 0
        self.numFailed = 0
        self.session = None
        self.cluster = None
        self.prepared_statement = None

        self.cassandra_connect()

        self.channel = self.connection.channel()
        self.channel.basic_qos(prefetch_count=1)
        # Declare this process's queue
        self.channel.queue_declare(self.queue, durable=True)

        self.channel.queue_bind(exchange=self.input_exchange, queue=self.queue)

        try:
            self.channel.basic_consume(self.callback, queue=self.queue)
        except KeyboardInterrupt:
            logger.info("exiting.")
            sys.exit(0)
        except Exception as e:
            logger.error("error: %s" % (str(e)))

    def callback(self, ch, method, props, body):
        #TODO: this simply drops failed messages, might find a better solution!? Keeping them has the risk of spamming RabbitMQ
        if self.verbosity > 1:
            print('######################################')
            print('method = ', method)
            print('props = ', props)
            print('body = ', body)
        '''EXAMPLE: 
            props =  <BasicProperties(['app_id=coresense:3', 'content_type=b', 'delivery_mode=2', 'reply_to=0000001e06107d97', 'timestamp=1476135836151', 'type=frame'])>
        '''
        try:
            for iValues, values in enumerate(
                    self.function_ExtractValuesFromMessage(props, body)):
                # Send the data off to Cassandra
                if self.verbosity > 1:
                    print('iValues =', iValues)
                    print(' values =', values)
                self.cassandra_insert(values)
        except Exception as e:
            values = None
            self.numFailed += 1
            logger.error("Error inserting data: %s" % (str(e)))
            logger.error(' method = {}'.format(repr(method)))
            logger.error(' props  = {}'.format(repr(props)))
            logger.error(' body   = {}'.format(repr(body)))
            ch.basic_ack(delivery_tag=method.delivery_tag)
            return

        ch.basic_ack(delivery_tag=method.delivery_tag)
        if values:
            self.numInserted += 1
            if self.numInserted % 1000 == 0:
                logger.debug('  inserted {} / {} raw samples of data'.format(
                    self.numInserted, self.numInserted + self.numFailed))

    # Parse a message of sensor data and convert to the values to be inserted into a row in the db.  NOTE: this is a generator - because the decoded messages produce multiple rows of data.
    def ExtractValuesFromMessage_raw(self, props, body):
        if self.verbosity > 0: print('props.app_id =', props.app_id)
        versionStrings = props.app_id.split(':')
        sampleDatetime = datetime.datetime.utcfromtimestamp(
            float(props.timestamp) / 1000.0)
        sampleDate = sampleDatetime.strftime('%Y-%m-%d')
        node_id = props.reply_to
        #ingest_id       = props.ingest_id ##props.get('ingest_id', 0)
        #print('ingest_id: ', ingest_id)
        plugin_name = versionStrings[0]
        plugin_version = versionStrings[1]
        plugin_instance = '0' if (
            len(versionStrings) < 3) else versionStrings[2]
        timestamp = int(props.timestamp)
        parameter = props.type
        data = str(binascii.hexlify(body))

        values = (node_id, sampleDate, plugin_name, plugin_version,
                  plugin_instance, timestamp, parameter, data)

        if self.verbosity > 0:
            print('   node_id = ', node_id)
            print('   date = ', sampleDate)
            #print('   ingest_id = ',        ingest_id       )
            print('   plugin_name = ', plugin_name)
            print('   plugin_version = ', plugin_version)
            print('   plugin_instance = ', plugin_instance)
            print('   timestamp = ', timestamp)
            print('   parameter = ', parameter)
            print('   data = ', data)
        yield values

    def ExtractValuesFromMessage_decoded(self, props, body):
        #(node_id, date, meta_id, timestamp, data_set, sensor, parameter, data, unit) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"

        dictData = json.loads(body.decode())

        # same for each parameter:value pair
        sampleDatetime = datetime.datetime.utcfromtimestamp(
            float(props.timestamp) / 1000.0)
        node_id = props.reply_to
        sampleDate = sampleDatetime.strftime('%Y-%m-%d')
        ingest_id = 0  # props.ingest_id ##props.get('ingest_id', 0)
        #print('ingest_id: ', ingest_id)
        meta_id = 0  #props.meta_id
        timestamp = int(props.timestamp)
        data_set = props.app_id
        sensor = props.type
        unit = 'NO_UNIT'  #props.unit

        for k in dictData.keys():
            parameter = k
            data = str(dictData[k])

            values = (node_id, sampleDate, ingest_id, meta_id, timestamp,
                      data_set, sensor, parameter, data, unit)

            if self.verbosity > 0:
                print('   node_id = ', node_id)
                print('   date = ', sampleDate)
                print('   ingest_id = ', ingest_id)
                print('   meta_id = ', meta_id)
                print('   timestamp = ', timestamp)
                print('   data_set = ', data_set)
                print('   sensor = ', sensor)
                print('   parameter = ', parameter)
                print('   data = ', data)
                print('   unit = ', unit)
            yield values

    def cassandra_insert(self, values):

        if not self.session:
            self.cassandra_connect()

        if not self.prepared_statement:
            try:
                self.prepared_statement = self.session.prepare(self.statement)
            except Exception as e:
                logger.error("Error preparing statement: (%s) %s" %
                             (type(e).__name__, str(e)))
                raise

        if self.verbosity > 1: logger.debug("inserting: %s" % (str(values)))
        try:
            bound_statement = self.prepared_statement.bind(values)
        except Exception as e:
            logger.error(
                "QueueToDb: Error binding cassandra cql statement:(%s) %s -- values was: %s"
                % (type(e).__name__, str(e), str(values)))
            raise

        connection_retry_delay = 1
        while True:
            # this is long term storage
            try:
                self.session.execute(bound_statement)
            except TypeError as e:
                logger.error(
                    "QueueToDb: (TypeError) Error executing cassandra cql statement: %s -- values was: %s"
                    % (str(e), str(values)))
                break
            except Exception as e:
                logger.error(
                    "QueueToDb: Error (type: %s) executing cassandra cql statement: %s -- values was: %s"
                    % (type(e).__name__, str(e), str(values)))
                if "TypeError" in str(e):
                    logger.debug(
                        "detected TypeError, will ignore this message")
                    break

                self.cassandra_connect()
                time.sleep(connection_retry_delay)
                if connection_retry_delay < 10:
                    connection_retry_delay += 1
                continue

            break

    def cassandra_connect(self):
        bDone = False
        iTry = 0
        while not bDone and (iTry < 5):
            if self.cluster:
                try:
                    self.cluster.shutdown()
                except Exception:
                    pass

            self.cluster = Cluster(contact_points=[CASSANDRA_HOST])
            self.session = None

            iTry2 = 0
            while not bDone and (iTry2 < 5):
                iTry2 += 1
                try:  # Might not immediately connect. That's fine. It'll try again if/when it needs to.
                    self.session = self.cluster.connect('waggle')
                    if self.session:
                        bDone = True
                except Exception:
                    logger.warning(
                        "QueueToDb: WARNING: Cassandra connection to " +
                        CASSANDRA_HOST + " failed.")
                    logger.warning(
                        "QueueToDb: The process will attempt to re-connect at a later time."
                    )
                if not bDone:
                    time.sleep(3)

    def run(self):
        self.cassandra_connect()
        self.channel.start_consuming()

    def join(self):
        super(DataProcess, self).terminate()
        self.connection.close(0)
        if self.cluster:
            self.cluster.shutdown()
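# A hedged sketch, not part of the worker above: one way to address the TODO in callback()
# (failed messages are currently ack'ed and dropped) is to declare the work queue with a
# dead-letter exchange and reject failures so RabbitMQ parks them for later inspection.
# All names here (failed_exchange, failed_queue, work_queue) are hypothetical assumptions.
def declare_queues_with_dead_letter(channel, work_queue='work_queue',
                                    failed_exchange='failed_exchange',
                                    failed_queue='failed_queue'):
    # Rejected messages are routed here by the broker.
    channel.exchange_declare(exchange=failed_exchange, exchange_type='fanout')
    channel.queue_declare(queue=failed_queue, durable=True)
    channel.queue_bind(exchange=failed_exchange, queue=failed_queue)
    # The work queue forwards nack'ed messages to the dead-letter exchange.
    channel.queue_declare(queue=work_queue, durable=True,
                          arguments={'x-dead-letter-exchange': failed_exchange})

# In the error path of callback(), the worker would then reject instead of acknowledging:
#     ch.basic_nack(delivery_tag=method.delivery_tag, requeue=False)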
Exemple #36
    def token_aware(self, keyspace, use_prepared=False):
        use_singledc()
        cluster = Cluster(load_balancing_policy=TokenAwarePolicy(
            RoundRobinPolicy()),
                          protocol_version=PROTOCOL_VERSION)
        session = cluster.connect()
        wait_for_up(cluster, 1, wait=False)
        wait_for_up(cluster, 2, wait=False)
        wait_for_up(cluster, 3)

        create_schema(session, keyspace, replication_factor=1)
        self._insert(session, keyspace)
        self._query(session, keyspace, use_prepared=use_prepared)

        self.coordinator_stats.assert_query_count_equals(self, 1, 0)
        self.coordinator_stats.assert_query_count_equals(self, 2, 12)
        self.coordinator_stats.assert_query_count_equals(self, 3, 0)

        self.coordinator_stats.reset_counts()
        self._query(session, keyspace, use_prepared=use_prepared)

        self.coordinator_stats.assert_query_count_equals(self, 1, 0)
        self.coordinator_stats.assert_query_count_equals(self, 2, 12)
        self.coordinator_stats.assert_query_count_equals(self, 3, 0)

        self.coordinator_stats.reset_counts()
        force_stop(2)
        wait_for_down(cluster, 2, wait=True)

        try:
            self._query(session, keyspace, use_prepared=use_prepared)
            self.fail()
        except Unavailable as e:
            self.assertEqual(e.consistency, 1)
            self.assertEqual(e.required_replicas, 1)
            self.assertEqual(e.alive_replicas, 0)

        self.coordinator_stats.reset_counts()
        start(2)
        wait_for_up(cluster, 2, wait=True)

        self._query(session, keyspace, use_prepared=use_prepared)

        self.coordinator_stats.assert_query_count_equals(self, 1, 0)
        self.coordinator_stats.assert_query_count_equals(self, 2, 12)
        self.coordinator_stats.assert_query_count_equals(self, 3, 0)

        self.coordinator_stats.reset_counts()
        stop(2)
        wait_for_down(cluster, 2, wait=True)

        try:
            self._query(session, keyspace, use_prepared=use_prepared)
            self.fail()
        except Unavailable:
            pass

        self.coordinator_stats.reset_counts()
        start(2)
        wait_for_up(cluster, 2, wait=True)
        decommission(2)
        wait_for_down(cluster, 2, wait=True)

        self._query(session, keyspace, use_prepared=use_prepared)

        results = set([
            self.coordinator_stats.get_query_count(1),
            self.coordinator_stats.get_query_count(3)
        ])
        self.assertEqual(results, set([0, 12]))
        self.coordinator_stats.assert_query_count_equals(self, 2, 0)

        cluster.shutdown()
	kafka_broker = args.kafka_broker
	cassandra_broker = args.cassandra_broker
	keyspace = args.keyspace
	table = args.table


	# Create the Kafka consumer
	consumer = KafkaConsumer(
		topic_name,
		bootstrap_servers = kafka_broker
	)

	#create a cassandra session
	cassandra_cluster = Cluster (
		contact_points = cassandra_broker.split(',')
	)
	session = cassandra_cluster.connect()
	# The keyspace and table names are passed in as arguments;
	# create them if they do not already exist.
	session.execute("CREATE KEYSPACE IF NOT EXISTS %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}" % keyspace)
	session.set_keyspace(keyspace)
	session.execute("CREATE TABLE IF NOT EXISTS %s (symbol text, trade_time timestamp, price float, PRIMARY KEY(symbol, trade_time))" % table)

	atexit.register(shutdown_hook, consumer, session)

	for msg in consumer:
		# logger.debug(msg)
		save_data(msg.value, session)
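# A hedged sketch of the save_data() and shutdown_hook() helpers the loop above depends on;
# they are not shown in this snippet. The JSON field names ('StockSymbol', 'LastTradeDateTime',
# 'LastTradePrice') and the default table name are assumptions made for illustration only.
import json
import logging

logger = logging.getLogger(__name__)


def save_data(stock_data, cassandra_session, table='stock'):
    try:
        record = json.loads(stock_data)
        symbol = record.get('StockSymbol')
        trade_time = record.get('LastTradeDateTime')
        price = float(record.get('LastTradePrice'))
        # Identifiers cannot be bound, so the table name is interpolated; the values are bound.
        statement = "INSERT INTO %s (symbol, trade_time, price) VALUES (%%s, %%s, %%s)" % table
        cassandra_session.execute(statement, (symbol, trade_time, price))
        logger.info('Stored %s @ %s', symbol, trade_time)
    except Exception:
        logger.exception('Failed to save message %r', stock_data)


def shutdown_hook(kafka_consumer, cassandra_session):
    # Close external resources on exit (registered with atexit above).
    kafka_consumer.close()
    cassandra_session.shutdown()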


Exemple #38
class Cassandra:
    def __init__(self):
        self.MORPHL_SERVER_IP_ADDRESS = getenv('MORPHL_SERVER_IP_ADDRESS')
        self.MORPHL_CASSANDRA_USERNAME = getenv('MORPHL_CASSANDRA_USERNAME')
        self.MORPHL_CASSANDRA_PASSWORD = getenv('MORPHL_CASSANDRA_PASSWORD')
        self.MORPHL_CASSANDRA_KEYSPACE = getenv('MORPHL_CASSANDRA_KEYSPACE')

        self.CASS_REQ_TIMEOUT = 3600.0

        self.auth_provider = PlainTextAuthProvider(
            username=self.MORPHL_CASSANDRA_USERNAME,
            password=self.MORPHL_CASSANDRA_PASSWORD)
        self.cluster = Cluster([self.MORPHL_SERVER_IP_ADDRESS],
                               auth_provider=self.auth_provider)

        self.session = self.cluster.connect(self.MORPHL_CASSANDRA_KEYSPACE)
        self.session.row_factory = dict_factory
        self.session.default_fetch_size = 100

        self.prepare_statements()

    def prepare_statements(self):
        """
            Prepare statements for database select queries
        """
        self.prep_stmts = {'predictions': {}, 'models': {}, 'access_logs': {}}

        template_for_single_row = 'SELECT * FROM ga_chp_predictions WHERE client_id = ? LIMIT 1'
        template_for_multiple_rows = 'SELECT client_id, prediction FROM ga_chp_predictions_by_prediction_date WHERE prediction_date = ?'
        template_for_predictions_statistics = 'SELECT loyal, neutral, churning, lost FROM ga_chp_predictions_statistics WHERE prediction_date= ? LIMIT 1'
        template_for_models_rows = 'SELECT accuracy, loss, day_as_str FROM ga_chp_valid_models WHERE is_model_valid = True LIMIT 20 ALLOW FILTERING'
        template_for_access_log_insert = 'INSERT INTO ga_chp_predictions_access_logs (client_id, tstamp, prediction) VALUES (?,?,?)'

        self.prep_stmts['predictions']['single'] = self.session.prepare(
            template_for_single_row)
        self.prep_stmts['predictions']['multiple'] = self.session.prepare(
            template_for_multiple_rows)
        self.prep_stmts['predictions']['statistics'] = self.session.prepare(
            template_for_predictions_statistics)
        self.prep_stmts['models']['multiple'] = self.session.prepare(
            template_for_models_rows)
        self.prep_stmts['access_logs']['insert'] = self.session.prepare(
            template_for_access_log_insert)

    def retrieve_prediction(self, client_id):
        bind_list = [client_id]
        return self.session.execute(
            self.prep_stmts['predictions']['single'],
            bind_list,
            timeout=self.CASS_REQ_TIMEOUT)._current_rows

    def retrieve_predictions(self, paging_state, date):

        bind_list = [date]

        # Check if paginated request
        if paging_state is not None:
            try:
                # Convert page from hex format to bytes
                previous_paging_state = bytes.fromhex(paging_state)
                results = self.session.execute(
                    self.prep_stmts['predictions']['multiple'],
                    bind_list,
                    paging_state=previous_paging_state,
                    timeout=self.CASS_REQ_TIMEOUT)
            except (ValueError, ProtocolException):

                # If paging_state causes an error, return invalid request since the format was probably valid but the actual value was wrong
                return {'status': 0, 'error': 'Invalid pagination request.'}

        else:
            # If no page is set get first page of results
            results = self.session.execute(
                self.prep_stmts['predictions']['multiple'],
                bind_list,
                timeout=self.CASS_REQ_TIMEOUT)

        return {
            'status':
            1,
            'predictions':
            results._current_rows,
            'next_paging_state':
            results.paging_state.hex() if results.has_more_pages == True else 0
        }

    def get_statistics(self, date):
        bind_list = [date]

        response = self.session.execute(
            self.prep_stmts['predictions']['statistics'],
            bind_list,
            timeout=self.CASS_REQ_TIMEOUT)._current_rows

        return {} if not response else response[0]

    def get_model_statistics(self):
        return self.session.execute(
            self.prep_stmts['models']['multiple'],
            timeout=self.CASS_REQ_TIMEOUT)._current_rows

    def insert_access_log(self, client_id, p):
        bind_list = [
            client_id,
            datetime.now(), -1 if len(p) == 0 else p[0]['prediction']
        ]

        return self.session.execute(self.prep_stmts['access_logs']['insert'],
                                    bind_list,
                                    timeout=self.CASS_REQ_TIMEOUT)
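# A hedged usage sketch for the paginated API above (not part of the original project): the
# first call to retrieve_predictions() passes paging_state=None, and each following call hands
# back the hex string returned as 'next_paging_state'. The date literal is a placeholder.
if __name__ == '__main__':
    db = Cassandra()
    date = '2019-10-10'
    paging_state = None
    while True:
        page = db.retrieve_predictions(paging_state, date)
        if page['status'] != 1:
            break
        for row in page['predictions']:
            # dict_factory is set on the session, so each row behaves like a dict.
            print(row['client_id'], row['prediction'])
        if page['next_paging_state'] == 0:
            break  # no more pages
        paging_state = page['next_paging_state']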
Exemple #39
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        #In case file has an empty name
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)

        #Everything is correct and we can run the prediction
        if file and allowed_file(file.filename):
            #save and read uploaded image
            filename = secure_filename(file.filename)
            file.save(secure_filename(file.filename))
            image = Image.open(file.filename)
            #flatten_img = np.reshape(image, 784)
            app = Predict()
            x = app.predict(image)
            current_time = str(datetime.datetime.now())
            session.execute(
                """
            INSERT INTO demandtable (time, filename, result)
            VALUES (%s, %s, %s)
            """, (current_time, filename, x))
            return 'Recognition result: ' + x


if __name__ == '__main__':
    cluster = Cluster(contact_points=['127.0.0.1'], port=80)
    session = cluster.connect(KEYSPACE)
Exemple #40
def getdata(searchname):
    searchname = parse.unquote(
        searchname)  # Decode URL, e.g. turn %20 into space etc

    cluster = Cluster()  # Connect to local host on default port 9042
    session = cluster.connect('car_pricing')  # Connect to car_pricing keyspace

    # Get adIDs into list related to our search name
    cql = 'SELECT DISTINCT searchname, advertid FROM car_pricing.searchdata;'

    prepStatement = session.prepare(cql)
    queryResults = session.execute(prepStatement)

    adIDs = []
    for qr in queryResults:
        if qr[0] == searchname:  # If this is a result from our desired search then add it to the list
            adIDs.append(qr[1])

    # List columns we want to use as features (or to create features from) and build up cql query
    colListOther = [
        'advertid', 'plate', 'bodytype', 'transmission', 'fueltype',
        'sellertype', 'make', 'model', 'dealername', 'location',
        'searchcriteria', 'distancefromyou', 'features', 'adtitle', 'foundtime'
    ]
    colListPlottable = [
        'year', 'mileage', 'enginesize', 'bhp', 'price', 'averagempg',
        'predictedprice'
    ]
    colListPlottableFriendly = [
        'Registration Year', 'Mileage (miles)', 'Engine Size (L)',
        'Engine Power (BHP)', 'Price (£)', 'Avg. Fuel Consumpt. (mpg)',
        'Predicted Price (£)'
    ]
    cql = 'SELECT ' + ','.join(
        colListPlottable + colListOther
    ) + ' FROM car_pricing.searchdata WHERE searchname = ? AND advertid = ? LIMIT 1;'

    prepStatement = session.prepare(cql)

    # Create data frame to store results
    df_D3data = pd.DataFrame(columns=(colListPlottable + colListOther))

    for adID in adIDs:  # Query to get the latest information (latest data gathering time) for each advert
        queryResults = session.execute(prepStatement, [searchname, adID])

        #df_D3data = df_D3data.append(pd.DataFrame(data = [list(queryResults[0])], columns = (colListPlottable + colListOther)))   # Note that list is embedded in another list
        df_D3data = df_D3data.append(
            pandas_factory((colListPlottable + colListOther), queryResults))

    # Add advert age to the data frame
    df_D3data['advertage_days'] = df_D3data['advertid'].apply(compare_dates)
    colListPlottable += ['advertage_days']
    colListPlottableFriendly += ['Advert Age (days)']

    session.shutdown()
    cluster.shutdown()

    # Remove any points which are not valid, i.e. NaN, None, etc
    df_D3data['predictedprice'] = 0
    df_D3data = df_D3data[df_D3data.notnull().all(axis=1)]

    # Predict price based on parameters and saved model
    X = pd.get_dummies(
        df_D3data,
        dummy_na=False,
        columns=['bodytype', 'fueltype', 'make', 'model', 'sellertype'])

    gbr_gscv = joblib.load('scraper/price_predictor.sav')
    dfColList = joblib.load('scraper/price_predictor_columns.sav')

    X = X.reindex(
        columns=dfColList, fill_value=0
    )  # Fill all the one hot encoded columns with zero if they don't exist to ensure model is in correct shape to do predictions

    df_D3data['predictedprice'] = gbr_gscv.predict(X)

    # Calculate price difference and add to data frame and column lists
    df_D3data['pricediff'] = df_D3data['price'] - df_D3data['predictedprice']
    colListPlottable += ['pricediff']
    colListPlottableFriendly += ['Price Difference (£)']

    # Add advert URL so you can open it directly in Autotrader
    df_D3data[
        'advertURL'] = 'https://www.autotrader.co.uk/classified/advert/' + df_D3data[
            'advertid']

    # Required to generate index for DF so that it can be turned into JSON
    df_D3data = df_D3data.reset_index()

    # Prepare columns for output by sorting in alphabetical order and putting into dictionary for output
    colListPlottableFriendly, colListPlottable = (
        list(x)
        for x in zip(*sorted(zip(colListPlottableFriendly, colListPlottable),
                             key=lambda pair: pair[0]))
    )  # Taken from https://stackoverflow.com/questions/13668393/python-sorting-two-lists
    colOutputList = [{
        'name': n,
        'friendly_name': fn
    } for n, fn in zip(colListPlottable, colListPlottableFriendly)]

    response = jsonify({
        'data': df_D3data.to_dict(orient='records'),
        'plottable_columns': colOutputList
    })

    return response
Exemple #41
from cassandra.cluster import Cluster

# default cassandra docker is 127.0.0.1:9042
cluster = Cluster()
session = cluster.connect('student_keyspace')

student_insert_str = "INSERT INTO student_by_department (department, id, name) VALUES ('%s', %d, '%s');"

new_si_student = ['ilham', 'Sasa', 'Kevin']
for student_id, student_name in zip(range(len(new_si_student)), new_si_student):
    session.execute(student_insert_str % ('si', student_id, student_name))

si_students = session.execute("SELECT * FROM student_by_department WHERE department='si' ORDER BY id DESC;")
for student in si_students:
    print(student.department, student.id, student.name)
Exemple #42
from cassandra.cluster import Cluster

cluster = Cluster()
connection = cluster.connect('max_connect')

batch = """
BEGIN BATCH
INSERT INTO max_connect.feature_remedy(
				feature_id,
				remedy_id,
				remedy_name_color)
VALUES (1,
		'r2',
		{'mascara':'dark blue'}
);		
 
 
INSERT INTO max_connect.client_feature(client_id,
							client_info,
							age,
							colors,
							skin_condition,
							feature_id,
							feature_name)
VALUES (2, {first_name: 'Nastya',
			second_name: 'Gogol', 
			email: '*****@*****.**'},
			25,
			{{'hair':'auburn'},{'eyes':'brown'},{'skin':'light'}},
			7,
			3,
Exemple #43
import time
from random import randint
from sopel.module import commands, event, rule
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider

#Configuration
contactpoints = ['1.2.3.4', '4.3.2.1']
auth_provider = PlainTextAuthProvider(username='******', password='******')
keyspace = "whois"

print "Connecting to cluster"

cluster = Cluster(contact_points=contactpoints, auth_provider=auth_provider)

session = cluster.connect(keyspace)

NETWORK = "testnet"


def send_whois(bot, nick):
    """
    Sends the WHOIS command to the server for the
    specified nick.
    """
    time.sleep(randint(10, 40))
    bot.write(["WHOIS", nick])
    #bot.say("whois sent: " + nick)


def send_names(bot, channel):
    for c in cnts:
        cv2.drawContours(image, [c], -1, (255,255,255), 2)

    # Remove vertical
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,10))
    detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
    cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    for c in cnts:
        cv2.drawContours(image, [c], -1, (255,255,255), 2)
        
    cv2.imwrite(img_for_box_extraction_path, image)    
    
import socket
import pandas as pd
from cassandra.cluster import Cluster
cluster = Cluster(['192.168.0.131', '192.168.0.132','192.168.0.133'])            
session = cluster.connect('electionanalysis')    
selectStmtStartedProcessing= session.prepare("SELECT * FROM votelist_full_pdf_queue where STATUS='processing started'") 
selectStmtComplete= session.prepare("SELECT * FROM votelist_full_pdf_queue where STATUS='Complete'") 
selectStmtError= session.prepare("SELECT * FROM votelist_full_pdf_queue where STATUS='ERROR'") 
dfProcessingStarted = pd.DataFrame(list(session.execute(selectStmtStartedProcessing)))
hostname= socket.gethostname()
if dfProcessingStarted.empty==False:
    dfProcessingStarted = dfProcessingStarted.loc[(dfProcessingStarted['hostname']== str(hostname))]
dfProcessingComplete = pd.DataFrame(list(session.execute(selectStmtComplete)))
if dfProcessingComplete.empty==False:
    dfProcessingComplete = dfProcessingComplete.loc[(dfProcessingComplete['hostname']== str(hostname))]
dfError = pd.DataFrame(list(session.execute(selectStmtError)))
if dfError.empty==False:
    dfError = dfError.loc[(dfError['hostname']== str(hostname))]

if dfProcessingStarted.empty==False and dfProcessingComplete.empty==False:
Exemple #45
    def test_dc_aware_roundrobin_one_remote_host(self):
        use_multidc([2, 2])
        keyspace = 'test_dc_aware_roundrobin_one_remote_host'
        cluster = Cluster(load_balancing_policy=DCAwareRoundRobinPolicy(
            'dc2', used_hosts_per_remote_dc=1),
                          protocol_version=PROTOCOL_VERSION)
        session = cluster.connect()
        wait_for_up(cluster, 1, wait=False)
        wait_for_up(cluster, 2, wait=False)
        wait_for_up(cluster, 3, wait=False)
        wait_for_up(cluster, 4)

        create_schema(session, keyspace, replication_strategy=[2, 2])
        self._insert(session, keyspace)
        self._query(session, keyspace)

        self.coordinator_stats.assert_query_count_equals(self, 1, 0)
        self.coordinator_stats.assert_query_count_equals(self, 2, 0)
        self.coordinator_stats.assert_query_count_equals(self, 3, 6)
        self.coordinator_stats.assert_query_count_equals(self, 4, 6)

        self.coordinator_stats.reset_counts()
        bootstrap(5, 'dc1')
        wait_for_up(cluster, 5)

        self._query(session, keyspace)

        self.coordinator_stats.assert_query_count_equals(self, 1, 0)
        self.coordinator_stats.assert_query_count_equals(self, 2, 0)
        self.coordinator_stats.assert_query_count_equals(self, 3, 6)
        self.coordinator_stats.assert_query_count_equals(self, 4, 6)
        self.coordinator_stats.assert_query_count_equals(self, 5, 0)

        self.coordinator_stats.reset_counts()
        decommission(3)
        decommission(4)
        wait_for_down(cluster, 3, wait=True)
        wait_for_down(cluster, 4, wait=True)

        self._query(session, keyspace)

        self.coordinator_stats.assert_query_count_equals(self, 3, 0)
        self.coordinator_stats.assert_query_count_equals(self, 4, 0)
        responses = set()
        for node in [1, 2, 5]:
            responses.add(self.coordinator_stats.get_query_count(node))
        self.assertEqual(set([0, 0, 12]), responses)

        self.coordinator_stats.reset_counts()
        decommission(5)
        wait_for_down(cluster, 5, wait=True)

        self._query(session, keyspace)

        self.coordinator_stats.assert_query_count_equals(self, 3, 0)
        self.coordinator_stats.assert_query_count_equals(self, 4, 0)
        self.coordinator_stats.assert_query_count_equals(self, 5, 0)
        responses = set()
        for node in [1, 2]:
            responses.add(self.coordinator_stats.get_query_count(node))
        self.assertEqual(set([0, 12]), responses)

        self.coordinator_stats.reset_counts()
        decommission(1)
        wait_for_down(cluster, 1, wait=True)

        self._query(session, keyspace)

        self.coordinator_stats.assert_query_count_equals(self, 1, 0)
        self.coordinator_stats.assert_query_count_equals(self, 2, 12)
        self.coordinator_stats.assert_query_count_equals(self, 3, 0)
        self.coordinator_stats.assert_query_count_equals(self, 4, 0)
        self.coordinator_stats.assert_query_count_equals(self, 5, 0)

        self.coordinator_stats.reset_counts()
        force_stop(2)

        try:
            self._query(session, keyspace)
            self.fail()
        except NoHostAvailable:
            pass

        cluster.shutdown()
def kmeans(query):
    from cassandra.cluster import Cluster
    from random import sample
    from copy import deepcopy

    cluster = Cluster()
    session = cluster.connect('e08')

    STOP = 0
    ittr_conv = 0
    nb_clusters = 3
    N = 0
    query_trip1 = session.execute(query)
   
    #************************COUNT NUMBER OF ELEMENTS/ROWS RETURNED******************************#
    for row in query_trip1:
      N = N+1
    
    #************************GET RANDOM INDEXES FOR CENTROIDS******************************#
    # Pick distinct row indexes for the initial centroids (duplicates would leave us short).
    centroids_indx = sorted(sample(range(N), nb_clusters))
    
    counter = 0
    index  = 0
    init_centroids = []
   
    query_trip1 = session.execute(query)
    for row in query_trip1:
        if counter == centroids_indx[index]:
            centroid = [row.startlong, row.startlat, row.endlong, row.endlat]
            init_centroids.append(centroid)

            index = index +1 

        if index == nb_clusters:
           break;        

        counter = counter + 1
        
    #************************INITIALIZE CENTROIDS *****************************#
    oldcentroids = [{'startlong':0,'startlat':0,'endlong':0,'endlat':0}, {'startlong':0,'startlat':0,'endlong':0,'endlat':0}, {'startlong':0,'startlat':0,'endlong':0,'endlat':0}]

    newcentroids = [{'startlong':init_centroids[0][0], 'startlat':init_centroids[0][1], 'endlong':init_centroids[0][2], 'endlat':init_centroids[0][3]}, {'startlong':init_centroids[1][0], 'startlat':init_centroids[1][1], 'endlong':init_centroids[1][2], 'endlat':init_centroids[1][3]}, {'startlong':init_centroids[2][0], 'startlat':init_centroids[2][1], 'endlong':init_centroids[2][2], 'endlat':init_centroids[2][3]}]


    #************************TIME TO STARTI ITTERATING WHILE NOT CONVERGING*****************************#
    while(STOP == 0):
       query_trip1 = session.execute(query)
       coord_sums = [{'startlong':0,'startlat':0,'endlong':0,'endlat':0 ,'nb':0},{'startlong':0,'startlat':0,'endlong':0,'endlat':0,'nb':0}, {'startlong':0,'startlat':0,'endlong':0,'endlat':0,'nb':0}]

       for row in query_trip1:
           distance1 = double_distance(newcentroids[0], row) 
           distance2 = double_distance(newcentroids[1], row)
           distance3 = double_distance(newcentroids[2], row)

           distances  = [distance1, distance2, distance3]

           min_indx = distances.index(min(distances))

           coord_sums[min_indx]['startlong'] += row.startlong
           coord_sums[min_indx]['startlat']  += row.startlat
           coord_sums[min_indx]['endlong']   += row.endlong
           coord_sums[min_indx]['endlat']    += row.endlat
           coord_sums[min_indx]['nb']        += 1


       #************************NEW CENTROIDS******************************#
       # Copy rather than alias: the in-place update below must not change oldcentroids,
       # otherwise the convergence check would be trivially true after one pass.
       oldcentroids = deepcopy(newcentroids)
       for i in range(nb_clusters):
            newcentroids[i]['startlong'] =  coord_sums[i]['startlong']/coord_sums[i]['nb']
            newcentroids[i]['startlat']  =  coord_sums[i]['startlat']/coord_sums[i]['nb']
            newcentroids[i]['endlong']   =  coord_sums[i]['endlong']/coord_sums[i]['nb']
            newcentroids[i]['endlat']    =  coord_sums[i]['endlat']/coord_sums[i]['nb']

       #************************SHOULD WE STOP******************************#
       ittr_conv = ittr_conv + 1
       if oldcentroids == newcentroids:
          STOP = 1
      
    return(newcentroids)  
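# A hedged refinement sketch (not the original author's code): materialising the result set once
# lets a k-means style loop iterate over the rows repeatedly without re-issuing the CQL query on
# every pass, as kmeans() above does. The keyspace name 'e08' is taken from the function above.
def fetch_trips(query, keyspace='e08'):
    from cassandra.cluster import Cluster
    cluster = Cluster()
    session = cluster.connect(keyspace)
    try:
        # list() drives paging to completion so the rows can be reused by every iteration.
        return list(session.execute(query))
    finally:
        cluster.shutdown()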
from cassandra.cluster import Cluster
import copy
from datetime import datetime

cluster = Cluster()
session = cluster.connect("creditcard")


def get_by_query(table_name, args):

    q_string = dict(copy.copy(args))
    terms = []

    for k, v in q_string.items():
        terms.append(str(k) + "='" + str(v) + "'")

    if len(terms) > 0:
        wc = "WHERE " + " AND ".join(terms)
    else:
        wc = ""

    q = "SELECT * FROM " + table_name + " " + wc

    rows = session.execute(q)
    print("Query = " + q)
    return rows


def get_costumer_by_id(cc_num):
    args = {"cc_num": cc_num}
    row = get_by_query("customer", args)
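# A hedged alternative sketch (not the original code): the same lookup with driver-side value
# binding, so quoting and escaping are handled by the driver. Column and table names still have
# to be trusted input, since CQL identifiers cannot be bound as parameters.
def get_by_query_bound(table_name, args):
    columns = list(args.keys())
    where = " AND ".join(col + " = %s" for col in columns)
    q = "SELECT * FROM " + table_name + ((" WHERE " + where) if columns else "")
    print("Query = " + q)
    return session.execute(q, [args[col] for col in columns])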
Exemple #48
 def __init__(self):
     hosts = [CASSANDRA_DNS]
     cluster = Cluster(hosts)
     self.session = cluster.connect()
     self.session.set_keyspace('birdfeed')
Exemple #49
#!/usr/bin/env python2.7

#pip install kafka-python
#pip install cassandra-driver

import time
import datetime
from cassandra.cluster import Cluster
cluster = Cluster(['10.0.0.1', '10.0.0.2', '10.0.0.3'])
session = cluster.connect('kafka')

print('Bridge Kafka Cassandra written in Python')
from kafka import KafkaConsumer
consumer = KafkaConsumer('topic2put', bootstrap_servers='10.0.0.5:32777')
for msg in consumer:
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
    print(st + ':' + msg.value)
    session.execute(
        "insert into kafka.telemetry (topic, event_time,valore) values('topic2put',toTimestamp(now()),'"
        + msg.value + "') using ttl 20;")
Exemple #50
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster
from cassandra.query import BatchStatement

cluster = Cluster(auth_provider=PlainTextAuthProvider(username='******',
                                                      password='******'))
connection = cluster.connect()

connection.execute('''
        CREATE KEYSPACE IF NOT EXISTS lecture_vcs WITH replication = {
            'class': 'SimpleStrategy',
            'replication_factor': '1'
        };
    ''')

batch_list = [
    '''
    UPDATE lecture_vcs.users
    SET
        data = {"role": 'teacher', "date_registered": '2019-10-10'},
        email = '*****@*****.**'
    WHERE user_id = 1;
    ''', '''
    UPDATE lecture_vcs.groups
    SET user_email = '*****@*****.**'
    WHERE group_id = 2 and group_name = 'km-62' and user_id = 1;
    '''
]


def execute_batch(statement_list):
    def test_heart_beat_timeout(self):
        """
        Test to ensure the hosts are marked as down after a OTO is received.
        Also to ensure this happens within the expected timeout
        @since 3.10
        @jira_ticket PYTHON-762
        @expected_result all the hosts have been marked as down at some point

        @test_category metadata
        """
        number_of_dcs = 3
        nodes_per_dc = 100

        query_to_prime = "INSERT INTO test3rf.test (k, v) VALUES (0, 1);"

        idle_heartbeat_timeout = 5
        idle_heartbeat_interval = 1

        start_and_prime_cluster_defaults(number_of_dcs, nodes_per_dc,
                                         CASSANDRA_VERSION)
        self.addCleanup(stop_simulacron)

        listener = TrackDownListener()
        executor = ThreadTracker(max_workers=16)

        # We need to disable compression since it's not supported in simulacron
        cluster = Cluster(
            compression=False,
            idle_heartbeat_interval=idle_heartbeat_interval,
            idle_heartbeat_timeout=idle_heartbeat_timeout,
            executor_threads=16,
            execution_profiles={
                EXEC_PROFILE_DEFAULT:
                ExecutionProfile(load_balancing_policy=RoundRobinPolicy())
            })
        self.addCleanup(cluster.shutdown)

        cluster.scheduler.shutdown()
        cluster.executor = executor
        cluster.scheduler = _Scheduler(executor)

        session = cluster.connect(wait_for_all_pools=True)
        cluster.register_listener(listener)

        log = logging.getLogger()
        log.setLevel('CRITICAL')
        self.addCleanup(log.setLevel, "DEBUG")

        prime_query(query_to_prime, then=NO_THEN)

        futures = []
        for _ in range(number_of_dcs * nodes_per_dc):
            future = session.execute_async(query_to_prime)
            futures.append(future)

        for f in futures:
            f._event.wait()
            self.assertIsInstance(f._final_exception, OperationTimedOut)

        prime_request(PrimeOptions(then=NO_THEN))

        # We allow from some extra time for all the hosts to be to on_down
        # The callbacks should start happening after idle_heartbeat_timeout + idle_heartbeat_interval
        time.sleep((idle_heartbeat_timeout + idle_heartbeat_interval) * 2)

        for host in cluster.metadata.all_hosts():
            self.assertIn(host, listener.hosts_marked_down)

        # In this case HostConnection._replace shouldn't be called
        self.assertNotIn("_replace", executor.called_functions)
Exemple #52
def addToCassandra(mean,movie):
	cluster = Cluster()
	session = cluster.connect('movie_reviews')
	session.execute("CREATE TABLE IF NOT EXISTS movie_reviews.movie_sentiment ( movie_name text, sentiment int, PRIMARY KEY (movie_name,sentiment) );")
	session.execute("INSERT INTO movie_reviews.movie_sentiment (movie_name,sentiment) VALUES (%s,%s);", (movie,int(mean)))	
def monthRetrieve(
        startTime,
        endTime=datetime.datetime.today(),
        fields1=[
            'trade_status', 'close', 'mkt_freeshares', 'mkt_cap_float',
            'mfd_buyamt_d', 'mfd_sellamt_d', 'roa', 'pe', 'pb'
        ],
        option1="ruleType=8;unit=1;traderType=1;Period=M;Fill=Previous;PriceAdj=B",
        multi_mfd=True):
    # cassandra connect
    cluster = Cluster(['192.168.1.111'])
    session = cluster.connect('factors')  # factors: factors_month

    # Start the Wind API
    w.start()

    # Get the tradable dates (monthly)
    times = w.tdays(startTime, endTime, "Period=M").Times
    timeList = []
    for i in range(len(times)):
        row = str(times[i])
        row = row[:row.find(' ')]
        timeList.append(row)
    print(timeList)

    # # [Decoupled: moved to stock.py, refreshed periodically] check data validity.
    # Fetching every tradable A-share for a given month meant one data point per stock per request,
    # spread over many dates and far too many requests, so this was changed to pulling all
    # factors for a single stock in bulk instead.
    # stocks = w.wset("SectorConstituent", u"sector=全部A股;field=wind_code")
    # validStocks ={}
    # # Total stock: 3183 [2017-04-13]
    # print (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "Total A stocks number: ", len(stocks.Data[0]))

    # # stock status update statement
    # updateStmt = session.prepare('''INSERT INTO stock_info(stock, ipo_date, trade_status) VALUES (?,?,?)''')
    #
    # #for stock in ["000852.SZ","603788.SH","603987.SH","603988.SH","603989.SH","603990.SH","603991.SH","603993.SH"]:
    # #for stock in ["000852.SZ","603788.SH","603990.SH","603991.SH","603993.SH"]:
    # for stock in stocks.Data[0]:
    #     ipo_status = w.wsd(stock, "ipo_date, trade_status", datetime.datetime.today())
    #     #print (ipo_status)
    #     try:
    #         days = (datetime.datetime.today() - ipo_status.Data[0][0]).days
    #         # trade_status cannot be captured by a single variable; it is a time-series factor.
    #         # The 0/1 here only marks whether the IPO requirement is satisfied.
    #         if  days > 90 and ipo_status.Data[1][0] == "交易":
    #         # if  days > 90:
    #             validStocks[stock] = ipo_status.Data[1][0]
    #             session.execute(updateStmt, (stock, ipo_status.Data[0][0], '1'))
    #         else:
    #             # set status 0
    #             session.execute(updateStmt, (stock, ipo_status.Data[0][0], '0'))
    #             print (" Set invalid data: ", stock, str(ipo_status.Data[0][0]))

    #     except TypeError:
    #         print (" -- Log TypeError at Stock: ", stock, " :\t", str(ipo_status.Data[0][0]))
    # Valid: 2819 [2017-04-13]
    # tradable stocks' collection
    rows = session.execute(
        '''SELECT stock, ipo_date FROM stock_info WHERE trade_status = '1' ALLOW FILTERING '''
    )
    validStocks = {}
    validStockCode = []
    for row in rows:
        validStocks[row.stock] = row.ipo_date
        validStockCode.append(row.stock)

    validN = len(validStocks)
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
          " valid stocks' number: ", validN)
    #print (validStocks)

    ## Pull the factors in stages; after each stage, write them to the DB asynchronously
    if multi_mfd == True:
        columns = fields1 + [
            'mfd_buyamt_d2', 'mfd_sellamt_d2', 'mfd_buyamt_d4',
            'mfd_sellamt_d4'
        ]
    else:
        columns = fields1
    dataList = []  # buffer for pulled data
    cnt = 0  # how many stocks have been pulled so far
    index = 0  # position of the previous dump, used to recover the stock code by index
    CHUNK_SIZE = 300  # number of stocks dumped asynchronously per round

    preparedStmt = session.prepare(
        '''INSERT INTO factors_month(stock, factor, time, value) VALUES (?,?,?,?)'''
    )
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
          " ------ Starting to insert to DB")

    # Pull the trade status so the data can be filtered later
    hasTradeStatus = False
    if len(fields1) > 1 and fields1[0] == 'trade_status':
        hasTradeStatus = True

    ## Iterate over all stocks
    for stock, ipo_date in validStocks.items():
        # Only take data after the IPO [requirement changed: ROA before the IPO may also be useful]
        # start = startTime if startTime > ipo_date.date() else ipo_date.date()
        start = startTime
        # Same variable but with different parameters, so it has to be pulled in several requests
        wsd_data = w.wsd(stock, fields1, start, endTime, option1).Data
        if multi_mfd == True:
            fields2 = ['mfd_buyamt_d', 'mfd_sellamt_d']
            option2 = "unit=1;traderType=2;Period=M;Fill=Previous;PriceAdj=B"
            wsd_data = wsd_data + w.wsd(stock, fields2, start, endTime,
                                        option2).Data
            option3 = "unit=1;traderType=4;Period=M;Fill=Previous;PriceAdj=B"
            wsd_data = wsd_data + w.wsd(stock, fields2, start, endTime,
                                        option3).Data

        ## [Change: momentum calculation moved out separately, for extensibility] mmt = close_1 / close_2; growth rate is 0 when data is missing
        # mmt = []
        # mmt.append(1)
        # for i in range(1, len(wsd_data[0])):
        #     if wsd_data[0][i] is not None and wsd_data[0][i] != 0:
        #         mmt.append(wsd_data[0][i] / wsd_data[0][i-1])
        #     else:
        #         mmt.append(float('nan'))
        # wsd_data.append(mmt)
        dataList.append(wsd_data)
        cnt += 1
        # Dump data asynchronously in stages, 300 stocks / round
        if cnt % CHUNK_SIZE == 0:
            for s in range(index, cnt):
                for i in range(len(columns)):
                    for j in range(len(dataList[s - index][i])):
                        #print (validStocks[s],columns[i],timeList[j],dataList[s - index][i][j])
                        try:
                            value = dataList[s - index][i][j]
                            if hasTradeStatus == True and i == 0:
                                # treat the trade status (value "交易" = trading) as a factor
                                if value is not None and value == "交易":
                                    value = 1
                                else:
                                    value = 0
                            elif value is not None:
                                value = float(value)
                            else:
                                value = float('nan')
                        except (ValueError, TypeError, KeyError) as e:
                            value = float('nan')
                            print("--Log ValueError in ",
                                  validStockCode[s], "\t", columns[i], "\t",
                                  str(timeList[j]), "\t", str(value))
                            print(e)
                            print(
                                "--------------------------------------------------------------------------"
                            )
                        except IndexError as e:
                            print(
                                "--------------------------------------------------------------------------"
                            )
                            print(
                                "len s: %d, len i: %d, len j: %d ~ " %
                                (cnt, len(columns), len(timeList)),
                                (s - index, i, j))
                            print(e)
                        session.execute_async(preparedStmt,
                                              (validStockCode[s], columns[i],
                                               timeList[j], value))
            # Record the position of the last dump and clear the buffer
            index = cnt
            dataList = []
            print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                  '------ Dump NO.%d end at stock %s \n' % (cnt, stock))

    print("---- Last chunk size: ", len(dataList))
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
          '---------------- Pulling finished!\n')

    # Insert the remaining data into Cassandra
    for s in range(index, cnt):
        for i in range(len(columns)):
            for j in range(len(dataList[s - index][i])):
                #print (validStocks[s],columns[i],timeList[j],dataList[s - index][i][j])
                try:
                    value = dataList[s - index][i][j]
                    if hasTradeStatus == True and i == 0:
                        if value is not None and value == "交易":
                            value = 1
                        else:
                            value = 0
                    elif value is not None:
                        value = float(value)
                    else:
                        value = float('nan')
                except (ValueError, TypeError, KeyError) as e:
                    value = float('nan')
                    print("--Log ValueError in ", validStockCode[s], "\t",
                          columns[i], "\t", str(timeList[j]), "\t", str(value))
                    print(e)
                    print(
                        "--------------------------------------------------------------------------"
                    )
                except IndexError as e:
                    print(
                        "--------------------------------------------------------------------------"
                    )
                    print(
                        "len s: %d, len i: %d, len j: %d ~ " %
                        (cnt, len(columns), len(timeList)), (s - index, i, j))
                    print(e)
                session.execute_async(
                    preparedStmt,
                    (validStockCode[s], columns[i], timeList[j], value))

    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
          '---------------- Persistion finished!\n')

    #result testing
    print("---------- Inserstion Testing: ")
    rows = session.execute(
        "select * from factors_month where stock='000852.SZ' and factor in ('roa', 'trade_status') and time > '2017-01-02'"
    )
    for row in rows:
        print(row.stock, row.factor, row.time, row.value)

    # close connection with cassandra
    cluster.shutdown()
class HeartbeatTest(unittest.TestCase):
    """
    Test to validate failing a heartbeat check doesn't mark a host as down

    @since 3.3
    @jira_ticket PYTHON-286
    @expected_result host should not be marked down when heartbeat fails

    @test_category connection heartbeat
    """

    def setUp(self):
        self.cluster = Cluster(protocol_version=PROTOCOL_VERSION, idle_heartbeat_interval=1)
        self.session = self.cluster.connect(wait_for_all_pools=True)

    def tearDown(self):
        self.cluster.shutdown()

    @local
    def test_heart_beat_timeout(self):
        # Setup a host listener to ensure the nodes don't go down
        test_listener = TestHostListener()
        host = "127.0.0.1"
        node = get_node(1)
        initial_connections = self.fetch_connections(host, self.cluster)
        self.assertNotEqual(len(initial_connections), 0)
        self.cluster.register_listener(test_listener)
        # Pause the node
        try:
            node.pause()
            # Wait for connections associated with this host go away
            self.wait_for_no_connections(host, self.cluster)
            # Resume paused node
        finally:
            node.resume()
        # Run a query to ensure connections are re-established
        current_host = ""
        count = 0
        while current_host != host and count < 100:
            rs = self.session.execute_async("SELECT * FROM system.local", trace=False)
            rs.result()
            current_host = str(rs._current_host)
            count += 1
            time.sleep(.1)
        self.assertLess(count, 100, "Never connected to the first node")
        new_connections = self.wait_for_connections(host, self.cluster)
        self.assertIsNone(test_listener.host_down)
        # Make sure underlying new connections don't match previous ones
        for connection in initial_connections:
            self.assertFalse(connection in new_connections)

    def fetch_connections(self, host, cluster):
        # Given a cluster object and host grab all connection associated with that host
        connections = []
        holders = cluster.get_connection_holders()
        for conn in holders:
            if host == str(getattr(conn, 'host', '')):
                if isinstance(conn, HostConnectionPool):
                    if conn._connections is not None and len(conn._connections) > 0:
                        connections.append(conn._connections)
                else:
                    if conn._connection is not None:
                        connections.append(conn._connection)
        return connections

    def wait_for_connections(self, host, cluster):
        retry = 0
        while(retry < 300):
            retry += 1
            connections = self.fetch_connections(host, cluster)
            if len(connections) != 0:
                return connections
            time.sleep(.1)
        self.fail("No new connections found")

    def wait_for_no_connections(self, host, cluster):
        retry = 0
        while(retry < 100):
            retry += 1
            connections = self.fetch_connections(host, cluster)
            if len(connections) == 0:
                return
            time.sleep(.5)
        self.fail("Connections never cleared")
Exemple #55
import csv
from cassandra.cluster import Cluster

# Write sentiment score to Cassandra

cluster = Cluster(['169.53.141.8'])
session = cluster.connect('msd_01')

# Open the csv file with tracks, sentiment score
with open('sentiment.csv', 'rb') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')

    numRows = 0

    for csvrow in reader:
        if numRows % 5000 == 0:
            print(numRows)

        track_id = csvrow[0]
        sentiment = csvrow[1]

        # Write a query to update columns in the database
        update_query = """update songs set
                     sentiment = %s
                     where track_id = '%s'""" % (sentiment, track_id)

        session.execute(update_query)

        numRows += 1

cluster.shutdown()
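# A hedged alternative sketch (not the original script): the same update expressed with a
# prepared statement, so the value is bound by the driver rather than interpolated into the CQL
# string. The numeric type of the sentiment column is an assumption.
def write_sentiment(session, rows):
    update_stmt = session.prepare("UPDATE songs SET sentiment = ? WHERE track_id = ?")
    for track_id, sentiment in rows:
        session.execute(update_stmt, (float(sentiment), track_id))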
import mysql.connector
from cassandra.cluster import Cluster

cluster = Cluster(['34.196.59.158'], port=9042)
session = cluster.connect('zubat')

cnx = mysql.connector.connect(
    user='******',
    password='******',
    host='sensedb.cenacuetgbbz.us-east-1.rds.amazonaws.com',
    database='sense2')
cursor = cnx.cursor()

insert_company_table = ("INSERT INTO company(name,type) VALUES (%s,%s) ")
insert_deploy_table = (
    "INSERT INTO deploy(company_id, name,date_time_start,date_time_end,street_address,zip,city,state,internal_id) VALUES(%s,%s, %s,%s,%s,%s,%s,%s,%s) "
)
insert_deploy_data_sense = (
    "INSERT INTO deploy_data_sense(deploy_id, sense_time, week_days, holiday) VALUES (%s, %s, %s, %s)"
)
insert_sensor_table = (
    "INSERT INTO sensor(id, deploy_id, register_date, last_update, model, version) VALUES (%s, %s, %s, %s, %s, %s) "
)
insert_sensor_data_sense_table = (
    "INSERT INTO sensor_data_sense(sensor_id, name) VALUES ( %s, %s )")
insert_whitelist_table = (
    "INSERT INTO whitelist_sense(mac_address, deploy_id) VALUES ( %s, %s)")
insert_whitelist_person_data_table = (
    "INSERT INTO whitelist_person_data_sense(whitelist_id, keyword, content) VALUES (%s, %s, %s)"
)
Exemple #57
def main():
    cluster = Cluster(['127.0.0.1'], port=9042)
    session = cluster.connect()

    log.info("creating keyspace...")
    session.execute("""
        CREATE KEYSPACE IF NOT EXISTS %s
        WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': '2' }
        """ % KEYSPACE)

    log.info("setting keyspace...")
    session.set_keyspace(KEYSPACE)

    session.execute("""DROP TABLE  mytable""")

    log.info("creating table...")
    session.execute("""
        CREATE TABLE IF NOT EXISTS mytable (
            event_number                                       text,
            date_time                                          text,
            address_rounded_to_block_number_or_intersection    text,
            patrol_beat                                        text,
            incident_type                                      text,
            incident_type_description                          text,
            priority                                            int,
            time                                               time,
            hour                                               text,
            priority_hour                                      text,
            PRIMARY KEY (event_number)
        )
        """)

    query = SimpleStatement("""
        INSERT INTO mytable (event_number, date_time, address_rounded_to_block_number_or_intersection, 
                            patrol_beat, incident_type, incident_type_description, 
                            priority, time, hour, priority_hour)

        VALUES (%(event_number)s, %(date_time)s, %(address_rounded_to_block_number_or_intersection)s,
                %(patrol_beat)s, %(incident_type)s, %(incident_type_description)s,
                %(priority)s, %(time)s, %(hour)s, %(priority_hour)s)
        """, consistency_level=ConsistencyLevel.ONE)

    prepared = session.prepare("""
        INSERT INTO mytable (event_number, date_time, address_rounded_to_block_number_or_intersection, 
                            patrol_beat, incident_type, incident_type_description, 
                            priority, time, hour, priority_hour)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """)

    import pandas as pd
    data = pd.read_csv('oak-crimes-for-cassandra.csv').dropna().sample(10000, random_state=42)


    from tqdm import tqdm
    
    for i, row in tqdm(data.iterrows()):
        # log.info("inserting row %d" % i)
        # log.info(tuple(row))
        # session.execute(query, dict(key="key%d" % i, a='a', b='b'))
        session.execute(prepared, tuple(row))

    future = session.execute_async("SELECT * FROM mytable")
    # log.info("key\tcol1\tcol2")
    # log.info("---\t----\t----")

    try:
        rows = future.result()
    except Exception:
        log.exception("Error reading rows:")
        return

    for row in rows:
        log.info(row)
Exemple #58
#creates a given number of threads (pass by argument) and assigns an equal portion of the workload to each one
#the main thread waits actively until they finish

from cassandra.cluster import Cluster
import threading
import sys
import itertools

cluster = Cluster(["minerva-5"])
session = cluster.connect("case18")
query = "SELECT * FROM case18.particle WHERE partid=?"
prepared = session.prepare(query)

num_keys = 10000
max_parallelism = int(sys.argv[1])

count = itertools.count()  #starts at 0
finished = False


def call(keys):
    global finished
    for k in keys:
        result = session.execute(prepared, [k])
        # next returns value and increments subsequently
    if next(count) == max_parallelism - 1:
        finished = True


ths = []
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster
from cassandra.query import SimpleStatement
import config
""" Find the number of speeds > 100 in the data set. """

ap = PlainTextAuthProvider(username=config.username, password=config.password)
node_ips = config.hosts
cluster = Cluster(node_ips,
                  protocol_version=4,
                  auth_provider=ap,
                  port=config.port)
session = cluster.connect('part_3_version_0')

query = 'SELECT speed FROM loopdata_by_detector'
statement = SimpleStatement(query, fetch_size=5000)

count = 0
for row in session.execute(statement):
    if isinstance(row.speed, int) and row.speed > 100:
        count += 1

print("\nNumber of speeds > 100: " + str(count) + "\n")
cluster.shutdown()
Exemple #60
from kafka import KafkaConsumer
import json
from cassandra.cluster import Cluster

address = 'localhost'

cluster = Cluster([address])
session = cluster.connect("emoji")

consumer = KafkaConsumer('France', bootstrap_servers=['localhost:9092'])

for message in consumer:

    print("%s:%d:%d: key=%s value=%s" %
          (message.topic, message.partition, message.offset, message.key,
           message.value))
    session.execute("INSERT INTO pays (nom_pays, emojis) VALUES (" +
                    message.topic + ", ['" + message.value + "']);")