# Add /test to the Python path so that helpers.utils can be imported when this
# script is run from Spark.
sys.path.append(dirname(dirname(dirname(__file__))))
import helpers.utils as utils

conf = utils.createSparkConf()
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)


def verify():
    """Check that customerTable returns as many rows as Cloudant reports.

    Selects miles_ytd and total_miles from customerTable, prints the schema
    and the first five rows, then asserts that the row count equals the
    module-level ``doc_count``. ``doc_count`` is assigned further down in
    this script, so this function must only be called after that assignment.

    Raises:
        AssertionError: if the row count differs from ``doc_count``.
    """
    customerData = sqlContext.sql(
        "SELECT miles_ytd, total_miles FROM customerTable")
    customerData.printSchema()
    customerData.show(5)
    actual_count = customerData.count()
    assert actual_count == doc_count, (
        "customerTable has {} rows but Cloudant reports doc_count {}".format(
            actual_count, doc_count))


# Ask Cloudant directly for the n_customer database info to get the expected
# document count.
test_properties = utils.get_test_properties()
url = "https://{}/{}".format(test_properties["cloudanthost"], 'n_customer')
print(url)
response = requests.get(
    url,
    auth=(test_properties["cloudantusername"],
          test_properties["cloudantpassword"]),
    # Without a timeout an unresponsive host would hang the test run forever.
    timeout=60)
assert response.status_code == 200, (
    "GET {} returned HTTP {}".format(url, response.status_code))
doc_count = response.json().get("doc_count")
# .get() yields None when the key is absent; fail here with a clear message
# rather than later inside verify().
assert doc_count is not None, "Cloudant response has no 'doc_count' field"

print('About to test com.cloudant.spark.CloudantRP for n_customer'
      ' with setting schemaSampleSize to 5')
sqlContext.sql(
    " CREATE TEMPORARY TABLE customerTable"
    " USING com.cloudant.spark.CloudantRP"
    " OPTIONS ( schemaSampleSize '5',database 'n_customer')")

verify()
bookingData = spark.sql( "SELECT customerId, dateOfBooking FROM bookingTable1 WHERE customerId = '*****@*****.**'" ) bookingData.printSchema() # verify expected count print("bookingData.count() = ", bookingData.count()) assert bookingData.count() == total_rows # verify customerId = '*****@*****.**' for booking in bookingData.collect(): assert booking.customerId == '*****@*****.**' # query the index using Cloudant API to get expected count test_properties = utils.get_test_properties() url = "https://" + test_properties[ "cloudanthost"] + "/n_booking/_design/view/_search/n_bookings?q=customerId:[email protected]" response = requests.get(url, auth=(test_properties["cloudantusername"], test_properties["cloudantpassword"])) assert response.status_code == 200 total_rows = response.json().get("total_rows") # record a warning if there is no data to test, will check for 0 doc anyway if total_rows == 0: warnings.warn("No data for [email protected] in the n_booking database!") print('About to test com.cloudant.spark for n_booking') spark.sql( " CREATE TEMPORARY TABLE bookingTable1 USING com.cloudant.spark OPTIONS ( database 'n_booking')"