    # Drop the MongoDB output database so stale results are cleared
    mongo_client = MongoClient()
    mongo_client.drop_database(db_out)
    mongo_client.close()
    print('database cleared')
    
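    # Column indices into each (id, tenant_id, user_id, item_id) transaction row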
    col_tenant_id = 1
    col_user_id = 2
    col_item_id = 3

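    # How many recommendations to produce for each user and for each item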
    num_to_recomm_per_user = 10
    num_to_recomm_per_item = 10
    
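    # Spark context wired to Cassandra; the checkpoint dir keeps long RDD
    # lineage chains (from iterative model training) from overflowing the stack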
    conf = (SparkConf()
            .setAppName("PysparkCollaborativeFiltering")
            .set("spark.cassandra.connection.host", spark_cassandra_connection_host))
    sc = CassandraSparkContext(conf=conf)
    sc.setCheckpointDir('checkpoint/')
    data = sc.cassandraTable("mykeyspace", "transactions", row_format=1).collect()  # row_format: tuple
    # (id, tenant_id, user_id, item_id)
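    # Collect once on the driver to extract the distinct tenant ids, then
    # re-parallelize the rows for the per-tenant processing below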
    tenant_ids = {row[col_tenant_id] for row in data}
    data_rdd = sc.parallelize(data)
    # data_rdd = sc.parallelize(data).map(list)
    
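    # Empty RDDs that will accumulate every tenant's recommendations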
    all_results_per_user = sc.emptyRDD()
    all_results_per_item = sc.emptyRDD()
    
    for t_id in tenant_ids:
        print("\nComputing recommendation for tenant {}...\n".format(t_id))
        per_tenant_rdd = data_rdd.filter(
            lambda x: x[col_tenant_id] == t_id).map(
            lambda l: ((l[col_user_id], l[col_item_id]), 1.0)).reduceByKey(
            lambda x, y: x + y).map(