def produce_song_pairs(song_list):
	"""Yield ((song_a, song_b), 1) for every unordered pair in song_list.

	Each pair is emitted with an initial count of 1 so a downstream
	reduceByKey can sum co-occurrence frequencies.
	"""
	return ((pair, 1) for pair in combinations(song_list, 2))

def cassandra_row_format(song_pair):
	"""Convert a ((song_a, song_b), frequency) tuple into two Cassandra rows.

	The pair is written symmetrically — one row per direction — so that the
	frequency can be looked up from either song's id.
	"""
	(first, second), frequency = song_pair
	first, second = int(first), int(second)
	return [
		{"song_id": first, "freq_song_id": second, "frequency": frequency},
		{"song_id": second, "freq_song_id": first, "frequency": frequency},
	]


if __name__ == "__main__":
	# Batch job: count how often song pairs co-occur in today's user log and
	# persist pairs above a frequency threshold to Cassandra.
	conf = SparkConf().setAppName("FrequentPatternsSongs").setMaster(config.SPARK_MASTER).set("spark.cassandra.connection.host", config.CASSANDRA_SEED_NODE_IP)
	sc = CassandraSparkContext(conf=conf)
	# Only pairs seen together strictly more than this many times are kept.
	frequency_threshold = 3

	# Today's log file, e.g. "2024-01-31-usersonglog.txt".
	filename = datetime.now().strftime("%Y-%m-%d")+"-usersonglog.txt"

	try:
		sc.textFile(config.HDFS_URL+":"+config.HDFS_PORT+config.LOG_FOLDER+filename) \
			.filter(time_range_filter) \
			.map(parse_log_entry) \
			.reduceByKey(lambda song1, song2: song1+song2) \
			.map(lambda x: sorted(set(x[1]))) \
			.flatMap(produce_song_pairs) \
			.reduceByKey(lambda a,b: a+b) \
			.filter(lambda song_pair: song_pair[1] > frequency_threshold) \
			.flatMap(cassandra_row_format) \
			.saveToCassandra(config.CASSANDRA_KEYSPACE, "frequent_song_pairs")
	finally:
		# BUG FIX: SparkContext has no close() method — the original
		# sc.close() raised AttributeError. stop() is the correct API,
		# and the finally block guarantees it runs even if the job fails.
		sc.stop()