from pyspark import SparkContext
from pyspark.sql import HiveContext, Row

from stockRdd import StockRdd
from dateInterval import DateInterval, DateIntervalManager

spark_url = "spark://10.211.55.4:7077"
spark_context_name = "Find Down Stocks"
included_python_files_package = ['/var/machine_learning/stocks/python/stocks_python.zip']
mysql_url = "jdbc:mysql://10.211.55.4:3306/stocks?user=parallels&password=dellc123"
data_files = "file:///var/data/stocks/historical_data/*.csv"

# Connect to the Spark master and ship the project zip to the executors so the
# stockRdd / dateInterval modules are importable there.
sc = SparkContext(spark_url, spark_context_name, pyFiles=included_python_files_package)
sqlContext = HiveContext(sc)

# Load every historical CSV row and drop exact duplicate lines.
sample_data_rdd = sc.textFile(data_files).distinct()

yesterday_date = DateInterval.getYesterdayDate()

dailyDateIntervalDictionaryToCalculateFor = DateIntervalManager.createDailyIntervalDictionaryForPastYear(yesterday_date)
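# (Assumption, inferred from the method name: the dictionary holds one interval
# per day for the year ending yesterday, so each CSV row can be bucketed by date.)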

number_of_days_in_dictionary = dailyDateIntervalDictionaryToCalculateFor.getNumberOfDaysInDictionary()

minimum_number_of_days = int((4.0 / 7.0) * float(number_of_days_in_dictionary))
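# A quick sanity check on the threshold (assumption, not from the source): for
# a 365-day dictionary this works out to int((4.0 / 7.0) * 365.0) = 208 data
# points, comfortably below the ~252 trading days in a typical year, so symbols
# with a few missing quotes still pass while sparsely traded ones are dropped.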

mapStockCsvToKeyValueClosure = StockRdd.getMapStockCsvToKeyValueForDatesInDictionaryClosure(dailyDateIntervalDictionaryToCalculateFor)
symbol_creation_function_closure = StockRdd.getSymbolDataInstanceForDateDictionaryDataPointsClosure(dailyDateIntervalDictionaryToCalculateFor, yesterday_date)
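# (Assumption, inferred from usage below: mapStockCsvToKeyValueClosure turns a
# raw CSV line into a (symbol, [data_point]) pair, returning None for rows that
# don't fall inside the dictionary's intervals; symbol_creation_function_closure
# is presumably applied in a later map step that is not shown in this section.)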

# Parse each CSV row into a (symbol, data_points) pair, drop rows that fail to
# parse, merge each symbol's data points, sort them and compute the deltas
# between consecutive days, then keep only symbols with enough data points.
symbol_down_stocks_data_filtered = sample_data_rdd.map(mapStockCsvToKeyValueClosure)\
                                           .filter(lambda line: line is not None)\
                                           .reduceByKey(lambda a, b: a + b)\
                                           .map(lambda symbol_data: (symbol_data[0], StockRdd.sort_and_compute_deltas(list(symbol_data[1]))))\
                                           .filter(lambda symbol_data: len(list(symbol_data[1])) > minimum_number_of_days)
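
# A minimal, hypothetical way to spot-check the result: pull a few
# (symbol, deltas) pairs back to the driver before chaining the remaining
# down-stock filtering steps (not shown in this section).
for symbol, deltas in symbol_down_stocks_data_filtered.take(5):
    print(symbol, deltas)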