from pyspark import SparkContext
from pyspark.sql import HiveContext

# DateIntervalManager and StockRdd are part of the zipped package below, which is
#   shipped to the Spark workers via the pyFiles argument; their imports are assumed
#   to appear earlier in the original script
included_python_files_package = ['/var/machine_learning/stocks/python/stocks_python.zip']
mysql_url = "jdbc:mysql://localhost:3306/stocks?user=parallels&password=dellc123"
data_files = "file:///var/data/stocks/historical_data/Z*.csv"

# In a production environment, this value would be generated dynamically from the
#   current date; it is hard-coded here for this example
today_date = '2016-03-24'

# Instantiate the SparkContext used by this script; spark_url (the master URL) and
#   spark_context_name (the application name) are assumed to be defined earlier
sc = SparkContext(spark_url, spark_context_name, pyFiles=included_python_files_package)
sqlContext = HiveContext(sc)
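
# mysql_url above is not used in this section; presumably the clustering results are
#   written back to MySQL later in the script. A minimal sketch of such a write,
#   assuming a hypothetical results_rdd and table name (not the original code):
#
#   results_df = sqlContext.createDataFrame(results_rdd)
#   results_df.write.jdbc(url=mysql_url, table="stock_clusters", mode="overwrite")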

# Initialize the RDD with the stock data files
sample_data_rdd = sc.textFile(data_files).distinct()
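
# Optional sanity check: peek at a few raw lines to confirm the CSV files loaded
#   (the exact column layout depends on the historical-data files):
#
#   print(sample_data_rdd.take(3))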

# Create a dictionary of date intervals: 26 two-week spans covering the past year
dateDictionaryToCalculateFor = DateIntervalManager.createDateIntervalDictionaryForPastYear(today_date)
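
# For illustration only: DateIntervalManager ships in the bundled package, but slicing
#   the past year into 26 two-week windows can be sketched with just the standard
#   library (names and exact boundary conventions here are assumptions):
#
#   from datetime import datetime, timedelta
#   end_date = datetime.strptime(today_date, '%Y-%m-%d')
#   two_weeks = timedelta(days=14)
#   intervals = [(end_date - two_weeks * (i + 1), end_date - two_weeks * i)
#                for i in range(26)]    # newest interval first; 26 * 14 = 364 days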

# We want to ensure that any stock being clustered existed (and traded) during the
#   entire period. Markets are open at most ~5 of every 7 calendar days, so requiring
#   data on at least 4/7 of the days in the dictionary leaves headroom for holidays
#   and occasional gaps while still excluding stocks listed partway through the year
number_of_days_in_dictionary = dateDictionaryToCalculateFor.getNumberOfDaysInDictionary()
minimum_number_of_days_for_stock = int((4.0 / 7.0) * float(number_of_days_in_dictionary))
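
# Worked example: if the dictionary covers 26 * 14 = 364 days, then
#   minimum_number_of_days_for_stock = int((4.0 / 7.0) * 364) = 208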

# map_stock_csv_to_key_value_closure is a function closure that maps each CSV line to a
#   (symbol, data) key-value pair, keeping only lines whose dates fall inside the
#   intervals in the dictionary
map_stock_csv_to_key_value_closure = StockRdd.getMapStockCsvToKeyValueForDatesInDictionaryClosure(dateDictionaryToCalculateFor)
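
# The closure pattern bakes dateDictionaryToCalculateFor into a plain one-argument
#   function that Spark can serialize and ship to the workers. A minimal, hypothetical
#   sketch of the same pattern using a simple set of valid dates (the column layout
#   and all names here are assumptions, not the package's actual code):
#
#   def get_map_csv_closure(valid_dates):
#       def map_csv_line(line):
#           fields = line.split(',')            # assumed layout: symbol,date,price,...
#           symbol, date = fields[0], fields[1]
#           return (symbol, fields) if date in valid_dates else None
#       return map_csv_line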

# symbol_creation_function_closure is a function closure which will convert the list of csv data lines to a SymbolData object
#   which can return the data points we need to cluster a stock
symbol_creation_function_closure = StockRdd.getSymbolDataInstanceForDateDictionaryDataPointsClosure(dateDictionaryToCalculateFor, today_date)
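
# These closures are presumably chained together later in the script, roughly along
#   these lines (a sketch of the wiring, not the original code):
#
#   symbol_data_rdd = (sample_data_rdd
#                      .map(map_stock_csv_to_key_value_closure)
#                      .filter(lambda kv: kv is not None)
#                      .groupByKey()
#                      .map(symbol_creation_function_closure))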

# symbol_cluster_data_closure is a function closure which will convert a SymbolData
#   object into the list of data points on which the stocks will be clustered