Ejemplo n.º 1
0
def data_ingestion():
    """Collect sixty bike-data snapshots, storing and plotting each one.

    Runs passes numbered 1 through 60. Every pass downloads the feed, derives
    the normalized station table, the JSON station list and the execution
    time from that download, writes the availability data, saves three
    histograms and finally calls ``spinning_cursor.spinning_cursor(60)``
    before the next pass. Table creation and the reference-table bulk insert
    happen on the first pass only.
    """
    # (column to plot, output name) for the histograms saved on every pass.
    histogram_specs = (
        ('availableBikes', 'available_bikes'),
        ('totalDocks', 'total_docks'),
        ('testStation', 'test_station'),
    )

    # One hour time window sampled at 1 minute intervals: passes 1..60.
    for run_number in range(1, 61):
        response = download_bike_data()

        # Three views of the same download, derived in a fixed order.
        stations_df = normalized_stationbean_data(response)
        stations_json = get_stationlist_info_json(response)
        executed_at = get_execution_time(response)

        is_first_run = run_number == 1
        if is_first_run:
            # One-time setup: tables must exist before the per-pass writes.
            output_data_keys(response)  # For information purposes alone.
            create_tables(stations_df['id'])
            bulk_insert_citibike_ref_table()

        insert_update_available_bike_table(executed_at, stations_json)

        # Simple plots to observe the data; the pass number is handed to the
        # plot helper along with the output directory, name and format.
        print("PLOT AND SAVE IMAGES FOR RUN: {}".format(run_number))
        for column, output_name in histogram_specs:
            plot_and_save_hist(stations_df, column, 'figures/citibike',
                               output_name, 'png', run_number)

        # Pause before the next pass (argument 60; presumably seconds --
        # confirm against the spinning_cursor module).
        spinning_cursor.spinning_cursor(60)
#                    GDP       Men     Total     Women
#        GDP    1.000000  0.495794  0.479050  0.497923
#        Men    0.495794  1.000000  0.971663  0.942572
#        Total  0.479050  0.971663  1.000000  0.977217
#        Women  0.497923  0.942572  0.977217  1.000000
#       
    print """
FINAL ANALYSIS: 
    We observe a weak correlation between education attainment and GDP.
    The correlation coefficients are closer to 0 than 1.
    This shows there is a greater scatter of the data points from the fitted line.
    At this point we cannot not conclude any direct relationship.
    
[Data Analysis and Correlation of Education to GDP data] ==> End
    """  
############# RUN MAIN ##########################
# Script entry point: runs the education/GDP pipeline step by step, calling
# sc.spinning_cursor(3) after each step (presumably a short pause with a
# spinner -- confirm against the sc module).
if __name__ == '__main__':
    # Step 1: ingest education data; the call also yields the min/max year
    # values that are passed to the GDP ingestion below.
    df_education_result, min_year, max_year = data_ingestion_education_info()
    sc.spinning_cursor(3)
    
    # Step 2: ingest World Bank GDP data for the year range from step 1.
    dataingestion_worldbankgdp_info(min_year, max_year)
    sc.spinning_cursor(3)
    
    # Step 3: build the GDP dataframe (takes no arguments, so it presumably
    # reads what step 2 stored -- verify against build_dataframe_gdp).
    df_gdp_result = build_dataframe_gdp()
    sc.spinning_cursor(3)
    
    # Step 4: analyse and correlate the education result with the GDP result.
    data_analysis_and_correlation(df_education_result, df_gdp_result)
    sc.spinning_cursor(3)