def analyze_n_dataframes_by_DBSCAN(number):
    """Run DBSCAN over the latest *number* weekly MTA dataframes and save the
    result, printing the negative-group count along the way."""
    frames = dfc.get_n_latest_mta_dataframes(number)
    saturday_strings = dfc.find_saturday_dates_strings(number)
    # Restrict to regular audit rows before clustering.
    regular_rows = frames[frames['DESC'] == 'REGULAR']
    clustered = ag.analyze(regular_rows, 1300, 3)
    print(ag.count_negative_groups(clustered))
    clustered.to_csv('data/dbscan_analysis_{}_to_{}.csv'.format(
        saturday_strings[-1], saturday_strings[0]))
def analyze_latest_dataframe_by_DBSCAN(station):
    """Cluster the newest MTA dataframe for a single *station* (using the
    analyzer's default DBSCAN parameters) and write the result to CSV."""
    latest = dfc.get_latest_mta_dataframe()
    last_saturday = dfc.find_last_saturday_string()
    station_rows = latest[latest['STATION'] == station]
    result = ag.analyze(station_rows)
    result.to_csv('data/dbscan_analysis_of_{}_{}.csv'.format(
        station, last_saturday))
def analyze_append_new_data_to_csv(fileName, eps, min_samples):
    """Analyze the latest MTA dataframe with DBSCAN, prepend it to an
    existing analysis CSV, and write the combined result to a new file.

    Parameters
    ----------
    fileName : str
        Path to an existing analysis CSV whose name embeds the start date,
        either 'dbscan_analysis_<start>_to_<end>.csv' or this function's own
        'dbscan_analysis_from_<start>_to_<end>.csv'.
    eps, min_samples
        DBSCAN parameters forwarded to ag.analyze.
    """
    warnings.filterwarnings('ignore')
    newData = dfc.get_latest_mta_dataframe()
    analyzedNewData = ag.analyze(newData, eps, min_samples)
    # Fix: index_col=0 restores the index the previous to_csv call wrote;
    # without it every append cycle grows a spurious 'Unnamed: 0' column.
    oldData = pd.read_csv(fileName, index_col=0)
    newTotal = pd.concat([analyzedNewData, oldData])
    saturday = dfc.find_last_saturday_string()
    # Fix: the old split('_')[2] only worked for the 'dbscan_analysis_<start>'
    # naming; fed this function's own 'from_<start>_to_' output it returned
    # the literal token 'from'. Handle both naming schemes explicitly.
    parts = fileName.split('_')
    if 'from' in parts:
        startDate = parts[parts.index('from') + 1]
    else:
        startDate = parts[2]
    newName = 'data/dbscan_analysis_from_{}_to_{}.csv'.format(
        startDate, saturday)
    print('Writing out to file: ', newName)
    newTotal.to_csv(newName)
def analyze_from_to_dbscan(fromDate, toDate):
    """DBSCAN-analyze turnstile data between two dates, saving both the
    per-row analysis and the derived total-rides-per-day series."""
    warnings.filterwarnings('ignore')
    raw = dfc.get_data_between_two_dates(fromDate, toDate)
    date_list = sorted(raw['DATE'].unique().tolist())
    regular_only = raw[raw['DESC'] == 'REGULAR']
    clustered = ag.analyze(regular_only, 1300, 3)
    fileName = 'data/dbscan_analysis_from_{}_to_{}.csv'.format(
        date_list[0], date_list[-1])
    clustered.to_csv(fileName)
    daily_totals = dfc.find_total_rides_per_day(clustered)
    daily_totals.to_csv(
        'dbscan_total_rides_over_time_from_{}_to_{}.csv'.format(
            date_list[0], date_list[-1]))
def analyze_from_to_piecewise(fromDate, toDate):
    """Analyze each Saturday's data in the range individually, then stitch
    the per-week CSVs into one date-sorted file and push the rides-over-time
    series to the database."""
    saturday_list = dfc.get_saturday_list(fromDate, toDate)
    base = 'data_by_date/dbscan_analysis_{}.csv'
    # First pass: analyze and persist one CSV per Saturday.
    for sat in saturday_list:
        weekly = dfc.get_data_from_date(sat)
        ag.analyze(weekly, 1300, 3).to_csv(base.format(sat))
    # Second pass: read the per-week files back and combine them.
    combined = pd.concat(pd.read_csv(base.format(s)) for s in saturday_list)
    combined.sort_values(by='DATE', inplace=True)
    rides_series = create_rides_over_time_csv(combined)
    dbm.connect_execute_rides_over_time(rides_series)
    combined.to_csv(
        'data_by_date/000_total_dbscan_analysis_{}_to_{}.csv'.format(
            saturday_list[0], saturday_list[-1]))
def analyze_with_different_dbscan_params(a, b, c, x, y, n):
    """Grid-search DBSCAN parameters over the latest *n* MTA dataframes.

    eps sweeps range(a, b, c) and min_samples sweeps range(x, y); each
    combination's analysis is written to its own CSV and progress is printed
    after every iteration.
    """
    warnings.filterwarnings('ignore')
    rawData = dfc.get_n_latest_mta_dataframes(n)
    saturdays = dfc.find_saturday_dates_strings(n)
    eps_values = range(a, b, c)
    min_samples_values = range(x, y)
    # Fix: the original computed (b - a) / c * (y - x), a float that
    # miscounts whenever c does not evenly divide (b - a); len(range(...))
    # matches the loops exactly and is always an int.
    iterations = len(eps_values) * len(min_samples_values)
    print('total DBSCAN param iterations:', iterations)
    current = 0
    for eps in eps_values:
        for min_samples in min_samples_values:
            analyzedData = ag.analyze(rawData, eps=eps,
                                      min_samples=min_samples)
            fileName = 'data/dbscan_data_outputs/dbscan_eps={}_min_samples={}_from_{}_to_{}.csv'.format(
                eps, min_samples, saturdays[-1], saturdays[0])
            analyzedData.to_csv(fileName)
            current += 1
            print(current, '/', iterations)
def analyze_latest_dataframe_by_DBSCAN_():
    """Run the standard DBSCAN analysis (eps=1300, min_samples=3) on the
    newest MTA dataframe and save it under the last Saturday's date."""
    latest = dfc.get_latest_mta_dataframe()
    last_saturday = dfc.find_last_saturday_string()
    regular_rows = latest[latest['DESC'] == 'REGULAR']
    ag.analyze(regular_rows, 1300, 3).to_csv(
        'data/dbscan_analysis_{}.csv'.format(last_saturday))
def analyze_latest_dataframe_by_OPTICS():
    """Run the OPTICS analysis on the newest MTA dataframe and save it under
    the last Saturday's date."""
    latest = dfc.get_latest_mta_dataframe()
    last_saturday = dfc.find_last_saturday_string()
    regular_rows = latest[latest['DESC'] == 'REGULAR']
    ag.analyze_optics(regular_rows, 1300).to_csv(
        'data/optics_analysis_{}.csv'.format(last_saturday))
def update_with_new_data():
    """Analyze the latest week of MTA data, save it under its Saturday date,
    and refresh the rides-over-time output.

    NOTE(review): unlike the other DBSCAN entry points, this one does not
    filter to DESC == 'REGULAR' rows first — preserved as-is; confirm intent.
    """
    latest = dfc.get_latest_mta_dataframe()
    clustered = ag.analyze(latest, 1300, 3)
    clustered.to_csv('data_by_date/dbscan_analysis_{}.csv'.format(
        dfc.find_last_saturday_string()))
    get_rides_over_time(clustered)