Exemple #1
0
def analyze_n_dataframes_by_DBSCAN(number):
    """Analyze the data returned for ``number`` dataframes and save it as CSV.

    The data comes from ``dfc.get_n_latest_mta_dataframes(number)``; only
    rows whose ``DESC`` column equals ``'REGULAR'`` are passed on to
    ``ag.analyze``. The value of ``ag.count_negative_groups`` for the
    analysis result is printed, then the result is written to
    ``data/dbscan_analysis_<saturdays[-1]>_to_<saturdays[0]>.csv``.

    Args:
        number: Forwarded unchanged to ``dfc.get_n_latest_mta_dataframes``
            and ``dfc.find_saturday_dates_strings``.

    Raises:
        IndexError: If ``dfc.find_saturday_dates_strings`` returns an empty
            sequence (raised after the analysis has already run).
    """
    frames = dfc.get_n_latest_mta_dataframes(number)
    saturday_labels = dfc.find_saturday_dates_strings(number)

    regular_rows = frames[frames['DESC'] == 'REGULAR']
    # 1300 and 3 are passed positionally; other calls in this file name the
    # same two arguments eps and min_samples.
    result = ag.analyze(regular_rows, 1300, 3)
    print(ag.count_negative_groups(result))

    # The last label goes first in the file name, the first label goes last
    # (presumably the list is ordered newest-first -- confirm against dfc).
    start_label, end_label = saturday_labels[-1], saturday_labels[0]
    out_path = 'data/dbscan_analysis_{}_to_{}.csv'.format(
        start_label, end_label)
    result.to_csv(out_path)
Exemple #2
0
def analyze_latest_dataframe_by_DBSCAN(station):
    """Analyze the latest dataframe for a single station and save it as CSV.

    Rows of ``dfc.get_latest_mta_dataframe()`` whose ``STATION`` column
    equals ``station`` are passed to ``ag.analyze`` (called with its default
    parameters). The result is written to
    ``data/dbscan_analysis_of_<station>_<saturday>.csv`` where ``<saturday>``
    is the value of ``dfc.find_last_saturday_string()``.

    Args:
        station: Value compared against the ``STATION`` column; it is also
            embedded verbatim in the output file name.
    """
    latest = dfc.get_latest_mta_dataframe()
    saturday_label = dfc.find_last_saturday_string()

    station_mask = latest['STATION'] == station
    result = ag.analyze(latest[station_mask])

    out_path = 'data/dbscan_analysis_of_{}_{}.csv'.format(
        station, saturday_label)
    result.to_csv(out_path)
Exemple #3
0
def _start_label_from_file_name(fileName):
    """Return the start-date label embedded in an analysis CSV file name.

    Recognised base names are ``dbscan_analysis_<start>_to_<end>.csv``,
    ``dbscan_analysis_from_<start>_to_<end>.csv`` and
    ``dbscan_analysis_<start>.csv``. Only the base name is inspected, so
    underscores in directory names do not shift the result.

    Any other name falls back to the historical behaviour
    ``fileName.split('_')[2]`` so existing callers keep working (including
    the ``IndexError`` for names with fewer than three ``_``-separated parts).
    """
    import os

    prefix = 'dbscan_analysis_'
    stem = os.path.basename(fileName)
    if stem.endswith('.csv'):
        stem = stem[:-len('.csv')]

    if not stem.startswith(prefix):
        # Unknown naming scheme: keep the legacy positional lookup.
        return fileName.split('_')[2]

    label = stem[len(prefix):]
    # Files produced by this module's "from ... to ..." writers carry an
    # extra 'from_' marker that the positional lookup used to return as the
    # literal string 'from'.
    if label.startswith('from_'):
        label = label[len('from_'):]
    return label.partition('_to_')[0]


def analyze_append_new_data_to_csv(fileName, eps, min_samples):
    """Analyze the latest dataframe and combine it with a previous CSV.

    The latest dataframe from ``dfc.get_latest_mta_dataframe()`` is analyzed
    with ``ag.analyze(newData, eps, min_samples)``. The result is
    concatenated (new rows first) with the contents of ``fileName`` and
    written to ``data/dbscan_analysis_from_<start>_to_<saturday>.csv``,
    where ``<start>`` is taken from ``fileName`` and ``<saturday>`` is the
    value of ``dfc.find_last_saturday_string()``. The target path is printed
    before writing.

    Args:
        fileName: Path of the existing analysis CSV; its name supplies the
            start label of the new file.
        eps: Forwarded to ``ag.analyze``.
        min_samples: Forwarded to ``ag.analyze``.

    Raises:
        IndexError: If ``fileName`` does not follow a recognised naming
            scheme and has fewer than three ``_``-separated parts.

    Note:
        Calls ``warnings.filterwarnings('ignore')``, which silences warnings
        process-wide and is not undone afterwards.
    """
    warnings.filterwarnings('ignore')
    newData = dfc.get_latest_mta_dataframe()
    analyzedNewData = ag.analyze(newData, eps, min_samples)

    # NOTE(review): CSVs written with DataFrame.to_csv defaults contain the
    # index as an unnamed first column, which read_csv loads as a regular
    # column -- confirm whether that extra column is wanted downstream.
    oldData = pd.read_csv(fileName)
    newTotal = pd.concat([analyzedNewData, oldData])

    saturday = dfc.find_last_saturday_string()
    newName = 'data/dbscan_analysis_from_{}_to_{}.csv'.format(
        _start_label_from_file_name(fileName), saturday)
    print('Writing out to file: ', newName)
    newTotal.to_csv(newName)
Exemple #4
0
def analyze_from_to_dbscan(fromDate, toDate):
    """Analyze the data between two dates and write two CSV files.

    Data is loaded with ``dfc.get_data_between_two_dates(fromDate, toDate)``.
    Rows whose ``DESC`` column equals ``'REGULAR'`` are analyzed with
    ``ag.analyze`` and saved to
    ``data/dbscan_analysis_from_<first>_to_<last>.csv``. The output of
    ``dfc.find_total_rides_per_day`` for that result is saved to
    ``dbscan_total_rides_over_time_from_<first>_to_<last>.csv`` (relative to
    the working directory, not under ``data/``).

    ``<first>`` and ``<last>`` are the smallest and largest distinct values
    of the ``DATE`` column in sort order.

    Args:
        fromDate: Forwarded to ``dfc.get_data_between_two_dates``.
        toDate: Forwarded to ``dfc.get_data_between_two_dates``.

    Raises:
        IndexError: If the loaded data contains no ``DATE`` values (raised
            after the analysis has already run).

    Note:
        Calls ``warnings.filterwarnings('ignore')``, which silences warnings
        process-wide.
    """
    warnings.filterwarnings('ignore')

    raw = dfc.get_data_between_two_dates(fromDate, toDate)
    # The date labels are collected before the DESC filter, so they cover
    # every row that was loaded, not only the 'REGULAR' ones.
    unique_dates = raw['DATE'].unique().tolist()
    unique_dates.sort()
    regular_rows = raw[raw['DESC'] == 'REGULAR']

    # 1300 and 3 are passed positionally; other calls in this file name the
    # same two arguments eps and min_samples.
    result = ag.analyze(regular_rows, 1300, 3)
    first_date, last_date = unique_dates[0], unique_dates[-1]
    result.to_csv(
        'data/dbscan_analysis_from_{}_to_{}.csv'.format(
            first_date, last_date))

    daily_totals = dfc.find_total_rides_per_day(result)
    totals_path = 'dbscan_total_rides_over_time_from_{}_to_{}.csv'.format(
        first_date, last_date)
    daily_totals.to_csv(totals_path)
Exemple #5
0
def analyze_from_to_piecewise(fromDate, toDate):
    """Analyze each date in a range separately, then combine the results.

    For every entry of ``dfc.get_saturday_list(fromDate, toDate)`` the data
    from ``dfc.get_data_from_date`` is analyzed with ``ag.analyze`` and
    written to ``data_by_date/dbscan_analysis_<entry>.csv``. All of those
    files are then read back, concatenated and sorted by ``DATE``.

    The combined frame is passed to ``create_rides_over_time_csv``, whose
    return value is handed to ``dbm.connect_execute_rides_over_time``.
    Finally the combined frame is written to
    ``data_by_date/000_total_dbscan_analysis_<first>_to_<last>.csv``.

    Args:
        fromDate: Forwarded to ``dfc.get_saturday_list``.
        toDate: Forwarded to ``dfc.get_saturday_list``.
    """
    saturdays = dfc.get_saturday_list(fromDate, toDate)
    path_template = 'data_by_date/dbscan_analysis_{}.csv'

    # First pass: analyze every date and persist its result on its own.
    for day in saturdays:
        day_result = ag.analyze(dfc.get_data_from_date(day), 1300, 3)
        day_result.to_csv(path_template.format(day))

    # Second pass: reload the per-date files that were just written.
    frames = []
    for day in saturdays:
        frames.append(pd.read_csv(path_template.format(day)))
    combined = pd.concat(frames).sort_values(by='DATE')

    rides_over_time = create_rides_over_time_csv(combined)
    dbm.connect_execute_rides_over_time(rides_over_time)

    total_path = 'data_by_date/000_total_dbscan_analysis_{}_to_{}.csv'.format(
        saturdays[0], saturdays[-1])
    combined.to_csv(total_path)
Exemple #6
0
def analyze_with_different_dbscan_params(a, b, c, x, y, n):
    """Run ``ag.analyze`` over a grid of parameters, one CSV per combination.

    ``eps`` takes every value of ``range(a, b, c)`` and ``min_samples``
    every value of ``range(x, y)``. For each pair the data returned by
    ``dfc.get_n_latest_mta_dataframes(n)`` is analyzed and written to
    ``data/dbscan_data_outputs/dbscan_eps=<eps>_min_samples=<min_samples>_from_<saturdays[-1]>_to_<saturdays[0]>.csv``.
    The total number of combinations is printed up front, followed by a
    ``current / total`` progress line after each file.

    Args:
        a: First ``eps`` value (inclusive).
        b: End of the ``eps`` range (exclusive).
        c: Step between ``eps`` values; must be a non-zero integer.
        x: First ``min_samples`` value (inclusive).
        y: End of the ``min_samples`` range (exclusive).
        n: Forwarded to ``dfc.get_n_latest_mta_dataframes`` and
            ``dfc.find_saturday_dates_strings``.

    Raises:
        ValueError: If ``c`` is zero (raised by ``range``).

    Note:
        Calls ``warnings.filterwarnings('ignore')``, which silences warnings
        process-wide.
    """
    warnings.filterwarnings('ignore')
    rawData = dfc.get_n_latest_mta_dataframes(n)
    saturdays = dfc.find_saturday_dates_strings(n)

    eps_values = range(a, b, c)
    min_samples_values = range(x, y)
    # Count the combinations from the ranges themselves: the arithmetic
    # (b - a) / c * (y - x) yields a float and is wrong whenever (b - a) is
    # not a multiple of c or a range is empty, so the progress total would
    # never match the number of files actually written.
    iterations = len(eps_values) * len(min_samples_values)
    print('total DBSCAN param iterations:', iterations)
    current = 0

    for eps in eps_values:
        for min_samples in min_samples_values:
            analyzedData = ag.analyze(rawData,
                                      eps=eps,
                                      min_samples=min_samples)
            fileName = 'data/dbscan_data_outputs/dbscan_eps={}_min_samples={}_from_{}_to_{}.csv'.format(
                eps, min_samples, saturdays[-1], saturdays[0])
            analyzedData.to_csv(fileName)
            current += 1
            print(current, '/', iterations)
Exemple #7
0
def analyze_latest_dataframe_by_DBSCAN_():
    """Analyze the latest dataframe with ``ag.analyze`` and save it as CSV.

    Rows of ``dfc.get_latest_mta_dataframe()`` whose ``DESC`` column equals
    ``'REGULAR'`` are analyzed; the result is written to
    ``data/dbscan_analysis_<saturday>.csv`` where ``<saturday>`` is the
    value of ``dfc.find_last_saturday_string()``.
    """
    latest = dfc.get_latest_mta_dataframe()
    saturday_label = dfc.find_last_saturday_string()

    regular_rows = latest[latest['DESC'] == 'REGULAR']
    # 1300 and 3 are passed positionally; other calls in this file name the
    # same two arguments eps and min_samples.
    result = ag.analyze(regular_rows, 1300, 3)

    out_path = 'data/dbscan_analysis_{}.csv'.format(saturday_label)
    result.to_csv(out_path)
Exemple #8
0
def analyze_latest_dataframe_by_OPTICS():
    """Analyze the latest dataframe with ``ag.analyze_optics`` and save it.

    Rows of ``dfc.get_latest_mta_dataframe()`` whose ``DESC`` column equals
    ``'REGULAR'`` are passed to ``ag.analyze_optics`` together with the
    value ``1300``; the result is written to
    ``data/optics_analysis_<saturday>.csv`` where ``<saturday>`` is the
    value of ``dfc.find_last_saturday_string()``.
    """
    latest = dfc.get_latest_mta_dataframe()
    saturday_label = dfc.find_last_saturday_string()

    regular_rows = latest[latest['DESC'] == 'REGULAR']
    result = ag.analyze_optics(regular_rows, 1300)

    out_path = 'data/optics_analysis_{}.csv'.format(saturday_label)
    result.to_csv(out_path)
Exemple #9
0
def update_with_new_data():
    """Analyze the latest dataframe, save it, and refresh rides over time.

    The frame from ``dfc.get_latest_mta_dataframe()`` is analyzed with
    ``ag.analyze`` (no ``DESC`` filtering is applied here) and written to
    ``data_by_date/dbscan_analysis_<saturday>.csv`` where ``<saturday>`` is
    the value of ``dfc.find_last_saturday_string()``. The analyzed frame is
    then passed to ``get_rides_over_time``.
    """
    latest_raw = dfc.get_latest_mta_dataframe()
    # 1300 and 3 are passed positionally; other calls in this file name the
    # same two arguments eps and min_samples.
    latest_analyzed = ag.analyze(latest_raw, 1300, 3)

    saturday_label = dfc.find_last_saturday_string()
    out_path = 'data_by_date/dbscan_analysis_{}.csv'.format(saturday_label)
    latest_analyzed.to_csv(out_path)

    get_rides_over_time(latest_analyzed)