Example #1
    if args.suffix:
        model_suffix = args.suffix
    else:
        model_suffix = time.strftime("%d_%m_%Y")

    directory = 'model_' + model_suffix
    if not os.path.exists(directory):
        os.makedirs(directory)

    ##########################
    ###   Load the data
    ##########################
    if args.input_data:
        print('Loading data from {0}'.format(args.input_data))
        df = pd.read_csv(args.input_data,
                         parse_dates=['Full_date'],
                         low_memory=False)
        df['Full_date'] = rd.date_lookup(df['Full_date'])
    else:
        print('Reading and loading data. Saving to {}'.format(directory +
                                                              '/all_data.csv'))
        df = rd.read_data(read_weather_station=False,
                          read_water_sensor=False,
                          add_each_beach_data=True)
        df.to_csv(directory + '/all_data.csv', index=False)

    ###############################
    ###   Prepare Predictors
    ###############################
    if args.input_processed:
        print('Using Preprocessed data from {0} and {1}'.format(
            args.input_processed, args.input_meta))
        datafilename = args.input_processed
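
The snippet above follows a load-or-build caching pattern: when a prepared CSV is passed in, it is loaded directly; otherwise the data is rebuilt and written into a date-stamped model directory for reuse. A minimal, self-contained sketch of that pattern under assumed names (build_dataframe stands in for the project's rd.read_data and is not its real API):

# Illustrative sketch only: demonstrates the load-or-build caching pattern
# used above. Helper names and columns are assumptions, not the project's API.
import os
import time

import pandas as pd


def build_dataframe():
    # Stand-in for rd.read_data(): returns a tiny illustrative frame.
    return pd.DataFrame({'Full_date': pd.to_datetime(['2016-06-17']),
                         'Escherichia.coli': [120.0]})


def load_or_build(input_path=None, suffix=None):
    # Date-stamped output directory, e.g. model_17_06_2016.
    directory = 'model_' + (suffix or time.strftime("%d_%m_%Y"))
    if not os.path.exists(directory):
        os.makedirs(directory)

    if input_path:
        # Reuse an existing CSV; parse the date column while reading.
        df = pd.read_csv(input_path, parse_dates=['Full_date'],
                         low_memory=False)
    else:
        # Rebuild from scratch and cache the result next to the model.
        df = build_dataframe()
        df.to_csv(os.path.join(directory, 'all_data.csv'), index=False)
    return directory, df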
Example #2
    if args.suffix:
        model_suffix = args.suffix
    else:
        model_suffix = time.strftime("%d_%m_%Y")
    
    directory = 'model_' + model_suffix
    if not os.path.exists(directory):
        os.makedirs(directory)

    ##########################
    ###   Load the data
    ##########################
    if args.input_data:
        print('Loading data from {0}'.format(args.input_data))
        df = pd.read_csv(args.input_data, parse_dates=['Full_date'],
                         low_memory=False)
        df['Full_date'] = rd.date_lookup(df['Full_date'])
    else:
        print('Reading and loading data. Saving to {}'.format(
            directory + '/all_data.csv'))
        df = rd.read_data(read_weather_station=False, read_water_sensor=False,
                          add_each_beach_data=True)
        df.to_csv(directory + '/all_data.csv', index=False)

    ###############################
    ###   Prepare Predictors
    ###############################
    if args.input_processed:
        print('Using Preprocessed data from {0} and {1}'.format(
            args.input_processed, args.input_meta))
        datafilename = args.input_processed
        metadatafilename = args.input_meta
        data_processed = pd.read_csv(datafilename)
        meta_info = pd.read_csv(metadatafilename, parse_dates=['Full_date'])
        meta_info['Full_date'] = rd.date_lookup(meta_info['Full_date'])
    return predictors, meta_info


if __name__ == '__main__':
    # Command Line Argument parsing
    parser = argparse.ArgumentParser(description='Process beach data.')
    parser.add_argument('-i', '--input', type=str,
                        metavar='input_filename', help='input CSV filename')
    parser.add_argument('-v', '--verbose', action='count', default=0)

    args = parser.parse_args()

    # Load the data
    if args.input:
        df = pd.read_csv(args.input, parse_dates=['Full_date'])
        df['Full_date'] = rd.date_lookup(df['Full_date'])
    else:
        df = rd.read_data(read_weather_station=False, read_water_sensor=False)

    # Extract the EPA (technically USGS) model performance from the data
    epa_model_df = df[['Drek_Prediction', 'Escherichia.coli']].dropna()

    # Prepare the data
    predictors, meta_info = prepare_data(df)
    timestamps = meta_info['Full_date']
    classes = meta_info['Escherichia.coli'] > 235

    print('Using the following columns as predictors:')
    for c in predictors.columns:
        print('\t' + str(c))
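
Above, epa_model_df is set aside so the published USGS/EPA predictions (Drek_Prediction) can later be scored against observed exceedances of the 235 threshold. A hedged sketch of such a comparison, assuming Drek_Prediction is a predicted count on the same scale as the measured Escherichia.coli values (the function name baseline_report is illustrative, not part of the project):

# Illustrative sketch only: scores the baseline predictions kept in
# epa_model_df above. Assumes 'Drek_Prediction' is a predicted count on the
# same scale as the measured 'Escherichia.coli' column; adjust if it is not.
import pandas as pd


def baseline_report(epa_model_df, threshold=235):
    # Binary exceedance labels for observed counts and baseline predictions.
    observed = epa_model_df['Escherichia.coli'] > threshold
    predicted = epa_model_df['Drek_Prediction'] > threshold

    true_pos = (observed & predicted).sum()
    precision = true_pos / predicted.sum() if predicted.sum() else float('nan')
    recall = true_pos / observed.sum() if observed.sum() else float('nan')
    return precision, recall


if __name__ == '__main__':
    # Tiny made-up frame with the two columns used above.
    demo = pd.DataFrame({'Drek_Prediction': [100.0, 300.0, 250.0, 50.0],
                         'Escherichia.coli': [90.0, 400.0, 120.0, 60.0]})
    print(baseline_report(demo))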