コード例 #1
0
ファイル: MapReduce.py プロジェクト: kanwalbir/nyc_subway
    def execute(self, data, mapper, reducer, col_num):
        """Run the map phase over the fare files, then reduce and rank the output.

        Each entry of `data` is passed to `read_header`, which hands back a
        row iterator, a date range and the column names. Files whose columns
        do not include 'STUDENTS' are skipped; for the rest,
        `mapper(row, col_num)` is called once per row. Afterwards
        `reducer(key, values)` is called once for every key held in
        `self.intermediate`, and `self.result` is sorted in place, largest
        first, on the second element of each entry.

        Returns `self.result` itself (the same list object, now sorted).

        NOTE(review): presumably `mapper` fills `self.intermediate` and
        `reducer` appends to `self.result` -- confirm against their
        definitions, which are not visible here.
        """

        def fare_count(entry):
            # Entries are ranked on their second element.
            return entry[1]

        # Map phase: feed every row of every eligible file to the mapper.
        for fare_file in data:
            rows, _date_range, header_columns = read_header(fare_file)

            # Only recent data files carry the Student fare column.
            if 'STUDENTS' not in header_columns:
                continue

            for record in rows:
                mapper(record, col_num)

        # Reduce phase: one reducer call per intermediate key.
        for key in self.intermediate:
            grouped_values = self.intermediate[key]
            reducer(key, grouped_values)

        # Rank so the entries with the largest number of fares come first.
        self.result.sort(key=fare_count, reverse=True)

        return self.result

#-----------------------------------------------------------------------------#
コード例 #2
0
ファイル: combine_data.py プロジェクト: kanwalbir/nyc_subway
def combine_data():
    """Merge the student-era fare files into a single 'fares_combined.csv'.

    Writes a fixed header row to 'fares_combined.csv' in the current
    directory, then walks every entry of the 'fare_data' folder. Each entry
    is handed to `read_header`, which returns a row iterator, the file's date
    range and its column names. Files whose columns do not include
    'STUDENTS' are skipped; rows of the remaining files are written with the
    date range inserted as the first column.

    The output file is opened in a `with` block so it is flushed and closed
    before the completion message is printed.

    Returns None.
    """
    header = ['DATE RANGE', 'REMOTE', ' STATION', 'FF', 'SEN/DIS', ' 7-D AFAS UNL',
              '30-D AFAS/RMF UNL', 'JOINT RR TKT', '7-D UNL', '30-D UNL', '14-D RFM UNL',
              '1-D UNL', '14-D UNL', '7D-XBUS PASS', 'TCMC', 'LIB SPEC SEN/RF 2 TRIP',
              'RR UNL NO TRADE', 'TCMC ANNUAL MC', 'MR EZPAY EXP', 'MR EZPAY UNL', 'PATH 2-T',
              'AIRTRAIN FF', 'AIRTRAIN 30-D', 'AIRTRAIN 10-T', 'AIRTRAIN MTHLY', 'STUDENTS']

    # Create a new combined csv master file, with a header row.
    # Binary mode ('wb') is what the Python 2 csv module expects for output.
    with open('fares_combined.csv', 'wb') as combined:
        combo_file = csv.writer(combined)
        combo_file.writerow(header)

        # Capture list of files in the fare_data folder
        for fare_file in os.listdir('fare_data'):
            csv_file_object, date_range, column_names = read_header(fare_file)

            # Only consider recent data files which include Student fare data
            if 'STUDENTS' not in column_names:
                continue

            for row in csv_file_object:
                row.insert(0, date_range)           # Insert date range as first column
                # The last field of each row is dropped before writing.
                # NOTE(review): presumably a trailing empty column in the
                # source files -- confirm against the raw data.
                combo_file.writerow(row[:-1])

    # Single-argument parenthesised form prints the same text on Python 2.
    print("All files combined, please check the 'fares_combined.csv' in current directory.")