def mapSpelling(self, inputLine):
    """Map one comma-separated record to a KVPair, dropping invalid rows.

    Args:
        self: Object passed through by the caller; not used here.
        inputLine: Comma-separated record with at least six fields. Fields 2
            and 3 are checked against the list of accepted airport codes.

    Returns:
        KVPair whose key is "field0,field1" and whose value is
        "field2,field3,field4,field5", or None (after printing an error)
        when the record is too short or names an unknown airport.
    """
    # The accepted codes are written in lowercase and matched exactly, so the
    # input is expected to be lowercase already.
    airportCodes = frozenset((
        "atl", "pek", "lhr", "ord", "hnd", "lax", "cdg", "dfw", "fra", "hkg",
        "den", "dxb", "cgk", "ams", "mad", "bkk", "jfk", "sin", "can", "las",
        "pvg", "sfo", "phx", "iah", "clt", "mia", "muc", "kul", "fco", "ist",
    ))

    words = inputLine.split(",")

    # A short record used to raise IndexError and abort the whole run; drop
    # it with a message instead, the same way unknown airports are dropped.
    if len(words) < 6:
        print(":: ERROR: Input line has fewer than 6 fields, removed from dataset: " + inputLine)
        return None

    if words[2] in airportCodes and words[3] in airportCodes:
        return KVPair(words[0] + "," + words[1],
                      words[2] + "," + words[3] + "," + words[4] + "," + words[5])

    print(":: ERROR: Airport not found, removed from dataset: " + inputLine)
    return None
def redCalcFlightInfo(self, kvPairs):
    """Summarise one flight from the records grouped under its key.

    kvPair.value column headers (as given by the original header comment):
        [0] = Passenger ID (XXXnnnnXXn)
        [1] = Dept. airport code (XXX)
        [2] = Arr. airport code (XXX)
        [3] = Departure time GMT (n [10], unix epoch time)
        [4] = Total flight time (mins) (n [1..4])

    Args:
        self: Object passed through by the caller; not used here.
        kvPairs: Non-empty sequence of KVPair objects sharing one key.

    Returns:
        KVPair with the first pair's key and a comma-separated value:
        dept code, arr code, departure clock time, duration as "H:M:S",
        arrival clock time, and every pair's field [0] each prefixed by ";".
    """
    # Flight-level fields are read from the first record only.
    first = kvPairs[0].value.split(",")
    deptCode = first[1]
    arrCode = first[2]

    # NOTE(review): fromtimestamp() converts to the machine's local time,
    # while the column header says GMT -- confirm which one is intended.
    departure = datetime.fromtimestamp(int(first[3]))
    departureClock = str(departure.time())

    # The duration is supplied in whole minutes, so seconds is always zero.
    hours, minutes = divmod(int(first[4]), 60)
    seconds = 0
    duration = ":".join([str(hours), str(minutes), str(seconds)])

    arrival = departure + timedelta(hours=hours, minutes=minutes, seconds=seconds)
    arrivalClock = str(arrival.time())

    # Every entry, including the first, is prefixed with ";".
    passengers = "".join(";" + pair.value.split(",")[0] for pair in kvPairs)

    summary = ",".join(
        [deptCode, arrCode, departureClock, duration, arrivalClock, passengers])
    return KVPair(kvPairs[0].key, summary)
def redCalcFlightDistance(self, kvPairs):
    """Reduce one key's records to the distance between its two airports.

    kvPair.key column headers (per the original header comment):
        [0] = Passenger ID
        [1] = Flight ID
    kvPair.value column headers:
        [0] = Dept. airport code (XXX)
        [1] = Arr. airport code (XXX)

    Args:
        self: Object passed through by the caller; not used here.
        kvPairs: Non-empty sequence of KVPair objects sharing one key. Only
            the first pair's value is used for the airport lookup.

    Returns:
        KVPair with the first pair's key and the rounded result of
        haversine(deptLat, deptLong, arrLat, arrLong) as a string.

    Raises:
        OSError: If the airport information file cannot be opened.
    """
    # open() raises on failure, so no separate "could not open" check is
    # needed; the context manager guarantees the handle is closed.
    with open("./inputFiles/Top30_airports_LatLong.csv") as airports:
        airportInfo = airports.read().split("\n")

    # Split the first KVPair's value
    kvPairValues = kvPairs[0].value.split(",")

    # Find the latitude and longitude of the airports in use.
    # NOTE(review): if a code is not found these stay "" and are passed to
    # haversine() unchanged -- confirm how that should be handled.
    deptLat = ""
    deptLong = ""
    arrLat = ""
    arrLong = ""
    for line in airportInfo:
        # Skip blank lines (e.g. the one after a trailing newline) rather
        # than blindly dropping the last line, which lost the final airport
        # whenever the file did not end with a newline.
        if not line.strip():
            continue
        info = line.split(",")
        # Column 1 holds the airport code, columns 2 and 3 the coordinates.
        # The comparison is a substring test against the lower-cased code.
        code = info[1].lower()
        if kvPairValues[0] in code:
            deptLat = float(info[2])
            deptLong = float(info[3])
        if kvPairValues[1] in code:
            arrLat = float(info[2])
            arrLong = float(info[3])

    # Diagnostic output when more than one record shares this key.
    if len(kvPairs) > 1:
        for pairs in kvPairs:
            print(pairs.key)
            print(pairs.value)

    distance = haversine(deptLat, deptLong, arrLat, arrLong)
    return KVPair(kvPairs[0].key, str(round(distance)))
def run(self):
    """Run the configured mapper over every non-empty line of the input file.

    The file named by self.inputFile is read whole, lower-cased and split on
    newline characters. self.mapper is called as self.mapper(self, line) for
    each non-empty line. Results that are not KVPair instances (for example
    None from a mapper that rejected the line) are discarded.

    Returns:
        0 if no mapper is set; otherwise a list containing a fresh KVPair
        copy of every KVPair the mapper produced, in input order.

    Raises:
        OSError: If the input file cannot be opened.
    """
    if not self.mapper:
        print(":: No mapper function set")
        return 0

    # open() raises on failure, so the old "if not inFile" check could never
    # fire; the context manager makes sure the handle is closed.
    with open(self.inputFile) as inFile:
        lines = inFile.read().lower().split("\n")

    mapped = [self.mapper(self, line) for line in lines if line]

    # isinstance() replaces the comparison of str(type(pair)) against a
    # hard-coded "<class 'KVPair.KVPair'>" string, which broke as soon as
    # the class was imported under a different module path.
    return [KVPair(pair.key, pair.value)
            for pair in mapped
            if isinstance(pair, KVPair)]
def mapMakePairs(self, inputLine):
    """Build a KVPair from the first two comma-separated fields of a line.

    Args:
        self: Object passed through by the caller; not used here.
        inputLine: Comma-separated record with at least two fields.

    Returns:
        KVPair with field 0 as the key and field 1 as the value.
    """
    fields = inputLine.split(",")
    key = fields[0]
    value = fields[1]
    return KVPair(key, value)
def mapPassengerToFlight(self, inputLine):
    """Build a KVPair that swaps the first two comma-separated fields.

    Args:
        self: Object passed through by the caller; not used here.
        inputLine: Comma-separated record with at least two fields.

    Returns:
        KVPair with field 1 as the key and field 0 as the value.
    """
    fields = inputLine.split(",")
    key = fields[1]
    value = fields[0]
    return KVPair(key, value)
def redCountPassengers(self, kvPairs):
    """Count how many records were grouped under one key.

    Args:
        self: Object passed through by the caller; not used here.
        kvPairs: Non-empty sequence of KVPair objects sharing one key.

    Returns:
        KVPair with the first pair's key and the number of pairs as a string.
    """
    total = len(kvPairs)
    return KVPair(kvPairs[0].key, str(total))
def mapReOrder(self, inputLine):
    """Re-key a six-field record on its second field.

    Args:
        self: Object passed through by the caller; not used here.
        inputLine: Comma-separated record with at least six fields.

    Returns:
        KVPair with field 1 as the key and fields 0, 2, 3, 4 and 5 joined by
        commas as the value.
    """
    fields = inputLine.split(",")
    key = fields[1]
    # Field 0 moves to the front of the value; the other fields keep their order.
    value = ",".join([fields[0], fields[2], fields[3], fields[4], fields[5]])
    return KVPair(key, value)
def mapDistaces(self, inputLine):
    """Build a KVPair from the second and third comma-separated fields.

    Args:
        self: Object passed through by the caller; not used here.
        inputLine: Comma-separated record with at least three fields.

    Returns:
        KVPair with field 1 as the key and field 2 as the value.
    """
    fields = inputLine.split(",")
    key = fields[1]
    value = fields[2]
    return KVPair(key, value)
def redCountFlights(self, kvPairs):
    """Sum the integer values of all records grouped under one key.

    Args:
        self: Object passed through by the caller; not used here.
        kvPairs: Non-empty sequence of KVPair objects whose values parse as
            integers.

    Returns:
        KVPair with the first pair's key and the total as a string.
    """
    total = sum(int(pair.value) for pair in kvPairs)
    return KVPair(kvPairs[0].key, str(total))
def redWrite(self, kvPairs):
    """Pass the first record of a group through unchanged.

    Args:
        self: Object passed through by the caller; not used here.
        kvPairs: Non-empty sequence of KVPair objects sharing one key.

    Returns:
        A new KVPair carrying the first pair's key and value; any further
        pairs in the group are ignored.
    """
    head = kvPairs[0]
    return KVPair(head.key, head.value)
def redTotalPassengerDistance(self, kvPairs):
    """Add up the integer values of all records grouped under one key.

    Args:
        self: Object passed through by the caller; not used here.
        kvPairs: Non-empty sequence of KVPair objects whose values parse as
            integers.

    Returns:
        KVPair with the first pair's key and the summed value as a string.
    """
    total = sum(int(pair.value) for pair in kvPairs)
    return KVPair(kvPairs[0].key, str(total))
def redDistances(self, kvPairs):
    """Keep only the first record of a group.

    Args:
        self: Object passed through by the caller; not used here.
        kvPairs: Non-empty sequence of KVPair objects sharing one key.

    Returns:
        A new KVPair carrying the first pair's key and value.
    """
    head = kvPairs[0]
    return KVPair(head.key, head.value)
def mapTotalPassengerDistance(self, inputLine):
    """Build a KVPair from the first and third comma-separated fields.

    Args:
        self: Object passed through by the caller; not used here.
        inputLine: Comma-separated record with at least three fields.

    Returns:
        KVPair with field 0 as the key and field 2 as the value.
    """
    fields = inputLine.split(",")
    key = fields[0]
    value = fields[2]
    return KVPair(key, value)
def redUnusedAirports(self, kvPairs):
    """Emit the group's key with a fixed value of "0".

    Args:
        self: Object passed through by the caller; not used here.
        kvPairs: Non-empty sequence of KVPair objects sharing one key.

    Returns:
        KVPair with the first pair's key and the string "0".
    """
    key = kvPairs[0].key
    return KVPair(key, "0")
def redUsedAirports(self, kvPairs):
    """Count the distinct values among the records grouped under one key.

    Args:
        self: Object passed through by the caller; not used here.
        kvPairs: Non-empty sequence of KVPair objects sharing one key.

    Returns:
        KVPair with the first pair's key and the number of distinct values
        as a string.
    """
    # A set collapses repeated values so each one is counted once.
    distinctValues = {pair.value for pair in kvPairs}
    return KVPair(kvPairs[0].key, str(len(distinctValues)))
def mapDuplicates(self, inputLine):
    """Map a validated six-field record to a KVPair, dropping rejected lines.

    Args:
        self: Object passed through by the caller; not used here.
        inputLine: Comma-separated record; it is only split if
            stripErrors(inputLine) returns a truthy value.

    Returns:
        KVPair whose key is "field0,field1" and whose value is
        "field2,field3,field4,field5", or None (after printing an error)
        when stripErrors rejects the line.
    """
    # Guard clause: report and drop anything stripErrors does not accept.
    if not stripErrors(inputLine):
        print(":: ERROR: Input line not propely formed, removed from dataset: " + inputLine)
        return None

    fields = inputLine.split(",")
    key = ",".join([fields[0], fields[1]])
    value = ",".join([fields[2], fields[3], fields[4], fields[5]])
    return KVPair(key, value)
def mapUnusedAirports(self, inputLine):
    """Build a KVPair keyed on the second field with a fixed value of "0".

    Args:
        self: Object passed through by the caller; not used here.
        inputLine: Comma-separated record with at least two fields.

    Returns:
        KVPair with field 1 as the key and the string "0" as the value.
    """
    fields = inputLine.split(",")
    key = fields[1]
    return KVPair(key, "0")
def mapCalcFlightDistances(self, inputLine):
    """Pair the first two fields of a record with its next two fields.

    Args:
        self: Object passed through by the caller; not used here.
        inputLine: Comma-separated record with at least four fields.

    Returns:
        KVPair whose key is "field0,field1" and whose value is
        "field2,field3".
    """
    fields = inputLine.split(",")
    key = ",".join([fields[0], fields[1]])
    value = ",".join([fields[2], fields[3]])
    return KVPair(key, value)