Esempio n. 1
0
def reducer(stream):
    """Sum the counts received for each airport and emit one total per airport.

    Reads (airport, count) pairs from ``mapred.iter_key_values(stream)``,
    converts every count with ``int`` and accumulates it per airport, then
    sends each (airport, total) pair through ``mapred.send``.
    """
    airport_acc = {}
    for airport, count in mapred.iter_key_values(stream):
        airport_acc[airport] = airport_acc.get(airport, 0) + int(count)

    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for airport, total in airport_acc.items():
        mapred.send(airport, total)
Esempio n. 2
0
def mapper(stream):
    """Emit (weekday name, (arrival delay, 1)) for every curated record.

    The 'DayOfWeek' field is converted with ``int`` and used as a 1-based
    index into the weekday names (1 -> 'Monday' ... 7 -> 'Sunday').
    The trailing 1 presumably serves as a per-record count for a downstream
    aggregation -- confirm against the matching reducer.
    """
    weekday_names = [
        'Monday',
        'Tuesday',
        'Wednesday',
        'Thursday',
        'Friday',
        'Saturday',
        'Sunday',
    ]
    records = mapred.iter_curated_fields(stream, ['DayOfWeek', 'ArrDelay'])

    for day_number, arr_delay in records:
        # Shift the 1-based day number to a 0-based list index.
        weekday = weekday_names[int(day_number) - 1]
        mapred.send(weekday, (arr_delay, 1))
Esempio n. 3
0
def reducer_step1(stream):
    """First reduction step: keep the lowest-delay record per (key, origin, dest).

    Every incoming value is unpacked as a 7-item record whose 2nd, 3rd and
    4th items are the origin, the destination and the delay. For each
    (key, origin, dest) combination only the record with the smallest delay
    (compared as ``float``) is retained; on ties the first record seen wins.

    The output is keyed by the incoming key (day/day-part) so that the
    KeySplitOutputFormat can split the files.
    """
    least_delay_legs = {}
    for key, value in mapred.iter_key_values(stream):
        # Unpacking all seven items also rejects records of the wrong length.
        _, origin, dest, delay, _, _, _ = value
        leg_key = (key, origin, dest)

        current_best = least_delay_legs.get(leg_key)
        if current_best is None:
            least_delay_legs[leg_key] = value
            continue

        _, _, _, best_delay, _, _, _ = current_best
        if float(delay) < float(best_delay):
            least_delay_legs[leg_key] = value

    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for (key, _, _), value in least_delay_legs.items():
        mapred.send(key, value)
Esempio n. 4
0
def mapper_step1(stream):
    """Emit each flight keyed by (date, 'AM' or 'PM') for the first reduction step.

    That reduction step selects the single best flight for each
    day/day-part/origin/dest. The 'CRSDepTime' field is read as ``HHMM``: the
    first two characters are the hour and the remainder the minutes. Flights
    before 12:00 are keyed 'AM', all others 'PM'. Records whose time cannot
    be parsed are skipped.
    """
    fields = [
        'FlightDate', 'Origin', 'Dest', 'ArrDelay', 'CRSDepTime',
        'UniqueCarrier', 'FlightNum'
    ]

    for date, origin, dest, delay, dep_time, carrier, flight in mapred.iter_curated_fields(
            stream, fields):
        # NOTE(review): a time without a leading zero (e.g. '930') would be
        # split as '93' + '0'; confirm the curated field is always 4 digits.
        try:
            minute_of_day = int(dep_time[:2]) * 60 + int(dep_time[2:])
        except (TypeError, ValueError):
            # Best-effort: drop records with a missing or malformed time, but
            # no longer swallow unrelated errors such as KeyboardInterrupt.
            continue

        period = 'AM' if minute_of_day < 12 * 60 else 'PM'
        mapred.send((date, period),
                    (date, origin, dest, delay, dep_time, carrier, flight))
Esempio n. 5
0
def mapper(stream):
    """Emit ((origin, dest, carrier), (arrival delay, 1)) per curated record.

    The trailing 1 presumably serves as a per-record count for a downstream
    aggregation -- confirm against the matching reducer.
    """
    records = mapred.iter_curated_fields(
        stream, ['Origin', 'Dest', 'UniqueCarrier', 'ArrDelay'])

    for origin, dest, carrier, arr_delay in records:
        route_carrier = (origin, dest, carrier)
        mapred.send(route_carrier, (arr_delay, 1))
Esempio n. 6
0
def reducer(stream):
    """Re-emit every entry returned by ``mapred.mean_accumulator_reducer``.

    Each key is unpacked as an (origin, dest, carrier) triple, rebuilt as a
    tuple and sent out together with its associated value.
    """
    aggregated = mapred.mean_accumulator_reducer(stream)

    # NOTE(review): iteritems() is the Python 2 dict API; the helper's return
    # type is not visible here, so the call is kept as-is.
    for route_carrier, stats in aggregated.iteritems():
        origin, dest, carrier = route_carrier
        mapred.send((origin, dest, carrier), stats)
Esempio n. 7
0
def mapper(stream):
    """Emit ((origin, dest), (departure delay, 1)) per curated record.

    The trailing 1 presumably serves as a per-record count for a downstream
    aggregation -- confirm against the matching reducer.
    """
    records = mapred.iter_curated_fields(stream, ['Origin', 'Dest', 'DepDelay'])

    for origin, dest, dep_delay in records:
        route = (origin, dest)
        mapred.send(route, (dep_delay, 1))
Esempio n. 8
0
def mapper(stream):
    """Emit (airport, 1) twice per curated record: origin first, then dest."""
    records = mapred.iter_curated_fields(stream, ['Origin', 'Dest'])

    for origin, dest in records:
        # Each flight counts once for each of its two endpoints.
        for airport in (origin, dest):
            mapred.send(airport, 1)
Esempio n. 9
0
def reducer(stream):
    """Re-emit every (day of week, value) entry returned by
    ``mapred.mean_accumulator_reducer``.
    """
    aggregated = mapred.mean_accumulator_reducer(stream)

    # NOTE(review): iteritems() is the Python 2 dict API; the helper's return
    # type is not visible here, so the call is kept as-is.
    for entry in aggregated.iteritems():
        weekday, stats = entry
        mapred.send(weekday, stats)
Esempio n. 10
0
def mapper(stream):
    """Emit (carrier, (arrival delay, 1)) per curated record.

    The trailing 1 presumably serves as a per-record count for a downstream
    aggregation -- confirm against the matching reducer.
    """
    records = mapred.iter_curated_fields(stream, ['UniqueCarrier', 'ArrDelay'])

    for airline, arr_delay in records:
        payload = (arr_delay, 1)
        mapred.send(airline, payload)
Esempio n. 11
0
def reducer(stream):
    """Re-emit every (carrier, value) entry returned by
    ``mapred.mean_accumulator_reducer``.
    """
    aggregated = mapred.mean_accumulator_reducer(stream)

    # NOTE(review): iteritems() is the Python 2 dict API; the helper's return
    # type is not visible here, so the call is kept as-is.
    for entry in aggregated.iteritems():
        airline, stats = entry
        mapred.send(airline, stats)