"TRIP_ID", "START_POINT", "GRID_POLYLINE", "TRUNC_GRID_POLYLINE" ],
    # NOTE(review): eval() on CSV text executes arbitrary code if the file is
    # untrusted -- ast.literal_eval would parse the same list/tuple literals safely.
    converters={
        "START_POINT": lambda x: eval(x),
        "GRID_POLYLINE": lambda x: eval(x),
        "TRUNC_GRID_POLYLINE": lambda x: eval(x)
    })

# Loop through every trip to make a prediction
for nr, partial_trip in validation.iterrows():
    # Select the last segment of the partial trip (only the final k grid
    # cells of the observed, truncated trajectory are used for matching)
    end = partial_trip.TRUNC_GRID_POLYLINE[-k:]
    # Create a DestinationGrid object that accumulates raw match counts
    posterior = DestinationGrid(N, M)
    # Match this segment with trips in the training set (read in chunks)
    for i, chunk in enumerate(train):
        # Only look at trips that start from the same position
        trips = chunk[chunk.START_CELL == partial_trip.GRID_POLYLINE[0]]
        for idx, row in trips.iterrows():
            if match(end, row.GRID_POLYLINE, k):
                # The last cell of a matching training trip votes for itself
                # as the predicted destination
                destination = row.GRID_POLYLINE[-1]
                posterior._table[destination] += 1
    # Convert raw counts into a probability distribution
    posterior.normalizeProbs()
    # Export the probabilities
    dests = [id_to_nr(dest) for dest in posterior]
# Run S independent random walks from the start cell and record where
# each one terminates; the empirical destination frequencies form the
# posterior distribution that is plotted below.
for simulation in xrange(S):
    # Fresh walker for every simulation run
    wlk = Walker(lattice=ltc, start=start_simulation, dest_threshold=1)
    # Record the destination reached by this simulated walk
    destinations.append(wlk.simulateWalker())

# Aggregate the simulated destinations with pandas: one row per walk,
# then count occurrences per destination cell.
table = pd.DataFrame({"DEST": pd.Series(destinations)})
table["COUNT"] = 1
prob_table = table.groupby(["DEST"], as_index=False).agg({"COUNT": "sum"})
# Normalize the counts so they sum to one
prob_table.COUNT = prob_table.COUNT / prob_table.COUNT.sum()

# Store the probabilities in a DestinationGrid object
posterior = DestinationGrid(N, M)
for idx, row in prob_table.iterrows():
    posterior.setProb(row.DEST, row.COUNT)

# Plot the posterior twice: once with the complete trip superimposed,
# once with only the observed (truncated) part.
plt.subplot(1, 2, 1)
plt.imshow(
    np.log(posterior.as_array() + trip_to_array(partial_trip.GRID_POLYLINE, N, M)),
    interpolation="nearest")
plt.title("Complete trip superimposed")
plt.subplot(1, 2, 2)
plt.imshow(
    np.log(posterior.as_array() + trip_to_array(partial_trip.TRUNC_GRID_POLYLINE, N, M)),
    interpolation="nearest")
plt.title("Partial trip superimposed")
# Simulate a single walker and append its final cell to the running list
# of destinations.
wlk = Walker(lattice=ltc, start=start_simulation, dest_threshold=1)
destinations.append(wlk.simulateWalker())

# Build an empirical destination distribution from the recorded walks:
# one row per walk, counted and normalized per destination cell.
table = pd.DataFrame({"DEST": pd.Series(destinations)})
table["COUNT"] = 1
prob_table = table.groupby(["DEST"], as_index=False).agg({"COUNT": "sum"})
prob_table.COUNT = prob_table.COUNT / prob_table.COUNT.sum()

# Store the probabilities in a DestinationGrid object
posterior = DestinationGrid(N, M)
for idx, row in prob_table.iterrows():
    posterior.setProb(row.DEST, row.COUNT)

# Visualize the posterior: left panel overlays the complete trip,
# right panel overlays only the truncated (observed) trip.
plt.subplot(1, 2, 1)
plt.imshow(
    np.log(posterior.as_array() + trip_to_array(partial_trip.GRID_POLYLINE, N, M)),
    interpolation="nearest")
plt.title("Complete trip superimposed")
plt.subplot(1, 2, 2)
plt.imshow(
    np.log(posterior.as_array() + trip_to_array(partial_trip.TRUNC_GRID_POLYLINE, N, M)),
    interpolation="nearest")
import ast  # safe parsing of the list/tuple literals stored in the CSV

# Import the validation data.
# ast.literal_eval replaces eval(): it parses exactly the same Python
# literals (lists/tuples of numbers) but cannot execute arbitrary code,
# so a malicious or corrupted CSV cannot run code in this process.
validation = pd.read_csv(
    filepath_or_buffer=filepath_val,
    sep=",",
    usecols=["TRIP_ID", "START_POINT", "GRID_POLYLINE", "TRUNC_GRID_POLYLINE"],
    converters={
        "START_POINT": ast.literal_eval,
        "GRID_POLYLINE": ast.literal_eval,
        "TRUNC_GRID_POLYLINE": ast.literal_eval,
    })

# Loop through every trip to make a prediction
for nr, partial_trip in validation.iterrows():
    # Select the last segment of the partial trip: only the final k grid
    # cells of the truncated trajectory are used as the matching query.
    end = partial_trip.TRUNC_GRID_POLYLINE[-k:]
    # Posterior over destination cells, accumulated as raw match counts.
    posterior = DestinationGrid(N, M)
    # Match this segment with trips in the training set (read in chunks)
    for i, chunk in enumerate(train):
        # Only look at trips that start from the same position
        trips = chunk[chunk.START_CELL == partial_trip.GRID_POLYLINE[0]]
        for idx, row in trips.iterrows():
            if match(end, row.GRID_POLYLINE, k):
                # A matching training trip votes for its own final cell
                destination = row.GRID_POLYLINE[-1]
                posterior._table[destination] += 1
    # Convert raw counts into a probability distribution
    posterior.normalizeProbs()
    # Export the probabilities
    dests = [id_to_nr(dest) for dest in posterior]
{   # NOTE(review): eval() on CSV fields executes arbitrary code if the file
    # is untrusted; ast.literal_eval would parse the same literals safely.
    #"TIMESTAMP" : lambda x: datetime.datetime.fromtimestamp(x),
    "POLYLINE": lambda x: json.loads(x),
    "START_POINT": lambda x: eval(x),
    "GRID_POLYLINE": lambda x: eval(x),
    "TRUNC_POLYLINE": lambda x: eval(x),
    "TRUNC_GRID_POLYLINE": lambda x: eval(x)
})

# Select a partial trip (first row of the test frame)
partial_trip = test.loc[0]
# Select the last segment of the partial trip: the final k grid cells
# of the truncated trajectory form the matching query.
end = partial_trip.TRUNC_GRID_POLYLINE[-k:]
# Create a DestinationGrid object that accumulates raw match counts
grid = DestinationGrid(N, M)
# Match this segment with trips in the training set (read in chunks)
for i, chunk in enumerate(trips):
    for idx, row in chunk.iterrows():
        if match(end, row.GRID_POLYLINE, k):
            # The last cell of a matching training trip is its destination
            dest = row.GRID_POLYLINE[-1]
            grid._table[dest] += 1
    # Progress report (Python 2 print statement)
    print "Processed chunk %d" % i
# Convert raw counts into a probability distribution
grid.normalizeProbs()

# Plot the new distribution (expression continues past this fragment)
plt.subplot(1, 2, 1)
plt.imshow(np.log(grid.as_array() +
# Dummy flag: marks whether each training trip matches the observed segment.
trips["MATCH"] = False

# Match this segment with trips in the training set.
# FIX: DataFrame.set_value() is deprecated and removed in pandas >= 1.0;
# .loc[idx, col] = value is the supported, behaviorally equivalent way to
# set a single cell and works on old pandas versions as well.
for idx, row in trips.iterrows():
    if match(end, row.GRID_POLYLINE, k):
        trips.loc[idx, 'MATCH'] = True

# We are only really interested in the matched trips now
trips = trips[trips.MATCH]

# Compute the destination distribution: summing the boolean MATCH column
# per destination cell counts the matches, then normalize to probabilities.
trips_agg = trips.groupby(["DEST_CELL"], as_index=False).aggregate({"MATCH": "sum"})
trips_agg.MATCH = trips_agg.MATCH / np.sum(trips_agg.MATCH)

# Store it in a DestinationGrid object
grid = DestinationGrid(N, M)
grid.setProbs(trips_agg.DEST_CELL.values, trips_agg.MATCH.values)

# Plot the new distribution: left panel overlays the complete trip,
# right panel overlays only the truncated (observed) trip.
plt.subplot(1, 2, 1)
plt.imshow(np.log(grid.as_array() + trip_to_array(partial_trip.GRID_POLYLINE, N, M)),
           interpolation="nearest")
plt.title("Complete trip superimposed")
plt.subplot(1, 2, 2)
plt.imshow(np.log(grid.as_array() + trip_to_array(partial_trip.TRUNC_GRID_POLYLINE, N, M)),
           interpolation="nearest")
plt.title("Partial trip superimposed")
nrows = 10,  # only a small sample is read while prototyping
    # NOTE(review): eval() on CSV fields executes arbitrary code if the file
    # is untrusted; ast.literal_eval would parse the same literals safely.
    converters = {#"TIMESTAMP" : lambda x: datetime.datetime.fromtimestamp(x),
        "POLYLINE": lambda x: json.loads(x),
        "START_POINT": lambda x: eval(x),
        "GRID_POLYLINE": lambda x: eval(x),
        "TRUNC_POLYLINE": lambda x: eval(x),
        "TRUNC_GRID_POLYLINE": lambda x: eval(x)})

# Select a partial trip (first row of the test frame)
partial_trip = test.loc[0]
# Select the last segment of the partial trip: the final k grid cells
# of the truncated trajectory form the matching query.
end = partial_trip.TRUNC_GRID_POLYLINE[-k:]
# Create a DestinationGrid object that accumulates raw match counts
grid = DestinationGrid(N, M)
# Match this segment with trips in the training set (read in chunks)
for i, chunk in enumerate(trips):
    for idx, row in chunk.iterrows():
        if match(end, row.GRID_POLYLINE, k):
            # The last cell of a matching training trip is its destination
            dest = row.GRID_POLYLINE[-1]
            grid._table[dest] += 1
    # Progress report (Python 2 print statement)
    print "Processed chunk %d" % i
# Convert raw counts into a probability distribution
grid.normalizeProbs()

# Plot the new distribution with the complete trip superimposed
plt.subplot(1,2,1)
plt.imshow(np.log(grid.as_array() + trip_to_array(partial_trip.GRID_POLYLINE, N, M)),
           interpolation = "nearest")
# Select the last segment of the partial trip: the final k grid cells of
# the truncated trajectory form the matching query.
end = partial_trip.TRUNC_GRID_POLYLINE[-k:]

# Create a dummy flag to indicate if a trip matches or not
trips["MATCH"] = False

# Match this segment with trips in the training set.
# FIX: DataFrame.set_value() is deprecated and removed in pandas >= 1.0;
# .loc[idx, col] = value is the supported, behaviorally equivalent way to
# set a single cell and works on old pandas versions as well.
for idx, row in trips.iterrows():
    if match(end, row.GRID_POLYLINE, k):
        trips.loc[idx, 'MATCH'] = True

# We are only really interested in the matched trips now
trips = trips[trips.MATCH]

# Compute the destination distribution: summing the boolean MATCH column
# per destination cell counts the matches, then normalize to probabilities.
trips_agg = trips.groupby(["DEST_CELL"], as_index = False).aggregate({"MATCH": "sum"})
trips_agg.MATCH = trips_agg.MATCH / np.sum(trips_agg.MATCH)

# Store it in a DestinationGrid object
grid = DestinationGrid(N, M)
grid.setProbs(trips_agg.DEST_CELL.values, trips_agg.MATCH.values)

# Plot the new distribution: left panel overlays the complete trip,
# right panel overlays only the truncated (observed) trip.
plt.subplot(1,2,1)
plt.imshow(np.log(grid.as_array() + trip_to_array(partial_trip.GRID_POLYLINE, N, M)),
           interpolation = "nearest")
plt.title("Complete trip superimposed")
plt.subplot(1,2,2)
plt.imshow(np.log(grid.as_array() + trip_to_array(partial_trip.TRUNC_GRID_POLYLINE, N, M)),
           interpolation = "nearest")
plt.title("Partial trip superimposed")