def label_emission_data(file_to_label, **kwargs):
    """Label the candidate segments of each observation against a matched path.

    Observations are read from ``file_to_label`` and the reference path is
    read from the matched-path file.  For every observation the candidate
    segments come from ``emission_probability.compute_emission_probabilities``;
    a candidate counts as a match when its (start, end) node-id pair, in
    either order, equals the reference entry at the same observation index.

    Keyword Args:
        radius: forwarded to the emission computation (default ``RADIUS``).
        n: forwarded to the emission computation (default ``N``).
        window: accepted but unused; it only fed the disabled Viterbi run.
        distance_only: when truthy, collect only ``seg['distance']`` of the
            matching candidates.  Otherwise collect one tuple
            ``(node_ids, distance_score, tangent_score, label)`` per
            candidate, with label ``1`` for a match and ``-1`` otherwise.
        filename: when not ``None``, the labels are written to this path
            and nothing is returned.
        matched_file: path of the matched-path file to compare against
            (default ``'matched_files/Rental2Youssef_matched.csv'``).

    Returns:
        The list of collected labels, or ``None`` when ``filename`` is given.
    """
    radius = kwargs.get('radius', RADIUS)
    n = kwargs.get('n', N)
    filename = kwargs.get('filename')
    distance_only = kwargs.get('distance_only', False)
    matched_file = kwargs.get(
        'matched_file', 'matched_files/Rental2Youssef_matched.csv')

    # The matched path is expected to exist already; the Viterbi run that
    # would regenerate it is disabled:
    # viterbi.run_viterbi(file_to_label, filename=matched_file,
    #                     radius=radius, window=window, n=n)
    observations = read_observations(file_to_label)
    results = read_resulting_path(matched_file)

    labeled = []
    for t, obs in enumerate(observations):
        possible_segments, _prob, _point = (
            emission_probability.compute_emission_probabilities(
                obs, radius, n))
        for seg in possible_segments:
            start_node = get_node_id(seg['way_osm_id'], seg['index_in_way'])
            end_node = get_node_id(seg['way_osm_id'], seg['index_in_way'] + 1)
            node_ids = (start_node[0], end_node[0])
            reversed_ids = (end_node[0], start_node[0])
            # The reference path may store the segment in either direction.
            matched = node_ids == results[t] or reversed_ids == results[t]

            if distance_only:
                if matched:
                    labeled.append(seg['distance'])
                continue
            # Plain truthiness (not ``== False``) so that falsy values such
            # as None still produce the negative examples.
            labeled.append((node_ids, seg['distance_score'],
                            seg['tangent_score'], 1 if matched else -1))

    if filename is None:
        return labeled

    with open(filename, 'w') as out:
        # NOTE(review): the header names three columns even in
        # distance_only mode, where each row is a single value; left
        # unchanged so existing readers that skip one line keep working.
        out.write('Label, Distance Score, Tangent Score \n')
        if distance_only:
            out.writelines(str(distance) + '\n' for distance in labeled)
        else:
            out.writelines(
                ', '.join((str(row[3]), str(row[1]), str(row[2]))) + '\n'
                for row in labeled)
    return None
def label_emission_data(file_to_label, **kwargs):
    """Label the candidate segments of each observation against a matched path.

    Observations are read from ``file_to_label`` and the reference path is
    read from the matched-path file.  For every observation the candidate
    segments come from ``emission_probability.compute_emission_probabilities``;
    a candidate counts as a match when its (start, end) node-id pair, in
    either order, equals the reference entry at the same observation index.

    Keyword Args:
        radius: forwarded to the emission computation (default ``RADIUS``).
        n: forwarded to the emission computation (default ``N``).
        window: accepted but unused; it only fed the disabled Viterbi run.
        distance_only: when truthy, collect only ``seg['distance']`` of the
            matching candidates.  Otherwise collect one tuple
            ``(node_ids, distance_score, tangent_score, label)`` per
            candidate, with label ``1`` for a match and ``-1`` otherwise.
        filename: when not ``None``, the labels are written to this path
            and nothing is returned.
        matched_file: path of the matched-path file to compare against
            (default ``'matched_files/Rental2Youssef_matched.csv'``).

    Returns:
        The list of collected labels, or ``None`` when ``filename`` is given.
    """
    radius = kwargs.get('radius', RADIUS)
    n = kwargs.get('n', N)
    filename = kwargs.get('filename')
    distance_only = kwargs.get('distance_only', False)
    matched_file = kwargs.get(
        'matched_file', 'matched_files/Rental2Youssef_matched.csv')

    # The matched path is expected to exist already; the Viterbi run that
    # would regenerate it is disabled:
    # viterbi.run_viterbi(file_to_label, filename=matched_file,
    #                     radius=radius, window=window, n=n)
    observations = read_observations(file_to_label)
    results = read_resulting_path(matched_file)

    labeled = []
    for t, obs in enumerate(observations):
        possible_segments, _prob, _point = (
            emission_probability.compute_emission_probabilities(
                obs, radius, n))
        for seg in possible_segments:
            start_node = get_node_id(seg['way_osm_id'], seg['index_in_way'])
            end_node = get_node_id(seg['way_osm_id'], seg['index_in_way'] + 1)
            node_ids = (start_node[0], end_node[0])
            reversed_ids = (end_node[0], start_node[0])
            # The reference path may store the segment in either direction.
            matched = node_ids == results[t] or reversed_ids == results[t]

            if distance_only:
                if matched:
                    labeled.append(seg['distance'])
                continue
            # Plain truthiness (not ``== False``) so that falsy values such
            # as None still produce the negative examples.
            labeled.append((node_ids, seg['distance_score'],
                            seg['tangent_score'], 1 if matched else -1))

    if filename is None:
        return labeled

    with open(filename, 'w') as out:
        # NOTE(review): the header names three columns even in
        # distance_only mode, where each row is a single value; left
        # unchanged so existing readers that skip one line keep working.
        out.write('Label, Distance Score, Tangent Score \n')
        if distance_only:
            out.writelines(str(distance) + '\n' for distance in labeled)
        else:
            out.writelines(
                ', '.join((str(row[3]), str(row[1]), str(row[2]))) + '\n'
                for row in labeled)
    return None
def viterbi(observations, **kwargs):
    """Decode ``observations`` window by window and return transition records.

    The observations are processed in consecutive slices of ``window``
    items.  Inside a slice a standard Viterbi recursion keeps, for every
    candidate segment, the best predecessor (``'previous'``) and the value
    of ``utils.calculate_direction`` (``'direction'``); the best path of the
    slice is then recovered by backtracking.  The first observation of every
    slice after the first is not re-scored: it is given the segment that was
    selected for the last observation of the preceding slice.

    One record is kept per observation.  The record list and the global
    observation index are handed to
    ``compute_transition_probabilities_training`` (presumably it stores the
    per-transition data at that index -- confirm against that helper).
    After decoding, every non-empty record is narrowed to the entry of the
    chosen previous segment, and element ``2`` of the chosen
    previous -> current entry is set to ``1``.

    Keyword Args:
        radius: forwarded to ``compute_emission_probabilities``
            (default ``RADIUS``).
        window: number of observations per slice (default ``WINDOW``).
        n: forwarded to ``compute_emission_probabilities`` (default ``N``).
        filename, return_gps: accepted but ignored by this variant.

    Returns:
        The list of per-observation transition records.
    """
    radius = kwargs.get('radius', RADIUS)
    window = kwargs.get('window', WINDOW)
    n = kwargs.get('n', N)

    banner = 'Running viterbi. window size: {0}, max states {1}, max radius {2}'
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print(banner.format(window, n, radius))

    transition_records = []
    result_sequence = []

    segments, emission_probabilities, point = compute_emission_probabilities(
        observations[0], radius, n)
    for seed in segments:
        seed['previous'] = None
        seed['direction'] = None
    segments_table = [segments]
    probabilities_table = [emission_probabilities]

    # Floor division keeps the slice count an integer on every Python version.
    for window_idx in range(len(observations) // window + 1):
        offset = window_idx * window
        current_obs = observations[offset:offset + window]
        if len(current_obs) == 0:
            break

        for t, obs in enumerate(current_obs):
            transition_records.append([])
            if t == 0:
                # Slot 0 of both tables is already seeded.
                continue
            previous_point = point
            segments, emission_probabilities, point = (
                compute_emission_probabilities(obs, radius, n))
            previous_segments = segments_table[t - 1]
            previous_scores = probabilities_table[t - 1]
            transition_probabilities = (
                compute_transition_probabilities_training(
                    previous_point, point, previous_segments, segments,
                    offset + t, transition_records))

            # The tables hold exactly ``t`` rows at this point, so these new
            # lists become row ``t``.
            step_segments = []
            step_scores = []
            segments_table.append(step_segments)
            probabilities_table.append(step_scores)

            for i, emission_probability in enumerate(emission_probabilities):
                candidates = [
                    score * transition_probabilities[j][i]
                    * emission_probability
                    for j, score in enumerate(previous_scores)
                ]
                idx, highest_probability = max(
                    enumerate(candidates), key=lambda pair: pair[1])
                step_scores.append(highest_probability)
                segment = segments[i]
                segment['previous'] = idx
                segment['direction'] = utils.calculate_direction(
                    previous_segments[idx], segment)
                step_segments.append(segment)

        # Backtrack from the most probable final state of this slice.
        last_t = len(current_obs) - 1
        last_idx = max(enumerate(probabilities_table[last_t]),
                       key=lambda pair: pair[1])[0]
        idx = last_idx
        window_path = []
        for step in range(last_t, -1, -1):
            cur = segments_table[step][idx]
            window_path.append(cur)
            if step != 0:
                idx = cur['previous']
        window_path.reverse()
        result_sequence.extend(window_path)

        # Seed the next slice with the chosen final segment at probability 1.
        segments_table = [[segments_table[last_t][last_idx]]]
        probabilities_table = [[1]]

    for t, cur in enumerate(result_sequence):
        if t == 0 or not transition_records[t]:
            continue
        prev = result_sequence[t - 1]
        prev_key = '{0},{1}'.format(prev['way_osm_id'], prev['index_in_way'])
        cur_key = '{0},{1}'.format(cur['way_osm_id'], cur['index_in_way'])
        chosen = transition_records[t][prev_key]
        # Keep only the transitions leaving the chosen previous segment and
        # flag the one that was actually taken.
        transition_records[t] = {prev_key: chosen}
        chosen[cur_key][2] = 1
    return transition_records
def viterbi(observations, **kwargs):
    """Decode ``observations`` window by window into a path of segments.

    The observations are processed in consecutive slices of ``window``
    items.  Inside a slice a standard Viterbi recursion keeps, for every
    candidate segment, the best predecessor (``'previous'``) and the value
    of ``utils.calculate_direction`` (``'direction'``); the best path of the
    slice is then recovered by backtracking.  The first observation of every
    slice after the first is not re-scored: it is given the segment that was
    selected for the last observation of the preceding slice.

    Keyword Args:
        radius: forwarded to ``compute_emission_probabilities``
            (default ``RADIUS``).
        window: number of observations per slice (default ``WINDOW``).
        n: forwarded to ``compute_emission_probabilities`` (default ``N``).
        return_gps: when truthy, the result of
            ``utils.get_node_gps_points`` is written to ``result_nodes.csv``
            (one ``a,b`` line for the first point of each entry followed by
            one for its second point) and nothing is returned.
        filename: when not ``None`` (and ``return_gps`` is falsy), the node
            ids are written with ``utils.write_to_file`` and nothing is
            returned.

    Returns:
        The value of ``utils.get_node_ids`` for the decoded path, or
        ``None`` when the output was written to a file instead.
    """
    radius = kwargs.get('radius', RADIUS)
    filename = kwargs.get('filename')
    window = kwargs.get('window', WINDOW)
    n = kwargs.get('n', N)
    return_gps = kwargs.get('return_gps', False)

    banner = 'Running viterbi. window size: {0}, max states {1}, max radius {2}'
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print(banner.format(window, n, radius))

    result_sequence = []

    segments, emission_probabilities, point = compute_emission_probabilities(
        observations[0], radius, n)
    for seed in segments:
        seed['previous'] = None
        seed['direction'] = None
    segments_table = [segments]
    probabilities_table = [emission_probabilities]

    # Floor division keeps the slice count an integer on every Python version.
    for window_idx in range(len(observations) // window + 1):
        offset = window_idx * window
        current_obs = observations[offset:offset + window]
        if len(current_obs) == 0:
            break

        for t, obs in enumerate(current_obs):
            if t == 0:
                # Slot 0 of both tables is already seeded.
                continue
            previous_point = point
            segments, emission_probabilities, point = (
                compute_emission_probabilities(obs, radius, n))
            previous_segments = segments_table[t - 1]
            previous_scores = probabilities_table[t - 1]
            transition_probabilities = compute_transition_probabilities(
                previous_point, point, previous_segments, segments)

            # The tables hold exactly ``t`` rows at this point, so these new
            # lists become row ``t``.
            step_segments = []
            step_scores = []
            segments_table.append(step_segments)
            probabilities_table.append(step_scores)

            for i, emission_probability in enumerate(emission_probabilities):
                candidates = [
                    score * transition_probabilities[j][i]
                    * emission_probability
                    for j, score in enumerate(previous_scores)
                ]
                idx, highest_probability = max(
                    enumerate(candidates), key=lambda pair: pair[1])
                step_scores.append(highest_probability)
                segment = segments[i]
                segment['previous'] = idx
                segment['direction'] = utils.calculate_direction(
                    previous_segments[idx], segment)
                step_segments.append(segment)

        # Backtrack from the most probable final state of this slice.
        last_t = len(current_obs) - 1
        last_idx = max(enumerate(probabilities_table[last_t]),
                       key=lambda pair: pair[1])[0]
        idx = last_idx
        window_path = []
        for step in range(last_t, -1, -1):
            cur = segments_table[step][idx]
            window_path.append(cur)
            if step != 0:
                idx = cur['previous']
        window_path.reverse()
        result_sequence.extend(window_path)

        # Seed the next slice with the chosen final segment at probability 1.
        segments_table = [[segments_table[last_t][last_idx]]]
        probabilities_table = [[1]]

    if return_gps:
        node_gps = utils.get_node_gps_points(result_sequence)
        # Both lists are built before the file is opened so a formatting
        # error cannot leave a truncated file behind.
        start_points = ['{0},{1}'.format(pair[0][0], pair[0][1])
                        for pair in node_gps]
        end_points = ['{0},{1}'.format(pair[1][0], pair[1][1])
                      for pair in node_gps]
        with open('result_nodes.csv', 'w') as resf:
            for start, end in zip(start_points, end_points):
                resf.write(start + '\n')
                resf.write(end + '\n')
        return None

    node_ids = utils.get_node_ids(result_sequence)
    if filename is None:
        return node_ids
    utils.write_to_file(node_ids, filename)
    return None