Example #1
import argparse
import pickle
from glob import glob

# Assumed project-local imports, named from the calls below:
# pr (survey parsing helpers), SurveyConstants, LocationContext,
# gps (geometry helpers), bD (result writer), and the gps_service class.


def main():
    print('now entered main')

    parser = argparse.ArgumentParser()

    parser.add_argument('-i', '-I', help='input CSV file', required=True)
    parser.add_argument('-o', '-O', help='output path', required=True)
    parser.add_argument('-c', '-C', help='cluster data path', required=True)  # the code below depends on this path

    args = parser.parse_args()

    input_file = args.i
    output_path = args.o
    cluster_data_path = args.c

    cluster_service = gps_service()

    print('arguments assigned to variables')
    data = pr.getAllData(input_file)
    print('keeping only app init and live listening')
    data_to_use = pr.filtersurveydata(data, SurveyConstants.CONDITION_ID, ['1', '2', '3', '4'])
    cluster_data_files = glob(cluster_data_path + '*.data')
    app_init_data = pr.filtersurveydata(data_to_use, SurveyConstants.USER_INITIATED, ['false'])
    listening_data = pr.filtersurveydata(data_to_use, SurveyConstants.LISTENING, ['true'])
    data = app_init_data + listening_data
    print('done')
    per_participant_data = pr.getPerParticipantData(data)
    print('per participant data extracted')
    participant_list = list(per_participant_data.keys())
    print(participant_list)
    min_data_sample_no = 5
    final_result = {}
    cluster_results = {}
    conf_mat_results = {}
    for pid in participant_list:
        print('\n\npid: ' + pid)
        if len(per_participant_data[pid]) < min_data_sample_no:
            print('# of samples < min_data_sample_no (' + str(min_data_sample_no) + '), skipping pid')
            continue
        if cluster_data_path + pid + '_all_data.data' not in cluster_data_files:
            print('could not find data file for pid:', pid, ', skipping')
            continue
        final_result[pid] = {}
        cluster_results[pid] = {}
        cluster_service.clean_house()
        cluster_service.set_pid(pid)
        cluster_service.set_participant_data(per_participant_data[pid])
        missing_gps = 0
        with open(cluster_data_path + pid + '_all_data.data', 'rb') as f:
            data_dict = pickle.load(f)
        cluster_boundaries = data_dict['boundary']
        cluster_labels = data_dict['label']
        for data_sample in per_participant_data[pid]:
            n_pid = data_sample[SurveyConstants.PATIENT_ID]
            cid = data_sample[SurveyConstants.CONDITION_ID]
            sid = data_sample[SurveyConstants.SESSION_ID]
            if not data_sample[SurveyConstants.GPS_PATH]:
                # empty GPS file path; count it and skip this sample
                missing_gps += 1
                continue
            gps_coords_clean = pr.getcleangpsdata(data_sample[SurveyConstants.GPS_PATH], remove_duplicates=True,
                                                  pid=n_pid, cid=cid, sid=sid)
            if gps_coords_clean is None:
                print('no GPS data for', n_pid, cid, sid, ', skipping')
                continue
            travel_result = cluster_service.find_travelling(gps_coords_clean)
            final_result[pid][(n_pid, cid, sid)] = \
                [(LocationContext.LOCATION_CONTEXT_VALUES[data_sample[SurveyConstants.LOCATION_CONTEXT]],
                  len(gps_coords_clean))]
            cluster_results[pid][(n_pid, cid, sid)] = [
                (LocationContext.LOCATION_CONTEXT_VALUES[data_sample[SurveyConstants.LOCATION_CONTEXT]],
                 gps_coords_clean)]
            if travel_result[0]:
                if travel_result[1]:
                    final_result[pid][(n_pid, cid, sid)].append(('Travel', len(travel_result[1])))
                    cluster_results[pid][(n_pid, cid, sid)].append(('Travel', travel_result[1]))
                if travel_result[2]:
                    cluster_decisions = gps.check_polygon_memberships(cluster_boundaries, travel_result[2])
                    cluster_vals = [sum(x) for x in cluster_decisions]
                    for idx, val in enumerate(cluster_vals):
                        if val:
                            final_result[pid][(n_pid, cid, sid)].append((cluster_labels[idx], val))
                            cluster_results[pid][(n_pid, cid, sid)].append(
                                (cluster_labels[idx], travel_result[2], cluster_decisions[idx]))
        print('confusion matrix:')
        conf_mat = {}
        for main_tuple in final_result[pid]:
            count_data = final_result[pid][main_tuple]
            actual_label = count_data[0][0]
            # entries after the first are (predicted_label, count) pairs
            for predicted_label, count in count_data[1:]:
                if (actual_label, predicted_label) not in conf_mat:
                    conf_mat[(actual_label, predicted_label)] = 0
                conf_mat[(actual_label, predicted_label)] += count
        print(conf_mat)
        conf_mat_results[pid] = conf_mat
        print('Missing GPS:', missing_gps)

    print('Writing results to file')
    bD.write_variable(final_result, 'count_result.res', output_path)
    bD.write_variable(cluster_results, 'cluster_results.res', output_path)
    bD.write_variable(conf_mat_results, 'conf_mat_results.res', output_path)
    print('done')
    print('TADAA!!')
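
The example defines main() but never invokes it; the usual entry-point guard makes the script runnable:

if __name__ == '__main__':
    main()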

Example #2

# A method of the gps_service class from Example #1 (class definition omitted here).
# Assumes the same module-level imports, plus collections and the clusters module;
# a hedged sketch of the DBSCAN helper it calls follows the method.
def get_travelling_and_stationary_clusters(self, eps_list=range(20, 51, 10),
                                           min_sample_list=(3, 5, 7)):
    '''
    For each data point in the participant data, distinguish between travelling and
    non-travelling data. Once the travelling clusters and non-travelling points have
    been extracted, run DBSCAN on the non-travelling points to obtain the stationary
    clusters and the noise markers.
    :return: the travel clusters, stationary clusters, stationary cluster boundaries
             and labels, noise markers, error-file count, and raw stationary points.
    '''
    for data_sample in self.__participant_data:
        try:
            self.__internal_location_info[(data_sample[SurveyConstants.PATIENT_ID],
                                           data_sample[SurveyConstants.CONDITION_ID],
                                           data_sample[SurveyConstants.SESSION_ID])] = \
                LocationContext.LOCATION_CONTEXT_VALUES[data_sample[SurveyConstants.LOCATION_CONTEXT]]
            gps_coords_clean = pr.getcleangpsdata(data_sample[SurveyConstants.GPS_PATH],
                                                  remove_duplicates=True,
                                                  pid=data_sample[SurveyConstants.PATIENT_ID],
                                                  cid=data_sample[SurveyConstants.CONDITION_ID],
                                                  sid=data_sample[SurveyConstants.SESSION_ID])
            if not gps_coords_clean:
                continue
        except IOError:
            self.__error_files += 1
            continue
        # TODO: the speed limit has to be decided; are people who are walking
        # also considered to be travelling?
        travel_result = self.find_travelling(gps_coords_clean)
        if travel_result[0]:
            if travel_result[1]:
                self.__travel_clusters.append(travel_result[1])
            if travel_result[2]:
                self.__stationary_points += travel_result[2]
        else:
            self.__stationary_points += gps_coords_clean
    # Since all the stationary points are collected for a given participant,
    # the hull intersection functions never get called.
    print('collected all points, clustering, eps_list:', list(eps_list),
          ', min_sample_list:', list(min_sample_list))
    sc_nz = clusters.getdbscanclusters(self.__stationary_points, eps_list, min_sample_list)
    print('done')
    if sc_nz is not None:
        if sc_nz['sc']:
            self.__stationary_clusters = sc_nz['sc']
        if sc_nz['nz']:
            self.__noise_markers = sc_nz['nz']
    print('stationary clusters: ' + str(len(self.__stationary_clusters)) +
          ', travel clusters: ' + str(len(self.__travel_clusters)))
    for x, cluster_points in enumerate(self.__stationary_clusters, start=1):
        try:
            boundary_points = gps.getconvexhull(cluster_points)
        except Exception:
            print('Error getting the convex hull of the cluster. cluster #', x)
            self.__stationary_cluster_label.append('Error,C-' + str(x))
            continue
        self.__stationary_cluster_boundaries.append(boundary_points)
        # Label the cluster with the most common location context among its
        # points; each point carries (pid, cid, sid) as its last three fields.
        cluster_point_types = [self.__internal_location_info[(cluster_point[-3],
                                                              cluster_point[-2],
                                                              cluster_point[-1])]
                               for cluster_point in cluster_points]
        label_counts = collections.Counter(cluster_point_types)
        self.__stationary_cluster_label.append(label_counts.most_common(1)[0][0])
    return (self.__travel_clusters, self.__stationary_clusters,
            self.__stationary_cluster_boundaries, self.__stationary_cluster_label,
            self.__noise_markers, self.__error_files, self.__stationary_points)
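
For reference, here is a minimal sketch of what a clusters.getdbscanclusters helper compatible with the call above could look like, built on scikit-learn's DBSCAN with a haversine metric. Only the function name, the 'sc'/'nz' result keys, and the metre-scale eps values come from the example itself; the point layout, the parameter search, and the selection criterion are assumptions.

import numpy as np
from sklearn.cluster import DBSCAN

EARTH_RADIUS_M = 6371000.0  # mean Earth radius; converts eps in metres to radians


def getdbscanclusters(points, eps_list, min_sample_list):
    # Assumption: each sample's first two fields are (lat, lon) in degrees.
    if not points:
        return None
    coords = np.radians([(p[0], p[1]) for p in points])
    best_labels, best_count = None, -1
    # Grid-search the two parameter lists; keeping the run that yields the most
    # clusters is an assumed selection criterion.
    for eps in eps_list:
        for min_samples in min_sample_list:
            labels = DBSCAN(eps=eps / EARTH_RADIUS_M, min_samples=min_samples,
                            metric='haversine').fit(coords).labels_
            n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
            if n_clusters > best_count:
                best_labels, best_count = labels, n_clusters
    # 'sc': one list of points per cluster id; 'nz': the noise points (label -1).
    sc = [[p for p, lbl in zip(points, best_labels) if lbl == k]
          for k in range(best_count)]
    nz = [p for p, lbl in zip(points, best_labels) if lbl == -1]
    return {'sc': sc, 'nz': nz}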