# First script (cluster extraction): finds travelling/stationary clusters per
# participant, pickles them, and plots them.
import argparse

# Project-local modules referenced below; exact import paths are not shown in
# the original, so they are assumed to be importable from the package:
# pr (survey-data preprocessing), bD (result serialisation), plotcl (plotting),
# gps_service, SurveyConstants.


def main():
    print 'now entered main'
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '-I', help='input CSV file', required=True)
    parser.add_argument('-ts', '-TS', help='template start', required=True)
    parser.add_argument('-te', '-TE', help='template end', required=True)
    parser.add_argument('-rs', '-RS', help='rectangle stationary', required=True)
    parser.add_argument('-rt', '-RT', help='rectangle travel', required=True)
    parser.add_argument('-me', '-ME', help='marker end', required=True)
    parser.add_argument('-o', '-O', help='output path', required=True)
    args = parser.parse_args()

    input_file = args.i
    template_start = args.ts
    template_end = args.te
    rectangle_travel = args.rt
    rectangle_stationary = args.rs
    marker_end = args.me
    output_path = args.o
    cluster_service = gps_service()
    print 'arguments assigned variables'

    data = pr.getAllData(input_file)

    # Keep only app-initiated and live-listening samples.
    print 'keeping only app init and live listening'
    data_to_use = pr.filtersurveydata(data, SurveyConstants.CONDITION_ID, ['99', '5', '6'])
    app_init_data = pr.filtersurveydata(data_to_use, SurveyConstants.USER_INITIATED, ['false'])
    listening_data = pr.filtersurveydata(data_to_use, SurveyConstants.LISTENING, ['true'])
    data = app_init_data + listening_data
    print 'done'

    per_participant_data = pr.getPerParticipantData(data)
    print 'per participant data extracted'
    participant_list = per_participant_data.keys()
    print participant_list

    min_data_sample_no = 5
    total_error_files = 0
    for pid in participant_list:
        print '\n\npid: ' + pid
        if len(per_participant_data[pid]) < min_data_sample_no:
            print '# of samples < min_data_sample_no (' + str(min_data_sample_no) + '), skipping pid'
            continue

        cluster_service.clean_house()
        cluster_service.set_pid(pid)
        cluster_service.set_participant_data(per_participant_data[pid])
        travel_clusters, stationary_clusters, stationary_cluster_boundaries, stationary_cluster_labels, \
            noise_markers, error_files, stationary_points = cluster_service.get_travelling_and_stationary_clusters()
        # Accumulate across participants, assuming error_files is a count. The
        # original printed error_files once after the loop, where it reflected
        # only the last participant and raised a NameError if every pid was skipped.
        total_error_files += error_files

        # bD.writecluster(pid, stationary_clusters, output_path, 'S', stationary_cluster_labels)
        # bD.writecluster(pid, noise_markers, output_path, 'N')
        # bD.writecluster(pid, travel_clusters, output_path, 'T')
        # bD.write_variable([stationary_cluster_boundaries, stationary_cluster_labels],
        #                   pid + '_cluster_boundary_label.data', output_path)
        bD.write_variable({'travel': travel_clusters,
                           'stationary': stationary_clusters,
                           'boundary': stationary_cluster_boundaries,
                           'label': stationary_cluster_labels,
                           'noise': noise_markers,
                           'points': stationary_points,
                           'data': per_participant_data[pid]},
                          pid + '_all_data.data', output_path)
        print 'writing clusters, done'

        plotcl.createclusterplot(output_path + '/' + pid + '.html', stationary_clusters,
                                 travel_clusters, noise_markers, rectangle_travel,
                                 rectangle_stationary, marker_end, template_start,
                                 template_end, stationary_cluster_labels)
        print 'plotted'
    print 'there was an error opening a few files, total number: ' + str(total_error_files)
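# A minimal sketch of how this script would be invoked, assuming this main()
# lives in its own file. The entry-point guard and the script/argument names
# below are illustrative, not from the original:
#
#   python cluster_extraction.py -i survey.csv -ts TEMPLATE_START -te TEMPLATE_END \
#       -rs RECT_STATIONARY -rt RECT_TRAVEL -me MARKER_END -o ./clusters
if __name__ == '__main__':
    main()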
# Second script (evaluation): reloads the per-participant cluster data written
# by the script above and builds per-participant confusion matrices. It appears
# to come from a separate file, since it redefines main().
import argparse
import pickle
from glob import glob

# Project-local modules, with import paths assumed as above:
# pr, bD, gps (geometry helpers), gps_service, SurveyConstants, LocationContext.


def main():
    print 'now entered main'
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '-I', help='input CSV file', required=True)
    parser.add_argument('-o', '-O', help='output path', required=True)
    parser.add_argument('-c', '-C', help='cluster data path', required=False)
    args = parser.parse_args()

    input_file = args.i
    output_path = args.o
    cluster_data_path = args.c
    cluster_service = gps_service()
    print 'arguments assigned variables'

    data = pr.getAllData(input_file)

    # Keep only app-initiated and live-listening samples.
    print 'keeping only app init and live listening'
    data_to_use = pr.filtersurveydata(data, SurveyConstants.CONDITION_ID, ['1', '2', '3', '4'])
    cluster_data_files = glob(cluster_data_path + '*.data')
    app_init_data = pr.filtersurveydata(data_to_use, SurveyConstants.USER_INITIATED, ['false'])
    listening_data = pr.filtersurveydata(data_to_use, SurveyConstants.LISTENING, ['true'])
    data = app_init_data + listening_data
    print 'done'

    per_participant_data = pr.getPerParticipantData(data)
    print 'per participant data extracted'
    participant_list = per_participant_data.keys()
    print participant_list

    min_data_sample_no = 5
    final_result = {}
    cluster_results = {}
    conf_mat_results = {}
    for pid in participant_list:
        print '\n\npid: ' + pid
        if len(per_participant_data[pid]) < min_data_sample_no:
            print '# of samples < min_data_sample_no (' + str(min_data_sample_no) + '), skipping pid'
            continue
        if cluster_data_path + pid + '_all_data.data' not in cluster_data_files:
            print 'could not find data file for pid: ', pid, ', skipping'
            continue

        final_result[pid] = {}
        cluster_results[pid] = {}
        cluster_service.clean_house()
        cluster_service.set_pid(pid)
        cluster_service.set_participant_data(per_participant_data[pid])

        with open(cluster_data_path + pid + '_all_data.data', 'rb') as f:
            data_dict = pickle.load(f)
        cluster_boundaries = data_dict['boundary']
        cluster_labels = data_dict['label']

        missing_gps = 0
        for data_sample in per_participant_data[pid]:
            n_pid = data_sample[SurveyConstants.PATIENT_ID]
            cid = data_sample[SurveyConstants.CONDITION_ID]
            sid = data_sample[SurveyConstants.SESSION_ID]
            if '' == data_sample[SurveyConstants.GPS_PATH]:
                # print 'empty gps file path, skipping \n', data_sample
                missing_gps += 1
                continue
            gps_coords_clean = pr.getcleangpsdata(data_sample[SurveyConstants.GPS_PATH],
                                                  remove_duplicates=True, pid=n_pid, cid=cid, sid=sid)
            if gps_coords_clean is None:
                print 'no GPS data for ', n_pid, cid, sid, ', skipping'
                continue

            travel_result = cluster_service.find_travelling(gps_coords_clean)
            # Each entry starts with the self-reported (actual) location context;
            # (predicted label, point count) tuples are appended below.
            final_result[pid][(n_pid, cid, sid)] = \
                [(LocationContext.LOCATION_CONTEXT_VALUES[data_sample[SurveyConstants.LOCATION_CONTEXT]],
                  len(gps_coords_clean))]
            cluster_results[pid][(n_pid, cid, sid)] = \
                [(LocationContext.LOCATION_CONTEXT_VALUES[data_sample[SurveyConstants.LOCATION_CONTEXT]],
                  gps_coords_clean)]

            if travel_result[0]:
                if len(travel_result[1]) != 0:
                    final_result[pid][(n_pid, cid, sid)].append(('Travel', len(travel_result[1])))
                    cluster_results[pid][(n_pid, cid, sid)].append(('Travel', travel_result[1]))
                if len(travel_result[2]) != 0:
                    # Count how many of the non-travelling points fall inside
                    # each stationary-cluster boundary polygon.
                    cluster_decisions = gps.check_polygon_memberships(cluster_boundaries, travel_result[2])
                    cluster_vals = [sum(x) for x in cluster_decisions]
                    for idx in range(len(cluster_vals)):
                        if cluster_vals[idx] != 0:
                            final_result[pid][(n_pid, cid, sid)].append((cluster_labels[idx], cluster_vals[idx]))
                            cluster_results[pid][(n_pid, cid, sid)].append((cluster_labels[idx],
                                                                            travel_result[2],
                                                                            cluster_decisions[idx]))

        # Build the per-participant confusion matrix, keyed by
        # (actual label, predicted label) and weighted by point counts.
        print 'confusion matrix: '
        conf_mat = {}
        for main_tuple in final_result[pid]:
            count_data = final_result[pid][main_tuple]
            actual_label = count_data[0][0]
            for count_idx in range(1, len(count_data)):
                predicted_label = count_data[count_idx][0]
                if (actual_label, predicted_label) not in conf_mat:
                    conf_mat[(actual_label, predicted_label)] = 0
                conf_mat[(actual_label, predicted_label)] += count_data[count_idx][1]
        print conf_mat
        conf_mat_results[pid] = conf_mat
        print 'Missing GPS: ', missing_gps

    print 'Writing results to file'
    bD.write_variable(final_result, 'count_result.res', output_path)
    bD.write_variable(cluster_results, 'cluster_results.res', output_path)
    bD.write_variable(conf_mat_results, 'conf_mat_results.res', output_path)
    print 'done'
    print 'TADAA!!'
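# The confusion matrices written above are sparse dicts keyed by
# (actual label, predicted label) pairs and weighted by GPS point counts.
# A self-contained sketch of rendering one as a dense table for inspection;
# print_conf_mat is a hypothetical helper, not part of the original pipeline.
def print_conf_mat(conf_mat):
    # Collect the label alphabet from both axes of the sparse dict.
    actual_labels = sorted(set(str(a) for a, _ in conf_mat))
    predicted_labels = sorted(set(str(p) for _, p in conf_mat))
    str_mat = dict(((str(a), str(p)), v) for (a, p), v in conf_mat.items())
    print 'actual \\ predicted'.ljust(20) + ''.join(p.ljust(15) for p in predicted_labels)
    for a in actual_labels:
        row = a.ljust(20)
        for p in predicted_labels:
            # Pairs never observed get an explicit zero cell.
            row += str(str_mat.get((a, p), 0)).ljust(15)
        print row

# Example usage: print_conf_mat(conf_mat_results[some_pid])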
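# A matching invocation sketch for the evaluation script, again assuming this
# main() lives in its own file; the guard and example paths are illustrative.
# Note the trailing slash on -c: the script concatenates cluster_data_path
# directly with file names and the '*.data' glob pattern.
#
#   python cluster_evaluation.py -i survey.csv -o ./results -c ./clusters/
if __name__ == '__main__':
    main()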