예제 #1
0
def main():
    """Cluster GPS data per participant and write the results plus an HTML plot.

    Command-line options (all required):
        -i / -I    input CSV file
        -ts / -TS  template start
        -te / -TE  template end
        -rs / -RS  rectangle stationary
        -rt / -RT  rectangle travel
        -me / -ME  marker end
        -o / -O    output path

    Only survey rows whose condition id is '99', '5' or '6' are kept, and of
    those only rows with USER_INITIATED == 'false' or LISTENING == 'true'.
    Participants with fewer than five remaining samples are skipped.  For
    every other participant the clustering output is stored through
    ``bD.write_variable`` as ``<pid>_all_data.data`` and plotted to
    ``<output path>/<pid>.html``.
    """
    print('now entered main')

    arg_parser = argparse.ArgumentParser()

    # (flag, upper-case alias, help text); registered in this order so the
    # generated usage/help text lists the options in the same sequence.
    option_table = (
        ('-i', '-I', 'input CSV file'),
        ('-ts', '-TS', 'template start'),
        ('-te', '-TE', 'template end'),
        ('-rs', '-RS', 'rectangle stationary'),
        ('-rt', '-RT', 'rectangle travel'),
        ('-me', '-ME', 'marker end'),
        ('-o', '-O', 'output path'),
    )
    for flag, alias, description in option_table:
        arg_parser.add_argument(flag, alias, help=description, required=True)

    options = arg_parser.parse_args()

    input_file, output_path = options.i, options.o
    template_start, template_end = options.ts, options.te
    rectangle_stationary, rectangle_travel = options.rs, options.rt
    marker_end = options.me

    cluster_service = gps_service()

    print('arguments assigned variables')
    data = pr.getAllData(input_file)
    print('keeping only app init and live listening')
    data_to_use = pr.filtersurveydata(data, SurveyConstants.CONDITION_ID, ['99', '5', '6'])
    app_init_data = pr.filtersurveydata(data_to_use, SurveyConstants.USER_INITIATED, ['false'])
    listening_data = pr.filtersurveydata(data_to_use, SurveyConstants.LISTENING, ['true'])
    data = app_init_data + listening_data
    print('done')
    per_participant_data = pr.getPerParticipantData(data)
    print('per participant data extracted')
    participant_list = per_participant_data.keys()
    print(participant_list)
    min_data_sample_no = 5
    for pid in participant_list:
        print('\n\npid: ' + pid)
        samples = per_participant_data[pid]
        if len(samples) < min_data_sample_no:
            print('# of samples < min_data_sample_no (%d), skipping pid' % min_data_sample_no)
            continue

        # Reset the shared service before loading this participant's samples.
        cluster_service.clean_house()
        cluster_service.set_pid(pid)
        cluster_service.set_participant_data(samples)
        (travel_clusters, stationary_clusters, stationary_cluster_boundaries,
         stationary_cluster_labels, noise_markers, error_files,
         stationary_points) = cluster_service.get_travelling_and_stationary_clusters()

        # Everything for this participant goes into one file.
        payload = {
            'travel': travel_clusters,
            'stationary': stationary_clusters,
            'boundary': stationary_cluster_boundaries,
            'label': stationary_cluster_labels,
            'noise': noise_markers,
            'points': stationary_points,
            'data': samples,
        }
        bD.write_variable(payload, pid + '_all_data.data', output_path)
        print('writing clusters, done')

        plot_file = output_path + '/' + pid + '.html'
        plotcl.createclusterplot(plot_file, stationary_clusters, travel_clusters,
                                 noise_markers, rectangle_travel, rectangle_stationary,
                                 marker_end, template_start, template_end,
                                 stationary_cluster_labels)
        print('plotted')
        print('there was an error opening a few files, total number :' + str(error_files))
예제 #2
0
def _build_confusion_matrix(sample_counts):
    """Tally predicted-label counts per (actual label, predicted label) pair.

    ``sample_counts`` maps a sample key to a list whose first entry is
    ``(actual_label, n_points)`` and whose remaining entries are
    ``(predicted_label, count)`` pairs.  Returns a plain dict keyed by
    ``(actual_label, predicted_label)`` holding the summed counts.
    """
    conf_mat = {}
    for count_data in sample_counts.values():
        actual_label = count_data[0][0]
        for predicted_label, count in count_data[1:]:
            key = (actual_label, predicted_label)
            conf_mat[key] = conf_mat.get(key, 0) + count
    return conf_mat


def main():
    """Label each survey sample's GPS trace using previously saved clusters.

    Command-line options (all required):
        -i / -I  input CSV file
        -o / -O  output path
        -c / -C  cluster data path; it is used as a plain string prefix
                 (``<path><pid>_all_data.data``), so a directory must be
                 given with its trailing separator

    Only survey rows whose condition id is '1'-'4' are kept, and of those
    only rows with USER_INITIATED == 'false' or LISTENING == 'true'.
    Participants with fewer than five samples, or without a saved cluster
    file, are skipped.  Per-sample counts, per-sample cluster details and a
    per-participant confusion matrix are written through
    ``bD.write_variable`` as 'count_result.res', 'cluster_results.res' and
    'conf_mat_results.res'.
    """
    print('now entered main')

    parser = argparse.ArgumentParser()

    parser.add_argument('-i', '-I', help='input CSV file', required=True)
    parser.add_argument('-o', '-O', help='output path', required=True)
    # The path is concatenated into file names unconditionally below, so a
    # missing value used to crash with a TypeError (None + str).  Requiring
    # the option turns that into a clear usage error.
    parser.add_argument('-c', '-C', help='cluster data path', required=True)

    args = parser.parse_args()

    input_file = args.i
    output_path = args.o
    cluster_data_path = args.c

    cluster_service = gps_service()

    print('arguments assigned variables')
    data = pr.getAllData(input_file)
    print('keeping only app init and live listening')
    data_to_use = pr.filtersurveydata(data, SurveyConstants.CONDITION_ID, ['1', '2', '3', '4'])
    # A set makes the per-participant membership test below O(1).
    cluster_data_files = set(glob(cluster_data_path + '*.data'))
    app_init_data = pr.filtersurveydata(data_to_use, SurveyConstants.USER_INITIATED, ['false'])
    listening_data = pr.filtersurveydata(data_to_use, SurveyConstants.LISTENING, ['true'])
    data = app_init_data + listening_data
    print('done')
    per_participant_data = pr.getPerParticipantData(data)
    print('per participant data extracted')
    participant_list = per_participant_data.keys()
    print(participant_list)
    min_data_sample_no = 5
    final_result = {}
    cluster_results = {}
    conf_mat_results = {}
    for pid in participant_list:
        print('\n\npid: ' + pid)
        samples = per_participant_data[pid]
        if len(samples) < min_data_sample_no:
            print('# of samples < min_data_sample_no (' + str(min_data_sample_no) + '), skipping pid')
            continue
        cluster_file = cluster_data_path + pid + '_all_data.data'
        if cluster_file not in cluster_data_files:
            # Doubled spaces keep the output of the former multi-argument
            # print statement unchanged.
            print('could not find data file for pid:  %s , skipping' % pid)
            continue
        final_result[pid] = {}
        cluster_results[pid] = {}
        cluster_service.clean_house()
        cluster_service.set_pid(pid)
        cluster_service.set_participant_data(samples)
        # SECURITY: pickle.load can execute arbitrary code; only point -c at
        # cluster files produced by a trusted run.
        with open(cluster_file, 'rb') as f:
            data_dict = pickle.load(f)
        cluster_boundaries = data_dict['boundary']
        cluster_labels = data_dict['label']
        missing_gps = 0
        for data_sample in samples:
            n_pid = data_sample[SurveyConstants.PATIENT_ID]
            cid = data_sample[SurveyConstants.CONDITION_ID]
            sid = data_sample[SurveyConstants.SESSION_ID]
            sample_key = (n_pid, cid, sid)
            if '' == data_sample[SurveyConstants.GPS_PATH]:
                missing_gps += 1
                continue
            gps_coords_clean = pr.getcleangpsdata(data_sample[SurveyConstants.GPS_PATH],
                                                  remove_duplicates=True,
                                                  pid=n_pid, cid=cid, sid=sid)
            if gps_coords_clean is None:
                print('no GPS data for  %s %s %s , skipping' % sample_key)
                continue
            travel_result = cluster_service.find_travelling(gps_coords_clean)
            # The first entry of each per-sample list is the self-reported
            # label; every following entry is a predicted label.
            actual_label = LocationContext.LOCATION_CONTEXT_VALUES[
                data_sample[SurveyConstants.LOCATION_CONTEXT]]
            sample_counts = [(actual_label, len(gps_coords_clean))]
            sample_clusters = [(actual_label, gps_coords_clean)]
            final_result[pid][sample_key] = sample_counts
            cluster_results[pid][sample_key] = sample_clusters
            if not travel_result[0]:
                continue
            if len(travel_result[1]) != 0:
                sample_counts.append(('Travel', len(travel_result[1])))
                sample_clusters.append(('Travel', (travel_result[1])))
            if len(travel_result[2]) != 0:
                cluster_decisions = gps.check_polygon_memberships(cluster_boundaries, travel_result[2])
                cluster_vals = [sum(x) for x in cluster_decisions]
                for idx, (decisions, n_members) in enumerate(zip(cluster_decisions, cluster_vals)):
                    if n_members != 0:
                        sample_counts.append((cluster_labels[idx], n_members))
                        sample_clusters.append((cluster_labels[idx], travel_result[2], decisions))
        print('confusion matrix: ')
        conf_mat = _build_confusion_matrix(final_result[pid])
        print(conf_mat)
        conf_mat_results[pid] = conf_mat
        print('Missing GPS:  %s' % missing_gps)

    print('Writing results to file')
    bD.write_variable(final_result, 'count_result.res', output_path)
    bD.write_variable(cluster_results, 'cluster_results.res', output_path)
    bD.write_variable(conf_mat_results, 'conf_mat_results.res', output_path)
    print('done')
    print('TADAA!!')