Python getAllData Examples

Programming Language: Python

Namespace/Package Name: preprocessing

Method/Function: getAllData

Examples at hotexamples.com: 2

Python getAllData - 2 examples found. These are the top-rated real-world Python examples of preprocessing.getAllData, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: runMe.py Project: syedshabihhasan/gps_processing

def main():
    """Cluster every participant's survey samples, then save and plot them.

    Command-line arguments (all required): ``-i`` input CSV file, ``-ts`` /
    ``-te`` template start / end, ``-rs`` / ``-rt`` rectangle stationary /
    travel, ``-me`` marker end and ``-o`` output path.

    The survey rows are restricted to conditions '99', '5' and '6'; the rows
    with USER_INITIATED == 'false' and the rows with LISTENING == 'true' are
    then concatenated and grouped per participant.  Participants with fewer
    than ``min_data_sample_no`` samples are skipped.  For each remaining
    participant the clusters returned by ``gps_service`` are written to
    ``<pid>_all_data.data`` under the output path and plotted to
    ``<output path>/<pid>.html``.
    """
    print('now entered main')

    # Registered in this order so the generated help text keeps its layout.
    option_table = (
        ('-i', '-I', 'input CSV file'),
        ('-ts', '-TS', 'template start'),
        ('-te', '-TE', 'template end'),
        ('-rs', '-RS', 'rectangle stationary'),
        ('-rt', '-RT', 'rectangle travel'),
        ('-me', '-ME', 'marker end'),
        ('-o', '-O', 'output path'),
    )
    arg_parser = argparse.ArgumentParser()
    for lower_flag, upper_flag, description in option_table:
        arg_parser.add_argument(lower_flag, upper_flag, help=description, required=True)
    cli = arg_parser.parse_args()

    clusterer = gps_service()
    print('arguments assigned variables')

    survey_rows = pr.getAllData(cli.i)
    print('keeping only app init and live listening')
    in_condition = pr.filtersurveydata(survey_rows, SurveyConstants.CONDITION_ID, ['99', '5', '6'])
    app_initiated = pr.filtersurveydata(in_condition, SurveyConstants.USER_INITIATED, ['false'])
    live_listening = pr.filtersurveydata(in_condition, SurveyConstants.LISTENING, ['true'])
    selected_rows = app_initiated + live_listening
    print('done')

    samples_by_pid = pr.getPerParticipantData(selected_rows)
    print('per participant data extracted')
    all_pids = samples_by_pid.keys()
    print(all_pids)

    min_data_sample_no = 5
    for pid in all_pids:
        print('\n\npid: ' + pid)
        samples = samples_by_pid[pid]
        if len(samples) < min_data_sample_no:
            print('# of samples < min_data_sample_no (' + str(min_data_sample_no) + '), skipping pid')
            continue

        # Reset the shared service before loading the next participant.
        clusterer.clean_house()
        clusterer.set_pid(pid)
        clusterer.set_participant_data(samples)

        clustering = clusterer.get_travelling_and_stationary_clusters()
        (travel, stationary, boundaries, labels,
         noise, error_files, points) = clustering

        everything = {'travel': travel,
                      'stationary': stationary,
                      'boundary': boundaries,
                      'label': labels,
                      'noise': noise,
                      'points': points,
                      'data': samples}
        bD.write_variable(everything, pid+'_all_data.data', cli.o)
        print('writing clusters, done')

        html_target = cli.o + '/' + pid + '.html'
        plotcl.createclusterplot(html_target, stationary, travel, noise,
                                 cli.rt, cli.rs, cli.me,
                                 cli.ts, cli.te, labels)
        print('plotted')
        print('there was an error opening a few files, total number :' + str(error_files))

Example #2

Show file

File: runMe2.py Project: syedshabihhasan/gps_processing

def main():
    """Score reported location context against previously saved GPS clusters.

    Command-line arguments: ``-i`` input CSV file (required), ``-o`` output
    path (required) and ``-c`` cluster data path (optional).  The cluster
    data path is used as a raw string prefix for ``<pid>_all_data.data``
    files, so a directory has to be passed with its trailing separator.
    When ``-c`` is omitted the prefix is empty, i.e. the files are looked up
    relative to the current working directory.

    For every participant with at least ``min_data_sample_no`` samples and
    an available cluster file, each sample's cleaned GPS trace is split by
    ``gps_service.find_travelling`` and checked against the stored cluster
    boundaries.  Three pickled results are written to the output path:

    * ``count_result.res``     -- per sample, ``(label, count)`` pairs; the
      first pair holds the reported location context and trace length.
    * ``cluster_results.res``  -- per sample, the same labels together with
      the underlying GPS data and membership decisions.
    * ``conf_mat_results.res`` -- per participant, a dict mapping
      ``(actual_label, predicted_label)`` to the accumulated count.
    """
    print('now entered main')

    parser = argparse.ArgumentParser()

    parser.add_argument('-i', '-I', help='input CSV file', required=True)
    parser.add_argument('-o', '-O', help='output path', required=True)
    parser.add_argument('-c', '-C', help='cluster data path', required=False)

    args = parser.parse_args()

    input_file = args.i
    output_path = args.o
    # '-c' is optional, so argparse hands back None when it is omitted.
    # Concatenating None with a string below used to raise TypeError; fall
    # back to an empty prefix (current working directory) instead.
    cluster_data_path = args.c or ''

    cluster_service = gps_service()

    print('arguments assigned variables')
    data = pr.getAllData(input_file)
    print('keeping only app init and live listening')
    data_to_use = pr.filtersurveydata(data, SurveyConstants.CONDITION_ID, ['1', '2', '3', '4'])
    # A set makes the per-participant membership test below O(1).
    cluster_data_files = set(glob(cluster_data_path + '*.data'))
    app_init_data = pr.filtersurveydata(data_to_use, SurveyConstants.USER_INITIATED, ['false'])
    listening_data = pr.filtersurveydata(data_to_use, SurveyConstants.LISTENING, ['true'])
    data = app_init_data + listening_data
    print('done')
    per_participant_data = pr.getPerParticipantData(data)
    print('per participant data extracted')
    participant_list = per_participant_data.keys()
    print(participant_list)
    min_data_sample_no = 5
    final_result = {}
    cluster_results = {}
    conf_mat_results = {}
    for pid in participant_list:
        print('\n\npid: ' + pid)
        participant_samples = per_participant_data[pid]
        if len(participant_samples) < min_data_sample_no:
            print('# of samples < min_data_sample_no (' + str(min_data_sample_no) + '), skipping pid')
            continue
        cluster_file = cluster_data_path + pid + '_all_data.data'
        if cluster_file not in cluster_data_files:
            # Joining with single spaces reproduces the output of the former
            # multi-item print statement while staying a one-argument print.
            print(' '.join(['could not find data file for pid: ', pid, ', skipping']))
            continue
        final_result[pid] = {}
        cluster_results[pid] = {}
        cluster_service.clean_house()
        cluster_service.set_pid(pid)
        cluster_service.set_participant_data(participant_samples)
        # NOTE(review): pickle.load can execute arbitrary code embedded in
        # the file -- only point -c at cluster files you produced yourself.
        with open(cluster_file, 'rb') as f:
            data_dict = pickle.load(f)
        cluster_boundaries = data_dict['boundary']
        cluster_labels = data_dict['label']
        missing_gps = 0
        for data_sample in participant_samples:
            n_pid = data_sample[SurveyConstants.PATIENT_ID]
            cid = data_sample[SurveyConstants.CONDITION_ID]
            sid = data_sample[SurveyConstants.SESSION_ID]
            if '' == data_sample[SurveyConstants.GPS_PATH]:
                missing_gps += 1
                continue
            gps_coords_clean = pr.getcleangpsdata(data_sample[SurveyConstants.GPS_PATH], remove_duplicates=True,
                                                  pid=n_pid, cid=cid, sid=sid)
            if gps_coords_clean is None:
                print(' '.join(['no GPS data for ', str(n_pid), str(cid), str(sid), ', skipping']))
                continue
            # presumably (any result flag, travelling points, remaining
            # points) -- confirm against gps_service.find_travelling
            travel_result = cluster_service.find_travelling(gps_coords_clean)
            sample_key = (n_pid, cid, sid)
            reported_context = LocationContext.LOCATION_CONTEXT_VALUES[
                data_sample[SurveyConstants.LOCATION_CONTEXT]]
            # Entry 0 of each list is the reported ("actual") context;
            # everything appended afterwards is a GPS-derived label.
            sample_counts = [(reported_context, len(gps_coords_clean))]
            sample_clusters = [(reported_context, gps_coords_clean)]
            final_result[pid][sample_key] = sample_counts
            cluster_results[pid][sample_key] = sample_clusters
            if not travel_result[0]:
                continue
            # len() rather than truthiness: the containers may be arrays.
            if len(travel_result[1]) != 0:
                sample_counts.append(('Travel', len(travel_result[1])))
                sample_clusters.append(('Travel', (travel_result[1])))
            if len(travel_result[2]) != 0:
                cluster_decisions = gps.check_polygon_memberships(cluster_boundaries, travel_result[2])
                cluster_vals = [sum(x) for x in cluster_decisions]
                for idx, members_in_cluster in enumerate(cluster_vals):
                    if members_in_cluster != 0:
                        sample_counts.append((cluster_labels[idx], members_in_cluster))
                        sample_clusters.append((cluster_labels[idx], travel_result[2],
                                                cluster_decisions[idx]))
        print('confusion matrix: ')
        conf_mat = {}
        for count_data in final_result[pid].values():
            actual_label = count_data[0][0]
            for predicted_label, label_count in count_data[1:]:
                cell = (actual_label, predicted_label)
                conf_mat[cell] = conf_mat.get(cell, 0) + label_count
        print(conf_mat)
        conf_mat_results[pid] = conf_mat
        print(' '.join(['Missing GPS: ', str(missing_gps)]))

    print('Writing results to file')
    bD.write_variable(final_result, 'count_result.res', output_path)
    bD.write_variable(cluster_results, 'cluster_results.res', output_path)
    bD.write_variable(conf_mat_results, 'conf_mat_results.res', output_path)
    print('done')
    print('TADAA!!')