percentage_gap_dict[d_gap]['total_trips_{0}'.format(d_gap)] = N percentage_gap_dict[d_gap]['covered_distance_{0}'.format(d_gap)] = total_distance percentage_gap_dict[d_gap]['number_chained_trips_{0}'.format(d_gap)] = 'N/A' return percentage_gap_dict headers = ["sub_id","day_7","chaining_distance_percentage_7","number_chained_trips_7","total_trips_7","number_isolated_chains_7","covered_distance_7", "day_28","chaining_distance_percentage_28","number_chained_trips_28","total_trips_28","number_isolated_chains_28","covered_distance_28", "day_90","chaining_distance_percentage_90","number_chained_trips_90","total_trips_90","number_isolated_chains_90","covered_distance_90"] #headers = ["sub_id","day_7","chaining_count_percentage_7","number_chained_trips_7","total_trips_7","number_isolated_chains_7","covered_distance_7", #"day_28","chaining_count_percentage_28","number_chained_trips_28","total_trips_28","number_isolated_chains_28","covered_distance_28", #"day_90","chaining_count_percentage_90","number_chained_trips_90","total_trips_90","number_isolated_chains_90","covered_distance_90"] dates = get_dates('2014-01-13') sav_file_timestamps = "../sav_files/axaie_data_for_production_timestamps_2014-01-13.sav" #sav_file_data = "../sav_files/production_output/axaie_data_for_production_just_timestamps_10029970.sav" timestamps = lsl.get_data(sav_file_timestamps) #data = lsl.get_data(sav_file_data) #get_percentages(data,timestamps['10029970'],dates) counter = 0 c = csv.writer(open("Lifesense-distance-chaining-percentage-2014-01-13_1.csv", "wb")) c.writerow(headers) total_counter = 0 root = "../sav_files/2014-01-13/" for path, subdirs, files in os.walk(root): for file in files: subid = file.split('_')[-1].split('.')[0] sav_file = "{0}{1}".format(path,file) data = lsl.get_data(sav_file) total_counter+=1 percentages = get_percentages(data,timestamps[subid],dates) if 7 in percentages:
# NOTE(review): the next line is the tail of a function whose `def` lies outside this chunk; it is
# kept byte-for-byte. It begins with '#', so as written Python reads the whole line as a comment.
#percentage_gap_dict[days_to_increment][day_with_gap]['percentage'] = lsl.get_percentage_chain(chaining_distances,gaps) percentage_gap_dict[days_to_increment][day_with_gap]['percentage'] = lsl.get_percentage_chain_counts(gaps,N) percentage_gap_dict[days_to_increment][day_with_gap]['gaps'] = dx_array if days_to_increment == 7: percentage_gap_dict[days_to_increment][day_with_gap]['number_of_trips'] = len(timestamps) return percentage_gap_dict

# Driver: walk the per-subscription .sav files of one contract and compute
# distances for them, printing coarse progress along the way.
contract ='axaie'
ext = "../sav_files/subids_distances_counts/{0}/".format(contract)
root = "../sav_files/subids_{0}/".format(contract)
for path, subdirs, files in os.walk(root):
    counter = 0
    number_of_files = len(files)
    # Report roughly every tenth file. The step is clamped to 1 because a
    # directory with fewer than ten files used to give a step of 0 and
    # "counter % 0" raised ZeroDivisionError on the first file.
    progress_step = max(1, number_of_files // 10)
    for file in files:
        if counter % progress_step == 0:
            # Single-argument print: valid on Python 2 and 3, same text as
            # the original space-separated print statement.
            print("{0} {1} {2} {3}".format(
                100.0 * counter / float(number_of_files),
                counter,
                float(number_of_files),
                int(number_of_files / float(100)),
            ))
        counter += 1
        # The subscription id is the last '_'-separated token of the file
        # name once everything from the first '.' onwards is dropped.
        subid = file.split('.')[0].split('_')[-1]
        # os.path.join instead of plain concatenation: os.walk yields
        # sub-directory paths without a trailing separator.
        sav_file = os.path.join(path, file)
        data_subid = lsl.get_data(sav_file)
        distances = get_distances(data_subid, 852.0)
        # NOTE(review): this break is assumed to belong to the per-file loop
        # (only the first file of each directory is processed, with the save
        # step below disabled); the original indentation was lost - confirm.
        break
        #sav_file = '{0}{1}_for_axaie_data_analysis_files_{2}.sav'.format(ext,contract,subid)
        #with open(sav_file,'w') as g:
        #    pickle.dump(distances,g)
number_of_fields = len(row) for header in headers: indices[header] = row.index(header) first_line = False else: subid = row[indices["subscription_id"]] trip_id = row[indices["trip_id"]] date = row[indices["start_time"]].split("T")[0] time_stamp = trip_id.split("-")[0] if subid not in Data: Data[subid] = OrderedDict() if date not in Data[subid]: Data[subid][date] = OrderedDict() if time_stamp not in Data[subid][date]: Data[subid][date][time_stamp] = {} for header in headers: if header not in ["subscription_id", "trip_id", "start_time"]: Data[subid][date][time_stamp][header] = float(row[indices[header]]) counter += 1 return Data # data = read_csv() # sav_file = "../sav_files/axaie_data_for_production.sav" # with open(sav_file,'w') as g: # pickle.dump(data,g) sav_file = "../sav_files/axaie_data_for_production.sav" data = lsl.get_data(sav_file) get_data(data, "2013-12-13")
number_of_fields = len(row) for header in headers: indices[header]= row.index(header) first_line = False else: subid = row[indices['subscription_id']] if subid not in data: data[subid] = 0.0 data[subid] = float(row[indices['score']]) return data def process_data(percentages,scores,days_to_chain,contract): counter = 0 for subid in scores: if subid in percentages: plt.scatter(percentages[subid][days_to_chain],scores[subid]) plt.xlim(0.0,100.0) plt.ylim(0.0,100.0) plt.xlabel('%Chaining_distance') plt.ylabel('Expert score') plt.title('{0} score correlation with chaining distance percentage {1}'.format(contract,days_to_chain)) plt.show() contract = "axaie" sav_ext = "../sav_files/score_correlation/" sav_file = '{0}{1}_percentagese_for_score_correlation.sav'.format(sav_ext,contract) csv_file = "../data/{0}-driver-reports-2013-12-13/all-long-term-profiles.csv".format(contract) days_to_chain = 90 percentages = lsl.get_data(sav_file) scores = grab_scores_from_csv(csv_file) process_data(percentages,scores,days_to_chain,contract)
indices = OrderedDict() number_of_fields = len(row) for header in headers: indices[header]= row.index(header) first_line = False else: subid = row[indices['subscription_id']] if subid not in data: data[subid] = 0.0 data[subid] = float(row[indices['score']]) return data sav_file = "../sav_files/phone_to_subid_dict.sav" phone_data = lsl.get_data(sav_file) sav_file_new_method = '../sav_files/score_correlation/autoline_percentagese_for_score_correlation.sav' percentages_autoline = lsl.get_data(sav_file_new_method) result = {} counter_dict = {} for subid in phone_data: phone = phone_data[subid].split()[0] if len(phone.split(','))>1: phone = "iPhone" if phone not in counter_dict: counter_dict[phone]=0 counter_dict[phone]+=1 if subid in percentages_autoline: