percentage_gap_dict[d_gap]['total_trips_{0}'.format(d_gap)] = N
            percentage_gap_dict[d_gap]['covered_distance_{0}'.format(d_gap)] = total_distance
            percentage_gap_dict[d_gap]['number_chained_trips_{0}'.format(d_gap)] = 'N/A'
    return percentage_gap_dict

# Column headers for the distance-chaining CSV: one group of six columns
# per chaining window (7 / 28 / 90 days).
headers = ["sub_id","day_7","chaining_distance_percentage_7","number_chained_trips_7","total_trips_7","number_isolated_chains_7","covered_distance_7",
"day_28","chaining_distance_percentage_28","number_chained_trips_28","total_trips_28","number_isolated_chains_28","covered_distance_28",
"day_90","chaining_distance_percentage_90","number_chained_trips_90","total_trips_90","number_isolated_chains_90","covered_distance_90"]    
# Alternative header set for count-based (rather than distance-based)
# chaining percentages; kept for reference.
#headers = ["sub_id","day_7","chaining_count_percentage_7","number_chained_trips_7","total_trips_7","number_isolated_chains_7","covered_distance_7",
#"day_28","chaining_count_percentage_28","number_chained_trips_28","total_trips_28","number_isolated_chains_28","covered_distance_28",
#"day_90","chaining_count_percentage_90","number_chained_trips_90","total_trips_90","number_isolated_chains_90","covered_distance_90"]                  
dates = get_dates('2014-01-13')
# Pre-pickled per-subscriber timestamp data, keyed by subid string.
sav_file_timestamps = "../sav_files/axaie_data_for_production_timestamps_2014-01-13.sav"
#sav_file_data       = "../sav_files/production_output/axaie_data_for_production_just_timestamps_10029970.sav"

timestamps = lsl.get_data(sav_file_timestamps)
#data       = lsl.get_data(sav_file_data)
#get_percentages(data,timestamps['10029970'],dates)
counter = 0
# NOTE(review): the csv file handle is never closed; "wb" mode is the
# Python 2 convention for csv writers.
c = csv.writer(open("Lifesense-distance-chaining-percentage-2014-01-13_1.csv", "wb"))
c.writerow(headers)
total_counter = 0
root = "../sav_files/2014-01-13/"
# Walk the per-subscriber pickle files and compute chaining percentages
# for each one.
for path, subdirs, files in os.walk(root):
    for file in files:
        # Subscriber id is the final underscore-separated token of the
        # file name, without its extension.
        subid = file.split('_')[-1].split('.')[0]
        sav_file    = "{0}{1}".format(path,file)
        data        = lsl.get_data(sav_file)
        total_counter+=1
        percentages = get_percentages(data,timestamps[subid],dates)
        if 7 in percentages:
                    # NOTE(review): the lines below look like a mis-paste
                    # from a different function - `percentage_gap_dict`,
                    # `days_to_increment`, `day_with_gap`, `gaps`, `N` and
                    # `dx_array` are not defined in this scope.
                    #percentage_gap_dict[days_to_increment][day_with_gap]['percentage'] = lsl.get_percentage_chain(chaining_distances,gaps)
                    percentage_gap_dict[days_to_increment][day_with_gap]['percentage'] = lsl.get_percentage_chain_counts(gaps,N)
                    percentage_gap_dict[days_to_increment][day_with_gap]['gaps'] = dx_array
                    if days_to_increment == 7:
                        percentage_gap_dict[days_to_increment][day_with_gap]['number_of_trips'] = len(timestamps) 

    # NOTE(review): `return` at module level - SyntaxError. This line
    # belongs to a function whose header is not visible here.
    return percentage_gap_dict

contract ='axaie'
ext = "../sav_files/subids_distances_counts/{0}/".format(contract)
root = "../sav_files/subids_{0}/".format(contract)
for path, subdirs, files in os.walk(root):
    counter = 0
    number_of_files = len(files)
    for file in files:
        if counter%int(number_of_files/float(10))==0:
            print 100.0*counter/float(number_of_files),counter,float(number_of_files),int(number_of_files/float(100))
        counter+=1
        subid     = file.split('.')[0].split('_')[-1]
        sav_file  = "{0}{1}".format(path,file)
        data_subid= lsl.get_data(sav_file)
        distances = get_distances(data_subid,852.0)
        break
        #sav_file = '{0}{1}_for_axaie_data_analysis_files_{2}.sav'.format(ext,contract,subid)
        #with open(sav_file,'w') as g:
        #       pickle.dump(distances,g)
        
        
    

                # NOTE(review): fragment - the enclosing function header,
                # the csv-row loop, and the `first_line`/`headers`/
                # `indices`/`Data` setup are not visible in this chunk.
                number_of_fields = len(row)
                # Header row: map each expected column name to its index.
                for header in headers:
                    indices[header] = row.index(header)
                first_line = False
            else:
                # Data row: locate the identifying fields by header index.
                subid = row[indices["subscription_id"]]
                trip_id = row[indices["trip_id"]]
                # Date is the day part of an ISO-8601 "date T time" stamp.
                date = row[indices["start_time"]].split("T")[0]
                # Trip ids are "<timestamp>-..." - keep the timestamp part.
                time_stamp = trip_id.split("-")[0]
                if subid not in Data:
                    Data[subid] = OrderedDict()

                if date not in Data[subid]:
                    Data[subid][date] = OrderedDict()
                if time_stamp not in Data[subid][date]:
                    Data[subid][date][time_stamp] = {}
                # Store every remaining column as a float, nested under
                # subid -> date -> timestamp.
                for header in headers:
                    if header not in ["subscription_id", "trip_id", "start_time"]:
                        Data[subid][date][time_stamp][header] = float(row[indices[header]])
                counter += 1
    return Data


# data = read_csv()
# sav_file = "../sav_files/axaie_data_for_production.sav"
# with open(sav_file,'w') as g:
#       pickle.dump(data,g)
sav_file = "../sav_files/axaie_data_for_production.sav"
data = lsl.get_data(sav_file)
get_data(data, "2013-12-13")
                # NOTE(review): fragment - the enclosing function header
                # and the csv-row loop are not visible in this chunk.
                number_of_fields = len(row)
                # Header row: map each expected column name to its index.
                for header in headers:
                    indices[header]= row.index(header)
                first_line = False
            else:
                subid      = row[indices['subscription_id']]
                # NOTE(review): the 0.0 initialisation is dead - the next
                # line unconditionally overwrites it, so the last row per
                # subid wins.
                if subid not in data:
                    data[subid] = 0.0
                data[subid] = float(row[indices['score']])        
    return data
    
def process_data(percentages,scores,days_to_chain,contract):
    """Scatter-plot expert scores against chaining-distance percentages.

    percentages   -- dict: subid -> {days_to_chain: percentage, ...}
    scores        -- dict: subid -> expert score
    days_to_chain -- chaining window key (e.g. 7, 28 or 90)
    contract      -- contract name, used only in the plot title

    Only subids present in BOTH dicts are plotted. Shows the plot
    (blocking) and returns None.
    """
    # Collect the matched points first so plt.scatter is called once for
    # the whole dataset instead of once per subid (the original also
    # carried an unused `counter` local, removed here).
    xs = []
    ys = []
    for subid in scores:
        if subid in percentages:
            xs.append(percentages[subid][days_to_chain])
            ys.append(scores[subid])
    plt.scatter(xs, ys)
    plt.xlim(0.0,100.0)
    plt.ylim(0.0,100.0)
    plt.xlabel('%Chaining_distance')
    plt.ylabel('Expert score')
    plt.title('{0} score correlation with chaining distance percentage {1}'.format(contract,days_to_chain))
    plt.show()
# Correlate pre-computed chaining-distance percentages with expert
# scores for one contract.
contract = "axaie"
sav_ext = "../sav_files/score_correlation/"
# NOTE(review): "percentagese" looks like a typo, but the pickle on
# disk was presumably written under this exact name - do not rename it
# here without also renaming the file.
sav_file = '{0}{1}_percentagese_for_score_correlation.sav'.format(sav_ext,contract)
csv_file = "../data/{0}-driver-reports-2013-12-13/all-long-term-profiles.csv".format(contract)

# Chaining window to correlate (7 / 28 / 90 days).
days_to_chain = 90
percentages = lsl.get_data(sav_file)
scores      = grab_scores_from_csv(csv_file)
process_data(percentages,scores,days_to_chain,contract)
                # NOTE(review): fragment (near-duplicate of an earlier
                # chunk) - the enclosing function header and csv-row loop
                # are not visible here.
                indices = OrderedDict()
                number_of_fields = len(row)
                # Header row: map each expected column name to its index.
                for header in headers:
                    indices[header]= row.index(header)
                first_line = False
            else:
                subid      = row[indices['subscription_id']]
                # NOTE(review): the 0.0 initialisation is dead - the next
                # line unconditionally overwrites it, so the last row per
                # subid wins.
                if subid not in data:
                    data[subid] = 0.0
                data[subid] = float(row[indices['score']])        
    return data
    


# Pickled mapping of subscriber id -> phone description string.
sav_file   = "../sav_files/phone_to_subid_dict.sav"
phone_data = lsl.get_data(sav_file)

# Chaining percentages previously computed for the "autoline" contract.
sav_file_new_method  = '../sav_files/score_correlation/autoline_percentagese_for_score_correlation.sav'
percentages_autoline = lsl.get_data(sav_file_new_method)

    
# Accumulators for the per-phone-model loop that follows.
result = {}
counter_dict = {}
for subid in phone_data:
    phone = phone_data[subid].split()[0]
    if len(phone.split(','))>1:
        phone = "iPhone"
    if phone not in counter_dict:
        counter_dict[phone]=0
    counter_dict[phone]+=1
    if subid in percentages_autoline: