def main():
    """Read every stats record and plot the per-day usage CDFs.

    Loads the records through ``cyb_records.Stats``, reports how many were
    read, hands them to ``CdfPerDay`` and shows the result with ``myplot``.
    Takes no arguments and returns ``None``; everything it does is I/O.
    """
    all_recs = cyb_records.Stats()
    all_recs.ReadRecords()

    # A single pre-formatted string prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print('Number of total stats %d' % len(all_recs.records))

    cdf = CdfPerDay(all_recs.records)
    myplot.Cdfs(cdf)
    myplot.Show(
        title="CDF: daily usage of machines at the YMCA",
        xlabel='Distance (in m / day)',
        ylabel='Percentile',
    )
def main():
    """Read every stats record and plot the per-machine average-distance CDFs.

    Loads the records through ``cyb_records.Stats``, reports how many were
    read, hands them to ``CdfPerMachine`` and shows the result with
    ``myplot``. Takes no arguments and returns ``None``.
    """
    all_recs = cyb_records.Stats()
    all_recs.ReadRecords()

    # A single pre-formatted string prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print('Number of total stats %d' % len(all_recs.records))

    cdf = CdfPerMachine(all_recs.records)
    myplot.Cdfs(cdf)
    myplot.Show(
        title="CDF of cardio machine average distances",
        xlabel='Average Distances',
        ylabel='Probability',
    )
def main():
    """Print Kolmogorov-Smirnov test results comparing Friday with other days.

    Loads all stats records, builds the per-day samples with
    ``CdfPerDay_ScipyStyle`` and prints one line per test: Friday against
    the ``'norm'`` distribution, then Friday against each other weekday via
    the two-sample test. Takes no arguments and returns ``None``.
    """
    all_recs = cyb_records.Stats()
    all_recs.ReadRecords()

    # A single pre-formatted string prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print('Number of total stats %d' % len(all_recs.records))

    cdf = CdfPerDay_ScipyStyle(all_recs.records)

    # The printed labels imply the index convention 0 = Sunday ... 6 = Saturday.
    friday = 5
    other_days = (
        ("Fri-Mon ", 1),
        ("Fri-Tue ", 2),
        ("Fri-Wed ", 3),
        ("Fri-Thu ", 4),
        ("Fri-Sat ", 6),
        ("Fri-Sun ", 0),
    )

    # NOTE(review): kstest(..., 'norm') with no extra args presumably compares
    # against the standard normal; confirm the Friday sample is standardized.
    print('%s %s' % ("Fri-norm ", str(kstest(cdf[friday], 'norm'))))

    # Labels keep their trailing space, so each line has two spaces before
    # the result, exactly as the original comma-separated print produced.
    for label, day in other_days:
        print('%s %s' % (label, str(ks_2samp(cdf[friday], cdf[day]))))
def main():
    """Report how many stats records each machine has, fewest first.

    Loads the stats records, groups them per machine with
    ``split_up_machine_events``, runs ``check_install_date_consistency`` on
    the groups and prints one line per machine with its serial number and
    record count. Takes no arguments and returns ``None``.
    """
    evs = cyb_records.Stats()  # "Stat", or "Events", etc.
    evs.ReadRecords()
    machines_events_list = split_up_machine_events(evs.records)
    check_install_date_consistency(machines_events_list)

    # TODO: report per-machine created/received latency with
    # check_created_recieved_discrepancies (the draft used a 60 second
    # cutoff); that belongs in its own function.

    # Sort on the record count only. Sorting (count, records) tuples would
    # fall through to comparing the record lists whenever two machines have
    # the same count, which is arbitrary on Python 2 and raises TypeError on
    # Python 3 for unorderable records. key=len is stable, so ties keep
    # their original order.
    for machine in sorted(machines_events_list, key=len):
        print(str(machine[0].sn) + " is in the stats database "
              + str(len(machine)) + " times.")
def _facility_by_sn(machine_records, wanted_sns):
    """Map str(sn) -> int(facility_id) for the serial numbers in wanted_sns."""
    facilities = {}
    for record in machine_records:
        sn = str(record.sn)
        # Keep the first row per serial number; later duplicates are ignored.
        if sn in wanted_sns and sn not in facilities:
            facilities[sn] = int(record.facility_id)
    return facilities


def _count_errors_by_sn(error_records):
    """Map str(sn) -> number of event records carrying that serial number."""
    counts = {}
    for record in error_records:
        sn = str(record.sn)
        counts[sn] = counts.get(sn, 0) + 1
    return counts


def main():
    """Scatter-plot error count against up time per machine, colored by site.

    Steps:
      1. Prompt once for database credentials and reuse them for every
         table, since each table is read over its own connection.
      2. From the stats table, take the latest record of every machine and
         keep its serial number and up time.
      3. From the machines table, look up each machine's facility id.
      4. From the events table, count the error records per serial number.
      5. Print the up times and error counts of machines at sites 9-12 and
         draw one scatter series per site.

    Machines whose facility id is missing or outside 9-12 are left out of
    the printout and the plot. Takes no arguments and returns ``None``.
    """
    import getpass

    try:
        prompt = raw_input  # Python 2
    except NameError:
        prompt = input  # Python 3

    # so that you don't have to type in passwords and usernames every time
    # you connect - since you connect separately for each table.
    myusername = prompt("Please enter your username: ")
    # getpass keeps the password from being echoed to the terminal.
    mypassword = getpass.getpass("Please enter your password: ")

    # NOTE(review): the prompt lines and this Stats() read were garbled in
    # the original file and are reconstructed from the surrounding
    # comments; confirm against version control.
    stats = cyb_records.Stats()  # "Stat", or "Events", etc.
    stats.ReadRecords(username=myusername, password=mypassword)
    machines_events_list = prep.split_up_machine_events(stats.records)

    # Optional filtering of which records are collected: only machines whose
    # serial number contains this substring are kept ("" keeps them all).
    sn_filter = ""

    # (sn, up_time) from the most current stats entry of each machine, in
    # the order the machines were returned.
    latest_up_times = []
    for machine in machines_events_list:
        if sn_filter in str(machine[0].sn):
            record = latest_record(machine)
            latest_up_times.append((str(record.sn), int(record.up_time)))

    # Facility id from the machines table, keyed by serial number so that a
    # missing or duplicated row cannot shift the other columns.
    machines_table = cyb_records.Machines()
    machines_table.ReadRecords(username=myusername, password=mypassword)
    facility_by_sn = _facility_by_sn(
        machines_table.records, set(sn for sn, _ in latest_up_times))

    # Error counts from the events table. Cleaning through
    # clean_events.CleanEvents is currently disabled, so raw records count.
    events_table = cyb_records.Events()
    events_table.ReadRecords(username=myusername, password=mypassword)
    clean_records = events_table.records
    print("Total number of cleaned errors: " + str(len(clean_records)))
    errors_by_sn = _count_errors_by_sn(clean_records)

    # Real sites and the color each one is plotted in, in legend order.
    site_styles = ((9, 'r'), (10, 'b'), (11, 'g'), (12, 'k'))
    by_site = dict((code, ([], [])) for code, _ in site_styles)

    real_up_time = []
    real_error_count = []
    for sn, up_time in latest_up_times:
        site_code = facility_by_sn.get(sn)
        if site_code not in by_site:
            continue  # unknown or fake site: ignored
        # Machines that never threw an error still get an entry, with 0.
        error_count = errors_by_sn.get(sn, 0)
        real_up_time.append(up_time)
        real_error_count.append(error_count)
        site_up_times, site_error_counts = by_site[site_code]
        site_up_times.append(up_time)
        site_error_counts.append(error_count)

    print("Up times: " + str(real_up_time))
    print("Error Counts: " + str(real_error_count))

    # NOTE(review): the x axis is labelled kilometers but the raw up_time
    # values are plotted (the original computed a /1000.0 conversion and
    # never used it); confirm the intended unit.
    for code, color in site_styles:
        site_up_times, site_error_counts = by_site[code]
        plt.scatter(site_up_times, site_error_counts, facecolor=color,
                    s=300, label='site code %d' % code)

    plt.legend()
    plt.xlabel('Distance (kilometers)', fontsize=24)
    plt.ylabel('Number of Errors', fontsize=24)
    plt.title('Errors vs. Distance, by Machine and Site', fontsize=30)
    plt.xlim([0, 100000000])
    plt.ylim([0, 600])
    plt.show()