def show():
    """Print a right-aligned table with one row per loadable participant.

    For every participant number 0..HIGHEST_P_NUMBER the metaData file is
    loaded through studyLength(); the row holds the pid and the length of the
    loaded object's ``days`` attribute. Participants whose file raises IOError
    are left out without any message.
    """
    rows = []  # (pid, len(days)) for each participant that loaded
    for subject_n in range(HIGHEST_P_NUMBER + 1):
        settings = setup(dataset='USF', data_loc=DATA_LOC, subject_n=subject_n)
        try:
            meta = studyLength(fileLoc=settings.getFileName('metaData'))
            n_days = len(meta.days)
        except IOError:
            # missing metaData file: skip this participant quietly
            continue
        rows.append((str(subject_n), n_days))

    # table layout: every column is right-aligned in a 15-character field
    headers = ['pid', 'studyLen']
    row_format = "{:>15}" * len(headers)
    rule = '======================================'

    print(rule)
    print(row_format.format(*headers))
    for row in rows:
        print(row_format.format(*row))
    print(rule)
def show():
    """Print a table of pid vs. study length for all participants with metaData.

    Study length is ``len(<studyLength object>.days)``. A participant whose
    metaData file cannot be opened (IOError) is skipped silently.
    """
    # load in all the data
    pids = []
    lengths = []
    for number in range(HIGHEST_P_NUMBER + 1):
        settings = setup(dataset='USF', data_loc=DATA_LOC, subject_n=number)
        try:
            length = len(studyLength(fileLoc=settings.getFileName('metaData')).days)
        except IOError:
            continue
        pids.append(str(number))
        lengths.append(length)

    # build the whole table first, then emit it in one go
    divider = '======================================'
    columns = ['pid', 'studyLen']
    row_format = "{:>15}" * len(columns)
    table = [divider, row_format.format(*columns)]
    table.extend(row_format.format(pid, length) for pid, length in zip(pids, lengths))
    table.append(divider)
    print('\n'.join(table))
def plot():
    """Save a stack of per-participant "spark" scatter rows of avatar view times.

    Steps:
      1. Load the viewLog interaction data of every participant number
         0..HIGHEST_P_NUMBER; participants whose file raises IOError are
         reported and skipped.
      2. Re-base each participant's ``t`` values: subtract the minimum, then
         add the time of day (hours and minutes, converted with
         ``(hour*60 + minute) * 60``) of the first ``x`` entry.
      3. Draw one subplot row per participant (``viewTimes`` on the x axis,
         all at y=0) and save the figure as
         DATA_LOC + "interaction_sparkScatterplot.png".
    """
    # load in all the data
    participants = []
    interactions = []
    for pNum in range(HIGHEST_P_NUMBER + 1):  # cycle through all participants
        settings = setup(dataset='USF', data_loc=DATA_LOC, subject_n=pNum)
        pid = str(pNum)
        try:
            interact = interactionData(settings.getFileName('viewLog'))
        except IOError:
            print('p ' + pid + ' interactions file not found')
            continue
        participants.append(pid)
        interactions.append(interact)
        print('p' + pid + ' data loaded.')

    print('\n ==================================================')
    print('\t total participants: ' + str(len(participants)))
    print(' ==================================================')

    # shift to time since start...
    for interact in interactions:
        start_t = min(interact.t)
        for i in range(len(interact)):
            interact.t[i] -= start_t

    # shift all to same daily cycle
    for interact in interactions:
        timeOfDay = interact.x[0]  # assuming earliest time is first...
        ToD_start = (timeOfDay.hour * 60 + timeOfDay.minute) * 60
        for i in range(len(interact)):
            interact.t[i] += ToD_start

    # make the plots
    figName = "interaction_sparkScatterplot"
    pylab.figure(figName)
    pylab.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)
    n_rows = len(interactions)
    # Subplot positions are 1-based. The counter used to start at 0, which
    # matplotlib either rejects or maps onto the wrong row.
    for row, interaction in enumerate(interactions, 1):
        pylab.subplot(n_rows, 1, row)
        pylab.scatter(interaction.viewTimes,
                      [0] * len(interaction.viewTimes),
                      marker='|',
                      color=interaction.viewMarkerColor)
        pylab.plt.gca().axes.get_xaxis().set_visible(False)

    pylab.plt.savefig(DATA_LOC + figName + '.png', dpi=1000)
def plot():
    """Save a stack of per-participant "spark line" plots of interaction data.

    Loads the viewLog interaction data of every participant number
    0..HIGHEST_P_NUMBER (IOError -> reported and skipped), re-bases each
    participant's ``t`` values (subtract the minimum, then add the time of day
    of the first ``x`` entry as ``(hour*60 + minute) * 60``), plots ``v``
    against ``t`` in one subplot row per participant, and saves the figure as
    DATA_LOC + "interaction_sparkLines.png".
    """
    # load in all the data
    participants = []
    interactions = []
    for pNum in range(HIGHEST_P_NUMBER + 1):  # cycle through all participants
        settings = setup(dataset='USF', data_loc=DATA_LOC, subject_n=pNum)
        pid = str(pNum)
        try:
            interact = interactionData(settings.getFileName('viewLog'))
        except IOError:
            print('p ' + pid + ' interactions file not found')
            continue
        participants.append(pid)
        interactions.append(interact)
        print('p' + pid + ' data loaded.')

    print('\n ==================================================')
    print('\t total participants: ' + str(len(participants)))
    print(' ==================================================')

    # shift to time since start...
    for interact in interactions:
        start_t = min(interact.t)
        for i in range(len(interact)):
            interact.t[i] -= start_t

    # shift all to same daily cycle
    for interact in interactions:
        timeOfDay = interact.x[0]  # assuming earliest time is first...
        ToD_start = (timeOfDay.hour * 60 + timeOfDay.minute) * 60
        for i in range(len(interact)):
            interact.t[i] += ToD_start

    figName = "interaction_sparkLines"
    pylab.figure(figName)
    n_rows = len(interactions)
    # Subplot positions are 1-based; the previous counter started at 0.
    for row, interaction in enumerate(interactions, 1):
        pylab.subplot(n_rows, 1, row)
        pylab.plot(interaction.t, interaction.v)

    pylab.plt.savefig(DATA_LOC + figName + '.png', dpi=1000)
def plot():
    """Draw one scatter row of fitbit steps over time for each participant.

    Loads minute-scale fitbit PA data for every participant number
    0..HIGHEST_P_NUMBER (IOError -> reported and skipped) and scatters
    ``steps`` against ``time`` in one subplot row per participant inside the
    figure named "PA_sparkScatterplot". The figure is neither shown nor saved
    here.
    """
    # load in all the data
    participants = []
    PA = []
    for pNum in range(HIGHEST_P_NUMBER + 1):  # cycle through all participants
        settings = setup(dataset='USF', data_loc=DATA_LOC, subject_n=pNum)
        pid = str(pNum)
        try:
            fName = settings.getFileName('fitbit')
            pa = PAdata(fName, method='fitbit', timeScale='minute')
        except IOError:
            print('p ' + pid + ' PA file "' + fName + '" not found')
            continue
        participants.append(pid)
        PA.append(pa)
        print('p' + pid + ' data loaded.')

    print('\n ==================================================')
    print('\t total participants: ' + str(len(participants)))
    print(' ==================================================')

    # NOTE: times are plotted exactly as loaded; no shift to study start or to
    # a common time of day is applied to the PA data.

    # make the plots
    figName = "PA_sparkScatterplot"
    pylab.figure(figName)
    n_rows = len(PA)
    # Subplot positions are 1-based; the previous counter started at 0.
    for row, pa in enumerate(PA, 1):
        pylab.subplot(n_rows, 1, row)
        pylab.scatter(pa.time, pa.steps, marker='x', color='b')
def plot():
    """Scatter each participant's fitbit steps against time, one row apiece.

    Every participant number 0..HIGHEST_P_NUMBER is tried; a participant whose
    fitbit file raises IOError is reported and skipped. The rows are drawn into
    the figure named "PA_sparkScatterplot", which this function does not show
    or save.
    """
    # load in all the data
    participants = []
    PA = []
    for pNum in range(HIGHEST_P_NUMBER + 1):  # cycle through all participants
        settings = setup(dataset='USF', data_loc=DATA_LOC, subject_n=pNum)
        pid = str(pNum)
        try:
            fName = settings.getFileName('fitbit')
            pa = PAdata(fName, method='fitbit', timeScale='minute')
        except IOError:
            print('p ' + pid + ' PA file "' + fName + '" not found')
            continue
        participants.append(pid)
        PA.append(pa)
        print('p' + pid + ' data loaded.')

    print('\n ==================================================')
    print('\t total participants: ' + str(len(participants)))
    print(' ==================================================')

    # NOTE: the PA times are used as loaded; they are not shifted to the start
    # of the study or onto a shared daily cycle.

    # make the plots
    figName = "PA_sparkScatterplot"
    pylab.figure(figName)
    n_rows = len(PA)
    # matplotlib numbers subplot positions from 1, not 0 (the old counter
    # started at 0).
    for row, pa in enumerate(PA, 1):
        pylab.subplot(n_rows, 1, row)
        pylab.scatter(pa.time, pa.steps, marker='x', color='b')
def plot():
    """Draw one bar-chart row of daily interaction totals per participant.

    Loads daily-scale viewLog interaction data for every participant number
    0..HIGHEST_P_NUMBER (IOError -> reported and skipped). Each participant's
    ``dailyTotals`` list is padded in place with -1 up to the longest list so
    that bars have the same width in every row, then drawn as one subplot row
    in the figure named "interactions_sparkBars". Nothing is drawn when no
    participant could be loaded.
    """
    # load in all the data
    participants = []
    interactions = []
    for pNum in range(HIGHEST_P_NUMBER + 1):  # cycle through all participants
        settings = setup(dataset='USF', data_loc=DATA_LOC, subject_n=pNum)
        pid = str(pNum)
        try:
            interact = interactionData(fileName=settings.getFileName('viewLog'),
                                       timeScale='daily')
        except IOError:
            print('p ' + pid + ' interactions file not found')
            continue
        participants.append(pid)
        interactions.append(interact)
        print('p' + pid + ' data loaded.')

    print('\n ==================================================')
    print('\t total participants: ' + str(len(participants)))
    print(' ==================================================')

    if not interactions:
        # max() below would raise ValueError on an empty list
        return

    maxStudyLen = max([len(interact.dailyTotals) for interact in interactions])

    # make the plots
    figName = "interactions_sparkBars"
    pylab.figure(figName)
    pylab.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)
    n_rows = len(interactions)
    # Subplot positions are 1-based; the previous counter started at 0.
    for row, interact in enumerate(interactions, 1):
        # make all data the same length so bars are the same width
        while len(interact.dailyTotals) < maxStudyLen:
            interact.dailyTotals.append(-1)
        pylab.subplot(n_rows, 1, row)
        pylab.bar(range(len(interact.dailyTotals)), interact.dailyTotals, width=.5)
def __init__(self, settings, min_quality=QUALITY_LEVEL.acceptable, used_data_types=DATA_TYPES.all, trim=True, check=True, avatar_view_freq=60):
    """
    Load one Subject per pid reported by `settings`, leaving out excluded pids.

    :param settings: provides get_pid_list(), get_exluded_list(), `dataset` and `dataLoc`
    :param min_quality: minimum quality level of data to be considered in the dataset
    :param used_data_types: iterable of data types; passed to get_exluded_list() and,
        as a {type: True} mapping, to every Subject via `uses`
    :param trim: call trim_data() on each loaded Subject
    :param check: call integrity_check() on each loaded Subject
    :param avatar_view_freq: forwarded unchanged to every Subject
    """
    self.settings = settings
    self.pids = settings.get_pid_list()
    self.excluded = settings.get_exluded_list(min_level=min_quality, used_data=used_data_types)

    # drop excluded pids; slice assignment mutates the same list object that
    # get_pid_list() returned, exactly like the element-wise remove() it replaces
    self.pids[:] = [pid for pid in self.pids if pid not in self.excluded]

    uses_by_type = dict.fromkeys(used_data_types, True)

    self.subject_data = list()
    for pid in self.pids:
        subject_settings = setup(dataset=settings.dataset, data_loc=settings.dataLoc, subject_n=pid)
        subject = Subject(subject_settings, avatar_view_freq=avatar_view_freq, uses=uses_by_type)
        self.subject_data.append(subject)
        if trim:
            subject.trim_data()
        if check:
            subject.integrity_check()

    # space-joined to reproduce the output of the multi-item print statement
    print(' '.join([str(len(self)), 'subjects loaded. pids = ', str(self.pids)]))
    print(' '.join(['excluding pids ', str(self.excluded)]))
def get_data(pre_win, post_win):
    """Collect step data in a window around every event of the test subject.

    For each entry of ``event_data.time`` of the first loaded subject, asks the
    dataset for ``get_steps_after_time`` starting `pre_win` minutes before the
    event and covering ``pre_win + post_win`` minutes.

    :param pre_win: minutes before each event at which the window starts
    :param post_win: minutes after each event covered by the window
    :return: (window length in minutes, list of 1s with one entry per event,
        list of per-event results of get_steps_after_time)
    """
    settings = setup(dataset='test', data_loc='./data/controlIntervention/', subject_n=3)
    data = Dataset(settings,
                   trim=True,
                   check=False,
                   used_data_types=[DATA_TYPES.event, DATA_TYPES.fitbit])

    PNUM = 0
    minutes = post_win + pre_win
    event_times = data.subject_data[0].event_data.time
    bars = [data.get_steps_after_time(evt - timedelta(minutes=pre_win), minutes, PNUM)
            for evt in event_times]

    # all events are same participant
    pids = [1] * len(bars)
    return minutes, pids, bars
def get_fake_data(pre_win, post_win, minutes, pids, bars):
    """Subtract "fake" control windows from the given per-event step data.

    The control window for each event starts one day plus `pre_win` minutes
    before the event (a random-ish reference time) and spans `minutes` minutes.
    Each entry of `bars` is paired by index with the control window of the
    same position and combined through list_subtract().

    :param pre_win: minutes before each event at which the window starts
    :param post_win: not used by this function
    :param minutes: window length passed to get_steps_after_time
    :param pids: returned unchanged
    :param bars: per-event step data to subtract the control windows from
    :return: (minutes, pids, list of list_subtract(bars[i], fake_bars[i]))
    """
    settings = setup(dataset='test', data_loc='./data/controlIntervention/', subject_n=3)
    data = Dataset(settings,
                   trim=True,
                   check=False,
                   used_data_types=[DATA_TYPES.event, DATA_TYPES.fitbit])

    PNUM = 0
    event_times = data.subject_data[0].event_data.time
    fake_bars = [
        data.get_steps_after_time(evt - timedelta(days=1, minutes=pre_win), minutes, PNUM)
        for evt in event_times
    ]

    # index into fake_bars (rather than zip) so a too-short fake_bars still
    # raises IndexError
    diff_bars = [list_subtract(real, fake_bars[i]) for i, real in enumerate(bars)]
    return minutes, pids, diff_bars
def plot(dataset='test', dataLoc="./data/", paMethod=DEFAULT_METHOD, bypass_data_check=False):
    """Plot and test each participant's average PA on sedentary vs. active avatar days.

    For every participant number 0..HIGHEST_P_NUMBER (1, 3, 13 and 14 are
    skipped with a message) the daily interaction scores and daily PA scores
    are loaded, each day is classed by the sign of its interaction score
    (>0 active, <0 sedentary, 0 zero), and the participant's mean PA per class
    is stored. The means are drawn as three stacked bars (x = -1, 0, 1) and a
    paired t-test (scipy.stats.ttest_rel) of sedentary vs. active means is
    printed.

    A participant without at least one active AND one sedentary day cannot be
    part of the paired comparison and is skipped with a message. Previously
    this case raised an uncaught ZeroDivisionError.

    :param dataset: dataset name passed to setup()
    :param dataLoc: data location passed to setup()
    :param paMethod: 'mMonitor' (scores from segmentPAIntoDays) or 'fitbit'
        (``PA.steps`` / ``PA.time`` used directly)
    :param bypass_data_check: when True, trim_data() is not called on the
        loaded date/score lists
    :raises ValueError: if `paMethod` is neither 'mMonitor' nor 'fitbit'
    """
    pltName = 'participants\' avg PA for sedentary & active avatar days'
    print('making plot "' + pltName + '"')
    pylab.figure(pltName)

    activePAs = []
    sedentPAs = []
    zeroPAs = []
    dataCounter = 0  # how many participants contributed data

    for pNum in range(HIGHEST_P_NUMBER + 1):  # cycle through all participants
        print('===p:' + str(pNum) + '===')
        if pNum == 1 or pNum == 3:
            print('skipping p1 & p3 b/c data is incomplete.\n')
            continue
        if pNum == 13:
            print('skip p13 b/c data needs some massaging.\n')
            continue
        if pNum == 14:
            print('skip p14 b/c data is incomplete.\n')
            continue

        try:
            settings = setup(dataset=dataset, data_loc=dataLoc, subject_n=pNum)

            # load interaction data
            interact = interactionData(settings.getFileName('viewLog'))
            interactScore, interactDate = segmentInteractionIntoDays(interact)

            # load PA data
            PA = PAdata(PAfile=settings.getFileName(paMethod), method=paMethod, timeScale='daily')
            if paMethod == 'mMonitor':
                PAscore, PAdate = segmentPAIntoDays(PA, PAscoreFunction=getPAscore_postiveOnly)
            elif paMethod == 'fitbit':
                PAscore = PA.steps
                PAdate = PA.time
            else:
                raise ValueError('unknown PA method "' + str(paMethod) + '"')

            if not bypass_data_check:
                trim_data(interactDate, PAdate, interactScore, PAscore)

            # running PA total / day count per avatar-day class; keys are the
            # sign of the day's interaction score (1 active, -1 sedentary, 0 zero)
            totals = {1: 0, -1: 0, 0: 0}
            counts = {1: 0, -1: 0, 0: 0}
            for day in range(len(PAscore)):
                iScore = interactScore[day]
                if iScore > 0:
                    sign = 1
                elif iScore < 0:
                    sign = -1
                else:
                    sign = 0
                totals[sign] += PAscore[day]
                counts[sign] += 1

            if counts[1] == 0 or counts[-1] == 0:
                # both halves of the pair are needed for ttest_rel; appending
                # only one would also misalign the two lists
                print('participant ' + str(pNum) + ' has no active or no sedentary days; skipped.\n')
                continue

            # divide by a float so integer scores are not floor-divided
            activePAs.append(totals[1] / float(counts[1]))
            sedentPAs.append(totals[-1] / float(counts[-1]))
            if counts[0]:
                zeroPAs.append(totals[0] / float(counts[0]))
            else:
                zeroPAs.append(0)

            # no exceptions means this data was good, increment counter
            dataCounter += 1
            print(str(pNum) + ' loaded.\n')
        except InputError as e:
            print(e.message)
            print('participant ' + str(pNum) + ' not valid.\n')
        except IOError as e:
            print(e.message)
            print('participant ' + str(pNum) + ' not found.\n')
        except Warning as w:
            print(w.message)
            print('some issues loading participant ' + str(pNum) + '; continuing anyway.\n')

    if paMethod == 'mMonitor':
        pylab.plt.ylabel('physical activity score')
    elif paMethod == 'fitbit':
        pylab.plt.ylabel('average step counts')
    pylab.plt.xlabel('<-sedentary active->\navatar behavior')
    pylab.plt.gca().axes.get_xaxis().set_ticks([])
    pylab.plt.draw()

    print(activePAs)
    print(sedentPAs)
    print(zeroPAs)

    # TODO: color each participant's segment via a colormap indexed by
    # dataCounter / number of participants with data
    # one stacked bar per class: every participant's mean sits on top of the
    # previous ones
    for x_pos, means in ((1, activePAs), (-1, sedentPAs), (0, zeroPAs)):
        base = 0
        for mean_pa in means:
            pylab.plt.bar(x_pos, mean_pa, bottom=base, linewidth=1, width=1.7)
            base += mean_pa

    paired_sample = stats.ttest_rel(sedentPAs, activePAs)
    print("================================================")
    print(str(dataCounter) + " subjects analyzed using " + paMethod + " data.")
    print("The t-statistic is %.3f and the p-value is %.3f." % paired_sample)
    print("================================================")
    print('done.')
""" analysis run on just one participant: """
# Script: loads one Subject from the USF dataset, trims and integrity-checks
# it, then shows its fitbit and avatar-view time series in two figures.
import warnings
import pylab
from src.settings import setup
from src.data.subject.Subject import Subject

settings = setup(dataset='USF', data_loc='../subjects/')  # use dataset='test' to select sample dataset

# NOTE(review): the indentation of this script was not recoverable from the
# source; it is assumed that loading, trimming and checking all run with
# warnings suppressed -- confirm the intended extent of the `with` body.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    sub = Subject(settings)
    sub.trim_data()
    sub.integrity_check()

# one figure per time series; both are displayed by the single show() call
pylab.plt.figure('fitbit time series')
sub.fitbit_data.ts.plot()
pylab.plt.figure('avatar view time series')
sub.avatar_view_data.ts.plot()
pylab.plt.show()

#this one isn't very impressive, and I don't think it is working right now anyway
#import src.interaction.timeSeries.simple
#src.interaction.timeSeries.simple.plot()
#pylab.plt.show()

# imported for its side effects only; nothing from the module is referenced here
import src.dep.interaction.timeSeries.multicolorBars
__author__ = 'tylar'
# Script: walks the avatar-view CSV file of every participant in the USF
# dataset and parses each row. Only the row-parsing part is visible here.
import csv
from src.settings import setup, DATA_TYPES

MIN_LEN = 10  # min legitimate view time in ms
MAX_LEN = 60*1000  # max legitimate view time in ms
REPLACE_TIME = 10*1000  # length of time placed at start and end of illegitimate times

settings = setup(dataset='USF', data_loc='../subjects/', subject_n=0)
old_total = new_total = replaced = removed = kept = 0  # various counters

pids = settings.get_pid_list()
for pid in pids:
    # per-participant settings are only needed to resolve the view file path
    view_file_loc = setup(dataset=settings.dataset, data_loc=settings.dataLoc, subject_n=pid).get_file_name(DATA_TYPES.avatar_views)
    # read all the rows in
    cols = list()
    with open(view_file_loc, 'rb') as csvfile:
        spamreader = csv.reader(csvfile, delimiter=',')
        for row in spamreader:
            old_total += 1
            #print ', '.join(row) # print the raw data
            # print row # print raw data matrix
            # row columns 0-2 are parsed as integers, column 3 is kept as text
            # presumably start time, end time, view length and activity -- TODO confirm
            t0 = int(row[0])
            tf = int(row[1])
            # NOTE(review): this shadows the builtin len() for the rest of the module
            len = int(row[2])
            act = row[3]
def _align_days(interactDate, interactScore, PAdate, PAscore):
    """Trim the four parallel lists in place until both series cover the same days.

    Days are dropped one per pass from whichever series starts earlier or ends
    later; when both ends agree but the lengths differ, the first day of the
    longer series that has no counterpart in the shorter one is dropped.

    :raises Warning: 'data has no overlap!' as soon as either series is empty
    """
    while True:
        # Check before indexing [0] / [-1]: an emptied list must produce the
        # Warning below, not an IndexError.
        if min(len(PAscore), len(interactScore), len(PAdate), len(interactDate)) <= 0:
            raise Warning('data has no overlap!')

        if (interactDate[0].date() == PAdate[0].date()
                and interactDate[-1].date() == PAdate[-1].date()
                and len(interactScore) == len(PAscore)):
            return

        print('day mismatch: ')
        print('\t NAME \tSTART \t\t\tEND \t\t\tLEN')
        print('\t interact\t' + str(interactDate[0]) + '\t' + str(interactDate[-1]) + '\t' + str(len(interactScore)))
        print('\t PA \t' + str(PAdate[0]) + '\t' + str(PAdate[-1]) + '\t' + str(len(PAscore)) + '\n')

        if PAdate[0].date() < interactDate[0].date():  # pa starts before interact
            print('pa data removed from start')
            PAdate.pop(0)
            PAscore.pop(0)
        elif PAdate[0].date() > interactDate[0].date():  # pa starts after interact
            print('interact data removed from start')
            interactDate.pop(0)
            interactScore.pop(0)
        elif PAdate[-1].date() < interactDate[-1].date():  # pa ends before interact
            # message now names the list that is actually popped
            print('interact data removed from end')
            interactDate.pop()
            interactScore.pop()
        elif PAdate[-1].date() > interactDate[-1].date():  # pa ends after interact
            # message now names the list that is actually popped
            print('pa data removed from end')
            PAscore.pop()
            PAdate.pop()
        else:
            # uneven values must be from missing days in middle of one dataset
            if len(PAscore) > len(interactScore):
                longName, longer, ldate = 'PA', PAscore, PAdate
                shorter, sdate = interactScore, interactDate
            else:
                longName, longer, ldate = 'interaction', interactScore, interactDate
                shorter, sdate = PAscore, PAdate
            for i in range(len(longer)):
                # remove the first extra date that is not in the shorter list
                if i >= len(shorter) or sdate[i].date() != ldate[i].date():
                    print('value removed from ' + longName + ' at ' + str(ldate[i]))
                    longer.pop(i)
                    ldate.pop(i)
                    break


def plot(dataset='test', dataLoc="./data/"):
    """Stack every participant-day's PA score by avatar behaviour and t-test the groups.

    For every participant number 0..HIGHEST_P_NUMBER (1, 3, 13 and 14 are
    skipped) the daily interaction scores and mMonitor PA scores are loaded
    and aligned to the same days. Each day's PA score is then stacked onto the
    bar at x = -1 (interaction score < 0), 0 (== 0) or 1 (> 0). Finally an
    independent two-sample t-test (scipy.stats.ttest_ind) of the sedentary vs.
    active day scores is printed.

    :param dataset: dataset name passed to setup()
    :param dataLoc: data location passed to setup()
    """
    pltName = 'All participants\' PA for sedentary & active avatar days'
    print('making plot "' + pltName + '"')
    pylab.figure(pltName)

    activePAs = []
    sedentPAs = []
    zeroPAs = []
    # per-class score list and current bar height, keyed by the sign of the
    # day's interaction score
    scores_by_sign = {1: activePAs, -1: sedentPAs, 0: zeroPAs}
    base_by_sign = {1: 0, -1: 0, 0: 0}
    dataCounter = 0  # a count of how many participants were used (for colormapping and debug)

    for pNum in range(HIGHEST_P_NUMBER + 1):  # cycle through all participants
        if pNum in (1, 3, 13, 14):
            # skip p1 & p3 b/c data is incomplete
            # skip p13 b/c data needs some massaging
            # skip p14 b/c data is incomplete
            continue
        try:
            settings = setup(dataset=dataset, data_loc=dataLoc, subject_n=pNum)

            # load interaction data
            interact = interactionData(settings.getFileName('viewLog'))
            interactScore, interactDate = segmentInteractionIntoDays(interact)

            # load PA data
            PA = PAdata(settings.getFileName('mMonitor'))
            PAscore, PAdate = segmentPAIntoDays(PA, PAscoreFunction=getPAscore_postiveOnly)

            _align_days(interactDate, interactScore, PAdate, PAscore)

            # after alignment both score lists have the same length
            for iScore, paScore in zip(interactScore, PAscore):
                # reduce the interaction score to just (+), (-) or 0
                if iScore > 0:
                    sign = 1  # active
                elif iScore < 0:
                    sign = -1  # sedentary
                else:
                    sign = 0
                scores_by_sign[sign].append(paScore)
                # TODO: colormapping should use dataCounter and actual number of
                # data available (instead of pNum and HIGHEST_P_NUM, respectively)
                pylab.plt.bar(sign, paScore, bottom=base_by_sign[sign], linewidth=1, width=1.7)
                base_by_sign[sign] += paScore

            # no exceptions means this data was good, increment counter
            dataCounter += 1
        except InputError:
            print('participant ' + str(pNum) + ' not valid.')
        except IOError:
            print('participant ' + str(pNum) + ' not found.')
        except Warning as w:
            print(w.message)

    pylab.plt.ylabel('physical activity score')
    pylab.plt.xlabel('<-sedentary zero active->\navatar behavior')
    pylab.plt.draw()

    print(activePAs)
    print(sedentPAs)
    print(zeroPAs)

    t_result = stats.ttest_ind(sedentPAs, activePAs)
    print("================================================")
    print("The t-statistic is %.3f and the p-value is %.3f." % t_result)
    print("================================================")
    print('done.')
""" analysis run on just one participant: """
# Script: loads one Subject from the USF dataset, trims and integrity-checks
# it, then shows its fitbit and avatar-view time series in two figures.
import warnings
import pylab
from src.settings import setup
from src.data.subject.Subject import Subject

settings = setup(
    dataset='USF',
    data_loc='../subjects/')  # use dataset='test' to select sample dataset

# NOTE(review): the indentation of this script was not recoverable from the
# source; it is assumed that loading, trimming and checking all run with
# warnings suppressed -- confirm the intended extent of the `with` body.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    sub = Subject(settings)
    sub.trim_data()
    sub.integrity_check()

# one figure per time series; both are displayed by the single show() call
pylab.plt.figure('fitbit time series')
sub.fitbit_data.ts.plot()
pylab.plt.figure('avatar view time series')
sub.avatar_view_data.ts.plot()
pylab.plt.show()

#this one isn't very impressive, and I don't think it is working right now anyway
#import src.interaction.timeSeries.simple
#src.interaction.timeSeries.simple.plot()
#pylab.plt.show()
def plot():
    """Overlay every participant's interaction series on a shared daily time axis.

    Loads the viewLog interaction data of every participant number
    0..HIGHEST_P_NUMBER (IOError -> reported and skipped), re-bases each
    participant's ``t`` values (subtract the minimum, then add the time of day
    of the first ``x`` entry as ``(hour*60 + minute) * 60``), plots ``v``
    against ``t`` for all participants in one axes and saves the figure as
    DATA_LOC + "all_interactions_x_time_of_day_since_study_start.png".

    Any number of loaded participants is plotted. Earlier versions required
    exactly 15 and raised AssertionError otherwise.
    """
    ### analysis run on each participant and displayed together ###
    # load in all the data
    participants = []
    interactions = []
    for pNum in range(HIGHEST_P_NUMBER + 1):  # cycle through all participants
        settings = setup(dataset='USF', data_loc=DATA_LOC, subject_n=pNum)
        pid = str(pNum)
        try:
            interact = interactionData(settings.getFileName('viewLog'))
        except IOError:
            print('p ' + pid + ' interactions file not found')
            continue
        participants.append(pid)
        interactions.append(interact)
        print('p' + pid + ' data loaded.')

    print('\n ==================================================')
    print('\t total participants: ' + str(len(participants)))
    print(' ==================================================')

    # shift to time since start...
    for interact in interactions:
        start_t = min(interact.t)
        for i in range(len(interact)):
            interact.t[i] -= start_t

    # shift all to same daily cycle
    for interact in interactions:
        timeOfDay = interact.x[0]  # assuming earliest time is first...
        ToD_start = (timeOfDay.hour * 60 + timeOfDay.minute) * 60
        for i in range(len(interact)):
            interact.t[i] += ToD_start

    figName = "all_interactions_x_time_of_day_since_study_start"
    pylab.figure(figName)

    # pylab.plot accepts x1, y1, x2, y2, ...; build that argument list from
    # however many participants were loaded instead of spelling out 15 of them
    series = []
    for interact in interactions:
        series.extend([interact.t, interact.v])
    if series:
        pylab.plot(*series)

    fname = DATA_LOC + figName + '.png'
    pylab.plt.savefig(fname, dpi=100)
import knowMe # these are switches to help debugging, set true to run on each part avatar = False knowme = True # some optional tests: # from src.post_view_event_steps_bars import test_get_avg_list # test_get_avg_list() #knowMe.makePlots(type=PLOT_TYPES.bars, show=True, pre_win=10, post_win=40) #knowMe.makePlots(type=PLOT_TYPES.bars, show=True) if avatar: ### USF mAVATAR DATA LOADING ### settings = setup(dataset='USF', data_loc='../subjects/', subject_n=0) with warnings.catch_warnings(): warnings.simplefilter("ignore") data = Dataset(settings, min_quality=QUALITY_LEVEL.acceptable, trim=True, check=True, used_data_types=[DATA_TYPES.fitbit, DATA_TYPES.avatar_views], avatar_view_freq=60) UP_TO_DATE = True # true if software versions are good if pandas.version.version < '0.12.0': UP_TO_DATE = False print '\n\nWARN: Some analysis cannot be completed due to outdated pandas version ' + pandas.version.version + '\n\n' # comparison of events selected with/without overlap from mAvatar dataset # to demonstrate difference (especially at high time intervals like no-overlap for 3hrs around event)
# Script fragment: loads the USF dataset (fitbit + avatar views) and records
# whether the installed pandas is recent enough for the plots that follow.
import warnings
import pylab
import pandas
from src.settings import setup, QUALITY_LEVEL, DATA_TYPES
from src.data.mAvatar.Data import DAY_TYPE
from src.data.Dataset import Dataset

settings = setup(dataset='USF', data_loc='../subjects/', subject_n=0)

# NOTE(review): the indentation of this script was not recoverable from the
# source; only the Dataset construction is assumed to run with warnings
# suppressed -- confirm the intended extent of the `with` body.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    data = Dataset(
        settings,
        min_quality=QUALITY_LEVEL.acceptable,
        trim=True,
        check=True,
        used_data_types=[DATA_TYPES.fitbit, DATA_TYPES.avatar_views],
        avatar_view_freq=60)

UP_TO_DATE = True  # true if software versions are good
# NOTE(review): this compares version STRINGS, so the order is lexicographic
# (e.g. '0.9.0' < '0.12.0' is False); verify this is acceptable for the
# pandas versions in use.
if pandas.version.version < '0.12.0':
    UP_TO_DATE = False
    print '\n\nWARN: Some analysis cannot be completed due to outdated pandas version ' + pandas.version.version + '\n\n'

###################
### BEGIN plots ###
###################
# the body of this branch continues beyond this fragment
if UP_TO_DATE:
def plot():
    """Draw one scatter row per participant with fitbit steps and avatar view times.

    Loads minute-scale fitbit PA data and viewLog interaction data for every
    participant number 0..HIGHEST_P_NUMBER (IOError -> reported and skipped,
    independently for the two kinds of data). All step values are scaled by
    1/50 in place. Every participant with PA and/or interaction data gets its
    own subplot row in the figure named "scatterplot_masterdash": steps as
    green 'x' markers, view times as '+' markers at y=0. The figure is neither
    shown nor saved here. Nothing is drawn when no data could be loaded.
    """
    # load in all the data
    PA_participants = []
    PA = []
    for pNum in range(HIGHEST_P_NUMBER + 1):  # cycle through all participants
        settings = setup(dataset='USF', data_loc=DATA_LOC, subject_n=pNum)
        try:
            fName = settings.getFileName('fitbit')
            pa = PAdata(fName, method='fitbit', timeScale='minute')
        except IOError:
            print('p ' + str(pNum) + ' PA file "' + fName + '" not found')
            continue
        PA_participants.append(pNum)
        PA.append(pa)

    interact_participants = []
    interactions = []
    for pNum in range(HIGHEST_P_NUMBER + 1):  # cycle through all participants
        settings = setup(dataset='USF', data_loc=DATA_LOC, subject_n=pNum)
        try:
            fName = settings.getFileName('viewLog')
            interact = interactionData(fName)
        except IOError:
            print('p ' + str(pNum) + ' interactions file not found')
            continue
        interact_participants.append(pNum)
        interactions.append(interact)

    # scale all the data
    PAscaler = 1.0 / 50.0
    for pa in PA:
        for i in range(len(pa)):
            pa.steps[i] *= PAscaler

    # NOTE: times are plotted as loaded; no shift to study start or to a
    # common time of day is applied.

    pa_by_pid = dict(zip(PA_participants, PA))
    interact_by_pid = dict(zip(interact_participants, interactions))
    all_pids = set(pa_by_pid) | set(interact_by_pid)
    if not all_pids:
        # max() below would raise ValueError with nothing loaded
        return

    # make the plots
    figName = "scatterplot_masterdash"
    pylab.figure(figName)
    pylab.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)

    max_p = max(all_pids) + 1
    # One row per participant having ANY data, plus the spare row the layout
    # always had. The previous max(len, len) + 1 ran out of rows when the two
    # participant sets were not nested.
    pCount = len(all_pids) + 1
    actualCount = 1  # 1-based row of the next participant that has data
    for num in range(max_p):
        pylab.subplot(pCount, 1, actualCount)

        pa = pa_by_pid.get(num)
        if pa is None:
            print('p' + str(num) + ' is not in list of PA')
        interact = interact_by_pid.get(num)
        if interact is None:
            print('p' + str(num) + ' is not in list of interactions')

        # Explicit None checks replace the former AttributeError catch-all,
        # which also hid genuine attribute errors.
        if pa is not None:
            pylab.scatter(pa.timestamp, pa.steps, marker='x', color='g')
        if interact is not None:
            pylab.scatter(interact.viewTimes,
                          [0] * len(interact.viewTimes),
                          marker='+',
                          color=interact.viewMarkerColor)

        if pa is not None or interact is not None:
            actualCount += 1
def plot():
    """Plot all participants' interaction series together on a common daily clock.

    Every participant number 0..HIGHEST_P_NUMBER is tried; a participant whose
    viewLog file raises IOError is reported and skipped. Each loaded series has
    its ``t`` values re-based in place: the minimum is subtracted, then the
    time of day of the first ``x`` entry (``(hour*60 + minute) * 60``) is
    added. ``v`` is plotted against ``t`` for all participants in a single
    axes, and the figure is saved as
    DATA_LOC + "all_interactions_x_time_of_day_since_study_start.png".

    The number of participants is no longer fixed. Earlier versions asserted
    exactly 15 and listed each series by hand.
    """
    ### analysis run on each participant and displayed together ###
    # load in all the data
    participants = []
    interactions = []
    for pNum in range(HIGHEST_P_NUMBER + 1):  # cycle through all participants
        settings = setup(dataset='USF', data_loc=DATA_LOC, subject_n=pNum)
        pid = str(pNum)
        try:
            interact = interactionData(settings.getFileName('viewLog'))
        except IOError:
            print('p ' + pid + ' interactions file not found')
            continue
        participants.append(pid)
        interactions.append(interact)
        print('p' + pid + ' data loaded.')

    print('\n ==================================================')
    print('\t total participants: ' + str(len(participants)))
    print(' ==================================================')

    # shift to time since start...
    for interact in interactions:
        start_t = min(interact.t)
        for i in range(len(interact)):
            interact.t[i] -= start_t

    # shift all to same daily cycle
    for interact in interactions:
        timeOfDay = interact.x[0]  # assuming earliest time is first...
        ToD_start = (timeOfDay.hour * 60 + timeOfDay.minute) * 60
        for i in range(len(interact)):
            interact.t[i] += ToD_start

    figName = "all_interactions_x_time_of_day_since_study_start"
    pylab.figure(figName)

    # flatten to the x1, y1, x2, y2, ... form that pylab.plot accepts, for
    # however many participants were actually loaded
    plot_args = []
    for interact in interactions:
        plot_args.append(interact.t)
        plot_args.append(interact.v)
    if plot_args:
        pylab.plot(*plot_args)

    fname = DATA_LOC + figName + '.png'
    pylab.plt.savefig(fname, dpi=100)
def plot():
    """Draw a stack of scatter rows, one per participant that has any data.

    For each participant numbered 0..HIGHEST_P_NUMBER this tries to load
    minute-scale 'fitbit' physical-activity data and 'viewLog' interaction
    data. A participant whose file raises IOError is reported and left out of
    that dataset. Step counts are multiplied by 1/50 in place (presumably so
    they sit at a comparable scale to the view markers drawn at y=0 -- TODO
    confirm).

    Each participant with at least one dataset gets one subplot row holding
    green 'x' markers for ``timestamp`` vs scaled ``steps`` and '+' markers at
    y=0 for every entry of ``viewTimes``. Participants missing a dataset are
    reported on stdout. If nothing was loaded at all, a notice is printed and
    no figure is created.

    The figure is only built here; this function neither saves nor shows it.
    """
    # load in all the data
    PA_participants = []
    PA = []
    for pNum in range(HIGHEST_P_NUMBER + 1):  # cycle through all participants
        settings = setup(dataset='USF', data_loc=DATA_LOC, subject_n=pNum)
        fName = None  # stays None if the file-name lookup itself raises
        try:
            fName = settings.getFileName('fitbit')
            pa = PAdata(fName, method='fitbit', timeScale='minute')
        except IOError:
            print('p %s PA file "%s" not found' % (pNum, fName))
            continue
        # if no problems:
        PA_participants.append(pNum)
        PA.append(pa)

    interact_participants = []
    interactions = []
    for pNum in range(HIGHEST_P_NUMBER + 1):  # cycle through all participants
        settings = setup(dataset='USF', data_loc=DATA_LOC, subject_n=pNum)
        try:
            interact = interactionData(settings.getFileName('viewLog'))
        except IOError:
            print('p ' + str(pNum) + ' interactions file not found')
            continue
        # if no problems:
        interact_participants.append(pNum)
        interactions.append(interact)

    # scale all the data
    PAscaler = 1.0 / 50.0
    for pa in PA:
        for i in range(len(pa)):
            pa.steps[i] *= PAscaler

    # Index both datasets by participant number so a missing entry is an
    # explicit None rather than something detected through exceptions.
    pa_by_id = dict(zip(PA_participants, PA))
    interact_by_id = dict(zip(interact_participants, interactions))
    ids_with_data = set(pa_by_id) | set(interact_by_id)
    if not ids_with_data:
        # max() below would fail on an empty set, and there is nothing to draw
        print('no PA or interaction data loaded; nothing to plot')
        return

    # make the plots
    figName = "scatterplot_masterdash"
    pylab.figure(figName)
    pylab.subplots_adjust(left=0, bottom=0, right=1, top=1,
                          wspace=0, hspace=0)

    # One row per participant that has any data. Sizing the grid from the
    # union keeps the subplot index in range even when the two datasets cover
    # different participants.
    pCount = len(ids_with_data)
    actualCount = 0
    for num in range(max(ids_with_data) + 1):
        pa = pa_by_id.get(num)
        if pa is None:
            print('p' + str(num) + ' is not in list of PA')
        interact = interact_by_id.get(num)
        if interact is None:
            print('p' + str(num) + ' is not in list of interactions')
        if pa is None and interact is None:
            continue  # no row for a participant without any data

        actualCount += 1
        pylab.subplot(pCount, 1, actualCount)
        # Attribute errors on loaded data are no longer swallowed: a loaded
        # object lacking the expected fields is a real problem worth surfacing.
        if pa is not None:
            pylab.scatter(pa.timestamp, pa.steps, marker='x', color='g')
        if interact is not None:
            pylab.scatter(interact.viewTimes,
                          [0] * len(interact.viewTimes),
                          marker='+',
                          color=interact.viewMarkerColor)
def setUp(self):
    """Store a settings object for the 'default' dataset on ``self.settings``.

    NOTE(review): presumably a unittest fixture hook; the enclosing class is
    not visible from here.
    """
    # imported inside the method, not at module level, exactly as before
    from src.settings import setup as build_settings

    self.settings = build_settings(dataset='default')
def plot(dataset='test', dataLoc="./data/", paMethod=DEFAULT_METHOD,
         bypass_data_check=False):
    """Compare participants' average physical activity (PA) by avatar behavior.

    For every participant 0..HIGHEST_P_NUMBER (participants 1, 3, 13 and 14
    are skipped by hand) the daily interaction scores are reduced to their
    sign: positive is an 'active' avatar day, negative a 'sedentary' one,
    zero neither. The participant's daily PA scores are then averaged
    separately over each kind of day. The per-participant averages are drawn
    as three stacked bars (sedentary at x=-1, zero at x=0, active at x=1) and
    a paired t-test of sedentary vs active averages is printed.

    Args:
        dataset: dataset name forwarded to ``setup``.
        dataLoc: data location forwarded to ``setup`` as ``data_loc``.
        paMethod: 'mMonitor' (scores come from ``segmentPAIntoDays``) or
            'fitbit' (scores are the daily ``steps``).
        bypass_data_check: when False, ``trim_data`` is called on the
            interaction and PA dates/scores before averaging (presumably it
            aligns them in place -- TODO confirm against trim_data).

    Raises:
        ValueError: if ``paMethod`` is neither 'mMonitor' nor 'fitbit'. It is
            raised while processing a participant and is not handled by the
            except clauses here (unless InputError derives from ValueError).

    A participant with no active days or no sedentary days is reported and
    left out of all three lists, so the lists stay paired for the t-test
    instead of the run aborting on a division by zero. A participant with no
    zero-score days contributes 0 to the zero bar, as before.
    """
    pltName = 'participants\' avg PA for sedentary & active avatar days'
    print('making plot "' + pltName + '"')
    pylab.figure(pltName)
    cmap = pylab.cm.get_cmap(name='terrain')  # not used yet; see TODO below

    # participants excluded by hand, mapped to the notice printed for each
    skipped = {
        1: 'skipping p1 & p3 b/c data is incomplete.\n',
        3: 'skipping p1 & p3 b/c data is incomplete.\n',
        13: 'skip p13 b/c data needs some massaging.\n',
        14: 'skip p14 b/c data is incomplete.\n',
    }

    activePAs = []
    sedentPAs = []
    zeroPAs = []
    dataCounter = 0  # how many participants contributed data (for debug)

    for pNum in range(HIGHEST_P_NUMBER + 1):  # cycle through all participants
        print('===p:' + str(pNum) + '===')
        if pNum in skipped:
            print(skipped[pNum])
            continue

        try:
            settings = setup(dataset=dataset, data_loc=dataLoc,
                             subject_n=pNum)

            # load interaction data
            interact = interactionData(settings.getFileName('viewLog'))
            interactScore, interactDate = segmentInteractionIntoDays(interact)

            # load PA data
            PA = PAdata(PAfile=settings.getFileName(paMethod),
                        method=paMethod, timeScale='daily')
            if paMethod == 'mMonitor':
                PAscore, PAdate = segmentPAIntoDays(
                    PA, PAscoreFunction=getPAscore_postiveOnly)
            elif paMethod == 'fitbit':
                PAscore = PA.steps
                PAdate = PA.time
            else:
                raise ValueError('unknown PA method "' + str(paMethod) + '"')

            if not bypass_data_check:
                trim_data(interactDate, PAdate, interactScore, PAscore)

            # reduce each day's interaction score to its sign:
            # 1 = active, -1 = sedentary, 0 = neither
            interactions = [
                1 if iScore > 0 else (-1 if iScore < 0 else 0)
                for iScore in interactScore
            ]

            totals = {1: 0, -1: 0, 0: 0}
            counts = {1: 0, -1: 0, 0: 0}
            for day in range(len(PAscore)):
                kind = interactions[day]
                totals[kind] += PAscore[day]
                counts[kind] += 1

            # Decide before appending anything, so the three lists always
            # hold the same participants in the same order.
            if counts[1] == 0 or counts[-1] == 0:
                print('participant ' + str(pNum) +
                      ' has no active or no sedentary avatar days; skipped.\n')
                continue

            # float() avoids Python 2 integer division truncating the average
            activePAs.append(float(totals[1]) / counts[1])
            sedentPAs.append(float(totals[-1]) / counts[-1])
            zeroPAs.append(float(totals[0]) / counts[0] if counts[0] else 0)

            # no exceptions means this data was good, increment counter
            dataCounter += 1
            print(str(pNum) + ' loaded.\n')

        except InputError as e:
            print(e.message)
            print('participant ' + str(pNum) + ' not valid.\n')
        except IOError as e:
            # str(e), not e.message: .message is empty for OS-raised IOErrors
            print(str(e))
            print('participant ' + str(pNum) + ' not found.\n')
        except Warning as w:
            print(w.message)
            print('some issues loading participant ' + str(pNum) +
                  '; continuing anyway.\n')

    if paMethod == 'mMonitor':
        pylab.plt.ylabel('physical activity score')
    elif paMethod == 'fitbit':
        pylab.plt.ylabel('average step counts')
    pylab.plt.xlabel('<-sedentary active->\navatar behavior')
    pylab.plt.gca().axes.get_xaxis().set_ticks([])
    pylab.plt.draw()

    print(activePAs)
    print(sedentPAs)
    print(zeroPAs)

    # TODO: colormapping should use dataCounter and actual number of data
    # available (instead of pNum and HIGHEST_P_NUM, respectively)
    # one stacked bar per category, in the original drawing order
    for xPos, averages in ((1, activePAs), (-1, sedentPAs), (0, zeroPAs)):
        base = 0
        for avg in averages:
            pylab.plt.bar(xPos, avg, bottom=base, linewidth=1, width=1.7)
            base += avg

    paired_sample = stats.ttest_rel(sedentPAs, activePAs)
    print("================================================")
    print(str(dataCounter) + " subjects analyzed using " + paMethod +
          " data.")
    print("The t-statistic is %.3f and the p-value is %.3f." % paired_sample)
    print("================================================")
    print('done.')