def processData():
    """Load the data from the database and build the analysis records.

    Parses the raw data, extracts the ``analysis`` records from it, and then
    runs every enrichment step in turn so that the records carry the features
    used for machine learning.

    Returns:
        A ``(children, analysis, startT, endT)`` tuple: the parsed children,
        the enriched analysis records, and the values that ``findStartDate``
        and ``findEndDate`` returned for those records.
    """
    children = parse.parseData()
    analysis = extract(children)

    startT = findStartDate(analysis)
    endT = findEndDate(analysis)
    # A single pre-formatted argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("date diff %s" % (dateDiff(startT, endT),))

    # The return values of the steps below are ignored, so each one is
    # expected to update `analysis` in place.  The order is kept exactly as
    # written; NOTE(review): later steps presumably depend on earlier ones.
    extend(children, analysis, startT, endT)
    addSleepChart(children, analysis)
    computeRateNtTr(analysis)
    compBigImprov(analysis)
    computeCompleteDevRate(analysis)
    userReport(analysis)
    # searchMissingUsers(analysis)  -- step disabled in the original code
    addFeatureForML(children, analysis)
    computeCorrCoef(analysis)

    return children, analysis, startT, endT
def dataForInitialAnalysis(children, analysis, start=None):
    """Build machine-learning arrays from the users' "initial" data.

    The "initial" data is the prior history on sleep disorder provided by
    the users.  Users are visited in a random order; every user whose
    analysis record has a truthy ``start_date`` contributes one sample:

    * ``X`` row: ``[init.month_count, init.night_count, s_date]`` where
      ``s_date = dateDiff(start, analysis[i].start_date)``
    * ``Y`` value: ``len(analysis[i].use)``

    Summary statistics of the first ``X`` column are printed and the data is
    handed to ``plotInitData`` before returning.

    Args:
        children: Mapping from user id to a child record exposing
            ``.initial``.
        analysis: Mapping from user id to an analysis record exposing
            ``.start_date`` and ``.use``.
        start: Reference date passed to ``dateDiff``.  Optional: when it is
            omitted the value of ``findStartDate(analysis)`` is used, which
            is how ``processData`` obtains the start date it passes on.
            (Previously ``start`` was read as an undeclared free variable.)

    Returns:
        ``(index, X, Y)``: the shuffled list of all user ids, and the
        sample / target data as numpy arrays.
    """
    if start is None:
        start = findStartDate(analysis)

    # list(...) gives random.shuffle a real mutable sequence; a Python 3
    # dict key view cannot be shuffled in place.
    index = list(analysis)
    random.shuffle(index)

    # Collect the data in machine-learning format.
    X = []
    Y = []
    for i in index:
        record = analysis[i]
        if not record.start_date:
            # No start date means no s_date feature for this user: skip.
            continue
        init = children[i].initial
        s_date = dateDiff(start, record.start_date)
        X.append([init.month_count, init.night_count, s_date])
        Y.append(len(record.use))

    X = numpy.array(X)
    Y = numpy.array(Y)

    first_column = X[:, 0]
    print('mean %s' % (numpy.mean(first_column),))
    print('sdt %s' % (numpy.std(first_column),))
    print('sample X %s' % (X[:10],))
    plotInitData(X, Y)

    # NOTE(review): `index` lists every user, including the ones skipped
    # above, so it can be longer than X / Y -- confirm how callers pair
    # ids with rows before relying on positions.
    return index, X, Y
def _nightOffset(start, t_bed, t_asleep, fallback_stamp):
    """Return ``dateDiff(start, <timestamp>)`` for one dated record.

    The timestamp is the first ``time_for_bed`` entry when there is one,
    otherwise the first ``event1_time`` entry.  When both are empty,
    ``fallback_stamp`` is parsed with ``strToDT``, moved back by 8 hours and
    converted back with ``dtToStr`` before being used.
    """
    if len(t_bed) > 0:
        return dateDiff(start, t_bed[0])
    if len(t_asleep) > 0:
        return dateDiff(start, t_asleep[0])
    # NOTE(review): the reason for the 8-hour shift is not visible here --
    # presumably a time-zone or night-boundary adjustment; confirm.
    shifted = strToDT(fallback_stamp) - datetime.timedelta(hours=8)
    return dateDiff(start, dtToStr(shifted))


def extend(children, analysis, start, end):
    """Fill the per-child analysis records from each child's dated data.

    For every child, and every dated record in sorted date order, the
    following fields of ``analysis[child_id]`` are extended:

    * ``use``: the offset of the night from ``start``, when the record has a
      ``time_for_bed`` or ``event1_time`` entry.
    * ``dist_event``: ``[offset, count]`` when ``dev_time_cu`` is non-empty,
      where ``count`` is its length.
    * ``dev[reaction]``: ``[offset, num]`` rows for every reaction listed in
      ``device.react`` (or under the key ``'no react'`` when that list is
      empty), where ``num`` is the number of ``device_start_stop`` entries.

    Args:
        children: Mapping whose values expose ``child_id`` and ``dates``.
        analysis: Mapping from child id to the analysis record to update.
        start: Reference date handed to ``dateDiff``.
        end: Unused; kept so that existing callers keep working.

    Returns:
        None.  ``analysis`` is updated in place.
    """
    for ch in children.values():
        cid = ch.child_id
        for _date, d in sorted(ch.dates.items()):
            t_bed = d.time_for_bed
            t_asleep = d.event1_time

            if len(t_bed) > 0 or len(t_asleep) > 0:
                # One of the two lists is non-empty, so the fallback
                # timestamp is never consulted here.
                analysis[cid].use.append(
                    _nightOffset(start, t_bed, t_asleep, None))

            dst_event = d.dev_time_cu
            num_tr = len(dst_event)
            if num_tr > 0:
                analysis[cid].dist_event.append(
                    [_nightOffset(start, t_bed, t_asleep, dst_event[0]),
                     num_tr])

            if d.device is None:
                continue
            device_times = d.device.device_start_stop
            num = len(device_times)
            if num == 0:
                continue

            # The offset is the same for every row of this night, so it is
            # computed once instead of once per appended row.
            offset = _nightOffset(start, t_bed, t_asleep, device_times[0][0])
            dev = analysis[cid].dev
            reactions = d.device.react
            if len(reactions) == 0:
                reactions = ['no react']
                # Create the bucket only when it is missing.  Assigning a
                # fresh list here on every date threw away the rows gathered
                # for earlier dates, unlike every other bucket.
                dev.setdefault('no react', [])

            for r in reactions:
                # One row per start/stop entry, as in the original: a night
                # with `num` entries adds `num` identical rows.
                # NOTE(review): confirm the duplication is intended.
                for _ in device_times:
                    dev[r].append([offset, num])