Ejemplo n.º 1
0
def buildSingleDietExcel(subjectID):
    '''
    Write the per-period diet type frequencies of one subject to an .xls file.

    Sheet index 3 of 'subject_template_<subjectID>.xlsx' is scanned from row
    index 8 downwards; every row whose first cell is non-empty is treated as
    one recorded day. For each such day one output row is written holding the
    subject id, the value of that first cell, the string form of the diet type
    labels and, for each period P1..P6, the period name followed by the string
    form of its frequencies.

    The workbook is saved as 'diet/dietTable_<subjectID>_withFreqP.xls'.

    subjectID: subject identifier as a string (it is concatenated into paths).
    '''
    periods = ['P1', 'P2', 'P3', 'P4', 'P5', 'P6']

    template = xlrd.open_workbook('subject_template_' + subjectID + '.xlsx')
    day_sheet = template.sheet_by_index(3)

    out_book = xlwt.Workbook()
    out_sheet = out_book.add_sheet('sheet1')

    diet_types = utilise.itemDict2list(dataGen4DietAct.genDietTypeDict())

    out_row = 0
    day_no = 0
    for src_row in range(8, day_sheet.nrows):
        day_cell = day_sheet.cell_value(src_row, 0)
        if not day_cell:
            continue

        day_no += 1

        # Every period starts with a zero count for each known diet type.
        freq = {}
        for period in periods:
            freq[period] = dict((label, 0) for label in diet_types)

        observed = buildTypeIndex.build_daily_single_diet_index_with_time4DC(
            subjectID, day_no)

        # Take over only those observed types that are part of the label list.
        for period in freq:
            known = freq[period]
            seen = observed[period]
            for diet_type in seen:
                if diet_type in known:
                    known[diet_type] = seen[diet_type]

        out_sheet.write(out_row, 0, subjectID)
        out_sheet.write(out_row, 1, day_cell)
        out_sheet.write(out_row, 2, str(freq['P1'].keys()))
        # Columns 3/4 hold P1 and its values, 5/6 hold P2, ... 13/14 hold P6.
        for slot, period in enumerate(periods):
            out_sheet.write(out_row, 3 + 2 * slot, period)
            out_sheet.write(out_row, 4 + 2 * slot, str(freq[period].values()))
        out_row += 1

    out_book.save('diet/dietTable_' + subjectID + '_withFreqP.xls')
def buildSingleDietExcel(subjectID):
    '''
    Write the daily diet type frequencies of one subject to an .xls file.

    Sheet index 3 of 'subject_template_<subjectID>.xlsx' is scanned from row
    index 8 downwards; every row whose first cell is non-empty is treated as
    one recorded day. For each such day the frequencies returned by
    build_daily_single_diet_index_with_time4DC are summed over all of its
    top-level keys, and one output row is written: subject id, the day cell,
    then one column per diet type label in label order.

    The workbook is saved as 'diet/dietTable_<subjectID>_withFreq4DC.xls'.

    subjectID: subject identifier as a string (it is concatenated into paths).
    '''
    template = xlrd.open_workbook('subject_template_' + subjectID + '.xlsx')
    day_sheet = template.sheet_by_index(3)

    out_book = xlwt.Workbook()
    out_sheet = out_book.add_sheet('sheet1')

    diet_types = utilise.itemDict2list(dataGen4DietAct.genDietTypeDict())

    out_row = 0
    day_no = 0
    for src_row in range(8, day_sheet.nrows):
        day_cell = day_sheet.cell_value(src_row, 0)
        if not day_cell:
            continue

        out_sheet.write(out_row, 0, subjectID)
        out_sheet.write(out_row, 1, day_cell)
        day_no += 1

        totals = dict.fromkeys(diet_types, 0)

        per_period = buildTypeIndex.build_daily_single_diet_index_with_time4DC(
            subjectID, day_no)

        # Accumulate each type's frequency across all top-level keys.
        for period in per_period:
            bucket = per_period[period]
            for diet_type in bucket:
                totals[diet_type] += bucket[diet_type]

        for offset, label in enumerate(diet_types):
            out_sheet.write(out_row, 2 + offset, totals[label])

        out_row += 1

    out_book.save('diet/dietTable_' + subjectID + '_withFreq4DC.xls')
def buildSingleActExcel(subjectID):
    '''
    Build the activity frequency workbook for a single subject.

    Sheet index 3 of 'subject_template_<subjectID>.xlsx' is scanned from row
    index 8 on; every row whose first cell is non-empty counts as one day. For
    the n-th such day (counting from 1) the file
    'activity/activityTypeFreq/activityType_frequency_<subjectID>_<n>.txt'
    is read: the first token of a line names an activity type and the second
    token is its integer frequency. Types that are not in the label list are
    ignored, labels that do not occur in the file keep a frequency of 0, and
    lines that produce no tokens (e.g. blank lines) are skipped.

    Each output row holds the subject id, the day cell, the string form of the
    label keys and the string form of the frequencies. The workbook is saved
    as 'activity/activityTable_<subjectID>_withFreq.xls'.

    subjectID: subject identifier as a string (it is concatenated into paths).
    '''
    file_location = 'subject_template_' + subjectID + '.xlsx'
    workbookR = xlrd.open_workbook(file_location)
    sheet = workbookR.sheet_by_index(3)

    workbookW = xlwt.Workbook()
    ws = workbookW.add_sheet('sheet1')

    rowW = 0
    index = 0

    row_labels = utilise.itemDict2list(dataGen4DietAct.genActTypeDict())

    for rowR in range(8, sheet.nrows):
        day_cell = sheet.cell_value(rowR, 0)
        if not day_cell:
            continue

        ws.write(rowW, 0, subjectID)
        ws.write(rowW, 1, day_cell)
        index += 1

        dd = {}
        for label in row_labels:
            dd[label] = 0

        freq_path = ('activity/activityTypeFreq/activityType_frequency_' +
                     subjectID + '_' + str(index) + '.txt')
        # The context manager closes the file even if parsing a line fails.
        with open(freq_path, 'r') as freq_file:
            for line in freq_file:
                words = wordpunct_tokenize(line.strip('\n'))
                if not words:
                    # Blank line: nothing to record.
                    continue
                if words[0] in dd:
                    dd[words[0]] = int(words[1])

        ws.write(rowW, 2, str(dd.keys()))
        ws.write(rowW, 3, str(dd.values()))
        rowW += 1

    workbookW.save('activity/activityTable_' + subjectID + '_withFreq.xls')
def buildDietWithSleepExcel():
    '''
    Join the diet frequency table with the sleep data matrix for all subjects.

    Reads the first sheet of 'diet/dietTable_withFreq4DC.xls' and of
    'allSubjectsSleepDatamatrix.xls', pairs rows whose subject id (column 0)
    and day (column 1) match, and writes one combined row per match to
    'diet/dietTableWithSleep_withFreq4DC.xls'.

    Output row layout: column 0 subject id, column 1 day, columns 2-13 diet
    values copied from the diet sheet, columns 14-31 values copied from sleep
    sheet columns 5-22. Row 0 carries the column titles.

    NOTE(review): the title list has 19 entries after the diet labels while
    only 18 sleep columns are copied, and the diet range 2-13 assumes exactly
    12 diet type labels -- confirm against the input files.
    '''
    workbookW = xlwt.Workbook()
    ws = workbookW.add_sheet('sheet1')

    row_labels = utilise.itemDict2list(dataGen4DietAct.genDietTypeDict())
    titles = ['SubjId', 'Day'] + row_labels + [
        'Morningness', 'Eveningness', 'Lark', 'Owl', 'HoursSleep',
        'SleepMoveCount', 'SleepQuality', 'MedianHR', 'MedianBefore',
        'MedianHRAfter', 'age', 'gender', 'height', 'weight', 'BMI',
        'FatFreeMass', 'FatMass', 'PercFat', 'vo2max'
    ]

    # Header row.
    for i in range(len(titles)):
        ws.write(0, i, titles[i])

    rowW = 1
    file_location1 = 'diet/dietTable_withFreq4DC.xls'
    workbookR1 = xlrd.open_workbook(file_location1)
    sheet1 = workbookR1.sheet_by_index(0)

    file_location2 = 'allSubjectsSleepDatamatrix.xls'
    workbookR2 = xlrd.open_workbook(file_location2)
    sheet2 = workbookR2.sheet_by_index(0)

    # Every diet row is compared against every sleep row; the sleep sheet is
    # read from row 1 on (row 0 is skipped), the diet sheet from row 0.
    for rowRDiet in range(0, sheet1.nrows):
        for rowRSlp in range(1, sheet2.nrows):
            # The sleep sheet stores the subject id as a number; it is turned
            # into text with a leading '0' before comparing with the diet sheet.
            sub = unicode(int(sheet2.cell_value(rowRSlp, 0)))
            sub = '0' + sub
            # print sub
            if sheet1.cell_value(rowRDiet, 0) == sub:
                if sheet1.cell_value(rowRDiet,
                                     1) == sheet2.cell_value(rowRSlp, 1):

                    if rowRSlp < sheet2.nrows - 1:
                        if sheet2.cell_value(rowRSlp, 1) == sheet2.cell_value(
                                rowRSlp + 1, 1):
                            # The next sleep row carries the same day value.
                            # The day written for this row gets its second
                            # '.'-separated component decreased by one.
                            # NOTE(review): presumably this assigns the first
                            # of two same-day sleep records to the previous
                            # day -- confirm the date format and the intent.
                            day = sheet2.cell_value(rowRSlp, 1)
                            temp = int(day.split('.')[1]) - 1
                            day = day.split('.')[0] + '.' + str(
                                temp) + '.' + day.split('.')[2]

                            if rowRDiet >= 1:
                                # Rebuild the previous diet row's day the same
                                # way (int() drops leading zeros) so the two
                                # strings can be compared.
                                dd = sheet1.cell_value(rowRDiet - 1, 1)
                                temp = int(dd.split('.')[1])
                                dd = dd.split('.')[0] + '.' + str(
                                    temp) + '.' + dd.split('.')[2]
                                if dd == day:
                                    # Use the previous diet row's values.
                                    for i in range(2, 14):
                                        ws.write(
                                            rowW, i,
                                            sheet1.cell_value(rowRDiet - 1, i))
                                else:
                                    # No diet row for the shifted day: stop
                                    # scanning sleep rows for this diet row;
                                    # nothing is written for this match.
                                    break
                            else:
                                # First diet row has no predecessor.
                                break
                        else:
                            # Regular match: rebuild the day string (middle
                            # component passed through int()) and copy the
                            # current diet row's values.
                            day = sheet2.cell_value(rowRSlp, 1)
                            temp = int(day.split('.')[1])
                            day = day.split('.')[0] + '.' + str(
                                temp) + '.' + day.split('.')[2]
                            for i in range(2, 14):
                                ws.write(rowW, i,
                                         sheet1.cell_value(rowRDiet, i))
                    else:
                        # Last sleep row: there is no following row to compare
                        # with, so it is handled like a regular match.
                        day = sheet2.cell_value(rowRSlp, 1)
                        temp = int(day.split('.')[1])
                        day = day.split('.')[0] + '.' + str(
                            temp) + '.' + day.split('.')[2]
                        for i in range(2, 14):
                            ws.write(rowW, i, sheet1.cell_value(rowRDiet, i))

                    ws.write(rowW, 0, sub)
                    ws.write(rowW, 1, day)
                    # Output columns 14-31 come from sleep columns 5-22.
                    for i in range(14, 32):
                        ws.write(rowW, i, sheet2.cell_value(rowRSlp, i - 9))

                    rowW += 1

    workbookW.save('diet/dietTableWithSleep_withFreq4DC.xls')
def buildSubAveInfo():
    '''
    Write 'SubAveInfo.xls' with three sheets.

    'AveInfo' has one row per entry of sleep_list: subject id, activity group,
    diet group, then the sleep, heart-rate and demographic values obtained
    from slpInfoRetrv. 'DietTF' and 'ActTF' have one row per entry of
    available_list: subject id, the subject's group, then one column per type
    label taken from the normalised diet / activity type TF array.

    A group cell is left empty when the subject is found in no group.
    '''

    def _write_group(sheet, row, col, groups, subject):
        # Record the first group key whose members contain the subject.
        for group_key in groups:
            if subject in groups[group_key]:
                sheet.write(row, col, group_key)
                return

    def _fill_tf_sheet(sheet, group_title, labels, matrix, groups):
        # Header: id, group, then one column per label.
        sheet.write(0, 0, 'SubjId')
        sheet.write(0, 1, group_title)
        for col, label in enumerate(labels, 2):
            sheet.write(0, col, label)

        # One row per available subject, starting below the header.
        for idx, subject in enumerate(available_list):
            row = idx + 1
            sheet.write(row, 0, subject)
            _write_group(sheet, row, 1, groups, subject)
            for offset in range(len(labels)):
                sheet.write(row, offset + 2, matrix[idx][offset])

    book = xlwt.Workbook()
    info_sheet = book.add_sheet('AveInfo')

    act_groups = dietActInfoRetrv.getGroups(labelsActType)
    diet_groups = dietActInfoRetrv.getGroups(labelsDietType)

    (age, gender, height, weight, bmi, fat_free, fat_mass, perc_fat,
     vo2max) = slpInfoRetrv.getDemoGInfo()
    slp_hours = slpInfoRetrv.getSlpHours()
    median_hr = slpInfoRetrv.getMedianHR()
    median_hr_before = slpInfoRetrv.getMedianHRBefore()
    median_hr_after = slpInfoRetrv.getMedianHRAfter()

    titles = [
        'SubjId', 'ActGroup', 'DietGroup', 'HoursSleep', 'MedianHR',
        'MedianHRBefore', 'MedianHRAfter', 'age', 'gender', 'height', 'weight',
        'BMI', 'FatFreeMass', 'FatMass', 'PercFat', 'vo2max'
    ]
    for col, title in enumerate(titles):
        info_sheet.write(0, col, title)

    # Per-subject series in the order of the title columns 3..15.
    value_columns = [
        slp_hours, median_hr, median_hr_before, median_hr_after, age, gender,
        height, weight, bmi, fat_free, fat_mass, perc_fat, vo2max
    ]
    for idx, subject in enumerate(sleep_list):
        row = idx + 1
        info_sheet.write(row, 0, subject)
        _write_group(info_sheet, row, 1, act_groups, subject)
        _write_group(info_sheet, row, 2, diet_groups, subject)
        for col, series in enumerate(value_columns, 3):
            info_sheet.write(row, col, series[idx])

    diet_sheet = book.add_sheet('DietTF')
    diet_labels = utilise.itemDict2list(dataGen4DietAct.genDietTypeDict())
    diet_tf = utilise.normArray(dataGen4DietAct.genDietTypeTFArray())
    _fill_tf_sheet(diet_sheet, 'DietGroup', diet_labels, diet_tf, diet_groups)

    act_sheet = book.add_sheet('ActTF')
    act_labels = utilise.itemDict2list(dataGen4DietAct.genActTypeDict())
    act_tf = utilise.normArray(dataGen4DietAct.genActTypeTFArray())
    _fill_tf_sheet(act_sheet, 'ActGroup', act_labels, act_tf, act_groups)

    book.save('SubAveInfo.xls')
Ejemplo n.º 6
0
def _save_daily_stacked_bar(sub, freq_array, labels, title_prefix, out_dir):
    '''
    Plot one subject's daily type frequencies as a stacked bar chart and save it.

    sub: subject identifier (string); used in the title and the file name.
    freq_array: 2-D data passed to pandas.DataFrame, one column per label.
    labels: column names for freq_array.
    title_prefix: text put in front of the subject id in title and file name.
    out_dir: output directory prefix, including the trailing '/'.
    '''
    df = pd.DataFrame(freq_array, columns=labels)
    ax = df.plot.bar(colormap=plt.cm.Paired, stacked=True)
    try:
        # Put the legend outside the axes, to the right of the plot.
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
        plt.title(title_prefix + sub)
        plt.xlabel('days')
        plt.ylabel('frequency per day')
        ax.set_xticklabels(dietActInfoRetrv.getDaysList(sub))
        plt.savefig(out_dir + title_prefix + sub, bbox_inches='tight')
    finally:
        # Each call creates a new figure; close it so figures do not pile up
        # in memory while looping over all subjects.
        plt.close(ax.get_figure())


def visDailyPatternStack():
    '''
    Save a stacked bar chart of the daily activity and diet type frequencies
    for every subject in available_list.

    Activity charts are written to
    'visDailyActTypePattStack/DailyActivityPattern_<subject>' and diet charts
    to 'visDailyDietTypePattStack/DailyDietPattern_<subject>'.
    '''
    for sub in available_list:
        d = dataGen4DietAct.genDailySingleActTypeTFArray(sub)
        labels = utilise.itemDict2list(dataGen4DietAct.genActTypeDict())
        _save_daily_stacked_bar(sub, d, labels, 'DailyActivityPattern_',
                                'visDailyActTypePattStack/')

    for sub in available_list:
        d = dataGen4DietAct.genDailySingleDietTypeTFArray(sub)
        labels = utilise.itemDict2list(dataGen4DietAct.genDietTypeDict())
        _save_daily_stacked_bar(sub, d, labels, 'DailyDietPattern_',
                                'visDailyDietTypePattStack/')
def _top_three_values(vec):
    '''
    Return the three largest distinct values of vec as (first, second, third).

    After a maximum is found, every entry equal to it is set to 0 in a working
    copy before the next maximum is taken, so a later value is 0 when fewer
    non-zero distinct values remain. vec itself is not modified.
    '''
    remaining = np.copy(vec)
    found = []
    for _ in range(3):
        peak = np.max(remaining)
        found.append(peak)
        remaining[remaining == peak] = 0
    return tuple(found)


def bestLabel(labelsDietType,labelsActType):
    '''
    Summarise the clusters of the diet type and activity type domains.

    For every domain in Domain the normalised type TF array is split by
    cluster label. The type labels and then the mean vector of each cluster
    are written as rows to 'tempLabels.xls', the mean vectors are plotted in
    one figure per domain, and the entries equal to one of the three largest
    values of a mean vector are annotated with their type label and printed.
    The figure is saved as
    'visClustering<domain>Pattern/KMeans__TF_<n_clusters>_groupFreq'.

    labelsDietType: cluster labels for the diet type domain, in the form
        accepted by utilise.string2array.
    labelsActType: cluster labels for the activity type domain, same form.

    Raises ValueError if Domain contains an entry other than 'DietType' or
    'ActType'.
    '''
    workbookW = xlwt.Workbook()
    ws = workbookW.add_sheet('sheet1')
    rowW = 0

    for domain in Domain:
        if domain == 'DietType':
            labels = utilise.string2array(labelsDietType)
            row_labels = utilise.itemDict2list(dataGen4DietAct.genDietTypeDict())
            X = dataGen4DietAct.genDietTypeTFArray()
        elif domain == 'ActType':
            labels = utilise.string2array(labelsActType)
            row_labels = utilise.itemDict2list(dataGen4DietAct.genActTypeDict())
            X = dataGen4DietAct.genActTypeTFArray()
        else:
            # Without this the loop would run on unset or stale data.
            raise ValueError('unsupported domain: %r' % (domain,))
        X = utilise.normArray(X)

        # write the labels to excel file
        for col, label in enumerate(row_labels):
            ws.write(rowW, col, label)
        rowW += 1

        fig = plt.figure()

        n_clusters = np.max(labels) + 1
        x = range(X.shape[1])

        for k in range(n_clusters):
            group = X[labels == k]
            # X has already been normalised, so the mean is used as is.
            meanVec = np.mean(group, axis=0)

            # write the mean vector of each group to excel file
            for col, value in enumerate(meanVec):
                ws.write(rowW, col, value)
            rowW += 1

            firstMax, secondMax, thirdMax = _top_three_values(meanVec)

            plt.plot(x, meanVec)
            for j in x:
                if (meanVec[j] == firstMax or meanVec[j] == secondMax
                        or meanVec[j] == thirdMax):
                    print('%s %s %s %s %s' % (k, domain, n_clusters,
                                              meanVec[j], row_labels[j]))
                    plt.text(x[j], meanVec[j], row_labels[j])

        plt.title(domain+'_TF_KMeans_'+str(n_clusters))
        plt.savefig('visClustering'+domain+'Pattern/KMeans__TF_'+str(n_clusters)+'_groupFreq')
        # The figure is no longer needed once it is on disk.
        plt.close(fig)

    workbookW.save('tempLabels.xls')
def HC(domain, para):
    '''
    Hierarchically cluster the rows and columns of a domain's feature matrix
    and export the result as a dendrogram heat map.

    domain: one of 'DietItem', 'ActItem', 'DietType', 'ActType'.
    para: a member of Metric ('TF' or 'TFIDF' select the normalised TF or
        TF-IDF array of the domain) or a member of Sim (the similarity matrix
        built from utilise.SimilarityDict is used instead).

    Rows and columns are clustered independently with Ward linkage on
    Euclidean distances. The heat map is written to
    'VisClustering<domain>Pattern/Hierarchy_<para>_ward.png'.

    Raises ValueError if no feature matrix can be built for domain/para.
    '''
    known_domains = ('DietItem', 'ActItem', 'DietType', 'ActType')
    X = None

    if para in Metric:
        if para == 'TF':
            if domain == 'DietItem':
                X = dataGen4DietAct.genDietItemTFArray()
            elif domain == 'ActItem':
                X = dataGen4DietAct.genActItemTFArray()
            elif domain == 'DietType':
                X = dataGen4DietAct.genDietTypeTFArray()
            elif domain == 'ActType':
                X = dataGen4DietAct.genActTypeTFArray()
        elif para == 'TFIDF':
            if domain == 'DietItem':
                X = dataGen4DietAct.DietItemTfidfArray()
            elif domain == 'ActItem':
                X = dataGen4DietAct.ActItemTfidfArray()
            elif domain == 'DietType':
                X = dataGen4DietAct.DietTypeTfidfArray()
            elif domain == 'ActType':
                X = dataGen4DietAct.ActTypeTfidfArray()
        if X is None:
            raise ValueError('no feature array for domain %r with para %r' %
                             (domain, para))
        X = utilise.normArray(X)

    if para in Sim:
        Similarity_dict = {}
        if domain in known_domains:
            Similarity_dict = utilise.SimilarityDict(domain, para)
        X = visSimilarityMat.similarityDict2array(Similarity_dict, 0)

    if X is None:
        raise ValueError('para %r is neither a metric nor a similarity' %
                         (para,))

    # method can be ward, complete, average
    method = 'ward'
    row_method = method
    row_metric = 'euclidean'
    column_method = method
    column_metric = 'euclidean'

    # linkage() must receive the condensed distance vector returned by
    # pdist(); a square distance matrix would be interpreted as a set of
    # observation vectors and clustered as such.
    d1 = ssd.pdist(X, metric=row_metric)
    Y1 = sch.linkage(d1, method=row_method)
    row_idxing = sch.leaves_list(Y1)

    d2 = ssd.pdist(X.T, metric=column_metric)
    Y2 = sch.linkage(d2, method=column_method)
    col_idxing = sch.leaves_list(Y2)

    # Reorder columns, then rows, to follow the dendrogram leaf order.
    heatmap_array = X[:, col_idxing][row_idxing, :]
    top_dendrogram = Y2
    side_dendrogram = Y1

    row_labels = range(X.shape[0])
    if para in Sim:
        col_labels = range(X.shape[1])
    if para in Metric:
        if domain == 'DietItem':
            col_labels = utilise.itemDict2list(
                dataGen4DietAct.genDietItemDict())
        elif domain == 'ActItem':
            col_labels = utilise.itemDict2list(
                dataGen4DietAct.genActItemDict())
        elif domain == 'DietType':
            col_labels = utilise.itemDict2list(
                dataGen4DietAct.genDietTypeDict())
        elif domain == 'ActType':
            col_labels = utilise.itemDict2list(
                dataGen4DietAct.genActTypeDict())
    col_idxing = list(col_idxing)
    row_idxing = list(row_idxing)
    print(col_idxing)

    # Labels in the same order as the reordered heat map rows and columns.
    new_row_labels = [str(row_labels[i]) for i in row_idxing]
    new_col_labels = [str(col_labels[j]) for j in col_idxing]

    heatmap = pdh.DendroHeatMap(heat_map_data=heatmap_array,
                                left_dendrogram=side_dendrogram,
                                top_dendrogram=top_dendrogram)
    heatmap.title = 'HC_' + domain + '_' + para + '_' + method
    heatmap.row_labels = new_row_labels
    heatmap.col_labels = new_col_labels

    heatmap.export('VisClustering' + domain + 'Pattern/Hierarchy_' + para +
                   '_' + method + '.png')