예제 #1
0
def extractSingleDailyActDiet(subjectID):
    """Split one subject's Excel diary into per-day activity and diet files.

    Reads sheet index 3 of ``subject_template_<subjectID>.xlsx``. Starting at
    row index 8, every truthy value in column 0 advances a day counter;
    truthy values in column 3 are appended to
    ``activity/activityFromExcel/activity_<subjectID>_<day>.txt`` and truthy
    values in column 4 to ``diet/dietFromExcel/diet_<subjectID>_<day>.txt``,
    one entry per line.

    Args:
        subjectID: subject identifier string used to build all file names.
    """
    file_location = 'subject_template_' + subjectID + '.xlsx'
    workbook = xlrd.open_workbook(file_location)
    sheet = workbook.sheet_by_index(3)
    duration = dietActInfoRetrv.getDuration(subjectID)

    act_prefix = 'activity/activityFromExcel/activity_' + subjectID + '_'
    diet_prefix = 'diet/dietFromExcel/diet_' + subjectID + '_'

    # Create (or truncate) one empty file per day up front so that every day
    # in 1..duration has a file even when the sheet has no entries for it.
    for n in range(1, duration + 1):
        for prefix in (act_prefix, diet_prefix):
            with open(prefix + str(n) + '.txt', 'w'):
                pass

    # NOTE(review): entries that appear before the first truthy column-0 cell
    # are written to a "..._0.txt" file because the counter is still 0.
    count = 0
    for row in range(8, sheet.nrows):
        if sheet.cell_value(row, 0):
            count += 1

        activity = sheet.cell_value(row, 3)
        if activity:
            # NOTE(review): column 3 is UTF-8 encoded while column 4 below is
            # passed through plain str(); confirm whether that is intended.
            text = str(activity.encode('utf-8'))
            # Context manager guarantees the handle is closed even when a
            # write fails.
            with open(act_prefix + str(count) + '.txt', 'a') as f_act:
                f_act.write(text)
                f_act.write('\n')

        diet = sheet.cell_value(row, 4)
        if diet:
            text = str(diet)
            with open(diet_prefix + str(count) + '.txt', 'a') as f_diet:
                f_diet.write(text)
                f_diet.write('\n')
예제 #2
0
def buildDailyItemFreqTXTFile():
    '''
    Write the daily diet and activity item index of each subject into txt files.

    For every subject in ``available_list`` and every day ``n`` in
    ``1..duration`` two files are (re)written:
    ``activity/activityItemFreq/activity_frequency_<subject>_<n>.txt`` and
    ``diet/dietItemFreq/diet_frequency_<subject>_<n>.txt``. Each line holds a
    key left-justified in 25 columns followed by its value left-justified in
    10 columns.
    '''
    print('in buildDailyItemFreqTXTFile()')
    for subjectID in available_list:
        duration = dietActInfoRetrv.getDuration(subjectID)
        for n in range(1, duration + 1):
            suffix = subjectID + '_' + str(n) + '.txt'
            act_path = 'activity/activityItemFreq/activity_frequency_' + suffix
            diet_path = 'diet/dietItemFreq/diet_frequency_' + suffix

            # Both files are opened before the indexes are built (same order
            # as before); the context managers close them even if building
            # an index or writing a line raises.
            with open(act_path, 'w') as f_act:
                with open(diet_path, 'w') as f_diet:
                    index_act = buildItemIndex.build_daily_single_activity_index(subjectID, n)
                    index_diet = buildItemIndex.build_daily_single_diet_index(subjectID, n)

                    for key in index_act:
                        f_act.write("%-25s%-10s" % (key, index_act[key]))
                        f_act.write('\n')
                    for key in index_diet:
                        f_diet.write("%-25s%-10s" % (key, index_diet[key]))
                        f_diet.write('\n')
def buildDailySingleActTypeFreqFile():
    '''
    Write the daily activity-type index of each subject into txt files.

    For every subject in ``available_list`` and every day ``n`` in
    ``1..duration`` the file
    ``activity/activityTypeFreq/activityType_frequency_<subject>_<n>.txt`` is
    (re)written with one line per key: the key left-justified in 25 columns
    followed by its value left-justified in 10 columns.
    '''
    for subjectID in available_list:
        duration = dietActInfoRetrv.getDuration(subjectID)

        for n in range(1, duration + 1):
            path = ('activity/activityTypeFreq/activityType_frequency_'
                    + subjectID + '_' + str(n) + '.txt')
            # The file is opened before the index is built (same order as
            # before); the context manager closes it even on failure.
            with open(path, 'w') as f_act:
                singleActType_dict = buildTypeIndex.build_daily_single_activity_index(subjectID, n)
                for key in singleActType_dict:
                    f_act.write("%-25s%-10s" % (key, singleActType_dict[key]))
                    f_act.write('\n')
예제 #4
0
def genDailyActTypeDataSet():
    """Collect one record per subject-day from the daily activity-type index.

    Each record is the tuple obtained by iterating the object returned by
    ``buildTypeIndex.build_daily_single_activity_index`` (its keys, if it is
    a dict). The number of records is printed before returning.

    Returns:
        A tuple of tuples, ordered by subject in ``available_list`` and then
        by day (1..duration).
    """
    records = []
    for subjectid in available_list:
        n_days = dietActInfoRetrv.getDuration(subjectid)
        records.extend(
            tuple(buildTypeIndex.build_daily_single_activity_index(subjectid, day))
            for day in range(1, n_days + 1)
        )

    result = tuple(records)
    print(len(result))
    return result
예제 #5
0
def genDailyDietTypeDataSet():
    """Collect one record per subject-day from the daily diet-type index.

    The ``'compositeP'`` entry is removed from each daily index (in place)
    before its remaining keys are turned into a tuple. The number of records
    is printed before returning.

    Returns:
        A tuple of tuples, ordered by subject in ``available_list`` and then
        by day (1..duration).
    """
    records = []
    for subjectid in available_list:
        n_days = dietActInfoRetrv.getDuration(subjectid)
        for day in range(1, n_days + 1):
            daily_index = buildTypeIndex.build_daily_single_diet_index(subjectid, day)
            # Drop the excluded category if present; no-op otherwise.
            daily_index.pop('compositeP', None)
            records.append(tuple(daily_index))

    result = tuple(records)
    print(len(result))
    return result
예제 #6
0
def preprocessDailyDiaryWithTime():
    """Run ``preprocessingWithTime`` on every daily "with time" diary file.

    For each subject in ``available_list`` (whose ID is printed) and each day
    in 1..duration, the activity file and then the diet file are passed to
    ``preprocessingWithTime`` as (source path, destination path).
    """
    print('in preprocessDailyDiaryWithTime()')
    for subjectID in available_list:
        print(subjectID)
        n_days = dietActInfoRetrv.getDuration(subjectID)
        for day in range(1, n_days + 1):
            # Common tail shared by all four file names of this subject-day.
            tail = subjectID + '_' + str(day) + '_with_time.txt'

            act_src = 'activity/activityFromExcel/activity_' + tail
            act_dst = 'activity/activityProcessed/processed_activity_' + tail
            preprocessingWithTime(act_src, act_dst)

            diet_src = 'diet/dietFromExcel/diet_' + tail
            diet_dst = 'diet/dietProcessed/processed_diet_' + tail
            preprocessingWithTime(diet_src, diet_dst)
예제 #7
0
def genDailySingleActTypeTFArray(subjectID):
    """Build the per-day activity-type frequency array for one subject.

    Args:
        subjectID: subject identifier passed to the duration and index
            helpers.

    Returns:
        A ``(duration, len(genActTypeDict()))`` array of floats. Row ``d``
        holds day ``d + 1``; the column for each key of the type dictionary
        holds the daily index value of the corresponding type label, or 0
        when the label is absent from that day's index.
    """
    type_labels = genActTypeDict()
    n_days = dietActInfoRetrv.getDuration(subjectID)
    tf = np.zeros((n_days, len(type_labels)))

    for day in range(1, n_days + 1):
        daily_counts = buildTypeIndex.build_daily_single_activity_index(
            subjectID, day)
        for column, label in type_labels.items():
            if label in daily_counts:
                tf[day - 1, column] = daily_counts[label]
    return tf
예제 #8
0
def getDietTypeTFArray4DC():
    """Accumulate diet-type counts per subject over all days and time slots.

    Returns:
        A ``(len(available_list), len(type_dict))`` array of floats. Row
        ``r`` belongs to the ``r``-th subject of ``available_list``; each
        column (a key of the diet type dictionary) holds the sum, over every
        day and every time key of that day's index, of the value stored for
        the corresponding type label.
    """
    type_dict = dataGen4DietAct.genDietTypeDict()
    totals = np.zeros((len(available_list), len(type_dict)))

    for row, subjectID in enumerate(available_list):
        n_days = dietActInfoRetrv.getDuration(subjectID)
        for day in range(1, n_days + 1):
            by_time = buildTypeIndex.build_daily_single_diet_index_with_time4DC(subjectID, day)
            for slot in by_time:
                slot_counts = by_time[slot]
                for column in type_dict:
                    label = type_dict[column]
                    if label in slot_counts:
                        totals[row, column] += slot_counts[label]
    return totals
예제 #9
0
def genDailyActDietTypeDataSet():
    """Collect one combined activity+diet type record per subject-day.

    For each day the ``'others'`` entry is removed from the activity-type
    index and the ``'compositeP'`` entry from the diet-type index (both in
    place); the remaining activity keys followed by the remaining diet keys
    form the record. The number of records is printed before returning.

    Returns:
        A tuple of tuples, ordered by subject in ``available_list`` and then
        by day (1..duration).
    """
    records = []
    for subjectid in available_list:
        n_days = dietActInfoRetrv.getDuration(subjectid)
        for day in range(1, n_days + 1):
            act_index = buildTypeIndex.build_daily_single_activity_index(subjectid, day)
            act_index.pop('others', None)
            act_part = tuple(act_index)

            diet_index = buildTypeIndex.build_daily_single_diet_index(subjectid, day)
            diet_index.pop('compositeP', None)
            diet_part = tuple(diet_index)

            records.append(act_part + diet_part)

    result = tuple(records)
    print(len(result))
    return result
def singleSubjectDailyArray(domain, subjectID):
    '''
    Build the normalised daily item TF-IDF array for one subject.

    Args:
        domain: ``'ActItem'`` or ``'DietItem'``; selects the item dictionary
            and the daily index builder.
        subjectID: subject identifier passed to the duration and index
            helpers.

    Returns:
        The result of ``utilise.normArray`` applied to the un-normalised
        TF-IDF transform of the ``(duration, number of items)`` count array.
        Its shape is printed before returning.

    Raises:
        ValueError: if ``domain`` is not one of the supported values
            (previously this surfaced as an UnboundLocalError).
    '''
    if domain == 'ActItem':
        item_dict = dataGen4DietAct.genActItemDict()
        build_index = buildItemIndex.build_daily_single_activity_index
    elif domain == 'DietItem':
        item_dict = dataGen4DietAct.genDietItemDict()
        build_index = buildItemIndex.build_daily_single_diet_index
    else:
        raise ValueError('unsupported domain: %r' % (domain,))

    duration = dietActInfoRetrv.getDuration(subjectID)
    array = np.zeros((duration, len(item_dict)))

    for i in range(duration):
        ItemIndex = build_index(subjectID, i + 1)
        for key in item_dict:
            # The daily item index is looked up with the item name wrapped
            # in single quotes.
            quoted = "'" + item_dict[key] + "'"
            if quoted in ItemIndex:
                array[i, key] = ItemIndex[quoted]

    transformer = TfidfTransformer(norm=None)
    tfidf = transformer.fit_transform(array)
    aa = tfidf.toarray()
    tfidfNorm = utilise.normArray(aa)
    print(tfidfNorm.shape)
    return tfidfNorm
예제 #11
0
def singleSubjectDailyArray(domain,subjectID):
    '''
    Build the daily term-frequency (TF) array for one subject.

    Args:
        domain: one of ``'DietType'``, ``'ActType'``, ``'ActItem'`` or
            ``'DietItem'``; selects the dictionary and the daily index
            builder.
        subjectID: subject identifier passed to the duration and index
            helpers.

    Returns:
        A ``(duration, len(item_dict))`` array of floats. Row ``d`` holds day
        ``d + 1``; the column for each dictionary key holds that day's index
        value for the corresponding name, or 0 when the name is absent.

    Raises:
        ValueError: if ``domain`` is not one of the supported values.
    '''
    # Attribute lookups stay inside the selected branch so that choosing one
    # domain never depends on helpers needed only by another.
    if domain == 'DietType':
        item_dict = dataGen4DietAct.genDietTypeDict()
        build_index = buildTypeIndex.build_daily_single_diet_index
    elif domain == 'ActType':
        item_dict = dataGen4DietAct.genActTypeDict()
        build_index = buildTypeIndex.build_daily_single_activity_index
    elif domain == 'ActItem':
        # Previously no dictionary was assigned for the item domains, so
        # these calls always failed with UnboundLocalError.
        # NOTE(review): confirm the item index keys match the plain item
        # names used below (no surrounding quotes).
        item_dict = dataGen4DietAct.genActItemDict()
        build_index = buildItemIndex.build_daily_single_activity_index
    elif domain == 'DietItem':
        item_dict = dataGen4DietAct.genDietItemDict()
        build_index = buildItemIndex.build_daily_single_diet_index
    else:
        raise ValueError('unsupported domain: %r' % (domain,))

    duration = dietActInfoRetrv.getDuration(subjectID)
    array = np.zeros((duration, len(item_dict)))

    for i in range(duration):
        ItemIndex = build_index(subjectID, i + 1)
        for key in item_dict:
            name = item_dict[key]
            # Cells for absent names keep the 0.0 they were initialised with.
            if name in ItemIndex:
                array[i, key] = ItemIndex[name]

    # TF-IDF weighting is intentionally not applied here: per the original
    # author's note, the document frequencies computed from a single subject
    # are not equal to the ones used for the mean vector.
    return array