Example #1
0
def extractDataFilesAndFeatures(initialFolderpath,finalFolderPath, resultFolderPath):
    """Extract text and features from each input file and write three CSV summaries.

    For every file name returned by ``getFilesFromFolder(initialFolderpath)``:

    * the file is passed to ``extractTextAndFeatures``, which yields the
      content plus its textual and temporal feature sequences;
    * the file name is split on ``'.'`` and its first five pieces are read,
      in order, as userID, gender, age, job and place;
    * the content is saved to ``<finalFolderPath>/<userID>.txt``;
    * one row is added to each of the textual, temporal and class tables.

    The class row is laid out as userID, age, gender, job, place, ageClass,
    where ageClass comes from ``extractAgeClass(float(age))``.

    Once all files are processed the tables are written to
    ``textualFeature.csv``, ``temporalFeature.csv`` and ``classFeature.csv``
    under ``resultFolderPath``. Each table starts with its header row and
    every row is terminated by a newline.
    """
    # Each table is kept as a list of rows, header first, and joined once at the end.
    textRows = [csvLineMaker(getTextualHeaderFeatures())]
    temporalRows = [csvLineMaker(getTemporalHeaderFeatures())]
    classRows = [csvLineMaker(getClassHeaderFeatures())]

    for fileName in getFilesFromFolder(initialFolderpath):
        sourcePath = initialFolderpath + '/' + fileName
        content, textFeatures, temporalFeatures = extractTextAndFeatures(sourcePath)

        # The file name carries the metadata: userID.gender.age.job.place[...]
        nameParts = fileName.split('.')
        userID = str(nameParts[0])
        gender = str(nameParts[1])
        age = str(nameParts[2])
        ageClass = str(extractAgeClass(float(age)))
        job = str(nameParts[3])
        place = str(nameParts[4])

        saveFile(finalFolderPath + '/' + userID + '.txt', content)

        textRows.append(csvLineMaker(textFeatures))
        classRows.append(csvLineMaker([userID, age, gender, job, place, ageClass]))
        temporalRows.append(csvLineMaker(temporalFeatures))

    outputs = (
        ('/textualFeature.csv', textRows),
        ('/temporalFeature.csv', temporalRows),
        ('/classFeature.csv', classRows),
    )
    for suffix, rows in outputs:
        saveFile(resultFolderPath + suffix, '\n'.join(rows) + '\n')

    
    
def tenFoldSplitCSV(src_csv, dst_path):
    """Split the rows of a CSV file into ten train/test folds, one file per fold.

    Every row of ``src_csv`` (comma-delimited) is read into memory, the row
    indices are partitioned with ``KFold`` into ten folds, and for each fold
    a file ``fold<k>.csv`` (k = 1..10) is written inside ``dst_path``. Each
    output file has the layout::

        Train
        <one line per training row>
        Test
        <one line per held-out row>

    Rows are serialised with ``csvLineMaker`` and written with ``saveFile``.
    ``dst_path`` is created if it does not exist; it may be given with or
    without a trailing path separator.

    NOTE(review): ``KFold(n, n_folds=10)`` iterated directly looks like the
    legacy ``sklearn.cross_validation.KFold`` API - confirm against the
    file's imports before upgrading scikit-learn.
    """
    # NOTE(review): binary mode is what the Python 2 csv module expects; on
    # Python 3 csv.reader needs a text-mode file - confirm the target runtime.
    with open(src_csv, 'rb') as csvfile:
        if not os.path.exists(dst_path):
            os.makedirs(dst_path)
        lines = list(csv.reader(csvfile, delimiter=','))

    kf = KFold(len(lines), n_folds = 10)
    for fold_number, (train, test) in enumerate(kf, 1):
        fold_lines = ["Train"]
        fold_lines.extend(csvLineMaker(lines[train_index]) for train_index in train)
        fold_lines.append("Test")
        # Index with the held-out indices; the previous code reused the stale
        # training index here and wrote the last training row repeatedly.
        fold_lines.extend(csvLineMaker(lines[test_index]) for test_index in test)
        # os.path.join puts the file inside dst_path whether or not the
        # caller supplied a trailing separator.
        fold_path = os.path.join(dst_path, "fold" + str(fold_number) + ".csv")
        saveFile(fold_path, "\n".join(fold_lines) + "\n")