def extractDataFilesAndFeatures(initialFolderpath, finalFolderPath, resultFolderPath):
    """Extract text and features from every file of a folder and write CSVs.

    Each file name found in ``initialFolderpath`` is split on ``'.'`` and its
    first five fields are read as user ID, gender, age, job and place.
    The content returned by ``extractTextAndFeatures`` is saved as
    ``<userID>.txt`` under ``finalFolderPath``; three CSV documents
    (``textualFeature.csv``, ``temporalFeature.csv``, ``classFeature.csv``)
    are saved under ``resultFolderPath``, each starting with its header line.

    Args:
        initialFolderpath: folder holding the input files.
        finalFolderPath: folder receiving the per-user ``.txt`` files.
        resultFolderPath: folder receiving the three feature CSV files.
    """
    # Each list holds complete, newline-terminated CSV lines; header first.
    textualRows = [csvLineMaker(getTextualHeaderFeatures())+'\n']
    temporalRows = [csvLineMaker(getTemporalHeaderFeatures())+'\n']
    classRows = [csvLineMaker(getClassHeaderFeatures())+'\n']

    for fileName in getFilesFromFolder(initialFolderpath):
        content, textFeatures, temporalFeatures = extractTextAndFeatures(
            initialFolderpath+'/'+fileName)

        # Metadata is encoded in the dot-separated file name.
        nameFields = fileName.split('.')
        userID = str(nameFields[0])
        gender = str(nameFields[1])
        age = str(nameFields[2])
        ageClass = str(extractAgeClass(float(age)))
        job = str(nameFields[3])
        place = str(nameFields[4])

        saveFile(finalFolderPath+'/'+userID+'.txt', content)

        textualRows.append(csvLineMaker(textFeatures)+'\n')
        # Column order of the class CSV: id, age, gender, job, place, age class.
        classRows.append(
            csvLineMaker([userID, age, gender, job, place, ageClass])+'\n')
        temporalRows.append(csvLineMaker(temporalFeatures)+'\n')

    saveFile(resultFolderPath+'/textualFeature.csv', ''.join(textualRows))
    saveFile(resultFolderPath+'/temporalFeature.csv', ''.join(temporalRows))
    saveFile(resultFolderPath+'/classFeature.csv', ''.join(classRows))
def tenFoldSplitCSV(src_csv, dst_path):
    """Split the rows of a CSV file into ten train/test folds.

    Every row of ``src_csv`` is read, ``KFold`` is asked for ten folds over
    the row indices, and for each fold one document is handed to
    ``saveFile``. A fold document has the layout::

        Train
        <one csvLineMaker(row) line per training row>
        Test
        <one csvLineMaker(row) line per test row>

    Args:
        src_csv: path of the comma-delimited source file.
        dst_path: output location; created with ``os.makedirs`` when it does
            not exist. Fold files are named ``dst_path + "fold<k>.csv"`` with
            ``k`` starting at 1.
    """
    # NOTE(review): binary mode suits the Python 2 csv module; under Python 3
    # csv.reader expects a text-mode file -- confirm the target interpreter.
    with open(src_csv, 'rb') as csvfile:
        if not os.path.exists(dst_path):
            os.makedirs(dst_path)
        lines = list(csv.reader(csvfile, delimiter=','))

    kf = KFold(len(lines), n_folds=10)
    for dir_iter, (train, test) in enumerate(kf, 1):
        parts = ["Train\n"]
        parts.extend(csvLineMaker(lines[train_index]) + "\n"
                     for train_index in train)
        parts.append("Test\n")
        # Index with test_index here: the test section must hold the
        # held-out rows, not a repeat of the last training row.
        parts.extend(csvLineMaker(lines[test_index]) + "\n"
                     for test_index in test)
        # NOTE(review): plain concatenation, so dst_path is presumably
        # expected to end with a path separator -- verify against callers.
        saveFile(dst_path + "fold" + str(dir_iter) + ".csv", "".join(parts))