file.write(str(matrix[i][featNo])+" ") file.write("\n") return 0 previousRecord = [4,5] hotelProp = [8,9,10,11,12,13,15,16] typeOftrip = [18,19,20,21,22,23] etc = [24,25] comp = range(27,51) relativeFreq = [54,55,56,57] oneHotHotel = range(58,110) sumComp = [110,111,112] ratio = range(113,117) allData = hh.getCSVmatrix("smalltrainOld&New2",100000) trainData = allData[:3*(len(allData))/4] evalData = allData[3*(len(allData))/4::] print len(trainData) print len(evalData) trainData = hh.convertMatrixtoFloat(trainData) evalData = hh.convertMatrixtoFloat(evalData) writeForSVMRank('train',trainData) writeForSVMRank('test',evalData)
# Names associated with each preprocessing step.
# NOTE(review): presumably output file names written by the matching
# step -- their use is not visible in this part of the file; confirm.
step1Output = "smalltestProcessedStep1"
step2Output = "smalltestRelFreq"
step3_1Output = "smalltestProcessedStep3"
step3_2Output = "smalltestRatios"
step4_FinalOutput = "smalltestOld&New2"

# Step switches: a step's section runs only when its flag equals 1
# (see the `step1 == 1` test below; the other flags are presumably
# tested the same way further down -- verify). All are off by default.
step1 = 0
step2 = 0
step3 = 0
step4 = 0
step5 = 0
step6 = 0

if (step1 == 1):
    print "Step1 - Fill Null"
    allData = hh.getCSVmatrix(firstInput,totalRows)
    searchIDs = hh.getColumn(allData,0)

    #Date_time - Get weeks is [[0,..0,1,0],[0,..1,0,0]... ,[1,0,..]]
    date = hh.getColumn(allData,1)
    # Column 19 is added to the day count below.
    # NOTE(review): presumably a booking window expressed in days -- confirm.
    booking = np.array(hh.getColumn(allData,19))
    # NOTE(review): same call as two lines above; redundant unless
    # hh.getColumn has side effects.
    date = hh.getColumn(allData,1)
    for i in range(len(date)):
        time = date[i]
        # Keep only the first whitespace-separated token of the value.
        time = (time.split())[0]
        # Characters 5-6 of that token are parsed as the month number.
        month = int(time[5:7])
        # NOTE(review): this slice is applied AFTER split()[0]. If the token
        # is a 10-character 'YYYY-MM-DD' date, [-11:-9] clamps to [0:1] and
        # yields the first character rather than the day; it would select
        # the day only on a 19-character 'YYYY-MM-DD HH:MM:SS' string.
        # Confirm the input format.
        day = int(time[-11:-9])
        # Cumulative day totals through the END of each month (365-day year).
        # NOTE(review): loop-invariant, yet rebuilt on every iteration. Also,
        # adding `day` to an end-of-month total is not monotonic across month
        # boundaries (Jan 31 -> 31+31 = 62, Feb 1 -> 59+1 = 60); a day-of-year
        # would use the total of the PRECEDING months. Confirm intent.
        dictMonth = {1:31,2:59,3:90,4:120,5:151,6:181,7:212,8:243,9:273,10:304,11:334,12:365}
        num_week = (dictMonth[month] + day+ int(booking[i]) )
        # Wrap totals of 365 or more back into the range 0..364.
        if num_week>=365:
            num_week = num_week%365
        # Convert the day count into a 0-based week index.
        num_week = num_week//7