step2Output = "smalltestRelFreq" step3_1Output = "smalltestProcessedStep3" step3_2Output = "smalltestRatios" step4_FinalOutput = "smalltestOld&New2" step1 = 0 step2 = 0 step3 = 0 step4 = 0 step5 = 0 step6 = 0 if (step1 == 1): print "Step1 - Fill Null" allData = hh.getCSVmatrix(firstInput,totalRows) searchIDs = hh.getColumn(allData,0) #Date_time - Get weeks is [[0,..0,1,0],[0,..1,0,0]... ,[1,0,..]] date = hh.getColumn(allData,1) booking = np.array(hh.getColumn(allData,19)) date = hh.getColumn(allData,1) for i in range(len(date)): time = date[i] time = (time.split())[0] month = int(time[5:7]) day = int(time[-11:-9]) dictMonth = {1:31,2:59,3:90,4:120,5:151,6:181,7:212,8:243,9:273,10:304,11:334,12:365} num_week = (dictMonth[month] + day+ int(booking[i]) ) if num_week>=365: num_week = num_week%365 num_week = num_week//7 if num_week > 51:
# print "Accuracy:", (acB + acIg + acClic) / 3 print "Accuracy:", (acB + acIg) / 2 #Get probability of each point allCheck = [] refinedScore = [] propResult = clf.decision_function(evalData) for i in range(len(result)): #Times (label+1) * max(value of the probability) refinedScore.append((result[i]+1)*max(propResult[i])) result = propResult #Get searchIDs searchID = hh.getColumn(evalData,0) #Put Truelabels, predicted labels(weighted with probability) back into list per searchID results = hh.splitColumnsForEachID(searchID, result) labels = hh.splitColumnsForEachID(searchID, labelEval) #Put our predicted labels and true label in a tuple #Rank the predicted labels ourRank = [] for i in range(len(results)): rank = [] for j in range(len(results[i])): rank.append((results[i][j], labels[i][j])) rank = sorted(rank, key=itemgetter(0)) ourRank.append(rank)