def findPairwiseDistance(featureFile, outFile):
    """Score every unordered pair of features and write the positive edges.

    Features are loaded from ``featureFile`` through ``FeatureManager``. The
    keys of its ``featureDict`` are visited in sorted order and each pair
    (i < j) is scored exactly once. The score combines the values returned
    by ``findCosineDistance`` and ``findJacardDistance`` with the weights
    below (they sum to 100, and the total is divided by 100):

        15    mean of ``qcos`` and ``qjac``
        12.5  ``ngramCos``
        12.5  ``ucos``
        20    ``sessionCos``
        20    ``userCos``
        10    mean of ``entCos`` and ``catCos``
        10    ``typeCos``

    Every pair whose score is strictly greater than 0.0 is written as one
    tab-separated line: the two query strings from ``returnQuery`` followed
    by the score rounded to three decimal places.

    NOTE(review): the helper names say "Distance", but a larger value makes
    an edge more likely to be written here, i.e. the values are treated like
    similarities -- confirm against the feature class.

    Args:
        featureFile: passed unchanged to ``FeatureManager.readFeatures``.
        outFile: path of the file to create (or overwrite) with the edges.
    """
    featMan = FeatureManager()
    featMan.readFeatures(featureFile)
    keys = sorted(featMan.featureDict.keys())
    # A single parenthesised argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('%d %s' % (len(keys), keys[-5:]))

    # The context manager guarantees the file is flushed and closed even if
    # one of the feature helpers raises part-way through the pair loop.
    with open(outFile, 'w') as oFile:
        for i, key1 in enumerate(keys[:-1]):
            qid1, qf1 = featMan.returnFeature(key1)
            for key2 in keys[i + 1:]:
                qid2, qf2 = featMan.returnFeature(key2)
                (qcos, ucos, userCos, sessionCos, ngramCos, entCos,
                 catCos, typeCos) = qf1.findCosineDistance(qf2)
                qjac = qf1.findJacardDistance(qf2)
                edgeScore = (15 * ((qcos + qjac) / 2.0) +
                             12.5 * ngramCos + 12.5 * ucos +
                             20 * sessionCos + 20 * userCos +
                             10 * ((entCos + catCos) / 2.0) +
                             10 * typeCos) / 100.0
                # Pairs with no positive score produce no edge at all.
                if edgeScore > 0.0:
                    oFile.write('\t'.join((featMan.returnQuery(qid1),
                                           featMan.returnQuery(qid2),
                                           str(round(edgeScore, 3)))) + '\n')
edgeScore = 1.0 - weightMatrix[qid2][qid1] if edgeScore > threshold: qcc.addEdge(qid1, qid2, edgeScore) except: pass else: print 'Query feature error ', session[i] sessCount += 1 labels = qcc.getTaskComponents() fname = args.outDir + '_'+args.algo+'_' + str(threshold) + '.txt' outFile = open(fname, 'w') for entry in labels: string = '' for qid in entry: string += featMan.returnQuery(qid) + '\t' outFile.write(string.strip() + '\n') outFile.close() predicted_same_pairs, predicted_different_pairs=\ getPairLabelsFromClusters(labels,featMan) metrics[threshold] = getRecallPrecision(samePairsSet, differentPairsSet, predicted_same_pairs, predicted_different_pairs) for tcount, met in metrics.items(): print tcount, met mergeMetrics(total_metrics_dict, metrics) computeAverageAndVarianceOfMetrics(args.algo, total_metrics_dict) #qcos, ucos, userCos, sessionCos, ngramCos, entCos, \ #catCos,typeCos = qf1.findCosineDistance(qf2) #qjac = qf1.findJacardDistance(qf2) ##qedit = qf1.findEditDistance(qf2) ##normalized distance