コード例 #1
0
ファイル: __init__.py プロジェクト: vmanisha/QueryExpansion
def findPairwiseDistance(featureFile, outFile):
  """Write a weighted edge score for every pair of queries to a file.

  Features are loaded from ``featureFile`` through ``FeatureManager``. Every
  unordered pair of feature ids is visited in sorted-id order, the scores
  returned by ``findCosineDistance`` and ``findJacardDistance`` are blended
  into a single edge score, and each pair whose score is strictly positive is
  written to ``outFile`` as ``query1<TAB>query2<TAB>score`` (score rounded to
  three decimals).

  Args:
    featureFile: path handed to ``FeatureManager.readFeatures``.
    outFile: path of the result file; opened with mode 'w', so any existing
      content is overwritten.
  """
  featMan = FeatureManager()
  featMan.readFeatures(featureFile)
  featDict = featMan.featureDict

  # The context manager guarantees the file is closed even when one of the
  # feature-manager calls below raises part-way through the pair loop.
  with open(outFile, 'w') as oFile:
    keys = sorted(featDict.keys())
    # Debug line: number of ids and the five largest ones. Built as a single
    # string so the output is the same with the print statement and the
    # print function.
    print('%d %s' % (len(keys), keys[-5:]))

    # keys[:-1] / keys[i + 1:] enumerate each unordered pair exactly once.
    for i, key1 in enumerate(keys[:-1]):
      qid1, qf1 = featMan.returnFeature(key1)
      for key2 in keys[i + 1:]:
        qid2, qf2 = featMan.returnFeature(key2)
        (qcos, ucos, userCos, sessionCos, ngramCos, entCos,
         catCos, typeCos) = qf1.findCosineDistance(qf2)
        qjac = qf1.findJacardDistance(qf2)

        # Weights add up to 100, hence the final division. The query-level
        # cosine/Jaccard scores and the entity/category scores are averaged
        # before their weight is applied.
        edgeScore = (15 * ((qcos + qjac) / 2.0) +
                     12.5 * ngramCos + 12.5 * ucos + 20 * sessionCos +
                     20 * userCos + 10 * ((entCos + catCos) / 2.0) +
                     10 * typeCos) / 100.0

        # Pairs with a non-positive score are not written at all.
        if edgeScore > 0.0:
          oFile.write(featMan.returnQuery(qid1) + '\t' +
                      featMan.returnQuery(qid2) + '\t' +
                      str(round(edgeScore, 3)) + '\n')
コード例 #2
0
ファイル: qccTasks.py プロジェクト: vmanisha/QueryExpansion
                  edgeScore = 1.0 - weightMatrix[qid2][qid1]
                if edgeScore > threshold:
                  qcc.addEdge(qid1, qid2, edgeScore)
              except:
                pass
        else:
            print 'Query feature error ', session[i]
      sessCount += 1
    labels = qcc.getTaskComponents()
    fname = args.outDir + '_'+args.algo+'_' + str(threshold) + '.txt'
    outFile = open(fname, 'w')

    for entry in labels:
      string = ''
      for qid in entry:
        string += featMan.returnQuery(qid) + '\t'
      outFile.write(string.strip() + '\n')
    outFile.close()
    predicted_same_pairs, predicted_different_pairs=\
     getPairLabelsFromClusters(labels,featMan)
    metrics[threshold] = getRecallPrecision(samePairsSet, differentPairsSet, predicted_same_pairs, predicted_different_pairs)
    for tcount, met in metrics.items():
      print tcount, met
    mergeMetrics(total_metrics_dict, metrics)
  computeAverageAndVarianceOfMetrics(args.algo, total_metrics_dict)

  #qcos, ucos, userCos, sessionCos, ngramCos, entCos, \
  #catCos,typeCos = qf1.findCosineDistance(qf2)
  #qjac = qf1.findJacardDistance(qf2)
  ##qedit = qf1.findEditDistance(qf2)
  ##normalized distance