Exemple #1
0
def sampleQueryPairs(fileName, weightFile, featFile):
  """Print the closest and the farthest query pair of each large cluster.

  Args:
    fileName: cluster file, passed to loadClustersWithQueryFile together
      with the id dictionary of the loaded queries.
    weightFile: file passed to readWeightMatrix; the result is indexed as
      weightMatrix[a][b] with cluster members a < b (members are sorted).
    featFile: query file loaded into a FeatureManager.

  For every cluster with more than three members, all member pairs are
  scanned.  The pair with the smallest weight (below 1000) is printed as
  'Min' and the pair with the largest weight (above 0) as 'Max', as
  tab-separated query strings.  A pair is printed only if neither of its
  queries belongs to a pair selected for an earlier cluster; selected pairs
  are remembered whether or not they were printed.

  When the weight matrix has no entry for a pair, a random distance drawn
  uniformly from [0.8, 1.0] is used instead.  Only lookup failures
  (KeyError / IndexError) trigger this fallback; any other exception
  propagates instead of being silently replaced by a random value.
  """
  weightMatrix = readWeightMatrix(weightFile)
  featMan = FeatureManager()
  featMan.loadQueries(featFile)
  idDict = featMan.returnIdDict()
  qDict = featMan.returnQueryDict()

  clusters = loadClustersWithQueryFile(fileName, idDict)
  # Queries that already belong to a selected pair.
  done = set()
  for entry in clusters:
    # Small clusters yield no pairs at all.
    if len(entry) <= 3:
      continue

    minPair = None
    maxPair = None
    minDist = 1000
    maxDist = 0
    members = sorted(entry)
    for pos, first in enumerate(members):
      for second in members[pos + 1:]:
        # Keep the try body down to the one lookup that is expected to fail.
        try:
          dist = weightMatrix[first][second]
        except (KeyError, IndexError):
          # No stored weight for this pair: fall back to a random distance.
          dist = random.uniform(0.8, 1.0)

        if dist < minDist:
          minDist = dist
          minPair = (qDict[first], qDict[second])
        if dist > maxDist:
          maxDist = dist
          maxPair = (qDict[first], qDict[second])

    # Both checks run before this cluster's pairs are recorded, so a Max
    # pair sharing a query with this cluster's Min pair is still printed.
    if minPair is not None and minPair[0] not in done \
        and minPair[1] not in done:
      print('Min\t' + minPair[0] + '\t' + minPair[1])

    if maxPair is not None and maxPair[0] not in done \
        and maxPair[1] not in done:
      print('Max\t' + maxPair[0] + '\t' + maxPair[1])

    if minPair is not None:
      done.update(minPair)
    if maxPair is not None:
      done.update(maxPair)
Exemple #2
0
              avg_inter_ij[i] = min(avg_inter_ij[i], score)
              avg_inter_ij[j] = min(avg_inter_ij[j], score)

    fmin_i = []
    for i, mini in avg_inter_ij.items():
      #print i, vals.values()
      fmin_i.append(mini / maxDiam)

    print 'FMIN ', fmin_i
    print 'Dunn index ', min(fmin_i)


if __name__ == '__main__':
  # Command line, as read below:
  #   argv[1] directory of cluster files
  #   argv[2] weight matrix file
  #   argv[3] query file
  argv = sys.argv
  lbreak = False

  # The weight matrix and the queries are loaded once and shared by all
  # cluster files.
  weightMatrix = readWeightMatrix(argv[2])
  featMan = FeatureManager()
  featMan.loadQueries(argv[3])

  for ifile in os.listdir(argv[1]):
    # Load the cluster assignments stored in this file.
    clusterPath = '/'.join((argv[1], ifile))
    clusters = loadClustersWithQueryFile(clusterPath, featMan.returnIdDict())

    # Report: number of clusters, number of known ids, weight matrix size.
    sizes = (len(clusters), len(featMan.returnIdDict()), len(weightMatrix))
    print(' '.join(str(size) for size in sizes))

    Dunn(clusters, weightMatrix)
  #DB(clusters,weightMatrix)

  #load the weight matrix

  #load the centers