def _pairDistance(weightMatrix, first, second):
    """Return weightMatrix[first][second], or a random fallback if it is absent.

    Only a missing entry (KeyError for mappings, IndexError for sequences)
    triggers the fallback; any other error propagates to the caller.
    """
    try:
        return weightMatrix[first][second]
    except (KeyError, IndexError):
        # No stored weight for this pair: substitute a random distance
        # drawn from [0.8, 1.0], as the original code did.
        return random.uniform(0.8, 1.0)


def sampleQueryPairs(fileName, weightFile, featFile):
    """Print the closest and farthest query pair of every cluster with > 3 members.

    The weight matrix is read from ``weightFile``, the queries from
    ``featFile`` (through ``FeatureManager``) and the clusters from
    ``fileName``.  For each cluster holding more than three ids, every
    unordered pair of ids (taken in sorted order) is scored with
    ``weightMatrix[a][b]``; pairs without a stored weight get a random
    distance in [0.8, 1.0].  The pair with the smallest distance is printed
    as ``Min\\t<q1>\\t<q2>`` and the one with the largest as
    ``Max\\t<q1>\\t<q2>``, where ``q1``/``q2`` are the ``qDict`` values of
    the two ids.  A pair is printed only if neither of its queries was part
    of a min/max pair of an earlier cluster.

    Args:
        fileName: cluster file handed to ``loadClustersWithQueryFile``.
        weightFile: file handed to ``readWeightMatrix``.
        featFile: query/feature file handed to ``FeatureManager.loadQueries``.

    Returns:
        None.  Results are written to standard output.

    Raises:
        KeyError: if a cluster id has no entry in the query dictionary.
    """
    weightMatrix = readWeightMatrix(weightFile)
    featMan = FeatureManager()
    featMan.loadQueries(featFile)
    idDict = featMan.returnIdDict()
    qDict = featMan.returnQueryDict()
    clusters = loadClustersWithQueryFile(fileName, idDict)

    # Queries that already appeared in a selected pair of an earlier cluster.
    done = set()

    for entry in clusters:
        # Only clusters with at least four members are sampled.
        if len(entry) <= 3:
            continue

        minPair = None
        maxPair = None
        # Sentinels: distances are expected to lie strictly inside (0, 1000).
        minDist = 1000
        maxDist = 0

        sentry = sorted(entry)
        for pos, first in enumerate(sentry):
            for second in sentry[pos + 1:]:
                dist = _pairDistance(weightMatrix, first, second)
                if dist < minDist:
                    minDist = dist
                    minPair = (qDict[first], qDict[second])
                if dist > maxDist:
                    maxDist = dist
                    maxPair = (qDict[first], qDict[second])

        # Both pairs are checked against `done` before either is recorded,
        # so the Min pair of this cluster never suppresses its own Max pair.
        for label, pair in (('Min', minPair), ('Max', maxPair)):
            if pair is not None and pair[0] not in done and pair[1] not in done:
                # Single-argument print(...) emits the same text on Python 2
                # and Python 3.
                print(label + '\t' + pair[0] + '\t' + pair[1])

        # Record the selected queries whether or not they were printed.
        for pair in (minPair, maxPair):
            if pair is not None:
                done.update(pair)
avg_inter_ij[i] = min(avg_inter_ij[i], score) avg_inter_ij[j] = min(avg_inter_ij[j], score) fmin_i = [] for i, mini in avg_inter_ij.items(): #print i, vals.values() fmin_i.append(mini / maxDiam) print 'FMIN ', fmin_i print 'Dunn index ', min(fmin_i) if __name__ == '__main__': argv = sys.argv lbreak = False weightMatrix = readWeightMatrix(argv[2]) featMan = FeatureManager() featMan.loadQueries(argv[3]) for ifile in os.listdir(argv[1]): clusters = loadClustersWithQueryFile(argv[1] + '/' + ifile, featMan.returnIdDict()) print len(clusters), len(featMan.returnIdDict()), len(weightMatrix) #load the cluster-assignments and points Dunn(clusters, weightMatrix) #DB(clusters,weightMatrix) #load the weight matrix #load the centers