def buildNetwork(queryFile, skosFile, featMan):
    """Build a directed category graph from a SKOS or OWL category file.

    The category -> queries mapping is obtained from
    ``findCatQueryDist(queryFile, featMan)``. The category file is then
    loaded with ``loadOntology`` when the substring ``'owl'`` occurs in
    ``skosFile`` and with ``loadSkosCategories`` otherwise. Every entry of
    the returned "broad" collection contributes one edge going from
    ``entry[1]`` to ``entry[0]``.

    Progress counts are printed to stdout along the way.

    Args:
        queryFile: forwarded unchanged to ``findCatQueryDist``.
        skosFile: path/name of the category file; also selects the loader.
        featMan: forwarded unchanged to ``findCatQueryDist``.

    Returns:
        A ``(categoryNetwork, catQueryDist)`` tuple: the ``nx.DiGraph`` and
        the category mapping (which, on the non-OWL path, has gained one
        extra hard-coded key).
    """
    categoryQueries = findCatQueryDist(queryFile, featMan)

    # Snapshot the category names *before* the non-OWL branch inserts its
    # extra key, so that key never counts towards the "not found" figure.
    knownCategories = set(categoryQueries.keys())
    print(len(categoryQueries))

    if 'owl' not in skosFile:
        # NOTE(review): hard-coded placeholder category registered before the
        # SKOS load; the reason is not visible in this function.
        categoryQueries['dow_jones_transportation_average'] = set()
        found, related, broad = loadSkosCategories(skosFile, categoryQueries)
    else:
        found, related, broad = loadOntology(skosFile, categoryQueries)
        print("%d %d" % (len(found), len(broad)))

    missing = knownCategories.difference(found)

    categoryNetwork = nx.DiGraph()
    for pair in broad:
        # Per the original note: for a pair (A, B), B is the parent of A,
        # so the edge points parent -> child.
        parent = pair[1]
        child = pair[0]
        categoryNetwork.add_edge(parent, child)

    print("%d %d %d" % (len(missing), len(related), len(broad)))
    return categoryNetwork, categoryQueries
task_label_dict, samePairsSet,\ differentPairsSet, args.outDir) mergeMetrics(total_metrics_dict, metrics) else: #weightMatrix = readWeightMatrix(args.distFile) if args.algo == 'kmeans': outSuff = args.outDir+'/kmeans_' metrics = clusterAllWithKMeans(args.lowerLimit, args.upperLimit,\ featMan, weightMatrix, \ samePairsSet, differentPairsSet,\ args.outDir) mergeMetrics(total_metrics_dict, metrics) elif args.algo == 'cat_kmeans': catQueryDist = findCatQueryDist(args.featFile,featMan) outSuff = args.outDir+'/cat_kmeans_' metrics = clusterCatWithKMeans(args.lowerLimit, args.upperLimit,\ featMan, weightMatrix, samePairsSet,\ differentPairsSet, catQueryDist, \ outSuff) mergeMetrics(total_metrics_dict, metrics) elif args.algo == 'cat_kmediods': catQueryDist = findCatQueryDist(args.featFile,featMan) outSuff = args.outDir+'/cat_kmeds_' metrics = clusterCatWithMediods(args.lowerLimit, args.upperLimit,\ featMan, weightMatrix, samePairsSet,\ differentPairsSet,catQueryDist,\ outSuff) mergeMetrics(total_metrics_dict, metrics) elif args.algo == 'cat_merge':