# -- Load the feature file log.info("Loading features from file...") f = FeatureList(opts.feature_file) f.process() log.debug("Loaded %s rows" % len(f.dataset)) # -- Load the clusters log.info("Loading clusters from file") cluster_file = file(opts.cluster_file,'r') base = basename(opts.cluster_file) clusters = imp.load_source(base,opts.cluster_file).clusters # -- Load the clusters into FLANN as if they were the points log.info("Building cluster index...") flann = buildIndex(clusters) # -- For each point in the feature list, find its 'nearest neighbor', # i.e. which cluster it belongs to. log.info("Calculating distances...") cluster_list,distance_list = flann.nn_index(f.dataset) log.info( "Have %i nearest-clusters" % len(cluster_list)) # -- For each file, write its words to [out_dir]/[image].txt current_feature = 0 for filename in image_filenames: base = basename(filename).replace('jpg','txt') out = file("%s/%s" % (opts.output_dir,base), 'w+') for feature in xrange(feature_count[filename]):
from buildIndex import buildIndex

# Flags forwarded to buildIndex through its ``options`` keyword.
# Every entry is enabled except 'pageRank'.
# NOTE(review): these look like per-stage toggles -- confirm against buildIndex.
options = dict(
    crawl=True,
    pageRank=False,
    parse=True,
    database=True,
    idf=True,
    tfidf=True,
)

# Alternative invocation, left disabled:
# buildIndex(3, passwordLock=False)

# Active run: first positional argument 2, four threads, no password lock,
# no resets, dev flag on, using the options defined above.
buildIndex(
    2,
    threads=4,
    passwordLock=False,
    reset=False,
    resetFiles=False,
    options=options,
    dev=True,
)
# -- Load the feature file log.info("Loading features from file...") f = FeatureList(opts.feature_file) f.process() log.debug("Loaded %s rows" % len(f.dataset)) # -- Load the clusters log.info("Loading clusters from file") cluster_file = file(opts.cluster_file, 'r') base = basename(opts.cluster_file) clusters = imp.load_source(base, opts.cluster_file).clusters # -- Load the clusters into FLANN as if they were the points log.info("Building cluster index...") flann = buildIndex(clusters) # -- For each point in the feature list, find its 'nearest neighbor', # i.e. which cluster it belongs to. log.info("Calculating distances...") cluster_list, distance_list = flann.nn_index(f.dataset) log.info("Have %i nearest-clusters" % len(cluster_list)) # -- For each file, write its words to [out_dir]/[image].txt current_feature = 0 for filename in image_filenames: base = basename(filename).replace('jpg', 'txt') out = file("%s/%s" % (opts.output_dir, base), 'w+') for feature in xrange(feature_count[filename]):
log.info("Loading clusters from file") cluster_file = file(opts.cluster_file, 'r') base = basename(opts.cluster_file) clusters = imp.load_source(base, opts.cluster_file).clusters else: log.info("Calculating %s clusters (%s passes)" % (opts.num_clusters, opts.num_iterations)) clusters = clusteringFlann.kmeans(pts=featureSet, num_clusters=opts.num_clusters, dtype=float32, max_iterations=opts.num_iterations, centers_init='gonzales') # -- Load the clusters into FLANN as if they were the points log.info("Building cluster index...") clusterIndex = buildIndex(clusters) # -- For each point in the feature list, find its 'nearest neighbor', # i.e. which cluster it belongs to. log.info("Calculating distances...") cluster_list, distance_list = clusterIndex.nn_index(featureSet, num_neighbors=1) log.info("Have %i nearest-clusters, writing files" % len(cluster_list)) # -- For each file, write its words to [out_dir]/[image].txt current_feature = 0 for filename in image_filenames: base = basename(filename).replace('jpg', 'txt') out = file("%s/%s" % (opts.output_dir, base), 'w+')
log.info("Loading clusters from file") cluster_file = file(opts.cluster_file,'r') base = basename(opts.cluster_file) clusters = imp.load_source(base,opts.cluster_file).clusters else: log.info("Calculating %s clusters (%s passes)" % (opts.num_clusters,opts.num_iterations)) clusters = clusteringFlann.kmeans( pts=featureSet, num_clusters=opts.num_clusters, dtype=float32, max_iterations=opts.num_iterations, centers_init='gonzales') # -- Load the clusters into FLANN as if they were the points log.info("Building cluster index...") clusterIndex = buildIndex(clusters) # -- For each point in the feature list, find its 'nearest neighbor', # i.e. which cluster it belongs to. log.info("Calculating distances...") cluster_list,distance_list = clusterIndex.nn_index(featureSet,num_neighbors=1) log.info( "Have %i nearest-clusters, writing files" % len(cluster_list)) # -- For each file, write its words to [out_dir]/[image].txt current_feature = 0 for filename in image_filenames: base = basename(filename).replace('jpg','txt') out = file("%s/%s" % (opts.output_dir,base), 'w+') for feature in xrange(feature_count[filename]):