# -- Load the feature file
    log.info("Loading features from file...")
    f = FeatureList(opts.feature_file)
    f.process()
    log.debug("Loaded %s rows" % len(f.dataset))

    # -- Load the clusters
    log.info("Loading clusters from file")
    cluster_file = file(opts.cluster_file,'r')
    base = basename(opts.cluster_file)
    clusters = imp.load_source(base,opts.cluster_file).clusters
    
    # -- Load the clusters into FLANN as if they were the points
    log.info("Building cluster index...")
    flann = buildIndex(clusters)
    
    # -- For each point in the feature list, find its 'nearest neighbor',
    # i.e. which cluster it belongs to.
    log.info("Calculating distances...")
    cluster_list,distance_list = flann.nn_index(f.dataset)

    log.info( "Have %i nearest-clusters" % len(cluster_list))

    # -- For each file, write its words to [out_dir]/[image].txt
    current_feature = 0
    for filename in image_filenames:
        base = basename(filename).replace('jpg','txt')
        out = file("%s/%s" % (opts.output_dir,base), 'w+')
        
        for feature in xrange(feature_count[filename]):
Example #2
0
from buildIndex import buildIndex

# Boolean flags handed to buildIndex through its `options` keyword argument.
# Every flag is enabled except 'pageRank'. What each flag controls is decided
# inside buildIndex, which is imported from another module and not shown here.
options = dict([
    ('crawl', True),
    ('pageRank', False),
    ('parse', True),
    ('database', True),
    ('idf', True),
    ('tfidf', True),
])

# An alternative invocation that is currently disabled:
#   buildIndex(3, passwordLock=False)
#
# Active invocation: one positional argument (2) followed by keyword settings.
# The call runs at module level, i.e. as soon as this file is executed.
buildIndex(
    2,
    threads=4,
    passwordLock=False,
    reset=False,
    resetFiles=False,
    options=options,
    dev=True,
)
    # -- Load the feature file
    log.info("Loading features from file...")
    f = FeatureList(opts.feature_file)
    f.process()
    log.debug("Loaded %s rows" % len(f.dataset))

    # -- Load the clusters
    log.info("Loading clusters from file")
    cluster_file = file(opts.cluster_file, 'r')
    base = basename(opts.cluster_file)
    clusters = imp.load_source(base, opts.cluster_file).clusters

    # -- Load the clusters into FLANN as if they were the points
    log.info("Building cluster index...")
    flann = buildIndex(clusters)

    # -- For each point in the feature list, find its 'nearest neighbor',
    # i.e. which cluster it belongs to.
    log.info("Calculating distances...")
    cluster_list, distance_list = flann.nn_index(f.dataset)

    log.info("Have %i nearest-clusters" % len(cluster_list))

    # -- For each file, write its words to [out_dir]/[image].txt
    current_feature = 0
    for filename in image_filenames:
        base = basename(filename).replace('jpg', 'txt')
        out = file("%s/%s" % (opts.output_dir, base), 'w+')

        for feature in xrange(feature_count[filename]):
        log.info("Loading clusters from file")
        cluster_file = file(opts.cluster_file, 'r')
        base = basename(opts.cluster_file)
        clusters = imp.load_source(base, opts.cluster_file).clusters
    else:
        log.info("Calculating %s clusters (%s passes)" %
                 (opts.num_clusters, opts.num_iterations))
        clusters = clusteringFlann.kmeans(pts=featureSet,
                                          num_clusters=opts.num_clusters,
                                          dtype=float32,
                                          max_iterations=opts.num_iterations,
                                          centers_init='gonzales')

    # -- Load the clusters into FLANN as if they were the points
    log.info("Building cluster index...")
    clusterIndex = buildIndex(clusters)

    # -- For each point in the feature list, find its 'nearest neighbor',
    # i.e. which cluster it belongs to.
    log.info("Calculating distances...")
    cluster_list, distance_list = clusterIndex.nn_index(featureSet,
                                                        num_neighbors=1)

    log.info("Have %i nearest-clusters, writing files" % len(cluster_list))

    # -- For each file, write its words to [out_dir]/[image].txt
    current_feature = 0
    for filename in image_filenames:
        base = basename(filename).replace('jpg', 'txt')
        out = file("%s/%s" % (opts.output_dir, base), 'w+')
        log.info("Loading clusters from file")
        cluster_file = file(opts.cluster_file,'r')
        base = basename(opts.cluster_file)
        clusters = imp.load_source(base,opts.cluster_file).clusters
    else:
        log.info("Calculating %s clusters (%s passes)" % (opts.num_clusters,opts.num_iterations))        
        clusters = clusteringFlann.kmeans(
            pts=featureSet,
            num_clusters=opts.num_clusters,
            dtype=float32,
            max_iterations=opts.num_iterations,
            centers_init='gonzales')

    # -- Load the clusters into FLANN as if they were the points
    log.info("Building cluster index...")
    clusterIndex = buildIndex(clusters)

    # -- For each point in the feature list, find its 'nearest neighbor',
    # i.e. which cluster it belongs to.
    log.info("Calculating distances...")
    cluster_list,distance_list = clusterIndex.nn_index(featureSet,num_neighbors=1)

    log.info( "Have %i nearest-clusters, writing files" % len(cluster_list))

    # -- For each file, write its words to [out_dir]/[image].txt
    current_feature = 0
    for filename in image_filenames:
        base = basename(filename).replace('jpg','txt')
        out = file("%s/%s" % (opts.output_dir,base), 'w+')

        for feature in xrange(feature_count[filename]):