def index(dataPath):
    """Build and persist a nearest-neighbour index for the JPEGs in ``dataPath``.

    Every file matching ``dataPath + "/*.jpg"`` is passed to ``extract`` to
    obtain its VLAD descriptor. Two pickle files are then written to the
    current working directory:

    * "kvd.pickle"   -- ``[image paths, descriptors]``
    * "index.pickle" -- ``[image paths, BallTree built from the descriptors]``

    Nothing is returned.
    """
    # The glob result doubles as the list of image IDs; descriptors are
    # computed in the same order so both lists stay aligned by position.
    image_paths = glob.glob(dataPath + "/*.jpg")
    descriptors = [extract(path) for path in image_paths]

    # Persist the raw descriptors alongside the image they came from.
    with open("kvd.pickle", "wb") as descriptor_file:
        pickle.dump([image_paths, descriptors], descriptor_file)

    # A ball tree organises the descriptors for nearest-neighbour queries.
    # TODO: play with different leaf_size
    ball_tree = BallTree(descriptors, leaf_size=10)

    # Persist the searchable index.
    with open("index.pickle", "wb") as index_file:
        pickle.dump([image_paths, ball_tree], index_file)
def query(imagePath):
    """Print the indexed images most similar to the image at ``imagePath``.

    Loads ``[imageIDs, tree]`` from "index.pickle" in the current working
    directory, passes ``imagePath`` to ``extract`` and queries the tree with
    the result for up to four nearest neighbours. The distances, the
    neighbour indices and the matching image IDs are printed; nothing is
    returned.

    Args:
        imagePath: Path of the query image, forwarded unchanged to
            ``extract``.
    """
    # Single-argument print(...) calls behave the same under the Python 2
    # print statement and the Python 3 print function. The doubled space
    # reproduces the separator the old ``print "label ", value`` form emitted.
    print("Querying image:  %s" % (imagePath,))

    # Only the index is needed for querying; the raw descriptors stored in
    # "kvd.pickle" are not read here.
    # NOTE(review): unpickling can execute arbitrary code -- only load an
    # index file that was produced by a trusted run.
    with open("index.pickle", "rb") as f:
        saved = pickle.load(f)
    imageIDs = saved[0]
    tree = saved[1]

    vlad = extract(imagePath)

    # The tree rejects a k larger than the number of indexed points, so cap
    # the neighbour count at the size of the index (one ID per point).
    k = min(4, len(imageIDs))

    # Distances and positions of the nearest descriptors already in the index.
    distance, indices = tree.query(vlad, k)
    print("%s %s" % (distance, indices))

    # The tree returns one row of indices per query vector; flatten the rows
    # and report the image behind each index.
    for i in itertools.chain.from_iterable(indices):
        print("Similar Image:  %s" % (imageIDs[i],))
def extract(tweet):
    """Return the location names found in a tweet's text.

    This is a thin wrapper: ``tweet`` is forwarded unchanged to
    ``core.extract`` and its result (documented as a list of tuples) is
    returned as-is.
    """
    locations = core.extract(tweet)
    return locations
def yandex():
    """Print a timestamped "Yandex" banner, then scrape and extract.

    The result of ``scrape_yandex()`` is handed directly to ``extract``;
    nothing is returned.
    """
    timestamp = strftime("%H:%M:%S")
    print("[%s] Yandex" % timestamp)
    extract(scrape_yandex())
def weblogs():
    """Print a timestamped "Weblogs" banner, then scrape and extract.

    The result of ``scrape_weblogs()`` is handed directly to ``extract``;
    nothing is returned.
    """
    timestamp = strftime("%H:%M:%S")
    print("[%s] Weblogs" % timestamp)
    extract(scrape_weblogs())
def google():
    """Print a timestamped "Google" banner, then scrape and extract.

    The result of ``scrape_google()`` is handed directly to ``extract``;
    nothing is returned.
    """
    timestamp = strftime("%H:%M:%S")
    print("[%s] Google" % timestamp)
    extract(scrape_google())