def __init__(self, collection, feature, distance, tpp='lemm', rootpath=ROOT_PATH):
    """Load the visual k-NN searcher and tag records for *collection*.

    Parameters
    ----------
    collection : str
        Collection name under *rootpath*.
    feature : str
        Feature-directory name (looked up under FeatureData/).
    distance : str
        Distance metric handed to the simpleknn searcher.
    tpp : str
        Tag pre-processing suffix selecting the TextData file (default 'lemm').
    rootpath : str
        Root of the on-disk data layout.
    """
    feat_dir = os.path.join(rootpath, collection, "FeatureData", feature)
    id_file = os.path.join(feat_dir, "id.txt")
    feat_file = os.path.join(feat_dir, "feature.bin")
    # FIX: read shape.txt via a context manager — the original called
    # open(...).readline() and leaked the file handle.
    with open(os.path.join(feat_dir, 'shape.txt')) as fp:
        nr_of_images, ndims = map(int, fp.readline().split())
    self.searcher = simpleknn.load_model(feat_file, ndims, nr_of_images, id_file)
    self.searcher.set_distance(distance)
    tagfile = os.path.join(rootpath, collection, "TextData", "id.userid.%stags.txt" % tpp)
    self.textstore = RecordStore(tagfile)
    # Retrieval hyper-parameters: how many visual neighbors to fetch and how
    # many candidate tags to keep.
    self.nr_neighbors = 1000
    self.nr_newtags = 100
    printStatus(
        INFO, "nr_neighbors=%d, nr_newtags=%d" % (self.nr_neighbors, self.nr_newtags))
def __init__(self, db_file):
    """Build the sentence k-NN searcher and the visual-feature store.

    NOTE(review): this reads self.shape_file / self.sent_file /
    self.sent_feat_dir / self.sent_id_file / self.rootpath /
    self.img_collection / self.vis_feat before assigning them, so they are
    presumably set by a base class — confirm against the class hierarchy.

    Parameters
    ----------
    db_file : str
        Path to the database file; stored for later use.
    """
    # FIX: open every file via a context manager — the original leaked the
    # handles for shape_file, sent_file and the image-set file.
    with open(self.shape_file) as fp:
        self.nr_of_sents, self.feat_dim = map(int, fp.readline().split())
    with open(self.sent_file) as fp:
        # readlines() materializes the lines before the file closes, so the
        # (possibly lazy) map over them is safe.
        self.sent_pool = map(str.strip, fp.readlines())
    self.sent_searcher = load_model(os.path.join(self.sent_feat_dir, 'feature.bin'),
                                    self.feat_dim, self.nr_of_sents, self.sent_id_file)
    self.sent_searcher.set_distance('cosine')
    feat_dir = os.path.join(self.rootpath, self.img_collection, "FeatureData", self.vis_feat)
    self.vis_feat_file = BigFile(feat_dir)
    with open(os.path.join(self.rootpath, self.img_collection, "ImageSets",
                           "%s.txt" % self.img_collection), 'r') as fp:
        self.imageSet = fp.readlines()
    self.db_file = db_file
def __init__(self, collection, feature, distance, tpp='lemm', rootpath=ROOT_PATH):
    """Set up the k-NN image searcher plus the user/tag record store.

    Parameters
    ----------
    collection : str
        Collection name under *rootpath*.
    feature : str
        Feature-directory name under FeatureData/.
    distance : str
        Distance metric for the simpleknn searcher.
    tpp : str
        Tag pre-processing suffix for the TextData file (default 'lemm').
    rootpath : str
        Root of the data layout.
    """
    feat_dir = os.path.join(rootpath, collection, "FeatureData", feature)
    id_file = os.path.join(feat_dir, "id.txt")
    feat_file = os.path.join(feat_dir, "feature.bin")
    # FIX: the original leaked the shape.txt handle via a bare
    # open(...).readline(); a with-block closes it deterministically.
    with open(os.path.join(feat_dir, 'shape.txt')) as shape_fp:
        nr_of_images, ndims = map(int, shape_fp.readline().split())
    self.searcher = simpleknn.load_model(feat_file, ndims, nr_of_images, id_file)
    self.searcher.set_distance(distance)
    tagfile = os.path.join(rootpath, collection, "TextData", "id.userid.%stags.txt" % tpp)
    self.textstore = RecordStore(tagfile)
    # Number of visual neighbors consulted and candidate tags retained.
    self.nr_neighbors = 1000
    self.nr_newtags = 100
    printStatus(INFO, "nr_neighbors=%d, nr_newtags=%d" % (self.nr_neighbors, self.nr_newtags))
# NOTE(review): fragment — the enclosing `def` for the first six lines is
# outside this view; `word_vecs` and `vec` are bound in the missing portion.
# The tail returns the mean word vector, or None when no vectors were found.
    if vec:
        word_vecs.append(vec)
    if len(word_vecs) > 0:
        return np.array(word_vecs).mean(axis=0)
    else:
        return None


# Demo driver: embed a fixed set of WordNet synset ids with two encoders and
# show their nearest visual neighbors under cosine distance.
if __name__ == '__main__':
    rootpath = ROOT_PATH
    syn2vec = Synset2Vec()
    syn2vec2 = PartialSynset2Vec()
    queryset = str.split(
        'n02084071 n04490091 n02114100 n03982060 n03219135 n05311054 n08615149 n02801525 n02330245'
    )
    from simpleknn import simpleknn
    feat_dir = os.path.join(rootpath, 'flickr4m', 'word2vec', 'tagvec500')
    searcher = simpleknn.load_model(feat_dir)
    for wnid in queryset:
        for s2v in [syn2vec, syn2vec2]:
            vec = s2v.embedding(wnid)
            # NOTE(review): this prints syn2vec.explain(wnid) for BOTH
            # encoders; the parallel script elsewhere in this file uses
            # s2v.explain(wnid) — possibly a bug, confirm with the author.
            print(s2v, wnid, syn2vec.explain(wnid))
            for distance in ['cosine']:
                searcher.set_distance(distance)
                visualNeighbors = searcher.search_knn(vec, max_hits=100)
                print(wnid, distance, visualNeighbors[:10])
            print('-' * 100)
# Instantiate one synset encoder per strategy name.
for encoder_name in 'conse conse2 hierse hierse2'.split():
    encoder_cls = get_synset_encoder(encoder_name)
    syn2vec_list.append(encoder_cls(corpus, word2vec_model, rootpath=rootpath))

queryset = 'n02084071 n04490091 n02114100 n03982060 n03219135 n05311054 n08615149 n02801525 n02330245'.split()

# Show each query synset together with the explanations of its ancestors.
wnhier = WordnetHierarchy()
for wnid in queryset:
    ancestors = wnhier.get_ancestors(wnid)
    print(wnid, syn2vec_list[0].explain(wnid))
    print([syn2vec_list[0].explain(anc) for anc in ancestors])
    print('')

from simpleknn import simpleknn

# Probe every encoder: embed each query and list its nearest visual
# neighbors under L2 distance.
searcher = simpleknn.load_model(os.path.join(rootpath, corpus, 'word2vec', word2vec_model))
for wnid in queryset:
    for encoder in syn2vec_list:
        vec = encoder.embedding(wnid)
        print(encoder, wnid, encoder.explain(wnid))
        for distance in ['l2']:
            searcher.set_distance(distance)
            visualNeighbors = searcher.search_knn(vec, max_hits=100)
            print(wnid, distance, visualNeighbors[:10])
        print('-' * 100)
# Python 2 variant of the encoder-probing driver (uses print statements).
queryset = str.split('n02084071 n04490091 n02114100 n03982060 n03219135 n05311054 n08615149 n02801525 n02330245')
wnhier = WordnetHierarchy()
# Dump each query synset's explanation plus those of its WordNet ancestors.
for wnid in queryset:
    ancestor_list = wnhier.get_ancestors(wnid)
    print wnid, syn2vec_list[0].explain(wnid)
    print [syn2vec_list[0].explain(x) for x in ancestor_list]
    print ''
from simpleknn import simpleknn
feat_dir = os.path.join(rootpath, corpus, 'word2vec', word2vec_model)
# Model geometry is taken from the first encoder's word2vec store; all
# encoders presumably share the same store — confirm if that changes.
dim = syn2vec_list[0].word2vec.ndims
nr_of_images = syn2vec_list[0].word2vec.nr_of_images
id_file = os.path.join(feat_dir, 'id.txt')
searcher = simpleknn.load_model(os.path.join(feat_dir, "feature.bin"), dim, nr_of_images, id_file)
# For every query and encoder, embed the synset and print its 10 nearest
# neighbors under L2 distance.
for wnid in queryset:
    for s2v in syn2vec_list:
        vec = s2v.embedding(wnid)
        print s2v, wnid, s2v.explain(wnid)
        for distance in ['l2']:
            searcher.set_distance(distance)
            visualNeighbors = searcher.search_knn(vec, max_hits=100)
            print wnid, distance, visualNeighbors[:10]
        print '-'*100
# NOTE(review): fragment — this begins mid-statement; the opening
# `syn2vec_list.append(` (and its enclosing for-loop) lie outside this view.
        encoder_class(corpus, word2vec_model, rootpath=rootpath))
queryset = str.split(
    'n02084071 n04490091 n02114100 n03982060 n03219135 n05311054 n08615149 n02801525 n02330245'
)
wnhier = WordnetHierarchy()
# Print each query synset's explanation and its ancestors' explanations
# (Python 2 print statements).
for wnid in queryset:
    ancestor_list = wnhier.get_ancestors(wnid)
    print wnid, syn2vec_list[0].explain(wnid)
    print [syn2vec_list[0].explain(x) for x in ancestor_list]
    print ''
from simpleknn import simpleknn
feat_dir = os.path.join(rootpath, corpus, 'word2vec', word2vec_model)
# Searcher geometry comes from the first encoder's word2vec store —
# presumably shared by all encoders; verify if stores can differ.
dim = syn2vec_list[0].word2vec.ndims
nr_of_images = syn2vec_list[0].word2vec.nr_of_images
id_file = os.path.join(feat_dir, 'id.txt')
searcher = simpleknn.load_model(os.path.join(feat_dir, "feature.bin"), dim, nr_of_images, id_file)
# Embed every query with every encoder and print the 10 nearest visual
# neighbors under L2 distance.
for wnid in queryset:
    for s2v in syn2vec_list:
        vec = s2v.embedding(wnid)
        print s2v, wnid, s2v.explain(wnid)
        for distance in ['l2']:
            searcher.set_distance(distance)
            visualNeighbors = searcher.search_knn(vec, max_hits=100)
            print wnid, distance, visualNeighbors[:10]
        print '-' * 100