Code Example #1
File: tagrel.py  Project: xiaojiew1/KDGAN
    def __init__(self,
                 collection,
                 feature,
                 distance,
                 tpp='lemm',
                 rootpath=ROOT_PATH):
        feat_dir = os.path.join(rootpath, collection, "FeatureData", feature)
        id_file = os.path.join(feat_dir, "id.txt")
        feat_file = os.path.join(feat_dir, "feature.bin")
        nr_of_images, ndims = map(
            int,
            open(os.path.join(feat_dir, 'shape.txt')).readline().split())

        self.searcher = simpleknn.load_model(feat_file, ndims, nr_of_images,
                                             id_file)
        self.searcher.set_distance(distance)

        tagfile = os.path.join(rootpath, collection, "TextData",
                               "id.userid.%stags.txt" % tpp)
        self.textstore = RecordStore(tagfile)

        self.nr_neighbors = 1000
        self.nr_newtags = 100

        printStatus(
            INFO, "nr_neighbors=%d, nr_newtags=%d" %
            (self.nr_neighbors, self.nr_newtags))
Code Example #2
File: sentence.py  Project: yelinyun123/coco-cn
    def __init__(self, db_file):
        self.nr_of_sents, self.feat_dim = map(int, open(self.shape_file).readline().split())
        self.sent_pool = map(str.strip, open(self.sent_file).readlines())
        self.sent_searcher = load_model(os.path.join(self.sent_feat_dir, 'feature.bin'), self.feat_dim,
                                        self.nr_of_sents, self.sent_id_file)
        self.sent_searcher.set_distance('cosine')
        feat_dir = os.path.join(self.rootpath, self.img_collection, "FeatureData", self.vis_feat)
        self.vis_feat_file = BigFile(feat_dir)
        imageSetFile = open(os.path.join(self.rootpath, self.img_collection, "ImageSets", "%s.txt" % self.img_collection), 'r')
        self.imageSet = imageSetFile.readlines()
        self.db_file = db_file
Code Example #3
File: tagrel.py  Project: li-xirong/tagrel
    def __init__(self, collection, feature, distance, tpp='lemm', rootpath=ROOT_PATH):
        feat_dir = os.path.join(rootpath, collection, "FeatureData", feature)
        id_file = os.path.join(feat_dir, "id.txt")
        feat_file = os.path.join(feat_dir, "feature.bin")
        nr_of_images, ndims = map(int, open(os.path.join(feat_dir,'shape.txt')).readline().split())

        self.searcher = simpleknn.load_model(feat_file, ndims, nr_of_images, id_file)
        self.searcher.set_distance(distance)

        tagfile = os.path.join(rootpath, collection, "TextData", "id.userid.%stags.txt" % tpp)
        self.textstore = RecordStore(tagfile)
        
        self.nr_neighbors = 1000
        self.nr_newtags = 100

        printStatus(INFO, "nr_neighbors=%d, nr_newtags=%d" % (self.nr_neighbors, self.nr_newtags))
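
Examples #1 and #3 show the same constructor from two forks of the same code. The following is a minimal usage sketch, not code from either project: the class name TagRelevanceLearner and the collection/feature values are placeholders chosen for illustration.

# Hypothetical instantiation of the constructor shown in Examples #1 and #3.
# The class name and the collection/feature values are placeholders; substitute
# the names used in your own FeatureData layout under ROOT_PATH.
learner = TagRelevanceLearner(collection='mycollection',
                              feature='myfeature',
                              distance='cosine')
# learner.searcher now answers k-NN queries over the image features, and
# learner.textstore holds the records read from the id.userid.lemmtags.txt file.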
Code Example #4
            if vec:
                word_vecs.append(vec)

        if len(word_vecs) > 0:
            return np.array(word_vecs).mean(axis=0)
        else:
            return None


if __name__ == '__main__':
    rootpath = ROOT_PATH
    syn2vec = Synset2Vec()
    syn2vec2 = PartialSynset2Vec()
    queryset = str.split(
        'n02084071 n04490091 n02114100 n03982060 n03219135 n05311054 n08615149 n02801525 n02330245'
    )

    from simpleknn import simpleknn
    feat_dir = os.path.join(rootpath, 'flickr4m', 'word2vec', 'tagvec500')
    searcher = simpleknn.load_model(feat_dir)

    for wnid in queryset:
        for s2v in [syn2vec, syn2vec2]:
            vec = s2v.embedding(wnid)
            print(s2v, wnid, s2v.explain(wnid))
            for distance in ['cosine']:
                searcher.set_distance(distance)
                visualNeighbors = searcher.search_knn(vec, max_hits=100)
                print(wnid, distance, visualNeighbors[:10])
                print('-' * 100)
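
The fragment at the top of Example #4 averages a list of word vectors and returns None when nothing was collected. Below is a self-contained illustration of that pattern; the function name average_vectors is invented for this sketch.

import numpy as np

def average_vectors(word_vecs):
    # Mirrors the averaging logic from Example #4: mean over the collected
    # vectors, or None when the list is empty.
    if len(word_vecs) > 0:
        return np.array(word_vecs).mean(axis=0)
    return None

print(average_vectors([np.ones(3), np.zeros(3)]))  # -> [0.5 0.5 0.5]
print(average_vectors([]))                         # -> None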
Code Example #5
File: synset2vec_hier.py  Project: li-xirong/hierse
    for name in str.split('conse conse2 hierse hierse2'):
        encoder_class = get_synset_encoder(name)
        syn2vec_list.append( encoder_class(corpus, word2vec_model, rootpath=rootpath) )

    queryset = str.split('n02084071 n04490091 n02114100 n03982060 n03219135 n05311054 n08615149 n02801525 n02330245')

    wnhier = WordnetHierarchy()
    for wnid in queryset:
        ancestor_list = wnhier.get_ancestors(wnid)
        print (wnid, syn2vec_list[0].explain(wnid))
        print ([syn2vec_list[0].explain(x) for x in ancestor_list])
        print ('')


    from simpleknn import simpleknn
    feat_dir = os.path.join(rootpath, corpus, 'word2vec', word2vec_model)
    searcher = simpleknn.load_model(feat_dir)

    
    for wnid in queryset:
        for s2v in syn2vec_list:
            vec = s2v.embedding(wnid)
            print (s2v, wnid, s2v.explain(wnid))
            for distance in ['l2']:
                searcher.set_distance(distance)
                visualNeighbors = searcher.search_knn(vec, max_hits=100)
                print (wnid, distance, visualNeighbors[:10])
                print ('-'*100)


Code Example #6
File: synset2vec_hier.py  Project: silasxue/hierse
    queryset = str.split('n02084071 n04490091 n02114100 n03982060 n03219135 n05311054 n08615149 n02801525 n02330245')

    wnhier = WordnetHierarchy()
    for wnid in queryset:
        ancestor_list = wnhier.get_ancestors(wnid)
        print wnid, syn2vec_list[0].explain(wnid)
        print [syn2vec_list[0].explain(x) for x in ancestor_list]
        print ''


    from simpleknn import simpleknn
    feat_dir = os.path.join(rootpath, corpus, 'word2vec', word2vec_model)
    dim = syn2vec_list[0].word2vec.ndims
    nr_of_images = syn2vec_list[0].word2vec.nr_of_images 
    id_file = os.path.join(feat_dir, 'id.txt')
    searcher = simpleknn.load_model(os.path.join(feat_dir, "feature.bin"), dim, nr_of_images, id_file)

    
    for wnid in queryset:
        for s2v in syn2vec_list:
            vec = s2v.embedding(wnid)
            print s2v, wnid, s2v.explain(wnid)
            for distance in ['l2']:
                searcher.set_distance(distance)
                visualNeighbors = searcher.search_knn(vec, max_hits=100)
                print wnid, distance, visualNeighbors[:10]
                print '-'*100


Code Example #7
File: synset2vec_hier.py  Project: zuacubd/hierse
            encoder_class(corpus, word2vec_model, rootpath=rootpath))

    queryset = str.split(
        'n02084071 n04490091 n02114100 n03982060 n03219135 n05311054 n08615149 n02801525 n02330245'
    )

    wnhier = WordnetHierarchy()
    for wnid in queryset:
        ancestor_list = wnhier.get_ancestors(wnid)
        print wnid, syn2vec_list[0].explain(wnid)
        print [syn2vec_list[0].explain(x) for x in ancestor_list]
        print ''

    from simpleknn import simpleknn
    feat_dir = os.path.join(rootpath, corpus, 'word2vec', word2vec_model)
    dim = syn2vec_list[0].word2vec.ndims
    nr_of_images = syn2vec_list[0].word2vec.nr_of_images
    id_file = os.path.join(feat_dir, 'id.txt')
    searcher = simpleknn.load_model(os.path.join(feat_dir, "feature.bin"), dim,
                                    nr_of_images, id_file)

    for wnid in queryset:
        for s2v in syn2vec_list:
            vec = s2v.embedding(wnid)
            print s2v, wnid, s2v.explain(wnid)
            for distance in ['l2']:
                searcher.set_distance(distance)
                visualNeighbors = searcher.search_knn(vec, max_hits=100)
                print wnid, distance, visualNeighbors[:10]
                print '-' * 100
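
All seven examples follow the same simpleknn pattern: load a binary feature file with load_model, choose a distance, and query with search_knn. The sketch below is a hedged recap of that pattern using only the calls visible in the examples; the feature directory path is a placeholder.

import os
import numpy as np
from simpleknn import simpleknn

# Placeholder FeatureData directory; substitute one produced by the projects above.
feat_dir = '/path/to/FeatureData/myfeature'
nr_of_images, ndims = map(int, open(os.path.join(feat_dir, 'shape.txt')).readline().split())

searcher = simpleknn.load_model(os.path.join(feat_dir, 'feature.bin'),
                                ndims, nr_of_images,
                                os.path.join(feat_dir, 'id.txt'))
searcher.set_distance('cosine')        # the examples use 'cosine' or 'l2'
query_vec = np.random.rand(ndims)      # stand-in for a real feature or word vector
neighbors = searcher.search_knn(query_vec, max_hits=100)
print(neighbors[:10])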