Beispiel #1
0
        def topics(k):
            """Return the top words of every topic in model ``k`` as JSON.

            The result is a JSON object keyed by topic index (as a string).
            Each value holds ``color`` (hex string from ``self.colors[k]``)
            and ``words`` (word -> probability for the highest-probability
            words of that topic).
            """
            from topicexplorer.lib.color import rgb2hex
            import numpy as np

            response.content_type = 'application/json; charset=UTF8'
            response.set_header('Expires', _cache_date())
            response.set_header('Cache-Control', 'max-age=86400')

            # Ideographic languages get a longer word list than alphabetic ones.
            wordmax = 25 if kwargs.get('lang', None) == 'cn' else 10

            # Each row of the transposed phi matrix is handled as one topic.
            topic_word = self.v[k].phi.T
            n_topics = np.shape(topic_word)[0]

            # Per row: column indices sorted by descending value, cut to wordmax.
            top_cols = topic_word.argsort(axis=1)[:, ::-1][:, :wordmax]
            # Explicit row selector for the paired fancy-indexing below, see
            # https://github.com/numpy/numpy/issues/4724
            row_sel = np.arange(n_topics)[:, np.newaxis]

            # Structured array pairing every selected word with its probability.
            record_type = [('Word', self.c.words.dtype),
                           ('Prob', topic_word.dtype)]
            table = np.zeros(shape=(topic_word.shape[0], wordmax),
                             dtype=record_type)
            table['Word'] = self.c.words[top_cols]
            table['Prob'] = topic_word[row_sel, top_cols]

            payload = {
                str(idx): {
                    "color": rgb2hex(self.colors[k][idx]),
                    'words': {str(word): float(prob)
                              for word, prob in row[:wordmax]},
                }
                for idx, row in enumerate(table)
            }

            return json.dumps(payload)
Beispiel #2
0
        def topics(k):
            """Return top words, color and label of every topic in model ``k``.

            The result is a JSON object keyed by topic index (as text). Each
            value holds ``color``, ``words`` (word -> probability for the
            ``wordmax`` highest-probability words) and ``label``.

            Responds with status 400 when ``k`` is not in ``self.topic_range``
            and with status 304 when the request's ``If-None-Match`` header
            equals the ETag generated for the model.
            """
            from topicexplorer.lib.color import rgb2hex
            import numpy as np

            # Validate k before touching self.v[k]: the ETag computation below
            # would otherwise be attempted for a model that was never requested
            # validly, and the 400 response could not be reached.
            if k not in self.topic_range:
                response.status = 400  # Bad Request
                return "No model for k = {}".format(k)

            etag = _generate_etag(self.v[k])
            # Check if there is a "If-None-Match" ETag in the request
            if request.get_header('If-None-Match', '') == etag:
                response.status = 304
                return "Not Modified"

            response.content_type = 'application/json; charset=UTF8'
            response.set_header('Expires', _cache_date())
            response.set_header('Cache-Control', 'max-age=120')
            response.set_header('ETag', etag)

            # set a parameter for number of words to return
            wordmax = 10  # for alphabetic languages
            if kwargs.get('lang', None) == 'cn':
                wordmax = 25  # for ideographic languages

            # populate word values; each row of phi.T is handled as one topic
            phi = self.v[k].phi.T
            # per row: column indices by descending value, cut to wordmax
            idxs = phi.argsort(axis=1)[:, ::-1][:, :wordmax]
            # https://github.com/numpy/numpy/issues/4724
            idx_hack = np.arange(np.shape(phi)[0])[:, np.newaxis]

            dt = [('Word', self.c.words.dtype), ('Prob', phi.dtype)]
            data = np.zeros(shape=(phi.shape[0], wordmax), dtype=dt)
            data['Word'] = self.c.words[idxs]
            data['Prob'] = phi[idx_hack, idxs]

            # Start from generated labels so that a label file with fewer
            # lines than topics cannot cause an IndexError further down.
            labels = ['Topic {}'.format(i) for i in range(len(data))]
            if self.label_file:
                # One label per line; line i labels topic i.
                with open(self.label_file) as labels_in:
                    for i, label in enumerate(labels_in):
                        if i >= len(labels):
                            break  # surplus lines have no topic to label
                        labels[i] = label.strip()

            js = {}
            for i, topic in enumerate(data):
                js[text(i)] = {
                    "color": rgb2hex(self.colors[k][i]),
                    # every row of data already has exactly wordmax entries
                    'words': {text(w): float(p) for w, p in topic},
                    'label': labels[i],
                }

            return json.dumps(js)
Beispiel #3
0
        def topics(k):
            """Return the top words and color of every topic in model ``k``.

            The result is a JSON object keyed by topic index (as text). Each
            value holds ``color`` and ``words`` (word -> probability for the
            ``wordmax`` highest-probability words).

            Responds with status 400 when ``k`` is not in ``self.topic_range``
            and with status 304 when the request's ``If-None-Match`` header
            equals the ETag generated for the model.
            """
            from topicexplorer.lib.color import rgb2hex
            import numpy as np

            # Validate k before touching self.v[k]: the ETag computation below
            # would otherwise be attempted first and the 400 response could
            # not be reached for an unknown k.
            if k not in self.topic_range:
                response.status = 400  # Bad Request
                return "No model for k = {}".format(k)

            etag = _generate_etag(self.v[k])
            # Check if there is a "If-None-Match" ETag in the request
            if request.get_header('If-None-Match', '') == etag:
                response.status = 304
                return "Not Modified"

            response.content_type = 'application/json; charset=UTF8'
            response.set_header('Expires', _cache_date())
            response.set_header('Cache-Control', 'max-age=120')
            response.set_header('ETag', etag)

            # set a parameter for number of words to return
            wordmax = 10  # for alphabetic languages
            if kwargs.get('lang', None) == 'cn':
                wordmax = 25  # for ideographic languages

            # populate word values; each row of phi.T is handled as one topic
            phi = self.v[k].phi.T
            # per row: column indices by descending value, cut to wordmax
            idxs = phi.argsort(axis=1)[:, ::-1][:, :wordmax]
            # https://github.com/numpy/numpy/issues/4724
            idx_hack = np.arange(np.shape(phi)[0])[:, np.newaxis]

            dt = [('Word', self.c.words.dtype), ('Prob', phi.dtype)]
            data = np.zeros(shape=(phi.shape[0], wordmax), dtype=dt)
            data['Word'] = self.c.words[idxs]
            data['Prob'] = phi[idx_hack, idxs]

            js = {}
            for i, topic in enumerate(data):
                js[text(i)] = {
                    "color": rgb2hex(self.colors[k][i]),
                    # every row of data already has exactly wordmax entries
                    'words': {text(w): float(p) for w, p in topic},
                }

            return json.dumps(js)
Beispiel #4
0
        def topics(k):
            """Return the leading words of every topic in model ``k`` as JSON.

            The result is a JSON object keyed by topic index (as a string).
            Each value holds ``color`` (hex string from ``self.colors[k]``)
            and ``words`` (word -> probability, taken from the first
            ``wordmax`` entries that ``self.v[k].topics()`` yields per topic).
            """
            from topicexplorer.lib.color import rgb2hex

            response.content_type = 'application/json; charset=UTF8'
            response.set_header('Expires', _cache_date())
            response.set_header('Cache-Control', 'max-age=86400')

            # One sequence of (word, probability) pairs per topic.
            per_topic = self.v[k].topics()

            # Ideographic languages get a longer word list than alphabetic ones.
            wordmax = 25 if kwargs.get('lang', None) == 'cn' else 10

            payload = {
                str(idx): {
                    "color": rgb2hex(self.colors[k][idx]),
                    'words': {str(word): float(prob)
                              for word, prob in entries[:wordmax]},
                }
                for idx, entries in enumerate(per_topic)
            }

            return json.dumps(payload)
Beispiel #5
0
def topics():
    """Return per-topic ``H`` value, color and first ten words as JSON.

    The result is a JSON object keyed by topic id (as a string). Each value
    holds ``H`` and ``color`` from ``lda_v.topic_jsds()`` and ``words``
    (word -> probability) from the first ten entries ``lda_v.topics()``
    yields for that topic.
    """
    from topicexplorer.lib.color import rgb2hex

    response.content_type = 'application/json; charset=UTF8'
    response.set_header('Expires', _cache_date())

    # First pass: one entry per (topic, H) pair from the partial JSD values.
    payload = {}
    for topic_id, entropy in lda_v.topic_jsds():
        payload[str(topic_id)] = {
            "H": entropy,
            "color": rgb2hex(colors[topic_id]),
        }

    # Second pass: attach the word list to the entry created above.
    for idx, entries in enumerate(lda_v.topics()):
        payload[str(idx)]['words'] = {word: prob for word, prob in entries[:10]}

    return json.dumps(payload)
Beispiel #6
0
def topics():
    """Build the JSON description of all topics of ``lda_v``.

    Every topic id (as a string) maps to an object with ``H`` and ``color``,
    taken from ``lda_v.topic_jsds()``, plus ``words``: a word -> probability
    mapping of the first ten entries ``lda_v.topics()`` gives for the topic.
    """
    from topicexplorer.lib.color import rgb2hex

    response.content_type = 'application/json; charset=UTF8'
    response.set_header('Expires', _cache_date())

    # Seed the result with the partial JSD value and color of each topic.
    jsd_pairs = lda_v.topic_jsds()
    result = {}
    for pair in jsd_pairs:
        topic_key, h_value = pair
        result[str(topic_key)] = {"H": h_value,
                                  "color": rgb2hex(colors[topic_key])}

    # Add the word values to the entries seeded above.
    word_lists = lda_v.topics()
    for position, word_list in enumerate(word_lists):
        leading = word_list[:10]
        result[str(position)].update(
            {'words': {word: prob for word, prob in leading}})

    return json.dumps(result)