コード例 #1
0
ファイル: webvectors.py プロジェクト: azukka/webvectors
def visual_page(lang):
    """Serve the word-visualization page for the interface language *lang*.

    On GET the empty form is rendered.  On POST the view reads the
    ``list_query`` form fields (one comma-separated word list per field; at
    most 10 lists and 30 words per list are used) and the ``model`` fields,
    validates the words, and for every requested model either reuses a
    previously generated plot image or fetches the word vectors from the
    model server and calls ``embed`` to produce a new one.

    Args:
        lang: key into ``language_dicts`` selecting the interface strings.

    Returns:
        The rendered ``visual.html`` template, or ``home.html`` when a
        requested model is not in ``our_models``.
    """
    g.lang = lang
    # Works only for two languages: picks "the other" key of language_dicts.
    other_lang = list(set(language_dicts.keys()) - {lang})[0]
    g.strings = language_dicts[lang]

    def error_page(message, usermodels):
        # Every validation failure re-renders the form with an error message.
        return render_template("visual.html", error=message, models=our_models, other_lang=other_lang,
                               languages=languages, url=url, usermodels=usermodels)

    if request.method != 'POST':
        return render_template('visual.html', models=our_models, other_lang=other_lang, languages=languages,
                               url=url, usermodels=[defaultmodel])

    list_data = request.form.getlist('list_query')
    if not list_data:
        return error_page("Incorrect query!", [defaultmodel])

    model_value = request.form.getlist('model')
    if len(model_value) < 1:
        model_value = [defaultmodel]

    # One set of processed words per submitted list.
    groups = []
    for inputform in list_data[:10]:
        accepted = [process_query(w) for w in inputform.split(',')
                    if len(w) > 1
                    and w.replace('_', '').replace('-', '').replace('::', '').replace(' ', '').isalnum()]
        groups.append(set(accepted[:30]))

    querywords = [word for group in groups for word in group]
    if len(set(querywords)) != len(querywords):
        return error_page("Words must be unique!", model_value)
    if len(querywords) < 7:
        return error_page("Too few words!", model_value)
    if "Incorrect tag!" in querywords:
        return error_page("Incorrect tag!", model_value)

    # Words are unique across groups at this point, so each word maps to
    # exactly one group index.
    word_class = {word: index for index, group in enumerate(groups) for word in group}

    # The cache key depends only on the query, not on the model, so it is
    # computed once.
    name = ':::'.join(['__'.join(group) for group in groups])
    fname = hashlib.md5(name.encode('ascii', 'backslashreplace')).hexdigest()
    plot_dir = root + 'data/images/tsneplots'

    unknown = {}
    models_row = {}
    links_row = {}
    frequencies = {}
    for model in model_value:
        if model.strip() not in our_models:
            return render_template('home.html', other_lang=other_lang, languages=languages, url=url,
                                   usermodels=model_value)
        frequencies[model] = {}
        unknown[model] = set()
        plotfile = "%s_%s.png" % (model, fname)
        identifier = plotfile[:-4]
        models_row[model] = plotfile

        # exist_ok avoids a crash when another worker creates the directory
        # between an existence check and the makedirs call.
        os.makedirs(plot_dir, exist_ok=True)
        plot_path = plot_dir + '/' + plotfile

        if os.access(plot_path, os.F_OK):
            # A plot for this query/model already exists: reuse it.
            if tensorflow_integration:
                with open(plot_dir + '/' + identifier + '.url', 'r') as link_file:
                    links_row[model] = link_file.read()
            else:
                links_row[model] = None
            continue

        print('No previous image found', plot_path, file=sys.stderr)
        tags_setting = model_props[model]['tags']
        vectors = []
        labels = []
        for w in querywords:
            lemma = w.split('_')[0]
            # Models without tags are queried with the part before the first '_'.
            query = lemma if tags_setting == 'False' else w
            message = {'operation': '4', 'query': query, 'model': model}
            result = json.loads(serverquery(message).decode('utf-8'))
            frequencies[model].update(result['frequencies'])
            if lemma in frequencies[model] and w not in frequencies[model]:
                frequencies[model][w] = frequencies[model][lemma]
            if w + " is unknown to the model" in result:
                unknown[model].add(w)
                continue
            vectors.append(np.array(result['vector']))
            labels.append(w)

        if len(vectors) <= 5:
            models_row[model] = "Too few words!"
            continue

        # Class labels are built per model and from `labels`, so that they
        # stay aligned with the vectors even when some words were unknown,
        # and so that one model's tag-based classes never leak into the next.
        if len(list_data) == 1 and tags_setting == 'True':
            model_classes = [word.split('_')[-1] for word in labels]
        else:
            model_classes = [word_class[word] for word in labels]

        print('Embedding...', file=sys.stderr)
        matrix2vis = np.vstack(vectors)
        embed(labels, matrix2vis.astype('float64'), model_classes, model, fname)
        if tensorflow_integration:
            links_row[model] = word2vec2tensor(identifier, vectors, labels, model_classes)
        else:
            links_row[model] = None

    return render_template('visual.html', languages=languages, visual=models_row, words=groups,
                           number=len(model_value), models=our_models, unknown=unknown, url=url,
                           usermodels=model_value, l2c=links_row, qwords=querywords, frequencies=frequencies)
コード例 #2
0
def visual_page(lang):
    """Serve the word-visualization page for the interface language *lang*.

    On GET the empty form is rendered.  On POST the view reads the
    whitespace-separated ``list_query`` form field (at most 30 accepted
    words are used) and the ``model`` fields, and for every requested model
    either reuses an existing plot image or fetches the word vectors from
    the model server and calls ``embed`` to produce a new one.

    Args:
        lang: key into ``language_dicts`` selecting the interface strings.

    Returns:
        The rendered ``visual.html`` template, or ``home.html`` when a
        requested model is not in ``our_models``.
    """
    g.lang = lang
    # Works only for two languages: picks "the other" key of language_dicts.
    other_lang = list(set(language_dicts.keys()) - set([lang]))[0]
    g.strings = language_dicts[lang]

    def error_page(message, usermodels):
        # Validation failures re-render the form with an error message.
        return render_template("visual.html",
                               error=message,
                               models=our_models,
                               other_lang=other_lang,
                               languages=languages,
                               url=url,
                               usermodels=usermodels)

    def plot_name(model, words):
        # The image file name is the model plus an MD5 digest of the words.
        joined = '_'.join(words).encode('ascii', 'backslashreplace')
        return "%s_%s.png" % (model, hashlib.md5(joined).hexdigest())

    if request.method != 'POST':
        return render_template('visual.html',
                               models=our_models,
                               other_lang=other_lang,
                               languages=languages,
                               url=url,
                               usermodels=[defaultmodel])

    # None (rather than a magic string) marks a missing field, so any
    # submitted text -- including the literal word "dummy" -- is processed.
    list_data = request.form.get('list_query')
    if list_data is None:
        return error_page("Incorrect query!", [defaultmodel])

    model_value = request.form.getlist('model')
    if len(model_value) < 1:
        model_value = [defaultmodel]

    accepted = [
        process_query(w) for w in list_data.split()
        if len(w) > 1
        and w.replace('_', '').replace('-', '').replace('::', '').isalnum()
    ]
    querywords = set(accepted[:30])
    if len(querywords) < 7:
        return error_page("Too few words!", model_value)

    if "Incorrect tag!" in querywords:
        return render_template('visual.html',
                               word=list_data,
                               models=our_models,
                               other_lang=other_lang,
                               languages=languages,
                               url=url,
                               usermodels=model_value)

    plot_dir = root + 'data/images/tsneplots'
    unknown = {}
    models_row = {}
    for model in model_value:
        if model.strip() not in our_models:
            return render_template('home.html',
                                   other_lang=other_lang,
                                   languages=languages,
                                   url=url,
                                   usermodels=model_value)
        unknown[model] = set()
        plotfile = plot_name(model, querywords)
        models_row[model] = plotfile

        # Create the output directory; tolerate another worker creating it
        # concurrently instead of failing on the resulting OSError.
        try:
            os.makedirs(plot_dir)
        except OSError:
            if not os.path.isdir(plot_dir):
                raise

        if os.access(plot_dir + '/' + plotfile, os.F_OK):
            # A plot for this query/model already exists: reuse it.
            continue

        # sys.stderr.write emits the same text as the Python 2 print
        # statement did and also works under Python 3.
        sys.stderr.write('No previous image found\n')
        vectors = []
        labels = []
        for w in querywords:
            message = "4;" + w + ";" + model
            result = serverquery(message)
            if 'is unknown' in result:
                unknown[model].add(w)
                continue
            vectors.append(np.array(result.split(',')))
            labels.append(w)

        if len(vectors) > 1:
            matrix2vis = np.vstack(vectors)
            embed(labels, matrix2vis.astype('float64'), model)
            # The reported file name is recomputed from the words that were
            # actually plotted (unknown words excluded).
            # NOTE(review): presumably embed() names its output the same
            # way -- confirm against its implementation.
            models_row[model] = plot_name(model, labels)
        else:
            models_row[model] = "Too few words!"

    return render_template('visual.html',
                           visual=models_row,
                           words=querywords,
                           number=len(model_value),
                           models=our_models,
                           unknown=unknown,
                           url=url,
                           usermodels=model_value)