# Example #1
def get_similarity():
    """Return dataset-compatibility scores for the posted keywords as JSON.

    Reads newline-separated keywords from the ``keywords`` form field,
    scores them with ``get_dataset_compatibility`` (new-style scoring
    unless an ``old_style`` field is present), and replaces each raw
    score with a whole-number percentage string before encoding.
    """
    submitted_keywords = request.form['keywords'].split('\n')
    # Presence of the 'old_style' field switches scoring to the legacy mode.
    use_new_style = 'old_style' not in request.form
    scores = get_dataset_compatibility(submitted_keywords, True, use_new_style)
    # Format each score in place as a percentage string (e.g. " 42%").
    for dataset_name in scores:
        scores[dataset_name] = "%3.0f%%" % (scores[dataset_name] * 100,)
    return jsonpickle.encode(scores, False)
# Example #2
def text_processing():
    """Render the text-processing page for an optional title/body submission.

    Reads the optional ``d_title`` and ``d_body`` form fields. When either
    is present, extracts keywords from the submitted text, computes category
    compatibility scores, and loads up to 20 related datasets; otherwise the
    template is rendered with ``keywords``/``categories``/``related_datasets``
    set to None.
    """
    title = request.form.get('d_title', None)
    body = request.form.get('d_body', None)

    keywords = None
    categories = None
    datasets = None
    # Map dataset name -> its position in the loaded dataset list.
    # (dict comprehension instead of dict([...]) — flake8-comprehensions C404)
    dataset_ids = {dataset['name']: index
                   for index, dataset in enumerate(iotools.load_datasets())}

    if title or body:
        keywords = generate_dataset_keywords_dict(
            {"name": title, "long_desc": body, "short_desc": ""})
        keywords = sorted(keywords['all'])

        similarity_dict = get_dataset_compatibility(keywords)
        # Categories with a positive similarity, best match first.
        categories = [
            {"name": name, "similarity": "%4.1f%%" % (score * 100,)}
            for name, score in sorted(similarity_dict.items(),
                                      key=lambda item: item[1], reverse=True)
            if score > 0
        ]

        related_datasets = get_related_datasets(keywords)
        datasets = []
        # NOTE(review): the [:20] slice runs BEFORE the sort, so this ranks
        # only the first 20 entries as returned — if get_related_datasets is
        # not already sorted by similarity, the intent was probably
        # sorted(related_datasets, ...)[:20]. Confirm against the helper.
        for name, score in sorted(related_datasets[:20],
                                  key=lambda item: item[1], reverse=True):
            if score == 0:
                continue
            row = iotools.load_dataset(name)
            row['similarity'] = "%4.1f%%" % (score * 100,)
            datasets.append(row)

    return render_template("text_processing.html", d_title=title, d_body=body, keywords=keywords,
                           categories=categories, related_datasets=datasets, dataset_ids=dataset_ids)