def get_similarity():
    """Return a JSON-encoded mapping of category name -> similarity string.

    Keywords are read from the posted ``keywords`` form field (one per
    line).  Presence of an ``old_style`` form field selects the legacy
    comparison mode; otherwise the new-style comparison is used.  Each
    similarity value is rendered as a whole-number percentage string.
    """
    query_terms = request.form['keywords'].split('\n')
    use_new_style = 'old_style' not in request.form
    scores = get_dataset_compatibility(query_terms, True, use_new_style)
    # Render each raw [0, 1] score as a percentage string before encoding.
    formatted = {name: "%3.0f%%" % (score * 100,)
                 for name, score in scores.items()}
    return jsonpickle.encode(formatted, False)
def text_processing():
    """Render the text-processing page for a posted title/body.

    Reads ``d_title`` and ``d_body`` from the request form.  When either
    is present, extracts keywords from the text, computes per-category
    similarity scores, and gathers the 20 most-related datasets.  Always
    renders ``text_processing.html`` (with ``None`` placeholders when no
    text was posted).
    """
    title = request.form.get('d_title', None)
    body = request.form.get('d_body', None)
    keywords = None
    categories = None
    datasets = None
    # Map each dataset name to its index in the master dataset list.
    dataset_ids = {dataset['name']: index
                   for index, dataset in enumerate(iotools.load_datasets())}
    if title or body:
        keywords = generate_dataset_keywords_dict(
            {"name": title, "long_desc": body, "short_desc": ""})
        keywords = sorted(keywords['all'])
        similarity_dict = get_dataset_compatibility(keywords)
        # Categories with any positive similarity, best first.
        categories = []
        for key, val in sorted(similarity_dict.items(),
                               key=lambda x: x[1], reverse=True):
            if val > 0:
                row = {
                    "name": key,
                    "similarity": "%4.1f%%" % (val * 100,)
                }
                categories.append(row)
        related_datasets = get_related_datasets(keywords)
        datasets = []
        # BUG FIX: sort by similarity BEFORE slicing so we keep the 20
        # best matches, not an arbitrary first-20 slice of the results.
        top_related = sorted(related_datasets,
                             key=lambda x: x[1], reverse=True)[:20]
        for name, val in top_related:
            if val == 0:
                continue
            row = iotools.load_dataset(name)
            row['similarity'] = "%4.1f%%" % (val * 100,)
            datasets.append(row)
    return render_template("text_processing.html", d_title=title, d_body=body,
                           keywords=keywords, categories=categories,
                           related_datasets=datasets, dataset_ids=dataset_ids)