def text_processing():
    """Render the text-processing page.

    Reads an optional title/body from the submitted form; when either is
    present, computes the keyword set, per-category compatibility scores,
    and the related datasets, and passes them to the template.  With no
    input, the template is rendered with the result variables left as None.
    """
    title = request.form.get('d_title', None)
    body = request.form.get('d_body', None)
    keywords = None
    categories = None
    datasets = None
    # Map dataset name -> its position in the repository listing (used by
    # the template to build links).
    dataset_ids = {dataset['name']: index
                   for index, dataset in enumerate(iotools.load_datasets())}
    if title or body:
        keywords = generate_dataset_keywords_dict(
            {"name": title, "long_desc": body, "short_desc": ""})
        keywords = sorted(keywords['all'])
        # Category similarities, highest first; zero scores are dropped.
        similarity_dict = get_dataset_compatibility(keywords)
        categories = []
        for key, val in sorted(similarity_dict.items(),
                               key=lambda x: x[1], reverse=True):
            if val > 0:
                categories.append({
                    "name": key,
                    "similarity": "%4.1f%%" % (val * 100,),
                })
        related_datasets = get_related_datasets(keywords)
        # NOTE(review): the [:20] slice happens BEFORE sorting, so this keeps
        # the first 20 entries in whatever order get_related_datasets returned
        # them and only then sorts those 20 by score — confirm this is the
        # intended behavior (vs. sorting first and taking the global top 20).
        datasets = []
        for name, val in sorted(related_datasets[:20],
                                key=lambda x: x[1], reverse=True):
            if val == 0:
                continue
            row = iotools.load_dataset(name)
            row['similarity'] = "%4.1f%%" % (val * 100,)
            datasets.append(row)
    return render_template("text_processing.html", d_title=title, d_body=body,
                           keywords=keywords, categories=categories,
                           related_datasets=datasets, dataset_ids=dataset_ids)
def user_study():
    """Render the user-study page for one randomly chosen dataset.

    Picks a random dataset (excluding the "Address Space Allocation Data"
    category) and shows two top-10 related-dataset lists, produced by the
    two modes of get_related_datasets_for_dataset.
    """
    repo = iotools.load_datasets()
    # Rejection-sample a dataset outside the excluded category.
    # NOTE(review): loops forever if every dataset falls in that category —
    # assumed impossible for this repository; verify if the data changes.
    dataset = random.choice(repo)
    while dataset['category'] == "Address Space Allocation Data":
        dataset = random.choice(repo)
    dataset_index = repo.index(dataset)
    print(dataset['category'])  # debug trace of the chosen category

    def _top_related(flag):
        # Top-10 related datasets for the given mode flag, each annotated
        # with its repository index and a percentage similarity string.
        rows = []
        for data, val in get_related_datasets_for_dataset(dataset, flag)[:10]:
            data['index'] = repo.index(data)
            data['similarity'] = "%3.0f%%" % (val * 100,)
            rows.append(data)
        return rows

    related1 = _top_related(True)
    related2 = _top_related(False)
    return render_template('user-study.html', dataset=dataset,
                           dataset_index=dataset_index,
                           related1=related1, related2=related2)
def get_all_keywords():
    """Render the dataset-keywords page.

    Loads the keyword dictionary, re-orders the 'all' mapping by ascending
    number of entries per keyword, collapses each keyword's (dataset, _)
    pairs to the unique dataset names, and renders the template together
    with a name -> repository-index map.
    """
    keywords = iotools.load_keywords_dict()
    # OrderedDict preserves the sort order for the template; sorting by the
    # number of (dataset, weight) pairs, fewest first.
    keywords['all'] = OrderedDict(
        sorted(keywords['all'].items(), key=lambda x: len(x[1])))
    # Keep only the unique dataset names per keyword (drop the weights).
    for key in keywords['all']:
        keywords['all'][key] = list(
            {dataset for dataset, _ in keywords['all'][key]})
    # Map dataset name -> its position in the repository listing.
    dataset_ids = {dataset['name']: index
                   for index, dataset in enumerate(iotools.load_datasets())}
    return render_template('dataset-keywords.html', keywords=keywords,
                           dataset_ids=dataset_ids)
def tag_cloud_text_old_keywords():
    """Return the normalized keywords of every dataset as one flat list.

    Duplicates are kept intentionally — the tag cloud weights terms by
    how often they occur across datasets.
    """
    return [normalized(keyword)
            for dataset in iotools.load_datasets()
            for keyword in dataset['keywords']]