Example #1
0
def text_processing():
    """Render the text-processing page.

    Reads the optional ``d_title``/``d_body`` form fields; when either is
    present, derives keywords for the submitted text, scores category
    similarity, and loads up to 20 related datasets, then renders
    ``text_processing.html`` with the results (all ``None`` otherwise).
    """
    title = request.form.get('d_title', None)
    body = request.form.get('d_body', None)

    keywords = None
    categories = None
    datasets = None
    # Map dataset name -> its position in the repository listing.
    dataset_ids = {dataset['name']: index
                   for index, dataset in enumerate(iotools.load_datasets())}

    if title or body:
        keywords = generate_dataset_keywords_dict(
            {"name": title, "long_desc": body, "short_desc": ""})
        keywords = sorted(keywords['all'])

        similarity_dict = get_dataset_compatibility(keywords)
        # Categories with non-zero similarity, best match first.
        categories = [
            {"name": key, "similarity": "%4.1f%%" % (val * 100,)}
            for key, val in sorted(similarity_dict.items(),
                                   key=lambda x: x[1], reverse=True)
            if val > 0
        ]

        related_datasets = get_related_datasets(keywords)
        # NOTE(review): the slice happens BEFORE the sort, so only the first
        # 20 entries as returned are considered; if get_related_datasets()
        # does not already return results sorted by similarity this can drop
        # better matches — confirm the helper's ordering.
        datasets = []
        for name, val in sorted(related_datasets[:20],
                                key=lambda x: x[1], reverse=True):
            if val == 0:
                continue
            row = iotools.load_dataset(name)
            row['similarity'] = "%4.1f%%" % (val * 100,)
            datasets.append(row)

    return render_template("text_processing.html", d_title=title, d_body=body, keywords=keywords,
                           categories=categories, related_datasets=datasets, dataset_ids=dataset_ids)
Example #2
0
def _annotate_related(related_datasets, repo):
    """Attach repository index and formatted similarity to the top 10 results."""
    annotated = []
    for data, val in related_datasets[:10]:
        data['index'] = repo.index(data)
        data['similarity'] = "%3.0f%%" % (val * 100,)
        annotated.append(data)
    return annotated


def user_study():
    """Render the user-study page for one randomly chosen dataset.

    Picks a random dataset (excluding the "Address Space Allocation Data"
    category) and shows two related-dataset rankings computed with the two
    modes of ``get_related_datasets_for_dataset``.
    """
    repo = iotools.load_datasets()
    # Re-draw until we land outside the excluded category.
    # NOTE(review): loops forever if every dataset is in that category.
    dataset = random.choice(repo)
    while dataset['category'] == "Address Space Allocation Data":
        dataset = random.choice(repo)

    dataset_index = repo.index(dataset)
    # Debug output; parenthesized form works on both Python 2 and 3.
    print(dataset['category'])

    related1 = _annotate_related(
        get_related_datasets_for_dataset(dataset, True), repo)
    related2 = _annotate_related(
        get_related_datasets_for_dataset(dataset, False), repo)

    return render_template('user-study.html', dataset=dataset, dataset_index=dataset_index, related1=related1,
                           related2=related2)
Example #3
0
def get_all_keywords():
    """Render the dataset-keywords page.

    Loads the keyword dictionary, orders the 'all' section by how many
    (dataset, ...) entries each keyword has (ascending), deduplicates each
    keyword's dataset list, and renders ``dataset-keywords.html``.
    """
    keywords = iotools.load_keywords_dict()
    # Ascending by number of occurrences per keyword.
    keywords['all'] = OrderedDict(
        sorted(keywords['all'].items(), key=lambda x: len(x[1])))
    for key in keywords['all']:
        # Collapse (dataset, _) pairs to a deduplicated list of dataset names.
        keywords['all'][key] = list({dataset for dataset, _ in keywords['all'][key]})
    # Map dataset name -> its position in the repository listing.
    dataset_ids = {dataset['name']: index
                   for index, dataset in enumerate(iotools.load_datasets())}
    return render_template('dataset-keywords.html', keywords=keywords, dataset_ids=dataset_ids)
Example #4
0
def tag_cloud_text_old_keywords():
    """Return the normalized form of every dataset keyword (repeats kept)."""
    # Single flattening comprehension instead of repeated list concatenation.
    return [normalized(keyword)
            for dataset in iotools.load_datasets()
            for keyword in dataset['keywords']]