Example #1
0
def keyword_neighbors(keyword):
    """Return keywords co-occurring with *keyword*, sorted by descending weight.

    For every dataset that contains *keyword*, count each keyword that the
    dataset also carries, then normalize the counts into weights relative to
    *keyword*'s own weight.

    Returns a list of (keyword2, weight) tuples, highest weight first.
    """
    keywords = iotools.load_keywords_dict()
    keyword_weight = weighting_function(keyword)
    # Datasets in which the query keyword appears (x[0] is the dataset name).
    related_datasets = {x[0] for x in keywords['all'][keyword]}

    # Count, per co-occurring keyword, in how many related datasets it appears.
    counts = {}
    for dataset in related_datasets:
        dataset_keywords = iotools.load_dataset_keywords_dict(dataset)['all']
        for keyword2 in dataset_keywords:
            counts[keyword2] = counts.get(keyword2, 0) + 1

    # Turn raw counts into normalized weights.
    # NOTE(review): if mylog is a plain logarithm, mylog(len(related_datasets))
    # is 0 when exactly one dataset matches, which would divide by zero —
    # presumably mylog guards against that; confirm.
    ret = {}
    for keyword2, count in counts.items():
        ret[keyword2] = (1.0 * weighting_function(keyword2) * mylog(count)
                         / keyword_weight / mylog(len(related_datasets)))

    return sorted(ret.items(), key=lambda x: x[1], reverse=True)
Example #2
0
def generate_network():
    """Build the keyword co-occurrence graph and write it out as GEXF.

    Nodes are keywords (node weight from weighting_function); an edge joins
    two keywords when their neighbor weight exceeds 0.30.  Writes the graph
    to output/keyword-graph.gexf as a side effect.
    """
    g = nx.Graph()
    matrix = all_keywords_neighbors()

    for keyword1, kwlist in matrix:
        g.add_node(keyword1, weight=weighting_function(keyword1))

        for keyword2, weight in kwlist:
            # Threshold drops weak co-occurrence links from the graph.
            if weight > 0.30:
                g.add_edge(keyword1, keyword2, weight=weight)

    # Single-argument print() is valid on both Python 2 and Python 3,
    # unlike the bare Python-2 print statement it replaces.
    print(len(g.edges()))
    nx.write_gexf(g, "output/keyword-graph.gexf")
Example #3
0
def tag_cloud_text_new_keywords_weighted():
    """Return tag-cloud input: each normalized keyword repeated in proportion
    to its weight (weight * 1000, truncated to int)."""
    ret = []
    # Iterate keys only: the per-keyword value lists were loaded but unused.
    for keyword in iotools.load_keywords_dict()['all']:
        repetitions = int(weighting_function(keyword) * 1000)
        ret += [normalized(keyword)] * repetitions
    return ret
Example #4
0
def df_new_keywords_list_weighted(dataset):
    """Map each keyword of *dataset* to its weight.

    *dataset* is a record carrying a 'name' entry; returns a
    {keyword: weight} dict.
    """
    dataset_name = dataset['name']
    dataset_keywords = iotools.load_dataset_keywords_dict(dataset_name)['all'].keys()
    return dict((kw, weighting_function(kw)) for kw in dataset_keywords)