コード例 #1
0
ファイル: area.py プロジェクト: huxiaoqian/case
def pagerank_rank(top_n, date, topic_id, window_size, topicname, real_topic_id):
    data = []

    tmp_file, N, ds_tmp_file, ds_N = prepare_data_for_pr(topic_id, date, window_size, topicname, real_topic_id)
    top_n = N
    ds_top_n = ds_N
    print 'page_rank start'
    if not tmp_file or not ds_tmp_file:
        return data

    input_tmp_path = tmp_file.name
    ds_input_tmp_path = ds_tmp_file.name
    print input_tmp_path, ds_input_tmp_path

    iter_count = PAGERANK_ITER_MAX
    print 'pagerank_source_network'
    sorted_uids, all_uid_pr = pagerank(iter_count, input_tmp_path, top_n) # 排序的uid的序列
    print 'pagerank_direct_superior_network'
    ds_sorted_uids, ds_all_uid_pr = pagerank(iter_count, ds_input_tmp_path, ds_top_n)
    print 'top_n:', top_n
    print 'len(sorted_uid):', len(ds_sorted_uids)
    print 'len(ds_all_uid_pr):', len(ds_all_uid_pr)
    print 'ds_top_n:', ds_top_n
    topicname = acquire_topic_name(topic_id)
    print 'topicname:', topicname
    if not topicname:
        return data
    print 'save_rank_results'
    data = save_rank_results(sorted_uids, 'topic', 'spark_pagerank', date, window_size, topicname, all_uid_pr)
    ds_data = save_ds_rank_results(ds_sorted_uids, 'topic', 'spark_pagerank', date, window_size, topicname, ds_all_uid_pr)

    return all_uid_pr, ds_all_uid_pr, data, ds_data
コード例 #2
0
ファイル: area.py プロジェクト: huxiaoqian/project
def prepare_data_for_pr(topic_id, date, window_size):
    """Dump the topic network to a temporary file in PageRank input format.

    One line is written per node::

        <node>\tpr_results,<1.0/N>,<N>[,<out1>,<out2>,...]

    where ``N`` is the number of nodes and the optional trailing fields are
    the targets of the node's outgoing edges.

    Args:
        topic_id: identifier resolved to a topic name via acquire_topic_name.
        date: forwarded to make_network.
        window_size: forwarded to make_network.

    Returns:
        The flushed, still-open ``NamedTemporaryFile`` (created with
        ``delete=False``; callers read its ``.name``), or ``None`` when the
        topic name cannot be resolved or the network is empty.
    """
    topic = acquire_topic_name(topic_id)
    if not topic:
        return None

    g = make_network(topic, date, window_size)
    if not g:
        return None

    nodes = g.nodes()
    N = len(nodes)
    print('topic network size %s' % N)
    if not N:
        return None

    # Create the file only once we know there is something to write: with
    # delete=False every early return above would otherwise leave an orphan
    # temp file (and an open handle) behind.
    tmp_file = tempfile.NamedTemporaryFile(delete=False)

    # Every node starts with the same initial rank and network size.
    prefix = 'pr_results,%s,%s' % (1.0 / N, N)
    for node in nodes:
        # A real list (not map()) so the emptiness test below is reliable.
        outlinks = [str(target) for _, target in g.out_edges(nbunch=[node])]
        value = prefix
        if outlinks:
            value += ',' + ','.join(outlinks)
        tmp_file.write('%s\t%s\n' % (node, value))

    tmp_file.flush()
    return tmp_file
コード例 #3
0
ファイル: area.py プロジェクト: huxiaoqian/project
def prepare_data_for_degree(topic_id, date, window_size):
    """Return the degree data of the topic network, or None if it is unavailable.

    The topic name is resolved with acquire_topic_name and the network is
    built with make_network. ``None`` is returned when the name cannot be
    resolved, when the network is falsy, or when it has no nodes; otherwise
    the result of ``g.degree()`` is returned.
    """
    topic = acquire_topic_name(topic_id)
    # make_network is only called once a topic name is available.
    g = make_network(topic, date, window_size) if topic else None
    if not g:
        return None

    N = len(g.nodes())
    print('topic network size %s' % N)
    return g.degree() if N else None
コード例 #4
0
ファイル: area.py プロジェクト: huxiaoqian/project
def pagerank_rank(top_n, date, topic_id, window_size):
    """Run a PageRank job for the topic network and save the resulting ranking.

    Returns:
        An empty list when prepare_data_for_pr yields no input file or the
        topic name cannot be resolved; otherwise whatever save_rank_results
        returns.
    """
    tmp_file = prepare_data_for_pr(topic_id, date, window_size)
    if not tmp_file:
        return []

    # The job id is derived from the date's timestamp, the window and the topic.
    job_id = generate_job_id(datetime2ts(date), window_size, topic_id)
    sorted_uids = pagerank(job_id, PAGERANK_ITER_MAX, tmp_file.name, top_n)

    topicname = acquire_topic_name(topic_id)
    if not topicname:
        return []

    return save_rank_results(
        sorted_uids, 'topic', 'pagerank', date, window_size, topicname)
コード例 #5
0
def pagerank_rank(top_n, date, topic_id, window_size, topicname,
                  real_topic_id):
    data = []

    tmp_file, N, ds_tmp_file, ds_N = prepare_data_for_pr(
        topic_id, date, window_size, topicname, real_topic_id)
    top_n = N
    ds_top_n = ds_N
    print 'page_rank start'
    if not tmp_file or not ds_tmp_file:
        return data

    input_tmp_path = tmp_file.name
    ds_input_tmp_path = ds_tmp_file.name
    print input_tmp_path, ds_input_tmp_path

    iter_count = PAGERANK_ITER_MAX
    print 'pagerank_source_network'
    sorted_uids, all_uid_pr = pagerank(iter_count, input_tmp_path,
                                       top_n)  # 排序的uid的序列
    print 'pagerank_direct_superior_network'
    ds_sorted_uids, ds_all_uid_pr = pagerank(iter_count, ds_input_tmp_path,
                                             ds_top_n)
    print 'top_n:', top_n
    print 'len(sorted_uid):', len(ds_sorted_uids)
    print 'len(ds_all_uid_pr):', len(ds_all_uid_pr)
    print 'ds_top_n:', ds_top_n
    topicname = acquire_topic_name(topic_id)
    print 'topicname:', topicname
    if not topicname:
        return data
    print 'save_rank_results'
    data = save_rank_results(sorted_uids, 'topic', 'spark_pagerank', date,
                             window_size, topicname, all_uid_pr)
    ds_data = save_ds_rank_results(ds_sorted_uids, 'topic', 'spark_pagerank',
                                   date, window_size, topicname, ds_all_uid_pr)

    return all_uid_pr, ds_all_uid_pr, data, ds_data
コード例 #6
0
ファイル: area.py プロジェクト: huxiaoqian/project
def degree_rank(top_n, date, topic_id, window_size):
    """Rank the topic network's nodes by degree and save the top entries.

    Returns:
        An empty list when no degree data is available or the topic name
        cannot be resolved; otherwise whatever save_rank_results returns.
    """
    degree = prepare_data_for_degree(topic_id, date, window_size)
    if not degree:
        return []

    # Highest degree first; ties keep the order produced by iteritems().
    ranked = sorted(degree.iteritems(), key=operator.itemgetter(1), reverse=True)

    sorted_uids = []
    for position, (uid, _) in enumerate(ranked):
        # Stop once top_n uids have been collected.
        if position >= top_n:
            break
        sorted_uids.append(uid)

    topicname = acquire_topic_name(topic_id)
    if not topicname:
        return []

    return save_rank_results(
        sorted_uids, 'topic', 'degree', date, window_size, topicname)
コード例 #7
0
ファイル: topic.py プロジェクト: huxiaoqian/project
def make_network_graph(current_date, topic_id, window_size, key_user_labeled=True):
    """Build the topic network and serialize it as a GEXF XML document.

    Nodes are laid out with ``nx.spring_layout`` and sized by their degree in
    the network as it was before cut_network was applied. Nodes listed in the
    key users get the colour (255, 51, 51); all others get (0, 204, 204).
    Each node carries ``name``, ``location`` and ``timestamp`` attributes.

    Args:
        current_date: date forwarded to read_key_users and make_network.
        topic_id: identifier resolved to a topic name via acquire_topic_name.
        window_size: forwarded to read_key_users and make_network.
        key_user_labeled: when true, the top 10 key users are read and
            highlighted; otherwise no node is highlighted.

    Returns:
        ``None`` when the topic name cannot be resolved, ``''`` when the
        network has no nodes, otherwise the output of ``etree.tostring``.
    """
    key_users = []
    if key_user_labeled:
        key_users = read_key_users(current_date, window_size, topic_id, top_n=10)

    topic = acquire_topic_name(topic_id)
    if not topic:
        return None

    uid_ts, G = make_network(topic, current_date, window_size, ts=True)
    if not len(G.nodes()):
        return ''

    # Degrees are taken before the network is cut down.
    node_degree = nx.degree(G)
    G = cut_network(G, node_degree)

    gexf = Gexf("Yang Han", "Topic Network")
    graph = gexf.addGraph("directed", "static", "demp graph")
    for attr_name, attr_type in (('name', 'string'),
                                 ('location', 'string'),
                                 ('timestamp', 'int')):
        graph.addNodeAttribute(attr_name, type=attr_type, force_id=attr_name)

    pos = nx.spring_layout(G)

    highlight_rgb = ('255', '51', '51')
    default_rgb = ('0', '204', '204')

    node_id = {}
    for node in G.nodes():
        x, y = pos[node]
        degree = node_degree[node]
        # Ids are handed out sequentially in first-seen order.
        node_id.setdefault(node, len(node_id))
        uid = node

        red, green, blue = highlight_rgb if uid in key_users else default_rgb
        _node = graph.addNode(node_id[node], str(node), x=str(x), y=str(y), z='0',
                              r=red, g=green, b=blue, size=str(degree))

        user_info = acquire_user_by_id('area', uid)
        for field in ('name', 'location'):
            if user_info:
                _node.addAttribute(field, user_info[field].decode('utf-8'))
            else:
                _node.addAttribute(field, 'Unknown')
        _node.addAttribute('timestamp', str(uid_ts[uid]))

    for edge_index, (start, end) in enumerate(G.edges()):
        start_id = node_id[start]
        end_id = node_id[end]
        graph.addEdge(str(edge_index), str(start_id), str(end_id))

    return etree.tostring(gexf.getXML(), pretty_print=True, encoding='utf-8', xml_declaration=True)