예제 #1
0
def test_hin_basic_adj():
    """Build a HIN from the toy corpus and print its ``m_d_a``, ``m_d_v`` and ``m_d_d`` attributes.

    Smoke test only: nothing is asserted; the values are printed for
    manual inspection.
    """
    # The phrase set returned alongside the document metadata is not used here.
    docs, _ = generate_doc_meta_from_file(DATA_PATH + 'toy_corpus_new')
    toy_hin = build_hin.HIN(docs_meta=docs)
    # Parenthesised single-argument print behaves identically under
    # Python 2 and is also valid Python 3 syntax.
    print(toy_hin.m_d_a)
    print(toy_hin.m_d_v)
    print(toy_hin.m_d_d)
import expert_finder.bibrank as bibrank
import expert_finder.build_hin as build_hin
from test_expertfinder import generate_doc_meta_from_file, generate_phrase_topic_dist

# Data directory located alongside this file. os.path.join inserts the path
# separator that plain concatenation onto dirname() omitted; the trailing ''
# keeps each path ending in a separator because callers append file names
# to these constants with `+`.
DATA_PATH = os.path.join(os.path.dirname(__file__), 'dataset', '')
PHRASE_DIST_PATH = os.path.join(DATA_PATH, 'phrase_topic_dist', '')


# Configuration for the experiments conducted on the toy corpus.
k = 2      # number of topics (alpha below has one entry per topic)
p = 27451  # phrase count; the background distribution below has p entries
a = 6      # number of authors (beta below has one entry per author)
v = 3      # NOTE(review): presumably the number of venues -- confirm against build_hin.HIN

docs, phrase_set = generate_doc_meta_from_file(DATA_PATH + 'toy_corpus_new')
toy_hin = build_hin.HIN(p=p, a=a, v=v, docs_meta=docs)

alpha = np.ones(k)  # may try different topic distribution
beta = np.ones(a)  # authors - uniform
# NOTE(review): looks like one row per topic and one column per venue (k x v) -- confirm.
gamma = np.array([[1, 1, 1],
                  [100, 1, 1]])
# Uniform background distribution: every phrase gets probability 1/p.
back_phrase_prob = 1.0 / p
background_topic_dist = [back_phrase_prob] * p
# Pass p instead of repeating the literal 27451 so the phrase count is defined
# in exactly one place.
# NOTE(review): the meaning of the 0.4 argument is not visible here -- confirm
# against generate_phrase_topic_dist.
fpm_topic_dist = generate_phrase_topic_dist(PHRASE_DIST_PATH + 'frequent_pattern_mining', p, 0.4)
phrase_dist = [background_topic_dist, fpm_topic_dist]

toy_expert_finder = ef.ExpertFinder(
		K=k,
        docs_meta=docs,
        P=p,