dict_fp = "../data/sdg_v2.dict"
    corpus_fp = "../data/sdg_v2.mm"
    model_fp = "../data/sdg_v2.lda_model"

    # loading dictionary, corpus, lda_model
    dictionary = corpora.Dictionary.load(dict_fp)
    corpus = corpora.MmCorpus(corpus_fp)
    lda = models.ldamodel.LdaModel.load(model_fp)

    cosSim = CosineSimilarity(dictionary=dictionary, corpus=corpus, model=lda)

    cosSim.save_index("../data/sdg_v2.index")

    # loading undaf data
    undaf_fp = "../data/undaf_v2.json"
    p = Preprocessor(undaf_fp)
    p.loadjson()
    undaf_docs = p.json

    # loading metatopic data
    sdg = Preprocessor(clean_data_fp)
    sdg.loadjson()
    sdg_metatopics = sdg.json[0].keys()

    for topic in sdg_metatopics:
        print topic

    for k, v in p.json[0].items():
        simScore = cosSim.calculate_sim(v, 10)
        print "\n", k
        {"y": 0.089070708, "x": 0, "doc": 2},
Beispiel #2
0
def preprocess(input_fp, output_fp):
    """Run the Preprocessor steps on ``input_fp`` and save to ``output_fp``.

    Builds a ``Preprocessor`` from ``input_fp``, then calls its
    ``loadjson``, ``processjson`` and ``savejson`` methods in that order,
    handing ``output_fp`` to ``savejson``. Nothing is returned.

    NOTE(review): both arguments look like file paths (presumably JSON
    files, judging by the method names) -- confirm against Preprocessor.
    """
    pipeline = Preprocessor(input_fp)

    # Order matters: load first, transform second, persist last.
    pipeline.loadjson()
    pipeline.processjson()
    pipeline.savejson(output_fp)
Beispiel #3
0
    dict_fp = '../data/sdg_v2.dict'
    corpus_fp = '../data/sdg_v2.mm'
    model_fp = '../data/sdg_v2.lda_model'

    # loading dictionary, corpus, lda_model
    dictionary = corpora.Dictionary.load(dict_fp)
    corpus = corpora.MmCorpus(corpus_fp)
    lda = models.ldamodel.LdaModel.load(model_fp)

    cosSim = CosineSimilarity(dictionary=dictionary, corpus=corpus, model=lda)

    cosSim.save_index('../data/sdg_v2.index')

    # loading undaf data
    undaf_fp = '../data/undaf_v2.json'
    p = Preprocessor(undaf_fp)
    p.loadjson()
    undaf_docs = p.json

    # loading metatopic data
    sdg = Preprocessor(clean_data_fp)
    sdg.loadjson()
    sdg_metatopics = sdg.json[0].keys()

    for topic in sdg_metatopics:
        print topic

    for k, v in p.json[0].items():
        simScore = cosSim.calculate_sim(v, 10)
        print '\n', k
        {'y': 0.089070708, 'x': 0, 'doc': 2},
Beispiel #4
0
def preprocess(input_fp, output_fp):
    """Feed ``input_fp`` through a Preprocessor and write to ``output_fp``.

    A ``Preprocessor`` is constructed with ``input_fp``; its ``loadjson``
    and ``processjson`` methods are invoked in sequence, and finally
    ``savejson`` is called with ``output_fp``. The function has no return
    value.

    NOTE(review): the arguments are presumably paths to JSON files --
    verify against the Preprocessor class.
    """
    worker = Preprocessor(input_fp)

    # Load, then process, then save -- the same sequence every call.
    worker.loadjson()
    worker.processjson()
    worker.savejson(output_fp)