Esempio n. 1
0
def topicOfClassificationForAllYear(probDir, modelDir, classDir, clf_dict, fun):
    """Collect the per-classification topic result for every processed year.

    The files found under ``probDir``, ``modelDir`` and ``classDir`` are
    matched up by position, so the three listings must have the same length.
    For each processed position the probability object is loaded, the index
    of the maximum along axis 1 is taken for every row, and the result is
    handed to ``topicOfClassification`` together with the classifications
    extracted by ``classificationOfDocument``.

    Args:
        probDir: directory passed to ``fileSys.traverseDirectory`` to list
            the probability files.
        modelDir: directory passed to ``fileSys.traverseTopicDirecotry``;
            only the number of files found there is used (consistency check).
        classDir: directory passed to ``fileSys.traverseDirectory`` to list
            the classification files.
        clf_dict: forwarded unchanged to ``classificationOfDocument`` and
            ``topicOfClassification``.
        fun: mode selector. ``0`` processes every file; ``1`` skips the
            first five files. It is also forwarded to the two helpers.

    Returns:
        dict mapping the year string sliced from each probability file name
        to the value returned by ``topicOfClassification`` for that file.

    Raises:
        SystemExit: if the three directories do not hold the same number of
            files.
        ValueError: if ``fun`` is neither 0 nor 1.
    """
    probFiles = fileSys.traverseDirectory(probDir)
    topicFiles = fileSys.traverseTopicDirecotry(modelDir, 1)
    classFiles = fileSys.traverseDirectory(classDir)

    N = len(probFiles)
    if len(topicFiles) != N or len(classFiles) != N:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and is also valid Python 3 syntax.
        print("numbers of files are not same")
        sys.exit('System will exit')

    if fun == 0:
        start = 0
    elif fun == 1:
        # acm-class start from 1998
        start = 5
    else:
        # Previously an unsupported mode left the index range unbound and
        # surfaced as an unrelated NameError at the loop below.
        raise ValueError("fun must be 0 or 1, got %r" % (fun,))

    all_clf_topic = {}
    for i in range(start, N):
        prob = ioFile.load_object(probFiles[i])
        inFile = ioFile.dataFromFile(classFiles[i])

        # The four characters before the last four characters of the path;
        # presumably a 4-digit year followed by a ".ext" suffix -- verify
        # against the actual file naming.
        year = probFiles[i][-8:-4]

        # Index of the largest entry along axis 1 for every row, flattened.
        # Assumes rows are documents and columns are topics -- TODO confirm.
        # The topic model files are intentionally not loaded here: only the
        # topic indices are passed on, never the topic contents.
        doc_topic = np.squeeze(np.array(prob.argmax(1)))

        all_clf, unique_clf = classificationOfDocument(inFile, clf_dict, fun)
        all_clf_topic[year] = topicOfClassification(
            unique_clf, all_clf, clf_dict, doc_topic, fun)

    return all_clf_topic
Esempio n. 2
0
def topics_for_year(year):
    """Build the topic tree for one year of LDA model output.

    Lists the files under ``<root_path>/lda_model/<year>/topic`` and
    ``<root_path>/lda_model/<year>/distance`` and passes both listings to
    ``graph.createTree``.

    Args:
        year: year identifier; converted with ``str`` to form the directory
            name.

    Returns:
        Whatever ``graph.createTree`` returns for the two file listings.
    """
    # Both sub-directories live under the same per-year folder.
    year_dir = path.join(root_path, 'lda_model', str(year))

    topic_listing = fileSys.traverseDirectory(path.join(year_dir, 'topic'))
    distance_listing = fileSys.traverseDirectory(path.join(year_dir, 'distance'))

    return graph.createTree(topic_listing, distance_listing)
Esempio n. 3
0
def topics_for_year(year):
    """Return the topic tree built from a single year's LDA model files.

    The topic files and the distance files are looked up in the ``topic``
    and ``distance`` sub-directories of ``<root_path>/lda_model/<year>`` and
    both listings are given to ``graph.createTree``.

    Args:
        year: year identifier; ``str(year)`` is used as the directory name.

    Returns:
        The result of ``graph.createTree`` for the two file listings.
    """
    # Shared parent folder of the two sub-directories for this year.
    model_dir = path.join(root_path, 'lda_model', str(year))

    topic_dir = path.join(model_dir, 'topic')
    topic_listing = fileSys.traverseDirectory(topic_dir)

    distance_dir = path.join(model_dir, 'distance')
    distance_listing = fileSys.traverseDirectory(distance_dir)

    return graph.createTree(topic_listing, distance_listing)
Esempio n. 4
0
def topicOfClassificationForAllYear(probDir, modelDir, classDir, clf_dict,
                                    fun):
    """Collect the per-classification topic result for every processed year.

    The files found under ``probDir``, ``modelDir`` and ``classDir`` are
    matched up by position, so the three listings must have the same length.
    For each processed position the probability object is loaded, the index
    of the maximum along axis 1 is taken for every row, and the result is
    handed to ``topicOfClassification`` together with the classifications
    extracted by ``classificationOfDocument``.

    Args:
        probDir: directory passed to ``fileSys.traverseDirectory`` to list
            the probability files.
        modelDir: directory passed to ``fileSys.traverseTopicDirecotry``;
            only the number of files found there is used (consistency check).
        classDir: directory passed to ``fileSys.traverseDirectory`` to list
            the classification files.
        clf_dict: forwarded unchanged to ``classificationOfDocument`` and
            ``topicOfClassification``.
        fun: mode selector. ``0`` processes every file; ``1`` skips the
            first five files. It is also forwarded to the two helpers.

    Returns:
        dict mapping the year string sliced from each probability file name
        to the value returned by ``topicOfClassification`` for that file.

    Raises:
        SystemExit: if the three directories do not hold the same number of
            files.
        ValueError: if ``fun`` is neither 0 nor 1.
    """
    probFiles = fileSys.traverseDirectory(probDir)
    topicFiles = fileSys.traverseTopicDirecotry(modelDir, 1)
    classFiles = fileSys.traverseDirectory(classDir)

    N = len(probFiles)
    if len(topicFiles) != N or len(classFiles) != N:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and is also valid Python 3 syntax.
        print("numbers of files are not same")
        sys.exit('System will exit')

    if fun == 0:
        first = 0
    elif fun == 1:
        # acm-class start from 1998
        first = 5
    else:
        # An unsupported mode used to leave the index range unbound, which
        # showed up as an unrelated NameError when the loop started.
        raise ValueError("fun must be 0 or 1, got %r" % (fun,))

    all_clf_topic = {}
    for i in range(first, N):
        prob = ioFile.load_object(probFiles[i])
        inFile = ioFile.dataFromFile(classFiles[i])

        # The four characters before the last four characters of the path;
        # presumably a 4-digit year followed by a ".ext" suffix -- verify
        # against the actual file naming.
        year = probFiles[i][-8:-4]

        # Index of the largest entry along axis 1 for every row, flattened.
        # Assumes rows are documents and columns are topics -- TODO confirm.
        # The topic model files are intentionally not loaded here: only the
        # topic indices are passed on, never the topic contents.
        doc_topic = np.squeeze(np.array(prob.argmax(1)))

        all_clf, unique_clf = classificationOfDocument(inFile, clf_dict, fun)
        all_clf_topic[year] = topicOfClassification(unique_clf, all_clf,
                                                    clf_dict, doc_topic, fun)

    return all_clf_topic
Esempio n. 5
0
def topics_for_cs():
    """Build the topic tree from the files under ``<root_path>/htm``.

    Lists the contents of the ``htm/topic`` and ``htm/distance`` directories
    below ``root_path`` and passes both listings to ``graph.createTree``.

    Returns:
        Whatever ``graph.createTree`` returns for the two file listings.
    """
    topic_dir = path.join(root_path, 'htm/topic')
    distance_dir = path.join(root_path, 'htm/distance')

    return graph.createTree(
        fileSys.traverseDirectory(topic_dir),
        fileSys.traverseDirectory(distance_dir),
    )
Esempio n. 6
0
def topics_for_cs():
    """Return the topic tree for the ``htm`` data set under ``root_path``.

    The topic files come from ``htm/topic`` and the distance files from
    ``htm/distance``; both listings are given to ``graph.createTree``.

    Returns:
        The result of ``graph.createTree`` for the two file listings.
    """
    topic_listing = fileSys.traverseDirectory(
        path.join(root_path, 'htm/topic'))
    distance_listing = fileSys.traverseDirectory(
        path.join(root_path, 'htm/distance'))

    tree = graph.createTree(topic_listing, distance_listing)
    return tree