def get_topic_proportions_for_every_image():
    """Load the fitted CTM document-topic parameters and normalize them.

    Walks every (person, perform) sequence to collect landmark URLs —
    one URL per document — then loads the ``final-lambda.dat`` file
    written by the CTM tool and converts its log-space values into
    per-document topic proportions (each row sums to 1).

    Returns:
        tuple: ``(landmarks_urls_list, final_theta)`` where
            ``landmarks_urls_list`` is the flat list of landmark URLs and
            ``final_theta`` is a ``(doc_num, topic_num)`` ndarray of
            topic proportions.
    """
    from dir_processing import DirProcessing

    landmarks_urls_list = []

    person_ids = DirProcessing.get_all_person_ids()
    for person_id in person_ids:
        perform_ids = DirProcessing.get_all_perform_ids_from_person_id(person_id)
        for perform_id in perform_ids:
            landmarks_urls = DirProcessing.get_all_landmarks_urls_from_sequence(person_id, perform_id)
            landmarks_urls_list.extend(landmarks_urls)

    doc_num = len(landmarks_urls_list)

    dt_file = '../ctm-dist/CTM46/final-lambda.dat'
    dt_vector = np.loadtxt(dt_file)
    # Floor division keeps topic_num an int: plain `/` yields a float
    # under true division (Python 3 or `from __future__ import division`)
    # and np.reshape rejects non-integer dimensions. Under Python 2's
    # classic division `//` is identical for these ints.
    topic_num = dt_vector.size // doc_num
    dt_matrix = np.reshape(dt_vector, (doc_num, topic_num))
    np.set_printoptions(suppress=True)

    # final-lambda values are in log space; exponentiate and renormalize
    # each row so every document's topic proportions sum to 1.
    final_theta = np.exp(dt_matrix)
    final_theta = final_theta / np.sum(final_theta, axis=1)[:, np.newaxis]

    return landmarks_urls_list, final_theta
    def divide_persons_into_subsets(self):
        """Randomly partition all person ids into ``self.subset_num`` groups.

        Returns:
            dict: maps each person id to the 0-based index of the subset
            it was assigned to.
        """
        from dir_processing import DirProcessing

        person_ids = DirProcessing.get_all_person_ids()
        shuffled_ids = np.random.permutation(person_ids)

        # Integer boundaries that split the shuffled list into
        # self.subset_num (nearly) equal-sized contiguous slices.
        boundaries = np.linspace(0, len(person_ids), self.subset_num + 1).astype(int)

        assignment = dict.fromkeys(person_ids)
        for subset_idx in range(self.subset_num):
            for pos in range(boundaries[subset_idx], boundaries[subset_idx + 1]):
                assignment[shuffled_ids[pos]] = subset_idx

        return assignment
# Esempio n. 3 ("Example no. 3" — residue from the snippet site this code was scraped from)
# 0
    def generate_corpus_and_write_to_file():
        """Generate the LSF corpus, pickle it, and dump a text version.

        Extracts LSF features for every (person, perform) sequence and
        writes two files:
          * ``../model/corpus.pk``  -- pickled list of expression sequences
          * ``../model/corpus.txt`` -- one line per document:
            the unique-word count followed by ``word_id:count`` pairs
            (presumably the LDA-C/CTM input format -- TODO confirm
            against the ctm-dist tooling used elsewhere).

        NOTE: Python 2 only (``print`` statement, ``cPickle``,
        ``dict.iteritems``).
        """
        
        import os
        import sys
        # Make the project-local `utilization` package importable,
        # relative to the current working directory.
        lib_path = os.path.abspath('../utilization/')
        sys.path.append(lib_path)

        from dir_processing import DirProcessing

        # Build the global word -> id mapping before any feature extraction,
        # since LSF.word2id is consulted when writing corpus.txt below.
        LSF.build_dictionary()

        lsf_corpus = []

        # One expression sequence per (person, perform) pair.
        person_ids = DirProcessing.get_all_person_ids()
        for person_id in person_ids:
            perform_ids = DirProcessing.get_all_perform_ids_from_person_id(person_id)
            for perform_id in perform_ids:
                landmarks_urls = DirProcessing.get_all_landmarks_urls_from_sequence(person_id, perform_id)
                expression_sequence = LSF.lsf_from_sequence(landmarks_urls)
                print 'The feature extraction of expression person S{} and perform time {} has ' \
                        'been done.'.format(person_id, perform_id)
                lsf_corpus.append(expression_sequence)

        # Persist the full corpus as a pickle for later reuse.
        import cPickle
        with open('../model/corpus.pk', 'wb') as f:
            cPickle.dump(lsf_corpus, f)
       
        # Text dump: each document (bag of words) becomes one line starting
        # with its number of unique words, followed by " wid:count" pairs.
        with open('../model/corpus.txt', 'w') as f:
            for expression_sequence in lsf_corpus:
                lsf_sequence = expression_sequence.lsf_sequence
                for lsf_document in lsf_sequence:
                    f.write(str(len(lsf_document)))
                    for word, count in lsf_document.iteritems():
                        wid = LSF.word2id[word]
                        s = " %d:%d" %(wid, count)
                        f.write(s)
                    f.write("\n")