def get_topic_proportions_for_every_image():
    """Return every landmark file URL together with its topic proportions.

    The landmark URLs of all performances of all persons are gathered (in the
    order DirProcessing yields them) and treated as the documents of the
    corpus.  The flat vector stored in '../ctm-dist/CTM46/final-lambda.dat'
    is reshaped to one row per document, exponentiated and normalised so that
    each row sums to 1.

    Side effect: switches numpy's global print options to suppress
    scientific notation.

    Returns:
        (landmarks_urls_list, final_theta): the list of landmark URLs and a
        2-D array of shape (number of URLs, number of topics).

    Raises:
        ZeroDivisionError: if no landmark URLs were found.
        ValueError: if the number of values in the data file is not a
            multiple of the number of landmark URLs.
    """
    from dir_processing import DirProcessing

    landmarks_urls_list = []
    for person_id in DirProcessing.get_all_person_ids():
        for perform_id in DirProcessing.get_all_perform_ids_from_person_id(person_id):
            landmarks_urls_list.extend(
                DirProcessing.get_all_landmarks_urls_from_sequence(person_id, perform_id))

    doc_num = len(landmarks_urls_list)

    # NOTE(review): presumably the per-document lambda parameters written by
    # the CTM run -- confirm against the tool that produces this file.
    dt_file = '../ctm-dist/CTM46/final-lambda.dat'
    dt_vector = np.loadtxt(dt_file)

    # Floor division keeps topic_num an integer regardless of the division
    # semantics in effect; np.reshape does not accept a float dimension.
    topic_num, leftover = divmod(dt_vector.size, doc_num)
    if leftover:
        raise ValueError(
            '{} holds {} values, which is not a multiple of the {} documents'.format(
                dt_file, dt_vector.size, doc_num))
    dt_matrix = np.reshape(dt_vector, (doc_num, topic_num))

    np.set_printoptions(suppress=True)

    # Exponentiate, then divide each row by its sum so rows are proportions.
    final_theta = np.exp(dt_matrix)
    final_theta = final_theta / np.sum(final_theta, axis=1)[:, np.newaxis]

    return landmarks_urls_list, final_theta
def generate_corpus_and_write_to_file():
    """Generate the LSF corpus and write it to disk in two forms.

    Builds the LSF dictionary, extracts one expression sequence per
    (person, performance) pair via LSF.lsf_from_sequence, and then:

    * pickles the list of expression sequences to '../model/corpus.pk';
    * writes '../model/corpus.txt' with one line per document of every
      sequence, formatted as ``<n> <wid>:<count> <wid>:<count> ...`` where
      ``n`` is ``len(document)`` and ``wid`` comes from ``LSF.word2id``.

    A progress message is printed after each sequence is processed.
    """
    import os
    import sys

    try:
        import cPickle as pickle  # Python 2: C-accelerated implementation
    except ImportError:
        import pickle  # Python 3: plain pickle is already accelerated

    # Make the helper package importable; guard against piling up duplicate
    # entries in sys.path when this function is called more than once.
    lib_path = os.path.abspath('../utilization/')
    if lib_path not in sys.path:
        sys.path.append(lib_path)
    from dir_processing import DirProcessing

    LSF.build_dictionary()

    lsf_corpus = []
    for person_id in DirProcessing.get_all_person_ids():
        for perform_id in DirProcessing.get_all_perform_ids_from_person_id(person_id):
            landmarks_urls = DirProcessing.get_all_landmarks_urls_from_sequence(
                person_id, perform_id)
            expression_sequence = LSF.lsf_from_sequence(landmarks_urls)
            # Single-argument print(...) behaves the same as the print
            # statement on Python 2 and is valid on Python 3.
            print('The feature extraction of expression person S{} and perform time {} has '
                  'been done.'.format(person_id, perform_id))
            lsf_corpus.append(expression_sequence)

    with open('../model/corpus.pk', 'wb') as f:
        pickle.dump(lsf_corpus, f)

    with open('../model/corpus.txt', 'w') as f:
        for expression_sequence in lsf_corpus:
            for lsf_document in expression_sequence.lsf_sequence:
                # lsf_document is iterated as a word -> count mapping; build
                # the whole line first so each document is one write call.
                fields = [str(len(lsf_document))]
                fields.extend(
                    "%d:%d" % (LSF.word2id[word], count)
                    for word, count in lsf_document.items())
                f.write(" ".join(fields) + "\n")