def load_topics(self, dirname):
    """Rebuild ``self.topics`` from the pickled models found below *dirname*.

    ``self.topics`` is reset to an empty list on every call. Each directory
    that ``os.walk`` reports under *dirname* is then scanned; the root
    directory itself is skipped. Every entry whose name ends with ``'pkl'``
    is loaded through ``ScikitLda.load``, and each of the model's topics is
    appended after being divided by its own sum.

    Args:
        dirname: Root directory whose subdirectories hold the saved models.
    """
    self.topics = []

    # os.walk yields the root first; slicing it off leaves only the
    # subdirectories (at any depth).
    walked = [entry[0] for entry in os.walk(dirname)]
    for folder in walked[1:]:
        for name in os.listdir(folder):
            if not name.endswith('pkl'):
                continue
            print("attempting... ", name)
            model = ScikitLda.load(folder + "/" + name)
            # Normalize each topic so its entries sum to one.
            # NOTE(review): presumably array-like vectors -- confirm in ScikitLda.
            self.topics.extend(vec / vec.sum() for vec in model.topics)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from scipy.spatial.distance import cosine
from corpora.scikit import ScikitLda
import argparse

if __name__ == "__main__":
    # Command line: one mandatory option naming the saved topic model.
    cli = argparse.ArgumentParser(description="Calculate distance between topics")
    cli.add_argument("-t", "--topic-model", default=None, required=True)
    options = cli.parse_args()

    model = ScikitLda.load(options.topic_model)

    # Rescale every topic by its own sum before comparing.
    normalized = [vec / vec.sum() for vec in model.topics]

    # Print the cosine distance for each unordered pair exactly once (i < j).
    for i, left in enumerate(normalized):
        for j in range(i + 1, len(normalized)):
            print("Topic#{0} Topic#{1} {2}".format(i, j, cosine(left, normalized[j])))