# Explore the "2012 20topics" LDA model: decompose posts into topics, guess the
# topic of a concept list, and rank brands within a topic.
# gslib (the project's gensim/LDA helper module) is assumed to be imported
# elsewhere; sims is the helper imported below as in the other scripts here.
import mess_with_sims as sims

dirs = gslib.LDAdirs(indir=r"Z:\ermunds\results\2012 20topics", modelName="201220topics")
docsfilename = dirs.allDocsFileName
(dict1, mm, lda) = gslib.loadStuff(dirs)
brands = sims.BrandsClustered_1

# decompose a post into topics and print them
gslib.make_sense(1, lda, mm, docsfilename)

# guess the topic of a concept list (each assignment overrides the previous one)
consepts = ['cheap', 'ugly', 'unreliable']
consepts = ['young', 'trendy', 'fast', 'macho']  # fail
consepts = ['green', 'environment', 'sustainable', 'hybrid']  # n75
consepts = ['reliable', 'safe']  # n8
consepts = 'air hot heat cool exhaust system fan coolant temp blow'.split()  # n5
ws, IDl, ID2index = gslib.world_list2IDs(dict1, consepts, tokenizef=gslib.wordCleanUp)
for t, p in lda[[(t, 1) for t in IDl]]:
    print '__with prob:{}% is N{}: {}'.format(
        int(p * 100), t, ' '.join([w for _, w in lda.show_topic(t, 10)]))

# rank brands within a topic
brands = sims.BrandsClustered_1 + ['leaf', 'prius', 'volt', 'camaro']
brands, IDl, ID2index = gslib.world_list2IDs(dict1, brands, tokenizef=gslib.wordCleanUp)
topicN = 5
topic = lda.state.get_lambda()[topicN]
probss = sorted([(topic[ID], brands[ID2index[ID]]) for ID in IDl], reverse=True)
for p in probss:
    print p

## rank topics by brand/word
word = 'toyota'
_, IDl, _ = gslib.world_list2IDs(dict1, [word], tokenizef=gslib.wordCleanUp)
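# The ranking itself is missing above. A minimal sketch of the rest, mirroring
# the commented-out block in the sink-model script further below: normalise each
# topic's lambda row and sort topics by the word's share of it. The printing
# loop at the end is an added illustration, not part of the original code.
import numpy
topics = lda.state.get_lambda()
probs = numpy.zeros(lda.num_topics)
for i, t in enumerate(topics):
    probs[i] = topics[i][IDl[0]] / sum(t)
idx = numpy.argsort(-probs)  # topic numbers, most relevant to the word first
for i in idx[:5]:
    print 'N{} {:.4f}: {}'.format(
        i, probs[i], ' '.join([w for _, w in lda.show_topic(i, 10)]))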
# Count brand co-occurrence across documents in the "sink" unbranded corpus.
import gensim
import numpy

#import someBrandFiltering as bf
import ldaModel2WD as wd
import sims_csv_plotter as draw
import mess_with_sims as sims

#dirs = gslib.LDAdirs(indir=r"Z:\ermunds\results\all unbranded threads", modelName="unbranded2passes_20topics")
dirs = gslib.LDAdirs(indir=r"Z:\ermunds\results\sink", modelName="unbranded220topics")
dict1 = gensim.corpora.dictionary.Dictionary.load(dirs.dictFileName)
mm = gensim.corpora.MmCorpus(dirs.corpusFname)

#raw_brands = bf.getMakes()
raw_brands = sims.BrandsClustered_1
brandsl, IDl, ID2index = gslib.world_list2IDs(dict1, raw_brands, tokenizef=gslib.wordCleanUp)

# rows (_1) and columns (_2) of the co-occurrence matrix; both currently use
# the same brand list
l_1 = len(IDl)
l_2 = len(IDl)
IDset_1 = set(IDl)
IDset_2 = set(IDl)
ID2index_1 = ID2index
ID2index_2 = ID2index
counter_1 = numpy.zeros((1, l_1))
counter_2 = numpy.zeros((1, l_2))
coocM = numpy.zeros((l_1, l_2))
#for bow in mm: print 'tic'
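# The counting loop is only stubbed out above ("for bow in mm: print 'tic'").
# A minimal sketch of one way to fill the counters and coocM, assuming each
# corpus document is a bag-of-words list of (word_id, count) pairs and that
# ID2index maps a dictionary ID to its row/column index; this is an added
# illustration, not the original implementation.
for bow in mm:
    doc_ids = set(wid for wid, _cnt in bow)     # all word IDs in this post
    hits_1 = doc_ids & IDset_1
    hits_2 = doc_ids & IDset_2
    for i in hits_1:
        counter_1[0, ID2index_1[i]] += 1        # documents mentioning brand i
    for j in hits_2:
        counter_2[0, ID2index_2[j]] += 1        # documents mentioning brand j
    for i in hits_1:
        for j in hits_2:
            coocM[ID2index_1[i], ID2index_2[j]] += 1  # documents mentioning both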
# Same analysis against the "sink" unbranded model.
import numpy

dirs = gslib.LDAdirs(indir=r"Z:\ermunds\results\sink", modelName="unbranded220topics")
docsfilename = dirs.allDocsFileName
(dict1, mm, lda) = gslib.loadStuff(dirs)
brands = sims.BrandsClustered_1

# decompose a post into topics and print them
gslib.make_sense(1, lda, mm, docsfilename)

# rank brands within a topic
brands = sims.BrandsClustered_1 + ['leaf', 'prius', 'volt', 'camaro']
brands, IDl, ID2index = gslib.world_list2IDs(dict1, brands, tokenizef=gslib.wordCleanUp)
topicN = 7
topic = lda.state.get_lambda()[topicN]
probss = sorted([(topic[ID], brands[ID2index[ID]]) for ID in IDl], reverse=True)
for p in probss:
    print p

'''
## rank topics by brand/word
word = 'toyota'
_, IDl, _ = gslib.world_list2IDs(dict1, [word], tokenizef=gslib.wordCleanUp)
topics = lda.state.get_lambda()
probs = numpy.zeros(lda.num_topics)
for i, t in enumerate(topics):
    probs[i] = topics[i][IDl[0]] / sum(t)
idx = numpy.argsort(-probs)
'''