def main():
    """Re-sort the brand similarity matrix and save a CSV and PNG copy.

    Steps, as visible in this function:
      1. Load the similarity matrix and its brand labels from the
         ``CSVin`` table via ``mp.loadCSV``.
      2. Build an index that maps the order given by ``BrandsClustered_1``
         (without 'mercedes-benz') onto positions in the loaded labels.
      3. Pass that index to ``select`` (presumably reorders/subsets the
         matrix and labels -- confirm against its definition).
      4. Save the result as ``CSVout`` and hand it to ``draw.main``.

    Raises:
        ValueError: if 'mercedes-benz' is not in ``BrandsClustered_1`` or a
            clustered brand is missing from the loaded labels (assuming
            both are plain lists).
    """
    # Previously used data sets, kept for reference:
    # dirs = gsLib.LDAdirs(indir=r"Z:\ermunds\results\all\all unbranded threads",modelName="unbranded2passes_20topics")
    # dirs = gsLib.LDAdirs(indir=r"Z:\ermunds\results\all branded threads", modelName="All2passes_20topics")
    dirs = gsLib.LDAdirs(indir=r"Z:\ermunds\results\all unbranded threads 2",
                         modelName="unbranded220topics")
    # dirs = gsLib.LDAdirs(indir=r"Z:\ermunds\results\sink", modelName="unbranded220topics")

    CSVin = "simsN_posts"
    CSVout = "simsNtweaked"
    suffix = ''
    figName = 'heatmap_from_posts_no whitening' + suffix

    # mp.generateCSV(indir=dirs.indir,modelName=dirs.modelName,suffix = suffix)
    sims, brands = mp.loadCSV(dirs, CSVin)

    # Copy before editing: the original aliased BrandsClustered_1 and deleted
    # from it in place, which corrupted the shared module-level list and made
    # a second call to main() fail with ValueError.
    nbrands = list(BrandsClustered_1)
    # caps bug of may 14
    nbrands.remove('mercedes-benz')

    # Position of every clustered brand inside the loaded label list.
    idx = numpy.array([brands.index(b) for b in nbrands], dtype=int)

    # Manual, slice-wise ordering that was used before the clustered list
    # (disabled; kept for reference):
    # ibrand = brands.index('ram')
    # idx = numpy.argsort(-sims[ibrand,:])
    # ibrand = brands.index('jeep')
    # sort_a_slice(idx,sims,a=6,b=None,compare_to=ibrand)
    # ibrand = brands.index('nissan')
    # sort_a_slice(idx,sims,a=10,b=None,compare_to=ibrand)
    # ibrand = brands.index('chrysler')
    # sort_a_slice(idx,sims,a=15,b=None,compare_to=ibrand)
    # ibrand = brands.index('bmw')
    # sort_a_slice(idx,sims,a=23,b=None,compare_to=ibrand,sign=1)

    (sims, nbrands) = select(sims, brands, idx)
    mp.saveCSV(dirs, CSVout, nbrands, sims)
    draw.main(dirs, CSVout, figName)
def main():
    """Resort the similarity matrix into clustered brand order and save it.

    Loads ``sims`` and ``brands`` from the ``CSVin`` table, looks up where
    each brand of ``BrandsClustered_1`` (minus 'mercedes-benz') sits in the
    loaded labels, passes those positions to ``select``, then writes the
    result to ``CSVout`` and calls ``draw.main`` on it (the original
    docstring says this saves a png copy).

    Raises:
        ValueError: if 'mercedes-benz' is absent from ``BrandsClustered_1``
            or a clustered brand is absent from the loaded labels
            (assuming both are plain lists).
    """
    # Other data sets this has been run against (disabled):
    # dirs = gsLib.LDAdirs(indir=r"Z:\ermunds\results\all\all unbranded threads",modelName="unbranded2passes_20topics")
    # dirs = gsLib.LDAdirs(indir=r"Z:\ermunds\results\all branded threads", modelName="All2passes_20topics")
    dirs = gsLib.LDAdirs(
        indir=r"Z:\ermunds\results\all unbranded threads 2",
        modelName="unbranded220topics",
    )
    # dirs = gsLib.LDAdirs(indir=r"Z:\ermunds\results\sink", modelName="unbranded220topics")

    CSVin = "simsN_posts"
    CSVout = "simsNtweaked"
    suffix = ''
    figName = 'heatmap_from_posts_no whitening' + suffix

    # mp.generateCSV(indir=dirs.indir,modelName=dirs.modelName,suffix = suffix)
    sims, brands = mp.loadCSV(dirs, CSVin)

    # Take a private copy first. Binding nbrands directly to
    # BrandsClustered_1 and deleting from it mutated the shared list, so
    # every later user (including a repeated main() call) saw it changed.
    nbrands = list(BrandsClustered_1)
    # caps bug of may 14
    del nbrands[nbrands.index('mercedes-benz')]

    # idx[i] is the row/column of nbrands[i] in the loaded matrix.
    idx = numpy.zeros(len(nbrands), dtype=int)
    for i, b in enumerate(nbrands):
        idx[i] = brands.index(b)

    # Earlier hand-tuned ordering, left disabled for reference:
    # ibrand = brands.index('ram')
    # idx = numpy.argsort(-sims[ibrand,:])
    # ibrand = brands.index('jeep')
    # sort_a_slice(idx,sims,a=6,b=None,compare_to=ibrand)
    # ibrand = brands.index('nissan')
    # sort_a_slice(idx,sims,a=10,b=None,compare_to=ibrand)
    # ibrand = brands.index('chrysler')
    # sort_a_slice(idx,sims,a=15,b=None,compare_to=ibrand)
    # ibrand = brands.index('bmw')
    # sort_a_slice(idx,sims,a=23,b=None,compare_to=ibrand,sign=1)

    (sims, nbrands) = select(sims, brands, idx)
    mp.saveCSV(dirs, CSVout, nbrands, sims)
    draw.main(dirs, CSVout, figName)
# NOTE(review): whitespace-collapsed fragment. As written on one physical line
# it begins with '#', so the interpreter sees the whole line as a comment.
# The live `for` header that binds `i` is not visible here (only a disabled
# `for i in xrange(20000):` variant), so the loop/indent boundaries below are
# an assumption to confirm before restoring the layout.
#
# What the visible statements do, in order:
#   * take `bow = mm[i]` and print `i` whenever `i % 10000 == 0`;
#   * allocate two fresh per-item counters of shape (1, l_1) and (1, l_2);
#   * for each (ID, count) pair in `bow`, add `count` to both the per-item
#     and the running counter of every ID set (IDset_1 / IDset_2) that
#     contains the ID, using ID2index_1 / ID2index_2 for the column;
#   * add `temp_counter_1.T * temp_counter_2` to `coocM`; for these ndarrays
#     that broadcasts (l_1, 1) * (1, l_2) into an (l_1, l_2) outer product;
#   * presumably after the loop: save the raw `coocM` as 'coocM_raw', then
#     save/draw log(wd.normalize(coocM)) with its diagonal zeroed
#     (figName 'from_cooc_log'), then save/draw the non-log normalised matrix
#     with its diagonal zeroed (figName 'from_cooc').
# NOTE(review): both saves use the CSV name 'coocM', so the second likely
# replaces the first on disk -- confirm against wd.saveCSV.
#for i in xrange(20000): bow = mm[i] if not i%10000: print i temp_counter_1 = numpy.zeros((1,l_1)) temp_counter_2 = numpy.zeros((1,l_2)) for ID, count in bow: if ID in IDset_1: index=ID2index_1[ID] temp_counter_1[0,index]+=count counter_1[0,index]+=count if ID in IDset_2: index=ID2index_2[ID] counter_2[0,index]+=count temp_counter_2[0,index]+=count coocM=coocM+temp_counter_1.T*temp_counter_2 wd.saveCSV(dirs,'coocM_raw',brandsl,coocM) temp2 = wd.normalize(coocM) temp25=numpy.log(temp2) temp3=temp25-numpy.diag(temp25.diagonal()) wd.saveCSV(dirs,'coocM',brandsl,temp3) draw.main(dirs,'coocM',figName='from_cooc_log') temp2 = wd.normalize(coocM) temp25=temp2 temp3=temp25-numpy.diag(temp25.diagonal()) wd.saveCSV(dirs,'coocM',brandsl,temp3) draw.main(dirs,'coocM',figName='from_cooc')
# Script fragment: reload pickled LDA vectors, round-trip the brand list
# through a pickle file, turn the per-brand topic vectors into a cosine
# similarity matrix, save/plot it, then row-normalise both vector sets.
# Relies on `dirs`, `brandsl`, `LDA_vectorsWithCooc`, `mp`, `sims` and
# `simplot` being defined earlier in the file.

# Files are opened with `with` so every handle is closed deterministically.
# This matters for brandsl.pickle, which is read back immediately after it
# is written: an unclosed writer only works when the interpreter happens to
# finalise the file object straight away.
# File modes ('r'/'w') are kept as-is so existing pickle files stay readable.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# on result files produced by this project.
with open(dirs.indir+'\\'+'LDA_vectorsWithOutCooc.pickle','r') as pickle_file:
    LDA_vectorsWithOutCooc = pickle.load(pickle_file)

brandsl_path = dirs.indir+'\\'+'brandsl.pickle'
with open(brandsl_path,'w') as pickle_file:
    pickle.dump(brandsl, pickle_file)
with open(brandsl_path,'r') as pickle_file:
    brandsl = pickle.load(pickle_file)

## convert these vectors to sims
vectors = LDA_vectorsWithCooc
#vectors = LDA_vectorsWithOutCooc

n_brands = len(brandsl)
# Each row norm is needed 2*n times below; compute it once per row.
norms = [scipy.linalg.norm(vectors[k,:]) for k in xrange(n_brands)]

# simCos[a, b] = cosine of the angle between the vectors of brands a and b.
simCos = numpy.zeros((n_brands, n_brands))
for b1i in xrange(n_brands):
    v1 = vectors[b1i,:]  # (vectors[b1i,:] -ave)/stds
    for b2i in xrange(n_brands):
        v2 = vectors[b2i,:]  # (vectors[b2i,:] -ave)/stds
        simCos[b1i,b2i] = (v1.dot(v2))/(norms[b1i]*norms[b2i])

mp.saveCSV(dirs,'simsN_posts',brandsl,simCos)

# Reorder into the clustered brand order before plotting.
brandsOrdered = sims.BrandsClustered_1
simCos = sims.shuffle_sims(simCos,brandsl,brandsOrdered)
simplot.plotSims(simCos,brandsOrdered,dirs,figName='from topics '+'withRepeadedPosts')

##
# Scale every row of both vector sets so it sums to 1 (in place).
# `vectors` above is the same object as LDA_vectorsWithCooc, so it is
# rescaled too; this happens after the similarities were computed.
for i in xrange(len(LDA_vectorsWithCooc)):
    LDA_vectorsWithCooc[i,:] = LDA_vectorsWithCooc[i,:]/sum(LDA_vectorsWithCooc[i,:])
    LDA_vectorsWithOutCooc[i,:] = LDA_vectorsWithOutCooc[i,:]/sum(LDA_vectorsWithOutCooc[i,:])