def main():
    """Re-sort the similarity data and save a PNG copy.

    Steps, in order:

    1. Load ``sims`` and ``brands`` from the ``CSVin`` table via
       ``mp.loadCSV``.
    2. Take the brand order from the module-level ``BrandsClustered_1``
       list, dropping 'mercedes-benz'.
    3. Translate that order into positions within ``brands`` and hand the
       index vector to ``select``.
    4. Save the result under ``CSVout`` with ``mp.saveCSV`` and pass the same
       name to ``draw.main`` together with ``figName``.

    Raises:
        ValueError: if 'mercedes-benz' is missing from ``BrandsClustered_1``
            or if one of the remaining brands is missing from ``brands``.
    """
    # Alternative data sets that were used with this script:
    # dirs = gsLib.LDAdirs(indir=r"Z:\ermunds\results\all\all unbranded threads",modelName="unbranded2passes_20topics")
    # dirs = gsLib.LDAdirs(indir=r"Z:\ermunds\results\all branded threads", modelName="All2passes_20topics")
    # dirs = gsLib.LDAdirs(indir=r"Z:\ermunds\results\sink", modelName="unbranded220topics")
    dirs = gsLib.LDAdirs(indir=r"Z:\ermunds\results\all unbranded threads 2",
                         modelName="unbranded220topics")

    CSVin = "simsN_posts"
    CSVout = "simsNtweaked"
    suffix = ''
    figName = 'heatmap_from_posts_no whitening' + suffix

    # mp.generateCSV(indir=dirs.indir,modelName=dirs.modelName,suffix = suffix)
    sims, brands = mp.loadCSV(dirs, CSVin)

    # Copy before editing: BrandsClustered_1 is shared module state, and
    # deleting from it in place would make a second call to main() fail
    # because the entry would already be gone.
    nbrands = list(BrandsClustered_1)
    # caps bug of may 14
    nbrands.remove('mercedes-benz')

    # Position of every requested brand inside the loaded `brands` sequence.
    idx = numpy.array([brands.index(b) for b in nbrands], dtype=int)

    # Disabled alternative: derive the order from the similarities themselves
    # instead of from BrandsClustered_1.
    # ibrand = brands.index('ram')
    # idx = numpy.argsort(-sims[ibrand,:])
    #
    # ibrand = brands.index('jeep')
    # sort_a_slice(idx,sims,a=6,b=None,compare_to=ibrand)
    #
    # ibrand = brands.index('nissan')
    # sort_a_slice(idx,sims,a=10,b=None,compare_to=ibrand)
    #
    # ibrand = brands.index('chrysler')
    # sort_a_slice(idx,sims,a=15,b=None,compare_to=ibrand)
    #
    # ibrand = brands.index('bmw')
    # sort_a_slice(idx,sims,a=23,b=None,compare_to=ibrand,sign=1)

    (sims, nbrands) = select(sims, brands, idx)
    mp.saveCSV(dirs, CSVout, nbrands, sims)
    draw.main(dirs, CSVout, figName)
def main():
    """Re-sort the similarity data and save a PNG copy.

    Loads ``sims`` and ``brands`` from the ``CSVin`` table, takes the brand
    order from the module-level ``BrandsClustered_1`` list (without
    'mercedes-benz'), passes the matching index vector to ``select``, stores
    the outcome under ``CSVout`` and finally calls ``draw.main`` on that
    table with ``figName``.

    Raises:
        ValueError: if 'mercedes-benz' is not in ``BrandsClustered_1`` or if
            any remaining brand is not present in the loaded ``brands``.
    """
    # Alternative data sets that were used with this script:
    # dirs = gsLib.LDAdirs(indir=r"Z:\ermunds\results\all\all unbranded threads",modelName="unbranded2passes_20topics")
    # dirs = gsLib.LDAdirs(indir=r"Z:\ermunds\results\all branded threads", modelName="All2passes_20topics")
    # dirs = gsLib.LDAdirs(indir=r"Z:\ermunds\results\sink", modelName="unbranded220topics")
    dirs = gsLib.LDAdirs(
        indir=r"Z:\ermunds\results\all unbranded threads 2",
        modelName="unbranded220topics",
    )

    CSVin = "simsN_posts"
    CSVout = "simsNtweaked"
    suffix = ''
    figName = 'heatmap_from_posts_no whitening' + suffix

    # mp.generateCSV(indir=dirs.indir,modelName=dirs.modelName,suffix = suffix)
    sims, brands = mp.loadCSV(dirs, CSVin)

    # BrandsClustered_1 is a shared module-level list. Binding it directly
    # and deleting from it would alter it for every later user, and a repeat
    # call to main() would raise because the entry is already gone. Edit a
    # private copy instead.
    nbrands = list(BrandsClustered_1)
    # caps bug of may 14
    nbrands.remove('mercedes-benz')

    # Where each requested brand sits in the loaded `brands` sequence.
    idx = numpy.array([brands.index(b) for b in nbrands], dtype=int)

    # Disabled alternative: order by similarity rather than by the
    # BrandsClustered_1 list.
    # ibrand = brands.index('ram')
    # idx = numpy.argsort(-sims[ibrand,:])
    #
    # ibrand = brands.index('jeep')
    # sort_a_slice(idx,sims,a=6,b=None,compare_to=ibrand)
    #
    # ibrand = brands.index('nissan')
    # sort_a_slice(idx,sims,a=10,b=None,compare_to=ibrand)
    #
    # ibrand = brands.index('chrysler')
    # sort_a_slice(idx,sims,a=15,b=None,compare_to=ibrand)
    #
    # ibrand = brands.index('bmw')
    # sort_a_slice(idx,sims,a=23,b=None,compare_to=ibrand,sign=1)

    (sims, nbrands) = select(sims, brands, idx)
    mp.saveCSV(dirs, CSVout, nbrands, sims)
    draw.main(dirs, CSVout, figName)
#for i in xrange(20000): bow = mm[i] if not i%10000: print i temp_counter_1 = numpy.zeros((1,l_1)) temp_counter_2 = numpy.zeros((1,l_2)) for ID, count in bow: if ID in IDset_1: index=ID2index_1[ID] temp_counter_1[0,index]+=count counter_1[0,index]+=count if ID in IDset_2: index=ID2index_2[ID] counter_2[0,index]+=count temp_counter_2[0,index]+=count coocM=coocM+temp_counter_1.T*temp_counter_2 wd.saveCSV(dirs,'coocM_raw',brandsl,coocM) temp2 = wd.normalize(coocM) temp25=numpy.log(temp2) temp3=temp25-numpy.diag(temp25.diagonal()) wd.saveCSV(dirs,'coocM',brandsl,temp3) draw.main(dirs,'coocM',figName='from_cooc_log') temp2 = wd.normalize(coocM) temp25=temp2 temp3=temp25-numpy.diag(temp25.diagonal()) wd.saveCSV(dirs,'coocM',brandsl,temp3) draw.main(dirs,'coocM',figName='from_cooc')