def main(args):
    """Embed the lensing data set in 2-D with t-SNE and export the layout.

    The records are read from ``data/fullData.json``, turned into
    bag-of-words feature vectors and passed to ``bh_tsne``. Each record is
    labelled with its position in the input, as a string.

    Args:
        args: argv-style sequence; ``args[0]`` is skipped and the rest is
            handed to the parser returned by ``_argparse()``. The parsed
            options used here are ``perplexity``, ``theta``, ``verbose``,
            ``render`` and ``write``.

    Side effects:
        * with ``render`` set, calls ``render.render`` targeting
          ``output/lensing500p30-data.rendered.png``;
        * with ``write`` set, writes ``jsonstr = <json>`` to
          ``output/coordinateslensing-full-srl-p40.json`` and prints the
          coordinate extents.
    """
    argp = _argparse().parse_args(args[1:])

    # Single-argument, parenthesised prints behave the same on Python 2 and 3.
    print("Reading Data")
    lensingJson = featureExtraction.readData('data/fullData.json')

    # Extract bag-of-words features; titles are the record indices as strings.
    print("Extracting Features")
    data = featureExtraction.extBagOfWordFeatures(lensingJson)
    titles = [str(i) for i in range(len(lensingJson))]

    # NOTE: an optional PCA reduction (PCA(data, 30)) is currently disabled.

    # Materialise the embedding: it is iterated by both the render and the
    # write step, and a one-shot iterator would be exhausted by the first.
    result = list(bh_tsne(data, perplexity=argp.perplexity, theta=argp.theta,
                          verbose=argp.verbose))

    # Render image
    if argp.render:
        print("Rendering Image")
        import render
        render.render(
            [(title, point[0], point[1])
             for title, point in zip(titles, result)],
            "output/lensing500p30-data.rendered.png",
            width=3000, height=1800)

    # Convert the result into JSON and write it
    if argp.write:
        print("Writing data to file")
        resData = {}
        for title, point in zip(titles, result):
            resData[title] = {'x': point[0], 'y': point[1]}

        # True extents of the layout. Starting the trackers at 0 would
        # misreport them whenever all coordinates share a sign.
        xs = [entry['x'] for entry in resData.values()]
        ys = [entry['y'] for entry in resData.values()]
        minx = min(xs) if xs else 0
        maxx = max(xs) if xs else 0
        miny = min(ys) if ys else 0
        maxy = max(ys) if ys else 0

        print("creating json")
        print(len(resData))
        jsonStr = json.dumps(resData)
        print("MinX - %s MaxX - %s MinY - %s MaxY - %s"
              % (minx, maxx, miny, maxy))
        with open('output/coordinateslensing-full-srl-p40.json', 'w') as outFile:
            # The "jsonstr = " prefix is what readers of this file split on.
            outFile.write("jsonstr = ")
            outFile.write(jsonStr + '\n')
import unicodedata import sys def readCoordinateJson(filename): with open(filename) as inFile: for line in inFile: line = line.strip('\n\r') if line: line = line.split('jsonstr = ')[1] jsonObjects = json.loads(line) return jsonObjects coordinateObjs = readCoordinateJson('output/coordinateslensing-full-srl-p40.json') titles = range(0,len(coordinateObjs)) fullDataObjs = featureExtraction.readData('data/fullData.json')[:len(titles)] finalData = {} for (fdObj, title) in izip(fullDataObjs, titles): coordinateObj = coordinateObjs[str(title)] assert coordinateObj for e in fdObj['event']: if isinstance(e, basestring): event = e break text = fdObj['description'] coordinateObj['event'] = event coordinateObj['text'] = text finalData[str(title)] = coordinateObj with open('output/coordinate-srl-full-p40.json','w') as outFile: