def __init__(self, value):
    json = dumps(value)
    avro = schema.dump_report(value)
    self._value = zlib.compress(avro)
    print "json: %i gzip-json: %i avro: %i gzip-avro: %i" % (
        len(json), len(zlib.compress(json)), len(avro), len(self._value))
    self.length = len(self._value)
def apply_operator(self, opdata, first, last):
    tic = time.time()
    # process
    for d in opdata:
        d[:, 0] *= 1000
    opdata = operators.DataChunk((self.data_spec['start'], self.data_spec['end']),
                                 first, last, opdata)
    redata = self.op.process(opdata)
    log.msg("STATS: Operator processing took %0.6fs" % (time.time() - tic))
    # log.msg("writing " + str(map(len, redata)))

    # construct a return value with metadata and data merged
    redata = map(self.build_result, zip(redata, self.op.outputs))
    # print "processing and writing took", time.time() - tic
    if not self._stop:
        self.consumer.write(json.dumps(redata))
        self.consumer.write('\r\n')

        if last:
            self.consumer.unregisterProducer()
            self.consumer.finish()
def start_processing(self, data):
    """data: a list with two elements: the first is the metadata, and
    the second is the stream information we will need to fetch the
    actual data."""
    # save the metadata and streamids for loading
    opmeta = data[0][1]
    opmeta = map(lambda x: dict(util.buildkv("", x)), opmeta)

    if not len(opmeta):
        self.consumer.write(json.dumps([]))
        self.consumer.unregisterProducer()
        self.consumer.finish()
        return

    # sort the streamids to be in the same order as the operator inputs
    meta_uid_order = dict(zip(map(operator.itemgetter("uuid"), opmeta),
                              xrange(0, len(opmeta))))
    self.streamids = data[1][1]
    self.streamids.sort(key=lambda elt: meta_uid_order[elt[0]])

    # use a heuristic for how much data we want to load at once...
    self.chunk_length = (3600 * 24 * self.DATA_DAYS) / len(self.streamids)
    if self.chunk_length < 300:
        self.chunk_length = 300

    # build the operator
    if self.group and len(self.group):
        self.op = grouping.GroupByTagOperator(opmeta, self.group, self.op)
    else:
        self.op = self.op(opmeta)

    for o in self.op.outputs:
        if "Metadata/Extra/Operator" not in o:
            o["Metadata/Extra/Operator"] = str(self.op)

    self.resumeProducing()
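# Minimal sketch of the `data` argument start_processing expects, inferred
# from the indexing above (data[0][1]: per-stream metadata dicts carrying a
# 'uuid' tag, data[1][1]: stream rows whose first element is that uuid).
# The pair labels, tag names other than 'uuid', and all values here are
# made up for illustration; start_processing never inspects data[x][0].
_example_data = [
    ('metadata', [
        {'uuid': 'aaaa-0001', 'Properties': {'UnitofMeasure': 'kW'}},
        {'uuid': 'bbbb-0002', 'Properties': {'UnitofMeasure': 'V'}},
    ]),
    ('streams', [
        ('bbbb-0002', 2),   # second element assumed to be a numeric stream id
        ('aaaa-0001', 1),
    ]),
]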
def render_DELETE(self, request):
    request.setHeader('Content-type', 'application/json')
    content = request.content.read()
    if content:
        del_uuids = json.loads(content)
        self.inst.jobs.jobs = filter(lambda j: j.uuid not in del_uuids,
                                     self.inst.jobs.jobs)
        self.inst.jobs.cancel_job(del_uuids)
    return json.dumps(map(lambda j: j.uuid, self.inst.jobs.jobs))
def render_PUT(self, request):
    request.setHeader('Content-type', 'application/json')
    content = request.content.read()
    if content:
        obj = json.loads(content)
        uids = self.add_jobs(obj)
        return json.dumps(uids)
    else:
        return None
def cancel_jobs(self, job_ids):
    url = self.base + '/jobs'
    payload = json.dumps(job_ids)
    opener = urllib2.build_opener(urllib2.HTTPHandler)
    request = urllib2.Request(url, data=payload)
    request.add_header('Content-Type', 'application/json')
    # urllib2 has no native DELETE support, so override the method
    request.get_method = lambda: 'DELETE'
    fp = opener.open(request)
    rv = json.loads(fp.read())
    return rv
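# Illustrative round trip, assuming `client` is an instance of the class that
# defines cancel_jobs above and submit_jobs below (the uuid is made up):
#
#   remaining = client.cancel_jobs(['5f3a9c1e-made-up-uuid'])
#
# The server replies with the uuids of the jobs still scheduled, so
# `remaining` should no longer contain the cancelled id.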
def republish(self, key, public, obj):
    data = json.dumps(obj)
    for client, streams in self.listeners.iteritems():
        if receive_object(client, key, public):
            if streams is None:
                # if they've subscribed to all streams, we can
                # just forward them the object
                client.write("\n\n")
                client.write(data)
            else:
                # only include topical data
                custom = dict((k, obj[k]) for k in obj.iterkeys()
                              if not 'uuid' in obj[k] or
                                 obj[k]['uuid'] in streams)
                # don't bother filtering metadata at the moment
                # since it's expensive to construct and mostly
                # won't happen.
                if sum((1 for v in custom.itervalues() if 'uuid' in v)):
                    client.write(json.dumps(custom))
                    client.write("\n\n")
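# Minimal sketch of the per-subscriber filter used above; the report object
# and the subscribed stream set are made up.  Keys whose value has no 'uuid',
# plus keys whose uuid the client subscribed to, are kept; the rest drop out.
_example_report = {
    '/sensor/a': {'uuid': 'aaaa', 'Readings': [[1, 10]]},
    '/sensor/b': {'uuid': 'bbbb', 'Readings': [[1, 20]]},
    '/': {'Metadata': {'SourceName': 'demo'}},
}
_subscribed = set(['aaaa'])
_custom = dict((k, v) for k, v in _example_report.items()
               if 'uuid' not in v or v['uuid'] in _subscribed)
# _custom keeps '/sensor/a' and '/', and drops '/sensor/b'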
def render_GET(self, request):
    request.setHeader('Content-type', 'application/json')
    rv = []
    jobs = map(lambda j: j.__dict__, self.inst.jobs.jobs)
    for j in jobs:
        obj = {'name': j['name'],
               'start_time': j['start_time'],
               'after': j['after'],
               'actions': j['actions']}
        rv.append(obj)
    return json.dumps(rv)
def submit_jobs(self, jobs):
    """jobs: a list of job objects formed according to ../schema/job.av.

    Each job has the properties Name (str), StartTime (longint), and
    Actions (array); each entry in Actions is an object with the
    properties State (longint or double) and Path (str).
    """
    url = self.base + '/jobs'
    payload = json.dumps(jobs)
    opener = urllib2.build_opener(urllib2.HTTPHandler)
    request = urllib2.Request(url, data=payload)
    # urllib2 has no native PUT support, so override the method
    request.get_method = lambda: 'PUT'
    fp = opener.open(request)
    rv = json.loads(fp.read())
    return rv
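# Illustrative payload for submit_jobs, following the docstring above; the
# names, paths, and timestamp are made up, and ../schema/job.av remains the
# authoritative schema.
_example_jobs = [{
    'Name': 'evening-setback',
    'StartTime': 1354320000000,   # assumed to be a millisecond timestamp
    'Actions': [
        {'State': 68.0, 'Path': '/hvac/zone1/temp_setpoint'},
        {'State': 0,    'Path': '/lights/zone1/on'},
    ],
}]
# uuids = client.submit_jobs(_example_jobs)   # `client`: hypothetical instance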
def abort(self, error):
    self._stop = True
    if hasattr(error, "getTraceback"):
        tb = str(error.getTraceback())
    else:
        tb = str(error)
    error = {
        'error': "Encountered error while reading data; results are incomplete",
        # Twisted Failures carry the wrapped exception in .value; fall back
        # to the error itself for plain exceptions
        'exception': str(getattr(error, 'value', error)),
        'traceback': tb,
    }
    self.consumer.write(json.dumps(error))
    self.consumer.unregisterProducer()
    self.consumer.finish()
    return error
def write(self, data):
    self.consumer.write(json.dumps(data))
    self.consumer.write("\r\n")
def write(self, obj):
    self.request.sendMessage(json.dumps(obj), False)
def write(self, obj):
    self.request.write(json.dumps(obj))
    self.request.write("\n\n")
def __init__(self, value):
    value = dumps(value)
    self._value = zlib.compress(value)
    print "%i -> %i" % (len(value), len(self._value))
    self.length = len(self._value)
def render_GET(self, request):
    request.setHeader('Content-type', 'application/json')
    return json.dumps(self.value)
def republish(self, key, public, obj):
    insert = sterilize_object(obj)
    insert['__submitted'] = int(time.time() * 1000)
    insert['__key'] = key
    return self.db.runOperation("INSERT INTO republish (key, obj) VALUES (%s, %s)",
                                (key, json.dumps(insert)))
def send_result((request, result)):
    request.write(json.dumps(result))
    request.finish()
def write(self, data):
    self.consumer.write(json.dumps(data))
    self.consumer.write("\n")