def _parse(self, ses, opt, sniff=0):
    """Parse self.fn as CSV and yield chunks of column data.

    The first non-empty row supplies the column names; each subsequent row
    is written into self.chunk under those names and emitted through
    self.chunk_emit().  `sniff` is forwarded to util.file_progress.
    """
    # stdlib csv correctly unquotes fields, including commas embedded in
    # quoted fields, which the previous line.split(',') approach mis-split
    import csv
    self.chunk_init()
    keys = None
    for fields in csv.reader(util.file_progress(ses, self.fn, sniff)):
        if not fields:
            # skip blank lines instead of treating one as the header row
            continue
        if keys is None:
            keys = fields  # header row: column names
        else:
            self.chunk_extend()
            for k, v in zip(keys, fields):
                self.chunk[k][-1] = v
            for chunk in self.chunk_emit():
                yield chunk
    # flush any remaining buffered rows
    for chunk in self.chunk_emit(True):
        yield chunk
def _parse(self, ses, opt, sniff=0):
    """Parse self.fn as comma-separated values, yielding data chunks.

    The first row read becomes the column names; later rows are stored in
    self.chunk and emitted via self.chunk_emit().
    """
    self.chunk_init()
    header = None
    for raw in util.file_progress(ses, self.fn, sniff):
        cells = raw.strip().split(',')
        # rows whose first cell is quoted get surrounding quotes removed
        if cells and cells[0].startswith('"'):
            cells = [c.strip('"') for c in cells]
        if header:
            self.chunk_extend()
            for name, cell in zip(header, cells):
                self.chunk[name][-1] = cell
            for chunk in self.chunk_emit():
                yield chunk
        else:
            header = cells  # first row seen is the header
    # final flush of buffered data
    for chunk in self.chunk_emit(True):
        yield chunk
def _parse(self, ses, opt, sniff=0):
    """Parse self.fn as newline-delimited JSON, yielding flattened chunks.

    Each line is flattened to {dotted_key: [value]}; consecutive rows with
    identical key sets are batched into a single chunk of up to chunk_size
    values per key.  Lines that are not valid JSON are skipped silently;
    any other error is printed and stops parsing.
    """
    # wrapper keys (mongo extended-JSON style) collapsed into their parent
    ignore = set(['floatApprox', '$date', '$numberLong', '$timestamp'])
    chunk_size = 100
    def flatten(result, j, key=None):
        # Recursively flatten nested dicts/lists into result, joining path
        # components with util.SEP; scalar leaves become one-element lists.
        if type(j) == dict:
            for k, v in j.items():
                if k in ignore:
                    flatten(result, v, key)
                else:
                    flatten(result, v, key + util.SEP + k if key else k)
        elif type(j) == list:
            for i, v in enumerate(j):
                flatten(result, v, key + util.SEP + str(i) if key else str(i))
        else:
            result[key] = [j]
        return result
    chunk = {}
    for line in util.file_progress(ses, self.fn, sniff):
        try:
            j = flatten({}, json.loads(line))
            # BUG FIX: dict.values() is a view in Python 3 and cannot be
            # indexed with [0]; use next(iter(...)) instead.  The explicit
            # `not chunk` guard also avoids touching values() of an empty
            # chunk on the first iteration.
            if not chunk or j.keys() != chunk.keys() or \
                    len(next(iter(chunk.values()))) >= chunk_size:
                if chunk:
                    yield chunk
                chunk = j
            else:
                for k, v in j.items():
                    chunk[k].extend(v)
        except ValueError:
            pass  # ignore bad json
        except Exception:  # was bare except: — don't swallow KeyboardInterrupt
            traceback.print_exc()
            break
    yield chunk
def _parse(self, ses, opt, sniff=0):
    """Parse self.fn as newline-delimited JSON and yield flattened chunks.

    Lines are flattened to {dotted_key: [value]} mappings; runs of rows
    sharing a key set are merged into chunks holding up to chunk_size
    values per key.  Invalid JSON lines are ignored; other errors are
    printed and end the parse.
    """
    # keys that wrap a value (mongo extended JSON) and should be elided
    ignore = set(['floatApprox', '$date', '$numberLong', '$timestamp'])
    chunk_size = 100
    def flatten(result, j, key=None):
        # Flatten nested containers into result; path segments are joined
        # with util.SEP and leaf values are stored as one-element lists.
        if type(j) == dict:
            for k, v in j.items():
                if k in ignore:
                    flatten(result, v, key)
                else:
                    flatten(result, v, key + util.SEP + k if key else k)
        elif type(j) == list:
            for i, v in enumerate(j):
                flatten(result, v, key + util.SEP + str(i) if key else str(i))
        else:
            result[key] = [j]
        return result
    chunk = {}
    for line in util.file_progress(ses, self.fn, sniff):
        try:
            j = flatten({}, json.loads(line))
            # BUG FIX: chunk.values()[0] fails on Python 3 (dict views are
            # not subscriptable); take one column via next(iter(...)).
            # `not chunk` short-circuits the first iteration safely.
            if not chunk or j.keys() != chunk.keys() or \
                    len(next(iter(chunk.values()))) >= chunk_size:
                if chunk:
                    yield chunk
                chunk = j
            else:
                for k, v in j.items():
                    chunk[k].extend(v)
        except ValueError:
            pass  # ignore bad json
        except Exception:  # narrowed from bare except:
            traceback.print_exc()
            break
    yield chunk
def _parse(self, ses, opt, sniff=0):
    """Parse self.fn line-by-line against the module regex `rec`.

    A match on the named group `time_key` starts a new sample row; every
    other named group that captured a value is stored in the current row.
    If a data slot is already occupied, a fresh row is started carrying
    the last seen timestamp.  Chunks are yielded via self.chunk_emit().
    """
    # init
    self.chunk_init()
    pt = util.parse_time()  # NOTE(review): pt looks unused here, but
                            # util.parse_time() may have side effects — confirm
    # process the file
    for line in util.file_progress(ses, self.fn, sniff):
        m = rec.match(line.strip())
        if not m:
            continue
        # a captured time value begins a new sample row
        time = m.group(time_key)
        if time:
            for chunk in self.chunk_emit(flush=False):
                yield chunk
            self.chunk_extend()
            self.chunk[time_key][-1] = time
            self.last_time = time
        # record each captured data value
        for data_key in rec.groupindex:
            if data_key == time_key:
                continue
            data = m.group(data_key)
            # 'is not None' (was '!= None'): identity test is the correct
            # check, and empty-string captures still count as data
            if data is not None:
                if self.chunk[data_key][-1] is not None:
                    # slot already filled: start a new row at the same time
                    self.chunk_extend()
                    self.chunk[time_key][-1] = self.last_time
                self.chunk[data_key][-1] = data
    # finish up: flush everything still buffered
    for chunk in self.chunk_emit(flush=True):
        yield chunk