def read(stream):
    '''Parses a playlist from the given stream.

    Returns a generator that produces calliope.playlist.Item objects.

    The generator will read from the file on demand, so you must be careful
    not to do this:

        with open('playlist.cpe', 'r') as f:
            playlist = calliope.playlist.read(f)

        for item in playlist:
            # You will see 'ValueError: I/O operation on closed file.'
            ...

    If you want to read the playlist in one operation, convert it to a list:

        with open('playlist.cpe', 'r') as f:
            playlist = list(calliope.playlist.read(f))

    '''
    for text in splitstream.splitfile(stream, format='json'):
        try:
            json_document = json.loads(text)
        except ValueError as e:
            raise PlaylistError from e
        if isinstance(json_document, dict):
            yield Item(json_document)
        elif isinstance(json_document, list):
            yield from (Item(item) for item in json_document)
        else:
            raise PlaylistError("Expected JSON object, got {}".format(
                type(json_document).__name__))
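# A minimal sketch of what splitstream.splitfile does with a stream of
# concatenated JSON documents such as a .cpe playlist. Only the splitstream
# package is assumed; the playlist content here is made up.
import io
import json
import splitstream

stream = io.BytesIO(b'{"title": "a"}\n{"title": "b"}\n')
for chunk in splitstream.splitfile(stream, format='json'):
    print(json.loads(chunk))  # each chunk is one complete JSON document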
def setup_superbatches():
    """Set up the superbatch directory to allow for fine-grained dataset
    loading. Assumes yelp_academic_dataset_review.json exists in the root
    directory. This method is run if the "--setup" flag is passed.
    """
    superbatch = []
    superbatch_id = 0
    superbatch_size = 500
    max_batches = 500
    with open(os.path.join(DATASET_PATH,
                           "yelp_academic_dataset_review.json")) as reviews_file:
        reviews_loader = tqdm(splitfile(reviews_file, format="json"),
                              position=0, leave=True)
        reviews_loader.set_description("Loading superbatch %d..." % superbatch_id)
        for jsonstr in reviews_loader:
            superbatch.append(json.loads(jsonstr))
            if len(superbatch) >= superbatch_size:
                with open(os.path.join(
                        DATASET_PATH,
                        "superbatches/yelp_superbatch_%d.pkl" % superbatch_id),
                        'wb') as fout:
                    pickle.dump(superbatch, fout)
                superbatch = []
                superbatch_id += 1
                reviews_loader.set_description(
                    "Loading superbatch %d..." % superbatch_id)
                if superbatch_id > max_batches:
                    break
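# The matching read side is not part of this snippet; a minimal sketch,
# assuming the pickle layout written by setup_superbatches() above and a
# DATASET_PATH defined as above (load_superbatch is a hypothetical helper,
# not from the original code).
import os
import pickle

def load_superbatch(superbatch_id):
    path = os.path.join(DATASET_PATH,
                        "superbatches/yelp_superbatch_%d.pkl" % superbatch_id)
    with open(path, 'rb') as fin:
        return pickle.load(fin)  # a list of up to 500 review dicts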
def process_one(self):
    got_protocol = False
    s = b""
    # Sniff the first byte to decide between XML-RPC ('<') and
    # JSON-RPC ('{' or '[').
    while not self.__protocol:
        got_protocol = True
        s = self.input.read(1)
        if s == b'<':
            self.__protocol = protocol.XmlRpc()
        elif s in b'{[':
            self.__protocol = protocol.JsonRpc()
    if not self.__split:
        # The sniffed byte was consumed from the stream, so hand it
        # back to the splitter as a preamble.
        self.__split = splitstream.splitfile(self.input,
                                             format=self.__protocol.splitfmt(),
                                             maxdocsize=1024 * 1024 * 120,
                                             preamble=s)
    if got_protocol:
        for a, kw in self.__regs:
            self.__protocol.register_function(*a, **kw)
        self.__regs = []
    try:
        for rsps in self.__split:
            response = self.__protocol.dispatch_request(rsps)
            self.output.write(response)
            self.output.flush()
            return
        raise EOFError()
    except Exception:
        # Internal error
        self.close()
        raise
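# The preamble= pattern in isolation: the sniffed byte is no longer in the
# stream, so splitfile prepends it back onto the first document. A minimal
# sketch assuming only splitstream; the payload is invented.
import io
import splitstream

stream = io.BytesIO(b'{"jsonrpc": "2.0", "method": "ping", "id": 1}')
first = stream.read(1)  # consume one byte to detect JSON vs. XML
docs = splitstream.splitfile(stream, format="json", preamble=first)
print(next(docs))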
def read_line():
    for jsonline in splitfile(input_handle, format="json"):
        try:
            yield json.loads(jsonline)
        except Exception:
            logger.error(
                f'JSON parsing failed for record {jsonline}: '
                f'{traceback.format_exc()}')
def read_key(self, full_key):
    s3_object = self.client.get_key(full_key, bucket_name=self.bucket)
    records_bytes = splitfile(s3_object.get()['Body'], format="json",
                              startdepth=1, bufsize=65536)
    for record_bytes in records_bytes:
        yield json.loads(record_bytes)
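# startdepth=1 means each yielded record sits one level inside the S3
# object's top-level container; a minimal sketch of the same option on an
# in-memory array (made-up data):
import io
import json
import splitstream

stream = io.BytesIO(b'[{"id": 1}, {"id": 2}]')
for record in splitstream.splitfile(stream, format="json", startdepth=1):
    print(json.loads(record))  # {'id': 1}, then {'id': 2}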
def _do_split(self, string, startdepth=0):
    f = self._loadstr(string)
    try:
        return list(splitstream.splitfile(f, "json", bufsize=self._bufsize,
                                          startdepth=startdepth))
    finally:
        f.close()
def main():
    """main program"""
    csv.register_dialect('iperf3log', delimiter=',', quoting=csv.QUOTE_MINIMAL)
    csvwriter = csv.writer(sys.stdout, 'iperf3log')

    # accumulate volume per ip in a dict
    db = {}

    # this will yield each test as a parsed JSON object
    objs = (json.loads(jsonstr)
            for jsonstr in splitfile(sys.stdin, format="json", bufsize=1))

    csvwriter.writerow(["date", "ip", "localport", "remoteport", "duration",
                        "protocol", "num_streams", "cookie",
                        "sent", "sent_mbps", "rcvd", "rcvd_mbps",
                        "totalsent", "totalreceived"])
    for obj in objs:
        # caveat: assumes multiple streams are all from the same IP, so we
        # take the 1st one
        # todo: handle errors and missing elements
        ip = (obj["start"]["connected"][0]["remote_host"]).encode('ascii', 'ignore')
        local_port = obj["start"]["connected"][0]["local_port"]
        remote_port = obj["start"]["connected"][0]["remote_port"]
        sent = obj["end"]["sum_sent"]["bytes"]
        rcvd = obj["end"]["sum_received"]["bytes"]
        sent_speed = obj["end"]["sum_sent"]["bits_per_second"] / 1000 / 1000
        rcvd_speed = obj["end"]["sum_received"]["bits_per_second"] / 1000 / 1000
        reverse = obj["start"]["test_start"]["reverse"]
        time = (obj["start"]["timestamp"]["time"]).encode('ascii', 'ignore')
        cookie = (obj["start"]["cookie"]).encode('ascii', 'ignore')
        protocol = (obj["start"]["test_start"]["protocol"]).encode('ascii', 'ignore')
        duration = obj["start"]["test_start"]["duration"]
        num_streams = obj["start"]["test_start"]["num_streams"]

        if reverse not in [0, 1]:
            sys.exit("unknown reverse")

        s = 0
        r = 0
        if ip in db:
            (s, r) = db[ip]
        if reverse == 0:
            r += rcvd
            sent = 0
            sent_speed = 0
        else:
            s += sent
            rcvd = 0
            rcvd_speed = 0
        db[ip] = (s, r)

        csvwriter.writerow([time, ip, local_port, remote_port, duration,
                            protocol, num_streams, cookie,
                            sent, sent_speed, rcvd, rcvd_speed, s, r])
    # for obj
    sys.exit(0)
def generate_jsons(self, data):
    result = []
    f = io.BytesIO(data)
    for js in splitfile(f, format="json"):
        # Consume the matched document from the internal buffer and
        # collect it as a decoded string.
        temp = js.decode("utf-8")
        self.buffer = self.buffer[len(temp):]
        result.append(temp)
    return result
def convert_log_to_dict(input_text):
    """Convert the git log output to a list of dicts."""
    items = []
    f = StringIO.StringIO(input_text)
    for jsonstr in splitfile(f, format="json"):
        try:
            items.append(json.loads(jsonstr))
        except Exception as e:
            # Note: the exception object itself is returned to the caller
            # rather than raised.
            return e
    return items
        current_mode = element_mode
    else:
        raise ValueError("Can't currently mix modes!")
    io_elements += [element]

# Main loop
iocc_emitter.init(sys.argv[1], sys.argv[3])  # XXX
parse_connection({'id': 0})
try:
    with open(sys.argv[2], 'r') as iospec_f:
        for jsonstr in splitfile(iospec_f, format="json"):
            iospec = json.loads(jsonstr)
            if iospec['type'] == "connection":
                parse_connection(iospec)
            elif iospec['type'] in ("printf", "display"):
                parse_io_element(iospec)
            else:
                raise ValueError
except FileNotFoundError:
    print("No iospec file generated, building dummy binary")
    current_mode = IOModes.PRINTF

# TODO: support for multiple connections
if current_mode == IOModes.PRINTF:
    iocc_emitter.emit_printf(list(connections.values())[0], io_elements)
elif current_mode == IOModes.DISPLAY:
def generate_json(log_file):
    """Reads a log file as a stream of JSON documents."""
    with open(log_file, 'r') as f:
        for jsonstr in splitfile(f, format="json"):
            yield json.loads(jsonstr)
def __init__(self, protocol, input=None, output=None, process=None,
             socket=None):
    self.__input, self.__output = _ios(input, output, process, socket)
    self.__protocol = protocol
    self.__split = splitstream.splitfile(self.__input,
                                         format=protocol.splitfmt())
def combine_dict(dict1, dict2):
    for key in dict2.keys():
        if key in dict1.keys():
            dict1[key] += dict2[key]
        else:
            dict1[key] = dict2[key]
    return dict1


# MPI variables
comm = MPI.COMM_WORLD
comm.Barrier()
size = comm.size
rank = comm.rank

data = None
if rank == 0:
    data = splitfile(open("data/bigTwitter.json", "r"), format="json",
                     startdepth=2)

# Number of tweets
count = 0
# Dictionary of languages
final_lang_dict = {}
# Dictionary of hashtags
final_hashtag_dict = {}
# Chunk size
chunk_size = size
# Tweets container
chunk = []
received_tweet = None

if rank == 0:
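# For illustration, how combine_dict merges two per-rank tallies
# (the values are made up):
lang_a = {"en": 5, "fr": 1}
lang_b = {"en": 2, "zh": 4}
print(combine_dict(lang_a, lang_b))  # {'en': 7, 'fr': 1, 'zh': 4}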
if not sys.stdin.isatty():
    input_stream = sys.stdin
else:
    message = 'Expecting json to be piped in.'
    raise IndexError(message)

has_output_file = True
try:
    output_filename = sys.argv[1]
except IndexError:
    has_output_file = False
    # message = 'Need a filename to save output to.'
    # raise IndexError(message)

json_inv = {}
for jsonstr in splitfile(sys.stdin, format="json"):
    json_inv = json.loads(jsonstr.decode('utf-8'))

inventory_str = ""

# Create [allservers]
inventory_str += "[allservers]\n"
for k in json_inv.keys():
    if isinstance(json_inv[k]["value"], list):
        for v in json_inv[k]["value"]:
            inventory_str += v + "\n"
    else:
        inventory_str += json_inv[k]["value"] + "\n"
inventory_str += "\n"

# Create sections for each key
def _do_split(self, string, **kw):
    f = self._loadstr(string)
    try:
        return list(splitstream.splitfile(f, "xml", bufsize=self._bufsize,
                                          **kw))
    finally:
        f.close()
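# The XML counterpart of the splitter; a minimal sketch assuming only
# splitstream, where each yielded chunk is one complete top-level element.
import io
import splitstream

stream = io.BytesIO(b"<a>1</a><a>2</a>")
for doc in splitstream.splitfile(stream, format="xml"):
    print(doc)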