Example #1
def read(stream):
    '''Parses a playlist from the given stream.

    Returns a generator that produces calliope.playlist.Item objects.

    The generator will read from the file on demand, so you must be careful not
    to do this:

        with open('playlist.cpe', 'r') as f:
            playlist = calliope.playlist.read(f)

        for item in playlist:
            # You will see 'ValueError: I/O operation on closed file.'.
            ...

    If you want to read the playlist in one operation, convert it to a list:

        with open('playlist.cpe', 'r') as f:
            playlist = list(calliope.playlist.read(f))

    '''
    for text in splitstream.splitfile(stream, format='json'):
        try:
            json_document = json.loads(text)
        except ValueError as e:
            raise PlaylistError from e
        if isinstance(json_document, dict):
            yield Item(json_document)
        elif isinstance(json_document, list):
            yield from (Item(item) for item in json_document)
        else:
            raise PlaylistError("Expected JSON object, got {}".format(type(json_document).__name__))
Example #2
def setup_superbatches():
    """
    Setup superbatch directory to allow for fine-grained dataset loading. Assumes
    yelp_academic_dataset_review.json exists in root directory. This method
    is run if the "--setup" flag is passed.
    """
    superbatch = []
    superbatch_id = 0
    superbatch_size = 500
    max_batches = 500

    # Stream the reviews file; the with-block ensures it is closed.
    with open(os.path.join(
            DATASET_PATH, "yelp_academic_dataset_review.json")) as reviews_file:
        reviews_loader = tqdm(splitfile(reviews_file, format="json"),
                              position=0,
                              leave=True)
        reviews_loader.set_description("Loading superbatch %d..." % superbatch_id)

        for jsonstr in reviews_loader:
            superbatch.append(json.loads(jsonstr))
            if len(superbatch) >= superbatch_size:
                with open(
                        os.path.join(
                            DATASET_PATH,
                            "superbatches/yelp_superbatch_%d.pkl" % superbatch_id),
                        'wb') as fout:
                    pickle.dump(superbatch, fout)
                superbatch = []
                superbatch_id += 1
                reviews_loader.set_description("Loading superbatch %d..." %
                                               superbatch_id)
                if superbatch_id > max_batches:
                    break
        # Note: any trailing partial superbatch is discarded.
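A hypothetical companion loader (not part of the original project), showing how one of these pickled superbatches would be read back:

import os
import pickle

def load_superbatch(superbatch_id, dataset_path):
    # Read one pickled superbatch (a list of review dicts) back into memory.
    path = os.path.join(dataset_path,
                        "superbatches/yelp_superbatch_%d.pkl" % superbatch_id)
    with open(path, "rb") as fin:
        return pickle.load(fin)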
Example #3
 def process_one(self):
     got_protocol = False

     # Sniff the first byte of the stream to pick a protocol: XML-RPC
     # requests start with '<', JSON-RPC requests with '{' or '['.
     s = b""
     while not self.__protocol:
         got_protocol = True
         s = self.input.read(1)
         if not s:
             # EOF before any request arrived; without this guard,
             # b'' in b'{[' is True and JSON-RPC would be chosen.
             raise EOFError()
         if s == b'<':
             self.__protocol = protocol.XmlRpc()
         elif s in b'{[':
             self.__protocol = protocol.JsonRpc()

     if not self.__split:
         # The sniffed byte is handed back via preamble= so the splitter
         # still sees each document from its first byte.
         self.__split = splitstream.splitfile(
             self.input, format=self.__protocol.splitfmt(),
             maxdocsize=1024*1024*120, preamble=s)

     if got_protocol:
         for a, kw in self.__regs:
             self.__protocol.register_function(*a, **kw)
         self.__regs = []
     
     try:
         for rsps in self.__split:
             response = self.__protocol.dispatch_request(rsps)
             self.output.write(response)
             self.output.flush()
             return
         raise EOFError()
     except Exception: # Internal error
         self.close()
         raise
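The preamble= argument above is what makes the one-byte sniff safe. A standalone sketch of that behavior, assuming preamble is simply prepended to whatever the splitter reads next:

import io
import json

from splitstream import splitfile

stream = io.BytesIO(b'"x": 1}{"y": 2}')  # first byte b'{' already consumed
docs = list(splitfile(stream, format="json", preamble=b'{'))
assert json.loads(docs[0]) == {"x": 1}
assert json.loads(docs[1]) == {"y": 2}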
Example #4
 def read_line():
     # Yield one parsed object per JSON document on input_handle;
     # malformed documents are logged and skipped.
     for jsonline in splitfile(input_handle, format="json"):
         try:
             yield json.loads(jsonline)
         except Exception:
             logger.error(
                 f'JSON parsing failed for record {jsonline}: {traceback.format_exc()}'
             )
Example #5
 def read_key(self, full_key):
     s3_object = self.client.get_key(full_key, bucket_name=self.bucket)
     # startdepth=1 splits out the records nested one level inside the
     # top-level JSON container instead of yielding the whole document.
     records_bytes = splitfile(s3_object.get()['Body'],
                               format="json",
                               startdepth=1,
                               bufsize=65536)
     for record_bytes in records_bytes:
         yield json.loads(record_bytes)
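A self-contained sketch of what startdepth=1 does, assuming the stored object holds one big JSON array of records:

import io
import json

from splitstream import splitfile

# With startdepth=1 the splitter descends into the outermost container,
# yielding its elements as separate documents.
stream = io.BytesIO(b'[{"id": 1}, {"id": 2}, {"id": 3}]')
records = [json.loads(r) for r in splitfile(stream, format="json", startdepth=1)]
assert records == [{"id": 1}, {"id": 2}, {"id": 3}]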
Example #6
 def _do_split(self, string, startdepth=0):
     f = self._loadstr(string)
     try:
         return list(
             splitstream.splitfile(f,
                                   "json",
                                   bufsize=self._bufsize,
                                   startdepth=startdepth))
     finally:
         f.close()
Example #7
def main():
    """main program"""

    csv.register_dialect('iperf3log', delimiter=',', quoting=csv.QUOTE_MINIMAL)

    csvwriter = csv.writer(sys.stdout, 'iperf3log')

    # accumulate sent/received volume per ip in a dict
    db = {}
    # yield each iperf3 test report as a parsed JSON object; bufsize=1
    # reads one byte at a time, which suits a live pipe at some speed cost
    objs = (json.loads(jsonstr) for jsonstr in splitfile(sys.stdin, format="json", bufsize=1))

    csvwriter.writerow(["date", "ip", "localport", "remoteport", "duration", "protocol", "num_streams", "cookie", "sent", "sent_mbps", "rcvd", "rcvd_mbps", "totalsent", "totalreceived"])
    for obj in objs:
        # caveat: assumes multiple streams are all from same IP so we take the 1st one
        # todo: handle errors and missing elements
        ip = (obj["start"]["connected"][0]["remote_host"]).encode('ascii', 'ignore')
        local_port = obj["start"]["connected"][0]["local_port"]
        remote_port = obj["start"]["connected"][0]["remote_port"]

        sent = obj["end"]["sum_sent"]["bytes"]
        rcvd = obj["end"]["sum_received"]["bytes"]
        sent_speed = obj["end"]["sum_sent"]["bits_per_second"] / 1000 / 1000
        rcvd_speed = obj["end"]["sum_received"]["bits_per_second"] / 1000 / 1000
        

        reverse = obj["start"]["test_start"]["reverse"]
        time = (obj["start"]["timestamp"]["time"]).encode('ascii', 'ignore')
        cookie = (obj["start"]["cookie"]).encode('ascii', 'ignore')
        protocol = (obj["start"]["test_start"]["protocol"]).encode('ascii', 'ignore')
        duration = obj["start"]["test_start"]["duration"]
        num_streams = obj["start"]["test_start"]["num_streams"]
        if reverse not in [0, 1]:
            sys.exit("unknown reverse")

        s = 0
        r = 0
        if ip in db:
            (s, r) = db[ip]

        if reverse == 0:
            r += rcvd
            sent = 0
            sent_speed = 0
        else:
            s += sent
            rcvd = 0
            rcvd_speed = 0

        db[ip] = (s, r)

        csvwriter.writerow([time, ip, local_port, remote_port, duration, protocol, num_streams, cookie, sent, sent_speed, rcvd, rcvd_speed, s, r])
    # for obj
    sys.exit(0)
Example #8
 def generate_jsons(self, data):
     # Split the byte buffer into complete JSON documents, consuming the
     # parsed prefix from self.buffer as we go; returns decoded strings.
     result = []
     f = io.BytesIO(data)
     for js in splitfile(f, format="json"):
         temp = js.decode("utf-8")
         self.buffer = self.buffer[len(temp):]
         result.append(temp)
     return result
Example #9
def convert_log_to_dict(input_text):
    """
    Convert the git log output (a stream of JSON documents) into a list of dicts.
    """

    items = []
    f = StringIO.StringIO(input_text)
    for jsonstr in splitfile(f, format="json"):
        try:
            items.append(json.loads(jsonstr))
        except Exception as e:
            return e

    return items
Example #10
        current_mode = element_mode
    else:
        raise ValueError("Can't currently mix modes!")
    io_elements += [element]
    #print(io_elements)


#Main loop
iocc_emitter.init(sys.argv[1], sys.argv[3])

#XXX
parse_connection({'id': 0})

try:
    with open(sys.argv[2], 'r') as iospec_f:
        for jsonstr in splitfile(iospec_f, format="json"):
            iospec = json.loads(jsonstr)
            if (iospec['type'] == "connection"):
                parse_connection(iospec)
            elif (iospec['type'] == "printf" or iospec['type'] == "display"):
                parse_io_element(iospec)
            else:
                raise ValueError("unknown iospec type: %s" % iospec['type'])
except FileNotFoundError as e:
    print("No iospec file generated, building dummy binary")
    current_mode = IOModes.PRINTF

#TODO support for multiple connections
if (current_mode == IOModes.PRINTF):
    iocc_emitter.emit_printf(list(connections.values())[0], io_elements)
elif (current_mode == IOModes.DISPLAY):
Example #11
def generate_json(log_file):
    """Reads log file as JSON"""
    with open(log_file, 'r') as f:
        for jsonstr in splitfile(f, format="json"):
            yield json.loads(jsonstr)
Example #12
 def __init__(self, protocol, input=None, output=None, process=None, socket=None):
     self.__input, self.__output = _ios(input, output, process, socket)
     self.__protocol = protocol
     self.__split = splitstream.splitfile(self.__input, format=protocol.splitfmt())
Example #13
def combine_dict(dict1, dict2):
    # Merge dict2 into dict1, summing values for keys present in both.
    for key in dict2:
        if key in dict1:
            dict1[key] += dict2[key]
        else:
            dict1[key] = dict2[key]
    return dict1

#MPI Variables
comm = MPI.COMM_WORLD
comm.Barrier()
size = comm.size
rank = comm.rank
data = None
if rank == 0:
    # startdepth=2: yield the tweet objects nested two levels deep in the
    # file (e.g. inside {"rows": [{...}, ...]}).
    data = splitfile(open("data/bigTwitter.json", "r"), format="json", startdepth=2)

# Number of tweets
count = 0
# Dictionary of languages
final_lang_dict = {}
# Dictionary of hashtags
final_hashtag_dict = {}
# Chunk size
chunk_size = size
# Tweets container
chunk = []


received_tweet = None
if rank == 0:
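The snippet is truncated above. A hypothetical continuation (not the original script) showing the role combine_dict() plays once every rank has counted its share:

# Hypothetical sketch: gather per-rank counts on rank 0 and merge them.
local_lang_dict = {"en": 3, "es": 1}  # stand-in for one rank's counts
gathered = comm.gather(local_lang_dict, root=0)
if rank == 0:
    for d in gathered:
        final_lang_dict = combine_dict(final_lang_dict, d)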
Example #14
if not sys.stdin.isatty():
    input_stream = sys.stdin
else:
    message = 'Expecting json to be piped in.'
    raise IndexError(message)

has_output_file = True
try:
    output_filename = sys.argv[1]
except IndexError:
    has_output_file = False

json_inv = {}
# Parse stdin; if several documents arrive, the last one wins.
for jsonstr in splitfile(input_stream, format="json"):
    json_inv = json.loads(jsonstr.decode('utf-8'))

inventory_str = ""

# Create [allservers]
inventory_str += "[allservers]\n"
for k in json_inv.keys():
    if isinstance(json_inv[k]["value"], list):
        for v in json_inv[k]["value"]:
            inventory_str += v + "\n"
    else:
        inventory_str += json_inv[k]["value"] + "\n"
inventory_str += "\n"

# Create sections for each Key
Example #15
 def _do_split(self, string, **kw):
     f = self._loadstr(string)
     try:
         return list(splitstream.splitfile(f, "xml", bufsize=self._bufsize, **kw))
     finally:
         f.close()
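For completeness, a minimal sketch of the "xml" mode this helper exercises, assuming splitfile() returns each document's raw bytes:

import io

from splitstream import splitfile

# Two concatenated XML documents, split on their root elements.
stream = io.BytesIO(b"<a>1</a><b>2</b>")
for doc in splitfile(stream, format="xml"):
    print(doc)  # expected: b'<a>1</a>' then b'<b>2</b>'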