コード例 #1
0
ファイル: json_to_cbor.py プロジェクト: yqyunjie/dots_client
def to_cbor(json_file, output_file):
    """Convert the JSON document in `json_file` into CBOR at `output_file`.

    The parsed JSON is passed through `replace_dict_key_recursively` with
    `JSON_TO_CBOR_MAPPER` before it is encoded.
    """
    print('Converting to cbor')
    with open(json_file, 'r') as source, open(output_file, 'wb') as sink:
        parsed = json.load(source)
        remapped = replace_dict_key_recursively(parsed, JSON_TO_CBOR_MAPPER)
        cbor.dump(remapped, sink)
コード例 #2
0
    def add(self, item):
        '''Append `item` to `current_file` as CBOR.

        If no file is open yet, the temporary file is opened first (through
        gzip when `compress` is set) and `current_file_path` is set to the
        final path for the chunk.  After every `max_chunk_size` items the
        file is rolled.

        '''
        if self.current_file is None:
            ## construct the final path to which this file will be moved
            ## when it rolls
            self.current_file_path = os.path.join(
                self.output_dir,
                'trec-dd-local-politics-%d.cbor' % self.total_written)
            if self.compress:
                opener, suffix = gzip.open, '.gz'
            else:
                opener, suffix = open, ''
            self.current_file = opener(self.tmp_file_path, 'wb')
            self.current_file_path += suffix

        ## write the data
        cbor.dump(item, self.current_file)

        ## roll the files each time we reach max_chunk_size
        self.total_written += 1
        if not self.total_written % self.max_chunk_size:
            self.roll()
コード例 #3
0
def main(n):
    """Emit `n` records to `stdout` through `dump`.

    Each record carries a 'url' entry built from the loop index plus one
    key/value pair obtained from `identifier`, with the value wrapped in a
    single-element list.
    """
    for index in range(n):
        record = {'url': 'http://{0}.localdomain/{0}'.format(index)}
        key, value = identifier(index)
        record[key] = [value]
        dump(record, stdout)
コード例 #4
0
ファイル: coordinatec.py プロジェクト: diffeo/coordinate
 def do_status(self, args):
     """Report every work spec together with its work-unit counts.

     When ``args.logfile`` is set, each report is appended to that file as
     a CBOR record carrying a timestamp; otherwise the report is written
     to ``self.stdout`` as one line of JSON.  The report is repeated every
     ``args.repeat_seconds`` seconds while that value is greater than
     zero, and produced once otherwise.
     """
     log_fh = None
     try:
         while True:
             specs = []
             for spec in self.task_master.iter_work_specs():
                 spec_name = spec['name']
                 specs.append({
                     'name': spec_name,
                     'data': spec,
                     'counts': self.task_master.count_work_units(spec_name),
                 })
             specs = sorted(specs, key=lambda entry: entry['name'])

             if not args.logfile:
                 # write json text to stdout
                 self.stdout.write(json.dumps(specs) + '\n')
                 self.stdout.flush()
             else:
                 record = {'time': time.time(), 'ws': specs}
                 # the log file is opened lazily and reused across rounds
                 if log_fh is None:
                     log_fh = open(args.logfile, 'ab')
                 cbor.dump(record, log_fh)
                 log_fh.flush()

             interval = args.repeat_seconds
             if interval is None or not (interval > 0.0):
                 break
             time.sleep(interval)
     finally:
         if log_fh is not None:
             log_fh.close()
コード例 #5
0
def save_variables(filename='demo_vars'):
    """Dump the plain-data attributes of ``mode_library`` to a CBOR file.

    Every attribute of ``mode_library`` whose name does not start with a
    double underscore and whose exact type is ``int``, ``bool``, ``str``,
    ``dict`` or ``list`` is collected into one dict, which is then written
    to ``filename`` with ``cbor.dump``.

    Args:
        filename: Path of the output file; opened in binary write mode.

    A failure while encoding is reported on stdout and otherwise ignored
    (best-effort save).  The output file is closed in every case.
    """
    # Exact-type match on purpose: subclasses are excluded.  Instances of
    # these types are never callables or modules, so no extra filtering is
    # required.
    saveable_types = (int, bool, str, dict, list)

    variables = {}
    for variable_name in dir(mode_library):
        if variable_name.startswith('__'):
            continue
        variable = getattr(mode_library, variable_name)
        if type(variable) in saveable_types:
            variables[variable_name] = variable

    with open(filename, 'wb') as outfile:
        try:
            cbor.dump(variables, outfile)
        except Exception:
            print("      Error during save...")
コード例 #6
0
 def dump(self, key, conversation_id, payload):
     """Save `payload` as CBOR when ``self.save`` is set, else pretty-print it.

     The file is named ``<key>.<conversation_id>.cbor``; if that name
     already exists, ``<key>.<conversation_id>.2.cbor`` is used instead.
     """
     if not self.save:
         print("Payload:")
         pprint.pprint(payload)
         return

     target = f"{key}.{conversation_id}.cbor"
     if os.path.exists(target):
         # first-choice name is taken; fall back to the ".2" variant
         target = f"{key}.{conversation_id}.2.cbor"
     with open(target, "wb") as sink:
         cbor.dump(payload, sink)
     print("Payload saved as", target)
コード例 #7
0
 def write(self, kind: FileType) -> None:
     """Serialize the tree to ``self._path(kind)`` in the format named by `kind`.

     Each supported `FileType` has its own branch below; a `kind` that
     matches none of them writes nothing.
     """
     path = self._path(kind)

     if kind == FileType.PICKLE:
         # serialize as TreeNode
         with open(path, "wb") as out:
             pickle.dump(self.treenode, out, protocol=-1)
         return

     if kind == FileType.CSV:
         # serialize as id_dict
         with open(path, "w") as out:
             writer = csv.DictWriter(out, Node._fields)
             writer.writeheader()
             for node in self.treenode.node_iter():
                 writer.writerow(node._asdict())
         return

     if kind == FileType.MSGPACK:
         # https://msgpack-python.readthedocs.io/en/latest/api.html
         # (passing use_bin_type=True with self._to_dict() did not improve speed)
         with open(path, "wb") as out:
             msgpack.pack(self.to_dict_list(), out)
         return

     if kind == FileType.JSON:
         self._json_dump(path, json.dump)
         return

     if kind == FileType.UJSON:
         self._json_dump(path, ujson.dump)
         return

     if kind == FileType.SIMPLEJSON:
         # NOTE: simplejson includes key names when serializing NamedTuples
         with open(path, "w") as out:
             if self.json_dict_list:
                 payload = list(self.id_dict.values())
             else:
                 payload = self.id_dict
             simplejson.dump(payload, out, ensure_ascii=True)
         return

     if kind == FileType.CBOR2:
         with open(path, "wb") as out:
             cbor2.dump(self.to_dict_list(), out)
         return

     if kind == FileType.CBOR:
         with open(path, "wb") as out:
             cbor.dump(self.to_dict_list(), out)
         return

     if kind == FileType.RAPIDJSON:
         # https://python-rapidjson.readthedocs.io/en/latest/benchmarks.html
         # TODO: See this example for possible speed improvement - deeper integration with Node
         #  https://python-rapidjson.readthedocs.io/en/latest/encoder.html
         # NOTE: can't use id_dict - keys must be strings
         #       can't use self.id_dict.values() - not serializable
         #       list(self.id_dict.values()) produces a list of lists - no keys - very fragile
         with open(path, "w") as out:
             use_dict_list = self.json_dict_list
             encode = rapidjson.Encoder(number_mode=rapidjson.NM_NATIVE, ensure_ascii=False)
             if use_dict_list:
                 payload = self.to_dict_list()
             else:
                 payload = list(self.id_dict.values())
             encode(payload, out)
         return

     if kind == FileType.BSON:
         with open(path, "wb") as out:
             options = CodecOptions(document_class=RawBSONDocument)
             # one BSON document per node, written back to back
             for node in self.treenode.node_iter():
                 out.write(BSON.encode(node._asdict(), codec_options=options))
コード例 #8
0
def main():
    """Convert streamcorpus.Chunk files into one CBOR file of page slots.

    Every StreamItem from the input chunks is optionally run through the
    ``structured_features`` transform (``--xform``) and then through
    ``profile_page``.  When that yields a result, it is decoded with
    ``cbor.loads``; its ``'slots'`` mapping is rewritten with lower-cased
    keys and NFKC-normalized, lower-cased values, the item's ``abs_url`` is
    stored under ``'url'``, and the record is appended to the output file.
    The output is gzip-compressed when the output path ends in ``.gz``.

    Progress is printed every 100 items.  ``--limit`` stops all processing
    once that many StreamItems have been handled.
    """
    parser = argparse.ArgumentParser(
        'process streamcorpus.Chunk files to generate CBOR files'
        ' to load into memex_dossier.akagraph.'
    )
    parser.add_argument('input_paths', nargs='+',
                        help='paths to streamcorpus.Chunk files')
    # The output path is used unconditionally below, so let argparse reject
    # a missing value instead of failing later on ``None.endswith``.
    parser.add_argument('--output-path', required=True,
                        help='cbor file (or cbor.gz) to create')
    parser.add_argument('--xform', action='store_true', default=False,
                        help='run structured_features transform before page_extractors')
    parser.add_argument('--total', type=int, help='anticipated number of StreamItems')
    parser.add_argument('--limit', type=int,
                        help='stop processing after this many StreamItems')
    args = parser.parse_args()

    xform = structured_features(structured_features.default_config)

    fopen = gzip.open if args.output_path.endswith('.gz') else open
    fh = fopen(args.output_path, 'wb')

    count = 0
    limit_reached = False
    start = time.time()
    try:
        for path in args.input_paths:
            for si in Chunk(path):
                count += 1
                if count % 100 == 0:
                    elapsed = time.time() - start
                    rate = count / elapsed
                    msg = '%d done in %.1f secs --> %.1f per sec' % (count, elapsed, rate)
                    if args.total:
                        remaining = (args.total - count) / rate
                        msg += ' --> %.1f sec remaining' % remaining
                    print(msg)
                    sys.stdout.flush()
                if args.limit and count > args.limit:
                    limit_reached = True
                    break
                if args.xform:
                    si = xform(si)
                slots = profile_page(si)
                if slots:
                    slots = cbor.loads(slots)
                    better_slots = {}
                    # .items() works on both Python 2 and Python 3
                    for key, values in slots['slots'].items():
                        assert isinstance(values, list), values
                        better_slots[key.lower()] = [
                            unicodedata.normalize('NFKC', v).lower()
                            for v in values
                        ]
                    better_slots['url'] = si.abs_url
                    cbor.dump(better_slots, fh)
            if limit_reached:
                # do not open the remaining chunks once the limit is hit
                break
    finally:
        # close the output even when a chunk fails part-way through
        fh.close()
    print('done')
コード例 #9
0
ファイル: work_spec.py プロジェクト: diffeo/coordinate
 def _cbor_dump(self, fileo):
     """Write ``self.__getstate__()`` to `fileo` as CBOR while holding ``self.mutex``.

     Returns whatever ``cbor.dump`` returns.  A failure inside ``cbor.dump``
     is logged with its traceback and then re-raised.
     """
     with self.mutex:
         snapshot = self.__getstate__()
         try:
             result = cbor.dump(snapshot, fileo)
         except:
             # log with traceback, then let the caller see the failure
             logger.error('could not cbor serialize state for spec %s', self.name, exc_info=True)
             raise
         else:
             return result
コード例 #10
0
    def mark_as_downloaded(self, link_key, obj_changes):
        """Record a finished download and reduce `obj_changes` to its status.

        If the file for `link_key` does not exist yet, every dimension in
        ``obj_changes["dims"]`` except ``"download_complete"`` is written to
        it as CBOR.  Afterwards ``obj_changes["dims"]`` is replaced, in
        place, by a dict holding only ``"download_complete"`` (set to True)
        and ``"error_reason"`` (the incoming value if present, otherwise an
        empty string record).
        """
        link_as_file = self.make_link_into_file(link_key)
        if not os.path.exists(link_as_file):
            # add the data to the file
            link_data = {
                dimname: dimchange
                for dimname, dimchange in obj_changes["dims"].items()
                # compare by value; identity against a literal is unreliable
                if dimname != "download_complete"
            }

            with open(link_as_file, "wb") as link_fh:
                cbor.dump(link_data, link_fh)

        new_data = {"download_complete": {"type": Record.BOOL, "value": True}}
        if "error_reason" in obj_changes["dims"]:
            new_data["error_reason"] = obj_changes["dims"]["error_reason"]
        else:
            new_data["error_reason"] = {"type": Record.STRING, "value": ""}
        obj_changes["dims"] = new_data
コード例 #11
0
ファイル: mkfw.py プロジェクト: vooon/ota-mkfw
def main():
    """Build an OTA firmware container from the command-line arguments.

    Each input file becomes one image entry holding its size, SHA-1 digest,
    optional load address / DFU alternate setting, and its contents.  The
    contents are stored zlib-compressed (wrapped in CBOR tag 12222) when
    compression is enabled and actually makes the data smaller, and raw
    otherwise.  The tuple ``("OTAFWv1", image_meta, images)`` is written to
    the output file as CBOR.

    Raises:
        subprocess.CalledProcessError: with ``--git-identity``, when a git
            command exits with a non-zero status.
    """
    def dflt_build_date():
        # BUILD_DATE from the environment wins; otherwise use the local "now"
        v = os.environ.get("BUILD_DATE")
        if v:
            return dt_parse(v)
        return DT.now(tz=tzlocal())

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-o", "--outfile", metavar="OUTFILE", type=argparse.FileType("wb"), required=True, help="Output file")
    parser.add_argument("--no-compress", action="store_true", help="Do not compress images")

    meta = parser.add_argument_group("Metadata")
    meta.add_argument("--desc", help="Description")
    meta.add_argument("-b", "--board", required=True, help="Board ID")
    meta.add_argument("-d", "--build-date", type=dt_parse, default=dflt_build_date(), help="Build date")
    meta_scm_excl = meta.add_mutually_exclusive_group(required=True)
    meta_scm_excl.add_argument("-vr", "--ver-rev", nargs=2, metavar=('VER', 'REV'), help="Version and SCM revision strings")
    meta_scm_excl.add_argument("--git-identity", action="store_true", help="Get revision and version from GIT SCM")

    # unfortunately argparse can't parse flags like this: "-n abc.bin file.bin -a 0x00800000 file2.bin" to:
    # <group #1: name=abc.bin srcfile=file.bin>
    # <group #2: load_addr=0x00800000 srcfile=file2.bin>
    # so per-image options are matched to input files by position instead.
    srcgroup = parser.add_argument_group("Source file arguments")
    srcgroup.add_argument("-n", "--name", action="append", help="Image name (e.g. firmware.bin)")
    srcgroup.add_argument("-a", "--load-addr", metavar="ADDR", type=lambda x: int(x, base=0), action="append", help="Image load address")
    srcgroup.add_argument("--dfu-alt", metavar="N", type=int, action="append", help="DFU Alternate setting")
    srcgroup.add_argument("infile", metavar="INFILE", type=argparse.FileType("rb"), nargs="+", help="Source file(s)")

    args = parser.parse_args()

    # I found bug in _cbor.so: don't encode Tag 0 (ver 0.1.25)
    # https://bitbucket.org/bodhisnarkva/cbor/issues/11/failed-to-encode-tag-0-invalid-negative

    if args.git_identity:
        # check_output raises CalledProcessError when git fails, instead of
        # silently producing empty version/revision strings.
        rev_parse = subprocess.check_output(["git", "rev-parse", "HEAD"])
        describe = subprocess.check_output(["git", "describe", "--always", "--dirty"])

        ver, rev = describe.decode().strip(), rev_parse.decode().strip()
    else:
        ver, rev = args.ver_rev

    image_meta = {
        'description': args.desc or '',
        'build_date': cbor.Tag(0, args.build_date.isoformat()),
        'version': ver,
        'revision': rev,
        'board': args.board,
    }

    images = {}

    def getindex(lst, idx, default=None):
        # positional lookup that tolerates a missing or too-short list
        if lst and len(lst) > idx:
            return lst[idx]
        return default

    for i, src in enumerate(args.infile):
        name = getindex(args.name, i, basename(src.name))
        addr = getindex(args.load_addr, i)
        dalt = getindex(args.dfu_alt, i)

        image = {}
        if addr is not None:
            image['load_address'] = addr
        if dalt is not None:
            image['dfu_alt'] = dalt

        with src as fd:
            data = bytes(fd.read())

        image['size'] = len(data)
        image['sha1sum'] = bytes(hashlib.sha1(data).digest())

        # Compression gains little on random-looking data, but typical
        # firmware images shrink; only pay for zlib when it is allowed.
        image['image'] = data
        if not args.no_compress:
            deflated = bytes(zlib.compress(data, 9))
            if len(deflated) < len(data):
                # Tag: 'z' * 100 + 22 -> zipped, base64 repr
                image['image'] = cbor.Tag(12222, deflated)

        images[name] = image

    with args.outfile as fd:
        # In version 1.0.0 Tag(0) bug fixed.
        cbor.dump(
            ("OTAFWv1", image_meta, images),
            fd)
コード例 #12
0
    def _write_block(self, blktype, **kwargs):
        """Log, then CBOR-encode to `self.fh`, a block of `blktype` with `kwargs` as fields."""
        LOG.debug(
            'writing block: type=%s, content=%s',
            blktype,
            repr(kwargs),
        )

        # 'blktype' goes in first so the key order matches dict(blktype=..., **kwargs)
        block = {'blktype': blktype}
        block.update(kwargs)
        cbor.dump(block, self.fh)
コード例 #13
0
 def Dumps(self, dict_obj, stream_buf):
     """Write `dict_obj` to `stream_buf` by calling ``cbor.dump``.

     Nothing is returned; the encoded output goes to `stream_buf`.
     """
     cbor.dump(dict_obj, stream_buf)
コード例 #14
0
ファイル: b2j.py プロジェクト: hillbw/exi-test
# Parse the JSON string from the file into
# a Python dictionary object
data = json.load(fin)

# Write the object to the file in BSON
fout.write(bson.serialize_to_bytes(data))

# Close both files
fin.close()
fout.close()

# Write the CBOR encoding to a new file.  The `with` block closes the
# file even if encoding fails part-way through.
with open(fname + '.cbor', 'wb') as fout:
    cbor.dump(data, fout)


# Open the BSON version in read-only mode, and a new file
# for the roundtrip JSON output.
fin = open(fname + '.bson', 'rb')
fout = open(fname + '-roundtrip.json', 'w')

# Parse the BSON file into a Python dictionary object
data = bson.parse_stream(fin)

# Dump the dictionary object out in JSON format
# NOTE(review): fin and fout are still open at this point - confirm the
# rest of the script closes them.
json.dump(data, fout)
コード例 #15
0
ファイル: writer.py プロジェクト: larsks/muxdemux
    def _write_block(self, blktype, **kwargs):
        """Log, then CBOR-encode to `self.fh`, a block of `blktype` with `kwargs` as fields."""
        content = repr(kwargs)
        LOG.debug('writing block: type=%s, content=%s', blktype, content)

        # 'blktype' goes in first so the key order matches dict(blktype=..., **kwargs)
        block = {'blktype': blktype}
        block.update(kwargs)
        cbor.dump(block, self.fh)
コード例 #16
0
ファイル: _cbor_chunk.py プロジェクト: qiugen/streamcorpus
 def write_msg_impl(self, msg):
     """CBOR-encode `msg` onto the output chunk file handle, which must be set."""
     out_fh = self._o_chunk_fh
     assert out_fh is not None
     cbor.dump(msg, out_fh)
コード例 #17
0
ファイル: _cbor_chunk.py プロジェクト: diffeo/streamcorpus
 def write_msg_impl(self, msg):
     """CBOR-encode `msg` onto the output chunk file handle, which must be set."""
     chunk_fh = self._o_chunk_fh
     assert chunk_fh is not None
     cbor.dump(msg, chunk_fh)