예제 #1
0
 def test_tuple_array_dump(self):
     """A tuple written with ``pbjson.dump`` matches the encoding of the equivalent list."""
     values = (1, 2, 3)
     expected = pbjson.dumps(list(values))

     # First pass uses the default options; the second supplies a
     # ``convert`` hook. The encoded bytes must be the same both times,
     # i.e. the hook must not be applied to the tuple.
     for options in ({}, {'convert': repr}):
         buffer = BytesIO()
         pbjson.dump(values, buffer, **options)
         self.assertEqual(expected, buffer.getvalue())
예제 #2
0
def main():
    """Command-line entry point: convert between pbjson and JSON (or YAML).

    The input is read as bytes.  If it decodes to non-empty text it is
    parsed as JSON (or YAML when ``--yaml`` is given, or as a fallback when
    JSON parsing fails and PyYAML is available) and written out as pbjson.
    Otherwise the input is treated as pbjson and written out as JSON, YAML
    (``--yaml``) or a Python ``repr`` (``--repr``).

    Raises:
        SystemExit: with the parser's error as the message when the input
            cannot be parsed.
    """
    parser = argparse.ArgumentParser(
        description='Convert between pbjson and json',
        epilog='If converting a PBJSON file with binary elements, you may need to use `--repr` since JSON cannot handle binary data.')
    parser.add_argument('-r', '--repr', action='store_true', help='instead of converting to JSON, just output the `repr` of the object')
    parser.add_argument('-p', '--pretty', action='store_true', help='make it nice for humans')
    if yaml is not None:
        # The --yaml option only exists when PyYAML could be imported, so
        # ``args.yaml`` must always be guarded by ``yaml is not None``.
        parser.add_argument('-y', '--yaml', action='store_true', help='input or output is YAML instead of JSON')
    parser.add_argument('infile', nargs='?', type=argparse.FileType('rb'), default=sys.stdin, help='filename to convert from or to pbjson (default: stdin)')
    parser.add_argument('outfile', nargs='?', type=argparse.FileType('wb'), default=sys.stdout, help='filename to write the converted file to (default: stdout)')
    args = parser.parse_args()

    # The non-string defaults above are used as-is (not passed through
    # FileType), so the default streams are the text-mode sys.stdin and
    # sys.stdout.  Read bytes from the underlying binary buffer when one
    # exists (Python 3); otherwise fall back to the stream itself.
    infile = args.infile
    if infile is sys.stdin:
        infile = getattr(sys.stdin, 'buffer', sys.stdin)
    contents = infile.read()
    try:
        text = contents.decode()
    except Exception:
        # Deliberately broad: anything that is not decodable text is
        # assumed to be binary pbjson input.
        text = None

    if text:
        # Text input: parse it and emit pbjson.
        if yaml is not None and args.yaml:
            try:
                # NOTE(review): ``Loader`` is defined elsewhere in the file;
                # if it is not a safe loader, loading untrusted YAML can
                # construct arbitrary objects -- confirm.
                obj = yaml.load(text, Loader=Loader)
            except (ValueError, yaml.YAMLError) as exc:
                # PyYAML reports malformed input with YAMLError, which is
                # not a ValueError subclass.
                raise SystemExit(exc)
        else:
            try:
                obj = json.loads(text, object_pairs_hook=OrderedDict)
            except ValueError as exc:
                if yaml is None:
                    raise SystemExit(exc)
                # Not valid JSON: fall back to YAML when it is available.
                try:
                    obj = yaml.load(text, Loader=Loader)
                except (ValueError, yaml.YAMLError) as yaml_exc:
                    raise SystemExit(yaml_exc)
        # pbjson output is bytes, so it cannot go to a text-mode stdout.
        outfile = args.outfile
        if outfile is sys.stdout:
            outfile = getattr(sys.stdout, 'buffer', sys.stdout)
        pbjson.dump(obj, outfile)
    else:
        # Binary (or empty) input: decode pbjson and emit a text format.
        try:
            obj = pbjson.loads(contents, document_class=OrderedDict)
        except ValueError as exc:
            raise SystemExit(exc)
        if yaml is not None and args.yaml:
            j = yaml.dump(obj, Dumper=Dumper)
        elif args.repr:
            j = pprint.pformat(obj, indent=1) if args.pretty else repr(obj)
        else:
            kw = {'ensure_ascii': False} if does_unicode else {}
            j = json.dumps(obj, sort_keys=True, indent=4 if args.pretty else None, **kw)
        if args.outfile is sys.stdout:
            # Text-mode stdout takes the string directly; end with a newline
            # so the shell prompt starts on its own line.
            j += '\n'
        else:
            # Files are opened in binary mode by FileType('wb').
            j = j.encode()
        args.outfile.write(j)
예제 #3
0
    # Keep only the itemsets whose size equals ``max_k``.
    # presumably ``frequent_itemsets`` is a pandas DataFrame with an
    # 'itemsets' column of set-like values -- TODO confirm against the
    # part of the function above this excerpt.
    frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
    k_frequent_itemsets = frequent_itemsets[frequent_itemsets['length'] == max_k]

    # create hash table from frequent itemsets
    if verbose:
        print('Creating hash table')

    # Maps each item to the union of all selected itemsets that contain it.
    track_hash = defaultdict(set)
    itemset_list = k_frequent_itemsets['itemsets'].to_list()
    # NOTE(review): this print (and the coverage print below) is emitted
    # regardless of ``verbose``.
    print('Num itemsets %d' % len(itemset_list))
    # Every item that appears in at least one selected itemset.
    coverage = set()
    for fr_set in itemset_list:
        for item in fr_set:
            # ``union`` builds a new set on every iteration rather than
            # updating the stored one in place.
            track_hash[item] = track_hash[item].union(fr_set)
            # Executed once per item although it only depends on ``fr_set``.
            coverage |= fr_set
    
    # Share of covered items relative to ``unique_songs`` (defined outside
    # this excerpt), printed as a percentage.
    print('Song Coverage: {:.5f}%'.format(len(coverage) / unique_songs * 100))

    # dump hash table as 'model'
    if verbose:
        print('Writing model')
    # NOTE(review): mode 'wb+' already truncates the file on open, so the
    # explicit truncate(0) looks redundant; the handle is also not closed
    # if pbjson.dump raises (no ``with`` / ``finally``).
    f = open(config['model']['FrequentItemsetsModel'], 'wb+')
    f.truncate(0)
    pbjson.dump(track_hash, f)
    f.close()

    # we are done
    if verbose:
        print('Done')