def serialize_power_sync(dumpdir, experiment):
    ps_num = experiment.num
    ps = experiment.powerSync
    dirname = os.path.join(dumpdir, 'power_sync')
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    filename = os.path.join(dirname, '%d.bin' % (ps_num))
    serde.serialize(ps, filename)
def config(clazz):
    """Decorator that turns a plain Python class into a configuration object
    that can be serialized to a configuration file and deserialized back.

    :param clazz: class to decorate
    :return: the decorated class
    """
    return deserialize(serialize(dataclass(clazz)))
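# Hypothetical usage sketch for the `config` decorator above, assuming the
# chained `dataclass`/`serialize`/`deserialize` come from dataclasses and
# pyserde, whose JSON helpers are used here; the class and field names are
# illustrative only.
from serde.json import from_json, to_json

@config
class TrainingConfig:
    lr: float
    epochs: int

cfg = TrainingConfig(lr=0.01, epochs=10)
text = to_json(cfg)                          # write the config out as JSON
restored = from_json(TrainingConfig, text)   # load it back into an object
assert restored == cfg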
def serialize_measurement(dumpdir, measurement, light=False):
    m_num = measurement.num
    iface = measurement.interface
    op = measurement.operation
    dirname = os.path.join(dumpdir, iface, op)
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    filename = os.path.join(dirname, '%d.bin' % (m_num))
    if light:
        # In light mode, drop the bulky loglines before serializing and
        # restore them on the in-memory object afterwards.
        loglines = measurement.loglines
        del measurement.loglines
        serde.serialize(measurement, filename)
        measurement.loglines = loglines
    else:
        serde.serialize(measurement, filename)
def postprocess(self, inference_output, accept='application/python-pickle'):
    import torch
    from serde import serialize

    detached_inference_output = inference_output.detach()
    serialized_data = serialize(detached_inference_output.numpy(), accept)
    return serialized_data
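# Why detach() before numpy() in the handler above: PyTorch refuses to convert
# a tensor that still requires grad. A minimal, self-contained illustration
# (the tensor values are made up):
import torch

t = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
# t.numpy()               # would raise a RuntimeError: detach() is required
arr = t.detach().numpy()  # the detached tensor converts cleanly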
        weights[word] = (1 + log10(count))

    length = vecLength(weights.values())
    norm_weights = {word: weight / length for word, weight in weights.items()}
    for word, norm_weight in norm_weights.items():
        # Document uses cosine normalization
        index[word].add((doc_id, norm_weight))

# Create a dictionary which stores the data necessary to retrieve
# the posting for the term
dictionary = {}

# Include the document frequency and total document count in the dictionary
# for quick retrieval by the search script (uppercase keys prevent collisions
# with word stems, which are lowercase)
dictionary["DF"] = df
dictionary["N"] = N

# Write the values of the index to a postings file
with open(args.postings, 'wb') as postings_file:
    for word, docs in index.items():
        s = serialize(docs)
        # Write a tuple with information for indexing the postings file, as well
        # as the count of documents to potentially optimize searching
        dictionary[word] = (len(docs), len(s), postings_file.tell())
        postings_file.write(s)

# Write the dictionary to the specified dictionary file
with open(args.dictionary, 'wb') as dict_file:
    dict_file.write(serialize(dictionary))
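# Sketch of how a search script could read back one term's postings using the
# (doc_count, byte_length, offset) tuples written above. This assumes a
# `deserialize` counterpart to `serialize` (e.g. pickle.loads paired with
# pickle.dumps); the term 'example' is illustrative only.
with open(args.dictionary, 'rb') as dict_file:
    dictionary = deserialize(dict_file.read())

doc_count, byte_length, offset = dictionary['example']
with open(args.postings, 'rb') as postings_file:
    postings_file.seek(offset)                            # jump to this term's block
    postings = deserialize(postings_file.read(byte_length))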
    bar: 'ForwardReferenceBar'


@serialize
@deserialize
@dataclass
class ForwardReferenceBar:
    i: int


# assert type is str
assert 'ForwardReferenceBar' == dataclasses.fields(ForwardReferenceFoo)[0].type

# setup pyserde for Foo after Bar becomes visible to global scope
deserialize(ForwardReferenceFoo)
serialize(ForwardReferenceFoo)

# now the type really is of type Bar
assert ForwardReferenceBar == dataclasses.fields(ForwardReferenceFoo)[0].type
assert ForwardReferenceBar == next(serde.compat.dataclass_fields(ForwardReferenceFoo)).type


# verify usage works
def test_string_forward_reference_works():
    h = ForwardReferenceFoo(bar=ForwardReferenceBar(i=10))
    h_dict = {"bar": {"i": 10}}

    assert to_dict(h) == h_dict
    assert from_dict(ForwardReferenceFoo, h_dict) == h
def serialize_metadata(dumpdir, experiments):
    filename = os.path.join(dumpdir, 'metadata.bin')
    d = get_metadata(experiments)
    serde.serialize(d, filename)
from dataclasses import dataclass

from serde import deserialize, serialize
from serde.json import from_json, to_json


@dataclass
class Foo:
    i: int
    s: str
    bar: 'Bar'  # Specify the type annotation as a string (forward reference).


@deserialize
@serialize
@dataclass
class Bar:
    f: float
    b: bool


# Evaluate pyserde decorators after `Bar` is defined.
deserialize(Foo)
serialize(Foo)


def main():
    f = Foo(i=10, s='foo', bar=Bar(f=100.0, b=True))
    print(f"Into Json: {to_json(f)}")

    s = '{"i": 10, "s": "foo", "bar": {"f": 100.0, "b": true}}'
    print(f"From Json: {from_json(Foo, s)}")


if __name__ == '__main__':
    main()
index["ALL_DOCS"].add(doc_id) for sentence in nltk.sent_tokenize(doc): # Split the string into individual words to index for word in nltk.word_tokenize(sentence): # Stem each word to its root to reduce dictionary size, with a # trade-off in the precision of queries word = stemmer.stem(word) ''' # Rough work for essay questions word = normalize(word) if skipword(word): continue ''' index[word].add(doc_id) # Create a dictionary which stores the data necessary to retrieve # the posting for the term dictionary = {} # For each term, write a skiplist of the documents to the postings file with open(args.postings, 'wb') as postings_file: for word, docs in index.items(): s = serialize(skiplist(sorted(list(docs)))) dictionary[word] = (len(docs), len(s), postings_file.tell()) postings_file.write(s) # Write the dictionary to the specified dictionary file with open(args.dictionary, 'wb') as dict_file: dict_file.write(serialize(dictionary))