예제 #1
0
 def keys_and_values():
     """Yield one ``((key1, key2), data)`` pair per item in the chunk at ``t_path``.

     ``key1`` is a UUID built from the item's ``stream_time.epoch_ticks``
     integer and ``key2`` is a UUID built from its hex ``doc_id``.  ``data``
     is the item after ``streamcorpus.serialize`` followed by
     ``streamcorpus.compress_and_encrypt``.

     Each time the consumer resumes the generator after a yield, the
     reversed key pair ``(key2, key1)`` with an empty value is appended to
     ``inverted_keys``.  The running payload total, in units of 2**20
     bytes, is logged for every item.

     Raises:
         AssertionError: if ``compress_and_encrypt`` reports errors.
     """
     total_mb = 0.
     for si in streamcorpus.Chunk(t_path):
         key1 = uuid.UUID(int=si.stream_time.epoch_ticks)
         key2 = uuid.UUID(hex=si.doc_id)
         data = streamcorpus.serialize(si)
         errors, data = streamcorpus.compress_and_encrypt(data)
         # Raise explicitly instead of using ``assert`` so the check is not
         # stripped when Python runs with -O; the exception type and
         # argument are the same as before.
         if errors:
             raise AssertionError(errors)
         total_mb += float(len(data)) / 2**20
         logger.info('%r, %r --> %d, %.3f', key1, key2, len(data), total_mb)
         yield (key1, key2), data
         # Runs only once the consumer asks for the next item.
         inverted_keys.append( ((key2, key1), r'') )
예제 #2
0
        def keys_and_values():
            """Yield ``((key1, key2), data)`` per chunk item and collect index entries.

            ``key1`` is a UUID built from the item's
            ``stream_time.epoch_ticks`` integer and ``key2`` is a UUID built
            from its hex ``doc_id``.  ``data`` is the item after
            ``streamcorpus.serialize`` followed by
            ``streamcorpus.compress_and_encrypt``.

            Each time the consumer resumes the generator after a yield, one
            entry per name in ``indexes`` is appended to ``indexes[name]``:

            * ``'doc_id_epoch_ticks'``: ``((key2, key1), r'')``
            * ``'with_source'``: ``((key1, key2), si.source)``, skipped when
              ``si.source`` is falsy

            Raises:
                AssertionError: if ``compress_and_encrypt`` reports errors,
                    or if ``indexes`` contains an unrecognized name.
            """
            for si in streamcorpus.Chunk(t_path):
                key1 = uuid.UUID(int=si.stream_time.epoch_ticks)
                key2 = uuid.UUID(hex=si.doc_id)
                data = streamcorpus.serialize(si)
                errors, data = streamcorpus.compress_and_encrypt(data)
                # Raise explicitly instead of using ``assert`` so the check
                # is not stripped when Python runs with -O.
                if errors:
                    raise AssertionError(errors)

                yield (key1, key2), data

                for ndx in indexes:
                    if ndx == 'doc_id_epoch_ticks':
                        kvp = ((key2, key1), r'')
                    elif ndx == 'with_source':
                        ## si.source can be None but we can't write None blobs to kvlayer
                        if not si.source:
                            continue
                        kvp = ((key1, key2), si.source)
                    else:
                        # With a bare ``assert False`` this branch vanishes
                        # under -O and a stale or unbound ``kvp`` would be
                        # appended below; an explicit raise always fires.
                        raise AssertionError('invalid index type ' + ndx)
                    indexes[ndx].append(kvp)
예제 #3
0
        def keys_and_values():
            """Yield ``((key1, key2), data)`` per chunk item and collect index entries.

            ``key1`` is a UUID built from the item's
            ``stream_time.epoch_ticks`` integer and ``key2`` is a UUID built
            from its hex ``doc_id``.  ``data`` is the item after
            ``streamcorpus.serialize`` followed by
            ``streamcorpus.compress_and_encrypt``.

            Each time the consumer resumes the generator after a yield, one
            entry per name in ``indexes`` is appended to ``indexes[name]``:

            * ``'doc_id_epoch_ticks'``: ``((key2, key1), r'')``
            * ``'with_source'``: ``((key1, key2), si.source)``, skipped when
              ``si.source`` is falsy

            Raises:
                AssertionError: if ``compress_and_encrypt`` reports errors,
                    or if ``indexes`` contains an unrecognized name.
            """
            for si in streamcorpus.Chunk(t_path):
                key1 = uuid.UUID(int=si.stream_time.epoch_ticks)
                key2 = uuid.UUID(hex=si.doc_id)
                data = streamcorpus.serialize(si)
                errors, data = streamcorpus.compress_and_encrypt(data)
                # Raise explicitly instead of using ``assert`` so the check
                # is not stripped when Python runs with -O.
                if errors:
                    raise AssertionError(errors)

                yield (key1, key2), data

                for ndx in indexes:
                    if ndx == 'doc_id_epoch_ticks':
                        kvp = ((key2, key1), r'')
                    elif ndx == 'with_source':
                        ## si.source can be None but we can't write None blobs to kvlayer
                        if not si.source:
                            continue
                        kvp = ((key1, key2), si.source)
                    else:
                        # With a bare ``assert False`` this branch vanishes
                        # under -O and a stale or unbound ``kvp`` would be
                        # appended below; an explicit raise always fires.
                        raise AssertionError('invalid index type ' + ndx)
                    indexes[ndx].append(kvp)