Beispiel #1
0
def test_insert():
    """Check that successive bulk inserts accumulate in a single KVFile.

    Consecutive key ranges are inserted with the default batch size and then
    with explicit batch sizes of 40000, 1 and 0; the entry count is checked
    after each stage, plus one direct lookup after the first stage.
    """
    from kvfile import KVFile

    def pairs(start, stop):
        # Lazily produce ('<n>', ':<n>') tuples for n in [start, stop).
        return ((str(n), ':{}'.format(n)) for n in range(start, stop))

    store = KVFile()

    store.insert(pairs(0, 50000))
    assert len(list(store.keys())) == 50000
    assert len(list(store.items())) == 50000
    assert store.get('49999') == ':49999'

    store.insert(pairs(50000, 100000), batch_size=40000)
    assert len(list(store.items())) == 100000

    store.insert(pairs(100000, 100002), batch_size=1)
    store.insert(pairs(100002, 100005), batch_size=0)
    assert len(list(store.items())) == 100005
Beispiel #2
0
def _sorter(rows, key_calc, reverse, batch_size):
    """Pass ``rows`` through a KVFile and yield them back in its item order.

    Each row is stored under ``key_calc(row)`` followed by the row's position
    as an 8-digit hexadecimal suffix, so rows sharing a calculated key still
    get distinct storage keys.

    NOTE(review): the resulting order depends on KVFile.items() iterating by
    key -- confirm against the kvfile documentation.

    :param rows: iterable of rows to store
    :param key_calc: callable returning the string key for a row
    :param reverse: forwarded to ``KVFile.items(reverse=...)``
    :param batch_size: forwarded to ``KVFile.insert(batch_size=...)``
    """
    def keyed(source):
        # Pair every row with its storage key: calculated key + position.
        for position, item in enumerate(source):
            yield key_calc(item) + "{:08x}".format(position), item

    store = KVFile()
    store.insert(keyed(rows), batch_size=batch_size)

    for _key, item in store.items(reverse=reverse):
        yield item
Beispiel #3
0
def _sorter(rows, key_calc, reverse, batch_size):
    """Store ``rows`` in a KVFile and yield them back in its item order.

    The storage key of a row is ``key_calc(row)`` with the row's enumeration
    index appended as 8 hexadecimal digits, which keeps rows with equal
    calculated keys apart.

    NOTE(review): ordering relies on KVFile.items() iterating by key --
    confirm against the kvfile documentation.

    :param rows: iterable of rows to store
    :param key_calc: callable returning the string key for a row
    :param reverse: forwarded to ``KVFile.items(reverse=...)``
    :param batch_size: forwarded to ``KVFile.insert(batch_size=...)``
    """
    store = KVFile()

    keyed_rows = (
        (key_calc(item) + '{:08x}'.format(position), item)
        for position, item in enumerate(rows)
    )
    store.insert(keyed_rows, batch_size=batch_size)

    for _key, item in store.items(reverse=reverse):
        yield item
Beispiel #4
0
def test_filename():
    """Check that entries written to a named KVFile are visible to a new instance.

    One instance opened with an explicit filename is filled and discarded;
    a second instance opened with the same filename must expose the same
    50000 entries.
    """
    from kvfile import KVFile, db_kind

    db_path = ''.join(('bla.filename.', db_kind, '.db'))

    writer = KVFile(filename=db_path)
    writer.insert((str(n), ':{}'.format(n)) for n in range(50000))
    # Drop the only reference to the first instance before reopening.
    del writer

    reader = KVFile(filename=db_path)
    assert len(list(reader.keys())) == 50000
    assert len(list(reader.items())) == 50000
    assert reader.get('49999') == ':49999'
Beispiel #5
0
def test_insert_generator():
    """Check that ``insert_generator`` both echoes and stores the given pairs.

    The pairs yielded back while iterating must equal the input list, and
    afterwards the store must contain all 50 entries.
    """
    from kvfile import KVFile

    store = KVFile()
    pairs = [(str(n), ':{}'.format(n)) for n in range(50)]

    # Unpack and re-tuple so the comparison is against plain 2-tuples.
    echoed = [(key, value) for key, value in store.insert_generator(pairs)]

    assert pairs == echoed
    assert len(list(store.keys())) == 50
    assert len(list(store.items())) == 50
    assert store.get('49') == ':49'
Beispiel #6
0
def test_sanity():
    """Round-trip several value types through a KVFile and check iteration order.

    Values of type str, int, datetime, Decimal, set and a nested dict are
    stored with ``set`` and must come back equal from ``get``. Keys and items
    must then be listed in ascending key order, and in descending order when
    ``reverse=True`` is passed.
    """
    from kvfile import KVFile

    store = KVFile()

    samples = {
        's': 'value',
        'i': 123,
        'd': datetime.datetime.fromtimestamp(12325),
        'n': decimal.Decimal('1234.56'),
        'ss': set(range(10)),
        'o': {
            'd': decimal.Decimal('1234.58'),
            'n': datetime.datetime.fromtimestamp(12325),
        },
    }

    for name, stored in samples.items():
        store.set(name, stored)

    for name, expected in samples.items():
        assert store.get(name) == expected

    # Keys are unique, so the item tuples sort by key alone and the
    # descending order is exactly the ascending order reversed.
    ascending_keys = sorted(samples)
    ascending_items = sorted(samples.items())

    assert list(store.keys()) == ascending_keys
    assert list(store.items()) == ascending_items

    assert list(store.keys(reverse=True)) == ascending_keys[::-1]
    assert list(store.items(reverse=True)) == ascending_items[::-1]
 def _get_resource(self, last_update_resource=None):
     """Yield dataservice rows, optionally merged over rows from a previous run.

     When ``last_update_resource`` is given, its rows are indexed in a KVFile
     under a key built from the fields named by the ``incremental-field-key``
     parameter (default: the primary key field). For each key the row with
     the greatest ``incremental-field`` value is kept, and the greatest value
     seen overall becomes ``last_update``. If ``last_update`` is truthy it is
     passed to ``dataservice_class.get_all`` as ``since_last_update``; unless
     the ``incremental-field-type`` parameter is ``'integer'`` it is first
     moved back one day and formatted as ``%Y-%m-%d``.

     Every fetched object goes through ``_filter_dataservice_object`` and has
     its integer-typed schema fields converted with ``int()`` (``None`` values
     are left alone). In incremental mode the fetched rows overwrite the
     stored rows and the whole store is yielded after fetching; otherwise
     each row is yielded as soon as it is fetched.

     If the ``OVERRIDE_DATASERVICE_COLLECTION_LIMIT_ITEMS`` environment
     variable is non-empty, fetching stops once the number of rows yielded so
     far is greater than that value. The counter only advances while rows are
     being yielded, so the limit has no effect during incremental fetching.

     :param last_update_resource: iterable of rows from a previous run, or
         ``None`` to load everything from the dataservice
     :return: generator of rows
     :raises ValueError: if the limit environment variable is set to a
         non-integer value
     """
     last_kvfile, last_update, key_fields, incremental_field = None, None, None, None

     def row_key(row):
         # The store key is the '-'-joined string values of the key fields.
         return '-'.join([str(row[k]) for k in key_fields])

     if last_update_resource is not None:
         last_kvfile = KVFile()
         key_fields = self._parameters.get('incremental-field-key',
                                           [self._primary_key_field_name])
         incremental_field = self._parameters['incremental-field']
         for row in last_update_resource:
             key = row_key(row)
             try:
                 last_row = last_kvfile.get(key)
             except KeyError:
                 last_row = None
             # Keep only the newest row per key and track the newest value.
             if not last_row or last_row[incremental_field] < row[
                     incremental_field]:
                 last_kvfile.set(key, dict(row))
                 if not last_update or last_update < row[incremental_field]:
                     last_update = row[incremental_field]
         if last_update:
             logging.info('last_update={}'.format(last_update))
     resources_yielded = 0
     with utils.temp_loglevel():
         logging.info(
             "Loading dataservice resource from service {} method {}".
             format(self._parameters["service-name"],
                    self._parameters["method-name"]))
         # with process_metrics('dataservice_collection_row',
         #                      {'service_name': self._parameters['service-name'],
         #                       'method_name': self._parameters['method-name']}) as send_process_metrics:
         if last_update:
             if self._parameters.get('incremental-field-type') == 'integer':
                 last_update_str = last_update
             else:
                 last_update_str = (
                     last_update -
                     datetime.timedelta(days=1)).strftime('%Y-%m-%d')
             since_last_update = (self._parameters['incremental-field'],
                                  last_update_str,
                                  self._parameters.get(
                                      'incremental-field-type', 'datetime'))
         else:
             since_last_update = None
         # Loop-invariant values, computed once instead of per fetched object:
         # the optional row limit from the environment ...
         limit_setting = os.environ.get(
             "OVERRIDE_DATASERVICE_COLLECTION_LIMIT_ITEMS", "")
         items_limit = int(limit_setting) if limit_setting else None
         # ... and the names of the schema fields declared as integers
         # (a field without a "type" entry is treated as non-integer).
         integer_fields = {field["name"]
                           for field in self._schema["fields"]
                           if field.get("type") == "integer"}
         for dataservice_object in self.dataservice_class.get_all(
                 since_last_update=since_last_update):
             row = self._filter_dataservice_object(dataservice_object)
             if items_limit is not None and items_limit < resources_yielded:
                 return
             for k in row:
                 if k in integer_fields and row[k] is not None:
                     row[k] = int(row[k])
             if last_update:
                 # Incremental mode: newer data replaces the stored row;
                 # nothing is yielded until fetching has finished.
                 last_kvfile.set(row_key(row), dict(row))
             else:
                 resources_yielded += 1
                 yield row
             # send_process_metrics()
             if resources_yielded > 0 and resources_yielded % 10000 == 0:
                 logging.info("Loaded {} dataservice objects".format(
                     resources_yielded))
         if last_update:
             for _, row in last_kvfile.items():
                 resources_yielded += 1
                 yield row
                 if resources_yielded % 10000 == 0:
                     logging.info("Loaded {} dataservice objects".format(
                         resources_yielded))