Example #1
from typing import Iterable, Iterator


def sync_readings(session, rows: Iterable, sensors: list, awesome_sensors: dict,
                  reading_types: dict):
    """
    Bulk store readings
    """

    # Convert list to dictionary
    sensors = {sensor['name']: sensor for sensor in sensors}

    def get_readings(_rows) -> Iterator:
        for row in _rows:
            # Convert rows of portal data into portal readings
            yield from maps.row_to_readings(row,
                                            sensors=sensors,
                                            reading_types=reading_types,
                                            awesome_sensors=awesome_sensors)

    # Iterate over data chunks
    for chunk in utils.iter_chunks(
            get_readings(rows), chunk_size=settings.BULK_READINGS_CHUNK_SIZE):
        if chunk:
            try:
                objects.Reading.store_bulk(session, readings=chunk)
            # No more readings, so stop
            except exceptions.EmptyValueError:
                break
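
The utils.iter_chunks helper is project-local in each of these examples, and the call signatures differ: here the iterable comes first and the chunk size is passed as a keyword, while the later examples pass the chunk size as the first positional argument. A minimal, hypothetical sketch matching the call above (not the project's actual implementation):

from itertools import islice
from typing import Iterable, Iterator, List, TypeVar

T = TypeVar("T")


def iter_chunks(iterable: Iterable[T], chunk_size: int) -> Iterator[List[T]]:
    """Yield successive lists of up to chunk_size items from iterable."""
    iterator = iter(iterable)
    while True:
        chunk = list(islice(iterator, chunk_size))
        if not chunk:
            return
        yield chunk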
Example #2
def split_test_training(positions_w_context, est_num_positions):
    """Split a stream of positions into a test chunk and training chunks."""
    print("Estimated number of chunks: %s" % (est_num_positions // CHUNK_SIZE),
          file=sys.stderr)
    desired_test_size = 10**5
    if est_num_positions < 2 * desired_test_size:
        # Small dataset: materialize it all, reserve the first third for
        # testing and wrap the remainder as a single training chunk.
        positions_w_context = list(positions_w_context)
        test_size = len(positions_w_context) // 3
        return positions_w_context[:test_size], [positions_w_context[test_size:]]
    else:
        # Large dataset: take a fixed-size test chunk from the shuffled
        # stream and split the rest into CHUNK_SIZE-sized training chunks.
        shuffled_positions = utils.shuffler(positions_w_context)
        test_chunk = utils.take_n(desired_test_size, shuffled_positions)
        training_chunks = utils.iter_chunks(CHUNK_SIZE, shuffled_positions)
        return test_chunk, training_chunks
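
Note the reversed argument order relative to Example #1: here the chunk size is the first positional argument. The companion helper utils.take_n is likewise project-local; a plausible stand-in, assuming it simply materialises the first n items of the shuffled stream:

from itertools import islice


def take_n(n, iterable):
    # Hypothetical stand-in: first n items of the iterable, as a list.
    return list(islice(iterable, n))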
Example #3
def main(argv):
    """Main program.
    """
    del argv  # Unused
    total_games = FLAGS.training_games
    total_moves = FLAGS.training_moves
    fresh = FLAGS.training_fresh
    batch_size = FLAGS.batch_size
    output_prefix = FLAGS.output_prefix

    spec = bigtable_input.BigtableSpec(FLAGS.cbt_project, FLAGS.cbt_instance,
                                       FLAGS.cbt_table)
    gq_r = bigtable_input.GameQueue(spec.project, spec.instance, spec.table)
    gq_c = bigtable_input.GameQueue(spec.project, spec.instance,
                                    spec.table + '-nr')

    mix = bigtable_input.mix_by_decile(total_games, total_moves, 9)
    # One (spec, start_r, start_c, mix, batch_size, output path) work item
    # per training window, in reverse series order.
    trainings = [
        (spec, start_r, start_c, mix, batch_size,
         '{}{:0>10}_{:0>10}.tfrecord.zz'.format(output_prefix, start_r,
                                                start_c))
        for start_r, finish_r, start_c, finish_c in reversed(
            list(
                training_series(gq_r.latest_game_number,
                                gq_c.latest_game_number, mix, fresh)))
    ]

    # Optionally trim the list of work items based on --starting_game.
    if FLAGS.starting_game:
        game = FLAGS.starting_game
        starts = [t[1] for t in trainings]
        where = bisect.bisect_left(starts, game)
        trainings = trainings[where:]

    if FLAGS.max_trainings:
        trainings = trainings[:FLAGS.max_trainings]

    if FLAGS.dry_run:
        for t in trainings:
            print(t)
        raise SystemExit

    concurrency = min(FLAGS.concurrency, multiprocessing.cpu_count() * 2)
    with tqdm(desc='Training Sets', unit_scale=2,
              total=len(trainings)) as pbar:
        # Export the training sets in batches of at most `concurrency` items,
        # one worker process per item.
        for b in utils.iter_chunks(concurrency, trainings):
            with multiprocessing.Pool(processes=concurrency) as pool:
                pool.map(_export_training_set, b)
                pbar.update(len(b))
Example #4
    def delete_row_range(self, format_str, start_game, end_game):
        """Delete rows related to the given game range.

        Args:
          format_str:  a string to `.format()` by the game numbers
            in order to create the row prefixes.
          start_game:  the starting game number of the deletion.
          end_game:  the ending game number of the deletion.
        """
        row_keys = make_single_array(
            self.tf_table.keys_by_range_dataset(
                format_str.format(start_game),
                format_str.format(end_game)))
        row_keys = list(row_keys)
        if not row_keys:
            utils.dbg('No rows left for games %d..%d' % (
                start_game, end_game))
            return
        utils.dbg('Deleting %d rows:  %s..%s' % (
            len(row_keys), row_keys[0], row_keys[-1]))

        # Reverse the keys so that the queue is left in a more
        # sensible end state if you change your mind (say, due to a
        # mistake in the timestamp) and abort the process: there will
        # be a bit trimmed from the end, rather than a bit
        # trimmed out of the middle.
        row_keys.reverse()
        total_keys = len(row_keys)
        utils.dbg('Deleting total of %d keys' % total_keys)
        concurrency = min(MAX_BT_CONCURRENCY,
                          multiprocessing.cpu_count() * 2)
        with multiprocessing.Pool(processes=concurrency) as pool:
            batches = []
            with tqdm(desc='Keys', unit_scale=2, total=total_keys) as pbar:
                for b in utils.iter_chunks(bigtable.row.MAX_MUTATIONS,
                                           row_keys):
                    pbar.update(len(b))
                    batches.append((self.btspec, b))
                    if len(batches) >= concurrency:
                        pool.map(_delete_rows, batches)
                        batches = []
                pool.map(_delete_rows, batches)
                batches = []
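
The _delete_rows worker that each batch is mapped onto is not shown on this page. A rough sketch of what such a worker could look like with the google-cloud-bigtable client, assuming each work item is a (btspec, row_keys) tuple as built above; this is an illustration, not the project's actual code:

from google.cloud import bigtable


def _delete_rows(args):
    """Delete one batch of rows; kept at module level so Pool.map can pickle it."""
    btspec, row_keys = args
    client = bigtable.Client(project=btspec.project, admin=True)
    table = client.instance(btspec.instance).table(btspec.table)
    rows = []
    for key in row_keys:
        row = table.row(key)
        row.delete()  # queue a full-row deletion mutation
        rows.append(row)
    table.mutate_rows(rows)  # send all queued mutations in one request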