Example #1
def tokenize_dataset(input_folder: str, output_folder: str, chunk_size: int):
    make_dir(output_folder)
    data_writers = {
        TRAIN:
        DataWriter(os.path.join(output_folder, TRAIN),
                   file_prefix='data',
                   file_suffix='jsonl.gz',
                   chunk_size=chunk_size),
        VALID:
        DataWriter(os.path.join(output_folder, VALID),
                   file_prefix='data',
                   file_suffix='jsonl.gz',
                   chunk_size=chunk_size),
        TEST:
        DataWriter(os.path.join(output_folder, TEST),
                   file_prefix='data',
                   file_suffix='jsonl.gz',
                   chunk_size=chunk_size)
    }

    partition_counters = {TRAIN: Counter(), VALID: Counter(), TEST: Counter()}

    for i, (sample, partition) in enumerate(data_generator(input_folder)):
        data_writers[partition].add(sample)
        partition_counters[partition][sample[OUTPUT]] += 1

        if (i + 1) % chunk_size == 0:
            print('Wrote {0} samples.'.format(i + 1), end='\r')
    print()

    for writer in data_writers.values():
        writer.close()

    print(partition_counters)
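
Every example on this page calls a project-local `make_dir` helper before writing output. Its implementation is not shown in these snippets; a minimal sketch, assuming it simply wraps `os.makedirs` and tolerates directories that already exist, could look like this:

import os


def make_dir(folder: str):
    # Hypothetical sketch: create the directory (and any parents) if missing.
    # exist_ok avoids an error when the folder is already present.
    os.makedirs(folder, exist_ok=True)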
Example #2
    def __init__(self, hyper_parameters: HyperParameters, save_folder: str,
                 is_train: bool):
        self.hypers = hyper_parameters
        self.save_folder = save_folder
        self.metadata: Dict[str, Any] = dict()

        # Get the model output type
        self._output_type = OutputType[
            self.hypers.model_params['output_type'].upper()]

        make_dir(self.save_folder)
        self.name = 'model'  # Default name
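
The `OutputType[...]` lookup above indexes an enum by member name, which is why the configured string is upper-cased first. A minimal sketch of the assumed pattern (the member names here are hypothetical; the real `OutputType` definition is not shown):

from enum import Enum, auto


class OutputType(Enum):
    # Hypothetical members for illustration only
    MULTI_CLASSIFICATION = auto()
    REGRESSION = auto()


# Enum[name] looks a member up by name and raises KeyError for unknown names
output_type = OutputType['multi_classification'.upper()]
print(output_type)  # OutputType.MULTI_CLASSIFICATION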
Example #3
    def __init__(self,
                 output_folder: str,
                 file_prefix: str,
                 file_suffix: str,
                 chunk_size: int,
                 mode: str = 'w'):
        self._output_folder = output_folder
        self._file_prefix = file_prefix
        self._file_suffix = file_suffix
        self._chunk_size = chunk_size

        # Initialize the data list
        self._dataset: List[Any] = []

        # Create the output directory if necessary
        make_dir(self._output_folder)

        # Set the writing mode
        mode = mode.lower()
        if mode in ('w', 'write'):
            self._mode = WriteMode.WRITE
        elif mode in ('a', 'append'):
            self._mode = WriteMode.APPEND
        else:
            raise ValueError(f'Unknown writing mode: {mode}')

        # Set the initial file index
        self._file_index = 0
        if self._mode == WriteMode.APPEND:
            # Regex to extract index from existing files
            file_name_regex = re.compile(
                rf'{file_prefix}([0-9]+)\.{file_suffix}')

            # Get index from all existing files
            for file_name in os.listdir(output_folder):
                match = file_name_regex.match(file_name)
                if match is not None:
                    index = int(match.group(1))
                    self._file_index = max(self._file_index, index + 1)
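
Combined with Example #1, the intended lifecycle appears to be: construct the writer, `add` samples, then `close`. A short usage sketch in append mode (the `add` and `close` methods are assumed from the other examples, and the arguments are illustrative):

# Hypothetical usage; append mode resumes numbering after existing chunk files
writer = DataWriter(output_folder='data/train',
                    file_prefix='data',
                    file_suffix='jsonl.gz',
                    chunk_size=5000,
                    mode='a')
writer.add({'inputs': [1, 2, 3], 'output': 0})
writer.close()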
Example #4
            partition = TEST

        writers[partition].add(sample)
        label_counters[partition][sample[OUTPUT]] += 1

        if (index + 1) % CHUNK_SIZE == 0:
            print('Completed {0} samples'.format(index + 1), end='\r')
    print()

    # Close all writers
    for writer in writers.values():
        writer.close()

    print(label_counters)


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--input-folder', type=str, required=True)
    parser.add_argument('--output-folder', type=str, required=True)
    args = parser.parse_args()

    random.seed(42)
    make_dir(args.output_folder)

    print('Starting Training Dataset...')
    write_dataset(args.input_folder, args.output_folder, series=TRAIN)

    print('Starting Test Dataset...')
    write_dataset(args.input_folder, args.output_folder, series=TEST)
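
Assuming the script above were saved as create_dataset.py (the file name is hypothetical), it would be invoked as:

python create_dataset.py --input-folder raw_data --output-folder dataset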
Example #5
    parser.add_argument('--log-folder',
                        type=str,
                        required=True,
                        help='Path to folder containing the simulation logs.')
    parser.add_argument('--power-system-type',
                        type=str,
                        choices=['bluetooth', 'temp'],
                        required=True,
                        help='The sensor type.')
    parser.add_argument('--output-folder',
                        type=str,
                        help='Path to the output folder.')
    args = parser.parse_args()

    output_folder = args.log_folder if args.output_folder is None else args.output_folder
    make_dir(output_folder)

    # We first copy all models that are neither SAMPLE nor BUDGET models to the output folder. This is done for convenience
    for log_file_name in os.listdir(args.log_folder):
        if ('SAMPLE_RNN' not in log_file_name) and ('BUDGET_RNN'
                                                    not in log_file_name):
            old_path = os.path.join(args.log_folder, log_file_name)
            new_path = os.path.join(output_folder, log_file_name)
            copyfile(old_path, new_path)

    # Restore the given models and get the validation results
    adaptive_model_accuracy: List[Dict[float, float]] = []
    fixed_model_accuracy: List[Dict[float, float]] = []

    adaptive_logs: List[Dict[str, Dict[str, Dict[str, Any]]]] = []
    fixed_budget_logs: List[Dict[str, Dict[str, Dict[str, Any]]]] = []
Example #6
def plot_and_save(sim_results: Dict[str, SimulationResult],
                  runtime_systems: List[RuntimeSystem], output_folder: str,
                  budget: float, max_time: int,
                  noise_generator: NoiseGenerator, noise_terms: List[float],
                  power_system_type: PowerType, should_plot: bool,
                  save_plots: bool):
    # Make the output folder if necessary
    make_dir(output_folder)

    # Log the test results for each system (adaptive systems also get a validation estimate)
    model_names: Set[str] = set()

    system_dict = {system.name: system for system in runtime_systems}
    for system_name in sorted(sim_results.keys()):
        system = system_dict[system_name]
        sim_result = sim_results[system_name]

        # We compute the validation accuracy for this budget for the adaptive models.
        # This allows us to choose which backend model to select at testing time.
        if system.system_type == SystemType.ADAPTIVE:
            valid_accuracy = system.estimate_validation_results(
                budget=budget, max_time=max_time)
        else:
            valid_accuracy = None

        model_names.add(system_name.split()[0])

        log_file_name = LOG_FILE_FMT.format(system.system_type.name.lower(),
                                            system.model_name,
                                            power_system_type.name.lower())
        log_path = os.path.join(output_folder, log_file_name)
        save_test_log(accuracy=sim_result.accuracy[-1],
                      power=sim_result.power[-1],
                      valid_accuracy=valid_accuracy,
                      budget=budget,
                      key=str(noise_generator),
                      system_name=system.name,
                      output_file=log_path)

        print('{0} Accuracy: {1:.5f}, {0} Power: {2:.5f}'.format(
            system_name, sim_result.accuracy[-1], sim_result.power[-1]))

    if not should_plot:
        return

    # Filter the simulation results
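    # baseline_to_plot is not a parameter of this function; it is assumed
    # to be defined elsewhere in the source module.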
    systems_to_keep = []
    if baseline_to_plot in ('under_budget', 'all'):
        for name in model_names:
            systems_to_keep.append('{0} FIXED_UNDER_BUDGET'.format(name))

    sim_results = {
        system_name: result
        for system_name, result in sim_results.items()
        if system_name in systems_to_keep or 'ADAPTIVE' in system_name
        or 'RANDOMIZED' in system_name
    }
    colors = {
        system_name: COLORS[i]
        for i, system_name in enumerate(sim_results.keys())
    }

    # List of times for plotting
    times = np.arange(max_time) + 1

    # Plot the results
    with plt.style.context('seaborn-ticks'):
        fig, (ax1, ax2, ax3) = plt.subplots(figsize=(16, 12),
                                            nrows=3,
                                            ncols=1,
                                            sharex=True)

        # Plot the energy noise terms
        ax1.plot(times, noise_terms, color='#e34a33')
        ax1.set_title('Per-Step Energy Noise')
        ax1.set_ylabel('Energy (mJ)')

        # Plot the Setpoints of each system
        for system_name, sim_result in sorted(sim_results.items()):
            if 'adaptive' in system_name.lower():
                ax2.plot(times,
                         sim_result.target_budgets * max_time,
                         label=system_name,
                         color=colors[system_name])

        ax2.axhline(budget * max_time, color='k', linewidth=2)

        ax2.legend(fontsize=9)
        ax2.set_title('Budget Setpoint')
        ax2.set_ylabel('Energy (mJ)')

        # Plot the Moving Avg Power of each system
        for system_name, sim_result in sorted(sim_results.items()):
            avg_power = moving_avg_power(sim_result.energy, window=20)
            diff = len(times) - len(avg_power)

            ax3.plot(times[diff:],
                     avg_power,
                     label=system_name,
                     color=colors[system_name])

        ax3.axhline(budget, color='k', linewidth=2)

        ax3.legend(loc='lower center', fontsize=8)
        ax3.set_title('Moving Average Power for Each Policy')
        ax3.set_ylabel('Power (mW)')
        ax3.set_xlabel('Time')

        plt.tight_layout()

        if save_plots:
            output_file = os.path.join(output_folder,
                                       'results_{0}.pdf'.format(budget))
            plt.savefig(output_file)
        else:
            plt.show()
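
The `moving_avg_power` helper used for the third panel is not shown in this snippet. A minimal sketch, assuming it averages the per-step energy readings over a sliding window via a convolution (which is consistent with the `diff` trimming of the time axis above):

import numpy as np


def moving_avg_power(energy: np.ndarray, window: int) -> np.ndarray:
    # Hypothetical sketch: sliding-window average of per-step energy.
    # 'valid' mode yields len(energy) - window + 1 points, so the caller
    # trims the time axis by the length difference before plotting.
    kernel = np.ones(window) / window
    return np.convolve(energy, kernel, mode='valid')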
Example #7
def split_dataset(input_folder: str, output_folder: str,
                  fractions: List[float], file_prefix: str, chunk_size: int,
                  file_type: str):
    assert len(fractions) == len(
        PARTITIONS
    ), 'Must provide enough fractions to account for all partitions'
    assert file_type in FILE_TYPES, f'Invalid file type: {file_type}'

    # Make output folder if necessary
    make_dir(output_folder)

    # Create the data manager
    data_manager = get_data_manager(input_folder,
                                    SAMPLE_ID,
                                    DATA_FIELDS,
                                    extension=file_type)
    data_manager.load()
    data_iterator = data_manager.iterate(should_shuffle=False,
                                         batch_size=chunk_size)
    num_samples = data_manager.length

    # Get folders for each partition
    train_folder = os.path.join(output_folder, TRAIN)
    valid_folder = os.path.join(output_folder, VALID)
    test_folder = os.path.join(output_folder, TEST)

    # Track counts per partition
    partition_counters: Counter = Counter()

    # Create data writers
    if file_type == 'npz':
        partition_writers = {
            TRAIN:
            NpzDataWriter(train_folder,
                          file_prefix=file_prefix,
                          file_suffix=file_type,
                          chunk_size=chunk_size,
                          sample_id_name=SAMPLE_ID,
                          data_fields=DATA_FIELDS,
                          mode='w'),
            VALID:
            NpzDataWriter(valid_folder,
                          file_prefix=file_prefix,
                          file_suffix=file_type,
                          chunk_size=chunk_size,
                          sample_id_name=SAMPLE_ID,
                          data_fields=DATA_FIELDS,
                          mode='w'),
            TEST:
            NpzDataWriter(test_folder,
                          file_prefix=file_prefix,
                          file_suffix=file_type,
                          chunk_size=chunk_size,
                          sample_id_name=SAMPLE_ID,
                          data_fields=DATA_FIELDS,
                          mode='w')
        }
    else:
        partition_writers = {
            TRAIN:
            DataWriter(train_folder,
                       file_prefix=file_prefix,
                       file_suffix=file_type,
                       chunk_size=chunk_size,
                       mode='w'),
            VALID:
            DataWriter(valid_folder,
                       file_prefix=file_prefix,
                       file_suffix=file_type,
                       chunk_size=chunk_size,
                       mode='w'),
            TEST:
            DataWriter(test_folder,
                       file_prefix=file_prefix,
                       file_suffix=file_type,
                       chunk_size=chunk_size,
                       mode='w')
        }

    # Write to chunked files
    for index, sample in enumerate(data_iterator):
        partition_index = get_partition_index(sample, fractions)
        partition_folder = PARTITIONS[partition_index]

        partition_writers[partition_folder].add(sample)
        partition_counters[partition_folder] += 1

        if (index + 1) % chunk_size == 0:
            print(f'Completed {index + 1}/{num_samples} samples.', end='\r')
    print()

    # Flush any remaining data samples
    for writer in partition_writers.values():
        writer.flush()

    # Print out metrics and save metadata
    print('====== RESULTS ======')
    total = sum(partition_counters.values())
    metadata: Dict[str, Dict[str, float]] = dict()
    for series in PARTITIONS:
        count = partition_counters[series]
        frac = count / total
        metadata[series] = dict(count=count, frac=frac)

        print(f'{series.capitalize()}: {count} ({frac:.03f})')

    metadata_file = os.path.join(output_folder, 'metadata.json')
    save_by_file_suffix(metadata, metadata_file)
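
The split above relies on a `get_partition_index` helper that is not shown. A minimal sketch, assuming it hashes the sample id against the cumulative fractions so the partition assignment is deterministic across runs (the hashing scheme is an assumption):

import hashlib
from typing import Any, Dict, List


def get_partition_index(sample: Dict[str, Any], fractions: List[float]) -> int:
    # Hypothetical sketch; SAMPLE_ID is the id field name used above
    digest = hashlib.md5(str(sample[SAMPLE_ID]).encode()).hexdigest()
    point = (int(digest, 16) % 10000) / 10000.0  # Pseudo-uniform value in [0, 1)
    cumulative = 0.0
    for index, frac in enumerate(fractions):
        cumulative += frac
        if point < cumulative:
            return index
    return len(fractions) - 1  # Guard against floating point rounding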
Example #8
from camera import VideoStreamer, VideoWriter
from abc import abstractmethod
from math import pi
import cv2
import cv2.aruco as aruco
import numpy as np
import os
import time
from utils import file_utils

CALIBRATION_FILE = "camera_calibration/calibration_parameters/arducam.yaml"
POSE_DIR = "marker_detection/logs/pose_data"
file_utils.make_dir(POSE_DIR)
POSE_FILE = file_utils.create_file_name_date() + ".txt"  # Default pose file name

DEFAULT_FREQ = 20  # Hz
''' Marker Tracker Classes
    These classes are used to track a marker using a single camera. They maintain various image and marker data, and
    can be used to retrieve the pose relative to the camera. Each class is used to detect a different kind of marker.
    The classes are derived from the VideoStreamer class, which is used to retrieve images from a camera or video file 
    in a parallel thread.'''


# Abstract Base Class. Each marker tracker needs to define marker length, pose, the current frame,
# and some variables used for visualization.
class MarkerTracker(VideoStreamer):
    def __init__(self,
                 src=0,
                 use_pi=-1,
                 resolution=480,
Example #9
            params_files.extend(iterate_files(params_file, pattern=r'.*json'))
        else:
            params_files.append(params_file)

    for params_file in params_files:
        assert os.path.exists(
            params_file), f'The file {params_file} does not exist!'
        assert params_file.endswith(
            '.json'), 'The params file must be a JSON file.'

    trials = max(args.trials, 1)
    num_models = trials * len(params_files)

    # Create save folder (if necessary)
    base_save_folder = args.save_folder
    make_dir(base_save_folder)

    # Create date-named folder for better organization
    current_day = datetime.now().strftime('%d_%m_%Y')
    save_folder = os.path.join(base_save_folder, current_day)
    make_dir(save_folder)

    for data_folder in args.data_folders:
        print(f'Started {data_folder}')
        print('====================')

        # Use absolute path to avoid issues with relative referencing during later optimization phases
        data_folder = os.path.abspath(data_folder)

        for trial in range(trials):
            print(f'Starting trial {trial+1}/{trials}')