def main(*, port=DEFAULT_PORT, mode=None, delay=None):
    if mode:
        global _mode
        if mode not in VALID_MODES:
            _helper.errorExit('unrecognized mode: ' + mode)
        _mode = mode

    if _mode is None:
        print('Starting server in default mode.')
    else:
        print('Starting server in mode ' + _mode + '.')

    if delay:
        global _debug_delay
        _debug_delay = delay / 1000.0

    #class ThreadedHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
    #    pass
    #httpd = ThreadedHTTPServer((HOST_NAME, port), Handler)

    httpd = http.server.HTTPServer((HOST_NAME, port), Handler)

    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        pass

    httpd.server_close()
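
# A minimal usage sketch for the server main() above (hypothetical values;
# DEFAULT_PORT, VALID_MODES, HOST_NAME, Handler, and the _mode/_debug_delay
# globals are defined elsewhere in this module):
#
#     main(port=8080, mode='debug', delay=250)
#
# would serve on port 8080 in mode 'debug' (assuming 'debug' appears in
# VALID_MODES) and set _debug_delay to 0.25 seconds; how the Handler uses the
# delay is defined elsewhere.
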
def main(foldername):
    if os.path.isfile(foldername):
        _helper.errorExit('Must be a folder, not an individual file')

    # find all the datasets to import
    datasets = []

    for item in os.listdir(foldername):
        itempath = os.path.abspath(os.path.join(foldername, item))
        if os.path.isfile(itempath):
            if _helper.isFilenameDatasetImportable(itempath):
                datasets.append(
                    (_helper.makeIdFromFilename(itempath), [itempath]))
        elif os.path.isdir(itempath):
            subitems = _helper.findDatasetImportableFilesRecursively(itempath)
            if len(subitems) > 0:
                datasets.append(
                    (_helper.makeIdFromFilename(itempath), subitems))

    # try to import all the datasets
    for name, files in datasets:
        if os.path.exists(_helper.datasetDir(name)):
            print('Dataset %s exists, skipping.' % name)
        else:
            print('Importing dataset %s.' % name)
            import_dataset.main(files, name=name)
        print()
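
# Sketch of the folder layout the importer main() above expects (illustrative
# names; which files count as importable is decided by
# _helper.isFilenameDatasetImportable):
#
#     data/
#         subjectA.csv          -> dataset ('subjectA', ['.../subjectA.csv'])
#         subjectB/
#             day1.csv.gz
#             day2.csv.gz       -> dataset ('subjectB', ['.../day1.csv.gz', '.../day2.csv.gz'])
#
# Each importable top-level file becomes its own dataset and each subfolder
# with importable files becomes one multi-file dataset; import_dataset.main()
# is then called for every dataset whose folder does not already exist.
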
def parseRange(what, rng):
    groups = re.match(r'(\d*)(-?)(\d*)', rng).groups()

    if groups[0] == '' and groups[1] == '' and groups[2] == '':
        _helper.errorExit(
            'Argument for ' + what +
            ' range has invalid form. Valid forms include: "1" or "1-3" or "-3" or "1-"'
        )
    elif groups[0] != '' and groups[1] == '' and groups[2] == '':
        start = int(groups[0])
        end = start
    else:
        start = int(groups[0]) if groups[0] != '' else None
        end = int(groups[2]) if groups[2] != '' else None

    if start is not None and end is not None:
        if end < start:
            _helper.errorExit('End ' + what + ' index must be >= start ' +
                              what + ' index')

    return start, end
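
# Examples of the range forms parseRange above accepts (derived from the
# regex it uses):
#
#     parseRange('sample', '1')    -> (1, 1)
#     parseRange('sample', '1-3')  -> (1, 3)
#     parseRange('sample', '-3')   -> (None, 3)
#     parseRange('sample', '1-')   -> (1, None)
#
# An empty or non-numeric string, or a range whose end is smaller than its
# start, is rejected via _helper.errorExit.
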
def main(dataset, *, allfiles=False):
    # Delete the dataset folder
    dataset_folder = _helper.datasetDir(dataset)

    if not os.path.exists(dataset_folder):
        _helper.errorExit('The dataset does not exist: ' + dataset_folder)

    shutil.rmtree(dataset_folder)
    print("Deleted dataset ", dataset_folder)

    if allfiles:

        # Delete the labels folder for the given dataset
        labels_folder = _folder.data_abspath(_helper._get_labels_folder(),
                                             dataset)

        if os.path.exists(labels_folder):
            shutil.rmtree(labels_folder)
            print("Deleted labels for the dataset ", labels_folder)

        # Delete exported labels files for the given dataset
        export_file = _helper.exportFilename(dataset)
        if os.path.exists(export_file):
            os.remove(export_file)
            print("Deleted exported labels file for the dataset ", export_file)

        # Delete all mturk submissions for the given dataset
        mturk_submit_folder = _folder.data_abspath('mturksubmit')
        if os.path.exists(mturk_submit_folder):
            mturk_session_ids = os.listdir(mturk_submit_folder)

            for session in mturk_session_ids:
                session_datasets = os.listdir(
                    os.path.join(mturk_submit_folder, session))
                if dataset in session_datasets:
                    dataset_folder = os.path.join(mturk_submit_folder, session,
                                                  dataset)
                    shutil.rmtree(dataset_folder)

            print("Deleted mturk submissions for the dataset ", dataset)
def main(filename, outfolder):
    if filename.endswith('.gz'):
        use_open = gzip.open
    else:
        use_open = open

    with use_open(filename, 'rt') as csvfile:
        header_rate, header_start_ms = _helper.process_actigraph_header(
            csvfile)

        header_start_sec = header_start_ms / 1000
        if header_start_sec != int(header_start_sec):
            _helper.errorExit('start time can only have second precision')
        header_start_sec = int(header_start_sec)

        csv_header = csvfile.readline().strip()

        if csv_header != 'Accelerometer X,Accelerometer Y,Accelerometer Z':
            _helper.errorExit(
                'unrecognized CSV header: only "Accelerometer X,Accelerometer Y,Accelerometer Z" supported'
            )

        tm = datetime.datetime.utcfromtimestamp(header_start_sec)
        tm_sample = 0

        outfile = None

        for row in csvfile:
            tm_msec = int(1000 * tm_sample / header_rate + 0.5)

            if outfile is None:
                outfilecsvname = 'NONE-NONE-NA.NONE-NONE.%04d-%02d-%02d-%02d-%02d-%02d-%03d-P0000.sensor.csv' % (
                    tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second,
                    tm_msec)
                outfilename = os.path.join(outfolder, 'default',
                                           'MasterSynced', '%04d' % tm.year,
                                           '%02d' % tm.month, '%02d' % tm.day,
                                           '%02d' % tm.hour, outfilecsvname)
                print('Create new hourly file: %s' % outfilecsvname)

                outfile = open(_helper.ensureDirExists(outfilename, True),
                               'wt')
                outfile.write(
                    'HEADER_TIME_STAMP,X_ACCELERATION_METERS_PER_SECOND_SQUARED,Y_ACCELERATION_METERS_PER_SECOND_SQUARED,Z_ACCELERATION_METERS_PER_SECOND_SQUARED\n'
                )

            tm_str = '%04d-%02d-%02d %02d:%02d:%02d.%03d' % (
                tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second,
                tm_msec)
            outfile.write(tm_str + ',' +
                          (','.join([(e if ('.' in e) else (e + '.0'))
                                     for e in row.strip().split(',')])) + '\n')

            tm_sample += 1
            if tm_sample == header_rate:
                prev_tm = tm
                tm = tm + datetime.timedelta(seconds=1)
                tm_sample = 0

                if prev_tm.year != tm.year or prev_tm.month != tm.month or prev_tm.day != tm.day or prev_tm.hour != tm.hour:
                    outfile.close()
                    outfile = None

        if outfile is not None:
            outfile.close()
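
# Worked example of the per-row timestamp math in the converter above
# (illustrative numbers): with header_rate = 30 Hz, tm_sample counts samples
# within the current second and resets to 0 each time a full second has been
# written, so
#
#     tm_sample = 0  -> tm_msec = int(1000 * 0 / 30 + 0.5)  = 0
#     tm_sample = 15 -> tm_msec = int(1000 * 15 / 30 + 0.5) = 500
#
# A new hourly output file is started whenever the hour (or day, month, year)
# of tm changes, matching the MasterSynced/YYYY/MM/DD/HH layout used for
# outfilename.
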
def main(filenames,
         *,
         name=None,
         labelfilenames=None,
         zoom=None,
         mag=DEFAULT_MAGNITUDE,
         sample=None,
         day=None):
    if len(filenames) > 1 and not name:
        _helper.errorExit(
            'Must specify a custom dataset --name when importing multiple files'
        )

    if mag <= 0:
        _helper.errorExit('magnitude must be positive')

    if sample is not None and day is not None:
        _helper.errorExit('Can only provide one of --sample and --day')

    start_sample, end_sample = None, None
    if sample is not None:
        start_sample, end_sample = parseRange('sample', sample)

    start_day, end_day = None, None
    if day is not None:
        start_day, end_day = parseRange('day', day)

    # load labels
    if not labelfilenames:
        labelfilenames = [
            _folder.file_abspath('common', 'labels_test.csv'),
            _folder.file_abspath('common', 'labels_unknown.csv')
        ]

    labels = []
    labels_names = set()

    for labelfile in labelfilenames:
        print('Reading labels from %s...' % labelfile)

        with open(labelfile, 'rt') as csvfile:
            reader = csv.DictReader(csvfile)

            if set(reader.fieldnames) != set(['label', 'red', 'green', 'blue'
                                              ]):
                _helper.errorExit('Incorrect label csv headers')

            for row in reader:
                label_name = row['label'].strip()
                rr = float(row['red'].strip())
                gg = float(row['green'].strip())
                bb = float(row['blue'].strip())

                if re.search(r'[^\w\- ]', label_name, re.ASCII):
                    _helper.errorExit(
                        'Only alphanumeric, underscore, dash, and space allowed in label names: '
                        + label_name)
                if label_name in labels_names:
                    _helper.errorExit('Duplicate label: ' + label_name)

                labels.append((label_name, rr, gg, bb))
                labels_names.add(label_name)
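
    # A label CSV that passes the header check above might look like this
    # (illustrative rows; the red/green/blue values appear to be 0-1 floats,
    # since they are later written with %0.2f into the dataset config):
    #
    #     label,red,green,blue
    #     Walking,0.20,0.60,0.20
    #     Unknown,0.50,0.50,0.50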

    # process arguments
    signal_names = []
    for filename in filenames:
        signal_names.append(_helper.makeIdFromFilename(filename))
    if len(signal_names) != len(set(signal_names)):
        _helper.errorExit('Duplicate signal names')

    if name:
        if not _helper.checkId(name, False):
            _helper.errorExit(
                'Only alphanumeric and underscore allowed in dataset names')
        dataset = name
    else:
        dataset = signal_names[0]

    if start_sample is not None or end_sample is not None:
        dataset = dataset + strRange('sample', start_sample, end_sample)
    if start_day is not None or end_day is not None:
        dataset = dataset + strRange('day', start_day, end_day)

    out_folder = _helper.datasetDir(dataset)
    tile_folder = _helper.datasetTileDir(dataset)

    if os.path.exists(out_folder):
        _helper.errorExit('Please remove output folder ' + out_folder)

    print('Using output folder ' + out_folder)

    _helper.ensureDirExists(out_folder, False)
    _helper.ensureDirExists(tile_folder, False)

    # read in data
    print('reading header...')

    # open files
    csvfiles = []
    for filename in filenames:
        if filename.endswith('.gz'):
            use_open = gzip.open
        else:
            use_open = open

        csvfiles.append(use_open(filename, 'rt'))

    # read headers
    files_start_ms = []
    dataset_rate = None

    for filename, csvfile in zip(filenames, csvfiles):
        header_rate, header_start_ms = _helper.process_actigraph_header(
            csvfile)

        if dataset_rate is None:
            dataset_rate = int(header_rate)
        elif dataset_rate != int(header_rate):
            _helper.errorExit('Multiple sample rates found')

        files_start_ms.append(header_start_ms)

    # determine sample range
    dataset_start_ms = min(files_start_ms)
    dataset_start_date = datetime.datetime.utcfromtimestamp(dataset_start_ms /
                                                            1000).date()

    if start_sample is not None or end_sample is not None:
        # sample range is already expressed in samples; nothing to convert
        pass

    if start_day is not None or end_day is not None:
        if start_day is not None:
            output_min_ms = 1000 * calendar.timegm(
                (dataset_start_date +
                 datetime.timedelta(days=(start_day - 1))).timetuple())
            start_sample = (max(output_min_ms, dataset_start_ms) -
                            dataset_start_ms) * dataset_rate / 1000
            if start_sample != int(start_sample):
                _helper.errorExit('day start sample error')
            start_sample = int(start_sample)
        else:
            start_sample = None

        if end_day is not None:
            output_max_ms = 1000 * calendar.timegm(
                (dataset_start_date +
                 datetime.timedelta(days=(end_day))).timetuple())
            end_sample = (output_max_ms -
                          dataset_start_ms) * dataset_rate / 1000
            if end_sample != int(end_sample):
                _helper.errorExit('day end sample error')
            end_sample = int(end_sample)
        else:
            end_sample = None
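
    # Worked example of the day-to-sample conversion above (illustrative
    # numbers): if the dataset starts at 2020-01-01 06:00:00 UTC with
    # dataset_rate = 30 Hz and day="2-3" was given, then start_day = 2 maps to
    # midnight UTC of day 2 (2020-01-02 00:00), i.e. 18 h after the start, so
    #     start_sample = 18 * 3600 * 30 = 1944000
    # and end_day = 3 maps to midnight after day 3 (2020-01-04 00:00), i.e.
    # 66 h after the start, so
    #     end_sample = 66 * 3600 * 30 = 7128000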

    # determine starting day index
    start_day_index = 1
    if start_sample:
        start_day_index = 1 + (datetime.datetime.utcfromtimestamp(
            dataset_start_ms / 1000 + start_sample / dataset_rate).date() -
                               dataset_start_date).days

    # print header summary
    if len(filenames) > 1:
        for filename, signalname, file_start_ms in zip(filenames, signal_names,
                                                       files_start_ms):
            print('file start:   ',
                  _helper.timeMillisecondToTimeString(file_start_ms),
                  signalname, filename)
    print('input start:  ',
          _helper.timeMillisecondToTimeString(dataset_start_ms), dataset)

    # read data
    sample_len = 3 * len(filenames)
    sample_data = []

    min_smp = 1e100
    max_smp = -1e100

    for fileindex, (filename, file_start_ms, csvfile) in enumerate(
            zip(filenames, files_start_ms, csvfiles)):
        print('reading ' + filename + '...')

        # Checks if csv header is absent and adds the header if needed
        csvstartpos = csvfile.tell()
        firstrow = next(csvfile)
        csvfile.seek(csvstartpos)

        fieldnames = None
        if 'Accelerometer' not in firstrow:
            # No headers present
            DEFAULT_FIELDNAMES = [
                'Timestamp', 'Accelerometer X', 'Accelerometer Y',
                'Accelerometer Z'
            ]
            no_of_fields = len(firstrow.split(','))
            if no_of_fields == 4:
                fieldnames = DEFAULT_FIELDNAMES
            elif no_of_fields == 3:
                fieldnames = DEFAULT_FIELDNAMES[1:]
            else:
                _helper.errorExit(
                    'missing header has unrecognized number of fields')

        if fieldnames is not None:
            _helper.warning('input file missing field names, using ' +
                            ','.join(fieldnames))

        reader = csv.DictReader(csvfile, fieldnames=fieldnames)

        if 'Timestamp' in reader.fieldnames:
            _helper.warning(
                'input file has Timestamp field, but it will be ignored')

        # process rows
        reader_sample_index = 0

        sample_offset = (file_start_ms -
                         dataset_start_ms) * dataset_rate / 1000
        if sample_offset != int(sample_offset):
            _helper.errorExit('sample offset error')
        sample_offset = int(sample_offset)

        if start_sample is not None:
            sample_offset -= start_sample

        for row in reader:
            data_sample_index = reader_sample_index + sample_offset
            reader_sample_index += 1

            if data_sample_index < 0:
                continue
            if end_sample is not None and data_sample_index >= end_sample - (
                    start_sample if start_sample is not None else 0):
                break

            x = float(row['Accelerometer X'])
            y = float(row['Accelerometer Y'])
            z = float(row['Accelerometer Z'])

            min_smp = min(min_smp, x, y, z)
            max_smp = max(max_smp, x, y, z)

            while data_sample_index >= len(sample_data):
                sample_data.append([None] * sample_len)

            sample_data[data_sample_index][3 * fileindex + 0] = x
            sample_data[data_sample_index][3 * fileindex + 1] = y
            sample_data[data_sample_index][3 * fileindex + 2] = z

            if reader_sample_index % (60 * 60 * dataset_rate) == 0:
                print('read %d hours...' % (reader_sample_index /
                                            (60 * 60 * dataset_rate)))

    if min_smp < -mag or mag < max_smp:
        _helper.warning('sample exceeds magnitude')
    output_start_ms = dataset_start_ms
    if start_sample is not None:
        output_start_ms_offset = start_sample * 1000 / dataset_rate
        if output_start_ms_offset != int(output_start_ms_offset):
            _helper.errorExit('output start offset sample error')
        output_start_ms += int(output_start_ms_offset)
    output_end_ms = output_start_ms + (len(sample_data) -
                                       1) * 1000 / dataset_rate

    # figure out max zoom level, if needed
    if zoom is None:
        for zz in range(10):
            zoom = zz
            if len(sample_data) / math.pow(SUBSAMPLE, zz + 1) <= 2 * TILE_SIZE:
                break
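
    # Example of the auto zoom selection above (TILE_SIZE and SUBSAMPLE are
    # module constants not shown here; suppose TILE_SIZE = 512 and
    # SUBSAMPLE = 8): for len(sample_data) = 1,000,000 the loop picks the
    # smallest zz with 1,000,000 / 8**(zz + 1) <= 2 * 512, which is zz = 3,
    # so zoom = 3 and each coarsest-level output sample summarizes
    # 8**3 = 512 raw samples.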

    # print summary
    print('length:       ', len(sample_data))
    print('rate:         ', dataset_rate)
    print('max zoom:     ', zoom)
    print('output start: ',
          _helper.timeMillisecondToTimeString(output_start_ms))
    print('output end:   ', _helper.timeMillisecondToTimeString(output_end_ms))

    # write tiles
    for zoom_level in range(zoom + 1):
        print('writing zoom %d...' % zoom_level)

        zoom_subsample = SUBSAMPLE**zoom_level
        zoom_tile_size = TILE_SIZE * zoom_subsample

        ntiles = int(len(sample_data) / zoom_tile_size)
        if len(sample_data) % zoom_tile_size != 0:
            ntiles += 1

        for tt in range(ntiles):
            tile_id = 'z%02dt%06d' % (zoom_level, tt)

            outfilename = os.path.join(tile_folder, tile_id + '.json')

            with open(outfilename, 'wt') as outfile:
                write_startfile(outfile, zoom_subsample,
                                dataset + ':' + tile_id)

                prev = False
                for ss in range(tt * TILE_SIZE, (tt + 1) * TILE_SIZE + 1):
                    rangesmp = sample_data[ss * zoom_subsample:(ss + 1) *
                                           zoom_subsample]
                    write_sample(outfile, rangesample(rangesmp, sample_len),
                                 prev, sample_len)
                    prev = True

                write_endfile(outfile)

            if (tt + 1) % 1000 == 0:
                print('wrote %d tiles...' % (tt + 1))
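
    # Tile files are named z<zoom>t<tile index>, e.g. zoom level 0, tile 12 is
    # written to <tile_folder>/z00t000012.json; each tile holds TILE_SIZE + 1
    # downsampled samples so adjacent tiles share their boundary sample.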

    print('writing origin...')

    outfilename = _helper.datasetOriginFilename(dataset)

    with open(outfilename, 'wt') as outfile:
        outfile.write("{\n")
        outfile.write('    "origin": %s\n' % json.dumps(filenames))
        outfile.write('}\n')

    print('writing config...')

    outfilename = _helper.datasetConfigFilename(dataset)

    with open(outfilename, 'wt') as outfile:
        outfile.write('{\n')
        outfile.write('    "title": "%s",\n' % dataset)
        outfile.write('    "tile_size": %d,\n' % TILE_SIZE)
        outfile.write('    "tile_subsample": %d,\n' % SUBSAMPLE)
        outfile.write('    "zoom_max": %d,\n' % zoom)
        outfile.write('    "length": %d,\n' % len(sample_data))
        outfile.write('    "start_time_ms": %s,\n' % output_start_ms)
        outfile.write('    "sample_rate": %d,\n' % dataset_rate)
        outfile.write('    "start_day_idx": %d,\n' % start_day_index)
        outfile.write('    "magnitude": %d,\n' % mag)
        outfile.write('    "signals": ["%s"],\n' % ('", "'.join(signal_names)))
        outfile.write('    "labels": [\n')
        for ii, (ll, rr, gg, bb) in enumerate(labels):
            outfile.write(
                '        { "label": "%s", "color": [ %0.2f, %0.2f, %0.2f ] }%s\n'
                % (ll, rr, gg, bb, ',' if ii + 1 < len(labels) else ''))
        outfile.write('    ]\n')
        outfile.write('}\n')
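
    # The resulting config file looks roughly like this (illustrative values):
    #
    #     {
    #         "title": "subjectA",
    #         "tile_size": 512,
    #         "tile_subsample": 8,
    #         "zoom_max": 3,
    #         "length": 1000000,
    #         "start_time_ms": 1577858400000,
    #         "sample_rate": 30,
    #         "start_day_idx": 1,
    #         "magnitude": 8,
    #         "signals": ["subjectA"],
    #         "labels": [
    #             { "label": "Walking", "color": [ 0.20, 0.60, 0.20 ] },
    #             { "label": "Unknown", "color": [ 0.50, 0.50, 0.50 ] }
    #         ]
    #     }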

    print('dataset written to ' + out_folder)
def main(dataset,
         filename,
         *,
         source=None,
         session=None,
         stdout=False,
         trim=False,
         qcfix=False):
    dataset_config_filename = _helper.datasetConfigFilename(dataset)

    if not os.path.exists(dataset_config_filename):
        _helper.errorExit('could not find dataset config file: ' +
                          dataset_config_filename)

    with open(dataset_config_filename, 'rt') as configfile:
        config = json.load(configfile)

    sample_rate = config['sample_rate']
    length = config['length']

    start_millisecond = config['start_time_ms']
    print('start time:',
          _helper.timeMillisecondToTimeString(start_millisecond))

    FORMAT_NOTES = 'NOTES'
    FORMAT_NOTES_TIME_FORMAT = '%a %b %d %H:%M:%S %Z %Y'
    FORMAT_NOTES_LENGTH_SECONDS = 10  # how long a note label should try to be
    FORMAT_ACTIVITY_GROUP = 'ACTIVITY_GROUP'
    FORMAT_PREDICTION = 'PREDICTION'
    FORMAT_PREDICTED = 'PREDICTED'
    FORMAT_PREDICTED_LABEL_SECONDS = 30

    with open(filename, 'rt') as csvfile:
        reader = csv.DictReader(csvfile)

        # check if file contains session and source columns
        if 'SESSION' in reader.fieldnames and 'SOURCE' in reader.fieldnames and (
                session or source):
            _helper.errorExit(
                'Session and source info detected in file; do not also provide session and source arguments.'
            )
        elif ('SESSION' in reader.fieldnames or 'SOURCE'
              in reader.fieldnames) and ('SESSION' not in reader.fieldnames
                                         or 'SOURCE' not in reader.fieldnames):
            _helper.errorExit(
                'Must provide both session and source fields in file or neither.'
            )
        elif (session is None or source is None) and (session or source):
            _helper.errorExit(
                'Must provide both session and source arguments or neither.')

        if session is None and 'SESSION' not in reader.fieldnames:
            _helper.errorExit(
                "No session argument provided and no session info in file. Cannot import labels."
            )
        if source is None and 'SOURCE' not in reader.fieldnames:
            _helper.errorExit(
                'No source argument provided and no source info in file. Cannot import labels.'
            )

        use_source_session_from_file = ('SESSION' in reader.fieldnames
                                        and 'SOURCE' in reader.fieldnames)

        # figure out format
        format = None
        format_meta = None
        if ('TIME' in reader.fieldnames) and ('TAG' in reader.fieldnames) and (
                'NOTE' in reader.fieldnames):
            format = FORMAT_NOTES
        elif ('START_TIME' in reader.fieldnames) and (
                'STOP_TIME' in reader.fieldnames) and ('ACTIVITY_GROUP.y'
                                                       in reader.fieldnames):
            format = FORMAT_ACTIVITY_GROUP
        elif ('START_TIME' in reader.fieldnames) and (
                'STOP_TIME' in reader.fieldnames) and ('PREDICTION'
                                                       in reader.fieldnames):
            format = FORMAT_PREDICTION
        elif ('HEADER_START_TIME'
              in reader.fieldnames) and ('PREDICTED' in reader.fieldnames):
            format = FORMAT_PREDICTED
            # get label names from header
            format_meta = []
            for field in reader.fieldnames[2:]:
                label = field.split('_')
                if label[0] != 'PROB' or len(label) < 2:
                    sys.stderr.write(
                        'unrecognized field in header: expected PROB_...\n')
                    sys.exit(-1)
                label = ' '.join([word.capitalize() for word in label[1:]])
                format_meta.append(label)
        else:
            sys.stderr.write('could not determine format from header fields\n')
            sys.exit(-1)
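
        # Header fields that select each format (from the checks above):
        #   NOTES:          TIME, TAG, NOTE
        #   ACTIVITY_GROUP: START_TIME, STOP_TIME, ACTIVITY_GROUP.y
        #   PREDICTION:     START_TIME, STOP_TIME, PREDICTION
        #   PREDICTED:      HEADER_START_TIME, PREDICTED, plus one
        #                   PROB_<label> column per label (the label names are
        #                   recovered from those PROB_ columns)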

        sys.stderr.write('detected %s format\n' % format)
        if use_source_session_from_file:
            sys.stderr.write('reading source and session from file\n')
        else:
            sys.stderr.write('using source %s and session %s\n' %
                             (source, session))

        # process rows
        sessions = set()
        session_labels = {}
        session_sources = {}

        # this will keep track of the time the last label started to make sure they are sorted
        last_label_start_millisecond = 0

        for row in reader:
            # figure out sample range
            if format == FORMAT_NOTES:
                label_start_millisecond = _helper.timeStringToTimeMillisecond(
                    row['TIME'], FORMAT_NOTES_TIME_FORMAT)
                label_stop_millisecond = label_start_millisecond + FORMAT_NOTES_LENGTH_SECONDS * 1000
                label_value = row['TAG']
                label_detail = row['NOTE']
            elif format == FORMAT_ACTIVITY_GROUP:
                label_start_millisecond = _helper.timeStringToTimeMillisecond(
                    row['START_TIME'], _helper.DATE_FORMAT_YMD)
                label_stop_millisecond = _helper.timeStringToTimeMillisecond(
                    row['STOP_TIME'], _helper.DATE_FORMAT_YMD)
                label_value = row['ACTIVITY_GROUP.y']
                label_detail = None
            elif format == FORMAT_PREDICTION:
                label_start_millisecond = _helper.timeStringToTimeMillisecond(
                    row['START_TIME'], _helper.DATE_FORMAT_YMD)
                label_stop_millisecond = _helper.timeStringToTimeMillisecond(
                    row['STOP_TIME'], _helper.DATE_FORMAT_YMD)
                label_value = row['PREDICTION']
                label_detail = None
            elif format == FORMAT_PREDICTED:
                if int(row['PREDICTED']) >= len(format_meta):
                    sys.stderr.write('PREDICTED index out of range\n')
                    sys.exit(-1)
                label_start_millisecond = _helper.timeStringToTimeMillisecond(
                    row['HEADER_START_TIME'], _helper.DATE_FORMAT_YMD)
                label_stop_millisecond = label_start_millisecond + 1000 * FORMAT_PREDICTED_LABEL_SECONDS
                label_value = format_meta[int(row['PREDICTED'])]
                label_detail = None
            else:
                _helper.errorExit('unknown format error')

            # check labels are in order
            if label_start_millisecond <= last_label_start_millisecond:
                _helper.errorExit('label start times not sorted')
            last_label_start_millisecond = label_start_millisecond

            # apply fix for QC end times, if needed
            if qcfix:
                if label_stop_millisecond % 100 == 88:
                    label_stop_millisecond += 12

            # convert from ms to sample
            label_start_sample_thousand = (label_start_millisecond -
                                           start_millisecond) * sample_rate
            label_stop_sample_thousand = (label_stop_millisecond -
                                          start_millisecond) * sample_rate

            if label_start_sample_thousand % 1000 != 0 or label_stop_sample_thousand % 1000 != 0:
                _helper.errorExit('sample precision error')

            label_start_sample = label_start_sample_thousand // 1000
            label_stop_sample = label_stop_sample_thousand // 1000
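
            # Worked example (illustrative numbers): with sample_rate = 30 and
            # a label starting 60,000 ms after start_millisecond,
            #     label_start_sample_thousand = 60000 * 30 = 1800000
            # which is divisible by 1000, giving label_start_sample = 1800;
            # any start/stop time that does not land exactly on a sample
            # boundary trips the precision error above.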

            # figure out source and session
            if use_source_session_from_file:
                current_session = row['SESSION']
                current_source = row['SOURCE']
            else:
                current_session = session
                current_source = source

            if current_source not in VALID_SOURCES:
                _helper.errorExit('unrecognized source: ' + current_source)

            # for notes, go back and make sure any previous note doesn't overlap this one
            if format == FORMAT_NOTES:
                if current_session in sessions and len(
                        session_labels[current_session]) > 0:
                    session_labels[current_session][-1][1] = min(
                        session_labels[current_session][-1][1],
                        label_start_sample)

            # append this label to the session
            if current_session not in sessions:
                sessions.add(current_session)
                session_labels[current_session] = []
                session_sources[current_session] = current_source

            if session_sources[current_session] != current_source:
                _helper.errorExit('Session with multiple sources detected.')

            session_labels[current_session].append([
                label_start_sample, label_stop_sample, label_value,
                label_detail
            ])

        # write labels out
        for session in sessions:
            labels = session_labels[session]
            source = session_sources[session]

            # this will be used to merge adjacent time windows that have the same label
            last_activity = None

            # keep track of information about labels output
            was_prev = False
            any_outside = False
            any_far_outside = False

            output = ''
            output += '{"session":"%s", "source": "%s", "labels":[' % (session,
                                                                       source)

            for label_start_sample, label_stop_sample, label_value, label_detail in session_labels[
                    session]:
                # see if the label extends beyond the dataset time
                if label_start_sample < 0 or length < label_stop_sample:
                    any_outside = True
                if label_start_sample < 0 - 0.1 * length or length + 0.1 * length < label_stop_sample:
                    any_far_outside = True

                # merge adjacent labels that match
                if not last_activity:
                    last_activity = [
                        label_start_sample, label_stop_sample, label_value,
                        label_detail
                    ]
                elif last_activity[1] == label_start_sample and last_activity[
                        2] == label_value and last_activity[3] == label_detail:
                    last_activity[1] = label_stop_sample
                else:
                    if trimActivity(last_activity, trim, 0, length):
                        output += _helper.activityJSON(last_activity, was_prev)
                        was_prev = True
                    last_activity = [
                        label_start_sample, label_stop_sample, label_value,
                        label_detail
                    ]

            # account for any remaining label
            if last_activity:
                if trimActivity(last_activity, trim, 0, length):
                    output += _helper.activityJSON(last_activity, was_prev)
                    was_prev = True

            output += ']}\n'

            # display warnings about labels
            if any_far_outside:
                _helper.warning('label found FAR OUTSIDE signal in ' + session)
            elif any_outside:
                _helper.warning('label found outside signal in ' + session)

            # do output
            if stdout:
                sys.stdout.write(output)

            else:
                labels_filename = _helper.latestLabelsFilename(
                    dataset, session)
                with open(_helper.ensureDirExists(labels_filename, True),
                          'wt') as labelsfile:
                    labelsfile.write(output)

                print('labels added to', labels_filename)
def main(source_dataset, dest_dataset, *, notrim=False):
    # Process arguments to get name of dataset

    source_config = _helper.datasetConfigFilename(source_dataset)
    dest_config = _helper.datasetConfigFilename(dest_dataset)

    source = {}
    dest = {}

    with open(source_config, 'rt') as configfile:
        config = json.load(configfile)
        source['sample_rate'] = config['sample_rate']
        source['start_time_ms'] = config['start_time_ms']

    with open(dest_config, 'rt') as configfile:
        config = json.load(configfile)
        dest['sample_rate'] = config['sample_rate']
        dest['length'] = config['length']
        dest['start_time_ms'] = config['start_time_ms']

    if source['sample_rate'] != dest['sample_rate']:
        _helper.errorExit("Source and dest datasets should have the same sample rate")

    start_sample = ((dest['start_time_ms'] - source['start_time_ms']) / 1000) * source['sample_rate']

    if start_sample != int(start_sample):
        _helper.errorExit("Source and dest datasets are not offset by an integer number of samples")

    start_sample = int(start_sample)
    end_sample = int(start_sample + dest['length'])
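
    # Worked example of the offset math above (illustrative numbers): if the
    # source dataset starts at 10:00:00.000, the dest dataset starts at
    # 10:05:00.000, both at 30 Hz, and the dest is 108000 samples (1 h) long,
    # then
    #     start_sample = (300000 / 1000) * 30 = 9000
    #     end_sample   = 9000 + 108000 = 117000
    # so only source labels overlapping samples 9000-117000 are copied (and
    # trimmed/re-based to start at 0) unless notrim is set.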

    source_labels = _helper.getLabelsLatest(source_dataset)
    if source_labels:
        for session in source_labels:

            session_name = session['session']
            source_name = session['source']
            session_labels = session['labels']
            label_filename = _helper.latestLabelsFilename(dest_dataset, session_name)

            output = ''
            output += ('{"session":"%s", "source": "%s", "labels":[' % (session_name, source_name))
            was_prev = False

            for ll in session_labels:
                label_start = ll['lo']
                label_end = ll['hi']
                label_name = ll['label']
                label = [label_start, label_end, label_name]

                if notrim:
                    output += _helper.activityJSON(label, was_prev)
                    was_prev = True

                elif label_end > start_sample and label_start < end_sample:

                    # Trim label start if needed
                    if label_start < start_sample:
                        label[0] = start_sample

                    # Trim label end if needed
                    if label_end > end_sample:
                        label[1] = end_sample

                    # Start label offset from 0
                    label[0] -= start_sample
                    label[1] -= start_sample

                    output += _helper.activityJSON(label, was_prev)
                    was_prev = True

            output += ']}\n'

            _helper.ensureDirExists(label_filename, True)
            with open(label_filename, 'wt') as labelsfile:
                labelsfile.write(output)
            print('labels added to ', label_filename)