def main(*, port=DEFAULT_PORT, mode=None, delay=None):
    if mode:
        global _mode
        if mode not in VALID_MODES:
            _helper.errorExit('unrecognized mode: ' + mode)
        _mode = mode

    if _mode is None:
        print('Starting server in default mode.')
    else:
        print('Starting server in mode ' + _mode + '.')

    if delay:
        global _debug_delay
        _debug_delay = delay / 1000.0

    #class ThreadedHTTPServer(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer):
    #    pass
    #httpd = ThreadedHTTPServer((HOST_NAME, port), Handler)
    httpd = http.server.HTTPServer((HOST_NAME, port), Handler)

    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        pass

    httpd.server_close()

def main(foldername):
    if os.path.isfile(foldername):
        _helper.errorExit('Must be a folder, not an individual file')

    # find all the datasets to import
    datasets = []
    for item in os.listdir(foldername):
        itempath = os.path.abspath(os.path.join(foldername, item))
        if os.path.isfile(itempath):
            if _helper.isFilenameDatasetImportable(itempath):
                datasets.append((_helper.makeIdFromFilename(itempath), [itempath]))
        elif os.path.isdir(itempath):
            subitems = _helper.findDatasetImportableFilesRecursively(itempath)
            if len(subitems) > 0:
                datasets.append((_helper.makeIdFromFilename(itempath), subitems))

    # try to import all the datasets
    for name, files in datasets:
        if os.path.exists(_helper.datasetDir(name)):
            print('Dataset %s exists, skipping.' % name)
        else:
            print('Importing dataset %s.' % name)
            import_dataset.main(files, name=name)
        print()

def parseRange(what, rng):
    groups = re.match(r'(\d*)(-?)(\d*)', rng).groups()

    if groups[0] == '' and groups[1] == '' and groups[2] == '':
        _helper.errorExit('Argument for ' + what + ' range has invalid form. Valid forms include: "1" or "1-3" or "-3" or "1-"')
    elif groups[0] != '' and groups[1] == '' and groups[2] == '':
        start = int(groups[0])
        end = start
    else:
        start = int(groups[0]) if groups[0] != '' else None
        end = int(groups[2]) if groups[2] != '' else None

    if start is not None and end is not None:
        if end < start:
            _helper.errorExit('End ' + what + ' index must be >= start ' + what + ' index')

    return start, end

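# Usage sketch for parseRange (illustrative values; assumes _helper.errorExit
# terminates on invalid input, so only valid forms return):
#   parseRange('sample', '500-1000')  -> (500, 1000)
#   parseRange('sample', '500')       -> (500, 500)
#   parseRange('day', '-3')           -> (None, 3)
#   parseRange('day', '2-')           -> (2, None)
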
def main(dataset, *, allfiles=False):
    # Delete the dataset folder
    dataset_folder = _helper.datasetDir(dataset)
    if not os.path.exists(dataset_folder):
        _helper.errorExit('The dataset does not exist ' + dataset_folder)
    shutil.rmtree(dataset_folder)
    print("Deleted dataset ", dataset_folder)

    if allfiles:
        # Delete the labels folder for the given dataset
        labels_folder = _folder.data_abspath(_helper._get_labels_folder(), dataset)
        if os.path.exists(labels_folder):
            shutil.rmtree(labels_folder)
            print("Deleted labels for the dataset ", labels_folder)

        # Delete exported labels files for the given dataset
        export_file = _helper.exportFilename(dataset)
        if os.path.exists(export_file):
            os.remove(export_file)
            print("Deleted exported labels file for the dataset ", export_file)

        # Delete all mturk submissions for the given dataset
        mturk_submit_folder = _folder.data_abspath('mturksubmit')
        if os.path.exists(mturk_submit_folder):
            mturk_session_ids = os.listdir(mturk_submit_folder)
            for session in mturk_session_ids:
                session_datasets = os.listdir(os.path.join(mturk_submit_folder, session))
                if dataset in session_datasets:
                    dataset_folder = os.path.join(mturk_submit_folder, session, dataset)
                    shutil.rmtree(dataset_folder)
                    print("Deleted mturk submissions for the dataset ", dataset)

def main(filename, outfolder):
    if filename.endswith('.gz'):
        use_open = gzip.open
    else:
        use_open = open

    with use_open(filename, 'rt') as csvfile:
        header_rate, header_start_ms = _helper.process_actigraph_header(csvfile)

        header_start_sec = header_start_ms / 1000
        if header_start_sec != int(header_start_sec):
            _helper.errorExit('start time can only have second precision')
        header_start_sec = int(header_start_sec)

        csv_header = csvfile.readline().strip()
        if csv_header != 'Accelerometer X,Accelerometer Y,Accelerometer Z':
            _helper.errorExit('unrecognized CSV header: only "Accelerometer X,Accelerometer Y,Accelerometer Z" supported')

        tm = datetime.datetime.utcfromtimestamp(header_start_sec)
        tm_sample = 0

        outfile = None

        for row in csvfile:
            tm_msec = int(1000 * tm_sample / header_rate + 0.5)

            # start a new hourly output file if needed
            if outfile is None:
                outfilecsvname = 'NONE-NONE-NA.NONE-NONE.%04d-%02d-%02d-%02d-%02d-%02d-%03d-P0000.sensor.csv' % (
                    tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second, tm_msec)
                outfilename = os.path.join(outfolder, 'default', 'MasterSynced',
                                           '%04d' % tm.year, '%02d' % tm.month, '%02d' % tm.day, '%02d' % tm.hour,
                                           outfilecsvname)
                print('Create new hourly file: %s' % outfilecsvname)
                outfile = open(_helper.ensureDirExists(outfilename, True), 'wt')
                outfile.write('HEADER_TIME_STAMP,X_ACCELERATION_METERS_PER_SECOND_SQUARED,Y_ACCELERATION_METERS_PER_SECOND_SQUARED,Z_ACCELERATION_METERS_PER_SECOND_SQUARED\n')

            # write the sample, forcing each value to have a decimal point
            tm_str = '%04d-%02d-%02d %02d:%02d:%02d.%03d' % (
                tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second, tm_msec)
            outfile.write(tm_str + ',' + (','.join([(e if ('.' in e) else (e + '.0')) for e in row.strip().split(',')])) + '\n')

            tm_sample += 1
            if tm_sample == header_rate:
                prev_tm = tm
                tm = tm + datetime.timedelta(seconds=1)
                tm_sample = 0

                # close the current file when crossing an hour boundary
                if prev_tm.year != tm.year or prev_tm.month != tm.month or prev_tm.day != tm.day or prev_tm.hour != tm.hour:
                    outfile.close()
                    outfile = None

        if outfile is not None:
            outfile.close()

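# For reference, a sketch of the output layout this function produces (the
# timestamps and values below are illustrative, not from a real run): each
# hour of data goes to its own mHealth-style file such as
#   <outfolder>/default/MasterSynced/2020/01/31/13/
#       NONE-NONE-NA.NONE-NONE.2020-01-31-13-00-00-000-P0000.sensor.csv
# whose contents look like:
#   HEADER_TIME_STAMP,X_ACCELERATION_METERS_PER_SECOND_SQUARED,Y_ACCELERATION_METERS_PER_SECOND_SQUARED,Z_ACCELERATION_METERS_PER_SECOND_SQUARED
#   2020-01-31 13:00:00.000,0.123,-0.456,0.789
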
def main(filenames, *, name=None, labelfilenames=None, zoom=None, mag=DEFAULT_MAGNITUDE, sample=None, day=None):
    if len(filenames) > 1 and not name:
        _helper.errorExit('Must specify a custom dataset --name when importing multiple files')

    if mag <= 0:
        _helper.errorExit('magnitude must be positive')

    if sample is not None and day is not None:
        _helper.errorExit('Can only provide one of --sample and --day')

    start_sample, end_sample = None, None
    if sample is not None:
        start_sample, end_sample = parseRange('sample', sample)

    start_day, end_day = None, None
    if day is not None:
        start_day, end_day = parseRange('day', day)

    # load labels
    if not labelfilenames:
        labelfilenames = [
            _folder.file_abspath('common', 'labels_test.csv'),
            _folder.file_abspath('common', 'labels_unknown.csv')
        ]

    labels = []
    labels_names = set()
    for labelfile in labelfilenames:
        print('Reading labels from %s...' % labelfile)
        with open(labelfile, 'rt') as csvfile:
            reader = csv.DictReader(csvfile)
            if set(reader.fieldnames) != set(['label', 'red', 'green', 'blue']):
                _helper.errorExit('Incorrect label csv headers')

            for row in reader:
                label_name = row['label'].strip()
                rr = float(row['red'].strip())
                gg = float(row['green'].strip())
                bb = float(row['blue'].strip())

                if re.search(r'[^\w\- ]', label_name, re.ASCII):
                    _helper.errorExit('Only alphanumeric, underscore, dash, and space allowed in label names: ' + label_name)
                if label_name in labels_names:
                    _helper.errorExit('Duplicate label: ' + label_name)

                labels.append((label_name, rr, gg, bb))
                labels_names.add(label_name)

    # process arguments
    signal_names = []
    for filename in filenames:
        signal_names.append(_helper.makeIdFromFilename(filename))
    if len(signal_names) != len(set(signal_names)):
        _helper.errorExit('Duplicate signal names')

    if name:
        if not _helper.checkId(name, False):
            _helper.errorExit('Only alphanumeric and underscore allowed in dataset names')
        dataset = name
    else:
        dataset = signal_names[0]
        if start_sample is not None or end_sample is not None:
            dataset = dataset + strRange('sample', start_sample, end_sample)
        if start_day is not None or end_day is not None:
            dataset = dataset + strRange('day', start_day, end_day)

    out_folder = _helper.datasetDir(dataset)
    tile_folder = _helper.datasetTileDir(dataset)

    if os.path.exists(out_folder):
        _helper.errorExit('Please remove output folder ' + out_folder)

    print('Using output folder ' + out_folder)

    _helper.ensureDirExists(out_folder, False)
    _helper.ensureDirExists(tile_folder, False)

    # read in data
    print('reading header...')

    # open files
    csvfiles = []
    for filename in filenames:
        if filename.endswith('.gz'):
            use_open = gzip.open
        else:
            use_open = open
        csvfiles.append(use_open(filename, 'rt'))

    # read headers
    files_start_ms = []
    dataset_rate = None
    for filename, csvfile in zip(filenames, csvfiles):
        header_rate, header_start_ms = _helper.process_actigraph_header(csvfile)

        if dataset_rate is None:
            dataset_rate = int(header_rate)
        elif dataset_rate != int(header_rate):
            _helper.errorExit('Multiple sample rates found')

        files_start_ms.append(header_start_ms)

    # determine sample range
    dataset_start_ms = min(files_start_ms)
    dataset_start_date = datetime.datetime.utcfromtimestamp(dataset_start_ms / 1000).date()

    if start_sample is not None or end_sample is not None:
        pass  # an explicit --sample range is used as given

    if start_day is not None or end_day is not None:
        if start_day is not None:
            output_min_ms = 1000 * calendar.timegm((dataset_start_date + datetime.timedelta(days=(start_day - 1))).timetuple())
            start_sample = (max(output_min_ms, dataset_start_ms) - dataset_start_ms) * dataset_rate / 1000
            if start_sample != int(start_sample):
                _helper.errorExit('day start sample error')
            start_sample = int(start_sample)
        else:
            start_sample = None

        if end_day is not None:
            output_max_ms = 1000 * calendar.timegm((dataset_start_date + datetime.timedelta(days=(end_day))).timetuple())
            end_sample = (output_max_ms - dataset_start_ms) * dataset_rate / 1000
            if end_sample != int(end_sample):
                _helper.errorExit('day end sample error')
            end_sample = int(end_sample)
        else:
            end_sample = None

    # determine starting day index
    start_day_index = 1
    if start_sample:
        start_day_index = 1 + (datetime.datetime.utcfromtimestamp(dataset_start_ms / 1000 + start_sample / dataset_rate).date() - dataset_start_date).days

    # print header summary
    if len(filenames) > 1:
        for filename, signalname, file_start_ms in zip(filenames, signal_names, files_start_ms):
            print('file start: ', _helper.timeMillisecondToTimeString(file_start_ms), signalname, filename)
    print('input start: ', _helper.timeMillisecondToTimeString(dataset_start_ms), dataset)

    # read data
    sample_len = 3 * len(filenames)
    sample_data = []

    min_smp = 1e100
    max_smp = -1e100

    for fileindex, (filename, file_start_ms, csvfile) in enumerate(zip(filenames, files_start_ms, csvfiles)):
        print('reading ' + filename + '...')

        # Checks if csv header is absent and adds the header if needed
        csvstartpos = csvfile.tell()
        firstrow = next(csvfile)
        csvfile.seek(csvstartpos)

        fieldnames = None
        if 'Accelerometer' not in firstrow:
            # No headers present
            DEFAULT_FIELDNAMES = ['Timestamp', 'Accelerometer X', 'Accelerometer Y', 'Accelerometer Z']
            no_of_fields = len(firstrow.split(','))
            if no_of_fields == 4:
                fieldnames = DEFAULT_FIELDNAMES
            elif no_of_fields == 3:
                fieldnames = DEFAULT_FIELDNAMES[1:]
            else:
                _helper.errorExit('missing header has unrecognized number of fields')

        if fieldnames is not None:
            _helper.warning('input file missing field names, using ' + ','.join(fieldnames))

        reader = csv.DictReader(csvfile, fieldnames=fieldnames)

        if 'Timestamp' in reader.fieldnames:
            _helper.warning('input file has Timestamp field, but it will be ignored')

        # process rows
        reader_sample_index = 0

        sample_offset = (file_start_ms - dataset_start_ms) * dataset_rate / 1000
        if sample_offset != int(sample_offset):
            _helper.errorExit('sample offset error')
        sample_offset = int(sample_offset)

        if start_sample is not None:
            sample_offset -= start_sample

        for row in reader:
            data_sample_index = reader_sample_index + sample_offset
            reader_sample_index += 1

            if data_sample_index < 0:
                continue
            if end_sample is not None and data_sample_index >= end_sample - (start_sample if start_sample is not None else 0):
                break

            x = float(row['Accelerometer X'])
            y = float(row['Accelerometer Y'])
            z = float(row['Accelerometer Z'])

            min_smp = min(min_smp, x, y, z)
            max_smp = max(max_smp, x, y, z)

            while data_sample_index >= len(sample_data):
                sample_data.append([None] * sample_len)

            sample_data[data_sample_index][3 * fileindex + 0] = x
            sample_data[data_sample_index][3 * fileindex + 1] = y
            sample_data[data_sample_index][3 * fileindex + 2] = z

            if reader_sample_index % (60 * 60 * dataset_rate) == 0:
                print('read %d hours...' % (reader_sample_index / (60 * 60 * dataset_rate)))

    if min_smp < -mag or mag < max_smp:
        _helper.warning('sample exceeds magnitude')

    output_start_ms = dataset_start_ms
    if start_sample is not None:
        output_start_ms_offset = start_sample * 1000 / dataset_rate
        if output_start_ms_offset != int(output_start_ms_offset):
            _helper.errorExit('output start offset sample error')
        output_start_ms += int(output_start_ms_offset)
    output_end_ms = output_start_ms + (len(sample_data) - 1) * 1000 / dataset_rate

    # figure out max zoom level, if needed
    if zoom is None:
        for zz in range(10):
            zoom = zz
            if len(sample_data) / math.pow(SUBSAMPLE, zz + 1) <= 2 * TILE_SIZE:
                break

    # print summary
    print('length: ', len(sample_data))
    print('rate: ', dataset_rate)
    print('max zoom: ', zoom)
    print('output start: ', _helper.timeMillisecondToTimeString(output_start_ms))
    print('output end: ', _helper.timeMillisecondToTimeString(output_end_ms))

    # write tiles
    for zoom_level in range(zoom + 1):
        print('writing zoom %d...' % zoom_level)

        zoom_subsample = SUBSAMPLE**zoom_level
        zoom_tile_size = TILE_SIZE * zoom_subsample

        ntiles = int(len(sample_data) / zoom_tile_size)
        if len(sample_data) % zoom_tile_size != 0:
            ntiles += 1

        for tt in range(ntiles):
            tile_id = 'z%02dt%06d' % (zoom_level, tt)
            outfilename = os.path.join(tile_folder, tile_id + '.json')
            with open(outfilename, 'wt') as outfile:
                write_startfile(outfile, zoom_subsample, dataset + ':' + tile_id)
                prev = False
                for ss in range(tt * TILE_SIZE, (tt + 1) * TILE_SIZE + 1):
                    rangesmp = sample_data[ss * zoom_subsample:(ss + 1) * zoom_subsample]
                    write_sample(outfile, rangesample(rangesmp, sample_len), prev, sample_len)
                    prev = True
                write_endfile(outfile)

            if (tt + 1) % 1000 == 0:
                print('wrote %d tiles...' % (tt + 1))

    print('writing origin...')
    outfilename = _helper.datasetOriginFilename(dataset)
    with open(outfilename, 'wt') as outfile:
        outfile.write('{\n')
        outfile.write('    "origin": %s\n' % json.dumps(filenames))
        outfile.write('}\n')

    print('writing config...')
    outfilename = _helper.datasetConfigFilename(dataset)
    with open(outfilename, 'wt') as outfile:
        outfile.write('{\n')
        outfile.write('    "title": "%s",\n' % dataset)
        outfile.write('    "tile_size": %d,\n' % TILE_SIZE)
        outfile.write('    "tile_subsample": %d,\n' % SUBSAMPLE)
        outfile.write('    "zoom_max": %d,\n' % zoom)
        outfile.write('    "length": %d,\n' % len(sample_data))
        outfile.write('    "start_time_ms": %s,\n' % output_start_ms)
        outfile.write('    "sample_rate": %d,\n' % dataset_rate)
        outfile.write('    "start_day_idx": %d,\n' % start_day_index)
        outfile.write('    "magnitude": %d,\n' % mag)
        outfile.write('    "signals": ["%s"],\n' % ('", "'.join(signal_names)))
        outfile.write('    "labels": [\n')
        for ii, (ll, rr, gg, bb) in enumerate(labels):
            outfile.write('        { "label": "%s", "color": [ %0.2f, %0.2f, %0.2f ] }%s\n'
                          % (ll, rr, gg, bb, ',' if ii + 1 < len(labels) else ''))
        outfile.write('    ]\n')
        outfile.write('}\n')

    print('dataset written to ' + out_folder)

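# Illustrative shape of the dataset config written above (values are made up,
# not from a real dataset; TILE_SIZE and SUBSAMPLE come from module constants):
# {
#     "title": "mydataset",
#     "tile_size": 512,
#     "tile_subsample": 8,
#     "zoom_max": 5,
#     "length": 8640000,
#     "start_time_ms": 1580475600000,
#     "sample_rate": 80,
#     "start_day_idx": 1,
#     "magnitude": 8,
#     "signals": ["signal_a", "signal_b"],
#     "labels": [
#         { "label": "Unknown", "color": [ 0.50, 0.50, 0.50 ] }
#     ]
# }
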
def main(dataset, filename, *, source=None, session=None, stdout=False, trim=False, qcfix=False):
    dataset_config_filename = _helper.datasetConfigFilename(dataset)
    if not os.path.exists(dataset_config_filename):
        _helper.errorExit('could not find dataset config file: ' + dataset_config_filename)

    with open(dataset_config_filename, 'rt') as configfile:
        config = json.load(configfile)
        sample_rate = config['sample_rate']
        length = config['length']
        start_millisecond = config['start_time_ms']

    print('start time:', _helper.timeMillisecondToTimeString(start_millisecond))

    FORMAT_NOTES = 'NOTES'
    FORMAT_NOTES_TIME_FORMAT = '%a %b %d %H:%M:%S %Z %Y'
    FORMAT_NOTES_LENGTH_SECONDS = 10  # how long a note label should try to be
    FORMAT_ACTIVITY_GROUP = 'ACTIVITY_GROUP'
    FORMAT_PREDICTION = 'PREDICTION'
    FORMAT_PREDICTED = 'PREDICTED'
    FORMAT_PREDICTED_LABEL_SECONDS = 30

    with open(filename, 'rt') as csvfile:
        reader = csv.DictReader(csvfile)

        # check if file contains session and source columns
        if 'SESSION' in reader.fieldnames and 'SOURCE' in reader.fieldnames and (session or source):
            _helper.errorExit('Session and source info detected in file, will be used instead of given arguments.')
        elif ('SESSION' in reader.fieldnames or 'SOURCE' in reader.fieldnames) and ('SESSION' not in reader.fieldnames or 'SOURCE' not in reader.fieldnames):
            _helper.errorExit('Must provide both session and source fields in file or neither.')
        elif (session is None or source is None) and (session or source):
            _helper.errorExit('Must provide both session and source arguments or neither.')

        if session is None and 'SESSION' not in reader.fieldnames:
            _helper.errorExit('No session argument provided and no session info in file. Cannot import labels.')
        if source is None and 'SOURCE' not in reader.fieldnames:
            _helper.errorExit('No source argument provided and no source info in file. Cannot import labels.')

        use_source_session_from_file = ('SESSION' in reader.fieldnames and 'SOURCE' in reader.fieldnames)

        # figure out format
        format = None
        format_meta = None
        if ('TIME' in reader.fieldnames) and ('TAG' in reader.fieldnames) and ('NOTE' in reader.fieldnames):
            format = FORMAT_NOTES
        elif ('START_TIME' in reader.fieldnames) and ('STOP_TIME' in reader.fieldnames) and ('ACTIVITY_GROUP.y' in reader.fieldnames):
            format = FORMAT_ACTIVITY_GROUP
        elif ('START_TIME' in reader.fieldnames) and ('STOP_TIME' in reader.fieldnames) and ('PREDICTION' in reader.fieldnames):
            format = FORMAT_PREDICTION
        elif ('HEADER_START_TIME' in reader.fieldnames) and ('PREDICTED' in reader.fieldnames):
            format = FORMAT_PREDICTED

            # get label names from header
            format_meta = []
            for field in reader.fieldnames[2:]:
                label = field.split('_')
                if label[0] != 'PROB' or len(label) < 2:
                    sys.stderr.write('unrecognized field in header: expected PROB_...\n')
                    sys.exit(-1)
                label = ' '.join([word.capitalize() for word in label[1:]])
                format_meta.append(label)
        else:
            sys.stderr.write('could not determine format from header fields\n')
            sys.exit(-1)

        sys.stderr.write('detected %s format\n' % format)
        if use_source_session_from_file:
            sys.stderr.write('reading source and session from file\n')
        else:
            sys.stderr.write('using source %s and session %s\n' % (source, session))

        # process rows
        sessions = set()
        session_labels = {}
        session_sources = {}

        # this will keep track of the time the last label started to make sure they are sorted
        last_label_start_millisecond = 0

        for row in reader:
            # figure out sample range
            if format == FORMAT_NOTES:
                label_start_millisecond = _helper.timeStringToTimeMillisecond(row['TIME'], FORMAT_NOTES_TIME_FORMAT)
                label_stop_millisecond = label_start_millisecond + FORMAT_NOTES_LENGTH_SECONDS * 1000
                label_value = row['TAG']
                label_detail = row['NOTE']
            elif format == FORMAT_ACTIVITY_GROUP:
                label_start_millisecond = _helper.timeStringToTimeMillisecond(row['START_TIME'], _helper.DATE_FORMAT_YMD)
                label_stop_millisecond = _helper.timeStringToTimeMillisecond(row['STOP_TIME'], _helper.DATE_FORMAT_YMD)
                label_value = row['ACTIVITY_GROUP.y']
                label_detail = None
            elif format == FORMAT_PREDICTION:
                label_start_millisecond = _helper.timeStringToTimeMillisecond(row['START_TIME'], _helper.DATE_FORMAT_YMD)
                label_stop_millisecond = _helper.timeStringToTimeMillisecond(row['STOP_TIME'], _helper.DATE_FORMAT_YMD)
                label_value = row['PREDICTION']
                label_detail = None
            elif format == FORMAT_PREDICTED:
                if int(row['PREDICTED']) >= len(format_meta):
                    sys.stderr.write('PREDICTED index out of range\n')
                    sys.exit(-1)
                label_start_millisecond = _helper.timeStringToTimeMillisecond(row['HEADER_START_TIME'], _helper.DATE_FORMAT_YMD)
                label_stop_millisecond = label_start_millisecond + 1000 * FORMAT_PREDICTED_LABEL_SECONDS
                label_value = format_meta[int(row['PREDICTED'])]
                label_detail = None
            else:
                _helper.errorExit('unknown format error')

            # check labels are in order
            if label_start_millisecond <= last_label_start_millisecond:
                _helper.errorExit('label start times not sorted')
            last_label_start_millisecond = label_start_millisecond

            # apply fix for QC end times, if needed
            if qcfix:
                if label_stop_millisecond % 100 == 88:
                    label_stop_millisecond += 12

            # convert from ms to sample
            label_start_sample_thousand = (label_start_millisecond - start_millisecond) * sample_rate
            label_stop_sample_thousand = (label_stop_millisecond - start_millisecond) * sample_rate
            if label_start_sample_thousand % 1000 != 0 or label_stop_sample_thousand % 1000 != 0:
                _helper.errorExit('sample precision error')
            label_start_sample = (label_start_sample_thousand / 1000)
            label_stop_sample = (label_stop_sample_thousand / 1000)

            # figure out source and session
            if use_source_session_from_file:
                current_session = row['SESSION']
                current_source = row['SOURCE']
            else:
                current_session = session
                current_source = source

            if current_source not in VALID_SOURCES:
                _helper.errorExit('unrecognized source: ' + current_source)

            # for notes, go back and make sure any previous note doesn't overlap this one
            if format == FORMAT_NOTES:
                if current_session in sessions and len(session_labels[current_session]) > 0:
                    session_labels[current_session][-1][1] = min(session_labels[current_session][-1][1], label_start_sample)

            # append this label to the session
            if current_session not in sessions:
                sessions.add(current_session)
                session_labels[current_session] = []
                session_sources[current_session] = current_source

            if session_sources[current_session] != current_source:
                _helper.errorExit('Session with multiple sources detected.')

            session_labels[current_session].append([label_start_sample, label_stop_sample, label_value, label_detail])

    # write labels out
    for session in sessions:
        labels = session_labels[session]
        source = session_sources[session]

        # this will be used to merge adjacent time windows that have the same label
        last_activity = None

        # keep track of information about labels output
        was_prev = False
        any_outside = False
        any_far_outside = False

        output = ''
        output += '{"session":"%s", "source": "%s", "labels":[' % (session, source)

        for label_start_sample, label_stop_sample, label_value, label_detail in session_labels[session]:
            # see if the label extends beyond the dataset time
            if label_start_sample < 0 or length < label_stop_sample:
                any_outside = True
            if label_start_sample < 0 - 0.1 * length or length + 0.1 * length < label_stop_sample:
                any_far_outside = True

            # merge adjacent labels that match
            if not last_activity:
                last_activity = [label_start_sample, label_stop_sample, label_value, label_detail]
            elif last_activity[1] == label_start_sample and last_activity[2] == label_value and last_activity[3] == label_detail:
                last_activity[1] = label_stop_sample
            else:
                if trimActivity(last_activity, trim, 0, length):
                    output += _helper.activityJSON(last_activity, was_prev)
                    was_prev = True
                last_activity = [label_start_sample, label_stop_sample, label_value, label_detail]

        # account for any remaining label
        if last_activity:
            if trimActivity(last_activity, trim, 0, length):
                output += _helper.activityJSON(last_activity, was_prev)
                was_prev = True

        output += ']}\n'

        # display warnings about labels
        if any_far_outside:
            _helper.warning('label found FAR OUTSIDE signal in ' + session)
        elif any_outside:
            _helper.warning('label found outside signal in ' + session)

        # do output
        if stdout:
            sys.stdout.write(output)
        else:
            labels_filename = _helper.latestLabelsFilename(dataset, session)
            with open(_helper.ensureDirExists(labels_filename, True), 'wt') as labelsfile:
                labelsfile.write(output)
            print('labels added to', labels_filename)

def main(source_dataset, dest_dataset, *, notrim=False):
    # Process arguments to get name of dataset
    source_config = _helper.datasetConfigFilename(source_dataset)
    dest_config = _helper.datasetConfigFilename(dest_dataset)

    source = {}
    dest = {}

    with open(source_config, 'rt') as configfile:
        config = json.load(configfile)
        source['sample_rate'] = config['sample_rate']
        source['start_time_ms'] = config['start_time_ms']

    with open(dest_config, 'rt') as configfile:
        config = json.load(configfile)
        dest['sample_rate'] = config['sample_rate']
        dest['length'] = config['length']
        dest['start_time_ms'] = config['start_time_ms']

    if source['sample_rate'] != dest['sample_rate']:
        _helper.errorExit("Source and dest datasets should have the same sample rate")

    start_sample = ((dest['start_time_ms'] - source['start_time_ms']) / 1000) * source['sample_rate']
    if start_sample != int(start_sample):
        _helper.errorExit("Source and dest datasets are not offset by an integer number of samples")
    start_sample = int(start_sample)
    end_sample = int(start_sample + dest['length'])

    source_labels = _helper.getLabelsLatest(source_dataset)
    if source_labels:
        for session in source_labels:
            session_name = session['session']
            source_name = session['source']
            session_labels = session['labels']

            label_filename = _helper.latestLabelsFilename(dest_dataset, session_name)

            output = ''
            output += ('{"session":"%s", "source": "%s", "labels":[' % (session_name, source_name))

            was_prev = False
            for ll in session_labels:
                label_start = ll['lo']
                label_end = ll['hi']
                label_name = ll['label']
                label = [label_start, label_end, label_name]

                if notrim:
                    output += _helper.activityJSON(label, was_prev)
                    was_prev = True
                elif label_end > start_sample and label_start < end_sample:
                    # Trim label start if needed
                    if label_start < start_sample:
                        label[0] = start_sample
                    # Trim label end if needed
                    if label_end > end_sample:
                        label[1] = end_sample
                    # Start label offset from 0
                    label[0] -= start_sample
                    label[1] -= start_sample

                    output += _helper.activityJSON(label, was_prev)
                    was_prev = True

            output += ']}\n'

            _helper.ensureDirExists(label_filename, True)
            with open(label_filename, 'wt') as labelsfile:
                labelsfile.write(output)
            print('labels added to ', label_filename)