async def _run(node, start, end, stream):
    my_intervals = await node.data_intervals(stream, start, end)
    if len(my_intervals) == 0:
        print("no stream data")
        return
    for interval in my_intervals:
        print("[%s - %s]" % (timestamp_to_human(interval[0]),
                             timestamp_to_human(interval[1])))
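
# Illustrative stand-in (not part of the CLI): Joule timestamps are microseconds since the
# UNIX epoch (note the `start / 1e6` conversions elsewhere in this code), and this sketch
# shows one plausible way to render them; the exact format produced by
# joule.utilities.timestamp_to_human may differ.
def _example_timestamp_to_human(ts_us):
    import datetime
    # convert microseconds to seconds before building a local datetime
    return datetime.datetime.fromtimestamp(ts_us / 1e6).strftime("%Y-%m-%d %H:%M:%S.%f")

# e.g. _example_timestamp_to_human(1_600_000_000_000_000)
#      -> '2020-09-13 12:26:40.000000' (exact value depends on the local timezone)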
def print_table(annotations):
    headers = ["Title", "Content", "Start", "End"]
    result = []
    for a in annotations:
        row = [a.title, a.content, utilities.timestamp_to_human(a.start)]
        if a.end is not None:
            row.append(utilities.timestamp_to_human(a.end))
        else:
            row.append("\u2014")
        result.append(row)
    click.echo(tabulate(result, headers=headers, tablefmt="fancy_grid"))
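
# Illustrative sketch (not part of the CLI): shows the row layout print_table() hands to
# tabulate. The annotation rows below are made-up examples; event annotations with no end
# time render an em dash in the End column.
def _example_print_table():
    from tabulate import tabulate
    rows = [
        ["calibration", "sensor zeroed", "2020-01-01 00:00:00", "2020-01-01 01:00:00"],
        ["event", "breaker tripped", "2020-01-02 12:30:00", "\u2014"],  # no end time
    ]
    print(tabulate(rows, headers=["Title", "Content", "Start", "End"],
                   tablefmt="fancy_grid"))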
async def build_network_pipes(inputs: Dict[str, str],
                              outputs: Dict[str, str],
                              configured_streams: Dict[str, data_stream.DataStream],
                              my_node: BaseNode,
                              start_time: Optional[int],
                              end_time: Optional[int],
                              live=False,
                              new=False,
                              force=False):
    if not force:
        _display_warning(outputs.values(), start_time, end_time)

    pipes_in = {}
    pipes_out = {}
    try:
        for name in inputs:
            my_stream = await _parse_stream(my_node, inputs[name],
                                            configured_streams)
            if new:
                if start_time is not None:
                    raise errors.ConfigurationError(
                        "Cannot specify [start] and [new], pick one")
                # determine start time based on last time stamp in outputs
                start_time, end_time = await _compute_new_interval(
                    my_node, inputs, outputs)
                print("Running from [%s] to [%s]" %
                      (timestamp_to_human(start_time),
                       timestamp_to_human(end_time)))
            if start_time is None and end_time is None:
                # subscribe to live data
                pipes_in[name] = await my_node.data_subscribe(my_stream)
            else:
                pipes_in[name] = await my_node.data_read(my_stream,
                                                         start_time,
                                                         end_time)
        for name in outputs:
            my_stream = await _parse_stream(my_node, outputs[name],
                                            configured_streams)
            pipes_out[name] = await my_node.data_write(my_stream,
                                                       start_time,
                                                       end_time)
    except (errors.ApiError, errors.ConfigurationError) as e:
        # close any pipes that were created
        for name in pipes_in:
            await pipes_in[name].close()
        for name in pipes_out:
            await pipes_out[name].close()
        # re-raise the exception to be handled elsewhere
        raise e
    return pipes_in, pipes_out
def _display_warning(paths, start_time, end_time):
    # warn about data removal for historic execution
    if start_time is not None or end_time is not None:
        if end_time is None:
            msg = "after [%s]" % timestamp_to_human(start_time)
        elif start_time is None:
            msg = "before [%s]" % timestamp_to_human(end_time)
        else:
            msg = "between [%s - %s]" % (timestamp_to_human(start_time),
                                         timestamp_to_human(end_time))
        output_paths = ", ".join([x.split(':')[0] for x in paths])
        if not click.confirm("This will remove any data %s in the output streams [%s]" %
                             (msg, output_paths)):
            exit(1)
async def _run(config_node, start, end, new, destination_node, source,
               destination, source_url=None):
    # determine if the source node is NilmDB or Joule
    if source_url is None:
        source_node = config_node
        nilmdb_source = False
    else:
        source_node = source_url
        await _validate_nilmdb_url(source_node)
        nilmdb_source = True

    # determine if the destination node is NilmDB or Joule
    nilmdb_dest = False
    try:
        if destination_node is None:
            dest_node = config_node
        elif type(destination_node) is str:
            dest_node = get_node(destination_node)
        else:
            dest_node = destination_node
    except errors.ApiError:
        nilmdb_dest = True
        dest_node = destination_node
        await _validate_nilmdb_url(dest_node)

    # retrieve the source stream
    src_stream = await _retrieve_source(source_node, source,
                                        is_nilmdb=nilmdb_source)

    # retrieve the destination stream (create it if necessary)
    dest_stream = await _retrieve_destination(dest_node, destination,
                                              src_stream,
                                              is_nilmdb=nilmdb_dest)
    # make sure streams are compatible
    if src_stream.layout != dest_stream.layout:
        raise errors.ApiError(
            "Error: source (%s) and destination (%s) datatypes are not compatible" %
            (src_stream.layout, dest_stream.layout))
    # warn if the elements are not the same
    element_warning = False
    src_elements = sorted(src_stream.elements, key=attrgetter('index'))
    dest_elements = sorted(dest_stream.elements, key=attrgetter('index'))
    for i in range(len(src_elements)):
        if src_elements[i].name != dest_elements[i].name:
            element_warning = True
        if src_elements[i].units != dest_elements[i].units:
            element_warning = True
    if (element_warning and
            not click.confirm("WARNING: Element configurations do not match. Continue?")):
        click.echo("Cancelled")
        return

    # if new is set, start and end may not be specified
    if new and start is not None:
        raise click.ClickException(
            "Error: either specify 'new' or a starting timestamp, not both")

    # make sure the time bounds make sense
    if start is not None:
        try:
            start = human_to_timestamp(start)
        except ValueError:
            raise errors.ApiError("invalid start time: [%s]" % start)
    if end is not None:
        try:
            end = human_to_timestamp(end)
        except ValueError:
            raise errors.ApiError("invalid end time: [%s]" % end)
    if (start is not None) and (end is not None) and ((end - start) <= 0):
        raise click.ClickException(
            "Error: start [%s] must be before end [%s]" %
            (datetime.datetime.fromtimestamp(start / 1e6),
             datetime.datetime.fromtimestamp(end / 1e6)))
    if new:
        # pull all the destination intervals and use the end of the last one as the 'end' for the copy
        dest_intervals = await _get_intervals(dest_node, dest_stream, destination,
                                              None, None, is_nilmdb=nilmdb_dest)
        if len(dest_intervals) > 0:
            start = dest_intervals[-1][-1]
            print("Starting copy at [%s]" % timestamp_to_human(start))
        else:
            print("Starting copy at beginning of source")

    # compute the target intervals (source - dest)
    src_intervals = await _get_intervals(source_node, src_stream, source,
                                         start, end, is_nilmdb=nilmdb_source)
    dest_intervals = await _get_intervals(dest_node, dest_stream, destination,
                                          start, end, is_nilmdb=nilmdb_dest)
    new_intervals = interval_difference(src_intervals, dest_intervals)
    existing_intervals = interval_difference(src_intervals, new_intervals)

    async def _copy(intervals):
        # compute the duration of data to copy
        duration = 0
        for interval in intervals:
            duration += interval[1] - interval[0]

        with click.progressbar(label='Copying data', length=duration) as bar:
            for interval in intervals:
                await _copy_interval(interval[0], interval[1], bar)
                await _copy_annotations(interval[0], interval[1])

    async def _copy_annotations(istart, iend):
        if nilmdb_source:
            src_annotations = await _get_nilmdb_annotations(source_node, source,
                                                            istart, iend)
        else:
            src_annotations = await source_node.annotation_get(src_stream.id,
                                                               start=istart,
                                                               end=iend)
        if nilmdb_dest:
            # get *all* the destination annotations, otherwise we'll lose
            # annotations outside this interval
            dest_annotations = await _get_nilmdb_annotations(dest_node, destination)
            new_annotations = [a for a in src_annotations
                               if a not in dest_annotations]
            if len(new_annotations) > 0:
                # create ID's for the new annotations
                if len(dest_annotations) > 0:
                    id_val = max([a.id for a in dest_annotations]) + 1
                else:
                    id_val = 0
                for a in new_annotations:
                    a.id = id_val
                    id_val += 1
                await _create_nilmdb_annotations(dest_node, destination,
                                                 new_annotations + dest_annotations)
        else:
            dest_annotations = await dest_node.annotation_get(dest_stream.id,
                                                              start=istart,
                                                              end=iend)
            new_annotations = [a for a in src_annotations
                               if a not in dest_annotations]
            for annotation in new_annotations:
                await dest_node.annotation_create(annotation, dest_stream.id)

    if len(new_intervals) == 0:
        if len(src_intervals) > 0:
            click.echo("Nothing to copy, syncing annotations")
            for interval in src_intervals:
                await _copy_annotations(interval[0], interval[1])
        else:
            click.echo("Nothing to copy")
        # clean up
        if not nilmdb_dest:
            await dest_node.close()
        if not nilmdb_source:
            await source_node.close()
        return

    async def _copy_interval(istart, iend, bar):
        # print("[%s] -> [%s]" % (timestamp_to_human(istart), timestamp_to_human(iend)))
        if nilmdb_source:
            src_params = {'path': source, 'binary': 1,
                          'start': istart, 'end': iend}
            src_url = "{server}/stream/extract".format(server=source_node)
            src_headers = {}
            src_ssl = None
        else:
            src_params = {'id': src_stream.id, 'start': istart, 'end': iend}
            src_url = "{server}/data".format(server=source_node.session.url)
            src_headers = {"X-API-KEY": source_node.session.key}
            src_ssl = source_node.session.ssl_context
        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=None)) as session:
            async with session.get(src_url,
                                   params=src_params,
                                   headers=src_headers,
                                   ssl=src_ssl) as src_response:
                if src_response.status != 200:
                    msg = await src_response.text()
                    if msg == 'this stream has no data':
                        # This is not an error because a previous copy may have been interrupted
                        # This will cause the destination to have an interval gap where the source has no data
                        # Example: source: |**   *******|
                        #          dest:   |** |    |*******|
                        #                       ^--- looks like missing data but there's nothing in the source
                        return  # ignore empty intervals
                    raise click.ClickException("Error reading from source: %s" % msg)

                pipe = pipes.InputPipe(stream=dest_stream,
                                       reader=src_response.content)

                async def _data_sender():
                    last_ts = istart
                    try:
                        while True:
                            data = await pipe.read()
                            pipe.consume(len(data))
                            if len(data) > 0:
                                cur_ts = data[-1]['timestamp']
                                yield data.tobytes()
                                # total time extents of this chunk
                                bar.update(cur_ts - last_ts)
                                last_ts = cur_ts
                            # if pipe.end_of_interval:
                            #     yield pipes.interval_token(dest_stream.layout).tostring()
                    except pipes.EmptyPipe:
                        pass
                    bar.update(iend - last_ts)

                if nilmdb_dest:
                    dst_params = {"start": istart, "end": iend,
                                  "path": destination, "binary": 1}
                    dst_url = "{server}/stream/insert".format(server=dest_node)
                    await _send_nilmdb_data(dst_url,
                                            dst_params,
                                            _data_sender(),
                                            pipes.compute_dtype(dest_stream.layout),
                                            session)
                else:
                    dst_url = "{server}/data".format(server=dest_node.session.url)
                    dst_params = {"id": dest_stream.id}
                    dst_headers = {"X-API-KEY": dest_node.session.key}
                    dst_ssl = dest_node.session.ssl_context
                    async with session.post(dst_url,
                                            params=dst_params,
                                            data=_data_sender(),
                                            headers=dst_headers,
                                            ssl=dst_ssl,
                                            chunked=True) as dest_response:
                        if dest_response.status != 200:
                            msg = await dest_response.text()
                            raise errors.ApiError("Error writing to destination: %s" % msg)

    try:
        # copy over any new annotations from existing intervals
        for interval in existing_intervals:
            await _copy_annotations(interval[0], interval[1])
        await _copy(new_intervals)
        click.echo("\tOK")
        # aiohttp errors should be caught by the stream info requests;
        # this is only generated if the joule server stops during the
        # data read/write
    except aiohttp.ClientError as e:  # pragma: no cover
        raise click.ClickException("Error: %s" % str(e))
    finally:
        if not nilmdb_dest:
            await dest_node.close()
        if not nilmdb_source:
            await source_node.close()
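
# Illustrative sketch of the interval arithmetic the copy logic relies on: the data to
# transfer is the set of source intervals minus whatever the destination already has.
# This is a simplified stand-in, not the joule.utilities.interval_difference implementation.
def _example_interval_difference(source, dest):
    """Return the parts of each [start, end) interval in `source` that are not covered
    by any interval in `dest`. Both lists must be sorted and non-overlapping."""
    result = []
    for s_start, s_end in source:
        cursor = s_start
        for d_start, d_end in dest:
            if d_end <= cursor or d_start >= s_end:
                continue  # no overlap with the remaining piece of this source interval
            if d_start > cursor:
                result.append([cursor, d_start])  # uncovered gap before this dest interval
            cursor = max(cursor, d_end)
        if cursor < s_end:
            result.append([cursor, s_end])  # tail not covered by any dest interval
    return result

# e.g. _example_interval_difference([[0, 100]], [[20, 40], [60, 70]])
#      -> [[0, 20], [40, 60], [70, 100]]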
async def _run(start, end, source, destination, window, node):
    # --- RETRIEVE SOURCE STREAMS ---
    # prints out error messages if the source streams do not exist
    try:
        source_stream: joule.DataStream = await node.data_stream_get(source)
    except joule.errors.ApiError as e:
        raise click.ClickException(f"Source stream {source}: {str(e)}")

    # --- CHECK FOR DESTINATION STREAM ---
    destination_width = len(source_stream.elements)
    dest_exists = True
    dest_stream = None  # this is set below, or created later
    try:
        dest_stream = await node.data_stream_get(destination)
        if len(dest_stream.elements) != destination_width:
            raise click.ClickException(
                f"Destination must have {destination_width} elements")
        if not dest_stream.datatype.startswith('float'):
            raise click.ClickException("Destination must be a float datatype")
    except joule.errors.ApiError as e:
        dest_exists = False

    click.echo("Median filter with window size %d" % window)
    click.echo(f"Source Stream: \n\t{source}")
    if dest_exists:
        click.echo(f"Destination Stream: \n\t{destination}")
    else:
        click.echo(f"Creating Destination: \n\t{destination}")
    if not click.confirm("Proceed?"):
        click.echo("Cancelled")
        return

    if not dest_exists:
        # create the destination
        dest_path = destination.split('/')[:-1]
        dest_name = destination.split('/')[-1]
        dest_stream = joule.api.DataStream(name=dest_name,
                                           elements=source_stream.elements,
                                           datatype='float32')
        try:
            dest_stream = await node.data_stream_create(dest_stream,
                                                        '/'.join(dest_path))
        except joule.errors.ApiError as e:
            raise click.ClickException(f"Creating destination: {str(e)}")

    # --- READY TO GO ---
    for interval in await node.data_intervals(source_stream, start=start, end=end):
        start = interval[0]
        end = interval[1]
        click.echo("Processing [%s] -> [%s]" % (timestamp_to_human(start),
                                                timestamp_to_human(end)))
        median_filter = MedianFilter()
        inputs = {'input': await node.data_read(source_stream, start, end)}
        outputs = {'output': await node.data_write(dest_stream, start, end)}
        args = argparse.Namespace(window=window)
        await median_filter.run(args, inputs, outputs)
        await outputs['output'].close()
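
# Illustrative sketch of the windowed median operation that MedianFilter applies to each
# interval; this is a generic NumPy/SciPy version, not the joule filter module itself.
def _example_median(values, window):
    """Median-filter a 1-D sequence with an odd window size (SciPy zero-pads the edges)."""
    import numpy as np
    from scipy.signal import medfilt
    return medfilt(np.asarray(values, dtype=float), kernel_size=window)

# e.g. _example_median([1, 9, 1, 1, 8, 1, 1], window=3)
#      -> array([1., 1., 1., 1., 1., 1., 1.])  (isolated spikes are suppressed)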
def admin_ingest(config, backup, node, map, pgctl_binary, yes, start, end):
    """Restore data from a backup."""
    # expensive imports so only execute if the function is called
    from joule.services import load_config
    import sqlalchemy
    from sqlalchemy.orm import Session
    from joule.models import Base, TimescaleStore, NilmdbStore

    parser = configparser.ConfigParser()
    loop = asyncio.get_event_loop()

    # make sure either a backup or a node is specified
    if (((backup is None) and (node is None)) or
            ((backup is not None) and (node is not None))):
        raise click.ClickException("Specify either a backup or a node to ingest data from")

    # make sure the time bounds make sense
    if start is not None:
        try:
            start = utilities.human_to_timestamp(start)
        except ValueError:
            raise errors.ApiError("invalid start time: [%s]" % start)
    if end is not None:
        try:
            end = utilities.human_to_timestamp(end)
        except ValueError:
            raise errors.ApiError("invalid end time: [%s]" % end)
    if (start is not None) and (end is not None) and ((end - start) <= 0):
        raise click.ClickException(
            "Error: start [%s] must be before end [%s]" %
            (utilities.timestamp_to_human(start),
             utilities.timestamp_to_human(end)))

    # parse the map file if specified
    stream_map = None
    if map is not None:
        stream_map = []
        try:
            with open(map, newline='') as csvfile:
                reader = csv.reader(csvfile, delimiter=',',
                                    quotechar='|', skipinitialspace=True)
                for row in reader:
                    if len(row) == 0:  # ignore blank lines
                        continue
                    if len(row) == 1 and len(row[0]) == 0:  # line with only whitespace
                        continue
                    if row[0][0] == '#':  # ignore comments
                        continue
                    if len(row) != 2:
                        raise errors.ConfigurationError(
                            """invalid map format. Refer to template below:
                            # this line is a comment
                            # only paths in this file will be copied
                            # source and destination paths are separated by a ','
                            /source/path, /destination/path
                            /source/path2, /destination/path2
                            #...etc
                            """)
                    stream_map.append(row)
        except FileNotFoundError:
            raise click.ClickException("Cannot find map file at [%s]" % map)
        except PermissionError:
            raise click.ClickException("Cannot read map file at [%s]" % map)
        except errors.ConfigurationError as e:
            raise click.ClickException(str(e))

    # load the Joule configuration file
    try:
        with open(config, 'r') as f:
            parser.read_file(f, config)
            joule_config = load_config.run(custom_values=parser)
    except FileNotFoundError:
        raise click.ClickException("Cannot load joule configuration file at [%s]" % config)
    except PermissionError:
        raise click.ClickException(
            "Cannot read joule configuration file at [%s] (run as root)" % config)
    except errors.ConfigurationError as e:
        raise click.ClickException("Invalid configuration: %s" % e)

    dest_engine = sqlalchemy.create_engine(joule_config.database)
    Base.metadata.create_all(dest_engine)
    dest_db = Session(bind=dest_engine)
    if joule_config.nilmdb_url is not None:
        dest_datastore = NilmdbStore(joule_config.nilmdb_url, 0, 0, loop)
    else:
        dest_datastore = TimescaleStore(joule_config.database, 0, 0, loop)

    # demote privileges
    if "SUDO_GID" in os.environ:
        os.setgid(int(os.environ["SUDO_GID"]))
    if "SUDO_UID" in os.environ:
        os.setuid(int(os.environ["SUDO_UID"]))

    # create a log file for exec cmds
    pg_log_name = "joule_restore_log_%s.txt" % uuid.uuid4().hex.upper()[0:6]
    pg_log = open(pg_log_name, 'w')

    # if pgctl_binary is not specified, try to autodetect it
    if pgctl_binary is None:
        try:
            completed_proc = subprocess.run(["psql", "-V"], stdout=subprocess.PIPE)
            output = completed_proc.stdout.decode('utf-8')
            version = output.split(" ")[2]
            major_version = version.split(".")[0]
            pgctl_binary = "/usr/lib/postgresql/%s/bin/pg_ctl" % major_version
        except (FileNotFoundError, IndexError):
            raise click.ClickException("cannot autodetect pg_ctl location, specify with -b")

    # determine if the source is a backup or a node
    if node is not None:
        live_restore = True
        src_dsn = loop.run_until_complete(get_dsn(node))
        # check whether the source uses nilmdb
        click.echo("WARNING: Nilmdb sources are not supported yet")
        src_datastore = TimescaleStore(src_dsn, 0, 0, loop)
        nilmdb_proc = None
    else:
        if not os.path.isdir(backup):
            raise click.ClickException("backup [%s] does not exist" % backup)
        src_dsn = start_src_db(backup, pgctl_binary, pg_log)
        # check whether the source uses nilmdb
        nilmdb_path = os.path.join(backup, 'nilmdb')
        nilmdb_proc = None
        if os.path.exists(nilmdb_path):
            port = unused_port()
            nilmdb_proc = start_src_nilmdb(nilmdb_path, port, pg_log)
            click.echo("waiting for nilmdb to initialize...")
            time.sleep(2)
            src_datastore = NilmdbStore('http://127.0.0.1:%d' % port, 0, 0, loop)
            if joule_config.nilmdb_url is None:
                click.echo(
                    "Note: re-copying from NilmDB to Timescale may result in --nothing to copy-- messages")
        else:
            src_datastore = TimescaleStore(src_dsn, 0, 0, loop)
        live_restore = False

    src_engine = sqlalchemy.create_engine(src_dsn)
    num_tries = 0
    max_tries = 1
    while True:
        try:
            Base.metadata.create_all(src_engine)
            break
        except sqlalchemy.exc.OperationalError as e:
            if live_restore:
                raise click.ClickException(str(e))  # this should work immediately
            num_tries += 1
            click.echo("... attempting to connect to source database (%d/%d)" %
                       (num_tries, max_tries))
            time.sleep(2)
            if num_tries >= max_tries:
                raise click.ClickException(
                    "cannot connect to source database, log saved in [%s]" % pg_log_name)
    src_db = Session(bind=src_engine)

    try:
        loop.run_until_complete(
            run(src_db, dest_db, src_datastore, dest_datastore,
                stream_map, yes, start, end))
    except errors.ConfigurationError as e:
        print("Logs written to [%s]" % pg_log_name)
        raise click.ClickException(str(e))
    finally:
        # close connections
        dest_db.close()
        src_db.close()
        loop.run_until_complete(dest_datastore.close())
        loop.run_until_complete(src_datastore.close())
        # clean up database if not a live restore
        if not live_restore:
            args = ["-D", os.path.join(backup)]
            args += ["stop"]
            cmd = [pgctl_binary] + args
            subprocess.call(cmd, stderr=pg_log, stdout=pg_log)
            sock_path = os.path.join(backup, 'sock')
            sockets = os.listdir(sock_path)
            for s in sockets:
                os.remove(os.path.join(sock_path, s))
            os.rmdir(sock_path)
            if nilmdb_proc is not None:
                nilmdb_proc.terminate()
                nilmdb_proc.communicate()
    pg_log.close()
    os.remove(pg_log_name)
    click.echo("OK")
async def run(src_db: 'Session', dest_db: 'Session', src_datastore: 'DataStore',
              dest_datastore: 'DataStore', stream_map: Optional[List],
              confirmed: bool, start: Optional[int], end: Optional[int]):
    from joule.models import DataStream, folder, data_stream
    from joule.services import parse_pipe_config

    src_streams = src_db.query(DataStream).all()
    dest_streams = dest_db.query(DataStream).all()
    await src_datastore.initialize(src_streams)
    await dest_datastore.initialize(dest_streams)
    if stream_map is None:
        src_streams = src_db.query(DataStream).all()
        src_paths = map(folder.get_stream_path, src_streams)
        stream_map = map(lambda _path: [_path, _path], src_paths)

    # create the copy map array
    copy_maps = []
    for item in stream_map:
        # get the source stream
        source = folder.find_stream_by_path(item[0], src_db)
        if source is None:
            raise errors.ConfigurationError(
                "source stream [%s] does not exist" % item[0])
        src_intervals = await src_datastore.intervals(source, start, end)
        # get or create the destination stream
        dest = folder.find_stream_by_path(item[1], dest_db)
        if dest is None:
            (path, name, _) = parse_pipe_config.parse_pipe_config(item[1])
            dest_folder = folder.find(path, dest_db, create=True)
            dest = data_stream.from_json(source.to_json())
            # set the attributes on the new stream
            dest.name = name
            dest.keep_us = dest.KEEP_ALL
            dest.is_configured = False
            dest.is_source = False
            dest.is_destination = False
            dest.id = None
            for e in dest.elements:
                e.id = None
            dest_folder.streams.append(dest)
            dest_intervals = None
        else:
            # make sure the destination is compatible
            if dest.layout != source.layout:
                raise errors.ConfigurationError(
                    "source stream [%s] is not compatible with destination stream [%s]" %
                    (item[0], item[1]))
            dest_intervals = await dest_datastore.intervals(dest, start, end)
        # figure out the time bounds to copy
        if dest_intervals is None:
            copy_intervals = src_intervals
        else:
            copy_intervals = utilities.interval_difference(src_intervals,
                                                           dest_intervals)
        copy_maps.append(CopyMap(source, dest, copy_intervals))

    # display the copy table
    rows = []
    copy_required = False
    for item in copy_maps:
        if item.start is None:
            start = "\u2014"
            end = "\u2014"
        else:
            start = utilities.timestamp_to_human(item.start)
            end = utilities.timestamp_to_human(item.end)
            copy_required = True
        rows.append([item.source_path, item.dest_path, start, end])
    click.echo(tabulate(rows,
                        headers=["Source", "Destination", "From", "To"],
                        tablefmt="fancy_grid"))
    if not copy_required:
        click.echo("No data needs to be copied")
        return
    if not confirmed and not click.confirm("Start data copy?"):
        click.echo("cancelled")
        return
    dest_db.commit()

    # execute the copy
    for item in copy_maps:
        await copy(item, src_datastore, dest_datastore, src_db, dest_db)
async def _run():
    nonlocal stream_path
    # Open the file and make sure it is the right type
    try:
        hdf_root = h5py.File(file, 'r')
        hdf_timestamp = hdf_root['timestamp']
        hdf_data = hdf_root['data']
        start = hdf_timestamp[0, 0]
        end = hdf_timestamp[-1, 0]
        # make sure the length of both datasets are the same
        if len(hdf_data) != len(hdf_timestamp):
            raise click.ClickException("Length of [data] and [timestamp] datasets must match")
        # if a stream is not specified see if one is in the data file
        if stream_path is None:
            try:
                stream_path = hdf_root.attrs['path']
            except KeyError:
                raise click.ClickException("Specify a target stream with --stream")
    except OSError:
        raise click.ClickException("Data file [%s] must be hdf5 format" % file)
    except KeyError:
        raise click.ClickException("Data file must contain [data] and [timestamp] datasets")

    # get the stream object from the API
    try:
        stream_obj = await config.node.data_stream_get(stream_path)
        print("Destination stream: %s" % stream_path)
        stream_info = await config.node.data_stream_info(stream_path)
        # make sure the datatypes match
        dtype = compute_dtype(stream_obj.layout)
        if dtype[1].base != hdf_data.dtype:
            raise click.ClickException(
                "Incompatible datatypes, stream is [%s] and data file is [%s]" %
                (dtype[1].base, hdf_data.dtype))
        # make sure the number of elements match
        if len(stream_obj.elements) != hdf_data.shape[1]:
            raise click.ClickException(
                "DataStream has [%d] elements but data file has [%d] elements" %
                (len(stream_obj.elements), hdf_data.shape[1]))
        # check if there is existing data in this time period
        if stream_info.rows > 0 and (start < stream_info.end and end >= stream_info.start):
            # confirm overwrite
            if not click.confirm("This will remove existing data between %s - %s" %
                                 (timestamp_to_human(start),
                                  timestamp_to_human(end))):
                click.echo("Cancelled")
                return
            await config.node.data_delete(stream_obj, start, end + 1)
    except errors.ApiError as e:
        if '404' not in str(e):
            raise click.ClickException(str(e))
        # this stream doesn't exist, create it from the hdf attributes
        stream_obj = await _create_stream(stream_path, hdf_root, config.node)

    pipe = await config.node.data_write(stream_obj)
    # progress bar for writing to a file
    bar_ctx = click.progressbar(length=len(hdf_data), label='ingesting data')
    bar = bar_ctx.__enter__()
    for idx in range(0, len(hdf_data), BLOCK_SIZE):
        ts = hdf_timestamp[idx:idx + BLOCK_SIZE]
        data = hdf_data[idx:idx + BLOCK_SIZE]
        sdata = np.empty(len(ts), dtype=compute_dtype(stream_obj.layout))
        sdata['timestamp'][:, None] = ts
        sdata['data'] = data
        await pipe.write(sdata)
        bar.update(len(data))
    await pipe.close()
    bar_ctx.__exit__(None, None, None)
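
# Illustrative sketch of the structured array the ingest loop assembles: a 64-bit integer
# 'timestamp' column plus a 2-D 'data' block, mirroring what compute_dtype() returns for a
# layout such as 'float32_3'. The exact field dtypes here are an assumption, not the
# joule implementation.
def _example_build_block(ts, data):
    import numpy as np
    ts = np.asarray(ts, dtype=np.int64)          # shape (N,) or (N, 1) from the HDF5 file
    data = np.asarray(data, dtype=np.float32)    # shape (N, n_elements)
    sdata = np.empty(len(data), dtype=[('timestamp', '<i8'),
                                       ('data', '<f4', (data.shape[1],))])
    sdata['timestamp'] = ts.reshape(-1)          # flatten an (N, 1) HDF5 column
    sdata['data'] = data
    return sdata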
async def _run(start, end, destination, primary, secondaries, node):
    # --- RETRIEVE SOURCE STREAMS ---
    # prints out error messages if the source streams do not exist
    try:
        primary_stream: joule.DataStream = await node.data_stream_get(
            primary.split(':')[0])
    except joule.errors.ApiError as e:
        raise click.ClickException(f"Primary stream {primary}: {str(e)}")
    secondary_streams = []
    for s in secondaries:
        try:
            s_stream = await node.data_stream_get(s.split(':')[0])
            secondary_streams.append(s_stream)
        except joule.errors.ApiError as e:
            raise click.ClickException(f"Secondary stream {s}: {str(e)}")

    # --- CHECK FOR DESTINATION STREAM ---
    destination_width = len(primary_stream.elements)
    for s in secondary_streams:
        destination_width += len(s.elements)
    dest_exists = True
    dest_stream = None  # this is set below, or created later
    try:
        dest_stream = await node.data_stream_get(destination)
        if len(dest_stream.elements) != destination_width:
            raise click.ClickException(
                f"Destination must have {destination_width} elements")
        if not dest_stream.datatype.startswith('float'):
            raise click.ClickException("Destination must be a float datatype")
    except joule.errors.ApiError as e:
        dest_exists = False

    click.echo(f"Primary Stream: \n\t{primary}")
    click.echo(f"Secondary Streams: \n\t{', '.join(s for s in secondaries)}")
    if dest_exists:
        click.echo(f"Destination Stream: \n\t{destination}")
    else:
        click.echo(f"Creating Destination: \n\t{destination}")
    if not click.confirm("Proceed?"):
        click.echo("Cancelled")
        return

    if not dest_exists:
        # create the destination
        dest_path = destination.split('/')[:-1]
        dest_name = destination.split('/')[-1]
        elements = []
        # check for a primary prefix
        prefix = ""
        if ':' in primary:
            prefix = primary.split(':')[-1]
        else:
            prefix = primary_stream.name
        for e in primary_stream.elements:
            e.name = prefix + ' ' + e.name
            elements.append(e)
        # check for secondary prefixes
        for i in range(len(secondaries)):
            prefix = ""
            if ':' in secondaries[i]:
                prefix = secondaries[i].split(':')[-1]
            else:
                prefix = secondary_streams[i].name
            for e in secondary_streams[i].elements:
                e.name = prefix + ' ' + e.name
                elements.append(e)
        dest_stream = joule.api.DataStream(name=dest_name,
                                           elements=elements,
                                           datatype='float32')
        try:
            dest_stream = await node.data_stream_create(dest_stream,
                                                        '/'.join(dest_path))
        except joule.errors.ApiError as e:
            raise click.ClickException(f"Creating destination: {str(e)}")

    # --- READY TO GO ---
    common_intervals = await node.data_intervals(primary_stream, start=start, end=end)
    for s in secondary_streams:
        s_intervals = await node.data_intervals(s, start=start, end=end)
        common_intervals = interval_intersection(common_intervals, s_intervals)
    copied_intervals = await node.data_intervals(dest_stream, start=start, end=end)
    # do not copy intervals that we already have
    pending_intervals = interval_difference(common_intervals, copied_intervals)
    # only copy intervals with at least 1 second of data- there are issues with 1 sample offsets
    # that cause merge to think there is missing data when there is not any- this is probably due to
    # the backend not having the data boundaries stored exactly correctly
    pending_intervals = [i for i in pending_intervals if (i[1] - i[0]) > 1e6]
    if len(pending_intervals) == 0:
        click.echo("Destination already has all the data, nothing to do")
    else:
        start_time = joule.utilities.timestamp_to_human(pending_intervals[0][0])
        end_time = joule.utilities.timestamp_to_human(pending_intervals[-1][1])
        click.echo(f"Merging from {start_time} to {end_time} "
                   f"({len(pending_intervals)} intervals)")
        for interval in pending_intervals:
            start = interval[0]
            end = interval[1]
            click.echo("Processing [%s] -> [%s]" % (timestamp_to_human(start),
                                                    timestamp_to_human(end)))
            merge_filter = MergeFilter()
            inputs = {'primary': await node.data_read(primary_stream, start, end)}
            for i in range(len(secondary_streams)):
                inputs[f"secondary_{i}"] = await node.data_read(secondary_streams[i],
                                                                start, end)
            outputs = {'destination': await node.data_write(dest_stream, start, end)}
            args = argparse.Namespace(primary="primary")
            await merge_filter.run(args, inputs, outputs)
            await outputs['destination'].close()
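
# Illustrative sketch of the interval intersection used above to find time ranges where the
# primary and every secondary stream all have data; this is a simplified stand-in for
# joule.utilities.interval_intersection, not the library implementation.
def _example_interval_intersection(a, b):
    """Intersect two sorted, non-overlapping lists of [start, end) intervals."""
    result = []
    i = j = 0
    while i < len(a) and j < len(b):
        start = max(a[i][0], b[j][0])
        end = min(a[i][1], b[j][1])
        if start < end:
            result.append([start, end])
        # advance whichever interval finishes first
        if a[i][1] < b[j][1]:
            i += 1
        else:
            j += 1
    return result

# e.g. _example_interval_intersection([[0, 50], [60, 100]], [[40, 80]])
#      -> [[40, 50], [60, 80]]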