Example #1
from joule.utilities import timestamp_to_human

async def _run(node, start, end, stream):
    my_intervals = await node.data_intervals(stream, start, end)
    if len(my_intervals) == 0:
        print("no stream data")
        return
    for interval in my_intervals:
        print(
            "[%s - %s]" %
            (timestamp_to_human(interval[0]), timestamp_to_human(interval[1])))
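A minimal driver for this coroutine might look like the sketch below; the get_node() call and the stream path are assumptions patterned on the joule API used in the other examples, not part of the original snippet.

# Hypothetical driver; node discovery and the stream path are assumptions.
import asyncio
from joule.api import get_node

async def main():
    node = get_node()  # assumed: default configured node
    try:
        await _run(node, start=None, end=None, stream="/demo/sensor")
    finally:
        await node.close()

asyncio.run(main())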
Example #2
import click
from tabulate import tabulate
from joule import utilities

def print_table(annotations):
    headers = ["Title", "Content", "Start", "End"]
    result = []
    for a in annotations:
        row = [a.title, a.content, utilities.timestamp_to_human(a.start)]
        if a.end is not None:
            row.append(utilities.timestamp_to_human(a.end))
        else:
            row.append("\u2014")
        result.append(row)
    click.echo(tabulate(result, headers=headers, tablefmt="fancy_grid"))
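To exercise this function without a live node, stand-in objects with the same fields are enough; SimpleNamespace here is a test double, not the real joule annotation class.

# Smoke test with stand-in annotation objects (assumed fields; timestamps
# are in microseconds, matching timestamp_to_human usage elsewhere).
from types import SimpleNamespace

demo = [
    SimpleNamespace(title="Spike", content="voltage transient",
                    start=1_600_000_000_000_000, end=1_600_000_001_000_000),
    SimpleNamespace(title="Note", content="sensor swapped",
                    start=1_600_000_500_000_000, end=None),  # open-ended
]
print_table(demo)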
Example #3
async def build_network_pipes(inputs: Dict[str, str],
                              outputs: Dict[str, str],
                              configured_streams: Dict[str,
                                                       data_stream.DataStream],
                              my_node: BaseNode,
                              start_time: Optional[int],
                              end_time: Optional[int],
                              live=False,
                              new=False,
                              force=False):
    if not force:
        _display_warning(outputs.values(), start_time, end_time)

    pipes_in = {}
    pipes_out = {}
    try:
        for name in inputs:
            my_stream = await _parse_stream(my_node, inputs[name],
                                            configured_streams)
            if new:
                if start_time is not None:
                    raise errors.ConfigurationError(
                        "Cannot specify [start] and [new], pick one")
                # determine the start time from the last timestamp in the outputs
                start_time, end_time = await _compute_new_interval(
                    my_node, inputs, outputs)
                print("Running from [%s] to [%s]" %
                      (timestamp_to_human(start_time),
                       timestamp_to_human(end_time)))
            if start_time is None and end_time is None:
                # subscribe to live data
                pipes_in[name] = await my_node.data_subscribe(my_stream)
            else:
                pipes_in[name] = await my_node.data_read(
                    my_stream, start_time, end_time)

        for name in outputs:
            my_stream = await _parse_stream(my_node, outputs[name],
                                            configured_streams)
            pipes_out[name] = await my_node.data_write(my_stream, start_time,
                                                       end_time)
    except (errors.ApiError, errors.ConfigurationError) as e:
        # close any pipes that were created
        for name in pipes_in:
            await pipes_in[name].close()
        for name in pipes_out:
            await pipes_out[name].close()
        # re-raise the exception to be handled elsewhere
        raise

    return pipes_in, pipes_out
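A plausible call site is sketched below; the path:datatype[element] notation and the node object are illustrative assumptions, not taken from the original code.

# Hypothetical usage sketch; the paths and my_node are assumptions.
async def example(my_node):
    pipes_in, pipes_out = await build_network_pipes(
        inputs={'input': '/demo/raw:float32[x,y,z]'},
        outputs={'output': '/demo/filtered:float32[x,y,z]'},
        configured_streams={},
        my_node=my_node,
        start_time=None, end_time=None,  # None/None -> live subscription
        live=True)
    return pipes_in, pipes_out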
Example #4
def _display_warning(paths, start_time, end_time):
    # warn about data removal for historic execution
    if start_time is not None or end_time is not None:
        if end_time is None:
            msg = "after [%s]" % timestamp_to_human(start_time)
        elif start_time is None:
            msg = "before [%s]" % timestamp_to_human(end_time)
        else:
            msg = "between [%s - %s]" % (timestamp_to_human(start_time),
                                         timestamp_to_human(end_time))
        output_paths = ", ".join([x.split(':')[0] for x in paths])
        if not click.confirm(
                "This will remove any data %s in the output streams [%s]" %
            (msg, output_paths)):
            exit(1)
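The split(':')[0] above strips any layout suffix from the output paths before display; a quick illustration with made-up paths:

# Illustrative only: the suffix after ':' is dropped for display.
paths = ['/demo/filtered:float32[x,y,z]', '/demo/other:int16[a]']
print(", ".join(p.split(':')[0] for p in paths))
# -> /demo/filtered, /demo/other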
Example #5
async def _run(config_node,
               start,
               end,
               new,
               destination_node,
               source,
               destination,
               source_url=None):
    # determine if the source node is NilmDB or Joule
    if source_url is None:
        source_node = config_node
        nilmdb_source = False
    else:
        source_node = source_url
        await _validate_nilmdb_url(source_node)
        nilmdb_source = True

    # determine if the destination node is NilmDB or Joule
    nilmdb_dest = False

    try:
        if destination_node is None:
            dest_node = config_node
        elif type(destination_node) is str:
            dest_node = get_node(destination_node)
        else:
            dest_node = destination_node
    except errors.ApiError:
        nilmdb_dest = True
        dest_node = destination_node
        await _validate_nilmdb_url(dest_node)

    # retrieve the source stream
    src_stream = await _retrieve_source(source_node,
                                        source,
                                        is_nilmdb=nilmdb_source)

    # retrieve the destination stream (create it if necessary)
    dest_stream = await _retrieve_destination(dest_node,
                                              destination,
                                              src_stream,
                                              is_nilmdb=nilmdb_dest)
    # make sure streams are compatible
    if src_stream.layout != dest_stream.layout:
        raise errors.ApiError(
            "Error: source (%s) and destination (%s) datatypes are not compatible"
            % (src_stream.layout, dest_stream.layout))
    # warn if the elements are not the same
    element_warning = False
    src_elements = sorted(src_stream.elements, key=attrgetter('index'))
    dest_elements = sorted(dest_stream.elements, key=attrgetter('index'))
    for i in range(len(src_elements)):
        if src_elements[i].name != dest_elements[i].name:
            element_warning = True
        if src_elements[i].units != dest_elements[i].units:
            element_warning = True
    if (element_warning and not click.confirm(
            "WARNING: Element configurations do not match. Continue?")):
        click.echo("Cancelled")
        return
    # if new is set start and end may not be specified
    if new and start is not None:
        raise click.ClickException(
            "Error: either specify 'new' or a starting timestamp, not both")

    # make sure the time bounds make sense
    if start is not None:
        try:
            start = human_to_timestamp(start)
        except ValueError:
            raise errors.ApiError("invalid start time: [%s]" % start)
    if end is not None:
        try:
            end = human_to_timestamp(end)
        except ValueError:
            raise errors.ApiError("invalid end time: [%s]" % end)
    if (start is not None) and (end is not None) and ((end - start) <= 0):
        raise click.ClickException(
            "Error: start [%s] must be before end [%s]" %
            (datetime.datetime.fromtimestamp(
                start / 1e6), datetime.datetime.fromtimestamp(end / 1e6)))
    if new:
        # pull all the destination intervals and use the end of the last one as the 'start' for the copy
        dest_intervals = await _get_intervals(dest_node,
                                              dest_stream,
                                              destination,
                                              None,
                                              None,
                                              is_nilmdb=nilmdb_dest)
        if len(dest_intervals) > 0:
            start = dest_intervals[-1][-1]
            print("Starting copy at [%s]" % timestamp_to_human(start))
        else:
            print("Starting copy at beginning of source")
    # compute the target intervals (source - dest)
    src_intervals = await _get_intervals(source_node,
                                         src_stream,
                                         source,
                                         start,
                                         end,
                                         is_nilmdb=nilmdb_source)
    dest_intervals = await _get_intervals(dest_node,
                                          dest_stream,
                                          destination,
                                          start,
                                          end,
                                          is_nilmdb=nilmdb_dest)
    new_intervals = interval_difference(src_intervals, dest_intervals)
    existing_intervals = interval_difference(src_intervals, new_intervals)

    async def _copy(intervals):
        # compute the duration of data to copy
        duration = 0
        for interval in intervals:
            duration += interval[1] - interval[0]

        with click.progressbar(label='Copying data', length=duration) as bar:
            for interval in intervals:
                await _copy_interval(interval[0], interval[1], bar)
                await _copy_annotations(interval[0], interval[1])

    async def _copy_annotations(istart, iend):
        if nilmdb_source:
            src_annotations = await _get_nilmdb_annotations(
                source_node, source, istart, iend)
        else:
            src_annotations = await source_node.annotation_get(src_stream.id,
                                                               start=istart,
                                                               end=iend)

        if nilmdb_dest:
            # get *all* the destination annotations, otherwise we'll lose annotations outside this interval
            dest_annotations = await _get_nilmdb_annotations(
                dest_node, destination)
            new_annotations = [
                a for a in src_annotations if a not in dest_annotations
            ]
            if len(new_annotations) > 0:
                # create IDs for the new annotations
                if len(dest_annotations) > 0:
                    id_val = max([a.id for a in dest_annotations]) + 1
                else:
                    id_val = 0
                for a in new_annotations:
                    a.id = id_val
                    id_val += 1
                await _create_nilmdb_annotations(
                    dest_node, destination, new_annotations + dest_annotations)
        else:
            dest_annotations = await dest_node.annotation_get(dest_stream.id,
                                                              start=istart,
                                                              end=iend)
            new_annotations = [
                a for a in src_annotations if a not in dest_annotations
            ]
            for annotation in new_annotations:
                await dest_node.annotation_create(annotation, dest_stream.id)

    if len(new_intervals) == 0:
        if len(src_intervals) > 0:
            click.echo("Nothing to copy, syncing annotations")
            for interval in src_intervals:
                await _copy_annotations(interval[0], interval[1])
        else:
            click.echo("Nothing to copy")
        # clean up
        if not nilmdb_dest:
            await dest_node.close()
        if not nilmdb_source:
            await source_node.close()
        return

    async def _copy_interval(istart, iend, bar):
        #print("[%s] -> [%s]" % (timestamp_to_human(istart), timestamp_to_human(iend)))
        if nilmdb_source:
            src_params = {
                'path': source,
                'binary': 1,
                'start': istart,
                'end': iend
            }
            src_url = "{server}/stream/extract".format(server=source_node)
            src_headers = {}
            src_ssl = None
        else:
            src_params = {'id': src_stream.id, 'start': istart, 'end': iend}
            src_url = "{server}/data".format(server=source_node.session.url)
            src_headers = {"X-API-KEY": source_node.session.key}
            src_ssl = source_node.session.ssl_context
        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(
                total=None)) as session:
            async with session.get(src_url,
                                   params=src_params,
                                   headers=src_headers,
                                   ssl=src_ssl) as src_response:
                if src_response.status != 200:
                    msg = await src_response.text()
                    if msg == 'this stream has no data':
                        # This is not an error because a previous copy may have been interrupted
                        # This will cause the destination to have an interval gap where the source has no data
                        # Example:   source:  |**     *******|
                        #            dest:    |** |  |*******|
                        #                          ^--- looks like missing data but there's nothing in the source
                        return  # ignore empty intervals
                    raise click.ClickException(
                        "Error reading from source: %s" % msg)

                pipe = pipes.InputPipe(stream=dest_stream,
                                       reader=src_response.content)

                async def _data_sender():

                    last_ts = istart
                    try:
                        while True:
                            data = await pipe.read()
                            pipe.consume(len(data))
                            if len(data) > 0:
                                cur_ts = data[-1]['timestamp']
                                yield data.tobytes()
                                # total time extents of this chunk
                                bar.update(cur_ts - last_ts)
                                last_ts = cur_ts
                            # if pipe.end_of_interval:
                            #    yield pipes.interval_token(dest_stream.layout). \
                            #        tostring()
                    except pipes.EmptyPipe:
                        pass
                    bar.update(iend - last_ts)

                if nilmdb_dest:
                    dst_params = {
                        "start": istart,
                        "end": iend,
                        "path": destination,
                        "binary": 1
                    }
                    dst_url = "{server}/stream/insert".format(server=dest_node)
                    await _send_nilmdb_data(
                        dst_url, dst_params, _data_sender(),
                        pipes.compute_dtype(dest_stream.layout), session)
                else:
                    dst_url = "{server}/data".format(
                        server=dest_node.session.url)
                    dst_params = {"id": dest_stream.id}
                    dst_headers = {"X-API-KEY": dest_node.session.key}
                    dst_ssl = dest_node.session.ssl_context
                    async with session.post(dst_url,
                                            params=dst_params,
                                            data=_data_sender(),
                                            headers=dst_headers,
                                            ssl=dst_ssl,
                                            chunked=True) as dest_response:
                        if dest_response.status != 200:
                            msg = await dest_response.text()
                            raise errors.ApiError(
                                "Error writing to destination: %s" % msg)

    try:
        # copy over any new annotations from existing intervals
        for interval in existing_intervals:
            await _copy_annotations(interval[0], interval[1])
        await _copy(new_intervals)
        click.echo("\tOK")
    # this should be caught by the stream info requests
    # it is only generated if the joule server stops during the
    # data read/write
    except aiohttp.ClientError as e:  # pragma: no cover
        raise click.ClickException("Error: %s" % str(e))
    finally:
        if not nilmdb_dest:
            await dest_node.close()
        if not nilmdb_source:
            await source_node.close()
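The copy planning above hinges on interval_difference. Below is a minimal pure-Python sketch of the assumed semantics (subtracting covered ranges from [start, end) pairs); it is not joule's actual implementation.

# Sketch of assumed interval_difference semantics: remove the ranges in
# `remove` from each interval in `source`.
def interval_difference_sketch(source, remove):
    result = []
    for s_start, s_end in source:
        pieces = [(s_start, s_end)]
        for r_start, r_end in remove:
            next_pieces = []
            for p_start, p_end in pieces:
                if r_end <= p_start or r_start >= p_end:
                    next_pieces.append((p_start, p_end))  # no overlap
                else:
                    if p_start < r_start:
                        next_pieces.append((p_start, r_start))
                    if r_end < p_end:
                        next_pieces.append((r_end, p_end))
            pieces = next_pieces
        result.extend(pieces)
    return result

print(interval_difference_sketch([(0, 100)], [(20, 30), (50, 60)]))
# -> [(0, 20), (30, 50), (60, 100)]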
Example #6
async def _run(start, end, source, destination, window, node):
    # --- RETRIEVE SOURCE STREAMS ---
    # prints out error messages if the source streams do not exist
    try:
        source_stream: joule.DataStream = await node.data_stream_get(source)
    except joule.errors.ApiError as e:
        raise click.ClickException(f"Source stream {source}: {str(e)}")

    # --- CHECK FOR DESTINATION STREAM ---
    destination_width = len(source_stream.elements)
    dest_exists = True
    dest_stream = None  # this is set below, or created later
    try:
        dest_stream = await node.data_stream_get(destination)
        if len(dest_stream.elements) != destination_width:
            raise click.ClickException(
                f"Destination must have {destination_width} elements")
        if not dest_stream.datatype.startswith('float'):
            raise click.ClickException(f"Destination must be a float datatype")

    except joule.errors.ApiError as e:
        dest_exists = False
    click.echo("Median filter with window size %d" % window)

    click.echo(f"Source Stream: \n\t{source}")
    if dest_exists:
        click.echo(f"Destination Stream: \n\t{destination}")
    else:
        click.echo(f"Creating Destination: \n\t{destination}")

    if not click.confirm("Proceed?"):
        click.echo("Cancelled")
        return

    if not dest_exists:
        # create the destination
        dest_path = destination.split('/')[:-1]
        dest_name = destination.split('/')[-1]

        dest_stream = joule.api.DataStream(name=dest_name,
                                           elements=source_stream.elements,
                                           datatype='float32')
        try:
            dest_stream = await node.data_stream_create(
                dest_stream, '/'.join(dest_path))
        except joule.errors.ApiError as e:
            raise click.ClickException(f"Creating destination: {str(e)}")

    # --- READY TO GO ---
    for interval in await node.data_intervals(source_stream,
                                              start=start,
                                              end=end):
        start = interval[0]
        end = interval[1]
        click.echo("Processing [%s] -> [%s]" %
                   (timestamp_to_human(start), timestamp_to_human(end)))
        median_filter = MedianFilter()
        inputs = {'input': await node.data_read(source_stream, start, end)}
        outputs = {'output': await node.data_write(dest_stream, start, end)}
        args = argparse.Namespace(window=window)
        await median_filter.run(args, inputs, outputs)
        await outputs['output'].close()
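The MedianFilter module itself is not shown here; a stand-in for its core computation could apply a running median to each element column, e.g. with scipy (an assumed approach, not the module's actual code).

# Stand-in for the per-block median computation (assumption).
import numpy as np
from scipy.signal import medfilt

def median_block(data: np.ndarray, window: int) -> np.ndarray:
    out = np.empty(data.shape)
    for col in range(data.shape[1]):
        # kernel_size must be odd for medfilt
        out[:, col] = medfilt(data[:, col], kernel_size=window)
    return out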
Example #7
def admin_ingest(config, backup, node, map, pgctl_binary, yes, start, end):
    """Restore data from a backup."""
    # expensive imports so only execute if the function is called
    from joule.services import load_config
    import sqlalchemy
    from sqlalchemy.orm import Session
    from joule.models import Base, TimescaleStore, NilmdbStore

    parser = configparser.ConfigParser()
    loop = asyncio.get_event_loop()
    # make sure either a backup or a node is specified
    # exactly one of backup / node must be given
    if (backup is None) == (node is None):
        raise click.ClickException(
            "Specify either a backup or a node to ingest data from")

    # make sure the time bounds make sense
    if start is not None:
        try:
            start = utilities.human_to_timestamp(start)
        except ValueError:
            raise errors.ApiError("invalid start time: [%s]" % start)
    if end is not None:
        try:
            end = utilities.human_to_timestamp(end)
        except ValueError:
            raise errors.ApiError("invalid end time: [%s]" % end)
    if (start is not None) and (end is not None) and ((end - start) <= 0):
        raise click.ClickException(
            "Error: start [%s] must be before end [%s]" %
            (utilities.timestamp_to_human(start),
             utilities.timestamp_to_human(end)))

    # parse the map file if specified
    stream_map = None
    if map is not None:
        stream_map = []
        try:
            with open(map, newline='') as csvfile:
                reader = csv.reader(csvfile,
                                    delimiter=',',
                                    quotechar='|',
                                    skipinitialspace=True)
                for row in reader:
                    if len(row) == 0:  # ignore blank lines
                        continue
                    if len(row) == 1 and len(
                            row[0]) == 0:  # line with only whitespace
                        continue
                    if row[0][0] == '#':  # ignore comments
                        continue
                    if len(row) != 2:
                        raise errors.ConfigurationError(
                            """invalid map format. Refer to template below:
    
     # this line is a comment
     # only paths in this file will be copied
     # source and destination paths are separated by a ','
     
     /source/path, /destination/path
     /source/path2, /destination/path2
     #..etc
     
     """)
                    stream_map.append(row)
        except FileNotFoundError:
            raise click.ClickException("Cannot find map file at [%s]" % map)
        except PermissionError:
            raise click.ClickException("Cannot read map file at [%s]" % map)
        except errors.ConfigurationError as e:
            raise click.ClickException(str(e))

    # load the Joule configuration file
    try:
        with open(config, 'r') as f:
            parser.read_file(f, config)
            joule_config = load_config.run(custom_values=parser)
    except FileNotFoundError:
        raise click.ClickException(
            "Cannot load joule configuration file at [%s]" % config)
    except PermissionError:
        raise click.ClickException(
            "Cannot read joule configuration file at [%s] (run as root)" %
            config)
    except errors.ConfigurationError as e:
        raise click.ClickException("Invalid configuration: %s" % e)

    dest_engine = sqlalchemy.create_engine(joule_config.database)

    Base.metadata.create_all(dest_engine)
    dest_db = Session(bind=dest_engine)
    if joule_config.nilmdb_url is not None:
        dest_datastore = NilmdbStore(joule_config.nilmdb_url, 0, 0, loop)
    else:
        dest_datastore = TimescaleStore(joule_config.database, 0, 0, loop)

    # demote privileges
    if "SUDO_GID" in os.environ:
        os.setgid(int(os.environ["SUDO_GID"]))
    if "SUDO_UID" in os.environ:
        os.setuid(int(os.environ["SUDO_UID"]))

    # create a log file for exec cmds
    pg_log_name = "joule_restore_log_%s.txt" % uuid.uuid4().hex.upper()[0:6]
    pg_log = open(pg_log_name, 'w')

    # if pgctl_binary is not specified, try to autodetect it
    if pgctl_binary is None:
        try:
            completed_proc = subprocess.run(["psql", "-V"],
                                            stdout=subprocess.PIPE)
            output = completed_proc.stdout.decode('utf-8')
            version = output.split(" ")[2]
            major_version = version.split(".")[0]
            pgctl_binary = "/usr/lib/postgresql/%s/bin/pg_ctl" % major_version
        except (FileNotFoundError, IndexError):
            raise click.ClickException(
                "cannot autodetect pg_ctl location, specify with -b")

    # determine if the source is a backup or a node
    if node is not None:
        live_restore = True
        src_dsn = loop.run_until_complete(get_dsn(node))
        # check whether the source uses nilmdb
        click.echo("WARNING: Nilmdb sources are not supported yet")
        src_datastore = TimescaleStore(src_dsn, 0, 0, loop)
        nilmdb_proc = None
    else:
        if not os.path.isdir(backup):
            raise click.ClickException("backup [%s] does not exist" % backup)
        src_dsn = start_src_db(backup, pgctl_binary, pg_log)
        # check whether the source uses nilmdb
        nilmdb_path = os.path.join(backup, 'nilmdb')
        nilmdb_proc = None
        if os.path.exists(nilmdb_path):
            port = unused_port()
            nilmdb_proc = start_src_nilmdb(nilmdb_path, port, pg_log)
            click.echo("waiting for nilmdb to initialize...")
            time.sleep(2)
            src_datastore = NilmdbStore('http://127.0.0.1:%d' % port, 0, 0,
                                        loop)
            if joule_config.nilmdb_url is None:
                click.echo(
                    "Note: re-copying from NilmDB to Timescale may result in --nothing to copy-- messages"
                )
        else:
            src_datastore = TimescaleStore(src_dsn, 0, 0, loop)

        live_restore = False

    src_engine = sqlalchemy.create_engine(src_dsn)

    num_tries = 0
    max_tries = 1
    while True:
        try:
            Base.metadata.create_all(src_engine)
            break
        except sqlalchemy.exc.OperationalError as e:
            if live_restore:
                raise click.ClickException(
                    str(e))  # this should work immediately
            num_tries += 1
            click.echo("... attempting to connect to source database (%d/%d)" %
                       (num_tries, max_tries))
            time.sleep(2)
            if num_tries >= max_tries:
                raise click.ClickException(
                    "cannot connect to source database, log saved in [%s]" %
                    pg_log_name)

    src_db = Session(bind=src_engine)

    try:
        loop.run_until_complete(
            run(src_db, dest_db, src_datastore, dest_datastore, stream_map,
                yes, start, end))
    except errors.ConfigurationError as e:
        print("Logs written to [%s]" % pg_log_name)
        raise click.ClickException(str(e))
    finally:
        # close connections
        dest_db.close()
        src_db.close()
        loop.run_until_complete(dest_datastore.close())
        loop.run_until_complete(src_datastore.close())
        # clean up database if not a live_restore
        if not live_restore:
            args = ["-D", os.path.join(backup)]
            args += ["stop"]
            cmd = [pgctl_binary] + args
            subprocess.call(cmd, stderr=pg_log, stdout=pg_log)
            sock_path = os.path.join(backup, 'sock')
            sockets = os.listdir(sock_path)
            for s in sockets:
                os.remove(os.path.join(sock_path, s))
            os.rmdir(sock_path)
            if nilmdb_proc is not None:
                nilmdb_proc.terminate()
                nilmdb_proc.communicate()

    pg_log.close()
    os.remove(pg_log_name)
    click.echo("OK")
Example #8
async def run(src_db: 'Session', dest_db: 'Session',
              src_datastore: 'DataStore', dest_datastore: 'DataStore',
              stream_map: Optional[List], confirmed: bool,
              start: Optional[int], end: Optional[int]):
    from joule.models import DataStream, folder, data_stream
    from joule.services import parse_pipe_config

    src_streams = src_db.query(DataStream).all()
    dest_streams = dest_db.query(DataStream).all()
    await src_datastore.initialize(src_streams)
    await dest_datastore.initialize(dest_streams)

    if stream_map is None:
        src_streams = src_db.query(DataStream).all()
        src_paths = map(folder.get_stream_path, src_streams)
        stream_map = map(lambda _path: [_path, _path], src_paths)

    # create the copy map array
    copy_maps = []
    for item in stream_map:
        # get the source stream
        source = folder.find_stream_by_path(item[0], src_db)
        if source is None:
            raise errors.ConfigurationError(
                "source stream [%s] does not exist" % item[0])
        src_intervals = await src_datastore.intervals(source, start, end)
        # get or create the destination stream
        dest = folder.find_stream_by_path(item[1], dest_db)
        if dest is None:
            (path, name, _) = parse_pipe_config.parse_pipe_config(item[1])
            dest_folder = folder.find(path, dest_db, create=True)
            dest = data_stream.from_json(source.to_json())
            # set the attributes on the new stream
            dest.name = name
            dest.keep_us = dest.KEEP_ALL
            dest.is_configured = False
            dest.is_source = False
            dest.is_destination = False
            dest.id = None
            for e in dest.elements:
                e.id = None
            dest_folder.streams.append(dest)
            dest_intervals = None
        else:
            # make sure the destination is compatible
            if dest.layout != source.layout:
                raise errors.ConfigurationError(
                    "source stream [%s] is not compatible with destination stream [%s]"
                    % (item[0], item[1]))

            dest_intervals = await dest_datastore.intervals(dest, start, end)
        # figure out the time bounds to copy
        if dest_intervals is None:
            copy_intervals = src_intervals
        else:
            copy_intervals = utilities.interval_difference(
                src_intervals, dest_intervals)

        copy_maps.append(CopyMap(source, dest, copy_intervals))

    # display the copy table
    rows = []
    copy_required = False
    for item in copy_maps:
        if item.start is None:
            start = "\u2014"
            end = "\u2014"
        else:
            start = utilities.timestamp_to_human(item.start)
            end = utilities.timestamp_to_human(item.end)
            copy_required = True
        rows.append([item.source_path, item.dest_path, start, end])
    click.echo(
        tabulate(rows,
                 headers=["Source", "Destination", "From", "To"],
                 tablefmt="fancy_grid"))
    if not copy_required:
        click.echo("No data needs to be copied")
        return

    if not confirmed and not click.confirm("Start data copy?"):
        click.echo("cancelled")
        return

    dest_db.commit()
    # execute the copy
    for item in copy_maps:
        await copy(item, src_datastore, dest_datastore, src_db, dest_db)
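CopyMap is referenced but not defined in this example. From its usage (built from a source, a destination, and an interval list; start/end are None when there is nothing to copy), a plausible reconstruction is:

# Plausible shape inferred from usage; the real class may differ, and the
# source_path/dest_path fields used in the table above are omitted here.
from dataclasses import dataclass
from typing import List, Optional, Tuple

@dataclass
class CopyMapSketch:
    source: object                    # source DataStream
    dest: object                      # destination DataStream
    intervals: List[Tuple[int, int]]  # [start, end) pairs to copy

    @property
    def start(self) -> Optional[int]:
        return self.intervals[0][0] if self.intervals else None

    @property
    def end(self) -> Optional[int]:
        return self.intervals[-1][1] if self.intervals else None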
Example #9
    async def _run():
        nonlocal stream_path
        # Open the file and make sure it is the right type
        try:
            hdf_root = h5py.File(file, 'r')
            hdf_timestamp = hdf_root['timestamp']
            hdf_data = hdf_root['data']
            start = hdf_timestamp[0, 0]
            end = hdf_timestamp[-1, 0]
            # make sure the lengths of both datasets are the same
            if len(hdf_data) != len(hdf_timestamp):
                raise click.ClickException(
                    "Length of [data] and [timestamp] datasets must match")
            # if a stream is not specified see if one is in the data file
            if stream_path is None:
                try:
                    stream_path = hdf_root.attrs['path']
                except KeyError:
                    raise click.ClickException(
                        "Specify a target stream with --stream")
        except OSError:
            raise click.ClickException("Data file [%s] must be hdf5 format" %
                                       file)
        except KeyError:
            raise click.ClickException(
                "Data file must contain [data] and [timestamp] datasets")

        # get the stream object from the API
        try:
            stream_obj = await config.node.data_stream_get(stream_path)
            print("Destination stream: %s" % stream_path)

            stream_info = await config.node.data_stream_info(stream_path)
            # make sure the datatypes match
            dtype = compute_dtype(stream_obj.layout)
            if dtype[1].base != hdf_data.dtype:
                raise click.ClickException(
                    "Incompatible datatypes, stream is [%s] and data file is [%s]"
                    % (dtype[1].base, hdf_data.dtype))
            # make sure the number of elements match
            if len(stream_obj.elements) != hdf_data.shape[1]:
                raise click.ClickException(
                    "DataStream has [%d] elements but data file has [%d] elements"
                    % (len(stream_obj.elements), hdf_data.shape[1]))
            # check if there is existing data in this time period
            if stream_info.rows > 0 and (start < stream_info.end
                                         and end >= stream_info.start):
                # confirm overwrite
                if not click.confirm(
                        "This will remove existing data between %s- %s" %
                    (timestamp_to_human(start), timestamp_to_human(end))):
                    click.echo("Cancelled")
                    return
                await config.node.data_delete(stream_obj, start, end + 1)
        except errors.ApiError as e:
            if '404' not in str(e):
                raise click.ClickException(str(e))
            # this stream doesn't exist, create it from the hdf attributes
            stream_obj = await _create_stream(stream_path, hdf_root,
                                              config.node)

        pipe = await config.node.data_write(stream_obj)

        # progress bar for writing to a file
        bar_ctx = click.progressbar(length=len(hdf_data),
                                    label='ingesting data')
        bar = bar_ctx.__enter__()
        for idx in range(0, len(hdf_data), BLOCK_SIZE):
            ts = hdf_timestamp[idx:idx + BLOCK_SIZE]
            data = hdf_data[idx:idx + BLOCK_SIZE]
            sdata = np.empty(len(ts), dtype=compute_dtype(stream_obj.layout))
            sdata['timestamp'][:, None] = ts
            sdata['data'] = data
            await pipe.write(sdata)
            bar.update(len(data))
        await pipe.close()
        bar_ctx.__exit__(None, None, None)
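compute_dtype maps a joule layout string to a numpy structured dtype; the sketch below shows the assumed mapping for a layout like 'float32_3' (an int64 timestamp column plus an element array), not joule's actual code.

# Assumed layout -> dtype mapping.
import numpy as np

def compute_dtype_sketch(layout: str) -> np.dtype:
    datatype, count = layout.split('_')
    return np.dtype([('timestamp', '<i8'), ('data', datatype, (int(count),))])

print(compute_dtype_sketch('float32_3'))
# -> [('timestamp', '<i8'), ('data', '<f4', (3,))]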
Example #10
async def _run(start, end, destination, primary, secondaries, node):
    # --- RETRIEVE SOURCE STREAMS ---
    # prints out error messages if the source streams do not exist
    try:
        primary_stream: joule.DataStream = await node.data_stream_get(
            primary.split(':')[0])
    except joule.errors.ApiError as e:
        raise click.ClickException(f"Primary stream {primary}: {str(e)}")
    secondary_streams = []
    for s in secondaries:
        try:
            s_stream = await node.data_stream_get(s.split(':')[0])
            secondary_streams.append(s_stream)
        except joule.errors.ApiError as e:
            raise click.ClickException(f"Secondary stream {s}: {str(e)}")

    # --- CHECK FOR DESTINATION STREAM ---
    destination_width = len(primary_stream.elements)
    for s in secondary_streams:
        destination_width += len(s.elements)
    dest_exists = True
    dest_stream = None  # this is set below, or created later
    try:
        dest_stream = await node.data_stream_get(destination)
        if len(dest_stream.elements) != destination_width:
            raise click.ClickException(
                f"Destination must have {destination_width} elements")
        if not dest_stream.datatype.startswith('float'):
            raise click.ClickException(f"Destination must be a float datatype")

    except joule.errors.ApiError as e:
        dest_exists = False

    click.echo(f"Primary Stream: \n\t{primary}")
    click.echo(f"Secondary Streams: \n\t{', '.join(s for s in secondaries)}")
    if dest_exists:
        click.echo(f"Destination Stream: \n\t{destination}")
    else:
        click.echo(f"Creating Destination: \n\t{destination}")

    if not click.confirm("Proceed?"):
        click.echo("Cancelled")
        return

    if not dest_exists:
        # create the destination
        dest_path = destination.split('/')[:-1]
        dest_name = destination.split('/')[-1]
        elements = []
        # check for a primary prefix
        prefix = ""
        if ':' in primary:
            prefix = primary.split(':')[-1]
        else:
            prefix = primary_stream.name
        for e in primary_stream.elements:
            e.name = prefix + ' ' + e.name
            elements.append(e)
        for i in range(len(secondaries)):
            prefix = ""
            if ':' in secondaries[i]:
                prefix = secondaries[i].split(':')[-1]
            else:
                prefix = secondary_streams[i].name
            for e in secondary_streams[i].elements:
                e.name = prefix + ' ' + e.name
                elements.append(e)
        dest_stream = joule.api.DataStream(name=dest_name,
                                           elements=elements,
                                           datatype='float32')
        try:
            dest_stream = await node.data_stream_create(
                dest_stream, '/'.join(dest_path))
        except joule.errors.ApiError as e:
            raise click.ClickException(f"Creating destination: {str(e)}")

    # --- READY TO GO ---
    common_intervals = await node.data_intervals(primary_stream,
                                                 start=start,
                                                 end=end)
    for s in secondary_streams:
        s_intervals = await node.data_intervals(s, start=start, end=end)
        common_intervals = interval_intersection(common_intervals, s_intervals)
    copied_intervals = await node.data_intervals(dest_stream,
                                                 start=start,
                                                 end=end)
    # do not copy intervals that we already have
    pending_intervals = interval_difference(common_intervals, copied_intervals)
    # only copy intervals with at least 1 second of data; single-sample offsets
    # can make the merge see missing data where there is none, probably because
    # the backend does not store the data boundaries exactly
    pending_intervals = [i for i in pending_intervals if (i[1] - i[0]) > 1e6]
    if len(pending_intervals) == 0:
        click.echo("Destination already has all the data, nothing to do")
    else:
        start_time = joule.utilities.timestamp_to_human(
            pending_intervals[0][0])
        end_time = joule.utilities.timestamp_to_human(pending_intervals[-1][1])
        click.echo(
            f"Merging from {start_time} to {end_time} ({len(pending_intervals)} intervals)"
        )
    for interval in pending_intervals:
        start = interval[0]
        end = interval[1]
        click.echo("Processing [%s] -> [%s]" %
                   (timestamp_to_human(start), timestamp_to_human(end)))
        merge_filter = MergeFilter()
        inputs = {'primary': await node.data_read(primary_stream, start, end)}
        for i in range(len(secondary_streams)):
            inputs[f"secondary_{i}"] = await node.data_read(
                secondary_streams[i], start, end)
        outputs = {
            'destination': await node.data_write(dest_stream, start, end)
        }
        args = argparse.Namespace(primary="primary")
        await merge_filter.run(args, inputs, outputs)
        await outputs['destination'].close()
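interval_intersection above is the counterpart to interval_difference; below is a naive pure-Python sketch of the assumed semantics (keep only the time ranges present in both lists), not joule's actual implementation.

# Sketch of assumed interval_intersection semantics (O(n*m), fine for a demo).
def interval_intersection_sketch(a, b):
    result = []
    for a_start, a_end in a:
        for b_start, b_end in b:
            lo, hi = max(a_start, b_start), min(a_end, b_end)
            if lo < hi:
                result.append((lo, hi))
    return result

print(interval_intersection_sketch([(0, 50), (60, 100)], [(40, 80)]))
# -> [(40, 50), (60, 80)]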