Example #1
def hapitime2datetime(Time, **kwargs):
    """Convert HAPI timestamps to Python datetimes.

    A HAPI-compliant server represents time as an ISO 8601 string
    (with several constraints; see the `HAPI specification
    <https://github.com/hapi-server/data-specification/blob/master/hapi-dev/HAPI-data-access-spec-dev.md#representation-of-time>`_).
    hapi() reads these into a NumPy array of Python byte literals.

    This function converts the byte literals to Python datetime objects.

    Typical usage:
        data = hapi(...) # Get data
        DateTimes = hapitime2datetime(data['Time']) # Convert

    Parameters
    ----------
    Time:
        - A numpy array of HAPI timestamp byte literals
        - A numpy array of HAPI timestamp strings
        - A list of HAPI timestamp byte literals
        - A list of HAPI timestamp strings
        - A HAPI timestamp byte literal
        - A HAPI timestamp string

    Returns
    ----------
    A NumPy array of Python datetime objects with length = len(Time)

    Examples
    ----------
    All of the following return
      array([datetime.datetime(1970, 1, 1, 0, 0)], dtype=object)

    from hapiclient.hapi import hapitime2datetime
    import numpy as np

    hapitime2datetime(np.array([b'1970-01-01T00:00:00.000Z']))
    hapitime2datetime(np.array(['1970-01-01T00:00:00.000Z']))

    hapitime2datetime([b'1970-01-01T00:00:00.000Z'])
    hapitime2datetime(['1970-01-01T00:00:00.000Z'])

    hapitime2datetime(b'1970-01-01T00:00:00.000Z')
    hapitime2datetime('1970-01-01T00:00:00.000Z')

    """

    from datetime import datetime

    try:
        # Python 2 does not have datetime.timezone, so use pytz.
        import pytz
        tzinfo = pytz.UTC
    except ImportError:
        # Python 3
        from datetime import timezone
        tzinfo = timezone.utc

    opts = {'logging': False}

    opts = setopts(opts, kwargs)

    if isinstance(Time, list):
        Time = np.asarray(Time)
    if isinstance(Time, (str, bytes)):
        Time = np.asarray([Time])

    if not isinstance(Time, np.ndarray):
        error('Problem with time data.' + '\n')
        return

    if Time.size == 0:
        error('Time array is empty.' + '\n')
        return

    reshape = False
    if Time.shape[0] != Time.size:
        reshape = True
        shape = Time.shape
        Time = Time.flatten()

    if type(Time[0]) == np.bytes_:
        try:
            Time = Time.astype('U')
        except Exception:
            error('Problem with time data. First value: ' + str(Time[0]) +
                  '\n')
            return

    tic = time.time()

    try:
        # This will fail if pandas is not available, if the format is
        # YYYY-DOY, or for other valid ISO 8601 dates such as
        # 2001-01-01T00:00:03.Z
        # When infer_datetime_format is used, a Timestamp object is returned.
        # When format=... is used, a datetime object is returned.
        Time = pandas.to_datetime(Time,
                                  infer_datetime_format=True).to_pydatetime()
        if reshape:
            Time = np.reshape(Time, shape)
        toc = time.time() - tic
        log("Pandas processing time = %.4fs, Input = %s\n" % (toc, Time[0]),
            opts)
        return Time
    except Exception:
        pass

    # Convert from Python byte literals to unicode strings
    # https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.astype.html
    # https://www.b-list.org/weblog/2017/sep/05/how-python-does-unicode/
    # Note the new Time variable requires 4x more memory.
    Time = Time.astype('U')
    # Could save memory at cost of speed by decoding at each iteration below, e.g.
    # Time[i] -> Time[i].decode('utf-8')
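    # Example of the memory factor: np.array([b'1970-01-01T00:00:00.000Z'])
    # has dtype 'S24' (itemsize 24 bytes); after .astype('U') the dtype is
    # '<U24' (itemsize 96 bytes), i.e., 4 bytes per character.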

    d = 0
    # Catch case where no trailing Z
    # Technically HAPI ISO 8601 must have trailing Z:
    # https://github.com/hapi-server/data-specification/blob/master/hapi-dev/HAPI-data-access-spec-dev.md#representation-of-time
    if not re.match(r".*Z$", Time[0]):
        d = 1

    pythonDateTime = np.empty(len(Time), dtype=object)

    # Parse date part
    # If h=True then hour given.
    # If hm=True, then hour and minute given.
    # If hms=True, then hour, minute, and second given.
    (h, hm, hms) = (False, False, False)

    if len(Time[0]) == 4 or (len(Time[0]) == 5 and Time[0][-1] == "Z"):
        fmt = '%Y'
        to = 5
    elif re.match(r"[0-9]{4}-[0-9]{3}", Time[0]):
        # YYYY-DOY format
        fmt = "%Y-%j"
        to = 9
        if len(Time[0]) >= 12 - d:
            h = True
        if len(Time[0]) >= 15 - d:
            hm = True
        if len(Time[0]) >= 18 - d:
            hms = True
    elif re.match(r"[0-9]{4}-[0-9]{2}", Time[0]):
        # YYYY-MM-DD format
        fmt = "%Y-%m"
        to = 8
        if len(Time[0]) > 8:
            fmt = fmt + "-%d"
            to = 11
        if len(Time[0]) >= 14 - d:
            h = True
        if len(Time[0]) >= 17 - d:
            hm = True
        if len(Time[0]) >= 20 - d:
            hms = True
    else:
        # TODO: Also check for invalid time string lengths.
        # Should use JSON schema regular expressions for allowed versions of ISO 8601.
        error('First time value %s is not a valid HAPI Time' % Time[0])

    fmto = fmt
    if h:
        fmt = fmt + "T%H"
    if hm:
        fmt = fmt + ":%M"
    if hms:
        fmt = fmt + ":%S"

    if re.match(r".*\.[0-9].*$", Time[0]):
        fmt = fmt + ".%f"
    if re.match(r".*\.$", Time[0]) or re.match(r".*\.Z$", Time[0]):
        fmt = fmt + "."

    if re.match(r".*Z$", Time[0]):
        fmt = fmt + "Z"

    # TODO: Why not use pandas.to_datetime here with fmt?
    try:
        for i in range(0, len(Time)):
            pythonDateTime[i] = datetime.strptime(Time[i],
                                                  fmt).replace(tzinfo=tzinfo)
    except Exception:
        error('Could not parse time value ' + Time[i] + ' using ' + fmt)

    toc = time.time() - tic
    log(
        "Manual processing time = %.4fs, Input = %s, fmto = %s, fmt = %s\n" %
        (toc, Time[0], fmto, fmt), opts)

    if reshape:
        pythonDateTime = np.reshape(pythonDateTime, shape)

    return pythonDateTime
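
# A minimal usage sketch of hapitime2datetime. The input form is taken from
# the docstring above; the exact repr of the result depends on the pandas
# version, so only the length and element type are checked here.
from hapiclient.hapi import hapitime2datetime
import numpy as np

T = hapitime2datetime(np.array([b'1970-01-01T00:00:00.000Z']))
print(len(T), type(T[0]))  # -> 1 <class 'datetime.datetime'>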
Example #2
def autoplot(server, dataset, parameters, start, stop, **kwargs):
    """Plot data from a HAPI server using Autoplot.
    
    If autoplot.jar is not found locally, it is downloaded and launched.
    If found, it is updated when the server version is newer than the
    cached version.
    
    Example
    -------
    >>> from hapiclient import autoplot
    >>> server = 'http://hapi-server.org/servers/TestData2.0/hapi'
    >>> autoplot(server, 'dataset1', 'scalar,vector', '1970-01-01', '1970-01-02')
    
    Autoplot application launches or its canvas is updated.
    
    The options are the same as those for `hapiplot` with the addition of
    the following kwargs:

    stack : bool [False]
        Create a stack plot of the parameters.

    port : int [8079]
        The port number to use to connect to Autoplot.

    version : string ['devel']
        The version of Autoplot to use. Can be a version string, e.g.,
        'v2018a_11', 'devel', 'latest', or 'nightly'. See 
        <http://autoplot.org/developer#Development_Versions> for a
        description of the difference between versions.

    """

    import os
    import re
    import platform
    import subprocess

    from hapiclient.util import setopts, log, urlopen, urlretrieve, urlquote
    from hapiclient.hapi import cachedir

    opts = {
        'logging': False,
        'cache': True,
        'cachedir': cachedir(),
        'usecache': False,
        'newwindow': False,
        'version': 'devel',
        'port': 8079
    }

    # Override defaults
    opts = setopts(opts, kwargs)

    autoplotserver = "http://localhost:" + str(opts['port']) + "/"

    url = server + "?id=" + dataset + "&parameters=" + parameters
    url = url + "&timerange=" + start + "/" + stop

    serverrunning = False
    try:
        # See if server needs to be started.
        if opts['logging']:
            log('Trying test. Requesting ' + autoplotserver, opts)
        f = urlopen(autoplotserver)
        res = f.read().decode('utf-8')
        if res.startswith('OK'):
            log('Server running.', opts)
            serverrunning = True
        else:
            log('Server responding but with wrong response to test.', opts)
        f.close()
    except Exception:
        log('Server not running. Will start server.', opts)

    log(url, opts)
    if serverrunning:
        # Send request to update GUI.
        try:
            # This won't detect if the version requested matches
            # the version running.
            rurl = autoplotserver + "?uri=" + urlquote("vap+hapi:" + url)
            if opts['logging']: print("autoplot(): Requesting " + rurl)
            log('Autoplot GUI should be updating.', opts)
            f = urlopen(rurl)
            res = f.read().decode('utf-8')
            if res.startswith('OK'):
                log('Request successful. Autoplot GUI updated.', opts)
                f.close()
                return
            else:
                f.close()
                log('Request unsuccessful.', opts)
                serverrunning = False
        except Exception as e:
            print(e)

    # Request was sent, so return.
    if serverrunning:
        return

    if opts['version'] == 'nightly':
        jarurl = 'https://ci-pw.physics.uiowa.edu/job/autoplot-release/lastSuccessfulBuild/artifact/autoplot/Autoplot/dist/autoplot.jar'
    elif opts['version'] == 'devel':
        jarurl = 'http://autoplot.org/jnlp/devel/autoplot.jar'
    elif opts['version'].startswith('v'):
        jarurl = 'http://autoplot.org/jnlp/' + opts['version'] + '/autoplot.jar'
    else:
        opts['version'] = 'latest'
        jarurl = 'http://autoplot.org/jnlp/latest/autoplot.jar'

    try:
        result = subprocess.check_output('java -version',
                                         shell=True,
                                         stderr=subprocess.STDOUT)
        version = re.sub(r'.*"(.*)".*', r'\1', result.decode().split('\n')[0])
        log("Java version: " + version, opts)
    except Exception:
        # TODO: Automatically download and extract from https://jdk.java.net/14/?
        log(
            "Java is required. See https://www.java.com/en/download/ or https://jdk.java.net/14/",
            opts)
        return

    jydir = os.path.dirname(os.path.realpath(__file__))
    jarpath = os.path.join(opts['cachedir'],
                           'jar/autoplot-' + opts['version'] + '.jar')
    jaricon = os.path.join(jydir, 'autoplot.png')

    # Download jar file if needed.
    log('Checking if autoplot.jar needs to be downloaded or updated.', opts)
    urlretrieve(jarurl, jarpath, check_last_modified=True, **opts)
    #download(jarpath, jarurl, **opts)

    com = "java"

    if 'darwin' in platform.platform().lower():
        com = com + " -Xdock:icon=" + jaricon
        com = com + ' -Xdock:name="Autoplot"'
    com = com + " -DPORT=" + str(opts['port'])
    com = com + " -DHAPI_DATA=" + opts['cachedir']
    com = com + " -DhapiServerCache=true"
    com = com + " -jar " + jarpath
    com = com + " --noAskParams"
    com = com + " '" + os.path.join(jydir, 'server.jy?uri=')
    com = com + urlquote("vap+hapi:" + url) + "'"
    com = com + " &"
    if opts['logging']: log("Executing " + com, opts)
    os.system(com)
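
# Sketch of the `java -version` parsing used above, applied to a typical
# first line of output. The sample line is an assumption; actual output
# varies by JVM vendor and version.
import re
first_line = 'openjdk version "11.0.2" 2019-01-15'
print(re.sub(r'.*"(.*)".*', r'\1', first_line))  # -> 11.0.2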
Example #3
def hapi(*args, **kwargs):
    """Request data from a HAPI server.

    For additional documentation and demonstration, see
    https://github.com/hapi-server/client-python-notebooks/blob/master/hapi_demo.ipynb

    Version: 0.1.5b3

    Parameters
    ----------
    server : str
        A string with the URL to a HAPI compliant server. (A HAPI URL
        always ends with "/hapi").
    dataset : str
        A string specifying a dataset from a `server`
    parameters: str
        A comma-separated list of parameters in `dataset`
    start: str
        The start time of the requested data
    stop: str
        The end time of the requested data. End times are exclusive - the
        last data record returned by a HAPI server should have a timestamp
        before `stop`.
    options : dict

            `logging` (False) - Log to console

            `cache` (True) - Save responses and processed responses in `cachedir`

            `cachedir` (./hapi-data) - Directory in which to cache responses

            `usecache` (True) - Use files in `cachedir` if found

            `server_list` (https://github.com/hapi-server/servers/raw/master/all.txt)

    Returns
    -------
    result : various
        `result` depends on the input parameters.

        servers = hapi() returns a list of available HAPI server URLs from
        https://github.com/hapi-server/servers/raw/master/all.txt

        dataset = hapi(server) returns a dict of datasets available from a
        URL given by the string `server`.  The dictionary structure follows the
        HAPI JSON structure.

        parameters = hapi(server, dataset) returns a dictionary of parameters
        in the string `dataset`. The dictionary structure follows the HAPI JSON
        structure.

        metadata = hapi(server, dataset, parameters) returns metadata
        associated with each parameter in the comma-separated string `parameters`. The
        dictionary structure follows the HAPI JSON structure.

        data = hapi(server, dataset, parameters, start, stop) returns a
        dictionary with elements corresponding to `parameters`, e.g., if
        `parameters` = 'scalar,vector' and the number of records in the time
        range `start` <= t < `stop` returned is N, then

          data['scalar'] is a NumPy array of shape (N)
          data['vector'] is a NumPy array of shape (N,3)
          data['Time'] is a NumPy array of byte literals with shape (N).
          
          Byte literal times can be converted to Python datetimes using 
          
          dtarray = hapitime2datetime(data['Time'])
        
        data, meta = hapi(server, dataset, parameters, start, stop) returns
        the metadata for parameters in `meta`.

    References
    ----------
        * `HAPI Server Definition <https://github.com/hapi-server/data-specification>`_

    Examples
    ----------
       See https://github.com/hapi-server/client-python-notebooks
    """

    nin = len(args)

    if nin > 0:
        SERVER = args[0]
    if nin > 1:
        DATASET = args[1]
    if nin > 2:
        PARAMETERS = args[2]
    if nin > 3:
        START = args[3]
    if nin > 4:
        STOP = args[4]

    # Override defaults
    opts = setopts(hapiopts(), kwargs)

    from hapiclient import __version__
    log('Running hapi.py version %s' % __version__, opts)

    if nin == 0:  # hapi()
        log('Reading %s' % opts['server_list'], opts)
        # decode('utf8') in following needed to make Python 2 and 3 types match.
        data = urlopen(opts['server_list']).read().decode('utf8').split('\n')
        data = [x for x in data if x]  # Remove empty items (if blank lines)
        # Display server URLs to console.
        log('List of HAPI servers in %s:\n' % opts['server_list'], opts)
        for url in data:
            log("   %s" % url, opts)
        return data

    if nin == 1:  # hapi(SERVER)
        # TODO: Cache
        url = SERVER + '/catalog'
        log('Reading %s' % url, opts)
        res = urlopen(url)
        meta = jsonparse(res, url)
        return meta

    if nin == 2:  # hapi(SERVER, DATASET)
        # TODO: Cache
        url = SERVER + '/info?id=' + DATASET
        log('Reading %s' % url, opts)
        res = urlopen(url)
        meta = jsonparse(res, url)
        return meta

    if nin == 4:
        error('A stop time is required if a start time is given.')

    if nin == 3 or nin == 5:
        # hapi(SERVER, DATASET, PARAMETERS) or
        # hapi(SERVER, DATASET, PARAMETERS, START, STOP)

        if re.search(r', ', PARAMETERS):
            warning(
                "Removing spaces after commas in given parameter list of '" +
                PARAMETERS + "'")
            PARAMETERS = re.sub(r',\s+', ',', PARAMETERS)

        # urld = url subdirectory of cachedir to store files from SERVER
        urld = cachedir(opts["cachedir"], SERVER)

        if opts["cachedir"]: log('file directory = %s' % urld, opts)

        urljson = SERVER + '/info?id=' + DATASET

        # Output from urljson will be saved in a .json file. Parsed json
        # will be stored in a .pkl file. Metadata for all parameters is
        # requested and response is subsetted so only metadata for PARAMETERS
        # is returned.
        fname_root = request2path(SERVER, DATASET, '', '', '',
                                  opts['cachedir'])
        fnamejson = fname_root + '.json'
        fnamepkl = fname_root + '.pkl'

        if nin == 5:  # Data requested
            # URL to get CSV (will be used if binary response is not available)
            urlcsv = SERVER + '/data?id=' + DATASET + '&parameters=' + \
                     PARAMETERS + '&time.min=' + START + '&time.max=' + STOP
            # URL for binary request
            urlbin = urlcsv + '&format=binary'

            # Raw CSV and HAPI Binary (no header) will be stored in .csv and
            # .bin files. Parsed response of either CSV or HAPI Binary will
            # be stored in a .npy file.
            # fnamepklx will contain additional metadata about the request
            # including d/l time, parsing time, and the location of files.
            fname_root = request2path(SERVER, DATASET, PARAMETERS, START, STOP,
                                      opts['cachedir'])
            fnamecsv = fname_root + '.csv'
            fnamebin = fname_root + '.bin'
            fnamenpy = fname_root + '.npy'
            fnamepklx = fname_root + ".pkl"

        metaFromCache = False
        if opts["usecache"]:
            if nin == 3 and os.path.isfile(fnamepkl):
                # Read cached metadata from .pkl file.
                # This returns subsetted metadata with no additional "x_"
                # information (which is stored in fnamepklx).
                log('Reading %s' % fnamepkl.replace(urld + '/', ''), opts)
                f = open(fnamepkl, 'rb')
                meta = pickle.load(f)
                f.close()
                metaFromCache = True
                # Remove parameters not requested.
                meta = subset(meta, PARAMETERS)
                return meta
            if os.path.isfile(fnamepklx):
                # Read subsetted meta file with x_ information
                log('Reading %s' % fnamepklx.replace(urld + '/', ''), opts)
                f = open(fnamepklx, 'rb')
                meta = pickle.load(f)
                metaFromCache = True
                f.close()

        if not metaFromCache:
            # No cached metadata loaded so request it from server.
            log('Reading %s' % urljson.replace(urld + '/', ''), opts)
            res = urlopen(urljson)
            meta = jsonparse(res, urljson)

        # Add information to metadata so we can figure out the request needed
        # to generate it. Also used for labeling plots by hapiplot().
        meta.update({"x_server": SERVER})
        meta.update({"x_dataset": DATASET})

        if opts["cache"]:
            if not os.path.exists(urld): os.makedirs(urld)

        if opts["cache"] and not metaFromCache:
            # Cache metadata for all parameters if it was not already loaded
            # from cache. Note that fnamepklx is written after data downloaded
            # and parsed.
            log('Writing %s ' % fnamejson.replace(urld + '/', ''), opts)
            f = open(fnamejson, 'w')
            json.dump(meta, f, indent=4)
            f.close()

            log('Writing %s ' % fnamepkl.replace(urld + '/', ''), opts)
            f = open(fnamepkl, 'wb')
            # protocol=2 used for Python 2.7 compatibility.
            pickle.dump(meta, f, protocol=2)
            f.close()

        # Remove unrequested parameters if they have not already been
        # removed (b/c loaded from cache).
        if not metaFromCache:
            meta = subset(meta, PARAMETERS)

        if nin == 3:
            return meta

        if opts["usecache"] and os.path.isfile(fnamenpy):
            # Read cached data file.
            log('Reading %s ' % fnamenpy.replace(urld + '/', ''), opts)
            f = open(fnamenpy, 'rb')
            data = np.load(f)
            f.close()
            # There is a possibility that the fnamenpy file existed but
            # fnamepklx was not found (b/c removed). In this case, the meta
            # returned will not have all of the "x_" information inserted below.
            # Code that uses this information needs to account for this.
            return data, meta

        cformats = ['csv', 'binary']  # client formats
        if opts['format'] not in cformats:
            # Check if requested format is implemented by this client.
            error('This client does not handle transport '
                  'format "%s".  Available options: %s' %
                  (opts['format'], ', '.join(cformats)))

        # See if server supports binary
        if opts['format'] != 'csv':
            log('Reading %s' % (SERVER + '/capabilities'), opts)
            res = urlopen(SERVER + '/capabilities')
            caps = jsonparse(res, SERVER + '/capabilities')
            sformats = caps["outputFormats"]  # Server formats
            if 'format' in kwargs and kwargs['format'] not in sformats:
                warning('Requested transport format "%s" not available '
                        'from %s. Will use "csv". Available options: %s' %
                        (opts['format'], SERVER, ', '.join(sformats)))
                opts['format'] = 'csv'
            if 'binary' not in sformats:
                opts['format'] = 'csv'

        ##################################################################
        # Compute data type variable dt used to read HAPI response into
        # a data structure.
        pnames, psizes, dt = [], [], []
        # Each element of cols is an array with start/end column number of
        # parameter.

        cols = np.zeros([len(meta["parameters"]), 2], dtype=np.int32)
        ss = 0  # running sum of prod(size)
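        # Example: for parameters Time, scalar, and vector with sizes
        # 1, 1, and [3], cols = [[0, 0], [1, 1], [2, 4]] and the final
        # running sum is ss = 5.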

        # missing_length = True will be set if HAPI String or ISOTime
        # parameter has no length attribute in metadata (length attribute is
        # required for both in binary but only for primary time column in CSV).
        # When missing_length=True the CSV read gets more complicated.
        missing_length = False

        # Extract sizes and types of parameters.
        for i in range(0, len(meta["parameters"])):
            ptype = str(meta["parameters"][i]["type"])
            pnames.append(str(meta["parameters"][i]["name"]))
            if 'size' in meta["parameters"][i]:
                psizes.append(meta["parameters"][i]['size'])
            else:
                psizes.append(1)

            # For size = [N] case, readers want
            # dtype = ('name', type, N)
            # not
            # dtype = ('name', type, [N])
            if type(psizes[i]) is list and len(psizes[i]) == 1:
                psizes[i] = psizes[i][0]

            if type(psizes[i]) is list and len(psizes[i]) > 1:
                #psizes[i] = list(reversed(psizes[i]))
                psizes[i] = list(psizes[i])

            # First column of ith parameter.
            cols[i][0] = ss
            # Last column of ith parameter.
            cols[i][1] = ss + np.prod(psizes[i]) - 1
            # Running sum of columns.
            ss = cols[i][1] + 1

            # HAPI numerical formats are 64-bit LE floating point and 32-bit LE
            # signed integers.
            if ptype == 'double':
                dtype = (pnames[i], '<d', psizes[i])
            if ptype == 'integer':
                dtype = (pnames[i], np.dtype('<i4'), psizes[i])

            if opts['format'] == 'binary':
                # TODO: If 'length' not available, warn and fall back to CSV.
                # Technically, server response is invalid in this case b/c length attribute
                # required for all parameters if format=binary.
                if ptype == 'string' or ptype == 'isotime':
                    dtype = (pnames[i],
                             'S' + str(meta["parameters"][i]["length"]),
                             psizes[i])
            else:
                # When format=csv, length attribute may not be given (but must be given for
                # first parameter according to the HAPI spec).
                if ptype == 'string' or ptype == 'isotime':
                    if 'length' in meta["parameters"][i]:
                        # length is specified for parameter in metadata. Use it.
                        dtype = (pnames[i], 'S' +
                                 str(meta["parameters"][i]["length"]),
                                 psizes[i])
                    else:
                        # A string or isotime parameter did not have a length.
                        # Will need to use the slower CSV read method.
                        missing_length = True
                        dtype = (pnames[i], object, psizes[i])

            # For testing the reader: force use of the slow read method.
            if opts['format'] == 'csv':
                if opts['method'] in ('numpynolength', 'pandasnolength'):
                    missing_length = True
                    if ptype == 'string' or ptype == 'isotime':
                        dtype = (pnames[i], object, psizes[i])

            # https://numpy.org/doc/stable/release/1.17.0-notes.html#shape-1-fields-in-dtypes-won-t-be-collapsed-to-scalars-in-a-future-version
            if dtype[2] == 1:
                dtype = dtype[0:2]

            dt.append(dtype)
        ##################################################################
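        # Example dt for Time (isotime, length 24), scalar (double), and
        # vector (double, size [3]):
        #   [('Time', 'S24'), ('scalar', '<d'), ('vector', '<d', 3)]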

        # A length attribute is required for all parameters when serving
        # binary, but only for the time parameter when serving CSV. The
        # commented-out code below was meant to catch the case where the
        # server provides binary but one or more requested string parameters
        # are missing a length attribute. Note that as written above,
        # missing_length will never be True when format='binary', so the
        # code above needs to be updated first.
        # if opts['format'] == 'binary' and missing_length:
        #    warnings.warn('Requesting CSV instead of binary because of problem with server metadata.')
        #    opts['format'] = 'csv'

        # Read the data. toc0 is time to download (or build buffer);
        # toc is time to parse (includes download time if buffered IO is used.)
        if opts['format'] == 'binary':
            # HAPI Binary
            if opts["cache"]:
                log(
                    'Writing %s to %s' %
                    (urlbin, fnamebin.replace(urld + '/', '')), opts)
                tic0 = time.time()
                urlretrieve(urlbin, fnamebin)
                toc0 = time.time() - tic0
                log('Reading %s' % fnamebin.replace(urld + '/', ''), opts)
                tic = time.time()
                data = np.fromfile(fnamebin, dtype=dt)
                toc = time.time() - tic
            else:
                from io import BytesIO
                log('Creating buffer: %s' % urlbin, opts)
                tic0 = time.time()
                buff = BytesIO(urlopen(urlbin).read())
                toc0 = time.time() - tic0
                log('Parsing buffer.', opts)
                tic = time.time()
                data = np.frombuffer(buff.read(), dtype=dt)
                toc = time.time() - tic
        else:
            # HAPI CSV
            if opts["cache"]:
                log('Saving %s' % urlcsv.replace(urld + '/', ''), opts)
                tic0 = time.time()
                urlretrieve(urlcsv, fnamecsv)
                toc0 = time.time() - tic0
                log('Parsing %s' % fnamecsv.replace(urld + '/', ''), opts)
            else:
                from io import StringIO
                log('Creating buffer: %s' % urlcsv.replace(urld + '/', ''),
                    opts)
                tic0 = time.time()
                fnamecsv = StringIO(urlopen(urlcsv).read().decode())
                toc0 = time.time() - tic0
                log('Parsing buffer.', opts)

            if not missing_length:
                # All string and isotime parameters have a length in metadata.
                tic = time.time()
                if opts['method'] == 'numpy':
                    data = np.genfromtxt(fnamecsv, dtype=dt, delimiter=',')
                    toc = time.time() - tic
                if opts['method'] == 'pandas':
                    # Read file into Pandas DataFrame
                    df = pandas.read_csv(fnamecsv, sep=',', header=None)

                    # Allocate output N-D array (It is not possible to pass dtype=dt
                    # as computed to pandas.read_csv; pandas dtype is different
                    # from numpy's dtype.)
                    data = np.ndarray(shape=(len(df)), dtype=dt)
                    # Insert data from dataframe 'df' columns into N-D array 'data'
                    for i in range(0, len(pnames)):
                        shape = np.append(len(data), psizes[i])
                        # In numpy 1.8.2 and Python 2.7, this throws an error
                        # for no apparent reason. Works as expected in numpy 1.10.4.
                        data[pnames[i]] = np.squeeze(
                            np.reshape(
                                df.values[:,
                                          np.arange(cols[i][0], cols[i][1] +
                                                    1)], shape))

                    toc = time.time() - tic
            else:
                # At least one requested string or isotime parameter does not
                # have a length in metadata. More work to do to read.
                tic = time.time()
                if opts['method'] in ('numpy', 'numpynolength'):
                    # If requested method was numpy, use numpynolength method.

                    # With dtype='None', the data type is determined automatically
                    table = np.genfromtxt(fnamecsv,
                                          dtype=None,
                                          delimiter=',',
                                          encoding='utf-8')
                    # table is a 1-D array. Each element is a row in the file.
                    # - If the data types are not the same for each column,
                    # the elements are tuples with length equal to the number
                    # of columns.
                    # - If the data types are the same for each column, which
                    # will happen if only Time is requested or Time and
                    # a string or isotime parameter is requested, then table
                    # has rows that are 1-D numpy arrays.

                    # Contents of 'table' will be placed into N-D array 'data'.
                    data = np.ndarray(shape=(len(table)), dtype=dt)

                    # Insert data from 'table' into N-D array 'data'
                    if table.dtype.names is None:
                        if len(pnames) == 1:
                            # Only time parameter requested.
                            data[pnames[0]] = table[:]
                        else:
                            # All columns in 'table' have the same data type
                            # so table is a 2-D numpy matrix
                            for i in range(0, len(pnames)):
                                shape = np.append(len(data), psizes[i])
                                data[pnames[i]] = np.squeeze(
                                    np.reshape(
                                        table[:,
                                              np.
                                              arange(cols[i][0], cols[i][1] +
                                                     1)], shape))
                    else:
                        # Table is not a 2-D numpy matrix.
                        # Extract each column (don't know how to do this with slicing
                        # notation, e.g., data['varname'] = table[:][1:3]). Instead,
                        # loop over each parameter (pn) and aggregate columns.
                        # Then insert aggregated columns into N-D array 'data'.
                        for pn in range(0, len(cols)):
                            shape = np.append(len(data), psizes[pn])
                            for c in range(cols[pn][0], cols[pn][1] + 1):
                                if c == cols[pn][0]:  # New parameter
                                    tmp = table[table.dtype.names[c]]
                                else:  # Aggregate
                                    tmp = np.vstack(
                                        (tmp, table[table.dtype.names[c]]))
                            tmp = np.squeeze(
                                np.reshape(np.transpose(tmp), shape))

                            data[pnames[pn]] = tmp

                if opts['method'] in ('pandas', 'pandasnolength'):
                    # If requested method was pandas, use pandasnolength method.

                    # Read file into Pandas DataFrame
                    df = pandas.read_csv(fnamecsv, sep=',', header=None)

                    # Allocate output N-D array (It is not possible to pass dtype=dt
                    # as computed to pandas.read_csv, so need to create new ND array.)
                    data = np.ndarray(shape=(len(df)), dtype=dt)

                    # Insert data from dataframe into N-D array
                    for i in range(0, len(pnames)):
                        shape = np.append(len(data), psizes[i])
                        # In numpy 1.8.2 and Python 2.7, this throws an error for no apparent reason.
                        # Works as expected in numpy 1.10.4
                        data[pnames[i]] = np.squeeze(
                            np.reshape(
                                df.values[:,
                                          np.arange(cols[i][0], cols[i][1] +
                                                    1)], shape))

                # Any of the string parameters that do not have an associated
                # length in the metadata will have dtype='O' (object).
                # These parameters must be converted to have a dtype='SN', where
                # N is the maximum string length. N is determined automatically
                # when using astype('<S') (astype uses largest N needed).
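                # Example: np.array(['a', 'bcd'], dtype=object).astype('<S')
                # has dtype 'S3', so N = 3 for that column.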
                dt2 = []  # Will have dtypes with strings lengths calculated.
                for i in range(0, len(pnames)):
                    if data[pnames[i]].dtype == 'O':
                        dtype = (pnames[i],
                                 str(data[pnames[i]].astype('<S').dtype),
                                 psizes[i])
                    else:
                        dtype = dt[i]

                    # https://numpy.org/doc/stable/release/1.17.0-notes.html#shape-1-fields-in-dtypes-won-t-be-collapsed-to-scalars-in-a-future-version
                    if len(dtype) > 2 and dtype[2] == 1:
                        dtype = dtype[0:2]
                    dt2.append(dtype)

                # Create new N-D array that won't have any parameters with
                # type = 'O'.
                data2 = np.ndarray(data.shape, dt2)

                for i in range(0, len(pnames)):
                    if data[pnames[i]].dtype == 'O':
                        data2[pnames[i]] = data[pnames[i]].astype(dt2[i][1])
                    else:
                        data2[pnames[i]] = data[pnames[i]]
                        # Save memory by not copying (does this help?)
                        #data2[pnames[i]] = np.array(data[pnames[i]],copy=False)

            toc = time.time() - tic

        # Extra metadata associated with request will be saved in
        # a pkl file with same base name as npy data file.
        meta.update({"x_server": SERVER})
        meta.update({"x_dataset": DATASET})
        meta.update({"x_parameters": PARAMETERS})
        meta.update({"x_time.min": START})
        meta.update({"x_time.max": STOP})
        meta.update({"x_requestDate": datetime.now().isoformat()[0:19]})
        meta.update({"x_cacheDir": urld})
        meta.update({"x_downloadTime": toc0})
        meta.update({"x_readTime": toc})
        meta.update({"x_metaFileParsed": fnamepkl})
        meta.update({"x_dataFileParsed": fnamenpy})
        meta.update({"x_metaFile": fnamejson})
        if opts['format'] == 'binary':
            meta.update({"x_dataFile": fnamebin})
        else:
            meta.update({"x_dataFile": fnamecsv})

        # Note that technically this should only be written if cache=True.
        # This will be changed when the output becomes an object:
        #   h = hapi(...)
        #   h.data
        #   h.meta
        #   h.info
        # Create cache directory
        if not os.path.exists(opts["cachedir"]):
            os.makedirs(opts["cachedir"])
        if not os.path.exists(urld):
            os.makedirs(urld)
        log('Writing %s' % fnamepklx, opts)
        f = open(fnamepklx, 'wb')
        pickle.dump(meta, f, protocol=2)
        f.close()

        if opts["cache"]:
            log('Writing %s' % fnamenpy, opts)
            if missing_length:
                np.save(fnamenpy, data2)
            else:
                np.save(fnamenpy, data)

        if missing_length:
            return data2, meta
        else:
            return data, meta
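
# A minimal sketch of the call forms described in the docstring, using the
# TestData server that appears elsewhere on this page (assumes the server
# is reachable).
from hapiclient import hapi

server = 'http://hapi-server.org/servers/TestData2.0/hapi'
servers = hapi()                     # list of known HAPI server URLs
catalog = hapi(server)               # datasets available at `server`
meta = hapi(server, 'dataset1')      # parameters in 'dataset1'
data, meta = hapi(server, 'dataset1', 'scalar,vector',
                  '1970-01-01', '1970-01-02')
print(data['Time'].shape, data['scalar'].shape, data['vector'].shape)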
Example #4
def gallery(*args, **kwargs):
    """Create a web-browsable gallery of plots (aka "PNG Walk").

    Experimental code. Requires hapiplotserver; install with
    pip install 'git+https://github.com/hapi-server/plotserver-python'
    
    For additional documentation and demonstration, see hapi_demo.ipynb
    at <https://github.com/hapi-server/client-python-notebooks/>

    Usage
    ----------
    gallery(server, dataset)
    gallery(server, dataset, parameter)

    Examples
    ----------
    >>> from hapiclient import gallery
    >>> gallery('http://hapi-server.org/servers/TestData/hapi', 'dataset1')
    # Webpage tab opens

    >>> from hapiclient import gallery
    >>> gallery('http://hapi-server.org/servers/TestData/hapi','dataset1', 'vector')
    # Webpage tab opens

    Parameters
    ----------
    server : str
        A URL for a HAPI-compliant server. (A HAPI URL always ends with "/hapi".)
    dataset : str
        A dataset from a HAPI server. The valid datasets can
        be determined using `hapi(server)`.
    parameter : str
        A parameter in dataset. The valid parameters can be determined using
        `hapi(server, dataset)`.

    Returns
    ----------
    None (a new tab is opened in the user's default browser)

    """

    import time
    import webbrowser

    from multiprocessing import Process
    from hapiclient.hapi import cachedir
    from hapiclient.util import error, warning, setopts, prompt
    from hapiplotserver import hapiplotserver

    if len(args) != 2 and len(args) != 3:
        error('Number of arguments must be 2 or 3. See help(gallery).')

    server = args[0]
    dataset = args[1]
    if len(args) == 3:
        parameters = args[2].split(",")
    else:
        parameters = ['']

    if len(parameters) > 1:
        # Eventually, multiple parameters will result in a stack plot.
        warning('Multiple parameters given; only first will be shown.')
    parameters = parameters[0]

    if not all(type(arg) is str for arg in args):
        error('All inputs must be strings. See help(gallery).')

    # Default options
    opts = {
        'cache_dir': cachedir(),
        'usecache': True,
        'port': 5002,
        'format': 'png',
        'figsize': (7, 3),
        'dpi': 144,
        'transparent': True,
        'loglevel': 'default'
    }

    # Override defaults
    opts = setopts(opts, kwargs)

    if parameters != '':
        paramopt = "&parameters=" + parameters
    else:
        paramopt = ''

    url = 'http://127.0.0.1:' + str(opts['port'])
    url = url + '/?server=' + server
    url = url + '&id=' + dataset
    url = url + paramopt
    url = url + '&format=gallery'

    process = Process(target=hapiplotserver, kwargs=opts)
    try:
        process.start()
    except Exception as e:
        print(e)
        print("Terminating server.")
        process.terminate()

    print(" * Opening ViViz in browser in 1 second.")
    time.sleep(1)
    webbrowser.open(url, new=2)
    prompt(
        "\n\033[0;34mPress a key at any time to terminate ViViz gallery server.\033[0m\n\n"
    )
    process.terminate()
    print("ViViz gallery server has terminated.")