Example 1
def create_descriptor(value, restriction_dict={}):
    """
        Create a descriptor (single textual value) from CSV data
    """
    validate_value(value, restriction_dict)
    descriptor = value
    return descriptor
Example 2
def create_scalar(value, restriction_dict={}):
    """
        Create a scalar (single numerical value) from CSV data
    """
    validate_value(value, restriction_dict)
    scalar = str(value)
    return scalar
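For reference, a minimal usage sketch for the two helpers above. It assumes validate_value (not shown in these snippets) raises on a failed restriction check and otherwise returns quietly; the example values are hypothetical.

# Hypothetical calls: both helpers run the value through validate_value
# with an empty restriction dict. create_descriptor returns the value
# unchanged, while create_scalar returns str(value).
name = create_descriptor("River Thames")   # -> "River Thames"
flow = create_scalar(3.14)                 # -> "3.14"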
Example 3
def create_array(dataset, restriction_dict={}):
    """
        Create a (multi-dimensional) array from CSV data
    """
    # The first column always holds the array dimensions.
    arr_shape = dataset[0]
    # The actual data is everything after column 0.
    eval_dataset = []
    for d in dataset[1:]:
        # eval() coerces numeric-looking strings to numbers; anything
        # that fails to evaluate is kept as a plain string.
        try:
            d = eval(d)
        except Exception:
            d = str(d)
        eval_dataset.append(d)

    # If the dimensions are not set, assume the array is 1D.
    if arr_shape != '':
        array_shape = tuple([int(a) for a in arr_shape.split(" ")])
    else:
        array_shape = (len(eval_dataset), )

    # Reshape the flat data back to its declared dimensions.
    arr = np.array(eval_dataset)
    try:
        arr = np.reshape(arr, array_shape)
    except Exception:
        raise HydraPluginError("You have an error with your array data."
                               " Please ensure that the dimension is correct."
                               " (array = %s, dimension = %s)" %
                               (arr, array_shape))

    validate_value(arr.tolist(), restriction_dict)

    arr = json.dumps(arr.tolist())

    return arr
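A sketch of the CSV row layout create_array expects, assuming np and json are the module-level numpy and json imports implied by the snippet; the row contents below are hypothetical.

# Hypothetical row: cell 0 carries the array shape ("2 3"), the
# remaining cells carry the six values of a 2x3 array.
row = ["2 3", "1.0", "2.0", "3.0", "4.0", "5.0", "6.0"]
arr_json = create_array(row)
# -> "[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]"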
Example 4
def create_timeseries(data,
                      restriction_dict={},
                      data_columns=None,
                      filename="",
                      timezone=pytz.utc):
    """
        Create a timeseries (a JSON dict of column -> {timestamp: value}) from CSV data
    """
    if len(data) == 0:
        return None

    if data_columns is not None:
        col_headings = data_columns
    else:
        col_headings = [str(idx) for idx in range(len(data[0][2:]))]

    # Guess the date format from the first row and cache it in the
    # module-level time_formats dict, keyed by the date string.
    date = data[0][0]
    global time_formats
    timeformat = time_formats.get(date)
    if timeformat is None:
        timeformat = hydra_dateutil.guess_timefmt(date)
        time_formats[date] = timeformat

    seasonal = False

    if 'XXXX' in timeformat or seasonal_key in timeformat:
        seasonal = True

    ts_values = {}
    for col in col_headings:
        ts_values[col] = {}
    ts_times = []  # to check for duplicate timestamps in a timeseries.
    timedata = data
    for dataset in timedata:

        if len(dataset) == 0 or dataset[0] == '#':
            continue

        tstime = datetime.strptime(dataset[0], timeformat)
        tstime = timezone.localize(tstime)

        ts_time = hydra_dateutil.date_to_string(tstime, seasonal=seasonal)

        if ts_time in ts_times:
            raise HydraPluginError("A duplicate time %s has been found "
                                   "in %s where the value = %s)" %
                                   (ts_time, filename, dataset[2:]))
        else:
            ts_times.append(ts_time)

        value_length = len(dataset[2:])
        shape = dataset[1]
        if shape != '':
            array_shape = tuple([int(a) for a in shape.split(" ")])
        else:
            array_shape = (value_length, )

        ts_val_1d = []
        for i in range(value_length):
            ts_val_1d.append(str(dataset[i + 2]))

        try:
            ts_arr = np.array(ts_val_1d)
            ts_arr = np.reshape(ts_arr, array_shape)
        except Exception:
            raise HydraPluginError(
                "Error converting %s in file %s to an array" %
                (ts_val_1d, filename))

        ts_value = ts_arr.tolist()

        for i, ts_val in enumerate(ts_value):
            idx = col_headings[i]
            ts_values[idx][ts_time] = ts_val

    timeseries = json.dumps(ts_values)

    validate_value(pd.read_json(timeseries), restriction_dict)

    return timeseries
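Finally, a sketch of the row layout create_timeseries expects: column 0 is the timestamp, column 1 an optional array shape, and columns 2 onwards hold one value per data column. The rows and column names below are hypothetical, and the exact serialised timestamp depends on hydra_dateutil.date_to_string, which is not shown here.

# Hypothetical rows for two named data columns.
rows = [
    ["2015-01-01", "", "1.0", "2.0"],
    ["2015-01-02", "", "3.0", "4.0"],
]
ts_json = create_timeseries(rows, data_columns=["flow", "level"])
# -> JSON of the form {"flow": {"<timestamp>": "1.0", ...},
#                      "level": {"<timestamp>": "2.0", ...}}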