def create_descriptor(value, restriction_dict={}):
    """
        Create a descriptor (single textual value) from CSV data.

        The value is checked by ``validate_value`` against
        ``restriction_dict`` and then returned unchanged.
    """
    validate_value(value, restriction_dict)
    return value
def create_scalar(value, restriction_dict={}):
    """
        Create a scalar (single numerical value) from CSV data.

        The value is checked by ``validate_value`` against
        ``restriction_dict``; its ``str()`` representation is returned.
    """
    validate_value(value, restriction_dict)
    return str(value)
def create_array(dataset, restriction_dict={}):
    """
        Create a (multi-dimensional) array from CSV data.

        ``dataset[0]`` holds the array dimensions as whitespace-separated
        integers (e.g. ``"2 3"``); when it is empty the array is assumed to
        be 1D. Every following entry is one array element.

        Returns the array, reshaped to the requested dimensions, as a JSON
        string of nested lists.

        Raises HydraPluginError if the elements cannot be arranged into the
        requested dimensions.

        NOTE(review): the shared ``{}`` default of ``restriction_dict`` is
        only forwarded to ``validate_value`` here -- confirm that helper
        never mutates it.
    """
    # The first column is always the array dimensions.
    arr_shape = dataset[0]

    # The actual data is everything after column 0. Each entry is evaluated
    # so that numeric text becomes a number; anything that cannot be
    # evaluated is kept as a string.
    # SECURITY NOTE(review): eval() executes arbitrary Python expressions.
    # If the CSV can come from an untrusted source this should be replaced
    # with a safe parser (e.g. ast.literal_eval) -- confirm with callers
    # that no file relies on evaluating full expressions first.
    eval_dataset = []
    for raw in dataset[1:]:
        try:
            item = eval(raw)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            item = str(raw)
        eval_dataset.append(item)

    # If the dimensions are not set, we assume the array is 1D.
    # split() with no argument tolerates repeated/leading/trailing blanks,
    # which split(" ") turned into int('') failures.
    dims = arr_shape.split()
    if dims:
        array_shape = tuple(int(a) for a in dims)
    else:
        array_shape = (len(eval_dataset), )

    # Reshape the array back to its correct dimensions.
    arr = np.array(eval_dataset)
    try:
        arr = np.reshape(arr, array_shape)
    except Exception:
        raise HydraPluginError("You have an error with your array data."
                               " Please ensure that the dimension is correct."
                               " (array = %s, dimension = %s)"
                               % (arr, array_shape))

    validate_value(arr.tolist(), restriction_dict)

    return json.dumps(arr.tolist())
def create_timeseries(data, restriction_dict={}, data_columns=None,
                      filename="", timezone=pytz.utc):
    """
        Create a timeseries from CSV rows.

        Each row of ``data`` is laid out as:
          * ``row[0]``  -- the timestamp text,
          * ``row[1]``  -- the value's dimensions as whitespace-separated
                           integers, or '' for a flat list of values,
          * ``row[2:]`` -- the values themselves.
        Empty rows and rows whose first field is '#' are skipped.

        ``data_columns`` names the series; when it is None the series are
        named "0", "1", ... after the number of value fields in the first
        row. After a row's values are reshaped, the i-th entry along the
        first axis is stored under ``col_headings[i]`` for that timestamp.

        The time format is guessed from the first row's timestamp and cached
        in the module-level ``time_formats`` dict, keyed by that timestamp
        text. ``timezone`` is used to localize every parsed timestamp and
        ``filename`` only appears in error messages.

        Returns None when ``data`` is empty, otherwise a JSON string of the
        form ``{column: {time: value}}``.

        Raises HydraPluginError on a duplicated timestamp or when a row's
        values cannot be arranged into the requested dimensions.

        NOTE(review): the shared ``{}`` default of ``restriction_dict`` is
        only forwarded to ``validate_value`` here -- confirm that helper
        never mutates it.
    """
    if len(data) == 0:
        return None

    if data_columns is not None:
        col_headings = data_columns
    else:
        col_headings = [str(idx) for idx in range(len(data[0][2:]))]

    # Look the format up in the module-level cache before guessing it again.
    date = data[0][0]
    global time_formats
    timeformat = time_formats.get(date)
    if timeformat is None:
        timeformat = hydra_dateutil.guess_timefmt(date)
        time_formats[date] = timeformat

    seasonal = 'XXXX' in timeformat or seasonal_key in timeformat

    ts_values = {col: {} for col in col_headings}

    # Timestamps seen so far, to check for duplicate timestamps in a
    # timeseries. A set keeps each membership test O(1).
    seen_times = set()

    for dataset in data:
        if len(dataset) == 0 or dataset[0] == '#':
            continue

        tstime = timezone.localize(datetime.strptime(dataset[0], timeformat))
        ts_time = hydra_dateutil.date_to_string(tstime, seasonal=seasonal)

        if ts_time in seen_times:
            raise HydraPluginError("A duplicate time %s has been found "
                                   "in %s where the value = %s)"
                                   % (ts_time, filename, dataset[2:]))
        seen_times.add(ts_time)

        row_values = dataset[2:]

        # split() with no argument tolerates repeated/leading/trailing
        # blanks, which split(" ") turned into int('') failures.
        dims = dataset[1].split()
        if dims:
            array_shape = tuple(int(a) for a in dims)
        else:
            array_shape = (len(row_values), )

        ts_val_1d = [str(v) for v in row_values]

        try:
            ts_arr = np.reshape(np.array(ts_val_1d), array_shape)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            raise HydraPluginError(
                "Error converting %s in file %s to an array"
                % (ts_val_1d, filename))

        for i, ts_val in enumerate(ts_arr.tolist()):
            ts_values[col_headings[i]][ts_time] = ts_val

    timeseries = json.dumps(ts_values)

    validate_value(pd.read_json(timeseries), restriction_dict)

    return timeseries