def _read_colormap_data_from_file(filename):
    """Load colormap data from a ``.npy``, ``.npz`` or comma-separated file.

    If *filename* does not exist as given, it is replaced by the result of
    ``get_config_path(filename)`` before loading.

    Args:
        filename: Path of the colormap file. The extension selects the
            loader: ``.npy`` and ``.npz`` go through ``np.load``, anything
            else is parsed as comma-delimited text with ``np.loadtxt``.

    Returns:
        The loaded array. For ``.npz`` archives only the entry stored under
        the key ``"arr_0"`` (the first positionally saved array) is returned.
    """
    if not os.path.exists(filename):
        filename = get_config_path(filename)
    ext = os.path.splitext(filename)[1]
    if ext == ".npy":
        return np.load(filename)
    if ext == ".npz":
        # An .npz is a collection of arrays backed by an open file handle.
        # Read the first positional array inside a ``with`` block so the
        # handle is released as soon as the data has been extracted.
        with np.load(filename) as archive:
            return archive["arr_0"]
    # Anything else is treated as CSV.
    return np.loadtxt(filename, delimiter=",")
def create_sections(structure):
    """Build zero-initialised record arrays for the sections in *structure*.

    Args:
        structure: Iterable of ``(count, (rec_class, sub_class))`` entries.
            ``count`` is the number of records to allocate for the section
            identified by ``(rec_class, sub_class)``.

    Returns:
        dict: Maps each ``(rec_class, sub_class)`` known to the
        ``eps_avhrrl1b_6.5.xml`` format description to a structured array of
        ``count`` records. Each record's dtype is the ``grh_dtype`` fields
        followed by the section's own fields, with ``record_class``,
        ``RECORD_SUBCLASS`` and ``RECORD_SIZE`` filled in. Sections for which
        the format lookup raises ``KeyError`` are left out.
    """
    xml_format = eps.XMLFormat(get_config_path("eps_avhrrl1b_6.5.xml"))
    result = {}
    for n_records, (class_name, subclass_id) in structure:
        section_key = (class_name, subclass_id)
        try:
            body_dtype = xml_format.dtype(section_key)
        except KeyError:
            # The format description has no entry for this section: skip it.
            continue

        # Record size covers the header fields plus the section body.
        total_size = body_dtype.itemsize + grh_dtype.itemsize
        full_dtype = np.dtype(grh_dtype.descr + body_dtype.descr)

        records = np.zeros(n_records, full_dtype)
        records['record_class'] = eps.record_class.index(class_name)
        records['RECORD_SUBCLASS'] = subclass_id
        records['RECORD_SIZE'] = total_size
        result[section_key] = records
    return result
def read_records(filename):
    """Read *filename* without scaling it afterwards.

    The file is processed in two passes over the same open handle:

    1. Every record header is read in turn and the file position is advanced
       by the record's declared ``RECORD_SIZE``. Consecutive records sharing
       the same ``(record class, subclass)`` key are grouped; for each group
       the dtype built from its *first* record and the number of records are
       remembered.
    2. Each group is read from its byte offset. Groups keyed ``('mdr', 2)``
       are memory-mapped and wrapped with ``da.from_array``; all other groups
       are read eagerly with ``np.fromfile``.

    Args:
        filename: Path of the file to read; it is opened in binary mode.

    Returns:
        tuple: ``(sections, form)``. ``sections`` maps each
        ``(record class, subclass)`` key to its records; when a key occurs in
        several non-adjacent groups, the groups are joined with
        ``np.hstack``. ``form`` is the ``XMLFormat`` instance built from
        ``eps_avhrrl1b_6.5.xml``.
    """
    format_fn = get_config_path("eps_avhrrl1b_6.5.xml")
    form = XMLFormat(format_fn)

    # Layout of the header that precedes every record.
    grh_dtype = np.dtype([("record_class", "|i1"),
                          ("INSTRUMENT_GROUP", "|i1"),
                          ("RECORD_SUBCLASS", "|i1"),
                          ("RECORD_SUBCLASS_VERSION", "|i1"),
                          ("RECORD_SIZE", ">u4"),
                          ("RECORD_START_TIME", "S6"),
                          ("RECORD_STOP_TIME", "S6")])

    # Chunk length used for the dask-wrapped ('mdr', 2) records.
    max_lines = np.floor((CHUNK_SIZE**2) / 2048)

    dtypes = []
    counts = []
    classes = []
    cnt = 0  # running index that keeps padding field names unique
    prev = None
    with open(filename, "rb") as fdes:
        # Pass 1: walk the headers and group consecutive identical records.
        while True:
            grh = np.fromfile(fdes, grh_dtype, 1)
            if grh.size == 0:
                break
            # ``grh`` is a one-element array; index it explicitly, since
            # implicit conversion of such arrays to Python scalars is
            # deprecated in recent NumPy releases.
            rec_class = record_class[int(grh["record_class"][0])]
            sub_class = grh["RECORD_SUBCLASS"][0]

            expected_size = int(grh["RECORD_SIZE"][0])
            bare_size = expected_size - grh_dtype.itemsize
            try:
                the_type = form.dtype((rec_class, sub_class))
            except KeyError:
                # Record not described by the format: keep it as raw bytes.
                the_type = np.dtype([('unknown', 'V%d' % bare_size)])
            the_descr = grh_dtype.descr + the_type.descr
            the_type = np.dtype(the_descr)
            if the_type.itemsize < expected_size:
                # Pad the dtype so its size matches the declared record size.
                padding = [('unknown%d' % cnt,
                            'V%d' % (expected_size - the_type.itemsize))]
                cnt += 1
                the_descr += padding
            new_dtype = np.dtype(the_descr)
            key = (rec_class, sub_class)
            if key == prev:
                counts[-1] += 1
            else:
                dtypes.append(new_dtype)
                counts.append(1)
                classes.append(key)
                prev = key
            # Skip the record body; the header has already been consumed.
            fdes.seek(bare_size, 1)

        # Pass 2: read each group of records from its byte offset.
        sections = {}
        offset = 0
        for dtype, count, rec_class in zip(dtypes, counts, classes):
            fdes.seek(offset)
            if rec_class == ('mdr', 2):
                record = da.from_array(
                    np.memmap(fdes, mode='r', dtype=dtype, shape=count,
                              offset=offset),
                    chunks=(max_lines, ))
            else:
                record = np.fromfile(fdes, dtype=dtype, count=count)
            offset += dtype.itemsize * count
            if rec_class in sections:
                logger.debug('Multiple records for %s', str(rec_class))
                sections[rec_class] = np.hstack((sections[rec_class], record))
            else:
                sections[rec_class] = record

    return sections, form