def _coverage_mapper(qresult, dx, filter):
    """
    Bin each block of query results into a 2D sky-coverage histogram.

    The first two columns of every block are taken as longitude and
    latitude in degrees. Out-of-range values are repaired, the
    coordinates are converted to integer pixel indices of size <dx>, and
    the rows falling into each pixel are counted.

    Input:
        qresult: iterable of row blocks. Each block must be non-empty and
                 provide as_columns() (first two columns: lon, lat
                 arrays) and info.cell_id.
        dx:      pixel size, in degrees.
        filter:  optional row filter, in any form accepted by
                 unpack_callable. When not None it is called as
                 filter(rows, *filter_args) and its result replaces the
                 block.

    Output: generator of (sky, imin, jmin) tuples, one per block, where
        sky is a float array of shape (w, h) and sky[a, b] is the number
        of rows with longitude pixel index imin + a and latitude pixel
        index jmin + b. Latitude pixels are counted from lat = +90
        downwards.

    Raises:
        ValueError: if the bounding box of a block, in pixels, is
                    implausibly large.

    Notes:
        - the lon/lat arrays returned by as_columns() are modified in
          place by the range fixes below
    """
    filter, filter_args = unpack_callable(filter)

    for rows in qresult:
        assert len(rows)
        if filter is not None:
            rows = filter(rows, *filter_args)

        lon, lat = list(rows.as_columns())[:2]
        assert len(lon)
        assert len(lat)

        # Work around PS1 bugs: wrap out-of-range longitudes into [0, 360)
        tofix = (lon < 0) | (lon >= 360)
        if np.any(tofix):
            lon[tofix] = np.fmod(np.fmod(lon[tofix], 360.) + 360., 360.)

        # ... and clamp out-of-range latitudes to [-90, 90]
        tofix = (lat < -90) | (lat > 90)
        if np.any(tofix):
            # Single-argument form prints the same text on Python 2 and 3
            print("Fixing DECLINATION in cell  %s" % (rows.info.cell_id,))
            lat[lat < -90] = -90
            lat[lat > 90] = 90

        # Pixel indices; j grows as latitude decreases
        i = (lon / dx).astype(int)
        j = ((90 - lat) / dx).astype(int)

        imin, imax = i.min(), i.max()
        jmin, jmax = j.min(), j.max()
        w = imax - imin + 1
        h = jmax - jmin + 1

        # Sanity check on the size of the per-block image. Raise instead of
        # calling exit(), which would end the process with a success status
        # and hide the failure from whoever is driving the mappers.
        if w <= 0 or h <= 0 or w > 10800 or h > 5400:
            raise ValueError(
                "Unreasonable coverage map size w=%s, h=%s in cell %s"
                % (w, h, rows.info.cell_id))

        # Shift the indices so the block's bounding box starts at (0, 0)
        i -= imin
        j -= jmin

        # Count rows per pixel using the flattened index j + i*h. minlength
        # pads the pixels beyond the largest occupied index with zeros, so
        # the result always reshapes to (w, h).
        counts = np.bincount(j + i * h, minlength=w * h)
        sky = counts.astype(float).reshape((w, h))

        yield (sky, imin, jmin)
def _coverage_mapper(qresult, dx, filter):
    """
    Bin each block of query results into a 2D sky-coverage histogram.

    The first two columns of every block are taken as longitude and
    latitude in degrees. Out-of-range values are repaired, the
    coordinates are converted to integer pixel indices of size <dx>, and
    the rows falling into each pixel are counted.

    Input:
        qresult: iterable of row blocks. Each block must be non-empty and
                 provide as_columns() (first two columns: lon, lat
                 arrays) and info.cell_id.
        dx:      pixel size, in degrees.
        filter:  optional row filter, in any form accepted by
                 unpack_callable. When not None it is called as
                 filter(rows, *filter_args) and its result replaces the
                 block.

    Output: generator of (sky, imin, jmin) tuples, one per block, where
        sky is a float array of shape (w, h) and sky[a, b] is the number
        of rows with longitude pixel index imin + a and latitude pixel
        index jmin + b. Latitude pixels are counted from lat = +90
        downwards.

    Raises:
        ValueError: if the bounding box of a block, in pixels, is
                    implausibly large.

    Notes:
        - the lon/lat arrays returned by as_columns() are modified in
          place by the range fixes below
    """
    filter, filter_args = unpack_callable(filter)

    for rows in qresult:
        assert len(rows)
        if filter is not None:
            rows = filter(rows, *filter_args)

        lon, lat = list(rows.as_columns())[:2]
        assert len(lon)
        assert len(lat)

        # Work around PS1 bugs: wrap out-of-range longitudes into [0, 360)
        tofix = (lon < 0) | (lon >= 360)
        if np.any(tofix):
            lon[tofix] = np.fmod(np.fmod(lon[tofix], 360.) + 360., 360.)

        # ... and clamp out-of-range latitudes to [-90, 90]
        tofix = (lat < -90) | (lat > 90)
        if np.any(tofix):
            # Single-argument form prints the same text on Python 2 and 3
            print("Fixing DECLINATION in cell  %s" % (rows.info.cell_id,))
            lat[lat < -90] = -90
            lat[lat > 90] = 90

        # Pixel indices; j grows as latitude decreases
        i = (lon / dx).astype(int)
        j = ((90 - lat) / dx).astype(int)

        imin, imax = i.min(), i.max()
        jmin, jmax = j.min(), j.max()
        w = imax - imin + 1
        h = jmax - jmin + 1

        # Sanity check on the size of the per-block image. Raise instead of
        # calling exit(), which would end the process with a success status
        # and hide the failure from whoever is driving the mappers.
        if w <= 0 or h <= 0 or w > 10800 or h > 5400:
            raise ValueError(
                "Unreasonable coverage map size w=%s, h=%s in cell %s"
                % (w, h, rows.info.cell_id))

        # Shift the indices so the block's bounding box starts at (0, 0)
        i -= imin
        j -= jmin

        # Count rows per pixel using the flattened index j + i*h. minlength
        # pads the pixels beyond the largest occupied index with zeros, so
        # the result always reshapes to (w, h).
        counts = np.bincount(j + i * h, minlength=w * h)
        sky = counts.astype(float).reshape((w, h))

        yield (sky, imin, jmin)
def map_reduce_chain(self, input, kernels, progress_callback=None):
    """
    A poor-man's map-reduce implementation.

    Runs a chain of kernels over <input>. The first kernel (the mapper)
    is applied to the items of the <input> iterable via
    self.imap_unordered(). Every kernel except the last must produce
    (key, value) pairs; once a kernel has consumed all of its input,
    its values are grouped by key and the resulting (key, [values])
    items become the input of the next kernel (a reducer). The results
    of the last kernel are yielded to the caller as they arrive.

    When the module-level <back_to_disk> flag is set, the values
    produced by intermediate kernels arrive already pickled and are
    appended to a temporary file (or an mmap of it, if <use_mmap> is
    set); only their file offsets are kept in memory. Values with the
    same hash are stored once.

    Input:
        input:    any iterable
        kernels:  sequence of kernels, each in any form accepted by
                  unpack_callable
        progress_callback: progress reporting callable; defaults to
                  progress_default. It is called here with
                  ('mapreduce', 'begin', input, None, None) and
                  ('mapreduce', 'end', None, None, None), and is also
                  passed on to imap_unordered().

    Output: Iterable (generated)

    Notes:
        - the 'end' callback is issued only if the generator is run to
          completion
    """
    if progress_callback is None:
        progress_callback = progress_default

    progress_callback('mapreduce', 'begin', input, None, None)

    if back_to_disk:
        fp, prev_fp = None, None
        mm, prev_mm = None, None

    for i, K in enumerate(kernels):
        K_fun, K_args = unpack_callable(K)
        last_step = (i + 1 == len(kernels))
        stage = where(i == 0, 'map', 'reduce')

        if back_to_disk:
            # Reinitialize the unique_hash->file_offset map
            unique_objects = {}

            # Insert picklers/unpicklers
            if i != 0:
                # Insert unpickler, reading from the previous step's store
                K_fun, K_args = _reduce_from_pickle_jar, (prev_fp.name, K_fun, K_args)

            if not last_step:
                # Insert pickler
                K_fun, K_args = _output_pickled_kv, (K_fun, K_args)

                # Create a disk backing store for intermediate results
                fp = tempfile.NamedTemporaryFile(
                    mode='wb', prefix='mapresults-',
                    dir=os.getenv('LSD_TEMPDIR'), suffix='.pkl',
                    delete=True)
                if use_mmap:
                    fd = fp.file.fileno()
                    os.ftruncate(fd, BUFSIZE)
                    mm = mmap.mmap(fd, 0)
                else:
                    mm = fp

        try:
            # Call the distributed mappers
            mresult = defaultdict(list)
            for r in self.imap_unordered(
                    input, K_fun, K_args,
                    progress_callback=progress_callback,
                    progress_callback_stage=stage):
                if last_step:
                    # yield the final result
                    yield r
                else:
                    (k, v) = r

                    if back_to_disk:
                        (obj_hash, v) = v
                        if obj_hash in unique_objects:
                            # Already stored; reuse its offset
                            v = unique_objects[obj_hash]
                        else:
                            # The output value has already been pickled (but
                            # not the key). Store the pickled value into the
                            # pickle jar, and keep the (key, offset) tuple.
                            offs = mm.tell()
                            mm.write(v)
                            assert len(v) == mm.tell() - offs
                            v = offs
                            unique_objects[obj_hash] = offs

                    # Prepare for next reduction
                    mresult[k].append(v)

            input = mresult.items()
        except:
            # Deliberately a bare except: the cleanup must also run when the
            # generator is closed or interrupted, and the exception is always
            # re-raised.
            #
            # In case of an exception, truncate and close the temporary file
            # so the kernel won't attempt to flush it to the disk
            if back_to_disk:
                if mm is not None and use_mmap:
                    mm.resize(1)
                    mm.close()
                    mm = None
                if fp is not None:
                    os.ftruncate(fp.file.fileno(), 0)
                    fp.close()
                    fp = None
            raise
        finally:
            if back_to_disk:
                # Close/clear the intermediate result backing store from the
                # previous step, ensuring it's truncated first so it doesn't
                # hit the disk if it hasn't already.
                if prev_fp is not None:
                    if use_mmap:
                        prev_mm.resize(1)
                        prev_mm.close()
                    os.ftruncate(prev_fp.file.fileno(), 0)
                    prev_fp.close()

                # This step's store becomes the next step's input
                if fp is not None:
                    prev_fp, prev_mm = fp, mm
                    fp, mm = None, None

    # progress_callback cannot be None here: it was defaulted and already
    # called above
    progress_callback('mapreduce', 'end', None, None, None)
def map_reduce_chain(self, input, kernels, progress_callback=None):
    """
    A poor-man's map-reduce implementation.

    Runs a chain of kernels over <input>. The first kernel (the mapper)
    is applied to the items of the <input> iterable via
    self.imap_unordered(). Every kernel except the last must produce
    (key, value) pairs; once a kernel has consumed all of its input,
    its values are grouped by key and the resulting (key, [values])
    items become the input of the next kernel (a reducer). The results
    of the last kernel are yielded to the caller as they arrive.

    When the module-level <back_to_disk> flag is set, the values
    produced by intermediate kernels arrive already pickled and are
    appended to a temporary file (or an mmap of it, if <use_mmap> is
    set); only their file offsets are kept in memory. Values with the
    same hash are stored once.

    Input:
        input:    any iterable
        kernels:  sequence of kernels, each in any form accepted by
                  unpack_callable
        progress_callback: progress reporting callable; defaults to
                  progress_default. It is called here with
                  ('mapreduce', 'begin', input, None, None) and
                  ('mapreduce', 'end', None, None, None), and is also
                  passed on to imap_unordered().

    Output: Iterable (generated)

    Notes:
        - the 'end' callback is issued only if the generator is run to
          completion
    """
    if progress_callback is None:
        progress_callback = progress_default

    progress_callback('mapreduce', 'begin', input, None, None)

    if back_to_disk:
        fp, prev_fp = None, None
        mm, prev_mm = None, None

    for i, K in enumerate(kernels):
        K_fun, K_args = unpack_callable(K)
        last_step = (i + 1 == len(kernels))
        stage = where(i == 0, 'map', 'reduce')

        if back_to_disk:
            # Reinitialize the unique_hash->file_offset map
            unique_objects = {}

            # Insert picklers/unpicklers
            if i != 0:
                # Insert unpickler, reading from the previous step's store
                K_fun, K_args = _reduce_from_pickle_jar, (prev_fp.name, K_fun, K_args)

            if not last_step:
                # Insert pickler
                K_fun, K_args = _output_pickled_kv, (K_fun, K_args)

                # Create a disk backing store for intermediate results
                fp = tempfile.NamedTemporaryFile(
                    mode='wb', prefix='mapresults-',
                    dir=os.getenv('LSD_TEMPDIR'), suffix='.pkl',
                    delete=True)
                if use_mmap:
                    fd = fp.file.fileno()
                    os.ftruncate(fd, BUFSIZE)
                    mm = mmap.mmap(fd, 0)
                else:
                    mm = fp

        try:
            # Call the distributed mappers
            mresult = defaultdict(list)
            for r in self.imap_unordered(
                    input, K_fun, K_args,
                    progress_callback=progress_callback,
                    progress_callback_stage=stage):
                if last_step:
                    # yield the final result
                    yield r
                else:
                    (k, v) = r

                    if back_to_disk:
                        (obj_hash, v) = v
                        if obj_hash in unique_objects:
                            # Already stored; reuse its offset
                            v = unique_objects[obj_hash]
                        else:
                            # The output value has already been pickled (but
                            # not the key). Store the pickled value into the
                            # pickle jar, and keep the (key, offset) tuple.
                            offs = mm.tell()
                            mm.write(v)
                            assert len(v) == mm.tell() - offs
                            v = offs
                            unique_objects[obj_hash] = offs

                    # Prepare for next reduction
                    mresult[k].append(v)

            input = mresult.items()
        except:
            # Deliberately a bare except: the cleanup must also run when the
            # generator is closed or interrupted, and the exception is always
            # re-raised.
            #
            # In case of an exception, truncate and close the temporary file
            # so the kernel won't attempt to flush it to the disk
            if back_to_disk:
                if mm is not None and use_mmap:
                    mm.resize(1)
                    mm.close()
                    mm = None
                if fp is not None:
                    os.ftruncate(fp.file.fileno(), 0)
                    fp.close()
                    fp = None
            raise
        finally:
            if back_to_disk:
                # Close/clear the intermediate result backing store from the
                # previous step, ensuring it's truncated first so it doesn't
                # hit the disk if it hasn't already.
                if prev_fp is not None:
                    if use_mmap:
                        prev_mm.resize(1)
                        prev_mm.close()
                    os.ftruncate(prev_fp.file.fileno(), 0)
                    prev_fp.close()

                # This step's store becomes the next step's input
                if fp is not None:
                    prev_fp, prev_mm = fp, mm
                    fp, mm = None, None

    # progress_callback cannot be None here: it was defaulted and already
    # called above
    progress_callback('mapreduce', 'end', None, None, None)