Example #1
import sys

import numpy as np


def _coverage_mapper(qresult, dx, filter):
    """Bin the (lon, lat) positions in each block of query results into
    a 2-D count map with cell size dx degrees, yielding one
    (sky, imin, jmin) tile per block."""
    # unpack_callable() is a helper from the surrounding module; it splits
    # a callable-or-(callable, args) spec into its two parts.
    filter, filter_args = unpack_callable(filter)

    for rows in qresult:
        assert len(rows)
        if filter is not None:
            rows = filter(rows, *filter_args)

        lon, lat = list(rows.as_columns())[:2]

        # Work around PS1 bugs:
        tofix = (lon < 0) | (lon >= 360)
        if np.any(tofix):
            # print("Fixing RIGHT ASCENSION in cell", rows.info.cell_id)
            lon[tofix] = np.fmod(np.fmod(lon[tofix], 360.) + 360., 360.)
        tofix = (lat < -90) | (lat > 90)
        if np.any(tofix):
            print "Fixing DECLINATION in cell ", rows.info.cell_id
            lat[lat < -90] = -90
            lat[lat > 90] = 90

        i = (lon / dx).astype(int)
        j = ((90 - lat) / dx).astype(int)

        assert len(lon)
        assert len(lat)
        assert len(i)
        assert len(j)

        (imin, imax, jmin, jmax) = (i.min(), i.max(), j.min(), j.max())
        w = imax - imin + 1
        h = jmax - jmin + 1
        i -= imin
        j -= jmin
        # Sanity check: the tile must be non-degenerate and fit within a
        # full-sky grid (10800 x 5400 corresponds to dx = 1/30 deg)
        if w <= 0 or h <= 0 or w > 10800 or h > 5400:
            print(w, h)
            print(rows.info.cell_id)
            sys.exit(1)

        if False:
            # Binning (method #1, straightforward but slow)
            sky = np.zeros((w, h))
            for (ii, jj) in zip(i, j):
                sky[ii, jj] += 1
        else:
            # Binning (method #2, fast): flatten each (i, j) pair to the
            # 1-D index j + i*h, count with np.bincount, reshape to (w, h)
            sky2 = np.zeros(w * h)
            idx = np.bincount(j + i * h)
            sky2[0:len(idx)] = idx
            sky = sky2.reshape((w, h))

        yield (sky, imin, jmin)
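
The dead "if False:" branch keeps the straightforward binning loop around as a reference; the np.bincount path computes exactly the same counts by flattening the 2-D bin index. A minimal standalone check of that equivalence (hypothetical random data, not part of the original module):

import numpy as np

rng = np.random.default_rng(0)
w, h = 7, 5
i = rng.integers(0, w, size=1000)
j = rng.integers(0, h, size=1000)

# Method #1: explicit loop over index pairs
sky1 = np.zeros((w, h))
for ii, jj in zip(i, j):
    sky1[ii, jj] += 1

# Method #2: flatten (i, j) to j + i*h and count in a single call
sky2 = np.bincount(j + i * h, minlength=w * h).reshape((w, h))

assert np.array_equal(sky1, sky2)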
Example #2
	def map_reduce_chain(self, input, kernels, progress_callback=None):
		""" A poor-man's map-reduce implementation.
		
		    Calls the mapper for each value in the <input> iterable. 
		    The mapper shall return a list of key/value pairs as a
		    result.  Once all mappers have run, reducers will be
		    called with a key, and a list of values associated with
		    that key, once for each key.  The reducer's return
		    values are yielded to the user.

		    Input: Any iterable
		    Output: Iterable (generated)

		    Notes:
		    	- mapper must return a dictionary of (key, value) pairs
		    	- reducer must expect a (key, value) pair as the first
		    	  argument, where the value will be an iterable
		"""

		if progress_callback is None:
			progress_callback = progress_default

		progress_callback('mapreduce', 'begin', input, None, None)

		# back_to_disk, use_mmap and BUFSIZE are module-level
		# configuration values in the original source
		if back_to_disk:
			fp, prev_fp = None, None
			mm, prev_mm = None, None

		for i, K in enumerate(kernels):
			K_fun, K_args = unpack_callable(K)
			last_step = (i + 1 == len(kernels))
			stage = 'map' if i == 0 else 'reduce'

			if back_to_disk:
				# Reinitialize the unique_hash->file_offset map
				unique_objects = {}

				# Insert picklers/unpicklers
				if i != 0:
					# Insert unpickler
					K_fun, K_args = _reduce_from_pickle_jar, (prev_fp.name, K_fun, K_args)

				if not last_step:
					# Insert pickler
					K_fun, K_args = _output_pickled_kv, (K_fun, K_args)

					# Create a disk backing store for intermediate results
					fp = tempfile.NamedTemporaryFile(mode='wb', prefix='mapresults-', dir=os.getenv('LSD_TEMPDIR'), suffix='.pkl', delete=True)
					if use_mmap:
						fd = fp.file.fileno()
						os.ftruncate(fd, BUFSIZE)
						mm = mmap.mmap(fd, 0)
					else:
						mm = fp

			try:
				# Call the distributed mappers
				mresult = defaultdict(list)
				for r in self.imap_unordered(input, K_fun, K_args, progress_callback=progress_callback, progress_callback_stage=stage):
					if last_step:
						# yield the final result
						yield r
					else:
						(k, v) = r

						if back_to_disk:
							(hash, v) = v
							if hash in unique_objects:
								v = unique_objects[hash]
							else:
								# The output value has already been pickled (but not the key). Store the
								# pickled value into the pickle jar, and keep the (key, offset) tuple.
								offs = mm.tell()
								mm.write(v)
								assert len(v) == mm.tell() - offs
								v = offs
								unique_objects[hash] = offs

						# Prepare for next reduction
						mresult[k].append(v)

				# Materialize for the next pass (items() is a view in Python 3)
				input = list(mresult.items())
			except:
				# In case of an exception, shrink and truncate the temporary
				# file so its contents never get flushed to disk
				if back_to_disk:
					if mm is not None and use_mmap:
						mm.resize(1)
						mm.close()
						mm = None

					if fp is not None:
						os.ftruncate(fp.file.fileno(), 0)
						fp.close()
						fp = None
				raise
			finally:
				if back_to_disk:
					# Close/clear the intermediate result backing store from the previous step
					# ensuring it's truncated first so it doesn't hit the disk if it hasn't
					# already.
					if prev_fp is not None:
						if use_mmap:
							prev_mm.resize(1)
							prev_mm.close()
						os.ftruncate(prev_fp.file.fileno(), 0)
						prev_fp.close()

					if fp is not None:
						prev_fp, prev_mm = fp, mm
						fp, mm = None, None

		if progress_callback is not None:
			progress_callback('mapreduce', 'end', None, None, None)
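
Stripped of the process pool (self.imap_unordered) and the mmap-backed pickle jar, the control flow above reduces to: run the current kernel over the input, group intermediate (key, value) pairs by key, and feed the grouped pairs to the next kernel, yielding only the last kernel's output. A self-contained in-memory sketch (the word-count kernels are hypothetical, not from the original module):

from collections import defaultdict

def word_mapper(line):
    # First kernel: emit (key, value) pairs
    for word in line.split():
        yield word, 1

def count_reducer(kv):
    # Later kernels receive (key, list_of_values) pairs
    word, counts = kv
    yield word, sum(counts)

def map_reduce_chain(input, kernels):
    # 'input' mirrors the original method's parameter name
    for i, kernel in enumerate(kernels):
        last_step = (i + 1 == len(kernels))
        groups = defaultdict(list)
        for item in input:
            for r in kernel(item):
                if last_step:
                    yield r            # final kernel: pass results through
                else:
                    k, v = r           # intermediate kernel: group by key
                    groups[k].append(v)
        input = list(groups.items())

print(dict(map_reduce_chain(["a b a", "b c"], [word_mapper, count_reducer])))
# -> {'a': 2, 'b': 2, 'c': 1}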
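
The pickle jar itself relies on a pre-sized, mmap'ed temporary file: the cleanup paths shrink the mapping and truncate the file before closing, so unwritten dirty pages are dropped rather than flushed to disk. The idiom in isolation (the BUFSIZE value here is hypothetical; the original constant is module-level):

import mmap
import os
import tempfile

BUFSIZE = 1 << 20  # hypothetical size for this sketch

# Pre-size a delete-on-close temporary file and map it for writing
fp = tempfile.NamedTemporaryFile(mode='wb', prefix='mapresults-',
                                 suffix='.pkl', delete=True)
fd = fp.file.fileno()
os.ftruncate(fd, BUFSIZE)
mm = mmap.mmap(fd, 0)

mm.write(b'pickled payload')   # mm.tell() tracks the current write offset
print(mm.tell())

# Cleanup mirrors the finally block above: shrink the mapping and truncate
# the file first, so dirty pages are dropped instead of flushed to disk
mm.resize(1)
mm.close()
os.ftruncate(fd, 0)
fp.close()                     # delete=True removes the file here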