Example #1
File: sdss.py Project: gregreen/lsd
def import_from_sweeps(db, sdss_tabname, sweep_files, create=False, all=False):
    """ Import an SDSS catalog from a collection of SDSS sweep files.

	    Note: Assumes underlying shared storage for all output table
	          cells (i.e., any worker is able to write to any cell).
	"""
    if create:
        # Create the new database
        sdss_table = db.create_table(sdss_tabname, sdss_table_def)
    else:
        sdss_table = db.table(sdss_tabname)

    t0 = time.time()
    at = 0
    ntot = 0
    pool = pool2.Pool()
    for (file, nloaded,
         nin) in pool.imap_unordered(sweep_files,
                                     import_from_sweeps_aux,
                                     (db, sdss_tabname, all),
                                     progress_callback=pool2.progress_pass):
        at = at + 1
        ntot = ntot + nloaded
        t1 = time.time()
        time_pass = (t1 - t0) / 60
        time_tot = time_pass / at * len(sweep_files)
        sfile = "..." + file[-67:] if len(file) > 70 else file
        print(
            '  ===> Imported %-70s [%d/%d, %5.2f%%] +%-6d %9d (%.0f/%.0f min.)'
            % (sfile, at, len(sweep_files), 100 * float(at) / len(sweep_files),
               nloaded, ntot, time_pass, time_tot))
    del pool
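
The loop above also shows the worker-dispatch convention that recurs through the rest of these examples: pool.imap_unordered(items, kernel, extra_args) hands the kernel one item from the iterable plus the contents of the extra-args tuple, and the caller unpacks whatever the kernel produces, here (file, nloaded, nin). A minimal kernel sketch under that assumption follows; it is not the project's import_from_sweeps_aux, and load_sweep_file and the table.append() call are placeholders.

# Hypothetical kernel sketch (not the project's import_from_sweeps_aux).
# Assumes the kernel is a generator yielding one result tuple per input file;
# a plain return may also work, as the commented-out serial line in Example #4 hints.
def import_from_sweeps_aux_sketch(file, db, tabname, all):
    table = db.table(tabname)
    rows = load_sweep_file(file, all)   # placeholder loader, not part of lsd
    nloaded = table.append(rows)        # assumed Table.append-style ingest call
    yield (file, nloaded, len(rows))
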
Example #2
    def get_cells(self, bounds=None, return_bounds=False, include_cached=True):
        """ Return a list of (cell_id, bounds) tuples completely
		    covering the requested bounds.

			bounds must be a list of (Polygon, intervalset) tuples.

		    Output is a list of (cell_id, xybounds, tbounds) tuples,
		    unless return_bounds=False, in which case the output is
		    just a list of cell_ids.
		"""
        self._include_cached = include_cached

        # Special case of bounds=None (all sky)
        if bounds is None:
            bounds = [(bn.ALLSKY, intervalset((-np.inf, np.inf)))]

        # Find all existing cells satisfying the bounds
        cells = defaultdict(dict)
        if False:
            # Single-threaded implementation
            for bounds_xy, bounds_t in bounds:
                self._get_cells_recursive(cells, bounds_xy, bounds_t, 0, 0, 1,
                                          1.)
                self._get_cells_recursive(cells, bounds_xy, bounds_t, 0, 1, 1,
                                          1.)
                self._get_cells_recursive(cells, bounds_xy, bounds_t, 1, 0, 1,
                                          1.)
                self._get_cells_recursive(cells, bounds_xy, bounds_t, 1, 1, 1,
                                          1.)
        else:
            # Multi-process implementation (appears to be as good as or better than
            # the single-threaded path in nearly all cases of interest)
            pool = pool2.Pool()
            lev = min(4, self._pix.level)
            ij = np.indices(
                (2**lev, 2**lev)).reshape(2, -1).T  # List of i,j coordinates
            for cells_ in pool.imap_unordered(
                    ij,
                    _get_cells_kernel, (lev, self, bounds),
                    progress_callback=pool2.progress_pass):
                for cell_id, b in cells_.iteritems():
                    for xyb, tb in b.iteritems():
                        _add_bounds(cells, cell_id, xyb, tb)
            del pool

        if len(cells):
            # Transform (x, y, t) tuples to cell_ids
            xyt = np.array(cells.keys()).transpose()
            cell_ids = self._pix._cell_id_for_xyt(xyt[0], xyt[1], xyt[2])

            # Reorder cells to be a dict of cell: [(poly, time), (poly, time)] entries
            cells = dict(
                ((cell_id, v.items())
                 for (cell_id, (k, v)) in izip(cell_ids, cells.iteritems())))

        if not return_bounds:
            return cells.keys()
        else:
            return cells
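
The bounds=None branch above doubles as a usage template: each bounds entry pairs a spatial footprint (here bn.ALLSKY) with an intervalset over time. A minimal call sketch, where tab stands for whatever object exposes get_cells (a placeholder name):

# All-sky, all-time query -- the same default get_cells builds when bounds is None.
bounds = [(bn.ALLSKY, intervalset((-np.inf, np.inf)))]
cell_ids = tab.get_cells(bounds)                    # just the list of cell_ids
cells = tab.get_cells(bounds, return_bounds=True)   # cell_id -> [(xybounds, tbounds), ...]
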
Example #3
def compute_counts_aux(db, tabname, cells, progress_callback=None):
    ntotal = 0

    pool = pool2.Pool()
    for nobjects in pool.map_reduce_chain(cells, [(ls_mapper, db, tabname)],
                                          progress_callback=progress_callback):
        ntotal += nobjects

    return ntotal
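
map_reduce_chain takes the work items plus a list of (kernel, extra args...) stages; compute_counts_aux uses a single mapper stage and sums whatever it produces. Below is a sketch of such a mapper under the same generator assumption as before; it is not the project's ls_mapper, and fetch_cell is an assumed per-cell fetch call.

# Hypothetical single-stage mapper sketch (not the real ls_mapper): yield the
# number of rows found in one cell, which the caller accumulates into ntotal.
def ls_mapper_sketch(cell_id, db, tabname):
    rows = db.table(tabname).fetch_cell(cell_id)   # assumed per-cell fetch API
    yield len(rows) if rows is not None else 0
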
Example #4
def import_from_dvo(catdir, dvo_files, create=False):
	""" Import a PS1 catalog from DVO

	    Note: Assumes underlying shared storage for all catalog
	          cells (i.e., any worker is able to write to any cell).
	"""
	if create:
		# Create the new database
		cat = catalog.Table(catdir, name='ps1', mode='c')
		cat.create_cgroup('astrometry', { 'columns': to_dtype(astromCols), 'primary_key': 'id', 'spatial_keys': ('ra', 'dec'), "cached_flag": "cached" })
		cat.create_cgroup('photometry', { 'columns': to_dtype(photoCols) })
		cat.create_cgroup('import',     { 'columns': [
				('file_id', 'a20'),
				('hdr', 'i8'),
				('cksum', 'a32'),
				('imageid',	'64i8'),
				('blobarr',	'64i8'),
			],
			'blobs': [
				'hdr',
				'blobarr'
			] })
	else:
		cat = catalog.Table(catdir)

	t0 = time.time()
	at = 0; ntot = 0
	pool = pool2.Pool()
	for (file, nloaded, nin) in pool.imap_unordered(dvo_files, import_from_dvo_aux, (cat,)):
	#for (file, nloaded, nin) in imap(lambda file: import_from_dvo_aux(file, cat), dvo_files):
		at = at + 1
		ntot = ntot + nloaded
		t1 = time.time()
		time_pass = (t1 - t0) / 60
		time_tot = time_pass / at * len(dvo_files)
		print('  ===> Imported %s [%d/%d, %5.2f%%] +%-6d %9d (%.0f/%.0f min.)' % (file, at, len(dvo_files), 100 * float(at) / len(dvo_files), nloaded, ntot, time_pass, time_tot))
Example #5
    def _update(self, table_path, snapid):
        # Find what we already have loaded
        prevsnap = np.max(
            self._leaves['snapid']) if len(self._leaves) > 2 else None
        assert prevsnap <= snapid, "Cannot update a catalog to an older snapshot"

        # Enumerate all existing snapshots older than or equal to snapid and newer than prevsnap, and sort them newest first
        self.__snapshots = sorted(isnapshots(table_path,
                                             first=prevsnap,
                                             last=snapid,
                                             no_first=True,
                                             return_path=True),
                                  reverse=True)

        # Add the snapid snapshot by hand, if not in the list of committed snapshots
        # (because it is permissible to update to a yet-uncommitted snapshot)
        if len(self.__snapshots) == 0 or self.__snapshots[0][0] != snapid:
            self.__snapshots.insert(
                0, (snapid, get_snapshot_path(table_path, snapid)))

        # Recursively scan, in parallel
        w = bhpix.width(self._pix.level)
        bmap = np.zeros((w, w), dtype=object)

        lev = min(4, self._pix.level)
        dx = bhpix.pix_size(lev)
        i, j = np.indices(
            (2**lev, 2**lev)).reshape(2, -1)  # List of i,j coordinates
        w2 = 1 << (lev - 1)
        x, y = (i - w2 + 0.5) * dx, (j - w2 + 0.5) * dx

        pool = pool2.Pool()
        for bmap2 in pool.imap_unordered(zip(x, y), _scan_recursive_kernel,
                                         (lev, self)):
            assert not np.any((bmap != 0) & (bmap2 != 0))
            mask = bmap2 != 0
            bmap[mask] = bmap2[mask]
        del pool

        # Add data about cells that were not touched by this update
        bmap_cur = self._bmaps[self._pix.level]
        mask_cur = (bmap_cur != 0) & (bmap == 0)
        lists_cur = [[(mjd, snapid, cell_id, next > 0)
                      for (mjd, snapid, cell_id,
                           next) in iter_siblings(self._leaves, offs)]
                     for offs in bmap_cur[mask_cur]]
        try:
            bmap[mask_cur] = lists_cur
        except ValueError:
            # Workaround for a numpy 1.6.0 bug (http://projects.scipy.org/numpy/ticket/1870)
            # List of coordinates corresponding to True entries in mask_cur
            coords = np.mgrid[0:w, 0:w][:, mask_cur].T
            for (ii, jj), vv in izip(coords, lists_cur):
                bmap[ii, jj] = vv

        # Repack the temporal siblings to a single numpy array, emulating a linked list
        lists = bmap[bmap != 0]
        llens = np.fromiter((len(l) for l in lists), dtype=np.int32)
        leaves = np.empty(np.sum(llens) + 2,
                          dtype=[('mjd', 'f4'), ('snapid', object),
                                 ('cell_id', 'u8'), ('next', 'i4')])
        # We start with two dummy entries, so that offs=0 and 1 are invalid and can take other meanings.
        leaves[:2] = [(np.inf, 0, 0, END_MARKER)] * 2
        seen = dict()
        at = 2
        for l in lists:
            last_i = len(l) - 1
            for (i, (mjd, snapid, cell_id, has_data)) in enumerate(l):
                # Make equal strings refer to the same string object
                try:
                    snapid = seen[snapid]
                except KeyError:
                    seen[snapid] = snapid

                next = 1 if has_data else -1
                if i == last_i:
                    next *= END_MARKER

                leaves[at] = (mjd, snapid, cell_id, next)
                at += 1

        # Construct bmap that has offsets to head of the linked list of siblings
        offs = np.zeros(len(llens), dtype=np.int64)
        offs[1:] = np.cumsum(llens)[:-1]
        offs += 2  # Pointers to beginnings of individual lists
        assert np.all(abs(leaves['next'][offs - 1]) == END_MARKER)
        obmap = np.zeros(bmap.shape, dtype=np.int32)
        obmap[bmap != 0] = offs

        # Recompute mipmaps
        bmaps = self._compute_mipmaps(obmap)

        return bmaps, leaves
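
The leaves array built here packs each pixel's temporal siblings contiguously: obmap holds the offset of each list's head (0 meaning no data, offsets 0 and 1 being the dummy entries), the sign of 'next' flags whether the cell has data, and abs(next) == END_MARKER marks the last sibling. A traversal sketch under that reading of the code (my interpretation, not a routine from the project):

# Walk the sibling list for bhpix pixel (i, j).
at = obmap[i, j]
if at:                                   # zero offset = no siblings recorded
    while True:
        mjd, snapid, cell_id, next_ = leaves[at]
        has_data = next_ > 0             # sign encodes the has_data flag
        # ... use (mjd, snapid, cell_id, has_data) here ...
        if abs(next_) == END_MARKER:     # end of this pixel's list
            break
        at += 1
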
Example #6
def make_object_catalog(db,
                        obj_tabname,
                        det_tabname,
                        exp_tabname,
                        radius=1. / 3600.,
                        explist=None,
                        oldexps=None,
                        fovradius=None):
    """ Create the object catalog
	"""

    # For debugging -- a simple check to see if matching works is to rerun
    # the match across a just matched table. In this case, we expect
    # all detections to be matched to existing objects, and no new ones added.
    _rematching = int(os.getenv('REMATCHING', '0'))

    o2d_tabname = '_%s_to_%s' % (obj_tabname, det_tabname)
    det_table = db.table(det_tabname)
    obj_table = db.table(obj_tabname)
    o2d_table = db.table(o2d_tabname)

    # Fetch all non-empty cells with detections. Group them by the same spatial
    # cell ID. This will be the list over which the first kernel will map.
    if explist is None:
        det_cells = det_table.get_cells()
    else:
        # Fetch only those cells that can contain data from the given exposure list
        print >> sys.stderr, "Enumerating cells with new detections: ",
        det_cells = get_cells_with_dets_from_exps(db, explist, exp_tabname,
                                                  det_tabname, fovradius)
    print >> sys.stderr, "%d cells to process." % (len(det_cells))
    det_cells_grouped = det_table.pix.group_cells_by_spatial(det_cells).items()

    t0 = time.time()
    pool = pool2.Pool()
    ntot = 0
    ntotobj = 0
    at = 0
    for (nexp, nobj, ndet, nnew, nmatch, ndetnc) in pool.map_reduce_chain(
            det_cells_grouped, [
                (_obj_det_match, db, obj_tabname, det_tabname, o2d_tabname,
                 radius, explist, _rematching),
            ],
            progress_callback=pool2.progress_pass):
        at += 1
        if nexp is None:
            continue

        t1 = time.time()
        time_pass = (t1 - t0) / 60
        time_tot = time_pass / at * len(det_cells)

        ntot += nmatch
        ntotobj += nnew
        nobjnew = nobj + nnew
        pctnew = 100. * nnew / nobjnew if nobjnew else 0.
        pctmatch = 100. * nmatch / ndetnc if ndetnc else 0.
        print "  match %7d det to %7d obj (%3d exps): %7d new (%6.2f%%), %7d matched (%6.2f%%)  [%.0f/%.0f min.]" % (
            ndet, nobj, nexp, nnew, pctnew, nmatch, pctmatch, time_pass,
            time_tot)

    # Save the list of exposures that have been matched to the object table
    if oldexps is not None and len(explist):
        allexps = set(oldexps) | set(explist)

        uri = "lsd:%s:cache:all_matched_exposures.txt" % obj_tabname
        with db.open_uri(uri, mode='w') as f:
            for exp in allexps:
                f.write("%d\n" % exp)

    print "Matched a total of %d sources." % (ntot)
    print "Total of %d objects added." % (ntotobj)
Example #7
def import_from_smf(db,
                    det_tabname,
                    exp_tabname,
                    smf_files,
                    survey,
                    create=False):
    """ Import a PS1 table from DVO

	    Note: Assumes underlying shared storage for all table
	          cells (i.e., any worker is able to write to any cell).
	"""
    with locking.lock(db.path[0] + "/.__smf-import-lock.lock"):
        if not db.table_exists(det_tabname) and create:
            # Set up commit hooks
            exp_table_def['commit_hooks'] = [
                ('Updating neighbors', 1, 'lsd.smf', 'make_image_cache',
                 [det_tabname])
            ]

            # Create new tables
            det_table = db.create_table(det_tabname, det_table_def)
            exp_table = db.create_table(exp_tabname, exp_table_def)

            # Set up a one-to-X join relationship between the two tables (join det_table:exp_id->exp_table:exp_id)
            db.define_default_join(det_tabname,
                                   exp_tabname,
                                   type='indirect',
                                   m1=(det_tabname, "det_id"),
                                   m2=(det_tabname, "exp_id"),
                                   _overwrite=create)
        else:
            det_table = db.table(det_tabname)
            exp_table = db.table(exp_tabname)

    det_c2f = gen_tab2fits(det_table_def)
    exp_c2f = gen_tab2fits(exp_table_def)

    t0 = time.time()
    at = 0
    ntot = 0
    pool = pool2.Pool()
    smf_fns = []
    exp_ids = []
    for (file, exp_id, smf_fn, nloaded, nin) in pool.imap_unordered(
            smf_files,
            import_from_smf_aux,
            (det_table, exp_table, det_c2f, exp_c2f, survey),
            progress_callback=pool2.progress_pass):
        smf_fns.append(smf_fn)
        exp_ids.append(exp_id)
        at = at + 1
        ntot = ntot + nloaded
        t1 = time.time()
        time_pass = (t1 - t0) / 60
        time_tot = time_pass / at * len(smf_files)
        print >> sys.stderr, '  ===> Imported %s [%d/%d, %5.2f%%] +%-6d %9d (%.0f/%.0f min.)' % (
            file, at, len(smf_files), 100 * float(at) / len(smf_files),
            nloaded, ntot, time_pass, time_tot)
    del pool

    ret = colgroup.ColGroup()
    ret._EXP = np.array(exp_ids, dtype=np.uint64)
    ret.smf_fn = np.array(smf_fns, dtype='a40')
    return ret
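
The returned ColGroup pairs each exposure identifier with the SMF file it came from; assuming its columns behave like the numpy arrays assigned above, the result can be consumed directly. The table names and survey string below are placeholders, not values from the project.

# Iterate over the (exposure id, SMF filename) pairs produced by the import.
ret = import_from_smf(db, 'ps1_det', 'ps1_exp', smf_files, 'ps1', create=True)
for exp_id, smf_fn in zip(ret._EXP, ret.smf_fn):
    print exp_id, smf_fn
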