Python MapReduceの例、legacypipe.internal.sharedmem.MapReduce Pythonの例

コード例 #1

0

ファイルを表示

def list_bricks(ns):
    """Enumerate Tractor brick catalogs and read the region of each brick.

    Parameters
    ----------
    ns : namespace
        Parsed options. The attributes read here are ``filelist``, ``src``,
        ``verbose``, ``bricksdesc``, ``bricklist`` and ``numproc``.

    Returns
    -------
    list of tuple
        One ``(brickname, filename, region)`` tuple per selected brick,
        where ``region`` is the value returned by ``read_region``.

    Raises
    ------
    KeyError
        If ``ns.bricklist`` names a brick that was not enumerated.
    """
    t0 = time()

    if ns.filelist is not None:
        # Read the list inside a context manager so the file handle is
        # closed instead of being left to the garbage collector.
        with open(ns.filelist, 'r') as flist:
            filenames = [line.strip() for line in flist]
        d = dict((parse_filename(fn), fn) for fn in filenames)
    else:
        d = dict(iter_tractor(ns.src))

    if ns.verbose:
        print('enumerated %d bricks in %g seconds' % (len(d), time() - t0))

    if ns.bricksdesc is not None:
        bricksdesc = fitsio.read(ns.bricksdesc, 1, upper=True)
        # ADM convert from bytes_ to str_ type if fitsio version < 1.
        if bricksdesc["BRICKNAME"].dtype.type == np.bytes_:
            bricksdesc = dict((item['BRICKNAME'].decode(), item)
                              for item in bricksdesc)
        else:
            bricksdesc = dict((item['BRICKNAME'], item)
                              for item in bricksdesc)
    else:
        bricksdesc = None

    #- Load list of bricknames to use
    if ns.bricklist is not None:
        # loadtxt returns a 0-d array for a one-line file; atleast_1d keeps
        # the result iterable in that case.
        bricklist = np.atleast_1d(np.loadtxt(ns.bricklist, dtype='S8'))
        # TODO: skip unknown bricks?
        # Decode once and use the decoded name for BOTH the new key and the
        # lookup: the names are loaded as bytes, and looking up the raw
        # bytes value in a str-keyed dict raises KeyError.
        names = [brickname.decode() for brickname in bricklist]
        d = dict((name, d[name]) for name in names)

    t0 = time()

    with sharedmem.MapReduce(np=ns.numproc) as pool:
        chunksize = 1024
        keys = list(d.keys())

        def work(i):
            # Each task handles one slice of `chunksize` brick names.
            return [(brickname, d[brickname],
                     read_region(brickname, d[brickname], bricksdesc))
                    for brickname in keys[i:i + chunksize]]

        chunks = pool.map(work, range(0, len(keys), chunksize))
        # Flatten in linear time; sum(chunks, []) re-copies the growing
        # list for every chunk.
        bricks = [brick for chunk in chunks for brick in chunk]

    if ns.verbose:
        print('read regions of %d bricks in %g seconds' %
              (len(bricks), time() - t0))

    return bricks

コード例 #2

0

ファイルを表示

ファイル: generate-sweep-files.py プロジェクト: djschlegel/legacypipe

def make_sweep(sweep, bricks, ns):
    """Gather the primary objects of all bricks that fall inside one sweep.

    ``sweep`` unpacks to ``(ra1, dec1, ra2, dec2)``; objects are kept when
    ``ra1 <= RA < ra2`` and ``dec1 <= DEC < dec2``. ``bricks`` is an
    iterable of ``(brickname, filename, region)`` tuples. ``ns`` is
    accepted but not used here.

    Returns ``(data, neff)``: the concatenated ``SWEEP_DTYPE`` array and the
    number of bricks that intersected the sweep and were read.
    """
    # Seed with an empty array so the final concatenate always has input.
    data = [np.empty(0, dtype=SWEEP_DTYPE)]
    ra1, dec1, ra2, dec2 = sweep

    def select(brickname, filename, region):
        # Bricks that do not overlap the sweep are never opened.
        if not intersect(sweep, region):
            return None

        catalog = fitsio.read(filename, 1, upper=True)
        catalog = catalog[catalog['BRICK_PRIMARY'] != 0]

        in_box = ((catalog['RA'] >= ra1) & (catalog['RA'] < ra2)
                  & (catalog['DEC'] >= dec1) & (catalog['DEC'] < dec2))
        catalog = catalog[in_box]

        chunk = np.empty(len(catalog), dtype=SWEEP_DTYPE)
        present = catalog.dtype.names

        for colname in chunk.dtype.names:
            # Columns missing from the brick file are left unfilled.
            if colname in present:
                try:
                    chunk[colname][...] = catalog[colname][...]
                except ValueError:
                    print('failed on column `%s`' % colname)
                    raise

        return chunk

    def collect(chunk):
        if chunk is None:
            return
        data.append(chunk)

    with sharedmem.MapReduce(np=0) as pool:
        pool.map(select, bricks, star=True, reduce=collect)

    # The seed entry is not a brick, hence the -1.
    neff = len(data) - 1

    return np.concatenate(data, axis=0), neff

コード例 #3

0

ファイルを表示

ファイル: generate-sweep-files.py プロジェクト: DriftingPig/Obi-Metallica

def make_sweep(sweep, bricks, ns):
    """Gather objects and a merged header for all bricks inside one sweep.

    Parameters
    ----------
    sweep : sequence
        Unpacks to ``(ra1, dec1, ra2, dec2)``; objects are kept when
        ``ra1 <= RA < ra2`` and ``dec1 <= DEC < dec2``.
    bricks : iterable of tuple
        ``(brickname, filename, region)`` per brick.
    ns : namespace
        Only ``ns.ignore_errors`` is read: when true, unreadable files are
        reported and skipped and the dtype consistency check is disabled.

    Returns
    -------
    data : numpy.ndarray
        Concatenated ``SWEEP_DTYPE`` records of the selected objects.
    header : dict
        Primary-header keywords whose values agree across all contributing
        bricks; keywords with conflicting values are dropped.
    neff : int
        Number of bricks that intersected the sweep and were read.

    Raises
    ------
    ValueError
        If a column dtype differs between ``SWEEP_DTYPE`` and a brick file
        (only when ``ns.ignore_errors`` is false), or a column copy fails.
    """
    # Seed with an empty array so the final concatenate always has input.
    data = [np.empty(0, dtype=SWEEP_DTYPE)]
    header = {}
    ra1, dec1, ra2, dec2 = sweep

    def merge_header(header, header2):
        # A keyword survives only while every brick agrees on its value;
        # the first disagreement marks it NA for good.
        for key, value in header2.items():
            if key not in header:
                header[key] = value
            elif header[key] is not NA and header[key] != value:
                header[key] = NA

    with sharedmem.MapReduce(np=0) as pool:
        def select(brickname, filename, region):
            if not intersect(sweep, region):
                return None, None
            try:
                objects = fitsio.read(filename, 1, upper=True)
                chunkheader = fitsio.read_header(filename, 0, upper=True)
            except Exception:
                # Catch Exception rather than everything, so that
                # KeyboardInterrupt/SystemExit are never swallowed when
                # ignore_errors is set.
                if ns.ignore_errors:
                    print('IO error on %s' % filename)
                    return None, None
                raise
            # ADM check all the column dtypes match.
            if not ns.ignore_errors:
                sflds = SWEEP_DTYPE.fields
                tflds = objects.dtype.fields
                for fld in sflds:
                    sdt, tdt = sflds[fld][0], tflds[fld][0]
                    if sdt != tdt:
                        msg = 'sweeps/Tractor dtypes differ for field '
                        msg += '{}. Sweeps: {}, Tractor: {}'.format(fld, sdt, tdt)
                        raise ValueError(msg)

            mask = objects['BRICK_PRIMARY'] != 0
            objects = objects[mask]
            mask = objects['RA'] >= ra1
            mask &= objects['RA'] < ra2
            mask &= objects['DEC'] >= dec1
            mask &= objects['DEC'] < dec2
            objects = objects[mask]

            chunk = np.empty(len(objects), dtype=SWEEP_DTYPE)

            for colname in chunk.dtype.names:
                if colname not in objects.dtype.names:
                    # skip missing columns
                    continue
                try:
                    chunk[colname][...] = objects[colname][...]
                except ValueError:
                    print('failed on column `%s`' % colname)
                    raise
            # Return a plain dict copy of the header keywords.
            chunkheader = {key: chunkheader[key] for key in chunkheader.keys()}
            return chunk, chunkheader

        def collect(chunk, chunkheader):
            if chunk is not None:
                data.append(chunk)
                merge_header(header, chunkheader)

        pool.map(select, bricks, star=True, reduce=collect)

    # The seed entry is not a brick, hence the -1.
    neff = len(data) - 1

    data = np.concatenate(data, axis=0)
    header = {key: value for key, value in header.items() if value is not NA}
    return data, header, neff

コード例 #4

0

ファイルを表示

ファイル: generate-sweep-files.py プロジェクト: DriftingPig/Obi-Metallica

def main():
    """Build sweep files from the Tractor brick catalogs.

    Lists the bricks, picks the sweep layout named by ``ns.schema``, and for
    every sweep collects the matching objects with ``make_sweep`` and writes
    them with ``save_sweep_file`` in each requested format under ``ns.dest``.

    Raises
    ------
    OSError
        If ``ns.dest`` does not exist and cannot be created.
    KeyError
        If ``ns.schema`` is not one of ``'ra'``, ``'blocks'``, ``'dec'``.
    """
    ns = parse_args()

    if ns.ignore_errors:
        print("Warning: *** Will ignore broken tractor catalogue files ***")
        print("         *** Disable -I for final data product.         ***")
    # avoid each subprocess importing h5py again and again.
    if 'hdf5' in ns.format:
        import h5py  # noqa: F401

    # this may take a while on a file system with slow meta-data
    # access
    # bricks = [(name, filepath, region), ...]
    bricks = list_bricks(ns)

    try:
        os.makedirs(ns.dest)
    except OSError:
        # An existing directory is fine; any other failure (permissions,
        # a file in the way, ...) must not be silently ignored.
        if not os.path.isdir(ns.dest):
            raise

    # blocks or ra stripes?
    schemas = {
        'ra': sweep_schema_ra(360),
        'blocks': sweep_schema_blocks(36, 36),
        'dec': sweep_schema_dec(180),
    }

    sweeps = schemas[ns.schema]

    # Reference time for the throughput figures printed by reduce().
    t0 = time()

    # 0-d arrays so the nested reduce() can update them in place.
    nbricks_tot = np.zeros((), 'i8')
    nobj_tot = np.zeros((), 'i8')

    def work(sweep):
        data, header, nbricks = make_sweep(sweep, bricks, ns)

        header.update({
            'RAMIN': sweep[0],
            'DECMIN': sweep[1],
            'RAMAX': sweep[2],
            'DECMAX': sweep[3],
        })

        template = "sweep-%(ramin)s%(decmin)s-%(ramax)s%(decmax)s.%(format)s"

        def formatdec(dec):
            return ("%+04g" % dec).replace('-', 'm').replace('+', 'p')

        def formatra(ra):
            return ("%03g" % ra)

        # Stays None when no format was requested, instead of leaving the
        # name unbound at the return statement.
        filename = None
        for fmt in ns.format:
            filename = template % dict(ramin=formatra(sweep[0]),
                                       decmin=formatdec(sweep[1]),
                                       ramax=formatra(sweep[2]),
                                       decmax=formatdec(sweep[3]),
                                       format=fmt)

            # Empty sweeps produce no file.
            if len(data) > 0:
                save_sweep_file(os.path.join(ns.dest, filename),
                                data, header, fmt)

        # The reported filename is that of the last format written.
        return filename, nbricks, len(data)

    def reduce(filename, nbricks, nobj):
        nbricks_tot[...] += nbricks
        nobj_tot[...] += nobj

        if ns.verbose and nobj > 0:
            elapsed = time() - t0
            print(
                '%s : %d bricks %d primary objects, %g bricks / sec %g objs / sec' %
                (filename, nbricks, nobj,
                 nbricks_tot / elapsed,
                 nobj_tot / elapsed))

    with sharedmem.MapReduce(np=ns.numproc) as pool:
        pool.map(work, sweeps, reduce=reduce)

コード例 #5

0

ファイルを表示

def main():
    """Match Tractor brick catalogs to an external catalog and save the result.

    For every external object the closest BRICK_PRIMARY Tractor object
    within ``ns.tolerance`` arcseconds is stored in the matching row of the
    output catalog; unmatched rows keep ``OBJID == -1``. Optional extra
    columns returned by ``read_external`` are appended, and the catalog is
    written with ``save_file`` in every format listed in ``ns.format``.

    Raises
    ------
    OSError
        If the output directory does not exist and cannot be created.
    """
    ns = parse_args()

    if ns.ignore_errors:
        print("Warning: *** Will ignore broken tractor catalog files ***")
        print("         *** Disable -I for final data product.         ***")

    bricks = list_bricks(ns)

    # ADM grab a {FIELD: unit} dict from the first Tractor file.
    unitdict = get_units(bricks[0][1])

    tree, nobj, morecols = read_external(ns.external, ns)

    # get the data type of the match
    _, path = bricks[0]
    peek = fitsio.read(path, 1, upper=True)
    matched_catalog = sharedmem.empty(nobj, dtype=peek.dtype)
    # OBJID == -1 marks a slot that has not been matched yet.
    matched_catalog['OBJID'] = -1

    matched_distance = sharedmem.empty(nobj, dtype='f4')

    # convert to radian
    tol = ns.tolerance / (60. * 60.) * (np.pi / 180)

    # Starting every slot at the tolerance means only closer matches win.
    matched_distance[:] = tol
    nprocessed = np.zeros((), dtype='i8')
    nmatched = np.zeros((), dtype='i8')
    ntotal = np.zeros((), dtype='i8')
    t0 = time()

    with sharedmem.MapReduce(np=ns.numproc) as pool:
        def work(brickname, path):
            try:
                objects = fitsio.read(path, 1, upper=True)
            except Exception:
                # Catch Exception rather than everything, so that
                # KeyboardInterrupt/SystemExit are never swallowed.
                if ns.ignore_errors:
                    print ("IO Error on %s" %path)
                    return None, None, None
                raise

            pos = radec2pos(objects['RA'], objects['DEC'])
            d, i = tree.query(pos, 1)
            assert (objects['OBJID'] != -1).all()
            # NOTE(review): pool.critical presumably serialises updates to
            # the shared arrays across workers -- confirm against sharedmem.
            with pool.critical:
                mask = d < matched_distance[i]
                mask &= objects['BRICK_PRIMARY']
                i = i[mask]
                matched_catalog[i] = objects[mask][list(matched_catalog.dtype.names)]
                matched_distance[i] = d[mask]
            matched = mask.sum()

            return brickname, matched, len(objects)

        def reduce(brickname, matched, total):
            # work() returns all-None for a file skipped on error.
            if brickname is None:
                return
            nprocessed[...] += 1
            nmatched[...] += matched
            ntotal[...] += total
            if ns.verbose:
                if nprocessed % 1000 == 0:
                    print("Processed %d files, %g / second, matched %d / %d objects."
                        % (nprocessed, nprocessed / (time() - t0), nmatched, ntotal)
                        )

        pool.map(work, bricks, star=True, reduce=reduce)

        nrealmatched = (matched_catalog['OBJID'] != -1).sum()
        if ns.verbose:
            print("Processed %d files, %g / second, matched %d / %d objects into %d slots."
                % (nprocessed, nprocessed / (time() - t0),
                    nmatched, ntotal,
                    nrealmatched)
                )

        destdir = os.path.dirname(ns.dest)
        if destdir:
            try:
                os.makedirs(destdir)
            except OSError:
                # An existing directory is fine; any other failure must
                # not be silently ignored.
                if not os.path.isdir(destdir):
                    raise

        hdr = fitsio.FITSHDR()
        hdr.add_record(dict(name='NMATCHED', value=nrealmatched,
                            comment='Number of unique matches.'))
        # The collision count gets its own keyword and an accurate comment.
        # Previously a second record reused the NCOLL keyword with the
        # unique-match count, which NMATCHED already carries.
        hdr.add_record(dict(name='NCOLL', value=nmatched - nrealmatched,
                            comment='Number of collisions.'))
        hdr.add_record(dict(name='RADIUS', value=ns.tolerance,
                            comment='Search radius (arcsec).'))

        value = ns.external
        if len(value) > 68:
            # Too long for one card: split into 67-character pieces, each
            # piece but the last ending in '&', using CONTINUE cards.
            hdr.add_record(dict(name='EXTERNAL', value=value[:67]+'&'))
            rest = value[67:]
            while rest:
                hdr.add_record(dict(name='CONTINUE', value="  '%s%s'" % (
                    rest[:67], '&' if len(rest) > 67 else '')))
                rest = rest[67:]
            hdr.add_record(dict(name='LONGSTRN', value='OGIP 1.0',
                                comment='CONTINUE cards are used'))
        else:
            # Short paths were previously not recorded at all.
            hdr.add_record(dict(name='EXTERNAL', value=value))

        # Optionally add the new columns
        if len(morecols) > 0:
            newdtype = matched_catalog.dtype.descr

            for coldata, col in zip(morecols, ns.copycols):
                newdtype = newdtype + [(col, coldata.dtype)]
            newdtype = np.dtype(newdtype)

            widened = np.empty(matched_catalog.shape, dtype=newdtype)
            for field in matched_catalog.dtype.fields:
                widened[field] = matched_catalog[field]
            for coldata, col in zip(morecols, ns.copycols):
                widened[col] = coldata

            # The widened array is already a fresh array; no copy needed.
            matched_catalog = widened

        for fmt in ns.format:
            save_file(ns.dest, matched_catalog, hdr, fmt, unitdict=unitdict)

コード例 #6

0

ファイルを表示

ファイル: match-external-catalog.py プロジェクト: findaz/legacypipe

def main():
    """Match Tractor brick catalogs to an external catalog and save the result.

    For every external object the closest BRICK_PRIMARY Tractor object
    within ``ns.tolerance`` arcseconds is stored in the matching row of the
    output catalog; unmatched rows keep ``OBJID == -1``. Optional extra
    columns returned by ``read_external`` are appended, and the catalog is
    written with ``save_file`` in every format listed in ``ns.format``.

    Raises
    ------
    OSError
        If the output directory does not exist and cannot be created.
    """
    ns = parse_args()

    if ns.ignore_errors:
        print("Warning: *** Will ignore broken tractor catalog files ***")
        print("         *** Disable -I for final data product.         ***")

    bricks = list_bricks(ns)

    tree, nobj, morecols = read_external(ns.external, ns)

    # get the data type of the match
    _, path = bricks[0]
    peek = fitsio.read(path, 1, upper=True)
    matched_catalog = sharedmem.empty(nobj, dtype=peek.dtype)
    # OBJID == -1 marks a slot that has not been matched yet.
    matched_catalog['OBJID'] = -1

    matched_distance = sharedmem.empty(nobj, dtype='f4')

    # convert to radian
    tol = ns.tolerance / (60. * 60.) * (np.pi / 180)

    # Starting every slot at the tolerance means only closer matches win.
    matched_distance[:] = tol
    nprocessed = np.zeros((), dtype='i8')
    nmatched = np.zeros((), dtype='i8')
    ntotal = np.zeros((), dtype='i8')
    t0 = time()

    with sharedmem.MapReduce(np=ns.numproc) as pool:
        def work(brickname, path):
            try:
                objects = fitsio.read(path, 1, upper=True)
            except Exception:
                # Catch Exception rather than everything, so that
                # KeyboardInterrupt/SystemExit are never swallowed.
                if ns.ignore_errors:
                    print ("IO Error on %s" %path)
                    return None, None, None
                raise

            pos = radec2pos(objects['RA'], objects['DEC'])
            d, i = tree.query(pos, 1)
            assert (objects['OBJID'] != -1).all()
            # NOTE(review): pool.critical presumably serialises updates to
            # the shared arrays across workers -- confirm against sharedmem.
            with pool.critical:
                mask = d < matched_distance[i]
                mask &= objects['BRICK_PRIMARY']
                i = i[mask]
                matched_catalog[i] = objects[mask]
                matched_distance[i] = d[mask]
            matched = mask.sum()

            return brickname, matched, len(objects)

        def reduce(brickname, matched, total):
            # work() returns all-None for a file skipped on error.
            if brickname is None:
                return
            nprocessed[...] += 1
            nmatched[...] += matched
            ntotal[...] += total
            if ns.verbose:
                if nprocessed % 1000 == 0:
                    print("Processed %d files, %g / second, matched %d / %d objects."
                        % (nprocessed, nprocessed / (time() - t0), nmatched, ntotal)
                        )

        pool.map(work, bricks, star=True, reduce=reduce)

        nrealmatched = (matched_catalog['OBJID'] != -1).sum()
        if ns.verbose:
            print("Processed %d files, %g / second, matched %d / %d objects into %d slots."
                % (nprocessed, nprocessed / (time() - t0),
                    nmatched, ntotal,
                    nrealmatched)
                )

        destdir = os.path.dirname(ns.dest)
        if destdir:
            try:
                os.makedirs(destdir)
            except OSError:
                # An existing directory is fine; any other failure must
                # not be silently ignored.
                if not os.path.isdir(destdir):
                    raise

        header = {
            'NMATCHED': nrealmatched,
            # Matches that landed on an already-filled slot.
            'NCOLLISION': nmatched - nrealmatched,
            'TOL_ARCSEC': ns.tolerance,
        }

        # Optionally add the new columns
        if len(morecols) > 0:
            newdtype = matched_catalog.dtype.descr

            for coldata, col in zip(morecols, ns.copycols):
                newdtype = newdtype + [(col, coldata.dtype)]
            newdtype = np.dtype(newdtype)

            widened = np.empty(matched_catalog.shape, dtype=newdtype)
            for field in matched_catalog.dtype.fields:
                widened[field] = matched_catalog[field]
            for coldata, col in zip(morecols, ns.copycols):
                widened[col] = coldata

            # The widened array is already a fresh array; no copy needed.
            matched_catalog = widened

        for fmt in ns.format:
            save_file(ns.dest, matched_catalog, header, fmt)

コード例 #7

0

ファイルを表示

def main():
    """Match Tractor brick catalogs to an external catalog and save the result.

    Only BRICK_PRIMARY Tractor objects are considered. The tree returned by
    ``read_external`` is queried for up to ``maxdups`` neighbours within
    ``ns.tolerance`` arcseconds of each object, and for every external
    object the closest Tractor object is stored in the matching row of the
    output catalog; unmatched rows keep ``OBJID == -1``. Optional extra
    columns are appended, and the catalog is written with ``save_file`` in
    every format listed in ``ns.format``.

    Raises
    ------
    OSError
        If the output directory does not exist and cannot be created.
    """
    ns = parse_args()

    if ns.ignore_errors:
        print("Warning: *** Will ignore broken tractor catalog files ***")
        print("         *** Disable -I for final data product.         ***")

    bricks = list_bricks(ns)

    # ADM grab a {FIELD: unit} dict from the first Tractor file.
    unitdict = get_units(bricks[0][1])

    # convert to radian
    tol = ns.tolerance / (60. * 60.) * (np.pi / 180)

    tree, nobj, morecols, maxdups = read_external(ns.external, tol, ns)

    # get the data type of the match
    _, path = bricks[0]
    peek = fitsio.read(path, 1, upper=True)
    matched_catalog = sharedmem.empty(nobj, dtype=peek.dtype)
    # OBJID == -1 marks a slot that has not been matched yet.
    matched_catalog['OBJID'] = -1

    matched_distance = sharedmem.empty(nobj, dtype='f4')

    # Starting every slot at the tolerance means only closer matches win.
    matched_distance[:] = tol
    nprocessed = np.zeros((), dtype='i8')
    nmatched = np.zeros((), dtype='i8')
    ntotal = np.zeros((), dtype='i8')
    t0 = time()

    with sharedmem.MapReduce(np=ns.numproc) as pool:
        def work(brickname, path):
            try:
                objects = fitsio.read(path, 1, upper=True)
            except Exception:
                # Catch Exception rather than everything, so that
                # KeyboardInterrupt/SystemExit are never swallowed.
                if ns.ignore_errors:
                    print ("IO Error on %s" %path)
                    return None, None, None
                raise

            # ADM limit to just PRIMARY objects from imaging.
            bp = objects["BRICK_PRIMARY"]
            objects = objects[bp]
            pos = radec2pos(objects['RA'], objects['DEC'])

            # ADM query tree allowing duplicates.
            dd, ii = tree.query(pos, maxdups, distance_upper_bound=tol)

            # ADM collect relevant information (retaining duplicates).
            within = dd < tol
            _s = ii[within]            # ADM the spec object indices.
            _p = np.where(within)[0]   # ADM the imaging object indices.
            _d = dd[within]            # ADM the matching distances.

            # ADM bail if there are no matches.
            if len(_s) == 0:
                return brickname, 0, len(objects)

            # ADM look-up dictionaries of the relevant distances and
            # ADM imaging object indices for each spec object index.
            ddict, pdict = {s: [] for s in _s}, {s: [] for s in _s}
            for s, dist, p in zip(_s, _d, _p):
                ddict[s].append(dist)
                pdict[s].append(p)

            # ADM collapse the lookup dict based on minimum distances:
            # ADM for each spec object keep only its closest imaging object.
            spec_idx, min_dist, phot_idx = [], [], []
            for s, dists in ddict.items():
                best = np.argmin(dists)
                spec_idx.append(s)
                min_dist.append(dists[best])
                phot_idx.append(pdict[s][best])

            # ADM we're left with the spectroscopic and photometric indexes
            # ADM distances and indexes contingent on the minimum distances.
            i = np.array(spec_idx, dtype='i4')
            d = np.array(min_dist, dtype='f4')
            iphot = np.array(phot_idx, dtype='i4')

            assert (objects['OBJID'] != -1).all()
            # NOTE(review): pool.critical presumably serialises updates to
            # the shared arrays across workers -- confirm against sharedmem.
            with pool.critical:
                mask = d < matched_distance[i]
                i = i[mask]
                iphot = iphot[mask]
                matched_catalog[i] = objects[iphot][list(matched_catalog.dtype.names)]
                matched_distance[i] = d[mask]
            matched = mask.sum()

            return brickname, matched, len(objects)

        def reduce(brickname, matched, total):
            # work() returns all-None for a file skipped on error.
            if brickname is None:
                return
            nprocessed[...] += 1
            nmatched[...] += matched
            ntotal[...] += total
            if ns.verbose:
                if nprocessed % 1000 == 0:
                    print("Processed %d files, %g / second, matched %d / %d brick primary objects."
                        % (nprocessed, nprocessed / (time() - t0), nmatched, ntotal)
                        )

        pool.map(work, bricks, star=True, reduce=reduce)

        nrealmatched = (matched_catalog['OBJID'] != -1).sum()
        if ns.verbose:
            print("Processed %d files, %g / second, matched %d / %d objects into %d slots."
                % (nprocessed, nprocessed / (time() - t0),
                    nmatched, ntotal,
                    nrealmatched)
                )

        destdir = os.path.dirname(ns.dest)
        if destdir:
            try:
                os.makedirs(destdir)
            except OSError:
                # An existing directory is fine; any other failure must
                # not be silently ignored.
                if not os.path.isdir(destdir):
                    raise

        hdr = fitsio.FITSHDR()
        hdr.add_record(dict(name='NMATCHED', value=nrealmatched,
                            comment='Number of unique matches.'))
        # The collision count gets its own keyword and an accurate comment.
        # Previously a second record reused the NCOLL keyword with the
        # unique-match count, which NMATCHED already carries.
        hdr.add_record(dict(name='NCOLL', value=nmatched - nrealmatched,
                            comment='Number of collisions.'))
        hdr.add_record(dict(name='RADIUS', value=ns.tolerance,
                            comment='Search radius (arcsec).'))

        value = ns.external
        if len(value) > 68:
            # Too long for one card: split into 67-character pieces, each
            # piece but the last ending in '&', using CONTINUE cards.
            hdr.add_record(dict(name='EXTERNAL', value=value[:67]+'&'))
            rest = value[67:]
            while rest:
                hdr.add_record(dict(name='CONTINUE', value="  '%s%s'" % (
                    rest[:67], '&' if len(rest) > 67 else '')))
                rest = rest[67:]
            hdr.add_record(dict(name='LONGSTRN', value='OGIP 1.0',
                                comment='CONTINUE cards are used'))
        else:
            # Short paths were previously not recorded at all.
            hdr.add_record(dict(name='EXTERNAL', value=value))

        # Optionally add the new columns
        if len(morecols) > 0:
            newdtype = matched_catalog.dtype.descr

            for coldata, col in zip(morecols, ns.copycols):
                newdtype = newdtype + [(col, coldata.dtype)]
            newdtype = np.dtype(newdtype)

            widened = np.empty(matched_catalog.shape, dtype=newdtype)
            for field in matched_catalog.dtype.fields:
                widened[field] = matched_catalog[field]
            for coldata, col in zip(morecols, ns.copycols):
                widened[col] = coldata

            # The widened array is already a fresh array; no copy needed.
            matched_catalog = widened

        for fmt in ns.format:
            save_file(ns.dest, matched_catalog, hdr, fmt, unitdict=unitdict)

コード例 #8

0

ファイルを表示

def main():
    """Build sweep files, split into standard, light-curve and extra columns.

    Lists the bricks, picks the sweep layout named by ``ns.schema``, and for
    every non-empty sweep writes up to three files per requested format,
    one into each directory of ``outdirnames``: the ``SWEEP_DTYPE`` columns
    without light curves, the light-curve columns, and the columns found in
    the Tractor files but not in ``SWEEP_DTYPE``. The latter two also carry
    the RELEASE/BRICKID/OBJID identifier columns.

    Raises
    ------
    OSError
        If an output directory does not exist and cannot be created.
    KeyError
        If ``ns.schema`` is not one of ``'ra'``, ``'blocks'``, ``'dec'``.
    """
    ns = parse_args()
    if ns.ignore_errors:
        print("Warning: *** Will ignore broken tractor catalogue files ***")
        print("         *** Disable -I for final data product.         ***")
    # avoid each subprocess importing h5py again and again.
    if 'hdf5' in ns.format:
        import h5py  # noqa: F401

    # this may take a while on a file system with slow meta-data
    # access
    # bricks = [(name, filepath, region), ...]
    bricks = list_bricks(ns)

    # ADM get a {FIELD: unit} dictionary from one of the Tractor files.
    fn = bricks[0][1]
    unitdict = get_units(fn)
    # ADM read in a small amount of information from one of the Tractor
    # ADM files to establish the full dtype.
    testdata = fitsio.read(fn, rows=[0], upper=True)
    ALL_DTYPE = testdata.dtype

    for odn in outdirnames:
        outdir = os.path.join(ns.dest, odn)
        try:
            os.makedirs(outdir)
        except OSError:
            # An existing directory is fine; any other failure must not
            # be silently ignored.
            if not os.path.isdir(outdir):
                raise

    # blocks or ra stripes?
    schemas = {
        'ra': sweep_schema_ra(360),
        'blocks': sweep_schema_blocks(36, 36),
        'dec': sweep_schema_dec(180),
    }

    sweeps = schemas[ns.schema]

    # The column groups depend only on the dtypes, so build them once
    # instead of once per format of every sweep.
    # ADM the columns to always include to form a unique ID.
    uniqid = [dt for dt in SWEEP_DTYPE.descr
              if dt[0] in ("RELEASE", "BRICKID", "OBJID")]
    # ADM write out separate sweeps for:
    # ADM    the SWEEP_DTYPE columns (without light-curves).
    sweepdt = [dt for dt in SWEEP_DTYPE.descr if 'LC' not in dt[0]]
    # ADM    the SWEEP_DTYPE columns (just light-curves).
    lcdt = uniqid + [dt for dt in SWEEP_DTYPE.descr if 'LC' in dt[0]]
    # ADM    the remaining "extra" columns.
    alldt = uniqid + [dt for dt in ALL_DTYPE.descr
                      if dt[0] not in SWEEP_DTYPE.names]
    ender = [".fits", "-lc.fits", "-ex.fits"]
    outputs = list(zip([sweepdt, lcdt, alldt], outdirnames, ender))

    # Reference time for the throughput figures printed by reduce().
    t0 = time()

    # 0-d arrays so the nested reduce() can update them in place.
    nbricks_tot = np.zeros((), 'i8')
    nobj_tot = np.zeros((), 'i8')

    def work(sweep):
        data, header, nbricks = make_sweep(sweep,
                                           bricks,
                                           ns,
                                           ALL_DTYPE=ALL_DTYPE)

        header.update({
            'RAMIN': sweep[0],
            'DECMIN': sweep[1],
            'RAMAX': sweep[2],
            'DECMAX': sweep[3],
        })

        template = "sweep-%(ramin)s%(decmin)s-%(ramax)s%(decmax)s.%(format)s"

        def formatdec(dec):
            return ("%+04g" % dec).replace('-', 'm').replace('+', 'p')

        def formatra(ra):
            return ("%03g" % ra)

        # Stays None when no format was requested, instead of leaving the
        # name unbound at the return statement.
        filename = None
        for fmt in ns.format:
            filename = template % dict(ramin=formatra(sweep[0]),
                                       decmin=formatdec(sweep[1]),
                                       ramax=formatra(sweep[2]),
                                       decmax=formatdec(sweep[3]),
                                       format=fmt)

            # Empty sweeps produce no files.
            if len(data) == 0:
                continue

            for dt, odn, end in outputs:
                if len(dt) == 0:
                    continue
                outname = filename.replace(".fits", end)
                dest = os.path.join(ns.dest, odn, outname)
                newdata = np.empty(len(data), dtype=dt)
                for col in newdata.dtype.names:
                    newdata[col] = data[col]
                save_sweep_file(dest,
                                newdata,
                                header,
                                fmt,
                                unitdict=unitdict)

        # The reported filename is that of the last format processed.
        return filename, nbricks, len(data)

    def reduce(filename, nbricks, nobj):
        nbricks_tot[...] += nbricks
        nobj_tot[...] += nobj

        if ns.verbose and nobj > 0:
            elapsed = time() - t0
            print(
                '%s : %d bricks %d primary objects, %g bricks / sec %g objs / sec'
                % (
                    filename,
                    nbricks,
                    nobj,
                    nbricks_tot / elapsed,
                    nobj_tot / elapsed,
                ))

    with sharedmem.MapReduce(np=ns.numproc) as pool:
        pool.map(work, sweeps, reduce=reduce)

コード例 #9

0

ファイルを表示

ファイル: match-boss-catalogue.py プロジェクト: djschlegel/legacypipe

def main():
    """Match Tractor brick catalogs to the BOSS catalogue and save the result.

    For every BOSS object the closest BRICK_PRIMARY Tractor object within
    ``ns.tolerance`` arcseconds is stored in the matching row of the output
    catalogue; unmatched rows keep ``OBJID == -1``. The catalogue is written
    with ``save_file`` in every format listed in ``ns.format``.

    Raises
    ------
    OSError
        If the output directory does not exist and cannot be created.
    """
    ns = parse_args()

    bricks = list_bricks(ns)

    tree, boss = read_boss(ns.boss, ns)

    # get the data type of the match
    _, path = bricks[0]
    peek = fitsio.read(path, 1, upper=True)
    matched_catalogue = sharedmem.empty(len(boss), dtype=peek.dtype)

    # OBJID == -1 marks a slot that has not been matched yet.
    matched_catalogue['OBJID'] = -1
    matched_distance = sharedmem.empty(len(boss), dtype='f4')

    # convert to radian
    tol = ns.tolerance / (60. * 60.) * (np.pi / 180)

    # Starting every slot at the tolerance means only closer matches win.
    matched_distance[:] = tol
    nprocessed = np.zeros((), dtype='i8')
    nmatched = np.zeros((), dtype='i8')
    ntotal = np.zeros((), dtype='i8')
    t0 = time()

    with sharedmem.MapReduce(np=ns.numproc) as pool:
        def work(brickname, path):
            objects = fitsio.read(path, 1, upper=True)
            pos = radec2pos(objects['RA'], objects['DEC'])
            d, i = tree.query(pos, 1)
            assert (objects['OBJID'] != -1).all()
            # NOTE(review): pool.critical presumably serialises updates to
            # the shared arrays across workers -- confirm against sharedmem.
            with pool.critical:
                mask = d < matched_distance[i]
                mask &= objects['BRICK_PRIMARY']
                i = i[mask]
                matched_catalogue[i] = objects[mask]
                matched_distance[i] = d[mask]
            matched = mask.sum()

            return brickname, matched, len(objects)

        def reduce(brickname, matched, total):
            nprocessed[...] += 1
            nmatched[...] += matched
            ntotal[...] += total
            if ns.verbose:
                if nprocessed % 50 == 0:
                    print("Processed %d files, %g / second, matched %d / %d objects."
                        % (nprocessed, nprocessed / (time() - t0), nmatched, ntotal)
                        )

        pool.map(work, bricks, star=True, reduce=reduce)

        nrealmatched = (matched_catalogue['OBJID'] != -1).sum()
        if ns.verbose:
            print("Processed %d files, %g / second, matched %d / %d objects into %d slots."
                % (nprocessed, nprocessed / (time() - t0),
                    nmatched, ntotal,
                    nrealmatched)
                )

        destdir = os.path.dirname(ns.dest)
        if destdir:
            try:
                os.makedirs(destdir)
            except OSError:
                # An existing directory is fine; any other failure must
                # not be silently ignored.
                if not os.path.isdir(destdir):
                    raise

        header = {
            'NMATCHED': nrealmatched,
            # Matches that landed on an already-filled slot.
            'NCOLLISION': nmatched - nrealmatched,
            'TOL_ARCSEC': ns.tolerance,
        }

        for fmt in ns.format:
            save_file(ns.dest, matched_catalogue, header, fmt)