Code example #1
File: test_sub.py  Project: zuds-survey/zuds-pipeline
import os
import uuid

import zuds


def test_multi_epoch_sub(sci_image_data_20200601, sci_image_data_20200604,
                         refimg_data_first2_imgs):

    outdir = os.path.dirname(sci_image_data_20200601.local_path)
    outname = os.path.join(outdir, f'{uuid.uuid4().hex}.fits')

    coadd = zuds.ScienceCoadd.from_images([sci_image_data_20200604,
                                           sci_image_data_20200601],
                                          outname)
    coadd.binleft = '2020-06-01'
    coadd.binright = '2020-06-04'

    zuds.DBSession().add(coadd)

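    # one single-epoch subtraction per coadd input image, against the same reference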
    for a in coadd.input_images:
        se_sub = zuds.SingleEpochSubtraction.from_images(
            a, refimg_data_first2_imgs
        )

        zuds.DBSession().add(se_sub)

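    # then the multi-epoch subtraction of the coadd itself against that reference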
    final = zuds.MultiEpochSubtraction.from_images(
        coadd, refimg_data_first2_imgs, force_map_subs=False,
    )
    zuds.DBSession().add(final)
    zuds.DBSession().commit()
Code example #2
import os
import uuid

import zuds


def test_stack_input_images(sci_image_data_20200531, sci_image_data_20200601):
    images = [sci_image_data_20200531, sci_image_data_20200601]
    outdir = os.path.dirname(images[0].local_path)
    outname = os.path.join(outdir, f'{uuid.uuid4().hex}.fits')
    stack = zuds.ReferenceImage.from_images(images, outname)
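    # the coadd keeps track of its constituent images via input_images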
    zuds.DBSession().add(stack)
    zuds.DBSession().commit()
    assert len(stack.input_images) == 2
    assert sci_image_data_20200601 in stack.input_images
    assert sci_image_data_20200531 in stack.input_images
Code example #3
File: test_db.py  Project: zuds-survey/zuds-pipeline
import zuds


def test_science_image_modified(science_image):
    db = zuds.DBSession()
    science_image.seeing = 2.3
    db.add(science_image)
    db.commit()
    modified = science_image.modified
    science_image.basename = 'abcd'
    db.add(science_image)
    db.commit()
    new_modified = science_image.modified
    assert new_modified > modified
Code example #4
File: dophot.py  Project: zuds-survey/zuds-pipeline
import sqlalchemy as sa
# `array` is assumed here to be the PostgreSQL ARRAY constructor
from sqlalchemy.dialects.postgresql import array

import zuds


def unphotometered_sources(image_id, footprint):

    poly = array(tuple(footprint.ravel()))

    jcond2 = sa.and_(zuds.ForcedPhotometry.image_id == image_id,
                     zuds.ForcedPhotometry.source_id == zuds.Source.id)

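    # anti-join: keep sources inside the footprint that have no
    # ForcedPhotometry row for this image yet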
    query = zuds.DBSession().query(
        zuds.Source.id, zuds.Source.ra, zuds.Source.dec).outerjoin(
            zuds.ForcedPhotometry,
            jcond2).filter(zuds.ForcedPhotometry.id == None).filter(
                sa.func.q3c_poly_query(zuds.Source.ra, zuds.Source.dec, poly))

    return query.all()
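
A hedged usage sketch: `img` is a hypothetical image object with an `id`
attribute, and `footprint` is assumed to be an Nx2 array of RA/Dec polygon
vertices (e.g. the image corners), which the function flattens for
q3c_poly_query:

    import numpy as np

    footprint = np.array([[210.1, 31.0], [210.4, 31.0],
                          [210.4, 31.3], [210.1, 31.3]])
    for source_id, ra, dec in unphotometered_sources(img.id, footprint):
        print(source_id, ra, dec)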
Code example #5
        fn = f'/global/cfs/cdirs/m937/www/data/scratch/{s.field:06d}/' \
             f'c{s.ccdid:02d}/q{s.qid}/{zuds.fid_map[s.fid]}/{s.basename}'

        shutil.copy(inpt, fn)
        shutil.copy(inpt.replace('sciimg', 'mskimg'),
                    fn.replace('sciimg', 'mskimg'))

        # commits
        try:
            detections, sub = dosub.do_one(fn,
                                           sciclass,
                                           subclass,
                                           refvers,
                                           tmpdir='tmp')
        except (dosub.TooManyDetectionsError, OSError, ValueError) as e:
            zuds.DBSession().rollback()
            print(f'Error on {fn} sub: {e}')
            sci = zuds.ScienceImage.get_by_basename(os.path.basename(fn))
            ref = zuds.DBSession().query(zuds.ReferenceImage).filter(
                zuds.ReferenceImage.field == sci.field,
                zuds.ReferenceImage.ccdid == sci.ccdid,
                zuds.ReferenceImage.qid == sci.qid,
                zuds.ReferenceImage.fid == sci.fid,
                zuds.ReferenceImage.version == refvers).first()
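            # record the failure so this sci/ref pair is not retried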
            blocker = zuds.FailedSubtraction(target_image=sci,
                                             reference_image=ref,
                                             reason=str(e))
            zuds.DBSession().add(blocker)
            zuds.DBSession().commit()
            continue
        except dosub.PredecessorError as e:
Code example #6
import sys
import time

import zuds

zuds.init_db()
# db.DBSession().autoflush = False
# db.DBSession().get_bind().echo = True

__author__ = 'Danny Goldstein <*****@*****.**>'
__whatami__ = 'Make alerts for ZUDS.'

infile = sys.argv[1]  # file listing the detection ids to make alerts for

my_work = zuds.get_my_share_of_work(infile)

alerts = []
for detid in my_work:
    start = time.time()
    d = zuds.DBSession().query(zuds.Detection).get(int(detid))
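    # reuse the existing alert for this detection if one was already made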
    if d.alert is not None:
        alert = d.alert
    else:
        alert = zuds.Alert.from_detection(d)
        zuds.DBSession().add(alert)
        zuds.DBSession().commit()
    stop = time.time()
    print(f'made alert for {detid} ({d.source.id}) in {stop-start:.2f} sec',
          flush=True)
    if not alert.sent:
        zuds.send_alert(alert)
        alert.sent = True
        print(
            f'sent alert for {alert.detection.id} ({alert.detection.source.id})',
            flush=True)
Code example #7
import sys
import time
import zuds
zuds.init_db()

zuds.DBSession().get_bind().echo = True

__author__ = 'Danny Goldstein <*****@*****.**>'
__whatami__ = 'Make the references for ZUDS.'

infile = sys.argv[1]  # file listing all the sources to process
# get the work
sources = zuds.get_my_share_of_work(infile)

for source_id in sources:

    tstart = time.time()
    sstart = time.time()
    source = zuds.DBSession().query(zuds.Source).get(source_id)

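    # walk from the source's best detection to its subtraction, new, and ref images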
    bestdet = source.best_detection
    best_sub = bestdet.image
    best_new = best_sub.target_image
    best_ref = best_sub.reference_image

    field = f'{best_sub.field:06d}'
    ccdid = f'c{best_sub.ccdid:02d}'
    qid = f'q{best_sub.qid}'
    fid = f'{zuds.fid_map[best_sub.fid]}'
Code example #8
File: dosub.py  Project: dannygoldstein/zuds-pipeline
import os
import time

import zuds

# MAX_DETS, TooManyDetectionsError, and PredecessorError are assumed to be
# defined at module level elsewhere in dosub.py.


def do_one(fn, sciclass, subclass, refvers, tmpdir='/tmp'):
    tstart = time.time()

    sstart = time.time()
    sci = sciclass.get_by_basename(os.path.basename(fn))
    sci.map_to_local_file(fn)
    maskname = os.path.join(os.path.dirname(fn), sci.mask_image.basename)
    sci.mask_image.map_to_local_file(maskname)

    weightname = fn.replace('.fits', '.weight.fits')
    rmsname = fn.replace('.fits', '.rms.fits')
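    # prefer an existing weight map, fall back to an rms map, and as a last
    # resort derive one (only possible for single-epoch science images)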
    if os.path.exists(weightname):
        sci._weightimg = zuds.FITSImage.from_file(weightname)
    elif os.path.exists(rmsname):
        sci._rmsimg = zuds.FITSImage.from_file(rmsname)
    else:
        if sciclass == zuds.ScienceImage:
            # use sextractor to make the rms image
            _ = sci.rms_image
        else:
            raise RuntimeError(f'Cannot produce a subtraction for {fn},'
                               f' the image has no weightmap or rms map.')

    sstop = time.time()
    print(f'sci: {sstop-sstart:.2f} sec to load  {sci.basename}', flush=True)

    field = f'{sci.field:06d}'
    ccdid = f'c{sci.ccdid:02d}'
    qid = f'q{sci.qid}'
    fid = f'{zuds.fid_map[sci.fid]}'
    refname = f'/global/cfs/cdirs/m937/www/data/scratch/{field}/{ccdid}/{qid}/' \
              f'{fid}/ref.{field}_{ccdid}_{qid}_{fid}.{refvers}.fits'

    if not (zuds.ReferenceImage.get_by_basename(os.path.basename(refname))
            and os.path.exists(refname)):
        zuds.DBSession().rollback()
        raise RuntimeError(f'Ref {refname} does not exist. Skipping...')

    rstart = time.time()

    ref = zuds.ReferenceImage.get_by_basename(os.path.basename(refname))
    ref.map_to_local_file(refname)
    ref.mask_image.map_to_local_file(refname.replace('.fits', '.mask.fits'))
    ref._weightimg = zuds.FITSImage.from_file(
        refname.replace('.fits', '.weight.fits'))
    rstop = time.time()

    print(f'ref: {rstop-rstart:.2f} sec to load ref for {sci.basename}',
          flush=True)

    basename = zuds.sub_name(sci.basename, ref.basename)

    prev = subclass.get_by_basename(basename)
    #prev=None

    #if (prev is not None) and (prev.modified is not None) and \
    #   (prev.modified > datetime.now() - timedelta(hours=24)):
    #    db.DBSession().rollback()
    #    continue

    if prev is not None:
        raise PredecessorError(f'{basename} already has a predecessor')

    substart = time.time()
    sub = subclass.from_images(sci,
                               ref,
                               data_product=False,
                               tmpdir=tmpdir,
                               refined=True)

    substop = time.time()
    print(f'sub: {substop-substart:.2f} sec to make {sub.basename}',
          flush=True)

    catstart = time.time()
    cat = zuds.PipelineFITSCatalog.from_image(sub)

    catstop = time.time()
    print(
        f'cat: {catstop-catstart:.2f} sec to make catalog for {sub.basename}',
        flush=True)

    dstart = time.time()
    detections = zuds.Detection.from_catalog(cat, filter=True)

    if len(detections) > MAX_DETS:
        raise TooManyDetectionsError(
            f'Error: {len(detections)} detections (>{MAX_DETS}) '
            f'on "{sub.basename}", something wrong with the image probably')

    dstop = time.time()
    print(f'det: {dstop-dstart:.2f} sec to make detections for {sub.basename}',
          flush=True)

    stampstart = time.time()

    if isinstance(sub, zuds.SingleEpochSubtraction):
        sub_target = sub.aligned_to(sub.reference_image)
    else:
        sub_target = sub
    if isinstance(sub.target_image, zuds.ScienceImage):
        new_target = sub.target_image.aligned_to(sub.reference_image)
    else:
        new_target = sub.target_image
    stamps = []
    for detection in detections:
        for i in [sub_target, new_target, sub.reference_image]:
            # make one stamp per (detection, image) pair: sub, new, and reference
            stamp = zuds.Thumbnail.from_detection(detection, i)
            stamps.append(stamp)

    archstart = time.time()
    #subcopy = db.HTTPArchiveCopy.from_product(sub)
    #catcopy = db.HTTPArchiveCopy.from_product(cat)
    #mskcopy = db.HTTPArchiveCopy.from_product(sub.mask_image)
    zuds.DBSession().add(sub)
    #db.DBSession().add(cat)
    zuds.DBSession().add_all(detections)
    zuds.DBSession().add_all(stamps)

    #db.DBSession().add(mskcopy)
    #db.DBSession().add(catcopy)
    #db.DBSession().add(subcopy)
    #archive.archive(subcopy)
    #archive.archive(catcopy)
    #archive.archive(mskcopy)
    #db.DBSession().commit()
    archstop = time.time()
    print(
        f'archive: {archstop-archstart:.2f} sec to archive stuff for '
        f'{sub.basename}',
        flush=True)

    cleanstart = time.time()
    sci.unmap()
    cleanstop = time.time()

    tstop = time.time()
    print(
        f'clean: took {cleanstop - cleanstart} sec to clean '
        f'up after {sub.basename}',
        flush=True)
    print(f'took {tstop - tstart} sec to make "{sub.basename}"', flush=True)

    return detections, sub
Code example #9
File: dosub.py  Project: dannygoldstein/zuds-pipeline
import sys
import traceback

import zuds


if __name__ == '__main__':

    infile = sys.argv[1]  # file listing all the images to make subtractions of
    refvers = sys.argv[2]

    subclass = zuds.MultiEpochSubtraction
    sciclass = zuds.ScienceCoadd

    #subclass = db.SingleEpochSubtraction
    #sciclass = db.ScienceImage

    # get the work
    imgs = zuds.get_my_share_of_work(infile)
    for fn in imgs:
        try:
            detections, sub = do_one(fn, sciclass, subclass, refvers)
        except Exception as e:
            traceback.print_exception(*sys.exc_info())
            zuds.DBSession().rollback()
            continue
        else:
            zuds.DBSession().commit()
Code example #10
from datetime import timedelta

import sqlalchemy as sa

import zuds

zuds.init_db()

# set the stack window size
STACK_WINDOW = 7.  # days
STACK_INTERVAL = timedelta(days=STACK_WINDOW)

# create the date table
gs = sa.func.generate_series
timetype = sa.DateTime(timezone=False)
mindate = sa.cast('2017-01-03', timetype)
maxdate = sa.cast(sa.func.now(), timetype)

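# consecutive 7-day windows tiling the range from mindate to now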
lcol = gs(mindate, maxdate - STACK_INTERVAL, STACK_INTERVAL).label('left')

rcol = gs(mindate + STACK_INTERVAL, maxdate, STACK_INTERVAL).label('right')

daterange = zuds.DBSession().query(lcol, rcol).subquery()

target = sa.func.array_agg(zuds.ScienceImage.id).label('target')
stacksize = sa.func.array_length(target, 1).label('stacksize')
stackcond = stacksize >= 2
jcond = sa.and_(zuds.ScienceImage.obsdate > daterange.c.left,
                zuds.ScienceImage.obsdate <= daterange.c.right)

res = zuds.DBSession().query(
    zuds.ScienceImage.field, zuds.ScienceImage.ccdid, zuds.ScienceImage.qid,
    zuds.ScienceImage.fid, daterange.c.left, daterange.c.right,
    target).select_from(
        sa.join(
            zuds.SingleEpochSubtraction, zuds.ScienceImage.__table__,
            zuds.SingleEpochSubtraction.target_image_id == zuds.ScienceImage.id
        ).join(
Code example #11
import time
import zuds


__author__ = 'Danny Goldstein <*****@*****.**>'
__whatami__ = 'Make alerts for ZUDS.'

# get unalerted detections
unalerted = zuds.DBSession().query(
    zuds.Detection
).filter(
    zuds.Detection.source_id != None
).outerjoin(
    zuds.Alert
).filter(
    zuds.Alert.id == None
).all()

print(f'Need to make alerts for {len(unalerted)} detections')

alerts = []
for detection in unalerted:
    tstart = time.time()
    alert = zuds.Alert.from_detection(detection)
    alerts.append(alert)
    tstop = time.time()
    print(f'took {tstop - tstart:.2f} sec to make alert '
          f'for {detection.source_id}')
Code example #12
import sys
import time

import pandas as pd

import zuds

zuds.init_db()
#db.DBSession().get_bind().echo = True

__author__ = 'Danny Goldstein <*****@*****.**>'
__whatami__ = 'Make the references for ZUDS.'

infile = sys.argv[1]  # CSV listing the coadd jobs (target ids and bin edges)
# get the work
jobs = zuds.get_my_share_of_work(infile, reader=pd.read_csv)

for _, job in jobs.iterrows():

    tstart = time.time()
    sstart = time.time()
    images = zuds.DBSession().query(zuds.ZTFFile).filter(
        zuds.ZTFFile.id.in_(eval(job['target']))).all()
    zuds.ensure_images_have_the_same_properties(images, zuds.GROUP_PROPERTIES)

    field = f'{images[0].field:06d}'
    ccdid = f'c{images[0].ccdid:02d}'
    qid = f'q{images[0].qid}'
    fid = f'{zuds.fid_map[images[0].fid]}'

    for image in images:
        path = f'/global/cfs/cdirs/m937/www/data/scratch/{field}/{ccdid}/{qid}/' \
               f'{fid}/{image.basename}'
        image.map_to_local_file(path)
        image.mask_image.map_to_local_file(path.replace('sciimg', 'mskimg'))

    basename = f'{field}_{ccdid}_{qid}_{fid}_{job["left"]}_' \
               f'{job["right"]}.coadd.fits'
Code example #13
File: dophot.py  Project: zuds-survey/zuds-pipeline
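    # `comm` is assumed to be an mpi4py communicator (e.g. MPI.COMM_WORLD),
    # and `output`, `outfile`, and `start` are defined earlier in dophot.py.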
    rank = comm.Get_rank()
    size = comm.Get_size()

    # avoid pandas to csv bottleneck using parallelism
    df = pd.DataFrame(output)
    df.to_csv(f'output_{rank:04d}.csv', index=False, header=rank == 0)
    comm.Barrier()

    if rank == 0:
        with open(outfile, 'w') as f:
            for fn in [f'output_{r:04d}.csv' for r in range(size)]:
                if os.path.exists(fn):
                    with open(fn, 'r') as g:
                        f.write(g.read())
                    os.remove(fn)

        jobid = os.getenv('SLURM_JOB_ID')
        if jobid is not None:
            job = zuds.DBSession().query(zuds.ForcePhotJob).filter(
                zuds.ForcePhotJob.slurm_id == jobid).first()
            job.status = 'ready_for_loading'
            zuds.DBSession().add(job)
            zuds.DBSession().commit()

else:
    df = pd.DataFrame(output)
    df.to_csv(outfile, index=False)

stop = time.time()
zuds.print_time(start, stop, 0, 'start to finish')
Code example #14
File: makeref.py  Project: zuds-survey/zuds-pipeline
        sci.mask_image.clear()
        sci.mask_image.map_to_local_file(maskname)

        c1 = min_date <= sci.obsdate <= max_date
        c2 = 1.7 < sci.seeing < 2.5
        c3 = 19.2 < sci.maglimit < 22.
        c4 = sci.infobits == 0
        if c1 and c2 and c3 and c4:
            ok.append(sci)

    # get the very best images
    top = sorted(ok, key=lambda i: i.maglimit, reverse=True)[:50]
    if len(top) == 0:
        print(f'No suitable images to make reference for {d}. Skipping...')
        zuds.DBSession().rollback()
        continue

    coaddname = os.path.join(
        d, f'ref.{ok[0].field:06d}_c{ok[0].ccdid:02d}'
        f'_q{ok[0].qid}_{zuds.fid_map[ok[0].fid]}.{version}.fits')

    if len(top) < 14:
        print(f'Not enough images ({len(top)} < 14) to make reference '
              f'{coaddname}. Skipping...')
        zuds.DBSession().rollback()
        continue

    try:
        coadd = zuds.ReferenceImage.from_images(top,
                                                coaddname,
Code example #15
import datetime
import io
import os
import shutil
import subprocess
import time
from pathlib import Path

import numpy as np
import pandas as pd
import requests
import sqlalchemy as sa

import zuds

# submit_hpss_job is assumed to be defined elsewhere in this module


def retrieve_images(images_or_ids,
                    job_script_destination='.',
                    frame_destination='.',
                    log_destination='.',
                    preserve_dirs=False,
                    n_jobs=14,
                    tape=True,
                    http=True,
                    ipac=True,
                    archive_new=True):
    """Image whereclause should be a clause element on ZTFFile."""

    images_or_ids = np.atleast_1d(images_or_ids)
    ids = [int(i) if not hasattr(i, 'id') else i.id for i in images_or_ids]

    got = []

    if tape:
        jt = sa.join(zuds.ZTFFile, zuds.TapeCopy,
                     zuds.ZTFFile.id == zuds.TapeCopy.product_id)
        full_query = zuds.DBSession().query(
            zuds.ZTFFile, zuds.TapeCopy).select_from(jt).outerjoin(
                zuds.HTTPArchiveCopy,
                zuds.ZTFFile.id == zuds.HTTPArchiveCopy.product_id).filter(
                    zuds.HTTPArchiveCopy.product_id == None)
        full_query = full_query.filter(
            zuds.ZTFFile.id.in_(ids),
            zuds.ZTFFile.fid != 3)  # don't use i-band from tape

        # this is the query to get the image paths
        metatable = pd.read_sql(full_query.statement,
                                zuds.DBSession().get_bind())

        df = metatable[['basename', 'archive_id']]
        df = df.rename({'archive_id': 'tarpath'}, axis='columns')
        tars = df['tarpath'].unique()
        got.extend(metatable['product_id'].tolist())

        # sort tarball retrieval by location on tape
        t = datetime.datetime.utcnow().isoformat().replace(' ', '_')
        hpss_in = Path(job_script_destination) / f'hpss_{t}.in'
        hpss_out = Path(job_script_destination) / f'hpss_{t}.out'

        with open(hpss_in, 'w') as f:
            f.write("\n".join([f'ls -P {tar}' for tar in tars]))
            f.write("\n")  # always end with a \n

        syscall = f'/usr/common/mss/bin/hsi -O {hpss_out} in {hpss_in}'

        # for some reason hsi writes in >> mode, so need to delete the output
        # file if it exists to prevent it from mixing with results of a previous
        # run

        if hpss_out.exists():
            os.remove(hpss_out)

        p = subprocess.Popen(syscall.split(),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)

        # block until the hsi process finishes
        retcode = p.wait()
        stderr, stdout = p.stderr, p.stdout

        # 64 means some of the files didn't exist; that's ok
        if retcode not in [0, 64]:
            raise subprocess.CalledProcessError(retcode, syscall,
                                                stderr=stderr.read())

        # filter out the lines that don't start with FILE
        with open(hpss_out, 'r') as f:
            lines = [line for line in f.readlines() if line.startswith('FILE')]
        stream = io.StringIO(''.join(lines))

        # read it into pandas
        ordered = pd.read_csv(stream,
                              delim_whitespace=True,
                              names=[
                                  'ignore-2', 'hpsspath', 'ignore-1',
                                  'ignore0', 'position', 'tape', 'ignore1',
                                  'ignore2', 'ignore3', 'ignore4', 'ignore5',
                                  'ignore6', 'ignore7'
                              ])
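        # strip trailing suffixes from the tape ids and sort so retrieval
        # follows physical order on each tape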
        ordered['tape'] = [t[:-2] for t in ordered['tape']]
        ordered['position'] = [t.split('+')[0] for t in ordered['position']]
        ordered = ordered.sort_values(['tape', 'position'])

        for column in ordered.copy().columns:
            if column.startswith('ignore'):
                del ordered[column]

        # submit the jobs based on which tape the tar files reside on
        # and in what order they are on the tape

        dependency_dict = {}

        # chunk the tape-sorted rows into at most ~n_jobs groups
        for chunk, group in ordered.groupby(
                np.arange(len(ordered)) // max(1, len(ordered) // n_jobs)):

            # get the tarfiles
            tarnames = group['hpsspath'].tolist()
            images = [
                df[df['tarpath'] == tarname]['basename'].tolist()
                for tarname in tarnames
            ]

            jobid = submit_hpss_job(tarnames, images, job_script_destination,
                                    frame_destination, log_destination,
                                    chunk, preserve_dirs)

            for image in df[[name in tarnames
                             for name in df['tarpath']]]['basename']:
                dependency_dict[image] = jobid

    if http:

        # now do the ones that are on disk
        jt = sa.join(zuds.ZTFFile, zuds.HTTPArchiveCopy,
                     zuds.ZTFFile.id == zuds.HTTPArchiveCopy.product_id)

        full_query = zuds.DBSession().query(
            zuds.ZTFFile, zuds.HTTPArchiveCopy).select_from(jt)

        full_query = full_query.filter(zuds.ZTFFile.id.in_(ids))

        # this is the query to get the image paths
        metatable2 = pd.read_sql(full_query.statement,
                                 zuds.DBSession().get_bind())
        got.extend(metatable2['product_id'].tolist())

        # copy each image over
        for _, row in metatable2.iterrows():
            path = row['archive_path']
            if preserve_dirs:
                target = Path(frame_destination) / os.path.join(
                    *path.split('/')[-5:])
            else:
                target = Path(frame_destination) / os.path.basename(path)

            if Path(target).absolute() == Path(path).absolute():
                # don't copy a file onto itself
                continue

            target.parent.mkdir(exist_ok=True, parents=True)
            shutil.copy(path, target)

    # download the remaining images individually from IPAC

    if ipac:
        remaining = [int(i) for i in np.setdiff1d(ids, got)]
        remaining = zuds.DBSession().query(zuds.ZTFFile).filter(
            zuds.ZTFFile.id.in_(remaining)).all()

        cookie = zuds.ipac_authenticate()

        for i in remaining:
            if preserve_dirs:
                destination = Path(frame_destination) / i.relname
            else:
                destination = Path(frame_destination) / i.basename

            suffix = 'mskimg.fits' if isinstance(
                i, zuds.MaskImage) else 'sciimg.fits'

            try:
                if isinstance(i, zuds.MaskImage):
                    i.parent_image.download(suffix=suffix,
                                            destination=destination,
                                            cookie=cookie)
                else:
                    i.download(suffix=suffix,
                               destination=destination,
                               cookie=cookie)
            except requests.RequestException:
                continue

            if archive_new:

                i.map_to_local_file(destination)

                # ensure the image header is written to the DB
                i.load_header()

                acopy = zuds.HTTPArchiveCopy.from_product(i, check=False)
                acopy.put()

                # and archive the file to disk
                zuds.DBSession().add(acopy)
                zuds.DBSession().commit()
Code example #16
File: dothumb.py  Project: zuds-survey/zuds-pipeline
import sys
import time

import zuds

zuds.init_db()
# db.DBSession().autoflush = False
# db.DBSession().get_bind().echo = True

__author__ = 'Danny Goldstein <*****@*****.**>'
__whatami__ = 'Persist thumbnails for ZUDS.'

infile = sys.argv[1]  # file listing the thumbnail ids to persist

BATCH_SIZE = 50
my_work = zuds.get_my_share_of_work(infile)

def batch(iterable, n=1):
    length = len(iterable)
    for ndx in range(0, length, n):
        yield iterable[ndx:min(ndx + n, length)]

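# process the thumbnails in fixed-size batches so each commit stays small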
for thumbids in batch(my_work, n=BATCH_SIZE):
    start = time.time()
    thumbs = zuds.DBSession().query(zuds.Thumbnail).filter(
        zuds.Thumbnail.id.in_(thumbids.tolist()))
    for t in thumbs:
        t.persist()
    stop = time.time()
    zuds.print_time(start, stop, t, 'get and persist')

    start = time.time()
    zuds.DBSession().commit()
    stop = time.time()
    zuds.print_time(start, stop, t, 'commit')