def allocate_chunks(results_dir,
                    trial_no,
                    rgs_selected=None,
                    respect_rungroup_barriers=True,
                    runs_selected=None,
                    stripe=False,
                    max_size=1000,
                    integrated=False):
    """Group the DIALS output files of one trial into chunks or stripes.

    The directory layout read here is
    ``<results_dir>/<run>/<trial>_<rungroup>/out/``, where ``<run>`` starts
    with "r" followed by digits and ``<trial>`` is ``trial_no`` zero-padded
    to three digits.

    Args:
        results_dir: directory holding one sub-directory per run.
        trial_no: integer trial number.
        rgs_selected: optional container of rungroup names; when given, a
            trial/rungroup directory is used only if the last five
            characters of its name are in this container.
        respect_rungroup_barriers: when true, every rungroup forms its own
            batch (keyed by rungroup name); otherwise all rungroups are
            pooled into a single batch keyed "all".
        runs_selected: optional container of run directory names to keep.
        stripe: when true, distribute files round-robin (``[i::n]``) over
            the sub-batches; otherwise cut contiguous slices.
        max_size: upper bound used to derive the number of sub-batches;
            must be positive.
        integrated: selects reflection files ending in
            "_integrated.pickle" instead of "_indexed.pickle".

    Returns:
        dict mapping each batch key to a list of
        ``(experiment_paths, reflection_paths)`` tuples, one tuple per
        chunk or stripe.

    Raises:
        Sorry: if ``max_size`` is not positive, or if no batch contains
            any experiment file.
    """
    if max_size <= 0:
        # Otherwise this surfaces later as a ZeroDivisionError/IndexError.
        raise Sorry("max_size must be a positive number of experiments.")
    refl_ending = "_integrated.pickle" if integrated else "_indexed.pickle"
    expt_ending = "_refined_experiments.json"
    trial = "%03d" % trial_no
    print("processing trial %s" % trial)
    trial_prefix = trial + "_rg"

    # Map each rungroup name to the runs that have a directory for it.
    rgs = {}
    for run in os.listdir(results_dir):
        if not (run.startswith("r") and run.split("r")[1].isdigit()):
            continue
        if runs_selected and run not in runs_selected:
            continue
        rungroups = set()
        for trg in os.listdir(os.path.join(results_dir, run)):
            if trg[:6] != trial_prefix:
                continue
            if rgs_selected and trg[-5:] not in rgs_selected:
                continue
            rungroups.add(trg.split("_")[1])
        for rg in rungroups:
            rgs.setdefault(rg, []).append(run)

    if respect_rungroup_barriers:
        batchable = dict((rg, {rg: runs}) for rg, runs in rgs.items())
    else:
        batchable = {"all": rgs}

    # For either grouping, iterate over the top level keys in batchable and
    # work out how many sub-batches each one needs.
    batch_chunk_nums_sizes = {}
    batch_contents = {}
    for batch, rungroups in batchable.items():
        # NOTE(review): a run listed under several rungroups of the same
        # batch is kept only once here (the last rungroup seen wins), which
        # can only happen when barriers are not respected - confirm that
        # this is intended.
        rg_by_run = {}
        for rungroup, runs in rungroups.items():
            for run in runs:
                rg_by_run[run] = rungroup
        n_img = 0
        batch_contents[batch] = []
        for run, rg in rg_by_run.items():
            trg = trial + "_" + rg
            out_dir = os.path.join(results_dir, run, trg, "out")
            try:
                contents = sorted(os.listdir(out_dir))
            except OSError:
                print("skipping run %s missing out directory" % run)
                continue
            batch_contents[batch].extend(
                os.path.join(out_dir, c) for c in contents)
            n_img += sum(1 for c in contents if c.endswith(expt_ending))
        if n_img == 0:
            print("no images found for %s" % batch)
            del batch_contents[batch]
            continue
        # Explicit float division: with Python 2 integer division the
        # quotient would be floored before ceil() and could become 0.
        n_chunks = int(math.ceil(float(n_img) / max_size))
        # Balance the sub-batches instead of filling them up to max_size.
        chunk_size = int(math.ceil(float(n_img) / n_chunks))
        batch_chunk_nums_sizes[batch] = (n_chunks, chunk_size)
    if not batch_contents:
        raise Sorry("no DIALS integration results found.")

    batch_chunks = {}
    for batch, (num, size) in batch_chunk_nums_sizes.items():
        contents = batch_contents[batch]
        expts = [c for c in contents if c.endswith(expt_ending)]
        refls = [c for c in contents if c.endswith(refl_ending)]
        # presumably pairs up experiment and reflection files and returns
        # two aligned lists - defined outside this block, TODO confirm
        expts, refls = match_dials_files(expts, refls, expt_ending,
                                         refl_ending)
        if stripe:
            batch_chunks[batch] = [
                (expts[i::num], refls[i::num]) for i in range(num)
            ]
            print(
                "striped %d experiments in %s with %d experiments per stripe and %d stripes"
                % (len(expts), batch, len(batch_chunks[batch][0][0]),
                   len(batch_chunks[batch])))
        else:
            batch_chunks[batch] = [
                (expts[i * size:(i + 1) * size],
                 refls[i * size:(i + 1) * size]) for i in range(num)
            ]
            print(
                "chunked %d experiments in %s with %d experiments per chunk and %d chunks"
                % (len(expts), batch, len(batch_chunks[batch][0][0]),
                   len(batch_chunks[batch])))
    return batch_chunks
# Example #2
# (the stray "0" that followed was listing metadata, presumably a vote count)
def allocate_chunks_per_rungroup(results_dir,
                                 trial_no,
                                 rgs_selected=None,
                                 runs_selected=None,
                                 stripe=False,
                                 max_size=1000,
                                 integrated=False):
    """Group the DIALS output files of one trial, rungroup by rungroup.

    The directory layout read here is
    ``<results_dir>/<run>/<trial>_<rungroup>/out/``, where ``<run>`` starts
    with "r" followed by digits and ``<trial>`` is ``trial_no`` zero-padded
    to three digits.

    Args:
        results_dir: directory holding one sub-directory per run.
        trial_no: integer trial number.
        rgs_selected: optional container of rungroup names; when given, a
            trial/rungroup directory is used only if the last five
            characters of its name are in this container.
        runs_selected: optional container of run directory names to keep.
        stripe: when true, distribute files round-robin (``[i::n]``) over
            the sub-batches; otherwise cut contiguous slices.
        max_size: upper bound used to derive the number of sub-batches per
            rungroup; must be positive.
        integrated: selects reflection files ending in
            "_integrated.pickle" instead of "_indexed.pickle".

    Returns:
        dict mapping each rungroup name to a list of
        ``(experiment_paths, reflection_paths)`` tuples, one tuple per
        chunk or stripe.

    Raises:
        Sorry: if ``max_size`` is not positive, or if no rungroup contains
            any experiment file.
    """
    if max_size <= 0:
        # Otherwise this surfaces later as a ZeroDivisionError/IndexError.
        raise Sorry("max_size must be a positive number of experiments.")
    refl_ending = "_integrated.pickle" if integrated else "_indexed.pickle"
    expt_ending = "_refined_experiments.json"
    trial = "%03d" % trial_no
    print("processing trial %s" % trial)
    trial_prefix = trial + "_rg"

    # Map each rungroup name to the runs that have a directory for it.
    rgs = {}
    for run in os.listdir(results_dir):
        if not (run.startswith("r") and run.split("r")[1].isdigit()):
            continue
        if runs_selected and run not in runs_selected:
            continue
        rungroups = set()
        for trg in os.listdir(os.path.join(results_dir, run)):
            if trg[:6] != trial_prefix:
                continue
            if rgs_selected and trg[-5:] not in rgs_selected:
                continue
            rungroups.add(trg.split("_")[1])
        for rg in rungroups:
            rgs.setdefault(rg, []).append(run)

    # Collect the files of every rungroup and size its sub-batches.
    rg_ch_nums_sizes = {}
    rg_contents = {}
    for rg, runs in rgs.items():
        n_img = 0
        trg = trial + "_" + rg
        rg_contents[rg] = []
        for run in runs:
            out_dir = os.path.join(results_dir, run, trg, "out")
            try:
                # os.listdir() returns entries in arbitrary order; sort so
                # the chunk membership is reproducible between invocations.
                contents = sorted(os.listdir(out_dir))
            except OSError:
                print("skipping run %s missing out directory" % run)
                continue
            rg_contents[rg].extend(
                os.path.join(out_dir, c) for c in contents)
            n_img += sum(1 for c in contents if c.endswith(expt_ending))
        if n_img == 0:
            print("no images found for %s" % rg)
            del rg_contents[rg]
            continue
        # Explicit float division: with Python 2 integer division the
        # quotient would be floored before ceil() and could become 0.
        n_chunks = int(math.ceil(float(n_img) / max_size))
        # Balance the sub-batches instead of filling them up to max_size.
        chunk_size = int(math.ceil(float(n_img) / n_chunks))
        rg_ch_nums_sizes[rg] = (n_chunks, chunk_size)
    if not rg_contents:
        raise Sorry("no DIALS integration results found.")

    rg_chunks = {}
    for rg, (num, size) in rg_ch_nums_sizes.items():
        contents = rg_contents[rg]
        expts = [c for c in contents if c.endswith(expt_ending)]
        refls = [c for c in contents if c.endswith(refl_ending)]
        # presumably pairs up experiment and reflection files and returns
        # two aligned lists - defined outside this block, TODO confirm
        expts, refls = match_dials_files(expts, refls, expt_ending,
                                         refl_ending)
        if stripe:
            rg_chunks[rg] = [
                (expts[i::num], refls[i::num]) for i in range(num)
            ]
            print(
                "striped %d experiments in %s with %d experiments per stripe and %d stripes"
                % (len(expts), rg, len(rg_chunks[rg][0][0]),
                   len(rg_chunks[rg])))
        else:
            rg_chunks[rg] = [
                (expts[i * size:(i + 1) * size],
                 refls[i * size:(i + 1) * size]) for i in range(num)
            ]
            print(
                "chunked %d experiments in %s with %d experiments per chunk and %d chunks"
                % (len(expts), rg, len(rg_chunks[rg][0][0]),
                   len(rg_chunks[rg])))
    return rg_chunks