예제 #1
0
def test_assign_to_nearest_center_few_centers():

    # assign_to_nearest_center has one code path for n_centers > n_frames
    # and another for n_frames > n_centers. Only three centers are used
    # here, so this exercises the n_frames > n_centers path.
    trj = md.load(get_fn('frame0.xtc'), top=get_fn('native.pdb'))
    n_frames = len(trj)
    center_frames = [0, n_frames // 3, n_frames // 2]

    assigns, distances = util.assign_to_nearest_center(
        trj, trj[center_frames], md.rmsd)

    # Reference answer: one row of RMSDs per center, one column per frame.
    expected_dists = np.zeros((len(center_frames), n_frames))
    for row, center in enumerate(trj[center_frames]):
        expected_dists[row] = md.rmsd(trj, center)

    # Each frame should be assigned to the center with the smallest RMSD,
    # and the reported distance should be that smallest RMSD.
    nearest_dist = np.min(expected_dists, axis=0)
    nearest_center = np.argmin(expected_dists, axis=0)

    assert_allclose(nearest_dist, distances, atol=1e-3)
    assert_array_equal(nearest_center, assigns)
예제 #2
0
def batch_reassign(targets, centers, lengths, frac_mem, n_procs=None):
    """Assign the frames of many trajectory files to centers, in batches.

    Files are grouped into batches sized by `determine_batch_size`, each
    batch is loaded as one concatenated coordinate array, centered, and
    assigned with `assign_to_nearest_center` using RMSD. Per-file results
    are recovered by splitting the batch result with `partition_list`.

    Parameters
    ----------
    targets : sequence of 3-tuples
        One `(file, top, atom_indices)` entry per trajectory file. `top`
        and `atom_indices` are forwarded to the loader as the `top` and
        `atom_indices` keyword arguments.
    centers : indexable
        Cluster centers handed to `assign_to_nearest_center`. The first
        element supplies `n_atoms` (for batch sizing) and `top` (used as
        the topology of every loaded batch).
    lengths : sequence of int
        Length of each entry of `targets`, indexed the same way
        (presumably in frames, since it is compared to a batch size
        logged in frames -- confirm with caller).
    frac_mem : float
        Passed to `determine_batch_size`; logged as a fraction of total
        RAM (it is multiplied by 100 to print a percentage).
    n_procs : int, optional
        Forwarded to `load_as_concatenated` as `processes`.

    Returns
    -------
    assignments, distances : list, list
        The batch assignments and distances split by `partition_list`
        according to the lengths returned by the loader, accumulated
        across batches in batch order.

    Raises
    ------
    enspara.exception.ImproperlyConfigured
        If the computed batch size is smaller than the largest entry of
        `lengths`, i.e. a single file cannot fit in one batch.
    """

    example_center = centers[0]

    DTYPE_BYTES = 4
    batch_size, batch_gb = determine_batch_size(
        example_center.n_atoms, DTYPE_BYTES, frac_mem)

    logger.info(
        'Batch max size set to %s frames (~%.2f GB, %.1f%% of total RAM).',
        batch_size, batch_gb, frac_mem*100)

    longest = max(lengths)
    if batch_size < longest:
        raise enspara.exception.ImproperlyConfigured(
            'Batch size of %s was smaller than largest file (size %s).' %
            (batch_size, longest))

    batches = compute_batches(lengths, batch_size)
    n_batches = len(batches)

    assignments = []
    distances = []

    # Batches are numbered from 1 so the "Starting" and "Finished" log
    # lines agree with each other and with the "of N" total.
    for batch_number, batch_indices in enumerate(batches, start=1):
        tick = time.perf_counter()
        logger.info("Starting batch %s of %s", batch_number, n_batches)
        batch_targets = [targets[idx] for idx in batch_indices]

        with timed("Loaded frames for batch in %.1f seconds", logger.info):
            batch_lengths, xyz = load_as_concatenated(
                [tfile for tfile, _, _ in batch_targets],
                lengths=[lengths[idx] for idx in batch_indices],
                args=[{'top': top, 'atom_indices': aids}
                      for _, top, aids in batch_targets],
                processes=n_procs)

        # mdtraj loads as float32, and load_as_concatenated should thus
        # also load as float32. This should _never_ be hit, but there might be
        # some platform-specific situation where double != float64?
        assert xyz.dtype.itemsize == DTYPE_BYTES

        trj = md.Trajectory(xyz, topology=example_center.top)

        with timed("Precentered trajectories in %.1f seconds", logger.debug):
            trj.center_coordinates()

        # NOTE(review): precentered=True skips centering inside md.rmsd;
        # this presumably requires `centers` to be centered already --
        # confirm with callers.
        with timed("Assigned trajectories in %.1f seconds", logger.debug):
            batch_assignments, batch_distances = assign_to_nearest_center(
                    trj, centers, partial(md.rmsd, precentered=True))

        # clear memory of xyz and trj to allow cleanup to deallocate
        # these large arrays; may help with memory high-water mark
        with timed("Cleared array from memory in %.1f seconds", logger.debug):
            # remember the element count for the summary log below
            xyz_size = xyz.size
            del trj, xyz

        assignments.extend(partition_list(batch_assignments, batch_lengths))
        distances.extend(partition_list(batch_distances, batch_lengths))

        # NOTE(review): ru_maxrss units are platform-dependent (kilobytes
        # on Linux, bytes on macOS); dividing by 1024**2 gives GB only for
        # kilobytes -- confirm the target platform.
        logger.info(
            "Finished batch %s of %s in %.1f seconds. Coordinates array had "
            "memory footprint of %.2f GB (of memory high-water mark %.2f/%.2f "
            "GB).",
            batch_number, n_batches, time.perf_counter() - tick,
            xyz_size * DTYPE_BYTES / 1024**3,
            resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024**2,
            psutil.virtual_memory().total / 1024**3)

    return assignments, distances