예제 #1
0
    def teardown_poller(self):
        """Teardown poller, run after all coordinates and statistics have been updated for the whole population.

        Computes the violation score as ``n_violations / n_imposed`` from the
        accumulated summary data (0 when no restraint was imposed) and stores
        it in the HSS population file together with the in-memory coordinates
        and the JSON-serialized summary. The file handle is kept on
        ``self._hss`` and is closed even when one of the writes fails.
        """
        total_violations = self._summary_data['n_violations']
        total_restraints = self._summary_data['n_imposed']

        if total_restraints == 0:
            violation_score = 0
        else:
            # fraction of the imposed restraints that are violated
            violation_score = total_violations / total_restraints

        # open HSS population file for in-place update
        self._hss = HssFile(self.hssfilename, 'r+')
        try:
            # store violation score, all the coordinates and the restraint
            # violation summary into the HSS file
            self._hss.set_violation(violation_score)
            self._hss.set_coordinates(self._hss_crd)
            # objects json cannot serialize natively are converted through
            # their tolist() method (e.g. numpy arrays)
            summary_json = json.dumps(self._summary_data,
                                      default=lambda a: a.tolist())
            h5_create_or_replace_dataset(self._hss,
                                         'summary',
                                         data=summary_json)
        finally:
            # always release the file handle, also when a write above raises
            self._hss.close()
예제 #2
0
    def task(batch_id, cfg, tmp_dir):
        """Compute activation distances for the batch identified by parameter batch_id.

        Reads the batch parameters from ``<tmp_dir>/<batch_id>.in.npy`` (each
        row is unpacked as ``i, j, pwish, plast``), calls ``get_actdist`` for
        every row against the structure output file, and writes the collected
        results to ``<tmp_dir>/<batch_id>.out.tmp``, one line per result,
        formatted with ``actdist_fmt_str``.
        """
        dictHiC = cfg['restraints']['Hi-C']
        contact_range = dictHiC.get('contact_range', 2.0)

        # read params before touching the structure file, so that a missing
        # input does not leave an open HSS handle behind
        params = np.load(os.path.join(tmp_dir, '%d.in.npy' % batch_id))

        # compute activation distances for all pairs of locus indexes
        results = []
        with HssFile(cfg.get("optimization/structure_output"), 'r') as hss:
            for i, j, pwish, plast in params:
                # each call may contribute several result records
                results.extend(
                    get_actdist(int(i),
                                int(j),
                                pwish,
                                plast,
                                hss,
                                contactRange=contact_range))

        # save activation distances from current batch to a batch-unique
        # output file, using format specifier 'actdist_fmt_str'
        fname = os.path.join(tmp_dir, '%d.out.tmp' % batch_id)
        with open(fname, 'w') as f:
            f.write('\n'.join([actdist_fmt_str % x for x in results]))
예제 #3
0
def modeling_step(model, cfg):
    """Run one modeling task per structure and merge the per-structure outputs.

    Reads radii, index and genome from the input HSS file, saves the
    configuration (extended with per-structure output templates) next to the
    run's base path, maps ``modeling_task`` over all structure ids through the
    configured parallel controller, then gathers the per-structure coordinate
    and info files into the output HSS file and the combined info file, and
    finally deletes the per-structure files.

    ``model`` is accepted but not used by this function.
    """
    with HssFile(cfg['mstep']['input_hss'], 'r') as source:
        radii, index, genome = source.radii, source.index, source.genome

    n_struct = cfg['mstep']['n_struct']
    n_beads = cfg['mstep']['n_beads']

    run_prefix = os.path.join(cfg['mstep']['workdir'],
                              cfg['mstep']['run_name'])
    config_path = run_prefix + '.config'
    cfg['mstep']['templates'] = dict(
        crd_out=run_prefix + '.outcrd.{}.npy',
        info_out=run_prefix + '.info.{}.txt',
    )
    cfg.save(config_path)

    # dispatch one task per structure id through the configured controller
    worker = partial(modeling_task, cfg_file=config_path)
    controller_type = cfg['parallel_controller']
    controller_options = cfg['parallel_controller_options']
    controller = controller_class[controller_type](**controller_options)
    controller.map(worker, list(range(n_struct)))

    def files_for(struct_id):
        # resolve the per-structure file names from the templates
        return resolve_templates(cfg['mstep']['templates'], [struct_id])

    with HssFile(cfg['mstep']['output_hss'], 'w') as out_hss:
        out_hss.index = index
        out_hss.genome = genome
        out_hss.radii = radii
        coordinates = out_hss.create_dataset('coordinates',
                                             shape=(n_beads, n_struct, 3),
                                             dtype=COORD_DTYPE)
        for struct_id in range(n_struct):
            paths = files_for(struct_id)
            # note that we discard all the positions of added atoms
            coordinates[:, struct_id, :] = np.load(paths['crd_out'])

        # merge the per-structure info files under a '#'-prefixed header
        kernel = kernel_class[cfg['mstep']['kernel']]
        with open(cfg['mstep']['info_out'], 'w') as merged:
            merged.write('#')
            merged.writelines(key + '\t' for key in kernel.INFO_KEYS)
            merged.write('\n')
            for struct_id in range(n_struct):
                with open(files_for(struct_id)['info_out']) as part:
                    merged.write(part.read() + '\n')

    # cleanup of the per-structure files
    for struct_id in range(n_struct):
        paths = files_for(struct_id)
        os.remove(paths['crd_out'])
        os.remove(paths['info_out'])
예제 #4
0
    def setup(self):
        """Prepare the temporary directory and the batch layout of the sprite step.

        Sets ``tmp_extensions``, ``tmp_dir``, ``keep_temporary_files``,
        ``n_struct`` (read from the structure output file), ``n_clusters``
        (length of the ``indptr`` dataset of the clusters file minus one),
        ``n_batches`` (number of batches of ``batch_size`` clusters, rounded
        up) and ``argument_list`` (one entry per batch).

        Raises:
            KeyError: when the configuration has no ``restraints/sprite``
                section.
        """
        # fail fast when the sprite section is missing from the configuration
        if 'sprite' not in self.cfg['restraints']:
            raise KeyError('sprite')

        self.tmp_extensions = [".npy", ".npz"]

        self.tmp_dir = make_absolute_path(
            self.cfg.get('restraints/sprite/tmp_dir', 'sprite'),
            self.cfg.get('parameters/tmp_dir'))

        self.keep_temporary_files = self.cfg.get(
            'restraints/sprite/keep_temporary_files', False)

        # exist_ok avoids the check-then-create race when several processes
        # set up the same directory at the same time
        os.makedirs(self.tmp_dir, exist_ok=True)

        clusters_file = self.cfg.get('restraints/sprite/clusters')
        with h5py.File(clusters_file, 'r') as h5:
            n_clusters = len(h5['indptr']) - 1

        with HssFile(self.cfg.get("optimization/structure_output"),
                     'r') as hss:
            self.n_struct = hss.nstruct

        # ceiling division: a partially filled last batch still counts
        batch_size = self.cfg.get('restraints/sprite/batch_size', 10)
        n_batches, leftover = divmod(n_clusters, batch_size)
        if leftover != 0:
            n_batches += 1

        self.n_batches = n_batches
        self.n_clusters = n_clusters
        self.argument_list = range(n_batches)
예제 #5
0
def prepareHss(fname,
               nbead,
               nstruct,
               genome,
               index,
               radii,
               nucleus_shape='sphere',
               nucleus_parameters=5000.0,
               nucleus_volume=0,
               coord_chunks=None):
    """Create a new HSS file at ``fname`` and fill in its basic content.

    Writes the bead and structure counts, genome, index and radii. The
    coordinates are stored as an all-zero ``(nbead, nstruct, 3)`` array when
    ``coord_chunks`` is None; otherwise a ``coordinates`` dataset of that shape
    is created with dtype ``COORD_DTYPE`` and the requested chunking. The
    nucleus description goes into an ``envelope`` group with the datasets
    ``shape``, ``volume`` and ``params``.
    """
    crd_shape = (nbead, nstruct, 3)
    with HssFile(fname, 'w') as out:
        out.set_nbead(nbead)
        out.set_nstruct(nstruct)
        out.set_genome(genome)
        out.set_index(index)
        out.set_radii(radii)

        if coord_chunks is not None:
            out.create_dataset('coordinates',
                               shape=crd_shape,
                               dtype=COORD_DTYPE,
                               chunks=coord_chunks)
        else:
            out.set_coordinates(np.zeros(crd_shape))

        envelope = out.create_group('envelope')
        envelope_items = (
            ('shape', nucleus_shape),
            ('volume', nucleus_volume),
            ('params', nucleus_parameters),
        )
        for key, value in envelope_items:
            envelope.create_dataset(key, data=value)
예제 #6
0
def checkViolations(cfg):
    """Read the violation score and advance the Hi-C sigma schedule.

    The violation score is read from the file named by
    ``cfg["structure_output"]``. When no ``sigma`` is set yet, the first entry
    of ``sigma_list`` is popped into ``sigma``. Otherwise, when the score is
    below 0.01, the next entry of ``sigma_list`` becomes the new ``sigma``.

    Returns:
        False when the score is below 0.01 and ``sigma_list`` is exhausted,
        True in every other case.
    """
    # the context manager closes the file, which was previously left open
    with HssFile(cfg["structure_output"]) as hss:
        vio = hss.get_violation()

    hic = cfg["restraints"]["Hi-C"]
    if "sigma" not in hic:
        print("Start", vio)
        hic["sigma"] = hic["sigma_list"].pop(0)
        return True

    print(hic["sigma"], vio)
    if vio < 0.01:
        if len(hic["sigma_list"]) == 0:
            return False
        hic["sigma"] = hic["sigma_list"].pop(0)
    return True
예제 #7
0
파일: step.py 프로젝트: davidzhan1995/igm
    def reduce(self):
        """
        Collect all structure coordinates together to assemble a hssFile

        Reads every per-structure ``.hms`` file from the temporary directory,
        copies its coordinates into the population file
        (``structure_output + '.T'``) and accumulates the restraint and
        violation totals. The violation score is stored as NaN when both
        totals are zero, otherwise as violations / restraints. The file is
        then repacked with ``h5repack`` into the final structure output, which
        is optionally copied to the intermediate name.

        Raises:
            RuntimeError: when ``h5repack`` exits with a non-zero status.
        """
        # local import: this block cannot rely on the file-level imports
        import subprocess

        hssfilename = self.cfg["optimization"]["structure_output"] + '.T'

        with HssFile(hssfilename, 'r+') as hss:
            n_struct = hss.nstruct
            n_beads = hss.nbead
            total_restraints = 0.0
            total_violations = 0.0

            # extract coordinates and put them in matrix
            master = hss.coordinates
            print('Collecting all the coordinates from all configurations....')

            for i in tqdm(range(n_struct), desc='(REDUCE)'):
                fname = "{}_{}.hms".format(self.tmp_file_prefix, i)
                # close each structure file as soon as it has been read
                with HmsFile(os.path.join(self.tmp_dir, fname)) as hms:
                    crd = hms.get_coordinates()
                    total_restraints += hms.get_total_restraints()
                    total_violations += hms.get_total_violations()

                # edit master numpy matrix...
                master[:, i, :] = crd

            # write all the coordinates back in a single call
            hss.set_coordinates(master)
            if (total_violations == 0) and (total_restraints == 0):
                hss.set_violation(np.nan)
            else:
                hss.set_violation(total_violations / total_restraints)

        # repack
        PACK_SIZE = 1e6
        pack_beads = max(1, int(PACK_SIZE / n_struct / 3))
        pack_beads = min(pack_beads, n_beads)

        logger.info('repacking...')
        # argument list instead of a shell string: file names are passed
        # verbatim, whatever characters they contain
        cmd = [
            'h5repack', '-l',
            'coordinates:CHUNK={:d}x{:d}x3'.format(pack_beads, n_struct),
            hssfilename, hssfilename + '.swap'
        ]
        sp = subprocess.Popen(cmd,
                              stderr=subprocess.PIPE,
                              stdout=subprocess.PIPE)
        stdout, stderr = sp.communicate()
        if sp.returncode != 0:
            print(' '.join(cmd))
            print('O:', stdout.decode('utf-8'))
            print('E:', stderr.decode('utf-8'))
            raise RuntimeError('repacking failed. error code: %d' %
                               sp.returncode)
        logger.info('done.')
        os.rename(hssfilename + '.swap',
                  self.cfg.get("optimization/structure_output"))

        if self.keep_intermediate_structures:
            copyfile(self.cfg["optimization"]["structure_output"],
                     self.intermediate_name())
예제 #8
0
    def setup_poller(self):
        """Set up polling function: define coordinate master matrix and a dictionary summarizing statistics from run.

        ``self._hss_crd`` receives the coordinates read from the HSS file;
        the file is closed again even if reading fails. ``self._summary_data``
        is reset to zeroed counters, a histogram with
        ``DEFAULT_HIST_BINS + 1`` counts, one float32 array of length
        ``population_size`` per per-structure statistic, and empty ``thermo``
        and ``byrestraint`` dictionaries.
        """
        with HssFile(self.hssfilename, 'r') as hss:
            self._hss_crd = hss.coordinates

        population_size = self.cfg["model"]["population_size"]

        # regular bin edges on [0, DEFAULT_HIST_MAX), followed by
        # DEFAULT_HIST_MAX itself and a final edge at infinity
        bin_width = DEFAULT_HIST_MAX / DEFAULT_HIST_BINS
        edges = np.arange(0, DEFAULT_HIST_MAX, bin_width).tolist()
        edges += [DEFAULT_HIST_MAX, np.inf]

        # one independent array per statistic
        per_structure_keys = ('n_imposed', 'n_violations', 'total_energies',
                              'pair_energies', 'bond_energies')
        by_structure = {
            key: np.zeros(population_size, dtype=np.float32)
            for key in per_structure_keys
        }
        by_structure['thermo'] = {}

        self._summary_data = {
            'n_imposed': 0.0,
            'n_violations': 0.0,
            'histogram': {
                'counts': np.zeros(DEFAULT_HIST_BINS + 1),
                'edges': edges,
            },
            'bystructure': by_structure,
            'byrestraint': {},
        }
예제 #9
0
    def reduce(self):
        """
        Finish the step: wait for the poller, log statistics and repack the hssFile

        After the poller has gone through all structures, the violation score
        returned by ``log_stats`` is saved into the runtime configuration and
        the whole configuration is stored as JSON in the ``config_data``
        dataset. The file is then repacked with ``h5repack`` into a ``.swap``
        file, which is optionally copied to the intermediate name and finally
        moved to the configured structure output.

        Raises RuntimeError when ``h5repack`` exits with a non-zero status.
        """
        # wait for poller to finish (see also RelaxInit.py script)
        for _ in tqdm(self.file_poller.enumerate(), desc='(REDUCE)'):
            pass

        # read and log details, and save the runtime variables
        with HssFile(self.hssfilename, 'r+') as hss:
            n_struct, n_beads = hss.nstruct, hss.nbead
            violation_score = log_stats(hss, self.cfg)
            self.cfg['runtime']['violation_score'] = violation_score
            h5_create_or_replace_dataset(
                hss,
                'config_data',
                data=json.dumps(self.cfg, default=lambda a: a.tolist()))

        # repack hss file, chunking the coordinates by groups of beads
        swap_name = self.hssfilename + '.swap'
        PACK_SIZE = 1e6
        pack_beads = min(max(1, int(PACK_SIZE / n_struct / 3)), n_beads)
        chunk_spec = 'coordinates:CHUNK={:d}x{:d}x3'.format(
            pack_beads, n_struct)
        cmd = [
            'h5repack', '-i', self.hssfilename, '-o', swap_name, '-l',
            chunk_spec, '-v'
        ]

        sp = Popen(cmd, stderr=PIPE, stdout=PIPE)
        logger.info('repacking...')
        stdout, stderr = sp.communicate()
        failed = sp.returncode != 0
        if failed:
            # show the command and its output before giving up
            print(' '.join(cmd))
            print('O:', stdout.decode('utf-8'))
            print('E:', stderr.decode('utf-8'))
            raise RuntimeError('repacking failed. error code: %d' %
                               sp.returncode)
        logger.info('done.')

        # save the output file with a unique file name if requested
        if self.keep_intermediate_structures:
            copyfile(swap_name, self.intermediate_name() + '.hss')

        # finally replace output file
        shutil.move(swap_name, self.cfg.get("optimization/structure_output"))
예제 #10
0
    def reduce(self):
        """
        Wait for the polling function to go through all structures, then repack the hssFile

        The file is repacked with ``h5repack`` into a ``.swap`` file, which is
        optionally copied to the intermediate name and then renamed to the
        configured structure output.

        Raises RuntimeError when ``h5repack`` exits with a non-zero status.
        """
        # update structure coordinates (the loop only drains the poller)
        for _ in tqdm(self.file_poller.enumerate(), desc='(REDUCE)'):
            pass

        with HssFile(self.hssfilename, 'r+') as hss:
            n_struct, n_beads = hss.nstruct, hss.nbead

        logger.info(
            'Coordinates in master file updated for ALL structures; repacking starts...'
        )

        # repack hss file (h5repack is an HDF5 command line tool)
        swap_name = self.hssfilename + '.swap'
        PACK_SIZE = 1e6
        pack_beads = min(max(1, int(PACK_SIZE / n_struct / 3)), n_beads)
        layout = 'coordinates:CHUNK={:d}x{:d}x3'.format(pack_beads, n_struct)
        cmd = [
            'h5repack', '-i', self.hssfilename, '-o', swap_name, '-l', layout,
            '-v'
        ]

        sp = Popen(cmd, stderr=PIPE, stdout=PIPE)
        logger.info('repacking...')
        stdout, stderr = sp.communicate()
        failed = sp.returncode != 0
        if failed:
            # show the command and its output before giving up
            print(' '.join(cmd))
            print('O:', stdout.decode('utf-8'))
            print('E:', stderr.decode('utf-8'))
            raise RuntimeError('repacking failed. error code: %d' %
                               sp.returncode)
        logger.info('repacking done.')

        # save the output file with a unique file name if requested
        if self.keep_intermediate_structures:
            copyfile(swap_name, self.intermediate_name() + '.hss')

        # the temporary .swap file becomes the final structure output
        os.rename(swap_name, self.cfg.get("optimization/structure_output"))
예제 #11
0
    def task(struct_id, cfg, tmp_dir):
        """
        Generate one random structure with territories and save it to an hms file.

        The global numpy random generator is reseeded from a freshly drawn
        integer multiplied by ``struct_id``. The index is read from the
        structure output file and the coordinates returned by
        ``generate_territories`` are saved to
        ``<tmp_dir>/random_<struct_id>.hms``.
        """
        # NOTE(review): for struct_id == 0 the product below is always 0, so
        # that structure is always generated from seed 0 - confirm intended
        draw = np.random.randint(0, 2**32)
        np.random.seed((draw * struct_id) % (2**32))

        population_file = cfg["optimization"]["structure_output"]
        nucleus_radius = cfg.get("model/init_radius")

        with HssFile(population_file, 'r') as hss:
            index = hss.index

        coordinates = generate_territories(index, nucleus_radius)

        out_path = os.path.join(tmp_dir, 'random_%d.hms' % struct_id)
        with HmsFile(out_path, 'w') as hms:
            hms.saveCoordinates(struct_id, coordinates)
예제 #12
0
    def task(batch_id, cfg, tmp_dir):
        """Compute the DamID activation distances of one batch.

        Reads ``<tmp_dir>/<batch_id>.damid.in.npy`` (each row is unpacked as
        ``i, pwish, plast``), calls ``get_damid_actdist`` for every row and
        writes the collected results to ``<tmp_dir>/<batch_id>.out.tmp``, one
        line per result, formatted with ``damid_actdist_fmt_str``.

        Raises NotImplementedError when the configured nucleus shape is
        neither 'sphere' nor 'ellipsoid'.
        """
        # the envelope parameters are stored under a shape-specific key
        shape = cfg.get('model/restraints/envelope/nucleus_shape')
        if shape == 'sphere':
            param_key = 'model/restraints/envelope/nucleus_radius'
        elif shape == 'ellipsoid':
            param_key = 'model/restraints/envelope/nucleus_semiaxes'
        else:
            raise NotImplementedError(
                'shape %s has not been implemented yet.' % shape)
        nucleus_parameters = cfg.get(param_key)

        in_path = os.path.join(tmp_dir, '%d.damid.in.npy' % batch_id)
        out_path = os.path.join(tmp_dir, '%d.out.tmp' % batch_id)

        collected = []
        with HssFile(cfg.get("optimization/structure_output"), 'r') as hss:
            # read params from temporary damid.in.npy files
            batch_params = np.load(in_path)
            contact_range = cfg.get('restraints/DamID/contact_range', 0.05)

            for i, pwish, plast in batch_params:
                collected.extend(
                    get_damid_actdist(int(i),
                                      pwish,
                                      plast,
                                      hss,
                                      contact_range=contact_range,
                                      shape=shape,
                                      nucleus_param=nucleus_parameters))

        # save output for this chunk to file
        lines = [damid_actdist_fmt_str % record for record in collected]
        with open(out_path, 'w') as f:
            f.write('\n'.join(lines))
예제 #13
0
def get_structure(path, n, folder='.'):
    """Load structure ``n`` from an HSS file as plain Python containers.

    ``path`` is joined to ``folder``. The returned dictionary contains:
    ``crd`` and ``rad`` (coordinates and radii cut into consecutive slices at
    the index offsets), ``idx`` (the chromosome entry of each index row),
    ``n`` (number of structures in the file), ``cstarts`` (the offsets) and
    ``chroms`` (``idx`` with consecutive repetitions removed, as strings).
    """
    with HssFile(os.path.join(folder, path), 'r') as hss:
        coordinates = hss.get_struct_crd(n).tolist()
        chrom_ids = hss.genome.chroms[hss.index.chrom].tolist()
        radii = hss.radii.tolist()
        n_struct = hss.nstruct
        offsets = hss.index.offset.tolist()

    # consecutive offset pairs delimit one slice each
    spans = list(zip(offsets[:-1], offsets[1:]))

    # keep the first entry and every entry that differs from its predecessor
    run_heads = chrom_ids[:1] + [
        cur for prev, cur in zip(chrom_ids, chrom_ids[1:]) if cur != prev
    ]

    return {
        'crd': [coordinates[lo:hi] for lo, hi in spans],
        'idx': chrom_ids,
        'rad': [radii[lo:hi] for lo, hi in spans],
        'n': int(n_struct),
        'cstarts': offsets,
        'chroms': [str(head) for head in run_heads],
    }
예제 #14
0
    def task(batch_id, cfg, tmp_dir):
        """Select, for every cluster of one batch, the structures with the smallest gyration radius.

        The clusters of the batch are sliced out of the ``indptr``/``data``
        datasets of the clusters file. For each cluster,
        ``compute_gyration_radius`` is evaluated on the population coordinates
        and the ``keep_best`` structures with the smallest values are kept,
        sorted from smallest to largest. Clusters spanning more than
        ``max_chrom_in_cluster`` distinct chromosomes are not evaluated and get
        placeholders filled with -1 instead.

        Three files are written to ``tmp_dir``:
        ``tmp.<batch_id>.selected.npz`` (selected beads, one array per
        cluster), ``tmp.<batch_id>.idx.npy`` (indexes of the kept structures)
        and ``tmp.<batch_id>.values.npy`` (the corresponding values).
        """
        clusters_file = cfg.get('restraints/sprite/clusters')
        batch_size = cfg.get('restraints/sprite/batch_size', 10)
        keep_best = cfg.get('restraints/sprite/keep_best', 50)
        max_chrom = cfg.get('restraints/sprite/max_chrom_in_cluster', 6)

        # read the clusters
        with h5py.File(clusters_file, 'r') as h5:
            start = batch_id * batch_size
            stop = (batch_id + 1) * batch_size + 1
            ii = h5['indptr'][start:stop][()]
            data = h5['data'][ii[0]:ii[-1]]  # load everything for performance
            ii -= ii[0]  # subtract offset (ii[0]) from the full ii array

            # one array per cluster, delimited by consecutive pointers
            clusters = [data[ii[i - 1]:ii[i]] for i in range(1, len(ii))]
            del data

        indexes, values, selected_beads = [], [], []

        # the context manager closes the structure file, which was previously
        # left open
        with HssFile(cfg.get("optimization/structure_output"), 'r') as hss:
            index = hss.index

            for cluster in clusters:

                # number of distinct chromosomes involved in current cluster
                n_chrom = len(np.unique(index.chrom[cluster]))

                # too many chromosomes: append placeholders full of -1 entries
                if n_chrom > max_chrom:
                    selected_beads.append(
                        np.full((keep_best, len(cluster)), -1, dtype='i4'))
                    indexes.append(np.full(keep_best, -1))
                    values.append(np.full(keep_best, -1))
                    continue

                # per the original notes: rg2s holds one squared radius of
                # gyration per structure, current_selected_beads one row of
                # bead indexes per structure
                rg2s, _, current_selected_beads = compute_gyration_radius(
                    hss['coordinates'], cluster, index, index.copy_index)

                # argpartition picks the keep_best smallest values without a
                # full sort; only those are then sorted
                ind = np.argpartition(rg2s, keep_best)[:keep_best]
                ind = ind[np.argsort(rg2s[ind])]

                # smallest keep_best entries, from smallest to largest
                selected_beads.append(current_selected_beads[ind])
                indexes.append(ind)
                values.append(rg2s[ind])

        # ------- saving step into a batch-dependent set of files
        sel_file = os.path.join(tmp_dir, 'tmp.%d.selected.npz' % batch_id)
        idx_file = os.path.join(tmp_dir, 'tmp.%d.idx.npy' % batch_id)
        val_file = os.path.join(tmp_dir, 'tmp.%d.values.npy' % batch_id)

        # the number of beads differs from cluster to cluster, hence one
        # array per cluster in the npz archive
        np.savez(sel_file, *selected_beads)
        np.save(idx_file, np.array(indexes, dtype=np.int32))
        np.save(val_file, values)

        # read the files back right away: sometimes weird io problems happen,
        # and they should surface in this task
        with np.load(sel_file):
            pass  # opening the archive is the check; close it right away
        np.load(idx_file)
        np.load(val_file)
예제 #15
0
    def reduce(self):
        """Evaluate how well the output population reproduces the input Hi-C matrix.

        Builds the contact map of the structure output file and saves it, both
        as built (``full_matrix.hcs``) and after summing copies and clipping to
        [0, 1] (``out_matrix.hcs``). Plots input/output comparisons and log2
        ratio maps, genome wide and per chromosome, and histograms of the
        absolute and relative differences over the off-diagonal input entries
        that are >= sigma. The mean absolute relative difference is stored in
        ``self.score``, written to ``stats.txt`` together with the average
        differences, and logged. All files go to ``self.out_dir``.
        """
        out_dir = self.out_dir
        sigma = self.cfg.get('runtime/Hi-C/sigma')

        input_matrix = Contactmatrix(
            self.cfg.get('restraints/Hi-C/input_matrix'))
        with HssFile(self.cfg.get(
                'optimization/structure_output')) as structure_output:
            # give some tolerance. only in one direction though.
            output_matrix = structure_output.buildContactMap(
                contactRange=self.cfg.get('restraints/Hi-C/contact_range') *
                (1 + eps))
        output_matrix.save(os.path.join(out_dir, 'full_matrix.hcs'))
        output_matrix = output_matrix.sumCopies()
        output_matrix.matrix.data[:] = output_matrix.matrix.data.clip(0, 1)
        output_matrix.save(os.path.join(out_dir, 'out_matrix.hcs'))

        # side by side comparison, genome wide and per chromosome
        plot_comparison(input_matrix,
                        output_matrix,
                        labels=['input', 'output'],
                        file=os.path.join(out_dir, 'matrix_comparison.pdf'),
                        vmax=0.2)
        for c in input_matrix.index.get_chrom_names():
            plot_comparison(input_matrix[c],
                            output_matrix[c],
                            labels=['input', 'output'],
                            file=os.path.join(out_dir,
                                              'matrix_comparison_%s.pdf' % c),
                            title=c,
                            vmax=0.2)

        # log2 ratio map; zero entries yield inf/nan, hence the silenced
        # warnings and the isfinite filter for the color scale
        with np.errstate(divide='ignore', invalid='ignore'):
            diffmat = np.log2(output_matrix.matrix.toarray() /
                              input_matrix.matrix.toarray())
        maxv = np.percentile(np.abs(diffmat[np.isfinite(diffmat)]), 99)
        plt.figure()
        plt.imshow(diffmat, vmax=maxv, vmin=-maxv, cmap='RdBu_r')
        plt.title('difference_matrix')
        plt.colorbar()
        plt.savefig(os.path.join(out_dir, 'diffmat.pdf'))
        plt.close()
        for c in input_matrix.genome.chroms:
            ii = input_matrix.index.chrom == input_matrix.genome.getchrnum(c)
            plt.figure()
            xmat = diffmat[ii][:, ii]
            plt.imshow(xmat, vmax=maxv, vmin=-maxv, cmap='RdBu_r')
            plt.colorbar()
            plt.savefig(os.path.join(out_dir, 'diffmap_' + c + '.pdf'))
            plt.close()

        # off-diagonal input entries at or above sigma, keyed by (i, j)
        wanted = {(i, j): pwish
                  for i, j, pwish in input_matrix.matrix.coo_generator()
                  if pwish >= sigma and i != j}
        diffs = []
        reldiffs = []
        totp = 0
        for i, j, pout in output_matrix.matrix.coo_generator():
            p = wanted.get((i, j))
            if p is not None:
                diffs.append(pout - p)
                reldiffs.append((pout - p) / p)
                totp += p
        # release the matrices, they are not needed anymore
        del output_matrix
        del input_matrix
        del wanted
        diffs = np.array(diffs)
        reldiffs = np.array(reldiffs)

        fig, ax = plt.subplots(2, 2)
        ax[0, 0].set_title('Absolute matrix differences')
        ax[0, 0].hist(diffs, bins=100, range=(-1, 1))
        ax[0, 1].set_title('Relative matrix differences')
        ax[0, 1].hist(reldiffs, bins=100, range=(-1, 1))
        ax[1, 0].set_title('Absolute matrix differences (log)')
        ax[1, 0].hist(diffs, bins=100, log=True, range=(-1, 1))
        ax[1, 1].set_title('Relative matrix differences (log)')
        ax[1, 1].hist(reldiffs, bins=100, log=True, range=(-1, 1))

        plt.tight_layout()
        plt.savefig(os.path.join(out_dir, 'difference_histograms.pdf'))
        # close this figure like the other ones, so that it is not kept alive
        plt.close(fig)

        self.score = np.abs(reldiffs).mean()
        with open(os.path.join(out_dir, 'stats.txt'), 'w') as f:
            print("#score ave_differences ave_relative_differences", file=f)
            print(self.score, np.average(diffs), np.average(reldiffs), file=f)
        logger.info('>>>  Average relative difference: {:6.3f}%  <<<'.format(
            self.score * 100))
예제 #16
0
class ModelingStep(StructGenStep):
    def name(self):
        """ Return the human-readable label of this step (it is printed to the logger).

        The label is 'ModelingStep', followed in parentheses by one entry per
        active restraint kind (Hi-C sigma, DamID sigma, SPRITE volume fraction)
        and by the optimization iteration when 'opt_iter' is set in the runtime
        section of the configuration.
        """

        restraints = self.cfg['restraints']
        runtime = self.cfg['runtime']
        details = []

        if "Hi-C" in restraints:
            hic_sigma = runtime['Hi-C']['sigma']
            details.append('sigma={:.2f}%'.format(hic_sigma * 100.0))

        if "DamID" in restraints:
            damid_sigma = self.cfg.get('runtime/DamID/sigma', -1.0)
            details.append('damid={:.2f}'.format(damid_sigma))

        if "sprite" in restraints:
            volume_fraction = restraints['sprite']['volume_fraction']
            details.append('sprite={:.1f}%'.format(volume_fraction * 100.0))

        if 'opt_iter' in runtime:
            details.append('iter={}'.format(runtime.get('opt_iter', 'N/A')))

        # no extra information: bare step name
        if not details:
            return 'ModelingStep'
        return 'ModelingStep' + ' (' + ', '.join(details) + ')'

    #-

    def setup(self):
        """ Read in parameters from cfg file """

        self.tmp_extensions = [".hms", ".data", ".lam", ".lammpstrj", ".ready"]
        self.tmp_file_prefix = "mstep"
        self.argument_list = range(self.cfg["model"]["population_size"])
        self.hssfilename = self.cfg["optimization"]["structure_output"] + '.T'
        self.file_poller = None

    #-

    def _run_poller(self):
        """ Create the file poller for this step and start it (See also RelaxInit.py script).

        One '<uid>.<struct_id>.ready' file in the temporary directory is watched
        for each structure; set_structure is registered as callback with the
        structure index as its only argument, setup_poller/teardown_poller as
        setup/teardown hooks.
        """

        watched_files = []
        callback_args = []
        for struct_id in self.argument_list:
            ready_name = '%s.%d.ready' % (self.uid, struct_id)
            watched_files.append(os.path.join(self.tmp_dir, ready_name))
            callback_args.append([struct_id])

        poller = FilePoller(watched_files,
                            callback=self.set_structure,
                            args=callback_args,
                            setup=self.setup_poller,
                            teardown=self.teardown_poller)

        # keep a reference before starting, reduce() waits on it
        self.file_poller = poller
        poller.watch_async()

    #-

    def before_map(self):
        """
        This runs only if map step is not skipped.

        When 'optimization/clean_restart' is set, the ready files already
        present in the temporary directory are removed (otherwise they would be
        spotted by the poller), then the poller is started.
        """
        candidates = []
        for struct_id in self.argument_list:
            ready_name = '%s.%d.ready' % (self.uid, struct_id)
            candidates.append(os.path.join(self.tmp_dir, ready_name))

        # a clean restart of the modeling step must not see leftovers of a previous run
        if self.cfg.get('optimization/clean_restart', False):
            for stale in filter(os.path.isfile, candidates):
                os.remove(stale)

        self._run_poller()

    #-

    def before_reduce(self):
        """
        This runs only if reduce step is not skipped.

        Starts the poller unless one has already been created.
        """
        if self.file_poller is not None:
            # poller already set up, nothing to do
            return
        self._run_poller()

    #-

    @staticmethod
    def task(struct_id, cfg, tmp_dir):
        """
        Do single structure modeling with restraint assignment from A-step

        Builds a Model for structure `struct_id` from the population HSS file,
        adds the restraints requested in the configuration, runs the
        optimization, writes the result and the violation statistics to
        'mstep_<struct_id>.hms' inside `tmp_dir`, and finally touches the
        '<step_hash>.<struct_id>.ready' file that signals completion.

        Parameters
        ----------
        struct_id : int
            index of the structure in the population
        cfg : configuration object
            it is deep-copied, so the changes made here stay local
        tmp_dir : str
            directory for the .hms and .ready files

        Returns
        -------
        None

        Raises
        ------
        ValueError
            if the configured nucleus shape is not 'sphere', 'ellipsoid' or 'exp_map'
        RuntimeError
            if the coordinates read back from the .hms file differ from the model ones
        """

        # the static method modifications to the cfg should only be local,
        # use a copy of the config file
        cfg = deepcopy(cfg)

        # extract structure information
        step_id = cfg.get('runtime/step_hash', 'xxxx')

        readyfile = os.path.join(tmp_dir, '%s.%d.ready' % (step_id, struct_id))

        # if the ready file exists it does nothing, unless it is a clean restart
        if not cfg.get('optimization/clean_restart', False):
            if os.path.isfile(readyfile):
                return

        hssfilename = cfg["optimization"]["structure_output"]

        # read index, radii, coordinates
        with HssFile(hssfilename, 'r') as hss:
            index = hss.index
            radii = hss.radii
            if cfg.get('optimization/random_shuffling', False):
                crd = generate_random_in_sphere(
                    radii, cfg.get('model/restraints/envelope/nucleus_radius'))
            else:
                crd = hss.get_struct_crd(struct_id)

        # init Model class (igm.model)
        model = Model(uid=struct_id)

        # get the chain ids: one id per chromosome, repeated once per bead
        chain_ids = np.concatenate([[i] * s
                                    for i, s in enumerate(index.chrom_sizes)])

        # add particles into model
        n_particles = len(crd)
        for i in range(n_particles):
            model.addParticle(crd[i],
                              radii[i],
                              Particle.NORMAL,
                              chainID=chain_ids[i])

        # restraints whose violations are collected after the optimization
        monitored_restraints = []

        # ---- POLYMER STRUCTURAL INTEGRITY INTRINSIC restraints ----- #

        # add excluded volume restraint (not monitored)
        ex = Steric(cfg.get("model/restraints/excluded/evfactor"))
        model.addRestraint(ex)

        # add nucleus envelope restraint
        shape = cfg.get('model/restraints/envelope/nucleus_shape')
        envelope_k = cfg.get('model/restraints/envelope/nucleus_kspring')

        # defaults, passed to the DamID restraint when the shape does not define them
        radius = 0
        semiaxes = (0, 0, 0)

        if shape == 'sphere':
            radius = cfg.get('model/restraints/envelope/nucleus_radius')
            ev = Envelope(shape, radius, envelope_k)
        elif shape == 'ellipsoid':
            semiaxes = cfg.get('model/restraints/envelope/nucleus_semiaxes')
            ev = Envelope(shape, semiaxes, envelope_k)
        elif shape == 'exp_map':
            volume_file = cfg.get('model/restraints/envelope/input_map')
            ev = GenEnvelope(shape, volume_file, envelope_k)
        else:
            # fail with a clear message instead of an unbound 'ev' below
            raise ValueError('unsupported nucleus_shape: %r' % (shape, ))

        model.addRestraint(ev)
        monitored_restraints.append(ev)

        # add consecutive bead polymer restraint to ensure chain connectivity
        if cfg.get('model/restraints/polymer/polymer_bonds_style') != 'none':
            contact_probabilities = cfg['runtime'].get(
                'consecutive_contact_probabilities', None)
            pp = Polymer(
                index,
                cfg['model']['restraints']['polymer']['contact_range'],
                cfg['model']['restraints']['polymer']['polymer_kspring'],
                contact_probabilities=contact_probabilities)
            model.addRestraint(pp)
            monitored_restraints.append(pp)

        # LB: add nuclear body excluded volume restraints
        if "nucleolus" in cfg['restraints']:

            # one restraint per nucleolus map listed in the cfg file
            for mappa in cfg['restraints']['nucleolus']['input_map']:

                nucl = GenEnvelope(cfg['restraints']['nucleolus']['shape'],
                                   mappa,
                                   cfg['restraints']['nucleolus']['k_spring'])
                model.addRestraint(nucl)
                monitored_restraints.append(nucl)

        # ---- IGM MODELING RESTRAINTS FROM EXPERIMENTAL DATA (FISH MISSING) ---- #

        # add Hi-C restraint
        if "Hi-C" in cfg['restraints']:

            # read parameters from cfg file
            actdist_file = cfg.get('runtime/Hi-C/actdist_file')
            contact_range = cfg.get('restraints/Hi-C/contact_range', 2.0)
            k = cfg.get('restraints/Hi-C/contact_kspring', 0.05)

            # effectively add HiC restraints (bonds)
            hic = HiC(actdist_file, contact_range, k)
            model.addRestraint(hic)
            monitored_restraints.append(hic)

        # add DAMID restraint
        if "DamID" in cfg['restraints']:

            # read parameters from cfg file
            actdist_file = cfg.get('runtime/DamID/damid_actdist_file')
            contact_range = cfg.get('restraints/DamID/contact_range', 2.0)
            k = cfg.get('restraints/DamID/contact_kspring', 0.05)

            # effectively add DAMID restraints
            damid = Damid(damid_file=actdist_file,
                          contact_range=contact_range,
                          nuclear_radius=radius,
                          k=k,
                          shape=shape,
                          semiaxes=semiaxes)
            model.addRestraint(damid)
            monitored_restraints.append(damid)

        # add SPRITE restraint
        if "sprite" in cfg['restraints']:

            # read parameters from cfg file
            sprite_tmp = make_absolute_path(
                cfg.get('restraints/sprite/tmp_dir', 'sprite'),
                cfg.get('parameters/tmp_dir'))
            assignment_filename = make_absolute_path(
                cfg.get('restraints/sprite/assignment_file', 'assignment.h5'),
                sprite_tmp)

            # effectively add SPRITE restraints
            sprite = Sprite(assignment_filename,
                            cfg.get('restraints/sprite/volume_fraction',
                                    0.05), struct_id,
                            cfg.get('restraints/sprite/kspring', 1.0))
            model.addRestraint(sprite)
            monitored_restraints.append(sprite)

        # ========Optimization
        # reuse step_id (which falls back to 'xxxx') so that a missing step hash
        # does not end up in a None + str concatenation
        cfg['runtime']['run_name'] = step_id + '_' + str(struct_id)
        optinfo = model.optimize(cfg)

        # tolerance parameter: a restraint counts as violated when its violation ratio exceeds it
        tol = cfg.get('optimization/violation_tolerance', 0.01)

        # save optimization results to .hms file
        ofname = os.path.join(tmp_dir, 'mstep_%d.hms' % struct_id)
        with HmsFile(ofname, 'w') as hms:
            hms.saveModel(struct_id, model)

            # create violations statistics and save all of that into the "vstat" dictionary
            vstat = {}
            for r in monitored_restraints:
                vs = []
                n_imposed = 0
                for fid in r.forceID:
                    f = model.forces[fid]
                    n_imposed += f.rnum
                    if f.rnum > 1:
                        # a list of values is appended to vs at once
                        vs += f.getViolationRatios(model.particles).tolist()
                    else:
                        # one value is appended at the time to vs
                        vs.append(f.getViolationRatio(model.particles))
                vs = np.array(vs)
                H, edges = get_violation_histogram(vs)

                # the same 'tol' value is used to compute the number of violations
                # across different restraint kinds...is that too easy?
                num_violations = np.count_nonzero(vs > tol)
                vstat[repr(r)] = {
                    'histogram': {
                        'edges': edges.tolist(),
                        'counts': H.tolist()
                    },
                    'n_violations': num_violations,
                    'n_imposed': n_imposed
                }

            # add violation dictionary to hms file
            h5_create_or_replace_dataset(hms, 'violation_stats',
                                         json.dumps(vstat))

            if isinstance(optinfo, dict):
                # non-dict entries become datasets of the 'opt_info' group,
                # the whole dictionary is also stored as a json string
                grp = h5_create_group_if_not_exist(hms, 'opt_info')
                for k, v in optinfo.items():
                    if not isinstance(v, dict):
                        h5_create_or_replace_dataset(grp, k, data=v)
                h5_create_or_replace_dataset(hms,
                                             'opt_info_dict',
                                             data=json.dumps(optinfo))

        # double check it has been written correctly
        with HmsFile(ofname, 'r') as hms:
            if not np.all(hms.get_coordinates() == model.getCoordinates()):
                raise RuntimeError('error writing the file %s' % ofname)

        # generate the .ready file, which signals to the poller that optimization for that structure has been completed
        open(readyfile, 'w').close()  # touch the ready-file

    #-

    def setup_poller(self):
        """ Set up polling function: load the coordinate master matrix and create the dictionary summarizing statistics from the run.

        Sets self._hss_crd (coordinates read from the working HSS file) and
        self._summary_data (global counters, global histogram, per-structure
        arrays and an initially empty per-restraint dictionary), which are then
        updated by set_structure and written out by teardown_poller.
        """

        # the context manager closes the file even if reading the coordinates fails
        with HssFile(self.hssfilename, 'r') as hss:
            self._hss_crd = hss.coordinates

        n_struct = self.cfg["model"]["population_size"]

        # bin edges: regular steps of DEFAULT_HIST_MAX / DEFAULT_HIST_BINS starting at 0,
        # then DEFAULT_HIST_MAX and +inf (last bin collects everything above the maximum)
        hist_edges = np.arange(0, DEFAULT_HIST_MAX,
                               DEFAULT_HIST_MAX / DEFAULT_HIST_BINS).tolist() + [
                                   DEFAULT_HIST_MAX, np.inf
                               ]

        self._summary_data = {
            'n_imposed': 0.0,
            'n_violations': 0.0,
            'histogram': {
                'counts': np.zeros(DEFAULT_HIST_BINS + 1),
                'edges': hist_edges
            },
            # one entry per structure of the population
            'bystructure': {
                'n_imposed': np.zeros(n_struct, dtype=np.float32),
                'n_violations': np.zeros(n_struct, dtype=np.float32),
                'total_energies': np.zeros(n_struct, dtype=np.float32),
                'pair_energies': np.zeros(n_struct, dtype=np.float32),
                'bond_energies': np.zeros(n_struct, dtype=np.float32),
                'thermo': {}
            },
            # filled by set_structure, keyed by restraint
            'byrestraint': {}
        }

    #-

    def set_structure(self, i):
        """ This is the function run in the poller: coordinates and statistics are updated for the i-th configuration.

        Reads '<tmp_file_prefix>_<i>.hms' from the temporary directory, copies
        its coordinates into the master matrix self._hss_crd and accumulates
        its violation and optimization statistics into self._summary_data.

        Parameters
        ----------
        i : int
            index of the structure
        """

        fname = "{}_{}.hms".format(self.tmp_file_prefix, i)
        summary = self._summary_data
        by_restraint = summary['byrestraint']
        by_structure = summary['bystructure']

        with HmsFile(os.path.join(self.tmp_dir, fname), 'r') as hms:

            # extract coordinates from .hms file and update master matrix
            # (structures are indexed along the second axis)
            self._hss_crd[:, i, :] = hms.get_coordinates()

            # collect violation statistics; they are optional, so any failure
            # reading or decoding them is treated as "no statistics"
            # (Exception, not a bare except: interrupts must still propagate)
            try:
                vstat = json.loads(hms['violation_stats'][()])
            except Exception:
                vstat = {}

            n_tot = 0
            n_vio = 0
            hist_tot = np.zeros(DEFAULT_HIST_BINS + 1)
            for k, cstat in vstat.items():
                if k not in by_restraint:
                    by_restraint[k] = {
                        'histogram': {
                            'counts': np.zeros(DEFAULT_HIST_BINS + 1)
                        },
                        'n_violations': 0,
                        'n_imposed': 0
                    }

                imposed = cstat.get('n_imposed', 0)
                violated = cstat.get('n_violations', 0)
                counts = cstat['histogram']['counts']

                # totals for this structure
                n_tot += imposed
                n_vio += violated
                hist_tot += counts

                # totals for this restraint across structures
                by_restraint[k]['n_violations'] += violated
                by_restraint[k]['n_imposed'] += imposed
                by_restraint[k]['histogram']['counts'] += counts

            summary['n_imposed'] += n_tot
            summary['n_violations'] += n_vio
            summary['histogram']['counts'] += hist_tot
            by_structure['n_imposed'][i] = n_tot
            by_structure['n_violations'][i] = n_vio

            # collect optimization statistics (left untouched from the first missing entry on)
            try:
                by_structure['total_energies'][i] = hms['opt_info'][
                    'final-energy'][()]
                by_structure['pair_energies'][i] = hms['opt_info'][
                    'pair-energy'][()]
                by_structure['bond_energies'][i] = hms['opt_info'][
                    'bond-energy'][()]
            except KeyError:
                pass

            # detailed optimization stats
            if 'opt_info_dict' in hms:
                infodict = json.loads(hms['opt_info_dict'][()])
                # 'thermo' may be absent from the optimization info: skip it then,
                # like the energies above
                for k, v in infodict.get('thermo', {}).items():
                    if k not in by_structure['thermo']:
                        # one slot per structure (LB replaced hss.n_struct)
                        by_structure['thermo'][k] = np.zeros(
                            self._hss_crd.shape[1])
                    by_structure['thermo'][k][i] = v

    #-

    def teardown_poller(self):
        """ Teardown poller, after all coordinates and statistics have been updated for the whole population.

        Computes the global violation score (violations / imposed restraints,
        0 when no restraint was imposed) and writes it, the coordinate master
        matrix and the json-encoded summary into the working HSS file.
        """

        total_violations = self._summary_data['n_violations']
        total_restraints = self._summary_data['n_imposed']

        if total_restraints == 0:
            violation_score = 0
        else:
            # fraction of restraints which are violated
            violation_score = total_violations / total_restraints

        # open HSS population file
        self._hss = HssFile(self.hssfilename, 'r+')  # LB
        try:
            # store violation score, all the coordinates and the restraint violation summary into the HSS file
            self._hss.set_violation(violation_score)
            self._hss.set_coordinates(self._hss_crd)  # LB

            # numpy arrays in the summary are serialized as lists
            h5_create_or_replace_dataset(self._hss,
                                         'summary',
                                         data=json.dumps(
                                             self._summary_data,
                                             default=lambda a: a.tolist()))
        finally:
            # close HSS file, also when one of the writes above fails
            self._hss.close()

    #-

    def reduce(self):
        """
        Collect all structure coordinates together to assemble a hssFile.

        Waits for the poller, logs the statistics, stores the configuration in
        the working HSS file, repacks it with the external 'h5repack' command
        and moves the repacked file to the configured structure output.

        Raises
        ------
        RuntimeError
            if h5repack exits with a non-zero return code
        """

        # wait for poller to finish (see also RelaxInit.py script)
        for _ in tqdm(self.file_poller.enumerate(), desc='(REDUCE)'):
            pass

        # read and log details, and save the runtime variables
        with HssFile(self.hssfilename, 'r+') as hss:
            n_struct, n_beads = hss.nstruct, hss.nbead
            self.cfg['runtime']['violation_score'] = log_stats(hss, self.cfg)
            cfg_json = json.dumps(self.cfg, default=lambda a: a.tolist())
            h5_create_or_replace_dataset(hss, 'config_data', data=cfg_json)

        # repack hss file: chunks of the coordinates dataset span all the
        # structures for a number of beads derived from PACK_SIZE
        PACK_SIZE = 1e6
        pack_beads = min(max(1, int(PACK_SIZE / n_struct / 3)), n_beads)
        swapfile = self.hssfilename + '.swap'
        chunk_spec = 'coordinates:CHUNK={:d}x{:d}x3'.format(
            pack_beads, n_struct)
        cmd = ['h5repack', '-i', self.hssfilename, '-o', swapfile, '-l',
               chunk_spec, '-v']

        proc = Popen(cmd, stderr=PIPE, stdout=PIPE)
        logger.info('repacking...')
        out, err = proc.communicate()
        if proc.returncode != 0:
            # show the command and its output before giving up
            print(' '.join(cmd))
            print('O:', out.decode('utf-8'))
            print('E:', err.decode('utf-8'))
            raise RuntimeError('repacking failed. error code: %d' %
                               proc.returncode)
        logger.info('done.')

        # save the output file with a unique file name if requested (see 'intermediate_name' function below)
        if self.keep_intermediate_structures:
            copyfile(swapfile, self.intermediate_name() + '.hss')

        # finally replace output file
        shutil.move(swapfile, self.cfg.get("optimization/structure_output"))

    #-

    def skip(self):
        """ If the intermediate HSS file of this step exists, log its statistics and store its violation score in the runtime configuration """
        intermediate_file = self.intermediate_name() + '.hss'
        if not os.path.isfile(intermediate_file):
            # nothing saved for this step
            return

        with HssFile(intermediate_file, 'r') as hss:
            self.cfg['runtime']['violation_score'] = log_stats(hss, self.cfg)

    #-

    def intermediate_name(self):
        """ Define unique intermediate name (without extension) for the HSS file associated with this modeling step.

        The name is the configured structure output followed by dot-separated
        tags: DamID sigma, Hi-C sigma, SPRITE volume fraction (percent) and
        optimization iteration, each only when available, and finally the
        step uid.
        """

        runtime = self.cfg['runtime']
        tags = []

        if "DamID" in runtime:
            damid_sigma = self.cfg.get('runtime/DamID/sigma', -1.0)
            tags.append('damid_{:.4f}'.format(damid_sigma))

        if "Hi-C" in runtime:
            hic_sigma = runtime['Hi-C'].get('sigma', -1.0)
            tags.append('sigma_{:.4f}'.format(hic_sigma))

        if "sprite" in self.cfg['restraints']:
            volume_fraction = self.cfg['restraints']['sprite'][
                'volume_fraction']
            tags.append('sprite_{:.1f}'.format(volume_fraction * 100.0))

        if 'opt_iter' in runtime:
            tags.append('iter_{}'.format(runtime['opt_iter']))

        # the uid is always the last tag
        tags.append(str(self.uid))

        base = self.cfg.get("optimization/structure_output")
        return '.'.join([base] + tags)
예제 #17
0
    def task(struct_id, cfg, tmp_dir):
        """
        Do single structure modeling with restraint assignment from A-step

        Builds a Model for structure `struct_id` from the population HSS file,
        adds the restraints requested in the configuration, runs the
        optimization, writes the result and the violation statistics to
        'mstep_<struct_id>.hms' inside `tmp_dir`, and finally touches the
        '<step_hash>.<struct_id>.ready' file that signals completion.

        Parameters
        ----------
        struct_id : int
            index of the structure in the population
        cfg : configuration object
            it is deep-copied, so the changes made here stay local
        tmp_dir : str
            directory for the .hms and .ready files

        Returns
        -------
        None

        Raises
        ------
        ValueError
            if the configured nucleus shape is not 'sphere', 'ellipsoid' or 'exp_map'
        RuntimeError
            if the coordinates read back from the .hms file differ from the model ones
        """

        # the static method modifications to the cfg should only be local,
        # use a copy of the config file
        cfg = deepcopy(cfg)

        # extract structure information
        step_id = cfg.get('runtime/step_hash', 'xxxx')

        readyfile = os.path.join(tmp_dir, '%s.%d.ready' % (step_id, struct_id))

        # if the ready file exists it does nothing, unless it is a clean restart
        if not cfg.get('optimization/clean_restart', False):
            if os.path.isfile(readyfile):
                return

        hssfilename = cfg["optimization"]["structure_output"]

        # read index, radii, coordinates
        with HssFile(hssfilename, 'r') as hss:
            index = hss.index
            radii = hss.radii
            if cfg.get('optimization/random_shuffling', False):
                crd = generate_random_in_sphere(
                    radii, cfg.get('model/restraints/envelope/nucleus_radius'))
            else:
                crd = hss.get_struct_crd(struct_id)

        # init Model class (igm.model)
        model = Model(uid=struct_id)

        # get the chain ids: one id per chromosome, repeated once per bead
        chain_ids = np.concatenate([[i] * s
                                    for i, s in enumerate(index.chrom_sizes)])

        # add particles into model
        n_particles = len(crd)
        for i in range(n_particles):
            model.addParticle(crd[i],
                              radii[i],
                              Particle.NORMAL,
                              chainID=chain_ids[i])

        # restraints whose violations are collected after the optimization
        monitored_restraints = []

        # ---- POLYMER STRUCTURAL INTEGRITY INTRINSIC restraints ----- #

        # add excluded volume restraint (not monitored)
        ex = Steric(cfg.get("model/restraints/excluded/evfactor"))
        model.addRestraint(ex)

        # add nucleus envelope restraint
        shape = cfg.get('model/restraints/envelope/nucleus_shape')
        envelope_k = cfg.get('model/restraints/envelope/nucleus_kspring')

        # defaults, passed to the DamID restraint when the shape does not define them
        radius = 0
        semiaxes = (0, 0, 0)

        if shape == 'sphere':
            radius = cfg.get('model/restraints/envelope/nucleus_radius')
            ev = Envelope(shape, radius, envelope_k)
        elif shape == 'ellipsoid':
            semiaxes = cfg.get('model/restraints/envelope/nucleus_semiaxes')
            ev = Envelope(shape, semiaxes, envelope_k)
        elif shape == 'exp_map':
            volume_file = cfg.get('model/restraints/envelope/input_map')
            ev = GenEnvelope(shape, volume_file, envelope_k)
        else:
            # fail with a clear message instead of an unbound 'ev' below
            raise ValueError('unsupported nucleus_shape: %r' % (shape, ))

        model.addRestraint(ev)
        monitored_restraints.append(ev)

        # add consecutive bead polymer restraint to ensure chain connectivity
        if cfg.get('model/restraints/polymer/polymer_bonds_style') != 'none':
            contact_probabilities = cfg['runtime'].get(
                'consecutive_contact_probabilities', None)
            pp = Polymer(
                index,
                cfg['model']['restraints']['polymer']['contact_range'],
                cfg['model']['restraints']['polymer']['polymer_kspring'],
                contact_probabilities=contact_probabilities)
            model.addRestraint(pp)
            monitored_restraints.append(pp)

        # LB: add nuclear body excluded volume restraints
        if "nucleolus" in cfg['restraints']:

            # one restraint per nucleolus map listed in the cfg file
            for mappa in cfg['restraints']['nucleolus']['input_map']:

                nucl = GenEnvelope(cfg['restraints']['nucleolus']['shape'],
                                   mappa,
                                   cfg['restraints']['nucleolus']['k_spring'])
                model.addRestraint(nucl)
                monitored_restraints.append(nucl)

        # ---- IGM MODELING RESTRAINTS FROM EXPERIMENTAL DATA (FISH MISSING) ---- #

        # add Hi-C restraint
        if "Hi-C" in cfg['restraints']:

            # read parameters from cfg file
            actdist_file = cfg.get('runtime/Hi-C/actdist_file')
            contact_range = cfg.get('restraints/Hi-C/contact_range', 2.0)
            k = cfg.get('restraints/Hi-C/contact_kspring', 0.05)

            # effectively add HiC restraints (bonds)
            hic = HiC(actdist_file, contact_range, k)
            model.addRestraint(hic)
            monitored_restraints.append(hic)

        # add DAMID restraint
        if "DamID" in cfg['restraints']:

            # read parameters from cfg file
            actdist_file = cfg.get('runtime/DamID/damid_actdist_file')
            contact_range = cfg.get('restraints/DamID/contact_range', 2.0)
            k = cfg.get('restraints/DamID/contact_kspring', 0.05)

            # effectively add DAMID restraints
            damid = Damid(damid_file=actdist_file,
                          contact_range=contact_range,
                          nuclear_radius=radius,
                          k=k,
                          shape=shape,
                          semiaxes=semiaxes)
            model.addRestraint(damid)
            monitored_restraints.append(damid)

        # add SPRITE restraint
        if "sprite" in cfg['restraints']:

            # read parameters from cfg file
            sprite_tmp = make_absolute_path(
                cfg.get('restraints/sprite/tmp_dir', 'sprite'),
                cfg.get('parameters/tmp_dir'))
            assignment_filename = make_absolute_path(
                cfg.get('restraints/sprite/assignment_file', 'assignment.h5'),
                sprite_tmp)

            # effectively add SPRITE restraints
            sprite = Sprite(assignment_filename,
                            cfg.get('restraints/sprite/volume_fraction',
                                    0.05), struct_id,
                            cfg.get('restraints/sprite/kspring', 1.0))
            model.addRestraint(sprite)
            monitored_restraints.append(sprite)

        # ========Optimization
        # reuse step_id (which falls back to 'xxxx') so that a missing step hash
        # does not end up in a None + str concatenation
        cfg['runtime']['run_name'] = step_id + '_' + str(struct_id)
        optinfo = model.optimize(cfg)

        # tolerance parameter: a restraint counts as violated when its violation ratio exceeds it
        tol = cfg.get('optimization/violation_tolerance', 0.01)

        # save optimization results to .hms file
        ofname = os.path.join(tmp_dir, 'mstep_%d.hms' % struct_id)
        with HmsFile(ofname, 'w') as hms:
            hms.saveModel(struct_id, model)

            # create violations statistics and save all of that into the "vstat" dictionary
            vstat = {}
            for r in monitored_restraints:
                vs = []
                n_imposed = 0
                for fid in r.forceID:
                    f = model.forces[fid]
                    n_imposed += f.rnum
                    if f.rnum > 1:
                        # a list of values is appended to vs at once
                        vs += f.getViolationRatios(model.particles).tolist()
                    else:
                        # one value is appended at the time to vs
                        vs.append(f.getViolationRatio(model.particles))
                vs = np.array(vs)
                H, edges = get_violation_histogram(vs)

                # the same 'tol' value is used to compute the number of violations
                # across different restraint kinds...is that too easy?
                num_violations = np.count_nonzero(vs > tol)
                vstat[repr(r)] = {
                    'histogram': {
                        'edges': edges.tolist(),
                        'counts': H.tolist()
                    },
                    'n_violations': num_violations,
                    'n_imposed': n_imposed
                }

            # add violation dictionary to hms file
            h5_create_or_replace_dataset(hms, 'violation_stats',
                                         json.dumps(vstat))

            if isinstance(optinfo, dict):
                # non-dict entries become datasets of the 'opt_info' group,
                # the whole dictionary is also stored as a json string
                grp = h5_create_group_if_not_exist(hms, 'opt_info')
                for k, v in optinfo.items():
                    if not isinstance(v, dict):
                        h5_create_or_replace_dataset(grp, k, data=v)
                h5_create_or_replace_dataset(hms,
                                             'opt_info_dict',
                                             data=json.dumps(optinfo))

        # double check it has been written correctly
        with HmsFile(ofname, 'r') as hms:
            if not np.all(hms.get_coordinates() == model.getCoordinates()):
                raise RuntimeError('error writing the file %s' % ofname)

        # generate the .ready file, which signals to the poller that optimization for that structure has been completed
        open(readyfile, 'w').close()  # touch the ready-file
예제 #18
0
    def task(struct_id, cfg, tmp_dir):
        """
        Relax one structure of the population, SERIAL

        Builds a Model for structure `struct_id`, adds excluded volume,
        nuclear envelope, polymer and (if configured) nucleolus restraints,
        runs the optimization, writes 'relax_<struct_id>.hms' inside `tmp_dir`
        and finally touches 'relax_<struct_id>.hms.ready'.

        Parameters
        ----------
        struct_id : int
            index of the structure in the population
        cfg : configuration object
            it is deep-copied, so the changes made here stay local
        tmp_dir : str
            directory for the .hms and .ready files

        Returns
        -------
        None

        Raises
        ------
        ValueError
            if the configured nucleus shape is not 'sphere', 'ellipsoid' or 'exp_map'
        RuntimeError
            if the coordinates read back from the .hms file differ from the model ones
        """
        cfg = deepcopy(cfg)

        # output file and the companion file signalling completion
        ofname = os.path.join(tmp_dir, 'relax_%d.hms' % struct_id)
        readyfile = ofname + '.ready'

        # if the ready file exists it does nothing, unless it is a clean restart
        if not cfg.get('optimization/clean_restart', False):
            if os.path.isfile(readyfile):
                return

        # extract structure information
        hssfilename = cfg["optimization"]["structure_output"]

        # read index, radii, coordinates
        with HssFile(hssfilename, 'r') as hss:
            index = hss.index
            radii = hss.radii
            crd = hss.get_struct_crd(struct_id)

        # init Model
        model = Model(uid=struct_id)

        # add particles into model
        n_particles = len(crd)
        for i in range(n_particles):
            model.addParticle(crd[i], radii[i], Particle.NORMAL)

        # ========Add polymer/nucleoli restraints =========

        # add excluded volume restraint
        ex = Steric(cfg.get("model/restraints/excluded/evfactor"))
        model.addRestraint(ex)

        # add nucleus envelope restraint (spherical, ellipsoidal OR from data)
        envelope_cfg = cfg['model']['restraints']['envelope']
        shape = envelope_cfg['nucleus_shape']
        if shape == 'sphere':
            ev = Envelope(shape, envelope_cfg['nucleus_radius'],
                          envelope_cfg['nucleus_kspring'])
        elif shape == 'ellipsoid':
            ev = Envelope(shape, envelope_cfg['nucleus_semiaxes'],
                          envelope_cfg['nucleus_kspring'])
        elif shape == 'exp_map':
            ev = GenEnvelope(shape, envelope_cfg['input_map'],
                             envelope_cfg['nucleus_kspring'])
        else:
            # fail with a clear message instead of an unbound 'ev' below
            raise ValueError('unsupported nucleus_shape: %r' % (shape, ))
        model.addRestraint(ev)

        # add consecutive polymer restraint
        contact_probabilities = cfg['runtime'].get(
            'consecutive_contact_probabilities', None)
        pp = Polymer(index,
                     cfg['model']['restraints']['polymer']['contact_range'],
                     cfg['model']['restraints']['polymer']['polymer_kspring'],
                     contact_probabilities=contact_probabilities)
        model.addRestraint(pp)

        # LB: add nuclear body "excluded volume" restraints (keep chromosomes out of nucleolar region)
        if 'nucleolus' in cfg['restraints']:

            for mappa in cfg['restraints']['nucleolus']['input_map']:

                nucl = GenEnvelope(cfg['restraints']['nucleolus']['shape'],
                                   mappa,
                                   cfg['restraints']['nucleolus']['k_spring'])
                model.addRestraint(nucl)
                logger.info(nucl)

        # ========Optimization

        # set "run_name" variable into "runtime" dictionary
        cfg['runtime']['run_name'] = cfg['runtime']['step_hash'] + '_' + str(
            struct_id)

        # run optimization of the structures, by enforcing excluded volume, polymer and envelope restraints
        model.optimize(cfg)

        # save optimization results (both optimized coordinates and polymer violations) into a .hms file
        with HmsFile(ofname, 'w') as hms:
            hms.saveModel(struct_id, model)
            hms.saveViolations(pp)

        # make sure write was successful
        with HmsFile(ofname, 'r') as hms:
            if not np.all(hms.get_coordinates() == model.getCoordinates()):
                raise RuntimeError('error writing the file %s' % ofname)

        # create .ready file, which signals to the poller that optimization went to completion
        open(readyfile, 'w').close()  # touch the ready-file
예제 #19
0
    def teardown_poller(self):
        """ Reopen HSS file, overwrite ALL coordinates, close file """

        # the context manager closes the file also when the write fails
        with HssFile(self.hssfilename, 'r+') as _hss:
            _hss.set_coordinates(self._hss_crd)
예제 #20
0
def modeling_task(struct_id, cfg_file):
    '''
    Optimize a single structure and write its results to disk.

    Worker routine intended to be mapped in parallel over structure indices
    (typically as a partial with every argument but the structure index
    fixed). It loads the configuration, resolves the output templates,
    reads the input data, runs the minimization kernel and stores the new
    coordinates together with the kernel's run information.

    Parameters
    ----------
    struct_id : int
        number of the structure
    cfg_file : str
        configuration filename for the task

    Returns
    -------
    None
    '''
    cfg = Config(cfg_file)

    # output locations for this structure, resolved from the configured
    # templates ('crd_out' and 'info_out' are read below)
    paths = resolve_templates(cfg['mstep']['templates'], [struct_id])

    model = Model()

    # read radii, index and the coordinates of this structure only
    with HssFile(cfg['mstep']['input_hss'], 'r') as population:
        radii = population.radii
        index = population.index
        crd = population['coordinates'][:, struct_id, :][()]

    # one particle per coordinate row
    for bead, position in enumerate(crd):
        model.addParticle(position, radii[bead], Particle.NORMAL)

    # restraints: nuclear envelope, excluded volume, polymer chain
    model.addRestraint(Envelope(cfg['model']['nucleus_geometry']))
    model.addRestraint(Steric(cfg['model']['evfactor']))
    model.addRestraint(
        Polymer(index,
                cfg['model']['contact_range'],
                cfg['model']['contact_kspring']))

    # run the configured minimization kernel
    kernel = kernel_class[cfg['mstep']['kernel']]
    info = kernel.optimize(model, cfg['optimization'])

    # dump the optimized positions
    optimized = np.array([particle.pos for particle in model.particles],
                         dtype=COORD_DTYPE)
    np.save(paths['crd_out'], optimized)

    # make sure that is readable
    np.load(paths['crd_out'])

    def _render(value):
        # floats and ints get fixed-width formats, anything else goes via str()
        if isinstance(value, float):
            return '{:9.2f}'.format(value)
        if isinstance(value, int):
            return '{:7d}'.format(value)
        return str(value)

    # tab-terminated fields, in the order given by the kernel
    with open(paths['info_out'], 'w') as report:
        for key in kernel.INFO_KEYS:
            report.write(_render(info[key]) + '\t')
예제 #21
0
 def skip(self):
     """ Refresh the violation score from an existing intermediate .hss file.

     Looks for ``<intermediate_name>.hss``; when that file exists, it is
     opened read-only, passed to ``log_stats`` together with ``self.cfg``
     and the returned value is stored in
     ``self.cfg['runtime']['violation_score']``. Does nothing otherwise.
     """
     hss_path = self.intermediate_name() + '.hss'

     # nothing to report if the intermediate population file is missing
     if not os.path.isfile(hss_path):
         return

     with HssFile(hss_path, 'r') as population:
         self.cfg['runtime']['violation_score'] = log_stats(
             population, self.cfg)
예제 #22
0
    def setup_poller(self):
        """ Load the coordinates of the HSS population file into ``self._hss_crd``.

        Opens ``self.hssfilename`` read-only and stores its ``coordinates``
        attribute on the instance. The file is managed by a ``with`` block so
        the handle is released even when reading raises.
        """

        # NOTE(review): presumably `coordinates` returns an in-memory array
        # that stays valid after the file is closed - confirm in HssFile.
        with HssFile(self.hssfilename, 'r') as hss:
            self._hss_crd = hss.coordinates
예제 #23
0
def debug_minimization(cfg, struct_id, rname, **kwargs):
    """
    Re-run the minimization of a single structure, keeping temporary files.

    Builds a model for structure ``struct_id`` from the population file
    ``cfg['structure_output']``, adds excluded volume, nuclear envelope and
    polymer restraints, optimizes it and saves the model plus the violations
    of the monitored restraints into ``./mstep_<struct_id>.hms``.

    Parameters
    ----------
    cfg : dict or anything accepted by ``Config``
        configuration. A non-dict value is passed to ``Config``. If the file
        ``cfg['step_db']`` exists, the last entry of the step history is
        merged in. The configuration object is modified in place.
    struct_id : int
        index of the structure to minimize
    rname : str
        run name; also used as prefix of the ``.ready`` file
    **kwargs
        merged into ``cfg['optimization']['optimizer_options']``

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        if ``cfg['model']['nucleus_shape']`` is neither ``'sphere'`` nor
        ``'ellipsoid'``
    RuntimeError
        if the coordinates read back from the .hms file differ from the
        coordinates of the optimized model
    """
    if not isinstance(cfg, dict):
        cfg = Config(cfg)
    if os.path.isfile(cfg['step_db']):
        # start from the state recorded by the most recent step
        db = StepDB(cfg)
        h = db.get_history()
        cfg.update(h[-1])

    cfg['optimization']['optimizer_options'].update(kwargs)
    cfg['optimization']['keep_temporary_files'] = True

    step_id = rname

    hssfilename = cfg['structure_output']

    # read index, radii, coordinates
    with HssFile(hssfilename, 'r') as hss:
        index = hss.index
        radii = hss.radii
        if cfg.get('random_shuffling', False):
            crd = generate_random_in_sphere(radii, cfg['model']['nucleus_radius'])
        else:
            crd = hss.get_struct_crd(struct_id)

    # init Model
    model = Model(uid=struct_id)

    # get the chain ids: one id per chromosome, repeated for each of its beads
    chain_ids = np.concatenate([[i] * s for i, s in enumerate(index.chrom_sizes)])

    # add particles into model
    n_particles = len(crd)
    for i in range(n_particles):
        model.addParticle(crd[i], radii[i], Particle.NORMAL, chainID=chain_ids[i])

    # ========Add restraint
    # restraints whose violations are saved into the .hms file
    monitored_restraints = []

    # add excluded volume restraint
    ex = Steric(cfg['model']['evfactor'])
    model.addRestraint(ex)

    # add nucleus envelope restraint
    if cfg['model']['nucleus_shape'] == 'sphere':
        ev = Envelope(cfg['model']['nucleus_shape'],
                      cfg['model']['nucleus_radius'],
                      cfg['model']['contact_kspring'])
    elif cfg['model']['nucleus_shape'] == 'ellipsoid':
        ev = Envelope(cfg['model']['nucleus_shape'],
                      cfg['model']['nucleus_semiaxes'],
                      cfg['model']['contact_kspring'])
    else:
        raise NotImplementedError('Invalid nucleus shape')
    model.addRestraint(ev)

    # add consecutive polymer restraint
    pp = Polymer(index,
                 cfg['model']['contact_range'],
                 cfg['model']['contact_kspring'])
    model.addRestraint(pp)
    monitored_restraints.append(pp)

    # add Hi-C restraint (currently disabled)
    # if "Hi-C" in cfg['restraints']:
    #     dictHiC = cfg['restraints']['Hi-C']
    #     actdist_file = cfg['runtime']['Hi-C']['actdist_file']
    #     contact_range = dictHiC.get( 'contact_range', 2.0 )
    #     k = dictHiC.get( 'contact_kspring', 1.0 )

    #     hic = HiC(actdist_file, contact_range, k)
    #     model.addRestraint(hic)
    #     monitored_restraints.append(hic)

    # if "sprite" in cfg['restraints']:
    #     sprite_opt = cfg['restraints']['sprite']
    #     sprite = Sprite(
    #         sprite_opt['assignment_file'],
    #         sprite_opt['volume_fraction'],
    #         struct_id,
    #         sprite_opt['kspring']
    #     )
    #     model.addRestraint(sprite)
    #     monitored_restraints.append(sprite)

    # ========Optimization
    # optimize model
    cfg['runtime']['run_name'] = rname
    model.optimize(cfg)

    tol = cfg.get('violation_tolerance', 0.01)
    lockfile = os.path.join('.', '%s.%d.ready' % (step_id, struct_id))
    with FileLock(lockfile):
        open(lockfile, 'w').close()  # touch the ready-file
        ofname = os.path.join('.', 'mstep_%d.hms' % struct_id)
        with HmsFile(ofname, 'w') as hms:
            hms.saveModel(struct_id, model)

            for r in monitored_restraints:
                hms.saveViolations(r, tolerance=tol)

        # double check it has been written correctly: the write failed only
        # when the stored coordinates do NOT match the model's
        with HmsFile(ofname, 'r') as hms:
            if not np.all(hms.get_coordinates() == model.getCoordinates()):
                raise RuntimeError('error writing the file %s' % ofname)