Example 1
    def print_on_training(self, 
                          tb_writer,
                          cur_batch,
                          sess, 
                          natoms,
                          feed_dict_test,
                          feed_dict_batch):  # deprecated

        run_data = [
            self.l2_l,
            self.l2_more['l2_ener_loss'],
            self.l2_more['l2_force_loss'],
            self.l2_more['l2_virial_loss'],
            self.l2_more['l2_atom_ener_loss'],
            self.l2_more['l2_pref_force_loss']
        ]

        # first train data
        train_out = run_sess(sess, run_data, feed_dict=feed_dict_batch)
        error_train, error_e_train, error_f_train, error_v_train, error_ae_train, error_pf_train = train_out

        # then test data; if a tensorboard log writer is present, compute the summary
        # and write tensorboard logs
        if tb_writer:
            summary_merged_op = tf.summary.merge([self.l2_loss_summary, self.l2_loss_ener_summary, self.l2_loss_force_summary, self.l2_loss_virial_summary])
            run_data.insert(0, summary_merged_op)

        test_out = run_sess(sess, run_data, feed_dict=feed_dict_test)

        if tb_writer:
            summary = test_out.pop(0)
            tb_writer.add_summary(summary, cur_batch)

        error_test, error_e_test, error_f_test, error_v_test, error_ae_test, error_pf_test = test_out

        
        print_str = ""
        prop_fmt = "   %11.2e %11.2e"
        print_str += prop_fmt % (np.sqrt(error_test), np.sqrt(error_train))
        if self.has_e:
            print_str += prop_fmt % (np.sqrt(error_e_test) / natoms[0], np.sqrt(error_e_train) / natoms[0])
        if self.has_ae:
            print_str += prop_fmt % (np.sqrt(error_ae_test), np.sqrt(error_ae_train))
        if self.has_f:
            print_str += prop_fmt % (np.sqrt(error_f_test), np.sqrt(error_f_train))
        if self.has_v:
            print_str += prop_fmt % (np.sqrt(error_v_test) / natoms[0], np.sqrt(error_v_train) / natoms[0])
        if self.has_pf:
            print_str += prop_fmt % (np.sqrt(error_pf_test), np.sqrt(error_pf_train))

        return print_str      
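
Every example on this page routes its TensorFlow calls through run_sess. As a reference point, here is a minimal sketch of what such a wrapper might look like, assuming it simply forwards to tf.Session.run and converts out-of-memory failures into a clearer error; the real deepmd-kit helper may differ in its error type and message.

    def run_sess(sess, *args, **kwargs):
        # Sketch only: forward everything to Session.run, surfacing
        # resource exhaustion (OOM) with an actionable message.
        try:
            return sess.run(*args, **kwargs)
        except tf.errors.ResourceExhaustedError as e:
            raise RuntimeError(
                "The session ran out of memory; consider reducing the "
                "batch size or the model size.") from e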
Example 2
    def print_on_training(self, tb_writer, cur_batch, sess, natoms,
                          feed_dict_test, feed_dict_batch):  # deprecated

        # YHT: added to calculate the number of atoms
        atoms = 0
        if self.type_sel is not None:
            for w in self.type_sel:
                atoms += natoms[2 + w]
        else:
            atoms = natoms[0]

        run_data = [
            self.l2_l, self.l2_more['local_loss'], self.l2_more['global_loss']
        ]
        summary_list = [self.l2_loss_summary]
        if self.local_weight > 0.0:
            summary_list.append(self.l2_loss_local_summary)
        if self.global_weight > 0.0:
            summary_list.append(self.l2_loss_global_summary)

        # first train data
        error_train = run_sess(sess, run_data, feed_dict=feed_dict_batch)

        # then test data; if a tensorboard log writer is present, compute the summary
        # and write tensorboard logs
        if tb_writer:
            #summary_merged_op = tf.summary.merge([self.l2_loss_summary])
            summary_merged_op = tf.summary.merge(summary_list)
            run_data.insert(0, summary_merged_op)

        test_out = run_sess(sess, run_data, feed_dict=feed_dict_test)

        if tb_writer:
            summary = test_out.pop(0)
            tb_writer.add_summary(summary, cur_batch)

        error_test = test_out

        print_str = ""
        prop_fmt = "   %11.2e %11.2e"
        print_str += prop_fmt % (np.sqrt(error_test[0]),
                                 np.sqrt(error_train[0]))
        if self.local_weight > 0.0:
            print_str += prop_fmt % (np.sqrt(error_test[1]),
                                     np.sqrt(error_train[1]))
        if self.global_weight > 0.0:
            print_str += prop_fmt % (np.sqrt(error_test[2]) / atoms,
                                     np.sqrt(error_train[2]) / atoms)

        return print_str
Example 3
    def _eval_fv(self, coords, cells, atom_types, ext_f):
        # reshape the inputs
        cells = np.reshape(cells, [-1, 9])
        nframes = cells.shape[0]
        coords = np.reshape(coords, [nframes, -1])
        natoms = coords.shape[1] // 3

        # sort inputs
        coords, atom_types, imap, sel_at, sel_imap = self.sort_input(
            coords, atom_types, sel_atoms=self.get_sel_type())

        # make natoms_vec and default_mesh
        natoms_vec = self.make_natoms_vec(atom_types)
        assert (natoms_vec[0] == natoms)
        default_mesh = make_default_mesh(cells)

        # evaluate
        tensor = []
        feed_dict_test = {}
        feed_dict_test[self.t_natoms] = natoms_vec
        feed_dict_test[self.t_type] = np.tile(atom_types,
                                              [nframes, 1]).reshape([-1])
        feed_dict_test[self.t_coord] = coords.reshape([-1])
        feed_dict_test[self.t_box] = cells.reshape([-1])
        feed_dict_test[self.t_mesh] = default_mesh.reshape([-1])
        feed_dict_test[self.t_ef] = ext_f.reshape([-1])
        # print(run_sess(self.sess, tf.shape(self.t_tensor), feed_dict = feed_dict_test))
        fout, vout, avout \
            = run_sess(self.sess, [self.force, self.virial, self.av],
                            feed_dict = feed_dict_test)
        # print('fout: ', fout.shape, fout)
        fout = self.reverse_map(np.reshape(fout, [nframes, -1, 3]), imap)
        fout = np.reshape(fout, [nframes, -1])
        return fout, vout, avout
Example 4
    def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, List[int]]:
        """
        Get the data statistics of the training data, including the nearest neighbor distance between atoms and the max neighbor size of atoms.

        Parameters
        ----------
        data
                Class for manipulating many data systems. It is implemented with the help of DeepmdData.
        
        Returns
        -------
        min_nbor_dist
                The nearest distance between neighbor atoms
        max_nbor_size
                A list with ntypes integers, denotes the actual achieved max sel
        """
        self.min_nbor_dist = 100.0
        self.max_nbor_size = [0] * self.ntypes

        # for ii in tqdm(range(len(data.system_dirs)), desc = 'DEEPMD INFO    |-> deepmd.utils.neighbor_stat\t\t\tgetting neighbor status'):
        for ii in range(len(data.system_dirs)):
            for jj in data.data_systems[ii].dirs:
                data_set = data.data_systems[ii]._load_set(jj)
                for kk in range(np.array(data_set['type']).shape[0]):
                    mn, dt \
                        = run_sess(self.sub_sess, [self._max_nbor_size, self._min_nbor_dist],
                                            feed_dict = {
                                                self.place_holders['coord']: np.array(data_set['coord'])[kk].reshape([-1, data.natoms[ii] * 3]),
                                                self.place_holders['type']: np.array(data_set['type'])[kk].reshape([-1, data.natoms[ii]]),
                                                self.place_holders['natoms_vec']: np.array(data.natoms_vec[ii]),
                                                self.place_holders['box']: np.array(data_set['box'])[kk].reshape([-1, 9]),
                                                self.place_holders['default_mesh']: np.array(data.default_mesh[ii]),
                                            })
                    if dt.size != 0:
                        dt = np.min(dt)
                    else:
                        dt = self.rcut
                        log.warning(
                            "Atoms with no neighbors found in %s. Please make sure it's what you expected."
                            % jj)

                    if dt < self.min_nbor_dist:
                        if math.isclose(dt, 0., rel_tol=1e-6):
                            # it's unexpected that the distance between two atoms is zero
                            # zero distance will cause nan (#874)
                            raise RuntimeError(
                                "Some atoms in %s are overlapping. Please check your"
                                " training data to remove duplicated atoms." %
                                jj)
                        self.min_nbor_dist = dt
                    for ww in range(self.ntypes):
                        var = np.max(mn[:, ww])
                        if var > self.max_nbor_size[ww]:
                            self.max_nbor_size[ww] = var

        log.info('training data with min nbor dist: ' +
                 str(self.min_nbor_dist))
        log.info('training data with max nbor size: ' +
                 str(self.max_nbor_size))
        return self.min_nbor_dist, self.max_nbor_size
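
A hedged usage sketch of get_stat follows; the NeighborStat construction and the DeepmdDataSystem arguments are assumptions for illustration, not the exact API.

    # Hypothetical construction; only the get_stat call mirrors the code above.
    data = DeepmdDataSystem(systems=["./system1"], batch_size=1,
                            test_size=1, rcut=6.0)
    stat = NeighborStat(ntypes=2, rcut=6.0)
    min_nbor_dist, max_nbor_size = stat.get_stat(data)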
Example 5
 def _compute_dstats_sys_se_r(self, data_coord, data_box, data_atype,
                              natoms_vec, mesh):
     dd_all \
         = run_sess(self.sub_sess, self.stat_descrpt,
                             feed_dict = {
                                 self.place_holders['coord']: data_coord,
                                 self.place_holders['type']: data_atype,
                                 self.place_holders['natoms_vec']: natoms_vec,
                                 self.place_holders['box']: data_box,
                                 self.place_holders['default_mesh']: mesh,
                             })
     natoms = natoms_vec
     dd_all = np.reshape(dd_all, [-1, self.ndescrpt * natoms[0]])
     start_index = 0
     sysr = []
     sysn = []
     sysr2 = []
     for type_i in range(self.ntypes):
         end_index = start_index + self.ndescrpt * natoms[2 + type_i]
         dd = dd_all[:, start_index:end_index]
         dd = np.reshape(dd, [-1, self.ndescrpt])
         start_index = end_index
         # compute
         dd = np.reshape(dd, [-1, 1])
         ddr = dd[:, :1]
         sumr = np.sum(ddr)
         sumn = dd.shape[0]
         sumr2 = np.sum(np.multiply(ddr, ddr))
         sysr.append(sumr)
         sysn.append(sumn)
         sysr2.append(sumr2)
     return sysr, sysr2, sysn
Example 6
def get_tensor_by_name_from_graph(graph: tf.Graph,
                                  tensor_name: str) -> tf.Tensor:
    """
    Load tensor value from the given tf.Graph object

    Parameters
    ----------
    graph : tf.Graph
        The input TensorFlow graph
    tensor_name : str
        Indicates which tensor will be loaded from the frozen model

    Returns
    -------
    tf.Tensor
        The tensor which was loaded from the frozen model

    Raises
    ------
    GraphWithoutTensorError
        Raised if the tensor_name is not found within the frozen model
    """
    try:
        tensor = graph.get_tensor_by_name(tensor_name + ':0')
    except KeyError as e:
        raise GraphWithoutTensorError() from e
    with tf.Session(graph=graph) as sess:
        tensor = run_sess(sess, tensor)
    return tensor
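
A brief usage sketch; load_frozen_graph is a hypothetical helper, and the tensor name 'descrpt_attr/rcut' is borrowed from the tensors listed in Example 25. Note that the function appends ':0' to the name itself.

    graph = load_frozen_graph("frozen_model.pb")  # hypothetical loader
    rcut = get_tensor_by_name_from_graph(graph, "descrpt_attr/rcut")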
Example 7
 def _eval_descriptor_inner(self,
         coords: np.ndarray,
         cells: np.ndarray,
         atom_types: List[int],
         fparam: Optional[np.ndarray] = None,
         aparam: Optional[np.ndarray] = None,
         efield: Optional[np.ndarray] = None,
         ) -> np.ndarray:
     natoms, nframes = self._get_natoms_and_nframes(coords, atom_types)
     feed_dict_test, imap = self._prepare_feed_dict(coords, cells, atom_types, fparam, aparam, efield)
     descriptor, = run_sess(self.sess, [self.t_descriptor], feed_dict = feed_dict_test)
     return self.reverse_map(np.reshape(descriptor, [nframes, natoms, -1]), imap)
Example 8
    def model_type(self) -> str:
        """Get type of model.

        :type: str
        """
        if not self._model_type:
            t_mt = self._get_tensor("model_attr/model_type:0")
            sess = tf.Session(graph=self.graph,
                              config=default_tf_session_config)
            [mt] = run_sess(sess, [t_mt], feed_dict={})
            self._model_type = mt.decode("utf-8")
        return self._model_type
Example 9
    def valid_on_the_fly(self,
                         fp,
                         train_batches,
                         valid_batches,
                         print_header=False):
        train_results = self.get_evaluation_results(train_batches)
        valid_results = self.get_evaluation_results(valid_batches)

        cur_batch = self.cur_batch
        current_lr = run_sess(self.sess, self.learning_rate)
        if print_header:
            self.print_header(fp, train_results, valid_results)
        self.print_on_training(fp, train_results, valid_results, cur_batch, current_lr)
Example 10
 def eval(self, sess, feed_dict, natoms):
     run_data = [
         self.l2_l,
         self.l2_more['l2_ener_loss'],
         self.l2_more['l2_ener_dipole_loss']
     ]
     error, error_e, error_ed = run_sess(sess, run_data, feed_dict=feed_dict)
     results = {
         'natoms': natoms[0],
         'rmse': np.sqrt(error),
         'rmse_e': np.sqrt(error_e) / natoms[0],
         'rmse_ed': np.sqrt(error_ed)
     }
     return results
Example 11
    def _eval_inner(
        self,
        coords,
        cells,
        atom_types,
        fparam=None,
        aparam=None,
        atomic=False,
        efield=None
    ):
        natoms, nframes = self._get_natoms_and_nframes(coords, atom_types)
        feed_dict_test, imap = self._prepare_feed_dict(coords, cells, atom_types, fparam, aparam, efield)
        t_out = [self.t_energy, 
                 self.t_force, 
                 self.t_virial]
        if atomic:
            t_out += [self.t_ae, 
                      self.t_av]

        v_out = run_sess(self.sess, t_out, feed_dict = feed_dict_test)
        energy = v_out[0]
        force = v_out[1]
        virial = v_out[2]
        if atomic:
            ae = v_out[3]
            av = v_out[4]

        # reverse map of the outputs
        force = self.reverse_map(np.reshape(force, [nframes, -1, 3]), imap)
        if atomic:
            ae = self.reverse_map(np.reshape(ae, [nframes, -1, 1]), imap)
            av = self.reverse_map(np.reshape(av, [nframes, -1, 9]), imap)

        energy = np.reshape(energy, [nframes, 1])
        force = np.reshape(force, [nframes, natoms, 3])
        virial = np.reshape(virial, [nframes, 9])
        if atomic:
            ae = np.reshape(ae, [nframes, natoms, 1])
            av = np.reshape(av, [nframes, natoms, 9])
            return energy, force, virial, ae, av
        else:
            return energy, force, virial
Example 12
    def eval(self, coord: np.ndarray, charge: np.ndarray,
             box: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Evaluate the energy, force and virial.
        
        Parameters
        ----------
        coord
                The coordinates of atoms
        charge
                The atomic charge
        box
                The simulation region. PBC is assumed

        Returns
        -------
        e
                The energy 
        f
                The force 
        v
                The virial 
        """
        coord = np.array(coord)
        charge = np.array(charge)
        box = np.array(box)
        nframes = charge.shape[0]
        natoms = charge.shape[1]
        coord = np.reshape(coord, [nframes * 3 * natoms])
        charge = np.reshape(charge, [nframes * natoms])
        box = np.reshape(box, [nframes * 9])

        [energy, force, virial] \
            = run_sess(self.sess, [self.t_energy, self.t_force, self.t_virial],
                            feed_dict = {
                                self.t_coord:  coord,
                                self.t_charge: charge,
                                self.t_box:    box,
                                self.t_nloc:   [natoms],
                            })

        return energy, force, virial
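
A hedged usage sketch of this eval; the EwaldRecp construction follows Example 14, and the concrete geometry is illustrative only.

    er = EwaldRecp(1.0, 1.0)                      # ewald_h, ewald_beta
    coord = np.array([[0., 0., 0., 1., 0., 0.]])  # (nframes, natoms * 3)
    charge = np.array([[1.0, -1.0]])              # (nframes, natoms)
    box = 10.0 * np.eye(3).reshape([1, 9])        # (nframes, 9)
    e, f, v = er.eval(coord, charge, box)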
Example 13
    def model_version(self) -> str:
        """Get version of model.

        Returns
        -------
        str
            version of model
        """
        if not self._model_version:
            try:
                t_mt = self._get_tensor("model_attr/model_version:0")
            except KeyError:
                # For deepmd-kit version 0.x - 1.x, set model version to 0.0
                self._model_version = "0.0"
            else:
                sess = tf.Session(graph=self.graph,
                                  config=default_tf_session_config)
                [mt] = run_sess(sess, [t_mt], feed_dict={})
                self._model_version = mt.decode("utf-8")
        return self._model_version
Example 14
 def __init__(self,
              model_name: str,
              model_charge_map: List[float],
              sys_charge_map: List[float],
              ewald_h: float = 1,
              ewald_beta: float = 1) -> None:
     """
     Constructor 
     """
     # the dipole model is loaded with prefix 'dipole_charge'
     self.modifier_prefix = 'dipole_charge'
     # init dipole model
     DeepDipole.__init__(self,
                         model_name,
                         load_prefix=self.modifier_prefix,
                         default_tf_graph=True)
     self.model_name = model_name
     self.model_charge_map = model_charge_map
     self.sys_charge_map = sys_charge_map
     self.sel_type = list(self.get_sel_type())
     # init ewald recp
     self.ewald_h = ewald_h
     self.ewald_beta = ewald_beta
     self.er = EwaldRecp(self.ewald_h, self.ewald_beta)
     # dimension of dipole
     self.ext_dim = 3
     self.t_ndesc = self.graph.get_tensor_by_name(
         os.path.join(self.modifier_prefix, 'descrpt_attr/ndescrpt:0'))
     self.t_sela = self.graph.get_tensor_by_name(
         os.path.join(self.modifier_prefix, 'descrpt_attr/sel:0'))
     [self.ndescrpt, self.sel_a] = run_sess(self.sess,
                                            [self.t_ndesc, self.t_sela])
     self.sel_r = [0 for ii in range(len(self.sel_a))]
     self.nnei_a = np.cumsum(self.sel_a)[-1]
     self.nnei_r = np.cumsum(self.sel_r)[-1]
     self.nnei = self.nnei_a + self.nnei_r
     self.ndescrpt_a = self.nnei_a * 4
     self.ndescrpt_r = self.nnei_r * 1
     assert (self.ndescrpt == self.ndescrpt_a + self.ndescrpt_r)
     self.force = None
     self.ntypes = len(self.sel_a)
Example 15
    def eval(self, sess, feed_dict, natoms):
        atoms = 0
        if self.type_sel is not None:
            for w in self.type_sel:
                atoms += natoms[2 + w]
        else:
            atoms = natoms[0]

        run_data = [
            self.l2_l, self.l2_more['local_loss'], self.l2_more['global_loss']
        ]
        error, error_lc, error_gl = run_sess(sess,
                                             run_data,
                                             feed_dict=feed_dict)

        results = {"natoms": atoms, "rmse": np.sqrt(error)}
        if self.local_weight > 0.0:
            results["rmse_lc"] = np.sqrt(error_lc)
        if self.global_weight > 0.0:
            results["rmse_gl"] = np.sqrt(error_gl) / atoms
        return results
Example 16
 def eval(self, sess, feed_dict, natoms):
     run_data = [
         self.l2_l,
         self.l2_more['l2_ener_loss'],
         self.l2_more['l2_force_loss'],
         self.l2_more['l2_virial_loss'],
         self.l2_more['l2_atom_ener_loss'],
         self.l2_more['l2_pref_force_loss']
     ]
     error, error_e, error_f, error_v, error_ae, error_pf = run_sess(sess, run_data, feed_dict=feed_dict)
     results = {"natoms": natoms[0], "rmse": np.sqrt(error)}
     if self.has_e:
         results["rmse_e"] = np.sqrt(error_e) / natoms[0]
     if self.has_ae:
         results["rmse_ae"] = np.sqrt(error_ae)
     if self.has_f:
         results["rmse_f"] = np.sqrt(error_f)
     if self.has_v:
         results["rmse_v"] = np.sqrt(error_v) / natoms[0]
     if self.has_pf:
         results["rmse_pf"] = np.sqrt(error_pf)
     return results
Example 17
    def train(self, train_data=None, valid_data=None):

        # if valid_data is None:  # no validation set specified.
        #     valid_data = train_data  # using training set as validation set.

        stop_batch = self.stop_batch
        self._init_session()

        # Before data sharding is enabled, only the chief does evaluation and records it
        # self.print_head()
        fp = None
        if self.run_opt.is_chief:
            fp = open(self.disp_file, "a")

        cur_batch = run_sess(self.sess, self.global_step)
        is_first_step = True
        self.cur_batch = cur_batch
        log.info(
            "start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e"
            % (run_sess(self.sess, self.learning_rate),
               self.lr.value(cur_batch), self.lr.decay_steps_,
               self.lr.decay_rate_, self.lr.value(stop_batch)))

        prf_options = None
        prf_run_metadata = None
        if self.profiling:
            prf_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            prf_run_metadata = tf.RunMetadata()

        # set tensorboard execution environment
        if self.tensorboard:
            summary_merged_op = tf.summary.merge_all()
            # Remove TB old logging directory from previous run
            try:
                shutil.rmtree(self.tensorboard_log_dir)
            except FileNotFoundError:
                pass  # directory does not exist, this is OK
            except Exception as e:
                # general error when removing directory, warn user
                log.exception(
                    f"Could not remove old tensorboard logging directory: "
                    f"{self.tensorboard_log_dir}. Error: {e}")
            else:
                log.debug("Removing old tensorboard log directory.")
            tb_train_writer = tf.summary.FileWriter(
                self.tensorboard_log_dir + '/train', self.sess.graph)
            tb_valid_writer = tf.summary.FileWriter(self.tensorboard_log_dir +
                                                    '/test')
        else:
            tb_train_writer = None
            tb_valid_writer = None
        if self.enable_profiler:
            # https://www.tensorflow.org/guide/profiler
            tfv2.profiler.experimental.start(self.tensorboard_log_dir)

        train_time = 0

        while cur_batch < stop_batch:

            # first round validation:
            train_batch = train_data.get_batch()
            if self.display_in_training and is_first_step:
                if self.run_opt.is_chief:
                    valid_batches = [
                        valid_data.get_batch()
                        for ii in range(self.valid_numb_batch)
                    ] if valid_data is not None else None
                    self.valid_on_the_fly(fp, [train_batch],
                                          valid_batches,
                                          print_header=True)
                is_first_step = False

            if self.timing_in_training: tic = time.time()
            train_feed_dict = self.get_feed_dict(train_batch, is_training=True)
            # use tensorboard to visualize the training of deepmd-kit
            # it will take some extra execution time to generate the tensorboard data
            if self.tensorboard and (cur_batch % self.tensorboard_freq == 0):
                summary, _ = run_sess(self.sess,
                                      [summary_merged_op, self.train_op],
                                      feed_dict=train_feed_dict,
                                      options=prf_options,
                                      run_metadata=prf_run_metadata)
                tb_train_writer.add_summary(summary, cur_batch)
            else:
                run_sess(self.sess, [self.train_op],
                         feed_dict=train_feed_dict,
                         options=prf_options,
                         run_metadata=prf_run_metadata)
            if self.timing_in_training: toc = time.time()
            if self.timing_in_training: train_time += toc - tic
            cur_batch = run_sess(self.sess, self.global_step)
            self.cur_batch = cur_batch

            # on-the-fly validation
            if self.display_in_training and (cur_batch % self.disp_freq == 0):
                if self.timing_in_training:
                    tic = time.time()
                if self.run_opt.is_chief:
                    valid_batches = [
                        valid_data.get_batch()
                        for ii in range(self.valid_numb_batch)
                    ] if valid_data is not None else None
                    self.valid_on_the_fly(fp, [train_batch], valid_batches)
                if self.timing_in_training:
                    toc = time.time()
                    test_time = toc - tic
                    log.info(
                        "batch %7d training time %.2f s, testing time %.2f s" %
                        (cur_batch, train_time, test_time))
                    train_time = 0
                if self.save_freq > 0 and cur_batch % self.save_freq == 0 and self.saver is not None:
                    self.save_checkpoint(cur_batch)
        if (self.save_freq == 0 or cur_batch == 0
                or cur_batch % self.save_freq != 0) and self.saver is not None:
            self.save_checkpoint(cur_batch)
        if self.run_opt.is_chief:
            fp.close()
        if self.profiling and self.run_opt.is_chief:
            fetched_timeline = timeline.Timeline(prf_run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open(self.profiling_file, 'w') as f:
                f.write(chrome_trace)
        if self.enable_profiler and self.run_opt.is_chief:
            tfv2.profiler.experimental.stop()
Example 18
    def _eval_inner(self,
                    coords,
                    cells,
                    atom_types,
                    fparam=None,
                    aparam=None,
                    atomic=False,
                    efield=None):
        # standardize the shape of inputs
        atom_types = np.array(atom_types, dtype=int).reshape([-1])
        natoms = atom_types.size
        coords = np.reshape(np.array(coords), [-1, natoms * 3])
        nframes = coords.shape[0]
        if cells is None:
            pbc = False
            # make cells to work around the requirement of pbc
            cells = np.tile(np.eye(3), [nframes, 1]).reshape([nframes, 9])
        else:
            pbc = True
            cells = np.array(cells).reshape([nframes, 9])

        if self.has_fparam:
            assert (fparam is not None)
            fparam = np.array(fparam)
        if self.has_aparam:
            assert (aparam is not None)
            aparam = np.array(aparam)
        if self.has_efield:
            assert (
                efield is not None
            ), "you are using a model with external field, parameter efield should be provided"
            efield = np.array(efield)

        # reshape the inputs
        if self.has_fparam:
            fdim = self.get_dim_fparam()
            if fparam.size == nframes * fdim:
                fparam = np.reshape(fparam, [nframes, fdim])
            elif fparam.size == fdim:
                fparam = np.tile(fparam.reshape([-1]), [nframes, 1])
            else:
                raise RuntimeError(
                    'got wrong size of frame param, should be either %d x %d or %d'
                    % (nframes, fdim, fdim))
        if self.has_aparam:
            fdim = self.get_dim_aparam()
            if aparam.size == nframes * natoms * fdim:
                aparam = np.reshape(aparam, [nframes, natoms * fdim])
            elif aparam.size == natoms * fdim:
                aparam = np.tile(aparam.reshape([-1]), [nframes, 1])
            elif aparam.size == fdim:
                aparam = np.tile(aparam.reshape([-1]), [nframes, natoms])
            else:
                raise RuntimeError(
                    'got wrong size of atom param, should be either %d x %d x %d or %d x %d or %d'
                    % (nframes, natoms, fdim, natoms, fdim, fdim))

        # sort inputs
        coords, atom_types, imap = self.sort_input(coords, atom_types)
        if self.has_efield:
            efield = np.reshape(efield, [nframes, natoms, 3])
            efield = efield[:, imap, :]
            efield = np.reshape(efield, [nframes, natoms * 3])

        # make natoms_vec and default_mesh
        natoms_vec = self.make_natoms_vec(atom_types)
        assert (natoms_vec[0] == natoms)

        # evaluate
        feed_dict_test = {}
        feed_dict_test[self.t_natoms] = natoms_vec
        feed_dict_test[self.t_type] = np.tile(atom_types,
                                              [nframes, 1]).reshape([-1])
        t_out = [self.t_energy, self.t_force, self.t_virial]
        if atomic:
            t_out += [self.t_ae, self.t_av]

        feed_dict_test[self.t_coord] = np.reshape(coords, [-1])
        feed_dict_test[self.t_box] = np.reshape(cells, [-1])
        if self.has_efield:
            feed_dict_test[self.t_efield] = np.reshape(efield, [-1])
        if pbc:
            feed_dict_test[self.t_mesh] = make_default_mesh(cells)
        else:
            feed_dict_test[self.t_mesh] = np.array([], dtype=np.int32)
        if self.has_fparam:
            feed_dict_test[self.t_fparam] = np.reshape(fparam, [-1])
        if self.has_aparam:
            feed_dict_test[self.t_aparam] = np.reshape(aparam, [-1])
        v_out = run_sess(self.sess, t_out, feed_dict=feed_dict_test)
        energy = v_out[0]
        force = v_out[1]
        virial = v_out[2]
        if atomic:
            ae = v_out[3]
            av = v_out[4]

        # reverse map of the outputs
        force = self.reverse_map(np.reshape(force, [nframes, -1, 3]), imap)
        if atomic:
            ae = self.reverse_map(np.reshape(ae, [nframes, -1, 1]), imap)
            av = self.reverse_map(np.reshape(av, [nframes, -1, 9]), imap)

        energy = np.reshape(energy, [nframes, 1])
        force = np.reshape(force, [nframes, natoms, 3])
        virial = np.reshape(virial, [nframes, 9])
        if atomic:
            ae = np.reshape(ae, [nframes, natoms, 1])
            av = np.reshape(av, [nframes, natoms, 9])
            return energy, force, virial, ae, av
        else:
            return energy, force, virial
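
The fparam/aparam branching above accepts several input sizes; a short sketch of the shapes it broadcasts, with hypothetical dimensions:

    # Assuming get_dim_fparam() == 2 and nframes == 3, either shape works:
    fparam_per_frame = np.ones([3, 2])  # used as-is
    fparam_shared = np.ones([2])        # tiled across the 3 frames
    # Assuming get_dim_aparam() == 1 and natoms == 4, aparam may be sized
    # nframes * natoms (one value per frame and atom), natoms (tiled over
    # frames), or 1 (tiled over frames and atoms).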
Example 19
    def _init_session(self):
        config = get_tf_session_config()
        device, idx = self.run_opt.my_device.split(":", 1)
        if device == "gpu":
            config.gpu_options.visible_device_list = idx
        self.sess = tf.Session(config=config)

        # Initializes or restore global variables
        init_op = tf.global_variables_initializer()
        if self.run_opt.is_chief:
            self.saver = tf.train.Saver(save_relative_paths=True)
            if self.run_opt.init_mode == 'init_from_scratch':
                log.info("initialize model from scratch")
                run_sess(self.sess, init_op)
                if not self.is_compress:
                    fp = open(self.disp_file, "w")
                    fp.close()
            elif self.run_opt.init_mode == 'init_from_model':
                log.info("initialize from model %s" % self.run_opt.init_model)
                run_sess(self.sess, init_op)
                self.saver.restore(self.sess, self.run_opt.init_model)
                run_sess(self.sess, self.global_step.assign(0))
                fp = open(self.disp_file, "w")
                fp.close()
            elif self.run_opt.init_mode == 'restart':
                log.info("restart from model %s" % self.run_opt.restart)
                run_sess(self.sess, init_op)
                self.saver.restore(self.sess, self.run_opt.restart)
            elif self.run_opt.init_mode == 'init_from_frz_model':
                log.info("initialize training from the frozen model")
                run_sess(self.sess, init_op)
                fp = open(self.disp_file, "w")
                fp.close()
            else:
                raise RuntimeError("unknown init mode")
        else:
            run_sess(self.sess, init_op)
            self.saver = None

        # Ensure variable consistency among tasks when training starts
        if self.run_opt.is_distrib:
            bcast_op = self.run_opt._HVD.broadcast_global_variables(0)
            if self.run_opt.is_chief:
                log.info('broadcast global variables to other tasks')
            else:
                log.info('receive global variables from task#0')
            run_sess(self.sess, bcast_op)
Example 20
def freeze(*,
           checkpoint_folder: str,
           output: str,
           node_names: Optional[str] = None,
           **kwargs):
    """Freeze the graph in supplied folder.

    Parameters
    ----------
    checkpoint_folder : str
        location of the folder with model
    output : str
        output file name
    node_names : Optional[str], optional
        names of nodes to output, by default None
    """
    # We retrieve our checkpoint fullpath
    checkpoint = tf.train.get_checkpoint_state(checkpoint_folder)
    input_checkpoint = checkpoint.model_checkpoint_path

    # expand the output file to full path
    output_graph = abspath(output)

    # Before exporting our graph, we need to specify our output nodes.
    # This is how TF decides which parts of the graph it has to keep
    # and which parts it can discard.
    # NOTE: this variable is plural, because you can have multiple output nodes
    # node_names = "energy_test,force_test,virial_test,t_rcut"

    # We clear devices to allow TensorFlow to control
    # on which device it will load operations
    clear_devices = True

    # We import the meta graph and retrieve a Saver
    try:
        # In case of parallel training
        import horovod.tensorflow as _
    except ImportError:
        pass
    saver = tf.train.import_meta_graph(f"{input_checkpoint}.meta",
                                       clear_devices=clear_devices)

    # We retrieve the protobuf graph definition
    graph = tf.get_default_graph()
    input_graph_def = graph.as_graph_def()
    nodes = [n.name for n in input_graph_def.node]

    # We start a session and restore the graph weights
    with tf.Session() as sess:
        saver.restore(sess, input_checkpoint)
        model_type = run_sess(sess, "model_attr/model_type:0",
                              feed_dict={}).decode("utf-8")
        if "modifier_attr/type" in nodes:
            modifier_type = run_sess(sess,
                                     "modifier_attr/type:0",
                                     feed_dict={}).decode("utf-8")
        else:
            modifier_type = None
        if node_names is None:
            output_node_list = _make_node_names(model_type, modifier_type)
            different_set = set(output_node_list) - set(nodes)
            if different_set:
                log.warning("The following nodes are not in the graph: %s. "
                            "Skip freezeing these nodes. You may be freezing "
                            "a checkpoint generated by an old version." %
                            different_set)
                # use intersection as output list
                output_node_list = list(set(output_node_list) & set(nodes))
        else:
            output_node_list = node_names.split(",")
        log.info(f"The following nodes will be frozen: {output_node_list}")

        # We use a built-in TF helper to export variables to constants
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,  # The session is used to retrieve the weights
            input_graph_def,  # The graph_def is used to retrieve the nodes
            output_node_list,  # The output node names are used to select the useful nodes
        )

        # Finally we serialize and dump the output graph to the filesystem
        with tf.gfile.GFile(output_graph, "wb") as f:
            f.write(output_graph_def.SerializeToString())
        log.info(f"{len(output_graph_def.node):d} ops in the final graph.")
Example 21
    def train(self, train_data=None, valid_data=None):

        # if valid_data is None:  # no validation set specified.
        #     valid_data = train_data  # using training set as validation set.

        stop_batch = self.stop_batch
        self._init_session()

        # Before data sharding is enabled, only the chief does evaluation and records it
        # self.print_head()
        fp = None
        if self.run_opt.is_chief:
            fp = open(self.disp_file, "a")

        cur_batch = run_sess(self.sess, self.global_step)
        is_first_step = True
        self.cur_batch = cur_batch
        log.info("start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e" % 
                 (run_sess(self.sess, self.learning_rate),
                  self.lr.value(cur_batch), 
                  self.lr.decay_steps_,
                  self.lr.decay_rate_,
                  self.lr.value(stop_batch)) 
        )

        prf_options = None
        prf_run_metadata = None
        if self.profiling:
            prf_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            prf_run_metadata = tf.RunMetadata()

        # set tensorboard execution environment
        if self.tensorboard:
            summary_merged_op = tf.summary.merge_all()
            # Remove TB old logging directory from previous run
            try:
                shutil.rmtree(self.tensorboard_log_dir)
            except FileNotFoundError:
                pass  # directory does not exist, this is OK
            except Exception as e:
                # general error when removing directory, warn user
                log.exception(
                    f"Could not remove old tensorboard logging directory: "
                    f"{self.tensorboard_log_dir}. Error: {e}"
                )
            else:
                log.debug("Removing old tensorboard log directory.")
            tb_train_writer = tf.summary.FileWriter(self.tensorboard_log_dir + '/train', self.sess.graph)
            tb_valid_writer = tf.summary.FileWriter(self.tensorboard_log_dir + '/test')
        else:
            tb_train_writer = None
            tb_valid_writer = None
        
        train_time = 0

        while cur_batch < stop_batch:

            # first round validation:
            train_batch = train_data.get_batch()
            if self.display_in_training and is_first_step:
                if self.run_opt.is_chief:
                    valid_batches = [valid_data.get_batch() for ii in range(self.valid_numb_batch)] if valid_data is not None else None
                    self.valid_on_the_fly(fp, [train_batch], valid_batches, print_header=True)
                is_first_step = False

            if self.timing_in_training: tic = time.time()
            train_feed_dict = self.get_feed_dict(train_batch, is_training=True)
            # use tensorboard to visualize the training of deepmd-kit
            # it will take some extra execution time to generate the tensorboard data
            if self.tensorboard and (cur_batch % self.tensorboard_freq == 0):
                summary, _ = run_sess(self.sess, [summary_merged_op, self.train_op], feed_dict=train_feed_dict,
                                           options=prf_options, run_metadata=prf_run_metadata)
                tb_train_writer.add_summary(summary, cur_batch)
            else:
                run_sess(self.sess, [self.train_op], feed_dict=train_feed_dict,
                              options=prf_options, run_metadata=prf_run_metadata)
            if self.timing_in_training: toc = time.time()
            if self.timing_in_training: train_time += toc - tic
            cur_batch = run_sess(self.sess, self.global_step)
            self.cur_batch = cur_batch

            # on-the-fly validation
            if self.display_in_training and (cur_batch % self.disp_freq == 0):
                if self.timing_in_training:
                    tic = time.time()
                if self.run_opt.is_chief:
                    valid_batches = [valid_data.get_batch() for ii in range(self.valid_numb_batch)] if valid_data is not None else None
                    self.valid_on_the_fly(fp, [train_batch], valid_batches)
                if self.timing_in_training:
                    toc = time.time()
                    test_time = toc - tic
                    log.info("batch %7d training time %.2f s, testing time %.2f s"
                                  % (cur_batch, train_time, test_time))
                    train_time = 0
                if self.save_freq > 0 and cur_batch % self.save_freq == 0 and self.saver is not None:
                    try:
                        ckpt_prefix = self.saver.save(self.sess, os.path.join(os.getcwd(), self.save_ckpt), global_step=cur_batch)
                    except google.protobuf.message.DecodeError as e:
                        raise GraphTooLargeError(
                            "The graph size exceeds 2 GB, the hard limitation of protobuf."
                            " Then a DecodeError was raised by protobuf. You should "
                            "reduce the size of your model."
                        ) from e
                    # make symlinks from prefix with step to that without step to break nothing
                    # get all checkpoint files
                    original_files = glob.glob(ckpt_prefix + ".*")
                    for ori_ff in original_files:
                        new_ff = self.save_ckpt + ori_ff[len(ckpt_prefix):]
                        try:
                            # remove old one
                            os.remove(new_ff)
                        except OSError:
                            pass
                        os.symlink(ori_ff, new_ff)
                    log.info("saved checkpoint %s" % self.save_ckpt)
        if self.run_opt.is_chief:
            fp.close()
        if self.profiling and self.run_opt.is_chief:
            fetched_timeline = timeline.Timeline(prf_run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open(self.profiling_file, 'w') as f:
                f.write(chrome_trace)
Example 22
 def get_global_step(self):
     return run_sess(self.sess, self.global_step)
Example 23
 def _run_default_sess(self):
     [self.ntypes, self.rcut, self.dfparam, self.daparam,
      self.tmap] = run_sess(self.sess, [
          self.t_ntypes, self.t_rcut, self.t_dfparam, self.t_daparam,
          self.t_tmap
      ])
Example 24
 def _run_default_sess(self):
     [self.ntypes, self.rcut, self.tmap, self.tselt, self.output_dim] \
         = run_sess(self.sess,
             [self.t_ntypes, self.t_rcut, self.t_tmap, self.t_sel_type, self.t_ouput_dim]
         )
Example 25
    def __init__(
        self,
        model_file: "Path",
        load_prefix: str = "load",
        default_tf_graph: bool = False,
        auto_batch_size: Union[bool, int, AutoBatchSize] = True,
    ) -> None:

        # add these tensors on top of what is defined by DeepTensor Class
        # use this in favor of dict update to move attribute from class to
        # instance namespace
        self.tensors = dict(
            {
                # descrpt attrs
                "t_ntypes": "descrpt_attr/ntypes:0",
                "t_rcut": "descrpt_attr/rcut:0",
                # fitting attrs
                "t_dfparam": "fitting_attr/dfparam:0",
                "t_daparam": "fitting_attr/daparam:0",
                # model attrs
                "t_tmap": "model_attr/tmap:0",
                # inputs
                "t_coord": "t_coord:0",
                "t_type": "t_type:0",
                "t_natoms": "t_natoms:0",
                "t_box": "t_box:0",
                "t_mesh": "t_mesh:0",
                # add output tensors
                "t_energy": "o_energy:0",
                "t_force": "o_force:0",
                "t_virial": "o_virial:0",
                "t_ae": "o_atom_energy:0",
                "t_av": "o_atom_virial:0"
            }, )
        DeepEval.__init__(
            self,
            model_file,
            load_prefix=load_prefix,
            default_tf_graph=default_tf_graph,
            auto_batch_size=auto_batch_size,
        )

        # load optional tensors
        operations = [op.name for op in self.graph.get_operations()]
        # check if the graph has these operations:
        # if yes add them
        if 't_efield' in operations:
            self._get_tensor("t_efield:0", "t_efield")
            self.has_efield = True
        else:
            log.debug(f"Could not get tensor 't_efield:0'")
            self.t_efield = None
            self.has_efield = False

        if 'load/t_fparam' in operations:
            self.tensors.update({"t_fparam": "t_fparam:0"})
            self.has_fparam = True
        else:
            log.debug(f"Could not get tensor 't_fparam:0'")
            self.t_fparam = None
            self.has_fparam = False

        if 'load/t_aparam' in operations:
            self.tensors.update({"t_aparam": "t_aparam:0"})
            self.has_aparam = True
        else:
            log.debug(f"Could not get tensor 't_aparam:0'")
            self.t_aparam = None
            self.has_aparam = False

        # now load tensors to object attributes
        for attr_name, tensor_name in self.tensors.items():
            self._get_tensor(tensor_name, attr_name)

        # start a tf session associated to the graph
        self.sess = tf.Session(graph=self.graph,
                               config=default_tf_session_config)
        self._run_default_sess()
        self.tmap = self.tmap.decode('UTF-8').split()

        # setup modifier
        try:
            t_modifier_type = self._get_tensor("modifier_attr/type:0")
            self.modifier_type = run_sess(self.sess,
                                          t_modifier_type).decode("UTF-8")
        except (ValueError, KeyError):
            self.modifier_type = None

        if self.modifier_type == "dipole_charge":
            t_mdl_name = self._get_tensor("modifier_attr/mdl_name:0")
            t_mdl_charge_map = self._get_tensor(
                "modifier_attr/mdl_charge_map:0")
            t_sys_charge_map = self._get_tensor(
                "modifier_attr/sys_charge_map:0")
            t_ewald_h = self._get_tensor("modifier_attr/ewald_h:0")
            t_ewald_beta = self._get_tensor("modifier_attr/ewald_beta:0")
            [mdl_name, mdl_charge_map, sys_charge_map, ewald_h,
             ewald_beta] = run_sess(self.sess, [
                 t_mdl_name, t_mdl_charge_map, t_sys_charge_map, t_ewald_h,
                 t_ewald_beta
             ])
            mdl_name = mdl_name.decode("UTF-8")
            mdl_charge_map = [
                int(ii) for ii in mdl_charge_map.decode("UTF-8").split()
            ]
            sys_charge_map = [
                int(ii) for ii in sys_charge_map.decode("UTF-8").split()
            ]
            self.dm = DipoleChargeModifier(mdl_name,
                                           mdl_charge_map,
                                           sys_charge_map,
                                           ewald_h=ewald_h,
                                           ewald_beta=ewald_beta)