Beispiel #1
0
    def prepare_for_submission(self, folder):
        """Create the input files from the input nodes passed to this instance of the `CalcJob`.

        :param folder: an `aiida.common.folders.Folder` to temporarily write files on disk
        :return: `aiida.common.datastructures.CalcInfo` instance
        """
        from aiida_orca.utils import OrcaInput

        # create input structure(s)
        if 'structure' in self.inputs:
            self._write_structure(self.inputs.structure, folder,
                                  self._INPUT_COORDS_FILE)

        settings = self.inputs.settings.get_dict(
        ) if 'settings' in self.inputs else {}

        # create code info
        codeinfo = CodeInfo()
        codeinfo.cmdline_params = settings.pop('cmdline',
                                               []) + [self._INPUT_FILE]
        codeinfo.stdout_name = self._OUTPUT_FILE
        codeinfo.join_files = True
        codeinfo.code_uuid = self.inputs.code.uuid

        # create calc info
        calcinfo = CalcInfo()
        calcinfo.uuid = self.uuid
        calcinfo.cmdline_params = codeinfo.cmdline_params
        calcinfo.stdin_name = self._INPUT_FILE
        calcinfo.stdout_name = self._OUTPUT_FILE
        calcinfo.codes_info = [codeinfo]

        # files or additional structures
        if 'file' in self.inputs:
            calcinfo.local_copy_list = []
            for name, obj in self.inputs.file.items():
                if name == 'gbw':
                    calcinfo.local_copy_list.append(
                        (obj.uuid, obj.filename, 'aiida_old.gbw'))
                else:
                    calcinfo.local_copy_list.append(
                        (obj.uuid, obj.filename, obj.filename))

        # Retrive list
        calcinfo.retrieve_list = [
            self._OUTPUT_FILE, self._GBW_FILE, self._HESSIAN_FILE,
            self._RELAX_COORDS_FILE
        ]
        calcinfo.retrieve_list += settings.pop('additional_retrieve_list', [])

        # create ORCA input file
        # inp = OrcaInput(self.inputs.parameters.get_dict(), remote_path=remote_path)
        inp = OrcaInput(self.inputs.parameters.get_dict())
        with io.open(folder.get_abs_path(self._INPUT_FILE), mode='w') as fobj:
            fobj.write(inp.render())

        return calcinfo
Beispiel #2
0
    def prepare_for_submission(self, tempfolder):
        # Setup template
        with open(self.inputs.template, 'r') as tempfile:
            temp_contents = tempfile.read()
        lmp_template = Template(temp_contents)

        # # check variables
        # for variable in self.inputs.variables.values():
        #     if not isinstance(variable, list):
        #         raise TypeError('Values in variables must be list')
        # check kinds
        if 'kinds' in self.inputs:
            kind_var = {
                kind: kind_index + 1
                for kind_index, kind in enumerate(self.inputs.kinds)
            }
            lmp_template = Template(lmp_template.safe_substitute(**kind_var))

        for i, case in enumerate(self.inputs.cases):
            structure = case.pop('structure')
            if isinstance(structure, StructureData):
                structure_txt, struct_transform = generate_lammps_structure(
                    structure, kinds=self.inputs.kinds)
            elif isinstance(structure, SinglefileData):
                structure_txt = structure.get_content()
            else:
                raise TypeError(
                    'Input structure must be StructureData or SinglefileData')
            input_txt = lmp_template.safe_substitute(**case)

            # ========================= dump to file ===========================
            tempfolder.get_subfolder(i, create=True)
            input_filename = tempfolder.get_abs_path(
                f'{i}/{self._INPUT_FILE_NAME}')
            with open(input_filename, 'w') as infile:
                infile.write(input_txt)

            structure_filename = tempfolder.get_abs_path(
                f'{i}/{self._INPUT_STRUCTURE}')
            with open(structure_filename, 'w') as infile:
                infile.write(structure_txt)

            case_filename = tempfolder.get_abs_path(f'{i}/case.json')
            case.update({'structure pk': structure.pk})
            with open(case_filename, 'w') as infile:
                json.dump(case, infile, sort_keys=True, indent=2)

        # ============================ calcinfo ================================
        settings = self.inputs.settings.get_dict() \
            if 'settings' in self.inputs else {}

        codeinfo = CodeInfo()
        codeinfo.cmdline_params = self._cmdline_params
        codeinfo.code_uuid = self.inputs.code.uuid
        codeinfo.stdout_name = self._stdout_name
        codeinfo.join_files = True

        calcinfo = CalcInfo()
        calcinfo.uuid = self.uuid
        calcinfo.prepend_text = (f'for i in $(seq 0 {i})\n'
                                 'do\n'
                                 'cd "${i}" || exit')
        calcinfo.append_text = ('cd ..\n' 'done')
        calcinfo.cmdline_params = codeinfo.cmdline_params
        calcinfo.stdin_name = self._INPUT_FILE_NAME
        calcinfo.stdout_name = self._stdout_name
        calcinfo.retrieve_list = self._retrieve_list + [
            self.options.output_filename
        ]
        calcinfo.retrieve_list += settings.pop('additional_retrieve_list', [])
        calcinfo.retrieve_temporary_list = self._retrieve_temporary_list
        calcinfo.codes_info = [codeinfo]

        # =========================== local_copy_list ==========================

        if 'file' in self.inputs:
            calcinfo.local_copy_list = []
            for name, obj in self.inputs.file.items():
                calcinfo.local_copy_list.append(
                    (obj.uuid, obj.filename, f'{name}.pb'))

        return calcinfo
Beispiel #3
0
    def prepare_for_submission(self, tempfolder):

        # Setup structure
        if isinstance(self.inputs.structure, StructureData):
            structure_txt, struct_transform = generate_lammps_structure(
                self.inputs.structure, kinds=self.inputs.kinds)
        elif isinstance(self.inputs.structure, SinglefileData):
            structure_txt = self.inputs.structure.get_content()
        else:
            raise TypeError(
                'Input structure must be StructureData or SinglefileData')

        with open(self.inputs.template, 'r') as tempfile:
            temp_contents = tempfile.read()
        init_temp = Template(temp_contents)
        input_txt = init_temp.safe_substitute(**self.inputs.variables)
        if 'kinds' in self.inputs:
            kind_temp = Template(input_txt)
            kind_var = {
                kind: kind_index + 1
                for kind_index, kind in enumerate(self.inputs.kinds)
            }
            input_txt = kind_temp.safe_substitute(**kind_var)

        # =========================== dump to file =============================
        input_filename = tempfolder.get_abs_path(self._INPUT_FILE_NAME)
        with open(input_filename, 'w') as infile:
            infile.write(input_txt)

        structure_filename = tempfolder.get_abs_path(self._INPUT_STRUCTURE)
        with open(structure_filename, 'w') as infile:
            infile.write(structure_txt)

        # ============================ calcinfo ================================
        settings = self.inputs.settings.get_dict() \
            if 'settings' in self.inputs else {}

        codeinfo = CodeInfo()
        codeinfo.cmdline_params = self._cmdline_params
        codeinfo.code_uuid = self.inputs.code.uuid
        codeinfo.stdout_name = self._stdout_name
        codeinfo.join_files = True

        calcinfo = CalcInfo()
        calcinfo.uuid = self.uuid
        calcinfo.cmdline_params = codeinfo.cmdline_params
        calcinfo.stdin_name = self._INPUT_FILE_NAME
        calcinfo.stdout_name = self._stdout_name
        calcinfo.retrieve_list = self._retrieve_list + [
            self.options.output_filename
        ]
        calcinfo.retrieve_list += settings.pop('additional_retrieve_list', [])
        calcinfo.retrieve_temporary_list = self._retrieve_temporary_list
        calcinfo.codes_info = [codeinfo]

        # =========================== local_copy_list ==========================

        if 'file' in self.inputs:
            calcinfo.local_copy_list = []
            for name, obj in self.inputs.file.items():
                calcinfo.local_copy_list.append(
                    (obj.uuid, obj.filename, f'{name}.pb'))

        return calcinfo
Beispiel #4
0
    def prepare_for_submission(self, folder):
        """Create the input files from the input nodes passed to this instance of the `CalcJob`.

        :param folder: an `aiida.common.folders.Folder` to temporarily write files on disk
        :return: `aiida.common.datastructures.CalcInfo` instance
        """
        from aiida_cp2k.utils import Cp2kInput

        # create cp2k input file
        inp = Cp2kInput(self.inputs.parameters.get_dict())
        inp.add_keyword("GLOBAL/PROJECT", self._DEFAULT_PROJECT_NAME)

        # create input structure(s)
        if 'structure' in self.inputs:
            # As far as I understand self.inputs.structure can't deal with tags
            # self.inputs.structure.export(folder.get_abs_path(self._DEFAULT_COORDS_FILE_NAME), fileformat="xyz")
            self._write_structure(self.inputs.structure, folder,
                                  self._DEFAULT_COORDS_FILE_NAME)

            # modify the input dictionary accordingly
            for i, letter in enumerate('ABC'):
                inp.add_keyword('FORCE_EVAL/SUBSYS/CELL/' + letter,
                                '{:<15} {:<15} {:<15}'.format(
                                    *self.inputs.structure.cell[i]),
                                override=False,
                                conflicting_keys=[
                                    'ABC', 'ALPHA_BETA_GAMMA', 'CELL_FILE_NAME'
                                ])

            topo = "FORCE_EVAL/SUBSYS/TOPOLOGY"
            inp.add_keyword(topo + "/COORD_FILE_NAME",
                            self._DEFAULT_COORDS_FILE_NAME,
                            override=False)
            inp.add_keyword(topo + "/COORD_FILE_FORMAT",
                            "XYZ",
                            override=False,
                            conflicting_keys=['COORDINATE'])

        with io.open(folder.get_abs_path(self._DEFAULT_INPUT_FILE),
                     mode="w",
                     encoding="utf-8") as fobj:
            try:
                fobj.write(inp.render())
            except ValueError as exc:
                six.raise_from(
                    InputValidationError(
                        "invalid keys or values in input parameters found"),
                    exc)

        settings = self.inputs.settings.get_dict(
        ) if 'settings' in self.inputs else {}

        # create code info
        codeinfo = CodeInfo()
        codeinfo.cmdline_params = settings.pop(
            'cmdline', []) + ["-i", self._DEFAULT_INPUT_FILE]
        codeinfo.stdout_name = self._DEFAULT_OUTPUT_FILE
        codeinfo.join_files = True
        codeinfo.code_uuid = self.inputs.code.uuid

        # create calc info
        calcinfo = CalcInfo()
        calcinfo.uuid = self.uuid
        calcinfo.cmdline_params = codeinfo.cmdline_params
        calcinfo.stdin_name = self._DEFAULT_INPUT_FILE
        calcinfo.stdout_name = self._DEFAULT_OUTPUT_FILE
        calcinfo.codes_info = [codeinfo]

        # files or additional structures
        if 'file' in self.inputs:
            calcinfo.local_copy_list = []
            for name, obj in self.inputs.file.items():
                if isinstance(obj, SinglefileData):
                    calcinfo.local_copy_list.append(
                        (obj.uuid, obj.filename, obj.filename))
                elif isinstance(obj, StructureData):
                    self._write_structure(obj, folder, name + '.xyz')

        calcinfo.retrieve_list = [
            self._DEFAULT_OUTPUT_FILE, self._DEFAULT_RESTART_FILE_NAME,
            self._DEFAULT_TRAJECT_FILE_NAME
        ]
        calcinfo.retrieve_list += settings.pop('additional_retrieve_list', [])

        # symlinks
        calcinfo.remote_symlink_list = []
        calcinfo.remote_copy_list = []
        if 'parent_calc_folder' in self.inputs:
            comp_uuid = self.inputs.parent_calc_folder.computer.uuid
            remote_path = self.inputs.parent_calc_folder.get_remote_path()
            copy_info = (comp_uuid, remote_path,
                         self._DEFAULT_PARENT_CALC_FLDR_NAME)
            if self.inputs.code.computer.uuid == comp_uuid:  # if running on the same computer - make a symlink
                # if not - copy the folder
                calcinfo.remote_symlink_list.append(copy_info)
            else:
                calcinfo.remote_copy_list.append(copy_info)

        # check for left over settings
        if settings:
            raise InputValidationError(
                "The following keys have been found " +
                "in the settings input node {}, ".format(self.pk) +
                "but were not understood: " + ",".join(settings.keys()))

        return calcinfo
Beispiel #5
0
    def prepare_for_submission(self, folder):
        """Create the input files from the input nodes passed to this instance of the `CalcJob`.

        :param folder: an `aiida.common.folders.Folder` to temporarily write files on disk
        :return: `aiida.common.datastructures.CalcInfo` instance
        """
        from .utils import Cp2kInput

        # create input structure
        if "structure" in self.inputs:
            self.inputs.structure.export(folder.get_abs_path(
                self._DEFAULT_COORDS_FILE_NAME),
                                         fileformat="xyz")

        # create cp2k input file
        inp = Cp2kInput(self.inputs.parameters.get_dict())
        inp.add_keyword("GLOBAL/PROJECT", self._DEFAULT_PROJECT_NAME)
        if "structure" in self.inputs:
            for i, letter in enumerate("ABC"):
                inp.add_keyword(
                    "FORCE_EVAL/SUBSYS/CELL/" + letter,
                    "{:<15} {:<15} {:<15}".format(
                        *self.inputs.structure.cell[i]),
                )
            topo = "FORCE_EVAL/SUBSYS/TOPOLOGY"
            inp.add_keyword(topo + "/COORD_FILE_NAME",
                            self._DEFAULT_COORDS_FILE_NAME)
            inp.add_keyword(topo + "/COORD_FILE_FORMAT", "XYZ")

        with io.open(folder.get_abs_path(self._DEFAULT_INPUT_FILE),
                     mode="w",
                     encoding="utf-8") as fobj:
            try:
                inp.to_file(fobj)
            except ValueError as exc:
                six.raise_from(
                    InputValidationError(
                        "invalid keys or values in input parameters found"),
                    exc,
                )

        if "settings" in self.inputs:
            settings = self.inputs.settings.get_dict()
        else:
            settings = {}

        # create code info
        codeinfo = CodeInfo()
        codeinfo.cmdline_params = settings.pop("cmdline", []) + [
            "-i",
            self._DEFAULT_INPUT_FILE,
        ]
        codeinfo.stdout_name = self._DEFAULT_OUTPUT_FILE
        codeinfo.join_files = True
        codeinfo.code_uuid = self.inputs.code.uuid

        # create calc info
        calcinfo = CalcInfo()
        calcinfo.stdin_name = self._DEFAULT_INPUT_FILE
        calcinfo.uuid = self.uuid
        calcinfo.cmdline_params = codeinfo.cmdline_params
        calcinfo.stdin_name = self._DEFAULT_INPUT_FILE
        calcinfo.stdout_name = self._DEFAULT_OUTPUT_FILE
        calcinfo.codes_info = [codeinfo]

        # file lists
        calcinfo.remote_symlink_list = []
        if "file" in self.inputs:
            calcinfo.local_copy_list = []
            for fobj in self.inputs.file.values():
                calcinfo.local_copy_list.append(
                    (fobj.uuid, fobj.filename, fobj.filename))

        calcinfo.remote_copy_list = []
        calcinfo.retrieve_list = [
            self._DEFAULT_OUTPUT_FILE,
            self._DEFAULT_RESTART_FILE_NAME,
        ]
        calcinfo.retrieve_list += settings.pop("additional_retrieve_list", [])

        # symlinks
        if "parent_calc_folder" in self.inputs:
            comp_uuid = self.inputs.parent_calc_folder.computer.uuid
            remote_path = self.inputs.parent_calc_folder.get_remote_path()
            symlink = (comp_uuid, remote_path,
                       self._DEFAULT_PARENT_CALC_FLDR_NAME)
            calcinfo.remote_symlink_list.append(symlink)

        # check for left over settings
        if settings:
            raise InputValidationError(
                "The following keys have been found " +
                "in the settings input node {}, ".format(self.pk) +
                "but were not understood: " + ",".join(settings.keys()))

        return calcinfo
Beispiel #6
0
    def prepare_for_submission(self, folder):
        """Create the input files from the input nodes passed to this instance of the `CalcJob`.

        :param folder: an `aiida.common.folders.Folder` to temporarily write files on disk
        :return: `aiida.common.datastructures.CalcInfo` instance
        """

        # pylint: disable=too-many-statements,too-many-branches

        # Create cp2k input file.
        inp = Cp2kInput(self.inputs.parameters.get_dict())
        inp.add_keyword("GLOBAL/PROJECT", self._DEFAULT_PROJECT_NAME)

        # Create input structure(s).
        if 'structure' in self.inputs:
            # As far as I understand self.inputs.structure can't deal with tags
            # self.inputs.structure.export(folder.get_abs_path(self._DEFAULT_COORDS_FILE_NAME), fileformat="xyz")
            self._write_structure(self.inputs.structure, folder, self._DEFAULT_COORDS_FILE_NAME)

            # modify the input dictionary accordingly
            for i, letter in enumerate('ABC'):
                inp.add_keyword('FORCE_EVAL/SUBSYS/CELL/' + letter,
                                '{:<15} {:<15} {:<15}'.format(*self.inputs.structure.cell[i]),
                                override=False,
                                conflicting_keys=['ABC', 'ALPHA_BETA_GAMMA', 'CELL_FILE_NAME'])

            topo = "FORCE_EVAL/SUBSYS/TOPOLOGY"
            inp.add_keyword(topo + "/COORD_FILE_NAME", self._DEFAULT_COORDS_FILE_NAME, override=False)
            inp.add_keyword(topo + "/COORD_FILE_FORMAT", "XYZ", override=False, conflicting_keys=['COORDINATE'])

        if 'basissets' in self.inputs:
            validate_basissets(inp, self.inputs.basissets,
                               self.inputs.structure if 'structure' in self.inputs else None)
            write_basissets(inp, self.inputs.basissets, folder)

        if 'pseudos' in self.inputs:
            validate_pseudos(inp, self.inputs.pseudos, self.inputs.structure if 'structure' in self.inputs else None)
            write_pseudos(inp, self.inputs.pseudos, folder)

        # Kpoints.
        if 'kpoints' in self.inputs:
            try:
                mesh, _ = self.inputs.kpoints.get_kpoints_mesh()
            except AttributeError:
                raise InputValidationError("K-point sampling for SCF must be given in mesh form.")

            inp.add_keyword('FORCE_EVAL/DFT/KPOINTS', {'WAVEFUNCTIONS': ' COMPLEX', 'FULL_GRID': '.TRUE.'})
            inp.add_keyword('FORCE_EVAL/DFT/KPOINTS/SCHEME MONKHORST-PACK', f'{mesh[0]} {mesh[1]} {mesh[2]}')

        with io.open(folder.get_abs_path(self._DEFAULT_INPUT_FILE), mode="w", encoding="utf-8") as fobj:
            try:
                fobj.write(inp.render())
            except ValueError as exc:
                raise InputValidationError("Invalid keys or values in input parameters found") from exc

        settings = self.inputs.settings.get_dict() if 'settings' in self.inputs else {}

        # Create code info.
        codeinfo = CodeInfo()
        codeinfo.cmdline_params = settings.pop('cmdline', []) + ["-i", self._DEFAULT_INPUT_FILE]
        codeinfo.stdout_name = self._DEFAULT_OUTPUT_FILE
        codeinfo.join_files = True
        codeinfo.code_uuid = self.inputs.code.uuid

        # Create calc info.
        calcinfo = CalcInfo()
        calcinfo.uuid = self.uuid
        calcinfo.cmdline_params = codeinfo.cmdline_params
        calcinfo.stdin_name = self._DEFAULT_INPUT_FILE
        calcinfo.stdout_name = self._DEFAULT_OUTPUT_FILE
        calcinfo.codes_info = [codeinfo]

        # Files or additional structures.
        if 'file' in self.inputs:
            calcinfo.local_copy_list = []
            for name, obj in self.inputs.file.items():
                if isinstance(obj, SinglefileData):
                    calcinfo.local_copy_list.append((obj.uuid, obj.filename, obj.filename))
                elif isinstance(obj, StructureData):
                    self._write_structure(obj, folder, name + '.xyz')

        calcinfo.retrieve_list = [
            self._DEFAULT_OUTPUT_FILE, self._DEFAULT_RESTART_FILE_NAME, self._DEFAULT_TRAJECT_FILE_NAME
        ]
        calcinfo.retrieve_list += settings.pop('additional_retrieve_list', [])

        # Symlinks.
        calcinfo.remote_symlink_list = []
        calcinfo.remote_copy_list = []
        if 'parent_calc_folder' in self.inputs:
            comp_uuid = self.inputs.parent_calc_folder.computer.uuid
            remote_path = self.inputs.parent_calc_folder.get_remote_path()
            copy_info = (comp_uuid, remote_path, self._DEFAULT_PARENT_CALC_FLDR_NAME)

            # If running on the same computer - make a symlink.
            if self.inputs.code.computer.uuid == comp_uuid:
                calcinfo.remote_symlink_list.append(copy_info)
            # If not - copy the folder.
            else:
                calcinfo.remote_copy_list.append(copy_info)

        # Check for left over settings.
        if settings:
            raise InputValidationError(
                f"The following keys have been found in the settings input node {self.pk}, but were not understood: " +
                ",".join(settings.keys()))

        return calcinfo
Beispiel #7
0
    def prepare_for_submission(self, folder):
        """Create the input files from the input nodes passed to this instance of the `CalcJob`.

        :param folder: an `aiida.common.folders.Folder` to temporarily write files on disk
        :return: `aiida.common.datastructures.CalcInfo` instance
        """
        # from aiida_deepmd.utils import DpInput

        # create json input file
        # input = DpInput(self.inputs.model.get_dict(), self.inputs.learning_rate.get_dict(), self.inputs.loss.get_dict(), self.inputs.training.get_dict())
        input = dict()
        input['model'] = self.inputs.model.get_dict()
        input['learning_rate'] = self.inputs.learning_rate.get_dict()
        input['loss'] = self.inputs.loss.get_dict()
        input['training'] = self.inputs.training.get_dict()
        json_str = json.dumps(input, indent=4, sort_keys=False)

        with io.open(folder.get_abs_path(self._DEFAULT_INPUT_FILE), mode="w", encoding="utf-8") as fobj:
            try:
                fobj.write(json_str)
            except ValueError as exc:
                raise InputValidationError("invalid keys or values in input parameters found")

        local_copy_list = []
        # Create the subfolder that will contain the train data
        folder.get_subfolder(self._TRAIN_DATA_SUBFOLDER, create=True)
        for fn in self.inputs.file:
            fobj = self.inputs.file[fn]
            src_path = fobj.filename
            dst_path = os.path.join(self._TRAIN_DATA_SUBFOLDER, fobj.filename)
            local_copy_list.append((fobj.uuid, src_path, dst_path))

        def create_array_from_files(files):
            for f in files:
                content = f.get_content()
                # function from aiida_ce
                pass
            return data_array

        # create train set and store the data in
        data_array = create_array_from_files(self.inputs.file)

        # for simplicity do not split the folder
        n = 0
        set_folder = folder.get_subfolder(os.path.join(self._TRAIN_DATA_SUBFOLDER, self._TRAIN_SET_PRFIX + str(n)), create=True)
        with io.open(set_folder.get_abs_path(coord.npy), mode="w") as fobj:
            try:
                np.dump(fobj, array=data_array)
            except ValueError as exc:
                raise InputValidationError("invalid keys or values in input parameters found")

        # settings = self.inputs.settings.get_dict() if 'settings' in self.inputs else {}

        # create code info
        codeinfo = CodeInfo()
        codeinfo.cmdline_params = ["train", self._DEFAULT_INPUT_FILE]
        codeinfo.stdout_name = self._DEFAULT_OUTPUT_FILE
        codeinfo.join_files = True
        codeinfo.code_uuid = self.inputs.code.uuid

        # create calc info
        calcinfo = CalcInfo()
        calcinfo.uuid = self.uuid
        calcinfo.cmdline_params = codeinfo.cmdline_params
        calcinfo.stdin_name = self._DEFAULT_INPUT_FILE
        calcinfo.stdout_name = self._DEFAULT_OUTPUT_FILE
        calcinfo.local_copy_list = local_copy_list
        calcinfo.codes_info = [codeinfo]

        calcinfo.retrieve_list = [
            # self._DEFAULT_OUTPUT_FILE,
            # self._DEFAULT_OUTPUT_INFO_FILE,
            # self._DEFAULT_CHECK_META_FILE,
            # self._DEFAULT_CHECK_INDEX_FILE,
        ]

        return calcinfo
Beispiel #8
0
    def prepare_for_submission(self, folder):
        """Create the input files from the input nodes passed to this instance of the `CalcJob`.

        :param folder: an `aiida.common.folders.Folder` to temporarily write files on disk
        :return: `aiida.common.datastructures.CalcInfo` instance
        """

        # create json input file
        input = dict()
        input['model'] = self.inputs.model.get_dict()
        input['learning_rate'] = self.inputs.learning_rate.get_dict()
        input['loss'] = self.inputs.loss.get_dict()
        input['training'] = self.inputs.training.get_dict()
        # replace all the random seed
        input['model']['descriptor']['seed'] = np.random.randint(100000000)
        input['model']['fitting_net']['seed'] = np.random.randint(100000000)
        input['training']['seed'] = np.random.randint(100000000)
        warn("All seeds in user input will be automatically replaced by numpy.")
        json_str = json.dumps(input, indent=4, sort_keys=False)

        with io.open(folder.get_abs_path(self._DEFAULT_INPUT_FILE), mode="w", encoding="utf-8") as fobj:
            try:
                fobj.write(json_str)
            except ValueError as exc:
                raise InputValidationError("invalid keys or values in input parameters found")

        # Create the subfolder that will contain the train data
        local_copy_list = []
        for datadir in self.inputs.datadirs:
            # change to absolute path
            if not os.path.exists(datadir):
                raise FileExistsError("This datadir dose not exist")
            absdatadir = os.path.abspath(datadir)
            # create subfolder
            datadir_basename = os.path.basename(absdatadir)
            datadir_in_workdir = os.path.join("./", datadir_basename)
            folder.get_subfolder(datadir_in_workdir, create=True)
            # this loop use to copy the training data under the datadir
            for root, directories, files in os.walk(top=absdatadir, topdown=True):
                relroot = os.path.relpath(root, absdatadir)
                # create subtree folders
                for name in directories:
                    folder.get_subfolder(
                        os.path.join(
                            datadir_basename,
                            relroot,
                            name),
                        create=True)

                # give the singlefiledata to file
                for name in files:
                    fobj = SinglefileData(
                        file=os.path.join(root, name)
                    )
                    # must save fobj otherwise the node is empty and can't be copied
                    fobj.store()
                    dst_path = os.path.join(
                        datadir_basename,
                        relroot,
                        name)
                    local_copy_list.append((fobj.uuid, fobj.filename, dst_path))



        # settings = self.inputs.settings.get_dict() if 'settings' in self.inputs else {}

        # set two code info here, once the training finished, the model will freeze then.
        # create code info for training
        codeinfotrain = CodeInfo()
        codeinfotrain.cmdline_params = ["train", self._DEFAULT_INPUT_FILE]
        #codeinfotrain.stdin_name = self._DEFAULT_INPUT_FILE
        codeinfotrain.stdout_name = self._DEFAULT_TRAIN_OUTPUT_FILE
        codeinfotrain.join_files = True
        codeinfotrain.code_uuid = self.inputs.code.uuid
        codeinfotrain.withmpi = self.inputs.metadata.options.withmpi

        # create code info for freeze
        codeinfofreeze = CodeInfo()
        codeinfofreeze.cmdline_params = ["freeze", '-o', self._DEFAULT_FREEZE_OUTPUT_FILE]
        #codeinfofreeze.stdout_name = self._DEFAULT_FREEZE_OUTPUT_FILE
        codeinfofreeze.code_uuid = self.inputs.code.uuid
        codeinfofreeze.withmpi = self.inputs.metadata.options.withmpi


        # create calc info
        calcinfo = CalcInfo()
        calcinfo.uuid = self.uuid
        calcinfo.local_copy_list = local_copy_list
        calcinfo.codes_info = [codeinfotrain, codeinfofreeze]

        calcinfo.retrieve_list = [
            self._DEFAULT_TRAIN_OUTPUT_FILE,
            self._DEFAULT_FREEZE_OUTPUT_FILE,
            self._DEFAULT_OUTPUT_INFO_FILE,
            self._DEFAULT_CHECK_META_FILE,
            self._DEFAULT_CHECK_INDEX_FILE
        ]

        return calcinfo