# Python 2 Tkinter callback (note the `print` statements and raw_input()).
# Exports the selected skin resource folder: saves the selection into the
# persisted config, svn-updates the working copy, copies the selected item
# plus its parent folders into ../skinlocal_<item>, publishes the UI
# resources via ccsexport, then waits for a keypress and closes the window.
# NOTE(review): relies on module-level globals (selectVar, inputBox, config,
# dirs, window, getParent, Util, utils, ccsexport, pysvn) -- confirm against
# the rest of the file.
def btnExportClick():
    selectId = int(selectVar.get())
    exportDir = inputBox.get()
    config["exportDir"] = exportDir
    config["selectId"] = selectId
    print "SVN UPDATE"
    client = pysvn.Client()
    client.update('./')
    curItemName = dirs[selectId]
    ExportList = [curItemName]
    # Collect the item's parent folders so they get copied too.
    getParent(curItemName,ExportList)
    targetDir = "../skinlocal_"+curItemName
    Util.ensureDir(targetDir)
    # Copy parents first (reversed order), then the item itself.
    for itemName in reversed(ExportList):
        print "copy=",itemName
        utils.copytree(itemName, targetDir)
    print u"拷贝资源成功"
    os.chdir("tools")
    # Persist the export settings for the next run.
    json_str = json.dumps(config)
    Util.writeStringToFile("export.db",json_str)
    targetDir = "../../skinlocal_" + curItemName
    # Publish the resources.
    print(u"发布资源中")
    ccsexport.export(targetDir, os.path.join(exportDir, "res", "ui"), True)
    print(u"发布成功")
    print(u"按任意键退出")
    raw_input()
    window.destroy()
def build_ons_data(root_dir, out_dir):
    """Assemble an ONScripter data directory from the downloaded game assets.

    Builds the script first, then copies (and where needed decrypts) sound,
    background, character and dialogue-box assets from ``root_dir`` into the
    corresponding subfolders of ``out_dir``.
    """
    build_ons_script(root_dir, out_dir)

    # Sound: bgm_* and se_* files are copied as-is into separate folders.
    sound_dir = os.path.join(root_dir, 'download', 'sound')
    print("复制bgm中...")
    copyfiles(sound_dir, os.path.join(out_dir, "bgm"),
              lambda name: name.startswith('bgm'))
    print("复制音效中...")
    copyfiles(sound_dir, os.path.join(out_dir, "se"),
              lambda name: name.startswith('se'))

    # Images are stored encrypted; decrypt while copying and add a .png name.
    adv_dir = os.path.join(root_dir, 'download', 'image', 'adv')
    print("解密背景图片中...")
    copyfiles(adv_dir, os.path.join(out_dir, 'bgimage'),
              lambda name: name.startswith('adv_bg'),
              decrypt=decrypt, tranformer=lambda name: name + '.png')
    copyfiles(os.path.join(root_dir, 'download', 'rest'),
              os.path.join(out_dir, 'bgimage'),
              lambda name: name == 'exp_map_bg',
              decrypt=decrypt, tranformer=lambda name: 'map.png')
    print("解密角色图片中...")
    copyfiles(adv_dir, os.path.join(out_dir, 'chara'),
              lambda name: name.startswith('adv_chara'),
              decrypt=decrypt, tranformer=lambda name: name + '.png')

    # The dialogue box image is built from the decrypted 'que_adv' blob.
    print("生成对话框中...")
    image_dir = os.path.join(out_dir, 'image')
    os.makedirs(image_dir, exist_ok=True)
    with open(os.path.join(root_dir, 'download', 'rest', 'que_adv'), 'rb') as blob:
        img = decrypt(blob.read())
    build_que_adv(img, os.path.join(image_dir, 'que_adv.png'))

    # The voice directory is optional.
    voice_dir = os.path.join(root_dir, 'download', 'voice')
    if os.path.isdir(voice_dir):
        print("复制语音目录中...")
        copytree(voice_dir, os.path.join(out_dir, 'voice'))
    else:
        print("语音目录不存在,已忽略")
def _install_from(self, fromPath, fromLoc, toLocation=None, ignore=None):
    """Copy file or directory from a location to the droplet

    Copies a file or directory from a location to the application
    droplet. Directories are copied recursively, but specific files
    in those directories can be ignored by specifing the ignore
    parameter.

        fromPath   -> file to copy, relative build pack
        fromLoc    -> root of the from path.  Full path to file or
                      directory to be copied is fromLoc + fromPath
        toLocation -> optional location where to copy the file
                      relative to app droplet.  If not specified
                      uses fromPath.
        ignore     -> an optional callable that is passed to
                      the ignore argument of shutil.copytree.

    If the source path does not exist, the method silently does nothing.
    """
    self._log.debug("Install file [%s] from [%s]", fromPath, fromLoc)
    fullPathFrom = os.path.join(fromLoc, fromPath)
    if os.path.exists(fullPathFrom):
        # Conditional expression instead of the fragile `cond and a or b`
        # idiom, which resolved to the wrong value when fromPath was falsy.
        fullPathTo = os.path.join(
            self._ctx['BUILD_DIR'],
            fromPath if toLocation is None else toLocation)
        safe_makedirs(os.path.dirname(fullPathTo))
        self._log.debug("Copying [%s] to [%s]", fullPathFrom, fullPathTo)
        if os.path.isfile(fullPathFrom):
            shutil.copy(fullPathFrom, fullPathTo)
        else:
            utils.copytree(fullPathFrom, fullPathTo, ignore=ignore)
def copy_specfem_stuff(specfemdir, targetdir):
    """Mirror the DATA, OUTPUT_FILES and bin folders of a SPECFEM run.

    Each destination subfolder is wiped (cleantree) before being
    re-populated from the corresponding source subfolder.
    """
    for subdir in ("DATA", "OUTPUT_FILES", "bin"):
        source = os.path.join(specfemdir, subdir)
        destination = os.path.join(targetdir, subdir)
        cleantree(destination)
        copytree(source, destination)
def __main__(args=None):
    """Run the PyDwarf manager: back up, transform, and rewrite DF raws.

    args -> parsed command-line namespace handed to getconf; also consulted
            directly for the --list/--meta early-exit flags.
    Exits the process with a nonzero status on any unrecoverable error.
    """
    conf = getconf(args)
    pydwarf.log.debug('Proceeding with configuration: %s.' % conf)

    # Report versions
    pydwarf.log.info('Running PyDwarf manager version %s.' % __version__)
    pydwarf.log.debug('With PyDwarf version %s.' % pydwarf.__version__)
    pydwarf.log.debug('With raws version %s.' % raws.__version__)

    # Handle flags that completely change behavior
    if args.list:
        pydwarf.urist.list()
        exit(0)
    elif args.meta is not None:
        pydwarf.urist.doclist(args.meta)
        exit(0)

    # Verify that input directory exists
    if not os.path.exists(conf.input):
        pydwarf.log.error('Specified raws directory %s does not exist.' % conf.input)
        exit(1)

    # Make backup
    if conf.backup is not None:
        pydwarf.log.info('Backing up raws to %s.' % conf.backup)
        try:
            copytree(conf.input, conf.backup)
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        except Exception:
            pydwarf.log.error('Failed to create backup.')
            exit(1)
    else:
        pydwarf.log.warning('Proceeding without backing up raws.')

    # Read input raws
    pydwarf.log.info('Reading raws from input directory %s.' % conf.input)
    pydwarf.urist.session.dfraws = raws.dir(path=conf.input, log=pydwarf.log)

    # Run each script
    pydwarf.log.info('Running scripts.')
    pydwarf.urist.session.handleall(conf.scripts)

    # Get the output directory, remove old raws if present
    outputdir = conf.output if conf.output else conf.input
    if os.path.exists(outputdir):
        pydwarf.log.info('Removing obsolete raws from %s.' % outputdir)
        for removefile in [os.path.join(outputdir, f) for f in os.listdir(outputdir)]:
            # Only .txt files are raws; log only what is actually removed
            # (the old code logged "Removing file" for every entry).
            if removefile.endswith('.txt'):
                pydwarf.log.debug('Removing file %s.' % removefile)
                os.remove(removefile)
    else:
        pydwarf.log.info('Creating raws output directory %s.' % outputdir)
        os.makedirs(outputdir)

    # Write the output
    pydwarf.log.info('Writing changes to raws to %s.' % outputdir)
    pydwarf.urist.session.dfraws.write(outputdir, pydwarf.log)

    # All done!
    pydwarf.log.info('All done!')
def __main__():
    """Run PyDwarf from an imported config package: back up, transform, and
    rewrite Dwarf Fortress raws according to config.export.

    Exits the process with a nonzero status on any unrecoverable error.
    """
    # Get configuration
    conf = config.export
    if not conf:
        pydwarf.log.error('No configuration specified. Imported config package must contain an export variable.')
        exit(1)

    # Things to do with versions
    pydwarf.log.info('Running PyDwarf %s.' % pydwarf.__version__)
    if conf.version is not None:
        pydwarf.log.info('Managing Dwarf Fortress version %s.' % conf.version)
        pydwarf.urist.session.dfversion = conf.version
    else:
        pydwarf.log.error('No Dwarf Fortress version was specified in conf. Scripts will be run regardless of their indicated compatibility.')

    # Verify that input directory exists
    if not os.path.exists(conf.input):
        pydwarf.log.error('Specified raws directory %s does not exist.' % conf.input)
        exit(1)

    # Make backup
    if conf.backup is not None:
        pydwarf.log.info('Backing up raws to %s...' % conf.backup)
        try:
            copytree(conf.input, conf.backup)
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        except Exception:
            pydwarf.log.error('Failed to create backup.')
            exit(1)
    else:
        pydwarf.log.warning('Proceeding without backing up raws.')

    # Read input raws
    pydwarf.log.info('Reading raws from input directory %s...' % conf.input)
    pydwarf.urist.session.dfraws = raws.dir(path=conf.input, log=pydwarf.log)

    # Run each script
    pydwarf.log.info('Running scripts...')
    pydwarf.urist.session.handleall(conf.scripts)

    # Get the output directory, remove old raws if present
    outputdir = conf.output if conf.output else conf.input
    if os.path.exists(outputdir):
        pydwarf.log.info('Removing obsolete raws from %s...' % outputdir)
        for removefile in [os.path.join(outputdir, f) for f in os.listdir(outputdir)]:
            # Only .txt files are raws; log only what is actually removed
            # (the old code logged "Removing file" for every entry).
            if removefile.endswith('.txt'):
                pydwarf.log.debug('Removing file %s...' % removefile)
                os.remove(removefile)
    else:
        pydwarf.log.info('Creating raws output directory %s...' % outputdir)
        os.makedirs(outputdir)

    # Write the output
    pydwarf.log.info('Writing changes to raws to %s...' % outputdir)
    pydwarf.urist.session.dfraws.write(outputdir, pydwarf.log)

    # All done!
    pydwarf.log.info('All done!')
def run(self, env):
    """Copy the contents of self.fromDir into the task's destination.

    The destination is self.toDir when set, otherwise env["WORK_DIR"].
    Raises TaskError when no destination can be determined.
    """
    destination = self.toDir
    if destination is None:
        # Fall back to the working directory from the environment.
        if "WORK_DIR" not in env:
            logging.error("WORK_DIR not defined")
            raise TaskError("WORK_DIR not defined")
        destination = env["WORK_DIR"]
    logging.info("Copying " + self.fromDir + " to " + destination)
    utils.copytree(self.fromDir, destination)
    logging.info("Successfully copied folder contents")
def _initialize(self):
    """Reset the HTML and PDF output trees to a clean state.

    Deletes any previous output, recreates the directories, and copies the
    static assets into the HTML output.
    """
    def recreate(path):
        # Wipe-and-remake one output directory.
        if os.path.isdir(path):
            shutil.rmtree(path)
        os.mkdir(path)

    colored_print('Initializing... ', 'OKGREEN')
    # HTML destination: fresh dir, assets copied in, empty css folder.
    recreate(self.html_output)
    copytree(self.assets, self.html_assets)
    os.mkdir(self.html_css)
    # PDF destination: fresh dir.
    recreate(self.pdf_output)
# Python 2 (print statement inside the fileinput loop).
# Instantiates the APP template into a new application folder: copies the
# template tree to a temp dir, renames the template files after appName,
# substitutes the placeholders, then moves the result under targetDir
# (default: <package parent>/Apps).
def addApplication(appName, childOf='', appShortName='', templateDir=None, targetDir=None):
    if targetDir is None:
        targetDir = os.path.join(dirname(dirname(__file__)), 'Apps')
    if templateDir is None:
        templateDir = os.path.join(dirname(__file__), 'templates')
    # (template file, destination name) pairs; the .app file is also parsed.
    toMoveFiles = [('APP.app.template', '%s.app' % (appName.upper())),
                   ('APP_init.php.in.template', '%s_init.php.in' % (appName.upper()))]
    toParseFiles = ['%s.app' % (appName.upper())]
    # create tmp dir
    tempDir = mkdtemp()
    #print "working in %s"%(tempDir)
    # copy files to tmp dir
    copytree(os.path.join(templateDir, 'APP'), tempDir, symlinks=False)
    # rename files in tmp dir
    for (fromFilePath, toFilePath) in toMoveFiles:
        fromFileFullPath = os.path.join(tempDir, fromFilePath)
        toFileFullPath = os.path.join(tempDir, toFilePath)
        #print "move %s to %s"%(fromFileFullPath, toFileFullPath)
        shutil.move(fromFileFullPath, toFileFullPath)
    # parse files in tmp dir: fileinput with inplace=1 rewrites each file,
    # the print statement's output replacing the original lines.
    for parsedFilePath in toParseFiles:
        parsedFileFullPath = os.path.join(tempDir, parsedFilePath)
        #print "parsing %s"%(parsedFileFullPath)
        for line in fileinput.input(parsedFileFullPath, inplace=1):
            print Template(line).safe_substitute({
                'APPNAME': appName.upper(),
                'CHILDOF': childOf.upper(),
                'appShortName': appShortName,
                'appIcon': "%s.png" % (appName.lower())
            }).rstrip() #strip to remove EOL duplication
    # move tmp dir to target dir
    shutil.move(tempDir, os.path.join(targetDir, appName.upper()))
    return
def copy_static_assets(base_path, theme_folder):
    """Collect static assets into <base_path>/public/static.

    Theme assets are copied first, then the project-level static tree is
    merged into the same destination (copy_tree overwrites duplicates),
    so project files take precedence.
    """
    dest = os.path.join(base_path, 'public', 'static')
    # copy files from theme folder
    orig = os.path.join(base_path, theme_folder, 'static')
    if not os.path.exists(dest):
        os.makedirs(dest)
    # (Removed a dead `else: pass` branch that only carried commented-out
    # code; existing destination contents are intentionally kept.)
    copytree(orig, dest)
    # copy files from static folder
    orig = os.path.join(base_path, 'static')
    distutils.dir_util.copy_tree(orig, dest)
def main(argv=None):
    """Restore a Jenkins home directory from the backup repository and
    optionally install the plugins recorded in plugin_list.json."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--targetdir', help='the target directory (Jenkins home directory)', metavar='DIR', required=True)
    parser.add_argument('-b', '--backuprepodir', help='the backup repository', metavar='DIR', default='SapMachine-Infrastructure')
    parser.add_argument('--install-plugins', help='install the Jenkins plugins', action='store_true', default=False)
    parser.add_argument('--plugins-only', help='install only the Jenkins plugins (implies --install-plugins)', action='store_true', default=False)
    args = parser.parse_args()

    # --plugins-only implies --install-plugins.
    if args.plugins_only:
        args.install_plugins = True

    source = os.path.realpath(args.backuprepodir)
    target = os.path.realpath(args.targetdir)
    if not os.path.exists(target):
        os.mkdir(target)

    # Restore the configuration tree unless only plugins were requested.
    if not args.plugins_only:
        utils.copytree(join(source, jenkins_configuration), target)

    if args.install_plugins:
        # plugin_list.json holds name/version pairs for install-plugins.sh.
        with open(join(source, jenkins_configuration, 'plugin_list.json'), 'r') as plugin_list_json:
            plugin_list = json.loads(plugin_list_json.read())
        install_cmd = ['/usr/local/bin/install-plugins.sh']
        for plugin in plugin_list:
            install_cmd.append('{0}:{1}'.format(plugin['Extension-Name'], plugin['Plugin-Version']))
        utils.run_cmd(install_cmd)
def copy_cmtfiles(_event, cmtfolder, targetcmtdir, generate_deriv_cmt,
                  deriv_cmt_list):
    """Copy one event's CMT file into the job folder, plus either the
    pre-existing derivative CMT files or the scripts to generate them."""
    copyfile(os.path.join(cmtfolder, _event),
             os.path.join(targetcmtdir, _event), verbose=False)
    if generate_deriv_cmt:
        # Ship the perturbation scripts so derivative CMTs can be
        # produced later, next to the cmt folder.
        copytree("job_running_template/perturb_cmt",
                 os.path.dirname(targetcmtdir))
    else:
        # Derivative CMT files already exist; copy each one alongside
        # the original.
        for deriv_type in deriv_cmt_list:
            deriv_name = "%s_%s" % (_event, deriv_type)
            copyfile(os.path.join(cmtfolder, deriv_name),
                     os.path.join(targetcmtdir, deriv_name), verbose=False)
# Python 2 (print statement inside the fileinput loop).
# Variant of addApplication: the default targetDir is named after the app
# itself rather than a shared Apps/ folder. Instantiates the APP template,
# renames and parses the template files, then moves the result into place.
def addApplication(appName, childOf='', appShortName='', templateDir=None, targetDir=None):
    appNameUpper = appName.upper()
    if targetDir is None:
        targetDir = os.path.join(dirname(dirname(__file__)), appNameUpper)
    if templateDir is None:
        templateDir = os.path.join(dirname(__file__), 'templates')
    # (template file, destination name) pairs; the .app file is also parsed.
    toMoveFiles = [
        ('APP.app.template', '%s.app' % (appNameUpper)),
        ('APP_init.php.in.template', '%s_init.php.in' % (appNameUpper))
    ]
    toParseFiles = [
        '%s.app' % (appNameUpper)
    ]
    # create tmp dir
    tempDir = mkdtemp()
    #print "working in %s"%(tempDir)
    # copy files to tmp dir
    copytree(os.path.join(templateDir, 'APP'), tempDir, symlinks=False)
    # rename files in tmp dir
    for (fromFilePath, toFilePath) in toMoveFiles:
        fromFileFullPath = os.path.join(tempDir, fromFilePath)
        toFileFullPath = os.path.join(tempDir,toFilePath)
        #print "move %s to %s"%(fromFileFullPath, toFileFullPath)
        shutil.move(fromFileFullPath, toFileFullPath)
    # parse files in tmp dir: fileinput with inplace=1 rewrites each file,
    # the print statement's output replacing the original lines.
    for parsedFilePath in toParseFiles:
        parsedFileFullPath = os.path.join(tempDir, parsedFilePath)
        #print "parsing %s"%(parsedFileFullPath)
        for line in fileinput.input(parsedFileFullPath, inplace=1):
            print Template(line).safe_substitute({
                'APPNAME': appNameUpper,
                'CHILDOF': childOf.upper(),
                'appShortName': appShortName,
                'appIcon': "%s.png" % (appName.lower())
            }).rstrip() #strip to remove EOL duplication
    # move tmp dir to target dir
    shutil.move(tempDir, os.path.join(targetDir, appNameUpper))
    return
def createModule(moduleName, appName, outputDir, childOf='', ignoreList=[], appShortName=''): toMoveFiles = [ ('APP_en.po', '%s_en.po'%(appName.upper())), ('APP_fr.po', '%s_fr.po'%(appName.upper())) ] toParseFiles = [ 'configure.in', 'info.xml.in' ] # create tmp dir tempDir = mkdtemp() #print "working in %s"%(tempDir) # copy files to tmp dir (exclude some) ignoreList = tuple(ignoreList) + ('createModule.py', '.git', '.gitmodules', '*.md') #print "ignoring '%s'"%("', '".join(ignoreList)) ignore = shutil.ignore_patterns(*ignoreList) copytree(os.path.dirname(__file__), tempDir, symlinks=False, ignore=ignore) # rename files in tmp dir for (fromFilePath, toFilePath) in toMoveFiles: fromFileFullPath = os.path.join(tempDir, fromFilePath) toFileFullPath = os.path.join(tempDir,toFilePath) #print "move %s to %s"%(fromFileFullPath, toFileFullPath) shutil.move(fromFileFullPath, toFileFullPath) # parse files in tmp dir for parsedFilePath in toParseFiles: parsedFileFullPath = os.path.join(tempDir, parsedFilePath) #print "parsing %s"%(parsedFileFullPath) for line in fileinput.input(parsedFileFullPath, inplace=1): print Template(line).safe_substitute({ 'APPNAME': appName.upper(), 'modulename': moduleName }).rstrip() #strip to remove EOL duplication addApplication(appName, childOf=childOf, appShortName=appShortName, targetDir=os.path.join(tempDir, 'Apps')) # move tmp dir to target dir copytree(tempDir, outputDir) shutil.rmtree(tempDir) return
def main():
    """Install apache_vhost_manager and copy its default configuration
    from the source tree into /etc."""
    # The config folder has the same relative layout in the source tree
    # and on the target system.
    rel_conf_dir = os.path.join("etc", "apache-vhost-manager")
    config_src = os.path.join(here_path, rel_conf_dir)
    config_dst = os.path.join("/", rel_conf_dir)
    setup(
        name='apache_vhost_manager',
        version='0.0.3',
        author="airtonix",
        maintainer="Airtonix",
        maintainer_email="*****@*****.**",
        url="airtonix.net/projects/apache_vhost_creator",
        scripts=['usr/bin/apache-vhost-manager'],
        license=read_file('LICENSE.md'),
        description='A helper script to manage apache subdomain based virtualhosts. It inserts BIND dns records, sets up django projects and LDAP authentication directives.',
        long_description=read_file('README.md')
    )
    # Deploy the default configuration after installation.
    copytree(config_src, config_dst)
def _edit_and_commit(self, commit_date, message, change_file_dir):
    """Copy the change files into the working tree and commit them with the
    given author/commit date.

    commit_date     -> datetime used for both author and commit dates
    message         -> commit message
    change_file_dir -> directory whose files are copied into <cwd>/test
    """
    #for every file in the change directory, copy to the new place
    print("copytree {0} {1}".format(change_file_dir, os.getcwd()))
    # os.path.join keeps this portable; the old code hard-coded the
    # Windows separator (os.getcwd() + "\\test").
    new_files = copytree(change_file_dir, os.path.join(os.getcwd(), "test"))
    for file in new_files:
        print("Adding file {0}".format(file))
    # Stage the whole batch once, outside the reporting loop.
    self.repo.index.add(new_files)
    date_in_iso = commit_date.strftime("%Y-%m-%d %H:%M:%S")
    self.repo.index.commit(message, author_date=date_in_iso, commit_date=date_in_iso)
    print("{0}{1}".format(commit_date, message))
def main(argv=None):
    """Copy a saved jenkins_configuration tree into the target directory and
    optionally install the plugins recorded in plugin_list.json."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--srcdir', help='the source directory to copy from', metavar='DIR', required=True)
    parser.add_argument('-t', '--targetdir', help='the target directory to copy to', metavar='DIR', required=True)
    parser.add_argument('--install-plugins', help='install the Jenkins plugins', action='store_true', default=False)
    args = parser.parse_args()

    source = os.path.realpath(args.srcdir)
    target = os.path.realpath(args.targetdir)
    if not os.path.exists(target):
        os.mkdir(target)

    # Restore the configuration tree.
    utils.copytree(join(source, 'jenkins_configuration'), target)

    if args.install_plugins:
        # plugin_list.json holds name/version pairs for install-plugins.sh.
        with open(join(source, 'jenkins_configuration', 'plugin_list.json'), 'r') as plugin_list_json:
            plugin_list = json.loads(plugin_list_json.read())
        install_cmd = ['/usr/local/bin/install-plugins.sh']
        for plugin in plugin_list:
            install_cmd.append('{0}:{1}'.format(plugin['Extension-Name'], plugin['Plugin-Version']))
        utils.run_cmd(install_cmd)
def create_job_folder(template_folder, tag, eventlist_dict, cmtfolder,
                      stafolder, generate_deriv_cmt, deriv_cmt_list):
    """Create one job folder per entry of eventlist_dict and populate each
    with its event list, CMT files, station files, the script template and
    the config file."""
    print("*"*20 + "\nCreat job sub folder")
    # One "job_<tag>_NN" folder per event list (1-based numbering).
    targetdir_list = ["job_" + tag + "_%02d" % (i + 1)
                      for i in range(len(eventlist_dict))]
    check_job_folder_exist(targetdir_list)

    for _i, targetdir in enumerate(targetdir_list):
        idx = _i + 1
        print("="*5 + "\nJob id: %d" % idx)
        # copy eventlist file
        eventlist_file = eventlist_dict[idx]
        copyfile(eventlist_file, os.path.join(targetdir, "XEVENTID"))
        # copy original cmt file and station file
        targetcmtdir = os.path.join(targetdir, "cmtfile")
        targetstadir = os.path.join(targetdir, "station")
        print("copy cmt:[%s --> %s]" % (cmtfolder, targetcmtdir))
        print("copy stattion:[%s --> %s]" % (stafolder, targetstadir))
        for _event in read_txt_into_list(eventlist_file):
            copy_cmtfiles(_event, cmtfolder, targetcmtdir,
                          generate_deriv_cmt, deriv_cmt_list)
            copy_stations(_event, stafolder, targetstadir)
        # copy scripts template and the config file
        print("Copy dir:[%s --> %s]" % (template_folder, targetdir))
        copytree(template_folder, targetdir)
        copyfile("config.yml", os.path.join(targetdir, "config.yml"))
def create_job_folder(template_folder, tag, eventlist_dict, cmtfolder,
                      stafolder, generate_deriv_cmt, deriv_cmt_list):
    """Create one job folder per entry of eventlist_dict and populate each
    with its event list, CMT files, station files, the script template and
    the config file."""
    print("*" * 20 + "\nCreat job sub folder")
    njobs = len(eventlist_dict)
    # One "job_<tag>_NN" folder per event list (1-based numbering).
    targetdir_list = []
    for job_index in range(1, njobs + 1):
        targetdir_list.append("job_" + tag + "_%02d" % job_index)
    check_job_folder_exist(targetdir_list)

    for job_index, targetdir in enumerate(targetdir_list, start=1):
        print("=" * 5 + "\nJob id: %d" % job_index)
        # copy eventlist file
        eventlist_file = eventlist_dict[job_index]
        copyfile(eventlist_file, os.path.join(targetdir, "XEVENTID"))
        # copy original cmt file and station file
        targetcmtdir = os.path.join(targetdir, "cmtfile")
        targetstadir = os.path.join(targetdir, "station")
        print("copy cmt:[%s --> %s]" % (cmtfolder, targetcmtdir))
        print("copy stattion:[%s --> %s]" % (stafolder, targetstadir))
        for _event in read_txt_into_list(eventlist_file):
            copy_cmtfiles(_event, cmtfolder, targetcmtdir,
                          generate_deriv_cmt, deriv_cmt_list)
            copy_stations(_event, stafolder, targetstadir)
        # copy scripts template and the config file
        print("Copy dir:[%s --> %s]" % (template_folder, targetdir))
        copytree(template_folder, targetdir)
        copyfile("config.yml", os.path.join(targetdir, "config.yml"))
def _edit_and_commit(self, commit_date, message, change_file_dir):
    """Copy the change files into the working tree and commit them with the
    given author/commit date.

    commit_date     -> datetime used for both author and commit dates
    message         -> commit message
    change_file_dir -> directory whose files are copied into <cwd>/test
    """
    #for every file in the change directory, copy to the new place
    print("copytree {0} {1}".format(change_file_dir, os.getcwd()))
    # os.path.join keeps this portable; the old code hard-coded the
    # Windows separator (os.getcwd() + "\\test").
    new_files = copytree(change_file_dir, os.path.join(os.getcwd(), "test"))
    for file in new_files:
        print("Adding file {0}".format(file))
    # Stage the whole batch once, outside the reporting loop.
    self.repo.index.add(new_files)
    date_in_iso = commit_date.strftime("%Y-%m-%d %H:%M:%S")
    self.repo.index.commit(message, author_date=date_in_iso, commit_date=date_in_iso)
    print("{0}{1}".format(commit_date, message))
def perform_random_validation_split(self, split_size):
    """Performs random split into training and validation sets.

    # Arguments
        split_size: (float), size of validation set in percents
    """
    print('Performing random split with split size: {}'.format(split_size))
    # Work from the parent of the train dir so both split dirs can be rebuilt.
    os.chdir(self.train_dir)
    os.chdir('../')
    shutil.rmtree(self.train_dir)
    shutil.rmtree(self.valid_dir)
    os.makedirs(self.train_dir, exist_ok=True)
    os.makedirs(self.valid_dir, exist_ok=True)
    # Start from a pristine copy of the full training set.
    copytree(self.full_train_dir, self.train_dir)
    os.chdir(self.train_dir)
    # Mirror the per-class folder layout into the validation dir.
    # NOTE(review): string concatenation assumes self.valid_dir and
    # self.train_dir end with a path separator -- confirm against __init__.
    for _class in glob.glob('*'):
        os.mkdir(self.valid_dir + _class)
    # Randomly pick validation images from all class folders; seeded for
    # reproducibility via self.seed.
    train_images_names, valid_images_names = train_test_split(
        glob.glob(self.train_dir + '*/*.*'),
        test_size=split_size,
        random_state=self.seed)
    print('Number of training set images: {}, validation set images: {}'.
          format(len(train_images_names), len(valid_images_names)))
    # Move the sampled validation images out of the train tree, keeping the
    # trailing "<class>/<filename>" part of each path.
    for i in range(len(valid_images_names)):
        os.rename(
            valid_images_names[i],
            '{}/{}'.format(
                self.valid_dir,
                '/'.join(valid_images_names[i].split('/')[-2:])))
    return
# -*- coding: utf-8 -*- import utils targetDir = 'EffekseerForDXLib_160b_322c/' dxlibDir = 'DXLib_VC/' effekseerDir = '../Effekseer/' effekseerVSDir = effekseerDir + 'EffekseerRuntime_DXLib/Compiled/' utils.cdToScript() utils.rmdir(targetDir) utils.mkdir(targetDir) utils.copytree(dxlibDir + 'プロジェクトに追加すべきファイル_VC用/', targetDir + 'プロジェクトに追加すべきファイル_VC用/') utils.copy(effekseerVSDir + 'include/Effekseer.h', targetDir + 'プロジェクトに追加すべきファイル_VC用/') utils.copy(effekseerVSDir + 'include/Effekseer.Modules.h', targetDir + 'プロジェクトに追加すべきファイル_VC用/') utils.copy(effekseerVSDir + 'include/Effekseer.SIMD.h', targetDir + 'プロジェクトに追加すべきファイル_VC用/') utils.copy(effekseerVSDir + 'include/EffekseerRendererDX9.h', targetDir + 'プロジェクトに追加すべきファイル_VC用/') utils.copy(effekseerVSDir + 'include/EffekseerRendererDX11.h', targetDir + 'プロジェクトに追加すべきファイル_VC用/') utils.copy(effekseerVSDir + 'lib/VS2017/Debug/Effekseer.lib', targetDir + 'プロジェクトに追加すべきファイル_VC用/Effekseer_vs2017_x86_d.lib') utils.copy(
# NER training entry point (TensorFlow 1.x). Overall flow: parse arguments
# and parameters, load the dataset, build a tf.Session pinned to /gpu:0,
# create output/model/tensorboard folders, dump the dataset pickle and
# embedding-projector metadata, then loop over epochs: optionally restore a
# pretrained step-1 model (and pre-compute corrector label vectors), train
# one pass over shuffled sequences, predict and evaluate, track micro-F1
# for early stopping (saving the best checkpoint), optionally refine with a
# CRF after the epoch limit, and finally write results and close writers.
# The code below is preserved verbatim from the (whitespace-collapsed)
# original because its nested session/try/while structure cannot be safely
# re-indented from this view; treat it as reference only.
def main(argv=sys.argv): arguments = parse_arguments(argv[1:]) parameters, conf_parameters = load_parameters( arguments['parameters_filepath'], arguments=arguments) dataset_filepaths, dataset_brat_folders = get_valid_dataset_filepaths( parameters) check_parameter_compatiblity(parameters, dataset_filepaths) # Load dataset dataset = ds.Dataset(verbose=parameters['verbose'], debug=parameters['debug']) dataset.load_dataset(dataset_filepaths, parameters) # Create graph and session with tf.device('/gpu:0'): with tf.Graph().as_default(): session_conf = tf.ConfigProto( intra_op_parallelism_threads=parameters[ 'number_of_cpu_threads'], inter_op_parallelism_threads=parameters[ 'number_of_cpu_threads'], device_count={ 'CPU': 1, 'GPU': parameters['number_of_gpus'] }, allow_soft_placement=True, log_device_placement=False) sess = tf.Session(config=session_conf) with sess.as_default(): start_time = time.time() experiment_timestamp = utils.get_current_time_in_miliseconds() results = {} results['epoch'] = {} results['execution_details'] = {} results['execution_details']['train_start'] = start_time results['execution_details'][ 'time_stamp'] = experiment_timestamp results['execution_details']['early_stop'] = False results['execution_details']['keyboard_interrupt'] = False results['execution_details']['num_epochs'] = 0 results['model_options'] = copy.copy(parameters) dataset_name = utils.get_basename_without_extension( parameters['dataset_text_folder']) model_name = dataset_name utils.create_folder_if_not_exists(parameters['output_folder']) stats_graph_folder = os.path.join( parameters['output_folder'], model_name) # Folder where to save graphs final_weights_folder = os.path.join( parameters['output_folder'], 'weights') utils.create_folder_if_not_exists(stats_graph_folder) utils.create_folder_if_not_exists(final_weights_folder) model_folder = os.path.join(stats_graph_folder, 'model') utils.create_folder_if_not_exists(model_folder) with open(os.path.join(model_folder, 
'parameters.ini'), 'w') as parameters_file: conf_parameters.write(parameters_file) tensorboard_log_folder = os.path.join(stats_graph_folder, 'tensorboard_logs') utils.create_folder_if_not_exists(tensorboard_log_folder) tensorboard_log_folders = {} for dataset_type in dataset_filepaths.keys(): tensorboard_log_folders[dataset_type] = os.path.join( stats_graph_folder, 'tensorboard_logs', dataset_type) utils.create_folder_if_not_exists( tensorboard_log_folders[dataset_type]) pickle.dump( dataset, open(os.path.join(model_folder, 'dataset.pickle'), 'wb')) model = EntityLSTM(dataset, parameters) writers = {} for dataset_type in dataset_filepaths.keys(): writers[dataset_type] = tf.summary.FileWriter( tensorboard_log_folders[dataset_type], graph=sess.graph) embedding_writer = tf.summary.FileWriter(model_folder) embeddings_projector_config = projector.ProjectorConfig() tensorboard_token_embeddings = embeddings_projector_config.embeddings.add( ) tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name token_list_file_path = os.path.join( model_folder, 'tensorboard_metadata_tokens.tsv') tensorboard_token_embeddings.metadata_path = os.path.relpath( token_list_file_path, '..') tensorboard_character_embeddings = embeddings_projector_config.embeddings.add( ) tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name character_list_file_path = os.path.join( model_folder, 'tensorboard_metadata_characters.tsv') tensorboard_character_embeddings.metadata_path = os.path.relpath( character_list_file_path, '..') projector.visualize_embeddings(embedding_writer, embeddings_projector_config) token_list_file = codecs.open(token_list_file_path, 'w', 'latin-1') for token_index in range(dataset.vocabulary_size): token_list_file.write('{0}\n'.format( dataset.index_to_token[token_index])) token_list_file.close() character_list_file = codecs.open(character_list_file_path, 'w', 'latin-1') for character_index in range(dataset.alphabet_size): if 
character_index == dataset.PADDING_CHARACTER_INDEX: character_list_file.write('PADDING\n') else: character_list_file.write('{0}\n'.format( dataset.index_to_character[character_index])) character_list_file.close() # Initialize the model sess.run(tf.global_variables_initializer()) if not parameters['use_pretrained_model']: model.load_pretrained_token_embeddings( sess, dataset, parameters) patience_counter = 0 # number of epochs with no improvement on the validation test in terms of F1-score f1_score_best = 0 f1_scores = {'train-F1': [], 'valid-F1': [], 'test-F1': []} transition_params_trained = np.random.rand( len(dataset.unique_labels) + 2, len(dataset.unique_labels) + 2) model_saver = tf.train.Saver( max_to_keep=parameters['num_of_model_to_keep'] ) #, reshape= True) # defaults to saving all variables epoch_number = -1 try: while True: step = 0 epoch_number += 1 print('\nStarting epoch {0}'.format(epoch_number)) epoch_start_time = time.time() if parameters[ 'use_pretrained_model'] and epoch_number == 0: if parameters['use_corrector']: parameters['use_corrector'] = False transition_params_trained = train.restore_pretrained_model( parameters, dataset, sess, model, model_saver) print( 'Getting the 3-label predictions from the step1 model.' ) all_pred_labels, y_pred_for_corrector, y_true_for_corrector, \ output_filepaths = train.predict_labels(sess, model, transition_params_trained, parameters, dataset, epoch_number, stats_graph_folder, dataset_filepaths, for_corrector = True) all_pred_indices = {} #defaultdict(list) for dataset_type in dataset_filepaths.keys(): all_pred_indices[dataset_type] = [] for i in range( len(all_pred_labels[dataset_type]) ): indices = [ dataset. label_corrector_to_index[label] for label in all_pred_labels[dataset_type][i] ] all_pred_indices[dataset_type].append( indices) label_binarizer_corrector = sklearn.preprocessing.LabelBinarizer( ) label_binarizer_corrector.fit( range( max(dataset.index_to_label_corrector. 
keys()) + 1)) predicted_label_corrector_vector_indices = {} for dataset_type in dataset_filepaths.keys(): predicted_label_corrector_vector_indices[ dataset_type] = [] for label_indices_sequence in all_pred_indices[ dataset_type]: predicted_label_corrector_vector_indices[ dataset_type].append( label_binarizer_corrector. transform( label_indices_sequence)) parameters['use_corrector'] = True transition_params_trained, model, glo_step = \ train.restore_model_parameters_from_pretrained_model(parameters, dataset, sess, model, model_saver) for dataset_type in dataset_filepaths.keys(): writers[dataset_type] = tf.summary.FileWriter( tensorboard_log_folders[dataset_type], graph=sess.graph) embedding_writer = tf.summary.FileWriter( model_folder) init_new_vars_op = tf.initialize_variables( [glo_step]) sess.run(init_new_vars_op) elif epoch_number != 0: sequence_numbers = list( range(len(dataset.token_indices['train']))) random.shuffle(sequence_numbers) for sequence_number in sequence_numbers: transition_params_trained, W_before_crf = train.train_step( sess, dataset, sequence_number, model, transition_params_trained, parameters) step += 1 epoch_elapsed_training_time = time.time( ) - epoch_start_time print('Training completed in {0:.2f} seconds'.format( epoch_elapsed_training_time), flush=False) if parameters['use_corrector']: original_label_corrector_vector_indices = dataset.label_corrector_vector_indices dataset.label_corrector_vector_indices = predicted_label_corrector_vector_indices y_pred, y_true, output_filepaths = train.predict_labels( sess, model, transition_params_trained, parameters, dataset, epoch_number, stats_graph_folder, dataset_filepaths) # Evaluate model: save and plot results evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder, epoch_number, epoch_start_time, output_filepaths, parameters) dataset.label_corrector_vector_indices = original_label_corrector_vector_indices else: y_pred, y_true, output_filepaths = train.predict_labels( sess, 
model, transition_params_trained, parameters, dataset, epoch_number, stats_graph_folder, dataset_filepaths) # Evaluate model: save and plot results evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder, epoch_number, epoch_start_time, output_filepaths, parameters) summary = sess.run(model.summary_op, feed_dict=None) writers['train'].add_summary(summary, epoch_number) writers['train'].flush() utils.copytree(writers['train'].get_logdir(), model_folder) # Early stopping train_f1_score = results['epoch'][epoch_number][0][ 'train']['f1_score']['micro'] valid_f1_score = results['epoch'][epoch_number][0][ 'valid']['f1_score']['micro'] test_f1_score = results['epoch'][epoch_number][0][ 'test']['f1_score']['micro'] f1_scores['train-F1'].append(train_f1_score) f1_scores['valid-F1'].append(valid_f1_score) f1_scores['test-F1'].append(test_f1_score) if valid_f1_score > f1_score_best: patience_counter = 0 f1_score_best = valid_f1_score # Save the best model model_saver.save( sess, os.path.join(model_folder, 'best_model.ckpt')) print( 'updated model to current epoch : epoch {:d}'. format(epoch_number)) print('the model is saved in: {:s}'.format( model_folder)) ### newly deleted else: patience_counter += 1 print("In epoch {:d}, the valid F1 is : {:f}".format( epoch_number, valid_f1_score)) print( "The last {0} epochs have not shown improvements on the validation set." 
.format(patience_counter)) if patience_counter >= parameters['patience']: print('Early Stop!') results['execution_details']['early_stop'] = True if epoch_number >= parameters[ 'maximum_number_of_epochs'] and parameters[ 'refine_with_crf']: model = train.refine_with_crf( parameters, sess, model, model_saver) print('refine model with CRF ...') for additional_epoch in range( parameters['additional_epochs_with_crf']): print('Additional {:d}th epoch'.format( additional_epoch)) sequence_numbers = list( range(len(dataset.token_indices['train']))) random.shuffle(sequence_numbers) for sequence_number in sequence_numbers: transition_params_trained, W_before_crf = train.train_step( sess, dataset, sequence_number, model, transition_params_trained, parameters) step += 1 epoch_elapsed_training_time = time.time( ) - epoch_start_time print( 'Additional training completed in {0:.2f} seconds' .format(epoch_elapsed_training_time), flush=False) y_pred, y_true, output_filepaths = train.predict_labels( sess, model, transition_params_trained, parameters, dataset, epoch_number, stats_graph_folder, dataset_filepaths) evaluate.evaluate_model( results, dataset, y_pred, y_true, stats_graph_folder, epoch_number, epoch_start_time, output_filepaths, parameters) summary = sess.run(model.summary_op, feed_dict=None) writers['train'].add_summary( summary, epoch_number) writers['train'].flush() utils.copytree(writers['train'].get_logdir(), model_folder) if epoch_number >= parameters[ 'maximum_number_of_epochs'] and not parameters[ 'refine_with_crf']: break if not parameters['use_pretrained_model']: plot_name = 'F1-summary-step1.svg' else: plot_name = 'F1-summary-step2.svg' for k, l in f1_scores.items(): print(k, l) utils_plots.plot_f1( f1_scores, os.path.join(stats_graph_folder, '..', plot_name), 'F1 score summary') except KeyboardInterrupt: results['execution_details']['keyboard_interrupt'] = True print('Training interrupted') print('Finishing the experiment') end_time = time.time() 
results['execution_details'][ 'train_duration'] = end_time - start_time results['execution_details']['train_end'] = end_time evaluate.save_results(results, stats_graph_folder) for dataset_type in dataset_filepaths.keys(): writers[dataset_type].close() sess.close()
def setUp(self):
    """Prepare each test with a fresh working copy seeded from the bare fixture repo."""
    self.init_temp_path()
    workdir = self.get_temp_path()
    self.path = workdir
    copytree(BARE_REPO_PATH, workdir)
# -*- coding: utf-8 -*-
"""Assemble the EffekseerForDXLib binary distribution folder.

Copies the help files, the DX Library "files to add to your project"
folder, and the Effekseer headers / static libraries (renamed per
architecture and build configuration) into the release directory.
"""
import utils

targetDir = 'EffekseerForDXLib_143_320a/'
dxlibDir = 'DXLib_VC/'
effekseerDir = '../Effekseer/'
effekseerVSDir = effekseerDir + 'EffekseerRuntime143/Compiled/'

# Destination folder holding everything a user adds to a VC project.
# Hoisted once instead of re-concatenating the same string for every copy.
vcDir = targetDir + 'プロジェクトに追加すべきファイル_VC用/'

utils.cdToScript()
utils.rmdir(targetDir)
utils.mkdir(targetDir)

utils.copytree('docs/', targetDir + 'Help/')
utils.copytree(dxlibDir + 'プロジェクトに追加すべきファイル_VC用/', vcDir)

# Effekseer public headers.
for header in ('Effekseer.h', 'EffekseerRendererDX9.h', 'EffekseerRendererDX11.h'):
    utils.copy(effekseerVSDir + 'include/' + header, vcDir)

# VS2015 x86 static libraries; Debug builds get a "_d" suffix.
# Table-driven to replace six copy-pasted utils.copy calls.
for config, suffix in (('Debug', '_d'), ('Release', '')):
    for lib in ('Effekseer', 'EffekseerRendererDX9', 'EffekseerRendererDX11'):
        utils.copy(effekseerVSDir + 'lib/VS2015/' + config + '/' + lib + '.lib',
                   vcDir + lib + '_vs2015_x86' + suffix + '.lib')

# VS2015 x64 Debug core library.
utils.copy(effekseerVSDir + 'lib/VS2015WIN64/Debug/Effekseer.lib',
           vcDir + 'Effekseer_vs2015_x64_d.lib')
def main(languages):
    """Run one NER training experiment per (language, embedding, char-LSTM) combination.

    For every combination this builds a configuration, trains an EntityLSTM
    model inside a fresh TensorFlow graph/session, evaluates after each epoch
    (plus intermediate evaluations roughly every 20k training sequences),
    applies early stopping, and writes checkpoints, TensorBoard logs and
    result summaries under ../output/<model_name>.

    Parameters
    ----------
    languages : iterable of str
        Dataset languages to run experiments for.
    """
    #embeddings_type = ['polyglot', 'fasttext']
    #embeddings_type = ['fasttext', 'fasttext_noOOV']
    embeddings_type = ['fasttext_noOOV']
    character_lstm = [True]
    embedding_language = ['target', 'source']
    # Cartesian product of every experiment dimension.
    combination = product(languages, embeddings_type, embedding_language, character_lstm)
    create_folder_if_not_exists(os.path.join("..", "log"))
    experiment_timestamp = utils.get_current_time_in_miliseconds()
    log_file = os.path.join("..", "log", "experiment-{}.log".format(experiment_timestamp))
    for language, emb_type, emb_language, char_lstm in combination:
        # Build the configuration for this experiment.
        conf_parameters = load_parameters()
        conf_parameters = set_datasets(conf_parameters, language)
        conf_parameters.set('ann', 'use_character_lstm', str(char_lstm))
        conf_parameters.set('ann', 'embedding_type', emb_type)
        conf_parameters.set('ann', 'embedding_language', emb_language)
        if emb_type == 'polyglot':
            conf_parameters.set('ann', 'embedding_dimension', str(64))
        elif 'fasttext' in emb_type:
            conf_parameters.set('ann', 'embedding_dimension', str(300))
        else:
            # NOTE(review): raising a plain string is a TypeError in Python 3;
            # this should be e.g. ``raise ValueError("Unknown embedding type")``.
            raise("Uknown embedding type")
        if emb_language == 'source':
            conf_parameters.set('dataset', 'language', constants.MAPPING_LANGUAGE[language])
        else:
            conf_parameters.set('dataset', 'language', language)
        parameters, conf_parameters = parse_parameters(conf_parameters)

        # Record execution metadata for this run.
        start_time = time.time()
        experiment_timestamp = utils.get_current_time_in_miliseconds()
        results = {}
        results['epoch'] = {}
        results['execution_details'] = {}
        results['execution_details']['train_start'] = start_time
        results['execution_details']['time_stamp'] = experiment_timestamp
        results['execution_details']['early_stop'] = False
        results['execution_details']['keyboard_interrupt'] = False
        results['execution_details']['num_epochs'] = 0
        results['model_options'] = copy.copy(parameters)
        dataset_name = utils.get_basename_without_extension(parameters['dataset_train'])
        model_name = '{0}_{1}_{2}_{3}_{4}'.format(
            language, emb_type, char_lstm, emb_language,
            results['execution_details']['time_stamp'])
        # All prints from here on go to the per-experiment log file.
        sys.stdout = open(os.path.join("..", "log", model_name), "w")
        print(language, emb_type, char_lstm, emb_language)
        with open(log_file, "a") as file:
            file.write("Experiment: {}\n".format(model_name))
            file.write("Start time:{}\n".format(experiment_timestamp))
            file.write("-------------------------------------\n\n")
        pprint(parameters)
        dataset_filepaths = get_valid_dataset_filepaths(parameters)
        check_parameter_compatiblity(parameters, dataset_filepaths)
        previous_best_valid_epoch = -1

        # Load dataset
        dataset = ds.Dataset(verbose=parameters['verbose'], debug=parameters['debug'])
        dataset.load_vocab_word_embeddings(parameters)
        dataset.load_dataset(dataset_filepaths, parameters)

        # Create graph and session
        with tf.Graph().as_default():
            session_conf = tf.ConfigProto(
                intra_op_parallelism_threads=parameters['number_of_cpu_threads'],
                inter_op_parallelism_threads=parameters['number_of_cpu_threads'],
                device_count={'CPU': 1, 'GPU': parameters['number_of_gpus']},
                allow_soft_placement=True,  # automatically choose an existing and supported device to run the operations in case the specified one doesn't exist
                log_device_placement=False
            )
            session_conf.gpu_options.allow_growth = True
            sess = tf.Session(config=session_conf)
            with sess.as_default():
                # Initialize and save execution details
                print(model_name)
                output_folder = os.path.join('..', 'output')
                utils.create_folder_if_not_exists(output_folder)
                stats_graph_folder = os.path.join(output_folder, model_name)  # Folder where to save graphs
                utils.create_folder_if_not_exists(stats_graph_folder)
                model_folder = os.path.join(stats_graph_folder, 'model')
                utils.create_folder_if_not_exists(model_folder)
                with open(os.path.join(model_folder, 'parameters.ini'), 'w') as parameters_file:
                    conf_parameters.write(parameters_file)
                tensorboard_log_folder = os.path.join(stats_graph_folder, 'tensorboard_logs')
                utils.create_folder_if_not_exists(tensorboard_log_folder)
                tensorboard_log_folders = {}
                for dataset_type in dataset_filepaths.keys():
                    tensorboard_log_folders[dataset_type] = os.path.join(stats_graph_folder, 'tensorboard_logs', dataset_type)
                    utils.create_folder_if_not_exists(tensorboard_log_folders[dataset_type])
                # del dataset.embeddings_matrix
                if not parameters['use_pretrained_model']:
                    # Persist the dataset so a later run can restore the exact vocabulary.
                    pickle.dump(dataset, open(os.path.join(model_folder, 'dataset.pickle'), 'wb'))
                # dataset.load_pretrained_word_embeddings(parameters)
                # Instantiate the model
                # graph initialization should be before FileWriter, otherwise the graph will not appear in TensorBoard
                model = EntityLSTM(dataset, parameters)

                # Instantiate the writers for TensorBoard
                writers = {}
                for dataset_type in dataset_filepaths.keys():
                    writers[dataset_type] = tf.summary.FileWriter(tensorboard_log_folders[dataset_type], graph=sess.graph)
                embedding_writer = tf.summary.FileWriter(model_folder)  # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings

                embeddings_projector_config = projector.ProjectorConfig()
                tensorboard_token_embeddings = embeddings_projector_config.embeddings.add()
                tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
                token_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_tokens.tsv')
                tensorboard_token_embeddings.metadata_path = os.path.relpath(token_list_file_path, '..')
                if parameters['use_character_lstm']:
                    tensorboard_character_embeddings = embeddings_projector_config.embeddings.add()
                    tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
                    character_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_characters.tsv')
                    tensorboard_character_embeddings.metadata_path = os.path.relpath(character_list_file_path, '..')
                projector.visualize_embeddings(embedding_writer, embeddings_projector_config)

                # Write metadata for TensorBoard embeddings
                token_list_file = codecs.open(token_list_file_path, 'w', 'UTF-8')
                for token_index in range(len(dataset.index_to_token)):
                    token_list_file.write('{0}\n'.format(dataset.index_to_token[token_index]))
                token_list_file.close()
                if parameters['use_character_lstm']:
                    character_list_file = codecs.open(character_list_file_path, 'w', 'UTF-8')
                    for character_index in range(dataset.alphabet_size):
                        if character_index == dataset.PADDING_CHARACTER_INDEX:
                            character_list_file.write('PADDING\n')
                        else:
                            character_list_file.write('{0}\n'.format(dataset.index_to_character[character_index]))
                    character_list_file.close()

                try:
                    # Initialize the model
                    sess.run(tf.global_variables_initializer())
                    if not parameters['use_pretrained_model']:
                        model.load_pretrained_token_embeddings(sess, dataset, parameters)

                    # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
                    bad_counter = 0  # number of epochs with no improvement on the validation test in terms of F1-score
                    previous_best_valid_f1_score = -1
                    transition_params_trained = np.random.rand(len(dataset.unique_labels), len(dataset.unique_labels))  # TODO np.random.rand(len(dataset.unique_labels)+2,len(dataset.unique_labels)+2)
                    model_saver = tf.train.Saver(max_to_keep=None)  # parameters['maximum_number_of_epochs'])  # defaults to saving all variables
                    epoch_number = 0
                    while True:
                        step = 0
                        epoch_number += 1
                        print('\nStarting epoch {0}'.format(epoch_number))
                        epoch_start_time = time.time()
                        if parameters['use_pretrained_model'] and epoch_number == 1:
                            # Restore pretrained model parameters
                            transition_params_trained = train.restore_model_parameters_from_pretrained_model(parameters, dataset, sess, model, model_saver)
                        elif epoch_number != 0:
                            # Train model: loop over all sequences of training set with shuffling
                            sequence_numbers = list(range(len(dataset.token_indices['train'])))
                            random.shuffle(sequence_numbers)
                            data_counter = 0
                            sub_id = 0
                            for i in tqdm(range(0, len(sequence_numbers), parameters['batch_size']), "Training epoch {}".format(epoch_number), mininterval=1):
                                data_counter += parameters['batch_size']
                                if data_counter >= 20000:
                                    # Roughly every 20k training sequences: run an
                                    # intermediate evaluation and checkpoint tagged
                                    # with a fractional epoch id (epoch + sub_id).
                                    data_counter = 0
                                    sub_id += 0.001
                                    print("Intermediate evaluation number: ", sub_id)
                                    epoch_elapsed_training_time = time.time() - epoch_start_time
                                    print('Training completed in {0:.2f} seconds'.format(epoch_elapsed_training_time), flush=True)
                                    y_pred, y_true, output_filepaths = train.predict_labels(sess, model, transition_params_trained, parameters, dataset, epoch_number + sub_id, stats_graph_folder, dataset_filepaths)
                                    # Evaluate model: save and plot results
                                    evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder, epoch_number, epoch_start_time, output_filepaths, parameters)
                                    # Save model
                                    model_saver.save(sess, os.path.join(model_folder, 'model_{0:07.3f}.ckpt'.format(epoch_number + sub_id)))
                                    # Save TensorBoard logs
                                    summary = sess.run(model.summary_op, feed_dict=None)
                                    writers['train'].add_summary(summary, epoch_number)
                                    writers['train'].flush()
                                    utils.copytree(writers['train'].get_logdir(), model_folder)
                                    # Early stop
                                    valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
                                    if valid_f1_score > previous_best_valid_f1_score:
                                        bad_counter = 0
                                        previous_best_valid_f1_score = valid_f1_score
                                    else:
                                        bad_counter += 1
                                # One optimization step on the next mini-batch of sequences.
                                sequence_number = sequence_numbers[i: i + parameters['batch_size']]
                                transition_params_trained, loss = train.train_step(sess, dataset, sequence_number, model, transition_params_trained, parameters)
                        # End-of-epoch evaluation and checkpoint.
                        epoch_elapsed_training_time = time.time() - epoch_start_time
                        print('Training completed in {0:.2f} seconds'.format(epoch_elapsed_training_time), flush=True)
                        y_pred, y_true, output_filepaths = train.predict_labels(sess, model, transition_params_trained, parameters, dataset, epoch_number, stats_graph_folder, dataset_filepaths)
                        # Evaluate model: save and plot results
                        evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder, epoch_number, epoch_start_time, output_filepaths, parameters)
                        # Save model
                        model_saver.save(sess, os.path.join(model_folder, 'model_{0:05d}.ckpt'.format(epoch_number)))
                        # Save TensorBoard logs
                        summary = sess.run(model.summary_op, feed_dict=None)
                        writers['train'].add_summary(summary, epoch_number)
                        writers['train'].flush()
                        utils.copytree(writers['train'].get_logdir(), model_folder)
                        # Early stop
                        valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
                        if valid_f1_score > previous_best_valid_f1_score:
                            bad_counter = 0
                            previous_best_valid_f1_score = valid_f1_score
                            previous_best_valid_epoch = epoch_number
                        else:
                            bad_counter += 1
                        print("The last {0} epochs have not shown improvements on the validation set.".format(bad_counter))
                        if bad_counter >= parameters['patience']:
                            print('Early Stop!')
                            results['execution_details']['early_stop'] = True
                            break
                        if epoch_number >= parameters['maximum_number_of_epochs']:
                            break
                    # Keep only the checkpoint of the best validation epoch.
                    keep_only_best_model(model_folder, previous_best_valid_epoch, parameters['maximum_number_of_epochs'] + 1)
                except KeyboardInterrupt:
                    results['execution_details']['keyboard_interrupt'] = True
                    print('Training interrupted')
                    # remove the experiment
                    remove_experiment = input("Do you want to remove the experiment? (yes/y/Yes)")
                    if remove_experiment in ["Yes", "yes", "y"]:
                        shutil.rmtree(stats_graph_folder)
                        print("Folder removed")
                    else:
                        print('Finishing the experiment')
                        end_time = time.time()
                        results['execution_details']['train_duration'] = end_time - start_time
                        results['execution_details']['train_end'] = end_time
                        evaluate.save_results(results, stats_graph_folder)
                        sys.stdout.close()
                except Exception:
                    # Log the traceback, then offer to delete the failed experiment.
                    logging.exception("")
                    remove_experiment = input("Do you want to remove the experiment? (yes/y/Yes)")
                    if remove_experiment in ["Yes", "yes", "y"]:
                        shutil.rmtree(stats_graph_folder)
                        print("Folder removed")
                    sys.stdout.close()
                sess.close()  # release the session's resources
        sys.stdout.close()
def fit(self):
    """Train the model, evaluating after every epoch, with early stopping.

    Runs the train/evaluate loop until early stopping triggers (no
    validation-F1 improvement for ``patience`` epochs) or
    ``maximum_number_of_epochs`` is reached, saving checkpoints,
    TensorBoard logs, brat output and a results summary under the
    experiment's stats folder. In prediction-only mode
    (``use_pretrained_model`` and not ``train_model``) it evaluates once
    and stops. Updates ``self.transition_params_trained`` whenever the
    validation F1 improves.
    """
    parameters = self.parameters
    conf_parameters = self.conf_parameters
    dataset_filepaths = self.dataset_filepaths
    dataset = self.dataset
    dataset_brat_folders = self.dataset_brat_folders
    sess = self.sess
    model = self.model
    transition_params_trained = self.transition_params_trained
    stats_graph_folder, experiment_timestamp = self._create_stats_graph_folder(parameters)

    # Initialize and save execution details
    start_time = time.time()
    results = {}
    results['epoch'] = {}
    results['execution_details'] = {}
    results['execution_details']['train_start'] = start_time
    results['execution_details']['time_stamp'] = experiment_timestamp
    results['execution_details']['early_stop'] = False
    results['execution_details']['keyboard_interrupt'] = False
    results['execution_details']['num_epochs'] = 0
    results['model_options'] = copy.copy(parameters)

    model_folder = os.path.join(stats_graph_folder, 'model')
    utils.create_folder_if_not_exists(model_folder)
    with open(os.path.join(model_folder, 'parameters.ini'), 'w') as parameters_file:
        conf_parameters.write(parameters_file)
    # Persist the dataset so later prediction runs can restore the exact vocabulary.
    pickle.dump(dataset, open(os.path.join(model_folder, 'dataset.pickle'), 'wb'))

    tensorboard_log_folder = os.path.join(stats_graph_folder, 'tensorboard_logs')
    utils.create_folder_if_not_exists(tensorboard_log_folder)
    tensorboard_log_folders = {}
    for dataset_type in dataset_filepaths.keys():
        tensorboard_log_folders[dataset_type] = os.path.join(stats_graph_folder, 'tensorboard_logs', dataset_type)
        utils.create_folder_if_not_exists(tensorboard_log_folders[dataset_type])

    # Instantiate the writers for TensorBoard
    writers = {}
    for dataset_type in dataset_filepaths.keys():
        writers[dataset_type] = tf.summary.FileWriter(tensorboard_log_folders[dataset_type], graph=sess.graph)
    embedding_writer = tf.summary.FileWriter(model_folder)  # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings

    embeddings_projector_config = projector.ProjectorConfig()
    tensorboard_token_embeddings = embeddings_projector_config.embeddings.add()
    tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
    token_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_tokens.tsv')
    tensorboard_token_embeddings.metadata_path = os.path.relpath(token_list_file_path, '..')

    tensorboard_character_embeddings = embeddings_projector_config.embeddings.add()
    tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
    character_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_characters.tsv')
    tensorboard_character_embeddings.metadata_path = os.path.relpath(character_list_file_path, '..')

    projector.visualize_embeddings(embedding_writer, embeddings_projector_config)

    # Write metadata for TensorBoard embeddings
    token_list_file = codecs.open(token_list_file_path, 'w', 'UTF-8')
    for token_index in range(dataset.vocabulary_size):
        token_list_file.write('{0}\n'.format(dataset.index_to_token[token_index]))
    token_list_file.close()

    character_list_file = codecs.open(character_list_file_path, 'w', 'UTF-8')
    for character_index in range(dataset.alphabet_size):
        if character_index == dataset.PADDING_CHARACTER_INDEX:
            character_list_file.write('PADDING\n')
        else:
            character_list_file.write('{0}\n'.format(dataset.index_to_character[character_index]))
    character_list_file.close()

    # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
    bad_counter = 0  # number of epochs with no improvement on the validation test in terms of F1-score
    previous_best_valid_f1_score = 0
    epoch_number = -1
    try:
        while True:
            step = 0
            epoch_number += 1
            print('\nStarting epoch {0}'.format(epoch_number))
            epoch_start_time = time.time()
            # Epoch 0 is evaluation-only (no training pass).
            if epoch_number != 0:
                # Train model: loop over all sequences of training set with shuffling
                sequence_numbers = list(range(len(dataset.token_indices['train'])))
                random.shuffle(sequence_numbers)
                for sequence_number in sequence_numbers:
                    transition_params_trained = train.train_step(sess, dataset, sequence_number, model, parameters)
                    step += 1
                    if step % 10 == 0:
                        print('Training {0:.2f}% done'.format(step / len(sequence_numbers) * 100), end='\r', flush=True)
            epoch_elapsed_training_time = time.time() - epoch_start_time
            print('Training completed in {0:.2f} seconds'.format(epoch_elapsed_training_time), flush=True)

            y_pred, y_true, output_filepaths = train.predict_labels(sess, model, transition_params_trained, parameters, dataset, epoch_number, stats_graph_folder, dataset_filepaths)

            # Evaluate model: save and plot results
            evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder, epoch_number, epoch_start_time, output_filepaths, parameters)

            if parameters['use_pretrained_model'] and not parameters['train_model']:
                # Prediction-only mode: emit brat output once and stop.
                conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder)
                break

            # Save model
            model.saver.save(sess, os.path.join(model_folder, 'model_{0:05d}.ckpt'.format(epoch_number)))

            # Save TensorBoard logs
            summary = sess.run(model.summary_op, feed_dict=None)
            writers['train'].add_summary(summary, epoch_number)
            writers['train'].flush()
            utils.copytree(writers['train'].get_logdir(), model_folder)

            # Early stop
            valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
            if valid_f1_score > previous_best_valid_f1_score:
                bad_counter = 0
                previous_best_valid_f1_score = valid_f1_score
                conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder, overwrite=True)
                self.transition_params_trained = transition_params_trained
            else:
                bad_counter += 1
            print("The last {0} epochs have not shown improvements on the validation set.".format(bad_counter))

            if bad_counter >= parameters['patience']:
                print('Early Stop!')
                results['execution_details']['early_stop'] = True
                break

            if epoch_number >= parameters['maximum_number_of_epochs']:
                break
    except KeyboardInterrupt:
        results['execution_details']['keyboard_interrupt'] = True
        print('Training interrupted')

    print('Finishing the experiment')
    end_time = time.time()
    results['execution_details']['train_duration'] = end_time - start_time
    results['execution_details']['train_end'] = end_time
    evaluate.save_results(results, stats_graph_folder)
    for dataset_type in dataset_filepaths.keys():
        writers[dataset_type].close()
def test_check_project_test_workflow(self):
    """ Validate new project to test via zuul layout.yaml
    """
    # We want to create a project, provide project source
    # code with tests. We then configure zuul/jjb to handle the
    # run of the test cases. We then validate Gerrit has been
    # updated about the test results
    # We use the sample-project (that already exists)
    # NOTE(review): this test uses the Python 2 ``file()`` builtin and
    # writes without close(); it relies on CPython refcounting to flush.

    pname = 'test_workflow_%s' % create_random_str()
    # Be sure the project does not exist
    self.msu.deleteProject(pname, config.ADMIN_USER)
    # Create it
    self.create_project(pname, config.ADMIN_USER)

    # Add the sample-project to the empty repository
    clone_dir = self.clone_as_admin(pname)
    copytree(self.sample_project_dir, clone_dir)
    self.commit_direct_push_as_admin(clone_dir, "Add the sample project")

    # Change to config/zuul/layout.yaml and jobs/projects.yaml
    # in order to test the new project
    # Point the zuul-demo Zuul project entry at the new project name.
    ycontent = file(os.path.join(
        self.config_clone_dir, "zuul/projects.yaml")).read()
    file(os.path.join(
        self.config_clone_dir, "zuul/projects.yaml"), 'w').write(
        ycontent.replace("zuul-demo", pname),
    )
    # Duplicate the zuul-demo JJB project entry under the new name.
    ycontent2 = load(file(os.path.join(
        self.config_clone_dir, "jobs/projects.yaml")).read())
    sp2 = copy.deepcopy(
        [p for p in ycontent2 if 'project' in p and
         p['project']['name'] == 'zuul-demo'][0])
    sp2['project']['name'] = pname
    ycontent2.append(sp2)
    file(os.path.join(
        self.config_clone_dir, "jobs/projects.yaml"), 'w').write(
        dump(ycontent2))

    # Retrieve the previous build number for config-check
    last_success_build_num_ch = \
        self.ju.get_last_build_number("config-check", "lastSuccessfulBuild")
    # Retrieve the previous build number for config-update
    last_success_build_num_cu = \
        self.ju.get_last_build_number("config-update", "lastSuccessfulBuild")

    # Send review (config-check) will be triggered
    self.push_review_as_admin(
        self.config_clone_dir,
        "Add config definition in Zuul/JJB config for %s" % pname)

    # Wait for config-check to finish and verify the success
    self.ju.wait_till_job_completes("config-check",
                                    last_success_build_num_ch,
                                    "lastSuccessfulBuild")
    # Poll (up to ~90s) until the last build equals the last successful one.
    last_build_num_ch, last_success_build_num_ch = 0, 1
    attempt = 0
    while last_build_num_ch != last_success_build_num_ch:
        if attempt >= 90:
            break
        time.sleep(1)
        last_build_num_ch = \
            self.ju.get_last_build_number("config-check", "lastBuild")
        last_success_build_num_ch = \
            self.ju.get_last_build_number("config-check",
                                          "lastSuccessfulBuild")
        attempt += 1

    self.assertEqual(last_build_num_ch, last_success_build_num_ch)
    # let some time to Zuul to update the test result to Gerrit.
    time.sleep(2)

    # Get the change id
    change_ids = self.gu.get_my_changes_for_project("config")
    self.assertGreater(len(change_ids), 0)
    change_id = change_ids[0]

    # Check whether zuul sets verified to +1 after running the tests
    # let some time to Zuul to update the test result to Gerrit.
    self.assert_reviewer_approvals(change_id, '+1')

    # review the change
    self.gu2.submit_change_note(change_id, "current", "Code-Review", "2")
    self.gu2.submit_change_note(change_id, "current", "Workflow", "1")

    # now zuul processes gate pipeline and runs config-check job
    # Wait for config-check to finish and verify the success
    self.ju.wait_till_job_completes("config-check",
                                    last_success_build_num_ch,
                                    "lastSuccessfulBuild")
    # Same polling pattern as above for the gate-pipeline run.
    last_build_num_ch, last_success_build_num_ch = 0, 1
    attempt = 0
    while last_build_num_ch != last_success_build_num_ch:
        if attempt >= 90:
            break
        time.sleep(1)
        last_build_num_ch = \
            self.ju.get_last_build_number("config-check", "lastBuild")
        last_success_build_num_ch = \
            self.ju.get_last_build_number("config-check",
                                          "lastSuccessfulBuild")
        attempt += 1

    self.assertEqual(last_build_num_ch, last_success_build_num_ch)

    # Check whether zuul sets verified to +2 after running the tests
    # let some time to Zuul to update the test result to Gerrit.
    self.assert_reviewer_approvals(change_id, '+2')

    # verify whether zuul merged the patch
    change = self.gu.get_change('config', 'master', change_id)
    change_status = change['status']
    attempt = 0
    while change_status != 'MERGED':
        if attempt >= 90:
            break
        time.sleep(1)
        change = self.gu.get_change('config', 'master', change_id)
        change_status = change['status']
        attempt += 1
    self.assertEqual(change_status, 'MERGED')

    # Test post pipe line
    # as the patch is merged, post pieline should run config-update job
    # Wait for config-update to finish and verify the success
    self.ju.wait_till_job_completes("config-update",
                                    last_success_build_num_cu,
                                    "lastSuccessfulBuild")
    last_build_num_cu = \
        self.ju.get_last_build_number("config-update", "lastBuild")
    last_success_build_num_cu = \
        self.ju.get_last_build_number("config-update", "lastSuccessfulBuild")
    self.assertEqual(last_build_num_cu, last_success_build_num_cu)

    # Retrieve the prev build number for pname-unit-tests
    # Retrieve the prev build number for pname-functional-tests
    last_success_build_num_sp_ut = \
        self.ju.get_last_build_number("%s-unit-tests" % pname,
                                      "lastSuccessfulBuild")
    last_success_build_num_sp_ft = \
        self.ju.get_last_build_number("%s-functional-tests" % pname,
                                      "lastSuccessfulBuild")

    # Test config-update
    # config-update should have created jobs for pname
    # Trigger tests on pname
    # Send a review and check tests has been run
    self.gitu_admin.add_commit_and_publish(
        clone_dir, 'master', "Add useless file", self.un)

    # Wait for pname-unit-tests to finish and verify the success
    self.ju.wait_till_job_completes("%s-unit-tests" % pname,
                                    last_success_build_num_sp_ut,
                                    "lastSuccessfulBuild")
    # Wait for pname-functional-tests to end and check the success
    self.ju.wait_till_job_completes("%s-functional-tests" % pname,
                                    last_success_build_num_sp_ft,
                                    "lastSuccessfulBuild")

    # Check the unit tests succeed
    last_build_num_sp_ut = \
        self.ju.get_last_build_number("%s-unit-tests" % pname, "lastBuild")
    last_success_build_num_sp_ut = \
        self.ju.get_last_build_number("%s-unit-tests" % pname,
                                      "lastSuccessfulBuild")
    self.assertEqual(last_build_num_sp_ut, last_success_build_num_sp_ut)

    # Check the functional tests succeed
    last_build_num_sp_ft = \
        self.ju.get_last_build_number("%s-functional-tests" % pname,
                                      "lastBuild")
    last_success_build_num_sp_ft = \
        self.ju.get_last_build_number("%s-functional-tests" % pname,
                                      "lastSuccessfulBuild")
    self.assertEqual(last_build_num_sp_ft, last_success_build_num_sp_ft)

    # Get the change id
    change_ids = self.gu.get_my_changes_for_project(pname)
    self.assertGreater(len(change_ids), 0)
    change_id = change_ids[0]

    # let some time to Zuul to update the test result to Gerrit.
    for i in range(90):
        if "jenkins" in self.gu.get_reviewers(change_id):
            break
        time.sleep(1)
    self.assert_reviewer_approvals(change_id, '+1')
def __main__():
    """Run the PyDwarf export pipeline described by ``config.export``.

    Loads the configured raws, optionally backs them up, runs all
    configured scripts, then rewrites the raws to the output directory.
    Exits the process with status 1 on any fatal configuration error.
    """
    # Get configuration
    conf = config.export
    if not conf:
        pydwarf.log.error(
            'No configuration specified. Imported config package must contain an export variable.'
        )
        exit(1)

    # Things to do with versions
    pydwarf.log.info('Running PyDwarf %s.' % pydwarf.__version__)
    if conf.version is not None:
        pydwarf.log.info('Managing Dwarf Fortress version %s.' % conf.version)
        pydwarf.urist.session.dfversion = conf.version
    else:
        pydwarf.log.error(
            'No Dwarf Fortress version was specified in conf. Scripts will be run regardless of their indicated compatibility.'
        )

    # Verify that input directory exists
    if not os.path.exists(conf.input):
        pydwarf.log.error('Specified raws directory %s does not exist.' % conf.input)
        exit(1)

    # Make backup
    if conf.backup is not None:
        pydwarf.log.info('Backing up raws to %s...' % conf.backup)
        try:
            copytree(conf.input, conf.backup)
        # Fix: catch Exception instead of a bare except so SystemExit and
        # KeyboardInterrupt are not swallowed by the backup step.
        except Exception:
            pydwarf.log.error('Failed to create backup.')
            exit(1)
    else:
        pydwarf.log.warning('Proceeding without backing up raws.')

    # Read input raws
    pydwarf.log.info('Reading raws from input directory %s...' % conf.input)
    pydwarf.urist.session.dfraws = raws.dir(path=conf.input, log=pydwarf.log)

    # Run each script
    pydwarf.log.info('Running scripts...')
    pydwarf.urist.session.handleall(conf.scripts)

    # Get the output directory, remove old raws if present
    outputdir = conf.output if conf.output else conf.input
    if os.path.exists(outputdir):
        pydwarf.log.info('Removing obsolete raws from %s...' % outputdir)
        for fname in os.listdir(outputdir):
            removefile = os.path.join(outputdir, fname)
            # Only .txt raws are regenerated, so only they are removed.
            # Fix: log the removal only when it actually happens (previously
            # every file was logged as "Removing" even when kept).
            if removefile.endswith('.txt'):
                pydwarf.log.debug('Removing file %s...' % removefile)
                os.remove(removefile)
    else:
        pydwarf.log.info('Creating raws output directory %s...' % outputdir)
        os.makedirs(outputdir)

    # Write the output
    pydwarf.log.info('Writing changes to raws to %s...' % outputdir)
    pydwarf.urist.session.dfraws.write(outputdir, pydwarf.log)

    # All done!
    pydwarf.log.info('All done!')
def main():
    """Preprocessing pipeline driver (Spanish-language test-scoring project).

    Reads a JSON parameter file (sys.argv[1]), validates the expected folder
    layout, extracts input .zip files, optionally filters sub-blocks, applies
    filters to .dat files, merges them with a Perl script, and distributes the
    results into calibration/scoring output folders.

    Usage: script.py parameters.json [snp_update_file]
    The optional second argument switches the run into "update" mode
    (flagUpdate) which skips filtering/config generation and post-filters the
    scoring files instead.
    """
    try:
        if not os.path.exists(sys.argv[1]):
            sys.exit("O_O No existe el archivo de parametros: ./" + sys.argv[1])
        with open(sys.argv[1], 'r') as f:
            dic = json.load(f)
            f.close()  # redundant: the with-block already closes f
        dic['main_dir'] = os.getcwd().replace('\\', '/')
        dic['copy_file'] = os.getcwd().replace('\\', '/') + '/' + dic['copy_file']
        dic['Filters'] = ['EstPresente', 'Copia', 'Omision']
        #
        # Logging file: start fresh each run.
        if os.path.exists(dic['main_dir'] + '/preprocessing.log'):
            os.remove(dic['main_dir'] + '/preprocessing.log')
        logging.basicConfig(
            filename=dic['main_dir'] + '/preprocessing.log',
            level=logging.INFO,
            format='%(asctime)s %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S %p')
        # Validate the optional SNP update file (second CLI argument).
        if len(sys.argv) == 3:
            flagUpdate = True
            if not os.path.exists(dic['main_dir'] + '/input/' + sys.argv[2]):
                logging.info("ADVERTENCIA .... No existe el archivo (" +
                             sys.argv[2] + ") No se filtraran los .dat")
            else:
                pdSNP = pd.read_csv(
                    dic['main_dir'] + '/input/' + sys.argv[2], dtype="str")
        else:
            flagUpdate = False
        # Sanity-check the expected folder layout and parameter consistency.
        print('REVISANDO PARAMETROS...')
        if not os.path.exists(dic['main_dir'] + '/src/'):
            sys.exit("FALTA CARPETA SRC")
        if not os.path.exists(dic['main_dir'] + '/src/bin/'):
            sys.exit(
                "CARPETA BIN PARA BILOG NO EXISTE, COPIAR CARPETA EN 'src/'")
        if not os.path.exists(dic['main_dir'] + '/input'):
            sys.exit(
                "CARPETA 'input' NO EXISTE --> CREAR CARPETA INPUT CON .zip Y ARCHIVO DE COPIA"
            )
        if not os.path.isfile(dic['copy_file']):
            sys.exit(
                "ARCHIVO DE COPIA NO ENCONTRADO --> REVISAR ARCHIVO DE PARAMETROS 'src/parameters.json' --> parametro 'copy_file'"
            )
        # 'Pruebas' and 'Codigos' must have exactly the same key set.
        isMissing = [
            prueba for prueba in dic['Pruebas'].keys()
            if not prueba in dic['Codigos'].keys()
        ]
        if len(isMissing) > 0:
            sys.exit(
                "PARAMETROS INCORRECTOS DE JUNTURAS FALTA AGREGAR: ---->\n" +
                ', '.join(isMissing) +
                "\n-------------------------------------------------------" +
                "\ncambiar en 'src/parameters.json' --> parametro 'Codigos'")
        isMissing = [
            prueba for prueba in dic['Codigos'].keys()
            if not prueba in dic['Pruebas'].keys()
        ]
        if len(isMissing) > 0:
            sys.exit(
                "PARAMETROS INCORRECTOS DE FORMAS FALTA AGREGAR: ---->\n" +
                ', '.join(isMissing) +
                "\n-------------------------------------------------------" +
                "\ncambiar en 'src/parameters.json' --> parametro 'Codigos'")
        logging.info(
            '#######################################################################'
        )
        logging.info('COMENZO: ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        logging.info('Corriendo Preprocessing_stage.py')
        logging.info(
            '#######################################################################'
        )
        # Normalize the sub-block configuration: disable sub-block processing
        # when the list is missing or empty.
        if dic['subloque']:
            if ("subloques" not in dic.keys()):
                logging.info(
                    'NO DEFINIO SUBLOQUES, no se correran sub-bloques...')
                dic['subloques'] = []
                dic['subloque'] = False
            if len(dic['subloques']) == 0:
                logging.info(
                    'NO DEFINIO SUBLOQUES, no se correran sub-bloques...')
                dic['subloque'] = False
        else:
            logging.info(
                'NO DEFINIO BANDERA SUBLOQUE, no se correran sub-bloques...')
        # Clean any leftovers from a previous run.
        logging.info('Limpiando carpeta principal')
        if os.path.exists(dic['main_dir'] + '/input/Descargas'):
            shutil.rmtree(
                dic['main_dir'] + '/input/Descargas', ignore_errors=True)
        if os.path.exists(dic['main_dir'] + '/output'):
            shutil.rmtree(dic['main_dir'] + '/output', ignore_errors=True)
        if os.path.exists(dic['main_dir'] + '/doc'):
            shutil.rmtree(dic['main_dir'] + '/doc', ignore_errors=True)
        # NOTE(review): fixed 15s sleep — presumably waiting for the OS to
        # release file handles after rmtree; confirm whether it is needed.
        time.sleep(15)
        logging.info('\tLeyendo archivo de parametros: ' + sys.argv[1])
        zipfiles = []
        # NOTE(review): this message reads like an error hint but is logged
        # unconditionally, before the scan — looks misplaced.
        logging.info(
            "ARCHIVOS .ZIP ENCONTRADOS en 'input'--> ALGUNA INCONSISTENCIA EN LOS ARCHIVOS --> REVISAR CARPETA INPUT Y EJECUTAR NUEVAMENTE EL PROCESO"
        )
        for file in os.listdir(dic['main_dir'] + '/input/'):
            if file.endswith('.zip'):
                zipfiles.append(file)
        if len(zipfiles) == 0:
            sys.exit(
                "NINGUN ARCHIVO ZIP ENCONTRADO -- > COPIAR ARCHIVOS ZIP EN 'input/'"
            )
        else:
            logging.info(str(zipfiles))
        logging.info(sep)
        logging.info('REVISANDO SCRIPTS NECESARIOS...')
        dirs = os.listdir(dic['main_dir'] + '/src')
        if not 'JuntarModulosSaber11.pl' in dirs:
            logging.info("FALTA SCRIPT 'src/JuntarModulosSaber11.pl'")
            sys.exit("FALTA SCRIPT 'src/JuntarModulosSaber11.pl'")
        # Output tree: calibration and scoring, each split by examinee group.
        groups = {
            '01_Estudiantes': None,
            '02_NoEstudiantes': None,
            '03_Discapacitados': None
        }
        folder_structure = {
            'input': None,
            'output': {
                'calibracion': groups,
                'calificacion': groups
            },
            'doc': None
        }
        logging.info(sep)
        logging.info('CREANDO ESTRUCTURA DE CARPETAS EN: ' + dic['main_dir'] +
                     '/' + str(folder_structure) + '...')
        utils.make_dirs_from_dict(folder_structure, dic['main_dir'] + '/')
        logging.info('\tTerminado...')
        logging.info(sep)
        logging.info('EXTRAYENDO ARCHIVOS...')
        utils.extract_files(dic['main_dir'] + '/')
        logging.info('\tTerminado...')
        logging.info(sep)
        if dic['subloque']:
            # Remove files whose sub-block id is not in the configured list.
            logging.info('ELIMINANDO SUBLOQUES 0...')
            for root, dirs, files in os.walk(dic["main_dir"]):
                for file in files:
                    pattern = re.compile(r".*sblq(.*)\.|\-.*", re.I)
                    matcher = pattern.match(file)
                    if not matcher == None and not matcher.groups()[0].split(
                            '-')[0] in dic['subloques']:
                        print('ARCHIVO A ELIMINAR..', file)
                        os.remove(os.path.join(root, file))
            logging.info('\tTerminado...')
            logging.info(sep)
            # For each .con file with a sub-block id, materialize a parallel
            # folder filtered to that sub-block.
            logging.info('GENERANDO FORMAS DE SUBLOQUES...')
            print('Generando formas de subloques')
            pattern = re.compile(r'.*sblq(.*).con', re.I)
            ext = dic['out_f'][:]
            ext.remove('.con')
            for root, dirs, files in os.walk(dic["main_dir"]):
                for file in files:
                    matcher = pattern.search(file)
                    if not matcher == None:
                        newPath = root + '_' + matcher.groups()[0]
                        if not os.path.exists(newPath):
                            print('NEW_FOLDER: ' + newPath)
                            ensure_dir_exists(newPath + '/salidas')
                        params = {
                            'main_path': root + '/',
                            'con_file': file.replace('.con', '')
                        }
                        utils.filterISELECT(params, newPath)
            logging.info('\tTerminado...')
            logging.info(sep)
            logging.info('ADICIONANDO SUBLOQUES A DICCIONARIO...')
            print('Adicionando subloques a diccionario...')
            utils.add_subloques(dic)
            logging.info(dic['Pruebas'])
            logging.info('\tTerminado...')
            logging.info(sep)
            # Sub-block source files are no longer needed after splitting.
            logging.info('ARCHIVOS DE SUBLOQUES...')
            print('Eliminando archivos de subloques')
            for root, dirs, files in os.walk(dic["main_dir"]):
                for file in files:
                    if re.match("(.*)sblq(.*)", file):
                        print('ARCHIVO A ELIMINAR..', file)
                        os.remove(os.path.join(root, file))
            logging.info('\tTerminado...')
            logging.info(sep)
        # Walk the tree and apply the configured filters to every form that
        # has a .con control file.
        logging.info('APLICANDO FILTROS A ARCHIVOS .DAT...')
        mpath = ''
        for root, dirs, files in os.walk(dic["main_dir"]):
            for file in files:
                if file.endswith(".con"):
                    flagFiltro = True
                    print('CONFILE ENCONTRADO: ' + file)
                    path = (os.path.join(root, file)).replace('\\',
                                                              '/').split('/')
                    mpath = ('/').join((os.path.join(root, file)).replace(
                        '\\', '/').split('/')[:-1]) + '/'
                    if dic['subloque']:
                        indSubl = [
                            bloque in file for bloque in dic['subloques']
                        ]
                        print(indSubl)
                        if not any(indSubl):
                            flagFiltro = False
                            shutil.rmtree(root)
                    if flagFiltro:
                        confile = path[-2]
                        dic['con_file'] = path[-2]
                        logging.info('\tAplicando filtros a : ' + confile)
                        dic['filtered_data'] = ''
                        dic['main_path'] = mpath
                        dic['count_log'] = dic['main_path'] + '/registro.log'
                        con = utils.create_dict_from_con(dic)
                        # NOTE(review): dict(a.items() + b.items()) is
                        # Python-2-only; under Python 3 this raises TypeError.
                        params = dict(dic.items() + con.items())
                        params['id_form'] = confile
                        params['aplicacion'] = con['DATA'].split('-')[0][0:7]
                        logging.info('\t\tAplicacion: ' + params['aplicacion'])
                        f_g = {}
                        utils.set_f_g(params['Pruebas'].copy(), confile, f_g)
                        if not f_g == {}:
                            params['curr_group'] = f_g[confile]
                            logging.info('\t\tGrupo: ' + params['curr_group'])
                            if not flagUpdate:
                                filtrado.apply_filters(params)
                        mpath = mpath.replace(confile, '')
                        logging.info('\t\tTerminado...')
        logging.info(sep)
        if not flagUpdate:
            # Build configuracion.txt from the folders found under Descargas.
            logging.info('CREANDO ARCHIVO DE CONFIGURACION.TXT...')
            f = []
            for root, dirs, files in os.walk(dic['main_dir'] +
                                             '/input/Descargas/'):
                for dir in dirs:
                    f.append(dir)
            config_file = []
            utils.create_config_file(dic['Pruebas'], dic['Codigos'],
                                     dic['Pruebas'].keys(), '', config_file,
                                     [], f)
            np.savetxt(
                dic['main_dir'] + '/output/configuracion.txt',
                config_file,
                delimiter=",",
                fmt="%s")
            logging.info('\t\tTerminado...')
        logging.info(sep)
        # Merge the .dat files with the external Perl script.
        logging.info('JUNTANDO ARCHIVOS DAT (JUNTAR.pl)...')
        os.chdir(mpath)  # Change to forms_path
        p = subprocess.Popen([
            'perl', dic['main_dir'] + '/src/JuntarModulosSaber11.pl', '-com',
            '-dat', '-conf', dic['main_dir'] + '/output/configuracion.txt'
        ])
        p.communicate()
        logging.info('\t\tTerminado...')
        logging.info(sep)
        ext = dic['out_f']
        logging.info(
            'MOVIENDO ARCHIVOS A SUS RESPECTIVAS CARPETAS DE SALIDA...')
        config = []
        logging.info(
            'SACANDO CARPETAS DE JUNTURA...(Leyendo archivo de configuracion.txt)'
        )
        with open(dic['main_dir'] + '/output/configuracion.txt') as f:
            config = f.readlines()
            f.close()  # redundant: the with-block already closes f
        J = []  # Forms already present in the JUNTAS folder
        for line in config:
            if line.startswith("PRUEBA"):
                J.append(line.split()[2])
        logging.info('FORMAS DE JUNTURA: ' + str(J))
        # Copy each non-merged form folder into the calibration output tree.
        dirs = os.listdir(os.getcwd())
        for d in dirs:
            if not d in J and os.path.isdir(os.getcwd() + '/' + d):
                logging.info(sep)
                logging.info('PATH DE INPUT - OUTPUT PARA CARPETA: ' + d)
                if d == "JUNTAS":
                    f_g = {'JUNTAS': '01_Estudiantes'}
                else:
                    f_g = {}
                    utils.set_f_g(dic['Pruebas'].copy(), d, f_g)
                if not f_g == {}:
                    path_output = dic[
                        'main_dir'] + '/output/calibracion/' + f_g[
                            d] + '/' + d
                    ensure_dir_exists(path_output)
                    if d == 'JUNTAS':
                        path_input = os.getcwd() + '/' + d
                        logging.info('PATH INPUT: ' + path_input)
                        logging.info('PATH OUTPUT: ' + path_output)
                        utils.copytree(path_input, path_output)
                    else:
                        for root, dirs, files in os.walk(os.getcwd() + '/' +
                                                         d):
                            for file in files:
                                for ex in dic['out_f']:
                                    if ex in file:
                                        path_input = os.path.join(
                                            root, file).replace('\\', '/')
                                        logging.info('PATH INPUT: ' +
                                                     path_input)
                                        logging.info('PATH OUTPUT: ' +
                                                     path_output + '/' + file)
                                        shutil.copyfile(
                                            path_input,
                                            path_output + '/' + file)
                    if not d == 'JUNTAS':
                        ensure_dir_exists(path_output + '/salidas')
                        logging.info('SALIDAS: ' + path_output + '/salidas')
        logging.info(sep)
        logging.info(
            'MOVIENDO ARCHIVOS DE CALIFICACION A SUS RESPECTIVAS CARPETAS...')
        # In non-update mode, scoring data comes from the .O variants of the
        # .DAT extensions.
        ext = []
        for e in dic['out_f']:
            if e.endswith('.DAT') and not flagUpdate:
                ext.append(e.replace('.DAT', '.O'))
            else:
                ext.append(e)
        removePaths = []
        for root, dirs, files in os.walk(os.getcwd()):
            for d in dirs:
                if not d == 'JUNTAS':
                    f_g = {}
                    utils.set_f_g(dic['Pruebas'].copy(), d, f_g)
                    if not f_g == {}:
                        path_output = dic[
                            'main_dir'] + '/output/calificacion/' + f_g[
                                d] + '/' + d
                        ensure_dir_exists(path_output)
                        if not os.path.exists(path_output + '/salidas'):
                            os.makedirs(path_output + '/salidas')
                        for file in os.listdir(os.path.join(root, d)):
                            for e in ext:
                                if e in file:
                                    path_input = os.path.join(root,
                                                              d) + '/' + file
                                    if file.endswith('.O'):
                                        output = path_output + '/' + file.replace(
                                            '.O', '.DAT')
                                    else:
                                        output = path_output + '/' + file
                                    if not os.path.exists(output):
                                        shutil.copyfile(path_input, output)
                                        if flagUpdate and file.endswith(
                                                '.DAT'):
                                            # NOTE(review): 'pdSNP' in locals()
                                            # guards against the SNP file being
                                            # absent — fragile; verify.
                                            if ('pdSNP' in locals()):
                                                nUpdate = filtroActualizacion(
                                                    output, pdSNP,
                                                    dic['id_len'])
                                                if nUpdate == 0:
                                                    removePaths.append(
                                                        path_output)
                                                logging.info(
                                                    'FILTRANDO NUEVAS PERSONAS PARA CALIFICAR: '
                                                    + file)
                                            else:
                                                statFile = os.stat(path_input)
                                                if str(statFile.
                                                       st_size) == '0':
                                                    removePaths.append(
                                                        path_output)
                                                logging.info(
                                                    'SE CALIFICARAN TODAS LAS PERSONAS: '
                                                    + file)
        #
        # Removing: drop scoring folders that ended up with no rows to score.
        if flagUpdate:
            logging.info(sep)
            logging.info('ELIMANDO CARPETAS EN BLANCO DE LA CALIFICACION...')
            for path in removePaths:
                logging.info('Eliminando: ' + path)
                shutil.rmtree(path)
            logging.info(sep)
        logging.info('\tTerminado...')
        logging.info(sep)
        logging.info('TERMINO: ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    except Exception as e:
        # NOTE(review): logging.error returns None, so this prints "None";
        # the traceback itself only goes to the log file.
        print(logging.error(traceback.format_exc()))
def make_output_dirs(self):
    """Build the per-platform export artifacts for the current project.

    Recreates the output directory, writes package.json / global.json,
    zips the project into a .nw archive, then for every enabled export
    setting produces either a Mac .app bundle (Info.plist patched, app.nw
    embedded or copied uncompressed) or a joined binary for Windows/Linux.
    Errors are accumulated in self.output_err instead of being raised;
    progress is reported through self.progress_text.

    NOTE(review): Python 2 code — uses `unicode` and the deprecated
    plistlib.readPlist/writePlist API.
    """
    self.output_err = ''
    try:
        self.progress_text = 'Removing old output directory...\n'
        output_dir = utils.path_join(self.output_dir(), self.project_name())
        if os.path.exists(output_dir):
            utils.rmtree(output_dir, ignore_errors=True)
        temp_dir = utils.path_join(TEMP_DIR, 'webexectemp')
        if os.path.exists(temp_dir):
            utils.rmtree(temp_dir, ignore_errors=True)
        self.progress_text = 'Making new directories...\n'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        os.makedirs(temp_dir)
        self.copy_files_to_project_folder()
        # Write the project package.json (optional) and the global one.
        json_file = utils.path_join(self.project_dir(), 'package.json')
        global_json = utils.get_data_file_path('files/global.json')
        if self.output_package_json:
            with codecs.open(json_file, 'w+', encoding='utf-8') as f:
                f.write(self.generate_json())
        with codecs.open(global_json, 'w+', encoding='utf-8') as f:
            f.write(self.generate_json(global_json=True))
        # .nw = zipped app payload; .nwf = uncompressed copy of the project.
        zip_file = utils.path_join(temp_dir, self.project_name()+'.nw')
        app_nw_folder = utils.path_join(temp_dir, self.project_name()+'.nwf')
        utils.copytree(self.project_dir(), app_nw_folder,
                       ignore=shutil.ignore_patterns(output_dir))
        zip_files(zip_file, self.project_dir(), exclude_paths=[output_dir])
        for ex_setting in self.settings['export_settings'].values():
            if ex_setting.value:
                self.progress_text = '\n'
                name = ex_setting.display_name
                self.progress_text = u'Making files for {}...'.format(name)
                export_dest = utils.path_join(output_dir, ex_setting.name)
                # Parse "major.minor.patch" from the selected NW.js version;
                # >= 0.12 renamed node-webkit to nwjs.
                versions = re.findall('(\d+)\.(\d+)\.(\d+)',
                                      self.selected_version())[0]
                minor = int(versions[1])
                if minor >= 12:
                    export_dest = export_dest.replace('node-webkit', 'nwjs')
                if os.path.exists(export_dest):
                    utils.rmtree(export_dest, ignore_errors=True)
                # shutil will make the directory for us
                utils.copytree(get_data_path('files/'+ex_setting.name),
                               export_dest,
                               ignore=shutil.ignore_patterns('place_holder.txt'))
                utils.rmtree(get_data_path('files/'+ex_setting.name),
                             ignore_errors=True)
                self.progress_text += '.'
                if 'mac' in ex_setting.name:
                    uncomp_setting = self.get_setting('uncompressed_folder')
                    uncompressed = uncomp_setting.value
                    app_path = utils.path_join(export_dest,
                                               self.project_name()+'.app')
                    # Rename the stock bundle; older versions ship it as
                    # node-webkit.app instead of nwjs.app.
                    try:
                        utils.move(utils.path_join(export_dest, 'nwjs.app'),
                                   app_path)
                    except IOError:
                        utils.move(utils.path_join(export_dest,
                                                   'node-webkit.app'),
                                   app_path)
                    # Patch the bundle metadata with project name/version.
                    plist_path = utils.path_join(app_path, 'Contents',
                                                 'Info.plist')
                    plist_dict = plistlib.readPlist(plist_path)
                    plist_dict['CFBundleDisplayName'] = self.project_name()
                    plist_dict['CFBundleName'] = self.project_name()
                    version_setting = self.get_setting('version')
                    plist_dict['CFBundleShortVersionString'] = version_setting.value
                    plist_dict['CFBundleVersion'] = version_setting.value
                    plistlib.writePlist(plist_dict, plist_path)
                    self.progress_text += '.'
                    app_nw_res = utils.path_join(app_path, 'Contents',
                                                 'Resources', 'app.nw')
                    if uncompressed:
                        utils.copytree(app_nw_folder, app_nw_res)
                    else:
                        utils.copy(zip_file, app_nw_res)
                    self.create_icns_for_app(utils.path_join(app_path,
                                                             'Contents',
                                                             'Resources',
                                                             'nw.icns'))
                    self.progress_text += '.'
                else:
                    # Windows/Linux: append the .nw payload to the runtime
                    # binary and make the result executable.
                    ext = ''
                    windows = False
                    if 'windows' in ex_setting.name:
                        ext = '.exe'
                        windows = True
                    nw_path = utils.path_join(export_dest,
                                              ex_setting.dest_files[0])
                    if windows:
                        self.replace_icon_in_exe(nw_path)
                    self.compress_nw(nw_path)
                    dest_binary_path = utils.path_join(export_dest,
                                                       self.project_name() +
                                                       ext)
                    if 'linux' in ex_setting.name:
                        self.make_desktop_file(dest_binary_path, export_dest)
                    join_files(dest_binary_path, nw_path, zip_file)
                    # 0755: rwx for owner, rx for group/other.
                    sevenfivefive = (stat.S_IRWXU |
                                     stat.S_IRGRP |
                                     stat.S_IXGRP |
                                     stat.S_IROTH |
                                     stat.S_IXOTH)
                    os.chmod(dest_binary_path, sevenfivefive)
                    self.progress_text += '.'
                    if os.path.exists(nw_path):
                        os.remove(nw_path)
    except Exception:
        # Collect the full traceback for the UI rather than crashing.
        error = u''.join([unicode(x) for x in traceback.format_exception(sys.exc_info()[0],
                                                                         sys.exc_info()[1],
                                                                         sys.exc_info()[2])])
        self.logger.error(error)
        self.output_err += error
    finally:
        # NOTE(review): if the first rmtree/path_join raises before temp_dir
        # is assigned, this raises NameError and masks the original error.
        utils.rmtree(temp_dir, ignore_errors=True)
def __main__():
    """Entry point: run the configured PyDwarf scripts over the raws.

    Settings-driven variant: reads everything from the module-level
    ``settings`` object. Each entry of settings.runscripts may be a
    (name, args) tuple/list, a dict with name/args/match/ignore_df_version
    keys, a callable, or a plain script name string; each form is resolved
    to a script function before being invoked on the parsed raws.
    """
    pydwarf.log.info('Running PyDwarf %s.' % pydwarf.__version__)
    if settings.dfversion is not None:
        pydwarf.log.info('Managing Dwarf Fortress version %s.' % settings.dfversion)
    else:
        pydwarf.log.error('No Dwarf Fortress version was specified in settings. Scripts will be run regardless of their indicated compatibility.')
    if os.path.exists(settings.rawsdir):
        # Optional backup before any modification.
        if settings.backup and settings.backupdir:
            pydwarf.log.info('Backing up raws to %s...' % settings.backupdir)
            copytree(settings.rawsdir, settings.backupdir)
        else:
            pydwarf.log.warning('Proceeding without backing up raws.')
        pydwarf.log.info('Reading raws from %s...' % settings.rawsdir)
        r = raws().read(settings.rawsdir, pydwarf.log)
        pydwarf.log.info('Running scripts...')
        for script in settings.runscripts:
            pydwarf.log.debug('Handling script %s...' % script)
            # Resolve the script entry to (urist registry entry, name,
            # callable, args) depending on its type.
            urist = None
            scriptname = None
            scriptfunc = None
            scriptargs = None
            if isinstance(script, tuple) or isinstance(script, list):
                # (name_or_callable, args) pair: unpack, then fall through
                # to the string/callable handling below on the next checks.
                scriptargs = script[1]
                script = script[0]
            elif isinstance(script, dict):
                scriptname = script.get('name')
                scriptargs = script.get('args')
                scriptmatch = script.get('match')
                scriptignoreversion = script.get('ignore_df_version')
                # None disables version filtering in the registry lookup.
                checkversion = None if scriptignoreversion else settings.dfversion
                candidates = pydwarf.urist.get(scriptname, version=checkversion, match=scriptmatch)
                if candidates and len(candidates):
                    urist = candidates[0]
                    scriptname = urist.name
                    if len(candidates) > 1:
                        pydwarf.log.warning('More than one fitting script has been specified, using a best guess.')
            elif callable(script):
                scriptname = script.__name__
                scriptfunc = script
            else:
                # Plain string: look the name up in the script registry.
                scriptname = script
                candidates = pydwarf.urist.get(scriptname, version=settings.dfversion)
                if candidates and len(candidates):
                    urist = candidates[0]
                    scriptname = urist.name
                    if len(candidates) > 1:
                        pydwarf.log.warning('More than one fitting script has been specified, using a best guess.')
            if urist and scriptfunc is None:
                scriptfunc = urist.fn
            if scriptfunc:
                scriptinfo = 'Running script %s' % scriptname
                if scriptargs:
                    scriptinfo = '%s with args %s' % (scriptinfo, scriptargs)
                pydwarf.log.info('%s...' % scriptinfo)
                try:
                    response = scriptfunc(r, **scriptargs) if scriptargs else scriptfunc(r)
                    # Scripts report back with a dict carrying 'success'
                    # and optionally 'status'.
                    if response:
                        success = response.get('success')
                        status = response['status'] if 'status' in response else ('Script %s ran %ssuccessfully.' % (scriptname, '' if success else 'un'))
                        pydwarf.log.info('%s: %s' % ('SUCCESS' if success else 'FAILURE', status))
                    else:
                        pydwarf.log.error('Received no response from script %s.' % scriptname)
                except Exception:
                    pydwarf.log.exception('Unhandled exception while running script %s.' % scriptname)
                else:
                    pydwarf.log.info('Finished running script %s.' % scriptname)
            else:
                pydwarf.log.error('Failed to retrieve script %s.' % scriptname)
        # Write results in place unless a separate output dir is configured.
        outputdir = settings.outputdir if settings.outputdir else settings.rawsdir
        pydwarf.log.info('Writing changes to raws to %s...' % outputdir)
        if not os.path.exists(outputdir):
            os.makedirs(outputdir)
        r.write(outputdir, pydwarf.log)
        pydwarf.log.info('All done!')
    else:
        pydwarf.log.info('Specified raws directory does not exist.')
def main():
    """Train and evaluate the NeuroNER EntityLSTM sequence-labeling model.

    Loads parameters and the dataset, builds a TF1 graph/session, sets up
    TensorBoard writers and embedding-projector metadata, then runs the
    train/evaluate loop epoch by epoch with early stopping on the micro
    F1-score of the validation set. All artifacts (model checkpoints,
    parameters.ini, dataset pickle, TensorBoard logs, brat output) are
    written under ../output/<dataset>_<timestamp>/.
    """
    parameters, conf_parameters = load_parameters()
    dataset_filepaths, dataset_brat_folders = get_valid_dataset_filepaths(parameters)
    check_parameter_compatiblity(parameters, dataset_filepaths)

    # Load dataset
    dataset = ds.Dataset(verbose=parameters['verbose'], debug=parameters['debug'])
    dataset.load_dataset(dataset_filepaths, parameters)

    # Create graph and session
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            intra_op_parallelism_threads=parameters['number_of_cpu_threads'],
            inter_op_parallelism_threads=parameters['number_of_cpu_threads'],
            device_count={'CPU': 1, 'GPU': parameters['number_of_gpus']},
            allow_soft_placement=True, # automatically choose an existing and supported device to run the operations in case the specified one doesn't exist
            log_device_placement=False
            )
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Initialize and save execution details
            start_time = time.time()
            experiment_timestamp = utils.get_current_time_in_miliseconds()
            results = {}
            results['epoch'] = {}
            results['execution_details'] = {}
            results['execution_details']['train_start'] = start_time
            results['execution_details']['time_stamp'] = experiment_timestamp
            results['execution_details']['early_stop'] = False
            results['execution_details']['keyboard_interrupt'] = False
            results['execution_details']['num_epochs'] = 0
            results['model_options'] = copy.copy(parameters)

            # One output folder per (dataset, timestamp) pair.
            dataset_name = utils.get_basename_without_extension(parameters['dataset_text_folder'])
            model_name = '{0}_{1}'.format(dataset_name, results['execution_details']['time_stamp'])
            output_folder=os.path.join('..', 'output')
            utils.create_folder_if_not_exists(output_folder)
            stats_graph_folder=os.path.join(output_folder, model_name) # Folder where to save graphs
            utils.create_folder_if_not_exists(stats_graph_folder)
            model_folder = os.path.join(stats_graph_folder, 'model')
            utils.create_folder_if_not_exists(model_folder)
            # Persist run configuration alongside the model.
            with open(os.path.join(model_folder, 'parameters.ini'), 'w') as parameters_file:
                conf_parameters.write(parameters_file)
            # One TensorBoard log folder per dataset split.
            tensorboard_log_folder = os.path.join(stats_graph_folder, 'tensorboard_logs')
            utils.create_folder_if_not_exists(tensorboard_log_folder)
            tensorboard_log_folders = {}
            for dataset_type in dataset_filepaths.keys():
                tensorboard_log_folders[dataset_type] = os.path.join(stats_graph_folder, 'tensorboard_logs', dataset_type)
                utils.create_folder_if_not_exists(tensorboard_log_folders[dataset_type])
            pickle.dump(dataset, open(os.path.join(model_folder, 'dataset.pickle'), 'wb'))

            # Instantiate the model
            # graph initialization should be before FileWriter, otherwise the graph will not appear in TensorBoard
            model = EntityLSTM(dataset, parameters)

            # Instantiate the writers for TensorBoard
            writers = {}
            for dataset_type in dataset_filepaths.keys():
                writers[dataset_type] = tf.summary.FileWriter(tensorboard_log_folders[dataset_type], graph=sess.graph)
            embedding_writer = tf.summary.FileWriter(model_folder) # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings

            # Register token and character embeddings with the TensorBoard
            # embedding projector.
            embeddings_projector_config = projector.ProjectorConfig()
            tensorboard_token_embeddings = embeddings_projector_config.embeddings.add()
            tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
            token_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_tokens.tsv')
            tensorboard_token_embeddings.metadata_path = os.path.relpath(token_list_file_path, '..')

            tensorboard_character_embeddings = embeddings_projector_config.embeddings.add()
            tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
            character_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_characters.tsv')
            tensorboard_character_embeddings.metadata_path = os.path.relpath(character_list_file_path, '..')

            projector.visualize_embeddings(embedding_writer, embeddings_projector_config)

            # Write metadata for TensorBoard embeddings
            token_list_file = codecs.open(token_list_file_path,'w', 'UTF-8')
            for token_index in range(dataset.vocabulary_size):
                token_list_file.write('{0}\n'.format(dataset.index_to_token[token_index]))
            token_list_file.close()

            character_list_file = codecs.open(character_list_file_path,'w', 'UTF-8')
            for character_index in range(dataset.alphabet_size):
                if character_index == dataset.PADDING_CHARACTER_INDEX:
                    character_list_file.write('PADDING\n')
                else:
                    character_list_file.write('{0}\n'.format(dataset.index_to_character[character_index]))
            character_list_file.close()

            # Initialize the model
            sess.run(tf.global_variables_initializer())
            if not parameters['use_pretrained_model']:
                model.load_pretrained_token_embeddings(sess, dataset, parameters)

            # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
            bad_counter = 0 # number of epochs with no improvement on the validation test in terms of F1-score
            previous_best_valid_f1_score = 0
            # CRF transition matrix, (labels+2)^2 to account for start/end states.
            transition_params_trained = np.random.rand(len(dataset.unique_labels)+2,len(dataset.unique_labels)+2)
            model_saver = tf.train.Saver(max_to_keep=parameters['maximum_number_of_epochs']) # defaults to saving all variables
            epoch_number = -1
            try:
                while True:
                    step = 0
                    epoch_number += 1
                    print('\nStarting epoch {0}'.format(epoch_number))
                    epoch_start_time = time.time()

                    if parameters['use_pretrained_model'] and epoch_number == 0:
                        # Restore pretrained model parameters
                        transition_params_trained = train.restore_model_parameters_from_pretrained_model(parameters, dataset, sess, model, model_saver)
                    elif epoch_number != 0:
                        # Train model: loop over all sequences of training set with shuffling
                        sequence_numbers=list(range(len(dataset.token_indices['train'])))
                        random.shuffle(sequence_numbers)
                        for sequence_number in sequence_numbers:
                            transition_params_trained = train.train_step(sess, dataset, sequence_number, model, transition_params_trained, parameters)
                            step += 1
                            if step % 10 == 0:
                                print('Training {0:.2f}% done'.format(step/len(sequence_numbers)*100), end='\r', flush=True)

                    epoch_elapsed_training_time = time.time() - epoch_start_time
                    print('Training completed in {0:.2f} seconds'.format(epoch_elapsed_training_time), flush=True)

                    y_pred, y_true, output_filepaths = train.predict_labels(sess, model, transition_params_trained, parameters, dataset, epoch_number, stats_graph_folder, dataset_filepaths)

                    # Evaluate model: save and plot results
                    evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder, epoch_number, epoch_start_time, output_filepaths, parameters)

                    # Prediction-only mode: one pass, emit brat output, stop.
                    if parameters['use_pretrained_model'] and not parameters['train_model']:
                        conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder)
                        break

                    # Save model
                    model_saver.save(sess, os.path.join(model_folder, 'model_{0:05d}.ckpt'.format(epoch_number)))

                    # Save TensorBoard logs
                    summary = sess.run(model.summary_op, feed_dict=None)
                    writers['train'].add_summary(summary, epoch_number)
                    writers['train'].flush()
                    utils.copytree(writers['train'].get_logdir(), model_folder)

                    # Early stop
                    valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
                    if valid_f1_score > previous_best_valid_f1_score:
                        bad_counter = 0
                        previous_best_valid_f1_score = valid_f1_score
                        conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder, overwrite=True)
                    else:
                        bad_counter += 1
                        print("The last {0} epochs have not shown improvements on the validation set.".format(bad_counter))

                    if bad_counter >= parameters['patience']:
                        print('Early Stop!')
                        results['execution_details']['early_stop'] = True
                        break

                    if epoch_number >= parameters['maximum_number_of_epochs']: break

            except KeyboardInterrupt:
                results['execution_details']['keyboard_interrupt'] = True
                print('Training interrupted')

            print('Finishing the experiment')
            end_time = time.time()
            results['execution_details']['train_duration'] = end_time - start_time
            results['execution_details']['train_end'] = end_time
            print('ok1')
            evaluate.save_results(results, stats_graph_folder)
            print('ok2')
            print('ok3')
            #sess.close() # release the session's resources
            print('ok4')
def fit(self):
    ''' Train the model on the loaded dataset.

    Runs the epoch loop over self.dataset using self.sess/self.model,
    logging to TensorBoard, checkpointing each epoch, and early-stopping
    on the validation micro F1-score; the best CRF transition parameters
    are stored back on self.transition_params_trained.
    '''
    parameters = self.parameters
    conf_parameters = self.conf_parameters
    dataset_filepaths = self.dataset_filepaths
    dataset = self.dataset
    dataset_brat_folders = self.dataset_brat_folders
    sess = self.sess
    model = self.model
    transition_params_trained = self.transition_params_trained
    stats_graph_folder, experiment_timestamp = self._create_stats_graph_folder(parameters)

    # Initialize and save the details of this run
    start_time = time.time()
    results = {}
    results['epoch'] = {}
    '''
    An epoch, in Machine Learning, is the entire processing by the learning algorithm of the entire train-set.
    Ex: The MNIST train set is composed by 55000 samples. Once the algorithm processed all those 55000 samples an epoch is passed.
    '''
    results['execution_details'] = {}
    results['execution_details']['train_start'] = start_time  # run start time
    results['execution_details']['time_stamp'] = experiment_timestamp  # timestamp label
    results['execution_details']['early_stop'] = False  # whether the run stopped early
    results['execution_details']['keyboard_interrupt'] = False  # whether stopped by keyboard
    results['execution_details']['num_epochs'] = 0  # number of epochs run
    results['model_options'] = copy.copy(parameters)  # the run parameters

    model_folder = os.path.join(stats_graph_folder, 'model')  # output/en.../model
    utils.create_folder_if_not_exists(model_folder)

    # Save parameter values into parameters.ini
    with open(os.path.join(model_folder, 'parameters.ini'), 'w') as parameters_file:
        conf_parameters.write(parameters_file)  # log the parameters to file
    pickle.dump(dataset, open(os.path.join(model_folder, 'dataset.pickle'), 'wb'))  # dump the dataset to a pickle file for reuse on later runs

    # Create the tensorboard_logs folder, used later for plotting
    tensorboard_log_folder = os.path.join(stats_graph_folder, 'tensorboard_logs')  # folder holding TensorBoard log files
    utils.create_folder_if_not_exists(tensorboard_log_folder)
    tensorboard_log_folders = {}
    for dataset_type in dataset_filepaths.keys():
        tensorboard_log_folders[dataset_type] = os.path.join(stats_graph_folder, 'tensorboard_logs', dataset_type)
        utils.create_folder_if_not_exists(tensorboard_log_folders[dataset_type])

    # Initialize the TensorBoard writers
    writers = {}  # at most 4 writers: train, test, valid, deploy
    for dataset_type in dataset_filepaths.keys():
        writers[dataset_type] = tf.summary.FileWriter(tensorboard_log_folders[dataset_type], graph=sess.graph)
    embedding_writer = tf.summary.FileWriter(model_folder) # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings

    # Used for visualizing embeddings with TensorBoard
    embeddings_projector_config = projector.ProjectorConfig()
    tensorboard_token_embeddings = embeddings_projector_config.embeddings.add()
    tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
    token_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_tokens.tsv')
    tensorboard_token_embeddings.metadata_path = 'tensorboard_metadata_tokens.tsv'#os.path.relpath(token_list_file_path, '..')

    tensorboard_character_embeddings = embeddings_projector_config.embeddings.add()
    tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
    character_list_file_path = os.path.join(model_folder, 'tensorboard_metadata_characters.tsv')
    tensorboard_character_embeddings.metadata_path = 'tensorboard_metadata_characters.tsv'#os.path.relpath(character_list_file_path, '..')

    # Saves a configuration file that TensorBoard will read during startup.
    projector.visualize_embeddings(embedding_writer, embeddings_projector_config)

    # Write tokens to the tsv file used as metadata for the embedding projector
    token_list_file = codecs.open(token_list_file_path,'w', 'UTF-8')
    for token_index in range(dataset.vocabulary_size):
        token_list_file.write('{0}\n'.format(dataset.index_to_token[token_index]))
    token_list_file.close()

    # Write characters to the tsv file used as metadata for the embedding projector
    character_list_file = codecs.open(character_list_file_path,'w', 'UTF-8')
    for character_index in range(dataset.alphabet_size):
        if character_index == dataset.PADDING_CHARACTER_INDEX:
            character_list_file.write('PADDING\n')
        else:
            character_list_file.write('{0}\n'.format(dataset.index_to_character[character_index]))
    character_list_file.close()

    # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
    bad_counter = 0 # number of epochs with no improvement on the validation test in terms of F1-score
    previous_best_valid_f1_score = 0  # best F1-score seen on earlier epochs
    epoch_number = -1
    try:
        while True:
            step = 0
            epoch_number += 1
            print('\nStarting epoch {0}'.format(epoch_number))
            epoch_start_time = time.time()
            if epoch_number != 0:
                # Train model: loop over all sequences of training set with shuffling
                sequence_numbers=list(range(len(dataset.token_indices['train'])))
                print("----****____")
                print(dataset.token_indices['train'][:10])
                random.shuffle(sequence_numbers)
                # Perform the training steps
                for sequence_number in sequence_numbers:
                    transition_params_trained = train.train_step(sess, dataset, sequence_number, model, parameters)
                    step += 1
                    if step % 10 == 0:
                        print('Training {0:.2f}% done'.format(step/len(sequence_numbers)*100), end='\r', flush=True)

            # Compute how long this epoch took
            epoch_elapsed_training_time = time.time() - epoch_start_time
            print('Training completed in {0:.2f} seconds'.format(epoch_elapsed_training_time), flush=True)

            y_pred, y_true, output_filepaths = train.predict_labels(sess, model, transition_params_trained, parameters, dataset, epoch_number, stats_graph_folder, dataset_filepaths)

            # Evaluate model: save and plot results
            evaluate.evaluate_model(results, dataset, y_pred, y_true, stats_graph_folder, epoch_number, epoch_start_time, output_filepaths, parameters)

            if parameters['use_pretrained_model'] and not parameters['train_model']:
                conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder)
                break

            # Save model
            model.saver.save(sess, os.path.join(model_folder, 'model_{0:05d}.ckpt'.format(epoch_number)))

            # Save TensorBoard logs
            summary = sess.run(model.summary_op, feed_dict=None)
            writers['train'].add_summary(summary, epoch_number)
            writers['train'].flush()
            utils.copytree(writers['train'].get_logdir(), model_folder)

            # Early stop
            valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
            # If this epoch's score beats the previous best
            if valid_f1_score > previous_best_valid_f1_score:
                bad_counter = 0
                previous_best_valid_f1_score = valid_f1_score
                conll_to_brat.output_brat(output_filepaths, dataset_brat_folders, stats_graph_folder, overwrite=True)
                self.transition_params_trained = transition_params_trained
            else:
                bad_counter += 1
                print("The last {0} epochs have not shown improvements on the validation set.".format(bad_counter))

            # If bad_counter reaches the limit parameters['patience'] = 10 (initial value), finish training
            if bad_counter >= parameters['patience']:
                print('Early Stop!')
                results['execution_details']['early_stop'] = True
                break

            # If the epoch count reaches the configured maximum --> end training
            if epoch_number >= parameters['maximum_number_of_epochs']: break
    except KeyboardInterrupt:
        results['execution_details']['keyboard_interrupt'] = True
        print('Training interrupted')

    # Training finished: record timing and save the results
    print('Finishing the experiment')
    end_time = time.time()
    results['execution_details']['train_duration'] = end_time - start_time
    results['execution_details']['train_end'] = end_time
    evaluate.save_results(results, stats_graph_folder)
    for dataset_type in dataset_filepaths.keys():
        writers[dataset_type].close()
    def test_check_project_test_workflow(self):
        """ Validate new project to test via zuul

        End-to-end scenario (Python 2 code: uses file(), xrange and the
        0755 octal literal):
          1. create a project and push the sample project's sources;
          2. register the project in the config repo (zuul + JJB yaml);
          3. submit/approve the config change and wait for Zuul/Jenkins
             to verify, merge, and apply it;
          4. push a passing change (expect Verified +1) and then a
             failing change (expect Verified -1);
          5. query the manageSF jobs API for the triggered jobs.
        """
        # We want to create a project, provide project source
        # code with tests. We then configure zuul/jjb to handle the
        # run of the test cases. We then validate Gerrit has been
        # updated about the test results
        # We use the sample-project (that already exists)

        pname = 'test_workflow_%s' % create_random_str()
        logger.info("Creating project %s" % pname)
        # Create it
        self.create_project(pname)

        logger.info("Populating the project with %s" %
                    self.sample_project_dir)
        # Add the sample-project to the empty repository
        clone_dir = self.clone_as_admin(pname)
        copytree(self.sample_project_dir, clone_dir)
        self.commit_direct_push_as_admin(clone_dir, "Add the sample project")

        # Change to config/{zuul,jobs}/projects.yaml
        # in order to test the new project
        logger.info("Adding config-repo configuration")
        # Clone the "zuul-demo" template entries and rename them for pname.
        ycontent = file(os.path.join(
            self.config_clone_dir, "zuul/projects.yaml")).read()
        file(os.path.join(
            self.config_clone_dir, "zuul/projects.yaml"), 'w').write(
            ycontent.replace("zuul-demo", pname),
        )
        ycontent2 = load(file(os.path.join(
            self.config_clone_dir, "jobs/projects.yaml")).read())
        sp2 = copy.deepcopy(
            [p for p in ycontent2 if 'project' in p and
             p['project']['name'] == 'zuul-demo'][0])
        sp2['project']['name'] = pname
        ycontent2.append(sp2)
        file(os.path.join(
            self.config_clone_dir, "jobs/projects.yaml"), 'w').write(
            dump(ycontent2))

        # Send review (config-check) will be triggered
        logger.info("Submitting the config review")
        change_sha = self.push_review_as_admin(
            self.config_clone_dir,
            "Add config definition in Zuul/JJB config for %s" % pname)

        change_nr = self.gu.get_change_number(change_sha)

        logger.info("Waiting for verify +1 on change %d" % change_nr)
        self.assertEquals(self.gu.wait_for_verify(change_nr), 1)

        # review the config change as a member from the config-core group
        logger.info("Approving and waiting for verify +2")
        self.gu2.submit_change_note(change_nr, "current", "Code-Review", "2")
        self.gu2.submit_change_note(change_nr, "current", "Workflow", "1")

        # Poll (up to 60s) until Jenkins posts its gate Verified +2 vote.
        for retry in xrange(60):
            jenkins_vote = self.gu.get_vote(change_nr, "Verified")
            if jenkins_vote == 2:
                break
            time.sleep(1)
        self.assertEquals(jenkins_vote, 2)

        # verify whether zuul merged the patch
        logger.info("Waiting for change to be merged")
        for retry in xrange(60):
            change_status = self.gu.get_info(change_nr)['status']
            if change_status == "MERGED":
                break
            time.sleep(1)
        self.assertEqual(change_status, 'MERGED')
        # Mark the config repo as modified so teardown restores it.
        self.need_restore_config_repo = True

        logger.info("Waiting for config-update")
        config_update_log = self.ju.wait_for_config_update(change_sha)
        self.assertIn("Finished: SUCCESS", config_update_log)

        # Propose a change on a the repo and expect a Verified +1
        logger.info("Submiting a test change to %s" % pname)
        change_sha = self.gitu_admin.add_commit_and_publish(
            clone_dir, 'master', "Add useless file", self.un)
        change_nr = self.gu.get_change_number(change_sha)
        logger.info("Waiting for verify +1 on change %d" % change_nr)
        self.assertEquals(self.gu.wait_for_verify(change_nr), 1)

        # Update the change on a the repo and expect a Verified -1
        logger.info("Submiting a test change to %s suppose to fail" % pname)
        # Replace run_tests.sh with a script that always fails.
        data = "#!/bin/bash\nexit 1\n"
        file(os.path.join(clone_dir, "run_tests.sh"), 'w').write(data)
        os.chmod(os.path.join(clone_dir, "run_tests.sh"), 0755)
        self.gitu_admin.add_commit_and_publish(
            clone_dir, "master", None, fnames=["run_tests.sh"])
        logger.info("Waiting for verify -1 on change %d" % change_nr)
        self.assertEquals(self.gu.wait_for_verify(change_nr), -1)

        logger.info("Validate jobs ran via the job api %s" % pname)
        # This piece of code is there by convenience ...
        # TODO: Should be moved in the job api tests file.
        # Test the manageSF jobs API: query per patch & revision
        change_ids = self.gu.get_my_changes_for_project(pname)
        self.assertGreater(len(change_ids), 0)
        change_id = change_ids[0]
        patch = self.gu.get_change_last_patchset(change_id)['_number']
        cookie = get_cookie(config.ADMIN_USER, config.ADMIN_PASSWORD)
        cookies = {"auth_pubtkt": cookie}
        base_url = config.GATEWAY_URL + "/manage/jobs/"
        for j in ["%s-functional-tests" % pname, "%s-unit-tests" % pname]:
            job = requests.get(base_url + '%s/?change=%s' % (j, patch),
                               cookies=cookies).json()
            self.assertTrue("jenkins" in job.keys(), job)
            self.assertTrue(len(job["jenkins"]) > 1, job)
def main():
    """Train and evaluate the EntityLSTM model, optionally cross-validated.

    Loads parameters and dataset file paths, builds the dataset and the
    TensorFlow graph, then runs a training loop with per-epoch evaluation,
    checkpointing, TensorBoard logging and early stopping on validation
    micro-F1.  When ``parameters['cross_validation'] > 1`` (and the train
    file name contains "als" -- presumably a dataset-specific convention;
    TODO confirm), one run is performed per fold and the mean of the
    per-fold best validation F1 scores is written to ``result_cv.txt``.
    Side effects: creates folders/checkpoints under ``../output`` and may
    prompt on stdin to delete the experiment on interrupt/error.
    """
    parameters, conf_parameters = load_parameters()
    pprint(parameters)
    dataset_filepaths = get_valid_dataset_filepaths(parameters)
    check_parameter_compatiblity(parameters, dataset_filepaths)
    # Number of cross-validation folds; a single run when unset.
    cross_validation = parameters[
        'cross_validation'] if 'cross_validation' in parameters else 1
    valid_fscores = []
    valid_precisions = []
    valid_recalls = []
    for cv in range(0, cross_validation):
        # Build per-fold train/valid files: fold `cv` is held out for
        # validation, the remaining folds are concatenated for training.
        # Fold files are expected at "<train path>_<i>" on disk.
        if "als" in dataset_filepaths['train'] and cross_validation > 1:
            train_files = list(range(0, cv)) + list(
                range(cv + 1, cross_validation))
            test_file = cv
            file_train = "tmp_combined.train"
            file_valid = "tmp_combined.test"
            output = []
            for i in train_files:
                with open(dataset_filepaths['train'] + "_" + str(i),
                          "r",
                          encoding="utf-8") as file:
                    output.append(file.read())
            with open(file_train, "w", encoding="utf-8") as file:
                file.write("\n\n".join(output))
            output = []
            with open(dataset_filepaths['train'] + "_" + str(test_file),
                      "r",
                      encoding="utf-8") as file:
                output.append(file.read())
            with open(file_valid, "w", encoding="utf-8") as file:
                file.write("\n\n".join(output))
            dataset_filepaths['train'] = file_train
            dataset_filepaths['valid'] = file_valid

        # Load dataset
        dataset = ds.Dataset(verbose=parameters['verbose'],
                             debug=parameters['debug'])
        dataset.load_vocab_word_embeddings(parameters)
        dataset.load_dataset(dataset_filepaths, parameters)

        # Create graph and session
        with tf.Graph().as_default():
            session_conf = tf.ConfigProto(
                intra_op_parallelism_threads=parameters[
                    'number_of_cpu_threads'],
                inter_op_parallelism_threads=parameters[
                    'number_of_cpu_threads'],
                device_count={
                    'CPU': 1,
                    'GPU': parameters['number_of_gpus']
                },
                allow_soft_placement=
                True,  # automatically choose an existing and supported device to run the operations in case the specified one doesn't exist
                log_device_placement=False)
            session_conf.gpu_options.allow_growth = True
            sess = tf.Session(config=session_conf)
            with sess.as_default():
                # Initialize and save execution details
                start_time = time.time()
                experiment_timestamp = utils.get_current_time_in_miliseconds()
                results = {}
                results['epoch'] = {}
                results['execution_details'] = {}
                results['execution_details']['train_start'] = start_time
                results['execution_details'][
                    'time_stamp'] = experiment_timestamp
                results['execution_details']['early_stop'] = False
                results['execution_details']['keyboard_interrupt'] = False
                results['execution_details']['num_epochs'] = 0
                results['model_options'] = copy.copy(parameters)

                dataset_name = utils.get_basename_without_extension(
                    parameters['dataset_train'])
                # Model name encodes language, dataset and timestamp;
                # "_small" is appended when only part of the data is used.
                if 'data_to_use' in parameters:
                    model_name = '{0}_{1}'.format(
                        parameters['language'] + "_" + dataset_name +
                        "_small",
                        results['execution_details']['time_stamp'])
                else:
                    model_name = '{0}_{1}'.format(
                        parameters['language'] + "_" + dataset_name,
                        results['execution_details']['time_stamp'])

                output_folder = os.path.join('..', 'output')
                utils.create_folder_if_not_exists(output_folder)
                stats_graph_folder = os.path.join(
                    output_folder, model_name)  # Folder where to save graphs
                utils.create_folder_if_not_exists(stats_graph_folder)
                model_folder = os.path.join(stats_graph_folder, 'model')
                utils.create_folder_if_not_exists(model_folder)
                # Persist the parameter file alongside the checkpoints.
                with open(os.path.join(model_folder, 'parameters.ini'),
                          'w') as parameters_file:
                    conf_parameters.write(parameters_file)
                tensorboard_log_folder = os.path.join(stats_graph_folder,
                                                      'tensorboard_logs')
                utils.create_folder_if_not_exists(tensorboard_log_folder)
                tensorboard_log_folders = {}
                for dataset_type in dataset_filepaths.keys():
                    tensorboard_log_folders[dataset_type] = os.path.join(
                        stats_graph_folder, 'tensorboard_logs',
                        dataset_type)
                    utils.create_folder_if_not_exists(
                        tensorboard_log_folders[dataset_type])
                #del dataset.embeddings_matrix
                if not parameters['use_pretrained_model']:
                    pickle.dump(
                        dataset,
                        open(os.path.join(model_folder, 'dataset.pickle'),
                             'wb'))
                #dataset.load_pretrained_word_embeddings(parameters)
                # Instantiate the model
                # graph initialization should be before FileWriter, otherwise the graph will not appear in TensorBoard
                model = EntityLSTM(dataset, parameters)

                # Instantiate the writers for TensorBoard
                writers = {}
                for dataset_type in dataset_filepaths.keys():
                    writers[dataset_type] = tf.summary.FileWriter(
                        tensorboard_log_folders[dataset_type],
                        graph=sess.graph)
                embedding_writer = tf.summary.FileWriter(
                    model_folder
                )  # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings

                # Configure the TensorBoard embedding projector for token
                # (and optionally character) embeddings.
                embeddings_projector_config = projector.ProjectorConfig()
                tensorboard_token_embeddings = embeddings_projector_config.embeddings.add(
                )
                tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
                token_list_file_path = os.path.join(
                    model_folder, 'tensorboard_metadata_tokens.tsv')
                tensorboard_token_embeddings.metadata_path = os.path.relpath(
                    token_list_file_path, '..')

                if parameters['use_character_lstm']:
                    tensorboard_character_embeddings = embeddings_projector_config.embeddings.add(
                    )
                    tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
                    character_list_file_path = os.path.join(
                        model_folder,
                        'tensorboard_metadata_characters.tsv')
                    tensorboard_character_embeddings.metadata_path = os.path.relpath(
                        character_list_file_path, '..')

                projector.visualize_embeddings(embedding_writer,
                                               embeddings_projector_config)

                # Write metadata for TensorBoard embeddings
                token_list_file = codecs.open(token_list_file_path, 'w',
                                              'UTF-8')
                for token_index in range(len(dataset.index_to_token)):
                    token_list_file.write('{0}\n'.format(
                        dataset.index_to_token[token_index]))
                token_list_file.close()

                if parameters['use_character_lstm']:
                    character_list_file = codecs.open(
                        character_list_file_path, 'w', 'UTF-8')
                    for character_index in range(dataset.alphabet_size):
                        if character_index == dataset.PADDING_CHARACTER_INDEX:
                            character_list_file.write('PADDING\n')
                        else:
                            character_list_file.write('{0}\n'.format(
                                dataset.index_to_character[character_index]))
                    character_list_file.close()

                try:
                    # Initialize the model
                    sess.run(tf.global_variables_initializer())
                    if not parameters['use_pretrained_model']:
                        model.load_pretrained_token_embeddings(
                            sess, dataset, parameters)

                    # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
                    bad_counter = 0  # number of epochs with no improvement on the validation test in terms of F1-score
                    previous_best_valid_f1_score = 0
                    transition_params_trained = np.random.rand(
                        len(dataset.unique_labels),
                        len(dataset.unique_labels)
                    )  #TODO np.random.rand(len(dataset.unique_labels)+2,len(dataset.unique_labels)+2)
                    model_saver = tf.train.Saver(
                        max_to_keep=None
                    )  #parameters['maximum_number_of_epochs']) # defaults to saving all variables
                    epoch_number = 0
                    while True:
                        epoch_number += 1
                        print('\nStarting epoch {0}'.format(epoch_number))
                        epoch_start_time = time.time()

                        if parameters[
                                'use_pretrained_model'] and epoch_number == 1:
                            # Restore pretrained model parameters
                            transition_params_trained = train.restore_model_parameters_from_pretrained_model(
                                parameters, dataset, sess, model,
                                model_saver)
                        elif epoch_number != 0:
                            # Train model: loop over all sequences of training set with shuffling
                            sequence_numbers = list(
                                range(len(dataset.token_indices['train'])))
                            random.shuffle(sequence_numbers)
                            data_counter = 0
                            sub_id = 0
                            for i in tqdm(
                                    range(0, len(sequence_numbers),
                                          parameters['batch_size']),
                                    "Training",
                                    mininterval=1):
                                data_counter += parameters['batch_size']
                                # Every ~20000 sequences, run an
                                # intermediate evaluation + checkpoint;
                                # sub_id gives it a fractional epoch id.
                                if data_counter >= 20000:
                                    data_counter = 0
                                    sub_id += 0.001
                                    print(
                                        "Intermediate evaluation number: ",
                                        sub_id)
                                    #model_saver.save(sess,
                                    #                 os.path.join(model_folder, 'model_{0:05d}_{1}.ckpt'.format(epoch_number, len(sequence_numbers)/4/len(sequence_numbers))))
                                    epoch_elapsed_training_time = time.time(
                                    ) - epoch_start_time
                                    print(
                                        'Training completed in {0:.2f} seconds'
                                        .format(epoch_elapsed_training_time),
                                        flush=True)
                                    y_pred, y_true, output_filepaths = train.predict_labels(
                                        sess, model,
                                        transition_params_trained,
                                        parameters, dataset,
                                        epoch_number + sub_id,
                                        stats_graph_folder,
                                        dataset_filepaths)
                                    # Evaluate model: save and plot results
                                    evaluate.evaluate_model(
                                        results, dataset, y_pred, y_true,
                                        stats_graph_folder, epoch_number,
                                        epoch_start_time, output_filepaths,
                                        parameters)
                                    # Save model
                                    model_saver.save(
                                        sess,
                                        os.path.join(
                                            model_folder,
                                            'model_{0:07.3f}.ckpt'.format(
                                                epoch_number + sub_id)))
                                    # Save TensorBoard logs
                                    summary = sess.run(model.summary_op,
                                                       feed_dict=None)
                                    writers['train'].add_summary(
                                        summary, epoch_number)
                                    writers['train'].flush()
                                    utils.copytree(
                                        writers['train'].get_logdir(),
                                        model_folder)
                                    # Early stop
                                    valid_f1_score = results['epoch'][
                                        epoch_number][0]['valid']['f1_score'][
                                            'micro']
                                    # valid_precision = results['epoch'][epoch_number][0]['valid']['precision']['micro']
                                    # valid_recall = results['epoch'][epoch_number][0]['valid']['recall']['micro']
                                    # valid_fscores.append(valid_f1_score)
                                    if valid_f1_score > previous_best_valid_f1_score:
                                        bad_counter = 0
                                        previous_best_valid_f1_score = valid_f1_score
                                        # previous_best_valid_precision = valid_precision
                                        # previous_best_valid_recall = valid_recall
                                    else:
                                        bad_counter += 1
                                # One gradient step per mini-batch of
                                # sequence indices.
                                sequence_number = sequence_numbers[
                                    i:i + parameters['batch_size']]
                                transition_params_trained, loss = train.train_step(
                                    sess, dataset, sequence_number, model,
                                    transition_params_trained, parameters)
                        # End-of-epoch evaluation, checkpoint, logging.
                        epoch_elapsed_training_time = time.time(
                        ) - epoch_start_time
                        print('Training completed in {0:.2f} seconds'.format(
                            epoch_elapsed_training_time),
                              flush=True)

                        y_pred, y_true, output_filepaths = train.predict_labels(
                            sess, model, transition_params_trained,
                            parameters, dataset, epoch_number,
                            stats_graph_folder, dataset_filepaths)

                        # Evaluate model: save and plot results
                        evaluate.evaluate_model(results, dataset, y_pred,
                                                y_true, stats_graph_folder,
                                                epoch_number,
                                                epoch_start_time,
                                                output_filepaths,
                                                parameters)

                        # Save model
                        model_saver.save(
                            sess,
                            os.path.join(
                                model_folder,
                                'model_{0:05d}.ckpt'.format(epoch_number)))

                        # Save TensorBoard logs
                        summary = sess.run(model.summary_op, feed_dict=None)
                        writers['train'].add_summary(summary, epoch_number)
                        writers['train'].flush()
                        utils.copytree(writers['train'].get_logdir(),
                                       model_folder)

                        # Early stop
                        valid_f1_score = results['epoch'][epoch_number][0][
                            'valid']['f1_score']['micro']
                        #valid_precision = results['epoch'][epoch_number][0]['valid']['precision']['micro']
                        #valid_recall = results['epoch'][epoch_number][0]['valid']['recall']['micro']
                        #valid_fscores.append(valid_f1_score)
                        if valid_f1_score > previous_best_valid_f1_score:
                            bad_counter = 0
                            previous_best_valid_f1_score = valid_f1_score
                            #previous_best_valid_precision = valid_precision
                            #previous_best_valid_recall = valid_recall
                        else:
                            bad_counter += 1
                        print(
                            "The last {0} epochs have not shown improvements on the validation set."
                            .format(bad_counter))

                        if bad_counter >= parameters['patience']:
                            print('Early Stop!')
                            results['execution_details']['early_stop'] = True
                            break

                        if epoch_number >= parameters[
                                'maximum_number_of_epochs']:
                            break
                except KeyboardInterrupt:
                    results['execution_details']['keyboard_interrupt'] = True
                    print('Training interrupted')
                    # remove the experiment
                    remove_experiment = input(
                        "Do you want to remove the experiment? (yes/y/Yes)")
                    if remove_experiment in ["Yes", "yes", "y"]:
                        shutil.rmtree(stats_graph_folder)
                        print("Folder removed")
                    else:
                        # Keep the run: record timing and persist results.
                        print('Finishing the experiment')
                        end_time = time.time()
                        results['execution_details'][
                            'train_duration'] = end_time - start_time
                        results['execution_details']['train_end'] = end_time
                        evaluate.save_results(results, stats_graph_folder)
                except Exception:
                    # Log the traceback, then offer to delete the
                    # (possibly corrupt) experiment folder.
                    logging.exception("")
                    remove_experiment = input(
                        "Do you want to remove the experiment? (yes/y/Yes)")
                    if remove_experiment in ["Yes", "yes", "y"]:
                        shutil.rmtree(stats_graph_folder)
                        print("Folder removed")

                sess.close()  # release the session's resources
        # Collect this fold's best validation F1 for the CV summary.
        if 'cross_validation' in parameters and parameters[
                'cross_validation'] > 1:
            valid_fscores.append(previous_best_valid_f1_score)
            #valid_precisions.append(previous_best_valid_precision)
            #valid_recalls.append(previous_best_valid_recall)

    # Cross-validation summary: mean of the per-fold best F1 scores.
    if 'cross_validation' in parameters and parameters['cross_validation'] > 1:
        print("mean f1score:", np.mean(valid_fscores))
        #print("mean precision:", np.mean(valid_precisions))
        #print("mean recall:", np.mean(valid_recalls))
        with codecs.open(os.path.join(stats_graph_folder, "result_cv.txt"),
                         "w") as file:
            file.write("F1score " + ", ".join(map(str, valid_fscores)))
            # file.write("Precision " + valid_precisions)
            # file.write("Recall " + valid_recalls)
            file.write("Mean F1score " + str(np.mean(valid_fscores)))
def main(argv=sys.argv):
    ''' NeuroNER main method

    Args:
        parameters_filepath the path to the parameters file
        output_folder the path to the output folder

    Parses command-line arguments, loads parameters and the dataset,
    builds the EntityLSTM graph, then runs the training/evaluation loop
    with best/last-model checkpointing and early stopping on the
    weighted validation F1.  Supports restoring a pretrained model at
    epoch 0, an optional "adapter" mode that feeds step-1 label
    predictions into the model, and an optional added class whose
    global step is re-initialized.

    NOTE(review): the default ``argv=sys.argv`` is captured at import
    time, not at call time -- confirm callers always pass argv or rely
    on this deliberately.
    '''
    arguments = parse_arguments(argv[1:])
    parameters, conf_parameters = load_parameters(
        arguments['parameters_filepath'], arguments=arguments)
    dataset_filepaths, dataset_brat_folders = get_valid_dataset_filepaths(
        parameters)
    check_parameter_compatiblity(parameters, dataset_filepaths)

    # Load dataset
    dataset = ds.Dataset(verbose=parameters['verbose'],
                         debug=parameters['debug'])
    dataset.load_dataset(dataset_filepaths, parameters)

    # Create graph and session
    with tf.device('/gpu:0'):
        with tf.Graph().as_default():
            session_conf = tf.ConfigProto(
                intra_op_parallelism_threads=parameters[
                    'number_of_cpu_threads'],
                inter_op_parallelism_threads=parameters[
                    'number_of_cpu_threads'],
                device_count={
                    'CPU': 1,
                    'GPU': parameters['number_of_gpus']
                },
                allow_soft_placement=
                True,  # automatically choose an existing and supported device to run the operations in case the specified one doesn't exist
                log_device_placement=False)

            sess = tf.Session(config=session_conf)
            with sess.as_default():
                # Initialize and record execution details.
                start_time = time.time()
                experiment_timestamp = utils.get_current_time_in_miliseconds()

                results = {}
                results['epoch'] = {}
                results['execution_details'] = {}
                results['execution_details']['train_start'] = start_time
                results['execution_details'][
                    'time_stamp'] = experiment_timestamp
                results['execution_details']['early_stop'] = False
                results['execution_details']['keyboard_interrupt'] = False
                results['execution_details']['num_epochs'] = 0
                results['model_options'] = copy.copy(parameters)

                dataset_name = utils.get_basename_without_extension(
                    parameters['dataset_text_folder'])
                model_name = dataset_name
                utils.create_folder_if_not_exists(parameters['output_folder'])
                stats_graph_folder = os.path.join(
                    parameters['output_folder'],
                    model_name)  # Folder where to save graphs
                final_weights_folder = os.path.join(
                    parameters['output_folder'], 'weights')
                utils.create_folder_if_not_exists(stats_graph_folder)
                utils.create_folder_if_not_exists(final_weights_folder)
                model_folder = os.path.join(stats_graph_folder, 'model')
                utils.create_folder_if_not_exists(model_folder)
                # saving the parameter setting to the output model dir. For later resuming training
                with open(os.path.join(model_folder, 'parameters.ini'),
                          'w') as parameters_file:
                    conf_parameters.write(parameters_file)
                tensorboard_log_folder = os.path.join(stats_graph_folder,
                                                      'tensorboard_logs')
                utils.create_folder_if_not_exists(tensorboard_log_folder)
                tensorboard_log_folders = {}
                for dataset_type in dataset_filepaths.keys():
                    tensorboard_log_folders[dataset_type] = os.path.join(
                        stats_graph_folder, 'tensorboard_logs', dataset_type)
                    utils.create_folder_if_not_exists(
                        tensorboard_log_folders[dataset_type])
                pickle.dump(
                    dataset,
                    open(os.path.join(model_folder, 'dataset.pickle'), 'wb'))

                # Instantiate the model
                # graph initialization should be before FileWriter, otherwise the graph will not appear in TensorBoard
                model = EntityLSTM(dataset, parameters)

                # Instantiate the writers for TensorBoard
                writers = {}
                for dataset_type in dataset_filepaths.keys():
                    writers[dataset_type] = tf.summary.FileWriter(
                        tensorboard_log_folders[dataset_type],
                        graph=sess.graph)
                # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings
                embedding_writer = tf.summary.FileWriter(model_folder)

                # Configure the TensorBoard embedding projector for token
                # and character embeddings.
                embeddings_projector_config = projector.ProjectorConfig()
                tensorboard_token_embeddings = embeddings_projector_config.embeddings.add(
                )
                tensorboard_token_embeddings.tensor_name = model.token_embedding_weights.name
                token_list_file_path = os.path.join(
                    model_folder, 'tensorboard_metadata_tokens.tsv')
                tensorboard_token_embeddings.metadata_path = os.path.relpath(
                    token_list_file_path, '..')

                tensorboard_character_embeddings = embeddings_projector_config.embeddings.add(
                )
                tensorboard_character_embeddings.tensor_name = model.character_embedding_weights.name
                character_list_file_path = os.path.join(
                    model_folder, 'tensorboard_metadata_characters.tsv')
                tensorboard_character_embeddings.metadata_path = os.path.relpath(
                    character_list_file_path, '..')

                projector.visualize_embeddings(embedding_writer,
                                               embeddings_projector_config)

                # Write metadata for TensorBoard embeddings
                token_list_file = codecs.open(token_list_file_path, 'w',
                                              'latin-1')
                for token_index in range(dataset.vocabulary_size):
                    token_list_file.write('{0}\n'.format(
                        dataset.index_to_token[token_index]))
                token_list_file.close()

                character_list_file = codecs.open(character_list_file_path,
                                                  'w', 'latin-1')
                for character_index in range(dataset.alphabet_size):
                    if character_index == dataset.PADDING_CHARACTER_INDEX:
                        character_list_file.write('PADDING\n')
                    else:
                        character_list_file.write('{0}\n'.format(
                            dataset.index_to_character[character_index]))
                character_list_file.close()

                # Initialize the model
                sess.run(tf.global_variables_initializer())
                if not parameters['use_pretrained_model']:
                    model.load_pretrained_token_embeddings(
                        sess, dataset, parameters)

                # Start training + evaluation loop. Each iteration corresponds to 1 epoch.
                patience_counter = 0  # epochs since the best valid F1
                f1_score_best = 0
                f1_scores = {'train-F1': [], 'valid-F1': [], 'test-F1': []}
                f1_scores_conll = {
                    'train-F1': [],
                    'valid-F1': [],
                    'test-F1': []
                }
                # CRF transition matrix (+2 for the start/end states),
                # randomly initialized and updated during training.
                transition_params_trained = np.random.rand(
                    len(dataset.unique_labels) + 2,
                    len(dataset.unique_labels) + 2)
                model_saver = tf.train.Saver(
                    max_to_keep=parameters['num_of_model_to_keep'])
                epoch_number = -1
                try:
                    while True:
                        step = 0
                        epoch_number += 1
                        print('\nStarting epoch {0}'.format(epoch_number))
                        epoch_start_time = time.time()

                        # use pre-trained model and epoch_number = 0
                        if parameters[
                                'use_pretrained_model'] and epoch_number == 0:
                            if parameters['use_adapter']:
                                # Temporarily disable the adapter to run
                                # the step-1 model and collect its
                                # predictions for the adapter input.
                                parameters['use_adapter'] = False
                                transition_params_trained = train.restore_pretrained_model(
                                    parameters, dataset, sess, model,
                                    model_saver)
                                print(
                                    'Getting the 3-label predictions from the step1 model.'
                                )
                                all_pred_labels, y_pred_for_adapter, y_true_for_adapter, \
                                    output_filepaths = train.predict_labels(sess, model,
                                                                            transition_params_trained,
                                                                            parameters, dataset,
                                                                            epoch_number,
                                                                            stats_graph_folder,
                                                                            dataset_filepaths,
                                                                            for_adapter=True)
                                # use the label2idx mapping (for adapter) in the dataset to transform all_pred_labels
                                all_pred_indices = {}
                                for dataset_type in dataset_filepaths.keys():
                                    all_pred_indices[dataset_type] = []
                                    for i in range(
                                            len(all_pred_labels[dataset_type])
                                    ):
                                        indices = [
                                            dataset.
                                            label_adapter_to_index[label]
                                            for label in
                                            all_pred_labels[dataset_type][i]
                                        ]
                                        all_pred_indices[dataset_type].append(
                                            indices)
                                # and use binarizer to transform to ndarray
                                label_binarizer_adapter = sklearn.preprocessing.LabelBinarizer(
                                )
                                label_binarizer_adapter.fit(
                                    range(
                                        max(dataset.index_to_label_adapter.
                                            keys()) + 1))
                                predicted_label_adapter_vector_indices = {}
                                for dataset_type in dataset_filepaths.keys():
                                    predicted_label_adapter_vector_indices[
                                        dataset_type] = []
                                    for label_indices_sequence in all_pred_indices[
                                            dataset_type]:
                                        predicted_label_adapter_vector_indices[
                                            dataset_type].append(
                                                label_binarizer_adapter.
                                                transform(
                                                    label_indices_sequence))
                                parameters['use_adapter'] = True

                            if parameters['train_model'] and parameters[
                                    'add_class']:
                                # Restore with an added class; the new
                                # global step variable must be
                                # initialized separately.
                                transition_params_trained, model, glo_step = \
                                    train.restore_model_parameters_from_pretrained_model(parameters, dataset, sess,
                                                                                         model, model_saver)
                                init_new_vars_op = tf.initialize_variables(
                                    [glo_step])
                                sess.run(init_new_vars_op)
                            else:
                                transition_params_trained = \
                                    train.restore_pretrained_model(parameters, dataset, sess, model, model_saver)
                            # Re-create the writers against the restored
                            # graph.
                            for dataset_type in dataset_filepaths.keys():
                                writers[dataset_type] = tf.summary.FileWriter(
                                    tensorboard_log_folders[dataset_type],
                                    graph=sess.graph)
                            # embedding_writer has to write in model_folder, otherwise TensorBoard won't be able to view embeddings
                            embedding_writer = tf.summary.FileWriter(
                                model_folder)

                        # epoch_number != 0, no matter use or not use pre-trained model
                        elif epoch_number != 0:
                            # Train model: loop over all sequences of training set with shuffling
                            sequence_numbers = list(
                                range(len(dataset.token_indices['train'])))
                            random.shuffle(sequence_numbers)
                            for sequence_number in sequence_numbers:
                                transition_params_trained, W_before_crf = train.train_step(
                                    sess, dataset, sequence_number, model,
                                    transition_params_trained, parameters)
                                step += 1
                        epoch_elapsed_training_time = time.time(
                        ) - epoch_start_time
                        print('Training completed in {0:.2f} seconds'.format(
                            epoch_elapsed_training_time),
                              flush=False)

                        if parameters[
                                'use_adapter']:  # model evaluation, using adapter
                            # pass the pred_for_adapter as label_indices vector
                            original_label_adapter_vector_indices = dataset.label_adapter_vector_indices
                            dataset.label_adapter_vector_indices = predicted_label_adapter_vector_indices
                            y_pred, y_true, output_filepaths = train.predict_labels(
                                sess, model, transition_params_trained,
                                parameters, dataset, epoch_number,
                                stats_graph_folder, dataset_filepaths)
                            evaluate.evaluate_model(results, dataset, y_pred,
                                                    y_true,
                                                    stats_graph_folder,
                                                    epoch_number,
                                                    epoch_start_time,
                                                    output_filepaths,
                                                    parameters)
                            # Restore the dataset's original adapter
                            # vectors after evaluation.
                            dataset.label_adapter_vector_indices = original_label_adapter_vector_indices
                        else:  # model evaluation, not using adapter
                            y_pred, y_true, output_filepaths = train.predict_labels(
                                sess, model, transition_params_trained,
                                parameters, dataset, epoch_number,
                                stats_graph_folder, dataset_filepaths)
                            # Evaluate model: save and plot results
                            evaluate.evaluate_model(results, dataset, y_pred,
                                                    y_true,
                                                    stats_graph_folder,
                                                    epoch_number,
                                                    epoch_start_time,
                                                    output_filepaths,
                                                    parameters)

                        # Save TensorBoard logs.
                        summary = sess.run(model.summary_op, feed_dict=None)
                        writers['train'].add_summary(summary, epoch_number)
                        writers['train'].flush()
                        utils.copytree(writers['train'].get_logdir(),
                                       model_folder)

                        # Early stopping
                        train_f1_score = results['epoch'][epoch_number][0][
                            'train']['f1_score']['weighted']
                        valid_f1_score = results['epoch'][epoch_number][0][
                            'valid']['f1_score']['weighted']
                        test_f1_score = results['epoch'][epoch_number][0][
                            'test']['f1_score']['weighted']
                        f1_scores['train-F1'].append(train_f1_score)
                        f1_scores['valid-F1'].append(valid_f1_score)
                        f1_scores['test-F1'].append(test_f1_score)

                        train_f1_score_conll = results['epoch'][epoch_number][
                            0]['train']['f1_conll']['micro']
                        valid_f1_score_conll = results['epoch'][epoch_number][
                            0]['valid']['f1_conll']['micro']
                        test_f1_score_conll = results['epoch'][epoch_number][
                            0]['test']['f1_conll']['micro']
                        f1_scores_conll['train-F1'].append(
                            train_f1_score_conll)
                        f1_scores_conll['valid-F1'].append(
                            valid_f1_score_conll)
                        f1_scores_conll['test-F1'].append(test_f1_score_conll)

                        if valid_f1_score > f1_score_best:
                            patience_counter = 0
                            f1_score_best = valid_f1_score
                            # Save the best model
                            model_saver.save(
                                sess,
                                os.path.join(model_folder, 'best_model.ckpt'))
                            print(
                                'updated model to current epoch : epoch {:d}'.
                                format(epoch_number))
                            print('the model is saved in: {:s}'.format(
                                model_folder))
                        else:
                            patience_counter += 1
                        print("In epoch {:d}, the valid F1 is : {:f}".format(
                            epoch_number, valid_f1_score))
                        print(
                            "The last {0} epochs have not shown improvements on the validation set."
                            .format(patience_counter))

                        if patience_counter >= parameters['patience']:
                            print('Early Stop!')
                            results['execution_details']['early_stop'] = True
                            # save last model
                            model_saver.save(
                                sess,
                                os.path.join(model_folder, 'last_model.ckpt'))
                            print('the last model is saved in: {:s}'.format(
                                model_folder))
                            break

                        if epoch_number >= parameters[
                                'maximum_number_of_epochs'] and not parameters[
                                    'refine_with_crf']:
                            break

                    # Post-training summary: print and plot the per-epoch
                    # F1 trajectories (sklearn and conll variants).
                    if not parameters['use_pretrained_model']:
                        plot_name = 'F1-summary-step1.svg'
                    else:
                        plot_name = 'F1-summary-step2.svg'
                    print('Sklearn result:')
                    for k, l in f1_scores.items():
                        print(k, l)
                    print('Conll result:')
                    for k, l in f1_scores_conll.items():
                        print(k, l)
                    utils_plots.plot_f1(
                        f1_scores,
                        os.path.join(stats_graph_folder, '..', plot_name),
                        'F1 score summary')
                    # TODO: in step 1, for task a, add the best deploy data to step 2 train set, and call script
                    print('(sklearn micro) test F1:')
                    micro_f1 = ','.join([
                        str(results['epoch'][ep][0]['test']['f1_score']
                            ['micro']) for ep in range(epoch_number + 1)
                    ])
                    print(micro_f1)
                    print('(sklearn macro) test F1:')
                    macro_f1 = ','.join([
                        str(results['epoch'][ep][0]['test']['f1_score']
                            ['macro']) for ep in range(epoch_number + 1)
                    ])
                    print(macro_f1)
                except KeyboardInterrupt:
                    results['execution_details']['keyboard_interrupt'] = True
                    print('Training interrupted')

                # Always record timing, persist results and release
                # writer/session resources.
                print('Finishing the experiment')
                end_time = time.time()
                results['execution_details'][
                    'train_duration'] = end_time - start_time
                results['execution_details']['train_end'] = end_time
                evaluate.save_results(results, stats_graph_folder)
                for dataset_type in dataset_filepaths.keys():
                    writers[dataset_type].close()
                sess.close()  # release the session's resources