def run():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-dir', required=True)
    parser.add_argument('--directory-to-write', required=True)
    parser.add_argument('--model-dir', required=True)
    parser.add_argument('--log-level', default='debug')
    parser.add_argument('--fresh-start', action="store_true")
    parser.add_argument('--num-of-process', default=cpu_count() - 1, type=int)
    parser.add_argument('--check-pool-every', default=150, type=int,
                        help="Check whether the multiprocessing.Pool is stuck every n seconds")
    args = parser.parse_args()

    input_dir = args.input_dir
    output_dir = args.directory_to_write
    model_dir = args.model_dir

    utils.configure_logger(args.log_level)
    logger = utils.get_logger()
    logger.debug("Args: {}".format(args))

    predict(model_dir, input_dir, output_dir,
            num_of_process=args.num_of_process,
            fresh_start=args.fresh_start,
            check_pool_every=args.check_pool_every)
    logger.info('Done')
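Most snippets in this collection call utils.configure_logger(level) followed by utils.get_logger(); the utility itself is never shown. A minimal sketch of one possible implementation, where the logger name and behavior are assumptions rather than the actual utils module:

# Hypothetical sketch -- the real utils module is not shown in these snippets.
import logging

_LOGGER_NAME = "app"  # assumed shared logger name

def configure_logger(log_level="info"):
    """Attach a stream handler and set the level on the shared logger."""
    logger = logging.getLogger(_LOGGER_NAME)
    if not logger.handlers:  # avoid duplicate handlers on repeated calls
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s"))
        logger.addHandler(handler)
    logger.setLevel(getattr(logging, log_level.upper(), logging.INFO))
    return logger

def get_logger():
    """Return the shared logger configured by configure_logger()."""
    return logging.getLogger(_LOGGER_NAME)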
def main(args):
    # given program arguments, generate a config file
    config = cfg.generate_config(args)

    # if given a best state, load its config
    if args.state:
        logging.info('loading config from {}'.format(args.state))
        best_state = torch.load(args.state)
        config = best_state['config']

    # create a checkpoint directory
    model_dir = utl.generate_experiment_dir(args.model_dir, config, prefix_str='S3DIS-hilbert')

    # configure logger
    utl.configure_logger(model_dir, args.loglevel.upper())

    # get Tensorboard writer object
    writer = utl.get_tensorboard_writer(log_dir=model_dir)

    train(config=config, model_dir=model_dir, writer=writer)

    # close Tensorboard writer
    writer.close()
def configure_logging():
    """
    Configure the loggers for Talos.

    Sets up the Talos loggers and discord.py loggers separately, so they can be
    easily configured independently.
    """
    fh = logging.FileHandler(utils.log_folder / "dtalos.log")
    dfh = logging.FileHandler(utils.log_folder / "dpy.log")
    sh = logging.StreamHandler(sys.stderr)
    gh = None
    try:
        import google.cloud.logging as glog
        client = glog.Client()
        gh = client.get_default_handler()
        gh.name = "dtalos"
        gh.setLevel(logging.WARNING)
    except (ImportError, OSError):
        pass
    ff = logging.Formatter("%(levelname)s:%(name)s:%(message)s")
    dlog = logging.getLogger("discord")

    utils.configure_logger(log, handlers=[fh, sh, gh], formatter=ff, level=logging.INFO, propagate=False)
    utils.configure_logger(dlog, handlers=[dfh, sh], formatter=ff, level=logging.INFO, propagate=False)
def run():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--filename', required=False, default='wikipages.txt')
    parser.add_argument('--no-fetching-links', required=False, default=False, action="store_true")
    parser.add_argument('--num-process', help="Number of processes for parallel processing", required=False, default=1, type=int)
    parser.add_argument('--log-level', required=False, default="info")
    args = parser.parse_args()

    configure_logger(args.log_level)
    logger = utils.get_logger()

    fetching_links = not args.no_fetching_links
    logger.info("Args: {}".format(args))

    directory = '../datasets/wiki-new/'
    try:
        os.mkdir(directory)
    except OSError:
        logger.debug("{} already exists".format(directory))

    extract_from_file(args.filename, args.num_process, directory, fetching_links)
def main(fileName, top_directory, maximum, minSilenceLength, silence_threshold, samplerate, gain):
    # Create log directory and configure logging
    duration = 0
    work_on_single_file = fileName is not None

    if work_on_single_file and not os.path.exists(fileName):
        print('Could not find the audio-file {}.'.format(fileName))
        usage()
    if maximum < 10 or maximum > 23:
        print('Audio duration should be between 10 and 23 seconds!')
        usage()
    if minSilenceLength > 0 and (minSilenceLength > 1000 or minSilenceLength < 150):
        print('Minimum silence length value should be between 150msec and 1000msec!')
        usage()
    if samplerate > 0 and (samplerate > 44100 or samplerate < 16000):
        print('Sample rate should be between 16000 and 44100')
        usage()

    logPath = os.path.join(os.getcwd(), 'log')
    if not os.path.exists(logPath):
        os.makedirs(logPath)
    configure_logger(logFileName=os.path.join(logPath, 'vlog.log'))

    # Check if the file exists in the data directory
    if work_on_single_file:
        duration = segmentAudioFile(fileName, maximum=maximum, minSilenceLength=minSilenceLength,
                                    silence_threshold=silence_threshold, samplerate=samplerate, gain=gain)
        logger.info('-' * 80)
        logger.info('TOTAL DURATION of the FILES is {} '.format(duration))
    else:
        fileList = getfiles(top_directory)
        # Create the directory where the file segments are to be written
        for fileName in filter(lambda fileName: fileName.endswith('.wav') or fileName.endswith('.mp3'), fileList):
            filePath = os.path.join(top_directory, fileName)
            temp_duration = segmentAudioFile(filePath, maximum=maximum, minSilenceLength=minSilenceLength,
                                             silence_threshold=silence_threshold, samplerate=samplerate, gain=gain)
            if temp_duration is not None:
                duration += temp_duration
            else:
                break
        logger.info('-' * 80)
        logger.info("TOTAL DURATION of the FILES is 'hh:mm:ss' {} ".format(convertMilliseconsTime(duration)))
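The convertMilliseconsTime helper used in the last log line above is not shown; a minimal sketch, assuming it formats a duration in milliseconds as 'hh:mm:ss' (the name is kept from the snippet, the body is an assumption):

# Assumed helper, not shown in the snippet above: format milliseconds as 'hh:mm:ss'.
def convertMilliseconsTime(ms):
    seconds = int(ms // 1000)
    hours, remainder = divmod(seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, secs)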
def run():
    utils.configure_logger('debug')
    logger = utils.get_logger()

    input_directory = sys.argv[1]  # '../datasets/wiki-filtered'
    out_directory = sys.argv[2]  # '../datasets/wiki-senses'

    files = os.listdir(input_directory)
    files = [os.path.join(input_directory, f) for f in files]
    logger.info('total number of files: %d' % len(files))

    create_sense_dataset(files, out_directory)
    logger.info('done')
def run():
    utils.configure_logger('debug')
    logger = utils.get_logger()

    input_directory = sys.argv[1]
    out_directory = sys.argv[2]
    num_of_fold = int(sys.argv[3])

    files = os.listdir(input_directory)
    files = [os.path.join(input_directory, f) for f in files]
    logger.info('total number of files: %d' % len(files))

    create_IMS_formatted_dataset(files, out_directory, k=num_of_fold, num_of_process=30)
    logger.info('done')
def run():
    utils.configure_logger('debug')
    logger = utils.get_logger()

    input_directory = sys.argv[1]
    out_directory = sys.argv[2]
    num_of_fold = int(sys.argv[3])
    num_of_processor = int(sys.argv[4])

    files = os.listdir(input_directory)
    files = [os.path.join(input_directory, f) for f in files]
    logger.info('total number of files: %d' % len(files))

    create_IMS_formatted_dataset(files, out_directory, k=num_of_fold, num_of_process=num_of_processor)
    logger.info('done')
def run():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-file', required=True)
    parser.add_argument('--model-dir', required=True)
    parser.add_argument('--write-every-n-line', default=200000, type=int)
    parser.add_argument('--directory-to-write', default='/tmp/mt-data')
    parser.add_argument('--log-level', default='debug')
    args = parser.parse_args()

    utils.configure_logger(args.log_level)
    logger = utils.get_logger()
    logger.debug("Args: {}".format(args))

    preprocess_mt_input_file(args.input_file, args.model_dir, args.directory_to_write, args.write_every_n_line)
    logger.info('Done')
def process(statsfile, k, optfile=None):
    stats = utils.load_pickle(statsfile)
    track_ar = average_rank_per_track(stats)
    clique_ar = average_rank_per_clique(stats)
    ma_p = mean_average_precision(stats)
    #k_p = average_precision(stats, k, ver=True)
    k_p = average_precision_at_k(stats, k)

    # Set up logger
    logger = utils.configure_logger()

    # print results
    logger.info("Number of queries: %d" % len(stats))
    logger.info("Average Rank per Track: %.3f" % track_ar)
    logger.info("Average Rank per Clique: %.3f" % clique_ar)
    logger.info("Mean Average Precision: %.2f %%" % (ma_p * 100))
    logger.info("Precision at %d: %.2f %%" % (k, k_p * 100))

    if optfile is not None:
        stats2 = utils.load_pickle(optfile)
        #plot_rank_histograms(stats, stats2, test=False)
        plot_precision_at_k_histograms(stats, stats2, K=[1, 3, 5, 10], test=False)
    else:
        plot_rank_histogram(stats)
def execute(self):
    current_bet = self.initial_bet
    current_money = self.initial_money
    betting_history = []
    for index, odd in enumerate(self.odds):
        if not self._betting_condition(self.odds_ratio[index]):
            continue
        current_money = self._bet(current_money, current_bet)
        if current_money <= 0:
            return betting_history
        if self.result[index] == 'win':
            current_money += (current_bet * float(odd))
            current_bet = self._cal_bet_after_winning(current_bet)
        else:
            current_bet = self._cal_bet_after_lossing()
        if logging_settings.get('enable_single_run_logging', False):
            logger = configure_logger("betting", "DoubleBettingAfterWinning.csv")
            logger.info("Index:{}, current_money:{}, current_bet:{}, odds:{}".format(index, current_money, current_bet, odd))
        betting_history.append({'index': index, 'current_money': current_money, 'current_bet': current_bet})
    return betting_history
def run():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-file', required=True)
    parser.add_argument('--wsd-output-dir', required=True)
    parser.add_argument('--directory-to-write', required=True)
    parser.add_argument('--log-level', default='debug')
    args = parser.parse_args()

    utils.configure_logger(args.log_level)
    logger = utils.get_logger()
    logger.debug("Args: {}".format(args))

    merger = IMSOutputMerger()
    merger.merge(args.input_file, args.wsd_output_dir, args.directory_to_write)
    logger.info('Merge Done.')
def __init__(self, dnsq, fut, clientip, logger=None):
    self.transport = None
    self.dnsq = dnsq
    self.fut = fut
    self.clientip = clientip
    if logger is None:
        logger = utils.configure_logger("DNSClientProtocol", "DEBUG")
    self.logger = logger
def run():
    sense_vocab = build_vocab('../datasets/senses.train.txt', num_already_allocated_tokens=0)
    word_vocab = build_vocab('../datasets/sentences.train.txt')

    configure_logger()
    logger = get_logger()
    logger.info("{} {}".format(word_vocab.size, sense_vocab.size))

    train_iter = read_data(word_vocab, sense_vocab, data_path='../datasets/')
    disambiguator = NeuralDisambiguator(hidden_unit_size=25,
                                        learning_rate=0.001,
                                        num_senses=sense_vocab.size,
                                        vocab_size=word_vocab.size,
                                        embedding_length=50)
    disambiguator.fit(train_iter, max_steps=2000)
def __init__(self, upstream_resolver, upstream_port, logger=None):
    self.loop = asyncio.get_event_loop()
    self.upstream_resolver = upstream_resolver
    self.upstream_port = upstream_port
    if logger is None:
        logger = utils.configure_logger("DNSClient", "DEBUG")
    self.logger = logger
    self.transport = None
def run(no_journeys, map_configs):
    overall_start_time, runstr = get_start_time()
    logger = configure_logger(runstr)
    journey_files, no_journeys, attempting_all = get_journey_files(no_journeys)
    base_layers = read_in_convert_base_maps(map_configs)
    clear_out_old_folders_and_make_new(map_configs)
    maps_dict = plot_base_map_layers(base_layers, map_configs)
    (
        start_time,
        journey_plots,
        counters,
        text_vars,
    ) = set_up_plot_lists_and_counters(journey_files)
    timestr, timestr_moving_recents, text_vars = make_first_frames(
        counters, journey_files, text_vars, maps_dict, map_configs)
    counters = make_all_other_frames(
        journey_files,
        attempting_all,
        no_journeys,
        start_time,
        maps_dict,
        runstr,
        text_vars,
        timestr,
        journey_plots,
        counters,
        map_configs,
    )
    make_final_by_year_image(runstr, counters, maps_dict, map_configs)
    additional_frames_journeys_fading_out(journey_files, maps_dict, journey_plots, counters, map_configs)
    make_all_videos(runstr, counters, map_configs)
    clear_out_images_for_video_folder(map_configs)
    overall_finish_time = datetime.datetime.now()
    overall_run_notes(
        runstr,
        attempting_all,
        no_journeys,
        overall_start_time,
        overall_finish_time,
        counters,
        map_configs,
    )
def run():
    configure_logger()
    logger = get_logger()

    dataset = DataSet('../datasets/wiki-new')
    FLAGS = {
        "embedding_length": 10,
        "min_counts": 10,
        "batch_size": 16,
        "hidden_unit_size": 10,
        "learning_rate": .001
    }
    disambiguator = NeuralDisambiguator(dataset, FLAGS, use_pretrained_embeddings=False)
    disambiguator.fit(max_steps=2000)
def main(args):
    # given program arguments, construct the dataset
    dataset = S3DIS(args)

    # create a checkpoint directory
    model_dir = dataset.experiment_dir

    # configure logger
    utl.configure_logger(model_dir, args.loglevel.upper())

    # get Tensorboard writer object
    writer = utl.get_tensorboard_writer(log_dir=model_dir)
    dataset.config.dump_to_tensorboard(writer=writer)

    train(dataset=dataset, model_dir=model_dir, writer=writer)

    # close Tensorboard writer
    writer.close()
def run():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-dir', required=True)
    parser.add_argument('--directory-to-write', default='/tmp/ims-mt-data')
    parser.add_argument('--num-of-process', default=1, type=int)
    parser.add_argument('--log-level', default='debug')
    args = parser.parse_args()

    input_directory = args.input_dir
    out_directory = args.directory_to_write

    utils.configure_logger(args.log_level)
    logger = utils.get_logger()
    logger.debug("Args: {}".format(args))

    files = os.listdir(input_directory)
    files = [os.path.join(input_directory, f) for f in files]
    logger.info('total number of files: %d' % len(files))

    create_IMS_formatted_dataset(files, out_directory, args.num_of_process)
    logger.info('Done')
def run():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--filename', required=False, default='wikipages.txt')
    parser.add_argument('--num-process', help="Number of processes for parallel processing", required=False, default=1, type=int)
    parser.add_argument('--log-level', required=False, default="info")
    args = parser.parse_args()

    configure_logger(args.log_level)
    logger = utils.get_logger()
    logger.info("Args: {}".format(args))

    directory = '../datasets/wiki/'
    try:
        os.mkdir(directory)
    except OSError:
        logger.debug("{} already exists".format(directory))

    extract_from_file(args.filename, args.num_process)
def run():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--uwsd-dataset', required=False, default='../datasets/wiki')
    parser.add_argument('--category-file', required=False,
                        default='../datasets/wikipedia-miner/en_20090306/categorylink.csv')
    parser.add_argument('--generality-file', required=False,
                        default='../datasets/wikipedia-miner/en_20090306/generality.csv')
    parser.add_argument('--pageid-title-file', required=False,
                        default='../datasets/wikipedia-miner/en_20090306/page.csv')
    parser.add_argument('--num-process', help="Number of processes for parallel processing",
                        required=False, default=1, type=int)
    parser.add_argument('--log-level', required=False, default="info")
    args = parser.parse_args()

    configure_logger(args.log_level)
    logger = utils.get_logger()
    logger.info("Running.")

    files = sorted(glob.glob(os.path.abspath(args.uwsd_dataset) + "/*.tw.txt"))
    create_page_id_link_mapping_file(files, args.uwsd_dataset)

    # files = sorted(glob.glob(os.path.abspath(args.uwsd_dataset) + "/*.pageid.txt"))
    # get_categories_for_senses(files, args.category_file, args.pageid_title_file, args.generality_file)

    logger.info("Done")
def get_app(args):
    logger = utils.configure_logger("doh-httpproxy", args.level)
    app = DOHApplication(logger=logger, debug=args.debug)
    app.set_upstream_resolver(args.upstream_resolver, args.upstream_port)
    app.set_ecs(args.ecs)
    app.set_socket(args.socket)
    app.router.add_get(args.uri, doh1handler)
    app.router.add_post(args.uri, doh1handler)

    # Get trusted reverse proxies and format it for aiohttp_remotes setup
    #if len(args.trusted) == 0:
    #    x_forwarded_handling = aiohttp_remotes.XForwardedRelaxed()
    #    forwarded_handling = aiohttp_remotes.ForwardedRelaxed()
    #else:
    #    x_forwarded_handling = aiohttp_remotes.XForwardedStrict([args.trusted])
    #    forwarded_handling = aiohttp_remotes.ForwardedStrict([args.trusted])
    x_forwarded_handling = aiohttp_remotes.XForwardedRelaxed()
    forwarded_handling = aiohttp_remotes.ForwardedRelaxed()
    asyncio.ensure_future(
        aiohttp_remotes.setup(app, forwarded_handling, x_forwarded_handling))
    return app
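How the returned application gets served is not part of this snippet; one possible usage sketch, assuming standard aiohttp and placeholder host/port values (not taken from the source):

# Hypothetical usage sketch -- host and port are placeholders, not values from the source.
from aiohttp import web

def serve(args):
    app = get_app(args)
    web.run_app(app, host="127.0.0.1", port=8080)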
def process(statsfile, k, optfile=None):
    stats = utils.load_pickle(statsfile)
    track_ar = average_rank_per_track(stats)
    clique_ar = average_rank_per_clique(stats)
    ma_p = mean_average_precision(stats)
    #k_p = average_precision(stats, k, ver=True)
    k_p = average_precision_at_k(stats, k)

    # Set up logger
    logger = utils.configure_logger()

    # print results
    logger.info("Number of queries: %d" % len(stats))
    logger.info("Average Rank per Track: %.3f" % track_ar)
    logger.info("Average Rank per Clique: %.3f" % clique_ar)
    logger.info("Mean Average Precision: %.2f %%" % (ma_p * 100))
    logger.info("Precision at %d: %.2f %%" % (k, k_p * 100))

    if optfile is not None:
        stats2 = utils.load_pickle(optfile)
        #plot_rank_histograms(stats, stats2, test=False)
        plot_precision_at_k_histograms(stats, stats2, K=[1, 3, 5, 10], test=False)
    else:
        plot_rank_histogram(stats)
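The layout of the pickled stats object is not visible here; assuming one rank per query (the rank of the first correct result), precision at k can be sketched as the fraction of queries whose correct item lands in the top k:

# Illustrative sketch only: assumes `ranks` is a list with one 1-based rank per query.
def precision_at_k_from_ranks(ranks, k):
    ranks = [r for r in ranks if r is not None]
    if not ranks:
        return 0.0
    return sum(1 for r in ranks if r <= k) / float(len(ranks))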
def start_algorithm(initial_state_file=None, config_file=None, fixed_rounds=None, start_clean=True):
    FUNCTION = 'main'
    '''
    Main function
    '''
    output_dir = "./output/"
    output_dir_log = output_dir + "ALGO_TRADING_LOG_{}.txt".format(utils.date_now_filename())
    output_dir_log_json = output_dir + "ALGO_TRADINGJSON_LOG_{}.txt".format(utils.date_now_filename())
    output_dir_plots = output_dir + "plots"
    output_dir_status = output_dir + "ALGO_STATUS_LOG_{}.txt".format(utils.date_now_filename())
    output_dir_archive = output_dir + "ALGO_ARCHIVE_LOG_{}.txt".format(utils.date_now_filename())
    output_dir_plotdata = output_dir + "ALGO_PLOTDATA_LOG_{}.txt".format(utils.date_now_filename())
    output_dir_overview = output_dir + "ALGO_OVERVIEW_LOG_{}.txt".format(utils.date_now_filename())
    ending_state_path = output_dir + "ALGO_ENDING_STATE_{}.txt".format(utils.date_now_filename())
    overview_plotdata_path = output_dir + "ALGO_OVERVIEWPLOTDATA_LOG_{}.txt".format(utils.date_now_filename())

    # Clean output directory
    utils.clean_output(output_dir, output_dir_plots)

    # Get config params
    if not config_file:
        config_file = args.config_file
    config_params = utils.read_config(config_file)

    # Configure logging
    logger = utils.configure_logger("default", output_dir_log, config_params["logging"])
    logger.info("Starting algorithm", extra={'function': FUNCTION})

    # Initialize stocks object with configuration values
    if not initial_state_file:
        initial_state_file = args.initial_state_file
    logger.debug("Reading initial values from initial state file: {}...".format(initial_state_file),
                 extra={'function': FUNCTION})
    init_val = utils.read_json_data(initial_state_file, logger=logger)
    stocks = Stocks(balance=init_val["balance"],
                    bought_stocks=init_val["bought_stocks"],
                    monitored_stocks=init_val["monitored_stocks"],
                    current_status=init_val["current_status"],
                    monitored_stock_data=init_val["monitored_stock_data"],
                    archive=init_val["archive"],
                    interesting_stocks=init_val["interesting_stocks"],
                    not_interesting_stocks=init_val["not_interesting_stocks"],
                    yahoo_calls=init_val["yahoo_calls"],
                    results=init_val["results"])

    # Initialize status files
    update_state(stocks, logger, output_dir_log, output_dir_overview, output_dir_status,
                 output_dir_archive, output_dir_plotdata, output_dir_log_json,
                 ending_state_path, overview_plotdata_path)

    # Check which stocks to monitor
    if start_clean:
        logger.info("Getting and initializing list of stocks to monitor...", extra={'function': FUNCTION})
        stocks.initialize_stocks(date=datetime.now(), logger=logger, config_params=config_params,
                                 update_nasdaq_file=False)

    # Set initial values
    stock_market_open = True
    archive_session = False
    counter = 0
    while stock_market_open:
        # Update config params
        if not config_file:
            config_file = args.config_file
        config_params = utils.read_config(config_file)

        # Read and save whether the user has ordered manually to sell a certain stock, and if true, sell it
        logger.info("Checking whether user has ordered to buy or sell stocks...", extra={'function': FUNCTION})
        commands_log = utils.get_latest_log("COMMANDS", logger=logger)
        commands = {
            'commands': [],
            'tickers_to_sell': [],
            'tickers_to_stop_monitor': []
        }
        if commands_log:
            commands = utils.read_commands(commands_log, logger=logger)
        stocks.hard_sell_check(commands, commands_log, config_params, logger)
        stocks.check_to_stop_monitor_stocks(commands, commands_log, config_params, logger)

        # Loop through monitored stocks
        logger.info("Checking monitored stocks...", extra={'function': FUNCTION})
        for stock in stocks.monitored_stocks:
            stocks.check_monitored_stock(stock, config_params=config_params, logger=logger)

        # Check if we should monitor more stocks
        if config_params['main']['check_for_new_stocks']:
            logger.info("Checking if we should monitor more stocks...", extra={'function': FUNCTION})
            stocks.check_to_monitor_new_stocks(datetime.now(), config_params, logger)

        # Plot data per monitored stock
        if config_params['main']['plot_data']:
            logger.info("Plotting monitored stock data...", extra={'function': FUNCTION})
            stocks.plot_monitored_stock_data(output_dir_plots, logger=logger)

        # Check to terminate algorithm
        if fixed_rounds:
            counter += 1
            if counter >= fixed_rounds:
                logger.info("Terminating algorithm because of configured fixed rounds",
                            extra={'function': FUNCTION})
                archive_session = True
                break
        elif config_params['main']['sell_all_before_finish'] and utils.before_close():
            logger.info("Terminating algorithm and selling all owned stocks because it was configured by the user",
                        extra={'function': FUNCTION})
            archive_session = True
            stocks.hard_sell_check({"tickers_to_sell": ["ALLSTOCKS"]}, commands_log, config_params, logger)
            break
        elif "STOPALGORITHM" in commands['commands']:
            logger.info("Terminating algorithm because it was instructed by the user",
                        extra={'function': FUNCTION})
            archive_session = True
            commands['commands'].remove("STOPALGORITHM")
            utils.write_json(commands, commands_log, logger=logger)
            if config_params['main']['sell_all_before_finish']:
                stocks.hard_sell_check({"tickers_to_sell": ["ALLSTOCKS"]}, commands_log, config_params, logger)
            break
        else:
            scraper = YahooScraper()
            if scraper.all_markets_closed(stocks.monitored_stocks, config_params, logger) and \
                    not config_params['main']['ignore_market_hours']:
                logger.info("Terminating algorithm because all relevant markets are closed",
                            extra={'function': FUNCTION})
                break

        # Update state
        update_state(stocks, logger, output_dir_log, output_dir_overview, output_dir_status,
                     output_dir_archive, output_dir_plotdata, output_dir_log_json,
                     ending_state_path, overview_plotdata_path)

        # Sleep
        seconds_to_sleep = config_params['main']['seconds_to_sleep']
        logger.info("Sleeping {} seconds".format(seconds_to_sleep), extra={'function': FUNCTION})
        time.sleep(seconds_to_sleep)

    # Perform final operations before terminating
    stocks.current_status = utils.close_markets(stocks.current_status)
    update_state(stocks, logger, output_dir_log, output_dir_overview, output_dir_status,
                 output_dir_archive, output_dir_plotdata, output_dir_log_json,
                 ending_state_path, overview_plotdata_path)
    if archive_session:
        transactions_file = utils.get_latest_log("ARCHIVE", logger=logger)
        status_file = utils.get_latest_log("STATUS", logger=logger)
        overview_file = utils.get_latest_log("OVERVIEW", logger=logger)
        utils.archive_session([transactions_file, status_file, overview_file], logger=logger)
        stocks.archive = []
        update_state(stocks, logger, output_dir_log, output_dir_overview, output_dir_status,
                     output_dir_archive, output_dir_plotdata, output_dir_log_json,
                     ending_state_path, overview_plotdata_path)
    return True
# local stuff
import hdf5_getters as GETTERS
import dan_tools
import time
import utils
import scipy.cluster.vq as vq
import pylab as plt
from transforms import load_transform
import analyze_stats as anst

# params, for ICMR paper: 75 and 1.96
WIN = 75
PATCH_LEN = WIN * 12

# Set up logger
logger = utils.configure_logger()

# Global models
lda = None
pca = None


def compute_codes_orig_it(track_ids, maindir, clique_ids, start_idx, end_idx):
    """Computes the original features, based on Thierry and Ellis, 2012.
    Dimensionality reduction using PCA of 50, 100, and 200 components."""
    res = []
    trainedpca = utils.load_pickle("models/pca_250Kexamples_900dim_nocovers.pkl")
    pca_components = [50, 100, 200]

    # Init codes
    codes = []
    for n_comp in pca_components:
            sleep(60)
            return run(funcname, args=args, kwargs=kwargs, retries=retries + 1)
        else:
            logger.error("Server refused")
            return False
    except EOFError:
        logger.error('Internal server error')
        return False
    except Exception:
        print_exc_plus()


if __name__ == '__main__':
    import argparse
    from utils import configure_logger
    configure_logger(logger)

    def print_log(debug=False):
        if debug:
            os.system(f'tail -n 43 -f \"{MAIN_LOGFILE}\"')
        else:
            os.system(f'tail -n 43 -f \"{CONSOLE_LOGFILE}\"')

    try:
        actions = {
            'info': 'show buildbot info',
            'update': '[--overwrite] update pushed files to the repo',
            'clean': '[dir / all] checkout pkgbuilds in packages',
            'rebuild': '[dir1 dir2 --clean] rebuild packages',
            'log': '[--debug] print log',
            'upload': '[dir1 dir2 --overwrite] force upload packages',
import time

# local stuff
import pca
import hdf5_getters as GETTERS
import dan_tools
import utils
from transforms import load_transform

# Thierry's original parameters for ISMIR paper
WIN = 75
PWR = 1.96
PATCH_LEN = WIN * 12

# Set up logger
logger = utils.configure_logger()


def extract_feats(filename, td=None, lda_file=None, lda_n=0, ver=True):
    """Computes the features using the dictionary transformation td.
    If it doesn't exist, computes them using Thierry's method.

    The improved pipeline is composed of 11 steps:
        1.- Beat Synchronous Chroma
        2.- L2-Norm
        3.- Shingle (PATCH_LEN: 75 x 12)
        4.- 2D-FFT
        5.- L2-Norm
        6.- Log-Scale
        7.- Sparse Coding
import logging
import os
import pathlib
import sys

import requests
from google.api_core import exceptions
from google.cloud import datastore
from google.cloud import vision
from PIL import Image, ImageDraw

import utils
from flickr_to_datastore import write_entities_to_datastore

### LOGGING ####################################################################
logger = logging.getLogger(__name__)
utils.configure_logger(logger, console_output=True)
################################################################################


def pull_unclassified_entities(ds_client):
    # TODO: Update docstring
    """Retrieves entities from datastore that have no value for vision_labels.

    Args:
        ds_client (google.cloud.datastore.client.Client)
    Returns:
        list of google.cloud.datastore.entity.Entity of kind 'Photo'
    """
    query = ds_client.query(kind="Photo")
    query.add_filter("is_classified", "=", False)
""" # Configure a connection to the database at the URL specified by the # DATABASE_URL environment variable. # Remember that we're using `echo=True` so we can see all generated SQL. engine = sqlalchemy.create_engine(os.environ['DATABASE_URL'], echo=True) # Create a session factory. Calling `Session()` will create new SQLAlchemy # ORM sessions. Session = orm.sessionmaker(bind=engine) # Create a new session which we'll use for the following investigation. session = Session() """ with step(): configure_logger() engine = sqlalchemy.create_engine(os.environ['DATABASE_URL']) Session = orm.sessionmaker(bind=engine) session = Session() with step(): q = session.query(Zebra).filter(Zebra.when_born <= now() - 2 * YEAR) old_zebra = q.first() with step(): wh = session.query(WateringHole).first() with step(): session.rollback() # Commit instead
def main():
    args = get_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)

    cur_timestamp = str(datetime.now())[:-3]  # we also include ms to prevent the probability of name collision
    model_width = {'linear': '', 'cnn': args.n_filters_cnn, 'lenet': '', 'resnet18': ''}[args.model]
    model_str = '{}{}'.format(args.model, model_width)
    model_name = '{} dataset={} model={} eps={} attack={} m={} attack_init={} fgsm_alpha={} epochs={} pgd={}-{} grad_align_cos_lambda={} lr_max={} seed={}'.format(
        cur_timestamp, args.dataset, model_str, args.eps, args.attack, args.minibatch_replay, args.attack_init,
        args.fgsm_alpha, args.epochs, args.pgd_alpha_train, args.pgd_train_n_iters, args.grad_align_cos_lambda,
        args.lr_max, args.seed)
    if not os.path.exists('models'):
        os.makedirs('models')
    logger = utils.configure_logger(model_name, args.debug)
    logger.info(args)
    half_prec = args.half_prec
    n_cls = 2 if 'binary' in args.dataset else 10

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    double_bp = True if args.grad_align_cos_lambda > 0 else False
    n_eval_every_k_iter = args.n_eval_every_k_iter
    args.pgd_alpha = args.eps / 4
    eps, pgd_alpha, pgd_alpha_train = args.eps / 255, args.pgd_alpha / 255, args.pgd_alpha_train / 255

    train_data_augm = False if args.dataset in ['mnist'] else True
    train_batches = data.get_loaders(args.dataset, -1, args.batch_size, train_set=True, shuffle=True, data_augm=train_data_augm)
    train_batches_fast = data.get_loaders(args.dataset, n_eval_every_k_iter, args.batch_size, train_set=True, shuffle=False, data_augm=False)
    test_batches = data.get_loaders(args.dataset, args.n_final_eval, args.batch_size_eval, train_set=False, shuffle=False, data_augm=False)
    test_batches_fast = data.get_loaders(args.dataset, n_eval_every_k_iter, args.batch_size_eval, train_set=False, shuffle=False, data_augm=False)

    model = models.get_model(args.model, n_cls, half_prec, data.shapes_dict[args.dataset], args.n_filters_cnn).cuda()
    model.apply(utils.initialize_weights)
    model.train()

    if args.model == 'resnet18':
        opt = torch.optim.SGD(model.parameters(), lr=args.lr_max, momentum=0.9, weight_decay=args.weight_decay)
    elif args.model == 'cnn':
        opt = torch.optim.Adam(model.parameters(), lr=args.lr_max, weight_decay=args.weight_decay)
    elif args.model == 'lenet':
        opt = torch.optim.Adam(model.parameters(), lr=args.lr_max, weight_decay=args.weight_decay)
    else:
        raise ValueError('decide about the right optimizer for the new model')

    if half_prec:
        if double_bp:
            amp.register_float_function(torch, 'batch_norm')
        model, opt = amp.initialize(model, opt, opt_level="O1")

    if args.attack == 'fgsm':  # needed here only for Free-AT
        delta = torch.zeros(args.batch_size, *data.shapes_dict[args.dataset][1:]).cuda()
        delta.requires_grad = True

    lr_schedule = utils.get_lr_schedule(args.lr_schedule, args.epochs, args.lr_max)
    loss_function = nn.CrossEntropyLoss()

    train_acc_pgd_best, best_state_dict = 0.0, copy.deepcopy(model.state_dict())
    start_time = time.time()
    time_train, iteration, best_iteration = 0, 0, 0
    for epoch in range(args.epochs + 1):
        train_loss, train_reg, train_acc, train_n, grad_norm_x, avg_delta_l2 = 0, 0, 0, 0, 0, 0
        for i, (X, y) in enumerate(train_batches):
            if i % args.minibatch_replay != 0 and i > 0:  # take new inputs only each `minibatch_replay` iterations
                X, y = X_prev, y_prev
            time_start_iter = time.time()
            # epoch=0 runs only for one iteration (to check the training stats at init)
            if epoch == 0 and i > 0:
                break
            X, y = X.cuda(), y.cuda()
            lr = lr_schedule(epoch - 1 + (i + 1) / len(train_batches))  # epoch - 1 since the 0th epoch is skipped
            opt.param_groups[0].update(lr=lr)

            if args.attack in ['pgd', 'pgd_corner']:
                pgd_rs = True if args.attack_init == 'random' else False
                n_eps_warmup_epochs = 5
                n_iterations_max_eps = n_eps_warmup_epochs * data.shapes_dict[args.dataset][0] // args.batch_size
                eps_pgd_train = min(iteration / n_iterations_max_eps * eps, eps) if args.dataset == 'svhn' else eps
                delta = utils.attack_pgd_training(
                    model, X, y, eps_pgd_train, pgd_alpha_train, opt, half_prec, args.pgd_train_n_iters, rs=pgd_rs)
                if args.attack == 'pgd_corner':
                    delta = eps * utils.sign(delta)  # project to the corners
                    delta = clamp(X + delta, 0, 1) - X

            elif args.attack == 'fgsm':
                if args.minibatch_replay == 1:
                    if args.attack_init == 'zero':
                        delta = torch.zeros_like(X, requires_grad=True)
                    elif args.attack_init == 'random':
                        delta = utils.get_uniform_delta(X.shape, eps, requires_grad=True)
                    else:
                        raise ValueError('wrong args.attack_init')
                else:  # if Free-AT, we just reuse the existing delta from the previous iteration
                    delta.requires_grad = True

                X_adv = clamp(X + delta, 0, 1)
                output = model(X_adv)
                loss = F.cross_entropy(output, y)
                if half_prec:
                    with amp.scale_loss(loss, opt) as scaled_loss:
                        grad = torch.autograd.grad(scaled_loss, delta, create_graph=True if double_bp else False)[0]
                        grad /= scaled_loss / loss  # reverse back the scaling
                else:
                    grad = torch.autograd.grad(loss, delta, create_graph=True if double_bp else False)[0]

                grad = grad.detach()
                argmax_delta = eps * utils.sign(grad)

                n_alpha_warmup_epochs = 5
                n_iterations_max_alpha = n_alpha_warmup_epochs * data.shapes_dict[args.dataset][0] // args.batch_size
                fgsm_alpha = min(iteration / n_iterations_max_alpha * args.fgsm_alpha, args.fgsm_alpha) if args.dataset == 'svhn' else args.fgsm_alpha
                delta.data = clamp(delta.data + fgsm_alpha * argmax_delta, -eps, eps)
                delta.data = clamp(X + delta.data, 0, 1) - X

            elif args.attack == 'random_corner':
                delta = utils.get_uniform_delta(X.shape, eps, requires_grad=False)
                delta = eps * utils.sign(delta)

            elif args.attack == 'none':
                delta = torch.zeros_like(X, requires_grad=False)
            else:
                raise ValueError('wrong args.attack')

            # extra FP+BP to calculate the gradient to monitor it
            if args.attack in ['none', 'random_corner', 'pgd', 'pgd_corner']:
                grad = get_input_grad(model, X, y, opt, eps, half_prec, delta_init='none',
                                      backprop=args.grad_align_cos_lambda != 0.0)

            delta = delta.detach()

            output = model(X + delta)
            loss = loss_function(output, y)

            reg = torch.zeros(1).cuda()[0]  # for .item() to run correctly
            if args.grad_align_cos_lambda != 0.0:
                grad2 = get_input_grad(model, X, y, opt, eps, half_prec, delta_init='random_uniform', backprop=True)
                grads_nnz_idx = ((grad**2).sum([1, 2, 3])**0.5 != 0) * ((grad2**2).sum([1, 2, 3])**0.5 != 0)
                grad1, grad2 = grad[grads_nnz_idx], grad2[grads_nnz_idx]
                grad1_norms, grad2_norms = l2_norm_batch(grad1), l2_norm_batch(grad2)
                grad1_normalized = grad1 / grad1_norms[:, None, None, None]
                grad2_normalized = grad2 / grad2_norms[:, None, None, None]
                cos = torch.sum(grad1_normalized * grad2_normalized, (1, 2, 3))
                reg += args.grad_align_cos_lambda * (1.0 - cos.mean())

            loss += reg

            if epoch != 0:
                opt.zero_grad()
                utils.backward(loss, opt, half_prec)
                opt.step()

            time_train += time.time() - time_start_iter
            train_loss += loss.item() * y.size(0)
            train_reg += reg.item() * y.size(0)
            train_acc += (output.max(1)[1] == y).sum().item()
            train_n += y.size(0)

            with torch.no_grad():  # no grad for the stats
                grad_norm_x += l2_norm_batch(grad).sum().item()
                delta_final = clamp(X + delta, 0, 1) - X  # we should measure delta after the projection onto [0, 1]^d
                avg_delta_l2 += ((delta_final ** 2).sum([1, 2, 3]) ** 0.5).sum().item()

            if iteration % args.eval_iter_freq == 0:
                train_loss, train_reg = train_loss / train_n, train_reg / train_n
                train_acc, avg_delta_l2 = train_acc / train_n, avg_delta_l2 / train_n

                # it'd be incorrect to recalculate the BN stats on the test sets and for clean / adversarial points
                utils.model_eval(model, half_prec)

                test_acc_clean, _, _ = rob_acc(test_batches_fast, model, eps, pgd_alpha, opt, half_prec, 0, 1)
                test_acc_fgsm, test_loss_fgsm, fgsm_deltas = rob_acc(test_batches_fast, model, eps, eps, opt, half_prec, 1, 1, rs=False)
                test_acc_pgd, test_loss_pgd, pgd_deltas = rob_acc(test_batches_fast, model, eps, pgd_alpha, opt, half_prec, args.attack_iters, 1)
                cos_fgsm_pgd = utils.avg_cos_np(fgsm_deltas, pgd_deltas)
                train_acc_pgd, _, _ = rob_acc(train_batches_fast, model, eps, pgd_alpha, opt, half_prec, args.attack_iters, 1)  # needed for early stopping

                grad_x = utils.get_grad_np(model, test_batches_fast, eps, opt, half_prec, rs=False)
                grad_eta = utils.get_grad_np(model, test_batches_fast, eps, opt, half_prec, rs=True)
                cos_x_eta = utils.avg_cos_np(grad_x, grad_eta)

                time_elapsed = time.time() - start_time
                train_str = '[train] loss {:.3f}, reg {:.3f}, acc {:.2%} acc_pgd {:.2%}'.format(train_loss, train_reg, train_acc, train_acc_pgd)
                test_str = '[test] acc_clean {:.2%}, acc_fgsm {:.2%}, acc_pgd {:.2%}, cos_x_eta {:.3}, cos_fgsm_pgd {:.3}'.format(
                    test_acc_clean, test_acc_fgsm, test_acc_pgd, cos_x_eta, cos_fgsm_pgd)
                logger.info('{}-{}: {} {} ({:.2f}m, {:.2f}m)'.format(epoch, iteration, train_str, test_str, time_train/60, time_elapsed/60))

                if train_acc_pgd > train_acc_pgd_best:  # catastrophic overfitting can be detected on the training set
                    best_state_dict = copy.deepcopy(model.state_dict())
                    train_acc_pgd_best, best_iteration = train_acc_pgd, iteration

                utils.model_train(model, half_prec)
                train_loss, train_reg, train_acc, train_n, grad_norm_x, avg_delta_l2 = 0, 0, 0, 0, 0, 0

            iteration += 1
            X_prev, y_prev = X.clone(), y.clone()  # needed for Free-AT

        if epoch == args.epochs:
            torch.save({'last': model.state_dict(), 'best': best_state_dict}, 'models/{} epoch={}.pth'.format(model_name, epoch))

            # disable global conversion to fp16 from amp.initialize() (https://github.com/NVIDIA/apex/issues/567)
            context_manager = amp.disable_casts() if half_prec else utils.nullcontext()
            with context_manager:
                last_state_dict = copy.deepcopy(model.state_dict())
                half_prec = False  # final eval is always in fp32
                model.load_state_dict(last_state_dict)
                utils.model_eval(model, half_prec)
                opt = torch.optim.SGD(model.parameters(), lr=0)

                attack_iters, n_restarts = (50, 10) if not args.debug else (10, 3)
                test_acc_clean, _, _ = rob_acc(test_batches, model, eps, pgd_alpha, opt, half_prec, 0, 1)
                test_acc_pgd_rr, _, deltas_pgd_rr = rob_acc(test_batches, model, eps, pgd_alpha, opt, half_prec, attack_iters, n_restarts)
                logger.info('[last: test on 10k points] acc_clean {:.2%}, pgd_rr {:.2%}'.format(test_acc_clean, test_acc_pgd_rr))

                if args.eval_early_stopped_model:
                    model.load_state_dict(best_state_dict)
                    utils.model_eval(model, half_prec)
                    test_acc_clean, _, _ = rob_acc(test_batches, model, eps, pgd_alpha, opt, half_prec, 0, 1)
                    test_acc_pgd_rr, _, deltas_pgd_rr = rob_acc(test_batches, model, eps, pgd_alpha, opt, half_prec, attack_iters, n_restarts)
                    logger.info('[best: test on 10k points][iter={}] acc_clean {:.2%}, pgd_rr {:.2%}'.format(
                        best_iteration, test_acc_clean, test_acc_pgd_rr))

            utils.model_train(model, half_prec)

    logger.info('Done in {:.2f}m'.format((time.time() - start_time) / 60))
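The clamp helper used throughout the training loop above is not defined in this excerpt; a minimal sketch, assuming plain elementwise clamping with torch (an assumption, not the source's definition):

# Assumed helper, not shown in the excerpt: elementwise clamp used as clamp(X + delta, 0, 1).
import torch

def clamp(x, lower, upper):
    return torch.clamp(x, min=lower, max=upper)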
help="Path to a file with urls to load into database", type=str) parser.add_argument("-w", "--workers", help="Start given number of workers", type=int) parser.add_argument("-t", "--threads", help="Start given number of threaded workers", type=int) parser.add_argument("-d", "--debug", help="Enable debug logging in workers", action="store_true") args = parser.parse_args() utils.configure_logger(args.debug) if args.workers is not None and args.threads is not None: print( "You couldn't start both threaded and process based workers simultaneously" ) exit(1) if args.stop: kill_workers() if args.erase: erase_database() if args.load: load_urls_to_database(args.load)
logger.info("repo-remove: %s", repo_remove(remove_pkgs)) else: logger.warning(f'Nothing to remove in {arch}') archive_dir = Path('archive') for fpath in archive_dir.iterdir(): nosigname = fpath.name[:-4] if fpath.name.endswith( '.sig') else fpath.name if nosigname.endswith(PKG_SUFFIX) and \ get_pkg_details_from_name(nosigname).pkgname in pkgnames: throw_away(fpath) logger.info('finished remove') return True if __name__ == '__main__': configure_logger(logger, logfile='repo.log', rotate_size=1024 * 1024 * 10) import argparse try: parser = argparse.ArgumentParser( description='Automatic management tool for an arch repo.') parser.add_argument( '-a', '--arch', nargs='?', default=False, help='arch to regenerate, split by comma, defaults to all') parser.add_argument('-o', '--overwrite', action='store_true', help='overwrite when updating existing packages') parser.add_argument(
def __init__(self, start, number_of_stocks, sell_criterium, stocks=None):
    if not stocks:
        stocks = []
    super().__init__(balance=[10000, 10000],
                     bought_stocks={},
                     monitored_stocks=[],
                     monitored_stock_data={},
                     archive=[],
                     current_status={},
                     interesting_stocks=[],
                     not_interesting_stocks=[],
                     yahoo_calls={},
                     results={})
    if isinstance(start, str):
        start = datetime.strptime(start, '%Y/%m/%d-%H:%M:%S')
    self.start = start
    self.ip = "192.168.0.14"
    self.M = 500
    self.Pavg = 20
    self.sell_criterium = sell_criterium
    self.indicators = Indicators()
    self.conf = utils.read_config("./config/config.json")
    self.logger = utils.configure_logger("default", "./GLENNY_LOG.txt", self.conf["logging"])
    self.initialize_stocks(start, self.logger, self.conf, number_of_stocks,
                           update_nasdaq_file=False, stocks=stocks)
    self.results = {
        "stock": [],
        "bought": [],
        "price_bought": [],
        "number": [],
        "result": [],
        "start_date": [],
        "comment": [],
        "timestamp": [],
        "sell_criterium": [],
        "first_sold": [],
        "first_Pe": [],
        "first_N": [],
        "second_sold": [],
        "second_Pe": [],
        "second_N": [],
        "time_diff_bod": [],
        "time_diff_eod": [],
        'der_bigEMA': []
    }
    self.columns = [
        "timestamp", "stock", "result", "comment", "start_date", "bought",
        "first_sold", "second_sold", "price_bought", "first_Pe", "second_Pe",
        "number", "first_N", "second_N", "time_diff_bod", "der_bigEMA",
        "sell_criterium"
    ]
    self.stats = {
        "param": [],
        "type": [],
        "total_result_plot": [],
        "individual_result_plot": []
    }
    self.columns_stats = [
        'param', 'type', 'total_result_plot', 'individual_result_plot'
    ]
    self.csv_file = "./backtesting/backtesting_cumulative.csv"
    self.csv_file_stats = "./backtesting/backtesting_stats.csv"
    self.plot_dir = "./backtesting/back_plots/"
    self.stats_plot_dir = "./backtesting/stats_plots/"
    self.callsYQL_file = "./backtesting/calls_yql.json"
def server(args):
    """
    Server function to drive the submission process.

    Two main modes of operation are present. First, user submissions can be
    directly submitted with the -b flag. This is mainly used for debugging.
    The main mode of operation is without the -b flag, where the server will
    check the database for jobs that haven't been submitted and call
    submission_script_manager for each.

    Inputs:
    ------
    args - argparse arguments object that contains the database configuration
           instructions as well as other options.

    Returns:
    --------
    Nothing.

    For a more verbose output, use --debug=2 at the runtime.
    """
    logger = utils.configure_logger(args)

    db_conn, sql = setup_database(args)

    if args.UserSubmissionID != 'none':
        if update_tables.count_user_submission_id(args.UserSubmissionID, sql) > 0:
            #if args.submit:
            logger.debug('Processing {}'.format(args.UserSubmissionID))
            # Need to remove any whitespace
            USID = args.UserSubmissionID.replace(" ", "")
            submission_script_manager.process_jobs(args, args.UserSubmissionID, db_conn, sql)
            #else:
            #    print("-s option not selected, not submitting jobs through submission_script_manager")
        else:
            print("The selected UserSubmission (UserSubmissionID = {0}) does not exist, exiting".format(args.UserSubmissionID))
            exit()

    # No UserSubmissionID specified, send all that haven't been sent already.
    else:
        user_submissions = database.get_unsubmitted_jobs(sql)
        logger.debug('Found unsubmitted jobs: {}'.format(user_submissions))

        if len(user_submissions) == 0:
            print("There are no UserSubmissions which have not yet been submitted to a farm")
        else:
            for i, submission_id in enumerate(user_submissions):
                logger.debug('Working on job {} of {}, user_submission_id = {}'.format(
                    i + 1, len(user_submissions), submission_id))
                submission_script_manager.process_jobs(args, submission_id, db_conn, sql)

    # Shutdown the database connection, we're done here.
    db_conn.close()
def train_model(language, data_dir, model_type=1, gru=3, num_epochs=100, mini_batch_size=32, iterlog=20,
                cp_freq=1000, restore_path=None, model_root='./trained-models/', multi_gpu=False):
    # Check language is supported
    if not check_language_code(language):
        raise ValueError("Invalid or not supported language code!")

    # Check description file exists
    if not os.path.exists(data_dir):
        raise ValueError("Description file does not exist!")

    # Check valid model is selected
    if model_type == 1:
        from models import model_conv1_gru as model
    elif model_type == 2:
        from models import model_conv2_gru as model
    else:
        raise ValueError("No valid model selected!")

    # Create model directories
    model_name = model.__name__ + str(gru)
    model_dir = os.path.join(model_root, model_name)

    if multi_gpu:
        my_gpu_rank = hvd.local_rank()
        num_gpus = hvd.size()
    else:
        my_gpu_rank = 0
        num_gpus = 1

    if not multi_gpu or my_gpu_rank == 0:
        if not os.path.exists(model_root):
            os.makedirs(model_root)
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)

    # Configure logging
    configure_logger(logFileName=os.path.join(model_root, 'training.log'))

    print('Loading data...')
    # Load char_map, index_map and get the number of classes
    char_map, index_map, nb_classes = get_language_chars(language)

    # Prepare the data generator. Load the JSON file that contains the dataset
    datagen = DataGenerator(char_map=char_map, multi_gpu=multi_gpu)
    # Loads data limited with max duration. Returns the number of iterations.
    steps_per_epoch = datagen.load_data(data_dir, minibatch_size=mini_batch_size, max_duration=20.0)

    print('Building Model...')
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Create input placeholders for CTC cost feeding and decoding feeding
    with tf.name_scope('inputs'):
        # Audio inputs have size of [batch_size, max_stepsize, num_features].
        # The batch_size and max_stepsize can vary along each step.
        # inputs = tf.placeholder(tf.float32, [None, None, 161], name='inputs')  # spectrogram version
        inputs = tf.placeholder(tf.float32, [None, None, 40], name='inputs')  # filterbank version; 40 is the number of filters
        # inputs = tf.placeholder(tf.float32, [None, None, 12], name='inputs')  # mfcc version; 12 is the number of ceps

        # 1d array of size [batch_size]
        seq_len = tf.placeholder(tf.int32, [None], name='seq_len')

        # We define the placeholder for the labels. Here we use sparse_placeholder
        # that will generate a SparseTensor required by the ctc_loss op.
        targets = tf.sparse_placeholder(tf.int32, name='targets')

    # Create model layers
    logits = model(inputs, nb_classes, gru)
    logits = tf.transpose(logits, perm=[1, 0, 2])

    # Compute the CTC loss using TensorFlow's "ctc_loss", then calculate the average loss across the batch
    with tf.name_scope('loss'):
        total_loss = tf.nn.ctc_loss(inputs=logits, labels=targets, sequence_length=seq_len,
                                    ignore_longer_outputs_than_inputs=True)
        avg_loss = tf.reduce_mean(total_loss, name="Mean")

    # The Adam optimizer is preferred for performance reasons to optimize the weights
    with tf.name_scope('train'):
        optimizer = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.9, beta2=0.999, epsilon=1e-8)
        if multi_gpu:
            optimizer = hvd.DistributedOptimizer(optimizer)
        train_op = optimizer.minimize(avg_loss, global_step=global_step)
        # optimizer = tf.train.MomentumOptimizer(learning_rate=2e-4, momentum=0.99, use_nesterov=True).minimize(avg_loss)

    # Beam search decodes the mini-batch
    with tf.name_scope('decoder'):
        decoded, log_prob = tf.nn.ctc_beam_search_decoder(logits, seq_len, beam_width=100, top_paths=1,
                                                          merge_repeated=False)
        # Option 2: tf.nn.ctc_greedy_decoder (it's faster but gives worse results)
        dense_decoded = tf.sparse_tensor_to_dense(decoded[0], name="SparseToDense", default_value=-1)

    # The Levenshtein (edit) distances between the decodings and their transcriptions "distance"
    with tf.name_scope('distance'):
        distance = tf.edit_distance(tf.cast(decoded[0], tf.int32), targets, name="edit_distance")

    # The accuracy of the outcome averaged over the whole batch "accuracy"
    ler = tf.reduce_mean(distance, name="Mean")

    config = tf.ConfigProto(allow_soft_placement=False, log_device_placement=False)
    init = tf.global_variables_initializer()

    total_steps = num_epochs * steps_per_epoch

    if multi_gpu:
        config.gpu_options.allow_growth = False
        config.gpu_options.visible_device_list = str(my_gpu_rank)
        bcast = hvd.broadcast_global_variables(0)
        # Normally, we would divide the num_steps by hvd.size().
        # But our DataGen has already done that, so we don't need it here
        # num_steps = num_steps // hdv.size() + 1
    else:
        bcast = None

    print('Training...')
    iterator = None
    best_cost = 1e10
    lbest_cost = best_cost
    saver = tf.train.Saver(max_to_keep=50)
    session = tf.Session(config=config)
    with session.as_default():
        history_file = os.path.join(model_dir, 'history.log')
        if os.path.exists(history_file):
            entries = codecs.open(history_file, 'r', 'utf-8').readlines()
            last_entry = entries[-1].strip()
            _, h_epoch, h_epoch_step, h_loss, h_best_cost, h_cgs, h_ckptfile = last_entry.split('|')
            saver.restore(session, save_path=tf.train.latest_checkpoint(model_dir))
            epoch = int(h_epoch)
            current_epoch_step = int(h_epoch_step) + 1
            current_global_step = int(h_cgs) + 1
            remaining_epoch_steps = max(0, steps_per_epoch - current_epoch_step)
            best_cost = float(h_best_cost)
        else:
            epoch = 1
            current_epoch_step = 1
            current_global_step = 1
            remaining_epoch_steps = steps_per_epoch
            init.run()

        if bcast is not None:
            bcast.run()

        tf.get_default_graph().finalize()

        while epoch <= num_epochs:
            if epoch == 1:
                iterator = datagen.iterate_train(mini_batch_size=mini_batch_size, sort_by_duration=True,
                                                 shuffle=False, max_iters=remaining_epoch_steps)
            else:
                iterator = datagen.iterate_train(mini_batch_size=mini_batch_size, sort_by_duration=False,
                                                 shuffle=True, max_iters=remaining_epoch_steps)

            while current_epoch_step < steps_per_epoch:
                b_perc = int(float(current_epoch_step) / float(steps_per_epoch) * 100.0)

                inputs, out_len, indices, values, shape, labels = next(iterator)
                feed = {"inputs/inputs:0": inputs,
                        "inputs/targets/shape:0": shape,
                        "inputs/targets/indices:0": indices,
                        "inputs/targets/values:0": values,
                        "inputs/seq_len:0": out_len}

                step_start_time = time.time()
                if current_global_step % iterlog == 0:
                    _, ctc_cost, cError, cDecoded = session.run([train_op, avg_loss, ler, dense_decoded], feed_dict=feed)
                    step_end_time = time.time()
                    batch_error = cError * mini_batch_size
                    if not multi_gpu or my_gpu_rank == 0:
                        for i, seq in enumerate(cDecoded):
                            seq = [s for s in seq if s != -1]
                            sequence = convert_int_sequence_to_text_sequence(seq, index_map)
                            logger.info("IT : {}-{}".format(current_global_step, str(i + 1)))
                            logger.info("OT ({:3d}): {}".format(len(labels[i]), labels[i]))
                            logger.info("DT ({:3d}): {}".format(len(sequence), sequence))
                            logger.info('-' * 100)
                else:
                    ctc_cost, _ = session.run([avg_loss, train_op], feed_dict=feed)
                    step_end_time = time.time()

                if not multi_gpu or my_gpu_rank == 0:
                    best_cost_str = 'N/A' if epoch <= 1 else '{:.5f}'.format(best_cost)
                    logger.info("Epoch:{:-4d}, ES:{:-6d}, GS:{:-6d}, Loss:{:.5f}, BestLoss:{}, Time:{:.3f}".format(
                        epoch, current_epoch_step, current_global_step, ctc_cost, best_cost_str,
                        step_end_time - step_start_time))

                # Ignore best_cost during the Epoch 1 run...
                if epoch > 1 and ctc_cost < best_cost:
                    lbest_cost = best_cost
                    best_cost = ctc_cost

                print('Epoch: {}/{}, Step: {:-6d}/{} {:-3}% -- [Loss: {:-9.5f}, BestLoss: {:-9.5f}, Time: {:.4f}]'.format(
                    epoch, num_epochs, current_epoch_step, steps_per_epoch, b_perc, ctc_cost, best_cost,
                    step_end_time - step_start_time), end='\r')
                sys.stdout.flush()

                # Save every 'n' steps or when we find a better best_cost
                if (current_global_step % cp_freq == 0 or best_cost < lbest_cost) and my_gpu_rank == 0:
                    print('\n*** Saving checkpoint at Epoch {}, Step {} (GS: {})'.format(
                        epoch, current_epoch_step, current_global_step))
                    lbest_cost = best_cost
                    saved_path = saver.save(session, os.path.join(model_dir, 'model'), global_step=current_global_step)
                    write_history(model_dir, epoch, current_epoch_step, ctc_cost, best_cost,
                                  current_global_step, saved_path)

                current_epoch_step += 1
                current_global_step += num_gpus

            # Let's initiate the garbage collector to maintain acceptable RAM usage
            gc.collect()
            epoch += 1
            current_epoch_step = 0
            remaining_epoch_steps = steps_per_epoch

    print('\n')
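convert_int_sequence_to_text_sequence is referenced in the decoding log above but not defined in this excerpt; a minimal sketch, assuming index_map maps integer label ids to characters (an assumption, not the source's definition):

# Assumed helper, not shown above: map decoded label indices back to text via index_map.
def convert_int_sequence_to_text_sequence(seq, index_map):
    return ''.join(index_map[i] for i in seq)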