def get_extracted_files(dst_path, logger: logging.Logger):
    logger.info('Getting list of already extracted files...')
    published_files = [f for f in os.listdir(dst_path) if f.endswith('.jpg')]
    dup_files = [f for f in os.listdir(dst_path / 'dups') if f.endswith('.jpg')]
    return published_files + dup_files

def repair(
    self, log: logging.Logger, overlay: overlay_mod.Overlay, fsck_dir: Path
) -> bool:
    # TODO: It would be nice to try and get the contents of the
    # file/directory at this location in the current commit, rather than
    # just writing out an empty file or directory
    backup_dir = fsck_dir / "broken_inodes"
    backup_dir.mkdir(exist_ok=True)
    inode_data_path = Path(overlay.get_path(self.inode.inode_number))
    inode_backup_path = backup_dir / str(self.inode.inode_number)

    if self.expected_type == InodeType.DIR:
        log.info(
            f"replacing corrupt directory inode {self.compute_path()!r} with an "
            "empty directory"
        )
        os.rename(inode_data_path, inode_backup_path)
        overlay.write_empty_dir(self.inode.inode_number)
    else:
        log.info(
            f"replacing corrupt file inode {self.compute_path()!r} with an "
            "empty file"
        )
        os.rename(inode_data_path, inode_backup_path)
        overlay.write_empty_file(self.inode.inode_number)
    return True

def make_middleware(app,
                    global_conf,
                    verbose_log=None,
                    trace_log=None,
                    max_bodylen='3KB',
                    max_logsize='100MB',
                    backup_count='10',
                    keep='100',
                    ):
    """ Paste filter-app converter """
    backup_count = int(backup_count)
    max_bytes = byte_size(max_logsize)
    max_bodylen = byte_size(max_bodylen)
    keep = int(keep)
    from logging import Logger
    from logging.handlers import RotatingFileHandler
    if verbose_log:
        handler = RotatingFileHandler(verbose_log, maxBytes=max_bytes,
                                      backupCount=backup_count)
        verbose_log = Logger('repoze.debug.verboselogger')
        verbose_log.handlers = [handler]
    if trace_log:
        handler = RotatingFileHandler(trace_log, maxBytes=max_bytes,
                                      backupCount=backup_count)
        trace_log = Logger('repoze.debug.tracelogger')
        trace_log.handlers = [handler]
    return ResponseLoggingMiddleware(app, max_bodylen, keep, verbose_log,
                                     trace_log)

def configure(self, logger: logging.Logger, verbosity: int = 0) -> int:
    """
    Add all configured handlers to the supplied logger. If verbosity > 0 then make
    sure we have a console handler and force the level of the console handler based
    on the verbosity.

    :param logger: The logger to add the handlers to
    :param verbosity: The verbosity level given as command line argument
    :return: The lowest log level that is going to be handled
    """
    # Remove any previously configured handlers, in case we are re-configuring.
    # We are deleting while iterating, so copy the list first.
    for handler in list(logger.handlers):
        logger.removeHandler(handler)

    # Add the handlers, keeping track of the console handler so its level can
    # be adjusted for the requested verbosity.
    console = None
    for handler_factory in self.handlers:
        handler = handler_factory()
        logger.addHandler(handler)
        if isinstance(handler_factory, ConsoleHandlerFactory):
            console = handler

    # Set the console handler's level according to the verbosity
    set_verbosity_logger(logger, verbosity, console)

    # Find the lowest log level
    lowest_level = logging.CRITICAL
    for handler in logger.handlers:
        if handler.level < lowest_level:
            lowest_level = handler.level

    # Return the lowest log level we want, so that we can filter lower priority
    # messages earlier (where appropriate)
    return lowest_level

def test():
    logfile = 'log.log'
    log = Logger(logfile)
    log.write_log('', False)
    os.remove(logfile)

def make_file_logger(logfile, maxBytes=int(1e7), backupCount=10):
    """Create a logger that mimics the format of Products.LongRequestLogger"""
    if isinstance(logfile, Logger):
        # The Logger is already set up.
        return logfile
    logger = Logger('slowlog')
    if isinstance(logfile, Handler):
        # The Handler is already set up.
        handler = logfile
    else:
        if hasattr(logfile, 'write'):
            # Write to an open file.
            handler = StreamHandler(logfile)
        else:
            # Create a rotating file handler.
            handler = RotatingFileHandler(logfile, maxBytes=maxBytes,
                                          backupCount=backupCount)
        fmt = Formatter('%(asctime)s - %(message)s')
        handler.setFormatter(fmt)
    logger.addHandler(handler)
    return logger

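# A minimal usage sketch for make_file_logger above, assuming it is importable;
# the 'slowlog.log' path is illustrative, not part of the original code.
import sys

file_logger = make_file_logger('slowlog.log')  # path -> rotating file handler
file_logger.warning('request took too long')
stream_logger = make_file_logger(sys.stderr)   # any object with .write() works too
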
def post(self):
    try:
        name = cgi.escape(self.request.get('name'))
        description = cgi.escape(self.request.get('description'))
        url = cgi.escape(self.request.get('url'))
        seller = cgi.escape(self.request.get('seller'))
    except Exception:
        log.exception("failed to get data from form")
        print("failed to get data from form")
        return

    seller = Seller.get(seller)
    if seller:
        turl = URL()
        turl.url = url
        turl.put()

        target = Target()
        target.name = name
        target.description = description
        target.url = turl
        target.seller = seller
        target.put()
    else:
        print("no seller with specified name")

class WhiteWolfTextParser(object):
    """Actual parser for the WW cardlist text file(s)."""

    def __init__(self, oLogHandler):
        self.oLogger = Logger('White wolf card parser')
        if oLogHandler is not None:
            self.oLogger.addHandler(oLogHandler)
        self._oState = None
        self.reset()

    def reset(self):
        """Reset the parser"""
        self._oState = WaitingForCardName({}, self.oLogger)

    def parse(self, fIn):
        """Feed lines to the state machine"""
        for sLine in fIn:
            self.feed(sLine)
        # Ensure we flush any open card text states
        self.feed('')
        if hasattr(self._oState, 'flush'):
            self._oState.flush()
        else:
            raise IOError('Failed to parse card list - '
                          'unexpected state at end of file.\n'
                          'Card list probably truncated.')

    def feed(self, sLine):
        """Feed the line to the current state"""
        # Strip BOM from line start
        sLine = sLine.decode('utf8').lstrip(u'\ufeff')
        self._oState = self._oState.transition(sLine, None)

class Spy(MailService):
    """
    A spy that logs all mail correspondence passing through its hands.
    It only watches objects of the MailMessage class and writes the
    following messages to the logger.

    If "Austin Powers" is given as the sender or the recipient, log a
    message at WARN level:
        Detected target mail correspondence: from {from} to {to} "{message}"
    Otherwise, log a message at INFO level:
        Usual correspondence: from {from} to {to}
    """

    def __init__(self):
        self.__logger = Logger("logger")

    def process_mail(self, mail: Mail):
        if isinstance(mail, MailMessage):
            source = mail.get_source()
            destination = mail.get_destination()
            message = mail.get_message()
            if source == banned_address or destination == banned_address:
                self.__logger.warning(
                    'Detected target mail correspondence: from {0} to {1} "{2}"'.format(
                        source, destination, message))
            else:
                self.__logger.info(
                    'Usual correspondence: from {0} to {1}'.format(source, destination))
        return mail

def __init__(self):
    Logger.__init__(self, "ravel")
    ch = logging.StreamHandler()
    formatter = logging.Formatter(MSG_FORMAT)
    ch.setFormatter(formatter)
    self.addHandler(ch)
    self.setLogLevel()

def __init__(self, name):
    '''
    new constructor already setting up the different handlers and formatters
    '''
    Logger.__init__(self, name)
    # Flag whether a file log should be created
    fileLog = True
    # Check whether there is a folder for logging
    if not os.path.isdir(os.path.dirname(logFile)):
        fileLog = False
    # If our log file exists, delete it so we get a fresh log file for every run
    if os.path.isfile(logFile) and fileLog:
        try:
            os.remove(logFile)
        except OSError:
            pass
    if fileLog:
        self.setFileHandler()
        self.addHandler(zeroLogger._FileHandle)
    self.setStreamHandler()
    self.addHandler(zeroLogger._StreamHandle)

from contextlib import contextmanager


@contextmanager
def spinner(text: str, logger: Logger, quiet=False, debug=False):
    '''Context manager for long running processes.

    :param text: Message to output
    :param logger: Logger to capture the error if it occurs
    :param quiet: If ``True``, messages will be hidden
    :param debug: If ``True``, show full tracebacks
    '''
    # pylint: disable=broad-except
    try:
        logger.info(text)
        if not quiet:
            print(text)
        yield
        if not quiet:
            print('Done\n')
    except Exception as exception:
        exception_traceback = format_exc()
        logger.error(exception_traceback)
        if not quiet:
            if debug:
                print(exception_traceback)
            else:
                print(str(exception))

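# A minimal usage sketch for the spinner context manager above; the logger name
# and the work done inside the block are illustrative.
import logging

demo_log = logging.getLogger('demo')
with spinner('Crunching numbers...', demo_log):
    total = sum(range(10_000))  # any long-running work goes here
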
def run(self):
    global logger
    global totalFetchTime
    global totalRequestsCompleted
    while True:
        # grab a host from the queue
        host = self.queue.get()
        threadId = threading.current_thread()

        # fetch the url and read the start of the page
        beginTime = time.time()
        url = urllib2.urlopen(host)
        x = url.read(100000)
        if not x:
            logger.warning("[%s] No data for %s" % (threadId, host))
        endTime = time.time()
        elapsedTime = endTime - beginTime
        logger.info("Request for %s executed in %s" % (host, elapsedTime))

        # signal to the queue that the job is done
        totalRequestsCompleted += 1
        totalFetchTime += elapsedTime
        self.queue.task_done()

def extract_files(src_path, extracted_files, logger: logging.Logger):
    logger.info('Extracting new files...')
    list_files = os.listdir(src_path)
    list_land = []
    list_port = []
    for filename in list_files:
        if already_extracted(filename, extracted_files, logger):
            continue
        src_file = src_path + filename
        # check whether the file is an image
        try:
            im = Image.open(src_file)
        except OSError:
            continue
        x, y = im.size
        im.close()
        if x == 1920 and y == 1080:
            list_land.append(filename)
        elif x == 1080 and y == 1920:
            list_port.append(filename)
    return list_land, list_port

def is_satisfied(requirement: Requirement, logger: logging.Logger) -> bool:
    try:
        requirement.check()
        logger.debug("Requirement '%s' satisfied", requirement.description)
        return True
    except Exception as e:
        logger.error("Requirement '%s' not satisfied: %s", requirement.description, e)
        return False

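# A minimal usage sketch for is_satisfied; this duck-typed Requirement stand-in
# (a .check() that raises on failure plus a .description attribute) is
# hypothetical, not the project's Requirement class.
import logging
import shutil

class DiskSpaceRequirement:
    description = 'at least 1 GB free on /'

    def check(self):
        if shutil.disk_usage('/').free < 1 << 30:
            raise RuntimeError('less than 1 GB free')

print(is_satisfied(DiskSpaceRequirement(), logging.getLogger(__name__)))
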
def check_os(logger: logging.Logger):
    logger.info('Checking Windows 10...')
    err_msg = 'This system is not Windows 10. Exit.'
    if sys.platform != 'win32':
        raise OSError(err_msg)
    if platform.release() != '10':
        raise OSError(err_msg)

def __init__(self, name, level=0):
    Logger.__init__(self, name, level)
    self.formatter = self.format
    handler = StreamHandler()
    handler.setFormatter(self.formatter)
    self.addHandler(handler)

def __init__(self, name, level):
    Logger.__init__(self, name, level)
    self.formatter = self.format
    self.handler = PMLogHandler()
    self.handler.setFormatter(self.formatter)
    self.addHandler(self.handler)

def copy_database(oOrigConn, oDestConn, oLogHandler=None):
    """Copy the database, with no attempt to upgrade.

       This is a straight copy, with no provision for funky stuff.
       Compatibility of database structures is assumed, but not checked.
       """
    # Not checking versions probably should be fixed
    # Copy tables needed before we can copy AbstractCard
    flush_cache()
    oVer = DatabaseVersion()
    oVer.expire_cache()
    oLogger = Logger('copy DB')
    if oLogHandler:
        oLogger.addHandler(oLogHandler)
        if hasattr(oLogHandler, 'set_total'):
            iTotal = 14 + AbstractCard.select(connection=oOrigConn).count() + \
                PhysicalCard.select(connection=oOrigConn).count() + \
                PhysicalCardSet.select(connection=oOrigConn).count()
            oLogHandler.set_total(iTotal)
    bRes = True
    aMessages = []
    oTrans = oDestConn.transaction()
    aToCopy = [
        (copy_rarity, 'Rarity table', False),
        (copy_expansion, 'Expansion table', False),
        (copy_discipline, 'Discipline table', False),
        (copy_clan, 'Clan table', False),
        (copy_creed, 'Creed table', False),
        (copy_virtue, 'Virtue table', False),
        (copy_card_type, 'CardType table', False),
        (copy_ruling, 'Ruling table', False),
        (copy_discipline_pair, 'DisciplinePair table', False),
        (copy_rarity_pair, 'RarityPair table', False),
        (copy_sect, 'Sect table', False),
        (copy_title, 'Title table', False),
        (copy_artist, 'Artist table', False),
        (copy_keyword, 'Keyword table', False),
        (copy_abstract_card, 'AbstractCard table', True),
        (copy_physical_card, 'PhysicalCard table', True),
        (copy_physical_card_set, 'PhysicalCardSet table', True),
    ]
    for fCopy, sName, bPassLogger in aToCopy:
        try:
            if bRes:
                if bPassLogger:
                    fCopy(oOrigConn, oTrans, oLogger)
                else:
                    fCopy(oOrigConn, oTrans)
        except SQLObjectNotFound as oExp:
            bRes = False
            aMessages.append('Unable to copy %s: Aborting with error: %s'
                             % (sName, oExp))
        else:
            oTrans.commit()
            oTrans.cache.clear()
            if not bPassLogger:
                oLogger.info('%s copied' % sName)

def log_to_file(logger: Logger,
                filename: str,
                log_format: str = "%(asctime)s %(levelname)-8s %(message)s",
                ) -> None:
    """Note: `filename` should be declared in zproject/settings.py in ZULIP_PATHS."""
    formatter = logging.Formatter(log_format)
    handler = logging.FileHandler(filename)
    handler.setFormatter(formatter)
    logger.addHandler(handler)

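# A minimal usage sketch for log_to_file; the logger name and file path are
# illustrative, not values from zproject/settings.py.
import logging

queue_logger = logging.getLogger('zulip.queue')
queue_logger.setLevel(logging.INFO)
log_to_file(queue_logger, '/tmp/queue.log')
queue_logger.info('worker started')
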
def __init__(self, name: str, logger: Logger, ttl: int, config: IdPConfig, lock=None):
    self.logger = logger
    self._cache: ExpiringCache
    if (config.redis_sentinel_hosts or config.redis_host) and config.session_app_key:
        self._cache = ExpiringCacheCommonSession(name, logger, ttl, config,
                                                 secret=config.session_app_key)
    else:
        # This is used in tests
        self._cache = ExpiringCacheMem(name, logger, ttl, lock)
    logger.debug('Set up IDP ticket cache {!s}'.format(self._cache))

class CAENRFIDEventArgs:
    '''This class defines the CAENRFID event arguments.'''

    def __init__(self):
        self._log = Logger("CAENRFIDEventArgs")
        self._log.debug("Class %s created", self.__class__.__name__)

    def getData(self):
        '''Returns the event object value.'''
        raise NotImplementedError("Not implemented yet!")

def write_combos():
    logger = Logger('name', 20)
    handler = FileHandler('flog.log')
    logger.addHandler(handler)
    with open('namelist.txt', 'a') as fileobject:
        llist = ("{} {}".format(x, y)
                 for x in names(0, 'names.txt')
                 for y in names(1, 'names.txt'))
        for name in llist:
            if len(name) > 17:
                logger.info('{} is {} characters long'.format(name, len(name)))
            fileobject.write('{}\n'.format(name))

def configure_logger_for_colour(log: logging.Logger,
                                remove_existing: bool = True) -> None:
    """
    Applies a preconfigured datetime/colour scheme to a logger.
    Should ONLY be called from the "if __name__ == 'main'" script:
    https://docs.python.org/3.4/howto/logging.html#library-config
    """
    if remove_existing:
        log.handlers = []  # http://stackoverflow.com/questions/7484454
    log.addHandler(COLOUR_HANDLER)

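# A minimal usage sketch for configure_logger_for_colour, assuming COLOUR_HANDLER
# is defined in the same module; per the docstring, call it only from the
# entry-point script.
import logging

if __name__ == '__main__':
    root = logging.getLogger()
    root.setLevel(logging.INFO)
    configure_logger_for_colour(root)
    root.info('colourful logging configured')
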
def __init__(self):
    Logger.__init__(self, "OVS-CONSOLE")
    # Create console handler
    console = logging.StreamHandler()
    # Add console handler to this logger
    self.addHandler(console)
    # Set the logging LEVEL
    self.setLevel(LEVEL)

def __init__(self, name, level=0):
    if level == 0:
        level = option_parser.get_verbose()
    Logger.__init__(self, name, level)
    self.formatter = self.format
    handler = StreamHandler()
    handler.setFormatter(self.formatter)
    self.addHandler(handler)

def log_error(
    error: GraphQLError,
    logger: logging.Logger,
    level: int,
):
    logger.log(level, f'{error}')
    # walk to the innermost frame of the traceback
    tb = error.__traceback__
    while tb and tb.tb_next:
        tb = tb.tb_next
    if tb:
        logger.log(level, f'Execution Context: {tb.tb_frame.f_locals!r}')

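# A minimal usage sketch for log_error, assuming graphql-core is installed;
# attaching the original traceback to the GraphQLError by hand is illustrative.
import logging
from graphql import GraphQLError

try:
    {}['missing']
except KeyError as err:
    gql_err = GraphQLError('lookup failed', original_error=err)
    gql_err.__traceback__ = err.__traceback__
    log_error(gql_err, logging.getLogger(__name__), logging.ERROR)
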
def main():
    logger = Logger('Pipeliner')
    handler = StreamHandler(stdout)
    formatter = Formatter('%(asctime)s - %(name)s - %(levelname)s - '
                          '%(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    pipeliner = Pipeliner(api='tcp://localhost:5555',
                          broadcast='tcp://localhost:5556',
                          logger=logger)
    pipeliner.start()

def delete_cookie(name: str, logger: logging.Logger, config: IdPConfig) -> None:
    """
    Ask the browser to delete a cookie.

    :param name: cookie name as string
    :param logger: logging instance
    :param config: IdPConfig instance
    """
    logger.debug("Delete cookie: {!s}".format(name))
    return set_cookie(name, '/', logger, config, value='')

def readUrlsFromFile():
    global urlFile
    global urls
    global logger
    with open(urlFile, 'r') as url_list:
        for line in url_list:
            line = line.rstrip("\r\n")
            logger.debug("Loading URL %s from %s" % (line, urlFile))
            urls.append(line)
    if len(urls) < 1:
        print("No urls were able to be loaded from %s, exiting!" % urlFile)
        exit(1)

def process_dxf_files(dxf_files, input_values, material, logger: logging.Logger):
    ao = apper.AppObjects()

    # Start a time line group
    start_index = apper.start_group()

    y_magnitude_attribute = ao.design.attributes.itemByName("DXFer", "y_magnitude")
    x_magnitude_attribute = ao.design.attributes.itemByName("DXFer", "x_magnitude")
    row_count_attribute = ao.design.attributes.itemByName("DXFer", "row_count")
    y_row_max_attribute = ao.design.attributes.itemByName("DXFer", "y_row_max")

    if y_magnitude_attribute is None:
        y_magnitude = 0.0
    else:
        y_magnitude = float(y_magnitude_attribute.value)

    if x_magnitude_attribute is None:
        x_magnitude = 0.0
    else:
        x_magnitude = float(x_magnitude_attribute.value)

    if row_count_attribute is None:
        row_count = 0
    else:
        row_count = int(row_count_attribute.value)

    if y_row_max_attribute is None:
        y_row_max = 0.0
    else:
        y_row_max = float(y_row_max_attribute.value)

    # Define spacing and directions
    x_vector = adsk.core.Vector3D.create(1.0, 0.0, 0.0)
    y_vector = adsk.core.Vector3D.create(0.0, 1.0, 0.0)

    # Iterate all dxf files and create components
    for dxf_file in dxf_files:
        # Create new component for this DXF file
        occurrence = apper.create_component(ao.root_comp, dxf_file['name'])
        sketches = apper.import_dxf(
            dxf_file['full_path'],
            occurrence.component,
            occurrence.component.xYConstructionPlane,
            input_values['single_sketch']
        )
        logger.info(f"Imported DXF File: {dxf_file['name']}")

        x_delta = 0
        y_delta = 0
        face = False
        sketch_transform = None
        extrude_sketch_transform = None

        for sketch in sketches:
            if input_values['close_sketches']:
                tolerance = input_values['tolerance_input']
                close_sketch_gaps(sketch, tolerance, logger)

            if input_values['reset_option_input']:
                sketch_transform = move_sketch_to_origin(sketch)

            x_delta_check = get_bb_in_direction(sketch, x_vector)
            if x_delta_check > x_delta:
                x_delta = x_delta_check

            y_delta_check = get_bb_in_direction(sketch, y_vector)
            if y_delta_check > y_delta:
                y_delta = y_delta_check

            if input_values['extrude_option_input']:
                # extrude_largest_profile(sketch, occurrence.component, input_values['distance'])
                this_face = extrude_profile_with_most_loops(
                    sketch, occurrence.component, input_values['distance'])
                if this_face:
                    face = this_face
                    extrude_sketch_transform = sketch_transform

            if input_values['keep_sketches_shown']:
                sketch.isLightBulbOn = True

        if input_values['import_text']:
            # Alternative to create sketch on extrude cap face, having transform issues.
            if face:
                text_sketch = occurrence.component.sketches.add(face)
            else:
                xy_plane = occurrence.component.xYConstructionPlane
                text_sketch = occurrence.component.sketches.add(xy_plane)

            text_sketch.name = 'TEXT'

            # Import text with EZDXF Library
            font_selection = input_values['font_selection']
            EZDXFCommands.import_dxf_text(dxf_file['full_path'], text_sketch,
                                          font_selection, logger)

            if text_sketch.sketchTexts.count == 0:
                text_sketch.deleteMe()
            elif input_values['reset_option_input']:
                if extrude_sketch_transform is not None:
                    move_sketch_by_transform(text_sketch, extrude_sketch_transform)
                elif sketch_transform is not None:
                    move_sketch_by_transform(text_sketch, sketch_transform)
            # EZDXFCommands.import_dxf_text(dxf_file['full_path'], occurrence.component, font_selection)

        if not input_values['reset_option_input']:
            move_to_origin(occurrence)

        # Move component in specified direction
        transform_along_vector(occurrence, x_vector, x_magnitude)
        transform_along_vector(occurrence, y_vector, y_magnitude)

        # Update document and capture position of new component
        adsk.doEvents()
        if ao.design.snapshots.hasPendingSnapshot:
            ao.design.snapshots.add()

        # Increment magnitude by desired component size and spacing
        x_magnitude += input_values['spacing']
        x_magnitude += x_delta
        row_count += 1

        if y_delta > y_row_max:
            y_row_max = y_delta

        # Move to next row
        if row_count >= input_values['rows']:
            y_magnitude += input_values['spacing']
            y_magnitude += y_row_max
            y_row_max = 0.0
            x_magnitude = 0.0
            row_count = 0

        if material is not None:
            occurrence.component.material = material

    ao.design.attributes.add("DXFer", "y_magnitude", str(y_magnitude))
    ao.design.attributes.add("DXFer", "x_magnitude", str(x_magnitude))
    ao.design.attributes.add("DXFer", "row_count", str(row_count))
    ao.design.attributes.add("DXFer", "y_row_max", str(y_row_max))

    # Close time line group
    apper.end_group(start_index)

def scaffold_split(
    data: MoleculeDataset,
    sizes: Tuple[float, float, float] = (0.8, 0.1, 0.1),
    balanced: bool = False,
    seed: int = 0,
    logger: logging.Logger = None
) -> Tuple[MoleculeDataset, MoleculeDataset, MoleculeDataset]:
    """
    Split a dataset by scaffold so that no molecules sharing a scaffold are in the same split.

    :param data: A MoleculeDataset.
    :param sizes: A length-3 tuple with the proportions of data in the
                  train, validation, and test sets.
    :param balanced: Try to balance sizes of scaffolds in each set, rather than
                     just putting the smallest in the test set.
    :param seed: Seed for shuffling when doing balanced splitting.
    :param logger: A logger.
    :return: A tuple containing the train, validation, and test splits of the data.
    """
    assert sum(sizes) == 1

    # Split
    train_size, val_size, test_size = \
        sizes[0] * len(data), sizes[1] * len(data), sizes[2] * len(data)
    train, val, test = [], [], []
    train_scaffold_count, val_scaffold_count, test_scaffold_count = 0, 0, 0

    # Map from scaffold to index in the data
    scaffold_to_indices = scaffold_to_smiles(data.smiles(), use_indices=True)

    if balanced:
        # Put anything bigger than half the val/test size into train, order the rest randomly
        index_sets = list(scaffold_to_indices.values())
        big_index_sets = []
        small_index_sets = []
        for index_set in index_sets:
            if len(index_set) > val_size / 2 or len(index_set) > test_size / 2:
                big_index_sets.append(index_set)
            else:
                small_index_sets.append(index_set)
        random.seed(seed)
        random.shuffle(big_index_sets)
        random.shuffle(small_index_sets)
        index_sets = big_index_sets + small_index_sets
    else:
        # Sort from largest to smallest scaffold sets
        index_sets = sorted(list(scaffold_to_indices.values()),
                            key=lambda index_set: len(index_set),
                            reverse=True)

    for index_set in index_sets:
        if len(train) + len(index_set) <= train_size:
            train += index_set
            train_scaffold_count += 1
        elif len(val) + len(index_set) <= val_size:
            val += index_set
            val_scaffold_count += 1
        else:
            test += index_set
            test_scaffold_count += 1

    if logger is not None:
        logger.debug(f'Total scaffolds = {len(scaffold_to_indices):,} | '
                     f'train scaffolds = {train_scaffold_count:,} | '
                     f'val scaffolds = {val_scaffold_count:,} | '
                     f'test scaffolds = {test_scaffold_count:,}')

    log_scaffold_stats(data, index_sets, logger=logger)

    # Map from indices to data
    train = [data[i] for i in train]
    val = [data[i] for i in val]
    test = [data[i] for i in test]

    return MoleculeDataset(train), MoleculeDataset(val), MoleculeDataset(test)

async def start_and_monitor_coalescer(config_file: str, cfg: dict,
                                      logger: logging.Logger,
                                      coalescer_bin: str = None) -> None:
    '''Start and monitor the coalescer

    :param config_file: str, the path to suzieq config file, to be passed
    :param cfg: dict, the Suzieq config dictionary
    :param logger: logging.Logger, pointer to logger to use
    :param coalescer_bin: str, optional path to coalescer binary
    :return: nothing
    '''

    async def start_coalescer():
        sq_path = get_sq_install_dir()
        coalescer_bin = f'{sq_path}/utilities/sq_coalescer.py'
        if config_file:
            coalescer_args = f'-c {config_file}'
        else:
            coalescer_args = ''
        coalescer_args = f'{coalescer_bin} {coalescer_args}'.strip().split()

        try:
            process = await asyncio.create_subprocess_exec(
                *coalescer_args,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE)
        except Exception as ex:
            logger.error(f'ABORTING. Unable to start coalescer: {ex}')
            process = None

        return process

    fd = 0
    process = None
    # Check to see if a file lock is possible
    while not fd:
        if not process:
            logger.warning('Starting Coalescer')
        elif process.returncode == errno.EBUSY:
            logger.warning('Trying to start coalescer')
        process = await start_coalescer()

        if not process:
            os.kill(os.getpid(), signal.SIGTERM)
            return

        # Initial sleep to ensure that the coalescer starts up
        await asyncio.sleep(10)
        coalesce_dir = cfg.get('coalescer', {})\
            .get('coalesce-directory', f'{cfg.get("data-directory")}/coalesced')

        fd = ensure_single_instance(f'{coalesce_dir}/.sq-coalescer.pid', False)
        if fd > 0:
            # unlock and try to start the process
            try:
                fcntl.flock(fd, fcntl.F_UNLCK)
                os.close(fd)
            except OSError:
                pass
            continue

        # Check if we have something from the stdout we need to log
        try:
            stdout, stderr = await process.communicate()
        except asyncio.CancelledError:
            if process:
                process.terminate()
                sleep(5)
                process.kill()
            return

        if process.returncode and (process.returncode != errno.EBUSY):
            logger.error(f'coalescer stdout: {stdout}, stderr: {stderr}')
        else:
            if process.returncode == errno.EBUSY:
                await asyncio.sleep(10 * 60)
            else:
                logger.info(
                    f'coalescer ended stdout: {stdout}, stderr: {stderr}')

        fd = 0

import gevent
from gevent import monkey

monkey.patch_all()

# noinspection PyPep8
from logging import Logger
from wxpy import *
from traceback import print_exc
import builtins
from datetime import datetime, timedelta
from typing import List

bot = Bot(cache_path=True, console_qr=2)
logger = Logger('sports')


def wxprint(my_bot):
    def func(x, **kwargs):
        logger.warning(x)
        gevent.spawn(my_bot.file_helper.send, x)
    return func


builtins.print = wxprint(bot)

from mysports.run import run

red, green = 2, 2

def logging_config(folder: Optional[str] = None,
                   name: Optional[str] = None,
                   logger: logging.Logger = logging.root,
                   level: int = logging.INFO,
                   console_level: int = logging.INFO,
                   console: bool = True,
                   overwrite_handler: bool = False) -> str:
    """Config the logging module. It will set the logger to save to the specified file path.

    Parameters
    ----------
    folder
        The folder to save the log
    name
        Name of the saved log file
    logger
        The logger
    level
        Logging level
    console_level
        Logging level of the console log
    console
        Whether to also log to console
    overwrite_handler
        Whether to overwrite the existing handlers in the logger

    Returns
    -------
    folder
        The folder to save the log file.
    """
    if name is None:
        name = inspect.stack()[-1][1].split('.')[0]
    if folder is None:
        folder = os.path.join(os.getcwd(), name)
    if not os.path.exists(folder):
        os.makedirs(folder, exist_ok=True)
    need_file_handler = True
    need_console_handler = True
    # Check all existing handlers.
    if overwrite_handler:
        logger.handlers = []
    else:
        for handler in logger.handlers:
            if isinstance(handler, logging.StreamHandler):
                need_console_handler = False
    logpath = os.path.join(folder, name + ".log")
    print("All Logs will be saved to {}".format(logpath))
    logger.setLevel(level)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    if need_file_handler:
        logfile = logging.FileHandler(logpath)
        logfile.setLevel(level)
        logfile.setFormatter(formatter)
        logger.addHandler(logfile)
    if console and need_console_handler:
        # Initialize the console logging
        logconsole = logging.StreamHandler()
        logconsole.setLevel(console_level)
        logconsole.setFormatter(formatter)
        logger.addHandler(logconsole)
    return folder

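# A minimal usage sketch for logging_config; the folder and name arguments are
# illustrative.
import logging

log_dir = logging_config(folder='./logs', name='experiment1',
                         level=logging.DEBUG, console_level=logging.INFO)
logging.info('logs are stored under %s', log_dir)
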
def main(config, log: logging.Logger) -> None:
    log.debug(f'Configuration: {config}')
    hyperparameters = config['hyperparameters']

    raw_dataset = load_raw_dataset(config, log)
    log.info(f'size of raw dataset = {len(raw_dataset)}')

    max_features = hyperparameters['max_features']
    log.info(f'Limiting to {max_features} features.')
    vectorized_reviews = VectorizedReviews(raw_dataset, max_features)
    log.info(f'size of vectorized reviews = {len(vectorized_reviews)}')

    num_text_features = len(vectorized_reviews.text_vectorizer.vocabulary_)
    log.info(f'Num review tokens = {num_text_features}')
    num_rating_features = len(vectorized_reviews.rating_vectorizer.vocabulary_)
    log.info(f'Rating vocabulary size = {num_rating_features}')

    device = get_device(config)
    log.info(f'Using device = {device}')

    model = ReviewClassifier(num_text_features)
    model = model.to(device)
    log.info(model)

    loss = nn.BCEWithLogitsLoss()
    log.info(f'loss = {loss}')

    learning_rate = hyperparameters['learning_rate']
    optimizer = Adam(model.parameters(), lr=learning_rate)
    log.info(f'optimizer = {optimizer}')

    tensorboard_directory = config['files']['log_dir']
    log.info(f'Writing Tensorboard logs to {tensorboard_directory}.')

    seed = hyperparameters['seed']
    torch.manual_seed(seed)
    log.info(f'Using seed {seed}.')

    writer = SummaryWriter(log_dir=tensorboard_directory)
    train(config, vectorized_reviews, device, model, loss, optimizer, log, writer)

async def _generate_framework_chunks(
        path: str, logger: Logger) -> AsyncIterator[InstallRequest]:
    logger.debug(f"Generating chunks for {path}")
    async for chunk in tar.generate_tar([path]):
        yield InstallRequest(payload=Payload(data=chunk))
    logger.debug(f"Finished generating chunks {path}")

def run_iris(logger: logging.Logger, iris_config: ConfigParser) -> None:
    """
    Run the main Iris process

    :param logger: logger for forensics
    :param iris_config: iris.cfg config file object
    :return: None
    """
    try:
        iris_main_settings = iris_config['main_settings']
        iris_root_path = iris_main_settings['iris_root_path']
        textfile_collector_path = iris_main_settings['textfile_collector_path']
        iris_monitor_frequency = iris_main_settings.getfloat('iris_monitor_frequency')
        dev_mode = iris_main_settings.getboolean('dev_mode')
        logger.info('Starting IRIS in {} mode\n'.format('DEV' if dev_mode else 'PROD'))

        # set path variables
        log_debug_file_path = os.path.join(iris_root_path, 'iris.debug')
        log_dir_path = os.path.join(iris_root_path, 'logs')
        config_service_log_path = os.path.join(log_dir_path, 'config_service.log')
        scheduler_log_path = os.path.join(log_dir_path, 'scheduler.log')
        garbage_collector_log_path = os.path.join(log_dir_path, 'garbage_collector.log')
        aws_credentials_path = os.path.join(iris_root_path, 'aws_credentials')
        s3_download_to_path = os.path.join(iris_root_path, 'downloads')
        local_config_file_path = os.path.join(iris_root_path, 'local_config.json')
        global_config_file_path = os.path.join(s3_download_to_path, 'global_config.json')
        prom_dir_path = os.path.join(iris_root_path, 'prom_files')

        # won't make dirs if they already exist
        os.makedirs(s3_download_to_path, exist_ok=True)
        os.makedirs(textfile_collector_path, exist_ok=True)

        if not os.path.isdir(prom_dir_path):
            logger.info('Creating symlink from {} to {}'.format(
                textfile_collector_path, prom_dir_path))
            os.symlink(textfile_collector_path, prom_dir_path)

        # Expose Iris version metadata
        logger.info('Exposing Iris version metadata via prom file')
        iris_version_settings = {
            'iris_version': IRIS_VERSION,
            'iris_revision': IRIS_REVISION,
            'iris_python_version': IRIS_PYTHON_VERSION,
            'iris_build_date': IRIS_BUILD_DATE,
        }
        prom_builder = PromStrBuilder(
            metric_name='iris_build_info',
            metric_result=1,
            help_str='This gives us iris build metadata',
            type_str='gauge',
            labels=iris_version_settings)
        prom_string = prom_builder.create_prom_string()
        prom_file_path = os.path.join(prom_dir_path, '{}.prom'.format('iris_build_info'))
        prom_writer = PromFileWriter(logger=logger)
        prom_writer.write_prom_file(prom_file_path, prom_string)

        # run config_service process
        logger.info('Starting the Config_Service child process')
        config_service_settings = iris_config['config_service_settings']
        run_config_service_params = {
            'aws_creds_path': aws_credentials_path,
            's3_region_name': config_service_settings['s3_region_name'],
            's3_bucket_env': config_service_settings['s3_bucket_env'],
            's3_bucket_name': config_service_settings['s3_bucket_name'],
            's3_download_to_path': s3_download_to_path,
            'ec2_region_name': config_service_settings['ec2_region_name'],
            'ec2_dev_instance_id': config_service_settings['ec2_dev_instance_id'],
            'ec2_metadata_url': config_service_settings['ec2_metadata_url'],
            'local_config_path': local_config_file_path,
            'prom_dir_path': prom_dir_path,
            'run_frequency': config_service_settings.getfloat('run_frequency'),
            'log_path': config_service_log_path,
            'log_debug_path': log_debug_file_path,
            'dev_mode': dev_mode
        }
        config_service_process = multiprocessing.Process(
            target=run_config_service,
            name='config_service',
            kwargs=run_config_service_params)
        # cleanup config_service child process when main process exits
        config_service_process.daemon = True
        config_service_process.start()

        # run scheduler process
        logger.info('Starting the Scheduler child process')
        scheduler_settings = iris_config['scheduler_settings']
        run_scheduler_params = {
            'global_config_path': global_config_file_path,
            'local_config_path': local_config_file_path,
            'prom_dir_path': prom_dir_path,
            'run_frequency': scheduler_settings.getfloat('run_frequency'),
            'internal_metrics_whitelist': internal_metrics_whitelist,
            'log_path': scheduler_log_path,
            'log_debug_path': log_debug_file_path,
        }
        scheduler_process = multiprocessing.Process(
            target=run_scheduler,
            name='scheduler',
            kwargs=run_scheduler_params)
        # cleanup scheduler child process when main process exits
        scheduler_process.daemon = True
        scheduler_process.start()

        # run garbage collector process
        logger.info('Starting the Garbage Collector child process')
        garbage_collector_settings = iris_config['garbage_collector_settings']
        run_garbage_collector_params = {
            'global_config_path': global_config_file_path,
            'local_config_path': local_config_file_path,
            'prom_dir_path': prom_dir_path,
            'run_frequency': garbage_collector_settings.getfloat('run_frequency'),
            'internal_metrics_whitelist': internal_metrics_whitelist,
            'log_path': garbage_collector_log_path,
            'log_debug_path': log_debug_file_path,
        }
        garbage_collector_process = multiprocessing.Process(
            target=run_garbage_collector,
            name='garbage_collector',
            kwargs=run_garbage_collector_params)
        # cleanup garbage collector child process when main process exits
        garbage_collector_process.daemon = True
        garbage_collector_process.start()

        # Indicate the parent is up
        prom_builder = PromStrBuilder(
            metric_name='iris_main_up',
            metric_result=1,
            help_str='Indicates if the Iris parent process is up',
            type_str='gauge')
        prom_string = prom_builder.create_prom_string()
        prom_file_path = os.path.join(prom_dir_path, 'iris_main.prom')
        prom_writer = PromFileWriter(logger=logger)
        prom_writer.write_prom_file(prom_file_path, prom_string)

        # monitor the child processes (config_service, scheduler, etc.) & write
        # to iris-{service}-up.prom files
        child_processes = [
            ChildProcess(config_service_process, config_service_log_path,
                         log_debug_file_path),
            ChildProcess(scheduler_process, scheduler_log_path, log_debug_file_path),
            ChildProcess(garbage_collector_process, garbage_collector_log_path,
                         log_debug_file_path),
        ]
        while True:
            logger.info('Monitoring child services: {}'.format(
                ', '.join([child.name for child in child_processes])))
            for child_process in child_processes:
                process_name = child_process.name
                if not child_process.is_alive():
                    err_msg = ('The {0} ({1}) has failed with exit_code {2}. '
                               'Check the {0} log')
                    logger.error(err_msg.format(process_name, child_process.pid,
                                                child_process.get_exit_code()))
                    if not child_process.already_logged:
                        child_process.log_terminate()
                        child_process.already_logged = True

                metric_name = 'iris_{}_up'.format(process_name)
                metric_up_result = int(child_process.is_alive())
                prom_builder = PromStrBuilder(
                    metric_name=metric_name,
                    metric_result=metric_up_result,
                    help_str='Indicate if the {} process is still up'.format(process_name),
                    type_str='gauge')
                prom_string = prom_builder.create_prom_string()
                prom_file_path = os.path.join(prom_dir_path,
                                              'iris_{}.prom'.format(process_name))
                prom_writer = PromFileWriter(logger=logger)
                prom_writer.write_prom_file(prom_file_path, prom_string)

            logger.info('Sleeping for {}\n'.format(iris_monitor_frequency))
            time.sleep(iris_monitor_frequency)
    except Exception as e:
        logger.error(e)
        # Indicate the parent is down
        prom_builder = PromStrBuilder(
            metric_name='iris_main_up',
            metric_result=0,
            help_str='Indicates if the Iris parent process is up',
            type_str='gauge')
        prom_string = prom_builder.create_prom_string()
        prom_file_path = os.path.join(prom_dir_path, 'iris_main.prom')
        prom_writer = PromFileWriter(logger=logger)
        prom_writer.write_prom_file(prom_file_path, prom_string)
        raise

def run_mixtures_active_learning(theta_0: float,
                                 theta_true: float,
                                 theta_bounds: List[float],
                                 n_theta: int,
                                 initial_idx: List[int],
                                 hyperparams: Dict,
                                 n_true: int,
                                 n_samples_per_theta: int,
                                 n_iter: int,
                                 ucb_kappas: List[float],
                                 ucbm_kappas: List[float],
                                 logger: Logger = None) -> Dict[str, NDFrame]:
    logger = logger or logging.getLogger(__name__)

    logger.info('Simulating X_true and performing exact param scan')
    param_grid = ParamGrid(bounds=[theta_bounds], num=n_theta)
    X_true = triple_mixture(theta_true).sample(n_true).numpy()
    nllr_exact, mle_exact = exact_param_scan(simulator_func=triple_mixture,
                                             X_true=X_true,
                                             param_grid=param_grid,
                                             theta_0=theta_0,
                                             to_meshgrid_shape=False)

    logger.info('Building active learners')
    learner_kwargs = dict(
        simulator_func=triple_mixture,
        X_true=X_true,
        theta_true=theta_true,
        theta_0=theta_0,
        initial_idx=initial_idx,
        n_samples_per_theta=n_samples_per_theta,
        ratio_model=create_model(theta_0=theta_0, hyperparams=hyperparams),
        total_param_grid=param_grid,
    )
    active_learners = dict(Random=RandomActiveLearner(**learner_kwargs))
    for ucb_kappa in ucb_kappas:
        active_learners[f'UCB_{ucb_kappa}'] = \
            UpperConfidenceBoundLearner(kappa=ucb_kappa, **learner_kwargs)
    for ucbm_kappa in ucbm_kappas:
        active_learners[f'UCBM_{ucbm_kappa}'] = \
            ModifiedUCBLearner(kappa=ucbm_kappa, **learner_kwargs)

    logger.info('Fitting ActiveLearners.')
    for name, active_learner in active_learners.items():
        logger.info(f'Fitting {name} ActiveLearner.')
        active_learner.fit(n_iter=n_iter)

    logger.info('Finished fitting, collecting results.')
    mle = pd.DataFrame({
        learner_name: map(float, learner.mle_predictions)
        for learner_name, learner in active_learners.items()
    })
    mle['Exact'] = float(mle_exact)

    trialed_thetas = pd.DataFrame({
        learner_name: map(float, learner.trialed_thetas)
        for learner_name, learner in active_learners.items()
    })

    all_thetas = np.around(param_grid.array.squeeze(), 6)  # TODO

    def _collect_predictions(attr_name):
        columns = list(range(n_iter + 1))
        default = [
            np.full((len(all_thetas),), np.nan) for _ in range(len(columns))
        ]
        dfs = [
            pd.DataFrame(data=np.stack(getattr(learner, attr_name, default), axis=1),
                         index=all_thetas,
                         columns=columns)
            for learner in active_learners.values()
        ]
        concat = pd.concat(dfs,
                           axis=0,
                           keys=active_learners.keys(),
                           names=['Learner', 'theta'])
        concat = concat.reset_index().set_index('theta', drop=True)
        return concat

    nllr = _collect_predictions('nllr_predictions')
    std = _collect_predictions('nllr_std')

    nllr_exact = pd.DataFrame(data=nllr_exact.squeeze(),
                              columns=['Exact'],
                              index=all_thetas)

    return dict(mle=mle,
                trialed_thetas=trialed_thetas,
                nllr=nllr,
                std=std,
                nllr_exact=nllr_exact)

def debug_exc_log(lg: logging.Logger, exc: Exception, msg: str = "Exception in RSS"):
    if lg.getEffectiveLevel() <= logging.DEBUG:
        lg.exception(msg, exc_info=exc)

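# A minimal usage sketch for debug_exc_log; the logger name is illustrative.
# The traceback is emitted only when the logger's effective level is DEBUG.
import logging

rss_log = logging.getLogger('rss')
rss_log.setLevel(logging.DEBUG)
try:
    1 / 0
except ZeroDivisionError as err:
    debug_exc_log(rss_log, err)
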
def fetch_production(zone_key='DK-DK1', session=None, target_datetime=None,
                     logger: logging.Logger = logging.getLogger(__name__)):
    """
    Queries "Electricity balance Non-Validated" from energinet api
    for Danish bidding zones
    """
    r = session or requests.session()

    if zone_key not in ['DK-DK1', 'DK-DK2']:
        raise NotImplementedError(
            'fetch_production() for {} not implemented'.format(zone_key))

    zone = zone_key[-3:]

    timestamp = arrow.get(target_datetime).strftime('%Y-%m-%d %H:%M')

    # fetch hourly energy balance from recent hours
    sqlstr = 'SELECT "HourUTC" as timestamp, "Biomass", "Waste", \
    "OtherRenewable", "FossilGas" as gas, "FossilHardCoal" as coal, \
    "FossilOil" as oil, "HydroPower" as hydro, \
    ("OffshoreWindPower"%2B"OnshoreWindPower") as wind, \
    "SolarPower" as solar from "{0}" \
    WHERE "PriceArea" = \'{1}\' AND \
    "HourUTC" >= (timestamp\'{2}\'-INTERVAL \'24 hours\') AND \
    "HourUTC" <= timestamp\'{2}\' \
    ORDER BY "HourUTC" ASC'.format(ids['energy_bal'], zone, timestamp)

    url = 'https://api.energidataservice.dk/datastore_search_sql?sql={}'.format(sqlstr)
    response = r.get(url)

    # raise errors for responses with an error or no data
    retry_count = 0
    while response.status_code in [429, 403, 500]:
        retry_count += 1
        if retry_count > 5:
            raise Exception('Retried too many times..')
        # Wait and retry
        logger.warning('Retrying..')
        time.sleep(5 ** retry_count)
        response = r.get(url)
    if response.status_code != 200:
        j = response.json()
        if 'error' in j and 'info' in j['error']:
            error = j['error']['__type']
            text = j['error']['info']['orig']
            msg = '"{}" fetching production data for {}: {}'.format(
                error, zone_key, text)
        else:
            msg = 'error while fetching production data for {}: {}'.format(
                zone_key, json.dumps(j))
        raise requests.exceptions.HTTPError(msg)
    if not response.json()['result']['records']:
        raise ParserException("DK.py", 'API returned no data', zone_key=zone_key)

    df = pd.DataFrame(response.json()['result']['records'])
    # index response dataframe by time
    df = df.set_index('timestamp')
    df.index = pd.DatetimeIndex(df.index)
    # drop empty rows from energy balance
    df.dropna(how='all', inplace=True)

    # Divide waste into 55% renewable and 45% non-renewable parts according to
    # https://ens.dk/sites/ens.dk/files/Statistik/int.reporting_2016.xls (visited Jan 24th, 2019)
    df['unknown'] = 0.45 * df['Waste']  # Report fossil waste as unknown
    df['renwaste'] = 0.55 * df['Waste']
    # Report biomass, renewable waste and other renewables (biogas etc.) as biomass
    df['biomass'] = df.filter(['Biomass', 'renwaste', 'OtherRenewable']).sum(axis=1)

    fuels = ['biomass', 'coal', 'oil', 'gas', 'unknown', 'hydro']
    # Format output as a list of dictionaries
    output = []
    for dt in df.index:
        data = {
            'zoneKey': zone_key,
            'datetime': None,
            'production': {
                'biomass': 0,
                'coal': 0,
                'gas': 0,
                'hydro': None,
                'nuclear': 0,
                'oil': 0,
                'solar': None,
                'wind': None,
                'geothermal': None,
                'unknown': 0
            },
            'storage': {},
            'source': 'api.energidataservice.dk'
        }
        data['datetime'] = dt.to_pydatetime()
        data['datetime'] = data['datetime'].replace(tzinfo=pytz.utc)
        for f in ['solar', 'wind'] + fuels:
            data['production'][f] = df.loc[dt, f]
        output.append(data)
    return output

def test(
    cfg_file, ckpt: str, output_path: str = None, logger: logging.Logger = None
) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load
    :param output_path: path to output
    :param logger: log output to this logger (creates new logger if not set)
    """

    if logger is None:
        logger = logging.getLogger(__name__)
        if not logger.handlers:
            FORMAT = "%(asctime)-15s - %(message)s"
            logging.basicConfig(format=FORMAT)
            logger.setLevel(level=logging.DEBUG)

    cfg = load_config(cfg_file)

    if "test" not in cfg["data"].keys():
        raise ValueError("Test data must be specified in config.")

    # when checkpoint is not specified, take latest (best) from model dir
    if ckpt is None:
        model_dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir)
            )

    batch_size = cfg["training"]["batch_size"]
    batch_type = cfg["training"].get("batch_type", "sentence")
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    dataset_version = cfg["data"].get("version", "phoenix_2014_trans")
    translation_max_output_length = cfg["training"].get(
        "translation_max_output_length", None
    )

    # load the data
    _, dev_data, test_data, gls_vocab, txt_vocab = load_data(data_cfg=cfg["data"])

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    do_recognition = cfg["training"].get("recognition_loss_weight", 1.0) > 0.0
    do_translation = cfg["training"].get("translation_loss_weight", 1.0) > 0.0
    model = build_model(
        cfg=cfg["model"],
        gls_vocab=gls_vocab,
        txt_vocab=txt_vocab,
        sgn_dim=sum(cfg["data"]["feature_size"])
        if isinstance(cfg["data"]["feature_size"], list)
        else cfg["data"]["feature_size"],
        features_dim=cfg["data"]["feature_size_cnn"],
        do_recognition=do_recognition,
        do_translation=do_translation,
    )
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # Data Augmentation Parameters
    frame_subsampling_ratio = cfg["data"].get("frame_subsampling_ratio", None)
    # Note (Cihan): we are not using 'random_frame_subsampling' and
    # 'random_frame_masking_ratio' in testing as they are just for training.

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        recognition_beam_sizes = cfg["testing"].get("recognition_beam_sizes", [1])
        translation_beam_sizes = cfg["testing"].get("translation_beam_sizes", [1])
        translation_beam_alphas = cfg["testing"].get("translation_beam_alphas", [-1])
    else:
        recognition_beam_sizes = [1]
        translation_beam_sizes = [1]
        translation_beam_alphas = [-1]

    if "testing" in cfg.keys():
        max_recognition_beam_size = cfg["testing"].get(
            "max_recognition_beam_size", None
        )
        if max_recognition_beam_size is not None:
            recognition_beam_sizes = list(range(1, max_recognition_beam_size + 1))

    if do_recognition:
        recognition_loss_function = torch.nn.CTCLoss(
            blank=model.gls_vocab.stoi[SIL_TOKEN], zero_infinity=True
        )
        if use_cuda:
            recognition_loss_function.cuda()
    if do_translation:
        translation_loss_function = XentLoss(
            pad_index=txt_vocab.stoi[PAD_TOKEN], smoothing=0.0
        )
        if use_cuda:
            translation_loss_function.cuda()

    # NOTE (Cihan): Currently Hardcoded to be 0 for TensorFlow decoding
    assert model.gls_vocab.stoi[SIL_TOKEN] == 0

    if do_recognition:
        # Dev Recognition CTC Beam Search Results
        dev_recognition_results = {}
        dev_best_wer_score = float("inf")
        dev_best_recognition_beam_size = 1
        for rbw in recognition_beam_sizes:
            logger.info("-" * 60)
            valid_start_time = time.time()
            logger.info("[DEV] partition [RECOGNITION] experiment [BW]: %d", rbw)
            dev_recognition_results[rbw] = validate_on_data(
                model=model,
                data=dev_data,
                batch_size=batch_size,
                use_cuda=use_cuda,
                batch_type=batch_type,
                dataset_version=dataset_version,
                sgn_dim=sum(cfg["data"]["feature_size"])
                if isinstance(cfg["data"]["feature_size"], list)
                else cfg["data"]["feature_size"],
                features_dim=cfg["data"]["feature_size_cnn"],
                txt_pad_index=txt_vocab.stoi[PAD_TOKEN],
                # Recognition Parameters
                do_recognition=do_recognition,
                recognition_loss_function=recognition_loss_function,
                recognition_loss_weight=1,
                recognition_beam_size=rbw,
                # Translation Parameters
                do_translation=do_translation,
                translation_loss_function=translation_loss_function
                if do_translation
                else None,
                translation_loss_weight=1 if do_translation else None,
                translation_max_output_length=translation_max_output_length
                if do_translation
                else None,
                level=level if do_translation else None,
                translation_beam_size=1 if do_translation else None,
                translation_beam_alpha=-1 if do_translation else None,
                frame_subsampling_ratio=frame_subsampling_ratio,
            )
            logger.info("finished in %.4fs ", time.time() - valid_start_time)
            if dev_recognition_results[rbw]["valid_scores"]["wer"] < dev_best_wer_score:
                dev_best_wer_score = dev_recognition_results[rbw]["valid_scores"]["wer"]
                dev_best_recognition_beam_size = rbw
                dev_best_recognition_result = dev_recognition_results[rbw]
                logger.info("*" * 60)
                logger.info(
                    "[DEV] partition [RECOGNITION] results:\n\t"
                    "New Best CTC Decode Beam Size: %d\n\t"
                    "WER %3.2f\t(DEL: %3.2f,\tINS: %3.2f,\tSUB: %3.2f)",
                    dev_best_recognition_beam_size,
                    dev_best_recognition_result["valid_scores"]["wer"],
                    dev_best_recognition_result["valid_scores"]["wer_scores"]["del_rate"],
                    dev_best_recognition_result["valid_scores"]["wer_scores"]["ins_rate"],
                    dev_best_recognition_result["valid_scores"]["wer_scores"]["sub_rate"],
                )
                logger.info("*" * 60)

    if do_translation:
        logger.info("=" * 60)
        dev_translation_results = {}
        dev_best_bleu_score = float("-inf")
        dev_best_translation_beam_size = 1
        dev_best_translation_alpha = 1
        for tbw in translation_beam_sizes:
            dev_translation_results[tbw] = {}
            for ta in translation_beam_alphas:
                dev_translation_results[tbw][ta] = validate_on_data(
                    model=model,
                    data=dev_data,
                    batch_size=batch_size,
                    use_cuda=use_cuda,
                    level=level,
                    sgn_dim=sum(cfg["data"]["feature_size"])
                    if isinstance(cfg["data"]["feature_size"], list)
                    else cfg["data"]["feature_size"],
                    features_dim=cfg["data"]["feature_size_cnn"],
                    batch_type=batch_type,
                    dataset_version=dataset_version,
                    do_recognition=do_recognition,
                    recognition_loss_function=recognition_loss_function
                    if do_recognition
                    else None,
                    recognition_loss_weight=1 if do_recognition else None,
                    recognition_beam_size=1 if do_recognition else None,
                    do_translation=do_translation,
                    translation_loss_function=translation_loss_function,
                    translation_loss_weight=1,
                    translation_max_output_length=translation_max_output_length,
                    txt_pad_index=txt_vocab.stoi[PAD_TOKEN],
                    translation_beam_size=tbw,
                    translation_beam_alpha=ta,
                    frame_subsampling_ratio=frame_subsampling_ratio,
                )

                if (
                    dev_translation_results[tbw][ta]["valid_scores"]["bleu"]
                    > dev_best_bleu_score
                ):
                    dev_best_bleu_score = dev_translation_results[tbw][ta][
                        "valid_scores"
                    ]["bleu"]
                    dev_best_translation_beam_size = tbw
                    dev_best_translation_alpha = ta
                    dev_best_translation_result = dev_translation_results[tbw][ta]
                    logger.info(
                        "[DEV] partition [Translation] results:\n\t"
                        "New Best Translation Beam Size: %d and Alpha: %d\n\t"
                        "BLEU-4 %.2f\t(BLEU-1: %.2f,\tBLEU-2: %.2f,\tBLEU-3: %.2f,\tBLEU-4: %.2f)\n\t"
                        "CHRF %.2f\t"
                        "ROUGE %.2f",
                        dev_best_translation_beam_size,
                        dev_best_translation_alpha,
                        dev_best_translation_result["valid_scores"]["bleu"],
                        dev_best_translation_result["valid_scores"]["bleu_scores"]["bleu1"],
                        dev_best_translation_result["valid_scores"]["bleu_scores"]["bleu2"],
                        dev_best_translation_result["valid_scores"]["bleu_scores"]["bleu3"],
                        dev_best_translation_result["valid_scores"]["bleu_scores"]["bleu4"],
                        dev_best_translation_result["valid_scores"]["chrf"],
                        dev_best_translation_result["valid_scores"]["rouge"],
                    )
                    logger.info("-" * 60)

    logger.info("*" * 60)
    logger.info(
        "[DEV] partition [Recognition & Translation] results:\n\t"
        "Best CTC Decode Beam Size: %d\n\t"
        "Best Translation Beam Size: %d and Alpha: %d\n\t"
        "WER %3.2f\t(DEL: %3.2f,\tINS: %3.2f,\tSUB: %3.2f)\n\t"
        "BLEU-4 %.2f\t(BLEU-1: %.2f,\tBLEU-2: %.2f,\tBLEU-3: %.2f,\tBLEU-4: %.2f)\n\t"
        "CHRF %.2f\t"
        "ROUGE %.2f",
        dev_best_recognition_beam_size if do_recognition else -1,
        dev_best_translation_beam_size if do_translation else -1,
        dev_best_translation_alpha if do_translation else -1,
        dev_best_recognition_result["valid_scores"]["wer"] if do_recognition else -1,
        dev_best_recognition_result["valid_scores"]["wer_scores"]["del_rate"]
        if do_recognition
        else -1,
        dev_best_recognition_result["valid_scores"]["wer_scores"]["ins_rate"]
        if do_recognition
        else -1,
        dev_best_recognition_result["valid_scores"]["wer_scores"]["sub_rate"]
        if do_recognition
        else -1,
        dev_best_translation_result["valid_scores"]["bleu"] if do_translation else -1,
        dev_best_translation_result["valid_scores"]["bleu_scores"]["bleu1"]
        if do_translation
        else -1,
        dev_best_translation_result["valid_scores"]["bleu_scores"]["bleu2"]
        if do_translation
        else -1,
        dev_best_translation_result["valid_scores"]["bleu_scores"]["bleu3"]
        if do_translation
        else -1,
        dev_best_translation_result["valid_scores"]["bleu_scores"]["bleu4"]
        if do_translation
        else -1,
        dev_best_translation_result["valid_scores"]["chrf"] if do_translation else -1,
        dev_best_translation_result["valid_scores"]["rouge"] if do_translation else -1,
    )
    logger.info("*" * 60)

    test_best_result = validate_on_data(
        model=model,
        data=test_data,
        batch_size=batch_size,
        use_cuda=use_cuda,
        batch_type=batch_type,
        dataset_version=dataset_version,
        sgn_dim=sum(cfg["data"]["feature_size"])
        if isinstance(cfg["data"]["feature_size"], list)
        else cfg["data"]["feature_size"],
        features_dim=cfg["data"]["feature_size_cnn"],
        txt_pad_index=txt_vocab.stoi[PAD_TOKEN],
        do_recognition=do_recognition,
        recognition_loss_function=recognition_loss_function if do_recognition else None,
        recognition_loss_weight=1 if do_recognition else None,
        recognition_beam_size=dev_best_recognition_beam_size
        if do_recognition
        else None,
        do_translation=do_translation,
        translation_loss_function=translation_loss_function if do_translation else None,
        translation_loss_weight=1 if do_translation else None,
        translation_max_output_length=translation_max_output_length
        if do_translation
        else None,
        level=level if do_translation else None,
        translation_beam_size=dev_best_translation_beam_size
        if do_translation
        else None,
        translation_beam_alpha=dev_best_translation_alpha if do_translation else None,
        frame_subsampling_ratio=frame_subsampling_ratio,
    )

    logger.info(
        "[TEST] partition [Recognition & Translation] results:\n\t"
        "Best CTC Decode Beam Size: %d\n\t"
        "Best Translation Beam Size: %d and Alpha: %d\n\t"
        "WER %3.2f\t(DEL: %3.2f,\tINS: %3.2f,\tSUB: %3.2f)\n\t"
        "BLEU-4 %.2f\t(BLEU-1: %.2f,\tBLEU-2: %.2f,\tBLEU-3: %.2f,\tBLEU-4: %.2f)\n\t"
        "CHRF %.2f\t"
        "ROUGE %.2f",
        dev_best_recognition_beam_size if do_recognition else -1,
        dev_best_translation_beam_size if do_translation else -1,
        dev_best_translation_alpha if do_translation else -1,
        test_best_result["valid_scores"]["wer"] if do_recognition else -1,
        test_best_result["valid_scores"]["wer_scores"]["del_rate"]
        if do_recognition
        else -1,
        test_best_result["valid_scores"]["wer_scores"]["ins_rate"]
        if do_recognition
        else -1,
        test_best_result["valid_scores"]["wer_scores"]["sub_rate"]
        if do_recognition
        else -1,
        test_best_result["valid_scores"]["bleu"] if do_translation else -1,
        test_best_result["valid_scores"]["bleu_scores"]["bleu1"]
        if do_translation
        else -1,
        test_best_result["valid_scores"]["bleu_scores"]["bleu2"]
        if do_translation
        else -1,
        test_best_result["valid_scores"]["bleu_scores"]["bleu3"]
        if do_translation
        else -1,
        test_best_result["valid_scores"]["bleu_scores"]["bleu4"]
        if do_translation
        else -1,
        test_best_result["valid_scores"]["chrf"] if do_translation else -1,
        test_best_result["valid_scores"]["rouge"] if do_translation else -1,
    )
    logger.info("*" * 60)

    def _write_to_file(file_path: str, sequence_ids: List[str], hypotheses: List[str]):
        with open(file_path, mode="w", encoding="utf-8") as out_file:
            for seq, hyp in zip(sequence_ids, hypotheses):
                out_file.write(seq + "|" + hyp + "\n")

    if output_path is not None:
        if do_recognition:
            dev_gls_output_path_set = "{}.BW_{:03d}.{}.gls".format(
                output_path, dev_best_recognition_beam_size, "dev"
            )
            _write_to_file(
                dev_gls_output_path_set,
                [s for s in dev_data.sequence],
                dev_best_recognition_result["gls_hyp"],
            )
            test_gls_output_path_set = "{}.BW_{:03d}.{}.gls".format(
                output_path, dev_best_recognition_beam_size, "test"
            )
            _write_to_file(
                test_gls_output_path_set,
                [s for s in test_data.sequence],
                test_best_result["gls_hyp"],
            )

        if do_translation:
            if dev_best_translation_beam_size > -1:
                dev_txt_output_path_set = "{}.BW_{:02d}.A_{:1d}.{}.txt".format(
                    output_path,
                    dev_best_translation_beam_size,
                    dev_best_translation_alpha,
                    "dev",
                )
                test_txt_output_path_set = "{}.BW_{:02d}.A_{:1d}.{}.txt".format(
                    output_path,
                    dev_best_translation_beam_size,
                    dev_best_translation_alpha,
                    "test",
                )
            else:
                dev_txt_output_path_set = "{}.BW_{:02d}.{}.txt".format(
                    output_path, dev_best_translation_beam_size, "dev"
                )
                test_txt_output_path_set = "{}.BW_{:02d}.{}.txt".format(
                    output_path, dev_best_translation_beam_size, "test"
                )

            _write_to_file(
                dev_txt_output_path_set,
                [s for s in dev_data.sequence],
                dev_best_translation_result["txt_hyp"],
            )
            _write_to_file(
                test_txt_output_path_set,
                [s for s in test_data.sequence],
                test_best_result["txt_hyp"],
            )

        with open(output_path + ".dev_results.pkl", "wb") as out:
            pickle.dump(
                {
                    "recognition_results": dev_recognition_results
                    if do_recognition
                    else None,
                    "translation_results": dev_translation_results
                    if do_translation
                    else None,
                },
                out,
            )
        with open(output_path + ".test_results.pkl", "wb") as out:
            pickle.dump(test_best_result, out)

def test(cfg_file,
         ckpt: str,
         output_path: str = None,
         save_attention: bool = False,
         logger: logging.Logger = None) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load
    :param output_path: path to output
    :param save_attention: whether to save the computed attention weights
    :param logger: log output to this logger (creates new logger if not set)
    """

    if logger is None:
        logger = logging.getLogger(__name__)
        FORMAT = '%(asctime)-15s - %(message)s'
        logging.basicConfig(format=FORMAT)
        logger.setLevel(level=logging.DEBUG)

    cfg = load_config(cfg_file)

    if "test" not in cfg["data"].keys():
        raise ValueError("Test data must be specified in config.")

    # when checkpoint is not specified, take latest (best) from model dir
    if ckpt is None:
        model_dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))
        try:
            step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
        except IndexError:
            step = "best"

    batch_size = cfg["training"]["batch_size"]
    batch_type = cfg["training"].get("batch_type", "sentence")
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    eval_metric = cfg["training"]["eval_metric"]
    max_output_length = cfg["training"].get("max_output_length", None)

    # load the data
    _, dev_data, test_data, src_vocab, trg_vocab = load_data(
        data_cfg=cfg["data"])
    data_to_predict = {"dev": dev_data, "test": test_data}

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        beam_size = cfg["testing"].get("beam_size", 0)
        beam_alpha = cfg["testing"].get("alpha", -1)
    else:
        beam_size = 0
        beam_alpha = -1

    for data_set_name, data_set in data_to_predict.items():

        # pylint: disable=unused-variable
        score, loss, ppl, sources, sources_raw, references, hypotheses, \
            hypotheses_raw, attention_scores = validate_on_data(
                model, data=data_set, batch_size=batch_size,
                batch_type=batch_type, level=level,
                max_output_length=max_output_length,
                eval_metric=eval_metric, use_cuda=use_cuda,
                loss_function=None, beam_size=beam_size,
                beam_alpha=beam_alpha)
        # pylint: enable=unused-variable

        if "trg" in data_set.fields:
            decoding_description = "Greedy decoding" if beam_size == 0 else \
                "Beam search decoding with beam size = {} and alpha = {}".format(
                    beam_size, beam_alpha)
            logger.info("%4s %s: %6.2f [%s]",
                        data_set_name, eval_metric, score,
                        decoding_description)
        else:
            logger.info("No references given for %s -> no evaluation.",
                        data_set_name)

        if save_attention:
            if attention_scores:
                attention_name = "{}.{}.att".format(data_set_name, step)
                attention_path = os.path.join(model_dir, attention_name)
                logger.info("Saving attention plots. This might take a while..")
                store_attention_plots(attentions=attention_scores,
                                      targets=hypotheses_raw,
                                      sources=[s for s in data_set.src],
                                      indices=range(len(hypotheses)),
                                      output_prefix=attention_path)
                logger.info("Attention plots saved to: %s", attention_path)
            else:
                logger.warning("Attention scores could not be saved. "
                               "Note that attention scores are not available "
                               "when using beam search. "
                               "Set beam_size to 0 for greedy decoding.")

        if output_path is not None:
            output_path_set = "{}.{}".format(output_path, data_set_name)
            with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                for hyp in hypotheses:
                    out_file.write(hyp + "\n")
            logger.info("Translations saved to: %s", output_path_set)

def close_sketch_gaps(sketch: adsk.fusion.Sketch, tolerance, logger: logging.Logger):
    ao = apper.AppObjects()
    # factor = int(floor(1/tolerance))
    bounding_box = sketch.boundingBox
    min_x = bounding_box.minPoint.x
    min_y = bounding_box.minPoint.y
    max_x = bounding_box.maxPoint.x
    max_y = bounding_box.maxPoint.y

    # Map the sketch bounding box onto an integer grid (a simple spatial hash)
    # so that points within tolerance land in the same or an adjacent cell.
    factor = int(floor(2000 / ((max_x - min_x) + (max_y - min_y))))
    x_range = int(floor(factor * (max_x - min_x)))
    y_range = int(floor(factor * (max_y - min_y)))
    trans_x = round(0 - min_x, 6)
    trans_y = round(0 - min_y, 6)

    # Debug
    # str_comp = str(x_range) + ', ' + str(y_range)
    # ao.ui.messageBox(str_comp)
    #
    # str_comp = str(trans_x) + ', ' + str(trans_y)
    # ao.ui.messageBox(str_comp)

    grid = [[[] for i in range(x_range + 2)] for j in range(y_range + 2)]
    str_list = []
    constrained_points: int = 0

    sketch_point: adsk.fusion.SketchPoint
    for sketch_point in sketch.sketchPoints:
        if sketch_point.geometry.z == 0:
            if bounding_box.contains(sketch_point.worldGeometry):
                x_pos: int = int(floor(factor * (trans_x + sketch_point.worldGeometry.x)))
                y_pos: int = int(floor(factor * (trans_y + sketch_point.worldGeometry.y)))

                point_check_list = grid[y_pos][x_pos]
                point_merged = False

                # Constrain this point to any previously seen point in its cell
                # that lies within tolerance.
                for point_check in point_check_list:
                    if isinstance(point_check, adsk.fusion.SketchPoint):
                        if sketch_point.worldGeometry.distanceTo(point_check.worldGeometry) <= tolerance:
                            try:
                                sketch.geometricConstraints.addCoincident(sketch_point, point_check)
                                constrained_points += 1
                                point_merged = True
                            except Exception:
                                logger.error(f"Constrain Points Error: {traceback.format_exc(2)}")

                if not point_merged:
                    # Register the point in its own cell and all eight
                    # neighbours so later points are matched across cell edges.
                    grid[y_pos][x_pos].append(sketch_point)
                    grid[y_pos + 1][x_pos].append(sketch_point)
                    grid[y_pos - 1][x_pos].append(sketch_point)
                    grid[y_pos][x_pos + 1].append(sketch_point)
                    grid[y_pos + 1][x_pos + 1].append(sketch_point)
                    grid[y_pos - 1][x_pos + 1].append(sketch_point)
                    grid[y_pos][x_pos - 1].append(sketch_point)
                    grid[y_pos + 1][x_pos - 1].append(sketch_point)
                    grid[y_pos - 1][x_pos - 1].append(sketch_point)
                    str_list.append(str(x_pos) + ', ' + str(y_pos))

    # ao.ui.messageBox(str(str_list))
    # if merged_points > 0:
    #     ao.ui.messageBox(f"Number of merged points: {merged_points}")

    if constrained_points > 0:
        logger.info(f"There were {constrained_points} gaps closed in "
                    f"{sketch.parentComponent.name} - {sketch.name}")
from contextlib import contextmanager

@contextmanager
def timer(logger: logging.Logger, prefix: str) -> Iterator[None]:
    """Timed context manager"""
    start_time = time.time()
    yield
    logger.info(f"{prefix} took {time.time() - start_time:.3f} [s]")
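# Usage sketch for timer(), relying on the contextlib.contextmanager decorator
# added above; the work inside the block is a stand-in.
log = logging.getLogger(__name__)
with timer(log, "data loading"):
    time.sleep(0.25)  # logs e.g. "data loading took 0.250 [s]"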
def label_to_proto(logger: Logger, label: str, text: str) -> proto.NerType:
    if label in ner_mapping:
        return ner_mapping[label]
    # logger.warn is a deprecated alias for logger.warning
    logger.warning("Unsupported ner label {} for text {}".format(label, text))
    return proto.NerType.OTHER
def _get_ngram_stats_df_core(symbol_order: List[str], symbols: SymbolIdDict,
                             trainset: PreparedDataList, valset: PreparedDataList,
                             testset: PreparedDataList, restset: PreparedDataList,
                             n: int, logger: Logger):
    logger.info(f"Get {n}-grams...")
    trn_symbols = [symbols.get_symbols(x.serialized_symbol_ids) for x in trainset.items()]
    val_symbols = [symbols.get_symbols(x.serialized_symbol_ids) for x in valset.items()]
    tst_symbols = [symbols.get_symbols(x.serialized_symbol_ids) for x in testset.items()]
    rst_symbols = [symbols.get_symbols(x.serialized_symbol_ids) for x in restset.items()]

    # n-grams per utterance for each subset
    trn_symbols_ngrams = [get_ngrams(x, n=n) for x in trn_symbols]
    val_symbols_ngrams = [get_ngrams(x, n=n) for x in val_symbols]
    tst_symbols_ngrams = [get_ngrams(x, n=n) for x in tst_symbols]
    rst_symbols_ngrams = [get_ngrams(x, n=n) for x in rst_symbols]

    logger.info("Get stats...")

    occurrences_count_df = get_occ_df_of_all_symbols(
        symbols=symbol_order,
        data_trn=trn_symbols_ngrams,
        data_val=val_symbols_ngrams,
        data_tst=tst_symbols_ngrams,
        data_rst=rst_symbols_ngrams,
    )
    occurrences_count_df.columns = [
        FIRST_COL_NAME, 'TRAIN_OCCURRENCES_COUNT', 'VAL_OCCURRENCES_COUNT',
        'TEST_OCCURRENCES_COUNT', 'REST_OCCURRENCES_COUNT', 'TOTAL_OCCURRENCES_COUNT'
    ]
    print(occurrences_count_df)

    occurrences_percent_df = get_rel_occ_df_of_all_symbols(occurrences_count_df)
    occurrences_percent_df.columns = [
        FIRST_COL_NAME, 'TRAIN_OCCURRENCES_PERCENT', 'VAL_OCCURRENCES_PERCENT',
        'TEST_OCCURRENCES_PERCENT', 'REST_OCCURRENCES_PERCENT'
    ]
    print(occurrences_percent_df)

    occurrences_distribution_percent_df = get_dist_among_other_symbols_df_of_all_symbols(
        occs_df=occurrences_count_df,
        data_trn=trn_symbols_ngrams,
        data_val=val_symbols_ngrams,
        data_tst=tst_symbols_ngrams,
        data_rst=rst_symbols_ngrams,
    )
    occurrences_distribution_percent_df.columns = [
        FIRST_COL_NAME, 'TRAIN_OCCURRENCES_DISTRIBUTION_PERCENT',
        'VAL_OCCURRENCES_DISTRIBUTION_PERCENT', 'TEST_OCCURRENCES_DISTRIBUTION_PERCENT',
        'REST_OCCURRENCES_DISTRIBUTION_PERCENT', 'TOTAL_OCCURRENCES_DISTRIBUTION_PERCENT'
    ]
    print(occurrences_distribution_percent_df)

    utterance_occurrences_count_df = get_utter_occ_df_of_all_symbols(
        symbols=symbol_order,
        data_trn=trn_symbols_ngrams,
        data_val=val_symbols_ngrams,
        data_tst=tst_symbols_ngrams,
        data_rst=rst_symbols_ngrams,
    )
    utterance_occurrences_count_df.columns = [
        FIRST_COL_NAME, 'TRAIN_UTTERANCE_OCCURRENCES_COUNT',
        'VAL_UTTERANCE_OCCURRENCES_COUNT', 'TEST_UTTERANCE_OCCURRENCES_COUNT',
        'REST_UTTERANCE_OCCURRENCES_COUNT', 'TOTAL_UTTERANCE_OCCURRENCES_COUNT'
    ]
    print(utterance_occurrences_count_df)

    utterance_occurrences_percent_df = get_rel_utter_occ_df_of_all_symbols(
        utterance_occurrences_count_df)
    utterance_occurrences_percent_df.columns = [
        FIRST_COL_NAME, 'TRAIN_UTTERANCE_OCCURRENCES_PERCENT',
        'VAL_UTTERANCE_OCCURRENCES_PERCENT', 'TEST_UTTERANCE_OCCURRENCES_PERCENT',
        'REST_UTTERANCE_OCCURRENCES_PERCENT'
    ]
    print(utterance_occurrences_percent_df)

    uniform_occurrences_count_df = get_uniform_distr_df_for_occs(
        symbols=symbol_order,
        occ_df=occurrences_count_df,
    )
    uniform_occurrences_count_df.columns = [
        FIRST_COL_NAME, 'TRAIN_UNIFORM_OCCURRENCES_COUNT',
        'VAL_UNIFORM_OCCURRENCES_COUNT', 'TEST_UNIFORM_OCCURRENCES_COUNT',
        'REST_UNIFORM_OCCURRENCES_COUNT', 'TOTAL_UNIFORM_OCCURRENCES_COUNT'
    ]
    print(uniform_occurrences_count_df)

    uniform_occurrences_percent_df = get_rel_uniform_distr_df_for_occs(
        symbols=symbol_order)
    uniform_occurrences_percent_df.columns = [FIRST_COL_NAME, 'UNIFORM_OCCURRENCES_PERCENT']
    print(uniform_occurrences_percent_df)

    return (occurrences_count_df, occurrences_percent_df,
            occurrences_distribution_percent_df, utterance_occurrences_count_df,
            utterance_occurrences_percent_df, uniform_occurrences_count_df,
            uniform_occurrences_percent_df)
async def shutdown(  # type: ignore[no-untyped-def]
    loop: AbstractEventLoop,
    logger: logging.Logger,
    teardown: AsyncFunction,
    signal=None  # a named enum of ints
) -> None:
    '''Cancel active tasks for shutdown'''
    if signal:
        logger.info(f'Received exit signal {signal.name}')
    else:
        logger.info('Unexpected shutdown initiated')
    await asyncio.sleep(5)  # stall error loops
    if teardown:
        try:
            await teardown()
        except Exception:
            logger.exception('Error during teardown function')
            logger.error('Exiting uncleanly')
            sys.exit(1)
    # gather every task except the one running this coroutine
    tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()]
    logger.info(f'Cancelling {len(tasks)} tasks')
    for task in tasks:
        task.cancel()
    try:
        await asyncio.gather(*tasks, return_exceptions=True)
    except Exception:
        logger.exception('Error during loop task cancellation')
        logger.error('Exiting uncleanly')
        sys.exit(1)
    loop.stop()
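# Hedged wiring sketch: register shutdown() for SIGINT/SIGTERM on a running
# loop (add_signal_handler is Unix-only). do_teardown is a hypothetical
# coroutine standing in for the real teardown callback.
import signal as _signal

async def do_teardown() -> None:
    pass  # release connections, flush buffers, ...

def install_signal_handlers(loop: AbstractEventLoop, logger: logging.Logger) -> None:
    for sig in (_signal.SIGINT, _signal.SIGTERM):
        loop.add_signal_handler(
            sig,
            lambda s=sig: asyncio.create_task(
                shutdown(loop, logger, do_teardown, signal=s)))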
def debug_exc_log(lg: logging.Logger, exc: Exception, msg: str = None) -> None: """Logs an exception if logging is set to DEBUG level""" if lg.getEffectiveLevel() <= logging.DEBUG: if msg is None: msg = f"{exc}" lg.exception(msg, exc_info=exc)
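# Usage sketch: debug_exc_log() is cheap to call unconditionally, since it
# checks the effective level before formatting anything.
log = logging.getLogger("example")
log.setLevel(logging.DEBUG)
try:
    1 / 0
except ZeroDivisionError as err:
    debug_exc_log(log, err, "division failed")  # emits the traceback at DEBUG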
def wrapper(*args, **kwargs):
    source = args[0].get('source')
    if source == 'serverless-plugin-warmup':
        Logger.info('WarmUp - Lambda is warm!')
        return {}
    return func(*args, **kwargs)
async def _generate_app_chunks( app_path: str, logger: Logger) -> AsyncIterator[InstallRequest]: logger.debug(f"Generating chunks for .app {app_path}") async for chunk in tar.generate_tar([app_path]): yield InstallRequest(payload=Payload(data=chunk)) logger.debug(f"Finished generating .app chunks {app_path}")
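# Usage sketch: consume the async chunk stream. The counting consumer is
# illustrative; a real client would forward each InstallRequest to a gRPC
# stream.
async def count_chunks(app_path: str, logger: Logger) -> int:
    total = 0
    async for _request in _generate_app_chunks(app_path, logger):
        total += 1
    return total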
def __init__(self, item: str, log: logging.Logger, datatracker: str): self.name = basename(item) self.revision = revision(item) self.path = path(item) with tempfile.TemporaryDirectory() as tmp: current_directory = os.getcwd() log.debug("tmp dir %s", tmp) self.orig = "" if item != "/dev/stdin": os.chdir(tmp) orig_item = os.path.basename(item) get_items([orig_item], log, datatracker) self.orig = read(orig_item, log) os.chdir(current_directory) self.current = read(item, log) if not self.orig: log.error( "No original for %s, cannot review, " "only performing checks", item, ) self.orig_lines = self.orig.splitlines(keepends=True) self.current_lines = self.current.splitlines(keepends=True) # difflib can't deal with single lines it seems if len(self.orig_lines) == 1: self.orig_lines.append("\n") if len(self.current_lines) == 1: self.current_lines.append("\n") # set status status = re.search( r"^(?:[Ii]ntended )?[Ss]tatus:\s*((?:\w+\s)+)", self.orig, re.MULTILINE, ) self.status = status.group(1).strip() if status else "" # extract relationships self.relationships = {} rel_pat = {"updates": r"[Uu]pdates", "obsoletes": r"[Oo]bsoletes"} for rel in ["updates", "obsoletes"]: match = re.search( r"^" + rel_pat[rel] + r":\s*((?:(?:RFC\s*)?\d{3,},?\s*)+)" + r"(?:.*[\n\r\s]+((?:(?:RFC\s*)?\d{3,},?\s*)+)?)?", self.orig, re.MULTILINE, ) if match: tmp = "".join([group for group in match.groups() if group]) tmp = re.sub("rfc", "", tmp, flags=re.IGNORECASE) tmp = re.sub(r"[,\s]+(\w)", r",\1", tmp) self.relationships[rel] = [ r for r in tmp.strip().split(",") if r ] in_abstract = False abstract = "" for line in self.orig_lines: pot_sec = SECTION_PATTERN.search(line) if pot_sec: which = pot_sec.group(0) if re.search(r"^Abstract", which): in_abstract = True continue if abstract: break if in_abstract: abstract += line self.abstract = unfold(abstract).strip() self.meta = fetch_meta(datatracker, self.name, log) self.is_id = self.name.startswith("draft-") parts = {"text": "", "informative": "", "normative": ""} part = "text" for line in self.orig_lines: pot_sec = SECTION_PATTERN.search(line) if pot_sec: which = pot_sec.group(0) if re.search( r"^(?:(\d\.?)+\s+)?(?:Non-Norm|Inform)ative\s+References?\s*$", which, flags=re.IGNORECASE, ): part = "informative" elif re.search( r"^(?:(\d\.?)+\s+)?(Normative\s+)?References?\s*$", which, flags=re.IGNORECASE, ): part = "normative" else: part = "text" parts[part] += line refs = {} for part, content in parts.items(): refs[part] = re.findall( r"(\[(?:\d+|[a-z]+(?:[-_.]?\w+)*)\]" + (r"|RFC\d+|draft-[-a-z\d_.]+" if part == "text" else r"") + r")", unfold(content), flags=re.IGNORECASE, ) refs[part] = list({f"[{untag(ref)}]" for ref in refs[part]}) self.references = {} for part in ["informative", "normative"]: self.references[part] = [] for ref in refs[part]: ref_match = re.search( r"\s*" + re.escape(ref) + r"\s+((?:[^\n][\n]?)+)\n", parts[part], re.DOTALL, ) if ref_match: ref_text = unfold(ref_match.group(0)) found = False for pat in [r"(draft-[-a-z\d_.]+)", r"((?:RFC|rfc)\d+)"]: match = re.search(pat, ref_text) if match: found = True self.references[part].append( (ref, match.group(0).lower())) break if not found: urls = extract_urls(ref_text, log, True, True) self.references[part].append( (ref, urls.pop() if urls else None)) self.references["text"] = refs["text"]
def __init__(self, logger: logging.Logger) -> None: self._logger = logger.getChild("job_manager") super(BackgroundJobManager, self).__init__()
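# Usage sketch: the manager logs under "<parent>.job_manager" in the logging
# hierarchy.
base_logger = logging.getLogger("app")
manager = BackgroundJobManager(base_logger)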
def generate_n_single_target_tree_rules(
    n_tree_rules_to_generate: int,
    prepared_data: PreparedDataForTargetSet,
    encoding_book_keeper: EncodingBookKeeper,
    min_support: float,
    max_depth: int,
    logger: Logger,
    random_forest_abs_file_name: str,
    seed: Optional[int] = None,
) -> Tuple[List[MCAR], TreeRuleGenTimingInfo]:
    if seed is None:
        raise Exception("A random seed must be provided")
    if n_tree_rules_to_generate <= 0:
        raise Exception(
            f"n_tree_rules_to_generate = {n_tree_rules_to_generate} but should be larger than 0")

    logger.info(f'Start generating tree rules... Goal number: {n_tree_rules_to_generate}')

    # nb_of_trees_to_use: int = 1
    # nb_of_tree_based_rules_after_conversion: int = 0
    # current_rf_list: Optional[List[Tuple[PreparedDataForTargetSet, RandomForestClassifier]]] = None

    # prepared_data_list: List[PreparedDataForTargetSet] = []
    # for original_target_attribute_groups in attr_group_partitioning_list:
    #     attr_group: AttrGroup
    #     for attr_group in original_target_attribute_groups:
    #         prepared_data: PreparedDataForTargetSet = get_prepared_data_for_attr_group(
    #             original_group_to_predict=attr_group,
    #             df_original=df_original,
    #             df_one_hot_encoded=df_one_hot_encoded,
    #             encoding_book_keeper=encoding_book_keeper
    #         )
    #         prepared_data_list.append(prepared_data)

    optional_rf_classifier: Optional[RandomForestClassifier]
    total_time_decision_tree_learning_s: TimeDiffSec
    optional_rf_classifier, total_time_decision_tree_learning_s = \
        search_nb_of_single_target_trees_to_use(
            n_tree_rules_to_generate=n_tree_rules_to_generate,
            prepared_data=prepared_data,
            min_support=min_support,
            max_depth=max_depth,
            logger=logger,
            seed=seed
        )
    # -----------------------------------------------------------------------------------------------------------
    if optional_rf_classifier is None:
        raise Exception("Could not learn a random forest for the requested number of tree rules")

    logger.info(f'Learned RF has {len(optional_rf_classifier.estimators_)} trees')

    tree_based_rules: List[MCAR]
    total_time_rf_conversion_s: TimeDiffSec
    tree_based_rules, total_time_rf_conversion_s = convert_random_forest_to_rules(
        random_forest_clf=optional_rf_classifier,
        df_original_without_nans=prepared_data.df_original_without_nans_for_targets,
        descriptive_one_hot_encoded_column_names=prepared_data.descriptive_one_hot_encoded_columns,
        target_attribute_names=prepared_data.target_one_hot_encoded_columns,
        encoding_book_keeper=encoding_book_keeper,
        logger=logger)

    store_classifier(SingleTargetClassifierIndicator.random_forest,
                     random_forest_abs_file_name, optional_rf_classifier)
    logger.info(f"Wrote RF to {random_forest_abs_file_name}")

    if len(tree_based_rules) > n_tree_rules_to_generate:
        tree_based_rules = random.sample(tree_based_rules, n_tree_rules_to_generate)

    logger.info(f"REALITY: found {len(tree_based_rules)} tree based rules, "
                f"wanted {n_tree_rules_to_generate}")

    # log a preview of at most the first twelve rules
    for rule in tree_based_rules[:12]:
        logger.info(str(rule))

    tree_rule_gen_timing_info = TreeRuleGenTimingInfo(
        total_time_decision_tree_learning_s=total_time_decision_tree_learning_s,
        total_time_rf_conversion_s=total_time_rf_conversion_s)

    return tree_based_rules, tree_rule_gen_timing_info
def __init__(self, name: str, token: str, parent_logger: logging.Logger) -> None: self.log = parent_logger.getChild(name) self.token = token
async def call_kubeapi( method: Callable[..., Awaitable], logger: logging.Logger, *, continue_on_absence=False, continue_on_conflict=False, namespace: str = None, body: K8sModel = None, **kwargs, ) -> Optional[Awaitable[K8sModel]]: """ Await a Kubernetes API method and return its result. If the API fails with an HTTP 404 NOT FOUND error and ``continue_on_absence`` is set to ``True`` a warning is raised and ``call_kubeapi`` returns ``None``. If the API fails with an HTTP 409 CONFLICT error and ``continue_on_conflict`` is set to ``True`` a warning is raised and ``call_kubeapi`` returns ``None``. In case of any other error or when either option is set to ``False`` (default) the :exc:`kubernetes_asyncio.client.exceptions.ApiException` is re-raised. :param method: A Kubernetes API function which will be called with ``namespace`` and ``body``, if provided, and all other ``kwargs``. The function will also be awaited and the response returned. :param logger: :param continue_on_absence: When ``True``, emit a warning instead of an error on HTTP 404 responses. :param continue_on_conflict: When ``True``, emit a warning instead of an error on HTTP 409 responses. :param namespace: The namespace passed to namespaced K8s API endpoints. :param body: The body passed to the K8s API endpoints. """ try: if namespace is not None: kwargs["namespace"] = namespace if body is not None: kwargs["body"] = body return await method(**kwargs) except ApiException as e: if (e.status == 409 and continue_on_conflict or e.status == 404 and continue_on_absence): msg = ["Failed", "creating" if e.status == 409 else "deleting"] args = [] if body: if e.status == 409: # For 404 the body is `V1DeleteOptions`; not very helpful. msg.append("%s") args.append(body.__class__.__name__) if namespace: obj_name = None if e.status == 404: # Let's try the explicit name obj_name = kwargs.get("name") if obj_name is None: obj_name = getattr(getattr(body, "metadata", None), "name", "<unknown>") msg.append("'%s/%s'") args.extend([namespace, obj_name]) cause = "already exists" if e.status == 409 else "doesn't exist" msg.append(f"because it {cause}. Continuing.") logger.info(" ".join(msg), *args) return None else: raise
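# Hedged usage sketch with kubernetes_asyncio: V1Deployment, V1ObjectMeta and
# AppsV1Api.create_namespaced_deployment are real kubernetes_asyncio names,
# but the manifest below is a minimal placeholder.
from kubernetes_asyncio import client

async def create_demo_deployment(api: client.AppsV1Api, logger: logging.Logger):
    body = client.V1Deployment(metadata=client.V1ObjectMeta(name="demo"))
    # Returns None instead of raising if the Deployment already exists.
    return await call_kubeapi(api.create_namespaced_deployment, logger,
                              continue_on_conflict=True,
                              namespace="default", body=body)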
import os import platform import tarfile import tempfile from io import BytesIO, FileIO from logging import Logger from shutil import rmtree from zipfile import ZipFile import requests logger = Logger('KINDLEGEN') WINDOWS_URL = 'http://kindlegen.s3.amazonaws.com/kindlegen_win32_v2_9.zip' MACOS_URL = 'http://kindlegen.s3.amazonaws.com/KindleGen_Mac_i386_v2_9.zip' LINUX_URL = 'http://kindlegen.s3.amazonaws.com/kindlegen_linux_2.6_i386_v2_9.tar.gz' def get_url_by_platform(): if platform.system() == 'Linux': return LINUX_URL elif platform.system() == 'Darwin': return MACOS_URL elif platform.system() == 'Windows': return WINDOWS_URL else: raise Exception('Unrecognized platform') # end if # end def
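# Hedged sketch of how the imports above are presumably used: fetch the
# platform-specific archive and unpack it. download_kindlegen is a
# hypothetical helper, not a function from this module.
def download_kindlegen(dest_dir: str) -> None:
    url = get_url_by_platform()
    logger.info('Downloading %s', url)
    response = requests.get(url)
    response.raise_for_status()
    if url.endswith('.zip'):
        ZipFile(BytesIO(response.content)).extractall(dest_dir)
    else:
        with tarfile.open(fileobj=BytesIO(response.content), mode='r:gz') as archive:
            archive.extractall(dest_dir)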
def analyze_files(filepath: str, exclude: list, lazy: bool, logger: logging.Logger) -> list: """Analyze files for vulnerabilities. Parameters ---------- filepath : str Path where to find the file(s) exclude : list List of regular expressions to exclude in `path` lazy : bool Ignore mutually exclusive paths through methods logger : logging.Logger The logger to use for runtime output Returns ------- list List of all the analyses """ analyses = [] rulesets = dict() # Detect files for filename in find_files(filepath, exclude=exclude): logger.info('Now processing "%s".', filename) input_file = InputFile(filename) # Prepare file for analysis input_file.detect_filetype() try: grammar_module = importlib.import_module( f'modules.{input_file.module}.grammar') grammar = grammar_module.Grammar(input_file) logger.info('Starting analysis for "%s".', input_file.path) ruleset = rulesets.get(input_file.module) if not ruleset: # Load new ruleset rulesets[input_file.module] = Ruleset(input_file.module) ruleset = rulesets[input_file.module] analysis = Analysis(grammar, ruleset) for method in analysis.methods: # Analyze method analysis.calculate_complexity(method) analysis.follow_variables(method) analysis.fix_object_names(method) all_sources = analysis.find_sources(method) all_sinks = analysis.find_sinks(method) all_sanitizers = analysis.find_sanitizers(method) if not lazy: analysis.find_paths_through(method) else: # Assume single path through method, ignore mutually exclusive paths method.paths = [[(method.start, method.end)]] for path in method.paths: # Analyze individual paths through the method method.sources = copy(all_sources) method.sinks = copy(all_sinks) method.sanitizers = copy(all_sanitizers) analysis.find_taints(method, path) if len(method.paths) > 1: # We use multiple paths to better detect taints, but we still need all the # sinks, so another round through the whole method is necessary here method.sources = all_sources method.sinks = all_sinks method.sanitizers = all_sanitizers taints = method.taints analysis.find_taints(method, [(method.start, method.end)]) method.taints = taints analyses.append(analysis) except ModuleNotFoundError: logger.error('No grammar found for "%s".', input_file.module) return analyses
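# Usage sketch for analyze_files(); the path and exclude patterns are
# illustrative, and the reporting loop assumes the Analysis/method attributes
# referenced in the function above.
log = logging.getLogger("scanner")
for analysis in analyze_files("./src", exclude=[r"tests?/"], lazy=False, logger=log):
    for method in analysis.methods:
        if method.taints:
            log.warning("Tainted data reaches a sink in %s", method)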
def run_check_with_model(model_with_type_info: onnx.ModelProto, mobile_pkg_build_config: pathlib.Path, logger: logging.Logger): ''' Check if an ONNX model can be used with the ORT Mobile pre-built package. :param model_with_type_info: ONNX model that has had ONNX shape inferencing run on to add type/shape information. :param mobile_pkg_build_config: Configuration file used to build the ORT Mobile package. :param logger: Logger for output :return: True if supported ''' if not mobile_pkg_build_config: mobile_pkg_build_config = get_default_config_path() enable_type_reduction = True config_path = str(mobile_pkg_build_config.resolve(strict=True)) required_ops, op_type_impl_filter = parse_config(config_path, enable_type_reduction) global_onnx_tensorproto_types, special_types = _get_global_tensorproto_types( op_type_impl_filter, logger) # get the opset imports opsets = get_opsets_imported(model_with_type_info) # If the ONNX opset of the model is not supported we can recommend using our tools to update that first. supported_onnx_opsets = set(required_ops['ai.onnx'].keys()) # we have a contrib op that is erroneously in the ai.onnx domain with opset 1. manually remove that incorrect value supported_onnx_opsets.remove(1) onnx_opset_model_uses = opsets['ai.onnx'] if onnx_opset_model_uses not in supported_onnx_opsets: logger.info(f'Model uses ONNX opset {onnx_opset_model_uses}.') logger.info( f'The pre-built package only supports ONNX opsets {sorted(supported_onnx_opsets)}.' ) logger.info( 'Please try updating the ONNX model opset to a supported version using ' 'python -m onnxruntime.tools.onnx_model_utils.update_onnx_opset ...' ) return False unsupported_ops = set() logger.debug( 'Checking if the data types and operators used in the model are supported ' 'in the pre-built ORT package...') unsupported = check_graph(model_with_type_info.graph, opsets, required_ops, global_onnx_tensorproto_types, special_types, unsupported_ops, logger) if unsupported_ops: logger.info('Unsupported operators:') for entry in sorted(unsupported_ops): logger.info(' ' + entry) if unsupported: logger.info( '\nModel is not supported by the pre-built package due to unsupported types and/or operators.' ) logger.info( 'Please see https://onnxruntime.ai/docs/reference/mobile/prebuilt-package/ for information ' 'on what is supported in the pre-built package.') logger.info( 'A custom build of ONNX Runtime will be required to run the model. Please see ' 'https://onnxruntime.ai/docs/build/custom.html for details on performing that.' ) else: logger.info('Model should work with the pre-built package.') logger.info('---------------\n') return not unsupported
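# Hedged usage sketch: run ONNX shape inference first, then check against the
# default mobile package config (passing None falls back to
# get_default_config_path()). "model.onnx" is a placeholder path.
model = onnx.load("model.onnx")
model_with_types = onnx.shape_inference.infer_shapes(model)
check_logger = logging.getLogger("ort_mobile_check")
supported = run_check_with_model(model_with_types, None, check_logger)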
def trinity_boot(args: Namespace, trinity_config: TrinityConfig, extra_kwargs: Dict[str, Any], plugin_manager: PluginManager, listener: logging.handlers.QueueListener, event_bus: EventBus, main_endpoint: Endpoint, logger: logging.Logger) -> None: # start the listener thread to handle logs produced by other processes in # the local logger. listener.start() networking_endpoint = event_bus.create_endpoint( NETWORKING_EVENTBUS_ENDPOINT) event_bus.start() # First initialize the database process. database_server_process = ctx.Process( target=run_database_process, args=( trinity_config, LevelDB, ), kwargs=extra_kwargs, ) networking_process = ctx.Process( target=launch_node, args=( args, trinity_config, networking_endpoint, ), kwargs=extra_kwargs, ) # start the processes database_server_process.start() logger.info("Started DB server process (pid=%d)", database_server_process.pid) # networking process needs the IPC socket file provided by the database process try: wait_for_ipc(trinity_config.database_ipc_path) except TimeoutError as e: logger.error("Timeout waiting for database to start. Exiting...") kill_process_gracefully(database_server_process, logger) ArgumentParser().error(message="Timed out waiting for database start") networking_process.start() logger.info("Started networking process (pid=%d)", networking_process.pid) main_endpoint.subscribe( ShutdownRequest, lambda ev: kill_trinity_gracefully( logger, database_server_process, networking_process, plugin_manager, main_endpoint, event_bus, ev.reason)) plugin_manager.prepare(args, trinity_config, extra_kwargs) try: loop = asyncio.get_event_loop() loop.run_forever() loop.close() except KeyboardInterrupt: kill_trinity_gracefully(logger, database_server_process, networking_process, plugin_manager, main_endpoint, event_bus, reason="CTRL+C / Keyboard Interrupt")