def get_extracted_files(dst_path, logger: logging.Logger):
    logger.info('Getting list of already extracted files...')

    published_files = [f for f in os.listdir(dst_path) if f.endswith('.jpg')]
    dup_files = [f for f in os.listdir(dst_path / 'dups/') if f.endswith('.jpg')]

    return published_files + dup_files
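A hedged usage sketch for the helper above: dst_path must be a pathlib.Path (the / join with 'dups' implies it), and both the directory and its dups/ subfolder are assumed to exist.

from pathlib import Path
import logging

already_done = get_extracted_files(Path('wallpapers'), logging.getLogger(__name__))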
Example #2
    def repair(
        self, log: logging.Logger, overlay: overlay_mod.Overlay, fsck_dir: Path
    ) -> bool:
        # TODO: It would be nice to try and get the contents of the
        # file/directory at this location in the current commit, rather than
        # just writing out an empty file or directory

        backup_dir = fsck_dir / "broken_inodes"
        backup_dir.mkdir(exist_ok=True)
        inode_data_path = Path(overlay.get_path(self.inode.inode_number))
        inode_backup_path = backup_dir / str(self.inode.inode_number)

        if self.expected_type == InodeType.DIR:
            log.info(
                f"replacing corrupt directory inode {self.compute_path()!r} with an "
                "empty directory"
            )
            os.rename(inode_data_path, inode_backup_path)
            overlay.write_empty_dir(self.inode.inode_number)
        else:
            log.info(
                f"replacing corrupt file inode {self.compute_path()!r} with an "
                "empty file"
            )
            os.rename(inode_data_path, inode_backup_path)
            overlay.write_empty_file(self.inode.inode_number)

        return True
Example #3
def make_middleware(app,
                    global_conf,
                    verbose_log=None,
                    trace_log=None,
                    max_bodylen='3KB',
                    max_logsize='100MB',
                    backup_count='10',
                    keep='100',
                    ):
    """ Paste filter-app converter """
    backup_count = int(backup_count)
    max_bytes = byte_size(max_logsize)
    max_bodylen = byte_size(max_bodylen)
    keep = int(keep)
    from logging import Logger
    from logging.handlers import RotatingFileHandler

    if verbose_log:
        handler = RotatingFileHandler(verbose_log, maxBytes=max_bytes,
                                      backupCount=backup_count)
        verbose_log = Logger('repoze.debug.verboselogger')
        verbose_log.handlers = [handler]

    if trace_log:
        handler = RotatingFileHandler(trace_log, maxBytes=max_bytes,
                                      backupCount=backup_count)
        trace_log = Logger('repoze.debug.tracelogger')
        trace_log.handlers = [handler]

    return ResponseLoggingMiddleware(app, max_bodylen, keep, verbose_log,
                                     trace_log)
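A hedged sketch of wiring the filter directly instead of through a Paste INI pipeline; app stands for any WSGI application and the log paths are illustrative.

logged_app = make_middleware(app, {},
                             verbose_log='/var/log/app/verbose.log',
                             trace_log='/var/log/app/trace.log',
                             max_bodylen='3KB', max_logsize='100MB')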
Example #4
    def configure(self, logger: logging.Logger, verbosity: int = 0) -> int:
        """
        Add all configured handlers to the supplied logger. If verbosity > 0 then make sure we have a console logger
        and force the level of the console logger based on the verbosity.

        :param logger: The logger to add the handlers to
        :param verbosity: The verbosity level given as command line argument
        :return: The lowest log level that is going to be handled
        """
        # Remove any previously configured loggers, in case we are re-configuring
        # We are deleting, so copy the list first
        for handler in list(logger.handlers):
            logger.removeHandler(handler)

        # Add the handlers, keeping track of console loggers and saving the one with the "best" level.
        console = None
        for handler_factory in self.handlers:
            handler = handler_factory()
            logger.addHandler(handler)

            if isinstance(handler_factory, ConsoleHandlerFactory):
                console = handler

        # Set according to verbosity
        set_verbosity_logger(logger, verbosity, console)

        # Find the lowest log level
        lowest_level = logging.CRITICAL
        for handler in logger.handlers:
            if handler.level < lowest_level:
                lowest_level = handler.level

        # Return the lowest log level we want, so that we can filter lower priority messages earlier (where appropriate)
        return lowest_level
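The final loop computes the lowest handled level; a minimal stdlib-only sketch of the same idea (the handlers chosen here are illustrative):

import logging

demo = logging.getLogger('demo')
demo.addHandler(logging.StreamHandler())          # level 0 (NOTSET) by default
demo.addHandler(logging.FileHandler('demo.log'))
lowest = min((h.level for h in demo.handlers), default=logging.CRITICAL)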
Example #5
def test():
    logfile = 'log.log'

    log = Logger(logfile)
    log.write_log('', False)

    os.remove(logfile)
Example #6
def make_file_logger(logfile, maxBytes=int(1e7), backupCount=10):
    """Create a logger that mimics the format of Products.LongRequestLogger"""
    if isinstance(logfile, Logger):
        # The Logger is already set up.
        return logfile

    logger = Logger('slowlog')

    if isinstance(logfile, Handler):
        # The Handler is already set up.
        handler = logfile
    else:
        if hasattr(logfile, 'write'):
            # Write to an open file.
            handler = StreamHandler(logfile)
        else:
            # Create a rotating file handler.
            handler = RotatingFileHandler(logfile,
                                          maxBytes=maxBytes,
                                          backupCount=backupCount)
        fmt = Formatter('%(asctime)s - %(message)s')
        handler.setFormatter(fmt)

    logger.addHandler(handler)
    return logger
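A hedged usage sketch for make_file_logger: it accepts a file path, an object with a write method, or a pre-built Handler, so the three calls below exercise each branch.

import sys
from logging import StreamHandler

path_logger = make_file_logger('slowlog.log')                 # rotating file handler
stream_logger = make_file_logger(sys.stderr)                  # object with .write
handler_logger = make_file_logger(StreamHandler(sys.stdout))  # ready-made handler
stream_logger.warning('request exceeded threshold')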
Example #7
 def post(self):
     try:
         name = cgi.escape(self.request.get('name'))
         description = cgi.escape(self.request.get('description'))
         url = cgi.escape(self.request.get('url'))
         seller = cgi.escape(self.request.get('seller'))
     except:
         log.exception( "fail to get data from form" )
         print( "fail to get data from form" )
     
     seller = Seller.get( seller )
     
     if seller:
         turl = URL()
         turl.url = url
         turl.put()
         
         target = Target()
         target.name = name
         target.description = description
         target.url = turl
         target.seller = seller
         target.put()
     else:
         print( "no seller with specified name" )
Example #8
class WhiteWolfTextParser(object):
    """Actual Parser for the WW cardlist text file(s)."""

    def __init__(self, oLogHandler):
        self.oLogger = Logger('White wolf card parser')
        if oLogHandler is not None:
            self.oLogger.addHandler(oLogHandler)
        self._oState = None
        self.reset()

    def reset(self):
        """Reset the parser"""
        self._oState = WaitingForCardName({}, self.oLogger)

    def parse(self, fIn):
        """Feed lines to the state machine"""
        for sLine in fIn:
            self.feed(sLine)
        # Ensure we flush any open card text states
        self.feed('')
        if hasattr(self._oState, 'flush'):
            self._oState.flush()
        else:
            raise IOError('Failed to parse card list - '
                    'unexpected state at end of file.\n'
                    'Card list probably truncated.')

    def feed(self, sLine):
        """Feed the line to the current state"""
        # Strip BOM from line start
        sLine = sLine.decode('utf8').lstrip(u'\ufeff')
        self._oState = self._oState.transition(sLine, None)
Example #9
class Spy(MailService):
    """
    A spy that logs all of the mail correspondence that passes through its hands.
    It only watches objects of the MailMessage class and writes the following messages to the logger.
    If "Austin Powers" is given as the sender or the recipient,
    a WARN-level message must be written to the log:
    Detected target mail correspondence: from {from} to {to} "{message}"
    Otherwise, an INFO-level message must be written to the log:
    Usual correspondence: from {from} to {to}
    """

    def __init__(self):
        self.__logger = Logger("logger")

    def process_mail(self, mail: Mail):
        if isinstance(mail, MailMessage):
            source = mail.get_source()
            destination = mail.get_destination()
            message = mail.get_message()
            if source == banned_address or destination == banned_address:
                self.__logger.warning('Detected target mail correspondence: from {0} to {1} "{2}"'.
                                      format(source, destination, message))
            else:
                self.__logger.info('Usual correspondence: from {0} to {1}'.format(source, destination))
        return mail
Example #10
File: log.py Project: croft/ravel
 def __init__(self):
     Logger.__init__(self, "ravel")
     ch = logging.StreamHandler()
     formatter = logging.Formatter(MSG_FORMAT)
     ch.setFormatter(formatter)
     self.addHandler(ch)
     self.setLogLevel()
Example #11
    def __init__(self,name):
        '''
        new constructor already setting up the different handlers and formatters
        '''
        Logger.__init__(self,name)

        #Flag whether a file log should be created
        fileLog = True

        #Checking whether there is a folder for logging
        if not os.path.isdir(os.path.dirname(logFile)):
            fileLog = False

        #If our log file exists we delete it to have a new log file for every run
        if os.path.isfile(logFile) and fileLog:
            try:
                os.remove(logFile)
            except:
                pass

        if fileLog:
            self.setFileHandler()
            self.addHandler(zeroLogger._FileHandle)

        self.setStreamHandler()
        self.addHandler(zeroLogger._StreamHandle)
Example #12
def spinner(text: str, logger: Logger, quiet=False, debug=False):
    '''Decoration for long running processes.

    :param text: Message to output
    :param logger: Logger to capture the error if it occurs
    :param quiet: If ``True``, messages will be hidden
    :param debug: If ``True``, show full tracebacks
    '''

    # pylint: disable=broad-except

    try:
        logger.info(text)

        if not quiet:
            print(text)

        yield

        if not quiet:
            print('Done\n')

    except Exception as exception:
        exception_traceback = format_exc()

        logger.error(exception_traceback)

        if not quiet:
            if debug:
                print(exception_traceback)

            else:
                print(str(exception))
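The yield suggests spinner is meant to be used as a context manager (presumably wrapped with contextlib.contextmanager, which the excerpt does not show); a hedged usage sketch where rebuild_index is a stand-in for any slow call.

import logging

with spinner('Rebuilding index', logging.getLogger('tasks')):
    rebuild_index()  # hypothetical long-running operation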
    def run(self):
        global logger
        global totalFetchTime
        global totalRequestsCompleted
        while True:
            #grabs host from queue
            host = self.queue.get()
            threadId = threading.current_thread()

            #fetch the url and read up to 100000 bytes of the page
            beginTime = time.time()
            url = urllib2.urlopen(host)
            x = url.read(100000)
            if (not x):
                Logger.warn(logger, "[%s] No data for %s" % (threadId, host))
            endTime = time.time()

            elapsedTime = (endTime - beginTime)
             
            Logger.info(logger, "Request for %s executed in %s" % (host, elapsedTime))
            
            #signals to queue job is done
            totalRequestsCompleted += 1
            totalFetchTime += elapsedTime
            self.queue.task_done()
def extract_files(src_path, extracted_files, logger:logging.Logger):
    logger.info('Extracting new files...')

    list_files = os.listdir(src_path)
    list_land = []
    list_port = []
    for filename in list_files:
        if already_extracted(filename, extracted_files, logger):
            continue

        src_file = src_path + filename
        # check if it is image or not
        try:
            im = Image.open(src_file)
        except OSError:
            continue

        x, y = im.size
        im.close()
        if x == 1920 and y == 1080:
            list_land += [filename]
        if x == 1080 and y == 1920:
            list_port += [filename]

    return list_land, list_port
Example #15
def is_satisfied(requirement: Requirement, logger: logging.Logger) -> bool:
    try:
        requirement.check()
        logger.debug("Requirement '%s' satisfied", requirement.description)
        return True
    except Exception as e:
        logger.error("Requirement '%s' not satisfied: %s", requirement.description, e)
        return False
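A hedged illustration with a duck-typed stand-in for Requirement; the helper only needs a check() method that raises on failure and a description attribute.

import logging
import shutil

class FreeDiskSpace:
    description = 'at least 1 GB free on /'

    def check(self):
        if shutil.disk_usage('/').free < 1 << 30:
            raise RuntimeError('less than 1 GB free')

ok = is_satisfied(FreeDiskSpace(), logging.getLogger(__name__))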
def check_os(logger: logging.Logger):
    logger.info('Checking Windows 10...')

    err_msg = 'This system is not Windows 10. Exit.'
    if sys.platform != 'win32':
        raise OSError(err_msg)
    if platform.release() != '10':
        raise OSError(err_msg)
Example #17
    def __init__(self, name, level=0):
        Logger.__init__(self, name, level)
        self.formatter = self.format

        handler = StreamHandler()
        handler.setFormatter(self.formatter)

        self.addHandler(handler)
Example #18
    def __init__(self, name, level):
        Logger.__init__(self, name, level)
        self.formatter = self.format

        self.handler = PMLogHandler()
        self.handler.setFormatter(self.formatter)

        self.addHandler(self.handler)
Example #19
def copy_database(oOrigConn, oDestConnn, oLogHandler=None):
    """Copy the database, with no attempts to upgrade.

       This is a straight copy, with no provision for funky stuff
       Compatibility of database structures is assumed, but not checked.
       """
    # Not checking versions probably should be fixed
    # Copy tables needed before we can copy AbstractCard
    flush_cache()
    oVer = DatabaseVersion()
    oVer.expire_cache()
    oLogger = Logger('copy DB')
    if oLogHandler:
        oLogger.addHandler(oLogHandler)
        if hasattr(oLogHandler, 'set_total'):
            iTotal = 14 + AbstractCard.select(connection=oOrigConn).count() + \
                    PhysicalCard.select(connection=oOrigConn).count() + \
                    PhysicalCardSet.select(connection=oOrigConn).count()
            oLogHandler.set_total(iTotal)
    bRes = True
    aMessages = []
    oTrans = oDestConnn.transaction()
    aToCopy = [
            (copy_rarity, 'Rarity table', False),
            (copy_expansion, 'Expansion table', False),
            (copy_discipline, 'Discipline table', False),
            (copy_clan, 'Clan table', False),
            (copy_creed, 'Creed table', False),
            (copy_virtue, 'Virtue table', False),
            (copy_card_type, 'CardType table', False),
            (copy_ruling, 'Ruling table', False),
            (copy_discipline_pair, 'DisciplinePair table', False),
            (copy_rarity_pair, 'RarityPair table', False),
            (copy_sect, 'Sect table', False),
            (copy_title, 'Title table', False),
            (copy_artist, 'Artist table', False),
            (copy_keyword, 'Keyword table', False),
            (copy_abstract_card, 'AbstractCard table', True),
            (copy_physical_card, 'PhysicalCard table', True),
            (copy_physical_card_set, 'PhysicalCardSet table', True),
            ]
    for fCopy, sName, bPassLogger in aToCopy:
        try:
            if bRes:
                if bPassLogger:
                    fCopy(oOrigConn, oTrans, oLogger)
                else:
                    fCopy(oOrigConn, oTrans)
        except SQLObjectNotFound, oExp:
            bRes = False
            aMessages.append('Unable to copy %s: Aborting with error: %s'
                    % (sName, oExp))
        else:
            oTrans.commit()
            oTrans.cache.clear()
            if not bPassLogger:
                oLogger.info('%s copied' % sName)
Example #20
def log_to_file(logger: Logger,
                filename: str,
                log_format: str="%(asctime)s %(levelname)-8s %(message)s",
                ) -> None:
    """Note: `filename` should be declared in zproject/settings.py in ZULIP_PATHS."""
    formatter = logging.Formatter(log_format)
    handler = logging.FileHandler(filename)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
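A hedged usage sketch; the path below is illustrative rather than one of the ZULIP_PATHS the docstring refers to.

import logging

queue_logger = logging.getLogger('zulip.queue')
log_to_file(queue_logger, '/tmp/queue.log')
queue_logger.info('worker started')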
Example #21
 def __init__(self, name: str, logger: Logger, ttl: int, config: IdPConfig, lock = None):
     self.logger = logger
     self._cache: ExpiringCache
     if (config.redis_sentinel_hosts or config.redis_host) and config.session_app_key:
         self._cache = ExpiringCacheCommonSession(name, logger, ttl, config, secret=config.session_app_key)
     else:
         # This is used in tests
         self._cache = ExpiringCacheMem(name, logger, ttl, lock)
     logger.debug('Set up IDP ticket cache {!s}'.format(self._cache))
Example #22
class CAENRFIDEventArgs:
	'''This class defines the CAENRFID event arguments.'''
	def __init__(self):
		self._log = Logger("CAENRFIDEventArgs")
		self._log.debug( "Class %s created", self.__class__.__name__ )		

	def getData(self):
		'''Returns the event object value.'''
		raise Exception("Not implemented yet!")
def write_combos():
    logger = Logger('name',20)
    handler = FileHandler('flog.log')
    logger.addHandler(handler)
    with open('namelist.txt','a') as fileobject:
        llist = ("{} {}".format(x,y) for x in names(0, 'names.txt') for y in names(1, 'names.txt'))
        for name in llist:
            if len(name) > 17:
                logger.info('{} is {} characters long'.format(name, len(name)))
            fileobject.write('{}\n'.format(name))
def configure_logger_for_colour(log: logging.Logger,
                                remove_existing: bool = True) -> None:
    """
    Applies a preconfigured datetime/colour scheme to a logger.
    Should ONLY be called from the "if __name__ == '__main__'" script:
        https://docs.python.org/3.4/howto/logging.html#library-config
    """
    if remove_existing:
        log.handlers = []  # http://stackoverflow.com/questions/7484454
    log.addHandler(COLOUR_HANDLER)
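A sketch of the intended call site (COLOUR_HANDLER is defined elsewhere in the originating module):

if __name__ == '__main__':
    configure_logger_for_colour(logging.getLogger())
    logging.getLogger(__name__).info('colour logging enabled')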
Example #25
    def __init__(self):
        Logger.__init__(self, "OVS-CONSOLE")
        # Create console handler
        console = logging.StreamHandler()

        # Add console handler to logging handler
        self.addHandler(console)

        # Setting Logging LEVEL
        self.setLevel(LEVEL)
Example #26
    def __init__(self, name, level=0):
        if level == 0:
            level = option_parser.get_verbose()
        Logger.__init__(self, name, level)
        self.formatter = self.format

        handler = StreamHandler()
        handler.setFormatter(self.formatter)

        self.addHandler(handler)
Example #27
def log_error(
    error: GraphQLError,
    logger: logging.Logger,
    level: int,
):
    logger.log(level, f'{error}')
    tb = error.__traceback__
    while tb and tb.tb_next:
        tb = tb.tb_next
    logger.log(level, f'Execution Context: {tb.tb_frame.f_locals!r}')
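A hedged usage sketch; GraphQLError comes from graphql-core, and raising it first ensures __traceback__ is populated, which the frame inspection above relies on.

import logging

try:
    raise GraphQLError('resolver exploded')
except GraphQLError as err:
    log_error(err, logging.getLogger('graphql'), logging.ERROR)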
Example #28
def main():
    logger = Logger('Pipeliner')
    handler = StreamHandler(stdout)
    formatter = Formatter('%(asctime)s - %(name)s - %(levelname)s - '
                          '%(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    pipeliner = Pipeliner(api='tcp://localhost:5555',
                          broadcast='tcp://localhost:5556', logger=logger)
    pipeliner.start()
Example #29
def delete_cookie(name: str, logger: logging.Logger, config: IdPConfig) -> None:
    """
    Ask browser to delete a cookie.

    :param name: cookie name as string
    :param logger: logging instance
    :param config: IdPConfig instance
    """
    logger.debug("Delete cookie: {!s}".format(name))
    return set_cookie(name, '/', logger, config, value='')
def readUrlsFromFile():
    global urlFile
    global urls
    global logger    
    for line in open(urlFile, 'r').readlines():
        line = line.rstrip("\r\n")
        Logger.debug(logger, "Loading URL %s from %s" % (line, urlFile))
        urls.append(line)
    if (len(urls) < 1):
        print "No urls were able to be loaded from %s, exiting!" % urlFile
        exit(1)
Example #31
def process_dxf_files(dxf_files, input_values, material, logger: logging.Logger):
    ao = apper.AppObjects()
    # Start a time line group
    start_index = apper.start_group()

    y_magnitude_attribute = ao.design.attributes.itemByName("DXFer", "y_magnitude")
    x_magnitude_attribute = ao.design.attributes.itemByName("DXFer", "x_magnitude")
    row_count_attribute = ao.design.attributes.itemByName("DXFer", "row_count")
    y_row_max_attribute = ao.design.attributes.itemByName("DXFer", "y_row_max")

    if y_magnitude_attribute is None:
        y_magnitude = 0.0
    else:
        y_magnitude = float(y_magnitude_attribute.value)

    if x_magnitude_attribute is None:
        x_magnitude = 0.0
    else:
        x_magnitude = float(x_magnitude_attribute.value)

    if row_count_attribute is None:
        row_count = 0
    else:
        row_count = int(row_count_attribute.value)

    if y_row_max_attribute is None:
        y_row_max = 0.0
    else:
        y_row_max = float(y_row_max_attribute.value)

    # Define spacing and directions
    x_vector = adsk.core.Vector3D.create(1.0, 0.0, 0.0)
    y_vector = adsk.core.Vector3D.create(0.0, 1.0, 0.0)

    # Iterate all dxf files and create components
    for dxf_file in dxf_files:
        # Create new component for this DXF file
        occurrence = apper.create_component(ao.root_comp, dxf_file['name'])
        sketches = apper.import_dxf(
            dxf_file['full_path'],
            occurrence.component,
            occurrence.component.xYConstructionPlane,
            input_values['single_sketch']
        )
        logger.info(f"Imported DXF File: {dxf_file['name']}")
        x_delta = 0
        y_delta = 0
        face = False
        sketch_transform = None
        extrude_sketch_transform = None
        for sketch in sketches:

            if input_values['close_sketches']:
                tolerance = input_values['tolerance_input']
                close_sketch_gaps(sketch, tolerance, logger)

            if input_values['reset_option_input']:
                sketch_transform = move_sketch_to_origin(sketch)

            x_delta_check = get_bb_in_direction(sketch, x_vector)
            if x_delta_check > x_delta:
                x_delta = x_delta_check
            y_delta_check = get_bb_in_direction(sketch, y_vector)
            if y_delta_check > y_delta:
                y_delta = y_delta_check

            if input_values['extrude_option_input']:
                # extrude_largest_profile(sketch, occurrence.component, input_values['distance'])
                this_face = extrude_profile_with_most_loops(sketch, occurrence.component, input_values['distance'])
                if this_face:
                    face = this_face
                    extrude_sketch_transform = sketch_transform
                if input_values['keep_sketches_shown']:
                    sketch.isLightBulbOn = True

        if input_values['import_text']:
            # Alternative to create sketch on extrude cap face, having transform issues.
            if face:
                text_sketch = occurrence.component.sketches.add(face)
            else:
                xy_plane = occurrence.component.xYConstructionPlane
                text_sketch = occurrence.component.sketches.add(xy_plane)

            text_sketch.name = 'TEXT'

            # Import text with EZDXF Library
            font_selection = input_values['font_selection']
            EZDXFCommands.import_dxf_text(dxf_file['full_path'], text_sketch, font_selection, logger)

            if text_sketch.sketchTexts.count == 0:
                text_sketch.deleteMe()
            elif input_values['reset_option_input']:
                if extrude_sketch_transform is not None:
                    move_sketch_by_transform(text_sketch, extrude_sketch_transform)
                elif sketch_transform is not None:
                    move_sketch_by_transform(text_sketch, sketch_transform)

            # EZDXFCommands.import_dxf_text(dxf_file['full_path'], occurrence.component, font_selection)

        if not input_values['reset_option_input']:
            move_to_origin(occurrence)
        # Move component in specified direction
        transform_along_vector(occurrence, x_vector, x_magnitude)
        transform_along_vector(occurrence, y_vector, y_magnitude)

        # Update document and capture position of new component
        adsk.doEvents()
        if ao.design.snapshots.hasPendingSnapshot:
            ao.design.snapshots.add()

        # Increment magnitude by desired component size and spacing
        x_magnitude += input_values['spacing']
        x_magnitude += x_delta
        row_count += 1

        if y_delta > y_row_max:
            y_row_max = y_delta

        # Move to next row
        if row_count >= input_values['rows']:
            y_magnitude += input_values['spacing']
            y_magnitude += y_row_max
            y_row_max = 0.0
            x_magnitude = 0.0
            row_count = 0

        if material is not None:
            occurrence.component.material = material

    ao.design.attributes.add("DXFer", "y_magnitude", str(y_magnitude))
    ao.design.attributes.add("DXFer", "x_magnitude", str(x_magnitude))
    ao.design.attributes.add("DXFer", "row_count", str(row_count))
    ao.design.attributes.add("DXFer", "y_row_max", str(y_row_max))

    # Close time line group
    apper.end_group(start_index)
Example #32
def scaffold_split(
    data: MoleculeDataset,
    sizes: Tuple[float, float, float] = (0.8, 0.1, 0.1),
    balanced: bool = False,
    seed: int = 0,
    logger: logging.Logger = None
) -> Tuple[MoleculeDataset, MoleculeDataset, MoleculeDataset]:
    """
    Split a dataset by scaffold so that no molecules sharing a scaffold are in the same split.

    :param data: A MoleculeDataset.
    :param sizes: A length-3 tuple with the proportions of data in the
    train, validation, and test sets.
    :param balanced: Try to balance sizes of scaffolds in each set, rather than just putting smallest in test set.
    :param seed: Seed for shuffling when doing balanced splitting.
    :param logger: A logger.
    :return: A tuple containing the train, validation, and test splits of the data.
    """
    assert sum(sizes) == 1

    # Split
    train_size, val_size, test_size = sizes[0] * len(data), sizes[1] * len(
        data), sizes[2] * len(data)
    train, val, test = [], [], []
    train_scaffold_count, val_scaffold_count, test_scaffold_count = 0, 0, 0

    # Map from scaffold to index in the data
    scaffold_to_indices = scaffold_to_smiles(data.smiles(), use_indices=True)

    if balanced:  # Put stuff that's bigger than half the val/test size into train, rest just order randomly
        index_sets = list(scaffold_to_indices.values())
        big_index_sets = []
        small_index_sets = []
        for index_set in index_sets:
            if len(index_set) > val_size / 2 or len(index_set) > test_size / 2:
                big_index_sets.append(index_set)
            else:
                small_index_sets.append(index_set)
        random.seed(seed)
        random.shuffle(big_index_sets)
        random.shuffle(small_index_sets)
        index_sets = big_index_sets + small_index_sets
    else:  # Sort from largest to smallest scaffold sets
        index_sets = sorted(list(scaffold_to_indices.values()),
                            key=lambda index_set: len(index_set),
                            reverse=True)

    for index_set in index_sets:
        if len(train) + len(index_set) <= train_size:
            train += index_set
            train_scaffold_count += 1
        elif len(val) + len(index_set) <= val_size:
            val += index_set
            val_scaffold_count += 1
        else:
            test += index_set
            test_scaffold_count += 1

    if logger is not None:
        logger.debug(f'Total scaffolds = {len(scaffold_to_indices):,} | '
                     f'train scaffolds = {train_scaffold_count:,} | '
                     f'val scaffolds = {val_scaffold_count:,} | '
                     f'test scaffolds = {test_scaffold_count:,}')

    log_scaffold_stats(data, index_sets, logger=logger)

    # Map from indices to data
    train = [data[i] for i in train]
    val = [data[i] for i in val]
    test = [data[i] for i in test]

    return MoleculeDataset(train), MoleculeDataset(val), MoleculeDataset(test)
Example #33
async def start_and_monitor_coalescer(config_file: str,
                                      cfg: dict,
                                      logger: logging.Logger,
                                      coalescer_bin: str = None) -> None:
    '''Start and monitor the coalescer

    :param config_file: str, the path to suzieq config file, to be passed
    :param cfg: dict, the Suzieq config dictionary
    :param logger: logging.Logger, pointer to logger to use
    :param coalescer_bin: str, optional path to coalescer binary

    :return: nothing

    '''
    async def start_coalescer():
        sq_path = get_sq_install_dir()
        coalescer_bin = f'{sq_path}/utilities/sq_coalescer.py'
        if config_file:
            coalescer_args = f'-c {config_file}'
        else:
            coalescer_args = ''
        coalescer_args = f'{coalescer_bin} {coalescer_args}'.strip().split()

        try:
            process = await asyncio.create_subprocess_exec(
                *coalescer_args,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE)
        except Exception as ex:
            logger.error(f'ABORTING. Unable to start coalescer: {ex}')
            process = None

        return process

    fd = 0
    process = None
    # Check to see file lock is possible
    while not fd:
        if not process:
            logger.warning('Starting Coalescer')
        elif process.returncode == errno.EBUSY:
            logger.warning('Trying to start coalescer')
        process = await start_coalescer()

        if not process:
            os.kill(os.getpid(), signal.SIGTERM)
            return

        # Initial sleep to ensure that the coalescer starts up
        await asyncio.sleep(10)
        coalesce_dir = cfg.get('coalescer', {})\
            .get('coalesce-directory',
                 f'{cfg.get("data-directory")}/coalesced')

        fd = ensure_single_instance(f'{coalesce_dir}/.sq-coalescer.pid', False)
        if fd > 0:
            # unlock and try to start process
            try:
                fcntl.flock(fd, fcntl.F_UNLCK)
                os.close(fd)
            except OSError:
                pass
            continue

        # Check if we have something from the stdout we need to log
        try:
            stdout, stderr = await process.communicate()
        except asyncio.CancelledError:
            if process:
                process.terminate()
                sleep(5)
                process.kill()
            return

        if process.returncode and (process.returncode != errno.EBUSY):
            logger.error(f'coalescer stdout: {stdout}, stderr: {stderr}')
        else:
            if process.returncode == errno.EBUSY:
                await asyncio.sleep(10 * 60)
            else:
                logger.info(
                    f'coalescer ended stdout: {stdout}, stderr: {stderr}')

        fd = 0
Example #34
import gevent
from gevent import monkey

monkey.patch_all()
# noinspection PyPep8
from logging import Logger
from wxpy import *
from traceback import print_exc
import builtins
from datetime import datetime, timedelta
from typing import List

bot = Bot(cache_path=True, console_qr=2)
logger = Logger('sports')


def wxprint(my_bot):
    def func(x, **kwargs):
        logger.warning(x)
        gevent.spawn(my_bot.file_helper.send, x)

    return func


builtins.print = wxprint(bot)

from mysports.run import run

red, green = 2, 2

Example #35
def logging_config(folder: Optional[str] = None,
                   name: Optional[str] = None,
                   logger: logging.Logger = logging.root,
                   level: int = logging.INFO,
                   console_level: int = logging.INFO,
                   console: bool = True,
                   overwrite_handler: bool = False) -> str:
    """Config the logging module. It will set the logger to save to the specified file path.

    Parameters
    ----------
    folder
        The folder to save the log
    name
        Name of the saved
    logger
        The logger
    level
        Logging level
    console_level
        Logging level of the console log
    console
        Whether to also log to console
    overwrite_handler
        Whether to overwrite the existing handlers in the logger

    Returns
    -------
    folder
        The folder to save the log file.
    """
    if name is None:
        name = inspect.stack()[-1][1].split('.')[0]
    if folder is None:
        folder = os.path.join(os.getcwd(), name)
    if not os.path.exists(folder):
        os.makedirs(folder, exist_ok=True)
    need_file_handler = True
    need_console_handler = True
    # Check all loggers.
    if overwrite_handler:
        logger.handlers = []
    else:
        for handler in logger.handlers:
            if isinstance(handler, logging.StreamHandler):
                need_console_handler = False
    logpath = os.path.join(folder, name + ".log")
    print("All Logs will be saved to {}".format(logpath))
    logger.setLevel(level)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    if need_file_handler:
        logfile = logging.FileHandler(logpath)
        logfile.setLevel(level)
        logfile.setFormatter(formatter)
        logger.addHandler(logfile)
    if console and need_console_handler:
        # Initialize the console logging
        logconsole = logging.StreamHandler()
        logconsole.setLevel(console_level)
        logconsole.setFormatter(formatter)
        logger.addHandler(logconsole)
    return folder
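A hedged usage sketch; the folder and name below are illustrative.

import logging

log_dir = logging_config(folder='experiments/run1', name='run1',
                         level=logging.DEBUG, console_level=logging.INFO)
logging.info('experiment started')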
Example #36
def main(config, log: logging.Logger) -> None:
    log.debug(f'Configuration: {config}')
    hyperparameters = config['hyperparameters']

    raw_dataset = load_raw_dataset(config, log)
    log.info(f'size of raw dataset = {len(raw_dataset)}')

    max_features = hyperparameters['max_features']
    log.info(f'Limiting to {max_features} features.')

    vectorized_reviews = VectorizedReviews(raw_dataset, max_features)
    log.info(f'size of vectorized reviews = {len(vectorized_reviews)}')

    num_text_features = len(vectorized_reviews.text_vectorizer.vocabulary_)
    log.info(f'Num review tokens = {num_text_features}')

    num_rating_features = len(vectorized_reviews.rating_vectorizer.vocabulary_)
    log.info(f'Rating vocabulary size = {num_rating_features}')

    device = get_device(config)
    log.info(f'Using device = {device}')

    model = ReviewClassifier(num_text_features)
    model = model.to(device)
    log.info(model)

    loss = nn.BCEWithLogitsLoss()
    log.info(f'loss = {loss}')

    learning_rate = hyperparameters['learning_rate']
    optimizer = Adam(model.parameters(), lr=learning_rate)
    log.info(f'optimizer = {optimizer}')

    tensorboard_directory = config['files']['log_dir']
    log.info(f'Writing Tensorboard logs to {tensorboard_directory}.')

    seed = hyperparameters['seed']
    torch.manual_seed(seed)
    log.info(f'Using seed {seed}.')

    writer = SummaryWriter(log_dir=tensorboard_directory)

    train(config, vectorized_reviews, device, model, loss, optimizer, log, writer)
Example #37
async def _generate_framework_chunks(
        path: str, logger: Logger) -> AsyncIterator[InstallRequest]:
    logger.debug(f"Generating chunks for {path}")
    async for chunk in tar.generate_tar([path]):
        yield InstallRequest(payload=Payload(data=chunk))
    logger.debug(f"Finished generating chunks {path}")
Example #38
File: run.py Project: dzhang30/Iris
def run_iris(logger: logging.Logger, iris_config: ConfigParser) -> None:
    """
    Run the main Iris process

    :param logger: logger for forensics
    :param iris_config: iris.cfg config file object
    :return: None
    """
    try:
        iris_main_settings = iris_config['main_settings']

        iris_root_path = iris_main_settings['iris_root_path']
        textfile_collector_path = iris_main_settings['textfile_collector_path']
        iris_monitor_frequency = iris_main_settings.getfloat(
            'iris_monitor_frequency')
        dev_mode = iris_main_settings.getboolean('dev_mode')

        logger.info(
            'Starting IRIS in {} mode\n'.format('DEV' if dev_mode else 'PROD'))

        # set path variables
        log_debug_file_path = os.path.join(iris_root_path, 'iris.debug')
        log_dir_path = os.path.join(iris_root_path, 'logs')
        config_service_log_path = os.path.join(log_dir_path,
                                               'config_service.log')
        scheduler_log_path = os.path.join(log_dir_path, 'scheduler.log')
        garbage_collector_log_path = os.path.join(log_dir_path,
                                                  'garbage_collector.log')

        aws_credentials_path = os.path.join(iris_root_path, 'aws_credentials')
        s3_download_to_path = os.path.join(iris_root_path, 'downloads')
        local_config_file_path = os.path.join(iris_root_path,
                                              'local_config.json')
        global_config_file_path = os.path.join(s3_download_to_path,
                                               'global_config.json')
        prom_dir_path = os.path.join(iris_root_path, 'prom_files')

        # won't make dirs if they already exist
        os.makedirs(s3_download_to_path, exist_ok=True)
        os.makedirs(textfile_collector_path, exist_ok=True)

        if not os.path.isdir(prom_dir_path):
            logger.info('Creating symlink from {} to {}'.format(
                textfile_collector_path, prom_dir_path))
            os.symlink(textfile_collector_path, prom_dir_path)

        # Expose Iris version metadata
        logger.info('Exposing Iris version metadata via prom file')
        iris_version_settings = {
            'iris_version': IRIS_VERSION,
            'iris_revision': IRIS_REVISION,
            'iris_python_version': IRIS_PYTHON_VERSION,
            'iris_build_date': IRIS_BUILD_DATE,
        }

        prom_builder = PromStrBuilder(
            metric_name='iris_build_info',
            metric_result=1,
            help_str='This gives us iris build metadata',
            type_str='gauge',
            labels=iris_version_settings)

        prom_string = prom_builder.create_prom_string()
        prom_file_path = os.path.join(prom_dir_path,
                                      '{}.prom'.format('iris_build_info'))
        prom_writer = PromFileWriter(logger=logger)
        prom_writer.write_prom_file(prom_file_path, prom_string)

        # run config_service process
        logger.info('Starting the Config_Service child process')

        config_service_settings = iris_config['config_service_settings']
        run_config_service_params = {
            'aws_creds_path': aws_credentials_path,
            's3_region_name': config_service_settings['s3_region_name'],
            's3_bucket_env': config_service_settings['s3_bucket_env'],
            's3_bucket_name': config_service_settings['s3_bucket_name'],
            's3_download_to_path': s3_download_to_path,
            'ec2_region_name': config_service_settings['ec2_region_name'],
            'ec2_dev_instance_id':
            config_service_settings['ec2_dev_instance_id'],
            'ec2_metadata_url': config_service_settings['ec2_metadata_url'],
            'local_config_path': local_config_file_path,
            'prom_dir_path': prom_dir_path,
            'run_frequency': config_service_settings.getfloat('run_frequency'),
            'log_path': config_service_log_path,
            'log_debug_path': log_debug_file_path,
            'dev_mode': dev_mode
        }
        config_service_process = multiprocessing.Process(
            target=run_config_service,
            name='config_service',
            kwargs=run_config_service_params)
        config_service_process.daemon = True  # cleanup config_service child process when main process exits
        config_service_process.start()

        # run scheduler process
        logger.info('Starting the Scheduler child process')

        scheduler_settings = iris_config['scheduler_settings']
        run_scheduler_params = {
            'global_config_path': global_config_file_path,
            'local_config_path': local_config_file_path,
            'prom_dir_path': prom_dir_path,
            'run_frequency': scheduler_settings.getfloat('run_frequency'),
            'internal_metrics_whitelist': internal_metrics_whitelist,
            'log_path': scheduler_log_path,
            'log_debug_path': log_debug_file_path,
        }
        scheduler_process = multiprocessing.Process(
            target=run_scheduler,
            name='scheduler',
            kwargs=run_scheduler_params)
        scheduler_process.daemon = True  # cleanup scheduler child process when main process exits
        scheduler_process.start()

        # run garbage collector process
        logger.info('Starting the Garbage Collector child process')

        scheduler_settings = iris_config['garbage_collector_settings']
        run_garbage_collector_params = {
            'global_config_path': global_config_file_path,
            'local_config_path': local_config_file_path,
            'prom_dir_path': prom_dir_path,
            'run_frequency': scheduler_settings.getfloat('run_frequency'),
            'internal_metrics_whitelist': internal_metrics_whitelist,
            'log_path': garbage_collector_log_path,
            'log_debug_path': log_debug_file_path,
        }
        garbage_collector_process = multiprocessing.Process(
            target=run_garbage_collector,
            name='garbage_collector',
            kwargs=run_garbage_collector_params)
        garbage_collector_process.daemon = True  # cleanup scheduler child process when main process exits
        garbage_collector_process.start()

        # Indicate the parent is up
        prom_builder = PromStrBuilder(
            metric_name='iris_main_up',
            metric_result=1,
            help_str='Indicates if the Iris parent process is up',
            type_str='gauge')

        prom_string = prom_builder.create_prom_string()
        prom_file_path = os.path.join(prom_dir_path, 'iris_main.prom')
        prom_writer = PromFileWriter(logger=logger)
        prom_writer.write_prom_file(prom_file_path, prom_string)

        # monitor the child processes (config_service, scheduler, etc.) & write to iris-{service}-up.prom files
        child_processes = [
            ChildProcess(config_service_process, config_service_log_path,
                         log_debug_file_path),
            ChildProcess(scheduler_process, scheduler_log_path,
                         log_debug_file_path),
            ChildProcess(garbage_collector_process, garbage_collector_log_path,
                         log_debug_file_path),
        ]
        while True:
            logger.info('Monitoring child services: {}'.format(', '.join(
                [child.name for child in child_processes])))

            for child_process in child_processes:
                process_name = child_process.name
                if not child_process.is_alive():
                    err_msg = 'The {0} ({1}) has failed with exit_code {2}. Check the {0} log'
                    logger.error(
                        err_msg.format(process_name, child_process.pid,
                                       child_process.get_exit_code()))

                    if not child_process.already_logged:
                        child_process.log_terminate()
                        child_process.already_logged = True

                metric_name = 'iris_{}_up'.format(process_name)
                metric_up_result = int(child_process.is_alive())
                prom_builder = PromStrBuilder(
                    metric_name=metric_name,
                    metric_result=metric_up_result,
                    help_str='Indicate if the {} process is still up'.format(
                        process_name),
                    type_str='gauge')

                prom_string = prom_builder.create_prom_string()
                prom_file_path = os.path.join(
                    prom_dir_path, 'iris_{}.prom'.format(process_name))
                prom_writer = PromFileWriter(logger=logger)
                prom_writer.write_prom_file(prom_file_path, prom_string)

            logger.info('Sleeping for {}\n'.format(iris_monitor_frequency))

            time.sleep(iris_monitor_frequency)

    except Exception as e:
        logger.error(e)

        # Indicate the parent is down
        prom_builder = PromStrBuilder(
            metric_name='iris_main_up',
            metric_result=0,
            help_str='Indicates if the Iris parent process is up',
            type_str='gauge')
        prom_string = prom_builder.create_prom_string()
        prom_file_path = os.path.join(prom_dir_path, 'iris_main.prom')
        prom_writer = PromFileWriter(logger=logger)
        prom_writer.write_prom_file(prom_file_path, prom_string)

        raise
Example #39
def run_mixtures_active_learning(theta_0: float,
                                 theta_true: float,
                                 theta_bounds: List[float],
                                 n_theta: int,
                                 initial_idx: List[int],
                                 hyperparams: Dict,
                                 n_true: int,
                                 n_samples_per_theta: int,
                                 n_iter: int,
                                 ucb_kappas: List[float],
                                 ucbm_kappas: List[float],
                                 logger: Logger = None) -> Dict[str, NDFrame]:
    logger = logger or logging.getLogger(__name__)

    logger.info('Simulating X_true and performing exact param scan')
    param_grid = ParamGrid(bounds=[theta_bounds], num=n_theta)
    X_true = triple_mixture(theta_true).sample(n_true).numpy()
    nllr_exact, mle_exact = exact_param_scan(simulator_func=triple_mixture,
                                             X_true=X_true,
                                             param_grid=param_grid,
                                             theta_0=theta_0,
                                             to_meshgrid_shape=False)

    logger.info('Building active learners')
    learner_kwargs = dict(
        simulator_func=triple_mixture,
        X_true=X_true,
        theta_true=theta_true,
        theta_0=theta_0,
        initial_idx=initial_idx,
        n_samples_per_theta=n_samples_per_theta,
        ratio_model=create_model(theta_0=theta_0, hyperparams=hyperparams),
        total_param_grid=param_grid,
    )
    active_learners = dict(Random=RandomActiveLearner(**learner_kwargs))

    for ucb_kappa in ucb_kappas:
        active_learners[f'UCB_{ucb_kappa}'] = \
            UpperConfidenceBoundLearner(kappa=ucb_kappa, **learner_kwargs)

    for ucbm_kappa in ucbm_kappas:
        active_learners[f'UCBM_{ucbm_kappa}'] = \
            ModifiedUCBLearner(kappa=ucbm_kappa, **learner_kwargs)

    logger.info('Fitting ActiveLearners.')
    for name, active_learner in active_learners.items():
        logger.info(f'Fitting {name} ActiveLearner.')
        active_learner.fit(n_iter=n_iter)

    logger.info('Finished fitting, collecting results.')

    mle = pd.DataFrame({
        learner_name: map(float, learner.mle_predictions)
        for learner_name, learner in active_learners.items()
    })
    mle['Exact'] = float(mle_exact)

    trialed_thetas = pd.DataFrame({
        learner_name: map(float, learner.trialed_thetas)
        for learner_name, learner in active_learners.items()
    })

    all_thetas = np.around(param_grid.array.squeeze(), 6)  # TODO

    def _collect_predictions(attr_name):
        columns = list(range(n_iter + 1))
        default = [
            np.full((len(all_thetas), ), np.nan) for _ in range(len(columns))
        ]
        dfs = [
            pd.DataFrame(data=np.stack(getattr(learner, attr_name, default),
                                       axis=1),
                         index=all_thetas,
                         columns=columns)
            for learner in active_learners.values()
        ]
        concat = pd.concat(dfs,
                           axis=0,
                           keys=active_learners.keys(),
                           names=['Learner', 'theta'])
        concat = concat.reset_index().set_index('theta', drop=True)
        return concat

    nllr = _collect_predictions('nllr_predictions')
    std = _collect_predictions('nllr_std')
    nllr_exact = pd.DataFrame(data=nllr_exact.squeeze(),
                              columns=['Exact'],
                              index=all_thetas)

    return dict(mle=mle,
                trialed_thetas=trialed_thetas,
                nllr=nllr,
                std=std,
                nllr_exact=nllr_exact)
Example #40
def debug_exc_log(lg: logging.Logger,
                  exc: Exception,
                  msg: str = "Exception in RSS"):
    if lg.getEffectiveLevel() <= logging.DEBUG:
        lg.exception(msg, exc_info=exc)
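Usage sketch: the traceback is only emitted when the logger's effective level is DEBUG or lower.

import logging

lg = logging.getLogger('red.rss')
lg.setLevel(logging.DEBUG)
try:
    1 / 0
except Exception as exc:
    debug_exc_log(lg, exc, 'feed refresh failed')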
Example #41
def fetch_production(zone_key='DK-DK1',
                     session=None,
                     target_datetime=None,
                     logger: logging.Logger = logging.getLogger(__name__)):
    """
    Queries "Electricity balance Non-Validated" from energinet api
    for Danish bidding zones
    """
    r = session or requests.session()

    if zone_key not in ['DK-DK1', 'DK-DK2']:
        raise NotImplementedError(
            'fetch_production() for {} not implemented'.format(zone_key))

    zone = zone_key[-3:]

    timestamp = arrow.get(target_datetime).strftime('%Y-%m-%d %H:%M')

    # fetch hourly energy balance from recent hours
    sqlstr = 'SELECT "HourUTC" as timestamp, "Biomass", "Waste", \
                     "OtherRenewable", "FossilGas" as gas, "FossilHardCoal" as coal, \
                     "FossilOil" as oil, "HydroPower" as hydro, \
                     ("OffshoreWindPower"%2B"OnshoreWindPower") as wind, \
                     "SolarPower" as solar from "{0}" \
                     WHERE "PriceArea" = \'{1}\' AND \
                     "HourUTC" >= (timestamp\'{2}\'-INTERVAL \'24 hours\') AND \
                     "HourUTC" <= timestamp\'{2}\' \
                     ORDER BY "HourUTC" ASC'.format(ids['energy_bal'], zone,
                                                    timestamp)

    url = 'https://api.energidataservice.dk/datastore_search_sql?sql={}'.format(
        sqlstr)
    response = r.get(url)

    # raise errors for responses with an error or no data
    retry_count = 0
    while response.status_code in [429, 403, 500]:
        retry_count += 1
        if retry_count > 5:
            raise Exception('Retried too many times..')
        # Wait and retry
        logger.warn('Retrying..')
        time.sleep(5**retry_count)
        response = r.get(url)
    if response.status_code != 200:
        j = response.json()
        if 'error' in j and 'info' in j['error']:
            error = j['error']['__type']
            text = j['error']['info']['orig']
            msg = '"{}" fetching production data for {}: {}'.format(
                error, zone_key, text)
        else:
            msg = 'error while fetching production data for {}: {}'.format(
                zone_key, json.dumps(j))
        raise requests.exceptions.HTTPError(msg)
    if not response.json()['result']['records']:
        raise ParserException("DK.py",
                              'API returned no data',
                              zone_key=zone_key)

    df = pd.DataFrame(response.json()['result']['records'])
    # index response dataframe by time
    df = df.set_index('timestamp')
    df.index = pd.DatetimeIndex(df.index)
    # drop empty rows from energy balance
    df.dropna(how='all', inplace=True)

    # Divide waste into 55% renewable and 45% non-renewable parts according to
    # https://ens.dk/sites/ens.dk/files/Statistik/int.reporting_2016.xls (visited Jan 24th, 2019)
    df['unknown'] = 0.45 * df['Waste']  # Report fossil waste as unknown
    df['renwaste'] = 0.55 * df['Waste']
    # Report biomass, renewable waste and other renewables (biogas etc.) as biomass
    df['biomass'] = df.filter(['Biomass', 'renwaste',
                               'OtherRenewable']).sum(axis=1)

    fuels = ['biomass', 'coal', 'oil', 'gas', 'unknown', 'hydro']
    # Format output as a list of dictionaries
    output = []
    for dt in df.index:

        data = {
            'zoneKey': zone_key,
            'datetime': None,
            'production': {
                'biomass': 0,
                'coal': 0,
                'gas': 0,
                'hydro': None,
                'nuclear': 0,
                'oil': 0,
                'solar': None,
                'wind': None,
                'geothermal': None,
                'unknown': 0
            },
            'storage': {},
            'source': 'api.energidataservice.dk'
        }

        data['datetime'] = dt.to_pydatetime()
        data['datetime'] = data['datetime'].replace(tzinfo=pytz.utc)
        for f in ['solar', 'wind'] + fuels:
            data['production'][f] = df.loc[dt, f]
        output.append(data)
    return output
def test(
    cfg_file, ckpt: str, output_path: str = None, logger: logging.Logger = None
) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load
    :param output_path: path to output
    :param logger: log output to this logger (creates new logger if not set)
    """

    if logger is None:
        logger = logging.getLogger(__name__)
        if not logger.handlers:
            FORMAT = "%(asctime)-15s - %(message)s"
            logging.basicConfig(format=FORMAT)
            logger.setLevel(level=logging.DEBUG)

    cfg = load_config(cfg_file)

    if "test" not in cfg["data"].keys():
        raise ValueError("Test data must be specified in config.")

    # when checkpoint is not specified, take latest (best) from model dir
    if ckpt is None:
        model_dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir)
            )

    batch_size = cfg["training"]["batch_size"]
    batch_type = cfg["training"].get("batch_type", "sentence")
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    dataset_version = cfg["data"].get("version", "phoenix_2014_trans")
    translation_max_output_length = cfg["training"].get(
        "translation_max_output_length", None
    )

    # load the data
    _, dev_data, test_data, gls_vocab, txt_vocab = load_data(data_cfg=cfg["data"])

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    do_recognition = cfg["training"].get("recognition_loss_weight", 1.0) > 0.0
    do_translation = cfg["training"].get("translation_loss_weight", 1.0) > 0.0
    model = build_model(
        cfg=cfg["model"],
        gls_vocab=gls_vocab,
        txt_vocab=txt_vocab,
        sgn_dim=sum(cfg["data"]["feature_size"])
        if isinstance(cfg["data"]["feature_size"], list)
        else cfg["data"]["feature_size"],
        features_dim=cfg["data"]["feature_size_cnn"],
        do_recognition=do_recognition,
        do_translation=do_translation,
    )
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # Data Augmentation Parameters
    frame_subsampling_ratio = cfg["data"].get("frame_subsampling_ratio", None)
    # Note (Cihan): we are not using 'random_frame_subsampling' and
    #   'random_frame_masking_ratio' in testing as they are just for training.

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        recognition_beam_sizes = cfg["testing"].get("recognition_beam_sizes", [1])
        translation_beam_sizes = cfg["testing"].get("translation_beam_sizes", [1])
        translation_beam_alphas = cfg["testing"].get("translation_beam_alphas", [-1])
    else:
        recognition_beam_sizes = [1]
        translation_beam_sizes = [1]
        translation_beam_alphas = [-1]

    if "testing" in cfg.keys():
        max_recognition_beam_size = cfg["testing"].get(
            "max_recognition_beam_size", None
        )
        if max_recognition_beam_size is not None:
            recognition_beam_sizes = list(range(1, max_recognition_beam_size + 1))

    if do_recognition:
        recognition_loss_function = torch.nn.CTCLoss(
            blank=model.gls_vocab.stoi[SIL_TOKEN], zero_infinity=True
        )
        if use_cuda:
            recognition_loss_function.cuda()
    if do_translation:
        translation_loss_function = XentLoss(
            pad_index=txt_vocab.stoi[PAD_TOKEN], smoothing=0.0
        )
        if use_cuda:
            translation_loss_function.cuda()

    # NOTE (Cihan): Currently Hardcoded to be 0 for TensorFlow decoding
    assert model.gls_vocab.stoi[SIL_TOKEN] == 0

    if do_recognition:
        # Dev Recognition CTC Beam Search Results
        dev_recognition_results = {}
        dev_best_wer_score = float("inf")
        dev_best_recognition_beam_size = 1
        for rbw in recognition_beam_sizes:
            logger.info("-" * 60)
            valid_start_time = time.time()
            logger.info("[DEV] partition [RECOGNITION] experiment [BW]: %d", rbw)
            dev_recognition_results[rbw] = validate_on_data(
                model=model,
                data=dev_data,
                batch_size=batch_size,
                use_cuda=use_cuda,
                batch_type=batch_type,
                dataset_version=dataset_version,
                sgn_dim=sum(cfg["data"]["feature_size"])
                if isinstance(cfg["data"]["feature_size"], list)
                else cfg["data"]["feature_size"],
                features_dim=cfg["data"]["feature_size_cnn"],
                txt_pad_index=txt_vocab.stoi[PAD_TOKEN],
                # Recognition Parameters
                do_recognition=do_recognition,
                recognition_loss_function=recognition_loss_function,
                recognition_loss_weight=1,
                recognition_beam_size=rbw,
                # Translation Parameters
                do_translation=do_translation,
                translation_loss_function=translation_loss_function
                if do_translation
                else None,
                translation_loss_weight=1 if do_translation else None,
                translation_max_output_length=translation_max_output_length
                if do_translation
                else None,
                level=level if do_translation else None,
                translation_beam_size=1 if do_translation else None,
                translation_beam_alpha=-1 if do_translation else None,
                frame_subsampling_ratio=frame_subsampling_ratio,
            )
            logger.info("finished in %.4fs ", time.time() - valid_start_time)
            if dev_recognition_results[rbw]["valid_scores"]["wer"] < dev_best_wer_score:
                dev_best_wer_score = dev_recognition_results[rbw]["valid_scores"]["wer"]
                dev_best_recognition_beam_size = rbw
                dev_best_recognition_result = dev_recognition_results[rbw]
                logger.info("*" * 60)
                logger.info(
                    "[DEV] partition [RECOGNITION] results:\n\t"
                    "New Best CTC Decode Beam Size: %d\n\t"
                    "WER %3.2f\t(DEL: %3.2f,\tINS: %3.2f,\tSUB: %3.2f)",
                    dev_best_recognition_beam_size,
                    dev_best_recognition_result["valid_scores"]["wer"],
                    dev_best_recognition_result["valid_scores"]["wer_scores"][
                        "del_rate"
                    ],
                    dev_best_recognition_result["valid_scores"]["wer_scores"][
                        "ins_rate"
                    ],
                    dev_best_recognition_result["valid_scores"]["wer_scores"][
                        "sub_rate"
                    ],
                )
                logger.info("*" * 60)

    if do_translation:
        logger.info("=" * 60)
        dev_translation_results = {}
        dev_best_bleu_score = float("-inf")
        dev_best_translation_beam_size = 1
        dev_best_translation_alpha = 1
        for tbw in translation_beam_sizes:
            dev_translation_results[tbw] = {}
            for ta in translation_beam_alphas:
                dev_translation_results[tbw][ta] = validate_on_data(
                    model=model,
                    data=dev_data,
                    batch_size=batch_size,
                    use_cuda=use_cuda,
                    level=level,
                    sgn_dim=sum(cfg["data"]["feature_size"])
                    if isinstance(cfg["data"]["feature_size"], list)
                    else cfg["data"]["feature_size"],
                    features_dim=cfg["data"]["feature_size_cnn"],
                    batch_type=batch_type,
                    dataset_version=dataset_version,
                    do_recognition=do_recognition,
                    recognition_loss_function=recognition_loss_function
                    if do_recognition
                    else None,
                    recognition_loss_weight=1 if do_recognition else None,
                    recognition_beam_size=1 if do_recognition else None,
                    do_translation=do_translation,
                    translation_loss_function=translation_loss_function,
                    translation_loss_weight=1,
                    translation_max_output_length=translation_max_output_length,
                    txt_pad_index=txt_vocab.stoi[PAD_TOKEN],
                    translation_beam_size=tbw,
                    translation_beam_alpha=ta,
                    frame_subsampling_ratio=frame_subsampling_ratio,
                )

                if (
                    dev_translation_results[tbw][ta]["valid_scores"]["bleu"]
                    > dev_best_bleu_score
                ):
                    dev_best_bleu_score = dev_translation_results[tbw][ta][
                        "valid_scores"
                    ]["bleu"]
                    dev_best_translation_beam_size = tbw
                    dev_best_translation_alpha = ta
                    dev_best_translation_result = dev_translation_results[tbw][ta]
                    logger.info(
                        "[DEV] partition [Translation] results:\n\t"
                        "New Best Translation Beam Size: %d and Alpha: %d\n\t"
                        "BLEU-4 %.2f\t(BLEU-1: %.2f,\tBLEU-2: %.2f,\tBLEU-3: %.2f,\tBLEU-4: %.2f)\n\t"
                        "CHRF %.2f\t"
                        "ROUGE %.2f",
                        dev_best_translation_beam_size,
                        dev_best_translation_alpha,
                        dev_best_translation_result["valid_scores"]["bleu"],
                        dev_best_translation_result["valid_scores"]["bleu_scores"][
                            "bleu1"
                        ],
                        dev_best_translation_result["valid_scores"]["bleu_scores"][
                            "bleu2"
                        ],
                        dev_best_translation_result["valid_scores"]["bleu_scores"][
                            "bleu3"
                        ],
                        dev_best_translation_result["valid_scores"]["bleu_scores"][
                            "bleu4"
                        ],
                        dev_best_translation_result["valid_scores"]["chrf"],
                        dev_best_translation_result["valid_scores"]["rouge"],
                    )
                    logger.info("-" * 60)

    logger.info("*" * 60)
    logger.info(
        "[DEV] partition [Recognition & Translation] results:\n\t"
        "Best CTC Decode Beam Size: %d\n\t"
        "Best Translation Beam Size: %d and Alpha: %d\n\t"
        "WER %3.2f\t(DEL: %3.2f,\tINS: %3.2f,\tSUB: %3.2f)\n\t"
        "BLEU-4 %.2f\t(BLEU-1: %.2f,\tBLEU-2: %.2f,\tBLEU-3: %.2f,\tBLEU-4: %.2f)\n\t"
        "CHRF %.2f\t"
        "ROUGE %.2f",
        dev_best_recognition_beam_size if do_recognition else -1,
        dev_best_translation_beam_size if do_translation else -1,
        dev_best_translation_alpha if do_translation else -1,
        dev_best_recognition_result["valid_scores"]["wer"] if do_recognition else -1,
        dev_best_recognition_result["valid_scores"]["wer_scores"]["del_rate"]
        if do_recognition
        else -1,
        dev_best_recognition_result["valid_scores"]["wer_scores"]["ins_rate"]
        if do_recognition
        else -1,
        dev_best_recognition_result["valid_scores"]["wer_scores"]["sub_rate"]
        if do_recognition
        else -1,
        dev_best_translation_result["valid_scores"]["bleu"] if do_translation else -1,
        dev_best_translation_result["valid_scores"]["bleu_scores"]["bleu1"]
        if do_translation
        else -1,
        dev_best_translation_result["valid_scores"]["bleu_scores"]["bleu2"]
        if do_translation
        else -1,
        dev_best_translation_result["valid_scores"]["bleu_scores"]["bleu3"]
        if do_translation
        else -1,
        dev_best_translation_result["valid_scores"]["bleu_scores"]["bleu4"]
        if do_translation
        else -1,
        dev_best_translation_result["valid_scores"]["chrf"] if do_translation else -1,
        dev_best_translation_result["valid_scores"]["rouge"] if do_translation else -1,
    )
    logger.info("*" * 60)

    test_best_result = validate_on_data(
        model=model,
        data=test_data,
        batch_size=batch_size,
        use_cuda=use_cuda,
        batch_type=batch_type,
        dataset_version=dataset_version,
        sgn_dim=sum(cfg["data"]["feature_size"])
        if isinstance(cfg["data"]["feature_size"], list)
        else cfg["data"]["feature_size"],
        features_dim=cfg["data"]["feature_size_cnn"],
        txt_pad_index=txt_vocab.stoi[PAD_TOKEN],
        do_recognition=do_recognition,
        recognition_loss_function=recognition_loss_function if do_recognition else None,
        recognition_loss_weight=1 if do_recognition else None,
        recognition_beam_size=dev_best_recognition_beam_size
        if do_recognition
        else None,
        do_translation=do_translation,
        translation_loss_function=translation_loss_function if do_translation else None,
        translation_loss_weight=1 if do_translation else None,
        translation_max_output_length=translation_max_output_length
        if do_translation
        else None,
        level=level if do_translation else None,
        translation_beam_size=dev_best_translation_beam_size
        if do_translation
        else None,
        translation_beam_alpha=dev_best_translation_alpha if do_translation else None,
        frame_subsampling_ratio=frame_subsampling_ratio,
    )

    logger.info(
        "[TEST] partition [Recognition & Translation] results:\n\t"
        "Best CTC Decode Beam Size: %d\n\t"
        "Best Translation Beam Size: %d and Alpha: %d\n\t"
        "WER %3.2f\t(DEL: %3.2f,\tINS: %3.2f,\tSUB: %3.2f)\n\t"
        "BLEU-4 %.2f\t(BLEU-1: %.2f,\tBLEU-2: %.2f,\tBLEU-3: %.2f,\tBLEU-4: %.2f)\n\t"
        "CHRF %.2f\t"
        "ROUGE %.2f",
        dev_best_recognition_beam_size if do_recognition else -1,
        dev_best_translation_beam_size if do_translation else -1,
        dev_best_translation_alpha if do_translation else -1,
        test_best_result["valid_scores"]["wer"] if do_recognition else -1,
        test_best_result["valid_scores"]["wer_scores"]["del_rate"]
        if do_recognition
        else -1,
        test_best_result["valid_scores"]["wer_scores"]["ins_rate"]
        if do_recognition
        else -1,
        test_best_result["valid_scores"]["wer_scores"]["sub_rate"]
        if do_recognition
        else -1,
        test_best_result["valid_scores"]["bleu"] if do_translation else -1,
        test_best_result["valid_scores"]["bleu_scores"]["bleu1"]
        if do_translation
        else -1,
        test_best_result["valid_scores"]["bleu_scores"]["bleu2"]
        if do_translation
        else -1,
        test_best_result["valid_scores"]["bleu_scores"]["bleu3"]
        if do_translation
        else -1,
        test_best_result["valid_scores"]["bleu_scores"]["bleu4"]
        if do_translation
        else -1,
        test_best_result["valid_scores"]["chrf"] if do_translation else -1,
        test_best_result["valid_scores"]["rouge"] if do_translation else -1,
    )
    logger.info("*" * 60)

    def _write_to_file(file_path: str, sequence_ids: List[str], hypotheses: List[str]):
        with open(file_path, mode="w", encoding="utf-8") as out_file:
            for seq, hyp in zip(sequence_ids, hypotheses):
                out_file.write(seq + "|" + hyp + "\n")

    if output_path is not None:
        if do_recognition:
            dev_gls_output_path_set = "{}.BW_{:03d}.{}.gls".format(
                output_path, dev_best_recognition_beam_size, "dev"
            )
            _write_to_file(
                dev_gls_output_path_set,
                [s for s in dev_data.sequence],
                dev_best_recognition_result["gls_hyp"],
            )
            test_gls_output_path_set = "{}.BW_{:03d}.{}.gls".format(
                output_path, dev_best_recognition_beam_size, "test"
            )
            _write_to_file(
                test_gls_output_path_set,
                [s for s in test_data.sequence],
                test_best_result["gls_hyp"],
            )

        if do_translation:
            if dev_best_translation_beam_size > -1:
                dev_txt_output_path_set = "{}.BW_{:02d}.A_{:1d}.{}.txt".format(
                    output_path,
                    dev_best_translation_beam_size,
                    dev_best_translation_alpha,
                    "dev",
                )
                test_txt_output_path_set = "{}.BW_{:02d}.A_{:1d}.{}.txt".format(
                    output_path,
                    dev_best_translation_beam_size,
                    dev_best_translation_alpha,
                    "test",
                )
            else:
                dev_txt_output_path_set = "{}.BW_{:02d}.{}.txt".format(
                    output_path, dev_best_translation_beam_size, "dev"
                )
                test_txt_output_path_set = "{}.BW_{:02d}.{}.txt".format(
                    output_path, dev_best_translation_beam_size, "test"
                )

            _write_to_file(
                dev_txt_output_path_set,
                [s for s in dev_data.sequence],
                dev_best_translation_result["txt_hyp"],
            )
            _write_to_file(
                test_txt_output_path_set,
                [s for s in test_data.sequence],
                test_best_result["txt_hyp"],
            )

        with open(output_path + ".dev_results.pkl", "wb") as out:
            pickle.dump(
                {
                    "recognition_results": dev_recognition_results
                    if do_recognition
                    else None,
                    "translation_results": dev_translation_results
                    if do_translation
                    else None,
                },
                out,
            )
        with open(output_path + ".test_results.pkl", "wb") as out:
            pickle.dump(test_best_result, out)
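A minimal usage sketch for the test() entry point above; the config path, checkpoint handling and output prefix are illustrative placeholders, not taken from the original project:

if __name__ == "__main__":
    # Placeholder paths: point cfg_file at a real experiment config.
    test(
        cfg_file="configs/sign.yaml",
        ckpt=None,               # falls back to the latest checkpoint in model_dir
        output_path="out/best",  # prefix for hypothesis files and *_results.pkl dumps
    )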
Example #43
0
def test(cfg_file,
         ckpt: str,
         output_path: str = None,
         save_attention: bool = False,
         logger: logging.Logger = None) -> None:
    """
    Main test function. Handles loading a model from checkpoint, generating
    translations and storing them and attention plots.

    :param cfg_file: path to configuration file
    :param ckpt: path to checkpoint to load
    :param output_path: path to output
    :param save_attention: whether to save the computed attention weights
    :param logger: log output to this logger (creates new logger if not set)
    """

    if logger is None:
        logger = logging.getLogger(__name__)
        FORMAT = '%(asctime)-15s - %(message)s'
        logging.basicConfig(format=FORMAT)
        logger.setLevel(level=logging.DEBUG)

    cfg = load_config(cfg_file)

    if "test" not in cfg["data"].keys():
        raise ValueError("Test data must be specified in config.")

    # when checkpoint is not specified, take latest (best) from model dir
    if ckpt is None:
        model_dir = cfg["training"]["model_dir"]
        ckpt = get_latest_checkpoint(model_dir)
        if ckpt is None:
            raise FileNotFoundError(
                "No checkpoint found in directory {}.".format(model_dir))
        try:
            step = ckpt.split(model_dir + "/")[1].split(".ckpt")[0]
        except IndexError:
            step = "best"

    batch_size = cfg["training"]["batch_size"]
    batch_type = cfg["training"].get("batch_type", "sentence")
    use_cuda = cfg["training"].get("use_cuda", False)
    level = cfg["data"]["level"]
    eval_metric = cfg["training"]["eval_metric"]
    max_output_length = cfg["training"].get("max_output_length", None)

    # load the data
    _, dev_data, test_data, src_vocab, trg_vocab = load_data(
        data_cfg=cfg["data"])

    data_to_predict = {"dev": dev_data, "test": test_data}

    # load model state from disk
    model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)

    # build model and load parameters into it
    model = build_model(cfg["model"], src_vocab=src_vocab, trg_vocab=trg_vocab)
    model.load_state_dict(model_checkpoint["model_state"])

    if use_cuda:
        model.cuda()

    # whether to use beam search for decoding, 0: greedy decoding
    if "testing" in cfg.keys():
        beam_size = cfg["testing"].get("beam_size", 0)
        beam_alpha = cfg["testing"].get("alpha", -1)
    else:
        beam_size = 0
        beam_alpha = -1

    for data_set_name, data_set in data_to_predict.items():

        #pylint: disable=unused-variable
        score, loss, ppl, sources, sources_raw, references, hypotheses, \
        hypotheses_raw, attention_scores = validate_on_data(
            model, data=data_set, batch_size=batch_size,
            batch_type=batch_type, level=level,
            max_output_length=max_output_length, eval_metric=eval_metric,
            use_cuda=use_cuda, loss_function=None, beam_size=beam_size,
            beam_alpha=beam_alpha)
        #pylint: enable=unused-variable

        if "trg" in data_set.fields:
            decoding_description = "Greedy decoding" if beam_size == 0 else \
                "Beam search decoding with beam size = {} and alpha = {}".\
                    format(beam_size, beam_alpha)
            logger.info("%4s %s: %6.2f [%s]", data_set_name, eval_metric,
                        score, decoding_description)
        else:
            logger.info("No references given for %s -> no evaluation.",
                        data_set_name)

        if save_attention:
            if attention_scores:
                attention_name = "{}.{}.att".format(data_set_name, step)
                attention_path = os.path.join(model_dir, attention_name)
                logger.info(
                    "Saving attention plots. This might take a while...")
                store_attention_plots(attentions=attention_scores,
                                      targets=hypotheses_raw,
                                      sources=[s for s in data_set.src],
                                      indices=range(len(hypotheses)),
                                      output_prefix=attention_path)
                logger.info("Attention plots saved to: %s", attention_path)
            else:
                logger.warning("Attention scores could not be saved. "
                               "Note that attention scores are not available "
                               "when using beam search. "
                               "Set beam_size to 0 for greedy decoding.")

        if output_path is not None:
            output_path_set = "{}.{}".format(output_path, data_set_name)
            with open(output_path_set, mode="w", encoding="utf-8") as out_file:
                for hyp in hypotheses:
                    out_file.write(hyp + "\n")
            logger.info("Translations saved to: %s", output_path_set)
Example #44
0
def close_sketch_gaps(sketch: adsk.fusion.Sketch, tolerance, logger: logging.Logger):
    ao = apper.AppObjects()

    # factor = int(floor(1/tolerance))

    bounding_box = sketch.boundingBox
    min_x = bounding_box.minPoint.x
    min_y = bounding_box.minPoint.y
    max_x = bounding_box.maxPoint.x
    max_y = bounding_box.maxPoint.y

    factor = int(floor(2000 / ((max_x - min_x) + (max_y - min_y))))

    x_range = int(floor(factor * (max_x - min_x)))
    y_range = int(floor(factor * (max_y - min_y)))
    trans_x = round(0 - min_x, 6)
    trans_y = round(0 - min_y, 6)

    # Debug
    # str_comp = str(x_range) + ', ' + str(y_range)
    # ao.ui.messageBox(str_comp)
    #
    # str_comp = str(trans_x) + ', ' + str(trans_y)
    # ao.ui.messageBox(str_comp)

    grid = [[[] for i in range(x_range + 2)] for j in range(y_range + 2)]
    str_list = []
    constrained_points: int = 0
    sketch_point: adsk.fusion.SketchPoint
    for sketch_point in sketch.sketchPoints:
        if sketch_point.geometry.z == 0:
            if bounding_box.contains(sketch_point.worldGeometry):
                x_pos: int = int(floor(factor * (trans_x + sketch_point.worldGeometry.x)))
                y_pos: int = int(floor(factor * (trans_y + sketch_point.worldGeometry.y)))
                point_check_list = grid[y_pos][x_pos]
                point_merged = False
                for point_check in point_check_list:
                    if isinstance(point_check, adsk.fusion.SketchPoint):
                        if sketch_point.worldGeometry.distanceTo(point_check.worldGeometry) <= tolerance:
                            try:
                                sketch.geometricConstraints.addCoincident(sketch_point, point_check)
                                constrained_points += 1
                                point_merged = True

                            except:
                                logger.error(f"Constrain Points Error: {traceback.format_exc(2)}")

                if not point_merged:
                    grid[y_pos][x_pos].append(sketch_point)
                    grid[y_pos + 1][x_pos].append(sketch_point)
                    grid[y_pos - 1][x_pos].append(sketch_point)
                    grid[y_pos][x_pos + 1].append(sketch_point)
                    grid[y_pos + 1][x_pos + 1].append(sketch_point)
                    grid[y_pos - 1][x_pos + 1].append(sketch_point)
                    grid[y_pos][x_pos - 1].append(sketch_point)
                    grid[y_pos + 1][x_pos - 1].append(sketch_point)
                    grid[y_pos - 1][x_pos - 1].append(sketch_point)

                str_list.append(str(x_pos) + ', ' + str(y_pos))

        # ao.ui.messageBox(str(str_list))
    # if merged_points > 0:
    #     ao.ui.messageBox(f"Number of merged points: {merged_points}")
    if constrained_points > 0:
        logger.info(f"There were {constrained_points} gaps closed in {sketch.parentComponent.name} - {sketch.name}")
Example #45
0
@contextmanager  # requires: from contextlib import contextmanager
def timer(logger: logging.Logger, prefix: str) -> Iterator[None]:
    """Timed context manager"""
    start_time = time.time()
    yield
    logger.info(f"{prefix} took {time.time() - start_time:.3f} [s]")
Example #46
0
def label_to_proto(logger: Logger, label: str, text: str) -> proto.NerType:
    if label in ner_mapping.keys():
        return ner_mapping[label]
    else:
        logger.warn("Unsupported ner label {} for text {}".format(label, text))
        return proto.NerType.OTHER
Example #47
0
def _get_ngram_stats_df_core(symbol_order: List[str], symbols: SymbolIdDict,
                             trainset: PreparedDataList,
                             valset: PreparedDataList,
                             testset: PreparedDataList,
                             restset: PreparedDataList, n: int,
                             logger: Logger):
    logger.info(f"Get {n}-grams...")
    trn_symbols = [
        symbols.get_symbols(x.serialized_symbol_ids) for x in trainset.items()
    ]
    val_symbols = [
        symbols.get_symbols(x.serialized_symbol_ids) for x in valset.items()
    ]
    tst_symbols = [
        symbols.get_symbols(x.serialized_symbol_ids) for x in testset.items()
    ]
    rst_symbols = [
        symbols.get_symbols(x.serialized_symbol_ids) for x in restset.items()
    ]

    trn_symbols_one_gram = [get_ngrams(x, n=n) for x in trn_symbols]
    val_symbols_one_gram = [get_ngrams(x, n=n) for x in val_symbols]
    tst_symbols_one_gram = [get_ngrams(x, n=n) for x in tst_symbols]
    rst_symbols_one_gram = [get_ngrams(x, n=n) for x in rst_symbols]
    logger.info("Get stats...")

    occurences_count_df = get_occ_df_of_all_symbols(
        symbols=symbol_order,
        data_trn=trn_symbols_one_gram,
        data_val=val_symbols_one_gram,
        data_tst=tst_symbols_one_gram,
        data_rst=rst_symbols_one_gram,
    )
    occurences_count_df.columns = [
        FIRST_COL_NAME, 'TRAIN_OCCURRENCES_COUNT', 'VAL_OCCURRENCES_COUNT',
        'TEST_OCCURRENCES_COUNT', 'REST_OCCURRENCES_COUNT',
        'TOTAL_OCCURRENCES_COUNT'
    ]
    print(occurences_count_df)

    occurrences_percent_df = get_rel_occ_df_of_all_symbols(occurences_count_df)
    occurrences_percent_df.columns = [
        FIRST_COL_NAME, 'TRAIN_OCCURRENCES_PERCENT', 'VAL_OCCURRENCES_PERCENT',
        'TEST_OCCURRENCES_PERCENT', 'REST_OCCURRENCES_PERCENT'
    ]
    print(occurrences_percent_df)

    occurrences_distribution_percent_df = get_dist_among_other_symbols_df_of_all_symbols(
        occs_df=occurences_count_df,
        data_trn=trn_symbols_one_gram,
        data_val=val_symbols_one_gram,
        data_tst=tst_symbols_one_gram,
        data_rst=rst_symbols_one_gram,
    )
    occurrences_distribution_percent_df.columns = [
        FIRST_COL_NAME, 'TRAIN_OCCURRENCES_DISTRIBUTION_PERCENT',
        'VAL_OCCURRENCES_DISTRIBUTION_PERCENT',
        'TEST_OCCURRENCES_DISTRIBUTION_PERCENT',
        'REST_OCCURRENCES_DISTRIBUTION_PERCENT',
        'TOTAL_OCCURRENCES_DISTRIBUTION_PERCENT'
    ]
    print(occurrences_distribution_percent_df)

    utterance_occurrences_count_df = get_utter_occ_df_of_all_symbols(
        symbols=symbol_order,
        data_trn=trn_symbols_one_gram,
        data_val=val_symbols_one_gram,
        data_tst=tst_symbols_one_gram,
        data_rst=rst_symbols_one_gram,
    )
    utterance_occurrences_count_df.columns = [
        FIRST_COL_NAME, 'TRAIN_UTTERANCE_OCCURRENCES_COUNT',
        'VAL_UTTERANCE_OCCURRENCES_COUNT', 'TEST_UTTERANCE_OCCURRENCES_COUNT',
        'REST_UTTERANCE_OCCURRENCES_COUNT', 'TOTAL_UTTERANCE_OCCURRENCES_COUNT'
    ]
    print(utterance_occurrences_count_df)

    utterance_occurrences_percent_df = get_rel_utter_occ_df_of_all_symbols(
        utterance_occurrences_count_df)
    utterance_occurrences_percent_df.columns = [
        FIRST_COL_NAME, 'TRAIN_UTTERANCE_OCCURRENCES_PERCENT',
        'VAL_UTTERANCE_OCCURRENCES_PERCENT',
        'TEST_UTTERANCE_OCCURRENCES_PERCENT',
        'REST_UTTERANCE_OCCURRENCES_PERCENT'
    ]
    print(utterance_occurrences_percent_df)

    uniform_occurrences_count_df = get_uniform_distr_df_for_occs(
        symbols=symbol_order,
        occ_df=occurences_count_df,
    )
    uniform_occurrences_count_df.columns = [
        FIRST_COL_NAME, 'TRAIN_UNIFORM_OCCURRENCES_COUNT',
        'VAL_UNIFORM_OCCURRENCES_COUNT', 'TEST_UNIFORM_OCCURRENCES_COUNT',
        'REST_UNIFORM_OCCURRENCES_COUNT', 'TOTAL_UNIFORM_OCCURRENCES_COUNT'
    ]
    print(uniform_occurrences_count_df)

    uniform_occurrences_percent_df = get_rel_uniform_distr_df_for_occs(
        symbols=symbol_order, )
    uniform_occurrences_percent_df.columns = [
        FIRST_COL_NAME, 'UNIFORM_OCCURRENCES_PERCENT'
    ]
    print(uniform_occurrences_percent_df)

    return occurences_count_df, occurrences_percent_df, occurrences_distribution_percent_df, utterance_occurrences_count_df, utterance_occurrences_percent_df, uniform_occurrences_count_df, uniform_occurrences_percent_df
Example #48
0
async def shutdown(  # type: ignore[no-untyped-def]
        loop: AbstractEventLoop,
        logger: logging.Logger,
        teardown: AsyncFunction,
        signal=None  # a named enum of ints
) -> None:
    '''Cancel active tasks for shutdown'''
    if signal:
        logger.info(f'Received exit signal {signal.name}')
    else:
        logger.info('Unexpected shutdown initiated')
        await asyncio.sleep(5)  # stall error loops

    if teardown:
        try:
            await teardown()
        except Exception:
            logger.exception('Error during teardown function')
            logger.error('Exiting uncleanly')
            sys.exit(1)

    tasks = [
        t for t in asyncio.all_tasks() if t is not asyncio.current_task()
    ]

    logger.info(f'Cancelling {len(tasks)} tasks')
    for task in tasks:
        task.cancel()

    try:
        await asyncio.gather(*tasks, return_exceptions=True)
    except Exception:
        logger.exception('Error during loop task cancellation')
        logger.error('Exiting uncleanly')
        sys.exit(1)

    loop.stop()
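A hedged sketch of wiring the shutdown() coroutine above to POSIX signals; the teardown coroutine and handler registration are illustrative (loop.add_signal_handler is only available on Unix event loops):

import asyncio
import logging
import signal as signals

async def _teardown() -> None:
    logging.getLogger("app").info("closing resources")

def install_shutdown_handlers(loop: asyncio.AbstractEventLoop, logger: logging.Logger) -> None:
    # Register SIGINT/SIGTERM so either triggers a graceful shutdown.
    for sig in (signals.SIGINT, signals.SIGTERM):
        loop.add_signal_handler(
            sig,
            lambda s=sig: asyncio.create_task(shutdown(loop, logger, _teardown, signal=s)),
        )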
Example #49
0
def debug_exc_log(lg: logging.Logger, exc: Exception, msg: str = None) -> None:
    """Logs an exception if logging is set to DEBUG level"""
    if lg.getEffectiveLevel() <= logging.DEBUG:
        if msg is None:
            msg = f"{exc}"
        lg.exception(msg, exc_info=exc)
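A small usage sketch for debug_exc_log() above; the logger name and the deliberately failing division are illustrative:

import logging

logging.basicConfig()
lg = logging.getLogger("demo")
lg.setLevel(logging.DEBUG)

try:
    1 / 0
except ZeroDivisionError as exc:
    # The full traceback is only emitted because the effective level is DEBUG.
    debug_exc_log(lg, exc, "division failed")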
Example #50
0
    # Inner wrapper of a Lambda warm-up decorator: `func` and `Logger` come from
    # the enclosing decorator / module scope in the original project.
    def wrapper(*args, **kwargs):
        source = args[0].get('source')
        if source == 'serverless-plugin-warmup':
            Logger.info('WarmUp - Lambda is warm!')
            return {}
        return func(*args, **kwargs)
Example #51
0
async def _generate_app_chunks(
        app_path: str, logger: Logger) -> AsyncIterator[InstallRequest]:
    logger.debug(f"Generating chunks for .app {app_path}")
    async for chunk in tar.generate_tar([app_path]):
        yield InstallRequest(payload=Payload(data=chunk))
    logger.debug(f"Finished generating .app chunks {app_path}")
Example #52
0
    def __init__(self, item: str, log: logging.Logger, datatracker: str):
        self.name = basename(item)
        self.revision = revision(item)
        self.path = path(item)
        with tempfile.TemporaryDirectory() as tmp:
            current_directory = os.getcwd()
            log.debug("tmp dir %s", tmp)
            self.orig = ""
            if item != "/dev/stdin":
                os.chdir(tmp)
                orig_item = os.path.basename(item)
                get_items([orig_item], log, datatracker)
                self.orig = read(orig_item, log)
                os.chdir(current_directory)
            self.current = read(item, log)
            if not self.orig:
                log.error(
                    "No original for %s, cannot review, "
                    "only performing checks",
                    item,
                )
        self.orig_lines = self.orig.splitlines(keepends=True)
        self.current_lines = self.current.splitlines(keepends=True)

        # difflib can't deal with single lines it seems
        if len(self.orig_lines) == 1:
            self.orig_lines.append("\n")
        if len(self.current_lines) == 1:
            self.current_lines.append("\n")

        # set status
        status = re.search(
            r"^(?:[Ii]ntended )?[Ss]tatus:\s*((?:\w+\s)+)",
            self.orig,
            re.MULTILINE,
        )
        self.status = status.group(1).strip() if status else ""

        # extract relationships
        self.relationships = {}
        rel_pat = {"updates": r"[Uu]pdates", "obsoletes": r"[Oo]bsoletes"}
        for rel in ["updates", "obsoletes"]:
            match = re.search(
                r"^" + rel_pat[rel] + r":\s*((?:(?:RFC\s*)?\d{3,},?\s*)+)" +
                r"(?:.*[\n\r\s]+((?:(?:RFC\s*)?\d{3,},?\s*)+)?)?",
                self.orig,
                re.MULTILINE,
            )
            if match:
                tmp = "".join([group for group in match.groups() if group])
                tmp = re.sub("rfc", "", tmp, flags=re.IGNORECASE)
                tmp = re.sub(r"[,\s]+(\w)", r",\1", tmp)
                self.relationships[rel] = [
                    r for r in tmp.strip().split(",") if r
                ]

        in_abstract = False
        abstract = ""
        for line in self.orig_lines:
            pot_sec = SECTION_PATTERN.search(line)
            if pot_sec:
                which = pot_sec.group(0)
                if re.search(r"^Abstract", which):
                    in_abstract = True
                    continue
                if abstract:
                    break
            if in_abstract:
                abstract += line
        self.abstract = unfold(abstract).strip()

        self.meta = fetch_meta(datatracker, self.name, log)
        self.is_id = self.name.startswith("draft-")

        parts = {"text": "", "informative": "", "normative": ""}
        part = "text"
        for line in self.orig_lines:
            pot_sec = SECTION_PATTERN.search(line)
            if pot_sec:
                which = pot_sec.group(0)
                if re.search(
                        r"^(?:(\d\.?)+\s+)?(?:Non-Norm|Inform)ative\s+References?\s*$",
                        which,
                        flags=re.IGNORECASE,
                ):
                    part = "informative"
                elif re.search(
                        r"^(?:(\d\.?)+\s+)?(Normative\s+)?References?\s*$",
                        which,
                        flags=re.IGNORECASE,
                ):
                    part = "normative"
                else:
                    part = "text"
            parts[part] += line

        refs = {}
        for part, content in parts.items():
            refs[part] = re.findall(
                r"(\[(?:\d+|[a-z]+(?:[-_.]?\w+)*)\]" +
                (r"|RFC\d+|draft-[-a-z\d_.]+" if part == "text" else r"") +
                r")",
                unfold(content),
                flags=re.IGNORECASE,
            )
            refs[part] = list({f"[{untag(ref)}]" for ref in refs[part]})

        self.references = {}
        for part in ["informative", "normative"]:
            self.references[part] = []
            for ref in refs[part]:
                ref_match = re.search(
                    r"\s*" + re.escape(ref) + r"\s+((?:[^\n][\n]?)+)\n",
                    parts[part],
                    re.DOTALL,
                )
                if ref_match:
                    ref_text = unfold(ref_match.group(0))
                    found = False

                    for pat in [r"(draft-[-a-z\d_.]+)", r"((?:RFC|rfc)\d+)"]:
                        match = re.search(pat, ref_text)
                        if match:
                            found = True
                            self.references[part].append(
                                (ref, match.group(0).lower()))
                            break

                    if not found:
                        urls = extract_urls(ref_text, log, True, True)
                        self.references[part].append(
                            (ref, urls.pop() if urls else None))
        self.references["text"] = refs["text"]
Example #53
0
    def __init__(self, logger: logging.Logger) -> None:
        self._logger = logger.getChild("job_manager")
        super(BackgroundJobManager, self).__init__()
Example #54
0
def generate_n_single_target_tree_rules(
    n_tree_rules_to_generate: int,
    prepared_data: PreparedDataForTargetSet,
    encoding_book_keeper: EncodingBookKeeper,
    min_support: float,
    max_depth: int,
    logger: Logger,
    random_forest_abs_file_name: str,
    seed: Optional[int] = None,
) -> Tuple[List[MCAR], TreeRuleGenTimingInfo]:
    if seed is None:
        raise Exception()
    if n_tree_rules_to_generate <= 0:
        raise Exception(
            f"n_tree_rules_to_generate = {n_tree_rules_to_generate} but should be larger than 0"
        )

    logger.info(
        f'Start generating tree rules... Goal number: {n_tree_rules_to_generate}'
    )

    # nb_of_trees_to_use: int = 1
    # nb_of_tree_based_rules_after_conversion: int = 0
    # current_rf_list: Optional[List[Tuple[PreparedDataForTargetSet, RandomForestClassifier]]] = None

    # prepared_data_list: List[PreparedDataForTargetSet] = []
    # for original_target_attribute_groups in attr_group_partitioning_list:
    #     attr_group: AttrGroup
    #     for attr_group in original_target_attribute_groups:
    #         prepared_data: PreparedDataForTargetSet = get_prepared_data_for_attr_group(
    #             original_group_to_predict=attr_group,
    #             df_original=df_original,
    #             df_one_hot_encoded=df_one_hot_encoded,
    #             encoding_book_keeper=encoding_book_keeper
    #         )
    #         prepared_data_list.append(prepared_data)

    optional_rf_classifier: Optional[RandomForestClassifier]
    total_time_decision_tree_learning_s: TimeDiffSec
    optional_rf_classifier, total_time_decision_tree_learning_s \
        = search_nb_of_single_target_trees_to_use(
            n_tree_rules_to_generate=n_tree_rules_to_generate,
            prepared_data=prepared_data,
            min_support=min_support,
            max_depth=max_depth,
            logger=logger,
            seed=seed
        )

    # -----------------------------------------------------------------------------------------------------------
    if optional_rf_classifier is None:
        raise Exception()
    else:
        logger.info(
            f'Learned RF has {len(optional_rf_classifier.estimators_)} trees')

        tree_based_rules: List[MCAR]
        total_time_rf_conversion_s: TimeDiffSec
        tree_based_rules, total_time_rf_conversion_s = convert_random_forest_to_rules(
            random_forest_clf=optional_rf_classifier,
            df_original_without_nans=prepared_data.
            df_original_without_nans_for_targets,
            descriptive_one_hot_encoded_column_names=prepared_data.
            descriptive_one_hot_encoded_columns,
            target_attribute_names=prepared_data.
            target_one_hot_encoded_columns,
            encoding_book_keeper=encoding_book_keeper,
            logger=logger)

        store_classifier(SingleTargetClassifierIndicator.random_forest,
                         random_forest_abs_file_name, optional_rf_classifier)
        logger.info(f"Wrote RF to {random_forest_abs_file_name}")

        if len(tree_based_rules) > n_tree_rules_to_generate:
            tree_based_rules = random.sample(tree_based_rules,
                                             n_tree_rules_to_generate)

        logger.info(
            f"REALITY: found {len(tree_based_rules)} tree based rules, wanted {n_tree_rules_to_generate}"
        )
        for i in range(0, len(tree_based_rules)):
            logger.info(str(tree_based_rules[i]))
            if i > 10:
                break

        tree_rule_gen_timing_info = TreeRuleGenTimingInfo(
            total_time_decision_tree_learning_s=
            total_time_decision_tree_learning_s,
            total_time_rf_conversion_s=total_time_rf_conversion_s)

        return tree_based_rules, tree_rule_gen_timing_info
Example #55
0
    def __init__(self, name: str, token: str,
                 parent_logger: logging.Logger) -> None:
        self.log = parent_logger.getChild(name)
        self.token = token
Example #56
0
async def call_kubeapi(
    method: Callable[..., Awaitable],
    logger: logging.Logger,
    *,
    continue_on_absence=False,
    continue_on_conflict=False,
    namespace: str = None,
    body: K8sModel = None,
    **kwargs,
) -> Optional[Awaitable[K8sModel]]:
    """
    Await a Kubernetes API method and return its result.

    If the API fails with an HTTP 404 NOT FOUND error and
    ``continue_on_absence`` is set to ``True`` a warning is raised and
    ``call_kubeapi`` returns ``None``.

    If the API fails with an HTTP 409 CONFLICT error and
    ``continue_on_conflict`` is set to ``True`` a warning is raised and
    ``call_kubeapi`` returns ``None``.

    In case of any other error or when either option is set to ``False``
    (default) the :exc:`kubernetes_asyncio.client.exceptions.ApiException` is
    re-raised.

    :param method: A Kubernetes API function which will be called with
        ``namespace`` and ``body``, if provided, and all other ``kwargs``. The
        function will also be awaited and the response returned.
    :param logger:
    :param continue_on_absence: When ``True``, emit a warning instead of an
        error on HTTP 404 responses.
    :param continue_on_conflict: When ``True``, emit a warning instead of an
        error on HTTP 409 responses.
    :param namespace: The namespace passed to namespaced K8s API endpoints.
    :param body: The body passed to the K8s API endpoints.
    """
    try:
        if namespace is not None:
            kwargs["namespace"] = namespace
        if body is not None:
            kwargs["body"] = body
        return await method(**kwargs)
    except ApiException as e:
        if (e.status == 409 and continue_on_conflict
                or e.status == 404 and continue_on_absence):
            msg = ["Failed", "creating" if e.status == 409 else "deleting"]
            args = []

            if body:
                if e.status == 409:
                    # For 404 the body is `V1DeleteOptions`; not very helpful.
                    msg.append("%s")
                    args.append(body.__class__.__name__)

                if namespace:
                    obj_name = None
                    if e.status == 404:
                        # Let's try the explicit name
                        obj_name = kwargs.get("name")
                    if obj_name is None:
                        obj_name = getattr(getattr(body, "metadata", None),
                                           "name", "<unknown>")
                    msg.append("'%s/%s'")
                    args.extend([namespace, obj_name])

            cause = "already exists" if e.status == 409 else "doesn't exist"
            msg.append(f"because it {cause}. Continuing.")
            logger.info(" ".join(msg), *args)
            return None
        else:
            raise
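A hedged usage sketch for call_kubeapi() above; the ConfigMap creation and client objects assume the standard kubernetes_asyncio client API and are illustrative only:

import logging

from kubernetes_asyncio import client

async def ensure_example_config_map(logger: logging.Logger) -> None:
    core = client.CoreV1Api()
    body = client.V1ConfigMap(metadata=client.V1ObjectMeta(name="example-config"))
    # Returns None instead of raising if the ConfigMap already exists (HTTP 409).
    await call_kubeapi(
        core.create_namespaced_config_map,
        logger,
        continue_on_conflict=True,
        namespace="default",
        body=body,
    )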
Example #57
0
import os
import platform
import tarfile
import tempfile
from io import BytesIO, FileIO
from logging import Logger
from shutil import rmtree
from zipfile import ZipFile
import requests

logger = Logger('KINDLEGEN')

WINDOWS_URL = 'http://kindlegen.s3.amazonaws.com/kindlegen_win32_v2_9.zip'
MACOS_URL = 'http://kindlegen.s3.amazonaws.com/KindleGen_Mac_i386_v2_9.zip'
LINUX_URL = 'http://kindlegen.s3.amazonaws.com/kindlegen_linux_2.6_i386_v2_9.tar.gz'


def get_url_by_platform():
    if platform.system() == 'Linux':
        return LINUX_URL
    elif platform.system() == 'Darwin':
        return MACOS_URL
    elif platform.system() == 'Windows':
        return WINDOWS_URL
    else:
        raise Exception('Unrecognized platform')
    # end if


# end def
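A hedged sketch of how the platform URL above might be consumed, reusing the imports already present in this example; download_kindlegen is an illustrative helper name, not part of the original module:

def download_kindlegen(dest_dir: str) -> None:
    url = get_url_by_platform()
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    payload = BytesIO(response.content)
    if url.endswith('.zip'):
        ZipFile(payload).extractall(dest_dir)
    else:
        with tarfile.open(fileobj=payload, mode='r:gz') as archive:
            archive.extractall(dest_dir)
    logger.info('kindlegen extracted to %s', dest_dir)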
Example #58
0
def analyze_files(filepath: str, exclude: list, lazy: bool,
                  logger: logging.Logger) -> list:
    """Analyze files for vulnerabilities.

    Parameters
    ----------
    filepath : str
        Path where to find the file(s)
    exclude : list
        List of regular expressions to exclude in `path`
    lazy : bool
        Ignore mutually exclusive paths through methods
    logger : logging.Logger
        The logger to use for runtime output

    Returns
    -------
    list
        List of all the analyses
    """
    analyses = []
    rulesets = dict()
    # Detect files
    for filename in find_files(filepath, exclude=exclude):
        logger.info('Now processing "%s".', filename)
        input_file = InputFile(filename)
        # Prepare file for analysis
        input_file.detect_filetype()
        try:
            grammar_module = importlib.import_module(
                f'modules.{input_file.module}.grammar')
            grammar = grammar_module.Grammar(input_file)
            logger.info('Starting analysis for "%s".', input_file.path)
            ruleset = rulesets.get(input_file.module)
            if not ruleset:
                # Load new ruleset
                rulesets[input_file.module] = Ruleset(input_file.module)
                ruleset = rulesets[input_file.module]
            analysis = Analysis(grammar, ruleset)
            for method in analysis.methods:
                # Analyze method
                analysis.calculate_complexity(method)
                analysis.follow_variables(method)
                analysis.fix_object_names(method)
                all_sources = analysis.find_sources(method)
                all_sinks = analysis.find_sinks(method)
                all_sanitizers = analysis.find_sanitizers(method)
                if not lazy:
                    analysis.find_paths_through(method)
                else:
                    # Assume single path through method, ignore mutually exclusive paths
                    method.paths = [[(method.start, method.end)]]
                for path in method.paths:
                    # Analyze individual paths through the method
                    method.sources = copy(all_sources)
                    method.sinks = copy(all_sinks)
                    method.sanitizers = copy(all_sanitizers)
                    analysis.find_taints(method, path)
                if len(method.paths) > 1:
                    # We use multiple paths to better detect taints, but we still need all the
                    # sinks, so another round through the whole method is necessary here
                    method.sources = all_sources
                    method.sinks = all_sinks
                    method.sanitizers = all_sanitizers
                    taints = method.taints
                    analysis.find_taints(method, [(method.start, method.end)])
                    method.taints = taints
            analyses.append(analysis)
        except ModuleNotFoundError:
            logger.error('No grammar found for "%s".', input_file.module)
    return analyses
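A brief usage sketch for analyze_files() above; the target path and exclude pattern are placeholders:

import logging

logging.basicConfig(level=logging.INFO)
scan_logger = logging.getLogger("scanner")

analyses = analyze_files("src/", exclude=[r"tests/"], lazy=False, logger=scan_logger)
scan_logger.info("Completed %d analyses.", len(analyses))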
Example #59
0
def run_check_with_model(model_with_type_info: onnx.ModelProto,
                         mobile_pkg_build_config: pathlib.Path,
                         logger: logging.Logger):
    '''
    Check if an ONNX model can be used with the ORT Mobile pre-built package.
    :param model_with_type_info: ONNX model that has had ONNX shape inferencing run on to add type/shape information.
    :param mobile_pkg_build_config: Configuration file used to build the ORT Mobile package.
    :param logger: Logger for output
    :return: True if supported
    '''
    if not mobile_pkg_build_config:
        mobile_pkg_build_config = get_default_config_path()

    enable_type_reduction = True
    config_path = str(mobile_pkg_build_config.resolve(strict=True))
    required_ops, op_type_impl_filter = parse_config(config_path,
                                                     enable_type_reduction)
    global_onnx_tensorproto_types, special_types = _get_global_tensorproto_types(
        op_type_impl_filter, logger)

    # get the opset imports
    opsets = get_opsets_imported(model_with_type_info)

    # If the ONNX opset of the model is not supported we can recommend using our tools to update that first.
    supported_onnx_opsets = set(required_ops['ai.onnx'].keys())
    # we have a contrib op that is erroneously in the ai.onnx domain with opset 1. manually remove that incorrect value
    supported_onnx_opsets.remove(1)
    onnx_opset_model_uses = opsets['ai.onnx']
    if onnx_opset_model_uses not in supported_onnx_opsets:
        logger.info(f'Model uses ONNX opset {onnx_opset_model_uses}.')
        logger.info(
            f'The pre-built package only supports ONNX opsets {sorted(supported_onnx_opsets)}.'
        )
        logger.info(
            'Please try updating the ONNX model opset to a supported version using '
            'python -m onnxruntime.tools.onnx_model_utils.update_onnx_opset ...'
        )

        return False

    unsupported_ops = set()
    logger.debug(
        'Checking if the data types and operators used in the model are supported '
        'in the pre-built ORT package...')
    unsupported = check_graph(model_with_type_info.graph, opsets, required_ops,
                              global_onnx_tensorproto_types, special_types,
                              unsupported_ops, logger)

    if unsupported_ops:
        logger.info('Unsupported operators:')
        for entry in sorted(unsupported_ops):
            logger.info('  ' + entry)

    if unsupported:
        logger.info(
            '\nModel is not supported by the pre-built package due to unsupported types and/or operators.'
        )
        logger.info(
            'Please see https://onnxruntime.ai/docs/reference/mobile/prebuilt-package/ for information '
            'on what is supported in the pre-built package.')
        logger.info(
            'A custom build of ONNX Runtime will be required to run the model. Please see '
            'https://onnxruntime.ai/docs/build/custom.html for details on performing that.'
        )
    else:
        logger.info('Model should work with the pre-built package.')

    logger.info('---------------\n')

    return not unsupported
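A hedged sketch of invoking run_check_with_model() above; the model and build-config paths are placeholders, and onnx.load plus onnx.shape_inference.infer_shapes are the standard helpers for adding the type/shape information the docstring expects:

import logging
import pathlib

import onnx

logging.basicConfig(level=logging.INFO)
check_logger = logging.getLogger("ort_mobile_check")

model = onnx.load("model.onnx")                 # placeholder model path
model_with_types = onnx.shape_inference.infer_shapes(model)
ok = run_check_with_model(
    model_with_types,
    pathlib.Path("required_operators.config"),  # placeholder package build config
    check_logger,
)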
Example #60
0
def trinity_boot(args: Namespace, trinity_config: TrinityConfig,
                 extra_kwargs: Dict[str, Any], plugin_manager: PluginManager,
                 listener: logging.handlers.QueueListener, event_bus: EventBus,
                 main_endpoint: Endpoint, logger: logging.Logger) -> None:
    # start the listener thread to handle logs produced by other processes in
    # the local logger.
    listener.start()

    networking_endpoint = event_bus.create_endpoint(
        NETWORKING_EVENTBUS_ENDPOINT)
    event_bus.start()

    # First initialize the database process.
    database_server_process = ctx.Process(
        target=run_database_process,
        args=(
            trinity_config,
            LevelDB,
        ),
        kwargs=extra_kwargs,
    )

    networking_process = ctx.Process(
        target=launch_node,
        args=(
            args,
            trinity_config,
            networking_endpoint,
        ),
        kwargs=extra_kwargs,
    )

    # start the processes
    database_server_process.start()
    logger.info("Started DB server process (pid=%d)",
                database_server_process.pid)

    # networking process needs the IPC socket file provided by the database process
    try:
        wait_for_ipc(trinity_config.database_ipc_path)
    except TimeoutError as e:
        logger.error("Timeout waiting for database to start.  Exiting...")
        kill_process_gracefully(database_server_process, logger)
        ArgumentParser().error(message="Timed out waiting for database start")

    networking_process.start()
    logger.info("Started networking process (pid=%d)", networking_process.pid)

    main_endpoint.subscribe(
        ShutdownRequest, lambda ev: kill_trinity_gracefully(
            logger, database_server_process, networking_process,
            plugin_manager, main_endpoint, event_bus, ev.reason))

    plugin_manager.prepare(args, trinity_config, extra_kwargs)

    try:
        loop = asyncio.get_event_loop()
        loop.run_forever()
        loop.close()
    except KeyboardInterrupt:
        kill_trinity_gracefully(logger,
                                database_server_process,
                                networking_process,
                                plugin_manager,
                                main_endpoint,
                                event_bus,
                                reason="CTRL+C / Keyboard Interrupt")