Example #1
 def dumpRunArgs( self, args ):
     """
     Dump all command line arguments and their values
     """
     d = vars( args )
     for a in d:
         dblog.log( 'args', '%s: %s' % ( a, d[ a ] ), level = logging.DEBUG )
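
The method above simply walks vars(args) on an argparse Namespace. A minimal standalone sketch of the same pattern, using the standard logging module in place of the project's dblog wrapper (which is not shown in these excerpts); the two flags mirror args.start and args.skip_urls used in the run() example but are only illustrative here:

import argparse
import logging

logging.basicConfig(level=logging.DEBUG, format='%(message)s')

parser = argparse.ArgumentParser()
parser.add_argument('--start', default=None)
parser.add_argument('--skip-urls', action='store_true')
args = parser.parse_args(['--skip-urls'])

# vars() turns the Namespace into a plain dict of argument names and values
for name, value in vars(args).items():
    logging.debug('args | %s: %s', name, value)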
Example #2
 def dumpConf( self, level = logging.DEBUG ):
     """
     Dump all configuration variables and their values
     """
     for s in self.configParser.sections( ):
         for o in sorted( self.configParser.options( s ) ):
             dblog.log( 'conf', '%s: %s' % ( o.title( ), self.getConf( s, o ) ), level = level )
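
For context, the same section/option walk with nothing but the standard configparser module; the real method reads values through self.getConf(), which these excerpts do not show, so cp.get() stands in for it, and the [options] section and its keys are illustrative only:

import configparser
import logging

logging.basicConfig(level=logging.DEBUG, format='%(message)s')

cp = configparser.ConfigParser()
cp.read_string('[options]\nselfies = True\nselfies_dir = ./selfies\n')

for s in cp.sections():
    for o in sorted(cp.options(s)):
        logging.debug('conf | %s: %s', o.title(), cp.get(s, o))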
Example #3
 def visitUrl( self, url ):
     """
     Visit a url with the browser
     """
     dblog.log( 'visit', url )
     self.browser( ).get( url )
     self.stats[ 'visited' ] += 1
Example #4
def takeSelfie( db, url ):
    """
    Captures a screenshot of the browser, the url is what was sent to handleUrl(),
    thus the browser may have fired an action handler, so we shouldn't expect a screenshot
    to always be the same as browser.get(url).
    """
    # build a unique filename for this running instance
    filename = '%s-%s.png' % ( time.time( ), selfieName( url ) )

    # determine where we are putting these selfies
    dir = db.getConf( conf.conf.OPTIONS, conf.conf.OPTION_SELFIES_DIR )

    # ensure this directory actually exists
    if not os.path.isdir( dir ):
        os.makedirs( dir )

    # ensure we're only working with an absolute path here
    dir = os.path.abspath( dir )

    # put them together and what do we get? a nice consistent dir of filenames with timestamps
    # that can be scanned and used as frames in a movie / sequence
    fileFull = dir + '/' + filename
    if db.browser( ).get_screenshot_as_file( fileFull ):
        dblog.log( 'selfie', 'Saved [ %s ]' % fileFull )
    else:
        dblog.log( 'selfie', 'Failed to save [ %s ]' % fileFull, level = logging.WARNING )
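
The path handling above joins dir + '/' + filename by hand; a hedged sketch of the same timestamped-filename convention built with os.path.join and makedirs(exist_ok=True), where the project's selfieName() helper (not shown here) is replaced by a plain name argument:

import os
import time

def buildSelfiePath(baseDir, name):
    baseDir = os.path.abspath(baseDir)
    os.makedirs(baseDir, exist_ok=True)            # no-op if the directory already exists
    filename = '%s-%s.png' % (time.time(), name)   # timestamp keeps filenames unique and sortable
    return os.path.join(baseDir, filename)

print(buildSelfiePath('selfies', 'duckduckgo.com'))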
Example #5
 def dumpProcessedUrls( self, level = logging.DEBUG ):
     """
     Dump all the URLs that have been processed and when they were processed
     """
     for url in self.processedUrls:
         for ts in reversed( self.processedUrls[ url ] ):
             dblog.log( 'urlhist', '%s %s %s' % ( ts.strftime( '%Y-%m-%d %H:%M:%S' ), dblog.LOG_PIPE_CHAR, url ),
                        level = level )
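
The loop implies that self.processedUrls maps each URL to a list of timestamps, printed newest first; that structure is only inferred from this excerpt, so the sketch of the surrounding bookkeeping below is a guess:

from datetime import datetime

processedUrls = {}

def recordProcessed(url):
    # append a visit timestamp for this URL, creating the list on first sight
    processedUrls.setdefault(url, []).append(datetime.now())

recordProcessed('https://duckduckgo.com/')
for url, stamps in processedUrls.items():
    for ts in reversed(stamps):
        print('%s | %s' % (ts.strftime('%Y-%m-%d %H:%M:%S'), url))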
Example #6
 def waitUntil(self, whenStr):
     startDtm = dateutil.parser.parse(str(whenStr))
     nowDtm   = datetime.now()
     diff     = (startDtm-nowDtm)
     diffSec  = round(diff.total_seconds(),0)
     if diffSec > 0:
         dblog.log('wait','Waiting %s before starting, resuming at %s' % ( diff, startDtm ) )
         self.idle( diffSec )
         dblog.log('wait','Finished waiting, target time = %s' % startDtm )
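
A standalone sketch of the same wait-time arithmetic, assuming the python-dateutil package is available; time.sleep() stands in for the class's idle() helper, which is not part of these excerpts:

import time
from datetime import datetime, timedelta

import dateutil.parser

def waitUntil(whenStr):
    startDtm = dateutil.parser.parse(str(whenStr))
    diffSec = round((startDtm - datetime.now()).total_seconds(), 0)
    if diffSec > 0:
        time.sleep(diffSec)

waitUntil(datetime.now() + timedelta(seconds=2))   # returns after roughly two seconds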
Example #7
 def logStats( self ):
     """
     Log start-finish stats in a formatted manner
     """
     dur = self.stats[ 'tock' ] - self.stats[ 'tick' ]
     hrRate = float( ( 60 * 60 ) / dur.total_seconds( ) )
     total = self.stats[ 'skipped' ] + self.stats[ 'handled' ] + self.stats[ 'visited' ]
     dblog.log( 'stat', 'Skipped: %s [ %.2f url/hr ]' % ( self.stats[ 'skipped' ], hrRate * self.stats[ 'skipped' ] ) )
     dblog.log( 'stat', 'Handled: %s [ %.2f url/hr ]' % ( self.stats[ 'handled' ], hrRate * self.stats[ 'handled' ] ) )
     dblog.log( 'stat', 'Visited: %s [ %.2f url/hr ]' % ( self.stats[ 'visited' ], hrRate * self.stats[ 'visited' ] ) )
     dblog.log( 'stat', 'Total:   %s [ %.2f url/hr ]' % ( total, hrRate * total ) )
     dblog.log( 'stat', 'Elapsed: %s' % dur )
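
The rate math is an hourly extrapolation: hrRate is 3600 divided by the elapsed seconds, and each counter is multiplied by it. A quick worked check, assuming a 30-minute run:

from datetime import timedelta

dur = timedelta(minutes=30)
hrRate = (60 * 60) / dur.total_seconds()     # 3600 / 1800 = 2.0
visited = 45
print('Visited: %s [ %.2f url/hr ]' % (visited, hrRate * visited))   # 90.00 url/hr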
Example #8
    def loadList( self, filename ):
        """
        Load white/black list from the location and read in each line as a new regex entry
        """
        entries = [ ]
        # give lists an existential crisis test
        if os.path.isfile( filename ):
            with open( filename ) as f:
                entries = [ x.strip( '\n' ) for x in f.readlines( ) ]
                dblog.log( 'list', 'Found %d records from %s' % ( len( entries ), filename ) )

        return entries
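
Usage sketch with a throwaway list file (the path and patterns are illustrative only): each stripped line becomes one regex entry, and a missing file simply yields an empty list.

import os
import tempfile

path = os.path.join(tempfile.mkdtemp(), 'whitelist.txt')
with open(path, 'w') as f:
    f.write('duckduckgo\\.com\n^https://\n')

with open(path) as f:
    entries = [line.strip('\n') for line in f.readlines()]
print(entries)   # ['duckduckgo\\.com', '^https://']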
Example #9
    def tryUrlHandler( self, url, handler ):
        """
        Try a regex-handleFunc pair for the URL
        """
        pattern = re.compile( handler[ 0 ] )
        match = pattern.findall( url )
        # do we have a match?
        if len( match ):
            handleFunc = handler[ 1 ]
            dblog.log( 'handler', "'%s' handling url [ %s ]" % ( handleFunc.__name__, url ) )
            # use the handler and the regex match for the URL
            handleFunc( self, url, match )
            # mark it handled and move on
            self.stats[ 'handled' ] += 1
            self.postHandleUrl( url )
            return True

        return False
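
A minimal sketch of the (regex, handler) pairs this dispatcher expects; the handler, pattern, and URL below are illustrative stand-ins, not part of the project:

import re

def handleSearch(owner, url, match):
    print('search terms:', match)

handlers = [
    (r'[?&]q=([^&]+)', handleSearch),
]

url = 'https://duckduckgo.com/?q=dirty+boots'
for pattern, handleFunc in handlers:
    match = re.compile(pattern).findall(url)
    if match:
        handleFunc(None, url, match)   # the owning object is omitted in this sketch
        break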
Example #10
    def run(self):
        """
        Browse the internet in a similar manner to the history database
        """
        # handle all the arg setup / parsing
        parser = argparse.ArgumentParser(
            description='Launch cruise-control for websites',
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
            usage='%(prog)s run'
            if not self.txtFile else '%(prog)s txt [txtFile]')
        self.addConfigParserArgs(parser)
        self.addLocationParserArgs(parser)
        self.addRunParserArgs(parser)
        self.addLogParserArgs(parser)
        args = self.parseAndMergeArgs(parser)
        self.skipHandling = args.skip_urls

        # setup data manager and browser
        self.runBootstrap()

        if self.txtFile:
            self.urllist = self.loadList(args.txtFile)

        # wait until our start dtm
        self.user.waitUntil(args.start)

        # begin magic
        # self.urlHistory( )
        # self.urlHistory( '%duckduckgo.com/%' )
        self.simulateRealtime()

        # memories to last a lifetime
        if self.getConf(conf.conf.OPTIONS, conf.conf.OPTION_SELFIES) == 'True':
            dir = self.getConf(conf.conf.OPTIONS, conf.conf.OPTION_SELFIES_DIR)
            nameNoExt = '%s-selfies' % str(time.time()).split('.')[0]
            outputFileName = selfies.compileSelfies(dir, nameNoExt)
            dblog.log('selfie',
                      'Compiled selfie movie at [ %s ]' % outputFileName)

        # Shuuuut iiit doooooown
        self.shutdown()
Example #11
    def typeKeys( self, db, inputElem, text, sloppify = True, enableDrunkRate = False ):
        """
        Types keys as if the user did it themselves, also has fuzz-factor
        """
        errRate = self.typingRand
        speedWpm = self.typingWpm
        if enableDrunkRate:
            errRate  *= self.DRUNK_ERR_FACTOR
            speedWpm *= self.DRUNK_WPM_FACTOR

        if sloppify:
            # mangle and sloppify the string to a more drunk-like string
            sloppyText = self.sloppify( text, errRate )
            # log original and sloppy text, replace unicode backspace with '\b' for easier reading in log output
            dblog.log( 'user', 'Sloppifying [ %s ] -> [ %s ]' % ( text, sloppyText.replace( Keys.BACKSPACE, '\\b' ) ) )
            text = sloppyText

        # send each character and wait a fuzzed duration
        for c in text:
            inputElem.send_keys( c )
            self.reactKeyPress( speedWpm )
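
reactKeyPress() is not shown in these excerpts; one plausible way it could pace keystrokes from a WPM figure, assuming the common five-characters-per-word convention, is sketched below.

import random
import time

def keystrokeDelay(speedWpm, jitter=0.25):
    base = 60.0 / (speedWpm * 5)                    # 60 WPM -> 300 chars/min -> 0.2 s per key
    return base * random.uniform(1 - jitter, 1 + jitter)

time.sleep(keystrokeDelay(60))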
Example #12
    def urlPassesWhiteBlackLists( self, url ):
        """
        Check if a URL is valid when matched against white and blacklists
        A whitelist always overrides blacklists
        """
        # no lists equal insta-pass
        wLen = len( self.whitelist )
        bLen = len( self.blacklist )
        dblog.log( 'list', 'white: %d | black: %d' % ( wLen, bLen ), level = logging.DEBUG )
        if wLen + bLen == 0:
            dblog.log( 'list', 'Url [ %s ] didn\'t test, empty lists' % url, level = logging.DEBUG )
            return True

        # check if we pass the white/blacklist test
        wMatch = self.urlHasListMatch( self.whitelist, url )
        bMatch = self.urlHasListMatch( self.blacklist, url )
        dblog.log( 'list', 'Url [ %s ] tested on lists -> white[%d] = %s | black[%d] = %s ' % (
            url, wLen, wMatch, bLen, bMatch ), level = logging.DEBUG )

        # if we have white && black list, only pass if
        # url matches the whitelist and not the blacklist
        if wLen and bLen:
            return wMatch and not bMatch

        # return whitelist match if we have a list
        if wLen:
            return wMatch

        # matching a blacklist is bad, do not pass
        if bLen:
            return not bMatch

        # shouldn't ever get here ...
        return False
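
urlHasListMatch() is referenced here but not included in these excerpts; a plausible minimal version, treating every list entry as a regex (as the loadList docstring suggests), could look like this:

import re

def urlHasListMatch(patternList, url):
    # True as soon as any pattern in the list matches somewhere in the URL
    return any(re.search(p, url) for p in patternList)

print(urlHasListMatch([r'duckduckgo\.com'], 'https://duckduckgo.com/?q=x'))   # True
print(urlHasListMatch([], 'https://example.com/'))                            # False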
Example #13
    def simulateRealtime(self):
        """
        Use current date/time to build a "typical" browsing pattern
        """
        # continuously loop, refreshing the seed data as needed
        expiresDtm = None
        processUrls = True
        while processUrls:
            # first up, get history if our time block is up
            now = datetime.now()
            if (expiresDtm is None) or (now >= expiresDtm):
                failures = 0
                expiresDtm = now + timedelta(minutes=30)
                dblog.log('sim',
                          'Refreshing seed data, valid until %s' % expiresDtm)
                if self.txtFile:
                    # refresh from the text file - don't need to actually reload all the URLs
                    # since they are cached.  Just change the rate at which we browse so there
                    # is a change in behavior at this period
                    urlRows = self.urllist.copy()
                    ratePerHalfHour = random.uniform(150, 500)
                else:
                    # refresh from the database
                    (urlRows,
                     ratePerHalfHour) = self.historian.getSeedUrlData(now)

                # if there are no URL results found, we have to go home
                urlRowsOrigLen = len(urlRows)
                if not urlRowsOrigLen:
                    dblog.log('sim', 'No URLs found :-(')
                    return

                # convert 30-min rate to secs/url
                secPerUrl = round(60 / (ratePerHalfHour / 30), 2)

            # pick a random item in the urls
            idx = random.randrange(len(urlRows))  # stays strictly below len(urlRows)

            # remove it so it's not seen again
            urlRow = urlRows.pop(idx)
            url = urlRow['url'] if not self.txtFile else urlRow

            # handle it with our main handler / dispatcher
            if self.handleUrl(url):
                # wait the appropriate amount of time; we'll fudge the time by +/- 25%
                sleepSec = abs(
                    secPerUrl +
                    round(random.choice([-1, 1]) * (secPerUrl / 4), 2))
                dblog.log('sim', 'Finished URL, waiting %d seconds' % sleepSec)
                self.user.idle(sleepSec)
            else:
                failures += 1

            # reset the expiration time if we're out of URLs
            if not len(urlRows):
                expiresDtm = None

            # only keep going if we haven't failed on every URL attempt
            processUrls = (failures < urlRowsOrigLen)
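
A worked check of the pacing math: 300 URLs per half hour is 10 URLs a minute, i.e. 6 seconds per URL, which the loop then fudges by plus or minus 25%.

import random

ratePerHalfHour = 300
secPerUrl = round(60 / (ratePerHalfHour / 30), 2)                               # 6.0
sleepSec = abs(secPerUrl + round(random.choice([-1, 1]) * (secPerUrl / 4), 2))  # 4.5 or 7.5
print(secPerUrl, sleepSec)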
Example #14
File: datamgr.py, Project: m-kal/dirtyboots
 def log(self, msg, level=logging.INFO):
     """
     For consistent logging format within all handler functions
     """
     dblog.log('SQL', msg, level=level)
Example #15
 def skipUrl( self, url, reason = 'skip' ):
     """
     Skip a URL and log it
     """
     dblog.log( reason, url )
     self.stats[ 'skipped' ] += 1
Example #16
 def log(self, msg, level=logging.INFO):
     """
     Shortcut to DB's logger
     """
     dblog.log('history', msg, level=level)
Example #17
def log(text):
    """
    For consistent logging format within all handler functions
    """
    dblog.log('handlers', text)
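
The dblog module all of these thin wrappers delegate to is not part of these excerpts; a minimal stand-in with the same log(tag, msg, level=...) shape, built on the standard logging module, might look like this:

import logging

LOG_PIPE_CHAR = '|'   # the real value lives in dblog; '|' is an assumption
logging.basicConfig(level=logging.DEBUG, format='%(message)s')

def log(tag, msg, level=logging.INFO):
    # one line per event: a short tag, a pipe separator, then the message
    logging.log(level, '%-8s %s %s' % (tag, LOG_PIPE_CHAR, msg))

log('handlers', 'example message')
log('SQL', 'slow query', level=logging.WARNING)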