def register_stream_list(reindex=False):
    """Find the local stream files and make sure they are all registered
    in the sqlite3 database.

    Args:
      reindex: When True (used when trying to fix a bug), ignore what is
        already registered and re-scan everything.

    Returns:
      True when there is nothing new to scan; otherwise None after
      registering each new file.

    Raises:
      Exception: if the manager process disappears mid-scan.
    """
    # Get the existing streams as a set. If we are asked to re-index (due to
    # trying to fix a bug) then we ignore what we have and just go ahead and
    # do everything. NOTE: the builtin set replaces the deprecated sets.Set
    # alias — identical semantics.
    if reindex:
        all_registered = set()
    else:
        all_registered = set(DB.all('streams', ['name']))

    # There should be a smarter way to do this ... you'd think. We should also
    # be more faithfully giving things extensions since it's not 100% mp3.
    all_files = set(glob('%s/*.mp3' % misc.DIR_STREAMS))

    # This is the list of files we haven't scanned yet...
    diff = all_files.difference(all_registered)
    if not diff:
        return True

    # A file newer than the cascade time (buffered by a minute, just to be
    # sure) could still be getting written, so we don't register it until
    # later.
    cutoff = time.mktime((datetime.now() - timedelta(
        minutes=1, seconds=misc.config['cascadetime'])).timetuple())

    for fname in diff:
        # BUG FIX: the original ended this branch with the bare expression
        # `next`, which in Python is a no-op reference to the builtin (a
        # Ruby-ism) — so in-flight files were never actually skipped.
        # `continue` is what skips them.
        if len(fname) == 0 or os.path.getctime(fname) > cutoff:
            continue

        info = stream_info(fname)
        if not info:
            continue

        DB.register_stream(info)

        # Bail out entirely if the manager has gone away.
        if not misc.manager_is_running():
            logging.info("Manager is gone, shutting down")
            raise Exception()
def register_stream_list(reindex=False):
    """Register every not-yet-scanned local stream file in the sqlite3 db.

    Args:
      reindex: When True, ignore the currently-registered set and re-scan
        every file (used when trying to fix a bug).

    Returns:
      True if there was nothing new to scan, otherwise None.

    Raises:
      Exception: when the manager process is found to be gone mid-scan.
    """
    # The set of names already registered — empty when re-indexing so that
    # everything gets processed again. (Builtin set replaces the deprecated
    # sets.Set alias; semantics are identical.)
    if reindex:
        all_registered = set()
    else:
        all_registered = set(DB.all('streams', ['name']))

    # There should be a smarter way to do this ... you'd think. We should also
    # be more faithfully giving things extensions since it's not 100% mp3.
    all_files = set(glob('%s/*.mp3' % misc.DIR_STREAMS))

    diff = all_files.difference(all_registered)

    # Nothing we haven't scanned yet.
    if not diff:
        return True

    # Take the cascade time, buffer it by a minute just to be sure, and treat
    # anything created after that as possibly still being written — those are
    # not registered until later.
    cutoff = time.mktime((datetime.now() - timedelta(
        minutes=1, seconds=misc.config['cascadetime'])).timetuple())

    for fname in diff:
        # BUG FIX: was the bare expression `next` (a no-op in Python);
        # `continue` actually skips files that may still be in flight.
        if len(fname) == 0 or os.path.getctime(fname) > cutoff:
            continue

        info = stream_info(fname)
        if not info:
            continue

        DB.register_stream(info)

        # Shut down hard if our manager disappeared from under us.
        if not misc.manager_is_running():
            logging.info("Manager is gone, shutting down")
            raise Exception()
def prune_process(lockMap, reindex=False, force=False):
    """One blocking prune pass: remove expired recordings, backups, db rows.

    This is internal — call prune() directly. This is a normally blocking
    process that is prepared by prune(), making it easily callable
    asynchronously.

    Args:
      lockMap: map of named locks; the 'prune' lock guards against reentry.
      reindex: passed through to register_stream_list().
      force: run even if the 'prune' lock could not be acquired.

    Returns:
      True when another prune was already running; None on early bail-out.
    """
    # If another prune is running then we just bail.
    if not lockMap['prune'].acquire(False) and not force:
        logging.warning("Tried to run another prune whilst one is running. Aborting")
        return True

    # If we are the first process then we need to make sure that the webserver
    # is up before we do this to check to see if we are official.
    time.sleep(2)

    # (Return value unused — the rename is the side effect we want.)
    misc.change_proc_name("%s-cleanup" % misc.config['callsign'])

    # We want to run the am_i_official here since it could block on a DNS lookup.
    misc.am_i_official()

    try:
        register_stream_list(reindex)
    # BUG FIX: narrowed from a bare `except:` so SystemExit and
    # KeyboardInterrupt propagate instead of being silently swallowed.
    except Exception:
        lockMap['prune'].release()
        return None

    db = DB.connect()

    archive_duration = misc.config['archivedays'] * TS.ONE_DAY_SECOND
    cutoff = TS.unixtime('prune') - archive_duration

    # Remove all slices older than 4 hours.
    slice_cutoff = TS.unixtime('prune') - 0.1667 * TS.ONE_DAY_SECOND

    cloud_cutoff = None
    if misc.config['cloud']:
        cloud_cutoff = TS.unixtime('prune') - misc.config['cloudarchive'] * TS.ONE_DAY_SECOND

    # Put thingies into the cloud.
    count = 0
    for file_name in glob('*/*.mp3'):
        # Depending on many factors this could be running for hours or even
        # days. We want to make sure this isn't a zombie process or, worse
        # yet, still running and competing with other instances of itself.
        if not misc.manager_is_running():
            lockMap['prune'].release()
            return None

        ctime = os.path.getctime(file_name)

        # We observe the rules set up in the config. (Parentheses make the
        # original `and`-before-`or` precedence explicit; behavior unchanged.)
        if (file_name.startswith('slices') and ctime < slice_cutoff) or ctime < cutoff:
            logging.debug("Prune[remove]: %s (ctime)" % file_name)
            os.unlink(file_name)
            count += 1

        # We want to make sure we aren't archiving the slices.
        elif cloud_cutoff and ctime < cloud_cutoff and not file_name.startswith('slice') and misc.am_i_official():
            logging.debug("Prune[cloud]: %s" % file_name)

            # Only unlink the file if I can successfully put it into the cloud.
            if put(file_name):
                try:
                    os.unlink(file_name)
                except Exception:
                    logging.debug("Prune[cloud]: Couldn't remove %s" % file_name)

    for file_name in glob('%s/*.gz' % misc.DIR_BACKUPS):
        ctime = os.path.getctime(file_name)

        # We observe the rules set up in the config.
        if ctime < cutoff:
            logging.debug("Prune: %s" % file_name)
            os.unlink(file_name)
            count += 1

    # The map names are different since there may or may not be a
    # corresponding cloud thingie associated with it.
    db = DB.connect()

    # NOTE(review): this compares a unix timestamp column against sqlite's
    # date() string — presumably it works with the schema in use; confirm.
    unlink_list = db['c'].execute('select name, id from streams where end_unix < date("now", "-%d seconds")' % (archive_duration)).fetchall()

    for file_name_tuple in unlink_list:
        file_name = str(file_name_tuple[0])
        # Renamed from `id` to avoid shadowing the builtin.
        row_id = file_name_tuple[1]

        logging.debug("Prune[remove]: %s (unlink list)" % file_name)

        # If there's a cloud account at all then we need to unlink the
        # equivalent mp3 file.
        # BUG FIX: dropped the stray string literal "cloud." that preceded
        # this call — it was a no-op expression left over from editing out a
        # module prefix. NOTE(review): `unlink` here is presumably the
        # cloud-side unlink helper, not os.unlink — confirm at the imports.
        if cloud_cutoff and misc.am_i_official():
            unlink(file_name)

        # After we remove these streams then we delete them from the db.
        db['c'].execute('delete from streams where id = %d' % row_id)
        db['conn'].commit()

        # Now, only after we've deleted from the cloud, can we delete the
        # local file.
        if os.path.exists(file_name):
            os.unlink(file_name)
            count += 1

    logging.info("Found %d files older than %s days." % (count, misc.config['archivedays']))
    lockMap['prune'].release()
def prune_process(lockMap, reindex=False, force=False):
    """Blocking prune pass over recordings, slices, backups and db rows.

    Internal — call prune() directly; prune() prepares this so it is easily
    callable asynchronously.

    Args:
      lockMap: map of named locks; 'prune' prevents concurrent prune runs.
      reindex: forwarded to register_stream_list().
      force: proceed even if the 'prune' lock is already held.

    Returns:
      True if a prune was already in progress; None if we bailed early.
    """
    # If another prune is running then we just bail.
    if not lockMap['prune'].acquire(False) and not force:
        logging.warning("Tried to run another prune whilst one is running. Aborting")
        return True

    # If we are the first process then we need to make sure that the webserver
    # is up before we do this to check to see if we are official.
    time.sleep(2)

    # Rename the process for ps visibility; the returned pid was unused.
    misc.change_proc_name("%s-cleanup" % misc.config['callsign'])

    # We want to run the am_i_official here since it could block on a DNS lookup.
    misc.am_i_official()

    try:
        register_stream_list(reindex)
    # BUG FIX: was a bare `except:`; narrowed so SystemExit and
    # KeyboardInterrupt are no longer swallowed.
    except Exception:
        lockMap['prune'].release()
        return None

    db = DB.connect()

    archive_duration = misc.config['archivedays'] * TS.ONE_DAY_SECOND
    cutoff = TS.unixtime('prune') - archive_duration

    # Remove all slices older than 4 hours.
    slice_cutoff = TS.unixtime('prune') - 0.1667 * TS.ONE_DAY_SECOND

    cloud_cutoff = None
    if misc.config['cloud']:
        cloud_cutoff = TS.unixtime('prune') - misc.config['cloudarchive'] * TS.ONE_DAY_SECOND

    # Put thingies into the cloud.
    count = 0
    for file_name in glob('*/*.mp3'):
        # This could run for hours or even days; make sure we haven't become
        # a zombie, or worse, a competitor to another instance of ourselves.
        if not misc.manager_is_running():
            lockMap['prune'].release()
            return None

        ctime = os.path.getctime(file_name)

        # We observe the rules set up in the config. (Parentheses spell out
        # the original `and`-binds-tighter-than-`or` precedence.)
        if (file_name.startswith('slices') and ctime < slice_cutoff) or ctime < cutoff:
            logging.debug("Prune[remove]: %s (ctime)" % file_name)
            os.unlink(file_name)
            count += 1

        # We want to make sure we aren't archiving the slices.
        elif cloud_cutoff and ctime < cloud_cutoff and not file_name.startswith('slice') and misc.am_i_official():
            logging.debug("Prune[cloud]: %s" % file_name)

            # Only unlink the file if I can successfully put it into the cloud.
            if put(file_name):
                try:
                    os.unlink(file_name)
                except Exception:
                    logging.debug("Prune[cloud]: Couldn't remove %s" % file_name)

    for file_name in glob('%s/*.gz' % misc.DIR_BACKUPS):
        ctime = os.path.getctime(file_name)

        # We observe the rules set up in the config.
        if ctime < cutoff:
            logging.debug("Prune: %s" % file_name)
            os.unlink(file_name)
            count += 1

    # The map names are different since there may or may not be a
    # corresponding cloud thingie associated with it.
    db = DB.connect()

    # NOTE(review): end_unix (a unix timestamp) is compared against sqlite's
    # date() string here — presumably intentional for this schema; confirm.
    unlink_list = db['c'].execute('select name, id from streams where end_unix < date("now", "-%d seconds")' % (archive_duration)).fetchall()

    for file_name_tuple in unlink_list:
        file_name = str(file_name_tuple[0])
        # `row_id` instead of `id`, which shadows the builtin.
        row_id = file_name_tuple[1]

        logging.debug("Prune[remove]: %s (unlink list)" % file_name)

        # If there's a cloud account at all then we need to unlink the
        # equivalent mp3 file.
        # BUG FIX: removed the stray "cloud." string literal that sat before
        # this call as a no-op expression (a leftover from editing out a
        # module prefix). NOTE(review): `unlink` is presumably the cloud-side
        # helper, not os.unlink — confirm against the file's imports.
        if cloud_cutoff and misc.am_i_official():
            unlink(file_name)

        # After we remove these streams then we delete them from the db.
        db['c'].execute('delete from streams where id = %d' % row_id)
        db['conn'].commit()

        # Only after we've deleted from the cloud can we delete the local file.
        if os.path.exists(file_name):
            os.unlink(file_name)
            count += 1

    logging.info("Found %d files older than %s days." % (count, misc.config['archivedays']))
    lockMap['prune'].release()