def query(self, starttime, endtime):
    """Query and retrieve data within the specified range.

    Checks for data in the specified range and retrieves any new files.
    After execution is completed, the same range is checked again to see
    if any new files have appeared since the first execution. This
    continues until no new files are found (for xxx minutes?)
    """
    urls = []

    fmt = '%Y-%m-%d %H:%M:%S'
    logging.info("Querying time range %s - %s",
                 starttime.strftime(fmt), endtime.strftime(fmt))

    for browser in self.browsers:
        matches = self.query_server(browser, starttime, endtime)

        if len(matches) > 0:
            urls.append(matches)

    # Remove duplicate files, randomizing to spread load across servers
    if len(urls) > 1:
        urls = self._deduplicate(urls)

    # Filter out files that are already in the database
    new_urls = []

    for url_list in urls:
        filtered = None

        while filtered is None:
            try:
                filtered = list(filter(self._filter_new, url_list))
            except MySQLdb.OperationalError:
                # MySQL has gone away -- try again in 5s
                logging.warning("Unable to access database to check for "
                                "file existence. Will try again in 5 "
                                "seconds.")
                time.sleep(5)

                # Try and reconnect
                # @note: May be a good idea to move the reconnect
                # functionality to the db module and have it occur
                # for all queries.
                try:
                    self._db, self._cursor = get_db_cursor(
                        self.dbhost, self.dbname, self.dbuser, self.dbpass)
                except Exception:
                    pass

        new_urls.append(filtered)

    # Check disk space
    if not self.sent_diskspace_warning:
        self._check_free_space()

    # Acquire the data files
    self.acquire(new_urls)
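# The @note above suggests moving the reconnect logic into the db module so
# it applies to all queries. A minimal sketch of what that could look like,
# assuming get_db_cursor keeps its (host, name, user, password) signature
# and MySQLdb remains the driver; the function name and retry parameters
# here are illustrative assumptions, not part of the existing codebase:

import logging
import time

import MySQLdb


def run_with_reconnect(db_params, run_query, retries=5, delay=5):
    """Run `run_query(cursor)`, reconnecting on OperationalError.

    `db_params` is a (host, name, user, password) tuple and `run_query`
    is any callable that takes a cursor and returns a result.
    """
    db, cursor = get_db_cursor(*db_params)

    for attempt in range(retries):
        try:
            return run_query(cursor)
        except MySQLdb.OperationalError:
            logging.warning("Database connection lost. Will try again "
                            "in %d seconds.", delay)
            time.sleep(delay)
            try:
                db, cursor = get_db_cursor(*db_params)
            except MySQLdb.OperationalError:
                pass  # Server still down; retry on the next iteration

    raise RuntimeError("Database unavailable after %d attempts" % retries)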
def __init__(self, servers, browse_method, download_method, conf):
    """Initialize the daemon from its configuration.

    Reads database, notification, network, and directory settings from
    `conf`, connects to the database, and creates a browser and a pool
    of downloaders for each data server.
    """
    # MySQL/Postgres info
    self.dbhost = conf.get('database', 'dbhost')
    self.dbname = conf.get('database', 'dbname')
    self.dbuser = conf.get('database', 'user')
    self.dbpass = conf.get('database', 'pass')

    # Initialized early so shutdown() can safely run if the database
    # connection fails
    self.downloaders = []

    try:
        self._db, self._cursor = get_db_cursor(
            self.dbhost, self.dbname, self.dbuser, self.dbpass)
    except MySQLdb.OperationalError:
        logging.error(
            "Unable to access MySQL. Is the database daemon running?")
        self.shutdown()
        self.stop()

    # Email notification
    self.email_server = conf.get('notifications', 'server')
    self.email_from = conf.get('notifications', 'from')
    self.email_to = conf.get('notifications', 'to')

    # Warning flags
    self.sent_diskspace_warning = False

    # Maximum number of simultaneous downloads
    self.max_downloads = conf.getint('network', 'max_downloads')

    # Directories
    self.working_dir = os.path.expanduser(
        conf.get('directories', 'working_dir'))
    self.image_archive = os.path.expanduser(
        conf.get('directories', 'image_archive'))
    self.incoming = os.path.join(self.working_dir, 'incoming')
    self.quarantine = os.path.join(self.working_dir, 'quarantine')

    # Check directory permissions
    self._init_directories()

    # Load data servers, browsers, and downloaders
    self.servers = self._load_servers(servers)

    self.browsers = []
    self.downloaders = []
    self.queues = []

    # For each server instantiate a browser and one or more downloaders
    for server in self.servers:
        self.browsers.append(self._load_browser(browse_method, server))
        queue = Queue.Queue()
        self.queues.append(queue)
        self.downloaders.append(
            [self._load_downloader(download_method, queue)
             for i in range(self.max_downloads)])

    # Shutdown switch
    self.shutdown_requested = False
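# The constructor reads its settings through conf.get / conf.getint, i.e.
# the ConfigParser interface, so it can be driven by an INI file along
# these lines. Section and option names are taken from the lookups above;
# every value is an illustrative assumption, as is the commented-out class
# name in the usage sketch at the bottom:

from ConfigParser import ConfigParser
from StringIO import StringIO

SAMPLE_CONF = """\
[database]
dbhost = localhost
dbname = imagedb
user = daemon
pass = secret

[notifications]
server = localhost
from = daemon@example.org
to = admin@example.org

[network]
max_downloads = 2

[directories]
working_dir = ~/data/working
image_archive = ~/data/archive
"""

conf = ConfigParser()
conf.readfp(StringIO(SAMPLE_CONF))
# daemon = RetrievalDaemon(servers, browse_method, download_method, conf)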
def main(argv):
    '''Main application access point'''
    options = get_options()

    init_logger('update.log')

    print('Processing Images...')

    # Get a list of images to process
    filepaths = find_images(options.source)

    if len(filepaths) == 0:
        return

    images = []

    # Move images to main archive
    for filepath in filepaths:
        dest = os.path.join(options.destination,
                            os.path.relpath(filepath, options.source))

        # Parse image header
        image_params = create_image_data(filepath)
        image_params['filepath'] = dest
        images.append(image_params)

        directory = os.path.dirname(dest)

        if not os.path.isdir(directory):
            os.makedirs(directory)

        shutil.move(filepath, dest)

    # Add images to the database
    db, cursor = get_db_cursor(options.dbhost, options.dbname,
                               options.dbuser, options.dbpass)
    process_jp2_images(images, options.destination, cursor, True)

    cursor.close()

    print('Finished!')
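# find_images is not shown here. Given that its results are fed to
# process_jp2_images, a plausible implementation walks the source tree and
# collects JPEG 2000 files. A sketch under that assumption, not the actual
# helper:

import os


def find_images(source_dir, extensions=('.jp2',)):
    """Recursively collect image filepaths beneath source_dir."""
    matches = []

    for root, dirs, files in os.walk(source_dir):
        for name in files:
            if name.lower().endswith(extensions):
                matches.append(os.path.join(root, name))

    return matches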