# Example #1
    def query(self, starttime, endtime):
        """Query and retrieve data within the specified range.

        Checks for data in the specified range and retrieves any new files.
        After execution is completed, the same range is checked again to see
        if any new files have appeared since the first execution. This continues
        until no new files are found (for xxx minutes?)

        Parameters
        ----------
        starttime, endtime : datetime.datetime
            Inclusive bounds of the time range to query on each server.
        """
        urls = []

        fmt = '%Y-%m-%d %H:%M:%S'

        logging.info("Querying time range %s - %s", starttime.strftime(fmt),
                     endtime.strftime(fmt))

        # Ask every configured server browser for matching files
        for browser in self.browsers:
            matches = self.query_server(browser, starttime, endtime)

            if len(matches) > 0:
                urls.append(matches)

        # Remove duplicate files, randomizing to spread load across servers
        if len(urls) > 1:
            urls = self._deduplicate(urls)

        # Filter out files that are already in the database
        new_urls = []

        for url_list in urls:
            filtered = None

            while filtered is None:
                try:
                    # list() forces evaluation here so an OperationalError
                    # raised by _filter_new is caught by this handler
                    filtered = list(filter(self._filter_new, url_list))
                except MySQLdb.OperationalError:
                    # MySQL has gone away -- try again in 5s.
                    # (Was `mysqld.OperationalError`, an undefined name; the
                    # rest of the module uses the MySQLdb package.)
                    logging.warning((
                        "Unable to access database to check for file existence. Will try again in 5 seconds."
                    ))
                    time.sleep(5)

                    # Try and reconnect

                    # @note: May be a good idea to move the reconnect
                    # functionality to the db module and have it occur
                    # for all queries.
                    try:
                        self._db, self._cursor = get_db_cursor(
                            self.dbhost, self.dbname, self.dbuser, self.dbpass)
                    except Exception:
                        # Best-effort reconnect; the while-loop retries the
                        # filter regardless, so a failure here is tolerable.
                        pass

            new_urls.append(filtered)

        # check disk space
        if not self.sent_diskspace_warning:
            self._check_free_space()

        # acquire the data files
        self.acquire(new_urls)
    def query(self, starttime, endtime):
        """Query and retrieve data within the specified range.

        Checks for data in the specified range and retrieves any new files.
        After execution is completed, the same range is checked again to see
        if any new files have appeared since the first execution. This continues
        until no new files are found (for xxx minutes?)

        Parameters
        ----------
        starttime, endtime : datetime.datetime
            Inclusive bounds of the time range to query on each server.
        """
        urls = []

        fmt = '%Y-%m-%d %H:%M:%S'

        logging.info("Querying time range %s - %s", starttime.strftime(fmt),
                                                    endtime.strftime(fmt))

        # Ask every configured server browser for matching files
        for browser in self.browsers:
            matches = self.query_server(browser, starttime, endtime)

            if len(matches) > 0:
                urls.append(matches)

        # Remove duplicate files, randomizing to spread load across servers
        if len(urls) > 1:
            urls = self._deduplicate(urls)

        # Filter out files that are already in the database
        new_urls = []

        for url_list in urls:
            filtered = None

            while filtered is None:
                try:
                    # list() is required: on Python 3, filter() is lazy, so
                    # without it the OperationalError would be raised later
                    # (outside this try) and the retry loop would never run.
                    filtered = list(filter(self._filter_new, url_list))
                except MySQLdb.OperationalError:
                    # MySQL has gone away -- try again in 5s
                    logging.warning(("Unable to access database to check for file"
                                   " existence. Will try again in 5 seconds."))
                    time.sleep(5)

                    # Try and reconnect

                    # @note: May be a good idea to move the reconnect
                    # functionality to the db module and have it occur
                    # for all queries.
                    try:
                        self._db = get_db_cursor(self.dbname, self.dbuser, self.dbpass)
                    except Exception:
                        # Best-effort reconnect; the while-loop retries the
                        # filter regardless, so a failure here is tolerable.
                        pass

            new_urls.append(filtered)

        # check disk space
        if not self.sent_diskspace_warning:
            self._check_free_space()

        # acquire the data files
        self.acquire(new_urls)
# Example #3
    def __init__(self, servers, browse_method, download_method, conf):
        """Set up database access, notification, directories, and downloaders."""
        # MySQL/Postgres credentials from the config file
        self.dbname = conf.get('database', 'dbname')
        self.dbuser = conf.get('database', 'user')
        self.dbpass = conf.get('database', 'pass')

        # Initialized early so the shutdown path below can run safely
        # even if the database connection fails.
        self.downloaders = []

        try:
            self._db = get_db_cursor(self.dbname, self.dbuser, self.dbpass)
        except MySQLdb.OperationalError:
            logging.error(
                "Unable to access MySQL. Is the database daemon running?")
            self.shutdown()
            self.stop()

        # Where to send failure/warning notification e-mails
        self.email_server = conf.get('notifications', 'server')
        self.email_from = conf.get('notifications', 'from')
        self.email_to = conf.get('notifications', 'to')

        # Only warn about low disk space once
        self.sent_diskspace_warning = False

        # Cap on simultaneous downloads per server
        self.max_downloads = conf.getint('network', 'max_downloads')

        # Resolve working/archive directories (expanding "~")
        working = conf.get('directories', 'working_dir')
        archive = conf.get('directories', 'image_archive')
        self.working_dir = os.path.expanduser(working)
        self.image_archive = os.path.expanduser(archive)
        self.incoming = os.path.join(self.working_dir, 'incoming')
        self.quarantine = os.path.join(self.working_dir, 'quarantine')

        # Verify directory permissions / create as needed
        self._init_directories()

        # Instantiate the configured data servers
        self.servers = self._load_servers(servers)

        self.browsers = []
        self.downloaders = []
        self.queues = []

        # One browser plus a pool of downloaders (sharing a queue) per server
        for server in self.servers:
            browser = self._load_browser(browse_method, server)
            self.browsers.append(browser)

            queue = Queue.Queue()
            self.queues.append(queue)

            pool = [self._load_downloader(download_method, queue)
                    for _ in range(self.max_downloads)]
            self.downloaders.append(pool)

        # Shutdown switch
        self.shutdown_requested = False
    def __init__(self, servers, browse_method, download_method, conf):
        """Set up database access, notification, directories, and downloaders."""
        # MySQL/Postgres credentials from the config file
        self.dbname = conf.get('database', 'dbname')
        self.dbuser = conf.get('database', 'user')
        self.dbpass = conf.get('database', 'pass')

        # Initialized early so the shutdown path below can run safely
        # even if the database connection fails.
        self.downloaders = []

        try:
            self._db = get_db_cursor(self.dbname, self.dbuser, self.dbpass)
        except MySQLdb.OperationalError:
            logging.error("Unable to access MySQL. Is the database daemon running?")
            self.shutdown()
            self.stop()

        # Where to send failure/warning notification e-mails
        self.email_server = conf.get('notifications', 'server')
        self.email_from = conf.get('notifications', 'from')
        self.email_to = conf.get('notifications', 'to')

        # Only warn about low disk space once
        self.sent_diskspace_warning = False

        # Cap on simultaneous downloads per server
        self.max_downloads = conf.getint('network', 'max_downloads')

        # Resolve working/archive directories (expanding "~")
        working = conf.get('directories', 'working_dir')
        archive = conf.get('directories', 'image_archive')
        self.working_dir = os.path.expanduser(working)
        self.image_archive = os.path.expanduser(archive)
        self.incoming = os.path.join(self.working_dir, 'incoming')
        self.quarantine = os.path.join(self.working_dir, 'quarantine')

        # Verify directory permissions / create as needed
        self._init_directories()

        # Instantiate the configured data servers
        self.servers = self._load_servers(servers)

        self.browsers = []
        self.downloaders = []
        self.queues = []

        # One browser plus a pool of downloaders (sharing a queue) per server
        for server in self.servers:
            browser = self._load_browser(browse_method, server)
            self.browsers.append(browser)

            queue = Queue.Queue()
            self.queues.append(queue)

            pool = [self._load_downloader(download_method, queue)
                    for _ in range(self.max_downloads)]
            self.downloaders.append(pool)

        # Shutdown switch
        self.shutdown_requested = False
# Example #5
def main(argv):
    """Main application access point.

    Finds new JP2 images under the configured source directory, moves them
    into the image archive, and registers them in the database.

    Parameters
    ----------
    argv : list of str
        Command-line arguments (currently unused; options come from
        ``get_options()``).
    """
    options = get_options()

    init_logger('update.log')

    print('Processing Images...')

    # Get a list of images to process
    filepaths = find_images(options.source)

    # `len(x) is 0` compared object identity, which only works by accident
    # of CPython's small-int caching; test emptiness directly instead.
    if not filepaths:
        return

    images = []

    # Move images to main archive
    for filepath in filepaths:
        dest = os.path.join(options.destination,
                            os.path.relpath(filepath, options.source))

        # Parse image header
        image_params = create_image_data(filepath)
        image_params['filepath'] = dest

        images.append(image_params)

        directory = os.path.dirname(dest)

        if not os.path.isdir(directory):
            os.makedirs(directory)

        shutil.move(filepath, dest)

    # Add images to the database
    db, cursor = get_db_cursor(options.dbhost, options.dbname, options.dbuser,
                               options.dbpass)
    process_jp2_images(images, options.destination, cursor, True)
    cursor.close()
    # Close the connection too, not just the cursor, to avoid leaking it
    db.close()

    print('Finished!')
def main(argv):
    """Main application access point.

    Finds new JP2 images under the configured source directory, moves them
    into the image archive, and registers them in the database.

    Parameters
    ----------
    argv : list of str
        Command-line arguments (currently unused; options come from
        ``get_options()``).
    """
    options = get_options()

    init_logger('update.log')

    print('Processing Images...')

    # Get a list of images to process
    filepaths = find_images(options.source)

    # `len(x) is 0` compared object identity, which only works by accident
    # of CPython's small-int caching; test emptiness directly instead.
    if not filepaths:
        return

    images = []

    # Move images to main archive
    for filepath in filepaths:
        dest = os.path.join(options.destination,
                            os.path.relpath(filepath, options.source))

        # Parse image header
        image_params = sunpy.read_header(filepath)
        image_params['filepath'] = dest

        images.append(image_params)

        directory = os.path.dirname(dest)

        if not os.path.isdir(directory):
            os.makedirs(directory)

        shutil.move(filepath, dest)

    # Add images to the database
    cursor = get_db_cursor(options.dbname, options.dbuser, options.dbpass)
    process_jp2_images(images, options.destination, cursor, True)
    cursor.close()

    print('Finished!')