Example 1
    def get_file_list(self):
        logger.debug("fetching files")
        backfill = timedelta(days=int(tutil.get_env_var("GINA_BACKFILL_DAYS")))
        end_date = datetime.utcnow() + timedelta(days=1)
        start_date = end_date - backfill

        url = GINA_URL
        url += "&start_date=" + start_date.strftime("%Y-%m-%d")
        url += "&end_date=" + end_date.strftime("%Y-%m-%d")
        url += "&sensors[]=viirs"
        url += "&processing_levels[]=level1"
        url += "&facilities[]=" + tutil.get_env_var("VIIRS_FACILITY")
        url += "&satellites[]=" + SATELLITE
        logger.debug("URL: %s", url)
        buf = BytesIO()

        c = pycurl.Curl()
        c.setopt(c.URL, url)
        c.setopt(c.WRITEFUNCTION, buf.write)
        c.perform()
        c.close()

        files = []
        for file in json.loads(buf.getvalue()):
            files.append(Viirs(file["url"], file["md5sum"]))

        buf.close()

        logger.info("Found %s files", len(files))
        return files
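
The Viirs objects built above are not defined in these snippets. A minimal sketch of what the container might look like, assuming the orbit number can be parsed from the CSPP-style "_b<orbit>_" field in the file name, and that sorting by basename is what puts geolocation (G*) files ahead of band (SV*) files in Example 3:

import os
import re
from dataclasses import dataclass, field
from urllib.parse import urlparse

@dataclass(order=True)
class Viirs:
    url: str = field(compare=False)
    md5: str = field(compare=False)
    # Derived fields; basename doubles as the sort key, so geolocation
    # files (GMTCO_..., GITCO_...) sort ahead of band files (SV..._...).
    basename: str = field(init=False)
    orbit: str = field(init=False, compare=False)

    def __post_init__(self):
        self.basename = os.path.basename(urlparse(self.url).path)
        m = re.search(r"_b(\d+)_", self.basename)  # assumed filename convention
        self.orbit = m.group(1) if m else ""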
Example 2
def main():
    # let ctrl-c work as it should.
    signal.signal(signal.SIGINT, signal.SIG_DFL)

    # exit quickly if queue is already running
    (gotlock, lock) = aquire_lock()
    if not gotlock:
        tutil.exit_with_error(
            "Queue {} locked, skipping".format(SATELLITE + "-".join(CHANNELS)))
        return

    try:
        mirror_gina = MirrorGina()
        mirror_gina.fetch_files()
    finally:
        logger.info("All done with queue.")

        if gotlock:
            try:
                lock.unlock()
            except AttributeError:
                pass

    logger.debug("That's all for now, bye.")
    logging.shutdown()
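
The aquire_lock() helper (spelling follows the call site above) is not included in these examples. One plausible implementation using an fcntl lock file; the path and the Lock wrapper are assumptions, chosen so that lock.unlock() matches the call in the finally block:

import fcntl

LOCK_FILE = "/tmp/mirror_gina.lock"  # assumed location, not from the source

class Lock:
    def __init__(self, fp):
        self._fp = fp

    def unlock(self):
        fcntl.flock(self._fp, fcntl.LOCK_UN)
        self._fp.close()

def aquire_lock():
    """Return (True, lock) if we got the lock, (False, None) otherwise."""
    fp = open(LOCK_FILE, "w")
    try:
        fcntl.flock(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except OSError:
        fp.close()
        return (False, None)
    return (True, Lock(fp))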
Example 3
    def fetch_files(self):
        file_list = self.get_file_list()
        file_queue = self.file_store.queue_files(file_list, CHANNELS)

        # sort to retrieve geoloc files first. This job should run
        # frequently enough that getting stuck while retrieving several
        # orbits shouldn't be a problem.
        file_queue.sort()

        for file in file_queue:
            url = file.url
            tmp_file = path_from_url(self.tmp_path, url)
            logger.debug("Fetching %s from %s" % (tmp_file, url))
            dl = Downloader(max_con=self.connection_count)
            dl.fetch(url, tmp_file)
            file_md5 = hashlib.md5(open(tmp_file, "rb").read()).hexdigest()
            logger.debug("MD5 %s : %s" % (file.md5, file_md5))

            if file.md5 == file_md5:
                try:
                    check = h5py.File(tmp_file, "r")
                    check.close()
                except Exception as e:
                    logger.info("Bad HDF5 file %s", tmp_file)
                    logger.info(e)
                    os.unlink(tmp_file)
                else:
                    self.file_store.place_file(file, tmp_file)
            else:
                size = os.path.getsize(tmp_file)
                msg = "Bad checksum: %s != %s (%d bytes)"
                logger.info(msg, file_md5, file.md5, size)
                os.unlink(tmp_file)
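
path_from_url() is referenced here but not shown. A plausible sketch, assuming it simply maps a remote URL onto a local directory by file name:

import os
from urllib.parse import urlparse

def path_from_url(base, url):
    # Keep only the URL's file name and place it under the base directory.
    return os.path.join(base, os.path.basename(urlparse(url).path))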
Example 4

def queue_files(file_list, channels):
    orbits = {}
    for new_file in file_list:
        orbit = new_file.orbit
        if orbit not in orbits:
            try:
                orbits[orbit] = list_files(orbit)
            except Exception:
                # Log the failure and fall back to an empty listing so the
                # membership test below can't raise a KeyError.
                logger.exception("Unable to list files for orbit %s", orbit)
                orbits[orbit] = []

    queue = []
    pattern = re.compile("/({})_".format("|".join(channels)))
    for new_file in file_list:

        orbit = new_file.orbit
        filename = f"{SATELLITE}/{orbit}/{new_file.basename}"
        if pattern.search(filename) and filename not in orbits[orbit]:
            logger.debug("Queueing %s", new_file.url)
            queue.append(new_file)
        else:
            logger.debug("Skipping %s", new_file.url)
    logger.info("%d files after pruning", len(queue))
    return queue
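
list_files() is not shown either. Since place_file() in Example 5 stores objects under f"{SATELLITE}/{orbit}/{filename}" keys, a reasonable sketch lists the keys under that orbit's prefix with boto3 (BUCKET_NAME and VERIFY mirror the constants used there):

import boto3

def list_files(orbit):
    # Assumed implementation: return the keys already mirrored for this
    # orbit so queue_files() can skip files already in the bucket.
    s3 = boto3.resource("s3", verify=VERIFY)
    bucket = s3.Bucket(BUCKET_NAME)
    return [obj.key for obj in bucket.objects.filter(Prefix=f"{SATELLITE}/{orbit}/")]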
Example 5

def place_file(file, tmp_file):
    filename = file.basename
    orbit = file.orbit
    logger.debug("Uploading %s to S3 bucket %s", tmp_file, BUCKET_NAME)
    key = f"{SATELLITE}/{orbit}/{filename}"
    try:
        s3 = boto3.resource("s3", verify=VERIFY)
        bucket = s3.Bucket(BUCKET_NAME)
        bucket.upload_file(tmp_file, key)
    except botocore.exceptions.SSLError as e:
        # Exceptions have no .message attribute in Python 3; log the
        # exception itself instead.
        logger.error("SSL error uploading %s: %s", key, e)
Example 6
    def create_multi(self):
        m = pycurl.CurlMulti()
        m.handles = []
        for i in range(self._num_conn):
            logger.debug("creating curl object")
            c = pycurl.Curl()
            c.fp = None  # slot for the output file object, set while a transfer is active
            c.setopt(pycurl.FOLLOWLOCATION, 1)   # follow HTTP redirects
            c.setopt(pycurl.MAXREDIRS, 5)
            c.setopt(pycurl.CONNECTTIMEOUT, 30)  # seconds to establish the connection
            c.setopt(pycurl.TIMEOUT, 600)        # seconds allowed for the entire transfer
            c.setopt(pycurl.NOSIGNAL, 1)         # avoid signal-based timeouts; safe with threads
            m.handles.append(c)

        return m
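
Downloader.fetch(), called in Example 3, is not shown. A minimal sketch of how the handles created above could drive a single download with pycurl's standard perform/select loop; self._multi (the CurlMulti from create_multi) and the handle-pool protocol are assumptions:

    def fetch(self, url, out_path):
        c = self._multi.handles.pop()           # take a free handle from the pool
        c.fp = open(out_path, "wb")
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.WRITEDATA, c.fp)
        self._multi.add_handle(c)
        num_active = 1
        while num_active:
            # Run transfers until libcurl asks us to wait for socket activity.
            while True:
                ret, num_active = self._multi.perform()
                if ret != pycurl.E_CALL_MULTI_PERFORM:
                    break
            if num_active:
                self._multi.select(1.0)
        self._multi.remove_handle(c)
        c.fp.close()
        c.fp = None
        self._multi.handles.append(c)           # return the handle to the pool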
Example 7
def queue_files(file_list, channels):
    queue = []
    pattern = re.compile("/({})_".format("|".join(channels)))
    logger.debug("%d files before pruning", len(file_list))
    for new_file in file_list:
        out_file = path_from_url(OUT_PATH, new_file.url)
        if pattern.search(out_file) and not os.path.exists(out_file):
            logger.debug("Queueing %s", new_file.url)
            queue.append(new_file)
        else:
            logger.debug("Skipping %s", new_file.url)
    logger.info("%d files after pruning", len(queue))
    return queue
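
For reference, the pruning pattern anchors the channel name at the start of a path component. A quick illustration with assumed channel names:

import re

channels = ["GITCO", "SVI04"]  # example values, not from the source
pattern = re.compile("/({})_".format("|".join(channels)))
print(bool(pattern.search("/out/GITCO_npp_d20200101_t0000000_b15796.h5")))  # True
print(bool(pattern.search("/out/SVM01_npp_d20200101_t0000000_b15796.h5")))  # False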