def bind(cls, destination):
    """Bind the publisher class to a port.

    Stores *destination* on the class, creates a zmq PUB socket on a
    fresh context and binds it to that address.
    """
    cls.destination = destination
    logger.info(cls.destination)
    ctx = zmq.Context()
    pub_socket = ctx.socket(zmq.PUB)
    # Publish state on the class first, then bind, matching the
    # original attribute-assignment order.
    cls.context = ctx
    cls.publish = pub_socket
    pub_socket.bind(destination)
def get_file_chunk(data_type, file_name, offset, size):
    """Return a chunk of given file.

    :param data_type: datatype key used to look up the file's directory
        via ``datex_config.get_path``.
    :param file_name: file name relative to that directory.
    :param offset: byte offset at which the chunk starts.
    :param size: maximum number of bytes to read.
    :returns: ``rpc.Binary`` wrapping the raw bytes read.
    """
    logger.info('... get_file_chunk(%s, %s, %d, %d)' %
                (data_type, file_name, offset, size))
    path = datex_config.get_path(data_type)[0]
    # Open in binary mode: the chunk is served as raw bytes and
    # text-mode seek/read offsets are unreliable for non-ascii data.
    # 'with' guarantees the handle is closed even if seek/read raises.
    with open(os.path.join(path, file_name), 'rb') as fob:
        fob.seek(offset)
        buf = fob.read(size)
    return rpc.Binary(buf)
def get_file_chunk(data_type, file_name, offset, size):
    """Return a chunk of given file.

    :param data_type: datatype key resolved to a directory through
        ``datex_config.get_path``.
    :param file_name: file name relative to that directory.
    :param offset: starting byte offset of the chunk.
    :param size: maximum number of bytes to read.
    :returns: ``rpc.Binary`` wrapping the bytes read.
    """
    logger.info('... get_file_chunk(%s, %s, %d, %d)' %
                (data_type, file_name, offset, size))
    path = datex_config.get_path(data_type)[0]
    # Binary mode is required: chunks are raw bytes and text-mode
    # offsets do not map to byte positions; the context manager also
    # closes the file on error (the original leaked it).
    with open(os.path.join(path, file_name), 'rb') as fob:
        fob.seek(offset)
        buf = fob.read(size)
    return rpc.Binary(buf)
def update_last_stamp(self, last_stamp):
    """Update last time stamp.

    *last_stamp* may be an ISO-format string or a datetime-like object
    (anything exposing ``isoformat()``).  Creates the stamp file and
    its section on first use.
    """
    try:
        self._read()
    except IOError:
        # No stamp file yet -- start a fresh one with our section.
        logger.info("create last stamp file '%s'", self.filename)
        self.cfg.add_section(self.section)
    stamp = last_stamp if isinstance(last_stamp, str) else last_stamp.isoformat()
    self.cfg.set(self.section, 'last_stamp', stamp)
    self._write()
def get_file_md5(data_type, file_name):
    """Calculate md5 of a given file and return it.

    :param data_type: datatype key used to look up the file's directory.
    :param file_name: file name relative to that directory.
    :returns: hexadecimal md5 digest string of the file's bytes.
    """
    logger.info('... get_file_md5(%s, %s)', data_type, file_name)
    path = datex_config.get_path(data_type)[0]
    md5 = hashlib.md5()
    # Binary mode so the digest covers the exact on-disk bytes; read in
    # 64 KiB blocks instead of 128 bytes to cut per-call overhead.  The
    # digest value is unchanged, and 'with' closes the file on error.
    with open(os.path.join(path, file_name), 'rb') as fob:
        for buf in iter(lambda: fob.read(65536), b''):
            md5.update(buf)
    return md5.hexdigest()
def get_file_list(data_type, time_start=None, time_end=None):
    """Return a list of files for given datatype and time interval.

    Time bounds may be ISO-format strings; each result entry is a
    ``(basename, isoformat_time)`` tuple.
    """
    logger.info('... get_file_list(%s, %s, %s)' %
                (data_type, time_start, time_end))
    if time_start:
        time_start = strp_isoformat(time_start)
    if time_end:
        time_end = strp_isoformat(time_end)
    return [(os.path.basename(fname), ftime.isoformat())
            for fname, ftime in _get_file_list(data_type, time_start, time_end)]
def get_file_list(data_type, time_start=None, time_end=None):
    """Return a list of files for given datatype and time interval.

    ISO-format string bounds are parsed before the lookup; the result
    pairs each file's basename with its ISO-format timestamp.
    """
    logger.info('... get_file_list(%s, %s, %s)' %
                (data_type, time_start, time_end))
    start = strp_isoformat(time_start) if time_start else time_start
    end = strp_isoformat(time_end) if time_end else time_end
    entries = _get_file_list(data_type, start, end)
    return [(os.path.basename(name), stamp.isoformat())
            for name, stamp in entries]
def check_and_publish(datatype, rpc_metadata, publish, heartbeat):
    """Check for new files of type *datatype*, with the given
    *rpc_metadata* and publish them through *publish*.

    :param datatype: datatype key, also used as the message subject.
    :param rpc_metadata: template metadata dict; a copy is sent per file
        with the file's basename appended to its 'uri'.
    :param publish: a bound PUB socket used to send messages.
    :param heartbeat: heartbeat period in seconds, or a false value to
        disable heartbeats.

    Loops until KeyboardInterrupt/SystemExit; always closes *publish*.
    """
    stamp_config = DatexLastStamp(datatype)

    def younger_than_stamp_files():
        """Uses glob polling to get new files.
        """
        fdir, fglob = datex_config.get_path(datatype)
        del fglob
        fstamp = stamp_config.get_last_stamp()
        for fname, ftime in _get_file_list(datatype,
                                           time_start=fstamp + TIME_EPSILON):
            if datex_config.distribute(datatype):
                yield os.path.join(fdir, fname)
            # Advance the stamp even for non-distributed files so they
            # are not re-examined on the next poll.
            stamp_config.update_last_stamp(ftime)

    # Give the publisher a little time to initialize
    # (e.g reconnections from subscribers)
    time.sleep(1)
    logger.info('publisher starting for datatype %s' % datatype)
    if heartbeat:
        # Back-date so the first loop iteration sends a heartbeat
        # immediately.
        last_heartbeat = datetime.now() - timedelta(seconds=heartbeat + 1)
    try:
        while True:
            # FIX: use total_seconds() -- timedelta.seconds ignores the
            # days component, so the old test silently wrapped after 24h.
            if (heartbeat and
                    (datetime.now() - last_heartbeat).total_seconds()
                    >= heartbeat):
                # Send a heartbeat.  NOTE: '/hearbeat/' (sic) is the
                # wire subject subscribers match on; do not rename it
                # without updating them too.
                msg = Message('/hearbeat/' + datatype, 'heartbeat',
                              datetime.utcnow().isoformat())
                logger.info('sending: ' + str(msg))
                try:
                    publish.send(str(msg))
                    last_heartbeat = datetime.now()
                except zmq.ZMQError:
                    logger.exception('publish failed')
            for filedesc in younger_than_stamp_files():
                # Publish new files
                data = copy.copy(rpc_metadata)
                data['uri'] += os.path.basename(filedesc)
                msg = Message('/' + datatype, 'file', data)
                logger.info('sending: ' + str(msg))
                try:
                    publish.send(str(msg))
                except zmq.ZMQError:
                    logger.exception('publish failed')
            time.sleep(TIME_WAKEUP)
    except (KeyboardInterrupt, SystemExit):
        pass
    finally:
        logger.info('publisher stopping')
        publish.close()
def check_and_publish(datatype, rpc_metadata, publish, heartbeat):
    """Check for new files of type *datatype*, with the given *rpc_metadata*
    and publish them through *publish*.

    Polls forever; *heartbeat* (seconds, falsy to disable) controls
    periodic heartbeat messages.  Always closes *publish* on exit.
    """
    stamp_config = DatexLastStamp(datatype)

    def younger_than_stamp_files():
        """Uses glob polling to get new files.
        """
        fdir, fglob = datex_config.get_path(datatype)
        del fglob
        fstamp = stamp_config.get_last_stamp()
        for fname, ftime in _get_file_list(datatype, time_start=fstamp + TIME_EPSILON):
            if datex_config.distribute(datatype):
                yield os.path.join(fdir, fname)
            # Stamp is advanced for every file, distributed or not, so
            # the next poll does not revisit it.
            stamp_config.update_last_stamp(ftime)

    # Give the publisher a little time to initialize
    # (e.g reconnections from subscribers)
    time.sleep(1)
    logger.info('publisher starting for datatype %s'%datatype)
    if heartbeat:
        # Back-dated so a heartbeat is sent on the first iteration.
        last_heartbeat = datetime.now() - timedelta(seconds=heartbeat + 1)
    try:
        while(True):
            # NOTE(review): timedelta.seconds ignores the days component
            # (wraps after 24h); total_seconds() would be safer -- confirm.
            if(heartbeat and (datetime.now() - last_heartbeat).seconds >= heartbeat):
                # Send a heartbeat
                # NOTE(review): '/hearbeat/' (sic) appears to be the wire
                # subject subscribers match on -- verify before renaming.
                msg = Message('/hearbeat/' + datatype, 'heartbeat',
                              datetime.utcnow().isoformat())
                logger.info('sending: ' + str(msg))
                try:
                    publish.send(str(msg))
                    last_heartbeat = datetime.now()
                except zmq.ZMQError:
                    logger.exception('publish failed')
            for filedesc in younger_than_stamp_files():
                # Publish new files
                data = copy.copy(rpc_metadata)
                data['uri'] += os.path.basename(filedesc)
                msg = Message('/' + datatype, 'file', data)
                logger.info('sending: ' + str(msg))
                try:
                    publish.send(str(msg))
                except zmq.ZMQError:
                    logger.exception('publish failed')
            time.sleep(TIME_WAKEUP)
    except (KeyboardInterrupt, SystemExit):
        pass
    finally:
        logger.info('publisher stopping')
        publish.close()
def get_file(self, datatype, filename, outdir='.', chunk_size=1000*5120,
             check_md5=False):
    """Fetch a file in chunks.

    :param datatype: remote datatype key.
    :param filename: remote file name.
    :param outdir: local directory for the downloaded file.
    :param chunk_size: bytes fetched per RPC call.
    :param check_md5: if true, verify the download against the remote
        md5 sum.
    :returns: path of the local file, or None if the md5 check failed.
    """
    logger.info('getting %s', self.url + '/' + datatype + '/' + filename)
    if check_md5:
        md5 = hashlib.md5()
    localfile = os.path.join(outdir, filename)
    offset = 0
    # 'with' guarantees the local file is closed even if an RPC call
    # raises mid-download (the original leaked the handle on error).
    with open(localfile, 'w+b') as fob:
        while True:
            buf = self.server.get_file_chunk(datatype, filename, offset,
                                             chunk_size, timeout=1800)
            buf = buf.data
            if not buf:
                break
            if check_md5:
                md5.update(buf)
            fob.write(buf)
            offset += len(buf)
    logger.info('saved %s (%d bytes)', localfile, offset)
    if check_md5:
        logger.info('md5 check on %s', filename)
        remote_md5 = self.server.get_file_md5(datatype, filename,
                                              timeout=1800)
        if remote_md5 != md5.hexdigest():
            logger.error('md5 check failed on %s', filename)
            return None
    return localfile
def __init__(self, subject, address):
    """Create a SUB socket subscribed to *subject* and its heartbeat."""
    subscriber = context.socket(zmq.SUB)
    if subject:
        logger.info("Subscribing to '%s'" % subject)
        subscriber.setsockopt(zmq.SUBSCRIBE, subject)
        # Derive the matching heartbeat subject from the data subject.
        parts = subject.split()[0].split('://')
        heartbeat_subject = "%s://hearbeat/%s heartbeat" % (parts[0], parts[1])
    else:
        heartbeat_subject = "pytroll://hearbeat"
    # subscribe for heartbeats
    logger.info("Subscribing to '%s'" % heartbeat_subject)
    subscriber.setsockopt(zmq.SUBSCRIBE, heartbeat_subject)
    self.subscriber = subscriber
    self.destination = address
    logger.info("Destination '%s'" % self.destination)
def __init__(self, subject, address):
    """Set up the subscriber socket and register its subscriptions."""
    sub_socket = context.socket(zmq.SUB)
    if not subject:
        heartbeat_subject = "pytroll://hearbeat"
    else:
        logger.info("Subscribing to '%s'" % subject)
        sub_socket.setsockopt(zmq.SUBSCRIBE, subject)
        # Build the heartbeat subject that corresponds to this subject.
        scheme_and_topic = subject.split()[0].split('://')
        heartbeat_subject = ''.join((scheme_and_topic[0], "://hearbeat/",
                                     scheme_and_topic[1], ' heartbeat'))
    # subscribe for heartbeats
    logger.info("Subscribing to '%s'" % heartbeat_subject)
    sub_socket.setsockopt(zmq.SUBSCRIBE, heartbeat_subject)
    self.subscriber = sub_socket
    self.destination = address
    logger.info("Destination '%s'" % self.destination)
def get_file_md5(self, datatype, filename):
    """Get a files md5 sum.

    Thin wrapper around the remote ``get_file_md5`` RPC call.
    """
    logger.info('getting md5 %s', '/'.join((self.url, datatype, filename)))
    return self.server.get_file_md5(datatype, filename, timeout=1800)
def get_datatype_list():
    """Return a list of available data types.

    The configured datatypes are returned in sorted order.
    """
    logger.info('... get_datatype_list')
    datatypes = list(datex_config.get_datatypes())
    datatypes.sort()
    return datatypes