def storeHosts(lines_):
    """Overwrite the hosts file with ``lines_`` and force it onto disk.

    The destination is the module-level ``hostsFileName``.  The file is
    flushed and fsync'ed before it is closed, and it is closed even when
    writing fails.

    :param lines_: iterable of strings, written verbatim (no newlines added).
    """
    # Assume that change_ip is a function that takes a string and returns a
    # new one with the ip changed; example below
    with open(hostsFileName, "w") as new_file:
        new_file.writelines(lines_)
        # Push Python's buffer to the OS, then the OS cache to the device.
        new_file.flush()
        os.fsync(new_file.fileno())
def open_tempfile_with_atomic_write_to(path, **kwargs):
    """
    Open a temporary file object that atomically moves to the specified
    path upon exiting the context manager.

    Supports the same keyword arguments as `open`. The parent directory
    must exist and be user-writable.

    This is a generator that yields the open file exactly once.
    NOTE(review): no ``contextmanager`` decorator is visible on this
    definition - confirm how callers wrap it.

    WARNING: This is just like 'mv', it will clobber files!
    """
    parent_directory = os.path.dirname(path)
    # Create the temp file next to the target so the final rename stays on
    # one filesystem; only its name is needed, so close it right away.
    _tempfile = tempfile.NamedTemporaryFile(delete=False, dir=parent_directory)
    _tempfile.close()
    tempfile_path = _tempfile.name
    try:
        with open(tempfile_path, **kwargs) as file:
            yield file
            file.flush()
            os.fsync(file.fileno())
        os.rename(tempfile_path, path)
    finally:
        # After a successful rename the temp name no longer exists; on
        # failure this removes the partially written file.
        try:
            os.remove(tempfile_path)
        except OSError as e:
            if e.errno != errno.ENOENT:
                # Bare raise keeps the original traceback.
                raise
def __idle_save_text(self, status):
    """Autosave the editor text to ``self.__filename`` via a temp file.

    The text is written to a temporary file in the same directory, synced
    to disk, given the permissions of the existing file (when there is one)
    and then moved over the target with ``atomic_rename``.

    :param status: message prefix; ``"...done"`` is appended when shown.
    :returns: always ``False``.
    """
    self.__save_text_id = 0
    _logger.debug("autosaving to %s", self.__filename)
    dn, bn = os.path.split(self.__filename)
    try:
        perms = os.stat(self.__filename).st_mode
    except OSError:
        # Target does not exist yet (or cannot be stat'ed): keep the
        # temp file's default permissions.
        perms = None
    # Use only the base name as prefix; the directory is passed separately.
    (tempfd, temppath) = tempfile.mkstemp('.tmp', bn, dn)
    os.close(tempfd)
    f = open_text_file(temppath, 'w')
    try:
        text = self.input.get_property("text")
        utext = str(text)
        f.write(utext)
        f.flush()
        # Sync the handle actually written to; ``tempfd`` is already closed.
        os.fsync(f.fileno())
    finally:
        f.close()
    if perms is not None:
        os.chmod(temppath, perms)
    atomic_rename(temppath, self.__filename)
    self.__show_msg(status + _("...done"))
    self.__modified = False
    self.__sync_modified_sensitivity()
    _logger.debug("autosave complete")
    return False
def restore(self, backup, volume_id, volume_file):
    """Restore the given volume backup from Ceph object store.

    Copies the backup image into ``volume_file`` starting at offset 0 and
    then fsyncs the target when it exposes a file descriptor.
    """
    volume = self.db.volume_get(self.context, volume_id)
    backup_name = self._get_backup_base_name(backup['volume_id'],
                                             backup['id'])
    volume_name = volume['name']

    LOG.debug('starting backup restore from Ceph backup=%s to volume=%s' %
              (backup['id'], volume_name))

    # Ensure we are at the beginning of the volume
    volume_file.seek(0)
    size_in_bytes = units.GiB * int(volume['size'])

    with rbddriver.RADOSClient(self, self._ceph_pool) as client:
        source_image = self.rbd.Image(client.ioctx, backup_name)
        try:
            self._transfer_data(source_image, volume_file, volume['name'],
                                size_in_bytes)
        finally:
            source_image.close()

    # Be tolerant to IO implementations that do not support fileno()
    try:
        descriptor = volume_file.fileno()
    except IOError:
        LOG.info("volume_file does not support fileno() so skipping fsync()")
    else:
        os.fsync(descriptor)

    LOG.debug('restore %s to %s finished.' % (backup['id'], volume_id))
def brickfind_crawl(brick, args):
    """Walk ``brick`` and append every found path to ``args.outfile``.

    Paths are written relative to the brick root through ``output_write``
    with ``args.output_prefix``.  Directories named in the
    ``brick_ignore_dirs`` option are passed to ``find`` as ``ignore_dirs``.
    The output file is flushed and fsync'ed before it is closed.
    """
    if brick.endswith("/"):
        brick = brick[:-1]

    mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)

    with open(args.outfile, "a+") as fout:
        # Number of leading characters to drop: the brick path plus the
        # separator that follows it.
        strip_len = len(brick) + 1

        def output_callback(path, filter_result):
            relative = path.strip()[strip_len:]
            output_write(fout, relative, args.output_prefix, encode=True)

        ignored_names = conf.get_opt("brick_ignore_dirs").split(",")
        ignore_dirs = [os.path.join(brick, name) for name in ignored_names]

        find(brick, callback_func=output_callback, ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
def create_mds_bootstrap(cluster, key):
    """
    Run on mds node, writes the bootstrap key if not there yet.

    Returns None on success, error message on error exceptions. pushy
    mangles exceptions to all be of type ExceptionProxy, so we can't
    tell between bug and correctly handled failure, so avoid using
    exceptions for non-exceptional runs.

    The key is written to ``<path>.<pid>.tmp`` with mode 0600, synced to
    disk and then renamed onto the keyring path.
    """
    import os
    path = '/var/lib/ceph/bootstrap-mds/{cluster}.keyring'.format(
        cluster=cluster,
        )
    if not os.path.exists(path):
        tmp = '{path}.{pid}.tmp'.format(
            path=path,
            pid=os.getpid(),
            )
        # file() doesn't let us control access mode from the
        # beginning, and thus would have a race where attacker can
        # open before we chmod the file, so play games with os.open
        fd = os.open(
            tmp,
            (os.O_WRONLY | os.O_CREAT | os.O_EXCL
             | os.O_NOCTTY | os.O_NOFOLLOW),
            # 0o600 is accepted by Python 2.6+ and 3; a bare 0600 is a
            # syntax error on Python 3.
            0o600,
            )
        with os.fdopen(fd, 'wb') as f:
            f.write(key)
            f.flush()
            os.fsync(f.fileno())
        os.rename(tmp, path)
def _update(self, mergerfunc):
    """Merge the stored JSON config through ``mergerfunc`` and persist it.

    The current file content (when it is valid JSON) is layered over a copy
    of ``self.default_values`` and handed to ``mergerfunc``.  A falsy
    result means "nothing changed"; otherwise the result is written to a
    temporary file in the same directory, synced, renamed over
    ``self.filename``, and the directory entry is synced as well.

    :param mergerfunc: callable taking the merged dict and returning the
        text to write, or a falsy value when nothing has to be written.
    :returns: ``True`` when the file was rewritten, ``False`` otherwise.
    """
    # Work on a copy so repeated calls do not accumulate file values in
    # the shared defaults.
    data = self.default_values.copy()
    with LockedOpen(self.filename, 'r+') as f:
        try:
            data.update(json.load(f))
        except ValueError:
            # Empty or corrupt file: fall back to the defaults only.
            pass

        data = mergerfunc(data)
        # If Data is not changed by merger func
        if not data:
            return False

        with tempfile.NamedTemporaryFile(
                'w',
                dir=os.path.dirname(self.filename),
                delete=False) as tf:
            tf.write(data)
            # Make the content durable before the rename publishes it.
            tf.flush()
            os.fsync(tf.fileno())
            tempname = tf.name

        os.rename(tempname, self.filename)
        dirfd = os.open(os.path.dirname(os.path.abspath(self.filename)),
                        os.O_DIRECTORY)
        try:
            os.fsync(dirfd)
        finally:
            os.close(dirfd)
        return True
def fsync(self, isfsyncfile):
    """Flush this object's buffers and sync its descriptor to disk.

    When ``isfsyncfile`` is true and the platform offers ``os.fdatasync``
    that call is used; otherwise ``os.fsync`` is used.
    """
    log.debug("file %s isfsyncfile %d" % (self.path, isfsyncfile))
    self._fflush()
    use_fdatasync = isfsyncfile and hasattr(os, 'fdatasync')
    if not use_fdatasync:
        os.fsync(self.fd)
        return
    os.fdatasync(self.fd)
def log(s):
    """Print ``s`` with a timestamp and append the same line to ``logFile``.

    The line has the form ``YYYY-MM-DD HH:MM:SS> message``.  The log file
    is flushed and fsync'ed after every call.
    """
    # Do not call the local ``str``: that would shadow the builtin.
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    line = stamp + "> " + s
    print(line)
    logFile.write(line + "\n")
    logFile.flush()
    os.fsync(logFile.fileno())
def __init__(self, pid_path):
    """Take an exclusive, non-blocking lock on the pid file at ``pid_path``.

    :param pid_path: path of the lock/pid file; it is created if missing.
    :raises SoleError: when the lock file cannot be opened, or when
        another process already holds the lock (the message carries the
        pid read from the file, or ``?`` when it cannot be read).
    """
    self._pid_path = pid_path
    self._other_running = False
    ensuredirs(self._pid_path)
    self._lockfile = None

    try:
        self._lockfile = os.open(self._pid_path, os.O_CREAT | os.O_WRONLY)
    except Exception:
        raise SoleError('Cannot open lockfile (path = %s)' % self._pid_path)

    try:
        fcntl.lockf(self._lockfile, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        self._other_running = True
        try:
            with open(self._pid_path, 'r') as f:
                pid = f.read().strip()
        except Exception:
            pid = '?'
        raise SoleError('Other instance is running (pid = %s)' % pid)

    try:
        os.ftruncate(self._lockfile, 0)
        # os.write() needs bytes on Python 3; encoding is a no-op on py2.
        os.write(self._lockfile, ('%i\n' % os.getpid()).encode('ascii'))
        os.fsync(self._lockfile)
    except (IOError, OSError):
        # the pid is only stored for user information, so this is
        # allowed to fail
        pass
def initialize(self):
    """
    Create the database

    When ``self.db_file`` already exists a connection to it is opened.
    Otherwise the schema is built in a temporary file inside
    ``self.db_dir``, synced to disk and renamed to ``self.db_file``.  If
    another writer created ``self.db_file`` in the meantime the temporary
    file is discarded.  In every case ``self.conn`` ends up connected to
    ``self.db_file``.
    """
    if os.path.exists(self.db_file):
        self.conn = get_db_connection(self.db_file, self.timeout)
        return

    mkdirs(self.db_dir)
    fd, tmp_db_file = mkstemp(suffix='.tmp', dir=self.db_dir)
    os.close(fd)
    try:
        conn = sqlite3.connect(tmp_db_file, check_same_thread=False,
                               timeout=0)
        try:
            self._initialize(conn)
            conn.commit()
        finally:
            conn.close()
        if not os.path.exists(self.db_file):
            # Open and fsync so the schema is on disk before the rename
            # makes the database visible under its final name.
            with open(tmp_db_file, 'r+b') as f:
                os.fsync(f.fileno())
            os.rename(tmp_db_file, self.db_file)
    finally:
        # Still present when we lost the creation race or initialisation
        # failed; after a successful rename there is nothing to remove.
        if os.path.exists(tmp_db_file):
            os.remove(tmp_db_file)
    self.conn = get_db_connection(self.db_file, self.timeout)
def handle_result(self, data, **kwargs):
    """Write one line per extracted result to ``self.result``.

    Does nothing when ``self.result`` is ``None``.  Each line is made of
    the common fields (``kwargs`` plus ``name`` and ``timestamp``) followed
    by the item's columns, each key prefixed with the item's action.  The
    file is flushed and fsync'ed even when processing fails part-way.

    :param data: raw payload handed to ``extract_results``.
    :param kwargs: extra fields added to every line.
    """
    if self.result is None:
        return

    # Process each result individually
    try:
        for item in extract_results(data):
            fields = {}
            fields.update(kwargs)

            if item.timestamp:
                timestamp = time.mktime(item.timestamp.timetuple())
            else:
                # utcnow must be called; the bare attribute has no
                # timetuple() and raised AttributeError.
                timestamp = time.mktime(dt.datetime.utcnow().timetuple())

            fields.update(name=item.name, timestamp=timestamp)
            base = self.join_fields(fields)

            # Write each added/removed entry on a different line
            curr_fields = {'result_type': item.action}
            for key, val in item.columns.items():
                curr_fields['_'.join([item.action, key])] = val

            self.result.write(
                base + ', ' + self.join_fields(curr_fields) + '\n')
    finally:
        self.result.flush()
        os.fsync(self.result.fileno())
def open_atomic(filepath, *args, **kwargs):
    """
    Open temporary file object that atomically moves to destination upon
    exiting.

    Allows reading and writing to and from the same filename.

    The file will not be moved to destination in case of an exception.

    This is a generator that yields the open temporary file once.
    NOTE(review): no ``contextmanager`` decorator is visible on this
    definition - confirm how callers wrap it.

    Parameters
    ----------
    filepath : string
        the file path to be opened
    fsync : bool
        whether to force write the file to disk
    *args : mixed
        Any valid arguments for :code:`open`
    **kwargs : mixed
        Any valid keyword arguments for :code:`open`
    """
    # pop, not get: ``fsync`` is our own option and must not reach open().
    fsync = kwargs.pop('fsync', False)

    with tempfile(dir=os.path.dirname(os.path.abspath(filepath))) as tmppath:
        with open(tmppath, *args, **kwargs) as file:
            try:
                yield file
            finally:
                if fsync:
                    file.flush()
                    os.fsync(file.fileno())
        # Only reached when the body raised nothing.
        os.rename(tmppath, filepath)
        os.chmod(filepath, 0o644)
def finish_rip(self, track):
    """Close every output sink of the current rip and report success.

    Ends the progress display, shuts down the encoder pipe (waiting for
    the encoder process and warning on a non-zero exit code), closes the
    wav file, syncs and closes the pcm file, clears the ``ripping`` event
    and logs the track as successful.
    """
    self.progress.end_track()

    encoder_pipe = self.pipe
    if encoder_pipe is not None:
        print(Fore.GREEN + 'Rip complete' + Fore.RESET)
        encoder_pipe.flush()
        encoder_pipe.close()

        # wait for process to end before continuing
        exit_status = self.rip_proc.wait()
        if exit_status != 0:
            print(Fore.YELLOW +
                  "Warning: encoder returned non-zero error code " +
                  str(exit_status) + Fore.RESET)
        self.rip_proc = None
        self.pipe = None

    wav = self.wav_file
    if wav is not None:
        wav.close()
        self.wav_file = None

    pcm = self.pcm_file
    if pcm is not None:
        pcm.flush()
        os.fsync(pcm.fileno())
        pcm.close()
        self.pcm_file = None

    self.ripping.clear()
    self.post.log_success(track)
def handle_status(self, data, **kwargs):
    """Append one line per status entry in ``data['data']`` to the log.

    Does nothing when ``self.status`` is ``None``.  Every line combines
    ``kwargs`` with the entry's line, message, severity, filename, version
    and creation time, rendered through ``self.join_fields``.  The file is
    flushed and fsync'ed even if an entry fails.
    """
    if self.status is None:
        return

    # Write each status log on a different line
    try:
        for entry in data.get('data', []):
            record = dict(kwargs)
            record.update({
                'line': entry.get('line', ''),
                'message': entry.get('message', ''),
                'severity': entry.get('severity', ''),
                'filename': entry.get('filename', ''),
                'version': entry.get('version'),  # may be null
            })

            # Fall back to the current UTC time when the entry has none.
            if 'created' in entry:
                moment = entry['created']
            else:
                moment = dt.datetime.utcnow()
            record['created'] = time.mktime(moment.timetuple())

            self.status.write(self.join_fields(record) + '\n')
    finally:
        self.status.flush()
        os.fsync(self.status.fileno())
def logProcessing(record_name, msg):
    """Append ``[record_name]: msg`` to the render log and sync it to disk.

    The log file is opened (truncating) on first use and kept in the
    module-level ``logOut`` for later calls.
    """
    global logOut
    if logOut is None:
        # Raw string: in a normal literal ``\U`` starts a unicode escape
        # and is a syntax error on Python 3.  The path value is unchanged.
        logOut = open(r"C:\Users\u0064666\Pictures\Cards\renderLog.txt", "w")
    logOut.write("[" + record_name + "]: " + msg + "\r\n")
    logOut.flush()
    os.fsync(logOut.fileno())
def restore(self, backup, volume_id, volume_file):
    """Restore volume from backup in Ceph object store.

    If volume metadata is available this will also be restored.  A
    ``BackupOperationError`` is logged and re-raised.
    """
    target_volume = self.db.volume_get(self.context, volume_id)
    log_args = {'src': backup['id'], 'dest': target_volume['name']}
    LOG.debug('Starting restore from Ceph backup=%(src)s to '
              'volume=%(dest)s' % log_args)

    try:
        self._restore_volume(backup, target_volume, volume_file)

        # Be tolerant of IO implementations that do not support fileno()
        try:
            descriptor = volume_file.fileno()
        except IOError:
            LOG.debug("Restore target I/O object does not support "
                      "fileno() - skipping call to fsync().")
        else:
            os.fsync(descriptor)

        self._restore_metadata(backup, volume_id)

        LOG.debug('Restore to volume %s finished successfully.' %
                  volume_id)
    except exception.BackupOperationError as e:
        failure = {'error': e, 'volume': volume_id}
        LOG.error(_('Restore to volume %(volume)s finished with error - '
                    '%(error)s.') % failure)
        raise
def handle_other_file(self, f):
    """Store an uploaded file as this source's rules and commit it.

    The file is copied to ``<GIT_SOURCES_BASE_DIRECTORY>/<pk>/rules/<name>``
    inside the source's git repository.  A commit is made when the index
    differs from the working tree; afterwards the model is saved and its
    SourceAtVersion is created or refreshed.
    """
    self.updated_date = timezone.now()
    self.first_run = False

    repo = self.get_git_repo(delete = True)
    rules_dir = os.path.join(settings.GIT_SOURCES_BASE_DIRECTORY,
                             str(self.pk), 'rules')
    # create rules dir if needed
    if not os.path.isdir(rules_dir):
        os.makedirs(rules_dir)

    # copy file content to target
    f.seek(0)
    os.fsync(f)
    destination = os.path.join(rules_dir, self.name)
    shutil.copy(f.name, destination)

    index = repo.index
    if len(index.diff(None)) or self.first_run:
        os.environ['USERNAME'] = '******'
        index.add(["rules"])
        index.commit('source version at %s' % (self.updated_date))

    self.save()
    # Now we must update SourceAtVersion for this source
    # or create it if needed
    self.create_sourceatversion()
def saveJob(self, job, workflow, wmTask = None, jobNumber = 0):
    """
    _saveJob_

    Actually do the mechanics of saving the job to a pickle file

    The job is annotated with spec/task/sandbox (when ``wmTask`` is
    given), priority, counter and cache directory, then pickled to
    ``<cache_dir>/job.pkl`` and synced to disk.
    """
    priority = None

    if wmTask:
        # If we managed to load the task,
        # so the url should be valid
        job['spec'] = workflow.spec
        job['task'] = wmTask.getPathName()
        priority = wmTask.getTaskPriority()
        if job.get('sandbox', None) is None:
            job['sandbox'] = wmTask.data.input.sandbox

    job['priority'] = priority
    job['counter'] = jobNumber
    cacheDir = job.getCache()
    job['cache_dir'] = cacheDir

    # HIGHEST_PROTOCOL is a binary format, so the file must be opened in
    # binary mode; ``with`` closes it even when pickling fails.
    with open(os.path.join(cacheDir, 'job.pkl'), 'wb') as output:
        cPickle.dump(job, output, cPickle.HIGHEST_PROTOCOL)
        output.flush()
        os.fsync(output.fileno())

    return
def OnTaskSuccess(self, task):
    """Record ``task`` in the resume file as a frozen task.

    Appends ``-f`` followed by an anchored, regex-escaped pattern of the
    task name, then forces the lines to disk.
    """
    # Log the succeed tasks so that they are ensured to be frozen in case
    # of a sudden death.
    resume_file = self._resume_output
    escaped_name = re.escape(task.name)
    resume_file.write('-f\n^{}$\n'.format(escaped_name))
    # Makes sure the task freezing command line make it to the disk.
    resume_file.flush()
    os.fsync(resume_file.fileno())
def session_write(self):
    """
    Writes the locked set to the session file.
    The global lock MUST be held for this function to work, although on
    NFS additional locking is done.
    Raises RepositoryError if session file is inaccessible
    """
    # logger.debug("Openining Session File: %s " % self.fn )
    try:
        # If this fails, we want to shutdown the repository (corruption
        # possible)
        fd = self.delayopen(self.fn)
        try:
            if not self.afs:
                fcntl.lockf(fd, fcntl.LOCK_EX)
            self.delaywrite(fd, pickle.dumps(self.locked))
            if not self.afs:
                fcntl.lockf(fd, fcntl.LOCK_UN)
            os.fsync(fd)
        finally:
            # Always release the descriptor, also when locking, writing
            # or syncing failed.
            os.close(fd)
    except OSError as x:
        if x.errno != errno.ENOENT:
            raise RepositoryError(
                self.repo,
                "Error on session file access '%s': %s" % (self.fn, x))
        else:
            # logger.debug( "File NOT found %s" %self.fn )
            # Adjacent literals instead of backslash continuations, so
            # source indentation does not leak into the message.
            raise RepositoryError(
                self.repo,
                "SessionWrite: Own session file not found! Possibly "
                "deleted by another ganga session.\n"
                "Possible reasons could be that this computer has a very "
                "high load, or that the system clocks on computers "
                "running Ganga are not synchronized.\n"
                "On computers with very high load and on network "
                "filesystems, try to avoid running concurrent ganga "
                "sessions for long.\n '%s' : %s" % (self.fn, x),
            )
    except IOError as x:
        raise RepositoryError(
            self.repo,
            "Error on session file locking '%s': %s" % (self.fn, x))
def _save(self, name, content):
    """Write ``content`` to the storage path for ``name`` and return ``name``.

    Missing parent directories are created first.  The content is written
    to a temporary file, synced to disk and then moved onto the final path
    with ``file_move_safe`` (overwriting an existing file).

    :param name: storage-relative name, resolved with ``self.path``.
    :param content: object with a ``read()`` method returning the bytes.
    :raises IOError: when the parent path exists but is not a directory.
    """
    full_path = self.path(name)

    # Create any intermediate directories that do not exist.
    # Note that there is a race between os.path.exists and os.makedirs:
    # if os.makedirs fails with EEXIST, the directory was created
    # concurrently, and we can continue normally. Refs #16082.
    directory = os.path.dirname(full_path)
    if not os.path.exists(directory):
        try:
            if self.directory_permissions_mode is not None:
                # os.makedirs applies the global umask, so we reset it,
                # for consistency with file_permissions_mode behavior.
                old_umask = os.umask(0)
                try:
                    os.makedirs(directory, self.directory_permissions_mode)
                finally:
                    os.umask(old_umask)
            else:
                os.makedirs(directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
    if not os.path.isdir(directory):
        raise IOError("%s exists and is not a directory." % directory)

    # mktemp() only picks a name.  Creating it with O_EXCL makes the open
    # fail instead of following a file or symlink planted in between, and
    # mode 0o666 keeps the umask-derived permissions of a plain open().
    tmp_file = tempfile.mktemp()
    flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    fd = os.open(tmp_file, flags, 0o666)
    try:
        with os.fdopen(fd, 'wb') as filey:
            filey.write(content.read())
            # make sure that all data is on disk
            filey.flush()
            os.fsync(filey.fileno())
        file_move_safe(tmp_file, full_path, allow_overwrite=True)
    finally:
        # Nothing is left after a successful move; remove the leftover
        # when writing or moving failed.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
    return name
def table_to_file(table, filepath, freshfile, adddic=None):
    """Save table to a file with additional columns.

    :param table: iterable of dict-like rows.
    :param filepath: destination path.
    :param freshfile: when true the file is truncated and a header line is
        written; otherwise rows are appended without a header.
    :param adddic: optional mapping of extra column -> value added to every
        row (overriding row values with the same key).

    Values are joined with commas without any quoting or escaping.
    """
    mode = 'w' if freshfile else 'a'
    # Copy so that extending the column list never alters the helper's
    # own list.
    colnames = list(get_all_colnames(table))
    if adddic is not None:
        colnames.extend(adddic.keys())

    with open(filepath, mode) as f:
        if freshfile:
            f.write(','.join(colnames) + '\n')
        for row in table:
            if adddic is not None:
                # Works on Python 2 and 3, unlike items() + items().
                rowcopy = dict(row.items())
                rowcopy.update(adddic)
            else:
                rowcopy = row
            cells = [str(rowcopy[k]) if k in rowcopy else ''
                     for k in colnames]
            f.write(','.join(cells) + '\n')
        f.flush()
        os.fsync(f.fileno())
def pymw_worker_write(output, options):
    """Pickle ``output`` to the file named by ``sys.argv[2]`` and sync it.

    :param output: object to pickle.
    :param options: unused by this function.
    """
    import os
    # ``with`` closes the file even when pickling fails.
    with open(sys.argv[2], 'wb') as outfile:
        pickle.Pickler(outfile).dump(output)
        outfile.flush()
        os.fsync(outfile.fileno())
def test00CropFail(self):
    """processImage() leaves a fresh file in place and moves an old one.

    A truncated JPEG (first 8192 bytes of SampleImage.jpg) is placed in
    the camera's input directory.  While its modification time is recent
    the file must stay; once the mtime is set more than an hour back it
    must end up in the ``hires`` sub-directory.
    """
    # make the dirs
    camera = moduleUnderTest.cameras[0]
    indir = os.path.join(moduleUnderTest.root, "2013-07-01",
                         camera.shortname)
    os.makedirs(os.path.join(indir, "hires"))

    # put a fragment of a test jpg in the indir
    sample_name = "SampleImage.jpg"
    sample_fd = os.open(sample_name, os.O_RDONLY | os.O_BINARY)
    fragment = os.read(sample_fd, 8192)
    logging.info("test00CropFail(): buf size is %d" % len(fragment))
    os.close(sample_fd)

    image_name = "12-00-01-12345.jpg"
    image_path = os.path.join(indir, image_name)
    out_fd = os.open(image_path, os.O_WRONLY | os.O_BINARY | os.O_CREAT)
    os.write(out_fd, fragment)
    os.fsync(out_fd)
    os.close(out_fd)
    time.sleep(2)
    hires_path = os.path.join(indir, "hires", image_name)

    # run processImage().
    # Since the mod time is recent, The file should stay in indir
    moduleUnderTest.processImage(indir, image_name, camera)
    assert os.path.exists(image_path) and not os.path.exists(hires_path)

    # set the file's mod time back over an hour and run processImage().
    # This time the file should move to the hires dir
    os.utime(image_path, (int(time.time()), time.time() - 3602))
    moduleUnderTest.processImage(indir, image_name, camera)
    assert not os.path.exists(image_path) and os.path.exists(hires_path)
def handle_status(self, data, **kwargs):
    """Write status entries at or above the minimum severity as JSON lines.

    Does nothing when ``self.fp`` is ``None``.  Each accepted entry of
    ``data['data']`` is dumped with ``json_dump`` followed by ``\\r\\n``;
    afterwards the file is flushed and fsync'ed.

    :param data: mapping whose ``'data'`` key holds the status entries.
    :param kwargs: only ``host_identifier`` is read.
    """
    if self.fp is None:
        return

    fp = self.fp
    minimum_severity = self.minimum_severity

    host_identifier = kwargs.get('host_identifier')
    created = dt.datetime.utcnow().isoformat()

    for item in data.get('data', []):
        if int(item['severity']) < minimum_severity:
            continue

        # Format into a local value: the caller's item is left untouched
        # so other consumers still see the original 'created' object.
        if 'created' in item:
            timestamp = item['created'].isoformat()
        else:
            timestamp = created

        json_dump({
            '@version': 1,
            '@host_identifier': host_identifier,
            '@timestamp': timestamp,
            '@message': item.get('message', ''),

            'log_type': 'status',
            'line': item.get('line', ''),
            'message': item.get('message', ''),
            'severity': item.get('severity', ''),
            'filename': item.get('filename', ''),
            'osquery_version': item.get('version'),  # may be null
            'created': created,
        }, fp)
        fp.write('\r\n')

    fp.flush()
    # os.fsync() requires the descriptor; calling it bare raised TypeError.
    os.fsync(fp.fileno())
def pymw_master_write(self, output, loc):
    """Pickle ``output`` to the file at ``loc`` and sync it to disk.

    :param output: object to pickle.
    :param loc: destination path; an existing file is overwritten.
    """
    import os
    # ``with`` closes the file even when pickling fails.
    with open(loc, 'wb') as outfile:
        pickle.Pickler(outfile).dump(output)
        outfile.flush()
        os.fsync(outfile.fileno())
def input(self, **kwargs):
    """Run ``self.command`` over the filter's content or file.

    ``{infile}`` / ``{outfile}`` placeholders in the command are filled
    with a temporary file holding ``self.content`` (or ``self.filename``
    when set) and a temporary output file.  Without an input file the
    content is piped to the process on stdin.

    :raises FilterError: when starting or talking to the process fails
        with an ``IOError``/``OSError``.

    NOTE(review): the process output is captured in local variables but
    not used or returned by the code visible here.
    """
    options = dict(self.options)
    if self.infile is None and "{infile}" in self.command:
        if self.filename is None:
            self.infile = tempfile.NamedTemporaryFile(mode="w")
            self.infile.write(self.content)
            self.infile.flush()
            os.fsync(self.infile)
            options["infile"] = self.infile.name
        else:
            self.infile = open(self.filename)
            options["infile"] = self.filename

    if "{outfile}" in self.command and "outfile" not in options:
        # The old ``".%s" % self.type and self.type or ""`` evaluated to
        # the bare type, so the suffix lacked its leading dot.
        ext = ".%s" % self.type if self.type else ""
        self.outfile = tempfile.NamedTemporaryFile(mode="r+", suffix=ext)
        options["outfile"] = self.outfile.name

    try:
        command = fstr(self.command).format(**options)
        proc = subprocess.Popen(
            command, shell=True, cwd=self.cwd, stdout=self.stdout,
            stdin=self.stdin, stderr=self.stderr
        )
        if self.infile is None:
            filtered, err = proc.communicate(self.content)
        else:
            filtered, err = proc.communicate()
    except (IOError, OSError) as e:
        raise FilterError("Unable to apply %s (%r): %s" %
                          (self.__class__.__name__, self.command, e))
def close(self, _defer=False):
    """Close the archive, deferring while a stream is still open.

    ``_defer=True`` is how a stream notifies the archive that the stream
    is now closed; calling it that way directly is not recommended.  If a
    stream is open the request is remembered and carried out once the
    stream reports back.  Otherwise the archive is de-initialised and an
    attached file is flushed, fsync'ed (unless read-only) and closed when
    this object opened it.
    """
    if _defer:
        # This call came from our open stream.
        self._stream = None
        if not self._defer_close:
            # We are not yet ready to close.
            return

    if self._stream is not None:
        # We have a stream open! don't close, but remember we were asked to.
        self._defer_close = True
        return

    self.denit()

    # Nothing more to do without an attached file...
    if not hasattr(self, 'f'):
        return
    attached = self.f
    # ...or when it is already closed.
    if getattr(attached, 'closed', False):
        return
    # Flush it if not read-only...
    if attached.mode not in ('r', 'rb'):
        attached.flush()
        os.fsync(attached.fileno())
    # and then close it, if we opened it...
    if getattr(self, '_close', None):
        attached.close()
def _fsync_files(filenames): """Call fsync() a list of file names The filenames should be absolute paths already. """ touched_directories = set() mode = os.O_RDONLY # Windows if hasattr(os, 'O_BINARY'): mode |= os.O_BINARY for filename in filenames: fd = os.open(filename, mode) os.fsync(fd) os.close(fd) touched_directories.add(os.path.dirname(filename)) # Some OSes also require us to fsync the directory where we've # created files or subdirectories. if hasattr(os, 'O_DIRECTORY'): for dirname in touched_directories: fd = os.open(dirname, os.O_RDONLY | os.O_DIRECTORY) os.fsync(fd) os.close(fd)
def migrate_lbryum_to_torba(path):
    """Convert an old lbryum wallet file at ``path`` to the torba layout.

    The old file is renamed to ``old_lbryum_wallet_<n>`` next to it (first
    unused ``n``), the converted JSON is written to a temporary file,
    synced, renamed onto ``path`` and given the original file mode.

    :returns: ``(receiving_addresses, change_addresses)`` as sets, or
        ``(None, None)`` when ``path`` does not exist or the file is not an
        lbryum wallet (no ``master_public_keys`` key).
    """
    if not os.path.exists(path):
        return None, None
    with open(path, 'r') as f:
        unmigrated_json = f.read()
        unmigrated = json.loads(unmigrated_json)
    # TODO: After several public releases of new torba based wallet, we can delete
    #       this lbryum->torba conversion code and require that users who still
    #       have old structured wallets install one of the earlier releases that
    #       still has the below conversion code.
    if 'master_public_keys' not in unmigrated:
        return None, None
    # Default to an empty history so the log line below cannot fail on
    # wallets that lack the key.
    total = unmigrated.get('addr_history', {})
    receiving_addresses, change_addresses = set(), set()
    for _, unmigrated_account in unmigrated.get('accounts', {}).items():
        receiving_addresses.update(
            map(unhexlify, unmigrated_account.get('receiving', [])))
        change_addresses.update(
            map(unhexlify, unmigrated_account.get('change', [])))
    log.info(
        "Wallet migrator found %s receiving addresses and %s change addresses. %s in total on history.",
        len(receiving_addresses), len(change_addresses), len(total))

    migrated_json = json.dumps(
        {
            'version': 1,
            'name': 'My Wallet',
            'accounts': [{
                'version': 1,
                'name': 'Main Account',
                'ledger': 'lbc_mainnet',
                'encrypted': unmigrated['use_encryption'],
                'seed': unmigrated['seed'],
                'seed_version': unmigrated['seed_version'],
                'private_key': unmigrated['master_private_keys']['x/'],
                'public_key': unmigrated['master_public_keys']['x/'],
                'certificates': unmigrated.get('claim_certificates', {}),
                'address_generator': {
                    'name': 'deterministic-chain',
                    'receiving': {
                        'gap': 20,
                        'maximum_uses_per_address': 2
                    },
                    'change': {
                        'gap': 6,
                        'maximum_uses_per_address': 2
                    }
                }
            }]
        },
        indent=4,
        sort_keys=True)
    mode = os.stat(path).st_mode
    # Find the first unused backup name and move the old wallet there.
    i = 1
    backup_path_template = os.path.join(
        os.path.dirname(path), "old_lbryum_wallet") + "_%i"
    while os.path.isfile(backup_path_template % i):
        i += 1
    os.rename(path, backup_path_template % i)
    temp_path = "%s.tmp.%s" % (path, os.getpid())
    with open(temp_path, "w") as f:
        f.write(migrated_json)
        f.flush()
        os.fsync(f.fileno())
    os.rename(temp_path, path)
    os.chmod(path, mode)
    return receiving_addresses, change_addresses
def get_socket(self):
    """Return a socket for this interface, or ``None`` on failure.

    Without ``self.use_ssl`` the result of ``get_simple_socket()`` is
    returned as is.  With SSL:

    1. CA verification is tried first; on success a previously pinned
       certificate file is deleted and the verified socket returned.
    2. Otherwise a pinned self-signed certificate stored at
       ``<config_path>/certs/<sanitized host>`` is used.  When none exists
       the server certificate is fetched without verification and written
       to ``<cert_path>.temp``.
    3. A second connection is wrapped with ``CERT_REQUIRED`` against that
       certificate.  On success a newly fetched certificate is renamed to
       ``cert_path``; on an SSL error a new certificate is moved to
       ``<cert_path>.rej`` and an existing one is checked, reported via
       ``bad_certificate`` or removed when ``check_date()`` fails.
    """
    if self.use_ssl:
        # Try with CA first, since they are preferred over self-signed certs
        # and are always accepted (even if a previous pinned self-signed
        # cert exists).
        cert_path = os.path.join(self.config_path, 'certs', sanitize_filename(self.host, replacement_text='_'))
        has_pinned_self_signed = os.path.exists(cert_path)
        s, give_up = self._get_socket_and_verify_ca_cert(suppress_errors=has_pinned_self_signed)
        if s:
            if has_pinned_self_signed:
                # Delete pinned cert. They now have a valid CA-signed cert.
                # This hopefully undoes the bug in previous EC versions that
                # refused to consider CA-signed certs at all if the server
                # ever had a self-signed cert in the past.
                try:
                    os.remove(cert_path)
                    self.print_error("Server is now using a CA-signed certificate, deleted previous self-signed certificate:", cert_path)
                except OSError:
                    pass
            return s
        elif give_up:
            # low-level error in _get_socket_and_verify_ca_cert, give up
            return
        # if we get here, certificate is not CA signed, so try the alternate
        # "pinned self-signed" method.
        if not has_pinned_self_signed:
            is_new = True
            # get server certificate. Do not use ssl.get_server_certificate
            # because it does not work with proxy
            s = self.get_simple_socket()
            if s is None:
                return
            try:
                # Unverified handshake: only used to download the
                # certificate the server presents.
                context = self.get_ssl_context(cert_reqs=ssl.CERT_NONE, ca_certs=None)
                s = context.wrap_socket(s)
            except ssl.SSLError as e:
                self.print_error("SSL error retrieving SSL certificate:", e)
                return
            except:
                return

            dercert = s.getpeercert(True)
            s.close()
            cert = ssl.DER_cert_to_PEM_cert(dercert)
            # workaround android bug
            cert = re.sub("([^\n])-----END CERTIFICATE-----","\\1\n-----END CERTIFICATE-----",cert)
            temporary_path = cert_path + '.temp'
            util.assert_datadir_available(self.config_path)
            # Written under a temporary name; it is renamed to cert_path
            # only after the verifying handshake below succeeds.
            with open(temporary_path, "w", encoding='utf-8') as f:
                f.write(cert)
                f.flush()
                os.fsync(f.fileno())
        else:
            is_new = False
            temporary_path = None

    s = self.get_simple_socket()
    if s is None:
        return

    if self.use_ssl:
        try:
            context = self.get_ssl_context(cert_reqs=ssl.CERT_REQUIRED,
                                           ca_certs=(temporary_path if is_new else cert_path))
            s = context.wrap_socket(s, do_handshake_on_connect=True)
        except socket.timeout:
            self.print_error('timeout')
            return
        except ssl.SSLError as e:
            self.print_error("SSL error:", e)
            # Only errno 1 goes on to the certificate handling below.
            if e.errno != 1:
                return
            if is_new:
                # Keep the rejected certificate next to the pinned path.
                rej = cert_path + '.rej'
                try:
                    if os.path.exists(rej):
                        os.unlink(rej)
                    os.rename(temporary_path, rej)
                except OSError as e2:
                    self.print_error("Could not rename rejected certificate:", rej, repr(e2))
            else:
                util.assert_datadir_available(self.config_path)
                with open(cert_path, encoding='utf-8') as f:
                    cert = f.read()
                try:
                    b = pem.dePem(cert, 'CERTIFICATE')
                    x = x509.X509(b)
                except:
                    if util.is_verbose:
                        self.print_error("Error checking certificate, traceback follows")
                        traceback.print_exc(file=sys.stderr)
                    self.print_error("wrong certificate")
                    self.bad_certificate(self.server, cert_path)
                    return
                try:
                    x.check_date()
                except:
                    self.print_error("certificate has expired:", cert_path)
                    try:
                        os.unlink(cert_path)
                        self.print_error("Removed expired certificate:", cert_path)
                    except OSError as e2:
                        self.print_error("Could not remove expired certificate:", cert_path, repr(e2))
                    return
                self.print_error("wrong certificate")
                self.bad_certificate(self.server, cert_path)
            # NOTE(review): both paths return None here, so the errno
            # check has no effect.
            if e.errno == 104:
                return
            return

        if is_new:
            self.print_error("saving certificate")
            os.rename(temporary_path, cert_path)

    return s
# Benchmark loop: pick a weighted random action (read / insert / delete),
# run it, and append its wall-clock and CPU-clock timings to the two result
# files, syncing each line to disk, until maxDuration seconds have passed.
while not done:
    action = weighted_choice(proportions)

    if action == 'r':
        (before, after, queryTime,
         beforeClock, afterClock, queryClock,
         extra, lfnDict) = doRead(readDepth)
    elif action == 'i':
        (before, after, queryTime,
         beforeClock, afterClock, queryClock,
         extra) = doInsert(writeDepth, maxInsert)
    elif action == 'd':
        # A delete first reads to find the entries to remove; only the
        # timings of the removal itself are recorded.
        (before, after, queryTime,
         beforeClock, afterClock, queryClock,
         extra, lfnDict) = doRead(readDepth)
        (before, after, queryTime,
         beforeClock, afterClock, queryClock,
         extra) = doRemove(lfnDict)

    wallRow = (before, after, queryTime, extra)
    timeFile.write("%s\t%s\t%s\t%s\n" % wallRow)
    timeFile.flush()
    os.fsync(timeFile)

    clockRow = (beforeClock, afterClock, queryClock, extra)
    clockFile.write("%s\t%s\t%s\t%s\n" % clockRow)
    clockFile.flush()
    os.fsync(clockFile)

    done = (time.time() - start) > maxDuration

timeFile.close()
clockFile.close()
def fsync(self, isfsyncfile):
    """Flush this object's buffers and sync its descriptor to disk.

    When ``isfsyncfile`` is true and the platform offers ``os.fdatasync``
    that call is used; otherwise ``os.fsync`` is used.
    """
    self._fflush()
    use_fdatasync = isfsyncfile and hasattr(os, 'fdatasync')
    if not use_fdatasync:
        os.fsync(self.fd)
        return
    os.fdatasync(self.fd)
def acquire_lock(self):
    """
    Try to acquire the lock.

    :Returns:
        #. result (boolean): Whether the lock is successfully acquired.
        #. code (integer, Exception, str): Code indicating how the lock
           was set or why it was not acquired.

           *  0: Lock is successfully set for normal reasons. In this case
              result is True.
           *  1: Lock was already set, no need to set it again. In this case
              result is True.
           *  2: Old and forgotten lock is found and removed. New lock is
              successfully set. In this case result is True.
           *  3: Lock was not successfully set before timeout. In this case
              result is False.
           *  Exception: Reading the lock file failed with an unexpected
              error, which is caught and returned. In this case result
              is False.
           *  str: Writing the lock file failed; the error message is
              returned. In this case result is False.
    """
    # set acquire flag
    code = 0
    acquired = False
    t0 = t1 = time.time()
    LP = self.__lockPass + '\n'
    bytesLP = LP.encode()
    # set general while loop with timeout condition
    while (t1 - t0) <= self.__timeout:
        # try to set acquired to True by reading an empty lock file
        try:
            while not acquired and (t1 - t0) <= self.__timeout:
                if os.path.isfile(self.__lockPath):
                    with open(self.__lockPath, 'rb') as fd:
                        lock = fd.readlines()
                    # lock file is empty
                    if not len(lock):
                        acquired = True
                        break
                    # if it is already locked
                    if lock[0] == bytesLP:
                        code = 1
                        acquired = True
                        break
                    # second line holds the time the lock was written
                    if t1 - float(lock[1]) > self.__deadLock:
                        acquired = True
                        code = 2
                        break
                    # wait a bit
                    if self.__wait:
                        time.sleep(self.__wait)
                    t1 = time.time()
                else:
                    acquired = True
                    break
        except Exception as err:
            # Bind through a separate name: Python 3 deletes the
            # ``except ... as`` target when the handler ends, which left
            # ``code`` undefined for the return below.
            code = err
            acquired = False
        # impossible to acquire because of an error or timeout.
        if not acquired:
            break
        # try to write lock
        try:
            tic = time.time()
            with open(self.__lockPath, 'wb') as fd:
                fd.write(str(LP + '%.6f' % t1).encode())
                fd.flush()
                os.fsync(fd.fileno())
            toc = time.time()
        except Exception as e:
            code = str(e)
            acquired = False
            break
        # sleep for double tic-toc or 0.1 ms which ever one is higher
        s = max([2 * (toc - tic), 0.0001])
        time.sleep(s)
        # check if lock is still acquired by the same lock pass
        with open(self.__lockPath, 'rb') as fd:
            lock = fd.readlines()
        if len(lock) >= 1:
            if lock[0] == bytesLP:
                acquired = True
                break
            else:
                acquired = False
                t1 = time.time()
                continue
        else:
            acquired = False
            t1 = time.time()
            continue
    # return whether it is acquired or not
    if not acquired and not code:
        code = 3
    return acquired, code
def write(self, message):
    """Write ``message`` to the terminal and to the log, syncing the log.

    The log is flushed and fsync'ed after every call.
    """
    self.terminal.write(message)
    log_handle = self.log
    log_handle.write(message)
    log_handle.flush()
    os.fsync(log_handle)
import os
import time

# Append one test line to a log named after the current local time
# (YYYYmmddHHMMSS.txt) and force it to disk.  ``with`` closes the file even
# if the write or the sync fails.
with open(time.strftime("%Y%m%d%H%M%S", time.localtime()) + '.txt', 'a') as flog:
    flog.write('aabbc' + '\n')
    flog.flush()
    os.fsync(flog.fileno())
def sync(fd):
    """Flush the file object ``fd`` and fsync its descriptor.

    ``fd`` must provide ``flush()`` and ``fileno()``.
    """
    fd.flush()
    descriptor = fd.fileno()
    os.fsync(descriptor)
# Record the remaining hyper-parameters, then the column header of the
# per-epoch results table, and force the header to disk.
resultFile.write("FLAGS.max_grad_norm, %.3f\n" %(FLAGS.max_grad_norm))
resultFile.write("initScale, %.3f\n" %(FLAGS.init_scale))
resultFile.write("numEpochsFullLR, %0.d\n" %(FLAGS.num_epochs_full_lr))
resultFile.write("numEpochs, %0.d\n" %(FLAGS.num_epochs))
resultFile.write("baseLearningRate, %.3f\n" %(FLAGS.learning_rate))
resultFile.write("lrDecay, %.3f\n" %(FLAGS.lr_decay))
resultFile.write("forgetBias, %.3f\n" %(FLAGS.forget_bias))

if (FLAGS.shuffle):
    resultFile.write("Shuffle\n")
else:
    resultFile.write("NoShuffle\n")

resultFile.write("\nDataset, VocabSize, TrainWords, ValidWords, Epoch, TrainPrecision, TrainPerplexity, TrainCrossEntropy, Epoch, ValidPrecision, ValidPerplexity, ValidCrossEntropy\n")
resultFile.flush()
os.fsync(resultFile.fileno())


def linear(inp, output_dim, scope_name=None, stddev=1.0, reuse_scope=False):
    """Return ``inp @ w + b`` with variables created in a variable scope.

    ``w`` has shape ``[inp.get_shape()[1], output_dim]`` and is drawn from
    a normal initializer with the given ``stddev``; ``b`` has shape
    ``[output_dim]`` and starts at zero.  The scope (``scope_name`` or
    ``'G/linear'``) gets an L2 regularizer scaled by ``FLAGS.reg_scale``.
    With ``reuse_scope`` the scope's existing variables are reused.
    """
    norm = tf.random_normal_initializer(stddev=stddev, dtype=tf.float32)
    const = tf.constant_initializer(0.0, dtype=tf.float32)
    with tf.variable_scope(scope_name or 'G/linear') as scope:
        scope.set_regularizer(tf.contrib.layers.l2_regularizer(scale=FLAGS.reg_scale))
        if reuse_scope:
            scope.reuse_variables()
        #print('inp.get_shape(): {}'.format(inp.get_shape()))
        w = tf.get_variable('w', [inp.get_shape()[1], output_dim], initializer=norm, dtype=tf.float32)
        b = tf.get_variable('b', [output_dim], initializer=const, dtype=tf.float32)
        return tf.matmul(inp, w) + b


# NOTE(review): only the first statement of this function is visible here.
def plus(a,b):
    c = []
def flush(self):
    """Flush the console and, when a file is attached, flush and fsync it."""
    self.console.flush()
    if self.f is None:
        return
    self.f.flush()
    import os
    os.fsync(self.f.fileno())
def fsync_dir(path):
    """Open ``path`` read-only, fsync it and always close the descriptor."""
    dir_fd = os.open(path, os.O_RDONLY)
    try:
        os.fsync(dir_fd)
    finally:
        os.close(dir_fd)
def case_preserving_open_file(path, mode='wb', mkdir_mode=0o777):
    '''
    Open the file pointed to by path with the specified mode. If any
    directories in path do not exist, they are created. Returns the
    opened file object and the path to the opened file object. This path is
    guaranteed to have the same case as the on disk path. For case insensitive
    filesystems, the returned path may be different from the passed in path.
    The returned path is always unicode and always an absolute path.

    If mode is None, then this function assumes that path points to a directory
    and returns the path to the directory as the file object.

    mkdir_mode specifies the mode with which any missing directories in path
    are created.

    Raises ValueError if path resolves to the filesystem root or cannot be
    split into components.
    '''
    if isbytestring(path):
        path = path.decode(filesystem_encoding)

    path = os.path.abspath(path)

    sep = force_unicode(os.sep, 'ascii')

    if path.endswith(sep):
        path = path[:-1]
    if not path:
        raise ValueError('Path must not point to root')

    components = path.split(sep)
    if not components:
        raise ValueError('Invalid path: %r' % path)

    cpath = sep
    if iswindows:
        # Always upper case the drive letter and add a trailing slash so that
        # the first os.listdir works correctly
        cpath = components[0].upper() + sep

    bdir = path if mode is None else os.path.dirname(path)
    if not os.path.exists(bdir):
        os.makedirs(bdir, mkdir_mode)

    # Walk all the directories in path, putting the on disk case version of
    # the directory into cpath
    dirs = components[1:] if mode is None else components[1:-1]
    for comp in dirs:
        cdir = os.path.join(cpath, comp)
        cl = comp.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except Exception:
            # Dont have permission to do the listdir, assume the case is
            # correct as we have no way to check it.
            # (Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are no longer swallowed here.)
            pass
        else:
            if len(candidates) == 1:
                cdir = os.path.join(cpath, candidates[0])
            # else: We are on a case sensitive file system so cdir must already
            # be correct
        cpath = cdir

    if mode is None:
        ans = fpath = cpath
    else:
        fname = components[-1]
        ans = lopen(os.path.join(cpath, fname), mode)
        # Ensure file and all its metadata is written to disk so that subsequent
        # listdir() has file name in it. I don't know if this is actually
        # necessary, but given the diversity of platforms, best to be safe.
        ans.flush()
        os.fsync(ans.fileno())

        cl = fname.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except EnvironmentError:
            # The containing directory, somehow disappeared?
            candidates = []
        if len(candidates) == 1:
            fpath = os.path.join(cpath, candidates[0])
        else:
            # We are on a case sensitive filesystem
            fpath = os.path.join(cpath, fname)
    return ans, fpath
def workerScript(jobStore, config, jobName, jobStoreID, redirectOutputToLogFile=True):
    """
    Worker process script, runs a job and any jobs that can be chained after it.

    :param jobStore: The job store the job description, shared files and logs
           are loaded from and written to
    :param config: The workflow configuration object (log level, stats,
           chaining, clean-up and log-capture settings are read from it)
    :param str jobName: The "job name" (a user friendly name) of the job to be run
    :param str jobStoreID: The job store ID of the job to be run
    :param bool redirectOutputToLogFile: If True (and not disabled through
           config.disableWorkerOutputCapture), file descriptors 1 and 2 are
           redirected to a worker log file for the duration of the run

    :return int: 1 if a job failed, or 0 if all jobs succeeded
    """

    configureRootLogger()
    setLogLevel(config.logLevel)

    ##########################################
    #Create the worker killer, if requested
    ##########################################

    logFileByteReportLimit = config.maxLogFileSize

    if config.badWorker > 0 and random.random() < config.badWorker:
        # We need to kill the process we are currently in, to simulate worker
        # failure. We don't want to just send SIGKILL, because we can't tell
        # that from a legitimate OOM on our CI runner. We're going to send
        # SIGUSR1 so our terminations are distinctive, and then SIGKILL if that
        # didn't stick. We definitely don't want to do this from *within* the
        # process we are trying to kill, so we fork off. TODO: We can still
        # leave the killing code running after the main Toil flow is done, but
        # since it's now in a process instead of a thread, the main Python
        # process won't wait around for its timeout to expire. I think this is
        # better than the old thread-based way where all of Toil would wait
        # around to be killed.

        killTarget = os.getpid()
        sleepTime = config.badWorkerFailInterval * random.random()
        if os.fork() == 0:
            # We are the child
            # Let the parent run some amount of time
            time.sleep(sleepTime)
            # Kill it gently
            os.kill(killTarget, signal.SIGUSR1)
            # Wait for that to stick
            time.sleep(0.01)
            try:
                # Kill it harder. Hope the PID hasn't already been reused.
                # If we succeeded the first time, this will OSError
                os.kill(killTarget, signal.SIGKILL)
            except OSError:
                pass
            # Exit without doing any of Toil's cleanup
            os._exit(0)

        # We don't need to reap the child. Either it kills us, or we finish
        # before it does. Either way, init will have to clean it up for us.

    ##########################################
    #Load the environment for the job
    ##########################################

    #First load the environment for the job.
    with jobStore.readSharedFileStream("environment.pickle") as fileHandle:
        environment = safeUnpickleFromStream(fileHandle)
    env_reject = {
        "TMPDIR",
        "TMP",
        "HOSTNAME",
        "HOSTTYPE",
        "HOME",
        "LOGNAME",
        "USER",
        "DISPLAY",
        "JAVA_HOME"
    }
    for i in environment:
        if i == "PATH":
            # Handle path specially. Sometimes e.g. leader may not include
            # /bin, but the Toil appliance needs it.
            if i in os.environ and os.environ[i] != '':
                # Use the provided PATH and then the local system's PATH
                os.environ[i] = environment[i] + ':' + os.environ[i]
            else:
                # Use the provided PATH only
                os.environ[i] = environment[i]
        elif i not in env_reject:
            os.environ[i] = environment[i]
    # sys.path is used by __import__ to find modules
    if "PYTHONPATH" in environment:
        for e in environment["PYTHONPATH"].split(':'):
            if e != '':
                sys.path.append(e)

    toilWorkflowDir = Toil.getLocalWorkflowDir(config.workflowID, config.workDir)

    ##########################################
    #Setup the temporary directories.
    ##########################################

    # Dir to put all this worker's temp files in.
    localWorkerTempDir = tempfile.mkdtemp(dir=toilWorkflowDir)
    os.chmod(localWorkerTempDir, 0o755)

    ##########################################
    #Setup the logging
    ##########################################

    #This is mildly tricky because we don't just want to
    #redirect stdout and stderr for this Python process; we want to redirect it
    #for this process and all children. Consequently, we can't just replace
    #sys.stdout and sys.stderr; we need to mess with the underlying OS-level
    #file descriptors. See <http://stackoverflow.com/a/11632982/402891>

    #When we start, standard input is file descriptor 0, standard output is
    #file descriptor 1, and standard error is file descriptor 2.

    # Do we even want to redirect output? Let the config make us not do it.
    redirectOutputToLogFile = redirectOutputToLogFile and not config.disableWorkerOutputCapture

    #What file do we want to point FDs 1 and 2 to?
    tempWorkerLogPath = os.path.join(localWorkerTempDir, "worker_log.txt")

    if redirectOutputToLogFile:
        # Announce that we are redirecting logging, and where it will now go.
        # This is important if we are trying to manually trace a faulty worker invocation.
        logger.info("Redirecting logging to %s", tempWorkerLogPath)
        sys.stdout.flush()
        sys.stderr.flush()

        # Save the original stdout and stderr (by opening new file descriptors
        # to the same files)
        origStdOut = os.dup(1)
        origStdErr = os.dup(2)

        # Open the file to send stdout/stderr to.
        logFh = os.open(tempWorkerLogPath, os.O_WRONLY | os.O_CREAT | os.O_APPEND)

        # Replace standard output with a descriptor for the log file
        os.dup2(logFh, 1)

        # Replace standard error with a descriptor for the log file
        os.dup2(logFh, 2)

        # Since we only opened the file once, all the descriptors duped from
        # the original will share offset information, and won't clobber each
        # others' writes. See <http://stackoverflow.com/a/5284108/402891>. This
        # shouldn't matter, since O_APPEND seeks to the end of the file before
        # every write, but maybe there's something odd going on...

        # Close the descriptor we used to open the file
        os.close(logFh)

    debugging = logging.getLogger().isEnabledFor(logging.DEBUG)
    ##########################################
    #Worker log file trapped from here on in
    ##########################################

    jobAttemptFailed = False
    statsDict = MagicExpando()
    statsDict.jobs = []
    statsDict.workers.logsToMaster = []
    blockFn = lambda: True
    listOfJobs = [jobName]
    job = None
    try:

        #Put a message at the top of the log, just to make sure it's working.
        logger.info("---TOIL WORKER OUTPUT LOG---")
        sys.stdout.flush()

        logProcessContext(config)

        ##########################################
        #Connect to the deferred function system
        ##########################################
        deferredFunctionManager = DeferredFunctionManager(toilWorkflowDir)

        ##########################################
        #Load the JobDescription
        ##########################################

        jobDesc = jobStore.load(jobStoreID)
        listOfJobs[0] = str(jobDesc)
        logger.debug("Parsed job description")

        ##########################################
        #Cleanup from any earlier invocation of the job
        ##########################################

        if jobDesc.command == None:
            logger.debug("Job description has no body to run.")
            # Cleanup jobs already finished
            predicate = lambda jID: jobStore.exists(jID)
            jobDesc.filterSuccessors(predicate)
            jobDesc.filterServiceHosts(predicate)
            logger.debug(
                "Cleaned up any references to completed successor jobs")

        # This cleans the old log file which may
        # have been left if the job is being retried after a job failure.
        oldLogFile = jobDesc.logJobStoreFileID
        if oldLogFile != None:
            jobDesc.logJobStoreFileID = None
            jobStore.update(jobDesc)  #Update first, before deleting any files
            jobStore.deleteFile(oldLogFile)

        ##########################################
        # If a checkpoint exists, restart from the checkpoint
        ##########################################

        if isinstance(
                jobDesc,
                CheckpointJobDescription) and jobDesc.checkpoint is not None:
            # The job is a checkpoint, and is being restarted after previously completing
            logger.debug("Job is a checkpoint")
            # If the checkpoint still has extant successors or services, its
            # subtree didn't complete properly. We handle the restart of the
            # checkpoint here, removing its previous subtree.
            if next(jobDesc.successorsAndServiceHosts(), None) is not None:
                logger.debug("Checkpoint has failed; restoring")
                # Reduce the try count
                assert jobDesc.remainingTryCount >= 0
                jobDesc.remainingTryCount = max(0, jobDesc.remainingTryCount - 1)
                jobDesc.restartCheckpoint(jobStore)
            # Otherwise, the job and successors are done, and we can cleanup stuff we couldn't clean
            # because of the job being a checkpoint
            else:
                logger.debug(
                    "The checkpoint jobs seems to have completed okay, removing any checkpoint files to delete."
                )
                #Delete any remnant files
                list(
                    map(
                        jobStore.deleteFile,
                        list(
                            filter(jobStore.fileExists,
                                   jobDesc.checkpointFilesToDelete))))

        ##########################################
        #Setup the stats, if requested
        ##########################################

        if config.stats:
            startClock = getTotalCpuTime()

        # The wall-clock start is needed for the final log line even when
        # stats collection is off.
        startTime = time.time()
        while True:
            ##########################################
            #Run the job body, if there is one
            ##########################################

            logger.info("Working on job %s", jobDesc)

            if jobDesc.command is not None:
                assert jobDesc.command.startswith("_toil ")
                logger.debug("Got a command to run: %s" % jobDesc.command)
                # Load the job. It will use the same JobDescription we have been using.
                job = Job.loadJob(jobStore, jobDesc)
                if isinstance(jobDesc, CheckpointJobDescription):
                    # If it is a checkpoint job, save the command
                    jobDesc.checkpoint = jobDesc.command

                logger.info("Loaded body %s from description %s", job, jobDesc)

                # Create a fileStore object for the job
                fileStore = AbstractFileStore.createFileStore(
                    jobStore,
                    jobDesc,
                    localWorkerTempDir,
                    blockFn,
                    caching=not config.disableCaching)
                with job._executor(stats=statsDict if config.stats else None,
                                   fileStore=fileStore):
                    with deferredFunctionManager.open() as defer:
                        with fileStore.open(job):
                            # Get the next block function to wait on committing this job
                            blockFn = fileStore.waitForCommit

                            # Run the job, save new successors, and set up
                            # locally (but don't commit) successor
                            # relationships and job completion.
                            # Pass everything as name=value because Cactus
                            # likes to override _runner when it shouldn't and
                            # it needs some hope of finding the arguments it
                            # wants across multiple Toil versions. We also
                            # still pass a jobGraph argument to placate old
                            # versions of Cactus.
                            job._runner(jobGraph=None,
                                        jobStore=jobStore,
                                        fileStore=fileStore,
                                        defer=defer)

                # Accumulate messages from this job & any subsequent chained jobs
                statsDict.workers.logsToMaster += fileStore.loggingMessages

                logger.info("Completed body for %s", jobDesc)

            else:
                #The command may be none, in which case
                #the JobDescription is either a shell ready to be deleted or has
                #been scheduled after a failure to cleanup
                logger.debug("No user job to run, so finishing")
                break

            if AbstractFileStore._terminateEvent.isSet():
                raise RuntimeError("The termination flag is set")

            ##########################################
            #Establish if we can run another job within the worker
            ##########################################
            successor = nextChainable(jobDesc, jobStore, config)
            if successor is None or config.disableChaining:
                # Can't chain any more jobs. We are going to stop.

                logger.info("Not chaining from job %s", jobDesc)

                # TODO: Somehow the commit happens even if we don't start it here.

                break

            logger.info("Chaining from %s to %s", jobDesc, successor)

            ##########################################
            # We have a single successor job that is not a checkpoint job. We
            # reassign the ID of the current JobDescription to the successor.
            # We can then delete the successor JobDescription (under its old
            # ID) in the jobStore, as it is wholly incorporated into the
            # current one.
            ##########################################

            # Make sure nothing has gone wrong and we can really chain
            assert jobDesc.memory >= successor.memory
            assert jobDesc.cores >= successor.cores

            # Save the successor's original ID, so we can clean it (and its
            # body) up after we finish executing it.
            successorID = successor.jobStoreID

            # add the successor to the list of jobs run
            listOfJobs.append(str(successor))

            # Now we need to become that successor, under the original ID.
            successor.replace(jobDesc)
            jobDesc = successor

            # Problem: successor's job body is a file that will be cleaned up
            # when we delete the successor job by ID. We can't just move it. So
            # we need to roll up the deletion of the successor job by ID with
            # the deletion of the job ID we're currently working on.
            jobDesc.jobsToDelete.append(successorID)

            # Clone the now-current JobDescription (which used to be the successor).
            # TODO: Why??? Can we not?
            jobDesc = copy.deepcopy(jobDesc)

            # Build a fileStore to update the job and commit the replacement.
            # TODO: can we have a commit operation without an entire FileStore???
            fileStore = AbstractFileStore.createFileStore(
                jobStore,
                jobDesc,
                localWorkerTempDir,
                blockFn,
                caching=not config.disableCaching)

            # Update blockFn to wait for that commit operation.
            blockFn = fileStore.waitForCommit

            # This will update the job once the previous job is done updating
            fileStore.startCommit(jobState=True)

            # Clone the current job description again, so that further updates
            # to it (such as new successors being added when it runs) occur
            # after the commit process we just kicked off, and aren't committed
            # early or partially.
            jobDesc = copy.deepcopy(jobDesc)

            logger.debug("Starting the next job")

        ##########################################
        #Finish up the stats
        ##########################################
        if config.stats:
            totalCPUTime, totalMemoryUsage = getTotalCpuTimeAndMemoryUsage()
            statsDict.workers.time = str(time.time() - startTime)
            statsDict.workers.clock = str(totalCPUTime - startClock)
            statsDict.workers.memory = str(totalMemoryUsage)

        # log the worker log path here so that if the file is truncated the path can still be found
        if redirectOutputToLogFile:
            logger.info(
                "Worker log can be found at %s. Set --cleanWorkDir to retain this log",
                localWorkerTempDir)

        logger.info(
            "Finished running the chain of jobs on this node, we ran for a total of %f seconds",
            time.time() - startTime)

    ##########################################
    #Trapping where worker goes wrong
    ##########################################
    except:  #Case that something goes wrong in worker
        traceback.print_exc()
        logger.error("Exiting the worker because of a failed job on host %s",
                     socket.gethostname())
        AbstractFileStore._terminateEvent.set()

    ##########################################
    #Wait for the asynchronous chain of writes/updates to finish
    ##########################################

    blockFn()

    ##########################################
    #All the asynchronous worker/update threads must be finished now,
    #so safe to test if they completed okay
    ##########################################

    if AbstractFileStore._terminateEvent.isSet():
        # Something has gone wrong.

        # Clobber any garbage state we have for this job from failing with
        # whatever good state is still stored in the JobStore
        jobDesc = jobStore.load(jobStoreID)
        # Remember that we failed
        jobAttemptFailed = True

    ##########################################
    #Cleanup
    ##########################################

    # Close the worker logging
    # Flush at the Python level
    sys.stdout.flush()
    sys.stderr.flush()
    if redirectOutputToLogFile:
        # Flush at the OS level
        os.fsync(1)
        os.fsync(2)

        # Close redirected stdout and replace with the original standard output.
        os.dup2(origStdOut, 1)

        # Close redirected stderr and replace with the original standard error.
        os.dup2(origStdErr, 2)

        # sys.stdout and sys.stderr don't need to be modified at all. We don't
        # need to call redirectLoggerStreamHandlers since they still log to
        # sys.stderr

        # Close our extra handles to the original standard output and standard
        # error streams, so we don't leak file handles.
        os.close(origStdOut)
        os.close(origStdErr)

    # Now our file handles are in exactly the state they were in before.

    # Copy back the log file to the global dir, if needed.
    # Note that we work with bytes instead of characters so we can seek
    # relative to the end (since Python won't decode Unicode backward, or even
    # interpret seek offsets in characters for us). TODO: We may get invalid or
    # just different Unicode by breaking up a character at the boundary!
    if jobAttemptFailed and redirectOutputToLogFile:
        jobDesc.logJobStoreFileID = jobStore.getEmptyFileStoreID(
            jobDesc.jobStoreID, cleanup=True)
        jobDesc.chainedJobs = listOfJobs
        with jobStore.updateFileStream(jobDesc.logJobStoreFileID) as w:
            with open(tempWorkerLogPath, 'rb') as f:
                if os.path.getsize(
                        tempWorkerLogPath) > logFileByteReportLimit != 0:
                    if logFileByteReportLimit > 0:
                        f.seek(-logFileByteReportLimit, 2)  # seek to last tooBig bytes of file
                    elif logFileByteReportLimit < 0:
                        # NOTE(review): a negative offset with whence=0 looks
                        # invalid for a binary file -- confirm intended behaviour.
                        f.seek(logFileByteReportLimit, 0)  # seek to first tooBig bytes of file
                # Dump the possibly-invalid-Unicode bytes into the log file
                w.write(f.read())  # TODO load file using a buffer
        # Commit log file reference back to JobStore
        jobStore.update(jobDesc)

    elif ((debugging or (config.writeLogsFromAllJobs
                         and not jobName.startswith(CWL_INTERNAL_JOBS)))
          and redirectOutputToLogFile):  # write log messages
        with open(tempWorkerLogPath, 'rb') as logFile:
            if os.path.getsize(
                    tempWorkerLogPath) > logFileByteReportLimit != 0:
                if logFileByteReportLimit > 0:
                    logFile.seek(-logFileByteReportLimit, 2)  # seek to last tooBig bytes of file
                elif logFileByteReportLimit < 0:
                    # NOTE(review): same negative-offset concern as above.
                    logFile.seek(logFileByteReportLimit, 0)  # seek to first tooBig bytes of file
            # Make sure lines are Unicode so they can be JSON serialized as part of the dict.
            # We may have damaged the Unicode text by cutting it at an arbitrary byte so we drop bad characters.
            # 'ignore' is the codec error handler that drops undecodable
            # bytes; the previous 'skip' is not a registered handler and
            # raised LookupError as soon as a bad byte was met.
            logMessages = [
                line.decode('utf-8', 'ignore')
                for line in logFile.read().splitlines()
            ]
        statsDict.logs.names = listOfJobs
        statsDict.logs.messages = logMessages

    if (debugging or config.stats or statsDict.workers.logsToMaster
        ) and not jobAttemptFailed:  # We have stats/logging to report back
        jobStore.writeStatsAndLogging(
            json.dumps(statsDict, ensure_ascii=True).encode())

    #Remove the temp dir
    cleanUp = config.cleanWorkDir
    if cleanUp == 'always' or (cleanUp == 'onSuccess' and
                               not jobAttemptFailed) or (cleanUp == 'onError'
                                                         and jobAttemptFailed):
        shutil.rmtree(localWorkerTempDir)

    #This must happen after the log file is done with, else there is no place to put the log
    if (not jobAttemptFailed) and jobDesc.command == None and next(
            jobDesc.successorsAndServiceHosts(), None) is None:
        # We can now safely get rid of the JobDescription, and all jobs it chained up
        for otherID in jobDesc.jobsToDelete:
            jobStore.delete(otherID)
        jobStore.delete(jobDesc.jobStoreID)

    if jobAttemptFailed:
        return 1
    else:
        return 0
def __exit__(self, type, value, traceback):
    """Write all values in ``self._vals`` to a fresh directory and swap the data symlink to it.

    Each key is written to its own file and fsync'ed, the directory is
    fsync'ed, and the symlink at ``self._data_path()`` is replaced by renaming
    a temporary link over it.  The umask saved in ``self._prev_umask`` is
    restored and ``self._lock`` is released in all cases.  Nothing is
    returned, so an exception raised inside the ``with`` body is not
    suppressed.
    """
    self._check_entered()

    try:
        # data_path refers to the externally used path to the params. It is a symlink.
        # old_data_path is the path currently pointed to by data_path.
        # tempdir_path is a path where the new params will go, which the new data path will point to.
        # new_data_path is a temporary symlink that will atomically overwrite data_path.
        #
        # The current situation is:
        #   data_path -> old_data_path
        # We're going to write params data to tempdir_path
        #   tempdir_path -> params data
        # Then point new_data_path to tempdir_path
        #   new_data_path -> tempdir_path
        # Then atomically overwrite data_path with new_data_path
        #   data_path -> tempdir_path
        old_data_path = None
        new_data_path = None
        tempdir_path = tempfile.mkdtemp(prefix=".tmp", dir=self._path)

        try:
            # Write back all keys.
            os.chmod(tempdir_path, 0o777)
            for k, v in self._vals.items():
                with open(os.path.join(tempdir_path, k), "wb") as f:
                    f.write(v)
                    f.flush()
                    os.fsync(f.fileno())
            # Sync the directory itself after all files have been written.
            fsync_dir(tempdir_path)

            data_path = self._data_path()
            try:
                old_data_path = os.path.join(self._path, os.readlink(data_path))
            except (OSError, IOError):
                # NOTE(mgraczyk): If other DB implementations have bugs, this could cause
                #                 copies to be left behind, but we still want to overwrite.
                pass

            new_data_path = "{}.link".format(tempdir_path)
            # The link target is relative (basename only).
            os.symlink(os.path.basename(tempdir_path), new_data_path)
            os.rename(new_data_path, data_path)
            fsync_dir(self._path)
        finally:
            # If the rename worked, we can delete the old data. Otherwise delete the new one.
            # new_data_path is only set after data_path has been assigned, so
            # the short-circuit keeps data_path from being read while unbound.
            success = new_data_path is not None and os.path.exists(
                data_path) and (os.readlink(data_path)
                                == os.path.basename(tempdir_path))

            if success:
                if old_data_path is not None:
                    shutil.rmtree(old_data_path)
            else:
                shutil.rmtree(tempdir_path)

            # Regardless of what happened above, there should be no link at new_data_path.
            if new_data_path is not None and os.path.islink(new_data_path):
                os.remove(new_data_path)
    finally:
        # Restore the umask saved earlier in self._prev_umask.
        os.umask(self._prev_umask)
        self._prev_umask = None

        # Always release the lock.
        self._lock.release()
        self._lock = None
continue if "~TYPE~" in key: continue in "/name" in key: continue subkey = key[16:] val = str(logTable.getValue(str(subkey),"missing")) entryStr += str(subkey) + ":" + val + "|" keyDict[subkey] = 0 entryStr = entryStr[:-1] entryStr += "\r\n" logFile.write(entryStr) #print "Wrote a line of data." if i >= loopsPerFlush: logFile.flush() os.fsync(logFile.fileno()) #print "Flushed!" i = 0 else: i += 1 time.sleep(0.02) q += 1 #Put out one final line with all the keys seen keyStr = "KEYLIST-" for k in keyDict.keys(): keyStr += k + "|" keyStr = keyStr[:-1] logFile.write(keyStr) #If we get here, matchOver went True. Time to shut down
def _save_pickle(self, filename):
    """Pickle ``self._sensors`` into ``filename`` and force the file to disk."""
    with open(filename, 'wb') as sink:
        pickle.dump(self._sensors, sink, protocol=pickle.HIGHEST_PROTOCOL)
        # Flush Python's buffer, then have the OS commit the data.
        sink.flush()
        os.fsync(sink.fileno())
def closeAllFiles(self, file_list):
    """Flush, fsync and close every file object in ``file_list``, in order."""
    for handle in file_list:
        handle.flush()
        descriptor = handle.fileno()
        os.fsync(descriptor)
        handle.close()
def flush(self):
    """Flush the console stream and, when a file is attached, sync it to disk."""
    self.console.flush()
    attached = self.file
    if attached is None:
        return
    attached.flush()
    os.fsync(attached.fileno())
def task_run(dockerfile, configuration, task): """ Run test """ # Check if current image is a toolimage (no daemon) is_toolimage = False for term in configuration.get('dockerTest.toolImages', {}): if term in dockerfile['image']['fullname']: is_toolimage = True # rspec spec file settings spec_path = configuration.get('dockerTest.serverspec.specPath' ) % dockerfile['image']['imageName'] spec_abs_path = os.path.join(configuration.get('serverspecPath'), spec_path) # create dockerfile tmp_suffix = '.%s_%s_%s.tmp' % (dockerfile['image']['repository'], dockerfile['image']['imageName'], dockerfile['image']['tag']) tmp_suffix = tmp_suffix.replace('/', '_') test_dockerfile = tempfile.NamedTemporaryFile( prefix='Dockerfile.', suffix=tmp_suffix, dir=configuration.get('serverspecPath'), bufsize=0, delete=False) # serverspec conf serverspec_conf = DockerTestServerspecTaskLoader.generate_serverspec_configuration( path=os.path.basename(test_dockerfile.name), dockerfile=dockerfile, configuration=configuration, is_toolimage=is_toolimage) # serverspec options serverspec_opts = [] serverspec_opts.extend([ spec_path, dockerfile['image']['fullname'], base64.b64encode(json.dumps(serverspec_conf)), os.path.basename(test_dockerfile.name) ]) # dockerfile content dockerfile_content = DockerTestServerspecTaskLoader.generate_dockerfile( dockerfile=dockerfile, configuration=configuration, is_toolimage=is_toolimage) # DryRun if configuration.get('dryRun'): if not os.path.isfile(spec_abs_path): print ' no tests found' print ' image: %s' % (dockerfile['image']['fullname']) print ' path: %s' % (spec_path) print ' args: %s' % (' '.join(serverspec_opts)) print '' print 'spec configuration:' print '-------------------' print json.dumps(serverspec_conf, indent=4, sort_keys=True) print '' print 'Dockerfile:' print '-----------' print dockerfile_content return True # check if we have any tests if not os.path.isfile(spec_abs_path): print ' no tests defined (%s)' % (spec_path) return True # build 
rspec/serverspec command cmd = ['bash', 'serverspec.sh'] cmd.extend(serverspec_opts) # create Dockerfile with open(test_dockerfile.name, mode='w', buffering=0) as f: f.write(dockerfile_content) f.flush() os.fsync(f.fileno()) f.close() test_status = False for retry_count in range(0, configuration.get('retry')): try: test_status = Command.execute( cmd, cwd=configuration.get('serverspecPath')) except Exception as e: print e pass if test_status: break elif retry_count < (configuration.get('retry') - 1): print ' failed, retrying... (try %s)' % (retry_count + 1) else: print ' failed, giving up' return test_status
def flush(filename, exist_ok=False):
    """Force the contents of the file at ``filename`` to stable storage.

    :param filename: path of the file to sync
    :param exist_ok: when true, a missing path is tolerated and the call does
        nothing; when false (the default) a missing path raises ``OSError``
    :raises OSError: if ``filename`` does not exist and ``exist_ok`` is
        false, or if opening or syncing the file fails
    """
    if not os.path.exists(filename):
        if exist_ok:
            # Nothing to sync.  Previously this case fell through to
            # io.open(), which raised anyway and made the flag meaningless.
            return
        raise OSError('Path not exists')
    with io.open(filename) as fp:
        fp.flush()
        os.fsync(fp.fileno())
def write(self, pid):
    """Replace the contents of ``self.fd`` with ``pid`` in decimal and sync it.

    :param pid: integer process id to record
    """
    os.ftruncate(self.fd, 0)
    # ftruncate() leaves the file offset where it was; rewind so a second
    # call does not write after a gap of NUL bytes.
    os.lseek(self.fd, 0, os.SEEK_SET)
    os.write(self.fd, b"%d" % pid)
    os.fsync(self.fd)
def get_socket(self):
    """Return a connected socket for this interface, or None on failure.

    Without SSL this is simply the result of ``get_simple_socket()``.  With
    SSL, the server certificate is stored per host under
    ``<config_path>/certs/<host>``:

    * If no stored certificate exists, a CA-verified handshake is tried first
      (when ``ca_path`` exists); on success that socket is returned directly.
      Otherwise the server certificate is fetched without verification and
      written to ``<cert_path>.temp``.
    * A new connection is then wrapped with ``CERT_REQUIRED`` against the
      stored (or just-fetched) certificate.  On success a newly fetched
      certificate is renamed into place; on an SSL error with errno 1 a new
      certificate is moved to ``<cert_path>.rej`` and a stored one that fails
      its date check is deleted.
    """
    if self.use_ssl:
        cert_path = os.path.join(self.config_path, 'certs', self.host)
        if not os.path.exists(cert_path):
            is_new = True
            s = self.get_simple_socket()
            if s is None:
                # print_error('[get_socket]', 'get_simple_socket failed')
                return
            # try with CA first
            if os.path.exists(ca_path):
                try:
                    context = self.get_ssl_context(cert_reqs=ssl.CERT_REQUIRED, ca_certs=ca_path)
                    s = context.wrap_socket(s, do_handshake_on_connect=True)
                except ssl.SSLError as e:
                    self.print_error('[get_socket] 1', e)
                    # Fall through to the unverified fetch below.
                    s = None
                except Exception as e:
                    self.print_error('[get_socket] 2', e)
                    return

                try:
                    if s and self.check_host_name(s.getpeercert(), self.host):
                        self.print_error("SSL certificate signed by CA")
                        return s
                except Exception as e:
                    self.print_error('[get_socket] 2.5', e)

            # get server certificate.
            # Do not use ssl.get_server_certificate because it does not work with proxy
            s = self.get_simple_socket()
            if s is None:
                self.print_error('[get_socket] 3')
                return
            try:
                context = self.get_ssl_context(cert_reqs=ssl.CERT_NONE, ca_certs=None)
                s = context.wrap_socket(s)
            except ssl.SSLError as e:
                self.print_error("SSL error retrieving SSL certificate:", e)
                return
            except Exception as e:
                self.print_error('[get_socket] 4', e)
                return

            dercert = s.getpeercert(True)
            s.close()
            cert = ssl.DER_cert_to_PEM_cert(dercert)
            # workaround android bug
            # (inserts a newline before the END marker when it directly
            # follows a non-newline character)
            cert = re.sub("([^\n])-----END CERTIFICATE-----", "\\1\n-----END CERTIFICATE-----", cert)
            temporary_path = cert_path + '.temp'
            # Write the fetched certificate to a temporary file and sync it;
            # it is only renamed to cert_path after the verified handshake
            # below succeeds.
            with open(temporary_path,"w") as f:
                f.write(cert)
                f.flush()
                os.fsync(f.fileno())
        else:
            is_new = False

    s = self.get_simple_socket()
    if s is None:
        return

    if self.use_ssl:
        try:
            context = self.get_ssl_context(cert_reqs=ssl.CERT_REQUIRED,
                                           ca_certs=(temporary_path if is_new else cert_path))
            s = context.wrap_socket(s, do_handshake_on_connect=True)
        except socket.timeout:
            self.print_error('timeout')
            return
        except ssl.SSLError as e:
            self.print_error("SSL error:", e)
            # Only errno 1 is handled further; any other SSL error just fails.
            if e.errno != 1:
                self.print_error('[get_socket] 6', e)
                return
            if is_new:
                # Move the just-fetched certificate aside as rejected.
                rej = cert_path + '.rej'
                if os.path.exists(rej):
                    os.unlink(rej)
                os.rename(temporary_path, rej)
            else:
                with open(cert_path) as f:
                    cert = f.read()
                try:
                    b = pem.dePem(cert, 'CERTIFICATE')
                    x = x509.X509(b)
                except:
                    traceback.print_exc(file=sys.stderr)
                    self.print_error("wrong certificate")
                    return
                try:
                    x.check_date()
                except:
                    # The stored certificate failed its date check: delete it.
                    self.print_error("certificate has expired:", cert_path)
                    os.unlink(cert_path)
                    return
                self.print_error("wrong certificate")
            # NOTE(review): this branch is only reached when e.errno == 1, so
            # the errno 104 check below looks unreachable -- confirm.
            if e.errno == 104:
                self.print_error('[get_socket] 7', e)
                return
            return
        except BaseException as e:
            self.print_error('[get_socket] 8', e)
            traceback.print_exc(file=sys.stderr)
            return

        if is_new:
            self.print_error("saving certificate")
            os.rename(temporary_path, cert_path)

    return s
def _write_version_file(self, version: int) -> None:
    """Write ``version`` as decimal text to ``VERSION_FILE`` under ``self.path`` and sync it."""
    version_path = os.path.join(self.path, VERSION_FILE)
    with open(version_path, "wt") as handle:
        handle.write("%d" % version)
        # Flush Python's buffer, then have the OS commit the data.
        handle.flush()
        os.fsync(handle.fileno())
if not found: del new_aliases[n] aliases = new_aliases # Rewrite aliases file without cycles or names not in the csrankings database. with open('dblp-aliases.csv-x', mode='w') as outfile: sfieldnames = ['alias', 'name'] swriter = csv.DictWriter(outfile, fieldnames=sfieldnames) swriter.writeheader() for n in collections.OrderedDict( sorted(aliases.items(), key=lambda t: t[0])): for a in aliases[n]: h = {'alias': a, 'name': n} swriter.writerow(h) outfile.flush() os.fsync(outfile.fileno()) os.rename('dblp-aliases.csv-x', 'dblp-aliases.csv') # Add any missing aliases. for name in aliases: if name in csrankings: # Make sure all aliases are there. for a in aliases[name]: # Add any missing aliases. if not a in csrankings: csrankings[a] = csrankings[name] else: # There might be a name that isn't there but an alias that IS. If so, add the name. for a in aliases[name]: if a in csrankings:
def log(title, message='', write=False):
    """Print ``title`` and ``message`` in colour; optionally record ``message`` on disk.

    When ``write`` is true, ``message`` plus a newline is appended to
    ``REQ_URLS_FILE``, which is then flushed and fsync'ed before printing.
    """
    if write:
        REQ_URLS_FILE.write(''.join((message, '\n')))
        REQ_URLS_FILE.flush()
        os.fsync(REQ_URLS_FILE.fileno())
    banner = ''.join((GREEN, title, WHIYE, message))
    print(banner)
def _flush(self, timeout):
    """Flush ``self._file`` if set, and fsync it when ``self._open_args`` is truthy.

    ``timeout`` is accepted but not used.
    """
    stream = self._file
    if not stream:
        return
    stream.flush()
    if self._open_args:
        os.fsync(stream.fileno())
def vrrp_master(self, job, fobj, ifname, event):
    """Handle a VRRP MASTER event for ``ifname`` and take over as MASTER.

    Steps, in order: start fenced (forcefully when ``event`` is
    ``'forcetakeover'``), maintain the zpool cache files, import every
    volume listed in ``fobj['volumes']``, regenerate configuration and
    restart services.

    Args:
        job: job object; ``set_progress`` is used to report the phase.
        fobj: failover description; this method reads its ``'groups'``,
            ``'volumes'`` and ``'services'`` entries.
        ifname: name of the interface the event was received for.
        event: event name; ``'forcetakeover'`` skips the failover-group check.

    Returns:
        ``'SUCCESS'`` (also stored in ``self.FAILOVER_RESULT``).

    Raises:
        IgnoreFailoverEvent: the failover-group check says to ignore the event.
        FencedError: starting fenced returned a non-zero code.
        AllZpoolsFailedToImport: no volume could be imported.
    """
    # vrrp does the "election" for us. If we've gotten this far
    # then the specified timeout for NOT receiving an advertisement
    # has elapsed. Setting the progress to ELECTING is to prevent
    # extensive API breakage with the platform independent failover plugin
    # as well as the front-end (webUI) even though the term is misleading
    # in this use case
    job.set_progress(None, description='ELECTING')

    fenced_error = None
    if event == 'forcetakeover':
        # reserve the disks forcefully ignoring if the other node has the disks
        logger.warning('Forcefully taking over as the MASTER node.')

        # need to stop fenced just in case it's running already
        self.run_call('failover.fenced.stop')

        logger.warning('Forcefully starting fenced')
        fenced_error = self.run_call('failover.fenced.start', True)
    else:
        # if we're here then we need to check a couple things before we start fenced
        # and start the process of becoming master
        #
        # 1. if the interface that we've received a MASTER event for is
        #    in a failover group with other interfaces and ANY of the
        #    other members in the failover group are still BACKUP,
        #    then we need to ignore the event.
        #
        # TODO: Not sure how keepalived and laggs operate so need to test this
        #       (maybe the event only gets triggered if the lagg goes down)
        #
        status = self.run_call('failover.vip.check_failover_group', ifname, fobj['groups'])

        # this means that we received a master event and the interface was
        # in a failover group. And in that failover group, there were other
        # interfaces that were still in the BACKUP state which means the
        # other node has them as MASTER so ignore the event.
        # NOTE(review): the condition tests status[1] but the message logs
        # status[0] -- confirm which element holds the interfaces that
        # should be reported here.
        if len(status[1]):
            logger.warning(
                'Received MASTER event for "%s", but other '
                'interfaces "%r" are still working on the '
                'MASTER node. Ignoring event.',
                ifname,
                status[0],
            )
            job.set_progress(None, description='IGNORED')
            raise IgnoreFailoverEvent()

        logger.warning('Entering MASTER on "%s".', ifname)

        # need to stop fenced just in case it's running already
        self.run_call('failover.fenced.stop')

        logger.warning('Starting fenced')
        fenced_error = self.run_call('failover.fenced.start')

    # starting fenced daemon failed....which is bad
    # emit an error and exit
    if fenced_error != 0:
        if fenced_error == 1:
            logger.error('Failed to register keys on disks, exiting!')
        elif fenced_error == 2:
            logger.error('Fenced is running on the remote node, exiting!')
        elif fenced_error == 3:
            logger.error(
                '10% or more of the disks failed to be reserved, exiting!')
        elif fenced_error == 5:
            logger.error(
                'Fenced encountered an unexpected fatal error, exiting!')
        else:
            logger.error(
                f'Fenced exited with code "{fenced_error}" which should never happen, exiting!'
            )

        job.set_progress(None, description='ERROR')
        raise FencedError()

    # remove the zpool cache files if necessary
    if os.path.exists(self.ZPOOL_KILLCACHE):
        for i in (self.ZPOOL_CACHE_FILE, self.ZPOOL_CACHE_FILE_SAVED):
            with contextlib.suppress(Exception):
                os.unlink(i)

    # create the self.ZPOOL_KILLCACHE file
    else:
        with contextlib.suppress(Exception):
            with open(self.ZPOOL_KILLCACHE, 'w') as f:
                f.flush()  # be sure it goes straight to disk
                os.fsync(f.fileno())  # be EXTRA sure it goes straight to disk

    # if we're here and the zpool "saved" cache file exists we need to check
    # if its modify time is < the standard zpool cache file and if it is
    # we overwrite the zpool "saved" cache file with the standard one
    if os.path.exists(self.ZPOOL_CACHE_FILE_SAVED) and os.path.exists(
            self.ZPOOL_CACHE_FILE):
        zpool_cache_mtime = os.stat(self.ZPOOL_CACHE_FILE).st_mtime
        zpool_cache_saved_mtime = os.stat(
            self.ZPOOL_CACHE_FILE_SAVED).st_mtime
        if zpool_cache_mtime > zpool_cache_saved_mtime:
            with contextlib.suppress(Exception):
                shutil.copy2(self.ZPOOL_CACHE_FILE,
                             self.ZPOOL_CACHE_FILE_SAVED)

    # set the progress to IMPORTING
    job.set_progress(None, description='IMPORTING')

    failed = []
    for vol in fobj['volumes']:
        logger.info('Importing %s', vol['name'])

        # import the zpool(s)
        try:
            self.run_call('zfs.pool.import_pool', vol['guid'], {
                'altroot': '/mnt',
                'cachefile': self.ZPOOL_CACHE_FILE,
            })
        except Exception as e:
            # remember the failure on the volume itself so it can be reported
            vol['error'] = str(e)
            failed.append(vol)
            continue

        # try to unlock the zfs datasets (if any)
        unlock_job = self.run_call('failover.unlock_zfs_datasets', vol["name"])
        unlock_job.wait_sync()
        if unlock_job.error:
            logger.error(
                f'Error unlocking ZFS encrypted datasets: {unlock_job.error}'
            )
        elif unlock_job.result['failed']:
            logger.error('Failed to unlock %s ZFS encrypted dataset(s)',
                         ','.join(unlock_job.result['failed']))

    # if we fail to import all zpools then alert the user because nothing
    # is going to work at this point
    if len(failed) == len(fobj['volumes']):
        for i in failed:
            # report the individual volume: `failed` is the list, `i` is
            # the volume dict carrying 'name', 'guid' and 'error'
            logger.error(
                'Failed to import volume with name "%s" with guid "%s" '
                'with error "%s"',
                i['name'],
                i['guid'],
                i['error'],
            )

        logger.error('All volumes failed to import!')
        job.set_progress(None, description='ERROR')
        raise AllZpoolsFailedToImport()

    # if we fail to import any of the zpools then alert the user but continue the process
    elif len(failed):
        for i in failed:
            logger.error(
                'Failed to import volume with name "%s" with guid "%s" '
                'with error "%s"',
                i['name'],
                i['guid'],
                i['error'],
            )
        logger.error(
            'However, other zpools imported so the failover process continued.'
        )

    logger.info('Volume imports complete.')

    # need to make sure failover status is updated in the middleware cache
    logger.info('Refreshing failover status')
    self.run_call('failover.status_refresh')

    # this enables all necessary services that have been enabled by the user
    logger.info('Enabling necessary services')
    self.run_call('etc.generate', 'rc')

    logger.info('Configuring system dataset')
    self.run_call('etc.generate', 'system_dataset')

    # Write the certs to disk based on what is written in db.
    logger.info('Configuring SSL')
    self.run_call('etc.generate', 'ssl')

    # Now we restart the appropriate services to ensure it's using correct certs.
    logger.info('Configuring HTTP')
    self.run_call('service.restart', 'http')

    # now we restart the services, prioritizing the "critical" services
    logger.info('Restarting critical services.')
    for i in self.CRITICAL_SERVICES:
        for j in fobj['services']:
            if i == j['srv_service'] and j['srv_enable']:
                logger.info('Restarting critical service "%s"', i)
                self.run_call('service.restart', i, self.HA_PROPAGATE)

    # TODO: look at nftables
    # logger.info('Allowing network traffic.')
    # run('/sbin/pfctl -d')

    logger.info('Critical portion of failover is now complete')

    # regenerate cron
    logger.info('Regenerating cron')
    self.run_call('etc.generate', 'cron')

    # sync disks is disabled on passive node
    logger.info('Syncing disks')
    self.run_call('disk.sync_all')

    # restart the remaining "non-critical" services
    logger.info('Restarting remaining services')

    # restart the non-critical services in the background
    self.run_call('failover.events.restart_background', fobj['services'])

    # TODO: jails don't exist on SCALE (yet)
    # self.run_call('jail.start_on_boot')
    self.run_call('vm.start_on_boot')

    logger.info('Initializing alert system')
    self.run_call('alert.block_failover_alerts')
    self.run_call('alert.initialize', False)

    kmip_config = self.run_call('kmip.config')
    if kmip_config and kmip_config['enabled']:
        logger.info('Syncing encryption keys with KMIP server')

        # Even though we keep keys in sync, it's best that we do this as well
        # to ensure that the system is up to date with the latest keys available
        # from KMIP. If it's inaccessible, the already synced memory keys are used
        # meanwhile.
        self.run_call('kmip.initialize_keys')

    logger.info('Failover event complete.')

    # clear the description and set the result
    job.set_progress(None, description='SUCCESS')

    self.FAILOVER_RESULT = 'SUCCESS'

    return self.FAILOVER_RESULT
def save(self, fname):
    """Pickle this object to ``fname`` and force the data to disk.

    Args:
        fname: path of the file to create or overwrite.

    The file is closed even when pickling fails, and Python's write
    buffer is flushed before ``os.fsync`` so the sync actually covers
    the pickled bytes.
    """
    with open(fname, "wb") as fo:
        pickle.dump(self, fo)
        # fsync only commits what the OS already has; flush first.
        fo.flush()
        os.fsync(fo.fileno())
def fsync(self, path, datasync, fh):
    """Synchronise the open file descriptor ``fh`` to stable storage.

    Args:
        path: path of the file; not used, the descriptor is sufficient.
        datasync: when truthy, only the file data needs to be synced, so
            ``os.fdatasync`` is used where the platform provides it.
        fh: open OS-level file descriptor.

    Returns:
        ``None``; an ``OSError`` from the underlying call propagates.
    """
    # os.fdatasync is not available on every platform; fall back to the
    # stronger os.fsync when it is missing.
    if datasync and hasattr(os, 'fdatasync'):
        return os.fdatasync(fh)
    return os.fsync(fh)
def vrrp_backup(self, job, fobj, ifname, event):
    """Handle a VRRP BACKUP event for ``ifname`` and demote this node.

    Steps, in order: stop fenced, restart keepalived, write the watchdog
    sentinel file, export the zpools in a thread bounded by
    ``self.ZPOOL_EXPORT_TIMEOUT`` (triggering a sysrq reboot when the
    export does not finish in time), then stop/restart services and sync
    keys from the remote node.

    Args:
        job: job object; ``set_progress`` is used when the event is ignored.
        fobj: failover description; this method reads its ``'groups'``,
            ``'volumes'`` and ``'services'`` entries.
        ifname: name of the interface the event was received for.
        event: event name; not used by this method.

    Returns:
        ``'SUCCESS'`` (also stored in ``self.FAILOVER_RESULT``).

    Raises:
        IgnoreFailoverEvent: the failover-group check says to ignore the event.
    """
    # we need to check a couple things before we stop fenced
    # and start the process of becoming backup
    #
    # 1. if the interface that we've received a BACKUP event for is
    #    in a failover group with other interfaces and ANY of the
    #    other members in the failover group are still MASTER,
    #    then we need to ignore the event.
    #
    # TODO: Not sure how keepalived and laggs operate so need to test this
    #       (maybe the event only gets triggered if the lagg goes down)
    #
    status = self.run_call('failover.vip.check_failover_group', ifname, fobj['groups'])

    # this means that we received a backup event and the interface was
    # in a failover group. And in that failover group, there were other
    # interfaces that were still in the MASTER state so ignore the event.
    # NOTE(review): the condition tests status[0] but the message logs
    # status[1] -- confirm which element holds the interfaces that should
    # be reported here.
    if len(status[0]):
        logger.warning(
            'Received BACKUP event for "%s", but other '
            'interfaces "%r" are still working. '
            'Ignoring event.',
            ifname,
            status[1],
        )
        job.set_progress(None, description='IGNORED')
        raise IgnoreFailoverEvent()

    logger.warning('Entering BACKUP on "%s".', ifname)

    # we need to stop fenced first
    logger.warning('Stopping fenced')
    self.run_call('failover.fenced.stop')

    # restarting keepalived sends a priority 0 advertisement
    # which means any VIP that is on this controller will be
    # migrated to the other controller
    logger.info('Transitioning all VIPs off this node')
    self.run_call('service.restart', 'keepalived')

    # TODO: look at nftables
    # logger.info('Enabling firewall')
    # run('/sbin/pfctl -ef /etc/pf.conf.block')

    # ticket 23361 enabled a feature to send email alerts when an unclean reboot occurs.
    # TrueNAS HA, by design, has a triggered unclean shutdown.
    # If a controller is demoted to standby, we set a 4 sec countdown using watchdog.
    # If the zpool(s) can't export within that timeframe, we use watchdog to violently reboot the controller.
    # When this occurs, the customer gets an email about an "Unauthorized system reboot".
    # The idea for creating a new sentinel file for watchdog related panics,
    # is so that we can send an appropriate email alert.
    # So if we panic here, middleware will check for this file and send an appropriate email.
    # ticket 39114
    with contextlib.suppress(Exception):
        with open(self.WATCHDOG_ALERT_FILE, 'w') as f:
            # A text-mode file only accepts str; writing the bare int raised
            # TypeError, which the suppress() above hid, leaving the file empty.
            # NOTE(review): confirm the reader of this file expects a decimal
            # text timestamp.
            f.write(str(int(time.time())))
            f.flush()  # be sure it goes straight to disk
            os.fsync(f.fileno())  # be EXTRA sure it goes straight to disk

    # export zpools in a thread and set a timeout to
    # to `self.ZPOOL_EXPORT_TIMEOUT`.
    # if we can't export the zpool(s) in this timeframe,
    # we send the 'b' character to the /proc/sysrq-trigger
    # to trigger an immediate reboot of the system
    # https://www.kernel.org/doc/html/latest/admin-guide/sysrq.html
    export_thread = threading.Thread(
        target=self._export_zpools,
        name='failover_export_zpools',
        # Thread unpacks `args` into positional arguments, so the volume
        # list must be wrapped in a one-element tuple to arrive as a single
        # argument. NOTE(review): assumes _export_zpools takes the list of
        # volumes as one parameter -- confirm against its definition.
        args=(fobj['volumes'],),
    )
    export_thread.start()
    export_thread.join(timeout=self.ZPOOL_EXPORT_TIMEOUT)
    if export_thread.is_alive():
        # have to enable the "magic" sysrq triggers
        with open('/proc/sys/kernel/sysrq', 'w') as f:
            f.write('1')

        # now violently reboot
        with open('/proc/sysrq-trigger', 'w') as f:
            f.write('b')

    # We also remove this file here, because on boot we become BACKUP if the other
    # controller is MASTER. So this means we have no volumes to export which means
    # the `self.ZPOOL_EXPORT_TIMEOUT` is honored.
    with contextlib.suppress(Exception):
        os.unlink(self.WATCHDOG_ALERT_FILE)

    logger.info('Refreshing failover status')
    self.run_call('failover.status_refresh')

    logger.info('Restarting syslog-ng')
    self.run_call('service.restart', 'syslogd', self.HA_PROPAGATE)

    logger.info('Regenerating cron')
    self.run_call('etc.generate', 'cron')

    logger.info('Stopping smartd')
    self.run_call('service.stop', 'smartd', self.HA_PROPAGATE)

    logger.info('Stopping collectd')
    self.run_call('service.stop', 'collectd', self.HA_PROPAGATE)

    # we keep SSH running on both controllers (if it's enabled by user)
    for i in fobj['services']:
        if i['srv_service'] == 'ssh' and i['srv_enable']:
            logger.info('Restarting SSH')
            self.run_call('service.restart', 'ssh', self.HA_PROPAGATE)

    # TODO: ALUA on SCALE??
    # do something with iscsi service here

    logger.info('Syncing encryption keys from MASTER node (if any)')
    self.run_call('failover.call_remote', 'failover.sync_keys_to_remote_node')

    logger.info('Successfully became the BACKUP node.')
    self.FAILOVER_RESULT = 'SUCCESS'

    return self.FAILOVER_RESULT