Example #1
def storeHosts(lines_):
    # Assume that change_ip is a function that takes a string and returns a new one with the IP changed.
    new_file = open(hostsFileName, "w")
    new_file.writelines(lines_)
    new_file.flush()
    os.fsync(new_file.fileno())
    new_file.close()
Example #2
@contextlib.contextmanager
def open_tempfile_with_atomic_write_to(path, **kwargs):
    """
    Open a temporary file object that atomically moves to the specified
    path upon exiting the context manager.

    Supports the same function signature as `open`.

    The parent directory must exist and be user-writable.

    WARNING: This is just like 'mv', it will clobber files!
    """
    parent_directory = os.path.dirname(path)
    _tempfile = tempfile.NamedTemporaryFile(delete=False, dir=parent_directory)
    _tempfile.close()
    tempfile_path = _tempfile.name
    try:
        with open(tempfile_path, **kwargs) as file:
            yield file
            file.flush()
            os.fsync(file.fileno())
        os.rename(tempfile_path, path)
    finally:
        try:
            os.remove(tempfile_path)
        except OSError as e:
            if e.errno == errno.ENOENT:
                pass
            else:
                raise e
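Given the contextlib.contextmanager decorator above, a minimal usage sketch (the file name and payload are illustrative):

# Atomically replace config.json with new content.
with open_tempfile_with_atomic_write_to("config.json", mode="w") as f:
    f.write('{"retries": 3}\n')
# On a clean exit the data is flushed, fsync'd and renamed over config.json;
# on an exception the temporary file is removed and config.json is untouched.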
Example #3
 def __idle_save_text(self, status):
     self.__save_text_id = 0
     _logger.debug("autosaving to %s", self.__filename)
     dn,bn = os.path.split(self.__filename)
     try:
         perms = os.stat(self.__filename).st_mode
     except OSError:
         perms = None
     (tempfd, temppath) = tempfile.mkstemp('.tmp', self.__filename, dn)
     os.close(tempfd)
     f = open_text_file(temppath, 'w')
     text = self.input.get_property("text")
     utext = str(text)
     f.write(utext)
     f.flush()
     os.fsync(f.fileno())  # note: tempfd was already closed above; sync the open file object instead
     f.close()
     if perms is not None:
         os.chmod(temppath, perms)
     atomic_rename(temppath, self.__filename)
     self.__show_msg(status + _("...done"))
     self.__modified = False
     self.__sync_modified_sensitivity()
     _logger.debug("autosave complete")
     return False
Example #4
    def restore(self, backup, volume_id, volume_file):
        """Restore the given volume backup from Ceph object store"""
        volume = self.db.volume_get(self.context, volume_id)
        backup_name = self._get_backup_base_name(backup['volume_id'],
                                                 backup['id'])

        LOG.debug('starting backup restore from Ceph backup=%s '
                  'to volume=%s' % (backup['id'], volume['name']))

        # Ensure we are at the beginning of the volume
        volume_file.seek(0)

        backup_size = int(volume['size']) * units.GiB

        with rbddriver.RADOSClient(self, self._ceph_pool) as client:
            src_rbd = self.rbd.Image(client.ioctx, backup_name)
            try:
                self._transfer_data(src_rbd, volume_file, volume['name'],
                                    backup_size)
            finally:
                src_rbd.close()

        # Be tolerant to IO implementations that do not support fileno()
        try:
            fileno = volume_file.fileno()
        except IOError:
            LOG.info("volume_file does not support fileno() so skipping "
                     "fsync()")
        else:
            os.fsync(fileno)

        LOG.debug('restore %s to %s finished.' % (backup['id'], volume_id))
Example #5
def brickfind_crawl(brick, args):
    if brick.endswith("/"):
        brick = brick[0:len(brick)-1]

    working_dir = os.path.dirname(args.outfile)
    mkdirp(working_dir, exit_on_err=True, logger=logger)
    create_file(args.outfile, exit_on_err=True, logger=logger)

    with open(args.outfile, "a+") as fout:
        brick_path_len = len(brick)

        def output_callback(path, filter_result):
            path = path.strip()
            path = path[brick_path_len+1:]
            output_write(fout, path, args.output_prefix, encode=True)

        ignore_dirs = [os.path.join(brick, dirname)
                       for dirname in
                       conf.get_opt("brick_ignore_dirs").split(",")]

        find(brick, callback_func=output_callback,
             ignore_dirs=ignore_dirs)

        fout.flush()
        os.fsync(fout.fileno())
Example #6
def create_mds_bootstrap(cluster, key):
    """
    Run on mds node, writes the bootstrap key if not there yet.

    Returns None on success, error message on error exceptions. pushy
    mangles exceptions to all be of type ExceptionProxy, so we can't
    tell between bug and correctly handled failure, so avoid using
    exceptions for non-exceptional runs.
    """
    import os

    path = '/var/lib/ceph/bootstrap-mds/{cluster}.keyring'.format(
        cluster=cluster,
        )
    if not os.path.exists(path):
        tmp = '{path}.{pid}.tmp'.format(
            path=path,
            pid=os.getpid(),
            )
        # file() doesn't let us control access mode from the
        # beginning, and thus would have a race where attacker can
        # open before we chmod the file, so play games with os.open
        fd = os.open(
            tmp,
            (os.O_WRONLY|os.O_CREAT|os.O_EXCL
             |os.O_NOCTTY|os.O_NOFOLLOW),
            0o600,
            )
        with os.fdopen(fd, 'wb') as f:
            f.write(key)
            f.flush()
            os.fsync(f)
        os.rename(tmp, path)
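For comparison, a hedged Python 3 sketch of the same create-with-restrictive-mode-then-rename pattern (the function name write_bootstrap_key is illustrative, not from the source project):

import os

def write_bootstrap_key(path, key):
    # Create the temp file with mode 0o600 from the start so there is no
    # window in which another user could open it before a later chmod.
    tmp = '{path}.{pid}.tmp'.format(path=path, pid=os.getpid())
    fd = os.open(tmp,
                 os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_NOCTTY | os.O_NOFOLLOW,
                 0o600)
    with os.fdopen(fd, 'wb') as f:
        f.write(key)               # key is expected to be bytes
        f.flush()
        os.fsync(f.fileno())       # data is durable before the rename below
    os.rename(tmp, path)           # atomically publish the keyring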
Example #7
    def _update(self, mergerfunc):
        data = self.default_values
        with LockedOpen(self.filename, 'r+') as f:
            try:
                data.update(json.load(f))
            except ValueError:
                pass

            data = mergerfunc(data)
            # If Data is not changed by merger func
            if not data:
                return False

            with tempfile.NamedTemporaryFile(
                    'w',
                    dir=os.path.dirname(self.filename),
                    delete=False) as tf:
                tf.write(data)
                tempname = tf.name

            os.rename(tempname, self.filename)
            dirfd = os.open(os.path.dirname(os.path.abspath(self.filename)),
                            os.O_DIRECTORY)
            os.fsync(dirfd)
            os.close(dirfd)
            return True
Example #8
 def fsync(self, isfsyncfile):
     log.debug("file %s isfsyncfile %d" % (self.path, isfsyncfile))
     self._fflush()
     if isfsyncfile and hasattr(os, 'fdatasync'):
         os.fdatasync(self.fd)
     else:
         os.fsync(self.fd)
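For context, a standalone sketch of the same flush-then-sync decision (sync_file is an illustrative helper, not part of the filesystem class above):

import os

def sync_file(f, data_only=False):
    f.flush()                      # push Python-level buffers down to the OS
    if data_only and hasattr(os, 'fdatasync'):
        os.fdatasync(f.fileno())   # data only; may skip metadata such as mtime
    else:
        os.fsync(f.fileno())       # data and metadata to stable storage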
Example #9
def log(s):
    dt = datetime.now()
    ts = dt.strftime("%Y-%m-%d %H:%M:%S")
    print(ts + "> " + s)
    logFile.write(ts + "> " + s + "\n")
    logFile.flush()
    os.fsync(logFile.fileno())
Example #10
    def __init__(self, pid_path):
        self._pid_path = pid_path
        self._other_running = False
        ensuredirs(self._pid_path)
        self._lockfile = None

        try:
            self._lockfile = os.open(self._pid_path, os.O_CREAT | os.O_WRONLY)
        except:
            raise SoleError('Cannot open lockfile (path = %s)' % self._pid_path)

        try:
            fcntl.lockf(self._lockfile, fcntl.LOCK_EX | fcntl.LOCK_NB)
            
        except IOError:
            self._other_running = True
            try:
                f = open(self._pid_path, 'r')
                pid = f.read().strip()
                f.close()
            except:
                pid = '?'

            raise SoleError('Other instance is running (pid = %s)' % pid)

        try:
            os.ftruncate(self._lockfile, 0)
            os.write(self._lockfile, '%i\n' % os.getpid())
            os.fsync(self._lockfile)

        except:
            pass # the pid is only stored for user information, so this is allowed to fail
Example #11
 def initialize(self):
     """
     Create the database
     """
     if os.path.exists(self.db_file):
         self.conn = get_db_connection(self.db_file, self.timeout)
     else:
         mkdirs(self.db_dir)
         fd, tmp_db_file = mkstemp(suffix='.tmp', dir=self.db_dir)
         os.close(fd)
         conn = sqlite3.connect(tmp_db_file,
                                check_same_thread=False,
                                timeout=0)
         self._initialize(conn)
         conn.commit()
         if tmp_db_file:
             conn.close()
             if not os.path.exists(self.db_file):
                 with open(tmp_db_file, 'r+b') as f:
                     os.fsync(f.fileno())
                     os.rename(tmp_db_file, self.db_file)
             else:
                 os.remove(tmp_db_file)
                 self.conn = get_db_connection(self.db_file, self.timeout)
         else:
             self.conn = conn
Example #12
    def handle_result(self, data, **kwargs):
        if self.result is None:
            return

        # Process each result individually
        try:
            for item in extract_results(data):
                fields = {}
                fields.update(kwargs)

                if item.timestamp:
                    timestamp = time.mktime(item.timestamp.timetuple())
                else:
                    timestamp = time.mktime(dt.datetime.utcnow().timetuple())

                fields.update(name=item.name, timestamp=timestamp)

                base = self.join_fields(fields)

                # Write each added/removed entry on a different line
                curr_fields = {'result_type': item.action}
                for key, val in item.columns.items():
                    curr_fields['_'.join([item.action, key])] = val

                self.result.write(base + ', ' + self.join_fields(curr_fields) + '\n')
        finally:
            self.result.flush()
            os.fsync(self.result.fileno())
Example #13
@contextmanager
def open_atomic(filepath, *args, **kwargs):
    """
    Open temporary file object that atomically moves to destination upon
    exiting.

    Allows reading and writing to and from the same filename.

    The file will not be moved to destination in case of an exception.

    Parameters
    ----------
    filepath : string
        the file path to be opened
    fsync : bool
        whether to force write the file to disk
    *args : mixed
        Any valid arguments for :code:`open`
    **kwargs : mixed
        Any valid keyword arguments for :code:`open`
    """
    fsync = kwargs.pop('fsync', False)  # pop so it is not passed through to open()

    with tempfile(dir=os.path.dirname(os.path.abspath(filepath))) as tmppath:
        with open(tmppath, *args, **kwargs) as file:
            try:
                yield file
            finally:
                if fsync:
                    file.flush()
                    os.fsync(file.fileno())
        os.rename(tmppath, filepath)
        os.chmod(filepath, 0o644)
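With the contextmanager decorator above, and assuming the tempfile(...) call refers to a companion context manager that yields a temporary path in the destination directory (not the standard tempfile module), usage looks roughly like this (file name and rows are illustrative):

with open_atomic('stats.csv', 'w', fsync=True) as f:
    f.write('requests,errors\n')
    f.write('1024,3\n')
# Data is flushed and fsync'd, the temp file is renamed onto stats.csv,
# and the result is chmod'd to 0o644; readers never see a partial file.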
Example #14
    def finish_rip(self, track):
        self.progress.end_track()
        if self.pipe is not None:
            print(Fore.GREEN + 'Rip complete' + Fore.RESET)
            self.pipe.flush()
            self.pipe.close()

            # wait for process to end before continuing
            ret_code = self.rip_proc.wait()
            if ret_code != 0:
                print(
                    Fore.YELLOW + "Warning: encoder returned non-zero "
                                  "error code " + str(ret_code) + Fore.RESET)
            self.rip_proc = None
            self.pipe = None

        if self.wav_file is not None:
            self.wav_file.close()
            self.wav_file = None

        if self.pcm_file is not None:
            self.pcm_file.flush()
            os.fsync(self.pcm_file.fileno())
            self.pcm_file.close()
            self.pcm_file = None

        self.ripping.clear()
        self.post.log_success(track)
Example #15
    def handle_status(self, data, **kwargs):
        if self.status is None:
            return

        # Write each status log on a different line
        try:
            for item in data.get('data', []):
                fields = {}
                fields.update(kwargs)
                fields.update({
                    'line':     item.get('line', ''),
                    'message':  item.get('message', ''),
                    'severity': item.get('severity', ''),
                    'filename': item.get('filename', ''),
                    'version': item.get('version'),  # may be null
                })

                if 'created' in item:
                    fields['created'] = time.mktime(item['created'].timetuple())
                else:
                    fields['created'] = time.mktime(dt.datetime.utcnow().timetuple())

                self.status.write(self.join_fields(fields) + '\n')
        finally:
            self.status.flush()
            os.fsync(self.status.fileno())
Example #16
def logProcessing(record_name, msg):
    global logOut
    if logOut is None:
        logOut = open(r"C:\Users\u0064666\Pictures\Cards\renderLog.txt", "w")
    logOut.write("[" + record_name + "]: " + msg + "\r\n")
    logOut.flush()
    os.fsync(logOut.fileno())
Example #17
    def restore(self, backup, volume_id, volume_file):
        """Restore volume from backup in Ceph object store.

        If volume metadata is available this will also be restored.
        """
        target_volume = self.db.volume_get(self.context, volume_id)
        LOG.debug('Starting restore from Ceph backup=%(src)s to '
                  'volume=%(dest)s' %
                  {'src': backup['id'], 'dest': target_volume['name']})

        try:
            self._restore_volume(backup, target_volume, volume_file)

            # Be tolerant of IO implementations that do not support fileno()
            try:
                fileno = volume_file.fileno()
            except IOError:
                LOG.debug("Restore target I/O object does not support "
                          "fileno() - skipping call to fsync().")
            else:
                os.fsync(fileno)

            self._restore_metadata(backup, volume_id)

            LOG.debug('Restore to volume %s finished successfully.' %
                      volume_id)
        except exception.BackupOperationError as e:
            LOG.error(_('Restore to volume %(volume)s finished with error - '
                        '%(error)s.') % {'error': e, 'volume': volume_id})
            raise
Example #18
    def handle_other_file(self, f):
        self.updated_date = timezone.now()
        self.first_run = False

        repo = self.get_git_repo(delete = True)

        rules_dir = os.path.join(settings.GIT_SOURCES_BASE_DIRECTORY, str(self.pk), 'rules')
        # create rules dir if needed
        if not os.path.isdir(rules_dir):
            os.makedirs(rules_dir)
        # copy file content to target
        f.seek(0)
        os.fsync(f)
        shutil.copy(f.name, os.path.join(rules_dir, self.name))

        index = repo.index
        if len(index.diff(None)) or self.first_run:
            os.environ['USERNAME'] = '******'
            index.add(["rules"])
            message =  'source version at %s' % (self.updated_date)
            index.commit(message)

        self.save()
        # Now we must update SourceAtVersion for this source
        # or create it if needed
        self.create_sourceatversion()
Example #19
    def saveJob(self, job, workflow, wmTask = None, jobNumber = 0):
        """
        _saveJob_

        Actually do the mechanics of saving the job to a pickle file
        """
        priority = None

        if wmTask:
            # If we managed to load the task,
            # the url should be valid
            job['spec']    = workflow.spec
            job['task']    = wmTask.getPathName()
            priority       = wmTask.getTaskPriority()
            if job.get('sandbox', None) == None:
                job['sandbox'] = wmTask.data.input.sandbox

        job['priority'] = priority
        job['counter']  = jobNumber
        cacheDir = job.getCache()
        job['cache_dir'] = cacheDir
        output = open(os.path.join(cacheDir, 'job.pkl'), 'wb')  # binary mode for pickle
        cPickle.dump(job, output, cPickle.HIGHEST_PROTOCOL)
        output.flush()
        os.fsync(output.fileno())
        output.close()


        return
Example #20
 def OnTaskSuccess(self, task):
   # Log the succeed tasks so that they are ensured to be frozen in case
   # of a sudden death.
   self._resume_output.write('-f\n^{}$\n'.format(re.escape(task.name)))
   # Makes sure the task freezing command line make it to the disk.
   self._resume_output.flush()
   os.fsync(self._resume_output.fileno())
Example #21
 def session_write(self):
     """ Writes the locked set to the session file. 
         The global lock MUST be held for this function to work, although on NFS additional
         locking is done
         Raises RepositoryError if session file is inaccessible """
     # logger.debug("Opening Session File: %s " % self.fn )
     try:
         # If this fails, we want to shutdown the repository (corruption
         # possible)
         fd = self.delayopen(self.fn)
         if not self.afs:
             fcntl.lockf(fd, fcntl.LOCK_EX)
         self.delaywrite(fd, pickle.dumps(self.locked))
         if not self.afs:
             fcntl.lockf(fd, fcntl.LOCK_UN)
         os.fsync(fd)
         os.close(fd)
     except OSError as x:
         if x.errno != errno.ENOENT:
             raise RepositoryError(self.repo, "Error on session file access '%s': %s" % (self.fn, x))
         else:
             # logger.debug( "File NOT found %s" %self.fn )
             raise RepositoryError(
                 self.repo,
                 "SessionWrite: Own session file not found! Possibly deleted by another ganga session.\n\
                                 Possible reasons could be that this computer has a very high load, or that the system clocks on computers running Ganga are not synchronized.\n\
                                 On computers with very high load and on network filesystems, try to avoid running concurrent ganga sessions for long.\n '%s' : %s"
                 % (self.fn, x),
             )
     except IOError as x:
         raise RepositoryError(self.repo, "Error on session file locking '%s': %s" % (self.fn, x))
Example #22
    def _save(self, name, content):
        full_path = self.path(name)

        # Create any intermediate directories that do not exist.
        # Note that there is a race between os.path.exists and os.makedirs:
        # if os.makedirs fails with EEXIST, the directory was created
        # concurrently, and we can continue normally. Refs #16082.
        directory = os.path.dirname(full_path)
        if not os.path.exists(directory):
            try:
                if self.directory_permissions_mode is not None:
                    # os.makedirs applies the global umask, so we reset it,
                    # for consistency with file_permissions_mode behavior.
                    old_umask = os.umask(0)
                    try:
                        os.makedirs(directory, self.directory_permissions_mode)
                    finally:
                        os.umask(old_umask)
                else:
                    os.makedirs(directory)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
        if not os.path.isdir(directory):
            raise IOError("%s exists and is not a directory." % directory)

        tmp_file = tempfile.mktemp()
        filey = open(tmp_file, 'wb')
        filey.write(content.read())
        # make sure that all data is on disk
        filey.flush()
        os.fsync(filey.fileno())
        filey.close()
        file_move_safe(tmp_file, full_path, allow_overwrite=True)
        return name
Example #23
def table_to_file(table, filepath, freshfile, adddic=None):
    'save table to a file with additional columns'
    if freshfile:
        mode = 'w'
    else:
        mode = 'a'

    colnames = get_all_colnames(table)

    with open(filepath, mode) as f:
        if adddic != None:
            colnames += adddic.keys()
        colnamestr = ','.join(colnames) + '\n'
        if freshfile:
            f.write(colnamestr)
        for row in table:
            if adddic != None:
                rowcopy = dict(row.items() + adddic.items())
            else:
                rowcopy = row
            rowstr = [ rowcopy[k] if rowcopy.has_key(k) else '' for k in colnames ]
            rowstr = [str(x) for x in rowstr]
            rowstr = ','.join(rowstr) + '\n'
            f.write(rowstr)
            f.flush()
            os.fsync(f.fileno())
Example #24
File: pymw.py Project: dMaggot/pymw
	def pymw_worker_write(output, options):
		import os
		outfile = open(sys.argv[2], 'wb')
		pickle.Pickler(outfile).dump(output)
		outfile.flush()
		os.fsync(outfile.fileno())
		outfile.close()
Example #25
    def test00CropFail(self):
        # make the dirs
        cam = moduleUnderTest.cameras[0]
        indir = os.path.join(moduleUnderTest.root, "2013-07-01", cam.shortname)
        os.makedirs(os.path.join(indir, "hires"))

        # put a fragment of a test jpg in the indir
        tfn = "SampleImage.jpg"
        tfd = os.open(tfn, os.O_RDONLY | os.O_BINARY)
        buf = os.read(tfd, 8192)
        logging.info("test00CropFail(): buf size is %d" % len(buf))
        os.close(tfd)
        ifn = "12-00-01-12345.jpg"
        ifp = os.path.join(indir, ifn)
        infd = os.open(ifp, os.O_WRONLY | os.O_BINARY | os.O_CREAT)
        os.write(infd, buf)
        os.fsync(infd)
        os.close(infd)
        time.sleep(2)

        hfp = os.path.join(indir, "hires", ifn)

        # run processImage().
        # Since the mod time is recent, The file should stay in indir
        moduleUnderTest.processImage(indir, ifn, cam)
        assert os.path.exists(ifp) and not os.path.exists(hfp)

        # set the file's mod time back over an hour and run processImage().
        # This time the file should move to the hires dir
        os.utime(ifp, (int(time.time()), time.time() - 3602))
        moduleUnderTest.processImage(indir, ifn, cam)
        assert not os.path.exists(ifp) and os.path.exists(hfp)
Example #26
    def handle_status(self, data, **kwargs):
        if self.fp is None:
            return

        fp = self.fp
        minimum_severity = self.minimum_severity

        host_identifier = kwargs.get('host_identifier')
        created = dt.datetime.utcnow().isoformat()

        for item in data.get('data', []):
            if int(item['severity']) < minimum_severity:
                continue

            if 'created' in item:
                item['created'] = item['created'].isoformat()

            json_dump({
                '@version': 1,
                '@host_identifier': host_identifier,
                '@timestamp': item.get('created', created),
                '@message': item.get('message', ''),
                'log_type': 'status',
                'line': item.get('line', ''),
                'message': item.get('message', ''),
                'severity': item.get('severity', ''),
                'filename': item.get('filename', ''),
                'osquery_version': item.get('version'),  # may be null
                'created': created,
            }, fp)
            fp.write('\r\n')
        fp.flush()
        os.fsync(fp.fileno())
Example #27
File: pymw.py Project: dMaggot/pymw
	def pymw_master_write(self, output, loc):
		import os
		outfile = open(loc, 'wb')
		pickle.Pickler(outfile).dump(output)
		outfile.flush()
		os.fsync(outfile.fileno())
		outfile.close()
Example #28
    def input(self, **kwargs):
        options = dict(self.options)
        if self.infile is None:
            if "{infile}" in self.command:
                if self.filename is None:
                    self.infile = tempfile.NamedTemporaryFile(mode="w")
                    self.infile.write(self.content)
                    self.infile.flush()
                    os.fsync(self.infile)
                    options["infile"] = self.infile.name
                else:
                    self.infile = open(self.filename)
                    options["infile"] = self.filename

        if "{outfile}" in self.command and not "outfile" in options:
            ext = ".%s" % self.type if self.type else ""
            self.outfile = tempfile.NamedTemporaryFile(mode="r+", suffix=ext)
            options["outfile"] = self.outfile.name
        try:
            command = fstr(self.command).format(**options)
            proc = subprocess.Popen(
                command, shell=True, cwd=self.cwd, stdout=self.stdout, stdin=self.stdin, stderr=self.stderr
            )
            if self.infile is None:
                filtered, err = proc.communicate(self.content)
            else:
                filtered, err = proc.communicate()
        except (IOError, OSError) as e:
            raise FilterError("Unable to apply %s (%r): %s" % (self.__class__.__name__, self.command, e))
Example #29
 def close(self, _defer=False):
     # _defer == True is how a stream can notify Archive that the stream is
     # now closed.  Calling it directly is not recommended.
     if _defer:
         # This call came from our open stream.
         self._stream = None
         if not self._defer_close:
             # We are not yet ready to close.
             return
     if self._stream is not None:
         # We have a stream open! don't close, but remember we were asked to.
         self._defer_close = True
         return
     self.denit()
     # If there is a file attached...
     if hasattr(self, 'f'):
         # Make sure it is not already closed...
         if getattr(self.f, 'closed', False):
             return
         # Flush it if not read-only...
         if self.f.mode != 'r' and self.f.mode != 'rb':
             self.f.flush()
             os.fsync(self.f.fileno())
         # and then close it, if we opened it...
         if getattr(self, '_close', None):
             self.f.close()
Example #30
def _fsync_files(filenames):
    """Call fsync() a list of file names

    The filenames should be absolute paths already.

    """
    touched_directories = set()

    mode = os.O_RDONLY

    # Windows
    if hasattr(os, 'O_BINARY'):
        mode |= os.O_BINARY

    for filename in filenames:
        fd = os.open(filename, mode)
        os.fsync(fd)
        os.close(fd)
        touched_directories.add(os.path.dirname(filename))

    # Some OSes also require us to fsync the directory where we've
    # created files or subdirectories.
    if hasattr(os, 'O_DIRECTORY'):
        for dirname in touched_directories:
            fd = os.open(dirname, os.O_RDONLY | os.O_DIRECTORY)
            os.fsync(fd)
            os.close(fd)
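A brief usage sketch (paths are illustrative; the containing directory is assumed to already exist):

import os

paths = [os.path.abspath(p) for p in ("out/part-0000.csv", "out/part-0001.csv")]
for p in paths:
    with open(p, "w") as f:
        f.write("id,value\n")
_fsync_files(paths)   # fsync each file, then the directories that contain them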
Example #31
    def migrate_lbryum_to_torba(path):
        if not os.path.exists(path):
            return None, None
        with open(path, 'r') as f:
            unmigrated_json = f.read()
            unmigrated = json.loads(unmigrated_json)
        # TODO: After several public releases of new torba based wallet, we can delete
        #       this lbryum->torba conversion code and require that users who still
        #       have old structured wallets install one of the earlier releases that
        #       still has the below conversion code.
        if 'master_public_keys' not in unmigrated:
            return None, None
        total = unmigrated.get('addr_history')
        receiving_addresses, change_addresses = set(), set()
        for _, unmigrated_account in unmigrated.get('accounts', {}).items():
            receiving_addresses.update(
                map(unhexlify, unmigrated_account.get('receiving', [])))
            change_addresses.update(
                map(unhexlify, unmigrated_account.get('change', [])))
        log.info(
            "Wallet migrator found %s receiving addresses and %s change addresses. %s in total on history.",
            len(receiving_addresses), len(change_addresses), len(total))

        migrated_json = json.dumps(
            {
                'version':
                1,
                'name':
                'My Wallet',
                'accounts':
                [{
                    'version': 1,
                    'name': 'Main Account',
                    'ledger': 'lbc_mainnet',
                    'encrypted': unmigrated['use_encryption'],
                    'seed': unmigrated['seed'],
                    'seed_version': unmigrated['seed_version'],
                    'private_key': unmigrated['master_private_keys']['x/'],
                    'public_key': unmigrated['master_public_keys']['x/'],
                    'certificates': unmigrated.get('claim_certificates', {}),
                    'address_generator': {
                        'name': 'deterministic-chain',
                        'receiving': {
                            'gap': 20,
                            'maximum_uses_per_address': 2
                        },
                        'change': {
                            'gap': 6,
                            'maximum_uses_per_address': 2
                        }
                    }
                }]
            },
            indent=4,
            sort_keys=True)
        mode = os.stat(path).st_mode
        i = 1
        backup_path_template = os.path.join(os.path.dirname(path),
                                            "old_lbryum_wallet") + "_%i"
        while os.path.isfile(backup_path_template % i):
            i += 1
        os.rename(path, backup_path_template % i)
        temp_path = "%s.tmp.%s" % (path, os.getpid())
        with open(temp_path, "w") as f:
            f.write(migrated_json)
            f.flush()
            os.fsync(f.fileno())
        os.rename(temp_path, path)
        os.chmod(path, mode)
        return receiving_addresses, change_addresses
Example #32
    def get_socket(self):
        if self.use_ssl:
            # Try with CA first, since they are preferred over self-signed certs
            # and are always accepted (even if a previous pinned self-signed
            # cert exists).
            cert_path = os.path.join(self.config_path, 'certs', sanitize_filename(self.host, replacement_text='_'))
            has_pinned_self_signed = os.path.exists(cert_path)
            s, give_up = self._get_socket_and_verify_ca_cert(suppress_errors=has_pinned_self_signed)
            if s:
                if has_pinned_self_signed:
                    # Delete pinned cert. They now have a valid CA-signed cert.
                    # This hopefully undoes the bug in previous EC versions that
                    # refused to consider CA-signed certs at all if the server
                    # ever had a self-signed cert in the past.
                    try:
                        os.remove(cert_path)
                        self.print_error("Server is now using a CA-signed certificate, deleted previous self-signed certificate:", cert_path)
                    except OSError:
                        pass
                return s
            elif give_up:
                # low-level error in _get_socket_and_verify_ca_cert, give up
                return
            # if we get here, certificate is not CA signed, so try the alternate
            # "pinned self-signed" method.
            if not has_pinned_self_signed:
                is_new = True
                # get server certificate. Do not use ssl.get_server_certificate
                # because it does not work with proxy
                s = self.get_simple_socket()
                if s is None:
                    return
                try:
                    context = self.get_ssl_context(cert_reqs=ssl.CERT_NONE, ca_certs=None)
                    s = context.wrap_socket(s)
                except ssl.SSLError as e:
                    self.print_error("SSL error retrieving SSL certificate:", e)
                    return
                except:
                    return

                dercert = s.getpeercert(True)
                s.close()
                cert = ssl.DER_cert_to_PEM_cert(dercert)
                # workaround android bug
                cert = re.sub("([^\n])-----END CERTIFICATE-----","\\1\n-----END CERTIFICATE-----",cert)
                temporary_path = cert_path + '.temp'
                util.assert_datadir_available(self.config_path)
                with open(temporary_path, "w", encoding='utf-8') as f:
                    f.write(cert)
                    f.flush()
                    os.fsync(f.fileno())
            else:
                is_new = False
                temporary_path = None

        s = self.get_simple_socket()
        if s is None:
            return

        if self.use_ssl:
            try:
                context = self.get_ssl_context(cert_reqs=ssl.CERT_REQUIRED,
                                               ca_certs=(temporary_path if is_new else cert_path))
                s = context.wrap_socket(s, do_handshake_on_connect=True)
            except socket.timeout:
                self.print_error('timeout')
                return
            except ssl.SSLError as e:
                self.print_error("SSL error:", e)
                if e.errno != 1:
                    return
                if is_new:
                    rej = cert_path + '.rej'
                    try:
                        if os.path.exists(rej):
                            os.unlink(rej)
                        os.rename(temporary_path, rej)
                    except OSError as e2:
                        self.print_error("Could not rename rejected certificate:", rej, repr(e2))
                else:
                    util.assert_datadir_available(self.config_path)
                    with open(cert_path, encoding='utf-8') as f:
                        cert = f.read()
                    try:
                        b = pem.dePem(cert, 'CERTIFICATE')
                        x = x509.X509(b)
                    except:
                        if util.is_verbose:
                            self.print_error("Error checking certificate, traceback follows")
                            traceback.print_exc(file=sys.stderr)
                        self.print_error("wrong certificate")
                        self.bad_certificate(self.server, cert_path)
                        return
                    try:
                        x.check_date()
                    except:
                        self.print_error("certificate has expired:", cert_path)
                        try:
                            os.unlink(cert_path)
                            self.print_error("Removed expired certificate:", cert_path)
                        except OSError as e2:
                            self.print_error("Could not remove expired certificate:", cert_path, repr(e2))
                        return
                    self.print_error("wrong certificate")
                    self.bad_certificate(self.server, cert_path)
                if e.errno == 104:
                    return
                return

            if is_new:
                self.print_error("saving certificate")
                os.rename(temporary_path, cert_path)

        return s
Example #33
while not done:

    action = weighted_choice(proportions)

    if action == 'r':
        before, after, queryTime, beforeClock, afterClock, queryClock, extra, lfnDict = doRead(
            readDepth)
    elif action == 'i':
        before, after, queryTime, beforeClock, afterClock, queryClock, extra = doInsert(
            writeDepth, maxInsert)
    elif action == 'd':
        before, after, queryTime, beforeClock, afterClock, queryClock, extra, lfnDict = doRead(
            readDepth)
        before, after, queryTime, beforeClock, afterClock, queryClock, extra = doRemove(
            lfnDict)

    timeFile.write("%s\t%s\t%s\t%s\n" % (before, after, queryTime, extra))
    timeFile.flush()
    os.fsync(timeFile)
    clockFile.write("%s\t%s\t%s\t%s\n" %
                    (beforeClock, afterClock, queryClock, extra))
    clockFile.flush()
    os.fsync(clockFile)

    if (time.time() - start) > maxDuration:
        done = True

timeFile.close()
clockFile.close()
Example #34
 def fsync(self, isfsyncfile):
     self._fflush()
     if isfsyncfile and hasattr(os, 'fdatasync'):
         os.fdatasync(self.fd)
     else:
         os.fsync(self.fd)
Example #35
    def acquire_lock(self):
        """
        Try to acquire the lock.

        :Parameters:
            #. result (boolean): Whether the lock is successfully acquired.
            #. code (integer, Exception): Integer code indicating how the lock was
               successfully set or why acquiring it failed. If setting the lock
               generates an error, the error is caught and returned as an
               Exception code.

               *  0: Lock is successfully set for normal reasons. In this case result
                  is True.
               *  1: Lock was already set, no need to set it again. In this case result
                  is True.
               *  2: An old, forgotten lock was found and removed. The new lock is
                  successfully set. In this case result is True.
               *  3: Lock was not successfully set before timeout. In this case result
                  is False.
               *  Exception: Lock was not successfully set because of an unexpected error.
                  The error is caught and returned in this Exception. In this case
                  result is False.
        """
        # set acquire flag
        code = 0
        acquired = False
        t0 = t1 = time.time()
        LP = self.__lockPass + '\n'
        bytesLP = LP.encode()
        # set general while loop with timeout condition
        while (t1 - t0) <= self.__timeout:
            # try to set acquired to True by reading an empty lock file
            try:
                while not acquired and (t1 - t0) <= self.__timeout:
                    if os.path.isfile(self.__lockPath):
                        with open(self.__lockPath, 'rb') as fd:
                            lock = fd.readlines()
                        # lock file is empty
                        if not len(lock):
                            acquired = True
                            break
                        # if it is already locked
                        if lock[0] == bytesLP:
                            code = 1
                            acquired = True
                            break
                        if t1 - float(lock[1]) > self.__deadLock:
                            acquired = True
                            code = 2
                            break
                        #print('locked ',(t1-t0), t0, t1, lock, self.__lockPath)
                        # wait a bit
                        if self.__wait:
                            time.sleep(self.__wait)
                        t1 = time.time()
                    else:
                        acquired = True
                        break
            except Exception as err:
                code = err
                acquired = False
            # impossible to acquire because of an error or timeout.
            if not acquired:
                break
            # try to write lock
            try:
                tic = time.time()
                with open(self.__lockPath, 'wb') as fd:
                    #fd.write( str(LP+'%.6f'%t1).encode('utf-8') )
                    fd.write(str(LP + '%.6f' % t1).encode())
                    fd.flush()
                    os.fsync(fd.fileno())
                toc = time.time()
            except Exception as e:
                code = str(e)
                acquired = False
                break
            # sleep for double tic-toc or 0.1 ms which ever one is higher
            s = max([2 * (toc - tic), 0.0001])
            time.sleep(s)
            # check if lock is still acquired by the same lock pass
            with open(self.__lockPath, 'rb') as fd:
                lock = fd.readlines()
            if len(lock) >= 1:
                if lock[0] == bytesLP:
                    acquired = True
                    break
                else:
                    acquired = False
                    t1 = time.time()
                    continue
            else:
                acquired = False
                t1 = time.time()
                continue
        # return whether it is acquired or not
        if not acquired and not code:
            code = 3
        return acquired, code
Example #36
 def write(self, message):
     self.terminal.write(message)
     self.log.write(message)
     self.log.flush()
     os.fsync(self.log)
Example #37
import os
import time

flog = open(time.strftime("%Y%m%d%H%M%S", time.localtime()) + '.txt', 'a')

flog.write('aabbc' + '\n')
flog.flush()
os.fsync(flog.fileno())

flog.close()
Example #38
def sync(fd):
    fd.flush()
    os.fsync(fd.fileno())
resultFile.write("FLAGS.max_grad_norm, %.3f\n" %(FLAGS.max_grad_norm))
resultFile.write("initScale, %.3f\n" %(FLAGS.init_scale))
resultFile.write("numEpochsFullLR, %0.d\n" %(FLAGS.num_epochs_full_lr))
resultFile.write("numEpochs, %0.d\n" %(FLAGS.num_epochs))
resultFile.write("baseLearningRate, %.3f\n" %(FLAGS.learning_rate))
resultFile.write("lrDecay, %.3f\n" %(FLAGS.lr_decay))
resultFile.write("forgetBias, %.3f\n" %(FLAGS.forget_bias))

if (FLAGS.shuffle):
    resultFile.write("Shuffle\n")
else:
    resultFile.write("NoShuffle\n")

resultFile.write("\nDataset, VocabSize, TrainWords, ValidWords, Epoch, TrainPrecision, TrainPerplexity, TrainCrossEntropy, Epoch, ValidPrecision, ValidPerplexity, ValidCrossEntropy\n")
resultFile.flush()
os.fsync(resultFile.fileno())

def linear(inp, output_dim, scope_name=None, stddev=1.0, reuse_scope=False):
  norm = tf.random_normal_initializer(stddev=stddev, dtype=tf.float32)
  const = tf.constant_initializer(0.0, dtype=tf.float32)
  with tf.variable_scope(scope_name or 'G/linear') as scope:
    scope.set_regularizer(tf.contrib.layers.l2_regularizer(scale=FLAGS.reg_scale))
    if reuse_scope:
      scope.reuse_variables()
    #print('inp.get_shape(): {}'.format(inp.get_shape()))
    w = tf.get_variable('w', [inp.get_shape()[1], output_dim], initializer=norm, dtype=tf.float32)
    b = tf.get_variable('b', [output_dim], initializer=const, dtype=tf.float32)
  return tf.matmul(inp, w) + b

def plus(a,b):
    c = []
Example #40
 def flush(self):
     self.console.flush()
     if self.f is not None:
         self.f.flush()
         import os
         os.fsync(self.f.fileno())
Example #41
def fsync_dir(path):
    fd = os.open(path, os.O_RDONLY)
    try:
        os.fsync(fd)
    finally:
        os.close(fd)
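fsync_dir is typically paired with a write-to-temp-then-rename sequence; a sketch of the combined pattern using the helper above (atomic_write is an illustrative name):

import os
import tempfile

def atomic_write(path, data):
    dirpath = os.path.dirname(os.path.abspath(path))
    fd, tmp = tempfile.mkstemp(dir=dirpath)
    try:
        with os.fdopen(fd, 'wb') as f:
            f.write(data)              # data is expected to be bytes
            f.flush()
            os.fsync(f.fileno())       # file contents reach stable storage
        os.rename(tmp, path)           # atomic replacement of the target
        fsync_dir(dirpath)             # make the new directory entry durable
    except BaseException:
        if os.path.exists(tmp):
            os.remove(tmp)
        raise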
Example #42
def case_preserving_open_file(path, mode='wb', mkdir_mode=0o777):
    '''
    Open the file pointed to by path with the specified mode. If any
    directories in path do not exist, they are created. Returns the
    opened file object and the path to the opened file object. This path is
    guaranteed to have the same case as the on disk path. For case insensitive
    filesystems, the returned path may be different from the passed in path.
    The returned path is always unicode and always an absolute path.

    If mode is None, then this function assumes that path points to a directory
    and return the path to the directory as the file object.

    mkdir_mode specifies the mode with which any missing directories in path
    are created.
    '''
    if isbytestring(path):
        path = path.decode(filesystem_encoding)

    path = os.path.abspath(path)

    sep = force_unicode(os.sep, 'ascii')

    if path.endswith(sep):
        path = path[:-1]
    if not path:
        raise ValueError('Path must not point to root')

    components = path.split(sep)
    if not components:
        raise ValueError('Invalid path: %r' % path)

    cpath = sep
    if iswindows:
        # Always upper case the drive letter and add a trailing slash so that
        # the first os.listdir works correctly
        cpath = components[0].upper() + sep

    bdir = path if mode is None else os.path.dirname(path)
    if not os.path.exists(bdir):
        os.makedirs(bdir, mkdir_mode)

    # Walk all the directories in path, putting the on disk case version of
    # the directory into cpath
    dirs = components[1:] if mode is None else components[1:-1]
    for comp in dirs:
        cdir = os.path.join(cpath, comp)
        cl = comp.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except:
            # Don't have permission to do the listdir, assume the case is
            # correct as we have no way to check it.
            pass
        else:
            if len(candidates) == 1:
                cdir = os.path.join(cpath, candidates[0])
            # else: We are on a case sensitive file system so cdir must already
            # be correct
        cpath = cdir

    if mode is None:
        ans = fpath = cpath
    else:
        fname = components[-1]
        ans = lopen(os.path.join(cpath, fname), mode)
        # Ensure file and all its metadata is written to disk so that subsequent
        # listdir() has file name in it. I don't know if this is actually
        # necessary, but given the diversity of platforms, best to be safe.
        ans.flush()
        os.fsync(ans.fileno())

        cl = fname.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except EnvironmentError:
            # The containing directory, somehow disappeared?
            candidates = []
        if len(candidates) == 1:
            fpath = os.path.join(cpath, candidates[0])
        else:
            # We are on a case sensitive filesystem
            fpath = os.path.join(cpath, fname)
    return ans, fpath
Example #43
def workerScript(jobStore,
                 config,
                 jobName,
                 jobStoreID,
                 redirectOutputToLogFile=True):
    """
    Worker process script, runs a job. 
    
    :param str jobName: The "job name" (a user friendly name) of the job to be run
    :param str jobStoreLocator: Specifies the job store to use
    :param str jobStoreID: The job store ID of the job to be run
    
    :return int: 1 if a job failed, or 0 if all jobs succeeded
    """

    configureRootLogger()
    setLogLevel(config.logLevel)

    ##########################################
    #Create the worker killer, if requested
    ##########################################

    logFileByteReportLimit = config.maxLogFileSize

    if config.badWorker > 0 and random.random() < config.badWorker:
        # We need to kill the process we are currently in, to simulate worker
        # failure. We don't want to just send SIGKILL, because we can't tell
        # that from a legitimate OOM on our CI runner. We're going to send
        # SIGUSR1 so our terminations are distinctive, and then SIGKILL if that
        # didn't stick. We definitely don't want to do this from *within* the
        # process we are trying to kill, so we fork off. TODO: We can still
        # leave the killing code running after the main Toil flow is done, but
        # since it's now in a process instead of a thread, the main Python
        # process won't wait around for its timeout to expire. I think this is
        # better than the old thread-based way where all of Toil would wait
        # around to be killed.

        killTarget = os.getpid()
        sleepTime = config.badWorkerFailInterval * random.random()
        if os.fork() == 0:
            # We are the child
            # Let the parent run some amount of time
            time.sleep(sleepTime)
            # Kill it gently
            os.kill(killTarget, signal.SIGUSR1)
            # Wait for that to stick
            time.sleep(0.01)
            try:
                # Kill it harder. Hope the PID hasn't already been reused.
                # If we succeeded the first time, this will OSError
                os.kill(killTarget, signal.SIGKILL)
            except OSError:
                pass
            # Exit without doing any of Toil's cleanup
            os._exit(0)

        # We don't need to reap the child. Either it kills us, or we finish
        # before it does. Either way, init will have to clean it up for us.

    ##########################################
    #Load the environment for the job
    ##########################################

    #First load the environment for the job.
    with jobStore.readSharedFileStream("environment.pickle") as fileHandle:
        environment = safeUnpickleFromStream(fileHandle)
    env_reject = {
        "TMPDIR", "TMP", "HOSTNAME", "HOSTTYPE", "HOME", "LOGNAME", "USER",
        "DISPLAY", "JAVA_HOME"
    }
    for i in environment:
        if i == "PATH":
            # Handle path specially. Sometimes e.g. leader may not include
            # /bin, but the Toil appliance needs it.
            if i in os.environ and os.environ[i] != '':
                # Use the provided PATH and then the local system's PATH
                os.environ[i] = environment[i] + ':' + os.environ[i]
            else:
                # Use the provided PATH only
                os.environ[i] = environment[i]
        elif i not in env_reject:
            os.environ[i] = environment[i]
    # sys.path is used by __import__ to find modules
    if "PYTHONPATH" in environment:
        for e in environment["PYTHONPATH"].split(':'):
            if e != '':
                sys.path.append(e)

    toilWorkflowDir = Toil.getLocalWorkflowDir(config.workflowID,
                                               config.workDir)

    ##########################################
    #Setup the temporary directories.
    ##########################################

    # Dir to put all this worker's temp files in.
    localWorkerTempDir = tempfile.mkdtemp(dir=toilWorkflowDir)
    os.chmod(localWorkerTempDir, 0o755)

    ##########################################
    #Setup the logging
    ##########################################

    #This is mildly tricky because we don't just want to
    #redirect stdout and stderr for this Python process; we want to redirect it
    #for this process and all children. Consequently, we can't just replace
    #sys.stdout and sys.stderr; we need to mess with the underlying OS-level
    #file descriptors. See <http://stackoverflow.com/a/11632982/402891>

    #When we start, standard input is file descriptor 0, standard output is
    #file descriptor 1, and standard error is file descriptor 2.

    # Do we even want to redirect output? Let the config make us not do it.
    redirectOutputToLogFile = redirectOutputToLogFile and not config.disableWorkerOutputCapture

    #What file do we want to point FDs 1 and 2 to?
    tempWorkerLogPath = os.path.join(localWorkerTempDir, "worker_log.txt")

    if redirectOutputToLogFile:
        # Announce that we are redirecting logging, and where it will now go.
        # This is important if we are trying to manually trace a faulty worker invocation.
        logger.info("Redirecting logging to %s", tempWorkerLogPath)
        sys.stdout.flush()
        sys.stderr.flush()

        # Save the original stdout and stderr (by opening new file descriptors
        # to the same files)
        origStdOut = os.dup(1)
        origStdErr = os.dup(2)

        # Open the file to send stdout/stderr to.
        logFh = os.open(tempWorkerLogPath,
                        os.O_WRONLY | os.O_CREAT | os.O_APPEND)

        # Replace standard output with a descriptor for the log file
        os.dup2(logFh, 1)

        # Replace standard error with a descriptor for the log file
        os.dup2(logFh, 2)

        # Since we only opened the file once, all the descriptors duped from
        # the original will share offset information, and won't clobber each
        # others' writes. See <http://stackoverflow.com/a/5284108/402891>. This
        # shouldn't matter, since O_APPEND seeks to the end of the file before
        # every write, but maybe there's something odd going on...

        # Close the descriptor we used to open the file
        os.close(logFh)

    debugging = logging.getLogger().isEnabledFor(logging.DEBUG)
    ##########################################
    #Worker log file trapped from here on in
    ##########################################

    jobAttemptFailed = False
    statsDict = MagicExpando()
    statsDict.jobs = []
    statsDict.workers.logsToMaster = []
    blockFn = lambda: True
    listOfJobs = [jobName]
    job = None
    try:

        #Put a message at the top of the log, just to make sure it's working.
        logger.info("---TOIL WORKER OUTPUT LOG---")
        sys.stdout.flush()

        logProcessContext(config)

        ##########################################
        #Connect to the deferred function system
        ##########################################
        deferredFunctionManager = DeferredFunctionManager(toilWorkflowDir)

        ##########################################
        #Load the JobDescription
        ##########################################

        jobDesc = jobStore.load(jobStoreID)
        listOfJobs[0] = str(jobDesc)
        logger.debug("Parsed job description")

        ##########################################
        #Cleanup from any earlier invocation of the job
        ##########################################

        if jobDesc.command == None:
            logger.debug("Job description has no body to run.")
            # Cleanup jobs already finished
            predicate = lambda jID: jobStore.exists(jID)
            jobDesc.filterSuccessors(predicate)
            jobDesc.filterServiceHosts(predicate)
            logger.debug(
                "Cleaned up any references to completed successor jobs")

        # This cleans the old log file which may
        # have been left if the job is being retried after a job failure.
        oldLogFile = jobDesc.logJobStoreFileID
        if oldLogFile != None:
            jobDesc.logJobStoreFileID = None
            jobStore.update(jobDesc)  #Update first, before deleting any files
            jobStore.deleteFile(oldLogFile)

        ##########################################
        # If a checkpoint exists, restart from the checkpoint
        ##########################################

        if isinstance(
                jobDesc,
                CheckpointJobDescription) and jobDesc.checkpoint is not None:
            # The job is a checkpoint, and is being restarted after previously completing
            logger.debug("Job is a checkpoint")
            # If the checkpoint still has extant successors or services, its
            # subtree didn't complete properly. We handle the restart of the
            # checkpoint here, removing its previous subtree.
            if next(jobDesc.successorsAndServiceHosts(), None) is not None:
                logger.debug("Checkpoint has failed; restoring")
                # Reduce the try count
                assert jobDesc.remainingTryCount >= 0
                jobDesc.remainingTryCount = max(0,
                                                jobDesc.remainingTryCount - 1)
                jobDesc.restartCheckpoint(jobStore)
            # Otherwise, the job and successors are done, and we can cleanup stuff we couldn't clean
            # because of the job being a checkpoint
            else:
                logger.debug(
                    "The checkpoint jobs seems to have completed okay, removing any checkpoint files to delete."
                )
                #Delete any remnant files
                list(
                    map(
                        jobStore.deleteFile,
                        list(
                            filter(jobStore.fileExists,
                                   jobDesc.checkpointFilesToDelete))))

        ##########################################
        #Setup the stats, if requested
        ##########################################

        if config.stats:
            startClock = getTotalCpuTime()

        startTime = time.time()
        while True:
            ##########################################
            #Run the job body, if there is one
            ##########################################

            logger.info("Working on job %s", jobDesc)

            if jobDesc.command is not None:
                assert jobDesc.command.startswith("_toil ")
                logger.debug("Got a command to run: %s" % jobDesc.command)
                # Load the job. It will use the same JobDescription we have been using.
                job = Job.loadJob(jobStore, jobDesc)
                if isinstance(jobDesc, CheckpointJobDescription):
                    # If it is a checkpoint job, save the command
                    jobDesc.checkpoint = jobDesc.command

                logger.info("Loaded body %s from description %s", job, jobDesc)

                # Create a fileStore object for the job
                fileStore = AbstractFileStore.createFileStore(
                    jobStore,
                    jobDesc,
                    localWorkerTempDir,
                    blockFn,
                    caching=not config.disableCaching)
                with job._executor(stats=statsDict if config.stats else None,
                                   fileStore=fileStore):
                    with deferredFunctionManager.open() as defer:
                        with fileStore.open(job):
                            # Get the next block function to wait on committing this job
                            blockFn = fileStore.waitForCommit

                            # Run the job, save new successors, and set up
                            # locally (but don't commit) successor
                            # relationships and job completion.
                            # Pass everything as name=value because Cactus
                            # likes to override _runner when it shouldn't and
                            # it needs some hope of finding the arguments it
                            # wants across multiple Toil versions. We also
                            # still pass a jobGraph argument to placate old
                            # versions of Cactus.
                            job._runner(jobGraph=None,
                                        jobStore=jobStore,
                                        fileStore=fileStore,
                                        defer=defer)

                # Accumulate messages from this job & any subsequent chained jobs
                statsDict.workers.logsToMaster += fileStore.loggingMessages

                logger.info("Completed body for %s", jobDesc)

            else:
                # The command may be None, in which case the JobDescription is
                # either a shell ready to be deleted or has been scheduled
                # after a failure, to clean up
                logger.debug("No user job to run, so finishing")
                break

            if AbstractFileStore._terminateEvent.isSet():
                raise RuntimeError("The termination flag is set")

            ##########################################
            #Establish if we can run another job within the worker
            ##########################################
            successor = nextChainable(jobDesc, jobStore, config)
            if successor is None or config.disableChaining:
                # Can't chain any more jobs. We are going to stop.

                logger.info("Not chaining from job %s", jobDesc)

                # TODO: Somehow the commit happens even if we don't start it here.

                break

            logger.info("Chaining from %s to %s", jobDesc, successor)

            ##########################################
            # We have a single successor job that is not a checkpoint job. We
            # reassign the ID of the current JobDescription to the successor.
            # We can then delete the successor JobDescription (under its old
            # ID) in the jobStore, as it is wholly incorporated into the
            # current one.
            ##########################################

            # Make sure nothing has gone wrong and we can really chain
            assert jobDesc.memory >= successor.memory
            assert jobDesc.cores >= successor.cores

            # Save the successor's original ID, so we can clean it (and its
            # body) up after we finish executing it.
            successorID = successor.jobStoreID

            # add the successor to the list of jobs run
            listOfJobs.append(str(successor))

            # Now we need to become that successor, under the original ID.
            successor.replace(jobDesc)
            jobDesc = successor

            # Problem: successor's job body is a file that will be cleaned up
            # when we delete the successor job by ID. We can't just move it. So
            # we need to roll up the deletion of the successor job by ID with
            # the deletion of the job ID we're currently working on.
            jobDesc.jobsToDelete.append(successorID)

            # Clone the now-current JobDescription (which used to be the successor).
            # TODO: Why??? Can we not?
            jobDesc = copy.deepcopy(jobDesc)

            # Build a fileStore to update the job and commit the replacement.
            # TODO: can we have a commit operation without an entire FileStore???
            fileStore = AbstractFileStore.createFileStore(
                jobStore,
                jobDesc,
                localWorkerTempDir,
                blockFn,
                caching=not config.disableCaching)

            # Update blockFn to wait for that commit operation.
            blockFn = fileStore.waitForCommit

            # This will update the job once the previous job is done updating
            fileStore.startCommit(jobState=True)

            # Clone the current job description again, so that further updates
            # to it (such as new successors being added when it runs) occur
            # after the commit process we just kicked off, and aren't committed
            # early or partially.
            jobDesc = copy.deepcopy(jobDesc)

            logger.debug("Starting the next job")

        ##########################################
        #Finish up the stats
        ##########################################
        if config.stats:
            totalCPUTime, totalMemoryUsage = getTotalCpuTimeAndMemoryUsage()
            statsDict.workers.time = str(time.time() - startTime)
            statsDict.workers.clock = str(totalCPUTime - startClock)
            statsDict.workers.memory = str(totalMemoryUsage)

        # log the worker log path here so that if the file is truncated the path can still be found
        if redirectOutputToLogFile:
            logger.info(
                "Worker log can be found at %s. Set --cleanWorkDir to retain this log",
                localWorkerTempDir)

        logger.info(
            "Finished running the chain of jobs on this node, we ran for a total of %f seconds",
            time.time() - startTime)

    ##########################################
    #Trapping where worker goes wrong
    ##########################################
    except:  #Case that something goes wrong in worker
        traceback.print_exc()
        logger.error("Exiting the worker because of a failed job on host %s",
                     socket.gethostname())
        AbstractFileStore._terminateEvent.set()

    ##########################################
    #Wait for the asynchronous chain of writes/updates to finish
    ##########################################

    blockFn()

    ##########################################
    #All the asynchronous worker/update threads must be finished now,
    #so safe to test if they completed okay
    ##########################################

    if AbstractFileStore._terminateEvent.isSet():
        # Something has gone wrong.

        # Clobber any garbage state we have for this job from failing with
        # whatever good state is still stored in the JobStore
        jobDesc = jobStore.load(jobStoreID)
        # Remember that we failed
        jobAttemptFailed = True

    ##########################################
    #Cleanup
    ##########################################

    # Close the worker logging
    # Flush at the Python level
    sys.stdout.flush()
    sys.stderr.flush()
    if redirectOutputToLogFile:
        # Flush at the OS level
        os.fsync(1)
        os.fsync(2)

        # Close redirected stdout and replace with the original standard output.
        os.dup2(origStdOut, 1)

        # Close redirected stderr and replace with the original standard error.
        os.dup2(origStdErr, 2)

        # sys.stdout and sys.stderr don't need to be modified at all. We don't
        # need to call redirectLoggerStreamHandlers since they still log to
        # sys.stderr

        # Close our extra handles to the original standard output and standard
        # error streams, so we don't leak file handles.
        os.close(origStdOut)
        os.close(origStdErr)

    # Now our file handles are in exactly the state they were in before.

    # Copy back the log file to the global dir, if needed.
    # Note that we work with bytes instead of characters so we can seek
    # relative to the end (since Python won't decode Unicode backward, or even
    # interpret seek offsets in characters for us). TODO: We may get invalid or
    # just different Unicode by breaking up a character at the boundary!
    if jobAttemptFailed and redirectOutputToLogFile:
        jobDesc.logJobStoreFileID = jobStore.getEmptyFileStoreID(
            jobDesc.jobStoreID, cleanup=True)
        jobDesc.chainedJobs = listOfJobs
        with jobStore.updateFileStream(jobDesc.logJobStoreFileID) as w:
            with open(tempWorkerLogPath, 'rb') as f:
                if os.path.getsize(
                        tempWorkerLogPath) > logFileByteReportLimit != 0:
                    if logFileByteReportLimit > 0:
                        f.seek(-logFileByteReportLimit,
                               2)  # seek to last tooBig bytes of file
                    elif logFileByteReportLimit < 0:
                        f.seek(logFileByteReportLimit,
                               0)  # seek to first tooBig bytes of file
                # Dump the possibly-invalid-Unicode bytes into the log file
                w.write(f.read())  # TODO load file using a buffer
        # Commit log file reference back to JobStore
        jobStore.update(jobDesc)

    elif ((debugging or (config.writeLogsFromAllJobs
                         and not jobName.startswith(CWL_INTERNAL_JOBS)))
          and redirectOutputToLogFile):  # write log messages
        with open(tempWorkerLogPath, 'rb') as logFile:
            if os.path.getsize(
                    tempWorkerLogPath) > logFileByteReportLimit != 0:
                if logFileByteReportLimit > 0:
                    logFile.seek(-logFileByteReportLimit,
                                 2)  # seek to last tooBig bytes of file
                elif logFileByteReportLimit < 0:
                    logFile.seek(logFileByteReportLimit,
                                 0)  # seek to first tooBig bytes of file
            # Make sure lines are Unicode so they can be JSON serialized as part of the dict.
            # We may have damaged the Unicode text by cutting it at an arbitrary byte so we drop bad characters.
            logMessages = [
                line.decode('utf-8', 'ignore')
                for line in logFile.read().splitlines()
            ]
        statsDict.logs.names = listOfJobs
        statsDict.logs.messages = logMessages

    if (debugging or config.stats or statsDict.workers.logsToMaster
        ) and not jobAttemptFailed:  # We have stats/logging to report back
        jobStore.writeStatsAndLogging(
            json.dumps(statsDict, ensure_ascii=True).encode())

    #Remove the temp dir
    cleanUp = config.cleanWorkDir
    if cleanUp == 'always' or (cleanUp == 'onSuccess' and
                               not jobAttemptFailed) or (cleanUp == 'onError'
                                                         and jobAttemptFailed):
        shutil.rmtree(localWorkerTempDir)

    #This must happen after the log file is done with, else there is no place to put the log
    if (not jobAttemptFailed) and jobDesc.command is None and next(
            jobDesc.successorsAndServiceHosts(), None) is None:
        # We can now safely get rid of the JobDescription, and all jobs it chained up
        for otherID in jobDesc.jobsToDelete:
            jobStore.delete(otherID)
        jobStore.delete(jobDesc.jobStoreID)

    if jobAttemptFailed:
        return 1
    else:
        return 0
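
The log-truncation blocks above seek relative to the end of the file in binary mode and then decode with a lossy error handler, because cutting at an arbitrary byte can split a multi-byte UTF-8 character. A minimal standalone sketch of that pattern, assuming only the standard library (the function name and the `limit` parameter are illustrative, not part of the worker code above):

import os

def tail_bytes(path, limit):
    """Return up to the last `limit` bytes of `path`, decoded leniently."""
    with open(path, 'rb') as f:
        if limit and os.path.getsize(path) > limit:
            f.seek(-limit, 2)  # 2 == os.SEEK_END: seek relative to the end
        data = f.read()
    # A byte-level cut may land inside a UTF-8 sequence; drop damaged characters.
    return data.decode('utf-8', 'ignore')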
Example #44
0
    def __exit__(self, type, value, traceback):
        self._check_entered()

        try:
            # data_path refers to the externally used path to the params. It is a symlink.
            # old_data_path is the path currently pointed to by data_path.
            # tempdir_path is a path where the new params will go, which the new data path will point to.
            # new_data_path is a temporary symlink that will atomically overwrite data_path.
            #
            # The current situation is:
            #   data_path -> old_data_path
            # We're going to write params data to tempdir_path
            #   tempdir_path -> params data
            # Then point new_data_path to tempdir_path
            #   new_data_path -> tempdir_path
            # Then atomically overwrite data_path with new_data_path
            #   data_path -> tempdir_path
            old_data_path = None
            new_data_path = None
            tempdir_path = tempfile.mkdtemp(prefix=".tmp", dir=self._path)

            try:
                # Write back all keys.
                os.chmod(tempdir_path, 0o777)
                for k, v in self._vals.items():
                    with open(os.path.join(tempdir_path, k), "wb") as f:
                        f.write(v)
                        f.flush()
                        os.fsync(f.fileno())
                fsync_dir(tempdir_path)

                data_path = self._data_path()
                try:
                    old_data_path = os.path.join(self._path,
                                                 os.readlink(data_path))
                except (OSError, IOError):
                    # NOTE(mgraczyk): If other DB implementations have bugs, this could cause
                    #                 copies to be left behind, but we still want to overwrite.
                    pass

                new_data_path = "{}.link".format(tempdir_path)
                os.symlink(os.path.basename(tempdir_path), new_data_path)
                os.rename(new_data_path, data_path)
                fsync_dir(self._path)
            finally:
                # If the rename worked, we can delete the old data. Otherwise delete the new one.
                success = new_data_path is not None and os.path.exists(
                    data_path) and (os.readlink(data_path)
                                    == os.path.basename(tempdir_path))

                if success:
                    if old_data_path is not None:
                        shutil.rmtree(old_data_path)
                else:
                    shutil.rmtree(tempdir_path)

                # Regardless of what happened above, there should be no link at new_data_path.
                if new_data_path is not None and os.path.islink(new_data_path):
                    os.remove(new_data_path)
        finally:
            os.umask(self._prev_umask)
            self._prev_umask = None

            # Always release the lock.
            self._lock.release()
            self._lock = None
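
Example #44 makes an entire directory of params replaceable atomically by pointing a fresh symlink at the new directory and renaming that symlink over the public path. For a single file, the same durability recipe is simpler: write to a temporary file in the same directory, flush and fsync it, rename it over the target, then fsync the directory so the rename itself is recorded. A minimal sketch assuming a POSIX filesystem (the helper name is illustrative):

import os
import tempfile

def atomic_write(path, data):
    parent = os.path.dirname(os.path.abspath(path))
    fd, tmp = tempfile.mkstemp(dir=parent)
    try:
        with os.fdopen(fd, 'wb') as f:
            f.write(data)
            f.flush()
            os.fsync(f.fileno())   # data reaches disk before the rename
        os.replace(tmp, path)      # atomic rename; clobbers the old file
        dirfd = os.open(parent, os.O_DIRECTORY)
        try:
            os.fsync(dirfd)        # make the rename itself durable
        finally:
            os.close(dirfd)
    except BaseException:
        try:
            os.unlink(tmp)
        except OSError:
            pass
        raise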
Example #45
0
				continue
			if "~TYPE~" in key:
				continue
			in "/name" in key:
				continue
			subkey = key[16:]
			val = str(logTable.getValue(str(subkey),"missing"))
			entryStr += str(subkey) + ":" + val + "|"
			keyDict[subkey] = 0
		entryStr = entryStr[:-1]
		entryStr += "\r\n"
		logFile.write(entryStr)
		#print "Wrote a line of data."
		if i >= loopsPerFlush:
			logFile.flush()
			os.fsync(logFile.fileno())
			#print "Flushed!"
			i = 0
		else:
			i += 1
	time.sleep(0.02)
	q += 1
	
#Put out one final line with all the keys seen
keyStr = "KEYLIST-"
for k in keyDict.keys():
	keyStr += k + "|"
keyStr = keyStr[:-1]
logFile.write(keyStr)
	
#If we get here, matchOver went True.  Time to shut down
Example #46
0
 def _save_pickle(self, filename):
     """Save sensors to pickle file."""
     with open(filename, 'wb') as file_handle:
         pickle.dump(self._sensors, file_handle, pickle.HIGHEST_PROTOCOL)
         file_handle.flush()
         os.fsync(file_handle.fileno())
Example #47
0
		def closeAllFiles(self, file_list):
		for file in file_list:
			file.flush()
			os.fsync(file.fileno())
			file.close()
Example #48
0
 def flush(self):
     self.console.flush()
     if self.file is not None:
         self.file.flush()
         os.fsync(self.file.fileno())
Example #49
0
    def task_run(dockerfile, configuration, task):
        """
        Run test
        """
        # Check if current image is a toolimage (no daemon)
        is_toolimage = False
        for term in configuration.get('dockerTest.toolImages', {}):
            if term in dockerfile['image']['fullname']:
                is_toolimage = True

        # rspec spec file settings
        spec_path = configuration.get('dockerTest.serverspec.specPath'
                                      ) % dockerfile['image']['imageName']
        spec_abs_path = os.path.join(configuration.get('serverspecPath'),
                                     spec_path)

        # create dockerfile
        tmp_suffix = '.%s_%s_%s.tmp' % (dockerfile['image']['repository'],
                                        dockerfile['image']['imageName'],
                                        dockerfile['image']['tag'])
        tmp_suffix = tmp_suffix.replace('/', '_')
        test_dockerfile = tempfile.NamedTemporaryFile(
            prefix='Dockerfile.',
            suffix=tmp_suffix,
            dir=configuration.get('serverspecPath'),
            bufsize=0,
            delete=False)

        # serverspec conf
        serverspec_conf = DockerTestServerspecTaskLoader.generate_serverspec_configuration(
            path=os.path.basename(test_dockerfile.name),
            dockerfile=dockerfile,
            configuration=configuration,
            is_toolimage=is_toolimage)

        # serverspec options
        serverspec_opts = []
        serverspec_opts.extend([
            spec_path, dockerfile['image']['fullname'],
            base64.b64encode(json.dumps(serverspec_conf)),
            os.path.basename(test_dockerfile.name)
        ])

        # dockerfile content
        dockerfile_content = DockerTestServerspecTaskLoader.generate_dockerfile(
            dockerfile=dockerfile,
            configuration=configuration,
            is_toolimage=is_toolimage)

        # DryRun
        if configuration.get('dryRun'):
            if not os.path.isfile(spec_abs_path):
                print '                no tests found'

            print '         image: %s' % (dockerfile['image']['fullname'])
            print '          path: %s' % (spec_path)
            print '          args: %s' % (' '.join(serverspec_opts))
            print ''
            print 'spec configuration:'
            print '-------------------'
            print json.dumps(serverspec_conf, indent=4, sort_keys=True)
            print ''
            print 'Dockerfile:'
            print '-----------'
            print dockerfile_content
            return True

        # check if we have any tests
        if not os.path.isfile(spec_abs_path):
            print '         no tests defined (%s)' % (spec_path)
            return True

        # build rspec/serverspec command
        cmd = ['bash', 'serverspec.sh']
        cmd.extend(serverspec_opts)

        # create Dockerfile
        with open(test_dockerfile.name, mode='w', buffering=0) as f:
            f.write(dockerfile_content)
            f.flush()
            os.fsync(f.fileno())
            f.close()

        test_status = False
        for retry_count in range(0, configuration.get('retry')):
            try:
                test_status = Command.execute(
                    cmd, cwd=configuration.get('serverspecPath'))
            except Exception as e:
                print e
                pass

            if test_status:
                break
            elif retry_count < (configuration.get('retry') - 1):
                print '    failed, retrying... (try %s)' % (retry_count + 1)
            else:
                print '    failed, giving up'

        return test_status
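
The retry loop at the end of Example #49 re-runs the serverspec command up to a configured number of times, treating an exception as a failure and stopping at the first success. The same control flow as a reusable helper, written as a sketch (the names are illustrative, not part of the example above):

def run_with_retries(action, retries):
    """Call action() until it returns truthy or the attempts are used up."""
    for attempt in range(retries):
        try:
            if action():
                return True
        except Exception as exc:
            print(exc)
        if attempt < retries - 1:
            print('    failed, retrying... (try %s)' % (attempt + 1))
        else:
            print('    failed, giving up')
    return False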
Example #50
0
def flush(filename, exist_ok=False):
    if not exist_ok and not os.path.exists(filename):
        raise OSError('Path does not exist')
    with io.open(filename) as fp:
        fp.flush()
        os.fsync(fp.fileno())
Example #51
0
 def write(self, pid):
     os.ftruncate(self.fd, 0)
     os.write(self.fd, b"%d" % pid)
     os.fsync(self.fd)
Example #52
0
    def get_socket(self):
        if self.use_ssl:
            cert_path = os.path.join(self.config_path, 'certs', self.host)
            if not os.path.exists(cert_path):
                is_new = True
                s = self.get_simple_socket()
                if s is None:
                    # print_error('[get_socket]', 'get_simple_socket failed')
                    return

                # try with CA first
                if os.path.exists(ca_path):
                    try:
                        context = self.get_ssl_context(cert_reqs=ssl.CERT_REQUIRED, ca_certs=ca_path)
                        s = context.wrap_socket(s, do_handshake_on_connect=True)
                    except ssl.SSLError as e:
                        self.print_error('[get_socket] 1', e)
                        s = None
                    except Exception as e:
                        self.print_error('[get_socket] 2', e)
                        return
                try:
                    if s and self.check_host_name(s.getpeercert(), self.host):
                        self.print_error("SSL certificate signed by CA")
                        return s
                except Exception as e:
                    self.print_error('[get_socket] 2.5', e)

                # get server certificate.
                # Do not use ssl.get_server_certificate because it does not work with proxy
                s = self.get_simple_socket()
                if s is None:
                    self.print_error('[get_socket] 3')
                    return
                try:
                    context = self.get_ssl_context(cert_reqs=ssl.CERT_NONE, ca_certs=None)
                    s = context.wrap_socket(s)
                except ssl.SSLError as e:
                    self.print_error("SSL error retrieving SSL certificate:", e)
                    return
                except Exception as e:
                    self.print_error('[get_socket] 4', e)
                    return

                dercert = s.getpeercert(True)
                s.close()
                cert = ssl.DER_cert_to_PEM_cert(dercert)
                # workaround android bug
                cert = re.sub("([^\n])-----END CERTIFICATE-----", "\\1\n-----END CERTIFICATE-----", cert)
                temporary_path = cert_path + '.temp'
                with open(temporary_path,"w") as f:
                    f.write(cert)
                    f.flush()
                    os.fsync(f.fileno())
            else:
                is_new = False

        s = self.get_simple_socket()
        if s is None:
            return

        if self.use_ssl:
            try:
                context = self.get_ssl_context(cert_reqs=ssl.CERT_REQUIRED,
                                               ca_certs=(temporary_path if is_new else cert_path))
                s = context.wrap_socket(s, do_handshake_on_connect=True)
            except socket.timeout:
                self.print_error('timeout')
                return
            except ssl.SSLError as e:
                self.print_error("SSL error:", e)
                if e.errno != 1:
                    self.print_error('[get_socket] 6', e)
                    return
                if is_new:
                    rej = cert_path + '.rej'
                    if os.path.exists(rej):
                        os.unlink(rej)
                    os.rename(temporary_path, rej)
                else:
                    with open(cert_path) as f:
                        cert = f.read()
                    try:
                        b = pem.dePem(cert, 'CERTIFICATE')
                        x = x509.X509(b)
                    except:
                        traceback.print_exc(file=sys.stderr)
                        self.print_error("wrong certificate")
                        return
                    try:
                        x.check_date()
                    except:
                        self.print_error("certificate has expired:", cert_path)
                        os.unlink(cert_path)
                        return
                    self.print_error("wrong certificate")
                if e.errno == 104:
                    self.print_error('[get_socket] 7', e)
                    return
                return
            except BaseException as e:
                self.print_error('[get_socket] 8', e)
                traceback.print_exc(file=sys.stderr)
                return

            if is_new:
                self.print_error("saving certificate")
                os.rename(temporary_path, cert_path)

        return s
Example #53
0
 def _write_version_file(self, version: int) -> None:
     with open(os.path.join(self.path, VERSION_FILE), "wt") as tf:
         tf.write("%d" % version)
         tf.flush()
         os.fsync(tf.fileno())
Example #54
0
        if not found:
            del new_aliases[n]
aliases = new_aliases

# Rewrite aliases file without cycles or names not in the csrankings database.
with open('dblp-aliases.csv-x', mode='w') as outfile:
    sfieldnames = ['alias', 'name']
    swriter = csv.DictWriter(outfile, fieldnames=sfieldnames)
    swriter.writeheader()
    for n in collections.OrderedDict(
            sorted(aliases.items(), key=lambda t: t[0])):
        for a in aliases[n]:
            h = {'alias': a, 'name': n}
            swriter.writerow(h)
    outfile.flush()
    os.fsync(outfile.fileno())

os.rename('dblp-aliases.csv-x', 'dblp-aliases.csv')

# Add any missing aliases.
for name in aliases:
    if name in csrankings:
        # Make sure all aliases are there.
        for a in aliases[name]:
            # Add any missing aliases.
            if a not in csrankings:
                csrankings[a] = csrankings[name]
    else:
        # There might be a name that isn't there but an alias that IS. If so, add the name.
        for a in aliases[name]:
            if a in csrankings:
Example #55
0
def log(title, message='', write=False):
    if write:
        REQ_URLS_FILE.write(''.join([message, '\n']))
        REQ_URLS_FILE.flush()
        os.fsync(REQ_URLS_FILE.fileno())
    print(''.join([GREEN, title, WHIYE, message]))
Example #56
0
 def _flush(self, timeout):
     if self._file:
         self._file.flush()
         if self._open_args:
             os.fsync(self._file.fileno())
Example #57
0
    def vrrp_master(self, job, fobj, ifname, event):

        # vrrp does the "election" for us. If we've gotten this far
        # then the specified timeout for NOT receiving an advertisement
        # has elapsed. Setting the progress to ELECTING is to prevent
        # extensive API breakage with the platform independent failover plugin
        # as well as the front-end (webUI) even though the term is misleading
        # in this use case
        job.set_progress(None, description='ELECTING')

        fenced_error = None
        if event == 'forcetakeover':
            # reserve the disks forcefully ignoring if the other node has the disks
            logger.warning('Forcefully taking over as the MASTER node.')

            # need to stop fenced just in case it's running already
            self.run_call('failover.fenced.stop')

            logger.warning('Forcefully starting fenced')
            fenced_error = self.run_call('failover.fenced.start', True)
        else:
            # if we're here then we need to check a couple things before we start fenced
            # and start the process of becoming master
            #
            #   1. if the interface that we've received a MASTER event for is
            #       in a failover group with other interfaces and ANY of the
            #       other members in the failover group are still BACKUP,
            #       then we need to ignore the event.
            #
            #   TODO: Not sure how keepalived and laggs operate so need to test this
            #           (maybe the event only gets triggered if the lagg goes down)
            #
            status = self.run_call('failover.vip.check_failover_group', ifname,
                                   fobj['groups'])

            # this means that we received a master event and the interface was
            # in a failover group. And in that failover group, there were other
            # interfaces that were still in the BACKUP state which means the
            # other node has them as MASTER so ignore the event.
            if len(status[1]):
                logger.warning(
                    'Received MASTER event for "%s", but other '
                    'interfaces "%r" are still working on the '
                    'MASTER node. Ignoring event.',
                    ifname,
                    status[0],
                )

                job.set_progress(None, description='IGNORED')
                raise IgnoreFailoverEvent()

            logger.warning('Entering MASTER on "%s".', ifname)

            # need to stop fenced just in case it's running already
            self.run_call('failover.fenced.stop')

            logger.warning('Starting fenced')
            fenced_error = self.run_call('failover.fenced.start')

        # starting fenced daemon failed....which is bad
        # emit an error and exit
        if fenced_error != 0:
            if fenced_error == 1:
                logger.error('Failed to register keys on disks, exiting!')
            elif fenced_error == 2:
                logger.error('Fenced is running on the remote node, exiting!')
            elif fenced_error == 3:
                logger.error(
                    '10% or more of the disks failed to be reserved, exiting!')
            elif fenced_error == 5:
                logger.error(
                    'Fenced encountered an unexpected fatal error, exiting!')
            else:
                logger.error(
                    f'Fenced exited with code "{fenced_error}" which should never happen, exiting!'
                )

            job.set_progress(None, description='ERROR')
            raise FencedError()

        # remove the zpool cache files if necessary
        if os.path.exists(self.ZPOOL_KILLCACHE):
            for i in (self.ZPOOL_CACHE_FILE, self.ZPOOL_CACHE_FILE_SAVED):
                with contextlib.suppress(Exception):
                    os.unlink(i)

        # create the self.ZPOOL_KILLCACHE file
        else:
            with contextlib.suppress(Exception):
                with open(self.ZPOOL_KILLCACHE, 'w') as f:
                    f.flush()  # be sure it goes straight to disk
                    os.fsync(
                        f.fileno())  # be EXTRA sure it goes straight to disk

        # if we're here and the zpool "saved" cache file exists we need to check
        # if it's modify time is < the standard zpool cache file and if it is
        # we overwrite the zpool "saved" cache file with the standard one
        if os.path.exists(self.ZPOOL_CACHE_FILE_SAVED) and os.path.exists(
                self.ZPOOL_CACHE_FILE):
            zpool_cache_mtime = os.stat(self.ZPOOL_CACHE_FILE).st_mtime
            zpool_cache_saved_mtime = os.stat(
                self.ZPOOL_CACHE_FILE_SAVED).st_mtime
            if zpool_cache_mtime > zpool_cache_saved_mtime:
                with contextlib.suppress(Exception):
                    shutil.copy2(self.ZPOOL_CACHE_FILE,
                                 self.ZPOOL_CACHE_FILE_SAVED)

        # set the progress to IMPORTING
        job.set_progress(None, description='IMPORTING')

        failed = []
        for vol in fobj['volumes']:
            logger.info('Importing %s', vol['name'])

            # import the zpool(s)
            try:
                self.run_call('zfs.pool.import_pool', vol['guid'], {
                    'altroot': '/mnt',
                    'cachefile': self.ZPOOL_CACHE_FILE,
                })
            except Exception as e:
                vol['error'] = str(e)
                failed.append(vol)
                continue

            # try to unlock the zfs datasets (if any)
            unlock_job = self.run_call('failover.unlock_zfs_datasets',
                                       vol["name"])
            unlock_job.wait_sync()
            if unlock_job.error:
                logger.error(
                    f'Error unlocking ZFS encrypted datasets: {unlock_job.error}'
                )
            elif unlock_job.result['failed']:
                logger.error('Failed to unlock %s ZFS encrypted dataset(s)',
                             ','.join(unlock_job.result['failed']))

        # if we fail to import all zpools then alert the user because nothing
        # is going to work at this point
        if len(failed) == len(fobj['volumes']):
            for i in failed:
                logger.error(
                    'Failed to import volume with name "%s" with guid "%s" '
                    'with error "%s"',
                    i['name'],
                    i['guid'],
                    i['error'],
                )

            logger.error('All volumes failed to import!')
            job.set_progress(None, description='ERROR')
            raise AllZpoolsFailedToImport()

        # if we fail to import any of the zpools then alert the user but continue the process
        elif len(failed):
            for i in failed:
                logger.error(
                    'Failed to import volume with name "%s" with guid "%s" '
                    'with error "%s"',
                    i['name'],
                    i['guid'],
                    i['error'],
                )
                logger.error(
                    'However, other zpools imported so the failover process continued.'
                )

        logger.info('Volume imports complete.')

        # need to make sure failover status is updated in the middleware cache
        logger.info('Refreshing failover status')
        self.run_call('failover.status_refresh')

        # this enables all necessary services that have been enabled by the user
        logger.info('Enabling necessary services')
        self.run_call('etc.generate', 'rc')

        logger.info('Configuring system dataset')
        self.run_call('etc.generate', 'system_dataset')

        # Write the certs to disk based on what is written in db.
        logger.info('Configuring SSL')
        self.run_call('etc.generate', 'ssl')

        # Now we restart the appropriate services to ensure it's using correct certs.
        logger.info('Configuring HTTP')
        self.run_call('service.restart', 'http')

        # now we restart the services, prioritizing the "critical" services
        logger.info('Restarting critical services.')
        for i in self.CRITICAL_SERVICES:
            for j in fobj['services']:
                if i == j['srv_service'] and j['srv_enable']:
                    logger.info('Restarting critical service "%s"', i)
                    self.run_call('service.restart', i, self.HA_PROPAGATE)

        # TODO: look at nftables
        # logger.info('Allowing network traffic.')
        # run('/sbin/pfctl -d')

        logger.info('Critical portion of failover is now complete')

        # regenerate cron
        logger.info('Regenerating cron')
        self.run_call('etc.generate', 'cron')

        # sync disks is disabled on passive node
        logger.info('Syncing disks')
        self.run_call('disk.sync_all')

        # restart the remaining "non-critical" services
        logger.info('Restarting remaining services')

        # restart the non-critical services in the background
        self.run_call('failover.events.restart_background', fobj['services'])

        # TODO: jails don't exist on SCALE (yet)
        # self.run_call('jail.start_on_boot')
        self.run_call('vm.start_on_boot')

        logger.info('Initializing alert system')
        self.run_call('alert.block_failover_alerts')
        self.run_call('alert.initialize', False)

        kmip_config = self.run_call('kmip.config')
        if kmip_config and kmip_config['enabled']:
            logger.info('Syncing encryption keys with KMIP server')

            # Even though we keep keys in sync, it's best that we do this as well
            # to ensure that the system is up to date with the latest keys available
            # from KMIP. If it's unaccessible, the already synced memory keys are used
            # meanwhile.
            self.run_call('kmip.initialize_keys')

        logger.info('Failover event complete.')

        # clear the description and set the result
        job.set_progress(None, description='SUCCESS')

        self.FAILOVER_RESULT = 'SUCCESS'

        return self.FAILOVER_RESULT
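
The sentinel-file writes in Example #57 (and in Example #60 below) all follow the same two-step durability idiom: flush() empties Python's user-space buffer into the OS, and os.fsync() asks the OS to push its own cache to stable storage. A tiny helper capturing that idiom (the helper name is illustrative):

import os

def write_durable(path, text=''):
    with open(path, 'w') as f:
        f.write(text)
        f.flush()              # Python buffer -> OS page cache
        os.fsync(f.fileno())   # OS page cache -> disk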
Example #58
0
 def save(self, fname):
     fo = open(fname, "wb")
     pickle.dump(self, fo)
     fo.flush()  # flush Python's buffer before syncing the descriptor
     os.fsync(fo.fileno())
     fo.close()
Example #59
0
 def fsync(self, path, datasync, fh):
     return os.fsync(fh)
Example #60
0
    def vrrp_backup(self, job, fobj, ifname, event):

        # we need to check a couple things before we stop fenced
        # and start the process of becoming backup
        #
        #   1. if the interface that we've received a BACKUP event for is
        #       in a failover group with other interfaces and ANY of the
        #       other members in the failover group are still MASTER,
        #       then we need to ignore the event.
        #
        #   TODO: Not sure how keepalived and laggs operate so need to test this
        #           (maybe the event only gets triggered if the lagg goes down)
        #
        status = self.run_call('failover.vip.check_failover_group', ifname,
                               fobj['groups'])

        # this means that we received a backup event and the interface was
        # in a failover group. And in that failover group, there were other
        # interfaces that were still in the MASTER state so ignore the event.
        if len(status[0]):
            logger.warning(
                'Received BACKUP event for "%s", but other '
                'interfaces "%r" are still working. '
                'Ignoring event.',
                ifname,
                status[1],
            )

            job.set_progress(None, description='IGNORED')
            raise IgnoreFailoverEvent()

        logger.warning('Entering BACKUP on "%s".', ifname)

        # we need to stop fenced first
        logger.warning('Stopping fenced')
        self.run_call('failover.fenced.stop')

        # restarting keepalived sends a priority 0 advertisement
        # which means any VIP that is on this controller will be
        # migrated to the other controller
        logger.info('Transitioning all VIPs off this node')
        self.run_call('service.restart', 'keepalived')

        # TODO: look at nftables
        # logger.info('Enabling firewall')
        # run('/sbin/pfctl -ef /etc/pf.conf.block')

        # ticket 23361 enabled a feature to send email alerts when an unclean reboot occurs.
        # TrueNAS HA, by design, has a triggered unclean shutdown.
        # If a controller is demoted to standby, we set a 4 sec countdown using watchdog.
        # If the zpool(s) can't export within that timeframe, we use watchdog to violently reboot the controller.
        # When this occurs, the customer gets an email about an "Unauthorized system reboot".
        # The idea for creating a new sentinel file for watchdog related panics,
        # is so that we can send an appropriate email alert.
        # So if we panic here, middleware will check for this file and send an appropriate email.
        # ticket 39114
        with contextlib.suppress(Exception):
            with open(self.WATCHDOG_ALERT_FILE, 'w') as f:
                f.write(str(int(time.time())))
                f.flush()  # be sure it goes straight to disk
                os.fsync(f.fileno())  # be EXTRA sure it goes straight to disk

        # export zpools in a thread and set a timeout to
        # to `self.ZPOOL_EXPORT_TIMEOUT`.
        # if we can't export the zpool(s) in this timeframe,
        # we send the 'b' character to the /proc/sysrq-trigger
        # to trigger an immediate reboot of the system
        # https://www.kernel.org/doc/html/latest/admin-guide/sysrq.html
        export_thread = threading.Thread(target=self._export_zpools,
                                         name='failover_export_zpools',
                                         args=(fobj['volumes'],))
        export_thread.start()
        export_thread.join(timeout=self.ZPOOL_EXPORT_TIMEOUT)
        if export_thread.is_alive():
            # have to enable the "magic" sysrq triggers
            with open('/proc/sys/kernel/sysrq', 'w') as f:
                f.write('1')

            # now violently reboot
            with open('/proc/sysrq-trigger', 'w') as f:
                f.write('b')

        # We also remove this file here, because on boot we become BACKUP if the other
        # controller is MASTER. So this means we have no volumes to export which means
        # the `self.ZPOOL_EXPORT_TIMEOUT` is honored.
        with contextlib.suppress(Exception):
            os.unlink(self.WATCHDOG_ALERT_FILE)

        logger.info('Refreshing failover status')
        self.run_call('failover.status_refresh')

        logger.info('Restarting syslog-ng')
        self.run_call('service.restart', 'syslogd', self.HA_PROPAGATE)

        logger.info('Regenerating cron')
        self.run_call('etc.generate', 'cron')

        logger.info('Stopping smartd')
        self.run_call('service.stop', 'smartd', self.HA_PROPAGATE)

        logger.info('Stopping collectd')
        self.run_call('service.stop', 'collectd', self.HA_PROPAGATE)

        # we keep SSH running on both controllers (if it's enabled by user)
        for i in fobj['services']:
            if i['srv_service'] == 'ssh' and i['srv_enable']:
                logger.info('Restarting SSH')
                self.run_call('service.restart', 'ssh', self.HA_PROPAGATE)

        # TODO: ALUA on SCALE??
        # do something with iscsi service here

        logger.info('Syncing encryption keys from MASTER node (if any)')
        self.run_call('failover.call_remote',
                      'failover.sync_keys_to_remote_node')

        logger.info('Successfully became the BACKUP node.')
        self.FAILOVER_RESULT = 'SUCCESS'

        return self.FAILOVER_RESULT
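
Example #60 bounds the potentially slow zpool export by running it in a worker thread and joining with a timeout; if the thread is still alive afterwards, the code escalates to a forced reboot. The same timeout pattern in isolation, as a sketch (the names and default timeout are illustrative):

import threading

def run_with_timeout(func, args=(), timeout=30.0, on_timeout=None):
    worker = threading.Thread(target=func, args=args, daemon=True)
    worker.start()
    worker.join(timeout=timeout)
    if worker.is_alive():
        if on_timeout is not None:
            on_timeout()   # e.g. escalate, as Example #60 does via sysrq-trigger
        return False
    return True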