def dump_to_file(entries, package, version_code, version_name):
    lock_acquired = False
    while not lock_acquired:
        try:
            filename = dest + LOCK_PREFIX
            lock = Lock(filename,
                        lifetime=datetime.timedelta(seconds=6000))  # lock lifetime of 6000 seconds
            if not lock.is_locked:
                lock.lock(timeout=datetime.timedelta(milliseconds=350))
                lock_acquired = True
                with open(dest, 'a') as f:
                    first = True
                    if os.path.exists(dest) and os.path.getsize(dest) > 0:
                        first = False
                    for entry in entries:
                        entry_dict = entry.__dict__
                        entry_dict['package'] = package
                        entry_dict['versionCode'] = version_code
                        entry_dict['versionName'] = version_name
                        if first:
                            first = False
                        else:
                            f.write(",\n")
                        json.dump(entry_dict, f, indent=4)
                if lock.is_locked:
                    lock.unlock()
        except (AlreadyLockedError, TimeOutError):
            # some other process is writing to the file; retry until the lock is free
            pass
def dump_strings(self, entries):
    lock_acquired = False
    while not lock_acquired:
        try:
            filename = self.strings_dest + LOCK_PREFIX
            lock = Lock(filename,
                        lifetime=datetime.timedelta(seconds=6000))  # lock lifetime of 6000 seconds
            if not lock.is_locked:
                lock.lock(timeout=datetime.timedelta(milliseconds=350))
                lock_acquired = True
                with open(self.strings_dest, 'a') as f:
                    first = True
                    if os.path.exists(self.strings_dest) and \
                            os.path.getsize(self.strings_dest) > 0:
                        first = False
                    for entry in entries:
                        entry_dict = entry.__dict__
                        if first:
                            first = False
                        else:
                            f.write("\n")
                        json.dump(entry_dict, f)
                if lock.is_locked:
                    lock.unlock()
        except (AlreadyLockedError, TimeOutError):
            # some other process is writing to the file; retry until the lock is free
            pass
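# A minimal sketch (not from the sources above) of the same append-under-lock
# pattern without the retry loop: acquire the sidecar lock with a short
# timeout, append, and always release.  LOCK_PREFIX mirrors the constant used
# by the snippets above; its value here is an assumption.
import datetime
import json

from flufl.lock import Lock, TimeOutError

LOCK_PREFIX = ".lock"  # assumption; the real value lives in the original module


def append_records(dest, records, wait=datetime.timedelta(seconds=5)):
    lock = Lock(dest + LOCK_PREFIX, lifetime=datetime.timedelta(minutes=10))
    # Raises TimeOutError if another process keeps the lock longer than `wait`.
    lock.lock(timeout=wait)
    try:
        with open(dest, 'a') as f:
            for record in records:
                json.dump(record, f)
                f.write("\n")
    finally:
        lock.unlock()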
def acquire_lock_1(force, lock_file=None):
    """Try to acquire the master lock.

    :param force: Flag that controls whether to force acquisition of the
        lock.
    :type force: bool
    :param lock_file: Path to the lock file, otherwise `config.LOCK_FILE`.
    :type lock_file: str
    :return: The master lock.
    :raises: `TimeOutError` if the lock could not be acquired.
    """
    if lock_file is None:
        lock_file = config.LOCK_FILE
    lock = Lock(lock_file, LOCK_LIFETIME)
    try:
        lock.lock(timedelta(seconds=0.1))
        return lock
    except TimeOutError:
        if not force:
            raise
        # Force removal of lock first.
        hostname, pid, tempfile = lock.details
        os.unlink(lock_file)
        # Also remove any stale claim files.
        dname = os.path.dirname(lock_file)
        for fname in os.listdir(dname):
            fpath = os.path.join(dname, fname)
            if fpath.startswith(lock_file):
                os.unlink(fpath)
        return acquire_lock_1(force=False)
def get_next_apk(apks_dir):
    """
    Gets the first available (not-locked) apk file and locks it.

    :param apks_dir: directory to scan
    :return: a tuple containing the apk's path and the locked lock
    :rtype: (str, flufl.lock.Lock)
    """
    try:
        files = os.listdir(apks_dir)
    except FileNotFoundError:
        # folder doesn't exist
        return None, None
    for f in files:
        if not f.endswith(".apk"):
            continue
        f = os.path.join(apks_dir, f)
        try:
            # lock file should not exist
            filename = f + LOCK_PREFIX
            lock = Lock(filename,
                        lifetime=datetime.timedelta(seconds=6000))  # lock lifetime of 6000 seconds
            if not lock.is_locked:
                lock.lock(timeout=datetime.timedelta(milliseconds=350))
                if os.path.isfile(f):  # the original file could be deleted in the meantime
                    return f, lock
                if lock.is_locked:
                    lock.unlock()
        except (AlreadyLockedError, TimeOutError):
            # some other process is analyzing the file; go ahead and look for another file
            pass
    return None, None
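# Hypothetical caller (not from the original source) for get_next_apk above:
# analyze whichever apk could be locked, then release the lock so the file
# becomes visible to other workers again.  `analyze_apk` is a placeholder.
def process_next_apk(apks_dir):
    apk_path, lock = get_next_apk(apks_dir)
    if apk_path is None:
        return False  # nothing available, or the directory does not exist
    try:
        analyze_apk(apk_path)  # placeholder for the real analysis step
    finally:
        if lock.is_locked:
            lock.unlock()
    return True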
def load_json(path, require_exclusive=True, lock_path=None):
    """
    Shortcut for loading json from a file path.

    :param path: The full path to the file
    :type path: str
    :param require_exclusive: lock file for exclusive read
    :type require_exclusive: bool
    :param lock_path: path for the lock file to use
    :type lock_path: str
    :returns: loaded json
    :rtype: dict
    :raises: IOError, ValueError
    """
    lock = None
    if require_exclusive:
        if not lock_path:
            lock_path = get_lock_path(path)
        lock = Lock(lock_path)
        lock.lock()
    try:
        with open(path) as f:
            return json.load(f)
    finally:
        if lock:
            lock.unlock(unconditionally=True)
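# Usage sketch (assumed, not from the original source): the default locks the
# file for an exclusive read; require_exclusive=False skips locking, and
# lock_path overrides the lock file location.  The paths are illustrative.
state = load_json("/var/lib/app/state.json")
snapshot = load_json("/var/lib/app/state.json", require_exclusive=False)
custom = load_json("/var/lib/app/state.json",
                   lock_path="/var/lock/app-state.lock")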
def run_capsule(self, run_info, gpu_idx):
    lock_path = os.path.join(run_info['trial_dir'], 'lock')
    lock = Lock(lock_path, datetime.timedelta(days=365))
    try:
        lock.lock(timeout=datetime.timedelta(seconds=1))
    except TimeOutError:
        # flufl.lock raises TimeOutError when the timeout expires; fall
        # through to the is_locked check below so the failure is logged
        # instead of propagating.
        pass
    if not lock.is_locked:
        self.print_log('locking failed for', run_info['trial_id'])
        return None

    # run capsule
    env = {'CUDA_VISIBLE_DEVICES': str(gpu_idx),
           'INFR_TRIAL': run_info['trial_id'],
           'INFR_EXP_PATH': self.experiment_dir,
           'INFR_MODE': self.mode,
           'INFR_REDIRECT_IO': '1',
           'INFR_START_STATE': os.path.join(run_info['trial_dir'],
                                            'start_state.json')}
    if self.cuda_sync:
        env['CUDA_LAUNCH_BLOCKING'] = '1'

    proc = subprocess.Popen([sys.executable, '-m',
                             run_info['start_state']['module_name']],
                            env=env)
    self.print_log('started worker', proc.pid, 'for',
                   run_info['trial_id'], self.mode)

    return {'trial_dir': run_info['trial_dir'],
            'trial_id': run_info['trial_id'],
            'start_at': time.time(),
            'lock': lock,
            'gpu_idx': gpu_idx,
            'proc': proc,
            'pid': proc.pid,
            'ret_code': None}
class NFSFileLock(object):

    def __init__(self, filename):
        self.lock_obj = Lock(filename + '.lock', timedelta(days=999))

    def lock(self):
        while True:
            try:
                self.lock_obj.lock()
                break
            except OSError as error:
                if errno.ESTALE != error.errno:
                    raise
        # Disowning the lock allows us to delete the flufl.lock.Lock object
        # without it calling the object destructor, which releases the lock.
        # We want to be able to delete the object but keep the lock in order
        # to store the lock inside a file.
        self.lock_obj.disown()

    def unlock(self):
        self.lock_obj.unlock()

    def is_locked(self):
        return self.lock_obj.is_locked
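# Usage sketch (assumed): serialize writers to a file on an NFS share with the
# wrapper above.  `update_results` is a placeholder for the protected work.
shared_lock = NFSFileLock("/mnt/shared/results.db")
shared_lock.lock()
try:
    update_results("/mnt/shared/results.db")  # placeholder
finally:
    shared_lock.unlock()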
def test_acquire_lock_1_force(self):
    # Create the lock and lock it.
    my_lock = Lock(self.lock_file)
    my_lock.lock(timedelta(seconds=60))
    # Try to acquire it again with force.
    lock = master.acquire_lock_1(True, self.lock_file)
    self.assertTrue(lock.is_locked)
    lock.unlock()
def setupTest(self, param, testDataName):

    # Lock to prevent race condition
    lock_file = "lift/lib/.locks/setup.lock"
    lock = Lock(lock_file)
    lock.lifetime = timedelta(days=2)
    lock.lock()

    # ---------------------------------------------------------------------
    # Base path
    # for dataset
    self.dataset = os.getenv('PROJ_DATA_DIR', '')
    if self.dataset == '':
        self.dataset = os.path.expanduser("~/Datasets")
    self.dataset += "/" + testDataName

    # for temp
    self.temp = os.getenv('PROJ_TEMP_DIR', '')
    if self.temp == '':
        self.temp = os.path.expanduser("~/Temp")
    self.temp += "/" + testDataName

    # for volatile temp
    self.volatile_temp = os.getenv('PROJ_VOLTEMP_DIR', '')
    if self.volatile_temp == '':
        self.volatile_temp = "/scratch/" + os.getenv('USER') + "/Temp"
    self.volatile_temp += "/" + testDataName

    # ---------------------------------------------------------------------
    # Path for data loading
    self.train_data = None  # block these as they should not be used
    self.train_mask = None  # block these as they should not be used
    self.debug = self.dataset + "/debug/" + self.prefix_dataset(param)

    # ---------------------------------------------------------------------
    # Path for the model learning
    resdir = os.getenv('PROJ_RES_DIR', '')
    if resdir == '':
        resdir = os.path.expanduser("~/Results")
    self.result = (resdir + "/" +
                   self.getResPrefix(param) +
                   self.prefix_dataset(param) +
                   self.prefix_patch(param) +
                   self.prefix_learning(param))

    # Check if the un-sorted prefix exists
    unsorted_hash_path = (resdir + "/" +
                          self.getResPrefix(param, do_sort=False) +
                          self.prefix_dataset(param, do_sort=False) +
                          self.prefix_patch(param, do_sort=False) +
                          self.prefix_learning(param, do_sort=False))
    if os.path.exists(unsorted_hash_path):
        shutil.copytree(unsorted_hash_path, self.result)
        shutil.rmtree(unsorted_hash_path)

    lock.unlock()
def test_master_state(self):
    my_lock = Lock(self.lock_file)
    # Mailman is not running.
    state, lock = master.master_state(self.lock_file)
    self.assertEqual(state, master.WatcherState.none)
    # Acquire the lock as if another process had already started the
    # master.  Use a timeout to avoid this test deadlocking.
    my_lock.lock(timedelta(seconds=60))
    try:
        state, lock = master.master_state(self.lock_file)
    finally:
        my_lock.unlock()
    self.assertEqual(state, master.WatcherState.conflict)
def test_master_state(self):
    my_lock = Lock(self.lock_file)
    # Mailman is not running.
    state, lock = master.master_state(self.lock_file)
    self.assertEqual(state, master.WatcherState.none)
    # Acquire the lock as if another process had already started the
    # master.
    my_lock.lock()
    try:
        state, lock = master.master_state(self.lock_file)
    finally:
        my_lock.unlock()
    self.assertEqual(state, master.WatcherState.conflict)
def setupTrain(self, param, setID):

    # Lock to prevent race condition
    lock_file = "lift/lib/.locks/setup.lock"
    lock = Lock(lock_file)
    lock.lifetime = timedelta(days=2)
    # print(lock.is_locked)
    lock.lock()

    # ---------------------------------------------------------------------
    # Base path
    # for dataset
    self.dataset = os.getenv('PROJ_DATA_DIR', '')
    if self.dataset == '':
        self.dataset = os.path.expanduser("~/Datasets")
    self.dataset += "/" + param.dataset.trainSetList[setID]

    # for temp
    self.temp = os.getenv('PROJ_TEMP_DIR', '')
    if self.temp == '':
        self.temp = os.path.expanduser("~/Temp")
    self.temp += "/" + param.dataset.trainSetList[setID]

    # for volatile temp
    self.volatile_temp = os.getenv('PROJ_VOLTEMP_DIR', '')
    if self.volatile_temp == '':
        self.volatile_temp = "/scratch/" + os.getenv('USER') + "/Temp"
    self.volatile_temp += "/" + param.dataset.trainSetList[setID]

    # ---------------------------------------------------------------------
    # Path for the model learning
    resdir = os.getenv('PROJ_RES_DIR', '')
    if resdir == '':
        resdir = os.path.expanduser("~/Results")
    self.result = (resdir + "/" +
                   self.getResPrefix(param) +
                   self.prefix_dataset(param) +
                   self.prefix_patch(param) +
                   self.prefix_learning(param))
    if not os.path.exists(self.result):
        # Check if the un-sorted prefix exists
        unsorted_hash_path = (resdir + "/" +
                              self.getResPrefix(param, do_sort=False) +
                              self.prefix_dataset(param, do_sort=False) +
                              self.prefix_patch(param, do_sort=False) +
                              self.prefix_learning(param, do_sort=False))
        if os.path.exists(unsorted_hash_path):
            shutil.copytree(unsorted_hash_path, self.result)
            shutil.rmtree(unsorted_hash_path)

    lock.unlock()
def archive_message(mlist, message):
    """See `IArchiver`.

    This archiver saves messages into a maildir.
    """
    archive_dir = os.path.join(config.ARCHIVE_DIR, 'prototype')
    try:
        os.makedirs(archive_dir, 0o775)
    except OSError as error:
        # If this already exists, then we're fine
        if error.errno != errno.EEXIST:
            raise
    # Maildir will throw an error if the directories are partially created
    # (for instance the toplevel exists but cur, new, or tmp do not)
    # therefore we don't create the toplevel as we did above.
    list_dir = os.path.join(archive_dir, mlist.fqdn_listname)
    mailbox = Maildir(list_dir, create=True, factory=None)
    lock_file = os.path.join(
        config.LOCK_DIR,
        '{0}-maildir.lock'.format(mlist.fqdn_listname))
    # Lock the maildir as Maildir.add() is not threadsafe.  Don't use the
    # context manager because it's not an error if we can't acquire the
    # archiver lock.  We'll just log the problem and continue.
    #
    # XXX 2012-03-14 BAW: When we extend the chain/pipeline architecture
    # to other runners, e.g. the archive runner, it would be better to let
    # any TimeOutError propagate up.  That would cause the message to be
    # re-queued and tried again later, rather than being discarded as
    # happens now below.
    lock = Lock(lock_file)
    try:
        lock.lock(timeout=timedelta(seconds=1))
        # Add the message to the maildir.  The return value could be used
        # to construct the file path if necessary.  E.g.
        #
        # os.path.join(archive_dir, mlist.fqdn_listname, 'new',
        #              message_key)
        mailbox.add(message)
    except TimeOutError:
        # Log the error and go on.
        log.error('Unable to acquire prototype archiver lock for {0}, '
                  'discarding: {1}'.format(
                      mlist.fqdn_listname,
                      message.get('message-id', 'n/a')))
    finally:
        lock.unlock(unconditionally=True)
    # Can we return the URL of the archived message?
    return None
def lock(self, timeout=None):
    while True:
        try:
            result = Lock.lock(self, timeout)
        except AlreadyLockedError:
            self._sleep()
        else:
            return result
def set_lock(check_lock_file):
    check_lock = None
    if os.name == "posix":
        check_lock = Lock(check_lock_file)
        check_lock.lifetime = timedelta(hours=1)
        frameinfo = getframeinfo(currentframe())
        print("-- {}/{}: waiting to obtain lock --".format(
            frameinfo.filename, frameinfo.lineno))
        check_lock.lock()
        print(">> obtained lock for posix system <<")
    elif os.name == "nt":
        import filelock
        check_lock = filelock.FileLock(check_lock_file)
        check_lock.timeout = 100  # 100s
        check_lock.acquire()
        if check_lock.is_locked:
            print(">> obtained lock for windows system <<")
    else:
        print("Unknown operating system, lock unavailable")
    return check_lock
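# Companion sketch (assumed, not from the original source): releasing whatever
# set_lock() returned.  flufl locks are released with unlock(), filelock locks
# with release(); this mirrors the platform split used elsewhere in this code.
import os


def release_lock(check_lock):
    if check_lock is None:
        return
    if os.name == "posix":
        check_lock.unlock()
    elif os.name == "nt":
        check_lock.release()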
class NFSLock:
    def __init__(self, path):
        # Specify the path to a file that will be used to synchronize the lock.
        # Per the flufl.lock documentation, use a file that does not exist.
        self._lock = Lock(path)

        # Locks have a lifetime (default 15 seconds) which is the period of
        # time that the process expects to keep the lock once it has been
        # acquired.  We set the lifetime to be 5 minutes as we expect all
        # operations that require locks to be completed within that time.
        self._lock.lifetime = timedelta(minutes=5)

        # Ensure multiple threads within a process run NFSLock operations one
        # at a time.  We must acquire the reentrant lock before acquiring the
        # flufl lock and only release after the flufl lock is released.
        self._r_lock = threading.RLock()

    def acquire(self):
        self._r_lock.acquire()
        try:
            self._lock.lock()
        except AlreadyLockedError:
            # Safe to re-attempt to acquire a lock
            pass

    def release(self):
        try:
            self._lock.unlock()
        except NotLockedError:
            # Safe to re-attempt to release a lock
            pass
        self._r_lock.release()

    def __enter__(self):
        self.acquire()

    def __exit__(self, t, v, tb):
        self.release()

    @property
    def is_locked(self):
        return self._lock.is_locked
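# Usage sketch (assumed): NFSLock implements __enter__/__exit__, so a critical
# section on shared storage reads as a plain "with" block.  `write_checkpoint`
# is a placeholder for the protected work.
shared = NFSLock("/mnt/nfs/model/.write.lock")
with shared:
    # Only one process (and one thread per process) runs this at a time; the
    # 5-minute lifetime above bounds how long a crashed holder can block others.
    write_checkpoint("/mnt/nfs/model/checkpoint.pt")  # placeholder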
def acquire_lock_1(force, lock_file=None):
    """Try to acquire the master lock.

    :param force: Flag that controls whether to force acquisition of the
        lock.
    :type force: bool
    :param lock_file: Path to the lock file, otherwise `config.LOCK_FILE`.
    :type lock_file: str
    :return: The master lock.
    :raises: `TimeOutError` if the lock could not be acquired.
    """
    if lock_file is None:
        lock_file = config.LOCK_FILE
    lock = Lock(lock_file, LOCK_LIFETIME)
    try:
        lock.lock(timedelta(seconds=0.1))
        return lock
    except TimeOutError:
        if not force:
            raise
        # Force removal of lock first.
        lock.disown()
        hostname, pid, tempfile = lock.details
        os.unlink(lock_file)
        return acquire_lock_1(force=False)
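# Usage sketch (assumed) for the helper above: attempt a normal acquisition
# and surface a readable error when another master already holds the lock and
# force was not requested.  The lock path is illustrative.
from flufl.lock import TimeOutError


def start_master(force=False):
    try:
        return acquire_lock_1(force, lock_file="/var/run/app/master.lck")
    except TimeOutError:
        raise SystemExit("master lock is held by another process; "
                         "pass force=True to steal it")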
def dmg_signfile(filename, keychain, signing_identity, code_resources, identifier, subject_ou, lockfile, fake=False, passphrase=None): """ Sign a mac .app folder """ from flufl.lock import Lock, TimeOutError, NotLockedError from datetime import timedelta import pexpect basename = os.path.basename(filename) dirname = os.path.dirname(filename) stdout = tempfile.TemporaryFile() sign_command = [ 'codesign', '-s', signing_identity, '-fv', '--keychain', keychain, '--resource-rules', code_resources, '--requirement', MAC_DESIGNATED_REQUIREMENTS % locals(), basename ] # pexpect requires a string as input unlock_command = 'security unlock-keychain ' + keychain lock_command = ['security', 'lock-keychain', keychain] try: sign_lock = None try: # Acquire a lock for the signing command, to ensure we don't have a # race condition where one process locks the keychain immediately after another # unlocks it. log.debug("Try to acquire %s", lockfile) sign_lock = Lock(lockfile) # Put a 30 second timeout on waiting for the lock. sign_lock.lock(timedelta(0, 30)) # Unlock the keychain so that we do not get a user-interaction prompt to use # the keychain for signing. This operation requires a password. child = pexpect.spawn(unlock_command) child.expect('password to unlock .*') child.sendline(passphrase) # read output until child exits child.read() child.close() if child.exitstatus != 0: raise ValueError("keychain unlock failed") # Execute the signing command check_call(sign_command, cwd=dirname, stdout=stdout, stderr=STDOUT) except TimeOutError, error: # timed out acquiring lock, give an error log.exception( "Timeout acquiring lock %s for codesign, is something broken? ", lockfile, error) raise except: # catch any other locking error log.exception( "Error acquiring %s for codesign, is something broken?", lockfile) raise finally: # Lock the keychain again, no matter what happens # This command does not require a password check_call(lock_command) # Release the lock, if it was acquired if sign_lock: try: sign_lock.unlock() log.debug("Release %s", lockfile) except NotLockedError: log.debug("%s was already unlocked", lockfile)
class TaskQueue(object): ''' The actual Task Queue object. See Module docs ''' def __init__(self, config_file): ''' initialise the task queue, from the config file ''' self.config = Config(config_file) self.lock = Lock(pathjoin(self.config.get('DIRS', 'db'), 'TaskQueue.lock')) self.db = DictLiteStore(pathjoin(self.config.get('DIRS', 'db'), 'TaskQueue.db'), 'Tasks') def __enter__(self): ''' start of with TaskQueue(...) as t: block ''' self.lock.lock() self.db.open() return self def __exit__(self, exptype, value, tb): ''' end of with ... block ''' self.db.close() self.lock.unlock() def tasks(self, group=None, state=None): q = [] if group: q.append(('group', 'LIKE', NoJSON('%"' + group + '"%'))) if state: q.append(('state', '==', state)) return self.db.get(*q) # pylint: disable=W0142 def active_groups(self): ''' return a list of all groups currently in the task list, and how many tasks they each are running ''' # grouplist looks like: # # dict[groupname] -> dict[state] -> count # so you can do awesome things. grouplist = defaultdict(lambda:defaultdict(lambda:0)) sql = u'SELECT Tasks."group", Tasks."state" From Tasks' try: rows = self.db.cur.execute(sql).fetchall() except OperationalError as err: # usually no such column, which means usually no rows. rowcount = self.db.cur.execute(u'SELECT Count(id) FROM Tasks') if rowcount.fetchone()[0] == 0: return grouplist else: raise err for rawgroups, rawstate in rows: groups = json.loads(rawgroups) state = json.loads(rawstate) if isinstance(groups, list): for g in groups: grouplist[g][state] += 1 else: grouplist[groups][state] += 1 return grouplist ###################################### # If for some reason it would be better to return dicts # rather than defaultdicts, then this is the code: # #to_return = {} #for groupname in grouplist: # to_return[groupname] = dict(grouplist[groupname]) #return to_return def grouplimit(self, groupname): ''' how many tasks can be run at the same time in this group? ''' return int(self.config.get(groupname, 'limit', 1)) def _getnexttask(self, group, new_state='running'): ''' get the next 'ready' task of this group. This should ONLY be called by self.getnexttask, not by end users. getnexttask checks that limits haven't been reached, etc. ''' try: task = self.tasks(group, 'ready')[0] if new_state: task['state'] = new_state self.db.update(task, False, ('uid', '==', task['uid'])) # Now we are going to start the task, import the defaults from # the group config: if self.config.config.has_section(group): for k, v in self.config.config.items(group): if not k in task: task[k] = v # and finally load defaults: if self.config.config.has_section('task_defaults'): for k, v in self.config.config.items('task_defaults'): if not k in task: task[k] = v return task except IndexError as err: import pdb; pdb.set_trace() raise NoAvailableTasks() def getnexttask(self, group=None, new_state='running'): ''' Get one available next task, as long as 'group' isn't overloaded. When the task is 'got', sets the state to new_state in the database. So this can be used as an atomic action on tasks. ''' if group: running_tasks = self.active_groups()[group]['running'] group_limit = self.grouplimit(group) if running_tasks < group_limit: return self._getnexttask(group, new_state) else: raise TooBusy() else: #no group specified. 
all_groups = self.active_groups() for groupname, grouptasks in all_groups.items(): # already at limit: if grouptasks['running'] >= self.grouplimit(groupname): continue # no ready tasks: if grouptasks['ready'] == 0: continue # we have a winner! (a group with available tasks) return self._getnexttask(groupname, new_state) # if there are no ready tasks at all, then raise that exception if all((g['ready'] == 0 for g in all_groups.values())): raise NoAvailableTasks() # otherwise, there are availible tasks, but we're too busy. raise TooBusy() def save(self, data): ''' add needed fields if they're not there, and then save to the database. If the same uuid is already there, then update it. ''' if 'state' not in data: data['state'] = 'ready' if not 'uid' in data: data['uid'] = uuid1().hex if not 'group' in data: data['group'] = 'none' # If output files are not absolute paths, then place them in the config # file specified logfile directory. if 'stdout' in data and not data['stdout'] == abspath(data['stdout']): data['stdout'] = abspath(pathjoin(self.config.get('DIRS', 'log'), data['stdout'])) if 'stderr' in data and not data['stderr'] == abspath(data['stderr']): data['stderr'] = abspath(pathjoin(self.config.get('DIRS', 'log'), data['stderr'])) # And save it to the database. self.db.update(data, True, ('uid', '==', data['uid'])) return data def get(self, uid): ''' get a task based of its uuid ''' return self.db.get(('uid', '==', uid))
def dmg_signfile(filename, keychain, signing_identity, code_resources, identifier, subject_ou, lockfile, fake=False, passphrase=None): """ Sign a mac .app folder """ from flufl.lock import Lock, TimeOutError, NotLockedError from datetime import timedelta import pexpect basename = os.path.basename(filename) dirname = os.path.dirname(filename) stdout = tempfile.TemporaryFile() sign_command = ['codesign', '-s', signing_identity, '-fv', '--keychain', keychain, '--resource-rules', code_resources, '--requirement', MAC_DESIGNATED_REQUIREMENTS % locals(), basename] # pexpect requires a string as input unlock_command = 'security unlock-keychain ' + keychain lock_command = ['security', 'lock-keychain', keychain] try: sign_lock = None try: # Acquire a lock for the signing command, to ensure we don't have a # race condition where one process locks the keychain immediately after another # unlocks it. log.debug("Try to acquire %s", lockfile) sign_lock = Lock(lockfile) # Put a 30 second timeout on waiting for the lock. sign_lock.lock(timedelta(0, 30)) # Unlock the keychain so that we do not get a user-interaction prompt to use # the keychain for signing. This operation requires a password. child = pexpect.spawn(unlock_command) child.expect('password to unlock .*') child.sendline(passphrase) # read output until child exits child.read() child.close() if child.exitstatus != 0: raise ValueError("keychain unlock failed") # Execute the signing command check_call(sign_command, cwd=dirname, stdout=stdout, stderr=STDOUT) except TimeOutError, error: # timed out acquiring lock, give an error log.exception("Timeout acquiring lock %s for codesign, is something broken? ", lockfile, error) raise except: # catch any other locking error log.exception("Error acquiring %s for codesign, is something broken?", lockfile) raise finally: # Lock the keychain again, no matter what happens # This command does not require a password check_call(lock_command) # Release the lock, if it was acquired if sign_lock: try: sign_lock.unlock() log.debug("Release %s", lockfile) except NotLockedError: log.debug("%s was already unlocked", lockfile)
class DictListFile: def __init__(self, filename, header="report('", footer="');"): self.filename = Path(filename) self.filename.parent.mkdir(parents=True, exist_ok=True) lockfilename = f"{filename}.lock" self.lock = Lock(str(lockfilename)) if isinstance(header, str): header = header.encode() self.header = header if isinstance(footer, str): footer = footer.encode() self.footer = footer self.dictlist = None self.is_dirty = None @classmethod @lru_cache(maxsize=128) def cached(cls, filename, **kwargs): return cls(filename, **kwargs) def __enter__(self): self.lock.lock() self.dictlist = [] self.is_dirty = False if self.filename.is_file(): with open(str(self.filename), "rb") as fp: bytesfromfile = fp.read() try: if self.header is not None: bytesfromfile = bytesfromfile[len(self.header):] if self.footer is not None: bytesfromfile = bytesfromfile[:-len(self.footer)] jsonstr = bytesfromfile.decode() jsonstr = jsonstr.replace("\\\n", "") self.dictlist = json.loads(jsonstr) except json.decoder.JSONDecodeError as e: logger.warning("JSONDecodeError %s", e) return self def __exit__(self, *args): if self.is_dirty: with open(str(self.filename), "w") as fp: fp.write(self.header.decode()) jsonstr = json.dumps(self.dictlist, indent=4, sort_keys=True, ensure_ascii=False) for line in jsonstr.splitlines(): fp.write(line) fp.write("\\\n") fp.write(self.footer.decode()) try: self.lock.unlock() except RuntimeError: pass self.dictlist = None def to_table(self): dictlist = [{str(k): str(v) for k, v in indict.items()} for indict in self.dictlist] dataframe = pd.DataFrame.from_records(dictlist) dataframe = dataframe.replace({np.nan: ""}) columnsinorder = [ entity for entity in reversed(entities) if entity in dataframe ] columnsinorder.extend( sorted([column for column in dataframe if column not in entities])) dataframe = dataframe[columnsinorder] table_str = tabulate(dataframe, headers="keys", showindex=False) table_filename = self.filename.parent / f"{self.filename.stem}.txt" with open(str(table_filename), "w") as fp: fp.write(table_str) fp.write("\n") def put(self, indict): assert self.dictlist is not None intags = {k: v for k, v in indict.items() if k in entities} matches = False for i, curdict in enumerate(self.dictlist): curtags = {k: v for k, v in curdict.items() if k in entities} if set(intags.items()) == set(curtags.items()): if set(indict.keys()) == set(curdict.keys()): if all(_compare(v, curdict[k]) for k, v in indict.items()): return # update not needed matches = True break self.is_dirty = True # will need to write out file if matches: self.dictlist[i].update(indict) logger.debug( f"Updating {self.filename} entry {curdict} with {indict}") else: self.dictlist.append(indict)
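# Usage sketch (assumed): append or update one row in the shared report file;
# the lock taken in __enter__ serializes concurrent writers.  The file path and
# the keys are illustrative -- which keys count as index columns is decided by
# the module-level `entities` list in the original source.
with DictListFile.cached("/data/reports/reportvals.js") as report:
    report.put({"sub": "01", "task": "rest", "mean_fd": "0.12"})
    report.to_table()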
def dmg_signpackage(pkgfile, dstfile, keychain, mac_id, subject_ou, fake=False, passphrase=None): """ Sign a mac build, putting results into `dstfile`. pkgfile must be a tar, which gets unpacked, signed, and repacked. """ # Keep track of our output in a list here, and we can output everything # when we're done This is to avoid interleaving the output from # multiple processes. from flufl.lock import Lock, TimeOutError, NotLockedError from datetime import timedelta import pexpect # TODO: Is it even possible to do 'fake' signing? logs = [] logs.append("Repacking %s to %s" % (pkgfile, dstfile)) # pexpect requires a string as input unlock_command = 'security unlock-keychain ' + keychain lock_command = ['security', 'lock-keychain', keychain] lockfile = os.path.join(os.path.dirname(keychain), '.lock') tmpdir = tempfile.mkdtemp() try: # Unpack it logs.append("Unpacking %s to %s" % (pkgfile, tmpdir)) unpacktar(pkgfile, tmpdir) for macdir in os.listdir(tmpdir): macdir = os.path.join(tmpdir, macdir) log.debug('Checking if we should sign %s', macdir) if shouldSign(macdir, 'mac'): log.debug('Need to sign %s', macdir) try: sign_lock = None # Acquire a lock for the signing command, to ensure we don't have a # race condition where one process locks the keychain immediately after another # unlocks it. log.debug("Try to acquire %s", lockfile) sign_lock = Lock(lockfile) # Put a 30 second timeout on waiting for the lock. sign_lock.lock(timedelta(0, 30)) # Unlock the keychain so that we do not get a user-interaction prompt to use # the keychain for signing. This operation requires a password. child = pexpect.spawn(unlock_command) child.expect('password to unlock .*') child.sendline(passphrase) # read output until child exits child.read() child.close() if child.exitstatus != 0: raise ValueError("keychain unlock failed") # Sign the thing! dmg_signfile(macdir, keychain, mac_id, subject_ou, fake) except TimeOutError: # timed out acquiring lock, give an error log.exception( "Timeout acquiring lock %s for codesign, is something broken? ", lockfile) raise except: # catch any other locking error log.exception( "Error acquiring %s for codesign, is something broken?", lockfile) raise finally: # Lock the keychain again, no matter what happens # This command does not require a password check_call(lock_command) # Release the lock, if it was acquired if sign_lock: try: sign_lock.unlock() log.debug("Release %s", lockfile) except NotLockedError: log.debug("%s was already unlocked", lockfile) # Repack it logs.append("Packing %s" % dstfile) tar_dir(dstfile, tmpdir) except: log.exception("Error signing %s", pkgfile) raise finally: # Clean up after ourselves, and output our logs shutil.rmtree(tmpdir) log.info("\n ".join(logs))
def setupTrain(self, param, dataset_in): # Lock to prevent race condition if not os.path.exists(".locks"): os.makedirs(".locks") lock_file = ".locks/setup.lock" if os.name == "posix": lock = Lock(lock_file) lock.lifetime = timedelta(days=2) frameinfo = getframeinfo(currentframe()) print(">> {}/{}: waiting to obtain lock <<".format( frameinfo.filename, frameinfo.lineno)) lock.lock() print(">> obtained lock for posix system <<") elif os.name == "nt": import filelock lock = filelock.FileLock(lock_file) lock.acquire() if lock.is_locked: print(">> obtained lock for windows system <<") else: print("Unknown operating system, lock unavailable") # --------------------------------------------------------------------- # Base path # for dataset # self.dataset = os.getenv('PROJ_DATA_DIR', '') # if self.dataset == '': # self.dataset = os.path.expanduser("~/Datasets") # self.dataset += "/" + str(dataset_in) self.dataset = os.path.join(param.data_dir, str(dataset_in)).rstrip("/") + "/" # for temp # self.temp = os.getenv('PROJ_TEMP_DIR', '') # if self.temp == '': # self.temp = os.path.expanduser("~/Temp") # self.temp += "/" + str(dataset_in) self.temp = os.path.join(param.temp_dir, str(dataset_in)).rstrip("/") + "/" # for volatile temp # self.volatile_temp = os.getenv('PROJ_VOLTEMP_DIR', '') # if self.volatile_temp == '': # self.volatile_temp = "/scratch/" + os.getenv('USER') + "/Temp" # self.volatile_temp += "/" + str(dataset_in) self.volatile_temp = os.path.join(param.scratch_dir, str(dataset_in)).rstrip("/") + "/" # self.negdata = os.path.expanduser("~/Datasets/NegData/") # LEGACY # # create dump directory if it does not exist # if not os.path.exists(self.temp): # os.makedirs(self.temp) # --------------------------------------------------------------------- # Path for data loading # path for synthetic data generation self.train_data = self.dataset + "train/" + self.prefix_dataset(param) self.train_mask = (self.dataset + "train/" + self.prefix_dataset(param) + "masks/") if not os.path.exists(self.train_data): # Check if the un-sorted prefix exists unsorted_hash_path = (self.dataset + "train/" + self.prefix_dataset(param, do_sort=False)) if os.path.exists(unsorted_hash_path): os.symlink(unsorted_hash_path.rstrip("/"), self.train_data.rstrip("/")) # shutil.copytree(unsorted_hash_path, self.train_data) # shutil.rmtree(unsorted_hash_path) # dump folder for dataset selection (sampling and etc) self.train_dump = self.temp + "train/" + self.prefix_dataset(param) if not os.path.exists(self.train_dump): # Check if the un-sorted prefix exists unsorted_hash_path = (self.temp + "train/" + self.prefix_dataset(param, do_sort=False)) if os.path.exists(unsorted_hash_path): os.symlink(unsorted_hash_path.rstrip("/"), self.train_dump.rstrip("/")) # shutil.copytree(unsorted_hash_path, self.train_dump) # shutil.rmtree(unsorted_hash_path) # dump folder for patch extraction (if necessary) self.patch_dump = (self.temp + "train/" + self.prefix_dataset(param) + self.prefix_patch(param)) if not os.path.exists(self.patch_dump): # Check if the un-sorted prefix exists unsorted_hash_path = (self.temp + "train/" + self.prefix_dataset(param, do_sort=False) + self.prefix_patch(param, do_sort=False)) if os.path.exists(unsorted_hash_path): os.symlink(unsorted_hash_path.rstrip("/"), self.patch_dump.rstrip("/")) # shutil.copytree(unsorted_hash_path, self.patch_dump) # shutil.rmtree(unsorted_hash_path) # volatile dump folder for patch extraction (if necessary) self.volatile_patch_dump = (self.volatile_temp + "train/" + 
self.prefix_dataset(param) + self.prefix_patch(param)) # if not os.path.exists(self.volatile_patch_dump): # # Check if the un-sorted prefix exists # unsorted_hash_path = (self.volatile_temp + "train/" + # self.prefix_dataset(param, do_sort=False) + # self.prefix_patch(param, do_sort=False)) # os.symlink(unsorted_hash_path.rstrip("/"), # self.volatile_patch_dump.rstrip("/")) # # shutil.copytree(unsorted_hash_path, self.volatile_patch_dump) # # shutil.rmtree(unsorted_hash_path) # debug info folder self.debug = self.dataset + "debug/" + self.prefix_dataset(param) if not os.path.exists(self.debug): # Check if the un-sorted prefix exists unsorted_hash_path = (self.dataset + "debug/" + self.prefix_dataset(param, do_sort=False)) if os.path.exists(unsorted_hash_path): shutil.copytree(unsorted_hash_path, self.debug) shutil.rmtree(unsorted_hash_path) # # --------------------------------------------------------------------- # # Path for the model learning # resdir = os.getenv('PROJ_RES_DIR', '') # if resdir == '': # resdir = os.path.expanduser("~/Results") # self.result = (resdir + "/" + # self.getResPrefix(param) + # self.prefix_dataset(param) + # self.prefix_patch(param) + # self.prefix_learning(param)) # if not os.path.exists(self.result): # # Check if the un-sorted prefix exists # unsorted_hash_path = (resdir + "/" + # self.getResPrefix(param, do_sort=False) + # self.prefix_dataset(param, do_sort=False) + # self.prefix_patch(param, do_sort=False) + # self.prefix_learning(param, do_sort=False)) # if os.path.exists(unsorted_hash_path): # shutil.copytree(unsorted_hash_path, self.result) # shutil.rmtree(unsorted_hash_path) # # create result directory if it does not exist # if not os.path.exists(self.result): # os.makedirs(self.result) if os.name == "posix": lock.unlock() elif os.name == "nt": lock.release() else: pass
def dmg_signpackage(pkgfile, dstfile, keychain, mac_id, subject_ou, fake=False, passphrase=None): """ Sign a mac build, putting results into `dstfile`. pkgfile must be a tar, which gets unpacked, signed, and repacked. """ # Keep track of our output in a list here, and we can output everything # when we're done This is to avoid interleaving the output from # multiple processes. from flufl.lock import Lock, TimeOutError, NotLockedError from datetime import timedelta import pexpect # TODO: Is it even possible to do 'fake' signing? logs = [] logs.append("Repacking %s to %s" % (pkgfile, dstfile)) # pexpect requires a string as input unlock_command = 'security unlock-keychain ' + keychain lock_command = ['security', 'lock-keychain', keychain] lockfile = os.path.join(os.path.dirname(keychain), '.lock') tmpdir = tempfile.mkdtemp() try: # Unpack it logs.append("Unpacking %s to %s" % (pkgfile, tmpdir)) unpacktar(pkgfile, tmpdir) for macdir in os.listdir(tmpdir): macdir = os.path.join(tmpdir, macdir) log.debug('Checking if we should sign %s', macdir) if shouldSign(macdir, 'mac'): log.debug('Need to sign %s', macdir) try: sign_lock = None # Acquire a lock for the signing command, to ensure we don't have a # race condition where one process locks the keychain immediately after another # unlocks it. log.debug("Try to acquire %s", lockfile) sign_lock = Lock(lockfile) # Put a 30 second timeout on waiting for the lock. sign_lock.lock(timedelta(0, 30)) # Unlock the keychain so that we do not get a user-interaction prompt to use # the keychain for signing. This operation requires a password. child = pexpect.spawn(unlock_command) child.expect('password to unlock .*') child.sendline(passphrase) # read output until child exits child.read() child.close() if child.exitstatus != 0: raise ValueError("keychain unlock failed") # Sign the thing! dmg_signfile(macdir, keychain, mac_id, subject_ou, fake) except TimeOutError: # timed out acquiring lock, give an error log.exception("Timeout acquiring lock %s for codesign, is something broken? ", lockfile) raise except: # catch any other locking error log.exception("Error acquiring %s for codesign, is something broken?", lockfile) raise finally: # Lock the keychain again, no matter what happens # This command does not require a password check_call(lock_command) # Release the lock, if it was acquired if sign_lock: try: sign_lock.unlock() log.debug("Release %s", lockfile) except NotLockedError: log.debug("%s was already unlocked", lockfile) # Repack it logs.append("Packing %s" % dstfile) tar_dir(dstfile, tmpdir) except: log.exception("Error signing %s", pkgfile) raise finally: # Clean up after ourselves, and output our logs shutil.rmtree(tmpdir) log.info("\n ".join(logs))
def load_data_for_set(self, pathconf, param, mode): # ---------------------------------------------------------------------- # Train, Validation, and Test # mlab = matlab.engine.start_matlab() # Read from pathconf # Original implementation train_data_dir = os.path.normpath(pathconf.dataset) dump_data_dir = os.path.normpath(pathconf.train_dump) dump_patch_dir = os.path.normpath(pathconf.patch_dump) # local (or volatile) copy of the dump data tmp_patch_dir = os.path.normpath(pathconf.volatile_patch_dump) # print("train_data_dir = {}".format(train_data_dir)) # print("dump_data_dir = {}".format(dump_data_dir)) # print("dump_patch_dir = {}".format(dump_patch_dir)) # print("tmp_patch_dir = {}".format(tmp_patch_dir)) if not os.path.exists(dump_data_dir): os.makedirs(dump_data_dir) if not os.path.exists(dump_patch_dir): os.makedirs(dump_patch_dir) if not os.path.exists(tmp_patch_dir): os.makedirs(tmp_patch_dir) # Check if we have the big h5 file ready big_file_name = dump_patch_dir + mode + "-data-chunked.h5" # if os.getenv("MLTEST_DEBUG", default=""): # import pdb # pdb.set_trace() # Mutex lock # # We will create an nfs-safe lock file in a temporary directory to # prevent our script from using corrupted, or data that is still being # generated. This allows us to launch multiple instances at the same # time, and allow only a single instance to generate the big_file. if not os.path.exists(".locks"): os.makedirs(".locks") check_lock_file = ".locks/" + \ hashlib.md5(big_file_name.encode()).hexdigest() if os.name == "posix": check_lock = Lock(check_lock_file) check_lock.lifetime = timedelta(days=2) frameinfo = getframeinfo(currentframe()) print("-- {}/{}: waiting to obtain lock --".format( frameinfo.filename, frameinfo.lineno)) check_lock.lock() print(">> obtained lock for posix system<<") elif os.name == "nt": import filelock check_lock = filelock.FileLock(check_lock_file) check_lock.timeout = 2000 check_lock.acquire() if check_lock.is_locked: print(">> obtained lock for windows system <<") else: print("Unknown operating system, lock unavailable") # if the large training data file does not exist if not os.path.exists(big_file_name): print("big data file does not exist...") # if the patch-mode-data file does not exist if not os.path.exists( os.path.join(dump_patch_dir, mode + "-data.h5")): print("{0} does not exist...".format( os.path.join(dump_patch_dir, mode + "-data.h5"))) # Read scale histogram hist_file_path = train_data_dir + "scales-histogram-minsc-" + str( param.dataset.fMinKpSize) + ".h5" if not os.path.exists(hist_file_path): print("Hist file does not exist, creating...") get_scale_hist(train_data_dir, param) # print("Loading hist file...") hist_file = h5py.File(hist_file_path, "r") scale_hist = np.asarray(hist_file["histogram_bins"], dtype=float).flatten() # print(scale_hist) scale_hist /= np.sum(scale_hist) scale_hist_c = np.asarray( hist_file["histogram_centers"]).flatten() # Read list of images from split files split_name = "" split_name += str(param.dataset.nTrainPercent) + "-" split_name += str(param.dataset.nValidPercent) + "-" split_name += str(param.dataset.nTestPercent) + "-" if mode == "train": # split_name += "train-" split_name += "train" elif mode == "valid": # split_name += "val-" split_name += "val" elif mode == "test": # split_name += "test-" split_name += "test" print("split_name: {}".format(split_name)) # split_file_name = train_data_dir + "split-" \ # + split_name + "minsc-" \ # + str(param.dataset.fMinKpSize) + ".h.txt" # split_file_name = "split-" + split_name + 
"minsc-" + str(param.dataset.fMinKpSize) + ".h.txt" split_file_name = "split-" + split_name + ".txt" split_file_name = train_data_dir + split_file_name # split_file_name = os.path.join(train_data_dir, split_file_name) print("split_file_name: {}".format(split_file_name)) if not os.path.exists(split_file_name): print("split_file_name does not exist...") list_jpg_file = get_list_of_img(train_data_dir, dump_data_dir, param, mode) else: print("split_file_name exists...") list_jpg_file = [] for file_name in list( np.loadtxt(split_file_name, dtype=bytes)): list_jpg_file += [ file_name.decode("utf-8").replace( "-kp-minsc-" + str(param.dataset.fMinKpSize), ".jpg") ] # ------------------------------------------------- # Create dumps in parallel # I am lazy so create arguments in loop lol pool_arg = [None] * len(list_jpg_file) for idx_jpg in six.moves.xrange(len(list_jpg_file)): pool_arg[idx_jpg] = (idx_jpg, list_jpg_file[idx_jpg], train_data_dir, dump_data_dir, tmp_patch_dir, scale_hist, scale_hist_c, self.out_dim, param) # # if true, use multi thread, otherwise use only single thread prod = True if prod: number_of_process = int(ratio_CPU * mp.cpu_count()) pool = mp.Pool(processes=number_of_process) manager = mp.Manager() queue = manager.Queue() for idx_jpg in six.moves.xrange(len(list_jpg_file)): pool_arg[idx_jpg] = pool_arg[idx_jpg] + (queue, ) # map async pool_res = pool.map_async(createDump, pool_arg) # pool_res = pool.map_async(createDump, pool_arg, chunksize = int(len(list_jpg_file)/(number_of_process* mp.cpu_count()))) # monitor loop while True: if pool_res.ready(): print("Pool_res ready?") break else: size = queue.qsize() print("\r -- " + mode + ": Processing image {}/{}".format( size, len(list_jpg_file)), end="") # print(list_jpg_file[size]) sys.stdout.flush() time.sleep(1) pool.close() pool.join() print("\r -- " + mode + ": Finished Processing Images!") # for debugging, if multi thread is used, then it is difficult # to debug else: for idx_jpg in six.moves.xrange(len(list_jpg_file)): pool_arg[idx_jpg] = pool_arg[idx_jpg] + (None, ) for idx_jpg in six.moves.xrange(len(list_jpg_file)): createDump(pool_arg[idx_jpg]) print("\r -- " + mode + ": Processing image " "{}/{}".format(idx_jpg + 1, len(list_jpg_file)), end="") sys.stdout.flush() print("\r -- " + mode + ": Finished Processing Images!") # ------------------------------------------------- # # -------------------- # use single thread for simplify debugging # for idx_jpg in six.moves.xrange(len(list_jpg_file)): # pool_arg[idx_jpg] = pool_arg[idx_jpg] + (None,) # for idx_jpg in six.moves.xrange(len(list_jpg_file)): # createDump(pool_arg[idx_jpg]) # print("\r -- " + mode + ": Processing image " # "{}/{}".format(idx_jpg + 1, len(list_jpg_file)), # end="") # sys.stdout.flush() # print("\r -- " + mode + ": Finished Processing Images!") # ------------------------------------------------------------------ # Use only valid indices to ascertain mutual exclusiveness id_file_name = train_data_dir + "split-" id_file_name += str(param.dataset.nTrainPercent) + "-" id_file_name += str(param.dataset.nValidPercent) + "-" id_file_name += str(param.dataset.nTestPercent) + "-" id_file_name += ("minsc-" + str(param.dataset.fMinKpSize) + ".h5") if mode == "train": id_key = "indices_train" elif mode == "valid": id_key = "indices_val" elif mode == "test": id_key = "indices_test" # print(id_file_name) try: with h5py.File(id_file_name, "r") as id_file: id_2_keep = np.asarray(id_file[id_key]) except OSError as err: print(err) print("Creating idx file...") # if 
"unable to open file" in err: createsplitindexh5file(id_file_name, train_data_dir, param) with h5py.File(id_file_name, "r") as id_file: id_2_keep = np.asarray(id_file[id_key]) # print(id_2_keep) print("{0} has {1} sfmid points to keep...".format( id_key, len(id_2_keep))) # exit() # ind_2_keep = np.in1d(dataset[2], id_2_keep) # ind_2_keep += dataset[2] < 0 # loop through files to figure out how many valid items we have # pdb.set_trace() # for tracking of the dataset num_valid = 0 # print(len(list_jpg_file)) # exit() for idx_jpg in six.moves.xrange(len(list_jpg_file)): jpg_file = list_jpg_file[idx_jpg] print("\r -- " + mode + ": " "Reading dumps to figure out number of valid " "{}/{}".format(idx_jpg + 1, len(list_jpg_file)), end="") sys.stdout.flush() # Load created dump # final_dump_file_name = tmp_patch_dir + jpg_file.replace(".jpg", ".h5") # print(tmp_patch_dir) # print(jpg_file) final_dump_file_name = tmp_patch_dir + "\\" + os.path.basename( jpg_file)[:-4] + ".h5" # print(final_dump_file_name) # Use loadh5 and turn it back to original cur_data_set try: with h5py.File(final_dump_file_name, "r") as dump_file: # print(list(dump_file.keys())) cur_ids = dump_file["2"].value # kps = dump_file["valid_keypoints"][()] # cur_ids = np.asarray(kps[:, 4]) # print(cur_ids) except OSError as err: # print(err) continue # Find cur valid by looking at id_2_keep cur_valid = np.in1d(cur_ids, id_2_keep) # print(cur_valid) # Add all negative labels as valid (neg data) cur_valid += cur_ids < 0 # Sum it up num_valid += np.sum(cur_valid) # print(num_valid) print("\n -- " + mode + ": " "Found {} valid data points from {} files" "".format(num_valid, len(list_jpg_file))) # Get the first data to simply check the shape tmp_dump_file_name = tmp_patch_dir + "\\" + os.path.basename( list_jpg_file[-1])[:-4] + ".h5" with h5py.File(tmp_dump_file_name, "r") as dump_file: dataset_shape = [] dataset_type = [] for _idx in six.moves.xrange(len(dump_file.keys())): dataset_shape += [dump_file[str(_idx)].shape] dataset_type += [dump_file[str(_idx)].dtype] # create and save the large dataset chunk with h5py.File(big_file_name, "w-") as big_file: big_file["time_stamp"] = np.asarray(time.localtime()) name_list = ["x", "y", "ID", "pos", "angle", "coords"] # create the dataset storage chunk for __i in six.moves.xrange(len(dataset_shape)): big_file.create_dataset( name_list[__i], (num_valid, ) + dataset_shape[__i][1:], chunks=(1, ) + dataset_shape[__i][1:], maxshape=((num_valid, ) + dataset_shape[__i][1:]), dtype=dataset_type[__i]) # loop through the file to save to a big chunk save_base = 0 for idx_jpg in six.moves.xrange(len(list_jpg_file)): jpg_file = list_jpg_file[idx_jpg] print("\r -- " + mode + ": " "Saving the data to the big dump " "{}/{}".format(idx_jpg + 1, len(list_jpg_file)), end="") sys.stdout.flush() # Load created dump # final_dump_file_name = tmp_patch_dir + jpg_file.replace(".jpg", ".h5") final_dump_file_name = tmp_patch_dir + "\\" + os.path.basename( jpg_file)[:-4] + ".h5" # print(final_dump_file_name) # Use loadh5 and turn it back to original cur_data_set try: tmpdict = loadh5(final_dump_file_name) cur_data_set = tuple([ tmpdict[str(_idx)] for _idx in range(len(tmpdict.keys())) ]) # Find cur valid by looking at id_2_keep cur_valid = np.in1d(cur_data_set[2], id_2_keep) # Add all negative labels as valid (neg data) cur_valid += cur_data_set[2] < 0 for __i in six.moves.xrange(len(dataset_shape)): big_file[name_list[__i]][ save_base:save_base + np.sum(cur_valid )] = cur_data_set[__i][cur_valid] # Move base to the next 
chunk save_base += np.sum(cur_valid) except OSError as err: # print(err) # print("{0} skipped due to invalidity...".format(final_dump_file_name)) # sys.stdout.flush() continue # Assert that we saved all assert save_base == num_valid print("\n -- " + mode + ": " "Done saving {} valid data points from {} files" "".format(num_valid, len(list_jpg_file))) # -------------------------------------------------- # Cleanup dump for idx_jpg in six.moves.xrange(len(list_jpg_file)): jpg_file = list_jpg_file[idx_jpg] print("\r -- " + mode + ": " "Removing dump " "{}/{}".format(idx_jpg + 1, len(list_jpg_file)), end="") sys.stdout.flush() # Delete dump # final_dump_file_name = tmp_patch_dir + jpg_file.replace(".jpg", ".h5") final_dump_file_name = tmp_patch_dir + "\\" + os.path.basename( jpg_file)[:-4] + ".h5" try: os.remove(final_dump_file_name) except FileNotFoundError as err: pass print("\r -- " + mode + ": " "Cleaned up dumps! " "Local dump is now clean!") else: print(" -- Found old file without chunks. " "Copying to new file with chunks...") old_big_file_name = dump_patch_dir + mode + "-data.h5" with h5py.File(old_big_file_name, "r") as old_big_file, \ h5py.File(big_file_name, "w-") as big_file: dataset = [] # load old train into array name_list = ["x", "y", "ID", "pos", "angle", "coords"] for __i in six.moves.xrange(len(name_list)): dataset += [np.asarray(old_big_file[name_list[__i]])] # save train big_file["time_stamp"] = np.asarray(time.localtime()) # allocate and write for __i in six.moves.xrange(len(name_list)): if name_list[__i] == "x": chunk_shape = (1, ) + dataset[__i].shape[1:] else: chunk_shape = None big_file.create_dataset( name_list[__i], dataset[__i].shape, data=dataset[__i], chunks=chunk_shape, maxshape=dataset[__i].shape, ) print(" -- Finished creating chunked file, removing old...") os.remove(old_big_file_name) # ---------------------------------------------------------------------- # Copy to local tmp if necessary if not os.path.exists(tmp_patch_dir + mode + "-data-chunked.h5"): print(" -- " + mode + ": " "Local dump does not exist! " "Copying big dump to local drive... ") shutil.copy(dump_patch_dir + mode + "-data-chunked.h5", tmp_patch_dir + mode + "-data-chunked.h5") else: print(" -- " + mode + ": " "Local dump exists. Checking timestamp...") # get timestamp from nfs with h5py.File(dump_patch_dir + mode + "-data-chunked.h5", "r") \ as nfs_file: nfs_time = np.asarray(nfs_file["time_stamp"]) # get timestamp from local with h5py.File(tmp_patch_dir + mode + "-data-chunked.h5", "r") \ as local_file: local_time = np.asarray(local_file["time_stamp"]) # if the two files have different time stamps if any(nfs_time != local_time): print(" -- " + mode + ": " "Time stamps are different! " "Copying big dump to local drive... ") shutil.copy(dump_patch_dir + mode + "-data-chunked.h5", tmp_patch_dir + mode + "-data-chunked.h5") else: print(" -- " + mode + ": " "Time stamps are identical! Re-using local dump") # Free lock if os.name == "posix": check_lock.unlock() print("-- free lock --") elif os.name == "nt": check_lock.release() print("-- free lock --") else: pass # ---------------------------------------------------------------------- # Use local copy for faster speed print(" -- " + mode + ": Loading from local drive... 
") big_file_name = tmp_patch_dir + mode + "-data-chunked.h5" # open big_file and don"t close big_file = h5py.File(big_file_name, "r") x = big_file["x"] # work arround for h5py loading all things to memory read_batch_size = 10000 read_batch_num = int( np.ceil(float(big_file["x"].shape[0]) / float(read_batch_size))) # Manual, since I don't want to bother debugging the below # fields = ["y", "ID", "pos", "angle", "coords"] # for var_name in fields: # # allocate data # exec("{0} = np.zeros(big_file['{0}'].shape, " # "dtype=big_file['{0}'].dtype)".format(var_name)) # # copy data in batches # for idx_batch in six.moves.xrange(read_batch_num): # idx_s = idx_batch * read_batch_size # idx_e = (idx_batch + 1) * read_batch_size # idx_e = np.minimum(idx_e, big_file["x"].shape[0]) # exec("{0}[idx_s:idx_e] = np.asarray(big_file['{0}'][idx_s:idx_e])" # "". format(var_name)) # Allocate y = np.zeros(big_file["y"].shape, dtype=big_file["y"].dtype) ID = np.zeros(big_file["ID"].shape, dtype=big_file["ID"].dtype) pos = np.zeros(big_file["pos"].shape, dtype=big_file["pos"].dtype) angle = np.zeros(big_file["angle"].shape, dtype=big_file["angle"].dtype) coords = np.zeros(big_file["coords"].shape, dtype=big_file["coords"].dtype) # Copy data in batches for idx_batch in six.moves.xrange(read_batch_num): idx_s = idx_batch * read_batch_size idx_e = (idx_batch + 1) * read_batch_size idx_e = np.minimum(idx_e, big_file["x"].shape[0]) y[idx_s:idx_e] = np.asarray(big_file['y'][idx_s:idx_e]) ID[idx_s:idx_e] = np.asarray(big_file['ID'][idx_s:idx_e]) pos[idx_s:idx_e] = np.asarray(big_file['pos'][idx_s:idx_e]) angle[idx_s:idx_e] = np.asarray(big_file['angle'][idx_s:idx_e]) coords[idx_s:idx_e] = np.asarray(big_file['coords'][idx_s:idx_e]) # import pdb # pdb.set_trace() # # Make sure data is contiguos # y = np.ascontiguousarray(y) # ID = np.ascontiguousarray(ID) # pos = np.ascontiguousarray(pos) # angle = np.ascontiguousarray(angle) # coords = np.ascontiguousarray(coords) print(" -- " + mode + ": Done... ") return x, y, ID, pos, angle, coords
class SafeUpdater(object): """ Container for the safe update functionality. """ def __init__(self, lifetime=LOCK_LIFETIME, timeout=LOCK_TIMEOUT): """ Create a SafeUpdater with the given lock lifetime and timeout (see flufl.lock documentation). The defaults are the lifetime and timeout found in the config. """ self.lock = Lock(LOCK_FILE, lifetime=lifetime) self.timeout = timeout def clone_repo(self, repo): """ Clone a repository on the disk. The parent directories are created under the cloning directory from config. For example, the repository name might be "devs/joe/task1". If "devs/joe" was not a directory under the cloning directory, it is created. Raise an exception on failure. """ # Get the full path for "devs/joe/task1". repo_path = os.path.abspath(os.path.join(CLONE_DIR, repo)) # Sanity check - make sure the path is actually in the cloning # directory. if not repo_path.startswith(CLONE_DIR): raise Exception("Illegal base path for repo: %s" % repo_path) # Clone the task into the desired directory. # git will create the subdirectories if needed. SafeUpdater.run(["git", "clone", "gitolite3@localhost:%s" % repo, repo_path]) def update_repo(self, repo, allow_clone): """ Update a repository on the disk. If allow_clone is set, and the repository doesn't exist on the disk yet, it will be cloned with the clone_repo method. Raise an exception on failure. """ # Clone if needed. If the repository doesn't exist and we shouldn't # clone, raise an error for the caller. repo_path = os.path.join(CLONE_DIR, repo) if not os.path.isdir(repo_path): if allow_clone: # We only need to clone in this case. # Pulling (below) is not relevant. self.clone_repo(repo) return else: raise Exception("Directory doesn't exist, " "and allow_clone is false: %s" % repo_path) # Make sure the repository is up to date. # We temporarily change the working directory, for git. old_working_dir = os.getcwd() os.chdir(repo_path) try: # We want to "git pull" here, except that repositories may have # been updated with force, and we want the newest version. SafeUpdater.run(["git", "fetch", "origin"]) SafeUpdater.run(["git", "reset", "--hard", "origin/master"]) finally: os.chdir(old_working_dir) def generate_task(self, repo, update, allow_clone, gen_dir=None): """ Generate a task on the disk with TaskSandbox into gen_dir. If update is true, we first update the task. If both update and allow_clone are true, and the task doesn't exist on the disk yet, it will be cloned with the clone_repo method. If gen_dir is not specified, the default of TaskSandbox is used (auto.gen inside the task directory). Raise an exception on failure. """ if update: self.update_repo(repo, allow_clone) repo_path = os.path.abspath(os.path.join(CLONE_DIR, repo)) if not os.path.isdir(repo_path): raise Exception("Task directory not found: %s" % repo_path) TaskSandbox.execute(repo_path, gen_dir=gen_dir) def update_contest(self, repo, update, generate, add_new_users, update_users, auto_submit, auto_submit_new, auto_submit_all=False): """ Update a contest and its tasks on the database. This should be done after generating newly updated tasks with TaskSandbox, in order to update CMS. If generate is true, tasks are updated and generated (cloned if needed). The contest repository itself is updated (cloned if needed), if update is true. If update_users is true, the users repository is updated, and the contest's users are updated. Users are never modified or deleted (this requires manual action). 
auto_submit_tasks is a set/list of task repositories for which auto_submit will be invoked. If auto_submit_new is given, auto_submit will also be invoked for tasks that were not in the contest before. Raise an exception on failure. """ # Update/clone contest. if update: self.update_repo(repo, allow_clone=True) # Get contest module. repo_path = os.path.abspath(os.path.join(CLONE_DIR, repo)) module_path = os.path.join(repo_path, "module.yaml") # Read contest params. with open(module_path) as stream: contest_params = yaml.safe_load(stream) # Update/clone users, and add them. if add_new_users: self.add_new_users(contest_params["users_file"], update_users, contest_params["short_name"]) if generate: # Clone and generate tasks. for task in contest_params["tasks"]: task_repo = task["path"] self.generate_task(task_repo, update=True, allow_clone=True) # Fetch the tasks that were already in the contest before. # If an exception is raised, this contest is not yet in the database. contest_name = contest_params["short_name"] try: existing_tasks = set(get_contest_tasks(contest_name)) except Exception: existing_tasks = set() # Note: cmsImportContest drops participations when updating # a contest. We can give the --update-contest flag because # our cmsImportContest script was modified to ignore # participations. See issue #775. SafeUpdater.run(["cmsImportContest", "--import-tasks", "--update-tasks", "--update-contest", repo_path]) # Invoke auto_submit for every task that didn't exist # in the contest before, and every task in auto_submit. for task in contest_params["tasks"]: is_new = task["short_name"] not in existing_tasks should_submit = auto_submit_all should_submit |= auto_submit_new and is_new should_submit |= task["path"] in auto_submit if should_submit: self.auto_submit(contest_name, task) def auto_submit(self, contest_name, task_info): """ Perform auto submission for the given contest and task, removing previous auto submissions from the database. If the task does not specify any solutions to auto submit, do nothing. """ username = "******" task_name = task_info["short_name"] task_dir = os.path.join(CLONE_DIR, task_info["path"]) processor = create_processor(task_dir) # For each submission, we convert the list of files to a dictionary # that maps the submission filename to the path. For example: # {"Task.%l": "path/to/sol.cpp"} # This relies on the type being batch, with a single file # per submission. auto_submit_items = [] for item in processor.get_auto_submit_items(): file_path = item["files"][0] auto_submit_items += [{"Task.%l": file_path}] if not auto_submit_items: return if not remove_submissions(contest_name, task_name, username): raise Exception("Auto submission failed: could not remove old " "submissions, they are in progress.") add_submissions(contest_name, task_name, username, auto_submit_items) def add_new_users(self, users_file, update_repo, contest_name=None): """ Add the users in the given YAML path to the database. Users that already exist are ignored. This never deletes or modifies existing users. If update_repo is true, update/clone the users repository first. If contest_name is given and it exists, add participations too. Raise an exception on failure. """ # Update the users repository. if update_repo: self.update_repo("users", allow_clone=True) # Get the information from the users file. 
yaml_path = os.path.join(CLONE_DIR, users_file) with open(yaml_path) as stream: users_info = yaml.safe_load(stream) add_users(users_info, contest_name) def __enter__(self): """ Lock when starting a "with" block. """ self.lock.lock(timeout=self.timeout) return self def __exit__(self, exc_type, exc_val, traceback): """ Unlock when finishing a "with" block. Any exceptions are raised to the caller. """ self.lock.unlock() return False @staticmethod def run(commands, input_string="", fail_abort=True): """ Run the given commands as a subprocess, wait for it to finish. If fail_abort is set, then a non-zero return code will trigger an exception. Return (return_code, stdout, stderr). """ process = subprocess.Popen(commands, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = process.communicate(input=input_string) return_code = process.returncode if return_code != 0 and fail_abort: raise Exception("Command returned non-zero: %s\n" "Return code: %s\n" "Stdout: %s\n" "Stderr: %s\n" % (commands, return_code, stdout, stderr)) return (return_code, stdout, stderr)
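# Usage sketch (assumed): SafeUpdater takes the global lock in __enter__, so a
# full contest update is wrapped in a "with" block.  The repository path is
# illustrative.
if __name__ == "__main__":
    with SafeUpdater() as updater:
        updater.update_contest("contests/demo",
                               update=True,
                               generate=True,
                               add_new_users=True,
                               update_users=True,
                               auto_submit=set(),
                               auto_submit_new=False)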