def add_engines(n=1, profile='iptest', total=False):
    """add a number of engines to a given profile.

    If total is True, then already running engines are counted, and only
    the additional engines necessary (if any) are started.
    """
    rc = Client(profile=profile)
    base = len(rc)

    if total:
        n = max(n - base, 0)

    eps = []
    for i in range(n):
        ep = TestProcessLauncher()
        ep.cmd_and_args = ipengine_cmd_argv + [
            '--profile=%s' % profile,
            '--InteractiveShell.colors=nocolor'
        ]
        ep.start()
        launchers.append(ep)
        eps.append(ep)
    tic = time.time()
    while len(rc) < base + n:
        if any([ep.poll() is not None for ep in eps]):
            raise RuntimeError("A test engine failed to start.")
        elif time.time() - tic > 15:
            raise RuntimeError("Timeout waiting for engines to connect.")
        time.sleep(.1)
    rc.close()
    return eps
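# Illustrative call of the helper above (not part of the original source): make sure
# at least four engines are attached to the 'iptest' profile, starting only however
# many are missing. Assumes a controller for that profile is already running.
engine_procs = add_engines(4, profile='iptest', total=True)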
class ParallelPool(object):

    def __init__(self):
        # Load configuration
        self.c = Configuration.Configuration()

        # Now instance the pool of batch workers according
        # to the technology selected in the configuration file
        if self.c.parallel.technology == 'ipython':
            self.IPYc = Client(profile=self.c.parallel.ipython.profile)
            self.pool = self.IPYc[:]
        elif self.c.parallel.technology == 'python':
            if self.c.parallel.python.number_of_processes == 0:
                n_cpus = multiprocessing.cpu_count()
            else:
                n_cpus = self.c.parallel.python.number_of_processes
            self.pool = multiprocessing.Pool(n_cpus)
        else:
            raise ValueError("Unknown technology %s in configuration file"
                             % (self.c.parallel.technology))

    # The following methods simply forward the requests to the
    # batch worker technology

    def map(self, *args, **kwargs):
        if self.c.parallel.technology == 'ipython':
            return self.pool.map(*args, **kwargs).get()
        else:
            return self.pool.map(*args, **kwargs)

    def imap(self, *args, **kwargs):
        return self.pool.imap(*args, **kwargs)

    def close(self):
        if self.c.parallel.technology == 'ipython':
            self.IPYc.close()
        else:
            self.pool.close()
            self.pool.join()
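# A minimal usage sketch for the ParallelPool wrapper above. The worker function is
# illustrative, and a valid configuration file readable by Configuration.Configuration()
# is assumed to exist.
def _double(x):
    return 2 * x

pool = ParallelPool()                  # backend chosen by the configuration file
doubled = pool.map(_double, range(8))  # same call regardless of backend
pool.close()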
def __enter__(self):
    args = []
    if self.profile is not None:
        args.append('--profile=' + self.profile)
    if self.cluster_id is not None:
        args.append('--cluster-id=' + self.cluster_id)
    if self.num_engines is not None:
        args.append('--n=' + str(self.num_engines))
    if self.ipython_dir is not None:
        args.append('--ipython-dir=' + self.ipython_dir)
    cmd = ' '.join(['ipcluster start --daemonize'] + args)
    self.logger.info('Starting IPython cluster with "' + cmd + '"')
    os.system(cmd)

    num_engines, timeout = self.num_engines, self.timeout
    time.sleep(self.min_wait)
    waited = self.min_wait
    client = None
    while client is None:
        try:
            client = Client(profile=self.profile, cluster_id=self.cluster_id)
        except (IOError, TimeoutError):
            if waited >= self.timeout:
                raise IOError('Could not connect to IPython cluster controller')
            if waited % 10 == 0:
                self.logger.info('Waiting for controller to start ...')
            time.sleep(1)
            waited += 1

    if num_engines is None:
        while len(client) == 0 and waited < timeout:
            if waited % 10 == 0:
                self.logger.info('Waiting for engines to start ...')
            time.sleep(1)
            waited += 1
        if len(client) == 0:
            raise IOError('IPython cluster engines failed to start')
        wait = min(waited, timeout - waited)
        if wait > 0:
            self.logger.info(f'Waiting {wait} more seconds for engines to start ...')
            time.sleep(wait)
    else:
        running = len(client)
        while running < num_engines and waited < timeout:
            if waited % 10 == 0:
                self.logger.info(f'Waiting for {num_engines-running} of {num_engines} engines to start ...')
            time.sleep(1)
            waited += 1
            running = len(client)
        running = len(client)
        if running < num_engines:
            raise IOError(f'{num_engines-running} of {num_engines} IPython cluster engines failed to start')

    # make sure all (potential) engines are in the same cwd, so they can import the same code
    client[:].apply_sync(os.chdir, os.getcwd())

    client.close()
    self.pool = IPythonPool(profile=self.profile, cluster_id=self.cluster_id)
    return self.pool
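# A minimal sketch of the connect-and-wait pattern used above, reduced to public
# ipyparallel calls only (Client, len(client), close). It assumes an ipcluster was
# launched out of band, e.g. ``ipcluster start -n 4 --daemonize``; the profile name
# and timeout values are illustrative.
import time
from ipyparallel import Client, TimeoutError


def wait_for_engines(n_engines, profile='default', timeout=60):
    """Block until the controller answers and n_engines have registered."""
    waited = 0
    client = None
    while client is None:
        try:
            client = Client(profile=profile)
        except (IOError, TimeoutError):
            if waited >= timeout:
                raise IOError('Could not connect to IPython cluster controller')
            time.sleep(1)
            waited += 1
    while len(client) < n_engines and waited < timeout:
        time.sleep(1)
        waited += 1
    return client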
def motion_correct_parallel(file_names, fr, template=None, margins_out=0, max_shift_w=5,
                            max_shift_h=5, remove_blanks=False, apply_smooth=True,
                            backend='single_thread'):
    """motion correct many movies using the ipyparallel cluster

    Parameters
    ----------
    file_names: list of strings
        names of the files to be motion corrected
    fr: double
        fr parameters for calcblitz movie
    margins_out: int
        number of pixels to remove from the borders

    Return
    ------
    base file names of the motion corrected files
    """
    args_in = []
    for f in file_names:
        args_in.append((f, fr, margins_out, template, max_shift_w,
                        max_shift_h, remove_blanks, apply_smooth))

    try:
        if backend == 'ipyparallel':
            c = Client()
            dview = c[:]
            file_res = dview.map_sync(process_movie_parallel, args_in)
            dview.results.clear()
            c.purge_results('all')
            c.purge_everything()
            c.close()
        elif backend == 'single_thread':
            file_res = map(process_movie_parallel, args_in)
        else:
            raise Exception('Unknown backend')
    except:
        try:
            if backend == 'ipyparallel':
                dview.results.clear()
                c.purge_results('all')
                c.purge_everything()
                c.close()
        except UnboundLocalError as uberr:
            print('could not close client')
        raise

    return file_res
def start_server(slurm_script=None, ipcluster="ipcluster", ncpus=None):
    """
    programmatically start the ipyparallel server

    Parameters:
    ----------
    ncpus: int
        number of processors

    ipcluster : str
        ipcluster binary file name; requires 4 path separators on Windows.
        ipcluster="C:\\\\Anaconda2\\\\Scripts\\\\ipcluster.exe"
        Default: "ipcluster"
    """
    logger.info("Starting cluster...")
    if ncpus is None:
        ncpus = psutil.cpu_count()

    if slurm_script is None:
        if ipcluster == "ipcluster":
            subprocess.Popen(
                "ipcluster start -n {0} -- --location=localhost".format(
                    ncpus
                ),  # localhost needed b/c default listen only on 127.0.0.1 not external ips
                shell=True,
                close_fds=(os.name != 'nt'))
        else:
            subprocess.Popen(shlex.split("{0} start -n {1}".format(
                ipcluster, ncpus)),
                shell=True,
                close_fds=(os.name != 'nt'))

        # Check that all processes have started
        client = ipyparallel.Client()
        while len(client) < ncpus:  # client len will get longer as workers start and connect to hub
            time.sleep(0.5)

        logger.debug('Making sure everything is up and running')
        ar = client.direct_view().apply_async(lambda x: x + 1, 10)
        ar.get(timeout=10)  # when done on all, we're set to go. 10s timeout
        client.close()
        logger.debug('Cluster started successfully.')
    else:
        shell_source(slurm_script)
        pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
        print([pdir, profile])
        c = Client(ipython_dir=pdir, profile=profile)
        ee = c[:]
        ne = len(ee)
        print(('Running on %d engines.' % (ne)))
        c.close()
        sys.stdout.write(" done\n")
def cluster_status():
    try:
        rcl = Client()
        nworkers = len(rcl[:])
        qstat = rcl.queue_status()
        queued = qstat[u'unassigned']
        working = sum([qstat[w][u'tasks'] for w in rcl.ids])
        idle = nworkers - working
        rcl.close()
    except:
        nworkers, queued, working, idle = 0, 0, 0, 0
    return nworkers, queued, working, idle
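# Hypothetical monitoring loop built on cluster_status() above; the polling interval,
# round count and print format are illustrative only.
import time


def watch_cluster(poll_seconds=5, rounds=3):
    for _ in range(rounds):
        nworkers, queued, working, idle = cluster_status()
        print('engines=%d queued=%d working=%d idle=%d'
              % (nworkers, queued, working, idle))
        time.sleep(poll_seconds)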
def start_server(slurm_script=None, ipcluster="ipcluster", ncpus=None):
    '''
    programmatically start the ipyparallel server

    Parameters
    ----------
    ncpus: int
        number of processors

    ipcluster : str
        ipcluster binary file name; requires 4 path separators on Windows.
        ipcluster="C:\\\\Anaconda2\\\\Scripts\\\\ipcluster.exe"
        Default: "ipcluster"
    '''
    sys.stdout.write("Starting cluster...")
    sys.stdout.flush()

    if ncpus is None:
        ncpus = psutil.cpu_count()

    if slurm_script is None:
        if ipcluster == "ipcluster":
            p1 = subprocess.Popen("ipcluster start -n {0}".format(ncpus),
                                  shell=True, close_fds=(os.name != 'nt'))
        else:
            p1 = subprocess.Popen(shlex.split("{0} start -n {1}".format(
                ipcluster, ncpus)),
                shell=True, close_fds=(os.name != 'nt'))
        #
        while True:
            try:
                c = ipyparallel.Client()
                if len(c) < ncpus:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                    raise ipyparallel.error.TimeoutError
                c.close()
                break
            except (IOError, ipyparallel.error.TimeoutError):
                sys.stdout.write(".")
                sys.stdout.flush()
                time.sleep(1)
    else:
        shell_source(slurm_script)
        pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
        c = Client(ipython_dir=pdir, profile=profile)
        ee = c[:]
        ne = len(ee)
        print(('Running on %d engines.' % (ne)))
        c.close()

    sys.stdout.write(" done\n")
def start_server(slurm_script: str = None, ipcluster: str = "ipcluster", ncpus: int = None) -> None:
    """
    programmatically start the ipyparallel server

    Args:
        ncpus: int
            number of processors

        ipcluster : str
            ipcluster binary file name; requires 4 path separators on Windows.
            ipcluster="C:\\\\Anaconda3\\\\Scripts\\\\ipcluster.exe"
            Default: "ipcluster"
    """
    logger.info("Starting cluster...")
    if ncpus is None:
        ncpus = psutil.cpu_count()

    if slurm_script is None:
        if ipcluster == "ipcluster":
            subprocess.Popen("ipcluster start -n {0}".format(ncpus),
                             shell=True, close_fds=(os.name != 'nt'))
        else:
            subprocess.Popen(shlex.split("{0} start -n {1}".format(
                ipcluster, ncpus)),
                shell=True, close_fds=(os.name != 'nt'))
        time.sleep(1.5)
        # Check that all processes have started
        client = ipyparallel.Client()
        time.sleep(1.5)
        while len(client) < ncpus:
            sys.stdout.write(".")  # Give some visual feedback of things starting
            sys.stdout.flush()     # (de-buffered)
            time.sleep(0.5)
        logger.debug('Making sure everything is up and running')
        client.direct_view().execute('__a=1', block=True)  # when done on all, we're set to go
    else:
        shell_source(slurm_script)
        pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
        logger.debug([pdir, profile])
        c = Client(ipython_dir=pdir, profile=profile)
        ee = c[:]
        ne = len(ee)
        logger.info(('Running on %d engines.' % (ne)))
        c.close()

    sys.stdout.write("start_server: done\n")
def start_server(slurm_script=None, ipcluster="ipcluster", ncpus=None):
    """
    programmatically start the ipyparallel server

    Parameters:
    ----------
    ncpus: int
        number of processors

    ipcluster : str
        ipcluster binary file name; requires 4 path separators on Windows.
        ipcluster="C:\\\\Anaconda2\\\\Scripts\\\\ipcluster.exe"
        Default: "ipcluster"
    """
    sys.stdout.write("Starting cluster...")
    sys.stdout.flush()
    if ncpus is None:
        ncpus = psutil.cpu_count()

    if slurm_script is None:
        if ipcluster == "ipcluster":
            subprocess.Popen(
                "ipcluster start -n {0}".format(ncpus),
                shell=True, close_fds=(os.name != 'nt'))
        else:
            subprocess.Popen(shlex.split(
                "{0} start -n {1}".format(ipcluster, ncpus)),
                shell=True, close_fds=(os.name != 'nt'))

        # Check that all processes have started
        time.sleep(1)
        client = ipyparallel.Client()
        while len(client) < ncpus:
            sys.stdout.write(".")
            sys.stdout.flush()
            client.close()
            time.sleep(1)
            client = ipyparallel.Client()
        print('Making Sure everything is up and running')
        time.sleep(10)
        client.close()
    else:
        shell_source(slurm_script)
        pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
        print([pdir, profile])
        c = Client(ipython_dir=pdir, profile=profile)
        ee = c[:]
        ne = len(ee)
        print(('Running on %d engines.' % (ne)))
        c.close()

    sys.stdout.write(" done\n")
def _nengines_up(url_file):
    "return the number of engines up"
    client = None
    try:
        client = Client(url_file, timeout=60)
        up = len(client.ids)
        client.close()
    # the controller isn't up yet
    except iperror.TimeoutError:
        return 0
    # the JSON file is not available to parse
    except IOError:
        return 0
    else:
        return up
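# Sketch of how a probe like _nengines_up() can be polled until enough engines have
# registered. The url_file location (ipcontroller-client.json under the profile's
# security directory) and the timing values are assumptions, not from the source.
import time


def block_until_engines(url_file, want, deadline=120, poll=2):
    waited = 0
    while _nengines_up(url_file) < want:
        if waited >= deadline:
            raise RuntimeError('only %d engines registered' % _nengines_up(url_file))
        time.sleep(poll)
        waited += poll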
def start_server(slurm_script=None, ipcluster="ipcluster", ncpus=None):
    """
    programmatically start the ipyparallel server

    Parameters:
    ----------
    ncpus: int
        number of processors

    ipcluster : str
        ipcluster binary file name; requires 4 path separators on Windows.
        ipcluster="C:\\\\Anaconda2\\\\Scripts\\\\ipcluster.exe"
        Default: "ipcluster"
    """
    logger.info("Starting cluster...")
    if ncpus is None:
        ncpus = psutil.cpu_count()

    if slurm_script is None:
        if ipcluster == "ipcluster":
            subprocess.Popen(
                "ipcluster start -n {0}".format(ncpus),
                shell=True, close_fds=(os.name != 'nt'))
        else:
            subprocess.Popen(shlex.split(
                "{0} start -n {1}".format(ipcluster, ncpus)),
                shell=True, close_fds=(os.name != 'nt'))

        # Check that all processes have started
        time.sleep(1)
        client = ipyparallel.Client()
        while len(client) < ncpus:
            client.close()
            time.sleep(1)
            client = ipyparallel.Client()
        time.sleep(10)
        logger.debug('Making sure everything is up and running')
        client.close()
    else:
        shell_source(slurm_script)
        pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
        print([pdir, profile])
        c = Client(ipython_dir=pdir, profile=profile)
        ee = c[:]
        ne = len(ee)
        print(('Running on %d engines.' % (ne)))
        c.close()

    sys.stdout.write(" done\n")
def test_hubresult_timestamps(self):
    self.minimum_engines(4)
    v = self.client[:]
    ar = v.apply_async(time.sleep, 0.25)
    ar.get(2)
    rc2 = Client(profile='iptest')
    # must have try/finally to close second Client, otherwise
    # will have dangling sockets causing problems
    try:
        time.sleep(0.25)
        hr = rc2.get_result(ar.msg_ids)
        self.assertTrue(hr.elapsed > 0., "got bad elapsed: %s" % hr.elapsed)
        hr.get(1)
        self.assertTrue(hr.wall_time < ar.wall_time + 0.2,
                        "got bad wall_time: %s > %s" % (hr.wall_time, ar.wall_time))
        self.assertEqual(hr.serial_time, ar.serial_time)
    finally:
        rc2.close()
def map(self, parallel_task, args):
    from ipyparallel import Client, TimeoutError

    chunksize = 1
    if self.max_tasks > 0 and len(args) > self.max_tasks:
        chunksize = len(args) // self.max_tasks
        if chunksize * self.max_tasks < len(args):
            chunksize += 1
    client = None
    try:
        client = Client()
    except TimeoutError:
        raise RuntimeError(
            'Cannot connect to the ipyparallel client. Is it running?')

    ar = None
    try:
        client[:].use_cloudpickle()
        lbv = client.load_balanced_view()
        ar = lbv.map_async(IppFunctionWrapper(parallel_task, self.timeout),
                           args,
                           chunksize=chunksize)
        try:
            r = []
            for k, z in enumerate(
                    tqdm(ar, desc="(IPYPARALLEL)", total=len(args))):
                if z[0] == -1:
                    logger.error(z[1])
                    engine = ar.engine_id[k]
                    client.abort(ar)
                    client.close()
                    raise RuntimeError(
                        'remote failure (task %d of %d on engine %d)' %
                        (k + 1, len(ar), engine))
                elif z[0] == 0:
                    r.append(z[1])
        except KeyboardInterrupt:
            client.abort(ar)
            raise
    finally:
        # always close the client to release resources
        if ar:
            client.abort(ar)
        if client:
            client.close()

    return r
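# A stripped-down sketch of the load-balanced mapping used above, kept to real
# ipyparallel calls (Client, load_balanced_view, map_async, AsyncResult.get). The
# worker function and chunk size are illustrative; the cloudpickle setup, the
# IppFunctionWrapper error tuples and the tqdm progress bar of the original are omitted.
from ipyparallel import Client


def _square(x):
    return x * x


if __name__ == '__main__':
    client = Client()                      # connect to a running ipcluster
    try:
        lbv = client.load_balanced_view()  # tasks go to whichever engine is free
        ar = lbv.map_async(_square, range(100), chunksize=10)
        results = ar.get(timeout=60)       # block until every chunk has returned
    finally:
        client.close()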
def extract_rois_patch(file_name, d1, d2, rf=5, stride=5):
    idx_flat, idx_2d = extract_patch_coordinates(d1, d2, rf=rf, stride=stride)
    perctl = 95
    n_components = 2
    tol = 1e-6
    max_iter = 5000
    args_in = []
    for id_f, id_2d in zip(idx_flat, idx_2d):
        args_in.append((file_name, id_f, id_2d[0].shape,
                        perctl, n_components, tol, max_iter))
    st = time.time()
    print(len(idx_flat))
    try:
        if 1:
            c = Client()
            dview = c[:]
            file_res = dview.map_sync(nmf_patches, args_in)
        else:
            file_res = map(nmf_patches, args_in)
    finally:
        dview.results.clear()
        c.purge_results('all')
        c.purge_everything()
        c.close()

    print(time.time() - st)

    A1 = lil_matrix((d1 * d2, len(file_res)))
    C1 = []
    A2 = lil_matrix((d1 * d2, len(file_res)))
    C2 = []
    for count, f in enumerate(file_res):
        idx_, flt, ca, d = f
        #flt,ca,_=cse.order_components(coo_matrix(flt),ca)
        A1[idx_, count] = flt[:, 0][:, np.newaxis]
        A2[idx_, count] = flt[:, 1][:, np.newaxis]
        C1.append(ca[0, :])
        C2.append(ca[1, :])
        # pl.imshow(np.reshape(flt[:,0],d,order='F'),vmax=10)
        # pl.pause(.1)
    return A1, A2, C1, C2
def start_server(ncpus, slurm_script=None):
    '''
    programmatically start the ipyparallel server

    Parameters
    ----------
    ncpus: int
        number of processors
    '''
    sys.stdout.write("Starting cluster...")
    sys.stdout.flush()

    if slurm_script is None:
        subprocess.Popen(["ipcluster start -n {0}".format(ncpus)], shell=True)
        while True:
            try:
                c = ipyparallel.Client()
                if len(c) < ncpus:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                    raise ipyparallel.error.TimeoutError
                c.close()
                break
            except (IOError, ipyparallel.error.TimeoutError):
                sys.stdout.write(".")
                sys.stdout.flush()
                time.sleep(1)
    else:
        shell_source(slurm_script)
        from ipyparallel import Client
        pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
        c = Client(ipython_dir=pdir, profile=profile)
        ee = c[:]
        ne = len(ee)
        print('Running on %d engines.' % (ne))
        c.close()

    sys.stdout.write(" done\n")
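# Sketch of pairing start_server() above with a plain client connection and an explicit
# shutdown; "ipcluster stop" mirrors the "ipcluster start" call issued inside
# start_server. The engine count is illustrative.
import subprocess
import ipyparallel

start_server(4)                      # launches ipcluster and blocks until ready
client = ipyparallel.Client()
print('engines online: %d' % len(client))
client.close()
subprocess.call('ipcluster stop', shell=True)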
t1 = time()
file_res = cb.motion_correct_parallel(fnames, 30, template=template, margins_out=0,
                                      max_shift_w=45, max_shift_h=45,
                                      dview=client_[::2], remove_blanks=False)
t2 = time() - t1
print(t2)
#%%
fnames = []
for file in glob.glob(base_folder + 'k31_20160107_MMP_150um_65mW_zoom2p2_000*[0-9].hdf5'):
    fnames.append(file)
fnames.sort()
print(fnames)
#%%
file_res = cb.utils.pre_preprocess_movie_labeling(client_[::2], fnames,
                                                  median_filter_size=(2, 1, 1),
                                                  resize_factors=[.2, .1666666666],
                                                  diameter_bilateral_blur=4)
#%%
client_.close()
cse.utilities.stop_server(is_slurm=True)
#%%
#%%
fold = os.path.split(os.path.split(fnames[0])[-2])[-1]
os.mkdir(fold)
#%%
files = glob.glob(fnames[0][:-20] + '*BL_compress_.tif')
files.sort()
print(files)
#%%
m = cb.load_movie_chain(files, fr=3)
m.play(backend='opencv', gain=10, fr=40)
#%%
class ClusterLab(epyc.Lab): """A :class:`Lab` running on an ``pyparallel`` compute cluster. Experiments are submitted to engines in the cluster for execution in parallel, with the experiments being performed asynchronously to allow for disconnection and subsequent retrieval of results. Combined with a persistent :class:`LabNotebook`, this allows for fully decoupled access to an on-going computational experiment with piecewise retrieval of results. This class requires a cluster to already be set up and running, configured for persistent access, with access to the necessary code and libraries, and with appropriate security information available to the client. """ # Tuning parameters WaitingTime = 30 #: Waiting time for checking for job completion. Lower values increase network traffic. def __init__(self, notebook=None, url_file=None, profile=None, profile_dir=None, ipython_dir=None, context=None, debug=False, sshserver=None, sshkey=None, password=None, paramiko=None, timeout=10, cluster_id=None, use_dill=False, **extra_args): """Create an empty lab attached to the given cluster. Most of the arguments are as expected by the ``pyparallel.Client`` class, and are used to create the underlying connection to the cluster. The connection is opened immediately, meaning the cluster must be up and accessible when creating a lab to use it. :param notebook: the notebook used to results (defaults to an empty :class:`LabNotebook`) :param url_file: file containing connection information for accessing cluster :param profile: name of the IPython profile to use :param profile_dir: directory containing the profile's connection information :param ipython_dir: directory containing profile directories :param context: ZMQ context :param debug: whether to issue debugging information (defaults to False) :param sshserver: username and machine for ssh connections :param sshkey: file containing ssh key :param password: ssh password :param paramiko: True to use paramiko for ssh (defaults to False) :param timeout: timeout in seconds for ssh connection (defaults to 10s) :param cluster_id: string added to runtime files to prevent collisions :param use_dill: whether to use Dill as pickler (defaults to False)""" super(epyc.ClusterLab, self).__init__(notebook) # record all the connection arguments for later self._arguments = dict(url_file=url_file, profile=profile, profile_dir=profile_dir, ipython_dir=ipython_dir, context=context, debug=debug, sshserver=sshserver, sshkey=sshkey, password=password, paramiko=paramiko, timeout=timeout, cluster_id=cluster_id, use_dill=use_dill, **extra_args) self._client = None # connect to the cluster self.open() # use Dill if requested if use_dill: self.use_dill() # ---------- Protocol ---------- def open(self): """Connect to the cluster.""" if self._client is None: self._client = Client(**self._arguments) def close(self): """Close down the connection to the cluster.""" if self._client is not None: self._client.close() self._client = None def recreate(self): '''Save the arguments needed to re-connect to the cluster we use. :returns: a (classname, args) pair''' (cn, args) = super(ClusterLab, self).recreate() nargs = args.copy() nargs.update(self._arguments) return (classname, nargs) # ---------- Remote control of the compute engines ---------- def numberOfEngines(self): """Return the number of engines available to this lab. :returns: the number of engines""" return len(self.engines()) def engines(self): """Return a list of the available engines. 
:returns: a list of engines""" self.open() return self._client[:] def use_dill(self): """Make the cluster use Dill as pickler for transferring results. This isn't generally needed, but is sometimes useful for particularly complex experiments such as those involving closures. (Or, to put it another way, if you find yourself tempted to use this method, consider re-structuring your experiment code.)""" self.open() with self.sync_imports(quiet=True): import dill self._client.direct_view().use_dill() def sync_imports(self, quiet=False): """Return a context manager to control imports onto all the engines in the underlying cluster. This method is used within a ``with`` statement. Any imports should be done with no experiments running, otherwise the method will block until the cluster is quiet. Generally imports will be one of the first things done when connecting to a cluster. (But be careful not to accidentally try to re-import if re-connecting to a running cluster.) :param quiet: if True, suppresses messages (defaults to False) :returns: a context manager""" self.open() return self._client[:].sync_imports(quiet=quiet) # ---------- Running experiments ---------- def _mixup(self, ps): """Private method to mix up a list of values in-place using a Fisher-Yates shuffle (see https://en.wikipedia.org/wiki/Fisher-Yates_shuffle). :param ps: the array :returns: the array, shuffled in-place""" for i in range(len(ps) - 1, 0, -1): j = int(numpy.random.random() * i) temp = ps[i] ps[i] = ps[j] ps[j] = temp return ps def runExperiment(self, e): """Run the experiment across the parameter space in parallel using all the engines in the cluster. This method returns immediately. The experiments are run asynchronously, with the points in the parameter space being explored randomly so that intermediate retrievals of results are more representative of the overall result. Put another way, for a lot of experiments the results available will converge towards a final answer, so we can plot them and see the answer emerge. :param e: the experiment""" # create the parameter space space = self.parameterSpace() # only proceed if there's work to do if len(space) > 0: nb = self.notebook() # randomise the order of the parameter space so that we evaluate across # the space as we go along to try to make intermediate (incomplete) result # sets more representative of the overall result set ps = self._mixup(space) try: # connect to the cluster self.open() # submit an experiment at each point in the parameter space to the cluster view = self._client.load_balanced_view() jobs = [] for p in ps: jobs.extend((view.apply_async((lambda p: e.set(p).run()), p)).msg_ids) # there seems to be a race condition in submitting jobs, # whereby jobs get dropped if they're submitted too quickly time.sleep(0.01) # record the mesage ids of all the jobs as submitted but not yet completed psjs = zip(ps, jobs) for (p, j) in psjs: nb.addPendingResult(p, j) finally: # commit our pending results in the notebook nb.commit() self.close() def updateResults(self): """Update our results within any pending results that have completed since we last retrieved results from the cluster. 
:returns: the number of pending results completed at this call""" # we do all the tests for pending results against the notebook directly, # as the corresponding methods on self call this method themselves nb = self.notebook() # look for pending results if we're waiting for any n = 0 if nb.numberOfPendingResults() > 0: # we have results to get self.open() for j in set(nb.pendingResults()): # query the status of a job status = self._client.result_status(j, status_only=False) # add all completed jobs to the notebook if j in status['completed']: r = status[j] # update the result in the notebook, cancelling # the pending result as well # values come back from Client.result_status() in # varying degrees of list-nesting, which LabNotebook.addResult() # handles itself nb.addResult(r, j) # commit changes to the notebook nb.commit() # purge the completed job from the cluster self._client.purge_hub_results(j) # record that we retrieved the results for the given job n = n + 1 return n # ---------- Accessing results ---------- def numberOfResults(self): """Return the number of results we have available at the moment. :returns: the number of results""" self.updateResults() return self.notebook().numberOfResults() def numberOfPendingResults(self): """Return the number of resultswe are waiting for. :returns: the number of pending results""" self.updateResults() return self.notebook().numberOfPendingResults() def _availableResultsFraction(self): """Private method to return the fraction of results available, as a real number between 0 and 1. This does not update the results fetched from the cluster. :returns: the fraction of available results""" tr = self.notebook().numberOfResults() + self.notebook( ).numberOfPendingResults() if tr == 0: return 0 else: return (self.notebook().numberOfResults() + 0.0) / tr def readyFraction(self): """Test what fraction of results are available. This will change over time as the results come in. :returns: the fraction from 0 to 1""" self.updateResults() return self._availableResultsFraction() def ready(self): """Test whether all the results are available. This will change over time as the results come in. :returns: True if all the results are available""" return (self.readyFraction() == 1) def wait(self, timeout=-1): """Wait for all pending results to be finished. If timeout is set, return after this many seconds regardless. :param timeout: timeout period in seconds (defaults to forever) :returns: True if all the results completed""" # we can't use pyparallel.Client.wait() for this, because that # method only works for cases where the Client object is the one that # submitted the jobs to the cluster hub -- and therefore has the # necessary data structures to perform synchronisation. This isn't the # case for us, as one of the main goals of epyc is to support disconnected # operation, which implies a different Client object retrieving results # than the one that submitted the jobs in the first place. This is # unfortunate, but understandable given the typical use cases for # Client objects in pyparallel. # # Instead. we have to code around a little busily. The ClusterLab.WaitingTime # global sets the latency for waiting, and we repeatedly wait for this amount # of time before updating the results. 
The latency value essentially controls # how busy this process is: given that most simulations are expected to # be long, a latency in the tens of seconds feels about right as a default if self.numberOfPendingResults() > 0: # we've got pending results, wait for them timeWaited = 0 while (timeout < 0) or (timeWaited < timeout): if self.numberOfPendingResults() == 0: # no pending jobs left, we're complete return True else: # not done yet, calculate the waiting period if timeout == -1: # wait for the default waiting period dt = self.WaitingTime else: # wait for the default waiting period or until the end of the timeout. # whichever comes first if (timeout - timeWaited) < self.WaitingTime: dt = timeout - timeWaited else: dt = self.WaitingTime # sleep for a while time.sleep(dt) timeWaited = timeWaited + dt # if we get here, the timeout expired, so do a final check # and then exit return (self.numberOfPendingResults() == 0) else: # no results, so we got them all return True # ---------- Managing pending results ---------- def pendingResults(self): """Return the list of job iods for any pending results. :returns: a list of job ids""" return self.notebook().pendingResults() def pendingResultsFor(self, params): """Return a list of job ids for any results pending for experiments at the given point in the parameter space. :param params: the experimental parameters :returns: a list of job ids""" return self.notebook().pendingResultsFor(params) def _abortJobs(self, js): """Private method to abort a set of jobs. :param js: the job ids to be aborted""" self.open() self._client.abort(jobs=js) self.close() def cancelPendingResultsFor(self, params): """Cancel any results pending for experiments at the given point in the parameter space. :param params: the experimental parameters""" # grab the result job ids jobs = self.pendingResultsFor(params) if len(jobs) > 0: # abort in the cluster self._abortJobs(jobs) # cancel in the notebook self.notebook().cancelPendingResultsFor(params) def cancelAllPendingResults(self): """Cancel all pending results.""" # grab all the pending job ids jobs = self.pendingResults() if len(jobs) > 0: # abort in the cluster self._abortJobs(jobs) # cancel in the notebook self.notebook().cancelAllPendingResults()
def dask_executor():
    from distributed import LocalCluster, Client
    client = Client(n_workers=1)
    yield client
    client.close()
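# Sketch of how a generator fixture like dask_executor is typically consumed under
# pytest; the decorator, fixture name and test below are illustrative and not part of
# the original source.
import pytest
from distributed import Client


@pytest.fixture
def dask_client():
    client = Client(n_workers=1)   # spins up a throwaway LocalCluster
    yield client
    client.close()


def test_submit_roundtrip(dask_client):
    assert dask_client.submit(lambda x: x + 1, 41).result() == 42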
def run_CNMF_patches(file_name, shape, options, rf=16, stride=4, n_processes=2, backend='single_thread', memory_fact=1): """Function that runs CNMF in patches, either in parallel or sequentiually, and return the result for each. It requires that ipyparallel is running Parameters ---------- file_name: string full path to an npy file (2D, pixels x time) containing the movie shape: tuple of thre elements dimensions of the original movie across y, x, and time options: dictionary containing all the parameters for the various algorithms rf: int half-size of the square patch in pixel stride: int amount of overlap between patches backend: string 'ipyparallel' or 'single_thread' n_processes: int nuber of cores to be used (should be less than the number of cores started with ipyparallel) memory_fact: double unitless number accounting how much memory should be used. It represents the fration of patch processed in a single thread. You will need to try different values to see which one would work Returns ------- A_tot: matrix containing all the componenents from all the patches C_tot: matrix containing the calcium traces corresponding to A_tot sn_tot: per pixel noise estimate optional_outputs: set of outputs related to the result of CNMF ALGORITHM ON EACH patch """ (d1, d2, T) = shape d = d1 * d2 K = options['init_params']['K'] options['preprocess_params']['backend'] = 'single_thread' options['preprocess_params']['n_pixels_per_process'] = np.int( (rf * rf) / memory_fact) options['spatial_params']['n_pixels_per_process'] = np.int( (rf * rf) / memory_fact) options['temporal_params']['n_pixels_per_process'] = np.int( (rf * rf) / memory_fact) options['spatial_params']['backend'] = 'single_thread' options['temporal_params']['backend'] = 'single_thread' idx_flat, idx_2d = extract_patch_coordinates(d1, d2, rf=rf, stride=stride) # import pdb # pdb.set_trace() args_in = [] for id_f, id_2d in zip(idx_flat[:], idx_2d[:]): args_in.append((file_name, id_f, id_2d[0].shape, options)) print len(idx_flat) st = time.time() if backend is 'ipyparallel': try: c = Client() dview = c[:n_processes] file_res = dview.map_sync(cnmf_patches, args_in) dview.results.clear() c.purge_results('all') c.purge_everything() c.close() except: print('Something went wrong') raise finally: print('You may think that it went well but reality is harsh') elif backend is 'single_thread': file_res = map(cnmf_patches, args_in) else: raise Exception('Backend unknown') print time.time() - st # extract the values from the output of mapped computation num_patches = len(file_res) A_tot = scipy.sparse.csc_matrix((d, K * num_patches)) B_tot = scipy.sparse.csc_matrix((d, num_patches)) C_tot = np.zeros((K * num_patches, T)) F_tot = np.zeros((num_patches, T)) mask = np.zeros(d) sn_tot = np.zeros((d1 * d2)) b_tot = [] f_tot = [] bl_tot = [] c1_tot = [] neurons_sn_tot = [] g_tot = [] idx_tot = [] shapes_tot = [] id_patch_tot = [] count = 0 patch_id = 0 print 'Transforming patches into full matrix' for idx_, shapes, A, b, C, f, S, bl, c1, neurons_sn, g, sn, _ in file_res: sn_tot[idx_] = sn b_tot.append(b) f_tot.append(f) bl_tot.append(bl) c1_tot.append(c1) neurons_sn_tot.append(neurons_sn) g_tot.append(g) idx_tot.append(idx_) shapes_tot.append(shapes) mask[idx_] += 1 F_tot[patch_id, :] = f B_tot[idx_, patch_id] = b for ii in range(np.shape(A)[-1]): new_comp = A.tocsc()[:, ii] / np.sqrt( np.sum(np.array(A.tocsc()[:, ii].todense())**2)) if new_comp.sum() > 0: A_tot[idx_, count] = new_comp C_tot[count, :] = C[ii, :] id_patch_tot.append(patch_id) count += 1 patch_id 
+= 1 A_tot = A_tot[:, :count] C_tot = C_tot[:count, :] optional_outputs = dict() optional_outputs['b_tot'] = b_tot optional_outputs['f_tot'] = f_tot optional_outputs['bl_tot'] = bl_tot optional_outputs['c1_tot'] = c1_tot optional_outputs['neurons_sn_tot'] = neurons_sn_tot optional_outputs['g_tot'] = g_tot optional_outputs['idx_tot'] = idx_tot optional_outputs['shapes_tot'] = shapes_tot optional_outputs['id_patch_tot'] = id_patch_tot optional_outputs['B'] = B_tot optional_outputs['F'] = F_tot optional_outputs['mask'] = mask Im = scipy.sparse.csr_matrix((1. / mask, (np.arange(d), np.arange(d)))) Bm = Im.dot(B_tot) A_tot = Im.dot(A_tot) f = np.mean(F_tot, axis=0) for iter in range(10): b = Bm.dot(F_tot.dot(f)) / np.sum(f**2) f = np.dot((Bm.T.dot(b)).T, F_tot) / np.sum(b**2) return A_tot, C_tot, b, f, sn_tot, optional_outputs
idx_components = np.union1d(idx_components_r, idx_components_raw)
idx_components = np.union1d(idx_components, idx_components_delta)
idx_blobs = np.intersect1d(idx_components, idx_blobs)
idx_components_bad = np.setdiff1d(list(range(len(traces))), idx_components)

print(' ***** ')
print((len(traces)))
print((len(idx_components)))
print((len(idx_blobs)))
#%% visualize components
#pl.figure();
pl.subplot(1, 3, 1)
crd = plot_contours(A2.tocsc()[:, idx_components], Cn, thr=0.9)
pl.subplot(1, 3, 2)
crd = plot_contours(A2.tocsc()[:, idx_blobs], Cn, thr=0.9)
pl.subplot(1, 3, 3)
crd = plot_contours(A2.tocsc()[:, idx_components_bad], Cn, thr=0.9)
#%%
view_patches_bar(Yr, scipy.sparse.coo_matrix(A2.tocsc()[:, idx_components]),
                 C2[idx_components, :], b2, f2, dims[0], dims[1],
                 YrA=YrA[idx_components, :], img=Cn)
#%%
view_patches_bar(Yr, scipy.sparse.coo_matrix(A2.tocsc()[:, idx_components_bad]),
                 C2[idx_components_bad, :], b2, f2, dims[0], dims[1],
                 YrA=YrA[idx_components_bad, :], img=Cn)
#%% STOP CLUSTER
pl.close()
if not single_thread:
    c.close()
    cm.stop_server()
def update_temporal_components_parallel(Y, A, b, Cin, fin, bl = None, c1 = None, g = None, sn = None, ITER=2, method_foopsi='constrained_foopsi', n_processes=1, backend='single_thread',memory_efficient=False, **kwargs): """Update temporal components and background given spatial components using a block coordinate descent approach. Parameters ----------- Y: np.ndarray (2D) input data with time in the last axis (d x T) A: sparse matrix (crc format) matrix of temporal components (d x K) b: ndarray (dx1) current estimate of background component Cin: np.ndarray current estimate of temporal components (K x T) fin: np.ndarray current estimate of temporal background (vector of length T) g: np.ndarray Global time constant (not used) bl: np.ndarray baseline for fluorescence trace for each column in A c1: np.ndarray initial concentration for each column in A g: np.ndarray discrete time constant for each column in A sn: np.ndarray noise level for each column in A ITER: positive integer Maximum number of block coordinate descent loops. method_foopsi: string Method of deconvolution of neural activity. constrained_foopsi is the only method supported at the moment. n_processes: int number of processes to use for parallel computation. Should be less than the number of processes started with ipcluster. backend: 'str' single_thread no parallelization ipyparallel, parallelization using the ipyparallel cluster. You should start the cluster (install ipyparallel and then type ipcluster -n 6, where 6 is the number of processes). memory_efficient: Bool whether or not to optimize for memory usage (longer running times). nevessary with very large datasets **kwargs: dict all parameters passed to constrained_foopsi except bl,c1,g,sn (see documentation). Some useful parameters are p: int order of the autoregression model method: [optional] string solution method for basis projection pursuit cvx or spgl1 or debug for fast but possibly imprecise temporal components Returns -------- C: np.matrix matrix of temporal components (K x T) f: np.array vector of temporal background (length T) Y_res: np.ndarray matrix with current residual (d x T) S: np.ndarray matrix of merged deconvolved activity (spikes) (K x T) bl: float same as input c1: float same as input g: float same as input sn: float same as input """ if not kwargs.has_key('p') or kwargs['p'] is None: raise Exception("You have to provide a value for p") d,T = np.shape(Y); nr = np.shape(A)[-1] if bl is None: bl=np.repeat(None,nr) if c1 is None: c1=np.repeat(None,nr) if g is None: g=np.repeat(None,nr) if sn is None: sn=np.repeat(None,nr) A = scipy.sparse.hstack((A,coo_matrix(b))) S = np.zeros(np.shape(Cin)); Cin = np.vstack((Cin,fin)); C = Cin; #% nA = np.squeeze(np.array(np.sum(np.square(A.todense()),axis=0))) Sp = np.zeros((nr,T)) #YrA = Y.T*A - Cin.T*(A.T*A); # Y=np.matrix(Y) # C=np.matrix(C) # Cin=np.matrix(Cin) # YrA2 = Y.T*A - Cin.T*(A.T*A); Cin=coo_matrix(Cin) YrA = (A.T.dot(Y)).T-Cin.T.dot(A.T.dot(A)) if backend == 'ipyparallel': try: # if server is not running and raise exception if not installed or not started from ipyparallel import Client c = Client() except: print "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes" raise if len(c) < n_processes: print len(c) raise Exception("the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value") dview=c[:n_processes] # use the 
number of processes Cin=np.array(Cin.todense()) for iter in range(ITER): O,lo = update_order(A.tocsc()[:,:nr]) P_=[]; for count,jo_ in enumerate(O): jo=np.array(list(jo_)) Ytemp = YrA[:,jo.flatten()] + (np.dot(np.diag(nA[jo]),Cin[jo,:])).T Ctemp = np.zeros((np.size(jo),T)) Stemp = np.zeros((np.size(jo),T)) btemp = np.zeros((np.size(jo),1)) sntemp = btemp.copy() c1temp = btemp.copy() gtemp = np.zeros((np.size(jo),kwargs['p'])); nT = nA[jo] # args_in=[(np.squeeze(np.array(Ytemp[:,jj])), nT[jj], jj, bl[jo[jj]], c1[jo[jj]], g[jo[jj]], sn[jo[jj]], kwargs) for jj in range(len(jo))] args_in=[(np.squeeze(np.array(Ytemp[:,jj])), nT[jj], jj, None, None, None, None, kwargs) for jj in range(len(jo))] if backend == 'ipyparallel': results = dview.map_sync(constrained_foopsi_parallel,args_in) elif backend == 'single_thread': results = map(constrained_foopsi_parallel,args_in) else: raise Exception('Backend not defined. Use either single_thread or ipyparallel') for chunk in results: #pars=dict(kwargs) C_,Sp_,Ytemp_,cb_,c1_,sn_,gn_,jj_=chunk Ctemp[jj_,:] = C_[None,:] Stemp[jj_,:] = Sp_ Ytemp[:,jj_] = Ytemp_[:,None] btemp[jj_] = cb_ c1temp[jj_] = c1_ sntemp[jj_] = sn_ gtemp[jj_,:] = gn_.T bl[jo[jj_]] = cb_ c1[jo[jj_]] = c1_ sn[jo[jj_]] = sn_ g[jo[jj_]] = gtemp[jj,:]#[jj_,np.abs(gtemp[jj,:])>0] #pars['b'] = cb_ # pars['c1'] = c1_ # pars['neuron_sn'] = sn_ # pars['gn'] = gtemp[jj_,np.abs(gtemp[jj,:])>0] # ## for jj = 1:length(O{jo}) ## P.gn(O{jo}(jj)) = {gtemp(jj,abs(gtemp(jj,:))>0)'}; ## end # pars['neuron_id'] = jo[jj_] # P_.append(pars) YrA[:,jo] = Ytemp C[jo,:] = Ctemp S[jo,:] = Stemp # if (np.sum(lo[:jo])+1)%1 == 0: print str(np.sum(lo[:count])) + ' out of total ' + str(nr) + ' temporal components updated \n' ii=nr YrA[:,ii] = YrA[:,ii] + nA[ii]*np.atleast_2d(Cin[ii,:]).T cc = np.maximum(YrA[:,ii]/nA[ii],0) C[ii,:] = cc[:].T YrA[:,ii] = YrA[:,ii] - nA[ii]*np.atleast_2d(C[ii,:]).T if backend == 'ipyparallel': dview.results.clear() c.purge_results('all') c.purge_everything() if scipy.linalg.norm(Cin - C,'fro')/scipy.linalg.norm(C,'fro') <= 1e-3: # stop if the overall temporal component does not change by much print "stopping: overall temporal component not changing significantly" break else: Cin = C Y_res = Y - A*C # this includes the baseline term f = C[nr:,:] C = C[:nr,:] P_ = sorted(P_, key=lambda k: k['neuron_id']) if backend == 'ipyparallel': c.close() return C,f,Y_res,S,bl,c1,sn,g
def extract_rois_patch(file_name,d1,d2,rf=5,stride = 2): not_completed, in_progress rf=6 stride = 2 idx_flat,idx_2d=extract_patch_coordinates(d1, d2, rf=rf,stride = stride) perctl=95 n_components=2 tol=1e-6 max_iter=5000 args_in=[] for id_f,id_2d in zip(idx_flat,idx_2d): args_in.append((file_name, id_f,id_2d[0].shape, perctl,n_components,tol,max_iter)) st=time.time() try: if 1: c = Client() dview=c[:] file_res = dview.map_sync(nmf_patches, args_in) else: file_res = map(nmf_patches, args_in) finally: dview.results.clear() c.purge_results('all') c.purge_everything() c.close() print time.time()-st A1=lil_matrix((d1*d2,len(file_res))) C1=[] A2=lil_matrix((d1*d2,len(file_res))) C2=[] A_tot=lil_matrix((d1*d2,n_components*len(file_res))) C_tot=[]; count_out=0 for count,f in enumerate(file_res): idx_,flt,ca,d=f print count_out #flt,ca,_=cse.order_components(coo_matrix(flt),ca) # A1[idx_,count]=flt[:,0][:,np.newaxis]/np.sqrt(np.sum(flt[:,0]**2)) # A2[idx_,count]=flt[:,1][:,np.newaxis] /np.sqrt(np.sum(flt[:,1]**2)) # C1.append(ca[0,:]) # C2.append(ca[1,:]) for ccc in range(n_components): A_tot[idx_,count_out]=flt[:,ccc][:,np.newaxis]/np.sqrt(np.sum(flt[:,ccc]**2)) C_tot.append(ca[ccc,:]) count_out+=1 # pl.imshow(np.reshape(flt[:,0],d,order='F'),vmax=10) # pl.pause(.1) correlations=np.corrcoef(np.array(C_tot)) centers=cse.com(A_tot.todense(),d1,d2) distances=sklearn.metrics.pairwise.euclidean_distances(centers) pl.imshow((correlations>0.8) & (distances<10)) Yr=np.load('Yr.npy',mmap_mode='r') [d,T]=Yr.shape Y=np.reshape(Yr,(d1,d2,T),order='F') options=cse.utilities.CNMFSetParms(Y,p=0) res_merge=cse.merge_components(Yr,A_tot,[],np.array(C_tot),[],np.array(C_tot),[],options['temporal_params'],options['spatial_params'],thr=0.8) A_m,C_m,nr_m,merged_ROIs,S_m,bl_m,c1_m,sn_m,g_m=res_merge A_norm=np.array([A_m[:,rr].toarray()/np.sqrt(np.sum(A_m[:,rr].toarray()**2)) for rr in range(A_m.shape[-1])]).T options=cse.utilities.CNMFSetParms(Y,p=2,K=np.shape(A_m)[-1]) Yr,sn,g=cse.pre_processing.preprocess_data(Yr,**options['preprocess_params']) epsilon=1e-2 pixels_bckgrnd=np.nonzero(A_norm.sum(axis=-1)<epsilon)[0] f=np.sum(Yr[pixels_bckgrnd,:],axis=0) A2,b2,C2 = cse.spatial.update_spatial_components(Yr, C_m, f, A_m, sn=sn, **options['spatial_params']) A_or2, C_or2, srt2 = cse.utilities.order_components(A2,C2) A_norm2=np.array([A_or2[:,rr]/np.sqrt(np.sum(A_or2[:,rr]**2)) for rr in range(A_or2.shape[-1])]).T options['temporal_params']['p'] = 2 # set it back to original value to perform full deconvolution C2,f2,S2,bl2,c12,neurons_sn2,g21,YrA = cse.temporal.update_temporal_components(Yr,A2,b2,C2,f,bl=None,c1=None,sn=None,g=None,**options['temporal_params']) A_or, C_or, srt = cse.utilities.order_components(A2,C2) return A1,A2,C1
def update_temporal_components(Y, A, b, Cin, fin, bl=None, c1=None, g=None, sn=None, ITER=2, method_foopsi='constrained_foopsi', n_processes=1, backend='single_thread', memory_efficient=False, debug=False, **kwargs): """Update temporal components and background given spatial components using a block coordinate descent approach. Parameters ----------- Y: np.ndarray (2D) input data with time in the last axis (d x T) A: sparse matrix (crc format) matrix of temporal components (d x K) b: ndarray (dx1) current estimate of background component Cin: np.ndarray current estimate of temporal components (K x T) fin: np.ndarray current estimate of temporal background (vector of length T) g: np.ndarray Global time constant (not used) bl: np.ndarray baseline for fluorescence trace for each column in A c1: np.ndarray initial concentration for each column in A g: np.ndarray discrete time constant for each column in A sn: np.ndarray noise level for each column in A ITER: positive integer Maximum number of block coordinate descent loops. method_foopsi: string Method of deconvolution of neural activity. constrained_foopsi is the only method supported at the moment. n_processes: int number of processes to use for parallel computation. Should be less than the number of processes started with ipcluster. backend: 'str' single_thread no parallelization ipyparallel, parallelization using the ipyparallel cluster. You should start the cluster (install ipyparallel and then type ipcluster -n 6, where 6 is the number of processes). memory_efficient: Bool whether or not to optimize for memory usage (longer running times). nevessary with very large datasets **kwargs: dict all parameters passed to constrained_foopsi except bl,c1,g,sn (see documentation). Some useful parameters are p: int order of the autoregression model method: [optional] string solution method for constrained foopsi. Choices are 'cvx': using cvxopt and picos (slow especially without the MOSEK solver) 'cvxpy': using cvxopt and cvxpy with the ECOS solver (faster, default) solvers: list string primary and secondary (if problem unfeasible for approx solution) solvers to be used with cvxpy, default is ['ECOS','SCS'] Note -------- The temporal components are updated in parallel by default by forming of sequence of vertex covers. Returns -------- C: np.ndarray matrix of temporal components (K x T) f: np.array vector of temporal background (length T) S: np.ndarray matrix of merged deconvolved activity (spikes) (K x T) bl: float same as input c1: float same as input g: float same as input sn: float same as input YrA: np.ndarray matrix of spatial component filtered raw data, after all contributions have been removed. YrA corresponds to the residual trace for each component and is used for faster plotting (K x T) """ if not kwargs.has_key('p') or kwargs['p'] is None: raise Exception("You have to provide a value for p") d, T = np.shape(Y) nr = np.shape(A)[-1] if bl is None: bl = np.repeat(None, nr) if c1 is None: c1 = np.repeat(None, nr) if g is None: g = np.repeat(None, nr) if sn is None: sn = np.repeat(None, nr) A = scipy.sparse.hstack((A, coo_matrix(b))) S = np.zeros(np.shape(Cin)) Cin = np.vstack((Cin, fin)) C = Cin nA = np.squeeze(np.array(np.sum(np.square(A.todense()), axis=0))) #import pdb #pdb.set_trace() Cin = coo_matrix(Cin) #YrA = ((A.T.dot(Y)).T-Cin.T.dot(A.T.dot(A))) YA = (A.T.dot(Y).T) * spdiags(1. / nA, 0, nr + 1, nr + 1) AA = ((A.T.dot(A)) * spdiags(1. 
/ nA, 0, nr + 1, nr + 1)).tocsr() YrA = YA - Cin.T.dot(AA) #YrA = ((A.T.dot(Y)).T-Cin.T.dot(A.T.dot(A)))*spdiags(1./nA,0,nr+1,nr+1) if backend == 'ipyparallel': try: # if server is not running and raise exception if not installed or not started from ipyparallel import Client c = Client() except: print "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes" raise if len(c) < n_processes: print len(c) raise Exception( "the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value" ) dview = c[:n_processes] # use the number of processes Cin = np.array(Cin.todense()) for iter in range(ITER): O, lo = update_order(A.tocsc()[:, :nr]) P_ = [] for count, jo_ in enumerate(O): jo = np.array(list(jo_)) #Ytemp = YrA[:,jo.flatten()] + (np.dot(np.diag(nA[jo]),Cin[jo,:])).T Ytemp = YrA[:, jo.flatten()] + Cin[jo, :].T Ctemp = np.zeros((np.size(jo), T)) Stemp = np.zeros((np.size(jo), T)) btemp = np.zeros((np.size(jo), 1)) sntemp = btemp.copy() c1temp = btemp.copy() gtemp = np.zeros((np.size(jo), kwargs['p'])) nT = nA[jo] # args_in=[(np.squeeze(np.array(Ytemp[:,jj])), nT[jj], jj, bl[jo[jj]], c1[jo[jj]], g[jo[jj]], sn[jo[jj]], kwargs) for jj in range(len(jo))] args_in = [(np.squeeze(np.array(Ytemp[:, jj])), nT[jj], jj, None, None, None, None, kwargs) for jj in range(len(jo))] # import pdb # pdb.set_trace() if backend == 'ipyparallel': # if debug: results = dview.map_async(constrained_foopsi_parallel, args_in) results.get() for outp in results.stdout: print outp[:-1] sys.stdout.flush() for outp in results.stderr: print outp[:-1] sys.stderr.flush() else: results = dview.map_sync(constrained_foopsi_parallel, args_in) elif backend == 'single_thread': results = map(constrained_foopsi_parallel, args_in) else: raise Exception( 'Backend not defined. Use either single_thread or ipyparallel' ) for chunk in results: pars = dict() C_, Sp_, Ytemp_, cb_, c1_, sn_, gn_, jj_ = chunk Ctemp[jj_, :] = C_[None, :] Stemp[jj_, :] = Sp_ Ytemp[:, jj_] = Ytemp_[:, None] btemp[jj_] = cb_ c1temp[jj_] = c1_ sntemp[jj_] = sn_ gtemp[jj_, :] = gn_.T bl[jo[jj_]] = cb_ c1[jo[jj_]] = c1_ sn[jo[jj_]] = sn_ g[jo[jj_]] = gn_.T if kwargs['p'] > 0 else [] #gtemp[jj,:] pars['b'] = cb_ pars['c1'] = c1_ pars['neuron_sn'] = sn_ pars['gn'] = gtemp[jj_, np.abs(gtemp[jj, :]) > 0] pars['neuron_id'] = jo[jj_] P_.append(pars) YrA -= (Ctemp - C[jo, :]).T * AA[jo, :] #YrA[:,jo] = Ytemp C[jo, :] = Ctemp.copy() S[jo, :] = Stemp # if (np.sum(lo[:jo])+1)%1 == 0: print str(np.sum(lo[:count + 1])) + ' out of total ' + str( nr) + ' temporal components updated' ii = nr #YrA[:,ii] = YrA[:,ii] + np.atleast_2d(Cin[ii,:]).T #cc = np.maximum(YrA[:,ii],0) cc = np.maximum(YrA[:, ii] + np.atleast_2d(Cin[ii, :]).T, 0) YrA -= (cc - np.atleast_2d(Cin[ii, :]).T) * AA[ii, :] C[ii, :] = cc.T #YrA = YA - C.T.dot(AA) #YrA[:,ii] = YrA[:,ii] - np.atleast_2d(C[ii,:]).T if backend == 'ipyparallel': dview.results.clear() c.purge_results('all') c.purge_everything() if scipy.linalg.norm(Cin - C, 'fro') / scipy.linalg.norm( C, 'fro') <= 1e-3: # stop if the overall temporal component does not change by much print "stopping: overall temporal component not changing significantly" break else: Cin = C f = C[nr:, :] C = C[:nr, :] YrA = np.array(YrA[:, :nr]).T P_ = sorted(P_, key=lambda k: k['neuron_id']) if backend == 'ipyparallel': c.close() return C, f, S, bl, c1, sn, g, YrA #,P_
def update_spatial_components_parallel(Y,C,f,A_in,sn=None, d1=None,d2=None,min_size=3,max_size=8, dist=3, method = 'ellipse', expandCore = None,backend='single_thread',n_processes=4,n_pixels_per_process=128, memory_efficient=False): """update spatial footprints and background through Basis Pursuit Denoising for each pixel i solve the problem [A(i,:),b(i)] = argmin sum(A(i,:)) subject to || Y(i,:) - A(i,:)*C + b(i)*f || <= sn(i)*sqrt(T); for each pixel the search is limited to a few spatial components Parameters ---------- Y: np.ndarray (2D) movie, raw data in 2D (pixels x time). C: np.ndarray calcium activity of each neuron. f: np.ndarray temporal profile of background activity. Ain: np.ndarray spatial profile of background activity. d1: [optional] int x movie dimension d2: [optional] int y movie dimension min_size: [optional] int max_size: [optional] int dist: [optional] int sn: [optional] float noise associated with each pixel if known n_processes: [optional] int number of threads to use when the backend is multiprocessing,threading, or ipyparallel backend [optional] str 'multiprocessing', 'threading', 'ipyparallel', 'single_thread' single_thread:no parallelization. It shoul dbe used in most cases. multiprocessing or threading: use the corresponding python threading package. It has known issues on mac OS. Not to be used in most situations. ipyparallel: starts an ipython cluster and then send jobs to each of them n_pixels_per_process: [optional] int number of pixels to be processed by each thread memory_efficient [bool] whether or not to reduce memory usage (at the expense of increased computational time) method: [optional] string method used to expand the search for pixels 'ellipse' or 'dilate' expandCore: [optional] scipy.ndimage.morphology if method is dilate this represents the kernel used for expansion Returns -------- A: np.ndarray new estimate of spatial footprints b: np.ndarray new estimate of spatial background C: np.ndarray temporal components (updated only when spatial components are completely removed) """ if expandCore is None: expandCore=iterate_structure(generate_binary_structure(2,1), 2).astype(int) if d1 is None or d2 is None: raise Exception('You need to define the input dimensions') Y=np.atleast_2d(Y) if Y.shape[1]==1: raise Exception('Dimension of Matrix Y must be pixels x time') C=np.atleast_2d(C) if C.shape[1]==1: raise Exception('Dimension of Matrix C must be neurons x time') f=np.atleast_2d(f) if f.shape[1]==1: raise Exception('Dimension of Matrix f must be neurons x time ') if len(A_in.shape)==1: A_in=np.atleast_2d(A_in).T if A_in.shape[0]==1: raise Exception('Dimension of Matrix A must be pixels x neurons ') start_time = time.time() Cf = np.vstack((C,f)) # create matrix that include background components [d,T] = np.shape(Y) if n_pixels_per_process > d: raise Exception('The number of pixels per process (n_pixels_per_process) is larger than the total number of pixels!! 
Decrease suitably.') nr,_ = np.shape(C) # number of neurons IND = determine_search_location(A_in,d1,d2,method = method, min_size = min_size, max_size = max_size, dist = dist, expandCore = expandCore) print " find search location" ind2_ =[ np.hstack( (np.where(iid_)[0] , nr+np.arange(f.shape[0])) ) if np.size(np.where(iid_)[0])>0 else [] for iid_ in IND] folder = tempfile.mkdtemp() if backend == 'multiprocessing' or backend == 'threading': A_name = os.path.join(folder, 'A_temp') # Pre-allocate a writeable shared memory map as a container for the # results of the parallel computation print "Create Matrix for dumping data from matrix A and C for parallel computation...." A_ = np.memmap(A_name, dtype=A_in.dtype,shape=(d,nr+np.size(f,0)), mode='w+') pixels_name = os.path.join(folder, 'pixels') C_name = os.path.join(folder, 'C_temp') # Dump the input data to disk to free the memory dump(Y, pixels_name) dump(Cf, C_name) # use mempry mapped versions of C and Y Y = load(pixels_name, mmap_mode='r') Cf = load(C_name, mmap_mode='r') pixel_groups=[range(i,i+n_pixels_per_process) for i in range(0,Y.shape[0]-n_pixels_per_process+1,n_pixels_per_process)] # Fork the worker processes to perform computation concurrently print "start parallel pool..." sys.stdout.flush() Parallel(n_jobs=n_processes, backend=backend,verbose=100,max_nbytes=None)(delayed(lars_regression_noise_parallel)(Y,Cf,A_,sn,i,ind2_) for i in pixel_groups) # if n_pixels_per_process is not a multiple of Y.shape[0] run on remaining pixels pixels_remaining= Y.shape[0] % n_pixels_per_process if pixels_remaining>0: print "Running deconvolution for remaining pixels:" + str(pixels_remaining) lars_regression_noise_parallel(Y,Cf,A_,sn,range(Y.shape[0]-pixels_remaining,Y.shape[0]),ind2_,positive=1) A_=np.array(A_) elif backend == 'ipyparallel': # use the ipyparallel package, you need to start a cluster server (ipcluster command) in order to use it C_name = os.path.join(folder, 'C_temp.npy') np.save(C_name,Cf) if type(Y) is np.core.memmap: # if input file is already memory mapped then find the filename Y_name=Y.filename else: # if not create a memory mapped version (necessary for parallelization) Y_name = os.path.join(folder, 'Y_temp.npy') np.save(Y_name,Y) Y=np.load(Y_name,mmap_mode='r') # create arguments to be passed to the function. 
Here we are grouping bunch of pixels to be processed by each thread pixel_groups=[(Y_name,C_name,sn,ind2_,range(i,i+n_pixels_per_process)) for i in range(0,d1*d2-n_pixels_per_process+1,n_pixels_per_process)] A_ = np.zeros((d,nr+np.size(f,0))) try: # if server is not running and raise exception if not installed or not started from ipyparallel import Client c = Client() except: print "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes" raise if len(c) < n_processes: print len(c) raise Exception("the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value") dview=c[:n_processes] # use the number of processes #serial_result = map(lars_regression_noise_ipyparallel, pixel_groups) parallel_result = dview.map_sync(lars_regression_noise_ipyparallel, pixel_groups) for chunk in parallel_result: for pars in chunk: px,idxs_,a=pars A_[px,idxs_]=a #clean up dview.results.clear() c.purge_results('all') c.purge_everything() c.close() elif backend=='single_thread': Cf_=[Cf[idx_,:] for idx_ in ind2_] #% LARS regression A_ = np.hstack((np.zeros((d,nr)),np.zeros((d,np.size(f,0))))) for c,y,s,id2_,px in zip(Cf_,Y,sn,ind2_,range(d)): if px%1000==0: print px if np.size(c)>0: _, _, a, _ , _= lars_regression_noise(y, np.array(c.T), 1, sn[px]**2*T) if np.isscalar(a): A_[px,id2_]=a else: A_[px,id2_]=a.T else: raise Exception('Unknown backend specified: use single_thread, threading, multiprocessing or ipyparallel') #% print 'Updated Spatial Components' A_=threshold_components(A_, d1, d2) print "threshold" ff = np.where(np.sum(A_,axis=0)==0); # remove empty components if np.size(ff)>0: ff = ff[0] warn('eliminating empty components!!') nr = nr - len(ff) A_ = np.delete(A_,list(ff),1) C = np.delete(C,list(ff),0) A_ = A_[:,:nr] A_=coo_matrix(A_) if memory_efficient: print "Using memory efficient computation (slow but memory preserving)" A__=coo_matrix(A_,dtype=np.float32) C__=coo_matrix(C[:nr,:],dtype=np.float32) Y_res_name = os.path.join(folder, 'Y_res_temp.npy') Y_res = np.memmap(Y_res_name, dtype=np.float32, mode='w+', shape=Y.shape) Y_res = np.memmap(Y_res_name, dtype=np.float32, mode='r+', shape=Y.shape) print "computing residuals" Y_res[:] = -A__.dot(C__).todense()[:] Y_res[:]+=Y else: print "Using memory trade-off computation (good use of memory if input is memmaped)" Y_res = Y - A_.dot(coo_matrix(C[:nr,:])) print "Computing A_bas" A_bas = np.fmax(np.dot(Y_res,f.T)/scipy.linalg.norm(f)**2,0) # update baseline based on residual Y_res[:]=1 b = A_bas print("--- %s seconds ---" % (time.time() - start_time)) try: #clean up # remove temporary file created print "Remove temporary file created" shutil.rmtree(folder) except: raise Exception("Failed to delete: " + folder) return A_,b,C
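# --- Usage sketch (not part of the original source) -----------------------------
# A minimal, hedged example of calling update_spatial_components_parallel with the
# ipyparallel backend described in its docstring (a cluster started beforehand with
# "ipcluster start -n 6"). The helper's arguments (Yr, C, f, A_init, noise_sn, d1, d2)
# are placeholders for the outputs of the earlier initialization step.

def _example_spatial_update_parallel(Yr, C, f, A_init, noise_sn, d1, d2):
    A_sp, b_bg, C_upd = update_spatial_components_parallel(
        Yr, C, f, A_init,
        sn=noise_sn, d1=d1, d2=d2,
        method='ellipse',
        backend='ipyparallel',        # 'single_thread' needs no cluster; 'multiprocessing'/'threading' use joblib
        n_processes=6,                # must not exceed the number of running engines
        n_pixels_per_process=128,
        memory_efficient=False)
    return A_sp, b_bg, C_upd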
def update_temporal_components(Y, A, b, Cin, fin, bl = None, c1 = None, g = None, sn = None, ITER=2, method_foopsi='constrained_foopsi', n_processes=1, backend='single_thread',memory_efficient=False, debug=False, **kwargs): """Update temporal components and background given spatial components using a block coordinate descent approach. Parameters ----------- Y: np.ndarray (2D) input data with time in the last axis (d x T) A: sparse matrix (crc format) matrix of temporal components (d x K) b: ndarray (dx1) current estimate of background component Cin: np.ndarray current estimate of temporal components (K x T) fin: np.ndarray current estimate of temporal background (vector of length T) g: np.ndarray Global time constant (not used) bl: np.ndarray baseline for fluorescence trace for each column in A c1: np.ndarray initial concentration for each column in A g: np.ndarray discrete time constant for each column in A sn: np.ndarray noise level for each column in A ITER: positive integer Maximum number of block coordinate descent loops. method_foopsi: string Method of deconvolution of neural activity. constrained_foopsi is the only method supported at the moment. n_processes: int number of processes to use for parallel computation. Should be less than the number of processes started with ipcluster. backend: 'str' single_thread no parallelization ipyparallel, parallelization using the ipyparallel cluster. You should start the cluster (install ipyparallel and then type ipcluster -n 6, where 6 is the number of processes). memory_efficient: Bool whether or not to optimize for memory usage (longer running times). nevessary with very large datasets **kwargs: dict all parameters passed to constrained_foopsi except bl,c1,g,sn (see documentation). Some useful parameters are p: int order of the autoregression model method: [optional] string solution method for constrained foopsi. Choices are 'cvx': using cvxopt and picos (slow especially without the MOSEK solver) 'cvxpy': using cvxopt and cvxpy with the ECOS solver (faster, default) 'spgl1': using the spgl1 package 'debug': using spgl1 without spike non-negativity constraints (just for debugging purposes) solvers: list string primary and secondary (if problem unfeasible for approx solution) solvers to be used with cvxpy, default is ['ECOS','SCS'] Note -------- The temporal components are updated in parallel by default by forming of sequence of vertex covers. Returns -------- C: np.ndarray matrix of temporal components (K x T) f: np.array vector of temporal background (length T) S: np.ndarray matrix of merged deconvolved activity (spikes) (K x T) bl: float same as input c1: float same as input g: float same as input sn: float same as input YrA: np.ndarray matrix of spatial component filtered raw data, after all contributions have been removed. 
YrA corresponds to the residual trace for each component and is used for faster plotting (K x T) """ if not kwargs.has_key('p') or kwargs['p'] is None: raise Exception("You have to provide a value for p") d,T = np.shape(Y); nr = np.shape(A)[-1] if bl is None: bl=np.repeat(None,nr) if c1 is None: c1=np.repeat(None,nr) if g is None: g=np.repeat(None,nr) if sn is None: sn=np.repeat(None,nr) A = scipy.sparse.hstack((A,coo_matrix(b))) S = np.zeros(np.shape(Cin)); Cin = np.vstack((Cin,fin)); C = Cin; nA = np.squeeze(np.array(np.sum(np.square(A.todense()),axis=0))) #import pdb #pdb.set_trace() Cin=coo_matrix(Cin) #YrA = ((A.T.dot(Y)).T-Cin.T.dot(A.T.dot(A))) YA = (A.T.dot(Y).T)*spdiags(1./nA,0,nr+1,nr+1) AA = ((A.T.dot(A))*spdiags(1./nA,0,nr+1,nr+1)).tocsr() YrA = YA - Cin.T.dot(AA) #YrA = ((A.T.dot(Y)).T-Cin.T.dot(A.T.dot(A)))*spdiags(1./nA,0,nr+1,nr+1) if backend == 'ipyparallel': try: # if server is not running and raise exception if not installed or not started from ipyparallel import Client c = Client() except: print "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes" raise if len(c) < n_processes: print len(c) raise Exception("the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value") dview=c[:n_processes] # use the number of processes Cin=np.array(Cin.todense()) for iter in range(ITER): O,lo = update_order(A.tocsc()[:,:nr]) P_=[]; for count,jo_ in enumerate(O): jo=np.array(list(jo_)) #Ytemp = YrA[:,jo.flatten()] + (np.dot(np.diag(nA[jo]),Cin[jo,:])).T Ytemp = YrA[:,jo.flatten()] + Cin[jo,:].T Ctemp = np.zeros((np.size(jo),T)) Stemp = np.zeros((np.size(jo),T)) btemp = np.zeros((np.size(jo),1)) sntemp = btemp.copy() c1temp = btemp.copy() gtemp = np.zeros((np.size(jo),kwargs['p'])); nT = nA[jo] # args_in=[(np.squeeze(np.array(Ytemp[:,jj])), nT[jj], jj, bl[jo[jj]], c1[jo[jj]], g[jo[jj]], sn[jo[jj]], kwargs) for jj in range(len(jo))] args_in=[(np.squeeze(np.array(Ytemp[:,jj])), nT[jj], jj, None, None, None, None, kwargs) for jj in range(len(jo))] # import pdb # pdb.set_trace() if backend == 'ipyparallel': # if debug: results = dview.map_async(constrained_foopsi_parallel,args_in) results.get() for outp in results.stdout: print outp[:-1] sys.stdout.flush() for outp in results.stderr: print outp[:-1] sys.stderr.flush() else: results = dview.map_sync(constrained_foopsi_parallel,args_in) elif backend == 'single_thread': results = map(constrained_foopsi_parallel,args_in) else: raise Exception('Backend not defined. 
Use either single_thread or ipyparallel') for chunk in results: pars=dict() C_,Sp_,Ytemp_,cb_,c1_,sn_,gn_,jj_=chunk Ctemp[jj_,:] = C_[None,:] Stemp[jj_,:] = Sp_ Ytemp[:,jj_] = Ytemp_[:,None] btemp[jj_] = cb_ c1temp[jj_] = c1_ sntemp[jj_] = sn_ gtemp[jj_,:] = gn_.T bl[jo[jj_]] = cb_ c1[jo[jj_]] = c1_ sn[jo[jj_]] = sn_ g[jo[jj_]] = gn_.T if kwargs['p'] > 0 else [] #gtemp[jj,:] pars['b'] = cb_ pars['c1'] = c1_ pars['neuron_sn'] = sn_ pars['gn'] = gtemp[jj_,np.abs(gtemp[jj,:])>0] pars['neuron_id'] = jo[jj_] P_.append(pars) YrA -= (Ctemp-C[jo,:]).T*AA[jo,:] #YrA[:,jo] = Ytemp C[jo,:] = Ctemp.copy() S[jo,:] = Stemp # if (np.sum(lo[:jo])+1)%1 == 0: print str(np.sum(lo[:count+1])) + ' out of total ' + str(nr) + ' temporal components updated' ii=nr #YrA[:,ii] = YrA[:,ii] + np.atleast_2d(Cin[ii,:]).T #cc = np.maximum(YrA[:,ii],0) cc = np.maximum(YrA[:,ii] + np.atleast_2d(Cin[ii,:]).T,0) YrA -= (cc-np.atleast_2d(Cin[ii,:]).T)*AA[ii,:] C[ii,:] = cc.T #YrA = YA - C.T.dot(AA) #YrA[:,ii] = YrA[:,ii] - np.atleast_2d(C[ii,:]).T if backend == 'ipyparallel': dview.results.clear() c.purge_results('all') c.purge_everything() if scipy.linalg.norm(Cin - C,'fro')/scipy.linalg.norm(C,'fro') <= 1e-3: # stop if the overall temporal component does not change by much print "stopping: overall temporal component not changing significantly" break else: Cin = C f = C[nr:,:] C = C[:nr,:] YrA = np.array(YrA[:,:nr]).T P_ = sorted(P_, key=lambda k: k['neuron_id']) if backend == 'ipyparallel': c.close() return C,f,S,bl,c1,sn,g,YrA #,P_
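# --- Usage sketch (not part of the original source) -----------------------------
# A minimal, hedged example of calling update_temporal_components with the
# ipyparallel backend. It assumes a cluster was started beforehand (e.g.
# "ipcluster start -n 6") and that the caller holds the outputs of the
# initialization / spatial steps; the helper's arguments (Yr, A, b, Cin, fin,
# noise_sn) are placeholders, not names defined in this module.

def _example_temporal_update(Yr, A, b, Cin, fin, noise_sn):
    # the AR order p is forwarded to constrained_foopsi through **kwargs and must be supplied
    return update_temporal_components(
        Yr, A, b, Cin, fin,
        sn=noise_sn,
        ITER=2,
        method_foopsi='constrained_foopsi',
        n_processes=6,                # must not exceed the number of running engines
        backend='ipyparallel',        # or 'single_thread' when no cluster is running
        p=2)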
def update_spatial_components(Y, C, f, A_in, sn=None, d1=None, d2=None, min_size=3, max_size=8, dist=3, method='ellipse', expandCore=None, backend='single_thread', n_processes=4, n_pixels_per_process=128): """update spatial footprints and background through Basis Pursuit Denoising for each pixel i solve the problem [A(i,:),b(i)] = argmin sum(A(i,:)) subject to || Y(i,:) - A(i,:)*C + b(i)*f || <= sn(i)*sqrt(T); for each pixel the search is limited to a few spatial components Parameters ---------- Y: np.ndarray (2D) movie, raw data in 2D (pixels x time). C: np.ndarray calcium activity of each neuron. f: np.ndarray temporal profile of background activity. Ain: np.ndarray spatial profile of background activity. d1: [optional] int x movie dimension d2: [optional] int y movie dimension min_size: [optional] int max_size: [optional] int dist: [optional] int sn: [optional] float noise associated with each pixel if known n_processes: [optional] int number of threads to use when the backend is multiprocessing,threading, or ipyparallel backend [optional] str 'ipyparallel', 'single_thread' single_thread:no parallelization. It can be used with small datasets. ipyparallel: uses ipython clusters and then send jobs to each of them n_pixels_per_process: [optional] int number of pixels to be processed by each thread method: [optional] string method used to expand the search for pixels 'ellipse' or 'dilate' expandCore: [optional] scipy.ndimage.morphology if method is dilate this represents the kernel used for expansion Returns -------- A: np.ndarray new estimate of spatial footprints b: np.ndarray new estimate of spatial background C: np.ndarray temporal components (updated only when spatial components are completely removed) """ if expandCore is None: expandCore = iterate_structure(generate_binary_structure(2, 1), 2).astype(int) if d1 is None or d2 is None: raise Exception('You need to define the input dimensions') if Y.ndim < 2 and not type(Y) is str: Y = np.atleast_2d(Y) if Y.shape[1] == 1: raise Exception('Dimension of Matrix Y must be pixels x time') C = np.atleast_2d(C) if C.shape[1] == 1: raise Exception('Dimension of Matrix C must be neurons x time') f = np.atleast_2d(f) if f.shape[1] == 1: raise Exception('Dimension of Matrix f must be neurons x time ') if len(A_in.shape) == 1: A_in = np.atleast_2d(A_in).T if A_in.shape[0] == 1: raise Exception('Dimension of Matrix A must be pixels x neurons ') start_time = time.time() Cf = np.vstack((C, f)) # create matrix that include background components [d, T] = np.shape(Y) if n_pixels_per_process > d: raise Exception( 'The number of pixels per process (n_pixels_per_process) is larger than the total number of pixels!! Decrease suitably.' 
) nr, _ = np.shape(C) # number of neurons IND = determine_search_location(A_in, d1, d2, method=method, min_size=min_size, max_size=max_size, dist=dist, expandCore=expandCore) print " find search location" ind2_ = [ np.hstack( (np.where(iid_)[0], nr + np.arange(f.shape[0]))) if np.size(np.where(iid_)[0]) > 0 else [] for iid_ in IND ] folder = tempfile.mkdtemp() # use the ipyparallel package, you need to start a cluster server # (ipcluster command) in order to use it if backend == 'ipyparallel': C_name = os.path.join(folder, 'C_temp.npy') np.save(C_name, Cf) if type( Y ) is np.core.memmap: # if input file is already memory mapped then find the filename Y_name = Y.filename # if not create a memory mapped version (necessary for parallelization) elif type(Y) is str: Y_name = Y else: Y_name = os.path.join(folder, 'Y_temp.npy') np.save(Y_name, Y) Y, _, _, _ = load_memmap(Y_name) # create arguments to be passed to the function. Here we are grouping # bunch of pixels to be processed by each thread pixel_groups = [(Y_name, C_name, sn, ind2_, range(i, i + n_pixels_per_process)) for i in range(0, d1 * d2 - n_pixels_per_process + 1, n_pixels_per_process)] A_ = np.zeros((d, nr + np.size(f, 0))) try: # if server is not running and raise exception if not installed or not started from ipyparallel import Client c = Client() except: print "this backend requires the installation of the ipyparallel (pip install ipyparallel) package and starting a cluster (type ipcluster start -n 6) where 6 is the number of nodes" raise if len(c) < n_processes: print len(c) raise Exception( "the number of nodes in the cluster are less than the required processes: decrease the n_processes parameter to a suitable value" ) dview = c[:n_processes] # use the number of processes #serial_result = map(lars_regression_noise_ipyparallel, pixel_groups) parallel_result = dview.map_sync(lars_regression_noise_ipyparallel, pixel_groups) # clean up for chunk in parallel_result: for pars in chunk: px, idxs_, a = pars A_[px, idxs_] = a dview.results.clear() c.purge_results('all') c.purge_everything() c.close() elif backend == 'single_thread': Cf_ = [Cf[idx_, :] for idx_ in ind2_] #% LARS regression A_ = np.hstack((np.zeros((d, nr)), np.zeros((d, np.size(f, 0))))) for c, y, s, id2_, px in zip(Cf_, Y, sn, ind2_, range(d)): if px % 1000 == 0: print px if np.size(c) > 0: _, _, a, _, _ = lars_regression_noise(y, np.array(c.T), 1, sn[px]**2 * T) if np.isscalar(a): A_[px, id2_] = a else: A_[px, id2_] = a.T else: raise Exception( 'Unknown backend specified: use single_thread, threading, multiprocessing or ipyparallel' ) #% print 'Updated Spatial Components' A_ = threshold_components(A_, d1, d2) print "threshold" ff = np.where(np.sum(A_, axis=0) == 0) # remove empty components if np.size(ff) > 0: ff = ff[0] print('eliminating empty components!!') nr = nr - len(ff) A_ = np.delete(A_, list(ff), 1) C = np.delete(C, list(ff), 0) A_ = A_[:, :nr] A_ = coo_matrix(A_) # import pdb # pdb.set_trace() Y_resf = np.dot(Y, f.T) - A_.dot(coo_matrix(C[:nr, :]).dot(f.T)) print "Computing A_bas" A_bas = np.fmax(Y_resf / scipy.linalg.norm(f)**2, 0) # update baseline based on residual # A_bas = np.fmax(np.dot(Y_res,f.T)/scipy.linalg.norm(f)**2,0) # update # baseline based on residual b = A_bas print("--- %s seconds ---" % (time.time() - start_time)) try: # clean up # remove temporary file created print "Remove temporary file created" shutil.rmtree(folder) except: raise Exception("Failed to delete: " + folder) return A_, b, C
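# --- Usage sketch (not part of the original source) -----------------------------
# Hedged example of the memmap-friendly update_spatial_components above. Passing a
# np.memmap for Y lets the ipyparallel branch reuse the existing file (Y.filename)
# instead of saving a temporary copy before farming pixel groups out to the engines.
# Yr_mmap, C, f, A_init and noise_sn are placeholders for earlier pipeline outputs.

def _example_spatial_update_memmap(Yr_mmap, C, f, A_init, noise_sn, d1, d2):
    A_sp, b_bg, C_upd = update_spatial_components(
        Yr_mmap,                      # (pixels x time) memory-mapped movie
        C, f, A_init,
        sn=noise_sn, d1=d1, d2=d2,
        backend='ipyparallel',
        n_processes=6,
        n_pixels_per_process=128)
    return A_sp, b_bg, C_upd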
def stop_server(ipcluster='ipcluster', pdir=None, profile=None, dview=None): """ programmatically stops the ipyparallel server Parameters: ---------- ipcluster : str ipcluster binary file name; requires 4 path separators on Windows Default: "ipcluster" """ if 'multiprocessing' in str(type(dview)): dview.terminate() else: sys.stdout.write("Stopping cluster...\n") sys.stdout.flush() try: pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE'] is_slurm = True except: print('NOT SLURM') is_slurm = False if is_slurm: if pdir is None and profile is None: pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE'] c = Client(ipython_dir=pdir, profile=profile) ee = c[:] ne = len(ee) print(('Shutting down %d engines.' % (ne))) c.close() c.shutdown(hub=True) shutil.rmtree('profile_' + str(profile)) try: shutil.rmtree('./log/') except: print('creating log folder') files = glob.glob('*.log') os.mkdir('./log') for fl in files: shutil.move(fl, './log/') else: if ipcluster == "ipcluster": proc = subprocess.Popen("ipcluster stop", shell=True, stderr=subprocess.PIPE, close_fds=(os.name != 'nt')) else: proc = subprocess.Popen(shlex.split(ipcluster + " stop"), shell=True, stderr=subprocess.PIPE, close_fds=(os.name != 'nt')) line_out = proc.stderr.readline() if b'CRITICAL' in line_out: sys.stdout.write("No cluster to stop...") sys.stdout.flush() elif b'Stopping' in line_out: st = time.time() sys.stdout.write('Waiting for cluster to stop...') while (time.time() - st) < 4: sys.stdout.write('.') sys.stdout.flush() time.sleep(1) else: print(line_out) print( '**** Unrecognized Syntax in ipcluster output, waiting for server to stop anyways ****' ) proc.stderr.close() sys.stdout.write(" done\n")
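# --- Usage sketch (not part of the original source) -----------------------------
# Hedged example of tearing the cluster down with stop_server once the parallel
# work is done. With default arguments it shells out to "ipcluster stop"; when the
# IPPPDIR / IPPPROFILE environment variables are set (the SLURM case) it connects a
# Client to that profile and shuts down the hub and engines; passing a
# multiprocessing pool as dview terminates the pool instead.

def _example_teardown(pool=None):
    if pool is not None:
        stop_server(dview=pool)       # multiprocessing backend: terminate the pool
    else:
        stop_server()                 # ipyparallel backend: stop the local ipcluster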
def slurm_map(fnc, iterables, resource_spec, env='root', job_name=None, output_path=None, n_retries=5, patience=30): """ Args: fnc iterables resource_spec env: virtual env to launch engines in job_name: name of job to use. Derived from fnc name if not specified output_path: location to direct output to. If unspecified output is sent to a file (based on job name and timestamp) in ~/logs/slurm n_retries: number of times to retry connecting to client if less than the requested number of workers are available. patience: seconds to wait after failed attempt to connect to client """ resource_spec = process_resource_spec(resource_spec) if not profile_installed(PROFILE_NAME): print("No profile found for {}, installing".format(PROFILE_NAME)) install_profile(PROFILE_NAME) submission_time = time.strftime("%Y%m%d-%H%M%S") cluster_id = '{}_{}'.format(fnc.__name__, submission_time) print("Using cluster id: {}".format(cluster_id)) # break down by line: # run in bash # activate the specified environment # launch controller with desired settings controller_cmd_template = ("exec bash -c '" "source activate {env};" " ipcontroller --profile={profile} --sqlitedb --location={hostname} --ip=\'*\' --cluster-id={cluster_id}'") controller_cmd = controller_cmd_template.format( env=env, profile=PROFILE_NAME, hostname=socket.gethostname(), cluster_id=cluster_id ) print("Starting controller with: {} \n".format(controller_cmd)) # runs in the background if executed this way subprocess.Popen(controller_cmd, shell=True) print("Sleeping for 10") time.sleep(10) engine_cmd_template_path = package_path() + '/templates/slurm_template.sh' with open(engine_cmd_template_path, 'r') as engine_cmd_template_file: engine_command_template = engine_cmd_template_file.read() # prepare engine commands if job_name is None: job_name = fnc.__name__ + '_slurm_map' else: assert isinstance(job_name, str) if output_path is None: output_dir = os.path.expanduser('~/logs/slurm') output_path = '{}/{}_{}'.format (output_dir, job_name, submission_time) if not os.path.exists(output_dir): os.makedirs(output_path) else: assert isinstance(output_path, str) assert os.path.exists(output_path) # find path to engine based on specified environment if env == 'root': engine_path = 'bin/ipengine' else: engine_path = 'envs/{}/bin/ipengine'.format(env) full_engine_path = os.path.expanduser('~/anaconda3/{}'.format(engine_path)) assert os.path.exists(full_engine_path) engine_command = engine_command_template.format( job_name=job_name, output_path=output_path, n_tasks=resource_spec['max_workers'], mem_mb=resource_spec['worker_mem_mb'], n_cpus=resource_spec['worker_n_cpus'], n_gpus=resource_spec['worker_n_gpus'], engine_path=engine_path, profile=PROFILE_NAME, controller_hostname=socket.gethostname(), cluster_id=cluster_id, comment=job_name ) sbatch_file_path = '/tmp/slurm_map_sbatch_{}.sh'.format(cluster_id) with open(sbatch_file_path, 'w') as sbatch_file: sbatch_file.write(engine_command) # wrap command to execute in bash sbatch_command = "exec bash -c 'sbatch {}'".format(sbatch_file_path) print("Starting engines") # runs in the background if executed this way subprocess.Popen(sbatch_command, shell=True) print("Sleeping for {}".format(patience)) time.sleep(patience) # TODO: shut down unused engines connected = False for attempt_idx in range(n_retries): print("Attempt {} to connect to cluster".format(attempt_idx)) try: client = Client(profile=PROFILE_NAME, cluster_id=cluster_id) if resource_spec['min_workers'] <= len(client.ids) <= resource_spec['max_workers']: connected 
= True print('Succesfully connected to cluster with {} engines out of {} requested'.format( len(client.ids), resource_spec['max_workers'])) if len(client.ids) < resource_spec['max_workers']: warn("{} slurm jobs submitted but only {} are being used.".format( resource_spec['max_workers'], len(client.ids))) break else: print("{} available engines less than minimum requested of {}".format( len(client.ids), resource_spec['min_workers'])) print("Retrying after {}".format(patience)) client.close() time.sleep(patience) except OSError as os_err: print("Caught OSError while attempting to connect to {}: {}.".format(PROFILE_NAME, os_err)) print("Retrying after {}".format(patience)) time.sleep(patience) except TimeoutError as timeout_err: print("Caught TimeoutError while attempting to connect to {}: {}".format(PROFILE_NAME, timeout_err)) print("Retrying after {}".format(patience)) time.sleep(patience) if not connected: raise TimeoutError("Failed to connect to client after {} retries".format(n_retries)) # run tasks print("Submitting tasks") start_time = time.time() client[:].use_cloudpickle() lb_view = client.load_balanced_view() result = lb_view.map(fnc, iterables, block=True) print("Tasks finished after {} seconds".format(time.time() - start_time)) print("Shutting down cluster") client.shutdown(hub=True) print("Relinquishing slurm nodes") shutdown_cmd = 'scancel -n={job_name}'.format(job_name=job_name) shutdown_cmd = "exec bash -c '{}'".format(shutdown_cmd) # runs in the background if executed this way subprocess.Popen(shutdown_cmd, shell=True) print("Removing sbatch script") os.remove(sbatch_file_path) return result
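# --- Usage sketch (not part of the original source) -----------------------------
# Hedged example of slurm_map. The resource_spec keys mirror the ones read inside
# the function (min_workers, max_workers, worker_mem_mb, worker_n_cpus,
# worker_n_gpus); whether process_resource_spec expects exactly this shape is an
# assumption, as is the 'analysis' conda environment name.

def _square(x):
    return x * x

if __name__ == '__main__':
    resource_spec = {
        'min_workers': 4,
        'max_workers': 16,
        'worker_mem_mb': 4096,
        'worker_n_cpus': 1,
        'worker_n_gpus': 0,
    }
    squares = slurm_map(_square, list(range(100)), resource_spec,
                        env='analysis',            # conda env the engines are launched in
                        job_name='square_test',
                        n_retries=5, patience=30)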
def stop_server(ipcluster='ipcluster', pdir=None, profile=None, dview=None): """ programmatically stops the ipyparallel server Parameters: ---------- ipcluster : str ipcluster binary file name; requires 4 path separators on Windows Default: "ipcluster" """ if 'multiprocessing' in str(type(dview)): dview.terminate() else: logger.info("Stopping cluster...") try: pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE'] is_slurm = True except: logger.debug('stop_server: not a slurm cluster') is_slurm = False if is_slurm: if pdir is None and profile is None: pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE'] c = Client(ipython_dir=pdir, profile=profile) ee = c[:] ne = len(ee) print(('Shutting down %d engines.' % (ne))) c.close() c.shutdown(hub=True) shutil.rmtree('profile_' + str(profile)) try: shutil.rmtree('./log/') except: print('creating log folder') files = glob.glob('*.log') os.mkdir('./log') for fl in files: shutil.move(fl, './log/') else: if ipcluster == "ipcluster": proc = subprocess.Popen( "ipcluster stop", shell=True, stderr=subprocess.PIPE, close_fds=(os.name != 'nt')) else: proc = subprocess.Popen(shlex.split(ipcluster + " stop"), shell=True, stderr=subprocess.PIPE, close_fds=(os.name != 'nt')) line_out = proc.stderr.readline() if b'CRITICAL' in line_out: logger.info("No cluster to stop...") elif b'Stopping' in line_out: st = time.time() logger.debug('Waiting for cluster to stop...') while (time.time() - st) < 4: sys.stdout.write('.') sys.stdout.flush() time.sleep(1) else: print(line_out) print( '**** Unrecognized syntax in ipcluster output, waiting for server to stop anyways ****') proc.stderr.close() logger.info("stop_cluster(): done")
import glob
import os

import numpy as np
import pylab as pl
import scipy
from scipy.sparse import coo_matrix

import ca_source_extraction as cse
import calblitz as cb

# reload the variables saved by the analysis run (A, C, b, f, backend, ...);
# at module scope locals() is globals(), so the update injects them directly
with np.load('results_analysis.npz') as ld:
    locals().update(ld)

fname_new = glob.glob('Yr_*.mmap')[0]
Yr, (d1, d2), T = cse.utilities.load_memmap(fname_new)
d, T = np.shape(Yr)
Y = np.reshape(Yr, (d1, d2, T), order='F')  # 3D version of the movie
A = scipy.sparse.coo_matrix(A)
dims = (d1, d2)

#%% STOP CLUSTER
pl.close()
if not single_thread:  # single_thread and the client c are defined earlier in the session
    c.close()
cse.utilities.stop_server()

log_files = glob.glob('Yr*_LOG_*')
for log_file in log_files:
    os.remove(log_file)

#%%
cse.utilities.stop_server(is_slurm=(backend == 'SLURM'))
class ClusterLab(epyc.Lab): """A :class:`Lab` running on an ``ipyparallel`` compute cluster. Experiments are submitted to engines in the cluster for execution in parallel, with the experiments being performed asynchronously to allow for disconnection and subsequent retrieval of results. Combined with a persistent :class:`LabNotebook`, this allows for fully decoupled access to an on-going computational experiment with piecewise retrieval of results. This class requires a cluster to already be set up and running, configured for persistent access, with access to the necessary code and libraries, and with appropriate security information available to the client. """ # Tuning parameters WaitingTime = 30 #: Waiting time for checking for job completion. Lower values increase network traffic. def __init__( self, notebook = None, url_file = None, profile = None, profile_dir = None, ipython_dir = None, context = None, debug = False, sshserver = None, sshkey = None, password = None, paramiko = None, timeout = 10, cluster_id = None, use_dill = False, **extra_args ): """Create an empty lab attached to the given cluster. Most of the arguments are as expected by the ``ipyparallel.Client`` class, and are used to create the underlying connection to the cluster. The connection is opened immediately, meaning the cluster must be up and accessible when creating a lab to use it. :param notebook: the notebook used to results (defaults to an empty :class:`LabNotebook`) :param url_file: file containing connection information for accessing cluster :param profile: name of the IPython profile to use :param profile_dir: directory containing the profile's connection information :param ipython_dir: directory containing profile directories :param context: ZMQ context :param debug: whether to issue debugging information (defaults to False) :param sshserver: username and machine for ssh connections :param sshkey: file containing ssh key :param password: ssh password :param paramiko: True to use paramiko for ssh (defaults to False) :param timeout: timeout in seconds for ssh connection (defaults to 10s) :param cluster_id: string added to runtime files to prevent collisions :param use_dill: whether to use Dill as pickler (defaults to False)""" super(epyc.ClusterLab, self).__init__(notebook) # record all the connection arguments for later self._arguments = dict(url_file = url_file, profile = profile, profile_dir = profile_dir, ipython_dir = ipython_dir, context = context, debug = debug, sshserver = sshserver, sshkey = sshkey, password = password, paramiko = paramiko, timeout = timeout, cluster_id = cluster_id, **extra_args) self._client = None # connect to the cluster self.open() # use Dill if requested if use_dill: self.use_dill() def open( self ): """Connect to the cluster.""" if self._client is None: self._client = Client(**self._arguments) def close( self ): """Close down the connection to the cluster.""" if self._client is not None: self._client.close() self._client = None def numberOfEngines( self ): """Return the number of engines available to this lab. :returns: the number of engines""" return len(self.engines()) def engines( self ): """Return a list of the available engines. :returns: a list of engines""" self.open() return self._client[:] def use_dill( self ): """Make the cluster use Dill as pickler for transferring results. This isn't generally needed, but is sometimes useful for particularly complex experiments such as those involving closures. 
(Or, to put it another way, if you find yourself tempted to use this method, consider re-structuring your experiment code.)""" self.open() with self.sync_imports(quiet = True): import dill self._client.direct_view().use_dill() def sync_imports( self, quiet = False ): """Return a context manager to control imports onto all the engines in the underlying cluster. This method is used within a ``with`` statement. Any imports should be done with no experiments running, otherwise the method will block until the cluster is quiet. Generally imports will be one of the first things done when connecting to a cluster. (But be careful not to accidentally try to re-import if re-connecting to a running cluster.) :param quiet: if True, suppresses messages (defaults to False) :returns: a context manager""" self.open() return self._client[:].sync_imports(quiet = quiet) def _mixup( self, ps ): """Private method to mix up a list of values in-place using a Fisher-Yates shuffle (see https://en.wikipedia.org/wiki/Fisher-Yates_shuffle). :param ps: the array :returns: the array, shuffled in-place""" for i in range(len(ps) - 1, 0, -1): j = int(numpy.random.random() * i) temp = ps[i] ps[i] = ps[j] ps[j] = temp return ps def runExperiment( self, e ): """Run the experiment across the parameter space in parallel using all the engines in the cluster. This method returns immediately. The experiments are run asynchronously, with the points in the parameter space being explored randomly so that intermediate retrievals of results are more representative of the overall result. Put another way, for a lot of experiments the results available will converge towards a final answer, so we can plot them and see the answer emerge. :param e: the experiment""" # create the parameter space space = self.parameterSpace() # only proceed if there's work to do if len(space) > 0: nb = self.notebook() # randomise the order of the parameter space so that we evaluate across # the space as we go along to try to make intermediate (incomplete) result # sets more representative of the overall result set ps = self._mixup(space) try: # connect to the cluster self.open() # submit an experiment at each point in the parameter space to the cluster view = self._client.load_balanced_view() jobs = [] for p in ps: jobs.extend((view.apply_async((lambda p: e.set(p).run()), p)).msg_ids) # there seems to be a race condition in submitting jobs, # whereby jobs get dropped if they're submitted too quickly time.sleep(0.01) # record the mesage ids of all the jobs as submitted but not yet completed psjs = zip(ps, jobs) for (p, j) in psjs: nb.addPendingResult(p, j) finally: # commit our pending results in the notebook nb.commit() self.close() def updateResults( self ): """Update our results within any pending results that have completed since we last retrieved results from the cluster. 
:returns: the number of pending results completed at this call""" # we do all the tests for pending results against the notebook directly, # as the corresponding methods on self call this method themselves nb = self.notebook() # look for pending results if we're waiting for any n = 0 if nb.numberOfPendingResults() > 0: # we have results to get self.open() for j in set(nb.pendingResults()): # query the status of a job status = self._client.result_status(j, status_only = False) # add all completed jobs to the notebook if j in status['completed']: r = status[j] # update the result in the notebook, cancelling # the pending result as well # values come back from Client.result_status() in # varying degrees of list-nesting, which LabNotebook.addResult() # handles itself nb.addResult(r, j) # commit changes to the notebook nb.commit() # purge the completed job from the cluster self._client.purge_hub_results(j) # record that we retrieved the results for the given job n = n + 1 return n def numberOfResults( self ): """Return the number of results we have available at the moment. :returns: the number of results""" self.updateResults() return self.notebook().numberOfResults() def numberOfPendingResults( self ): """Return the number of resultswe are waiting for. :returns: the number of pending results""" self.updateResults() return self.notebook().numberOfPendingResults() def _availableResultsFraction( self ): """Private method to return the fraction of results available, as a real number between 0 and 1. This does not update the results fetched from the cluster. :returns: the fraction of available results""" tr = self.notebook().numberOfResults() + self.notebook().numberOfPendingResults() if tr == 0: return 0 else: return (self.notebook().numberOfResults() + 0.0) / tr def readyFraction( self ): """Test what fraction of results are available. This will change over time as the results come in. :returns: the fraction from 0 to 1""" self.updateResults() return self._availableResultsFraction() def ready( self ): """Test whether all the results are available. This will change over time as the results come in. :returns: True if all the results are available""" return (self.readyFraction() == 1) def wait( self, timeout = -1 ): """Wait for all pending results to be finished. If timeout is set, return after this many seconds regardless. :param timeout: timeout period in seconds (defaults to forever) :returns: True if all the results completed""" # we can't use ipyparallel.Client.wait() for this, because that # method only works for cases where the Client object is the one that # submitted the jobs to the cluster hub -- and therefore has the # necessary data structures to perform synchronisation. This isn't the # case for us, as one of the main goals of epyc is to support disconnected # operation, which implies a different Client object retrieving results # than the one that submitted the jobs in the first place. This is # unfortunate, but understandable given the typical use cases for # Client objects. # # Instead. we have to code around a little busily. The ClusterLab.WaitingTime # global sets the latency for waiting, and we repeatedly wait for this amount # of time before updating the results. 
The latency value essentially controls # how busy this process is: given that most simulations are expected to # be long, a latency in the tens of seconds feels about right as a default if self.numberOfPendingResults() > 0: # we've got pending results, wait for them timeWaited = 0 while (timeout < 0) or (timeWaited < timeout): if self.numberOfPendingResults() == 0: # no pending jobs left, we're complete return True else: # not done yet, calculate the waiting period if timeout == -1: # wait for the default waiting period dt = self.WaitingTime else: # wait for the default waiting period or until the end of the timeout. # whichever comes first if (timeout - timeWaited) < self.WaitingTime: dt = timeout - timeWaited else: dt = self.WaitingTime # sleep for a while time.sleep(dt) timeWaited = timeWaited + dt # if we get here, the timeout expired, so do a final check # and then exit return (self.numberOfPendingResults() == 0) else: # no results, so we got them all return True def pendingResults( self ): """Return the list of job iods for any pending results. :returns: a list of job ids""" return self.notebook().pendingResults() def pendingResultsFor( self, params ): """Return a list of job ids for any results pending for experiments at the given point in the parameter space. :param params: the experimental parameters :returns: a list of job ids""" return self.notebook().pendingResultsFor(params) def _abortJobs( self, js ): """Private method to abort a set of jobs. :param js: the job ids to be aborted""" self.open() self._client.abort(jobs = js) self.close() def cancelPendingResultsFor( self, params ): """Cancel any results pending for experiments at the given point in the parameter space. :param params: the experimental parameters""" # grab the result job ids jobs = self.pendingResultsFor(params) if len(jobs) > 0: # abort in the cluster self._abortJobs(jobs) # cancel in the notebook self.notebook().cancelPendingResultsFor(params) def cancelAllPendingResults( self ): """Cancel all pending results.""" # grab all the pending job ids jobs = self.pendingResults() if len(jobs) > 0: # abort in the cluster self._abortJobs(jobs) # cancel in the notebook self.notebook().cancelAllPendingResults()
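# --- Usage sketch (not part of the original source) -----------------------------
# Hedged example of driving a ClusterLab. It assumes an ipyparallel cluster is
# already running under the 'cluster' profile and relies on the parameter-assignment
# interface inherited from epyc.Lab and on epyc.Experiment's do() hook; MyExperiment
# and the 'alpha' parameter are illustrative only.

import epyc

class MyExperiment(epyc.Experiment):
    def do(self, params):
        # the actual computation, executed on a cluster engine
        return {'y': params['alpha'] ** 2}

if __name__ == '__main__':
    lab = ClusterLab(profile='cluster')
    lab['alpha'] = [0.1, 0.5, 1.0]           # sweep over one parameter
    lab.runExperiment(MyExperiment())        # returns immediately; runs are asynchronous

    # later, possibly from a different process reconnecting to the same notebook
    lab.wait()                               # block until all pending results are in
    print('collected {} results'.format(lab.notebook().numberOfResults()))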
class AdvancedIppController(ParallelController):
    '''
    A wrapper class to deal with monitoring and logging the activity of
    parallel jobs.

    Note that, at least for now, functions should accept only one parameter
    (which can be a structure).
    '''
    NOT_STARTED = None
    FAILED = -1
    RUNNING = 0
    SUCCESS = 1

    def __init__(self, name='ParallelController',
                 serial_fun=None, args=None, const_vars=None,
                 chunksize=1, logfile=None, loglevel=None,
                 poll_interval=60, max_batch=None, max_exec_time=None,
                 dbfile=None, fresh_run=False, retry=3, commit_timeout=30):
        self.name = name
        self._serial_fun = serial_fun  # serial function to be mapped
        self._args = args  # arguments to be mapped
        self.const_vars = const_vars  # consts for the function
        self.chunksize = chunksize
        self.logfile = logfile
        self.loglevel = loglevel
        self.poll_interval = poll_interval  # poll for status every poll_interval seconds
        self.results = []
        self._status = []
        self._to_process = []
        self._ok = False  # flag for successful completion
        self._client = None  # ipyparallel client
        self._view = None  # ipyparallel load balanced view
        self._logger = None  # logger
        self._ar = None  # async results
        #self._fwrapper = None  # function wrapper
        self._max_batch = max_batch
        self.max_exec_time = max_exec_time
        self.retry = retry
        self._queue = multiprocessing.Queue()
        self.commit_timeout = commit_timeout

        if self._args is None:
            self._args = list()
        if self.const_vars is None:
            self.const_vars = dict()
        if dbfile is None:
            self.dbfile = '%s.db' % name
        else:
            self.dbfile = dbfile
        if fresh_run:
            if os.path.isfile(self.dbfile):
                os.remove(self.dbfile)

    def get_status(self):
        return self._status

    def set_function(self, value):
        self._serial_fun = value

    def get_function(self):
        return self._serial_fun

    def set_const(self, name, val):
        self.const_vars[name] = val

    def set_args(self, args):
        self._args = args
        self._status = [AdvancedIppController.NOT_STARTED] * len(args)
        self.results = [None] * len(self._args)

    def get_args(self):
        return self._args

    def _setup_logger(self, batch_no=None):
        # setup logger
        if batch_no is None:
            self._logger = logging.getLogger(self.name)
        else:
            self._logger = logging.getLogger(self.name + '/batch%d' % batch_no)

        # add a file handler only once, keeping any existing stream handlers
        fnames = [fh.baseFilename for fh in self._logger.handlers
                  if isinstance(fh, logging.FileHandler)]
        if self.logfile is not None and self.logfile not in fnames:
            fh = logging.FileHandler(self.logfile)
            fh.setFormatter(default_log_formatter)
            self._logger.addHandler(fh)
        if self.loglevel is not None:
            self._logger.setLevel(self.loglevel)

        # prepare the remote function
        #self._fwrapper = FunctionWrapper(self._serial_fun, self.const_vars)

    def _setup_ipp(self):
        # get client and view instances, and use cloudpickle
        from ipyparallel import Client
        self._client = Client(context=zmq.Context())
        self._ids = self._client.ids
        self._dview = self._client[self._ids]
        self._dview.use_cloudpickle()
        self._view = self._client.load_balanced_view(targets=self._ids)

    def _cleanup(self):
        if self._client:
            self._client.close()

    def _handle_errors(self):
        failed = [i for i, x in enumerate(self._status)
                  if x == AdvancedIppController.FAILED]
        n_failed = len(failed)
        self._logger.error('%d tasks have failed.', n_failed)
        print('%s: %d tasks have failed.' % (self.name, n_failed),
              file=sys.stderr)
        for cnt, i in enumerate(failed):
            if cnt > 2:
                self._logger.error('... %d more errors ...', n_failed - 3)
                print('... %d more errors ...' % (n_failed - 3), file=sys.stderr)
                break
            self._logger.error('JOB# %d:\n %s \n' + '-' * 40, i, self.results[i])
            print('JOB# %d:\n %s' % (i, self.results[i]), file=sys.stderr)
            print('-' * 40, file=sys.stderr)
        return n_failed

    def _split_batches(self):
        if self._max_batch is None:
            return [(0, len(self._to_process))]
        else:
            num_batch = len(self._to_process) // self._max_batch
            if len(self._to_process) % self._max_batch != 0:
                num_batch += 1
            return [(b * self._max_batch,
                     min(len(self._to_process), (b + 1) * self._max_batch))
                    for b in range(num_batch)]

    def _check_db(self):
        create_table = not os.path.isfile(self.dbfile)
        with sqlite3.connect(self.dbfile) as conn:
            if create_table:
                conn.execute('CREATE TABLE completed (jid INT, '
                             'completed_time INT, run_time REAL)')
            query = 'SELECT jid FROM completed'
            c = conn.execute(query)
            completed = {x[0] for x in c.fetchall()}
            not_completed = set(self._args) - completed
            self._to_process = list(not_completed)
            for i in completed:
                self.results[i] = 0
                self._status[i] = AdvancedIppController.SUCCESS

    def _run_all(self):
        self._setup_logger()
        self.results = [None] * len(self._args)
        self._status = [AdvancedIppController.RUNNING] * len(self._args)
        self._check_db()

        tot_time = 0
        trial = 0
        error_count = 0
        last_commit = time.time()
        self._start_time = time.time()
        with sqlite3.connect(self.dbfile) as conn:
            while trial < self.retry and len(self._to_process):
                now_completed = []
                error_count = 0
                self.job_batches = self._split_batches()
                self._logger.info(
                    'Starting %s - %d jobs, divided in %d batches (trial %d)',
                    self.name, len(self._to_process), len(self.job_batches), trial)
                for batch_no, (batch_start, batch_end) in enumerate(self.job_batches):
                    p = multiprocessing.Process(target=self._run_batch,
                                                args=(batch_no, ))
                    p.start()
                    while True:
                        # keep the db file updated, so we can read the
                        # situation from outside
                        if time.time() - last_commit > self.commit_timeout:
                            conn.commit()
                            last_commit = time.time()

                        i, result = self._queue.get()
                        if i >= 0:
                            self.results[i] = result
                            if result[0] == -1:
                                error_count += 1
                                self.results[i] = result[1]
                                self._status[i] = AdvancedIppController.FAILED
                            elif result[0] == 0:
                                self._status[i] = AdvancedIppController.SUCCESS
                                self.results[i] = result[1]
                                etime = result[2]
                                conn.execute(
                                    'INSERT INTO completed VALUES (?, ?, ?)',
                                    (i, int(time.time()), etime))
                                now_completed.append(i)
                        elif i == -2:
                            p.join()
                            raise RuntimeError('Process raised error', result)
                        elif i == -3:  # batch finished signal
                            tot_time += result
                            break
                    p.join()

                for i in now_completed:
                    self._to_process.remove(i)

                if error_count:
                    self._logger.warning(
                        'Got %d errors during the execution, retrying...',
                        error_count)
                trial += 1

        # handle errors if any occurred
        if error_count:
            n_failed = self._handle_errors()
            raise RuntimeError('%d jobs failed. Log file: %s'
                               % (n_failed, self.logfile))
        else:
            self._logger.info('Done. Time elapsed: %s', pretty_tdelta(tot_time))
            self._ok = True

    def _run_batch(self, batch_no):
        self._setup_logger(batch_no)
        batch_start, batch_end = self.job_batches[batch_no]
        self._logger.info('Starting batch %d of %d: %d tasks',
                          batch_no + 1, len(self.job_batches),
                          batch_end - batch_start)
        self._setup_ipp()
        self._logger.info('Working on %d worker engines', len(self._ids))

        # map asynchronously onto the workers
        fwrapper = IppFunctionWrapper(self._serial_fun, self.const_vars,
                                      self.max_exec_time)
        self._ar = self._view.map_async(
            fwrapper, self._to_process[batch_start:batch_end],
            chunksize=self.chunksize)

        # start a thread to monitor progress
        self._monitor_flag = True
        monitor_thread = threading.Thread(target=self._monitor)
        monitor_thread.start()

        try:
            # collect results
            for i, r in enumerate(self._ar):
                self._queue.put((self._to_process[i + batch_start], r))
        except Exception:
            self._monitor_flag = False
            exc_type, exc_value, exc_traceback = sys.exc_info()
            tb_str = ''.join(
                traceback.format_exception(exc_type, exc_value, exc_traceback))
            self._queue.put((-2, tb_str))
        self._queue.put((-3, self._ar.elapsed))

        # close the monitor thread and print details
        self._logger.info('Batch completed. Time elapsed: %s',
                          pretty_tdelta(self._ar.elapsed))
        monitor_thread.join()

    def _monitor(self):
        while not self._ar.ready() and self._monitor_flag:
            n_tasks = len(self._ar)
            if self._ar.progress > 0:
                time_per_task = float(self._ar.elapsed) / self._ar.progress
                eta = (n_tasks - self._ar.progress) * time_per_task
                etastr = pretty_tdelta(eta)
            else:
                etastr = 'N/A'
            self._logger.info(
                'Completed %d of %d tasks. Time elapsed: %s  Remaining: %s',
                self._ar.progress, n_tasks, pretty_tdelta(self._ar.elapsed),
                etastr)
            elapsed = 0
            while elapsed < self.poll_interval:
                if not self._monitor_flag:
                    break
                self._ar.wait(1)
                elapsed += 1

    def submit(self):
        if not self._serial_fun:
            raise RuntimeError('AdvancedIppController.serial_fun not set')
        if not self._args:
            raise RuntimeError('AdvancedIppController.args not set')
        try:
            self._run_all()
        finally:
            self._cleanup()

    def success(self):
        return self._ok

    def map(self, parallel_task, args):
        self.serial_fun = parallel_task
        self.args = args
        self.submit()
        return self.results

    status = property(get_status, None, None)
    serial_fun = property(get_function, set_function, None)
    args = property(get_args, set_args, None)
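# A minimal sketch of the core pattern wrapped by AdvancedIppController:
# connect a Client, enable cloudpickle on a direct view so interactively
# defined functions can be shipped, submit work through a load-balanced view
# with map_async, and poll the AsyncMapResult for progress. The function
# `slow_square` and the poll interval are illustrative assumptions, and a
# running ipcluster (controller plus engines) is required to execute it.

from ipyparallel import Client


def slow_square(x):
    import time
    time.sleep(0.5)
    return x * x


if __name__ == '__main__':
    client = Client()
    dview = client[:]
    dview.use_cloudpickle()          # requires cloudpickle on the engines
    view = client.load_balanced_view()

    ar = view.map_async(slow_square, range(32), chunksize=4)

    # report progress until the whole map has completed
    while not ar.ready():
        print('%d / %d tasks done, %.1fs elapsed'
              % (ar.progress, len(ar), ar.elapsed))
        ar.wait(2)                   # poll every couple of seconds

    print('results:', ar.get()[:5], '...')
    client.close()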
def get_noise_fft_parallel(Y, n_processes=4, n_pixels_per_process=100,
                           backend='ipyparallel', **kwargs):
    """Parallel version of get_noise_fft.

    Params:
    -------
    Y: ndarray
        input movie (n_pixels x Time). Can also be a memory mapped file.

    n_processes: [optional] int
        number of processes/threads to use concurrently

    n_pixels_per_process: [optional] int
        number of pixels to be simultaneously processed by each process

    backend: [optional] string
        the type of concurrency to be employed: 'ipyparallel' (default),
        'SLURM', 'multithreading' or 'single_thread'

    **kwargs: [optional] dict
        all the parameters passed to get_noise_fft

    Returns:
    --------
    sn: ndarray(double)
        noise associated to each pixel
    psx_s: ndarray
        per-pixel power spectral density used for the noise estimate
    """
    folder = tempfile.mkdtemp()

    # Pre-allocate a writeable shared memory map as a container for the
    # results of the parallel computation
    pixel_groups = range(0, Y.shape[0] - n_pixels_per_process + 1,
                         n_pixels_per_process)

    # if backend == "threading":  # case joblib
    #     sn_name = os.path.join(folder, 'sn_s')
    #     print("using threading")
    #     sn_s = np.memmap(sn_name, dtype=np.float32, shape=Y.shape[0], mode='w+')
    #     # Fork the worker processes to perform computation concurrently
    #     Parallel(n_jobs=n_processes, backend=backend)(
    #         delayed(fft_psd_parallel)(Y, sn_s, i, n_pixels_per_process, **kwargs)
    #         for i in pixel_groups)

    if backend == 'multithreading':
        pool = ThreadPool(n_processes)
        argsin = [(Y, i, n_pixels_per_process, kwargs) for i in pixel_groups]
        results = pool.map(fft_psd_multithreading, argsin)
        _, _, psx_ = results[0]
        sn_s = np.zeros(Y.shape[0])
        psx_s = np.zeros((Y.shape[0], psx_.shape[-1]))
        for idx, sn, psx_ in results:
            sn_s[idx] = sn
            psx_s[idx, :] = psx_

    elif backend == 'ipyparallel' or backend == 'SLURM':
        if type(Y) is np.core.memmap:
            # if the input is already memory mapped then find the filename
            Y_name = Y.filename
        else:
            raise Exception('ipyparallel backend only works with memory mapped files')

        try:
            if backend == 'SLURM':
                if 'IPPPDIR' in os.environ and 'IPPPROFILE' in os.environ:
                    pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
                else:
                    raise Exception('environment variables not found, please source slurmAlloc.rc')
                c = Client(ipython_dir=pdir, profile=profile)
            else:
                c = Client()

            dview = c[:]
            ne = len(dview)
            print('Running on %d engines.' % ne)
            argsin = [(Y_name, i, n_pixels_per_process, kwargs) for i in pixel_groups]
            if backend == 'SLURM':
                results = dview.map(fft_psd_multithreading, argsin)
            else:
                results = dview.map_sync(fft_psd_multithreading, argsin)

            _, _, psx_ = results[0]
            psx_s = np.zeros((Y.shape[0], psx_.shape[-1]))
            sn_s = np.zeros(Y.shape[0])
            for idx, sn, psx_ in results:
                sn_s[idx] = sn
                psx_s[idx, :] = psx_
        finally:
            try:
                c.close()
            except Exception:
                print('closing the client failed')

    elif backend == 'single_thread':
        argsin = [(Y, i, n_pixels_per_process, kwargs) for i in pixel_groups]
        results = list(map(fft_psd_multithreading, argsin))
        _, _, psx_ = results[0]
        sn_s = np.zeros(Y.shape[0])
        psx_s = np.zeros((Y.shape[0], psx_.shape[-1]))
        for idx, sn, psx_ in results:
            sn_s[idx] = sn
            psx_s[idx, :] = psx_

    else:
        raise Exception('Unknown method')

    # if n_pixels_per_process is not a divisor of Y.shape[0], run on the remaining pixels
    pixels_remaining = Y.shape[0] % n_pixels_per_process
    if pixels_remaining > 0:
        print("Running fft for remaining pixels: " + str(pixels_remaining))
        if type(Y) is np.core.memmap:
            # if the input is already memory mapped then find the filename
            Y_name = Y.filename
        elif type(Y) is str:
            Y_name = Y
        else:
            raise Exception('ipyparallel backend only works with memory mapped files')
        idx, sn, psx_ = fft_psd_multithreading(
            (Y_name, Y.shape[0] - pixels_remaining, pixels_remaining, kwargs))
        sn_s[idx] = sn
        psx_s[idx, :] = psx_

    sn_s = np.array(sn_s)
    psx_s = np.array(psx_s)

    try:
        shutil.rmtree(folder)
    except Exception:
        print("Failed to delete: " + folder)
        raise

    return sn_s, psx_s
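# A minimal sketch of why the ipyparallel branch above passes Y.filename
# rather than the array itself: engines are separate processes (possibly on
# other hosts sharing a filesystem), so each worker re-opens the memory map
# by name instead of receiving the pixel data over the wire. The worker
# `block_std`, the dtype/shape arguments and the per-pixel standard deviation
# are stand-ins for fft_psd_multithreading and the real noise estimate, and
# a running local ipcluster is assumed.

import numpy as np
from ipyparallel import Client


def block_std(args):
    """Open the shared memmap and return (indices, per-pixel std) for a block."""
    import numpy as np
    fname, dtype, shape, start, count = args
    Y = np.memmap(fname, dtype=dtype, shape=shape, mode='r')
    idx = list(range(start, start + count))
    return idx, np.std(Y[start:start + count, :], axis=1)


if __name__ == '__main__':
    # build a small memmapped movie (n_pixels x time) to share with the engines
    fname = 'movie_mmap.dat'
    Y = np.memmap(fname, dtype=np.float32, shape=(1000, 200), mode='w+')
    Y[:] = np.random.randn(*Y.shape)
    Y.flush()

    n_pixels_per_block = 100
    blocks = [(fname, 'float32', Y.shape, i, n_pixels_per_block)
              for i in range(0, Y.shape[0], n_pixels_per_block)]

    c = Client()              # assumes a running ipcluster on this machine
    try:
        results = c[:].map_sync(block_std, blocks)
    finally:
        c.close()

    # stitch the per-block estimates back into one per-pixel vector
    sn = np.zeros(Y.shape[0])
    for idx, block_sn in results:
        sn[idx] = block_sn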
def run_CNMF_patches(file_name, shape, options, rf=16, stride=4,
                     n_processes=2, backend='single_thread'):
    """Run CNMF on patches of the movie, either in parallel or sequentially,
    and return the combined result. The 'ipyparallel' backend requires a
    running ipyparallel cluster.

    Parameters
    ----------
    file_name: string
        full path to an npy file (2D, pixels x time) containing the movie

    shape: tuple of three elements
        dimensions of the original movie across y, x, and time

    options: dictionary
        containing all the parameters for the various algorithms

    rf: int
        half-size of the square patch in pixels

    stride: int
        amount of overlap between patches

    n_processes: int
        number of engines to use with the ipyparallel backend

    backend: string
        'ipyparallel' or 'single_thread'

    Returns
    -------
    A_tot: sparse matrix of the spatial components extracted from all patches
    C_tot: matrix of the corresponding temporal components
    sn_tot: per-pixel noise estimate
    optional_outputs: dictionary of per-patch quantities (b, f, bl, c1,
        neurons_sn, g, patch indices and shapes)
    """
    (d1, d2, T) = shape
    d = d1 * d2
    K = options['init_params']['K']

    idx_flat, idx_2d = extract_patch_coordinates(d1, d2, rf=rf, stride=stride)

    args_in = []
    for id_f, id_2d in zip(idx_flat[:], idx_2d[:]):
        args_in.append((file_name, id_f, id_2d[0].shape, options))

    print(len(idx_flat))

    st = time.time()
    if backend == 'ipyparallel':
        try:
            c = Client()
            dview = c[:n_processes]
            file_res = dview.map_sync(cnmf_patches, args_in)
        finally:
            dview.results.clear()
            c.purge_results('all')
            c.purge_everything()
            c.close()
    elif backend == 'single_thread':
        file_res = list(map(cnmf_patches, args_in))
    else:
        raise Exception('Backend unknown')

    print(time.time() - st)

    # extract the values from the output of the mapped computation
    num_patches = len(file_res)
    A_tot = scipy.sparse.csc_matrix((d, K * num_patches))
    C_tot = np.zeros((K * num_patches, T))
    sn_tot = np.zeros((d1 * d2))
    b_tot = []
    f_tot = []
    bl_tot = []
    c1_tot = []
    neurons_sn_tot = []
    g_tot = []
    idx_tot = []
    shapes_tot = []
    id_patch_tot = []
    count = 0
    patch_id = 0

    print('Transforming patches into full matrix')
    for idx_, shapes, A, b, C, f, S, bl, c1, neurons_sn, g, sn, _ in file_res:
        sn_tot[idx_] = sn
        b_tot.append(b)
        f_tot.append(f)
        bl_tot.append(bl)
        c1_tot.append(c1)
        neurons_sn_tot.append(neurons_sn)
        g_tot.append(g)
        idx_tot.append(idx_)
        shapes_tot.append(shapes)

        for ii in range(np.shape(A)[-1]):
            # normalise each spatial component to unit L2 norm before merging
            new_comp = A.tocsc()[:, ii] / np.sqrt(
                np.sum(np.array(A.tocsc()[:, ii].todense()) ** 2))
            if new_comp.sum() > 0:
                A_tot[idx_, count] = new_comp
                C_tot[count, :] = C[ii, :]
                id_patch_tot.append(patch_id)
                count += 1
        patch_id += 1

    A_tot = A_tot[:, :count]
    C_tot = C_tot[:count, :]

    optional_outputs = dict()
    optional_outputs['b_tot'] = b_tot
    optional_outputs['f_tot'] = f_tot
    optional_outputs['bl_tot'] = bl_tot
    optional_outputs['c1_tot'] = c1_tot
    optional_outputs['neurons_sn_tot'] = neurons_sn_tot
    optional_outputs['g_tot'] = g_tot
    optional_outputs['idx_tot'] = idx_tot
    optional_outputs['shapes_tot'] = shapes_tot
    optional_outputs['id_patch_tot'] = id_patch_tot

    return A_tot, C_tot, sn_tot, optional_outputs
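# A minimal sketch of the backend dispatch and cleanup pattern used by
# run_CNMF_patches, separated from the CNMF-specific bookkeeping: map a
# per-patch function over a list of argument tuples either on a slice of the
# cluster engines or serially, and always release the results cached by the
# client and the hub. `process_patch` and the argument tuples are hypothetical
# placeholders, and the ipyparallel path assumes a running cluster.

from ipyparallel import Client


def map_over_patches(process_patch, args_in, n_processes=2,
                     backend='single_thread'):
    if backend == 'ipyparallel':
        c = Client()                      # connect to the running cluster
        dview = c[:n_processes]           # use only the first n_processes engines
        try:
            file_res = dview.map_sync(process_patch, args_in)
        finally:
            # drop cached results both client- and hub-side, then disconnect
            dview.results.clear()
            c.purge_everything()
            c.close()
        return file_res
    elif backend == 'single_thread':
        return list(map(process_patch, args_in))
    else:
        raise ValueError('Backend unknown: %r' % backend)


# usage sketch:
# file_res = map_over_patches(cnmf_patches, args_in, n_processes=4,
#                             backend='ipyparallel')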
def power_of_test(data1, data2, rvs_func='rvs_pairs', tests=['chi2_2samp'],
                  rvs_key={}, test_key={}, parallel=None, sync=True):
    """Compute the corresponding p-values for each histogram pair drawn from
    the random variates of the given 2 samples/frequencies for size_times.

    Parameters
    ----------
    data1, data2 : sequence of 1-D ndarrays
        Input data. Observed samples or frequencies.
    rvs_func : [callable|str], optional, default : "rvs_pairs"
        The random variates function. The rvs_func can be either a callable
        or one of the following strings::

            String           Description
            "rvs_pairs"      Compute the histogram pairs from the random
                             variates of the given 2 samples/frequencies
                             for size_times.

    tests : ([callable|str],...), optional, default : ["chi2_2samp"]
        A list of *test* statistical functions. The *test* can be either a
        callable or one of the following strings::

            String                     Description
            "chi2_2samp"               Read TS.chi2_2samp for further information.
            "BDM_2samp"                Read TS.BDM_2samp for further information.
            "likelihoodratio_ksamp"    Read TS.likelihoodratio_ksamp for further information.
            "likelihoodvalue_ksamp"    Read TS.likelihoodvalue_ksamp for further information.
            "ks_2samp"                 Read TS.ks_2samp for further information.
            "anderson_ksamp"           Read TS.anderson_ksamp for further information.
            "CVM_2samp"                Read TS.CVM_2samp for further information.

    rvs_key : dict, optional, default : {}
        Keyword arguments for the rvs function, rvs_func.
    test_key : dict, optional
        Keyword arguments for the test statistical function, test.
    parallel : bool, optional, default : None
        If True, use the IPyParallel package to do the computation in parallel.
        If parallel is None, the global variable PARALLEL is used instead.
    sync : bool, optional, default : True
        When sync is False, an IPyParallel AsyncResult object is returned
        instead. Only takes effect when parallel is True.

    Returns
    -------
    ret : dict
        A mapping from each test name to the list of p-values [p1, p2, ...],
        one per histogram pair.
    """
    if parallel is None:
        parallel = PARALLEL
    if parallel:
        try:
            global client
            client = Client(**ipp_profile)
            size = rvs_key['size']
            N = len(client)
            jobs = []
            for i in range(N):
                # split the total number of random variates evenly across engines
                rvs_key['size'] = (size // N + 1) if (i < size % N) else size // N
                jobs.append(client[client.ids[i]].apply_async(
                    power_of_test, data1, data2, rvs_func, tests, rvs_key,
                    test_key, False))
            ars = client._asyncresult_from_jobs(jobs)
            if sync:
                ars.wait_interactive()
                ret = {}
                for key, val in ars.get():
                    ret.setdefault(key, []).extend(val)
            else:
                return ars
        finally:
            client.close()
        return ret

    if isinstance(rvs_func, str):
        rvs_func = globals()[rvs_func]
    if type(tests) not in (list, tuple):
        tests = [tests]
    tests = [(t, getattr(TS, t)) if isinstance(t, str) else (str(t), t)
             for t in tests]

    ret = {}
    for rvs1, rvs2 in rvs_func(data1, data2, **rvs_key):
        for tname, test in tests:
            ret.setdefault(tname, []).append(
                test(rvs1, rvs2, binned=True, **test_key).pvalue)
    return ret
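# A minimal sketch of the work-splitting arithmetic used in the parallel
# branch of power_of_test: `size` repetitions are divided as evenly as
# possible over N engines (the first size % N engines get one extra), each
# engine runs the serial code path, and the per-test p-value lists are merged.
# `serial_power_of_test` stands in for the recursive call with parallel=False,
# and the public per-job .get() is used here instead of the private
# Client._asyncresult_from_jobs helper in the original.

from ipyparallel import Client


def split_evenly(size, n):
    """Return n chunk sizes that sum to size and differ by at most one."""
    return [size // n + 1 if i < size % n else size // n for i in range(n)]


def parallel_power(serial_power_of_test, data1, data2, size, **kwargs):
    client = Client()                      # assumes a running cluster
    try:
        chunks = split_evenly(size, len(client))
        jobs = [client[eid].apply_async(serial_power_of_test, data1, data2,
                                        size=chunk, **kwargs)
                for eid, chunk in zip(client.ids, chunks)]
        # merge the per-engine {test_name: [p-values]} dictionaries
        merged = {}
        for job in jobs:
            for test_name, pvalues in job.get().items():
                merged.setdefault(test_name, []).extend(pvalues)
    finally:
        client.close()
    return merged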