def run_jobs_on_ipythoncluster(worker, task_list, shutdown_ipengines_after_done=False):
    t0 = time.time()
    rc = Client(CLUSTER_CLIENT_JSON)
    lview = rc.load_balanced_view()
    cnt_nodes = len(lview.targets or rc.ids)
    print("\t# nodes in use: {}".format(cnt_nodes))
    lview.block = False

    print("\t# of tasks: {}".format(len(task_list)))
    print("\tsubmitting...", end='')
    job = lview.map_async(worker, task_list)
    print("done.")
    try:
        job.wait_interactive()
    except KeyboardInterrupt:
        # handle "Ctrl-C"
        if ask("\nAbort all submitted jobs?") == 'Y':
            lview.abort()
            print("Aborted, all submitted jobs are cancelled.")
        else:
            print("Aborted, but your jobs are still running on the cluster.")
        return

    if len(job.result()) != len(task_list):
        print("WARNING:\t# of results returned ({}) != # of tasks ({}).".format(
            len(job.result()), len(task_list)))
    print("\ttotal time: {}".format(timesofar(t0)))

    if shutdown_ipengines_after_done:
        print("\tshutting down all ipengine nodes...", end='')
        lview.shutdown()
        print('Done.')
    return job.result()
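ask() and timesofar() above are helpers from the surrounding codebase. For context, a minimal self-contained sketch of the same submit/wait_interactive pattern, assuming a cluster already started with "ipcluster start" and a hypothetical worker square():

import ipyparallel as ipp

def square(x):  # hypothetical worker
    return x * x

rc = ipp.Client()                         # connect via the default profile
lview = rc.load_balanced_view()
job = lview.map_async(square, range(16))  # non-blocking submission, as above
job.wait_interactive()                    # same progress display used above
print(job.result())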
class ParallelPool(object):

    def __init__(self):
        # Load configuration
        self.c = Configuration.Configuration()

        # Now instance the pool of batch workers according
        # to the technology selected in the configuration file
        if self.c.parallel.technology == 'ipython':
            self.IPYc = Client(profile=self.c.parallel.ipython.profile)
            self.pool = self.IPYc[:]
        elif self.c.parallel.technology == 'python':
            if self.c.parallel.python.number_of_processes == 0:
                n_cpus = multiprocessing.cpu_count()
            else:
                n_cpus = self.c.parallel.python.number_of_processes
            self.pool = multiprocessing.Pool(n_cpus)
        else:
            raise ValueError("Unknown technology %s in configuration file"
                             % self.c.parallel.technology)

    # The following methods simply forward the requests to the
    # batch worker technology

    def map(self, *args, **kwargs):
        if self.c.parallel.technology == 'ipython':
            return self.pool.map(*args, **kwargs).get()
        else:
            return self.pool.map(*args, **kwargs)

    def imap(self, *args, **kwargs):
        return self.pool.imap(*args, **kwargs)

    def close(self):
        if self.c.parallel.technology == 'ipython':
            self.IPYc.close()
        else:
            self.pool.close()
            self.pool.join()
def create_optimizer(args):
    '''returns configured bluepyopt.optimisations.DEAPOptimisation'''
    if args.ipyparallel or os.getenv('L5PCBENCHMARK_USEIPYP'):
        from ipyparallel import Client
        rc = Client(profile=os.getenv('IPYTHON_PROFILE'))
        logger.debug('Using ipyparallel with %d engines', len(rc))
        lview = rc.load_balanced_view()

        def mapper(func, it):
            start_time = datetime.now()
            ret = lview.map_sync(func, it)
            logger.debug('Generation took %s', datetime.now() - start_time)
            return ret

        map_function = mapper
    else:
        map_function = None

    evaluator = l5pc_evaluator.create()
    seed = os.getenv('BLUEPYOPT_SEED', args.seed)
    opt = bluepyopt.optimisations.DEAPOptimisation(
        evaluator=evaluator,
        map_function=map_function,
        seed=seed)
    return opt
def __enter__(self):
    args = []
    if self.profile is not None:
        args.append("--profile=" + self.profile)
    if self.cluster_id is not None:
        args.append("--cluster-id=" + self.cluster_id)
    if self.num_engines is not None:
        args.append("--n=" + str(self.num_engines))
    if self.ipython_dir is not None:
        args.append("--ipython-dir=" + self.ipython_dir)
    cmd = " ".join(["ipcluster start --daemonize"] + args)
    self.logger.info('Starting IPython cluster with "' + cmd + '"')
    os.system(cmd)

    num_engines, timeout = self.num_engines, self.timeout
    time.sleep(self.min_wait)
    waited = self.min_wait
    client = None
    while client is None:
        try:
            client = Client(profile=self.profile, cluster_id=self.cluster_id)
        except (IOError, TimeoutError):
            if waited >= self.timeout:
                raise IOError("Could not connect to IPython cluster controller")
            if waited % 10 == 0:
                self.logger.info("Waiting for controller to start ...")
            time.sleep(1)
            waited += 1

    if num_engines is None:
        while len(client) == 0 and waited < timeout:
            if waited % 10 == 0:
                self.logger.info("Waiting for engines to start ...")
            time.sleep(1)
            waited += 1
        if len(client) == 0:
            raise IOError("IPython cluster engines failed to start")
        wait = min(waited, timeout - waited)
        if wait > 0:
            self.logger.info("Waiting {} more seconds for engines to start ...".format(wait))
            time.sleep(wait)
    else:
        running = len(client)
        while running < num_engines and waited < timeout:
            if waited % 10 == 0:
                self.logger.info("Waiting for {} of {} engines to start ..."
                                 .format(num_engines - running, num_engines))
            time.sleep(1)
            waited += 1
            running = len(client)
        running = len(client)
        if running < num_engines:
            raise IOError("{} of {} IPython cluster engines failed to start"
                          .format(num_engines - running, num_engines))

    client.close()
    self.pool = IPythonPool(profile=self.profile, cluster_id=self.cluster_id)
    return self.pool
def add_engines(n=1, profile='iptest', total=False):
    """add a number of engines to a given profile.

    If total is True, then already running engines are counted, and only
    the additional engines necessary (if any) are started.
    """
    rc = Client(profile=profile)
    base = len(rc)

    if total:
        n = max(n - base, 0)

    eps = []
    for i in range(n):
        ep = TestProcessLauncher()
        ep.cmd_and_args = ipengine_cmd_argv + [
            '--profile=%s' % profile,
            '--InteractiveShell.colors=nocolor'
        ]
        ep.start()
        launchers.append(ep)
        eps.append(ep)

    tic = time.time()
    while len(rc) < base + n:
        if any([ep.poll() is not None for ep in eps]):
            raise RuntimeError("A test engine failed to start.")
        elif time.time() - tic > 15:
            raise RuntimeError("Timeout waiting for engines to connect.")
        time.sleep(.1)

    rc.close()
    return eps
def simulate_general(runner, results_filename):
    """
    Function with the general code to simulate the MIMO schemes.
    """
    # xxxxxxxxxx Print the simulation parameters xxxxxxxxxxxxxxxxxxxxxxxxxx
    pprint(runner.params.parameters)
    print("MIMO Scheme: {0}".format(runner.mimo_object.__class__.__name__))
    # xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

    # xxxxx Replace any parameter mention in results_filename xxxxxxxxxxxxx
    runner.set_results_filename(results_filename)
    # xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

    # xxxxxxxxxx Perform the simulation xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
    # The simulation will be run either in parallel or serially, depending
    # on whether the IPython engines are running or not.
    run_in_parallel = True
    # noinspection PyBroadException
    try:
        # If we can get an IPython view that means that the IPython engines
        # are running. In that case we will perform the simulation in
        # parallel
        from ipyparallel import Client
        cl = Client()

        # We create a direct view to run code in all engines
        dview = cl.direct_view()

        # Reset the engines so that we don't have variables there from
        # previous computations
        dview.execute('%reset')
        dview.execute('import sys')
        # We use block=True to ensure that all engines have modified their
        # path to include the folder with the simulator before we create
        # the load balanced view below.
        dview.execute('sys.path.append("{0}")'.format(parent_dir), block=True)

        # But for the actual simulation we are better off using a load
        # balanced view
        lview = cl.load_balanced_view()
    except Exception:  # pylint: disable=W0703
        # If we can't get an IPython view then we will perform the
        # simulation serially
        run_in_parallel = False

    if run_in_parallel is True:
        print("-----> Simulation will be run in Parallel")
        runner.simulate_in_parallel(lview)
    else:
        print("-----> Simulation will be run serially")
        runner.simulate()
    # xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

    print("Iterations run: {0}".format(runner.runned_reps))
    print("Elapsed Time: {0}".format(runner.elapsed_time))
    print("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n")

    return runner.results, runner.results_filename
def __init__(self):
    from ipyparallel import Client
    rc = Client()
    rc.block = True
    self.cpu = rc[:]
    print('{} cores ready'.format(len(self.cpu)))
    self.cpu.execute('import numpy as np')
    self.cpu.execute('from sklearn.neighbors import KDTree, BallTree')
def do_parallel(filelist):
    rc = Client()
    print('# of engines : %d' % len(rc.ids))
    print('# of jobs    : %d' % len(filelist))
    lv = rc.load_balanced_view()
    result = lv.map_async(singlejob, filelist)
    result.wait_interactive()
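singlejob is defined elsewhere in the original module; a hypothetical stand-in, only to make the worker contract concrete (one filename in, one picklable result out, defined at module level so the engines can unpickle it):

def singlejob(fname):
    # hypothetical worker: process a single file and return a small summary
    with open(fname) as f:
        return fname, sum(1 for _ in f)  # e.g. number of lines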
def connect_client(self):
    """connect a client with my Context, and track its sockets for cleanup"""
    c = Client(profile='iptest', context=self.context)
    c.wait = lambda *a, **kw: self.client_wait(c, *a, **kw)

    for name in filter(lambda n: n.endswith('socket'), dir(c)):
        s = getattr(c, name)
        s.setsockopt(zmq.LINGER, 0)
        self.sockets.append(s)
    return c
def setup_parallel(dbname):
    c = Client()
    dview = c.direct_view()
    dview.push({'dbname': str(dbname)})
    # dview.push({'remove_duplicates_from_image_name_data':
    #             remove_duplicates_from_image_name_data,
    #             'get_temp_fname': get_temp_fname,
    #             'dbname': dbname})
    lbview = c.load_balanced_view()
    return lbview
def setup_parallel(parallel):
    if parallel:
        pickleutil.use_dill()
        # can_map.pop(FunctionType, None)
        # serialize.pickle = pickle
        print("Running in parallel")
        rc = Client()
        rc[:].use_dill()
        lview = rc.load_balanced_view()
        lview.block = True
    else:
        lview = None
    return lview
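use_dill() matters when the mapped callables are not plain module-level functions. A small sketch of the kind of workload it enables, assuming dill is installed on client and engines and a cluster is running:

from ipyparallel import Client

rc = Client()
rc[:].use_dill()                 # client and engines agree on dill serialization
lview = rc.load_balanced_view()
lview.block = True

offset = 10
# Closures and lambdas like this can trip up the default pickler;
# with dill enabled they can be shipped to the engines.
print(lview.map(lambda x: x + offset, range(4)))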
def _nengines_up(url_file):
    "return the number of engines up"
    client = None
    try:
        client = Client(url_file, timeout=60)
        up = len(client.ids)
        client.close()
    # the controller isn't up yet
    except iperror.TimeoutError:
        return 0
    # the JSON file is not available to parse
    except IOError:
        return 0
    else:
        return up
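A usage sketch for _nengines_up, polling until an expected number of engines is available; the helper name and timeout here are assumptions, and url_file is the controller's connection JSON:

import time

def wait_for_engines(url_file, expected, timeout=120):
    """Poll _nengines_up until `expected` engines report in or we time out."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        if _nengines_up(url_file) >= expected:
            return True
        time.sleep(2)
    return False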
def download_and_calibrate_parallel(list_of_ids, n=None):
    """Download and calibrate in parallel.

    Parameters
    ----------
    list_of_ids : list, optional
        container with img_ids to process
    n : int
        Number of cores for the parallel processing. Default: n_cores_system//2
    """
    setup_cluster(n_cores=n)
    c = Client()
    lbview = c.load_balanced_view()
    # Note: the async map is not waited on before the cluster is stopped below.
    lbview.map_async(download_and_calibrate, list_of_ids)
    subprocess.Popen(["ipcluster", "stop", "--quiet"])
def stop_server(is_slurm=False):
    '''
    programmatically stops the ipyparallel server
    '''
    sys.stdout.write("Stopping cluster...\n")
    sys.stdout.flush()

    if is_slurm:
        from ipyparallel import Client
        pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
        c = Client(ipython_dir=pdir, profile=profile)
        ee = c[:]
        ne = len(ee)
        print('Shutting down %d engines.' % ne)
        c.shutdown(hub=True)
        shutil.rmtree('profile_' + str(profile))
        try:
            shutil.rmtree('./log/')
        except Exception:
            print('creating log folder')

        files = glob.glob('*.log')
        os.mkdir('./log')
        for fl in files:
            shutil.move(fl, './log/')
    else:
        proc = subprocess.Popen(["ipcluster stop"],
                                shell=True, stderr=subprocess.PIPE)
        # stderr is bytes under Python 3
        line_out = proc.stderr.readline().decode('utf-8', errors='replace')
        if 'CRITICAL' in line_out:
            sys.stdout.write("No cluster to stop...")
            sys.stdout.flush()
        elif 'Stopping' in line_out:
            st = time.time()
            sys.stdout.write('Waiting for cluster to stop...')
            while (time.time() - st) < 4:
                sys.stdout.write('.')
                sys.stdout.flush()
                time.sleep(1)
        else:
            print('**** Unrecognized syntax in ipcluster output, '
                  'waiting for server to stop anyways ****')

    sys.stdout.write(" done\n")
def __init__(self, addpath=None):
    self.client = Client()
    self.load_balanced_view = self.client.load_balanced_view()
    if len(self.client.ids) == 0:
        print('# of engines : single mode')
    else:
        print('# of engines : %d' % len(self.client.ids))
def __init__(self, **specs):

    SurveyEnsemble.__init__(self, **specs)

    self.verb = specs.get('verbose', True)

    # access the cluster
    self.rc = Client()
    self.dview = self.rc[:]
    self.dview.block = True
    with self.dview.sync_imports():
        import EXOSIMS, EXOSIMS.util.get_module, \
            os, os.path, time, random, pickle, traceback, numpy
    if 'logger' in specs:
        specs.pop('logger')
    if 'seed' in specs:
        specs.pop('seed')
    self.dview.push(dict(specs=specs))
    self.vprint("Building SurveySimulation object on all workers.")
    res = self.dview.execute("SS = EXOSIMS.util.get_module.get_module(specs['modules']"
                             "['SurveySimulation'], 'SurveySimulation')(**specs)")
    res2 = self.dview.execute("SS.reset_sim()")
    self.vprint("Created SurveySimulation objects on %d engines." % len(self.rc.ids))
    # for row in res.stdout:
    #     self.vprint(row)
    self.lview = self.rc.load_balanced_view()
    self.maxNumEngines = len(self.rc.ids)
def start_server(slurm_script=None, ipcluster="ipcluster", ncpus=None):
    """
    programmatically start the ipyparallel server

    Parameters
    ----------
    ncpus : int
        number of processors
    ipcluster : str
        ipcluster binary file name; requires 4 path separators on Windows.
        ipcluster="C:\\\\Anaconda2\\\\Scripts\\\\ipcluster.exe"
        Default: "ipcluster"
    """
    logger.info("Starting cluster...")
    if ncpus is None:
        ncpus = psutil.cpu_count()

    if slurm_script is None:
        if ipcluster == "ipcluster":
            subprocess.Popen(
                "ipcluster start -n {0}".format(ncpus),
                shell=True, close_fds=(os.name != 'nt'))
        else:
            subprocess.Popen(
                shlex.split("{0} start -n {1}".format(ipcluster, ncpus)),
                shell=True, close_fds=(os.name != 'nt'))

        # Check that all processes have started
        time.sleep(1)
        client = ipyparallel.Client()
        while len(client) < ncpus:
            client.close()
            time.sleep(1)
            client = ipyparallel.Client()
        time.sleep(10)
        logger.debug('Making sure everything is up and running')
        client.close()
    else:
        shell_source(slurm_script)
        pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
        print([pdir, profile])
        c = Client(ipython_dir=pdir, profile=profile)
        ee = c[:]
        ne = len(ee)
        print(('Running on %d engines.' % (ne)))
        c.close()
        sys.stdout.write(" done\n")
def test_hubresult_timestamps(self):
    self.minimum_engines(4)
    v = self.client[:]
    ar = v.apply_async(time.sleep, 0.25)
    ar.get(2)
    rc2 = Client(profile='iptest')
    # must have try/finally to close second Client, otherwise
    # will have dangling sockets causing problems
    try:
        time.sleep(0.25)
        hr = rc2.get_result(ar.msg_ids)
        self.assertTrue(hr.elapsed > 0., "got bad elapsed: %s" % hr.elapsed)
        hr.get(1)
        self.assertTrue(hr.wall_time < ar.wall_time + 0.2,
                        "got bad wall_time: %s > %s" % (hr.wall_time, ar.wall_time))
        self.assertEqual(hr.serial_time, ar.serial_time)
    finally:
        rc2.close()
def process_trajectories(*processors, postprocessor, ipyparallel=None):
    trajectories = []
    for proc in processors:
        for info in proc.get_infos():
            trajectories += [Trajectory(info, proc, postprocessor)]

    if ipyparallel is not None:
        from ipyparallel import Client
        rc = Client(profile=ipyparallel)
        lbv = rc.load_balanced_view()
        with lbv.temp_flags(retries=10):
            lbv.map_async(_process_trajectory, trajectories, retries=10)
    else:
        with Pool(processes=os.cpu_count() - 1) as pool:
            pool.map(_process_trajectory, trajectories, chunksize=1)
    log.info("Done!")
class IPClusterEnsemble(SurveyEnsemble):
    """
    Parallelized survey ensemble based on IPython parallel (ipcluster)

    Args:
        \*\*specs:
            user specified values

    Attributes:

    Notes:

    """

    def __init__(self, **specs):

        SurveyEnsemble.__init__(self, **specs)

        # access the cluster
        self.rc = Client()
        self.dview = self.rc[:]
        self.dview.block = True
        with self.dview.sync_imports():
            import EXOSIMS, EXOSIMS.util.get_module
        r1 = self.dview.execute(
            "SurveySim = EXOSIMS.util.get_module.get_module('%s', 'SurveySimulation')"
            % specs['modules']['SurveySimulation'])
        self.dview.push(dict(specs=specs))
        r2 = self.dview.execute("sim = SurveySim(**specs)")
        self.lview = self.rc.load_balanced_view()

    def run_ensemble(self, run_one, N=10):
        t1 = time.time()
        async_res = []
        for j in range(N):
            ar = self.lview.apply_async(run_one)
            async_res.append(ar)
        print("Submitted tasks: ", len(async_res))
        self.rc.wait(async_res)
        t2 = time.time()
        print("Completed in %d sec" % (t2 - t1))

        res = [ar.get() for ar in async_res]
        return res
def main():
    parser = OptionParser()
    parser.set_defaults(n=100)
    parser.set_defaults(tmin=1e-3)
    parser.set_defaults(tmax=1)
    parser.set_defaults(profile='default')

    parser.add_option("-n", type='int', dest='n',
                      help='the number of tasks to run')
    parser.add_option("-t", type='float', dest='tmin',
                      help='the minimum task length in seconds')
    parser.add_option("-T", type='float', dest='tmax',
                      help='the maximum task length in seconds')
    parser.add_option("-p", '--profile', type='str', dest='profile',
                      help="the cluster profile [default: 'default']")

    (opts, args) = parser.parse_args()
    assert opts.tmax >= opts.tmin, "tmax must not be smaller than tmin"

    rc = Client()
    view = rc.load_balanced_view()
    print(view)
    rc.block = True
    nengines = len(rc.ids)
    with rc[:].sync_imports():
        from IPython.utils.timing import time

    # the jobs should take a random time within a range
    times = [random.random() * (opts.tmax - opts.tmin) + opts.tmin
             for i in range(opts.n)]
    stime = sum(times)

    print("executing %i tasks, totalling %.1f secs on %i engines"
          % (opts.n, stime, nengines))
    time.sleep(1)
    start = time.time()
    amr = view.map(time.sleep, times)
    amr.get()
    stop = time.time()

    ptime = stop - start
    scale = stime / ptime

    print("executed %.1f secs in %.1f secs" % (stime, ptime))
    print("%.3fx parallel performance on %i engines" % (scale, nengines))
    print("%.1f%% of theoretical max" % (100 * scale / nengines))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('directory',
                        help="Provide the directory of the HDF files "
                             "that shall be converted to csv here.")
    args = parser.parse_args()

    root = os.path.abspath(args.directory)
    fnames = glob.glob(os.path.join(root, '*.hdf'))
    logging.info('Found %i files to convert.', len(fnames))

    c = Client()
    lbview = c.load_balanced_view()

    results = lbview.map_async(process_fname, fnames)

    # progress display
    while not results.ready():
        print("{:.1f} %".format(100 * results.progress / len(fnames)))
        sys.stdout.flush()
        time.sleep(10)

    logging.info('Conversion done.')
def _perform_evolution(self, algo, pop):
    try:
        from ipyparallel import Client

        # Create client
        rc = Client()
        # Create Load-balanced view
        lbview = rc.load_balanced_view()
        # Run the task
        lbview.block = True
        ar = lbview.apply(_maptask_target, args=(algo, pop))
        # Get retval
        retval = ar.get()
        if isinstance(retval, BaseException):
            raise retval
        return retval
    except BaseException as e:
        print('Exception caught during evolution:')
        print(e)
        raise RuntimeError()
def par_value(n):
    """ Parallel option valuation

    Parameters
    ==========
    n : int
        number of option valuations/strikes
    """
    import numpy as np
    from ipyparallel import Client

    c = Client(profile="default")
    view = c.load_balanced_view()

    strikes = np.linspace(80, 20, n)
    option_values = []
    for strike in strikes:
        values = view.apply_async(bsm_mcs_valuation, strike)
        option_values.append(values)
    c.wait(option_values)
    return strikes, option_values
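bsm_mcs_valuation is defined elsewhere in the original; a hypothetical stand-in using the usual Black-Scholes-Merton Monte Carlo estimator (all parameter values are illustrative), kept self-contained so the engines only need numpy:

def bsm_mcs_valuation(strike):
    """Hypothetical Monte Carlo valuation of a European call (illustrative parameters)."""
    import numpy as np
    S0, T, r, vola, I = 100., 1.0, 0.05, 0.2, 50000  # spot, maturity, rate, vol, paths
    rand = np.random.standard_normal(I)
    # terminal stock prices under geometric Brownian motion
    ST = S0 * np.exp((r - 0.5 * vola ** 2) * T + vola * np.sqrt(T) * rand)
    # discounted expected payoff
    return np.exp(-r * T) * np.mean(np.maximum(ST - strike, 0))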
class DistributedSpider(object):

    # Time to wait between polling for task results.
    pollingDelay = 0.5

    def __init__(self, site):
        self.client = Client()
        self.view = self.client.load_balanced_view()
        self.mux = self.client[:]

        self.allLinks = []
        self.linksWorking = {}
        self.linksDone = {}

        self.site = site

    def visitLink(self, url):
        if url not in self.allLinks:
            self.allLinks.append(url)
            if url.startswith(self.site):
                print("    ", url)
                self.linksWorking[url] = self.view.apply(fetchAndParse, url)

    def onVisitDone(self, links, url):
        print(url + ":")
        self.linksDone[url] = None
        del self.linksWorking[url]
        for link in links:
            self.visitLink(link)

    def run(self):
        self.visitLink(self.site)
        while self.linksWorking:
            print(len(self.linksWorking), "pending...")
            self.synchronize()
            time.sleep(self.pollingDelay)

    def synchronize(self):
        for url, ar in list(self.linksWorking.items()):
            # ar.get(0) raises a TimeoutError if the task is not done yet.
            # This provides a simple way of polling.
            try:
                links = ar.get(0)
            except error.TimeoutError:
                continue
            except Exception as e:
                self.linksDone[url] = None
                del self.linksWorking[url]
                print("%s: %s" % (url, e))
            else:
                self.onVisitDone(links, url)
def start_server(ncpus, slurm_script=None):
    '''
    programmatically start the ipyparallel server

    Parameters
    ----------
    ncpus: int
        number of processors
    '''
    sys.stdout.write("Starting cluster...")
    sys.stdout.flush()

    if slurm_script is None:
        subprocess.Popen(["ipcluster start -n {0}".format(ncpus)], shell=True)
        while True:
            try:
                c = ipyparallel.Client()
                if len(c) < ncpus:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                    raise ipyparallel.error.TimeoutError
                c.close()
                break
            except (IOError, ipyparallel.error.TimeoutError):
                sys.stdout.write(".")
                sys.stdout.flush()
                time.sleep(1)
    else:
        shell_source(slurm_script)
        from ipyparallel import Client
        pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
        c = Client(ipython_dir=pdir, profile=profile)
        ee = c[:]
        ne = len(ee)
        print('Running on %d engines.' % ne)
        c.close()

    sys.stdout.write(" done\n")
def start_ipcluster(ipcluster_exe, nengines, profile, max_retries=50):
    """
    Start a new IPython parallel cluster (daemon)
    with a number of `nengines` and using `profile`.
    """
    from ipyparallel import Client

    ipcluster = None
    rc = None
    dview = None
    lview = None

    ipcluster = os.system(
        '{} start -n={} --profile={} --daemon'
        .format(ipcluster_exe, nengines, profile))

    # retry until ipcluster is ready
    time.sleep(3)
    rc = Client(profile=profile)
    retries = 0
    while True:
        if retries > max_retries:
            stop_ipcluster(ipcluster_exe, profile)
            raise Exception("impossible to access to (all) engines "
                            "of the IPython parallel cluster")
        if len(rc.ids) < nengines:
            retries += 1
            time.sleep(1)
            continue
        else:
            break

    dview = rc[:]
    lview = rc.load_balanced_view()

    return ipcluster, rc, dview, lview
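stop_ipcluster is called above but not shown here; a plausible counterpart sketch, with the signature assumed from the call site:

import os

def stop_ipcluster(ipcluster_exe, profile):
    """Assumed counterpart to start_ipcluster: stop the daemonized cluster."""
    os.system('{} stop --profile={}'.format(ipcluster_exe, profile))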
class SnuddaInput(object):

    def __init__(self, spikeDataFileName, inputConfigFile,
                 networkConfigFile=None, positionFile=None,
                 HDF5networkFile=None, time=10.0,
                 isMaster=True, h5libver="latest",
                 randomSeed=None, logFile=None, verbose=True):

        if type(logFile) == str:
            self.logFile = open(logFile, "w")
        else:
            self.logFile = logFile

        self.verbose = verbose
        self.writeLog("Time = " + str(time))

        # We need to set the seed, to avoid same seed on workers
        np.random.seed(randomSeed)
        self.writeLog("Setting random seed: " + str(randomSeed))

        self.h5libver = h5libver
        self.writeLog("Using hdf5 version " + str(h5libver))

        if HDF5networkFile is not None:
            assert networkConfigFile is None and positionFile is None, \
                "If HDF5networkFile specified then positionFile " + \
                "and networkConfigFile should be left empty."

            if HDF5networkFile == "last":
                HDF5networkFile = self.findLatestFile()

            self.HDF5networkFile = HDF5networkFile
            self.readHDF5info(HDF5networkFile)
        else:
            self.networkConfigFile = networkConfigFile
            self.positionFile = positionFile

        # self.writeLog("Assuming axonStumpIDFlag is True (Running Network_simulate.py)")
        self.axonStumpIDFlag = False

        self.inputConfigFile = inputConfigFile

        if spikeDataFileName is None:
            spikeDataFileName = "save/input-spikes-" + str(self.networkSlurmID) + ".hdf5"
        self.spikeDataFileName = spikeDataFileName

        self.time = time  # How long time to generate inputs for

        self.neuronCache = dict([])

        # Read in the input configuration information from JSON file
        self.readInputConfigFile()

        # Read the network position file
        self.readNeuronPositions()

        # Read the network config file
        self.readNetworkConfigFile()

        # Only the master node should start the work
        if isMaster:
            # Initialises lbView and dView (load balance, and direct view)
            self.setupParallell()

            # Make the "master input" for each channel
            self.makeChannelSpikeTrains()

            # Generate the actual input spikes, and the locations
            # stored in self.neuronInput dictionary
            self.makeNeuronInputParallell()

            # Consolidate code, so same code runs for serial and parallel case
            # if self.lbView is None:
            #     self.makeNeuronInput()
            # else:
            #     self.makeNeuronInputParallell()

            # Write spikes to disk, HDF5 format
            self.writeHDF5()

            # Verify correlation --- THIS IS VERY VERY SLOW
            # self.verifyCorrelation()

            self.checkSorted()

        # !!! TODO
        # 1. Define what the within correlation, and between correlation should be
        #    for each neuron type. Also what input frequency should we have for each
        #    neuron. --- Does it depend on size of dendritic tree?
        #    Store the info in an internal dict.
        # 2. Read the position file, so we know what neurons are in the network
        # 3. Create the "master input" for each channel.
        # 4. Mix the master input with random input, for each neuron, to create
        #    the appropriate correlations
        # 5. Randomize which compartments each synaptic input should be on
        # 6. Verify correlation of input
        # 7. Write to disk
        # If more than one worker node, then we need to split the data
        # into multiple files
        # self.nWorkers = nWorkers

    ############################################################################

    def writeHDF5(self):

        import timeit
        import h5py

        self.writeLog("Writing spikes to " + self.spikeDataFileName)

        startTime = timeit.default_timer()

        outFile = h5py.File(self.spikeDataFileName, 'w', libver=self.h5libver)

        configData = outFile.create_dataset("config",
                                            data=json.dumps(self.inputInfo, indent=4))

        inputGroup = outFile.create_group("input")

        for neuronID in self.neuronInput:
            NIDGroup = inputGroup.create_group(str(neuronID))
            neuronType = self.neuronType[neuronID]
            # nName = self.neuronName[neuronID]

            for inputType in self.neuronInput[neuronID]:
                if inputType.lower() != "VirtualNeuron".lower():
                    itGroup = NIDGroup.create_group(inputType)

                    neuronIn = self.neuronInput[neuronID][inputType]
                    spikeMat, nSpikes = self.createSpikeMatrix(neuronIn["spikes"])

                    itGroup.create_dataset("spikes", data=spikeMat)
                    itGroup.create_dataset("nSpikes", data=nSpikes)

                    itGroup.create_dataset("sectionID", data=neuronIn["location"][1])
                    itGroup.create_dataset("sectionX", data=neuronIn["location"][2])

                    itGroup.create_dataset("freq", data=neuronIn["freq"])
                    itGroup.create_dataset("correlation", data=neuronIn["correlation"])
                    itGroup.create_dataset("jitter", data=neuronIn["jitter"])
                    itGroup.create_dataset("synapseDensity", data=neuronIn["synapseDensity"])
                    itGroup.create_dataset("start", data=neuronIn["start"])
                    itGroup.create_dataset("end", data=neuronIn["end"])
                    itGroup.create_dataset("conductance", data=neuronIn["conductance"])

                    channelID = neuronIn["channelID"]
                    itGroup.create_dataset("channelID", data=channelID)

                    chanSpikes = self.channelSpikes[neuronType][inputType][channelID]
                    itGroup.create_dataset("channelSpikes", data=chanSpikes)

                    itGroup.create_dataset("generator", data=neuronIn["generator"])

                    try:
                        itGroup.create_dataset("modFile", data=neuronIn["modFile"])
                        if neuronIn["parameterFile"]:
                            itGroup.create_dataset("parameterFile",
                                                   data=neuronIn["parameterFile"])
                        # We need to convert this to string to be able to save it
                        itGroup.create_dataset("parameterList",
                                               data=json.dumps(neuronIn["parameterList"]))
                        itGroup.create_dataset("parameterID", data=neuronIn["parameterID"])
                    except:
                        import traceback
                        tstr = traceback.format_exc()
                        self.writeLog(tstr)
                        import pdb
                        pdb.set_trace()
                else:
                    # Input is activity of a virtual neuron
                    aGroup = NIDGroup.create_group("activity")
                    spikes = self.neuronInput[neuronID][inputType]["spikes"]
                    aGroup.create_dataset("spikes", data=spikes)
                    generator = self.neuronInput[neuronID][inputType]["generator"]
                    aGroup.create_dataset("generator", data=generator)

        outFile.close()

    ############################################################################

    def createSpikeMatrix(self, spikes):

        if len(spikes) == 0:
            return np.zeros((0, 0)), 0

        nInputTrains = len(spikes)
        nSpikes = np.array([len(x) for x in spikes])
        maxLen = max(nSpikes)

        spikeMat = -1 * np.ones((nInputTrains, maxLen))
        for idx, st in enumerate(spikes):
            n = st.shape[0]
            spikeMat[idx, :n] = st

        return spikeMat, nSpikes

    ############################################################################

    # Reads from self.inputConfigFile

    def readInputConfigFile(self):

        self.writeLog("Loading input configuration from " + str(self.inputConfigFile))

        with open(self.inputConfigFile, 'rt') as f:
            self.inputInfo = json.load(f)

        for neuronType in self.inputInfo:
            for inputType in self.inputInfo[neuronType]:
                if "parameterFile" in self.inputInfo[neuronType][inputType]:
                    parFile = self.inputInfo[neuronType][inputType]["parameterFile"]

                    # Allow user to use $DATA to refer to snudda data directory
                    parFile = parFile.replace("$DATA",
                                              os.path.dirname(__file__) + "/data")

                    parDataDict = json.load(open(parFile, 'r'))

                    # Read in parameters into a list
                    parData = []
                    for pd in parDataDict:
                        parData.append(parDataDict[pd])
                else:
                    parData = None

                self.inputInfo[neuronType][inputType]["parameterList"] = parData

    ############################################################################

    # Each synaptic input will contain a fraction of "channel" spikes, which are
    # taken from a stream of spikes unique to that particular channel
    # This function generates these correlated spikes

    def makeChannelSpikeTrains(self, nChannels=None, timeRange=None):

        self.writeLog("Running makeChannelSpikeTrains")

        if nChannels is None:
            nChannels = self.nChannels

        if timeRange is None:
            timeRange = (0, self.time)

        self.channelSpikes = dict([])

        for cellType in self.inputInfo:
            self.channelSpikes[cellType] = dict([])

            for inputType in self.inputInfo[cellType]:
                if self.inputInfo[cellType][inputType]["generator"] == "poisson":
                    freq = self.inputInfo[cellType][inputType]["frequency"]
                    self.channelSpikes[cellType][inputType] = dict([])

                    for idxChan in range(0, self.nChannels):
                        self.channelSpikes[cellType][inputType][idxChan] = \
                            self.generateSpikes(freq=freq, timeRange=timeRange)

        return self.channelSpikes

    ############################################################################

    def makeNeuronInputParallell(self):

        self.writeLog("Running makeNeuronInputParallell")

        self.neuronInput = dict([])

        neuronIDList = []
        inputTypeList = []
        freqList = []
        startList = []
        endList = []
        synapseDensityList = []
        nInputsList = []
        PkeepList = []
        channelSpikesList = []
        jitterDtList = []
        locationList = []
        channelIDList = []
        conductanceList = []
        correlationList = []
        modFileList = []
        parameterFileList = []
        parameterListList = []

        for (neuronID, neuronType, channelID) \
                in zip(self.neuronID, self.neuronType, self.channelID):

            self.neuronInput[neuronID] = dict([])
            if neuronType not in self.inputInfo:
                self.writeLog("!!! Warning, synaptic input to " + str(neuronType)
                              + " missing in " + str(self.inputConfigFile))
                continue

            for inputType in self.inputInfo[neuronType]:
                self.neuronInput[neuronID][inputType] = dict([])

                inputInf = self.inputInfo[neuronType][inputType]

                if inputInf["generator"] == "poisson":
                    neuronIDList.append(neuronID)
                    inputTypeList.append(inputType)
                    freqList.append(inputInf["frequency"])
                    PkeepList.append(np.sqrt(inputInf["channelCorrelation"]))
                    jitterDtList.append(inputInf["jitter"])

                    if "start" in inputInf:
                        startList.append(inputInf["start"])
                    else:
                        startList.append(0.0)  # Default start at beginning

                    if "end" in inputInf:
                        endList.append(inputInf["end"])
                    else:
                        endList.append(self.time)

                    if inputType.lower() == "VirtualNeuron".lower():
                        # Virtual neuron spikes specify their activity,
                        # location and conductance are not used
                        cond = None
                        nInp = 1
                        modFile = None
                        parameterFile = None
                        parameterList = None
                    else:
                        assert "location" not in inputInf, \
                            "Location in input config has been replaced with synapseDensity"
                        cond = inputInf["conductance"]

                        if "nInputs" in inputInf:
                            nInp = inputInf["nInputs"]
                        else:
                            nInp = None

                        modFile = inputInf["modFile"]
                        if "parameterFile" in inputInf:
                            parameterFile = inputInf["parameterFile"]
                        else:
                            parameterFile = None

                        if "parameterList" in inputInf:
                            parameterList = inputInf["parameterList"]
                        else:
                            parameterList = None

                    if "synapseDensity" in inputInf:
                        synapseDensity = inputInf["synapseDensity"]
                    else:
                        synapseDensity = "1"

                    synapseDensityList.append(synapseDensity)

                    nInputsList.append(nInp)
                    channelIDList.append(channelID)
                    conductanceList.append(cond)
                    correlationList.append(inputInf["channelCorrelation"])

                    cSpikes = self.channelSpikes[neuronType][inputType][channelID]
                    channelSpikesList.append(cSpikes)

                    modFileList.append(modFile)
                    parameterFileList.append(parameterFile)
                    parameterListList.append(parameterList)

                elif inputInf["generator"] == "csv":
                    csvFile = inputInf["csvFile"] % neuronID

                    self.neuronInput[neuronID][inputType]["spikes"] \
                        = np.genfromtxt(csvFile, delimiter=',')
                    self.neuronInput[neuronID][inputType]["generator"] = "csv"
                else:
                    self.writeLog("Unknown input generator: " + inputInf["generator"]
                                  + " for " + str(neuronID))

        # The old code had it so that all neurons within a channel shared the
        # same mother process, which caused them all to activate at the same time
        # with high probability. By setting channelSpikesList to None we disable it.
        self.writeLog("Clearing channelSpikesList, thus all neurons will have "
                      "their own mother process for each input")
        channelSpikesList = [None for x in channelSpikesList]

        amr = None

        # Lets try and swap self.lbView for self.dView
        if self.dView is not None:
            # self.writeLog("Sending jobs to workers, using lbView")
            self.writeLog("Sending jobs to workers, using dView")

            # Changed the logic, the old input helper needed a global
            # variable to be visible, but it was not always so in its scope
            inputList = list(zip(neuronIDList, inputTypeList, freqList,
                                 startList, endList, synapseDensityList,
                                 nInputsList, PkeepList, channelSpikesList,
                                 jitterDtList, channelIDList, conductanceList,
                                 correlationList, modFileList,
                                 parameterFileList, parameterListList))

            self.dView.scatter("inputList", inputList, block=True)
            cmdStr = "inpt = list(map(nl.makeInputHelperParallel,inputList))"
            self.dView.execute(cmdStr, block=True)

            inpt = self.dView["inpt"]
            amr = list(itertools.chain.from_iterable(inpt))
        else:
            # If no lbView then we run it in serial
            self.writeLog("Running input generation in serial")
            amr = map(self.makeInputHelperSerial,
                      neuronIDList, inputTypeList, freqList,
                      startList, endList, synapseDensityList,
                      nInputsList, PkeepList, channelSpikesList,
                      jitterDtList, channelIDList, conductanceList,
                      correlationList, modFileList,
                      parameterFileList, parameterListList)

        # Gather the spikes that were generated in parallel
        for neuronID, inputType, spikes, loc, synapseDensity, frq, \
            jdt, cID, cond, corr, timeRange, \
            modFile, paramFile, paramList, paramID in amr:

            self.writeLog("Gathering " + str(neuronID) + " - " + str(inputType))
            self.neuronInput[neuronID][inputType]["spikes"] = spikes

            if inputType.lower() != "VirtualNeuron".lower():
                # Virtual neurons have no location of their input, as the "input"
                # specifies the spike times of the virtual neuron itself
                self.neuronInput[neuronID][inputType]["location"] = loc
                self.neuronInput[neuronID][inputType]["synapseDensity"] = synapseDensity
                self.neuronInput[neuronID][inputType]["conductance"] = cond

            self.neuronInput[neuronID][inputType]["freq"] = frq
            self.neuronInput[neuronID][inputType]["correlation"] = corr
            self.neuronInput[neuronID][inputType]["jitter"] = jdt
            self.neuronInput[neuronID][inputType]["start"] = timeRange[0]
            self.neuronInput[neuronID][inputType]["end"] = timeRange[1]
            self.neuronInput[neuronID][inputType]["channelID"] = cID

            self.neuronInput[neuronID][inputType]["generator"] = "poisson"
            self.neuronInput[neuronID][inputType]["modFile"] = modFile
            self.neuronInput[neuronID][inputType]["parameterFile"] = paramFile
            self.neuronInput[neuronID][inputType]["parameterList"] = paramList
            self.neuronInput[neuronID][inputType]["parameterID"] = paramID

        return self.neuronInput

    ############################################################################

    # This generates poisson spikes with frequency freq, for a given time range

    def generateSpikes(self, freq, timeRange):

        # https://stackoverflow.com/questions/5148635/how-to-simulate-poisson-arrival
        start = timeRange[0]
        end = timeRange[1]
        duration = end - start

        tDiff = -np.log(1.0 - np.random.random(
            int(np.ceil(max(1, freq * duration))))) / freq

        tSpikes = []
        tSpikes.append(start + np.cumsum(tDiff))

        # Keep adding spikes until the last one passes the end of the duration
        while tSpikes[-1][-1] <= end:
            tDiff = -np.log(1.0 - np.random.random(
                int(np.ceil(freq * duration * 0.1)))) / freq
            tSpikes.append(tSpikes[-1][-1] + np.cumsum(tDiff))

        # Prune away any spikes after end
        if len(tSpikes[-1]) > 0:
            tSpikes[-1] = tSpikes[-1][tSpikes[-1] <= end]

        # Return spike times
        return np.concatenate(tSpikes)

    ############################################################################

    # This takes a list of spike trains and returns a single spike train
    # including all spikes

    def mixSpikes(self, spikes):
        return np.sort(np.concatenate(spikes))

    ############################################################################

    def cullSpikes(self, spikes, Pkeep):
        return spikes[np.random.random(spikes.shape) < Pkeep]

    ############################################################################

    # timeRange --- (start,end time) of spike train
    # freq -- frequency of spike train
    # nSpikeTrains -- number of spike trains to generate
    # Pkeep -- fraction of channel spikes to include in spike train
    # retChanSpikes -- if true, returns tuple with second item channel spikes
    #                  if false, just return spikes
    # channelSpikes --- if None, new channel spikes will be generated
    #                   (channelSpikes are the spikes shared between correlated
    #                   spike trains)

    def makeCorrelatedSpikes(self, freq, timeRange, nSpikeTrains, Pkeep,
                             channelSpikes=None,
                             retChanSpikes=False, jitterDt=None):

        assert (Pkeep >= 0 and Pkeep <= 1)

        if channelSpikes is None:
            channelSpikes = self.generateSpikes(freq, timeRange)

        uniqueFreq = freq * (1 - Pkeep)
        spikeTrains = []

        for i in range(0, nSpikeTrains):
            tUnique = self.generateSpikes(uniqueFreq, timeRange)
            tChannel = self.cullSpikes(channelSpikes, Pkeep)
            spikeTrains.append(self.mixSpikes([tUnique, tChannel]))

        # if False:
        #     self.verifyCorrelation(spikeTrains=spikeTrains)  # VERY VERY SLOW

        if jitterDt is not None:
            spikeTrains = self.jitterSpikes(spikeTrains, jitterDt,
                                            timeRange=timeRange)

        if retChanSpikes:
            return (spikeTrains, channelSpikes)
        else:
            return spikeTrains

    ############################################################################

    def makeUncorrelatedSpikes(self, freq, start, end, nSpikeTrains):

        spikeTrains = []
        for i in range(0, nSpikeTrains):
            # generateSpikes expects a (start, end) time range tuple
            spikeTrains.append(self.generateSpikes(freq, (start, end)))

        return spikeTrains

    ############################################################################

    # If a timeRange (start,endtime) is given then all spike times will
    # be modulo duration, so if we jitter and they go to before start time,
    # they wrap around and appear at end of the timeline

    def jitterSpikes(self, spikeTrains, dt, timeRange=None):

        jitteredSpikes = []

        for i in range(0, len(spikeTrains)):
            spikes = spikeTrains[i] + np.random.normal(0, dt, spikeTrains[i].shape)

            if timeRange is not None:
                start = timeRange[0]
                end = timeRange[1]
                spikes = np.mod(spikes - start, end - start) + start

            s = np.sort(spikes)
            # Remove any spikes that happened to go negative
            s = s[np.where(s >= 0)]
            jitteredSpikes.append(s)

        return jitteredSpikes

    ############################################################################

    # Plot spikes as a raster plot, for debugging and visualisation purposes

    def rasterPlot(self, spikeTimes,
                   markSpikes=None, markIdx=None,
                   title=None, figFile=None, fig=None):

        if fig is None:
            fig = plt.figure()

        for i, spikes in enumerate(spikeTimes):
            plt.vlines(spikes, i + 1.5, i + 0.5, color="black")

        plt.ylim(0.5, len(spikeTimes) + 0.5)

        if markSpikes is not None and markIdx is not None:
            for i, spikes in zip(markIdx, markSpikes):
                plt.vlines(spikes, i + 1.5, i + 0.5, color="red")

            plt.ylim(min(0.5, min(markIdx) - 0.5),
                     max(max(markIdx) + 0.5, len(spikeTimes)) + 0.5)

        plt.xlabel("Time")
        plt.ylabel("Inputs")
        plt.ion()
        plt.show()

        if title is not None:
            plt.title(title)

        fig.show()

        if figFile is not None:
            plt.savefig(figFile)
        return fig

    ############################################################################

    def readNeuronPositions(self):

        self.writeLog("Reading neuron positions")

        posInfo = SnuddaLoad(self.positionFile).data
        self.networkInfo = posInfo
        self.neuronInfo = posInfo["neurons"]

        # Make sure the position file matches the network config file
        assert (posInfo["configFile"] == self.networkConfigFile)

        self.nChannels = posInfo["nChannels"]
        self.channelID = posInfo["neuronChannel"]

        self.neuronName = [n["name"] for n in self.neuronInfo]
        self.neuronID = [n["neuronID"] for n in self.neuronInfo]
        self.neuronType = [n["type"] for n in self.neuronInfo]
        # self.nInputs = [n["nInputs"] for n in self.neuronInfo]

    ############################################################################

    def readNetworkConfigFile(self):

        self.writeLog("Reading config file " + str(self.networkConfigFile))

        import json
        with open(self.networkConfigFile, 'r') as f:
            self.networkConfig = json.load(f)

    ############################################################################

    def verifyCorrelation(self, spikeTrains, expectedCorr=None, dt=0):

        # THIS FUNCTION IS VERY VERY SLOW
        corrVec = []

        for si, s in enumerate(spikeTrains):
            for s2i, s2 in enumerate(spikeTrains):
                if si == s2i:
                    # No self comparison
                    continue
                corrVec.append(self.estimateCorrelation(s, s2, dt=dt))

        # print("corr = " + str(corrVec))
        self.writeLog("meanCorr = " + str(np.mean(corrVec)))

    ############################################################################

    def estimateCorrelation(self, spikesA, spikesB, dt=0):

        nSpikesA = len(spikesA)
        corrSpikes = 0

        for t in spikesA:
            if np.min(abs(spikesB - t)) <= dt:
                corrSpikes += 1

        return (corrSpikes / float(nSpikesA))

    ############################################################################

    # inputDensity = f(d) where d is micrometers from soma,
    # unit of f is synapses/micrometer
    # !!! Returns input locations only on dendrites, not on soma

    def dendriteInputLocations(self, neuronID, synapseDensity="1",
                               nSpikeTrains=None):

        neuronName = self.neuronName[neuronID]
        swcFile = self.networkConfig["Neurons"][neuronName]["morphology"]

        if swcFile in self.neuronCache:
            morphology = self.neuronCache[swcFile]
        else:
            morphology = NeuronMorphology(name=neuronID,
                                          swc_filename=swcFile,
                                          axonStumpIDFlag=self.axonStumpIDFlag)
            self.neuronCache[swcFile] = morphology

        return morphology.dendriteInputLocations(synapseDensity=synapseDensity,
                                                 nLocations=nSpikeTrains)

    ############################################################################

    # Returns random dendrite compartments
    # Pdist = function of d (micrometers), e.g. "1", "2*d", ... etc

    def randomCompartment(self, neuronID, nLocations, Pdist="1"):

        neuronName = self.neuronName[neuronID]
        swcFile = self.networkConfig[neuronName]["morphology"]

        if swcFile in self.neuronCache:
            morphology = self.neuronCache[swcFile]
        else:
            # !!! Should I use NeuronMorphology, or maybe the loadSWC in
            #     ConvertNetwork -- maybe move loadSWC out to a separate file
            morphology = NeuronMorphology(name=neuronID,
                                          swc_filename=swcFile,
                                          axonStumpIDFlag=self.axonStumpIDFlag)
            self.neuronCache[swcFile] = morphology

        # morphology.dend -- 0-2: x,y,z  3: r  4: dist to soma
        d = morphology.dend[:, 4]
        Px = eval(Pdist)

        if type(Px) in (int, float):
            # If Px is a constant, we need to set it for all points
            Px *= np.ones(d.shape)

        Pcomp = (Px[morphology.dendLinks[:, 0]]
                 + Px[morphology.dendLinks[:, 1]]) / 2
        compLen = morphology.compartmentLength(compType="dend")

        # Multiply by length, so longer compartments are proportionally
        # more likely to be connected to
        Pcomp = np.multiply(Pcomp, compLen)

        # Randomize locations. Here we then sort the locations, this is to make
        # it quicker to locate where they are (one pass in the Pxcumsum array)
        Pxcumsum = np.cumsum(Pcomp)
        x = np.random.uniform(low=0, high=Pxcumsum[-1], size=nLocations)
        xsort = np.sort(x)

        nComps = len(d)
        lastValidIdx = nComps - 1
        compIdx = 0

        secID = np.zeros((nLocations,), dtype=int)
        secX = np.zeros((nLocations,))
        compCoords = np.zeros((nLocations, 3))

        for ix, xval in enumerate(xsort):
            while xval > Pxcumsum[compIdx] and compIdx < lastValidIdx:
                compIdx += 1

            secID[ix] = morphology.dendSecID[compIdx]
            secX[ix] = np.random.rand() * (morphology.dendSecX[compIdx, 1]
                                           - morphology.dendSecX[compIdx, 0]) \
                + morphology.dendSecX[compIdx, 0]
            compCoords[ix, :3] = (morphology.dend[morphology.dendLinks[compIdx, 0], :3]
                                  + morphology.dend[morphology.dendLinks[compIdx, 1], :3]) / 2

        # The probability of picking a compartment is dependent on a distance
        # dependent probability function, and the length of the compartment

        # Plot synapses and segments, as a verification
        if False:
            import matplotlib.pyplot as plt
            plt.hist(d[compID], label="synapses")
            plt.hist(d, label="segments")
            plt.legend(loc='upper right')
            plt.show()

        return (compCoords, secID, secX)

    ############################################################################

    def setSeed(self, randomSeed):

        self.writeLog("Setting random seed: " + str(randomSeed))
        np.random.seed(randomSeed)

    ############################################################################

    def newWorkerSeeds(self, dView):

        nWorkers = len(self.dView)
        workerSeeds = np.random.randint(0, np.iinfo(np.uint32).max,
                                        dtype=np.uint32,
                                        size=(nWorkers,))
        self.dView.scatter("workerSeed", workerSeeds, block=True)
        self.dView.execute("nl.setSeed(workerSeed[0])", block=True)

        self.writeLog("New worker seeds: " + str(workerSeeds))

    ############################################################################

    def setupParallell(self):

        import os

        SlurmJobID = os.getenv("SLURM_JOBID")

        if SlurmJobID is None:
            self.SlurmID = 0
        else:
            self.SlurmID = int(SlurmJobID)

        self.writeLog("IPYTHON_PROFILE = " + str(os.getenv('IPYTHON_PROFILE')))

        if os.getenv('IPYTHON_PROFILE') is not None:

            from ipyparallel import Client
            self.rc = Client(profile=os.getenv('IPYTHON_PROFILE'))
            # http://davidmasad.com/blog/simulation-with-ipyparallel/
            # http://people.duke.edu/~ccc14/sta-663-2016/19C_IPyParallel.html
            self.writeLog("Client IDs: " + str(self.rc.ids))

            self.dView = self.rc[:]  # Direct view into clients
            self.lbView = self.rc.load_balanced_view()

            if self.logFile is not None:
                logFileName = self.logFile.name
                engineLogFile = [logFileName + "-" + str(x)
                                 for x in range(0, len(self.dView))]
            else:
                engineLogFile = [None for x in range(0, len(self.dView))]
        else:
            self.writeLog("No IPYTHON_PROFILE environment variable set, "
                          "running in serial")
            self.dView = None
            self.lbView = None
            return

        with self.dView.sync_imports():
            from snudda.input import SnuddaInput

        self.dView.push({"inputConfigFile": self.inputConfigFile,
                         "networkConfigFile": self.networkConfigFile,
                         "positionFile": self.positionFile,
                         "spikeDataFileName": self.spikeDataFileName,
                         "isMaster": False,
                         "time": self.time})

        self.writeLog("Scattering engineLogFile = " + str(engineLogFile))

        self.dView.scatter('logFileName', engineLogFile, block=True)

        self.writeLog("nl = SnuddaInput(inputConfigFile='" + self.inputConfigFile
                      + "',networkConfigFile='" + self.networkConfigFile
                      + "',positionFile='" + self.positionFile
                      + "',spikeDataFileName='" + self.spikeDataFileName
                      + "',isMaster=False "
                      + ",time=" + str(self.time) + ",logFile=logFileName[0])")

        cmdStr = ("global nl; nl = SnuddaInput(inputConfigFile=inputConfigFile,"
                  "networkConfigFile=networkConfigFile,positionFile=positionFile,"
                  "spikeDataFileName=spikeDataFileName,isMaster=isMaster,"
                  "time=time,logFile=logFileName[0])")
        self.dView.execute(cmdStr, block=True)

        self.newWorkerSeeds(self.dView)

        self.writeLog("Workers set up")

    ############################################################################

    # Function for debugging

    def dumpToRandomFile(self, filePrefix, dataToDump):

        import uuid
        tmp = open("save/" + filePrefix + "-file-" + str(uuid.uuid4()), 'w')
        tmp.write(str(dataToDump))
        tmp.close()

    ############################################################################

    def checkSorted(self):

        # Just a double check that the spikes are not jumbled
        for neuronID in self.neuronInput:
            for inputType in self.neuronInput[neuronID]:
                if inputType == "VirtualNeuron":
                    s = self.neuronInput[neuronID][inputType]["spikes"]
                    assert (np.diff(s) >= 0).all(), \
                        str(neuronID) + " " + inputType + ": Spikes must be in order"
                else:
                    for spikes in self.neuronInput[neuronID][inputType]["spikes"]:
                        assert len(spikes) == 0 or spikes[0] >= 0
                        assert (np.diff(spikes) >= 0).all(), \
                            str(neuronID) + " " + inputType + ": Spikes must be in order"

    ############################################################################

    def plotSpikes(self, neuronID=None):

        self.writeLog("Plotting spikes for neuronID: " + str(neuronID))

        if neuronID is None:
            neuronID = self.neuronInput

        spikeTimes = []

        for nID in neuronID:
            for inputType in self.neuronInput[nID]:
                for spikes in self.neuronInput[nID][inputType]["spikes"]:
                    spikeTimes.append(spikes)

        self.rasterPlot(spikeTimes)

    ############################################################################

    def readHDF5info(self, hdf5File):

        self.writeLog("Loading HDF5-file: " + hdf5File)

        try:
            with h5py.File(hdf5File, 'r') as f:
                self.networkConfigFile = f["meta"]["configFile"].value
                self.positionFile = f["meta"]["positionFile"].value
                self.networkSlurmID = int(f["meta/SlurmID"].value)
                self.axonStumpIDFlag = f["meta/axonStumpIDFlag"].value
        except Exception as e:
            self.writeLog("Error in readHDF5info: " + str(e))
            self.writeLog("Opening: " + hdf5File)

            import traceback
            tstr = traceback.format_exc()
            self.writeLog(tstr)
            import pdb
            pdb.set_trace()

    ############################################################################

    def findLatestFile(self):

        files = glob('save/network-connect-voxel-pruned-synapse-file-*.hdf5')

        modTime = [os.path.getmtime(f) for f in files]
        idx = np.argsort(modTime)

        self.writeLog("Using the newest file: " + files[idx[-1]])

        return files[idx[-1]]

    ############################################################################

    def makeInputHelperParallel(self, args):

        try:
            neuronID, inputType, freq, start, end, synapseDensity, \
                nSpikeTrains, Pkeep, channelSpikes, jitterDt, channelID, \
                conductance, correlation, modFile, parameterFile, \
                parameterList = args

            return self.makeInputHelperSerial(neuronID=neuronID,
                                              inputType=inputType,
                                              freq=freq,
                                              start=start,
                                              end=end,
                                              synapseDensity=synapseDensity,
                                              nSpikeTrains=nSpikeTrains,
                                              Pkeep=Pkeep,
                                              channelSpikes=channelSpikes,
                                              jitterDt=jitterDt,
                                              channelID=channelID,
                                              conductance=conductance,
                                              correlation=correlation,
                                              modFile=modFile,
                                              parameterFile=parameterFile,
                                              parameterList=parameterList)
        except:
            import traceback
            tstr = traceback.format_exc()
            self.writeLog(tstr)
            import pdb
            pdb.set_trace()

    ############################################################################

    # Normally specify synapseDensity which then sets number of inputs,
    # ie leave nSpikeTrains as None. If nSpikeTrains is set, that will then
    # scale synapseDensity to get the requested number of inputs (approximately)

    # For virtual neurons nSpikeTrains must be set, as it defines their activity

    def makeInputHelperSerial(self, neuronID, inputType,
                              freq, start, end,
                              synapseDensity, nSpikeTrains,
                              Pkeep, channelSpikes,
                              jitterDt, channelID,
                              conductance, correlation,
                              modFile, parameterFile, parameterList):

        # First, find out how many inputs and where, based on morphology and
        # synapse density
        try:
            timeRange = (start, end)

            if inputType.lower() == "VirtualNeuron".lower():
                # This specifies activity of a virtual neuron; no input location
                inputLoc = None
                conductance = None

                assert nSpikeTrains is None or nSpikeTrains == 1, \
                    "Virtual neuron " + self.neuronName[neuronID] \
                    + " should have only one spike train, fix nSpikeTrains in config"

                spikes = self.makeCorrelatedSpikes(freq=freq,
                                                   timeRange=timeRange,
                                                   nSpikeTrains=1,
                                                   Pkeep=Pkeep,
                                                   channelSpikes=channelSpikes,
                                                   jitterDt=jitterDt)
                nInputs = 1
            else:
                # (x, y, z), secID, secX
                inputLoc = self.dendriteInputLocations(neuronID=neuronID,
                                                       synapseDensity=synapseDensity,
                                                       nSpikeTrains=nSpikeTrains)

                nInputs = inputLoc[0].shape[0]
                print("Generating " + str(nInputs) + " inputs for "
                      + self.neuronName[neuronID])

                # OBS, nInputs might differ slightly from nSpikeTrains if that is given
                spikes = self.makeCorrelatedSpikes(freq=freq,
                                                   timeRange=timeRange,
                                                   nSpikeTrains=nInputs,
                                                   Pkeep=Pkeep,
                                                   channelSpikes=channelSpikes,
                                                   jitterDt=jitterDt)
        except:
            import traceback
            tstr = traceback.format_exc()
            self.writeLog(tstr)
            import pdb
            pdb.set_trace()

        # We need to pick which parameter set to use for the input also
        parameterID = np.random.randint(1e6, size=nInputs)

        # We need to keep track of the neuronID, since it will all be jumbled
        # when doing asynchronous parallelisation
        return (neuronID, inputType, spikes, inputLoc, synapseDensity, freq,
                jitterDt, channelID, conductance, correlation, timeRange,
                modFile, parameterFile, parameterList, parameterID)

    ############################################################################

    def makeInputHelperSerialOLD(self, neuronID, inputType,
                                 freq, start, end,
                                 nSpikeTrains, Pkeep,
                                 channelSpikes, jitterDt,
                                 location, channelID,
                                 conductance, correlation,
                                 modFile, parameterFile,
                                 parameterList, parameterID):
        try:
            assert False, "Deprecated, use makeInputHelperSerial"

            timeRange = (start, end)

            spikes = self.makeCorrelatedSpikes(freq=freq,
                                               timeRange=timeRange,
                                               nSpikeTrains=nSpikeTrains,
                                               Pkeep=Pkeep,
                                               channelSpikes=channelSpikes,
                                               jitterDt=jitterDt)

            if inputType.lower() == "VirtualNeuron".lower():
                loc = None
                conductance = None
            else:
                loc = self.randomCompartment(neuronID, nSpikeTrains, location)

        except Exception as e:
            import uuid
            import traceback
            tstr = traceback.format_exc()
            tmp = open("save/tmp-log-file-" + str(uuid.uuid4()), 'w')
tmp.write("Exception: " + str(e)) tmp.write("Trace:" + tstr) tmp.close() self.writeLog(tstr) import pdb pdb.set_trace() # We need to keep track of the neuronID, since it will all be jumbled # when doing asynchronous prallellisation return (neuronID, inputType, spikes, loc, freq, jitterDt, channelID, conductance, correlation, location, timeRange, modFile, parameterFile, parameterList, parameterID) ############################################################################ def writeLog(self, text, flush=True): # Change flush to False in future, debug if (self.logFile is not None): self.logFile.write(text + "\n") print(text) if (flush): self.logFile.flush() else: if (self.verbose): print(text)
def client_wait(self, client, jobs=None, timeout=-1):
    """my wait wrapper, sets a default finite timeout to avoid hangs"""
    if timeout < 0:
        timeout = self.timeout
    return Client.wait(client, jobs, timeout)
from ipyparallel import Client
from numpy import array, nan, percentile, savez

from .adf_simulation import adf_simulation

# Time in seconds to sleep before checking if ready
SLEEP = 10
# Number of repetitions
EX_NUM = 500
# Number of simulations per exercise
EX_SIZE = 200000
# Approximately controls memory use, in MiB
MAX_MEMORY_SIZE = 100

rc = Client()
dview = rc.direct_view()
with dview.sync_imports():
    from numpy import arange, zeros
    from numpy.random import RandomState


def clear_cache(client, view):
    """Cache-clearing function from mailing list"""
    assert not client.outstanding, "don't clear history when tasks are outstanding"
    client.purge_results("all")  # clears controller
    client.results.clear()
    client.metadata.clear()
    view.results.clear()
    client.history = []
    view.history = []
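A sketch of how clear_cache might be called between repetitions to keep controller and client memory bounded, given the rc and dview created above; the workload here is a trivial placeholder standing in for adf_simulation:

for rep in range(EX_NUM):
    amr = dview.map_async(lambda i: i, arange(4))  # placeholder workload
    amr.get()
    # ... collect what you need from amr here ...
    clear_cache(rc, dview)  # purge cached results before the next repetition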
# (this 'else' continues an 'if single_thread:' style branch from earlier in the script)
else:
    try:
        c.close()
    except:
        print('C was not existing, creating one')
    print("Stopping cluster to avoid unnecessary use of memory....")
    sys.stdout.flush()
    if backend == 'SLURM':
        try:
            cse.utilities.stop_server(is_slurm=True)
        except:
            print('Nothing to stop')
        slurm_script = '/mnt/xfs1/home/agiovann/SOFTWARE/Constrained_NMF/SLURM/slurmStart.sh'
        cse.utilities.start_server(slurm_script=slurm_script)
        pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
        c = Client(ipython_dir=pdir, profile=profile)
    else:
        cse.utilities.stop_server()
        cse.utilities.start_server()
        c = Client()

    print(('Using ' + str(len(c)) + ' processes'))
    dview = c[:]

#%% get all the right folders
params = [
    # ['Jan25_2015_07_13', 30, False, False, False],
    # fname, frate, do_rotate_template, do_self_motion_correct, do_motion_correct
    # ['Jan40_exp2_001', 30, False, False, False],
    # ['Jan42_exp4_001', 30, False, False, False],
    # ['Jan-AMG1_exp2_new_001', 30, False, False, False],
    # ['Jan-AMG_exp3_001', 30, False, False, False],
#%% start cluster for efficient computation
single_thread = False

if single_thread:
    dview = None
else:
    try:
        c.close()
    except:
        print('C was not existing, creating one')
    print("Stopping cluster to avoid unnecessary use of memory....")
    sys.stdout.flush()
    cse.utilities.stop_server()
    cse.utilities.start_server()
    c = Client()
    dview = c[:n_processes]

#%% FOR LOADING ALL TIFF FILES IN A FILE AND SAVING THEM ON A SINGLE MEMORY MAPPABLE FILE
fnames = []
base_folder = './movies/'  # folder containing the demo files
for file in glob.glob(os.path.join(base_folder, '*.tif')):
    if file.endswith("ie.tif"):
        fnames.append(file)
fnames.sort()
print(fnames)
fnames = fnames

#%% Create a unique file for the whole dataset
# THIS IS ONLY IF YOU NEED TO SELECT A SUBSET OF THE FIELD OF VIEW
# fraction_downsample = 1
# idx_x = slice(10, 502, None)
# idx_y = slice(10, 502, None)
def run_CNMF_patches(file_name, shape, options, rf=16, stride=4, n_processes=2,
                     backend='single_thread', memory_fact=1):
    """Function that runs CNMF in patches, either in parallel or sequentially,
    and returns the result for each patch. It requires that ipyparallel is running

    Parameters
    ----------
    file_name: string
        full path to an npy file (2D, pixels x time) containing the movie
    shape: tuple of three elements
        dimensions of the original movie across y, x, and time
    options: dictionary
        containing all the parameters for the various algorithms
    rf: int
        half-size of the square patch in pixels
    stride: int
        amount of overlap between patches
    backend: string
        'ipyparallel', 'single_thread' or 'SLURM'
    n_processes: int
        number of cores to be used (should be less than the number of cores
        started with ipyparallel)
    memory_fact: double
        unitless number accounting for how much memory should be used. It
        represents the fraction of a patch processed in a single thread. You
        will need to try different values to see which one works

    Returns
    -------
    A_tot: matrix containing all the components from all the patches
    C_tot: matrix containing the calcium traces corresponding to A_tot
    sn_tot: per pixel noise estimate
    optional_outputs: set of outputs related to the result of CNMF ALGORITHM ON EACH patch
    """
    (d1, d2, T) = shape
    d = d1 * d2
    K = options['init_params']['K']

    options['preprocess_params']['backend'] = 'single_thread'
    options['preprocess_params']['n_pixels_per_process'] = int((rf * rf) / memory_fact)
    options['spatial_params']['n_pixels_per_process'] = int((rf * rf) / memory_fact)
    options['temporal_params']['n_pixels_per_process'] = int((rf * rf) / memory_fact)
    options['spatial_params']['backend'] = 'single_thread'
    options['temporal_params']['backend'] = 'single_thread'

    idx_flat, idx_2d = extract_patch_coordinates(d1, d2, rf=rf, stride=stride)

    args_in = []
    for id_f, id_2d in zip(idx_flat[:], idx_2d[:]):
        args_in.append((file_name, id_f, id_2d[0].shape, options))

    print len(idx_flat)
    st = time.time()

    # string comparisons use ==, not the identity operator "is"
    if backend == 'ipyparallel' or backend == 'SLURM':
        try:
            if backend == 'SLURM':
                if 'IPPPDIR' in os.environ and 'IPPPROFILE' in os.environ:
                    pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
                else:
                    raise Exception(
                        'environment variables not found, please source slurmAlloc.rc')
                c = Client(ipython_dir=pdir, profile=profile)
                print 'Using ' + str(len(c)) + ' processes'
            else:
                c = Client()

            dview = c[:n_processes]
            file_res = dview.map_sync(cnmf_patches, args_in)
            dview.results.clear()
            c.purge_results('all')
            c.purge_everything()
            c.close()
        except:
            print('Something went wrong')
            raise
        finally:
            print('You may think that it went well but reality is harsh')
    elif backend == 'single_thread':
        file_res = map(cnmf_patches, args_in)
    else:
        raise Exception('Backend unknown')

    print time.time() - st

    # extract the values from the output of mapped computation
    num_patches = len(file_res)

    A_tot = scipy.sparse.csc_matrix((d, K * num_patches))
    B_tot = scipy.sparse.csc_matrix((d, num_patches))
    C_tot = np.zeros((K * num_patches, T))
    F_tot = np.zeros((num_patches, T))
    mask = np.zeros(d)
    sn_tot = np.zeros((d1 * d2))

    b_tot = []
    f_tot = []
    bl_tot = []
    c1_tot = []
    neurons_sn_tot = []
    g_tot = []
    idx_tot = []
    shapes_tot = []
    id_patch_tot = []

    count = 0
    patch_id = 0

    print 'Transforming patches into full matrix'

    for fff in file_res:
        if fff is not None:
            idx_, shapes, A, b, C, f, S, bl, c1, neurons_sn, g, sn, _ = fff
            sn_tot[idx_] = sn
            b_tot.append(b)
            f_tot.append(f)
            bl_tot.append(bl)
            c1_tot.append(c1)
            neurons_sn_tot.append(neurons_sn)
            g_tot.append(g)
            idx_tot.append(idx_)
            shapes_tot.append(shapes)
            mask[idx_] += 1
            F_tot[patch_id, :] = f
            B_tot[idx_, patch_id] = b

            for ii in range(np.shape(A)[-1]):
                new_comp = A.tocsc()[:, ii] / np.sqrt(
                    np.sum(np.array(A.tocsc()[:, ii].todense())**2))
                if new_comp.sum() > 0:
                    A_tot[idx_, count] = new_comp
                    C_tot[count, :] = C[ii, :]
                    id_patch_tot.append(patch_id)
                    count += 1

            patch_id += 1
        else:
            print('Skipped Empty Patch')

    A_tot = A_tot[:, :count]
    C_tot = C_tot[:count, :]

    optional_outputs = dict()
    optional_outputs['b_tot'] = b_tot
    optional_outputs['f_tot'] = f_tot
    optional_outputs['bl_tot'] = bl_tot
    optional_outputs['c1_tot'] = c1_tot
    optional_outputs['neurons_sn_tot'] = neurons_sn_tot
    optional_outputs['g_tot'] = g_tot
    optional_outputs['idx_tot'] = idx_tot
    optional_outputs['shapes_tot'] = shapes_tot
    optional_outputs['id_patch_tot'] = id_patch_tot
    optional_outputs['B'] = B_tot
    optional_outputs['F'] = F_tot
    optional_outputs['mask'] = mask

    Im = scipy.sparse.csr_matrix((1. / mask, (np.arange(d), np.arange(d))))
    Bm = Im.dot(B_tot)
    A_tot = Im.dot(A_tot)
    f = np.mean(F_tot, axis=0)

    for iter in range(10):
        b = Bm.dot(F_tot.dot(f)) / np.sum(f**2)
        f = np.dot((Bm.T.dot(b)).T, F_tot) / np.sum(b**2)

    return A_tot, C_tot, b, f, sn_tot, optional_outputs
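# The merge loop above L2-normalises each spatial component before storing it
# in A_tot. A small self-contained sketch of that normalisation step for a
# scipy sparse matrix (toy data; not CaImAn's actual helper function):
import numpy as np
import scipy.sparse

A = scipy.sparse.random(100, 5, density=0.1, format='csc')
cols = []
for ii in range(A.shape[1]):
    col = A[:, ii]
    norm = np.sqrt(col.multiply(col).sum())  # L2 norm of the column
    cols.append(col / norm if norm > 0 else col)
A_norm = scipy.sparse.hstack(cols, format='csc')  # unit-norm columns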
def __init__(self, temperature, eDensity, wavelength, filter=(chfilters.gaussianR, 1000.), label=None, elementList=None, ionList=None, minAbund=None, keepIons=0, doLines=1, doContinuum=1, allLines=1, em=None, abundance=None, verbose=0, timeout=0.1): # wavelength = np.atleast_1d(wavelength) if wavelength.size < 2: print( ' wavelength must have at least two values, current length %3i' % (wavelength.size)) return t1 = datetime.now() # rcAll = Client() # all_engines = rcAll[:] lbvAll = rcAll.load_balanced_view() # # # creates Intensity dict from first ion calculated # setupIntensity = 0 # self.Defaults = chdata.Defaults # self.argCheck(temperature=temperature, eDensity=eDensity, pDensity=None, em=em) if verbose: print('NTempDens: %5i' % (self.NTempDens)) # # if self.Em.max() == 1.: ylabel = r'erg cm$^{-2}$ s$^{-1}$ sr$^{-1} \AA^{-1}$ ($\int\,$ N$_e\,$N$_H\,$d${\it l}$)$^{-1}$' else: ylabel = r'erg cm$^{-2}$ s$^{-1}$ sr$^{-1} \AA^{-1}$ $' if verbose: print('len of self.Em %5i' % (len(self.Em))) # # # xlabel = 'Wavelength (' + self.Defaults['wavelength'] + ')' # self.AllLines = allLines # # if abundance != None: if abundance in list(chdata.Abundance.keys()): self.AbundanceName = abundance else: abundChoices = list(chdata.Abundance.keys()) abundChoice = chGui.gui.selectorDialog( abundChoices, label='Select Abundance name', multiChoice=False) abundChoice_idx = abundChoice.selectedIndex self.AbundanceName = abundChoices[abundChoice_idx[0]] print((' Abundance chosen: %s ' % (self.AbundanceName))) else: self.AbundanceName = self.Defaults['abundfile'] # # abundAll = chdata.Abundance[self.AbundanceName]['abundance'] self.AbundAll = abundAll self.MinAbund = minAbund # #ionInfo = chio.masterListInfo() wavelength = np.asarray(wavelength) nWvl = wavelength.size self.Wavelength = wavelength ntemp = self.Ntemp # # freeFree = np.zeros((ntemp, nWvl), np.float64).squeeze() freeBound = np.zeros((ntemp, nWvl), np.float64).squeeze() twoPhoton = np.zeros((self.NTempDens, nWvl), np.float64).squeeze() lineSpectrum = np.zeros((self.NTempDens, nWvl), np.float64).squeeze() # # allInpt = [] # if keepIons: self.IonInstances = {} self.FbInstances = {} self.FfInstances = {} # # ionGate creates the self.Todo list # self.ionGate(elementList=elementList, ionList=ionList, minAbund=minAbund, doLines=doLines, doContinuum=doContinuum, verbose=verbose) # for akey in sorted(self.Todo.keys()): zStuff = util.convertName(akey) Z = zStuff['Z'] abundance = chdata.Abundance[self.AbundanceName]['abundance'][Z - 1] if verbose: print(' %5i %5s abundance = %10.2e ' % (Z, const.El[Z - 1], abundance)) if verbose: print(' doing ion %s for the following processes %s' % (akey, self.Todo[akey])) if 'ff' in self.Todo[akey]: allInpt.append( [akey, 'ff', temperature, wavelength, abundance, em]) if 'fb' in self.Todo[akey]: allInpt.append( [akey, 'fb', temperature, wavelength, abundance, em]) if 'line' in self.Todo[akey]: allInpt.append([ akey, 'line', temperature, eDensity, wavelength, filter, allLines, abundance, em, doContinuum ]) # result = lbvAll.map_sync(doAll, allInpt) if verbose: print(' got all ff, fb, line results') ionsCalculated = [] # for ijk in range(len(result)): out = result[ijk] if type(out) != list: print(' a problem has occured - this can be caused by') print('running Python3 and not using ipcluster3') return ionS = out[0] if verbose: print(' collecting calculation for %s' % (ionS)) ionsCalculated.append(ionS) calcType = out[1] if verbose: print(' processing %s results' % (calcType)) # if calcType == 'ff': thisFf = out[2] if keepIons: 
self.FfInstances[ionS] = thisFf freeFree += thisFf['intensity'] elif calcType == 'fb': thisFb = out[2] if verbose: print(' fb ion = %s' % (ionS)) if 'intensity' in thisFb.keys(): if 'errorMessage' not in sorted(thisFb.keys()): if keepIons: self.FbInstances[ionS] = thisFb freeBound += thisFb['intensity'] else: print(thisFb['errorMessage']) elif calcType == 'line': thisIon = out[2] if not 'errorMessage' in sorted(thisIon.Intensity.keys()): if keepIons: self.IonInstances[ionS] = thisIon thisIntensity = thisIon.Intensity ## self.IonInstances.append(copy.deepcopy(thisIon)) if setupIntensity: for akey in sorted(self.Intensity.keys()): self.Intensity[akey] = np.hstack( (self.Intensity[akey], thisIntensity[akey])) else: setupIntensity = 1 self.Intensity = thisIntensity # lineSpectrum += thisIon.Spectrum['intensity'] # check for two-photon emission if len(out) == 4: tp = out[3] if verbose: for akey in tp: print(' tp keys %s' % (akey)) if self.NTempDens == 1: twoPhoton += tp['intensity'].squeeze() else: for iTempDen in range(self.NTempDens): twoPhoton[iTempDen] += tp['intensity'][ iTempDen] else: if 'errorMessage' in sorted(thisIon.Intensity.keys()): print(thisIon.Intensity['errorMessage']) # # self.IonsCalculated = ionsCalculated # # self.FreeFree = { 'wavelength': wavelength, 'intensity': freeFree.squeeze() } self.FreeBound = { 'wavelength': wavelength, 'intensity': freeBound.squeeze() } self.LineSpectrum = { 'wavelength': wavelength, 'intensity': lineSpectrum.squeeze() } self.TwoPhoton = { 'wavelength': wavelength, 'intensity': twoPhoton.squeeze() } # total = freeFree + freeBound + lineSpectrum + twoPhoton # t2 = datetime.now() dt = t2 - t1 print(' elapsed seconds = %12.3e' % (dt.seconds)) rcAll.purge_results('all') # if self.NTempDens == 1: integrated = total else: integrated = total.sum(axis=0) # if type(label) == type(''): if hasattr(self, 'Spectrum'): print(' hasattr = true') self.Spectrum[label] = { 'wavelength': wavelength, 'intensity': total.squeeze(), 'filter': filter[0].__name__, 'width': filter[1], 'integrated': integrated, 'em': em, 'Abundance': self.AbundanceName, 'xlabel': xlabel, 'ylabel': ylabel } else: self.Spectrum = { label: { 'wavelength': wavelength, 'intensity': total.squeeze(), 'filter': filter[0].__name__, 'width': filter[1], 'integrated': integrated, 'em': em, 'Abundance': self.AbundanceName, 'xlabel': xlabel, 'ylabel': ylabel } } else: self.Spectrum = { 'wavelength': wavelength, 'intensity': total.squeeze(), 'filter': filter[0].__name__, 'width': filter[1], 'integrated': integrated, 'em': em, 'Abundance': self.AbundanceName, 'xlabel': xlabel, 'ylabel': ylabel }
def main(): usage = ''' Usage: ------------------------------------------------ Sequential change detection for polarimetric SAR images python %s [OPTIONS] infiles outfile enl Options: -h this help -m run 3x3 median filter on p-values prior to thresholding (e.g. for noisy satellite data) -d dims files are to be co-registered to a subset dims = [x0,y0,rows,cols] of the first image, otherwise it is assumed that the images are co-registered and have identical spatial dimensions -s signif significance level for change detection (default 0.01) infiles: comma-separated list of full paths to input files, no blank spaces: /path/to/infile_1, ... ,/path/to/infile_k outfile: without path (will be written to same directory as infile_1) enl: equivalent number of looks -------------------------------------------------''' % sys.argv[0] options, args = getopt.getopt(sys.argv[1:], 'hmd:s:') medianfilter = False dims = None significance = 0.01 for option, value in options: if option == '-h': print usage return elif option == '-m': medianfilter = True elif option == '-d': dims = eval(value) elif option == '-s': significance = eval(value) if len(args) != 3: print 'Incorrect number of arguments' print usage sys.exit(1) fns = args[0].split(',') outfn = args[1] n = np.float64(eval(args[2])) # equivalent number of looks k = len(fns) # number of images gdal.AllRegister() start = time.time() # first SAR image try: inDataset1 = gdal.Open(fns[0], GA_ReadOnly) cols = inDataset1.RasterXSize rows = inDataset1.RasterYSize bands = inDataset1.RasterCount except Exception as e: print 'Error: %s -- Could not read file' % e sys.exit(1) if dims is not None: # images are not yet co-registered, so subset first image and register the others _, _, cols, rows = dims fn0 = subset(fns[0], dims) args1 = [(fns[0], fns[i], dims) for i in range(1, k)] try: print ' \nattempting parallel execution of co-registration ...' start1 = time.time() c = Client() print 'available engines %s' % str(c.ids) v = c[:] fns = v.map_sync(call_register, args1) print 'elapsed time for co-registration: ' + str(time.time() - start1) except Exception as e: start1 = time.time() print '%s \nFailed, so running sequential co-registration ...' % e fns = map(call_register, args1) print 'elapsed time for co-registration: ' + str(time.time() - start) fns.insert(0, fn0) # point inDataset1 to the subset image for correct georefrerencing inDataset1 = gdal.Open(fn0, GA_ReadOnly) print '===============================================' print ' Multi-temporal SAR Change Detection' print '===============================================' print time.asctime() print 'First (reference) filename: %s' % fns[0] print 'number of images: %i' % k print 'equivalent number of looks: %f' % n print 'significance level: %f' % significance # output file path = os.path.abspath(fns[0]) dirn = os.path.dirname(path) outfn = dirn + '/' + outfn # create temporary, memory-mapped array of change indices p(Ri<ri) mm = NamedTemporaryFile() pvarray = np.memmap(mm.name, dtype=np.float64, mode='w+', shape=(k - 1, k - 1, rows * cols)) print 'pre-calculating Rj and p-values ...' start1 = time.time() try: print 'attempting parallel calculation ...' 
c = Client() print 'available engines %s' % str(c.ids) v = c[:] v.execute('import numpy as np') v.execute('from osgeo.gdalconst import GA_ReadOnly') v.execute('import sys, gdal') v.execute('from scipy import stats, ndimage') print 'ell = ', sys.stdout.flush() for i in range(k - 1): print i + 1, sys.stdout.flush() args1 = [(fns[i:j + 2], n, cols, rows, bands) for j in range(i, k - 1)] pvs = v.map_sync(PV, args1) if medianfilter: pvs = v.map_sync(call_median_filter, pvs) for j in range(i, k - 1): pvarray[i, j, :] = pvs[j - i].ravel() except Exception as e: print '%s \nfailed, so running sequential calculation ...' % e print 'ell= ', sys.stdout.flush() for i in range(k - 1): print i + 1, sys.stdout.flush() args1 = [(fns[i:j + 2], n, cols, rows, bands) for j in range(i, k - 1)] pvs = map(PV, args1) if medianfilter: pvs = map(call_median_filter, pvs) for j in range(i, k - 1): pvarray[i, j, :] = pvs[j - i].ravel() print '\nelapsed time for p-value calculation: ' + str(time.time() - start1) cmap, smap, fmap, bmap = change_maps(pvarray, significance) # write to file system cmap = np.reshape(cmap, (rows, cols)) fmap = np.reshape(fmap, (rows, cols)) smap = np.reshape(smap, (rows, cols)) bmap = np.reshape(bmap, (rows, cols, k - 1)) driver = inDataset1.GetDriver() basename = os.path.basename(outfn) name, _ = os.path.splitext(basename) outfn1 = outfn.replace(name, name + '_cmap') outDataset = driver.Create(outfn1, cols, rows, 1, GDT_Byte) geotransform = inDataset1.GetGeoTransform() if geotransform is not None: outDataset.SetGeoTransform(geotransform) projection = inDataset1.GetProjection() if projection is not None: outDataset.SetProjection(projection) outBand = outDataset.GetRasterBand(1) outBand.WriteArray(cmap, 0, 0) outBand.FlushCache() print 'most recent change map written to: %s' % outfn1 outfn2 = outfn.replace(name, name + '_fmap') outDataset = driver.Create(outfn2, cols, rows, 1, GDT_Byte) if geotransform is not None: outDataset.SetGeoTransform(geotransform) if projection is not None: outDataset.SetProjection(projection) outBand = outDataset.GetRasterBand(1) outBand.WriteArray(fmap, 0, 0) outBand.FlushCache() print 'frequency map written to: %s' % outfn2 outfn3 = outfn.replace(name, name + '_bmap') outDataset = driver.Create(outfn3, cols, rows, k - 1, GDT_Byte) if geotransform is not None: outDataset.SetGeoTransform(geotransform) if projection is not None: outDataset.SetProjection(projection) for i in range(k - 1): outBand = outDataset.GetRasterBand(i + 1) outBand.WriteArray(bmap[:, :, i], 0, 0) outBand.FlushCache() print 'bitemporal map image written to: %s' % outfn3 outfn4 = outfn.replace(name, name + '_smap') outDataset = driver.Create(outfn4, cols, rows, 1, GDT_Byte) if geotransform is not None: outDataset.SetGeoTransform(geotransform) if projection is not None: outDataset.SetProjection(projection) outBand = outDataset.GetRasterBand(1) outBand.WriteArray(smap, 0, 0) outBand.FlushCache() print 'first change map written to: %s' % outfn4 print 'total elapsed time: ' + str(time.time() - start) outDataset = None inDataset1 = None
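# Both parallel sections above follow the same "try ipyparallel, fall back to
# the sequential built-in map" pattern. A compact standalone sketch of that
# pattern (the work function is a placeholder; the parallel branch requires a
# running ipcluster):
from ipyparallel import Client

def heavy(x):
    return x * x

args = range(16)
try:
    c = Client()  # raises if no controller is reachable
    results = c[:].map_sync(heavy, args)
except Exception as e:
    print('%s\nfalling back to sequential execution' % e)
    results = map(heavy, args)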
def main(test, i_max): i_max = int(i_max) if test == 'True': #Don't chunk for f_name in ['train', 'test']: print('Tokenizing (test)', f_name) D = pd.read_csv(PROJECT_DIR + '/data/interim/' + f_name + '_test.csv', index_col='id') D = question_type_chunk(D) D.to_csv(PROJECT_DIR + '/data/interim/' + f_name + '_test.csv', index_label='id') else: #test != 'True' pool = Client() with pool[:].sync_imports(): import nltk push_res = pool[:].push({ 'Q': Q, 'Q_TYPE1': Q_TYPE1, 'question_type_chunk': question_type_chunk, 'question_types1': question_types1, 'get_question': get_question, 'question_type1': question_type1, 'n_types': n_types }) N_JOBS = len(pool) left_indices = range(0, CHUNKSIZE, CHUNKSIZE // N_JOBS) right_indices = range(CHUNKSIZE // N_JOBS, CHUNKSIZE + 1, CHUNKSIZE // N_JOBS) for f_name in ['train', 'test']: D_it = pd.read_csv(PROJECT_DIR + '/data/interim/' + f_name + '.csv', chunksize=CHUNKSIZE, index_col='id') D0 = D_it.get_chunk() D0 = question_type_chunk(D0) D0.to_csv(PROJECT_DIR + '/data/interim/' + 'D_tmp.csv', mode='w', index_label='id') del D0 i = 0 for Di in D_it: i += 1 if i_max != 0 and i > i_max: break print('Classifying question type ', f_name, ' chunk: ', i, end='\r') sys.stdout.flush() results = [] for pi, li, ri in zip(pool, left_indices, right_indices): results.append( pi.apply_async(question_type_chunk, Di[li:ri])) for res in results: Di = res.get() if len(Di) > 0: Di.to_csv(PROJECT_DIR + '/data/interim/' + 'D_tmp.csv', mode='a', header=False, index_label='id') del Di print() os.rename(PROJECT_DIR + '/data/interim/' + 'D_tmp.csv', PROJECT_DIR + '/data/interim/' + f_name + '.csv') return
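# main() above has to push module-level globals to the engines before calling
# apply_async, because a function shipped to an engine is rebound to the
# engine's namespace and does not carry its globals with it. A minimal sketch
# of that setup step (QUESTION_RE and is_question are hypothetical examples,
# not names from this project; assumes a running cluster):
from ipyparallel import Client

rc = Client()
dview = rc[:]

with dview.sync_imports():  # run the import locally and on every engine
    import re

QUESTION_RE = '\\?\\s*$'

def is_question(text):
    # uses the pushed global and the synced import on the engine
    return re.search(QUESTION_RE, text) is not None

dview.push({'QUESTION_RE': QUESTION_RE}, block=True)
print(dview.map_sync(is_question, ['why?', 'because.']))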
# TODO store definition dicts in json # TODO add functionality to read settings of every object from config format import l5pc_evaluator evaluator = l5pc_evaluator.create() def evaluate(parameter_array): """Global evaluate function""" return evaluator.evaluate(parameter_array) if os.getenv('L5PCBENCHMARK_USEIPYP') == '1': from ipyparallel import Client rc = Client(profile=os.getenv('IPYTHON_PROFILE')) lview = rc.load_balanced_view() map_function = lview.map_sync else: map_function = None opt = bluepyopt.optimisations.DEAPOptimisation( evaluator=evaluator, map_function=map_function, seed=os.getenv('BLUEPYOPT_SEED')) def main(): """Main""" parser = argparse.ArgumentParser(description='L5PC example')
def test_project(self): # Are there connections dSPN->iSPN from snudda.utils.load import SnuddaLoad network_file = os.path.join(self.network_path, "network-synapses.hdf5") sl = SnuddaLoad(network_file) dspn_id_list = sl.get_cell_id_of_type("dSPN") ispn_id_list = sl.get_cell_id_of_type("iSPN") tot_proj_ctr = 0 for dspn_id in dspn_id_list: for ispn_id in ispn_id_list: synapses, synapse_coords = sl.find_synapses(pre_id=dspn_id, post_id=ispn_id) if synapses is not None: tot_proj_ctr += synapses.shape[0] with self.subTest(stage="projection_exists"): # There should be projection synapses between dSPN and iSPN in this toy example self.assertTrue(tot_proj_ctr > 0) tot_dd_syn_ctr = 0 for dspn_id in dspn_id_list: for dspn_id2 in dspn_id_list: synapses, synapse_coords = sl.find_synapses(pre_id=dspn_id, post_id=dspn_id2) if synapses is not None: tot_dd_syn_ctr += synapses.shape[0] tot_ii_syn_ctr = 0 for ispn_id in ispn_id_list: for ispn_id2 in ispn_id_list: synapses, synapse_coords = sl.find_synapses(pre_id=ispn_id, post_id=ispn_id2) if synapses is not None: tot_ii_syn_ctr += synapses.shape[0] with self.subTest(stage="normal_synapses_exist"): # In this toy example neurons are quite sparsely placed, but we should have at least some # synapses self.assertTrue(tot_dd_syn_ctr > 0) self.assertTrue(tot_ii_syn_ctr > 0) # We need to run in parallel also to verify we get same result (same random seed) serial_synapses = sl.data["synapses"].copy() del sl # Close old file so we can overwrite it os.environ["IPYTHONDIR"] = os.path.join(os.path.abspath(os.getcwd()), ".ipython") os.environ["IPYTHON_PROFILE"] = "default" os.system( "ipcluster start -n 4 --profile=$IPYTHON_PROFILE --ip=127.0.0.1&") time.sleep(10) # Run place, detect and prune in parallel by passing rc from ipyparallel import Client u_file = os.path.join(".ipython", "profile_default", "security", "ipcontroller-client.json") rc = Client(url_file=u_file, timeout=120, debug=False) d_view = rc.direct_view( targets='all') # rc[:] # Direct view into clients from snudda.detect.detect import SnuddaDetect sd = SnuddaDetect(network_path=self.network_path, hyper_voxel_size=100, rc=rc, verbose=True) sd.detect() from snudda.detect.project import SnuddaProject # TODO: Currently SnuddaProject only runs in serial sp = SnuddaProject(network_path=self.network_path) sp.project() from snudda.detect.prune import SnuddaPrune # Prune has different methods for serial and parallel execution, important to test it! sp = SnuddaPrune(network_path=self.network_path, rc=rc, verbose=True) sp.prune() with self.subTest(stage="check-parallel-identical"): sl2 = SnuddaLoad(network_file) parallel_synapses = sl2.data["synapses"].copy() # ParameterID, sec_X etc are randomised in hyper voxel, so you need to use same # hypervoxel size for reproducability between serial and parallel execution # All synapses should be identical regardless of serial or parallel execution path self.assertTrue((serial_synapses == parallel_synapses).all()) os.system("ipcluster stop")
from ipyparallel import Client import sys import numpy as np import glob import pickle ######################################################################################################################## # set up parallel client rc = Client() dview = rc[:] with dview.sync_imports(): from weak_strong_convergence import experiment fnames = glob.glob( './data/tay_obs_seed_000_sys_dim_10_analint_001_diffusion_*') fnames = sorted(fnames) exps = [] for i in range(len(fnames)): f = open(fnames[i], 'rb') tmp = pickle.load(f) f.close() params = tmp['params'] x_init = tmp['tobs'] for j in range(1, 10): for p in [1]: args = [] args.append(x_init[:, j])
def setupParallell(self): import os SlurmJobID = os.getenv("SLURM_JOBID") if (SlurmJobID is None): self.SlurmID = 0 else: self.SlurmID = int(SlurmJobID) self.writeLog("IPYTHON_PROFILE = " + str(os.getenv('IPYTHON_PROFILE'))) if (os.getenv('IPYTHON_PROFILE') is not None): from ipyparallel import Client self.rc = Client(profile=os.getenv('IPYTHON_PROFILE')) # http://davidmasad.com/blog/simulation-with-ipyparallel/ # http://people.duke.edu/~ccc14/sta-663-2016/19C_IPyParallel.html self.writeLog("Client IDs: " + str(self.rc.ids)) self.dView = self.rc[:] # Direct view into clients self.lbView = self.rc.load_balanced_view() if (self.logFile is not None): logFileName = self.logFile.name engineLogFile = [logFileName + "-" \ + str(x) for x in range(0,len(self.dView))] else: engineLogFile = [None for x in range(0, len(self.dView))] else: self.writeLog( "No IPYTHON_PROFILE enviroment variable set, running in serial" ) self.dView = None self.lbView = None return with self.dView.sync_imports(): from snudda.input import SnuddaInput self.dView.push({ "inputConfigFile": self.inputConfigFile, "networkConfigFile": self.networkConfigFile, "positionFile": self.positionFile, "spikeDataFileName": self.spikeDataFileName, "isMaster": False, "time": self.time }) self.writeLog("Scattering engineLogFile = " + str(engineLogFile)) self.dView.scatter('logFileName', engineLogFile, block=True) self.writeLog("nl = SnuddaInput(inputConfigFile='" + self.inputConfigFile \ + "',networkConfigFile='" + self.networkConfigFile \ + "',positionFile='" + self.positionFile\ + "',spikeDataFileName='" + self.spikeDataFileName \ + "',isMaster=False " \ + ",time=" +str(self.time) + ",logFile=logFileName[0])") cmdStr = 'global nl; nl = SnuddaInput(inputConfigFile=inputConfigFile,networkConfigFile=networkConfigFile,positionFile=positionFile,spikeDataFileName=spikeDataFileName,isMaster=isMaster,time=time,logFile=logFileName[0])' self.dView.execute(cmdStr, block=True) self.newWorkerSeeds(self.dView) self.writeLog("Workers set up")
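# setupParallell above scatters one log-file name per engine and then builds
# a worker object on each engine with execute(). A stripped-down sketch of
# that scatter/execute round trip (assumes a running cluster; variable names
# are illustrative; with one list entry per engine, each engine's scattered
# sublist has length 1, hence the [0] index):
from ipyparallel import Client

rc = Client()
dView = rc[:]

dView.scatter('logFileName', ['engine-%d.log' % i for i in rc.ids], block=True)
dView.execute('msg = "logging to " + logFileName[0]', block=True)
print(dView['msg'])  # one message per engine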
class Snudda(object): ############################################################################ def __init__(self, networkPath): if (networkPath[-1] == "/"): self.networkPath = networkPath[:-1] else: self.networkPath = networkPath # Add current dir to python path sys.path.append(os.getcwd()) self.start = timeit.default_timer() ############################################################################ def helpInfo(self, args): from .snudda_help import snudda_help_text print(snudda_help_text()) ############################################################################ def initConfig(self, args): # self.networkPath = args.path print("Creating config file") print("Network path: " + str(self.networkPath)) assert args.size is not None, \ "You need to speicfy --size when initialising config for network2" from .init import SnuddaInit structDef = { "Striatum": args.size, "GPe": 0, "GPi": 0, "SNr": 0, "STN": 0, "Cortex": 0, "Thalamus": 0 } # Cortex and thalamus axons disabled right now, set to 1 to include one if not args.overwrite: assert not os.path.exists(self.networkPath), \ "Network path " + str(self.networkPath) + " already exists" \ + " (aborting to prevent accidental overwriting)" self.makeDirIfNeeded(self.networkPath) nChannels = args.nchannels configFile = self.networkPath + "/network-config.json" SnuddaInit(structDef=structDef, configName=configFile, nChannels=nChannels) if (args.size > 1e5): print("Make sure there is enough disk space in " + str(self.networkPath)) print("Large networks take up ALOT of space") ############################################################################ def placeNeurons(self, args): # self.networkPath = args.path print("Placing neurons") print("Network path: " + str(self.networkPath)) configFile = self.networkPath + "/network-config.json" positionFile = self.networkPath + "/network-neuron-positions.hdf5" logFileName = self.networkPath + "/log/logFile-place-neurons.txt" self.setupLogFile(logFileName) # sets self.logFile self.setupParallel() # sets self.dView and self.lbView from .place import SnuddaPlace if (args.h5legacy): h5libver = "earliest" else: h5libver = "latest" # default npn = SnuddaPlace(config_file=configFile, logFile=self.logFile, verbose=True, dView=self.dView, h5libver=h5libver) npn.writeDataHDF5(positionFile) self.stopParallel() self.closeLogFile() ############################################################################ def touchDetection(self, args): # self.networkPath = args.path print("Touch detection") print("Network path: " + str(self.networkPath)) if (args.hvsize is not None): hyperVoxelSize = int(args.hvsize) else: hyperVoxelSize = 100 if (args.volumeID is not None): volumeID = args.volumeID else: volumeID = None logDir = self.networkPath + "/log" configFile = self.networkPath + "/network-config.json" positionFile = self.networkPath + "/network-neuron-positions.hdf5" logFileName = self.networkPath + "/log/logFile-touch-detection.txt" saveFile = self.networkPath + "/voxels/network-putative-synapses.hdf5" voxelDir = self.networkPath + "/voxels" self.makeDirIfNeeded(voxelDir) self.setupLogFile(logFileName) # sets self.logFile self.setupParallel() # sets self.dView and self.lbView if (args.h5legacy): h5libver = "earliest" else: h5libver = "latest" # default from .detect import SnuddaDetect if (args.cont): # Continue previous run print("Continuing previous touch detection") ncv = SnuddaDetect(configFile=configFile, positionFile=positionFile, logFile=self.logFile, saveFile=saveFile, SlurmID=self.SlurmID, volumeID=volumeID, 
rc=self.rc, hyperVoxelSize=hyperVoxelSize, h5libver=h5libver, restartDetectionFlag=False) else: ncv = SnuddaDetect(configFile=configFile, positionFile=positionFile, logFile=self.logFile, saveFile=saveFile, SlurmID=self.SlurmID, volumeID=volumeID, rc=self.rc, h5libver=h5libver, hyperVoxelSize=hyperVoxelSize) self.stopParallel() self.closeLogFile() ############################################################################ def pruneSynapses(self, args): # self.networkPath = args.path print("Prune synapses") print("Network path: " + str(self.networkPath)) from .prune import SnuddaPrune logFileName = self.networkPath + "/log/logFile-synapse-pruning.txt" workLog = self.networkPath + "/log/network-detect-worklog.hdf5" self.setupLogFile(logFileName) # sets self.logFile self.setupParallel() # sets self.dView and self.lbView # Optionally set this scratchPath = None if (args.mergeonly): preMergeOnly = True else: preMergeOnly = False print("preMergeOnly : " + str(preMergeOnly)) if (args.h5legacy): h5libver = "earliest" else: h5libver = "latest" # default ncvp = SnuddaPrune(workHistoryFile=workLog, logFile=self.logFile, logFileName=logFileName, dView=self.dView, lbView=self.lbView, scratchPath=scratchPath, h5libver=h5libver, preMergeOnly=preMergeOnly) self.stopParallel() self.closeLogFile() ############################################################################ def setupInput(self, args): from .input import SnuddaInput print("Setting up inputs, assuming input.json exists") logFileName = self.networkPath + "/log/logFile-setup-input.log" self.setupLogFile(logFileName) # sets self.logFile self.setupParallel() # sets self.dView and self.lbView if "input" in args: inputConfig = args.input else: inputConfig = self.networkPath + "/input.json" if (not os.path.isfile(inputConfig)): print("Missing input config file: " + str(inputConfig)) return if (args.networkFile): networkFile = args.networkFile else: networkFile = self.networkPath \ + "/network-pruned-synapses.hdf5" if (args.inputFile): spikeFile = args.inputFile else: spikeFile = self.networkPath + "/input-spikes.hdf5" if (args.time): inputTime = args.time print("Writing input spikes to " + spikeFile) ni = SnuddaInput(inputConfigFile=inputConfig, HDF5networkFile=networkFile, spikeDataFileName=spikeFile, time=inputTime, logFile=self.logFile) self.stopParallel() self.closeLogFile() ############################################################################ def exportToSONATA(self, args): from ConvertNetwork import ConvertNetwork print("Exporting to SONATA format") print("Network path: " + str(self.networkPath)) if (args.networkFile): networkFile = args.networkFile else: networkFile = self.networkPath \ + "/network-pruned-synapses.hdf5" if (args.inputFile): inputFile = args.inputFile else: inputFile = self.networkPath + "/input-spikes.hdf5" outDir = self.networkPath + "/SONATA/" cn = ConvertNetwork(networkFile=networkFile, inputFile=inputFile, outDir=outDir) ############################################################################ def simulate(self, args): start = timeit.default_timer() from .simulate import SnuddaSimulate if (args.networkFile): networkFile = args.networkFile else: networkFile = self.networkPath \ + "/network-pruned-synapses.hdf5" if (args.inputFile): inputFile = args.inputFile else: inputFile = self.networkPath + "/input-spikes.hdf5" self.makeDirIfNeeded(self.networkPath + "/simulation") print("Using input file " + inputFile) #nWorkers = args.ncores #print("Using " + str(nWorkers) + " workers for neuron") # Problems with nested 
symbolic links when the second one is a relative # path going beyond the original base path if (args.mechDir is None): mechDir = os.path.dirname(networkFile) + "/mechanisms" # !!! problem with paths, testing to create mechanism dir in current dir mechDir = "mechanisms" if (not os.path.exists(mechDir)): mDir = os.path.dirname(__file__) + "/data/cellspecs/mechanisms" os.symlink(mDir, mechDir) else: mechDir = args.mechDir # !!! These are saved in current directory x86_64 # --- problem since nrnivmodl seems to want a relative path... makeModsStr = "nrnivmodl " + mechDir if (not os.path.exists('x86_64')): print("Please first run: " + makeModsStr) exit(-1) # I was having problems when running nrnivmodl in the script, but # running it manually in bash works... WHY?!! # os.system(makeModsStr) saveDir = os.path.dirname(networkFile) + "/simulation/" if (not os.path.exists(saveDir)): print("Creating directory " + saveDir) os.makedirs(saveDir, exist_ok=True) # Get the SlurmID, used in default file names SlurmID = os.getenv('SLURM_JOBID') if (SlurmID is None): SlurmID = str(666) print("args: " + str(args)) if (args.voltOut is not None): # Save neuron voltage if (args.voltOut == "default"): voltFile = saveDir + 'network-voltage-' + SlurmID + '.csv' else: voltFile = args.voltOut else: voltFile = None if (args.spikesOut is None or args.spikesOut == "default"): spikesFile = saveDir + 'network-output-spikes-' + SlurmID + '.txt' else: spikesFile = args.spikesOut disableGJ = args.disableGJ if (disableGJ): print("!!! WE HAVE DISABLED GAP JUNCTIONS !!!") logFile = os.path.dirname(networkFile) \ + "/log/network-simulation-log.txt" logDir = os.path.dirname(networkFile) + "/log" if (not os.path.exists(logDir)): print("Creating directory " + logDir) os.makedirs(logDir, exist_ok=True) from mpi4py import MPI # This must be imported before neuron, to run parallel from neuron import h #, gui pc = h.ParallelContext() sim = SnuddaSimulate(networkFile=networkFile, inputFile=inputFile, disableGapJunctions=disableGJ, logFile=logFile, verbose=args.verbose) sim.addExternalInput() sim.checkMemoryStatus() if (voltFile is not None): sim.addRecording( sideLen=None) # Side len let you record from a subset #sim.addRecordingOfType("dSPN",5) # Side len let you record from a subset tSim = args.time * 1000 # Convert from s to ms for Neuron simulator sim.checkMemoryStatus() print("Running simulation for " + str(tSim) + " ms.") sim.run(tSim) # In milliseconds print("Simulation done, saving output") if (spikesFile is not None): sim.writeSpikes(spikesFile) if (voltFile is not None): sim.writeVoltage(voltFile) stop = timeit.default_timer() if (sim.pc.id() == 0): print("Program run time: " + str(stop - start)) # sim.plot() exit(0) #cmdStr = "nrnivmodl " + mechDir + " && mpiexec -n " + str(nWorkers) + " -map-by socket:OVERSUBSCRIBE python3 " + os.path.dirname(__file__) + " simulate.py " + networkFile + " " + inputFile + " --time " + str(args.time) #if(args.voltOut is not None): # cmdStr += " --voltOut " + args.voltOut #os.system(cmdStr) ############################################################################ def analyse(self, args): print("Add analysis code here, see Network_analyse.py") ############################################################################ def setupParallel(self): self.SlurmID = os.getenv('SLURM_JOBID') if (self.SlurmID is None): self.SlurmID = self.nextRunID() else: self.SlurmID = int(self.SlurmID) self.logFile.write("Using SlurmID: " + str(self.SlurmID)) if (os.getenv('IPYTHON_PROFILE') is not None): 
self.logFile.write('Creating ipyparallel client\n') from ipyparallel import Client #self.rc = Client(profile=os.getenv('IPYTHON_PROFILE'), # # sshserver='127.0.0.1', # debug=False) ufile = os.getenv('IPYTHONDIR') + "/profile_" \ + os.getenv('IPYTHON_PROFILE') \ + "/security/ipcontroller-client.json" self.rc = Client(url_file=ufile, timeout=120, debug=False) self.logFile.write('Client IDs: ' + str(self.rc.ids)) # http://davidmasad.com/blog/simulation-with-ipyparallel/ # http://people.duke.edu/~ccc14/sta-663-2016/19C_IPyParallel.html self.dView = self.rc.direct_view( targets='all') # rc[:] # Direct view into clients self.lbView = self.rc.load_balanced_view(targets='all') # Define nc globally # self.dView.execute("nc = None",block=True) else: self.logFile.write( "No IPYTHON_PROFILE enviroment variable set, running in serial" ) self.dView = None self.lbView = None self.rc = None ############################################################################ def stopParallel(self): # Disable this function, keep the pool running for now return if (self.rc is not None): print("Stopping ipyparallel") self.rc.shutdown(hub=True) ############################################################################ def setupLogFile(self, logFileName): dataDir = os.path.dirname(logFileName) self.makeDirIfNeeded(dataDir) try: self.logFile = open(logFileName, 'w') self.logFile.write('Starting log file\n') except: print("Unable to set up log file " + str(logFileName)) ############################################################################ def closeLogFile(self): stop = timeit.default_timer() print("\nProgram run time: " + str(stop - self.start)) self.logFile.write("Program run time: " + str(stop - self.start)) self.logFile.write("End of log. Closing file.") self.logFile.close() ############################################################################## def nextRunID(self): import pickle runIDfile = ".runID.pickle" try: if (os.path.isfile(runIDfile)): with open(runIDfile, 'rb') as f: runID = pickle.load(f) nextID = int(np.ceil(np.max(runID)) + 1) runID.append(nextID) with open(runIDfile, 'wb') as f: pickle.dump(runID, f, -1) else: with open(runIDfile, 'wb') as f: nextID = 1 runID = [1] pickle.dump(runID, f, -1) except Exception as e: import traceback tstr = traceback.format_exc() print(tstr) print("Problem reading .runID.pickle file, setting runID to 0") import pdb pdb.set_trace() return 0 print("Using runID = " + str(nextID)) return nextID ############################################################################ def makeDirIfNeeded(self, dirPath): if (not os.path.exists(dirPath)): print("Creating missing directory " + dirPath) os.makedirs(dirPath)
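# Both the profile-based and the url_file-based connections appear in the
# classes above. Connecting through the ipcontroller-client.json security
# file is the more robust of the two when IPYTHONDIR points somewhere
# non-default; a minimal sketch of just that connection step:
import os
from ipyparallel import Client

ipythonDir = os.getenv('IPYTHONDIR',
                       os.path.join(os.path.abspath(os.getcwd()), '.ipython'))
profileName = os.getenv('IPYTHON_PROFILE', 'default')
uFile = os.path.join(ipythonDir, 'profile_' + profileName,
                     'security', 'ipcontroller-client.json')
rc = Client(url_file=uFile, timeout=120, debug=False)
print('Connected to ' + str(len(rc.ids)) + ' engines')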
class Snudda(object): ############################################################################ def __init__(self, network_path): self.network_path = network_path self.d_view = None self.lb_view = None self.rc = None self.slurm_id = 0 # Add current dir to python path sys.path.append(os.getcwd()) self.start = timeit.default_timer() ############################################################################ @staticmethod def help_info(args): from snudda.help import snudda_help_text ############################################################################ def init_config(self, args): # self.networkPath = args.path print("Creating config file") print(f"Network path: {self.network_path}") assert args.size is not None, "You need to specify --size when initialising config for the network" from snudda.init.init import SnuddaInit struct_def = { "Striatum": args.size, "GPe": 0, "GPi": 0, "SNr": 0, "STN": 0, "Cortex": 0, "Thalamus": 0 } # Cortex and thalamus axons disabled right now, set to 1 to include one if not args.overwrite: assert not os.path.exists(self.network_path), \ "Network path {self.network_path} already exists (aborting to prevent accidental overwriting)" self.make_dir_if_needed(self.network_path) random_seed = args.randomseed config_file = os.path.join(self.network_path, "network-config.json") SnuddaInit(struct_def=struct_def, config_file=config_file, random_seed=random_seed) if args.size > 1e5: print( f"Make sure there is enough disk space in {self.network_path}") print("Large networks take up ALOT of space") ############################################################################ def place_neurons(self, args): # self.networkPath = args.path print("Placing neurons") print(f"Network path: {self.network_path}") log_file_name = os.path.join(self.network_path, "log", "logFile-place-neurons.txt") random_seed = args.randomseed self.setup_log_file(log_file_name) # sets self.logFile if args.parallel: self.setup_parallel() # sets self.d_view and self.lb_view from snudda.place.place import SnuddaPlace if args.h5legacy: h5libver = "earliest" else: h5libver = "latest" # default sp = SnuddaPlace(network_path=self.network_path, log_file=self.logfile, verbose=args.verbose, d_view=self.d_view, h5libver=h5libver, raytrace_borders=args.raytrace_borders, random_seed=random_seed) sp.place() self.stop_parallel() self.close_log_file() ############################################################################ def touch_detection(self, args): # self.networkPath = args.path print("Touch detection") print("Network path: " + str(self.network_path)) if args.hvsize is not None: hyper_voxel_size = int(args.hvsize) else: hyper_voxel_size = 100 if args.volumeID is not None: volume_id = args.volumeID else: volume_id = None log_dir = os.path.join(self.network_path, "log") if not os.path.exists(log_dir): print(f"Creating directory {log_dir}") os.makedirs(log_dir, exist_ok=True) config_file = os.path.join(self.network_path, "network-config.json") position_file = os.path.join(self.network_path, "network-neuron-positions.hdf5") log_filename = os.path.join(self.network_path, "log", "logFile-touch-detection.txt") save_file = os.path.join(self.network_path, "voxels", "network-putative-synapses.hdf5") random_seed = args.randomseed voxel_dir = os.path.join(self.network_path, "voxels") self.make_dir_if_needed(voxel_dir) self.setup_log_file(log_filename) # sets self.logfile if args.parallel: self.setup_parallel() # sets self.d_view and self.lb_view if args.h5legacy: h5libver = "earliest" else: h5libver = "latest" # 
default from snudda.detect.detect import SnuddaDetect # You can now setup SnuddaDetect with only network_path and it will use default values # for config_file, position_file, logfile, save_file sd = SnuddaDetect(config_file=config_file, position_file=position_file, logfile=self.logfile, save_file=save_file, slurm_id=self.slurm_id, volume_id=volume_id, rc=self.rc, hyper_voxel_size=hyper_voxel_size, h5libver=h5libver, random_seed=random_seed, verbose=args.verbose) if args.cont: # Continue previous run print("Continuing previous touch detection") sd.detect(restart_detection_flag=False) else: sd.detect(restart_detection_flag=True) # Also run SnuddaProject to handle projections between volume from snudda.detect.project import SnuddaProject sp = SnuddaProject(network_path=self.network_path) sp.project() self.stop_parallel() self.close_log_file() ############################################################################ def prune_synapses(self, args): # self.networkPath = args.path print("Prune synapses") print("Network path: " + str(self.network_path)) from snudda.detect.prune import SnuddaPrune log_filename = os.path.join(self.network_path, "log", "logFile-synapse-pruning.txt") random_seed = args.randomseed self.setup_log_file(log_filename) # sets self.logfile if args.parallel: self.setup_parallel() # sets self.d_view and self.lb_view # Optionally set this scratch_path = None if args.h5legacy: h5libver = "earliest" else: h5libver = "latest" # default sp = SnuddaPrune(network_path=self.network_path, logfile=self.logfile, logfile_name=log_filename, config_file=args.config_file, d_view=self.d_view, lb_view=self.lb_view, scratch_path=scratch_path, h5libver=h5libver, random_seed=random_seed, verbose=args.verbose, keep_files=args.keepfiles) sp.prune() self.stop_parallel() self.close_log_file() ############################################################################ def setup_input(self, args): from snudda.input.input import SnuddaInput print("Setting up inputs, assuming input.json exists") log_filename = os.path.join(self.network_path, "log", "logFile-setup-input.log") self.setup_log_file(log_filename) # sets self.logfile if args.parallel: self.setup_parallel() # sets self.d_view and self.lb_view if "input" in args and args.input: input_config = args.input else: input_config = os.path.join(self.network_path, "input.json") if not snudda_isfile(input_config): print(f"Missing input config file: {input_config}") return if args.network_file: network_file = args.network_file else: network_file = os.path.join(self.network_path, "network-synapses.hdf5") if args.input_file: spike_file = args.input_file else: spike_file = os.path.join(self.network_path, "input-spikes.hdf5") if args.time: input_time = args.time else: input_time = None random_seed = args.randomseed if args.h5legacy: h5libver = "earliest" else: h5libver = "latest" # default print(f"Writing input spikes to {spike_file}") si = SnuddaInput(input_config_file=input_config, hdf5_network_file=network_file, spike_data_filename=spike_file, time=input_time, logfile=self.logfile, rc=self.rc, random_seed=random_seed, h5libver=h5libver, verbose=args.verbose) si.generate() self.stop_parallel() self.close_log_file() ############################################################################ def export_to_SONATA(self, args): assert False, "Old export to SONATA borken, fixme!" 
# TODO: Fix this from snudda.ConvertNetwork import ConvertNetwork print("Exporting to SONATA format") print(f"Network path: {self.network_path}") if args.network_file: network_file = args.network_file else: network_file = os.path.join(self.network_path, "network-synapses.hdf5") if args.input_file: input_file = args.input_file else: input_file = os.path.join(self.network_path, "input-spikes.hdf5") out_dir = os.path.join(self.network_path, "SONATA") cn = ConvertNetwork(networkFile=network_file, inputFile=input_file, outDir=out_dir) ############################################################################ def simulate(self, args): start = timeit.default_timer() from snudda.simulate.simulate import SnuddaSimulate if args.network_file: network_file = args.network_file else: network_file = os.path.join(self.network_path, "network-synapses.hdf5") if args.input_file: input_file = args.input_file else: input_file = os.path.join(self.network_path, "input-spikes.hdf5") self.make_dir_if_needed(os.path.join(self.network_path, "simulation")) print(f"Using input file {input_file}") # nWorkers = args.ncores # print("Using " + str(nWorkers) + " workers for neuron") # Problems with nested symbolic links when the second one is a relative # path going beyond the original base path if args.mech_dir is None: # mech_dir = os.path.join(os.path.dirname(network_file), "mechanisms") # TODO!!! problem with paths, testing to create mechanism dir in current dir mech_dir = "mechanisms" if not os.path.exists(mech_dir): try: m_dir = os.path.realpath( os.path.join(os.path.dirname(__file__), "data", "neurons", "mechanisms")) os.symlink(m_dir, mech_dir) except: print(f"Failed to create symlink {mech_dir} -> {m_dir}") else: mech_dir = args.mech_dir # !!! These are saved in current directory x86_64 # --- problem since nrnivmodl seems to want a relative path... make_mods_str = f"nrnivmodl {mech_dir}" # x86_64 on linux, nrnmech.dll on windows... if not os.path.exists("x86_64") and not os.path.exists("nrnmech.dll"): print(f"Please first run: {make_mods_str}") os.sys.exit(-1) # I was having problems when running nrnivmodl in the script, but # running it manually in bash works... WHY?!! # os.system(makeModsStr) save_dir = os.path.join(os.path.dirname(network_file), "simulation") if not os.path.exists(save_dir): print(f"Creating directory {save_dir}") os.makedirs(save_dir, exist_ok=True) # Get the SlurmID, used in default file names slurm_id = os.getenv('SLURM_JOBID') if slurm_id is None: slurm_id = str(666) print(f"args: {args}") if args.volt_out is not None: # Save neuron voltage if args.volt_out == "default": volt_file = os.path.join(save_dir, f"network-voltage-{slurm_id}.csv") else: volt_file = args.volt_out else: volt_file = None if args.spikes_out is None or args.spikes_out == "default": spikes_file = os.path.join( save_dir, f"network-output-spikes-{slurm_id}.txt") else: spikes_file = args.spikes_out disable_gj = args.disable_gj if disable_gj: print("!!! WE HAVE DISABLED GAP JUNCTIONS !!!") log_file = os.path.join(os.path.dirname(network_file), "log", "network-simulation-log.txt") log_dir = os.path.join(os.path.dirname(network_file), "log") if not os.path.exists(log_dir): print(f"Creating directory {log_dir}") os.makedirs(log_dir, exist_ok=True) from mpi4py import MPI # This must be imported before neuron, to run parallel from neuron import h # , gui pc = h.ParallelContext() # Simulate is deterministic, no random seed. 
sim = SnuddaSimulate(network_file=network_file, input_file=input_file, disable_gap_junctions=disable_gj, log_file=log_file, verbose=args.verbose) sim.add_external_input() sim.check_memory_status() if volt_file is not None: sim.add_recording( side_len=None) # Side len let you record from a subset # sim.addRecordingOfType("dSPN",5) # Side len let you record from a subset t_sim = args.time * 1000 # Convert from s to ms for Neuron simulator if args.exportCoreNeuron: sim.export_to_core_neuron() return # We do not run simulation when exporting to core neuron sim.check_memory_status() print("Running simulation for " + str(t_sim) + " ms.") sim.run(t_sim) # In milliseconds print("Simulation done, saving output") if spikes_file is not None: sim.write_spikes(spikes_file) if volt_file is not None: sim.write_voltage(volt_file) stop = timeit.default_timer() if sim.pc.id() == 0: print(f"Program run time: {stop - start:.1f}s") # sim.plot() ############################################################################ def analyse(self, args): print("Add analysis code here, see Network_analyse.py") ############################################################################ def setup_parallel(self): self.slurm_id = os.getenv('SLURM_JOBID') if self.slurm_id is None: self.slurm_id = 0 else: self.slurm_id = int(self.slurm_id) self.logfile.write(f"Using slurm_id: {self.slurm_id}") ipython_profile = os.getenv('IPYTHON_PROFILE') if not ipython_profile: ipython_profile = "default" ipython_dir = os.getenv('IPYTHONDIR') if not ipython_dir: ipython_dir = os.path.join(os.path.abspath(os.getcwd()), ".ipython") self.logfile.write('Creating ipyparallel client\n') from ipyparallel import Client u_file = os.path.join(ipython_dir, f"profile_{ipython_profile}", "security", "ipcontroller-client.json") self.rc = Client(url_file=u_file, timeout=120, debug=False) self.logfile.write(f'Client IDs: {self.rc.ids}') # http://davidmasad.com/blog/simulation-with-ipyparallel/ # http://people.duke.edu/~ccc14/sta-663-2016/19C_IPyParallel.html self.d_view = self.rc.direct_view( targets='all') # rc[:] # Direct view into clients self.lb_view = self.rc.load_balanced_view(targets='all') ############################################################################ def stop_parallel(self): # Disable this function, keep the pool running for now return # if self.rc is not None: # print("Stopping ipyparallel") # self.rc.shutdown(hub=True) ############################################################################ def setup_log_file(self, log_file_name): data_dir = os.path.dirname(log_file_name) self.make_dir_if_needed(data_dir) try: self.logfile = open(log_file_name, 'w') self.logfile.write('Starting log file\n') except: print("Unable to set up log file " + str(log_file_name)) ############################################################################ def close_log_file(self): stop = timeit.default_timer() print(f"\nProgram run time: {stop - self.start:.1f}s") self.logfile.write(f"Program run time: {stop - self.start:.1f}s") self.logfile.write("End of log. Closing file.") self.logfile.close() ############################################################################ @staticmethod def make_dir_if_needed(dir_path): if not os.path.exists(dir_path): print("Creating missing directory " + dir_path) try: os.makedirs(dir_path) print("Created directory " + dir_path) except: print("Failed to create dir " + dir_path)
def update_temporal_components(Y, A, b, Cin, fin, bl=None, c1=None, g=None, sn=None, ITER=2,
                               method_foopsi='constrained_foopsi', n_processes=1,
                               backend='single_thread', memory_efficient=False, debug=False,
                               **kwargs):
    """Update temporal components and background given spatial components using a block
    coordinate descent approach.

    Parameters
    ----------
    Y: np.ndarray (2D)
        input data with time in the last axis (d x T)
    A: sparse matrix (csc format)
        matrix of spatial components (d x K)
    b: np.ndarray (d x 1)
        current estimate of background component
    Cin: np.ndarray
        current estimate of temporal components (K x T)
    fin: np.ndarray
        current estimate of temporal background (vector of length T)
    bl: np.ndarray
        baseline for fluorescence trace for each column in A
    c1: np.ndarray
        initial concentration for each column in A
    g: np.ndarray
        discrete time constant for each column in A
    sn: np.ndarray
        noise level for each column in A
    ITER: positive integer
        maximum number of block coordinate descent loops
    method_foopsi: string
        method for deconvolution of neural activity;
        'constrained_foopsi' is the only method supported at the moment
    n_processes: int
        number of processes to use for parallel computation;
        should not exceed the number of processes started with ipcluster
    backend: string
        'single_thread': no parallelization
        'ipyparallel': parallelization using an ipyparallel cluster;
            start the cluster first (install ipyparallel, then run
            "ipcluster start -n 6", where 6 is the number of processes)
        'SLURM': parallelization using the SLURM scheduler
    memory_efficient: bool
        whether to optimize for memory usage (longer running times);
        necessary with very large datasets
    **kwargs: dict
        all parameters passed to constrained_foopsi except bl, c1, g, sn
        (see documentation). Some useful parameters are:
        p: int
            order of the autoregressive model
        method: string, optional
            solution method for constrained foopsi:
            'cvx': cvxopt and picos (slow, especially without the MOSEK solver)
            'cvxpy': cvxopt and cvxpy with the ECOS solver (faster, default)
        solvers: list of strings
            primary and secondary (if the problem is infeasible for an approximate
            solution) solvers to be used with cvxpy; default is ['ECOS', 'SCS']

    Note
    ----
    The temporal components are updated in parallel by default, by forming a
    sequence of vertex covers.

    Returns
    -------
    C: np.ndarray
        matrix of temporal components (K x T)
    f: np.array
        vector of temporal background (length T)
    S: np.ndarray
        matrix of merged deconvolved activity (spikes) (K x T)
    bl: float
        same as input
    c1: float
        same as input
    g: float
        same as input
    sn: float
        same as input
    YrA: np.ndarray
        matrix of spatial-component-filtered raw data, after all contributions
        have been removed. YrA corresponds to the residual trace for each
        component and is used for faster plotting (K x T)
    """
    if kwargs.get('p') is None:
        raise Exception("You have to provide a value for p")

    d, T = np.shape(Y)
    nr = np.shape(A)[-1]

    if bl is None:
        bl = np.repeat(None, nr)
    if c1 is None:
        c1 = np.repeat(None, nr)
    if g is None:
        g = np.repeat(None, nr)
    if sn is None:
        sn = np.repeat(None, nr)

    A = scipy.sparse.hstack((A, coo_matrix(b)))
    S = np.zeros(np.shape(Cin))
    Cin = np.vstack((Cin, fin))
    C = Cin
    nA = np.squeeze(np.array(np.sum(np.square(A.todense()), axis=0)))

    Cin = coo_matrix(Cin)
    YA = (A.T.dot(Y).T) * spdiags(1. / nA, 0, nr + 1, nr + 1)
    AA = ((A.T.dot(A)) * spdiags(1. / nA, 0, nr + 1, nr + 1)).tocsr()
    YrA = YA - Cin.T.dot(AA)

    if backend == 'ipyparallel' or backend == 'SLURM':
        try:  # raise an exception if ipyparallel is not installed or the cluster is not running
            from ipyparallel import Client
            if backend == 'SLURM':
                if 'IPPPDIR' in os.environ and 'IPPPROFILE' in os.environ:
                    pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
                else:
                    raise Exception('environment variables not found, please source slurmAlloc.rc')
                c = Client(ipython_dir=pdir, profile=profile)
                print('Using ' + str(len(c)) + ' processes')
            else:
                c = Client()
        except Exception:
            print("this backend requires the ipyparallel package (pip install ipyparallel) "
                  "and a running cluster (type ipcluster start -n 6, where 6 is the number of nodes)")
            raise
        if len(c) < n_processes:
            print(len(c))
            raise Exception("the number of nodes in the cluster is less than the required "
                            "processes: decrease the n_processes parameter to a suitable value")
        dview = c[:n_processes]  # use the requested number of processes

    Cin = np.array(Cin.todense())
    for iter in range(ITER):
        O, lo = update_order(A.tocsc()[:, :nr])
        P_ = []
        for count, jo_ in enumerate(O):
            jo = np.array(list(jo_))
            Ytemp = YrA[:, jo.flatten()] + Cin[jo, :].T
            Ctemp = np.zeros((np.size(jo), T))
            Stemp = np.zeros((np.size(jo), T))
            btemp = np.zeros((np.size(jo), 1))
            sntemp = btemp.copy()
            c1temp = btemp.copy()
            gtemp = np.zeros((np.size(jo), kwargs['p']))
            nT = nA[jo]

            args_in = [(np.squeeze(np.array(Ytemp[:, jj])), nT[jj], jj,
                        None, None, None, None, kwargs) for jj in range(len(jo))]

            if backend == 'ipyparallel' or backend == 'SLURM':
                if debug:
                    results = dview.map_async(constrained_foopsi_parallel, args_in)
                    results.get()
                    for outp in results.stdout:
                        print(outp[:-1])
                        sys.stdout.flush()
                    for outp in results.stderr:
                        print(outp[:-1])
                        sys.stderr.flush()
                else:
                    results = dview.map_sync(constrained_foopsi_parallel, args_in)
            elif backend == 'single_thread':
                results = list(map(constrained_foopsi_parallel, args_in))
            else:
                raise Exception('Backend not defined. Use either single_thread, ipyparallel or SLURM')

            for chunk in results:
                pars = dict()
                C_, Sp_, Ytemp_, cb_, c1_, sn_, gn_, jj_ = chunk
                Ctemp[jj_, :] = C_[None, :]
                Stemp[jj_, :] = Sp_
                Ytemp[:, jj_] = Ytemp_[:, None]
                btemp[jj_] = cb_
                c1temp[jj_] = c1_
                sntemp[jj_] = sn_
                gtemp[jj_, :] = gn_.T
                bl[jo[jj_]] = cb_
                c1[jo[jj_]] = c1_
                sn[jo[jj_]] = sn_
                g[jo[jj_]] = gn_.T if kwargs['p'] > 0 else []
                pars['b'] = cb_
                pars['c1'] = c1_
                pars['neuron_sn'] = sn_
                pars['gn'] = gtemp[jj_, np.abs(gtemp[jj_, :]) > 0]
                pars['neuron_id'] = jo[jj_]
                P_.append(pars)

            YrA -= (Ctemp - C[jo, :]).T * AA[jo, :]
            C[jo, :] = Ctemp.copy()
            S[jo, :] = Stemp
            print(str(np.sum(lo[:count + 1])) + ' out of total ' + str(nr) +
                  ' temporal components updated')

        ii = nr
        cc = np.maximum(YrA[:, ii] + np.atleast_2d(Cin[ii, :]).T, 0)
        YrA -= (cc - np.atleast_2d(Cin[ii, :]).T) * AA[ii, :]
        C[ii, :] = cc.T

        if backend == 'ipyparallel' or backend == 'SLURM':
            dview.results.clear()
            c.purge_results('all')
            c.purge_everything()

        if scipy.linalg.norm(Cin - C, 'fro') / scipy.linalg.norm(C, 'fro') <= 1e-3:
            # stop if the overall temporal component does not change by much
            print("stopping: overall temporal component not changing significantly")
            break
        else:
            Cin = C

    f = C[nr:, :]
    C = C[:nr, :]
    YrA = np.array(YrA[:, :nr]).T
    P_ = sorted(P_, key=lambda k: k['neuron_id'])

    if backend == 'ipyparallel' or backend == 'SLURM':
        c.close()

    return C, f, S, bl, c1, sn, g, YrA  # , P_
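# A minimal, self-contained sketch of the backend-dispatch pattern used above:
# the same worker is mapped either with the builtin map (single thread) or with
# a DirectView's map_sync on an ipyparallel cluster. The names square_worker
# and run_map are hypothetical and only for illustration.
def square_worker(x):
    return x * x

def run_map(args, backend='single_thread', n_processes=2):
    if backend == 'ipyparallel':
        from ipyparallel import Client
        c = Client()  # assumes "ipcluster start -n <n>" is already running
        if len(c) < n_processes:
            raise Exception('fewer engines than requested processes')
        dview = c[:n_processes]
        try:
            return dview.map_sync(square_worker, args)
        finally:
            c.close()
    elif backend == 'single_thread':
        return list(map(square_worker, args))
    else:
        raise Exception('Backend not defined. Use single_thread or ipyparallel')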
print("CALC_GLOBAL: greedy single-mode activation calculations") input_fn = INPUT_FN print(" analyzing file %s" % input_fn) # Construct the save directory timestamp = time.strftime("%Y%m%d-%H%M%S", time.gmtime()) SAVE_DIR = os.path.join(os.getcwd(), "save_%s" % timestamp) if not os.path.exists(SAVE_DIR): os.makedirs(SAVE_DIR) print(" saving into directory %s" % SAVE_DIR) if PARALLEL: from ipyparallel import Client # PARALLEL PROCESSING CONTROL client = Client(profile="legion") dv = client[:] with dv.sync_imports(): import os import parcel_model import numpy dv['exec_run'] = exec_run dv['smax_nact_calc'] = smax_nact_calc dv['mode_dict'] = mode_dict # Correct save_dir for network mount SAVE_DIR = os.path.join("/net/legion", SAVE_DIR) dv['SAVE_DIR'] = SAVE_DIR dv['MAX_ITER'] = MAX_ITER
from ipyparallel import Client

rc = Client(packer='pickle')
view = rc[:]
results = view.map(lambda x: x**30, range(8))
print(results.get())
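# For comparison, a hedged sketch of the blocking form: map_sync returns the
# results directly instead of an AsyncMapResult, so no .get() is needed.
from ipyparallel import Client

rc = Client()
view = rc[:]
squares = view.map_sync(lambda x: x ** 2, range(8))  # blocks until done
print(squares)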
import numpy as np
from ipyparallel import Client, require, interactive

if __name__ == '__main__':
    Jmax = 50
    thetaN = 257
    linear = False
    if linear:
        ext = 'linear'
        balance = np.array([.28, .40, .49, .58, .70])
    else:
        ext = 'symtop'
        balance = np.array([.284, .431, .568, .716])

    cl = Client()
    view = cl[:]
    view['thN'] = thetaN
    view['Jmax'] = Jmax

    @require('os')
    @interactive
    def wig_funs():
        # runs on the engines; k, m and ds are expected to be pushed beforehand
        os.chdir('/home/brausse/program/align-symtop/')
        from symtop import generate_wigner_KM
        for (ki, mi) in zip(k, m):
            ds.append(generate_wigner_KM(ki, mi, Jmax, thN))

    j21 = np.arange(-Jmax, Jmax + 1)
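# Hedged sketch of how a function decorated with @require/@interactive is
# actually executed on the engines: view.apply_sync ships it over, @require
# guarantees the named module is imported there first, and @interactive makes
# pushed variables (like thN and Jmax above) visible as globals. The names
# offset and shifted_sqrt are hypothetical.
from ipyparallel import Client, require, interactive

cl = Client()
view = cl[:]
view['offset'] = 10        # hypothetical pushed variable

@require('math')
@interactive
def shifted_sqrt(x):
    return math.sqrt(x) + offset   # both names are resolved on the engine

print(view.apply_sync(shifted_sqrt, 2.0))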
def run_task(seed, task_id, estimator_name, n_iter, n_jobs, n_folds_inner_cv,
             profile, joblib_tmp_dir, run_tmp_dir):
    # retrieve dataset / task
    task = openml.tasks.get_task(task_id)
    num_features = task.get_X_and_y()[0].shape[1]
    indices = task.get_dataset().get_features_by_type('nominal', [task.target_name])

    # retrieve classifier
    classifierfactory = openmlstudy14.pipeline.EstimatorFactory(
        n_folds_inner_cv, n_iter, n_jobs)
    estimator = classifierfactory.get_flow_mapping()[estimator_name](
        indices, num_features=num_features)

    print('Running task with ID %d.' % task_id)
    print('Arguments: random search iterations: %d, inner CV folds %d, '
          'n parallel jobs: %d, seed %d' % (n_iter, n_folds_inner_cv, n_jobs, seed))
    print('Model: %s' % str(estimator))
    flow = openml.flows.sklearn_to_flow(estimator)
    flow.tags.append('study_14')

    import time
    start_time = time.time()

    # TODO generate a flow first
    if profile is None:
        import warnings
        with warnings.catch_warnings():
            warnings.filterwarnings(
                'ignore', module=r'sklearn\.externals\.joblib\.parallel')
            run = openml.runs.run_flow_on_task(task, flow, seed=seed)
    else:
        print('Using ipython parallel with scheduler file %s' % profile)
        for i in range(1000):
            profile_file = os.path.join(os.path.expanduser('~'), '.ipython',
                                        'profile_%s' % profile, 'security',
                                        'ipcontroller-engine.json')
            try:
                with open(profile_file) as fh:
                    scheduler_information = yaml.safe_load(fh)
                break
            except FileNotFoundError:
                print('scheduler file %s not found. sleeping ... zzz' % profile_file)
                time.sleep(1)
                continue
        c = Client(profile=profile)
        bview = c.load_balanced_view()
        register_parallel_backend(
            'ipyparallel',
            lambda: NPCachingIpyParallelBackend(view=bview, tmp_dir=joblib_tmp_dir))
        with parallel_backend('ipyparallel'):
            run = openml.runs.run_flow_on_task(task, flow, seed=seed)
    end_time = time.time()

    run.tags.append('study_14')
    tmp_dir = os.path.join(run_tmp_dir, '%s_%s' % (str(task_id), estimator_name))
    print(tmp_dir)
    try:
        os.makedirs(tmp_dir)
    except Exception as e:
        print(e)
    run_xml = run._create_description_xml()
    predictions_arff = arff.dumps(run._generate_arff_dict())
    with open(tmp_dir + '/run.xml', 'w') as f:
        f.write(run_xml)
    with open(tmp_dir + '/predictions.arff', 'w') as f:
        f.write(predictions_arff)
    run_prime = run.publish()
    print('READTHIS', estimator_name, task_id, run_prime.run_id,
          end_time - start_time)

    return run
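# The NPCachingIpyParallelBackend above is project-specific. A minimal hedged
# sketch with the stock joblib backend that ships in ipyparallel.joblib
# (available in recent ipyparallel releases) looks like this:
from ipyparallel import Client
from ipyparallel.joblib import IPythonParallelBackend
from joblib import Parallel, delayed, register_parallel_backend, parallel_backend

c = Client()
bview = c.load_balanced_view()
register_parallel_backend('ipyparallel',
                          lambda: IPythonParallelBackend(view=bview))
with parallel_backend('ipyparallel'):
    out = Parallel(n_jobs=len(c))(delayed(abs)(-i) for i in range(10))
print(out)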
class IPClusterEnsemble(SurveyEnsemble):
    """Parallelized survey ensemble based on IPython parallel (ipcluster)"""

    def __init__(self, **specs):
        SurveyEnsemble.__init__(self, **specs)
        self.verb = specs.get('verbose', True)

        # access the cluster
        self.rc = Client()
        self.dview = self.rc[:]
        self.dview.block = True
        with self.dview.sync_imports():
            import EXOSIMS, EXOSIMS.util.get_module, \
                os, os.path, time, random, cPickle, traceback
        if 'logger' in specs:
            specs.pop('logger')
        if 'seed' in specs:
            specs.pop('seed')
        self.dview.push(dict(specs=specs))
        res = self.dview.execute(
            "SS = EXOSIMS.util.get_module.get_module(specs['modules'] "
            "['SurveySimulation'], 'SurveySimulation')(**specs)")
        self.vprint("Created SurveySimulation objects on %d engines." %
                    len(self.rc.ids))
        # for row in res.stdout:
        #     self.vprint(row)
        self.lview = self.rc.load_balanced_view()

    def run_ensemble(self, sim, nb_run_sim, run_one=None, genNewPlanets=True,
                     rewindPlanets=True, kwargs={}):
        t1 = time.time()
        async_res = []
        for j in range(nb_run_sim):
            ar = self.lview.apply_async(run_one, genNewPlanets=genNewPlanets,
                                        rewindPlanets=rewindPlanets, **kwargs)
            async_res.append(ar)
        print("Submitted %d tasks." % len(async_res))

        ar = self.rc._asyncresult_from_jobs(async_res)
        while not ar.ready():
            ar.wait(10.)
            clear_output(wait=True)
            if ar.progress > 0:
                timeleft = ar.elapsed / ar.progress * (nb_run_sim - ar.progress)
                if timeleft > 3600.:
                    timeleftstr = "%2.2f hours" % (timeleft / 3600.)
                elif timeleft > 60.:
                    timeleftstr = "%2.2f minutes" % (timeleft / 60.)
                else:
                    timeleftstr = "%2.2f seconds" % timeleft
            else:
                timeleftstr = "who knows"
            print("%4i/%i tasks finished after %4i s. About %s to go." %
                  (ar.progress, nb_run_sim, ar.elapsed, timeleftstr), end="")
            sys.stdout.flush()
        # self.rc.wait(async_res)
        # self.rc.wait_interactive(async_res)
        t2 = time.time()
        print("\nCompleted in %d sec" % (t2 - t1))

        res = [ar.get() for ar in async_res]
        return res
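# Hedged sketch of the submit-then-poll pattern used in run_ensemble above,
# using map_async on a load-balanced view so the combined AsyncMapResult
# exposes progress/elapsed directly. The job function one_sim is hypothetical.
from ipyparallel import Client

def one_sim(i):
    import time
    time.sleep(0.1)
    return i * i

rc = Client()
lview = rc.load_balanced_view()
ar = lview.map_async(one_sim, range(20))
while not ar.ready():
    ar.wait(1.)
    print('%d/20 done after %.1f s' % (ar.progress, ar.elapsed))
print(ar.get())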
    fnames1.append(f)
#%% motion correct
t1 = time()
file_res = cb.motion_correct_parallel(fnames1, fr=30, template=None, margins_out=0,
                                      max_shift_w=45, max_shift_h=45, dview=None,
                                      apply_smooth=True)
t2 = time() - t1
print(t2)
#%% LOGIN TO MASTER NODE
# TYPE salloc -n n_nodes --exclusive
# source activate environment_name
#%%
slurm_script = '/mnt/xfs1/home/agiovann/SOFTWARE/Constrained_NMF/SLURM/slurmStart.sh'
cse.utilities.start_server(slurm_script=slurm_script)
# n_processes = 27  # np.maximum(psutil.cpu_count() - 2, 1), roughly the number of cores on your machine minus 1
pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
client_ = Client(ipython_dir=pdir, profile=profile)
print('Using ' + str(len(client_)) + ' processes')
#%% motion correct
t1 = time()
file_res = cb.motion_correct_parallel(fnames, fr=30, template=None, margins_out=0,
                                      max_shift_w=45, max_shift_h=45, dview=client_[::2],
                                      apply_smooth=True)
t2 = time() - t1
print(t2)
#%%
all_movs = []
for f in fnames:
    print(f)
    with np.load(f[:-3] + 'npz') as fl:
        # pl.subplot(1, 2, 1)
        # pl.imshow(fl['template'], cmap=pl.cm.gray)
def main():
    import numpy as np
    import os, sys, time, getopt
    from osgeo import gdal
    from auxil import subset
    from ipyparallel import Client
    from osgeo.gdalconst import GA_ReadOnly, GDT_Byte, GDT_Float32
    from tempfile import NamedTemporaryFile
    usage = '''
Usage:
------------------------------------------------
Sequential change detection for polarimetric SAR images

python %s [OPTIONS] infiles* outfile enl

Options:
  -h           this help
  -d  <list>   files are to be co-registered to a subset
               dims = [x0,y0,rows,cols] of the first image;
               otherwise it is assumed that the images are
               co-registered and have identical spatial dimensions
  -m           run 3x3 median filter over p-values
  -s  <float>  significance level for change detection (default 0.0001)

infiles:       full paths to all input files:
               /path/to/infile_1 /path/to/infile_2 ... /path/to/infile_k
outfile_stub:  without path (outputs will be written to the same
               directory as infile_1)
enl:           equivalent number of looks

files written: outfile_stub_cmap, outfile_stub_fmap, outfile_stub_bmap,
               outfile_stub_smap, infile_last_atsflog, infile_last_atsf
-------------------------------------------------''' % sys.argv[0]
    options, args = getopt.getopt(sys.argv[1:], 'hmd:s:')
    dims = None
    significance = 0.0001
    medianfilter = False
    for option, value in options:
        if option == '-h':
            print(usage)
            return
        elif option == '-m':
            medianfilter = True
        elif option == '-d':
            dims = eval(value)
        elif option == '-s':
            significance = eval(value)
    if len(args) < 4:
        print('incorrect number of arguments')
        print(usage)
        sys.exit()
    k = len(args) - 2
    fns = args[0:k]
    lastfn = fns[-1]
    n = np.float64(eval(args[-1]))
    outfn = args[-2]
    gdal.AllRegister()
    start = time.time()
    # first SAR image
    try:
        inDataset1 = gdal.Open(fns[0], GA_ReadOnly)
        cols = inDataset1.RasterXSize
        rows = inDataset1.RasterYSize
        bands = inDataset1.RasterCount
    except Exception as e:
        print('Error: %s -- Could not read file' % e)
        sys.exit(1)
    if dims is not None:
        # images are assumed not yet co-registered, so subset the first
        # image and register the others
        _, _, cols, rows = dims
        fn0 = subset.subset(fns[0], dims)
        args1 = [(fns[0], fns[i], dims) for i in range(1, k)]
        try:
            print('\nattempting parallel execution of co-registration ...')
            start1 = time.time()
            c = Client()
            print('available engines %s' % str(c.ids))
            v = c[:]
            fns = v.map_sync(call_register, args1)
            print('elapsed time for co-registration: ' + str(time.time() - start1))
        except Exception as e:
            start1 = time.time()
            print('%s \nfailed, so running sequential co-registration ...' % e)
            fns = list(map(call_register, args1))
        fns.insert(0, fn0)
        # point inDataset1 to the subset image for correct georeferencing
        inDataset1 = gdal.Open(fn0, GA_ReadOnly)
    print('===============================================')
    print('     Multi-temporal SAR Change Detection')
    print('===============================================')
    print(time.asctime())
    print('First (reference) filename: %s' % fns[0])
    print('number of images: %i' % k)
    print('equivalent number of looks: %f' % n)
    print('significance level: %f' % significance)
    if bands == 9:
        print('Quad polarization')
    elif bands == 4:
        print('Dual polarization')
    elif bands == 3:
        print('Quad polarization, diagonal only')
    elif bands == 2:
        print('Dual polarization, diagonal only')
    else:
        print('Intensity images')
    # output file
    path = os.path.abspath(fns[0])
    dirn = os.path.dirname(path)
    outfn = dirn + '/' + outfn
    # create temporary, memory-mapped array of change indices p(Ri<ri)
    mm = NamedTemporaryFile()
    pvarray = np.memmap(mm.name, dtype=np.float64, mode='w+',
                        shape=(k, k, rows * cols))
    print('pre-calculating Rj and p-values ...')
    start1 = time.time()
    try:
        print('attempting parallel calculation ...')
        c = Client()
        print('available engines %s' % str(c.ids))
        v = c[:]
        print('ell = ', end=' ')
        for i in range(k - 1):
            print(i + 1, end=' ')
            args1 = [(fns[i:j + 2], n, cols, rows, bands) for j in range(i, k - 1)]
            results = v.map_sync(PV, args1)  # list of tuples (p-value, lnRj)
            pvs = [result[0] for result in results]
            if medianfilter:
                pvs = v.map_sync(call_median_filter, pvs)
            lnRjs = np.array([result[1] for result in results])
            lnQ = np.sum(lnRjs, axis=0)
            pvQ = getpvQ(lnQ, bands, k - i, n)
            for j in range(i, k - 1):
                pvarray[i, j, :] = pvs[j - i].ravel()
            pvarray[i, k - 1, :] = pvQ.ravel()
    except Exception as e:
        print('%s \nfailed, so running sequential calculation ...' % e)
        print('ell = ', end=' ')
        for i in range(k - 1):
            print(i + 1, end=' ')
            args1 = [(fns[i:j + 2], n, cols, rows, bands) for j in range(i, k - 1)]
            results = list(map(PV, args1))  # list of tuples (p-value, lnRj)
            pvs = [result[0] for result in results]
            if medianfilter:
                pvs = list(map(call_median_filter, pvs))
            lnRjs = np.array([result[1] for result in results])
            lnQ = np.sum(lnRjs, axis=0)
            pvQ = getpvQ(lnQ, bands, k - i, n)
            for j in range(i, k - 1):
                pvarray[i, j, :] = pvs[j - i].ravel()
            pvarray[i, k - 1, :] = pvQ.ravel()
    print('\nelapsed time for p-value calculation: ' + str(time.time() - start1))
    cmap, smap, fmap, bmap = change_maps(pvarray, significance)
    # post-process bmap for Loewner direction
    avimg = getimg(fns[0])
    avimglog = cmap * 0 + k
    r = 1.0
    for i in range(k - 1):
        img = getimg(fns[i + 1])
        direct = loewner(img - avimg)
        bmap[:, i] = np.where(bmap[:, i], direct, bmap[:, i])
        avimglog = np.where(bmap[:, i], k - i, avimglog)
        # provisional means
        r += 1.0
        avimg = avimg + (img - avimg) / r
        for j in range(bands):
            # reset avimg where change occurred
            avimg[:, j] = np.where(bmap[:, i], img[:, j], avimg[:, j])
    # write to file system
    cmap = np.reshape(cmap, (rows, cols))
    fmap = np.reshape(fmap, (rows, cols))
    smap = np.reshape(smap, (rows, cols))
    bmap = np.reshape(bmap, (rows, cols, k - 1))
    atsf = np.reshape(avimg, (rows, cols, bands))
    avimglog = np.reshape(avimglog, (rows, cols))
    driver = inDataset1.GetDriver()
    basename = os.path.basename(outfn)
    name, _ = os.path.splitext(basename)
    outfn1 = outfn.replace(name, name + '_cmap')
    outDataset = driver.Create(outfn1, cols, rows, 1, GDT_Byte)
    geotransform = inDataset1.GetGeoTransform()
    if geotransform is not None:
        outDataset.SetGeoTransform(geotransform)
    projection = inDataset1.GetProjection()
    if projection is not None:
        outDataset.SetProjection(projection)
    outBand = outDataset.GetRasterBand(1)
    outBand.WriteArray(cmap, 0, 0)
    outBand.FlushCache()
    print('last change map written to: %s' % outfn1)
    outfn2 = outfn.replace(name, name + '_fmap')
    outDataset = driver.Create(outfn2, cols, rows, 1, GDT_Byte)
    if geotransform is not None:
        outDataset.SetGeoTransform(geotransform)
    if projection is not None:
        outDataset.SetProjection(projection)
    outBand = outDataset.GetRasterBand(1)
    outBand.WriteArray(fmap, 0, 0)
    outBand.FlushCache()
    print('frequency map written to: %s' % outfn2)
    outfn3 = outfn.replace(name, name + '_bmap')
    outDataset = driver.Create(outfn3, cols, rows, k - 1, GDT_Byte)
    if geotransform is not None:
        outDataset.SetGeoTransform(geotransform)
    if projection is not None:
        outDataset.SetProjection(projection)
    for i in range(k - 1):
        outBand = outDataset.GetRasterBand(i + 1)
        outBand.WriteArray(bmap[:, :, i], 0, 0)
        outBand.FlushCache()
    print('bitemporal map image written to: %s' % outfn3)
    outfn4 = outfn.replace(name, name + '_smap')
    outDataset = driver.Create(outfn4, cols, rows, 1, GDT_Byte)
    if geotransform is not None:
        outDataset.SetGeoTransform(geotransform)
    if projection is not None:
        outDataset.SetProjection(projection)
    outBand = outDataset.GetRasterBand(1)
    outBand.WriteArray(smap, 0, 0)
    outBand.FlushCache()
    print('first change map written to: %s' % outfn4)
    basename = os.path.basename(lastfn)
    name, _ = os.path.splitext(basename)
    outfn5 = lastfn.replace(name, name + '_atsflog')
    outDataset = driver.Create(outfn5, cols, rows, 1, GDT_Byte)
    if geotransform is not None:
        outDataset.SetGeoTransform(geotransform)
    if projection is not None:
        outDataset.SetProjection(projection)
    outBand = outDataset.GetRasterBand(1)
    outBand.WriteArray(avimglog, 0, 0)
    outBand.FlushCache()
    print('atsf log written to: %s' % outfn5)
    outfn6 = lastfn.replace(name, name + '_atsf')
    outDataset = driver.Create(outfn6, cols, rows, bands, GDT_Float32)
    if geotransform is not None:
        outDataset.SetGeoTransform(geotransform)
    if projection is not None:
        outDataset.SetProjection(projection)
    for i in range(bands):
        outBand = outDataset.GetRasterBand(i + 1)
        outBand.WriteArray(atsf[:, :, i], 0, 0)
        outBand.FlushCache()
    print('atsf written to: %s' % outfn6)
    print('total elapsed time: ' + str(time.time() - start))
    outDataset = None
    inDataset1 = None
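# Hedged distillation of the try-parallel/fall-back-to-sequential pattern
# used twice above, separated out so the fallback logic is easy to reuse.
# The helper name map_maybe_parallel is hypothetical.
def map_maybe_parallel(func, jobs):
    try:
        from ipyparallel import Client
        c = Client()                 # raises if no cluster is running
        return c[:].map_sync(func, jobs)
    except Exception as e:
        print('%s \nfalling back to sequential execution ...' % e)
        return list(map(func, jobs))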
try:
    c.close()
except Exception:
    print('C was not existing, creating one')
print("Stopping cluster to avoid unnecessary use of memory....")
sys.stdout.flush()
if backend == 'SLURM':
    try:
        stop_server(is_slurm=True)
    except Exception:
        print('Nothing to stop')
    # todocument
    slurm_script = '/mnt/xfs1/home/agiovann/SOFTWARE/Constrained_NMF/SLURM/slurmStart.sh'
    cm.start_server(slurm_script=slurm_script)
    pdir, profile = os.environ['IPPPDIR'], os.environ['IPPPROFILE']
    c = Client(ipython_dir=pdir, profile=profile)
else:
    cm.stop_server()
    cm.start_server()
    c = Client()
print('Using ' + str(len(c)) + ' processes')
dview = c[:len(c)]
#%% FOR LOADING ALL TIFF FILES IN A FOLDER AND SAVING THEM AS A SINGLE MEMORY-MAPPABLE FILE
fnames = []
base_folder = './example_movies/'  # folder containing the demo files
for file in glob.glob(os.path.join(base_folder, '*.tif')):
    if file.endswith("ie.tif"):
        fnames.append(os.path.abspath(file))
fnames.sort()
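# A hedged, generic version of the restart-and-reconnect step above, using
# plain ipcluster commands instead of the caiman helpers (cm.start_server
# wraps something similar internally; the exact behavior may differ):
import os
import time
from ipyparallel import Client

os.system('ipcluster stop')                  # harmless if nothing is running
os.system('ipcluster start -n 4 --daemonize')
time.sleep(10)                               # crude wait for engines to register
c = Client()
print('Using ' + str(len(c)) + ' processes')
dview = c[:len(c)]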
# coding=utf-8
__author__ = 'linchao'
import my_test as mt
import pandas as pd
import numpy as np
import MySQLdb
import datetime
import matplotlib.pyplot as plt
import my_performance as mp
from time import *
from ipyparallel import Client
import my_test
import cPickle

c = Client('ipcontroller-client.json')
start = datetime.date(2014, 1, 1)   # backtest start date
end = datetime.date(2015, 4, 1)     # backtest end date
train_count = 2
test_count = 1
train_time_list = []
test_time_list = []
time_list = []
head_list = []
head = start.month - 1
train_start = start.month + train_count - 1
test_start = start.month + train_count + test_count - 1
con = False
while 1:
    train_y = train_start / 12
    test_y = test_start / 12
    head_y = head / 12
# =============================================== #
# For loop to run the grid search and save output
# =============================================== #
model_name = 'stimcoding_z_SW'

def parloop(i):
    # Create a list for the model variables
    nEs = list(mydata.columns[10:14])
    modelCount = 0
    for nE in nEs:
        print('***** WORKING ON ELEC ' + nE + ' *****\n\n\n')
        modelCount = modelCount + 1
        try:
            print('We are up to model ', modelCount, '!')
            run_model(mypath, model_name, nE, 8000, 2000, 1)
        except Exception:
            print('Failing to run model ', modelCount, '!')
            continue

from ipyparallel import Client
rc = Client(profile='default')
v = rc[:]   # reuse the client rather than constructing a second one
jobs = v.map(parloop, range(4))
import datetime

from numpy import array, nan, percentile, savez
from ipyparallel import Client

from .adf_simulation import adf_simulation

# Number of repetitions
EX_NUM = 500
# Number of simulations per exercise
EX_SIZE = 200000
# Approximately controls memory use, in MiB
MAX_MEMORY_SIZE = 100

rc = Client()
dview = rc.direct_view()
with dview.sync_imports():
    from numpy import arange, zeros
    from numpy.random import RandomState


def lmap(*args):
    return list(map(*args))


def wrapper(n, trend, b, seed=0):
    """
    Wraps and blocks the main simulation so that the maximum amount of memory
    can be controlled on multi-processor systems when executing in parallel
    """
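# Hedged sketch of how a wrapper like the one above is typically fanned out:
# DirectView.map accepts multiple sequences like the builtin map, so one call
# per (n, seed) pair runs on the engines, with RandomState(seed) keeping every
# engine's stream independent. The toy body stands in for the real simulation.
def toy_wrapper(n, seed=0):
    from numpy.random import RandomState
    rs = RandomState(seed)
    return rs.standard_normal(n).mean()

results = dview.map_sync(toy_wrapper, [1000] * 8, list(range(8)))
print(results)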
def main(num_rows):
    num_rows = int(num_rows)

    # ---------- preprocessing ----------
    get_questions = ExtractCols(['question1', 'question2'])

    # ---------- question typing ----------
    sent_tokenizer = SentTokenize()
    question_typer = QuestionTypes(question_types=question_types)
    question_type_pipe = Pipeline([('sent_tokenizer', sent_tokenizer),
                                   ('question_typer', question_typer)])

    # ---------- distance calculation ----------
    calc_masi_dist = MasiDistance()
    # These have tuning parameters and could go through cv...
    calc_edit_dist1 = EditDistance()
    calc_edit_dist2 = EditDistance(sub_cost=2.0)
    calc_edit_dist3 = EditDistance(sub_cost=0.5)
    calc_edit_dist4 = EditDistance(transpositions=True)
    calc_jacc_dist = JaccardDistance()
    dist_fu = FeatureUnion([('calc_masi_dist', calc_masi_dist),
                            ('calc_edit_dist1', calc_edit_dist1),
                            ('calc_edit_dist2', calc_edit_dist2),
                            ('calc_edit_dist3', calc_edit_dist3),
                            ('calc_edit_dist4', calc_edit_dist4),
                            ('calc_jacc_dist', calc_jacc_dist)],
                           n_jobs=1)
    word_tokenizer = WordTokenize()
    dist_pipe = Pipeline([('word_tokenizer', word_tokenizer),
                          ('dist_fu', dist_fu)])
    output_fu = FeatureUnion([('dist_pipe', dist_pipe),
                              ('question_type_pipe', question_type_pipe)],
                             n_jobs=1)

    # ---------- final assembly ----------
    data_pipe = Pipeline([('get_questions', get_questions),
                          ('output_fu', output_fu)])  # output feature union

    pool = Client()
    pool[:].map(os.chdir, [FEATURES_DIR] * len(pool))
    with pool[:].sync_imports():
        pass
    pool[:].push({'data_pipe': data_pipe})
    n_jobs = len(pool)
    left_indices = range(0, CHUNKSIZE, CHUNKSIZE // n_jobs)
    right_indices = range(CHUNKSIZE // n_jobs, CHUNKSIZE + 1, CHUNKSIZE // n_jobs)

    for f_name in RAW_DATA_FILES:
        h5f = tables.open_file(INTERIM_HDF_PATH, 'r')
        n_chunks = h5f.get_node_attr('/' + f_name, 'n_chunks')
        h5f.close()
        for i in range(n_chunks):
            print('chunk', i + 1, '/', n_chunks, end='\r')
            sys.stdout.flush()
            Di = pd.read_hdf(INTERIM_HDF_PATH,
                             key='/' + f_name + '/' + f_name + str(i))
            q1_i = Di.loc[:, Q[0]].values
            q2_i = Di.loc[:, Q[1]].values
            try:
                q1 = np.concatenate((q1, q1_i))
                q2 = np.concatenate((q2, q2_i))
            except NameError:
                q1 = q1_i
                q2 = q2_i
            results = []
            # Send to workers.
            # It would be much faster to load chunks of data off disk as fast
            # as possible and use some kind of switch on the pool to send them
            # chunks when they are ready. As is, a huge amount of time
            # (probably most) is spent waiting for io, not to mention the time
            # spent copying data back and forth between the processes.
            # Maybe the best approach is for each process to load and store
            # its own results to the database independently, similar to what I
            # did for the dwglasso application.
            for pi, li, ri in zip(pool, left_indices, right_indices):
                if len(Di[li:ri]) > 0:
                    results.append(
                        pi.apply_async(data_pipe.fit_transform, Di[li:ri]))
            for res in results:
                Xi = res.get()
                try:
                    X = np.vstack((X, Xi))
                except NameError:
                    X = Xi
        X.dump(INTERIM_DATA_DIR + 'X_dist_' + f_name + '.npy')
        del X  # Free up memory
        nrows = len(q1)
        assert nrows == len(q2), 'q1 and q2 not equal length!'
        q1 = np.append(q1, q2)
        del q2
        q1.dump(INTERIM_DATA_DIR + 'q_' + f_name + '.npy')
        del q1
    return
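# The manual left/right index arithmetic above hand-rolls what DirectView.map
# already does. A hedged equivalent for a single chunk, where transform stands
# in for data_pipe.fit_transform on a slice of the chunk:
import numpy as np
from ipyparallel import Client

def transform(rows):
    # stand-in for the real per-slice feature extraction
    return [len(r) for r in rows]

pool = Client()
dv = pool[:]
chunk = [['a', 'b'], ['c'], ['d', 'e', 'f'], ['g']]
# strided split: one sublist per engine, then one transform call per sublist
parts = dv.map_sync(transform, [chunk[i::len(pool)] for i in range(len(pool))])
X = np.concatenate([np.asarray(p) for p in parts])
print(X)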
# This yield-style function is intended for use as a pytest fixture: the test
# receives a connected dask client and the cluster is torn down afterwards.
import pytest

@pytest.fixture
def dask_executor():
    from distributed import Client
    client = Client(n_workers=1)
    yield client
    client.close()
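# Hedged example of consuming the fixture above in a pytest test; the test
# body is illustrative only.
def test_square(dask_executor):
    fut = dask_executor.submit(lambda x: x * x, 3)
    assert fut.result() == 9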
        self.dView.execute(cmdStr, block=True)
        self.parallelSetupFlag = True

############################################################################

if __name__ == "__main__":

    assert False, ("This code uses the old incomplete Planert data; for the article "
                   "we instead resorted to surrogate data. "
                   "See Planert2010.py and Planert2010part2.py")

    print("IPYTHON_PROFILE = " + str(os.getenv('IPYTHON_PROFILE')))

    if (os.getenv('IPYTHON_PROFILE') is not None
            or os.getenv('SLURMID') is not None):
        from ipyparallel import Client
        rc = Client(profile=os.getenv('IPYTHON_PROFILE'), debug=False)
        print('Client IDs: ' + str(rc.ids))

        # http://davidmasad.com/blog/simulation-with-ipyparallel/
        # http://people.duke.edu/~ccc14/sta-663-2016/19C_IPyParallel.html
        dView = rc.direct_view(targets='all')  # rc[:] -- direct view into clients
        lbView = rc.load_balanced_view(targets='all')
    else:
        dView = None

    traceList = "DATA/Planert2010/d1d2conns/traces/trace_table.txt"
    logFile = "logs/Planert-log.txt"

    osp = OptimiseSynapsesPlanert(traceList, dView=dView, logFileName=logFile)