def checkAllDatasets(self):
    """
    Look for corrupted files in the whole catalog.
    """
    catalog = self.readCatalog()
    self.parallel_ = Parallel(50, self.queue_)
    ## self.parallel_ = Parallel(1,self.queue_)
    print "Checking all datasets"
    for dataset in catalog.keys():
        self.checkDatasetFiles(dataset, catalog)
    outcomes = self.parallel_.wait(printOutput=False)
    ## for dsetName,ifile,fName,ret,out in outcomes:
    for ign1, ign2, outcome in outcomes:
        dsetName, ifile, fName, ret, out = outcome
        info = catalog[dsetName]["files"][ifile]
        if info["name"] != fName:
            print "Inconsistent outcome ", info["name"], dsetName, ifile, fName, ret, out
        else:
            if ret != 0:
                info["bad"] = True
            else:
                extraInfo = json.loads(str(out))
                for key, val in extraInfo.iteritems():
                    info[key] = val
    print "Writing catalog"
    self.writeCatalog(catalog)
    print "Done"
def __init__(self, mode='dummy', address=None, high_duration=0.005,
             verbose=None):
    if mode == 'parallel':
        if sys.platform.startswith('linux'):
            address = '/dev/parport0' if address is None else address
            if not isinstance(address, string_types):
                raise ValueError('address must be a string or None, got %s '
                                 'of type %s' % (address, type(address)))
            from parallel import Parallel
            self._port = Parallel(address)
            self._portname = address
            self._set_data = self._port.setData
        elif sys.platform.startswith('win'):
            from ctypes import windll
            if not hasattr(windll, 'inpout32'):
                raise SystemError(
                    'Must have inpout32 installed, see:\n\n'
                    'http://www.highrez.co.uk/downloads/inpout32/')
            base = 0x378 if address is None else address
            if isinstance(base, string_types):
                base = int(base, 16)
            if not isinstance(base, int):
                raise ValueError('address must be int or None, got %s of '
                                 'type %s' % (base, type(base)))
            self._port = windll.inpout32
            mask = np.uint8(1 << 5 | 1 << 6 | 1 << 7)
            # Use ECP to put the port into byte mode
            val = int((self._port.Inp32(base + 0x402) & ~mask) | (1 << 5))
            self._port.Out32(base + 0x402, val)
            # Now to make sure the port is in output mode we need to make
            # sure that bit 5 of the control register is not set
            val = int(self._port.Inp32(base + 2) & ~np.uint8(1 << 5))
            self._port.Out32(base + 2, val)
            self._set_data = lambda data: self._port.Out32(base, data)
            self._portname = str(base)
        else:
            raise NotImplementedError('Parallel port triggering only '
                                      'supported on Linux and Windows')
    else:  # mode == 'dummy':
        self._port = self._portname = None
        self._trigger_list = list()
        self._set_data = lambda x: (self._trigger_list.append(x)
                                    if x != 0 else None)
    self.high_duration = high_duration
    self.mode = mode
def __init__(self, motor_inputs, state=0, delay=0.05):
    '''
    :param motor_inputs: Ordered list of parallel values to turn motor
    :type motor_inputs: list or tuple
    :param state: Initial starting state of motor position
    :type state: int
    :param delay: Delay between steps (speed)
    :type delay: float
    '''
    self.MOTOR_INPUTS = motor_inputs
    self.state = state
    self.delay = delay
    # Setup parallel interface on first init
    self.parallel_interface = Parallel()
class USARTTest(unittest.TestCase):
    par = Parallel()

    def __init__(self, *args):
        unittest.TestCase.__init__(self, *args)
        self.ser = Serial(_SERIAL_PATH, baudrate=_BAUDRATE, bytesize=8,
                          parity='N', stopbits=1, timeout=1,
                          xonxoff=0, rtscts=0)
        self._toWrite = "Write_TEST123"
        self._toRead = "Read_TEST123"

    def testread(self):
        time.sleep(0.1)
        USARTTest.par.setData(0x01)
        time.sleep(0.1)
        self.ser.write(self._toRead + '\n')
        USARTTest.par.setData(0x00)
        print "See avr console..."

    def testwrite(self):
        time.sleep(0.1)
        USARTTest.par.setData(0x01)
        line = self.ser.readline(len(self._toWrite) + 1, '\n')
        USARTTest.par.setData(0x00)
        self.assertNotEqual(len(line), 0)
        self.assertNotEqual(line.find(self._toWrite), -1,
                            "Received %s" % line)
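# A minimal, assumed runner for the test case above (the _SERIAL_PATH and
# _BAUDRATE constants are module-level values defined elsewhere in the file
# and are not shown here):
if __name__ == '__main__':
    unittest.main()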
def checkDatasetFiles(self, dsetName, catalog=None):
    """
    Look for corrupted files in dataset.
    @dsetName: dataset name
    """
    writeCatalog = False
    if not catalog:
        catalog = self.readCatalog()
        writeCatalog = True
    wait = False
    if not self.parallel_:
        self.parallel_ = Parallel(16, self.queue_)
        wait = True
    print "Checking dataset", dsetName
    info = catalog[dsetName]
    files = info["files"]
    print len(files)
    for ifile, finfo in enumerate(files):
        name = finfo["name"]
        self.parallel_.run(SamplesManager.checkFile, [self, name, dsetName, ifile])
    if wait:
        self.parallel_.wait()
        self.parallel_ = None
    if writeCatalog:
        self.writeCatalog(catalog)
def checkAllDatasets(self):
    """
    Look for corrupted files in the whole catalog.
    """
    catalog = self.readCatalog()
    self.parallel_ = Parallel(50, self.queue_)
    ## self.parallel_ = Parallel(1,self.queue_)
    print "Checking all datasets"
    for dataset in catalog.keys():
        self.checkDatasetFiles(dataset, catalog)
    outcomes = self.parallel_.wait()
    for dsetName, ifile, fName, ret, out in outcomes:
        info = catalog[dsetName]["files"][ifile]
        if info["name"] != fName:
            print "Inconsistent outcome ", info["name"], dsetName, ifile, fName, ret, out
        else:
            if ret != 0:
                info["bad"] = True
            else:
                extraInfo = json.loads(str(out))
                for key, val in extraInfo.iteritems():
                    info[key] = val
    print "Writing catalog"
    self.writeCatalog(catalog)
    print "Done"
def main():
    p = Parallel()
    previous = True
    with Bus() as bus:
        with bus.get_service_proxy({"type": "speak"}, multiple=True) as s:
            while True:
                current = p.getInPaperOut()
                if current != previous:
                    previous = current
                    if not current:
                        # Button was just pressed (i.e. the paperOut pin
                        # was just shorted to ground)
                        print "Doorbell was pressed"
                        try:
                            print s["say_text"]("someone is ringing the_front doorbell", callback=None)
                        except:
                            print_exc()
                        sleep(5)
                sleep(0.03)
def __call__(self):
    """
    __call__
    Run all jobs.
    """
    self.parallel = Parallel(self.options.ncpu, lsfQueue=self.options.queue,
                             lsfJobName="%s/runJobs" % self.options.outputDir,
                             asyncLsf=False)
    self.jobs = None
    if not self.options.cont:
        self.firstRun()
    self.monitor()
def getFilesFomEOS(self, dsetName):
    """
    Read dataset files crawling EOS.
    @dsetName: dataset name
    """
    if not self.parallel_:
        self.parallel_ = Parallel(200, self.queue_)
    ret, out = self.parallel_.run("/afs/cern.ch/project/eos/installation/0.3.15/bin/eos.select",
                                  ["find", dsetName], interactive=True)[2]
    ## print out
    files = []
    for line in out.split("\n"):
        if line.endswith(".root"):
            files.append({"name": line.replace("/eos/cms", ""), "nevents": 0})
    return files
def __call__(self):
    """
    __call__
    Run all jobs.
    """
    self.parallel = Parallel(self.options.ncpu, lsfQueue=self.options.queue,
                             lsfJobName="%s/runJobs" % self.options.outputDir,
                             asyncLsf=False)
    self.jobs = None
    if not self.options.cont:
        self.firstRun()
    self.monitor()
def getFilesFomEOS(self, dsetName):
    """
    Read dataset files crawling EOS.
    @dsetName: dataset name
    """
    if not self.parallel_:
        self.parallel_ = Parallel(200, self.queue_, maxThreads=self.maxThreads_,
                                  asyncLsf=True)
    ret, out = self.parallel_.run("/afs/cern.ch/project/eos/installation/0.3.15/bin/eos.select",
                                  ["find", dsetName], interactive=True)[2]
    files = []
    for line in out.split("\n"):
        if line.endswith(".root"):
            files.append({"name": line.replace("/eos/cms", ""), "nevents": 0})
    return files
def monitor(self):
    (options, args) = (self.options, self.args)
    parallel = self.parallel
    with open("%s/task_config.json" % (options.outputDir), "r") as cfin:
        task_config = json.loads(cfin.read())
    doutfiles = task_config["datasets_output"]
    poutfiles = task_config["process_output"]
    outfiles = task_config["output"]
    outputPfx = task_config["outputPfx"]
    if not options.dry_run:
        ## FIXME: job resubmission
        self.jobs = task_config["jobs"]
        returns = self.wait(parallel, self)
        task_config["jobs"] = self.jobs
    if options.hadd:
        print "All jobs finished. Merging output."
        p = Parallel(options.ncpu)
        hadd = "hadd -f "
        if options.hadd_process:
            for proc, out in poutfiles.iteritems():
                outfile, outfiles = out
                p.run("%s %s" % (hadd, outfile), outfiles)
        if options.hadd_dataset:
            if options.hadd_process:
                hadd += " -T"
            for dset, out in doutfiles.iteritems():
                outfile, outfiles = out
                p.run("%s %s" % (hadd, outfile), outfiles)
        if not (options.hadd_process or options.hadd_dataset):
            p.run("%s %s.root" % (hadd, outputPfx), outfiles)
        self.wait(p)
    with open("%s/task_config.json" % (options.outputDir), "w+") as cfout:
        cfout.write(json.dumps(task_config, indent=4))
        cfout.close()
    self.parallel.stop()
def monitor(self):
    (options, args) = (self.options, self.args)
    parallel = self.parallel
    with open("%s/task_config.json" % (options.outputDir), "r") as cfin:
        task_config = json.loads(cfin.read())
    doutfiles = task_config["datasets_output"]
    poutfiles = task_config["process_output"]
    outfiles = task_config["output"]
    outputPfx = task_config["outputPfx"]
    self.task_config = task_config
    if options.summary:
        self.printSummary()
        return
    if not options.dry_run:
        ## FIXME: job resubmission
        returns = self.wait(parallel, self)
    if options.hadd:
        print "All jobs finished. Merging output."
        p = Parallel(options.ncpu)
        hadd = "hadd -f "
        if options.hadd_process:
            for proc, out in poutfiles.iteritems():
                outfile, outfiles = out
                p.run("%s %s" % (hadd, outfile), outfiles)
        if options.hadd_dataset:
            if options.hadd_process:
                hadd += " -T"
            for dset, out in doutfiles.iteritems():
                outfile, outfiles = out
                p.run("%s %s" % (hadd, outfile), outfiles)
        if not (options.hadd_process or options.hadd_dataset):
            p.run("%s %s.root" % (hadd, outputPfx), outfiles)
        self.wait(p)
    self.storeTaskConfig(task_config)
    self.parallel.stop()
def monitor(self):
    (options, args) = (self.options, self.args)
    parallel = self.parallel
    with open("%s/task_config.json" % (options.outputDir), "r") as cfin:
        task_config = json.loads(cfin.read())
    doutfiles = task_config["datasets_output"]
    poutfiles = task_config["process_output"]
    outfiles = task_config["output"]
    outputPfx = task_config["outputPfx"]
    if not options.dry_run:
        ## FIXME: job resubmission
        self.jobs = task_config["jobs"]
        returns = self.wait(parallel, self)
        task_config["jobs"] = self.jobs
    if options.hadd:
        print "All jobs finished. Merging output."
        p = Parallel(options.ncpu)
        hadd = "hadd -f "
        if options.hadd_process:
            for proc, out in poutfiles.iteritems():
                outfile, outfiles = out
                p.run("%s %s" % (hadd, outfile), outfiles)
        if options.hadd_dataset:
            if options.hadd_process:
                hadd += " -T"
            for dset, out in doutfiles.iteritems():
                outfile, outfiles = out
                p.run("%s %s" % (hadd, outfile), outfiles)
        if not (options.hadd_process or options.hadd_dataset):
            p.run("%s %s.root" % (hadd, outputPfx), outfiles)
        self.wait(p)
    with open("%s/task_config.json" % (options.outputDir), "w+") as cfout:
        cfout.write(json.dumps(task_config, indent=4))
        cfout.close()
    self.parallel.stop()
def __call__(self):
    """
    __call__
    Run all jobs.
    """
    if self.options.summary:
        self.options.dry_run = True
        self.options.cont = True
    self.jobFactory = TarballJobFactory(self.options.stageTo, self.options.stageCmd,
                                        job_outdir=self.options.outputDir,
                                        batchSystem=self.options.batchSystem)
    self.parallel = Parallel(self.options.ncpu, lsfQueue=self.options.queue,
                             lsfJobName="%s/runJobs" % self.options.outputDir,
                             asyncLsf=self.options.asyncLsf,
                             jobDriver=self.jobFactory,
                             batchSystem=self.options.batchSystem)
    self.jobs = None
    if self.options.cont:
        if self.options.asyncLsf:
            self.loadLsfMon()
    else:
        self.firstRun()
    self.monitor()
    self.parallel.stop()
def insertMarks(expInfo, nombreEDF):
    ponermarcas = []
    if expInfo[EXPERIMENT_TYPE] == EMOTIV:
        from multiprocessing import Process, Queue
        import guardar
        q_marcas = Queue()
        p = Process(target=guardar.save_data, args=(nombreEDF, q_marcas,))
        p.start()
        ponermarcas = 1
    elif expInfo[EXPERIMENT_TYPE] == TRADITIONAL_EEG:
        # Approach suggested by Fede (see mail 02/08/2016)
        from parallel import Parallel
        q_marcas = Parallel()
        # Just to make sure it starts with all pins low
        q_marcas.setData(0)
        ponermarcas = 2
    elif expInfo[EXPERIMENT_TYPE] == CONDUCTUAL:
        q_marcas = 1
        ponermarcas = 0
    return ponermarcas, q_marcas
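# A minimal sketch (an assumption, not part of the original code) of how the
# pair returned above is presumably consumed when a stimulus mark has to be
# sent: EMOTIV marks go through the multiprocessing Queue, traditional EEG
# marks are pulsed on the parallel-port data pins, and the conductual case
# sends nothing.
import time

def send_mark(ponermarcas, q_marcas, value):
    if ponermarcas == 1:      # EMOTIV: hand the mark to the recording process
        q_marcas.put(value)
    elif ponermarcas == 2:    # traditional EEG: pulse the parallel-port pins
        q_marcas.setData(value)
        time.sleep(0.01)      # assumed pulse width
        q_marcas.setData(0)
    # ponermarcas == 0: conductual run, no hardware mark is emitted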
def do_task(self):
    if self.task is not None:
        p = Parallel()
        print(p)
        # run the task on the instance created above instead of building
        # a second, throw-away Parallel object
        p.run(self.task.logic)
warnings.simplefilter("ignore")

tweets = None
sentiment_dir = "../sentiment/"
sentiment_models = {
    "text_blob": find_text_blob_sentiment,
    "vader": find_vader_sentiment,
}
for model_name, model_function in sentiment_models.items():
    sentiment_path = os.path.join(sentiment_dir, model_name) + ".pickle"
    if not os.path.exists(sentiment_path):
        if tweets is None:
            tweets = load_tweets()
            tweets = list(tweets.items())
        # use the function that belongs to this model, not
        # find_text_blob_sentiment for every entry
        results = Parallel(model_function, tweets, model_name)
        sentiment = {tweet_id: value for tweet_id, value in results}
        save_pickle(sentiment, sentiment_path)

model_name = "flair"
sentiment_path = os.path.join(sentiment_dir, model_name) + ".pickle"
if not os.path.exists(sentiment_path):
    if tweets is None:
        tweets = load_tweets()
        tweets = list(tweets.items())
    sentiment = find_flair_sentiment(tweets, chunk_len=100000)
    sentiment_models[model_name] = sentiment
    save_pickle(sentiment, sentiment_path)
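# The Parallel helper called above is not shown in this file; a minimal sketch
# of the interface it is assumed to have (map a function over the items in
# worker processes and return the results, with a label used for logging)
# could look like this:
from multiprocessing import Pool

def Parallel(function, items, label, processes=4):
    print("computing %s sentiment for %d tweets" % (label, len(items)))
    pool = Pool(processes=processes)
    try:
        return pool.map(function, items)
    finally:
        pool.close()
        pool.join()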
def main(): # Info de la sesion expInfo = { NAME: 'nombre', BIRTHDATE: 'DD/MM/AA', HAND: 'mano', EXPERIMENT_TYPE: 'conductual', OPERATOR: '' } # Presento cuadro para rellenar dlg = gui.DlgFromDict(expInfo, title='Formulario') if not (dlg.OK): core.quit() else: fileName = expInfo[NAME] if not os.path.exists('./Datos/' + fileName): os.makedirs('./Datos/' + fileName) dataFile = open( './Datos/' + fileName + '/' + str(datetime.date.today()) + '_' + fileName + '.csv', 'a') nombreEDF = './Datos/' + fileName + "/" + str( datetime.date.today()) + '_' + fileName texto = expInfo[NAME] + '; ' + str(datetime.datetime.now( )) + '; ' + expInfo[BIRTHDATE] + '; ' + expInfo[HAND] + '; ' + expInfo[ EXPERIMENT_TYPE] + '; ' + expInfo[OPERATOR] dataFile.write(texto) ########################## ## Parametros Pantalla ## ########################## res = [gtk.gdk.screen_width(), gtk.gdk.screen_height()] pantCompleta = True #win = visual.Window(res, monitor="Mi Monitor", units="pix", color=gris, colorSpace='hex', fullscr=pantCompleta) win = visual.Window(res, units="pix", color=gris, colorSpace='hex', fullscr=pantCompleta, monitor="testMonitor") win.setMouseVisible(False) ########################################### ## Inicializo parametros del experimento ## ########################################### proporcion = 0.7 pruebas = 30 Nsess = 12 # Tiempos #StimDur = 0.184 #ISI = 0.986 StimDur = 0.404 ISI = 0.986 ponermarcas = [] if expInfo[EXPERIMENT_TYPE] == EMOTIV: from multiprocessing import Process, Queue import guardar q_marcas = Queue() p = Process(target=guardar.save_data, args=( nombreEDF, q_marcas, )) p.start() ponermarcas = 1 elif expInfo[EXPERIMENT_TYPE] == TRADITIONAL_EEG: from parallel import Parallel # Version sugerida por Fede (ver mail 02/08/2016) #from psychopy import parallel # BIOSEMI #q_marcas=parallel.ParallelPort(address=u'/dev/parport0') #q_marcas=parallel.PParallelDLPortIO(address=888) # Chequear que este bien la direccion del puerto paralelo q_marcas = Parallel( ) # Version sugerida por Fede (ver mail 02/08/2016) q_marcas.setData( 0) # Solo para asegurarse de que arranque con todos los pins abajo ponermarcas = 2 elif expInfo[EXPERIMENT_TYPE] == CONDUCTUAL: q_marcas = 1 ponermarcas = 0 cond = pacman stimuli = [ pacmanImage, "./estimulo/fantasma_naranja.png", "./estimulo/fantasma_rosado.png", "./estimulo/fantasma_verde.png", "./estimulo/fantasma_azul.png" ] pantalla_inicio = "./estimulo/pantini_pacman.png" run_training(win, proporcion, 10, 6, StimDur, ISI, res, gris, negro, blanco, stimuli, pantalla_inicio) run_experiment(dataFile, win, proporcion, pruebas, Nsess, StimDur, ISI, q_marcas, ponermarcas, res, gris, negro, blanco, stimuli, pantalla_inicio, cond) cond = angry stimuli = [ birdImage, "./estimulo/cerdo_naranja.png", "./estimulo/cerdo_rosado.png", "./estimulo/cerdo_verde.png", "./estimulo/cerdo_azul.png" ] pantalla_inicio = "./estimulo/pantini_angry.png" run_training(win, proporcion, 10, 6, StimDur, ISI, res, gris, negro, blanco, stimuli, pantalla_inicio) run_experiment(dataFile, win, proporcion, pruebas, Nsess, StimDur, ISI, q_marcas, ponermarcas, res, gris, negro, blanco, stimuli, pantalla_inicio, cond)
def process_mentions():
    """retrieves all mentions and generates captions for those who are
    fighting fit"""
    if not cfg('twitter:user_requests:bool'):
        return

    params = dict(count=200)
    sources_whitelist = cfg('twitter:sources_whitelist:list')
    mention_prefix = '@%s ' % twitter.me.screen_name.lower()

    try:
        with open('state_mentions_timeline.txt') as fp:
            since_id = int(fp.read())
        utils.logging.info('State: since_id=%d', since_id)
        params['since_id'] = since_id
    except Exception as exc:
        utils.logging.warning("There's no last id saved, so I will save the "
                              'last id I see and then quit.')
        since_id = None

    filtered_statuses = []
    statuses = [status
                for page in tweepy.Cursor(twitter.api.mentions_timeline,
                                          **params).pages()
                for status in page]
    # they are in reverse chronological order, so put them straight
    statuses = statuses[::-1]
    if not since_id:
        since_id = statuses[-1].id
        with open('state_mentions_timeline.txt', 'wt') as fp:
            fp.write(str(since_id))
        utils.logging.info('New since_id=%d. Goodbye!', since_id)
        return

    for status in statuses:
        # ignore mentions that are not directed at me
        if not status.text.lower().startswith(mention_prefix):
            continue
        # ignore retweets
        if hasattr(status, 'retweeted_status'):
            continue
        # if the sources whitelist is enabled, ignore those who aren't on it
        if (sources_whitelist and
                status.source not in sources_whitelist):
            continue
        # store this status
        filtered_statuses.append(status)

    if filtered_statuses:
        utils.logging.info('Retrieved %d new mentions (from %d to %d).',
                           len(filtered_statuses),
                           filtered_statuses[0].id, filtered_statuses[-1].id)
        with open('state_mentions_timeline.txt', 'wt') as fp:
            fp.write(str(filtered_statuses[-1].id))

        Akari.warmup()

        parallel = Parallel(process_request, filtered_statuses,
                            cfg('twitter:process_threads:int') or 3)
        parallel.start()
    else:
        utils.logging.info('Retrieved no new mentions.')
class SamplesManager(object): def __init__(self, catalog, cross_sections=["$CMSSW_BASE/src/flashgg/MetaData/data/cross_sections.json"], dbs_instance="prod/phys03", queue=None, maxThreads=200,force=False,doContinue=False ): """ Constructur: @catalog: json file used to read/write dataset information @cross_sections: json file where samples cross sections are stored @dbs_instance: DBS instance tp use """ self.cross_sections_ = {} self.dbs_instance_ = dbs_instance for xsecFile in cross_sections: fname = shell_expand(xsecFile) self.cross_sections_.update( json.loads( open(fname).read() ) ) self.catalog_ = shell_expand(catalog) self.parallel_ = None self.sem_ = Semaphore() print "Will use the following datasets catalog:" print self.catalog_ self.queue_ = queue self.maxThreads_ = maxThreads self.force_ = force self.continue_ = doContinue def importFromDAS(self,list_datasets): """ Import datasets from DAS to the catalog. @datasets: wildecard to be usd in dataset query """ catalog = self.readCatalog() print "Importing from das %s" % list_datasets datasets = [] for dataset in list_datasets: if "*" in dataset: response = das_query("https://cmsweb.cern.ch","dataset dataset=%s | grep dataset.name" % dataset, 0, 0, False, self.dbs_instance_) for d in response["data"]: datasets.append( d["dataset"][0]["name"] ) else: datasets.append(dataset) print "Datasets to import" print "\n".join(datasets) for dsetName in datasets: print "Importing %s" % dsetName files = self.getFilesFomDAS(dsetName) self.addToDataset(catalog,dsetName,files) ## if dsetName in catalog: ## if self.force_: ## catalog[ dsetName ]["files"] = files ## else: ## self.mergeDataset(catalog[ dsetName ],{ "files" : files }) ## else: ## catalog[ dsetName ] = { "files" : files } print "Writing catalog" self.writeCatalog(catalog) print "Done" def getFilesFomDAS(self,dsetName): """ Read dataset files from DAS. @dsetName: dataset name """ response = das_query("https://cmsweb.cern.ch","file dataset=%s | grep file.name,file.nevents" % dsetName, 0, 0, False, self.dbs_instance_) files=[] for d in response["data"]: for jf in d["file"]: if "nevents" in jf: files.append({ "name" : jf["name"], "nevents" : jf["nevents"] }) break ## files.append( { "name" : d["file"][0]["name"], "nevents" : d["file"][0]["nevents"] } ) return files def importFromEOS(self,folders): """ Import datasets from DAS to the catalog. @datasets: dataset to be imported """ catalog = self.readCatalog() auto=False assumeOk=False for folder in folders: dsetName = "" print print "importing folder\n %s" % folder while not len(dsetName.split("/")) == 4: if auto: splitFolder = folder.split("/") prim, sec = splitFolder[-4:-2] dsetName = "/%s/%s/USER" % (prim,sec) print "guessed dataset name ", dsetName if not assumeOk: resp=ask_user("ok?",["y","n","a"]) if resp == "n": dsetName = "" auto=False elif resp=="a": assumeOk=True if not auto: print "enter dataset name (auto/noauto to enables/disables automatic guessing) ", dsetName = raw_input() if(dsetName=="auto"): auto=True elif (dsetName=="noauto"): auto=False print "Importing %s as %s" % (folder,dsetName) files = self.getFilesFomEOS(folder) self.addToDataset(catalog,dsetName,files) ## if dsetName in catalog: ## catalog[ dsetName ]["files"] = files ## else: ## catalog[ dsetName ] = { "files" : files } print "Writing catalog" self.writeCatalog(catalog) print "Done" def getFilesFomEOS(self,dsetName): """ Read dataset files crawling EOS. 
@dsetName: dataset name Note: not implemented """ if not self.parallel_: self.parallel_ = Parallel(200,self.queue_,maxThreads=self.maxThreads_,asyncLsf=True) ret,out = self.parallel_.run("/afs/cern.ch/project/eos/installation/0.3.15/bin/eos.select",["find",dsetName],interactive=True)[2] ## print out files = [] for line in out.split("\n"): if line.endswith(".root"): files.append( {"name":line.replace("/eos/cms",""), "nevents":0} ) return files def findDuplicates(self,dsetName): """ Find duplicate job outputs in dataset. @dsetName: dataset name Note: not implemented """ pass def invalidateBadFiles(self,dsetName): """ Invalidate duplicate job output and corrupted files in DAS. @dsetName: dataset name Note: not implemented """ pass def checkAllDatasets(self,match=None,light=False): """ Look for corrupted files in the whole catalog. """ catalog = self.readCatalog() self.parallel_ = Parallel(50,self.queue_,maxThreads=self.maxThreads_,asyncLsf=True,lsfJobName=".fgg/job") ## self.parallel_ = Parallel(1,self.queue_) print "Checking all datasets" self.outcomes = [] for dataset in catalog.keys(): if match and not fnmatch(dataset,match): continue self.checkDatasetFiles(dataset,catalog,light=light) # write catalog to avoid redoing duplicates removal self.writeCatalog(catalog) if self.queue_: self.parallel_.wait(printOutput=True,handler=self) outcomes = self.outcomes else: outcomes = self.parallel_.wait(printOutput=False) ## for dsetName,ifile,fName,ret,out in outcomes: nfailed = 0 for oc in outcomes: ign1, ign2, outcome= oc ## for ign1, ign2, outcome in outcomes: dsetName,ifile,fName,ret,out = outcome info = catalog[dsetName]["files"][ifile] if info["name"] != fName: print "Inconsistent outcome ", info["name"], dsetName,ifile,fName,ret,out else: if ret != 0: info["bad"] = True nfailed += 1 else: info["bad"] = False extraInfo = json.loads(str(out)) if len(extraInfo.keys()) == 0: nfailed += 1 info["bad"] = True for key,val in extraInfo.iteritems(): info[key] = val self.parallel_.stop() print "Writing catalog" self.writeCatalog(catalog) print "Done" if nfailed > 0: print print "WARNING: some of the check jobs failed or did not return any output." print " Those (%d) files were marked a bad and won't be usable for analysis." % nfailed print " Re-running the check command may recover the temporary failures." print if self.queue_: print print "Note: log files may have been written in ./.fgg" print " it's up to you to clean up though..." def checkDatasetFiles(self,dsetName,catalog=None,light=False): """ Look for corrupted files in dataset. @dsetName: dataset name Note: not implemented """ writeCatalog = False if not catalog: catalog = self.readCatalog() writeCatalog = True wait = False if not self.parallel_: self.parallel_ = Parallel(16,self.queue_,maxThreads=self.maxThreads_,asyncLsf=True) wait = True print print "Checking dataset",dsetName info = catalog[dsetName] files = info["files"] print "Number of files: ", len(files) if self.force_ or not catalog[dsetName].get("vetted",False): toremove = [] keep_wildcard=None for ifil,eifil in enumerate(files): if ifil in toremove: continue for jfil,ejfil in enumerate(files[ifil+1:]): if ifil+jfil in toremove: continue if eifil["name"] == ejfil["name"]: toremove.append(ifil) else: iid = eifil["name"].rstrip(".root").rsplit("_",1)[-1] jid = ejfil["name"].rstrip(".root").rsplit("_",1)[-1] if iid == jid: if not keep_wildcard: print "duplicated file index ", iid print eifil["name"] print ejfil["name"] reply=ask_user("keep both (yes/no/matching)? 
",["y","n","m"]) if reply == "m": while not keep_wildcard: print "enter wildcard matching expression", keep_wildcard=raw_input() if ask_user("keep all files matching '%s'?" % keep_wildcard) == "n": keep_wildcard=None if keep_wildcard: imatch=fnmatch(eifil["name"],keep_wildcard) jmatch=fnmatch(ejfil["name"],keep_wildcard) if imatch != jmatch: if imatch: toremove.append(ifil+jfil) else: toremove.append(ifil) continue else: print "duplicated file index ", iid print eifil["name"] print ejfil["name"] reply=ask_user("keep both? ") if reply == "n": if ask_user( "keep %s? " % ejfil["name"] ) == "n": ## files.pop(ifil+jfil) toremove.append(ifil+jfil) if ask_user( "keep %s? " % eifil["name"] ) == "n": toremove.append(ifil) ## files.pop(ifil) for ifile in sorted(toremove,reverse=True): ## print ifile files.pop(ifile) print "After duplicates removal: ", len(files) nsub = 0 catalog[dsetName]["vetted"] = True if not light: info = catalog[dsetName]["files"] = files for ifile,finfo in enumerate(files): name = finfo["name"] if self.force_ or not "weights" in finfo: nsub+=1 self.parallel_.run(SamplesManager.checkFile,[self,name,dsetName,ifile],interactive=(self.queue_!=None)) if nsub == 0: print "No files needed to be checked" else: print "Submitted %d check jobs" % nsub if wait: self.parallel_.wait(printOutput=False) self.parallel_ = None if writeCatalog: self.writeCatalog(catalog) def reviewCatalog(self): datasets,catalog = self.getAllDatasets() primaries = {} keepAll = False for d in datasets: if not keepAll: reply = ask_user("keep this dataset (yes/no/all)?\n %s\n" % d, ["y","n","a"]) if reply == "n": catalog.pop(d) continue if reply == "a": keepAll = True primary = d.split("/")[1] if not primary in primaries: primaries[ primary ] = [] primaries[ primary ].append(d) for name,val in primaries.iteritems(): if len(val) == 1: continue reply = ask_user("More than one sample for %s:\n %s\nKeep all (yes/no/merge)?" % (name,"\n ".join(val)),["y","n","m"]) if reply == "m": dst = val[0] for merge in val[1:]: self.mergeDataset(catalog[dst],catalog[merge]) catalog.pop(merge) if reply == "n": for d in val: reply = ask_user("keep this dataset?\n %s\n" % d) if reply == "n": catalog.pop(d) self.writeCatalog(catalog) def mergeDataset(self,dst,merge): dst["vetted"]=False dstFiles=dst["files"] mergeFiles=merge["files"] for fil in mergeFiles: skip = False for dfil in dstFiles: if dfil["name"] == fil["name"]: skip = True if not skip: dstFiles.append( fil ) def addToDataset(self,catalog,dsetName,files): if dsetName in catalog: if self.force_: catalog[ dsetName ]["files"] = files else: self.mergeDataset(catalog[ dsetName ],{ "files" : files }) else: catalog[ dsetName ] = { "files" : files } def checkFile(self,fileName,dsetName,ifile): """ Check if file is valid. 
@fileName: file name """ fName = fileName tmp = ".tmp%s_%d.json"%(dsetName.replace("/","_"),ifile) if self.continue_: if os.path.exists(tmp): print "%s already exists" % tmp outcome = self.readJobOutput(tmp,0,"",dsetName,fileName,ifile) if self.queue_: self.outcomes.append((None,None,outcome)) else: return outcome return None if self.queue_: self.parallel_.run("fggCheckFile.py",[fName,tmp,dsetName,str(ifile),"2>/dev/null"],interactive=False) else: ret,out = self.parallel_.run("fggCheckFile.py",[fName,tmp,dsetName,str(ifile),"2>/dev/null"],interactive=True)[2] return self.readJobOutput(tmp,ret,out,dsetName,fileName,ifile) ### try: ### fout = open(tmp) ### out = fout.read() ### fout.close() ### except IOError, e: ### print ret, out ### print e ### out = "{}" ### ### os.remove(tmp) ### return dsetName,ifile,fileName,ret,out def readJobOutput(self,tmp,ret,out,dsetName,fileName,ifile): try: fout = open(tmp) out = fout.read() fout.close() os.remove(tmp) except Exception, e: print ret, out print e out = "{}" return dsetName,int(ifile),fileName,ret,out
class ParallelTrigger(object): """Parallel port and dummy triggering support. .. warning:: When using the parallel port, calling :meth:`expyfun.ExperimentController.start_stimulus` will automatically invoke a stamping of the 1 trigger, which will in turn cause a delay equal to that of ``trigger_duration``. This can effect e.g. :class:`EyelinkController` timing. Parameters ---------- mode : str 'parallel' for real use. 'dummy', passes all calls. address : str | int | None The address to use. On Linux this should be a string path like ``'/dev/parport0'`` (equivalent to None), on Windows it should be an integer address like ``888`` or ``0x378`` (equivalent to None). The config variable ``TRIGGER_ADDRESS`` can be used to set this permanently. trigger_duration : float Amount of time (seconds) to leave the trigger high whenever sending a trigger. ec : instance of ExperimentController The ExperimentController. verbose : bool, str, int, or None If not None, override default verbose level. Notes ----- Parallel port activation is enabled by using the ``trigger_controller`` argument of :class:`expyfun.ExperimentController`. """ @verbose_dec def __init__(self, mode='dummy', address=None, trigger_duration=0.01, ec=None, verbose=None): self.ec = ec if mode == 'parallel': if sys.platform.startswith('linux'): address = '/dev/parport0' if address is None else address if not isinstance(address, string_types): raise ValueError('addrss must be a string or None, got %s ' 'of type %s' % (address, type(address))) from parallel import Parallel logger.info('Expyfun: Using address %s' % (address,)) self._port = Parallel(address) self._portname = address self._set_data = self._port.setData elif sys.platform.startswith('win'): from ctypes import windll if not hasattr(windll, 'inpout32'): raise SystemError( 'Must have inpout32 installed, see:\n\n' 'http://www.highrez.co.uk/downloads/inpout32/') base = '0x378' if address is None else address logger.info('Expyfun: Using base address %s' % (base,)) if isinstance(base, string_types): base = int(base, 16) if not isinstance(base, int): raise ValueError('address must be int or None, got %s of ' 'type %s' % (base, type(base))) self._port = windll.inpout32 mask = np.uint8(1 << 5 | 1 << 6 | 1 << 7) # Use ECP to put the port into byte mode val = int((self._port.Inp32(base + 0x402) & ~mask) | (1 << 5)) self._port.Out32(base + 0x402, val) # Now to make sure the port is in output mode we need to make # sure that bit 5 of the control register is not set val = int(self._port.Inp32(base + 2) & ~np.uint8(1 << 5)) self._port.Out32(base + 2, val) self._set_data = lambda data: self._port.Out32(base, data) self._portname = str(base) else: raise NotImplementedError('Parallel port triggering only ' 'supported on Linux and Windows') else: # mode == 'dummy': self._port = self._portname = None self._trigger_list = list() self._set_data = lambda x: (self._trigger_list.append(x) if x != 0 else None) self.trigger_duration = trigger_duration self.mode = mode def __repr__(self): return '<ParallelTrigger : %s (%s)>' % (self.mode, self._portname) def _stamp_trigger(self, trig): """Fake stamping.""" self._set_data(int(trig)) self.ec.wait_secs(self.trigger_duration) self._set_data(0) def stamp_triggers(self, triggers, delay=None, wait_for_last=True, is_trial_id=False): """Stamp a list of triggers with a given inter-trigger delay. Parameters ---------- triggers : list No input checking is done, so ensure triggers is a list, with each entry an integer with fewer than 8 bits (max 255). 
delay : float | None The inter-trigger-onset delay (includes "on" time). If None, will use twice the trigger duration (50% duty cycle). wait_for_last : bool If True, wait for last trigger to be stamped before returning. is_trial_id : bool No effect for this trigger controller. """ if delay is None: delay = 2 * self.trigger_duration for ti, trig in enumerate(triggers): self._stamp_trigger(trig) if ti < len(triggers) - 1 or wait_for_last: self.ec.wait_secs(delay - self.trigger_duration) def close(self): """Release hardware interfaces.""" if hasattr(self, '_port'): del self._port def __del__(self): return self.close()
def checkDatasetFiles(self, dsetName, catalog=None):
    """
    Look for corrupted files in dataset.
    @dsetName: dataset name
    """
    writeCatalog = False
    if not catalog:
        catalog = self.readCatalog()
        writeCatalog = True
    wait = False
    if not self.parallel_:
        self.parallel_ = Parallel(16, self.queue_)
        wait = True
    print
    print "Checking dataset", dsetName
    info = catalog[dsetName]
    files = info["files"]
    print "Number of files: ", len(files)
    toremove = []
    for ifil, eifil in enumerate(files):
        if ifil in toremove:
            continue
        for jfil, ejfil in enumerate(files[ifil + 1:]):
            if ifil + jfil in toremove:
                continue
            if eifil["name"] == ejfil["name"]:
                toremove.append(ifil)
            else:
                iid = eifil["name"].rstrip(".root").rsplit("_", 1)[-1]
                jid = ejfil["name"].rstrip(".root").rsplit("_", 1)[-1]
                if iid == jid:
                    print "duplicated file index ", iid
                    print eifil["name"]
                    print ejfil["name"]
                    reply = ask_user("keep both? ")
                    if reply == "n":
                        if ask_user("keep %s? " % ejfil["name"]) == "n":
                            ## files.pop(ifil+jfil)
                            toremove.append(ifil + jfil)
                        if ask_user("keep %s? " % eifil["name"]) == "n":
                            toremove.append(ifil)
                            ## files.pop(ifil)
    for ifile in sorted(toremove, reverse=True):
        ## print ifile
        files.pop(ifile)
    print "After duplicates removal: ", len(files)
    info = catalog[dsetName]["files"] = files
    for ifile, finfo in enumerate(files):
        name = finfo["name"]
        self.parallel_.run(SamplesManager.checkFile, [self, name, dsetName, ifile])
    if wait:
        self.parallel_.wait(printOutput=False)
        self.parallel_ = None
    if writeCatalog:
        self.writeCatalog(catalog)
#!/usr/bin/env python
from sys import argv

from parallel import Parallel

p = Parallel()
args = argv[1:]
if args:
    if 'on' in args:
        p.setData(0)
    if 'off' in args:
        p.setData(255)
class SamplesManager(object): def __init__(self, catalog, cross_sections=["$CMSSW_BASE/src/flashgg/MetaData/data/cross_sections.json"], dbs_instance="prod/phys03", queue=None ): """ Constructur: @catalog: json file used to read/write dataset information @cross_sections: json file where samples cross sections are stored @dbs_instance: DBS instance tp use """ self.cross_sections_ = {} self.dbs_instance_ = dbs_instance for xsecFile in cross_sections: fname = shell_expand(xsecFile) self.cross_sections_.update( json.loads( open(fname).read() ) ) self.catalog_ = shell_expand(catalog) self.parallel_ = None self.sem_ = Semaphore() print "Will use the following datasets catalog:" print self.catalog_ self.queue_ = queue def importFromDAS(self,list_datasets): """ Import datasets from DAS to the catalog. @datasets: wildecard to be usd in dataset query """ catalog = self.readCatalog() print "Importing from das %s" % list_datasets datasets = [] for dataset in list_datasets: if "*" in dataset: response = das_query("https://cmsweb.cern.ch","dataset dataset=%s | grep dataset.name" % dataset, 0, 0, False, self.dbs_instance_) for d in response["data"]: datasets.append( d["dataset"][0]["name"] ) else: datasets.append(dataset) print "Datasets to import" print "\n".join(datasets) for dsetName in datasets: print "Importing %s" % dsetName files = self.getFilesFomDAS(dsetName) if dsetName in catalog: catalog[ dsetName ]["files"] = files else: catalog[ dsetName ] = { "files" : files } print "Writing catalog" self.writeCatalog(catalog) print "Done" def getFilesFomDAS(self,dsetName): """ Read dataset files from DAS. @dsetName: dataset name """ response = das_query("https://cmsweb.cern.ch","file dataset=%s | grep file.name,file.nevents" % dsetName, 0, 0, False, self.dbs_instance_) files=[] for d in response["data"]: for jf in d["file"]: if "nevents" in jf: files.append({ "name" : jf["name"], "nevents" : jf["nevents"] }) break ## files.append( { "name" : d["file"][0]["name"], "nevents" : d["file"][0]["nevents"] } ) return files def importFromEOS(self,folders): """ Import datasets from DAS to the catalog. @datasets: dataset to be imported """ catalog = self.readCatalog() auto=False assumeOk=False for folder in folders: dsetName = "" print print "importing folder\n %s" % folder while not len(dsetName.split("/")) == 4: if auto: splitFolder = folder.split("/") prim, sec = splitFolder[-4:-2] dsetName = "/%s/%s/USER" % (prim,sec) print "guessed dataset name ", dsetName if not assumeOk: resp=ask_user("ok?",["y","n","a"]) if resp == "n": dsetName = "" auto=False elif resp=="a": assumeOk=True if not auto: print "enter dataset name (auto/noauto to enables/disables automatic guessing) ", dsetName = raw_input() if(dsetName=="auto"): auto=True elif (dsetName=="noauto"): auto=False print "Importing %s as %s" % (folder,dsetName) files = self.getFilesFomEOS(folder) if dsetName in catalog: catalog[ dsetName ]["files"] = files else: catalog[ dsetName ] = { "files" : files } print "Writing catalog" self.writeCatalog(catalog) print "Done" def getFilesFomEOS(self,dsetName): """ Read dataset files crawling EOS. 
@dsetName: dataset name Note: not implemented """ if not self.parallel_: self.parallel_ = Parallel(200,self.queue_) ret,out = self.parallel_.run("/afs/cern.ch/project/eos/installation/0.3.15/bin/eos.select",["find",dsetName],interactive=True)[2] ## print out files = [] for line in out.split("\n"): if line.endswith(".root"): files.append( {"name":line.replace("/eos/cms",""), "nevents":0} ) return files def findDuplicates(self,dsetName): """ Find duplicate job outputs in dataset. @dsetName: dataset name Note: not implemented """ pass def invalidateBadFiles(self,dsetName): """ Invalidate duplicate job output and corrupted files in DAS. @dsetName: dataset name Note: not implemented """ pass def checkAllDatasets(self): """ Look for corrupted files in the whole catalog. """ catalog = self.readCatalog() self.parallel_ = Parallel(50,self.queue_) ## self.parallel_ = Parallel(1,self.queue_) print "Checking all datasets" for dataset in catalog.keys(): self.checkDatasetFiles(dataset,catalog) outcomes = self.parallel_.wait(printOutput=False) ## for dsetName,ifile,fName,ret,out in outcomes: for ign1, ign2, outcome in outcomes: dsetName,ifile,fName,ret,out = outcome info = catalog[dsetName]["files"][ifile] if info["name"] != fName: print "Inconsistent outcome ", info["name"], dsetName,ifile,fName,ret,out else: if ret != 0: info["bad"] = True else: extraInfo = json.loads(str(out)) for key,val in extraInfo.iteritems(): info[key] = val print "Writing catalog" self.writeCatalog(catalog) print "Done" def checkDatasetFiles(self,dsetName,catalog=None): """ Look for corrupted files in dataset. @dsetName: dataset name Note: not implemented """ writeCatalog = False if not catalog: catalog = self.readCatalog() writeCatalog = True wait = False if not self.parallel_: self.parallel_ = Parallel(16,self.queue_) wait = True print print "Checking dataset",dsetName info = catalog[dsetName] files = info["files"] print "Number of files: ", len(files) toremove = [] for ifil,eifil in enumerate(files): if ifil in toremove: continue for jfil,ejfil in enumerate(files[ifil+1:]): if ifil+jfil in toremove: continue if eifil["name"] == ejfil["name"]: toremove.append(ifil) else: iid = eifil["name"].rstrip(".root").rsplit("_",1)[-1] jid = ejfil["name"].rstrip(".root").rsplit("_",1)[-1] if iid == jid: print "duplicated file index ", iid print eifil["name"] print ejfil["name"] reply=ask_user("keep both? ") if reply == "n": if ask_user( "keep %s? " % ejfil["name"] ) == "n": ## files.pop(ifil+jfil) toremove.append(ifil+jfil) if ask_user( "keep %s? " % eifil["name"] ) == "n": toremove.append(ifil) ## files.pop(ifil) for ifile in sorted(toremove,reverse=True): ## print ifile files.pop(ifile) print "After duplicates removal: ", len(files) info = catalog[dsetName]["files"] = files for ifile,finfo in enumerate(files): name = finfo["name"] self.parallel_.run(SamplesManager.checkFile,[self,name,dsetName,ifile]) if wait: self.parallel_.wait(printOutput=False) self.parallel_ = None if writeCatalog: self.writeCatalog(catalog) def reviewCatalog(self): datasets,catalog = self.getAllDatasets() primaries = {} keepAll = False for d in datasets: if not keepAll: reply = ask_user("keep this dataset (yes/no/all)?\n %s\n" % d, ["y","n","a"]) if reply == "n": catalog.pop(d) continue if reply == "a": keepAll = True primary = d.split("/")[1] if not primary in primaries: primaries[ primary ] = [] primaries[ primary ].append(d) for name,val in primaries.iteritems(): if len(val) == 1: continue reply = ask_user("More than one sample for %s:\n %s\nKeep all?" 
% (name,"\n ".join(val)),["y","n","m"]) if reply == "m": dst = val[0] for merge in val[1:]: self.mergeDataset(catalog[dst],catalog[merge]) catalog.pop(merge) if reply == "n": for d in val: reply = ask_user("keep this dataset?\n %s\n" % d) if reply == "n": catalog.pop(d) self.writeCatalog(catalog) def mergeDataset(self,dst,merge): dstFiles=dst["files"] mergeFiles=merge["files"] for fil in mergeFiles: skip = False for dfil in dstFiles: if dfil["name"] == fil["name"]: skip = True if not skip: dstFiles.append( fil ) def checkFile(self,fileName,dsetName,ifile): """ Check if file is valid. @fileName: file name """ ## fName = "root://eoscms//eos/cms%s" % fileName fName = fileName tmp = ".tmp%s_%d.json"%(dsetName.replace("/","_"),ifile) ## print "fggCheckFile.py",[fName,tmp,"2>/dev/null"] ret,out = self.parallel_.run("fggCheckFile.py",[fName,tmp,"2>/dev/null"],interactive=True)[2] try: fout = open(tmp) out = fout.read() fout.close() except IOError, e: print ret, out print e out = "{}" os.remove(tmp) return dsetName,ifile,fileName,ret,out
print CommandSequence([Loop('x', 1, 10, 0.5, Comment("Hello"))])
print CommandSequence(
    [Loop('x', 1, 10, 0.5, Comment("Hello"), completion=True)])
print CommandSequence([
    Loop('x', 1, 10, 0.5,
         [Comment("Hello"), Comment("World")],
         completion=True, timeout=10)
])
print CommandSequence([
    Loop('x', 2, 20, 5, [
        Loop('y', 1, 10, 0.5,
             [Comment("Hello"), Comment("World")],
             completion=True, timeout=10)
    ])
])
print CommandSequence([
    Parallel(Loop('x', 1, 10, 0.5, Comment("Hello")),
             Loop('y', 1, 10, 0.5, Comment("There")))
])
class JobsManager(object): def __init__(self, defaults={} ): """ Constructur: @defaults: default options """ # Command line options parser = OptionParser(option_list=[ make_option("--processes", action="callback", callback=Load(), type="string", dest="processes", default={}, help="List of datasets to be analyzed"), make_option("--load", # special option to load whole configuaration from JSON action="callback",callback=Load(),dest="__opts__", type="string", help="load JSON file with configuration",metavar="CONFIG.json" ), make_option("-n","--njobs",dest="njobs",type="int",default=0, help="number of jobs to run"), make_option("-q","--queue",dest="queue",type="string",default=None, help="LSF queue to use. default: %default"), make_option("--sync-lsf",dest="asyncLsf",action="store_false",default=True, help="Run LSF jobs in sync mode (with -K). This will spawn one thread per job. Use only if you know what you are doing." " default: False"), make_option("--use-tarball",dest="use_tarball",action="store_true",default=True, help="Make a sandbox tarball for the task default: %default"), make_option("--no-use-tarball",dest="useTarball",action="store_false",default=True, help="Do not make a sandbox tarball for the task."), make_option("--stage-to",dest="stageTo",action="store",default=None,type="string", help="Stage output to folder. default: %default"), make_option("--stage-cmd",dest="stageCmd",action="store",default="guess",type="string", help="Stage out command. (use 'guess' to have the script guessing the command from the output folder) default : %default"), make_option("--summary",dest="summary",action="store_true",default=False, help="Print jobs summary and exit"), make_option("-o","--output",dest="output",type="string", default="output.root", help="output file name. default: %default"), make_option("-d","--outputDir",dest="outputDir",type="string", default=None, help="output folder. default: %default"), make_option("-x","--jobEx",dest="jobExe",type="string", default=None, help="job executable. default: %default"), make_option("-c","--cmdLine",dest="cmdLine",type="string", default=None, help="job command line. The script arguments will be prepended. default: %default"), make_option("--dumpCfg", action="store_true", default=False, help="dump configuaration and exit. default: %default"), make_option("-v","--verbose", action="store_true", dest="verbose", default=False, help="default: %default"), make_option("-m","--max-resubmissions",dest="maxResub", type="int",default=3), make_option("-N","--ncpu",dest="ncpu", type="int",default=cpu_count()), make_option("-H","--hadd",dest="hadd",default=False, action="store_true", help="hadd output files when all jobs are finished." ), make_option("-D","--hadd-dateset",dest="hadd_dataset",default=False, action="store_true", help="hadd output per dataset when all jobs are finished." ), make_option("-P","--hadd-process",dest="hadd_process",default=False, action="store_true", help="hadd output per process when all jobs are finished." ), make_option("--dry-run",dest="dry_run",default=False, action="store_true", help="do not actually run the jobs." ), make_option("-C","--cont",dest="cont",default=False, action="store_true", help="continue interrupted task." ), make_option("-b","--batch-system",dest="batchSystem",type="string", default="auto",help="Batch system name. 
Currently supported: sge lsf, default: %default" ), ] ) # parse the command line (self.options, self.args) = parser.parse_args() self.maxResub = self.options.maxResub if self.options.cmdLine: self.args = self.args+shell_args(str(self.options.cmdLine)) if self.options.jobExe: self.options.jobExe = shell_expand(self.options.jobExe) if not self.args[0] == self.options.jobExe: self.args = [self.options.jobExe]+self.args self.uniqueNames = {} # ------------------------------------------------------------------------------------------------------------------- def __call__(self): """ __call__ Run all jobs. """ if self.options.summary: self.options.dry_run = True self.options.cont = True self.jobFactory = TarballJobFactory(self.options.stageTo,self.options.stageCmd,job_outdir=self.options.outputDir, batchSystem=self.options.batchSystem) self.parallel = Parallel(self.options.ncpu,lsfQueue=self.options.queue,lsfJobName="%s/runJobs" % self.options.outputDir, asyncLsf=self.options.asyncLsf,jobDriver=self.jobFactory,batchSystem=self.options.batchSystem) self.jobs = None if self.options.cont: if self.options.asyncLsf: self.loadLsfMon() else: self.firstRun() self.monitor() self.parallel.stop() # ------------------------------------------------------------------------------------------------------------------- def loadLsfMon(self): with open("%s/task_config.json" % (self.options.outputDir), "r" ) as cfin: task_config = json.loads(cfin.read()) jobs = task_config["jobs"] if self.options.useTarball: if not "tarball" in task_config: print print "You asked to run the jobs using a sandbox tarball, but the tarball name was not found in the task configuration" print " If you specified the --use-tarball now but not in the original submission, please remove it." print " Otherwise the task configuration may have been corrupted." 
print sys.exit(-1) self.jobFactory.setTarball(task_config["tarball"]) if not self.options.stageTo: self.jobFactory.stageDest( os.path.abspath(self.options.outputDir) ) self.parallel.setJobId(task_config.get("last_job_id",1)) for job in jobs: cmd, args, outfile, nsub, ret, batchId = job if type(batchId) == tuple or type(batchId) == list: jobName,batchId = batchId else: jobName=None if ret != 0 and nsub <= self.options.maxResub: self.parallel.addJob(cmd,args,batchId,jobName) # ------------------------------------------------------------------------------------------------------------------- def firstRun(self): (options,args) = (self.options, self.args) parallel = self.parallel task_config = {} outputPfx = options.output.replace(".root","") if not options.outputDir: sys.exit("\nPlease specify an output folder using the -d option\n") if options.dumpCfg: print ( dumpCfg(options) ) sys.exit(0) if not os.path.exists(options.outputDir): os.mkdir(options.outputDir) outputPfx = "%s/%s" % ( options.outputDir, outputPfx ) args.append("processIdMap=%s/config.json" % os.path.abspath(options.outputDir)) pset = args[0] if not options.jobExe else args[1] with open(pset,"r") as pin: with open("%s/%s" % ( options.outputDir, os.path.basename(pset) ), "w+" ) as pout: pout.write(pin.read()) pout.close() if not options.jobExe: os.chmod( "%s/%s" % ( options.outputDir, os.path.basename(pset)), 0755 ) pin.close() pset = "%s/%s" % ( options.outputDir, os.path.basename(pset) ) pset = os.path.abspath(pset) if options.useTarball: apset = os.path.abspath(pset) self.jobFactory.mkTarball("%s/sandbox.tgz" % os.path.abspath(options.outputDir), tarball_entries=[apset,"python","lib","bin"],tarball_patterns={"src/*":"data"}, tarball_transform="'s,%s,pset.py,'" % (apset.lstrip("/")) ) if not options.queue: print "\nWARNING: You specified the --use-tarball option but no batch queue. The tarball was created but the jobs won't actually use it." 
print " To avoid this printout run with --no-use-tarball or specify a batch queue using the --queue option.\n" options.useTarball = False task_config["tarball"] = self.jobFactory.tarball if not options.stageTo: self.jobFactory.stageDest( os.path.abspath(options.outputDir) ) options.stageTo = os.path.abspath(options.outputDir) print "\nWill stage output to %s using the command '%s'\n" % ( self.jobFactory.stage_dest, self.jobFactory.getStageCmd() ) if options.jobExe: args[1] = pset else: args[0] = pset with open("%s/config.json" % (options.outputDir), "w+" ) as fout: fout.write( dumpCfg(options,skip=["dry_run","summary"]) ) # store cmdLine options.cmdLine = str(" ".join(args)) outfiles = [] doutfiles = {} poutfiles = {} jobs = [] for name,datasets in options.processes.iteritems(): poutfiles[name] = ( "%s_%s.root" % ( outputPfx,name), [] ) for dset in datasets: job = args[0] if self.options.jobExe: pyjob = "" else: pyjob = job if type(dset) == list: dset,dopts = dset else: dopts = {} jobargs = copy(args[1:]) dsetName = dset.lstrip("/").replace("/","_") dsetName = self.getUniqueName(dsetName) outfile = "%s_%s.root" % ( outputPfx, dsetName ) doutfiles[dsetName] = ( str(outfile),[] ) jobargs.extend( ["dataset=%s" % dset, "outputFile=%s" % outfile ] ) # add (and replace) per-dataset job arguments dargs = dopts.get("args",[]) if type(dargs) != list: print "\nERROR : dataset-specific arguments should be list not %s" % (type(dargs)) print " dataset %s" % dset sys.exit(-1) if len(dargs) > 0: replace = {} for arg in dargs: aname,val = arg.split("=") replace[aname] = arg newargs = [] anames = [] for arg in jobargs: if not "=" in arg: newargs.append(arg) continue aname,val = arg.split("=") if aname in replace: newargs.append( replace.pop(aname) ) else: newargs.append(arg) jobargs = newargs for aname,arg in replace.iteritems(): jobargs.append(arg) print "running: %s %s" % ( job, " ".join(jobargs) ) njobs = dopts.get("njobs",options.njobs) if options.njobs != 0 else 0 if njobs != 0: print "splitting in (up to) %d jobs\n checking how many are needed... 
" % njobs, dnjobs = 0 dargs = jobargs+shell_args("nJobs=%d" % (njobs)) ret,out = parallel.run("python %s" % pyjob,dargs+shell_args("dryRun=1 getMaxJobs=1 dumpPython=%s.py" % os.path.join(options.outputDir,dsetName) ),interactive=True)[2] maxJobs = self.getMaxJobs(out) print maxJobs if maxJobs < 0: print "Error getting number of jobs to be submitted" print out hadd = self.getHadd(out,outfile) print " now submitting jobs", for ijob in range(maxJobs): ## FIXME allow specific job selection iargs = jobargs+shell_args("nJobs=%d jobId=%d" % (maxJobs, ijob)) dnjobs += 1 batchId = -1 if not options.dry_run: ret,out = parallel.run(job,iargs)[-1] if self.options.queue and self.options.asyncLsf: batchId = out[1] print ".", output = hadd.replace(".root","_%d.root" % ijob) outfiles.append( output ) doutfiles[dsetName][1].append( outfiles[-1] ) poutfiles[name][1].append( outfiles[-1] ) jobs.append( (job,iargs,output,0,-1,batchId) ) print "\n %d jobs submitted" % dnjobs else: ret,out = parallel.run("python %s" % pyjob,jobargs+shell_args("dryRun=1 dumpPython=%s.py" % os.path.join(options.outputDir,dsetName)),interactive=True)[2] if ret != 0: print ret,out continue output = self.getHadd(out,outfile) batchId = -1 if not options.dry_run: ret,out = parallel.run(job,jobargs)[-1] if self.options.queue and self.options.asyncLsf: batchId = out[1] outfiles.append( output ) jobs.append( (job,jobargs,output,0,-1,batchId) ) poutfiles[name][1].append( outfiles[-1] ) print task_config["jobs"] = jobs task_config["datasets_output"] = doutfiles task_config["process_output"] = poutfiles task_config["output"] = outfiles task_config["outputPfx"] = outputPfx self.storeTaskConfig(task_config) # ------------------------------------------------------------------------------------------------------------------- def storeTaskConfig(self,task_config): with open("%s/task_config.json" % (self.options.outputDir), "w+" ) as cfout: task_config["last_job_id"] = self.parallel.currJobId() cfout.write( json.dumps(task_config,indent=4) ) cfout.close() # ------------------------------------------------------------------------------------------------------------------- def getUniqueName(self,basename): if basename in self.uniqueNames: self.uniqueNames[basename] += 1 else: self.uniqueNames[basename] = 0 return basename return "%s%d" % (basename,self.uniqueNames[basename]) # ------------------------------------------------------------------------------------------------------------------- def monitor(self): (options,args) = (self.options, self.args) parallel = self.parallel with open("%s/task_config.json" % (options.outputDir), "r" ) as cfin: task_config = json.loads(cfin.read()) doutfiles = task_config["datasets_output"] poutfiles = task_config["process_output"] outfiles = task_config["output"] outputPfx = task_config["outputPfx"] self.task_config = task_config if options.summary: self.printSummary() return if not options.dry_run: ## FIXME: job resubmission returns = self.wait(parallel,self) if options.hadd: print "All jobs finished. Merging output." 
p = Parallel(options.ncpu) hadd = "hadd -f " if options.hadd_process: for proc,out in poutfiles.iteritems(): outfile,outfiles = out p.run("%s %s" % (hadd, outfile), outfiles ) if options.hadd_dataset: if options.hadd_process: hadd += " -T" for dset,out in doutfiles.iteritems(): outfile,outfiles = out p.run("%s %s" % (hadd,outfile), outfiles) if not (options.hadd_process or options.hadd_dataset): p.run("%s %s.root" % (hadd,outputPfx), outfiles) self.wait(p) self.storeTaskConfig(task_config) self.parallel.stop() # ------------------------------------------------------------------------------------------------------------------- def wait(self,parallel,handler=None): return parallel.wait(handler) # ------------------------------------------------------------------------------------------------------------------- def handleJobOutput(self,job,jobargs,ret): print "------------" print "Job finished: (exit code %d) '%s' '%s'" % ( ret[0], job, " ".join(jobargs) ) print "Job output: " print lines = ret[1].split("\n") if self.options.queue and self.options.asyncLsf: lines = lines[-10:] for line in lines: print line print jobargs = shell_args(" ".join(jobargs)) job = jobargs[0] jobargs = jobargs[1:] for ijob in self.task_config["jobs"]: inam,iargs = ijob[0:2] if inam == job and iargs == jobargs: ijob[4] = ret[0] if ret[0] != 0: print "" print "Job failed. Number of resubmissions: %d / %d. " % (ijob[3], self.maxResub), if ijob[3] < self.maxResub: print "Resubmitting." ijob[3] += 1 if ijob[3] == self.maxResub: iargs.append("lastAttempt=1") jobName = ijob[5][0] if self.options.queue else None out = self.parallel.run(inam,iargs,jobName=jobName) if self.options.queue and self.options.asyncLsf: ijob[5] = out[-1][1][1] self.storeTaskConfig(self.task_config) print "------------" return 1 else: print "Giving up." self.storeTaskConfig(self.task_config) print "------------" return 0 # ------------------------------------------------------------------------------------------------------------------- def getHadd(self,stg,fallback): for line in stg.split("\n"): if line.startswith("hadd:"): return line.replace("hadd:","") return fallback # ------------------------------------------------------------------------------------------------------------------- def getMaxJobs(self,stg): for line in stg.split("\n"): if line.startswith("maxJobs:"): return int(line.replace("maxJobs:","")) return -1 # ------------------------------------------------------------------------------------------------------------------- def printSummary(self): jobs = self.task_config["jobs"] procs = self.task_config["datasets_output"] status = {} for job in jobs: cmd, args, outfile, nsub, ret, batchId = job status[outfile] = (nsub,ret) for proc,out in procs.iteritems(): outfile,outfiles = out finished = [] missing = {} for jfile in outfiles: nsub,ret = status[jfile] if ret != 0: if not nsub in missing: missing[nsub] = [] missing[nsub].append( jfile ) else: finished.append(jfile) print "----------" print "process: %s " % outfile.replace(".root","") print "njobs: %d " % len(outfiles) print "finished: %d " % len(finished) for nsub,lst in missing.iteritems(): print "submitted %d times: %d" % (nsub+1, len(lst)) print
def run(self, genome_files, output_dir, called_genes=False, translation_table=None, meta=False, closed_ends=False):
    """Call genes with Prodigal.

    Call genes with Prodigal and store the results in the specified output
    directory. For convenience, the called_genes flag can be used to indicate
    that genes have previously been called and simply need to be copied to the
    specified output directory.

    Parameters
    ----------
    genome_files : list of str
        Nucleotide fasta files to call genes on.
    output_dir : str
        Directory to store called genes.
    called_genes : boolean
        Flag indicating if genes are already called.
    translation_table : int
        Specifies desired translation table, use None to automatically select between tables 4 and 11.
    meta : boolean
        Flag indicating if Prodigal should call genes with the metagenomics procedure.
    closed_ends : boolean
        If True, do not allow genes to run off edges (adds the -c flag).

    Returns
    -------
    d[genome_id] -> namedtuple(best_translation_table coding_density_4 coding_density_11)
        Summary statistics of called genes for each genome.
    """

    self.called_genes = called_genes
    self.translation_table = translation_table
    self.meta = meta
    self.closed_ends = closed_ends
    self.output_dir = output_dir

    make_sure_path_exists(self.output_dir)

    progress_func = None
    if self.verbose:
        file_type = 'genomes'
        self.progress_str = '  Finished processing %d of %d (%.2f%%) genomes.'
        if meta:
            file_type = 'scaffolds'
            if len(genome_files):
                file_type = ntpath.basename(genome_files[0])
            self.progress_str = '  Finished processing %d of %d (%.2f%%) files.'

        self.logger.info('Identifying genes within %s: ' % file_type)
        progress_func = self._progress

    parallel = Parallel(self.cpus)
    summary_stats = parallel.run(self._producer, self._consumer, genome_files, progress_func)

    # An error was encountered during Prodigal processing; clean up.
    if not summary_stats:
        shutil.rmtree(self.output_dir)

    return summary_stats
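# A hypothetical usage sketch for the gene-calling runner above. The wrapper
# class name and its constructor arguments are assumptions (only the run()
# method is shown here); the file names are placeholders.
#
#   runner = Prodigal(cpus=4, verbose=True)           # assumed constructor
#   stats = runner.run(['genomeA.fna', 'genomeB.fna'],
#                      output_dir='./prodigal_out',
#                      translation_table=None,         # auto-select table 4 vs 11
#                      meta=False,
#                      closed_ends=False)
#   for genome_id, s in stats.iteritems():
#       print genome_id, s.best_translation_table, s.coding_density_11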
class StepperMotor(object): def __init__(self, motor_inputs, state=0, delay=0.05): ''' :param motor_inputs: Ordered list of parallel values to turn motor :type motor_inputs: list or tuple :param state: Initial starting state of motor position :type state: int :param delay: Delay between steps (speed) :type delay: float ''' self.MOTOR_INPUTS = motor_inputs self.state = state self.delay = delay # Setup parallel interface on first init self.parallel_interface = Parallel() #Q: Keep as a function or store state to self? def stepper_generator(self, state_steps): ''' Returns a generator object which yields the current state and motor input. :param state_steps: Number of steps to step the motor. :type state_steps: int :returns: Generator yielding tuples (state_index, motor_input) :rtype: (int, hex) ''' if state_steps < 0: step = -1 else: step = 1 for virtual_state in xrange(self.state + 1, self.state + state_steps + 1, step): # NOTE: virtual_state is not used other than for informing the user the # overall relative step we've applied! self.state += step if self.state >= len(self.MOTOR_INPUTS): # start at list 0 self.state = 0 elif self.state < 0: # start at the end self.state = len(self.MOTOR_INPUTS) - 1 else: # we're at an index within the current motor inputs list pass motor_command = self.MOTOR_INPUTS[self.state] print "%+ 4d : Moving to internal state index %02d, %s hex %03.2f degrees" % ( virtual_state, self.state, hex(motor_command), state_to_angle(self.state, len(self.MOTOR_INPUTS))) # present the required value yield motor_command def turn_motor(self, cycles): ''' Turns the motor the desired amount. :param cycles: Loops to turn :type cycles: float :returns: New state position :rtype: int ''' # round to the nearest step possible steps = int(round(cycles * len(self.MOTOR_INPUTS))) stepper = self.stepper_generator(steps) for motor_position in stepper: ##print "turn motor to position %s" % hex(motor_position) self.parallel_interface.setData(motor_position) time.sleep(self.delay) return self.state def turn_to_angle(self, angle): ''' Turns the motor to the desired absolute angle. Accessor which converts arguments for turn_motor method. :param angle: Angle to turn to :type angle: float :returns: New state position :rtype: int ''' cycles = self.angle_to_cycles(angle, self.state) return self.turn_motor(cycles) def rotate(self, degrees): ''' Turns the motor by the number of degrees. -720 will turn the motor two whole cycles anti-clockwise. Accessor which converts arguments for turn_motor method. :param degrees: Degrees to turn motor by :type degrees: float :returns: New state position :rtype: int ''' cycles = degrees / 360.0 return self.turn_motor(cycles)
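# state_to_angle (and angle_to_cycles) are referenced by StepperMotor above but
# not defined in this snippet; a plausible implementation, assuming the motor
# states evenly divide one revolution, is sketched here.
def state_to_angle(state, n_states):
    """Map a state index onto an absolute shaft angle in degrees."""
    return (360.0 / n_states) * state

# Illustrative half-step drive table for a unipolar stepper wired to the
# parallel-port data pins D0-D3 (an assumption -- adapt to your wiring).
HALF_STEP_INPUTS = [0x1, 0x3, 0x2, 0x6, 0x4, 0xC, 0x8, 0x9]

# motor = StepperMotor(HALF_STEP_INPUTS, delay=0.01)   # needs parallel-port hardware
# motor.rotate(90)                                     # quarter turn clockwise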
def checkAllDatasets(self,match=None,light=False):
    """
    Look for corrupted files in the whole catalog.
    """
    catalog = self.readCatalog()

    self.parallel_ = Parallel(50,self.queue_,maxThreads=self.maxThreads_,asyncLsf=True,lsfJobName=".fgg/job")
    ## self.parallel_ = Parallel(1,self.queue_)

    print "Checking all datasets"
    self.outcomes = []
    for dataset in catalog.keys():
        if match and not fnmatch(dataset,match): continue
        self.checkDatasetFiles(dataset,catalog,light=light)
    # write catalog to avoid redoing duplicates removal
    self.writeCatalog(catalog)

    if self.queue_:
        self.parallel_.wait(printOutput=True,handler=self)
        outcomes = self.outcomes
    else:
        outcomes = self.parallel_.wait(printOutput=False)

    nfailed = 0
    for oc in outcomes:
        ign1, ign2, outcome = oc
        dsetName,ifile,fName,ret,out = outcome
        info = catalog[dsetName]["files"][ifile]
        if info["name"] != fName:
            print "Inconsistent outcome ", info["name"], dsetName,ifile,fName,ret,out
        else:
            if ret != 0:
                info["bad"] = True
                nfailed += 1
            else:
                info["bad"] = False
                extraInfo = json.loads(str(out))
                if len(extraInfo.keys()) == 0:
                    nfailed += 1
                    info["bad"] = True
                for key,val in extraInfo.iteritems():
                    info[key] = val

    self.parallel_.stop()

    print "Writing catalog"
    self.writeCatalog(catalog)
    print "Done"

    if nfailed > 0:
        print
        print "WARNING: some of the check jobs failed or did not return any output."
        print "         Those (%d) files were marked as bad and won't be usable for analysis." % nfailed
        print "         Re-running the check command may recover the temporary failures."
        print

    if self.queue_:
        print
        print "Note: log files may have been written in ./.fgg"
        print "      it's up to you to clean up though..."
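# Standalone illustration of the optional `match` filter used above: catalog
# keys are matched against a shell-style wildcard with fnmatch. The dataset
# names here are invented for the example.
from fnmatch import fnmatch

catalog_keys = ["/GluGluHToGG/test-v1/USER", "/DYJetsToLL/test-v1/USER"]
match = "/GluGlu*"
print [d for d in catalog_keys if fnmatch(d, match)]   # ['/GluGluHToGG/test-v1/USER']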
def checkDatasetFiles(self,dsetName,catalog=None,light=False): """ Look for corrupted files in dataset. @dsetName: dataset name Note: not implemented """ writeCatalog = False if not catalog: catalog = self.readCatalog() writeCatalog = True wait = False if not self.parallel_: self.parallel_ = Parallel(16,self.queue_,maxThreads=self.maxThreads_,asyncLsf=True) wait = True print print "Checking dataset",dsetName info = catalog[dsetName] files = info["files"] print "Number of files: ", len(files) if self.force_ or not catalog[dsetName].get("vetted",False): toremove = [] keep_wildcard=None for ifil,eifil in enumerate(files): if ifil in toremove: continue for jfil,ejfil in enumerate(files[ifil+1:]): if ifil+jfil in toremove: continue if eifil["name"] == ejfil["name"]: toremove.append(ifil) else: iid = eifil["name"].rstrip(".root").rsplit("_",1)[-1] jid = ejfil["name"].rstrip(".root").rsplit("_",1)[-1] if iid == jid: if not keep_wildcard: print "duplicated file index ", iid print eifil["name"] print ejfil["name"] reply=ask_user("keep both (yes/no/matching)? ",["y","n","m"]) if reply == "m": while not keep_wildcard: print "enter wildcard matching expression", keep_wildcard=raw_input() if ask_user("keep all files matching '%s'?" % keep_wildcard) == "n": keep_wildcard=None if keep_wildcard: imatch=fnmatch(eifil["name"],keep_wildcard) jmatch=fnmatch(ejfil["name"],keep_wildcard) if imatch != jmatch: if imatch: toremove.append(ifil+jfil) else: toremove.append(ifil) continue else: print "duplicated file index ", iid print eifil["name"] print ejfil["name"] reply=ask_user("keep both? ") if reply == "n": if ask_user( "keep %s? " % ejfil["name"] ) == "n": ## files.pop(ifil+jfil) toremove.append(ifil+jfil) if ask_user( "keep %s? " % eifil["name"] ) == "n": toremove.append(ifil) ## files.pop(ifil) for ifile in sorted(toremove,reverse=True): ## print ifile files.pop(ifile) print "After duplicates removal: ", len(files) nsub = 0 catalog[dsetName]["vetted"] = True if not light: info = catalog[dsetName]["files"] = files for ifile,finfo in enumerate(files): name = finfo["name"] if self.force_ or not "weights" in finfo: nsub+=1 self.parallel_.run(SamplesManager.checkFile,[self,name,dsetName,ifile],interactive=(self.queue_!=None)) if nsub == 0: print "No files needed to be checked" else: print "Submitted %d check jobs" % nsub if wait: self.parallel_.wait(printOutput=False) self.parallel_ = None if writeCatalog: self.writeCatalog(catalog)
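# Standalone illustration of how checkDatasetFiles above spots duplicate job
# outputs: the trailing "_<index>" of each file name is compared, and clashes
# are resolved interactively or via a user-supplied wildcard. The file names
# are invented for the example.
def job_index(name):
    return name.rstrip(".root").rsplit("_", 1)[-1]

name_a = "output_myDataset_12.root"
name_b = "output_myDataset_resub_12.root"
print job_index(name_a), job_index(name_b), job_index(name_a) == job_index(name_b)   # 12 12 True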
class JobsManager(object):

    def __init__(self, defaults={}):
        """
        Constructor:
        @defaults: default options
        """

        # Command line options
        parser = OptionParser(option_list=[
            make_option("--processes", action="callback", callback=Load(), type="string", dest="processes",
                        default={}, help="List of datasets to be analyzed"),
            make_option("--load",  # special option to load whole configuration from JSON
                        action="callback", callback=Load(), dest="__opts__", type="string",
                        help="load JSON file with configuration", metavar="CONFIG.json"),
            make_option("-n", "--njobs", dest="njobs", type="int", default=0,
                        help="number of jobs to run"),
            make_option("-q", "--queue", dest="queue", type="string", default=None,
                        help="LSF queue to use. default: %default"),
            make_option("-o", "--output", dest="output", type="string", default="output.root",
                        help="output file name. default: %default"),
            make_option("-d", "--outputDir", dest="outputDir", type="string", default=None,
                        help="output folder. default: %default"),
            make_option("-x", "--jobEx", dest="jobExe", type="string", default=None,
                        help="job executable. default: %default"),
            make_option("-c", "--cmdLine", dest="cmdLine", type="string", default=None,
                        help="job command line. The script arguments will be prepended. default: %default"),
            make_option("--dumpCfg", action="store_true", default=False,
                        help="dump configuration and exit. default: %default"),
            make_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
                        help="default: %default"),
            make_option("-m", "--max-resubmissions", dest="maxResub", type="int", default=3),
            make_option("-N", "--ncpu", dest="ncpu", type="int", default=cpu_count()),
            make_option("-H", "--hadd", dest="hadd", default=False, action="store_true",
                        help="hadd output files when all jobs are finished."),
            make_option("-D", "--hadd-dateset", dest="hadd_dataset", default=False, action="store_true",
                        help="hadd output per dataset when all jobs are finished."),
            make_option("-P", "--hadd-process", dest="hadd_process", default=False, action="store_true",
                        help="hadd output per process when all jobs are finished."),
            make_option("--dry-run", dest="dry_run", default=False, action="store_true",
                        help="do not actually run the jobs."),
            make_option("-C", "--cont", dest="cont", default=False, action="store_true",
                        help="continue interrupted task."),
        ])

        # parse the command line
        (self.options, self.args) = parser.parse_args()
        self.maxResub = self.options.maxResub

        if self.options.cmdLine:
            self.args = self.args + shell_args(str(self.options.cmdLine))

        if self.options.jobExe:
            self.args = [shell_expand(self.options.jobExe)] + self.args

    def __call__(self):
        """
        __call__
        Run all jobs.
""" self.parallel = Parallel(self.options.ncpu,lsfQueue=self.options.queue,lsfJobName="%s/runJobs" % self.options.outputDir,asyncLsf=False) self.jobs = None if self.options.cont: pass else: self.firstRun() self.monitor() def firstRun(self): (options,args) = (self.options, self.args) parallel = self.parallel outputPfx = options.output.replace(".root","") if not options.outputDir: sys.exit("Please specify an output folder") if options.dumpCfg: print ( dumpCfg(options) ) sys.exit(0) if not os.path.exists(options.outputDir): os.mkdir(options.outputDir) outputPfx = "%s/%s" % ( options.outputDir, outputPfx ) args.append("processIdMap=%s/config.json" % options.outputDir) ## options.cmdLine += " %s" % (" ".join(args)) options.cmdLine = str(" ".join(args)) with open("%s/config.json" % (options.outputDir), "w+" ) as fout: fout.write( dumpCfg(options) ) outfiles = [] doutfiles = {} poutfiles = {} jobs = [] for name,datasets in options.processes.iteritems(): poutfiles[name] = ( "%s_%s.root" % ( outputPfx,name), [] ) for dset in datasets: job = args[0] if self.options.jobExe: pyjob = "" else: pyjob = job jobargs = copy(args[1:]) dsetName = dset.lstrip("/").replace("/","_") outfile = "%s_%s.root" % ( outputPfx, dsetName ) doutfiles[dset] = ( str(outfile),[] ) jobargs.extend( ["dataset=%s" % dset, "outputFile=%s" % outfile ] ) print "running: %s %s" % ( job, " ".join(jobargs) ) if options.njobs != 0: print "splitting in (up to) %d jobs\n checking how many are needed... " % options.njobs dnjobs = 0 dargs = jobargs+shell_args("nJobs=%d" % (options.njobs)) ret,out = parallel.run("python %s" % pyjob,dargs+shell_args("dryRun=1 getMaxJobs=1 dumpPython=%s.py" % os.path.join(options.outputDir,dsetName) ),interactive=True)[2] maxJobs = self.getMaxJobs(out) if maxJobs < 0: print "Error getting numer of jobs to be submitted" print out hadd = self.getHadd(out,outfile) ## for ijob in range(options.njobs): for ijob in range(maxJobs): ## FIXME allow specific job selection ## iargs = dargs+shell_args("jobId=%d" % (ijob)) iargs = jobargs+shell_args("nJobs=%d jobId=%d" % (maxJobs, ijob)) ## # run python <command-line> dryRun=1 to check if the job needs to be run ## ret,out = parallel.run("python %s" % pyjob,iargs+shell_args("dryRun=1"),interactive=True)[2] ## if ret != 0: ## continue dnjobs += 1 if not options.dry_run: ## FIXME: ## - handle output ## - store log files parallel.run(job,iargs) ## outfiles.append( outfile.replace(".root","_%d.root" % ijob) ) ## output = self.getHadd(out,outfile.replace(".root","_%d.root" % ijob)) output = hadd.replace(".root","_%d.root" % ijob) outfiles.append( output ) doutfiles[dset][1].append( outfiles[-1] ) poutfiles[name][1].append( outfiles[-1] ) jobs.append( (job,iargs,output,0,-1) ) print " %d jobs actually submitted" % dnjobs else: ret,out = parallel.run("python %s" % pyjob,jobargs+shell_args("dryRun=1 dumpPython=%s.py" % os.path.join(options.outputDir,dsetName)),interactive=True)[2] if ret != 0: print ret,out continue if not options.dry_run: parallel.run(job,jobargs) ## outfiles.append( outfile ) output = self.getHadd(out,outfile) outfiles.append( output ) jobs.append( (job,jobargs,output,0,-1) ) poutfiles[name][1].append( outfiles[-1] ) print task_config = { "jobs" : jobs, "datasets_output" : doutfiles, "process_output" : poutfiles, "output" : outfiles, "outputPfx" : outputPfx } with open("%s/task_config.json" % (options.outputDir), "w+" ) as cfout: cfout.write( json.dumps(task_config,indent=4) ) cfout.close() def monitor(self): (options,args) = (self.options, self.args) parallel 
= self.parallel with open("%s/task_config.json" % (options.outputDir), "r" ) as cfin: task_config = json.loads(cfin.read()) doutfiles = task_config["datasets_output"] poutfiles = task_config["process_output"] outfiles = task_config["output"] outputPfx = task_config["outputPfx"] if not options.dry_run: ## FIXME: job resubmission self.jobs = task_config["jobs"] returns = self.wait(parallel,self) task_config["jobs"] = self.jobs if options.hadd: print "All jobs finished. Merging output." p = Parallel(options.ncpu) hadd = "hadd -f " if options.hadd_process: for proc,out in poutfiles.iteritems(): outfile,outfiles = out p.run("%s %s" % (hadd, outfile), outfiles ) if options.hadd_dataset: if options.hadd_process: hadd += " -T" for dset,out in doutfiles.iteritems(): outfile,outfiles = out p.run("%s %s" % (hadd,outfile), outfiles) if not (options.hadd_process or options.hadd_dataset): p.run("%s %s.root" % (hadd,outputPfx), outfiles) self.wait(p) with open("%s/task_config.json" % (options.outputDir), "w+" ) as cfout: cfout.write( json.dumps(task_config,indent=4) ) cfout.close() self.parallel.stop() def wait(self,parallel,handler=None): return parallel.wait(handler) ### for i in range(parallel.njobs): ### print "Finished jobs: %d. Total jobs: %d" % (i, parallel.njobs) ### job, jobargs, ret = parallel.returned.get() ### print "finished: %s %s" % ( job, " ".join(jobargs) ) ### for line in ret[1].split("\n"): ### print line def handleJobOutput(self,job,jobargs,ret): print "------------" print "Job finished: (exit code %d) '%s' '%s'" % ( ret[0], job, " ".join(jobargs) ) print "Job output: " print for line in ret[1].split("\n"): print line print jobargs = shell_args(" ".join(jobargs)) job = jobargs[0] jobargs = jobargs[1:] for ijob in self.jobs: inam,iargs = ijob[0:2] ### print inam, job, inam == job ### for i,a in enumerate(iargs): ### b = jobargs[i] ### print a, b, a == b if inam == job and iargs == jobargs: ijob[4] = ret[0] if ret[0] != 0: print "" print "Job failed. Number of resubmissions: %d / %d. " % (ijob[3], self.maxResub), if ijob[3] < self.maxResub: print "Resubmitting." self.parallel.run(inam,iargs) ijob[3] += 1 print "------------" return 1 else: print "Giving up." print "------------" return 0 def getHadd(self,stg,fallback): for line in stg.split("\n"): if line.startswith("hadd:"): return line.replace("hadd:","") return fallback def getMaxJobs(self,stg): for line in stg.split("\n"): if line.startswith("maxJobs:"): return int(line.replace("maxJobs:","")) return -1
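# A hypothetical driver for the JobsManager above. Options are parsed from the
# command line by OptionParser in __init__, so a typical invocation (script
# name and arguments invented for illustration) could look like:
#
#   ./runJobs.py -d out_dir -n 10 -q 8nh -H -D -P --processes procs.json myAnalysis.py maxEvents=-1
#
if __name__ == "__main__":
    jm = JobsManager()
    jm()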
class SamplesManager(object): def __init__(self, catalog, cross_sections=["$CMSSW_BASE/src/flashgg/MetaData/data/cross_sections.json"], dbs_instance="prod/phys03", queue=None ): """ Constructur: @catalog: json file used to read/write dataset information @cross_sections: json file where samples cross sections are stored @dbs_instance: DBS instance tp use """ self.cross_sections_ = {} self.dbs_instance_ = dbs_instance for xsecFile in cross_sections: fname = shell_expand(xsecFile) self.cross_sections_.update( json.loads( open(fname).read() ) ) self.catalog_ = shell_expand(catalog) self.parallel_ = None self.sem_ = Semaphore() print "Will use the following datasets catalog:" print self.catalog_ self.queue_ = queue def importFromDAS(self,datasets): """ Import datasets from DAS to the catalog. @datasets: wildecard to be usd in dataset query """ catalog = self.readCatalog() print "Importing from das %s" % datasets if "*" in datasets: response = das_query("https://cmsweb.cern.ch","dataset dataset=%s | grep dataset.name" % datasets, 0, 0, False, self.dbs_instance_) datasets=[] for d in response["data"]: datasets.append( d["dataset"][0]["name"] ) print "Datasets to import" print "\n".join(datasets) for dsetName in datasets: print "Importing %s" % dsetName files = self.getFilesFomDAS(dsetName) if dsetName in catalog: catalog[ dsetName ]["files"] = files else: catalog[ dsetName ] = { "files" : files } print "Writing catalog" self.writeCatalog(catalog) print "Done" def getFilesFomDAS(self,dsetName): """ Read dataset files from DAS. @dsetName: dataset name """ response = das_query("https://cmsweb.cern.ch","file dataset=%s | grep file.name,file.nevents" % dsetName, 0, 0, False, self.dbs_instance_) files=[] for d in response["data"]: for jf in d["file"]: if "nevents" in jf: files.append({ "name" : jf["name"], "nevents" : jf["nevents"] }) break ## files.append( { "name" : d["file"][0]["name"], "nevents" : d["file"][0]["nevents"] } ) return files def importFromEOS(self,folders): """ Import datasets from DAS to the catalog. @datasets: dataset to be imported """ catalog = self.readCatalog() for folder in folders: dsetName = "" while not len(dsetName.split("/")) == 4: print "enter dataset name for folder %s" % folder, dsetName = raw_input() print "Importing %s as %s" % (folder,dsetName) files = self.getFilesFomEOS(folder) if dsetName in catalog: catalog[ dsetName ]["files"] = files else: catalog[ dsetName ] = { "files" : files } print "Writing catalog" self.writeCatalog(catalog) print "Done" def getFilesFomEOS(self,dsetName): """ Read dataset files crawling EOS. @dsetName: dataset name Note: not implemented """ if not self.parallel_: self.parallel_ = Parallel(200,self.queue_) ret,out = self.parallel_.run("/afs/cern.ch/project/eos/installation/0.3.15/bin/eos.select",["find",dsetName],interactive=True)[2] print out files = [] for line in out.split("\n"): if line.endswith(".root"): files.append( {"name":line.replace("/eos/cms",""), "nevents":0} ) return files def findDuplicates(self,dsetName): """ Find duplicate job outputs in dataset. @dsetName: dataset name Note: not implemented """ pass def invalidateBadFiles(self,dsetName): """ Invalidate duplicate job output and corrupted files in DAS. @dsetName: dataset name Note: not implemented """ pass def checkAllDatasets(self): """ Look for corrupted files in the whole catalog. 
""" catalog = self.readCatalog() self.parallel_ = Parallel(50,self.queue_) ## self.parallel_ = Parallel(1,self.queue_) print "Checking all datasets" for dataset in catalog.keys(): self.checkDatasetFiles(dataset,catalog) outcomes = self.parallel_.wait() for dsetName,ifile,fName,ret,out in outcomes: info = catalog[dsetName]["files"][ifile] if info["name"] != fName: print "Inconsistent outcome ", info["name"], dsetName,ifile,fName,ret,out else: if ret != 0: info["bad"] = True else: extraInfo = json.loads(str(out)) for key,val in extraInfo.iteritems(): info[key] = val print "Writing catalog" self.writeCatalog(catalog) print "Done" def checkDatasetFiles(self,dsetName,catalog=None): """ Look for corrupted files in dataset. @dsetName: dataset name Note: not implemented """ writeCatalog = False if not catalog: catalog = self.readCatalog() writeCatalog = True wait = False if not self.parallel_: self.parallel_ = Parallel(16,self.queue_) wait = True print "Checking dataset",dsetName info = catalog[dsetName] files = info["files"] print len(files) for ifile,finfo in enumerate(files): name = finfo["name"] self.parallel_.run(SamplesManager.checkFile,[self,name,dsetName,ifile]) if wait: self.parallel_.wait() self.parallel_ = None if writeCatalog: self.writeCatalog(catalog) def reviewCatalog(self): datasets,catalog = self.getAllDatasets() primaries = {} keepAll = False for d in datasets: if not keepAll: reply = ask_user("keep this dataset (yes/no/all)?\n %s\n" % d, ["y","n","a"]) if reply == "n": catalog.pop(d) continue if reply == "a": keepAll = True primary = d.split("/")[1] if not primary in primaries: primaries[ primary ] = [] primaries[ primary ].append(d) for name,val in primaries.iteritems(): if len(val) == 1: continue reply = ask_user("More than one sample for %s:\n %s\nKeep all?" % (name,"\n ".join(val))) if reply == "n": for d in val: reply = ask_user("keep this dataset?\n %s\n" % d) if reply == "n": catalog.pop(d) self.writeCatalog(catalog) def checkFile(self,fileName,dsetName,ifile): """ Check if file is valid. @fileName: file name """ ## fName = "root://eoscms//eos/cms%s" % fileName fName = fileName tmp = ".tmp%s_%d.json"%(dsetName.replace("/","_"),ifile) ## print "fggCheckFile.py",[fName,tmp,"2>/dev/null"] ret,out = self.parallel_.run("fggCheckFile.py",[fName,tmp,"2>/dev/null"],interactive=True)[2] try: fout = open(tmp) out = fout.read() fout.close() except IOError, e: print ret, out print e out = "{}" os.remove(tmp) return dsetName,ifile,fileName,ret,out
dataset = Dataset(dataset_name=args.dataset) mlp_aux = MLP_AUX(dataset, args.negative_sampling_size, eval(args.layers), args.epochs, args.batch_size, args.validation_split, args.user_sampling_size, args.core_number, args.sim_threshold) model = mlp_aux.train_model() hits, ndcgs = evaluate_model(model, dataset.test_data, dataset.test_negatives, 10, 1, True) print("Hitrate: {}".format(sum(hits) / len(hits))) print("NDCG: {}".format(sum(ndcgs) / len(ndcgs))) elif args.network_type == 'parallel': dataset = Dataset(dataset_name=args.dataset) parallel = Parallel(dataset, args.negative_sampling_size, eval(args.layers), args.epochs, args.batch_size, args.validation_split) model = parallel.train_model() hits, ndcgs = evaluate_model(model, dataset.test_data, dataset.test_negatives, 10, 1) print("Hitrate: {}".format(sum(hits) / len(hits))) print("NDCG: {}".format(sum(ndcgs) / len(ndcgs))) elif args.network_type == 'parallel-aux': dataset = Dataset(dataset_name=args.dataset) parallel_aux = Parallel_AUX(dataset, args.negative_sampling_size, eval(args.layers), args.epochs, args.batch_size, args.validation_split, args.user_sampling_size, args.core_number, args.sim_threshold) model = parallel_aux.train_model() hits, ndcgs = evaluate_model(model, dataset.test_data,
class ParallelTrigger(object): """Parallel port and dummy triggering support .. warning:: When using the parallel port, calling :meth:`expyfun.ExperimentController.start_stimulus` will automatically invoke a stamping of the 1 trigger, which will in turn cause a delay equal to that of ``high_duration``. This can effect e.g. :class:`EyelinkController` timing. Parameters ---------- mode : str 'parallel' for real use. 'dummy', passes all calls. address : str | int | None The address to use. On Linux this should be a string path like ``'/dev/parport0'`` (equivalent to None), on Windows it should be an integer address like ``888`` or ``0x378`` (equivalent to None). high_duration : float Amount of time (seconds) to leave the trigger high whenever sending a trigger. verbose : bool, str, int, or None If not None, override default verbose level (see expyfun.verbose). Notes ----- Parallel port activation is enabled by using the ``trigger_controller`` argument of :class:`expyfun.ExperimentController`. On Linux, parallel port may require some combination of the following: 1. ``sudo modprobe ppdev`` 2. Add user to ``lp`` group (``/etc/group``) 3. Run ``sudo rmmod lp`` (otherwise ``lp`` takes exclusive control) 4. Edit ``/etc/modprobe.d/blacklist.conf`` to add ``blacklist lp`` The ``parallel`` module must also be installed. On Windows, you may need to download ``inpout32.dll`` from someplace like http://www.highrez.co.uk/downloads/inpout32/. """ @verbose_dec def __init__(self, mode='dummy', address=None, high_duration=0.005, verbose=None): if mode == 'parallel': if sys.platform.startswith('linux'): address = '/dev/parport0' if address is None else address if not isinstance(address, string_types): raise ValueError('addrss must be a string or None, got %s ' 'of type %s' % (address, type(address))) from parallel import Parallel self._port = Parallel(address) self._portname = address self._set_data = self._port.setData elif sys.platform.startswith('win'): from ctypes import windll if not hasattr(windll, 'inpout32'): raise SystemError( 'Must have inpout32 installed, see:\n\n' 'http://www.highrez.co.uk/downloads/inpout32/') base = 0x378 if address is None else address if isinstance(base, string_types): base = int(base, 16) if not isinstance(base, int): raise ValueError('address must be int or None, got %s of ' 'type %s' % (base, type(base))) self._port = windll.inpout32 mask = np.uint8(1 << 5 | 1 << 6 | 1 << 7) # Use ECP to put the port into byte mode val = int((self._port.Inp32(base + 0x402) & ~mask) | (1 << 5)) self._port.Out32(base + 0x402, val) # Now to make sure the port is in output mode we need to make # sure that bit 5 of the control register is not set val = int(self._port.Inp32(base + 2) & ~np.uint8(1 << 5)) self._port.Out32(base + 2, val) self._set_data = lambda data: self._port.Out32(base, data) self._portname = str(base) else: raise NotImplementedError('Parallel port triggering only ' 'supported on Linux and Windows') else: # mode == 'dummy': self._port = self._portname = None self._trigger_list = list() self._set_data = lambda x: (self._trigger_list.append(x) if x != 0 else None) self.high_duration = high_duration self.mode = mode def __repr__(self): return '<ParallelTrigger : %s (%s)>' % (self.mode, self._portname) def _stamp_trigger(self, trig): """Fake stamping""" self._set_data(int(trig)) wait_secs(self.high_duration) self._set_data(0) def stamp_triggers(self, triggers, delay=0.03, wait_for_last=True): """Stamp a list of triggers with a given inter-trigger delay Parameters ---------- triggers 
: list No input checking is done, so ensure triggers is a list, with each entry an integer with fewer than 8 bits (max 255). delay : float The inter-trigger delay. wait_for_last : bool If True, wait for last trigger to be stamped before returning. """ for ti, trig in enumerate(triggers): self._stamp_trigger(trig) if ti < len(triggers) - 1 or wait_for_last: wait_secs(delay - self.high_duration) def close(self): """Release hardware interfaces """ if hasattr(self, '_port'): del self._port def __del__(self): return self.close()
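# A minimal smoke test of the class above in 'dummy' mode, which needs no
# parallel-port hardware: non-zero triggers are simply recorded in
# _trigger_list (an internal attribute, inspected here only for illustration).
trig = ParallelTrigger(mode='dummy')
trig.stamp_triggers([1, 4, 8], delay=0.01)
assert trig._trigger_list == [1, 4, 8]
trig.close()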
def fit(self, X, y, sample_weight=None):  # ordinary least-squares linear regression
    """
    Fit linear model.

    Parameters
    ----------
    X : array-like or sparse matrix, shape (n_samples, n_features)
        Training data

    y : array_like, shape (n_samples, n_targets)
        Target values. Will be cast to X's dtype if necessary

    sample_weight : numpy array of shape [n_samples]
        Individual weights for each sample

        .. versionadded:: 0.17
           parameter *sample_weight* support to LinearRegression.

    Returns
    -------
    self : returns an instance of self.
    """

    n_jobs_ = self.n_jobs
    X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
                     y_numeric=True, multi_output=True)
    # validate the inputs and convert them to the expected format
    # ------------------------------
    # numpy's atleast_xd helpers (atleast_1d, atleast_2d, atleast_3d)
    # promote the input so it is treated as at least x-dimensional

    if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
        # sample_weight must be a 1D array (or a scalar)
        raise ValueError("Sample weights must be 1D array or scalar")

    X, y, X_offset, y_offset, X_scale = self._preprocess_data(
        X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
        copy=self.copy_X, sample_weight=sample_weight)
    # center and (optionally) normalize the X data

    if sample_weight is not None:
        # Sample weight can be implemented via a simple rescaling.
        X, y = _rescale_data(X, y, sample_weight)

    if sp.issparse(X):
        if y.ndim < 2:
            # delegate directly to scipy.sparse.linalg's least-squares solver
            out = sparse_lsqr(X, y)
            self.coef_ = out[0]
            self._residues = out[3]
        else:
            # sparse_lstsq cannot handle y with shape (M, K)
            outs = Parallel(n_jobs=n_jobs_)(
                delayed(sparse_lsqr)(X, y[:, j].ravel())
                for j in range(y.shape[1]))
            self.coef_ = np.vstack(out[0] for out in outs)
            self._residues = np.vstack(out[3] for out in outs)
    else:
        # compute the least-squares solution to the equation Ax = b
        self.coef_, self._residues, self.rank_, self.singular_ = \
            linalg.lstsq(X, y)
        self.coef_ = self.coef_.T

    if y.ndim == 1:
        self.coef_ = np.ravel(self.coef_)
    self._set_intercept(X_offset, y_offset, X_scale)
    return self
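# A self-contained usage example of the fit method above through the public
# scikit-learn API (small dense problem; sample_weight is optional).
import numpy as np
from sklearn.linear_model import LinearRegression

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([1.0, 3.0, 5.0, 7.0])                    # y = 2*x + 1
reg = LinearRegression().fit(X, y, sample_weight=np.ones(len(y)))
print(reg.coef_, reg.intercept_)                      # ~[2.] and ~1.0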
def update(self, nprocs=1, factor=None, bw_hz=None, foi_hz=None, fs_hz=None,
           f_ord=None, ftype=None,
           n_freqs=None, n_samples=None, n_channels=None):

    self.n_channels = n_channels if n_channels is not None else self.n_channels
    self.n_freqs = n_freqs if n_freqs is not None else self.n_freqs
    self.n_processes = min(Parallel.check_nprocs() - 1, self.n_freqs) if nprocs != 1 else 1

    # Signal process properties
    self.decimate_by = factor
    self.n_samples = int(n_samples / self.decimate_by)
    self.sample_rate = fs_hz / self.decimate_by if fs_hz is not None else self.sample_rate

    self.bandwidth = bw_hz if bw_hz is not None else self.bandwidth

    self.w_, self.H_ = self.create_filter(f_ord, self.bandwidth / 2.0, self.sample_rate / 2.0,
                                          self.n_samples, ftype='fir', output='freq')
    self.Hwin = self.H_[np.logical_and(self.w_ >= -self.bandwidth / 2.0,
                                       self.w_ < self.bandwidth / 2.0)]
    self.n_samples_procs = self.Hwin.size

    # Setup center frequencies
    if len(foi_hz) > 1:
        cf = np.arange(*foi_hz, np.diff(foi_hz) / self.n_freqs, dtype=int)
        diff = cf.shape[0] - self.n_freqs
        if diff > 0:
            cf = cf[:-diff]
    else:
        cf = foi_hz

    self.freqs = np.asarray([(f - self.bandwidth / 2, f + self.bandwidth / 2) for f in cf])

    # Create rules for how to handle the data
    self._encoder_rule()
    self._decoder_rule()

    if self.n_processes > 1:
        self.pfunc = Parallel(self.multiply, nprocs=self.n_processes, axis=0, flag=0,
                              ins_shape=[(self.n_channels, self.n_freqs, self.n_samples_procs),
                                         (1, self.n_samples_procs)],
                              ins_dtype=[np.complex64, np.complex64],
                              out_shape=(self.n_channels, self.n_freqs, self.n_samples_procs),
                              out_dtype=np.complex64)
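# Standalone sketch of the centre-frequency grid constructed above: n_freqs
# centres are spread across foi_hz and a band of width `bandwidth` is placed
# around each. The numbers are illustrative only.
import numpy as np

foi_hz, n_freqs, bandwidth = (10, 50), 8, 5.0
cf = np.arange(foi_hz[0], foi_hz[1], float(np.diff(foi_hz)[0]) / n_freqs, dtype=int)
bands = np.asarray([(f - bandwidth / 2, f + bandwidth / 2) for f in cf])
print(bands.shape)   # (8, 2)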
# !setup!
import neworder
from parallel import Parallel  # import our model definition

#neworder.verbose()
#neworder.checked(False)

# must be MPI enabled
assert neworder.mpi.size() > 1, "This configuration requires MPI with >1 process"
# !setup!

# !run!
population_size = 100
p = 0.01
timeline = neworder.LinearTimeline(0, 10, 10)

model = Parallel(timeline, p, population_size)
neworder.run(model)
#!run!