Example 1
    def checkAllDatasets(self):
        """
        Look for corrupted files in the whole catalog.
        """
        catalog = self.readCatalog()
        
        self.parallel_ = Parallel(50,self.queue_)
        ## self.parallel_ = Parallel(1,self.queue_)

        print "Checking all datasets"
        for dataset in catalog.keys():            
            self.checkDatasetFiles(dataset,catalog)
        
        outcomes = self.parallel_.wait(printOutput=False)

        ## for dsetName,ifile,fName,ret,out in outcomes:
        for ign1, ign2, outcome in outcomes:
            dsetName,ifile,fName,ret,out = outcome
            info = catalog[dsetName]["files"][ifile]
            if info["name"] != fName:
                print "Inconsistent outcome ", info["name"], dsetName,ifile,fName,ret,out
            else:
                if ret != 0:
                    info["bad"] = True
                else:
                    extraInfo = json.loads(str(out))
                    for key,val in extraInfo.iteritems():
                        info[key] = val

        print "Writing catalog"
        self.writeCatalog(catalog)
        print "Done"
Example 2
    def __init__(self,
                 mode='dummy',
                 address=None,
                 high_duration=0.005,
                 verbose=None):
        if mode == 'parallel':
            if sys.platform.startswith('linux'):
                address = '/dev/parport0' if address is None else address
                if not isinstance(address, string_types):
                    raise ValueError('address must be a string or None, got %s '
                                     'of type %s' % (address, type(address)))
                from parallel import Parallel
                self._port = Parallel(address)
                self._portname = address
                self._set_data = self._port.setData
            elif sys.platform.startswith('win'):
                from ctypes import windll
                if not hasattr(windll, 'inpout32'):
                    raise SystemError(
                        'Must have inpout32 installed, see:\n\n'
                        'http://www.highrez.co.uk/downloads/inpout32/')

                base = 0x378 if address is None else address
                if isinstance(base, string_types):
                    base = int(base, 16)
                if not isinstance(base, int):
                    raise ValueError('address must be int or None, got %s of '
                                     'type %s' % (base, type(base)))
                self._port = windll.inpout32
                mask = np.uint8(1 << 5 | 1 << 6 | 1 << 7)
                # Use ECP to put the port into byte mode
                val = int((self._port.Inp32(base + 0x402) & ~mask) | (1 << 5))
                self._port.Out32(base + 0x402, val)

                # Now to make sure the port is in output mode we need to make
                # sure that bit 5 of the control register is not set
                val = int(self._port.Inp32(base + 2) & ~np.uint8(1 << 5))
                self._port.Out32(base + 2, val)
                self._set_data = lambda data: self._port.Out32(base, data)
                self._portname = str(base)
            else:
                raise NotImplementedError('Parallel port triggering only '
                                          'supported on Linux and Windows')
        else:  # mode == 'dummy':
            self._port = self._portname = None
            self._trigger_list = list()
            self._set_data = lambda x: (self._trigger_list.append(x)
                                        if x != 0 else None)
        self.high_duration = high_duration
        self.mode = mode
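
A minimal usage sketch for the object built above: raise the data lines, hold them for high_duration, then clear them. The pulse() helper is not part of the original code; it only relies on the _set_data callable and the high_duration attribute assigned in the constructor.

import time

def pulse(trigger, value=1):
    # Hypothetical helper: write a non-zero value, keep the line high for
    # high_duration seconds, then drop all pins back to zero.
    trigger._set_data(int(value))
    time.sleep(trigger.high_duration)
    trigger._set_data(0)

In 'dummy' mode the same call simply records non-zero values in _trigger_list, which makes the class easy to exercise without hardware.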
Example 3
 def __init__(self, motor_inputs, state=0, delay=0.05):
     '''
     :param motor_inputs: Ordered list of parallel values to turn motor
     :type motor_inputs: list or tuple
     :param state: Initial starting state of motor position
     :type state: int
     :param delay: Delay between steps (speed)
     :type delay: float
     '''
     self.MOTOR_INPUTS = motor_inputs
     self.state = state
     self.delay = delay
     # Setup parallel interface on first init
     self.parallel_interface = Parallel()
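
The class above only stores its configuration; for context, a hedged sketch of how such a motor object is typically driven, cycling through MOTOR_INPUTS with setData() and pausing delay seconds between steps. run_sequence() is hypothetical and assumes pyparallel's Parallel.setData(), as used elsewhere in these examples.

import time

def run_sequence(motor, steps=8, direction=1):
    # Advance the motor one position at a time, writing the parallel-port
    # value that corresponds to each state.
    for _ in range(steps):
        motor.state = (motor.state + direction) % len(motor.MOTOR_INPUTS)
        motor.parallel_interface.setData(motor.MOTOR_INPUTS[motor.state])
        time.sleep(motor.delay)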
Example 4
class USARTTest(unittest.TestCase):

    par = Parallel()

    def __init__(self, *args):
        unittest.TestCase.__init__(self, *args)
        self.ser = Serial(_SERIAL_PATH,
                          baudrate=_BAUDRATE,
                          bytesize=8,
                          parity='N',
                          stopbits=1,
                          timeout=1,
                          xonxoff=0,
                          rtscts=0)
        self._toWrite = "Write_TEST123"
        self._toRead = "Read_TEST123"

    def testread(self):
        time.sleep(0.1)
        USARTTest.par.setData(0x01)
        time.sleep(0.1)
        self.ser.write(self._toRead + '\n')
        USARTTest.par.setData(0x00)

        print "See avr console..."

    def testwrite(self):
        time.sleep(0.1)
        USARTTest.par.setData(0x01)
        line = self.ser.readline(len(self._toWrite) + 1, '\n')
        USARTTest.par.setData(0x00)

        self.assertNotEqual(len(line), 0)

        self.assertNotEqual(line.find(self._toWrite), -1, "Received %s" % line)
Example 5
    def checkDatasetFiles(self,dsetName,catalog=None):
        """
        Look for corrupted files in dataset.
        @dsetName: dataset name
        """
        writeCatalog = False
        if not catalog:
            catalog = self.readCatalog()
            writeCatalog = True
        
        wait = False
        if not self.parallel_:
            self.parallel_ = Parallel(16,self.queue_)
            wait = True

        print "Checking dataset",dsetName
        info = catalog[dsetName]
        files = info["files"]

        print len(files)
        for ifile,finfo in enumerate(files):            
            name = finfo["name"]
            self.parallel_.run(SamplesManager.checkFile,[self,name,dsetName,ifile])

        if wait:
            self.parallel_.wait()            
            self.parallel_ = None
        if writeCatalog:
            self.writeCatalog(catalog)
Example 6
    def checkAllDatasets(self):
        """
        Look for corrupted files in the whole catalog.
        """
        catalog = self.readCatalog()
        
        self.parallel_ = Parallel(50,self.queue_)
        ## self.parallel_ = Parallel(1,self.queue_)

        print "Checking all datasets"
        for dataset in catalog.keys():            
            self.checkDatasetFiles(dataset,catalog)
        
        outcomes = self.parallel_.wait()
        for dsetName,ifile,fName,ret,out in outcomes:
            info = catalog[dsetName]["files"][ifile]
            if info["name"] != fName:
                print "Inconsistent outcome ", info["name"], dsetName,ifile,fName,ret,out
            else:
                if ret != 0:
                    info["bad"] = True
                else:
                    extraInfo = json.loads(str(out))
                    for key,val in extraInfo.iteritems():
                        info[key] = val

            print "Writing catalog"
            self.writeCatalog(catalog)
        
        print "Done"
Example 7
def main():
    p = Parallel()
    previous = True
    with Bus() as bus:
        with bus.get_service_proxy({"type": "speak"}, multiple=True) as s: 
            while True:
                current = p.getInPaperOut()
                if current != previous:
                    previous = current
                    if not current: # Button was just pressed (i.e. the paperOut pin
                        # was just shorted to ground)
                        print "Doorbell was pressed"
                        try:
                            print s["say_text"]("someone is ringing the_front doorbell", callback=None)
                        except:
                            print_exc()
                        sleep(5)
                sleep(0.03)
Example 8
    def __call__(self):
        """
        __call__
        Run all jobs.
        """
        self.parallel = Parallel(self.options.ncpu,
                                 lsfQueue=self.options.queue,
                                 lsfJobName="%s/runJobs" %
                                 self.options.outputDir,
                                 asyncLsf=False)

        self.jobs = None
        if self.options.cont:
            pass
        else:
            self.firstRun()

        self.monitor()
Example 9
    def getFilesFomEOS(self,dsetName):
        """
        Read dataset files crawling EOS.
        @dsetName: dataset name
        """
        
        if not self.parallel_:
            self.parallel_ = Parallel(200,self.queue_)
        
        ret,out = self.parallel_.run("/afs/cern.ch/project/eos/installation/0.3.15/bin/eos.select",["find",dsetName],interactive=True)[2]
        ## print out
        files = []
        for line in out.split("\n"):
            if line.endswith(".root"):
                files.append( {"name":line.replace("/eos/cms",""), "nevents":0} )

        return files
Example 10
 def __call__(self):
     """
     __call__
     Run all jobs.
     """
     self.parallel = Parallel(self.options.ncpu,lsfQueue=self.options.queue,lsfJobName="%s/runJobs" % self.options.outputDir,asyncLsf=False)
     
     self.jobs = None
     if self.options.cont:
         pass
     else:
         self.firstRun()
         
     self.monitor()
Example 12
    def getFilesFomEOS(self,dsetName):
        """
        Read dataset files crawling EOS.
        @dsetName: dataset name
        """
        
        if not self.parallel_:
            self.parallel_ = Parallel(200,self.queue_,maxThreads=self.maxThreads_,asyncLsf=True)
        
        ret,out = self.parallel_.run("/afs/cern.ch/project/eos/installation/0.3.15/bin/eos.select",["find",dsetName],interactive=True)[2]
        files = []
        for line in out.split("\n"):
            if line.endswith(".root"):
                files.append( {"name":line.replace("/eos/cms",""), "nevents":0} )

        return files
Example 13
    def monitor(self):

        (options, args) = (self.options, self.args)
        parallel = self.parallel

        with open("%s/task_config.json" % (options.outputDir), "r") as cfin:
            task_config = json.loads(cfin.read())

        doutfiles = task_config["datasets_output"]
        poutfiles = task_config["process_output"]
        outfiles = task_config["output"]
        outputPfx = task_config["outputPfx"]

        if not options.dry_run:
            ## FIXME: job resubmission
            self.jobs = task_config["jobs"]
            returns = self.wait(parallel, self)
            task_config["jobs"] = self.jobs

        if options.hadd:
            print "All jobs finished. Merging output."
            p = Parallel(options.ncpu)
            hadd = "hadd -f "
            if options.hadd_process:
                for proc, out in poutfiles.iteritems():
                    outfile, outfiles = out
                    p.run("%s %s" % (hadd, outfile), outfiles)
            if options.hadd_dataset:
                if options.hadd_process:
                    hadd += " -T"
                for dset, out in doutfiles.iteritems():
                    outfile, outfiles = out
                    p.run("%s %s" % (hadd, outfile), outfiles)
            if not (options.hadd_process or options.hadd_dataset):
                p.run("%s %s.root" % (hadd, outputPfx), outfiles)

            self.wait(p)

        with open("%s/task_config.json" % (options.outputDir), "w+") as cfout:
            cfout.write(json.dumps(task_config, indent=4))
            cfout.close()

        self.parallel.stop()
Example 14
    def monitor(self):

        (options,args) = (self.options, self.args)
        parallel = self.parallel
        
        with open("%s/task_config.json" % (options.outputDir), "r" ) as cfin:
            task_config = json.loads(cfin.read())
        
        doutfiles = task_config["datasets_output"]
        poutfiles = task_config["process_output"]
        outfiles  = task_config["output"]
        outputPfx = task_config["outputPfx"]

        self.task_config = task_config
        
        if options.summary:
            self.printSummary()
            return

        if not options.dry_run:
            ## FIXME: job resubmission
            returns = self.wait(parallel,self)
            
        if options.hadd:
            print "All jobs finished. Merging output."
            p = Parallel(options.ncpu)
            hadd = "hadd -f "
            if options.hadd_process:
                for proc,out in poutfiles.iteritems():
                    outfile,outfiles = out
                    p.run("%s %s" % (hadd, outfile), outfiles )
            if options.hadd_dataset:
                if options.hadd_process:
                    hadd += " -T"
                for dset,out in doutfiles.iteritems():
                    outfile,outfiles = out
                    p.run("%s %s" % (hadd,outfile), outfiles) 
            if not (options.hadd_process or options.hadd_dataset):
                p.run("%s %s.root" % (hadd,outputPfx), outfiles)
            
            self.wait(p)

        self.storeTaskConfig(task_config)
        
        self.parallel.stop()
Example 15
    def monitor(self):

        (options,args) = (self.options, self.args)
        parallel = self.parallel
        
        with open("%s/task_config.json" % (options.outputDir), "r" ) as cfin:
            task_config = json.loads(cfin.read())
        
        doutfiles = task_config["datasets_output"]
        poutfiles = task_config["process_output"]
        outfiles  = task_config["output"]
        outputPfx = task_config["outputPfx"]
        

        if not options.dry_run:
            ## FIXME: job resubmission
            self.jobs = task_config["jobs"]
            returns = self.wait(parallel,self)
            task_config["jobs"] = self.jobs
            
        if options.hadd:
            print "All jobs finished. Merging output."
            p = Parallel(options.ncpu)
            hadd = "hadd -f "
            if options.hadd_process:
                for proc,out in poutfiles.iteritems():
                    outfile,outfiles = out
                    p.run("%s %s" % (hadd, outfile), outfiles )
            if options.hadd_dataset:
                if options.hadd_process:
                    hadd += " -T"
                for dset,out in doutfiles.iteritems():
                    outfile,outfiles = out
                    p.run("%s %s" % (hadd,outfile), outfiles) 
            if not (options.hadd_process or options.hadd_dataset):
                p.run("%s %s.root" % (hadd,outputPfx), outfiles)
            
            self.wait(p)

        with open("%s/task_config.json" % (options.outputDir), "w+" ) as cfout:
            cfout.write( json.dumps(task_config,indent=4) )
            cfout.close()
        
        self.parallel.stop()
Example 16
 def __call__(self):
     """
     __call__
     Run all jobs.
     """
     if self.options.summary:
         self.options.dry_run = True
         self.options.cont = True
         
     self.jobFactory = TarballJobFactory(self.options.stageTo,self.options.stageCmd,job_outdir=self.options.outputDir,
                                         batchSystem=self.options.batchSystem)
     self.parallel = Parallel(self.options.ncpu,lsfQueue=self.options.queue,lsfJobName="%s/runJobs" % self.options.outputDir,
                              asyncLsf=self.options.asyncLsf,jobDriver=self.jobFactory,batchSystem=self.options.batchSystem)
     
     self.jobs = None
     if self.options.cont:
         if self.options.asyncLsf:
             self.loadLsfMon()
     else:
         self.firstRun()
         
     self.monitor()
     self.parallel.stop()
Example 17
def insertMarks(expInfo, nombreEDF):
    ponermarcas = []
    if expInfo[EXPERIMENT_TYPE] == EMOTIV:
        from multiprocessing import Process, Queue
        import guardar
        q_marcas = Queue()
        p = Process(target=guardar.save_data, args=(
            nombreEDF,
            q_marcas,
        ))
        p.start()
        ponermarcas = 1
    elif expInfo[EXPERIMENT_TYPE] == TRADITIONAL_EEG:
        from parallel import Parallel  # Version suggested by Fede (see mail 02/08/2016)
        q_marcas = Parallel()  # Version suggested by Fede (see mail 02/08/2016)
        q_marcas.setData(0)  # Just to make sure it starts with all pins down
        ponermarcas = 2
    elif expInfo[EXPERIMENT_TYPE] == CONDUCTUAL:
        q_marcas = 1
        ponermarcas = 0
    return ponermarcas, q_marcas
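
For illustration, a hedged sketch of how the returned pair could be consumed later when an event mark has to be written. put_mark() is hypothetical; it only assumes the Queue.put() and Parallel.setData() interfaces already visible above.

import time

def put_mark(ponermarcas, q_marcas, value):
    # Hypothetical helper: route an event mark to whichever backend
    # insertMarks() selected.
    if ponermarcas == 1:       # EMOTIV: queue read by guardar.save_data
        q_marcas.put(value)
    elif ponermarcas == 2:     # traditional EEG: pulse the parallel port
        q_marcas.setData(value)
        time.sleep(0.01)
        q_marcas.setData(0)
    # ponermarcas == 0 (conductual): no marks are written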
Example 18
 def do_task(self):
     if self.task != None:
         p = Parallel()
         print(p)
         Parallel().run(self.task.logic)
Example 19
    warnings.simplefilter("ignore")

    tweets = None
    sentiment_dir = "../sentiment/"
    sentiment_models = {
        "text_blob": find_text_blob_sentiment,
        "vader": find_vader_sentiment,
    }

    for model_name, model_function in sentiment_models.items():
        sentiment_path = os.path.join(sentiment_dir, model_name) + ".pickle"
        if not os.path.exists(sentiment_path):
            if tweets is None:
                tweets = load_tweets()
                tweets = list(tweets.items())

            results = Parallel(model_function, tweets, model_name)

            sentiment = {tweet_id: value for tweet_id, value in results}
            save_pickle(sentiment, sentiment_path)

    model_name = "flair"
    sentiment_path = os.path.join(sentiment_dir, model_name) + ".pickle"
    if not os.path.exists(sentiment_path):
        if tweets is None:
            tweets = load_tweets()
            tweets = list(tweets.items())
        sentiment = find_flair_sentiment(tweets, chunk_len=100000)
        sentiment_models[model_name] = sentiment
        save_pickle(sentiment, sentiment_path)
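
Here Parallel appears to be a project-local helper that maps a sentiment function over the (tweet_id, text) pairs and returns (tweet_id, value) tuples. As a rough point of comparison only (not the project's API), a standard-library stand-in could look like this:

from multiprocessing import Pool

def parallel_map(func, items, processes=4):
    # Stand-in for the Parallel helper above: apply func to every item
    # in a worker pool and return the list of results.
    with Pool(processes) as pool:
        return pool.map(func, items)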
Example 20
def main():
    # Session info
    expInfo = {
        NAME: 'nombre',
        BIRTHDATE: 'DD/MM/AA',
        HAND: 'mano',
        EXPERIMENT_TYPE: 'conductual',
        OPERATOR: ''
    }

    # Show a form to fill in
    dlg = gui.DlgFromDict(expInfo, title='Formulario')
    if not (dlg.OK):
        core.quit()
    else:
        fileName = expInfo[NAME]
        if not os.path.exists('./Datos/' + fileName):
            os.makedirs('./Datos/' + fileName)
        dataFile = open(
            './Datos/' + fileName + '/' + str(datetime.date.today()) + '_' +
            fileName + '.csv', 'a')
        nombreEDF = './Datos/' + fileName + "/" + str(
            datetime.date.today()) + '_' + fileName
        texto = expInfo[NAME] + '; ' + str(datetime.datetime.now(
        )) + '; ' + expInfo[BIRTHDATE] + '; ' + expInfo[HAND] + '; ' + expInfo[
            EXPERIMENT_TYPE] + '; ' + expInfo[OPERATOR]
    dataFile.write(texto)

    ##########################
    ##  Screen parameters   ##
    ##########################
    res = [gtk.gdk.screen_width(), gtk.gdk.screen_height()]
    pantCompleta = True

    #win = visual.Window(res, monitor="Mi Monitor", units="pix",  color=gris, colorSpace='hex', fullscr=pantCompleta)
    win = visual.Window(res,
                        units="pix",
                        color=gris,
                        colorSpace='hex',
                        fullscr=pantCompleta,
                        monitor="testMonitor")
    win.setMouseVisible(False)

    ###########################################
    ## Initialize experiment parameters      ##
    ###########################################
    proporcion = 0.7
    pruebas = 30
    Nsess = 12

    # Timing
    #StimDur = 0.184
    #ISI = 0.986
    StimDur = 0.404
    ISI = 0.986

    ponermarcas = []
    if expInfo[EXPERIMENT_TYPE] == EMOTIV:
        from multiprocessing import Process, Queue
        import guardar
        q_marcas = Queue()
        p = Process(target=guardar.save_data, args=(
            nombreEDF,
            q_marcas,
        ))
        p.start()
        ponermarcas = 1
    elif expInfo[EXPERIMENT_TYPE] == TRADITIONAL_EEG:
        from parallel import Parallel  # Version suggested by Fede (see mail 02/08/2016)
        #from psychopy import parallel
        # BIOSEMI
        #q_marcas=parallel.ParallelPort(address=u'/dev/parport0')
        #q_marcas=parallel.PParallelDLPortIO(address=888) # Check that the parallel port address is correct
        q_marcas = Parallel()  # Version suggested by Fede (see mail 02/08/2016)
        q_marcas.setData(0)  # Just to make sure it starts with all pins down
        ponermarcas = 2
    elif expInfo[EXPERIMENT_TYPE] == CONDUCTUAL:
        q_marcas = 1
        ponermarcas = 0

    cond = pacman
    stimuli = [
        pacmanImage, "./estimulo/fantasma_naranja.png",
        "./estimulo/fantasma_rosado.png", "./estimulo/fantasma_verde.png",
        "./estimulo/fantasma_azul.png"
    ]
    pantalla_inicio = "./estimulo/pantini_pacman.png"
    run_training(win, proporcion, 10, 6, StimDur, ISI, res, gris, negro,
                 blanco, stimuli, pantalla_inicio)
    run_experiment(dataFile, win, proporcion, pruebas, Nsess, StimDur, ISI,
                   q_marcas, ponermarcas, res, gris, negro, blanco, stimuli,
                   pantalla_inicio, cond)

    cond = angry
    stimuli = [
        birdImage, "./estimulo/cerdo_naranja.png",
        "./estimulo/cerdo_rosado.png", "./estimulo/cerdo_verde.png",
        "./estimulo/cerdo_azul.png"
    ]
    pantalla_inicio = "./estimulo/pantini_angry.png"
    run_training(win, proporcion, 10, 6, StimDur, ISI, res, gris, negro,
                 blanco, stimuli, pantalla_inicio)
    run_experiment(dataFile, win, proporcion, pruebas, Nsess, StimDur, ISI,
                   q_marcas, ponermarcas, res, gris, negro, blanco, stimuli,
                   pantalla_inicio, cond)
Example 21
def process_mentions():
    """retrieves all mentions and generates captions for those who are fighting
        fit"""
    if not cfg('twitter:user_requests:bool'):
        return

    params = dict(count=200)
    sources_whitelist = cfg('twitter:sources_whitelist:list')
    mention_prefix = '@%s ' % twitter.me.screen_name.lower()

    try:
        with open('state_mentions_timeline.txt') as fp:
            since_id = int(fp.read())
        utils.logging.info('State: since_id=%d', since_id)
        params['since_id'] = since_id
    except Exception as exc:
        utils.logging.warning("There's no last id saved, so I will save the "
                              'last id I see and then quit.')
        since_id = None

    filtered_statuses = []
    statuses = [
        status for page in tweepy.Cursor(twitter.api.mentions_timeline, **
                                         params).pages() for status in page
    ]
    # they are in reverse chronological order, so put them straight
    statuses = statuses[::-1]
    if not since_id:
        since_id = statuses[-1].id
        with open('state_mentions_timeline.txt', 'wt') as fp:
            fp.write(str(since_id))
        utils.logging.info('New since_id=%d. Goodbye!', since_id)
        return

    for status in statuses:
        # ignore mentions that are not directed at me
        if not status.text.lower().startswith(mention_prefix):
            continue

        # ignore retweets
        if hasattr(status, 'retweeted_status'):
            continue

        # if the sources whitelist is enabled, ignore those who aren't on it
        if (sources_whitelist and status.source not in sources_whitelist):
            continue

        # store this status
        filtered_statuses.append(status)

    if filtered_statuses:
        utils.logging.info('Retrieved %d new mentions (from %d to %d).',
                           len(filtered_statuses), filtered_statuses[0].id,
                           filtered_statuses[-1].id)
        with open('state_mentions_timeline.txt', 'wt') as fp:
            fp.write(str(filtered_statuses[-1].id))

        Akari.warmup()

        parallel = Parallel(process_request, filtered_statuses,
                            cfg('twitter:process_threads:int') or 3)
        parallel.start()
    else:
        utils.logging.info('Retrieved no new mentions.')
Example 22
class SamplesManager(object):
    
    def __init__(self,
                 catalog,
                 cross_sections=["$CMSSW_BASE/src/flashgg/MetaData/data/cross_sections.json"],
                 dbs_instance="prod/phys03",
                 queue=None, maxThreads=200,force=False,doContinue=False
                 ):
        """
        Constructor:
        @catalog: json file used to read/write dataset information
        @cross_sections: json file where samples cross sections are stored
        @dbs_instance: DBS instance to use
        """
        self.cross_sections_ = {}
        self.dbs_instance_ = dbs_instance

        for xsecFile in cross_sections:
            fname = shell_expand(xsecFile)
            self.cross_sections_.update( json.loads( open(fname).read() ) )
            
        self.catalog_ = shell_expand(catalog)

        self.parallel_ = None
        self.sem_ = Semaphore()

        print "Will use the following datasets catalog:"
        print self.catalog_
        
        self.queue_ = queue
        self.maxThreads_ = maxThreads
        self.force_ = force
        self.continue_ = doContinue

    def importFromDAS(self,list_datasets):
        """
        Import datasets from DAS to the catalog.
        @datasets: wildcard to be used in the dataset query
        """
        catalog = self.readCatalog()
        
        print "Importing from das %s" % list_datasets
        datasets = []
        for dataset in list_datasets:
            if "*" in dataset:
                response = das_query("https://cmsweb.cern.ch","dataset dataset=%s | grep dataset.name" % dataset, 0, 0, False, self.dbs_instance_)
        
                for d in response["data"]:
                    datasets.append( d["dataset"][0]["name"] )
            else:
                datasets.append(dataset)

        print "Datasets to import"
        print "\n".join(datasets)
        for dsetName in datasets:
            print "Importing %s" % dsetName
            files = self.getFilesFomDAS(dsetName)
            self.addToDataset(catalog,dsetName,files)
            ## if dsetName in catalog:
            ##     if self.force_:
            ##         catalog[ dsetName ]["files"]  = files
            ##     else:
            ##         self.mergeDataset(catalog[ dsetName ],{ "files" : files })
            ## else:
            ##     catalog[ dsetName ] = { "files" : files }
            
        print "Writing catalog"
        self.writeCatalog(catalog)
        print "Done"
    

    def getFilesFomDAS(self,dsetName):
        """
        Read dataset files from DAS.
        @dsetName: dataset name
        """
        response = das_query("https://cmsweb.cern.ch","file dataset=%s | grep file.name,file.nevents" % dsetName, 0, 0, False, self.dbs_instance_)
        
        files=[]
        for d in response["data"]:
            for jf in d["file"]:
                if "nevents" in jf:
                    files.append({ "name" : jf["name"], "nevents" : jf["nevents"] })
                    break
                ## files.append( { "name" : d["file"][0]["name"], "nevents" : d["file"][0]["nevents"] } )

        return files

    def importFromEOS(self,folders):
        """
        Import datasets from EOS to the catalog.
        @folders: list of EOS folders to be imported
        """
        catalog = self.readCatalog()
        
        auto=False
        assumeOk=False
        for folder in folders:
            dsetName = ""

            print
            print "importing folder\n %s" % folder
            
            while not len(dsetName.split("/")) == 4:
                if auto:
                    splitFolder = folder.split("/")
                    prim, sec = splitFolder[-4:-2]
                    dsetName = "/%s/%s/USER" % (prim,sec)
                    print "guessed dataset name ", dsetName
                    if not assumeOk:
                        resp=ask_user("ok?",["y","n","a"])
                        if resp == "n":
                            dsetName = ""
                            auto=False
                        elif resp=="a":
                            assumeOk=True
                if not auto:
                    print "enter dataset name (auto/noauto to enables/disables automatic guessing) ",
                    dsetName = raw_input()
                    if(dsetName=="auto"):
                        auto=True
                    elif (dsetName=="noauto"):
                        auto=False
                
                
            print "Importing %s as %s" % (folder,dsetName)
            files = self.getFilesFomEOS(folder)            
            self.addToDataset(catalog,dsetName,files)
            ## if dsetName in catalog:
            ##     catalog[ dsetName ]["files"]  = files
            ## else:
            ##     catalog[ dsetName ] = { "files" : files }
            
        print "Writing catalog"
        self.writeCatalog(catalog)
        print "Done"
        
    def getFilesFomEOS(self,dsetName):
        """
        Read dataset files crawling EOS.
        @dsetName: dataset name
        """
        
        if not self.parallel_:
            self.parallel_ = Parallel(200,self.queue_,maxThreads=self.maxThreads_,asyncLsf=True)
        
        ret,out = self.parallel_.run("/afs/cern.ch/project/eos/installation/0.3.15/bin/eos.select",["find",dsetName],interactive=True)[2]
        ## print out
        files = []
        for line in out.split("\n"):
            if line.endswith(".root"):
                files.append( {"name":line.replace("/eos/cms",""), "nevents":0} )

        return files

    def findDuplicates(self,dsetName):
        """
        Find duplicate job outputs in dataset.
        @dsetName: dataset name
        Note: not implemented
        """
        pass
    
    def invalidateBadFiles(self,dsetName):
        """
        Invalidate duplicate job output and corrupted files in DAS.
        @dsetName: dataset name
        Note: not implemented
        """
        pass

    def checkAllDatasets(self,match=None,light=False):
        """
        Look for corrupted files in the whole catalog.
        """
        catalog = self.readCatalog()
        
        self.parallel_ = Parallel(50,self.queue_,maxThreads=self.maxThreads_,asyncLsf=True,lsfJobName=".fgg/job")
        ## self.parallel_ = Parallel(1,self.queue_)

        print "Checking all datasets"
        self.outcomes = []
        for dataset in catalog.keys():  
            if match and not fnmatch(dataset,match): continue
            self.checkDatasetFiles(dataset,catalog,light=light)
        # write catalog to avoid redoing duplicates removal
        self.writeCatalog(catalog)
                
        if self.queue_:
            self.parallel_.wait(printOutput=True,handler=self)
            outcomes = self.outcomes
        else:
            outcomes = self.parallel_.wait(printOutput=False)

        ## for dsetName,ifile,fName,ret,out in outcomes:
        nfailed = 0
        for oc in outcomes:
            ign1, ign2, outcome= oc
            ## for ign1, ign2, outcome in outcomes:
            dsetName,ifile,fName,ret,out = outcome
            info = catalog[dsetName]["files"][ifile]
            if info["name"] != fName:
                print "Inconsistent outcome ", info["name"], dsetName,ifile,fName,ret,out
            else:
                if ret != 0:
                    info["bad"] = True
                    nfailed += 1
                else:
                    info["bad"] = False
                    extraInfo = json.loads(str(out))
                    if len(extraInfo.keys()) == 0:
                        nfailed += 1
                        info["bad"] = True
                    for key,val in extraInfo.iteritems():
                        info[key] = val

        self.parallel_.stop()

        print "Writing catalog"
        self.writeCatalog(catalog)
        print "Done"

        if nfailed > 0:
            print 
            print "WARNING: some of the check jobs failed or did not return any output."
            print "         Those (%d) files were marked a bad and won't be usable for analysis." % nfailed
            print "         Re-running the check command may recover the temporary failures."
            print 
        
        if self.queue_:
            print 
            print "Note: log files may have been written in ./.fgg"
            print "      it's up to you to clean up though..."

    
    def checkDatasetFiles(self,dsetName,catalog=None,light=False):
        """
        Look for corrupted files in dataset.
        @dsetName: dataset name
        """
        writeCatalog = False
        if not catalog:
            catalog = self.readCatalog()
            writeCatalog = True
        
        wait = False
        if not self.parallel_:
            self.parallel_ = Parallel(16,self.queue_,maxThreads=self.maxThreads_,asyncLsf=True)
            wait = True

        print 
        print "Checking dataset",dsetName
        info = catalog[dsetName]
        files = info["files"]
        print "Number of files: ", len(files)
        
        if self.force_ or not catalog[dsetName].get("vetted",False):
            toremove = []
            keep_wildcard=None
            for ifil,eifil in enumerate(files):
                if ifil in toremove:
                    continue
                for jfil,ejfil in enumerate(files[ifil+1:]):
                    if ifil+jfil in toremove:
                        continue
                    if eifil["name"] == ejfil["name"]:
                        toremove.append(ifil)
                    else:
                        iid = eifil["name"].rstrip(".root").rsplit("_",1)[-1]
                        jid = ejfil["name"].rstrip(".root").rsplit("_",1)[-1]
                        if iid == jid:
                            if not keep_wildcard:
                                print "duplicated file index ", iid
                                print eifil["name"]
                                print ejfil["name"]
                                reply=ask_user("keep both (yes/no/matching)? ",["y","n","m"])
                                if reply == "m":             
                                    while not keep_wildcard:
                                        print "enter wildcard matching expression",
                                        keep_wildcard=raw_input()
                                        if ask_user("keep all files matching '%s'?" % keep_wildcard) == "n":
                                            keep_wildcard=None
                            if keep_wildcard:                            
                                imatch=fnmatch(eifil["name"],keep_wildcard)
                                jmatch=fnmatch(ejfil["name"],keep_wildcard)
                                if imatch != jmatch:
                                    if imatch: toremove.append(ifil+jfil)
                                    else: toremove.append(ifil)                            
                                    continue                       
                                else:
                                    print "duplicated file index ", iid
                                    print eifil["name"]
                                    print ejfil["name"]
                                    reply=ask_user("keep both? ")
                            if reply == "n":
                                if ask_user( "keep %s? " % ejfil["name"] ) == "n":
                                    ## files.pop(ifil+jfil)
                                    toremove.append(ifil+jfil)
                                if ask_user( "keep %s? " % eifil["name"] ) == "n":
                                    toremove.append(ifil)
                                    ## files.pop(ifil)
                                    
            for ifile in sorted(toremove,reverse=True):
                ## print ifile
                files.pop(ifile)
            
        print "After duplicates removal: ", len(files)
        nsub = 0
        catalog[dsetName]["vetted"] = True
        if not light:
            info = catalog[dsetName]["files"] = files
            for ifile,finfo in enumerate(files):            
                name = finfo["name"]
                if self.force_ or not "weights" in finfo:
                    nsub+=1
                    self.parallel_.run(SamplesManager.checkFile,[self,name,dsetName,ifile],interactive=(self.queue_!=None))
        if nsub == 0:
            print "No files needed to be checked"
        else:
            print "Submitted %d check jobs" % nsub
            
        if wait:
            self.parallel_.wait(printOutput=False)            
            self.parallel_ = None
            
        if writeCatalog:
            self.writeCatalog(catalog)

    def reviewCatalog(self):
        datasets,catalog = self.getAllDatasets()

        primaries = {}
        keepAll = False
        for d in datasets:
            if not keepAll:
                reply = ask_user("keep this dataset (yes/no/all)?\n %s\n" % d, ["y","n","a"])
                if reply == "n":
                    catalog.pop(d)
                    continue
                if reply == "a": 
                    keepAll = True
            primary = d.split("/")[1]
            if not primary in primaries:
                primaries[ primary ] = []
                
            primaries[ primary ].append(d)
            
        for name,val in primaries.iteritems():
            if len(val) == 1: continue
            reply = ask_user("More than one sample for %s:\n %s\nKeep all (yes/no/merge)?" % (name,"\n ".join(val)),["y","n","m"])
            if reply == "m":
                dst = val[0]
                for merge in val[1:]:
                    self.mergeDataset(catalog[dst],catalog[merge])
                    catalog.pop(merge)
            if reply == "n":
                for d in val:
                    reply = ask_user("keep this dataset?\n %s\n" % d)
                    if reply == "n":
                        catalog.pop(d)
           
        self.writeCatalog(catalog)
        
    def mergeDataset(self,dst,merge):
        dst["vetted"]=False
        dstFiles=dst["files"]
        mergeFiles=merge["files"]
        for fil in mergeFiles:
            skip = False
            for dfil in dstFiles:
                if dfil["name"] == fil["name"]:
                    skip = True
            if not skip:
                dstFiles.append( fil )
        
    def addToDataset(self,catalog,dsetName,files):
        if dsetName in catalog:
            if self.force_:
                catalog[ dsetName ]["files"]  = files
            else:
                self.mergeDataset(catalog[ dsetName ],{ "files" : files })
        else:
            catalog[ dsetName ] = { "files" : files }


    def checkFile(self,fileName,dsetName,ifile):
        """
        Check if file is valid.
        @fileName: file name
        """
        fName = fileName
        tmp = ".tmp%s_%d.json"%(dsetName.replace("/","_"),ifile)
        if self.continue_:
            if os.path.exists(tmp):
                print "%s already exists" % tmp
                outcome = self.readJobOutput(tmp,0,"",dsetName,fileName,ifile)
                if self.queue_:
                    self.outcomes.append((None,None,outcome))
                else:
                    return outcome
            return None
        if self.queue_:
            self.parallel_.run("fggCheckFile.py",[fName,tmp,dsetName,str(ifile),"2>/dev/null"],interactive=False)
        else:
            ret,out = self.parallel_.run("fggCheckFile.py",[fName,tmp,dsetName,str(ifile),"2>/dev/null"],interactive=True)[2]
            return self.readJobOutput(tmp,ret,out,dsetName,fileName,ifile)

        ### try:
        ###     fout = open(tmp)
        ###     out = fout.read()
        ###     fout.close()
        ### except IOError, e:
        ###     print ret, out 
        ###     print e
        ###     out = "{}"
        ### 
        ### os.remove(tmp)
        ### return dsetName,ifile,fileName,ret,out
        

    def readJobOutput(self,tmp,ret,out,dsetName,fileName,ifile):
        try:
            fout = open(tmp)
            out = fout.read()
            fout.close()
            os.remove(tmp)
        except Exception, e:
            print ret, out 
            print e
            out = "{}"

        return dsetName,int(ifile),fileName,ret,out
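
To show how these methods fit together, a hedged driver sketch: construct a SamplesManager, import a set of datasets, and run the integrity check. The catalog path and dataset wildcard below are placeholders, and readCatalog()/writeCatalog() are assumed to be defined elsewhere in the class.

# Hypothetical driver; the catalog path and wildcards are placeholders.
mgr = SamplesManager("catalog.json", queue=None, maxThreads=50)
mgr.importFromDAS(["/SomePrimaryDataset/*/MINIAODSIM"])  # wildcard DAS query
mgr.checkAllDatasets(match="/SomePrimaryDataset*")       # flag corrupted files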
Example 23
class ParallelTrigger(object):
    """Parallel port and dummy triggering support.

    .. warning:: When using the parallel port, calling
                 :meth:`expyfun.ExperimentController.start_stimulus`
                 will automatically invoke a stamping of the 1 trigger, which
                 will in turn cause a delay equal to that of
                 ``trigger_duration``.
                 This can affect e.g. :class:`EyelinkController` timing.

    Parameters
    ----------
    mode : str
        'parallel' for real use. 'dummy', passes all calls.
    address : str | int | None
        The address to use. On Linux this should be a string path like
        ``'/dev/parport0'`` (equivalent to None), on Windows it should be an
        integer address like ``888`` or ``0x378`` (equivalent to None).
        The config variable ``TRIGGER_ADDRESS`` can be used to set this
        permanently.
    trigger_duration : float
        Amount of time (seconds) to leave the trigger high whenever
        sending a trigger.
    ec : instance of ExperimentController
        The ExperimentController.
    verbose : bool, str, int, or None
        If not None, override default verbose level.

    Notes
    -----
    Parallel port activation is enabled by using the ``trigger_controller``
    argument of :class:`expyfun.ExperimentController`.
    """

    @verbose_dec
    def __init__(self, mode='dummy', address=None, trigger_duration=0.01,
                 ec=None, verbose=None):
        self.ec = ec
        if mode == 'parallel':
            if sys.platform.startswith('linux'):
                address = '/dev/parport0' if address is None else address
                if not isinstance(address, string_types):
                    raise ValueError('address must be a string or None, got %s '
                                     'of type %s' % (address, type(address)))
                from parallel import Parallel
                logger.info('Expyfun: Using address %s' % (address,))
                self._port = Parallel(address)
                self._portname = address
                self._set_data = self._port.setData
            elif sys.platform.startswith('win'):
                from ctypes import windll
                if not hasattr(windll, 'inpout32'):
                    raise SystemError(
                        'Must have inpout32 installed, see:\n\n'
                        'http://www.highrez.co.uk/downloads/inpout32/')

                base = '0x378' if address is None else address
                logger.info('Expyfun: Using base address %s' % (base,))
                if isinstance(base, string_types):
                    base = int(base, 16)
                if not isinstance(base, int):
                    raise ValueError('address must be int or None, got %s of '
                                     'type %s' % (base, type(base)))
                self._port = windll.inpout32
                mask = np.uint8(1 << 5 | 1 << 6 | 1 << 7)
                # Use ECP to put the port into byte mode
                val = int((self._port.Inp32(base + 0x402) & ~mask) | (1 << 5))
                self._port.Out32(base + 0x402, val)

                # Now to make sure the port is in output mode we need to make
                # sure that bit 5 of the control register is not set
                val = int(self._port.Inp32(base + 2) & ~np.uint8(1 << 5))
                self._port.Out32(base + 2, val)
                self._set_data = lambda data: self._port.Out32(base, data)
                self._portname = str(base)
            else:
                raise NotImplementedError('Parallel port triggering only '
                                          'supported on Linux and Windows')
        else:  # mode == 'dummy':
            self._port = self._portname = None
            self._trigger_list = list()
            self._set_data = lambda x: (self._trigger_list.append(x)
                                        if x != 0 else None)
        self.trigger_duration = trigger_duration
        self.mode = mode

    def __repr__(self):
        return '<ParallelTrigger : %s (%s)>' % (self.mode, self._portname)

    def _stamp_trigger(self, trig):
        """Fake stamping."""
        self._set_data(int(trig))
        self.ec.wait_secs(self.trigger_duration)
        self._set_data(0)

    def stamp_triggers(self, triggers, delay=None, wait_for_last=True,
                       is_trial_id=False):
        """Stamp a list of triggers with a given inter-trigger delay.

        Parameters
        ----------
        triggers : list
            No input checking is done, so ensure triggers is a list,
            with each entry an integer with fewer than 8 bits (max 255).
        delay : float | None
            The inter-trigger-onset delay (includes "on" time).
            If None, will use twice the trigger duration (50% duty cycle).
        wait_for_last : bool
            If True, wait for last trigger to be stamped before returning.
        is_trial_id : bool
            No effect for this trigger controller.
        """
        if delay is None:
            delay = 2 * self.trigger_duration
        for ti, trig in enumerate(triggers):
            self._stamp_trigger(trig)
            if ti < len(triggers) - 1 or wait_for_last:
                self.ec.wait_secs(delay - self.trigger_duration)

    def close(self):
        """Release hardware interfaces."""
        if hasattr(self, '_port'):
            del self._port

    def __del__(self):
        return self.close()
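
A hedged usage sketch for the class above, run in 'dummy' mode so no parallel-port hardware is needed. The ExperimentController stand-in below is an assumption; all the trigger code requires from it here is a wait_secs() method.

import time

class _FakeEC(object):
    # Minimal stand-in for ExperimentController (assumption for this sketch).
    def wait_secs(self, secs):
        time.sleep(secs)

trigger = ParallelTrigger(mode='dummy', ec=_FakeEC())
trigger.stamp_triggers([1, 4, 8])
print(trigger._trigger_list)  # dummy mode just records the non-zero values
trigger.close()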
Example 24
    def checkDatasetFiles(self,dsetName,catalog=None):
        """
        Look for corrupted files in dataset.
        @dsetName: dataset name
        """
        writeCatalog = False
        if not catalog:
            catalog = self.readCatalog()
            writeCatalog = True
        
        wait = False
        if not self.parallel_:
            self.parallel_ = Parallel(16,self.queue_)
            wait = True

        print 
        print "Checking dataset",dsetName
        info = catalog[dsetName]
        files = info["files"]
        print "Number of files: ", len(files)
        
        toremove = []
        for ifil,eifil in enumerate(files):
            if ifil in toremove:
                continue
            for jfil,ejfil in enumerate(files[ifil+1:]):
                if ifil+jfil in toremove:
                    continue
                if eifil["name"] == ejfil["name"]:
                    toremove.append(ifil)
                else:
                    iid = eifil["name"].rstrip(".root").rsplit("_",1)[-1]
                    jid = ejfil["name"].rstrip(".root").rsplit("_",1)[-1]
                    if iid == jid:
                        print "duplicated file index ", iid
                        print eifil["name"]
                        print ejfil["name"]
                        reply=ask_user("keep both? ")
                        if reply == "n":
                            if ask_user( "keep %s? " % ejfil["name"] ) == "n":
                                ## files.pop(ifil+jfil)
                                toremove.append(ifil+jfil)
                            if ask_user( "keep %s? " % eifil["name"] ) == "n":
                                toremove.append(ifil)
                                ## files.pop(ifil)
                                
        for ifile in sorted(toremove,reverse=True):
            ## print ifile
            files.pop(ifile)
            
        print "After duplicates removal: ", len(files)
        info = catalog[dsetName]["files"] = files
        for ifile,finfo in enumerate(files):            
            name = finfo["name"]
            self.parallel_.run(SamplesManager.checkFile,[self,name,dsetName,ifile])

        if wait:
            self.parallel_.wait(printOutput=False)            
            self.parallel_ = None
            
        if writeCatalog:
            self.writeCatalog(catalog)
Example 25
#!/usr/bin/env python
from sys import argv
from parallel import Parallel
p = Parallel()

args = argv[1:]
if args:
    if 'on' in args:
        p.setData(0)
    if 'off' in args:
        p.setData(255)
Example 26
class SamplesManager(object):
    
    def __init__(self,
                 catalog,
                 cross_sections=["$CMSSW_BASE/src/flashgg/MetaData/data/cross_sections.json"],
                 dbs_instance="prod/phys03",
                 queue=None
                 ):
        """
        Constructor:
        @catalog: json file used to read/write dataset information
        @cross_sections: json file where samples cross sections are stored
        @dbs_instance: DBS instance to use
        """
        self.cross_sections_ = {}
        self.dbs_instance_ = dbs_instance

        for xsecFile in cross_sections:
            fname = shell_expand(xsecFile)
            self.cross_sections_.update( json.loads( open(fname).read() ) )
            
        self.catalog_ = shell_expand(catalog)

        self.parallel_ = None
        self.sem_ = Semaphore()

        print "Will use the following datasets catalog:"
        print self.catalog_
        
        self.queue_ = queue
        
    def importFromDAS(self,list_datasets):
        """
        Import datasets from DAS to the catalog.
        @datasets: wildcard to be used in the dataset query
        """
        catalog = self.readCatalog()
        
        print "Importing from das %s" % list_datasets
        datasets = []
        for dataset in list_datasets:
            if "*" in dataset:
                response = das_query("https://cmsweb.cern.ch","dataset dataset=%s | grep dataset.name" % dataset, 0, 0, False, self.dbs_instance_)
        
                for d in response["data"]:
                    datasets.append( d["dataset"][0]["name"] )
            else:
                datasets.append(dataset)

        print "Datasets to import"
        print "\n".join(datasets)
        for dsetName in datasets:
            print "Importing %s" % dsetName
            files = self.getFilesFomDAS(dsetName)
            if dsetName in catalog:
                catalog[ dsetName ]["files"]  = files
            else:
                catalog[ dsetName ] = { "files" : files }
            
        print "Writing catalog"
        self.writeCatalog(catalog)
        print "Done"

    def getFilesFomDAS(self,dsetName):
        """
        Read dataset files from DAS.
        @dsetName: dataset name
        """
        response = das_query("https://cmsweb.cern.ch","file dataset=%s | grep file.name,file.nevents" % dsetName, 0, 0, False, self.dbs_instance_)
        
        files=[]
        for d in response["data"]:
            for jf in d["file"]:
                if "nevents" in jf:
                    files.append({ "name" : jf["name"], "nevents" : jf["nevents"] })
                    break
                ## files.append( { "name" : d["file"][0]["name"], "nevents" : d["file"][0]["nevents"] } )

        return files

    def importFromEOS(self,folders):
        """
        Import datasets from EOS to the catalog.
        @folders: list of EOS folders to be imported
        """
        catalog = self.readCatalog()
        
        auto=False
        assumeOk=False
        for folder in folders:
            dsetName = ""

            print
            print "importing folder\n %s" % folder
            
            while not len(dsetName.split("/")) == 4:
                if auto:
                    splitFolder = folder.split("/")
                    prim, sec = splitFolder[-4:-2]
                    dsetName = "/%s/%s/USER" % (prim,sec)
                    print "guessed dataset name ", dsetName
                    if not assumeOk:
                        resp=ask_user("ok?",["y","n","a"])
                        if resp == "n":
                            dsetName = ""
                            auto=False
                        elif resp=="a":
                            assumeOk=True
                if not auto:
                    print "enter dataset name (auto/noauto to enables/disables automatic guessing) ",
                    dsetName = raw_input()
                    if(dsetName=="auto"):
                        auto=True
                    elif (dsetName=="noauto"):
                        auto=False
                
                
            print "Importing %s as %s" % (folder,dsetName)
            files = self.getFilesFomEOS(folder)            
            if dsetName in catalog:
                catalog[ dsetName ]["files"]  = files
            else:
                catalog[ dsetName ] = { "files" : files }
            
        print "Writing catalog"
        self.writeCatalog(catalog)
        print "Done"
        
    def getFilesFomEOS(self,dsetName):
        """
        Read dataset files crawling EOS.
        @dsetName: dataset name
        """
        
        if not self.parallel_:
            self.parallel_ = Parallel(200,self.queue_)
        
        ret,out = self.parallel_.run("/afs/cern.ch/project/eos/installation/0.3.15/bin/eos.select",["find",dsetName],interactive=True)[2]
        ## print out
        files = []
        for line in out.split("\n"):
            if line.endswith(".root"):
                files.append( {"name":line.replace("/eos/cms",""), "nevents":0} )

        return files

    def findDuplicates(self,dsetName):
        """
        Find duplicate job outputs in dataset.
        @dsetName: dataset name
        Note: not implemented
        """
        pass
    
    def invalidateBadFiles(self,dsetName):
        """
        Invalidate duplicate job output and corrupted files in DAS.
        @dsetName: dataset name
        Note: not implemented
        """
        pass

    def checkAllDatasets(self):
        """
        Look for corrupted files in the whole catalog.
        """
        catalog = self.readCatalog()
        
        self.parallel_ = Parallel(50,self.queue_)
        ## self.parallel_ = Parallel(1,self.queue_)

        print "Checking all datasets"
        for dataset in catalog.keys():            
            self.checkDatasetFiles(dataset,catalog)
        
        outcomes = self.parallel_.wait(printOutput=False)

        ## for dsetName,ifile,fName,ret,out in outcomes:
        for ign1, ign2, outcome in outcomes:
            dsetName,ifile,fName,ret,out = outcome
            info = catalog[dsetName]["files"][ifile]
            if info["name"] != fName:
                print "Inconsistent outcome ", info["name"], dsetName,ifile,fName,ret,out
            else:
                if ret != 0:
                    info["bad"] = True
                else:
                    extraInfo = json.loads(str(out))
                    for key,val in extraInfo.iteritems():
                        info[key] = val

        print "Writing catalog"
        self.writeCatalog(catalog)
        print "Done"
    
    def checkDatasetFiles(self,dsetName,catalog=None):
        """
        Look for corrupted files in dataset.
        @dsetName: dataset name
        """
        writeCatalog = False
        if not catalog:
            catalog = self.readCatalog()
            writeCatalog = True
        
        wait = False
        if not self.parallel_:
            self.parallel_ = Parallel(16,self.queue_)
            wait = True

        print 
        print "Checking dataset",dsetName
        info = catalog[dsetName]
        files = info["files"]
        print "Number of files: ", len(files)
        
        toremove = []
        for ifil,eifil in enumerate(files):
            if ifil in toremove:
                continue
            for jfil,ejfil in enumerate(files[ifil+1:]):
                if ifil+jfil in toremove:
                    continue
                if eifil["name"] == ejfil["name"]:
                    toremove.append(ifil)
                else:
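                    # job outputs are assumed to be named "<stem>_<index>.root";
                    # the trailing index is extracted to spot different files that
                    # share the same job index (note: rstrip strips a set of
                    # characters, so this relies on the stem not ending in
                    # '.', 'r', 'o' or 't')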
                    iid = eifil["name"].rstrip(".root").rsplit("_",1)[-1]
                    jid = ejfil["name"].rstrip(".root").rsplit("_",1)[-1]
                    if iid == jid:
                        print "duplicated file index ", iid
                        print eifil["name"]
                        print ejfil["name"]
                        reply=ask_user("keep both? ")
                        if reply == "n":
                            if ask_user( "keep %s? " % ejfil["name"] ) == "n":
                                ## files.pop(ifil+jfil)
                                toremove.append(ifil+jfil)
                            if ask_user( "keep %s? " % eifil["name"] ) == "n":
                                toremove.append(ifil)
                                ## files.pop(ifil)
                                
        for ifile in sorted(toremove,reverse=True):
            ## print ifile
            files.pop(ifile)
            
        print "After duplicates removal: ", len(files)
        info = catalog[dsetName]["files"] = files
        for ifile,finfo in enumerate(files):            
            name = finfo["name"]
            self.parallel_.run(SamplesManager.checkFile,[self,name,dsetName,ifile])

        if wait:
            self.parallel_.wait(printOutput=False)            
            self.parallel_ = None
            
        if writeCatalog:
            self.writeCatalog(catalog)

    def reviewCatalog(self):
        datasets,catalog = self.getAllDatasets()

        primaries = {}
        keepAll = False
        for d in datasets:
            if not keepAll:
                reply = ask_user("keep this dataset (yes/no/all)?\n %s\n" % d, ["y","n","a"])
                if reply == "n":
                    catalog.pop(d)
                    continue
                if reply == "a": 
                    keepAll = True
            primary = d.split("/")[1]
            if not primary in primaries:
                primaries[ primary ] = []
                
            primaries[ primary ].append(d)
            
        for name,val in primaries.iteritems():
            if len(val) == 1: continue
            reply = ask_user("More than one sample for %s:\n %s\nKeep all?" % (name,"\n ".join(val)),["y","n","m"])
            if reply == "m":
                dst = val[0]
                for merge in val[1:]:
                    self.mergeDataset(catalog[dst],catalog[merge])
                    catalog.pop(merge)
            if reply == "n":
                for d in val:
                    reply = ask_user("keep this dataset?\n %s\n" % d)
                    if reply == "n":
                        catalog.pop(d)
           
        self.writeCatalog(catalog)
        
    def mergeDataset(self,dst,merge):
        dstFiles=dst["files"]
        mergeFiles=merge["files"]
        for fil in mergeFiles:
            skip = False
            for dfil in dstFiles:
                if dfil["name"] == fil["name"]:
                    skip = True
            if not skip:
                dstFiles.append( fil )
        
    def checkFile(self,fileName,dsetName,ifile):
        """
        Check if file is valid.
        @fileName: file name
        """
        ## fName = "root://eoscms//eos/cms%s" % fileName
        fName = fileName
        tmp = ".tmp%s_%d.json"%(dsetName.replace("/","_"),ifile)
        ## print "fggCheckFile.py",[fName,tmp,"2>/dev/null"]
        ret,out = self.parallel_.run("fggCheckFile.py",[fName,tmp,"2>/dev/null"],interactive=True)[2]
        
        try:
            fout = open(tmp)
            out = fout.read()
            fout.close()
        except IOError, e:
            print ret, out 
            print e
            out = "{}"

        os.remove(tmp)
        return dsetName,ifile,fileName,ret,out
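
A minimal standalone sketch (hypothetical file names, not part of the original
class) of the de-duplication performed by mergeDataset above: files from the
merged entry are appended only if their name is not already present.

    dst   = {"files": [{"name": "/store/a_1.root", "nevents": 100}]}
    merge = {"files": [{"name": "/store/a_1.root", "nevents": 100},
                       {"name": "/store/a_2.root", "nevents": 200}]}
    known = set(f["name"] for f in dst["files"])
    for fil in merge["files"]:
        if fil["name"] not in known:
            dst["files"].append(fil)
    print dst["files"]   # a_1.root listed once, a_2.root appended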
Esempio n. 27
0
    print CommandSequence([Loop('x', 1, 10, 0.5, Comment("Hello"))])

    print CommandSequence(
        [Loop('x', 1, 10, 0.5, Comment("Hello"), completion=True)])

    print CommandSequence([
        Loop('x',
             1,
             10,
             0.5, [Comment("Hello"), Comment("World")],
             completion=True,
             timeout=10)
    ])

    print CommandSequence([
        Loop('x', 2, 20, 5, [
            Loop('y',
                 1,
                 10,
                 0.5, [Comment("Hello"), Comment("World")],
                 completion=True,
                 timeout=10)
        ])
    ])

    print CommandSequence([
        Parallel(Loop('x', 1, 10, 0.5, Comment("Hello")),
                 Loop('y', 1, 10, 0.5, Comment("There")))
    ])
Esempio n. 28
0
class JobsManager(object):
    
    def __init__(self,
                 defaults={}
                 ):       
        """
        Constructor:
        @defaults: default options
        """

        # Command line options
        parser = OptionParser(option_list=[
                make_option("--processes", action="callback", callback=Load(), type="string", dest="processes",
                            default={}, help="List of datasets to be analyzed"),
                make_option("--load",  # special option to load whole configuaration from JSON
                            action="callback",callback=Load(),dest="__opts__",
                            type="string",
                            help="load JSON file with configuration",metavar="CONFIG.json"
                            ),
                make_option("-n","--njobs",dest="njobs",type="int",default=0,
                            help="number of jobs to run"),
                make_option("-q","--queue",dest="queue",type="string",default=None,
                            help="LSF queue to use. default: %default"),
                make_option("--sync-lsf",dest="asyncLsf",action="store_false",default=True,
                            help="Run LSF jobs in sync mode (with -K). This will spawn one thread per job. Use only if you know what you are doing."
                            " default: False"),
                make_option("--use-tarball",dest="use_tarball",action="store_true",default=True,
                            help="Make a sandbox tarball for the task default: %default"),
                make_option("--no-use-tarball",dest="useTarball",action="store_false",default=True,
                            help="Do not make a sandbox tarball for the task."),
                make_option("--stage-to",dest="stageTo",action="store",default=None,type="string",
                            help="Stage output to folder. default: %default"),                
                make_option("--stage-cmd",dest="stageCmd",action="store",default="guess",type="string",
                            help="Stage out command. (use 'guess' to have the script guessing the command from the output folder) default : %default"),                
                make_option("--summary",dest="summary",action="store_true",default=False,
                            help="Print jobs summary and exit"),
                make_option("-o","--output",dest="output",type="string",
                            default="output.root", help="output file name. default: %default"),
                make_option("-d","--outputDir",dest="outputDir",type="string",
                            default=None, help="output folder. default: %default"),
                make_option("-x","--jobEx",dest="jobExe",type="string",
                            default=None, help="job executable. default: %default"),
                make_option("-c","--cmdLine",dest="cmdLine",type="string",
                            default=None, help="job command line. The script arguments will be prepended. default: %default"),
                make_option("--dumpCfg",
                            action="store_true",
                            default=False,
                            help="dump configuaration and exit. default: %default"),
                make_option("-v","--verbose",
                            action="store_true", dest="verbose",
                            default=False,
                            help="default: %default"),
                make_option("-m","--max-resubmissions",dest="maxResub", type="int",default=3),
                make_option("-N","--ncpu",dest="ncpu", type="int",default=cpu_count()),
                make_option("-H","--hadd",dest="hadd",default=False, action="store_true",
                            help="hadd output files when all jobs are finished."
                            ),
                make_option("-D","--hadd-dateset",dest="hadd_dataset",default=False, action="store_true",
                            help="hadd output per dataset when all jobs are finished."
                            ),
                make_option("-P","--hadd-process",dest="hadd_process",default=False, action="store_true",
                            help="hadd output per process when all jobs are finished."
                            ),
                make_option("--dry-run",dest="dry_run",default=False, action="store_true",
                            help="do not actually run the jobs."
                            ),
                make_option("-C","--cont",dest="cont",default=False, action="store_true",
                            help="continue interrupted task."
                            ),
                make_option("-b","--batch-system",dest="batchSystem",type="string",
                            default="auto",help="Batch system name. Currently supported: sge lsf, default: %default"
                            ),
                ]
                              )
        
        # parse the command line
        (self.options, self.args) = parser.parse_args()
        self.maxResub = self.options.maxResub

        if self.options.cmdLine:
            self.args = self.args+shell_args(str(self.options.cmdLine))
        
        if self.options.jobExe:
            self.options.jobExe = shell_expand(self.options.jobExe)
            if not self.args[0] == self.options.jobExe:
                self.args = [self.options.jobExe]+self.args
            
        self.uniqueNames = {}


    # -------------------------------------------------------------------------------------------------------------------
    def __call__(self):
        """
        __call__
        Run all jobs.
        """
        if self.options.summary:
            self.options.dry_run = True
            self.options.cont = True
            
        self.jobFactory = TarballJobFactory(self.options.stageTo,self.options.stageCmd,job_outdir=self.options.outputDir,
                                            batchSystem=self.options.batchSystem)
        self.parallel = Parallel(self.options.ncpu,lsfQueue=self.options.queue,lsfJobName="%s/runJobs" % self.options.outputDir,
                                 asyncLsf=self.options.asyncLsf,jobDriver=self.jobFactory,batchSystem=self.options.batchSystem)
        
        self.jobs = None
        if self.options.cont:
            if self.options.asyncLsf:
                self.loadLsfMon()
        else:
            self.firstRun()
            
        self.monitor()
        self.parallel.stop()

    # -------------------------------------------------------------------------------------------------------------------
    def loadLsfMon(self):
        
        with open("%s/task_config.json" % (self.options.outputDir), "r" ) as cfin:
            task_config = json.loads(cfin.read())
        jobs = task_config["jobs"]
        
        if self.options.useTarball:
            if not "tarball" in task_config:
                print 
                print "You asked to run the jobs using a sandbox tarball, but the tarball name was not found in the task configuration"
                print "    If you specified the --use-tarball now but not in the original submission, please remove it."
                print "    Otherwise the task configuration may have been corrupted."
                print 
                sys.exit(-1)
            self.jobFactory.setTarball(task_config["tarball"])
            if not self.options.stageTo:
                self.jobFactory.stageDest( os.path.abspath(self.options.outputDir) )

        self.parallel.setJobId(task_config.get("last_job_id",1))
        for job in jobs:
            cmd, args, outfile, nsub, ret, batchId = job
            if type(batchId) == tuple or type(batchId) == list:
                jobName,batchId = batchId
            else:
                jobName=None
            if ret != 0 and nsub <= self.options.maxResub:
                self.parallel.addJob(cmd,args,batchId,jobName)
            

    # -------------------------------------------------------------------------------------------------------------------
    def firstRun(self):

        (options,args) = (self.options, self.args)
        parallel = self.parallel
        
        task_config = {}

        outputPfx = options.output.replace(".root","")
        
        if not options.outputDir:
            sys.exit("\nPlease specify an output folder using the -d option\n")

        if options.dumpCfg:
            print ( dumpCfg(options) )
            sys.exit(0)
            
        if not os.path.exists(options.outputDir):
            os.mkdir(options.outputDir)
        outputPfx = "%s/%s" % ( options.outputDir, outputPfx )
        

        args.append("processIdMap=%s/config.json" % os.path.abspath(options.outputDir))

        pset = args[0] if not options.jobExe else args[1]
        with open(pset,"r") as pin:
            with open("%s/%s" % ( options.outputDir, os.path.basename(pset) ), "w+" ) as pout:
                pout.write(pin.read())
                pout.close()
                if not options.jobExe: os.chmod( "%s/%s" % ( options.outputDir, os.path.basename(pset)), 0755  )
            pin.close()
        pset = "%s/%s" % ( options.outputDir, os.path.basename(pset) )
        pset = os.path.abspath(pset)
        
        if options.useTarball:
            apset = os.path.abspath(pset)
            self.jobFactory.mkTarball("%s/sandbox.tgz" % os.path.abspath(options.outputDir),
                                      tarball_entries=[apset,"python","lib","bin"],tarball_patterns={"src/*":"data"},
                                      tarball_transform="'s,%s,pset.py,'" % (apset.lstrip("/"))
                                      )
            if not options.queue:
                print "\nWARNING: You specified the --use-tarball option but no batch queue. The tarball was created but the jobs won't actually use it."
                print "           To avoid this printout run with --no-use-tarball or specify a batch queue using the --queue option.\n"
                options.useTarball = False
                
            task_config["tarball"] = self.jobFactory.tarball
            
        if not options.stageTo:
            self.jobFactory.stageDest( os.path.abspath(options.outputDir) )
            options.stageTo = os.path.abspath(options.outputDir)
            print "\nWill stage output to %s using the command '%s'\n" % ( self.jobFactory.stage_dest, self.jobFactory.getStageCmd() )

        if options.jobExe:
            args[1] = pset
        else:
            args[0] = pset

        with open("%s/config.json" % (options.outputDir), "w+" ) as fout:
            fout.write( dumpCfg(options,skip=["dry_run","summary"]) )
        
        # store cmdLine
        options.cmdLine = str(" ".join(args))

        outfiles = []
        doutfiles = {}
        poutfiles = {}
        
        jobs = []

        for name,datasets in options.processes.iteritems():
            poutfiles[name] = ( "%s_%s.root" % ( outputPfx,name), [] )
        
            for dset in datasets:
                job = args[0]
                if self.options.jobExe:
                    pyjob = ""
                else:
                    pyjob = job
                if type(dset) == list:
                    dset,dopts = dset
                else:
                    dopts = {}
                jobargs = copy(args[1:])
                dsetName = dset.lstrip("/").replace("/","_")
                dsetName = self.getUniqueName(dsetName)
                outfile = "%s_%s.root" % ( outputPfx, dsetName )
                doutfiles[dsetName] = ( str(outfile),[] )
                jobargs.extend( ["dataset=%s" % dset, "outputFile=%s" % outfile ] )
                # add (and replace) per-dataset job arguments
                dargs = dopts.get("args",[])
                if type(dargs) != list:
                    print "\nERROR : dataset-specific arguments should be list not %s" % (type(dargs))
                    print "          dataset %s" % dset
                    sys.exit(-1)
                if len(dargs) > 0:
                    replace = {}
                    for arg in dargs:
                        aname,val = arg.split("=")
                        replace[aname] = arg
                    newargs = []
                    anames = []
                    for arg in jobargs:
                        if not "=" in arg: 
                            newargs.append(arg)
                            continue
                        aname,val = arg.split("=")
                        if aname in replace: newargs.append( replace.pop(aname) )
                        else: newargs.append(arg)
                    jobargs = newargs
                    for aname,arg in replace.iteritems(): jobargs.append(arg)
                print "running: %s %s" % ( job, " ".join(jobargs) )
                njobs = dopts.get("njobs",options.njobs) if options.njobs != 0 else 0
                if  njobs != 0:
                    print  "splitting in (up to) %d jobs\n checking how many are needed... " % njobs, 
                    dnjobs = 0
                    dargs = jobargs+shell_args("nJobs=%d" % (njobs)) 
                    ret,out = parallel.run("python %s" % pyjob,dargs+shell_args("dryRun=1 getMaxJobs=1 dumpPython=%s.py" % os.path.join(options.outputDir,dsetName) ),interactive=True)[2]
                    maxJobs = self.getMaxJobs(out)
                    print maxJobs
                    if maxJobs < 0:
                        print "Error getting number of jobs to be submitted"
                        print out
                    hadd = self.getHadd(out,outfile)
                    print " now submitting jobs",
                    for ijob in range(maxJobs):
                        ## FIXME allow specific job selection
                        iargs = jobargs+shell_args("nJobs=%d jobId=%d" % (maxJobs, ijob))
                        dnjobs += 1 
                        batchId = -1
                        if not options.dry_run:
                            ret,out = parallel.run(job,iargs)[-1]
                            if self.options.queue and self.options.asyncLsf:
                                batchId = out[1]
                            print ".",
                        output = hadd.replace(".root","_%d.root" % ijob)
                        outfiles.append( output )
                        doutfiles[dsetName][1].append( outfiles[-1] )
                        poutfiles[name][1].append( outfiles[-1] )
                        jobs.append( (job,iargs,output,0,-1,batchId) )
                    print "\n %d jobs submitted" % dnjobs                
                else:
                    ret,out = parallel.run("python %s" % pyjob,jobargs+shell_args("dryRun=1 dumpPython=%s.py" % os.path.join(options.outputDir,dsetName)),interactive=True)[2]
                    if ret != 0:
                        print ret,out
                        continue
                    output = self.getHadd(out,outfile)

                    batchId = -1
                    if not options.dry_run:
                        ret,out = parallel.run(job,jobargs)[-1]
                        if self.options.queue and self.options.asyncLsf:
                            batchId = out[1]
                            
                    outfiles.append( output )
                    jobs.append( (job,jobargs,output,0,-1,batchId) )
                    poutfiles[name][1].append( outfiles[-1] )
                print

        task_config["jobs"] =  jobs
        task_config["datasets_output"] =  doutfiles
        task_config["process_output"] =  poutfiles
        task_config["output"] =  outfiles
        task_config["outputPfx"] =  outputPfx
        
        self.storeTaskConfig(task_config)

    # -------------------------------------------------------------------------------------------------------------------
    def storeTaskConfig(self,task_config):
        with open("%s/task_config.json" % (self.options.outputDir), "w+" ) as cfout:
            task_config["last_job_id"] = self.parallel.currJobId()
            cfout.write( json.dumps(task_config,indent=4) )
            cfout.close()
            
    # -------------------------------------------------------------------------------------------------------------------
    def getUniqueName(self,basename):
        if basename in self.uniqueNames:
            self.uniqueNames[basename] += 1
        else:
            self.uniqueNames[basename] = 0
            return basename
        return "%s%d" % (basename,self.uniqueNames[basename])

    # -------------------------------------------------------------------------------------------------------------------
    def monitor(self):

        (options,args) = (self.options, self.args)
        parallel = self.parallel
        
        with open("%s/task_config.json" % (options.outputDir), "r" ) as cfin:
            task_config = json.loads(cfin.read())
        
        doutfiles = task_config["datasets_output"]
        poutfiles = task_config["process_output"]
        outfiles  = task_config["output"]
        outputPfx = task_config["outputPfx"]

        self.task_config = task_config
        
        if options.summary:
            self.printSummary()
            return

        if not options.dry_run:
            ## FIXME: job resubmission
            returns = self.wait(parallel,self)
            
        if options.hadd:
            print "All jobs finished. Merging output."
            p = Parallel(options.ncpu)
            hadd = "hadd -f "
            if options.hadd_process:
                for proc,out in poutfiles.iteritems():
                    outfile,outfiles = out
                    p.run("%s %s" % (hadd, outfile), outfiles )
            if options.hadd_dataset:
                if options.hadd_process:
                    hadd += " -T"
                for dset,out in doutfiles.iteritems():
                    outfile,outfiles = out
                    p.run("%s %s" % (hadd,outfile), outfiles) 
            if not (options.hadd_process or options.hadd_dataset):
                p.run("%s %s.root" % (hadd,outputPfx), outfiles)
            
            self.wait(p)

        self.storeTaskConfig(task_config)
        
        self.parallel.stop()

    # -------------------------------------------------------------------------------------------------------------------
    def wait(self,parallel,handler=None):
        return parallel.wait(handler)

    # -------------------------------------------------------------------------------------------------------------------
    def handleJobOutput(self,job,jobargs,ret):
        print "------------"
        print "Job finished: (exit code %d) '%s' '%s'" % ( ret[0], job, " ".join(jobargs) )
        print "Job output: "
        print

        lines = ret[1].split("\n")
        if self.options.queue and self.options.asyncLsf:
            lines = lines[-10:]
        for line in lines:
            print line
        print
        jobargs = shell_args(" ".join(jobargs))
        job = jobargs[0]
        jobargs = jobargs[1:]
        for ijob in self.task_config["jobs"]:
            inam,iargs = ijob[0:2]
            if inam == job and iargs == jobargs:
                ijob[4] = ret[0]
                if ret[0] != 0:
                    print ""
                    print "Job failed. Number of resubmissions: %d / %d. " % (ijob[3], self.maxResub),
                    if ijob[3] < self.maxResub:
                        print "Resubmitting."
                        ijob[3] += 1
                        if ijob[3] == self.maxResub:
                            iargs.append("lastAttempt=1")                        
                        jobName = ijob[5][0] if self.options.queue else None
                        out = self.parallel.run(inam,iargs,jobName=jobName)
                        if self.options.queue and self.options.asyncLsf:
                            ijob[5] = out[-1][1][1]
                            self.storeTaskConfig(self.task_config)
                        print "------------"
                        return 1
                    else:
                        print "Giving up."
                        
        self.storeTaskConfig(self.task_config)
        print "------------"
        return 0
    
    # -------------------------------------------------------------------------------------------------------------------
    def getHadd(self,stg,fallback):
        for line in stg.split("\n"):
            if line.startswith("hadd:"):
                return line.replace("hadd:","")
        return fallback

    # -------------------------------------------------------------------------------------------------------------------
    def getMaxJobs(self,stg):
        for line in stg.split("\n"):
            if line.startswith("maxJobs:"):
                return int(line.replace("maxJobs:",""))
        return -1
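    # Both parsers above scan the dry-run job output for marker lines of the form
    #   maxJobs:12
    #   hadd:output_mydataset.root
    # getMaxJobs returns the integer after "maxJobs:" (-1 if the marker is absent)
    # and getHadd returns the name after "hadd:" (or the fallback name if absent).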
    
    # -------------------------------------------------------------------------------------------------------------------
    def printSummary(self):
        
        jobs = self.task_config["jobs"]
        procs = self.task_config["datasets_output"]
        
        status = {}
        for job in jobs:
            cmd, args, outfile, nsub, ret, batchId = job
            status[outfile] = (nsub,ret)
            
        for proc,out in procs.iteritems():
            outfile,outfiles = out
            finished = []
            missing  = {}
            for jfile in outfiles:
                nsub,ret = status[jfile]
                if ret != 0:
                    if not nsub in missing:
                        missing[nsub] = []
                    missing[nsub].append( jfile )
                else:
                    finished.append(jfile)
            print "----------"
            print "process:           %s " % outfile.replace(".root","")
            print "njobs:             %d " % len(outfiles)
            print "finished:          %d " % len(finished)
            for nsub,lst in missing.iteritems():
                print "submitted %d times: %d"  % (nsub+1, len(lst))
            print 
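
A hedged sketch (hypothetical launcher script, not shown in the original
listing) of how JobsManager could be driven: options are parsed in the
constructor and __call__ submits and monitors the jobs.

    if __name__ == "__main__":
        jm = JobsManager()
        jm()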
Esempio n. 29
0
    def run(self,
            genome_files,
            output_dir,
            called_genes=False,
            translation_table=None,
            meta=False,
            closed_ends=False):
        """Call genes with Prodigal.

        Call genes with prodigal and store the results in the
        specified output directory. For convenience, the
        called_gene flag can be used to indicate genes have
        previously been called and simply need to be copied
        to the specified output directory.

        Parameters
        ----------
        genome_files : list of str
            Nucleotide fasta files to call genes on.
        called_genes : boolean
            Flag indicating if genes are already called.
        translation_table : int
            Specifies desired translation table, use None to automatically
            select between tables 4 and 11.
        meta : boolean
            Flag indicating if prodigal should call genes with the metagenomics procedure.
        closed_ends : boolean
            If True, do not allow genes to run off edges (throws -c flag).
        output_dir : str
            Directory to store called genes.

        Returns
        -------
        d[genome_id] -> namedtuple(best_translation_table
                                            coding_density_4
                                            coding_density_11)
            Summary statistics of called genes for each genome.
        """

        self.called_genes = called_genes
        self.translation_table = translation_table
        self.meta = meta
        self.closed_ends = closed_ends
        self.output_dir = output_dir

        make_sure_path_exists(self.output_dir)

        progress_func = None
        if self.verbose:
            file_type = 'genomes'
            self.progress_str = '  Finished processing %d of %d (%.2f%%) genomes.'
            if meta:
                file_type = 'scaffolds'
                if len(genome_files):
                    file_type = ntpath.basename(genome_files[0])

                self.progress_str = '  Finished processing %d of %d (%.2f%%) files.'

            self.logger.info('Identifying genes within %s: ' % file_type)
            progress_func = self._progress

        parallel = Parallel(self.cpus)
        summary_stats = parallel.run(
            self._producer, self._consumer, genome_files, progress_func)

        # An error was encountered during Prodigal processing, clean up.
        if not summary_stats:
            shutil.rmtree(self.output_dir)

        return summary_stats
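
A hedged usage sketch: the enclosing class and its constructor are not shown in
this listing, so `gene_caller` below is a hypothetical, already-constructed
instance with `cpus`, `verbose` and `logger` attributes set.

    genome_files = ["genome_A.fna", "genome_B.fna"]   # hypothetical inputs
    summary = gene_caller.run(genome_files,
                              output_dir="./prodigal_out",
                              called_genes=False,
                              translation_table=None,   # auto-select table 4 vs 11
                              meta=False,
                              closed_ends=False)
    if summary:
        for genome_id, stats in summary.iteritems():
            print genome_id, stats.best_translation_table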
Esempio n. 30
0
class StepperMotor(object):
    def __init__(self, motor_inputs, state=0, delay=0.05):
        '''
        :param motor_inputs: Ordered list of parallel values to turn motor
        :type motor_inputs: list or tuple
        :param state: Initial starting state of motor position
        :type state: int
        :param delay: Delay between steps (speed)
        :type delay: float
        '''
        self.MOTOR_INPUTS = motor_inputs
        self.state = state
        self.delay = delay
        # Setup parallel interface on first init
        self.parallel_interface = Parallel()

    #Q: Keep as a function or store state to self?
    def stepper_generator(self, state_steps):
        '''
        Returns a generator object which yields the motor input for each step;
        the internal state index is updated on self.state as a side effect.
        
        :param state_steps: Number of steps to step the motor.
        :type state_steps: int
        :returns: Generator yielding motor input values
        :rtype: generator of int
        '''
        if state_steps < 0:
            step = -1
        else:
            step = 1

        for virtual_state in xrange(self.state + 1,
                                    self.state + state_steps + 1, step):
            # NOTE: virtual_state is not used other than for informing the user
            # of the overall relative step we've applied.
            self.state += step
            if self.state >= len(self.MOTOR_INPUTS):
                # start at list 0
                self.state = 0
            elif self.state < 0:
                # start at the end
                self.state = len(self.MOTOR_INPUTS) - 1
            else:
                # we're at an index within the current motor inputs list
                pass

            motor_command = self.MOTOR_INPUTS[self.state]

            print "%+ 4d : Moving to internal state index %02d, %s hex %03.2f degrees" % (
                virtual_state, self.state, hex(motor_command),
                state_to_angle(self.state, len(self.MOTOR_INPUTS)))

            # present the required value
            yield motor_command

    def turn_motor(self, cycles):
        '''
        Turns the motor the desired amount.
        
        :param cycles: Loops to turn
        :type cycles: float
        :returns: New state position
        :rtype: int
        '''
        # round to the nearest step possible
        steps = int(round(cycles * len(self.MOTOR_INPUTS)))

        stepper = self.stepper_generator(steps)

        for motor_position in stepper:
            ##print "turn motor to position %s" % hex(motor_position)
            self.parallel_interface.setData(motor_position)
            time.sleep(self.delay)

        return self.state

    def turn_to_angle(self, angle):
        '''
        Turns the motor to the desired absolute angle.
        
        Accessor which converts arguments for turn_motor method.
        
        :param angle: Angle to turn to
        :type angle: float
        :returns: New state position
        :rtype: int
        '''
        cycles = self.angle_to_cycles(angle, self.state)
        return self.turn_motor(cycles)

    def rotate(self, degrees):
        '''
        Turns the motor by the number of degrees. -720 will turn the motor
        two whole cycles anti-clockwise.
        
        Accessor which converts arguments for turn_motor method.
        
        :param degrees: Degrees to turn motor by
        :type degrees: float
        :returns: New state position
        :rtype: int
        '''
        cycles = degrees / 360.0
        return self.turn_motor(cycles)
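
state_to_angle and angle_to_cycles are used by StepperMotor but are not part of
this listing; below is a minimal sketch under the assumption of a linear mapping
between state index and shaft angle (hypothetical helper and hypothetical
half-step sequence, not the original implementation).

    def state_to_angle(state, num_states):
        # map a state index onto 0..360 degrees
        return (360.0 / num_states) * state

    # hypothetical half-step sequence driven on the parallel port data pins
    HALF_STEP = (0x01, 0x03, 0x02, 0x06, 0x04, 0x0C, 0x08, 0x09)
    motor = StepperMotor(HALF_STEP, delay=0.01)   # requires a parallel port
    motor.rotate(90)     # quarter turn
    motor.rotate(-720)   # two full turns anti-clockwise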
Esempio n. 32
0
    def checkAllDatasets(self,match=None,light=False):
        """
        Look for corrupted files in the whole catalog.
        """
        catalog = self.readCatalog()
        
        self.parallel_ = Parallel(50,self.queue_,maxThreads=self.maxThreads_,asyncLsf=True,lsfJobName=".fgg/job")
        ## self.parallel_ = Parallel(1,self.queue_)

        print "Checking all datasets"
        self.outcomes = []
        for dataset in catalog.keys():  
            if match and not fnmatch(dataset,match): continue
            self.checkDatasetFiles(dataset,catalog,light=light)
        # write catalog to avoid redoing duplicates removal
        self.writeCatalog(catalog)
                
        if self.queue_:
            self.parallel_.wait(printOutput=True,handler=self)
            outcomes = self.outcomes
        else:
            outcomes = self.parallel_.wait(printOutput=False)

        ## for dsetName,ifile,fName,ret,out in outcomes:
        nfailed = 0
        for oc in outcomes:
            ign1, ign2, outcome= oc
            ## for ign1, ign2, outcome in outcomes:
            dsetName,ifile,fName,ret,out = outcome
            info = catalog[dsetName]["files"][ifile]
            if info["name"] != fName:
                print "Inconsistent outcome ", info["name"], dsetName,ifile,fName,ret,out
            else:
                if ret != 0:
                    info["bad"] = True
                    nfailed += 1
                else:
                    info["bad"] = False
                    extraInfo = json.loads(str(out))
                    if len(extraInfo.keys()) == 0:
                        nfailed += 1
                        info["bad"] = True
                    for key,val in extraInfo.iteritems():
                        info[key] = val

        self.parallel_.stop()

        print "Writing catalog"
        self.writeCatalog(catalog)
        print "Done"

        if nfailed > 0:
            print 
            print "WARNING: some of the check jobs failed or did not return any output."
            print "         Those (%d) files were marked a bad and won't be usable for analysis." % nfailed
            print "         Re-running the check command may recover the temporary failures."
            print 
        
        if self.queue_:
            print 
            print "Note: log files may have been written in ./.fgg"
            print "      it's up to you to clean up though..."
Esempio n. 33
0
class JobsManager(object):
    def __init__(self, defaults={}):
        """
        Constructor:
        @defaults: default options
        """

        # Command line options
        parser = OptionParser(option_list=[
            make_option("--processes",
                        action="callback",
                        callback=Load(),
                        type="string",
                        dest="processes",
                        default={},
                        help="List of datasets to be analyzed"),
            make_option(
                "--load",  # special option to load whole configuaration from JSON
                action="callback",
                callback=Load(),
                dest="__opts__",
                type="string",
                help="load JSON file with configuration",
                metavar="CONFIG.json"),
            make_option("-n",
                        "--njobs",
                        dest="njobs",
                        type="int",
                        default=0,
                        help="number of jobs to run"),
            make_option("-q",
                        "--queue",
                        dest="queue",
                        type="string",
                        default=None,
                        help="LSF queue to use. default: %default"),
            make_option("-o",
                        "--output",
                        dest="output",
                        type="string",
                        default="output.root",
                        help="output file name. default: %default"),
            make_option("-d",
                        "--outputDir",
                        dest="outputDir",
                        type="string",
                        default=None,
                        help="output folder. default: %default"),
            make_option("-x",
                        "--jobEx",
                        dest="jobExe",
                        type="string",
                        default=None,
                        help="job executable. default: %default"),
            make_option(
                "-c",
                "--cmdLine",
                dest="cmdLine",
                type="string",
                default=None,
                help=
                "job command line. The script arguments will be prepended. default: %default"
            ),
            make_option(
                "--dumpCfg",
                action="store_true",
                default=False,
                help="dump configuaration and exit. default: %default"),
            make_option("-v",
                        "--verbose",
                        action="store_true",
                        dest="verbose",
                        default=False,
                        help="default: %default"),
            make_option("-m",
                        "--max-resubmissions",
                        dest="maxResub",
                        type="int",
                        default=3),
            make_option(
                "-N", "--ncpu", dest="ncpu", type="int", default=cpu_count()),
            make_option("-H",
                        "--hadd",
                        dest="hadd",
                        default=False,
                        action="store_true",
                        help="hadd output files when all jobs are finished."),
            make_option(
                "-D",
                "--hadd-dateset",
                dest="hadd_dataset",
                default=False,
                action="store_true",
                help="hadd output per dataset when all jobs are finished."),
            make_option(
                "-P",
                "--hadd-process",
                dest="hadd_process",
                default=False,
                action="store_true",
                help="hadd output per process when all jobs are finished."),
            make_option("--dry-run",
                        dest="dry_run",
                        default=False,
                        action="store_true",
                        help="do not actually run the jobs."),
            make_option("-C",
                        "--cont",
                        dest="cont",
                        default=False,
                        action="store_true",
                        help="continue interrupted task."),
        ])

        # parse the command line
        (self.options, self.args) = parser.parse_args()
        self.maxResub = self.options.maxResub

        if self.options.cmdLine:
            self.args = self.args + shell_args(str(self.options.cmdLine))

        if self.options.jobExe:
            self.args = [shell_expand(self.options.jobExe)] + self.args

    def __call__(self):
        """
        __call__
        Run all jobs.
        """
        self.parallel = Parallel(self.options.ncpu,
                                 lsfQueue=self.options.queue,
                                 lsfJobName="%s/runJobs" %
                                 self.options.outputDir,
                                 asyncLsf=False)

        self.jobs = None
        if self.options.cont:
            pass
        else:
            self.firstRun()

        self.monitor()

    def firstRun(self):

        (options, args) = (self.options, self.args)
        parallel = self.parallel

        outputPfx = options.output.replace(".root", "")

        if not options.outputDir:
            sys.exit("Please specify an output folder")

        if options.dumpCfg:
            print(dumpCfg(options))
            sys.exit(0)

        if not os.path.exists(options.outputDir):
            os.mkdir(options.outputDir)
        outputPfx = "%s/%s" % (options.outputDir, outputPfx)

        args.append("processIdMap=%s/config.json" % options.outputDir)
        ## options.cmdLine += " %s" % (" ".join(args))
        options.cmdLine = str(" ".join(args))
        with open("%s/config.json" % (options.outputDir), "w+") as fout:
            fout.write(dumpCfg(options))

        outfiles = []
        doutfiles = {}
        poutfiles = {}

        jobs = []

        for name, datasets in options.processes.iteritems():
            poutfiles[name] = ("%s_%s.root" % (outputPfx, name), [])

            for dset in datasets:
                job = args[0]
                if self.options.jobExe:
                    pyjob = ""
                else:
                    pyjob = job
                jobargs = copy(args[1:])
                dsetName = dset.lstrip("/").replace("/", "_")
                outfile = "%s_%s.root" % (outputPfx, dsetName)
                doutfiles[dset] = (str(outfile), [])
                jobargs.extend(
                    ["dataset=%s" % dset,
                     "outputFile=%s" % outfile])
                print "running: %s %s" % (job, " ".join(jobargs))
                if options.njobs != 0:
                    print "splitting in (up to) %d jobs\n checking how many are needed... " % options.njobs
                    dnjobs = 0
                    dargs = jobargs + shell_args("nJobs=%d" % (options.njobs))
                    ret, out = parallel.run(
                        "python %s" % pyjob,
                        dargs +
                        shell_args("dryRun=1 getMaxJobs=1 dumpPython=%s.py" %
                                   os.path.join(options.outputDir, dsetName)),
                        interactive=True)[2]
                    maxJobs = self.getMaxJobs(out)
                    if maxJobs < 0:
                        print "Error getting numer of jobs to be submitted"
                        print out
                    hadd = self.getHadd(out, outfile)
                    ## for ijob in range(options.njobs):
                    for ijob in range(maxJobs):
                        ## FIXME allow specific job selection
                        ## iargs = dargs+shell_args("jobId=%d" % (ijob))
                        iargs = jobargs + shell_args("nJobs=%d jobId=%d" %
                                                     (maxJobs, ijob))
                        ## # run python <command-line> dryRun=1 to check if the job needs to be run
                        ## ret,out = parallel.run("python %s" % pyjob,iargs+shell_args("dryRun=1"),interactive=True)[2]
                        ## if ret != 0:
                        ##     continue
                        dnjobs += 1
                        if not options.dry_run:
                            ## FIXME:
                            ##   - handle output
                            ##   - store log files
                            parallel.run(job, iargs)
                        ## outfiles.append( outfile.replace(".root","_%d.root" % ijob) )
                        ## output = self.getHadd(out,outfile.replace(".root","_%d.root" % ijob))
                        output = hadd.replace(".root", "_%d.root" % ijob)
                        outfiles.append(output)
                        doutfiles[dset][1].append(outfiles[-1])
                        poutfiles[name][1].append(outfiles[-1])
                        jobs.append((job, iargs, output, 0, -1))
                    print " %d jobs actually submitted" % dnjobs
                else:
                    ret, out = parallel.run(
                        "python %s" % pyjob,
                        jobargs +
                        shell_args("dryRun=1 dumpPython=%s.py" %
                                   os.path.join(options.outputDir, dsetName)),
                        interactive=True)[2]
                    if ret != 0:
                        print ret, out
                        continue
                    if not options.dry_run:
                        parallel.run(job, jobargs)
                    ## outfiles.append( outfile )
                    output = self.getHadd(out, outfile)
                    outfiles.append(output)
                    jobs.append((job, jobargs, output, 0, -1))
                    poutfiles[name][1].append(outfiles[-1])
                print

        task_config = {
            "jobs": jobs,
            "datasets_output": doutfiles,
            "process_output": poutfiles,
            "output": outfiles,
            "outputPfx": outputPfx
        }
        with open("%s/task_config.json" % (options.outputDir), "w+") as cfout:
            cfout.write(json.dumps(task_config, indent=4))
            cfout.close()

    def monitor(self):

        (options, args) = (self.options, self.args)
        parallel = self.parallel

        with open("%s/task_config.json" % (options.outputDir), "r") as cfin:
            task_config = json.loads(cfin.read())

        doutfiles = task_config["datasets_output"]
        poutfiles = task_config["process_output"]
        outfiles = task_config["output"]
        outputPfx = task_config["outputPfx"]

        if not options.dry_run:
            ## FIXME: job resubmission
            self.jobs = task_config["jobs"]
            returns = self.wait(parallel, self)
            task_config["jobs"] = self.jobs

        if options.hadd:
            print "All jobs finished. Merging output."
            p = Parallel(options.ncpu)
            hadd = "hadd -f "
            if options.hadd_process:
                for proc, out in poutfiles.iteritems():
                    outfile, outfiles = out
                    p.run("%s %s" % (hadd, outfile), outfiles)
            if options.hadd_dataset:
                if options.hadd_process:
                    hadd += " -T"
                for dset, out in doutfiles.iteritems():
                    outfile, outfiles = out
                    p.run("%s %s" % (hadd, outfile), outfiles)
            if not (options.hadd_process or options.hadd_dataset):
                p.run("%s %s.root" % (hadd, outputPfx), outfiles)

            self.wait(p)

        with open("%s/task_config.json" % (options.outputDir), "w+") as cfout:
            cfout.write(json.dumps(task_config, indent=4))
            cfout.close()

        self.parallel.stop()

    def wait(self, parallel, handler=None):
        return parallel.wait(handler)

        ### for i in range(parallel.njobs):
        ###     print "Finished jobs: %d. Total jobs: %d" % (i, parallel.njobs)
        ###     job, jobargs, ret = parallel.returned.get()
        ###     print "finished: %s %s" % ( job, " ".join(jobargs) )
        ###     for line in ret[1].split("\n"):
        ###         print line

    def handleJobOutput(self, job, jobargs, ret):
        print "------------"
        print "Job finished: (exit code %d) '%s' '%s'" % (ret[0], job,
                                                          " ".join(jobargs))
        print "Job output: "
        print
        for line in ret[1].split("\n"):
            print line
        print
        jobargs = shell_args(" ".join(jobargs))
        job = jobargs[0]
        jobargs = jobargs[1:]
        for ijob in self.jobs:
            inam, iargs = ijob[0:2]
            ### print inam, job, inam == job
            ### for i,a in enumerate(iargs):
            ###     b = jobargs[i]
            ###     print a, b,  a == b
            if inam == job and iargs == jobargs:
                ijob[4] = ret[0]
                if ret[0] != 0:
                    print ""
                    print "Job failed. Number of resubmissions: %d / %d. " % (
                        ijob[3], self.maxResub),
                    if ijob[3] < self.maxResub:
                        print "Resubmitting."
                        self.parallel.run(inam, iargs)
                        ijob[3] += 1
                        print "------------"
                        return 1
                    else:
                        print "Giving up."
        print "------------"
        return 0

    def getHadd(self, stg, fallback):
        for line in stg.split("\n"):
            if line.startswith("hadd:"):
                return line.replace("hadd:", "")
        return fallback

    def getMaxJobs(self, stg):
        for line in stg.split("\n"):
            if line.startswith("maxJobs:"):
                return int(line.replace("maxJobs:", ""))
        return -1
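
For reference, each entry of task_config["jobs"] written by firstRun above is a
flat record (a tuple when first stored, a list after the JSON round trip); a
hedged illustration with hypothetical values:

    job_record = [
        "cmsRun",                        # command (or job executable)
        ["pset.py", "dataset=/X/Y/Z"],   # job arguments
        "output_X_Y_Z_0.root",           # expected output file
        0,                               # number of resubmissions so far
        -1,                              # last exit code (-1 = not finished)
    ]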
Esempio n. 34
0
    def checkDatasetFiles(self,dsetName,catalog=None,light=False):
        """
        Look for corrupted files in dataset.
        @dsetName: dataset name
        Note: not implemented
        """
        writeCatalog = False
        if not catalog:
            catalog = self.readCatalog()
            writeCatalog = True
        
        wait = False
        if not self.parallel_:
            self.parallel_ = Parallel(16,self.queue_,maxThreads=self.maxThreads_,asyncLsf=True)
            wait = True

        print 
        print "Checking dataset",dsetName
        info = catalog[dsetName]
        files = info["files"]
        print "Number of files: ", len(files)
        
        if self.force_ or not catalog[dsetName].get("vetted",False):
            toremove = []
            keep_wildcard=None
            for ifil,eifil in enumerate(files):
                if ifil in toremove:
                    continue
                for jfil,ejfil in enumerate(files[ifil+1:]):
                    if ifil+jfil in toremove:
                        continue
                    if eifil["name"] == ejfil["name"]:
                        toremove.append(ifil)
                    else:
                        iid = eifil["name"].rstrip(".root").rsplit("_",1)[-1]
                        jid = ejfil["name"].rstrip(".root").rsplit("_",1)[-1]
                        if iid == jid:
                            if not keep_wildcard:
                                print "duplicated file index ", iid
                                print eifil["name"]
                                print ejfil["name"]
                                reply=ask_user("keep both (yes/no/matching)? ",["y","n","m"])
                                if reply == "m":             
                                    while not keep_wildcard:
                                        print "enter wildcard matching expression",
                                        keep_wildcard=raw_input()
                                        if ask_user("keep all files matching '%s'?" % keep_wildcard) == "n":
                                            keep_wildcard=None
                            if keep_wildcard:                            
                                imatch=fnmatch(eifil["name"],keep_wildcard)
                                jmatch=fnmatch(ejfil["name"],keep_wildcard)
                                if imatch != jmatch:
                                    if imatch: toremove.append(ifil+jfil)
                                    else: toremove.append(ifil)                            
                                    continue                       
                                else:
                                    print "duplicated file index ", iid
                                    print eifil["name"]
                                    print ejfil["name"]
                                    reply=ask_user("keep both? ")
                            if reply == "n":
                                if ask_user( "keep %s? " % ejfil["name"] ) == "n":
                                    ## files.pop(ifil+jfil)
                                    toremove.append(ifil+jfil)
                                if ask_user( "keep %s? " % eifil["name"] ) == "n":
                                    toremove.append(ifil)
                                    ## files.pop(ifil)
                                    
            for ifile in sorted(toremove,reverse=True):
                ## print ifile
                files.pop(ifile)
            
        print "After duplicates removal: ", len(files)
        nsub = 0
        catalog[dsetName]["vetted"] = True
        if not light:
            info = catalog[dsetName]["files"] = files
            for ifile,finfo in enumerate(files):            
                name = finfo["name"]
                if self.force_ or not "weights" in finfo:
                    nsub+=1
                    self.parallel_.run(SamplesManager.checkFile,[self,name,dsetName,ifile],interactive=(self.queue_!=None))
        if nsub == 0:
            print "No files needed to be checked"
        else:
            print "Submitted %d check jobs" % nsub
            
        if wait:
            self.parallel_.wait(printOutput=False)            
            self.parallel_ = None
            
        if writeCatalog:
            self.writeCatalog(catalog)
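# Hedged illustration (file names are made up) of the duplicate detection used
# above: the job index is the last "_"-separated token of the name, and an
# optional wildcard decides which of two copies with the same index to keep.
from fnmatch import fnmatch

name_a = "/store/user/test/output_myjob_12.root"
name_b = "/store/user/test/output_resub_12.root"
idx_a = name_a.rstrip(".root").rsplit("_", 1)[-1]   # "12"
idx_b = name_b.rstrip(".root").rsplit("_", 1)[-1]   # "12" -> same index: duplicates
keep_wildcard = "*resub*"                           # hypothetical answer to the "matching" prompt
keep_b = fnmatch(name_b, keep_wildcard)             # True  -> keep name_b
keep_a = fnmatch(name_a, keep_wildcard)             # False -> drop name_a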
Esempio n. 35
0
class JobsManager(object):
    
    def __init__(self,
                 defaults={}
                 ):       
        """
        Constructor:
        @defaults: default options
        """

        # Command line options
        parser = OptionParser(option_list=[
                make_option("--processes", action="callback", callback=Load(), type="string", dest="processes",
                            default={}, help="JSON map of processes to the datasets to be analyzed"),
                make_option("--load",  # special option to load the whole configuration from JSON
                            action="callback",callback=Load(),dest="__opts__",
                            type="string",
                            help="load JSON file with configuration",metavar="CONFIG.json"
                            ),
                make_option("-n","--njobs",dest="njobs",type="int",default=0,
                            help="number of jobs to run"),
                make_option("-q","--queue",dest="queue",type="string",default=None,
                            help="LSF queue to use. default: %default"),
                make_option("-o","--output",dest="output",type="string",
                            default="output.root", help="output file name. default: %default"),
                make_option("-d","--outputDir",dest="outputDir",type="string",
                            default=None, help="output folder. default: %default"),
                make_option("-x","--jobEx",dest="jobExe",type="string",
                            default=None, help="job executable. default: %default"),
                make_option("-c","--cmdLine",dest="cmdLine",type="string",
                            default=None, help="job command line. The script arguments will be prepended. default: %default"),
                make_option("--dumpCfg",
                            action="store_true",
                            default=False,
                            help="dump configuaration and exit. default: %default"),
                make_option("-v","--verbose",
                            action="store_true", dest="verbose",
                            default=False,
                            help="default: %default"),
                make_option("-m","--max-resubmissions",dest="maxResub", type="int",default=3),
                make_option("-N","--ncpu",dest="ncpu", type="int",default=cpu_count()),
                make_option("-H","--hadd",dest="hadd",default=False, action="store_true",
                            help="hadd output files when all jobs are finished."
                            ),
                make_option("-D","--hadd-dateset",dest="hadd_dataset",default=False, action="store_true",
                            help="hadd output per dataset when all jobs are finished."
                            ),
                make_option("-P","--hadd-process",dest="hadd_process",default=False, action="store_true",
                            help="hadd output per process when all jobs are finished."
                            ),
                make_option("--dry-run",dest="dry_run",default=False, action="store_true",
                            help="do not actually run the jobs."
                            ),
                make_option("-C","--cont",dest="cont",default=False, action="store_true",
                            help="continue interrupted task."
                            ),
                ]
                              )
        
        # parse the command line
        (self.options, self.args) = parser.parse_args()
        self.maxResub = self.options.maxResub

        if self.options.cmdLine:
            self.args = self.args+shell_args(str(self.options.cmdLine))
        
        if self.options.jobExe:
            self.args = [shell_expand(self.options.jobExe)]+self.args
            
    def __call__(self):
        """
        Run all jobs: submit them (unless continuing an interrupted task) and monitor until completion.
        """
        self.parallel = Parallel(self.options.ncpu,lsfQueue=self.options.queue,lsfJobName="%s/runJobs" % self.options.outputDir,asyncLsf=False)
        
        self.jobs = None
        if self.options.cont:
            pass
        else:
            self.firstRun()
            
        self.monitor()
        
    def firstRun(self):

        (options,args) = (self.options, self.args)
        parallel = self.parallel
        
        outputPfx = options.output.replace(".root","")
        
        if not options.outputDir:
            sys.exit("Please specify an output folder")

        if options.dumpCfg:
            print ( dumpCfg(options) )
            sys.exit(0)

        if not os.path.exists(options.outputDir):
            os.mkdir(options.outputDir)
        outputPfx = "%s/%s" % ( options.outputDir, outputPfx )
        
        args.append("processIdMap=%s/config.json" % options.outputDir)
        ## options.cmdLine += " %s" % (" ".join(args))
        options.cmdLine = str(" ".join(args))
        with open("%s/config.json" % (options.outputDir), "w+" ) as fout:
            fout.write( dumpCfg(options) )
            
        
        
        outfiles = []
        doutfiles = {}
        poutfiles = {}
        
        jobs = []

        for name,datasets in options.processes.iteritems():
            poutfiles[name] = ( "%s_%s.root" % ( outputPfx,name), [] )
        
            for dset in datasets:
                job = args[0]
                if self.options.jobExe:
                    pyjob = ""
                else:
                    pyjob = job
                jobargs = copy(args[1:])
                dsetName = dset.lstrip("/").replace("/","_")
                outfile = "%s_%s.root" % ( outputPfx, dsetName )
                doutfiles[dset] = ( str(outfile),[] )
                jobargs.extend( ["dataset=%s" % dset, "outputFile=%s" % outfile ] )
                print "running: %s %s" % ( job, " ".join(jobargs) )
                if options.njobs != 0:
                    print  "splitting in (up to) %d jobs\n checking how many are needed... " % options.njobs
                    dnjobs = 0
                    dargs = jobargs+shell_args("nJobs=%d" % (options.njobs)) 
                    ret,out = parallel.run("python %s" % pyjob,dargs+shell_args("dryRun=1 getMaxJobs=1 dumpPython=%s.py" % os.path.join(options.outputDir,dsetName) ),interactive=True)[2]
                    maxJobs = self.getMaxJobs(out)
                    if maxJobs < 0:
                        print "Error getting numer of jobs to be submitted"
                        print out
                    hadd = self.getHadd(out,outfile)
                    ## for ijob in range(options.njobs):
                    for ijob in range(maxJobs):
                        ## FIXME allow specific job selection
                        ## iargs = dargs+shell_args("jobId=%d" % (ijob))
                        iargs = jobargs+shell_args("nJobs=%d jobId=%d" % (maxJobs, ijob))
                        ## # run python <command-line> dryRun=1 to check if the job needs to be run
                        ## ret,out = parallel.run("python %s" % pyjob,iargs+shell_args("dryRun=1"),interactive=True)[2]
                        ## if ret != 0:
                        ##     continue
                        dnjobs += 1 
                        if not options.dry_run:
                            ## FIXME: 
                            ##   - handle output
                            ##   - store log files
                            parallel.run(job,iargs)
                        ## outfiles.append( outfile.replace(".root","_%d.root" % ijob) )
                        ## output = self.getHadd(out,outfile.replace(".root","_%d.root" % ijob))
                        output = hadd.replace(".root","_%d.root" % ijob)
                        outfiles.append( output )
                        doutfiles[dset][1].append( outfiles[-1] )
                        poutfiles[name][1].append( outfiles[-1] )
                        jobs.append( (job,iargs,output,0,-1) )
                    print " %d jobs actually submitted" % dnjobs                
                else:
                    ret,out = parallel.run("python %s" % pyjob,jobargs+shell_args("dryRun=1 dumpPython=%s.py" % os.path.join(options.outputDir,dsetName)),interactive=True)[2]
                    if ret != 0:
                        print ret,out
                        continue
                    if not options.dry_run:
                        parallel.run(job,jobargs)
                    ## outfiles.append( outfile )
                    output = self.getHadd(out,outfile)
                    outfiles.append( output )
                    jobs.append( (job,jobargs,output,0,-1) )
                    poutfiles[name][1].append( outfiles[-1] )
                print

        task_config = {
            "jobs" : jobs,
            "datasets_output" : doutfiles,
            "process_output"  : poutfiles,
            "output"          : outfiles,
            "outputPfx"       : outputPfx
            }
        with open("%s/task_config.json" % (options.outputDir), "w+" ) as cfout:
            cfout.write( json.dumps(task_config,indent=4) )
            cfout.close()
            
    def monitor(self):

        (options,args) = (self.options, self.args)
        parallel = self.parallel
        
        with open("%s/task_config.json" % (options.outputDir), "r" ) as cfin:
            task_config = json.loads(cfin.read())
        
        doutfiles = task_config["datasets_output"]
        poutfiles = task_config["process_output"]
        outfiles  = task_config["output"]
        outputPfx = task_config["outputPfx"]
        

        if not options.dry_run:
            ## FIXME: job resubmission
            self.jobs = task_config["jobs"]
            returns = self.wait(parallel,self)
            task_config["jobs"] = self.jobs
            
        if options.hadd:
            print "All jobs finished. Merging output."
            p = Parallel(options.ncpu)
            hadd = "hadd -f "
            if options.hadd_process:
                for proc,out in poutfiles.iteritems():
                    outfile,outfiles = out
                    p.run("%s %s" % (hadd, outfile), outfiles )
            if options.hadd_dataset:
                if options.hadd_process:
                    hadd += " -T"
                for dset,out in doutfiles.iteritems():
                    outfile,outfiles = out
                    p.run("%s %s" % (hadd,outfile), outfiles) 
            if not (options.hadd_process or options.hadd_dataset):
                p.run("%s %s.root" % (hadd,outputPfx), outfiles)
            
            self.wait(p)

        with open("%s/task_config.json" % (options.outputDir), "w+" ) as cfout:
            cfout.write( json.dumps(task_config,indent=4) )
            cfout.close()
        
        self.parallel.stop()

    def wait(self,parallel,handler=None):
        return parallel.wait(handler)
    
        ### for i in range(parallel.njobs):
        ###     print "Finished jobs: %d. Total jobs: %d" % (i, parallel.njobs)
        ###     job, jobargs, ret = parallel.returned.get()
        ###     print "finished: %s %s" % ( job, " ".join(jobargs) )
        ###     for line in ret[1].split("\n"):
        ###         print line

    def handleJobOutput(self,job,jobargs,ret):
        print "------------"
        print "Job finished: (exit code %d) '%s' '%s'" % ( ret[0], job, " ".join(jobargs) )
        print "Job output: "
        print 
        for line in ret[1].split("\n"):
            print line
        print
        jobargs = shell_args(" ".join(jobargs))
        job = jobargs[0]
        jobargs = jobargs[1:]
        for ijob in self.jobs:
            inam,iargs = ijob[0:2]
            ### print inam, job, inam == job
            ### for i,a in enumerate(iargs):
            ###     b = jobargs[i]
            ###     print a, b,  a == b
            if inam == job and iargs == jobargs:
                ijob[4] = ret[0]
                if ret[0] != 0:
                    print ""
                    print "Job failed. Number of resubmissions: %d / %d. " % (ijob[3], self.maxResub),
                    if ijob[3] < self.maxResub:
                        print "Resubmitting."
                        self.parallel.run(inam,iargs)
                        ijob[3] += 1
                        print "------------"
                        return 1
                    else:
                        print "Giving up."
        print "------------"
        return 0
    
    def getHadd(self,stg,fallback):
        for line in stg.split("\n"):
            if line.startswith("hadd:"):
                return line.replace("hadd:","")
        return fallback

    def getMaxJobs(self,stg):
        for line in stg.split("\n"):
            if line.startswith("maxJobs:"):
                return int(line.replace("maxJobs:",""))
        return -1
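# Hedged usage sketch: JobsManager is driven entirely by the command line
# parsed in __init__ above, so a typical driver script (hypothetical file)
# only needs to instantiate and call it.
if __name__ == "__main__":
    manager = JobsManager()
    manager()   # firstRun() unless --cont was given, then monitor()/hadd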
Esempio n. 36
0
class SamplesManager(object):
    
    def __init__(self,
                 catalog,
                 cross_sections=["$CMSSW_BASE/src/flashgg/MetaData/data/cross_sections.json"],
                 dbs_instance="prod/phys03",
                 queue=None
                 ):
        """
        Constructor:
        @catalog: json file used to read/write dataset information
        @cross_sections: json files where sample cross sections are stored
        @dbs_instance: DBS instance to use
        """
        self.cross_sections_ = {}
        self.dbs_instance_ = dbs_instance

        for xsecFile in cross_sections:
            fname = shell_expand(xsecFile)
            self.cross_sections_.update( json.loads( open(fname).read() ) )
            
        self.catalog_ = shell_expand(catalog)

        self.parallel_ = None
        self.sem_ = Semaphore()

        print "Will use the following datasets catalog:"
        print self.catalog_
        
        self.queue_ = queue
        
    def importFromDAS(self,datasets):
        """
        Import datasets from DAS to the catalog.
        @datasets: list of dataset names or a wildcard to be used in the dataset query
        """
        catalog = self.readCatalog()
        
        print "Importing from das %s" % datasets
        if "*" in datasets:
            response = das_query("https://cmsweb.cern.ch","dataset dataset=%s | grep dataset.name" % datasets, 0, 0, False, self.dbs_instance_)
        
            datasets=[]
            for d in response["data"]:
                datasets.append( d["dataset"][0]["name"] )
            print "Datasets to import"
            print "\n".join(datasets)
            
        for dsetName in datasets:
            print "Importing %s" % dsetName
            files = self.getFilesFomDAS(dsetName)
            if dsetName in catalog:
                catalog[ dsetName ]["files"]  = files
            else:
                catalog[ dsetName ] = { "files" : files }
            
        print "Writing catalog"
        self.writeCatalog(catalog)
        print "Done"

    def getFilesFomDAS(self,dsetName):
        """
        Read dataset files from DAS.
        @dsetName: dataset name
        """
        response = das_query("https://cmsweb.cern.ch","file dataset=%s | grep file.name,file.nevents" % dsetName, 0, 0, False, self.dbs_instance_)
        
        files=[]
        for d in response["data"]:
            for jf in d["file"]:
                if "nevents" in jf:
                    files.append({ "name" : jf["name"], "nevents" : jf["nevents"] })
                    break
                ## files.append( { "name" : d["file"][0]["name"], "nevents" : d["file"][0]["nevents"] } )

        return files
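    # Hedged illustration of the DAS response shape that the loop above expects
    # (structure inferred from the parsing; values are hypothetical):
    #   response = {"data": [
    #       {"file": [{"name": "/store/test/file_1.root", "nevents": 1000}]},
    #       {"file": [{"size": 123}, {"name": "/store/test/file_2.root", "nevents": 500}]},
    #   ]}
    # The first entry per record that carries "nevents" is kept, giving
    #   [{"name": "/store/test/file_1.root", "nevents": 1000},
    #    {"name": "/store/test/file_2.root", "nevents": 500}]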

    def importFromEOS(self,folders):
        """
        Import datasets from EOS folders to the catalog.
        @folders: list of EOS folders to be imported (a dataset name is asked for each folder)
        """
        catalog = self.readCatalog()

        for folder in folders:
            dsetName = ""
            while not len(dsetName.split("/")) == 4:
                print "enter dataset name for folder %s" % folder, 
                dsetName = raw_input()
                
            print "Importing %s as %s" % (folder,dsetName)
            files = self.getFilesFomEOS(folder)            
            if dsetName in catalog:
                catalog[ dsetName ]["files"]  = files
            else:
                catalog[ dsetName ] = { "files" : files }
            
        print "Writing catalog"
        self.writeCatalog(catalog)
        print "Done"
        
    def getFilesFomEOS(self,dsetName):
        """
        Read dataset files by crawling EOS.
        @dsetName: EOS folder to crawl
        """
        
        if not self.parallel_:
            self.parallel_ = Parallel(200,self.queue_)
        
        ret,out = self.parallel_.run("/afs/cern.ch/project/eos/installation/0.3.15/bin/eos.select",["find",dsetName],interactive=True)[2]
        print out
        files = []
        for line in out.split("\n"):
            if line.endswith(".root"):
                files.append( {"name":line.replace("/eos/cms",""), "nevents":0} )

        return files
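    # Hedged illustration (sample lines are hypothetical): the "eos find" output
    # is scanned line by line, keeping only ".root" paths and stripping the
    # "/eos/cms" prefix so the names follow the usual /store/... convention:
    #   /eos/cms/store/group/test/output_0.root  -> kept as /store/group/test/output_0.root
    #   /eos/cms/store/group/test/job.log        -> ignored (not a .root file)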

    def findDuplicates(self,dsetName):
        """
        Find duplicate job outputs in dataset.
        @dsetName: dataset name
        Note: not implemented
        """
        pass
    
    def invalidateBadFiles(self,dsetName):
        """
        Invalidate duplicate job output and corrupted files in DAS.
        @dsetName: dataset name
        Note: not implemented
        """
        pass

    def checkAllDatasets(self):
        """
        Look for corrupted files in the whole catalog.
        """
        catalog = self.readCatalog()
        
        self.parallel_ = Parallel(50,self.queue_)
        ## self.parallel_ = Parallel(1,self.queue_)

        print "Checking all datasets"
        for dataset in catalog.keys():            
            self.checkDatasetFiles(dataset,catalog)
        
        outcomes = self.parallel_.wait()
        for dsetName,ifile,fName,ret,out in outcomes:
            info = catalog[dsetName]["files"][ifile]
            if info["name"] != fName:
                print "Inconsistent outcome ", info["name"], dsetName,ifile,fName,ret,out
            else:
                if ret != 0:
                    info["bad"] = True
                else:
                    extraInfo = json.loads(str(out))
                    for key,val in extraInfo.iteritems():
                        info[key] = val

            print "Writing catalog"
            self.writeCatalog(catalog)
        
        print "Done"
    
    def checkDatasetFiles(self,dsetName,catalog=None):
        """
        Look for corrupted files in dataset.
        @dsetName: dataset name
        @catalog: catalog to update (read from disk if None)
        """
        writeCatalog = False
        if not catalog:
            catalog = self.readCatalog()
            writeCatalog = True
        
        wait = False
        if not self.parallel_:
            self.parallel_ = Parallel(16,self.queue_)
            wait = True

        print "Checking dataset",dsetName
        info = catalog[dsetName]
        files = info["files"]

        print len(files)
        for ifile,finfo in enumerate(files):            
            name = finfo["name"]
            self.parallel_.run(SamplesManager.checkFile,[self,name,dsetName,ifile])

        if wait:
            self.parallel_.wait()            
            self.parallel_ = None
        if writeCatalog:
            self.writeCatalog(catalog)

    def reviewCatalog(self):
        datasets,catalog = self.getAllDatasets()

        primaries = {}
        keepAll = False
        for d in datasets:
            if not keepAll:
                reply = ask_user("keep this dataset (yes/no/all)?\n %s\n" % d, ["y","n","a"])
                if reply == "n":
                    catalog.pop(d)
                    continue
                if reply == "a": 
                    keepAll = True
                    
            primary = d.split("/")[1]
            if not primary in primaries:
                primaries[ primary ] = []
                
            primaries[ primary ].append(d)
            
        for name,val in primaries.iteritems():
            if len(val) == 1: continue
            reply = ask_user("More than one sample for %s:\n %s\nKeep all?" % (name,"\n ".join(val)))
            if reply == "n":
                for d in val:
                    reply = ask_user("keep this dataset?\n %s\n" % d)
                    if reply == "n":
                        catalog.pop(d)
           
        self.writeCatalog(catalog)

    def checkFile(self,fileName,dsetName,ifile):
        """
        Check if file is valid.
        @fileName: file name
        """
        ## fName = "root://eoscms//eos/cms%s" % fileName
        fName = fileName
        tmp = ".tmp%s_%d.json"%(dsetName.replace("/","_"),ifile)
        ## print "fggCheckFile.py",[fName,tmp,"2>/dev/null"]
        ret,out = self.parallel_.run("fggCheckFile.py",[fName,tmp,"2>/dev/null"],interactive=True)[2]
        
        try:
            fout = open(tmp)
            out = fout.read()
            fout.close()
        except IOError, e:
            print ret, out 
            print e
            out = "{}"

        os.remove(tmp)
        return dsetName,ifile,fileName,ret,out
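# Hedged sketch of the round trip above: fggCheckFile.py (external script,
# assumed behaviour) writes a small JSON summary to the temporary file, which
# checkAllDatasets() then merges back into the catalog entry; the key names
# below are illustrative.
import json

example_out = '{"nevents": 1000, "weights": 1234.5}'   # hypothetical fggCheckFile.py output
extraInfo = json.loads(example_out)
# for key, val in extraInfo.iteritems(): info[key] = val   (as done in checkAllDatasets)
# a non-zero return code instead marks the file with info["bad"] = True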
Esempio n. 37
0
        dataset = Dataset(dataset_name=args.dataset)
        mlp_aux = MLP_AUX(dataset, args.negative_sampling_size,
                          eval(args.layers), args.epochs, args.batch_size,
                          args.validation_split, args.user_sampling_size,
                          args.core_number, args.sim_threshold)
        model = mlp_aux.train_model()
        hits, ndcgs = evaluate_model(model, dataset.test_data,
                                     dataset.test_negatives, 10, 1, True)
        print("Hitrate: {}".format(sum(hits) / len(hits)))
        print("NDCG: {}".format(sum(ndcgs) / len(ndcgs)))
    elif args.network_type == 'parallel':

        dataset = Dataset(dataset_name=args.dataset)
        parallel = Parallel(dataset, args.negative_sampling_size,
                            eval(args.layers), args.epochs, args.batch_size,
                            args.validation_split)
        model = parallel.train_model()
        hits, ndcgs = evaluate_model(model, dataset.test_data,
                                     dataset.test_negatives, 10, 1)
        print("Hitrate: {}".format(sum(hits) / len(hits)))
        print("NDCG: {}".format(sum(ndcgs) / len(ndcgs)))
    elif args.network_type == 'parallel-aux':
        dataset = Dataset(dataset_name=args.dataset)
        parallel_aux = Parallel_AUX(dataset, args.negative_sampling_size,
                                    eval(args.layers), args.epochs,
                                    args.batch_size, args.validation_split,
                                    args.user_sampling_size, args.core_number,
                                    args.sim_threshold)
        model = parallel_aux.train_model()
        hits, ndcgs = evaluate_model(model, dataset.test_data,
Esempio n. 38
0
    def checkDatasetFiles(self,dsetName,catalog=None):
        """
        Look for corrupted files in dataset.
        @dsetName: dataset name
        @catalog: catalog to update (read from disk if None)
        """
        writeCatalog = False
        if not catalog:
            catalog = self.readCatalog()
            writeCatalog = True
        
        wait = False
        if not self.parallel_:
            self.parallel_ = Parallel(16,self.queue_)
            wait = True

        print 
        print "Checking dataset",dsetName
        info = catalog[dsetName]
        files = info["files"]
        print "Number of files: ", len(files)
        
        toremove = []
        for ifil,eifil in enumerate(files):
            if ifil in toremove:
                continue
            for jfil,ejfil in enumerate(files[ifil+1:]):
                if ifil+jfil in toremove:
                    continue
                if eifil["name"] == ejfil["name"]:
                    toremove.append(ifil)
                else:
                    iid = eifil["name"].rstrip(".root").rsplit("_",1)[-1]
                    jid = ejfil["name"].rstrip(".root").rsplit("_",1)[-1]
                    if iid == jid:
                        print "duplicated file index ", iid
                        print eifil["name"]
                        print ejfil["name"]
                        reply=ask_user("keep both? ")
                        if reply == "n":
                            if ask_user( "keep %s? " % ejfil["name"] ) == "n":
                                ## files.pop(ifil+jfil)
                                toremove.append(ifil+jfil)
                            if ask_user( "keep %s? " % eifil["name"] ) == "n":
                                toremove.append(ifil)
                                ## files.pop(ifil)
                                
        for ifile in sorted(toremove,reverse=True):
            ## print ifile
            files.pop(ifile)
            
        print "After duplicates removal: ", len(files)
        info = catalog[dsetName]["files"] = files
        for ifile,finfo in enumerate(files):            
            name = finfo["name"]
            self.parallel_.run(SamplesManager.checkFile,[self,name,dsetName,ifile])

        if wait:
            self.parallel_.wait(printOutput=False)            
            self.parallel_ = None
            
        if writeCatalog:
            self.writeCatalog(catalog)
Esempio n. 39
0
class ParallelTrigger(object):
    """Parallel port and dummy triggering support

    .. warning:: When using the parallel port, calling
                 :meth:`expyfun.ExperimentController.start_stimulus`
                 will automatically invoke a stamping of the 1 trigger, which
                 will in turn cause a delay equal to that of ``high_duration``.
                 This can affect e.g. :class:`EyelinkController` timing.

    Parameters
    ----------
    mode : str
        'parallel' for real use. 'dummy', passes all calls.
    address : str | int | None
        The address to use. On Linux this should be a string path like
        ``'/dev/parport0'`` (equivalent to None), on Windows it should be an
        integer address like ``888`` or ``0x378`` (equivalent to None).
    high_duration : float
        Amount of time (seconds) to leave the trigger high whenever
        sending a trigger.
    verbose : bool, str, int, or None
        If not None, override default verbose level (see expyfun.verbose).

    Notes
    -----
    Parallel port activation is enabled by using the ``trigger_controller``
    argument of :class:`expyfun.ExperimentController`.

    On Linux, parallel port may require some combination of the following:

        1. ``sudo modprobe ppdev``
        2. Add user to ``lp`` group (``/etc/group``)
        3. Run ``sudo rmmod lp`` (otherwise ``lp`` takes exclusive control)
        4. Edit ``/etc/modprobe.d/blacklist.conf`` to add ``blacklist lp``

    The ``parallel`` module must also be installed.

    On Windows, you may need to download ``inpout32.dll`` from someplace
    like http://www.highrez.co.uk/downloads/inpout32/.
    """
    @verbose_dec
    def __init__(self,
                 mode='dummy',
                 address=None,
                 high_duration=0.005,
                 verbose=None):
        if mode == 'parallel':
            if sys.platform.startswith('linux'):
                address = '/dev/parport0' if address is None else address
                if not isinstance(address, string_types):
                    raise ValueError('address must be a string or None, got %s '
                                     'of type %s' % (address, type(address)))
                from parallel import Parallel
                self._port = Parallel(address)
                self._portname = address
                self._set_data = self._port.setData
            elif sys.platform.startswith('win'):
                from ctypes import windll
                if not hasattr(windll, 'inpout32'):
                    raise SystemError(
                        'Must have inpout32 installed, see:\n\n'
                        'http://www.highrez.co.uk/downloads/inpout32/')

                base = 0x378 if address is None else address
                if isinstance(base, string_types):
                    base = int(base, 16)
                if not isinstance(base, int):
                    raise ValueError('address must be int or None, got %s of '
                                     'type %s' % (base, type(base)))
                self._port = windll.inpout32
                mask = np.uint8(1 << 5 | 1 << 6 | 1 << 7)
                # Use ECP to put the port into byte mode
                val = int((self._port.Inp32(base + 0x402) & ~mask) | (1 << 5))
                self._port.Out32(base + 0x402, val)

                # Now to make sure the port is in output mode we need to make
                # sure that bit 5 of the control register is not set
                val = int(self._port.Inp32(base + 2) & ~np.uint8(1 << 5))
                self._port.Out32(base + 2, val)
                self._set_data = lambda data: self._port.Out32(base, data)
                self._portname = str(base)
            else:
                raise NotImplementedError('Parallel port triggering only '
                                          'supported on Linux and Windows')
        else:  # mode == 'dummy':
            self._port = self._portname = None
            self._trigger_list = list()
            self._set_data = lambda x: (self._trigger_list.append(x)
                                        if x != 0 else None)
        self.high_duration = high_duration
        self.mode = mode

    def __repr__(self):
        return '<ParallelTrigger : %s (%s)>' % (self.mode, self._portname)

    def _stamp_trigger(self, trig):
        """Fake stamping"""
        self._set_data(int(trig))
        wait_secs(self.high_duration)
        self._set_data(0)

    def stamp_triggers(self, triggers, delay=0.03, wait_for_last=True):
        """Stamp a list of triggers with a given inter-trigger delay

        Parameters
        ----------
        triggers : list
            No input checking is done, so ensure triggers is a list,
            with each entry an integer with fewer than 8 bits (max 255).
        delay : float
            The inter-trigger delay.
        wait_for_last : bool
            If True, wait for last trigger to be stamped before returning.
        """
        for ti, trig in enumerate(triggers):
            self._stamp_trigger(trig)
            if ti < len(triggers) - 1 or wait_for_last:
                wait_secs(delay - self.high_duration)

    def close(self):
        """Release hardware interfaces
        """
        if hasattr(self, '_port'):
            del self._port

    def __del__(self):
        return self.close()
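# Hedged usage sketch (dummy mode, so no parallel-port hardware is touched;
# trigger values are arbitrary). Note that stamp_triggers() relies on
# wait_secs() from the surrounding expyfun module, so this is not standalone:
#   tr = ParallelTrigger(mode='dummy')
#   tr.stamp_triggers([1, 4, 8], delay=0.03)
#   tr.close()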
    def fit(self, X, y, sample_weight=None):  # linear regression fit
        """
        Fit linear model.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            Training data

        y : array_like, shape (n_samples, n_targets)
            Target values. Will be cast to X's dtype if necessary

        sample_weight : numpy array of shape [n_samples]
            Individual weights for each sample

            .. versionadded:: 0.17
               parameter *sample_weight* support to LinearRegression.

        Returns
        -------
        self : returns an instance of self.
        """

        n_jobs_ = self.n_jobs
        X, y = check_X_y(X,
                         y,
                         accept_sparse=['csr', 'csc', 'coo'],
                         y_numeric=True,
                         multi_output=True)  # validate the inputs and convert them to a supported format

        # ------------------------------
        # np.atleast_xd treats the input as an array with at least x dimensions
        # (atleast_1d, atleast_2d, atleast_3d)
        if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar"
                             )  # sampple_weight 必须为一维 array

        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
            X,
            y,
            fit_intercept=self.fit_intercept,
            normalize=self.normalize,
            copy=self.copy_X,
            sample_weight=sample_weight)  # center and (optionally) normalize the X data

        if sample_weight is not None:
            # Sample weight can be implemented via a simple rescaling.
            X, y = _rescale_data(X, y, sample_weight)  # apply sample_weight by rescaling the data

        if sp.issparse(X):

            if y.ndim < 2:

                out = sparse_lsqr(X, y)  # directly calls scipy.sparse.linalg's
                # least-squares solver (lsqr)
                self.coef_ = out[0]
                self._residues = out[3]

            else:
                # sparse_lstsq cannot handle y with shape (M, K)
                outs = Parallel(n_jobs=n_jobs_)(
                    delayed(sparse_lsqr)(X, y[:, j].ravel())
                    for j in range(y.shape[1]))
                self.coef_ = np.vstack(out[0] for out in outs)
                self._residues = np.vstack(out[3] for out in outs)

        else:
            self.coef_, self._residues, self.rank_, self.singular_ = \
                linalg.lstsq(X, y)  # compute the least-squares solution to the equation Ax = b
            self.coef_ = self.coef_.T

        if y.ndim == 1:

            self.coef_ = np.ravel(self.coef_)
            self._set_intercept(X_offset, y_offset, X_scale)

        return self
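# Hedged, self-contained sketch (random demo data, assuming the standalone
# joblib package is available) of the Parallel/delayed pattern used in the
# sparse multi-target branch above: one lsqr solve per target column, run in
# parallel, then the per-column coefficients are stacked.
import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import lsqr as sparse_lsqr
from joblib import Parallel, delayed

X_demo = sp.random(50, 5, density=0.3, format='csr', random_state=0)
Y_demo = np.random.RandomState(0).rand(50, 3)             # three targets
outs = Parallel(n_jobs=2)(
    delayed(sparse_lsqr)(X_demo, Y_demo[:, j].ravel()) for j in range(Y_demo.shape[1]))
coef_demo = np.vstack([out[0] for out in outs])            # shape (3, 5)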
Esempio n. 41
0
    def update(self,
               nprocs=1,
               factor=None,
               bw_hz=None,
               foi_hz=None,
               fs_hz=None,
               f_ord=None,
               ftype=None,
               n_freqs=None,
               n_samples=None,
               n_channels=None):

        self.n_channels = n_channels if n_channels is not None else self.n_channels
        self.n_freqs = n_freqs if n_freqs is not None else self.n_freqs
        self.n_processes = min(Parallel.check_nprocs() - 1,
                               self.n_freqs) if nprocs != 1 else 1

        # Signal process properties
        self.decimate_by = factor
        self.n_samples = int(n_samples / self.decimate_by)
        self.sample_rate = fs_hz / self.decimate_by if fs_hz is not None else self.sample_rate

        self.bandwidth = bw_hz if bw_hz is not None else self.bandwidth

        self.w_, self.H_ = self.create_filter(f_ord,
                                              self.bandwidth / 2.0,
                                              self.sample_rate / 2.0,
                                              self.n_samples,
                                              ftype='fir',
                                              output='freq')
        self.Hwin = self.H_[np.logical_and(self.w_ >= -self.bandwidth / 2.0,
                                           self.w_ < self.bandwidth / 2.0)]

        self.n_samples_procs = self.Hwin.size

        # Setup center frequencies
        if len(foi_hz) > 1:
            cf = np.arange(*foi_hz, np.diff(foi_hz) / self.n_freqs, dtype=int)
            diff = cf.shape[0] - self.n_freqs
            if diff > 0:
                cf = cf[:-diff]
        else:
            cf = foi_hz

        self.freqs = np.asarray([
            (f - self.bandwidth / 2, f + self.bandwidth / 2) for f in cf
        ])

        # Create rules for how to handle the data
        self._encoder_rule()
        self._decoder_rule()

        if self.n_processes > 1:
            self.pfunc = Parallel(self.multiply,
                                  nprocs=self.n_processes,
                                  axis=0,
                                  flag=0,
                                  ins_shape=[(self.n_channels, self.n_freqs,
                                              self.n_samples_procs),
                                             (1, self.n_samples_procs)],
                                  ins_dtype=[np.complex64, np.complex64],
                                  out_shape=(self.n_channels, self.n_freqs,
                                             self.n_samples_procs),
                                  out_dtype=np.complex64)
Esempio n. 42
0
# !setup!
import neworder
from parallel import Parallel  # import our model definition

#neworder.verbose()
#neworder.checked(False)

# must be MPI enabled
assert neworder.mpi.size() > 1, "This configuration requires MPI with >1 process"
# !setup!

# !run!
population_size = 100
p = 0.01
timeline = neworder.LinearTimeline(0, 10, 10)
model = Parallel(timeline, p, population_size)
neworder.run(model)
#!run!
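# Hedged note: with an MPI-enabled neworder build this script is typically
# launched with something like
#   mpiexec -n 2 python parallel_run.py
# (the script name is hypothetical); each MPI rank builds its own Parallel
# model instance and neworder.run() advances them over the shared timeline.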