def finish_job(job):
    """Finish the job and send to postprocess_phaser"""
    info = self.jobs.pop(job)
    self.tprint(' Finished Phaser on %s with id: %s' % (info['name'], info['tag']),
                level=30,
                color="white")
    self.logger.debug('Finished Phaser on %s' % info['name'])
    if self.computer_cluster:
        results_json = self.redis.get(info['tag'])
        # This try/except is for when results aren't in Redis in time.
        try:
            results = json.loads(results_json)
            self.postprocess_phaser(info['name'], results)
            self.redis.delete(info['tag'])
        except Exception as e:
            self.logger.error('Error %s' % str(e))
    else:
        results = info['result_queue'].get()
        # Default to an empty JSON object so json.loads does not fail when stdout is missing
        self.postprocess_phaser(info['name'], json.loads(results.get('stdout', '{}')))
    # Remove the job from the enclosing scope's list of active jobs
    jobs.remove(job)
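# A minimal sketch of the worker side finish_job expects in cluster mode (the
# function name and client setup are assumptions, not RAPD code): the remote
# Phaser job serializes its results to JSON and stores them in Redis under the
# job's tag, where finish_job retrieves and then deletes them.
import json

import redis

def store_phaser_results(redis_host, tag, results):
    """Store a finished job's results under its tag for finish_job to collect."""
    red = redis.Redis(host=redis_host)
    red.set(tag, json.dumps(results))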
def run(self):
    """The core process of the Launcher instance"""

    # Set up overwatcher
    if self.overwatch_id:
        self.ow_registrar = Registrar(site=self.site,
                                      ow_type="launcher",
                                      ow_id=self.overwatch_id)
        self.ow_registrar.register({"site_id": json.dumps(self.launcher.get('site_tag')),
                                    "job_list": self.job_list})

    try:
        timer = 0
        # This is the server portion of the code
        while self.running:
            # Have Registrar update status every second
            if round(timer % 1, 1) in (0.0, 1.0):
                if self.overwatch_id:
                    self.ow_registrar.update({"site_id": json.dumps(self.launcher.get('site_tag')),
                                              "job_list": self.job_list})

            # Look for a new command
            # This will throw a redis.exceptions.ConnectionError if redis is unreachable
            try:
                while self.redis.llen(self.job_list) != 0:
                    command = self.redis.rpop(self.job_list)
                    # Handle the message
                    if command:
                        self.handle_command(json.loads(command))
                # Sleep a little when jobs aren't coming in
                time.sleep(0.2)
                timer += 0.2
            except redis.exceptions.ConnectionError:
                if self.logger:
                    self.logger.exception("Remote Redis is not up. Waiting for Sentinel to switch to new host")
                time.sleep(1)
    except KeyboardInterrupt:
        self.stop()
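# A minimal sketch of the submitting side of this loop (the function name and
# connection details are assumptions): a job is a JSON-encoded command pushed
# onto the launcher's job_list, so the rpop loop above consumes jobs in FIFO
# order.
import json

import redis

def submit_job(redis_host, job_list, command):
    """Queue a command dict for the Launcher's run loop to handle."""
    red = redis.Redis(host=redis_host)
    red.lpush(job_list, json.dumps(command))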
def load_command(self):
    """Load and parse the command file"""
    # Read the command file, closing the handle when done
    with open(self.command_file, "r") as command_file:
        message = command_file.read()
    # Decode json command file
    return json.loads(message)
def run(self):
    """The while loop for watching the files"""
    self.logger.info("NecatGatherer.run")

    # Set up overwatcher
    self.ow_registrar = Registrar(site=self.site,
                                  ow_type="gatherer",
                                  ow_id=self.overwatch_id)
    self.ow_registrar.register({"site_id": self.tag})

    self.logger.debug(" Will publish new datasets on run_data:%s" % self.tag)
    self.logger.debug(" Will push new datasets onto runs_data:%s" % self.tag)

    try:
        while self.go:
            # Get run info passed from RAPD via the beamline Redis DB
            current_run_raw = self.redis.rpop('run_info_%s' % self.tag[-1])
            if current_run_raw not in (None, ""):
                current_run = json.loads(current_run_raw)
                # Get the additional beamline params and put into a dict
                run_data = self.get_run_data(current_run)
                if self.ignored(run_data['directory']):
                    self.logger.debug("Directory %s is marked to be ignored - skipping", run_data['directory'])
                else:
                    self.logger.debug("runs_data:%s %s", self.tag, run_data)
                    # Put into exchangeable format
                    run_data_json = json.dumps(run_data)
                    # Publish to Redis
                    self.redis.publish("run_data:%s" % self.tag, run_data_json)
                    # Push onto redis list in case no one is currently listening
                    self.redis.lpush("runs_data:%s" % self.tag, run_data_json)
            time.sleep(0.2)
            # Have Registrar update status
            self.ow_registrar.update({"site_id": self.tag})
    except KeyboardInterrupt:
        self.stop()
def run(self):
    """The while loop for watching the files"""
    self.logger.info("NecatGatherer.run")

    # Set up overwatcher
    self.ow_registrar = Registrar(site=self.site,
                                  ow_type="gatherer",
                                  ow_id=self.overwatch_id)
    self.ow_registrar.register({"site_id": self.site.ID})

    self.logger.debug(" Will publish new datasets on run_data:%s" % self.tag)
    self.logger.debug(" Will push new datasets onto runs_data:%s" % self.tag)

    # Path prefix for RDMA folder location with Eiger
    if self.tag == 'NECAT_E':
        path_prefix = '/epu2/rdma'
    else:
        path_prefix = ''

    try:
        while self.go:
            # Get run info passed from RAPD via the beamline Redis DB
            current_run_raw = self.redis.rpop('run_info_%s' % self.tag[-1])
            if current_run_raw not in (None, ""):
                current_run = json.loads(current_run_raw)
                # Get the additional beamline params and put into a dict
                run_data = self.get_run_data(current_run)
                if self.ignored(run_data['directory']):
                    self.logger.debug("Directory %s is marked to be ignored - skipping", run_data['directory'])
                else:
                    self.logger.debug("runs_data:%s %s", self.tag, run_data)
                    # Put into exchangeable format
                    run_data_json = json.dumps(run_data)
                    # Publish to Redis
                    self.redis.publish("run_data:%s" % self.tag, run_data_json)
                    # Push onto redis list in case no one is currently listening
                    self.redis.lpush("runs_data:%s" % self.tag, run_data_json)
            time.sleep(0.2)
            # Have Registrar update status
            self.ow_registrar.update({"site_id": self.site.ID})
    except KeyboardInterrupt:
        self.stop()
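# A minimal sketch of the producer side both gatherer loops consume (the key
# naming follows 'run_info_%s' % tag[-1] above; the payload fields are set by
# RAPD and are assumptions here): a JSON run description is pushed onto the
# per-beamline run_info list, which run() rpops.
import json

import redis

def push_run_info(redis_host, tag, run_info):
    """Queue run info for a NecatGatherer with a site tag like 'NECAT_E'."""
    red = redis.Redis(host=redis_host)
    red.lpush('run_info_%s' % tag[-1], json.dumps(run_info))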
def run_processing(target, plugin, tprint, verbose=True):
    """Run a processing test"""
    tprint("  Testing %s" % plugin, 99, "white")
    target_def = test_sets.DATA_SETS[target]
    command = target_def[plugin + "_command"]
    test_module = importlib.import_module(test_sets.PLUGINS[plugin] + ".test")

    # Change to working directory
    work_dir = os.path.join(TEST_CACHE, target)
    if not os.path.exists(work_dir):
        os.makedirs(work_dir)
    os.chdir(work_dir)

    # Run the process
    tprint("  Running test with command `%s`" % command, 10, "white")
    if verbose:
        proc = subprocess.Popen(command, shell=True)
    else:
        proc = subprocess.Popen(command,
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    proc.wait()

    # Read in the results
    tprint("  Comparing results", 10, "white")
    print "cwd", os.getcwd()
    result_standard = json.loads(open(plugin + ".json", "r").readlines()[0])
    result_test = json.loads(open(target_def[plugin + "_result"], "r").readlines()[0])
    test_successful = test_module.compare_results(result_standard, result_test, tprint)
    if test_successful:
        tprint("  %s tests successful" % plugin, 99, "green")
    else:
        tprint("  %s tests fail" % plugin, 10, "red")

    return test_successful
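# A minimal usage sketch (the target and plugin names are hypothetical - they
# must be keys in test_sets.DATA_SETS and test_sets.PLUGINS respectively, and
# tprint is assumed to be the module's console-printing helper):
success = run_processing(target="thaumatin",
                         plugin="index",
                         tprint=tprint,
                         verbose=False)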
def run(self):
    # This is the "server"
    while self.Go:
        channel, message = self.redis.brpop(["RAPD_RESULTS"])
        print channel, message
        # Trying to catch hanging
        try:
            self.receiver(json.loads(message))
        except Exception:
            info = sys.exc_info()
            self.logger.exception("Unexpected error in control_server: %s" % info[0])
def cell_search(self, search_params):
    """Search for PDBs within a unit cell range."""

    # Query the PDBQ server
    response = urllib2.urlopen(urllib2.Request("%s/cell_search/" % self.server,
                                               data=json.dumps(search_params))).read()

    # Decode search result
    search_results = json.loads(response)

    # Create handy description key
    for k in search_results.keys():
        search_results[k]["description"] = \
            search_results[k].pop("struct.pdbx_descriptor")

    return search_results
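# A minimal usage sketch (the cell values are hypothetical, and pdbq stands in
# for an instance of the hosting class): search_params maps each unit-cell
# parameter to a [min, max] range - the same shape connect_pdbq below builds
# from a cell and a percent window.
search_params = {"a": [77.0, 81.0],
                 "b": [77.0, 81.0],
                 "c": [36.0, 40.0],
                 "alpha": [88.0, 92.0],
                 "beta": [88.0, 92.0],
                 "gamma": [88.0, 92.0]}
results = pdbq.cell_search(search_params)
for code, entry in results.items():
    print "%s - %s" % (code, entry["description"])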
def run(self):
    self.logger.debug("Running")

    # Connect to Redis
    self.connect_to_redis()

    # Create Overwatch Registrar instance
    if self.overwatch_id:
        self.ow_registrar = Registrar(site=self.site,
                                      ow_type="control",
                                      ow_id=self.overwatch_id)
        # Register
        self.ow_registrar.register()

    # Determine interval for overwatch update
    # ow_round_interval = max(int((5 * len(self.run_lists)) / POLLING_REST), int(5/POLLING_REST))
    ow_round_interval = 10

    self.logger.debug("Finished registering %d", ow_round_interval)

    while self.running:
        # ~5 seconds between overwatch updates
        for __ in range(ow_round_interval):
            for run_list, site_tag in self.run_lists:
                raw_run_data = self.redis.rpop(run_list)
                # Have new run data
                if raw_run_data not in (None, ""):
                    # Parse into python object
                    run_data = json.loads(raw_run_data)
                    # Notify core thread that a new run has been collected
                    self.notify({"message_type": "NEWRUN",
                                 "run_data": run_data,
                                 "site_tag": site_tag})
                    self.logger.debug("New run data %s", raw_run_data)
                # Slow it down a little
                time.sleep(POLLING_REST)
            time.sleep(POLLING_REST)

        # Have Registrar update status
        if self.overwatch_id:
            self.ow_registrar.update()
def connect_pdbq(inp):
    """Query the PDBQ server"""
    # NOTE: this is a closure - self, end, and l2 come from the enclosing scope
    _d0_ = inp
    l1 = ["a", "b", "c", "alpha", "beta", "gamma"]
    for y in range(end):
        _d_ = {}
        for x in range(len(l1)):
            # Bracket each cell parameter by +/- half of self.percent
            _d_[l1[x]] = [self.cell[l2[y][x]] - self.cell[l2[y][x]] * self.percent / 2,
                          self.cell[l2[y][x]] + self.cell[l2[y][x]] * self.percent / 2]
        # Query server
        response = urllib2.urlopen(urllib2.Request("%s/cell_search/" % PDBQ_SERVER,
                                                   data=json.dumps(_d_))).read()
        j = json.loads(response)
        for k in j.keys():
            j[k]["Name"] = j[k].pop("struct.pdbx_descriptor")
        _d0_.update(j)
    return _d0_
def run(self):
    self.logger.debug('ControllerHandler::run')

    # Receive the output back from the cluster
    message = ''
    while not message.endswith('<rapd_end>'):
        # recv takes a buffer size - CLUSTER_ADDRESS was passed here by mistake
        data = self.conn.recv(BUFFER_SIZE)
        message += data
        time.sleep(0.001)
    self.conn.close()

    # Strip off the start and end markers
    stripped = message.rstrip().replace('<rapd_start>', '').replace('<rapd_end>', '')

    # Load the JSON
    decoded_received = json.loads(stripped)

    # Assign the command
    self.receiver(decoded_received)
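# A minimal sketch of the sending side of this wire protocol (the function name
# and socket setup are assumptions): the payload is JSON wrapped in
# <rapd_start>/<rapd_end> markers, which is exactly what the receive loop above
# strips off before decoding.
import json
import socket

def send_command(command, address):
    """Frame a command as <rapd_start>JSON<rapd_end> and send it over a socket."""
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect(address)
    sock.sendall('<rapd_start>' + json.dumps(command) + '<rapd_end>')
    sock.close()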
def check_for_pdbs(self, pdb_list):
    """Check if PDB codes are in the PDB repository and pass back molecular descriptions"""
    output_dict = {}

    # Make a new list with all upper case PDB codes
    pdb_list = [pdb.upper() for pdb in pdb_list]

    # Change list to comma separated string
    pdb_str = ','.join(pdb_list)

    try:
        # Query pdbq server
        response = urllib2.urlopen(urllib2.Request("%s/entry/%s" %
                                                   (self.server, pdb_str))).read()
        # Decode search result
        entry = json.loads(response)
    except urllib2.URLError as pdbq_error:
        if self.tprint:
            self.tprint("  Error connecting to PDBQ server %s" % pdbq_error,
                        level=30,
                        color="red")
        for pdb_code in pdb_list:
            output_dict[pdb_code] = {
                'description': "Unknown - unable to connect to PDBQ server"
            }
        return output_dict

    for x, pdb_code in enumerate(pdb_list):
        # The server returns a list for multiple codes, a single entry otherwise
        if isinstance(entry["message"], list):
            ret = entry["message"][x]
        else:
            ret = entry["message"]
        # If not in PDBQ
        if ret is None:
            output_dict[pdb_code] = {
                'description': "Unknown - PDB code not found in PDBQ server"
            }
        else:
            output_dict[pdb_code] = {
                'description': ret.get("_entity-pdbx_description")[0]
            }

    return output_dict
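# A minimal usage sketch (pdbq stands in for an instance of the hosting class):
# input codes are case-insensitive, and the PDBQ endpoint accepts comma-separated
# codes, e.g. https://rapd.nec.aps.anl.gov/pdbq/entry/1Z7E,1QRV
descriptions = pdbq.check_for_pdbs(["1z7e", "1qrv"])
for code, info in descriptions.items():
    print "%s - %s" % (code, info['description'])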
def check_for_pdbs_OLD(self, pdb_list):
    """Check if PDB file in PDB repository and pass back molecular description"""
    output_dict = {}

    for pdb_code in pdb_list:
        # Make sure we are in upper case
        pdb_code = pdb_code.upper()
        try:
            # Query pdbq server one code at a time
            # (the newer check_for_pdbs uses comma-separated codes, e.g.
            # https://rapd.nec.aps.anl.gov/pdbq/entry/1Z7E,1QRV)
            response = urllib2.urlopen(urllib2.Request("%s/entry/%s" %
                                                       (self.server, pdb_code))).read()
            # Decode search result
            entry = json.loads(response)
        except urllib2.URLError as pdbq_error:
            if self.tprint:
                self.tprint("  Error connecting to PDBQ server %s" % pdbq_error,
                            level=30,
                            color="red")
            entry = {
                "message": {
                    "_entity-pdbx_description": ["Unknown - unable to connect to PDBQ server"]
                }
            }

        # Grab the description
        description = entry["message"]["_entity-pdbx_description"][0]

        # Save the entry
        output_dict[pdb_code] = {'description': description}

        # Print info to console
        if self.tprint:
            self.tprint("  %s - %s" % (pdb_code, description),
                        level=10,
                        color="white")

    return output_dict
def run(self):
    """The core process of the Launcher instance"""

    # Set up overwatcher
    if self.overwatch_id:
        self.ow_registrar = Registrar(site=self.site,
                                      ow_type="launcher",
                                      ow_id=self.overwatch_id)
        self.ow_registrar.register({"site_id": self.site.ID,
                                    "job_list": self.job_list})

    try:
        # This is the server portion of the code
        while self.running:
            # Have Registrar update status
            if self.overwatch_id:
                self.ow_registrar.update({"site_id": self.site.ID,
                                          "job_list": self.job_list})

            # Look for a new command
            # This will throw a redis.exceptions.ConnectionError if redis is unreachable
            try:
                while self.redis.llen(self.job_list) != 0:
                    command = self.redis.rpop(self.job_list)
                    # Handle the message
                    if command:
                        self.handle_command(json.loads(command))
                # Sleep a little when jobs aren't coming in
                time.sleep(0.2)
            except redis.exceptions.ConnectionError:
                if self.logger:
                    self.logger.exception("Remote Redis is not up. Waiting for Sentinel to switch to new host")
                time.sleep(1)
    except KeyboardInterrupt:
        self.stop()
def check_in_pdbq_OLD(self):
    """Check if input PDB codes are in the PDBQ database"""
    self.tprint("\nChecking that requested codes are in the PDBQ database",
                level=30,
                color="blue")

    for pdb_code in self.command["input_data"]["pdb_codes"]:
        # Query pdbq server
        response = urllib2.urlopen(urllib2.Request("%s/entry/%s" %
                                                   (rglobals.PDBQ_SERVER, pdb_code))).read()
        # Decode search result
        entry = json.loads(response)

        # No entry
        if entry["message"] is None:
            # Print info to console
            self.tprint("  %s not in PDBQ database" % pdb_code,
                        level=50,
                        color="red")
        # Have an entry in PDBQ
        else:
            # Put code on the good list
            self.pdbs_to_download.append(pdb_code)

            # Grab the description
            description = entry["message"]["_entity-pdbx_description"][0]

            # Print info to console
            self.tprint("  %s - %s" % (pdb_code, description),
                        level=10,
                        color="white")
def finish_job(job):
    """Finish the job and send to postprocess_phaser"""
    info = self.jobs.pop(job)
    print 'Finished Phaser on %s with id: %s' % (info['name'], info['output_id'])
    self.logger.debug('Finished Phaser on %s' % info['name'])
    results_json = self.redis.get(info['output_id'])
    # This try/except is for when results aren't in Redis in time.
    try:
        results = json.loads(results_json)
        self.postprocess_phaser(info['name'], results)
        self.redis.delete(info['output_id'])
    except Exception as e:
        self.logger.error('Error %s' % str(e))
    # Remove the job from the enclosing scope's list of active jobs
    jobs.remove(job)
def run(self):
    """The core process of the Launch Manager instance"""

    # Set up overwatcher
    if self.overwatch_id:
        self.ow_registrar = Registrar(site=self.site,
                                      ow_type="launch_manager",
                                      ow_id=self.overwatch_id)
        self.ow_registrar.register()

    # Get the initial possible jobs lists
    full_job_list = [x.get('job_list') for x in
                     self.site.LAUNCHER_SETTINGS["LAUNCHER_SPECIFICATIONS"]]

    try:
        # This is the server portion of the code
        while self.running:
            # Get updated job list by checking which launchers are running
            # Reassign jobs if launcher(s) status changes
            if round(self.timer % TIMER, 1) == 1.0:
                try:
                    # Have Registrar update status
                    if self.overwatch_id:
                        self.ow_registrar.update()

                    # Check which launchers are running
                    temp = [l for l in full_job_list if self.redis.get("OW:" + l)]

                    # Determine which launcher(s) went offline
                    offline = [l for l in self.job_list if l not in temp]
                    if len(offline) > 0:
                        # Pop waiting jobs off their job_lists and push back
                        # into RAPD_JOBS for reassignment
                        for _l in offline:
                            while self.redis.llen(_l) != 0:
                                self.redis.rpoplpush(_l, 'RAPD_JOBS')

                    # Determine which launcher(s) came online (also runs at startup!)
                    online = [l for l in temp if l not in self.job_list]
                    if len(online) > 0:
                        # Pop jobs off RAPD_JOBS_WAITING and push back onto
                        # RAPD_JOBS for reassignment
                        while self.redis.llen('RAPD_JOBS_WAITING') != 0:
                            self.redis.rpoplpush('RAPD_JOBS_WAITING', 'RAPD_JOBS')

                    # Update the self.job_list
                    self.job_list = temp
                except redis.exceptions.ConnectionError:
                    if self.logger:
                        self.logger.exception("Remote Redis is not up. Waiting for Sentinel to switch to new host")
                    time.sleep(1)

            # Look for a new command
            # This will throw a redis.exceptions.ConnectionError if redis is unreachable
            try:
                while self.redis.llen("RAPD_JOBS") != 0:
                    command = self.redis.rpop("RAPD_JOBS")
                    # Handle the message
                    if command:
                        self.push_command(json.loads(command))
                # Sleep a little when jobs aren't coming in
                time.sleep(0.2)
                self.timer += 0.2
            except redis.exceptions.ConnectionError:
                if self.logger:
                    self.logger.exception("Remote Redis is not up. Waiting for Sentinel to switch to new host")
                time.sleep(1)
    except KeyboardInterrupt:
        self.stop()
def run(self):
    self.logger.debug('Handler::run')
    self.logger.debug(self.mode)

    # If we are looking at a socket connection for the incoming message
    if not self.mode == 'file':
        self.logger.debug('not running file')
        # Read the message from the socket
        message = ''
        while not message.endswith('<rapd_end>'):
            data = self.conn.recv(BUFFER_SIZE)
            message += data
            time.sleep(0.001)
        # Close the connection
        self.conn.close()
        # Strip the message of its delivery tags
        message = message.rstrip().replace('<rapd_start>', '').replace('<rapd_end>', '')
        self.logger.debug('message %s', str(message))

    # The ClusterServer is spawning processes on a central node
    if self.mode == 'server':
        self.logger.debug('running server')
        # Decode json
        command = json.loads(message)
        self.controller_address = tuple(command[-1])
        # Feedback
        self.logger.debug(command)
        # Assign the command
        self.Assign(command)

    # qsub is being used to spawn commands
    elif self.mode == 'qsub':
        self.logger.debug('running qsub')
        self.logger.debug('Creating files for qsub submission')

        # The type of command
        command = json.loads(message)[0]
        if len(command) > 3:
            tag = command[:4]
        else:
            tag = command

        # Write the command to a file
        tmp = tempfile.NamedTemporaryFile(mode='w',
                                          dir='./',
                                          prefix='rapd_' + tag + '-',
                                          suffix='.json',
                                          delete=False)
        tmp.write(message)
        tmp.close()

        # Determine which cluster queue to submit the job
        # (Can add other tags to run on new nodes)
        if tag == 'AUTO':
            cl_queue = ' -q index.q -pe smp 4'
        elif tag == 'INTE':
            cl_queue = ' -q phase2.q'
        else:
            cl_queue = ' -q phase1.q'

        # For labeling the qsub job
        qsub_name = os.path.basename(tmp.name).replace('rapd_', '').replace('.json', '')
        self.logger.debug("qsub_name %s", qsub_name)

        # Prep and submit the qsub command
        self.logger.debug('Submit %s to qsub' % tmp.name)
        qsub = "qsub -cwd -V -b y -N " + qsub_name + cl_queue + " rapd.python rapd_cluster.py " + tmp.name
        self.logger.debug(qsub)
        p = subprocess.Popen(qsub, shell=True)
        sts = os.waitpid(p.pid, 0)[1]

    # A command is being received from a file
    elif self.mode == 'file':
        self.logger.debug('File has been submitted to run')
        # Feedback
        self.logger.debug(self.command)
        # Store the reply-to server's address
        self.controller_address = tuple(self.command[-1])
        # Assign the command
        self.Assign(self.command)

    self.logger.debug('HERE!')
def handleRemote(image, parameters, beamline, logger=False):
    """Handle remote access support actions"""

    if logger:
        logger.debug('handleRemote %s %s' % (image, beamline))

    # Some manipulations we might need
    split_image = image.split("_")
    image_number = int(split_image[-1].split(".")[0])
    image_uuid = str(uuid.uuid4())

    # CURRENT_RASTER
    # Is there a current raster for my beamline?
    # Could track locally, but using an expiring redis key works well
    current_raster_return = _RedisClient1.get("%s:current_raster" % beamline)
    if current_raster_return is None:
        if logger:
            logger.debug("Cannot find a current raster specification for %s" % beamline)
        return False
    current_raster_id = current_raster_return.split(":")[1]
    if logger:
        logger.debug("Have a current raster %s" % current_raster_id)

    # RASTER_SPECIFICATION
    current_raster_data = False

    # MongoDB
    if (not current_raster_data) and _MONGO:
        if logger:
            logger.debug("Try mongo to get the raster specification")
        current_raster_data = get_raster_specification(current_raster_id, logger)
        if logger:
            logger.debug("current_raster_data from MongoDB")
            logger.debug(current_raster_data)

    # Redis
    if not current_raster_data:
        if logger:
            logger.debug('Using redis to get the raster specification')
        raster_key = "image_raster_exec:%s" % current_raster_id
        current_raster_data = json.loads(_RedisClient1.get(raster_key))
    if logger:
        logger.debug("Have current raster data for %s" % current_raster_id)
        logger.debug(current_raster_data)

    # Check for points data
    update = False
    points = False
    if not current_raster_data.get("points", False):
        if logger:
            logger.debug('Handling ADX_PERFRAME')
        points = getAdxPerframe(beamline, image, logger)
        update = True
        if logger:
            logger.debug("%d points retrieved" % len(points))
        # Store in soon to be passed dict
        current_raster_data["points"] = points

    # Scan coordinates
    coords = False
    md2_xyz = False
    if not current_raster_data.get("coords", False):
        if logger:
            logger.debug('Handling Scan Coords')
        coords, md2_xyz = getScanCoords(beamline, image, current_raster_data, logger)
        if coords:
            update = True
            if logger:
                logger.debug("Scan coordinates retrieved")
                logger.debug(coords)

    # Has anything updated?
    if update:
        if logger:
            logger.debug("Raster specification has been updated")

        # Save points
        # DEV when redis goes away this will change
        if points:
            if logger:
                logger.debug("Points updated")
            # Update MongoDB?
            if _MONGO:
                update_raster_specification(current_raster_data, "points", logger)

        # Save coords state in the current_raster_data
        if coords:
            if logger:
                logger.debug("Coords updated")
            current_raster_data["coords"] = coords
            # Update MongoDB?
            if _MONGO:
                update_raster_specification(current_raster_data, "coords", logger)

        # Save md2_xyz
        if md2_xyz:
            if logger:
                logger.debug("md2_xyz updated")
            current_raster_data["md2_xyz"] = md2_xyz
            # Update MongoDB?
            if _MONGO:
                update_raster_specification(current_raster_data, "md2_xyz", logger)

        # DEPRECATED
        # Save to redis database
        # Convert to JSON for portability
        current_raster_data.pop("_id", False)
        json_current_raster_data = json.dumps(current_raster_data)
        if logger:
            logger.debug("Updating image_raster_exec:%s" % current_raster_id)
        _RedisClient1.set("image_raster_exec:%s" % current_raster_id, json_current_raster_data)
        if points:
            if logger:
                logger.debug("Setting points:%s" % current_raster_id)
            _RedisClient1.set("points:%s" % current_raster_id, json.dumps(points))
        if coords:
            if logger:
                logger.debug("Setting coords:%s" % current_raster_id)
            _RedisClient1.set("coords:%s" % current_raster_id, json.dumps(coords))
        # END DEPRECATED

        # Publish specs for immediate use
        if logger:
            logger.debug("Publishing %s:new_raster_spec" % beamline)
        _RedisClient1.publish('%s:new_raster_spec' % beamline, json_current_raster_data)
    else:
        if logger:
            logger.debug("Already have all perframe positional information")

    # Store xyz data in result data temporarily for use
    current_raster_data["md2_x"] = current_raster_data["points"][image_number - 1]["x"]
    current_raster_data["md2_y"] = current_raster_data["points"][image_number - 1]["y"]
    current_raster_data["md2_z"] = current_raster_data["points"][image_number - 1]["z"]

    # Make sure inputs are something reasonable
    puck_label = current_raster_data.get('puck_label')
    if puck_label:
        puck_label = puck_label.replace(' ', '_')
    else:
        puck_label = "UNK"

    # Compose the target directory
    tdir = os.path.join('/',
                        current_raster_data.get('filespace'),
                        'users',
                        current_raster_data.get('inst'),
                        '_'.join((current_raster_data.get('group'),
                                  current_raster_data.get('beamline'),
                                  str(current_raster_data.get('session_id')))),
                        'process',
                        'rasters',
                        puck_label)
    if logger:
        logger.debug("Image will be copied to %s" % tdir)

    # If we are not testing, make sure the target directory exists
    if beamline != 'T':
        if not os.path.exists(tdir):
            if logger:
                logger.debug("Creating directory %s" % tdir)
            try:
                os.makedirs(tdir)
            except OSError:
                if logger:
                    logger.debug("Error creating %s" % tdir)
                return False

    # Compose the target file name - the image is expected to match one of the
    # three raster patterns below
    # rastersnap
    if ("_raster_snap" in image) or ("rastersnap_scan" in image):
        if logger:
            logger.debug("RASTERSNAP")
        tfile = '_'.join((puck_label,
                          str(current_raster_data.get('sample')),
                          'line',
                          str(current_raster_data.get('iteration')),
                          split_image[-1]))
    # dfa
    elif ("_dfa_" in image) or ("dfa_scan" in image):
        if logger:
            logger.debug("DFA")
        tfile = '_'.join((puck_label,
                          str(current_raster_data.get('sample')),
                          'grid',
                          str(current_raster_data.get('iteration')),
                          split_image[-1]))
    # ova
    elif "ova_scan" in image:
        if logger:
            logger.debug("OVA")
        # Use the already-sanitized puck_label
        tfile = '_'.join((puck_label,
                          str(current_raster_data.get('sample')),
                          'vert',
                          str(current_raster_data.get('iteration')),
                          split_image[-1]))

    if logger:
        logger.debug("Target file: %s" % tfile)
    target_file = os.path.join(tdir, tfile)

    # If we are not testing, then copy the file
    if beamline != 'T':
        success = copyFile(image, target_file, logger)
        if not success:
            return False

    # Publish the DISTL results
    assemble = {
        'raster_uuid': current_raster_id,
        'image_uuid': image_uuid,
        'status': 'success',
        'beamline': beamline,
        'crystal_image': current_raster_data.get('crystal_image', ''),
        'fullname': target_file,
        'image_id': current_raster_data.get('image_id', 0),
        'image_number': image_number,
        'md2_x': current_raster_data.get('md2_x', 0),
        'md2_y': current_raster_data.get('md2_y', 0),
        'md2_z': current_raster_data.get('md2_z', 0),
    }

    # Update results (parameters) with image data
    parameters.update(assemble)

    # Save DISTL analysis to MongoDB
    if _MONGO:
        update_quickanalysis_result(parameters)

    # For portability
    json_assemble = json.dumps(parameters)

    # DEPRECATED
    # Save to redis
    a_key = 'quickanalysis_result:%s:%s' % (current_raster_id, str(image_number))
    _RedisClient1.set(a_key, json_assemble)
    # END DEPRECATED

    # Publish result over redis
    if logger:
        logger.debug("Will publish on channel %s:quickanalysis_result" % beamline)
        logger.debug(parameters)
    _RedisClient1.publish('%s:quickanalysis_result' % beamline, json_assemble)

    return True