def query_missing(self):
    """Query transfer data for every missing interval and cache the results.

    For each start time in ``self.missing`` a one-hour window is passed to
    ``self.query_transfers``.  Every returned ``(timestamp, count,
    volume_mb)`` row is stored in ``self.data`` as a ``TransferData`` keyed
    by the row's start datetime.  Rows whose timestamp lies in the future
    are ignored.  ``self.save_cache()`` is called once at the end.
    """
    now = time.time()
    log.info("Querying %i missing data entries." % len(self.missing))
    one_hour = datetime.timedelta(0, 3600)
    for missing_start in self.missing:
        results = self.query_transfers(missing_start, missing_start + one_hour)
        if not results:
            log.warning("No transfer results found for %s." % missing_start)
        for raw_time, count, volume_mb in results:
            bucket_ts = float(raw_time)
            bucket_start = self._timestamp_to_datetime(bucket_ts)
            # A bucket that is less than an hour old ends "now" rather than
            # a full hour after its start.
            if now - bucket_ts >= 3600:
                bucket_end_ts = bucket_ts + 3600
            else:
                bucket_end_ts = now
            bucket_end = self._timestamp_to_datetime(bucket_end_ts)
            if bucket_ts > now:
                continue
            record = TransferData()
            record.starttime = bucket_start
            record.endtime = bucket_end
            record.count = count
            record.volume_mb = volume_mb
            self.data[bucket_start] = record
            log.debug("Successfully parsed results for %s." % bucket_start)
    self.save_cache()
def query_jobs(self):
    """Fetch the daily job summary from GRACC and keep the configured window.

    Each aggregation bucket contributes a job count (``Records.value``,
    falling back to ``doc_count`` when that is falsy) and a core-hour
    total.  All buckets are dumped to the debug log.  The last
    ``[GRACC] days`` buckets, excluding the final one, are stored on
    ``self.count_results`` / ``self.hour_results`` and returned.

    :return: ``(count_results, hour_results)`` lists
    """
    params = self.get_params()
    response = gracc_query_jobs(self.es, jobs_summary_index, **params)
    buckets = response.aggregations.EndTime.buckets

    counts, hours, stamps = [], [], []
    for bucket in buckets:
        counts.append(bucket.Records.value or bucket.doc_count)
        hours.append(bucket.CoreHours.value)
        # presumably the bucket key is in milliseconds; it is used as
        # epoch seconds after the division -- TODO confirm
        stamps.append(bucket.key / 1000)

    log.info("GRACC returned %i results for daily jobs" % len(counts))
    log.debug("Job result dump:")
    for count, hrs, epochtime in zip(counts, hours, stamps):
        day_label = time.strftime("%Y-%m-%d %H:%M", time.gmtime(epochtime))
        log.debug("Day %s: Jobs %i, Job Hours %.2f" % (day_label, count, hrs))

    num_results = int(self.cp.get("GRACC", "days"))
    # Drop the final bucket and keep the num_results buckets before it.
    window = slice(-num_results - 1, -1)
    count_results = counts[window]
    hour_results = hours[window]
    self.count_results, self.hour_results = count_results, hour_results
    return count_results, hour_results
def does_it_happen(prob, tick_length=const.basic_tick):
    """Draw a random number and report whether the event occurs this tick.

    The probability is scaled by ``tick_length / const.basic_tick`` and
    compared with a uniform draw from ``random.random()``.

    :param prob: probability of the event per basic tick
    :param tick_length: length of the current tick
    :return: ``True`` when the draw falls below the scaled probability
    """
    # An earlier variant scaled by const.day_in_seconds instead of
    # const.basic_tick.
    threshold = prob * tick_length / const.basic_tick
    draw = random.random()
    log.debug(3, "does_it_happen? " + str(draw) + " " + str(threshold))
    return draw < threshold
def Cancel(config, jobid):
    """
    Cancel a job. The TERM signal is sent to allow the process to
    terminate gracefully within 5 seconds, followed by a KILL signal.

    :param str config: path to arc.conf
    :param jobid: local job ID (process ID); a string or an integer
    :return: ``True`` if successfully cancelled, else ``False``
    :rtype: :py:obj:`bool`
    """

    debug('----- starting forkCancel.py -----', 'fork.Cancel')

    configure(config)
    if Config.remote_host:
        ssh_connect(Config.remote_host, Config.remote_user, Config.private_key)

    info('Killing job with pid %s' % jobid, 'fork.Cancel')

    # The job ID is documented as a string, but both os.kill() and the
    # '%i' conversion below require an integer, so convert exactly once.
    try:
        pid = int(jobid)
    except (TypeError, ValueError):
        debug('Invalid job ID: %r' % (jobid,), 'fork.Cancel')
        return False

    if not Config.remote_host:
        import signal
        try:
            os.kill(pid, signal.SIGTERM)
            time.sleep(5)
            os.kill(pid, signal.SIGKILL)
        except OSError:
            # Job already died or terminated gracefully after SIGTERM
            pass
        except Exception:
            return False
    else:
        execute_remote('kill -s TERM %i; sleep 5; kill -s KILL %i' % (pid, pid))

    debug('----- exiting forkCancel.py -----', 'fork.Cancel')
    return True
def debug(self, pattern, *args):
    """Write a debug line prefixed with this object's identity.

    Objects that are not a ``Tank`` always log, prefixed with their class
    name and ``id``.  A ``Tank`` logs only while it is selected, prefixed
    with its ``id`` alone.

    :param pattern: message (format) text appended after the prefix
    :param args: extra positional arguments forwarded to ``log.debug``
    """
    if not isinstance(self, Tank):
        prefix = (self.__class__.__name__, self.id, pattern)
        log.debug('%s:%s:%s' % prefix, *args)
        return
    if self._selected:
        log.debug('%s:%s' % (self.id, pattern), *args)
def download_software(self):
    """Stream ``self.software.download_url`` into ``self.download_path``.

    The target directory is created when missing.  Nothing is written and
    nothing is logged unless the server answers with HTTP 200.  The HTTP
    response is always closed so the streamed connection is released.
    """
    url = self.software.download_url
    r = requests.get(url, stream=True)
    try:
        if r.status_code != 200:
            return
        directory = os.path.dirname(self.download_path)
        # dirname() is '' for a bare file name; os.makedirs('') would raise.
        if directory and not os.path.exists(directory):
            os.makedirs(directory)
        with open(self.download_path, 'wb') as f:
            for chunk in r.iter_content(1024):
                f.write(chunk)
    finally:
        # With stream=True the connection stays checked out until the body
        # is consumed or the response is closed explicitly.
        r.close()
    debug('Download finished: url [%s]' % url)
def misc(host, port, prefix, fields, debug=False):
    """Send uptime, logged-in user count and process count to statsd.

    :param host: statsd host
    :param port: statsd port
    :param prefix: metric name prefix given to the statsd client
    :param fields: suffix appended verbatim to every metric name
    :param debug: when true, also log the computed uptime
    """
    booted_at = psutil.boot_time()
    uptime = time.time() - booted_at
    statsd_client = statsd.StatsClient(host, port, prefix=prefix)
    # A pipeline batches the three gauges into one flush.
    with statsd_client.pipeline() as batch:
        batch.gauge('uptime{}'.format(fields), uptime)
        if debug:
            log.debug("uptime={}".format(uptime))
        user_count = len(psutil.users())
        batch.gauge('users{}'.format(fields), user_count)
        process_count = len(psutil.pids())
        batch.gauge('processes{}'.format(fields), process_count)
def nonterminal(self, oitem):
    """
    Attempts to apply the (unary) Nonterminal rule and consume oitem,
    returning the resulting item, or None when the two items do not fit.

    Raises TypeError when this item is not waiting for a nonterminal or
    when oitem is not at the root of its tree decomposition.
    """
    if self.target != Item.NONTERMINAL:
        raise TypeError("%s is not a nonterminal." % str(self))
    if oitem.target != Item.ROOT:
        raise TypeError("%s is not at the root of its tree decomposition." % str(oitem))

    # The completed rule must derive exactly the symbol we are waiting for.
    if oitem.rule.symbol != self.next_key:
        log.debug('symbol mismatch')
        return None

    # ...and its number of external nodes must match the awaited edge.
    awaited_nodes = self.next_key_edge[2]
    if len(oitem.rule.rhs1.external_nodes) != len(awaited_nodes):
        log.debug('hyperedge type mismatch')
        return None

    merged_subgraph = self.check_subgraph_overlap(oitem)
    if not merged_subgraph:
        log.debug('overlap')
        return None

    merged_mapping = self.check_mapping_bijection_nonterminal(oitem)
    if not merged_mapping:
        log.debug('bijection')
        return None

    parent_node = self.rule.tree_to_parent[self.tree_node]
    return self.__class__(self.rule, parent_node, self.graph,
                          merged_subgraph, merged_mapping,
                          nodelabels=self.nodelabels)
def _scan_pileup_streams(tumor_f, normal_f, cand_somatic_variant_file,
                         cand_hetero_germline_variant_file):
    """Walk two open pileup streams in lockstep and write candidate variants."""
    normal_l = pileup.PileupLine(normal_f.readline())
    current_chromosome = normal_l.chromosome
    tumor_l = pileup.PileupLine(tumor_f.readline())
    if current_chromosome != tumor_l.chromosome:
        raise CustomError(u"different_chromosome_at_the_first_line")

    line_count = 0
    while True:
        # check how many lines have been processed
        line_count += 1
        if line_count % settings.debug_number_of_lines == 0:
            log.debug(u"""processing... \ttumor: {0} \tnormal: {1}""".format(tumor_l, normal_l))
            line_count = 0

        # Advance whichever stream is behind until both lines describe the
        # same chromosome and position.
        try:
            if tumor_l.chromosome != normal_l.chromosome:
                if normal_l.chromosome == current_chromosome:
                    normal_l = pileup.PileupLine(normal_f.readline())
                else:
                    tumor_l = pileup.PileupLine(tumor_f.readline())
                continue
            if tumor_l.position < normal_l.position:
                tumor_l = pileup.PileupLine(tumor_f.readline())
                continue
            elif tumor_l.position > normal_l.position:
                normal_l = pileup.PileupLine(normal_f.readline())
                continue
        except IOError as e:
            # The code treats IOError as the end-of-file signal
            # (presumably raised by PileupLine on an empty line -- confirm).
            log.debug(u"reach the bottom of the file. {0}".format(e))
            break

        try:
            hetero_germline_results, somatic_results = get_variants_from_matched_lines(tumor_l, normal_l)
            for v in somatic_results:
                cand_somatic_variant_file.write(u"{0}\n".format(v))
            for v in hetero_germline_results:
                cand_hetero_germline_variant_file.write(u"{0}\n".format(v))
        except (TooFewVariantReadsError, LowDepthError, HighDepthError):
            # Positions failing the read/depth filters are skipped silently.
            pass
        except CustomError as e:
            log.debug(u"CustomError: {0}, tumor: {1}, normal: {2}".format(e, tumor_l, normal_l))

        # Both lines were consumed; move both streams forward.
        try:
            current_chromosome = normal_l.chromosome
            normal_l = pileup.PileupLine(normal_f.readline())
            tumor_l = pileup.PileupLine(tumor_f.readline())
        except IOError as e:
            log.debug(u"reach the bottom of the file. {0}".format(e))
            break


def search_variants(tumor_pileup_filename, normal_pileup_filename,
                    cand_somatic_variant_file,
                    cand_hetero_germline_variant_file):
    """Compare a tumor and a normal pileup file and write candidate variants.

    Both files are read in lockstep.  For every position present in both,
    ``get_variants_from_matched_lines`` is called and its somatic and
    heterozygous-germline results are written, one per line, to the two
    already-open output files.

    :param tumor_pileup_filename: path of the tumor pileup file
    :param normal_pileup_filename: path of the normal pileup file
    :param cand_somatic_variant_file: writable file object for somatic candidates
    :param cand_hetero_germline_variant_file: writable file object for
        heterozygous germline candidates
    :raises CustomError: when the first lines of the two files are on
        different chromosomes
    """
    # Context managers guarantee both inputs are closed, including when
    # the first-line check raises.
    with open(tumor_pileup_filename, u'r') as tumor_f:
        with open(normal_pileup_filename, u'r') as normal_f:
            _scan_pileup_streams(tumor_f, normal_f,
                                 cand_somatic_variant_file,
                                 cand_hetero_germline_variant_file)
def query_sites(self):
    """Download the resource-group XML and collect all site names.

    The text of every ``<Name>`` element nested in a ``<Site>`` element is
    added to a set.  Elements without usable text are skipped.  The result
    is stored on ``self.sites_results`` and returned.

    :return: set of site name strings
    """
    fd = urllib2.urlopen(self.resource_group_url)
    try:
        dom = parse(fd)
    finally:
        # Release the HTTP connection even when parsing fails.
        fd.close()
    sites = set()
    for site_dom in dom.getElementsByTagName("Site"):
        for name_dom in site_dom.getElementsByTagName("Name"):
            try:
                sites.add(str(name_dom.firstChild.data))
            except (AttributeError, UnicodeError):
                # Empty element (no text child) or a name that cannot be
                # converted to str: best effort, skip it.
                pass
    log.debug("OIM returned the following sites: %s" % ", ".join(sites))
    log.info("OIM has %i registered sites." % len(sites))
    self.sites_results = sites
    return sites
def get(addr, path, debug=False):
    """Issue an HTTP/1.0 GET over a unix-domain socket and decode the JSON body.

    :param addr: address handed to ``socket.connect`` for an ``AF_UNIX`` socket
    :param path: request path, including any query string
    :param debug: when true, log the decoded data
    :return: the decoded JSON document on HTTP 200, otherwise ``{}``
    """
    client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        client.connect(addr)
        # sendall() retries until the whole request is written; send() may
        # stop after a partial write.
        client.sendall("GET {} HTTP/1.0\r\n\r\n".format(path))
        # The request carries no keep-alive, so the server is expected to
        # close the connection after the response: read until EOF rather
        # than trusting one recv() to return the complete payload.
        chunks = []
        while True:
            chunk = client.recv(65536)
            if not chunk:
                break
            chunks.append(chunk)
    finally:
        # Always give the descriptor back; this function is called repeatedly.
        client.close()
    resp_str = b"".join(chunks)

    source = FakeSocket(resp_str)
    resp = HTTPResponse(source)
    resp.begin()
    if resp.status == 200:
        text = resp.read(len(resp_str))
        data = json.loads(text)
        if debug:
            log.debug(data)
        return data
    return {}
def getcache(self):
    """Read the cache-read counter and decide whether a full refresh is due.

    The counter is unpickled from ``self.cache_count_file_name``.  When it
    has reached ``self.deprecate_cache_after`` it is reset to 0, otherwise
    it is incremented.  Any failure is logged and the counter stays at 0.

    NOTE(review): as visible here the function neither returns nor
    persists the counter; the remainder may be outside this excerpt.
    """
    num_time_cach_read = 0
    # check if full refresh needed
    try:
        # NOTE: unpickling can execute arbitrary code; the counter file
        # must only ever be written by this application.
        with open(self.cache_count_file_name) as pickle_f_handle:
            num_time_cach_read = cPickle.load(pickle_f_handle)
        if num_time_cach_read >= self.deprecate_cache_after:
            log.debug("Signaling read complete data from db, reads reached: <%s>" % (num_time_cach_read))
            num_time_cach_read = 0
        else:
            num_time_cach_read = num_time_cach_read + 1
            log.debug("Incrementing number of cached reads to: <%s>" % (num_time_cach_read))
    except Exception:
        log.info("Unable to find cache file: <%s>" % (self.cache_count_file_name))
def _extract(self):
    """Unpack the downloaded ``.tar.gz`` next to itself, renaming members.

    Every member except the archive root ``.`` is renamed to
    ``./<base>-release-<model><ext>``: ``<base>`` is the member name up to
    its first dot, ``<ext>`` is the part after its last dot (empty when the
    name has no dot) and ``<model>`` is ``self.software.model``.
    """
    directory = os.path.dirname(self.download_path)
    t = tarfile.open(self.download_path, 'r:gz')
    try:
        members = t.getmembers()
        for member in members:
            name = member.name.replace('./', '')
            if name == '.':
                continue
            parts = name.split('.')
            # Keep only the last dotted suffix, e.g. "fw.tar.gz" -> ".gz".
            extension = '.' + parts[-1] if len(parts) > 1 else ''
            member.name = './%s-release-%s%s' % (parts[0], self.software.model, extension)
        # NOTE(review): extractall() trusts the archive contents (link
        # members, absolute names); only use this on trusted downloads.
        t.extractall(path=directory, members=members)
    finally:
        t.close()
    debug('Extracted file: %s' % self.download_path)
def save_cache(self):
    """Drop entries older than a week from ``self.data`` and pickle the rest.

    The data is written to the temporary file returned by ``get_files``
    and then handed to ``commit_files``.  Failures while writing are
    logged as a warning and otherwise ignored.
    """
    now = datetime.datetime.now()
    # Collect first, delete afterwards: the dict must not change size
    # while it is being iterated.
    old_keys = [key for key in self.data if (now - key).days >= 7]
    for key in old_keys:
        del self.data[key]
    try:
        name, tmpname = get_files(self.cp, "transfer_data")
        # The with-block closes the temp file even if pickling fails.
        with open(tmpname, 'w') as fp:
            pickle.dump(self.data, fp)
        commit_files(name, tmpname)
        log.debug("Saved data to cache.")
    except Exception as e:
        log.warning("Unable to write cache; message: %s" % str(e))
def add_thing(self, thing):
    """Record that I own this thing.

    The thing is filed in ``self.things`` under the name returned by
    ``self.thing_name``.  When no name is known yet, only its id is
    queued in ``self.pending_things``.
    """
    # CHEAT!: filling in a missing thing.location from knowledge is not
    # yet supported.
    before = str(self) + " " + str(thing) + " before add_thing: " + str(self.things)
    log.debug(3, before)
    name = self.thing_name(thing)
    if name:
        # (Sending an "I own <name>" thought operation is currently disabled.)
        dictlist.add_value(self.things, name, thing)
        log.debug(3, "\tafter: " + str(self.things))
    else:
        self.pending_things.append(thing.id)
def Scan(config, ctr_dirs):
    """
    Query the local or remote (SSH) machine for all jobs in
    /[controldir]/processing. If the job has stopped running, the exit
    code is read and the comments file is updated.

    :param str config: path to arc.conf
    :param ctr_dirs: list of paths to control directories
    :type ctr_dirs: :py:obj:`list` [ :py:obj:`str` ... ]
    """

    configure(config, set_fork)
    if Config.scanscriptlog:
        scanlogfile = arc.common.LogFile(Config.scanscriptlog)
        root_logger = arc.common.Logger_getRootLogger()
        root_logger.addDestination(scanlogfile)
        root_logger.setThreshold(Config.log_threshold)

    jobs = get_jobs(ctr_dirs)
    if not jobs:
        return
    if Config.remote_host:
        ssh_connect(Config.remote_host, Config.remote_user, Config.private_key)

    execute = execute_local if not Config.remote_host else execute_remote
    args = 'ps -opid ' + ' '.join(jobs)
    # When __FORK_TEST is set the full environment is passed on explicitly.
    if '__FORK_TEST' in os.environ:
        handle = execute(args, env=dict(os.environ))
    else:
        handle = execute(args)
    if handle.returncode != 0:
        debug('Got error code %i from ps -opid' % handle.returncode, 'fork.Scan')
        debug('Error output is:\n' + ''.join(handle.stderr), 'fork.Scan')

    # A set gives constant-time membership tests in the loop below.
    running = set(line.strip() for line in handle.stdout)
    for localid, job in jobs.items():
        if localid in running:
            continue
        # The process is gone: finalise the job.
        if set_exit_code_from_diag(job):
            job.message = MESSAGES[job.state]
        else:
            job.exitcode = -1

        with open(job.lrms_done_file, 'w') as f:
            f.write('%i %s\n' % (job.exitcode, job.message))
        write_comments(job)
def update_state(remote_ip, vm_name, action, state):
    """
    @cmview_ci
    @param_post{remote_ip,string}
    @param_post{vm_name}
    @param_post{action}
    @param_post{state}

    Hook handler for a VM state change reported by a node.  Only the
    "stopped" action is acted upon: a running VM is then destroyed through
    a ``VMThread(vm, 'delete')``.  The VM id and the user id are taken from
    the second and third dash-separated fields of ``vm_name``.

    Raises CMException with one of the codes ``node_not_found``,
    ``vm_not_found``, ``vm_restart`` or ``vm_not_running``.
    """
    # The lookup result is not used; it only validates that the caller is
    # a known node.
    try:
        Node.objects.get(address=remote_ip)
    except Exception:
        raise CMException('node_not_found')

    try:
        name_parts = vm_name.split('-')
        vm_id = int(name_parts[1])
        user_id = int(name_parts[2])
    except Exception:
        log.debug(0, "Unknown vm from hook: %s" % vm_name)
        raise CMException('vm_not_found')

    if action != "stopped":
        log.debug(user_id, "Not updating vm state: action is %s" % str(action))
        return ''

    try:
        VM.objects.update()
        vm = VM.objects.get(id=vm_id)
    except Exception:
        log.error(user_id, 'Cannot find vm in database!')
        raise CMException('vm_not_found')

    # The restart check has to come first: placed after the "not running"
    # check it could never be reached.
    if vm.state == vm_states['restart']:
        raise CMException('vm_restart')

    if vm.state not in [vm_states['running ctx'], vm_states['running']]:
        log.error(user_id, 'VM is not running!')
        raise CMException('vm_not_running')

    thread = VMThread(vm, 'delete')
    thread.start()

    return ''
def query_ce_se(self):
    """Download the resource-group XML and collect CE and SE service URIs.

    For every ``<Service>`` element the last readable ``<Name>`` is taken
    as the service type and the last readable ``<ServiceUri>`` as its URI.
    URIs of type ``SRMv2`` are recorded as storage elements, those of type
    ``CE`` as compute elements.  The sets are stored on
    ``self.ces_results`` and ``self.ses_results``.

    :return: ``(number of CEs, number of SEs)``
    """
    log.debug("Querying the following MyOSG URL: %s" %
              self.resource_group_url)
    fd = urllib2.urlopen(self.resource_group_url)
    try:
        dom = parse(fd)
    finally:
        # Release the HTTP connection even when parsing fails.
        fd.close()
    ses = set()
    ces = set()
    for service_dom in dom.getElementsByTagName("Service"):
        service_type = None
        for name_dom in service_dom.getElementsByTagName("Name"):
            try:
                service_type = str(name_dom.firstChild.data).strip()
            except (AttributeError, UnicodeError):
                # Empty element or text not convertible to str: skip it.
                pass
        uri = None
        for uri_dom in service_dom.getElementsByTagName("ServiceUri"):
            try:
                uri = str(uri_dom.firstChild.data).strip()
            except (AttributeError, UnicodeError):
                pass
        if uri and service_type:
            if service_type == 'SRMv2':
                ses.add(uri)
            elif service_type == 'CE':
                ces.add(uri)
    log.debug("OIM returned the following CEs: %s." % ", ".join(ces))
    log.debug("OIM returned the following SEs: %s." % ", ".join(ses))
    log.info("OIM returned %i CEs and %i SEs" % (len(ces), len(ses)))
    self.ces_results, self.ses_results = ces, ses
    return len(ces), len(ses)
def load_cached(self):
    """Load pickled transfer data into ``self.data`` and purge stale entries.

    The cache path is the ``[Filenames] transfer_data`` option with
    ``%(uid)s`` filled in from ``euid``.  After a sanity check of every
    entry, an entry is dropped when

    * it has no (or a falsy) ``createtime``,
    * it was created more than an hour ago, or
    * it was created more than 30 minutes ago and describes an interval
      that started within the last 12 hours.

    Any failure is logged as a warning and otherwise ignored.
    """
    try:
        cache_path = self.cp.get("Filenames", "transfer_data") \
            % {'uid': euid}
        # NOTE: unpickling can execute arbitrary code; the cache file must
        # only ever be written by this application.
        with open(cache_path, "r") as cache_fp:
            data = pickle.load(cache_fp)
        # Verify we didn't get useless data
        for key, tdata in data.items():
            assert isinstance(key, datetime.datetime)
            assert isinstance(tdata, TransferData)
            assert isinstance(tdata.starttime, datetime.datetime)
            assert isinstance(tdata.endtime, datetime.datetime)
            assert tdata.count is not None
            assert tdata.volume_mb is not None
            assert tdata.starttime is not None
        self.data = data
        log.info("Successfully loaded transfer data from cache; %i"
                 " cache entries." % len(data))
        # A set, because one entry can match more than one staleness rule
        # and deleting the same key twice would raise KeyError.
        stale = set()
        now = time.time()
        now_dt = datetime.datetime.now()
        for key, tdata in data.items():
            if not hasattr(tdata, 'createtime') or not tdata.createtime:
                log.debug("Ignoring cached data from %s as it has no "
                          "create time info." % key)
                stale.add(key)
                continue
            if now - tdata.createtime > 3600:
                log.debug("Ignoring cached data from %s as it is over "
                          "an hour old." % key)
                stale.add(key)
            age_starttime = now_dt - tdata.starttime
            age_starttime = age_starttime.days*86400 + age_starttime.seconds
            if (now - tdata.createtime > 1800) and (age_starttime <= 12*3600):
                log.debug("Ignoring cached data from %s as it is over "
                          "30 minutes old and is for a recent interval." %
                          key)
                stale.add(key)
        for key in stale:
            del self.data[key]
    except Exception as e:
        log.warning("Unable to load cache; it may not exist. Error: %s" %
                    str(e))
def run_docker(address, interval, host, port, debug=False):
    """Poll the Docker API in a loop and push per-container gauges to statsd.

    Each cycle lists all containers through ``get(address, ...)``, fetches
    one stats sample per container and emits memory, CPU, network-rate and
    a placeholder disk gauge tagged ``service=<container name>``.

    :param address: address handed to ``get`` for every API request
    :param interval: target number of seconds between cycle starts
    :param host: statsd host
    :param port: statsd port
    :param debug: when true, also log the computed values

    The loop only ends when an exception escapes a cycle; the exception is
    logged with ``log.exception`` and the function returns.
    """
    # Previous samples per container name, used to turn counters into deltas.
    prev_cpu, prev_system = {}, {}
    prev_tx_bytes, prev_rx_bytes, prev_timer = {}, {}, {}
    client = statsd.StatsClient(host, port)
    MEM_USAGE = jmespath.compile('memory_stats.usage')
    MEM_LIMIT = jmespath.compile('memory_stats.limit')
    TOTAL_USAGE = jmespath.compile('cpu_stats.cpu_usage.total_usage')
    SYSTEM_USAGE = jmespath.compile('cpu_stats.system_cpu_usage')
    NUM_CPUS = jmespath.compile('length(cpu_stats.cpu_usage.percpu_usage)')
    TX_BYTES = jmespath.compile('networks.eth0.tx_bytes')  # TODO: Always eth0??? (likely not...)
    RX_BYTES = jmespath.compile('networks.eth0.rx_bytes')
    try:
        while True:
            with client.pipeline() as pipe:
                start = time.time()
                containers = get(address, '/containers/json?all=1', debug)
                for container in containers:
                    name = container.get('Names')[0].strip('/')
                    status = container.get('Status')
                    id_ = container.get('Id')
                    log.debug("{}: {}".format(name, status))
                    stats = get(address, '/containers/{}/stats?stream=0'.format(id_), debug)  # Very slow call...

                    # --- memory ---
                    mem_usage = MEM_USAGE.search(stats) or 0
                    mem_limit = MEM_LIMIT.search(stats) or 1
                    # Multiply by the float first so the division is never
                    # an integer floor division.
                    mem_percent = 100.0 * mem_usage / mem_limit if mem_limit > 0 else 0
                    if debug:
                        log.debug("{}: Mem: {:,} {:,} {}%".format(name, mem_usage, mem_limit, mem_percent))
                    pipe.gauge('system.memory.virtual.percent,service={}'.format(name), mem_percent)

                    # --- cpu ---
                    # http://stackoverflow.com/questions/30271942/get-docker-container-cpu-usage-as-percentage
                    cpu_percent = 0
                    total_usage = TOTAL_USAGE.search(stats) or 0
                    cpu_delta = total_usage - prev_cpu.get(name, 0)
                    system_usage = SYSTEM_USAGE.search(stats) or 0
                    system_delta = system_usage - prev_system.get(name, 0)
                    num_cpus = NUM_CPUS.search(stats) or 1
                    if system_delta > 0 and cpu_delta > 0:
                        cpu_percent = (float(cpu_delta) / system_delta) * num_cpus * 100.0
                    if debug:
                        log.debug("{}: Cpu: {}, {}: {}%".format(name, cpu_delta, system_delta, cpu_percent))
                    prev_cpu[name], prev_system[name] = total_usage, system_usage
                    pipe.gauge('system.cpu.percent,service={}'.format(name), cpu_percent)

                    # --- network ---
                    tx_bytes = TX_BYTES.search(stats) or 0
                    rx_bytes = RX_BYTES.search(stats) or 0
                    tx = tx_bytes - prev_tx_bytes.setdefault(name, 0)  # B
                    rx = rx_bytes - prev_rx_bytes.setdefault(name, 0)
                    timer = time.time()
                    net_elapsed = timer - prev_timer.get(name, 0)  # s
                    prev_timer[name] = timer
                    tx_rate = float(tx) / net_elapsed if tx > 0 and net_elapsed > 0 else 0  # B/s
                    rx_rate = float(rx) / net_elapsed if rx > 0 and net_elapsed > 0 else 0
                    pipe.gauge('system.network.send_rate,service={}'.format(name), tx_rate)
                    pipe.gauge('system.network.recv_rate,service={}'.format(name), rx_rate)
                    if debug:
                        log.debug("{}: Net Tx: {:,} -> {:,} ({}B/s)".format(name, tx_bytes, prev_tx_bytes[name], tx_rate))
                        log.debug("{}: Net Rx: {:,} -> {:,} ({}B/s)".format(name, rx_bytes, prev_rx_bytes[name], rx_rate))
                    prev_tx_bytes[name] = tx_bytes
                    prev_rx_bytes[name] = rx_bytes

                    # --- disk (placeholder, always 0) ---
                    pipe.gauge('system.disk.root.percent,service={}'.format(name), 0)
            elapsed = time.time() - start
            log.debug("docker: {}ms".format(int(elapsed * 1000)))
            # A cycle may take longer than the interval; time.sleep() raises
            # on a negative argument, which used to end the loop silently.
            time.sleep(max(0, interval - elapsed))
    except Exception as e:
        log.exception(e)
def run_docker(address, interval, host, port, debug=False):
    """Poll the Docker API in a loop and push per-container gauges to statsd.

    Each cycle lists all containers through ``get(address, ...)``, fetches
    one stats sample per container and emits memory, CPU, network-rate and
    a placeholder disk gauge tagged ``service=<container name>``.

    :param address: address handed to ``get`` for every API request
    :param interval: target number of seconds between cycle starts
    :param host: statsd host
    :param port: statsd port
    :param debug: when true, also log the computed values

    The loop only ends when an exception escapes a cycle; the exception is
    logged with ``log.exception`` and the function returns.
    """
    # Previous samples per container name, used to turn counters into deltas.
    prev_cpu, prev_system = {}, {}
    prev_tx_bytes, prev_rx_bytes, prev_timer = {}, {}, {}
    client = statsd.StatsClient(host, port)
    MEM_USAGE = jmespath.compile('memory_stats.usage')
    MEM_LIMIT = jmespath.compile('memory_stats.limit')
    TOTAL_USAGE = jmespath.compile('cpu_stats.cpu_usage.total_usage')
    SYSTEM_USAGE = jmespath.compile('cpu_stats.system_cpu_usage')
    NUM_CPUS = jmespath.compile('length(cpu_stats.cpu_usage.percpu_usage)')
    TX_BYTES = jmespath.compile(
        'networks.eth0.tx_bytes')  # TODO: Always eth0??? (likely not...)
    RX_BYTES = jmespath.compile('networks.eth0.rx_bytes')
    try:
        while True:
            with client.pipeline() as pipe:
                start = time.time()
                containers = get(address, '/containers/json?all=1', debug)
                for container in containers:
                    name = container.get('Names')[0].strip('/')
                    status = container.get('Status')
                    id_ = container.get('Id')
                    log.debug("{}: {}".format(name, status))
                    stats = get(address,
                                '/containers/{}/stats?stream=0'.format(id_),
                                debug)  # Very slow call...

                    # --- memory ---
                    mem_usage = MEM_USAGE.search(stats) or 0
                    mem_limit = MEM_LIMIT.search(stats) or 1
                    # Multiply by the float first so the division is never
                    # an integer floor division.
                    mem_percent = 100.0 * mem_usage / mem_limit if mem_limit > 0 else 0
                    if debug:
                        log.debug("{}: Mem: {:,} {:,} {}%".format(
                            name, mem_usage, mem_limit, mem_percent))
                    pipe.gauge(
                        'system.memory.virtual.percent,service={}'.format(
                            name), mem_percent)

                    # --- cpu ---
                    # http://stackoverflow.com/questions/30271942/get-docker-container-cpu-usage-as-percentage
                    cpu_percent = 0
                    total_usage = TOTAL_USAGE.search(stats) or 0
                    cpu_delta = total_usage - prev_cpu.get(name, 0)
                    system_usage = SYSTEM_USAGE.search(stats) or 0
                    system_delta = system_usage - prev_system.get(name, 0)
                    num_cpus = NUM_CPUS.search(stats) or 1
                    if system_delta > 0 and cpu_delta > 0:
                        cpu_percent = (float(cpu_delta) /
                                       system_delta) * num_cpus * 100.0
                    if debug:
                        log.debug("{}: Cpu: {}, {}: {}%".format(
                            name, cpu_delta, system_delta, cpu_percent))
                    prev_cpu[name], prev_system[
                        name] = total_usage, system_usage
                    pipe.gauge('system.cpu.percent,service={}'.format(name),
                               cpu_percent)

                    # --- network ---
                    tx_bytes = TX_BYTES.search(stats) or 0
                    rx_bytes = RX_BYTES.search(stats) or 0
                    tx = tx_bytes - prev_tx_bytes.setdefault(name, 0)  # B
                    rx = rx_bytes - prev_rx_bytes.setdefault(name, 0)
                    timer = time.time()
                    net_elapsed = timer - prev_timer.get(name, 0)  # s
                    prev_timer[name] = timer
                    tx_rate = float(tx) / net_elapsed if tx > 0 and net_elapsed > 0 else 0  # B/s
                    rx_rate = float(rx) / net_elapsed if rx > 0 and net_elapsed > 0 else 0
                    pipe.gauge(
                        'system.network.send_rate,service={}'.format(name),
                        tx_rate)
                    pipe.gauge(
                        'system.network.recv_rate,service={}'.format(name),
                        rx_rate)
                    if debug:
                        log.debug("{}: Net Tx: {:,} -> {:,} ({}B/s)".format(
                            name, tx_bytes, prev_tx_bytes[name], tx_rate))
                        log.debug("{}: Net Rx: {:,} -> {:,} ({}B/s)".format(
                            name, rx_bytes, prev_rx_bytes[name], rx_rate))
                    prev_tx_bytes[name] = tx_bytes
                    prev_rx_bytes[name] = rx_bytes

                    # --- disk (placeholder, always 0) ---
                    pipe.gauge(
                        'system.disk.root.percent,service={}'.format(name), 0)
            elapsed = time.time() - start
            log.debug("docker: {}ms".format(int(elapsed * 1000)))
            # A cycle may take longer than the interval; time.sleep() raises
            # on a negative argument, which used to end the loop silently.
            time.sleep(max(0, interval - elapsed))
    except Exception as e:
        log.exception(e)
def update(self, collection, query, record):
    """Set the fields of ``record`` on every document matching ``query``.

    :param collection: name of the collection to update
    :param query: filter document selecting the documents to change
    :param record: mapping of field names to new values, applied via ``$set``
    """
    message = "mongodb %s(qurey=%s) update: %s" % (collection, query, record)
    log.debug(message)
    target = self.__client[collection]
    change = {"$set": record}
    target.update_many(query, change)
def log_debug(self, info):
    """Forward ``info`` unchanged to the module logger at debug level."""
    log.debug(info)
def cluster(self, reads, save_input_path=None, output_dir=None, cached_output=None):
    """Cluster ``reads`` with DSF and return the clusters, largest first.

    :param reads: either something ``SeqIO.parse`` can read as FASTA, or an
        iterable of objects that have an ``id`` attribute
    :param save_input_path: optional path where the DSF input matrix is
        written; a temporary file is used when it is missing or cannot be
        opened
    :param output_dir: optional existing directory for DSF output; a
        temporary directory is created (and removed afterwards) otherwise
    :param cached_output: optional earlier DSF output; when given and a
        distance matrix is already available it is parsed instead of
        running DSF
    :return: list of ``Cluster`` objects sorted by decreasing size

    NOTE(review): the block nesting was reconstructed from a flattened
    source; the extent of the first ``finally`` block should be confirmed.
    """
    import shutil

    # NOTE(review): this helper is never called within this method.
    def get_seq_obj(output):
        seq_mapping = dict([(x.id, x) for x in reads])
        output_seqs = map(lambda x: [seq_mapping[y] for y in x], output)
        return output_seqs

    try:  # reads is something SeqIO can parse (e.g. a path)
        self.reads = dict([(x.id, x) for x in SeqIO.parse(reads, 'fasta')])
    except AttributeError as e:  # reads is a list of SeqRecord-like objects
        self.reads = dict([(x.id, x) for x in reads])
    finally:
        # NOTE(review): a ``return`` inside ``finally`` discards any
        # exception that is still propagating from the ``try`` above.
        if cached_output and self.distance_calculator.matrix:
            self.input_matrix, num_edges, mapping = self.convert_adjacency_matrix(
                self.distance_calculator.matrix)
            mapping = self.reverse_mappings(mapping, self.reads)
            return [
                Cluster(x, cluster_id=i, clustering_tool=self)
                for i, x in enumerate(
                    sorted(self.parse_dsf_output(cached_output, mapping),
                           key=lambda x: len(x),
                           reverse=True))
            ]
        self.distance_calculator.generate_distances(reads)
        # Close ``reads`` when it is file-like; plain lists have no close().
        try:
            reads.close()
        except AttributeError as e:
            pass
    self.input_matrix, num_edges, mapping = self.convert_adjacency_matrix(
        self.distance_calculator.matrix)
    mapping = self.reverse_mappings(mapping, self.reads)
    log.debug('Number of edges in input graph:' + str(num_edges))

    # write adjacency to file for dsf input
    def writer(matrix, num_edges):
        # Header line: "<number of rows> <number of edges> 001".
        yield '{} {} 001\n'.format(len(matrix), num_edges)
        # One line per row: "<neighbour index + 1> <weight>" pairs, ordered
        # by neighbour index.
        for neighbours in matrix:
            line = ' '.join([
                ' '.join(map(str, (n + 1, w)))
                for n, w in sorted(neighbours, key=lambda x: x[0])
            ])
            yield '{}\n'.format(line)

    matrix_output_iterator = writer(self.input_matrix, num_edges)
    # Default to a temporary file; replaced below when a path is supplied.
    in_file = tempfile.NamedTemporaryFile(delete=True)
    try:
        if save_input_path:
            in_file = open(save_input_path, 'wb')
    except IOError as e:
        in_file = tempfile.NamedTemporaryFile(delete=True)
        log.warn(
            'Provided DSF input matrix write path not valid, using temporary file'
        )
    # NOTE(review): the format string has no placeholder, so the file name
    # is never included in this log message.
    log.info('Saving dsf input file to'.format(in_file.name))
    in_file.writelines(matrix_output_iterator)
    in_file.flush()
    # check provided output_dir is valid
    if output_dir:
        if not os.path.exists(output_dir):
            log.warn(
                'Provied DSF output directory path not valid, using temporary directory'
            )
            output_dir = None
        else:
            temp_dir = None
    # make temp output dir if no valid output dir provided
    if not output_dir:
        temp_dir = tempfile.mkdtemp()
        output_dir = temp_dir
        saved_umask = os.umask(
            0077)  # Ensure the file is read/write by the creator only
    # run DSF
    try:
        output = self.run(self.src, self.params, in_file.name, output_dir,
                          mapping)  # run dsf
    except Exception as e:
        # This is just so the temp files get deleted in the case some previous unhandled exception gets raised
        # NOTE(review): ``raise e`` re-raises from this line; a bare
        # ``raise`` would keep the original traceback.
        raise e
    finally:
        # Restore the umask and remove the temporary output directory (only
        # when one was created), then release the input file.
        if temp_dir:
            os.umask(saved_umask)
            shutil.rmtree(temp_dir)
        in_file.close()
    # generate instances of cluster_class.Cluster as result
    output = [
        Cluster(x, cluster_id=i, clustering_tool=self) for i, x in enumerate(
            sorted(output, key=lambda x: len(x), reverse=True))
    ]
    return output
def do_run(args):
    """Convert a DICOM treatment plan into planning-grid volumes on disk.

    Reads options, locates the structure-set, plan, CT and dose DICOM files
    below ``rass_data.input("dicom")``, builds the total dose grid, marks
    every ROI as a bit mask on the planning grid, resamples the CT onto
    that grid and writes the results through ``rass_data.output(...)`` as
    VTI files (unless ``skip_vti`` is set) and as ndarray files.

    :param args: object with either a ``rass_data`` attribute or a
        ``root_folder`` attribute from which a ``RASSData`` is created
    :raises Exception: when no structure-set or plan file is found, or when
        no ROI named like body/skin/outline exists

    NOTE(review): ``argv``, ``skip_vti`` and ``SCALE`` are not defined in
    this function and must come from the enclosing module.
    NOTE(review): the block nesting was reconstructed from a flattened
    source; ambiguous spots are marked below.
    """
    ctgriddata = None

    if hasattr(args,"rass_data"):
        rass_data = args.rass_data
    else:
        rass_data = RASSData(root_folder=args.root_folder)

    ################################################################
    # Load the options from the "input" folder
    ################################################################
    options = default_options()
    cfname = rass_data.input("config.json")
    if os.path.isfile(cfname):
        log.info("Reading options from file: %s" % cfname)
        with open(cfname) as options_file:
            options.update(json.load(options_file))

    ################################################################
    # Override the options with a file passed via command-line arguments
    ################################################################
    for i in range(len(argv)):
        if "options" == argv[i]:
            # NOTE(review): argv[i + 1] raises IndexError when "options" is
            # the last argument.
            fname = "%s" % (argv[i + 1])
            log.info("Reading options from file: %s" % fname)
            with open(fname) as options_file:
                options.update(json.load(options_file))

    dicomutils.DEBUG_LEVEL = options["debug_level"]

    ################################################################
    # Look for DICOM files in the "input"/dicom subdirectory
    ################################################################
    rtss, plan, ctlist, doseslist = dicomutils.find_ct_rs_rp_dicom(rass_data.input("dicom"))
    if rtss is None or plan is None:
        raise Exception(f"No RS.* or rtss.* file in {rass_data.input('dicom')}")

    ################################################################
    # Load the DICOM files describing the structures (ROIs)
    # and the plan
    ################################################################
    rtss = dicom.read_file(rtss)
    plan = dicom.read_file(plan)
    treatment_name = '-'.join(plan.PatientID.split('^'))
    log.info('Name: ' + treatment_name)

    ################################################################
    # Load the CT data using VTK
    ################################################################
    from ct import CTVolumeDataReader
    reader = CTVolumeDataReader(rass_data.input("dicom"), ctfiles=ctlist)
    ctVolumeData = reader.read()
    ctData = ctVolumeData.getCTDataAsNumpyArray()

    # Origin, pixel spacing and size of the first CT slice; None when there
    # are no CT files.
    if len(ctlist) > 0:
        ct = dicom.read_file(ctlist[0])
        ctgriddata = list(map(float, (
            ct.ImagePositionPatient[0], ct.ImagePositionPatient[1],
            ct.PixelSpacing[0], ct.PixelSpacing[1],
            ct.Columns, ct.Rows)))
    else:
        ctgriddata = None

    ################################################################
    # reading doses information for beams from DICOM
    ################################################################
    beams = [dicom.read_file(f) for f in doseslist]

    ##################################################################
    # Load the doses of the individual beams
    ##################################################################
    beamDoses = {}
    totalDoses = None
    totalDosesFile = None
    doseScaling = None
    singleBeam = False
    for beam in beams:
        doseScaling = float(beam.DoseGridScaling)
        try:
            bn = int(beam.ReferencedRTPlanSequence[0].ReferencedFractionGroupSequence[0].ReferencedBeamSequence[0].ReferencedBeamNumber)
        except:
            # A dose file without a referenced beam number: the first such
            # file is taken as the total dose.
            print("Semething wrong went...")
            if totalDoses is None:
                singleBeam = True
                totalDoses = beam.pixel_array.copy()
                totalDosesFile = beam.filename
            # NOTE(review): placement of this ``continue`` (except level
            # vs. inside the ``if``) is reconstructed -- confirm.
            continue
        beamDoses[bn] = beam.pixel_array
        # NOTE(review): doseScaling was assigned from this same beam at the
        # top of the loop, so this comparison can never be true.
        if doseScaling is not None and float(beam.DoseGridScaling) != doseScaling:
            log.warning('Strange data: DoseGridScaling is not same all beams!')
        log.info(f"Got doses data for beam number {bn}")

    ##################################################################
    # Sum the doses of the individual beams into the total dose
    ##################################################################
    if not singleBeam:
        print(beamDoses)
        bns = list(beamDoses.keys())
        totalDoses = beamDoses[bns[0]].copy()
        for i in range(1, len(bns)):
            log.info(f"Adding doses from beam {i}")
            totalDoses += beamDoses[bns[i]]

    totalDoses = np.array(totalDoses, dtype=np.float32)
    log.info("Read doses for %d beams" % len(beamDoses))

    minDose = np.min(totalDoses)
    averageDose = np.average(totalDoses)
    maxDose = np.max(totalDoses)

    if totalDosesFile is None:
        log.info('Total doses calculated as sum of beam doses (min dose=%f, average dose=%f, max dose=%f, doseScaling=%f)' % (
            minDose, averageDose, maxDose, doseScaling))
    else:
        log.info('Got total doses from file %s (min dose=%f, average dose=%f, max dose = %f, doseScaling=%f)' % (
            totalDosesFile, minDose, averageDose, maxDose, doseScaling))

    # Planning-grid information taken from the first beam
    tBeam = beams[0]
    kmax = tBeam.Columns  # x?
    jmax = tBeam.Rows  # y?
    imax = len(tBeam.GridFrameOffsetVector)  # z
    xbase = float(tBeam.ImagePositionPatient[0]) * SCALE
    ybase = float(tBeam.ImagePositionPatient[1]) * SCALE
    zbase = float(tBeam.ImagePositionPatient[2]) * SCALE
    dx = float(tBeam.PixelSpacing[0]) * SCALE
    dy = float(tBeam.PixelSpacing[1]) * SCALE
    zoffsets = list(map(float, tBeam.GridFrameOffsetVector))
    for i in range(len(zoffsets)):
        zoffsets[i] *= SCALE
    # The z spacing is the distance between the first two frames.
    dz = zoffsets[1] - zoffsets[0]
    dv = dx * dy * dz

    log.info('Planning grid: %d x %d x %d in [%g:%g]x[%g:%g]x[%g:%g] dx,dy,dz=%g,%g,%g -> dv=%g' % (
        kmax, jmax, imax,
        xbase, xbase + kmax * dx, ybase, ybase + jmax * dy, zbase + zoffsets[0], zbase + zoffsets[-1],
        dx, dy, dz, dv))

    # NOTE(review): planGridInfo is not used again within this function.
    planGridInfo = {'ixmax': kmax, 'iymax': jmax, 'izmax': imax,
                    'xorig': xbase, 'yorig': ybase, 'zorig': zbase,
                    'dx': dx, 'dy': dy, 'dz': dz,
                    'minDose': minDose, 'avgDose': averageDose, 'maxDose': maxDose,
                    'doseScaling': doseScaling
                    }

    ####################################################
    # ROI analysis
    ####################################################
    myROIs = []
    idxROIBody = -1
    for i in range(0, len(rtss.StructureSetROISequence)):
        roiName = rtss.StructureSetROISequence[i].ROIName
        log.info(f"Reading contours for {roiName} from DICOM")

        contours = dicomutils.findContours(rtss, rtss.StructureSetROISequence[i].ROINumber)
        # Only structures with more than one contour become ROIs.
        if len(contours) > 1:
            r = MyRoi(contours, roiName, float(tBeam.PixelSpacing[0]) / 1000.0)
            myROIs.append(r)

            # Remember the first structure that looks like the body outline.
            # NOTE(review): whether this check is nested inside the
            # ``len(contours) > 1`` branch is reconstructed -- confirm.
            if ("body" in roiName.lower() or "skin" in roiName.lower() or "outline" in roiName.lower()) and (idxROIBody == -1):
                idxROIBody = i
                log.info("Found ROI body (or skin): idx = %d" % idxROIBody)

    if idxROIBody == -1:
        raise Exception("The structure file does not contain any structure with 'body', 'outline' or 'skin' in the name.")

    ##########################################################################
    # Mark ROIs or read from cache (cache is a file in a working
    # directory, separate file for each ROI,
    # the filename pattern is: "%s_%s.markscache" % (treatment_name, ROIName)
    ##########################################################################
    # ROI number r is encoded as bit value 2 ** r in roi_marks.
    roi_marks = np.zeros((imax, jmax, kmax), dtype=np.int64)
    for r in range(0, len(myROIs)):
        fcache = rass_data.processing("%s_%s.markscache" % (treatment_name, myROIs[r].name))
        if myROIs[r].read_marks(fcache, roi_marks):
            log.info("Read marking voxels for %s from cache" % myROIs[r].name)
            myROIs[r].countVoxels(roi_marks, 2 ** r)
        else:
            log.info("Marking voxels for %s" % myROIs[r].name)
            # NOTE(review): list(None) raises TypeError, so this line fails
            # when no CT files were found (ctgriddata is None).
            log.debug("CTGRID DATA %s" % list(ctgriddata))

            myROIs[r].mark(xbase / SCALE, ybase / SCALE, dx / SCALE, dy / SCALE, kmax, jmax, imax,
                           np.linspace(zbase, zbase + (imax - 1) * dz, imax) / SCALE, roi_marks, 2 ** r, ctgriddata=ctgriddata)
            myROIs[r].save_marks(fcache, roi_marks, 2 ** r)

    for r in range(len(myROIs)):
        log.info("Statistics for %20s: ID=%8d, %7d voxels, vol=%8.1f discrete vol=%8.1f [cm3]" % (
            myROIs[r].name, 2 ** r, myROIs[r].count, myROIs[r].volume / 1000.,
            myROIs[r].count * dv / SCALE / SCALE / SCALE / 1000.0))

    # CT is loaded - ctData
    # doses are loaded - totalDoses (planning-grid coordinates)
    # ROI information is available - roi_marks (planning-grid coordinates)
    # Now the CT has to be rescaled and the data saved, and that is it...
    plan_origin = (xbase, ybase, zbase)
    plan_dimensions = (kmax, jmax, imax)
    plan_spacing = (dx, dy, dz)
    ctOnPlanningGrid = ctVolumeData.approximateCTOnPlanGrid( plan_origin, plan_spacing, plan_dimensions )

    # save to VTI files
    npar = ctOnPlanningGrid
    # NOTE(review): how many of the following save calls belong under
    # ``if not skip_vti`` is reconstructed -- confirm.
    if not skip_vti:
        VolumeData.saveVolumeGridToFile(plan_spacing, plan_dimensions, plan_origin,
                                        npar, rass_data.output("approximated_ct"))

        VolumeData.saveVolumeGridToFileAsLong(plan_spacing, plan_dimensions, plan_origin,
                                              roi_marks, rass_data.output("roi_marks"))

        for r in range(0, len(myROIs)):
            # Extract the 0/1 mask of ROI r from the combined bit field.
            d = np.array(np.bitwise_and(roi_marks, (2 ** r)) / (2 ** r), dtype=np.float32)
            log.debug(f"ROI: {myROIs[r].name}[{2 ** r}].size() = {np.sum(d)}")
            log.info(f"Saving roi marks for {myROIs[r].name} to {rass_data.output(f'roi_marks_{myROIs[r].name}')}.vti file ...")
            VolumeData.saveVolumeGridToFile(plan_spacing, plan_dimensions, plan_origin, d,
                                            rass_data.output(f"roi_marks_{myROIs[r].name}"))

        VolumeData.saveVolumeGridToFile(plan_spacing, plan_dimensions, plan_origin,
                                        totalDoses, rass_data.output("total_doses"))

    # save to ndarray files
    from bdfileutils import save_ndarray, read_ndarray
    ctOnPlanningGrid = np.reshape(ctOnPlanningGrid, (imax, jmax, kmax))
    save_ndarray(rass_data.output("approximated_ct.nparray"),ctOnPlanningGrid)

    roi_marks = np.reshape(roi_marks, (imax, jmax, kmax))
    save_ndarray(rass_data.output("roi_marks.nparray"),roi_marks)

    for r in range(0, len(myROIs)):
        d = np.array(np.bitwise_and(roi_marks, (2 ** r)) / (2 ** r), dtype=np.int32)
        d = np.reshape(d, (imax, jmax, kmax))
        save_ndarray(rass_data.output(f"roi_marks_{myROIs[r].name}.nparray"), d)

    totalDoses = np.reshape(totalDoses, (imax, jmax, kmax))
    save_ndarray(rass_data.output("total_doses.nparray"),totalDoses)

    # Text file mapping every ROI name to its bit value in roi_marks.
    with open(rass_data.output("roi_mapping.txt"),"w") as f:
        for i in range(len(myROIs)):
            f.write(f"{myROIs[i].name}:{2 ** i}\n")
def parse(self, graph):
    """
    Parses the given graph with the provided grammar.

    Items are processed from a FIFO queue. Each dequeued item is dispatched on
    ``item.target`` (NONE, ROOT, TERMINAL, BINARY or NONTERMINAL) and may
    produce new items, which are queued unless they are already pending or
    were visited before.

    Returns the chart: a mapping from each derived item to the set of tuples
    of items it was built from. Goal items are collected under the key
    ``'START'``.
    """
    # This function is very similar to its counterpart in the regular
    # (non-tree-decomposing) parser. Read the comments there to understand how it
    # works.

    # time.clock() was removed in Python 3.8. Use perf_counter() where the
    # interpreter offers it and only fall back to clock() on old versions.
    timer = getattr(time, 'perf_counter', None) or time.clock
    start_time = timer()
    log.chatter('parse...')

    # ensure that the input graph has its shortest-path table precomputed
    graph.compute_fw_table()

    chart = ddict(set)
    # TODO command line option to switch rule filtering on/off
    pgrammar = [
        self.grammar[r] for r in self.grammar.reachable_rules(graph, None)
    ]
    queue = deque()
    pending = set()     # items currently sitting in the queue
    attempted = set()   # (item, other item) pairs already combined
    visited = set()     # items that have been dequeued at least once
    terminal_lookup = ddict(set)
    passive_item_lookup = ddict(set)
    tree_node_lookup = ddict(set)
    passive_item_rev_lookup = ddict(set)
    tree_node_rev_lookup = ddict(set)

    # index the graph edges by label (second element of each triple)
    for edge in graph.triples(nodelabels=self.nodelabels):
        terminal_lookup[edge[1]].add(edge)

    # seed the queue with one axiom per tree leaf of every usable rule
    for rule in pgrammar:
        for leaf in rule.tree_leaves:
            axiom = self.item_class(rule, leaf, graph,
                                    nodelabels=self.nodelabels)
            queue.append(axiom)
            pending.add(axiom)
            assert leaf not in rule.tree_to_edge

    success = False

    while queue:
        item = queue.popleft()
        pending.remove(item)
        visited.add(item)
        log.debug('handling', item, item.subgraph)

        if item.target == Item.NONE:
            log.debug(' none')
            tree_node_lookup[item.self_key].add(item)
            # wake up items that were waiting for this tree node
            for ritem in tree_node_rev_lookup[item.self_key]:
                if ritem not in pending:
                    queue.append(ritem)
                    pending.add(ritem)

        elif item.target == Item.ROOT:
            log.debug(' root')
            if self.is_goal(item):
                chart['START'].add((item, ))
                success = True
                log.debug("success!")

            passive_item_lookup[item.self_key].add(item)
            # wake up items that were waiting for this passive item
            for ritem in passive_item_rev_lookup[item.self_key]:
                if ritem not in pending:
                    log.debug(' retrieving', ritem)
                    queue.append(ritem)
                    pending.add(ritem)

        elif item.target == Item.TERMINAL:
            log.debug(' terminal')
            new_items = [
                item.terminal(edge)
                for edge in terminal_lookup[item.next_key]
            ]
            new_items = [i for i in new_items if i]
            for nitem in new_items:
                chart[nitem].add((item, ))
                if nitem not in pending and nitem not in visited:
                    log.debug(' new item!', nitem)
                    queue.append(nitem)
                    pending.add(nitem)

        else:
            if item.target == Item.BINARY:
                log.debug(' binary')
                rev_lookup = tree_node_rev_lookup
                lookup = tree_node_lookup
                action = self.item_class.binary
            elif item.target == Item.NONTERMINAL:
                log.debug(' nonterminal')
                rev_lookup = passive_item_rev_lookup
                lookup = passive_item_lookup
                action = self.item_class.nonterminal
            else:
                assert False

            # register this item so that later-arriving partners re-queue it
            rev_lookup[item.next_key].add(item)
            for oitem in lookup[item.next_key]:
                if (item, oitem) in attempted:
                    continue
                attempted.add((item, oitem))
                log.debug(' try', oitem, oitem.subgraph)
                nitem = action(item, oitem)
                if not nitem:
                    continue
                log.debug(' new item!', nitem)
                chart[nitem].add((item, oitem))
                if nitem not in pending and nitem not in visited:
                    queue.append(nitem)
                    pending.add(nitem)

    if success:
        log.chatter(' success!')
    etime = timer() - start_time
    log.chatter('done in %.2fs' % etime)

    return chart
def Submit(config, jobdesc):
    """
    Submits an ATLAS job to the ScGrid host specified in arc.conf.

    This method executes the required RunTimeEnvironment scripts and
    assembles the bash job script. The job script is written to file and
    submitted with SCEAPI.

    The input files are uploaded with up to five attempts, two seconds apart,
    before the job is started.

    :param str config: path to arc.conf
    :param jobdesc: job description object
    :type jobdesc: :py:class:`arc.JobDescription`
    :return: local job ID if successfully submitted, else ``None``
    :rtype: :py:obj:`str`
    :raises ArcError: if no ATLAS release is listed in the runtime
        environments, or if a SCEAPI response cannot be decoded
    """

    import fcntl

    # Allow only one submit at the same time
    # NOTE(review): the lock file is never closed or unlocked explicitly;
    # presumably the lock is meant to last until this call ends -- confirm.
    _lock = open("/tmp/sceapi-submit-job.lock", "a")
    fcntl.flock(_lock, fcntl.LOCK_EX)

    configure(config, set_sceapi)
    client = setup_api()

    validate_attributes(jobdesc)

    # Run RTE stage0
    debug("----- starting sceapiSubmitter.py -----", "sceapi.Submit")
    rel = re.compile(r"APPS/HEP/ATLAS-(?P<release>[\d\.]+-[\w_-]+)")
    release = None
    for rte in jobdesc.Resources.RunTimeEnvironment.getSoftwareList():
        match = rel.match(str(rte))
        if match:
            release = match.groupdict()["release"]
            break
    if not release:
        raise ArcError("ATLAS release not specified", "sceapi.Submit")

    # Create job dict
    jobJSDL = assemble_dict(jobdesc, release)
    args = jobJSDL.pop("arguments")
    input_dict = get_input_dict(jobdesc, args)

    debug("SCEAPI jobname: %s" % jobdesc.Identification.JobName, "sceapi.Submit")
    debug("SCEAPI job dict built", "sceapi.Submit")
    debug("----------------- BEGIN job dict -----", "sceapi.Submit")
    for key, val in jobJSDL.items():
        debug("%s : %s" % (key, val), "sceapi.Submit")
    debug("----------------- END job dict -----", "sceapi.Submit")

    #######################################
    #  Submit the job
    ######################################

    directory = jobdesc.OtherAttributes["joboption;directory"]
    debug("session directory: %s" % directory, "sceapi.Submit")
    resp = client.submitJSON(jobJSDL)
    handle = None
    try:
        handle = json.loads(resp, "utf8")
    except Exception:
        # Anything that is not decodable JSON is reported with the raw reply.
        raise ArcError("SCEAPI client response:\n%s" % str(resp), "sceapi.Submit")

    failure = ""
    if handle["status_code"] == 0:
        jobid = handle["gidujid"]["ujid"]
        gid = handle["gidujid"]["gid"]

        upload_tries = 0
        ret_code = -1
        while upload_tries < 5:
            resp_text = client.putfiles(gid, input_dict)
            try:
                ret_code = json.loads(resp_text, "utf8")["status_code"]
            except Exception:
                raise ArcError("SCEAPI client response:\n%s" % str(resp_text), "sceapi")
            # Explicit check instead of `assert`: asserts are stripped under
            # `python -O`, which would have ended the retry loop after the
            # first attempt regardless of the status code.
            if ret_code == 0:
                break
            sleep(2)
            upload_tries += 1

        if ret_code == 0:
            if json.loads(client.run(jobid), "utf8")["status_code"] == 0:
                debug("job submitted successfully!", "sceapi.Submit")
                debug("local job id: %s" % jobid, "sceapi.Submit")
                debug("----- exiting sceapiSubmitter.py -----", "sceapi.Submit")
                return jobid
            failure = "Start job query failed."
        else:
            failure = "Failed to upload input files."
    else:
        failure = "Status code %i: %s" % (handle["status_code"], translate(handle["status_reason"]))

    # Only reached when the job was not started; falls through to return None.
    debug("job *NOT* submitted successfully!", "sceapi.Submit")
    if failure:
        debug(failure.encode("utf-8"), "sceapi.Submit")
    debug("----- exiting sceapiSubmitter.py -----", "sceapi.Submit")
def log_debug(self, info):
    """Pass ``info`` to ``log.debug`` only while ``self.log_switch`` is truthy."""
    if not self.log_switch:
        return
    log.debug(info)
def PreProcessing(pre_option):
    """Load an image, cut it into per-contour crops and return them flattened.

    The image at ``pre_option['img_PATH']`` is thresholded, blurred and edge
    detected. Contours found on the Canny image define the crop rectangles.
    Each crop is taken from the image selected by ``pre_img_mode``, inverted,
    centred on a square canvas and resized to
    ``MNIST_IMAGE_FORMAT_SIZE`` x ``MNIST_IMAGE_FORMAT_SIZE``. The crops are
    then post-processed according to ``after_img_mode``, flattened and scaled
    by 1/255.

    The nested helpers append to the module-level ``imgList``, ``imgList2``
    and ``X`` and increment the module-level ``cnt_imgList``.
    NOTE(review): nothing in this function resets them, so repeated calls
    presumably accumulate results -- confirm against the caller.

    :param pre_option: mapping holding the image path, the two mode names and
        every tuning constant read below.
    :return: ``np.array`` built from the module-level list ``X``.
    """
    argv = pre_option['img_PATH']
    pre_img_mode = pre_option['pre_img_mode']
    after_img_mode = pre_option['after_img_mode']

    ##############################################
    # Settings read from pre_option
    ##############################################
    # --- MNIST size ---
    MNIST_IMAGE_FORMAT_SIZE = pre_option['MNIST_IMAGE_FORMAT_SIZE']
    # --- morphology ---
    MORPH_KERNEL_SIZE = pre_option['MORPH_KERNEL_SIZE']  # morphology kernel size
    morph_kernel = np.ones((MORPH_KERNEL_SIZE, MORPH_KERNEL_SIZE),
                           np.uint8)  # kernel used by the morphology operations
    # --- threshold ---
    MIN_THRESH = pre_option['MIN_THRESH']  # minimum value used by the threshold operation
    MAX_THRESH = pre_option['MAX_THRESH']  # maximum value used by the threshold operation
    # --- adaptive threshold ---
    # Maximum value applied to a pixel, depending on the threshold computed by
    # adaptiveThreshold and on the thresholdType.
    ADPT_THRESH = pre_option['ADPT_THRESH']
    ADPT_BLOCKSIZE = pre_option['ADPT_BLOCKSIZE']
    WEIGHTED_C = pre_option['WEIGHTED_C']
    # --- Canny ---
    MIN_CANNY = pre_option['MIN_CANNY']  # values at or below MIN_CANNY are excluded from the edges
    MAX_CANNY = pre_option['MAX_CANNY']  # values at or above MAX_CANNY are treated as edges
    # Kernel size used by Canny, i.e. the aperture size of the Sobel mask
    # (== apertureSize).
    SOBEL_KERNEL_SIZE = pre_option['SOBEL_KERNEL_SIZE']
    # --- blur ---
    GAUSSIAN_KERNEL_SIZE = pre_option['GAUSSIAN_KERNEL_SIZE']  # Gaussian blur kernel size, usually 5
    # --- erosion ---
    EROSION_ITER1 = pre_option['EROSION_ITER1']  # erosion iteration counts
    EROSION_ITER2 = pre_option['EROSION_ITER2']
    EROSION_ITER3 = pre_option['EROSION_ITER3']
    EROSION_ITER4 = pre_option['EROSION_ITER4']
    EROSION_ITER5 = pre_option['EROSION_ITER5']
    EROSION_KERNEL_SIZE = pre_option['EROSION_KERNEL_SIZE']  # erosion kernel size
    erosion_kernel = np.ones((EROSION_KERNEL_SIZE, EROSION_KERNEL_SIZE), np.uint8)
    # --- dilation ---
    DILATION_ITER1 = pre_option['DILATION_ITER1']  # dilation iteration counts
    DILATION_ITER2 = pre_option['DILATION_ITER2']
    DILATION_ITER3 = pre_option['DILATION_ITER3']
    DILATION_ITER4 = pre_option['DILATION_ITER4']
    DILATION_ITER5 = pre_option['DILATION_ITER5']
    DILATION_KERNEL_SIZE = pre_option['DILATION_KERNEL_SIZE']  # dilation kernel size
    dilation_kernel = np.ones((DILATION_KERNEL_SIZE, DILATION_KERNEL_SIZE), np.uint8)
    # --- crop margin ---
    MARGIN_FOR_SLICEDIMG = pre_option['MARGIN_FOR_SLICEDIMG']

    ######################################################
    # Pre-processing helpers
    ######################################################
    def Gray(img_param):
        gray = cv2.cvtColor(img_param, cv2.COLOR_BGR2GRAY)
        return gray

    def binary_Threshold(img_param):
        ret, dst = cv2.threshold(img_param, MIN_THRESH, MAX_THRESH, cv2.THRESH_BINARY)
        return dst

    def Blur(img_param):
        blur = cv2.GaussianBlur(img_param, (GAUSSIAN_KERNEL_SIZE, GAUSSIAN_KERNEL_SIZE), 0)
        return blur

    def morph_GRADIENT(img_param):
        morph_G = cv2.morphologyEx(img_param, cv2.MORPH_GRADIENT, morph_kernel)
        return morph_G

    def adaptive_Threshold(img_param):
        adapt_th = cv2.adaptiveThreshold(img_param, ADPT_THRESH, cv2.ADAPTIVE_THRESH_MEAN_C,
                                         cv2.THRESH_BINARY, ADPT_BLOCKSIZE, WEIGHTED_C)
        # cv2.ADAPTIVE_THRESH_MEAN_C == 0
        # cv2.ADAPTIVE_THRESH_GAUSSIAN_C == 1
        return adapt_th

    def morph_CLOSE(img_param):
        morph_C = cv2.morphologyEx(img_param, cv2.MORPH_CLOSE, morph_kernel)
        return morph_C

    def Canny(img_param):
        edges = cv2.Canny(img_param, MIN_CANNY, MAX_CANNY, apertureSize=SOBEL_KERNEL_SIZE)
        return edges

    def Erosion(img_param, iter):
        erode = cv2.erode(img_param, erosion_kernel, iterations=iter)
        return erode

    def Dilatation(img_param, iter):
        # The third positional parameter of cv2.dilate is `dst`, not the
        # iteration count, so the count has to be passed by keyword (as
        # Erosion does) or it is never applied.
        dil = cv2.dilate(img_param, dilation_kernel, iterations=iter)
        return dil

    ###################################################################
    # Convex hull, image slicing and resizing
    ###################################################################
    def Slicing_Resizing(img_param, pre_mode):
        global cnt_imgList
        contours, hierarchy = cv2.findContours(img_param, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        c = 0
        # Fill the convex hull of every contour into img_param.
        for cnt in contours:
            c = c + 1
            hull = cv2.convexHull(cnt)
            convexHull = cv2.drawContours(img_param, [hull], 0, (125, 125, 125), thickness=-1)
        # NOTE(review): `convexHull` is unbound here when no contour was found.
        n_contours, n_hierarchy = cv2.findContours(convexHull, cv2.RETR_EXTERNAL,
                                                   cv2.CHAIN_APPROX_SIMPLE)
        n_c = 0
        rects = []
        im_w = img_param.shape[1]
        for cnt in n_contours:
            x, y, w, h = cv2.boundingRect(cnt)
            blocking = cv2.drawContours(img_param, n_contours, -1, (0, 0, 255), 1)
            n_c = n_c + 1
            # Sort key: y rounded to a multiple of 10 first, then x.
            y2 = round(y / 10) * 10
            index = y2 * im_w + x
            rects.append((index, x, y, w, h))
        rects = sorted(rects, key=lambda x: x[0])
        for i, r in enumerate(rects):
            index, x, y, w, h = r
            if pre_mode == 'bt':
                sliced_img = bt[y:y + h, x:x + w]
            elif pre_mode == 'bt_dil1' or pre_mode == 'imgmodel1':
                sliced_img = bt_dil1[y:y + h, x:x + w]
            elif pre_mode == 'bt_dil1_ero2' or pre_mode == 'imgmodel2':
                sliced_img = bt_dil1_ero2[y:y + h, x:x + w]
            else:
                sliced_img = bt[y:y + h, x:x + w]
            sliced_img = 255 - sliced_img
            cnt_imgList = cnt_imgList + 1
            # Centre the crop on a square canvas sized by the longer side
            # times the configured margin factor.
            ww = round((w if w > h else h) * MARGIN_FOR_SLICEDIMG)
            spc = np.zeros((ww, ww))
            wy = (ww - h) // 2
            wx = (ww - w) // 2
            spc[wy:wy + h, wx:wx + w] = sliced_img
            # NOTE(review): this parses as `h or (w < SIZE)`, which is truthy
            # for every non-empty crop, so INTER_AREA is always chosen.
            # Confirm the intended condition before changing it.
            if h or w < MNIST_IMAGE_FORMAT_SIZE:
                sliced_img = cv2.resize(
                    spc, (MNIST_IMAGE_FORMAT_SIZE, MNIST_IMAGE_FORMAT_SIZE),
                    interpolation=cv2.INTER_AREA)
            else:
                sliced_img = cv2.resize(
                    spc, (MNIST_IMAGE_FORMAT_SIZE, MNIST_IMAGE_FORMAT_SIZE),
                    interpolation=cv2.INTER_LINEAR)
            imgList.append(sliced_img)
        #Display(imgList)
        return imgList

    def After_processing(imgList_param, after_mode):
        for i in range(cnt_imgList):
            if after_mode == 'dil1':
                img_Sliced = Dilatation(imgList_param[i], DILATION_ITER1)
            elif after_mode == 'dil2':
                img_Sliced = Dilatation(imgList_param[i], DILATION_ITER2)
            elif after_mode == 'dil3':
                img_Sliced = Dilatation(imgList_param[i], DILATION_ITER3)
            elif after_mode == 'ero1':
                img_Sliced = Erosion(imgList_param[i], EROSION_ITER1)
            elif after_mode == 'ero2':
                img_Sliced = Erosion(imgList_param[i], EROSION_ITER2)
            elif after_mode == 'ero3':
                img_Sliced = Erosion(imgList_param[i], EROSION_ITER3)
            elif after_mode == 'blur':
                img_Sliced = Blur(imgList_param[i])
            else:
                print("there's nothing mode about your input")
                # Unknown mode: keep the crop as it is. Previously
                # `img_Sliced` was unbound here and the append below raised.
                img_Sliced = imgList_param[i]
            imgList2.append(img_Sliced)
        return imgList2

    def imgData_Nomalization(imgList_param):
        for i in range(cnt_imgList):
            # flatten to one row per image, then scale by 1/255
            imgList_param[i] = imgList_param[i].reshape(
                MNIST_IMAGE_FORMAT_SIZE * MNIST_IMAGE_FORMAT_SIZE)
            imgList_param[i] = imgList_param[i].astype("float32") / 255.0
            X.append(imgList_param[i])
        param = np.array(X)
        return param

    def namestr(obj, namespace):
        return [name for name in namespace if namespace[name] is obj]

    def Display(argv):
        count = 0
        nrows = 6
        ncols = 5
        plt.figure(figsize=(8, 8))
        for n in range(len(argv)):
            count += 1
            plt.subplot(nrows, ncols, count)
            # plt.title(img_array_name[n])
            # plt.imshow(argv[n], cmap='Greys_r')
            plt.imshow(argv[n])
        plt.tight_layout()
        plt.show()

    ###################################################################
    # Actual image processing
    ###################################################################
    img = cv2.imread(argv)
    if img is None:
        log.debug('Cannot load image: ' + argv)
        exit()
    gray = Gray(img)
    bt = binary_Threshold(gray)
    blur = Blur(bt)
    mg = morph_GRADIENT(blur)
    at = adaptive_Threshold(mg)
    canny = Canny(mg)
    # --- pre-processed image models ---
    bt_dil1 = Dilatation(bt, DILATION_ITER1)  # img model 1
    bt_dil1_ero2 = Erosion(bt_dil1, EROSION_ITER2)  # img model 2

    # Slicing_Resizing(param1, param2)
    #   param1 = image that determines the crop regions (Canny edges)
    #   param2 = which pre-processed image the crops are taken from
    Slicing_Resizing(canny, pre_img_mode)
    # After_processing(param1, param2)
    #   param1 = list holding the cropped and resized images
    #   param2 = mode used to process the cropped and resized images again
    After_processing(imgList, after_img_mode)
    #Display(imgList2)
    return imgData_Nomalization(imgList2)
idxROIBody = -1 for i in range(0, len(rtss.StructureSetROISequence)): roiName = rtss.StructureSetROISequence[i].ROIName log.info(f"Finding contours for {roiName}") myROIs.append(MyRoi(dicomutils.findContours(rtss, rtss.StructureSetROISequence[i].ROINumber), roiName, float(first_beam.PixelSpacing[0]) / 1000.0)) if "body" in roiName.lower(): idxROIBody = i log.info("Found ROI body: idx = %d" % idxROIBody) if idxROIBody == -1 and ("skin" in roiName.lower() or "outline" in roiName.lower()): idxROIBody = i log.info(f"Found ROI body ({roiName}): idx = {idxROIBody}") end = time.time() log.debug("Found contours in %s s" % (end - start)) if idxROIBody == -1: raise Exception("The structure file does not contain any structure with 'body', 'outline' or 'skin' in the name.") ########################################################################## # Mark ROIs or read from cache (cache is a file in a working # directory, separate file for each ROI, # the filename pattern is: "%s_%s.markscache" % (treatment_name, ROIName) ########################################################################## roi_marks = np.zeros((imax, jmax, kmax), dtype=int) roi_marks_check = np.zeros((imax, jmax, kmax), dtype=int) for r in range(0, len(myROIs)): fcache = rass_data.processing("%s_%s.markscache" % (treatment_name, myROIs[r].name)) if myROIs[r].read_marks(fcache, roi_marks) is False:
def main():
    """Entry point of the statsd agent.

    On Windows the command line is handed to ``win32serviceutil``. Elsewhere
    the defaults are read from ``statsd-agent.cfg``, may be overridden on the
    command line, and system measurements are sent in an endless loop until
    a ``KeyboardInterrupt`` arrives. Docker measurements run in a separate
    process when enabled.

    Returns 1 when an interval is below 3 seconds or no network interface is
    found, 0 after the loop was interrupted, and ``None`` on Windows.
    """
    if isWindows:
        win32serviceutil.HandleCommandLine(StatsdAgentService)
    else:
        import multiprocessing
        config = StatsdConfig(allow_no_value=True)
        config.read('statsd-agent.cfg')
        parser = argparse.ArgumentParser()
        parser.add_argument('--host', '-t', type=str,
                            default=config.get_str('host', default='localhost'),
                            help='Hostname or IP of statsd/statsite server.')
        parser.add_argument('--port', '-p', type=int,
                            default=config.get_int('port', default=8125),
                            help='UDP port number of statsd/statsite server.')
        parser.add_argument('--prefix', '-x', type=str,
                            default=config.get_str('prefix'),
                            help='Prefix value to add to each measurement.')
        parser.add_argument(
            '--field', '-f', action='append', default=[],
            help="One or more 'key=value' fields to add to each measurement.")
        parser.add_argument('--network', '--nic', '-n', type=str,
                            default=config.get_str('nic'),
                            help='NIC to measure.')
        parser.add_argument(
            '--interval', '-i', type=int,
            default=config.get_int('interval', default=10),
            help='Time in seconds between system measurements. Must be > 2.')
        parser.add_argument('--add-host-field', '-a', action='store_true',
                            help='Auto add host= to fields.')
        parser.add_argument('--debug', '-g', action='store_true',
                            help="Turn on debugging.")
        parser.add_argument('--docker', '-d', action='store_true',
                            help="Enable docker")
        parser.add_argument('--docker-addr', '-D', type=str,
                            default=config.get_str(
                                'address', 'docker',
                                default='/var/run/docker.sock'))
        parser.add_argument(
            '--docker-interval', '-I', type=int,
            default=config.get_int('interval', 'docker', default=15),
            help='Time in seconds between docker measurements. Must be > 2.')
        args = parser.parse_args()

        # A flag is on when either the config file or the command line sets it.
        docker = config.get_boolean('enabled', 'docker', default=False) or args.docker
        debug = config.get_boolean('debug', default=False) or args.debug
        prefix = args.prefix if args.prefix else ''
        if debug:
            log.debug("host={}:{}".format(args.host, args.port))
            log.debug("prefix={}".format(prefix))

        fields = config.get_fields(args.field, args.add_host_field)
        if debug:
            log.debug("fields: {}".format(fields))

        if args.interval < 3:
            log.error("Invalid system interval (< 3sec).")
            return 1

        if args.docker_interval < 3:
            log.error("Invalid docker interval (< 3sec).")
            return 1

        nic = get_nic(args.network)
        if not nic:
            log.error("Could not locate 10.x.x.x network interface!")
            return 1

        if docker:
            multiprocessing.Process(target=run_docker,
                                    args=(args.docker_addr,
                                          args.docker_interval, args.host,
                                          args.port, debug)).start()

        try:
            while True:
                start = time.time()
                run_once(args.host, args.port, prefix, fields, nic, debug)
                elapsed = time.time() - start
                log.debug("statsd: {}ms".format(int(elapsed * 1000)))
                # A cycle that overruns the interval would make the argument
                # negative, and time.sleep() raises ValueError for that.
                time.sleep(max(0, args.interval - elapsed))
        except KeyboardInterrupt:
            pass

        return 0
def main():
    """Run the statsd agent from the command line.

    Windows: delegates to ``win32serviceutil.HandleCommandLine`` and returns
    ``None``. Other platforms: reads ``statsd-agent.cfg`` for defaults,
    parses the command line, optionally starts the docker collector in a
    child process and then calls ``run_once`` once per interval until
    interrupted from the keyboard.

    Returns 1 for an interval below 3 seconds or a missing network
    interface, 0 once the loop has been interrupted.
    """
    if isWindows:
        win32serviceutil.HandleCommandLine(StatsdAgentService)
    else:
        import multiprocessing
        config = StatsdConfig(allow_no_value=True)
        config.read('statsd-agent.cfg')
        parser = argparse.ArgumentParser()
        parser.add_argument('--host', '-t', type=str,
                            default=config.get_str('host', default='localhost'),
                            help='Hostname or IP of statsd/statsite server.')
        parser.add_argument('--port', '-p', type=int,
                            default=config.get_int('port', default=8125),
                            help='UDP port number of statsd/statsite server.')
        parser.add_argument('--prefix', '-x', type=str,
                            default=config.get_str('prefix'),
                            help='Prefix value to add to each measurement.')
        parser.add_argument('--field', '-f', action='append', default=[],
                            help="One or more 'key=value' fields to add to each measurement.")
        parser.add_argument('--network', '--nic', '-n', type=str,
                            default=config.get_str('nic'),
                            help='NIC to measure.')
        parser.add_argument('--interval', '-i', type=int,
                            default=config.get_int('interval', default=10),
                            help='Time in seconds between system measurements. Must be > 2.')
        parser.add_argument('--add-host-field', '-a', action='store_true',
                            help='Auto add host= to fields.')
        parser.add_argument('--debug', '-g', action='store_true',
                            help="Turn on debugging.")
        parser.add_argument('--docker', '-d', action='store_true',
                            help="Enable docker")
        parser.add_argument('--docker-addr', '-D', type=str,
                            default=config.get_str('address', 'docker',
                                                   default='/var/run/docker.sock'))
        parser.add_argument('--docker-interval', '-I', type=int,
                            default=config.get_int('interval', 'docker', default=15),
                            help='Time in seconds between docker measurements. Must be > 2.')
        args = parser.parse_args()

        # Config file and command line are OR-ed for the boolean switches.
        docker = config.get_boolean('enabled', 'docker', default=False) or args.docker
        debug = config.get_boolean('debug', default=False) or args.debug
        prefix = args.prefix if args.prefix else ''
        if debug:
            log.debug("host={}:{}".format(args.host, args.port))
            log.debug("prefix={}".format(prefix))

        fields = config.get_fields(args.field, args.add_host_field)
        if debug:
            log.debug("fields: {}".format(fields))

        if args.interval < 3:
            log.error("Invalid system interval (< 3sec).")
            return 1

        if args.docker_interval < 3:
            log.error("Invalid docker interval (< 3sec).")
            return 1

        nic = get_nic(args.network)
        if not nic:
            log.error("Could not locate 10.x.x.x network interface!")
            return 1

        if docker:
            multiprocessing.Process(target=run_docker,
                                    args=(args.docker_addr, args.docker_interval,
                                          args.host, args.port, debug)).start()

        try:
            while True:
                start = time.time()
                run_once(args.host, args.port, prefix, fields, nic, debug)
                elapsed = time.time() - start
                log.debug("statsd: {}ms".format(int(elapsed * 1000)))
                # Never hand time.sleep() a negative value: it raises
                # ValueError when one measurement took longer than the interval.
                remaining = args.interval - elapsed
                time.sleep(remaining if remaining > 0 else 0)
        except KeyboardInterrupt:
            pass

        return 0
def call_cluster(self, cluster, filter_function=None, result_filter=None,
                 temp_file_path=None):
    """Call a cluster by mapping its consensus against the allele database.

    ``cluster`` may be an indexable collection of sequences (a consensus is
    generated with ``self.consensus_builder``), a path to an existing
    consensus file, or an object with ``seq`` and ``id`` attributes. The
    consensus is written to ``temp_file_path`` (or to a named temporary
    file), mapped with ``self.blasr``, and the mapping that sorts first under
    ``filter_function`` is passed through ``result_filter``.

    Where ``cluster`` accepts attributes, the outcome is stored on it
    (``set_call``, ``candidates``, ``candidates_method``).

    :param filter_function: sort key for the mappings; defaults to
        ``self.filter_function``.
    :param result_filter: callable applied to the best mapping; defaults to
        ``self.result_filter``.
    :param temp_file_path: optional path used for the intermediate FASTA.
    :return: the filtered result only when storing it on ``cluster`` raised
        ``AttributeError``; ``None`` otherwise, including for single-read
        clusters, empty consensus and failed mapping.
    :raises ValueError: for unusable input, a missing consensus builder, or
        an invalid mapping value.
    """
    import tempfile
    if len(cluster) == 1:
        log.warn('Cluster {} has single read, not calling'.format(
            cluster.id))
        try:
            cluster.consensus_seq = None
            cluster.consensus_builder = None
            cluster.set_call(None)
            cluster.candidates = None
            cluster.candidates_method = str(self)
        except AttributeError as e:
            pass
        finally:
            return None

    consensus_seq = None
    consensus_seq_id = None
    f = None
    is_cluster_inst = False  # flag for filling descriptive attributes

    if hasattr(cluster, '__getitem__'):
        # assumed to be list of sequences, get consensus
        try:
            if temp_file_path:
                with open(temp_file_path, 'wb') as f:
                    f.write(
                        fasta_from_seq(*zip(*[(x.id, x.seq)
                                              for x in cluster])))
            consensus_seq = self.consensus_builder.generate_consensus(
                temp_file_path if temp_file_path else cluster)
            if not consensus_seq:
                cluster.consensus = None
                cluster.candidates_method = str(self)
                return
            consensus_seq_id = 'cons'
            log.info('Generated consensus with:\n{}'.format(
                str(self.consensus_builder)))
            log.debug('Output:\n{}'.format(consensus_seq))
            try:
                cluster.consensus = consensus_seq
                cluster.consensus_method = str(self.consensus_builder)
            except AttributeError as e:
                pass
        except TypeError as e:
            ## No consensus builder is set
            raise ValueError(
                'Cluster calling: list of cluster sequences provided but no consensus builder instantiated.'
            )
    else:
        if isinstance(cluster, basestring):  # input is path
            if os.path.exists(cluster):
                cons_path = cluster
            else:
                raise ValueError(
                    'Cluster calling input invalid. String provided but is not valid path. If trying to cast as Bio.Seq.Seq-like object'
                )
        else:  # input is consensus seq
            consensus_seq = cluster.seq
            consensus_seq_id = cluster.id

    ## save blasr target in all cases except path as input
    if consensus_seq:
        try:
            f = open(
                temp_file_path,
                'wb+') if temp_file_path else tempfile.NamedTemporaryFile(
                    delete=False)
            f.write(str(fasta_from_seq(consensus_seq_id, consensus_seq)))
            cons_path = f.name
            f.close()
        except AttributeError as e:
            raise ValueError(
                'Cluster calling input invalid. Provide iterable of cluster sequences, path to cluster consensus or Bio.Seq.Seq-like object to call'
            )

    ## run blasr mapping of consensus_seq against allele database
    command = [self.blasr.src, '', self.allele_db, cons_path]
    try:
        mapping_output = self.blasr.run(*command)
    except ValueError as e:
        log.warn('Blasr returned no mapping')
        try:
            cluster.set_call(None)
            cluster.candidates = None
            cluster.candidates_method = str(self)
        except AttributeError as e:
            pass
        finally:
            return None
    # `f` is still None when the input was a path and no file was written
    # here; closing unconditionally raised AttributeError in that case.
    if f is not None:
        f.close()

    ## select from mapping the desired result as the call
    if not filter_function:
        filter_function = self.filter_function
    try:
        mapping_output = sorted(mapping_output, key=filter_function)
        cluster_call = mapping_output[0]
    except ValueError as e:
        log.error('Invalid blasr mapping value')
        log.debug('\n'.join([str(x) for x in mapping_output]))
        raise e

    if not result_filter:
        result_filter = self.result_filter
    result = result_filter(cluster_call)
    try:
        cluster.set_call([result])
        cluster.candidates = list(mapping_output)
        cluster.candidates_method = str(self)
    except AttributeError as e:
        # Input cannot hold the call (e.g. a plain path): hand it back.
        return result
python search_variants.py {tumor.pileup} {normal.pileup} {output_file_prefix} <options> \toutputs: {output_file_prefix}somatic_candidates \t\t{output_file_prefix}hetero_germline_variants""" if len(args) != 3: log.info(usage) else: tumor_pileup_filename = args[0] normal_pileup_filename = args[1] out_prefix = args[2] somatic_candidates_filename = u"{0}somatic_candidates".format(out_prefix) hetero_germline_candidates_filename = u"{0}hetero_germline_candidates".format(out_prefix) log.info(u""" inputs: \ttumor pileup file: {0} \tnormal pileup file: {1} outputs: \tcandidate somatic mutations: {2} \t candidate heterozygous germline variants: {3}""".format(tumor_pileup_filename, normal_pileup_filename, somatic_candidates_filename, hetero_germline_candidates_filename)) log.info(u"\nsettings:\n" + settings.to_str()) somatic_candidates_file = open(somatic_candidates_filename, u"w") hetero_germline_candidates_file = open(hetero_germline_candidates_filename, u"w") search_variants(tumor_pileup_filename, normal_pileup_filename, somatic_candidates_file, hetero_germline_candidates_file) log.debug(u"done.")
def generate_distances(self, reads=None, minimap=None,
                       filter_func=lambda x: True):
    """Build the pairwise distance matrix from an all-vs-all minimap run.

    The result is a nested dict ``{query name: {target name: distance}}``.
    The distance is computed from the NM tag and the unaligned ends of query
    and target, divided by the sum of both lengths. The unfiltered matrix is
    cached on ``self.matrix`` and reused by later calls.

    :param reads: list of ORIENTED (ie no rev-compl) reads to be clustered
        (ie containing genes).
        NOTE(review): the original comment says ``None`` falls back to
        ``self.reads``, but the code below iterates ``reads`` directly.
    :param minimap: MinimapWrapper instance; ``self.minimap`` when omitted.
    :param filter_func: passed to ``self.filter_matrix`` together with a
        copy of the matrix; replaced by ``self.filter_function`` when that
        is set.
    :return: the filtered matrix, or the raw one when ``filter_func`` is
        falsy.
    :raises ValueError: when a mapping line lacks a usable NM field.
    """
    import copy

    if self.filter_function:
        filter_func = self.filter_function

    if self.matrix:
        log.info('Using cached distance matrix')
        result = self.matrix
        if filter_func:
            result = self.filter_matrix(copy.deepcopy(self.matrix),
                                        filter_func)
        # Return the filtered view, as the uncached path does; returning
        # self.matrix here silently discarded the filtering.
        return result

    if not minimap:
        minimap = self.minimap
    mapping = minimap.ava(reads=reads)

    result = {}
    # for keeping track of mapped reads to report missing reads
    mapped_reads = set()
    for i, line in enumerate(mapping):
        mapped_reads.add(line.qName)
        mapped_reads.add(line.tName)
        try:
            if 'NM' not in line.NM:
                raise IndexError
            NM = int(line.NM.split(':')[2])
        except IndexError as e:
            log.error(
                'Error in Minimap output: NM field is likely missing\nmapping line:{}'
                .format('\t'.join(line)))
            log.debug(dir(line))
            log.debug(zip(line.header, line.attributes))
            raise ValueError()
        distance_value = (NM + line.qStart + (line.qLength - line.qEnd) +
                          NM + line.tStart +
                          (line.tLength - line.tEnd)) / float(line.qLength +
                                                              line.tLength)
        if line.qName not in result:
            result[line.qName] = {}
        result[line.qName][line.tName] = distance_value

    # check if any reads missing from mapping
    missing = set([x.id for x in reads]).difference(mapped_reads)
    if missing:
        log.warn('{} / {} reads missing from mapping'.format(
            len(missing), len(list(reads))))
        log.debug('\n'.join(list(missing)))

    self.matrix = result
    if filter_func:
        result_filtered = self.filter_matrix(copy.deepcopy(result),
                                             filter_func)
        result = result_filtered
    return result
def show_stats(norm):
    """Log the NaN-ignoring max and min of every array in ``norm``.

    The entries keyed ``'samples'`` and ``'channels'`` are skipped.
    """
    skipped = ('samples', 'channels')
    for name in norm:
        if name in skipped:
            continue
        arr = norm[name]
        log.debug(
            f'{name} max {blue}{np.nanmax(arr):.5}{reset} min {yellow}{np.nanmin(arr):.5}{reset}'
        )
def handle_message(self, func, status, message):
    """Write the message triple to the debug output, then pass it to ``self.fire``."""
    details = (func, status, message)
    debug('handle_message - func: %s, status: %s, message: %s' % details)
    self.fire(*details)
def get_dbsnp(data, region, force=False):
    """Attach dbSNP rsIDs fetched from MyVariant to the mutations in ``data``.

    All dbSNP records with hg19 coordinates inside ``region`` are queried and
    indexed by position and operation string (``SNP.xy``, ``INS.x``,
    ``DEL.x``). Every mutation except those at position ``'pseudogene'`` then
    gets its ``'dbsnp'`` entry turned into a list, and a matching rsID
    (direct, or with the two SNP bases swapped) is appended to it.

    ``force`` is accepted but not used. ``data`` is modified in place and
    returned.
    """
    def _attach(mutation, rsid, pos, op):
        # Add rsid unless it is already there; an earlier, different entry
        # gets the '(k)' suffix.
        known = mutation['dbsnp']
        if rsid in known:
            log.debug(
                'dbSNP: Variant {} matches the Karolinska\'s prediction',
                rsid)
            return
        if len(known) > 0:
            known[0] += '(k)'
        known.append(rsid)
        log.debug('dbSNP: Variant {} assigned to {}:{}', rsid, pos, op)

    client = myvariant.MyVariantInfo()
    hits = list(client.query(
        '_exists_:dbsnp AND _exists_:hg19 AND {}:{}-{}'.format(*region),
        fields='dbsnp',
        fetch_all=True))

    # VCF, dbSNP and myVariant use 1-based indexing
    dbsnp = collections.defaultdict(dict)
    for hit in hits:
        pos = hit['dbsnp']['hg19']['start'] - 1
        ref = hit['dbsnp']['ref']
        alt = hit['dbsnp']['alt']
        rs = hit['dbsnp']['rsid']
        if len(ref) > 1 or len(alt) > 1:
            assert (ref[0] == alt[0])
        if len(ref) > 1:
            op = 'DEL.{}'.format(ref[1:])
        elif len(alt) > 1:
            op = 'INS.{}'.format(alt[1:].lower())
        else:
            op = 'SNP.{}{}'.format(ref, alt)
        dbsnp[pos][op] = rs

    for allele in sorted(data):
        for m in data[allele]['mutations']:
            if m['pos'] == 'pseudogene':
                continue

            previous = m['dbsnp']
            m['dbsnp'] = [] if previous in ['', '*'] else [previous]

            pos, op = m['pos'], m['op']
            candidates = dbsnp[pos]
            if op in candidates:
                matched = op
            # check reversed SNP
            elif len(candidates) > 0 and (
                    op[:3] == 'SNP'
                    and op[:4] + op[4:6][::-1] in candidates):
                matched = op[:4] + op[4:6][::-1]
            else:
                if len(candidates) != 0:
                    log.trace('How about {} for {}:{} ({})', candidates, pos,
                              op, m['old'])
                continue

            _attach(m, str(candidates[matched]), pos, matched)

    return data
def interlinguish_warning(self, op, say, msg):
    """Log ``msg`` at debug level 1, tagged with this object's entity id.

    The line also carries the lexlink id of the first element of ``say``
    with its first character dropped; ``op`` is passed on to the logger.
    """
    owner = str(self.entity.id)
    lexlink = str(say[0].lexlink.id[1:])
    text = "".join(
        [owner, " interlinguish_warning: ", str(msg), ": ", lexlink])
    log.debug(1, text, op)
def interlinguish_warning(self, op, say, msg):
    """Log ``msg`` at debug level 1, tagged with this object's id.

    The line also carries the lexlink id of the first element of ``say``
    with its first character dropped; ``op`` is passed on to the logger.
    """
    owner = str(self.id)
    lexlink = str(say[0].lexlink.id[1:])
    text = "".join(
        [owner, " interlinguish_warning: ", str(msg), ": ", lexlink])
    log.debug(1, text, op)
def update_one(self, collection, _id, record):
    """Apply ``record`` as a ``$set`` update to the document with id ``_id``.

    ``_id`` is wrapped in ``ObjectId`` before it is used as the filter; the
    operation is logged at debug level first.
    """
    log.debug("mongodb %s(_id=%s) update: %s" % (collection, _id, record))
    target = self.__client[collection]
    selector = {"_id": ObjectId(_id)}
    change = {"$set": record}
    target.update_one(selector, change)
def _tokens(license_list):
    """Reconcile the SLURM licence tokens with the configured licences.

    The current resources are listed with ``sacctmgr`` and keyed as
    ``name@server``. For every entry of ``license_list``:

    * no token yet: one is added (count is twice ``total``) when
      ``institution``, ``total`` and ``software_name`` are all set;
    * count differs from twice ``total``: the count is modified;
    * eighth output field is not ``"50"``: the percentage allowed is set to
      50 on both clusters.

    Failures of individual ``sacctmgr`` calls are logged and do not stop the
    remaining entries. Nothing is returned.

    :param license_list: mapping of token key to a dict with the keys
        ``software_name``, ``lic_type``, ``institution``, ``faculty`` and
        ``total``.
    """
    try:
        sub_input = "sacctmgr -pns show resource withcluster"
        log.debug(sub_input)
        string_data = subprocess.check_output(sub_input, shell=True).decode("utf-8").strip()
    except Exception as details:
        log.error("Failed to check SLURM tokens. " + str(details))
    else:
        active_token_dict = {}
        # Format output data into dictionary
        for lic_string in string_data.split("\n"):
            log.debug(lic_string)
            str_arr = lic_string.split("|")
            if len(str_arr) < 2:
                # Blank or malformed line (e.g. no resources defined at all):
                # there is no name/server pair to key on.
                continue
            active_token_dict[str_arr[0] + "@" + str_arr[1]] = str_arr

        # Iterate the parameter; the body used to read the differently
        # spelled name `licence_list` and so ignored its own argument.
        for key, value in license_list.items():
            name = (value["software_name"] + "_" + value["lic_type"]) if value["lic_type"] else value["software_name"]
            server = (value["institution"] + "_" + value["faculty"]) if value["faculty"] else value["institution"]

            if key not in active_token_dict.keys():
                log.error("'" + key + "' does not have a token in slurm database!")

                # if possible, create.
                if value["institution"] and value["total"] and value["software_name"]:
                    log.error("Attempting to add...")
                    try:
                        sub_input = "sacctmgr -i add resource Name=" + name.lower() + " Server=" + server.lower() + " Count=" + str(int(value["total"]*2)) + " Type=License percentallowed=50 where cluster=mahuika"
                        log.debug(sub_input)
                        subprocess.check_output(sub_input, shell=True).decode("utf-8")
                    except Exception as details:
                        log.error(details)
                    else:
                        log.info("Token added successfully!")
                else:
                    log.error("Must have 'instituiton, software_name, cluster, total' set in order to generate SLURM token.")
            else:
                # The token count is kept at twice the licence total.
                if value["total"] != int(active_token_dict[key][3])/2:
                    log.error("SLURM TOKEN BAD, HAS " + str(int(active_token_dict[key][3])/2) + " and should be " + str(value["total"]))
                    try:
                        sub_input = "sacctmgr -i modify resource Name=" + name.lower() + " Server=" + server.lower() + " set Count=" + str(int(value["total"]*2))
                        log.debug(sub_input)
                        subprocess.check_output(sub_input, shell=True)
                    except Exception as details:
                        log.error(details)
                    else:
                        log.warning("Token modified successfully!")

                if active_token_dict[key][7] != "50":
                    log.error("SLURM token not cluster-split")
                    try:
                        # A space now separates the server name from the next
                        # option; without it the two were fused into one word.
                        sub_input = "sacctmgr -i modify resource Name=" + name.lower() + " Server=" + server.lower() + " percentallocated=100 where cluster=mahuika" + " set PercentAllowed=50"
                        log.debug(sub_input)
                        subprocess.check_output(sub_input, shell=True)

                        sub_input = "sacctmgr -i modify resource Name=" + name.lower() + " Server=" + server.lower() + " percentallocated=100 where cluster=maui" + " set PercentAllowed=50"
                        log.debug(sub_input)
                        subprocess.check_output(sub_input, shell=True)
                    except Exception as details:
                        log.error(details)
                    else:
                        log.info("Token modified successfully!")
def delete_one(self, collection, query):
    """Log the request, then forward ``query`` to ``delete_one`` on the named collection."""
    message = "mongodb %s(query=%s) delete" % (collection, query)
    log.debug(message)
    target = self.__client[collection]
    target.delete_one(query)
def merge_clusters(self, clusters):
    """Remap the reads of selected clusters onto the remaining clusters.

    Clusters for which ``self.to_remap`` is true are dissolved: their reads
    become queries. All other clusters become targets, keyed by
    ``str(cluster.id)``. ``self.mapper.run`` maps queries against the target
    consensus sequences (with ``'.'`` removed). Each mapped read is attached
    to the target of its lowest-error mapping and recorded in
    ``self.remapped_clusters``.

    Returns a list of the target clusters, in their original order.
    Raises KeyError (after printing diagnostics) when a mapping refers to an
    unknown read or target.
    """
    log.info('Remapping clusters')
    self.remapped_clusters = {}
    querys = {}
    targets = OrderedDict()
    for c in clusters:
        if self.to_remap(c):
            log.debug('Remapping reads from cluster {}'.format(c.id))
            for r in c:
                querys[r.id] = r
                r.original_cluster = c
        else:
            targets[str(c.id)] = c
    log.info('Remapping {} reads'.format(len(querys)))

    # list(...) keeps the Python 2 behaviour (real lists) on Python 3 as well.
    query_seqs = list(querys.values())
    target_seqs = [
        SeqRecord(c.id, c.consensus.replace('.', ''))
        for c in targets.values()
    ]
    mapping = self.mapper.run(query_seqs, target_seqs)

    ## sorted list insertion function
    def add_mapping(new, arr,
                    less_than=lambda x, y: x.total_errors() < y.total_errors()):
        if less_than(new, arr[0]):
            return [new] + arr
        if len(arr) == 1 or not less_than(new, arr[-1]):
            return arr + [new]
        # new < arr[-1] holds here, so this loop always returns.
        for i, m in enumerate(arr):
            if less_than(new, m):
                return arr[:i] + [new] + arr[i:]

    # Group mappings per read, each group ordered by ascending error count.
    sort = {}
    for m in mapping:
        m.quality = m.total_errors()
        # Explicit membership test: a try/except KeyError here would also
        # swallow a KeyError raised inside add_mapping()/total_errors().
        if m.qName in sort:
            sort[m.qName] = add_mapping(m, sort[m.qName])
        else:
            sort[m.qName] = [m]

    # .items() works on Python 2 and 3; .iteritems() is Python 2 only.
    for r_id, maps in sort.items():
        best = maps[0].tName
        try:
            read = querys[r_id]
            read.cluster_mappings = maps
            targets[best].reads[read.id] = read  ## add to mapped cluster
            read.cluster = targets[best]
            targets[best].has_remaps = True
            if best not in self.remapped_clusters:
                self.remapped_clusters[best] = [read]
            else:
                self.remapped_clusters[best].append(read)
        except KeyError:
            print(maps[0])
            print(best in targets)
            print(sorted(targets.keys()))
            # Only dump the reads when the target exists; otherwise this
            # lookup would raise a second KeyError and hide the original.
            if best in targets:
                print(targets[best].reads.keys())
            raise
    return list(targets.values())
def main():
    """Run one full OSG Display update.

    Reads the configuration, arms the script timeout (unless disabled), then
    renders the hourly, 30-day and 12-month job/transfer graphs, queries OIM,
    and finally writes the JSON summary to a temporary file that is committed
    into place. The duration of every stage is logged at debug level.
    """
    watchB = time.time()
    cp = configure()

    # Set the alarm in case if we go over time
    if cp.notimeout:
        log.debug("Running script with no timeout.")
    else:
        timeout = int(cp.get("Settings", "timeout"))
        signal.alarm(timeout)
        log.debug("Setting script timeout to %i." % timeout)

    # Hourly graphs (24-hours)
    watchS = time.time()
    hjds = HourlyJobsDataSource(cp)
    hjds.run()
    dg = DisplayGraph(cp, "jobs_hourly")
    jobs_data, hours_data = hjds.query_jobs()
    # NOTE(review): unlike every other series this one is not cast to float
    # before dividing, so on Python 2 integer counts are truncated to whole
    # thousands. Left unchanged; confirm whether that is intended.
    dg.data = [i/1000 for i in jobs_data]
    dg.run("jobs_hourly")
    hjds.disconnect()
    log.debug("Time log - Hourly Jobs Query Time: %s", (time.time() - watchS))

    watchS = time.time()
    dg = DisplayGraph(cp, "hours_hourly")
    dg.data = [float(i)/1000. for i in hours_data]
    dg.run("hours_hourly")
    log.debug("Time log - Hourly Jobs Graph Time: %s", (time.time() - watchS))

    # Generate the more-complex transfers graph
    watchS = time.time()
    dst = DataSourceTransfers(cp)
    dst.run()
    log.debug("Time log - Hourly Transfer Query Time: %s", (time.time() - watchS))

    watchS = time.time()
    dg = DisplayGraph(cp, "transfer_volume_hourly")
    dg.data = [i[1]/1024./1024. for i in dst.get_data()]
    log.debug("Transfer volumes: %s" % ", ".join([str(float(i)) for i in dg.data]))
    dg.run("transfer_volume_hourly")
    dg = DisplayGraph(cp, "transfers_hourly")
    # int() instead of long(): identical on Python 2 (int auto-promotes to
    # long) and also defined on Python 3.
    dg.data = [int(i[0])/1000. for i in dst.get_data()]
    dg.run("transfers_hourly")
    dst.disconnect()
    log.debug("Time log - Hourly Transfer Graph Time: %s", (time.time() - watchS))

    # Daily (30-day graphs)
    watchS = time.time()
    dds = DailyDataSource(cp)
    dds.run()
    # Jobs graph
    jobs_data_daily, hours_data_daily = dds.query_jobs()
    # NOTE(review): query_transfers() is called on this source further down,
    # after disconnect() - confirm the data source supports that.
    dds.disconnect()
    log.debug("Time log - 30-Day Query Time: %s", (time.time() - watchS))

    # Job count graph
    watchS = time.time()
    dg = DisplayGraph(cp, "jobs_daily")
    dg.data = [float(i)/1000. for i in jobs_data_daily]
    dg.run("jobs_daily", mode="daily")
    log.debug("Time log - 30-Day Count Graph Time: %s", (time.time() - watchS))

    # CPU Hours graph
    watchS = time.time()
    dg = DisplayGraph(cp, "hours_daily")
    dg.data = [float(i)/1000000. for i in hours_data_daily]
    dg.run("hours_daily", mode="daily")
    log.debug("Time log - 30-Day CPU Graph Time: %s", (time.time() - watchS))

    # Transfers data
    watchS = time.time()
    transfer_data_daily, volume_data_daily = dds.query_transfers()
    log.debug("Time log - 30-Day Transfer Query Time: %s", (time.time() - watchS))

    # Transfer count graph
    watchS = time.time()
    dg = DisplayGraph(cp, "transfers_daily")
    dg.data = [float(i)/1000000. for i in transfer_data_daily]
    dg.run("transfers_daily", mode="daily")
    log.debug("Time log - 30-Day Transfer Count Graph Time: %s", (time.time() - watchS))

    # Transfer volume graph
    watchS = time.time()
    dg = DisplayGraph(cp, "transfer_volume_daily")
    dg.data = [float(i)/1024.**3 for i in volume_data_daily]
    dg.run("transfer_volume_daily", mode="daily")
    log.debug("Time log - 30-Day Transfer Volume Graph Time: %s", (time.time() - watchS))

    # Monthly graphs (12-months)
    watchS = time.time()
    mds = MonthlyDataSource(cp)
    mds.run()
    # Jobs graph
    jobs_data_monthly, hours_data_monthly = mds.query_jobs()
    # NOTE(review): as with the daily source, query_transfers() follows
    # disconnect() - confirm.
    mds.disconnect()
    log.debug("Time log - 12-Month Query Time: %s", (time.time() - watchS))

    # Job count graph
    watchS = time.time()
    dg = DisplayGraph(cp, "jobs_monthly")
    dg.data = [float(i)/1000000. for i in jobs_data_monthly]
    dg.run("jobs_monthly", mode="monthly")
    log.debug("Time log - 12-Month Job Count Graph Time: %s", (time.time() - watchS))

    # Hours graph
    watchS = time.time()
    dg = DisplayGraph(cp, "hours_monthly")
    dg.data = [float(i)/1000000. for i in hours_data_monthly]
    dg.run("hours_monthly", mode="monthly")
    log.debug("Time log - 12-Month Hour Graph Time: %s", (time.time() - watchS))

    # Transfers graph
    watchS = time.time()
    transfer_data_monthly, volume_data_monthly = mds.query_transfers()
    log.debug("Time log - 12-Month Transfer Query Time: %s", (time.time() - watchS))

    # Transfer count graph
    watchS = time.time()
    dg = DisplayGraph(cp, "transfers_monthly")
    dg.data = [float(i)/1000000. for i in transfer_data_monthly]
    dg.run("transfers_monthly", mode="monthly")
    log.debug("Time log - 12-Month Transfer Count Graph Time: %s", (time.time() - watchS))

    # Transfer volume graph
    watchS = time.time()
    dg = DisplayGraph(cp, "transfer_volume_monthly")
    dg.data = [float(i)/1024.**3 for i in volume_data_monthly]
    dg.run("transfer_volume_monthly", mode="monthly")
    log.debug("Time log - 12-Month Transfer Volume Graph Time: %s", (time.time() - watchS))

    # Pull OIM data
    watchS = time.time()
    ods = OIMDataSource(cp)
    # The return values are not used here. The calls are kept because `ods`
    # is handed to the JSON generator below - presumably they populate it;
    # verify against OIMDataSource.
    ods.query_sites()
    ods.query_ce_se()
    log.debug("Time log - OIM Time: %s", (time.time() - watchS))

    # Generate the JSON
    log.debug("Starting JSON creation")
    d = Data(cp)
    d.add_datasource(mds)
    d.add_datasource(hjds)
    d.add_datasource(dst)
    d.add_datasource(dds)
    d.add_datasource(ods)
    # Monthly data
    log.debug("Done creating JSON.")
    name, tmpname = get_files(cp, "json")
    # Close (and thereby flush) the temporary file before it is committed;
    # previously the handle was left open.
    with open(tmpname, 'w') as fd:
        d.run(fd)
    commit_files(name, tmpname)

    log.info("OSG Display done!")
    log.debug("Time log - Total Time: %s", (time.time() - watchB))
def parse(self, string, graph):
    """
    Parses the given string and/or graph.

    Returns the chart: a mapping from each derived item to the set of
    antecedent tuples it was built from. Complete derivations are recorded
    under the key 'START'.
    """

    # This is a long function, so let's start with a high-level overview. This is
    # a "deductive-proof-style" parser: We begin with one "axiomatic" chart item
    # for each rule, and combine these items with each other and with fragments of
    # the object(s) being parsed to deduce new items. We can think of these items
    # as defining a search space in which we need to find a path to the goal item.
    # The parser implemented here performs a BFS of this search space.

    grammar = self.grammar

    # remember when we started
    # time.clock() was removed in Python 3.8; use perf_counter() where it
    # exists and fall back to clock() on interpreters that predate it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    start_time = timer()
    log.chatter('parse...')

    # specify what kind of items we're working with
    if string and graph:
        axiom_class = CfgHergItem
    elif string:
        axiom_class = CfgItem
    else:
        axiom_class = HergItem

    # remember the size of the example
    if string:
        string_size = len(string)
    else:
        string_size = -1
    if graph:
        graph_size = len(graph.triples(nodelabels=self.nodelabels))
    else:
        graph_size = -1

    # initialize data structures and lookups
    # we use various tables to provide constant-time lookup of fragments available
    # for shifting, completion, etc.
    chart = ddict(set)

    # TODO: Command line option to switch grammar filter on/off
    # When both inputs are given, the graph-based filter replaces the
    # string-based one.
    if string:
        pgrammar = [grammar[r] for r in grammar.reachable_rules(string, None)]
    if graph:
        pgrammar = [grammar[r] for r in grammar.reachable_rules(graph, None)]

    queue = deque()  # the items left to be visited
    pending = set()  # a copy of queue with constant-time lookup
    attempted = set()  # a cache of previously-attempted item combinations
    visited = set()  # a cache of already-visited items
    word_terminal_lookup = ddict(set)  # mapping from words to string indices
    nonterminal_lookup = ddict(set)  # a mapping from labels to graph edges
    reverse_lookup = ddict(set)  # a mapping from outside symbols open items
    if string:
        for i, word in enumerate(string):
            word_terminal_lookup[word].add(i)
    if graph:
        edge_terminal_lookup = ddict(set)  # mapping from edge labels to graph edges
        for edge in graph.triples(nodelabels=self.nodelabels):
            edge_terminal_lookup[edge[1]].add(edge)

    for rule in pgrammar:
        axiom = axiom_class(rule, nodelabels=self.nodelabels)
        queue.append(axiom)
        pending.add(axiom)
        if axiom.outside_is_nonterminal:
            reverse_lookup[axiom.outside_symbol].add(axiom)

    # keep track of whether we found any complete derivation
    success = False

    # parse
    while queue:
        item = queue.popleft()
        pending.remove(item)
        visited.add(item)
        log.debug('handling', item)

        if item.closed:
            log.debug(' is closed.')
            # check if it's a complete derivation
            if self.successful_parse(string, graph, item, string_size, graph_size):
                chart['START'].add((item, ))
                success = True

            # add to nonterminal lookup
            nonterminal_lookup[item.rule.symbol].add(item)

            # wake up any containing rules
            # Unlike in ordinary state-space search, it's possible that we will have
            # to re-visit items which couldn't be merged with anything the first time
            # we saw them, and are waiting for the current item. The reverse_lookup
            # indexes all items by their outside symbol, so we re-append to the queue
            # all items looking for something with the current item's symbol.
            for ritem in reverse_lookup[item.rule.symbol]:
                if ritem not in pending:
                    queue.append(ritem)
                    pending.add(ritem)

        elif item.outside_is_nonterminal:
            # complete
            reverse_lookup[item.outside_symbol].add(item)

            for oitem in nonterminal_lookup[item.outside_symbol]:
                log.debug(" oitem:", oitem)
                if (item, oitem) in attempted:
                    # don't repeat combinations we've tried before
                    continue
                attempted.add((item, oitem))
                if not item.can_complete(oitem):
                    log.debug(" fail")
                    continue
                log.debug(" ok")
                nitem = item.complete(oitem)
                chart[nitem].add((item, oitem))
                if nitem not in pending and nitem not in visited:
                    queue.append(nitem)
                    pending.add(nitem)

        else:
            # shift
            if string and graph:
                if not item.outside_word_is_nonterminal:
                    new_items = [
                        item.shift_word(item.outside_word, index)
                        for index in word_terminal_lookup[item.outside_word]
                        if item.can_shift_word(item.outside_word, index)
                    ]
                else:
                    assert not item.outside_edge_is_nonterminal
                    new_items = [
                        item.shift_edge(edge)
                        for edge in edge_terminal_lookup[item.outside_edge]
                        if item.can_shift_edge(edge)
                    ]
            elif string:
                new_items = [
                    item.shift(item.outside_word, index)
                    for index in word_terminal_lookup[item.outside_word]
                    if item.can_shift(item.outside_word, index)
                ]
            else:
                assert graph
                new_items = [
                    item.shift(edge)
                    for edge in edge_terminal_lookup[item.outside_edge]
                    if item.can_shift(edge)
                ]

            for nitem in new_items:
                log.debug(' shift', nitem, nitem.shifted)
                chart[nitem].add((item, ))
                if nitem not in pending and nitem not in visited:
                    queue.append(nitem)
                    pending.add(nitem)

    if success:
        log.chatter(' success!')
    etime = timer() - start_time
    log.chatter('done in %.2fs' % etime)

    # TODO return partial chart
    return chart
def parse(self, string, graph):
    """
    Parses the given string and/or graph.

    Returns the chart: a mapping from each derived item to the set of
    antecedent tuples it was built from. Complete derivations are recorded
    under the key "START".
    """

    # This is a long function, so let's start with a high-level overview. This is
    # a "deductive-proof-style" parser: We begin with one "axiomatic" chart item
    # for each rule, and combine these items with each other and with fragments of
    # the object(s) being parsed to deduce new items. We can think of these items
    # as defining a search space in which we need to find a path to the goal item.
    # The parser implemented here performs a BFS of this search space.

    grammar = self.grammar

    # remember when we started
    # time.clock() was removed in Python 3.8; use perf_counter() where it
    # exists and fall back to clock() on interpreters that predate it.
    timer = getattr(time, "perf_counter", None) or time.clock
    start_time = timer()
    log.chatter("parse...")

    # specify what kind of items we're working with
    if string and graph:
        axiom_class = CfgHergItem
    elif string:
        axiom_class = CfgItem
    else:
        axiom_class = HergItem

    # remember the size of the example
    string_size = len(string) if string else -1
    if graph:
        graph_size = len(graph.triples(nodelabels=self.nodelabels))
    else:
        graph_size = -1

    # initialize data structures and lookups
    # we use various tables to provide constant-time lookup of fragments available
    # for shifting, completion, etc.
    chart = ddict(set)

    # TODO: Command line option to switch grammar filter on/off
    # When both inputs are given, the graph-based filter replaces the
    # string-based one.
    if string:
        pgrammar = [grammar[r] for r in grammar.reachable_rules(string, None)]
    if graph:
        pgrammar = [grammar[r] for r in grammar.reachable_rules(graph, None)]

    queue = deque()  # the items left to be visited
    pending = set()  # a copy of queue with constant-time lookup
    attempted = set()  # a cache of previously-attempted item combinations
    visited = set()  # a cache of already-visited items
    word_terminal_lookup = ddict(set)  # mapping from words to string indices
    nonterminal_lookup = ddict(set)  # a mapping from labels to graph edges
    reverse_lookup = ddict(set)  # a mapping from outside symbols open items
    if string:
        for position, word in enumerate(string):
            word_terminal_lookup[word].add(position)
    if graph:
        edge_terminal_lookup = ddict(set)  # mapping from edge labels to graph edges
        for edge in graph.triples(nodelabels=self.nodelabels):
            edge_terminal_lookup[edge[1]].add(edge)

    for rule in pgrammar:
        axiom = axiom_class(rule, nodelabels=self.nodelabels)
        queue.append(axiom)
        pending.add(axiom)
        if axiom.outside_is_nonterminal:
            reverse_lookup[axiom.outside_symbol].add(axiom)

    # keep track of whether we found any complete derivation
    success = False

    # parse
    while queue:
        item = queue.popleft()
        pending.remove(item)
        visited.add(item)
        log.debug("handling", item)

        if item.closed:
            log.debug(" is closed.")
            # check if it's a complete derivation
            if self.successful_parse(string, graph, item, string_size, graph_size):
                chart["START"].add((item,))
                success = True

            # add to nonterminal lookup
            nonterminal_lookup[item.rule.symbol].add(item)

            # wake up any containing rules
            # Unlike in ordinary state-space search, it's possible that we will have
            # to re-visit items which couldn't be merged with anything the first time
            # we saw them, and are waiting for the current item. The reverse_lookup
            # indexes all items by their outside symbol, so we re-append to the queue
            # all items looking for something with the current item's symbol.
            for ritem in reverse_lookup[item.rule.symbol]:
                if ritem not in pending:
                    queue.append(ritem)
                    pending.add(ritem)

        elif item.outside_is_nonterminal:
            # complete
            reverse_lookup[item.outside_symbol].add(item)

            for oitem in nonterminal_lookup[item.outside_symbol]:
                log.debug(" oitem:", oitem)
                if (item, oitem) in attempted:
                    # don't repeat combinations we've tried before
                    continue
                attempted.add((item, oitem))
                if not item.can_complete(oitem):
                    log.debug(" fail")
                    continue
                log.debug(" ok")
                nitem = item.complete(oitem)
                chart[nitem].add((item, oitem))
                if nitem not in pending and nitem not in visited:
                    queue.append(nitem)
                    pending.add(nitem)

        else:
            # shift
            if string and graph:
                if not item.outside_word_is_nonterminal:
                    new_items = [
                        item.shift_word(item.outside_word, index)
                        for index in word_terminal_lookup[item.outside_word]
                        if item.can_shift_word(item.outside_word, index)
                    ]
                else:
                    assert not item.outside_edge_is_nonterminal
                    new_items = [
                        item.shift_edge(edge)
                        for edge in edge_terminal_lookup[item.outside_edge]
                        if item.can_shift_edge(edge)
                    ]
            elif string:
                new_items = [
                    item.shift(item.outside_word, index)
                    for index in word_terminal_lookup[item.outside_word]
                    if item.can_shift(item.outside_word, index)
                ]
            else:
                assert graph
                new_items = [
                    item.shift(edge)
                    for edge in edge_terminal_lookup[item.outside_edge]
                    if item.can_shift(edge)
                ]

            for nitem in new_items:
                log.debug(" shift", nitem, nitem.shifted)
                chart[nitem].add((item,))
                if nitem not in pending and nitem not in visited:
                    queue.append(nitem)
                    pending.add(nitem)

    if success:
        log.chatter(" success!")
    etime = timer() - start_time
    log.chatter("done in %.2fs" % etime)

    # TODO return partial chart
    return chart
def interlinguish_undefined_operation(self, op, say):
    """Fallback for operations without a handler: only logs the operation and the utterance."""
    # CHEAT!: any way to handle these?
    headline = str(self.entity.id) + " interlinguish_undefined_operation:"
    log.debug(2, headline, op)
    utterance = str(say)
    log.debug(2, utterance)
def get_pseudo_mutations(gene, pseudogene, force=False):
    """Derive mutations that turn ``gene`` regions into ``pseudogene`` regions.

    Exons and then introns of both genes are paired up positionally and each
    pair is aligned (``pairwise2`` below 1000 bases, ``blat`` otherwise). The
    alignment is walked column by column: gaps in the gene become ``INS.*``
    entries, gaps in the pseudogene become ``DEL.*`` entries, and mismatches
    become ``SNP.*`` entries. Columns whose gene coordinate is not in
    ``gene.translation`` are ignored.

    Returns ``(mutations, translation)``: both are dicts keyed by gene
    coordinate. ``translation`` additionally maps every unchanged coordinate
    to ``{'old_pos': <pseudogene coordinate>}``.

    ``force`` is accepted but not used by this function.
    """
    mutations = {}
    translation = {}

    def yy(x, s=10):
        # Split a sequence into space-separated chunks of ``s`` characters.
        return ' '.join(x[i:i + s] for i in range(0, len(x), s))

    # list(...) so the concatenation also works where zip() is lazy.
    regions = (list(zip(gene.exons, pseudogene.exons)) +
               list(zip(gene.introns, pseudogene.introns)))

    for ei, (e6, e7) in enumerate(regions):
        is_exon = ei < len(gene.exons)
        number = ei + 1 if is_exon else ei - len(gene.exons) + 1

        y6, y7 = e6.start, e7.start
        e6 = gene.seq[e6.start:e6.end]
        e7 = pseudogene.seq[e7.start:e7.end]

        log.debug('NCBI: ALN {}{} (len {} / {})',
                  'E' if is_exon else 'I', number, len(e6), len(e7))

        if max(len(e6), len(e7)) < 1000:
            al = pairwise2.align.globalxs(e6, e7, -1, 0)[0]
            s = str(pairwise2.format_alignment(*al)).split('\n')
        else:
            log.debug('BLAT')
            a = blat(e6, e7)
            s = [a[0].query.seq, '', a[0].hit.seq]
            # Stitch consecutive hits together, padding the other side with gaps.
            for i in range(1, len(a)):
                if a[i].query_range[0] == a[i - 1].query_range[1]:
                    s[0] += e6[a[i - 1].hit_range[1]:a[i].hit_range[0]]
                    s[2] += '-' * (a[i].hit_range[0] - a[i - 1].hit_range[1])
                if a[i].hit_range[0] == a[i - 1].hit_range[1]:
                    s[0] += '-' * (a[i].query_range[0] - a[i - 1].query_range[1])
                    s[2] += e7[a[i - 1].query_range[1]:a[i].query_range[0]]
                s[0] += a[i].query.seq
                s[2] += a[i].hit.seq
            # Previously `al` was never assigned on this path, so the walk
            # below either raised NameError or silently reused the previous
            # region's alignment.
            # NOTE(review): assumes s[0]/s[2] play the same roles as
            # al[0]/al[1] of the pairwise branch (gene / pseudogene) -
            # confirm against blat()'s query/hit orientation.
            al = (str(s[0]), str(s[2]))

        s = [str(part).upper() for part in s]
        assert (len(s[0]) == len(s[2]))
        log.debug('NCBI: ALN {}', yy(s[0]))
        log.debug('NCBI: ALN {}',
                  yy(''.join(['*' if s[0][y] != s[2][y] else '-'
                              for y in range(len(s[0]))])))
        log.debug('NCBI: ALN {}', yy(s[2]))

        def origin(pos):
            # "<pseudogene>:<e|i><region number>:<pseudogene coordinate>"
            return '{}:{}{}:{}'.format(
                pseudogene.name, 'e' if is_exon else 'i', number, pos)

        gaps6, gaps7 = 0, 0
        i = 0
        while i < len(al[0]):
            c6 = y6 + i - gaps6
            c7 = y7 + i - gaps7
            if c6 not in gene.translation:
                i += 1
                continue
            if al[0][i] == '-':
                seq = ''
                while i < len(al[0]) and al[0][i] == '-':
                    seq += al[1][i]
                    i += 1
                    gaps6 += 1
                # (deletion in 6 is actually insertion in mapping)
                c6 = y6 + i - gaps6  # the INS goes in front of this position
                translation[c6] = mutations[c6] = dict(
                    pos=c6, op='INS.{}'.format(seq.lower()), dbsnp='*',
                    old=origin(c7), old_pos=c7)
                continue
            if al[1][i] == '-':
                seq = ''
                while i < len(al[0]) and al[1][i] == '-':
                    seq += al[0][i]
                    i += 1
                    gaps7 += 1
                # (deletion in 7 is actually deletion in mapping)
                translation[c6] = mutations[c6] = dict(
                    pos=c6, op='DEL.{}'.format(seq), dbsnp='*',
                    old=origin(c7), old_pos=c7)
                continue
            if al[0][i] != al[1][i]:
                translation[c6] = mutations[c6] = dict(
                    pos=c6, op='SNP.{}{}'.format(al[0][i], al[1][i]), dbsnp='*',
                    old=origin(c7), old_pos=c7)
            elif c6 not in mutations:  # do not overwrite insertions
                translation[c6] = dict(old_pos=c7)
            i += 1

    return mutations, translation
def interlinguish_undefined_operation(self, op, say):
    """Fallback for operations without a handler: only logs the operation and the utterance."""
    #CHEAT!: any way to handle these?
    headline = str(self.id) + " interlinguish_undefined_operation:"
    log.debug(2, headline, op)
    utterance = str(say)
    log.debug(2, utterance)
def lmutil():
    """Checks total of available licences for all objects passed

    For every entry of the module-level ``licence_list`` that has both a
    ``file_address`` and a ``feature``, runs ``lmutil lmstat`` and parses the
    "Users of ..." lines. For the tracked feature it updates ``in_use_real``,
    ``total`` (when the server disagrees), the rolling ``history``,
    ``in_use_modified`` and the hourly ``day_ave``. Other features reported by
    the server are logged as untracked. Fetch failures are logged, not raised.
    """
    # This is a mess. Tidy.
    # Raw string: the pattern contains regex escapes (\w, \d, \() that are
    # invalid escape sequences in a normal string literal.
    pattern = re.compile(r"Users of (?P<feature_name>\w*?): \(Total of (?P<total>\d*?) licenses issued; Total of (?P<in_use_real>\d*?) licenses in use\)")

    for key, value in licence_list.items():
        if not value["file_address"]:
            continue
        if not value["feature"]:
            log.error(key + " must have feature specified in order to check with LMUTIL")
            continue

        features = []
        try:
            shell_string = "linx64/lmutil " + "lmstat " + "-f " + value["feature"] + " -c " + value["file_address"]
            log.debug(shell_string)
            lmutil_return = subprocess.check_output(shell_string, shell=True).strip()
        except Exception as details:
            log.error("Failed to fetch " + key + " " + str(details))
        else:
            # On Python 3 check_output() returns bytes, and bytes.split("\n")
            # raises TypeError. Undecodable bytes are replaced, not raised,
            # since a strict decode was reported to fail on real output.
            if not isinstance(lmutil_return, str):
                lmutil_return = lmutil_return.decode("utf-8", "replace")
            for line in lmutil_return.split("\n"):
                m = pattern.match(line)
                if m:
                    features.append(m.groupdict())

        found = False
        for feature in features:
            if feature["feature_name"] != value["feature"]:
                log.info("Untracked Feature " + feature["feature_name"] + ": " + (feature["in_use_real"]) +" of " + (feature["total"]) + "in use.")
                continue

            found = True
            # At hour 0 this is -1, i.e. the last slot of day_ave.
            hour_index = dt.datetime.now().hour - 1
            value["in_use_real"] = int(feature["in_use_real"])

            if value["total"] != int(feature["total"]):
                log.warning("LMUTIL shows different total number of licences than recorded. Changing from '" + str(value["total"]) + "' to '" + feature["total"] + "'")
                value["total"] = int(feature["total"])

            # Record to running history
            value["history"].append(value["in_use_real"])

            # Pop extra array entries
            while len(value["history"]) > value["history_points"]:
                value["history"].pop(0)

            # Find modified in use value
            interesting = max(value["history"]) - value["in_use_nesi"]
            # NOTE(review): the trailing 0 is an argument of min(), not of
            # round(), so this result can never exceed 0. It may have been
            # meant as round(min(..., total), 0) or as a lower clamp at 0 -
            # left unchanged until the intent is confirmed.
            value["in_use_modified"] = round(min(
                max(interesting + value["buffer_constant"], interesting * (1 + value["buffer_factor"])),
                value["total"],
                0
            ))

            # Update average: blend the new reading into the stored value for
            # this hour, or seed the slot when it is still empty/zero.
            if value["day_ave"][hour_index]:
                value["day_ave"][hour_index] = round(
                    ((value["in_use_real"] * settings["point_weight"]) + (value["day_ave"][hour_index] * (1 - settings["point_weight"]))),
                    2,
                )
            else:
                value["day_ave"][hour_index] = value["in_use_real"]

        if not found:
            log.error("Feature '" + value["feature"] + "' not found on server for '" + key + "'")
def log(msg, *args):
    """Forward ``msg`` and any extra positional arguments to ``logg.debug``."""
    emit = logg.debug
    emit(msg, *args)
def get_variants_from_matched_lines(tumor_line, normal_line):
    u"""Collect variant candidates from a matched tumor/normal pileup line pair.

    Raises LowDepthError / HighDepthError when either line is outside the
    configured depth range, CustomError when the reference base is ``N`` and
    TooFewVariantReadsError when the tumor line has too few variant reads.

    For every non-reference key of the tumor profile, a heterozygous germline
    variant and a somatic variant are attempted independently. Filter
    exceptions from either attempt discard only that candidate.

    NOTE(review): the two result lists are filled but no ``return`` statement
    is visible in this block, so callers receive ``None``. Confirm whether a
    ``return`` was lost.
    NOTE(review): this source had lost its indentation; placing the
    indel-cover check under the ``is_snv()`` guard is a reconstruction -
    confirm against the original layout.
    """
    hetero_germline_variants = []
    somatic_variants = []

    if (tumor_line.depth < settings.min_depth) or (normal_line.depth < settings.min_depth):
        raise LowDepthError
    if (tumor_line.depth > settings.max_depth) or (normal_line.depth > settings.max_depth):
        raise HighDepthError
    if tumor_line.ref == u'N':
        raise CustomError(u"reference_is_N")
    if len(REGEX_COUNT_W.findall(tumor_line.bases)) < settings.min_variant_supporting_reads:
        raise TooFewVariantReadsError

    tumor_pileup_units = tumor_line.get_bases_with_qualities()
    normal_pileup_units = normal_line.get_bases_with_qualities()
    tumor_profiles = pileup_unit.get_profiles(tumor_pileup_units)
    normal_profiles = pileup_unit.get_profiles(normal_pileup_units)

    # list(...) snapshots the keys, as .keys() does on Python 2.
    for variant_key in list(tumor_profiles):
        if variant_key == tumor_line.ref:
            # skip for the reference base
            continue
        try:
            tumor_count = tumor_profiles[variant_key]
            if tumor_count < settings.min_variant_supporting_reads:
                raise TooFewVariantReadsError

            tumor_ref_units = [x for x in tumor_pileup_units if x.key() != variant_key]
            tumor_obs_units = [x for x in tumor_pileup_units if x.key() == variant_key]
            normal_ref_units = [x for x in normal_pileup_units if x.key() != variant_key]
            normal_obs_units = [x for x in normal_pileup_units if x.key() == variant_key]
            IndelCoverChecker.update(tumor_line.chromosome, tumor_line.position, tumor_profiles, normal_profiles)

            # --- heterozygous germline candidate ---
            try:
                normal_count = normal_profiles.get(variant_key, 0)
                if normal_count < settings.min_variant_supporting_reads:
                    raise TooFewVariantReadsError
                v = HeterozygousGermlineVariant.from_pileup_units(tumor_ref_units, tumor_obs_units, normal_ref_units, normal_obs_units)
                v.set_basic_info(variant_key, tumor_line.chromosome, tumor_line.position, tumor_line.ref)
                if v.is_snv():
                    try:
                        triallelic_site_checker.check(tumor_line.ref, tumor_line.chromosome, tumor_line.position, tumor_profiles, normal_profiles)
                    except TriallelicSiteError:
                        v.triallelic_site_check = "triallelic"
                    try:
                        IndelCoverChecker.check(tumor_line.chromosome, tumor_line.position)
                    except IndelCoverError:
                        v.indel_cover_check = "indel-cover"
                hetero_germline_variants.append(v)
            except AlleleFreqOutOfRangeError:
                pass
            except StrandFreqOutOfRangeError:
                pass
            except TooFewVariantReadsError:
                pass
            except LowDepthError:
                pass
            except LowBaseQualityError as e:
                log.debug(u"HeteroGermline: {0}, tumor: {1}, normal: {2}".format(e, tumor_line, normal_line))
            except CustomError as e:
                # Was "except CustomError, e": Python 2 only syntax, and the
                # only handler in this function not using the "as" form.
                log.warning(u"HeteroGermline CustomError: {0}, tumor: {1}, normal: {2}".format(e, tumor_line, normal_line))

            # --- somatic candidate ---
            try:
                v = SomaticVariant.from_pileup_units(tumor_ref_units, tumor_obs_units, normal_ref_units, normal_obs_units)
                v.set_basic_info(variant_key, tumor_line.chromosome, tumor_line.position, tumor_line.ref)
                v.set_fisher_score()
                if v.is_snv():
                    try:
                        triallelic_site_checker.check(tumor_line.ref, tumor_line.chromosome, tumor_line.position, tumor_profiles, normal_profiles)
                    except TriallelicSiteError:
                        v.triallelic_site_check = "triallelic"
                    try:
                        IndelCoverChecker.check(tumor_line.chromosome, tumor_line.position)
                    except IndelCoverError:
                        v.indel_cover_check = "indel-cover"
                somatic_variants.append(v)
            except AlleleFreqOutOfRangeError:
                pass
            except StrandFreqOutOfRangeError:
                pass
            except TooManyNormalVariantReadsError:
                pass
            except TooFewVariantReadsError:
                pass
            except LowDepthError:
                pass
            except LowBaseQualityError as e:
                log.debug(u"Somatic: {0}, tumor: {1}, normal: {2}".format(e, tumor_line, normal_line))
            except CustomError as e:
                log.warning(u"Somatic CustomError: {0}, tumor: {1}, normal: {2}".format(e, tumor_line, normal_line))
        except TooFewVariantReadsError:
            pass
        except CustomError as e:
            log.warning(u"CustomError: {0}, tumor: {1}, normal: {2}".format(e, tumor_line, normal_line))
def parse_bitext(self, obj1, obj2):
    """
    Parse a single pair of objects (two strings, two graphs, or string/graph).

    The kind of each object is taken from the grammar's ``rhs1_type`` and
    ``rhs2_type``. Returns the chart: a mapping from each derived item to the
    set of antecedent tuples it was built from. Complete derivations are
    recorded under the key 'START'.
    """
    rhs1type, rhs2type = self.grammar.rhs1_type, self.grammar.rhs2_type
    assert rhs1type in ["string", "hypergraph"] and rhs2type in ["string", "hypergraph"]

    # Remember size of input objects and figure out Item subclass
    if rhs1type == "string":
        obj1size = len(obj1)
    elif rhs1type == "hypergraph":
        obj1size = len(obj1.triples())
    if rhs2type == "string":
        obj2size = len(obj2)
    elif rhs2type == "hypergraph":
        obj2size = len(obj2.triples())

    grammar = self.grammar
    # time.clock() was removed in Python 3.8; use perf_counter() where it
    # exists and fall back to clock() on interpreters that predate it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    start_time = timer()
    log.chatter('parse...')

    # initialize data structures and lookups
    # we use various tables to provide constant-time lookup of fragments available
    # for shifting, completion, etc.
    chart = ddict(set)

    #TODO: command line filter to switch rule filter on/off
    pgrammar = [grammar[r] for r in grammar.reachable_rules(obj1, obj2)]

    queue = deque()  # the items left to be visited
    pending = set()  # a copy of queue with constant-time lookup
    attempted = set()  # a cache of previously-attempted item combinations
    visited = set()  # a cache of already-visited items
    nonterminal_lookup = ddict(set)  # a mapping from labels to graph edges
    reverse_lookup = ddict(set)  # a mapping from outside symbols to open items

    # mapping from words to string indices for each string
    word_terminal_lookup1 = ddict(set)
    word_terminal_lookup2 = ddict(set)
    if rhs1type == "string":
        for i, word in enumerate(obj1):
            word_terminal_lookup1[word].add(i)
    if rhs2type == "string":
        for i, word in enumerate(obj2):
            word_terminal_lookup2[word].add(i)

    # mapping from edge labels to graph edges for each graph
    edge_terminal_lookup1 = ddict(set)
    edge_terminal_lookup2 = ddict(set)
    if rhs1type == "hypergraph":
        for edge in obj1.triples(nodelabels=self.nodelabels):
            edge_terminal_lookup1[edge[1]].add(edge)
    if rhs2type == "hypergraph":
        for edge in obj2.triples(nodelabels=self.nodelabels):
            edge_terminal_lookup2[edge[1]].add(edge)

    # The item classes depend only on the grammar, not on the rule.
    item1class = CfgItem if rhs1type == "string" else HergItem
    item2class = CfgItem if rhs2type == "string" else HergItem
    for rule in pgrammar:
        axiom = SynchronousItem(rule, item1class, item2class, nodelabels=self.nodelabels)
        queue.append(axiom)
        pending.add(axiom)
        if axiom.outside_is_nonterminal:
            reverse_lookup[axiom.outside_symbol].add(axiom)

    # keep track of whether we found any complete derivation
    success = False

    # parse
    while queue:
        item = queue.popleft()
        pending.remove(item)
        visited.add(item)
        log.debug('handling', item)

        if item.closed:
            log.debug(' is closed.')
            # check if it's a complete derivation
            if self.successful_biparse(obj1, obj2, item, obj1size, obj2size):
                chart['START'].add((item, ))
                success = True

            # add to nonterminal lookup
            nonterminal_lookup[item.rule.symbol].add(item)

            # wake up any containing rules
            # Unlike in ordinary state-space search, it's possible that we will have
            # to re-visit items which couldn't be merged with anything the first time
            # we saw them, and are waiting for the current item. The reverse_lookup
            # indexes all items by their outside symbol, so we re-append to the queue
            # all items looking for something with the current item's symbol.
            for ritem in reverse_lookup[item.rule.symbol]:
                if ritem not in pending:
                    queue.append(ritem)
                    pending.add(ritem)

        elif item.outside_is_nonterminal:
            # complete
            reverse_lookup[item.outside_symbol].add(item)

            for oitem in nonterminal_lookup[item.outside_symbol]:
                log.debug(" oitem:", oitem)
                if (item, oitem) in attempted:
                    # don't repeat combinations we've tried before
                    continue
                attempted.add((item, oitem))
                if not item.can_complete(oitem):
                    log.debug(" fail")
                    continue
                log.debug(" ok")
                nitem = item.complete(oitem)
                chart[nitem].add((item, oitem))
                if nitem not in pending and nitem not in visited:
                    queue.append(nitem)
                    pending.add(nitem)

        else:
            # shift ; this depends on the configuration (string/graph -> string/graph)
            if not item.outside1_is_nonterminal and not item.item1.closed:
                if rhs1type == "string":
                    new_items = [
                        item.shift_word1(item.outside_object1, index)
                        for index in word_terminal_lookup1[item.outside_object1]
                        if item.can_shift_word1(item.outside_object1, index)
                    ]
                else:
                    # "==" instead of "is": identity of equal strings is an
                    # implementation detail and "is" with a literal triggers
                    # a SyntaxWarning on Python 3.8+.
                    assert rhs1type == "hypergraph"
                    new_items = [
                        item.shift_edge1(edge)
                        for edge in edge_terminal_lookup1[item.outside_object1]
                        if item.can_shift_edge1(edge)
                    ]
            else:
                assert not item.outside2_is_nonterminal  # Otherwise shift would not be called
                if rhs2type == "string":
                    new_items = [
                        item.shift_word2(item.outside_object2, index)
                        for index in word_terminal_lookup2[item.outside_object2]
                        if item.can_shift_word2(item.outside_object2, index)
                    ]
                else:
                    assert rhs2type == "hypergraph"
                    new_items = [
                        item.shift_edge2(edge)
                        for edge in edge_terminal_lookup2[item.outside_object2]
                        if item.can_shift_edge2(edge)
                    ]

            for nitem in new_items:
                log.debug(' shift', nitem, nitem.shifted)
                chart[nitem].add((item, ))
                if nitem not in pending and nitem not in visited:
                    queue.append(nitem)
                    pending.add(nitem)

    if success:
        log.chatter(' success!')
    etime = timer() - start_time
    log.chatter('done in %.2fs' % etime)

    # TODO return partial chart
    return chart
jobs = get_jobs(ctr_dirs) if not jobs: return if Config.remote_host: # NOTE: Assuming 256 B of TCP window needed for each job (squeue) ssh_connect(Config.remote_host, Config.remote_user, Config.private_key, (2 << 7)*len(jobs)) execute = execute_local if not Config.remote_host else execute_remote #args = Config.slurm_bin_path + '/squeue -a -h -o %i:%T -t all -j ' + ','.join(jobs.keys()) args = Config.slurm_bin_path + '/oarstat -fj ' + '-fj'.join(jobs.keys()) if os.environ.has_key('__SLURM_TEST'): handle = execute(args, env=dict(os.environ)) else: handle = execute(args) if handle.returncode != 0: debug('Got error code %i from oarstat' % handle.returncode, 'slurm.Scan') debug('Error output is:\n' + ''.join(handle.stderr), 'slurm.Scan') # Slurm can report StartTime and EndTime in at least these two formats: # 2010-02-15T15:30:29 (MDS) # 02/15-15:25:15 # Python does not support duplicate named groups. # Have to use separate regex if we want to use named groups. #date_MDS = re.compile(r'^(?P<YYYY>\d\d\d\d)-(?P<mm>\d\d)-(?P<dd>\d\d)T(?P<HH>\d\d):(?P<MM>\d\d):(?P<SS>\d\d)$') #date_2 = re.compile(r'^(?P<mm>\d\d)/(?P<dd>\d\d)-(?P<HH>\d\d):(?P<MM>\d\d):(?P<SS>\d\d)$') date_MDS = re.compile(r'^(?P<YYYY>\d\d\d\d)-(?P<mm>\d\d)-(?P<dd>\d\d) (?P<HH>\d\d):(?P<MM>\d\d):(?P<SS>\d\d)$') for line in handle.stdout: try: localid, state = line.strip().split(':', 1) except:
def parse_bitext(self, obj1, obj2):
    """
    Parse a single pair of objects (two strings, two graphs, or string/graph).

    The grammar's ``rhs1_type`` / ``rhs2_type`` ("string" or "hypergraph")
    decide how ``obj1`` and ``obj2`` are interpreted: strings are indexed
    position by position, hypergraphs through ``triples()``.

    The parse is an agenda-driven chart parse: one axiom item is queued per
    reachable rule, and items are repeatedly popped and either *shifted*
    over a terminal (word or edge) or *completed* with a closed item whose
    rule symbol matches the item's outside nonterminal.

    Returns the chart, a ``ddict(set)`` mapping each derived item to the set
    of antecedent tuples that produced it.  Complete derivations (as judged
    by ``self.successful_biparse``) are recorded under the key ``"START"``.
    The chart is returned even when no complete derivation was found.
    """
    rhs1type, rhs2type = self.grammar.rhs1_type, self.grammar.rhs2_type
    assert rhs1type in ["string", "hypergraph"] and rhs2type in ["string", "hypergraph"]

    # Remember size of input objects and figure out Item subclass
    if rhs1type == "string":
        obj1size = len(obj1)
    elif rhs1type == "hypergraph":
        obj1size = len(obj1.triples())
    if rhs2type == "string":
        obj2size = len(obj2)
    elif rhs2type == "hypergraph":
        obj2size = len(obj2.triples())

    grammar = self.grammar

    # time.clock() was removed in Python 3.8; prefer perf_counter when the
    # interpreter provides it and only fall back to clock() on older versions.
    timer = getattr(time, "perf_counter", None) or time.clock
    start_time = timer()
    log.chatter("parse...")

    # initialize data structures and lookups
    # we use various tables to provide constant-time lookup of fragments available
    # for shifting, completion, etc.
    chart = ddict(set)

    # TODO: command line filter to switch rule filter on/off
    pgrammar = [grammar[r] for r in grammar.reachable_rules(obj1, obj2)]  # grammar.values()

    queue = deque()  # the items left to be visited
    pending = set()  # a copy of queue with constant-time lookup
    attempted = set()  # a cache of previously-attempted item combinations
    visited = set()  # a cache of already-visited items
    nonterminal_lookup = ddict(set)  # a mapping from labels to graph edges
    reverse_lookup = ddict(set)  # a mapping from outside symbols to open items

    # mapping from words to string indices for each string
    word_terminal_lookup1 = ddict(set)
    word_terminal_lookup2 = ddict(set)
    if rhs1type == "string":
        for index, word in enumerate(obj1):
            word_terminal_lookup1[word].add(index)
    if rhs2type == "string":
        for index, word in enumerate(obj2):
            word_terminal_lookup2[word].add(index)

    # mapping from edge labels to graph edges for each graph
    edge_terminal_lookup1 = ddict(set)
    edge_terminal_lookup2 = ddict(set)
    if rhs1type == "hypergraph":
        for edge in obj1.triples(nodelabels=self.nodelabels):
            edge_terminal_lookup1[edge[1]].add(edge)
    if rhs2type == "hypergraph":
        for edge in obj2.triples(nodelabels=self.nodelabels):
            edge_terminal_lookup2[edge[1]].add(edge)

    # The item classes depend only on the grammar's RHS types, so pick them
    # once rather than once per rule.
    item1class = CfgItem if rhs1type == "string" else HergItem
    item2class = CfgItem if rhs2type == "string" else HergItem
    for rule in pgrammar:
        axiom = SynchronousItem(rule, item1class, item2class, nodelabels=self.nodelabels)
        queue.append(axiom)
        pending.add(axiom)
        if axiom.outside_is_nonterminal:
            reverse_lookup[axiom.outside_symbol].add(axiom)

    # keep track of whether we found any complete derivation
    success = False

    # parse
    while queue:
        item = queue.popleft()
        pending.remove(item)
        visited.add(item)
        log.debug("handling", item)

        if item.closed:
            log.debug(" is closed.")
            # check if it's a complete derivation
            if self.successful_biparse(obj1, obj2, item, obj1size, obj2size):
                chart["START"].add((item,))
                success = True

            # add to nonterminal lookup
            nonterminal_lookup[item.rule.symbol].add(item)

            # wake up any containing rules
            # Unlike in ordinary state-space search, it's possible that we will have
            # to re-visit items which couldn't be merged with anything the first time
            # we saw them, and are waiting for the current item. The reverse_lookup
            # indexes all items by their outside symbol, so we re-append to the queue
            # all items looking for something with the current item's symbol.
            for ritem in reverse_lookup[item.rule.symbol]:
                if ritem not in pending:
                    queue.append(ritem)
                    pending.add(ritem)

        else:
            if item.outside_is_nonterminal:
                # complete
                reverse_lookup[item.outside_symbol].add(item)

                for oitem in nonterminal_lookup[item.outside_symbol]:
                    log.debug(" oitem:", oitem)
                    if (item, oitem) in attempted:
                        # don't repeat combinations we've tried before
                        continue
                    attempted.add((item, oitem))
                    if not item.can_complete(oitem):
                        log.debug(" fail")
                        continue
                    log.debug(" ok")
                    nitem = item.complete(oitem)
                    chart[nitem].add((item, oitem))
                    if nitem not in pending and nitem not in visited:
                        queue.append(nitem)
                        pending.add(nitem)

            else:
                # shift ; this depends on the configuration (string/graph -> string/graph)
                if not item.outside1_is_nonterminal and not item.item1.closed:
                    if rhs1type == "string":
                        new_items = [
                            item.shift_word1(item.outside_object1, index)
                            for index in word_terminal_lookup1[item.outside_object1]
                            if item.can_shift_word1(item.outside_object1, index)
                        ]
                    else:
                        # Compare by value: identity of string literals is an
                        # interning detail, not a guarantee.
                        assert rhs1type == "hypergraph"
                        new_items = [
                            item.shift_edge1(edge)
                            for edge in edge_terminal_lookup1[item.outside_object1]
                            if item.can_shift_edge1(edge)
                        ]
                else:
                    assert not item.outside2_is_nonterminal  # Otherwise shift would not be called
                    if rhs2type == "string":
                        new_items = [
                            item.shift_word2(item.outside_object2, index)
                            for index in word_terminal_lookup2[item.outside_object2]
                            if item.can_shift_word2(item.outside_object2, index)
                        ]
                    else:
                        assert rhs2type == "hypergraph"
                        new_items = [
                            item.shift_edge2(edge)
                            for edge in edge_terminal_lookup2[item.outside_object2]
                            if item.can_shift_edge2(edge)
                        ]

                for nitem in new_items:
                    log.debug(" shift", nitem, nitem.shifted)
                    chart[nitem].add((item,))
                    if nitem not in pending and nitem not in visited:
                        queue.append(nitem)
                        pending.add(nitem)

    if success:
        log.chatter(" success!")
    etime = timer() - start_time
    log.chatter("done in %.2fs" % etime)

    # TODO return partial chart
    return chart
def Submit(config, jobdesc):
    """
    Build the job script for ``jobdesc`` and hand it to the batch system.

    The configuration at ``config`` is loaded, the job description is
    validated, RTE stage 0 is run, and the generated job script is written
    to a file.  That file is then submitted by running
    ``<Config.slurm_bin_path>/oarsub <script_file>`` (locally, or over SSH
    when ``Config.remote_host`` is set).  Submission is attempted up to ten
    times; a retry, preceded by a 60 second sleep, happens only when the
    command exits with code 198 or ``wait_for_queue`` reports true for its
    result.

    :param str config: path to arc.conf
    :param jobdesc: job description object
    :type jobdesc: :py:class:`arc.JobDescription`
    :return: local job ID if successfully submitted; ``"-1"`` when the
        environment variable ``ONLY_WRITE_JOBSCRIPT`` equals ``yes`` (the
        script is written but not submitted); otherwise ``None``
    :rtype: :py:obj:`str`
    """
    configure(config, set_slurm)

    validate_attributes(jobdesc)
    if Config.remote_host:
        ssh_connect(Config.remote_host, Config.remote_user, Config.private_key)

    # Run RTE stage0
    debug('----- starting slurmSubmitter.py -----', 'slurm.Submit')
    RTE_stage0(jobdesc, 'SLURM', SBATCH_ACCOUNT = 'OtherAttributes.SBATCH_ACCOUNT')

    set_grid_global_jobid(jobdesc)

    # Create script file and write job script
    jobscript = get_job_script(jobdesc)
    script_file = write_script_file(jobscript)
    debug('Created file %s' % script_file, 'slurm.Submit')

    debug('SLURM jobname: %s' % jobdesc.Identification.JobName, 'slurm.Submit')
    debug('SLURM job script built', 'slurm.Submit')
    debug('----------------- BEGIN job script -----', 'slurm.Submit')
    # Dump the script line by line; runs of blank lines are folded into the
    # next non-empty line so they do not each produce a separate log record.
    blank_run = 0
    for script_line in jobscript.split('\n'):
        if script_line:
            debug(blank_run*'\n' + script_line.replace("%", "%%"), 'slurm.Submit')
            blank_run = 0
        else:
            blank_run += 1
    if blank_run > 1:
        debug((blank_run-1)*'\n', 'slurm.Submit')
    debug('----------------- END job script -----', 'slurm.Submit')

    if os.environ.get('ONLY_WRITE_JOBSCRIPT') == 'yes':
        return "-1"

    #######################################
    #  Submit the job
    ######################################

    if Config.remote_host:
        execute = execute_remote
    else:
        execute = execute_local
    directory = jobdesc.OtherAttributes['joboption;directory']
    debug('Session directory: %s' % directory, 'slurm.Submit')

    args = '%s/oarsub %s' % (Config.slurm_bin_path, script_file)
    for _attempt in range(10):
        verbose('Executing \'%s\' on %s' %
                (args, Config.remote_host or 'localhost'), 'slurm.Submit')
        handle = execute(args)
        if handle.returncode == 0:
            break
        queue_is_full = handle.returncode == 198 or wait_for_queue(handle)
        if not queue_is_full:
            break  # Other error than full queue
        debug('Waiting for queue to decrease', 'slurm.Submit')
        time.sleep(60)

    if handle.returncode == 0:
        # TODO: Test what happens when the jobqueue is full or when the slurm
        # ctld is not responding. SLURM 1.x and 2.2.x outputs the jobid into
        # STDERR and STDOUT respectively. Concat them, and let sed sort it out.
        # From the exit code we know that the job was submitted, so this
        # is safe. Ulf Tigerstedt <*****@*****.**> 1.5.2011
        localid = get_job_id(handle)
        if localid:
            debug('Job submitted successfully!', 'slurm.Submit')
            debug('Local job id: ' + localid, 'slurm.Submit')
            debug('----- exiting submitSubmitter.py -----', 'slurm.Submit')
            return localid

    # Reached on a non-zero exit code, or when no job id could be extracted.
    debug('job *NOT* submitted successfully!', 'slurm.Submit')
    debug('got error code from sbatch: %d !' % handle.returncode, 'slurm.Submit')
    debug('Output is:\n' + ''.join(handle.stdout), 'slurm.Submit')
    debug('Error output is:\n' + ''.join(handle.stderr), 'slurm.Submit')
    debug('----- exiting slurmSubmitter.py -----', 'slurm.Submit')