def query_missing(self):
    now = time.time()
    log.info("Querying %i missing data entries." % len(self.missing))
    for mtime in self.missing:
        starttime = mtime
        endtime = mtime + datetime.timedelta(0, 3600)
        results = self.query_transfers(starttime, endtime)
        if not results:
            log.warning("No transfer results found for %s." % starttime)
        for result in results:
            res_time, count, volume_mb = result
            res_time = float(res_time)
            starttime = self._timestamp_to_datetime(res_time)
            if now - res_time >= 3600:
                endtime = self._timestamp_to_datetime(res_time + 3600)
            else:
                endtime = self._timestamp_to_datetime(now)
            if res_time > now:
                continue
            td = TransferData()
            td.starttime = starttime
            td.endtime = endtime
            td.count = count
            td.volume_mb = volume_mb
            self.data[starttime] = td
            log.debug("Successfully parsed results for %s." % starttime)
            self.save_cache()
def query_jobs(self):
    params = self.get_params()

    response = gracc_query_jobs(self.es, jobs_summary_index, **params)

    results = response.aggregations.EndTime.buckets

    all_results = [(x.Records.value or x.doc_count,
                    x.CoreHours.value,
                    x.key / 1000) for x in results]

    log.info("GRACC returned %i results for daily jobs" % len(all_results))
    log.debug("Job result dump:")
    for count, hrs, epochtime in all_results:
        time_tuple = time.gmtime(epochtime)
        time_str = time.strftime("%Y-%m-%d %H:%M", time_tuple)
        log.debug("Day %s: Jobs %i, Job Hours %.2f" %
                  (time_str, count, hrs))
    count_results = [i[0] for i in all_results]
    hour_results = [i[1] for i in all_results]
    num_results = int(self.cp.get("GRACC", "days"))
    count_results = count_results[-num_results-1:-1]
    hour_results = hour_results[-num_results-1:-1]
    self.count_results, self.hour_results = count_results, hour_results
    return count_results, hour_results
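A note on the trailing slice above: count_results[-num_results-1:-1] keeps the last "days" complete daily buckets and drops the final, still-accumulating bucket. A tiny worked illustration with made-up numbers:

# Hypothetical daily counts; the last entry is today's partial bucket.
daily_counts = [10, 12, 9, 14, 3]
days = 3
print(daily_counts[-days - 1:-1])  # [12, 9, 14]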
Example #3
def does_it_happen(prob,tick_length=const.basic_tick):
    """calculate random number and see whether  """
    #return random.random()<prob*tick_length/const.day_in_seconds
    now=prob*tick_length/const.basic_tick
    rand=random.random()
    log.debug(3,"does_it_happen? "+str(rand)+" "+str(now))
    return rand<now
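A minimal, self-contained sketch of how such a per-tick probability check behaves; the const class below is a hypothetical stand-in for the real const module and the log call is omitted:

import random

class const:                 # stand-in for the real const module
    basic_tick = 3.0         # seconds per tick (made-up value)

def does_it_happen(prob, tick_length=const.basic_tick):
    return random.random() < prob * tick_length / const.basic_tick

# With the default tick length the scaling factor is 1, so the event fires
# with probability prob; over many ticks the hit rate approaches that value.
hits = sum(does_it_happen(0.25) for _ in range(10000))
print(hits / 10000.0)        # roughly 0.25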
Example #4
def Cancel(config, jobid):
    """
    Cancel a job. The TERM signal is sent to allow the process to terminate
    gracefully within 5 seconds, followed by a KILL signal.

    :param str config: path to arc.conf
    :param str jobid: local job ID
    :return: ``True`` if successfully cancelled, else ``False``
    :rtype: :py:obj:`bool`
    """

    debug('----- starting forkCancel.py -----', 'fork.Cancel')

    configure(config)
    if Config.remote_host:
        ssh_connect(Config.remote_host, Config.remote_user, Config.private_key)

    info('Killing job with pid %s' % jobid, 'fork.Cancel')
    if not Config.remote_host:
        import signal
        try:
            os.kill(jobid, signal.SIGTERM)
            time.sleep(5)
            os.kill(jobid, signal.SIGKILL)
        except OSError:
            # Job already died or terminated gracefully after SIGTERM
            pass
        except:
            return False
    else:
        args = 'kill -s TERM %i; sleep 5; kill -s KILL %i' % (jobid, jobid)
        handle = execute_remote(args)

    debug('----- exiting forkCancel.py -----', 'fork.Cancel')
    return True
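The TERM-then-KILL pattern described in the docstring, as a standalone sketch using POSIX signals (terminate and grace are hypothetical names; a real caller would pass the job's process id as an int):

import os
import signal
import time

def terminate(pid, grace=5):
    try:
        os.kill(pid, signal.SIGTERM)   # ask the process to exit cleanly
        time.sleep(grace)
        os.kill(pid, signal.SIGKILL)   # force-kill it if it is still alive
    except OSError:
        pass                           # process already gone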
Example #5
 def debug(self, pattern, *args):
     """
         Show debug information if DEBUG mode
     """
     if isinstance(self, Tank):
         if self._selected:
             log.debug('%s:%s' % (self.id, pattern), *args)
     else:
         log.debug('%s:%s:%s' % (self.__class__.__name__,
                                        self.id, pattern), *args)
Example #6
 def download_software(self):
     r = requests.get(self.software.download_url, stream=True)
     if r.status_code == 200:
         directory = os.path.dirname(self.download_path)
         if not os.path.exists(directory):
             os.makedirs(directory)
         with open(self.download_path, 'wb') as f:
             for chunk in r.iter_content(1024):
                 f.write(chunk)
     debug('Download finished: url [%s]' % self.software.download_url)
Example #7
def misc(host, port, prefix, fields, debug=False):
    boot_time = psutil.boot_time()
    uptime = time.time() - boot_time
    client = statsd.StatsClient(host, port, prefix=prefix)
    with client.pipeline() as pipe:
        pipe.gauge('uptime{}'.format(fields), uptime)
        if debug:
            log.debug("uptime={}".format(uptime))

        pipe.gauge('users{}'.format(fields), len(psutil.users()))
        pipe.gauge('processes{}'.format(fields), len(psutil.pids()))
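A hedged usage example for the gauge helper above, assuming a statsd daemon is listening on localhost:8125; the prefix and tag suffix are made up:

misc(host='localhost', port=8125, prefix='node01', fields=',env=prod', debug=True)
# emits gauges named uptime,env=prod / users,env=prod / processes,env=prod under the node01 prefix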
Example #8
  def nonterminal(self, oitem):
    """
    Attempts to apply the (unary) Nonterminal rule and consume oitem, returning the
    result if successful.
    """

    if self.target != Item.NONTERMINAL:
      raise TypeError, "%s is not a nonterminal." % str(self)
    if oitem.target != Item.ROOT:
      raise TypeError, "%s is not at the root of its tree decomposition." %str(oitem)
    if oitem.rule.symbol != self.next_key:
      log.debug('symbol mismatch')
      return None

    if len(oitem.rule.rhs1.external_nodes) != len(self.next_key_edge[2]):
      log.debug('hyperedge type mismatch')
      return None

    nsubgraph = self.check_subgraph_overlap(oitem)
    if not nsubgraph:
      log.debug('overlap')
      return None

    nmapping = self.check_mapping_bijection_nonterminal(oitem)
    if not nmapping:
      log.debug('bijection')
      return None

    return self.__class__(self.rule,
        self.rule.tree_to_parent[self.tree_node],
        self.graph,
        nsubgraph,
        nmapping, nodelabels = self.nodelabels)
Example #9
def search_variants(tumor_pileup_filename, normal_pileup_filename,
                    cand_somatic_variant_file, cand_hetero_germline_variant_file):
    tumor_f = open(tumor_pileup_filename, u'r')
    normal_f = open(normal_pileup_filename, u'r')

    normal_l = pileup.PileupLine(normal_f.readline())
    current_chromosome = normal_l.chromosome

    tumor_l = pileup.PileupLine(tumor_f.readline())
    if current_chromosome != tumor_l.chromosome:
        raise CustomError(u"different_chromosome_at_the_first_line")

    line_count = 0
    while True:

        # check how many lines have been processed
        line_count += 1
        if line_count % settings.debug_number_of_lines == 0:
            log.debug(u"""processing...
            \ttumor: {0}
            \tnormal: {1}""".format(tumor_l, normal_l))
            line_count = 0

        try:
            if tumor_l.chromosome != normal_l.chromosome:
                if normal_l.chromosome == current_chromosome:
                    normal_l = pileup.PileupLine(normal_f.readline())
                    continue
                else:
                    tumor_l = pileup.PileupLine(tumor_f.readline())
                    continue
            if tumor_l.position < normal_l.position:
                tumor_l = pileup.PileupLine(tumor_f.readline())
                continue
            elif tumor_l.position > normal_l.position:
                normal_l = pileup.PileupLine(normal_f.readline())
                continue
        except IOError as e:
            log.debug(u"reach the bottom of the file. {0}".format(e))
            break

        try:
            hetero_germline_results, somatic_results = get_variants_from_matched_lines(tumor_l, normal_l)

            [cand_somatic_variant_file.write(u"{0}\n".format(v)) for v in somatic_results]
            [cand_hetero_germline_variant_file.write(u"{0}\n".format(v)) for v in hetero_germline_results]

        except TooFewVariantReadsError: pass
        except LowDepthError: pass
        except HighDepthError: pass
        except CustomError as e:
            log.debug(u"CustomError: {0}, tumor: {1}, normal: {2}".format(e, tumor_l, normal_l))

        try:
            current_chromosome = normal_l.chromosome
            normal_l = pileup.PileupLine(normal_f.readline())
            tumor_l = pileup.PileupLine(tumor_f.readline())
        except IOError as e:
            log.debug(u"reach the bottom of the file. {0}".format(e))
            break
Example #10
 def query_sites(self):
     fd = urllib2.urlopen(self.resource_group_url)
     dom = parse(fd)
     sites = set()
     for site_dom in dom.getElementsByTagName("Site"):
         for name_dom in site_dom.getElementsByTagName("Name"):
             try:
                 sites.add(str(name_dom.firstChild.data))
             except:
                 pass
     log.debug("OIM returned the following sites: %s" % ", ".join(sites))
     log.info("OIM has %i registered sites." % len(sites))
     self.sites_results = sites
     return sites
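The same minidom pattern, runnable against an inline XML snippet (the XML below is made up and only mimics the Site/Name layout, not real OIM output):

from xml.dom.minidom import parseString

xml = """<ResourceSummary>
  <Site><Name>SiteA</Name></Site>
  <Site><Name>SiteB</Name></Site>
</ResourceSummary>"""

dom = parseString(xml)
sites = set()
for site_dom in dom.getElementsByTagName("Site"):
    for name_dom in site_dom.getElementsByTagName("Name"):
        sites.add(str(name_dom.firstChild.data))
print(sites)  # {'SiteA', 'SiteB'}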
Example #11
def get(addr, path, debug=False):
    client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    client.connect(addr)
    client.send("GET {} HTTP/1.0\r\n\r\n".format(path))
    resp_str = client.recv(65536)
    source = FakeSocket(resp_str)
    resp = HTTPResponse(source)
    resp.begin()
    if resp.status == 200:
        text = resp.read(len(resp_str))
        data = json.loads(text)
        if debug:
            log.debug(data)
        return data

    return {}
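A hedged usage example: against a local Docker daemon the function above would typically be called with the default socket path (adjust for your host):

containers = get('/var/run/docker.sock', '/containers/json?all=1', debug=True)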
Example #12
    def getcache(self):
        cachedresultslist=[]
        num_time_cach_read=0
        #check if full refresh needed
        try:
            pickle_f_handle = open(self.cache_count_file_name)
            num_time_cach_read = cPickle.load(pickle_f_handle)
            pickle_f_handle.close()
            if(num_time_cach_read >= self.deprecate_cache_after):
                log.debug("Signaling read complete data from db, reads reached: <%s>" %(num_time_cach_read))
                num_time_cach_read=0
            else:
                num_time_cach_read=num_time_cach_read+1
                log.debug("Incrementing number of cached reads to: <%s>" %(num_time_cach_read))
        except Exception, e:
            log.info("Unable to find cache file: <%s>"%(self.cache_count_file_name))
Example #13
 def _extract(self):
     t = tarfile.open(self.download_path, 'r:gz')
     directory = os.path.dirname(self.download_path)
     members = t.getmembers()
     for member in members:
         name = str.replace(member.name, './', '')
         if name == '.':
             continue
         extension = ''
         for index, val in enumerate(name.split('.')):
             if index == 0:
                 name = val
             else:
                 extension = '.' + val
         member.name = './%s-release-%s%s' % (name, self.software.model, extension)
     t.extractall(path=directory, members=members)
     debug('Extracted file: %s' % self.download_path)
Example #14
 def save_cache(self):
     now = datetime.datetime.now()
     old_keys = []
     for key in self.data.keys():
         if (now - key).days >= 7:
             old_keys.append(key)
     for key in old_keys:
         del self.data[key]
     try:
         name, tmpname = get_files(self.cp, "transfer_data")
         fp = open(tmpname, 'w')
         pickle.dump(self.data, fp)
         fp.close()
         commit_files(name, tmpname)
         log.debug("Saved data to cache.")
     except Exception, e:
         log.warning("Unable to write cache; message: %s" % str(e))
Example #15
    def add_thing(self,thing):
        """I own this thing"""
        #CHEAT!: this feature not yet supported
##         if not thing.location:
##             thing.location=self.get_knowledge("location",thing.place)
        log.debug(3,str(self)+" "+str(thing)+" before add_thing: "+str(self.things))
        #thought about owning thing
        name = self.thing_name(thing)
        if not name:
            self.pending_things.append(thing.id)
            return
#        desc="I own %s." % name
#        what=thing.as_entity()
#        ent = Entity(description=desc, what=what)
#        self.send(Operation("thought",ent))
        dictlist.add_value(self.things,name,thing)
        log.debug(3,"\tafter: "+str(self.things))
Example #16
def Scan(config, ctr_dirs):
    """
    Query the local or remote (SSH) machine for all jobs in /[controldir]/processing.
    If the job has stopped running, the exit code is read and the comments file
    is updated.

    :param str config: path to arc.conf
    :param ctr_dirs: list of paths to control directories 
    :type ctr_dirs: :py:obj:`list` [ :py:obj:`str` ... ]
    """

    configure(config, set_fork)
    if Config.scanscriptlog:
        scanlogfile = arc.common.LogFile(Config.scanscriptlog)
        arc.common.Logger_getRootLogger().addDestination(scanlogfile)
        arc.common.Logger_getRootLogger().setThreshold(Config.log_threshold)

    jobs = get_jobs(ctr_dirs)
    if not jobs: return
    if Config.remote_host:
        ssh_connect(Config.remote_host, Config.remote_user, Config.private_key)

    execute = execute_local if not Config.remote_host else execute_remote
    args = 'ps -opid ' + (' '.join(jobs.keys()))
    if os.environ.has_key('__FORK_TEST'):
        handle = execute(args, env=dict(os.environ))
    else:
        handle = execute(args)
    if handle.returncode != 0:
        debug('Got error code %i from ps -opid' % handle.returncode, 'fork.Scan')
        debug('Error output is:\n' + ''.join(handle.stderr), 'fork.Scan')

    running = [line.strip() for line in handle.stdout]
    for localid, job in jobs.items():
        if localid in running:
            continue
        if set_exit_code_from_diag(job):
            job.message = MESSAGES[job.state]
        else:
            job.exitcode = -1
        
        with open(job.lrms_done_file, 'w') as f:
            f.write('%i %s\n' % (job.exitcode, job.message))
        write_comments(job)
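The liveness check above shells out to ps with a list of PIDs and treats anything missing from the output as finished. A self-contained sketch of that idea (pids_still_running is a hypothetical helper, not part of the ARC code):

import subprocess

def pids_still_running(pids):
    # Ask a POSIX ps which of the given PIDs still exist; 'pid=' suppresses the header.
    out = subprocess.run(['ps', '-o', 'pid=', '-p', ','.join(str(p) for p in pids)],
                         capture_output=True, text=True).stdout
    alive = {line.strip() for line in out.splitlines() if line.strip()}
    return [p for p in pids if str(p) in alive]

print(pids_still_running([1, 99999999]))  # on most systems: [1]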
Example #17
File: vm.py Project: cc1-cloud/cc1
def update_state(remote_ip, vm_name, action, state):
    """
    @cmview_ci
    @param_post{remote_ip,string}
    @param_post{vm_name}
    @param_post{action}
    @param_post{state}
    """
    try:
        node = Node.objects.get(address=remote_ip)
    except:
        raise CMException('node_not_found')

    try:
        vm_id = int(vm_name.split('-')[1])
        user_id = int(vm_name.split('-')[2])
    except:
        log.debug(0, "Unknown vm from hook: %s" % vm_name)
        raise CMException('vm_not_found')

    if action != "stopped":
        log.debug(user_id, "Not updating vm state: action is %s" % str(action))
        return ''

    try:
        VM.objects.update()
        vm = VM.objects.get(id=vm_id)
    except:
        log.error(user_id, 'Cannot find vm in database!')
        raise CMException('vm_not_found')

    if not vm.state in [vm_states['running ctx'], vm_states['running']]:
        log.error(user_id, 'VM is not running!')
        raise CMException('vm_not_running')

    if vm.state == vm_states['restart']:
        raise CMException('vm_restart')

    thread = VMThread(vm, 'delete')
    thread.start()

    return ''
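The vm_name parsed above is assumed to follow a "<prefix>-<vm_id>-<user_id>" pattern (the exact format comes from the hook, so this is illustrative only):

vm_name = "vm-42-7"                  # hypothetical hook value
vm_id = int(vm_name.split('-')[1])   # 42
user_id = int(vm_name.split('-')[2]) # 7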
Example #18
 def query_ce_se(self):
     log.debug("Querying the following MyOSG URL: %s" % \
         self.resource_group_url)
     fd = urllib2.urlopen(self.resource_group_url)
     dom = parse(fd)
     ses = set()
     ces = set()
     for service_dom in dom.getElementsByTagName("Service"):
         service_type = None
         for name_dom in service_dom.getElementsByTagName("Name"):
             try:
                 service_type = str(name_dom.firstChild.data).strip()
             except:
                 pass
         uri = None
         for uri_dom in service_dom.getElementsByTagName("ServiceUri"):
             try:
                 uri = str(uri_dom.firstChild.data).strip()
             except:
                 pass
         if uri and service_type:
             if service_type == 'SRMv2':
                 ses.add(uri)
             elif service_type == 'CE':
                 ces.add(uri)
     log.debug("OIM returned the following CEs: %s." % ", ".join(ces))
     log.debug("OIM returned the following SEs: %s." % ", ".join(ses))
     log.info("OIM returned %i CEs and %i SEs" % (len(ces), len(ses)))
     self.ces_results, self.ses_results = ces, ses
     return len(ces), len(ses)
Example #19
 def load_cached(self):
     try:
         data = pickle.load(open(self.cp.get("Filenames", "transfer_data") \
             % {'uid': euid}, "r"))
         # Verify we didn't get useless data
         for time, tdata in data.items():
             assert isinstance(time, datetime.datetime)
             assert isinstance(tdata, TransferData)
             assert isinstance(tdata.starttime, datetime.datetime)
             assert isinstance(tdata.endtime, datetime.datetime)
             assert tdata.count != None
             assert tdata.volume_mb != None
             assert tdata.starttime != None
         self.data = data
         log.info("Successfully loaded transfer data from cache; %i" \
             " cache entries." % len(data))
         remove_data = []
         now = globals()['time'].time()
         now_dt = datetime.datetime.now()
         for time, tdata in data.items():
             if not hasattr(tdata, 'createtime') or not tdata.createtime:
                 log.debug("Ignoring cached data from %s as it has no " \
                     "create time info." % time)
                 remove_data.append(time)
                 continue
             if now - tdata.createtime > 3600:
                 log.debug("Ignoring cached data from %s as it is over " \
                     "an hour old." % time)
                 remove_data.append(time)
             age_starttime = now_dt - tdata.starttime
             age_starttime = age_starttime.days*86400 + age_starttime.seconds
             if (now - tdata.createtime > 1800) and (age_starttime <= 12*3600):
                 log.debug("Ignoring cached data from %s as it is over " \
                     "30 minutes old and is for a recent interval." % \
                     time)
                 remove_data.append(time)
         for time in remove_data:
             del self.data[time]
     except Exception, e:
         log.warning("Unable to load cache; it may not exist. Error: %s" % \
            str(e))
Example #20
def run_docker(address, interval, host, port, debug=False):
    prev_cpu, prev_system = {}, {}
    prev_tx_bytes, prev_rx_bytes, prev_timer = {}, {}, {}
    client = statsd.StatsClient(host, port)
    MEM_USAGE = jmespath.compile('memory_stats.usage')
    MEM_LIMIT = jmespath.compile('memory_stats.limit')
    TOTAL_USAGE = jmespath.compile('cpu_stats.cpu_usage.total_usage')
    SYSTEM_USAGE = jmespath.compile('cpu_stats.system_cpu_usage')
    NUM_CPUS = jmespath.compile('length(cpu_stats.cpu_usage.percpu_usage)')
    TX_BYTES = jmespath.compile('networks.eth0.tx_bytes')  # TODO: Always eth0??? (likely not...)
    RX_BYTES = jmespath.compile('networks.eth0.rx_bytes')
    try:
        while True:
            with client.pipeline() as pipe:
                start = time.time()
                containers = get(address, '/containers/json?all=1', debug)
                for container in containers:
                    name = container.get('Names')[0].strip('/')
                    status = container.get('Status')
                    id_ = container.get('Id')
                    log.debug("{}: {}".format(name, status))
                    stats = get(address, '/containers/{}/stats?stream=0'.format(id_), debug)  # Very slow call...

                    mem_usage = MEM_USAGE.search(stats) or 0
                    mem_limit = MEM_LIMIT.search(stats) or 1
                    mem_percent = 100.0 * (mem_usage / mem_limit) if mem_limit > 0 else 0

                    if debug:
                        log.debug("{}: Mem: {:,} {:,} {}%".format(name, mem_usage, mem_limit, mem_percent))

                    pipe.gauge('system.memory.virtual.percent,service={}'.format(name), mem_percent)

                    # http://stackoverflow.com/questions/30271942/get-docker-container-cpu-usage-as-percentage
                    cpu_percent = 0

                    total_usage = TOTAL_USAGE.search(stats) or 0
                    cpu_delta = total_usage - prev_cpu.get(name, 0)

                    system_usage = SYSTEM_USAGE.search(stats) or 0
                    system_delta = system_usage - prev_system.get(name, 0)

                    num_cpus = NUM_CPUS.search(stats) or 1

                    if system_delta > 0 and cpu_delta > 0:
                        cpu_percent = (cpu_delta / system_delta) * num_cpus * 100.0

                    if debug:
                        log.debug("{}: Cpu: {}, {}: {}%".format(name, cpu_delta, system_delta, cpu_percent))

                    prev_cpu[name], prev_system[name] = total_usage, system_usage

                    pipe.gauge('system.cpu.percent,service={}'.format(name), cpu_percent)

                    tx_bytes = TX_BYTES.search(stats) or 0
                    rx_bytes = RX_BYTES.search(stats) or 0

                    tx = tx_bytes - prev_tx_bytes.setdefault(name, 0)  # B
                    rx = rx_bytes - prev_rx_bytes.setdefault(name, 0)

                    timer = time.time()
                    elapsed = timer - prev_timer.get(name, 0)  # s
                    prev_timer[name] = timer

                    tx_rate = tx / elapsed if tx > 0 and elapsed > 0 else 0  # B/s
                    rx_rate = rx / elapsed if rx > 0 and elapsed > 0 else 0

                    pipe.gauge('system.network.send_rate,service={}'.format(name), tx_rate)
                    pipe.gauge('system.network.recv_rate,service={}'.format(name), rx_rate)

                    if debug:
                        log.debug("{}: Net Tx: {:,} -> {:,} ({}B/s)".format(name, tx_bytes, prev_tx_bytes[name], tx_rate))
                        log.debug("{}: Net Rx: {:,} -> {:,} ({}B/s)".format(name, rx_bytes, prev_rx_bytes[name], rx_rate))

                    prev_tx_bytes[name] = tx_bytes
                    prev_rx_bytes[name] = rx_bytes

                    pipe.gauge('system.disk.root.percent,service={}'.format(name), 0)

            elapsed = time.time() - start
            log.debug("docker: {}ms".format(int(elapsed * 1000)))
            time.sleep(interval - elapsed)

    except Exception as e:
        log.exception(e)
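The CPU gauge above follows the usual Docker stats formula: the container's CPU delta divided by the host's system delta, scaled by the CPU count. A worked example with made-up counter values:

prev_total, total = 4_000_000_000, 4_050_000_000        # container cpu time, ns
prev_system, system = 80_000_000_000, 81_000_000_000    # host cpu time, ns
num_cpus = 4
cpu_delta = total - prev_total                          # 50_000_000
system_delta = system - prev_system                     # 1_000_000_000
cpu_percent = (cpu_delta / system_delta) * num_cpus * 100.0
print(cpu_percent)                                      # 20.0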
Example #21
def run_docker(address, interval, host, port, debug=False):
    prev_cpu, prev_system = {}, {}
    prev_tx_bytes, prev_rx_bytes, prev_timer = {}, {}, {}
    client = statsd.StatsClient(host, port)
    MEM_USAGE = jmespath.compile('memory_stats.usage')
    MEM_LIMIT = jmespath.compile('memory_stats.limit')
    TOTAL_USAGE = jmespath.compile('cpu_stats.cpu_usage.total_usage')
    SYSTEM_USAGE = jmespath.compile('cpu_stats.system_cpu_usage')
    NUM_CPUS = jmespath.compile('length(cpu_stats.cpu_usage.percpu_usage)')
    TX_BYTES = jmespath.compile(
        'networks.eth0.tx_bytes')  # TODO: Always eth0??? (likely not...)
    RX_BYTES = jmespath.compile('networks.eth0.rx_bytes')
    try:
        while True:
            with client.pipeline() as pipe:
                start = time.time()
                containers = get(address, '/containers/json?all=1', debug)
                for container in containers:
                    name = container.get('Names')[0].strip('/')
                    status = container.get('Status')
                    id_ = container.get('Id')
                    log.debug("{}: {}".format(name, status))
                    stats = get(address,
                                '/containers/{}/stats?stream=0'.format(id_),
                                debug)  # Very slow call...

                    mem_usage = MEM_USAGE.search(stats) or 0
                    mem_limit = MEM_LIMIT.search(stats) or 1
                    mem_percent = 100.0 * (mem_usage /
                                           mem_limit) if mem_limit > 0 else 0

                    if debug:
                        log.debug("{}: Mem: {:,} {:,} {}%".format(
                            name, mem_usage, mem_limit, mem_percent))

                    pipe.gauge(
                        'system.memory.virtual.percent,service={}'.format(
                            name), mem_percent)

                    # http://stackoverflow.com/questions/30271942/get-docker-container-cpu-usage-as-percentage
                    cpu_percent = 0

                    total_usage = TOTAL_USAGE.search(stats) or 0
                    cpu_delta = total_usage - prev_cpu.get(name, 0)

                    system_usage = SYSTEM_USAGE.search(stats) or 0
                    system_delta = system_usage - prev_system.get(name, 0)

                    num_cpus = NUM_CPUS.search(stats) or 1

                    if system_delta > 0 and cpu_delta > 0:
                        cpu_percent = (cpu_delta /
                                       system_delta) * num_cpus * 100.0

                    if debug:
                        log.debug("{}: Cpu: {}, {}: {}%".format(
                            name, cpu_delta, system_delta, cpu_percent))

                    prev_cpu[name], prev_system[
                        name] = total_usage, system_usage

                    pipe.gauge('system.cpu.percent,service={}'.format(name),
                               cpu_percent)

                    tx_bytes = TX_BYTES.search(stats) or 0
                    rx_bytes = RX_BYTES.search(stats) or 0

                    tx = tx_bytes - prev_tx_bytes.setdefault(name, 0)  # B
                    rx = rx_bytes - prev_rx_bytes.setdefault(name, 0)

                    timer = time.time()
                    elapsed = timer - prev_timer.get(name, 0)  # s
                    prev_timer[name] = timer

                    tx_rate = tx / elapsed if tx > 0 and elapsed > 0 else 0  # B/s
                    rx_rate = rx / elapsed if rx > 0 and elapsed > 0 else 0

                    pipe.gauge(
                        'system.network.send_rate,service={}'.format(name),
                        tx_rate)
                    pipe.gauge(
                        'system.network.recv_rate,service={}'.format(name),
                        rx_rate)

                    if debug:
                        log.debug("{}: Net Tx: {:,} -> {:,} ({}B/s)".format(
                            name, tx_bytes, prev_tx_bytes[name], tx_rate))
                        log.debug("{}: Net Rx: {:,} -> {:,} ({}B/s)".format(
                            name, rx_bytes, prev_rx_bytes[name], rx_rate))

                    prev_tx_bytes[name] = tx_bytes
                    prev_rx_bytes[name] = rx_bytes

                    pipe.gauge(
                        'system.disk.root.percent,service={}'.format(name), 0)

            elapsed = time.time() - start
            log.debug("docker: {}ms".format(int(elapsed * 1000)))
            time.sleep(interval - elapsed)

    except Exception as e:
        log.exception(e)
Example #22
 def update(self, collection, query, record):
     """update"""
     log.debug("mongodb %s(qurey=%s) update: %s" %
               (collection, query, record))
     self.__client[collection].update_many(query, {"$set": record})
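A hypothetical usage of the wrapper above, assuming db is an instance of the surrounding class; it sets one field on every document matching the query:

db.update("jobs", {"site": "SiteA"}, {"status": "done"})
# issues update_many({"site": "SiteA"}, {"$set": {"status": "done"}}) on the jobs collection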
Example #23
 def log_debug(self, info):
     log.debug(info)
Example #24
    def cluster(self,
                reads,
                save_input_path=None,
                output_dir=None,
                cached_output=None):
        import shutil

        def get_seq_obj(output):
            seq_mapping = dict([(x.id, x) for x in reads])
            output_seqs = map(lambda x: [seq_mapping[y] for y in x], output)
            return output_seqs

        try:  ## reads are a path
            self.reads = dict([(x.id, x) for x in SeqIO.parse(reads, 'fasta')])
        except AttributeError as e:  ## reads is a list of SeqRecord-like objects
            self.reads = dict([(x.id, x) for x in reads])
        finally:
            if cached_output and self.distance_calculator.matrix:
                self.input_matrix, num_edges, mapping = self.convert_adjacency_matrix(
                    self.distance_calculator.matrix)
                mapping = self.reverse_mappings(mapping, self.reads)
                return [
                    Cluster(x, cluster_id=i, clustering_tool=self)
                    for i, x in enumerate(
                        sorted(self.parse_dsf_output(cached_output, mapping),
                               key=lambda x: len(x),
                               reverse=True))
                ]
            self.distance_calculator.generate_distances(reads)
        try:
            reads.close()
        except AttributeError as e:
            pass

        self.input_matrix, num_edges, mapping = self.convert_adjacency_matrix(
            self.distance_calculator.matrix)
        mapping = self.reverse_mappings(mapping, self.reads)

        log.debug('Number of edges in input graph:' + str(num_edges))

        # write adjacency to file for dsf input
        def writer(matrix, num_edges):
            yield '{} {} 001\n'.format(len(matrix), num_edges)
            for neighbours in matrix:
                line = ' '.join([
                    ' '.join(map(str, (n + 1, w)))
                    for n, w in sorted(neighbours, key=lambda x: x[0])
                ])
                yield '{}\n'.format(line)

        matrix_output_iterator = writer(self.input_matrix, num_edges)
        in_file = tempfile.NamedTemporaryFile(delete=True)

        try:
            if save_input_path:
                in_file = open(save_input_path, 'wb')
        except IOError as e:
            in_file = tempfile.NamedTemporaryFile(delete=True)
            log.warn(
                'Provided DSF input matrix write path not valid, using temporary file'
            )
        log.info('Saving dsf input file to {}'.format(in_file.name))
        in_file.writelines(matrix_output_iterator)
        in_file.flush()

        # check provided output_dir is valid
        if output_dir:
            if not os.path.exists(output_dir):
                log.warn(
                    'Provided DSF output directory path not valid, using temporary directory'
                )
                output_dir = None
            else:
                temp_dir = None

        # make temp output dir if no valid output dir provided
        if not output_dir:
            temp_dir = tempfile.mkdtemp()
            output_dir = temp_dir
            saved_umask = os.umask(
                0077)  # Ensure the file is read/write by the creator only

        # run DSF
        try:
            output = self.run(self.src, self.params, in_file.name, output_dir,
                              mapping)
            # run dsf
        except Exception as e:  # This is just so the temp files get deleted in the case some previous unhandled exception gets raised
            raise e
        finally:
            if temp_dir:
                os.umask(saved_umask)
                shutil.rmtree(temp_dir)
            in_file.close()

        ## generate instances of cluster_class.Cluster as result
        output = [
            Cluster(x, cluster_id=i, clustering_tool=self)
            for i, x in enumerate(
                sorted(output, key=lambda x: len(x), reverse=True))
        ]
        return output
Example #25
def do_run(args):
    ctgriddata = None

    if hasattr(args,"rass_data"):
        rass_data = args.rass_data
    else:
        rass_data = RASSData(root_folder=args.root_folder)

    ################################################################
    # Read options from the "input" folder
    ################################################################
    options = default_options()
    cfname = rass_data.input("config.json")
    if os.path.isfile(cfname):
        log.info("Reading options from file: %s" % cfname)
        with open(cfname) as options_file:
            options.update(json.load(options_file))

    ################################################################
    # Override options with a file passed via command-line arguments
    ################################################################
    for i in range(len(argv)):
        if "options" == argv[i]:
            fname = "%s" % (argv[i + 1])
            log.info("Reading options from file: %s" % fname)
            with open(fname) as options_file:
                options.update(json.load(options_file))

    dicomutils.DEBUG_LEVEL = options["debug_level"]

    ################################################################
    # Look for DICOM files in the "input"/dicom subdirectory
    ################################################################
    rtss, plan, ctlist, doseslist = dicomutils.find_ct_rs_rp_dicom(rass_data.input("dicom"))
    if rtss is None or plan is None:
        raise Exception(f"No RS.* or rtss.* file in {rass_data.input('dicom')}")


    ################################################################
    # Read the DICOM files with structure (ROI) information
    # and the plan
    ################################################################
    rtss = dicom.read_file(rtss)
    plan = dicom.read_file(plan)
    treatment_name = '-'.join(plan.PatientID.split('^'))
    log.info('Name: ' + treatment_name)


    ################################################################
    # Read the CT data using VTK
    ################################################################
    from ct import CTVolumeDataReader
    reader = CTVolumeDataReader(rass_data.input("dicom"), ctfiles=ctlist)
    ctVolumeData = reader.read()
    ctData = ctVolumeData.getCTDataAsNumpyArray()


    if len(ctlist) > 0:
        ct = dicom.read_file(ctlist[0])
        ctgriddata = list(map(float, (
                                 ct.ImagePositionPatient[0], ct.ImagePositionPatient[1],
                                 ct.PixelSpacing[0], ct.PixelSpacing[1], 
                                 ct.Columns, ct.Rows)))
    else:
        ctgriddata = None

    ################################################################
    # Read the dose information for the beams from DICOM
    ################################################################
    beams = [dicom.read_file(f) for f in doseslist]

    ##################################################################
    # Read the doses from the individual beams
    ##################################################################
    beamDoses = {}
    totalDoses = None
    totalDosesFile = None
    doseScaling = None
    singleBeam = False
    for beam in beams:
        doseScaling = float(beam.DoseGridScaling)
        try:
            bn = int(beam.ReferencedRTPlanSequence[0].ReferencedFractionGroupSequence[0].ReferencedBeamSequence[0].ReferencedBeamNumber)
        except:
            print("Semething wrong went...")
            if totalDoses is None:
                singleBeam = True
                totalDoses = beam.pixel_array.copy()
                totalDosesFile = beam.filename
            continue
        beamDoses[bn] = beam.pixel_array
        if doseScaling is not None and float(beam.DoseGridScaling) != doseScaling:
            log.warning('Strange data: DoseGridScaling is not the same for all beams!')
        log.info(f"Got doses data for beam number {bn}")

    ##################################################################
    # Sum the doses from the individual beams into the total dose
    ##################################################################
    if not singleBeam:
        print(beamDoses)
        bns = list(beamDoses.keys())
        totalDoses = beamDoses[bns[0]].copy()
        for i in range(1, len(bns)):
            log.info(f"Adding doses from beam {i}")
            totalDoses += beamDoses[bns[i]]

    totalDoses = np.array(totalDoses, dtype=np.float32)
    log.info("Read doses for %d beams" % len(beamDoses))

    minDose = np.min(totalDoses)
    averageDose = np.average(totalDoses)
    maxDose = np.max(totalDoses)

    if totalDosesFile is None:
        log.info('Total doses calculated as sum of beam doses (min dose=%f, average dose=%f, max dose=%f, doseScaling=%f)' % (
            minDose, averageDose, maxDose, doseScaling))
    else:
        log.info('Got total doses from file %s (min dose=%f, average dose=%f, max dose = %f, doseScaling=%f)' % (
            totalDosesFile, minDose, averageDose, maxDose, doseScaling))


    # Planning grid information taken from the first beam
    tBeam = beams[0]
    kmax = tBeam.Columns # x?
    jmax = tBeam.Rows # y?
    imax = len(tBeam.GridFrameOffsetVector) # z
    xbase = float(tBeam.ImagePositionPatient[0]) * SCALE
    ybase = float(tBeam.ImagePositionPatient[1]) * SCALE
    zbase = float(tBeam.ImagePositionPatient[2]) * SCALE
    dx = float(tBeam.PixelSpacing[0]) * SCALE
    dy = float(tBeam.PixelSpacing[1]) * SCALE
    zoffsets = list(map(float, tBeam.GridFrameOffsetVector))
    for i in range(len(zoffsets)):
        zoffsets[i] *= SCALE
    dz = zoffsets[1] - zoffsets[0]
    dv = dx * dy * dz

    log.info('Planning grid: %d x %d x %d in [%g:%g]x[%g:%g]x[%g:%g] dx,dy,dz=%g,%g,%g -> dv=%g' % (
        kmax, jmax, imax,
        xbase, xbase + kmax * dx, ybase, ybase + jmax * dy, zbase + zoffsets[0], zbase + zoffsets[-1],
        dx, dy, dz, dv))

    planGridInfo = {'ixmax': kmax, 'iymax': jmax, 'izmax': imax,
                    'xorig': xbase, 'yorig': ybase, 'zorig': zbase,
                    'dx': dx, 'dy': dy, 'dz': dz,
                    'minDose': minDose, 'avgDose': averageDose, 'maxDose': maxDose,
                    'doseScaling': doseScaling
                    }


    ####################################################
    # ROI analysis
    ####################################################
    myROIs = []
    idxROIBody = -1
    for i in range(0, len(rtss.StructureSetROISequence)):
        roiName = rtss.StructureSetROISequence[i].ROIName
        log.info(f"Reading contours for {roiName} from DICOM")

        contours = dicomutils.findContours(rtss, rtss.StructureSetROISequence[i].ROINumber)
        if len(contours) > 1:
            r = MyRoi(contours, roiName, float(tBeam.PixelSpacing[0]) / 1000.0)
            myROIs.append(r)

            if ("body" in roiName.lower() or "skin" in roiName.lower() or "outline" in roiName.lower()) and (idxROIBody == -1):
                idxROIBody = i
                log.info("Found ROI body (or skin): idx = %d" % idxROIBody)

    if idxROIBody == -1:
        raise Exception("The structure file does not contain any structure with 'body', 'outline' or 'skin' in the name.")


    ##########################################################################
    # Mark ROIs or read from cache (cache is a file in a working
    # directory, separate file for each ROI,
    # the filename pattern is: "%s_%s.markscache" % (treatment_name, ROIName)
    ##########################################################################
    roi_marks = np.zeros((imax, jmax, kmax), dtype=np.int64)
    for r in range(0, len(myROIs)):
        fcache = rass_data.processing("%s_%s.markscache" % (treatment_name, myROIs[r].name))
        if myROIs[r].read_marks(fcache, roi_marks):
            log.info("Read marking voxels for %s from cache" % myROIs[r].name)
            myROIs[r].countVoxels(roi_marks, 2 ** r)
        else:
            log.info("Marking voxels for %s" % myROIs[r].name)
            log.debug("CTGRID DATA %s" % list(ctgriddata))

            myROIs[r].mark(xbase / SCALE, ybase / SCALE, dx / SCALE, dy / SCALE, kmax, jmax, imax,
                           np.linspace(zbase, zbase + (imax - 1) * dz, imax) / SCALE, roi_marks, 2 ** r, ctgriddata=ctgriddata)
            myROIs[r].save_marks(fcache, roi_marks, 2 ** r)

    for r in range(len(myROIs)):
        log.info("Statistics for %20s: ID=%8d, %7d voxels, vol=%8.1f discrete vol=%8.1f [cm3]" % (
            myROIs[r].name, 2 ** r, myROIs[r].count, myROIs[r].volume / 1000.,
            myROIs[r].count * dv / SCALE / SCALE / SCALE / 1000.0))


    # CT data is loaded - ctData
    # doses are loaded - totalDoses (planning grid coordinates)
    # ROI information is available - roi_marks (planning grid coordinates)

    # Now the CT has to be rescaled and the data written out, and we are done...

    plan_origin = (xbase, ybase, zbase)
    plan_dimensions = (kmax, jmax, imax)
    plan_spacing = (dx, dy, dz)
    ctOnPlanningGrid = ctVolumeData.approximateCTOnPlanGrid( plan_origin, plan_spacing, plan_dimensions )

    ## save to VTI files
    npar = ctOnPlanningGrid
    if not skip_vti:
        VolumeData.saveVolumeGridToFile(plan_spacing, plan_dimensions, plan_origin, 
             npar, rass_data.output("approximated_ct"))

        VolumeData.saveVolumeGridToFileAsLong(plan_spacing, plan_dimensions, plan_origin, 
             roi_marks, rass_data.output("roi_marks"))

        for r in range(0, len(myROIs)):
            d = np.array(np.bitwise_and(roi_marks, (2 ** r)) / (2 ** r), dtype=np.float32)
            log.debug(f"ROI: {myROIs[r].name}[{2 ** r}].size() = {np.sum(d)}")
            log.info(f"Saving roi marks for {myROIs[r].name} to {rass_data.output(f'roi_marks_{myROIs[r].name}')}.vti file ...")
            VolumeData.saveVolumeGridToFile(plan_spacing, plan_dimensions, plan_origin, 
                    d, rass_data.output(f"roi_marks_{myROIs[r].name}"))


        VolumeData.saveVolumeGridToFile(plan_spacing, plan_dimensions, plan_origin, 
             totalDoses, rass_data.output("total_doses"))


    ## save to ndarray files
    from bdfileutils import save_ndarray, read_ndarray
    ctOnPlanningGrid = np.reshape(ctOnPlanningGrid, (imax, jmax, kmax))
    save_ndarray(rass_data.output("approximated_ct.nparray"),ctOnPlanningGrid)

    roi_marks = np.reshape(roi_marks, (imax, jmax, kmax))
    save_ndarray(rass_data.output("roi_marks.nparray"),roi_marks)


    for r in range(0, len(myROIs)):
        d = np.array(np.bitwise_and(roi_marks, (2 ** r)) / (2 ** r), dtype=np.int32)
        d = np.reshape(d, (imax, jmax, kmax))
        save_ndarray(rass_data.output(f"roi_marks_{myROIs[r].name}.nparray"), d)


    totalDoses = np.reshape(totalDoses, (imax, jmax, kmax))
    save_ndarray(rass_data.output("total_doses.nparray"),totalDoses)

    with open(rass_data.output("roi_mapping.txt"),"w") as f:
        for i in range(len(myROIs)):
            f.write(f"{myROIs[i].name}:{2 ** i}\n")
Example #26
    def parse(self, graph):
        """
      Parses the given graph with the provided grammar.
      """

        # This function is very similar to its counterpart in the regular
        # (non-tree-decomposing) parser. Read the comments there to understand how it
        # works.

        start_time = time.clock()
        log.chatter('parse...')

        # ensure that the input graph has its shortest-path table precomputed
        graph.compute_fw_table()

        chart = ddict(set)
        # TODO command line option to switch rule filtering on/off
        pgrammar = [
            self.grammar[r] for r in self.grammar.reachable_rules(graph, None)
        ]
        queue = deque()
        pending = set()
        attempted = set()
        visited = set()
        terminal_lookup = ddict(set)
        passive_item_lookup = ddict(set)
        tree_node_lookup = ddict(set)
        passive_item_rev_lookup = ddict(set)
        tree_node_rev_lookup = ddict(set)

        for edge in graph.triples(nodelabels=self.nodelabels):
            terminal_lookup[edge[1]].add(edge)

        for rule in pgrammar:
            for leaf in rule.tree_leaves:
                axiom = self.item_class(rule,
                                        leaf,
                                        graph,
                                        nodelabels=self.nodelabels)
                queue.append(axiom)
                pending.add(axiom)
                assert leaf not in rule.tree_to_edge

        success = False

        while queue:
            item = queue.popleft()
            pending.remove(item)
            visited.add(item)
            log.debug('handling', item, item.subgraph)

            if item.target == Item.NONE:
                log.debug('  none')
                tree_node_lookup[item.self_key].add(item)
                for ritem in tree_node_rev_lookup[item.self_key]:
                    if ritem not in pending:
                        queue.append(ritem)
                        pending.add(ritem)

            elif item.target == Item.ROOT:
                log.debug('  root')
                if self.is_goal(item):
                    chart['START'].add((item, ))
                    success = True
                    log.debug("success!")

                passive_item_lookup[item.self_key].add(item)
                for ritem in passive_item_rev_lookup[item.self_key]:
                    if ritem not in pending:
                        log.debug('    retrieving', ritem)
                        queue.append(ritem)
                        pending.add(ritem)

            elif item.target == Item.TERMINAL:
                log.debug('  terminal')
                new_items = [
                    item.terminal(edge)
                    for edge in terminal_lookup[item.next_key]
                ]
                new_items = [i for i in new_items if i]
                for nitem in new_items:
                    chart[nitem].add((item, ))
                    if nitem not in pending and nitem not in visited:
                        log.debug('    new item!', nitem)
                        queue.append(nitem)
                        pending.add(nitem)

            else:
                if item.target == Item.BINARY:
                    log.debug('  binary')
                    rev_lookup = tree_node_rev_lookup
                    lookup = tree_node_lookup
                    action = self.item_class.binary
                elif item.target == Item.NONTERMINAL:
                    log.debug('  nonterminal')
                    rev_lookup = passive_item_rev_lookup
                    lookup = passive_item_lookup
                    action = self.item_class.nonterminal
                else:
                    assert False

                rev_lookup[item.next_key].add(item)
                for oitem in lookup[item.next_key]:
                    if (item, oitem) in attempted:
                        continue
                    attempted.add((item, oitem))
                    log.debug('  try', oitem, oitem.subgraph)
                    nitem = action(item, oitem)
                    if not nitem:
                        continue
                    log.debug('    new item!', nitem)
                    chart[nitem].add((item, oitem))
                    if nitem not in pending and nitem not in visited:
                        queue.append(nitem)
                        pending.add(nitem)

        if success:
            log.chatter('  success!')

        etime = time.clock() - start_time
        log.chatter('done in %.2fs' % etime)
        return chart
Example #27
def Submit(config, jobdesc):
    """    
    Submits an ATLAS job to the ScGrid host specified in arc.conf. This method executes the required
    RunTimeEnvironment scripts and assembles the bash job script. The job script is
    written to file and submitted with SCEAPI.
                                                                                     
    :param str config: path to arc.conf
    :param jobdesc: job description object  
    :type jobdesc: :py:class:`arc.JobDescription`
    :return: local job ID if successfully submitted, else ``None``
    :rtype: :py:obj:`str`
    """

    import fcntl

    # Allow only one submit at the same time
    _lock = open("/tmp/sceapi-submit-job.lock", "a")
    fcntl.flock(_lock, fcntl.LOCK_EX)

    configure(config, set_sceapi)
    client = setup_api()

    validate_attributes(jobdesc)

    # Run RTE stage0
    debug("----- starting sceapiSubmitter.py -----", "sceapi.Submit")
    rel = re.compile(r"APPS/HEP/ATLAS-(?P<release>[\d\.]+-[\w_-]+)")
    release = None
    for rte in jobdesc.Resources.RunTimeEnvironment.getSoftwareList():
        match = rel.match(str(rte))
        if match:
            release = match.groupdict()["release"]
            break
    if not release:
        raise ArcError("ATLAS release not specified", "sceapi.Submit")

    # Create job dict
    jobJSDL = assemble_dict(jobdesc, release)
    args = jobJSDL.pop("arguments")
    input_dict = get_input_dict(jobdesc, args)

    debug("SCEAPI jobname: %s" % jobdesc.Identification.JobName, "sceapi.Submit")
    debug("SCEAPI job dict built", "sceapi.Submit")
    debug("----------------- BEGIN job dict -----", "sceapi.Submit")
    for key, val in jobJSDL.items():
        debug("%s : %s" % (key, val), "sceapi.Submit")
    debug("----------------- END job dict -----", "sceapi.Submit")

    #######################################
    #  Submit the job
    ######################################

    directory = jobdesc.OtherAttributes["joboption;directory"]
    debug("session directory: %s" % directory, "sceapi.Submit")
    resp = client.submitJSON(jobJSDL)
    handle = None
    try:
        handle = json.loads(resp, "utf8")
    except:
        raise ArcError("SCEAPI client response:\n%s" % str(resp), "sceapi.Submit")

    failure = ""
    if handle["status_code"] == 0:

        jobid = handle["gidujid"]["ujid"]
        gid = handle["gidujid"]["gid"]

        upload_tries = 0
        ret_code = -1
        while upload_tries < 5:
            resp_text = client.putfiles(gid, input_dict)
            try:
                ret_code = json.loads(resp_text, "utf8")["status_code"]
                assert ret_code == 0
                break
            except AssertionError:
                sleep(2)
                upload_tries += 1
            except:
                raise ArcError("SCEAPI client response:\n%s" % str(resp_text), "sceapi")

        if ret_code == 0:
            if json.loads(client.run(jobid), "utf8")["status_code"] == 0:
                debug("job submitted successfully!", "sceapi.Submit")
                debug("local job id: %s" % jobid, "sceapi.Submit")
                debug("----- exiting sceapiSubmitter.py -----", "sceapi.Submit")
                return jobid
            failure = "Start job query failed."
        else:
            failure = "Failed to upload input files."
    else:
        failure = "Status code %i: %s" % (handle["status_code"], translate(handle["status_reason"]))

    debug("job *NOT* submitted successfully!", "sceapi.Submit")
    if failure:
        debug(failure.encode("utf-8"), "sceapi.Submit")
    debug("----- exiting sceapiSubmitter.py -----", "sceapi.Submit")
Example #28
 def log_debug(self, info):
     if self.log_switch:
         log.debug(info)
Example #29
def PreProcessing(pre_option):

    argv = pre_option['img_PATH']
    pre_img_mode = pre_option['pre_img_mode']
    after_img_mode = pre_option['after_img_mode']

    ##############################################
    ###############################################
    ############### Global variable setup ###################
    ##############################################

    ############MNIST_SIZE#######################
    MNIST_IMAGE_FORMAT_SIZE = pre_option['MNIST_IMAGE_FORMAT_SIZE']

    ############ Morphology ##########################
    MORPH_KERNEL_SIZE = pre_option['MORPH_KERNEL_SIZE']  # morphology kernel size
    morph_kernel = np.ones((MORPH_KERNEL_SIZE, MORPH_KERNEL_SIZE),
                           np.uint8)  # kernel used for morphology operations

    ########### Threshold ##########################
    MIN_THRESH = pre_option['MIN_THRESH']  # minimum value used in thresholding
    MAX_THRESH = pre_option['MAX_THRESH']  # maximum value used in thresholding

    ######## Adaptive threshold #########################
    ADPT_THRESH = pre_option['ADPT_THRESH']  # maximum value applied to pixels, per
    # thresholdType, together with the threshold computed by adaptiveThreshold
    ADPT_BLOCKSIZE = pre_option['ADPT_BLOCKSIZE']
    WEIGHTED_C = pre_option['WEIGHTED_C']

    #############CANNY##########################
    MIN_CANNY = pre_option['MIN_CANNY']  # edges at or below MIN_CANNY are excluded
    MAX_CANNY = pre_option['MAX_CANNY']  # edges at or above MAX_CANNY are treated as edges
    SOBEL_KERNEL_SIZE = pre_option[
        'SOBEL_KERNEL_SIZE']  # kernel size used by Canny / aperture size of the Sobel mask
    # == apertureSize

    ###########BLUR###########################
    GAUSSIAN_KERNEL_SIZE = pre_option[
        'GAUSSIAN_KERNEL_SIZE']  # Gaussian blur kernel size / usually 5

    ##########erosion##########################
    EROSION_ITER1 = pre_option['EROSION_ITER1']  # number of erosion iterations
    EROSION_ITER2 = pre_option['EROSION_ITER2']
    EROSION_ITER3 = pre_option['EROSION_ITER3']
    EROSION_ITER4 = pre_option['EROSION_ITER4']
    EROSION_ITER5 = pre_option['EROSION_ITER5']
    EROSION_KERNEL_SIZE = pre_option['EROSION_KERNEL_SIZE']  # erosion kernel size
    erosion_kernel = np.ones((EROSION_KERNEL_SIZE, EROSION_KERNEL_SIZE),
                             np.uint8)

    ##########dilation#########################
    DILATION_ITER1 = pre_option['DILATION_ITER1']  # number of dilation iterations
    DILATION_ITER2 = pre_option['DILATION_ITER2']
    DILATION_ITER3 = pre_option['DILATION_ITER3']
    DILATION_ITER4 = pre_option['DILATION_ITER4']
    DILATION_ITER5 = pre_option['DILATION_ITER5']
    DILATION_KERNEL_SIZE = pre_option['DILATION_KERNEL_SIZE']  # dilation kernel size
    dilation_kernel = np.ones((DILATION_KERNEL_SIZE, DILATION_KERNEL_SIZE),
                              np.uint8)
    ###########################################
    MARGIN_FOR_SLICEDIMG = pre_option['MARGIN_FOR_SLICEDIMG']

    ######################################################
    ################### Preprocessing functions #####################
    ######################################################

    def Gray(img_param):
        gray = cv2.cvtColor(img_param, cv2.COLOR_BGR2GRAY)
        return gray

    def binary_Threshold(img_param):
        ret, dst = cv2.threshold(img_param, MIN_THRESH, MAX_THRESH,
                                 cv2.THRESH_BINARY)
        return dst

    def Blur(img_param):
        blur = cv2.GaussianBlur(img_param,
                                (GAUSSIAN_KERNEL_SIZE, GAUSSIAN_KERNEL_SIZE),
                                0)
        return blur

    def morph_GRADIENT(img_param):
        morph_G = cv2.morphologyEx(img_param, cv2.MORPH_GRADIENT, morph_kernel)
        return morph_G

    def adaptive_Threshold(img_param):
        adapt_th = cv2.adaptiveThreshold(img_param, ADPT_THRESH,
                                         cv2.ADAPTIVE_THRESH_MEAN_C,
                                         cv2.THRESH_BINARY, ADPT_BLOCKSIZE,
                                         WEIGHTED_C)
        # cv2.ADAPTIVE_THRESH_MEAN_C == 0
        # cv2.ADAPTIVE_THRESH_GAUSSIAN_C == 1
        return adapt_th

    def morph_CLOSE(img_param):
        morph_C = cv2.morphologyEx(img_param, cv2.MORPH_CLOSE, morph_kernel)
        return morph_C

    def Canny(img_param):
        edges = cv2.Canny(img_param,
                          MIN_CANNY,
                          MAX_CANNY,
                          apertureSize=SOBEL_KERNEL_SIZE)
        return edges

    def Erosion(img_param, iter):
        erode = cv2.erode(img_param, erosion_kernel, iterations=iter)
        return erode

    def Dilatation(img_param, iter):
        dil = cv2.dilate(img_param, dilation_kernel, iterations=iter)
        return dil

    ###################################################################
    ##################convexhull & slicing IMG & resizing##############
    ###################################################################
    ###################################################################

    def Slicing_Resizing(img_param, pre_mode):  #
        contours, hierarchy = cv2.findContours(img_param, cv2.RETR_LIST,
                                               cv2.CHAIN_APPROX_SIMPLE)
        c = 0
        for cnt in contours:
            # x, y, w, h = cv2.boundingRect(cnt)
            c = c + 1
            hull = cv2.convexHull(cnt)
            convexHull = cv2.drawContours(img_param, [hull],
                                          0, (125, 125, 125),
                                          thickness=-1)

        n_contours, n_hierarchy = cv2.findContours(convexHull,
                                                   cv2.RETR_EXTERNAL,
                                                   cv2.CHAIN_APPROX_SIMPLE)
        n_c = 0
        rects = []
        im_w = img_param.shape[1]
        for cnt in n_contours:
            x, y, w, h = cv2.boundingRect(cnt)
            blocking = cv2.drawContours(img_param, n_contours, -1, (0, 0, 255),
                                        1)
            n_c = n_c + 1
            y2 = round(y / 10) * 10
            index = y2 * im_w + x
            rects.append((index, x, y, w, h))
        rects = sorted(rects, key=lambda x: x[0])

        for i, r in enumerate(rects):
            index, x, y, w, h = r
            if pre_mode == 'bt':
                sliced_img = bt[y:y + h, x:x + w]
            elif pre_mode == 'bt_dil1' or pre_mode == 'imgmodel1':
                sliced_img = bt_dil1[y:y + h, x:x + w]

            elif pre_mode == 'bt_dil1_ero2' or pre_mode == 'imgmodel2':
                sliced_img = bt_dil1_ero2[y:y + h, x:x + w]
            else:
                sliced_img = bt[y:y + h, x:x + w]
            sliced_img = 255 - sliced_img
            global cnt_imgList
            cnt_imgList = cnt_imgList + 1

            # Pad the crop into a square canvas (side = the larger of w and h,
            # scaled by MARGIN_FOR_SLICEDIMG) and centre it before resizing.
            ww = round((w if w > h else h) * MARGIN_FOR_SLICEDIMG)
            spc = np.zeros((ww, ww))
            wy = (ww - h) // 2
            wx = (ww - w) // 2

            spc[wy:wy + h, wx:wx + w] = sliced_img
            if h < MNIST_IMAGE_FORMAT_SIZE or w < MNIST_IMAGE_FORMAT_SIZE:
                sliced_img = cv2.resize(
                    spc, (MNIST_IMAGE_FORMAT_SIZE, MNIST_IMAGE_FORMAT_SIZE),
                    interpolation=cv2.INTER_AREA)
            else:
                sliced_img = cv2.resize(
                    spc, (MNIST_IMAGE_FORMAT_SIZE, MNIST_IMAGE_FORMAT_SIZE),
                    interpolation=cv2.INTER_LINEAR)
            imgList.append(sliced_img)

        #Display(imgList)
        return imgList

    def After_processing(imgList_param, after_mode):
        for i in range(cnt_imgList):
            if after_mode == 'dil1':
                img_Sliced = Dilatation(imgList_param[i], DILATION_ITER1)
            elif after_mode == 'dil2':
                img_Sliced = Dilatation(imgList_param[i], DILATION_ITER2)
            elif after_mode == 'dil3':
                img_Sliced = Dilatation(imgList_param[i], DILATION_ITER3)
            elif after_mode == 'ero1':
                img_Sliced = Erosion(imgList_param[i], EROSION_ITER1)
            elif after_mode == 'ero2':
                img_Sliced = Erosion(imgList_param[i], EROSION_ITER2)
            elif after_mode == 'ero3':
                img_Sliced = Erosion(imgList_param[i], EROSION_ITER3)
            elif after_mode == 'blur':
                img_Sliced = Blur(imgList_param[i])
            else:
                print("No matching after-processing mode: " + str(after_mode))
                continue  # img_Sliced would be undefined here
            imgList2.append(img_Sliced)
        return imgList2

    def imgData_Nomalization(imgList_param):
        for i in range(cnt_imgList):
            imgList_param[i] = imgList_param[i].reshape(
                MNIST_IMAGE_FORMAT_SIZE * MNIST_IMAGE_FORMAT_SIZE)
            imgList_param[i] = imgList_param[i].astype("float32") / 255.0
            X.append(imgList_param[i])
        param = np.array(X)
        return param

    def namestr(obj, namespace):
        return [name for name in namespace if namespace[name] is obj]

    def Display(argv):
        count = 0
        nrows = 6
        ncols = 5

        plt.figure(figsize=(8, 8))

        for n in range(len(argv)):
            count += 1
            plt.subplot(nrows, ncols, count)
            # plt.title(img_array_name[n])
            # plt.imshow(argv[n], cmap='Greys_r')
            plt.imshow(argv[n])

        plt.tight_layout()
        plt.show()

    ###################################################################
    #################### Actual image processing ######################
    ###################################################################

    img = cv2.imread(argv)
    if img is None:
        log.debug('Cannot load image: ' + argv)
        exit()

    gray = Gray(img)
    bt = binary_Threshold(gray)
    blur = Blur(bt)
    mg = morph_GRADIENT(blur)
    at = adaptive_Threshold(mg)
    canny = Canny(mg)

    ############pre img model#######################################
    bt_dil1 = Dilatation(bt, DILATION_ITER1)  # img model 1
    bt_dil1_ero2 = Erosion(bt_dil1, EROSION_ITER2)  # img model 2
    ################################################################
    Slicing_Resizing(canny, pre_img_mode)
    #Slicing_Resizing(param1, param2)
    #param1 = image used to pick the regions to crop (Canny edge image)
    #param2 = which preprocessed image to crop from

    After_processing(imgList, after_img_mode)
    #After_processing(param1, param2)
    #param1 = list of the cropped and resized images
    #param2 = mode used to re-process the cropped and resized images

    #Display(imgList2)

    return imgData_Nomalization(imgList2)
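The array returned here has shape (N, MNIST_IMAGE_FORMAT_SIZE**2) with values scaled to [0, 1], i.e. a flattened MNIST-style format. A minimal usage sketch follows; the enclosing function name, the model file and the Keras calls are illustrative assumptions, not part of the original code:

# Hypothetical usage: classify the preprocessed digit crops.
from tensorflow.keras.models import load_model

X = preprocess_image('digits.png', 'bt_dil1', 'dil1')  # hypothetical name/signature for the enclosing function
model = load_model('mnist_dense.h5')                   # assumed classifier trained on flattened 28x28 input
digits = model.predict(X).argmax(axis=1)
print(digits.tolist())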
Example #30
0
    idxROIBody = -1
    for i in range(0, len(rtss.StructureSetROISequence)):
        roiName = rtss.StructureSetROISequence[i].ROIName
        log.info(f"Finding contours for {roiName}")
        myROIs.append(MyRoi(dicomutils.findContours(rtss, rtss.StructureSetROISequence[i].ROINumber),
                            roiName, float(first_beam.PixelSpacing[0]) / 1000.0))

        if "body" in roiName.lower():
            idxROIBody = i
            log.info("Found ROI body: idx = %d" % idxROIBody)
        if idxROIBody == -1 and ("skin" in roiName.lower() or "outline" in roiName.lower()):
            idxROIBody = i
            log.info(f"Found ROI body ({roiName}): idx = {idxROIBody}")

    end = time.time()
    log.debug("Found contours in %s s" % (end - start))

    if idxROIBody == -1:
        raise Exception("The structure file does not contain any structure with 'body', 'outline' or 'skin' in the name.")


    ##########################################################################
    # Mark ROIs or read from cache (cache is a file in a working
    # directory, separate file for each ROI,
    # the filename pattern is: "%s_%s.markscache" % (treatment_name, ROIName)
    ##########################################################################
    roi_marks = np.zeros((imax, jmax, kmax), dtype=int)
    roi_marks_check = np.zeros((imax, jmax, kmax), dtype=int)
    for r in range(0, len(myROIs)):
        fcache = rass_data.processing("%s_%s.markscache" % (treatment_name, myROIs[r].name))
        if myROIs[r].read_marks(fcache, roi_marks) is False:
Example #31
0
def main():
    if isWindows:
        win32serviceutil.HandleCommandLine(StatsdAgentService)

    else:
        import multiprocessing

        config = StatsdConfig(allow_no_value=True)
        config.read('statsd-agent.cfg')

        parser = argparse.ArgumentParser()
        parser.add_argument('--host',
                            '-t',
                            type=str,
                            default=config.get_str('host',
                                                   default='localhost'),
                            help='Hostname or IP of statsd/statsite server.')
        parser.add_argument('--port',
                            '-p',
                            type=int,
                            default=config.get_int('port', default=8125),
                            help='UDP port number of statsd/statsite server.')
        parser.add_argument('--prefix',
                            '-x',
                            type=str,
                            default=config.get_str('prefix'),
                            help='Prefix value to add to each measurement.')
        parser.add_argument(
            '--field',
            '-f',
            action='append',
            default=[],
            help="One or more 'key=value' fields to add to each measurement.")
        parser.add_argument('--network',
                            '--nic',
                            '-n',
                            type=str,
                            default=config.get_str('nic'),
                            help='NIC to measure.')
        parser.add_argument(
            '--interval',
            '-i',
            type=int,
            default=config.get_int('interval', default=10),
            help='Time in seconds between system measurements. Must be > 2.')
        parser.add_argument('--add-host-field',
                            '-a',
                            action='store_true',
                            help='Auto add host= to fields.')
        parser.add_argument('--debug',
                            '-g',
                            action='store_true',
                            help="Turn on debugging.")
        parser.add_argument('--docker',
                            '-d',
                            action='store_true',
                            help="Enable docker")
        parser.add_argument('--docker-addr',
                            '-D',
                            type=str,
                            default=config.get_str(
                                'address',
                                'docker',
                                default='/var/run/docker.sock'))
        parser.add_argument(
            '--docker-interval',
            '-I',
            type=int,
            default=config.get_int('interval', 'docker', default=15),
            help='Time in seconds between docker measurements. Must be > 2.')

        args = parser.parse_args()
        docker = config.get_boolean('enabled', 'docker',
                                    default=False) or args.docker
        debug = config.get_boolean('debug', default=False) or args.debug
        prefix = args.prefix if args.prefix else ''

        if debug:
            log.debug("host={}:{}".format(args.host, args.port))
            log.debug("prefix={}".format(prefix))

        fields = config.get_fields(args.field, args.add_host_field)

        if debug:
            log.debug("fields: {}".format(fields))

        if args.interval < 3:
            log.error("Invalid system interval (< 3sec).")
            return 1

        if args.docker_interval < 3:
            log.error("Invalid docker interval (< 3sec).")
            return 1

        nic = get_nic(args.network)
        if not nic:
            log.error("Could not locate 10.x.x.x network interface!")
            return 1

        if docker:
            multiprocessing.Process(target=run_docker,
                                    args=(args.docker_addr,
                                          args.docker_interval, args.host,
                                          args.port, debug)).start()

        try:
            while True:
                start = time.time()
                run_once(args.host, args.port, prefix, fields, nic, debug)
                elapsed = time.time() - start
                log.debug("statsd: {}ms".format(int(elapsed * 1000)))
                time.sleep(args.interval - elapsed)
        except KeyboardInterrupt:
            pass

        return 0
Example #32
0
def main():
    if isWindows:
        win32serviceutil.HandleCommandLine(StatsdAgentService)

    else:
        import multiprocessing

        config = StatsdConfig(allow_no_value=True)
        config.read('statsd-agent.cfg')

        parser = argparse.ArgumentParser()
        parser.add_argument('--host', '-t', type=str, default=config.get_str('host', default='localhost'),
                            help='Hostname or IP of statsd/statsite server.')
        parser.add_argument('--port', '-p', type=int, default=config.get_int('port', default=8125),
                            help='UDP port number of statsd/statsite server.')
        parser.add_argument('--prefix', '-x', type=str, default=config.get_str('prefix'),
                            help='Prefix value to add to each measurement.')
        parser.add_argument('--field', '-f', action='append', default=[],
                            help="One or more 'key=value' fields to add to each measurement.")
        parser.add_argument('--network', '--nic', '-n', type=str,
                            default=config.get_str('nic'), help='NIC to measure.')
        parser.add_argument('--interval', '-i', type=int, default=config.get_int('interval', default=10),
                            help='Time in seconds between system measurements. Must be > 2.')
        parser.add_argument('--add-host-field', '-a', action='store_true', help='Auto add host= to fields.')
        parser.add_argument('--debug', '-g', action='store_true', help="Turn on debugging.")
        parser.add_argument('--docker', '-d', action='store_true', help="Enable docker")
        parser.add_argument('--docker-addr', '-D', type=str, default=config.get_str('address', 'docker',
                                                                                    default='/var/run/docker.sock'))
        parser.add_argument('--docker-interval', '-I', type=int, default=config.get_int('interval', 'docker', default=15),
                            help='Time in seconds between docker measurements. Must be > 2.')

        args = parser.parse_args()
        docker = config.get_boolean('enabled', 'docker', default=False) or args.docker
        debug = config.get_boolean('debug', default=False) or args.debug
        prefix = args.prefix if args.prefix else ''

        if debug:
            log.debug("host={}:{}".format(args.host, args.port))
            log.debug("prefix={}".format(prefix))

        fields = config.get_fields(args.field, args.add_host_field)

        if debug:
            log.debug("fields: {}".format(fields))

        if args.interval < 3:
            log.error("Invalid system interval (< 3sec).")
            return 1

        if args.docker_interval < 3:
            log.error("Invalid docker interval (< 3sec).")
            return 1

        nic = get_nic(args.network)
        if not nic:
            log.error("Could not locate 10.x.x.x network interface!")
            return 1

        if docker:
            multiprocessing.Process(target=run_docker,
                                    args=(args.docker_addr, args.docker_interval, args.host, args.port, debug)).start()

        try:
            while True:
                start = time.time()
                run_once(args.host, args.port, prefix, fields, nic, debug)
                elapsed = time.time() - start
                log.debug("statsd: {}ms".format(int(elapsed * 1000)))
                time.sleep(args.interval - elapsed)
        except KeyboardInterrupt:
            pass

        return 0
Example #33
0
    def call_cluster(self,
                     cluster,
                     filter_function=None,
                     result_filter=None,
                     temp_file_path=None):
        import tempfile

        if len(cluster) == 1:
            log.warn('Cluster {} has single read, not calling'.format(
                cluster.id))
            try:
                cluster.consensus_seq = None
                cluster.consensus_builder = None
                cluster.set_call(None)
                cluster.candidates = None
                cluster.candidates_method = str(self)
            except AttributeError as e:
                pass
            finally:
                return None

        consensus_seq = None
        consensus_seq_id = None
        f = None
        is_cluster_inst = False  # flag for filling descriptive attributes
        if hasattr(cluster, '__getitem__'
                   ):  # assumed to be list of sequences, get consensus
            try:
                if temp_file_path:
                    with open(temp_file_path, 'wb') as f:
                        f.write(
                            fasta_from_seq(*zip(*[(x.id, x.seq)
                                                  for x in cluster])))
                consensus_seq = self.consensus_builder.generate_consensus(
                    temp_file_path if temp_file_path else cluster)
                if not consensus_seq:
                    cluster.consensus = None
                    cluster.candidates_method = str(self)
                    return
                consensus_seq_id = 'cons'
                log.info('Generated consensus with:\n{}'.format(
                    str(self.consensus_builder)))
                log.debug('Output:\n{}'.format(consensus_seq))

                try:
                    cluster.consensus = consensus_seq
                    cluster.consensus_method = str(self.consensus_builder)
                except AttributeError as e:
                    pass
            except TypeError as e:  ## No consensus builder is set
                raise ValueError(
                    'Cluster calling: list of cluster sequences provided but no consensus builder instantiated.'
                )
        else:
            if isinstance(cluster, basestring):  # input is path
                if os.path.exists(cluster):
                    cons_path = cluster
                else:
                    raise ValueError(
                        'Cluster calling input invalid: a string was provided but it '
                        'is not a valid path. If a sequence was intended, pass a '
                        'Bio.Seq.Seq-like object instead.')
            else:  # input is consensus seq
                consensus_seq = cluster.seq
                consensus_seq_id = cluster.id

        ## save blasr target in all cases except path as input
        if consensus_seq:
            try:
                f = open(
                    temp_file_path,
                    'wb+') if temp_file_path else tempfile.NamedTemporaryFile(
                        delete=False)
                f.write(str(fasta_from_seq(consensus_seq_id, consensus_seq)))
                cons_path = f.name
                f.close()
            except AttributeError as e:
                raise ValueError(
                    'Cluster calling input invalid. Provide iterable of cluster sequences, path to cluster consensus or Bio.Seq.Seq-like object to call'
                )

        ## run blasr mapping of consensus_seq against allele database
        command = [self.blasr.src, '', self.allele_db, cons_path]

        try:
            mapping_output = self.blasr.run(*command)
        except ValueError as e:
            log.warn('Blasr returned no mapping')
            try:
                cluster.set_call(None)
                cluster.candidates = None
                cluster.candidates_method = str(self)
            except AttributeError as e:
                pass
            finally:
                return None

        if f is not None:
            f.close()

        ## select from mapping the desired result as the call
        if not filter_function:
            filter_function = self.filter_function

        try:
            mapping_output = sorted(mapping_output, key=filter_function)
            cluster_call = mapping_output[0]
        except ValueError as e:
            log.error('Invalid blasr mapping value')
            log.debug('\n'.join([str(x) for x in mapping_output]))
            raise e

        if not result_filter:
            result_filter = self.result_filter
        result = result_filter(cluster_call)

        try:
            cluster.set_call([result])
            cluster.candidates = list(mapping_output)
            cluster.candidates_method = str(self)
        except AttributeError as e:
            return result
Example #34
0
    python search_variants.py {tumor.pileup} {normal.pileup} {output_file_prefix} <options>
    \toutputs: {output_file_prefix}somatic_candidates
    \t\t{output_file_prefix}hetero_germline_candidates"""
    if len(args) != 3:
        log.info(usage)
    else:
        tumor_pileup_filename = args[0]
        normal_pileup_filename = args[1]
        out_prefix = args[2]
        somatic_candidates_filename = u"{0}somatic_candidates".format(out_prefix)
        hetero_germline_candidates_filename = u"{0}hetero_germline_candidates".format(out_prefix)

        log.info(u"""
        inputs:
        \ttumor pileup file: {0}
        \tnormal pileup file: {1}
        outputs:
        \tcandidate somatic mutations: {2}
        \t candidate heterozygous germline variants: {3}""".format(tumor_pileup_filename, normal_pileup_filename,
                                                                   somatic_candidates_filename,
                                                                   hetero_germline_candidates_filename))

        log.info(u"\nsettings:\n" + settings.to_str())

        somatic_candidates_file = open(somatic_candidates_filename, u"w")
        hetero_germline_candidates_file = open(hetero_germline_candidates_filename, u"w")
        search_variants(tumor_pileup_filename, normal_pileup_filename,
                        somatic_candidates_file, hetero_germline_candidates_file)

        log.debug(u"done.")
Example #35
0
    def generate_distances(self,
                           reads=None,
                           minimap=None,
                           filter_func=lambda x: True):
        ## Generates distance matrix of the form {read_id: {read_id: distance}}
        ## reads = [Bio.SeqIO, ...] = list of ORIENTED (ie no rev-compl) reads to be clustered (ie containing genes)
        ## if None uses self.reads
        ## minimap = instance of MinimapWrapper object. If none uses pre-set self.minimap
        ## filter_func = distance included in output if threshold(distance) = True
        import copy
        from Bio import SeqIO

        if self.filter_function:
            filter_func = self.filter_function

        if self.matrix:
            log.info('Using cached distance matrix')
            result = self.matrix
            if filter_func:
                result_filtered = self.filter_matrix(
                    copy.deepcopy(self.matrix), filter_func)
                result = result_filtered
            return self.matrix

        if not minimap:
            minimap = self.minimap

        mapping = minimap.ava(reads=reads)

        result = {}

        mapped_reads = set()  # for keeping track of mapped reads to report missing reads
        for i, line in enumerate(mapping):

            mapped_reads.add(line.qName)
            mapped_reads.add(line.tName)

            try:
                if 'NM' not in line.NM:
                    raise IndexError
                NM = int(line.NM.split(':')[2])
            except IndexError as e:
                log.error(
                    'Error in Minimap output: NM field is likely missing\nmapping line:{}'
                    .format('\t'.join(line)))
                log.debug(dir(line))
                log.debug(zip(line.header, line.attributes))
                raise ValueError()

            # Distance: edit distance (NM, counted once per side) plus the
            # unaligned ends of the query and of the target, normalised by
            # the combined length of the two reads.
            distance_value = (NM + line.qStart +
                              (line.qLength - line.qEnd) + NM + line.tStart +
                              (line.tLength -
                               line.tEnd)) / float(line.qLength + line.tLength)

            if line.qName not in result:
                result[line.qName] = {}

            result[line.qName][line.tName] = distance_value

        # check if any reads missing from mapping
        missing = set([x.id for x in reads]).difference(mapped_reads)
        if missing:
            log.warn('{} / {} reads missing from mapping'.format(
                len(missing), len(list(reads))))
            log.debug('\n'.join(list(missing)))

        self.matrix = result

        if filter_func:
            result_filtered = self.filter_matrix(copy.deepcopy(result),
                                                 filter_func)

            result = result_filtered

        return result
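A brief, hypothetical usage sketch of the method above; `clusterer` and `my_reads` stand in for an instance and its oriented reads, and the 0.1 threshold is arbitrary. Per the docstring, filter_func keeps a pair when it returns True for that pair's distance; note that an instance-level filter_function, when set, takes precedence over the argument.

# Hypothetical usage of generate_distances.
matrix = clusterer.generate_distances(reads=my_reads,
                                      filter_func=lambda d: d < 0.1)
for query_id, targets in matrix.items():
    if targets:
        # closest remaining neighbour for each query read
        nearest = min(targets.items(), key=lambda kv: kv[1])
        print(query_id, nearest)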
Example #36
0
def show_stats(norm):
    for name, arr in norm.items():
        if name not in ('samples', 'channels'):
            log.debug(
                f'{name} max {blue}{np.nanmax(arr):.5}{reset} min {yellow}{np.nanmin(arr):.5}{reset}'
            )
Example #37
0
 def handle_message(self, func, status, message):
     debug('handle_message - func: %s, status: %s, message: %s' % (func, status, message))
     self.fire(func, status, message)
Example #38
0
def get_dbsnp(data, region, force=False):
    mv = myvariant.MyVariantInfo()
    q = mv.query(
        '_exists_:dbsnp AND _exists_:hg19 AND {}:{}-{}'.format(*region),
        fields='dbsnp',
        fetch_all=True)
    snps = list(q)

    # VCF, dbSNP and myVariant use 1-based indexing
    dbsnp = collections.defaultdict(dict)
    for snp in snps:
        pos, ref, alt, rs = snp['dbsnp']['hg19']['start'] - 1, snp['dbsnp'][
            'ref'], snp['dbsnp']['alt'], snp['dbsnp']['rsid']
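        # Encode each variant as an operation string, keyed by 0-based position:
        # 'DEL.<bases>' for deletions, 'INS.<bases>' for insertions,
        # 'SNP.<ref><alt>' otherwise.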
        if len(ref) > 1 or len(alt) > 1:
            assert (ref[0] == alt[0])
        if len(ref) > 1:
            op = 'DEL.{}'.format(ref[1:])
        elif len(alt) > 1:
            op = 'INS.{}'.format(alt[1:].lower())
        else:
            op = 'SNP.{}{}'.format(ref, alt)
        dbsnp[pos][op] = rs

    mutations = {}
    for a in sorted(data):
        for m in data[a]['mutations']:
            if m['pos'] == 'pseudogene': continue
            if m['dbsnp'] not in ['', '*']:
                m['dbsnp'] = [m['dbsnp']]
            else:
                m['dbsnp'] = []
            pos, op = m['pos'], m['op']

            # check reversed SNP
            if op in dbsnp[pos]:
                rsid = str(dbsnp[pos][op])
                if rsid not in m['dbsnp']:
                    if len(m['dbsnp']) > 0: m['dbsnp'][0] += '(k)'
                    m['dbsnp'].append(rsid)
                    log.debug('dbSNP: Variant {} assigned to {}:{}', rsid, pos,
                              op)
                else:
                    log.debug(
                        'dbSNP: Variant {} matches the Karolinska\'s prediction',
                        rsid)
            elif len(dbsnp[pos]) > 0 and (op[:3] == 'SNP' and op[:4] +
                                          op[4:6][::-1] in dbsnp[pos]):
                op = op[:4] + op[4:6][::-1]
                rsid = str(dbsnp[pos][op])
                if rsid not in m['dbsnp']:
                    if len(m['dbsnp']) > 0: m['dbsnp'][0] += '(k)'
                    m['dbsnp'].append(rsid)
                    log.debug('dbSNP: Variant {} assigned to {}:{}', rsid, pos,
                              op)
                else:
                    log.debug(
                        'dbSNP: Variant {} matches the Karolinska\'s prediction',
                        rsid)
            elif len(dbsnp[pos]) != 0:
                log.trace('How about {} for {}:{} ({})', dbsnp[pos], pos, op,
                          m['old'])
    return data
Example #39
0
 def interlinguish_warning(self, op, say, msg):
     log.debug(
         1,
         str(self.entity.id) + " interlinguish_warning: " + str(msg) +
         ": " + str(say[0].lexlink.id[1:]), op)
Example #40
0
 def interlinguish_warning(self, op, say, msg):
     log.debug(1,str(self.id)+" interlinguish_warning: "+str(msg)+\
               ": "+str(say[0].lexlink.id[1:]),op)
Example #41
0
 def update_one(self, collection, _id, record):
     """update_one"""
     log.debug("mongodb %s(_id=%s) update: %s" % (collection, _id, record))
     self.__client[collection].update_one({"_id": ObjectId(_id)},
                                          {"$set": record})
    def _tokens(license_list):
        try:
            sub_input="sacctmgr -pns show resource withcluster"
            log.debug(sub_input)
            string_data=subprocess.check_output(sub_input, shell=True).decode("utf-8").strip()
        except Exception as details:
            log.error("Failed to check SLURM tokens. " + str(details))
        else:
            active_token_dict = {}
            # Format output data into dictionary 
            for lic_string in string_data.split("\n"):

                log.debug(lic_string)
                str_arr=lic_string.split("|")
                active_token_dict[str_arr[0] + "@" + str_arr[1]]=str_arr


            # Each licence is split 50/50 between the mahuika and maui clusters,
            # so the SLURM resource Count holds double the real total with
            # PercentAllowed=50 on each cluster.
            for key, value in license_list.items():

                name = value["software_name"] + "_" + value["lic_type"] if value["lic_type"] else value["software_name"]
                server = value["institution"] + "_" + value["faculty"] if value["faculty"] else value["institution"]

                if key not in active_token_dict.keys():
                    log.error("'" + key + "' does not have a token in slurm database!")

                    # if possible, create.
                    if value["institution"] and value["total"] and value["software_name"]:           
                        log.error("Attempting to add...")

                        try:
                            sub_input="sacctmgr -i add resource Name=" + name.lower() + " Server=" + server.lower() + " Count=" + str(int(value["total"]*2)) + " Type=License percentallowed=50 where cluster=mahuika"
                            log.debug(sub_input)
                            subprocess.check_output(sub_input, shell=True).decode("utf-8")
                            
                        except Exception as details:
                            log.error(details)
                        else:
                            log.info("Token added successfully!")
                    
                    else:
                        log.error("Must have 'instituiton, software_name, cluster, total' set in order to generate SLURM token.")

                else:
                    if value["total"] != int(active_token_dict[key][3])/2:
                        log.error("SLURM TOKEN BAD, HAS " + str(int(active_token_dict[key][3])/2)  + " and should be " + str(value["total"]))
                        try:
                            sub_input="sacctmgr -i modify resource Name=" + name.lower() + " Server=" + server.lower() + " set Count=" + str(int(value["total"]*2))
                            log.debug(sub_input)
                            subprocess.check_output(sub_input, shell=True)        
                        except Exception as details:
                            log.error(details)
                        else:
                            log.warning("Token modified successfully!")
                    if active_token_dict[key][7] != "50":
                        log.error("SLURM token not cluster-split")

                        try:
                            sub_input="sacctmgr -i modify resource Name=" + name.lower() + " Server=" + server.lower() + "percentallocated=100 where cluster=mahuika" +  " set PercentAllowed=50"
                            log.debug(sub_input)
                            subprocess.check_output(sub_input, shell=True)

                            sub_input="sacctmgr -i modify resource Name=" + name.lower() + " Server=" + server.lower() + "percentallocated=100 where cluster=maui" +  " set PercentAllowed=50"
                            log.debug(sub_input)
                            subprocess.check_output(sub_input, shell=True)
                        except Exception as details:
                            log.error(details)
                        else:
                            log.info("Token modified successfully!")
Example #43
0
 def delete_one(self, collection, query):
     """delete_one"""
     log.debug("mongodb %s(query=%s) delete" % (collection, query))
     self.__client[collection].delete_one(query)
    def merge_clusters(self, clusters):
        log.info('Remapping clusters')

        self.remapped_clusters = {}

        querys = {}
        targets = OrderedDict()
        for c in clusters:
            if self.to_remap(c):
                log.debug('Remapping reads from cluster {}'.format(c.id))
                for r in c:
                    querys[r.id] = r
                    r.original_cluster = c
            else:
                targets[str(c.id)] = c

        log.info('Remapping {} reads'.format(len(querys)))

        query_seqs = querys.values()
        target_seqs = [
            SeqRecord(c.id, c.consensus.replace('.', ''))
            for c in targets.values()
        ]

        mapping = self.mapper.run(query_seqs, target_seqs)

        ## sorted list insertion function
        def add_mapping(new,
                        arr,
                        less_than=lambda x, y: x.total_errors() < y.total_errors()):
            if less_than(new, arr[0]):
                return [new] + arr
            if len(arr) == 1 or not less_than(new, arr[-1]):
                return arr + [new]
            for i, m in enumerate(arr):
                if less_than(new, m):
                    return arr[:i] + [new] + arr[i:]

        sort = {}
        for m in mapping:
            m.quality = m.total_errors()
            try:
                sort[m.qName] = add_mapping(m, sort[m.qName])
            except KeyError:
                sort[m.qName] = [m]

        for r_id, maps in sort.iteritems():
            try:
                read = querys[r_id]
                read.cluster_mappings = maps
                targets[maps[0].tName].reads[
                    read.id] = read  ## add to mapped cluster
                read.cluster = targets[maps[0].tName]
                targets[maps[0].tName].has_remaps = True
                if maps[0].tName not in self.remapped_clusters:
                    self.remapped_clusters[maps[0].tName] = [read]
                else:
                    self.remapped_clusters[maps[0].tName].append(read)
            except KeyError as e:
                print(maps[0])
                print(maps[0].tName in targets)
                print(sorted(targets.keys()))
                print(targets[maps[0].tName].reads.keys())
                raise e
        return targets.values()
def main():
    watchB=time.time()
    cp = configure()

    # Set the alarm in case if we go over time
    if cp.notimeout:
        log.debug("Running script with no timeout.")
    else:
        timeout = int(cp.get("Settings", "timeout"))
        signal.alarm(timeout)
        log.debug("Setting script timeout to %i." % timeout)

    # Hourly graphs (24-hours)
    watchS=time.time()
    hjds = HourlyJobsDataSource(cp)
    hjds.run()
    dg = DisplayGraph(cp, "jobs_hourly")
    jobs_data, hours_data = hjds.query_jobs()
    dg.data = [i/1000 for i in jobs_data]
    num_jobs = sum(jobs_data)
    dg.run("jobs_hourly")
    hjds.disconnect()
    log.debug("Time log - Hourly Jobs Query Time: %s", (time.time() - watchS))
    watchS=time.time()
    dg = DisplayGraph(cp, "hours_hourly")
    dg.data = [float(i)/1000. for i in hours_data]
    dg.run("hours_hourly")
    log.debug("Time log - Hourly Jobs Graph Time: %s", (time.time() - watchS))
    # Generate the more-complex transfers graph

    watchS=time.time()
    dst = DataSourceTransfers(cp)
    dst.run()
    log.debug("Time log - Hourly Transfer Query Time: %s", (time.time() - watchS))
    watchS=time.time()
    dg = DisplayGraph(cp, "transfer_volume_hourly")
    dg.data = [i[1]/1024./1024. for i in dst.get_data()]
    log.debug("Transfer volumes: %s" % ", ".join([str(float(i)) for i in \
        dg.data]))
    dg.run("transfer_volume_hourly")
    transfer_data = dst.get_data()
    dg = DisplayGraph(cp, "transfers_hourly")
    dg.data = [long(i[0])/1000. for i in dst.get_data()]
    dg.run("transfers_hourly")
    num_transfers = sum([i[0] for i in transfer_data])
    transfer_volume_mb = sum([i[1] for i in transfer_data])
    dst.disconnect()
    log.debug("Time log - Hourly Transfer Graph Time: %s", (time.time() - watchS))

    # Daily (30-day graphs)
    watchS=time.time()
    dds = DailyDataSource(cp)
    dds.run()
    # Jobs graph
    jobs_data_daily, hours_data_daily = dds.query_jobs()
    dds.disconnect() 
    log.debug("Time log - 30-Day Query Time: %s", (time.time() - watchS))
    # Job count graph
    watchS=time.time()
    dg = DisplayGraph(cp, "jobs_daily")
    dg.data = [float(i)/1000. for i in jobs_data_daily]
    num_jobs_hist = sum(jobs_data_daily)
    dg.run("jobs_daily", mode="daily")
    log.debug("Time log - 30-Day Count Graph Time: %s", (time.time() - watchS))
    # CPU Hours graph
    watchS=time.time()
    dg = DisplayGraph(cp, "hours_daily")
    dg.data = [float(i)/1000000. for i in hours_data_daily]
    num_hours_hist = sum(hours_data_daily) 
    dg.run("hours_daily", mode="daily")
    log.debug("Time log - 30-Day CPU Graph Time: %s", (time.time() - watchS))
    # Transfers data
    watchS=time.time()
    transfer_data_daily, volume_data_daily = dds.query_transfers()
    log.debug("Time log - 30-Day Transfer Query Time: %s", (time.time() - watchS))
    # Transfer count graph
    watchS=time.time()
    dg = DisplayGraph(cp, "transfers_daily")
    dg.data = [float(i)/1000000. for i in transfer_data_daily]
    num_transfers_daily = sum(transfer_data_daily)
    dg.run("transfers_daily", mode="daily")
    log.debug("Time log - 30-Day Transfer Count Graph Time: %s", (time.time() - watchS))
    # Transfer volume graph 
    watchS=time.time()
    dg = DisplayGraph(cp, "transfer_volume_daily")
    dg.data = [float(i)/1024.**3 for i in volume_data_daily]
    volume_transfers_hist = sum(volume_data_daily)
    dg.run("transfer_volume_daily", mode="daily")
    log.debug("Time log - 30-Day Transfer Volume Graph Time: %s", (time.time() - watchS))

    # Monthly graphs (12-months)
    watchS=time.time()
    mds = MonthlyDataSource(cp)
    mds.run()
    # Jobs graph
    jobs_data_monthly, hours_data_monthly = mds.query_jobs()
    mds.disconnect()
    log.debug("Time log - 12-Month Query Time: %s", (time.time() - watchS))
    # Job count graph
    watchS=time.time()
    dg = DisplayGraph(cp, "jobs_monthly")
    dg.data = [float(i)/1000000. for i in jobs_data_monthly]
    num_jobs_monthly = sum(jobs_data_monthly)
    dg.run("jobs_monthly", mode="monthly")
    log.debug("Time log - 12-Month Job Count Graph Time: %s", (time.time() - watchS))
    # Hours graph
    watchS=time.time()
    dg = DisplayGraph(cp, "hours_monthly")
    dg.data = [float(i)/1000000. for i in hours_data_monthly]
    num_hours_monthly = sum(hours_data_monthly)
    dg.run("hours_monthly", mode="monthly")
    log.debug("Time log - 12-Month Hour Graph Time: %s", (time.time() - watchS))
    # Transfers graph
    watchS=time.time()
    transfer_data_monthly, volume_data_monthly = mds.query_transfers()
    log.debug("Time log - 12-Month Transfer Query Time: %s", (time.time() - watchS))
    # Transfer count graph
    watchS=time.time()
    dg = DisplayGraph(cp, "transfers_monthly")
    dg.data = [float(i)/1000000. for i in transfer_data_monthly]
    num_transfers_monthly = sum(transfer_data_monthly)
    dg.run("transfers_monthly", mode="monthly")
    log.debug("Time log - 12-Month Transfer Count Graph Time: %s", (time.time() - watchS))
    # Transfer volume graph
    watchS=time.time()
    dg = DisplayGraph(cp, "transfer_volume_monthly")
    dg.data = [float(i)/1024.**3 for i in volume_data_monthly]
    volume_transfers_monthly = sum(volume_data_monthly)
    dg.run("transfer_volume_monthly", mode="monthly")
    log.debug("Time log - 12-Month Transfer Volume Graph Time: %s", (time.time() - watchS))
    # Pull OIM data
    watchS=time.time()
    ods = OIMDataSource(cp)
    num_sites = len(ods.query_sites())
    ces, ses = ods.query_ce_se()
    log.debug("Time log - OIM Time: %s", (time.time() - watchS))

    # Generate the JSON
    log.debug("Starting JSON creation")
    d = Data(cp)
    d.add_datasource(mds)
    d.add_datasource(hjds)
    d.add_datasource(dst)
    d.add_datasource(dds)
    d.add_datasource(ods)
    # Monthly data
    log.debug("Done creating JSON.")

    name, tmpname = get_files(cp, "json")
    fd = open(tmpname, 'w')
    d.run(fd)
    commit_files(name, tmpname)

    log.info("OSG Display done!")
    log.debug("Time log - Total Time: %s", (time.time() - watchB))
Example #46
0
    def parse(self, string, graph):
        """
      Parses the given string and/or graph.
      """

        # This is a long function, so let's start with a high-level overview. This is
        # a "deductive-proof-style" parser: We begin with one "axiomatic" chart item
        # for each rule, and combine these items with each other and with fragments of
        # the object(s) being parsed to deduce new items. We can think of these items
        # as defining a search space in which we need to find a path to the goal item.
        # The parser implemented here performs a BFS of this search space.

        grammar = self.grammar

        # remember when we started
        start_time = time.clock()
        log.chatter('parse...')

        # specify what kind of items we're working with
        if string and graph:
            axiom_class = CfgHergItem
        elif string:
            axiom_class = CfgItem
        else:
            axiom_class = HergItem

        # remember the size of the example
        if string:
            string_size = len(string)
        else:
            string_size = -1
        if graph:
            graph_size = len(graph.triples(nodelabels=self.nodelabels))
        else:
            graph_size = -1

        # initialize data structures and lookups
        # we use various tables to provide constant-time lookup of fragments available
        # for shifting, completion, etc.
        chart = ddict(set)

        # TODO: Command line option to switch grammar filter on/off
        if string:
            pgrammar = [
                grammar[r] for r in grammar.reachable_rules(string, None)
            ]  #grammar.values()
        if graph:
            pgrammar = [
                grammar[r] for r in grammar.reachable_rules(graph, None)
            ]  #grammar.values()

        queue = deque()  # the items left to be visited
        pending = set()  # a copy of queue with constant-time lookup
        attempted = set()  # a cache of previously-attempted item combinations
        visited = set()  # a cache of already-visited items
        word_terminal_lookup = ddict(set)
        nonterminal_lookup = ddict(set)  # a mapping from labels to graph edges
        reverse_lookup = ddict(
            set)  # a mapping from outside symbols to open items
        if string:
            word_terminal_lookup = ddict(
                set)  # mapping from words to string indices
            for i in range(len(string)):
                word_terminal_lookup[string[i]].add(i)
        if graph:
            edge_terminal_lookup = ddict(
                set)  # mapping from edge labels to graph edges
            for edge in graph.triples(nodelabels=self.nodelabels):
                edge_terminal_lookup[edge[1]].add(edge)
        for rule in pgrammar:
            axiom = axiom_class(rule, nodelabels=self.nodelabels)
            queue.append(axiom)
            pending.add(axiom)
            if axiom.outside_is_nonterminal:
                reverse_lookup[axiom.outside_symbol].add(axiom)

        # keep track of whether we found any complete derivation
        success = False

        # parse
        while queue:
            item = queue.popleft()
            pending.remove(item)
            visited.add(item)
            log.debug('handling', item)

            if item.closed:
                log.debug('  is closed.')
                # check if it's a complete derivation
                if self.successful_parse(string, graph, item, string_size,
                                         graph_size):
                    chart['START'].add((item, ))
                    success = True

                # add to nonterminal lookup
                nonterminal_lookup[item.rule.symbol].add(item)

                # wake up any containing rules
                # Unlike in ordinary state-space search, it's possible that we will have
                # to re-visit items which couldn't be merged with anything the first time
                # we saw them, and are waiting for the current item. The reverse_lookup
                # indexes all items by their outside symbol, so we re-append to the queue
                # all items looking for something with the current item's symbol.
                for ritem in reverse_lookup[item.rule.symbol]:
                    if ritem not in pending:
                        queue.append(ritem)
                        pending.add(ritem)

            else:
                if item.outside_is_nonterminal:
                    # complete
                    reverse_lookup[item.outside_symbol].add(item)

                    for oitem in nonterminal_lookup[item.outside_symbol]:
                        log.debug("  oitem:", oitem)
                        if (item, oitem) in attempted:
                            # don't repeat combinations we've tried before
                            continue
                        attempted.add((item, oitem))
                        if not item.can_complete(oitem):
                            log.debug("    fail")
                            continue
                        log.debug("    ok")
                        nitem = item.complete(oitem)
                        chart[nitem].add((item, oitem))
                        if nitem not in pending and nitem not in visited:
                            queue.append(nitem)
                            pending.add(nitem)

                else:
                    # shift
                    if string and graph:
                        if not item.outside_word_is_nonterminal:
                            new_items = [
                                item.shift_word(item.outside_word, index)
                                for index in word_terminal_lookup[
                                    item.outside_word] if item.can_shift_word(
                                        item.outside_word, index)
                            ]
                        else:
                            assert not item.outside_edge_is_nonterminal
                            new_items = [
                                item.shift_edge(edge) for edge in
                                edge_terminal_lookup[item.outside_edge]
                                if item.can_shift_edge(edge)
                            ]
                    elif string:
                        new_items = [
                            item.shift(item.outside_word, index) for index in
                            word_terminal_lookup[item.outside_word]
                            if item.can_shift(item.outside_word, index)
                        ]
                    else:
                        assert graph
                        new_items = [
                            item.shift(edge)
                            for edge in edge_terminal_lookup[item.outside_edge]
                            if item.can_shift(edge)
                        ]

                    for nitem in new_items:
                        log.debug('  shift', nitem, nitem.shifted)
                        chart[nitem].add((item, ))
                        if nitem not in pending and nitem not in visited:
                            queue.append(nitem)
                            pending.add(nitem)

        if success:
            log.chatter('  success!')
        etime = time.clock() - start_time
        log.chatter('done in %.2fs' % etime)

        # TODO return partial chart
        return chart
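The chart returned above maps each deduced item to the set of backpointer tuples it was built from, and complete parses are gathered under the 'START' key. A minimal, hypothetical helper (not part of the parser) showing how one derivation could be read back out, assuming items are hashable and the backpointer graph is acyclic:

def one_derivation(chart, item='START'):
    # Every deduced item was recorded with the antecedent tuple(s) it came
    # from; axioms never appear as keys, which terminates the recursion.
    backpointers = chart.get(item, set())
    if not backpointers:
        return (item,)
    antecedents = next(iter(backpointers))  # pick one arbitrary way this item was built
    return (item,) + tuple(one_derivation(chart, a) for a in antecedents)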
Example #47
0
def main():
    watchB=time.time()
    cp = configure()

    # Set the alarm in case if we go over time
    if cp.notimeout:
        log.debug("Running script with no timeout.")
    else:
        timeout = int(cp.get("Settings", "timeout"))
        signal.alarm(timeout)
        log.debug("Setting script timeout to %i." % timeout)

    # Hourly graphs (24-hours)
    watchS=time.time()
    hjds = HourlyJobsDataSource(cp)
    hjds.run()
    dg = DisplayGraph(cp, "jobs_hourly")
    jobs_data, hours_data = hjds.query_jobs()
    dg.data = [i/1000 for i in jobs_data]
    num_jobs = sum(jobs_data)
    dg.run("jobs_hourly")
    hjds.disconnect()
    log.debug("Time log - Hourly Jobs Query Time: %s", (time.time() - watchS))
    watchS=time.time()
    dg = DisplayGraph(cp, "hours_hourly")
    dg.data = [float(i)/1000. for i in hours_data]
    dg.run("hours_hourly")
    log.debug("Time log - Hourly Jobs Graph Time: %s", (time.time() - watchS))
    # Generate the more-complex transfers graph

    watchS=time.time()
    dst = DataSourceTransfers(cp)
    dst.run()
    log.debug("Time log - Hourly Transfer Query Time: %s", (time.time() - watchS))
    watchS=time.time()
    dg = DisplayGraph(cp, "transfer_volume_hourly")
    dg.data = [i[1]/1024./1024. for i in dst.get_data()]
    log.debug("Transfer volumes: %s" % ", ".join([str(float(i)) for i in \
        dg.data]))
    dg.run("transfer_volume_hourly")
    transfer_data = dst.get_data()
    dg = DisplayGraph(cp, "transfers_hourly")
    dg.data = [long(i[0])/1000. for i in dst.get_data()]
    dg.run("transfers_hourly")
    num_transfers = sum([i[0] for i in transfer_data])
    transfer_volume_mb = sum([i[1] for i in transfer_data])
    dst.disconnect()
    log.debug("Time log - Hourly Transfer Graph Time: %s", (time.time() - watchS))

    # Daily (30-day graphs)
    watchS=time.time()
    dds = DailyDataSource(cp)
    dds.run()
    # Jobs graph
    jobs_data_daily, hours_data_daily = dds.query_jobs()
    dds.disconnect() 
    log.debug("Time log - 30-Day Query Time: %s", (time.time() - watchS))
    # Job count graph
    watchS=time.time()
    dg = DisplayGraph(cp, "jobs_daily")
    dg.data = [float(i)/1000. for i in jobs_data_daily]
    num_jobs_hist = sum(jobs_data_daily)
    dg.run("jobs_daily", mode="daily")
    log.debug("Time log - 30-Day Count Graph Time: %s", (time.time() - watchS))
    # CPU Hours graph
    watchS=time.time()
    dg = DisplayGraph(cp, "hours_daily")
    dg.data = [float(i)/1000000. for i in hours_data_daily]
    num_hours_hist = sum(hours_data_daily) 
    dg.run("hours_daily", mode="daily")
    log.debug("Time log - 30-Day CPU Graph Time: %s", (time.time() - watchS))
    # Transfers data
    watchS=time.time()
    transfer_data_daily, volume_data_daily = dds.query_transfers()
    log.debug("Time log - 30-Day Transfer Query Time: %s", (time.time() - watchS))
    # Transfer count graph
    watchS=time.time()
    dg = DisplayGraph(cp, "transfers_daily")
    dg.data = [float(i)/1000000. for i in transfer_data_daily]
    num_transfers_daily = sum(transfer_data_daily)
    dg.run("transfers_daily", mode="daily")
    log.debug("Time log - 30-Day Transfer Count Graph Time: %s", (time.time() - watchS))
    # Transfer volume graph 
    watchS=time.time()
    dg = DisplayGraph(cp, "transfer_volume_daily")
    dg.data = [float(i)/1024.**3 for i in volume_data_daily]
    volume_transfers_hist = sum(volume_data_daily)
    dg.run("transfer_volume_daily", mode="daily")
    log.debug("Time log - 30-Day Transfer Volume Graph Time: %s", (time.time() - watchS))

    # Monthly graphs (12-months)
    watchS=time.time()
    mds = MonthlyDataSource(cp)
    mds.run()
    # Jobs graph
    jobs_data_monthly, hours_data_monthly = mds.query_jobs()
    mds.disconnect()
    log.debug("Time log - 12-Month Query Time: %s", (time.time() - watchS))
    # Job count graph
    watchS=time.time()
    dg = DisplayGraph(cp, "jobs_monthly")
    dg.data = [float(i)/1000000. for i in jobs_data_monthly]
    num_jobs_monthly = sum(jobs_data_monthly)
    dg.run("jobs_monthly", mode="monthly")
    log.debug("Time log - 12-Month Job Count Graph Time: %s", (time.time() - watchS))
    # Hours graph
    watchS=time.time()
    dg = DisplayGraph(cp, "hours_monthly")
    dg.data = [float(i)/1000000. for i in hours_data_monthly]
    num_hours_monthly = sum(hours_data_monthly)
    dg.run("hours_monthly", mode="monthly")
    log.debug("Time log - 12-Month Hour Graph Time: %s", (time.time() - watchS))
    # Transfers graph
    watchS=time.time()
    transfer_data_monthly, volume_data_monthly = mds.query_transfers()
    log.debug("Time log - 12-Month Transfer Query Time: %s", (time.time() - watchS))
    # Transfer count graph
    watchS=time.time()
    dg = DisplayGraph(cp, "transfers_monthly")
    dg.data = [float(i)/1000000. for i in transfer_data_monthly]
    num_transfers_monthly = sum(transfer_data_monthly)
    dg.run("transfers_monthly", mode="monthly")
    log.debug("Time log - 12-Month Transfer Count Graph Time: %s", (time.time() - watchS))
    # Transfer volume graph
    watchS=time.time()
    dg = DisplayGraph(cp, "transfer_volume_monthly")
    dg.data = [float(i)/1024.**3 for i in volume_data_monthly]
    volume_transfers_monthly = sum(volume_data_monthly)
    dg.run("transfer_volume_monthly", mode="monthly")
    log.debug("Time log - 12-Month Transfer Volume Graph Time: %s", (time.time() - watchS))
    # Pull OIM data
    watchS=time.time()
    ods = OIMDataSource(cp)
    num_sites = len(ods.query_sites())
    ces, ses = ods.query_ce_se()
    log.debug("Time log - OIM Time: %s", (time.time() - watchS))

    # Generate the JSON
    log.debug("Starting JSON creation")
    d = Data(cp)
    d.add_datasource(mds)
    d.add_datasource(hjds)
    d.add_datasource(dst)
    d.add_datasource(dds)
    d.add_datasource(ods)
    # Monthly data
    log.debug("Done creating JSON.")

    name, tmpname = get_files(cp, "json")
    fd = open(tmpname, 'w')
    d.run(fd)
    commit_files(name, tmpname)

    log.info("OSG Display done!")
    log.debug("Time log - Total Time: %s", (time.time() - watchB))
Example #48
0
    def parse(self, string, graph):
        """
      Parses the given string and/or graph.
      """

        # This is a long function, so let's start with a high-level overview. This is
        # a "deductive-proof-style" parser: We begin with one "axiomatic" chart item
        # for each rule, and combine these items with each other and with fragments of
        # the object(s) being parsed to deduce new items. We can think of these items
        # as defining a search space in which we need to find a path to the goal item.
        # The parser implemented here performs a BFS of this search space.

        grammar = self.grammar

        # remember when we started
        start_time = time.clock()
        log.chatter("parse...")

        # specify what kind of items we're working with
        if string and graph:
            axiom_class = CfgHergItem
        elif string:
            axiom_class = CfgItem
        else:
            axiom_class = HergItem

        # remember the size of the example
        if string:
            string_size = len(string)
        else:
            string_size = -1
        if graph:
            graph_size = len(graph.triples(nodelabels=self.nodelabels))
        else:
            graph_size = -1

        # initialize data structures and lookups
        # we use various tables to provide constant-time lookup of fragments available
        # for shifting, completion, etc.
        chart = ddict(set)

        # TODO: Command line option to switch grammar filter on/off
        if string:
            pgrammar = [grammar[r] for r in grammar.reachable_rules(string, None)]  # grammar.values()
        if graph:
            pgrammar = [grammar[r] for r in grammar.reachable_rules(graph, None)]  # grammar.values()

        queue = deque()  # the items left to be visited
        pending = set()  # a copy of queue with constant-time lookup
        attempted = set()  # a cache of previously-attempted item combinations
        visited = set()  # a cache of already-visited items
        word_terminal_lookup = ddict(set)
        nonterminal_lookup = ddict(set)  # a mapping from labels to graph edges
        reverse_lookup = ddict(set)  # a mapping from outside symbols to open items
        if string:
            word_terminal_lookup = ddict(set)  # mapping from words to string indices
            for i in range(len(string)):
                word_terminal_lookup[string[i]].add(i)
        if graph:
            edge_terminal_lookup = ddict(set)  # mapping from edge labels to graph edges
            for edge in graph.triples(nodelabels=self.nodelabels):
                edge_terminal_lookup[edge[1]].add(edge)
        for rule in pgrammar:
            axiom = axiom_class(rule, nodelabels=self.nodelabels)
            queue.append(axiom)
            pending.add(axiom)
            if axiom.outside_is_nonterminal:
                reverse_lookup[axiom.outside_symbol].add(axiom)

        # keep track of whether we found any complete derivation
        success = False

        # parse
        while queue:
            item = queue.popleft()
            pending.remove(item)
            visited.add(item)
            log.debug("handling", item)

            if item.closed:
                log.debug("  is closed.")
                # check if it's a complete derivation
                if self.successful_parse(string, graph, item, string_size, graph_size):
                    chart["START"].add((item,))
                    success = True

                # add to nonterminal lookup
                nonterminal_lookup[item.rule.symbol].add(item)

                # wake up any containing rules
                # Unlike in ordinary state-space search, it's possible that we will have
                # to re-visit items which couldn't be merged with anything the first time
                # we saw them, and are waiting for the current item. The reverse_lookup
                # indexes all items by their outside symbol, so we re-append to the queue
                # all items looking for something with the current item's symbol.
                for ritem in reverse_lookup[item.rule.symbol]:
                    if ritem not in pending:
                        queue.append(ritem)
                        pending.add(ritem)

            else:
                if item.outside_is_nonterminal:
                    # complete
                    reverse_lookup[item.outside_symbol].add(item)

                    for oitem in nonterminal_lookup[item.outside_symbol]:
                        log.debug("  oitem:", oitem)
                        if (item, oitem) in attempted:
                            # don't repeat combinations we've tried before
                            continue
                        attempted.add((item, oitem))
                        if not item.can_complete(oitem):
                            log.debug("    fail")
                            continue
                        log.debug("    ok")
                        nitem = item.complete(oitem)
                        chart[nitem].add((item, oitem))
                        if nitem not in pending and nitem not in visited:
                            queue.append(nitem)
                            pending.add(nitem)

                else:
                    # shift
                    if string and graph:
                        if not item.outside_word_is_nonterminal:
                            new_items = [
                                item.shift_word(item.outside_word, index)
                                for index in word_terminal_lookup[item.outside_word]
                                if item.can_shift_word(item.outside_word, index)
                            ]
                        else:
                            assert not item.outside_edge_is_nonterminal
                            new_items = [
                                item.shift_edge(edge)
                                for edge in edge_terminal_lookup[item.outside_edge]
                                if item.can_shift_edge(edge)
                            ]
                    elif string:
                        new_items = [
                            item.shift(item.outside_word, index)
                            for index in word_terminal_lookup[item.outside_word]
                            if item.can_shift(item.outside_word, index)
                        ]
                    else:
                        assert graph
                        new_items = [
                            item.shift(edge) for edge in edge_terminal_lookup[item.outside_edge] if item.can_shift(edge)
                        ]

                    for nitem in new_items:
                        log.debug("  shift", nitem, nitem.shifted)
                        chart[nitem].add((item,))
                        if nitem not in pending and nitem not in visited:
                            queue.append(nitem)
                            pending.add(nitem)

        if success:
            log.chatter("  success!")
        etime = time.clock() - start_time
        log.chatter("done in %.2fs" % etime)

        # TODO return partial chart
        return chart
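# The loop above is an instance of a generic "agenda" (worklist) pattern: a FIFO
# queue of items, constant-time membership sets, and a reverse index that pairs
# finished producers with the open items still waiting on their symbol. Below is
# a minimal, self-contained sketch of that pattern only -- the `closed`,
# `produces` and `needs` attributes and the `combine` callback are hypothetical
# stand-ins, not the real CfgItem/HergItem API, and the real parser additionally
# records chart backpointers and caches attempted item pairs.

from collections import defaultdict, deque

def agenda_run(axioms, combine):
    """Breadth-first agenda: process items until no new ones can be derived."""
    queue = deque(axioms)
    seen = set(axioms)                      # everything ever enqueued
    finished_by_symbol = defaultdict(set)   # closed items, indexed by the symbol they produce
    waiting_by_symbol = defaultdict(set)    # open items, indexed by the symbol they still need
    while queue:
        item = queue.popleft()
        if item.closed:
            finished_by_symbol[item.produces].add(item)
            partners = waiting_by_symbol[item.produces]   # wake up waiting consumers
        else:
            waiting_by_symbol[item.needs].add(item)
            partners = finished_by_symbol[item.needs]     # try already-finished producers
        for other in list(partners):
            waiter, producer = (other, item) if item.closed else (item, other)
            for new_item in combine(waiter, producer):
                if new_item not in seen:                  # enqueue each item at most once
                    seen.add(new_item)
                    queue.append(new_item)
    return finished_by_symbol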
Example #49
    def interlinguish_undefined_operation(self, op, say):
        # CHEAT!: any way to handle these?
        log.debug(2,
                  str(self.entity.id) + " interlinguish_undefined_operation:",
                  op)
        log.debug(2, str(say))
Example #50
def get_pseudo_mutations(gene, pseudogene, force=False):
    mutations = {}
    translation = {}
    # print gene.exons
    # print pseudogene.exons
    # print gene.introns
    # print pseudogene.introns
    # exit(0)
    for ei, (e6, e7) in enumerate(
            zip(gene.exons, pseudogene.exons) +
            zip(gene.introns, pseudogene.introns)):
        y6, y7 = e6.start, e7.start
        e6 = gene.seq[e6.start:e6.end]
        e7 = pseudogene.seq[e7.start:e7.end]

        def yy(x, s=10):
            return ' '.join(x[i:i + s] for i in xrange(0, len(x), s))

        log.debug('NCBI: ALN {}{} (len {} / {})',
                  'E' if ei < len(gene.exons) else 'I',
                  ei + 1 if ei < len(gene.exons) else ei - len(gene.exons) + 1,
                  len(e6), len(e7))
        if max(len(e6), len(e7)) < 1000:
            al = pairwise2.align.globalxs(e6, e7, -1, 0)[0]
            s = str(pairwise2.format_alignment(*al)).split('\n')
        else:
            log.debug('BLAT')
            a = blat(e6, e7)

            s = [a[0].query.seq, '', a[0].hit.seq]
            for i, f in enumerate(a):
                if i == 0: continue
                if a[i].query_range[0] == a[i - 1].query_range[1]:
                    s[0] += e6[a[i - 1].hit_range[1]:a[i].hit_range[0]]
                    s[2] += '-' * (a[i].hit_range[0] - a[i - 1].hit_range[1])
                if a[i].hit_range[0] == a[i - 1].hit_range[1]:
                    s[0] += '-' * (a[i].query_range[0] -
                                   a[i - 1].query_range[1])
                    s[2] += e7[a[i - 1].query_range[1]:a[i].query_range[0]]
                s[0] += a[i].query.seq
                s[2] += a[i].hit.seq
            s = map(str.upper, map(str, s))
            assert (len(s[0]) == len(s[2]))

        log.debug('NCBI: ALN {}', yy(s[0]))
        log.debug(
            'NCBI: ALN {}',
            yy(''.join([
                '*' if s[0][y] != s[2][y] else '-' for y in xrange(len(s[0]))
            ])))
        log.debug('NCBI: ALN {}', yy(s[2]))

        gaps6, gaps7 = 0, 0
        i = 0
        while i < len(s[0]):  # walk the alignment; s[0]/s[2] are set in both branches above
            c6 = y6 + i - gaps6
            c7 = y7 + i - gaps7
            if c6 not in gene.translation:
                # print >>sys.stderr, 'CYP2D7 mutation {}:{}{} ignored'.format(y7 + i, a, b)
                i += 1
                continue
            # print al[0][i]
            if s[0][i] == '-':
                seq = ''
                while i < len(s[0]) and s[0][i] == '-':
                    seq += s[2][i]
                    i += 1
                    gaps6 += 1
                # (deletion in 6 is actually insertion in mapping)
                c6 = y6 + i - gaps6  # the INS goes right before this position
                translation[c6] = mutations[c6] = dict(
                    pos=c6,
                    op='INS.{}'.format(seq.lower()),
                    dbsnp='*',
                    old='{}:{}{}:{}'.format(
                        pseudogene.name, 'e' if ei < len(gene.exons) else 'i',
                        ei + 1 if ei < len(gene.exons) else ei -
                        len(gene.exons) + 1, c7),
                    old_pos=c7)
                continue
            if s[2][i] == '-':
                seq = ''
                while i < len(s[0]) and s[2][i] == '-':
                    seq += s[0][i]
                    i += 1
                    gaps7 += 1
                # (deletion in 7 is actually deletion in mapping)
                translation[c6] = mutations[c6] = dict(
                    pos=c6,
                    op='DEL.{}'.format(seq),
                    dbsnp='*',
                    old='{}:{}{}:{}'.format(
                        pseudogene.name, 'e' if ei < len(gene.exons) else 'i',
                        ei + 1 if ei < len(gene.exons) else ei -
                        len(gene.exons) + 1, c7),
                    old_pos=c7)
                continue
            if s[0][i] != s[2][i]:
                translation[c6] = mutations[c6] = dict(
                    pos=c6,
                    op='SNP.{}{}'.format(s[0][i], s[2][i]),
                    dbsnp='*',
                    old='{}:{}{}:{}'.format(
                        pseudogene.name, 'e' if ei < len(gene.exons) else 'i',
                        ei + 1 if ei < len(gene.exons) else ei -
                        len(gene.exons) + 1, c7),
                    old_pos=c7)
            elif c6 not in mutations:  # do not overwrite insertions
                translation[c6] = dict(old_pos=c7)
            i += 1

    return mutations, translation
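# The while-loop above walks an alignment column by column and turns gaps and
# mismatches into INS/DEL/SNP records. A minimal, self-contained sketch of that
# classification step, operating on two already-aligned strings (gaps as '-');
# the toy sequences and the `classify_alignment` name are illustrative only:

def classify_alignment(aln_a, aln_b):
    """Return (position_in_ungapped_a, op) tuples describing b relative to a."""
    assert len(aln_a) == len(aln_b)
    ops = []
    pos_a = 0  # coordinate in the ungapped first sequence
    i = 0
    while i < len(aln_a):
        if aln_a[i] == '-':                               # present only in b -> insertion
            seq = ''
            while i < len(aln_a) and aln_a[i] == '-':
                seq += aln_b[i]
                i += 1
            ops.append((pos_a, 'INS.' + seq.lower()))
        elif aln_b[i] == '-':                             # present only in a -> deletion
            seq = ''
            while i < len(aln_a) and aln_b[i] == '-':
                seq += aln_a[i]
                i += 1
                pos_a += 1
            ops.append((pos_a - len(seq), 'DEL.' + seq))
        else:
            if aln_a[i] != aln_b[i]:                      # substitution
                ops.append((pos_a, 'SNP.{}{}'.format(aln_a[i], aln_b[i])))
            i += 1
            pos_a += 1
    return ops

# classify_alignment('ACGT-TA', 'ACCTGTA') -> [(2, 'SNP.GC'), (4, 'INS.g')]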
Example #51
    def interlinguish_undefined_operation(self, op, say):
        # CHEAT!: any way to handle these?
        log.debug(2, str(self.id) + " interlinguish_undefined_operation:", op)
        log.debug(2, str(say))
def lmutil():
    """Checks total of available licences for all objects passed"""
    # This is a mess. Tidy.
    pattern="Users of (?P<feature_name>\w*?):  \(Total of (?P<total>\d*?) licenses issued;  Total of (?P<in_use_real>\d*?) licenses in use\)"
    # lmutil_list=[]
    # for key, value in licence_list.items():
    #     lmutil_list.append={"path":value["address"]}

    for key, value in licence_list.items():
        if not value["file_address"]:
            continue 
            
        if not value["feature"]: 
            log.error(key + " must have feature specified in order to check with LMUTIL")
            continue           
            
        # if value["flex_method"] == "lmutil":
        #     return
        features=[]
        lmutil_return=""
        try:
            shell_string="linx64/lmutil " + "lmstat " + "-f " + value["feature"] + " -c " + value["file_address"]
            log.debug(shell_string)
            lmutil_return=subprocess.check_output(shell_string, shell=True).strip()    #Removed .decode("utf-8") as threw error.     
        except Exception as details:
            log.error("Failed to fetch " + key + " " + str(details))
        else:
            for line in (lmutil_return.split("\n")):  
                m = re.match(pattern, line)
                if m:
                    features.append(m.groupdict())

            found=False                

            for feature in features:
                if feature["feature_name"] == value["feature"]:
                    found=True
                    hour_index = dt.datetime.now().hour - 1
                    value["in_use_real"] = int(feature["in_use_real"])

                    if value["total"] != int(feature["total"]):
                        log.warning("LMUTIL shows different total number of licences than recorded. Changing from '" + str(value["total"]) + "' to '" + feature["total"] + "'")
                        value["total"] = int(feature["total"])

                    # Record to running history
                    value["history"].append(value["in_use_real"])

                    # Pop extra array entries
                    while len(value["history"]) > value["history_points"]:
                        value["history"].pop(0)

                    # Find modified in use value
                    interesting = max(value["history"]) - value["in_use_nesi"]
                    # Buffered in-use estimate, capped at the licence total and rounded to a whole licence.
                    value["in_use_modified"] = round(min(
                        max(interesting + value["buffer_constant"], interesting * (1 + value["buffer_factor"])),
                        value["total"],
                    ), 0)

                    # Update average
                    value["day_ave"][hour_index] = (
                        round(
                            ((value["in_use_real"] * settings["point_weight"]) + (value["day_ave"][hour_index] * (1 - settings["point_weight"]))),
                            2,
                        )
                        if value["day_ave"][hour_index]
                        else value["in_use_real"]
                    )
                else:
                    log.info("Untracked Feature " + feature["feature_name"] + ": " + (feature["in_use_real"]) +" of " + (feature["total"]) + "in use.")

            if not found:
                log.error("Feature '" + value["feature"] + "' not found on server for '" + key + "'")
Example #53
def log(msg, *args):  
    logg.debug(msg, *args)
Example #54
def get_variants_from_matched_lines(tumor_line, normal_line):
    u"""mathced tumor line and normal line"""

    hetero_germline_variants = []
    somatic_variants = []

    if (tumor_line.depth < settings.min_depth) or (normal_line.depth < settings.min_depth):
        raise LowDepthError

    if (tumor_line.depth > settings.max_depth) or (normal_line.depth > settings.max_depth):
        raise HighDepthError

    if tumor_line.ref == u'N':
        raise CustomError(u"reference_is_N")

    if len(REGEX_COUNT_W.findall(tumor_line.bases)) < settings.min_variant_supporting_reads:
        raise TooFewVariantReadsError

    tumor_pileup_units = tumor_line.get_bases_with_qualities()
    normal_pileup_units = normal_line.get_bases_with_qualities()

    tumor_profiles = pileup_unit.get_profiles(tumor_pileup_units)
    normal_profiles = pileup_unit.get_profiles(normal_pileup_units)

    for variant_key in tumor_profiles.keys():
        if variant_key == tumor_line.ref:
            # skip for the reference base
            continue
        try:
            tumor_count = tumor_profiles[variant_key]
            if tumor_count < settings.min_variant_supporting_reads:
                raise TooFewVariantReadsError

            tumor_ref_units = [x for x in tumor_pileup_units if x.key() != variant_key]
            tumor_obs_units = [x for x in tumor_pileup_units if x.key() == variant_key]
            normal_ref_units = [x for x in normal_pileup_units if x.key() != variant_key]
            normal_obs_units = [x for x in normal_pileup_units if x.key() == variant_key]

            IndelCoverChecker.update(tumor_line.chromosome, tumor_line.position, tumor_profiles, normal_profiles)

            try:
                normal_count = normal_profiles.get(variant_key, 0)
                if normal_count < settings.min_variant_supporting_reads:
                    raise TooFewVariantReadsError
                v = HeterozygousGermlineVariant.from_pileup_units(tumor_ref_units, tumor_obs_units, normal_ref_units, normal_obs_units)
                v.set_basic_info(variant_key, tumor_line.chromosome, tumor_line.position, tumor_line.ref)
                if v.is_snv():
                    try:
                        triallelic_site_checker.check(tumor_line.ref, tumor_line.chromosome, tumor_line.position,
                                                      tumor_profiles, normal_profiles)
                    except TriallelicSiteError:
                        v.triallelic_site_check = "triallelic"
                    try:
                        IndelCoverChecker.check(tumor_line.chromosome, tumor_line.position)
                    except IndelCoverError:
                        v.indel_cover_check = "indel-cover"
                hetero_germline_variants.append(v)
            except AlleleFreqOutOfRangeError: pass
            except StrandFreqOutOfRangeError: pass
            except TooFewVariantReadsError: pass
            except LowDepthError: pass
            except LowBaseQualityError as e:
                log.debug(u"HeteroGermline: {0}, tumor: {1}, normal: {2}".format(e, tumor_line, normal_line))
            except CustomError as e:
                log.warning(u"HeteroGermline CustomError: {0}, tumor: {1}, normal: {2}".format(e, tumor_line, normal_line))

            try:
                v = SomaticVariant.from_pileup_units(tumor_ref_units, tumor_obs_units, normal_ref_units, normal_obs_units)
                v.set_basic_info(variant_key, tumor_line.chromosome, tumor_line.position, tumor_line.ref)
                v.set_fisher_score()
                if v.is_snv():
                    try:
                        triallelic_site_checker.check(tumor_line.ref, tumor_line.chromosome, tumor_line.position,
                                                      tumor_profiles, normal_profiles)
                    except TriallelicSiteError:
                        v.triallelic_site_check = "triallelic"
                    try:
                        IndelCoverChecker.check(tumor_line.chromosome, tumor_line.position)
                    except IndelCoverError:
                        v.indel_cover_check = "indel-cover"
                somatic_variants.append(v)
            except AlleleFreqOutOfRangeError: pass
            except StrandFreqOutOfRangeError: pass
            except TooManyNormalVariantReadsError: pass
            except TooFewVariantReadsError: pass
            except LowDepthError: pass
            except LowBaseQualityError as e:
                log.debug(u"Somatic: {0}, tumor: {1}, normal: {2}".format(e, tumor_line, normal_line))
            except CustomError as e:
                log.warning(u"Somatic CustomError: {0}, tumor: {1}, normal: {2}".format(e, tumor_line, normal_line))

        except TooFewVariantReadsError: pass
        except CustomError as e:
            log.warning(u"CustomError: {0}, tumor: {1}, normal: {2}".format(e, tumor_line, normal_line))
Example #55
    def parse_bitext(self, obj1, obj2):
        """
      Parse a single pair of objects (two strings, two graphs, or string/graph).
      """
        rhs1type, rhs2type = self.grammar.rhs1_type, self.grammar.rhs2_type
        assert rhs1type in ["string", "hypergraph"
                            ] and rhs2type in ["string", "hypergraph"]

        # Remember size of input objects and figure out Item subclass
        if rhs1type == "string":
            obj1size = len(obj1)
        elif rhs1type == "hypergraph":
            obj1size = len(obj1.triples())
        if rhs2type == "string":
            obj2size = len(obj2)
        elif rhs2type == "hypergraph":
            obj2size = len(obj2.triples())
        grammar = self.grammar
        start_time = time.clock()
        log.chatter('parse...')

        # initialize data structures and lookups
        # we use various tables to provide constant-time lookup of fragments available
        # for shifting, completion, etc.
        chart = ddict(set)

        # TODO: command line filter to switch rule filter on/off
        pgrammar = [grammar[r] for r in grammar.reachable_rules(obj1, obj2)]  # grammar.values()
        queue = deque()  # the items left to be visited
        pending = set()  # a copy of queue with constant-time lookup
        attempted = set()  # a cache of previously-attempted item combinations
        visited = set()  # a cache of already-visited items
        nonterminal_lookup = ddict(set)  # a mapping from labels to graph edges
        reverse_lookup = ddict(set)  # a mapping from outside symbols to open items

        # mapping from words to string indices for each string
        word_terminal_lookup1 = ddict(set)
        word_terminal_lookup2 = ddict(set)

        if rhs1type == "string":
            for i in range(len(obj1)):
                word_terminal_lookup1[obj1[i]].add(i)

        if rhs2type == "string":
            for i in range(len(obj2)):
                word_terminal_lookup2[obj2[i]].add(i)

        # mapping from edge labels to graph edges for each graph
        edge_terminal_lookup1 = ddict(set)
        edge_terminal_lookup2 = ddict(set)

        if rhs1type == "hypergraph":
            for edge in obj1.triples(nodelabels=self.nodelabels):
                edge_terminal_lookup1[edge[1]].add(edge)

        if rhs2type == "hypergraph":
            for edge in obj2.triples(nodelabels=self.nodelabels):
                edge_terminal_lookup2[edge[1]].add(edge)

        for rule in pgrammar:
            item1class = CfgItem if rhs1type == "string" else HergItem
            item2class = CfgItem if rhs2type == "string" else HergItem
            axiom = SynchronousItem(rule,
                                    item1class,
                                    item2class,
                                    nodelabels=self.nodelabels)
            queue.append(axiom)
            pending.add(axiom)
            if axiom.outside_is_nonterminal:
                reverse_lookup[axiom.outside_symbol].add(axiom)

        # keep track of whether we found any complete derivation
        success = False

        # parse
        while queue:
            item = queue.popleft()
            pending.remove(item)
            visited.add(item)
            log.debug('handling', item)

            if item.closed:
                log.debug('  is closed.')
                # check if it's a complete derivation
                if self.successful_biparse(obj1, obj2, item, obj1size,
                                           obj2size):
                    chart['START'].add((item, ))
                    success = True

                # add to nonterminal lookup
                nonterminal_lookup[item.rule.symbol].add(item)

                # wake up any containing rules
                # Unlike in ordinary state-space search, it's possible that we will have
                # to re-visit items which couldn't be merged with anything the first time
                # we saw them, and are waiting for the current item. The reverse_lookup
                # indexes all items by their outside symbol, so we re-append to the queue
                # all items looking for something with the current item's symbol.
                for ritem in reverse_lookup[item.rule.symbol]:
                    if ritem not in pending:
                        queue.append(ritem)
                        pending.add(ritem)

            else:
                if item.outside_is_nonterminal:
                    # complete
                    reverse_lookup[item.outside_symbol].add(item)

                    for oitem in nonterminal_lookup[item.outside_symbol]:
                        log.debug("  oitem:", oitem)
                        if (item, oitem) in attempted:
                            # don't repeat combinations we've tried before
                            continue
                        attempted.add((item, oitem))
                        if not item.can_complete(oitem):
                            log.debug("    fail")
                            continue
                        log.debug("    ok")
                        nitem = item.complete(oitem)
                        chart[nitem].add((item, oitem))
                        if nitem not in pending and nitem not in visited:
                            queue.append(nitem)
                            pending.add(nitem)

                else:
                    # shift ; this depends on the configuration (string/graph -> string/graph)
                    if not item.outside1_is_nonterminal and not item.item1.closed:
                        if rhs1type == "string":
                            new_items = [
                                item.shift_word1(item.outside_object1, index)
                                for index in word_terminal_lookup1[
                                    item.outside_object1]
                                if item.can_shift_word1(
                                    item.outside_object1, index)
                            ]
                        else:
                            assert rhs1type == "hypergraph"
                            new_items = [
                                item.shift_edge1(edge) for edge in
                                edge_terminal_lookup1[item.outside_object1]
                                if item.can_shift_edge1(edge)
                            ]
                    else:
                        assert not item.outside2_is_nonterminal  # Otherwise shift would not be called
                        if rhs2type == "string":
                            new_items = [
                                item.shift_word2(item.outside_object2, index)
                                for index in word_terminal_lookup2[
                                    item.outside_object2]
                                if item.can_shift_word2(
                                    item.outside_object2, index)
                            ]
                        else:
                            assert rhs2type == "hypergraph"
                            new_items = [
                                item.shift_edge2(edge) for edge in
                                edge_terminal_lookup2[item.outside_object2]
                                if item.can_shift_edge2(edge)
                            ]

                    for nitem in new_items:
                        log.debug('  shift', nitem, nitem.shifted)
                        chart[nitem].add((item, ))
                        if nitem not in pending and nitem not in visited:
                            queue.append(nitem)
                            pending.add(nitem)

        if success:
            log.chatter('  success!')
        etime = time.clock() - start_time
        log.chatter('done in %.2fs' % etime)

        # TODO return partial chart
        return chart
Example #56
    jobs = get_jobs(ctr_dirs)
    if not jobs: return
    if Config.remote_host:
        # NOTE: Assuming 256 B of TCP window needed for each job (squeue)
        ssh_connect(Config.remote_host, Config.remote_user, Config.private_key, (2 << 7)*len(jobs)) 

    execute = execute_local if not Config.remote_host else execute_remote
    #args = Config.slurm_bin_path + '/squeue -a -h -o %i:%T -t all -j ' + ','.join(jobs.keys())
    args = Config.slurm_bin_path + '/oarstat -fj ' + '-fj'.join(jobs.keys())
    if '__SLURM_TEST' in os.environ:
        handle = execute(args, env=dict(os.environ))
    else:
        handle = execute(args)
    if handle.returncode != 0:
        debug('Got error code %i from oarstat' % handle.returncode, 'slurm.Scan')
        debug('Error output is:\n' + ''.join(handle.stderr), 'slurm.Scan')

    # Slurm can report StartTime and EndTime in at least these two formats:
    # 2010-02-15T15:30:29 (MDS)
    # 02/15-15:25:15
    # Python does not support duplicate named groups.
    # Have to use separate regex if we want to use named groups.
    #date_MDS = re.compile(r'^(?P<YYYY>\d\d\d\d)-(?P<mm>\d\d)-(?P<dd>\d\d)T(?P<HH>\d\d):(?P<MM>\d\d):(?P<SS>\d\d)$')
    #date_2 = re.compile(r'^(?P<mm>\d\d)/(?P<dd>\d\d)-(?P<HH>\d\d):(?P<MM>\d\d):(?P<SS>\d\d)$')

    date_MDS = re.compile(r'^(?P<YYYY>\d\d\d\d)-(?P<mm>\d\d)-(?P<dd>\d\d) (?P<HH>\d\d):(?P<MM>\d\d):(?P<SS>\d\d)$')
    for line in handle.stdout:
        try:
            localid, state = line.strip().split(':', 1)
        except:
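# The date_MDS regex above (the active variant expects a space, not a 'T',
# between date and time) can be exercised on its own; the timestamp below is a
# hand-written sample, and interpreting it as UTC is an assumption:

import calendar
import re
import time

date_MDS = re.compile(r'^(?P<YYYY>\d\d\d\d)-(?P<mm>\d\d)-(?P<dd>\d\d) '
                      r'(?P<HH>\d\d):(?P<MM>\d\d):(?P<SS>\d\d)$')

m = date_MDS.match('2010-02-15 15:30:29')
if m:
    g = m.groupdict()
    stamp = '%(YYYY)s-%(mm)s-%(dd)s %(HH)s:%(MM)s:%(SS)s' % g
    epoch = calendar.timegm(time.strptime(stamp, '%Y-%m-%d %H:%M:%S'))
    print(epoch)  # seconds since the epoch for the matched timestamp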
Example #57
    def parse_bitext(self, obj1, obj2):
        """
      Parse a single pair of objects (two strings, two graphs, or string/graph).
      """
        rhs1type, rhs2type = self.grammar.rhs1_type, self.grammar.rhs2_type
        assert rhs1type in ["string", "hypergraph"] and rhs2type in ["string", "hypergraph"]

        # Remember size of input objects and figure out Item subclass
        if rhs1type == "string":
            obj1size = len(obj1)
        elif rhs1type == "hypergraph":
            obj1size = len(obj1.triples())
        if rhs2type == "string":
            obj2size = len(obj2)
        elif rhs2type == "hypergraph":
            obj2size = len(obj2.triples())
        grammar = self.grammar
        start_time = time.clock()
        log.chatter("parse...")

        # initialize data structures and lookups
        # we use various tables to provide constant-time lookup of fragments available
        # for shifting, completion, etc.
        chart = ddict(set)

        # TODO: command line filter to switch rule filter on/off
        pgrammar = [grammar[r] for r in grammar.reachable_rules(obj1, obj2)]  # grammar.values()
        queue = deque()  # the items left to be visited
        pending = set()  # a copy of queue with constant-time lookup
        attempted = set()  # a cache of previously-attempted item combinations
        visited = set()  # a cache of already-visited items
        nonterminal_lookup = ddict(set)  # a mapping from labels to graph edges
        reverse_lookup = ddict(set)  # a mapping from outside symbols to open items

        # mapping from words to string indices for each string
        word_terminal_lookup1 = ddict(set)
        word_terminal_lookup2 = ddict(set)

        if rhs1type == "string":
            for i in range(len(obj1)):
                word_terminal_lookup1[obj1[i]].add(i)

        if rhs2type == "string":
            for i in range(len(obj2)):
                word_terminal_lookup2[obj2[i]].add(i)

        # mapping from edge labels to graph edges for each graph
        edge_terminal_lookup1 = ddict(set)
        edge_terminal_lookup2 = ddict(set)

        if rhs1type == "hypergraph":
            for edge in obj1.triples(nodelabels=self.nodelabels):
                edge_terminal_lookup1[edge[1]].add(edge)

        if rhs2type == "hypergraph":
            for edge in obj2.triples(nodelabels=self.nodelabels):
                edge_terminal_lookup2[edge[1]].add(edge)

        for rule in pgrammar:
            item1class = CfgItem if rhs1type == "string" else HergItem
            item2class = CfgItem if rhs2type == "string" else HergItem
            axiom = SynchronousItem(rule, item1class, item2class, nodelabels=self.nodelabels)
            queue.append(axiom)
            pending.add(axiom)
            if axiom.outside_is_nonterminal:
                reverse_lookup[axiom.outside_symbol].add(axiom)

        # keep track of whether we found any complete derivation
        success = False

        # parse
        while queue:
            item = queue.popleft()
            pending.remove(item)
            visited.add(item)
            log.debug("handling", item)

            if item.closed:
                log.debug("  is closed.")
                # check if it's a complete derivation
                if self.successful_biparse(obj1, obj2, item, obj1size, obj2size):
                    chart["START"].add((item,))
                    success = True

                # add to nonterminal lookup
                nonterminal_lookup[item.rule.symbol].add(item)

                # wake up any containing rules
                # Unlike in ordinary state-space search, it's possible that we will have
                # to re-visit items which couldn't be merged with anything the first time
                # we saw them, and are waiting for the current item. The reverse_lookup
                # indexes all items by their outside symbol, so we re-append to the queue
                # all items looking for something with the current item's symbol.
                for ritem in reverse_lookup[item.rule.symbol]:
                    if ritem not in pending:
                        queue.append(ritem)
                        pending.add(ritem)

            else:
                if item.outside_is_nonterminal:
                    # complete
                    reverse_lookup[item.outside_symbol].add(item)

                    for oitem in nonterminal_lookup[item.outside_symbol]:
                        log.debug("  oitem:", oitem)
                        if (item, oitem) in attempted:
                            # don't repeat combinations we've tried before
                            continue
                        attempted.add((item, oitem))
                        if not item.can_complete(oitem):
                            log.debug("    fail")
                            continue
                        log.debug("    ok")
                        nitem = item.complete(oitem)
                        chart[nitem].add((item, oitem))
                        if nitem not in pending and nitem not in visited:
                            queue.append(nitem)
                            pending.add(nitem)

                else:
                    # shift ; this depends on the configuration (string/graph -> string/graph)
                    if not item.outside1_is_nonterminal and not item.item1.closed:
                        if rhs1type == "string":
                            new_items = [
                                item.shift_word1(item.outside_object1, index)
                                for index in word_terminal_lookup1[item.outside_object1]
                                if item.can_shift_word1(item.outside_object1, index)
                            ]
                        else:
                            assert rhs1type == "hypergraph"
                            new_items = [
                                item.shift_edge1(edge)
                                for edge in edge_terminal_lookup1[item.outside_object1]
                                if item.can_shift_edge1(edge)
                            ]
                    else:
                        assert not item.outside2_is_nonterminal  # Otherwise shift would not be called
                        if rhs2type == "string":
                            new_items = [
                                item.shift_word2(item.outside_object2, index)
                                for index in word_terminal_lookup2[item.outside_object2]
                                if item.can_shift_word2(item.outside_object2, index)
                            ]
                        else:
                            assert rhs2type == "hypergraph"
                            new_items = [
                                item.shift_edge2(edge)
                                for edge in edge_terminal_lookup2[item.outside_object2]
                                if item.can_shift_edge2(edge)
                            ]

                    for nitem in new_items:
                        log.debug("  shift", nitem, nitem.shifted)
                        chart[nitem].add((item,))
                        if nitem not in pending and nitem not in visited:
                            queue.append(nitem)
                            pending.add(nitem)

        if success:
            log.chatter("  success!")
        etime = time.clock() - start_time
        log.chatter("done in %.2fs" % etime)

        # TODO return partial chart
        return chart
Example #58
def Submit(config, jobdesc):
    """
    Submits a job to the SLURM queue specified in arc.conf. This method executes the required
    RunTimeEnvironment scripts and assembles the bash job script. The job script is
    written to file and submitted with ``sbatch``.

    :param str config: path to arc.conf
    :param jobdesc: job description object
    :type jobdesc: :py:class:`arc.JobDescription`
    :return: local job ID if successfully submitted, else ``None``
    :rtype: :py:obj:`str`
    """

    configure(config, set_slurm)

    validate_attributes(jobdesc)
    if Config.remote_host:
        ssh_connect(Config.remote_host, Config.remote_user, Config.private_key)
        
    # Run RTE stage0
    debug('----- starting slurmSubmitter.py -----', 'slurm.Submit')
    RTE_stage0(jobdesc, 'SLURM', SBATCH_ACCOUNT = 'OtherAttributes.SBATCH_ACCOUNT')

    set_grid_global_jobid(jobdesc)

    # Create script file and write job script
    jobscript = get_job_script(jobdesc)
    script_file = write_script_file(jobscript)
    debug('Created file %s' % script_file, 'slurm.Submit')

    debug('SLURM jobname: %s' % jobdesc.Identification.JobName, 'slurm.Submit')
    debug('SLURM job script built', 'slurm.Submit')
    debug('----------------- BEGIN job script -----', 'slurm.Submit')
    emptylines = 0
    for line in jobscript.split('\n'):
        if not line:
            emptylines += 1
        else:
            debug(emptylines*'\n' + line.replace("%", "%%"), 'slurm.Submit')
            emptylines = 0
    if emptylines > 1:
        debug((emptylines-1)*'\n', 'slurm.Submit')
    debug('----------------- END job script -----', 'slurm.Submit')

    if 'ONLY_WRITE_JOBSCRIPT' in os.environ and os.environ['ONLY_WRITE_JOBSCRIPT'] == 'yes':
        return "-1"

    #######################################
    #  Submit the job
    ######################################

    execute = execute_local if not Config.remote_host else execute_remote
    directory = jobdesc.OtherAttributes['joboption;directory']

    debug('Session directory: %s' % directory, 'slurm.Submit')

    SLURM_TRIES = 0
    handle = None
    while SLURM_TRIES < 10:
        args = '%s/oarsub %s' % (Config.slurm_bin_path, script_file)
        verbose('Executing \'%s\' on %s' % 
                (args, Config.remote_host if Config.remote_host else 'localhost'), 'slurm.Submit')
        handle = execute(args)
        if handle.returncode == 0:
            break
        if handle.returncode == 198 or wait_for_queue(handle):
            debug('Waiting for queue to decrease', 'slurm.Submit')
            time.sleep(60)
            SLURM_TRIES += 1
            continue
        break # Other error than full queue

    if handle.returncode == 0:
        # TODO: Test what happens when the jobqueue is full or when the slurm
        # ctld is not responding. SLURM 1.x and 2.2.x outputs the jobid into 
        # STDERR and STDOUT respectively. Concat them, and let sed sort it out. 
        # From the exit code we know that the job was submitted, so this
        # is safe. Ulf Tigerstedt <*****@*****.**> 1.5.2011 
        localid = get_job_id(handle)
        if localid:
            debug('Job submitted successfully!', 'slurm.Submit')
            debug('Local job id: ' + localid, 'slurm.Submit')
            debug('----- exiting slurmSubmitter.py -----', 'slurm.Submit')
            return localid

    debug('job *NOT* submitted successfully!', 'slurm.Submit')
    debug('got error code from sbatch: %d !' % handle.returncode, 'slurm.Submit')
    debug('Output is:\n' + ''.join(handle.stdout), 'slurm.Submit')
    debug('Error output is:\n' + ''.join(handle.stderr), 'slurm.Submit')
    debug('----- exiting slurmSubmitter.py -----', 'slurm.Submit')
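# get_job_id() is not shown in this dump. Following the comment above (the job
# id may land on stdout or stderr, so both are scanned), a minimal stand-in
# could look like the sketch below; the function name, its signature and the
# sample output line are assumptions, not the module's real implementation.

import re

def extract_job_id(stdout_lines, stderr_lines):
    """Return the first run of digits found in stdout+stderr, or None."""
    for line in list(stdout_lines) + list(stderr_lines):
        m = re.search(r'(\d+)', line)
        if m:
            return m.group(1)
    return None

# extract_job_id(['Submitted batch job 123456\n'], []) -> '123456'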