Example #1
 def submit_job(self, func, job_key, args=None, kwargs=None, trigger=None, job_id=None,
                 replace_exist=False, filter_key='', filter_value='', **trigger_args):
     """
         submit job to master through rpc
         :type func: str or callable obj or unicode
         :type job_key: str or unicode
         :type args: tuple or list
         :type kwargs: dict
         :type trigger: str or unicode
         :type job_id: str or unicode
         :type replace_exist: bool
         :type trigger_args: dict
     """
     job_key = '%s:%s' % (self.name, job_key)
     # use the worker's timezone if the trigger doesn't provide a specific `timezone` configuration
     trigger_args.setdefault('timezone', self.timezone)
     job_in_dict = {
         'id': job_id,
         'func': func,
         'args': args,
         'trigger': create_trigger(trigger, trigger_args) if trigger else None,
         'kwargs': kwargs,
         'filter_key': '%s_%s' % (self.name, filter_key),
         'filter_value': filter_value,
     }
     job = Job(**job_in_dict)
     rpc_client_call('submit_job', Binary(job.serialize()),
                     job_key, job.id, replace_exist)
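The snippet above only assembles the Job payload and ships it to the master over RPC. A minimal usage sketch might look like the following; the RQWorker class name, the 'interval' trigger type and the seconds keyword are illustrative assumptions, not taken from the snippet:

def send_report(address):
    print("reporting to %s" % address)

worker = RQWorker(name='reporting')          # hypothetical worker subclass
worker.submit_job(send_report,
                  job_key='report_queue',    # becomes 'reporting:report_queue'
                  args=('ops@example.com',),
                  trigger='interval',        # assumed trigger type
                  seconds=300,               # forwarded through **trigger_args
                  job_id='report-1',
                  replace_exist=True)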
Example #2
 def _build_jobs(self, response, data, now, ignoreempty=False):
   roll_ntime = 1
   expiry = 60
   isp2pool = False
   headers = response.getheaders()
   for h in headers:
     if h[0].lower() == "x-is-p2pool" and h[1].lower() == "true": isp2pool = True
     elif h[0].lower() == "x-roll-ntime" and h[1] and h[1].lower() != "n":
       roll_ntime = 60
       parts = h[1].split("=", 1)
       if parts[0].strip().lower() == "expire":
         try: roll_ntime = int(parts[1])
         except ValueError: pass
       expiry = roll_ntime
   if isp2pool: expiry = 60
   self.stats.supports_rollntime = roll_ntime > 1
   response = data.decode("utf_8")
   if len(response) == 0 and ignoreempty: return
   response = json.loads(response)
   data = unhexlify(response["result"]["data"].encode("ascii"))
   target = unhexlify(response["result"]["target"].encode("ascii"))
   try: identifier = int(response["result"]["identifier"])
   except (KeyError, TypeError, ValueError): identifier = None
   midstate = Job.calculate_midstate(data)
   prefix = data[:68]
   timebase = struct.unpack(">I", data[68:72])[0]
   suffix = data[72:]
   return [Job(self.core, self, now + expiry - self.settings.expirymargin, prefix + struct.pack(">I", timebase + i) + suffix, target, midstate, identifier) for i in range(roll_ntime)]
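The list comprehension at the end rolls the big-endian ntime field at byte offset 68..72 of the header data to turn one getwork response into roll_ntime distinct jobs. The same arithmetic as a standalone sketch, using fake header bytes instead of real pool data:

import struct

data = bytes(68) + struct.pack(">I", 1700000000) + bytes(8)   # fake 80-byte header data
prefix, suffix = data[:68], data[72:]
timebase = struct.unpack(">I", data[68:72])[0]
rolled = [prefix + struct.pack(">I", timebase + i) + suffix for i in range(3)]
print([struct.unpack(">I", b[68:72])[0] for b in rolled])      # [1700000000, 1700000001, 1700000002]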
Example #3
 def submit_job(self, serialized_job, job_key, job_id, replace_exist):
     """
         Receive submit_job rpc request from worker.
         :type serialized_job: str or xmlrpclib.Binary
         :type job_key: str
         :type job_id: str
         :type replace_exist: bool
     """
     self.log.debug('client call submit job, id=%s, key=%s' % (job_id, job_key))
     if isinstance(serialized_job, Binary):
         serialized_job = serialized_job.data
     job_in_dict = Job.deserialize_to_dict(serialized_job)
     # if the job doesn't contain a trigger, enqueue it into the job queue immediately
     if not job_in_dict['trigger']:
         self._enqueue_job(job_key, serialized_job)
     # otherwise, store the job in the job store first
     else:
         # do we need a lock here?
         with self.jobstore_lock:
             try:
                 self.jobstore.add_job(job_id, job_key, job_in_dict['next_run_time'], serialized_job)
             except JobAlreadyExist:
                 if replace_exist:
                     self.jobstore.update_job(job_id, job_key, job_in_dict['next_run_time'], serialized_job)
                 else:
                     self.log.warning('submit job error. job id %s already exists' % job_id)
         # wake up the master once the new job has been stored in the job store
         self.wake_up()
Example #4
    def start_new_task(task, user_proc_exit_cb=None, save_buffer=True):
        worker = TornadoWorker.get_instance()
        logging.debug("we have new job to start %s" % str(task))
        core = Core.get_instance()
        new_job = Job.create_job(task, core)
        logging.debug(new_job)

        if new_job.task_id not in worker.pool["buffer"]:
            worker.pool["buffer"][new_job.task_id] = []

        task.update(status=JobDataModel.STATUS_RUNNING)
        proc_exit_cb = None
        if user_proc_exit_cb:
            proc_exit_cb = lambda job, exit_code: TornadoWorker.process_finished(
                worker, job, exit_code, user_proc_exit_cb)
        else:
            proc_exit_cb = lambda job, exit_code: TornadoWorker.process_finished(
                worker, job, exit_code)

        read_logs = None
        if save_buffer:
            read_logs = lambda lines, log_level: TornadoWorker.async_read_logs(
                worker, new_job, lines, log_level)
        else:
            read_logs = lambda lines, log_level: TornadoWorker.async_write_logs2stdout(
                worker, new_job, lines, log_level)
        job_log_manager = LogManager(core)

        new_job.start_job_async(job_log_manager, proc_exit_cb, read_logs)
Example #5
File: cli.py Project: rodipm/OS
    def add_command(self, run_time, number):
        run_time = int(run_time)
        number = int(number)
        finish_events = {
            "disco": DiskFinishedEvent,
            "leitora1": LeitoraUmFinishedEvent,
            "leitora2": LeitoraDoisFinishedEvent,
            "impressora1": ImpressoraUmFinishedEvent,
            "impressora2": ImpressoraDoisFinishedEvent
        }

        for _ in range(number):
            io = {
                "disco": None,
                "leitora1": None,
                "leitora2": None,
                "impressora1": None,
                "impressora2": None
            }

            last_start_cycles = [1]

            for dev in io.keys():
                io_requests = []
                has_device = bool(random.random() < 0.9)

                if not has_device:
                    continue

                number_requests = random.randint(1, 5)

                for i in range(number_requests):
                    io_cycles = random.randint(*io_config[dev])
                    start_cycle = 1

                    try:
                        start_cycle = random.randint(
                            last_start_cycles[-1],
                            i * run_time // number_requests - io_cycles)
                        if start_cycle in last_start_cycles:
                            continue

                        last_start_cycles.append(start_cycle)
                    except ValueError:
                        continue

                    io_requests.append((start_cycle, io_cycles))

                if len(io_requests):
                    io[dev] = Device(dev, io_requests, finish_events[dev])

            job_priority = random.choice(list(JobPriority))
            job_size = random.randint(10, 70)

            new_job = Job(self.job_ids, run_time, job_priority, io, job_size)

            self.job_ids += 1

            self.os.add_job(new_job)
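io_config is referenced above but not shown in this snippet; given the call random.randint(*io_config[dev]), a plausible shape is a (min_cycles, max_cycles) pair per device, for example:

io_config = {
    "disco": (10, 40),        # assumed cycle ranges, for illustration only
    "leitora1": (5, 15),
    "leitora2": (5, 15),
    "impressora1": (20, 60),
    "impressora2": (20, 60),
}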
Example #6
    def wrapper():

        if Agent.get('agent_status') == 'disabled':
            return jsonify({'status': 'disabled'})
        Agent.set('agent_status', 'busy')

        log.info(f'processing request: \n'
                 f'{[{k:v} for k,v in request.args.items()]}\n'
                 f'role: {api.__name__}')

        try:
            job = Job(request)
            job.set('role', api.__name__)
            log.info(f'job object created with id: {job.job_id}')
            return api(job)
        except Exception as e:
            log.info(f'error in job processing: {e}', report=True)
Example #7
    def run_arriba(self):
        """
        """

        jobs = []
        for sample in self.samples:
            if len(sample.readsets) > 1:
                raise Exception("Error: only one read set per sample allowed")
            if sample.readsets[0].bam:  # .bam input
                fastq_dir = os.path.join("fusions", "picard_sam_to_fastq",
                                         sample.name)
                bam = sample.readsets[0].bam
                left_fastq = os.path.join(
                    self._output_dir, fastq_dir,
                    os.path.basename(re.sub(r"\.bam$", ".pair1.fastq.gz",
                                            bam)))
                right_fastq = os.path.join(
                    self._output_dir, fastq_dir,
                    os.path.basename(re.sub(r"\.bam$", ".pair2.fastq.gz",
                                            bam)))
            elif sample.readsets[0].fastq2 and sample.readsets[0].fastq2.split(
                    ".")[-1] == "gz":
                left_fastq = sample.readsets[0].fastq1
                right_fastq = sample.readsets[0].fastq2
            else:
                raise Exception(
                    "Error: only .bam and .fastq.gz inputs allowed")
            output_dir = os.path.join("fusions", "arriba", sample.name)
            # JOBS
            chgdir_job = Job(command="cd " + output_dir)
            back_to_outdir_job = Job(command="cd " + self._output_dir)
            # CONCAT
            job = concat_jobs([
                Job(command="mkdir -p " + output_dir), chgdir_job,
                arriba.run(left_fastq,
                           right_fastq,
                           self._output_dir,
                           output_dir,
                           keep_bam=self.args.keep_bams), back_to_outdir_job
            ],
                              name="run_arriba." + sample.name)

            job.samples = [sample]
            jobs.append(job)

        return jobs
Example #8
 def run(self):
     for job_config in self.job_configs:
         assert job_config.submit_time >= self.env.now
         yield self.env.timeout(job_config.submit_time - self.env.now)
         job = Job(self.env, job_config)
         # print('a task arrived at time %f' % self.env.now)
         self.cluster.add_job(job)
     self.destroyed = True
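The loop above sleeps until each job's configured submit_time before handing it to the cluster. A minimal, self-contained sketch of the same arrival pattern, assuming the environment is a simpy Environment (which the yield env.timeout(...) style suggests):

import simpy

def broker(env, submit_times):
    for t in sorted(submit_times):
        yield env.timeout(t - env.now)     # wait until this job's submit time
        print("job arrives at", env.now)

env = simpy.Environment()
env.process(broker(env, [1.0, 3.5, 3.5, 7.0]))
env.run()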
Example #9
    def run(self, guids: List[str]):
        """
        Run a module

        Usage: 
            run <guids>...
            run -h | --help

        Arguments:
            guids    session guids to run modules on

        Options:
            -h, --help   Show dis
        """
        job = Job(self.selected)
        for guid in guids:
            ipc_server.publish(NEW_JOB, (guid, job.encode()))
Example #10
    def gunzip_fastq(self):
        """
        Gunzip .fastq.gz files or symlink if already uncompressed
        """
        jobs = []
        for readset in self.readsets:
            out_dir = os.path.join("fusions", "gunzip_fastq",
                                   readset.sample.name)
            # Find input readset FASTQs first from previous trimmomatic job,
            # then from original FASTQs in the readset sheet
            if readset.run_type == "PAIRED_END":
                candidate_input_files = []
                if readset.fastq1 and readset.fastq2:
                    candidate_input_files.append(
                        [readset.fastq1, readset.fastq2])
                if readset.bam:
                    picard_dir = os.path.join("fusions", "picard_sam_to_fastq",
                                              readset.sample.name)
                    candidate_input_files.append([
                        os.path.join(
                            picard_dir,
                            os.path.basename(
                                re.sub(r"\.bam$", ".pair1.fastq.gz",
                                       readset.bam))),
                        os.path.join(
                            picard_dir,
                            os.path.basename(
                                re.sub(r"\.bam$", ".pair2.fastq.gz",
                                       readset.bam)))
                    ])
                if readset.cram:
                    picard_dir = os.path.join("fusions", "picard_sam_to_fastq",
                                              readset.sample.name)
                    candidate_input_files.append([
                        os.path.join(
                            picard_dir,
                            os.path.basename(readset.cram) +
                            ".pair1.fastq.gz"),
                        os.path.join(
                            picard_dir,
                            os.path.basename(readset.cram) + ".pair2.fastq.gz")
                    ])
                [fastq1,
                 fastq2] = self.select_input_files(candidate_input_files)
            else:
                raise Exception("Error: run type \"" + readset.run_type +
                                "\" is invalid for readset \"" + readset.name +
                                "\" (should be PAIRED_END)!")
            gunzip1_job = gunzip.gunzip_fastq(fastq1, out_dir)
            gunzip2_job = gunzip.gunzip_fastq(fastq2, out_dir)
            job = concat_jobs(
                [Job(command="mkdir -p " + out_dir), gunzip1_job, gunzip2_job],
                name="gunzip_fastq." + readset.sample.name + "." +
                readset.name)

            jobs.append(job)

        return jobs
Example #11
    def chimerascan(self):
        """
        Run chimerascan to call gene fusions
        """
        jobs = []
        for sample in self.samples:
            fastq1, fastq2 = self.select_input_fastq(sample)
            out_dir = os.path.join("fusions", "chimerascan", sample.name)
            chimerascan_job = chimerascan.run(fastq1, fastq2, out_dir)
            job = concat_jobs([
                Job(command="mkdir -p " + out_dir),
                Job(command="rm -r " + out_dir), chimerascan_job
            ],
                              name="chimerascan." + sample.name)

            jobs.append(job)

        return jobs
Example #12
    def __init__(self, guid, remote_address, pubkey_xml):
        self.guid = guid
        self.address = remote_address
        self.data = None
        self.checkin_time = None
        self.crypto = ECDHE(pubkey_xml)
        self.jobs = Queue()

        self.add_job(Job(command=('checkin', '')))
Example #13
 def _build_jobs(self, response, data, epoch, now, source, ignoreempty = False, discardiffull = False):
   decoded = data.decode("utf_8")
   if len(decoded) == 0 and ignoreempty:
     self.core.log(self, "Got empty %s response\n" % source, 500)
     return
   decoded = json.loads(decoded)
   data = unhexlify(decoded["result"]["data"].encode("ascii"))
   target = unhexlify(decoded["result"]["target"].encode("ascii"))
   try: identifier = int(decoded["result"]["identifier"])
   except (KeyError, TypeError, ValueError): identifier = None
   if identifier != self.lastidentifier:
     self._cancel_jobs()
     self.lastidentifier = identifier
   self.blockchain.check_job(Job(self.core, self, 0, data, target, True, identifier))
   roll_ntime = 1
   expiry = 60
   isp2pool = False
   headers = response.getheaders()
   for h in headers:
     if h[0].lower() == "x-is-p2pool" and h[1].lower() == "true": isp2pool = True
     elif h[0].lower() == "x-roll-ntime" and h[1] and h[1].lower() != "n":
       roll_ntime = 60
       parts = h[1].split("=", 1)
       if parts[0].strip().lower() == "expire":
         try: roll_ntime = int(parts[1])
         except ValueError: pass
       expiry = roll_ntime
   if isp2pool: expiry = 60
   self.stats.supports_rollntime = roll_ntime > 1
   if epoch != self.jobepoch:
     self.core.log(self, "Discarding %d jobs from %s response because request was issued before flush\n" % (roll_ntime, source), 500)
     with self.stats.lock: self.stats.jobsreceived += roll_ntime
     return
   if self.core.workqueue.count > self.core.workqueue.target * (1 if discardiffull else 5):
     self.core.log(self, "Discarding %d jobs from %s response because work buffer is full\n" % (roll_ntime, source), 500)
     with self.stats.lock: self.stats.jobsreceived += roll_ntime
     return
   expiry += now - self.settings.expirymargin
   midstate = Job.calculate_midstate(data)
   prefix = data[:68]
   timebase = struct.unpack(">I", data[68:72])[0]
   suffix = data[72:]
   return [Job(self.core, self, expiry, prefix + struct.pack(">I", timebase + i) + suffix, target, midstate, identifier) for i in range(roll_ntime)]
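Both _build_jobs variants parse the X-Roll-NTime header the same way: any value other than "n" enables rolling with a default 60-second window, and an "expire=<seconds>" value overrides it. The same logic, extracted as a standalone helper purely for clarity:

def parse_roll_ntime(value):
    roll_ntime = 60                         # rolling allowed, default window
    parts = value.split("=", 1)
    if parts[0].strip().lower() == "expire":
        try:
            roll_ntime = int(parts[1])
        except ValueError:
            pass
    return roll_ntime

print(parse_roll_ntime("expire=120"))       # 120
print(parse_roll_ntime("Y"))                # 60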
Example #14
def main(hub_id, dataset_id, version):
    conn = psql.connect('')
    queue = pq.PQ(conn=conn)['jobs']

    queue.put(
        Job(1, 'verify_partitions', {
            'hub_id': hub_id,
            'dataset_id': dataset_id,
            'version': version,
        }).__dict__)
Example #15
    def ericscript(self):
        """
        Run EricScript to call gene fusions
        """
        jobs = []
        for sample in self.samples:
            fastq1, fastq2 = self.select_input_fastq(sample)
            out_dir = os.path.join("fusions", "ericscript", sample.name)
            ericscript_job = ericscript.ericscript(
                fastq1, fastq2, out_dir, keep_bam=self.args.keep_bams)
            job = concat_jobs([
                Job(command="mkdir -p " + out_dir),
                Job(command="rm -r " + out_dir), ericscript_job
            ],
                              name="ericscript." + sample.name)

            jobs.append(job)

        return jobs
Example #16
    def start(self):
        #self._install_signal_handlers()
        if self.running:
            raise AlreadyRunningException

        self._stopped = False
        self.log.debug('elric worker running..')
        while self.running:
            key, serialized_job = RedisJobQueue.dequeue_any(self.server, self.listen_keys)
            job = Job.deserialize(serialized_job)
            self.log.debug('get job id=[%s] func=[%s] from key %s' % (job.id, job.func, key))
            self.executor.execute_job(job)
Example #17
    def start(self):
        """
            Start elric master. Select all due jobs from jobstore and enqueue them into redis queue.
            Then update due jobs' information into jobstore.
        :return:
        """
        if self.running:
            raise AlreadyRunningException
        self._stopped = False
        self.log.debug('elric master start...')

        while True:
            now = datetime.now(self.timezone)
            wait_seconds = None
            with self.jobstore_lock:
                for job_id, job_key, serialized_job in self.jobstore.get_due_jobs(now):
                    # enqueue due job into redis queue
                    self._enqueue_job(job_key, serialized_job)
                    # update job's information, such as next_run_time
                    job_in_dict = Job.deserialize_to_dict(serialized_job)
                    last_run_time = Job.get_serial_run_times(job_in_dict, now)
                    if last_run_time:
                        next_run_time = Job.get_next_trigger_time(job_in_dict, last_run_time[-1])
                        if next_run_time:
                            job_in_dict['next_run_time'] = next_run_time
                            self.update_job(job_id, job_key, next_run_time, Job.dict_to_serialization(job_in_dict))
                        else:
                            # if job has no next run time, then remove it from jobstore
                            self.remove_job(job_id=job_id)

                # get the next closest run time from the jobstore and use it as the wake-up time
                closest_run_time = self.jobstore.get_closest_run_time()

            if closest_run_time is not None:
                wait_seconds = max(timedelta_seconds(closest_run_time - now), 0)
                self.log.debug('Next wakeup is due at %s (in %f seconds)' % (closest_run_time, wait_seconds))
            self._event.wait(wait_seconds if wait_seconds is not None else self.MAX_WAIT_TIME)
            self._event.clear()
Example #18
 def get_jobs(self):
     jobs = []
     for task in self.tasks:
         for job_number in range(0, task.get_number_of_jobs(self.H)):
             start = task.phase + task.period * job_number
             end = task.deadline + task.period * job_number + task.phase
             job = Job(task=task,
                       name=job_number + 1,
                       release=start,
                       deadline=end,
                       ex_time=task.ex_time,
                       status=1)
             jobs.append(job)
     return jobs
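The release and deadline values above are simple phase/period bookkeeping; a sketch with a stand-in task object (the real Task and Job classes come from the surrounding project) shows the values produced over one hyperperiod:

from types import SimpleNamespace

task = SimpleNamespace(phase=0, period=5, deadline=5, ex_time=2)   # stand-in task
H = 15                                                             # hyperperiod
for job_number in range(H // task.period):
    release = task.phase + task.period * job_number
    deadline = task.deadline + task.period * job_number + task.phase
    print(job_number + 1, release, deadline)    # 1 0 5 / 2 5 10 / 3 10 15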
Example #19
    def integrate(self):
        """
        Run Integrate to call gene fusions
        """
        jobs = []
        for sample in self.samples:
            input_dir = os.path.join("fusions", "tophat2", sample.name)
            accepted_bam = os.path.join(self.output_dir, input_dir,
                                        "accepted_hits.bam")
            unmapped_bam = os.path.join(self.output_dir, input_dir,
                                        "unmapped.bam")

            out_dir = os.path.join("fusions", "integrate", sample.name)
            integrate_job = integrate.integrate(accepted_bam, unmapped_bam,
                                                out_dir)
            job = concat_jobs([
                Job(command="mkdir -p " + out_dir),
                Job(command="cd " + out_dir), integrate_job,
                Job(command="cd -")
            ],
                              name="integrate." + sample.name)
            jobs.append(job)
        return jobs
Example #20
    def sleep(self, guid: str, interval: int):
        """
        Set the checkin interval for an agent

        Usage: sleep <guid> <interval> [-h]

        Arguments:
            guid  filter by session's guid
            interval  checkin interval in milliseconds
        """

        for session in self.sessions:
            if session == guid:
                session.add_job(Job(command=('sleep', int(interval))))
Example #21
    def fusionmap(self):
        """
        Run FusionMap to call gene fusions
        """
        jobs = []
        for sample in self.samples:
            # add pipeline top output dir as input to the bfx fusionmap script
            # self._output_dir assigned from command line args in pipeline.py
            top_dir = self._output_dir

            fastq1, fastq2 = self.select_input_fastq(sample)
            out_dir = os.path.join("fusions", "fusionmap", sample.name)
            fusionmap_job = fusionmap.fusionmap(fastq1, fastq2, out_dir,
                                                top_dir)
            job = concat_jobs([
                Job(command="mkdir -p " + out_dir), fusionmap_job,
                Job(command="ls " + out_dir + "/02_RNA*")
            ],
                              name="fusionmap." + sample.name)

            jobs.append(job)

        return jobs
Example #22
    def run_star_seqr(self):
        """
        RNA Fusion Detection and Quantification using STAR
        https://github.com/ExpressionAnalysis/STAR-SEQR
        """

        jobs = []
        for sample in self.samples:
            if len(sample.readsets) > 1:
                raise Exception("Error: only one read set per sample allowed")
            if sample.readsets[0].bam:  # .bam input
                fastq_dir = os.path.join("fusions", "picard_sam_to_fastq",
                                         sample.name)
                bam = sample.readsets[0].bam
                # fastq1 = os.path.join(out_dir, os.path.basename(re.sub("\.bam$", ".pair1.fastq.gz", out_bam)))
                # fastq2 = os.path.join(out_dir, os.path.basename(re.sub("\.bam$", ".pair2.fastq.gz", out_bam)))
                left_fastq = os.path.join(
                    fastq_dir,
                    os.path.basename(re.sub(r"\.bam$", ".pair1.fastq.gz",
                                            bam)))
                right_fastq = os.path.join(
                    fastq_dir,
                    os.path.basename(re.sub(r"\.bam$", ".pair2.fastq.gz",
                                            bam)))
            elif sample.readsets[0].fastq2 and sample.readsets[0].fastq2.split(
                    ".")[-1] == "gz":
                # print(sample.readsets[0].fastq2)
                # print(sample.readsets[0].fastq2.split(".")[-1])
                left_fastq = sample.readsets[0].fastq1
                right_fastq = sample.readsets[0].fastq2
            else:
                raise Exception(
                    "Error: only .bam and .fastq.gz inputs allowed")
            output_dir = os.path.join("fusions", "star_seqr", sample.name)

            job = concat_jobs([
                Job(command="mkdir -p " + output_dir),
                star_seqr.run(left_fastq,
                              right_fastq,
                              output_dir,
                              sample.name,
                              keep_bam=self.args.keep_bams)
            ],
                              name="run_star_seqr." + sample.name)

            job.samples = [sample]
            jobs.append(job)

        return jobs
Example #23
    def delete_fastqs(self):
        """
        Delete fastqs when all callers' jobs are finished                                                     
        """
        jobs = []
        for sample in self.samples:
            defuse_result = os.path.join("fusions", "defuse", sample.name,
                                         "results.filtered.tsv")
            fusionmap_result = os.path.join("fusions", "fusionmap",
                                            sample.name,
                                            "02_RNA.FusionReport.txt")
            ericscript_result = os.path.join("fusions", "ericscript",
                                             sample.name,
                                             "fusion.results.filtered.tsv")
            integrate_result = os.path.join("fusions", "integrate",
                                            sample.name, "breakpoints.cov.tsv")
            star_seqr_result = os.path.join("fusions", "star_seqr",
                                            sample.name,
                                            "out_STAR-SEQR_candidates.txt")
            arriba_result = os.path.join("fusions", "arriba", sample.name,
                                         "fusions.tsv")
            star_fusion_result = os.path.join(
                "fusions", "star_fusion", sample.name,
                "star-fusion.fusion_predictions.abridged.coding_effect.tsv")
            cicero_result = os.path.join("fusions", "cicero", sample.name,
                                         "final_fusions.txt")

            # result_file_list = [defuse_result, fusionmap_result, ericscript_result, integrate_result,
            #                     star_seqr_result, arriba_result, star_fusion_result]
            result_file_list = [defuse_result, fusionmap_result]
            del_job = delete_fastqs.delete_fastqs(sample.name,
                                                  result_file_list)
            job = concat_jobs([Job(command="mkdir -p delete_fastqs"), del_job],
                              name="delete_fastqs." + sample.name)
            # job = concat_jobs([
            #    Job(command="mkdir -p delete_fastqs")
            # ], name="delete_fastqs." + sample.name)
            job.input_files = [
                defuse_result, fusionmap_result, ericscript_result,
                integrate_result, star_seqr_result, arriba_result,
                star_fusion_result, cicero_result
            ]
            jobs.append(job)
            # DELETE BAMS JOB (one across all samples)
        del_bams_job = concat_jobs(
            [delete_fastqs.delete_bams(result_file_list, self._output_dir)],
            name="delete_bams")
        jobs.append(del_bams_job)
        return jobs
Example #24
def cancel_task(request, task_id):
    task_id = int(task_id)
    redirect_to = request.GET.get("back", None)
    if redirect_to is None:
        redirect_to = "/"

    try:
        if task_id != 0:
            job = Job.get_job(task_id)
            job.job2canceled()
    except Exception:
        # just do nothing
        pass

    return HttpResponseRedirect(redirect_to)
Example #25
    def MetaFusion_clinical(self):
        """
        Run MetaFusion.IsoHunter.clinical
        """
        jobs = []
        out_dir_abspath = self._output_dir
        metafusion_outdir = os.path.join("fusions", "metafusion_clinical")
        metafusion_job = metafusion_clinical.run_metafusion_clinical(
            out_dir_abspath, self.args.database)
        job = concat_jobs(
            [Job(command="mkdir -p " + metafusion_outdir), metafusion_job],
            name="MetaFusion.clinical")

        jobs.append(job)

        return jobs
Example #26
    def MetaFusion_IsoHunter(self):
        """
        Run MetaFusion.IsoHunter
        """
        jobs = []
        out_dir_abspath = self._output_dir
        isohunter_outdir = os.path.join("fusions", "metafusion_isohunter")
        metafusion_job = metafusion_isohunter.run_isohunter_singularity(
            out_dir_abspath)
        job = concat_jobs(
            [Job(command="mkdir -p " + isohunter_outdir), metafusion_job],
            name="MetaFusion.IsoHunter")

        jobs.append(job)

        return jobs
Example #27
 def tophat2(self):
     """
     Run Tophat2 for Integrate. Determines accepted hits and unmapped reads, and outputs
     the corresponding .bam files required as input files for the integrate step.
     """
     jobs = []
     for sample in self.samples:
         fastq1, fastq2 = self.select_input_fastq(sample)
         out_dir = os.path.join(self.output_dir, "fusions", "tophat2",
                                sample.name)
         tophat2_job = tophat2.tophat2(fastq1, fastq2, out_dir)
         job = concat_jobs(
             [Job(command="mkdir -p " + out_dir), tophat2_job],
             name="tophat2." + sample.name)
         jobs.append(job)
     return jobs
Example #28
    def MetaFusion(self):
        """
        Run MetaFusion
        """
        jobs = []
        cff_dir_abspath = os.path.join(self._output_dir, "fusions", "cff")
        out_dir_abspath = os.path.join(self._output_dir, "fusions",
                                       "metafusion")
        metafusion_job = metafusion.run_metafusion_singularity(out_dir_abspath)
        # metafusion_job.name = "MetaFusion"
        job = concat_jobs(
            [Job(command="mkdir -p " + out_dir_abspath), metafusion_job],
            name="MetaFusion")

        jobs.append(job)

        return jobs
Example #29
    def defuse(self):
        """
        Run Defuse to call gene fusions
        """
        jobs = []
        for sample in self.samples:
            fastq1, fastq2 = self.select_input_fastq(sample)
            out_dir = os.path.join("fusions", "defuse", sample.name)
            defuse_job = defuse.defuse(fastq1,
                                       fastq2,
                                       out_dir,
                                       keep_bam=self.args.keep_bams)
            job = concat_jobs([Job(command="mkdir -p " + out_dir), defuse_job],
                              name="defuse." + sample.name)

            jobs.append(job)

        return jobs
Example #30
def main():
    logging.configure()

    conn = psql.connect('')
    queue = pq.PQ(conn=conn)['jobs']

    backends = load_backends(conn.cursor())

    for job_entry in queue:
        if job_entry is None:
            time.sleep(2)
            continue

        job = Job(**job_entry.data)
        backend = backends[job.backend_id]

        run_job(conn.cursor(), backend, job)
        conn.commit()
Example #31
    def run(self, guids: List[str]):
        """
        Run a module

        Usage:
            run <guids>...
            run -h | --help

        Arguments:
            guids    session guids to run modules on

        Options:
            -h, --help   Show dis
        """

        for guid in guids:
            self.prompt_session.contexts[1].add_job(
                (guid, Job(module=self.selected)))
Example #32
    def __init__(self, guid, remote_address, pubkey_xml):
        self.guid = guid
        self.address = remote_address
        self.data = None
        self.checkin_time = None
        self.crypto = ECDHE(pubkey_xml)
        self.jobs = Queue()

        self.logger = logging.getLogger(str(guid))
        self.logger.propagate = False
        self.logger.setLevel(logging.DEBUG)

        formatter = logging.Formatter('%(asctime)s - %(message)s')
        fh = logging.FileHandler(f"./logs/{guid}.log", encoding='UTF-8')
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)

        self.logger.addHandler(fh)

        self.add_job(Job(command=('checkin', '')))
Example #33
    def star_fusion(self):
        """
        Run STAR-Fusion to call gene fusions
        """
        jobs = []
        CTAT_resource_lib = "/hpf/largeprojects/ccmbio/mapostolides/validate_fusion/test_star_star-fusion/GRCh37_v19_CTAT_lib_Feb092018.plug-n-play/ctat_genome_lib_build_dir"
        for sample in self.samples:
            fastq1, fastq2 = self.select_input_fastq(sample)
            out_dir = os.path.join("fusions", "star_fusion", sample.name)
            # star_fusion_job = star_fusion.star_fusion(fastq1, fastq2, out_dir, CTAT_resource_lib)
            star_fusion_job = star_fusion.star_fusion(
                fastq1,
                fastq2,
                CTAT_resource_lib,
                out_dir,
                keep_bam=self.args.keep_bams)
            job = concat_jobs(
                [Job(command="mkdir -p " + out_dir), star_fusion_job],
                name="star_fusion." + sample.name)

            jobs.append(job)

        return jobs
Example #34
    def submit_job(self, serialized_job, job_key, job_id, replace_exist):
        def exist(key, value):
            with self.filter_lock:
                try:
                    return self.filter_list[key].exist(value)
                except KeyError:
                    self.filter_list[key] = MemoryFilter()
                    return self.filter_list[key].exist(value)

        self.log.debug("client call submit job %s" % job_id)

        if isinstance(serialized_job, Binary):
            serialized_job = serialized_job.data

        job_in_dict = Job.deserialize_to_dict(serialized_job)
        filter_key = job_in_dict['filter_key']
        filter_value = job_in_dict['filter_value']

        if filter_key and filter_value:
            if exist(filter_key, filter_value):
                self.log.debug("%s has been filter..." % filter_value)
                return False

        if not job_in_dict['trigger']:
            self._enqueue_job(job_key, serialized_job)
        else:
            with self.jobstore_lock:
                try:
                    self.jobstore.add_job(job_id, job_key, job_in_dict['next_run_time'], serialized_job)
                except JobAlreadyExist:
                    if replace_exist:
                        self.jobstore.update_job(job_id, job_key, job_in_dict['next_run_time'], serialized_job)
                    else:
                        self.log.warning('job %s already exists' % job_id)
            self.wake_up()

        return True
Example #35
    def fusion_stats(self):
        """
        Outputs count files and plots about the detected gene fusions.
        """
        jobs = []
        cff_dir = os.path.join("fusions", "cff")
        out_dir = os.path.join("fusions", "fusion_stats")
        sampleinfo_file = os.path.relpath(self.args.sampleinfo.name,
                                          self.output_dir)

        fusion_stats_job = fusion_stats.fusion_stats(cff_dir, out_dir,
                                                     sampleinfo_file)
        category_table_job = fusion_stats.generate_category_count_table(
            cff_dir, out_dir)
        category_barplot_job = fusion_stats.generate_categories_barplot(
            fusion_stats_dir=out_dir)
        job = concat_jobs([
            Job(command="mkdir -p " + out_dir), fusion_stats_job,
            category_table_job, category_barplot_job
        ],
                          name="fusion_stats")

        jobs.append(job)
        return jobs
Example #36
  def polling_thread(self):
    try:
      lastshares = []
      errorcount = [0] * (self.device.maximum_multiplier + 1)
      errorweight = [0] * (self.device.maximum_multiplier + 1)
      maxerrorrate = [0] * (self.device.maximum_multiplier + 1)
      errorlimit = 0.05
      errorhysteresis = 0.1
      counter = 0
      
      while not self.shutdown:
      
        counter += 1
      
        # Poll for nonces
        now = time.time()
        nonces = self.device.read_nonces()
        exhausted = False
        with self.wakeup:
          if nonces[0][1] < self.lastnonce:
            self.lastnonce = nonces[0][1]
            exhausted = True
        if exhausted: self.send("keyspace_exhausted")
        for nonce in nonces:
          if nonce[0] != -self.device.nonce_offset and not nonce[0] in lastshares:
            if self.job: self.send("nonce_found", time.time(), struct.pack("<I", nonce[0]))
            lastshares.append(nonce[0])
            while len(lastshares) > len(nonces): lastshares.pop(0)
        
        # Verify proper operation and adjust clocking if necessary
        if now > self.checklockout and self.job:
          errorcount[self.multiplier] *= 0.995
          errorweight[self.multiplier] = errorweight[self.multiplier] * 0.995 + 1
          for nonce in nonces:
            invalid = True
            for offset in (0, 1, -1, 2, -2):
              hash = Job.calculate_hash(self.job[:76] + struct.pack("<I", nonce[1] + offset))
              if struct.unpack("!I", hash[-4:])[0] == (nonce[2] + 0x5be0cd19) & 0xffffffff:
                invalid = False
                break
            if invalid: errorcount[self.multiplier] += 1. / len(nonces)
          certainty = min(1, errorweight[self.multiplier] / 100)
          errorrate = errorcount[self.multiplier] / errorweight[self.multiplier]
          maxerrorrate[self.multiplier] = max(maxerrorrate[self.multiplier], errorrate * certainty)
          for i in range(len(maxerrorrate) - 1):
            if maxerrorrate[i + 1] * i < maxerrorrate[i] * (i + 20):
              maxerrorrate[i + 1] = maxerrorrate[i] * (1 + 20.0 / i)
          limit = 0
          while limit < self.device.default_multiplier and maxerrorrate[limit + 1] < errorlimit: limit += 1
          while limit < self.device.maximum_multiplier and errorweight[limit] > 150 and maxerrorrate[limit + 1] < errorlimit: limit += 1
          multiplier = 0
          best = 0
          for i in range(limit + 1):
            effective = (i + 1 + (errorhysteresis if i == self.multiplier else 0)) * (1 - maxerrorrate[i])
            if effective > best:
              best = effective
              multiplier = i
          self._set_multiplier(multiplier)
          
          if counter >= 10:
            counter = 0
            try: self.send("error_rate", errorcount[self.multiplier] / errorweight[self.multiplier])
            except: pass

        with self.wakeup: self.wakeup.wait(self.pollinterval)
        
    except Exception as e:
      self.error = e
      # Unblock main thread
      self.send("ping")
Example #37
def main():
    if len(sys.argv) < 3:
        print('usage: %s input_dir output_dir' % sys.argv[0])
        return
        
    conf = DefaultConfigure()
    job = Job(conf)
    job.set_splliter(LineSplitter)
    job.set_mapper(WordCountMapper)
    job.set_mapper_num(4)
    job.set_reducer(WordCountReducer)
    job.set_reducer_num(1)
    
    job.add_input_path(sys.argv[1])
    job.set_output_path(sys.argv[2])
    
    print(job.run())
Example #38
    def convert_fusion_results_to_cff(self):
        """
        Convert the fusion results of all configured gene fusion callers to cff format
        """
        jobs = []
        out_dir = os.path.join("fusions", "cff")
        job_list = [Job(command="mkdir -p " + out_dir)]
        sampleinfo_file = os.path.relpath(self.args.sampleinfo.name,
                                          self.output_dir)

        for sample in self.samples:

            # Define result files
            # output_file = os.path.join(output_dir, prefix + "_STAR-SEQR", prefix  + "_STAR-SEQR_candidates.txt")
            # star_seqr_result = os.path.join("fusions", "star_seqr", sample.name,
            #                                 "out_STAR-SEQR", "out_STAR-SEQR_candidates.txt")
            star_seqr_result = os.path.join("fusions", "star_seqr",
                                            sample.name,
                                            "out_STAR-SEQR_candidates.txt")
            # print >> sys.stderr, star_seqr_result
            arriba_result = os.path.join("fusions", "arriba", sample.name,
                                         "fusions.tsv")
            # star_fusion_result = os.path.join("fusions", "star_fusion",
            #                                   sample.name, "star-fusion.fusion_predictions.abridged.tsv")
            star_fusion_result = os.path.join(
                "fusions", "star_fusion", sample.name,
                "star-fusion.fusion_predictions.abridged.coding_effect.tsv")
            defuse_result = os.path.join("fusions", "defuse", sample.name,
                                         "results.filtered.tsv")
            fusionmap_result = os.path.join("fusions", "fusionmap",
                                            sample.name,
                                            "02_RNA.FusionReport.txt")
            ericscript_result = os.path.join("fusions", "ericscript",
                                             sample.name,
                                             "fusion.results.filtered.tsv")
            integrate_result = os.path.join("fusions", "integrate",
                                            sample.name, "breakpoints.cov.tsv")
            cicero_result = os.path.join("fusions", "cicero", sample.name,
                                         "final_fusions.txt")
            # Build tool_results list based on self.tool_list
            result_file_dict = {
                "star_seqr": star_seqr_result,
                "arriba": arriba_result,
                "star_fusion": star_fusion_result,
                "defuse": defuse_result,
                "fusionmap": fusionmap_result,
                "ericscript": ericscript_result,
                "integrate": integrate_result,
                "cicero": cicero_result
            }
            tool_results = [(key, result_file_dict[key])
                            for key in result_file_dict.keys()
                            if key in self.tool_list]
            # tool_results = [("star_seqr",star_seqr_result), ("arriba", arriba_result),
            #                 ("star_fusion", star_fusion_result), ("defuse", defuse_result),
            #                 ("fusionmap", fusionmap_result), ("ericscript", ericscript_result),
            #                 ("integrate", integrate_result)]
            # tool_results = [("arriba", arriba_result), ("star_fusion", star_fusion_result),
            #                 ("defuse", defuse_result), ("fusionmap", fusionmap_result),
            #                 ("ericscript", ericscript_result), ("integrate", integrate_result)]
            # determine sample_type
            """
            sample_type = ""
            for contrast in self.contrasts:
                if sample in contrast.controls:
                    sample_type = "Normal"
                elif sample in contrast.treatments:
                    sample_type = "Tumor"
                if sample_type:
                    disease_name = contrast.name
                    break    
            if not sample_type:
                raise Exception("Error: sample " + sample.name + " not found in design file " + self.args.design.name)
            """
            # convert caller output files to common fusion format(cff)
            for tool, result_file in tool_results:
                job = cff_conversion.cff_convert(sample.name, result_file,
                                                 sampleinfo_file, tool,
                                                 out_dir)
                job.command = job.command.strip()
                job_list.append(job)
        job = concat_jobs(job_list, name="cff_conversion")
        jobs.append(job)
        return jobs