예제 #1
0
def submit_workflow(wdl,
                    inputs,
                    dependencies,
                    label,
                    username=None,
                    server='localhost',
                    extra_options=None,
                    labels_dict=None):
    """Submit a workflow to a Cromwell server and return the server's reply.

    :param wdl: Forwarded to ``Cromwell.jstart_workflow`` as ``wdl_file``.
    :param inputs: Forwarded as ``json_file``.
    :param dependencies: Forwarded as ``dependencies``.
    :param label: Converted with ``kv_list_to_dict`` into the custom labels
        attached to the workflow.
    :param username: Stored under the ``username`` label. Defaults to
        ``global_config.getuser()`` when None.
    :param server: Server name; ``'localhost'`` selects the ``local`` config
        section, anything else selects ``remote_<server>``.
    :param extra_options: Converted with ``kv_list_to_dict`` and forwarded as
        ``extra_options``.
    :param labels_dict: NOTE(review): the value passed in is never read; the
        labels are always rebuilt from ``label``. Kept for interface
        compatibility -- confirm whether callers expect it to be merged.
    :return: The value returned by ``jstart_workflow`` with an extra ``port``
        key holding the port of the Cromwell client that was used.
    """
    # Convert the label list a single time and reuse the result.
    parsed_labels = kv_list_to_dict(label)
    labels_dict = parsed_labels if parsed_labels is not None else {}
    if username is None:
        username = global_config.getuser()
    labels_dict['username'] = username

    section_name = 'remote_%s' % server if server != 'localhost' else 'local'
    host, port, auth = global_config.get_conn_info(server, section_name)
    cromwell = Cromwell(host=host, port=port, auth=auth)
    result = cromwell.jstart_workflow(
        wdl_file=wdl,
        json_file=inputs,
        dependencies=dependencies,
        extra_options=kv_list_to_dict(extra_options),
        custom_labels=labels_dict)
    result['port'] = cromwell.port

    return result
예제 #2
0
    def __init__(self,
                 user,
                 host,
                 no_notify,
                 verbose,
                 interval,
                 workflow_id=None):
        """Store the monitoring settings and build the Cromwell client.

        :param user: User whose workflows are monitored; ``"*"`` additionally
            sets up the event subscribers and the database session.
        :param host: Server name used to look up connection details.
        :param no_notify: Stored on the instance as ``no_notify``.
        :param verbose: Stored on the instance as ``verbose``.
        :param interval: Stored on the instance as ``interval``.
        :param workflow_id: Optional workflow ID stored on the instance.
        """
        # 'localhost' maps to the 'local' section, other hosts to 'remote_<host>'.
        if host == 'localhost':
            config_section = 'local'
        else:
            config_section = 'remote_%s' % host
        connection = global_config.get_conn_info(host, config_section)
        self.host, self.port, self.auth = connection

        self.cromwell = Cromwell(host=self.host,
                                 port=self.port,
                                 auth=self.auth)

        self.user = user
        self.interval = interval
        self.no_notify = no_notify
        self.verbose = verbose
        self.workflow_id = workflow_id
        self.messenger = Messenger(self.user)

        # Everything below is only prepared for the wildcard user.
        if user != "*":
            return

        self.event_subscribers = [EmailNotification(self.cromwell)]

        db_url = "sqlite:///" + global_config.get_path('general',
                                                       'workflow_db')
        db_engine = create_engine(db_url)
        Base.metadata.bind = db_engine
        session_factory = sessionmaker()
        session_factory.bind = db_engine
        self.session = session_factory()
예제 #3
0
def get_workflow_metadata():
    """Yield the Cromwell metadata of each hard-coded workflow ID.

    Connection details come from ``c.get_conn_info('remote')``. Because this
    is a generator, nothing is contacted until the first item is requested.
    """
    conn_host, conn_port, conn_auth = c.get_conn_info('remote')
    client = Cromwell(conn_host, conn_port, conn_auth)
    # TODO: handle network error.
    for wf_id in ['ec30ad38-bc22-4c46-9693-7e9321d3e8ae']:
        yield client.query_metadata(wf_id)
예제 #4
0
def is_user_workflow(host, user, workflow_id):
    """Return ``workflow_id`` when the workflow's submitted inputs name ``user``.

    This is a top-level function rather than an instance method of Monitor
    because the latter runs into serialization issues. See:
    https://stackoverflow.com/questions/26249442/can-i-use-multiprocessing-pool-in-a-method-of-a-class

    :param host: Cromwell server name; ``'localhost'`` selects the ``local``
        config section, anything else selects ``remote_<host>``.
    :param user: User name to compare against the ``user`` entry of the
        workflow's submitted inputs.
    :param workflow_id: ID of the workflow whose metadata is queried.
    :return: ``workflow_id`` if the inputs' ``user`` equals ``user``;
        otherwise None. None is also returned when the inputs are missing,
        are not valid JSON, or have no ``user`` entry.
    """
    section_name = 'remote_%s' % host if host != 'localhost' else 'local'
    host, port, auth = global_config.get_conn_info(host, section_name)
    metadata = Cromwell(host=host, port=port,
                        auth=auth).query_metadata(workflow_id)

    try:
        j_input = json.loads(metadata['submittedFiles']['inputs'])
        owner = j_input['user']
    except (KeyError, TypeError, ValueError):
        # KeyError: expected key absent. TypeError: payload is None or not a
        # mapping. ValueError: inputs string is empty or not valid JSON.
        return None

    return workflow_id if owner == user else None
예제 #5
0
    def attach_logs(msg, metadata):
        """Attach failed-call logs and the workflow metadata to ``msg``.

        For every entry returned by ``Cromwell.getCalls('Failed', ...)`` the
        ``stdout`` log is attached first, then the ``stderr`` log, each under
        the entry's own ``label``. The JSON-serialized metadata is attached
        last as ``<id>.metadata``.

        :param msg: Message object exposing ``attach``.
        :param metadata: Workflow metadata; ``calls`` and ``id`` are read.
        """
        failed_calls = Cromwell.getCalls(
            'Failed', metadata['calls'], full_logs=True)

        for failed_call in failed_calls:
            for stream in ("stdout", "stderr"):
                part = MIMEText(str(failed_call[stream]['log']))
                part.add_header('Content-Disposition',
                                'attachment',
                                filename=failed_call[stream]["label"])
                msg.attach(part)

        serialized = json.dumps(
            metadata, indent=4, default=EmailNotification.json_serializer)
        metadata_part = MIMEText(str(serialized))
        metadata_part.add_header('Content-Disposition',
                                 'attachment',
                                 filename=metadata["id"] + ".metadata")
        msg.attach(metadata_part)
예제 #6
0
class MyTestCase(unittest.TestCase):
    """Test that Monitor follows a freshly submitted workflow to completion."""

    # setUp/tearDown are per-test instance hooks, so they are plain methods;
    # a @classmethod here would bind ``self`` to the class and share the
    # fixtures between tests.
    def setUp(self):
        """Create the Cromwell client and resolve the workflow file paths."""
        resources = c.resource_dir
        self.cromwell = Cromwell(host='btl-cromwell')
        self.json = os.path.join(resources, 'hello_world_on_prem.json')
        self.wdl = os.path.join(resources, 'hello_world_on_prem.wdl')

    def test_monitor_workflow(self):
        """monitor_workflow should return 0 for the submitted workflow."""
        m = Monitor(user='******',
                    host='btl-cromwell',
                    no_notify=False,
                    verbose=True,
                    interval=5)
        wf = self.cromwell.jstart_workflow(self.wdl, self.json)
        # Short pause before the workflow is looked up by ID.
        time.sleep(2)
        workflow_id = wf['id']
        self.assertEqual(0, m.monitor_workflow(workflow_id))

    def tearDown(self):
        """Report that the test finished."""
        print("Done!")
예제 #7
0
class Monitor:
    """Monitor a user's workflows, printing status reports at regular
    intervals and sending e-mail notifications.
    """
    def __init__(self,
                 user,
                 host,
                 no_notify,
                 verbose,
                 interval,
                 workflow_id=None):
        """Store the monitoring settings and build the Cromwell client.

        :param user: User whose workflows are monitored. ``"*"`` additionally
            sets up ``event_subscribers`` and the database ``session`` that
            ``run`` and ``process_events`` rely on.
        :param host: Server name; ``'localhost'`` selects the ``local``
            config section, anything else selects ``remote_<host>``.
        :param no_notify: When true, ``monitor_workflow`` sends no e-mail.
        :param verbose: When true, ``monitor_workflow`` prints each status.
        :param interval: Seconds to sleep between polls.
        :param workflow_id: Optional workflow ID stored on the instance.
        """
        section_name = 'remote_%s' % host if host != 'localhost' else 'local'
        self.host, self.port, self.auth = global_config.get_conn_info(
            host, section_name)
        self.user = user
        self.interval = interval
        self.cromwell = Cromwell(host=self.host,
                                 port=self.port,
                                 auth=self.auth)
        self.messenger = Messenger(self.user)
        self.no_notify = no_notify
        self.verbose = verbose
        self.workflow_id = workflow_id
        if user == "*":
            self.event_subscribers = [
                EmailNotification(self.cromwell),
            ]

            engine = create_engine(
                "sqlite:///" +
                global_config.get_path('general', 'workflow_db'))
            Base.metadata.bind = engine
            DBSession = sessionmaker()
            DBSession.bind = engine
            self.session = DBSession()

    def get_user_workflows(self, raw=False, start_time=None, silent=False):
        """Query Cromwell for the workflows of ``self.user``.

        :param raw: When true, return the unprocessed query result.
        :param start_time: Forwarded to ``query_labels`` as ``start_time``.
        :param silent: When true, skip the progress message.
        :return: The raw query result if ``raw`` is true; otherwise a list of
            the IDs whose status is in ``global_config.run_states``.
        """
        if not silent:
            print("Determining {}'s workflows...".format(self.user))

        if self.user == "*":
            # Wildcard user: no label filter, restricted to running jobs.
            results = self.cromwell.query_labels({},
                                                 start_time=start_time,
                                                 running_jobs=True)
        else:
            results = self.cromwell.query_labels({'username': self.user},
                                                 start_time=start_time)

        if raw:
            return results

        user_workflows = []
        try:
            for result in results['results']:
                if result['status'] in global_config.run_states:
                    user_workflows.append(result['id'])
        except Exception as e:
            # Best effort: an unusable response is reported, not raised.
            logging.error(str(e))
            print('No user workflows found with username {}.'.format(
                self.user))
        return user_workflows

    def process_events(self, workflow):
        """Notify every event subscriber about ``workflow``.

        A failing subscriber is logged and does not stop the remaining ones.

        :param workflow: Object exposing ``id``; passed to each subscriber.
        """
        for event_subscriber in self.event_subscribers:
            metadata = self.cromwell.query_metadata(
                workflow.id)  # get final metadata
            try:
                event_subscriber.on_changed_workflow_status(
                    workflow, metadata, self.host, self.port)
            except Exception as e:
                logging.error(str(e))
                traceback.print_exc()
                print("Event processing error occurred above.")

    def run(self):
        """Poll forever, syncing Cromwell workflow states into the database.

        Each pass compares the workflows stored in the database with those
        Cromwell reports for the last day, adds the unknown ones, updates the
        ones whose status changed, and calls ``process_events`` for both
        groups. Any exception in a pass is printed and the loop continues.
        """
        while True:
            try:
                one_day_ago = datetime.datetime.now() - datetime.timedelta(
                    days=1)
                db_workflows = {
                    stored.id: stored
                    for stored in self.session.query(Workflow).filter(
                        Workflow.start > one_day_ago)
                }
                cromwell_workflows = {
                    wf["id"]: wf
                    for wf in self.get_user_workflows(
                        raw=True,
                        start_time=get_iso_datestr(one_day_ago),
                        silent=True)['results']
                }

                # Real lists are required: both groups are iterated more than
                # once and concatenated below, which lazy map/filter objects
                # do not support on Python 3.
                new_workflows = [
                    Workflow(self.cromwell, wf["id"])
                    for wf in cromwell_workflows.values()
                    if wf["id"] not in db_workflows
                ]
                for workflow in new_workflows:
                    self.session.add(workflow)

                changed_workflows = [
                    stored for stored in db_workflows.values()
                    if stored.id in cromwell_workflows and
                    stored.status != cromwell_workflows[stored.id]["status"]
                ]
                for workflow in changed_workflows:
                    workflow.update_status(
                        cromwell_workflows[workflow.id]["status"])

                for workflow in new_workflows + changed_workflows:
                    self.process_events(workflow)

                self.session.flush()
                self.session.commit()
            except Exception:
                traceback.print_exc()

            time.sleep(self.interval)

    def monitor_user_workflows(self):
        """Start one monitoring thread per running workflow of the user.

        :return: None
        """
        print("Monitoring {}'s workflows.".format(self.user))
        workflows = self.get_user_workflows()
        if not workflows:
            print("User {} has no running workflows.".format(self.user))
            return
        for workflow in workflows:
            t = threading.Thread(target=self.monitor_workflow,
                                 args=[workflow])
            t.start()

    def monitor_workflow(self, workflow_id):
        """Monitor the status of a single workflow.

        Polls every ``self.interval`` seconds until the status is no longer
        in ``global_config.run_states``, then sends the notification e-mail
        unless ``self.no_notify`` is set.

        :param workflow_id: Workflow ID of workflow to monitor.
        :return: returns 0 when workflow reaches terminal state.
        """
        while True:
            query_status = self.cromwell.query_status(workflow_id)
            if self.verbose:
                print('Workflow {} | {}'.format(query_status['id'],
                                                query_status['status']))
            if query_status['status'] in global_config.run_states:
                time.sleep(self.interval)
                continue
            if not self.no_notify:
                self._send_completion_email(query_status, workflow_id)
            return 0

    def _send_completion_email(self, query_status, workflow_id):
        """Compose and send the e-mail for a workflow that stopped running."""
        filename = '{}.metadata.json'.format(query_status['id'])
        log_dir = global_config.get_path('general', 'log_dir')
        filepath = os.path.join(log_dir, filename)
        # One metadata snapshot serves the dump, the e-mail body and the
        # failed-shard lookup.
        jdata = self.cromwell.query_metadata(workflow_id)
        try:
            with open(filepath, 'w+') as metadata_file:
                json.dump(jdata, indent=4, fp=metadata_file)

            email_content = self.generate_content(query_status=query_status,
                                                  workflow_id=workflow_id,
                                                  metadata=jdata)
            msg = self.messenger.compose_email(email_content)

            file_dict = {filename: filepath}
            if 'Failed' in query_status['status']:
                file_dict.update(self._failed_shard_logs(jdata))

            for attachment in self.generate_attachments(file_dict):
                # generate_attachment yields None for unreadable files.
                if attachment:
                    msg.attach(attachment)

            email_account = global_config.get('email',
                                              'email_notification_account')
            email_domain = global_config.get('email', 'email_domain')
            if email_account:
                self.messenger.send_email(
                    msg, "{}@{}".format(email_account, email_domain))
            else:
                self.messenger.send_email(msg)
        finally:
            # Remove the temporary dump even when composing or sending fails.
            if os.path.exists(filepath):
                os.unlink(filepath)

    @staticmethod
    def _failed_shard_logs(jdata):
        """Map attachment names to the log paths of failed shards.

        :param jdata: Workflow metadata; ``calls`` maps task names to lists
            of shard dictionaries.
        :return: A dict of ``<task>.<shardIndex>.stdout|stderr`` to the
            corresponding shard value.
        """
        logs = {}
        for task, call in jdata['calls'].items():
            for shard in call:
                if 'Failed' not in shard['executionStatus']:
                    continue
                attach_prefix = "{}.{}".format(task, shard['shardIndex'])
                for stream in ('stdout', 'stderr'):
                    try:
                        logs["{}.{}".format(attach_prefix,
                                            stream)] = shard[stream]
                    except KeyError as e:
                        logging.warning(str(e))
                # Only the first failed shard of each task is attached.
                break
        return logs

    @staticmethod
    def generate_attachment(filename, filepath):
        """Create attachment from a file.

        :param filename: The name to assign to the attachment.
        :param filepath: The absolute path of the file including the file itself.
        :return: An attachment object, or None if it could not be created
            (the failure is logged as a warning).
        """
        try:
            with open(filepath, 'r') as read_data:
                attachment = MIMEText(read_data.read())
            attachment.add_header('Content-Disposition',
                                  'attachment',
                                  filename=filename)
            return attachment
        except Exception as e:
            logging.warning('Unable to generate attachment for {}:\n{}'.format(
                filename, e))
            return None

    def generate_attachments(self, file_dict):
        """Generates a list of attachments to be added to an e-mail

        :param file_dict: A dictionary of filename:filepath pairs. Note the name is what the file will be called, and does not refer to the name of the file as it exists prior to attaching. That should be part of the filepath.
        :return: A list of attachments; an entry is None where the file could
            not be read.
        """
        return [
            self.generate_attachment(name, path)
            for name, path in file_dict.items()
        ]

    @staticmethod
    def _split_duration(duration):
        """Split a timedelta into whole (hours, minutes, seconds).

        Days are folded into the hour count, so 25 hours stays 25 hours.
        """
        hours, remainder = divmod(int(duration.total_seconds()), 3600)
        minutes, seconds = divmod(remainder, 60)
        return hours, minutes, seconds

    def generate_content(self,
                         query_status,
                         workflow_id,
                         metadata=None,
                         user=None):
        """A method for generating the email content to be sent to user.

        :param query_status: status of workflow (not read by this method).
        :param workflow_id: Workflow ID of the workflow to create e-mail for.
        :param metadata: The metadata of the workflow (optional); queried from
            Cromwell when None.
        :param user: User to report in the content; defaults to ``self.user``.
        :return: a dictionary containing the email contents for the template.
        """
        jdata = self.cromwell.query_metadata(
            workflow_id) if metadata is None else metadata
        summary = ""
        if 'start' in jdata:
            summary += "<br><b>Started:</b> {}".format(jdata['start'])
        if 'end' in jdata:
            summary += "<br><b>Ended:</b> {}".format(jdata['end'])
        if 'start' in jdata and 'end' in jdata:
            duration = parse(jdata['end']) - parse(jdata['start'])
            hours, minutes, seconds = self._split_duration(duration)
            summary += '<br><b>Duration:</b> {} hours, {} minutes, {} seconds'.format(
                hours, minutes, seconds)
        if 'Failed' in jdata['status']:
            fail_summary = "<br><b>Failures:</b> {}".format(
                json.dumps(jdata['failures']))
            fail_summary = fail_summary.replace(',', '<br>')
            summary += fail_summary.replace('\n', '<br>')
        if 'workflowName' in jdata:
            # The workflow name goes in front of everything gathered so far.
            summary = "<b>Workflow Name:</b> {}{}".format(
                jdata['workflowName'], summary)
        if 'workflowRoot' in jdata:
            summary += "<br><b>workflowRoot:</b> {}".format(
                jdata['workflowRoot'])
        summary += "<br><b>Timing graph:</b> http://{}:{}/api/workflows/v2/{}/timing".format(
            self.host, self.port, jdata['id'])
        user = self.user if user is None else user
        email_content = {
            'user': user,
            'workflow_id': jdata['id'],
            'status': jdata['status'],
            'summary': summary
        }
        return email_content
예제 #8
0
 def setUpClass(self):
     """Prepare the Cromwell client, workflow file paths and test labels.

     NOTE(review): any decorator on this method lies outside the visible
     snippet, so what ``self`` is bound to cannot be confirmed from here.
     """
     resource_root = c.resource_dir
     self.cromwell = Cromwell(host='btl-cromwell')
     self.wdl = os.path.join(resource_root, 'hello_world.wdl')
     self.json = os.path.join(resource_root, 'hello.json')
     self.labels = dict(username='******', foo='bar')
예제 #9
0
class CromwellUnitTests(unittest.TestCase):
    """Tests that drive the Cromwell client against the 'btl-cromwell' host."""

    @classmethod
    def setUpClass(cls):
        """Create the shared client, workflow file paths and labels."""
        resources = c.resource_dir
        cls.cromwell = Cromwell(host='btl-cromwell')
        cls.json = os.path.join(resources, 'hello.json')
        cls.wdl = os.path.join(resources, 'hello_world.wdl')
        cls.labels = {'username': '******', 'foo': 'bar'}

    def _initiate_workflow(self):
        """Submit the hello-world workflow and return the submission reply."""
        wf = self.cromwell.jstart_workflow(self.wdl, self.json)
        # Pause after submitting before the workflow is used by the tests.
        time.sleep(5)
        return wf

    def test_start_workflow(self):
        wf = self._initiate_workflow()
        wfid = wf['id']
        self.assertIn('id', wf)
        self.assertIn('status', wf)
        self.assertEqual(wf['status'], 'Submitted')
        self.assertEqual(len(wfid), 36)
        self.cromwell.stop_workflow(wfid)

    def test_build_long_url(self):
        wf = self._initiate_workflow()
        wfid = wf['id']
        url_dict = {
            'name': 'test_build_long_url',
            'id': wfid,
            'start': datetime.datetime.now() - datetime.timedelta(days=1),
            'end': datetime.datetime.now()
        }
        query_url = self.cromwell.build_query_url(
            'http://btl-cromwell:9000/api/workflows/v1/query?', url_dict)
        r = requests.get(query_url)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(r.status_code, 200)
        self.cromwell.stop_workflow(wfid)

    def test_label_workflow(self):
        wf = self._initiate_workflow()
        wfid = wf['id']
        r = self.cromwell.label_workflow(wfid, self.labels)
        self.assertEqual(r.status_code, 200)
        self.cromwell.stop_workflow(wfid)

    def test_explain(self):
        wf = self._initiate_workflow()
        wfid = wf['id']
        time.sleep(10)
        result = self.cromwell.explain_workflow(wfid)
        self.assertIsInstance(result, tuple)
        self.cromwell.stop_workflow(wfid)

    def test_stop_workflow(self):
        wf = self._initiate_workflow()
        wfid = wf['id']
        result = self.cromwell.stop_workflow(wfid)
        print(result)
        self.cromwell.stop_workflow(wfid)

    @classmethod
    def tearDownClass(cls):
        """Report that the test class finished."""
        print("Done!")
예제 #10
0
class QueryUnitTests(unittest.TestCase):
    """Tests for the Cromwell query endpoints against the 'btl-cromwell' host."""

    @classmethod
    def setUpClass(cls):
        """Create the shared client, workflow file paths and labels."""
        resources = c.resource_dir
        cls.cromwell = Cromwell(host='btl-cromwell')
        cls.json = os.path.join(resources, 'hw.json')
        cls.wdl = os.path.join(resources, 'hw.wdl')
        cls.labels = {'username': '******', 'foo': 'bar'}

    def _initiate_workflow(self):
        """Submit the workflow and return the submission reply."""
        wf = self.cromwell.jstart_workflow(self.wdl, self.json)
        # Pause after submitting before the workflow is queried.
        time.sleep(5)
        return wf

    def test_query_status(self):
        wf = self._initiate_workflow()
        wfid = wf['id']
        result = self.cromwell.query_status(wfid)
        self.assertIn('id', result)
        self.assertIn('status', result)
        self.cromwell.stop_workflow(wfid)

    def test_query_metadata(self):
        wf = self._initiate_workflow()
        wfid = wf['id']
        result = self.cromwell.query_metadata(wfid)
        self.assertIn('id', result)
        self.assertIn('submission', result)
        self.cromwell.stop_workflow(wfid)

    def test_query_logs(self):
        wf = self._initiate_workflow()
        wfid = wf['id']
        result = self.cromwell.query_logs(wfid)
        self.assertIn('id', result)
        self.cromwell.stop_workflow(wfid)

    def test_build_long_url(self):
        wf = self._initiate_workflow()
        wfid = wf['id']
        url_dict = {
            'name': 'test_build_long_url',
            'id': wfid,
            'start': datetime.datetime.now() - datetime.timedelta(days=1),
            'end': datetime.datetime.now()
        }
        query_url = self.cromwell.build_query_url(
            'http://btl-cromwell:9000/api/workflows/v1/query?', url_dict)
        r = requests.get(query_url)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(r.status_code, 200)
        self.cromwell.stop_workflow(wfid)

    def test_query(self):
        wf = self._initiate_workflow()
        wfid = wf['id']
        url_dict = {
            'name': 'gatk',
            'id': [wfid],
            'start': datetime.datetime.now() - datetime.timedelta(days=1),
            'end': datetime.datetime.now()
        }
        result = self.cromwell.query(url_dict)
        # assertIsInstance states the check directly; the former
        # assertTrue(..., True) passed True as the failure message.
        self.assertIsInstance(result['results'], list)
        self.cromwell.stop_workflow(wfid)

    def test_label_workflow(self):
        wf = self._initiate_workflow()
        wfid = wf['id']
        r = self.cromwell.label_workflow(wfid, self.labels)
        self.assertEqual(r.status_code, 200)
        self.cromwell.stop_workflow(wfid)

    def test_query_labels(self):
        wf = self._initiate_workflow()
        wfid = wf['id']
        labels = {'username': '******', 'foo': 'bar'}
        self.cromwell.label_workflow(wfid, self.labels)
        # This sleep is needed to make sure the label workflow completes before we query for it. # noqa
        time.sleep(5)
        r = self.cromwell.query_labels(labels)
        # Here, the most recent workflow that matches the query will be the last item so we can use that to check # noqa
        # this assertion.
        self.assertIn(wfid, r['results'][-1]['id'])
        self.cromwell.stop_workflow(wfid)

    def test_query_filter_by_statuses(self):
        from argparse import Namespace
        from choppy import call_list
        wf = self._initiate_workflow()
        wfid = wf['id']
        result = call_list(
            Namespace(server="btl-cromwell",
                      all=False,
                      no_notify=True,
                      verbose=True,
                      interval=None,
                      username="******",
                      days=1,
                      filter=['Succeeded', 'Failed']))
        statuses = set(d['status'] for d in result)
        self.assertEqual(len(statuses), 2)
        self.assertIn('Succeeded', statuses)
        self.assertIn('Failed', statuses)
        self.cromwell.stop_workflow(wfid)

    def test_query_filter_by_name(self):
        from argparse import Namespace
        from choppy import call_list
        # NOTE(review): the two call_list argument sets below are identical
        # as written, so the strict "greater than" check can only pass if the
        # server's answer changes between the calls. Confirm the intended
        # username/all values for the second query.
        user_result = call_list(
            Namespace(server="btl-cromwell",
                      all=False,
                      no_notify=True,
                      verbose=True,
                      interval=None,
                      username="******",
                      days=1,
                      filter=None))
        user_wfids = set(d['id'] for d in user_result)
        all_result = call_list(
            Namespace(server="btl-cromwell",
                      all=False,
                      no_notify=True,
                      verbose=True,
                      interval=None,
                      username="******",
                      days=1,
                      filter=None))
        all_wfids = set(d['id'] for d in all_result)
        self.assertGreater(len(all_wfids), len(user_wfids))

    def test_query_filter_by_days(self):
        from argparse import Namespace
        from choppy import call_list
        result = call_list(
            Namespace(server="btl-cromwell",
                      all=False,
                      no_notify=True,
                      verbose=True,
                      interval=None,
                      username="******",
                      days=1,
                      filter=None))
        # Keep only the date part of each start value.
        all_dates = set(d['start'].split('T')[0] for d in result)
        self.assertEqual(len(all_dates), 1)

    def test_query_backend(self):
        self.assertIn('defaultBackend', self.cromwell.query_backend())

    @classmethod
    def tearDownClass(cls):
        """Report that the test class finished."""
        print("Done!")
예제 #11
0
 def setUp(self):
     """Prepare the Cromwell client and the on-prem hello-world file paths.

     NOTE(review): any decorator on this method lies outside the visible
     snippet, so what ``self`` is bound to cannot be confirmed from here.
     """
     resource_root = c.resource_dir
     self.cromwell = Cromwell(host='btl-cromwell')
     self.wdl = os.path.join(resource_root, 'hello_world_on_prem.wdl')
     self.json = os.path.join(resource_root, 'hello_world_on_prem.json')