Example #1
    def test_publish_no_changesets(self):
        """
        Ensures that no publish is attempted when pending changesets are blocked because their org already has a
        publish job running for a previous changeset.
        """
        # a changeset which is currently being published
        OrgChangeset(
            id='test0_1',
            org_uid='test0',
            changeset=1,
            publish_job_running=True,
            publish_job_failed=False,
            publish_job_finished=False,
            publish_job_count=0
        ).put()

        # a later changeset for the same org, but should be blocked from publishing because a
        # previous changeset is still running
        OrgChangeset(
            id='test0_2',
            org_uid='test0',
            changeset=2,
            publish_job_running=False,
            publish_job_failed=False,
            publish_job_finished=False,
            publish_job_count=0
        ).put()

        response = self.app.get('/orchestrator/publish')
        self.assertEqual(response.status_code, 204)

        # no publish tasks should be created because there is nothing to publish
        self.assertEqual(len(self.taskqueue.get_filtered_tasks()), 0)
Example #2
    def test_update_changeset_running(self):
        """
        Verifies that changeset publish status is updated from the dataflow api while the publish job is still running.
        """
        OrgChangeset(id='test', publish_job_running=True).put()

        response = self.app.get('/orchestrator/update_changesets')
        self.assertEqual(response.status_code, 204)

        # changeset is still marked as running since the job has not reached a final state
        changeset = OrgChangeset.get_by_id('test')
        self.assertFalse(changeset.publish_job_finished)
        self.assertTrue(changeset.publish_job_running)
        self.assertFalse(changeset.publish_job_failed)
        self.assertEqual(changeset.publish_job_status, 'JOB_STATE_RUNNING')
Example #3
def complete_changeset(org_uid):
    """
    Marks a changeset complete by setting changeset_completed_at for an org in the Org datastore kind. This is called
    once the provider-specific sync manager indicates that there is no more data to be pulled.

    This function also writes the changeset record to the OrgChangeset kind, which keeps a record of all changesets
    (as opposed to the Org kind, which only tracks the ingestion status of the current changeset for an org).
    OrgChangeset is used by the orchestrator service to coordinate publishing of the completed changesets.

    If this is the initial sync (the first changeset), we kick off a separate publish job for it immediately.

    Args:
        org_uid(str): org identifier
    """
    now = datetime.utcnow()

    org = Org.get_by_id(org_uid)
    org.changeset_completed_at = now
    org.update_cycle_active = False
    org.last_update_cycle_completed_at = now
    org.put()

    changeset = OrgChangeset(org_uid=org_uid,
                             provider='qbo',
                             changeset=org.changeset,
                             ingestion_started_at=org.changeset_started_at,
                             ingestion_completed_at=now,
                             publish_job_running=False,
                             publish_job_finished=False,
                             publish_job_count=0)

    changeset.put()

    if org.changeset == 0:
        taskqueue.add(queue_name='create-publish-job',
                      target='orchestrator',
                      url='/orchestrator/create_publish_job_task',
                      payload=json.dumps({
                          'job_params': {
                              'org_changeset_ids': [changeset.key.id()]
                          }
                      }),
                      transactional=True)
        logging.info(
            "requesting publish after initial sync for org {}".format(org_uid))

    logging.info("completed changeset {} for org {}".format(
        org.changeset, org_uid))
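# NOTE: the publish task above is enqueued with transactional=True, so complete_changeset must run inside a datastore
# transaction. A minimal call-site sketch (an assumption; the real caller lives elsewhere, and ndb is assumed imported):
def complete_changeset_transactionally(org_uid):
    # run the completion (datastore writes plus the transactional task enqueue) atomically
    ndb.transaction(lambda: complete_changeset(org_uid), xg=True)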
Example #4
    def test_changeset_status_published(self, client_mock):
        """
        Verifies the message published on pubsub when a changeset is synced.

        Args:
            client_mock(Mock): mock of the pubsub client
        """
        Org(id='test').put()
        OrgChangeset(org_uid='test',
                     changeset=2,
                     publish_finished_at=datetime(2010, 1, 2)).put()
        publish_changeset_status('test', 2, 'synced')
        publish_mock = client_mock.return_value.topic.return_value.publish
        publish_mock.assert_called_with(
            json.dumps({
                "meta": {
                    "version": "2.0.0",
                    "data_source_id": "test",
                    "timestamp": "2010-01-01T00:00:00"
                },
                "data": [{
                    "type": "changeset_sync_status",
                    "id": "test_2",
                    "attributes": {
                        "status": "synced",
                        "changeset": 2,
                        "synced_at": "2010-01-02T00:00:00"
                    }
                }]
            }))
Example #5
def update_changesets():
    """
    Updates OrgChangeset records based on status of the publish job.

    Returns:
        (str, int): http response
    """
    now = datetime.utcnow()
    statuses = {}  # caches job statuses so each dataflow job is queried only once
    org_changesets = OrgChangeset.query(
        OrgChangeset.publish_job_running == True).fetch()

    if not org_changesets:
        logging.info("no changesets to update")
        return '', 204

    for org_changeset in org_changesets:
        if org_changeset.publish_job_id not in statuses:
            try:
                statuses[org_changeset.publish_job_id] = get_job(
                    org_changeset.publish_job_id)
            except Exception:
                logging.exception(
                    "failed to retrieve job status from dataflow api")
                statuses[org_changeset.publish_job_id] = {
                    'currentState': 'STATUS_API_CALL_FAILED'
                }

        job_status = statuses[org_changeset.publish_job_id].get(
            'currentState', 'STATUS_API_RESPONSE_ERROR')
        org_changeset.publish_job_status = job_status

        # update the changeset details if the publish job status will not change any more
        if job_status in FINAL_STATES:
            org_changeset.publish_job_finished = True
            org_changeset.publish_job_running = False
            org_changeset.publish_job_failed = job_status != SUCCESS_STATE
            org_changeset.publish_finished_at = now

            if job_status == SUCCESS_STATE:
                publish_changeset_status(org_changeset.org_uid,
                                         org_changeset.changeset,
                                         CHANGESET_STATUS_SYNCED)
            else:
                publish_changeset_status(org_changeset.org_uid,
                                         org_changeset.changeset,
                                         CHANGESET_STATUS_ERROR)

        logging.info(
            "updating org changeset ({}, {}) with job status {}".format(
                org_changeset.org_uid, org_changeset.changeset,
                org_changeset.publish_job_status))

        org_changeset.put()

    return '', 204
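# A plausible definition of the job-state constants referenced above (an assumption; the real values live elsewhere
# in the codebase). Dataflow's terminal job states are done, failed and cancelled:
SUCCESS_STATE = 'JOB_STATE_DONE'
FINAL_STATES = ['JOB_STATE_DONE', 'JOB_STATE_FAILED', 'JOB_STATE_CANCELLED']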
Example #6
    def test_create_publish_job_task(self, dataflow_mock, publish_mock):
        """
        Verifies that a publish dataflow job is created for the requested changesets and the OrgChangeset is updated.

        Args:
            dataflow_mock(Mock): mock for kicking off dataflow publish job
            publish_mock(Mock): mock of the changeset publish function
        """
        dataflow_mock.return_value = {'id': 'job_1'}

        OrgChangeset(
            id='test0',
            org_uid='test0',
            changeset=0,
            publish_job_running=False,
            publish_job_count=0
        ).put()

        response = self.app.post(
            '/orchestrator/create_publish_job_task',
            data=json.dumps({
                'job_params': {
                    'org_changeset_ids': ['test0']
                }
            })
        )
        self.assertEqual(response.status_code, 204)

        job_params = {"orgChangesets": "test0:0"}
        dataflow_mock.assert_called_once_with('sync', ANY, job_params)

        # fields for published org are updated
        changeset = OrgChangeset.get_by_id('test0')
        self.assertTrue(changeset.publish_job_running)
        self.assertFalse(changeset.publish_job_finished)
        self.assertFalse(changeset.publish_job_failed)
        self.assertEqual(changeset.publish_job_id, "job_1")
        self.assertIsNone(changeset.publish_job_status)
        self.assertEqual(changeset.publish_job_count, 1)

        # and changeset status is published
        publish_mock.assert_called_once_with('test0', 0, 'syncing')
Example #7
    def test_update_changeset_failure(self, publish_mock):
        """
        Verifies that changeset publish status is updated from the dataflow api when the publish job has failed.

        Args:
            publish_mock(Mock): mock of the changeset publish function
        """
        OrgChangeset(id='test', org_uid='test', changeset=0, publish_job_running=True).put()

        response = self.app.get('/orchestrator/update_changesets')
        self.assertEqual(response.status_code, 204)

        # changeset has been marked as finished and failed
        changeset = OrgChangeset.get_by_id('test')
        self.assertTrue(changeset.publish_job_finished)
        self.assertFalse(changeset.publish_job_running)
        self.assertTrue(changeset.publish_job_failed)
        self.assertEqual(changeset.publish_job_status, 'JOB_STATE_FAILED')

        # and changeset status is published
        publish_mock.assert_called_once_with('test', 0, 'error')
Example #8
    def test_publish_failure(self, publish_mock):
        """
        Verifies that error org changeset status is published if publish job fails to be created.

        Args:
            publish_mock(Mock): mock of the changeset publish function
        """
        OrgChangeset(id='test0', org_uid='test0', changeset=0, publish_job_running=False).put()
        OrgChangeset(id='test1', org_uid='test1', changeset=0, publish_job_running=False).put()

        with self.assertRaises(ValueError):
            self.app.post(
                '/orchestrator/create_publish_job_task',
                data=json.dumps({
                    'job_params': {
                        'org_changeset_ids': ['test0', 'test1']
                    }
                })
            )

        self.assertEqual(len(self.taskqueue.get_filtered_tasks()), 0)

        self.assertEqual(publish_mock.call_count, 2)
        publish_mock.assert_has_calls([call('test0', 0, 'error'), call('test1', 0, 'error')])
Example #9
def get_last_changeset(org):
    """
    Gets the last changeset for an org.

    For orgs which are being ingested by the adapter service the last changeset is always Org.changeset, but some orgs
    are 'synced' via an external process (the 'uploader' provider for example). In this case the last changeset needs
    to be derived from OrgChangeset.

    Args:
        org(Org): the Org object

    Returns:
        int: the last (highest) changeset number for the org
    """
    # org.changeset is the changeset currently being worked on (it may already be finished, but it is the latest)
    org_uid = org.key.string_id()
    org_changeset = OrgChangeset.query(
        OrgChangeset.org_uid == org_uid).order(-OrgChangeset.changeset).get()
    return max(org.changeset, org_changeset.changeset if org_changeset else -1)
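# A minimal usage sketch (an assumption; requires an ndb test context): for an org synced by an external process,
# Org.changeset can lag behind OrgChangeset, so the last changeset is derived from the OrgChangeset records.
Org(id='upload1', changeset=0).put()
OrgChangeset(org_uid='upload1', changeset=3).put()
assert get_last_changeset(Org.get_by_id('upload1')) == 3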
Example #10
def publish_changeset_status(org_uid, changeset, status_value):
    """
    Utility function for publishing org changeset status events on pubsub.

    Args:
        org_uid(str): org identifier
        changeset(int): update cycle identifier
        status_value(str): status (e.g. syncing, synced, error)
    """
    topic = get_client().topic(STATUS_TOPIC)

    payload = {
        "meta": {
            "version": "2.0.0",
            "data_source_id": org_uid,
            "timestamp": datetime.utcnow().replace(microsecond=0).isoformat()
        },
        "data": [{
            "type": "changeset_sync_status",
            "id": "{}_{}".format(org_uid, changeset),
            "attributes": {
                "status": status_value,
                "changeset": changeset,
                "synced_at": None
            }
        }]
    }

    attributes = payload['data'][0]['attributes']

    if status_value == CHANGESET_STATUS_SYNCED:
        org_changeset = OrgChangeset.query(
            OrgChangeset.org_uid == org_uid,
            OrgChangeset.changeset == changeset).get()
        attributes['synced_at'] = org_changeset.publish_finished_at.replace(
            microsecond=0).isoformat()

    logging.info("publishing on status pubsub topic: {}".format(payload))

    topic.publish(json.dumps(payload))
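# A minimal sketch of the pubsub helpers assumed above (an assumption; the real implementations live elsewhere).
# The topic(...).publish(...) call style matches the pre-0.30 google-cloud-pubsub client:
from google.cloud import pubsub

STATUS_TOPIC = 'changeset-status'  # hypothetical topic name


def get_client():
    # construct a pubsub client with default credentials
    return pubsub.Client()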
Example #11
def changeset_list(org_uid):
    """
    Renders a page which shows all changesets and their status (ingestion and publish). Handles one org or all.

    Args:
        org_uid(str): org identifier

    Returns:
        (str, int): changeset listing page
    """
    cursor = Cursor(urlsafe=request.args.get('cursor'))
    failed = request.args.get('failed') == '1'

    query = OrgChangeset.query()

    if org_uid:
        query = query.filter(OrgChangeset.org_uid == org_uid)

    if failed:
        query = query.filter(
            ndb.OR(OrgChangeset.publish_job_failed == True,
                   OrgChangeset.publish_changeset_failed == True))

    # a query with OR conditions can only be ordered by key
    if failed:
        query = query.order(-OrgChangeset.key)
    else:
        query = query.order(-OrgChangeset.ingestion_completed_at)

    changesets, next_cursor, more = query.fetch_page(20, start_cursor=cursor)

    return render_template('changeset_list.html',
                           org_uid=org_uid,
                           changesets=changesets,
                           next_cursor=next_cursor,
                           more=more,
                           url_root=request.url_root,
                           failed=request.args.get('failed', '0')), 200
Example #12
    def test_complete_first_changeset(self):
        """
        Verifies that Org and OrgChangeset get updated to indicate that a changeset is complete.
        """
        started_at = datetime.now()

        Org(id='test', changeset=0, changeset_started_at=started_at).put()
        sync_utils.complete_changeset('test')

        # Org flags/timestamps are updated
        org = Org.get_by_id('test')
        self.assertEqual(org.changeset_completed_at, datetime(2010, 1, 1))
        self.assertEqual(org.last_update_cycle_completed_at,
                         datetime(2010, 1, 1))
        self.assertFalse(org.update_cycle_active)

        # OrgChangeset record is added
        org_changeset = OrgChangeset.query().get()
        self.assertEqual(org_changeset.org_uid, 'test')
        self.assertEqual(org_changeset.changeset, 0)
        self.assertEqual(org_changeset.ingestion_started_at, started_at)
        self.assertEqual(org_changeset.ingestion_completed_at,
                         datetime(2010, 1, 1))
        self.assertFalse(org_changeset.publish_job_running)
        self.assertFalse(org_changeset.publish_job_finished)
        self.assertEqual(org_changeset.publish_job_count, 0)

        # Publish task is queued for the first changeset
        self.assertEqual(len(self.taskqueue.get_filtered_tasks()), 1)
        self.assertEqual(
            self.taskqueue.get_filtered_tasks()[0].payload,
            json.dumps({
                "job_params": {
                    "org_changeset_ids": [org_changeset.key.id()]
                }
            }))
Example #13
    def test_status_endpoint(self):
        """
        A few test cases for the status endpoint.
        """
        Org(id='test1').put()
        response = self.app.get('/adapter/test1/status')
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.json['connected'], False)
        self.assertEqual(response.json['synced'], False)
        self.assertEqual(response.json['updating'], False)
        self.assertEqual(response.json['synced_at'], None)

        Org(id='test2', status=2).put()
        response = self.app.get('/adapter/test2/status')
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.json['connected'], True)

        Org(id='test3', status=2).put()
        OrgChangeset(org_uid='test3',
                     publish_job_finished=True,
                     publish_job_failed=False).put()
        response = self.app.get('/adapter/test3/status')
        self.assertEqual(response.status_code, 200)
        self.assertEqual(response.json['synced'], True)
Example #14
def get_changeset_status_payload(org_uid, changeset):
    """
    Creates response body for changeset status API.

    Args:
        org_uid(str): org identifier
        changeset(int): update cycle identifier

    Returns:
        dict: changeset status response payload
    """
    changeset_id = "{}_{}".format(org_uid, changeset)
    status = "unknown"
    synced_at = None

    org = Org.get_by_id(org_uid)

    if not org:
        payload = {
            "meta": {
                "version": "2.0.0",
            },
            "errors": [{
                "id": "{}_not_found".format(org_uid),
                "status": "404",
                "code": "not_found",
                "title": "Data Source not found",
                "detail": "Data Source {} could not be found.".format(org_uid)
            }]
        }

        logging.info("org {}:{} not found - response {}".format(
            org_uid, changeset, payload))
        return payload

    if changeset > get_last_changeset(org):
        payload = {
            "meta": {
                "version": "2.0.0",
                "data_source_id": org_uid
            },
            "errors": [{
                "id": "{}_{}_not_found".format(org_uid, changeset),
                "status": "404",
                "code": "not_found",
                "title": "Changeset not found",
                "detail": "Changeset {} could not be found for {}.".format(changeset, org_uid)
            }]
        }

        logging.info("changeset {}:{} not found - response {}".format(
            org_uid, changeset, payload))
        return payload

    org_changeset = OrgChangeset.query(
        OrgChangeset.org_uid == org_uid,
        OrgChangeset.changeset == changeset).get()

    # the org_changeset record exists only once ingestion is done
    if org_changeset:
        finished = org_changeset.publish_job_finished and not org_changeset.publish_job_running
        successful = not org_changeset.publish_job_failed and not org_changeset.publish_changeset_failed

        # a finished and successful publish means the changeset is synced
        if finished and successful:
            status = CHANGESET_STATUS_SYNCED
            synced_at = org_changeset.publish_finished_at.replace(
                microsecond=0).isoformat()
        elif not finished:
            status = CHANGESET_STATUS_SYNCING
        else:
            status = CHANGESET_STATUS_ERROR

    # ingestion is still in progress
    else:
        if org.status == CONNECTED:
            status = CHANGESET_STATUS_SYNCING
        elif org.status == DISCONNECTED:
            status = CHANGESET_STATUS_ERROR

    # just in case we have a gap in the above logic (could also indicate an inconsistent org state)
    if status == "unknown":
        logging.error("could not determine changeset status for {}:{}".format(
            org_uid, changeset))

    payload = {
        "meta": {
            "version": "2.0.0",
            "data_source_id": org_uid
        },
        "data": [{
            "type": "changeset_status",
            "id": changeset_id,
            "relationships": {
                "sync_status": {
                    "data": {
                        "type": "changeset_sync_status",
                        "id": changeset_id
                    }
                }
            }
        }],
        "included": [{
            "type": "changeset_sync_status",
            "id": changeset_id,
            "attributes": {
                "status": status,
                "synced_at": synced_at
            }
        }]
    }

    logging.info("changeset status for {}: {}".format(changeset_id, payload))

    return payload
Example #15
def start_publish():
    """
    Kicks off a dataflow template to publish normalised data. The jobs are created via a task queue task, passing the
    IDs of the OrgChangesets which need to be published.

    This endpoint is invoked by a regular cron job or by a request from the admin UI, and takes an additional parameter
    which allows each org to be published by a separate dataflow job (useful for isolating an org which causes the
    whole publish job to fail).

    Returns:
        (str, int): http response
    """
    logging.info("about to kick off a publish dataflow job")

    per_org = request.form.get('per_org') == '1'
    if per_org:
        logging.info("publish job per org requested")

    # we want to publish changesets which:
    # - have newly been ingested (publish not running and not finished)
    # - OR have been attempted to be published but failed
    #   - due to the whole job failing
    #   - OR publish of the individual changeset failing
    org_changesets_query = OrgChangeset.query(
        ndb.OR(
            ndb.AND(OrgChangeset.publish_job_running == False,
                    OrgChangeset.publish_job_finished == False),
            ndb.AND(
                OrgChangeset.publish_job_running == False,
                OrgChangeset.publish_job_finished == True,
                ndb.OR(OrgChangeset.publish_job_failed == True,
                       OrgChangeset.publish_changeset_failed == True)))).order(
                           OrgChangeset.key)

    org_changesets = list(emit_items(org_changesets_query))

    # Query any currently running org changesets
    running_org_changesets_query = OrgChangeset.query(
        OrgChangeset.publish_job_running == True)
    running_org_changesets = list(emit_items(running_org_changesets_query))

    running_orgs = list(
        set([
            running_org_changeset.org_uid
            for running_org_changeset in running_org_changesets
        ]))

    # filter out org changesets whose org already has a publish job running
    gated_org_changesets = [
        oc for oc in org_changesets if oc.org_uid not in running_orgs
    ]

    if len(gated_org_changesets) != len(org_changesets):
        filtered_ocs = [oc for oc in org_changesets if oc.org_uid in running_orgs]
        filtered_oc_tuples = [(oc.org_uid, oc.changeset)
                              for oc in filtered_ocs]

        logging.info(
            "stopped these changesets from being published as job already running for the org: {}"
            .format(filtered_oc_tuples))

    if not gated_org_changesets:
        logging.info("nothing to publish")
        return '', 204

    # remove changesets for blacklisted orgs
    blacklisted_orgs = {}
    org_changesets_to_publish = []
    for org_changeset in gated_org_changesets:
        # avoid re-fetching orgs already known to be blacklisted
        org = blacklisted_orgs.get(org_changeset.org_uid) or Org.get_by_id(
            org_changeset.org_uid)
        if org and org.publish_disabled:
            blacklisted_orgs[org.key.string_id()] = org
        else:
            org_changesets_to_publish.append(org_changeset)

    to_publish = []

    if per_org:
        org_changesets_sorted = sorted(org_changesets_to_publish,
                                       key=attrgetter('org_uid'))
        for org_uid, changesets in groupby(org_changesets_sorted,
                                           key=attrgetter('org_uid')):
            to_publish.append({
                'org_uid': org_uid,
                'org_changeset_ids': [changeset.key.id() for changeset in changesets]
            })
    else:
        to_publish.append({
            'org_changeset_ids': [changeset.key.id() for changeset in org_changesets_to_publish]
        })

    logging.info("have {} publish tasks to create".format(len(to_publish)))

    items_to_tasks(items=to_publish,
                   queue=Queue('create-publish-job'),
                   task_generator=lambda item: Task(
                       url='/orchestrator/create_publish_job_task',
                       payload=dumps({'job_params': item})))

    return '', 204
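# A plausible sketch of the emit_items helper used above (an assumption; the real implementation lives elsewhere).
# It pages through an ndb query and yields entities one at a time:
def emit_items(query, page_size=500):
    cursor = None
    more = True
    while more:
        items, cursor, more = query.fetch_page(page_size, start_cursor=cursor)
        for item in items:
            yield item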
Example #16
def status(org_uid):
    """
    Retrieve org status.

    Args:
        org_uid(str): org identifier

    Returns:
        (str, int): http response
    """
    def date_str(date):
        """
        Formats a date into a string (handles None values also).

        Args:
            date(date|datetime): date to be formatted

        Returns:
            str: formatted date
        """
        if date is None:
            return None

        return date.isoformat() + 'Z'

    org = Org.get_by_id(org_uid)

    if not org:
        logging.info("org {} not found".format(org_uid))
        return '', 404

    changeset = OrgChangeset.query(
        OrgChangeset.org_uid == org_uid,
        OrgChangeset.publish_job_finished == True,
        OrgChangeset.publish_job_failed == False).order(
            -OrgChangeset.publish_finished_at).fetch(1)

    # the first publish happens only once all the data is ingested, so if any publish has finished the org is synced
    synced = bool(changeset)

    # synced_at is the ingestion completion time of the last changeset that got published
    synced_at = changeset[0].ingestion_completed_at if changeset else None

    status_payload = {
        'synced': synced,
        'synced_at': date_str(synced_at),
        'connected': org.status == CONNECTED,
        'updating': org.changeset_started_at is not None and org.changeset_completed_at is None,
        'source': org.provider,
        'id': org_uid
    }

    logging.info("org status: {}".format(status_payload))

    return jsonify(status_payload), 200
Example #17
    def test_changeset_status_endpoint(self):
        """
        A few test cases for the changeset status endpoint.
        """
        # test missing org
        response = self.app.get('/api/data_sources/test0/changesets/0/status')
        self.assertEqual(response.json['errors'][0]['id'], 'test0_not_found')
        self.assertEqual(response.json['errors'][0]['code'], 'not_found')

        # test missing changeset
        Org(id='test1', changeset=10).put()
        response = self.app.get('/api/data_sources/test1/changesets/11/status')
        self.assertEqual(response.json['errors'][0]['id'], 'test1_11_not_found')
        self.assertEqual(response.json['errors'][0]['code'], 'not_found')

        # test synced
        Org(id='test2', changeset=11).put()
        OrgChangeset(
            id='test2',
            org_uid='test2',
            changeset=10,
            publish_job_running=False,
            publish_job_finished=True,
            publish_job_failed=False,
            publish_changeset_failed=False,
            publish_finished_at=datetime(2010, 1, 1)
        ).put()
        response = self.app.get('/api/data_sources/test2/changesets/10/status')
        self.assertEqual(response.json['meta']['data_source_id'], 'test2')
        self.assertEqual(response.json['data'][0]['relationships']['sync_status']['data']['id'], 'test2_10')
        self.assertEqual(response.json['included'][0]['attributes']['status'], 'synced')
        self.assertEqual(response.json['included'][0]['attributes']['synced_at'], '2010-01-01T00:00:00')

        # test syncing
        Org(id='test3', changeset=11).put()
        OrgChangeset(
            id='test3',
            org_uid='test3',
            changeset=10,
            publish_job_running=True,
            publish_job_finished=False,
            publish_job_failed=False,
            publish_changeset_failed=False
        ).put()
        response = self.app.get('/api/data_sources/test3/changesets/10/status')
        self.assertEqual(response.json['included'][0]['attributes']['status'], 'syncing')
        self.assertIsNone(response.json['included'][0]['attributes']['synced_at'])

        # test error
        Org(id='test4', changeset=11).put()
        OrgChangeset(
            id='test4',
            org_uid='test4',
            changeset=10,
            publish_job_running=False,
            publish_job_finished=True,
            publish_job_failed=True,
            publish_changeset_failed=True
        ).put()
        response = self.app.get('/api/data_sources/test4/changesets/10/status')
        self.assertEqual(response.json['included'][0]['attributes']['status'], 'error')
        self.assertIsNone(response.json['included'][0]['attributes']['synced_at'])

        # test ingestion in progress
        Org(id='test5', changeset=11, status=CONNECTED).put()
        response = self.app.get('/api/data_sources/test5/changesets/10/status')
        self.assertEqual(response.json['included'][0]['attributes']['status'], 'syncing')
        self.assertIsNone(response.json['included'][0]['attributes']['synced_at'])

        # test ingestion in error
        Org(id='test6', changeset=11, status=DISCONNECTED).put()
        response = self.app.get('/api/data_sources/test6/changesets/10/status')
        self.assertEqual(response.json['included'][0]['attributes']['status'], 'error')
        self.assertIsNone(response.json['included'][0]['attributes']['synced_at'])
Example #18
    def test_publish(self):
        """
        Verifies that the correct changesets are being published.
        """
        # newly ingested changeset (publish not attempted yet)
        OrgChangeset(
            id='test0',
            org_uid='test0',
            changeset=0,
            publish_job_running=False,
            publish_job_finished=False,
            publish_job_count=0
        ).put()

        # the whole publish job failed (set by the orchestrator service based on dataflow api)
        OrgChangeset(
            id='test1',
            org_uid='test1',
            changeset=1,
            publish_job_running=False,
            publish_job_failed=True,
            publish_job_finished=True,
            publish_job_count=1
        ).put()

        # an individual changeset failed to be published (set by the publish job)
        OrgChangeset(
            id='test2',
            org_uid='test2',
            changeset=2,
            publish_job_running=False,
            publish_changeset_failed=True,
            publish_job_finished=True,
            publish_job_count=2
        ).put()

        # this changeset should not get published because its publish job is running
        OrgChangeset(
            id='test3',
            org_uid='test3',
            changeset=3,
            publish_job_running=True,
            publish_job_count=3
        ).put()

        # this changeset should not get published because its org is blacklisted
        Org(
            id='test4',
            publish_disabled=True
        ).put()
        OrgChangeset(
            id='test4',
            org_uid='test4',
            changeset=0,
            publish_job_running=False,
            publish_job_failed=True,
            publish_job_finished=True,
            publish_job_count=0
        ).put()

        # these changesets should not get published as a job is already running for another changeset of the same org
        OrgChangeset(
            id='test5_a',
            org_uid='test5',
            changeset=5,
            publish_job_running=True,
            publish_job_failed=False,
            publish_job_finished=False,
            publish_job_count=0
        ).put()
        OrgChangeset(
            id='test5_b',
            org_uid='test5',
            changeset=6,
            publish_job_running=False,
            publish_job_failed=False,
            publish_job_finished=False,
            publish_job_count=0
        ).put()

        # one task is created for a normal publish
        response = self.app.get('/orchestrator/publish')
        self.assertEqual(response.status_code, 204)
        self.assertEqual(len(self.taskqueue.get_filtered_tasks()), 1)

        # one task per org is created if per_org publish is specified
        response = self.app.post('/orchestrator/publish', data={'per_org': 1})
        self.assertEqual(response.status_code, 204)
        self.assertEqual(len(self.taskqueue.get_filtered_tasks()), 1 + 3)  # 1 from last call, 3 from this call