Exemple #1
0
    def test_changeset_status_error(self, client_mock):
        """
        Checks the pubsub payload emitted for a changeset reported with the 'error' status.

        No OrgChangeset is stored by this test, and the expected message carries a null synced_at.

        Args:
            client_mock(Mock): mock of the pubsub client
        """
        Org(id='test').put()
        publish_changeset_status('test', 2, 'error')

        expected_meta = {
            "version": "2.0.0",
            "data_source_id": "test",
            "timestamp": "2010-01-01T00:00:00"
        }
        expected_attributes = {
            "status": "error",
            "changeset": 2,
            "synced_at": None
        }
        expected_message = json.dumps({
            "meta": expected_meta,
            "data": [{
                "type": "changeset_sync_status",
                "id": "test_2",
                "attributes": expected_attributes
            }]
        })

        # the message is published through client -> topic -> publish
        topic_mock = client_mock.return_value.topic.return_value
        topic_mock.publish.assert_called_with(expected_message)
Exemple #2
0
    def test_changeset_status_published(self, client_mock):
        """
        Checks the pubsub payload emitted for a changeset reported with the 'synced' status.

        An OrgChangeset with publish_finished_at set is stored first; the expected message carries that timestamp
        as synced_at.

        Args:
            client_mock(Mock): mock of the pubsub client
        """
        Org(id='test').put()
        finished_changeset = OrgChangeset(org_uid='test',
                                          changeset=2,
                                          publish_finished_at=datetime(2010, 1, 2))
        finished_changeset.put()

        publish_changeset_status('test', 2, 'synced')

        expected_meta = {
            "version": "2.0.0",
            "data_source_id": "test",
            "timestamp": "2010-01-01T00:00:00"
        }
        expected_attributes = {
            "status": "synced",
            "changeset": 2,
            "synced_at": "2010-01-02T00:00:00"
        }
        expected_message = json.dumps({
            "meta": expected_meta,
            "data": [{
                "type": "changeset_sync_status",
                "id": "test_2",
                "attributes": expected_attributes
            }]
        })

        # the message is published through client -> topic -> publish
        topic_mock = client_mock.return_value.topic.return_value
        topic_mock.publish.assert_called_with(expected_message)
Exemple #3
0
def mark_as_connected(org_uid, also_linked=False):
    """
    Sets an org's status to CONNECTED, stamps connected_at, saves it and publishes the connect status.

    When also_linked is set, linked_at is stamped as well and a linked status is published ahead of the connect
    status. If is_changeset_in_progress reports a changeset in progress for the org, a syncing changeset status is
    published for the org's current changeset too.

    Args:
        org_uid(str): org identifier
        also_linked(bool): whether the org should be flagged as linked in addition to connected
    """
    message = "marking the org as connected (status value {})".format(CONNECTED)
    logging.info(message)

    connected_org = Org.get_by_id(org_uid)
    connected_org.status = CONNECTED
    if also_linked:
        connected_org.linked_at = datetime.utcnow()
    connected_org.connected_at = datetime.utcnow()
    connected_org.put()

    # statuses go out only after the org is saved; link status precedes connect status
    pending_statuses = []
    if also_linked:
        pending_statuses.append((LINK_STATUS_TYPE, LINK_STATUS_LINKED))
    pending_statuses.append((CONNECT_STATUS_TYPE, CONNECT_STATUS_CONNECTED))

    for status_type, status_value in pending_statuses:
        publish_status(org_uid, status_type, status_value)

    if not is_changeset_in_progress(connected_org):
        return

    logging.info(
        "publishing syncing changeset status for changeset {}:{}".format(
            org_uid, connected_org.changeset))
    publish_changeset_status(org_uid, connected_org.changeset,
                             CHANGESET_STATUS_SYNCING)
Exemple #4
0
def mark_as_disconnected(org_uid, deactivate_update_cycle):
    """
    Sets an org's status to DISCONNECTED, saves it and publishes the disconnected connect status.

    Only the status (and optionally the update_cycle_active flag) is changed here; nothing in this function removes
    the org's auth keys. If is_changeset_in_progress reports a changeset in progress for the org, an error changeset
    status is published for the org's current changeset.

    Args:
        org_uid(str): org identifier
        deactivate_update_cycle(bool): indicates if the update_cycle_active flag should be set to false
    """
    message = "marking the org as disconnected (status value {})".format(DISCONNECTED)
    logging.info(message)

    disconnected_org = Org.get_by_id(org_uid)
    disconnected_org.status = DISCONNECTED
    if deactivate_update_cycle:
        disconnected_org.update_cycle_active = False
    disconnected_org.put()

    publish_status(org_uid, CONNECT_STATUS_TYPE, CONNECT_STATUS_DISCONNECTED)

    if not is_changeset_in_progress(disconnected_org):
        return

    current_changeset = disconnected_org.changeset
    logging.info(
        "publishing error changeset status for changeset {}:{}".format(
            org_uid, current_changeset))
    publish_changeset_status(org_uid, current_changeset, CHANGESET_STATUS_ERROR)
def update_changesets():
    """
    Refreshes the publish job status on every OrgChangeset whose publish job is flagged as running.

    Job details are fetched through get_job at most once per job id. When the lookup raises, the status is recorded
    as STATUS_API_CALL_FAILED; when the response has no 'currentState', it is recorded as STATUS_API_RESPONSE_ERROR.
    Once a job reaches one of FINAL_STATES the changeset is marked finished and a synced or error changeset status
    is published.

    Returns:
        (str, int): http response
    """
    now = datetime.utcnow()
    running_changesets = OrgChangeset.query(
        OrgChangeset.publish_job_running == True).fetch()

    if not running_changesets:
        logging.info("no changesets to update")
        return '', 204

    # job id -> job details, so several changesets sharing one job cost a single lookup
    jobs_by_id = {}

    for entity in running_changesets:
        job_id = entity.publish_job_id

        if job_id not in jobs_by_id:
            try:
                jobs_by_id[job_id] = get_job(job_id)
            except Exception:
                logging.exception(
                    "failed to retrieve job status from dataflow api")
                jobs_by_id[job_id] = {'currentState': 'STATUS_API_CALL_FAILED'}

        current_state = jobs_by_id[job_id].get('currentState',
                                               'STATUS_API_RESPONSE_ERROR')
        entity.publish_job_status = current_state

        # a final state will not change any more, so the changeset details can be closed off
        if current_state in FINAL_STATES:
            entity.publish_job_finished = True
            entity.publish_job_running = False
            entity.publish_job_failed = current_state != SUCCESS_STATE
            entity.publish_finished_at = now

            outcome = (CHANGESET_STATUS_SYNCED
                       if current_state == SUCCESS_STATE
                       else CHANGESET_STATUS_ERROR)
            publish_changeset_status(entity.org_uid, entity.changeset, outcome)

        logging.info(
            "updating org changeset ({}, {}) with job status {}".format(
                entity.org_uid, entity.changeset, entity.publish_job_status))

        entity.put()

    return '', 204
def create_publish_job_task():
    """
    Creates a dataflow publish job for the OrgChangesets specified in the request body.

    The request body is expected to be JSON with a 'job_params' object holding 'org_changeset_ids'. The matching
    OrgChangeset entities are loaded and their org/changeset pairs are passed to the 'sync' dataflow template as a
    comma separated list of 'org_uid:changeset' items. Ids which do not resolve to a stored OrgChangeset are logged
    and skipped.

    If the job cannot be created, an error changeset status is published for each changeset and the original
    exception is re-raised. Otherwise each changeset is flagged as running, a syncing changeset status is published
    for it, and the changesets are saved.

    Returns:
        (str, int): http response

    Raises:
        Exception: whatever start_template (or reading the job id from its result) raised
    """
    logging.info("got a request to create a publish job")

    job_params = loads(request.data).get('job_params', {})
    logging.info("job params in request: {}".format(job_params))

    now = datetime.utcnow()
    job_name = 'publish_job_{}'.format(now.isoformat())

    org_changeset_ids = job_params.get('org_changeset_ids', [])
    fetched = ndb.get_multi(
        [Key(OrgChangeset, _id) for _id in org_changeset_ids])

    # get_multi yields None for keys without a stored entity; those cannot be published
    org_changesets = []
    for _id, entity in zip(org_changeset_ids, fetched):
        if entity is None:
            logging.warning(
                "org changeset with id {} does not exist, skipping".format(_id))
        else:
            org_changesets.append(entity)

    to_publish = ','.join(
        ["{}:{}".format(row.org_uid, row.changeset) for row in org_changesets])
    job_params = {'orgChangesets': to_publish}
    logging.info("job params: {}".format(job_params))

    try:
        job_details = start_template('sync', job_name, job_params)
        job_id = job_details['id']
    except Exception:
        logging.exception("failed to create dataflow job")

        for org_changeset in org_changesets:
            msg = "publishing error status for changeset {}:{} because dataflow job failed to be created"
            logging.info(
                msg.format(org_changeset.org_uid, org_changeset.changeset))
            publish_changeset_status(org_changeset.org_uid,
                                     org_changeset.changeset,
                                     CHANGESET_STATUS_ERROR)

        # bare raise keeps the original traceback intact
        raise

    logging.info("job scheduled with id: {}".format(job_id))

    # mark the changesets as running
    for org_changeset in org_changesets:
        org_changeset.publish_job_running = True
        org_changeset.publish_job_finished = False
        org_changeset.publish_job_failed = False
        org_changeset.publish_changeset_failed = False
        org_changeset.publish_job_id = job_id
        org_changeset.publish_job_status = None
        org_changeset.publish_job_count += 1
        org_changeset.publish_started_at = now

        # publish changeset status of syncing because this changeset could be in error and is being retried
        publish_changeset_status(org_changeset.org_uid,
                                 org_changeset.changeset,
                                 CHANGESET_STATUS_SYNCING)

    ndb.put_multi(org_changesets)

    logging.info("job details saved in OrgChangeset")

    return '', 204
Exemple #7
0
def init_update(org_uid):
    """
    Starts a new update cycle for an org, or resumes the one already in progress.

    A changeset is an increasing integer identifying a sync cycle for an org (a sync cycle is a 'pull' of all
    endpoints for an org). The states a sync can be in are handled as follows:
    - no changeset in progress: create a new changeset and create a task on the adapter-update queue
    - a changeset in progress exists
      - no task exists on the adapter-update queue (after getting auth issues for a while): create the task
      - a task exists on the adapter-update queue (user re-connects the org): do nothing

    This function is the only function which should be used to start/resume a sync cycle. It ensures no current
    cycle is running before starting a new one, and does so by using a transaction which spans the database and task
    queue (the task is added with transactional=True). Ensuring that only one sync cycle is running at a time is
    important because there is nothing stopping two concurrent sync cycles from refreshing org credentials at the
    same time and losing the refresh token (then the file can't be synced without the user doing the auth flow
    again). A sync cycle pulls endpoints serially so there is no danger of refresh key corruption if only one sync
    cycle is running.

    Args:
        org_uid(str): org identifier
    """
    org = Org.get_by_id(org_uid)

    def enqueue_update_task():
        # shared by both branches: puts the adapter update task for this org on the adapter-update queue
        taskqueue.add(queue_name='adapter-update',
                      target='adapter',
                      url='/adapter/{}/{}/update'.format(
                          org.provider, org.key.string_id()),
                      transactional=True)

    has_started = bool(org.changeset_started_at)
    has_completed = bool(org.changeset_completed_at)

    # both timestamps set (finished) or both unset (never started): begin a fresh changeset
    if has_started == has_completed:
        new_changeset = org.changeset + 1
        logging.info(
            "initializing update cycle with changeset {} for org {}".format(
                new_changeset, org_uid))

        org.changeset = new_changeset
        org.changeset_started_at = datetime.utcnow()
        org.changeset_completed_at = None
        org.update_cycle_active = True
        org.put()

        enqueue_update_task()
        publish_changeset_status(org_uid, org.changeset,
                                 CHANGESET_STATUS_SYNCING)
        return

    logging.info(
        "update cycle in progress for org {} with changeset {}".format(
            org_uid, org.changeset))

    if org.update_cycle_active:
        logging.info(
            "update cycle is active (update task exists), not adding a new one"
        )
        return

    logging.info(
        "update cycle is not active (no update task exists), adding a new one"
    )
    enqueue_update_task()
    org.update_cycle_active = True
    org.put()

    publish_changeset_status(org_uid, org.changeset, CHANGESET_STATUS_SYNCING)