Code Example #1
File: base.py  Project: jonasfj/treeherder-service
class TreeherderModelBase(object):
    """
    Base model class for all derived models

    """

    def __init__(self, project):
        """Encapsulate the dataset access for this ``project`` """

        self.project = project
        self.sources = {}
        self.dhubs = {}
        self.DEBUG = settings.DEBUG
        self.refdata_model = RefDataManager()

    def __unicode__(self):
        """Unicode representation is project name."""
        return self.project

    @classmethod
    def get_oauth_credentials(cls):

        credentials = {}

        for source in Datasource.objects.cached():

            if (source.contenttype == "objectstore") and source.oauth_consumer_key and source.oauth_consumer_secret:

                credentials[source.project] = {
                    "consumer_key": source.oauth_consumer_key,
                    "consumer_secret": source.oauth_consumer_secret,
                }

        return credentials

    def get_dhub(self, contenttype, procs_file_name=None):
        """
        The configured datahub for the given contenttype

        """
        if not procs_file_name:  # pragma: no cover
            procs_file_name = "{0}.json".format(contenttype)

        if contenttype not in self.dhubs.keys():
            datasource = self.get_datasource(contenttype)

            self.dhubs[contenttype] = datasource.dhub(procs_file_name)
        return self.dhubs[contenttype]

    def get_datasource(self, contenttype):
        """The datasource for this contenttype of the project."""

        if contenttype not in self.sources.keys():
            self.sources[contenttype] = self._get_datasource(contenttype)

        return self.sources[contenttype]

    def get_inserted_row_ids(self, dhub):
        """
        InnoDB guarantees sequential numbers for AUTO INCREMENT when doing
        bulk inserts, provided innodb_autoinc_lock_mode is set to 0
        (traditional) or 1 (consecutive).

        Consequently you can get the first ID from LAST_INSERT_ID() and the
        last by adding ROW_COUNT()-1

        ref: http://stackoverflow.com/questions/7333524/how-can-i-insert-many-rows-into-a-mysql-table-and-get-ids-back

        NOTE: The cursor rowcount is always one for a
              INSERT INTO/SELECT FROM DUAL WHERE NOT EXISTS query otherwise
              it's equal to the number of rows inserted or updated.
        """

        row_count = dhub.connection["master_host"]["cursor"].rowcount
        ids = []

        if row_count > 0:
            last_id = dhub.connection["master_host"]["cursor"].lastrowid
            ids.extend(list(range(last_id - (row_count - 1), last_id + 1)))

        return ids

    def submit_publish_to_pulse_tasks(self, ids, data_type):

        from treeherder.model.tasks import publish_to_pulse

        publish_to_pulse.apply_async(args=[self.project, ids, data_type])

    def get_row_by_id(self, contenttype, table_name, obj_id):
        """
        Given an ``id``, get the row for that item.
        Return None if not found.
        """
        proc = "generic.selects.get_row_by_id"
        iter_obj = self.get_dhub(contenttype).execute(
            proc=proc, replace=[table_name], placeholders=[obj_id], debug_show=self.DEBUG, return_type="iter"
        )
        return self.as_single(iter_obj, table_name, id=obj_id)

    def disconnect(self):
        """Iterate over and disconnect all data sources."""
        self.refdata_model.disconnect()
        for dhub in self.dhubs.itervalues():
            dhub.disconnect()

    def _get_datasource(self, contenttype):
        """Find the datasource for this contenttype in the cache."""
        candidate_sources = []
        for source in Datasource.objects.cached():
            if source.project == self.project and source.contenttype == contenttype:
                candidate_sources.append(source)

        if not candidate_sources:
            raise DatasetNotFoundError(self.project, contenttype)

        candidate_sources.sort(key=lambda s: s.dataset, reverse=True)

        return candidate_sources[0]
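The arithmetic in get_inserted_row_ids above can be followed without a database connection. The sketch below is a standalone illustration (not Treeherder code) that plugs in hypothetical cursor values and recovers the same ID range the method would return; like the method, it treats the cursor's lastrowid as the highest ID of the batch.

# Standalone illustration of the get_inserted_row_ids arithmetic.
# The cursor values are hypothetical: a bulk insert of 5 rows whose
# AUTO_INCREMENT ids ended at 105.
row_count = 5      # what cursor.rowcount would report
last_id = 105      # what cursor.lastrowid would report

ids = []
if row_count > 0:
    # first id of the batch = last_id - (row_count - 1)
    ids.extend(range(last_id - (row_count - 1), last_id + 1))

print(ids)  # [101, 102, 103, 104, 105]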
Code Example #2
class TreeherderModelBase(object):
    """
    Base model class for all derived models

    """
    logger = logging.getLogger(__name__)

    def __init__(self, project):
        """Encapsulate the dataset access for this ``project`` """

        self.project = project
        self.sources = {}
        self.dhubs = {}
        self.DEBUG = settings.DEBUG
        self.refdata_model = RefDataManager()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.disconnect()

    def __str__(self):
        """String representation is project name."""
        return self.project

    @classmethod
    def get_oauth_credentials(cls):

        credentials = {}

        for source in Datasource.objects.cached():

            if (source.contenttype == 'objectstore') and \
               source.oauth_consumer_key and \
               source.oauth_consumer_secret:

                credentials[source.project] = {
                    'consumer_key': source.oauth_consumer_key,
                    'consumer_secret': source.oauth_consumer_secret
                }

        return credentials

    def get_dhub(self, contenttype, procs_file_name=None):
        """
        The configured datahub for the given contenttype

        """
        if not procs_file_name:  # pragma: no cover
            procs_file_name = "{0}.json".format(contenttype)

        if contenttype not in self.dhubs.keys():
            datasource = self.get_datasource(contenttype)

            self.dhubs[contenttype] = datasource.dhub(procs_file_name)
        return self.dhubs[contenttype]

    def get_datasource(self, contenttype):
        """The datasource for this contenttype of the project."""

        if contenttype not in self.sources.keys():
            self.sources[contenttype] = self._get_datasource(contenttype)

        return self.sources[contenttype]

    def get_inserted_row_ids(self, dhub):
        """
        InnoDB guarantees sequential numbers for AUTO INCREMENT when doing
        bulk inserts, provided innodb_autoinc_lock_mode is set to 0
        (traditional) or 1 (consecutive).

        Consequently you can get the first ID from LAST_INSERT_ID() and the
        last by adding ROW_COUNT()-1

        ref: http://stackoverflow.com/questions/7333524/how-can-i-insert-many-rows-into-a-mysql-table-and-get-ids-back

        NOTE: The cursor rowcount is always one for a
              INSERT INTO/SELECT FROM DUAL WHERE NOT EXISTS query otherwise
              it's equal to the number of rows inserted or updated.
        """

        row_count = dhub.connection['master_host']['cursor'].rowcount
        ids = []

        if row_count > 0:
            last_id = dhub.connection['master_host']['cursor'].lastrowid
            ids.extend(list(range(last_id - (row_count - 1), last_id + 1)))

        return ids

    def _process_conditions(self, conditions, allowed_fields=None):
        """Transform a list of conditions into a list of placeholders and
        replacement strings to feed a datahub.execute statement."""
        placeholders = []
        replace_str = ""
        if conditions:
            for column, condition in conditions.items():
                if allowed_fields is None or column in allowed_fields:
                    if allowed_fields and column in allowed_fields:
                        # we need to get the db column string from the passed
                        # in querystring column.  It could be the same, but
                        # often it will have a table prefix for the column.
                        # This allows us to have where clauses on joined fields
                        # of the query.
                        column = allowed_fields[column]
                    for operator, value in condition:
                        replace_str += "AND {0} {1}".format(column, operator)
                        if operator == "IN":
                            # create a list of placeholders of the same length
                            # as the list of values
                            replace_str += "({0})".format(",".join(["%s"] *
                                                                   len(value)))
                            placeholders += value
                        else:
                            replace_str += " %s "
                            placeholders.append(value)

        return replace_str, placeholders

    def disconnect(self):
        """Iterate over and disconnect all data sources."""
        self.refdata_model.disconnect()
        for dhub in self.dhubs.itervalues():
            dhub.disconnect()

    def _get_datasource(self, contenttype):
        """Find the datasource for this contenttype in the cache."""
        try:
            return next(source for source in Datasource.objects.cached()
                        if source.project == self.project
                        and source.contenttype == contenttype)
        except StopIteration:
            raise DatasetNotFoundError(self.project, contenttype)
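The _process_conditions helper introduced in this example only reads its arguments and never touches self, so its behaviour can be shown with a plain-function copy. The sketch below is an illustration only; the conditions dict, the allowed_fields mapping, and the column names in it are hypothetical.

def process_conditions(conditions, allowed_fields=None):
    # Plain-function copy of TreeherderModelBase._process_conditions,
    # reproduced solely to show the input and output shape.
    placeholders = []
    replace_str = ""
    if conditions:
        for column, condition in conditions.items():
            if allowed_fields is None or column in allowed_fields:
                if allowed_fields and column in allowed_fields:
                    # map the querystring column to its (possibly
                    # table-prefixed) database column
                    column = allowed_fields[column]
                for operator, value in condition:
                    replace_str += "AND {0} {1}".format(column, operator)
                    if operator == "IN":
                        # one placeholder per value in the sequence
                        replace_str += "({0})".format(",".join(["%s"] * len(value)))
                        placeholders += value
                    else:
                        replace_str += " %s "
                        placeholders.append(value)
    return replace_str, placeholders

# Hypothetical filter: state must be one of two values, on a joined column.
conditions = {"state": set([("IN", ("running", "pending"))])}
allowed_fields = {"state": "job.state"}

print(process_conditions(conditions, allowed_fields))
# ('AND job.state IN(%s,%s)', ['running', 'pending'])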
Code Example #3
File: base.py  Project: EricRahm/treeherder
class TreeherderModelBase(object):

    """
    Base model class for all derived models

    """
    logger = logging.getLogger(__name__)

    def __init__(self, project):
        """Encapsulate the dataset access for this ``project`` """

        self.project = project
        self.source = None
        self.dhub = None
        self.DEBUG = settings.DEBUG
        self.refdata_model = RefDataManager()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.disconnect()

    def __str__(self):
        """String representation is project name."""
        return self.project

    def get_dhub(self, procs_file_name=None):
        """
        The configured datahub

        """
        if not procs_file_name:  # pragma: no cover
            procs_file_name = "jobs.json"

        if not self.dhub:
            datasource = self.get_datasource()

            self.dhub = datasource.dhub(procs_file_name)
        return self.dhub

    def get_datasource(self):
        """The datasource of the project."""

        if not self.source:
            self.source = self._get_datasource()

        return self.source

    def get_inserted_row_ids(self, dhub):
        """
        InnoDB guarantees sequential numbers for AUTO INCREMENT when doing
        bulk inserts, provided innodb_autoinc_lock_mode is set to 0
        (traditional) or 1 (consecutive).

        Consequently you can get the first ID from LAST_INSERT_ID() and the
        last by adding ROW_COUNT()-1

        ref: http://stackoverflow.com/questions/7333524/how-can-i-insert-many-rows-into-a-mysql-table-and-get-ids-back

        NOTE: The cursor rowcount is always one for a
              INSERT INTO/SELECT FROM DUAL WHERE NOT EXISTS query otherwise
              it's equal to the number of rows inserted or updated.
        """

        row_count = dhub.connection['master_host']['cursor'].rowcount
        ids = []

        if row_count > 0:
            last_id = dhub.connection['master_host']['cursor'].lastrowid
            ids.extend(
                list(range(last_id - (row_count - 1), last_id + 1))
            )

        return ids

    def _process_conditions(self, conditions, allowed_fields=None):
        """Transform a list of conditions into a list of placeholders and
        replacement strings to feed a datahub.execute statement."""
        placeholders = []
        replace_str = ""
        if conditions:
            for column, condition in conditions.items():
                if allowed_fields is None or column in allowed_fields:
                    if allowed_fields and column in allowed_fields:
                        # we need to get the db column string from the passed
                        # in querystring column.  It could be the same, but
                        # often it will have a table prefix for the column.
                        # This allows us to have where clauses on joined fields
                        # of the query.
                        column = allowed_fields[column]
                    for operator, value in condition:
                        replace_str += "AND {0} {1}".format(column, operator)
                        if operator == "IN":
                            # create a list of placeholders of the same length
                            # as the list of values
                            replace_str += "({0})".format(
                                ",".join(["%s"] * len(value))
                            )
                            placeholders += value
                        else:
                            replace_str += " %s "
                            placeholders.append(value)

        return replace_str, placeholders

    def disconnect(self):
        """Iterate over and disconnect all data sources."""
        self.refdata_model.disconnect()
        if self.dhub:
            self.dhub.disconnect()

    def _get_datasource(self):
        """Find the datasource in the cache."""
        try:
            return next(source for source in Datasource.objects.cached()
                        if source.project == self.project)
        except StopIteration:
            raise DatasetNotFoundError(self.project)
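This variant (like the previous one) defines __enter__ and __exit__, so a derived model can be used as a context manager that disconnects automatically. A hedged usage sketch, assuming a concrete subclass such as Treeherder's JobsModel and a hypothetical project name:

from treeherder.model.derived import JobsModel  # assumed import path

with JobsModel("mozilla-central") as jobs_model:
    dhub = jobs_model.get_dhub()
    # ... execute stored procedures through dhub ...
# leaving the with-block calls __exit__, which calls disconnect() and
# closes both the datahub and the RefDataManager connection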
Code Example #4
File: base.py  Project: uberj/treeherder-service
class TreeherderModelBase(object):
    """
    Base model class for all derived models

    """

    def __init__(self, project):
        """Encapsulate the dataset access for this ``project`` """

        self.project = project
        self.sources = {}
        self.dhubs = {}
        self.DEBUG = settings.DEBUG
        self.refdata_model = RefDataManager()

    def __unicode__(self):
        """Unicode representation is project name."""
        return self.project

    @classmethod
    def get_oauth_credentials(cls):

        credentials = {}

        for source in Datasource.objects.cached():

            if (source.contenttype == "objectstore") and source.oauth_consumer_key and source.oauth_consumer_secret:

                credentials[source.project] = {
                    "consumer_key": source.oauth_consumer_key,
                    "consumer_secret": source.oauth_consumer_secret,
                }

        return credentials

    def get_dhub(self, contenttype, procs_file_name=None):
        """
        The configured datahub for the given contenttype

        """
        if not procs_file_name:  # pragma: no cover
            procs_file_name = "{0}.json".format(contenttype)

        if contenttype not in self.dhubs.keys():
            datasource = self.get_datasource(contenttype)

            self.dhubs[contenttype] = datasource.dhub(procs_file_name)
        return self.dhubs[contenttype]

    def get_datasource(self, contenttype):
        """The datasource for this contenttype of the project."""

        if contenttype not in self.sources.keys():
            self.sources[contenttype] = self._get_datasource(contenttype)

        return self.sources[contenttype]

    def get_row_by_id(self, contenttype, table_name, obj_id):
        """
        Given an ``id``, get the row for that item.
        Return None if not found.
        """
        proc = "generic.selects.get_row_by_id"
        iter_obj = self.get_dhub(contenttype).execute(
            proc=proc, replace=[table_name], placeholders=[obj_id], debug_show=self.DEBUG, return_type="iter"
        )
        return self.as_single(iter_obj, table_name, id=obj_id)

    def disconnect(self):
        """Iterate over and disconnect all data sources."""
        self.refdata_model.disconnect()
        for dhub in self.dhubs.itervalues():
            dhub.disconnect()

    def _get_datasource(self, contenttype):
        """Find the datasource for this contenttype in the cache."""
        candidate_sources = []
        for source in Datasource.objects.cached():
            if source.project == self.project and source.contenttype == contenttype:
                candidate_sources.append(source)

        if not candidate_sources:
            raise DatasetNotFoundError(self.project, contenttype)

        candidate_sources.sort(key=lambda s: s.dataset, reverse=True)

        return candidate_sources[0]
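get_oauth_credentials, shown again in this example, builds a nested dict keyed by project. A sketch of the returned shape, using hypothetical project names and dummy key material:

# Shape of the dict returned by get_oauth_credentials(); the project
# names and key strings below are hypothetical.
credentials = {
    "mozilla-central": {
        "consumer_key": "key-for-mozilla-central",
        "consumer_secret": "secret-for-mozilla-central",
    },
    "try": {
        "consumer_key": "key-for-try",
        "consumer_secret": "secret-for-try",
    },
}
# Only datasources whose contenttype is "objectstore" and that carry both
# an oauth_consumer_key and an oauth_consumer_secret contribute an entry.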
Code Example #5
class TreeherderModelBase(object):
    """
    Base model class for all derived models

    """
    def __init__(self, project):
        """Encapsulate the dataset access for this ``project`` """

        self.project = project
        self.sources = {}
        self.dhubs = {}
        self.DEBUG = settings.DEBUG
        self.refdata_model = RefDataManager()

    def __unicode__(self):
        """Unicode representation is project name."""
        return self.project

    @classmethod
    def get_oauth_credentials(cls):

        credentials = {}

        for source in Datasource.objects.cached():

            if (source.contenttype == 'objectstore') and \
               source.oauth_consumer_key and \
               source.oauth_consumer_secret:

                credentials[source.project] = {
                    'consumer_key': source.oauth_consumer_key,
                    'consumer_secret': source.oauth_consumer_secret
                }

        return credentials

    def get_dhub(self, contenttype, procs_file_name=None):
        """
        The configured datahub for the given contenttype

        """
        if not procs_file_name:  # pragma: no cover
            procs_file_name = "{0}.json".format(contenttype)

        if contenttype not in self.dhubs.keys():
            datasource = self.get_datasource(contenttype)

            self.dhubs[contenttype] = datasource.dhub(procs_file_name)
        return self.dhubs[contenttype]

    def get_datasource(self, contenttype):
        """The datasource for this contenttype of the project."""

        if contenttype not in self.sources.keys():
            self.sources[contenttype] = self._get_datasource(contenttype)

        return self.sources[contenttype]

    def get_inserted_row_ids(self, dhub):
        """
        InnoDB guarantees sequential numbers for AUTO INCREMENT when doing
        bulk inserts, provided innodb_autoinc_lock_mode is set to 0
        (traditional) or 1 (consecutive).

        Consequently you can get the first ID from LAST_INSERT_ID() and the
        last by adding ROW_COUNT()-1

        ref: http://stackoverflow.com/questions/7333524/how-can-i-insert-many-rows-into-a-mysql-table-and-get-ids-back

        NOTE: The cursor rowcount is always one for a
              INSERT INTO/SELECT FROM DUAL WHERE NOT EXISTS query otherwise
              it's equal to the number of rows inserted or updated.
        """

        row_count = dhub.connection['master_host']['cursor'].rowcount
        ids = []

        if row_count > 0:
            last_id = dhub.connection['master_host']['cursor'].lastrowid
            ids.extend(list(range(last_id - (row_count - 1), last_id + 1)))

        return ids

    def submit_publish_to_pulse_tasks(self, ids, data_type):

        from treeherder.model.tasks import publish_to_pulse
        publish_to_pulse.apply_async(args=[self.project, ids, data_type])

    def get_row_by_id(self, contenttype, table_name, obj_id):
        """
        Given an ``id``, get the row for that item.
        Return None if not found.
        """
        proc = "generic.selects.get_row_by_id"
        iter_obj = self.get_dhub(contenttype).execute(
            proc=proc,
            replace=[table_name],
            placeholders=[obj_id],
            debug_show=self.DEBUG,
            return_type='iter',
        )
        return self.as_single(iter_obj, table_name, id=obj_id)

    def disconnect(self):
        """Iterate over and disconnect all data sources."""
        self.refdata_model.disconnect()
        for dhub in self.dhubs.itervalues():
            dhub.disconnect()

    def _get_datasource(self, contenttype):
        """Find the datasource for this contenttype in the cache."""
        candidate_sources = []
        for source in Datasource.objects.cached():
            if (source.project == self.project
                    and source.contenttype == contenttype):
                candidate_sources.append(source)

        if not candidate_sources:
            raise DatasetNotFoundError(self.project, contenttype)

        candidate_sources.sort(key=lambda s: s.dataset, reverse=True)

        return candidate_sources[0]
Code Example #6
File: base.py  Project: Archaeopteryx/treeherder
class TreeherderModelBase(object):

    """
    Base model class for all derived models

    """
    logger = logging.getLogger(__name__)

    def __init__(self, project):
        """Encapsulate the dataset access for this ``project`` """

        self.project = project
        self.source = None
        self.dhub = None
        self.DEBUG = settings.DEBUG
        self.refdata_model = RefDataManager()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.disconnect()

    def __str__(self):
        """String representation is project name."""
        return self.project

    def get_dhub(self, procs_file_name=None):
        """
        The configured datahub

        """
        if not procs_file_name:  # pragma: no cover
            procs_file_name = "jobs.json"

        if not self.dhub:
            datasource = self.get_datasource()

            self.dhub = datasource.dhub(procs_file_name)
        return self.dhub

    def get_datasource(self):
        """The datasource of the project."""

        if not self.source:
            self.source = self._get_datasource()

        return self.source

    def _process_conditions(self, conditions, allowed_fields=None):
        """Transform a list of conditions into a list of placeholders and
        replacement strings to feed a datahub.execute statement."""
        placeholders = []
        replace_str = ""
        if conditions:
            for column, condition in conditions.items():
                if allowed_fields is None or column in allowed_fields:
                    if allowed_fields and column in allowed_fields:
                        # we need to get the db column string from the passed
                        # in querystring column.  It could be the same, but
                        # often it will have a table prefix for the column.
                        # This allows us to have where clauses on joined fields
                        # of the query.
                        column = allowed_fields[column]
                    for operator, value in condition:
                        replace_str += "AND {0} {1}".format(column, operator)
                        if operator == "IN":
                            # create a list of placeholders of the same length
                            # as the list of values
                            replace_str += "({0})".format(
                                ",".join(["%s"] * len(value))
                            )
                            placeholders += value
                        else:
                            replace_str += " %s "
                            placeholders.append(value)

        return replace_str, placeholders

    def disconnect(self):
        """Iterate over and disconnect all data sources."""
        self.refdata_model.disconnect()
        if self.dhub:
            self.dhub.disconnect()

    def _get_datasource(self):
        """Find the datasource in the cache."""
        try:
            return next(source for source in Datasource.objects.cached()
                        if source.project == self.project)
        except StopIteration:
            raise DatasetNotFoundError(self.project)
Code Example #7
File: base.py  Project: vidyarani-dg/treeherder
class TreeherderModelBase(object):
    """
    Base model class for all derived models

    """
    logger = logging.getLogger(__name__)

    def __init__(self, project):
        """Encapsulate the dataset access for this ``project`` """

        self.project = project
        self.source = None
        self.dhub = None
        self.DEBUG = settings.DEBUG
        self.refdata_model = RefDataManager()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.disconnect()

    def __str__(self):
        """String representation is project name."""
        return self.project

    def get_dhub(self, procs_file_name=None):
        """
        The configured datahub

        """
        if not procs_file_name:  # pragma: no cover
            procs_file_name = "jobs.json"

        if not self.dhub:
            datasource = self.get_datasource()

            self.dhub = datasource.dhub(procs_file_name)
        return self.dhub

    def get_datasource(self):
        """The datasource of the project."""

        if not self.source:
            self.source = self._get_datasource()

        return self.source

    def _process_conditions(self, conditions, allowed_fields=None):
        """Transform a list of conditions into a list of placeholders and
        replacement strings to feed a datahub.execute statement."""
        placeholders = []
        replace_str = ""
        if conditions:
            for column, condition in conditions.items():
                if allowed_fields is None or column in allowed_fields:
                    if allowed_fields and column in allowed_fields:
                        # we need to get the db column string from the passed
                        # in querystring column.  It could be the same, but
                        # often it will have a table prefix for the column.
                        # This allows us to have where clauses on joined fields
                        # of the query.
                        column = allowed_fields[column]
                    for operator, value in condition:
                        replace_str += "AND {0} {1}".format(column, operator)
                        if operator == "IN":
                            # create a list of placeholders of the same length
                            # as the list of values
                            replace_str += "({0})".format(",".join(["%s"] *
                                                                   len(value)))
                            placeholders += value
                        else:
                            replace_str += " %s "
                            placeholders.append(value)

        return replace_str, placeholders

    def disconnect(self):
        """Iterate over and disconnect all data sources."""
        self.refdata_model.disconnect()
        if self.dhub:
            self.dhub.disconnect()

    def _get_datasource(self):
        """Find the datasource in the cache."""
        try:
            return next(source for source in Datasource.objects.cached()
                        if source.project == self.project)
        except StopIteration:
            raise DatasetNotFoundError(self.project)
Code Example #8
File: base.py  Project: klibby/treeherder-service
class TreeherderModelBase(object):

    """
    Base model class for all derived models

    """
    logger = logging.getLogger(__name__)

    def __init__(self, project):
        """Encapsulate the dataset access for this ``project`` """

        self.project = project
        self.sources = {}
        self.dhubs = {}
        self.DEBUG = settings.DEBUG
        self.refdata_model = RefDataManager()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.disconnect()

    def __str__(self):
        """String representation is project name."""
        return self.project

    @classmethod
    def get_oauth_credentials(cls):

        credentials = {}

        for source in Datasource.objects.cached():

            if (source.contenttype == 'objectstore') and \
               source.oauth_consumer_key and \
               source.oauth_consumer_secret:

                credentials[source.project] = {
                    'consumer_key': source.oauth_consumer_key,
                    'consumer_secret': source.oauth_consumer_secret
                }

        return credentials

    def get_dhub(self, contenttype, procs_file_name=None):
        """
        The configured datahub for the given contenttype

        """
        if not procs_file_name:  # pragma: no cover
            procs_file_name = "{0}.json".format(contenttype)

        if contenttype not in self.dhubs.keys():
            datasource = self.get_datasource(contenttype)

            self.dhubs[contenttype] = datasource.dhub(procs_file_name)
        return self.dhubs[contenttype]

    def get_datasource(self, contenttype):
        """The datasource for this contenttype of the project."""

        if contenttype not in self.sources.keys():
            self.sources[contenttype] = self._get_datasource(contenttype)

        return self.sources[contenttype]

    def get_inserted_row_ids(self, dhub):
        """
        InnoDB guarantees sequential numbers for AUTO INCREMENT when doing
        bulk inserts, provided innodb_autoinc_lock_mode is set to 0
        (traditional) or 1 (consecutive).

        Consequently you can get the first ID from LAST_INSERT_ID() and the
        last by adding ROW_COUNT()-1

        ref: http://stackoverflow.com/questions/7333524/how-can-i-insert-many-rows-into-a-mysql-table-and-get-ids-back

        NOTE: The cursor rowcount is always one for a
              INSERT INTO/SELECT FROM DUAL WHERE NOT EXISTS query otherwise
              it's equal to the number of rows inserted or updated.
        """

        row_count = dhub.connection['master_host']['cursor'].rowcount
        ids = []

        if row_count > 0:
            last_id = dhub.connection['master_host']['cursor'].lastrowid
            ids.extend(
                list(range(last_id - (row_count - 1), last_id + 1))
            )

        return ids

    def disconnect(self):
        """Iterate over and disconnect all data sources."""
        self.refdata_model.disconnect()
        for dhub in self.dhubs.itervalues():
            dhub.disconnect()

    def _get_datasource(self, contenttype):
        """Find the datasource for this contenttype in the cache."""
        candidate_sources = []
        for source in Datasource.objects.cached():
            if (source.project == self.project and
                    source.contenttype == contenttype):
                candidate_sources.append(source)

        if not candidate_sources:
            raise DatasetNotFoundError(self.project, contenttype)

        candidate_sources.sort(key=lambda s: s.dataset, reverse=True)

        return candidate_sources[0]
Code Example #9
class TreeherderModelBase(object):
    """
    Base model class for all derived models

    """
    logger = logging.getLogger(__name__)

    def __init__(self, project):
        """Encapsulate the dataset access for this ``project`` """

        self.project = project
        self.sources = {}
        self.dhubs = {}
        self.DEBUG = settings.DEBUG
        self.refdata_model = RefDataManager()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.disconnect()

    def __str__(self):
        """String representation is project name."""
        return self.project

    @classmethod
    def get_oauth_credentials(cls):

        credentials = {}

        for source in Datasource.objects.cached():

            if (source.contenttype == 'objectstore') and \
               source.oauth_consumer_key and \
               source.oauth_consumer_secret:

                credentials[source.project] = {
                    'consumer_key': source.oauth_consumer_key,
                    'consumer_secret': source.oauth_consumer_secret
                }

        return credentials

    def get_dhub(self, contenttype, procs_file_name=None):
        """
        The configured datahub for the given contenttype

        """
        if not procs_file_name:  # pragma: no cover
            procs_file_name = "{0}.json".format(contenttype)

        if contenttype not in self.dhubs.keys():
            datasource = self.get_datasource(contenttype)

            self.dhubs[contenttype] = datasource.dhub(procs_file_name)
        return self.dhubs[contenttype]

    def get_datasource(self, contenttype):
        """The datasource for this contenttype of the project."""

        if contenttype not in self.sources.keys():
            self.sources[contenttype] = self._get_datasource(contenttype)

        return self.sources[contenttype]

    def get_inserted_row_ids(self, dhub):
        """
        InnoDB guarantees sequential numbers for AUTO INCREMENT when doing
        bulk inserts, provided innodb_autoinc_lock_mode is set to 0
        (traditional) or 1 (consecutive).

        Consequently you can get the first ID from LAST_INSERT_ID() and the
        last by adding ROW_COUNT()-1

        ref: http://stackoverflow.com/questions/7333524/how-can-i-insert-many-rows-into-a-mysql-table-and-get-ids-back

        NOTE: The cursor rowcount is always one for a
              INSERT INTO/SELECT FROM DUAL WHERE NOT EXISTS query otherwise
              it's equal to the number of rows inserted or updated.
        """

        row_count = dhub.connection['master_host']['cursor'].rowcount
        ids = []

        if row_count > 0:
            last_id = dhub.connection['master_host']['cursor'].lastrowid
            ids.extend(list(range(last_id - (row_count - 1), last_id + 1)))

        return ids

    def disconnect(self):
        """Iterate over and disconnect all data sources."""
        self.refdata_model.disconnect()
        for dhub in self.dhubs.itervalues():
            dhub.disconnect()

    def _get_datasource(self, contenttype):
        """Find the datasource for this contenttype in the cache."""
        candidate_sources = []
        for source in Datasource.objects.cached():
            if (source.project == self.project
                    and source.contenttype == contenttype):
                candidate_sources.append(source)

        if not candidate_sources:
            raise DatasetNotFoundError(self.project, contenttype)

        candidate_sources.sort(key=lambda s: s.dataset, reverse=True)

        return candidate_sources[0]
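When more than one cached datasource matches the project and contenttype, the dict-based variants above sort the candidates by dataset number in descending order and take the first, so the highest-numbered dataset wins. A minimal standalone illustration of that selection, using hypothetical datasource records:

from collections import namedtuple

# Stand-in for Datasource rows; only the fields _get_datasource reads
# are modelled here.
Source = namedtuple("Source", "project contenttype dataset")

cached = [
    Source("mozilla-central", "jobs", 1),
    Source("mozilla-central", "jobs", 2),
    Source("try", "jobs", 1),
]

candidates = [s for s in cached
              if s.project == "mozilla-central" and s.contenttype == "jobs"]
candidates.sort(key=lambda s: s.dataset, reverse=True)

print(candidates[0].dataset)  # 2 -- the newest dataset is selected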