Ejemplo n.º 1
0
    def get_provider_class_from_feature_id(cls, feature_id):
        """Look up the data provider class that serves a feature identifier.

        Args:
            feature_id: Feature identifier whose type prefix selects the
                provider.

        Returns:
            The value returned by
            ``FeatureDataTypeHelper.get_feature_data_provider_from_data_type``
            for the feature type mapped from the identifier's prefix.

        Raises:
            FeatureNotFoundException: If no type prefix can be extracted from
                the identifier, or the lower-cased prefix is not a key of
                ``FEATURE_ID_TO_TYPE_MAP``.

        """
        type_prefix = cls.get_feature_type_string(feature_id)
        if type_prefix is None:
            message = ("FeatureProviderFactory.from_feature_id: Unknown type: "
                       + str(feature_id))
            logger.error(message)
            raise FeatureNotFoundException(feature_id)

        # The type map is keyed by the lower-cased prefix.
        map_key = type_prefix.lower()
        if map_key not in FEATURE_ID_TO_TYPE_MAP:
            message = (
                "FeatureProviderFactory.from_feature_id: invalid feature ID: "
                + str(feature_id))
            logger.error(message)
            raise FeatureNotFoundException(feature_id)

        return FeatureDataTypeHelper.get_feature_data_provider_from_data_type(
            FEATURE_ID_TO_TYPE_MAP[map_key])
Ejemplo n.º 2
0
    def from_user_feature_id(cls, feature_id):
        """Build feature definitions for a user feature identifier.

        The identifier must look like ``USER:<project ID>:<feature ID>``
        (for example ``USER:1:6``). The feature ID part is looked up in the
        ``projects_user_feature_definitions`` table.

        Args:
            feature_id: User feature identifier string.

        Returns:
            The result of ``cls.from_feature_id`` when the stored definition
            has a ``shared_map_id``; otherwise a list containing a single
            instance of ``cls``.

        Raises:
            FeatureNotFoundException: If the identifier does not match the
                expected format, no ``bq_map_id`` was found for it, or
                ``cls.get_table_and_field`` yields no table or column.
        """
        logging.debug("UserFeatureDef.from_user_feature_id {0}".format(
            str([feature_id])))
        # ID breakdown: project ID:Feature ID
        # Example ID: USER:1:6
        regex = re_compile("^USER:"
                           # Project ID
                           "([0-9]+):"
                           # Feature ID
                           "([0-9]+)$")

        feature_fields = regex.findall(feature_id)
        if not feature_fields:
            raise FeatureNotFoundException(feature_id)
        project_id, user_feature_id = feature_fields[0]
        bq_id = None
        shared_id = None
        is_numeric = False

        # Bind both handles up front so the cleanup below never touches an
        # unbound name when connecting or creating the cursor fails.
        db = None
        cursor = None
        try:
            db = get_sql_connection()
            cursor = db.cursor(MySQLdb.cursors.DictCursor)
            cursor.execute(
                """
                SELECT feature_name, bq_map_id, shared_map_id, is_numeric
                FROM projects_user_feature_definitions
                WHERE id = %s
            """, (user_feature_id, ))
            # If several rows come back, the values of the last row win.
            for row in cursor.fetchall():
                if row['shared_map_id']:
                    shared_id = row['shared_map_id']
                bq_id = row["bq_map_id"]
                is_numeric = row['is_numeric'] == 1
        finally:
            # Close the cursor before the connection that owns it; any
            # exception from the block above propagates unchanged.
            if cursor is not None:
                cursor.close()
            if db is not None:
                db.close()

        if shared_id is not None:
            return cls.from_feature_id(bq_id, project_id)

        if bq_id is None:
            raise FeatureNotFoundException(feature_id)

        # Else we're querying a very specific feature from a specific project
        bq_table, column_name, symbol = cls.get_table_and_field(bq_id)
        if bq_table is None or column_name is None:
            raise FeatureNotFoundException(feature_id)

        logging.debug("{0} {1} {2}".format(bq_table, column_name, symbol))

        filters = None
        if symbol is not None:
            filters = {'Symbol': symbol}

        return [cls(bq_table, column_name, project_id, is_numeric, filters)]
Ejemplo n.º 3
0
    def from_feature_id(cls, feature_id):
        """Create an instance from a feature identifier string.

        The identifier is parsed with ``cls.regex``; the first match must
        provide five fields: value field, chromosome, start, end and internal
        table ID.

        Args:
            feature_id: Feature identifier string.

        Returns:
            ``cls(value_field, chromosome, start, end, internal_table_id)``
            built from the parsed fields (all passed through as matched).

        Raises:
            FeatureNotFoundException: If the identifier does not match
                ``cls.regex`` or the chromosome is not one of ``'1'`` to
                ``'23'``, ``'X'``, ``'Y'`` or ``'M'``.
        """
        feature_fields = cls.regex.findall(feature_id)
        if not feature_fields:
            raise FeatureNotFoundException(feature_id)
        value_field, chromosome, start, end, internal_table_id = feature_fields[0]

        # range() instead of xrange() so this also runs on Python 3; the
        # resulting set of labels is the same.
        valid_chr_set = frozenset(
            [str(number) for number in range(1, 24)] + ['X', 'Y', 'M'])
        if chromosome not in valid_chr_set:
            raise FeatureNotFoundException(feature_id)

        return cls(value_field, chromosome, start, end, internal_table_id)
Ejemplo n.º 4
0
    def from_feature_id(cls, feature_id, project_id=None):
        """
           This is the method used when the user feature maps to feature in an existing project (i.e. there is a
           shared_id in the projects_user_feature_definitions entry).
           It returns a *LIST* of UserFeatureDefs

           Args:
               feature_id: User feature identifier, e.g. ``USER:1:6``, optionally prefixed with ``v2:``.
               project_id: Only written to the debug log; each returned definition takes its project ID
                   from the matching database row.

           Returns:
               A list with one ``cls`` instance per row of projects_user_feature_definitions whose id
               equals the feature ID part of the identifier (empty if no row matches).

           Raises:
               FeatureNotFoundException: If ``feature_id`` is None or does not match the expected format.
         """
        logging.debug("UserFeatureDef.from_feature_id: {0}".format(
            str([feature_id, project_id])))
        if feature_id is None:
            raise FeatureNotFoundException(feature_id)
        # ID breakdown: project ID:Feature ID
        # Example ID: USER:1:6
        # NOTE(review): the pattern is anchored at the start only when the
        # "v2:" prefix is present, so text preceding "USER:" is tolerated -
        # confirm this is intended.
        regex = re_compile("(?:^v2:)?USER:"
                           # Project ID
                           "([0-9]+):"
                           # Feature ID
                           "([0-9]+)$")

        feature_fields = regex.findall(feature_id)
        if not feature_fields:
            raise FeatureNotFoundException(feature_id)
        # The project ID embedded in the identifier is not used for the lookup.
        _, user_feature_id = feature_fields[0]

        results = []
        # Bind both handles up front so the cleanup below never touches an
        # unbound name when connecting or creating the cursor fails.
        db = None
        cursor = None
        try:
            db = get_sql_connection()
            cursor = db.cursor(MySQLdb.cursors.DictCursor)
            cursor.execute(
                """
                SELECT bq_map_id, project_id, is_numeric
                FROM projects_user_feature_definitions
                WHERE id = %s
            """, (user_feature_id, ))

            for row in cursor.fetchall():
                bq_table, column_name, symbol = cls.get_table_and_field(
                    row['bq_map_id'])
                filters = None
                if symbol is not None:
                    filters = {'Symbol': symbol}
                results.append(
                    cls(bq_table, column_name, row['project_id'],
                        row['is_numeric'] == 1, filters))
        finally:
            # Close the cursor before the connection that owns it; any
            # exception from the block above propagates unchanged.
            if cursor is not None:
                cursor.close()
            if db is not None:
                db.close()

        return results
Ejemplo n.º 5
0
    def from_feature_id(cls, feature_id):
        """Create an instance from a feature identifier string.

        Args:
            feature_id: Feature identifier string, parsed with ``cls.regex``.

        Returns:
            ``cls(gene_label, protein_name, internal_table_id)`` built from
            the three fields of the first regex match.

        Raises:
            FeatureNotFoundException: If ``cls.regex`` finds no match in the
                identifier.
        """
        matches = cls.regex.findall(feature_id)
        if not matches:
            raise FeatureNotFoundException(feature_id)

        # Only the first match is used; it must carry exactly three fields.
        first_match = matches[0]
        gene_label, protein_name, internal_table_id = first_match
        return cls(gene_label, protein_name, internal_table_id)
Ejemplo n.º 6
0
    def from_feature_id(cls, feature_id):
        """Create an instance from a feature identifier string.

        Args:
            feature_id: Feature identifier string, parsed with ``cls.regex``.

        Returns:
            ``cls(gene_label, genomic_build)`` built from the two fields of
            the first regex match.

        Raises:
            FeatureNotFoundException: If ``cls.regex`` finds no match in the
                identifier.
        """
        matches = cls.regex.findall(feature_id)
        if not matches:
            raise FeatureNotFoundException(feature_id)

        # Only the first match is used; it must carry exactly two fields.
        first_match = matches[0]
        gene_label, genomic_build = first_match
        return cls(gene_label, genomic_build)
Ejemplo n.º 7
0
    def from_feature_id(cls, feature_id):
        """Create an instance for the column named by a feature identifier.

        The first match of ``cls.regex`` is taken as the column name. Every
        table in ``cls.config_instance.data_table_list`` is searched for that
        column through ``TABLE_TO_SCHEMA_MAP``, and all tables that have it
        must agree on the column's type.

        Args:
            feature_id: Feature identifier string.

        Returns:
            ``cls(column_name, value_type)`` where ``value_type`` comes from
            ``BigQuerySchemaToValueTypeConverter.get_value_type`` applied to
            the column's schema type.

        Raises:
            FeatureNotFoundException: If ``cls.regex`` finds no match in the
                identifier.
            InvalidClinicalFeatureIDException: If no configured table has the
                column, or the tables disagree on its type.
        """
        matches = cls.regex.findall(feature_id)
        if not matches:
            raise FeatureNotFoundException(feature_id)
        column_name = matches[0]

        # Gather the schema type of the column from each configured table
        # that defines it (first field with a matching name per table).
        matched_types = []
        for table_config in cls.config_instance.data_table_list:
            table_schema = TABLE_TO_SCHEMA_MAP[table_config.table_id]
            field = next(
                (item for item in table_schema
                 if item['name'] == column_name),
                None)
            if field is not None:
                matched_types.append(field['type'])

        if not matched_types:
            raise InvalidClinicalFeatureIDException(
                feature_id, "No tables found for column name")

        # All tables containing the column must declare the same type.
        distinct_types = set(matched_types)
        if len(distinct_types) != 1:
            raise InvalidClinicalFeatureIDException(
                feature_id, "Data types of found tables do not match")

        schema_type = next(iter(distinct_types))
        value_type = BigQuerySchemaToValueTypeConverter.get_value_type(
            schema_type)
        return cls(column_name, value_type)
Ejemplo n.º 8
0
    def from_feature_id(cls, feature_id):
        """Create an instance from a methylation feature identifier.

        The identifier must look like
        ``v2:METH:<probe>:<platform>:<internal table ID>``, where the
        platform is ``HumanMethylation27`` or ``HumanMethylation450`` and the
        internal table ID is one of those listed in the data source
        configuration built from ``BIGQUERY_CONFIG``.

        Args:
            feature_id: Feature identifier string.

        Returns:
            ``cls(probe, platform, internal_table_id)`` built from the parsed
            fields.

        Raises:
            FeatureNotFoundException: If the identifier does not match the
                expected format.
        """
        config_instance = METHDataSourceConfig.from_dict(BIGQUERY_CONFIG)

        # Alternation of every configured internal table ID. The IDs are
        # inserted into the pattern as-is (not regex-escaped).
        table_id_alternatives = "|".join([
            table.internal_table_id
            for table in config_instance.data_table_list
        ])

        # Example ID: v2:METH:cg08246323:HumanMethylation450:hg19_chr16
        # Raw literals keep "\-" from being treated as a string escape.
        regex = re_compile(r"^v2:METH:"
                           # TODO better validation for probe name
                           r"([a-zA-Z0-9_.\-]+):"
                           # platform
                           r"(HumanMethylation27|HumanMethylation450):"
                           # internal table ID: 'hg19_chr1', 'hg19_chrx', etc
                           r"(" + table_id_alternatives + r")$")

        feature_fields = regex.findall(feature_id)
        if not feature_fields:
            raise FeatureNotFoundException(feature_id)
        probe, platform, internal_table_id = feature_fields[0]

        return cls(probe, platform, internal_table_id)
Ejemplo n.º 9
0
    def from_user_feature_id(cls, feature_id):
        """
           This returns a *LIST* of one UserFeatureDef, unless it maps to a standard project ID. Then
           the list may have multiple entries.

           Args:
               feature_id: User feature identifier, e.g. ``USER:1:6``, optionally prefixed with ``v2:``.

           Returns:
               The result of ``cls.from_feature_id`` when the stored definition has a shared_map_id;
               otherwise a list containing a single ``cls`` instance.

           Raises:
               FeatureNotFoundException: If the identifier does not match the expected format, no
                   bq_map_id was found for it, or ``cls.get_table_and_field`` yields no table or column.
         """
        logging.debug("UserFeatureDef.from_user_feature_id {0}".format(
            str([feature_id])))
        # ID breakdown: project ID:Feature ID
        # Example ID: USER:1:6
        # NOTE(review): the pattern is anchored at the start only when the
        # "v2:" prefix is present, so text preceding "USER:" is tolerated -
        # confirm this is intended.
        regex = re_compile("(?:^v2:)?USER:"
                           # Project ID
                           "([0-9]+):"
                           # Feature ID
                           "([0-9]+)$")

        feature_fields = regex.findall(feature_id)
        if not feature_fields:
            raise FeatureNotFoundException(feature_id)
        project_id, user_feature_id = feature_fields[0]
        bq_id = None
        shared_id = None
        is_numeric = False

        # Bind both handles up front so the cleanup below never touches an
        # unbound name when connecting or creating the cursor fails.
        db = None
        cursor = None
        try:
            db = get_sql_connection()
            cursor = db.cursor(MySQLdb.cursors.DictCursor)
            cursor.execute(
                """
                SELECT feature_name, bq_map_id, shared_map_id, is_numeric
                FROM projects_user_feature_definitions
                WHERE id = %s
            """, (user_feature_id, ))
            # If several rows come back, the values of the last row win.
            for row in cursor.fetchall():
                if row['shared_map_id']:
                    shared_id = row['shared_map_id']
                bq_id = row["bq_map_id"]
                is_numeric = row['is_numeric'] == 1
        finally:
            # Close the cursor before the connection that owns it; any
            # exception from the block above propagates unchanged.
            if cursor is not None:
                cursor.close()
            if db is not None:
                db.close()

        # The feature we want is "shared", i.e. defined in a project we are extended from. So we delegate the
        # task to the from_feature_id() function:
        if shared_id is not None:
            return cls.from_feature_id(bq_id, project_id)

        if bq_id is None:
            raise FeatureNotFoundException(feature_id)

        # Else we're querying a very specific feature from a specific project
        bq_table, column_name, symbol = cls.get_table_and_field(bq_id)
        if bq_table is None or column_name is None:
            raise FeatureNotFoundException(feature_id)

        logging.debug("{0} {1} {2}".format(bq_table, column_name, symbol))

        filters = None
        if symbol is not None:
            filters = {'Symbol': symbol}

        return [cls(bq_table, column_name, project_id, is_numeric, filters)]