def get_provider_class_from_feature_id(cls, feature_id):
    """
    Resolve the feature data provider class for a feature identifier.

    Args:
        feature_id: Feature identifier

    Returns:
        The result of
        FeatureDataTypeHelper.get_feature_data_provider_from_data_type() for
        the data type that FEATURE_ID_TO_TYPE_MAP associates with the
        lower-cased type prefix of the identifier.

    Raises:
        FeatureNotFoundException: If cls.get_feature_type_string() returns
            None for the identifier, or if the lower-cased prefix is not a
            key of FEATURE_ID_TO_TYPE_MAP.
    """
    prefix = cls.get_feature_type_string(feature_id)
    if prefix is None:
        message = "FeatureProviderFactory.from_feature_id: Unknown type: " + str(feature_id)
        logger.error(message)
        raise FeatureNotFoundException(feature_id)

    # The map is keyed by lower-case prefixes, so normalise once up front.
    type_key = prefix.lower()
    if type_key not in FEATURE_ID_TO_TYPE_MAP:
        message = "FeatureProviderFactory.from_feature_id: invalid feature ID: " + str(feature_id)
        logger.error(message)
        raise FeatureNotFoundException(feature_id)

    return FeatureDataTypeHelper.get_feature_data_provider_from_data_type(
        FEATURE_ID_TO_TYPE_MAP[type_key])
def from_user_feature_id(cls, feature_id):
    """
    Build the feature definition(s) for a user feature identifier.

    The numeric feature ID is looked up in projects_user_feature_definitions.
    If the row carries a shared_map_id, the work is delegated to
    cls.from_feature_id(); otherwise a single definition is built from the
    row's bq_map_id.

    Args:
        feature_id: Identifier of the form USER:<project ID>:<feature ID>,
            for example "USER:1:6".

    Returns:
        A list: either whatever cls.from_feature_id(bq_map_id, project_id)
        returns, or a one-element list containing
        cls(bq_table, column_name, project_id, is_numeric, filters).

    Raises:
        FeatureNotFoundException: If the identifier does not match the
            expected pattern, if no bq_map_id was found for it, or if
            cls.get_table_and_field() yields no table or no column name.
    """
    logging.debug("UserFeatureDef.from_user_feature_id {0}".format(
        str([feature_id])))

    # ID breakdown: project ID:Feature ID
    # Example ID: USER:1:6
    regex = re_compile("^USER:"
                       # project ID
                       "([0-9]+):"
                       # Feature ID
                       "([0-9]+)$")
    feature_fields = regex.findall(feature_id)
    if not feature_fields:
        raise FeatureNotFoundException(feature_id)
    project_id, user_feature_id = feature_fields[0]

    bq_id = None
    shared_id = None
    is_numeric = False

    # Both handles start as None so the cleanup below is safe even when
    # opening the connection or creating the cursor is what failed.
    db = None
    cursor = None
    try:
        db = get_sql_connection()
        cursor = db.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute(
            """
            SELECT feature_name, bq_map_id, shared_map_id, is_numeric
            FROM projects_user_feature_definitions
            WHERE id = %s
            """, (user_feature_id, ))
        for row in cursor.fetchall():
            if row['shared_map_id']:
                shared_id = row['shared_map_id']
            bq_id = row["bq_map_id"]
            is_numeric = row['is_numeric'] == 1
    finally:
        # Release the cursor first, and the connection even if that fails.
        # Any exception from the try body propagates with its own traceback.
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if db is not None:
                db.close()

    # A shared feature is resolved by from_feature_id() instead.
    if shared_id is not None:
        return cls.from_feature_id(bq_id, project_id)

    if bq_id is None:
        raise FeatureNotFoundException(feature_id)

    # Else we're querying a very specific feature from a specific project
    bq_table, column_name, symbol = cls.get_table_and_field(bq_id)
    if bq_table is None or column_name is None:
        raise FeatureNotFoundException(feature_id)

    logging.debug("{0} {1} {2}".format(bq_table, column_name, symbol))

    filters = None
    if symbol is not None:
        filters = {'Symbol': symbol}

    return [cls(bq_table, column_name, project_id, is_numeric, filters)]
def from_feature_id(cls, feature_id):
    """
    Parse a feature identifier into an instance of cls.

    The identifier is matched against cls.regex, which must capture five
    groups: value field, chromosome, start, end and internal table ID.

    Args:
        feature_id: Feature identifier string.

    Returns:
        cls(value_field, chromosome, start, end, internal_table_id), with
        every argument passed as the string captured by the regex.

    Raises:
        FeatureNotFoundException: If the identifier does not match
            cls.regex, or the captured chromosome is not one of
            "1".."23", "X", "Y" or "M".
    """
    feature_fields = cls.regex.findall(feature_id)
    if not feature_fields:
        raise FeatureNotFoundException(feature_id)

    value_field, chromosome, start, end, internal_table_id = feature_fields[0]

    # range() is used instead of xrange() so this runs on Python 2 and 3.
    valid_chr_set = frozenset(
        [str(x) for x in range(1, 24)] + ['X', 'Y', 'M'])
    if chromosome not in valid_chr_set:
        raise FeatureNotFoundException(feature_id)

    return cls(value_field, chromosome, start, end, internal_table_id)
def from_feature_id(cls, feature_id, project_id=None):
    """
    This is the method used when the user feature maps to feature in an existing
    project (i.e. there is a shared_id in the projects_user_feature_definitions entry).
    It returns a *LIST* of UserFeatureDefs

    Args:
        feature_id: Identifier of the form USER:<project ID>:<feature ID>,
            optionally prefixed with "v2:" (example: "USER:1:6").
        project_id: Only written to the debug log. The project ID given to
            each returned definition comes from the database row.

    Returns:
        A list with one cls(bq_table, column_name, project_id, is_numeric,
        filters) per row of projects_user_feature_definitions whose id
        equals the feature ID part of the identifier. The list is empty if
        there is no such row.

    Raises:
        FeatureNotFoundException: If feature_id is None or does not match
            the expected pattern.
    """
    logging.debug("UserFeatureDef.from_feature_id: {0}".format(
        str([feature_id, project_id])))
    if feature_id is None:
        raise FeatureNotFoundException(feature_id)

    # ID breakdown: project ID:Feature ID
    # Example ID: USER:1:6
    # NOTE(review): the "^" sits inside the optional group, so without the
    # "v2:" prefix the pattern is not anchored at the start - confirm intended.
    regex = re_compile("(?:^v2:)?USER:"
                       # project ID
                       "([0-9]+):"
                       # Feature ID
                       "([0-9]+)$")
    feature_fields = regex.findall(feature_id)
    if not feature_fields:
        raise FeatureNotFoundException(feature_id)
    # The project ID embedded in the identifier is not needed here.
    _, user_feature_id = feature_fields[0]

    results = []

    # Both handles start as None so the cleanup below is safe even when
    # opening the connection or creating the cursor is what failed.
    db = None
    cursor = None
    try:
        db = get_sql_connection()
        cursor = db.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute(
            """
            SELECT bq_map_id, project_id, is_numeric
            FROM projects_user_feature_definitions
            WHERE id = %s
            """, (user_feature_id, ))
        for row in cursor.fetchall():
            bq_table, column_name, symbol = cls.get_table_and_field(
                row['bq_map_id'])
            filters = None
            if symbol is not None:
                filters = {'Symbol': symbol}
            results.append(
                cls(bq_table, column_name, row['project_id'],
                    row['is_numeric'] == 1, filters))
    finally:
        # Release the cursor first, and the connection even if that fails.
        # Any exception from the try body propagates with its own traceback.
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if db is not None:
                db.close()

    return results
def from_feature_id(cls, feature_id):
    """
    Parse a feature identifier into an instance of cls.

    Args:
        feature_id: Feature identifier string, matched against cls.regex.

    Returns:
        cls(gene_label, protein_name, internal_table_id), built from the
        three groups captured by the first regex match.

    Raises:
        FeatureNotFoundException: If cls.regex finds no match.
    """
    matches = cls.regex.findall(feature_id)
    if not matches:
        raise FeatureNotFoundException(feature_id)

    # Only the first match is used; it must unpack into exactly three fields.
    first_match = matches[0]
    gene_label, protein_name, internal_table_id = first_match
    return cls(gene_label, protein_name, internal_table_id)
def from_feature_id(cls, feature_id):
    """
    Parse a feature identifier into an instance of cls.

    Args:
        feature_id: Feature identifier string, matched against cls.regex.

    Returns:
        cls(gene_label, genomic_build), built from the two groups captured
        by the first regex match.

    Raises:
        FeatureNotFoundException: If cls.regex finds no match.
    """
    matches = cls.regex.findall(feature_id)
    if not matches:
        raise FeatureNotFoundException(feature_id)

    # Only the first match is used; it must unpack into exactly two fields.
    first_match = matches[0]
    gene_label, genomic_build = first_match
    return cls(gene_label, genomic_build)
def from_feature_id(cls, feature_id):
    """
    Parse a feature identifier naming a column and build an instance of cls.

    The first cls.regex match is taken as the column name. Every table in
    cls.config_instance.data_table_list is searched, via
    TABLE_TO_SCHEMA_MAP, for a field with that name, and the field types
    found must all agree.

    Args:
        feature_id: Feature identifier string.

    Returns:
        cls(column_name, value_type), where value_type is what
        BigQuerySchemaToValueTypeConverter.get_value_type() returns for the
        single field type found.

    Raises:
        FeatureNotFoundException: If cls.regex finds no match.
        InvalidClinicalFeatureIDException: If no configured table has the
            column, or the tables that have it disagree on its type.
    """
    matches = cls.regex.findall(feature_id)
    if not matches:
        raise FeatureNotFoundException(feature_id)
    column_name = matches[0]

    # Collect the field type from every configured table that has the
    # column. At most one field is taken per table (the first name match).
    matched_types = []
    for table_config in cls.config_instance.data_table_list:
        for field_item in TABLE_TO_SCHEMA_MAP[table_config.table_id]:
            if field_item['name'] != column_name:
                continue
            matched_types.append(field_item['type'])
            break

    if not matched_types:
        raise InvalidClinicalFeatureIDException(
            feature_id, "No tables found for column name")

    # All tables containing the column must agree on its type.
    distinct_types = set(matched_types)
    if len(distinct_types) != 1:
        raise InvalidClinicalFeatureIDException(
            feature_id, "Data types of found tables do not match")

    only_type = next(iter(distinct_types))
    value_type = BigQuerySchemaToValueTypeConverter.get_value_type(only_type)
    return cls(column_name, value_type)
def from_feature_id(cls, feature_id):
    """
    Parse a methylation feature identifier into an instance of cls.

    The accepted internal table IDs are read from the data table list of
    METHDataSourceConfig.from_dict(BIGQUERY_CONFIG) on every call.

    Args:
        feature_id: Identifier of the form
            v2:METH:<probe>:<platform>:<internal table ID>, where platform
            is HumanMethylation27 or HumanMethylation450.

    Returns:
        cls(probe, platform, internal_table_id), built from the groups
        captured by the first match.

    Raises:
        FeatureNotFoundException: If the identifier does not match.
    """
    config_instance = METHDataSourceConfig.from_dict(BIGQUERY_CONFIG)

    # NOTE(review): the IDs are inserted into the pattern unescaped; this
    # assumes they contain no regex metacharacters - confirm against config.
    table_id_alternatives = "|".join([
        table.internal_table_id
        for table in config_instance.data_table_list
    ])

    # Example ID: v2:METH:cg08246323:HumanMethylation450:hg19_chr16
    regex = re_compile(
        "^v2:METH:"
        # TODO better validation for probe name
        # Raw literal: "\-" is not a valid escape in a normal string.
        r"([a-zA-Z0-9_.\-]+):"
        # platform
        "(HumanMethylation27|HumanMethylation450):"
        # internal table ID: 'hg19_chr1', 'hg19_chrx', etc
        "(" + table_id_alternatives + ")$")

    feature_fields = regex.findall(feature_id)
    if not feature_fields:
        raise FeatureNotFoundException(feature_id)

    probe, platform, internal_table_id = feature_fields[0]
    return cls(probe, platform, internal_table_id)
def from_user_feature_id(cls, feature_id):
    """
    This returns a *LIST* of one UserFeatureDef, unless it maps to a standard project ID. Then
    the list may have multiple entries.

    The numeric feature ID is looked up in projects_user_feature_definitions.
    If the row carries a shared_map_id, the work is delegated to
    cls.from_feature_id(); otherwise a single definition is built from the
    row's bq_map_id.

    Args:
        feature_id: Identifier of the form USER:<project ID>:<feature ID>,
            optionally prefixed with "v2:" (example: "USER:1:6").

    Returns:
        A list: either whatever cls.from_feature_id(bq_map_id, project_id)
        returns, or a one-element list containing
        cls(bq_table, column_name, project_id, is_numeric, filters).

    Raises:
        FeatureNotFoundException: If the identifier does not match the
            expected pattern, if no bq_map_id was found for it, or if
            cls.get_table_and_field() yields no table or no column name.
    """
    logging.debug("UserFeatureDef.from_user_feature_id {0}".format(
        str([feature_id])))

    # ID breakdown: project ID:Feature ID
    # Example ID: USER:1:6
    # NOTE(review): the "^" sits inside the optional group, so without the
    # "v2:" prefix the pattern is not anchored at the start - confirm intended.
    regex = re_compile("(?:^v2:)?USER:"
                       # project ID
                       "([0-9]+):"
                       # Feature ID
                       "([0-9]+)$")
    feature_fields = regex.findall(feature_id)
    if not feature_fields:
        raise FeatureNotFoundException(feature_id)
    project_id, user_feature_id = feature_fields[0]

    bq_id = None
    shared_id = None
    is_numeric = False

    # Both handles start as None so the cleanup below is safe even when
    # opening the connection or creating the cursor is what failed.
    db = None
    cursor = None
    try:
        db = get_sql_connection()
        cursor = db.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute(
            """
            SELECT feature_name, bq_map_id, shared_map_id, is_numeric
            FROM projects_user_feature_definitions
            WHERE id = %s
            """, (user_feature_id, ))
        for row in cursor.fetchall():
            if row['shared_map_id']:
                shared_id = row['shared_map_id']
            bq_id = row["bq_map_id"]
            is_numeric = row['is_numeric'] == 1
    finally:
        # Release the cursor first, and the connection even if that fails.
        # Any exception from the try body propagates with its own traceback.
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if db is not None:
                db.close()

    # The feature we want is "shared", i.e. defined in a project we are extended from. So we delegate the
    # task to the from_feature_id() function:
    if shared_id is not None:
        return cls.from_feature_id(bq_id, project_id)

    if bq_id is None:
        raise FeatureNotFoundException(feature_id)

    # Else we're querying a very specific feature from a specific project
    bq_table, column_name, symbol = cls.get_table_and_field(bq_id)
    if bq_table is None or column_name is None:
        raise FeatureNotFoundException(feature_id)

    logging.debug("{0} {1} {2}".format(bq_table, column_name, symbol))

    filters = None
    if symbol is not None:
        filters = {'Symbol': symbol}

    return [cls(bq_table, column_name, project_id, is_numeric, filters)]