def test_cvterm_not_allowed(self):
    """A 'property type' cvterm is reported as not allowed for the FBcv list."""
    permitted = ['FBcv:environmental_qualifier', 'FBcv:phenotypic_class']
    term = get_cvterm(session, 'property type', 'GO_internal_notes')
    # Sanity check that the lookup returned a real database row.
    assert term.cvterm_id != 0

    verdict = check_cvterm_is_allowed(session, term, permitted)
    assert verdict == False  # noqa: E712 - explicit equality kept on purpose
def test_cvterm_allowed(self):
    """The 'pheno1' cvterm is reported as allowed for the FBcv list."""
    permitted = ['FBcv:environmental_qualifier', 'FBcv:phenotypic_class']
    term = get_cvterm(session, 'FlyBase miscellaneous CV', 'pheno1')
    # Sanity check that the lookup returned a real database row.
    assert term.cvterm_id != 0

    verdict = check_cvterm_is_allowed(session, term, permitted)
    assert verdict == True  # noqa: E712 - explicit equality kept on purpose
def test_cvterm_lookup(self):
    """check_cvterm_has_prop rejects an unknown prop and accepts a known one."""
    term = get_cvterm(session, 'FlyBase miscellaneous CV', 'pheno1')
    assert term.cvterm_id != 0

    # (property name, expected result), checked in the same order as before.
    expectations = (
        ('bad_prop', False),
        ('phenotypic_class', True),
    )
    for prop_name, expected in expectations:
        outcome = check_cvterm_has_prop(session, term, prop_name)
        assert outcome == expected
def test_create_and_lookup(self):
    """Create a feature with a current symbol synonym, then look it up.

    The feature is first found by name, then (after a Synonym and a
    FeatureSynonym have been attached) by its symbol.
    """
    symbol = "TP{1}Tao[1]"
    so_type_name = 'transposable_element_insertion_site'

    # Create the feature itself.
    type_term = get_cvterm(session, 'SO', so_type_name)
    organism, plain_name, sgml = synonym_name_details(session, symbol)
    created, _ = get_or_create(
        session, Feature,
        name=plain_name,
        type_id=type_term.cvterm_id,
        uniquename="FBti:temp_1",
        organism_id=organism.organism_id)

    # Lookup by name works as soon as the feature exists.
    by_name = feature_name_lookup(session, symbol, type_name=so_type_name)
    assert by_name.name == 'TP{1}Tao[1]'

    # Attach a current, non-internal symbol synonym to the feature.
    symbol_term = get_cvterm(session, 'synonym type', 'symbol')
    synonym, _ = get_or_create(
        session, Synonym,
        type_id=symbol_term.cvterm_id,
        name=plain_name,
        synonym_sgml=sgml)
    link, _ = get_or_create(
        session, FeatureSynonym,
        feature_id=created.feature_id,
        synonym_id=synonym.synonym_id,
        pub_id=1)
    link.is_current = True
    link.is_internal = False

    # Lookup by symbol must now return the same feature.
    by_symbol = feature_symbol_lookup(session, so_type_name, symbol, convert=True)
    assert by_symbol.name == 'TP{1}Tao[1]'
def feature_type_lookup(session: Session, type_name: str):
    """Return the cvterm for a feature type, caching successful lookups.

    The name is searched for in the 'SO' cv first and then in
    'FlyBase miscellaneous CV'. A CodingError raised by get_cvterm is treated
    as "not in this cv" and the next cv is tried.

    Args:
        session: db connection handed to get_cvterm.
        type_name (str): cvterm name of the feature type.

    Returns:
        The cvterm returned by get_cvterm (also stored in feature_type_cache).

    Raises:
        DataError: If no cvterm is found in either cv.
    """
    if type_name in feature_type_cache:
        return feature_type_cache[type_name]

    cvterm = None
    for cv_name in ('SO', 'FlyBase miscellaneous CV'):
        try:
            cvterm = get_cvterm(session, cv_name, type_name)
        except CodingError:
            continue
        if cvterm:
            # First cv that yields a term wins; do not query the rest.
            break

    if not cvterm:
        message = "DataError: Could not find cvterm for feature type {}".format(type_name)
        raise DataError(message)

    feature_type_cache[type_name] = cvterm
    return cvterm
def get_sql_query():
    """Run the feature-name query and return its result.

    Reads `feat_type`, `alt_feat_type`, `session` and `args` from the
    enclosing scope. The cv defaults to 'SO' unless `alt_feat_type` maps the
    feature type to another cv.

    `args.regex`, when set, is matched as a plain substring of the feature
    name (SQL LIKE '%value%'), not as a regular expression. It is passed as a
    bound parameter, so quotes or other SQL syntax in the value cannot alter
    the statement.

    Returns:
        The result of session.execute, or None if executing the query raised.
    """
    # Local import: only `text` is needed and the file's import block is not
    # visible from here.
    from sqlalchemy import text

    cvname = alt_feat_type[feat_type] if feat_type in alt_feat_type else 'SO'
    cvterm = get_cvterm(session, cvname, feat_type)

    feat_sql = "SELECT name FROM feature where type_id = :type_id"
    params = {'type_id': cvterm.cvterm_id}
    if args.regex:
        feat_sql += " AND name like :pattern"
        params['pattern'] = '%{}%'.format(args.regex)

    if args.obsolete:
        feat_sql += " AND is_obsolete = True"
    else:
        feat_sql += " AND is_obsolete = False"

    try:
        return session.execute(text(feat_sql), params)
    except Exception:
        # Best-effort by design: callers receive None when the query fails.
        return None
def feature_symbol_lookup(session: Session, type_name: str, synonym_name: str,
                          organism_id: Optional[int] = None,
                          cv_name: str = 'synonym type', cvterm_name: str = 'symbol',
                          check_unique: bool = True, obsolete: str = 'f',
                          convert: bool = True) -> Feature:
    """Lookup feature that has a specific type and current synonym name.

    Args:
        session (sqlalchemy.orm.session.Session object): db connection to use.

        type_name (str): <can be None> cvterm name, defining the type of feature.

        synonym_name (str): symbol to look up.

        organism_id (int): <optional> chado organism_id. When not given, the
                           organism is taken from synonym_name_details().

        cv_name (str): <optional> cv name, defaults to 'synonym type'.

        cvterm_name (str): <optional> cvterm name, defaults to 'symbol'.

        check_unique (bool): <optional> Set to False to fetch more than one
                             feature with that symbol.

        obsolete ('t', 'f', 'e'): <optional> is feature obsolete
                                  t = true
                                  f = false (default)
                                  e = either, not fussed.

        convert (bool): <optional> set to False to look the name up exactly as
                        given, without converting chars i.e. '[' to '<up' etc.

    ONLY replace cvterm_name and cv_name if you know exactly what you are doing.
    Symbol lookups are kind of special and initialized here for ease of use.

    Returns:
        Feature object, or a list of Feature objects if check_unique is False.

    Raises:
        NoResultFound: If no feature found matching the synonym
                       (check_unique only).

        MultipleResultsFound: If more than one feature found matching the
                              synonym (check_unique only).
    """
    if not organism_id:
        # No organism given: let synonym_name_details work it out from the
        # name (presumably defaulting to Dros); it also supplies the sgml form.
        organism, _, synonym_sgml = synonym_name_details(session, synonym_name)
        organism_id = organism.organism_id
    else:
        # convert name to sgml format for lookup
        synonym_sgml = sgml_to_unicode(sub_sup_to_sgml(synonym_name))
    if not convert:
        synonym_sgml = synonym_name

    # The cache holds single features (see add_to_cache below), so it may only
    # answer unique lookups; a check_unique=False caller must receive a list.
    # NOTE(review): the cache key ignores organism_id and obsolete - confirm
    # that is acceptable for all callers.
    if check_unique and type_name in feature_cache and synonym_sgml in feature_cache[type_name]:
        return feature_cache[type_name][synonym_sgml]

    synonym_type = get_cvterm(session, cv_name, cvterm_name)
    check_obs = _check_obsolete(obsolete)

    filter_spec: Any = (Synonym.type_id == synonym_type.cvterm_id,
                        Synonym.synonym_sgml == synonym_sgml,
                        Feature.organism_id == organism_id,
                        FeatureSynonym.is_current == 't')
    if check_obs:
        filter_spec += (Feature.is_obsolete == obsolete, )
    if not type_name or type_name == 'gene':
        # Untyped and gene lookups skip features whose uniquename contains 'FBog'.
        filter_spec += (~Feature.uniquename.contains('FBog'), )
    if type_name:
        feature_type = feature_type_lookup(session, type_name)
        filter_spec += (Feature.type_id == feature_type.cvterm_id, )

    query = session.query(Feature).join(FeatureSynonym).join(Synonym).filter(*filter_spec)
    if not check_unique:
        return query.all()

    feature = query.one()
    add_to_cache(feature, synonym_sgml)
    return feature
def feature_synonym_lookup(session: Session, type_name: str, synonym_name: str,
                           organism_id: Optional[int] = None,
                           cv_name: str = 'synonym type', cvterm_name: str = 'symbol',
                           check_unique: bool = False, obsolete: str = 'f'):
    """Get feature from the synonym.

    Lookup to see if the synonym has been used before. Even if not current.
    Check for uniqueness if requested.

    Args:
        session (sqlalchemy.orm.session.Session object): db connection to use.

        type_name (str): cvterm name, defining the type of feature.

        synonym_name (str): symbol to look up.

        organism_id (int): <optional> chado organism_id. Defaults to the
                           value of get_default_organism_id().

        cv_name (str): <optional> cv name, defaults to 'synonym type'.

        cvterm_name (str): <optional> cvterm name, defaults to 'symbol'.

        check_unique (bool): <optional> When True, require all matches to share
                             one uniquename and return a single Feature.
                             When False (default), return every match.

        obsolete ('t', 'f', 'e'): <optional> is feature obsolete
                                  t = true
                                  f = false (default)
                                  e = either, not fussed.

    Returns:
        List of Feature objects (possibly empty) when check_unique is False,
        a single Feature when check_unique is True.

    Raises:
        DataError: If cvterm for type not found.
                   If check_unique is set and the feature cannot be found
                   uniquely (no match, or matches with differing uniquenames).
    """
    check_obs = _check_obsolete(obsolete)

    # Default to Dros if not organism specified.
    if not organism_id:
        organism_id = get_default_organism_id(session)

    # convert name to sgml format for lookup
    synonym_sgml = sgml_to_unicode(sub_sup_to_sgml(synonym_name))

    # The cache holds single features, so it may only answer unique lookups;
    # a check_unique=False caller must always receive a list.
    if check_unique and type_name in feature_cache and synonym_sgml in feature_cache[type_name]:
        return feature_cache[type_name][synonym_sgml]

    # get feature type expected from type_name
    feature_type = feature_type_lookup(session, type_name)
    synonym_type = get_cvterm(session, cv_name, cvterm_name)

    filter_spec: Any = (
        Synonym.type_id == synonym_type.cvterm_id,
        Synonym.synonym_sgml == synonym_sgml,
        Feature.organism_id == organism_id,
        Feature.type_id == feature_type.cvterm_id,
    )
    if check_obs:
        filter_spec += (Feature.is_obsolete == obsolete, )

    # .all() returns an empty list when nothing matches; it never raises
    # NoResultFound, so no exception handling is needed here.
    features = session.query(Feature).join(FeatureSynonym).join(Synonym).\
        filter(*filter_spec).all()

    if not check_unique:
        return features

    # fs has pub so there may be many of the same symbols with different pubs;
    # check that every row refers to the same feature.
    uniquecheck = None
    feat = None
    for feat in features:
        if uniquecheck and uniquecheck != feat.uniquename:
            raise DataError(
                "DataError: Could not find UNIQUE current synonym '{}', sgml = '{}' for type '{}'."
                .format(synonym_name, synonym_sgml, cvterm_name))
        uniquecheck = feat.uniquename

    if uniquecheck:
        add_to_cache(feat)
        return feat

    raise DataError(
        "DataError: Could not find current unique synonym '{}', sgml = '{}' for type '{}'."
        .format(synonym_name, synonym_sgml, cvterm_name))
def general_symbol_lookup(session: Session, sql_object_type: GeneralObjects, syn_object_type,
                          type_name: str, synonym_name: str, organism_id: int = None,
                          cv_name: str = 'synonym type', cvterm_name: str = 'symbol',
                          check_unique: bool = True, obsolete: str = 'f',
                          convert: bool = True):
    """Lookup "other" (non-feature) object that has a specific type and synonym name.

    Args:
        session: db connection to use.

        sql_object_type (sqlalchemy object type): i.e. Grp, CellLine, Strain

        syn_object_type (sqlalchemy object type): class joined between
                        sql_object_type and Synonym in the query
                        (presumably the matching "<object>Synonym" link table).

        type_name (str): <can be None> cvterm name, defining the type of object.
                         When None, neither the object type nor the synonym
                         type is filtered on.

        synonym_name (str): symbol to look up.

        organism_id (int): <optional> chado organism_id. Only filtered on when
                           given; no default organism is applied.

        cv_name (str): <optional> cv name, defaults to 'synonym type'.

        cvterm_name (str): <optional> cvterm name, defaults to 'symbol'.

        check_unique (bool): <optional> Set to False to fetch more than one
                             object with that symbol.

        obsolete ('t', 'f', 'e'): <optional> is object obsolete
                                  t = true
                                  f = false (default)
                                  e = either, not fussed.

        convert (bool): <optional> set to False to look the name up exactly as
                        given, without converting chars i.e. '[' to '<up' etc.

    ONLY replace cvterm_name and cv_name if you know exactly what you are doing.
    Symbol lookups are kind of special and initialized here for ease of use.

    Returns:
        Single object, or a list of objects if check_unique is False.

    Raises:
        NoResultFound: If no object found matching the synonym
                       (check_unique only).

        MultipleResultsFound: If more than one object found matching the
                              synonym (check_unique only).
    """
    # convert name to sgml format for lookup
    synonym_sgml = sgml_to_unicode(sub_sup_to_sgml(synonym_name))
    if not convert:
        synonym_sgml = synonym_name

    # Check cache
    # NOTE(review): a cache hit is returned as-is even when check_unique is
    # False - confirm what general_cache stores.
    if type_name in general_cache and synonym_sgml in general_cache[type_name]:
        return general_cache[type_name][synonym_sgml]

    synonym_type = get_cvterm(session, cv_name, cvterm_name)
    check_obs = _check_obsolete(obsolete)

    filter_spec: Any = (Synonym.synonym_sgml == synonym_sgml, )
    if type_name:
        filter_spec += (Synonym.type_id == synonym_type.cvterm_id, )
    # Note: type error messages suppressed here as the args should deal with
    # inconsistencies.
    if organism_id:
        filter_spec += (sql_object_type.organism_id == organism_id, )  # type: ignore
    if check_obs:
        filter_spec += (sql_object_type.is_obsolete == obsolete, )  # type: ignore
    if type_name:
        feature_type = general_type_lookup(session, type_name)
        filter_spec += (sql_object_type.type_id == feature_type.cvterm_id, )  # type: ignore

    query = session.query(sql_object_type).join(syn_object_type).join(Synonym).\
        filter(*filter_spec)
    if check_unique:
        return query.one()
    return query.all()
def test_empty_list(self):
    """An allow-list naming a made-up FBcv term raises CodingError."""
    cvterm = get_cvterm(session, 'FlyBase miscellaneous CV', 'pheno1')
    with pytest.raises(CodingError):
        # The call is expected to raise, so its return value is not captured.
        check_cvterm_is_allowed(session, cvterm, ['FBcv:madeupcvterm'])
def test_bad_list(self):
    """An allow-list entry without the 'cv:name' shape raises CodingError."""
    cvterm = get_cvterm(session, 'FlyBase miscellaneous CV', 'pheno1')
    with pytest.raises(CodingError):
        # The call is expected to raise, so its return value is not captured.
        check_cvterm_is_allowed(session, cvterm, ['badformat'])