def map_to_up_primary(ids):
    """Pick one UniProt ID to stand in for a collection of UniProt IDs.

    Every ID that ``uniprot_client.is_secondary`` reports as secondary is
    replaced by the result of ``uniprot_client.get_primary_id``; all other
    IDs are kept as given. Duplicates are not collapsed. When more than one
    candidate remains, candidates for which ``uniprot_client.is_human`` is
    true are preferred, and ties are broken by input order.

    Parameters
    ----------
    ids : iterable of str
        The UniProt IDs to resolve.

    Returns
    -------
    str or None
        The selected ID, or None if `ids` yields no IDs.
    """
    primary_ids = [
        uniprot_client.get_primary_id(up_id)
        if uniprot_client.is_secondary(up_id) else up_id
        for up_id in ids
    ]
    # If there are no primary IDs, we return None
    if not primary_ids:
        return None

    # One primary ID, so use it
    if len(primary_ids) == 1:
        return str(primary_ids[0])

    # Several candidates: prefer the human ones, if there are any
    human_ids = [
        up_id for up_id in primary_ids if uniprot_client.is_human(up_id)
    ]
    if not human_ids:
        logger.info('More than one primary id but none human, '
                    'choosing the first: %s', ','.join(primary_ids))
        return str(primary_ids[0])

    # A single human candidate is used silently; an ambiguous choice
    # between several human candidates is logged.
    if len(human_ids) > 1:
        logger.info('More than one human primary id, choosing '
                    'the first: %s', ','.join(human_ids))
    return str(human_ids[0])
def map_to_up_primary(ids):
    """Return the first UniProt ID in `ids` after resolving secondary IDs.

    Every ID that ``uniprot_client.is_secondary`` reports as secondary is
    replaced by the result of ``uniprot_client.get_primary_id``; all other
    IDs are kept as given. Duplicates are not collapsed. If more than one
    ID results, the full list is logged and the first one is returned.

    Parameters
    ----------
    ids : iterable of str
        The UniProt IDs to resolve.

    Returns
    -------
    str or None
        The first resolved ID, or None if `ids` yields no IDs.
    """
    primary_ids = [
        uniprot_client.get_primary_id(up_id)
        if uniprot_client.is_secondary(up_id) else up_id
        for up_id in ids
    ]
    # If there are no primary IDs, we return None
    if not primary_ids:
        return None

    # If there is more than one primary ID then we return the first one,
    # but leave a trace of the ambiguity in the log
    if len(primary_ids) > 1:
        logger.info('More than one primary id: %s', ','.join(primary_ids))
    return str(primary_ids[0])
def sanitize_up_ids(up_ids):
    """Reduce a mixed bag of UniProt identifiers to a preferred subset.

    Each ID is passed through ``uniprot_client.get_primary_id``, entries
    containing an underscore are dropped (these are mnemonics mixed in
    with the IDs), and duplicates are removed. If any remaining ID is
    human according to ``uniprot_client.is_human``, only the human IDs are
    returned. Otherwise the non-human IDs for which
    ``uniprot_client.get_mnemonic(up_id, web_fallback=False)`` is truthy
    are returned.

    The result follows the order in which IDs are first encountered in
    `up_ids`, so it is stable for an ordered input.

    Parameters
    ----------
    up_ids : iterable of str
        UniProt identifiers, possibly including secondary IDs and
        mnemonics.

    Returns
    -------
    list of str
        The selected IDs; empty if nothing qualifies.
    """
    # First, we map any secondary IDs to primary IDs. De-duplicate with a
    # companion set rather than collecting into a set, so that the order
    # of the result does not depend on string hashing.
    seen = set()
    primary_ids = []
    for up_id in up_ids:
        primary_id = uniprot_client.get_primary_id(up_id)
        # We filter out IDs that are actually mnemonics, these are just
        # mixed in without any differentiation from other IDs
        if '_' in primary_id or primary_id in seen:
            continue
        seen.add(primary_id)
        primary_ids.append(primary_id)
    # TODO: should we do anything about isoforms?

    # We separate out specific sets of IDs, asking is_human once per ID
    human_ids = []
    non_human_ids = []
    for up_id in primary_ids:
        if uniprot_client.is_human(up_id):
            human_ids.append(up_id)
        else:
            non_human_ids.append(up_id)
    if human_ids:
        return human_ids

    # Only now is the non-human subset needed. get_mnemonic is just a
    # quick way to see if we have this entry.
    return [
        up_id for up_id in non_human_ids
        if uniprot_client.get_mnemonic(up_id, web_fallback=False)
    ]
def test_get_primary_id_secondary_nohuman():
    # Looking up P31848 may yield any one of these three accepted IDs
    resolved = uniprot_client.get_primary_id('P31848')
    accepted_ids = ['P0A5M5', 'P9WIU6', 'P9WIU7']
    assert resolved in accepted_ids
def test_get_primary_id_secondary_hashuman():
    # Looking up Q96J62 is expected to yield exactly P61978
    resolved = uniprot_client.get_primary_id('Q96J62')
    assert resolved == 'P61978'
def test_get_primary_id_primary():
    # Looking up Q02750 is expected to return the same ID unchanged
    queried_id = 'Q02750'
    resolved = uniprot_client.get_primary_id(queried_id)
    assert resolved == 'Q02750'
def test_get_primary_id_secondary_nohuman():
    # Any of the three IDs listed below is an acceptable result for P31848
    primary_id = uniprot_client.get_primary_id('P31848')
    candidates = ['P0A5M5', 'P9WIU6', 'P9WIU7']
    assert primary_id in candidates
def test_get_primary_id_secondary_hashuman():
    # Q96J62 must map to P61978 and nothing else
    primary_id = uniprot_client.get_primary_id('Q96J62')
    assert primary_id == 'P61978'
def test_get_primary_id_primary():
    # Q02750 must come back from the lookup as itself
    primary_id = uniprot_client.get_primary_id('Q02750')
    assert primary_id == 'Q02750'