def add_uniprot_data(self, enzyme_ids, source):
    '''Adds Enzyme nodes and organism-enzyme relationships from Uniprot.

    Ids already present in the node store are skipped; the remainder are
    looked up through seq_utils.get_uniprot_values in batches of 512.

    Args:
        enzyme_ids: iterable of Uniprot ids to add.
        source: value stored under the 'source' key of every
            organism 'expresses' enzyme relationship that is created.
    '''
    fields = ['entry name', 'protein names', 'organism-id', 'ec']

    # Only query ids for which no node has been stored yet:
    enzyme_ids = [enzyme_id for enzyme_id in enzyme_ids
                  if enzyme_id not in self.__nodes]

    uniprot_values = seq_utils.get_uniprot_values(enzyme_ids, fields,
                                                  batch_size=512,
                                                  verbose=True)

    # items() (rather than the Python 2-only iteritems()) runs on both
    # Python 2 and Python 3:
    for uniprot_id, uniprot_value in uniprot_values.items():
        enzyme_node = {':LABEL': 'Enzyme',
                       'uniprot:ID(Enzyme)': uniprot_id}
        self.__nodes[uniprot_id] = enzyme_node

        # A missing organism must not abort the whole import, so default to
        # None and let the guard at the end of the loop skip the relationship:
        organism_id = uniprot_value.pop('Organism ID', None)

        if 'Entry name' in uniprot_value:
            enzyme_node['entry'] = uniprot_value['Entry name']

        if 'Protein names' in uniprot_value:
            enzyme_node['names'] = uniprot_value['Protein names']

            # The first listed name is also stored as the single 'name':
            if enzyme_node['names']:
                enzyme_node['name'] = enzyme_node['names'][0]

        if 'EC number' in uniprot_value:
            enzyme_node['ec-code'] = uniprot_value['EC number']

        # Link the enzyme to its organism only when one was reported:
        if organism_id:
            self.__org_enz_rels.append([organism_id, 'expresses',
                                        uniprot_id, {'source': source}])
def test_get_uniprot_values(self):
    """Tests get_uniprot_values method.

    Requests the 'organism-id' and 'protein names' fields for two Uniprot
    ids and compares the returned mapping with the expected values.
    """
    result = seq_utils.get_uniprot_values(
        ["P19367", "P42212"], ["organism-id", "protein names"], 1
    )

    expected = {
        "P19367": {
            "Organism ID": "9606",
            "Protein names": [
                "Hexokinase-1",
                "EC 2.7.1.1",
                "Brain form hexokinase",
                "Hexokinase type I",
                "HK I",
            ],
        },
        "P42212": {
            "Organism ID": "6100",
            "Protein names": ["Green fluorescent protein"],
        },
    }

    # assertEquals is a deprecated alias that was removed in Python 3.12;
    # assertEqual is the supported spelling on every version.
    self.assertEqual(result, expected)
def test_get_uniprot_values(self):
    '''Checks get_uniprot_values against known values for two entries.'''
    uniprot_ids = ['P19367', 'P42212']
    fields = ['organism-id', 'protein names']

    # Expected record for each queried id:
    hexokinase = {
        'Entry': 'P19367',
        'Organism ID': '9606',
        'Protein names': ['Hexokinase-1',
                          'EC 2.7.1.1',
                          'Brain form hexokinase',
                          'Hexokinase type I',
                          'HK I',
                          'Hexokinase-A'],
    }
    gfp = {
        'Entry': 'P42212',
        'Organism ID': '6100',
        'Protein names': ['Green fluorescent protein'],
    }

    actual = seq_utils.get_uniprot_values(uniprot_ids, fields, 1)

    self.assertEqual(actual, {'P19367': hexokinase, 'P42212': gfp})
def _get_metadata(prot_id, tir, cai, target_org=None, uniprot_id=None): '''Gets metadata.''' name = prot_id description = prot_id links = [] parameters = [] if uniprot_id is not None: uniprot_vals = seq_utils.get_uniprot_values([uniprot_id], ['entry name', 'protein names', 'organism-id', 'organism', 'ec']) # Add metadata: if len(uniprot_vals.keys()): prot_id = uniprot_vals.keys()[0] name = uniprot_vals[prot_id]['Entry name'] organism = uniprot_vals[prot_id]['Organism'] prot_names = uniprot_vals[prot_id]['Protein names'] description = ', '.join(prot_names) + ' (' + organism + ')' ec_number = uniprot_vals[prot_id].get('EC number', None) parameters.append({'name': 'Organism', 'value': organism}) links.append('http://identifiers.org/uniprot/' + uniprot_id) if ec_number: links.append('http://identifiers.org/ec-code/' + ec_number) parameters.append({'name': 'Type', 'value': 'PART'}) parameters.append({'name': 'TIR', 'value': float("{0:.2f}".format(tir))}) parameters.append({'name': 'CAI', 'value': float("{0:.2f}".format(cai))}) if target_org: links.append('http://identifiers.org/taxonomy/' + target_org) metadata = {'name': name, 'shortDescription': description, 'links': links, 'parameters': parameters} return metadata
def get_fasta(uniprot_id, filename, variants, digest=None):
    '''Looks up the sequence of a Uniprot id and hands it to
    get_fasta_from_seq together with filename, variants and digest.'''
    lookup = seq_utils.get_uniprot_values([uniprot_id], ['sequence'])

    # Indexing raises KeyError if the id is absent from the lookup result:
    entry = lookup[uniprot_id]
    sequence = entry['Sequence']

    get_fasta_from_seq(sequence, filename, variants, digest, uniprot_id)
def _get_feature(comp_def):
    '''Builds the feature dictionary for a component definition.

    The roles of comp_def are checked in the order assembly component,
    RBS, CDS, and the first match decides the kind of feature returned.

    Raises:
        ValueError: if the Uniprot id of a CDS cannot be found, or if
            none of the supported roles is present.
    '''
    if dna_utils.SO_ASS_COMP in comp_def.roles:
        # Assembly component:
        feature = {
            'typ': dna_utils.SO_ASS_COMP,
            'name': comp_def.identity,
            'seq': '',
            'parameters': {'Tm target': 70},
        }
        feature['temp_params'] = {
            'fixed': True,
            'required': ['name', 'tm'],
            'valid': True,
            'id': comp_def.displayId,
        }
        return feature

    if SO_RBS in comp_def.roles:
        # RBS: the TIR target is the second '_'-separated token of the
        # display id.
        feature = {
            'typ': dna_utils.SO_RBS,
            'name': comp_def.identity,
            'end': 60,
            'parameters': {
                'TIR target': float(comp_def.displayId.split('_')[1]),
            },
        }
        feature['temp_params'] = {
            'fixed': False,
            'required': ['name', 'tir'],
            'min_end': 35,
            'max_end': 10000,
            'valid': True,
            'id': comp_def.displayId,
        }
        return feature

    if SO_CDS in comp_def.roles:
        # CDS: the Uniprot id is the first '_'-separated token of the
        # display id.
        uniprot_id = comp_def.displayId.split('_')[0]
        uniprot_vals = get_uniprot_values([uniprot_id], ['sequence'])

        if uniprot_id not in uniprot_vals:
            raise ValueError('Uniprot id not found: %s' % uniprot_id)

        feature = {
            'typ': dna_utils.SO_CDS,
            'name': comp_def.identity,
        }

        # The looked-up sequence is used as both 'aa_seq' and 'orig_seq':
        sequence = uniprot_vals[uniprot_id]['Sequence']
        feature['temp_params'] = {
            'fixed': False,
            'required': ['name', 'prot'],
            'valid': True,
            'id': comp_def.displayId,
            'aa_seq': sequence,
            'orig_seq': sequence,
        }
        feature['desc'] = ''
        feature['links'] = ['http://identifiers.org/uniprot/%s' % uniprot_id]
        return feature

    raise ValueError('Invalid roles in component definition: %s' % comp_def)