Ejemplo n.º 1
0
    def add_uniprot_data(self, enzyme_ids, source):
        '''Gets Uniprot data and stores it as Enzyme nodes.

        Ids already present in self.__nodes are skipped. For every entry
        returned by seq_utils.get_uniprot_values, an Enzyme node is created
        and registered in self.__nodes, and - when the entry carries a
        non-empty organism id - an 'expresses' relationship from that
        organism to the enzyme is appended to self.__org_enz_rels.

        Args:
            enzyme_ids: iterable of Uniprot ids to look up.
            source: value recorded under the 'source' key of each
                organism-enzyme relationship.
        '''
        fields = ['entry name', 'protein names', 'organism-id', 'ec']

        # Only query ids that have not been stored as nodes yet:
        enzyme_ids = [enzyme_id for enzyme_id in enzyme_ids
                      if enzyme_id not in self.__nodes]
        uniprot_values = seq_utils.get_uniprot_values(enzyme_ids, fields,
                                                      batch_size=512,
                                                      verbose=True)

        # items() (not the Python 2-only iteritems()) works on both
        # Python 2 and Python 3:
        for uniprot_id, uniprot_value in uniprot_values.items():
            enzyme_node = {':LABEL': 'Enzyme',
                           'uniprot:ID(Enzyme)': uniprot_id}
            self.__nodes[uniprot_id] = enzyme_node

            # Default to None so that an entry without an organism id is
            # kept as a node (without a relationship) rather than raising
            # KeyError:
            organism_id = uniprot_value.pop('Organism ID', None)

            if 'Entry name' in uniprot_value:
                enzyme_node['entry'] = uniprot_value['Entry name']

            if 'Protein names' in uniprot_value:
                enzyme_node['names'] = uniprot_value['Protein names']

                # The first protein name doubles as the node's 'name':
                if enzyme_node['names']:
                    enzyme_node['name'] = enzyme_node['names'][0]

            if 'EC number' in uniprot_value:
                enzyme_node['ec-code'] = uniprot_value['EC number']

            if organism_id:
                self.__org_enz_rels.append([organism_id, 'expresses',
                                            uniprot_id, {'source': source}])
Ejemplo n.º 2
0
    def test_get_uniprot_values(self):
        """Tests get_uniprot_values method.

        Looks up two Uniprot ids, requesting the organism id and protein
        names fields, and compares the result with the expected mapping
        of id to field values.
        """
        # NOTE(review): the third positional argument (1) is presumably the
        # batch size - confirm against seq_utils.get_uniprot_values.
        result = seq_utils.get_uniprot_values(["P19367", "P42212"], ["organism-id", "protein names"], 1)

        expected = {
            "P19367": {
                "Organism ID": "9606",
                "Protein names": ["Hexokinase-1", "EC 2.7.1.1", "Brain form hexokinase", "Hexokinase type I", "HK I"],
            },
            "P42212": {"Organism ID": "6100", "Protein names": ["Green fluorescent protein"]},
        }

        # assertEqual: the assertEquals alias is deprecated and was removed
        # in Python 3.12.
        self.assertEqual(result, expected)
Ejemplo n.º 3
0
    def test_get_uniprot_values(self):
        '''Checks get_uniprot_values against known values for two entries.

        Requests the organism id and protein names of P19367 and P42212
        and compares the returned mapping with the expected one.
        '''
        uniprot_ids = ['P19367', 'P42212']
        fields = ['organism-id', 'protein names']

        hexokinase = {
            'Entry': 'P19367',
            'Organism ID': '9606',
            'Protein names': ['Hexokinase-1',
                              'EC 2.7.1.1',
                              'Brain form hexokinase',
                              'Hexokinase type I',
                              'HK I',
                              'Hexokinase-A'],
        }
        gfp = {
            'Entry': 'P42212',
            'Organism ID': '6100',
            'Protein names': ['Green fluorescent protein'],
        }
        expected = {'P19367': hexokinase, 'P42212': gfp}

        result = seq_utils.get_uniprot_values(uniprot_ids, fields, 1)

        self.assertEqual(result, expected)
Ejemplo n.º 4
0
def _get_metadata(prot_id, tir, cai, target_org=None, uniprot_id=None):
    '''Gets metadata.

    Builds a metadata dictionary for a protein part. When uniprot_id is
    given and seq_utils.get_uniprot_values returns an entry for it, the
    name, description, organism parameter and identifiers.org links are
    taken from that entry; otherwise name and description fall back to
    prot_id.

    Args:
        prot_id: identifier used as the default name and description.
        tir: numeric value, reported rounded to two decimal places.
        cai: numeric value, reported rounded to two decimal places.
        target_org: optional string appended to the identifiers.org
            taxonomy link.
        uniprot_id: optional Uniprot id to look up.

    Returns:
        dict with keys 'name', 'shortDescription', 'links' and
        'parameters'.
    '''
    name = prot_id
    description = prot_id
    links = []
    parameters = []

    if uniprot_id is not None:
        uniprot_vals = seq_utils.get_uniprot_values([uniprot_id],
                                                    ['entry name',
                                                     'protein names',
                                                     'organism-id',
                                                     'organism',
                                                     'ec'])
        # Add metadata:
        if uniprot_vals:
            # next(iter(...)) takes the first key on both Python 2 and 3;
            # keys()[0] fails on Python 3, where keys() returns a view
            # that cannot be indexed.
            prot_id = next(iter(uniprot_vals))
            entry = uniprot_vals[prot_id]

            name = entry['Entry name']
            organism = entry['Organism']
            prot_names = entry['Protein names']
            description = ', '.join(prot_names) + ' (' + organism + ')'
            ec_number = entry.get('EC number', None)

            parameters.append({'name': 'Organism', 'value': organism})
            links.append('http://identifiers.org/uniprot/' + uniprot_id)

            if ec_number:
                links.append('http://identifiers.org/ec-code/' + ec_number)

    parameters.append({'name': 'Type', 'value': 'PART'})

    # Format-then-parse rounds the values to two decimal places:
    parameters.append({'name': 'TIR', 'value': float("{0:.2f}".format(tir))})
    parameters.append({'name': 'CAI', 'value': float("{0:.2f}".format(cai))})

    if target_org:
        links.append('http://identifiers.org/taxonomy/' + target_org)

    metadata = {'name': name,
                'shortDescription': description,
                'links': links,
                'parameters': parameters}

    return metadata
Ejemplo n.º 5
0
def get_fasta(uniprot_id, filename, variants, digest=None):
    '''Writes a FASTA file of variants for the given Uniprot id.

    Fetches the 'sequence' field for uniprot_id through
    seq_utils.get_uniprot_values and hands the resulting sequence, along
    with filename, variants, digest and the id itself, to
    get_fasta_from_seq.
    '''
    fields = ['sequence']
    uniprot_values = seq_utils.get_uniprot_values([uniprot_id], fields)
    sequence = uniprot_values[uniprot_id]['Sequence']

    get_fasta_from_seq(sequence, filename, variants, digest, uniprot_id)
Ejemplo n.º 6
0
def _get_feature(comp_def):
    '''Builds a feature dictionary from a component definition.

    The roles of comp_def are tested in order - assembly component, RBS,
    CDS - and the first match decides the shape of the returned dict.

    Raises:
        ValueError: if the CDS Uniprot id is not among the values returned
            by get_uniprot_values, or if none of the roles match.
    '''
    # Assembly component:
    if dna_utils.SO_ASS_COMP in comp_def.roles:
        feature = {'typ': dna_utils.SO_ASS_COMP,
                   'name': comp_def.identity,
                   'seq': '',
                   'parameters': {'Tm target': 70}}
        feature['temp_params'] = {'fixed': True,
                                  'required': ['name', 'tm'],
                                  'valid': True,
                                  'id': comp_def.displayId}
        return feature

    # RBS:
    if SO_RBS in comp_def.roles:
        # The TIR target is the second '_'-separated token of displayId:
        tir_target = float(comp_def.displayId.split('_')[1])

        feature = {'typ': dna_utils.SO_RBS,
                   'name': comp_def.identity,
                   'end': 60,
                   'parameters': {'TIR target': tir_target}}
        feature['temp_params'] = {'fixed': False,
                                  'required': ['name', 'tir'],
                                  'min_end': 35,
                                  'max_end': 10000,
                                  'valid': True,
                                  'id': comp_def.displayId}
        return feature

    # Anything that is not a CDS at this point is unsupported:
    if SO_CDS not in comp_def.roles:
        raise ValueError('Invalid roles in component definition: %s'
                         % comp_def)

    # CDS: the Uniprot id is the first '_'-separated token of displayId.
    uniprot_id = comp_def.displayId.split('_')[0]
    uniprot_vals = get_uniprot_values([uniprot_id], ['sequence'])

    if uniprot_id not in uniprot_vals:
        raise ValueError('Uniprot id not found: %s' % uniprot_id)

    aa_seq = uniprot_vals[uniprot_id]['Sequence']

    return {'typ': dna_utils.SO_CDS,
            'name': comp_def.identity,
            'temp_params': {'fixed': False,
                            'required': ['name', 'prot'],
                            'valid': True,
                            'id': comp_def.displayId,
                            'aa_seq': aa_seq,
                            'orig_seq': aa_seq},
            'desc': '',
            'links': ['http://identifiers.org/uniprot/%s' % uniprot_id]}