Beispiel #1
0
    def __init__(self, enzyme_type, hidden_edges=True, log_level=0):
        """Store the enzyme type, its database record and default styling.

        Args:
            enzyme_type: Name used to look up the ``EnzymeType`` document.
            hidden_edges: Stored unchanged on ``self.hidden_edges``.
            log_level: Stored unchanged on ``self.log_level``.

        Raises:
            IndexError: If no ``EnzymeType`` document matches ``enzyme_type``
                (the query result is indexed with ``[0]``).
        """
        # Lookup state. The metadata call stays directly after the two
        # assignments it may rely on.
        self.enzyme_type = enzyme_type
        self.enzyme_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]
        self.node_metadata = self._find_uniref_metadata()

        # Caller-supplied switches.
        self.hidden_edges = hidden_edges
        self.log_level = log_level

        # Edge appearance.
        self.edge_colour = dict(color='black')
        self.edge_width = 4

        # Border appearance for the two node origins and for selected nodes.
        self.uniref_border_width, self.uniref_border_colour = 1, 'black'
        self.biocatdb_border_width, self.biocatdb_border_colour = 3, 'darkred'
        self.border_width_selected = 4

        # Node appearance; the colour string embeds the opacity set just above.
        self.opacity = 0.9
        self.luminosity = 'bright'
        self.node_colour = f'rgba(5, 5, 168, {self.opacity})'
        self.node_size = 100
        self.node_shape = 'dot'

        self.cluster_positioner = ClusterPositioner()
Beispiel #2
0
def merge_enzyme_types():
    """Merge one enzyme type into another and return the outcome as JSON.

    Reads ``to_merge`` and ``merge_with`` from the submitted form. When they
    differ and ``to_merge`` exists, the ``to_merge`` document is passed to
    ``change_enzyme_type_name`` together with ``merge_with`` and then deleted.

    Returns:
        A JSON response whose ``result`` has ``status`` ('success' or
        'danger'), ``msg`` and an empty ``issues`` list.
    """
    to_merge = request.form['to_merge']
    merge_with = request.form['merge_with']

    if to_merge == merge_with:
        return jsonify(result={
            'status': 'danger',
            'msg': "Can't merge with self",
            'issues': []
        })

    enz_type = EnzymeType.objects(enzyme_type=to_merge).first()
    if enz_type is None:
        # Indexing the empty query with [0] used to raise IndexError here;
        # report the problem in the same result shape instead.
        return jsonify(result={
            'status': 'danger',
            'msg': f"Enzyme type {to_merge} does not exist",
            'issues': []
        })

    change_enzyme_type_name(enz_type, merge_with)
    enz_type.delete()

    return jsonify(result={
        'status': 'success',
        'msg': 'Enzyme type merged',
        'issues': []
    })
def check_blast_status(enzyme_type):
    """Update an enzyme type's bioinformatics status from its sequences.

    Considers the reviewed sequences of ``enzyme_type`` that are not flagged
    ``bioinformatics_ignore``. If any of them has ``blast`` set to ``None``
    the enzyme type is marked 'Queued for update'. Otherwise, if it is not
    already 'Complete', it is marked 'Complete' and, when exactly one
    ``SSN_record`` exists for it, that record is marked 'Queued for update'.

    Args:
        enzyme_type: Name of the enzyme type to check.

    Raises:
        IndexError: If no ``EnzymeType`` document matches ``enzyme_type``.
    """
    seqs = Sequence.objects(
        db.Q(enzyme_type=enzyme_type) & db.Q(bioinformatics_ignore__ne=True)
        & db.Q(reviewed=True))
    enz_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]

    # One pending sequence is enough; write the status once rather than once
    # per pending sequence.
    if any(seq.blast is None for seq in seqs):
        enz_type_obj.bioinformatics_status = 'Queued for update'
        enz_type_obj.save()
        return

    if enz_type_obj.bioinformatics_status == 'Complete':
        return

    enz_type_obj.bioinformatics_status = 'Complete'
    enz_type_obj.save()

    # Blasts have just become complete, so an existing SSN needs rebuilding.
    ssn_q = SSN_record.objects(enzyme_type=enz_type_obj)
    if len(ssn_q) == 1:
        ssn_record = ssn_q[0]
        ssn_record.status = 'Queued for update'
        ssn_record.save()
    def parse(self, output, seq_obj):
        """Process the alignments of a BLAST record for one query sequence.

        For every alignment that passes ``_alignment_filters``: if a
        ``UniRef50`` entry with the hit's identifier already exists for the
        query's enzyme type, the result is recorded against it via
        ``_add_result_of_blasts_for``. Otherwise the sequence is fetched with
        ``_get_sequence`` and, if it passes ``_sequence_filters``, added via
        ``_add_uniref``.

        Args:
            output: Object exposing an ``alignments`` iterable whose items
                have a ``hit_id``.
            seq_obj: Query sequence document; its ``sequence`` and
                ``enzyme_type`` attributes are read.

        Raises:
            IndexError: If no ``EnzymeType`` matches ``seq_obj.enzyme_type``.
        """
        query_length = len(seq_obj.sequence)
        enzyme_type_obj = EnzymeType.objects(
            enzyme_type=seq_obj.enzyme_type)[0]

        for alignment in output.alignments:
            identifier = alignment.hit_id.replace(self.identifier_head, '')

            if not self._alignment_filters(alignment, query_length):
                continue

            known_entries = UniRef50.objects(
                db.Q(enzyme_name=identifier)
                & db.Q(enzyme_type=enzyme_type_obj)).select_related()

            if len(known_entries) != 0:
                # Already stored: only record this blast result against it.
                self._add_result_of_blasts_for(seq_obj, known_entries[0])
                continue

            protein_sequence = self._get_sequence(identifier)
            if self._sequence_filters(protein_sequence, query_length):
                self.log(f"Adding sequence for {identifier}")
                self._add_uniref(alignment, identifier, protein_sequence,
                                 enzyme_type_obj, seq_obj)
Beispiel #5
0
def ssn_object():
    """Return summary information about an enzyme type's SSN as JSON.

    Reads ``enzyme_type`` from the submitted form and responds with the SSN
    record's status, the number of ``Sequence`` and ``UniRef50`` entries for
    the type, and one descriptive label per precalculated alignment score.

    Raises:
        IndexError: If the enzyme type or its ``SSN_record`` is not found.
    """
    enzyme_type = request.form['enzyme_type']
    enzyme_type_obj = EnzymeType.objects(enzyme_type=enzyme_type)[0]
    ssn_obj = SSN_record.objects(enzyme_type=enzyme_type_obj)[0]

    num_biocatdb = Sequence.objects(enzyme_type=enzyme_type).count()
    num_uniref = UniRef50.objects(enzyme_type=enzyme_type_obj).count()

    def describe(score):
        # Label for one alignment score: cluster count plus the two identity
        # values stored for that score.
        clusters = ssn_obj.num_at_alignment_score[score]
        idt = ssn_obj.identity_at_alignment_score[score]
        return f"{score}, {clusters} clusters, avg identity {idt[0]} ± {idt[1]}"

    precalc_choices = {
        score: describe(score)
        for score in ssn_obj.num_at_alignment_score
    }

    return jsonify(result={
        'status': ssn_obj.status,
        'num_biocatdb': num_biocatdb,
        'num_uniref': num_uniref,
        'precalculated': precalc_choices
    })
def show_sequences():
    """Render the sequence editing table, filtered by URL query arguments.

    Recognised arguments:
        reviewed: If present, only reviewed sequences are shown.
        enzyme_type: If present, restricts to that enzyme type.
        paper_id: If present, restricts to sequences linked to that paper.

    The page title is extended to describe whichever filters are active.

    Raises:
        IndexError: If ``paper_id`` is given but matches no ``Paper``.
    """
    args = request.args.to_dict()
    title_parts = ["Enzyme sequences"]

    reviewed_only = 'reviewed' in args
    revQ = db.Q(reviewed=True) if reviewed_only else db.Q()
    if not reviewed_only:
        title_parts.append(" (including not reviewed)")

    enzyme_type_query = db.Q()
    if 'enzyme_type' in args:
        enzyme_type_query = db.Q(enzyme_type=args['enzyme_type'])
        title_parts.append(f" for {args['enzyme_type']} enzymes")

    paper_query = db.Q()
    if 'paper_id' in args:
        paper_query = db.Q(papers=args['paper_id'])
        paper = Paper.objects(id=args['paper_id'])[0]
        title_parts.append(f" in {paper.short_citation}")

    combined_query = enzyme_type_query & paper_query & revQ
    enzyme_data = sequence_table.get_enzyme_data(combined_query)
    enzyme_types = sorted(EnzymeType.objects().distinct("enzyme_type"))

    return render_template(
        'edit_tables/edit_sequences.html',
        seq_data=enzyme_data,
        seq_button_columns=[],
        seq_table_height='80vh',
        enzyme_types=enzyme_types,
        show_header_filters=True,
        include_owner=True,
        lock_enz_type='false',
        title="".join(title_parts),
        row_click_modal=True,
    )
def check_random_uniref(num_to_check=25):
    """Spot-check stored UniRef50 entries and escalate on any mismatch.

    For each enzyme type that has ``UniRef50`` entries, up to
    ``num_to_check`` randomly chosen entries (with replacement) are loaded
    through ``UniRef_Parser`` and checked with ``check_id_match``. As soon as
    one check returns a value equal to ``False``, sampling stops and
    ``full_uniref_check`` is run for that enzyme type.

    Args:
        num_to_check: Maximum number of random entries to test per type.
    """
    for enzyme_type in EnzymeType.objects():
        unirefs = UniRef50.objects(enzyme_type=enzyme_type)
        if len(unirefs) == 0:
            continue

        mismatch_found = False
        for _ in range(num_to_check):
            name = random.choice(unirefs).enzyme_name
            ref_parser = UniRef_Parser()
            ref_parser.load_xml(name)
            # NOTE(review): presumably throttles lookups against the online
            # UniRef service - confirm.
            time.sleep(0.2)

            # Explicit comparison kept on purpose: only a value equal to
            # False counts as a mismatch (a None return does not).
            if ref_parser.check_id_match(name) == False:  # noqa: E712
                mismatch_found = True
                # One mismatch already triggers the full check, so further
                # sampling would only cost extra lookups.
                break

        if mismatch_found:
            print(
                f'Identified mismatches with online uniref entries..  full uniref check for {enzyme_type.enzyme_type}'
            )
            full_uniref_check(enzyme_type)

    print('Uniref checks complete ')
Beispiel #8
0
def is_type_taken(form, field):
    """Reject a field value that is already the name of an enzyme type.

    Args:
        form: Unused; presumably present to match a form-validator signature.
        field: Object whose ``data`` attribute holds the candidate name.

    Raises:
        ValidationError: If an ``EnzymeType`` with that name exists.
    """
    existing_names = (obj.enzyme_type for obj in EnzymeType.objects())
    if any(field.data == existing for existing in existing_names):
        raise ValidationError(
            f'{field.data} is already an enzyme type in the database')
def get_enzymes():
    """Return the distinct ``enzyme_type`` values as a sorted list."""
    return sorted(EnzymeType.objects().distinct('enzyme_type'))
def set_bioinformatics_status(enzyme_type, status):
    """Set and save ``bioinformatics_status`` on the named enzyme type.

    Args:
        enzyme_type: Name of the enzyme type to update.
        status: New value for ``bioinformatics_status``.

    Raises:
        IndexError: If no ``EnzymeType`` document matches ``enzyme_type``.
    """
    matches = EnzymeType.objects(enzyme_type=enzyme_type)
    enzyme_type_doc = matches[0]
    enzyme_type_doc.bioinformatics_status = status
    enzyme_type_doc.save()
Beispiel #11
0
def df_to_db(spec_df):
    """Save every row of an activity spreadsheet DataFrame to MongoDB.

    For each row this:
      1. finds or creates the ``Paper`` identified by the row's DOI,
      2. creates the row's ``EnzymeType`` if it is a non-empty string that
         does not exist yet,
      3. finds or creates the row's ``Sequence`` (when ``enzyme_name`` is a
         non-empty string) and links it to the paper,
      4. saves a new ``Activity`` built from the row.

    Args:
        spec_df: DataFrame with the columns read below ('html_doi',
            'added_by', 'short_citation', 'enzyme_type', 'enzyme_name',
            'reaction', ...).

    NOTE(review): ``check_is_nan``, ``check_is_float`` and ``get_smile`` are
    defined elsewhere; they are presumably cell-value normalisers - confirm.
    """
    print('Saving biocatdb_2 excel to mongodb..')

    doi_url_prefixes = (
        'https://doi.org/', 'http://doi.org/', 'http://dx.doi.org/'
    )

    for _, row in spec_df.iterrows():
        html_doi = str(row['html_doi'])
        added_by_string = str(row['added_by'])

        # Strip resolver prefixes from the running value so one removal
        # never undoes another.
        doi = html_doi
        for prefix in doi_url_prefixes:
            doi = doi.replace(prefix, '')

        paper = Paper.objects(doi=doi).first()
        if paper is None:
            paper = Paper(short_citation=str(row['short_citation']),
                          html=html_doi,
                          doi=doi)
            paper = paper.save()
            print(f"{row['short_citation']} added")

        # Empty cells may arrive as NaN floats, hence the isinstance checks.
        enzyme_type = row['enzyme_type']
        if isinstance(enzyme_type, str) and enzyme_type != '':
            if EnzymeType.objects(enzyme_type=enzyme_type).count() == 0:
                enz_type = EnzymeType(enzyme_type=enzyme_type,
                                      description='')
                enz_type.save()

        enzyme_name = row['enzyme_name']
        if isinstance(enzyme_name, str) and enzyme_name != '':
            seq = Sequence.objects(enzyme_name=enzyme_name).first()
            if seq is None:
                seq = Sequence(enzyme_name=check_is_nan(enzyme_name),
                               enzyme_type=check_is_nan(row['enzyme_type']),
                               papers=[paper])
                seq.save()
            elif paper not in seq.papers:
                seq.papers.append(paper)
                seq.save()

        # Flags are stored as 1/0 in the sheet; bool() yields a plain bool
        # even if the comparison returns a numpy boolean.
        binary = bool(row['binary'] == 1)
        auto_gen = bool(row['auto_generated'] == 1)

        activity = Activity(
            enzyme_type=check_is_nan(row['enzyme_type']),
            enzyme_name=check_is_nan(row['enzyme_name']),
            reaction=check_is_nan(row['reaction']),
            short_citation=check_is_nan(row['short_citation']),
            html_doi=check_is_nan(row['html_doi']),
            added_by_string=added_by_string,
            paper=paper,
            cascade_num=check_is_nan(row['cascade_num']),
            substrate_1_smiles=get_smile(row['substrate_1_smiles']),
            substrate_2_smiles=get_smile(row['substrate_2_smiles']),
            product_1_smiles=get_smile(row['product_1_smiles']),
            temperature=check_is_nan(row['temperature']),
            ph=check_is_nan(row['ph']),
            solvent=check_is_nan(row['solvent']),
            other_conditions=check_is_nan(row['other_conditions']),
            notes=check_is_nan(row['notes']),
            reaction_vol=check_is_nan(row['reaction_vol']),
            formulation=check_is_nan(row['formulation']),
            biocat_conc=check_is_nan(row['biocat_conc']),
            kcat=check_is_float(row['kcat']),
            km=check_is_float(row['km']),
            mw=check_is_float(row['mw']),
            substrate_1_conc=check_is_nan(row['substrate_1_conc']),
            substrate_2_conc=check_is_nan(row['substrate_2_conc']),
            specific_activity=check_is_float(row['specific_activity']),
            conversion=check_is_float(row['conversion']),
            conversion_time=check_is_float(row['conversion_time']),
            categorical=check_is_nan(row['categorical']),
            binary=binary,
            selectivity=check_is_nan(row['selectivity']),
            auto_generated=auto_gen)

        activity.save()
    print('..done')
Beispiel #12
0
def _split_names(raw):
    """Split a ', '-separated string into a list, dropping empty entries."""
    return [part for part in raw.split(', ') if part != '']


def save_or_add_seqs(data_list, paper):
    """Create or update ``Sequence`` documents from uploaded rows.

    Used by the Excel upload. For each dict in ``data_list``:
      * a row without an ``enzyme_name`` is reported as an issue;
      * if no sequence with that name exists, a new one is created, provided
        its enzyme type exists and ``sequence_check`` accepts its sequence;
      * otherwise the existing sequence is linked to ``paper`` and, when it
        is unowned or owned by the current user, its empty fields are filled
        in from the row.

    Args:
        data_list: List of dicts, one per uploaded row. The dicts are
            normalised in place (blank flags become 'False', whitespace is
            removed from 'sequence').
        paper: Paper document the sequences are linked to.

    Returns:
        List of human-readable issue strings (empty if there were none).
    """
    user = user_datastore.get_user(current_user.id)
    issues = []
    enzyme_types = EnzymeType.objects().distinct('enzyme_type')

    for seq_dict in data_list:
        # Blank flag cells would otherwise make strtobool raise.
        if seq_dict.get('sequence_unavailable') == '':
            seq_dict['sequence_unavailable'] = 'False'
        if seq_dict.get('structure') == '':
            seq_dict['structure'] = 'False'

        if 'sequence' in seq_dict:
            seq_dict['sequence'] = seq_dict['sequence'].replace(
                '\n', '').replace(' ', '')

        enzyme_name = seq_dict.get('enzyme_name', '')
        if enzyme_name == '':
            issues.append("Sequence must have a name")
            continue

        seq = Sequence.objects(enzyme_name=enzyme_name).first()

        if seq is None:
            enzyme_type = seq_dict.get('enzyme_type', '')

            if enzyme_type not in enzyme_types:
                msg = f"Enzyme type {enzyme_type} does not exist"
                print(msg)
                issues.append(msg)

            elif sequence_check(seq_dict.get('sequence', '')) == False:  # noqa: E712
                msg = f"Amino acid sequence for {enzyme_name} uses incorrect amino acid characters"
                print(msg)
                issues.append(msg)

            else:
                print('Creating new sequence..')
                seq = Sequence(
                    enzyme_name=enzyme_name,
                    enzyme_type=enzyme_type,
                    other_names=_split_names(seq_dict.get('other_names', '')),
                    sequence=seq_dict.get('sequence', ''),
                    n_tag=seq_dict.get('n_tag', ''),
                    c_tag=seq_dict.get('c_tag', ''),
                    sequence_unavailable=strtobool(
                        seq_dict.get('sequence_unavailable', 'False')),
                    accession=seq_dict.get('accession', ''),
                    # Read from its own column; this was previously filled
                    # from 'other_names' by mistake.
                    other_identifiers=_split_names(
                        seq_dict.get('other_identifiers', '')),
                    pdb=seq_dict.get('pdb', ''),
                    mutant_of=seq_dict.get('mutant_of', ''),
                    notes=seq_dict.get('notes', ''),
                    papers=[paper],
                    owner=user)
                seq.save()
            continue

        # Sequence already exists: always link the paper, then update data
        # only if the current user may do so.
        if paper not in seq.papers:
            seq.papers.append(paper)

        if seq.owner == user or seq.owner is None:
            seq.owner = user

            # Skip names already recorded so repeated uploads do not pile up
            # duplicates.
            for other_name in _split_names(seq_dict.get('other_names', '')):
                if other_name not in seq.other_names:
                    seq.other_names.append(other_name)

            if seq.sequence is None or seq.sequence == '':
                seq.sequence = seq_dict.get('sequence', '')

            if strtobool(seq_dict.get('sequence_unavailable', 'False')):
                seq.sequence_unavailable = True

            if seq.accession is None or seq.accession == '':
                seq.accession = seq_dict.get('accession', '')

            if seq_dict.get('pdb', '') != '':
                seq.pdb = seq_dict.get('pdb', '')

            if seq.mutant_of is None or seq.mutant_of == '':
                seq.mutant_of = seq_dict.get('mutant_of', '')

            if seq.notes is None or seq.notes == '':
                seq.notes = seq_dict.get('notes', '')

        else:
            msg = 'Sequence already exists but owned by another user - added to paper, but no data updated'
            print(msg)
            issues.append(msg)

        seq.save()

    return issues
Beispiel #13
0
def load_sequence_data():
    """Return the details of one sequence, looked up by name, as JSON.

    Reads ``name`` from the submitted form. An empty name yields an empty
    result. Otherwise the response holds the sequence's fields, the names of
    all sequences of the same enzyme type, the owner's display string, the
    enzyme type's full name, and flags describing how the current user
    relates to the sequence.

    Raises:
        IndexError: If no sequence, or no enzyme type for it, is found.
    """
    name = request.form['name']
    if name == '':
        return jsonify(result={})

    matching = Sequence.objects(enzyme_name=name).exclude('papers')
    seq = matching[0].select_related()

    same_type_names = sorted(
        Sequence.objects(enzyme_type=seq.enzyme_type).distinct('enzyme_name'))
    seq_array = {other: other for other in same_type_names}

    can_edit = self_assigned = other_user = False
    if current_user.is_authenticated:
        user = user_datastore.get_user(current_user.id)
        can_edit = bool(
            check_permission.check_seq_permissions(current_user.id, seq))

        if seq.owner == user:
            self_assigned = True
        elif seq.owner != '' and seq.owner is not None:
            other_user = True

    owner = (
        ''
        if seq.owner is None
        else f"{seq.owner.first_name} {seq.owner.last_name}, {seq.owner.affiliation}"
    )

    other_names = ', '.join(seq.other_names)
    other_identifiers = ', '.join(seq.other_identifiers)

    enzyme_type_doc = EnzymeType.objects(enzyme_type=seq.enzyme_type)[0]
    enzyme_type_full = enzyme_type_doc.full_name

    # Replace missing optional values with empty strings on the in-memory
    # document (it is not saved).
    for optional_attr in ('n_tag', 'c_tag', 'pdb'):
        if getattr(seq, optional_attr) is None:
            setattr(seq, optional_attr, '')

    return jsonify(result={
        'enzyme_type': seq.enzyme_type,
        'enzyme_name': seq.enzyme_name,
        'sequence': seq.sequence,
        'sequence_unavailable': seq.sequence_unavailable,
        'n_tag': seq.n_tag,
        'c_tag': seq.c_tag,
        'accession': seq.accession,
        'other_identifiers': other_identifiers,
        'pdb': seq.pdb,
        'mutant_of': seq.mutant_of,
        'sequences': seq_array,
        'notes': seq.notes,
        'bioinformatics_ignore': seq.bioinformatics_ignore,
        'can_edit': can_edit,
        'self_assigned': self_assigned,
        'owner_is_another_user': other_user,
        'other_names': other_names,
        'owner': owner,
        'enzyme_type_full': enzyme_type_full
    })