def test_epicidin(self):
    """Test lantipeptide prediction for epicidin 280"""
    record = seqio.read(utils.get_full_path(__file__, 'epicidin_280.gbk'))

    # Running the analysis is expected to add exactly two features.
    self.assertEqual(21, len(record.features))
    specific_analysis(record, None)
    self.assertEqual(23, len(record.features))

    core_hits = h._find_core_peptides(
        utils.get_cluster_by_nr(record, 1), record)
    self.assertEqual(1, len(core_hits))
    core = core_hits[0]

    leader_hits = h._find_leader_peptides(
        utils.get_cluster_by_nr(record, 1), record)
    self.assertEqual(1, len(leader_hits))
    leader = leader_hits[0]

    # Predicted masses and bridge count of the core peptide.
    self.assertAlmostEqual(3115.7, h._get_monoisotopic_mass(core))
    self.assertAlmostEqual(3117.7, h._get_molecular_weight(core))
    self.assertEqual([3135.7, 3153.7, 3171.7],
                     h._get_alternative_weights(core))
    self.assertEqual(3, h._get_number_bridges(core))

    # Predicted leader/core sequences, class and extra modifications.
    self.assertEqual("MENKKDLFDLEIKKDNMENNNELEAQ",
                     h._get_leader_peptide_sequence(leader))
    self.assertEqual("SLGPAIKATRQVCPKATRFVTVSCKKSDCQ",
                     h._get_core_peptide_sequence(core))
    self.assertEqual('Class I', h._get_core_peptide_class(core))
    self.assertEqual(['Lac'], h._get_core_peptide_extra_modifications(core))
def test_nisin(self):
    """Test lantipeptide prediction for nisin A"""
    record = seqio.read(utils.get_full_path(__file__, 'nisin.gbk'))

    # Running the analysis is expected to add exactly two features.
    self.assertEqual(38, len(record.features))
    specific_analysis(record, None)
    self.assertEqual(40, len(record.features))

    core_hits = h._find_core_peptides(
        utils.get_cluster_by_nr(record, 1), record)
    self.assertEqual(1, len(core_hits))
    core = core_hits[0]

    leader_hits = h._find_leader_peptides(
        utils.get_cluster_by_nr(record, 1), record)
    self.assertEqual(1, len(leader_hits))
    leader = leader_hits[0]

    # real monoisotopic mass is 3351.51, but we overpredict a Dha
    self.assertAlmostEqual(3333.6, h._get_monoisotopic_mass(core))
    # real mw is 3354.5, see above
    self.assertAlmostEqual(3336.0, h._get_molecular_weight(core))
    self.assertEqual([3354.0, 3372.1, 3390.1, 3408.1],
                     h._get_alternative_weights(core))
    self.assertEqual(5, h._get_number_bridges(core))

    # Predicted leader/core sequences and class.
    self.assertEqual("MSTKDFNLDLVSVSKKDSGASPR",
                     h._get_leader_peptide_sequence(leader))
    self.assertEqual("ITSISLCTPGCKTGALMGCNMKTATCHCSIHVSK",
                     h._get_core_peptide_sequence(core))
    self.assertEqual('Class I', h._get_core_peptide_class(core))
def test_microbisporicin(self):
    """Test lantipeptide prediction for microbisporicin"""
    record = seqio.read(utils.get_full_path(__file__, 'microbisporicin.gbk'))

    # Running the analysis is expected to add exactly two features.
    self.assertEqual(56, len(record.features))
    specific_analysis(record, None)
    self.assertEqual(58, len(record.features))

    core_hits = h._find_core_peptides(
        utils.get_cluster_by_nr(record, 1), record)
    self.assertEqual(1, len(core_hits))
    core = core_hits[0]

    leader_hits = h._find_leader_peptides(
        utils.get_cluster_by_nr(record, 1), record)
    self.assertEqual(1, len(leader_hits))
    leader = leader_hits[0]

    # NOTE: this is not the correct weight for microbisporicin
    # there are some additional modifications we do not predict yet
    self.assertAlmostEqual(2212.9, h._get_monoisotopic_mass(core))
    self.assertAlmostEqual(2214.5, h._get_molecular_weight(core))
    self.assertEqual(4, h._get_number_bridges(core))

    # Predicted leader/core sequences, class and extra modifications.
    self.assertEqual("MPADILETRTSETEDLLDLDLSIGVEEITAGPA",
                     h._get_leader_peptide_sequence(leader))
    self.assertEqual("VTSWSLCTPGCTSPGGGSNCSFCC",
                     h._get_core_peptide_sequence(core))
    self.assertEqual('Class I', h._get_core_peptide_class(core))
    self.assertEqual(['AviCys', 'Cl', 'OH'],
                     h._get_core_peptide_extra_modifications(core))
def retrieve_gene_cluster_annotations(seq_record, smcogdict, gtrcoglist,
                                      transportercoglist, geneclusternr):
    """Collect per-gene display data for one gene cluster.

    Arguments:
        seq_record: record holding the cluster and CDS features.
        smcogdict: mapping of gene ID to a sequence whose first element is
            compared against the two COG lists below.
        gtrcoglist: values that mark a gene as belonging to ``gtrs``.
        transportercoglist: values that mark a gene as a transporter.
        geneclusternr: number of the cluster to look up in ``seq_record``.

    Returns an 11-tuple:
        (clustergenes, clustertype, annotations, colors, starts, ends,
         strands, pksnrpsprots, gtrs, transporters, clustersize)
    where the list entries are in the order of ``clustergenes`` and
    ``clustersize`` is ``max(ends) - min(starts)`` (0 for a cluster that
    contains no genes).
    """
    # Sets give O(1) membership tests inside the per-gene loop below.
    allcoregenes = set(
        utils.get_gene_id(cds)
        for cds in utils.get_secmet_cds_features(seq_record))
    pksnrpscoregenes = set(
        utils.get_gene_id(cds)
        for cds in utils.get_pksnrps_cds_features(seq_record))
    feature_by_id = utils.get_feature_dict(seq_record)

    # Look the cluster feature up once; it is needed for genes and type.
    cluster_feature = utils.get_cluster_by_nr(seq_record, geneclusternr)
    clustergenes = [
        utils.get_gene_id(cds)
        for cds in utils.get_cluster_cds_features(cluster_feature, seq_record)
    ]
    clustertype = utils.get_cluster_type(cluster_feature)

    annotations = {}
    colors = []
    starts = []
    ends = []
    strands = []
    pksnrpsprots = []
    gtrs = []
    transporters = []
    for gene_id in clustergenes:
        cdsfeature = feature_by_id[gene_id]
        qualifiers = cdsfeature.qualifiers
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if 'product' in qualifiers:
            annotations[gene_id] = qualifiers['product'][0]
        else:
            annotations[gene_id] = 'Unannotated gene'
        starts.append(cdsfeature.location.start)
        ends.append(cdsfeature.location.end)
        # Only an explicit -1 counts as the reverse strand.
        strands.append("-" if cdsfeature.strand == -1 else "+")
        colors.append("#810E15" if gene_id in allcoregenes else "grey")
        if gene_id in pksnrpscoregenes:
            pksnrpsprots.append(gene_id)
        if gene_id in smcogdict:
            cogs = smcogdict[gene_id]
            if len(cogs) > 0:
                if cogs[0] in gtrcoglist:
                    gtrs.append(gene_id)
                if cogs[0] in transportercoglist:
                    transporters.append(gene_id)

    # max()/min() raise ValueError on empty input; report size 0 instead.
    if clustergenes:
        clustersize = max(ends) - min(starts)
    else:
        clustersize = 0
    return (clustergenes, clustertype, annotations, colors, starts, ends,
            strands, pksnrpsprots, gtrs, transporters, clustersize)
def write_gene(txt, info, options):
    """Write gene table to TXT"""
    # TXT columns: gene ID, gene start, gene end, gene strand, smCOG,
    # locus_tag/geneID, annotation
    column_names = [
        "gene ID", "gene start", "gene end", "gene strand", "smCOG",
        "locus_tag", "annotation"
    ]
    txt.write("\t".join(column_names) + "\n")

    for cluster_nr in info.clusternrs:
        # One row per CDS feature of the cluster.
        cluster_feature = utils.get_cluster_by_nr(info.seq_record, cluster_nr)
        cds_features = utils.get_cluster_cds_features(cluster_feature,
                                                      info.seq_record)
        for cds in cds_features:
            row = [
                utils.get_gene_acc(cds).partition(".")[0],
                str(cds.location.start),
                str(cds.location.end),
                # Anything other than strand 1 is written as "-".
                "+" if cds.strand == 1 else "-",
                "",  # smCOG: not used for now
                utils.get_gene_id(cds).partition(".")[0],
                utils.get_gene_annotation(cds),
            ]
            txt.write("\t".join(row) + "\n")
def generate_sidepanel(cluster, seq_record, options, sidepanel=None):
    """Generate sidepanel div

    Adds a summary of the repeat results found in the cluster region to the
    sidepanel and returns the sidepanel.

    Arguments:
        cluster: mapping with at least an 'idx' key holding the cluster number.
        seq_record: record the cluster belongs to; sliced to the cluster span.
        options: unused here; part of the handler call signature.
        sidepanel: an existing sidepanel element to extend, or None to create
            a new '<div class="sidepanel">'.

    Returns:
        The sidepanel element. It must be returned: the caller uses the
        return value and treats None as "no sidepanel".
    """
    logging.debug("generating sidepanel")
    cluster_feature = utils.get_cluster_by_nr(seq_record, cluster['idx'])
    # Restrict the record to the cluster span before gathering results.
    cluster_record = seq_record[
        cluster_feature.location.start:cluster_feature.location.end]
    result_list = gather_results(cluster_record)

    # Extend a panel handed in by an earlier handler rather than replacing
    # it, the same way the other generate_sidepanel implementations do.
    if sidepanel is None:
        sidepanel = pq('<div>')
        sidepanel.addClass('sidepanel')

    repeats = pq('<div>')
    if len(result_list) > 0:
        id_list = []
        for result in result_list:
            if result.cds_id:
                id_list.append(result.cds_id)
            else:
                # No CDS ID available: fall back to the result's coordinates.
                id_list.append("Region with unknown ID from %s to %s" %
                               (result.position[0], result.position[1]))
        repeats.html("%s Coding sequences with repeats found:<br> %s" %
                     (len(result_list), "<br>".join(id_list)))
    else:
        repeats.text("No repetition found")
    sidepanel.append(repeats)
    return sidepanel
def generate_details_div(cluster, seq_record, options, js_domains, details=None):
    """Generate details div

    Builds a new '<div class="details">' holding the concatenated result
    summaries for the cluster region; the div stays empty when no results
    are found.

    Arguments:
        cluster: mapping with at least an 'idx' key holding the cluster number.
        seq_record: record the cluster belongs to; sliced to the cluster span.
        options, js_domains: unused here; part of the handler call signature.
        details: ignored; a fresh element is always created.

    Returns:
        The newly created details element.
    """
    logging.info("generating details div")
    cluster_feature = utils.get_cluster_by_nr(seq_record, cluster['idx'])
    # NOTE(review): an incoming `details` element is discarded and no header
    # element is added to the output - confirm that this is intended.
    details = pq('<div>')
    details.addClass('details')

    # Restrict the record to the cluster span before gathering results.
    cluster_record = seq_record[
        cluster_feature.location.start:cluster_feature.location.end]
    result_list = gather_results(cluster_record)
    if len(result_list) > 0:
        details.html("".join(
            output.write_result_summary(result) for result in result_list))
    return details
def generate_details_div(cluster, seq_record, options, js_domains, details=None):
    """Generate details div"""
    cluster_rec = utils.get_cluster_by_nr(seq_record, cluster['idx'])
    if cluster_rec is None:
        return details

    # Create the container (with its header) only if no earlier handler did.
    if details is None:
        details = pq('<div>')
        details.addClass('details')
        header = pq('<h3>')
        header.text('Detailed annotation')
        details.append(header)

    js_cluster_domains = {
        'id': "cluster-%s-details" % cluster['idx'],
        'orfs': []
    }

    for feature in utils.get_cluster_cds_features(cluster_rec, seq_record):
        qualifiers = feature.qualifiers
        if 'sec_met' not in qualifiers:
            continue

        # Prefer the stored translation; otherwise derive the sequence.
        if 'translation' in qualifiers:
            sequence = qualifiers['translation'][0]
        else:
            sequence = str(utils.get_aa_sequence(feature))

        js_orf = {
            'id': utils.get_gene_id(feature),
            'sequence': sequence,
            'domains': [],
        }
        for qual in qualifiers['sec_met']:
            if qual.startswith('NRPS/PKS Domain:'):
                js_domain = _parse_domain(qual, feature, seq_record)
                if len(js_domain) > 0:
                    js_orf['domains'].append(js_domain)

        # Only ORFs with at least one parsed domain are reported.
        if js_orf['domains']:
            js_cluster_domains['orfs'].append(js_orf)

    if js_cluster_domains['orfs']:
        details_svg = pq('<div>')
        details_svg.addClass('details-svg')
        details_svg.attr('id', '%s-svg' % js_cluster_domains['id'])
        details.append(details_svg)
        js_domains.append(js_cluster_domains)

    return details
def _ripp_note_value(note_quals, label, default=""):
    """Return the text after '<label>: ' in the first note containing '<label>:'."""
    marker = label + ":"
    separator = label + ": "
    for qual in note_quals:
        if marker in qual:
            return qual.partition(separator)[2]
    return default


def write_RiPP(txt, info, options):
    """Write RiPP table to TXT

    Writes a header line and then one tab-separated row per core peptide
    feature of every cluster in ``info.clusternrs``. The row values are
    parsed from the feature's 'note' qualifiers; a value whose note is
    absent is written as an empty field.

    Arguments:
        txt: object with a ``write`` method receiving the table text.
        info: object providing ``clusternrs`` and ``seq_record``.
        options: unused here; part of the writer call signature.
    """
    # TXT columns: RiPP ID, annotation, core peptide, mol weight,
    # monoisotopic_mass, alt mol weights, nr bridges
    txt.write("\t".join([
        "RiPP ID", "annotation", "core peptide", "molecular weight",
        "monoisotopic_mass", "alternative molecular weights",
        "number of bridges"
    ]) + "\n")
    for BGCnr in info.clusternrs:
        # Retrieve all data that will be written out
        cluster_feature = utils.get_cluster_by_nr(info.seq_record, BGCnr)
        cluster_gene_features = utils.get_cluster_cds_features(
            cluster_feature, info.seq_record)
        for RiPP in _find_core_peptides(cluster_feature, info.seq_record):
            # Resolve the ID per peptide so a peptide without an overlapping
            # CDS cannot shift the IDs of the peptides that follow it.
            RiPP_ID = ""
            for cds in cluster_gene_features:
                if utils.features_overlap(cds, RiPP):
                    RiPP_ID = utils.get_gene_acc(cds).partition(".")[0]
                    break

            note_quals = RiPP.qualifiers['note']
            annotation = _ripp_note_value(note_quals, "predicted class")
            core_peptide = _ripp_note_value(note_quals, "predicted core seq")
            mol_weight = _ripp_note_value(note_quals, "molecular weight")
            monoiso_mass = _ripp_note_value(note_quals, "monoisotopic mass")
            # The note is matched by substring; testing the bare label for
            # membership in the list of notes could never succeed.
            alt_mol_weights = _ripp_note_value(
                note_quals, "alternative weights").replace(" ", "")
            nr_bridges = _ripp_note_value(note_quals, "number of bridges")
            txt.write("\t".join([
                RiPP_ID, annotation, core_peptide, mol_weight, monoiso_mass,
                alt_mol_weights, nr_bridges
            ]) + "\n")
def test_sco_cluster3(self):
    """Test lantipeptide prediction for SCO cluster #3"""
    record = seqio.read(utils.get_full_path(__file__, 'sco_cluster3.gbk'))

    # Running the analysis is expected to add exactly two features.
    self.assertEqual(69, len(record.features))
    specific_analysis(record, None)
    self.assertEqual(71, len(record.features))

    core_hits = h._find_core_peptides(
        utils.get_cluster_by_nr(record, 1), record)
    self.assertEqual(1, len(core_hits))
    core = core_hits[0]
    self.assertEqual('Class I', h._get_core_peptide_class(core))
def test_epidermin(self):
    """Test lantipeptide prediction for epidermin"""
    record = seqio.read(utils.get_full_path(__file__, 'epidermin.gbk'))

    # Running the analysis is expected to add exactly two features.
    self.assertEqual(18, len(record.features))
    specific_analysis(record, None)
    self.assertEqual(20, len(record.features))

    core_hits = h._find_core_peptides(
        utils.get_cluster_by_nr(record, 1), record)
    self.assertEqual(1, len(core_hits))
    core = core_hits[0]

    leader_hits = h._find_leader_peptides(
        utils.get_cluster_by_nr(record, 1), record)
    self.assertEqual(1, len(leader_hits))
    leader = leader_hits[0]

    # Predicted masses and bridge count of the core peptide.
    self.assertAlmostEqual(2164, h._get_monoisotopic_mass(core))
    self.assertAlmostEqual(2165.6, h._get_molecular_weight(core))
    self.assertEqual(3, h._get_number_bridges(core))

    # Predicted leader/core sequences, class and extra modifications.
    self.assertEqual("MEAVKEKNDLFNLDVKVNAKESNDSGAEPR",
                     h._get_leader_peptide_sequence(leader))
    self.assertEqual("IASKFICTPGCAKTGSFNSYCC",
                     h._get_core_peptide_sequence(core))
    self.assertEqual('Class I', h._get_core_peptide_class(core))
    self.assertEqual(['AviCys'],
                     h._get_core_peptide_extra_modifications(core))
def retrieve_pksnrps_info(seq_record, geneclusternr, pksnrpsprots):
    """Collect domain and substrate-specificity data for PKS/NRPS genes.

    Arguments:
        seq_record: record to read the domain and prediction data from.
        geneclusternr: number of the cluster whose structure prediction is
            returned.
        pksnrpsprots: gene IDs to collect domain information for.

    Returns an 8-tuple:
        (pksnrpsprotsnames, pksnrpsdomains, substrspecnrpspredictordict,
         substrspecminowadict, substrspecpkssigdict, substrspecconsensusdict,
         krpredictionsdict, structpred)
    ``pksnrpsdomains`` maps a gene ID to ``[domlist, domsdetails]``; the
    prediction dictionaries are keyed by "<gene ID>_<A|AT|CAL|KR><number>".

    Raises:
        KeyError: if a gene or domain key is missing from the domain
            dictionary or from one of the prediction tables.
    """
    pksnrpsprotsnames = [
        utils.get_gene_id(cds)
        for cds in utils.get_pksnrps_cds_features(seq_record)
    ]
    domaindict = utils.get_nrpspks_domain_dict(seq_record)
    substr_spec_preds = utils.get_nrpspks_substr_spec_preds(seq_record)
    pksnrpsdomains = {}
    substrspecnrpspredictordict = {}
    substrspecminowadict = {}
    substrspecpkssigdict = {}
    substrspecconsensusdict = {}
    krpredictionsdict = {}
    for gene_id in pksnrpsprots:
        domlist = []
        domsdetails = {}
        for dom in domaindict[gene_id]:
            # Number repeated domain names within a gene: <name>1, <name>2, ...
            nr = 1
            while dom[0] + str(nr) in domlist:
                nr += 1
            domname = dom[0] + str(nr)
            domlist.append(domname)
            domsdetails[domname] = [dom[1], dom[2]]
            if "AMP-binding" in domname or "A-OX" in domname:
                key = gene_id + "_A" + str(nr)
                substrspecminowadict[key] = \
                    substr_spec_preds.minowa_nrps_preds[key]
                substrspecnrpspredictordict[key] = [
                    substr_spec_preds.nrps_code_preds[key],
                    substr_spec_preds.nrps_svm_preds[key]
                ]
                substrspecconsensusdict[key] = \
                    substr_spec_preds.consensuspreds[key]
            if "PKS_AT" in domname:
                key = gene_id + "_AT" + str(nr)
                substrspecminowadict[key] = \
                    substr_spec_preds.minowa_pks_preds[key]
                substrspecpkssigdict[key] = \
                    substr_spec_preds.pks_code_preds[key]
                substrspecconsensusdict[key] = \
                    substr_spec_preds.consensuspreds[key]
            # A single CAL branch suffices; repeating it only rewrote the
            # same two entries.
            if "CAL_domain" in domname:
                key = gene_id + "_CAL" + str(nr)
                substrspecminowadict[key] = \
                    substr_spec_preds.minowa_cal_preds[key]
                substrspecconsensusdict[key] = \
                    substr_spec_preds.consensuspreds[key]
            if "PKS_KR" in domname:
                key = gene_id + "_KR" + str(nr)
                krpredictionsdict[key] = [
                    substr_spec_preds.kr_activity_preds[key],
                    substr_spec_preds.kr_stereo_preds[key]
                ]
        pksnrpsdomains[gene_id] = [domlist, domsdetails]
    structpred = utils.get_structure_pred(
        utils.get_cluster_by_nr(seq_record, geneclusternr))
    return (pksnrpsprotsnames, pksnrpsdomains, substrspecnrpspredictordict,
            substrspecminowadict, substrspecpkssigdict,
            substrspecconsensusdict, krpredictionsdict, structpred)
def write_signature_gene_info(txt, info, options):
    """Write signature gene table to TXT"""
    # TXT columns: signature_gene, pHMM_hit, e-value, bit score, nr of seeds
    column_names = [
        "signature gene", "pHMM hits", "e-value", "bit score",
        "number of seeds"
    ]
    txt.write("\t".join(column_names) + "\n")

    for cluster_nr in info.clusternrs:
        cluster_feature = utils.get_cluster_by_nr(info.seq_record, cluster_nr)
        cds_features = utils.get_cluster_cds_features(cluster_feature,
                                                      info.seq_record)
        for cds in cds_features:
            # Only genes carrying a 'sec_met' qualifier are signature genes.
            if 'sec_met' not in cds.qualifiers:
                continue
            detected = [
                qual for qual in cds.qualifiers['sec_met']
                if qual.startswith('Domains detected: ')
            ]
            if not detected:
                continue

            gene_ID = utils.get_gene_acc(cds).partition(".")[0]
            # split() on a string without ";" yields that string as the
            # single entry, so one expression covers both cases.
            domain_list = detected[0].partition(
                "Domains detected: ")[2].split(";")
            for domain in domain_list:
                row = [
                    gene_ID,
                    domain.partition(" (")[0].replace(" ", ""),
                    domain.partition("E-value: ")[2].partition(",")[0],
                    domain.partition("bitscore: ")[2].partition(",")[0],
                    domain.partition("seeds: ")[2].partition(")")[0],
                ]
                txt.write("\t".join(row) + "\n")
def generate_sidepanel(cluster, seq_record, options, sidepanel=None):
    """Generate sidepanel div

    Appends three sections to the sidepanel: the predicted core structure
    image with a warning text, a 'Prediction details' list with the monomer
    prediction and per-gene substrate predictions, and - when the cluster
    type contains 'nrps' - a block of NORINE database links.

    Arguments:
        cluster: mapping read for its 'idx' and 'type' keys.
        seq_record: record containing the cluster and its CDS features.
        options: read for ``outputfoldername`` and ``docking``; ``docking``
            is set to an empty dict when it is not present.
        sidepanel: existing sidepanel element to extend, or None to create
            a new one.

    Returns:
        The sidepanel element, or the unchanged ``sidepanel`` argument when
        the cluster cannot be found.
    """
    cluster_rec = utils.get_cluster_by_nr(seq_record, cluster['idx'])
    if cluster_rec is None:
        return sidepanel

    if sidepanel is None:
        sidepanel = pq('<div>')
        sidepanel.addClass('sidepanel')

    # --- Section 1: structure image, linked to itself, plus a warning ---
    structure = pq('<div>')
    structure.addClass('structure')
    structure_header = pq('<h3>')
    structure_header.text('Predicted core structure')
    structure.append(structure_header)
    a = pq('<a>')
    a.attr('href',
           _get_structure_image_url(cluster_rec, options.outputfoldername))
    a.attr('target', '_new')
    structure.append(a)
    structure_img = pq('<img>')
    structure_img.attr(
        'src', _get_structure_image_url(cluster_rec, options.outputfoldername))
    a.append(structure_img)
    warning = pq('<div>')
    warning.addClass('as-structure-warning')
    # Make sure options.docking exists so the lookup below cannot fail.
    if not 'docking' in options:
        options.docking = {}
    # The warning wording depends on the truthiness of this cluster's entry
    # in options.docking.
    if cluster['idx'] in options.docking and options.docking[cluster['idx']]:
        warning.text('Rough prediction of core scaffold based on assumed '
                     'PKS linker matching; tailoring reactions not taken '
                     'into account')
    else:
        warning.text('Rough prediction of core scaffold based on assumed '
                     'PKS/NRPS colinearity; tailoring reactions not taken '
                     'into account')
    structure.append(warning)
    sidepanel.append(structure)

    # --- Section 2: prediction details list ---
    details = pq('<div>')
    details.addClass('more-details')
    details_header = pq('<h3>')
    details_header.text('Prediction details')
    details.append(details_header)
    details_list = pq('<dl>')
    details_list.addClass('prediction-text')
    details.append(details_list)
    sidepanel.append(details)
    dt = pq('<dt>')
    dt.text('Monomers prediction:')
    details_list.append(dt)
    dd = pq('<dd>')
    dd.text(_get_monomer_prediction(cluster_rec))
    details_list.append(dd)

    features = utils.get_cluster_cds_features(cluster_rec, seq_record)
    for feature in features:
        if not 'sec_met' in feature.qualifiers:
            continue
        # The gene ID header is emitted lazily, once the first prediction
        # for this feature is written.
        header_printed = False
        per_CDS_predictions = []
        for qual in feature.qualifiers['sec_met']:
            if not qual.startswith('NRPS/PKS Domain:'):
                continue
            # logging.debug("qual: %s" % qual)
            preds = _parse_substrate_predictions(qual)
            per_Adomain_predictions = []
            for key, val in preds:
                if not header_printed:
                    dt = pq('<dt>')
                    dt.text(utils.get_gene_id(feature))
                    details_list.append(dt)
                    header_printed = True
                dd = pq('<dd>')
                dd.html('%s: %s<br>' % (key, val))
                details_list.append(dd)
                if qual.startswith("NRPS/PKS Domain: AMP-binding"):
                    # NOTE(review): the filtered list only gates the
                    # extend(); the unfiltered values are what get stored -
                    # confirm that this is intended.
                    values = _filter_norine_as(val.split(","))
                    if len(values) > 0:
                        per_Adomain_predictions.extend(val.split(","))
            if len(preds) > 0:
                if qual.startswith("NRPS/PKS Domain: AMP-binding"):
                    # De-duplicate the predictions collected for this domain.
                    per_Adomains_predictions_unique = list(
                        set(per_Adomain_predictions))
                    per_CDS_predictions.append(per_Adomains_predictions_unique)
                    # logging.debug("substrate prediction list: %s" % ",".join(per_Adomains_predictions_unique) )
                # Spacer entry after each domain that had predictions.
                dd = pq('<dd>')
                dd.append(pq('<br>'))
                details_list.append(dd)
        if len(per_CDS_predictions) > 0:
            url = _get_norine_url_for_specArray(per_CDS_predictions)
            # The NORINE links are only added when a strict-mode URL exists.
            if url:
                dd = pq('<dd>')
                dd.append("Search NORINE for peptide in ")
                a = pq('<a>')
                a.attr('href', url)
                a.attr('target', '_new')
                a.text("strict mode")
                dd.append(a)
                dd.append(" // ")
                url = _get_norine_url_for_specArray(per_CDS_predictions,
                                                    be_strict=False)
                a = pq('<a>')
                a.attr('href', url)
                a.attr('target', '_new')
                a.text("relaxed mode")
                dd.append(a)
                dd.append(pq('<br>'))
                dd.append(pq('<br>'))
                details_list.append(dd)

    # --- Section 3: database cross-links, for 'nrps' cluster types only ---
    if cluster['type'].find('nrps') > -1:
        cross_refs = pq("<div>")
        refs_header = pq('<h3>')
        refs_header.text('Database cross-links')
        cross_refs.append(refs_header)
        links = pq("<div>")
        links.addClass('prediction-text')
        a = pq("<a>")
        a.attr('href', 'http://bioinfo.lifl.fr/norine/form2.jsp')
        a.attr('target', '_new')
        a.text("Link to NORINE database query form")
        links.append(a)
        links.append("<br>")
        a = pq("<a>")
        url = _get_norine_url_for_cluster(cluster_rec)
        logging.debug("NORINE URL string: %s" % url)
        a.attr('href', url)
        a.attr('target', '_new')
        a.text("strict mode")
        links.append("Direct lookup in NORINE database in ")
        links.append(a)
        links.append(" // ")
        url = _get_norine_url_for_cluster(cluster_rec, be_strict=False)
        a = pq("<a>")
        a.attr('href', url)
        a.attr('target', '_new')
        a.text("relaxed mode")
        links.append(a)
        cross_refs.append(links)
        sidepanel.append(cross_refs)
    return sidepanel
def add_cluster_page(d, cluster, seq_record, options, extra_data, seq_id):
    """Build the HTML page for one cluster and insert it into the document.

    The page consists of a header, an optional sidepanel produced by the
    cluster's handler plugins, and a content div holding the description,
    an optional legend, handler-specific details and the optional
    ClusterBlast / SubClusterBlast / KnownClusterBlast sections.

    Arguments:
        d: document object; the page is inserted after its last '.page'
            element.
        cluster: mapping read for 'idx' and 'type' (and optionally
            'probability'); it is also used as the mapping for the
            '%(...)s' placeholders of the description text.
        seq_record: record containing the cluster.
        options: run options; read for plugins, input_type, smcogs, tta,
            cassis, borderpredict, transatpks_da, the three *clusterblast
            switches, nclusters and outputfoldername.
        extra_data: mapping whose 'js_domains' entry is passed to the
            handlers' generate_details_div.
        seq_id: prefix of the per-cluster GenBank download file name.
    """
    handlers = find_plugins_for_cluster(options.plugins, cluster)
    cluster_rec = utils.get_cluster_by_nr(seq_record, cluster['idx'])
    rules = get_detection_rules(cluster_rec)

    # --- Page container and header ---
    page = pq('<div>')
    page.addClass('page')
    page.attr('id', 'cluster-%s' % cluster['idx'])
    header = pq('<h3>')
    header.text(
        '%s - Cluster %s - %s' %
        (seq_record.name, cluster['idx'], cluster['type'].capitalize()))
    page.append(header)

    # Each handler receives the sidepanel returned by the previous one.
    sidepanel = None
    for handler in handlers:
        sidepanel = handler.generate_sidepanel(cluster, seq_record, options,
                                               sidepanel)
    if sidepanel is not None:
        page.append(sidepanel)

    content = pq('<div>')
    content.addClass('content')

    # --- Description: download link, summary text, detection rules ---
    description = pq('<div>')
    description.addClass('description-container')
    desc_header = pq('<h3>')
    desc_header.text('Gene cluster description')
    description.append(desc_header)
    cluster_download = pq('<div>')
    cluster_download.addClass('cluster-download')
    description.append(cluster_download)
    dl_link = pq('<a>')
    dl_link.attr('href', '%s.cluster%03d.gbk' % (seq_id, cluster['idx']))
    dl_link.text('Download cluster GenBank file')
    cluster_download.append(dl_link)
    desc_text = pq('<div>')
    desc_text.addClass('description-text')
    # The template is filled from the cluster mapping further below; the
    # location placeholders are only used for nucleotide input.
    if options.input_type == 'nucl':
        text = seq_record.name + ' - Gene Cluster %(idx)s. Type = %(type)s. Location: %(start)s - %(end)s nt. '
    else:
        text = seq_record.name + '- Gene Cluster %(idx)s. Type = %(type)s. '
    if 'probability' in cluster:
        text += 'ClusterFinder probability: %(probability)s. '
    text += 'Click on genes for more information.'
    # NOTE(review): seq_record.name is part of the format template, so a
    # '%' character in the name would break this substitution.
    desc_text.text(text % cluster)
    description.append(desc_text)
    rules_header = pq('<a>')
    rules_header.addClass('cluster-rules-header')
    rules_header.attr('id', 'cluster-%s-rules-header' % cluster['idx'])
    rules_header.attr('href', '#cluster-%s' % cluster['idx'])
    rules_header.text('Show pHMM detection rules used')
    description.append(rules_header)
    detection_rules = pq('<div>')
    detection_rules.addClass('cluster-rules')
    detection_rules.attr('id', 'cluster-%s-rules' % cluster['idx'])
    detection_rules.html('<br>'.join(rules))
    description.append(detection_rules)
    desc_svg = pq('<div>')
    desc_svg.attr('id', 'cluster-%s-svg' % cluster['idx'])
    description.append(desc_svg)
    content.append(description)

    # --- Legend (nucleotide input only) ---
    if options.input_type == 'nucl':
        legend = pq('<div>')
        legend.addClass('legend')
        legend_header = pq('<h4>')
        legend_header.text('Legend:')
        legend.append(legend_header)
        legend_text = pq('<div>')
        if not options.smcogs:
            legend_text.append("Only available when smCOG analysis was run")
        legend_text.append(
            generate_legend_entry('legend-type-biosynthetic',
                                  'core biosynthetic genes'))
        legend_text.append(
            generate_legend_entry('legend-type-biosynthetic-additional',
                                  'additional biosynthetic genes'))
        legend_text.append(
            generate_legend_entry('legend-type-transport',
                                  'transport-related genes'))
        legend_text.append(
            generate_legend_entry('legend-type-regulatory',
                                  'regulatory genes'))
        legend_text.append(
            generate_legend_entry('legend-type-other', 'other genes'))
        # Optional legend entries, one per enabled analysis.
        if options.tta:
            legend_text.append(
                generate_legend_entry('legend-tta-codon', 'TTA codon'))
        if options.cassis:
            legend_text.append(
                generate_legend_entry('legend-border-cassis',
                                      'cluster extent as predicted by CASSIS'))
        if options.borderpredict:
            legend_text.append(
                generate_legend_entry(
                    'legend-border-clusterfinder',
                    'cluster extent as predicted by ClusterFinder'))
        legend.append(legend_text)
        content.append(legend)

    # --- Handler-specific details and domain alignment ---
    # Both generators are optional on a handler, hence the dir() checks.
    details = None
    da = None
    for handler in handlers:
        if "generate_details_div" in dir(handler):
            details = handler.generate_details_div(cluster, seq_record,
                                                   options,
                                                   extra_data['js_domains'],
                                                   details)
        if 'generate_domain_alignment_div' in dir(
                handler) and options.transatpks_da:
            da = handler.generate_domain_alignment_div(cluster, seq_record,
                                                       options, da)
    if details is not None:
        content.append(details)
    if da is not None:
        content.append(da)

    # --- ClusterBlast section ---
    if options.clusterblast:
        top_ten_clusters = cluster_rec.qualifiers.get('clusterblast', [])
        cb = pq('<div>')
        cb.addClass('clusterblast')
        cb_header = pq('<h3>')
        cb_header.text("Homologous gene clusters")
        cb.append(cb_header)
        cb_control = pq('<div>')
        cb.append(cb_control)
        if len(top_ten_clusters) == 0:
            cb_download = pq('No significant ClusterBlast hits found.')
            cb_control.append(cb_download)
        else:
            cb_select = pq('<select>')
            cb_select.attr('id', 'clusterblast-%s-select' % cluster['idx'])
            cb_select.addClass('clusterblast-selector')
            cb_control.append(cb_select)
            opt = pq('<option>')
            opt.attr(
                'value',
                path.join('svg', 'clusterblast%s_all.svg' % cluster['idx']))
            opt.text('All hits')
            cb_select.append(opt)
            # One option per hit whose SVG file exists in the output folder.
            for i in range(1, options.nclusters + 1):
                svg_file = path.join(
                    'svg', 'clusterblast%s_%s.svg' % (cluster['idx'], i))
                full_path = path.join(options.outputfoldername, svg_file)
                if path.exists(full_path):
                    opt = pq('<option>')
                    opt.attr('value', svg_file)
                    opt_text = 'Cluster %s hit %s' % (cluster['idx'], i)
                    # Use the second tab-separated field of the stored hit
                    # line as the label when one is available.
                    if len(top_ten_clusters) >= i:
                        opt_text = top_ten_clusters[i - 1].split('\t')[1]
                    opt.text(opt_text)
                    cb_select.append(opt)
                else:
                    logging.debug("failed to find %r" % full_path)
            cb_download = pq('<button>')
            cb_download.attr('id', 'clusterblast-%s-download' % cluster['idx'])
            cb_download.text('Download graphic')
            cb_control.append(cb_download)
        cb_svg = pq('<div>')
        cb_svg.attr('id', 'clusterblast-%s-svg' % cluster['idx'])
        cb.append(cb_svg)
        content.append(cb)

    # --- SubClusterBlast section ---
    if options.subclusterblast:
        top_ten_clusters = cluster_rec.qualifiers.get('subclusterblast', [])
        cb = pq('<div>')
        cb.addClass('subclusterblast')
        cb_header = pq('<h3>')
        cb_header.text("Homologous subclusters")
        cb.append(cb_header)
        cb_control = pq('<div>')
        cb.append(cb_control)
        cb_select = pq('<select>')
        cb_select.attr('id', 'subclusterblast-%s-select' % cluster['idx'])
        cb_select.addClass('clusterblast-selector')
        cb_control.append(cb_select)
        opt = pq('<option>')
        opt.attr(
            'value',
            path.join('svg', 'subclusterblast%s_all.svg' % cluster['idx']))
        opt.text('All hits')
        cb_select.append(opt)
        subclusters_added = 0
        for i in range(1, options.nclusters + 1):
            svg_file = path.join(
                'svg', 'subclusterblast%s_%s.svg' % (cluster['idx'], i))
            full_path = path.join(options.outputfoldername, svg_file)
            if path.exists(full_path):
                opt = pq('<option>')
                opt.attr('value', svg_file)
                opt_text = 'Cluster %s hit %s' % (cluster['idx'], i)
                if len(top_ten_clusters) >= i:
                    opt_text = top_ten_clusters[i - 1].split('\t')[1].replace(
                        '_', ' ')
                opt.text(opt_text)
                cb_select.append(opt)
                subclusters_added += 1
            else:
                logging.debug("failed to find %r" % full_path)
        cb_svg = pq('<div>')
        cb_svg.attr('id', 'subclusterblast-%s-svg' % cluster['idx'])
        cb.append(cb_svg)
        # The download button needs the overview SVG and at least one hit.
        if path.exists(path.join(options.outputfoldername, 'svg', 'subclusterblast%s_all.svg' % cluster['idx'])) and \
                subclusters_added > 0:
            cb_download = pq('<button>')
            cb_download.attr('id',
                             'subclusterblast-%s-download' % cluster['idx'])
            cb_download.text('Download graphic')
            cb_control.append(cb_download)
        content.append(cb)

    # --- KnownClusterBlast section ---
    if options.knownclusterblast:
        top_ten_clusters = cluster_rec.qualifiers.get('knownclusterblast', [])
        cb = pq('<div>')
        cb.addClass('knownclusterblast')
        cb_header = pq('<h3>')
        cb_header.text("Homologous known gene clusters")
        cb.append(cb_header)
        cb_control = pq('<div>')
        cb.append(cb_control)
        cb_select = pq('<select>')
        cb_select.attr('id', 'knownclusterblast-%s-select' % cluster['idx'])
        cb_select.addClass('clusterblast-selector')
        cb_control.append(cb_select)
        opt = pq('<option>')
        opt.attr(
            'value',
            path.join('svg', 'knownclusterblast%s_all.svg' % cluster['idx']))
        opt.text('All hits')
        cb_select.append(opt)
        knownclusters_added = 0
        for i in range(1, options.nclusters + 1):
            svg_file = path.join(
                'svg', 'knownclusterblast%s_%s.svg' % (cluster['idx'], i))
            full_path = path.join(options.outputfoldername, svg_file)
            if path.exists(full_path):
                opt = pq('<option>')
                opt.attr('value', svg_file)
                opt_text = 'Cluster %s hit %s' % (cluster['idx'], i)
                if len(top_ten_clusters) >= i:
                    opt_text = top_ten_clusters[i - 1].split('\t')[1].replace(
                        '_', ' ')
                opt.text(opt_text)
                cb_select.append(opt)
                knownclusters_added += 1
            else:
                logging.debug("failed to find %r" % full_path)
        cb_svg = pq('<div>')
        cb_svg.attr('id', 'knownclusterblast-%s-svg' % cluster['idx'])
        cb.append(cb_svg)
        # The download button needs the overview SVG and at least one hit.
        if path.exists(path.join(options.outputfoldername, 'svg', 'knownclusterblast%s_all.svg' % cluster['idx'])) and \
                knownclusters_added > 0:
            cb_download = pq('<button>')
            cb_download.attr('id',
                             'knownclusterblast-%s-download' % cluster['idx'])
            cb_download.text('Download graphic')
            cb_control.append(cb_download)
        content.append(cb)

    page.append(content)
    d('.page:last').after(page)
def _first_specificity(quals, prefix, separator):
    """Return the text after `separator` in the first qualifier starting with `prefix`, else ""."""
    for qual in quals:
        if qual.startswith(prefix):
            return qual.partition(separator)[2]
    return ""


def write_NRPS_PKS(txt, info, options):
    """Write NRPS/PKS table to TXT

    Writes a header line and then one tab-separated row per aSDomain
    feature that overlaps an NRPS/PKS gene of each cluster in
    ``info.clusternrs``. Specificity columns without a matching qualifier
    are written as empty fields.

    Arguments:
        txt: object with a ``write`` method receiving the table text.
        info: object providing ``clusternrs`` and ``seq_record``.
        options: unused here; part of the writer call signature.
    """
    # TXT columns: NRPS/PKS ID, annotation, aSDomain, score, evalue,
    # domain type, subtype, range, activity, NRPSPredictor2, Stachelhaus,
    # Minowa, pkssignature, consensus
    txt.write("\t".join([
        "Cluster_ID", "NRPSPKS_ID", "annotation", "aSDomain", "score",
        "evalue", "domain_type", "subtype", "domain_start", "domain_end",
        "KR activity", "KR stereochemistry", "NRPSPredictor2", "Stachelhaus",
        "Minowa", "pkssignature", "consensus"
    ]) + "\n")
    for BGCnr in info.clusternrs:
        # Retrieve all data that will be written out
        cluster_feature = utils.get_cluster_by_nr(info.seq_record, BGCnr)
        cluster_gene_features = utils.get_cluster_cds_features(
            cluster_feature, info.seq_record)
        cluster_id = "{seq_id}_c{cluster_nr}".format(
            seq_id=info.seq_record.id, cluster_nr=BGCnr)
        # Genes with at least one 'NRPS/PKS Domain:' sec_met qualifier.
        NRPSs_PKSs = [
            cds for cds in cluster_gene_features
            if 'sec_met' in cds.qualifiers and any(
                qual.startswith('NRPS/PKS Domain:')
                for qual in cds.qualifiers['sec_met'])
        ]
        if not NRPSs_PKSs:
            continue
        # The cluster's domain features do not depend on the gene, so they
        # are fetched once per cluster instead of once per gene.
        cluster_aSDomains = list(
            utils.get_cluster_aSDomain_features(cluster_feature,
                                                info.seq_record))
        for cds in NRPSs_PKSs:
            enzyme_ID = utils.get_gene_acc(cds).partition(".")[0]
            # Detect and extract the subtype with the same test, so a
            # detected subtype can always be extracted.
            subtype_quals = [
                qual for qual in cds.qualifiers['sec_met']
                if qual.startswith("NRPS/PKS subtype: ")
            ]
            if subtype_quals:
                enzyme_annotation = subtype_quals[0].partition(
                    "NRPS/PKS subtype: ")[2]
            else:
                logging.warning("No enzyme annotation for %s", enzyme_ID)
                enzyme_annotation = ""

            gene_id = utils.get_gene_id(cds)
            aSDomains = [
                dom for dom in cluster_aSDomains
                if utils.features_overlap(cds, dom) and gene_id in
                [dom.qualifiers['locus_tag'], dom.qualifiers['locus_tag'][0]]
            ]
            for aSDomain in aSDomains:
                qualifiers = aSDomain.qualifiers
                domtype = qualifiers['domain'][0]
                if "domain_subtype" in qualifiers:
                    subtype = qualifiers['domain_subtype'][0]
                else:
                    subtype = ""
                aSDomain_ID = qualifiers['asDomain_id'][0]
                score = str(qualifiers['score'][0])
                evalue = str(qualifiers['evalue'][0])
                dom_start = str(aSDomain.location.start)
                dom_end = str(aSDomain.location.end)

                # `in` replaces dict.has_key(), which no longer exists in
                # Python 3; without the qualifier every column stays empty.
                if 'specificity' in qualifiers:
                    specificity = qualifiers['specificity']
                else:
                    specificity = []
                kr_activity = _first_specificity(
                    specificity, "KR activity", "KR activity: ")
                kr_stereochemistry = _first_specificity(
                    specificity, "KR stereochemistry", "KR stereochemistry: ")
                NRPSPredictor2 = _first_specificity(
                    specificity, "NRPSpredictor2", "NRPSpredictor2 SVM: ")
                Stachelhaus = _first_specificity(
                    specificity, "Stachelhaus", "Stachelhaus code: ")
                Minowa = _first_specificity(specificity, "Minowa", "Minowa: ")
                pkssignature = _first_specificity(
                    specificity, "PKS signature", "PKS signature: ")
                consensus = _first_specificity(
                    specificity, "consensus", "consensus: ")

                txt.write("\t".join([
                    cluster_id, enzyme_ID, enzyme_annotation, aSDomain_ID,
                    score, evalue, domtype, subtype, dom_start, dom_end,
                    kr_activity, kr_stereochemistry, NRPSPredictor2,
                    Stachelhaus, Minowa, pkssignature, consensus
                ]) + "\n")
def generate_details_div(cluster, seq_record, options, js_domains, details=None):
    """Generate the 'Detailed annotation' div for one cluster.

    The cluster feature is looked up in ``seq_record`` via ``cluster['idx']``.
    Every core peptide is listed together with the leader peptide found at
    the same index of the leader list, as ``<leader> - <core>``. In the core
    sequence, S is rendered as Dha, T as Dhb and C is wrapped in a 'cys' span.

    ``options`` and ``js_domains`` are accepted but not used by this function.

    Returns ``details`` untouched (possibly None) when the cluster feature
    cannot be found, otherwise the populated div. A fresh div with a header
    is created only when no ``details`` element was passed in.
    """
    cluster_rec = utils.get_cluster_by_nr(seq_record, cluster['idx'])
    if cluster_rec is None:
        return details

    leader_peptides = _find_leader_peptides(cluster_rec, seq_record)
    core_peptides = _find_core_peptides(cluster_rec, seq_record)

    if details is None:
        details = pq('<div>')
        details.addClass('details')

        header = pq('<h3>')
        header.text('Detailed annotation')
        details.append(header)

    if len(core_peptides) == 0:
        # Nothing to list: leave a short note and stop here.
        empty_note = pq('<div>')
        empty_note.addClass('details-text')
        empty_note.text('No core peptides found.')
        details.append(empty_note)
        return details

    # Applied in this order; none of the inserted markup contains an
    # upper-case S, T or C, so later replacements leave earlier ones intact.
    residue_markup = (
        ('S', '<span class="dha">Dha</span>'),
        ('T', '<span class="dhb">Dhb</span>'),
        ('C', '<span class="cys">C</span>'),
    )

    peptide_list = pq('<dl>')
    peptide_list.addClass('details-text')
    for position, core in enumerate(core_peptides):
        # Leaders and cores are paired purely by list position.
        leader_seq = _get_leader_peptide_sequence(leader_peptides[position])
        core_seq = _get_core_peptide_sequence(core)

        title = pq('<dt>')
        title.text('%s leader / core peptide, putative %s' %
                   (utils.get_gene_id(core), _get_core_peptide_class(core)))
        peptide_list.append(title)

        for residue, markup in residue_markup:
            core_seq = core_seq.replace(residue, markup)

        entry = pq('<dd>')
        entry.html("%s - %s" % (leader_seq, core_seq))
        peptide_list.append(entry)
    details.append(peptide_list)

    # Legend explaining the abbreviations used in the marked-up sequences.
    legend = pq('<div>')
    legend.addClass('legend')
    legend_title = pq('<h4>')
    legend_title.text('Legend:')
    legend.append(legend_title)
    legend_body = pq('<div>')
    legend_body.html('<span class="dha">Dha</span>: Didehydroalanine<br>'
                     '<span class="dhb">Dhb</span>: Didehydrobutyrine')
    legend.append(legend_body)
    details.append(legend)

    return details
def generate_sidepanel(cluster, seq_record, options, sidepanel=None):
    """Generate the sidepanel div with per-peptide prediction details.

    The cluster feature is looked up in ``seq_record`` via ``cluster['idx']``.
    For every core peptide the panel shows the predicted class, score,
    monoisotopic mass, molecular weight and number of bridges, followed by
    any extra modifications and, if present, the alternative weights
    (numbered from N = 1). A 'Database cross-links' section with a NORINE
    link is appended after the prediction details.

    ``options`` is accepted but not used by this function.

    Returns ``sidepanel`` untouched (possibly None) when the cluster feature
    cannot be found. When there are no core peptides, the (possibly newly
    created) sidepanel is returned without any content added.
    """
    cluster_rec = utils.get_cluster_by_nr(seq_record, cluster['idx'])
    if cluster_rec is None:
        return sidepanel

    if sidepanel is None:
        sidepanel = pq('<div>')
        sidepanel.addClass('sidepanel')

    core_peptides = _find_core_peptides(cluster_rec, seq_record)
    if len(core_peptides) == 0:
        return sidepanel

    summary_format = ('Putative %s<br>Score: %0.2f<br>Monoisotopic mass: %s Da<br>'
                      'Molecular weight: %s Da<br>Number of bridges: %s')

    prediction_box = pq('<div>')
    prediction_box.addClass('more-details')
    box_title = pq('<h3>')
    box_title.text('Prediction details')
    prediction_box.append(box_title)
    prediction_list = pq('<dl>')
    prediction_list.addClass('prediction-text')

    for peptide in core_peptides:
        name_cell = pq('<dt>')
        name_cell.text(utils.get_gene_id(peptide))
        prediction_list.append(name_cell)

        info_cell = pq('<dd>')
        monoisotopic = _get_monoisotopic_mass(peptide)
        weight_avg = _get_molecular_weight(peptide)
        n_bridges = _get_number_bridges(peptide)
        peptide_class = _get_core_peptide_class(peptide)
        peptide_score = _get_core_peptide_score(peptide)
        info_cell.html(summary_format % (peptide_class, peptide_score,
                                         monoisotopic, weight_avg, n_bridges))

        # Each extra modification is appended as its own line by re-reading
        # the current HTML of the cell and writing it back with the addition.
        for modification in _get_core_peptide_extra_modifications(peptide):
            info_cell.html('%s<br>Additional modifications: %s' %
                           (info_cell.html(), modification))

        alternative_weights = _get_alternative_weights(peptide)
        if alternative_weights:
            weights_dl = pq('<dl>')
            weights_title = pq('<dt>')
            weights_title.text('Alternative weights')
            weights_dl.append(weights_title)
            weights_note = pq('<dd>')
            weights_note.addClass('alt-weight-desc')
            weights_note.text('(assuming N unmodified Ser/Thr residues)')
            weights_dl.append(weights_note)

            # N counts from 1 in the order the weights are returned.
            for n_unmodified, alt_weight in enumerate(alternative_weights, 1):
                weight_row = pq('<dd>')
                weight_cell = pq('<span>')
                weight_cell.text('%0.1f Da' % alt_weight)
                weight_cell.addClass('alt-weight')
                count_cell = pq('<span>')
                count_cell.text('N = %d' % n_unmodified)
                count_cell.addClass('alt-weight-n')
                weight_row.append(weight_cell)
                weight_row.append(count_cell)
                weights_dl.append(weight_row)
            info_cell.append(weights_dl)

        prediction_list.append(info_cell)

    prediction_box.append(prediction_list)
    sidepanel.append(prediction_box)

    # Static cross-reference section pointing at the NORINE lookup form.
    xref_box = pq("<div>")
    xref_title = pq('<h3>')
    xref_title.text('Database cross-links')
    xref_box.append(xref_title)
    xref_links = pq("<div>")
    xref_links.addClass('prediction-text')
    norine_link = pq("<a>")
    norine_link.attr('href', 'http://bioinfo.lifl.fr/norine/form2.jsp')
    norine_link.attr('target', '_new')
    norine_link.text("Look up in NORINE database")
    xref_links.append(norine_link)
    xref_box.append(xref_links)
    sidepanel.append(xref_box)

    return sidepanel
def write_BGC(txt, info, options):
    """Write the BGC table to TXT.

    Writes a tab-separated header line followed by one line per cluster
    number in ``info.clusternrs``.

    txt     -- writable file-like object; only ``write`` is used.
    info    -- object providing ``clusternrs``, ``seq_record``,
               ``clustertypes`` and ``accessions`` (the latter two indexed
               by cluster number).
    options -- accepted but not used by this function.

    Multi-valued columns are joined with ';'. The RiPPs column is '-' when
    the cluster has no core peptides, and the predicted structure column is
    'N/A' when the cluster feature has no 'structure' qualifier.
    """
    # TXT columns: BGC ID, BGC_type, detection_rules_used, BGC_range, genes,
    # subclusters, NRPSs_PKSs, signature_genes, RiPPs, pred_structure, monomers
    txt.write("\t".join([
        "BGC ID", "BGC type", "detection rules used", "BGC_range", "genes",
        "subclusters", "NRPSs/PKSs", "signature_genes", "RiPPs",
        "predicted structure", "monomers"
    ]) + "\n")

    for BGCnr in info.clusternrs:
        # Retrieve all data that will be written out
        BGC_ID = "%s_c%s" % (info.seq_record.id.partition(".")[0], BGCnr)
        cluster_feature = utils.get_cluster_by_nr(info.seq_record, BGCnr)
        cluster_gene_features = utils.get_cluster_cds_features(
            cluster_feature, info.seq_record)
        BGC_type = info.clustertypes[BGCnr].replace("-", ";")
        detection_rules_used = '"' + ";".join(
            get_detection_rules(cluster_feature)) + '"'
        BGC_range = ";".join([
            str(cluster_feature.location.start),
            str(cluster_feature.location.end)
        ])
        genes = ";".join(info.accessions[BGCnr])

        if 'subclusterblast' in cluster_feature.qualifiers:
            # Keep only the part after the first tab of each qualifier value.
            subclusters = ";".join([
                qual.partition("\t")[2]
                for qual in cluster_feature.qualifiers['subclusterblast']
            ])
        else:
            subclusters = ""
        # TODO The subclusterblast module should probably be changed for the
        # precalcs to provide a list here of the 100% hits instead of all hits

        # Genes with at least one 'NRPS/PKS Domain:' sec_met qualifier.
        NRPSs_PKSs = ";".join([
            utils.get_gene_acc(cds).partition(".")[0]
            for cds in cluster_gene_features
            if 'sec_met' in cds.qualifiers and any(
                qual.startswith('NRPS/PKS Domain:')
                for qual in cds.qualifiers['sec_met'])
        ])
        signature_genes = ";".join([
            utils.get_gene_acc(cds).partition(".")[0]
            for cds in cluster_gene_features
            if 'sec_met' in cds.qualifiers
        ])

        # Look the core peptides up once and reuse the result for both the
        # emptiness check and the iteration.
        core_peptides = _find_core_peptides(cluster_feature, info.seq_record)
        if core_peptides:
            ripp_list = []
            for peptide in core_peptides:
                # Report only the first CDS overlapping each core peptide.
                for cds in cluster_gene_features:
                    if utils.features_overlap(cds, peptide):
                        ripp_list.append(
                            utils.get_gene_acc(cds).partition(".")[0])
                        break
            RiPPs = ";".join(ripp_list)
        else:
            RiPPs = "-"

        if 'structure' in cluster_feature.qualifiers:
            pred_structure = ";".join(cluster_feature.qualifiers['structure'])
        else:
            pred_structure = "N/A"
        monomers = utils.get_structure_pred(cluster_feature)

        # Write data to TXT
        txt.write("\t".join([
            BGC_ID, BGC_type, detection_rules_used, BGC_range, genes,
            subclusters, NRPSs_PKSs, signature_genes, RiPPs, pred_structure,
            monomers
        ]) + "\n")