Ejemplo n.º 1
0
    def test_epicidin(self):
        """Test lantipeptide prediction for epicidin 280"""
        record = seqio.read(utils.get_full_path(__file__, 'epicidin_280.gbk'))
        self.assertEqual(21, len(record.features))

        # The analysis is expected to add two features to the record.
        specific_analysis(record, None)
        self.assertEqual(23, len(record.features))

        core_features = h._find_core_peptides(
            utils.get_cluster_by_nr(record, 1), record)
        self.assertEqual(1, len(core_features))
        core = core_features[0]

        leader_features = h._find_leader_peptides(
            utils.get_cluster_by_nr(record, 1), record)
        self.assertEqual(1, len(leader_features))
        leader_feature = leader_features[0]

        # Predicted masses and bridge count of the core peptide.
        self.assertAlmostEqual(3115.7, h._get_monoisotopic_mass(core))
        self.assertAlmostEqual(3117.7, h._get_molecular_weight(core))
        expected_alternatives = [3135.7, 3153.7, 3171.7]
        self.assertEqual(expected_alternatives,
                         h._get_alternative_weights(core))
        self.assertEqual(3, h._get_number_bridges(core))

        # Predicted leader/core sequences, class and extra modifications.
        expected_leader = "MENKKDLFDLEIKKDNMENNNELEAQ"
        self.assertEqual(expected_leader,
                         h._get_leader_peptide_sequence(leader_feature))
        expected_core = "SLGPAIKATRQVCPKATRFVTVSCKKSDCQ"
        self.assertEqual(expected_core, h._get_core_peptide_sequence(core))
        self.assertEqual('Class I', h._get_core_peptide_class(core))
        self.assertEqual(['Lac'],
                         h._get_core_peptide_extra_modifications(core))
Ejemplo n.º 2
0
    def test_nisin(self):
        """Test lantipeptide prediction for nisin A"""
        record = seqio.read(utils.get_full_path(__file__, 'nisin.gbk'))
        self.assertEqual(38, len(record.features))

        # The analysis is expected to add two features to the record.
        specific_analysis(record, None)
        self.assertEqual(40, len(record.features))

        core_features = h._find_core_peptides(
            utils.get_cluster_by_nr(record, 1), record)
        self.assertEqual(1, len(core_features))
        core = core_features[0]

        leader_features = h._find_leader_peptides(
            utils.get_cluster_by_nr(record, 1), record)
        self.assertEqual(1, len(leader_features))
        leader_feature = leader_features[0]

        # The real monoisotopic mass is 3351.51, but a Dha is overpredicted.
        self.assertAlmostEqual(3333.6, h._get_monoisotopic_mass(core))
        # The real molecular weight is 3354.5, for the same reason.
        self.assertAlmostEqual(3336.0, h._get_molecular_weight(core))
        expected_alternatives = [3354.0, 3372.1, 3390.1, 3408.1]
        self.assertEqual(expected_alternatives,
                         h._get_alternative_weights(core))
        self.assertEqual(5, h._get_number_bridges(core))

        # Predicted leader/core sequences and class.
        expected_leader = "MSTKDFNLDLVSVSKKDSGASPR"
        self.assertEqual(expected_leader,
                         h._get_leader_peptide_sequence(leader_feature))
        expected_core = "ITSISLCTPGCKTGALMGCNMKTATCHCSIHVSK"
        self.assertEqual(expected_core, h._get_core_peptide_sequence(core))
        self.assertEqual('Class I', h._get_core_peptide_class(core))
Ejemplo n.º 3
0
    def test_microbisporicin(self):
        """Test lantipeptide prediction for microbisporicin"""
        record = seqio.read(
            utils.get_full_path(__file__, 'microbisporicin.gbk'))
        self.assertEqual(56, len(record.features))

        # The analysis is expected to add two features to the record.
        specific_analysis(record, None)
        self.assertEqual(58, len(record.features))

        core_features = h._find_core_peptides(
            utils.get_cluster_by_nr(record, 1), record)
        self.assertEqual(1, len(core_features))
        core = core_features[0]

        leader_features = h._find_leader_peptides(
            utils.get_cluster_by_nr(record, 1), record)
        self.assertEqual(1, len(leader_features))
        leader_feature = leader_features[0]

        # NOTE: these are not the correct weights for microbisporicin;
        # some additional modifications are not predicted yet.
        self.assertAlmostEqual(2212.9, h._get_monoisotopic_mass(core))
        self.assertAlmostEqual(2214.5, h._get_molecular_weight(core))
        self.assertEqual(4, h._get_number_bridges(core))

        # Predicted leader/core sequences, class and extra modifications.
        expected_leader = "MPADILETRTSETEDLLDLDLSIGVEEITAGPA"
        self.assertEqual(expected_leader,
                         h._get_leader_peptide_sequence(leader_feature))
        expected_core = "VTSWSLCTPGCTSPGGGSNCSFCC"
        self.assertEqual(expected_core, h._get_core_peptide_sequence(core))
        self.assertEqual('Class I', h._get_core_peptide_class(core))
        self.assertEqual(['AviCys', 'Cl', 'OH'],
                         h._get_core_peptide_extra_modifications(core))
Ejemplo n.º 4
0
def retrieve_gene_cluster_annotations(seq_record, smcogdict, gtrcoglist,
                                      transportercoglist, geneclusternr):
    """Collect per-gene display data for one gene cluster.

    seq_record: record handed to the ``utils`` lookups.
    smcogdict: mapping of gene ID to a sequence whose first element is the
        value checked against the two COG lists.
    gtrcoglist: values that mark a gene as belonging in ``gtrs``.
    transportercoglist: values that mark a gene as belonging in
        ``transporters``.
    geneclusternr: number of the cluster, passed to
        ``utils.get_cluster_by_nr``.

    Returns the 11-tuple ``(clustergenes, clustertype, annotations, colors,
    starts, ends, strands, pksnrpsprots, gtrs, transporters, clustersize)``.
    ``colors``, ``starts``, ``ends`` and ``strands`` are parallel to
    ``clustergenes``; ``annotations`` maps gene ID to the first 'product'
    qualifier or 'Unannotated gene'; ``clustersize`` is
    ``max(ends) - min(starts)``.

    Raises ValueError (from ``max``/``min``) when the cluster has no CDS
    features.
    """
    # Sets give O(1) membership tests inside the per-gene loop below.
    allcoregenes = set(
        utils.get_gene_id(cds)
        for cds in utils.get_secmet_cds_features(seq_record))
    pksnrpscoregenes = set(
        utils.get_gene_id(cds)
        for cds in utils.get_pksnrps_cds_features(seq_record))
    feature_by_id = utils.get_feature_dict(seq_record)

    # Look the cluster feature up once and reuse it for both queries.
    cluster_feature = utils.get_cluster_by_nr(seq_record, geneclusternr)
    clustergenes = [
        utils.get_gene_id(cds) for cds in utils.get_cluster_cds_features(
            cluster_feature, seq_record)
    ]
    clustertype = utils.get_cluster_type(cluster_feature)

    annotations = {}
    colors = []
    starts = []
    ends = []
    strands = []
    pksnrpsprots = []
    gtrs = []
    transporters = []
    for gene_id in clustergenes:
        cdsfeature = feature_by_id[gene_id]
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if 'product' in cdsfeature.qualifiers:
            annotations[gene_id] = cdsfeature.qualifiers['product'][0]
        else:
            annotations[gene_id] = 'Unannotated gene'
        starts.append(cdsfeature.location.start)
        ends.append(cdsfeature.location.end)
        # Anything that is not explicitly the reverse strand is drawn as "+".
        if cdsfeature.strand == -1:
            strands.append("-")
        else:
            strands.append("+")
        if gene_id in allcoregenes:
            colors.append("#810E15")
        else:
            colors.append("grey")
        if gene_id in pksnrpscoregenes:
            pksnrpsprots.append(gene_id)
        if gene_id in smcogdict:
            smcogs = smcogdict[gene_id]
            # Only the first entry decides the category.
            if len(smcogs) > 0:
                if smcogs[0] in gtrcoglist:
                    gtrs.append(gene_id)
                if smcogs[0] in transportercoglist:
                    transporters.append(gene_id)
    clustersize = max(ends) - min(starts)
    return (clustergenes, clustertype, annotations, colors, starts, ends,
            strands, pksnrpsprots, gtrs, transporters, clustersize)
Ejemplo n.º 5
0
def write_gene(txt, info, options):
    """Write gene table to TXT"""
    # TXT columns: gene ID, gene start, gene end, gene strand, smCOG,
    # locus_tag/geneID, annotation
    column_names = [
        "gene ID", "gene start", "gene end", "gene strand", "smCOG",
        "locus_tag", "annotation"
    ]
    txt.write("\t".join(column_names) + "\n")
    for cluster_nr in info.clusternrs:
        # One row per CDS feature of the cluster.
        cluster = utils.get_cluster_by_nr(info.seq_record, cluster_nr)
        for cds in utils.get_cluster_cds_features(cluster, info.seq_record):
            row = [
                utils.get_gene_acc(cds).partition(".")[0],
                str(cds.location.start),
                str(cds.location.end),
                # Only strand == 1 is written as "+"; anything else is "-".
                "+" if cds.strand == 1 else "-",
                "",  # smCOG column is not used for now
                utils.get_gene_id(cds).partition(".")[0],
                utils.get_gene_annotation(cds),
            ]
            txt.write("\t".join(row) + "\n")
Ejemplo n.º 6
0
def generate_sidepanel(cluster, seq_record, options, sidepanel=None):
    """Generate sidepanel div

    Runs ``gather_results`` on the slice of ``seq_record`` covered by the
    cluster and summarises the hits in a new ``<div class="sidepanel">``.

    cluster: mapping read for its 'idx' entry.
    seq_record: record that is sliced to the cluster location.
    options: unused here.
    sidepanel: returned unchanged when no cluster feature is found;
        otherwise ignored.

    Returns the newly built sidepanel element.
    """
    logging.debug("generating sidepanel")
    cluster_feature = utils.get_cluster_by_nr(seq_record, cluster['idx'])
    if cluster_feature is None:
        # Nothing to report on; hand back whatever the caller passed in.
        return sidepanel

    location = cluster_feature.location
    cluster_record = seq_record[location.start:location.end]
    result_list = gather_results(cluster_record)

    # NOTE(review): a sidepanel passed in by the caller is replaced rather
    # than extended, as in the original code - confirm this is intended.
    sidepanel = pq(
        '<div>')  #TODO add class and put it in the details div class
    sidepanel.addClass('sidepanel')
    if len(result_list) > 0:
        id_list = []
        for result in result_list:
            if result.cds_id:
                id_list.append(result.cds_id)
            else:
                # Fall back to the position when the hit has no CDS ID.
                id_list.append("Region with unknown ID from %s to %s" %
                               (result.position[0], result.position[1]))

        sidepanel.html("%s Coding sequences with repeats found:<br> %s" %
                       (len(result_list), "<br>".join(id_list)))
    else:
        sidepanel.text("No repetition found")

    # The original fell off the end here and implicitly returned None, so the
    # panel built above was discarded.
    return sidepanel
Ejemplo n.º 7
0
def generate_details_div(cluster,
                         seq_record,
                         options,
                         js_domains,
                         details=None):
    """Generate details div

    Runs ``gather_results`` on the slice of ``seq_record`` covered by the
    cluster and renders one ``output.write_result_summary`` fragment per hit
    into a new ``<div class="details">``.

    cluster: mapping read for its 'idx' entry.
    seq_record: record that is sliced to the cluster location.
    options, js_domains: unused here.
    details: returned unchanged when no cluster feature is found; otherwise
        ignored.

    Returns the newly built details element (left empty when there are no
    results).
    """
    logging.info("generating details div")
    cluster_feature = utils.get_cluster_by_nr(seq_record, cluster['idx'])
    if cluster_feature is None:
        # Nothing to report on; hand back whatever the caller passed in.
        return details

    # NOTE(review): a details div passed in by the caller is replaced rather
    # than extended, as in the original code - confirm this is intended.
    details = pq('<div>')
    details.addClass('details')
    # NOTE(review): the original also built an <h3> with the text
    # 'Repeatfinder output' but never attached it to the div; it is left out
    # so the rendered HTML stays unchanged - confirm whether it should be
    # shown.
    location = cluster_feature.location
    cluster_record = seq_record[location.start:location.end]
    result_list = gather_results(cluster_record)
    if len(result_list) > 0:
        details.html("".join(
            output.write_result_summary(result) for result in result_list))

    return details
Ejemplo n.º 8
0
def generate_details_div(cluster,
                         seq_record,
                         options,
                         js_domains,
                         details=None):
    """Generate details div

    Collects the parsed 'NRPS/PKS Domain:' qualifiers of every 'sec_met' CDS
    in the cluster. When at least one ORF has domains, a
    ``<div class="details-svg">`` placeholder is appended to ``details`` and
    the collected data is appended to ``js_domains``.

    Returns ``details`` (created on demand), or the ``details`` argument
    unchanged when no cluster feature is found.
    """
    cluster_feature = utils.get_cluster_by_nr(seq_record, cluster['idx'])
    if cluster_feature is None:
        return details

    if details is None:
        # First handler to contribute: create the container and its title.
        details = pq('<div>')
        details.addClass('details')

        title = pq('<h3>')
        title.text('Detailed annotation')
        details.append(title)

    details_id = "cluster-%s-details" % cluster['idx']
    orfs = []
    for cds in utils.get_cluster_cds_features(cluster_feature, seq_record):
        if 'sec_met' not in cds.qualifiers:
            continue

        # Prefer the annotated translation over computing one.
        if 'translation' in cds.qualifiers:
            sequence = cds.qualifiers['translation'][0]
        else:
            sequence = str(utils.get_aa_sequence(cds))
        gene_id = utils.get_gene_id(cds)

        domains = []
        for qual in cds.qualifiers['sec_met']:
            if qual.startswith('NRPS/PKS Domain:'):
                parsed = _parse_domain(qual, cds, seq_record)
                if len(parsed) > 0:
                    domains.append(parsed)

        # ORFs without any parsed domain are not reported.
        if len(domains) > 0:
            orfs.append({
                'id': gene_id,
                'sequence': sequence,
                'domains': domains,
            })

    if len(orfs) > 0:
        svg_container = pq('<div>')
        svg_container.addClass('details-svg')
        svg_container.attr('id', '%s-svg' % details_id)
        details.append(svg_container)

        js_domains.append({'id': details_id, 'orfs': orfs})

    return details
Ejemplo n.º 9
0
def write_RiPP(txt, info, options):
    """Write RiPP table to TXT

    Writes a tab-separated header line and then one line per core peptide
    feature returned by ``_find_core_peptides`` for every cluster number in
    ``info.clusternrs``. Field values are extracted from the feature's 'note'
    qualifiers.

    txt: object with a ``write`` method that receives the table text.
    info: read for ``clusternrs`` and ``seq_record``.
    options: unused here.

    Raises IndexError when a peptide lacks one of the required notes (class,
    core sequence, molecular weight, monoisotopic mass, number of bridges).
    """

    def first_note(note_quals, match, split_on):
        """Return the text after `split_on` in the first note containing `match`."""
        for qual in note_quals:
            if match in qual:
                return qual.partition(split_on)[2]
        raise IndexError("no %r entry in note qualifiers" % match)

    #TXT columns: RiPP ID, annotation, core peptide, mol weight, monoisotopic_mass, alt mol weights, nr bridges
    txt.write("\t".join([
        "RiPP ID", "annotation", "core peptide", "molecular weight",
        "monoisotopic_mass", "alternative molecular weights",
        "number of bridges"
    ]) + "\n")
    for BGCnr in info.clusternrs:
        #Retrieve all data that will be written out
        cluster_feature = utils.get_cluster_by_nr(info.seq_record, BGCnr)
        cluster_gene_features = list(
            utils.get_cluster_cds_features(cluster_feature, info.seq_record))
        RiPP_features = _find_core_peptides(cluster_feature, info.seq_record)
        for RiPP in RiPP_features:
            # Resolve the ID per peptide. The original kept a separate list
            # indexed in parallel, which went out of step (or raised
            # IndexError) as soon as one peptide overlapped no CDS; such a
            # peptide now gets an empty ID instead.
            RiPP_ID = ""
            for cds in cluster_gene_features:
                if utils.features_overlap(cds, RiPP):
                    RiPP_ID = utils.get_gene_acc(cds).partition(".")[0]
                    break

            note_quals = RiPP.qualifiers['note']
            annotation = first_note(note_quals, "predicted class:",
                                    "predicted class: ")
            core_peptide = first_note(note_quals, "predicted core seq:",
                                      "predicted core seq: ")
            mol_weight = first_note(note_quals, "molecular weight: ",
                                    "molecular weight: ")
            monoiso_mass = first_note(note_quals, "monoisotopic mass: ",
                                      "monoisotopic mass: ")
            # Alternative weights are optional. The original tested
            # `"alternative weights" in note_quals`, i.e. for a note equal to
            # exactly that string, so this column was always left empty.
            if any("alternative weights:" in qual for qual in note_quals):
                alt_mol_weights = first_note(
                    note_quals, "alternative weights:",
                    "alternative weights: ").replace(" ", "")
            else:
                alt_mol_weights = ""
            nr_bridges = first_note(note_quals, "number of bridges: ",
                                    "number of bridges: ")
            txt.write("\t".join([
                RiPP_ID, annotation, core_peptide, mol_weight, monoiso_mass,
                alt_mol_weights, nr_bridges
            ]) + "\n")
Ejemplo n.º 10
0
    def test_sco_cluster3(self):
        """Test lantipeptide prediction for SCO cluster #3"""
        record = seqio.read(utils.get_full_path(__file__, 'sco_cluster3.gbk'))
        self.assertEqual(69, len(record.features))

        # The analysis is expected to add two features to the record.
        specific_analysis(record, None)
        self.assertEqual(71, len(record.features))

        core_features = h._find_core_peptides(
            utils.get_cluster_by_nr(record, 1), record)
        self.assertEqual(1, len(core_features))
        core = core_features[0]
        self.assertEqual('Class I', h._get_core_peptide_class(core))
Ejemplo n.º 11
0
    def test_epidermin(self):
        """Test lantipeptide prediction for epidermin"""
        record = seqio.read(utils.get_full_path(__file__, 'epidermin.gbk'))
        self.assertEqual(18, len(record.features))

        # The analysis is expected to add two features to the record.
        specific_analysis(record, None)
        self.assertEqual(20, len(record.features))

        core_features = h._find_core_peptides(
            utils.get_cluster_by_nr(record, 1), record)
        self.assertEqual(1, len(core_features))
        core = core_features[0]

        leader_features = h._find_leader_peptides(
            utils.get_cluster_by_nr(record, 1), record)
        self.assertEqual(1, len(leader_features))
        leader_feature = leader_features[0]

        # Predicted masses and bridge count of the core peptide.
        self.assertAlmostEqual(2164, h._get_monoisotopic_mass(core))
        self.assertAlmostEqual(2165.6, h._get_molecular_weight(core))
        self.assertEqual(3, h._get_number_bridges(core))

        # Predicted leader/core sequences, class and extra modifications.
        expected_leader = "MEAVKEKNDLFNLDVKVNAKESNDSGAEPR"
        self.assertEqual(expected_leader,
                         h._get_leader_peptide_sequence(leader_feature))
        expected_core = "IASKFICTPGCAKTGSFNSYCC"
        self.assertEqual(expected_core, h._get_core_peptide_sequence(core))
        self.assertEqual('Class I', h._get_core_peptide_class(core))
        self.assertEqual(['AviCys'],
                         h._get_core_peptide_extra_modifications(core))
Ejemplo n.º 12
0
def retrieve_pksnrps_info(seq_record, geneclusternr, pksnrpsprots):
    """Collect domain layout and substrate predictions for PKS/NRPS genes.

    seq_record: record handed to the ``utils`` lookups.
    geneclusternr: number of the cluster whose structure prediction is
        returned.
    pksnrpsprots: gene IDs to process; each must be a key of the domain
        dict returned by ``utils.get_nrpspks_domain_dict``.

    Returns the 8-tuple ``(pksnrpsprotsnames, pksnrpsdomains,
    substrspecnrpspredictordict, substrspecminowadict, substrspecpkssigdict,
    substrspecconsensusdict, krpredictionsdict, structpred)``.
    ``pksnrpsdomains`` maps gene ID to ``[domlist, domsdetails]``, where
    ``domlist`` holds numbered domain names and ``domsdetails`` maps each of
    them to ``[dom[1], dom[2]]`` (presumably the domain's start and end -
    TODO confirm). The prediction dicts are keyed by
    ``<gene>_<A|AT|CAL|KR><nr>``.

    Raises KeyError when a gene or prediction key is missing from the
    underlying lookups.
    """
    pksnrpsprotsnames = [
        utils.get_gene_id(cds)
        for cds in utils.get_pksnrps_cds_features(seq_record)
    ]
    domaindict = utils.get_nrpspks_domain_dict(seq_record)
    substr_spec_preds = utils.get_nrpspks_substr_spec_preds(seq_record)
    pksnrpsdomains = {}
    substrspecnrpspredictordict = {}
    substrspecminowadict = {}
    substrspecpkssigdict = {}
    substrspecconsensusdict = {}
    krpredictionsdict = {}
    for gene_id in pksnrpsprots:
        domlist = []
        domsdetails = {}
        for dom in domaindict[gene_id]:
            # Number repeated domain types: the first free "<type><nr>" wins.
            nr = 1
            while dom[0] + str(nr) in domlist:
                nr += 1
            domname = dom[0] + str(nr)
            domlist.append(domname)
            domsdetails[domname] = [dom[1], dom[2]]
            if "AMP-binding" in domname or "A-OX" in domname:
                key = gene_id + "_A" + str(nr)
                substrspecminowadict[key] = \
                    substr_spec_preds.minowa_nrps_preds[key]
                substrspecnrpspredictordict[key] = [
                    substr_spec_preds.nrps_code_preds[key],
                    substr_spec_preds.nrps_svm_preds[key]
                ]
                substrspecconsensusdict[key] = \
                    substr_spec_preds.consensuspreds[key]
            if "PKS_AT" in domname:
                key = gene_id + "_AT" + str(nr)
                substrspecminowadict[key] = \
                    substr_spec_preds.minowa_pks_preds[key]
                substrspecpkssigdict[key] = \
                    substr_spec_preds.pks_code_preds[key]
                substrspecconsensusdict[key] = \
                    substr_spec_preds.consensuspreds[key]
            # The original repeated this CAL branch twice, verbatim; one pass
            # stores the same values.
            if "CAL_domain" in domname:
                key = gene_id + "_CAL" + str(nr)
                substrspecminowadict[key] = \
                    substr_spec_preds.minowa_cal_preds[key]
                substrspecconsensusdict[key] = \
                    substr_spec_preds.consensuspreds[key]
            if "PKS_KR" in domname:
                key = gene_id + "_KR" + str(nr)
                krpredictionsdict[key] = [
                    substr_spec_preds.kr_activity_preds[key],
                    substr_spec_preds.kr_stereo_preds[key]
                ]
        pksnrpsdomains[gene_id] = [domlist, domsdetails]
    structpred = utils.get_structure_pred(
        utils.get_cluster_by_nr(seq_record, geneclusternr))
    return (pksnrpsprotsnames, pksnrpsdomains, substrspecnrpspredictordict,
            substrspecminowadict, substrspecpkssigdict,
            substrspecconsensusdict, krpredictionsdict, structpred)
Ejemplo n.º 13
0
def write_signature_gene_info(txt, info, options):
    """Write signature gene table to TXT"""
    # TXT columns: signature_gene, pHMM_hit, e-value, bit score, nr of seeds
    marker = 'Domains detected: '
    column_names = [
        "signature gene", "pHMM hits", "e-value", "bit score",
        "number of seeds"
    ]
    txt.write("\t".join(column_names) + "\n")
    for cluster_nr in info.clusternrs:
        cluster = utils.get_cluster_by_nr(info.seq_record, cluster_nr)
        # Signature genes are the cluster CDS features with a 'sec_met'
        # qualifier.
        candidates = [
            cds for cds in utils.get_cluster_cds_features(
                cluster, info.seq_record) if 'sec_met' in cds.qualifiers
        ]
        for cds in candidates:
            detected = [
                qual for qual in cds.qualifiers['sec_met']
                if qual.startswith(marker)
            ]
            if not detected:
                continue
            gene_acc = utils.get_gene_acc(cds).partition(".")[0]
            # Only the first matching qualifier is used; split() yields a
            # single-element list when there is no ";" separator.
            for entry in detected[0].partition(marker)[2].split(";"):
                hit_name = entry.partition(" (")[0].replace(" ", "")
                e_value = entry.partition("E-value: ")[2].partition(",")[0]
                bit_score = entry.partition("bitscore: ")[2].partition(",")[0]
                seed_count = entry.partition("seeds: ")[2].partition(")")[0]
                row = [gene_acc, hit_name, e_value, bit_score, seed_count]
                txt.write("\t".join(row) + "\n")
Ejemplo n.º 14
0
def generate_sidepanel(cluster, seq_record, options, sidepanel=None):
    """Generate sidepanel div

    Appends a "Predicted core structure" section and a "Prediction details"
    section to the sidepanel, plus a "Database cross-links" section when
    cluster['type'] contains 'nrps'.

    cluster: mapping read for its 'idx' and 'type' entries.
    seq_record: record handed to the ``utils`` lookups for the cluster
        feature and its CDS features.
    options: read for ``outputfoldername`` and ``docking``;
        ``options.docking`` is set to {} here when 'docking' is not in
        options.
    sidepanel: existing sidepanel element to extend; a new
        ``<div class="sidepanel">`` is created when it is None.

    Returns the sidepanel, or the ``sidepanel`` argument unchanged when
    ``utils.get_cluster_by_nr`` returns None.
    """
    cluster_rec = utils.get_cluster_by_nr(seq_record, cluster['idx'])
    if cluster_rec is None:
        return sidepanel

    if sidepanel is None:
        sidepanel = pq('<div>')
        sidepanel.addClass('sidepanel')

    # --- "Predicted core structure": header, linked image and a warning ---
    structure = pq('<div>')
    structure.addClass('structure')
    structure_header = pq('<h3>')
    structure_header.text('Predicted core structure')
    structure.append(structure_header)
    # The link target and the image source are the same URL.
    a = pq('<a>')
    a.attr('href',
           _get_structure_image_url(cluster_rec, options.outputfoldername))
    a.attr('target', '_new')
    structure.append(a)
    structure_img = pq('<img>')
    structure_img.attr(
        'src', _get_structure_image_url(cluster_rec, options.outputfoldername))
    a.append(structure_img)
    warning = pq('<div>')
    warning.addClass('as-structure-warning')
    # Side effect: make sure options.docking exists before it is queried.
    if not 'docking' in options:
        options.docking = {}
    # The warning text depends on whether a truthy docking entry exists for
    # this cluster index.
    if cluster['idx'] in options.docking and options.docking[cluster['idx']]:
        warning.text('Rough prediction of core scaffold based on assumed '
                     'PKS linker matching; tailoring reactions not taken '
                     'into account')
    else:
        warning.text('Rough prediction of core scaffold based on assumed '
                     'PKS/NRPS colinearity; tailoring reactions not taken '
                     'into account')
    structure.append(warning)
    sidepanel.append(structure)

    # --- "Prediction details": a definition list filled in below ---
    details = pq('<div>')
    details.addClass('more-details')
    details_header = pq('<h3>')
    details_header.text('Prediction details')
    details.append(details_header)
    details_list = pq('<dl>')
    details_list.addClass('prediction-text')

    details.append(details_list)
    sidepanel.append(details)
    # First entry: the monomer prediction for the whole cluster.
    dt = pq('<dt>')
    dt.text('Monomers prediction:')
    details_list.append(dt)
    dd = pq('<dd>')
    dd.text(_get_monomer_prediction(cluster_rec))
    details_list.append(dd)

    # One group of entries per CDS that carries 'sec_met' qualifiers.
    features = utils.get_cluster_cds_features(cluster_rec, seq_record)
    for feature in features:
        if not 'sec_met' in feature.qualifiers:
            continue

        # The gene ID header is emitted lazily, with the first prediction.
        header_printed = False
        # One list of predictions per AMP-binding domain of this CDS.
        per_CDS_predictions = []
        for qual in feature.qualifiers['sec_met']:
            if not qual.startswith('NRPS/PKS Domain:'):
                continue
            # logging.debug("qual: %s" % qual)
            preds = _parse_substrate_predictions(qual)

            per_Adomain_predictions = []
            for key, val in preds:

                if not header_printed:
                    dt = pq('<dt>')
                    dt.text(utils.get_gene_id(feature))
                    details_list.append(dt)
                    header_printed = True
                dd = pq('<dd>')
                dd.html('%s: %s<br>' % (key, val))
                details_list.append(dd)
                if qual.startswith("NRPS/PKS Domain: AMP-binding"):
                    # NOTE(review): the filtered list only gates the branch;
                    # the unfiltered split is what gets collected - confirm
                    # this is intended.
                    values = _filter_norine_as(val.split(","))
                    if len(values) > 0:
                        per_Adomain_predictions.extend(val.split(","))

            if len(preds) > 0:
                if qual.startswith("NRPS/PKS Domain: AMP-binding"):
                    # De-duplicate the predictions of this domain.
                    per_Adomains_predictions_unique = list(
                        set(per_Adomain_predictions))
                    per_CDS_predictions.append(per_Adomains_predictions_unique)
                # logging.debug("substrate prediction list: %s" % ",".join(per_Adomains_predictions_unique) )
                # Blank entry that visually separates consecutive domains.
                dd = pq('<dd>')
                dd.append(pq('<br>'))
                details_list.append(dd)

        # Per-CDS NORINE search links, in strict and relaxed mode.
        if len(per_CDS_predictions) > 0:
            url = _get_norine_url_for_specArray(per_CDS_predictions)
            if url:
                dd = pq('<dd>')
                dd.append("Search NORINE for peptide in ")
                a = pq('<a>')
                a.attr('href', url)
                a.attr('target', '_new')
                a.text("strict mode")
                dd.append(a)
                dd.append(" // ")
                url = _get_norine_url_for_specArray(per_CDS_predictions,
                                                    be_strict=False)
                a = pq('<a>')
                a.attr('href', url)
                a.attr('target', '_new')
                a.text("relaxed mode")
                dd.append(a)
                dd.append(pq('<br>'))
                dd.append(pq('<br>'))
                details_list.append(dd)

    # --- "Database cross-links": only when the type contains 'nrps' ---
    if cluster['type'].find('nrps') > -1:
        cross_refs = pq("<div>")
        refs_header = pq('<h3>')
        refs_header.text('Database cross-links')
        cross_refs.append(refs_header)
        links = pq("<div>")
        links.addClass('prediction-text')

        # Static link to the NORINE query form.
        a = pq("<a>")
        a.attr('href', 'http://bioinfo.lifl.fr/norine/form2.jsp')
        a.attr('target', '_new')
        a.text("Link to NORINE database query form")
        links.append(a)
        links.append("<br>")

        # Cluster-wide NORINE lookups, in strict and relaxed mode.
        a = pq("<a>")
        url = _get_norine_url_for_cluster(cluster_rec)
        logging.debug("NORINE URL string: %s" % url)
        a.attr('href', url)
        a.attr('target', '_new')
        a.text("strict mode")
        links.append("Direct lookup in NORINE database in ")
        links.append(a)
        links.append(" // ")
        url = _get_norine_url_for_cluster(cluster_rec, be_strict=False)
        a = pq("<a>")
        a.attr('href', url)
        a.attr('target', '_new')
        a.text("relaxed mode")
        links.append(a)
        cross_refs.append(links)
        sidepanel.append(cross_refs)

    return sidepanel
Ejemplo n.º 15
0
def add_cluster_page(d, cluster, seq_record, options, extra_data, seq_id):

    handlers = find_plugins_for_cluster(options.plugins, cluster)

    cluster_rec = utils.get_cluster_by_nr(seq_record, cluster['idx'])

    rules = get_detection_rules(cluster_rec)

    page = pq('<div>')
    page.addClass('page')
    page.attr('id', 'cluster-%s' % cluster['idx'])
    header = pq('<h3>')
    header.text(
        '%s - Cluster %s - %s' %
        (seq_record.name, cluster['idx'], cluster['type'].capitalize()))
    page.append(header)

    sidepanel = None
    for handler in handlers:
        sidepanel = handler.generate_sidepanel(cluster, seq_record, options,
                                               sidepanel)

    if sidepanel is not None:
        page.append(sidepanel)

    content = pq('<div>')
    content.addClass('content')

    description = pq('<div>')
    description.addClass('description-container')
    desc_header = pq('<h3>')
    desc_header.text('Gene cluster description')
    description.append(desc_header)

    cluster_download = pq('<div>')
    cluster_download.addClass('cluster-download')
    description.append(cluster_download)
    dl_link = pq('<a>')
    dl_link.attr('href', '%s.cluster%03d.gbk' % (seq_id, cluster['idx']))
    dl_link.text('Download cluster GenBank file')
    cluster_download.append(dl_link)

    desc_text = pq('<div>')
    desc_text.addClass('description-text')
    if options.input_type == 'nucl':
        text = seq_record.name + ' - Gene Cluster %(idx)s. Type = %(type)s. Location: %(start)s - %(end)s nt. '
    else:
        text = seq_record.name + '- Gene Cluster %(idx)s. Type = %(type)s. '
    if 'probability' in cluster:
        text += 'ClusterFinder probability: %(probability)s. '
    text += 'Click on genes for more information.'
    desc_text.text(text % cluster)
    description.append(desc_text)
    rules_header = pq('<a>')
    rules_header.addClass('cluster-rules-header')
    rules_header.attr('id', 'cluster-%s-rules-header' % cluster['idx'])
    rules_header.attr('href', '#cluster-%s' % cluster['idx'])
    rules_header.text('Show pHMM detection rules used')
    description.append(rules_header)

    detection_rules = pq('<div>')
    detection_rules.addClass('cluster-rules')
    detection_rules.attr('id', 'cluster-%s-rules' % cluster['idx'])
    detection_rules.html('<br>'.join(rules))
    description.append(detection_rules)

    desc_svg = pq('<div>')
    desc_svg.attr('id', 'cluster-%s-svg' % cluster['idx'])
    description.append(desc_svg)

    content.append(description)

    if options.input_type == 'nucl':
        legend = pq('<div>')
        legend.addClass('legend')
        legend_header = pq('<h4>')
        legend_header.text('Legend:')
        legend.append(legend_header)
        legend_text = pq('<div>')
        if not options.smcogs:
            legend_text.append("Only available when smCOG analysis was run")
        legend_text.append(
            generate_legend_entry('legend-type-biosynthetic',
                                  'core biosynthetic genes'))
        legend_text.append(
            generate_legend_entry('legend-type-biosynthetic-additional',
                                  'additional biosynthetic genes'))
        legend_text.append(
            generate_legend_entry('legend-type-transport',
                                  'transport-related genes'))
        legend_text.append(
            generate_legend_entry('legend-type-regulatory',
                                  'regulatory genes'))
        legend_text.append(
            generate_legend_entry('legend-type-other', 'other genes'))
        if options.tta:
            legend_text.append(
                generate_legend_entry('legend-tta-codon', 'TTA codon'))
        if options.cassis:
            legend_text.append(
                generate_legend_entry('legend-border-cassis',
                                      'cluster extent as predicted by CASSIS'))
        if options.borderpredict:
            legend_text.append(
                generate_legend_entry(
                    'legend-border-clusterfinder',
                    'cluster extent as predicted by ClusterFinder'))
        legend.append(legend_text)
        content.append(legend)

    details = None
    da = None
    for handler in handlers:
        if "generate_details_div" in dir(handler):
            details = handler.generate_details_div(cluster, seq_record,
                                                   options,
                                                   extra_data['js_domains'],
                                                   details)
        if 'generate_domain_alignment_div' in dir(
                handler) and options.transatpks_da:
            da = handler.generate_domain_alignment_div(cluster, seq_record,
                                                       options, da)

    if details is not None:
        content.append(details)

    if da is not None:
        content.append(da)

    if options.clusterblast:
        top_ten_clusters = cluster_rec.qualifiers.get('clusterblast', [])

        cb = pq('<div>')
        cb.addClass('clusterblast')
        cb_header = pq('<h3>')
        cb_header.text("Homologous gene clusters")
        cb.append(cb_header)
        cb_control = pq('<div>')
        cb.append(cb_control)
        if len(top_ten_clusters) == 0:
            cb_download = pq('No significant ClusterBlast hits found.')
            cb_control.append(cb_download)
        else:
            cb_select = pq('<select>')
            cb_select.attr('id', 'clusterblast-%s-select' % cluster['idx'])
            cb_select.addClass('clusterblast-selector')
            cb_control.append(cb_select)
            opt = pq('<option>')
            opt.attr(
                'value',
                path.join('svg', 'clusterblast%s_all.svg' % cluster['idx']))
            opt.text('All hits')
            cb_select.append(opt)
            for i in range(1, options.nclusters + 1):
                svg_file = path.join(
                    'svg', 'clusterblast%s_%s.svg' % (cluster['idx'], i))
                full_path = path.join(options.outputfoldername, svg_file)
                if path.exists(full_path):
                    opt = pq('<option>')
                    opt.attr('value', svg_file)
                    opt_text = 'Cluster %s hit %s' % (cluster['idx'], i)
                    if len(top_ten_clusters) >= i:
                        opt_text = top_ten_clusters[i - 1].split('\t')[1]
                    opt.text(opt_text)
                    cb_select.append(opt)
                else:
                    logging.debug("failed to find %r" % full_path)
            cb_download = pq('<button>')
            cb_download.attr('id', 'clusterblast-%s-download' % cluster['idx'])
            cb_download.text('Download graphic')
            cb_control.append(cb_download)

        cb_svg = pq('<div>')
        cb_svg.attr('id', 'clusterblast-%s-svg' % cluster['idx'])
        cb.append(cb_svg)
        content.append(cb)

    if options.subclusterblast:
        top_ten_clusters = cluster_rec.qualifiers.get('subclusterblast', [])

        cb = pq('<div>')
        cb.addClass('subclusterblast')
        cb_header = pq('<h3>')
        cb_header.text("Homologous subclusters")
        cb.append(cb_header)
        cb_control = pq('<div>')
        cb.append(cb_control)
        cb_select = pq('<select>')
        cb_select.attr('id', 'subclusterblast-%s-select' % cluster['idx'])
        cb_select.addClass('clusterblast-selector')
        cb_control.append(cb_select)
        opt = pq('<option>')
        opt.attr(
            'value',
            path.join('svg', 'subclusterblast%s_all.svg' % cluster['idx']))
        opt.text('All hits')
        cb_select.append(opt)
        subclusters_added = 0
        for i in range(1, options.nclusters + 1):
            svg_file = path.join(
                'svg', 'subclusterblast%s_%s.svg' % (cluster['idx'], i))
            full_path = path.join(options.outputfoldername, svg_file)
            if path.exists(full_path):
                opt = pq('<option>')
                opt.attr('value', svg_file)
                opt_text = 'Cluster %s hit %s' % (cluster['idx'], i)
                if len(top_ten_clusters) >= i:
                    opt_text = top_ten_clusters[i - 1].split('\t')[1].replace(
                        '_', ' ')
                opt.text(opt_text)
                cb_select.append(opt)
                subclusters_added += 1
            else:
                logging.debug("failed to find %r" % full_path)

        cb_svg = pq('<div>')
        cb_svg.attr('id', 'subclusterblast-%s-svg' % cluster['idx'])
        cb.append(cb_svg)
        if path.exists(path.join(options.outputfoldername, 'svg',
                       'subclusterblast%s_all.svg' % cluster['idx'])) and \
           subclusters_added > 0:
            cb_download = pq('<button>')
            cb_download.attr('id',
                             'subclusterblast-%s-download' % cluster['idx'])
            cb_download.text('Download graphic')
            cb_control.append(cb_download)
            content.append(cb)

    if options.knownclusterblast:
        top_ten_clusters = cluster_rec.qualifiers.get('knownclusterblast', [])

        cb = pq('<div>')
        cb.addClass('knownclusterblast')
        cb_header = pq('<h3>')
        cb_header.text("Homologous known gene clusters")
        cb.append(cb_header)
        cb_control = pq('<div>')
        cb.append(cb_control)
        cb_select = pq('<select>')
        cb_select.attr('id', 'knownclusterblast-%s-select' % cluster['idx'])
        cb_select.addClass('clusterblast-selector')
        cb_control.append(cb_select)
        opt = pq('<option>')
        opt.attr(
            'value',
            path.join('svg', 'knownclusterblast%s_all.svg' % cluster['idx']))
        opt.text('All hits')
        cb_select.append(opt)
        knownclusters_added = 0
        for i in range(1, options.nclusters + 1):
            svg_file = path.join(
                'svg', 'knownclusterblast%s_%s.svg' % (cluster['idx'], i))
            full_path = path.join(options.outputfoldername, svg_file)
            if path.exists(full_path):
                opt = pq('<option>')
                opt.attr('value', svg_file)
                opt_text = 'Cluster %s hit %s' % (cluster['idx'], i)
                if len(top_ten_clusters) >= i:
                    opt_text = top_ten_clusters[i - 1].split('\t')[1].replace(
                        '_', ' ')
                opt.text(opt_text)
                cb_select.append(opt)
                knownclusters_added += 1
            else:
                logging.debug("failed to find %r" % full_path)

        cb_svg = pq('<div>')
        cb_svg.attr('id', 'knownclusterblast-%s-svg' % cluster['idx'])
        cb.append(cb_svg)
        if path.exists(path.join(options.outputfoldername, 'svg',
                       'knownclusterblast%s_all.svg' % cluster['idx'])) and \
           knownclusters_added > 0:
            cb_download = pq('<button>')
            cb_download.attr('id',
                             'knownclusterblast-%s-download' % cluster['idx'])
            cb_download.text('Download graphic')
            cb_control.append(cb_download)
            content.append(cb)

    page.append(content)
    d('.page:last').after(page)
Ejemplo n.º 16
0
def _first_specificity_value(qualifier_values, prefix, label):
    """Return the text following ``label`` in the first matching entry.

    Scans ``qualifier_values`` in order and picks the first string that
    starts with ``prefix``; the part of that string after the first
    occurrence of ``label`` is returned. An empty string is returned when no
    entry starts with ``prefix`` (or when the matching entry does not contain
    ``label``).
    """
    for value in qualifier_values:
        if value.startswith(prefix):
            return value.partition(label)[2]
    return ""


def write_NRPS_PKS(txt, info, options):
    """Write the per-domain NRPS/PKS table to ``txt`` as tab-separated text.

    A header row is always written. After it, one row is written for every
    aSDomain feature that overlaps an NRPS/PKS CDS (a CDS whose ``sec_met``
    qualifier has an entry starting with ``NRPS/PKS Domain:``) in each
    cluster listed in ``info.clusternrs``.

    Args:
        txt: Writable text handle; only ``write()`` is called on it.
        info: Object providing ``clusternrs`` (iterable of cluster numbers)
            and ``seq_record`` (the record the clusters are looked up in).
        options: Not used by this function.
    """
    # TXT columns: NRPS/PKS ID, annotation, aSDomain, score, evalue,
    # domain type, subtype, range, activity, NRPSPredictor2, Stachelhaus,
    # Minowa, pkssignature, consensus
    txt.write("\t".join([
        "Cluster_ID", "NRPSPKS_ID", "annotation", "aSDomain", "score",
        "evalue", "domain_type", "subtype", "domain_start", "domain_end",
        "KR activity", "KR stereochemistry", "NRPSPredictor2", "Stachelhaus",
        "Minowa", "pkssignature", "consensus"
    ]) + "\n")

    # (prefix used to find the entry, label whose trailing text is the value),
    # listed in the same order as the last seven output columns.
    specificity_fields = (
        ("KR activity", "KR activity: "),
        ("KR stereochemistry", "KR stereochemistry: "),
        ("NRPSpredictor2", "NRPSpredictor2 SVM: "),
        ("Stachelhaus", "Stachelhaus code: "),
        ("Minowa", "Minowa: "),
        ("PKS signature", "PKS signature: "),
        ("consensus", "consensus: "),
    )

    for cluster_nr in info.clusternrs:
        # Retrieve all data that will be written out
        cluster_feature = utils.get_cluster_by_nr(info.seq_record, cluster_nr)
        cluster_gene_features = utils.get_cluster_cds_features(
            cluster_feature, info.seq_record)
        cluster_id = "{seq_id}_c{cluster_nr}".format(seq_id=info.seq_record.id,
                                                     cluster_nr=cluster_nr)
        nrps_pks_genes = [
            cds for cds in cluster_gene_features
            if 'sec_met' in cds.qualifiers and any(
                qual.startswith('NRPS/PKS Domain:')
                for qual in cds.qualifiers['sec_met'])
        ]
        if not nrps_pks_genes:
            continue

        # The cluster's aSDomain features do not depend on the CDS, so they
        # are fetched once per cluster instead of once per gene.
        cluster_domains = list(
            utils.get_cluster_aSDomain_features(cluster_feature,
                                                info.seq_record))

        for cds in nrps_pks_genes:
            enzyme_id = utils.get_gene_acc(cds).partition(".")[0]
            sec_met = cds.qualifiers['sec_met']
            subtype_quals = [
                qual for qual in sec_met
                if qual.startswith("NRPS/PKS subtype")
            ]
            # Require both conditions so that indexing the first subtype
            # entry can never fail when only one of the two tests matches.
            if subtype_quals and any("NRPS/PKS subtype: " in qual
                                     for qual in sec_met):
                enzyme_annotation = subtype_quals[0].partition(
                    "NRPS/PKS subtype: ")[2]
            else:
                logging.warning("No enzyme annotation for %s", enzyme_id)
                enzyme_annotation = ""

            # A domain belongs to this CDS when it overlaps it and carries
            # the same locus tag; the tag is compared against both the raw
            # qualifier value and its first element (presumably because it
            # may be stored as a plain string or as a list).
            cds_domains = [
                dom for dom in cluster_domains
                if utils.features_overlap(cds, dom) and
                utils.get_gene_id(cds) in
                [dom.qualifiers['locus_tag'], dom.qualifiers['locus_tag'][0]]
            ]
            for domain in cds_domains:
                quals = domain.qualifiers
                domtype = quals['domain'][0]
                if "domain_subtype" in quals:
                    subtype = quals['domain_subtype'][0]
                else:
                    subtype = ""
                domain_id = quals['asDomain_id'][0]
                score = str(quals['score'][0])
                evalue = str(quals['evalue'][0])
                dom_start = str(domain.location.start)
                dom_end = str(domain.location.end)

                # Domains without a 'specificity' qualifier get empty
                # strings in all seven prediction columns.
                if 'specificity' in quals:
                    specificities = quals['specificity']
                else:
                    specificities = []
                predictions = [
                    _first_specificity_value(specificities, prefix, label)
                    for prefix, label in specificity_fields
                ]

                txt.write("\t".join([
                    cluster_id, enzyme_id, enzyme_annotation, domain_id,
                    score, evalue, domtype, subtype, dom_start, dom_end
                ] + predictions) + "\n")
Ejemplo n.º 17
0
def generate_details_div(cluster,
                         seq_record,
                         options,
                         js_domains,
                         details=None):
    """Build or extend the 'Detailed annotation' div for one cluster.

    Looks up the cluster feature numbered ``cluster['idx']`` in
    ``seq_record``. If it does not exist, ``details`` is returned unchanged.
    Otherwise a ``details`` div (with its heading) is created when none was
    passed in, and either a 'No core peptides found.' note or a list of
    leader/core peptide sequences plus a legend is appended to it.

    ``options`` and ``js_domains`` are accepted but not used here.

    Returns the (possibly newly created) details element, or the ``details``
    argument as given when the cluster feature is missing.
    """
    cluster_rec = utils.get_cluster_by_nr(seq_record, cluster['idx'])
    if cluster_rec is None:
        return details

    leader_peptides = _find_leader_peptides(cluster_rec, seq_record)
    core_peptides = _find_core_peptides(cluster_rec, seq_record)

    if details is None:
        # First handler to contribute: create the container and its heading.
        details = pq('<div>')
        details.addClass('details')

        heading = pq('<h3>')
        heading.text('Detailed annotation')
        details.append(heading)

    if not core_peptides:
        empty_note = pq('<div>')
        empty_note.addClass('details-text')
        empty_note.text('No core peptides found.')
        details.append(empty_note)
        return details

    peptide_list = pq('<dl>')
    peptide_list.addClass('details-text')

    # Residues that get highlighted in the core sequence, applied in this
    # order: S is shown as Dha, T as Dhb, and C is wrapped in a 'cys' span.
    residue_markup = (
        ('S', '<span class="dha">Dha</span>'),
        ('T', '<span class="dhb">Dhb</span>'),
        ('C', '<span class="cys">C</span>'),
    )

    for position, core in enumerate(core_peptides):
        # Leaders and cores are paired purely by their position in the two
        # lists returned above.
        leader_seq = _get_leader_peptide_sequence(leader_peptides[position])
        marked_core = _get_core_peptide_sequence(core)

        title = pq('<dt>')
        title.text('%s leader / core peptide, putative %s' %
                   (utils.get_gene_id(core), _get_core_peptide_class(core)))
        peptide_list.append(title)

        for residue, markup in residue_markup:
            marked_core = marked_core.replace(residue, markup)

        entry = pq('<dd>')
        entry.html("%s - %s" % (leader_seq, marked_core))
        peptide_list.append(entry)

    details.append(peptide_list)

    # Legend explaining the abbreviations used in the marked-up sequences.
    legend = pq('<div>')
    legend.addClass('legend')

    legend_title = pq('<h4>')
    legend_title.text('Legend:')
    legend.append(legend_title)

    legend_body = pq('<div>')
    legend_body.html('<span class="dha">Dha</span>: Didehydroalanine<br>'
                     '<span class="dhb">Dhb</span>: Didehydrobutyrine')
    legend.append(legend_body)

    details.append(legend)
    return details
Ejemplo n.º 18
0
def generate_sidepanel(cluster, seq_record, options, sidepanel=None):
    """Build or extend the sidepanel div for one cluster.

    Looks up the cluster feature numbered ``cluster['idx']`` in
    ``seq_record``. If it does not exist, ``sidepanel`` is returned
    unchanged. Otherwise a sidepanel div is created when none was passed in.
    When the cluster has core peptides, a 'Prediction details' section (one
    entry per core peptide) and a 'Database cross-links' section are appended.

    ``options`` is accepted but not used here.

    Returns the (possibly newly created) sidepanel element, or the
    ``sidepanel`` argument as given when the cluster feature is missing.
    """
    cluster_rec = utils.get_cluster_by_nr(seq_record, cluster['idx'])
    if cluster_rec is None:
        return sidepanel

    if sidepanel is None:
        sidepanel = pq('<div>')
        sidepanel.addClass('sidepanel')

    core_peptides = _find_core_peptides(cluster_rec, seq_record)
    if not core_peptides:
        # Nothing to report: hand back the (possibly empty) panel as is.
        return sidepanel

    # --- Prediction details -------------------------------------------------
    prediction_box = pq('<div>')
    prediction_box.addClass('more-details')
    prediction_title = pq('<h3>')
    prediction_title.text('Prediction details')
    prediction_box.append(prediction_title)
    prediction_list = pq('<dl>')
    prediction_list.addClass('prediction-text')

    for core in core_peptides:
        gene_title = pq('<dt>')
        gene_title.text(utils.get_gene_id(core))
        prediction_list.append(gene_title)

        summary = pq('<dd>')
        mass = _get_monoisotopic_mass(core)
        mol_weight = _get_molecular_weight(core)
        bridges = _get_number_bridges(core)
        pred_class = _get_core_peptide_class(core)
        score = _get_core_peptide_score(core)
        summary_template = (
            'Putative %s<br>Score: %0.2f<br>Monoisotopic mass: %s Da<br>'
            'Molecular weight: %s Da<br>Number of bridges: %s')
        summary.html(summary_template %
                     (pred_class, score, mass, mol_weight, bridges))

        # Each extra modification is appended as its own line after the
        # markup already present in the summary.
        for modification in _get_core_peptide_extra_modifications(core):
            summary.html('%s<br>Additional modifications: %s' %
                         (summary.html(), modification))

        alternative_weights = _get_alternative_weights(core)
        if alternative_weights:
            weights_list = pq('<dl>')

            weights_title = pq('<dt>')
            weights_title.text('Alternative weights')
            weights_list.append(weights_title)

            weights_note = pq('<dd>')
            weights_note.addClass('alt-weight-desc')
            weights_note.text('(assuming N unmodified Ser/Thr residues)')
            weights_list.append(weights_note)

            # The k-th weight in the list is labelled N = k, starting at 1.
            for n_unmodified, weight in enumerate(alternative_weights, 1):
                weight_entry = pq('<dd>')

                weight_label = pq('<span>')
                weight_label.text('%0.1f Da' % weight)
                weight_label.addClass('alt-weight')

                n_label = pq('<span>')
                n_label.text('N = %d' % n_unmodified)
                n_label.addClass('alt-weight-n')

                weight_entry.append(weight_label)
                weight_entry.append(n_label)
                weights_list.append(weight_entry)

            summary.append(weights_list)

        prediction_list.append(summary)

    prediction_box.append(prediction_list)
    sidepanel.append(prediction_box)

    # --- Database cross-links -----------------------------------------------
    cross_refs = pq("<div>")
    cross_refs_title = pq('<h3>')
    cross_refs_title.text('Database cross-links')
    cross_refs.append(cross_refs_title)

    link_box = pq("<div>")
    link_box.addClass('prediction-text')

    norine_link = pq("<a>")
    norine_link.attr('href', 'http://bioinfo.lifl.fr/norine/form2.jsp')
    norine_link.attr('target', '_new')
    norine_link.text("Look up in NORINE database")
    link_box.append(norine_link)

    cross_refs.append(link_box)
    sidepanel.append(cross_refs)

    return sidepanel
Ejemplo n.º 19
0
def write_BGC(txt, info, options):
    """Write the BGC overview table to ``txt`` as tab-separated text.

    A header row is always written, followed by one row per cluster number
    in ``info.clusternrs``.

    Args:
        txt: Writable text handle; only ``write()`` is called on it.
        info: Object providing ``clusternrs``, ``seq_record``,
            ``clustertypes`` and ``accessions`` (the latter two indexed by
            cluster number).
        options: Not used by this function.
    """
    # TXT columns: BGC ID, BGC_type, detection_rules_used, BGC_range, genes,
    # subclusters, NRPSs_PKSs, signature_genes, RiPPs, pred_structure,
    # monomers
    txt.write("\t".join([
        "BGC ID", "BGC type", "detection rules used", "BGC_range", "genes",
        "subclusters", "NRPSs/PKSs", "signature_genes", "RiPPs",
        "predicted structure", "monomers"
    ]) + "\n")
    for cluster_nr in info.clusternrs:
        # Retrieve all data that will be written out
        bgc_id = "%s_c%s" % (info.seq_record.id.partition(".")[0], cluster_nr)
        cluster_feature = utils.get_cluster_by_nr(info.seq_record, cluster_nr)
        cluster_gene_features = utils.get_cluster_cds_features(
            cluster_feature, info.seq_record)
        bgc_type = info.clustertypes[cluster_nr].replace("-", ";")
        detection_rules_used = '"' + ";".join(
            get_detection_rules(cluster_feature)) + '"'
        bgc_range = ";".join([
            str(cluster_feature.location.start),
            str(cluster_feature.location.end)
        ])
        genes = ";".join(info.accessions[cluster_nr])

        if 'subclusterblast' in cluster_feature.qualifiers:
            # Keep only the text after the first tab of every hit line.
            subclusters = ";".join([
                qual.partition("\t")[2]
                for qual in cluster_feature.qualifiers['subclusterblast']
            ])
        else:
            subclusters = ""
        # TODO The subclusterblast module should probably be changed for the
        # precalcs to provide a list here of the 100% hits instead of all hits

        nrps_pks = ";".join([
            utils.get_gene_acc(cds).partition(".")[0]
            for cds in cluster_gene_features
            if 'sec_met' in cds.qualifiers and any(
                qual.startswith('NRPS/PKS Domain:')
                for qual in cds.qualifiers['sec_met'])
        ])
        signature_genes = ";".join([
            utils.get_gene_acc(cds).partition(".")[0]
            for cds in cluster_gene_features if 'sec_met' in cds.qualifiers
        ])

        # Look the core peptides up once and reuse the result for both the
        # emptiness check and the iteration.
        core_peptides = _find_core_peptides(cluster_feature, info.seq_record)
        if core_peptides:
            ripp_accessions = []
            for peptide in core_peptides:
                # Report the first CDS overlapping each core peptide; a
                # peptide without any overlapping CDS contributes nothing.
                for cds in cluster_gene_features:
                    if utils.features_overlap(cds, peptide):
                        ripp_accessions.append(
                            utils.get_gene_acc(cds).partition(".")[0])
                        break
            ripps = ";".join(ripp_accessions)
        else:
            ripps = "-"

        if 'structure' in cluster_feature.qualifiers:
            pred_structure = ";".join(cluster_feature.qualifiers['structure'])
        else:
            pred_structure = "N/A"
        monomers = utils.get_structure_pred(cluster_feature)

        # Write data to TXT
        txt.write("\t".join([
            bgc_id, bgc_type, detection_rules_used, bgc_range, genes,
            subclusters, nrps_pks, signature_genes, ripps, pred_structure,
            monomers
        ]) + "\n")