예제 #1
0
    def test_get_structure_pred(self):
        """Check the value utils.get_structure_pred() reports for a cluster feature."""
        feature = FakeFeature('cluster', FeatureLocation(23, 42),
                              {'product': ['fake']})

        # With the product 'fake' and no note, 'N/A' is reported.
        result = utils.get_structure_pred(feature)
        self.assertEqual('N/A', result)

        # With the product 'ectoine', the product name itself is reported.
        feature.qualifiers['product'][0] = 'ectoine'
        result = utils.get_structure_pred(feature)
        self.assertEqual('ectoine', result)

        # Once a 'Monomers prediction:' note is present, its value is reported.
        feature.qualifiers['note'] = ['Monomers prediction: fake']
        result = utils.get_structure_pred(feature)
        self.assertEqual('fake', result)
예제 #2
0
def generate_structure_images(seq_records, options):
    """Generate structure images from the monomer predictions of cluster features.

    For every record, the structure prediction of each cluster feature is
    collected (keyed by cluster number) and handed to
    generate_chemical_structure_preds(). Records that yield no predictions
    are skipped.
    """
    for record in seq_records:
        # Temporary workaround: the pksnrpsvars state is rebuilt from the
        # record itself instead of being passed in.
        storage = utils.Storage()
        storage.compound_pred_dict = {}
        storage.failedstructures = []

        for cluster in utils.get_cluster_features(record):
            cluster_number = utils.get_cluster_number(cluster)
            prediction = utils.get_structure_pred(cluster)
            storage.compound_pred_dict[cluster_number] = prediction

        if not storage.compound_pred_dict:
            continue
        generate_chemical_structure_preds(storage, record, options)
예제 #3
0
def retrieve_pksnrps_info(seq_record, geneclusternr, pksnrpsprots):
    """Collect NRPS/PKS domain and substrate-specificity data for a cluster.

    Args:
        seq_record: record passed on to the ``utils`` lookup helpers.
        geneclusternr: number of the cluster whose structure prediction is
            returned.
        pksnrpsprots: iterable of protein names; each must be a key of the
            domain dict returned by utils.get_nrpspks_domain_dict().

    Returns:
        An 8-tuple:
        ``pksnrpsprotsnames`` - gene ids of all PKS/NRPS CDS features,
        ``pksnrpsdomains`` - protein name -> [numbered domain names,
        {numbered domain name: [entry[1], entry[2]]}],
        ``substrspecnrpspredictordict`` - "<protein>_A<n>" -> [code pred, SVM pred],
        ``substrspecminowadict`` - "<protein>_<A|AT|CAL><n>" -> Minowa pred,
        ``substrspecpkssigdict`` - "<protein>_AT<n>" -> PKS code pred,
        ``substrspecconsensusdict`` - "<protein>_<A|AT|CAL><n>" -> consensus pred,
        ``krpredictionsdict`` - "<protein>_KR<n>" -> [activity pred, stereo pred],
        ``structpred`` - structure prediction of cluster ``geneclusternr``.

    Raises:
        KeyError: if a protein or a prediction key is missing from the
            corresponding lookup table.
    """
    pksnrpsprotsnames = [utils.get_gene_id(cds) for cds in utils.get_pksnrps_cds_features(seq_record)]
    domaindict = utils.get_nrpspks_domain_dict(seq_record)
    substr_spec_preds = utils.get_nrpspks_substr_spec_preds(seq_record)
    pksnrpsdomains = {}
    substrspecnrpspredictordict = {}
    substrspecminowadict = {}
    substrspecpkssigdict = {}
    substrspecconsensusdict = {}
    krpredictionsdict = {}
    for i in pksnrpsprots:
        domlist = []
        domsdetails = {}
        for j in domaindict[i]:
            # Number repeated domain types per protein: first free suffix wins.
            nr = 1
            while j[0] + str(nr) in domlist:
                nr += 1
            domname = j[0] + str(nr)
            domlist.append(domname)
            # presumably j[1]/j[2] are the domain coordinates - TODO confirm
            domsdetails[domname] = [j[1], j[2]]
            # The checks below are deliberately independent (not elif), as in
            # the original code, so a name matching several patterns is
            # recorded for each of them.
            if "AMP-binding" in domname or "A-OX" in domname:
                key = i + "_A" + str(nr)
                substrspecminowadict[key] = substr_spec_preds.minowa_nrps_preds[key]
                substrspecnrpspredictordict[key] = [
                    substr_spec_preds.nrps_code_preds[key],
                    substr_spec_preds.nrps_svm_preds[key],
                ]
                substrspecconsensusdict[key] = substr_spec_preds.consensuspreds[key]
            if "PKS_AT" in domname:
                key = i + "_AT" + str(nr)
                substrspecminowadict[key] = substr_spec_preds.minowa_pks_preds[key]
                substrspecpkssigdict[key] = substr_spec_preds.pks_code_preds[key]
                substrspecconsensusdict[key] = substr_spec_preds.consensuspreds[key]
            # The original repeated this CAL_domain branch twice verbatim; the
            # second copy only re-assigned the same values and was dropped.
            if "CAL_domain" in domname:
                key = i + "_CAL" + str(nr)
                substrspecminowadict[key] = substr_spec_preds.minowa_cal_preds[key]
                substrspecconsensusdict[key] = substr_spec_preds.consensuspreds[key]
            if "PKS_KR" in domname:
                key = i + "_KR" + str(nr)
                krpredictionsdict[key] = [
                    substr_spec_preds.kr_activity_preds[key],
                    substr_spec_preds.kr_stereo_preds[key],
                ]
        pksnrpsdomains[i] = [domlist, domsdetails]
    structpred = utils.get_structure_pred(utils.get_cluster_by_nr(seq_record, geneclusternr))
    return (pksnrpsprotsnames, pksnrpsdomains, substrspecnrpspredictordict,
            substrspecminowadict, substrspecpkssigdict,
            substrspecconsensusdict, krpredictionsdict, structpred)
예제 #4
0
def write_BGC(txt, info, options):
    """Write the BGC table to ``txt`` as tab-separated text.

    A header line is written first, followed by one line per cluster number
    in ``info.clusternrs``. Columns, in order: BGC ID, BGC type, detection
    rules used, BGC_range, genes, subclusters, NRPSs/PKSs, signature_genes,
    RiPPs, predicted structure, monomers.

    Args:
        txt: object with a ``write`` method that accepts text.
        info: object providing ``clusternrs``, ``seq_record``,
            ``clustertypes`` and ``accessions``.
        options: not used by this function.
    """
    txt.write("\t".join([
        "BGC ID", "BGC type", "detection rules used", "BGC_range", "genes",
        "subclusters", "NRPSs/PKSs", "signature_genes", "RiPPs",
        "predicted structure", "monomers"
    ]) + "\n")
    for BGCnr in info.clusternrs:
        # Retrieve all data that will be written out
        BGC_ID = "%s_c%s" % (info.seq_record.id.partition(".")[0], BGCnr)
        cluster_feature = utils.get_cluster_by_nr(info.seq_record, BGCnr)
        cluster_gene_features = utils.get_cluster_cds_features(
            cluster_feature, info.seq_record)
        BGC_type = info.clustertypes[BGCnr].replace("-", ";")
        detection_rules_used = '"' + ";".join(
            get_detection_rules(cluster_feature)) + '"'
        BGC_range = ";".join([
            str(cluster_feature.location.start),
            str(cluster_feature.location.end)
        ])
        genes = ";".join(info.accessions[BGCnr])
        if 'subclusterblast' in cluster_feature.qualifiers:
            # Keep only the part after the first tab of each qualifier value
            subclusters = ";".join([
                qual.partition("\t")[2]
                for qual in cluster_feature.qualifiers['subclusterblast']
            ])
        else:
            subclusters = ""
        #TODO The subclusterblast module should probably be changed for the precalcs to provide a list here of the 100% hits instead of all hits
        # CDS features carrying at least one 'NRPS/PKS Domain:' sec_met entry
        NRPSs_PKSs = ";".join([
            utils.get_gene_acc(cds).partition(".")[0]
            for cds in cluster_gene_features
            if 'sec_met' in cds.qualifiers and any(
                qual.startswith('NRPS/PKS Domain:')
                for qual in cds.qualifiers['sec_met'])
        ])
        signature_genes = ";".join([
            utils.get_gene_acc(cds).partition(".")[0]
            for cds in cluster_gene_features if 'sec_met' in cds.qualifiers
        ])
        # Look the core peptides up once and reuse the result; the original
        # called _find_core_peptides() twice with identical arguments.
        core_peptides = _find_core_peptides(cluster_feature, info.seq_record)
        if len(core_peptides) != 0:
            ripp_list = []
            for peptide in core_peptides:
                # Record only the first CDS overlapping each core peptide
                for cds in cluster_gene_features:
                    if utils.features_overlap(cds, peptide):
                        ripp_list.append(
                            utils.get_gene_acc(cds).partition(".")[0])
                        break
            RiPPs = ";".join(ripp_list)
        else:
            RiPPs = "-"
        if 'structure' in cluster_feature.qualifiers:
            pred_structure = ";".join(cluster_feature.qualifiers['structure'])
        else:
            pred_structure = "N/A"
        monomers = utils.get_structure_pred(cluster_feature)
        # Write data to TXT
        txt.write("\t".join([
            BGC_ID, BGC_type, detection_rules_used, BGC_range, genes,
            subclusters, NRPSs_PKSs, signature_genes, RiPPs, pred_structure,
            monomers
        ]) + "\n")