def load_genecluster_info(seq_record, options, searchtype="general"):
    """Collect per-cluster data and store it on ``seq_record.qgeneclusterdata``.

    ``seq_record.qgeneclusterdata`` is reset to an empty dict and then filled
    with one entry per cluster feature, keyed by the cluster number. Each
    entry is a 20-item list in this fixed order:

        clustertype, clustersize, clustergenes, annotations, starts, ends,
        strands, pksnrpsprots, pksnrpsprotsnames, pksnrpsdomains,
        substrspecnrpspredictordict, substrspecminowadict,
        substrspecpkssigdict, substrspecconsensusdict, gtrs, transporters,
        colors, hitgeneclusterdata, structpred, krpredictionsdict

    Arguments:
        seq_record -- record whose cluster features are processed; it is
                      modified in place.
        options    -- only ``options.clusterblast`` is read here; when it is
                      falsy the ClusterBlast slot is an empty dict.
        searchtype -- passed through to ``retrieve_clusterblast_info``.
    """
    # Only the gene -> smCOG mapping is needed; the descriptions are unused.
    smcog_by_gene, _ = utils.get_smcog_annotations(seq_record)

    # smCOG identifiers handed to retrieve_gene_cluster_annotations as the
    # glycosyltransferase and transporter lists.
    gtr_cogs = ['SMCOG1045', 'SMCOG1062', 'SMCOG1102']
    transporter_cogs = [
        'SMCOG1000', 'SMCOG1005', 'SMCOG1011', 'SMCOG1020', 'SMCOG1029',
        'SMCOG1033', 'SMCOG1035', 'SMCOG1044', 'SMCOG1065', 'SMCOG1067',
        'SMCOG1069', 'SMCOG1074', 'SMCOG1085', 'SMCOG1096', 'SMCOG1106',
        'SMCOG1118', 'SMCOG1131', 'SMCOG1166', 'SMCOG1169', 'SMCOG1184',
        'SMCOG1202', 'SMCOG1205', 'SMCOG1214', 'SMCOG1234', 'SMCOG1243',
        'SMCOG1245', 'SMCOG1252', 'SMCOG1254', 'SMCOG1288',
    ]

    seq_record.qgeneclusterdata = {}
    for cluster_feature in utils.get_sorted_cluster_features(seq_record):
        cluster_nr = utils.get_cluster_number(cluster_feature)

        # Gene-level annotations for this cluster.
        (genes, cluster_type, gene_annotations, gene_colors, gene_starts,
         gene_ends, gene_strands, pksnrps_prots, gtr_genes,
         transporter_genes, cluster_size) = retrieve_gene_cluster_annotations(
             seq_record, smcog_by_gene, gtr_cogs, transporter_cogs,
             cluster_nr)

        # ClusterBlast hits are only looked up when the option is enabled.
        clusterblast_hits = (
            retrieve_clusterblast_info(seq_record, cluster_nr,
                                       searchtype=searchtype)
            if options.clusterblast else {})

        # The third and fourth returned values are not stored, so they are
        # discarded here.
        (pksnrps_names, pksnrps_domains, _, _, nrpspredictor_specs,
         minowa_specs, pkssig_specs, consensus_specs, kr_predictions,
         structure_pred) = retrieve_pksnrps_info(
             seq_record, cluster_nr, pksnrps_prots)

        # Positional layout consumed elsewhere; do not reorder.
        seq_record.qgeneclusterdata[cluster_nr] = [
            cluster_type,
            cluster_size,
            genes,
            gene_annotations,
            gene_starts,
            gene_ends,
            gene_strands,
            pksnrps_prots,
            pksnrps_names,
            pksnrps_domains,
            nrpspredictor_specs,
            minowa_specs,
            pkssig_specs,
            consensus_specs,
            gtr_genes,
            transporter_genes,
            gene_colors,
            clusterblast_hits,
            structure_pred,
            kr_predictions,
        ]
def generate_searchgtr_htmls(seq_records, options):
    """Write one ``<gene_id>_searchgtr.html`` form per glycosyltransferase CDS.

    For every CDS feature whose smCOG annotation is one of the
    glycosyltransferase smCOGs listed below, a file is written to the
    ``html`` sub-folder of ``options.full_outputfolder_path`` (created on
    first use). The file consists of the first template part with the
    placeholder ``GlycTr`` replaced by the gene id, then the gene id and its
    amino acid sequence on separate lines, then the second template part.

    ``options.searchgtr_links`` is reset to an empty dict and filled with
    ``"<seq_record.id>_<gene_id>" -> "html/<gene_id>_searchgtr.html"`` entries
    (path separator is ``os.sep``).

    Arguments:
        seq_records -- iterable of records to scan for CDS features.
        options     -- reads ``full_outputfolder_path``; ``searchgtr_links``
                       is overwritten.
    """
    # smCOG identifiers treated as glycosyltransferases.
    gtrcoglist = ['SMCOG1045', 'SMCOG1062', 'SMCOG1102']
    searchgtrformtemplateparts = load_searchgtr_search_form_template()
    options.searchgtr_links = {}

    for seq_record in seq_records:
        smcogdict, _ = utils.get_smcog_annotations(seq_record)
        for feature in utils.get_cds_features(seq_record):
            gene_id = utils.get_gene_id(feature)
            # Membership test via `in`: dict.has_key() is gone in Python 3.
            if gene_id not in smcogdict:
                continue
            if smcogdict[gene_id] not in gtrcoglist:
                continue

            # Create the output folder lazily, only once a hit is found.
            html_dir = options.full_outputfolder_path + os.sep + "html"
            if not os.path.exists(html_dir):
                os.mkdir(html_dir)

            form_name = gene_id + "_searchgtr.html"
            formfileloc = html_dir + os.sep + form_name
            link_loc = "html" + os.sep + form_name
            options.searchgtr_links[seq_record.id + "_" + gene_id] = link_loc

            specificformtemplate = searchgtrformtemplateparts[0].replace(
                "GlycTr", gene_id)
            # The context manager closes the file even if a write fails.
            with open(formfileloc, "w") as formfile:
                formfile.write(specificformtemplate)
                formfile.write("%s\n%s" % (gene_id,
                                           utils.get_aa_sequence(feature)))
                formfile.write(searchgtrformtemplateparts[1])
def test_get_smcog_annotations(self):
    """Test utils.get_smcog_annotations()

    Checks both returned mappings for the fixture record ``self.rec``:
    gene id -> smCOG id, and smCOG id -> description.
    """
    actual_ids, actual_descriptions = utils.get_smcog_annotations(self.rec)

    # Expected gene id -> smCOG id mapping.
    wanted_ids = {
        'orf0001': 'SMCOG0001',
        'orf0003': 'SMCOG0003',
        'orf0006': 'SMCOG0006',
    }
    # Expected smCOG id -> description mapping; the trailing space in each
    # description is part of the expected value.
    wanted_descriptions = {
        'SMCOG0001': 'FAKE0001 ',
        'SMCOG0003': 'FAKE0003 ',
        'SMCOG0006': 'FAKE0006 ',
    }

    self.assertEqual(wanted_ids, actual_ids)
    self.assertEqual(wanted_descriptions, actual_descriptions)