def mm9_8():
    """Build the mm9.8 "NonGenic_CGI" region dataset.

    Two calls are issued on the module-level ``dm`` object:

    1. ``dm.extractFlanks`` reads ``mm9_refseq_genes`` in
       ``"whole_region"`` mode with 10000/10000 as the two size
       arguments and writes ``mm9_support.8.refseq_genes_plus10k``.
    2. ``dm.deriveNewDataset`` creates the final ``.region`` dataset from
       the len200 CpG-island dataset, passing the support dataset in the
       first list argument.

    NOTE(review): judging by the dataset names, the first list presumably
    names datasets whose regions are removed from the base -- confirm
    against ``dm.deriveNewDataset``.
    """
    refseq_genes = "mm9_refseq_genes"
    genes_plus_10k = "mm9_support.8.refseq_genes_plus10k"
    cgi_len200 = "mm9.4.CpG_Island.cgiHunter_oe0.6_gc50_len200.region"
    target = ("mm9.8.NonGenic_CGI."
              "cgiHunter_oe0.6_gc50_len200_geneDistance10000.region")

    dm.extractFlanks(refseq_genes, genes_plus_10k,
                     False, "whole_region", 10000, 10000)
    dm.deriveNewDataset(target, cgi_len200,
                        [genes_plus_10k], [],
                        None, None, True)
def hg18_3():
    """Build the hg18.3 "Promoter_region" dataset (-10000..+2000 around TSS).

    ``dm.extractFlanks`` is run on ``hg18_refseq_genes`` in ``"start"``
    mode with sizes 10000 and 2000 (third argument ``True``), producing a
    support dataset. ``dm.deriveNewDataset`` then turns that support
    dataset into the final ``.region`` dataset with empty list arguments,
    ``"max"`` as the fifth argument and two trailing ``True`` flags.

    NOTE(review): the ``True`` passed to ``extractFlanks`` is presumably a
    strand-awareness switch -- confirm against the ``dm`` implementation.
    """
    # "flancks" is the spelling used by the existing dataset name; it is a
    # runtime identifier and must not be "corrected".
    flank_dataset = ("hg18_support.3.Promoter_region."
                     "refSeqGenes_minus_10000_to_2000_around_TSS_flancks")
    target = ("hg18.3.Promoter_region."
              "refSeqGenes_minus_10000_to_2000_around_TSS.region")

    dm.extractFlanks("hg18_refseq_genes", flank_dataset,
                     True, "start", 10000, 2000)
    dm.deriveNewDataset(target, flank_dataset,
                        [], [],
                        "max", None, True, True)
def hg18_2():
    """Build the hg18.2 "Promoter_centered" dataset (-1000..+1000 around TSS).

    ``dm.extractFlanks`` is run on ``hg18_refseq_genes`` in ``"start"``
    mode with sizes 1000 and 1000 (third argument ``True``), producing a
    support dataset. ``dm.deriveNewDataset`` then creates the final
    ``.region`` dataset from it with empty list arguments, ``None`` for
    the fifth and sixth arguments and two trailing ``True`` flags.
    """
    # "flancks" is the spelling used by the existing dataset name; it is a
    # runtime identifier and must not be "corrected".
    flank_dataset = ("hg18_support.2.Promoter_centered."
                     "refSeqGenes_minus_1000_to_1000_around_TSS_flancks")
    target = ("hg18.2.Promoter_centered."
              "refSeqGenes_minus_1000_to_1000_around_TSS.region")

    dm.extractFlanks("hg18_refseq_genes", flank_dataset,
                     True, "start", 1000, 1000)
    dm.deriveNewDataset(target, flank_dataset,
                        [], [],
                        None, None, True, True)
def mm9_1():
    """Build the mm9.1 "Promoter_ensembl" dataset (-5000..+1000 around TSS).

    ``dm.extractFlanks`` is run on ``mm9_ensembl_genes`` in ``"start"``
    mode with sizes 5000 and 1000 (third argument ``True``), producing a
    support dataset. ``dm.deriveNewDataset`` then creates the final
    ``.region`` dataset from it with empty list arguments, ``None`` for
    the fifth and sixth arguments and two trailing ``True`` flags.
    """
    # "flancks" is the spelling used by the existing dataset name; it is a
    # runtime identifier and must not be "corrected".
    flank_dataset = ("mm9_support.1.Promoter_ensembl."
                     "ensemblGenes_minus_5000_to_1000_around_TSS_flancks")
    target = ("mm9.1.Promoter_ensembl."
              "ensemblGenes_minus_5000_to_1000_around_TSS.region")

    dm.extractFlanks("mm9_ensembl_genes", flank_dataset,
                     True, "start", 5000, 1000)
    dm.deriveNewDataset(target, flank_dataset,
                        [], [],
                        None, None, True, True)
def mm9_15():
    """Build the mm9.15 "BonaFide_CGI_Shore" dataset (2kb outside len700 CGIs).

    Steps, all on the module-level ``dm`` object:

    1. ``extractFlanks`` on the len700 CGI dataset in ``"start"`` mode
       with sizes (2000, 0) -> ``mm9_support.15.CGI700_downstream``.
    2. ``extractFlanks`` on the same dataset in ``"end"`` mode with sizes
       (0, 2000) -> ``mm9_support.15.CGI700_upstream``.
    3. ``mergeDatasets`` combines the two (downstream listed first).
    4. ``deriveNewDataset`` creates the final ``.region`` dataset from the
       merged support dataset.
    """
    bona_fide_cgi = "mm9.5.BonaFide_CGI.cgiHunter_oe0.6_gc50_len700.region"
    start_side = "mm9_support.15.CGI700_downstream"
    end_side = "mm9_support.15.CGI700_upstream"
    shores = ("mm9_support.15.BonaFide_CGI_Shore."
              "cgiHunter_oe0.6_gc50_len700_2kb_outsideCgi")
    target = ("mm9.15.BonaFide_CGI_Shore."
              "cgiHunter_oe0.6_gc50_len700_2kb_outsideCgi.region")

    # One flank per island border; sizes are (2000, 0) at the start and
    # (0, 2000) at the end.
    dm.extractFlanks(bona_fide_cgi, start_side, False, "start", 2000, 0)
    dm.extractFlanks(bona_fide_cgi, end_side, False, "end", 0, 2000)

    dm.mergeDatasets(shores, [start_side, end_side])
    dm.deriveNewDataset(target, shores,
                        [], [],
                        None, None, True)
def mm9_14():
    """Build the mm9.14 "CpG_Island_Shore" dataset (2kb outside len200 CGIs).

    Steps, all on the module-level ``dm`` object:

    1. ``extractFlanks`` on the len200 CGI dataset in ``"start"`` mode
       with sizes (2000, 0) -> ``mm9_support.14.CGI200_downstream``.
    2. ``extractFlanks`` on the same dataset in ``"end"`` mode with sizes
       (0, 2000) -> ``mm9_support.14.CGI200_upstream``.
    3. ``mergeDatasets`` combines the two (upstream listed first).
    4. ``deriveNewDataset`` creates the final ``.region`` dataset from the
       merged support dataset, with ``"max"`` as the fifth argument.
    """
    cpg_islands = "mm9.4.CpG_Island.cgiHunter_oe0.6_gc50_len200.region"
    start_side = "mm9_support.14.CGI200_downstream"
    end_side = "mm9_support.14.CGI200_upstream"
    shores = ("mm9_support.14.CpG_Island_Shore."
              "cgiHunter_oe0.6_gc50_len200_2kb_outsideCgi")
    target = ("mm9.14.CpG_Island_Shore."
              "cgiHunter_oe0.6_gc50_len200_2kb_outsideCgi.region")

    # One flank per island border; sizes are (2000, 0) at the start and
    # (0, 2000) at the end.
    dm.extractFlanks(cpg_islands, start_side, False, "start", 2000, 0)
    dm.extractFlanks(cpg_islands, end_side, False, "end", 0, 2000)

    # The merge input order here is upstream first, then downstream.
    dm.mergeDatasets(shores, [end_side, start_side])
    dm.deriveNewDataset(target, shores,
                        [], [],
                        "max", None, True)
def mm9_16():
    """Build the mm9.16 "BonaFide_CGI_Border" dataset (500bp on CGI borders).

    Steps, all on the module-level ``dm`` object:

    1. ``extractFlanks`` on the len700 CGI dataset in ``"start"`` mode
       with sizes (250, 250) -> ``...500bp_centered_downstream``.
    2. ``extractFlanks`` on the same dataset in ``"end"`` mode with sizes
       (250, 250) -> ``...500bp_centered_upstream``.
    3. ``mergeDatasets`` combines the two (downstream listed first).
    4. ``deriveNewDataset`` creates the final ``.region`` dataset from the
       merged support dataset, with ``"max"`` as the fifth argument.
    """
    bona_fide_cgi = "mm9.5.BonaFide_CGI.cgiHunter_oe0.6_gc50_len700.region"
    start_border = "mm9_support.16.500bp_centered_downstream"
    end_border = "mm9_support.16.500bp_centered_upstream"
    borders = ("mm9_support.16.BonaFide_CGI_Border."
               "cgiHunter_oe0.6_gc50_len700_500bp_centeredOnBorders")
    target = ("mm9.16.BonaFide_CGI_Border."
              "cgiHunter_oe0.6_gc50_len700_500bp_centeredOnBorders.region")

    # Fixed 250/250 sizes are used on both borders. A disabled earlier
    # variant replaced the island-facing size with the expression string
    # "int(min(250,(chromend-chromstart)*0.25))":
    #   start border: 250, "int(min(250,(chromend-chromstart)*0.25))"
    #   end border:   "int(min(250,(chromend-chromstart)*0.25))", 250
    dm.extractFlanks(bona_fide_cgi, start_border, False, "start", 250, 250)
    dm.extractFlanks(bona_fide_cgi, end_border, False, "end", 250, 250)

    dm.mergeDatasets(borders, [start_border, end_border])
    dm.deriveNewDataset(target, borders,
                        [], [],
                        "max", None, True)
def hg18_12_new():
    """Build the hg18.12_new "Open_Chromatin" dataset without promoters.

    Three DNaseI sources are each passed through ``dm.extractFlanks`` in
    ``"whole_region"`` mode, using the same expression string for both
    size arguments, to give a ``*_1kb`` support dataset:

    * the Boyle2008 T-cell DNaseI hypersensitive-site dataset, used as is;
    * the Duke datasets: ``regionDatasets_DNaseI_Duke`` is registered in
      ``dm.regionDatasets`` and its keys are merged first;
    * the Washington datasets: ``regionDatasets_DNaseI_Washington`` is
      registered and merged the same way.

    The three ``*_1kb`` datasets are then merged, and
    ``dm.deriveNewDataset`` creates the final ``.region`` dataset, passing
    the hg18.2 promoter-centered dataset in the first list argument.

    NOTE(review): the size expression presumably pads regions shorter
    than 1kb up to about 1kb and leaves longer ones untouched -- confirm
    how ``dm.extractFlanks`` evaluates string arguments.
    NOTE(review): the first list passed to ``deriveNewDataset`` presumably
    names datasets to exclude (the target is called "noPromoter") --
    confirm against ``dm.deriveNewDataset``.
    """
    # Expression string handed to dm.extractFlanks for both size arguments.
    pad_expr = "max(0,int(500-(chromend-chromstart)/2))"

    boyle = ("hg18_support.12.DNaseI_HS_site."
             "hg18_tCell_DNaseI_hypersensitive_sites_Boyle2008")
    boyle_1kb = ("hg18_support.12.DNaseI_HS_site."
                 "hg18_tCell_DNaseI_hypersensitive_sites_Boyle2008_1kb")
    duke = "hg18_support.12.Duke_GM12878_HelaS3_K562_hepg2_SortedNotMerged"
    duke_1kb = ("hg18_support.12."
                "Duke_GM12878_HelaS3_K562_hepg2_SortedNotMerged_1kb")
    washington = ("hg18_support.12.Open_Chromatin."
                  "ENCODE_DNaseI_hypersensitive_sites_SortedNotMerged")
    washington_1kb = ("hg18_support.12.Open_Chromatin."
                      "ENCODE_DNaseI_hypersensitive_sites_SortedNotMerged_1kb")
    all_merged = "hg18_support.12.All_DNaseI_datasets_merged"
    promoters = ("hg18.2.Promoter_centered."
                 "refSeqGenes_minus_1000_to_1000_around_TSS.region")
    target = ("hg18.12_new.Open_Chromatin."
              "DNase_FAIRE_peaks_ENCODE_noPromoter.region")

    # Boyle 2008 T-cell sites.
    dm.extractFlanks(boyle, boyle_1kb,
                     False, "whole_region", pad_expr, pad_expr)

    # Duke tracks: register the source datasets before merging them.
    dm.regionDatasets.update(regionDatasets_DNaseI_Duke)
    dm.mergeDatasets(duke, regionDatasets_DNaseI_Duke.keys())
    dm.extractFlanks(duke, duke_1kb,
                     False, "whole_region", pad_expr, pad_expr)

    # Washington tracks: same procedure.
    dm.regionDatasets.update(regionDatasets_DNaseI_Washington)
    dm.mergeDatasets(washington, regionDatasets_DNaseI_Washington.keys())
    dm.extractFlanks(washington, washington_1kb,
                     False, "whole_region", pad_expr, pad_expr)

    # A disabled block used to register the three *_1kb outputs in
    # dm.regionDatasets by hand, mapping each name to
    # defaultFolder + name + ".txt". It is not executed.
    dm.mergeDatasets(all_merged, [boyle_1kb, duke_1kb, washington_1kb])

    dm.deriveNewDataset(target, all_merged,
                        [promoters], [],
                        "max", None, True, True)
def mm9_18():
    """Build the mm9.18 "Imprinted_Promoter" dataset (-10000..+2000 around TSS).

    A single ``dm.extractFlanks`` call reads ``mm9_imprintedGenes`` in
    ``"start"`` mode with sizes 10000 and 2000 (third argument ``True``)
    and writes straight to the final ``.region`` dataset name; no
    ``dm.deriveNewDataset`` step is issued here.
    """
    target = ("mm9.18.Imprinted_Promoter."
              "Promoter_region_refSeqGenesMinus10000ToPlus2000AroundTSS"
              "_otagoList.region")

    dm.extractFlanks("mm9_imprintedGenes", target,
                     True, "start", 10000, 2000)