def mm9_17():
    """Derive the mm9 imprinted-DMR region dataset from ``mm9_alleleSpecificDMR``.

    The remaining positional arguments are forwarded as-is; their meaning is
    defined by ``dm.deriveNewDataset`` (not visible in this file section).
    """
    derived = "mm9.17.Imprinted_DMR.alleleSpecific_differentiallyMethylatedRegions_manuallyCurated.region"
    source = "mm9_alleleSpecificDMR"
    dm.deriveNewDataset(derived, source, [], [], None, None, True)
def mm9_12():
    """Derive the mm9 p300 enhancer dataset from ``mm9_enhancers_Visel_all``.

    The centered-promoter dataset is passed as the third argument
    (presumably regions to exclude, matching "noPromoter" in the derived
    name -- confirm against ``dm.deriveNewDataset``).
    """
    derived = "mm9.12.Enhancer_p300.ChIPseq_p300_binding_sites_Visel2009_noPromoter.region"
    source = "mm9_enhancers_Visel_all"
    promoters = ["mm9.2.Promoter_centered.refSeqGenes_minus_1000_to_1000_around_TSS.region"]
    dm.deriveNewDataset(derived, source, promoters, [], "max", None, True)
def mm9_19():
    """Derive the mm9 cancer-gene promoter dataset from ``mm9_CancerGene_Promoters``."""
    derived = "mm9.19.CancerGene_Promoter.Promoter_region_ensemblGenesMinus10000ToPlus2000AroundTSS_cancerGeneCensus.region"
    source = "mm9_CancerGene_Promoters"
    # Both list arguments are empty and no further options are set.
    dm.deriveNewDataset(derived, source, [], [], None, None, True)
def mm9_7():
    """Derive the mm9 non-CGI promoter dataset from the centered promoters.

    The bona fide CGI dataset is passed as the third argument (presumably
    regions to exclude, matching "nonOverlapping" in the derived name --
    confirm against ``dm.deriveNewDataset``).
    """
    derived = "mm9.7.NonCGI_Promoter.Promoter_centered_nonOverlapping_with_BonaFide_CGI.region"
    source = "mm9.2.Promoter_centered.refSeqGenes_minus_1000_to_1000_around_TSS.region"
    bona_fide_cgi = ["mm9.5.BonaFide_CGI.cgiHunter_oe0.6_gc50_len700.region"]
    dm.deriveNewDataset(derived, source, bona_fide_cgi, [], None, None, True)
def mm9_5():
    """Derive the mm9 bona fide CGI dataset from ``mm9_CGI_CGIHunter_GG``.

    700 is passed as the sixth argument (presumably a minimum region
    length, matching "len700" in the derived name -- confirm).
    """
    derived = "mm9.5.BonaFide_CGI.cgiHunter_oe0.6_gc50_len700.region"
    source = "mm9_CGI_CGIHunter_GG"
    dm.deriveNewDataset(derived, source, [], [], None, 700, True)
def mm9_4():
    """Derive the mm9 CpG island dataset from ``mm9_CGI_CGIHunter_GG``.

    200 is passed as the sixth argument (presumably a minimum region
    length, matching "len200" in the derived name -- confirm).
    """
    derived = "mm9.4.CpG_Island.cgiHunter_oe0.6_gc50_len200.region"
    source = "mm9_CGI_CGIHunter_GG"
    dm.deriveNewDataset(derived, source, [], [], None, 200, True)
def mm9_20():
    """Derive the mm9 cell-state-gene promoter dataset from ``mm9_CandidatePromoters``."""
    derived = "mm9.20.CellStateGene_Promoter.Promoter_region_ensemblGenesMinus10000ToPlus2000AroundTSS_manuallyCurated.region"
    source = "mm9_CandidatePromoters"
    # Both list arguments are empty and no further options are set.
    dm.deriveNewDataset(derived, source, [], [], None, None, True)
def hg18_13():
    """Derive the hg18 H3K4me2 enhancer dataset from ``hg18_enhancers_Heintzman2009``.

    The centered-promoter dataset is passed as the third argument
    (presumably regions to exclude, matching "noPromoter" in the derived
    name -- confirm against ``dm.deriveNewDataset``).
    """
    derived = "hg18.13.Enhancer_H3K4me2.ChIPseq_H3K4me2_sites_Heintzman2009_noPromoter.region"
    source = "hg18_enhancers_Heintzman2009"
    promoters = ["hg18.2.Promoter_centered.refSeqGenes_minus_1000_to_1000_around_TSS.region"]
    dm.deriveNewDataset(derived, source, promoters, [], None, None, True)
def hg18_12():
    """Build the hg18 open-chromatin (DNaseI) datasets.

    Steps, in order:
      1. Derive the Boyle2008 T-cell DNaseI support dataset.
      2. For the Duke and then the Washington/ENCODE dataset collections:
         register the collection in ``dm.regionDatasets``, merge its members
         into a ``*_SortedNotMerged`` dataset, and derive the final support
         dataset from that merge.
      3. Merge the three support datasets and derive the ``noPromoter``
         dataset from the result.
      4. Derive the ``downsampledTo500k`` dataset from the ``noPromoter``
         dataset, passing 500000 as the last argument.
    """
    boyle = "hg18_support.12.DNaseI_HS_site.hg18_tCell_DNaseI_hypersensitive_sites_Boyle2008"
    duke = "hg18_support.12.Duke_GM12878_HelaS3_K562_hepg2"
    encode = "hg18_support.12.Open_Chromatin.ENCODE_DNaseI_hypersensitive_sites"
    all_merged = "hg18_support.12.All_DNaseI_datasets_merged"
    no_promoter = "hg18.12.Open_Chromatin.DNase_FAIRE_peaks_ENCODE_noPromoter.region"
    downsampled = "hg18.12.Open_Chromatin.DNase_FAIRE_peaks_ENCODE_noPromoter_downsampledTo500k.region"
    promoters = ["hg18.2.Promoter_centered.refSeqGenes_minus_1000_to_1000_around_TSS.region"]

    dm.deriveNewDataset(boyle, "hg18_DNAseCD4Sites", [], [], None, None, True)

    # NOTE(review): in the collapsed source a lone "#" precedes the Washington
    # registration, so it is ambiguous whether that update() call was
    # commented out. It is kept active here, mirroring the Duke branch,
    # because the following merge uses the collection's keys -- confirm.
    collections = (
        (regionDatasets_DNaseI_Duke, duke),
        (regionDatasets_DNaseI_Washington, encode),
    )
    for collection, support_name in collections:
        unmerged = support_name + "_SortedNotMerged"
        dm.regionDatasets.update(collection)
        dm.mergeDatasets(unmerged, collection.keys())
        dm.deriveNewDataset(support_name, unmerged, [], [], "max", None, True, True)

    dm.mergeDatasets(all_merged, [encode, duke, boyle])
    dm.deriveNewDataset(no_promoter, all_merged, promoters, [], "max", None, True, True)
    dm.deriveNewDataset(downsampled, no_promoter, [], [], None, None, True, True, 500000)
def mm9_13():
    """Derive the mm9 H3K4me2 enhancer dataset from ``mm9_enhancers_meissner_all``.

    The centered-promoter dataset is passed as the third argument
    (presumably regions to exclude, matching "noPromoter" in the derived
    name -- confirm against ``dm.deriveNewDataset``).
    """
    derived = "mm9.13.Enhancer_H3K4me2.ChIPseq_H3K4me2_sitesWithin1kbTo100kbOfPromoter_Meissner2008_noPromoter.region"
    source = "mm9_enhancers_meissner_all"
    promoters = ["mm9.2.Promoter_centered.refSeqGenes_minus_1000_to_1000_around_TSS.region"]
    dm.deriveNewDataset(derived, source, promoters, [], "max", None, True)
def mm9_22():
    """Register the mm9 repeat datasets and derive the DNA-repeat dataset."""
    # The source dataset name must be known to dm before deriving from it.
    dm.regionDatasets.update(regionDatasetsRepeatsMM9)
    derived = "mm9.22.Repeat_DNA.repeatMasker_annotation_DNA_repeats.region"
    source = "mm9_Repeat_DNA"
    dm.deriveNewDataset(derived, source, [], [], None, None, True, True)
def mm9_11():
    """Derive the mm9 conserved-region dataset from ``mm9_mostConserved_all``.

    Promoters, CpG islands, coding exons and 3' UTRs are passed as the third
    argument (presumably regions to exclude, matching the "no..." parts of
    the derived name -- confirm). 100 is passed as the sixth argument
    (presumably a minimum length, matching "minLength100" -- confirm).
    """
    derived = "mm9.11.Conserved_Region.UCSC_mostConserved_minLength100_noPromoter_noCGI_noExon_noUTR.region"
    source = "mm9_mostConserved_all"
    third_arg = [
        "mm9.2.Promoter_centered.refSeqGenes_minus_1000_to_1000_around_TSS.region",
        "mm9.4.CpG_Island.cgiHunter_oe0.6_gc50_len200.region",
        "mm9_RefSeq_allCodingExons",
        "mm9_RefSeq_3UTR",
    ]
    dm.deriveNewDataset(derived, source, third_arg, [], "max", 100, True)
def mm9_9():
    """Derive the mm9 middle-exon dataset from ``mm9_RefSeq_middleExons``.

    Promoters and 3' UTRs are passed as the third argument (presumably
    regions to exclude, matching "noPromoter_noUTR" in the derived name --
    confirm against ``dm.deriveNewDataset``).
    """
    derived = "mm9.9.Middle_Exon.refSeqGenes_exon_noPromoter_noUTR.region"
    source = "mm9_RefSeq_middleExons"
    third_arg = [
        "mm9.2.Promoter_centered.refSeqGenes_minus_1000_to_1000_around_TSS.region",
        "mm9_RefSeq_3UTR",
    ]
    dm.deriveNewDataset(derived, source, third_arg, [], "max", None, True, True)
def hg18_22():
    """Register the hg18 repeat datasets and derive the DNA-repeat dataset."""
    # The source dataset name must be known to dm before deriving from it.
    dm.regionDatasets.update(regionDatasetsRepeatsHG18)
    derived = "hg18.22.Repeat_DNA.repeatMasker_annotation_DNA_repeats.region"
    source = "hg18_Repeat_DNA"
    dm.deriveNewDataset(derived, source, [], [], None, None, True, True)
def hg18_24():
    """Register the hg18 repeat datasets and derive the low-complexity repeat dataset."""
    # The source dataset name must be known to dm before deriving from it.
    dm.regionDatasets.update(regionDatasetsRepeatsHG18)
    derived = "hg18.24.Repeat_LowComplex.repeatMasker_annotation_low_complexity_repeats.region"
    source = "hg18_Repeat_LowComplex"
    dm.deriveNewDataset(derived, source, [], [], None, None, True, True)
def hg18_26():
    """Register the hg18 repeat datasets and derive the simple-repeat dataset."""
    # The source dataset name must be known to dm before deriving from it.
    dm.regionDatasets.update(regionDatasetsRepeatsHG18)
    derived = "hg18.26.Repeat_Simple.repeatMasker_annotation_simple_repeats.region"
    source = "hg18_Repeat_Simple"
    dm.deriveNewDataset(derived, source, [], [], None, None, True, True)
def mm9_24():
    """Register the mm9 repeat datasets and derive the low-complexity repeat dataset."""
    # The source dataset name must be known to dm before deriving from it.
    dm.regionDatasets.update(regionDatasetsRepeatsMM9)
    derived = "mm9.24.Repeat_LowComplex.repeatMasker_annotation_low_complexity_repeats.region"
    source = "mm9_Repeat_LowComplex"
    dm.deriveNewDataset(derived, source, [], [], None, None, True, True)
def mm9_8():
    """Derive the mm9 non-genic CGI dataset.

    First creates a support dataset from ``mm9_refseq_genes`` via
    ``dm.extractFlanks`` ("whole_region", 10000, 10000), then derives the
    final dataset from the len200 CpG islands with that support dataset
    passed as the third argument (presumably regions to exclude, matching
    "geneDistance10000" in the derived name -- confirm).
    """
    genes_plus_flanks = "mm9_support.8.refseq_genes_plus10k"
    dm.extractFlanks("mm9_refseq_genes", genes_plus_flanks, False, "whole_region", 10000, 10000)

    derived = "mm9.8.NonGenic_CGI.cgiHunter_oe0.6_gc50_len200_geneDistance10000.region"
    source = "mm9.4.CpG_Island.cgiHunter_oe0.6_gc50_len200.region"
    dm.deriveNewDataset(derived, source, [genes_plus_flanks], [], None, None, True)
def mm9_10():
    """Derive the mm9 3' UTR dataset from ``mm9_RefSeq_3UTR``.

    Promoters and coding exons are passed as the third argument (presumably
    regions to exclude, matching "noPromoter_noExon" in the derived name --
    confirm against ``dm.deriveNewDataset``).
    """
    derived = "mm9.10.3prim_UTR.refSeqGenes_3prim_UTR_noPromoter_noExon.region"
    source = "mm9_RefSeq_3UTR"
    third_arg = [
        "mm9.2.Promoter_centered.refSeqGenes_minus_1000_to_1000_around_TSS.region",
        "mm9_RefSeq_allCodingExons",
    ]
    dm.deriveNewDataset(derived, source, third_arg, [], "max", None, True, True)
def mm9_1():
    """Derive the mm9 Ensembl promoter dataset (-5000 to +1000 around TSS).

    Flanks are first extracted from ``mm9_ensembl_genes`` into a support
    dataset ("start", 5000, 1000); the final dataset is then derived from
    that support dataset.
    """
    flanks = "mm9_support.1.Promoter_ensembl.ensemblGenes_minus_5000_to_1000_around_TSS_flancks"
    dm.extractFlanks("mm9_ensembl_genes", flanks, True, "start", 5000, 1000)

    derived = "mm9.1.Promoter_ensembl.ensemblGenes_minus_5000_to_1000_around_TSS.region"
    dm.deriveNewDataset(derived, flanks, [], [], None, None, True, True)
def hg18_3():
    """Derive the hg18 RefSeq promoter-region dataset (-10000 to +2000 around TSS).

    Flanks are first extracted from ``hg18_refseq_genes`` into a support
    dataset ("start", 10000, 2000); the final dataset is then derived from
    that support dataset with "max" as the fifth argument.
    """
    flanks = "hg18_support.3.Promoter_region.refSeqGenes_minus_10000_to_2000_around_TSS_flancks"
    dm.extractFlanks("hg18_refseq_genes", flanks, True, "start", 10000, 2000)

    derived = "hg18.3.Promoter_region.refSeqGenes_minus_10000_to_2000_around_TSS.region"
    dm.deriveNewDataset(derived, flanks, [], [], "max", None, True, True)
def hg18_2():
    """Derive the hg18 RefSeq centered-promoter dataset (-1000 to +1000 around TSS).

    Flanks are first extracted from ``hg18_refseq_genes`` into a support
    dataset ("start", 1000, 1000); the final dataset is then derived from
    that support dataset.
    """
    flanks = "hg18_support.2.Promoter_centered.refSeqGenes_minus_1000_to_1000_around_TSS_flancks"
    dm.extractFlanks("hg18_refseq_genes", flanks, True, "start", 1000, 1000)

    derived = "hg18.2.Promoter_centered.refSeqGenes_minus_1000_to_1000_around_TSS.region"
    dm.deriveNewDataset(derived, flanks, [], [], None, None, True, True)
def hg18_6():
    """Derive the hg18 CGI-promoter dataset from the centered promoters.

    The bona fide CGI dataset is passed as the fourth argument (presumably
    regions that must overlap, matching "overlapping_with_BonaFide_CGI" in
    the derived name -- confirm against ``dm.deriveNewDataset``).
    """
    derived = "hg18.6.CGI_Promoter.Promoter_centered_overlapping_with_BonaFide_CGI.region"
    source = "hg18.2.Promoter_centered.refSeqGenes_minus_1000_to_1000_around_TSS.region"
    # A disabled earlier variant used the same derived/source names but
    # passed
    # ["hg18.7.NonCGI_Promoter.Promoter_centered_nonOverlapping_with_BonaFide_CGI.region"]
    # as the third argument and an empty list as the fourth.
    bona_fide_cgi = ["hg18.5.BonaFide_CGI.cgiHunter_oe0.6_gc50_len700.region"]
    dm.deriveNewDataset(derived, source, [], bona_fide_cgi, None, None, True)
def CancerGenePromoterSufficientMethylationOverlappingWithCGI():
    """Derive ``mm9.cancerPRomoters_CGI`` from the cancer promoters with methylation.

    Registers the methylation file (located under ``defaultFolder``) as the
    ``cancerPromoters_withMethylation`` dataset, calls ``mm9_4()`` so the
    len200 CpG island dataset has been derived, and then derives the new
    dataset with the CpG islands passed as the fourth argument (presumably
    regions that must overlap -- confirm against ``dm.deriveNewDataset``).
    """
    methylation_file = defaultFolder + "mm9.19.CancerGene_Promoter.Promoter_region_ensemblGenesMinus10000ToPlus2000AroundTSS_cancerGeneCensus.region_withMathylation.txt"
    dm.regionDatasets.update({"cancerPromoters_withMethylation": methylation_file})

    mm9_4()

    cpg_islands = ["mm9.4.CpG_Island.cgiHunter_oe0.6_gc50_len200.region"]
    dm.deriveNewDataset("mm9.cancerPRomoters_CGI", "cancerPromoters_withMethylation", [], cpg_islands, None, None, True, True)
def mm9_14():
    """Derive the mm9 CpG-island-shore dataset (len200 islands, 2 kb).

    Two flank datasets are extracted from the len200 CpG islands
    ("start", 2000, 0 and "end", 0, 2000), merged into one support dataset,
    and the final dataset is derived from that merge with "max" as the
    fifth argument.
    """
    cpg_islands = "mm9.4.CpG_Island.cgiHunter_oe0.6_gc50_len200.region"
    downstream = "mm9_support.14.CGI200_downstream"
    upstream = "mm9_support.14.CGI200_upstream"
    merged = "mm9_support.14.CpG_Island_Shore.cgiHunter_oe0.6_gc50_len200_2kb_outsideCgi"
    derived = "mm9.14.CpG_Island_Shore.cgiHunter_oe0.6_gc50_len200_2kb_outsideCgi.region"

    dm.extractFlanks(cpg_islands, downstream, False, "start", 2000, 0)
    dm.extractFlanks(cpg_islands, upstream, False, "end", 0, 2000)
    # Merge order (upstream first) is kept exactly as in the original call.
    dm.mergeDatasets(merged, [upstream, downstream])
    dm.deriveNewDataset(derived, merged, [], [], "max", None, True)
def mm9_15():
    """Derive the mm9 bona-fide-CGI-shore dataset (len700 islands, 2 kb).

    Two flank datasets are extracted from the len700 CpG islands
    ("start", 2000, 0 and "end", 0, 2000), merged into one support dataset,
    and the final dataset is derived from that merge.
    """
    bona_fide_cgi = "mm9.5.BonaFide_CGI.cgiHunter_oe0.6_gc50_len700.region"
    downstream = "mm9_support.15.CGI700_downstream"
    upstream = "mm9_support.15.CGI700_upstream"
    merged = "mm9_support.15.BonaFide_CGI_Shore.cgiHunter_oe0.6_gc50_len700_2kb_outsideCgi"
    derived = "mm9.15.BonaFide_CGI_Shore.cgiHunter_oe0.6_gc50_len700_2kb_outsideCgi.region"

    dm.extractFlanks(bona_fide_cgi, downstream, False, "start", 2000, 0)
    dm.extractFlanks(bona_fide_cgi, upstream, False, "end", 0, 2000)
    # Merge order (downstream first) is kept exactly as in the original call.
    dm.mergeDatasets(merged, [downstream, upstream])
    dm.deriveNewDataset(derived, merged, [], [], None, None, True)
def hg18_21():
    """Register the hg18 repeat datasets and derive the tandem-repeat datasets.

    Two datasets are derived from ``hg18_tandemRepeat``: the full one, and a
    ``downsampledTo500k`` one that passes two extra arguments (``True`` and
    500000).
    """
    dm.regionDatasets.update(regionDatasetsRepeatsHG18)

    source = "hg18_tandemRepeat"
    full = "hg18.21.Tandem_Repeat.tandemRepeatFinder_annotation.region"
    downsampled = "hg18.21.Tandem_Repeat.tandemRepeatFinder_annotation_downsampledTo500k.region"
    dm.deriveNewDataset(full, source, [], [], None, None, True)
    dm.deriveNewDataset(downsampled, source, [], [], None, None, True, True, 500000)
def mm9_21():
    """Register the mm9 repeat datasets and derive the tandem-repeat datasets.

    Two datasets are derived from ``mm9_tandemRepeat``, both with "max" as
    the fifth argument: the full one, and a ``downsampledTo500k`` one that
    passes two extra arguments (``True`` and 500000).
    """
    dm.regionDatasets.update(regionDatasetsRepeatsMM9)

    source = "mm9_tandemRepeat"
    full = "mm9.21.Tandem_Repeat.tandemRepeatFinder_annotation.region"
    downsampled = "mm9.21.Tandem_Repeat.tandemRepeatFinder_annotation_downsampledTo500k.region"
    dm.deriveNewDataset(full, source, [], [], "max", None, True)
    dm.deriveNewDataset(downsampled, source, [], [], "max", None, True, True, 500000)
def mm9_27():
    """Register the mm9 repeat datasets and derive the SINE repeat datasets.

    Two datasets are derived from ``mm9_Repeat_SINE``: the full one, and a
    ``downsampledTo500k`` one that additionally passes 500000 as the last
    argument.
    """
    dm.regionDatasets.update(regionDatasetsRepeatsMM9)

    source = "mm9_Repeat_SINE"
    full = "mm9.27.Repeat_SINE.repeatMasker_annotation_short_interspersed_nucleotide_elements.region"
    downsampled = "mm9.27.Repeat_SINE.repeatMasker_annotation_short_interspersed_nucleotide_elements_downsampledTo500k.region"
    dm.deriveNewDataset(full, source, [], [], None, None, True, True)
    dm.deriveNewDataset(downsampled, source, [], [], None, None, True, True, 500000)
def mm9_26():
    """Register the mm9 repeat datasets and derive the simple-repeat datasets.

    Two datasets are derived from ``mm9_Repeat_Simple``: the full one, and a
    ``downsampledTo500k`` one that additionally passes 500000 as the last
    argument.
    """
    dm.regionDatasets.update(regionDatasetsRepeatsMM9)

    source = "mm9_Repeat_Simple"
    full = "mm9.26.Repeat_Simple.repeatMasker_annotation_simple_repeats.region"
    downsampled = "mm9.26.Repeat_Simple.repeatMasker_annotation_simple_repeats_downsampledTo500k.region"
    dm.deriveNewDataset(full, source, [], [], None, None, True, True)
    dm.deriveNewDataset(downsampled, source, [], [], None, None, True, True, 500000)