Ejemplo n.º 1
0
def mode_2(exp_matrix, thresh):
    """Write, for every region set, each peak together with its genes.

    For each region set returned by ``exp_matrix.get_regionsets()`` a file
    named ``region_<name>.data`` is written to the current working
    directory. Every line is tab separated and holds: chromosome, start,
    end, the ``data`` attribute recorded for that region, and the
    comma-joined genes associated with the peak (``NA`` when there are
    none).

    The function reads the module-level names ``gene_file`` and
    ``genome_file`` and prints the number of mapped genes to stdout.

    Args:
        exp_matrix: object exposing ``get_regionsets()``; each region set
            must be iterable and provide ``name`` and ``fileName``.
        thresh: passed through as ``threshDist`` to
            ``GenomicRegionSet.filter_by_gene_association_old``.

    Raises:
        KeyError: if a peak reported by the association step has no
            matching ``(chrom, start, end)`` entry among the regions.
    """
    # Remember the data column of every region, keyed by its coordinates.
    # NOTE(review): the lookup below assumes region.initial / region.final
    # are ints matching the parsed "chrom:start-end" keys - confirm.
    value = {}
    for regions in exp_matrix.get_regionsets():
        for region in regions:
            value[(region.chrom, region.initial, region.final)] = region.data

    for region in exp_matrix.get_regionsets():
        # The context manager guarantees the file is closed even when the
        # association step or a lookup raises.
        with open("region_" + str(region.name) + ".data", 'w') as f:
            region_set = GenomicRegionSet("")
            _, _, mappedGenes, _, gene_peaks_mapping = region_set.filter_by_gene_association_old(
                region.fileName, None, gene_file, genome_file, threshDist=thresh)

            print(mappedGenes)

            for peak, gene_list in gene_peaks_mapping.items():
                chrom, raw_positions = peak.split(':')
                start, end = (int(pos) for pos in raw_positions.split('-'))

                # An unassigned peak shows up as an empty string; drop all
                # such entries so they never end up in the joined field.
                assigned = [gene for gene in gene_list if gene]
                gene_field = ','.join(assigned) if assigned else 'NA'

                print(chrom, start, end, value[(chrom, start, end)], gene_field, sep='\t', file=f)
Ejemplo n.º 2
0
def mode_3(exp_matrix, thresh, type_file):
    """Write, for every region set, one aggregated peak score per gene.

    A score is first recorded for every region, keyed by the string
    ``"<chrom>:<initial>-<final>"``. Then, for each region set, peaks are
    associated with genes and a file ``region_<name>.data`` is written to
    the current working directory. Each line is tab separated and holds:
    gene, aggregated score, and the comma-joined peaks assigned to it.

    The function reads the module-level names ``gene_file``,
    ``genome_file`` and ``options.metric`` and logs progress to stderr.

    Args:
        exp_matrix: object exposing ``get_regionsets()``; each region set
            must be iterable and provide ``name`` and ``fileName``.
        thresh: passed through as ``threshDist`` to
            ``GenomicRegionSet.filter_by_gene_association_old``.
        type_file: ``"ODIN"`` takes the last ``;``-separated item of the
            last tab-separated column of ``region.data``; ``"THOR"`` takes
            the last ``;``-separated item of ``region.data``; anything
            else stores ``region.data`` unchanged.

    Raises:
        ValueError: if ``options.metric`` is neither ``'mean'`` nor
            ``'max'`` once a gene has to be scored.
    """
    score = {}
    for regions in exp_matrix.get_regionsets():
        for region in regions:
            peak_key = region.chrom + ':' + str(region.initial) + '-' + str(region.final)
            if type_file == "ODIN":
                last_column = region.data.split("\t")[-1]
                score[peak_key] = float(last_column.split(";")[-1])
            elif type_file == "THOR":
                # elif (not a second if) so the ODIN score is not
                # overwritten by the fallback branch below.
                score[peak_key] = float(region.data.split(";")[-1])
            else:
                score[peak_key] = region.data

    for i, region in enumerate(exp_matrix.get_regionsets()):
        # The context manager guarantees the file is closed on error too.
        with open("region_" + str(region.name) + ".data", 'w') as f:
            region_set = GenomicRegionSet("")
            _, _, mappedGenes, _, gene_peaks_mapping = region_set.filter_by_gene_association_old(
                region.fileName, None, gene_file, genome_file, threshDist=thresh)

            avg_score = {}  # gene -> scores of all peaks assigned to it
            genes = {}      # gene -> set of peaks assigned to it

            print('Consider row %s of exp. matrix, number of mapped genes is %s' %(i, mappedGenes), file=sys.stderr)
            # Reverse the mapping peak -> genes into gene -> peaks.
            for peak, gene_list in gene_peaks_mapping.items():
                for gen in gene_list:
                    if not gen:
                        continue
                    genes.setdefault(gen, set()).add(peak)
                    avg_score.setdefault(gen, []).append(score[peak])

            for gen, peaks in genes.items():
                if options.metric == 'mean':
                    avg = np.mean(avg_score[gen])
                elif options.metric == 'max':
                    avg = np.max(avg_score[gen])
                else:
                    # Previously this fell through with avg unbound (or
                    # stale from the previous gene); fail explicitly.
                    raise ValueError("unsupported metric: %r" % (options.metric,))
                print(gen, avg, ", ".join(str(t) for t in peaks), sep='\t', file=f)
Ejemplo n.º 3
0
    
    
#     experimental_matrix_file = "/home/manuel/workspace/cluster_p/THOR/exp/exp23_macs2_payton/1"
#     gene_exp = "/home/manuel/workspace/cluster_p/allhoff/project_THOR/data/payton/gene_expression/CCmean.data"
#     annotation_path = "/home/manuel/workspace/cluster_h/rgtdata/hg19/"
#     outputdir = "/home/manuel/test/"
    
    exps = ExperimentalMatrix()
    exps.read(experimental_matrix_file)
    regionsets = exps.get_regionsets()
    
    genome_file = annotation_path + "/chrom.sizes"
    gene_file = annotation_path + "/association_file.bed"
    
    genes = GeneSet("Expression")
    genes.read_expression(gene_exp)
    
    for region in regionsets:
        bedNew = GenomicRegionSet("")
        [degenes, de_peak_genes, mappedGenes, totalPeaks, regionsToGenes] \
        = bedNew.filter_by_gene_association_old(region.fileName, genes.genes, gene_file, genome_file)
        
        [ct, labels] = averageExpression(region, genes, regionsToGenes)
        aux = region.fileName.split("/")
        fileName = aux[-1]
        fileName = fileName.split(".")
        output(genes.cond, labels, ct, outputdir + "/" + fileName[0] + ".txt")
        
        

    annotation_path = args[2]
    outputdir = args[3]
    
    
#     experimental_matrix_file = "/home/manuel/workspace/cluster_p/THOR/exp/exp23_macs2_payton/1"
#     gene_exp = "/home/manuel/workspace/cluster_p/allhoff/project_THOR/data/payton/gene_expression/CCmean.data"
#     annotation_path = "/home/manuel/workspace/cluster_h/rgtdata/hg19/"
#     outputdir = "/home/manuel/test/"
    
    exps = ExperimentalMatrix()
    exps.read(experimental_matrix_file)
    regionsets = exps.get_regionsets()
    
    genome_file = annotation_path + "/chrom.sizes"
    gene_file = annotation_path + "/association_file.bed"
    
    genes = GeneSet("Expression")
    genes.read_expression(gene_exp)
    
    for region in regionsets:
        bedNew = GenomicRegionSet("")
        [degenes, de_peak_genes, mappedGenes, totalPeaks, regionsToGenes] \
        = bedNew.filter_by_gene_association_old(region.fileName, genes.genes, gene_file, genome_file, threshDist=options.dist)
        
        [ct, labels] = averageExpression(region, genes, regionsToGenes)
        fileName = path.splitext(path.basename(region.fileName))[0]
	output(genes.cond, labels, ct, path.join(outputdir, fileName + ".txt"))
        
        

Ejemplo n.º 5
0
def mode_1(exp_matrix, thresh):
    """Print, for every region set, its name followed by the genes found.

    For each region set of ``exp_matrix`` a fresh ``GenomicRegionSet`` runs
    ``filter_by_gene_association_old`` on the region set's file. Two lines
    go to stdout: the number of mapped genes, then the region set name and
    the ``genes`` attribute of the fresh set, tab separated.

    Reads the module-level names ``gene_file`` and ``genome_file``;
    ``thresh`` is forwarded as ``threshDist``.
    """
    for regionset in exp_matrix.get_regionsets():
        associated = GenomicRegionSet("")
        # Only the third item of the five-item result is needed here.
        _degenes, _peak_genes, mapped_count, _total_peaks, _mapping = (
            associated.filter_by_gene_association_old(
                regionset.fileName,
                None,
                gene_file,
                genome_file,
                threshDist=thresh,
            )
        )
        print('#number of mapped genes:', mapped_count)

        line_prefix = regionset.name + "\t"
        print(line_prefix + "\t".join(associated.genes))
Ejemplo n.º 6
0
def mode_4(exp_matrix, thresh, type_file, geneexp_file):
    """Write, per region set, expression values and peak scores per gene.

    Gene expression is read from ``geneexp_file`` into a ``GeneSet``. A
    score is recorded for every region, keyed by the string
    ``"<chrom>:<initial>-<final>"``. Then, for each region set, peaks are
    associated with the genes of the gene set and a file
    ``region_<name>.data`` is written to the current working directory.
    Each line is tab separated and holds: gene, its expression values,
    average peak score, average multiplied by the number of peaks, and the
    comma-joined peaks (``0.0``, ``0`` and ``_`` when the gene has no
    usable peak scores).

    Reads the module-level names ``gene_file`` and ``genome_file`` and
    prints diagnostic output to stdout.

    Args:
        exp_matrix: object exposing ``get_regionsets()``; each region set
            must be iterable and provide ``name`` and ``fileName``.
        thresh: passed through as ``threshDist`` to
            ``GenomicRegionSet.filter_by_gene_association_old``.
        type_file: ``"ODIN"`` takes the last ``;``-separated item of the
            last tab-separated column of ``region.data`` as score;
            anything else stores ``region.data`` unchanged.
        geneexp_file: path handed to ``GeneSet.read_expression``.
    """
    gene_set = GeneSet("")
    gene_set.read_expression(geneexp_file)

    score = {}
    for regions in exp_matrix.get_regionsets():
        for region in regions:
            peak_key = region.chrom + ':' + str(region.initial) + '-' + str(region.final)
            if type_file == "ODIN":
                last_column = region.data.split("\t")[-1]
                score[peak_key] = last_column.split(";")[-1]
            else:
                score[peak_key] = region.data

    for region in exp_matrix.get_regionsets():
        # The context manager guarantees the file is closed on error too.
        with open("region_" + str(region.name) + ".data", 'w') as f:
            region_set = GenomicRegionSet("")
            _, _, mappedGenes, _, gene_peaks_mapping = region_set.filter_by_gene_association_old(
                region.fileName, gene_set.genes, gene_file, genome_file, threshDist=thresh)

            print(mappedGenes)

            avg_score = {}  # gene -> scores of all peaks assigned to it
            genes = {}      # gene -> set of peaks assigned to it

            print(region)
            # Reverse the mapping peak -> genes into gene -> peaks.
            for peak, gene_list in gene_peaks_mapping.items():
                for gen in gene_list:
                    if not gen:
                        continue
                    genes.setdefault(gen, set()).add(peak)
                    avg_score.setdefault(gen, []).append(score[peak])

            print(avg_score)

            for gen in gene_set.genes:
                try:
                    gene_scores = [float(x) for x in avg_score[gen]]
                except (KeyError, ValueError, TypeError):
                    # Gene without assigned peaks (KeyError) or with
                    # non-numeric scores: keep the placeholder row the
                    # original best-effort handling produced.
                    avg = 0.0
                    siz = 0
                    peaks = "_"
                else:
                    avg = sum(gene_scores) / float(len(gene_scores))
                    siz = avg * len(gene_scores)
                    peaks = ", ".join(str(t) for t in genes[gen])

                try:
                    expression = gene_set.values[gen.upper()]
                except KeyError:
                    # No expression values recorded for this gene: skip it.
                    continue
                print(gen, "\t".join([str(t) for t in expression]), avg, siz, peaks, sep='\t', file=f)
Ejemplo n.º 7
0
    annotation_path = args[2]
    outputdir = args[3]
    
    
#     experimental_matrix_file = "/home/manuel/workspace/cluster_p/THOR/exp/exp23_macs2_payton/1"
#     gene_exp = "/home/manuel/workspace/cluster_p/allhoff/project_THOR/data/payton/gene_expression/CCmean.data"
#     annotation_path = "/home/manuel/workspace/cluster_h/rgtdata/hg19/"
#     outputdir = "/home/manuel/test/"
    
    exps = ExperimentalMatrix()
    exps.read(experimental_matrix_file)
    regionsets = exps.get_regionsets()
    
    genome_file = annotation_path + "/chrom.sizes"
    gene_file = annotation_path + "/association_file.bed"
    
    genes = GeneSet("Expression")
    genes.read_expression(gene_exp)
    
    for region in regionsets:
        bedNew = GenomicRegionSet("")
        [degenes, de_peak_genes, mappedGenes, totalPeaks, regionsToGenes] \
        = bedNew.filter_by_gene_association_old(region.fileName, genes.genes, gene_file, genome_file, threshDist=options.dist)
        
        [ct, labels] = averageExpression(region, genes, regionsToGenes)
        fileName = path.splitext(path.basename(region.fileName))[0]
	output(genes.cond, labels, ct, path.join(outputdir, fileName + ".txt"))