Esempio n. 1
0
def render_token(query, token='Yoga', output_file='out/yoga_density.png'):
    """Render a kernel-density map of the locations recorded for ``token``.

    The (x, y) locations returned by ``query.locations_for_token(token)`` are
    fed to a Gaussian kernel density estimator, the density is drawn as an
    image on a fixed [-150, 150] x [-150, 150] extent, the raw points are
    overlaid as black dots, and the figure is saved to ``output_file``.

    Parameters
    ----------
    query : object exposing ``locations_for_token(token)``; the result must
        convert to a 2-D array whose columns 0 and 1 are x and y.
    token : str, label looked up and written on the plot.
    output_file : path handed to ``savefig``.
    """
    locations = numpy.array(query.locations_for_token(token))

    m1 = locations[:, 0]  # x-coords
    m2 = locations[:, 1]  # y-coords

    # FIXME: the extent is temporarily hard-coded so all plots share one scale
    # (the data-driven alternative would be m1.min()/m1.max(), m2.min()/m2.max()).
    xmin, xmax = -150, 150
    ymin, ymax = -150, 150

    # Evaluate a Gaussian KDE of the points on a 100 x 100 grid.
    X, Y = mgrid[xmin:xmax:100j, ymin:ymax:100j]
    positions = c_[X.ravel(), Y.ravel()]
    values = c_[m1, m2]
    # Public entry point; the ``stats.kde`` sub-namespace is deprecated.
    kernel = stats.gaussian_kde(values.T)
    Z = reshape(kernel(positions.T).T, X.T.shape)

    # Clear the current figure first: this function may be called repeatedly
    # and would otherwise draw on top of the previous plot.
    clf()
    imshow(rot90(Z), cmap=cm.gist_earth_r, extent=[xmin, xmax, ymin, ymax])

    # Plot the locations (every mention carries the same weight).
    plot(m1, m2, 'k.', markersize=1)
    text(xmin, ymax - 15, "density map for '" + token + "'")

    axis('equal')
    # Single-argument print works as statement (Py2) and function (Py3) and
    # reproduces the spacing of the former comma-separated print statement.
    print('saving density map for  %s to  %s' % (token, output_file))
    savefig(output_file)
Esempio n. 2
0
def random_rot_90(k, img):
    """Rotate ``img`` by ``k`` quarter turns with probability one half.

    A single uniform sample is drawn from ``np.random``.  When it exceeds 0.5,
    ``k`` is printed and a C-contiguous copy of ``img`` rotated in the plane of
    its last two axes is returned; otherwise ``img`` itself is returned.
    """
    if np.random.rand() <= .5:
        # No augmentation this time: hand back the very same object.
        return img

    print(k)
    rotated = rot90(img, k, axes=(-2, -1))
    return np.ascontiguousarray(rotated)
Esempio n. 3
0
    def plot_density(self, plot_filename="out/density.png"):
        """Draw a kernel-density map of the loaded points and save it.

        The points come from ``self.load_data()`` as ``(x, y, labels)``.  A
        Gaussian KDE of the points is rendered as a background image; on top
        of it either the first labels are written at their coordinates (when
        ``self.has_labels`` is true) or every point is drawn as a black dot.

        Parameters
        ----------
        plot_filename : path handed to ``savefig``.
        """
        x, y, labels = self.load_data()

        figure(figsize=(self.fig_width, self.fig_height), dpi=80)

        # Kernel density estimate of the coordinates, evaluated on a
        # 100 x 100 grid.
        # NOTE(review): the bounds are scaled multiplicatively, which only
        # adds a margin when min < 0 < max -- confirm the data is centred.
        space_factor = 1.2
        xmin = space_factor * x.min()
        xmax = space_factor * x.max()
        ymin = space_factor * y.min()
        ymax = space_factor * y.max()
        X, Y = mgrid[xmin:xmax:100j, ymin:ymax:100j]
        positions = c_[X.ravel(), Y.ravel()]
        values = c_[x, y]
        # Public entry point; the ``stats.kde`` sub-namespace is deprecated.
        kernel = stats.gaussian_kde(values.T)
        Z = reshape(kernel(positions.T).T, X.T.shape)
        imshow(rot90(Z), cmap=cm.gist_earth_r, extent=[xmin, xmax, ymin, ymax])

        if self.has_labels:
            # Labels are assumed to be in the same order as the points; only
            # computed here so that unlabeled data never touches ``labels``.
            num_labels_to_plot = min(
                [len(labels), self.max_labels, len(x), len(y)])
            for i in range(num_labels_to_plot):
                text(x[i], y[i], labels[i])
        else:
            plot(x, y, "k.", markersize=1)
        axis("equal")
        axis("off")
        savefig(plot_filename)
        # Parenthesised single argument: valid on both Python 2 and 3.
        print("wrote %s" % (plot_filename))
Esempio n. 4
0
def get_text(images, patterns):
    """Recognise characters in each image by FFT template matching.

    ``images`` maps a name to an ``(image, output_image)`` pair and
    ``patterns`` maps a key to a sequence whose item 0 is a template array and
    item 1 is the character it stands for.  Both arrays of a pair are
    deskewed, every template is cross-correlated with the image through the
    FFT, and each position whose correlation is at least 99% of the peak is
    recorded as an occurrence of the character and painted over (first three
    channels set to 255 in the 10x10 block ending at the match) in the output
    image.  Matches are grouped into text lines, ordered by column, with a
    space inserted where neighbours are more than ``line_height`` apart, and
    the result is passed to ``save_results``.

    Relies on the module-level names ``line_height`` and ``save_results``.
    """
    from lab.lab9.zad2.deskew import deskew

    for name, (source, canvas) in images.items():
        rows = {}  # text-line index -> {column: character}
        source = deskew(source)
        canvas = deskew(canvas)

        def mark(i, j, char):
            """Record ``char`` for the match at (i, j) and whiten it out."""
            row = (i - line_height // 2) // line_height
            col = j - line_height // 2
            rows.setdefault(row, {})[col] = char

            for px in range(i - 10, i):
                for py in range(j - 10, j):
                    for channel in (0, 1, 2):
                        canvas[px, py][channel] = 255

        for entry in patterns.values():
            # Correlation == convolution with the template rotated by 180
            # degrees, computed as a product in the frequency domain.
            template_fft = np.fft.fft2(rot90(entry[0], 2), source.shape)
            image_fft = np.fft.fft2(source)
            product = np.multiply(template_fft, image_fft)
            corr = np.abs(np.fft.ifft2(product)).astype(float)
            n_rows, n_cols = corr.shape
            # Keep only near-peak responses.
            corr[corr < 0.99 * np.amax(corr)] = 0

            for i, j in np.ndindex(n_rows, n_cols):
                if corr[i, j] > 0:
                    print(corr[i, j])
                    mark(i, j, entry[1])

        text_lines = []
        for row in sorted(rows):
            columns = sorted(rows[row])
            pieces = []
            for pos, col in enumerate(columns):
                pieces.append(rows[row][col])
                has_next = pos + 1 < len(columns)
                if has_next and abs(col - columns[pos + 1]) > line_height:
                    pieces.append(' ')
            text_lines.append(''.join(pieces))

        save_results(name, canvas, text_lines)
 def contour(self, p, dat):
     """Draw a Gaussian-KDE density image of 2-D points on the unit square.

     Parameters
     ----------
     p : pyplot-like object providing ``imshow``, ``plot``, ``axis`` and ``cm``.
     dat : 2-D array; row 0 holds the x-coordinates, row 1 the y-coordinates.
     """
     xs, ys = dat[0, :], dat[1, :]

     # 100 x 100 evaluation grid covering [0, 1] x [0, 1].
     X, Y = mgrid[0.0:1.0:100j, 0.0:1.0:100j]
     positions = c_[X.ravel(), Y.ravel()]

     # Public entry point; the ``stats.kde`` sub-namespace is deprecated.
     kernel = stats.gaussian_kde(c_[xs, ys].T)
     Z = reshape(kernel(positions.T).T, X.T.shape)

     p.imshow(rot90(Z), cmap=p.cm.YlGnBu, extent=[0, 1, 0, 1])
     p.plot(xs, ys, 'r.')
     p.axis([0.0, 1.0, 0.0, 1.0])
Esempio n. 6
0
 def contour(self, p, dat):
     """Draw a Gaussian-KDE density image of 2-D points on the unit square.

     Parameters
     ----------
     p : pyplot-like object providing ``imshow``, ``plot``, ``axis`` and ``cm``.
     dat : 2-D array; row 0 holds the x-coordinates, row 1 the y-coordinates.
     """
     xs, ys = dat[0, :], dat[1, :]

     # 100 x 100 evaluation grid covering [0, 1] x [0, 1].
     X, Y = mgrid[0.0:1.0:100j, 0.0:1.0:100j]
     positions = c_[X.ravel(), Y.ravel()]

     # Public entry point; the ``stats.kde`` sub-namespace is deprecated.
     kernel = stats.gaussian_kde(c_[xs, ys].T)
     Z = reshape(kernel(positions.T).T, X.T.shape)

     p.imshow(rot90(Z), cmap=p.cm.YlGnBu, extent=[0, 1, 0, 1])
     p.plot(xs, ys, 'r.')
     p.axis([0.0, 1.0, 0.0, 1.0])
Esempio n. 7
0
def plotmapheat(heat, title, outpath, plotdots=False):
	"""Save a heat-map PNG of ``heat`` with place markers overlaid.

	Parameters
	----------
	heat : 2-D array of heat values; it is rotated by 90 degrees for display.
	title : str, figure title.
	outpath : destination of the PNG file.
	plotdots : when true, the individual points (module-level ``lons`` and
		``lats``) are drawn too and the output resolution is doubled.

	Relies on the module-level names ``extent``, ``lons``, ``lats`` and
	``placemarkers``; each placemarker is indexed as (lat, lon, name).
	"""
	plt.figure(figsize=(4,5))
	plt.imshow(rot90(heat),
		    cmap=cm.gist_earth_r,
		    extent=extent,
		    aspect='auto')
	dpi = 150
	if plotdots:
		plt.plot(lons, lats, ',', markersize=0.2, color=(0,0,0,0.3))
		dpi = 300  # finer resolution so the single-pixel dots stay visible
	plt.title(title, fontsize=10)
	# city markers:
	marker_lons = [pm[1] for pm in placemarkers]
	marker_lats = [pm[0] for pm in placemarkers]
	plt.plot(marker_lons, marker_lats, '.', markersize=2, color=(1,1,1,0.3))
	for pm in placemarkers:
		plt.text(pm[1], pm[0], pm[2], {'fontsize':4}, color=(0.95,0.95,0.75,0.3))
	# ``papertype`` was dropped: it only applies to PostScript output, never
	# affected the PNG, and newer Matplotlib releases no longer accept it.
	plt.savefig(outpath, format='png', dpi=dpi)
Esempio n. 8
0
def show_planes(im):
    r"""
    Create a quick montage showing a 3D image in all three directions

    Parameters
    ----------
    im : ND-array
        A 3D image of the porous material

    Returns
    -------
    image : ND-array
        A 2D array containing the views.  This single image can be viewed using
        ``matplotlib.pyplot.imshow``.

    Raises
    ------
    ValueError
        If ``im`` has fewer than 3 dimensions.

    """
    # NumPy functions re-exported from the top-level ``scipy`` namespace
    # (sp.array, sp.zeros, sp.rot90, ...) are deprecated; use numpy directly.
    import numpy as np

    if im.ndim < 3:
        raise ValueError('This view is only necessary for 3D images')

    # Index of the middle slice along each axis.
    x, y, z = [size // 2 for size in im.shape]
    im_xy = im[:, :, z]
    im_xz = im[:, y, :]
    im_yz = np.rot90(im[x, :, :])

    # Canvas large enough for the three views plus 10-pixel margins and gaps.
    new_x = im_xy.shape[0] + im_yz.shape[0] + 10
    new_y = im_xy.shape[1] + im_xz.shape[1] + 10
    new_im = np.zeros([new_x + 20, new_y + 20], dtype=im.dtype)

    # xy view at the origin corner (rows 10.., columns 10..)
    new_im[10:im_xy.shape[0] + 10, 10:im_xy.shape[1] + 10] = im_xy
    x_off = im_xy.shape[0] + 20
    y_off = im_xy.shape[1] + 20
    # xz view in the same rows as the xy view, at higher column indices
    new_im[10:10 + im_xz.shape[0], y_off:y_off + im_xz.shape[1]] = im_xz
    # yz view in the same columns as the xy view, at higher row indices
    new_im[x_off:x_off + im_yz.shape[0], 10:10 + im_yz.shape[1]] = im_yz

    return new_im
Esempio n. 9
0
# now combine delaunay with KDE
#
# Build an RGBA image in which the hue encodes the interpolated value
# (``interped``) and the saturation encodes the confidence derived from the
# kernel density (``kdeZ``): low-density cells fade out to white.

colours = np.zeros((gridsize, gridsize, 4))
kdeZmin = np.min(kdeZ)
kdeZmax = np.max(kdeZ)
kdeZrange = kdeZmax - kdeZmin  # loop-invariant normalisation span
confdepth = 0.45
for x in range(gridsize):
    for y in range(gridsize):
        # density rescaled to [0, 1]
        conf = (kdeZ[x, y] - kdeZmin) / kdeZrange
        # interpolated value clamped to the colormap's [0, 1] domain
        val = min(1., max(0., interped[x, y]))
        colour = list(cm.rainbow(val))
        # now fade it out to white according to conf
        for index in [0, 1, 2]:
            colour[index] = (colour[index] * conf) + (1.0 * (1. - conf))
        colours[x, y, :] = colour
        #colours[x,y,:] = np.hstack((hls_to_rgb(val, 0.5 + confdepth - (confdepth * conf), 1.0), 1.0))
        #colours[x,y,:] = [conf, conf, 1.0-conf, val]

# Parenthesised single argument: valid on both Python 2 and 3.
print(colours)
# NOTE(review): ``colours`` is already RGBA, and Matplotlib documents cmap and
# norm as ignored for RGB(A) input -- confirm they can be dropped here.
plt.imshow(rot90(colours), cmap=cm.rainbow, norm=LogNorm(\
            vmin=zmin, vmax=zmax))
plt.title("interpolated & confidence-shaded")

plt.ylim([ymin, ymax])
plt.xlim([xmin, xmax])
plt.xticks(fontsize=fontsize)
plt.yticks(fontsize=fontsize)

############################################
plt.savefig("plot_heati_simple.svg", format='SVG')
Esempio n. 10
0
def gcbias(filelist, fileoutlist, bedfilelist):
    """Draw mean coverage as a function of GC content, one figure per BAM file.

    Args:
        filelist: list of strings, each the full path of a BAM file to analyze.
        fileoutlist: list of strings, each the full path of the image file
            where the figure for the corresponding BAM is saved.
        bedfilelist: list of BED target files, indexed in parallel with
            ``filelist``.

    For every BAM the per-position coverage over the BED regions is computed
    with ``coverageBed``, the GC percentage of each region is obtained through
    pybedtools, and a 2-D kernel density of (GC content, mean coverage) is
    saved as an image.

    Relies on the module-level names ``normalize``, ``TMP``, ``BEDTOOLSPATH``,
    ``REF``, ``run`` and ``region_coverage``.
    """
    pid = str(os.getpid())

    numpy.random.seed(1)
    bamlist = []

    # Wrap every BAM, creating its index first when missing (pysam needs it).
    for filename in filelist:
        if not os.path.isfile(filename + '.bai'):
            print('Creating index for ' + filename)
            pysam.index(filename)
            print('\tDone.')

        bamlist.append(bam_file.bam_file(filename))
    sizes = numpy.array([bam.nreads() for bam in bamlist])
    minsize = sizes.min()

    print('The smaller bam is ' + filelist[sizes.argmin()] +
          ' and contains ' + str(minsize) + ' reads.')

    # Process each file and draw its figure
    for i, bamfile in enumerate(bamlist):

        print('Processing ' + bamfile.filename)
        print('Results will be written at ' + fileoutlist[i])

        # Down-sample to the smallest BAM only when normalization is enabled
        normalizedbam = bamfile.normalize(minsize) if normalize else bamfile

        # The same temporary file is overwritten on every iteration.
        coveragefile = TMP + '/' + pid + '.coverage'
        print('Calculating coverage per position...')
        # NOTE(review): the command is assembled by string concatenation; file
        # names containing spaces or shell metacharacters are not quoted.
        run(BEDTOOLSPATH + 'coverageBed -d -abam ' + normalizedbam.filename +
            ' -b ' + bedfilelist[i] + ' > ' + coveragefile)

        coverage = region_coverage(coveragefile)

        print('Calculating nt content...')
        bedfd = pybedtools.BedTool(bedfilelist[i])
        pybedtools._bedtools_installed = True
        pybedtools.set_bedtools_path(BEDTOOLSPATH)
        ntcontent = bedfd.nucleotide_content(REF)

        # Each entry in ntcontent is parsed to extract the GC content of each
        # exon, keyed by (chromosome, start, end) and stored as a percentage.
        gccontent = {}
        for entry in ntcontent:
            key = (entry.fields[0], int(entry.fields[1]), int(entry.fields[2]))
            gccontent[key] = float(entry.fields[-8]) * 100
        print('\tDone.')

        region_ids = list(coverage)
        coveragearray = numpy.array([coverage[region] for region in region_ids])
        gccontentarray = numpy.array([gccontent[region]
                                      for region in region_ids])  # percentages

        xmin = gccontentarray.min()
        xmax = gccontentarray.max()
        ymin = coveragearray.min()
        ymax = coveragearray.max()

        # Perform a kernel density estimator on the results
        X, Y = mgrid[xmin:xmax:100j, ymin:ymax:100j]
        positions = c_[X.ravel(), Y.ravel()]
        values = c_[gccontentarray, coveragearray]
        # Public entry point; the ``stats.kde`` sub-namespace is deprecated.
        kernel = stats.gaussian_kde(values.T)
        Z = reshape(kernel(positions.T).T, X.T.shape)

        # Only one figure is created per file (a previous, unused 13x6 figure
        # used to be opened here and was never closed).
        fig = pyplot.figure(figsize=(6, 6))
        ax = fig.add_subplot(111)
        # The density grid spans [xmin, xmax], so the image extent must use
        # xmax too; a hard-coded 100 stretched the image along the GC axis.
        sc = ax.imshow(
            rot90(Z),
            cmap=cm.gist_earth_r,
            extent=[xmin, xmax, ymin, ymax],
            aspect="auto"
        )
        cbar = fig.colorbar(sc, ticks=[numpy.min(Z), numpy.max(Z)])
        cbar.ax.set_yticklabels(['Low', 'High'])
        cbar.set_label('Density')
        ax.set_xlabel('GC content (%)')
        ax.set_ylabel('Mean coverage')
        fig.savefig(fileoutlist[i])
        matplotlib.pyplot.close(fig)

    print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Finished <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
Esempio n. 11
0
 def rot90(self, i):
     """Rotate every array in ``self.data`` by ``i`` quarter turns, in place.

     Each element of ``self.data`` is replaced by its rotated version
     (``numpy.rot90`` convention).  The rotation angle is logged when
     ``CXP.reconstruction.verbose`` is set.
     """
     # NumPy functions re-exported from the top-level ``scipy`` namespace
     # (``sp.rot90``) are deprecated; call numpy directly.
     import numpy as np

     if CXP.reconstruction.verbose:
         CXP.log.info('Rotating data by {:d}'.format(i * 90))
     for j, data in enumerate(self.data):
         self.data[j] = np.rot90(data, i)
Esempio n. 12
0
def math_func(table):
    """Compare Kendall's concordance statistic with its chi-square critical value.

    ``table`` is a rectangular sequence of score rows: one row per expert, one
    column per assessed quality, e.g. for two experts and two qualities::

        table = [[2, 3],
                 [2, 4]]

    The scores of each expert are converted to ranks (rank 1 for the highest
    score, ties share the average rank), Kendall's coefficient of concordance
    W is computed from the per-quality rank sums, and ``W * m * (n - 1)`` is
    compared with the chi-square critical value at significance level 0.05
    with ``n - 1`` degrees of freedom.

    Returns:
        ``False`` when the statistic is greater than or equal to the critical
        value, ``True`` otherwise.
        NOTE(review): a statistic at or above the critical value usually means
        significant agreement between experts -- confirm the intended polarity.
    """
    # NumPy functions re-exported from the top-level ``scipy`` namespace
    # (scipy.array, scipy.sum, scipy.rot90, ...) are deprecated.
    import numpy as np

    print(table)

    # Convert each expert's scores to ranks.  rankdata ranks ascending, so the
    # ranks are flipped to give rank 1 to the highest score; a rank of 0
    # (masked value) is left at 0.
    ascending = np.array([scipy.stats.mstats.rankdata(row) for row in table])
    m, n = ascending.shape  # number of experts, number of qualities
    rank_table = np.where(ascending > 0, n + 1 - ascending, 0)
    print(rank_table)

    # Critical point of the chi-square distribution at significance level
    # alpha = 0.05 with K = n - 1 degrees of freedom.
    print("sdfsdf", n)
    critical_chi = scipy.stats.chi2.isf(0.05, n - 1)

    # Sum of ranks received by each quality (column sums).
    sum_rank_for_q = rank_table.sum(axis=0)

    # Mean of the per-quality rank sums (float division on any Python).
    average_value_sum_r = m * (n + 1) / 2.0

    # Sum of squared deviations of the rank sums from their mean.
    actual_deviation = sum_rank_for_q - average_value_sum_r
    sum_square = np.sum(actual_deviation ** 2)

    # Largest possible value of that sum (perfect agreement).
    max_sum_square = m ** 2 * (n ** 3 - n) / 12.0

    # Sample value of Kendall's coefficient of concordance.
    W = sum_square / max_sum_square

    # Sample value of the chi-square statistic.
    chi2 = W * m * (n - 1)
    print("chi2", chi2)
    print("critical", critical_chi)

    return not chi2 >= critical_chi
Esempio n. 13
0
def gcbias_lite(coveragefile, bedfilename, reference, fileout, graphtitle=None, executiongranted=None, status=None, bedTools=False):
	"""Draw mean coverage as a function of GC content.

	Improved version of gcbias that avoids bedtools (pybedtools) by default.

	Args:
		coveragefile: full path of the bam.coverage file to analyze; built
			according to 1-base format.
		bedfilename: target file; assumed to be an original, standard BED file.
		reference: FASTA file with the reference genome.
		fileout: full path of the image file where the figure is saved.
		graphtitle: optional title; titles longer than 25 characters are
			truncated and suffixed with '...'. ``None`` leaves the plot untitled.
		executiongranted: optional lock-like object; it is acquired on entry
			and always released on exit, even if an error occurs.
		status: optional object whose ``value`` attribute is set to whether
			the mean GC content lies within [45, 55].
		bedTools: whether pybedtools is used instead of the own method.

	Exits the process with status 1 when no GC content could be computed
	(own method only). Relies on the module-level names ``region_coverage``,
	``measureGCbias`` and ``BEDTOOLSPATH``.
	"""
	if executiongranted is not None:
		executiongranted.acquire()

	try:
		coverage = region_coverage(coveragefile)  # mean coverage per region

		if len(coverage) > 1:

			if not bedTools:  # Own method
				# Chromosomes to be examined (the ones found in the coverage keys)
				chromosomes = set(currentKey[0] for currentKey in coverage)

				# Coverage information is in 1-base format, so the BED file
				# is transformed to base 1 (non-standard BED).
				bed = bed_file.bed_file(bedfilename)
				sortedBed = bed.my_sort_bed()  # sort avoiding bedtools
				nonOverlappingBed = sortedBed.non_overlapping_exons(1)  # base 1
				finalBed = nonOverlappingBed.my_sort_bed()
				finalBed.load_custom(-1)  # load chromosome and positions

				storeSequence = False
				sequenceParts = []  # lines of the chromosome being read
				currentChromosome = ''
				gccontent = {}

				# Load FASTA file; ``with`` closes it even on error.
				with open(reference, 'r') as fastaFile:
					for line in fastaFile:
						if line.startswith('>'):  # header: a new chromosome starts
							if storeSequence:  # flush the chromosome just read
								gccontent.update(measureGCbias(''.join(sequenceParts), currentChromosome, finalBed))
								storeSequence = False
							# Format: >1 dna:chromosome chromosome:GRCh37:1:1:249250621:1
							currentChromosome = re.split(' +', line)[0]
							currentChromosome = currentChromosome.split('>')[1].strip()
							if currentChromosome in chromosomes:
								storeSequence = True
							sequenceParts = []
						elif storeSequence and '>' not in line:
							# Collected in a list and joined once: repeated
							# string concatenation is quadratic.
							sequenceParts.append(line.rstrip())

				if storeSequence:  # For the last chromosome
					gccontent.update(measureGCbias(''.join(sequenceParts), currentChromosome, finalBed))

				region_ids = list(coverage)

				if len(gccontent) == 0:
					print('ERROR: G+C content values can not be calculated. Probably the provided reference file '+reference+' does not match with ')
					print('\tthe target file '+bedfilename+'. That is, sequences of regions in the target file are probably not included within the')
					print('\treference file.')
					sys.exit(1)

			else:
				print('Calculating nt content by means of pybedtools...')
				bed = bed_file.bed_file(bedfilename)
				sortedBed = bed.my_sort_bed()  # sort avoiding bedtools
				nonOverlappingBed = sortedBed.non_overlapping_exons(1)  # base 1
				finalBed = nonOverlappingBed.my_sort_bed()
				bedfd = pybedtools.BedTool(finalBed.filename)
				# Negative coordinates or zero-length features do not work with bedtools
				bedfd = bedfd.remove_invalid()
				pybedtools._bedtools_installed = True
				pybedtools.set_bedtools_path(BEDTOOLSPATH)
				ntcontent = bedfd.nucleotide_content(reference)

				# gccontent keys: (chromosome, exon init, exon end); values in percent
				gccontent = {}
				for entry in ntcontent:
					key = (entry.fields[0], int(entry.fields[1]), int(entry.fields[2]))
					gccontent[key] = float(entry.fields[-8])*100
				print('\tDone.')

				# pybedtools drops zero-length regions, so skip them here too
				region_ids = [currentKey for currentKey in coverage if currentKey[1] != currentKey[2]]

			coveragearray = numpy.array([coverage[region] for region in region_ids])
			# NOTE(review): assumed to be percentages in both branches (the
			# status check below tests 45..55) -- confirm for measureGCbias.
			gccontentarray = numpy.array([gccontent[region] for region in region_ids])

			xmin = gccontentarray.min()
			xmax = gccontentarray.max()
			ymin = coveragearray.min()
			ymax = coveragearray.max()

			# Perform a kernel density estimator on the results
			X, Y = mgrid[xmin:xmax:100j, ymin:ymax:100j]
			positions = c_[X.ravel(), Y.ravel()]
			values = c_[gccontentarray, coveragearray]
			# Public entry point; the ``stats.kde`` sub-namespace is deprecated.
			kernel = stats.gaussian_kde(values.T)
			Z = reshape(kernel(positions.T).T, X.T.shape)

			fig = pyplot.figure(figsize=(6,6))
			ax = fig.add_subplot(111)
			# The density grid spans [xmin, xmax], so the extent must use xmax;
			# a hard-coded 100 stretched the image along the GC axis.
			sc = ax.imshow(rot90(Z), cmap=cm.gist_earth_r, extent=[xmin, xmax, ymin, ymax], aspect="auto")
			cbar = fig.colorbar(sc, ticks=[numpy.min(Z), numpy.max(Z)])
			cbar.ax.set_yticklabels(['Low','High'])
			cbar.set_label('Density')
			ax.set_xlabel('GC content (%)')
			ax.set_ylabel('Mean coverage')

			# graphtitle defaults to None; len(None) used to raise TypeError.
			if graphtitle is not None:
				if len(graphtitle) > 25:
					ax.set_title(graphtitle[:25]+'...')
				else:
					ax.set_title(graphtitle)

			fig.savefig(fileout)
			matplotlib.pyplot.close(fig)

			if status is not None:
				meanvalue = gccontentarray.mean()
				status.value = (meanvalue >= 45 and meanvalue <= 55)

		else:
			print('WARNING: only one region found in the bed file. Skipping GC bias calculation.')
	finally:
		# Always hand the lock back, including on exceptions and sys.exit.
		if executiongranted is not None:
			executiongranted.release()
Esempio n. 14
0
 def rot90(self, i):
     """Rotate every array in ``self.data`` by ``i`` quarter turns, in place.

     Each element of ``self.data`` is replaced by its rotated version
     (``numpy.rot90`` convention).  The rotation angle is logged when
     ``CXP.reconstruction.verbose`` is set.
     """
     # NumPy functions re-exported from the top-level ``scipy`` namespace
     # (``sp.rot90``) are deprecated; call numpy directly.
     import numpy as np

     if CXP.reconstruction.verbose:
         CXP.log.info('Rotating data by {:d}'.format(i*90))
     for j, data in enumerate(self.data):
         self.data[j] = np.rot90(data, i)
Esempio n. 15
0
def gcbias_lite(coveragefile,
                bedfilename,
                reference,
                fileout,
                graphtitle=None,
                executiongranted=None,
                status=None,
                bedTools=False):
    """Draw mean coverage as a function of GC content.

    Improved version of gcbias that avoids bedtools (pybedtools) by default.

    Args:
        coveragefile: full path of the bam.coverage file to analyze; built
            according to 1-base format.
        bedfilename: target file; assumed to be an original, standard BED file.
        reference: FASTA file with the reference genome.
        fileout: full path of the image file where the figure is saved.
        graphtitle: optional title; titles longer than 25 characters are
            truncated and suffixed with '...'. ``None`` leaves the plot untitled.
        executiongranted: optional lock-like object; it is acquired on entry
            and always released on exit, even if an error occurs.
        status: optional object whose ``value`` attribute is set to whether
            the mean GC content lies within [45, 55].
        bedTools: whether pybedtools is used instead of the own method.

    Exits the process with status 1 when no GC content could be computed
    (own method only). Relies on the module-level names ``region_coverage``,
    ``measureGCbias`` and ``BEDTOOLSPATH``.
    """
    if executiongranted is not None:
        executiongranted.acquire()

    try:
        coverage = region_coverage(coveragefile)  # mean coverage per region

        if len(coverage) > 1:

            if not bedTools:  # Own method
                # Chromosomes to be examined (the ones found in the coverage keys)
                chromosomes = set(currentKey[0] for currentKey in coverage)

                # Coverage information is in 1-base format, so the BED file
                # is transformed to base 1 (non-standard BED).
                bed = bed_file.bed_file(bedfilename)
                sortedBed = bed.my_sort_bed()  # sort avoiding bedtools
                nonOverlappingBed = sortedBed.non_overlapping_exons(1)  # base 1
                finalBed = nonOverlappingBed.my_sort_bed()
                finalBed.load_custom(-1)  # load chromosome and positions

                storeSequence = False
                sequenceParts = []  # lines of the chromosome being read
                currentChromosome = ''
                gccontent = {}

                # Load FASTA file; ``with`` closes it even on error.
                with open(reference, 'r') as fastaFile:
                    for line in fastaFile:
                        if line.startswith('>'):  # header: a new chromosome starts
                            if storeSequence:  # flush the chromosome just read
                                gccontent.update(
                                    measureGCbias(''.join(sequenceParts),
                                                  currentChromosome, finalBed))
                                storeSequence = False
                            # Format: >1 dna:chromosome chromosome:GRCh37:1:1:249250621:1
                            currentChromosome = re.split(' +', line)[0]
                            currentChromosome = currentChromosome.split(
                                '>')[1].strip()
                            if currentChromosome in chromosomes:
                                storeSequence = True
                            sequenceParts = []
                        elif storeSequence and '>' not in line:
                            # Collected in a list and joined once: repeated
                            # string concatenation is quadratic.
                            sequenceParts.append(line.rstrip())

                if storeSequence:  # For the last chromosome
                    gccontent.update(
                        measureGCbias(''.join(sequenceParts),
                                      currentChromosome, finalBed))

                region_ids = list(coverage)

                if len(gccontent) == 0:
                    print('ERROR: G+C content values can not be calculated. Probably the provided reference file ' + reference + ' does not match with ')
                    print('\tthe target file ' + bedfilename + '. That is, sequences of regions in the target file are probably not included within the')
                    print('\treference file.')
                    sys.exit(1)

            else:
                print('Calculating nt content by means of pybedtools...')
                bed = bed_file.bed_file(bedfilename)
                sortedBed = bed.my_sort_bed()  # sort avoiding bedtools
                nonOverlappingBed = sortedBed.non_overlapping_exons(1)  # base 1
                finalBed = nonOverlappingBed.my_sort_bed()
                bedfd = pybedtools.BedTool(finalBed.filename)
                # Negative coordinates or zero-length features do not work with bedtools
                bedfd = bedfd.remove_invalid()
                pybedtools._bedtools_installed = True
                pybedtools.set_bedtools_path(BEDTOOLSPATH)
                ntcontent = bedfd.nucleotide_content(reference)

                # gccontent keys: (chromosome, exon init, exon end); values in percent
                gccontent = {}
                for entry in ntcontent:
                    key = (entry.fields[0], int(entry.fields[1]),
                           int(entry.fields[2]))
                    gccontent[key] = float(entry.fields[-8]) * 100
                print('\tDone.')

                # pybedtools drops zero-length regions, so skip them here too
                region_ids = [currentKey for currentKey in coverage
                              if currentKey[1] != currentKey[2]]

            coveragearray = numpy.array(
                [coverage[region] for region in region_ids])
            # NOTE(review): assumed to be percentages in both branches (the
            # status check below tests 45..55) -- confirm for measureGCbias.
            gccontentarray = numpy.array(
                [gccontent[region] for region in region_ids])

            xmin = gccontentarray.min()
            xmax = gccontentarray.max()
            ymin = coveragearray.min()
            ymax = coveragearray.max()

            # Perform a kernel density estimator on the results
            X, Y = mgrid[xmin:xmax:100j, ymin:ymax:100j]
            positions = c_[X.ravel(), Y.ravel()]
            values = c_[gccontentarray, coveragearray]
            # Public entry point; the ``stats.kde`` sub-namespace is deprecated.
            kernel = stats.gaussian_kde(values.T)
            Z = reshape(kernel(positions.T).T, X.T.shape)

            fig = pyplot.figure(figsize=(6, 6))
            ax = fig.add_subplot(111)
            # The density grid spans [xmin, xmax], so the extent must use xmax;
            # a hard-coded 100 stretched the image along the GC axis.
            sc = ax.imshow(
                rot90(Z),
                cmap=cm.gist_earth_r,
                extent=[xmin, xmax, ymin, ymax],
                aspect="auto"
            )
            cbar = fig.colorbar(sc, ticks=[numpy.min(Z), numpy.max(Z)])
            cbar.ax.set_yticklabels(['Low', 'High'])
            cbar.set_label('Density')
            ax.set_xlabel('GC content (%)')
            ax.set_ylabel('Mean coverage')

            # graphtitle defaults to None; len(None) used to raise TypeError.
            if graphtitle is not None:
                if len(graphtitle) > 25:
                    ax.set_title(graphtitle[:25] + '...')
                else:
                    ax.set_title(graphtitle)

            fig.savefig(fileout)
            matplotlib.pyplot.close(fig)

            if status is not None:
                meanvalue = gccontentarray.mean()
                status.value = (meanvalue >= 45 and meanvalue <= 55)

        else:
            print('WARNING: only one region found in the bed file. Skipping GC bias calculation.')
    finally:
        # Always hand the lock back, including on exceptions and sys.exit.
        if executiongranted is not None:
            executiongranted.release()
Esempio n. 16
0
def gcbias(filelist, fileoutlist, bedfilelist):
    """Draw a GC-content vs. coverage density map for each bam file.

    For every bam file, coverage over the matching bed file is computed
    with ``coverageBed -d``, the GC percentage of every bed region is
    obtained through pybedtools, and a Gaussian kernel density estimate of
    the (GC content, coverage) pairs is saved as an image.

    Args:
        filelist: list of strings, each the full path of a bam file to analyze.
        fileoutlist: list of strings, each the full path of the png file where
            the figure for the bam file at the same index is saved.
        bedfilelist: list of strings, each the path of the bed file used for
            the bam file at the same index.

    The function also reads the module-level names ``normalize``, ``TMP``,
    ``BEDTOOLSPATH`` and ``REF`` and calls the helpers ``run`` and
    ``region_coverage``, all defined outside this function.
    """

    pid = str(os.getpid())

    numpy.random.seed(1)
    bamlist = []

    # Open every bam file, creating its index first when it is missing
    # (pysam needs the .bai file).
    for filename in filelist:
        if not os.path.isfile(filename + '.bai'):
            print('Creating index for ' + filename)
            pysam.index(filename)
            print('\tDone.')

        bamlist.append(bam_file.bam_file(filename))

    # The smallest read count is the target size when normalizing.
    sizes = numpy.array([bam.nreads() for bam in bamlist])
    minsize = sizes.min()

    print('The smaller bam is ' + filelist[sizes.argmin()] + ' and contains ' + str(minsize) + ' reads.')

    # Process each file and draw its density map
    for i, bamfile in enumerate(bamlist):

        print('Processing ' + bamfile.filename)
        print('Results will be written at ' + fileoutlist[i])

        # Check whether normalization should be run
        normalizedbam = bamfile.normalize(minsize) if normalize else bamfile

        coveragefile = TMP + '/' + pid + '.coverage'
        print('Calculating coverage per position...')
        run(BEDTOOLSPATH + 'coverageBed -d -abam ' + normalizedbam.filename + ' -b ' + bedfilelist[i] + ' > ' + coveragefile)

        # NOTE(review): region_coverage is assumed to return a dict keyed by
        # (chrom, start, end) tuples, matching the gccontent keys below.
        coverage = region_coverage(coveragefile)

        print('Calculating nt content...')
        bedfd = pybedtools.BedTool(bedfilelist[i])
        pybedtools._bedtools_installed = True
        pybedtools.set_bedtools_path(BEDTOOLSPATH)
        ntcontent = bedfd.nucleotide_content(REF)

        # Each entry in ntcontent is parsed to extract the gc content of each
        # region; the fraction in column -8 is rescaled to a percentage.
        gccontent = {}
        for entry in ntcontent:
            region = (entry.fields[0], int(entry.fields[1]), int(entry.fields[2]))
            gccontent[region] = float(entry.fields[-8]) * 100
        print('\tDone.')

        region_ids = list(coverage.keys())
        if len(region_ids) < 2:
            # A kernel density estimate needs at least two points.
            print('WARNING: fewer than two regions found in ' + bedfilelist[i] + '. Skipping GC bias calculation.')
            continue

        coveragearray = numpy.array([coverage[region] for region in region_ids])
        gccontentarray = numpy.array([gccontent[region] for region in region_ids])  # Values in [0,100]

        xmin = gccontentarray.min()
        xmax = gccontentarray.max()
        ymin = coveragearray.min()
        ymax = coveragearray.max()

        # Perform a kernel density estimator on the results, evaluated on a
        # 100x100 grid spanning the observed ranges.
        X, Y = mgrid[xmin:xmax:100j, ymin:ymax:100j]
        positions = c_[X.ravel(), Y.ravel()]
        values = c_[gccontentarray, coveragearray]
        kernel = stats.gaussian_kde(values.T)
        Z = reshape(kernel(positions.T), X.shape)

        fig = pyplot.figure(figsize=(6, 6))
        ax = fig.add_subplot(111)
        # The extent must match the grid the density was evaluated on,
        # otherwise the image is stretched against a wrong x axis.
        sc = ax.imshow(rot90(Z), cmap=cm.gist_earth_r, extent=[xmin, xmax, ymin, ymax], aspect="auto")
        cbar = fig.colorbar(sc, ticks=[numpy.min(Z), numpy.max(Z)])
        cbar.ax.set_yticklabels(['Low', 'High'])
        cbar.set_label('Density')
        ax.set_xlabel('GC content (%)')
        ax.set_ylabel('Mean coverage')
        fig.savefig(fileoutlist[i])
        # Close the figure so long file lists do not accumulate open figures.
        pyplot.close(fig)

    print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Finished <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
Esempio n. 17
0
def fftcorrelatend(template, A):
    """
    Correlate ``template`` with ``A`` by means of ``fftconvolve``.

    The template is turned by 180 degrees (two quarter turns in the plane
    of its first two axes) and then convolved with ``A``; the result of
    that convolution is returned unchanged.
    """
    flipped_template = rot90(template, 2)
    correlation = fftconvolve(flipped_template, A)
    return correlation
Esempio n. 18
0
# Finish the current panel: title and tick label sizes.
# (fontsize, data, xmin/xmax/ymin/ymax and gridsize are defined earlier in
# the script, outside this fragment.)
plt.title("scatter", fontsize=fontsize)
plt.xticks(fontsize=fontsize)
plt.yticks(fontsize=fontsize)

##################################################
# now make a KDE of it and plot that
fig = plt.subplot(2,2,2)

# Regular gridsize x gridsize grid over the plotting window; the complex
# step makes mgrid treat gridsize as a point count instead of a step size.
kdeX, kdeY = mgrid[xmin:xmax:gridsize*1j, ymin:ymax:gridsize*1j]
positions = c_[kdeX.ravel(), kdeY.ravel()]

# Rows 0 and 1 of data are used as the x and y coordinates of the samples.
values = c_[data[0,:], data[1,:]]
kernel = stats.kde.gaussian_kde(values.T)
# Evaluate the estimated density at every grid point and fold it back into
# the grid shape.
kdeZ = reshape(kernel(positions.T).T, kdeX.T.shape)

# rot90 is applied before display, presumably so that y increases upwards
# in the image -- TODO confirm.
plt.imshow(rot90(kdeZ), cmap=cm.binary, aspect='auto')
plt.title("density of points", fontsize=fontsize)
plt.xticks(fontsize=fontsize)
plt.yticks(fontsize=fontsize)

##################################################
# now make a delaunay triangulation of it and plot that
fig = plt.subplot(2,2,3)

tt = matplotlib.delaunay.triangulate.Triangulation(data[0,:], data[1,:])
#triang = tri.Triangulation(data[0,:], data[1,:])
#plt.triplot(triang, 'bo-') # this plots the actual triangles of the triangulation. I'm more interested in their interpolated values

# Row 2 of data holds the values attached to the sample points; the
# extrapolator is then sampled on the same grid as the KDE above.
#extrap = tt.linear_extrapolator(data[2,:])
extrap = tt.nn_extrapolator(data[2,:])
interped = extrap[xmin:xmax:gridsize*1j, ymin:ymax:gridsize*1j]
Esempio n. 19
0
def fftcorrelate2d(template, A):
    """
    Correlate ``template`` with ``A`` by means of ``fftconvolve2d``.

    The template is turned by 180 degrees (two quarter turns) and then
    convolved with ``A``; the result of that convolution is returned.
    """
    flipped_template = rot90(template, 2)
    return fftconvolve2d(flipped_template, A)
Esempio n. 20
0
# Build an RGBA image in which the colour encodes the interpolated value and
# the fade towards white encodes the point density (kdeZ).
# (gridsize, kdeZ, interped, zmin/zmax and the axis limits are defined
# earlier in the script, outside this fragment.)
colours = np.zeros((gridsize, gridsize, 4))
kdeZmin = np.min(kdeZ)
kdeZmax = np.max(kdeZ)
confdepth = 0.45  # only referenced by the commented-out HLS variant below
for x in range(gridsize):
	for y in range(gridsize):
		# Density rescaled to [0, 1]: 0 at the sparsest cell, 1 at the densest.
		conf = (kdeZ[x,y] - kdeZmin) / (kdeZmax - kdeZmin)
		# Clamp the interpolated value to [0, 1] before the colormap lookup.
		val  = min(1., max(0., interped[x,y]))
		colour = list(cm.rainbow(val))
		# now fade it out to white according to conf
		# (only R, G and B are blended; the alpha channel is left as is)
		for index in [0,1,2]:
			colour[index] = (colour[index] * conf) + (1.0 * (1. -conf))
		colours[x,y,:] = colour
		#colours[x,y,:] = np.hstack((hls_to_rgb(val, 0.5 + confdepth - (confdepth * conf), 1.0), 1.0))
		#colours[x,y,:] = [conf, conf, 1.0-conf, val]
  
  
print colours
# NOTE(review): colours is already RGBA, and matplotlib documents cmap and
# norm as ignored for RGB(A) input, so these two arguments likely have no
# effect here -- confirm.
plt.imshow(rot90(colours), cmap=cm.rainbow, norm=LogNorm(\
            vmin=zmin, vmax=zmax))
plt.title("interpolated & confidence-shaded")

plt.ylim([ymin,ymax])
plt.xlim([xmin,xmax])
plt.xticks(fontsize=fontsize)
plt.yticks(fontsize=fontsize)

############################################
# Write the whole figure to disk as SVG.
plt.savefig("plot_heati_simple.svg", format='SVG')
Esempio n. 21
0
from scipy import stats, mgrid, c_, reshape, random, rot90


def measure(n):
    """Simulate ``n`` pairs of coupled measurements.

    Two independent normal samples are drawn (unit scale first, then
    scale 0.5); their sum and their difference are returned as a tuple.
    """
    base = random.normal(size=n)
    offset = random.normal(scale=0.5, size=n)
    total = base + offset
    difference = base - offset
    return total, difference


# Draw experiments and plot the results
m1, m2 = measure(500)
# Plot window: x spans the first measurement, y spans the second one.
xmin = m1.min()
xmax = m1.max()
ymin = m2.min()
ymax = m2.max()  # was m1.max(): the y range must come from the y data

# Perform a kernel density estimator on the results, evaluated on a
# 100x100 grid covering the plot window.
X, Y = mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = c_[X.ravel(), Y.ravel()]
values = c_[m1, m2]
kernel = stats.gaussian_kde(values.T)
Z = reshape(kernel(positions.T), X.shape)

# Show the density as an image with the raw samples drawn on top.
figure(figsize=(3, 3))
imshow(rot90(Z), cmap=cm.gist_earth_r, extent=[xmin, xmax, ymin, ymax])
plot(m1, m2, 'k.', markersize=2)

show()
Esempio n. 22
0
    def __init__(self, **kw):
        """Build a discrete paraboloid of revolution and plot it with gnuplot.

        Keyword arguments (all optional):
            E: edge half-length of the containing cube (default 12.0).
            dE: edge increment length (default 0.2).
            dR: radial increment for coincidence (default 0.1).
            verbose: when true, print each new ring and its grid mask.

        Side effects: writes ``graph/E<E>e<dE>r<dR>.dat`` (a gnuplot script
        with inline vector data) and the matching ``.obj`` vertex list, then
        runs ``gnuplot`` on the ``.dat`` file.  The ``graph`` directory must
        already exist.
        """
        self.kw = kw
        self.E  = kw.get('E', 12.0) # Edge halflength of the containing cube.
        self.dE = kw.get('dE', 0.2) # Edge increment length.
        self.dR = kw.get('dR', 0.1) # Radial increment for coincidence.
        grid    = arange(-self.E, self.E + self.dE, self.dE) # Edge values.
        # Force rebalance: shift the edge values so they are centred on zero.
        rng     = grid - (grid[-1] + grid[0]) / 2.0
        self.X  = array([rng,] * len(rng))      # x values plane segment
        self.Y  = rot90(self.X)                 # y values plane segment
        self.r  = sqrt(self.X**2 + self.Y**2)   # r values plane segment
        # persistent information about nodes, rings of nodes, and edges between
        self.ring   = {}            # radius: ring dictionary (duplicates)
        self.unique = {}            # radius: ring dictionary (unique)
        self.edges  = []            # pairs of vertices
        self.point  = {}            # unique points in paraboloid
        self.keyed  = {}            # keyed access to rings of unique points
        last        = None
        # Build a ring stack comprising a discrete paraboloid of revolution.
        z           = -self.dE
        zdiv        = 30
        self.zmax   = 0
        while True:
            z          += self.dE
            self.zmax   = z / zdiv
            r = around(sqrt(z / self.E), decimals=1)
            if r > self.E:
                break
            # Distance of every grid node from the ring of radius r.
            Q = fabs(self.r - r)
            near = Q <= self.dR
            # NOTE(review): "if c" drops nodes whose rounded distance is
            # exactly 0 -- confirm that this exclusion is intended.
            this = [(x, y, z)
                    for x, y, c in zip(
                        around(self.X[near], decimals=2).flatten(),
                        around(self.Y[near], decimals=2).flatten(),
                        around(     Q[near], decimals=2).flatten())
                    if c]
            if last is None or this != last:
                last = this

                self.ring[r] = this
                self.unique[r] = this
                self.keyed[r] = []

                for i, j, _ in this:
                    xykey = '%+3.2e %+3.2e' % (i, j)
                    # Record a keyed entry only the first time a point shows
                    # up; the membership test has to run before the point is
                    # stored, otherwise it can never succeed.
                    if xykey not in self.point:
                        n = sqrt(i*i + j*j)
                        n += float(n == 0.0)  # avoid 0/0 for the origin
                        self.keyed[r].append(
                            {'key': xykey, 'xy': (i, j), 'tip': (i/n, j/n)})
                    # Keep a dictionary of unique points.
                    self.point[xykey] = (i, j)

                if kw.get('verbose', False):
                    print('%s %s' % (r, array(self.ring[r])))
                    print(where(near, '*', ' '))
            else:
                # Don't eliminate duplicates.
                self.ring[r] = last

        filename = 'graph/E%3.1fe%3.1fr%3.2f' % (self.E, self.dE, self.dR)
        #terminal = "png nocrop enhanced font verdana 12 size 640,480"
        terminal = "png nocrop size 640,480"

        print('%s to %s' % (filename, filename + '.png'))
        with open(filename + '.obj', 'w') as objfile:
            with open(filename + '.dat', 'w') as datfile:
                # gnuplot script header followed by the inline vector data.
                datfile.write("# %s (this file)\n" % (filename))
                datfile.write("# %f %f %f\n" % (self.E, self.dE, self.dR))
                datfile.write("# %d points\n" % (len(self.point)))
                datfile.write("# %d z values\n" % (len(self.ring)))
                datfile.write("# %d (x,y) sets\n" % (len(self.unique)))
                datfile.write("set terminal %s\n" % (terminal))
                datfile.write("set output '%s.png'\n" % (filename))
                datfile.write("set zrange[0:%d]\n" % (self.zmax + 1))
                datfile.write("set isosamples 100\n")
                datfile.write("splot '-' with vectors title 'Paraboloid'\n")
                datfile.write("\n")
                for layer in self.unique.values():
                    for i, j, kl in layer:
                        n = sqrt(i*i + j*j)
                        n += float(n == 0.0)  # avoid 0/0 for the origin
                        k = float(kl) / zdiv
                        datfile.write(
                            ('%+3.2e ' * 6) % (i, j, k, i/n, j/n, 0) + '\n')
                        objfile.write(
                            ('v' + ' %+3.2e' * 3) % (i, j, k) + '\n')
        call(['gnuplot', filename + '.dat'])
Esempio n. 23
0
def measure(n):
    """Return two coupled measurement series of length ``n``.

    A unit-scale normal sample and a second normal sample with scale 0.5
    are drawn in that order; the pair (sum, difference) is returned.
    """
    first = random.normal(size=n)
    second = random.normal(scale=0.5, size=n)
    coupled = (first + second, first - second)
    return coupled

# Draw experiments and plot the results
m1, m2 = measure(500)
# Plot window: x spans the first measurement, y spans the second one.
xmin = m1.min()
xmax = m1.max()
ymin = m2.min()
ymax = m2.max()  # was m1.max(): the y range must come from the y data

# Perform a kernel density estimator on the results, evaluated on a
# 100x100 grid covering the plot window.
X, Y = mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = c_[X.ravel(), Y.ravel()]
values = c_[m1, m2]
kernel = stats.gaussian_kde(values.T)
Z = reshape(kernel(positions.T), X.shape)

# Show the density as an image with the raw samples drawn on top.
figure(figsize=(3, 3))
imshow(rot90(Z),
       cmap=cm.gist_earth_r,
       extent=[xmin, xmax, ymin, ymax])
plot(m1, m2, 'k.', markersize=2)

show()

Esempio n. 24
0
File: zad1.py Progetto: moskalap/fft
import numpy as np
from scipy import ndimage, rot90

# Locate occurrences of a small pattern inside a larger image by FFT-based
# cross-correlation and paint the strongest matches white.
# NOTE(review): ndimage.imread is no longer available in recent SciPy
# releases; this script needs an older SciPy or another image reader.
pattern = ndimage.imread('/res/fish1.png', flatten=True)
im = ndimage.imread('/res/school.jpg', flatten=True)
imn = ndimage.imread('/res/school.jpg')
print(im.shape)

# Correlation theorem: multiply the spectrum of the 180-degree-rotated
# pattern (zero-padded to the image size) with the image spectrum.
fp = np.fft.fft2(rot90(pattern, 2), im.shape)
fi = np.fft.fft2(im)
corr = np.abs(np.fft.ifft2(np.multiply(fp, fi)))
print(corr.size)

# Keep only responses of at least half the maximum.
corr[corr < 0.5 * np.amax(corr)] = 0
hits = corr > 0
# Report the surviving responses in row-major order.
for value in corr[hits]:
    print(value)
# Paint the first three channels of every matching pixel white in one
# masked assignment instead of a per-pixel Python loop.
imn[hits, :3] = 255

import matplotlib.pyplot as plt

plt.imshow(imn)
plt.show()
Esempio n. 25
0
# Finish the current panel: title and tick label sizes.
# (fontsize, data, xmin/xmax/ymin/ymax and gridsize are defined earlier in
# the script, outside this fragment.)
plt.title("scatter", fontsize=fontsize)
plt.xticks(fontsize=fontsize)
plt.yticks(fontsize=fontsize)

##################################################
# now make a KDE of it and plot that
fig = plt.subplot(2, 2, 2)

# Regular gridsize x gridsize grid over the plotting window; the complex
# step makes mgrid treat gridsize as a point count instead of a step size.
kdeX, kdeY = mgrid[xmin:xmax:gridsize * 1j, ymin:ymax:gridsize * 1j]
positions = c_[kdeX.ravel(), kdeY.ravel()]

# Rows 0 and 1 of data are used as the x and y coordinates of the samples.
values = c_[data[0, :], data[1, :]]
kernel = stats.kde.gaussian_kde(values.T)
# Evaluate the estimated density at every grid point and fold it back into
# the grid shape.
kdeZ = reshape(kernel(positions.T).T, kdeX.T.shape)

# rot90 is applied before display, presumably so that y increases upwards
# in the image -- TODO confirm.
plt.imshow(rot90(kdeZ), cmap=cm.binary, aspect='auto')
plt.title("density of points", fontsize=fontsize)
plt.xticks(fontsize=fontsize)
plt.yticks(fontsize=fontsize)

##################################################
# now make a delaunay triangulation of it and plot that
fig = plt.subplot(2, 2, 3)

tt = matplotlib.delaunay.triangulate.Triangulation(data[0, :], data[1, :])
#triang = tri.Triangulation(data[0,:], data[1,:])
#plt.triplot(triang, 'bo-') # this plots the actual triangles of the triangulation. I'm more interested in their interpolated values

# Row 2 of data holds the values attached to the sample points; the
# extrapolator is then sampled on the same grid as the KDE above.
#extrap = tt.linear_extrapolator(data[2,:])
extrap = tt.nn_extrapolator(data[2, :])
interped = extrap[xmin:xmax:gridsize * 1j, ymin:ymax:gridsize * 1j]