def plot_zipf_law_on_corpus(corpus):
    # Assumes: import math, numpy as np, pylab; from scipy import stats;
    # FreqDist (e.g. from nltk) and the corpus helpers are defined elsewhere.
    words = getWordsFromCorpus(corpus)
    words = remove_stopwords_from_corpus_words(words)
    fdist = FreqDist(words)
    words = fdist.most_common()
    # Log-rank on the x-axis, log-count on the y-axis (matches the axis labels below).
    y = [math.log(i[1]) for i in words]
    x = [math.log(i) for i in range(1, len(y) + 1)]
    (m, b) = pylab.polyfit(x, y, 1)
    yp = pylab.polyval([m, b], x)
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    pylab.plot(x, y, 'r')
    pylab.plot(x, yp, 'b')
    pylab.ylim([min(y), max(y)])
    pylab.xlim([min(x), max(x)])
    pylab.text(x=1, y=1, s="Best Fit Line (Blue) \nslope = {slope}".format(
        slope=np.round(slope, 2)))
    pylab.grid(True)
    pylab.ylabel('Counts of words (log)')
    pylab.xlabel('Ranks of words (log)')
    pylab.title('Zipf law test on corpus (the slope should be -1 in the ideal Zipf case)')
    pylab.show()
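# The snippet above calls two helpers that are not shown, getWordsFromCorpus and
# remove_stopwords_from_corpus_words. A minimal sketch of what they might look like,
# assuming NLTK is available and the corpus is a plain string (hypothetical
# implementations, not the originals):
import nltk
from nltk.corpus import stopwords  # requires nltk.download('punkt') and nltk.download('stopwords')

def getWordsFromCorpus(corpus):
    # Tokenise the raw text and keep lower-cased alphabetic tokens only (assumed behaviour).
    return [w.lower() for w in nltk.word_tokenize(corpus) if w.isalpha()]

def remove_stopwords_from_corpus_words(words):
    # Drop common English stopwords (assumed behaviour).
    stops = set(stopwords.words('english'))
    return [w for w in words if w not in stops]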
def plotScatter(pearsonStats, data, args, color='b'):
    """Scatter-plot two data vectors with a best-fit line and a Pearson annotation."""
    fig = pl.figure()
    ax = fig.add_subplot(111)
    if args.log:
        ax.set_xscale('log')
        ax.set_yscale('log')
    ax.scatter(data[0], data[1], s=15, c=color, marker='o', alpha=1)
    if not args.log:
        ax.set_autoscale_on(False)
    ax.set_xlabel(args.label1)
    ax.set_ylabel(args.label2)
    upperLim = max(data[0] + data[1])
    m, b = pl.polyfit(data[0], data[1], 1)
    bfYs = pl.polyval([m, b], [1, max(data[0])])
    ax.plot([1, max(data[0])], bfYs, 'r-')
    pl.text(0.02, 0.95, 'Pearson: %.4f, %s\nBest Fit: y=%.3f*x+%.3f' %
            (pearsonStats[0], pearsonStats[1], m, b),
            bbox=dict(facecolor='#87AACD', alpha=1),
            horizontalalignment='left',
            verticalalignment='top',
            transform=ax.transAxes)
    pdf_or_png = 'pdf' if args.pdf else 'png'
    # construct outfile name
    if not args.log:
        outName = '%s_%s_vs_%s.%s' % (args.out, args.label1.replace(' ', '_'),
                                      args.label2.replace(' ', '_'), pdf_or_png)
    else:
        outName = '%s_%s_vs_%s.log.%s' % (args.out, args.label1.replace(' ', '_'),
                                          args.label2.replace(' ', '_'), pdf_or_png)
    pl.savefig(outName)
    if args.galaxy:
        os.rename(outName, args.out)
    print('Show? %s' % args.show)
    if args.show:
        pl.show()
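# plotScatter expects pearsonStats to be an (r, p-value) pair. A plausible call site,
# assuming the function above and its pl/os imports are already in scope; the data and
# the argparse.Namespace attributes below are illustrative only, not from the original:
import argparse
from scipy import stats

xs = [1.0, 2.0, 3.0, 4.0, 5.0]
ys = [1.1, 2.3, 2.9, 4.2, 5.1]
args = argparse.Namespace(log=False, label1='sample A', label2='sample B',
                          out='scatter', pdf=False, galaxy=False, show=False)

r, p = stats.pearsonr(xs, ys)      # (correlation coefficient, two-sided p-value)
plotScatter((r, p), [xs, ys], args)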
def evaluate_models_on_training(x, y, models):
    """
    For each regression model, compute the R-squared value for this model with
    the standard error over slope of a linear regression line (only if the
    model is linear), and plot the data along with the best fit curve.

    For the plots, you should plot data points (x,y) as blue dots and your
    best fit curve (aka model) as a red solid line. You should also label the
    axes of this figure appropriately and have a title reporting the following
    information:
        degree of your regression model,
        R-squared of your model evaluated on the given data points,
        and SE/slope (if degree of this model is 1 -- see se_over_slope)

    Args:
        x: a 1-d pylab array with length N, representing the x-coordinates of
            the N sample points
        y: a 1-d pylab array with length N, representing the y-coordinates of
            the N sample points
        models: a list containing the regression models you want to apply to
            your data. Each model is a pylab array storing the coefficients of
            a polynomial.

    Returns:
        None
    """
    for model in models:
        predict_y = pylab.polyval(model, x)
        # Get the type of model from its number of coefficients
        if len(model) <= 4:
            types_of_model = ["linear", "quadratic", "cubic"]
            model_type = types_of_model[len(model) - 2]
        else:
            model_type = f"degree-{len(model) - 1}"
        # Build the title
        title = (f"Years against degrees C with {model_type} model"
                 f"\nR2 = {round(r_squared(y, predict_y), 5)}")
        # If the model is linear, add the SE/slope ratio to the title
        if len(model) == 2:
            title += f"\nStandard error to slope ratio = {round(se_over_slope(x, y, predict_y, model), 5)}"
        # Plot the data points and the fitted curve
        pylab.figure()
        pylab.plot(x, y, "bo", x, predict_y, "-r")
        pylab.title(title)
        pylab.xlabel("Years")
        pylab.ylabel("Temperature in degrees C")
        pylab.show()
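# The evaluate_models_on_training variants below assume helper functions r_squared and
# se_over_slope defined elsewhere in the original assignment. A minimal sketch of the
# conventional definitions (an assumption, not code taken from the source):
import numpy as np

def r_squared(y, estimated):
    """Coefficient of determination: 1 - SSE/SST."""
    y = np.asarray(y)
    estimated = np.asarray(estimated)
    sse = ((y - estimated) ** 2).sum()
    sst = ((y - y.mean()) ** 2).sum()
    return 1 - sse / sst

def se_over_slope(x, y, estimated, model):
    """Ratio of the standard error of the fitted slope to the slope itself."""
    x = np.asarray(x)
    y = np.asarray(y)
    estimated = np.asarray(estimated)
    assert len(model) == 2  # only meaningful for a linear model
    ee = ((estimated - y) ** 2).sum()
    var_x = ((x - x.mean()) ** 2).sum()
    se = np.sqrt(ee / (len(x) - 2) / var_x)
    return se / model[0]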
def evaluate_models_on_training(x, y, models):
    """
    For each regression model, compute the R-squared for this model with the
    standard error over slope of a linear regression line (only if the model
    is linear), and plot the data along with the best fit curve.

    For the plots, you should plot data points (x,y) as blue dots and your
    best fit curve (aka model) as a red solid line. You should also label the
    axes of this figure appropriately and have a title reporting the following
    information:
        degree of your regression model,
        R-squared of your model evaluated on the given data points

    Args:
        x: a list of length N, representing the x-coords of N sample points
        y: a list of length N, representing the y-coords of N sample points
        models: a list containing the regression models you want to apply to
            your data. Each model is a numpy array storing the coefficients of
            a polynomial.

    Returns:
        None
    """
    for model in models:
        est_y = pylab.polyval(model, x)
        error = r_squared(y, est_y)
        pylab.figure()
        pylab.plot(x, y, 'bo')
        pylab.plot(x, est_y, 'r',
                   label=f"degree {len(model) - 1}, R-squared = {round(error, 5)}")
        pylab.xlabel("x-coords of N sample points")
        pylab.ylabel("y-coords of N sample points")
        pylab.legend(loc="best")
        pylab.title("best fit")
        pylab.show()
def plotScatter(pearsonStats, normedTxCntsList, opts):
    """Scatter-plot two normalised count vectors with a best-fit line and a Pearson annotation."""
    fig = pl.figure()
    ax = fig.add_subplot(111)
    if opts.log:
        ax.set_xscale('log')
        ax.set_yscale('log')
    ax.scatter(normedTxCntsList[0], normedTxCntsList[1], s=15, c='b', marker='o', alpha=1)
    if not opts.log:
        ax.set_autoscale_on(False)
    ax.set_xlabel(opts.name_a)
    ax.set_ylabel(opts.name_b)
    upperLim = max(normedTxCntsList[0] + normedTxCntsList[1])
    m, b = pl.polyfit(normedTxCntsList[0], normedTxCntsList[1], 1)
    bfYs = pl.polyval([m, b], [1, max(normedTxCntsList[0])])
    ax.plot([1, max(normedTxCntsList[0])], bfYs, 'r-')
    pl.text(0.01, 0.99, 'Pearson: %.4f, %s\nBest Fit: y=%.3f*x+%.3f' %
            (pearsonStats[0], pearsonStats[1], m, b),
            bbox=dict(facecolor='#87AACD', alpha=1),
            horizontalalignment='left',
            verticalalignment='top',
            transform=ax.transAxes)
    mkdirp(opts.dir)
    if not opts.log:
        pl.savefig('%s%s_vs_%s.png' % (opts.dir, opts.name_a, opts.name_b))
    else:
        pl.savefig('%s%s_vs_%s.log.png' % (opts.dir, opts.name_a, opts.name_b))
    print('Show? %s' % opts.show)
    if opts.show:
        pl.show()
def evaluate_models_on_training(x, y, models):
    """
    For each regression model, compute the R-squared for this model with the
    standard error over slope of a linear regression line (only if the model
    is linear), and plot the data along with the best fit curve.

    For the plots, you should plot data points (x,y) as blue dots and your
    best fit curve (aka model) as a red solid line. You should also label the
    axes of this figure appropriately and have a title reporting the following
    information:
        degree of your regression model,
        R-squared of your model evaluated on the given data points

    Args:
        x: a list of length N, representing the x-coords of N sample points
        y: a list of length N, representing the y-coords of N sample points
        models: a list containing the regression models you want to apply to
            your data. Each model is a numpy array storing the coefficients of
            a polynomial.

    Returns:
        None
    """
    r2 = []
    for model in models:
        estimated = np.polyval(model, x)
        r2.append(r_squared(y, list(estimated)))
        xVals = pylab.array(x)
        yVals = pylab.array(y)
        # xVals = xVals * 9.81  # get force
        pylab.plot(xVals, yVals, 'bo', label='Measured points')
        estYVals = pylab.polyval(model, xVals)
        pylab.plot(xVals, estYVals, 'r',
                   label='Linear fit, k = ' + str(round(1 / model[0], 5)))
        pylab.legend(loc='best')
        pylab.show()
    print(r2)
def runSimulation(numSteps):
    """
    Runs the simulation for `numSteps` time steps.

    Returns a tuple of two lists: (rabbit_populations, fox_populations), where
    rabbit_populations is a record of the rabbit population at the END of each
    time step, and fox_populations is a record of the fox population at the
    END of each time step. Both lists should be `numSteps` items long.
    """
    ret_rabbit = [CURRENTRABBITPOP]
    ret_fox = [CURRENTFOXPOP]
    for i in range(numSteps):
        if CURRENTFOXPOP >= 10 and CURRENTRABBITPOP >= 10:
            rabbitGrowth()
            foxGrowth()
        ret_rabbit.append(CURRENTRABBITPOP)
        ret_fox.append(CURRENTFOXPOP)
    return ret_rabbit, ret_fox


rp, fp = runSimulation(200)
rl, = pyplot.plot(rp, label="Rabbit population")
fl, = pyplot.plot(fp, label="Fox population")
pyplot.legend(handles=[rl, fl])
pyplot.show()

# Fit a degree-2 polynomial to each population curve and plot the fitted curves.
rcoeff = pylab.polyfit(range(len(rp)), rp, 2)
rcl, = pyplot.plot(pylab.polyval(rcoeff, range(len(rp))), label="Rabbit Coefficients")
fcoeff = pylab.polyfit(range(len(fp)), fp, 2)
fcl, = pyplot.plot(pylab.polyval(fcoeff, range(len(fp))), label="Fox Coefficients")
pyplot.legend(handles=[rcl, fcl])
pyplot.show()
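# runSimulation relies on module-level state and growth procedures defined elsewhere
# (CURRENTRABBITPOP, CURRENTFOXPOP, rabbitGrowth, foxGrowth). The sketch below shows one
# plausible shape of those pieces; the capacities, initial values and probabilities are
# assumptions for illustration, not the original code:
import random

MAXRABBITPOP = 1000       # hypothetical carrying capacity
CURRENTRABBITPOP = 500    # hypothetical initial populations
CURRENTFOXPOP = 30

def rabbitGrowth():
    """Each rabbit reproduces with probability 1 - population/capacity (assumed rule)."""
    global CURRENTRABBITPOP
    for _ in range(CURRENTRABBITPOP):
        if random.random() < 1.0 - CURRENTRABBITPOP / MAXRABBITPOP:
            CURRENTRABBITPOP += 1

def foxGrowth():
    """Each fox hunts; a successful hunt may add a fox, a failed one may remove one (assumed rule)."""
    global CURRENTRABBITPOP, CURRENTFOXPOP
    for _ in range(CURRENTFOXPOP):
        if random.random() < CURRENTRABBITPOP / MAXRABBITPOP:
            if CURRENTRABBITPOP > 10:
                CURRENTRABBITPOP -= 1
            if random.random() < 1 / 3:
                CURRENTFOXPOP += 1
        elif random.random() < 0.1 and CURRENTFOXPOP > 10:
            CURRENTFOXPOP -= 1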
from geobricks_raster_correlation.core.raster_correlation_core import get_correlation
from matplotlib import pyplot as plt
from matplotlib.pylab import polyfit, polyval

# Paths to your raster files
raster_path1 = "../../tests/data/geoserver_data_dir/data/workspace/wheat_actual_biomprod_201010_doukkala/wheat_actual_biomprod_201010_doukkala.geotiff"
raster_path2 = "../../tests/data/geoserver_data_dir/data/workspace/wheat_potential_biomprod_201010_doukkala/wheat_potential_biomprod_201010_doukkala.geotiff"

# Number of sampling bins
bins = 150
corr = get_correlation(raster_path1, raster_path2, bins)

x = []
y = []
colors = []
# print(corr['series'])
for serie in corr['series']:
    for data in serie['data']:
        x.append(data[0])
        y.append(data[1])
        colors.append(serie['color'])  # one colour entry per point, matching x and y

# Adding regression line
(m, b) = polyfit(x, y, 1)
yp = polyval([m, b], x)
plt.plot(x, yp)

# Plotting scatter
plt.scatter(x, y, c=colors)
plt.show()
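# Optional addition (not part of the original example): if you also want the correlation
# coefficient of the sampled points on the figure, compute it directly from x and y with
# SciPy and set a title before the plt.show() call above.
from scipy import stats

r, p = stats.pearsonr(x, y)
plt.title("Pearson r = %.3f (p = %.3g), fit: y = %.3f*x + %.3f" % (r, p, m, b))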
# (Fragment: the enclosing function and the fig/ax setup are not shown.)
if opts.log:
    ax.set_xscale('log')
    ax.set_yscale('log')
ax.scatter(vecFile[1], vecFile[2], s=15, c='b', marker='o', alpha=1)
if not opts.log:
    ax.set_autoscale_on(False)
ax.set_xlabel(vecFile[0][0])
ax.set_ylabel(vecFile[0][1])
upperLim = max(vecFile[1] + vecFile[2])
m, b = pl.polyfit(vecFile[1], vecFile[2], 1)
bfYs = pl.polyval([m, b], [1, max(vecFile[1])])
ax.plot([1, max(vecFile[1])], bfYs, 'r-')
pl.text(0.01, 0.99, 'Pearson: %.4f, %s\nBest Fit: y=%.3f*x+%.3f' %
        (pearson[0], pearson[1], m, b),
        bbox=dict(facecolor='#87AACD', alpha=1),
        horizontalalignment='left',
        verticalalignment='top',
        transform=ax.transAxes)
pl.savefig('%s_vs_%s.png' % (vecFile[0][0], vecFile[0][1]))
print('Show? %s' % opts.show)
if opts.show:
    pl.show()
# (Fragment: the enclosing setup of fig/ax, vector0/vector1 and pearson is not shown.)
# ax.set_xscale('log')
# ax.set_yscale('log')
ax.scatter(vector0, vector1, s=10, c='b', marker='o', alpha=0.6)

# -- set axis labels --
xLab = DEGseqParser._file.name.split('_')[0]
yLab = DEGseqParser._file.name.split('_')[2]
ax.set_xlabel(xLab)
ax.set_ylabel(yLab)

m, b = pl.polyfit(vector0, vector1, 1)
min_xVec = min(vector0)
max_xVec = max(vector0)
bfYs = pl.polyval([m, b], [min_xVec, max_xVec])
ax.plot([min_xVec, max_xVec], bfYs, 'r-')
pl.text(0.01, 0.99, 'Pearson: %.4f, %s\nBest Fit: y=%.3f*x+%.3f' %
        (pearson[0], pearson[1], m, b),
        bbox=dict(facecolor='#87AACD', alpha=1),
        horizontalalignment='left',
        verticalalignment='top',
        transform=ax.transAxes)
pl.savefig('%s_vs_%s.indvDEG.png' % (xLab, yLab))
print('Show? %s' % opts.show)
if opts.show:
    pl.show()