def mdl_1d_cat(x, y):
    """Fit a univariate logistic model on ``x`` and report its AUC with a plot.

    ``x`` is first passed through ``sb_cutz`` when it is numeric and has more
    than 10 distinct values, then dummy-encoded (with a NaN indicator column)
    and fed to a ``LogisticRegressionCV`` scored on ROC AUC.

    Returns
    -------
    tuple
        ``(auc, image)`` where ``auc`` is ``roc_auc_score`` of the predicted
        probabilities (last class column) against ``y`` and ``image`` is the
        figure from ``plot_cat`` as a ``data:image/png;base64,`` string.
    """
    needs_cut = x.nunique() > 10 and com.is_numeric_dtype(x)
    if needs_cut:
        x = sb_cutz(x)
    dummies = pd.get_dummies(x, dummy_na=True)

    model = LogisticRegressionCV(scoring='roc_auc')
    model.fit(dummies, y)
    try:
        probabilities = model.predict_proba(dummies)
        preds = probabilities[:, -1]
    except ValueError:
        # Drops into the interactive debugger; ``preds`` stays unset here.
        Tracer()()

    figure = plot_cat(x, y)
    png = BytesIO()
    figure.savefig(png)
    png.seek(0)

    aucz = roc_auc_score(y, preds)
    encoded = quote(base64.b64encode(png.getvalue()))
    cmatrix = 'data:image/png;base64,' + encoded
    plt.close()
    return aucz, cmatrix
def to_greyscale(profile_picture):
    """Fetch ``profile_picture['source']`` and return it as a greyscale JPEG.

    The image is downloaded with ``requests``, converted to mode ``'L'`` and
    re-encoded as JPEG (quality 90).

    Returns
    -------
    BytesIO
        Buffer holding the JPEG bytes, rewound to position 0.
    """
    downloaded = requests.get(profile_picture['source']).content
    grey = Image.open(BytesIO(downloaded)).convert('L')

    jpeg_buffer = BytesIO()
    grey.save(jpeg_buffer, 'JPEG', quality=90)
    jpeg_buffer.seek(0)
    return jpeg_buffer
def image(filename):
    """Serve the image stored in redis under ``filename`` as a JPEG response.

    The stored bytes are decoded with PIL and re-encoded as JPEG (quality 90)
    before being wrapped in a ``Response`` with mimetype ``image/jpeg``.
    """
    stored_bytes = redis.get(filename)
    decoded = Image.open(BytesIO(stored_bytes))

    jpeg_buffer = BytesIO()
    decoded.save(jpeg_buffer, 'JPEG', quality=90)
    return Response(jpeg_buffer.getvalue(), mimetype='image/jpeg')
def describe_numeric_1d(series, base_stats):
    """Compute descriptive statistics and histograms for a numeric series.

    Parameters
    ----------
    series : Series
        The numeric data to describe.
    base_stats
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    Series
        Named after ``series``, holding mean/std/variance/min/max/range, the
        5/25/50/75/95 percent quantiles (keyed by ``pretty_name``), iqr,
        kurtosis, skewness, sum, mad, cv, zero counts, ``type`` ("NUM") and
        two ``data:image/png;base64,`` encoded histograms.
    """
    stats = {
        'mean': series.mean(),
        'std': series.std(),
        'variance': series.var(),
        'min': series.min(),
        'max': series.max(),
    }
    stats['range'] = stats['max'] - stats['min']
    for x in np.array([0.05, 0.25, 0.5, 0.75, 0.95]):
        stats[pretty_name(x)] = series.quantile(x)
    stats['iqr'] = stats['75%'] - stats['25%']
    stats['kurtosis'] = series.kurt()
    stats['skewness'] = series.skew()
    stats['sum'] = series.sum()
    # Mean absolute deviation around the mean. Spelled out because
    # ``Series.mad()`` was removed in pandas 2.0; this is the same quantity.
    stats['mad'] = (series - stats['mean']).abs().mean()
    # ``np.nan`` instead of the ``np.NaN`` alias, which NumPy 2.0 removed.
    stats['cv'] = stats['std'] / stats['mean'] if stats['mean'] else np.nan
    stats['type'] = "NUM"

    n_rows = len(series)
    stats['n_zeros'] = n_rows - np.count_nonzero(series)
    # float() keeps this a true ratio under Python 2 integer division; an
    # empty series yields NaN instead of ZeroDivisionError.
    stats['p_zeros'] = stats['n_zeros'] / float(n_rows) if n_rows else np.nan

    # Large histogram
    imgdata = BytesIO()
    plot = series.plot(kind='hist', figsize=(6, 4),
                       facecolor='#337ab7', bins=bins)
    # TODO when running on server, send this off to a different thread
    plot.figure.subplots_adjust(left=0.15, right=0.95, top=0.9, bottom=0.1,
                                wspace=0, hspace=0)
    plot.figure.savefig(imgdata)
    imgdata.seek(0)
    stats['histogram'] = ('data:image/png;base64,'
                          + quote(base64.b64encode(imgdata.getvalue())))
    # TODO Think about writing this to disk instead of caching them in strings
    plt.close(plot.figure)

    stats['mini_histogram'] = mini_histogram(series)

    return pd.Series(stats, name=series.name)
def mini_histogram(series, **kwargs):
    """Render a 2 x 0.75 inch histogram of ``series`` as an inline image.

    Parameters
    ----------
    series: Series
        The data to plot.
    **kwargs
        Forwarded to ``_plot_histogram``.

    Returns
    -------
    str
        ``data:image/png;base64,`` followed by the quoted base64 PNG.
    """
    axes = _plot_histogram(series, figsize=(2, 0.75), **kwargs)
    axes.axes.get_yaxis().set_visible(False)

    # Older Matplotlib (<= 1.5.9) only offers set_axis_bgcolor.
    is_legacy = LooseVersion(matplotlib.__version__) <= '1.5.9'
    paint_background = axes.set_axis_bgcolor if is_legacy else axes.set_facecolor
    paint_background("w")

    major_ticks = axes.xaxis.get_major_ticks()
    # Hide every tick except the outermost two ...
    for inner in major_ticks[1:-1]:
        inner.set_visible(False)
        inner.label.set_visible(False)
    # ... and shrink the labels of the two that remain.
    for position in (0, -1):
        major_ticks[position].label.set_fontsize(8)

    figure = axes.figure
    figure.subplots_adjust(left=0.15, right=0.85, top=1, bottom=0.35,
                           wspace=0, hspace=0)
    png = BytesIO()
    figure.savefig(png)
    encoded = quote(base64.b64encode(png.getvalue()))
    plt.close(figure)
    return 'data:image/png;base64,' + encoded
def render_poster(name_image):
    """Serve the poster stored in redis for ``name_image`` as a JPEG response.

    The redis key is the part of ``name_image`` before the first ``-``. The
    stored bytes are decoded with PIL and re-encoded as JPEG (quality 90).
    """
    # TODO: Parse for name and base64 Image
    key = name_image.partition('-')[0]
    stored_bytes = redis.get(key)
    poster = Image.open(BytesIO(stored_bytes))

    jpeg_buffer = BytesIO()
    poster.save(jpeg_buffer, 'JPEG', quality=90)
    return Response(jpeg_buffer.getvalue(), mimetype='image/jpeg')
def save_poster():
    """Store the poster posted in the request form in redis as a JPEG.

    Reads ``image`` (a base64 PNG, optionally prefixed with
    ``data:image/png;base64,``) and ``name`` from ``request.form``, re-encodes
    the picture as JPEG (quality 90) and saves the bytes under ``name``.

    Returns
    -------
    tuple
        ``(json_body, 200, headers)`` with ``{"success": true}`` as body.
    """
    name = request.form['name']
    payload = str(request.form['image'])
    payload = payload.replace('data:image/png;base64,', '').replace('\n', '')

    poster_image = Image.open(BytesIO(base64.b64decode(payload)))
    # JPEG has no alpha channel or palette mode; current Pillow raises
    # "cannot write mode RGBA as JPEG" for such PNGs, so convert them first.
    if poster_image.mode not in ('RGB', 'L', 'CMYK'):
        poster_image = poster_image.convert('RGB')

    buffer_image = BytesIO()
    poster_image.save(buffer_image, 'JPEG', quality=90)
    redis.set(name, buffer_image.getvalue())

    # NOTE(review): 'ContentType' is not the standard 'Content-Type' header
    # name; left unchanged because clients may depend on the current response.
    return json.dumps({'success':True}), 200, {'ContentType':'application/json'}
def mdl_1d(x, y):
    """Fit a univariate logistic model on ``x`` and return AUC plus two plots.

    ``x`` is passed through ``sb_cutz`` when it is numeric with more than 10
    distinct values, then dummy-encoded (with a NaN indicator column). Both a
    ``LogisticRegressionCV`` and a ``LassoLarsIC`` are fitted; only the
    logistic model's probabilities are used for the score.

    Returns
    -------
    tuple
        ``(auc, nplot, bplot)``: the ROC AUC and the ``plot_num`` /
        ``plot_bubble`` figures as ``data:image/png;base64,`` strings.
    """
    def _figure_to_data_uri(figure):
        # Save the figure to memory, encode it, then close the current figure.
        png = BytesIO()
        figure.savefig(png)
        png.seek(0)
        uri = 'data:image/png;base64,' + \
            quote(base64.b64encode(png.getvalue()))
        plt.close()
        return uri

    logistic = LogisticRegressionCV(scoring='roc_auc')
    lars = LassoLarsIC(criterion='aic')

    use_cut = x.nunique() > 10 and com.is_numeric_dtype(x)
    encoded_input = sb_cutz(x) if use_cut else x
    dummies = pd.get_dummies(encoded_input, dummy_na=True)

    logistic.fit(dummies, y)
    lars.fit(dummies, y)
    try:
        probabilities = logistic.predict_proba(dummies)
        preds = probabilities[:, -1]
    except ValueError:
        # Drops into the interactive debugger; ``preds`` stays unset here.
        Tracer()()

    aucz = roc_auc_score(y, preds)
    bin_stats = num_bin_stats(x, y)

    nplot = _figure_to_data_uri(plot_num(bin_stats))
    bplot = _figure_to_data_uri(plot_bubble(bin_stats))
    return aucz, nplot, bplot
def mini_histogram(series):
    """Render a 2 x 0.75 inch histogram of ``series`` as an inline image.

    Uses the module-level ``bins`` setting for the number of bins.

    Parameters
    ----------
    series: Series
        The data to plot.

    Returns
    -------
    str
        ``data:image/png;base64,`` followed by the quoted base64 PNG.
    """
    # Small histogram
    imgdata = BytesIO()
    plot = series.plot(kind='hist', figsize=(2, 0.75),
                       facecolor='#337ab7', bins=bins)
    plot.axes.get_yaxis().set_visible(False)

    # ``set_axis_bgcolor`` was deprecated in Matplotlib 2.0 and later removed;
    # use ``set_facecolor`` whenever the installed version provides it.
    if hasattr(plot, 'set_facecolor'):
        plot.set_facecolor("w")
    else:
        plot.set_axis_bgcolor("w")

    xticks = plot.xaxis.get_major_ticks()
    # ``label1`` is the primary tick label; the ``label`` alias for it has
    # been removed from recent Matplotlib releases.
    for tick in xticks[1:-1]:
        tick.set_visible(False)
        tick.label1.set_visible(False)
    for tick in (xticks[0], xticks[-1]):
        tick.label1.set_fontsize(8)

    plot.figure.subplots_adjust(left=0.15, right=0.85, top=1, bottom=0.35,
                                wspace=0, hspace=0)
    plot.figure.savefig(imgdata)
    imgdata.seek(0)
    result_string = ('data:image/png;base64,'
                     + quote(base64.b64encode(imgdata.getvalue())))
    plt.close(plot.figure)
    return result_string
def histogram(series, **kwargs):
    """Render a histogram of ``series`` as an inline PNG image.

    Parameters
    ----------
    series: Series
        The data to plot.
    **kwargs
        Forwarded to ``_plot_histogram``.

    Returns
    -------
    str
        ``data:image/png;base64,`` followed by the quoted base64 PNG.
    """
    axes = _plot_histogram(series, **kwargs)
    figure = axes.figure
    figure.subplots_adjust(left=0.15, right=0.95, top=0.9, bottom=0.1,
                           wspace=0, hspace=0)

    png = BytesIO()
    figure.savefig(png)
    encoded = quote(base64.b64encode(png.getvalue()))
    # TODO Think about writing this to disk instead of caching them in strings
    plt.close(figure)
    return 'data:image/png;base64,' + encoded
def send_fcgi_response(request, data, response):
    """Send ``response`` back over ``request`` as FastCGI records.

    A ``Record`` is parsed from ``data`` and then reused for two outgoing
    records: an ``FCGI_STDOUT`` record carrying ``response`` followed by an
    empty ``FCGI_END_REQUEST`` record. Each is serialised with
    ``Record.write`` and pushed through ``request.sendall``.
    """
    record = Record()
    record.read(BytesIO(data))

    def _send(record_type, content):
        # Re-stamp the parsed record and transmit its serialised form.
        record.type = record_type
        record.contentData = content
        record.contentLength = len(content)
        serialised = BytesIO()
        record.write(serialised)
        serialised.seek(0)
        request.sendall(serialised.read())

    _send(FCGI_STDOUT, response)
    _send(FCGI_END_REQUEST, "")
def mini_histogram(series, **kwargs):
    """Render a 4 x 2 inch histogram of ``series`` as an inline image.

    Every other x tick label (starting with the first) is hidden, and the
    first and last major tick labels are set to font size 8.

    Parameters
    ----------
    series: Series
        The data to plot.
    **kwargs
        Forwarded to ``_plot_histogram``.

    Returns
    -------
    str
        ``data:image/png;base64,`` followed by the quoted base64 PNG.
    """
    axes = _plot_histogram(series, figsize=(4, 2), **kwargs)

    # Older Matplotlib (<= 1.5.9) only offers set_axis_bgcolor.
    is_legacy = LooseVersion(matplotlib.__version__) <= '1.5.9'
    if is_legacy:
        axes.set_axis_bgcolor("w")
    else:
        axes.set_facecolor("w")

    major_ticks = axes.xaxis.get_major_ticks()
    for position in (0, -1):
        major_ticks[position].label.set_fontsize(8)

    # Hide the labels at even positions (0, 2, 4, ...).
    for hidden_label in axes.xaxis.get_ticklabels()[::2]:
        hidden_label.set_visible(False)

    figure = axes.figure
    figure.subplots_adjust(left=0.2, right=0.95, top=0.95,
                           wspace=0, hspace=0)
    png = BytesIO()
    figure.savefig(png)
    encoded = quote(base64.b64encode(png.getvalue()))
    plt.close(figure)
    return 'data:image/png;base64,' + encoded
def correlation_matrix(corrdf, title, **kwargs):
    """Plot image of a matrix correlation.

    Parameters
    ----------
    corrdf: DataFrame
        The matrix correlation to plot.
    title: str
        The matrix title
    **kwargs
        Accepted for interface compatibility; not used.

    Returns
    -------
    str
        ``data:image/png;base64,`` followed by the quoted base64 PNG, or an
        empty string when ``corrdf`` has no columns.
    """
    labels = corrdf.columns
    num_labels = len(labels)
    if num_labels < 1:
        # Decide this before any figure exists: returning after
        # ``plt.subplots`` would leave an unclosed figure behind.
        return ''

    imgdata = BytesIO()
    fig_cor, axes_cor = plt.subplots(1, 1)
    try:
        matrix_image = axes_cor.imshow(corrdf, vmin=-1, vmax=1,
                                       interpolation="nearest", cmap='bwr')
        plt.title(title, size=18)
        plt.colorbar(matrix_image)
        axes_cor.set_xticks(
            np.arange(0, corrdf.shape[0], corrdf.shape[0] * 1.0 / num_labels))
        axes_cor.set_yticks(
            np.arange(0, corrdf.shape[1], corrdf.shape[1] * 1.0 / num_labels))
        axes_cor.set_xticklabels(labels, rotation=90)
        axes_cor.set_yticklabels(labels)

        fig_cor.savefig(imgdata, bbox_inches='tight')
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig_cor)

    return 'data:image/png;base64,' + quote(
        base64.b64encode(imgdata.getvalue()))
def histogram(series, **kwargs):
    """Draw a histogram of ``series`` and return it as an inline image.

    Parameters
    ----------
    series: Series
        The data to plot.
    **kwargs
        Passed straight through to ``_plot_histogram``.

    Returns
    -------
    str
        The PNG as ``data:image/png;base64,`` plus quoted base64 data.
    """
    plot = _plot_histogram(series, **kwargs)
    margins = dict(left=0.15, right=0.95, top=0.9, bottom=0.1,
                   wspace=0, hspace=0)
    plot.figure.subplots_adjust(**margins)

    with BytesIO() as buffer:
        plot.figure.savefig(buffer)
        raw_png = buffer.getvalue()

    result_string = 'data:image/png;base64,' + quote(base64.b64encode(raw_png))
    # TODO Think about writing this to disk instead of caching them in strings
    plt.close(plot.figure)
    return result_string
def complete_histogram(hist_data):
    """Draw the large (6 x 4 inch) histogram from precomputed bin data.

    ``hist_data`` is indexed with ``'left_edge'``, ``'count'`` and ``'width'``
    to position, size and scale the bars. The PNG is returned as ``BASE``
    followed by the quoted base64 image data.
    """
    plt.figure(figsize=(6, 4))
    axes = plt.subplot()
    plt.bar(hist_data['left_edge'],
            hist_data['count'],
            width=hist_data['width'],
            facecolor='#337ab7')
    axes.set_ylabel('Frequency')

    figure = axes.figure
    figure.subplots_adjust(left=0.15, right=0.95, top=0.9, bottom=0.1,
                           wspace=0, hspace=0)
    png = BytesIO()
    figure.savefig(png)
    encoded = quote(base64.b64encode(png.getvalue()))
    result_string = BASE + encoded
    # TODO Think about writing this to disk instead of caching them in strings
    plt.close(figure)
    return result_string
def correlation_matrix(corrdf, title, **kwargs):
    """Plot image of a matrix correlation.

    The matrix is drawn with a custom two-part colormap: a red-to-white ramp
    concatenated with a white-to-blue ramp, mapped over the range [-1, 1].

    Parameters
    ----------
    corrdf: DataFrame
        The matrix correlation to plot.
    title: str
        The matrix title
    **kwargs
        Accepted for interface compatibility; not used.

    Returns
    -------
    str
        ``data:image/png;base64,`` followed by the quoted base64 PNG, or an
        empty string when ``corrdf`` has no columns.
    """
    labels = corrdf.columns
    num_labels = len(labels)
    if num_labels < 1:
        # Nothing to draw; without this guard the tick spacing below divides
        # by zero after a figure has already been created.
        return ''

    # Build the colormap: N shades of red fading to white, then N shades of
    # white fading to blue (RGBA rows, alpha left at 1).
    N = 256
    blues = np.ones((N, 4))
    blues[:, 0] = np.linspace(1, 66/256, N)
    blues[:, 1] = np.linspace(1, 136/256, N)
    blues[:, 2] = np.linspace(1, 181/256, N)
    reds = np.ones((N, 4))
    reds[:, 0] = np.linspace(209/256, 1, N)
    reds[:, 1] = np.linspace(60/256, 1, N)
    reds[:, 2] = np.linspace(75/256, 1, N)
    newcmp = ListedColormap(np.concatenate((reds, blues)))

    imgdata = BytesIO()
    fig_cor, axes_cor = plt.subplots(1, 1)
    try:
        matrix_image = axes_cor.imshow(corrdf, vmin=-1, vmax=1,
                                       interpolation="nearest", cmap=newcmp)
        plt.title(title, size=18)
        plt.colorbar(matrix_image)
        axes_cor.set_xticks(
            np.arange(0, corrdf.shape[0], corrdf.shape[0] * 1.0 / num_labels))
        axes_cor.set_yticks(
            np.arange(0, corrdf.shape[1], corrdf.shape[1] * 1.0 / num_labels))
        axes_cor.set_xticklabels(labels, rotation=90)
        axes_cor.set_yticklabels(labels)

        fig_cor.savefig(imgdata, bbox_inches='tight')
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig_cor)

    return 'data:image/png;base64,' + quote(
        base64.b64encode(imgdata.getvalue()))
def mini_histogram(histogram_data):
    """Draw the small (2 x 0.75 inch) histogram from precomputed bin data.

    ``histogram_data`` is indexed with ``"left_edge"``, ``"count"`` and
    ``"width"``. The y axis is hidden and only the first and last x ticks are
    kept. Returns ``data:image/png;base64,`` plus the quoted base64 PNG.
    """
    plt.figure(figsize=(2, 0.75))
    axes = plt.subplot()
    plt.bar(histogram_data["left_edge"],
            histogram_data["count"],
            width=histogram_data["width"],
            facecolor='#337ab7')

    axes.axes.get_yaxis().set_visible(False)
    axes.set_facecolor("w")

    major_ticks = axes.xaxis.get_major_ticks()
    # Hide all interior ticks, shrink the labels of the two outer ones.
    for inner in major_ticks[1:-1]:
        inner.set_visible(False)
        inner.label.set_visible(False)
    for position in (0, -1):
        major_ticks[position].label.set_fontsize(8)

    figure = axes.figure
    figure.subplots_adjust(left=0.15, right=0.85, top=1, bottom=0.35,
                           wspace=0, hspace=0)
    png = BytesIO()
    figure.savefig(png)
    encoded = quote(base64.b64encode(png.getvalue()))
    plt.close(figure)
    return 'data:image/png;base64,' + encoded
def mini_histogram(series):
    """Render a 2 x 0.75 inch histogram of ``series`` as an inline image.

    Parameters
    ----------
    series: Series
        The data to plot.

    Returns
    -------
    str
        ``data:image/png;base64,`` followed by the quoted base64 PNG.
    """
    # Small histogram
    imgdata = BytesIO()
    plot = series.plot(kind='hist', figsize=(2, 0.75), facecolor='#337ab7')
    plot.axes.get_yaxis().set_visible(False)

    # ``set_axis_bgcolor`` was deprecated in Matplotlib 2.0 and later removed;
    # use ``set_facecolor`` whenever the installed version provides it.
    if hasattr(plot, 'set_facecolor'):
        plot.set_facecolor("w")
    else:
        plot.set_axis_bgcolor("w")

    xticks = plot.xaxis.get_major_ticks()
    # ``label1`` is the primary tick label; the ``label`` alias for it has
    # been removed from recent Matplotlib releases.
    for tick in xticks[1:-1]:
        tick.set_visible(False)
        tick.label1.set_visible(False)
    for tick in (xticks[0], xticks[-1]):
        tick.label1.set_fontsize(8)

    plot.figure.subplots_adjust(left=0.15, right=0.85, top=1, bottom=0.35,
                                wspace=0, hspace=0)
    plot.figure.savefig(imgdata)
    imgdata.seek(0)
    result_string = 'data:image/png;base64,' + quote(
        base64.b64encode(imgdata.getvalue()))
    plt.close(plot.figure)
    return result_string
def mini_histogram(histogram_data):
    """Return a thumbnail bar chart of binned data as an inline PNG string.

    The bars come from the ``"left_edge"``, ``"count"`` and ``"width"``
    entries of ``histogram_data``; the figure is 2 x 0.75 inches with a white
    background, no y axis and only the outermost x ticks visible.
    """
    def _encode_and_close(fig):
        # Serialise the figure to PNG in memory, then free it.
        with BytesIO() as buffer:
            fig.savefig(buffer)
            raw_png = buffer.getvalue()
        uri = 'data:image/png;base64,' + quote(base64.b64encode(raw_png))
        plt.close(fig)
        return uri

    plt.figure(figsize=(2, 0.75))
    plot = plt.subplot()
    bar_options = dict(width=histogram_data["width"], facecolor='#337ab7')
    plt.bar(histogram_data["left_edge"], histogram_data["count"],
            **bar_options)
    plot.axes.get_yaxis().set_visible(False)
    plot.set_facecolor("w")

    ticks = plot.xaxis.get_major_ticks()
    first_tick, last_tick = ticks[0], ticks[-1]
    for middle_tick in ticks[1:-1]:
        middle_tick.set_visible(False)
        middle_tick.label.set_visible(False)
    first_tick.label.set_fontsize(8)
    last_tick.label.set_fontsize(8)

    plot.figure.subplots_adjust(left=0.15, right=0.85, top=1, bottom=0.35,
                                wspace=0, hspace=0)
    return _encode_and_close(plot.figure)
def create_front_page(council, postcode, multi_council=1):
    """Build the information page that precedes the postal vote form.

    The page lists the council matched to ``postcode`` and the address the
    signed form should be sent to, or a fallback message when ``council`` is
    falsy. The text is drawn line by line on a Reportlab canvas and the first
    page of the resulting PDF is returned.
    """
    rows = ["Postal Vote Application", ""]

    if council:
        # Assemble the recipient block, skipping lines that repeat
        # (duplicates come from the stored address formatting).
        recipient = ["Electoral Registration Officer"]
        for raw_line in council.address.split("\n"):
            cleaned = raw_line.strip()
            if cleaned not in recipient:
                recipient.append(cleaned)
        recipient.append(council.postcode)

        rows.append(
            "From your postcode ({0}) we think you live in:".format(postcode))
        rows.append("")
        rows.append(council.name)
        rows.append("")

        # Adds Warning for postcodes that cross multiple areas
        # https://democracyclub.org.uk/blog/2017/03/20/4314-times-when-postcodes-arent-good-enough/
        if multi_council > 1:
            rows.append(
                "BE CAREFUL: Your postcode covers multiple councils. Please check this is correct."
            )
            rows.append("")

        rows.append(
            "If this is right - you need to sign the form on the next page and send it to:"
        )
        rows.append("")
        rows.extend(recipient)
        rows.append("")
        if council.phone:
            rows.append("Phone: {0}".format(council.phone))
    else:
        rows.append(
            "We can't find the council for your postcode - is this postcode correct? {0}"
            .format(postcode))
        rows.append(
            "Visit aboutmyvote.co.uk to find the address for your local Electoral Registration Officer"
        )

    rows.append("")
    rows.append(
        "For the Electoral Commission page on postal voting visit www.aboutmyvote.co.uk"
    )
    rows.append("")
    rows.append("This form was pre-populated at postalvote.inkleby.com")

    # create a new PDF with Reportlab: one text row every 15 units, from
    # y=700 downwards
    packet = BytesIO()
    can = canvas.Canvas(packet)
    for offset, text in enumerate(rows):
        can.drawString(40, 700 - 15 * offset, text)
    can.save()

    packet.seek(0)
    return PdfFileReader(packet).getPage(0)  # return a pdf page
def describe_float_1d(df, column, current_result, nrows):
    """Compute descriptive statistics and histograms for a Spark float column.

    Parameters
    ----------
    df
        Spark DataFrame holding the data.
    column : str
        Name of the column to describe.
    current_result
        Mapping read for its ``"count"`` entry, which is used as the
        denominator of the custom moments and of ``mad``.
    nrows
        Row count used as the denominator of ``p_zeros``.

    Returns
    -------
    Series
        Named ``column``, holding mean/min/max/variance/std/skewness/kurtosis/
        sum, the 5/25/50/75/95 percent approximate percentiles, range, iqr,
        cv, mad, zero counts, ``type`` ("NUM") and two
        ``data:image/png;base64,`` encoded histograms.
    """
    non_null = df.select(column).na.drop()
    value = col(column)
    # Number of zeros, computed in the same aggregation pass by summing a 0/1
    # indicator. ``count(value == 0.0)`` is wrong here: count() tallies every
    # non-null row, and the comparison is non-null whether it is true or false.
    zero_count = df_sum((value == 0.0).cast("int")).alias('n_zeros')

    if spark_version == "1.6+":
        stats_df = non_null.agg(mean(value).alias("mean"),
                                df_min(value).alias("min"),
                                df_max(value).alias("max"),
                                variance(value).alias("variance"),
                                kurtosis(value).alias("kurtosis"),
                                stddev(value).alias("std"),
                                skewness(value).alias("skewness"),
                                df_sum(value).alias("sum"),
                                zero_count
                                ).toPandas()
    else:
        stats_df = non_null.agg(mean(value).alias("mean"),
                                df_min(value).alias("min"),
                                df_max(value).alias("max"),
                                df_sum(value).alias("sum"),
                                zero_count
                                ).toPandas()
        # Higher moments come from the project's *_custom helpers on this
        # branch, using the already computed mean and the non-null count.
        column_mean = stats_df["mean"].iloc[0]
        n_values = current_result["count"]
        stats_df["variance"] = non_null.agg(
            variance_custom(value, column_mean, n_values)).toPandas().iloc[0][0]
        stats_df["std"] = np.sqrt(stats_df["variance"])
        stats_df["skewness"] = non_null.agg(
            skewness_custom(value, column_mean, n_values)).toPandas().iloc[0][0]
        stats_df["kurtosis"] = non_null.agg(
            kurtosis_custom(value, column_mean, n_values)).toPandas().iloc[0][0]

    # sum() over zero rows is null; report that as "no zeros".
    stats_df["n_zeros"] = stats_df["n_zeros"].fillna(0)

    for x in [0.05, 0.25, 0.5, 0.75, 0.95]:
        stats_df[pretty_name(x)] = (non_null
                                    .selectExpr("percentile_approx(`{col}`,CAST({n} AS DOUBLE))"
                                                .format(col=column, n=x)).toPandas().iloc[:,0]
                                    )
    stats = stats_df.iloc[0].copy()
    stats.name = column
    stats["range"] = stats["max"] - stats["min"]
    stats["iqr"] = stats[pretty_name(0.75)] - stats[pretty_name(0.25)]
    stats["cv"] = stats["std"] / float(stats["mean"])
    # Mean absolute deviation: sum of |value - mean| over the non-null count.
    stats["mad"] = (non_null
                    .select(df_abs(value-stats["mean"]).alias("delta"))
                    .agg(df_sum(col("delta"))).toPandas().iloc[0,0] / float(current_result["count"]))
    stats["type"] = "NUM"
    stats['p_zeros'] = stats['n_zeros'] / float(nrows)

    # Large histogram
    imgdata = BytesIO()
    hist_data = create_hist_data(df, column, stats["min"], stats["max"], bins)
    figure = plt.figure(figsize=(6, 4))
    plot = plt.subplot()
    plt.bar(hist_data["left_edge"],
            hist_data["count"],
            width=hist_data["width"],
            facecolor='#337ab7')
    plot.set_ylabel("Frequency")
    plot.figure.subplots_adjust(left=0.15, right=0.95, top=0.9, bottom=0.1, wspace=0, hspace=0)
    plot.figure.savefig(imgdata)
    imgdata.seek(0)
    stats['histogram'] = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
    #TODO Think about writing this to disk instead of caching them in strings
    plt.close(plot.figure)

    stats['mini_histogram'] = mini_histogram(hist_data)

    return stats
def report(uuid, template='default', css='github', missing='N/A',
           reporter=ESIgenReport, engine='html'):
    """The location we send them to at the end of the upload.

    Builds a report for every allowed file in the upload directory of
    ``uuid`` and returns it in the format selected by ``engine``.

    Parameters
    ----------
    uuid
        Name of the upload directory under ``UPLOADS``; a falsy value
        redirects to the index.
    template, css, missing
        Defaults for the report template, stylesheet and missing-value
        placeholder; each can be overridden by the POST form or the query
        string.
    reporter
        Callable invoked as ``reporter(path, missing=...)``; the result must
        provide ``report(...)``, ``name`` and ``data``.
    engine
        One of 'html', 'zip', 'json', 'gist' or 'md'; anything else falls
        back to 'html'.

    Returns
    -------
    A Flask response: rendered page, zip attachment, JSON, redirect to the
    created gist, plain-text markdown, or a redirect to the index on error.
    """
    if not uuid:
        return redirect(url_for("index", **URL_KWARGS))
    if engine not in ('html', 'zip', 'json', 'gist', 'md'):
        engine = 'html'
    # POST / GET handling
    custom_template = False
    if request.method == 'POST':
        form = request.form
        template = form['template']
        css = form['css']
        # The placeholder is only honoured when the 'missing' field is set.
        missing = form['missing-value'] if form.get('missing') else ''
        if template == 'custom':
            custom_template = True
            # For custom templates the template text itself is submitted.
            template = form['template-custom']
    else:
        template = request.args.get('template', template)
        css = request.args.get('css', css)
        # Falls back to the function default when 'missing' is not in the
        # query string, so the default 'N/A' also enables this branch.
        if request.args.get('missing', missing):
            missing = request.args.get('missing-value', missing)
        if template == 'custom':
            custom_template = True
            template = request.args.get('template-custom', template)
    # Template
    if not custom_template:
        # Named templates are normalised to carry a '.md' extension.
        template_basename, template_ext = os.path.splitext(template)
        if template_ext != '.md':
            template = template_basename + '.md'
    # Style
    css_basename, css_ext = os.path.splitext(css)
    if css_ext != '.css':
        css = css_basename + '.css'
    # Get their reports.
    root = os.path.join(UPLOADS, uuid)
    if not os.path.isdir(root):
        return redirect(
            url_for("index", message="Upload error. Try again", **URL_KWARGS))
    reports, molecules = [], []
    html = engine == 'html'
    # Preview mode handed to molecule.report(): 'web' for HTML output,
    # 'static_server' for zip output when HAS_PYMOL is set, otherwise None.
    if html:
        preview = 'web'
    elif HAS_PYMOL and engine == 'zip':
        preview = 'static_server'
    else:
        preview = None
    # The placeholder is truncated to at most 10 characters.
    missing = missing[:10] if missing is not None else None
    for fn in sorted(os.listdir(root)):
        if os.path.splitext(fn)[1] not in ALLOWED_EXTENSIONS:
            continue
        path = os.path.join(root, fn)
        molecule = reporter(path, missing=missing)
        report = molecule.report(template=template, preview=preview,
                                 process_markdown=html)
        reports.append((molecule, report))
        # Persist the rendered report (and coordinates, when available) next
        # to the uploaded file so the zip engine can pick them up.
        with open(os.path.join(root, molecule.name + '.md'), 'w') as f:
            f.write(report)
        if molecule.data.has_coordinates:
            with open(os.path.join(root, molecule.name + '.pdb'), 'w') as f:
                f.write(molecule.data.pdb_block)
            with open(os.path.join(root, molecule.name + '.xyz'), 'w') as f:
                f.write(molecule.data.xyz_block)
    if not reports:
        return redirect(
            url_for("index", message="File(s) could not be parsed!",
                    **URL_KWARGS))
    if engine == 'html':
        # NOTE(review): `report` here is the value left over from the last
        # loop iteration, so `ngl` reflects only the last report — confirm
        # this is intended.
        return render_template('report.html', css=css, uuid=uuid,
                               reports=reports,
                               ngl='{{ viewer3d }}' in report,
                               template=template)
    elif engine == 'zip':
        # Zip every file under the upload directory, flattened to bare
        # file names, into an in-memory archive.
        memfile = BytesIO()
        with ZipFile(memfile, 'w', ZIP_DEFLATED) as zf:
            for base, dirs, files in os.walk(root):
                for filename in files:
                    zf.write(os.path.join(base, filename), arcname=filename)
        memfile.seek(0)
        return send_file(memfile,
                         attachment_filename='{}.zip'.format(uuid),
                         as_attachment=True)
    elif engine == 'json':
        d = {}
        for molecule, report in reports:
            d[molecule.basename] = {
                'report': report,
                'data': molecule.data_as_dict()
            }
        return jsonify(d)
    elif engine == 'gist':
        # Payload for the GitHub gists endpoint; the gist is created public.
        gist_data = {
            'description': "ESIgen report #{}".format(uuid),
            'public': True,
            'files': {
                'ESIgen.md': {
                    'content':
                    "Created with [ESIgen](https://github.com/insilichem/esigen)"
                }
            }
        }
        for molecule, report in reports:
            gist_data['files'][molecule.name + '.md'] = {'content': report}
            if molecule.data.has_coordinates:
                gist_data['files'][molecule.name + '.pdb'] = {
                    'content': molecule.data.pdb_block
                }
                gist_data['files'][molecule.name + '.xyz'] = {
                    'content': molecule.data.xyz_block
                }
        response = requests.post('https://api.github.com/gists',
                                 json=gist_data)
        response.raise_for_status()
        return redirect(response.json()['html_url'])
    elif engine == 'md':
        # All reports joined into a single plain-text document.
        return Response('\n'.join([r for (m, r) in reports]),
                        content_type='text/plain')
def describe_float_1d(df, column, current_result, nrows):
    """Compute descriptive statistics and histograms for a Spark float column.

    Parameters
    ----------
    df
        Spark DataFrame holding the data.
    column : str
        Name of the column to describe.
    current_result
        Mapping read for its ``"count"`` entry, which is used as the
        denominator of the custom moments and of ``mad``.
    nrows
        Row count used as the denominator of ``p_zeros``.

    Returns
    -------
    Series
        Named ``column``, holding mean/min/max/variance/std/skewness/kurtosis/
        sum, the 5/25/50/75/95 percent approximate percentiles, range, iqr,
        cv, mad, zero counts, ``type`` ("NUM") and two
        ``data:image/png;base64,`` encoded histograms.
    """
    # Positional ``.iloc`` is used throughout: the ``.ix`` indexer this code
    # relied on was removed in pandas 1.0. Every lookup below addresses the
    # first row/column of a freshly collected frame, so position is exact.
    non_null = df.select(column).na.drop()
    value = col(column)

    if spark_version == "1.6+":
        stats_df = non_null.agg(mean(value).alias("mean"),
                                df_min(value).alias("min"),
                                df_max(value).alias("max"),
                                variance(value).alias("variance"),
                                kurtosis(value).alias("kurtosis"),
                                stddev(value).alias("std"),
                                skewness(value).alias("skewness"),
                                df_sum(value).alias("sum")
                                ).toPandas()
    else:
        stats_df = non_null.agg(mean(value).alias("mean"),
                                df_min(value).alias("min"),
                                df_max(value).alias("max"),
                                df_sum(value).alias("sum")
                                ).toPandas()
        # Higher moments come from the project's *_custom helpers on this
        # branch, using the already computed mean and the non-null count.
        column_mean = stats_df["mean"].iloc[0]
        n_values = current_result["count"]
        stats_df["variance"] = non_null.agg(
            variance_custom(value, column_mean, n_values)).toPandas().iloc[0, 0]
        stats_df["std"] = np.sqrt(stats_df["variance"])
        stats_df["skewness"] = non_null.agg(
            skewness_custom(value, column_mean, n_values)).toPandas().iloc[0, 0]
        stats_df["kurtosis"] = non_null.agg(
            kurtosis_custom(value, column_mean, n_values)).toPandas().iloc[0, 0]

    for x in np.array([0.05, 0.25, 0.5, 0.75, 0.95]):
        stats_df[pretty_name(x)] = (non_null
                                    .selectExpr("percentile_approx(`{col}`,CAST({n} AS DOUBLE))"
                                                .format(col=column, n=x)).toPandas().iloc[:, 0]
                                    )
    stats = stats_df.iloc[0].copy()
    stats.name = column
    stats["range"] = stats["max"] - stats["min"]
    stats["iqr"] = stats[pretty_name(0.75)] - stats[pretty_name(0.25)]
    stats["cv"] = stats["std"] / float(stats["mean"])
    # Mean absolute deviation: sum of |value - mean| over the non-null count.
    stats["mad"] = (non_null
                    .select(df_abs(value-stats["mean"]).alias("delta"))
                    .agg(df_sum(col("delta"))).toPandas().iloc[0, 0] / float(current_result["count"]))
    stats["type"] = "NUM"
    stats['n_zeros'] = df.select(column).where(col(column)==0.0).count()
    stats['p_zeros'] = stats['n_zeros'] / float(nrows)

    # Large histogram
    imgdata = BytesIO()
    hist_data = create_hist_data(df, column, stats["min"], stats["max"], bins)
    figure = plt.figure(figsize=(6, 4))
    plot = plt.subplot()
    plt.bar(hist_data["left_edge"],
            hist_data["count"],
            width=hist_data["width"],
            facecolor='#337ab7')
    plot.set_ylabel("Frequency")
    plot.figure.subplots_adjust(left=0.15, right=0.95, top=0.9, bottom=0.1, wspace=0, hspace=0)
    plot.figure.savefig(imgdata)
    imgdata.seek(0)
    stats['histogram'] = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
    #TODO Think about writing this to disk instead of caching them in strings
    plt.close(plot.figure)

    stats['mini_histogram'] = mini_histogram(hist_data)

    return stats
def create_pdf(form=None, sig_image=None):
    """Build the filled-in postal vote PDF and return it as a download.

    The applicant's details are read from ``form.cleaned_data`` (every field
    defaults to "" when ``form`` is falsy or the field is absent), drawn onto
    an overlay page, merged onto page 0 of ``resources/form.pdf`` and preceded
    by the page from ``create_front_page``.

    Parameters
    ----------
    form
        Validated form exposing ``cleaned_data``; optional.
    sig_image
        Signature image drawn onto the form when given; optional.

    Returns
    -------
    HttpResponse
        ``application/pdf`` attachment named ``postal_vote_<name>.pdf``.
    """

    def get_from_form(value):
        """Return the cleaned form value for ``value``, or "" if unavailable."""
        if form:
            return form.cleaned_data.get(value, "")
        return ""

    email = get_from_form("email")
    phone_number = get_from_form("phone")
    postcode = get_from_form("postcode")
    surname = get_from_form("surname")
    first_names = get_from_form("first_names")
    add_1 = get_from_form("add_1")
    add_2 = get_from_form("add_2")
    city = get_from_form("city")
    county = get_from_form("county")
    alt_add_1 = get_from_form("alt_add_1")
    alt_add_2 = get_from_form("alt_add_2")
    alt_postcode = get_from_form("alt_postcode")
    alt_reason = get_from_form("reason")
    file_name = file_name_safe("{0}_{1}".format(surname, first_names).lower())
    council = get_from_form("council")
    multi_council = get_from_form("multi_council")

    # Third address line: "city, county", whichever of the two is present, or
    # "" when both are empty (previously left unbound, which raised
    # UnboundLocalError further down).
    add_3 = ", ".join(part for part in (city, county) if part)

    until_further_notice = get_from_form("universal")
    one_date = get_from_form("single_day")
    date_range = get_from_form("time_range")
    date_of_birth = get_from_form("dob")

    packet = BytesIO()
    # create a new PDF with Reportlab
    can = canvas.Canvas(packet, pagesize=letter)

    # add signature if present
    if sig_image:
        can.drawImage(ImageReader(sig_image), 293, 155, mask='auto')

    # core address info
    can.drawString(40, 667, surname.upper())
    can.drawString(40, 620, first_names.upper())
    can.drawString(40, 561, add_1.upper())
    can.drawString(40, 541, add_2.upper())
    can.drawString(40, 521, add_3.upper())
    can.drawString(40, 390, email.upper())
    can.drawString(40, 451, phone_number)
    can.drawString(100, 499, postcode.upper())

    # alt address
    can.drawString(285, 646, alt_add_1.upper())
    can.drawString(285, 626, alt_add_2.upper())
    can.drawString(350, 606, alt_postcode.upper())
    can.drawString(285, 548, alt_reason.upper())

    # for how long we want this on
    if until_further_notice:
        can.drawString(30, 278, "X")
    if one_date:
        can.drawString(30, 248, "X")
        write_date(can.drawString, 153, 213, one_date)
    if date_range:
        can.drawString(30, 181, "X")
        write_date(can.drawString, 153, 156, date_range[0])
        write_date(can.drawString, 153, 129, date_range[1])

    # today's date
    write_date(can.drawString, 457, 44, datetime.datetime.now())

    # birthdate
    can.setFont("Helvetica", 30)
    write_date(can.drawString, 310, 350, date_of_birth, 25,
               extra_spacing=[1, 3])

    can.save()
    packet.seek(0)
    new_pdf = PdfFileReader(packet)
    front_page = create_front_page(council, postcode, multi_council)

    source_file = os.path.join(settings.PROJECT_PATH, "resources", "form.pdf")
    output = PdfFileWriter()
    outputStream = BytesIO()
    # Keep the template open until the merged document has been written: the
    # reader pulls page content from the file while writing. The ``with``
    # block closes the handle that was previously leaked on every request.
    with open(source_file, "rb") as template_file:
        existing_pdf = PdfFileReader(template_file)
        # add the several pages objects to one pdf
        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        output.addPage(front_page)
        output.addPage(page)
        output.write(outputStream)

    # send the stream into a response and return it to the view
    response = HttpResponse(content_type='application/pdf')
    response[
        'Content-Disposition'] = 'attachment; filename="postal_vote_{0}.pdf"'.format(
            file_name)
    response.write(outputStream.getvalue())
    outputStream.close()

    if council:
        council.increment_count()
    return response
def _download_single_zone_file(self, url): response = self._get(url) status_code = response.status_code if status_code == 200: zone_name = url.rsplit('/', 1)[-1].rsplit('.')[-2] compressed_file = BytesIO(response.content) _, option = cgi.parse_header( response.headers['content-disposition']) filename = option['filename'] if not filename: filename = zone_name + '.txt.gz' path_filename = "{}{}".format(self.save_path, filename) decompressed_file = gzip.GzipFile(fileobj=compressed_file, mode='rb') text_list = [] for line in decompressed_file.readlines(): domain = line.decode('utf-8').split('\t')[0].rstrip('.') text_list.append(domain) text_string_list = '\n'.join(list(set(text_list))) text_string_bytes_object = BytesIO() text_string_bytes_object.write(text_string_list.encode('utf-8')) text_string_bytes_object.seek(0) text_string_buf = text_string_bytes_object.read() gzip_object = gzip.compress(text_string_buf) gzip_size = gzip_object.__sizeof__() MAX_FILE_SIZE = 1024 * 1024 * 35 if gzip_size >= MAX_FILE_SIZE: chapters = 0 source_buf = gzip_object n = MAX_FILE_SIZE final = [ source_buf[i * n:(i + 1) * n] for i in range((len(source_buf) + n - 1) // n) ] # list comprehension chunker for chunk in final: chapters += 1 chapter_string = "{}".format(chapters) chapter_string = chapter_string.zfill(2) chapter_filename = "{}_{}{}".format( zone_name, chapter_string, '.txt.gz') chapter_path_filename = "{}{}".format( self.save_path, chapter_filename) with open(chapter_path_filename, 'wb+') as f: f.write(chunk) else: with open(path_filename, 'wb+') as f: f.write(gzip_object) elif status_code == 401: self.token = self.authenticate() elif status_code == 404: pass else: pass