Esempio n. 1
0
def mdl_1d(x, y):
    """Fit a univariate logistic model of ``y`` on ``x`` and plot binned stats.

    Parameters
    ----------
    x : Series
        The single predictor. When it is numeric with more than 10 distinct
        values it goes through ``sb_cutz`` before being one-hot encoded.
    y : array-like
        The target passed to the estimators and to ``roc_auc_score``.

    Returns
    -------
    tuple
        ``(aucz, nplot, bplot)``: the ROC AUC scored on the same data the
        model was fitted on, followed by two ``data:image/png;base64,``
        strings rendered from ``plot_num`` and ``plot_bubble``.
    """
    lr = LogisticRegressionCV(scoring='roc_auc')
    lars = LassoLarsIC(criterion='aic')

    def _to_data_uri(figure_like):
        # Save into memory, encode as a data URI, then close the current
        # pyplot figure.
        png = BytesIO()
        figure_like.savefig(png)
        png.seek(0)
        uri = 'data:image/png;base64,' + \
            quote(base64.b64encode(png.getvalue()))
        plt.close()
        return uri

    needs_binning = x.nunique() > 10 and com.is_numeric_dtype(x)
    encoded_input = sb_cutz(x) if needs_binning else x
    series = pd.get_dummies(encoded_input, dummy_na=True)

    lr.fit(series, y)
    # NOTE(review): the fitted LARS model is never read afterwards.
    lars.fit(series, y)

    try:
        preds = lr.predict_proba(series)[:, -1]
    except ValueError:
        # NOTE(review): debugger hook; ``preds`` stays unbound on this path,
        # so the scoring call below would fail.
        Tracer()()

    aucz = roc_auc_score(y, preds)

    ns = num_bin_stats(x, y)
    nplot = _to_data_uri(plot_num(ns))
    bplot = _to_data_uri(plot_bubble(ns))

    return aucz, nplot, bplot
Esempio n. 2
0
def mini_histogram(series, **kwargs):
    """Render a thumbnail-sized histogram and return it as a data URI.

    Parameters
    ----------
    series: Series
        The data to plot.
    **kwargs
        Forwarded unchanged to ``_plot_histogram``.

    Returns
    -------
    str
        The PNG image as a ``data:image/png;base64,`` string.
    """
    axes = _plot_histogram(series, figsize=(2, 0.75), **kwargs)
    axes.axes.get_yaxis().set_visible(False)

    # Versions up to 1.5.9 are given the older background setter.
    is_legacy = LooseVersion(matplotlib.__version__) <= '1.5.9'
    set_background = axes.set_axis_bgcolor if is_legacy else axes.set_facecolor
    set_background("w")

    major_ticks = axes.xaxis.get_major_ticks()
    # Only the two outermost x ticks stay visible, with a small font.
    for inner_tick in major_ticks[1:-1]:
        inner_tick.set_visible(False)
        inner_tick.label.set_visible(False)
    for outer_tick in (major_ticks[0], major_ticks[-1]):
        outer_tick.label.set_fontsize(8)

    figure = axes.figure
    figure.subplots_adjust(left=0.15, right=0.85, top=1, bottom=0.35, wspace=0, hspace=0)

    png = BytesIO()
    figure.savefig(png)
    png.seek(0)
    result_string = 'data:image/png;base64,' + quote(base64.b64encode(png.getvalue()))
    plt.close(figure)
    return result_string
Esempio n. 3
0
def generate_image_placeholder(size, color):
    """Return a solid-colour PNG placeholder as a data URI, cached for a day.

    ``size`` is indexed as ``(width, height)`` and ``color`` is a hex string
    with an optional ``#``. Falsy arguments fall back to ``DEFAULT_SIZE`` and
    ``DEFAULT_COLOR``. The base64 payload is stored in ``cache`` under a key
    derived from both arguments.
    """
    size = size or DEFAULT_SIZE
    color = color or DEFAULT_COLOR

    # The key material is the characters of str(size) joined by 'x', followed
    # by the colour.
    key_material = '{}{}'.format('x'.join(str(size)), color)
    digest = hashlib.md5(key_material.encode('utf-8')).hexdigest()
    cache_key = 'image_placeholder{}'.format(digest)

    img_base64 = cache.get(cache_key)

    if not img_base64:
        # Three raw bytes from the hex colour become the RGB fill.
        rgb = struct.unpack('BBB', unhexlify(color.replace('#', '')))

        placeholder = Image.new("RGB", (size[0], size[1]), rgb)
        png = BytesIO()
        placeholder.save(png, format="PNG")

        img_base64 = base64.b64encode(png.getvalue())
        cache.set(cache_key, img_base64, 60*60*24)

    return u'data:image/png;base64,{}'.format(img_base64.decode("utf-8"))
Esempio n. 4
0
def image(filename):
  """Load the image bytes stored in redis under ``filename`` and serve them as JPEG."""
  stored_bytes = redis.get(filename)
  jpeg_out = BytesIO()
  # Re-encode whatever format was stored as a quality-90 JPEG.
  Image.open(BytesIO(stored_bytes)).save(jpeg_out, 'JPEG', quality=90)
  jpeg_out.seek(0)
  payload = jpeg_out.getvalue()
  return Response(payload, mimetype='image/jpeg')
Esempio n. 5
0
def mdl_1d_cat(x, y):
    """Fit a univariate logistic model of ``y`` on ``x`` and plot the categories.

    A numeric ``x`` with more than 10 distinct values is first replaced by
    ``sb_cutz(x)``; the result is one-hot encoded (with a NaN column) and fed
    to ``LogisticRegressionCV``.

    Returns
    -------
    tuple
        ``(aucz, cmatrix)``: the ROC AUC scored on the fitting data and the
        ``plot_cat`` output as a ``data:image/png;base64,`` string.
    """
    high_cardinality_numeric = x.nunique() > 10 and com.is_numeric_dtype(x)
    if high_cardinality_numeric:
        x = sb_cutz(x)

    series = pd.get_dummies(x, dummy_na=True)
    lr = LogisticRegressionCV(scoring='roc_auc')
    lr.fit(series, y)

    try:
        preds = lr.predict_proba(series)[:, -1]
    except ValueError:
        # NOTE(review): debugger hook; ``preds`` stays unbound on this path.
        Tracer()()

    plot = plot_cat(x, y)

    png = BytesIO()
    plot.savefig(png)
    png.seek(0)

    aucz = roc_auc_score(y, preds)
    encoded = quote(base64.b64encode(png.getvalue()))
    cmatrix = 'data:image/png;base64,' + encoded
    plt.close()
    return aucz, cmatrix
Esempio n. 6
0
    def describe_numeric_1d(series, base_stats):
        """Summarise a numeric series: moments, quantiles, zero counts and histograms.

        ``base_stats`` is accepted but not read here. The histogram uses
        ``bins`` from the enclosing scope. Returns a ``pd.Series`` named after
        ``series`` whose entries follow the insertion order used below.
        """
        stats = {}
        stats['mean'] = series.mean()
        stats['std'] = series.std()
        stats['variance'] = series.var()
        stats['min'] = series.min()
        stats['max'] = series.max()
        stats['range'] = stats['max'] - stats['min']

        # Quantile entries are keyed by pretty_name(q); the '25%' and '75%'
        # keys are read back for the IQR.
        stats.update(
            (pretty_name(q), series.quantile(q))
            for q in np.array([0.05, 0.25, 0.5, 0.75, 0.95])
        )
        stats['iqr'] = stats['75%'] - stats['25%']
        stats['kurtosis'] = series.kurt()
        stats['skewness'] = series.skew()
        stats['sum'] = series.sum()
        stats['mad'] = series.mad()
        if stats['mean']:
            stats['cv'] = stats['std'] / stats['mean']
        else:
            # A falsy mean yields NaN rather than a division.
            stats['cv'] = np.NaN
        stats['type'] = "NUM"
        total = len(series)
        stats['n_zeros'] = (total - np.count_nonzero(series))
        stats['p_zeros'] = stats['n_zeros'] / len(series)

        # Large histogram
        # TODO when running on server, send this off to a different thread
        axes = series.plot(kind='hist', figsize=(6, 4),
                           facecolor='#337ab7', bins=bins)
        figure = axes.figure
        figure.subplots_adjust(left=0.15, right=0.95, top=0.9, bottom=0.1, wspace=0, hspace=0)
        png = BytesIO()
        figure.savefig(png)
        png.seek(0)
        # TODO Think about writing this to disk instead of caching them in strings
        stats['histogram'] = 'data:image/png;base64,' + quote(base64.b64encode(png.getvalue()))
        plt.close(figure)

        stats['mini_histogram'] = mini_histogram(series)

        return pd.Series(stats, name=series.name)
Esempio n. 7
0
 def test_dump_xml(self):
     """dump_xml output must equal XML_RESULT filled with the creation timestamp."""
     stamp = self.mc.creation.strftime('%Y-%m-%d %H:%M:%S')
     expected = XML_RESULT % stamp
     try:
         # Python 2 only: promote the expected byte string to unicode.
         expected = unicode(expected, 'utf-8')
     except NameError:
         pass
     sink = BytesIO()
     dump_xml(self.mc, sink)
     produced = sink.getvalue().decode('utf-8')
     self.assertEqual(expected, produced)
Esempio n. 8
0
 def test_dump_js(self):
     """dump_js output must equal the interpreter-specific JS template."""
     sink = BytesIO()
     dump_js(self.mc, sink)
     produced = sink.getvalue().decode('utf-8')
     try:
         # Per the original note, Python 2 adds a space at the end of lines,
         # hence the separate template.
         template = unicode(JS_RESULT_PYTHON2, 'utf-8')
     except NameError:
         template = JS_RESULT_PYTHON3
     stamp = self.mc.creation.strftime('%Y-%m-%d %H:%M:%S')
     self.assertEqual(template % stamp, produced)
Esempio n. 9
0
def render_poster(name_image):
  """Serve the poster stored in redis under the part of ``name_image`` before the first '-'."""
  # TODO: Parse for name and base64 Image
  name, _, _ = name_image.partition('-')
  stored_bytes = redis.get(name)
  jpeg_out = BytesIO()
  # Re-encode the stored image as a quality-90 JPEG.
  Image.open(BytesIO(stored_bytes)).save(jpeg_out, 'JPEG', quality=90)
  jpeg_out.seek(0)
  payload = jpeg_out.getvalue()
  return Response(payload, mimetype='image/jpeg')
Esempio n. 10
0
def save_poster():
  """Decode the posted base64 image, re-encode it as JPEG and store it in redis.

  Reads ``image`` (optionally prefixed with ``data:image/png;base64,``) and
  ``name`` from ``request.form``. The JPEG bytes are stored under ``name``.

  Returns a ``(body, status, headers)`` tuple with a JSON success body.
  """
  base64image = request.form['image']
  name = request.form['name']
  # Strip the data-URI prefix and any embedded newlines before decoding.
  base64image = re.sub('data:image/png;base64,', '', str(base64image))
  base64image = re.sub('\n', '', base64image)
  poster_image = Image.open(BytesIO(base64.b64decode(base64image)))
  # JPEG cannot hold an alpha channel or a palette; PNG uploads commonly use
  # RGBA or P mode, which current Pillow refuses to write as JPEG.
  if poster_image.mode not in ('RGB', 'L', 'CMYK'):
    poster_image = poster_image.convert('RGB')
  buffer_image = BytesIO()
  poster_image.save(buffer_image, 'JPEG', quality=90)
  redis.set(name, buffer_image.getvalue())
  # NOTE(review): the header key is 'ContentType', not 'Content-Type'. It is
  # left unchanged because clients may rely on the current response type.
  return json.dumps({'success':True}), 200, {'ContentType':'application/json'}
Esempio n. 11
0
 def export(self, request, queryset, tool, content_type, extension='.xlsx'):
     """Write ``queryset`` through ``tool`` into memory and return it as an attachment.

     ``tool`` is called with the in-memory stream and must return an object
     exposing ``perform`` and ``close``. The file name comes from
     ``get_spreadsheet_name``.
     """
     try:
         from StringIO import BytesIO
     except ImportError:
         from io import BytesIO  # Python 3 support
     stream = BytesIO()
     writer = tool(
         stream,
         default_date_format=self.export_date_format,
         in_memory=True,
     )
     field_args = self.resolve_list_fields(queryset)
     writer.perform(*field_args, sheet_name='default')
     # NOTE(review): the stream is read before writer.close(); confirm the
     # tool has flushed everything by this point.
     response = HttpResponse(stream.getvalue(), content_type)
     filename = self.get_spreadsheet_name(request, queryset, extension)
     disposition = 'attachment; filename=%s' % filename
     response['Content-Disposition'] = disposition
     writer.close()
     return response
Esempio n. 12
0
 def mini_histogram(series):
     """Render a 2 x 0.75 inch histogram of ``series`` and return it as a PNG data URI.

     ``bins`` is taken from the enclosing scope.
     """
     # Small histogram
     axes = series.plot(kind='hist', figsize=(2, 0.75), facecolor='#337ab7', bins=bins)
     axes.axes.get_yaxis().set_visible(False)
     axes.set_axis_bgcolor("w")

     major_ticks = axes.xaxis.get_major_ticks()
     # Hide every x tick except the two outermost, which get a small font.
     for inner_tick in major_ticks[1:-1]:
         inner_tick.set_visible(False)
         inner_tick.label.set_visible(False)
     for outer_tick in (major_ticks[0], major_ticks[-1]):
         outer_tick.label.set_fontsize(8)

     figure = axes.figure
     figure.subplots_adjust(left=0.15, right=0.85, top=1, bottom=0.35, wspace=0, hspace=0)
     png = BytesIO()
     figure.savefig(png)
     png.seek(0)
     result_string = 'data:image/png;base64,' + quote(base64.b64encode(png.getvalue()))
     plt.close(figure)
     return result_string
Esempio n. 13
0
    def perform(self):
        """Run the prepared curl request and parse its response.

        The module-level ``header`` list is reset before the transfer and is
        read afterwards; it is presumably filled by a header callback
        registered elsewhere.

        :return: ``[body, header, http_status, content_type, h]`` where ``h``
            maps lower-cased header names to values
        :raises occi.TransportError: when pycurl reports an error
        """
        global header

        curl = self.curl

        # pycurl hands over bytes on Python 3 and str on Python 2.
        if sys.version_info >= (3, ):
            buffer = BytesIO()
        else:
            buffer = StringIO()
        curl.setopt(pycurl.WRITEFUNCTION, buffer.write)

        header = []
        try:
            curl.perform()
        except pycurl.error as pe:
            raise occi.TransportError(pe)

        # 'Server: Apache/2.2.22 (Debian)\r\n'
        h = {}
        http_status = None
        for item in header:
            m = Transport.reHeader.match(item.rstrip())
            # ``m.groups`` is a method: comparing it to an int raises
            # TypeError on Python 3, so count the captured groups instead.
            if m and len(m.groups()) >= 2:
                key = m.group(1)
                value = m.group(2)
                h[key.lower()] = value
            else:
                if Transport.reStatus.match(item):
                    http_status = item.rstrip()
        content_type = None
        if 'content-type' in h:
            # Keep only the media type, dropping parameters such as charset.
            content_type = re.split(';', h['content-type'])[0]

        body = buffer.getvalue()
        buffer.close()
        if sys.version_info >= (3, ):
            # Decode with the encoding found in the content-type header,
            # falling back to iso-8859-1.
            encoding = 'iso-8859-1'
            if content_type:
                match = Transport.reEncoding.search(h['content-type'])
                if match:
                    encoding = match.group(1)
            body = body.decode(encoding)

        return [body, header, http_status, content_type, h]
Esempio n. 14
0
    def perform(self):
        """Execute the configured curl transfer and return the parsed response.

        Resets the module-level ``header`` list before the transfer and reads
        it afterwards; it is presumably populated by a header callback set up
        elsewhere.

        :return: ``[body, header, http_status, content_type, h]`` with ``h``
            a dict of lower-cased header names to values
        :raises occi.TransportError: wrapping any ``pycurl.error``
        """
        global header

        curl = self.curl

        # Python 3 receives the body as bytes, Python 2 as str.
        if sys.version_info >= (3,):
            buffer = BytesIO()
        else:
            buffer = StringIO()
        curl.setopt(pycurl.WRITEFUNCTION, buffer.write)

        header = []
        try:
            curl.perform()
        except pycurl.error as pe:
            raise occi.TransportError(pe)

        # 'Server: Apache/2.2.22 (Debian)\r\n'
        h = {}
        http_status = None
        for item in header:
            m = Transport.reHeader.match(item.rstrip())
            # The previous ``m.groups >= 2`` compared a bound method with an
            # int, which is a TypeError on Python 3.
            if m and len(m.groups()) >= 2:
                key = m.group(1)
                value = m.group(2)
                h[key.lower()] = value
            else:
                if Transport.reStatus.match(item):
                    http_status = item.rstrip()
        content_type = None
        if 'content-type' in h:
            # Media type only; parameters after ';' are dropped.
            content_type = re.split(';', h['content-type'])[0]

        body = buffer.getvalue()
        buffer.close()
        if sys.version_info >= (3,):
            # Use the encoding named in the content-type header when present.
            encoding = 'iso-8859-1'
            if content_type:
                match = Transport.reEncoding.search(h['content-type'])
                if match:
                    encoding = match.group(1)
            body = body.decode(encoding)

        return [body, header, http_status, content_type, h]
Esempio n. 15
0
def mini_histogram(series, **kwargs):
    """Plot a small (mini) histogram of the data.

    Parameters
    ----------
    series: Series
        The data to plot.
    **kwargs
        Forwarded unchanged to ``_plot_histogram``.

    Returns
    -------
    str, The resulting image encoded as a string.
    """
    imgdata = BytesIO()
    plot = _plot_histogram(series, figsize=(2, 0.75), **kwargs)
    plot.axes.get_yaxis().set_visible(False)

    # Versions up to 1.5.9 are given the older background setter.
    if LooseVersion(matplotlib.__version__) <= '1.5.9':
        plot.set_axis_bgcolor("w")
    else:
        plot.set_facecolor("w")

    xticks = plot.xaxis.get_major_ticks()

    # Hide every x tick except the two outermost.
    for tick in xticks[1:-1]:
        tick.set_visible(False)
        tick.label.set_visible(False)
    try:
        for tick in (xticks[0], xticks[-1]):
            tick.label.set_fontsize(8)
    except Exception:
        # Best effort: the font size is cosmetic. ``Exception`` keeps
        # KeyboardInterrupt and SystemExit from being swallowed.
        print('error in setting ticks fontsize')
    plot.figure.subplots_adjust(left=0.15,
                                right=0.85,
                                top=1,
                                bottom=0.35,
                                wspace=0,
                                hspace=0)
    plot.figure.savefig(imgdata)
    imgdata.seek(0)
    result_string = 'data:image/png;base64,' + quote(
        base64.b64encode(imgdata.getvalue()))
    plt.close(plot.figure)
    return result_string
Esempio n. 16
0
 def test_full(self):
     """Load each sample file, dump it again, and check the output decodes to text."""
     base_path = os.path.dirname(__file__)
     tests = [
         (load_xml, dump_xml, ["sample_1.xml", "sample_2.xml", "sample_3.xml"]),
         (load_json, dump_json, ["sample_1.json", "sample_2.json", "sample_3.json"]),
         (load_js, dump_js, ["sample_1.js", "sample_2.js", "sample_3.js"]),
     ]
     cases = [
         (loader, dumper, name)
         for loader, dumper, names in tests
         for name in names
     ]
     for loader, dumper, name in cases:
         with open(os.path.join(base_path, name), "rb") as fileobj:
             mc = loader(fileobj)
         sink = BytesIO()
         dumper(mc, sink)
         decoded = sink.getvalue().decode("utf-8")
         try:
             self.assertIsInstance(decoded, str)
         except AssertionError:
             # python 2 backward compatibility
             self.assertIsInstance(decoded, unicode)
Esempio n. 17
0
    def describe_numeric_1d(series, base_stats):
        """Compute descriptive statistics and histogram images for a numeric series.

        ``base_stats`` is not read here, and ``bins`` comes from the enclosing
        scope. The result is a ``pd.Series`` named after ``series``; entries
        keep the order in which they are added below.
        """
        stats = dict(
            [
                ('mean', series.mean()),
                ('std', series.std()),
                ('variance', series.var()),
                ('min', series.min()),
                ('max', series.max()),
            ]
        )
        stats['range'] = stats['max'] - stats['min']

        quantile_levels = np.array([0.05, 0.25, 0.5, 0.75, 0.95])
        for level in quantile_levels:
            # pretty_name turns the level into the dictionary key.
            stats[pretty_name(level)] = series.quantile(level)
        stats['iqr'] = stats['75%'] - stats['25%']
        stats['kurtosis'] = series.kurt()
        stats['skewness'] = series.skew()
        stats['sum'] = series.sum()
        stats['mad'] = series.mad()
        # A falsy mean gives NaN instead of dividing.
        stats['cv'] = stats['std'] / stats['mean'] if stats['mean'] else np.NaN
        stats['type'] = "NUM"
        nonzero_count = np.count_nonzero(series)
        stats['n_zeros'] = (len(series) - nonzero_count)
        stats['p_zeros'] = stats['n_zeros'] / len(series)

        # Large histogram
        # TODO when running on server, send this off to a different thread
        hist_axes = series.plot(
            kind='hist', figsize=(6, 4), facecolor='#337ab7', bins=bins
        )
        hist_figure = hist_axes.figure
        hist_figure.subplots_adjust(
            left=0.15, right=0.95, top=0.9, bottom=0.1, wspace=0, hspace=0
        )
        imgdata = BytesIO()
        hist_figure.savefig(imgdata)
        imgdata.seek(0)
        encoded = quote(base64.b64encode(imgdata.getvalue()))
        # TODO Think about writing this to disk instead of caching them in strings
        stats['histogram'] = 'data:image/png;base64,' + encoded
        plt.close(hist_figure)

        stats['mini_histogram'] = mini_histogram(series)

        return pd.Series(stats, name=series.name)
Esempio n. 18
0
def missing_dendrogram(df):
    """Render a missingno dendrogram of ``df`` as a PNG data URI.

    Parameters
    ----------
    df: DataFrame
        The dataframe passed to ``msno.dendrogram``.

    Returns
    -------
    str
        The image as a ``data:image/png;base64,`` string.
    """
    axes = msno.dendrogram(df)
    figure = axes.figure
    png = BytesIO()
    figure.savefig(png)
    png.seek(0)
    encoded = quote(base64.b64encode(png.getvalue()))
    result_string = 'data:image/png;base64,' + encoded
    plt.close(figure)
    return result_string
Esempio n. 19
0
def generate_placeholder_image(size=None):
    """Return a translucent red PNG placeholder as a data URI, cached for a day.

    ``size`` is indexed as ``(width, height)``; a falsy value falls back to
    ``DEFAULT_SIZE``. The base64 text is cached under a key derived from
    ``size``.
    """
    size = size or DEFAULT_SIZE

    # The key material is the characters of str(size) joined by "x".
    key_material = "x".join(str(size))
    digest = hashlib.md5(key_material.encode("utf-8")).hexdigest()
    cache_key = "image_placeholder_{}".format(digest)

    img_base64 = cache.get(cache_key)

    if not img_base64:
        png = BytesIO()
        placeholder = Image.new("RGBA", (size[0], size[1]), (255, 0, 0, 100))
        placeholder.save(png, format="PNG")

        img_base64 = base64.b64encode(png.getvalue()).decode()
        cache.set(cache_key, img_base64, 60 * 60 * 24)

    return "data:image/png;base64,{}".format(img_base64)
Esempio n. 20
0
def histogram(series, **kwargs):
    """Render a histogram of ``series`` and return it as a PNG data URI.

    Parameters
    ----------
    series: Series
        The data to plot.
    **kwargs
        Forwarded unchanged to ``_plot_histogram``.

    Returns
    -------
    str, The resulting image encoded as a string.
    """
    axes = _plot_histogram(series, **kwargs)
    figure = axes.figure
    figure.subplots_adjust(left=0.15, right=0.95, top=0.9, bottom=0.1, wspace=0, hspace=0)

    png = BytesIO()
    figure.savefig(png)
    png.seek(0)
    # TODO Think about writing this to disk instead of caching them in strings
    encoded = quote(base64.b64encode(png.getvalue()))
    result_string = 'data:image/png;base64,' + encoded
    plt.close(figure)
    return result_string
Esempio n. 21
0
def histogram(series, **kwargs):
    """Plot ``series`` as a histogram and encode the figure as a data URI.

    Parameters
    ----------
    series: Series
        The data to plot.
    **kwargs
        Passed through to ``_plot_histogram``.

    Returns
    -------
    str, The resulting image encoded as a string.
    """
    hist_axes = _plot_histogram(series, **kwargs)
    margins = dict(left=0.15, right=0.95, top=0.9, bottom=0.1, wspace=0, hspace=0)
    hist_axes.figure.subplots_adjust(**margins)

    stream = BytesIO()
    hist_axes.figure.savefig(stream)
    stream.seek(0)
    png_base64 = base64.b64encode(stream.getvalue())
    # TODO Think about writing this to disk instead of caching them in strings
    result_string = 'data:image/png;base64,' + quote(png_base64)
    plt.close(hist_axes.figure)
    return result_string
Esempio n. 22
0
def correlation_matrix(corrdf, title, **kwargs):
    """Plot image of a matrix correlation.

    Parameters
    ----------
    corrdf: DataFrame
        The matrix correlation to plot.
    title: str
        The matrix title
    **kwargs
        Accepted for interface compatibility; not used.

    Returns
    -------
    str, The resulting image encoded as a string, or ``''`` when ``corrdf``
    has no columns.
    """
    labels = corrdf.columns
    num_labels = len(labels)
    # Bail out before creating a figure, so the empty case does not leave an
    # open figure behind.
    if num_labels < 1:
        return ''

    imgdata = BytesIO()
    fig_cor, axes_cor = plt.subplots(1, 1)
    try:
        matrix_image = axes_cor.imshow(corrdf,
                                       vmin=-1,
                                       vmax=1,
                                       interpolation="nearest",
                                       cmap='bwr')
        plt.title(title, size=18)
        plt.colorbar(matrix_image)

        # One tick per label, evenly spaced across each axis.
        axes_cor.set_xticks(
            np.arange(0, corrdf.shape[0], corrdf.shape[0] * 1.0 / num_labels))
        axes_cor.set_yticks(
            np.arange(0, corrdf.shape[1], corrdf.shape[1] * 1.0 / num_labels))
        axes_cor.set_xticklabels(labels, rotation=90)
        axes_cor.set_yticklabels(labels)

        fig_cor.savefig(imgdata, bbox_inches='tight')
    finally:
        # Close the figure even when plotting or saving fails.
        plt.close(fig_cor)

    imgdata.seek(0)
    result_string = 'data:image/png;base64,' + quote(
        base64.b64encode(imgdata.getvalue()))
    return result_string
Esempio n. 23
0
def complete_histogram(hist_data):
    """Draw the large (6 x 4 inch) frequency histogram from pre-binned data.

    ``hist_data`` is indexed by ``'left_edge'``, ``'count'`` and ``'width'``.
    Returns ``BASE`` followed by the URL-quoted base64 PNG.
    """
    plt.figure(figsize=(6, 4))
    axes = plt.subplot()
    plt.bar(
        hist_data['left_edge'],
        hist_data['count'],
        width=hist_data['width'],
        facecolor='#337ab7',
    )
    axes.set_ylabel('Frequency')

    figure = axes.figure
    figure.subplots_adjust(
        left=0.15, right=0.95, top=0.9, bottom=0.1, wspace=0, hspace=0
    )
    png = BytesIO()
    figure.savefig(png)
    png.seek(0)
    encoded = quote(base64.b64encode(png.getvalue()))
    # TODO Think about writing this to disk instead of caching them in strings
    result_string = BASE + encoded
    plt.close(figure)
    return result_string
Esempio n. 24
0
def correlation_matrix(corrdf, title, **kwargs):
    """Plot image of a matrix correlation.

    Parameters
    ----------
    corrdf: DataFrame
        The matrix correlation to plot.
    title: str
        The matrix title
    **kwargs
        Accepted for interface compatibility; not used.

    Returns
    -------
    str, The resulting image encoded as a string, or ``''`` when ``corrdf``
    has no columns.
    """
    labels = corrdf.columns
    num_labels = len(labels)
    # Without labels the tick spacing below would divide by zero.
    if num_labels < 1:
        return ''

    # Build a red -> white -> blue colormap from two 256-step ramps. Float
    # divisors keep the channel values fractional under Python 2 as well.
    N = 256
    blues = np.ones((N, 4))
    blues[:, 0] = np.linspace(1, 66 / 256.0, N)
    blues[:, 1] = np.linspace(1, 136 / 256.0, N)
    blues[:, 2] = np.linspace(1, 181 / 256.0, N)
    reds = np.ones((N, 4))
    reds[:, 0] = np.linspace(209 / 256.0, 1, N)
    reds[:, 1] = np.linspace(60 / 256.0, 1, N)
    reds[:, 2] = np.linspace(75 / 256.0, 1, N)
    newcmp = ListedColormap(np.concatenate((reds, blues)))

    imgdata = BytesIO()
    fig_cor, axes_cor = plt.subplots(1, 1)
    try:
        matrix_image = axes_cor.imshow(corrdf, vmin=-1, vmax=1, interpolation="nearest", cmap=newcmp)
        plt.title(title, size=18)
        plt.colorbar(matrix_image)
        # One tick per label, evenly spaced across each axis.
        axes_cor.set_xticks(np.arange(0, corrdf.shape[0], corrdf.shape[0] * 1.0 / num_labels))
        axes_cor.set_yticks(np.arange(0, corrdf.shape[1], corrdf.shape[1] * 1.0 / num_labels))
        axes_cor.set_xticklabels(labels, rotation=90)
        axes_cor.set_yticklabels(labels)

        fig_cor.savefig(imgdata, bbox_inches='tight')
    finally:
        # Close the figure even when plotting or saving fails.
        plt.close(fig_cor)

    imgdata.seek(0)
    result_string = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
    return result_string
Esempio n. 25
0
def request_data(start_time=None, date=None, start=None, end=None):
    """Query the VBN trip planner and return the prepared result.

    Parameters
    ----------
    start_time : str, optional
        Departure time as ``HH:MM:SS``; defaults to the current local time.
    date : str, optional
        Departure date as ``YYYY-MM-DD``; defaults to today (local time).
    start, end : str, optional
        Stop ids for origin and destination; default to two fixed stops.

    Returns
    -------
    The value of ``prepare_data(body)``, or ``[' '] * 4`` when the transfer
    raises an exception.
    """
    header = create_header()

    t = time.localtime()
    if date is None:
        date = time.strftime("%Y-%m-%d", t)
    if start_time is None:
        start_time = time.strftime("%H:%M:%S", t)
    if start is None:
        # stop id = Trinidadstr
        start = '1:000009014238'
    if end is None:
        # stop id = Bahnhof Sebaldsbrück
        end = '1:000009013744'

    buffer = BytesIO()
    c = pycurl.Curl()
    address = 'http://gtfsr.vbn.de/api/'
    # Every parameter after the first needs its own '&'. The previous URL
    # fused "arriveBy=false" and "time=" into one malformed parameter.
    address += ('routers/connect/plan?arriveBy=false'
                '&time={0}'
                '&date={1}'
                '&fromPlace={2}'
                '&toPlace={3}').format(start_time, date, start, end)
    c.setopt(c.URL, address)
    c.setopt(c.HTTPHEADER, header)
    c.setopt(c.WRITEDATA, buffer)
    # NOTE(review): Timeout(5, False) presumably suppresses its own expiry,
    # in which case ``body`` below may be empty or partial. Confirm.
    with Timeout(5, False):
        try:
            c.perform()
        except Exception as e:
            print('exeption: ')
            print(e)
            return [' '] * 4
        finally:
            # Release the curl handle on every path, not only on success.
            c.close()
    body = buffer.getvalue().decode('UTF8')

    return prepare_data(body)
Esempio n. 26
0
 def mini_histogram(histogram_data):
     """Draw a 2 x 0.75 inch bar histogram from pre-binned data as a PNG data URI.

     ``histogram_data`` is indexed by "left_edge", "count" and "width".
     """
     # Small histogram
     plt.figure(figsize=(2, 0.75))
     axes = plt.subplot()
     plt.bar(
         histogram_data["left_edge"],
         histogram_data["count"],
         width=histogram_data["width"],
         facecolor='#337ab7',
     )
     axes.axes.get_yaxis().set_visible(False)
     axes.set_facecolor("w")

     major_ticks = axes.xaxis.get_major_ticks()
     # Only the two outermost x ticks stay visible, with a small font.
     for inner_tick in major_ticks[1:-1]:
         inner_tick.set_visible(False)
         inner_tick.label.set_visible(False)
     for outer_tick in (major_ticks[0], major_ticks[-1]):
         outer_tick.label.set_fontsize(8)

     figure = axes.figure
     figure.subplots_adjust(left=0.15, right=0.85, top=1, bottom=0.35, wspace=0, hspace=0)
     png = BytesIO()
     figure.savefig(png)
     png.seek(0)
     result_string = 'data:image/png;base64,' + quote(base64.b64encode(png.getvalue()))
     plt.close(figure)
     return result_string
Esempio n. 27
0
 def mini_histogram(histogram_data):
     """Render pre-binned histogram data as a thumbnail and return a PNG data URI.

     Reads the "left_edge", "count" and "width" entries of ``histogram_data``.
     """
     # Small histogram
     stream = BytesIO()
     plt.figure(figsize=(2, 0.75))
     bar_axes = plt.subplot()
     left_edges = histogram_data["left_edge"]
     counts = histogram_data["count"]
     plt.bar(left_edges, counts, width=histogram_data["width"], facecolor='#337ab7')
     bar_axes.axes.get_yaxis().set_visible(False)
     bar_axes.set_facecolor("w")

     ticks = bar_axes.xaxis.get_major_ticks()
     # Interior ticks are hidden; the first and last keep a small label.
     for hidden in ticks[1:-1]:
         hidden.set_visible(False)
         hidden.label.set_visible(False)
     for kept in (ticks[0], ticks[-1]):
         kept.label.set_fontsize(8)

     margins = dict(left=0.15, right=0.85, top=1, bottom=0.35, wspace=0, hspace=0)
     bar_axes.figure.subplots_adjust(**margins)
     bar_axes.figure.savefig(stream)
     stream.seek(0)
     png_base64 = base64.b64encode(stream.getvalue())
     result_string = 'data:image/png;base64,' + quote(png_base64)
     plt.close(bar_axes.figure)
     return result_string
Esempio n. 28
0
 def mini_histogram(series):
     """Render a 2 x 0.75 inch histogram of ``series`` and return it as a PNG data URI."""
     # Small histogram
     axes = series.plot(kind='hist', figsize=(2, 0.75), facecolor='#337ab7')
     axes.axes.get_yaxis().set_visible(False)
     axes.set_axis_bgcolor("w")

     major_ticks = axes.xaxis.get_major_ticks()
     # Hide every x tick except the two outermost, which get a small font.
     for inner_tick in major_ticks[1:-1]:
         inner_tick.set_visible(False)
         inner_tick.label.set_visible(False)
     for outer_tick in (major_ticks[0], major_ticks[-1]):
         outer_tick.label.set_fontsize(8)

     figure = axes.figure
     figure.subplots_adjust(
         left=0.15, right=0.85, top=1, bottom=0.35, wspace=0, hspace=0
     )
     png = BytesIO()
     figure.savefig(png)
     png.seek(0)
     encoded = quote(base64.b64encode(png.getvalue()))
     result_string = 'data:image/png;base64,' + encoded
     plt.close(figure)
     return result_string
Esempio n. 29
0
    def describe_float_1d(df, column, current_result, nrows):
        """Compute descriptive statistics and histogram images for one column.

        Aggregates ``column`` of ``df`` after dropping nulls, adds approximate
        percentiles, derived measures (range, iqr, cv, mad, p_zeros) and two
        histogram images encoded as ``data:image/png;base64,`` strings.

        ``current_result["count"]`` is used as the divisor for ``mad`` and is
        passed to the ``*_custom`` helpers; ``nrows`` is the divisor for
        ``p_zeros``. ``spark_version`` and ``bins`` come from the enclosing
        scope.

        Returns the first row of the aggregated frame, renamed to ``column``.
        """
        # NOTE(review): count(col(column) == 0.0) looks like it counts every
        # non-null row rather than only the zeros -- verify against Spark's
        # count() semantics before trusting 'n_zeros' / 'p_zeros'.
        if spark_version == "1.6+":
            stats_df = df.select(column).na.drop().agg(mean(col(column)).alias("mean"),
                                                       df_min(col(column)).alias("min"),
                                                       df_max(col(column)).alias("max"),
                                                       variance(col(column)).alias("variance"),
                                                       kurtosis(col(column)).alias("kurtosis"),
                                                       stddev(col(column)).alias("std"),
                                                       skewness(col(column)).alias("skewness"),
                                                       df_sum(col(column)).alias("sum"),
                                                       count(col(column) == 0.0).alias('n_zeros')
                                                       ).toPandas()
        else:
            # Other versions: variance, skewness and kurtosis come from the
            # *_custom helpers, each run as a separate aggregation.
            stats_df = df.select(column).na.drop().agg(mean(col(column)).alias("mean"),
                                                       df_min(col(column)).alias("min"),
                                                       df_max(col(column)).alias("max"),
                                                       df_sum(col(column)).alias("sum"),
                                                       count(col(column) == 0.0).alias('n_zeros')
                                                       ).toPandas()
            stats_df["variance"] = df.select(column).na.drop().agg(variance_custom(col(column),
                                                                                   stats_df["mean"].iloc[0],
                                                                                   current_result["count"])).toPandas().iloc[0][0]
            stats_df["std"] = np.sqrt(stats_df["variance"])
            stats_df["skewness"] = df.select(column).na.drop().agg(skewness_custom(col(column),
                                                                                   stats_df["mean"].iloc[0],
                                                                                   current_result["count"])).toPandas().iloc[0][0]
            stats_df["kurtosis"] = df.select(column).na.drop().agg(kurtosis_custom(col(column),
                                                                                   stats_df["mean"].iloc[0],
                                                                                   current_result["count"])).toPandas().iloc[0][0]

        # One percentile_approx query per level; the result column is keyed
        # by pretty_name(level).
        for x in [0.05, 0.25, 0.5, 0.75, 0.95]:
            stats_df[pretty_name(x)] = (df.select(column)
                                        .na.drop()
                                        .selectExpr("percentile_approx(`{col}`,CAST({n} AS DOUBLE))"
                                                    .format(col=column, n=x)).toPandas().iloc[:,0]
                                        )
        # Work on a copy of the single aggregated row from here on.
        stats = stats_df.iloc[0].copy()
        stats.name = column
        stats["range"] = stats["max"] - stats["min"]
        stats["iqr"] = stats[pretty_name(0.75)] - stats[pretty_name(0.25)]
        stats["cv"] = stats["std"] / float(stats["mean"])
        # Sum of absolute deviations from the mean, divided by
        # current_result["count"].
        stats["mad"] = (df.select(column)
                        .na.drop()
                        .select(df_abs(col(column)-stats["mean"]).alias("delta"))
                        .agg(df_sum(col("delta"))).toPandas().iloc[0,0] / float(current_result["count"]))
        stats["type"] = "NUM"
        stats['p_zeros'] = stats['n_zeros'] / float(nrows)

        # Large histogram
        imgdata = BytesIO()
        hist_data = create_hist_data(df, column, stats["min"], stats["max"], bins)
        figure = plt.figure(figsize=(6, 4))
        plot = plt.subplot()
        plt.bar(hist_data["left_edge"],
                hist_data["count"],
                width=hist_data["width"],
                facecolor='#337ab7')
        plot.set_ylabel("Frequency")
        plot.figure.subplots_adjust(left=0.15, right=0.95, top=0.9, bottom=0.1, wspace=0, hspace=0)
        plot.figure.savefig(imgdata)
        imgdata.seek(0)
        stats['histogram'] = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
        #TODO Think about writing this to disk instead of caching them in strings
        plt.close(plot.figure)

        # The thumbnail reuses the bin data computed for the large plot.
        stats['mini_histogram'] = mini_histogram(hist_data)

        return stats
Esempio n. 30
0
def occi_curl(base_url=None, url='/-/', authtype=None, ignoressl=None, user=None, passwd=None, mimetype=None, headers=None, post='', custom_request=''):
    """Send an HTTP request with pycurl and return the parsed response.

    Every argument left as ``None`` is filled in from ``occi_config``.

    :param string base_url: OCCI server URL (default: from config)
    :param string url: path element of the URL
    :param string authtype: authentication type, 'basic' or 'x509' (default: from config)
    :param bool ignoressl: ignore SSL problems (default: from config)
    :param string user: user name for 'basic' auth (default: from config)
    :param string passwd: password for 'basic' auth (default: from config)
    :param string mimetype: accepted mimetype (default: from config; empty string means '*/*')
    :param string headers[]: additional HTTP headers (default: none)
    :param string post: HTTP body
    :param string custom_request: HTTP request type (default: 'GET' or 'POST')

    :return: [body, header, HTTP status, content type]; HTTP status is None
        when no status line was captured, content type is None when the
        response carried no Content-Type header
    :rtype: [string[], string[], string, string]
    :raises occi.Error: when the transfer itself fails (wraps pycurl.error)
    """
    # The header callbacks (get_header2/get_header3) are defined elsewhere in
    # the file; the raw header lines are read back from this module global.
    global header

    if base_url is None:
        base_url = occi_config['url']
    if authtype is None:
        authtype = occi_config['authtype']
    if ignoressl is None:
        ignoressl = occi_config['ignoressl']
    if user is None:
        user = occi_config['user']
    if passwd is None:
        passwd = occi_config['passwd']
    if mimetype is None:
        mimetype = occi_config['mimetype']
    curlverbose = occi_config['curlverbose']

    # Work on a private copy so neither a shared default nor the caller's
    # list is ever touched.
    headers = list(headers) if headers else []

    # pycurl hands the body over as bytes on Python 3 and str on Python 2.
    if sys.version_info >= (3,):
        buffer = BytesIO()
    else:
        buffer = StringIO()
    curl = pycurl.Curl()
    curl.setopt(pycurl.URL, str(base_url + url))
    curl.setopt(pycurl.WRITEFUNCTION, buffer.write)

    # Disable check of SSL certificate
    if ignoressl:
        curl.setopt(pycurl.SSL_VERIFYPEER, 0)
        curl.setopt(pycurl.SSL_VERIFYHOST, 0)

    if 'capath' in occi_config and occi_config['capath']:
        curl.setopt(pycurl.CAPATH, occi_config['capath'])

    if 'cachain' in occi_config and occi_config['cachain']:
        curl.setopt(pycurl.CAINFO, occi_config['cachain'])

    # Credentials: user/password for basic auth, client certificate for x509
    if authtype == "basic":
        curl.setopt(pycurl.HTTPAUTH, pycurl.HTTPAUTH_BASIC)
        curl.setopt(pycurl.USERPWD, "%s:%s" % (user, passwd))
    elif authtype == "x509":
        if 'cert' in occi_config and occi_config['cert']:
            curl.setopt(pycurl.SSLCERT, occi_config['cert'])
        if 'key' in occi_config and occi_config['key']:
            curl.setopt(pycurl.SSLKEY, occi_config['key'])
        if 'passphrase' in occi_config and occi_config['passphrase']:
            curl.setopt(pycurl.SSLCERTPASSWD, occi_config['passphrase'])

    # Verbose mode
    curl.setopt(pycurl.VERBOSE, curlverbose)

    curl.setopt(pycurl.CONNECTTIMEOUT, occi_config['connectiontimeout'])
    curl.setopt(pycurl.TIMEOUT, occi_config['timeout'])

    # Set appropriate mime type; the Accept header always goes first
    if mimetype:
        headers = ['Accept: %s' % mimetype] + headers
    else:
        headers = ['Accept: */*'] + headers

    # Set requested HTTP headers (never empty: Accept was just prepended)
    curl.setopt(pycurl.HTTPHEADER, headers)

    # HTTP header response
    if sys.version_info >= (3,):
        curl.setopt(pycurl.HEADERFUNCTION, get_header3)
    else:
        curl.setopt(pycurl.HEADERFUNCTION, get_header2)

    if post or custom_request == 'POST':
        curl.setopt(pycurl.POST, 1)
        if post:
            curl.setopt(pycurl.POSTFIELDS, post)
        else:
            # A POST without a caller-supplied body still sends a placeholder
            curl.setopt(pycurl.POSTFIELDS, 'OK')
        if curlverbose:
            # Single-argument print(...) produces the same output on
            # Python 2 and Python 3.
            print("==== POST ==== ")
            print(post)
            print("============== ")

    if custom_request and custom_request != 'POST':
        curl.setopt(pycurl.CUSTOMREQUEST, custom_request)

    # DO IT!
    header = []
    try:
        curl.perform()
    except pycurl.error as pe:
        raise occi.Error(pe)
    finally:
        # Release the handle on failure as well as on success.
        curl.close()

    ## 'Server: Apache/2.2.22 (Debian)\r\n'
    h = {}
    # Stays None when the response contained no 'HTTP...' status line.
    http_status = None
    for item in header:
        if re.match(r'.*:.*', item):
            key = re.sub(r':.*', r'', item.rstrip())
            value = re.sub(r'([^:]*):\s*(.*)', r'\2', item.rstrip())

            h[key] = value
        else:
            # With several status lines (e.g. after a redirect) the last wins
            if re.match(r'^HTTP', item):
                http_status = item.rstrip()
    content_type = None
    if 'Content-Type' in h:
        content_type = re.split(';', h['Content-Type'])[0]

    body = buffer.getvalue()
    if sys.version_info >= (3,):
        # Decode with the declared charset, falling back to ISO-8859-1
        encoding = 'iso-8859-1'
        if content_type:
            match = re.search(r';\s*charset=(\S+)', h['Content-Type'])
            if match:
                encoding = match.group(1)
        body = body.decode(encoding)

    return [body.splitlines(), header, http_status, content_type]
Esempio n. 31
0
    def describe_float_1d(df, column, current_result, nrows):
        """Compute summary statistics and histograms for one numeric column.

        Parameters
        ----------
        df : DataFrame
            Spark DataFrame holding the column (it is queried through
            ``select``/``agg``/``toPandas``).
        column : str
            Name of the column to describe.
        current_result : mapping
            Previously computed results; only ``current_result["count"]`` is
            read, as the divisor for the moments and the mean absolute
            deviation (presumably the non-null row count -- confirm with the
            caller).
        nrows : int
            Divisor used to turn the number of zeros into ``p_zeros``.

        Returns
        -------
        Series
            Named after ``column``; holds mean, min, max, variance, kurtosis,
            std, skewness, sum, the 5/25/50/75/95 percentiles, range, iqr, cv,
            mad, type, n_zeros, p_zeros and the two encoded histogram images.
        """
        if spark_version == "1.6+":
            stats_df = df.select(column).na.drop().agg(mean(col(column)).alias("mean"),
                                                       df_min(col(column)).alias("min"),
                                                       df_max(col(column)).alias("max"),
                                                       variance(col(column)).alias("variance"),
                                                       kurtosis(col(column)).alias("kurtosis"),
                                                       stddev(col(column)).alias("std"),
                                                       skewness(col(column)).alias("skewness"),
                                                       df_sum(col(column)).alias("sum")
                                                       ).toPandas()
        else:
            # Other Spark versions: only the basic aggregates come from the
            # built-ins, the higher moments from the *_custom helpers.
            stats_df = df.select(column).na.drop().agg(mean(col(column)).alias("mean"),
                                                       df_min(col(column)).alias("min"),
                                                       df_max(col(column)).alias("max"),
                                                       df_sum(col(column)).alias("sum")
                                                       ).toPandas()
            # Positional access (.iloc) replaces the removed .ix indexer; the
            # aggregate result is a single row, so position 0 is that row.
            column_mean = stats_df["mean"].iloc[0]
            stats_df["variance"] = df.select(column).na.drop().agg(variance_custom(col(column),
                                                                                   column_mean,
                                                                                   current_result["count"])).toPandas().iloc[0, 0]
            stats_df["std"] = np.sqrt(stats_df["variance"])
            stats_df["skewness"] = df.select(column).na.drop().agg(skewness_custom(col(column),
                                                                                   column_mean,
                                                                                   current_result["count"])).toPandas().iloc[0, 0]
            stats_df["kurtosis"] = df.select(column).na.drop().agg(kurtosis_custom(col(column),
                                                                                   column_mean,
                                                                                   current_result["count"])).toPandas().iloc[0, 0]

        # One approximate-percentile query per quantile
        for x in [0.05, 0.25, 0.5, 0.75, 0.95]:
            stats_df[pretty_name(x)] = (df.select(column)
                                        .na.drop()
                                        .selectExpr("percentile_approx(`{col}`,CAST({n} AS DOUBLE))"
                                                    .format(col=column, n=x)).toPandas().iloc[:, 0]
                                        )
        stats = stats_df.iloc[0].copy()
        stats.name = column
        stats["range"] = stats["max"] - stats["min"]
        stats["iqr"] = stats[pretty_name(0.75)] - stats[pretty_name(0.25)]
        stats["cv"] = stats["std"] / float(stats["mean"])
        # Mean absolute deviation: sum(|x - mean|) / count
        stats["mad"] = (df.select(column)
                        .na.drop()
                        .select(df_abs(col(column)-stats["mean"]).alias("delta"))
                        .agg(df_sum(col("delta"))).toPandas().iloc[0, 0] / float(current_result["count"]))
        stats["type"] = "NUM"
        stats['n_zeros'] = df.select(column).where(col(column)==0.0).count()
        stats['p_zeros'] = stats['n_zeros'] / float(nrows)

        # Large histogram
        imgdata = BytesIO()
        hist_data = create_hist_data(df, column, stats["min"], stats["max"], bins)
        # Called for its side effect: makes a new 6x4 figure the current one
        plt.figure(figsize=(6, 4))
        plot = plt.subplot()
        plt.bar(hist_data["left_edge"],
                hist_data["count"],
                width=hist_data["width"],
                facecolor='#337ab7')
        plot.set_ylabel("Frequency")
        plot.figure.subplots_adjust(left=0.15, right=0.95, top=0.9, bottom=0.1, wspace=0, hspace=0)
        plot.figure.savefig(imgdata)
        imgdata.seek(0)
        stats['histogram'] = 'data:image/png;base64,' + quote(base64.b64encode(imgdata.getvalue()))
        #TODO Think about writing this to disk instead of caching them in strings
        plt.close(plot.figure)

        stats['mini_histogram'] = mini_histogram(hist_data)

        return stats
Esempio n. 32
0
def point_cloud_to_buffer(pc, data_compression=None):
    """Serialize ``pc`` into an in-memory buffer and return its contents.

    The writing is delegated to ``point_cloud_to_fileobj``, which receives
    ``pc``, a fresh ``BytesIO`` and ``data_compression`` unchanged.
    """
    sink = BytesIO()
    point_cloud_to_fileobj(pc, sink, data_compression)
    serialized = sink.getvalue()
    return serialized
Esempio n. 33
0
def create_pdf(form=None, sig_image=None):
    """Build the filled-in form PDF and return it as a download response.

    The form values are drawn onto a blank overlay page, the overlay is
    merged onto the first page of ``resources/form.pdf``, and the result is
    returned behind a front page produced by ``create_front_page``.

    :param form: validated form whose ``cleaned_data`` supplies the values;
        when omitted every field falls back to an empty string
    :param sig_image: optional signature image drawn onto the overlay
    :return: ``HttpResponse`` with content type ``application/pdf`` served
        as an attachment
    """
    def get_from_form(value):
        # Missing form or missing key both yield an empty string
        if form:
            return form.cleaned_data.get(value, "")
        else:
            return ""

    email = get_from_form("email")
    phone_number = get_from_form("phone")
    postcode = get_from_form("postcode")
    surname = get_from_form("surname")
    first_names = get_from_form("first_names")
    add_1 = get_from_form("add_1")
    add_2 = get_from_form("add_2")
    city = get_from_form("city")
    county = get_from_form("county")
    alt_add_1 = get_from_form("alt_add_1")
    alt_add_2 = get_from_form("alt_add_2")
    alt_postcode = get_from_form("alt_postcode")
    alt_reason = get_from_form("reason")

    file_name = file_name_safe("{0}_{1}".format(surname, first_names).lower())

    council = get_from_form("council")
    multi_council = get_from_form("multi_council")

    # Third address line: whichever of city/county is present, comma
    # separated. Stays an empty string when both are missing, so the
    # drawString call below always has a value to work with.
    add_3 = ", ".join(part for part in (city, county) if part)

    until_further_notice = get_from_form("universal")
    one_date = get_from_form("single_day")
    date_range = get_from_form("time_range")
    date_of_birth = get_from_form("dob")

    packet = BytesIO()
    # create a new PDF with Reportlab

    can = canvas.Canvas(packet, pagesize=letter)

    # add signature if present
    if sig_image:
        can.drawImage(ImageReader(sig_image), 293, 155, mask='auto')
    # core address info

    can.drawString(40, 667, surname.upper())
    can.drawString(40, 620, first_names.upper())

    can.drawString(40, 561, add_1.upper())
    can.drawString(40, 541, add_2.upper())
    can.drawString(40, 521, add_3.upper())

    can.drawString(40, 390, email.upper())
    can.drawString(40, 451, phone_number)
    can.drawString(100, 499, postcode.upper())

    # alt address

    can.drawString(285, 646, alt_add_1.upper())
    can.drawString(285, 626, alt_add_2.upper())
    can.drawString(350, 606, alt_postcode.upper())
    can.drawString(285, 548, alt_reason.upper())

    # for how long we want this on

    if until_further_notice:
        can.drawString(30, 278, "X")
    if one_date:
        can.drawString(30, 248, "X")
        write_date(can.drawString, 153, 213, one_date)
    if date_range:
        can.drawString(30, 181, "X")
        write_date(can.drawString, 153, 156, date_range[0])
        write_date(can.drawString, 153, 129, date_range[1])

    # today's date

    write_date(can.drawString, 457, 44, datetime.datetime.now())

    # birthdate

    can.setFont("Helvetica", 30)
    write_date(can.drawString,
               310,
               350,
               date_of_birth,
               25,
               extra_spacing=[1, 3])

    can.save()

    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    front_page = create_front_page(council, postcode, multi_council)

    source_file = os.path.join(settings.PROJECT_PATH, "resources", "form.pdf")

    outputStream = BytesIO()
    # The reader may still pull page data from the file while the writer
    # serializes, so the template stays open until output.write() is done
    # and is then closed deterministically.
    with open(source_file, "rb") as template_file:
        existing_pdf = PdfFileReader(template_file)

        output = PdfFileWriter()

        # add the several pages objects to one pdf

        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        output.addPage(front_page)
        output.addPage(page)

        output.write(outputStream)

    # send the stream into a response and return it to the view

    response = HttpResponse(content_type='application/pdf')
    response[
        'Content-Disposition'] = 'attachment; filename="postal_vote_{0}.pdf"'.format(
            file_name)
    response.write(outputStream.getvalue())

    outputStream.close()
    if council:
        council.increment_count()
    return response