Exemple #1
0
    def _get_train_costs(self):
        """Load this experiment's training scores into a DataFrame.

        Queries the ``train_scores`` collection of ``self.mongo_db`` on
        ``self.mongo_host`` for every document whose ``experiment_id``
        equals ``self.experiment_id``.

        Returns:
            pandas.DataFrame: columns ``loss`` and ``source_id``, indexed by
            the ``iteration`` field and sorted by that index.
        """
        monary = Monary(host=self.mongo_host)
        try:
            iterations, loss, source_id = monary.query(
                db=self.mongo_db,
                coll='train_scores',
                query={'experiment_id': self.experiment_id},
                fields=['iteration', 'loss', 'source_id'],
                types=['int32', 'float32', 'int8']
            )
        finally:
            # A fresh connection is opened on every call, so release it
            # explicitly instead of waiting for garbage collection.
            monary.close()

        scores_df = pd.DataFrame(
            {'loss': loss, 'source_id': source_id}, index=iterations)
        # Order rows by iteration; the order in which the query returns
        # documents is not relied upon.
        return scores_df.sort_index()
Exemple #2
0
    def _get_train_costs(self):
        """Return the training scores of this experiment as a DataFrame.

        Reads the ``iteration``, ``loss`` and ``source_id`` fields of every
        ``train_scores`` document in ``self.mongo_db`` (on
        ``self.mongo_host``) whose ``experiment_id`` matches
        ``self.experiment_id``.

        Returns:
            pandas.DataFrame: ``loss`` and ``source_id`` columns indexed by
            iteration, sorted by that index.
        """
        monary = Monary(host=self.mongo_host)
        try:
            iterations, loss, source_id = monary.query(
                db=self.mongo_db,
                coll='train_scores',
                query={'experiment_id': self.experiment_id},
                fields=['iteration', 'loss', 'source_id'],
                types=['int32', 'float32', 'int8'])
        finally:
            # The connection is created per call; close it deterministically
            # even when the query raises.
            monary.close()

        scores_df = pd.DataFrame({
            'loss': loss,
            'source_id': source_id
        },
                                 index=iterations)
        # Rows come back in query order, so sort them by iteration.
        return scores_df.sort_index()
Exemple #3
0
    def _plot_validation_scores_for_source_and_fold(self, ax, source_id, fold,
                                                    show_axes_labels,
                                                    show_scales):
        """Plot every validation metric for one source and fold on ``ax``.

        Loads the ``validation_scores`` documents matching this experiment,
        ``source_id`` and ``fold``, passes them through ``self._downsample``
        and draws one line per entry of ``self.validation_metric_names``.
        The first metric is drawn on ``ax`` itself; each further metric gets
        its own y-axis created with ``ax.twinx()``.

        Args:
            ax: Matplotlib axes to draw on.
            source_id: Value matched against the ``source_id`` field.
            fold: Value matched against the ``fold`` field.
            show_axes_labels: If true (and ``show_scales`` is also true),
                label each y-axis with its shortened metric name.
            show_scales: If true, show one y scale per metric on the right,
                each one offset further out; otherwise hide the tick labels,
                the y ticks and the right spine.

        Returns:
            list: The plotted line objects, in metric order.

        Side effects:
            Stores the iteration of the last row returned by the query in
            ``self._last_iteration_processed['validation']``.  The entry is
            left untouched when the query returned no rows.
        """
        metric_names = self.validation_metric_names
        fields = ['iteration'] + ['scores.' + name for name in metric_names]

        monary = Monary(host=self.mongo_host)
        try:
            result = monary.query(
                db=self.mongo_db,
                coll='validation_scores',
                query={
                    'experiment_id': self.experiment_id,
                    'source_id': source_id,
                    'fold': fold
                },
                fields=fields,
                types=['int32'] + ['float32'] * len(metric_names)
            )
        finally:
            # A fresh connection is opened on every call, so release it
            # explicitly instead of waiting for garbage collection.
            monary.close()

        # result[0] is the 'iteration' column; the metric columns follow in
        # the same order as metric_names.
        index = result[0]
        data = {name: result[i + 1] for i, name in enumerate(metric_names)}
        df = pd.DataFrame(data, index=index)
        df = df.sort_index()
        df = self._downsample(df)

        # Create multiple independent axes.  Adapted from Joe Kington's answer:
        # http://stackoverflow.com/a/7734614

        # One colour per metric.
        colors = get_colors(len(metric_names))

        # Twin the x-axis to make independent y-axes: the first metric uses
        # `ax`, every further metric gets its own twin.
        axes = [ax] + [ax.twinx() for _ in metric_names[1:]]

        SEP = 0.2  # Gap between successive y-axes, in axes coordinates.
        if show_scales:
            for i, axis in enumerate(axes):
                axis.yaxis.tick_right()
                if i != 0:
                    # To make the border of the right-most axis visible,
                    # we need to turn the frame on. This hides the other plots,
                    # however, so we need to turn its fill off.
                    axis.set_frame_on(True)
                    axis.patch.set_visible(False)
                    # Move the last y-axes spines over to the right.
                    axis.spines['right'].set_position(
                        ('axes', 1 + (SEP * i)))
        else:
            for axis in axes:
                axis.tick_params(labelright=False, labelleft=False)
                axis.yaxis.set_ticks_position('none')
                axis.spines['right'].set_visible(False)

        for axis in axes:
            for spine in ['top', 'left', 'bottom']:
                axis.spines[spine].set_visible(False)
            axis.xaxis.set_ticks_position('none')

        lines = []
        for i, (axis, metric_name, color) in enumerate(
                zip(axes, metric_names, colors)):
            axis.tick_params(axis='y', colors=color, direction='out')
            # Shorten the metric name and put each word on its own line.
            label = metric_name.replace("regression.", "")
            label = label.replace("classification_", "")
            label = label.replace("_", " ")
            label = label.replace(".", " ")
            label = label.replace(" ", "\n")
            line, = axis.plot(
                df.index, df[metric_name].values, color=color, label=label)
            if show_axes_labels and show_scales:
                axis.set_ylabel(
                    label, color=color, rotation=0, fontsize=8, va='bottom')
                # Place the label above its own (offset) spine; for i == 0
                # the offset is zero, so no special case is needed.
                axis.yaxis.set_label_coords(1.05 + (SEP * i), 1.1)
            lines.append(line)

        # NOTE(review): `index` is in query order (only `df` is sorted), so
        # index[-1] is the last row returned, not necessarily the highest
        # iteration -- confirm this is the intended value.
        if len(index):
            # Guard against an empty query result, where index[-1] would
            # raise IndexError.
            self._last_iteration_processed['validation'] = index[-1]
        return lines
from monary import Monary
import numpy as np
import pandas as pd
import time

# Monary connection using the driver's default host and port.
mon = Monary()

# Dotted paths of the document fields to fetch; one DataFrame column each.
columns = [
    'properties.total_residential_units',
    'properties.total_job_spaces',
    'properties.parcel_id',
    'properties.max_dua',
    'properties.max_far'
]

# Time the query plus the DataFrame construction.
t1 = time.time()

# Fetch every requested field as float32 from togethermap.places, restricted
# to a single collectionId.
numpy_arrays = mon.query(
    'togethermap',
    'places',
    {'collectionId': 'ZC7yyAyA8jkDFnRtf'},
    columns,
    ['float32'] * len(columns)
)

# Build the frame column by column rather than through np.matrix: the matrix
# round-trip flattens Monary's masked arrays into a plain array and drops the
# mask, whereas pandas turns masked (missing) entries into NaN.
df = pd.DataFrame(dict(zip(columns, numpy_arrays)), columns=columns)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(time.time() - t1)
print(df.describe())
Exemple #5
0
from monary import Monary
import numpy as np
import pandas as pd
import time

# Monary connection using the driver's default host and port.
mon = Monary()

# Dotted paths of the document fields to fetch; one DataFrame column each.
columns = [
    'properties.total_residential_units', 'properties.total_job_spaces',
    'properties.parcel_id', 'properties.max_dua', 'properties.max_far'
]

# Time the query plus the DataFrame construction.
t1 = time.time()

# Fetch every requested field as float32 from togethermap.places, restricted
# to a single collectionId.
numpy_arrays = mon.query('togethermap', 'places',
                         {'collectionId': 'ZC7yyAyA8jkDFnRtf'}, columns,
                         ['float32'] * len(columns))

# Build the frame column by column rather than through np.matrix: the matrix
# round-trip flattens Monary's masked arrays into a plain array and drops the
# mask, whereas pandas turns masked (missing) entries into NaN.
df = pd.DataFrame(dict(zip(columns, numpy_arrays)), columns=columns)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(time.time() - t1)
print(df.describe())
Exemple #6
0
    def _plot_validation_scores_for_source_and_fold(self, ax, source_id, fold,
                                                    show_axes_labels,
                                                    show_scales):
        """Draw one line per validation metric for a given source and fold.

        Queries the ``validation_scores`` collection for documents matching
        ``self.experiment_id``, ``source_id`` and ``fold``, runs the result
        through ``self._downsample`` and plots each metric listed in
        ``self.validation_metric_names``.  The first metric shares ``ax``;
        every additional metric is drawn on a twin of ``ax`` so that each
        metric has an independent y-axis.

        Args:
            ax: Matplotlib axes to draw on.
            source_id: Value matched against the ``source_id`` field.
            fold: Value matched against the ``fold`` field.
            show_axes_labels: If true (and ``show_scales`` is also true),
                label each y-axis with its shortened metric name.
            show_scales: If true, show one y scale per metric on the right,
                each one offset further out; otherwise hide the tick labels,
                the y ticks and the right spine.

        Returns:
            list: The plotted line objects, in metric order.

        Side effects:
            Stores the iteration of the last row returned by the query in
            ``self._last_iteration_processed['validation']``.  The entry is
            left untouched when the query returned no rows.
        """
        metric_names = self.validation_metric_names
        fields = ['iteration'] + ['scores.' + name for name in metric_names]

        monary = Monary(host=self.mongo_host)
        try:
            result = monary.query(db=self.mongo_db,
                                  coll='validation_scores',
                                  query={
                                      'experiment_id': self.experiment_id,
                                      'source_id': source_id,
                                      'fold': fold
                                  },
                                  fields=fields,
                                  types=['int32'] +
                                  ['float32'] * len(metric_names))
        finally:
            # The connection is created per call; close it deterministically
            # even when the query raises.
            monary.close()

        # result[0] holds the 'iteration' column; the metric columns follow
        # in the same order as metric_names.
        index = result[0]
        data = {name: result[i + 1] for i, name in enumerate(metric_names)}
        df = pd.DataFrame(data, index=index)
        df = df.sort_index()
        df = self._downsample(df)

        # Create multiple independent axes.  Adapted from Joe Kington's answer:
        # http://stackoverflow.com/a/7734614

        # One colour per metric.
        colors = get_colors(len(metric_names))

        # Twin the x-axis to make independent y-axes: the first metric uses
        # `ax`, every further metric gets its own twin.
        axes = [ax] + [ax.twinx() for _ in metric_names[1:]]

        SEP = 0.2  # Gap between successive y-axes, in axes coordinates.
        if show_scales:
            for i, axis in enumerate(axes):
                axis.yaxis.tick_right()
                if i != 0:
                    # To make the border of the right-most axis visible,
                    # we need to turn the frame on. This hides the other plots,
                    # however, so we need to turn its fill off.
                    axis.set_frame_on(True)
                    axis.patch.set_visible(False)
                    # Move the last y-axes spines over to the right.
                    axis.spines['right'].set_position(('axes', 1 + (SEP * i)))
        else:
            for axis in axes:
                axis.tick_params(labelright=False, labelleft=False)
                axis.yaxis.set_ticks_position('none')
                axis.spines['right'].set_visible(False)

        for axis in axes:
            for spine in ['top', 'left', 'bottom']:
                axis.spines[spine].set_visible(False)
            axis.xaxis.set_ticks_position('none')

        lines = []
        for i, (axis, metric_name, color) in enumerate(
                zip(axes, metric_names, colors)):
            axis.tick_params(axis='y', colors=color, direction='out')
            # Shorten the metric name and put each word on its own line.
            label = metric_name.replace("regression.", "")
            label = label.replace("classification_", "")
            label = label.replace("_", " ")
            label = label.replace(".", " ")
            label = label.replace(" ", "\n")
            line, = axis.plot(df.index,
                              df[metric_name].values,
                              color=color,
                              label=label)
            if show_axes_labels and show_scales:
                axis.set_ylabel(label,
                                color=color,
                                rotation=0,
                                fontsize=8,
                                va='bottom')
                # Place the label above its own (offset) spine; for i == 0
                # the offset is zero, so no special case is needed.
                axis.yaxis.set_label_coords(1.05 + (SEP * i), 1.1)
            lines.append(line)

        # NOTE(review): `index` is in query order (only `df` is sorted), so
        # index[-1] is the last row returned, not necessarily the highest
        # iteration -- confirm this is the intended value.
        if len(index):
            # Guard against an empty query result, where index[-1] would
            # raise IndexError.
            self._last_iteration_processed['validation'] = index[-1]
        return lines
Exemple #7
0
# Script to save a CSV with comment data.
import pymongo
import urllib3
from pymongo import MongoClient
from hpfunctions import getUrl, stripWhite, stripWhiteList
from lxml import html
import pandas
import numpy
from monary import Monary

# pymongo client with default connection settings; used below only to list
# the field names of one stored document.
client = MongoClient()
db = client['hp']

# Monary connection (default host/port) used for the bulk column query.
mon = Monary()

print('available columns')

# Print the keys of one comStats document.  find_one() returns None for an
# empty collection, where find()[0] would raise IndexError.
first_doc = db.comStats.find_one() or {}
for field_name in first_doc:
    print(field_name)

columns = ['typos', 'avSyllables', 'nPunct']
# NOTE(review): 'int32:20' carries a size suffix, which Monary otherwise uses
# for sized types such as strings -- confirm plain 'int32' was not intended.
numpy_arrays = mon.query('hp',
                         'comStats',
                         {},
                         columns,
                         ['int32', 'int32', 'int32:20'])

# Build the frame column by column rather than through numpy.matrix: the
# matrix round-trip flattens Monary's masked arrays into a plain array and
# drops the mask, whereas pandas turns masked (missing) entries into NaN.
df = pandas.DataFrame(dict(zip(columns, numpy_arrays)), columns=columns)
print('starting to write file pandasTest.csv')
df.to_csv('pandasTest.csv', sep='\t')