コード例 #1
0
ファイル: recipes.py プロジェクト: jayelm/bdbcontrib
 def initialize_session_capture(self, name):
   """Resolve, exactly once, whether to capture this session's details.

   name : str, False, or None
       A string opts in to session capture under that name.  False opts
       out (a content-free 'single-opt-out' beacon is still counted).
       None means no explicit choice was given, so look for a saved
       choice file in the current directory or any parent directory.

   Raises BLE(ValueError) if no choice was given and none is saved.
   """
   if self.session_capture_name is not None:
     return  # A choice was already recorded; never overwrite it.
   if name is not None:
     if name == False:
       # Count the opt-out itself; no session content is sent.
       with logged_query('count-beacon', None, name='single-opt-out'):
         pass
     self.session_capture_name = name
     return
   # Search for a session-capture name or opt-out saved as a file:
   filename = "bayesdb-session-capture-opt.txt"
   searchdir = os.getcwd()
   while searchdir != os.path.dirname(searchdir):  # While not at root.
     try:
       with open(os.path.join(searchdir, filename), 'r') as optinfile:
         self.session_capture_name = optinfile.read()
         if self.session_capture_name == 'False':
           # The file stores the choice as text; 'False' means opt-out.
           with logged_query('count-beacon', None, name='saved-opt-out'):
             self.session_capture_name = False
         break
     except IOError:
       pass  # No choice file here; try the parent directory.
     searchdir = os.path.dirname(searchdir)
   # No init option specified, no choice file found. Force the choice.
   if self.session_capture_name is None:
     raise BLE(ValueError(
       "Please set session_capture_name option to quickstart\n"
       "  to either opt-in or opt-out of sending details of your usage of\n"
       "  this software to the MIT Probabilistic Computing Group.\n\n"
       "If you see this in one of our example notebooks,\n"
       "  return to the starting page, the Index.ipynb, to\n"
       "  make that choice."))  # TODO: Index.ipynb is a promise.
コード例 #2
0
ファイル: recipes.py プロジェクト: jayelm/bdbcontrib
  def analyze(self, models=100, minutes=0, iterations=0, checkpoint=0):
    '''Run analysis.

    models : integer
        The number of models bounds the accuracy of predictive probabilities.
        With ten models, then you get one decimal digit of interpretability,
        with a hundred models, you get two, and so on.
    minutes : integer
        How long you want to let it run.
    iterations : integer
        How many iterations to let it run.
    checkpoint : integer
        How often to save progress; 0 (the default) picks a heuristic
        based on minutes or iterations.

    Returns:
        A report indicating how many models have seen how many iterations,
        and other info about model stability.
    '''
    self.check_representation()
    with logged_query(query_string='recipes.analyze',
                      name=self.session_capture_name):
      if models > 0:
        self.query('INITIALIZE %d MODELS IF NOT EXISTS FOR %s' %
              (models, self.generator_name))
        assert minutes == 0 or iterations == 0
      else:
        # models <= 0 means "use however many models already exist".
        models = self.analysis_status().sum()
      if minutes > 0:
        if checkpoint == 0:
          # Heuristic: roughly 200 checkpoints over the whole run.
          checkpoint = max(1, int(minutes * models / 200))
        analyzer = ('ANALYZE %s FOR %d MINUTES CHECKPOINT %d ITERATION WAIT' %
                    (self.generator_name, minutes, checkpoint))
        # NOTE(review): this eagerly runs 'SELECT * FROM %t' and logs the
        # whole table as bindings -- presumably intentional for session
        # capture, but confirm the cost is acceptable on large tables.
        with logged_query(query_string=analyzer,
                          name=self.session_capture_name,
                          bindings=self.query('SELECT * FROM %t')):
          self.query(analyzer)
      elif iterations > 0:
        if checkpoint == 0:
          # Heuristic: roughly 20 checkpoints over the whole run.
          checkpoint = max(1, int(iterations / 20))
        self.query(
            '''ANALYZE %s FOR %d ITERATIONS CHECKPOINT %d ITERATION WAIT''' % (
              self.generator_name, iterations, checkpoint))
      else:
        raise BLE(NotImplementedError('No default analysis strategy yet.'
                                      ' Please specify minutes or iterations.'))
    # itrs = self.per_model_analysis_status()
    # models_with_fewest_iterations =
    #    itrs[itrs['iterations'] == itrs.min('index').head(0)[0]].index.tolist()
    # TODO(gremio): run each model with as many iterations as it needs to get
    # up to where it needs to get to, if that's larger?
    # Nope. Vikash said there's no reason to think that's a good idea. Perhaps
    # even better to have some young models mixed in with the old ones.
    # I still think we should make some recommendation that scales for what
    # "the right thing" is, where that's something that at least isn't known to
    # suck.

    return self.analysis_status()
コード例 #3
0
ファイル: test_loggers.py プロジェクト: joshnr13/bayeslite
def test_logged_query_dataframe():
    from pandas import DataFrame
    # complex(4, -5) deliberately omitted from 'a': Complex is broken, even
    # with the default handler special case.
    # See https://github.com/pydata/pandas/issues/12554
    frame = DataFrame({
        'a': [1, 2.3, -4],
        'b': [float('nan'), None, 'N/A'],
    })

    inner = StubCallable()
    home = StubCallable()
    logger = loggers.CallHomeStatusLogger(post=home)
    with loggers.logged_query(logger=logger,
                              query_string='q',
                              bindings=(frame, ),
                              name='n'):
        inner('inside')
    assert len(inner.calls) == 1
    assert str(inner.calls[0]) == "(('inside',), {})"
    time.sleep(0.2)  # Give the call-home thread a moment; less flaky.
    assert len(home.calls) == 1
    payload = home.calls[0][1]['data']['session_json']
    logged = json.loads(payload)['entries'][0][2][1]
    expected = (
        '{"a":{"0":1.0,"1":2.3,"2":-4.0},'
        # {"mathjs":"Complex","re":4,"im":-5}},' Complex broken. See above.
        '"b":{"0":null,"1":null,"2":"N\/A"}}'
    )
    assert logged == [expected]
コード例 #4
0
ファイル: test_loggers.py プロジェクト: joshnr13/bayeslite
def test_logged_query_no_name():
    recorder = StubCallable()
    logger = loggers.CallHomeStatusLogger(post=recorder)
    with loggers.logged_query(query_string='q', bindings=('b', ),
                              logger=logger):
        recorder('inside')
    # A nameless query is not posted home: only our own call is recorded.
    assert str(recorder.calls) == "[(('inside',), {})]"
コード例 #5
0
def test_logged_query_no_name():
    stub = StubCallable()
    with loggers.logged_query(
            query_string='q', bindings=('b',),
            logger=loggers.CallHomeStatusLogger(post=stub)):
        stub('inside')
    # With no name given, nothing is posted -- just the one in-context call.
    assert "[(('inside',), {})]" == str(stub.calls)
コード例 #6
0
def analyze(self,
            models=100,
            minutes=0,
            iterations=0,
            checkpoint=0,
            generator_name=None):
    '''Run analysis.

    models : integer
        The number of models bounds the accuracy of predictive probabilities.
        With ten models, then you get one decimal digit of interpretability,
        with a hundred models, you get two, and so on.
    minutes : integer
        How long you want to let it run.
    iterations : integer
        How many iterations to let it run.
    checkpoint : integer
        How often to save progress; 0 (the default) picks a heuristic
        based on minutes or iterations.
    generator_name : string  REQUIRED.
        The generator to initialize and analyze.

    Returns:
        A report indicating how many models have seen how many iterations,
        and other info about model stability.
    '''
    assert generator_name is not None
    if models > 0:
        self.query('INITIALIZE %d MODELS IF NOT EXISTS FOR %s' %
                   (models, generator_name))
        assert minutes == 0 or iterations == 0
    else:
        # models <= 0 means "use however many models already exist".
        models = self.analysis_status(generator_name=generator_name).sum()
    if minutes > 0:
        if checkpoint == 0:
            # Heuristic: roughly 200 checkpoints over the whole run.
            checkpoint = max(1, int(minutes * models / 200))
        # BUG FIX: the analyzer construction and execution were previously
        # nested inside "if checkpoint == 0", so calling with minutes > 0
        # and an explicit non-zero checkpoint silently ran no analysis.
        analyzer = (
            'ANALYZE %s FOR %d MINUTES CHECKPOINT %d ITERATION WAIT' %
            (generator_name, minutes, checkpoint))
        with logged_query(query_string=analyzer,
                          name=self.session_capture_name,
                          bindings=self.query('SELECT * FROM %t')):
            self.query(analyzer)
    elif iterations > 0:
        if checkpoint == 0:
            # Heuristic: roughly 20 checkpoints over the whole run.
            checkpoint = max(1, int(iterations / 20))
        self.query(
            '''ANALYZE %s FOR %d ITERATIONS CHECKPOINT %d ITERATION WAIT''' %
            (generator_name, iterations, checkpoint))
    else:
        raise NotImplementedError('No default analysis strategy yet. '
                                  'Please specify minutes or iterations.')
    return self.analysis_status(generator_name=generator_name)
コード例 #7
0
def test_logged_query_success():
    ran = StubCallable()
    posted = StubCallable()
    logger = loggers.CallHomeStatusLogger(post=posted)
    # Run a stub "query" inside the logged context.
    with loggers.logged_query(logger=logger, **THE_USUAL):
        ran('inside')
    assert [str(c) for c in ran.calls] == ["(('inside',), {})"]
    # Let the call-home thread run, so this is less flaky.
    time.sleep(0.2)
    assert len(posted.calls) == 1
    check_logcall(posted.calls[0])
コード例 #8
0
ファイル: test_loggers.py プロジェクト: joshnr13/bayeslite
def test_logged_query_success():
    query_stub = StubCallable()
    post_stub = StubCallable()
    with loggers.logged_query(
            logger=loggers.CallHomeStatusLogger(post=post_stub),
            **THE_USUAL):
        query_stub('inside')
    # Exactly one in-context call was made.
    assert len(query_stub.calls) == 1
    assert str(query_stub.calls[0]) == "(('inside',), {})"
    time.sleep(0.2)  # Let the call-home thread run; reduces flakiness.
    # Exactly one post home, with the usual payload.
    assert len(post_stub.calls) == 1
    check_logcall(post_stub.calls[0])
コード例 #9
0
ファイル: test_loggers.py プロジェクト: joshnr13/bayeslite
def test_logged_query_successful_log_failure():
    worker = StubCallable()
    broken_post = StubCallable(throw=NotImplementedError('foo'))
    logger = loggers.CallHomeStatusLogger(post=broken_post)
    with loggers.logged_query(logger=logger, **THE_USUAL):
        worker('inside')
    # The in-context work succeeded despite the broken post handler.
    assert len(worker.calls) == 1
    assert str(worker.calls) == "[(('inside',), {})]"
    # The post raised on its own thread; that failure is ignored.
    time.sleep(0.2)  # Let the call-home thread run, so this is less flaky.
    assert len(broken_post.calls) == 1
    check_logcall(broken_post.calls[0])
コード例 #10
0
def test_logged_query_successful_log_failure():
    okstub = StubCallable()
    failstub = StubCallable(throw=NotImplementedError('foo'))
    with loggers.logged_query(
            logger=loggers.CallHomeStatusLogger(post=failstub),
            **THE_USUAL):
        okstub('inside')
    assert str(okstub.calls) == "[(('inside',), {})]"
    assert len(okstub.calls) == 1
    # The failing post happened on another thread and was ignored.
    time.sleep(0.2)  # Let the call-home thread run, so this is less flaky.
    assert len(failstub.calls) == 1
    check_logcall(failstub.calls[0])
コード例 #11
0
 def as_population_method(self, *args, **kwargs):
   # Log the wrapped call (by function name) under the session-capture name.
   with logged_query(query_string=fn.__code__.co_name,
                     bindings=(args, kwargs),
                     name=self.session_capture_name):
     self.check_representation()
     dargs, dkwargs = apply_argspec_transforms(self, xfrms, args, kwargs)
     try:
       outcome = fn(*dargs, **dkwargs)
     except:
       # Record the traceback before propagating to the caller.
       self.logger.exception("")
       raise
     self.check_representation()
     return outcome
コード例 #12
0
 def as_population_method(self, *args, **kwargs):
     """Run fn on argspec-transformed arguments inside a logged query."""
     capture_name = self.session_capture_name
     with logged_query(query_string=fn.__code__.co_name,
                       bindings=(args, kwargs),
                       name=capture_name):
         self.check_representation()
         transformed = apply_argspec_transforms(self, xfrms, args, kwargs)
         (dargs, dkwargs) = transformed
         result = None
         try:
             result = fn(*dargs, **dkwargs)
         except:
             # Log the failure, then let it propagate.
             self.logger.exception("")
             raise
         self.check_representation()
         return result
コード例 #13
0
def test_logged_query_fail():
    dying = StubCallable(throw=NotImplementedError('foo'))
    home = StubCallable()
    lgr = loggers.CallHomeStatusLogger(post=home)
    raised = False
    try:
        with loggers.logged_query(logger=lgr, **THE_USUAL):
            dying('die')
    except NotImplementedError:
        raised = True
    # The in-context failure must propagate out of the context manager.
    assert raised
    time.sleep(0.2)  # Let the call-home thread run, so this is less flaky.
    assert len(home.calls) == 1
    check_logcall(home.calls[0])

    assert len(dying.calls) == 1
    assert str(dying.calls) == "[(('die',), {})]"
コード例 #14
0
ファイル: test_loggers.py プロジェクト: lessc0de/bayeslite
def test_logged_query_fail():
    failstub = StubCallable(throw=NotImplementedError('foo'))
    okstub = StubCallable()
    try:
        with loggers.logged_query(
                logger=loggers.CallHomeStatusLogger(post=okstub),
                **THE_USUAL):
            failstub('die')
        assert False  # The failure above must propagate.
    except NotImplementedError:
        pass
    time.sleep(0.2)  # Let the call-home thread run, so this is less flaky.
    # The query was still logged home despite failing:
    assert len(okstub.calls) == 1
    check_logcall(okstub.calls[0])
    # And the failing callable ran exactly once:
    assert len(failstub.calls) == 1
    assert "[(('die',), {})]" == str(failstub.calls)
コード例 #15
0
ファイル: recipes.py プロジェクト: jayelm/bdbcontrib
  def quick_explore_vars(self, vars, nsimilar=20, plotfile='explore_vars'):
    """Show dependence probabilities and neighborhoods based on those.

    vars: list of strings
        At least two column names to look at dependence probabilities of,
        and to explore neighborhoods of.
    nsimilar: positive integer
        The size of the neighborhood to explore.
    plotfile: string pathname
        Where to save plots, if not displaying them on console.

    Raises BLE(ValueError) if fewer than two variable names are given.
    """
    self.check_representation()
    with logged_query(query_string='quick_explore_vars', bindings=(vars,),
                    name=self.session_capture_name):
      if len(vars) < 2:
        raise BLE(ValueError('Need to explore at least two variables.'))
      self.pairplot_vars(vars)
      # Quote each name and join: '"a", "b", ...' for the BQL column list.
      query_columns = '''"%s"''' % '''", "'''.join(vars)
      deps = self.query('''ESTIMATE DEPENDENCE PROBABILITY
                           FROM PAIRWISE COLUMNS OF %s
                           FOR %s;''' % (self.generator_name, query_columns))
      # Rename to the column names heatmap expects.
      deps.columns = ['genid', 'name0', 'name1', 'value']
      self.heatmap(deps, plotfile=plotfile)
      # NOTE(review): this second rename is redundant -- deps.columns was
      # already set to the same values just above.
      deps.columns = ['genid', 'name0', 'name1', 'value']
      # Keep only one of each symmetric pair (name0 < name1).
      triangle = deps[deps['name0'] < deps['name1']]
      triangle = triangle.sort_values(ascending=False, by=['value'])
      self.logger.result("Pairwise dependence probability for: %s\n%s\n\n",
                         query_columns, triangle)

      # For each variable, find its strongest dependents and re-plot the
      # dependence heatmap restricted to that neighborhood.
      for col in vars:
        neighborhood = self.query(
        '''ESTIMATE *, DEPENDENCE PROBABILITY WITH "%s"
           AS "Probability of Dependence with %s"
           FROM COLUMNS OF %s
           ORDER BY "Probability of Dependence with %s"
           DESC LIMIT %d;'''
           % (col, col, self.generator_name, col, nsimilar))
        neighbor_columns = ('''"%s"''' %
                            '''", "'''.join(neighborhood["name"].tolist()))
        deps = self.query('''ESTIMATE DEPENDENCE PROBABILITY
            FROM PAIRWISE COLUMNS OF %s
            FOR %s;''' % (self.generator_name, neighbor_columns))
        deps.columns = ['genid', 'name0', 'name1', 'value']
        self.heatmap(deps, plotfile=(plotfile + "-" + col))
        self.logger.result("Pairwise dependence probability of %s with its " +
                           "strongest dependents:\n%s\n\n", col, neighborhood)
コード例 #16
0
ファイル: test_loggers.py プロジェクト: joshnr13/bayeslite
def test_logged_query_reporting_timeout():
    quick = StubCallable()
    slumbertime = 1
    sluggish = StubCallable(sleep=slumbertime)
    lgr = loggers.CallHomeStatusLogger(post=sluggish)
    begun = time.time()
    with loggers.logged_query(logger=lgr, **THE_USUAL):
        quick('inside')
    took = time.time() - begun
    time.sleep(0.2)  # To let the call-home thread run, so this is less flaky.
    # The context exits promptly even though the post handler sleeps.
    assert len(quick.calls) == 1
    assert str(quick.calls[0]) == "(('inside',), {})"
    assert took < .9 * slumbertime  # Shouldn't even be close.

    # The slow post was nonetheless dispatched exactly once:
    assert len(sluggish.calls) == 1
    check_logcall(sluggish.calls[0])
コード例 #17
0
ファイル: recipes.py プロジェクト: amswak/bdbcontrib
  def pairplot(self, cols, plotfile=None, colorby=None, **kwargs):
    """Plot pairwise relationships among the given columns.

    Delegates to bdbcontrib.pairplot with this population's bdb and
    generator_name, selecting the named columns (plus colorby, if given).
    """
    if len(cols) < 1:
      raise ValueError('Pairplot at least one variable.')
    selected = cols if colorby is None else set(cols + [colorby])
    # Quote each name and join: '"a", "b", ...' for the SELECT list.
    query_columns = '''"%s"''' % '''", "'''.join(selected)
    select = '''SELECT %s FROM %s''' % (query_columns, self.name)
    with logged_query(query_string='pairplot cols=?',
                      bindings=(query_columns,),
                      name=self.session_capture_name):
      figure = bdbcontrib.pairplot(self.bdb, select,
                                   generator_name=self.generator_name,
                                   colorby=colorby,
                                   **kwargs)
      self.logger.plot(plotfile, figure)
コード例 #18
0
def test_logged_query_reporting_timeout():
    okstub = StubCallable()
    slumbertime = 1
    slowstub = StubCallable(sleep=slumbertime)
    lgr = loggers.CallHomeStatusLogger(post=slowstub)
    start_time = time.time()
    with loggers.logged_query(logger=lgr, **THE_USUAL):
        okstub('inside')
    elapsed_time = time.time() - start_time
    time.sleep(0.2)  # Let the call-home thread run, so this is less flaky.
    # Work inside the context succeeded and returned quickly.
    assert [str(c) for c in okstub.calls] == ["(('inside',), {})"]
    assert elapsed_time < .9 * slumbertime  # Shouldn't even be close.

    # But the call should have registered already:
    assert len(slowstub.calls) == 1
    check_logcall(slowstub.calls[0])
コード例 #19
0
ファイル: recipes.py プロジェクト: jayelm/bdbcontrib
 def heatmap(self, deps, selectors=None, plotfile=None, **kwargs):
   '''Show heatmaps for the given dependencies

   Parameters
   ----------
   deps : pandas.DataFrame(columns=['generator_id', 'name0', 'name1', 'value'])
       The result of a .q('ESTIMATE ... PAIRWISE ...')
       E.g., DEPENDENCE PROBABILITY, MUTUAL INFORMATION, COVARIANCE, etc.

   selectors : {str: lambda name --> bool}
       Rather than plot the full NxN matrix all together, make separate plots
       for each combination of these selectors, plotting them in sequence.
       If selectors are specified, the actual selector functions are values of
       a dict, and the keys are their names, for purposes of plot legends and
       filenames.
       E.g.,
         {'A-E': lambda x: bool(re.search(r'^[a-eA-E]', x[0])),
          'F-O': lambda x: bool(re.search(r'^[f-oF-O]', x[0])),
          'P-Z': lambda x: bool(re.search(r'^[p-zP-Z]', x[0]))}

   plotfile : str
       If a plotfile is specified, savefig to that file. If selectors are also
       specified, savefig to name1.name2.plotfile.

   **kwargs : dict
       Passed to zmatrix: vmin, vmax, row_ordering, col_ordering

   Returns
   -------
   The matplotlib figure created at the start of plotting.
   '''
   self.check_representation()
   with logged_query(query_string='heatmap(deps, selectors)',
                     bindings=(str(deps), repr(selectors)),
                     name=self.session_capture_name):
     # NOTE(review): hmap is created before any drawing; verify the heatmap
     # helpers actually draw into this figure rather than their own.
     hmap = plt.figure()
     if selectors is None:
       # One heatmap over the whole pairwise matrix.
       cmap = bdbcontrib.plot_utils.heatmap(self.bdb, df=deps, **kwargs)
       self.logger.plot(plotfile, cmap)
     else:
       # Deterministic ordering of selector functions, by display name.
       selfns = [selectors[k] for k in sorted(selectors.keys())]
       # Map each selector function back to its name for plot filenames.
       reverse = dict([(v, k) for (k, v) in selectors.items()])
       for (cmap, sel1, sel2) in bdbcontrib.plot_utils.selected_heatmaps(
            self.bdb, df=deps, selectors=selfns, **kwargs):
         self.logger.plot("%s.%s.%s" % (
             reverse[sel1], reverse[sel2], plotfile),
                          cmap)
     return hmap
コード例 #20
0
 def as_population_method(self, *args, **kwargs):
   """Call fn on transformed arguments, logging and displaying the result.

   The call is recorded under the session-capture name; exceptions are
   logged and re-raised.  A matplotlib Figure result is routed to
   logger.plot (honoring a 'plotfile' keyword, if given); anything else
   goes to logger.info.
   """
   with logged_query(query_string=fn.__code__.co_name,
                     bindings=(args, kwargs),
                     name=self.session_capture_name):
     self.check_representation()
     (dargs, dkwargs) = apply_argspec_transforms(self, xfrms, args, kwargs)
     result = None
     try:
       result = fn(*dargs, **dkwargs)
     except:
       self.logger.exception("")
       raise
     from matplotlib import pyplot
     if isinstance(result, pyplot.Figure):
       # BUG FIX: logger.plot takes (plotfile, figure) -- the arguments
       # were reversed here, unlike every other logger.plot call in this
       # codebase (cf. pairplot and heatmap).
       self.logger.plot(kwargs.get('plotfile', None), result)
     else:
       self.logger.info(result)
     self.check_representation()
     return result
コード例 #21
0
ファイル: recipes.py プロジェクト: jayelm/bdbcontrib
  def quick_similar_rows(self, identify_row_by, nsimilar=10):
    """Explore rows similar to the identified one.

    identify_row_by : dict
        Dictionary of column names to their values. These will be turned into
        a WHERE clause in BQL, and must identify one unique row.
    nsimilar : positive integer
        The number of similar rows to retrieve.

    Returns the result rows ordered by decreasing similarity.
    """
    self.check_representation()
    with logged_query(query_string='quick_similar_rows(id_by, n)',
                      bindings=(identify_row_by, nsimilar),
                      name=self.session_capture_name):
      import hashlib
      # Content-address the temp table on the row spec and current status,
      # so 'IF NOT EXISTS' below reuses an identical earlier result.
      # NOTE(review): Python 2 only -- md5 of a str here, and
      # dict.iteritems below.
      table_name = 'tmptbl_' + hashlib.md5('\x00'.join(
          [repr(identify_row_by), str(self.status)])).hexdigest()
      # Sanitize the values into a legal identifier for the new column.
      column_name = 'similarity_to_' + "__".join(
          re.sub(r'\W', '_', str(val)) for val in identify_row_by.values())
      query_params = []
      query_columns = []
      for k, v in identify_row_by.iteritems():
        query_columns.append('''%s = ? ''' % bayeslite.bql_quote_name(k))
        query_params.append(v)
      query_attrs = ' and '.join(query_columns)

      with self.bdb.savepoint():
        # Confirm the WHERE clause identifies exactly one row before
        # building the similarity table.
        row_exists = self.query('SELECT COUNT(*) FROM %s WHERE %s;' %
                                (self.name, query_attrs))
        if row_exists.ix[0][0] != 1:
          raise BLE(NotImplementedError(
              'identify_row_by found %d rows instead of exactly 1 in %s.' %
              (row_exists.ix[0][0], self.csv_path)))
        creation_query = ('''CREATE TEMP TABLE IF NOT EXISTS %s AS ESTIMATE *,
                             SIMILARITY TO (%s) AS %s FROM %%g LIMIT %d;''' %
                          (table_name, query_attrs, column_name, nsimilar))
        self.query(creation_query, query_params)
        result = self.query('''SELECT * FROM %s ORDER BY %s DESC;''' %
                            (table_name, column_name))
      return result
コード例 #22
0
def test_logged_query_dataframe():
    from pandas import DataFrame
    # complex(4, -5) left out of 'a': Complex is broken, even with the
    # default handler special case.
    # See https://github.com/pydata/pandas/issues/12554
    frame = DataFrame({'a': [1, 2.3, -4],
                       'b': [float('nan'), None, 'N/A']})

    ran = StubCallable()
    posted = StubCallable()
    lgr = loggers.CallHomeStatusLogger(post=posted)
    with loggers.logged_query(logger=lgr, query_string='q',
                              bindings=(frame,), name='n'):
        ran('inside')
    assert len(ran.calls) == 1
    assert str(ran.calls[0]) == "(('inside',), {})"
    time.sleep(0.2)  # Give the call-home thread time to run; less flaky.
    assert len(posted.calls) == 1
    session_json = posted.calls[0][1]['data']['session_json']
    entry = json.loads(session_json)['entries'][0][2][1]
    assert entry == ['{"a":{"0":1.0,"1":2.3,"2":-4.0},'
                     # {"mathjs":"Complex","re":4,"im":-5}},' See above.
                     '"b":{"0":null,"1":null,"2":"N\/A"}}']
コード例 #23
0
ファイル: population.py プロジェクト: jayelm/bdbcontrib
  def __init__(self, name, csv_path=None, bdb_path=None, df=None, logger=None,
               session_capture_name=None):
    """Create a Population object, wrapping a bayeslite.BayesDB.

    name : str  REQUIRED.
        A name for the population, should use letters and underscores only.
        This will also be used as a table name in the bdb, and %t in queries
        will expand to this name. %g in queries will expand to the current
        population metamodel, also based on this name.
    csv_path : str
        The path to a comma-separated values file. If specified, will be used
        to populate the bdb. It must exist and be both readable and non-empty.
    df : pandas.DataFrame
        If specified, these data will be used to populate the bdb, superseding
        any csv_path. It must not be empty.
    bdb_path : str
        If specified, store data and analysis results here. If no other data
        source (csv or df) is specified, then it must already have been
        populated. If not specified, we will use a volatile in-memory bdb.
    logger : object
        Something on which we can call .info or .warn to send messages to the
        user. By default a bayeslite.loggers.BqlLogger, but could be QuietLogger
        (only results), SilentLogger (nothing), IpyLogger, CaptureLogger,
        LoggingLogger, or anything else that implements the BqlLogger interface.
    session_capture_name : String
        Signing up with your name and email and sending your session details
        to the MIT Probabilistic Computing Group helps build a community of
        support and helps improve your user experience. You can save your choice
        in a file called 'bayesdb-session-capture-opt.txt' in the directory
        where you run the software, or any parent directory. This option
        overrides any setting in such a file. Any string is interpreted as
        opting in to sending session details. False is interpreted as opting
        out. You must choose. If you choose to use an organization name or
        email, then please send a note to [email protected] to help us connect
        your sessions to you.

        If you encounter a bug, or something surprising, please include your
        session capture name in your report.

        If you opt out, you still allow us to count how often users opt out.

        DO NOT USE THIS SOFTWARE FOR HIPAA-COVERED, PERSONALLY IDENTIFIABLE,
        OR SIMILARLY SENSITIVE DATA! Opting out does not guarantee security.
    """
    Population.method_imports()  # Presumably binds recipe methods lazily -- see method_imports.
    assert re.match(r'\w+', name)  # Word characters only (see docstring).
    assert df is not None or csv_path or bdb_path  # Need at least one data source.
    self.name = name
    self.generator_name = name + '_cc' # Because we use the default metamodel.
    self.csv_path = csv_path
    self.df = df
    self.bdb_path = bdb_path
    if logger is None:
      # Pick a logger appropriate to the environment (notebook vs. console).
      if 'IPython' in sys.modules:
        from bdbcontrib.loggers import IPYTHON_LOGGER as ipy
        self.logger = ipy
      else:
        self.logger = BqlLogger()
    else:
      self.logger = logger
    self.bdb = None
    self.status = None
    self.session_capture_name = None
    self.generators = []
    # Count the construction event, then resolve opt-in/opt-out.
    with logged_query('count-beacon', None, name='count-beacon'):
      self.initialize_session_capture(session_capture_name)
    self.initialize()
コード例 #24
0
    def __init__(self,
                 name,
                 csv_path=None,
                 bdb_path=None,
                 df=None,
                 logger=None,
                 session_capture_name=None):
        """Create a Population object, wrapping a bayeslite.BayesDB.

    name : str  REQUIRED.
        A name for the population, should use letters and underscores only.
        This will also be used as a table name in the bdb, and %t in queries
        will expand to this name. %g in queries will expand to the current
        population metamodel, also based on this name.
    csv_path : str
        The path to a comma-separated values file. If specified, will be used
        to populate the bdb. It must exist and be both readable and non-empty.
    df : pandas.DataFrame
        If specified, these data will be used to populate the bdb, superseding
        any csv_path. It must not be empty.
    bdb_path : str
        If specified, store data and analysis results here. If no other data
        source (csv or df) is specified, then it must already have been
        populated. If not specified, we will use a volatile in-memory bdb.
    logger : object
        Something on which we can call .info or .warn to send messages to the
        user. By default a bayeslite.loggers.BqlLogger, but could be QuietLogger
        (only results), SilentLogger (nothing), IpyLogger, CaptureLogger,
        LoggingLogger, or anything else that implements the BqlLogger interface.
    session_capture_name : String
        Signing up with your name and email and sending your session details
        to the MIT Probabilistic Computing Group helps build a community of
        support and helps improve your user experience. You can save your choice
        in a file called 'bayesdb-session-capture-opt.txt' in the directory
        where you run the software, or any parent directory. This option
        overrides any setting in such a file. Any string is interpreted as
        opting in to sending session details. False is interpreted as opting
        out. You must choose. If you choose to use an organization name or
        email, then please send a note to [email protected] to help us connect
        your sessions to you.

        If you encounter a bug, or something surprising, please include your
        session capture name in your report.

        If you opt out, you still allow us to count how often users opt out.

        DO NOT USE THIS SOFTWARE FOR HIPAA-COVERED, PERSONALLY IDENTIFIABLE,
        OR SIMILARLY SENSITIVE DATA! Opting out does not guarantee security.
    """
        # Late-bound method imports -- NOTE(review): confirm intent in
        # Population.method_imports, which is defined elsewhere.
        Population.method_imports()
        assert re.match(r'\w+', name)  # Word characters only (see docstring).
        # At least one data source (or an already-populated bdb) is required.
        assert df is not None or csv_path or bdb_path
        self.name = name
        self.generator_name = name + '_cc'  # Because we use the default metamodel.
        self.csv_path = csv_path
        self.df = df
        self.bdb_path = bdb_path
        if logger is None:
            # Choose a logger suited to the environment (notebook vs. console).
            if 'IPython' in sys.modules:
                from bdbcontrib.loggers import IPYTHON_LOGGER as ipy
                self.logger = ipy
            else:
                self.logger = BqlLogger()
        else:
            self.logger = logger
        self.bdb = None
        self.status = None
        self.session_capture_name = None
        self.generators = []
        # Count the construction event, then resolve opt-in/opt-out.
        with logged_query('count-beacon', None, name='count-beacon'):
            self.initialize_session_capture(session_capture_name)
        self.initialize()
コード例 #25
0
ファイル: recipes.py プロジェクト: jayelm/bdbcontrib
 def q(self, query_string, *bindings):
   '''help_for_query'''
   # Record the raw BQL and its bindings for the session log, then execute.
   with logged_query(query_string, bindings,
                     name=self.session_capture_name):
     return self.query(query_string, *bindings)