Example #1
0
def main():
    '''
        Start the clustering and data mining HTTP service.

        Reads the runtime options, opens the document database, optionally
        ensures its indexes, builds the web application (k-means handlers
        plus their settings), binds it to ``opts.port`` and starts the
        IOLoop. Starting the IOLoop blocks, so this function does not
        return while the loop is running.
    '''
    opts = options.options()

    # Document database; it is also the default database handed to the
    # application through the ``db`` setting.
    db = motor.MotorClient(opts.mongo_host, opts.mongo_port).mango

    if opts.ensure_indexes:
        logging.info('Ensuring indexes...')
        indexes.ensure_indexes(db)
        logging.info('DONE.')

    # Simon
    #
    # Identifier groups capture exactly one path segment ([^/]+). A greedy
    # ``.+`` also matches '/', which made the optional trailing slash part
    # of the captured uuid (e.g. '/clusters/abc/' -> 'abc/').
    application = web.Application(
        [
            # clustering model dataset
            (r'/clusters/(?P<cluster_uuid>[^/]+)/trainers/(?P<trainer_uuid>[^/]+)/?', kmeans.TrainerHandler),
            (r'/clusters/(?P<cluster_uuid>[^/]+)/trainers/?', kmeans.TrainerHandler),
            # clustering model prediction
            (r'/clusters/(?P<cluster_uuid>[^/]+)/predictions/(?P<prediction_uuid>[^/]+)/?', kmeans.PredictionHandler),
            (r'/clusters/(?P<cluster_uuid>[^/]+)/predictions/?', kmeans.PredictionHandler),
            # clustering models
            (r'/clusters/(?P<cluster_uuid>[^/]+)/?', kmeans.Handler),
            (r'/clusters/?', kmeans.Handler)
        ],
        # system database
        db=db,
        # application timezone: arrow.now() for the configured timezone,
        # evaluated once here at startup
        tz=arrow.now(opts.timezone),
        # application domain
        domain=opts.domain,
        # Latent Semantic Analysis (number of components)
        lsa=opts.lsa,
        # Number of centroid seed
        number_seeds=opts.number_seeds,
        # Maximum number of iterations over the complete dataset
        max_iter=opts.max_iterations,
        # Number of random initializations that are tried.
        number_init=opts.number_init,
        # Control early stopping based on the consecutive number of mini batches
        # that does not yield an improvement on the smoothed inertia.
        max_no_improvement=opts.max_no_improvement,
        # Size of the mini batches.
        batch_size=opts.batch_size,
        # Use ordinary k-means algorithm (in batch mode)
        minibatch=opts.minibatch,
        # Print progress reports inside k-means algorithm
        verbose=opts.verbose,
        # Inverse Document Frequency feature weighting
        idf=opts.idf,
        # Use a hashing feature vectorizer
        use_hashing=opts.use_hashing,
        # Maximum number of features (dimensions) to extract from text
        max_features=opts.max_features,
        # Pagination page size
        page_size=opts.page_size
    )

    # Set sql database
    # application.sql = momoko.Pool(
    #    dsn='dbname=simon user=postgres',
    #    size=1
    # )

    # Set graph database
    # application.graph = Neo4J?

    # Set key-value database
    # application.kvalue = Riak?

    # Setting up simon server process
    application.listen(opts.port)
    # Arguments are passed to the logger instead of pre-formatting with '%',
    # so the message is only built when the record is actually emitted.
    logging.info('Listening daemon swarm on http://%s:%s',
                 opts.host, opts.port)

    # Blocks here until the loop is stopped.
    IOLoop.instance().start()
    '''
Example #2
0
if __name__ == '__main__':
    '''
        Clustering and data mining system.
    '''

    _options = options.options()

    # Set document database
    document = motor.MotorClient().open_sync().simon

    # Set default database
    db = document

    if _options.ensure_indexes:
        logging.info('Ensuring indexes...')
        indexes.ensure_indexes(db)
        logging.info('DONE.')

    # base url
    base_url = _options.base_url

    # Simon
    application = web.Application(
        [
            # system status
            (r'/', IndexHandler),

            # clustering model dataset
            (r'/clusters/(?P<cluster_uuid>.+)/trainers/(?P<trainer_uuid>.+)/?', kmeans.TrainerHandler),
            (r'/clusters/(?P<cluster_uuid>.+)/trainers/?', kmeans.TrainerHandler),
            # clustering model prediction