コード例 #1
0
def clusters_processing(datasets, clusters, cluster_ids, api, args, resume,
                        fields=None, session_file=None, path=None, log=None):
    """Create new clusters or reuse the ones referenced in `args`.

    Exactly one of the following happens, checked in this order:

    * `datasets` is truthy and neither `has_clusters(args)` nor
      `args.no_cluster` holds: the incoming `clusters`/`cluster_ids` are
      discarded and `r.create_clusters` is called. When `resume` is truthy,
      `c.checkpoint` is consulted first and whatever ids it returns are
      handed to `r.create_clusters` as the starting `clusters` value.
    * `args.cluster` is truthy: it becomes the only cluster id.
    * `args.clusters` or `args.cluster_tag` is truthy: `clusters` becomes a
      slice copy of the `cluster_ids` received as argument.

    Afterwards, if `cluster_ids` is non-empty and `args.test_set` is truthy,
    both values are replaced by the result of `r.get_clusters`.

    Returns the tuple `(clusters, cluster_ids, resume)`; `resume` may have
    been overwritten by the flag that `c.checkpoint` returned.
    """
    # Same short-circuit order as `datasets and not (A or B)`.
    creation_needed = (datasets and not has_clusters(args)
                       and not args.no_cluster)

    if creation_needed:
        # Start from scratch: datasets are available, no cluster was
        # referenced by the user and cluster creation was not disabled.
        cluster_ids = []
        clusters = []

        # Only 1 cluster per bigmler command at present
        expected_total = 1
        if resume:
            resume, cluster_ids = c.checkpoint(
                c.are_clusters_created, path, expected_total,
                debug=args.debug)
            if not resume:
                found = len(cluster_ids)
                message = u.dated(
                    "Found %s clusters out of %s. Resuming.\n" %
                    (found, expected_total))
                u.log_message(message, log_file=session_file,
                              console=args.verbosity)

            clusters = cluster_ids
            # Remaining count; nothing below reads it again.
            expected_total -= len(cluster_ids)

        creation_args = r.set_cluster_args(
            args, fields=fields, cluster_fields=args.cluster_fields_)
        clusters, cluster_ids = r.create_clusters(
            datasets, clusters, creation_args, args, api,
            path, session_file, log)
    elif args.cluster:
        # A single cluster was given explicitly: use it as is.
        cluster_ids = [args.cluster]
        clusters = cluster_ids[:]
    elif args.clusters or args.cluster_tag:
        # Ids were supplied by the caller: work on a copy of them.
        clusters = cluster_ids[:]

    # A test set is present, so the clusters themselves are retrieved.
    if cluster_ids and args.test_set:
        clusters, cluster_ids = r.get_clusters(
            clusters, args, api, session_file)

    return clusters, cluster_ids, resume
コード例 #2
0
def clusters_processing(datasets, clusters, cluster_ids, api, args, resume,
                        fields=None, session_file=None, path=None,
                        log=None):
    """Build the clusters for this command, or pick up existing ones.

    Branches, evaluated in order:

    1. Truthy `datasets` with neither `has_clusters(args)` nor
       `args.no_cluster`: the `clusters` and `cluster_ids` arguments are
       reset and `r.create_clusters` produces the new values. If `resume`
       is truthy, `c.checkpoint` is called beforehand; the ids it returns
       seed the `clusters` value passed to `r.create_clusters`.
    2. Truthy `args.cluster`: that value is used as the single id.
    3. Truthy `args.clusters` or `args.cluster_tag`: `clusters` is set to
       a slice copy of the received `cluster_ids`.

    Finally, when `cluster_ids` is non-empty and `args.test_set` is truthy,
    `r.get_clusters` supplies the returned `clusters` and `cluster_ids`.

    Returns `(clusters, cluster_ids, resume)`, where `resume` is the flag
    from `c.checkpoint` if that call was made.
    """
    if datasets and not has_clusters(args) and not args.no_cluster:
        # Datasets available, no cluster referenced, creation not disabled.
        clusters = []
        cluster_ids = []

        # Only 1 cluster per bigmler command at present
        wanted = 1
        if resume:
            checkpoint_result = c.checkpoint(c.are_clusters_created,
                                             path,
                                             wanted,
                                             debug=args.debug)
            resume, cluster_ids = checkpoint_result
            if not resume:
                text = ("Found %s clusters out of %s. Resuming.\n"
                        % (len(cluster_ids), wanted))
                message = u.dated(text)
                u.log_message(message,
                              log_file=session_file,
                              console=args.verbosity)

            clusters = cluster_ids
            # Count still outstanding; not consulted after this point.
            wanted -= len(cluster_ids)

        cluster_args = r.set_cluster_args(
            args,
            fields=fields,
            cluster_fields=args.cluster_fields_)
        created = r.create_clusters(datasets, clusters, cluster_args,
                                    args, api, path, session_file, log)
        clusters, cluster_ids = created
    elif args.cluster:
        # The user pointed at one specific cluster.
        cluster_ids = [args.cluster]
        clusters = cluster_ids[:]
    elif args.clusters or args.cluster_tag:
        # Several clusters were referenced; duplicate the ids received.
        clusters = cluster_ids[:]

    # With a test set present the cluster resources are fetched.
    if cluster_ids and args.test_set:
        retrieved = r.get_clusters(clusters, args, api, session_file)
        clusters, cluster_ids = retrieved

    return clusters, cluster_ids, resume