def clusters_processing(datasets, clusters, cluster_ids, api, args, resume,
                        fields=None, session_file=None, path=None, log=None):
    """Creates or retrieves clusters from the input data

        :param datasets: list of source dataset resources (or ids)
        :param clusters: previously created cluster resources, if any
        :param cluster_ids: ids of previously created clusters, if any
        :param api: BigML API connection object
        :param args: command-line arguments namespace
        :param resume: whether to resume a previous interrupted run
        :param fields: fields structure of the source dataset
        :param session_file: path of the session log file
        :param path: output directory for checkpoints
        :param log: resource-creation log file
        :return: tuple (clusters, cluster_ids, resume)
    """
    # If we have a dataset but no cluster, we create the cluster unless the
    # --no-cluster flag has been set.
    if datasets and not (has_clusters(args) or args.no_cluster):
        cluster_ids = []
        clusters = []

        # Only 1 cluster per bigmler command at present
        number_of_clusters = 1
        if resume:
            # Recover any clusters already created in the interrupted run.
            resume, cluster_ids = c.checkpoint(
                c.are_clusters_created, path, number_of_clusters,
                debug=args.debug)
            if not resume:
                message = u.dated("Found %s clusters out of %s. Resuming.\n"
                                  % (len(cluster_ids),
                                     number_of_clusters))
                u.log_message(message, log_file=session_file,
                              console=args.verbosity)
                clusters = cluster_ids
                number_of_clusters -= len(cluster_ids)

        cluster_args = r.set_cluster_args(args, fields=fields,
                                          cluster_fields=args.cluster_fields_)
        clusters, cluster_ids = r.create_clusters(datasets, clusters,
                                                  cluster_args, args, api,
                                                  path, session_file, log)
    # If a cluster is provided, we use it.
    elif args.cluster:
        cluster_ids = [args.cluster]
        clusters = cluster_ids[:]
    elif args.clusters or args.cluster_tag:
        clusters = cluster_ids[:]

    # If we are going to predict we must retrieve the clusters
    if cluster_ids and args.test_set:
        clusters, cluster_ids = r.get_clusters(clusters, args, api,
                                               session_file)

    return clusters, cluster_ids, resume
def clusters_processing(datasets, clusters, cluster_ids, api, args, resume,
                        fields=None, session_file=None, path=None, log=None):
    """Creates or retrieves clusters from the input data
    """
    if datasets and not (has_clusters(args) or args.no_cluster):
        # No usable cluster yet and clustering was not disabled:
        # build the cluster(s) from the input datasets.
        clusters, cluster_ids = [], []
        number_of_clusters = 1  # only 1 cluster per bigmler command at present

        if resume:
            # Pick up any clusters left behind by an interrupted run.
            resume, cluster_ids = c.checkpoint(
                c.are_clusters_created, path, number_of_clusters,
                debug=args.debug)
            if not resume:
                u.log_message(
                    u.dated("Found %s clusters out of %s. Resuming.\n"
                            % (len(cluster_ids), number_of_clusters)),
                    log_file=session_file, console=args.verbosity)
                clusters = cluster_ids
                number_of_clusters -= len(cluster_ids)

        cluster_args = r.set_cluster_args(
            args, fields=fields, cluster_fields=args.cluster_fields_)
        clusters, cluster_ids = r.create_clusters(
            datasets, clusters, cluster_args, args, api, path,
            session_file, log)
    elif args.cluster:
        # A single pre-existing cluster id was supplied; use it as-is.
        cluster_ids = [args.cluster]
        clusters = cluster_ids[:]
    elif args.clusters or args.cluster_tag:
        # Cluster ids were resolved upstream; work on a copy of the list.
        clusters = cluster_ids[:]

    if cluster_ids and args.test_set:
        # Prediction needs the full cluster resources, not just the ids.
        clusters, cluster_ids = r.get_clusters(
            clusters, args, api, session_file)

    return clusters, cluster_ids, resume