def fetch_partitions(cluster_wrapper, subprocess_module):
    '''
    Construct a mapping of SLURM partition name -> relevant nodearray information.
    Partition names must map one-to-one to nodearrays; if two nodearrays declare
    the same partition, the first one wins and the rest are skipped.
    '''
    partitions = OrderedDict()

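    # get_cluster_status describes the nodearrays and their buckets; get_nodes is
    # queried separately so that nodes which already exist can be listed per partition.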
    _, status_response = _retry_rest(cluster_wrapper.get_cluster_status, True)
    # TODO status_response's nodes doesn't include off nodes...
    _, nodes_response = _retry_rest(cluster_wrapper.get_nodes)

    for nodearray_status in status_response.nodearrays:
        nodearray_name = nodearray_status.name
        if not nodearray_name:
            logging.error("Name is not defined for nodearray. Skipping")
            continue

        nodearray_record = nodearray_status.nodearray
        if not nodearray_record:
            logging.error(
                "Nodearray record is not defined for nodearray status. Skipping"
            )
            continue

        slurm_config = nodearray_record.get("Configuration",
                                            {}).get("slurm", {})
        is_autoscale = slurm_config.get("autoscale")
        partition_name = slurm_config.get("partition", nodearray_name)

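        # Only nodearrays that explicitly set slurm.autoscale=true are managed;
        # everything else is skipped below.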
        if is_autoscale is None:
            logging.warn(
                "Nodearray %s does not define slurm.autoscale, skipping.",
                nodearray_name)
            continue

        if is_autoscale is False:
            logging.debug(
                "Nodearray %s explicitly defined slurm.autoscale=false, skipping.",
                nodearray_name)
            continue

        machine_types = nodearray_record.get("MachineType", "")
        if isinstance(machine_types, basestring):
            machine_types = machine_types.split(",")

        if len(machine_types) > 1:
            logging.warn("Ignoring multiple machine types for nodearray %s",
                         nodearray_name)

        machine_type = machine_types[0] if machine_types else None
        if not machine_type:
            logging.warn("MachineType not defined for nodearray %s. Skipping",
                         nodearray_name)
            continue

        if partition_name in partitions:
            logging.warn(
                "Same partition defined for two different nodearrays. Ignoring nodearray %s",
                nodearray_name)
            continue

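        # Find the bucket matching this machine type so its VM size and
        # max_count limits can be read.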
        bucket = None
        for b in nodearray_status.buckets:
            if b.definition.machine_type == machine_type:
                bucket = b
                break

        if bucket is None:
            logging.error(
                "Invalid status response - missing bucket with machinetype=='%s', %s",
                machine_type, json.dumps(status_response))
            raise CyclecloudSlurmError(
                "Invalid status response - missing bucket with machinetype=='%s'" %
                machine_type)

        vm = bucket.virtual_machine
        if not vm:
            logging.error(
                "Invalid status response - missing virtualMachine definition with machinetype=='%s', %s",
                machine_type, json.dumps(status_response))
            raise CyclecloudSlurmError(
                "Invalid status response - missing virtualMachine definition with machinetype=='%s'" %
                machine_type)

        if bucket.max_count is None:
            logging.error(
                "No max_count defined for  machinetype=='%s'. Skipping",
                machine_type)
            continue

        if bucket.max_count <= 0:
            logging.info(
                "Bucket has a max_count <= 0, defined for machinetype=='%s'. Skipping",
                machine_type)
            continue

        max_scaleset_size = Record(nodearray_record.get("Azure", {})).get(
            "MaxScalesetSize", 40)

        is_hpc = str(slurm_config.get("hpc", True)).lower() == "true"

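        # Non-HPC partitions are not constrained to a single placement group,
        # so treat the scaleset size as effectively unlimited.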
        if not is_hpc:
            max_scaleset_size = 2**31

        partitions[partition_name] = Partition(
            partition_name, nodearray_name, machine_type,
            slurm_config.get("default_partition", False), is_hpc,
            max_scaleset_size, vm.vcpu_count, vm.memory, bucket.max_count)

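        # Record any nodes that already exist in this nodearray so the partition
        # can report them as a hostlist expression.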
        existing_nodes = []

        for node in nodes_response.nodes:
            if node.get("Template") == nodearray_name:
                existing_nodes.append(node.get("Name"))

        if existing_nodes:
            sorted_nodes = sorted(existing_nodes,
                                  key=_get_sort_key_func(
                                      partitions[partition_name].is_hpc))
            partitions[partition_name].node_list = _to_hostlist(
                subprocess_module, ",".join(sorted_nodes))

    partitions_list = partitions.values()
    default_partitions = [p for p in partitions_list if p.is_default]

    if len(default_partitions) == 0:
        logging.warn("slurm.default_partition was not set on any nodearray.")

        # one nodearray, just assume it is the default
        if len(partitions_list) == 1:
            logging.info("Only one nodearray was defined, setting as default.")
            partitions_list[0].is_default = True

    elif len(default_partitions) > 1:
        # more than one partition was marked default; warn but leave the flags as-is
        logging.warn(
            "slurm.default_partition was set on more than one nodearray!")

    return partitions


def _create_nodes(partitions,
                  cluster_wrapper,
                  subprocess_module,
                  existing_policy=ExistingNodePolicy.Error):
    request = NodeCreationRequest()
    request.request_id = str(uuid4())
    request.sets = []

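    # Count the nodes to create, grouped by (partition, placement group, name
    # offset), so that each contiguous run of names becomes one request set.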
    nodearray_counts = {}

    for partition in partitions.itervalues():
        placement_group_base = "{}-{}-pg".format(partition.nodearray,
                                                 partition.machine_type)
        if partition.is_hpc:
            name_format = "{}-pg{}-%d"
        else:
            name_format = "{}-%d"

        current_name_offset = None
        current_pg_index = None

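        # current_name_offset marks where the next contiguous block of node names
        # begins; it resets on a new placement group or after skipping an
        # existing node.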
        expanded_node_list = []
        if partition.node_list:
            expanded_node_list = _from_hostlist(subprocess_module,
                                                partition.node_list)

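        # Assign each potential node index to a placement group holding at most
        # max_scaleset_size nodes.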
        for index in range(partition.max_vm_count):
            placement_group_index = index // partition.max_scaleset_size
            placement_group = placement_group_base + str(placement_group_index)

            if current_pg_index != placement_group_index:
                current_name_offset = 1
                current_pg_index = placement_group_index

            name_index = (index % partition.max_scaleset_size) + 1
            node_name = name_format.format(
                partition.nodearray, placement_group_index) % (name_index)
            if node_name in expanded_node_list:
                # Don't allow recreation of nodes
                if existing_policy == ExistingNodePolicy.Error:
                    raise CyclecloudSlurmError(
                        "Node %s already exists. Please pass in --policy AllowExisting if you want to go ahead and create the nodes anyways."
                        % node_name)
                current_name_offset = name_index + 1
                continue

            # group nodes by the name offset at which their contiguous block of
            # names begins; each group becomes one request set below
            name_offset = current_name_offset

            key = (partition.name, placement_group, placement_group_index,
                   name_offset)
            nodearray_counts[key] = nodearray_counts.get(key, 0) + 1

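    # Convert each grouping into a single NodeCreationRequestSet on the request.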
    for key, instance_count in sorted(nodearray_counts.iteritems(),
                                      key=lambda x: x[0]):
        partition_name, pg, pg_index, name_offset = key
        partition = partitions[partition_name]

        request_set = NodeCreationRequestSet()

        request_set.nodearray = partition.nodearray

        if partition.is_hpc:
            request_set.placement_group_id = pg

        request_set.count = instance_count

        request_set.definition = NodeCreationRequestSetDefinition()
        request_set.definition.machine_type = partition.machine_type

        # TODO should probably make this a util function
        if partition.is_hpc:
            request_set.name_format = "{}-pg{}-%d".format(
                partition.nodearray, pg_index)
        else:
            request_set.name_format = "{}-%d".format(partition.nodearray)

        request_set.name_offset = name_offset
        request_set.node_attributes = Record()
        request_set.node_attributes["StartAutomatically"] = False
        request_set.node_attributes["Fixed"] = True

        request.sets.append(request_set)

    if not request.sets:
        # The caller expected at least one node to be created, so under the
        # Error policy an empty request is itself an error.
        if existing_policy == ExistingNodePolicy.Error:
            raise CyclecloudSlurmError("No nodes were created!")

        logging.info("No new nodes to create with policy %s", existing_policy)
        return

    # one shot, don't retry as this is not monotonic.
    logging.debug("Creation request: %s",
                  json.dumps(json.loads(request.json_encode()), indent=2))
    try:
        _, result = cluster_wrapper.create_nodes(request)
    except Exception as e:
        logging.debug(traceback.format_exc())
        try:
            # attempt to parse the json response from cyclecloud to give a better message
            response = json.loads(str(e))
            message = "%s: %s" % (response["Message"], response["Detail"])
        except Exception:
            logging.debug(traceback.format_exc())
            message = str(e)

        raise CyclecloudSlurmError("Creation of nodes failed: %s" % message)

    num_created = sum([s.added for s in result.sets])

    if num_created == 0 and existing_policy == ExistingNodePolicy.Error:
        raise CyclecloudSlurmError("Did not create a single node!")

    for set_result in result.sets:
        logging.info("Added %s nodes. %s", set_result.added,
                     set_result.message)