Code example #1
    def vcenterdatacenter_get_clusters(self,
                                       label,
                                       vcenter,
                                       tenantname,
                                       xml=False):
        '''
        Makes a REST API call to retrieve the clusters of a vcenterdatacenter
        based on its UUID
        '''
        uri = self.vcenterdatacenter_query(label, vcenter, tenantname)

        (s, h) = common.service_json_request(
            self.__ipAddr, self.__port, "GET",
            VcenterDatacenter.URI_DATACENTER_CLUSTERS.format(uri), None, None,
            xml)

        o = common.json_decode(s)

        from cluster import Cluster
        obj = Cluster(self.__ipAddr, self.__port)

        dtlslst = obj.cluster_get_details_list(o['cluster'])

        return dtlslst
Code example #2
    def run(self, points, random_seed):
        random.seed(random_seed)
        # Randomly initiate clusters
        self._clusters = []
        initial_centroids = random.sample(points, self._k)
        for i, initial_centroid in enumerate(initial_centroids):
            new_cluster = Cluster(i, initial_centroid)
            self._clusters.append(new_cluster)

        for current_iteration in range(self._num_iterations):
            # Clear all clusters
            for cluster in self._clusters:
                cluster.remove_point()

            # Re-assign all points
            for point in points:
                distances_to_clusters = {x.id: point.distance_to(x.centroid) for x in self._clusters}
                closest_cluster_id = sorted(distances_to_clusters.keys(), key=lambda x: distances_to_clusters[x])[0]
                self._clusters[closest_cluster_id].add_point(point)

            # Recompute centroids and look if change happened
            changes = [cluster.compute_centroid() for cluster in self._clusters]
            if sum(changes) == 0:  # if everyone is False then sum is 0
                break
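
The run() loop above only touches a small surface of the Point and Cluster classes: distance_to() on points, and id, centroid, add_point(), remove_point() (to clear the previous assignment) and compute_centroid() (which reports whether the centroid moved) on clusters. A minimal sketch of that interface, with hypothetical classes that are not the project's actual implementations:

import math


class Point:
    def __init__(self, coords):
        self.coords = coords

    def distance_to(self, other):
        # Euclidean distance to another Point
        return math.sqrt(sum((a - b) ** 2 for a, b in zip(self.coords, other.coords)))


class Cluster:
    def __init__(self, cluster_id, centroid):
        self.id = cluster_id
        self.centroid = centroid
        self.points = []

    def remove_point(self):
        # drop every point assigned in the previous iteration
        self.points = []

    def add_point(self, point):
        self.points.append(point)

    def compute_centroid(self):
        # recompute the centroid as the mean of the assigned points and
        # report whether it moved, so the caller can detect convergence
        if not self.points:
            return False
        dims = len(self.centroid.coords)
        new_coords = [sum(p.coords[d] for p in self.points) / len(self.points)
                      for d in range(dims)]
        moved = new_coords != list(self.centroid.coords)
        self.centroid = Point(new_coords)
        return moved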
Code example #3
def cluster_by_kmeans():
    """
    Use KMeans to group similar words.
    """
    import os, sys
    sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
    from cluster import Cluster
    from collections import defaultdict
    import random

    clustered_corpus = []
    corpus = _unpickle(config['corpus']['fun2vec'])
    clf = Cluster(config['cluster']['kmeans'])
    m = Model('word2vec')
    cluster = dict(zip(m.vocab, clf.predict(m.vector)))
    del m, clf # memory friendly

    for i, words in enumerate(corpus, 1):
        centroids = defaultdict(list)
        for word in words:
            label = cluster.get(word)
            centroids[label].append(word)
        # If several words share the same cluster label, randomly keep one of them and drop the rest.
        clustered_words = [random.choice(v) if k is not None and len(v) >= 2 else v[0] for k, v in centroids.items()]

        if len(clustered_words) >= 2:
            clustered_corpus.append(clustered_words)
            if i < 100:
                print('----------------------------')
                print(words)
                print(clustered_words)

        if i % 10000 == 0:
            _logger.info(f'Finished {i} profiles')
    _pickle(clustered_corpus, config['corpus']['fun2vec_clustered'])
    _logger.info(f"Saved corpus of {len(clustered_corpus)} profiles in {config['corpus']['fun2vec_clustered']}")
Code example #4
    def __init__(self,
                 clustername,
                 ra=None,
                 dec=None,
                 diam=None,
                 dist=None,
                 pmra=None,
                 pmdec=None,
                 pmradius=5.0,
                 depth=0.5):
        '''
        Constructor
        '''
        self.clustername = clustername
        self.cluster = Cluster(clustername)
        self.coordinates = self.cluster.coordinates

        print(self.cluster)

        def setnotNone(default, clustervalue):
            if default is None:
                if clustervalue is None:
                    raise ValueError
                return clustervalue
            else:
                return default

        self.ra = setnotNone(ra, self.cluster['ra'])
        self.dec = setnotNone(dec, self.cluster['dec'])
        self.diam = setnotNone(diam, self.cluster['diam'])
        self.dist = setnotNone(dist, self.cluster['d'])
        self.pmra = setnotNone(pmra, self.cluster['pmra'])
        self.pmdec = setnotNone(pmdec, self.cluster['pmdec'])

        self.pmradius = pmradius
        self.depth = depth
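
The setnotNone() helper above encodes a simple precedence rule: an explicitly passed argument wins, otherwise the value from the cluster catalogue is used, and a ValueError is raised if both are missing. A tiny stand-alone illustration of that behaviour (the values are made up):

def set_not_none(default, clustervalue):
    if default is None:
        if clustervalue is None:
            raise ValueError('neither an explicit value nor a catalogue value is available')
        return clustervalue
    return default


print(set_not_none(12.5, 10.0))   # explicit argument wins -> 12.5
print(set_not_none(None, 10.0))   # falls back to the catalogue value -> 10.0
# set_not_none(None, None) raises ValueError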
Code example #5
    def solve(self, data, init_method=0):
        self.data = data

        #print (
        #            'Solving for %d clusters with %d data points' % (
        #                self.count, len(self.data)))

        if len(self.data) < self.count:
            raise Exception('Fewer data points than expected clusters')

        self.initial_clusters(init_method)

        prior_centroids = []
        while self.centroids != prior_centroids:
            prior_centroids = self.centroids[:]
            self.centroids = [
                self.centroid(c.get_items()) for c in self.clusters
            ]
            self.clusters = [Cluster(c) for c in self.centroids]
            self.assign_to_clusters()

        if min([len(c.get_items()) for c in self.clusters]) == 0:
            print('Error clustering: empty cluster(s)')
            print([len(c.get_items()) for c in self.clusters])
Code example #6
 def __init__(self):
     super(ClusterStack, self).__init__(Cluster(), 'config_records.py')
Code example #7
def save_avg_result(*option):
    """
    Save results to file
    :param option: optional inputs can be: save_avg_result(pat_path) or save_avg_result(pat_path, bb_log_path) or 
    save_avg_result(pat_path, bb_log_path, BB_Phase) 
    :return: None
    """
    if len(option) == 1:  # only pat_path is assigned
        result_file = option[0] + os.sep + 'results.log'
        attrib_avg = Cluster(option[0]).get_cluster_data_by_time([0], [0],
                                                                 False)
        with open(result_file, 'w') as f:
            f.write('*' * 110 + '\n')
            f.write('All nodes average utilization\n')
            f.write('*' * 110 + '\n')
            for key in attrib_avg.keys():
                f.write('All nodes average {0} utilization: \n {1} \n'.format(
                    key,
                    attrib_avg.get(key).to_string(index=False)))
                f.write('.' * 75 + '\n')
        print 'Results have been saved to: {0}'.format(result_file)
        return
    elif len(option) == 2:  # pat_path and bb_log are assigned
        result_file = option[0] + os.sep + 'results.log'
        phase_name = ('BENCHMARK', 'LOAD_TEST', 'POWER_TEST',
                      'THROUGHPUT_TEST_1', 'VALIDATE_POWER_TEST',
                      'VALIDATE_THROUGHPUT_TEST_1')
        with open(result_file, 'w') as f:
            for phase in phase_name[0:4]:
                start_stamp, end_stamp = BBParse(
                    option[1]).get_stamp_by_phase(phase)
                start_time = datetime.fromtimestamp(start_stamp).strftime(
                    '%Y-%m-%d %H:%M:%S')
                end_time = datetime.fromtimestamp(end_stamp).strftime(
                    '%Y-%m-%d %H:%M:%S')
                attrib_avg = Cluster(option[0]).get_cluster_avg(
                    start_stamp, end_stamp)
                f.write('*' * 110 + '\n')
                f.write(
                    'All nodes average utilization for phase {0} between {1} and {2}:\n'
                    .format(phase, start_time, end_time))
                f.write('*' * 110 + '\n')
                for key in attrib_avg.keys():
                    f.write(
                        'All nodes average {0} utilization: \n {1} \n'.format(
                            key,
                            attrib_avg.get(key).to_string(index=False)))
                    f.write('.' * 75 + '\n')
        print 'Results have been saved to: {0}'.format(result_file)
        return
    elif len(option) == 3:  # pat_path, bb_log and phase_name are assigned
        result_file = option[0] + os.sep + 'results.log'
        with open(result_file, 'w') as f:
            start_stamp, end_stamp = BBParse(option[1]).get_stamp_by_phase(
                option[2])
            start_time = datetime.fromtimestamp(start_stamp).strftime(
                '%Y-%m-%d %H:%M:%S')
            end_time = datetime.fromtimestamp(end_stamp).strftime(
                '%Y-%m-%d %H:%M:%S')
            attrib_avg = Cluster(option[0]).get_cluster_avg(
                start_stamp, end_stamp)
            f.write('*' * 110 + '\n')
            f.write(
                'All nodes average utilization for phase {0} between {1} and {2}:\n'
                .format(option[2], start_time, end_time))
            f.write('*' * 110 + '\n')
            for key in attrib_avg.keys():
                f.write('All nodes average {0} utilization: \n {1} \n'.format(
                    key,
                    attrib_avg.get(key).to_string(index=False)))
                f.write('.' * 75 + '\n')
        print 'Results have been saved to: {0}'.format(result_file)
        return
    else:
        print 'Usage: save_avg_result(pat_path) or save_avg_result(pat_path, bb_log_path) or ' \
              'save_avg_result(pat_path, bb_log_path, BB_Phase)\n'
        exit(-1)
Code example #8
File: main.py Project: javierdemartin/neural-bikes
# Javier de Martin Gil @ 2020

from Data_mgmt import Data_mgmt
from neural_model import Neural_Model
import os
import sys
from cluster import Cluster

# Parameters
# [1]: CITY
# [2]: INFLUX DB PASSWORD
# [3]: LOCAL DATA OR QUERY REMOTE DB

cluster = Cluster(city=sys.argv[1])
labels = cluster.do_cluster()
data = Data_mgmt(city=sys.argv[1])
dataset = data.read_dataset()

data.iterate(dataset=dataset, cluster_data=labels)
data.supervised_learning()
data.split_sets(0.8, 0.15, 0.5)

m = Neural_Model()
m.fit_model()
m.test_models_score()

dataToPredict = data.prepare_tomorrow()

m.tomorrow(data=dataToPredict, append_to_db=False)
Code example #9
File: server.py Project: ZJUSCT/ZJUSPC-server
 def __init__(self):
     self.cluster = Cluster()
     self.nodeList = {}
     self.nodeInfo = []
     self.data = []
Code example #10
from log_parser import LogParser
from cluster import Cluster, NodeFactory
from converter import LogsToEventsConverter
from events import EventLoop

N = 4
WIDTH = HEIGHT = 600
TITLE = "Fast Paxos"
LOGFILE = "../../../logs/test.log"

factory = NodeFactory((20, 20), 8)
cluster = Cluster(TITLE, N, (WIDTH, HEIGHT), factory)

loop = EventLoop()

logs = LogParser.parse(LOGFILE)
converter = LogsToEventsConverter(cluster, loop)
converter.convert(logs)

loop.run()

cluster.close()
Code example #11
File: main.py Project: xiandong79/FSC_code
                       initial_ownership).topTradingCycles()
end_time = datetime.now()
print("mttc_allocation =", mttc_allocation)
"""
register the 'Path' of input data (job.json, stage.json, runtime.json)
"""
json_dir = "./"
"""
Run this simulation with initial_ownership
"""
machines = [Machine(i, core_per_machine[i]) for i in range(0, machine_number)]
users = [
    User(i, initial_ownership[i], preference_value[i])
    for i in range(user_number)
]
cluster = Cluster(machines, users, num_core)
simulator = Simulator(cluster,
                      preference_value,
                      json_dir,
                      user_number,
                      flag="Initial")

cluster.totalJobNumber = 100
simulator.scheduler.scheduler_type = "isolated"
simulator.run()
"""
Run this simulation with Choosy + DS scheduling
"""

machines = [Machine(i, core_per_machine[i]) for i in range(0, machine_number)]
users = [
Code example #12
    def __init__(self, config_file=None, cluster=None):
        # type: (str, Cluster) -> None
        """
        Constructor for Deploy
        :param config_file: location of the rs-conf.yml file
        :param cluster: A cluster object to initialize with
        """

        # Read main configuration file
        with open(config_file, 'r') as config_yaml_file:
            config_dict = yaml.load(config_yaml_file)

        self.redstack_version = config_dict['redstack_version']
        self.directory_base = config_dict['deployment_directory_base']
        self.installation_directory = config_dict['installation_directory']
        self.cookbook_directory = config_dict['cookbook_directory']

        self.log_path = config_dict['log_path']
        self.log_level = config_dict['log_level']

        self.stack_name = config_dict['stack_name']

        self.auth_version = config_dict['auth_version']
        self.image_name = config_dict['image_name']
        self.availability_zone = config_dict['availability_zone']
        self.region = config_dict['region']

        self.openstack_auth_url = config_dict['openstack_auth_url']
        self.external_network_id = config_dict['external_network_id']

        self.try_existing_network = config_dict['try_existing_network']
        self.subnet_cidr = config_dict['subnet_cidr']
        self.expose_ui_ssh = config_dict['expose_ui_ssh']
        self.subnet_dns_nameservers = config_dict['subnet_dns_nameservers']

        self.cacert = config_dict['cacert']

        self.ost_username = config_dict['ost_username']
        self.ost_password = config_dict['ost_password']
        self.ost_project_id = config_dict['ost_project_id']
        self.ost_project_name = config_dict['ost_project_name']
        self.ost_domain = config_dict['ost_domain']

        self.use_existing_openstack = config_dict['use_existing_openstack']

        self.key_name = config_dict['key_name']

        self.stack_type = config_dict['stack_type']

        self.template_name = config_dict['template_file']

        self.hdp_major_version = config_dict['hdp_major_version']
        self.hdp_version = config_dict['hdp_version']
        self.hdp_utils_version = config_dict['hdp_utils']
        self.define_custom_repos = config_dict['define_custom_repos']

        self.ambari_version = config_dict['ambari_version']
        self.ambari_password = config_dict['ambari_password']

        self.fqdn_address = config_dict['fqdn_address']
        self.kerberos_realm = config_dict['kerberos_realm']
        self.kerberos_password = config_dict['kerberos_password']

        self.volume_device = config_dict['volume_device']
        self.mount_location = config_dict['mount_location']

        self.chef_rpm_uri = config_dict['chef_rpm_uri']
        self.chef_version = config_dict['chef_version']
        self.chef_tries = config_dict['chef_tries']
        self.log_chef_to_stdout = config_dict['log_chef_to_stdout']

        self.ambari_db_password = config_dict['ambari_db_password']
        self.mysql_root_password = config_dict['mysql_root_password']

        # To be set when the blueprints are created
        self.blueprint = None
        self.host_mapping = None
        self.stack_definition = None
        self.utils_definition = None

        # Set the deploy name and directory based on the current time
        self.name = "{0}-{1}".format(config_dict["cluster_name"],
                                     str(int(time.time())))
        self.directory = os.path.join(config_dict['deployment_directory_base'],
                                      self.name)

        # Initialize the cluster object based on whether or not a cluster json file was passed
        if not cluster:
            # Read cluster template file
            template_file = '{0}/conf/templates/{1}'.format(
                config_dict['installation_directory'],
                config_dict['template_file'])
            self.cluster = Cluster(
                cluster_name=config_dict['cluster_name'],
                ssh_user=config_dict['ssh_user'],
                private_key=config_dict['existing_key_location'],
                key_name=config_dict['key_name'],
                template_file=template_file,
                fqdn_address=self.fqdn_address)
        else:
            self.cluster = cluster
Code example #13
def FoF(
    galaxy_data,
    candidate_centers,
    richness,
    overdensity,
    max_velocity=2000 * u.km / u.s,
    linking_length_factor=0.1,
    virial_radius=1.5 * u.Mpc / u.littleh,
):
    """
    The Friends-of-Friends algorithm is a clustering algorithm used to identify groups of particles. In this instance, FoF is used to identify clusters of galaxies.

    FoF uses a linking length, l, whereby galaxies within a distance l from another galaxy are linked directly (as friends) and galaxies within a distance l from its friends are linked indirectly (as friends of friends). This network of particles is considered a cluster.
    After locating all candidate clusters, overlapping clusters are merged, with preference towards the center with larger N(d) and abs magnitude.
    A new cluster center is then defined as the brightest galaxy within 0.5 Mpc of the current center.
    Finally, a cluster is only initialized if it has met the threshold richness and overdensity.

    The algorithm is sped up with:
    - numpy vectorization
    - grispy nearest neighbor implementation, which uses cell techniques to efficiently locate neighbors. This is preferred as it allows the use of the haversine metric for spherical coordinates.

    Parameters
    ----------
    galaxy_data: ndarray, shape (n,7)
        Galaxy data with compulsory properties: ['ra', 'dec', 'z', 'abs mag', 'id', 'LR', 'N']

    candidate_centers: ndarray, shape (m,7)
        Array of candidate centers with compulsory properties: ['ra', 'dec', 'z', 'abs mag', 'id', 'LR', 'N']

    max_velocity: float, units [km/s]
        Default value: 2000 km/s

    linking_length_factor: float
        Default value: 0.1

    virial_radius: float, units [Mpc/littleh]
        Default value: 1.5 hMpc

    richness: integer

    overdensity: float

    Returns
    -------
    candidates: list of cluster.Cluster object

    """
    candidates = []
    # sep_arr = [] # tracks change in linking length with redshift

    # tracker identifies galaxies that have been included in another cluster previously to speed up algorithm.
    # candidate_centers was sorted by N(0.5) before to ensure larger clusters are prioritized
    tracker = np.ones(len(candidate_centers))

    # identify cluster candidates
    for i, center in enumerate(
            candidate_centers
    ):  # each row is a candidate center to search around

        if tracker[i]:
            velocity_bin = galaxy_data[
                abs(redshift_to_velocity(galaxy_data[:, 2], center[2])) <=
                max_velocity]  # select galaxies within max velocity

            virial_gsp = GriSPy(velocity_bin[:, :2], metric="haversine")

            # given virial radius is in proper distances, we convert to comoving distance to account for cosmological expansion.
            ang_virial_radius = linear_to_angular_dist(
                virial_radius, center[2]
            ).to("rad")  # convert proper virial radius to angular separation
            max_dist = (
                ang_virial_radius *
                cosmo.comoving_transverse_distance(center[2])).to(
                    u.Mpc,
                    u.dimensionless_angles())  # convert to comoving distance
            max_dist = linear_to_angular_dist(
                max_dist, center[2]
            ).value  # convert comoving distance to angular separation

            virial_dist, virial_idx = virial_gsp.bubble_neighbors(
                np.array([center[:2]]), distance_upper_bound=max_dist
            )  # center must be a ndarray of (n,2)
            virial_points = velocity_bin[tuple(
                virial_idx)]  # convert to tuple for deprecation warning

            if (
                    len(virial_points) >= 12
            ):  # reject if <12 galaxies within virial radius (to save time)
                mean_sep = mean_separation(
                    len(virial_points),
                    center[2],
                    max_dist * u.degree,
                    max_velocity,
                    survey_area=1.7,
                )  # Mpc
                linking_length = (
                    linking_length_factor * mean_sep
                )  # determine transverse LL from local mean separation
                # sep_arr.append([linking_length.value, center[2]])
                linking_length = linear_to_angular_dist(
                    linking_length, center[2]).value  # fix linking length here

                f_gsp = GriSPy(virial_points[:, :2], metric="haversine")
                f_dist, f_idx = f_gsp.bubble_neighbors(
                    np.array([center[:2]]),
                    distance_upper_bound=linking_length
                )  # select galaxies within linking length
                f_points = virial_points[tuple(f_idx)]

                member_galaxies = f_points
                fof_dist, fof_idx = f_gsp.bubble_neighbors(
                    f_points[:, :2], distance_upper_bound=linking_length
                )  # select all galaxies within 2 linking lengths

                for idx in fof_idx:
                    fof_points = virial_points[idx]

                    # ensure no repeated points in cluster
                    mask = np.isin(
                        fof_points, member_galaxies, invert=True
                    )  # filter for points not already accounted for
                    vec_mask = np.isin(mask.sum(axis=1), center.shape[0])
                    fof_points = fof_points[vec_mask].reshape(
                        (-1,
                         center.shape[0]))  # points of 2 linking lengths (FoF)

                    if len(fof_points):
                        member_galaxies = np.concatenate(
                            (member_galaxies, fof_points)
                        )  # merge all FoF points within 2 linking lengths

                if len(member_galaxies) >= richness:  # must have >= richness
                    c = Cluster(center, member_galaxies)
                    candidates.append(c)

                    if not i % 100:
                        logging.info(f"{i} " + c.__str__())

                    # locate centers within member_galaxies (centers of interest)
                    member_gal_id = member_galaxies[:, 4]
                    luminous_gal_id = candidate_centers[:, 4]
                    coi, _, coi_idx = np.intersect1d(member_gal_id,
                                                     luminous_gal_id,
                                                     return_indices=True)

                    # update tracker to 0 for these points
                    # use a fresh loop variable so the outer enumerate index i is not clobbered
                    for coi_i in coi_idx:
                        tracker[coi_i] = 0

            # if len(candidates) >= 100: # for quick testing
            #     break

    # plot_clusters(candidates, flagging=False) # for quick check of clusters

    # tracks mean separation across redshift
    # sep_arr = np.array(sep_arr)
    # plt.plot(sep_arr[:,1], sep_arr[:,0], '.')
    # plt.show()

    # perform overlap removal and merger
    print("Performing overlap removal")
    candidate_clusters = np.array([
        [c.ra, c.dec, c.z, c.gal_id] for c in candidates
    ])  # get specific attributes from candidate center sample
    candidates = np.array(candidates)
    merged_candidates = candidates.copy()
    gal_id_space = candidate_clusters[:, 3]

    for center in candidates:

        # identify overlapping centers (centers lying within virial radius of current cluster)
        velocity_bin = candidate_clusters[
            abs(redshift_to_velocity(candidate_clusters[:, 2], center.z)) <=
            max_velocity]  # select galaxies within max velocity

        center_gsp = GriSPy(velocity_bin[:, :2], metric="haversine")
        c_coords = [center.ra, center.dec]
        max_dist = linear_to_angular_dist(
            virial_radius,
            center.z).value  # convert virial radius to angular distance
        c_dist, c_idx = center_gsp.bubble_neighbors(
            np.array([c_coords]),
            distance_upper_bound=max_dist)  # center must be a ndarray of (n,2)
        c_points = velocity_bin[tuple(c_idx)]

        # merge each overlapping cluster
        if len(c_points):
            for c in c_points:
                c = candidates[gal_id_space == c[-1]][0]

                if center.gal_id == c.gal_id:  # if same center, ignore
                    continue

                # modify the cluster's galaxies in merged_candidates array
                if len(c.galaxies) and len(
                        center.galaxies):  # check both clusters are not empty
                    S = setdiff2d(
                        c.galaxies,
                        center.galaxies)  # identify overlapping galaxies
                    if len(S):
                        new_c = merged_candidates[gal_id_space == c.gal_id][
                            0]  # c from merged_candidates
                        new_center = merged_candidates[
                            gal_id_space == center.gal_id][
                                0]  # center from merged_candidates

                        c_galaxies, center_galaxies = c.remove_overlap(center)
                        new_c.galaxies = c_galaxies
                        new_center.galaxies = center_galaxies

    merged_candidates = np.array([
        c for c in merged_candidates if c.richness >= richness
    ])  # select only clusters >= richness
    if len(merged_candidates) >= len(candidates):
        logging.warning("No candidates were merged!")

    bcg_clusters = merged_candidates.copy()

    # replace candidate center with brightest galaxy in cluster
    print("Searching for BCGs")
    merged_candidates = sorted(merged_candidates,
                               key=lambda x: x.N,
                               reverse=True)  # sort by N

    for center in merged_candidates:
        bcg_space_gal_id = np.array([c.gal_id for c in bcg_clusters])

        # identify galaxies within 0.25*virial radius
        cluster_gsp = GriSPy(
            center.galaxies[:, :2],
            metric="haversine")  # for galaxies within a cluster
        c_coords = [center.ra, center.dec]
        max_dist = 0.25 * (linear_to_angular_dist(virial_radius,
                                                  center.z).value
                           )  # convert virial radius to angular distance
        c_dist, c_idx = cluster_gsp.bubble_neighbors(
            np.array([c_coords]),
            distance_upper_bound=max_dist)  # center must be a ndarray of (n,2)
        bcg_arr = center.galaxies[tuple(c_idx)]

        if len(bcg_arr) and len(
                center.galaxies
        ):  # check for galaxies within 0.25*virial radius

            mag_sort = bcg_arr[bcg_arr[:, 3].argsort(
            )]  # sort selected galaxies by abs mag (brightness)
            mask = np.isin(
                mag_sort[:, 4], bcg_space_gal_id, invert=True
            )  # filter for galaxies that are not existing centers
            mag_sort = mag_sort[mask]

            if len(mag_sort):
                bcg = mag_sort[0]  # brightest cluster galaxy (bcg)

                # if bcg brighter than current center, replace it as center
                if (abs(bcg[3]) > abs(center.bcg_absMag)) and (bcg[4] !=
                                                               center.gal_id):
                    new_cluster = Cluster(
                        bcg, center.galaxies)  # initialize new center

                    # compare as an ndarray so np.where actually matches the current center
                    bcg_clusters = np.delete(
                        bcg_clusters,
                        np.where(np.array([c.gal_id for c in bcg_clusters]) ==
                                 center.gal_id),
                    )
                    bcg_clusters = np.concatenate(
                        (bcg_clusters,
                         np.array([new_cluster])))  # add new center to array

    bcg_clusters = np.array([
        c for c in bcg_clusters if c.richness >= richness
    ])  # select only clusters >= richness
    final_clusters = []

    # N(0.5) and galaxy overdensity
    print("Selecting appropriate clusters")
    for center in bcg_clusters:
        center.N = find_number_count(center,
                                     center.galaxies,
                                     distance=0.5 * u.Mpc /
                                     u.littleh)  # find number count N(0.5)
        center.D = center_overdensity(center, galaxy_data,
                                      max_velocity)  # find overdensity D

        # Initialize the cluster only if N(0.5) >= 8 and D >= overdensity
        if ((center.N >= 8) and (center.richness >= richness)
                and (center.D >= overdensity)):
            final_clusters.append(center)

    return final_clusters
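
The docstring of FoF() describes the linking idea: galaxies within a linking length of a seed are friends, galaxies within the linking length of those friends are friends of friends, and the connected set becomes a cluster candidate. A minimal 2-D sketch of that core step using plain numpy and a Euclidean metric, leaving out the velocity cuts and haversine machinery used above (all names here are hypothetical):

import numpy as np


def fof_members(positions, seed_index, linking_length):
    # Return indices of all points reachable from the seed through links <= linking_length.
    members = {seed_index}
    frontier = [seed_index]
    while frontier:
        current = frontier.pop()
        dists = np.linalg.norm(positions - positions[current], axis=1)
        for idx in np.where(dists <= linking_length)[0]:
            if idx not in members:
                members.add(idx)
                frontier.append(idx)
    return sorted(int(i) for i in members)


points = np.array([[0.0, 0.0], [0.4, 0.0], [0.8, 0.1], [5.0, 5.0]])
print(fof_members(points, seed_index=0, linking_length=0.5))  # -> [0, 1, 2]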
Code example #14
def perform_recursive_clustering(cluster_collection, startAt):
    """
    Performs recursive clustering on a list of clusters given via cluster_collection.
    The recursion is performed according to the Discoverer paper by Cui et al.
    First, the number of distinct values for each token is calculated in each cluster;
    if this number falls within a configurable range, the token is considered an FD candidate.
    Then the number of subclusters that would be generated is calculated. If these subclusters
    contain at least one cluster with more than a configurable number of messages, the clustering
    is performed and the token is considered an FD. The recursion is then performed on each of the
    new clusters with the next token.
    """

    # Scan for FD token, Phase 1
    clusters = cluster_collection.get_all_cluster(
    )[:]  # <-- "[:]" Very very important... otherwise our iterated list will change because of deletions...

    # Save startAt information over cluster iteration
    __startAt = startAt

    for cluster in clusters:
        if Globals.getConfig().debug:
            print "Starting processing for next cluster ({0} messages)".format(
                len(cluster.get_messages()))

        startAt = __startAt
        #tokenValue = token.get_token()
        # Check distinct number of values of token
        foundFD = False
        maxTokenIdx = len(cluster.get_messages()[0].get_tokenlist())
        while not foundFD and startAt < maxTokenIdx:
            l = []
            #print "Analyzing token %s" % startAt
            # Check whether this might be a length token
            if "lengthfield" in set(cluster.get_semantics_for_token(startAt)):
                # Current token is a length token. Do not treat as FD
                startAt += 1
                continue
            if not Globals.getConfig().allowAdjacentFDs:
                if startAt > 0:
                    if "FD" in set(cluster.get_semantics_for_token(
                            startAt - 1)):  # We have an adjacent FD
                        print "Two adjacent FDs forbidden by configuration, skipping to next token"
                        continue

            for message in cluster.get_messages():
                l.append(message.get_tokenAt(startAt).get_token())
            numOfDistinctValuesForToken = len(set(l))

            if Globals.getConfig(
            ).minDistinctFDValues < numOfDistinctValuesForToken <= Globals.getConfig(
            ).maxDistinctFDValues:
                # FD candidate found
                # Check number of potential clusters
                sumUp = Counter(l)
                wouldCluster = False
                for key in sumUp.keys():
                    if sumUp.get(key) > Globals.getConfig(
                    ).minimumClusterSize:  # Minimum cluster size of at least one cluster
                        wouldCluster = True
                        break
                if wouldCluster:
                    # Check if adjacent text/text FDs are allowed in text protocols
                    if Globals.getProtocolClassification(
                    ) == Globals.protocolText:
                        if not Globals.getConfig().allowAdjacentTextFDs:
                            if startAt > 0:
                                # Check whether the previous one is a text FD (type text and no semantic numeric)
                                if "FD" in set(
                                        cluster.get_semantics_for_token(
                                            startAt - 1)):
                                    if cluster.get_format(
                                            startAt -
                                            1) == Message.typeText and (
                                                cluster.get_format(startAt)
                                                == Message.typeText and
                                                ("numeric" not in cluster.
                                                 get_semantics_for_token(
                                                     startAt - 1))):
                                        print "Two adjacent text FDs forbidden by configuration, skipping to next token"
                                        continue
                    # Create new cluster
                    if Globals.getConfig().debug:
                        print "Subcluster prerequisites fulfilled. Adding FD semantic, splitting cluster and entering recursion"
                    # Senseless here: message.get_tokenAt(startAt).add_semantic("FD")
                    cluster.add_semantic_for_token(startAt, "FD")
                    newCollection = ClusterCollection()
                    for key in sumUp.keys():
                        messagesWithValue = cluster.get_messages_with_value_at(
                            startAt, key)
                        newCluster = Cluster(
                            messagesWithValue[0].get_tokenrepresentation(),
                            "recursion")
                        newCluster.setSplitpoint("{0}".format(startAt))
                        newCluster.add_messages(messagesWithValue)
                        newCluster.add_semantic_for_token(startAt, "FD")
                        newCollection.add_cluster(newCluster)
                    if Globals.getConfig().debug:
                        print "{0} sub clusters generated".format(
                            len(sumUp.keys()))

                    # Perform format inference on new cluster collection
                    formatinference.perform_format_inference_for_cluster_collection(
                        newCollection)
                    semanticinference.perform_semantic_inference(newCollection)

                    # Merge clusters with same format
                    while newCollection.mergeClustersWithSameFormat():
                        pass

                    # Perform needle wunsch
                    # Edit 20120120 - not here
                    #===========================================================
                    # cluster1 = newCollection.get_random_cluster()
                    # cluster2 = newCollection.get_random_cluster()
                    # format1 = cluster1.get_formats()
                    # format2 = cluster2.get_formats()
                    # needlewunsch.needlewunsch(format1, format2)
                    #
                    #===========================================================
                    # Perform recursive step
                    perform_recursive_clustering(newCollection, startAt + 1)
                    # Remove old parent cluster
                    cluster_collection.remove_cluster(cluster)
                    cluster_collection.add_clusters(
                        newCollection.get_all_cluster())
                    foundFD = True
                else:
                    pass
                    #print "Subclustering prerequisites not fulfilled. Will not sub-cluster"
            startAt += 1
        if Globals.getConfig().debug:
            print "Recursive clustering analysis for cluster finished"
Code example #15
import zmq
import time
import threading
import json
import socket
from cluster import Cluster, Node
from pprint import pprint

def stop_all ():
    cluster.stop ()
    time.sleep (1)
    ctx.term ()

def print_status ():
    pprint (cluster.status_report (), indent=4)

ctx = zmq.Context (1)
node = Node (5760, 5770, 'node3')
cluster = Cluster (node, ctx)
cluster.start ()
Code example #16
File: extractor.py Project: tppolkow/brevity
    def extract(raw_txt, logger):

        c = Cleaner()
        cleaned_text_list = c.clean(raw_txt)

        logger.info('Done cleaning')
        logger.debug(len(cleaned_text_list))
        logger.debug(cleaned_text_list)

        matrix_builder = MatrixBuilder()
        matrix = matrix_builder.build_sim_matrix(cleaned_text_list, logger)

        logger.info('Done building sim matrix')
        logger.debug('Dimensions: {}'.format(matrix.shape))
        logger.debug(matrix)

        g = Grapher()
        pageranks = g.graph(matrix)

        logger.info('Generated graph and got pageranks')
        logger.debug(pageranks)

        total_doc_size = len(cleaned_text_list)
        if total_doc_size < 300:
            summary_length = int(0.4 * total_doc_size)
        elif total_doc_size < 800:
            summary_length = int(0.2 * total_doc_size)
        elif total_doc_size < 1500:
            summary_length = int(0.1 * total_doc_size)
        else:
            summary_length = int(0.05 * total_doc_size)

        top_ranked = nlargest(summary_length, pageranks, key=pageranks.get)
        top_ranked.sort()

        cl = Cluster()
        top_ranked = cl.splitIntoParagraph(top_ranked, 7.5)

        logger.debug(top_ranked)
        result = ''
        for paragraph in top_ranked:
            for key in paragraph:
                top_ranked_sentence = cleaned_text_list[key]
                result += '{}. '.format(top_ranked_sentence)
            result += '\n\n'

        try:
            del c
            del cleaned_text_list
            del matrix_builder
            del matrix
            del g
            del pageranks
            del total_doc_size
            del summary_length
            del top_ranked
            del cl
            del raw_txt
        except:
            pass

        return result
Code example #17
 def setUp(self):
     print "### setUp ###"
     self.cluster = Cluster()
Code example #18
 def set_gadget_snap_single(self, snapnr, path_to_snaphot, verbose=False):
     h = Cluster(None, verbose=verbose)
     h.name = "snap{0}".format(snapnr)
     h.time = snapnr * self.dt
     h.set_gadget_single_halo(snapnr, path_to_snaphot, verbose=verbose)
     setattr(self, h.name, h)
Code example #19
File: k-means.py Project: alemas/k-means-python
from iris_plant import IrisPlant

parser = argparse.ArgumentParser()
parser.add_argument('k', metavar='k', type=int,
                    help='The number of clusters to be used')
args = parser.parse_args()

k = args.k

clusters = []
plants = file_reader.readFile()
centroids = random.sample(plants, k)

# Initialize the clusters with random centroids
for i in range(0, k):
    cluster = Cluster(i)
    cluster.centroid = centroids[i]
    clusters.append(cluster)

isConverging = False
iterationsLimit = 1000
currentIteration = 0

while (currentIteration < iterationsLimit and not isConverging):

    for cluster in clusters:
        cluster.plants = []

    for plant in plants:
        # The first element represents the closest cluster and the second
        # represents the distance to its centroid
Code example #20
import threading
from random import randrange
import logging

from monitor import send_state_update

logging.basicConfig(format='%(asctime)s - %(levelname)s: %(message)s',
                    datefmt='%H:%M:%S',
                    level=logging.INFO)

from Candidate import Candidate, VoteRequest
from Follower import Follower
from Leader import Leader
from cluster import Cluster, ELECTION_TIMEOUT_MAX

cluster = Cluster()


class TimerThread(threading.Thread):
    def __init__(self, node_id):
        threading.Thread.__init__(self)
        self.node = cluster[node_id]
        self.node_state = Follower(self.node)
        self.election_timeout = float(
            randrange(ELECTION_TIMEOUT_MAX / 2, ELECTION_TIMEOUT_MAX))
        self.election_timer = threading.Timer(self.election_timeout,
                                              self.become_candidate)

    def become_leader(self):
        logging.info(f'{self} become leader and start to send heartbeat ... ')
        send_state_update(self.node_state, self.election_timeout)
Code example #21
    def dbscan(self, data):
        self.init_params()
        self.data = data

        ## Setting up the plot
        fig = plt.figure()

        axis_proj = 'rectilinear'
        if self.dim > 2:
            axis_proj = '%dd' % self.dim

        ax = fig.add_subplot(111, projection=axis_proj)

        #default noise cluster
        noise = Cluster('Noise', self.dim)
        self.clusters.add(noise)

        for point in data:
            if point not in self.visited:
                self.visited.append(point)
                neighbour_pts = self.region_query(point)
                if len(neighbour_pts) < self.min_pts:
                    noise.add_point(point)
                else:
                    name = 'cluster-%d' % self.cluster_count
                    new_cluster = Cluster(name, self.dim)

                    self.cluster_count += 1
                    self.expand_cluster(new_cluster, point, neighbour_pts)

                    if self.dim == 2:
                        ax.scatter(new_cluster.get_X(),
                                   new_cluster.get_Y(),
                                   c=self.color[self.cluster_count %
                                                len(self.color)],
                                   marker='o',
                                   label=name)
                    elif self.dim == 3:
                        ax.scatter(new_cluster.get_X(),
                                   new_cluster.get_Y(),
                                   new_cluster.get_Z(),
                                   marker='o',
                                   c=self.color[self.cluster_count %
                                                len(self.color)],
                                   label=name)

                    ax.hold(True)

        if len(noise.get_points()) != 0:
            if self.dim > 2:
                ax.scatter(noise.get_X(),
                           noise.get_Y(),
                           noise.get_Z(),
                           marker='x',
                           label=noise.name)
            else:
                ax.scatter(noise.get_X(),
                           noise.get_Y(),
                           marker='x',
                           label=noise.name)

        print("Number of clusters found: %d" % self.cluster_count)

        ax.hold(False)
        ax.legend(loc='lower left')
        ax.grid(True)
        plt.title(r'DBSCAN Clustering', fontsize=18)
        plt.show()
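
dbscan() above delegates the density logic to region_query() and expand_cluster(), which are not part of this excerpt. A minimal sketch of what those two steps typically do in DBSCAN, on plain coordinate tuples (hypothetical helpers, not this project's implementations):

def region_query(data, point, eps):
    # all points within eps of the given point (including the point itself)
    return [q for q in data if sum((a - b) ** 2 for a, b in zip(point, q)) <= eps ** 2]


def expand_cluster(data, point, neighbours, cluster, visited, assigned, eps, min_pts):
    # grow the cluster by following density-reachable neighbours
    cluster.append(point)
    assigned.add(point)
    queue = list(neighbours)
    while queue:
        q = queue.pop()
        if q not in visited:
            visited.add(q)
            q_neighbours = region_query(data, q, eps)
            if len(q_neighbours) >= min_pts:
                queue.extend(q_neighbours)
        if q not in assigned:
            cluster.append(q)
            assigned.add(q)


data = [(0.0, 0.0), (0.1, 0.0), (0.0, 0.1), (5.0, 5.0)]
visited, assigned, clusters = set(), set(), []
for p in data:
    if p in visited:
        continue
    visited.add(p)
    neighbours = region_query(data, p, eps=0.5)
    if len(neighbours) >= 3:
        cluster = []
        expand_cluster(data, p, neighbours, cluster, visited, assigned, eps=0.5, min_pts=3)
        clusters.append(cluster)
print(clusters)  # one cluster with the three nearby points; (5.0, 5.0) is left as noise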
Code example #22
def create_clusters(node_data):
    for node_name, node_tags in node_data:
        c = Cluster(ID=int(node_name))
        c.add_node(node_name, node_tags)
        clusterglobals.clusters_list.append(c)
Code example #23
    for j in range(len(nodes)):
        e = np.zeros((len(nodes), 1))
        e[i] = 1
        e[j] = -1
        N[i][j] = vg * np.matrix.dot(
            np.matrix.dot(np.matrix.transpose(e), lPlus), e)

# Initialize clusters
clusters = []
prototypes = set()
for i in range(numOfClusters):
    rand = np.random.randint(0, len(nodes) - 1)
    while rand in prototypes:
        rand = np.random.randint(0, len(nodes) - 1)
    prototypes.add(rand)
    clusters.append(Cluster(i, rand))

# Do labeling
labels = np.zeros(len(nodes))
iteration = 0
while True:
    iteration += 1
    if iteration > maxIterations: break
    print('Iteration ', iteration, '/', maxIterations)
    changed = False

    # Allocation of the observations
    for i in range(len(nodes)):
        min = math.inf
        nearestCluster = None
        for cluster in clusters:
Code example #24
    def ABHclustering(self, constraints, final_n_of_clusters, clusters=None):
        """
        Main hierarhical clustering loop
        """
        self.l.log("Creating transitive ML closure...")
        stevec = len(clusters)

        for x in constraints:
            if 'must-link' in x:
                #print("omejitev: ", x)
                kluc1 = self.getClusterID(x['point'][0], clusters)
                kluc2 = self.getClusterID(x['must-link'][0], clusters)
                #print(kluc1, " | " , kluc2," | ", stevec, kluc1 == kluc2)
                if kluc1 != kluc2:
                    tocke = []
                    tocke.append(clusters[kluc1].points)
                    tocke.append(clusters[kluc2].points)
                    clusters.pop(kluc1)
                    clusters.pop(kluc2)
                    nov = Cluster(stevec)
                    nov.update(kluc1, kluc2, 0, tocke)  #TLE DEJ NOT
                    clusters.update({stevec: nov})
                    stevec += 1
        m = stevec
        self.l.log("Creating distance matrix....")
        self.distances = {}
        self.clusters = clusters
        stevec = 0
        for c in self.clusters:
            print(self.clusters[c].points, self.clusters[c].clusterId)
            for p in self.clusters[c].points:
                stevec += 1

        print(len(self.clusters), stevec)

        z = [(clusters[a].clusterId, clusters[b].clusterId)
             for a in self.clusters for b in self.clusters]
        for l in z:
            kljuc1 = str(l[0]) + " " + str(l[1])
            kljuc2 = str(l[1]) + " " + str(l[0])
            if l[0] != l[1]:
                if kljuc1 in self.distances:
                    continue
                elif kljuc2 in self.distances:
                    continue
                else:
                    if self.linkage == "Ward":
                        c = []
                        u = []
                        v = []

                        for p in self.clusters[l[0]].points:
                            c.append(p.coords)
                            u.append(p.coords)
                        for r in self.clusters[l[1]].points:
                            c.append(r.coords)
                            v.append(r.coords)
                        centroid_uv = np.average(c, axis=0)
                        centroid_u = np.average(u, axis=0)
                        centroid_v = np.average(v, axis=0)
                        dist1 = 0
                        dist2 = 0
                        dist3 = 0
                        for point in c:
                            if self.distance_type == "Cosine":
                                dist1 += spatial.distance.cosine(
                                    centroid_uv, point)**2
                            elif self.distance_type == "Euclidean":
                                dist1 += spatial.distance.euclidean(
                                    centroid_uv, point)**2
                        for point in u:
                            if self.distance_type == "Cosine":
                                dist2 += spatial.distance.cosine(
                                    centroid_u, point)**2
                            elif self.distance_type == "Euclidean":
                                dist2 += spatial.distance.euclidean(
                                    centroid_u, point)**2
                        for point in v:
                            if self.distance_type == "Cosine":
                                dist3 += spatial.distance.cosine(
                                    centroid_v, point)**2
                            elif self.distance_type == "Euclidean":
                                dist3 += spatial.distance.euclidean(
                                    centroid_v, point)**2
                        dist = dist1 - dist2 - dist3
                        self.distances.update({kljuc1: dist})
                    elif self.linkage == "Average":
                        u = [(a, b) for a in self.clusters[l[0]].points
                             for b in self.clusters[l[1]].points]
                        dist = self.average_linkage(u)
                        self.distances.update({kljuc1: dist})
                    else:
                        print("Error creating distance matrix...")
                        exit(1)

        self.l.log("Finding clusters...")
        '''
        s = sorted(self.distances.items(), key=lambda x: x[1])
        for k, v in s:
            print(k, v)
        '''
        print("st. omejitev: ", len(constraints))
        #print("clustri: ", self.clusters.keys())

        self.Z = np.array([])

        #n = len(self.points)  # initially, every example is its own cluster
        n = len(self.clusters)
        idZ = 0
        stop_clustering = False
        while (n != final_n_of_clusters):
            #print("### ",n," ###")
            condition = True
            #clusters_checked = []
            while condition:
                """
                if len(clusters_checked) == len(self.clusters):
                    print("Ni mozno nadaljne zruzevanje, ostalo je ",len(self.clusters)," clustrov.")
                    break
                dist, pair = self.closest_clusters(clusters_checked)
                if(pair is None):
                    stop_clustering = True
                    break
                par = list()
                for el in pair:
                    par.append(el)
                self.constraints = self.sort_constraints()
              
                #ali ima katerakoli tocka iz obeh clustrov ML, jo zdruzi in ponovno poisci najblizja clustra
                #ML_pair = self.check_must_link(constraints, self.clusters[par[0]].points)
                
                if ML_pair == -1:
                    ML_pair = self.check_must_link(constraints, self.clusters[par[1]].points)
                if ML_pair != -1:
                    par[0] = ML_pair[0]
                    par[1] = ML_pair[1]
                condition = self.check_cannot_link(constraints, self.clusters[par[0]].points, self.clusters[par[1]].points)
                if condition:
                    clusters_checked.append([par[0], par[1]])
                dist = self.cluster_distance(par[0], par[1])
                
                
            if stop_clustering:
                break
            #print("par: ", par, ", dist: ", round(dist,2), " ", len(self.clusters))
                """
                key = min(self.distances, key=self.distances.get)
                kljuc = key
                par = key.split(' ')
                par = [int(i) for i in par]
                dist = self.distances[kljuc]
                #print("   ->",key, " ", self.check_cannot_link(constraints, self.clusters[par[0]].points, self.clusters[par[1]].points))
                if self.check_cannot_link(constraints,
                                          self.clusters[par[0]].points,
                                          self.clusters[par[1]].points):
                    self.distances[kljuc] = sys.maxsize
                    if dist == sys.maxsize:
                        self.l.log(
                            "ABHC cannot find clusters under those constraints..."
                        )
                        return self.clusters
                    print("   Cannot link:", par)
                else:
                    break
                # print("--------------------")
                # print(par[0], par[1])
            #print(self.distances.keys())
            self.distances.pop(kljuc, None)
            self.izbrisi_razdalje(par[0])
            self.izbrisi_razdalje(par[1])
            #print(self.distances.keys())
            tocke = []
            tocke.append(self.clusters[par[0]].points)
            tocke.append(self.clusters[par[1]].points)
            #print("tocke: ", len(tocke))
            novCluster = Cluster(m + idZ)
            novCluster.update(par[0], par[1], dist, tocke)
            novCluster.centroid = novCluster.calculateCentroid()
            self.clusters.pop(par[0])
            self.clusters.pop(par[1])
            self.clusters.update({(m + idZ): novCluster})
            #print("clustri:")
            #print(self.clusters.keys())
            #print("dodajam razdalje...")

            #print("NOV:" ,m+idZ)
            self.dodaj_razdalje(m + idZ)

            print("par: ", par, "dist: ", '%.08f' % dist)
            if idZ == 0:
                self.Z = [par[0], par[1], dist, novCluster.n]
            else:
                newrow = [par[0], par[1], dist, novCluster.n]
                self.Z = np.vstack([self.Z, newrow])

            n = len(self.clusters)
            idZ += 1
        # remember the examples that ended up in a different group than in the previous iteration
        self.diff = []
        clusters_checked = set()
        hm = 0

        for cluster in self.clusters:
            val = -1
            for point in self.clusters[cluster].points:
                hm += 1
                if val < 0:
                    val = self.prev_dict[point.reference]
                    if val in clusters_checked:
                        self.diff.append(point.reference)
                else:
                    if val != self.prev_dict[point.reference]:
                        self.diff.append(point.reference)
            clusters_checked.add(val)
        self.prev_dict = self.make_dict()
        print(len(self.diff))
        print(sorted(self.diff))
        print("stevilo primerov: ", hm)
        return self.clusters
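
In the Ward branch above, the distance stored for a pair of clusters is the growth in within-cluster sum of squared distances caused by merging them: the squared distances of all points to the merged centroid, minus the same quantity computed for each cluster on its own. A small numpy illustration of that quantity for the Euclidean case (names and data are illustrative):

import numpy as np


def sse(points):
    # sum of squared Euclidean distances to the centroid
    centroid = points.mean(axis=0)
    return float(((points - centroid) ** 2).sum())


def ward_increase(u, v):
    return sse(np.vstack([u, v])) - sse(u) - sse(v)


u = np.array([[0.0, 0.0], [0.0, 1.0]])
v = np.array([[5.0, 5.0], [5.0, 6.0]])
print(ward_increase(u, v))        # large: merging two far-apart clusters is costly
print(ward_increase(u, u + 0.1))  # small: merging nearly identical clusters is cheap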
Code example #25
def runAlgorithm(state: str, populationVar: float, compactnessLvl: str):

    globalPrecinctDict.clear()
    globalClusterList.clear()
    tempClusters.clear()
    clusterToPrecinctListDict.clear()

    globalNXGraph = nx.Graph()
    noValidEdgeCount = 0

    precinctsJSONData = {}
    if state.lower() == myconstants.ARKANSAS or state.lower(
    ) == myconstants.ARKANSAS_ABBREVIATION:
        requestedNumDistricts = myconstants.ARKANSAS_NUM_DISTRICTS
        with open(myconstants.ARKANSAS_NEIGHBOR_FILENAME) as f:
            precinctsJSONData = json.load(f)
        #print("arkansas selected")
    elif state.lower() == myconstants.VIRGINIA or state.lower(
    ) == myconstants.VIRGINIA_ABBREVIATION:
        requestedNumDistricts = myconstants.VIRGINIA_NUM_DISTRICTS
        with open(myconstants.VIRGINIA_NEIGHBOR_FILENAME) as f:
            precinctsJSONData = json.load(f)
        #print("virginia selected")
    else:
        requestedNumDistricts = myconstants.SOUTHCAROLINA_NUM_DISTRICTS
        with open(myconstants.SOUTHCAROLINA_NEIGHBOR_FILENAME) as f:
            precinctsJSONData = json.load(f)
        #print("south carolina selected")

    for id in precinctsJSONData:
        globalPrecinctDict.update({id: Precinct(precinctsJSONData[id])})
        globalPrecinctDict[id].setPrecinctID(id)

    # load initial cluster objects into list, initialize precinct neighborLists with objects instead of numbers
    for p in globalPrecinctDict.values():
        neighborLst = []

        for id in p.getPrecinctNeighbors():
            neighborLst.append(globalPrecinctDict[id])
        p.setPrecinctNeighbors(set(neighborLst))
        globalClusterList.append(
            Cluster(p))  # create a cluster out of precinct

    # turn the precinct neighbor list into its corresponding cluster neighbor list
    for c in globalClusterList:
        correspondingClusterNeighborList = []
        for p in c.getClusterNeighborsList():
            for c1 in globalClusterList:
                if p.getPrecinctID() == c1.getClusterID():
                    correspondingClusterNeighborList.append(c1)
        c.setClusterNeighborsList(correspondingClusterNeighborList)

    # set up job params
    idealPopulation = calcIdealPopulation()
    populationVariance = populationVar
    compactness = compactnessLvl  # possible compactness levels: not, somewhat, very, extremely

    # try with contracted_nodes and nx
    for c in globalClusterList:
        globalNXGraph.add_node(c.getClusterID())

    edgeList = []
    for c in globalClusterList:
        for n in c.getClusterNeighborsList():
            if c.getClusterID() is not n.getClusterID():
                edgeList.append(tuple((c.getClusterID(), n.getClusterID())))
    edgeList = list(set(tuple(sorted(edge)) for edge in edgeList))
    globalNXGraph.add_edges_from(edgeList)

    for c in globalClusterList:
        clusterToPrecinctListDict.update(
            {c.getClusterID(): [c.getClusterID()]})

    # UC29: Generate Seed Districting
    # Merge random clusters until there are {requestedNumDistricts} clusters left
    #currNumClusters = len(globalClusterList)
    currNumClusters = len(list(globalNXGraph.nodes))
    while (currNumClusters > requestedNumDistricts):
        randomNode = random.choice(list(globalNXGraph.nodes))

        if list(globalNXGraph.adj[randomNode]):
            randomNodeNeighbor = random.choice(list(globalNXGraph.adj[randomNode]))

            globalNXGraph = nx.contracted_nodes(globalNXGraph,
                                                randomNode,
                                                randomNodeNeighbor,
                                                self_loops=False)

            precinctList1 = clusterToPrecinctListDict[randomNode]
            precinctList2 = clusterToPrecinctListDict[randomNodeNeighbor]
            clusterToPrecinctListDict[
                randomNode] = precinctList1 + precinctList2
            del clusterToPrecinctListDict[randomNodeNeighbor]
        else:
            continue

        currNumClusters = len(list(globalNXGraph.nodes))

    currIterationCount = 1
    start_time = time.time()

    # keep iterating until all clusters are acceptable or until we've hit our iteration limit
    while currIterationCount <= numIterations:  #or allClustersAcceptableCheck():
        # UC30: Generate a random districting satisfying constraints
        # Combine the two sub-graphs to form a new sub-graph of simple nodes.
        #print()
        #print("ITERATION {}".format(currIterationCount))
        #print("Picking two random clusters to merge...")
        randomNode = random.choice(list(globalNXGraph.nodes))
        randomNodeNeighbor = random.choice(list(globalNXGraph.adj[randomNode]))
        #randomCluster = random.sample(globalClusterList, 1) # random.sample returns a list
        #randomClusterNeighbor = random.sample(randomCluster[0].getClusterNeighborsList(), 1)
        #print("Merging {} and {}".format(randomCluster[0], randomClusterNeighbor[0]))
        #print()

        tempClusters.clear()  # update the temp list that tracks the two clusters currently being merged
        tempClusters.append(randomNode)
        tempClusters.append(randomNodeNeighbor)

        #print(tempClusters)

        combinedCluster = combineClusters(randomNode, randomNodeNeighbor)
        #print("Precincts: ")
        #print(stringifyList(combinedCluster.getClusterPrecinctsList()))
        #print(len(combinedCluster.getClusterPrecinctsList()))
        #print("Edges: ")
        #print(combinedCluster.getClusterEdgeList())
        #print(len(combinedCluster.getClusterEdgeList()))

        # UC31: Generate a spanning tree of the combined sub-graph above
        #print()
        #print("Generating a spanning tree...")
        stEdgeList = generateSpanningTree(combinedCluster)
        #print(stringifyList(stEdgeList))

        # UC33: Generate a feasible set of edges in the spanning tree to cut
        # UC32: Calculate the acceptability of each newly generated sub-graph
        #validEdgeList = findValidEdges(stEdgeList, idealPopulation, populationVar, compactness)
        validEdge = findValidEdge(stEdgeList, idealPopulation, populationVar,
                                  compactness)
        #print()
        #print("Generated valid edge list...")
        #print(len(validEdgeList))
        #print(validEdgeList)
        #print()

        #if not validEdgeList:
        if not validEdge:
            #print("No valid edges, moving onto next iteration.")
            noValidEdgeCount += 1
        else:
            # UC34: Cut the edge in the combined sub-graph
            #print("Choosing a random edge from valid edge list...")
            #randomValidEdge = random.sample(validEdgeList, 1) # random.sample returns a list
            #randomValidEdge = randomValidEdge[0]
            #print("Chosen edge: {}".format(randomValidEdge))

            #print()
            #print("Resulting new clusters from edge cut...")
            #newlyCreatedClusters = cutEdge(stEdgeList, randomValidEdge, globalNXGraph)
            newlyCreatedClusters = cutEdge(stEdgeList, validEdge,
                                           globalNXGraph)

            del clusterToPrecinctListDict[tempClusters[0]]
            del clusterToPrecinctListDict[tempClusters[1]]

            clusterToPrecinctListDict.update(
                {newlyCreatedClusters[0][0]: newlyCreatedClusters[0]})
            clusterToPrecinctListDict.update(
                {newlyCreatedClusters[1][0]: newlyCreatedClusters[1]})

            # print cluster - population - num precincts
            '''
            print("Merged and Cut Clusters")
            print(tempClusters[0])
            print(tempClusters[1])
            print("Results:")
            print(list(globalNXGraph.nodes))
            tempList = []
            for id in clusterToPrecinctListDict:
                tempList.append(len(clusterToPrecinctListDict[id]))
            print(tempList)
            tempList.clear()
            for id in clusterToPrecinctListDict:
                tempList.append(getClusterTotalPopulation(id))
            print(tempList)
            tempList.clear()
            for id in clusterToPrecinctListDict:
                print(list(globalNXGraph.adj[id]))
            print()
            '''

        currIterationCount += 1  # increment the iteration counter

    returnPlan = []
    for id in clusterToPrecinctListDict:
        returnPlan.append(clusterToPrecinctListDict[id])
    #print("No valid edge times: {}". format(noValidEdgeCount))

    return returnPlan
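
The seed-districting step above (UC29) repeatedly contracts a random node with one of its neighbours until only the requested number of districts remains. A self-contained sketch of that contraction loop, assuming nothing beyond networkx; the contract_to_k helper, the 4x4 grid, and the target of 4 districts are illustrative values, not from the project:

import random
import networkx as nx

def contract_to_k(G, k, seed=None):
    """Randomly merge adjacent nodes until only k nodes remain."""
    rng = random.Random(seed)
    G = G.copy()
    while G.number_of_nodes() > k:
        u = rng.choice(list(G.nodes))
        neighbours = list(G.adj[u])
        if not neighbours:  # isolated node: nothing to merge it into
            continue
        v = rng.choice(neighbours)
        # v is absorbed into u; self-loops are dropped, as in the original loop
        G = nx.contracted_nodes(G, u, v, self_loops=False)
    return G

G = nx.grid_2d_graph(4, 4)          # 16 "precincts" on a grid
seed_plan = contract_to_k(G, 4, seed=0)
print(seed_plan.number_of_nodes())  # 4
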
Code example #26
0
File: wrfxctrl.py Project: yaron1000/wrfxctrl
        if sim_info['state']['wrf'] != 'completed':
            sim_state = get_simulation_state(sim_info['log_file'])
            sim_info['state'] = sim_state
            sim_info['last_updated'] = to_esmf(datetime.now())
            json.dump(sim_info, open('simulations/' + sim_id + '.json', 'w'))
        return json.dumps(sim_state)


@app.route("/remove_sim/<sim_id>")
def remove_sim(sim_id=None):
    if sim_id is not None:
        if sim_id in simulations:
            del simulations[sim_id]
            os.remove('simulations/' + sim_id + '.json')
            return "OK"
        else:
            return "NotFound"
    return "NotFound"  # no sim_id supplied; avoid returning None from the route


@app.route("/all_sims")
def get_all_sims():
    return json.dumps(simulations, indent=4, separators=(',', ': '))


if __name__ == '__main__':
    profiles = load_profiles()
    cluster = Cluster(json.load(open('etc/cluster.json')))
    wrfxpy = json.load(open('etc/wrfxpy.json'))
    simulations = load_simulations()
    app.run(debug=True)
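
The routes above follow one small pattern: an in-memory dict of simulations mirrored by JSON files on disk. A minimal self-contained sketch of the same remove/list pattern, with a plain dict standing in for load_simulations() and the file handling omitted; the demo entry is an illustrative placeholder:

import json
from flask import Flask

app = Flask(__name__)
simulations = {"demo": {"state": "completed"}}  # stand-in for load_simulations()

@app.route("/remove_sim/<sim_id>")
def remove_sim(sim_id=None):
    # drop the entry if it exists, otherwise report NotFound
    if sim_id in simulations:
        del simulations[sim_id]
        return "OK"
    return "NotFound"

@app.route("/all_sims")
def get_all_sims():
    return json.dumps(simulations, indent=4, separators=(',', ': '))

if __name__ == '__main__':
    app.run(debug=True)
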
Code example #27
0
    def hierarhicalClustering(self, clusters=None):
        """
        Main hierarchical clustering loop
        """
        distanca = 0
        self.l.log("Building distance matrix...")
        n = len(self.points)  # initially, each example is its own cluster
        data = []
        for c in self.clusters:
            p = [point.coords for point in self.clusters[c].points]
            data.append(p[0])
        df = pd.DataFrame(data, columns=np.array([a for a in self.attributes]))
        n_df = (df.values)
        self.d_matrix = np.zeros(((df.values).shape[0], (df.values).shape[0]))
        for i in range((df.values).shape[0]):
            for j in range((df.values).shape[0]):
                kljuc1 = str(i) + ' ' + str(j)
                kljuc2 = str(j) + ' ' + str(i)
                if i != j:
                    if kljuc1 in self.distances:
                        continue
                    elif kljuc2 in self.distances:
                        continue
                    else:
                        if self.linkage == "Ward":
                            l = []
                            l.append(n_df[i])
                            l.append(n_df[j])
                            centroid = np.average(l, axis=0)
                            dist = 0
                            if self.distance_type == "Cosine":
                                dist += spatial.distance.cosine(
                                    centroid, n_df[i])**2
                                dist += spatial.distance.cosine(
                                    centroid, n_df[j])**2
                            elif self.distance_type == "Euclidean":
                                dist += spatial.distance.euclidean(
                                    centroid, n_df[i])**2
                                dist += spatial.distance.euclidean(
                                    centroid, n_df[i])**2
                            self.distances.update({kljuc1: dist})
                        elif self.linkage == "Average":
                            if self.distance_type == "Cosine":
                                dist = spatial.distance.cosine(
                                    n_df[i], n_df[j])
                            elif self.distance_type == "Euclidean":
                                dist = spatial.distance.euclidean(
                                    n_df[i], n_df[j])
                            self.distances.update({kljuc1: dist})
                        else:
                            print("Error creating distance matrix...")
                            exit(1)

        idZ = 0
        m = len(self.points)
        self.l.log("Finding clusters...")
        while n > 1:
            """
            dist, pair = self.closest_clusters()
            par = list()
            for el in pair:
                par.append(el)
           
            dist = np.amin(self.d_matrix)
            result = np.where(self.d_matrix == dist)

            par = list()
            for el in result[0]:
                par.append(el)
            print("--",par)
            """
            key = min(self.distances, key=self.distances.get)
            par = key.split(' ')
            par = [int(i) for i in par]
            dist = self.distances[key]
            #print("--------------------")
            #print(par[0], par[1])
            self.distances.pop(key, None)
            self.izbrisi_razdalje(par[0])
            self.izbrisi_razdalje(par[1])
            #print("5 238" in self.distances)
            tocke = []
            tocke.append(self.clusters[par[0]].points)
            tocke.append(self.clusters[par[1]].points)

            #print("tocke: ", len(tocke))
            novCluster = Cluster(m + idZ)
            novCluster.update(par[0], par[1], dist, tocke)
            novCluster.centroid = novCluster.calculateCentroid()
            self.clusters.pop(par[0])
            self.clusters.pop(par[1])
            self.clusters.update({(m + idZ): novCluster})
            #print("dodajam razdalje...")
            self.dodaj_razdalje(m + idZ)
            """
            novCluster = Cluster(par[0])
            novCluster.update(par[0], par[1], dist, tocke)
            novCluster.centroid = novCluster.calculateCentroid()
            self.clusters.pop(par[0])
            self.clusters.pop(par[1])
            self.clusters.update({(par[0]): novCluster})
            #TODO: recompute the distances in the distance matrix
            """
            if idZ == 0:
                self.Z = [par[0], par[1], dist, novCluster.n]
            else:
                newrow = [par[0], par[1], dist, novCluster.n]
                self.Z = np.vstack([self.Z, newrow])

            n = len(self.clusters)
            #self.vseSilhuete.update({idZ: self.metodaSilhuet()})
            print("par: ", par, ", dist: ", '%.08f' % dist)
            #print(idZ, n, m+idZ)
            idZ += 1

        self.l.log("Dendrogram created...")

        # should return the Z matrix and the results of the methods that tell us how many clusters there are
        #print("Optimal number of clusters by the silhouette method: ", len(self.points)-1-max(self.vseSilhuete.items(), key=operator.itemgetter(1))[0])
        return self.clusters
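
The loop above assembles the (n-1)x4 linkage matrix Z row by row. For small inputs the same kind of matrix can be produced directly with scipy, which is a handy way to sanity-check a hand-rolled agglomerative implementation; a minimal sketch with random data and Ward linkage on Euclidean distances (swap in method="average", metric="cosine" to mirror the other branch):

import numpy as np
from scipy.cluster.hierarchy import linkage, fcluster

rng = np.random.default_rng(0)
X = rng.normal(size=(10, 3))            # 10 points, 3 attributes

Z = linkage(X, method="ward")           # rows: [cluster_i, cluster_j, dist, size]
labels = fcluster(Z, t=3, criterion="maxclust")  # cut the tree into 3 clusters
print(Z.shape)                          # (9, 4)
print(labels)
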
Code example #28
0
def new_cluster(x, frac, fac, p, m):
    k = 1
    inv_cov = get_inv_cov(x, frac)
    S = [x.tolist()]
    return Cluster(centroid=x, inv_cov=inv_cov, k=k, S=S)
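
new_cluster above stores an inverse covariance matrix next to the centroid, which is exactly the ingredient needed for Mahalanobis distances between a candidate point and a cluster. The get_inv_cov helper itself is not shown in the source, so the pinv-of-sample-covariance estimate below is only an assumed stand-in, and the data is synthetic:

import numpy as np
from scipy.spatial.distance import mahalanobis

rng = np.random.default_rng(1)
cluster_members = rng.normal(size=(50, 3))        # points already in the cluster
centroid = cluster_members.mean(axis=0)
inv_cov = np.linalg.pinv(np.cov(cluster_members, rowvar=False))  # pinv tolerates singular covariances

x = rng.normal(size=3)                            # candidate point
print(mahalanobis(x, centroid, inv_cov))          # distance of x from the cluster
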
Code example #29
0
    def CAclustering(self, constraints, final_n_of_clusters, clusters=None):
        """
        Main constrained hierarchical clustering loop
        """
        self.l.log("Creating transitive ML closure...")
        stevec = len(clusters)

        for c in clusters:
            print(clusters[c].clusterId, clusters[c].points)

        for x in constraints:
            if 'must-link' in x:
                print("omejitev: ", x)
                kluc1 = self.getClusterID(x['point'][0], clusters)
                kluc2 = self.getClusterID(x['must-link'][0], clusters)
                print(kluc1, " | ", kluc2, " | ", stevec, kluc1 == kluc2)
                if kluc1 != kluc2:
                    tocke = []
                    tocke.append(clusters[kluc1].points)
                    tocke.append(clusters[kluc2].points)
                    clusters.pop(kluc1)
                    clusters.pop(kluc2)
                    nov = Cluster(stevec)
                    nov.update(kluc1, kluc2, 0, tocke)  # forced merge of the must-linked clusters at distance 0
                    clusters.update({stevec: nov})
                    stevec += 1
        m = stevec
        self.l.log("Creating distance matrix....")
        self.distances = {}
        self.clusters = clusters
        stevec = 0
        for c in self.clusters:
            print(self.clusters[c].points, self.clusters[c].clusterId)
            for p in self.clusters[c].points:
                stevec += 1

        print(len(self.clusters), stevec)

        z = [(clusters[a].clusterId, clusters[b].clusterId)
             for a in self.clusters for b in self.clusters]
        for l in z:
            kljuc1 = str(l[0]) + " " + str(l[1])
            kljuc2 = str(l[1]) + " " + str(l[0])
            if l[0] != l[1]:
                if kljuc1 in self.distances:
                    continue
                elif kljuc2 in self.distances:
                    continue
                else:
                    if self.linkage == "Ward":
                        c = []
                        u = []
                        v = []

                        for p in self.clusters[l[0]].points:
                            c.append(p.coords)
                            u.append(p.coords)
                        for r in self.clusters[l[1]].points:
                            c.append(r.coords)
                            v.append(r.coords)
                        centroid_uv = np.average(c, axis=0)
                        centroid_u = np.average(u, axis=0)
                        centroid_v = np.average(v, axis=0)
                        dist1 = 0
                        dist2 = 0
                        dist3 = 0
                        for point in c:
                            if self.distance_type == "Cosine":
                                dist1 += spatial.distance.cosine(
                                    centroid_uv, point)**2
                            elif self.distance_type == "Euclidean":
                                dist1 += spatial.distance.euclidean(
                                    centroid_uv, point)**2
                        for point in u:
                            if self.distance_type == "Cosine":
                                dist2 += spatial.distance.cosine(
                                    centroid_u, point)**2
                            elif self.distance_type == "Euclidean":
                                dist2 += spatial.distance.euclidean(
                                    centroid_u, point)**2
                        for point in v:
                            if self.distance_type == "Cosine":
                                dist3 += spatial.distance.cosine(
                                    centroid_v, point)**2
                            elif self.distance_type == "Euclidean":
                                dist3 += spatial.distance.euclidean(
                                    centroid_v, point)**2
                        dist = dist1 - dist2 - dist3
                        self.distances.update({kljuc1: dist})
                    elif self.linkage == "Average":
                        u = [(a, b) for a in self.clusters[l[0]].points
                             for b in self.clusters[l[1]].points]
                        dist = self.average_linkage(u)
                        self.distances.update({kljuc1: dist})
                    else:
                        print("Error creating distance matrix...")
                        exit(1)

        self.l.log("Finding clusters...")
        '''
        s = sorted(self.distances.items(), key=lambda x: x[1])
        for k, v in s:
            print(k, v)
        '''
        print("st. omejitev: ", len(constraints))
        # print("clustri: ", self.clusters.keys())

        self.Z = np.array([])

        # n = len(self.points)  # initially, each example is its own cluster
        n = len(self.clusters)
        idZ = 0
        stop_clustering = False
        while (n != final_n_of_clusters):
            # print("### ",n," ###")
            condition = True
            # clusters_checked = []
            while condition:

                key = min(self.distances, key=self.distances.get)
                kljuc = key
                par = key.split(' ')
                par = [int(i) for i in par]
                dist = self.distances[kljuc]
                # print("   ->",key, " ", self.check_cannot_link(constraints, self.clusters[par[0]].points, self.clusters[par[1]].points))
                if self.check_cannot_link(constraints,
                                          self.clusters[par[0]].points,
                                          self.clusters[par[1]].points):
                    self.distances[kljuc] = sys.maxsize
                    if dist == sys.maxsize:
                        self.l.log(
                            "ABHC cannot find clusters under those constraints..."
                        )
                        return self.clusters
                    print("   Cannot link:", par)
                else:
                    break
                # print("--------------------")
                # print(par[0], par[1])
            # print(self.distances.keys())
            self.distances.pop(kljuc, None)
            self.izbrisi_razdalje(par[0])
            self.izbrisi_razdalje(par[1])
            # print(self.distances.keys())
            tocke = []
            tocke.append(self.clusters[par[0]].points)
            tocke.append(self.clusters[par[1]].points)
            # print("tocke: ", len(tocke))
            novCluster = Cluster(m + idZ)
            novCluster.update(par[0], par[1], dist, tocke)
            novCluster.centroid = novCluster.calculateCentroid()
            self.clusters.pop(par[0])
            self.clusters.pop(par[1])
            self.clusters.update({(m + idZ): novCluster})
            # print("clustri:")
            # print(self.clusters.keys())
            # print("dodajam razdalje...")

            # print("NOV:" ,m+idZ)
            self.dodaj_razdalje(m + idZ)

            print("par: ", par, "dist: ", '%.08f' % dist)
            if idZ == 0:
                self.Z = [par[0], par[1], dist, novCluster.n]
            else:
                newrow = [par[0], par[1], dist, novCluster.n]
                self.Z = np.vstack([self.Z, newrow])

            n = len(self.clusters)
            idZ += 1
        return self.clusters
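
The merge loop above consults a check_cannot_link helper (not shown) before accepting the closest pair. A self-contained sketch of such a check, assuming cannot-link constraints use the same dict shape as the must-link entries handled at the top of the method; the 'cannot-link' key and the sample references are assumptions mirroring that format:

def check_cannot_link(constraints, points_a, points_b):
    """Return True if merging the two point sets would violate a cannot-link constraint."""
    a, b = set(points_a), set(points_b)
    for c in constraints:
        if 'cannot-link' not in c:
            continue
        p, q = c['point'][0], c['cannot-link'][0]
        # violated if the constrained pair would end up inside the merged cluster
        if (p in a and q in b) or (p in b and q in a):
            return True
    return False

constraints = [{'point': ['x1'], 'cannot-link': ['x7']}]
print(check_cannot_link(constraints, ['x1', 'x2'], ['x7']))  # True
print(check_cannot_link(constraints, ['x1', 'x2'], ['x5']))  # False
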
Code example #30
0
sys.path.append(src_path)

from config import Config
from preprocessor import Preprocessor
from points import Points
from cluster import Cluster
from transfer_network import TransferNetwork
from transfer_probability import TransferProbability
from most_popular_route import MostPopularRoute
from figure import Figure

# get points from trajectories
preprocessor = Preprocessor(
    Config.DATASET_ROOT_DIR,
    Config.DATASET_SCALE)
points = preprocessor.get_points()

# use coherence expanded algorithm to form clusters
clusters = Cluster(points).coherence_expanding()
network = TransferNetwork(points, clusters)

# derive transfer probability
tp = TransferProbability(network)
tp.derive()

# search the most popular route
mpr = MostPopularRoute(network)
route = mpr.search(0, 6)
print(route)
figure = Figure()
figure.most_popular_route(points, network, route).show()