コード例 #1
0
ファイル: reduce.py プロジェクト: patrocloschris/MR-KMeans
def main():
    """Reducer: aggregate mapper output into centroids and write them out.

    Reads comma-separated ``key,value`` lines from stdin, where the value is
    a '|'-separated pair (movie id, data).  Points are accumulated into one
    Centroid per key; each centroid's mean is then computed and the result
    exported to ``./centroids/centroidinfo``.
    """
    newCentroids = dict()

    # collect all data from mappers
    for line in sys.stdin:

        keyValue = line.split(',')
        dataInfo = keyValue[1].split('|')

        # First point seen for this key creates the centroid;
        # later points are appended to the existing one.
        if keyValue[0] not in newCentroids:
            centroid = Centroid()
            centroid.addMovieId(dataInfo[0])
            centroid.addCurrentData(dataInfo[1])
            newCentroids[keyValue[0]] = centroid
        else:
            newCentroids[keyValue[0]].addMovieId(dataInfo[0])
            newCentroids[keyValue[0]].addCurrentData(dataInfo[1])

    # For new centroids calculate means and store them into file.
    # 'with' guarantees the handle is closed even if exporting raises
    # (the original left the file object open on error).
    with open('./centroids/centroidinfo', "w") as text_file:
        for key in newCentroids:
            # print() form works on both Python 2 and 3 (was `print key`)
            print(key)
            newCentroids[key].calculateMean()
            result = newCentroids[key].exportCentroid()
            text_file.write(result)
コード例 #2
0
ファイル: mapper.py プロジェクト: subhadra-chinnu/K-means
def readCentroids():
    """Load centroids from './centroid', one serialized centroid per line.

    Returns:
        list: freshly constructed Centroid objects, one per input line.
    """
    centroids = []
    # Context manager closes the file deterministically -- the original
    # opened it inside a list comprehension and leaked the handle.
    with open('./centroid') as f:
        for line in f:
            centroid = Centroid()
            centroid.setData(line.rstrip('\n'))
            centroids.append(centroid)
    return centroids
コード例 #3
0
ファイル: mapper.py プロジェクト: patrocloschris/MR-KMeans
def readCentroids():
    """Load centroids from './centroids/centroidinfo', one per line.

    Returns:
        list: freshly constructed Centroid objects, one per input line.
    """
    centroids = []
    # Context manager closes the file deterministically -- the original
    # opened it inside a list comprehension and leaked the handle.
    with open('./centroids/centroidinfo') as f:
        for line in f:
            centroid = Centroid()
            centroid.setData(line.rstrip('\n'))
            centroids.append(centroid)
    return centroids
コード例 #4
0
    def fit(self, data):
        """Cluster *data* with k-means.

        Seeds self.k centroids from randomly chosen rows of *data*, then
        alternates mean-update and nearest-centroid reassignment until a
        full pass reassigns no point.
        """

        def nearest_centroid(point):
            # First centroid at the minimum distance wins ties, matching
            # distances.index(min(distances)) semantics.
            return min(self.centroids,
                       key=lambda c: distance.euclidean(point.coordinates,
                                                        c.coordinates))

        # Wrap every raw coordinate vector in a Point object
        self.points = [Point(coords) for coords in data]

        # Seed the k centroids from randomly chosen data rows
        # (NOTE(review): randint may repeat indices, so duplicate initial
        # centroids are possible -- same as before)
        for idx in np.random.randint(0, len(data), self.k):
            self.centroids.append(Centroid(data[idx]))

        # Initial assignment: each point joins its nearest centroid
        for point in self.points:
            point.assigned_centroid = nearest_centroid(point)

        # Iterate until a full pass makes no reassignment
        changed = True
        while changed:
            changed = False

            # Move each centroid to the mean of its current members
            for centroid in self.centroids:
                members = [p.coordinates for p in self.points
                           if p.assigned_centroid is centroid]
                centroid.coordinates = np.mean(members, axis=0)

            # Re-assign every point to its now-nearest centroid; any
            # change forces another pass
            for point in self.points:
                nearest = nearest_centroid(point)
                if point.assigned_centroid is not nearest:
                    point.assigned_centroid = nearest
                    changed = True
コード例 #5
0
    def generate_centroids(self, points, centroids):
        """Generate one initialised centroid collection per trial.

        Args:
            points: candidate points forwarded to KMC.generate_centroid_points.
            centroids: centroid spec forwarded unchanged (count/positions --
                exact semantics defined by KMC; verify against its docs).

        Returns:
            list: one Centroid.initialise_centroids(...) result per trial.
        """
        # NOTE: first parameter was misspelled 'elf'; renamed to the
        # conventional 'self' (passed implicitly by callers, so no break).
        cntrds = KMC.generate_centroid_points(points, centroids)
        centroids_by_trial = [
            Centroid.initialise_centroids(cntrd) for cntrd in cntrds
        ]

        return centroids_by_trial
コード例 #6
0
ファイル: kmean.py プロジェクト: noah-curran/SE4ML
def cluster(dataArray, kval):
    """Run Lloyd's k-means over dataArray with kval clusters.

    Repeats (recompute cluster centers, reassign each point to its
    closest center) until no data point changes cluster, then returns
    the final list of centroids.
    """
    point_count = len(dataArray)
    dimension = dataArray[0].getDimension()
    centroidArray = [Centroid(dimension, idx) for idx in range(kval)]

    stable = False
    while not stable:
        stable = True

        # Recompute the center of every cluster from its member points
        for ctd in centroidArray:
            ctd.reset()
        for dp_index in range(point_count):
            dp = dataArray[dp_index]
            centroidArray[dp.getCluster()].addPoint(dp)
        for ctd in centroidArray:
            ctd.findCenter()

        # Move each point to its closest center; any move forces another pass
        for dp_index in range(point_count):
            dp = dataArray[dp_index]
            best_dist = sys.maxsize  # minimum distance so far
            best_clu = -1
            for clu_index in range(kval):
                d = distance(dp, centroidArray[clu_index])
                if d < best_dist:  # strict '<' keeps the first minimum
                    best_dist = d
                    best_clu = clu_index
            if dp.getCluster() != best_clu:
                # this data point is assigned to a different cluster
                stable = False
                dp.changeCluster(best_clu)

    return centroidArray
コード例 #7
0
    def _strongNegatives(self, unlabeledSet, positiveSet):
        """Pick the unlabeled examples farthest from the positive centroid.

        Extracts feature vectors for the positive and unlabeled sets
        (minus cross-validation items), then returns the len(posX)
        elements of unlabeledSet whose vectors lie farthest from the
        centroid of the positive examples.
        """
        excluded = self.crossValExcludeSet
        pos_features = self.helperObj.dictOfFeaturesToList(
            self.dict_X, set(positiveSet) - excluded)
        unlab_features = self.helperObj.dictOfFeaturesToList(
            self.dict_X, set(unlabeledSet) - excluded)

        pos_center = Centroid().getCentroid(pos_features)
        far_indices = Centroid().getNFarthestPoints(
            unlab_features, pos_center, len(pos_features))

        # Map the returned indices back onto the original unlabeled items
        return {unlabeledSet[i] for i in far_indices}
コード例 #8
0
def gener_spriral_sqr_matrix(matrix):
    """Yield the elements of a square matrix in an outward spiral order.

    :param matrix: two-dimensional list; every row must be the same length
        as the number of rows (square), with side length at least 3.
    :return: generator over matrix elements, starting at the center cell.
    """

    def cell(x, y):
        return matrix[x][y]

    side = len(matrix)
    if not all(len(row) == side for row in matrix):
        raise TypeError("Must be square")
    elif side < 3:
        raise TypeError("Min size = 3x3")

    # Single cursor object tracks the current position (the original used
    # two aliases, `centroid` and `start_centroid`, for the same object).
    median = int(side / 2)
    cursor = Centroid(median, median, side, side)
    yield cell(*cursor.vector)

    # Walk ring after ring: one step left, then down/right/up/left runs
    # whose lengths grow with the ring index.
    for ring in range(1, median + 1):

        # left one step always
        cursor.left()
        yield cell(*cursor.vector)

        # down: 1 + 2*(ring-1) steps
        for _ in range(1 + 2 * (ring - 1)):
            cursor.down()
            yield cell(*cursor.vector)

        # right: 2*ring steps
        for _ in range(2 * ring):
            cursor.right()
            yield cell(*cursor.vector)

        # up: 2*ring steps
        for _ in range(2 * ring):
            cursor.up()
            yield cell(*cursor.vector)

        # left: 2*ring steps
        for _ in range(2 * ring):
            cursor.left()
            yield cell(*cursor.vector)
コード例 #9
0
 def generateCentroids(self):
     """Sample centre_num centroid centres via Latin hypercube sampling.

     Centres are scaled by hyper_cube_length; every centroid gets the
     same weight (1) and standard deviation sqrt(variance).
     """
     if self.dimensions < self.centre_num:
         centres = lhs(self.dimensions,
                       samples=self.centre_num,
                       criterion=self.criterion)
     else:
         # lhs defaults to `dimensions` samples; keep only centre_num
         centres = lhs(self.dimensions, criterion=self.criterion)
         centres = centres[:self.centre_num]
     # Scale each centre into the hyper-cube. A list comprehension (not
     # map) is correct on both Python 2 and 3 -- a lazy py3 map object
     # cannot be assigned back into the row.
     for i, centre in enumerate(centres):
         centres[i] = [x * self.hyper_cube_length for x in centre]
     std = sqrt(self.variance)
     weight = 1
     # range() works on both py2/py3, unlike the original xrange
     self.weights = [1 for _ in range(self.centre_num)]
     self.centroids = [
         Centroid(centre, i, std, weight)
         for i, centre in enumerate(centres)
     ]
コード例 #10
0
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn import datasets

# Datasets under evaluation, keyed by display name.
# NOTE(review): this rebinds the name `datasets`, shadowing the sklearn
# `datasets` module imported above; the load_* calls inside the literal
# evaluate before the rebinding, so it works, but the shadowing is fragile.
datasets = {
    'iris': datasets.load_iris(),
    'digits': datasets.load_digits(),
    'wine': datasets.load_wine(),
    'breast-cancer': datasets.load_breast_cancer()
}

# Candidate classifiers: project baselines plus Gaussian naive Bayes.
classifiers = {
    'zeror': ZeroR(),
    'oner': OneR(),
    'oner-prob': OneRProb(),
    'centroid': Centroid(),
    'centroid-oner': CentroidOneR(),
    'gaussian': GaussianNB()
}

# Template result table: one row per classifier.
# ('media'/'dp' are Portuguese for mean / standard deviation.)
std_frame = pd.DataFrame(index=classifiers.keys(),
                         columns=['media', 'dp', 'scores'])

# One independent copy of the template per dataset.
dataset_frames = {
    'iris': std_frame.copy(),
    'digits': std_frame.copy(),
    'wine': std_frame.copy(),
    'breast-cancer': std_frame.copy()
}

for dataset_name, dataset in datasets.items():
コード例 #11
0
from matplotlib.pyplot import show, title, xlabel, ylabel

# ------- Datasets ------- #

ds_iris = datasets.load_iris()
ds_digits = datasets.load_digits()
ds_wine = datasets.load_wine()
ds_breast_cancer = datasets.load_breast_cancer()

# ------- Classifiers ------- #

# Dict literal keeps the same insertion order as the original
# one-assignment-per-line construction.
classifiers = {
    'ZeroR': ZeroR(),
    'OneR': OneR(),
    'ProbOneR': ProbabilisticOneR(),
    'Centroid': Centroid(),
    'CentOneR': CentroidOneR(),
    'GaussianNB': GaussianNB(),
}

# ------- DataFrames with Media / DP / Score ------- #

# One result table per dataset, all sharing the same row/column layout
# ('media'/'dp' are Portuguese for mean / standard deviation).
_score_columns = ['media', 'dp', 'score']
df_iris = DataFrame(index=classifiers.keys(), columns=_score_columns)
df_digits = DataFrame(index=classifiers.keys(), columns=_score_columns)
df_wine = DataFrame(index=classifiers.keys(), columns=_score_columns)
df_breast_cancer = DataFrame(index=classifiers.keys(),
                             columns=_score_columns)

# ------- Score from all classifiers for all datasets ------- #

for classifier_index, classifier in classifiers.items():
コード例 #12
0
ファイル: run_baseline.py プロジェクト: lucastassis/ai
from zeroR import ZeroR
from centroid import Centroid
from oneR_centroid import CentroidOneR
from probabilistic_oneR import ProbabilisticOneR
from sklearn.naive_bayes import GaussianNB

# ------- Declare the classifier and dataset dictionaries ------- #
"""
Caso queira adicionar mais algum algoritmo para teste basta adicionar no classifiers_dict
"""

# Classifier dictionary (dict literal preserves the original insertion order)
classifiers_dict = {
    'oneR': OneR(),
    'zeroR': ZeroR(),
    'centroid': Centroid(),
    'prob_oneR': ProbabilisticOneR(),
    'oneR_centroid': CentroidOneR(),
    'gaussianNB': GaussianNB(),
}

# Dataset dictionary
datasets_dict = {
    'iris': datasets.load_iris(),
    'digits': datasets.load_digits(),
    'wine': datasets.load_wine(),
    'breast_cancer': datasets.load_breast_cancer(),
}

# Dictionary holding the per-dataset result tables
results_dict = dict()
results_dict['iris'] = pd.DataFrame(index=classifiers_dict.keys(),
                                    columns=['scores', 'mean', 'std'])
コード例 #13
0
    def __init__(self, args, num_classes):
        """Build a multi-stage temporal model with optional DA/SSL heads.

        Constructs:
          * stage1 plus (num_stages - 1) refinement stages of
            SingleStageModel,
          * frame-level domain discriminators (ad_net_base / ad_net_cls),
          * optional video-level discriminators when args.DA_adv_video
            contains 'rev_grad' and a target domain is in use,
          * an optional video-order classifier when args.SS_video == 'VCOP',
          * optional per-stage Centroid modules when args.method_centroid
            is not 'none'.

        Args:
            args: parsed argument namespace; the attributes read here are
                listed explicitly in the "collect arguments" section below.
            num_classes: number of output classes per stage.
        """
        super(MultiStageModel, self).__init__()
        # ====== collect arguments ====== #
        # this function only
        num_stages = args.num_stages
        num_layers = args.num_layers
        num_f_maps = args.num_f_maps
        dim_in = args.features_dim
        method_centroid = args.method_centroid

        # cross-function
        self.use_target = args.use_target
        self.multi_adv = args.multi_adv
        self.DA_adv_video = args.DA_adv_video
        self.ps_lb = args.ps_lb
        self.use_attn = args.use_attn
        self.num_seg = args.num_seg
        self.pair_ssl = args.pair_ssl
        self.DA_ens = args.DA_ens
        self.SS_video = args.SS_video

        # ====== main architecture ====== #
        # Stage 1 consumes raw features (dim_in); every later stage refines
        # the previous stage's class scores (num_classes -> num_classes).
        self.stage1 = SingleStageModel(num_layers, num_f_maps, dim_in,
                                       num_classes, self.DA_ens)
        self.stages = nn.ModuleList([
            copy.deepcopy(
                SingleStageModel(num_layers, num_f_maps, num_classes,
                                 num_classes, self.DA_ens))
            for s in range(num_stages - 1)
        ])

        # domain discriminators
        # (frame-level: one shared feature extractor + one binary classifier)
        self.ad_net_base = nn.ModuleList()
        self.ad_net_base += [
            AdvDomainClsBase(num_f_maps, num_f_maps, 'frame', args)
        ]
        self.ad_net_cls = nn.ModuleList()
        self.ad_net_cls += [nn.Linear(num_f_maps, 2)]

        # domain discriminators (video-level)
        if 'rev_grad' in self.DA_adv_video and self.use_target != 'none':
            num_domain_class = 2
            num_concat = 1
            if 'rev_grad_ssl' in self.DA_adv_video:
                # C(2*num_seg, num_seg): number of ways to label which of
                # the 2*num_seg concatenated segments are source vs target
                num_domain_class = int(
                    math.factorial(self.num_seg * 2) /
                    (math.factorial(self.num_seg)**2))

                num_concat = self.num_seg * 2
                if self.DA_adv_video == 'rev_grad_ssl_2':
                    if self.pair_ssl == 'all':
                        # C(2*num_seg, 2): one input per segment pair
                        num_concat = int(
                            math.factorial(self.num_seg * 2) /
                            (2 * math.factorial(self.num_seg * 2 - 2)))
                    elif self.pair_ssl == 'adjacent':
                        num_concat = self.num_seg * 2 - 1

            self.ad_net_video_base = nn.ModuleList()
            self.ad_net_video_base += [
                AdvDomainClsBase(num_f_maps * num_concat, num_f_maps, 'video',
                                 args)
            ]
            self.ad_net_video_cls = nn.ModuleList()
            self.ad_net_video_cls += [nn.Linear(num_f_maps, num_domain_class)]

        # video-order classifier
        # (VCOP: predict the permutation of shuffled segments; one pairwise
        # feature per segment pair, num_seg! possible orderings)
        if self.SS_video == 'VCOP':
            num_order_pair = int(self.num_seg * (self.num_seg - 1) / 2)
            num_order_class = math.factorial(self.num_seg)
            self.video_order_base = nn.Sequential(
                nn.Linear(num_f_maps * 2, num_f_maps), nn.ReLU(), nn.Dropout())
            self.video_order_cls = nn.Linear(num_f_maps * num_order_pair,
                                             num_order_class)

        # for class-based domain discriminators (frame-level only)
        if self.multi_adv[1] == 'Y':  # separate weights for domain classifiers
            # one extra binary classifier per remaining class (index 0
            # already exists from the shared head above)
            for i in range(1, num_classes):
                self.ad_net_cls += [nn.Linear(num_f_maps, 2)]

            # if separating feature weights, classifier weights must be separate
            if self.multi_adv[
                    0] == 'Y':  # separate weights for domain features
                for i in range(1, num_classes):
                    self.ad_net_base += [
                        AdvDomainClsBase(num_f_maps, num_f_maps, 'frame', args)
                    ]

        # store the centroids
        # (one Centroid module per stage, only when centroid-based DA is on)
        if method_centroid != 'none':
            self.centroids = nn.ModuleList()
            for s in range(num_stages):
                self.centroids += [Centroid(num_f_maps, num_classes)]