def cluster(dataArray, kval):
    """Run Lloyd-style k-means iterations over *dataArray*.

    Each data point is expected to already carry a cluster id
    (via ``getCluster``); the loop alternates centroid recomputation
    and nearest-centroid reassignment until no point changes cluster.

    :param dataArray: sequence of data-point objects
    :param kval: number of clusters
    :return: list of the final ``Centroid`` objects
    """
    dim = dataArray[0].getDimension()
    centroids = [Centroid(dim, idx) for idx in range(kval)]

    converged = False
    while not converged:
        converged = True

        # Recompute each centroid from its currently assigned points.
        for ctd in centroids:
            ctd.reset()
        for point in dataArray:
            centroids[point.getCluster()].addPoint(point)
        for ctd in centroids:
            ctd.findCenter()

        # Reassign every point to its nearest centroid.
        for point in dataArray:
            best_dist = sys.maxsize  # smallest distance seen so far
            best_clu = -1
            for idx, ctd in enumerate(centroids):
                d = distance(point, ctd)
                if d < best_dist:
                    best_dist = d
                    best_clu = idx
            if point.getCluster() != best_clu:
                # A point moved, so another pass is needed.
                converged = False
                point.changeCluster(best_clu)
    return centroids
def readCentroids():
    """Load centroids from the ``./centroid`` file, one per line.

    Each line is stripped of its trailing newline and handed to
    ``Centroid.setData``.

    :return: list of populated ``Centroid`` objects
    """
    centroids = []
    # Context manager closes the handle; the original called open()
    # inside a comprehension and leaked the file object.
    with open('./centroid') as fh:
        for line in fh:
            centroid = Centroid()
            centroid.setData(line.rstrip('\n'))
            centroids.append(centroid)
    return centroids
def _strongNegatives(self, unlabeledSet, positiveSet):
    """Select 'strong negatives': unlabeled examples farthest from the
    centroid of the positive examples.

    :param unlabeledSet: indexable collection of unlabeled example ids
    :param positiveSet: collection of positive example ids
    :return: set of ids from *unlabeledSet* chosen as strong negatives
    """
    excluded = self.crossValExcludeSet
    pos_features = self.helperObj.dictOfFeaturesToList(
        self.dict_X, set(positiveSet) - excluded)
    unlab_features = self.helperObj.dictOfFeaturesToList(
        self.dict_X, set(unlabeledSet) - excluded)

    # Centre of mass of the positives; the unlabeled points farthest
    # from it are the least likely to be positives themselves.
    pos_centre = Centroid().getCentroid(pos_features)
    far_indices = Centroid().getNFarthestPoints(
        unlab_features, pos_centre, len(pos_features))
    return {unlabeledSet[i] for i in far_indices}
def gener_spriral_sqr_matrix(matrix):
    """Generate the elements of a square matrix in spiral order,
    walking outwards from the centre cell.

    :param matrix: two-dimensional list; every row must have the same
        length as the number of rows (square matrix).
    :raises TypeError: if the matrix is not square, or smaller than 3x3.
    """
    size = len(matrix)
    if any(len(row) != size for row in matrix):
        raise TypeError("Must be square")
    if size < 3:
        raise TypeError("Min size = 3x3")

    def value_at(x, y):
        return matrix[x][y]

    # Start at the central cell; Centroid tracks the (x, y) position.
    mid = int(size / 2)
    walker = Centroid(mid, mid, size, size)
    yield value_at(*walker.vector)

    for ring in range(1, mid + 1):
        # One step left enters the next ring.
        walker.left()
        yield value_at(*walker.vector)
        # Down the left edge of the ring.
        for _ in range(1 + 2 * (ring - 1)):
            walker.down()
            yield value_at(*walker.vector)
        # Along the bottom edge, rightwards.
        for _ in range(2 * ring):
            walker.right()
            yield value_at(*walker.vector)
        # Up the right edge.
        for _ in range(2 * ring):
            walker.up()
            yield value_at(*walker.vector)
        # Back along the top edge, leftwards.
        for _ in range(2 * ring):
            walker.left()
            yield value_at(*walker.vector)
def fit(self, data):
    """Cluster *data* with k-means.

    Initialises ``self.k`` centroids from distinct random samples, then
    alternates centroid recomputation and nearest-centroid reassignment
    until a full pass reassigns no point.

    :param data: sequence of coordinate vectors (len(data) >= self.k)
    """
    # One Point wrapper per input sample.
    self.points = [Point(x) for x in data]

    # Pick k *distinct* rows for the initial centroids.  The original
    # used np.random.randint, which can repeat an index and produce
    # duplicate centroids (and hence fewer than k effective clusters).
    random_indices = np.random.choice(len(data), self.k, replace=False)
    for index in random_indices:
        self.centroids.append(Centroid(data[index]))

    # Initial assignment: each point joins its nearest centroid.
    for point in self.points:
        point.assigned_centroid = self._nearest_centroid(point)

    # Loop until an iteration where no point is reassigned.
    point_reassigned = True
    while point_reassigned:
        point_reassigned = False

        # Move each centroid to the mean of its member points.
        for centroid in self.centroids:
            member_coords = [
                point.coordinates
                for point in self.points
                if point.assigned_centroid is centroid
            ]
            # Guard: np.mean of an empty list is NaN, which would
            # poison every later distance computation.
            if member_coords:
                centroid.coordinates = np.mean(member_coords, axis=0)

        # Re-assign points to the closest cluster centre.
        for point in self.points:
            nearest = self._nearest_centroid(point)
            if point.assigned_centroid is not nearest:
                point.assigned_centroid = nearest
                point_reassigned = True

def _nearest_centroid(self, point):
    """Return the centroid with the smallest Euclidean distance to *point*."""
    distances = [
        distance.euclidean(point.coordinates, centroid.coordinates)
        for centroid in self.centroids
    ]
    return self.centroids[distances.index(min(distances))]
def generateCentroids(self):
    """Create ``self.centre_num`` centroids on a Latin-hypercube layout.

    Samples the unit hypercube with ``lhs``, scales each coordinate by
    ``self.hyper_cube_length``, and builds one ``Centroid`` per sample,
    all with weight 1 and std ``sqrt(self.variance)``.
    """
    if self.dimensions < self.centre_num:
        # lhs can be asked directly for the required number of samples.
        centres = lhs(self.dimensions, samples=self.centre_num,
                      criterion=self.criterion)
    else:
        # Default sampling, then keep the first centre_num rows.
        centres = lhs(self.dimensions, criterion=self.criterion)
        centres = centres[:self.centre_num]

    for i, centre in enumerate(centres):
        # List comprehension instead of map(): identical on Python 2
        # (this file uses xrange), and still a concrete list — not a
        # lazy map object — if the code is ever run on Python 3.
        centres[i] = [x * self.hyper_cube_length for x in centre]

    std = sqrt(self.variance)
    weight = 1
    # All centroids share the same unit weight.
    self.weights = [1] * self.centre_num
    self.centroids = [
        Centroid(centre, i, std, weight)
        for i, centre in enumerate(centres)
    ]
from sklearn.model_selection import cross_val_score, GridSearchCV from sklearn.naive_bayes import GaussianNB from sklearn import datasets datasets = { 'iris': datasets.load_iris(), 'digits': datasets.load_digits(), 'wine': datasets.load_wine(), 'breast-cancer': datasets.load_breast_cancer() } classifiers = { 'zeror': ZeroR(), 'oner': OneR(), 'oner-prob': OneRProb(), 'centroid': Centroid(), 'centroid-oner': CentroidOneR(), 'gaussian': GaussianNB() } std_frame = pd.DataFrame(index=classifiers.keys(), columns=['media', 'dp', 'scores']) dataset_frames = { 'iris': std_frame.copy(), 'digits': std_frame.copy(), 'wine': std_frame.copy(), 'breast-cancer': std_frame.copy() } for dataset_name, dataset in datasets.items():
from matplotlib.pyplot import show, title, xlabel, ylabel # ------- Datasets ------- # ds_iris = datasets.load_iris() ds_digits = datasets.load_digits() ds_wine = datasets.load_wine() ds_breast_cancer = datasets.load_breast_cancer() # ------- Classifiers ------- # classifiers = dict() classifiers['ZeroR'] = ZeroR() classifiers['OneR'] = OneR() classifiers['ProbOneR'] = ProbabilisticOneR() classifiers['Centroid'] = Centroid() classifiers['CentOneR'] = CentroidOneR() classifiers['GaussianNB'] = GaussianNB() # ------- DataFrames with Media / DP / Score ------- # df_iris = DataFrame(index=classifiers.keys(), columns=['media', 'dp', 'score']) df_digits = DataFrame(index=classifiers.keys(), columns=['media', 'dp', 'score']) df_wine = DataFrame(index=classifiers.keys(), columns=['media', 'dp', 'score']) df_breast_cancer = DataFrame(index=classifiers.keys(), columns=['media', 'dp', 'score']) # ------- Score from all classifiers for all datasets ------- # for classifier_index, classifier in classifiers.items():
from zeroR import ZeroR
from centroid import Centroid
from oneR_centroid import CentroidOneR
from probabilistic_oneR import ProbabilisticOneR
from sklearn.naive_bayes import GaussianNB

# ------- Declaring the algorithm dictionaries ------- #

# (Translation of the note below: to benchmark another algorithm, just
# add it to classifiers_dict.)
"""
Caso queira adicionar mais algum algoritmo para teste basta adicionar no classifiers_dict
"""

# Dictionary of classifiers, keyed by short algorithm name.
# NOTE(review): OneR, datasets and pd are used here but imported outside
# this chunk — presumably earlier in the file; verify.
classifiers_dict = dict()
classifiers_dict['oneR'] = OneR()
classifiers_dict['zeroR'] = ZeroR()
classifiers_dict['centroid'] = Centroid()
classifiers_dict['prob_oneR'] = ProbabilisticOneR()
classifiers_dict['oneR_centroid'] = CentroidOneR()
classifiers_dict['gaussianNB'] = GaussianNB()

# Dictionary of the sklearn toy datasets used for evaluation.
datasets_dict = dict()
datasets_dict['iris'] = datasets.load_iris()
datasets_dict['digits'] = datasets.load_digits()
datasets_dict['wine'] = datasets.load_wine()
datasets_dict['breast_cancer'] = datasets.load_breast_cancer()

# Dictionary holding one result frame per dataset: per-classifier
# cross-validation scores plus their mean and standard deviation.
results_dict = dict()
results_dict['iris'] = pd.DataFrame(index=classifiers_dict.keys(),
                                    columns=['scores', 'mean', 'std'])
def __init__(self, args, num_classes):
    """Build the multi-stage temporal model plus its domain-adaptation
    heads (frame-level and video-level discriminators, optional
    video-order classifier, optional per-stage centroids).

    :param args: namespace of hyper-parameters (stage/layer counts,
        feature dims, and the DA_* / SS_* adaptation switches)
    :param num_classes: number of action classes
    """
    super(MultiStageModel, self).__init__()
    # ====== collect arguments ====== #
    # this function only
    num_stages = args.num_stages
    num_layers = args.num_layers
    num_f_maps = args.num_f_maps
    dim_in = args.features_dim
    method_centroid = args.method_centroid
    # cross-function
    self.use_target = args.use_target
    self.multi_adv = args.multi_adv
    self.DA_adv_video = args.DA_adv_video
    self.ps_lb = args.ps_lb
    self.use_attn = args.use_attn
    self.num_seg = args.num_seg
    self.pair_ssl = args.pair_ssl
    self.DA_ens = args.DA_ens
    self.SS_video = args.SS_video

    # ====== main architecture ====== #
    # Stage 1 consumes raw features (dim_in); later stages consume the
    # previous stage's class logits, hence num_classes as input dim.
    self.stage1 = SingleStageModel(num_layers, num_f_maps, dim_in,
                                   num_classes, self.DA_ens)
    self.stages = nn.ModuleList([
        copy.deepcopy(
            SingleStageModel(num_layers, num_f_maps, num_classes,
                             num_classes, self.DA_ens))
        for s in range(num_stages - 1)
    ])

    # domain discriminators (frame-level): feature extractor + binary
    # (source vs. target) classifier head.
    self.ad_net_base = nn.ModuleList()
    self.ad_net_base += [
        AdvDomainClsBase(num_f_maps, num_f_maps, 'frame', args)
    ]
    self.ad_net_cls = nn.ModuleList()
    self.ad_net_cls += [nn.Linear(num_f_maps, 2)]

    # domain discriminators (video-level), only when gradient-reversal
    # video adaptation is on and target data is used.
    if 'rev_grad' in self.DA_adv_video and self.use_target != 'none':
        num_domain_class = 2
        num_concat = 1
        if 'rev_grad_ssl' in self.DA_adv_video:
            # Sequential-domain-prediction variant: classes = number of
            # ways to interleave num_seg source + num_seg target
            # segments, C(2k, k) = (2k)! / (k!)^2.
            num_domain_class = int(
                math.factorial(self.num_seg * 2) /
                (math.factorial(self.num_seg)**2))
            num_concat = self.num_seg * 2
            if self.DA_adv_video == 'rev_grad_ssl_2':
                if self.pair_ssl == 'all':
                    # All unordered segment pairs: C(2k, 2).
                    num_concat = int(
                        math.factorial(self.num_seg * 2) /
                        (2 * math.factorial(self.num_seg * 2 - 2)))
                elif self.pair_ssl == 'adjacent':
                    # Only adjacent segment pairs.
                    num_concat = self.num_seg * 2 - 1
        self.ad_net_video_base = nn.ModuleList()
        self.ad_net_video_base += [
            AdvDomainClsBase(num_f_maps * num_concat, num_f_maps,
                             'video', args)
        ]
        self.ad_net_video_cls = nn.ModuleList()
        self.ad_net_video_cls += [nn.Linear(num_f_maps, num_domain_class)]

    # video-order classifier (VCOP self-supervision): predicts which of
    # the num_seg! segment orderings was presented, from pairwise
    # segment features.
    if self.SS_video == 'VCOP':
        num_order_pair = int(self.num_seg * (self.num_seg - 1) / 2)
        num_order_class = math.factorial(self.num_seg)
        self.video_order_base = nn.Sequential(
            nn.Linear(num_f_maps * 2, num_f_maps), nn.ReLU(),
            nn.Dropout())
        self.video_order_cls = nn.Linear(num_f_maps * num_order_pair,
                                         num_order_class)

    # for class-based domain discriminators (frame-level only):
    # one extra discriminator head per additional class.
    if self.multi_adv[1] == 'Y':  # separate weights for domain classifiers
        for i in range(1, num_classes):
            self.ad_net_cls += [nn.Linear(num_f_maps, 2)]

        # if separating feature weights, classifier weights must be separate
        if self.multi_adv[
                0] == 'Y':  # separate weights for domain features
            for i in range(1, num_classes):
                self.ad_net_base += [
                    AdvDomainClsBase(num_f_maps, num_f_maps, 'frame',
                                     args)
                ]

    # store the centroids (one Centroid module per stage) when a
    # centroid-based method is selected.
    if method_centroid != 'none':
        self.centroids = nn.ModuleList()
        for s in range(num_stages):
            self.centroids += [Centroid(num_f_maps, num_classes)]