def pseudo_labeling(base_network, cluster_data_loader, class_num):
    """Cluster target-domain features and keep confidently assigned samples.

    Runs spherical k-means on target features (seeded from source), drops
    samples far from their assigned centroid, then relabels the survivors
    through the classifier head of ``base_network``.

    Parameters
    ----------
    base_network : model with an ``fc`` classifier head producing logits.
    cluster_data_loader : dict with "source" and "target" iterable loaders.
    class_num : number of classes / clusters.

    Returns
    -------
    (pseudo_labeled_targets, target_centers, source_centers) where the dict
    retains "path" and "label_list" entries after cleanup.
    """
    with torch.no_grad():
        # Fit clustering from source features, then assign target samples.
        CLS = clustering.Clustering(base_network,
                                    iter(cluster_data_loader["source"]),
                                    class_num)
        CLS.skmeans_clustering(iter(cluster_data_loader["target"]))
        pseudo_labeled_targets = CLS.clustered_targets
        # NOTE(review): despite the name, this is the MEAN distance to the
        # assigned centroid (torch.mean), not the maximum.
        max_dist = torch.mean(pseudo_labeled_targets["Dis2C"])
        # Keep only samples closer than 0.7x the mean centroid distance.
        confidence_mask = (pseudo_labeled_targets["Dis2C"] < 0.7 * max_dist)
        pseudo_labeled_targets["Dis2C"] = torch.masked_select(
            pseudo_labeled_targets["Dis2C"], confidence_mask)
        pseudo_labeled_targets["ps_label"] = torch.masked_select(
            pseudo_labeled_targets["ps_label"], confidence_mask)
        # "path" is a Python list, so filter it with the same mask manually.
        pseudo_labeled_targets["path"] = [
            pseudo_labeled_targets["path"][k]
            for k in range(confidence_mask.size(0)) if confidence_mask[k]
        ]
        # Map each cluster centroid to a class via the classifier head.
        logits = base_network.fc(CLS.target_ctr)
        target_ctr_softmax = torch.nn.Softmax(dim=1)(logits)
        # NOTE(review): argmax over dim=0 picks, for each class COLUMN, the
        # cluster row with the highest probability; a cluster->class mapping
        # would normally be argmax over dim=1. Confirm dim=0 is intended.
        pseudo_labeled_target_ctr = torch.argmax(target_ctr_softmax, dim=0)
        uniq = torch.unique(pseudo_labeled_target_ctr)
        # Re-index cluster assignments through the cluster->class mapping.
        pseudo_labeled_targets["ps_label"] = pseudo_labeled_target_ctr[
            pseudo_labeled_targets["ps_label"]]
        pseudo_labeled_targets["label_list"] = pseudo_labeled_targets[
            "ps_label"].tolist()
        print("num of pls :::::", uniq.shape
              )  # torch.unique(pseudo_labeled_targets["ps_label"]).shape)
        # Drop tensor entries; callers consume "path" and "label_list".
        del pseudo_labeled_targets["Dis2C"]
        del pseudo_labeled_targets["ps_label"]
        # del logits,target_ctr_softmax,pseudo_labeled_target_ctr,uniq,confidence_mask,max_dist
    return pseudo_labeled_targets, CLS.target_ctr, CLS.source_ctr
def run(self):
    """Execute every pipeline stage in order and return self.

    Emits a textual progress bar on stdout: '<|' at the start, one '|'
    tick per completed stage, and a closing '|>'.
    """
    def _tick(mark='|'):
        # Progress marker, flushed immediately so it shows up live.
        print(mark, end='', flush=True)

    self.Clustering = clustering.Clustering(self.image, n_clusters=self.n_cluster)
    _tick('<|')
    self.image, self.label = self.Clustering.run()
    _tick()

    self.Smooth = smoothing.Smooth(self.image.copy(), self.label, n=self.n_s, m=self.m_s)
    self.image = self.Smooth.run()
    _tick()

    self.Median = find_median.Median(self.image, self.label)
    self.median = self.Median.run()
    _tick()

    self.FindMole = find_mole.FindMole(self.image, self.median, self.label)
    self.mole = self.FindMole.run()
    _tick()

    self.Perimeter = perimeter.Perimeter(self.image, self.label, self.mole)
    self.p = self.Perimeter.run()
    _tick()

    self.Filter = filter_perimeter.FilterPerimeter(self.p, self.image)
    self.p = self.Filter.run()
    _tick()

    self.perimeter = len(self.p)
    self.s = len(self.mole)
    self.Compute = compute_ratio.Computer(self.perimeter, self.s)
    _tick()
    self.ratio = self.Compute.run()
    print('|>')
    return self
def main(args):
    """Cluster documents from a TSV file and write a plot plus a JSON dump.

    Parameters
    ----------
    args : docopt-style dict with "<infile>", "<out>", "--true", "--false".

    Side effects: writes a plot to args["<out>"] (via ClusterVisualizer)
    and a cluster->rows mapping to args["<out>"] + ".json".
    """
    # Read the TSV exactly once (the original re-opened the same file up to
    # three times and never closed any handle). newline="" is the csv-module
    # recommendation so embedded newlines inside quoted fields survive.
    with open(args["<infile>"], newline="") as f:
        rows = list(csv.DictReader(f, delimiter="\t", quotechar='"'))

    if args["--true"]:
        wanted = "trusted"
    elif args["--false"]:
        wanted = "fakeNews"
    else:
        wanted = None

    if wanted is not None:
        sentences = [row["text"] for row in rows if row["type"] == wanted]
        # NOTE(review): keys are positions in the FULL file while `sentences`
        # is compacted to matching rows only, so the two use different index
        # spaces — preserved from the original; confirm make_plot expects it.
        index = {
            i: row["id"]
            for i, row in enumerate(rows) if row["type"] == wanted
        }
    else:
        # No filter: cluster on text + title + uri concatenated.
        sentences = [
            "\n".join([row["text"], row["title"], row["uri"]]) for row in rows
        ]
        index = {i: row["id"] for i, row in enumerate(rows)}

    analyzer = clustering.Clustering(stopwords=True,
                                     tfidf=True,
                                     stemming=True,
                                     nbclusters=2,
                                     algo="spectral",
                                     dist="manhattan")
    dtm, vocab = analyzer.preprocess(sentences)
    dm = analyzer.compute_distances(dtm)
    y_pred, nb = analyzer.cluster(dm)

    visu = clustervisualizer.ClusterVisualizer(nb)
    visu.make_plot(dm, sentences, y_pred, index, output=args["<out>"])

    # Group the original rows by predicted cluster id.
    results = {}
    for docid, val in enumerate(y_pred):
        results.setdefault(str(val), []).append(rows[docid])
    with open(args["<out>"] + ".json", "w") as f:
        json.dump(results, f, indent=2)
def load_model(self, model, state):
    """Instantiate the requested model and register it in ``self.models``.

    model : "word2vec" or "clustering".
    state : for word2vec, "new" builds a fresh model and "old" also loads
            previously saved weights; any other value registers nothing.
    """
    sys.stdout.write(str(id(self)))  # debug trace of the owning instance
    # Only the two recognised states register a word2vec model; an unknown
    # state leaves self.models untouched (same as the original branches).
    if model == "word2vec" and state in ("new", "old"):
        embedder = w2v_gensim.W2V()
        if state == "old":
            embedder.load_old()
        self.models["word2vec"] = embedder
    if model == "clustering":
        self.models["clustering"] = clustering.Clustering()
    sys.stdout.write(str(self.models))  # debug trace of the registry
# --- Residential load forecasting driver (CNN) ---
# NOTE(review): T, n_train, n_lag and CNN_forecast are assumed to be defined
# earlier in this module — confirm.
data = readData.loadResidentialData()
n_customer = data.shape[1]

# load sum, 2 years of data
sumLoad = np.zeros((365 * 2 * T,))
# sum up the load data (NaNs treated as zero per customer)
for i in range(n_customer):
    customer_load = readData.getUserData(data, i)
    sumLoad += np.nan_to_num(customer_load)

# Min-max normalise the aggregate load into [0, 1].
minLoad = np.min(sumLoad)
maxLoad = np.max(sumLoad)
sumLoad = (sumLoad - minLoad) / (maxLoad - minLoad)

# call clustering function: splits the series into 3 groups and returns
# train/test windows for each group.
N_cluster = 3
(X_train0, y_train0, X_train1, y_train1, X_train2, y_train2, X_test0,
 X_test1, X_test2, y_test0, y_test1,
 y_test2) = clustering.Clustering(T, N_cluster, n_train, n_lag, sumLoad)

# neural network forecast, one run per cluster group
print("start NN forecast on group 0")
(MAPE0, RMSPE0, days0) = CNN_forecast(n_lag, T, X_train0, y_train0, X_test0,
                                      y_test0, maxLoad, minLoad)
print('forecast result group 0 : MAPE: %.2f, RMSPE: %.2f' % (MAPE0, RMSPE0))
print("start NN forecast on group 1")
(MAPE1, RMSPE1, days1) = CNN_forecast(n_lag, T, X_train1, y_train1, X_test1,
                                      y_test1, maxLoad, minLoad)
print('forecast result group 1 : MAPE: %.2f, RMSPE: %.2f' % (MAPE1, RMSPE1))
print("start NN forecast on group 2")
(MAPE2, RMSPE2, days2) = CNN_forecast(n_lag, T, X_train2, y_train2, X_test2,
                                      y_test2, maxLoad, minLoad)
print('forecast result group 2 : MAPE: %.2f, RMSPE: %.2f' % (MAPE2, RMSPE2))
# load sum, 2 years of data sumLoad = np.zeros((365 * 2 * T, )) # sum up the load data for i in range(n_customer): customer_load = readData.getUserData(data, i) sumLoad += np.nan_to_num(customer_load) minLoad = np.min(sumLoad) maxLoad = np.max(sumLoad) sumLoad = (sumLoad - minLoad) / (maxLoad - minLoad) # call clustering function N_cluster = 3 (X_train0, y_train0, X_train1, y_train1, X_train2, y_train2, X_test0, X_test1, X_test2, y_test0, y_test1, y_test2) = clustering.Clustering(T, N_cluster, n_train, n_lag, sumLoad) # neural network forecast print("start NN forecast on group 0") (MAPE0, RMSPE0, days0) = SVR_forecast(n_lag, T, X_train0, y_train0, X_test0, y_test0, maxLoad, minLoad) print('forecast result group 0 : MAPE: %.2f, RMSPE: %.2f' % (MAPE0, RMSPE0)) print("start NN forecast on group 1") (MAPE1, RMSPE1, days1) = SVR_forecast(n_lag, T, X_train1, y_train1, X_test1, y_test1, maxLoad, minLoad) print('forecast result group 1 : MAPE: %.2f, RMSPE: %.2f' % (MAPE1, RMSPE1)) print("start NN forecast on group 2")
# --- Load an MD trajectory and flatten coordinates for clustering ---
# NOTE(review): trajectory, topology and UserInput are assumed to be defined
# earlier in this module — confirm.
t = md.load(trajectory, top=topology)
sel = t.topology.select(UserInput.sel)  # restrict to the user's atom selection
t = t.atom_slice(sel)

# Format trajectory: flatten (frames, atoms, 3) xyz into (frames, atoms*3).
temp = t.xyz
frames = t.xyz.shape[0]
atoms = t.xyz.shape[1]
original_data = temp.reshape((frames, atoms * 3))
original_data = original_data.astype('float64')
# Release the large intermediates so peak memory stays low.
temp = []
t = []

# Figure out what P is (Minkowski exponent for the clustering below).
np.seterr(all='raise')  # fail fast on numeric warnings (overflow, div-by-zero)
cl = clustering.Clustering()
# Cap the sample used downstream at 10k frames; None means "use everything".
if frames > 10000:
    sample_size = 10000
else:
    sample_size = None
original_data = cl.my_math.standardize(
    original_data)  # Not clear if I should do this
# Exploratory search for the optimal p, kept for reference:
#data = copy.copy(original_data)
#data = cl.my_math.standardize(data) #Not clear if I should do this
#p_to_try = np.arange(1.1,5.1,0.1) #Amorim's suggestion
#silhouette_scores = np.zeros(p_to_try.size)
#for q in range(0, p_to_try.size):
#    print('Testing Minkowski Weight ' + str(p_to_try[q]) + ' with max of 5.0')
def main():
    """Dispatch on argv flags: -e (training), -r (search), -c (clustering),
    -knn (k-nearest-neighbours).

    Each mode scans sys.argv positionally for its own options, validates
    them, runs the corresponding module, and returns 0; any failure prints
    a traceback and returns 1.
    """
    try:
        nbArgv = len(sys.argv)
        for arg in sys.argv:
            # Training mode
            if arg == '-e':
                if nbArgv < 8:
                    raise Exception('Message d\'erreur: nombre d\'arguments incorrect.')
                taille = None
                encodage = None
                chemins = None
                for i in range(nbArgv):
                    if sys.argv[i] == '-t':
                        taille = sys.argv[i + 1]
                    elif sys.argv[i] == '--enc':
                        encodage = sys.argv[i + 1]
                    elif sys.argv[i] == '--chemin':
                        # Collect paths until the next flag (leading '-').
                        chemins = []
                        compteur = 1
                        while i + compteur < nbArgv and sys.argv[i + compteur][0] != "-":
                            chemins.append(sys.argv[i + compteur])
                            compteur += 1
                if taille is None or encodage is None or chemins is None:
                    raise Exception('Message d\'erreur: il manque des arguments pour effectuer l\'enregistrement.')
                entrainement.Entrainement(int(taille), encodage, chemins)
                return 0
            # Search mode
            elif arg == '-r':
                if nbArgv != 4:
                    raise Exception('Message d\'erreur: nombre d\'arguments incorrect.')
                taille = None
                for i in range(nbArgv):
                    if sys.argv[i] == '-t':
                        taille = sys.argv[i + 1]
                if taille is None:
                    raise Exception('Message d\'erreur: il manque des arguments pour effectuer la recherche.')
                recherche.Recherche(int(taille))
                return 0
            # Clustering mode
            elif arg == '-c':
                if nbArgv < 8:
                    raise Exception('Message d\'erreur: nombre d\'arguments incorrect.')
                taille = None
                nbResultats = None
                nbCentroides = None
                mots = None
                chemin = None
                for i in range(len(sys.argv)):
                    if sys.argv[i] == '-t':
                        taille = int(sys.argv[i + 1])
                    elif sys.argv[i] == '-n':
                        nbResultats = int(sys.argv[i + 1])
                    elif sys.argv[i] == '--nc':
                        nbCentroides = int(sys.argv[i + 1])
                    elif sys.argv[i] == '--mots':
                        # Collect words until the next flag, then strip the
                        # surrounding quote characters from first/last word.
                        mots = []
                        compteur = 1
                        while i + compteur < nbArgv and sys.argv[i + compteur][0] != "-":
                            mots.append(sys.argv[i + compteur])
                            compteur += 1
                        mots[0] = mots[0][1:]
                        mots[-1] = mots[-1][:-1]
                    elif sys.argv[i] == '>':
                        # NOTE(review): a literal '>' in argv only reaches the
                        # program if the shell does not consume it — confirm
                        # how the output path is actually passed.
                        chemin = sys.argv[i + 1]
                if taille is None or nbResultats is None or (nbCentroides is None and mots is None):
                    raise Exception('Message d\'erreur: il manque des arguments pour effectuer l\'enregistrement.')
                elif nbCentroides is not None and mots is not None:
                    raise Exception('Message d\'erreur: seul un type de centroide peut être testé à la fois.')
                clustering.Clustering(taille, nbResultats, nbCentroides, mots, chemin)
                return 0
            # KNN
            # -knn  = apply KNN.
            # -t    = window size.
            # -k    = number of surrounding words taken into account.
            # --mots = words for which a result is requested.
            # Example:
            # -knn -t5 -k 5 --mots 'Banane Maison Manger'
            elif arg == '-knn':
                if nbArgv < 10:
                    raise Exception('Message d\'erreur: nombre d\'arguments incorrect.')
                taille = None
                encodage = None
                kMots = None
                mots = None
                for i in range(len(sys.argv)):
                    if sys.argv[i] == '-t':
                        taille = int(sys.argv[i + 1])
                    elif sys.argv[i] == '-k':
                        kMots = int(sys.argv[i + 1])
                    elif sys.argv[i] == '--enc':
                        encodage = sys.argv[i + 1]
                    elif sys.argv[i] == '--mots':
                        # Same word collection + quote stripping as in -c.
                        mots = []
                        compteur = 1
                        while i + compteur < nbArgv and sys.argv[i + compteur][0] != "-":
                            mots.append(sys.argv[i + compteur])
                            compteur += 1
                        mots[0] = mots[0][1:]
                        mots[-1] = mots[-1][:-1]
                if taille is None or kMots is None or encodage is None or mots is None:
                    raise Exception('Message d\'erreur: il manque des arguments pour effectuer KNN.')
                knn.KNN(taille, kMots, encodage, mots)
                return 0
        # No recognised mode flag was found anywhere in argv.
        raise Exception('Message d\'erreur: aucun argument pour l\'entrainement ou la recherche.')
    except Exception as e:
        print(traceback.format_exc())
        return 1
def test4():
    """Smoke-check the tf-idf path: load vectors and print top terms."""
    model = clustering.Clustering(cv_path)
    model.load_tfidf()
    model.top_terms()