예제 #1
0
def pseudo_labeling(base_network, cluster_data_loader, class_num):
    """Produce confident pseudo labels for target-domain samples.

    Clusters source/target features with spherical k-means, discards target
    samples that lie far from their assigned cluster centre, then relabels the
    survivors with the classifier's prediction for the corresponding centre.

    Returns:
        (targets, target_ctr, source_ctr): the filtered target dict (keys
        "path" and "label_list") plus the target and source cluster centres.
    """
    with torch.no_grad():
        engine = clustering.Clustering(base_network,
                                       iter(cluster_data_loader["source"]),
                                       class_num)
        engine.skmeans_clustering(iter(cluster_data_loader["target"]))
        targets = engine.clustered_targets

        # Confidence filter: keep only samples closer than 70% of the mean
        # distance to their assigned centre.
        mean_dist = torch.mean(targets["Dis2C"])
        keep = targets["Dis2C"] < 0.7 * mean_dist
        targets["Dis2C"] = torch.masked_select(targets["Dis2C"], keep)
        targets["ps_label"] = torch.masked_select(targets["ps_label"], keep)
        kept_rows = keep.nonzero(as_tuple=True)[0]
        targets["path"] = [targets["path"][int(row)] for row in kept_rows]

        # Predict a class for every target cluster centre.
        centre_logits = base_network.fc(engine.target_ctr)
        centre_probs = torch.nn.Softmax(dim=1)(centre_logits)
        # NOTE(review): argmax over dim=0 (across centres for each class)
        # reproduces the original code; confirm dim=1 was not intended.
        centre_labels = torch.argmax(centre_probs, dim=0)

        unique_labels = torch.unique(centre_labels)

        # Replace each sample's cluster id with its centre's predicted class.
        targets["ps_label"] = centre_labels[targets["ps_label"]]
        targets["label_list"] = targets["ps_label"].tolist()
        print("num of pls :::::", unique_labels.shape)
        del targets["Dis2C"]
        del targets["ps_label"]
        return targets, engine.target_ctr, engine.source_ctr
예제 #2
0
 def run(self):
     """Execute the full image-analysis pipeline on ``self.image``.

     Stages: clustering -> smoothing -> median -> mole detection ->
     perimeter extraction -> perimeter filtering -> perimeter/area ratio.
     Prints a ``<|...|>`` progress bar (one '|' per completed stage) and
     returns ``self`` so calls can be chained.
     """
     # Segment the image into n_cluster clusters; yields image + label map.
     self.Clustering = clustering.Clustering(self.image,
                                             n_clusters=self.n_cluster)
     print('<|', end='', flush=True)
     self.image, self.label = self.Clustering.run()
     print('|', end='', flush=True)
     # Smooth a copy of the clustered image (n_s/m_s are the smoothing
     # parameters — exact semantics defined in smoothing.Smooth; TODO confirm).
     self.Smooth = smoothing.Smooth(self.image.copy(),
                                    self.label,
                                    n=self.n_s,
                                    m=self.m_s)
     self.image = self.Smooth.run()
     print('|', end='', flush=True)
     # Compute the median over the labelled image.
     self.Median = find_median.Median(self.image, self.label)
     self.median = self.Median.run()
     print('|', end='', flush=True)
     # Locate mole pixels using the median as reference.
     self.FindMole = find_mole.FindMole(self.image, self.median, self.label)
     self.mole = self.FindMole.run()
     print('|', end='', flush=True)
     # Extract the mole's perimeter points.
     self.Perimeter = perimeter.Perimeter(self.image, self.label, self.mole)
     self.p = self.Perimeter.run()
     print('|', end='', flush=True)
     # Drop spurious perimeter points.
     self.Filter = filter_perimeter.FilterPerimeter(self.p, self.image)
     self.p = self.Filter.run()
     print('|', end='', flush=True)
     # Perimeter length / mole area -> final ratio.
     self.perimeter = len(self.p)
     self.s = len(self.mole)
     self.Compute = compute_ratio.Computer(self.perimeter, self.s)
     print('|', end='', flush=True)
     self.ratio = self.Compute.run()
     print('|>')
     return self
예제 #3
0
def main(args):
    """Cluster sentences from a TSV file and report the results.

    ``args`` is a docopt-style mapping: ``"<infile>"`` names the input TSV,
    ``"<out>"`` the output prefix; the ``"--true"``/``"--false"`` flags
    restrict rows to type "trusted"/"fakeNews" respectively, otherwise all
    rows are used with title and uri appended to each sentence.

    Side effects: writes a cluster plot via ClusterVisualizer and a
    ``<out>.json`` file mapping cluster id -> list of the original rows.
    """
    # Read the TSV exactly once instead of re-opening it for every pass, and
    # close the handle (the original leaked three open file objects).
    with open(args["<infile>"]) as infile:
        rows = list(csv.DictReader(infile, delimiter="\t", quotechar='"'))

    if args["--true"]:
        wanted = "trusted"
    elif args["--false"]:
        wanted = "fakeNews"
    else:
        wanted = None

    if wanted is None:
        sentences = [
            "\n".join([row["text"], row["title"], row["uri"]]) for row in rows
        ]
        index = {i: row["id"] for i, row in enumerate(rows)}
    else:
        sentences = [row["text"] for row in rows if row["type"] == wanted]
        # NOTE(review): keys are positions in the *unfiltered* file while
        # ``sentences`` is the filtered list (exactly as in the original
        # code) — confirm make_plot expects these original row positions.
        index = {
            i: row["id"]
            for i, row in enumerate(rows) if row["type"] == wanted
        }

    analyzer = clustering.Clustering(stopwords=True,
                                     tfidf=True,
                                     stemming=True,
                                     nbclusters=2,
                                     algo="spectral",
                                     dist="manhattan")
    dtm, vocab = analyzer.preprocess(sentences)
    dm = analyzer.compute_distances(dtm)
    y_pred, nb = analyzer.cluster(dm)
    visu = clustervisualizer.ClusterVisualizer(nb)
    visu.make_plot(dm, sentences, y_pred, index, output=args["<out>"])

    # Group the original rows by predicted cluster and dump them as JSON.
    results = {}
    for docid, val in enumerate(y_pred):
        results.setdefault(str(val), []).append(rows[docid])
    with open(args["<out>"] + ".json", "w") as f:
        json.dump(results, f, indent=2)
예제 #4
0
 def load_model(self, model, state):
     """Create the requested model and register it in ``self.models``.

     ``model`` selects "word2vec" (where ``state`` "new" builds a fresh
     W2V and "old" additionally restores saved weights) or "clustering".
     Writes this object's id and the resulting model table to stdout.
     """
     sys.stdout.write(str(id(self)))
     if model == "word2vec" and state in ("new", "old"):
         engine = w2v_gensim.W2V()
         if state == "old":
             # Restore previously trained embeddings.
             engine.load_old()
         self.models["word2vec"] = engine
     if model == "clustering":
         self.models["clustering"] = clustering.Clustering()
     sys.stdout.write(str(self.models))
예제 #5
0
 # Load residential meter data; columns index customers — TODO confirm layout.
 data = readData.loadResidentialData()
 n_customer = data.shape[1]
 # load sum, 2 years of data (T samples per day)
 sumLoad = np.zeros((365 * 2 * T,))
 # sum up the load data across all customers; NaN readings count as zero
 for i in range(n_customer):
     customer_load = readData.getUserData(data, i)
     sumLoad += np.nan_to_num(customer_load)

 # Min-max normalise the aggregate load into [0, 1].
 minLoad = np.min(sumLoad)
 maxLoad = np.max(sumLoad)
 sumLoad = (sumLoad - minLoad) / (maxLoad - minLoad)

 # call clustering function: splits the series into N_cluster groups, each
 # with its own train/test arrays (presumably lagged windows of the
 # normalised load — TODO confirm against clustering.Clustering).
 N_cluster = 3
 (X_train0, y_train0, X_train1, y_train1, X_train2, y_train2, X_test0, X_test1, X_test2, y_test0, y_test1, y_test2) = clustering.Clustering(T, N_cluster, n_train, n_lag, sumLoad)


 # neural network forecast: one CNN per cluster group; maxLoad/minLoad are
 # passed so errors can be computed on the de-normalised scale.
 print("start NN forecast on group 0")
 (MAPE0, RMSPE0, days0) = CNN_forecast(n_lag, T, X_train0, y_train0, X_test0, y_test0, maxLoad, minLoad)
 print('forecast result group 0 : MAPE: %.2f, RMSPE: %.2f' % (MAPE0, RMSPE0))

 print("start NN forecast on group 1")
 (MAPE1, RMSPE1, days1) = CNN_forecast(n_lag, T, X_train1, y_train1, X_test1, y_test1, maxLoad, minLoad)
 print('forecast result group 1 : MAPE: %.2f, RMSPE: %.2f' % (MAPE1, RMSPE1))

 print("start NN forecast on group 2")
 (MAPE2, RMSPE2, days2) = CNN_forecast(n_lag, T, X_train2, y_train2, X_test2, y_test2, maxLoad, minLoad)
 print('forecast result group 2 : MAPE: %.2f, RMSPE: %.2f' % (MAPE2, RMSPE2))
 
예제 #6
0
    # load sum, 2 years of data (T samples per day); part of an enclosing
    # function whose definition is outside this view.
    sumLoad = np.zeros((365 * 2 * T, ))
    # sum up the load data across customers; NaN readings count as zero
    for i in range(n_customer):
        customer_load = readData.getUserData(data, i)
        sumLoad += np.nan_to_num(customer_load)

    # Min-max normalise the aggregate load into [0, 1].
    minLoad = np.min(sumLoad)
    maxLoad = np.max(sumLoad)
    sumLoad = (sumLoad - minLoad) / (maxLoad - minLoad)

    # call clustering function: returns per-group train/test arrays
    # (presumably lagged windows of the normalised load — TODO confirm).
    N_cluster = 3
    (X_train0, y_train0, X_train1, y_train1, X_train2, y_train2, X_test0,
     X_test1, X_test2, y_test0, y_test1,
     y_test2) = clustering.Clustering(T, N_cluster, n_train, n_lag, sumLoad)

    # neural network forecast: SVR model per cluster group; maxLoad/minLoad
    # allow errors to be computed on the de-normalised scale.
    print("start NN forecast on group 0")
    (MAPE0, RMSPE0, days0) = SVR_forecast(n_lag, T, X_train0, y_train0,
                                          X_test0, y_test0, maxLoad, minLoad)
    print('forecast result group 0 : MAPE: %.2f, RMSPE: %.2f' %
          (MAPE0, RMSPE0))

    print("start NN forecast on group 1")
    (MAPE1, RMSPE1, days1) = SVR_forecast(n_lag, T, X_train1, y_train1,
                                          X_test1, y_test1, maxLoad, minLoad)
    print('forecast result group 1 : MAPE: %.2f, RMSPE: %.2f' %
          (MAPE1, RMSPE1))

    print("start NN forecast on group 2")
예제 #7
0
# Load the MD trajectory and keep only the user-selected atoms.
t = md.load(trajectory, top=topology)
sel = t.topology.select(UserInput.sel)
t = t.atom_slice(sel)

# Format trajectory: flatten the (frames, atoms, 3) coordinate array
# into a 2-D (frames, atoms * 3) float64 matrix for clustering.
temp = t.xyz
frames = t.xyz.shape[0]
atoms = t.xyz.shape[1]
original_data = temp.reshape((frames, atoms * 3))
original_data = original_data.astype('float64')
temp = []  # drop the reference to the raw coordinate array

t = []  # drop the trajectory; only the flattened matrix is needed from here
#Figure out what P is
np.seterr(all='raise')  # escalate numpy floating-point warnings to exceptions
cl = clustering.Clustering()
# Cap the sample used downstream at 10k frames (None means "use all frames").
if frames > 10000:
    sample_size = 10000
else:
    sample_size = None

original_data = cl.my_math.standardize(
    original_data)  #Not clear if I should do this

# Trying to find the optimal p — Minkowski-weight search, kept for
# reference but currently disabled.
#data = copy.copy(original_data)
#data = cl.my_math.standardize(data) #Not clear if I should do this
#p_to_try = np.arange(1.1,5.1,0.1) #Amorim's suggestion
#silhouette_scores = np.zeros(p_to_try.size)
#for q in range(0, p_to_try.size):
#    print('Testing Minkowski Weight ' + str(p_to_try[q]) + ' with max of 5.0')
예제 #8
0
파일: main.py 프로젝트: dan129/Portfolio
def main():
    """CLI entry point: dispatch on the first recognised ``sys.argv`` flag.

    Modes: -e (training), -r (search), -c (clustering), -knn (KNN).
    Returns 0 on success, 1 on any error (the traceback is printed).
    All user-facing messages are in French by design.
    """
    try:
        nbArgv = len(sys.argv)

        for arg in sys.argv:
            # Training mode
            if arg == '-e':
                if nbArgv < 8:
                    raise Exception('Message d\'erreur: nombre d\'arguments incorrect.')

                taille = None       # window size (-t)
                encodage = None     # encoding name (--enc)
                chemins = None      # corpus paths (--chemin, one or more)

                # Scan argv for the expected options.
                # NOTE(review): sys.argv[i + 1] raises IndexError if a flag
                # is the last token — confirm callers always supply a value.
                for i in range(nbArgv):
                    if sys.argv[i] == '-t':
                        taille = sys.argv[i + 1]
                    elif sys.argv[i] == '--enc':
                        encodage = sys.argv[i + 1]
                    elif sys.argv[i] == '--chemin':
                        chemins = []
                        compteur = 1

                        # Collect values until the next "-" flag.
                        while i + compteur < nbArgv and sys.argv[i + compteur][0] != "-":
                            chemins.append(sys.argv[i + compteur])
                            compteur += 1

                if taille is None or encodage is None or chemins is None:
                    raise Exception('Message d\'erreur: il manque des arguments pour effectuer l\'enregistrement.')

                entrainement.Entrainement(int(taille), encodage, chemins)

                return 0

            # Search mode
            elif arg == '-r':
                if nbArgv != 4:
                    raise Exception('Message d\'erreur: nombre d\'arguments incorrect.')

                taille = None       # window size (-t)

                for i in range(nbArgv):
                    if sys.argv[i] == '-t':
                        taille = sys.argv[i + 1]

                if taille is None:
                    raise Exception('Message d\'erreur: il manque des arguments pour effectuer la recherche.')

                recherche.Recherche(int(taille))

                return 0

            # Clustering mode
            elif arg == '-c':
                if nbArgv < 8:
                    raise Exception('Message d\'erreur: nombre d\'arguments incorrect.')

                taille = None        # window size (-t)
                nbResultats = None   # number of results (-n)
                nbCentroides = None  # number of centroids (--nc)
                mots = None          # explicit centroid words (--mots)
                chemin = None        # output path (after '>')

                for i in range(len(sys.argv)):
                    if sys.argv[i] == '-t':
                        taille = int(sys.argv[i + 1])
                    elif sys.argv[i] == '-n':
                        nbResultats = int(sys.argv[i + 1])
                    elif sys.argv[i] == '--nc':
                        nbCentroides = int(sys.argv[i + 1])
                    elif sys.argv[i] == '--mots':
                        mots = []
                        compteur = 1

                        while i + compteur < nbArgv and sys.argv[i + compteur][0] != "-":
                            mots.append(sys.argv[i + compteur])
                            compteur += 1

                        # Strip the quote characters wrapping the word list.
                        mots[0] = mots[0][1:]
                        mots[-1] = mots[-1][:-1]
                    elif sys.argv[i] == '>':
                        # NOTE(review): '>' is normally consumed by the shell;
                        # this branch only triggers when it is quoted/escaped.
                        chemin = sys.argv[i + 1]

                # Exactly one of --nc / --mots must be given.
                if taille is None or nbResultats is None or (nbCentroides is None and  mots is None):
                    raise Exception('Message d\'erreur: il manque des arguments pour effectuer l\'enregistrement.')
                elif nbCentroides is not None and mots is not None:
                    raise Exception('Message d\'erreur: seul un type de centroide peut être testé à la fois.')

                clustering.Clustering(taille, nbResultats, nbCentroides, mots, chemin)

                return 0

            # KNN mode
            # -knn = run the KNN search.
            # -t = window size.
            # -k = number of surrounding words taken into account.
            # --mots = words to compute a result for.
            # Example:
            # -knn -t5 -k 5 --mots 'Banane Maison Manger'
            elif arg == '-knn':
                if nbArgv < 10:
                    raise Exception('Message d\'erreur: nombre d\'arguments incorrect.')

                taille = None
                encodage = None
                kMots = None
                mots = None

                for i in range(len(sys.argv)):
                    if sys.argv[i] == '-t':
                        taille = int(sys.argv[i + 1])
                    elif sys.argv[i] == '-k':
                        kMots = int(sys.argv[i + 1])
                    elif sys.argv[i] == '--enc':
                        encodage = sys.argv[i + 1]
                    elif sys.argv[i] == '--mots':
                        mots = []
                        compteur = 1

                        while i + compteur < nbArgv and sys.argv[i + compteur][0] != "-":
                            mots.append(sys.argv[i + compteur])
                            compteur += 1

                        # Strip the quote characters wrapping the word list.
                        mots[0] = mots[0][1:]
                        mots[-1] = mots[-1][:-1]

                if taille is None or kMots is None or encodage is None or mots is None:
                    raise Exception('Message d\'erreur: il manque des arguments pour effectuer KNN.')

                knn.KNN(taille, kMots, encodage, mots)

                return 0

        raise Exception('Message d\'erreur: aucun argument pour l\'entrainement ou la recherche.')

    # NOTE(review): broad catch-all — prints the traceback and returns 1;
    # the bound name 'e' is unused.
    except Exception as e:
        print(traceback.format_exc())
        return 1
예제 #9
0
 def test4():
     """Smoke-test the clustering pipeline: load TF-IDF vectors for the
     corpus at ``cv_path`` and extract the top terms."""
     model = clustering.Clustering(cv_path)
     model.load_tfidf()
     model.top_terms()