Example #1
	def train(self, mtd = "margin", backupfile = "backupfile"): # TODO implement sample_weight + make method to shuffle and return sublist with data_limit
		backupfile += ".opt-"+str(self.optimization_limit)+"-"+self.optimization_method+".txt"
		for i in range(self.budget):
			if len(self.Ux) <= 1: break
			# self.viz_A = []; self.viz_B = []; self.viz_C = []; self.viz_D = []; self.viz_E = []; self.viz_F = []
			
			ids, scores = self.sortForInformativeness(mtd)
			id = ids[0]
			
			qx = self.Ux[id]
			qy = self.Uy[id]
			
			self.Lx.append(qx)
			self.Ly.append(qy)
			self.Ux.pop(id)
			self.Uy.pop(id)
			
			self.clf.X = self.Lx; self.clf.Y = self.Ly
			self.clf.train()
			
			test_accuracy = self.clf.getTestAccuracy( self.Tx, self.Ty )
			self.accuracys.append( test_accuracy )
			
			print "i=", i+1, "; acc=%.4f"%(test_accuracy*100), "%.4f"%(np.mean(self.accuracys)*100), "%.4f"%(np.average(self.accuracys, weights = range(1,1+len(self.accuracys)))*100), scores[0]
			
			if (i+1)%10 == 0:
				Util.pickleSave(backupfile, self)
				viz = Visualize(); viz.plot( [range(len(self.accuracys)), self.accuracys], fig = backupfile+".png", color = 'r', marker = '-' )
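A hedged sketch of how this pool-based loop is typically driven; the Data and ActiveLearning imports are taken from Example #27 below, while the ActiveLearning constructor call is an illustrative assumption, not the source's actual signature:

from Data import Data
from ActiveLearning import ActiveLearning

data = Data(source_file="pendigits")                   # dataset name as in Example #27
al = ActiveLearning(data)                              # constructor signature assumed
al.train(mtd="margin", backupfile="pendigits.margin")  # queries up to al.budget points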
Example #2
    def threadFn(self):
        while True:
            refImg, refBox, refClasses, pBoxes, pClasses, pScores, pProposals, pProposalScores, pAllBoxes, pAllScores, pAllClasses = self.queue.get(
            )
            refImg = refImg[0]

            a = Visualize.drawBoxes(refImg, refBox, refClasses,
                                    self.dataset.getCaptions(refClasses),
                                    self.palette)
            b = Visualize.drawBoxes(refImg,
                                    pBoxes,
                                    pClasses,
                                    self.dataset.getCaptions(pClasses),
                                    self.palette,
                                    scores=pScores)
            c = Visualize.drawBoxes(refImg,
                                    pProposals,
                                    None,
                                    None,
                                    self.palette,
                                    scores=pProposalScores * 0.3)
            d = Visualize.drawBoxes(refImg,
                                    pAllBoxes,
                                    pAllClasses,
                                    self.dataset.getCaptions(pAllClasses),
                                    self.palette,
                                    scores=pAllScores)

            preview = Visualize.tile(
                2, 2, [a, b, c, d],
                ["input", "output", "proposals", "all detections"])

            cv2.imwrite(self.opt.name + "/preview/preview.jpg", preview)

            self.queue.task_done()
Example #3
    def plot_colored_signals(self, times, axes, labels, path, figname):
        viz = Visualize()
        signame_labels = [viz.colors[y % len(viz.colors)] for y in labels]

        if len(axes) < len(self.sigReaders):
            return

        for isr, sr in enumerate(self.sigReaders):
            figurename = path + sr.signal_name + "_" + str(time.time()) + figname
            viz.plot([times, axes[isr]], axs_labels=['Time', sr.signal_name], color=signame_labels, fig=figurename)

        figurename = path + "_clustering_projection_AllSignals_" + str(time.time()) + figname
        viz.plot(axes, color=signame_labels, fig=figurename)
Example #4
    def __init__(self, *params):
        Visualize.__init__(self, params)
        self.host = params[0]
        self.port = params[1]
        self.index = params[2]

        if len(params) > 3:
            self.connectionDict = params[-1]

        else:
            self.connectionDict = {}

        self.es = Elasticsearch([self.host + ":" + str(self.port)],
                                **self.connectionDict)  # ** passes the options as keyword arguments; * would unpack only the dict keys
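A minimal construction sketch for the Elasticsearch-backed subclass above; the class name ESVisualize and the {"timeout": 30} option dict are illustrative assumptions, since the snippet only shows that params is (host, port, index[, connection options]):

viz = ESVisualize("localhost", 9200, "signals", {"timeout": 30})  # hypothetical subclass name and options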
Example #5
	def plot(self, fig=None):
		if not self.done(): return
		
		viz = Visualize()
		if len(self.X[0]) > 3:
			X = viz.PCA_Transform( list(zip(*self.X)) )
		else:
			X = self.X
		
		unique_labels = np.unique(self.Y)
		clusters = { ul:[] for ul in unique_labels }
		
		for i in range( len(X) ):
			clusters[ self.Y[i] ].append( X[i] )
		
		centers_for_plot = [] # Not the real centers because dimension was reduced using PCA
		for label in clusters:
			centers_for_plot.append( [np.mean(col) for col in list(zip(* clusters[label] )) ] )
		
		viz.do_plot(list(zip(*centers_for_plot)), marker='o', color='m')
		viz.plot_groups(clusters, fig)
Example #6
class Explore:
	def __init__(self, data):
		self.data = data
		self.viz = Visualize()

	#---------------------------------------
	def fire(self):
		range_features = range( len(self.data.X_transpose) )
		
		for i in range_features:
			axs = [ self.data.X_transpose[i] ]
			axs_labels = [ self.data.features_name[i] ]
			self.viz.plot(axs, axs_labels = axs_labels, color = self.data.Y, marker = '.', fig = "explore_1D_"+str(i)+".png")
		
		pairs = [ (i,j) for i in range_features for j in range_features ]
		for pair in pairs:
			if pair[0] != pair[1]:
				axs = [self.data.X_transpose[id] for id in pair]
				axs_labels = [self.data.features_name[id] for id in pair]
				self.viz.plot(axs, axs_labels = axs_labels, color = self.data.Y, marker = '.', fig = "explore_2D_"+str(pair)+".png")
			
		triplets = [ (i,j,k) for i in range_features for j in range_features for k in range_features ]
		for triplet in triplets:
			if triplet[0] != triplet[1] and triplet[1] != triplet[2] and triplet[0] != triplet[2]:
				axs = [self.data.X_transpose[id] for id in triplet]
				axs_labels = [self.data.features_name[id] for id in triplet]
				self.viz.plot(axs, axs_labels = axs_labels, color = self.data.Y, marker = '.', fig = "explore_3D_"+str(triplet)+".png")
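A hedged usage sketch for Explore above, assuming a Data-like object exposing X_transpose, features_name and Y, as the attribute accesses imply; the Data import and constructor follow Example #27:

from Data import Data

data = Data(source_file="pendigits")  # dataset name as in Example #27
Explore(data).fire()                  # writes explore_1D_*.png, explore_2D_*.png, explore_3D_*.png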
Example #7
    def __init__(self, opt, runManager, dataset, net, images, boxes, classes):
        self.opt = opt
        self.queue = queue.Queue()
        self.dataset = dataset
        self.palette = Visualize.Palette(dataset.categoryCount())

        predBoxes, predScores, predClasses = net.getBoxes()
        allPredBoxes, allPredScores, allPredClasses = net.getBoxes(
            scoreThreshold=0)
        proposals, proposalScores = net.getProposals()

        runManager.add("preview", [
            images, boxes, classes, predBoxes, predClasses, predScores,
            proposals, proposalScores, allPredBoxes, allPredScores,
            allPredClasses
        ],
                       modRun=self.opt.displayInterval)
        self.startThread()
Example #8
def main():
    stop_words = get_stop_words(STOP_WORDS_PATH)
    data = Initialize_Data()
    visualizer = Visualize()

    data.initialize_twitter_posts(TWITTER_POSTS_CSV, TWITTER_DATA_DIR)
    data.initialize_facebook_posts(FACEBOOK_POSTS_CSV, FACEBOOK_DATA_DIR)

    # Visualize data
    df = np.array(data.posts)
    lf = np.array(data.labels)

    pos_ind = lf == "positive"
    neg_ind = lf == "negative"

    pos = df[pos_ind]
    neg = df[neg_ind]

    visualizer.plot_data_distibution([pos.shape[0], neg.shape[0]], ["positive", "negative"], "Training set distribution")

    # Cleanup posts
    text_Cleanuper = Posts_Cleansing(data)
    text_Cleanuper.cleanup(Text_Cleanuper())

    # Train and Test Model
    clf = train_test_model(create_ngram_model(frozenset(stop_words)), np.array(data.posts), np.array(data.labels) == "positive")

    # Find best Model params and train
    clf = grid_search_model(create_ngram_model, np.array(data.posts), np.array(data.labels) == "positive", frozenset(stop_words))

    print('Saving model')
    save_model(clf, NAIVE_BAYES_MODEL_PATH)

    print('Loading model')
    trained_model = load_model(NAIVE_BAYES_MODEL_PATH)

    train_test_model(trained_model, np.array(data.posts), np.array(data.labels) == "positive")

    importance = get_most_important_features(trained_model.named_steps['vect'].vocabulary_.items(), trained_model.named_steps['clf'], 10)

    top_scores = [a[0] for a in importance[0]['tops']]
    top_words = [a[1] for a in importance[0]['tops']]
    bottom_scores = [a[0] for a in importance[0]['bottom']]
    bottom_words = [a[1] for a in importance[0]['bottom']]

    visualizer.plot_important_words(top_scores, top_words, bottom_scores, bottom_words, "Most important words for relevance")


    Y_predicted_word2vec = trained_model.predict(["Նա վատ աղջիկ է"])  # Armenian: "She is a bad girl"
    print(Y_predicted_word2vec)
Example #9
    def plot_colored_signals(self, times, axes, labels, path, figname):
        viz = Visualize()
        signame_labels = [viz.colors[y % len(viz.colors)] for y in labels]

        if len(axes) < len(self.sigReaders):
            return

        for isr, sr in enumerate(self.sigReaders):
            figurename = path + sr.signal_name + "_" + str(
                time.time()) + figname
            viz.plot([times, axes[isr]],
                     axs_labels=['Time', sr.signal_name],
                     color=signame_labels,
                     fig=figurename)

        figurename = path + "_clustering_projection_AllSignals_" + str(
            time.time()) + figname
        viz.plot(axes, color=signame_labels, fig=figurename)
Example #10
    def plot(self, fig=None):
        if not self.done(): return

        viz = Visualize()
        if len(self.X[0]) > 3:
            X = viz.PCA_Transform(list(zip(*self.X)))
        else:
            X = self.X

        unique_labels = np.unique(self.Y)
        clusters = {ul: [] for ul in unique_labels}

        for i in range(len(X)):
            clusters[self.Y[i]].append(X[i])

        centers_for_plot = [
        ]  # Not the real centers because dimension was reduced using PCA
        for label in clusters:
            centers_for_plot.append(
                [np.mean(col) for col in list(zip(*clusters[label]))])

        viz.do_plot(list(zip(*centers_for_plot)), marker='o', color='m')
        viz.plot_groups(clusters, fig)
Example #11
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
    'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
    'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
    'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
    'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
    'hair drier', 'toothbrush'
]

# categories=['diningtable', 'person', 'bottle', 'boat', 'train', 'bird', 'dog', 'cat', 'tvmonitor', 'cow', 'car', 'sofa',
# 			'horse', 'chair', 'pottedplant', 'bicycle', 'motorbike', 'aeroplane', 'sheep', 'bus']

palette = Visualize.Palette(len(categories))

image = tf.placeholder(tf.float32, [None, None, None, 3])
net = BoxInceptionResnet(image, len(categories), name="boxnet")

boxes, scores, classes = net.getBoxes(scoreThreshold=opt.threshold)

input = PreviewIO.PreviewInput(opt.i)
output = PreviewIO.PreviewOutput(opt.o, input.getFps())


def preprocessInput(img):
    def calcPad(size):
        m = size % 32
        p = int(m / 2)
        s = size - m
Example #12
def main_fuction(readmidfile=False):
    log=ProcessLog('ClusterModel.log',1,'cluster model')
    ProcessLog.loggerName='cluster model'
    dataBasePaths=[]

    datatemp=dataBasePaths[1:]
    dataBasePaths.append('D:/NJ-KING-CAO3.db')
    distance_matrix=[]
    dm = Distance_Matrix()

    V=Visualize()
    moduleLabelDict={}
    ctkm=None
    ft = lambda x: x.nodesCount <= 25


    #     mt=MemoryTest()
#     print('test begin!')
#     mt.test()
#     print('sleep begin!')
#     time.sleep(500)
#     print('sleep end!')
#
# def eee():



    pp = Preprocess()
    begin=time.time()
    pp.extractFromFiles_Robert(True, True, treeNumberLimit=5000, nodeLimit=25)
    end=time.time()
    log.getlog().debug("Read file took %f seconds."%(end-begin))
    pp.generateTrees()
    if readmidfile:
        try:
            modulelabelfile=open("ModuleLabel.txt",'r')
            for l in modulelabelfile:
                row=[x for x in l.split(',')]
                if len(row)==2:
                    moduleLabelDict[row[0]]=int(row[1])
            modulelabelfile.close()

            # mc = ModuleCluster()
            # feature_names, feature_matrix = mc.moduleStatistic_feature_extraction(pp.moduleStatistic)
            # for epsNumber in range(100):
            #     eps = float((epsNumber + 1) / 100)
            #     labels = mc.cluster(eps)
            #     file_open = open("ModuleLabel_eps=" + str(eps) + ".txt", 'w')
            #     i = 0
            #     for moduleName in mc.moduleNameList:
            #         file_open.write("%s,%d\n" % (moduleName, labels[i]))
            #         i += 1
            #     file_open.close()
            #     print("eps=%f is done." % eps)
            # V.cluster_result_linechart(eps_list, cluster_num_list, noise_num_list)
            # labels = mc.cluster()
            distancefile=open("Distancenew.txt",'r')
            colnum=0
            rownum=0
            row=[]
            for l in distancefile:
                row = [float(x) for x in l.split()]
                if len(row) > 0:
                    distance_matrix.append(row)
                    rownum+=1
            print("Distance matrix read. %d trees"%rownum)
            distancefile.close()
            if rownum!=len(row):
                raise Exception("Distance file format wrong!")
        except Exception as e:
            print(e)
            #readmidfile=False



    if readmidfile==False:
        mc=ModuleCluster()
        feature_names,feature_matrix=mc.moduleStatistic_feature_extraction(pp.moduleStatistic)

        for epsNumber in range(100):
            eps=float((epsNumber+1)/100)
            labels = mc.cluster(eps)
            file_open = open("ModuleLabel_eps="+str(eps)+".txt", 'w')
            i = 0
            for moduleName in mc.moduleNameList:
                file_open.write("%s,%d\n" % (moduleName, labels[i]))
                i += 1
            file_open.close()
            print("eps=%f is done."%eps)
        labels=mc.cluster()
        moduleLabelDict=mc.moduleLabelDict
        file_open=open("ModuleLabel5000.txt",'w')
        i=0
        for moduleName in mc.moduleNameList:
            file_open.write("%s,%d\n"%(moduleName,labels[i]))
            i+=1
        file_open.close()
        ctkm = Convolution_Tree_Kernel_Mutation(moduleLabelDict)
        begin=time.time()
        distance_matrix=dm.compute([x for j in pp.allTrees for x in filter(ft,j)],ctkm)
        end=time.time()
        log.getlog().debug("Distance computation took %f seconds." % (end - begin))
        file_open=open("Distancenew5000.txt",'w')
        for r,row in enumerate(distance_matrix):
            for c,col in enumerate(row):
                file_open.write("%.6f "%(col))
            file_open.write("\n")
        file_open.close()



    ctkm = Convolution_Tree_Kernel_Mutation(moduleLabelDict)
    print("test begin!")
    eps_list = []
    cluster_num_list = []
    noise_num_list = []
    for epsNumber in range(100):
        eps=float((epsNumber+1)/100)
        tc=TreeCluster(eps=eps,min_samples=5,metric="precomputed",n_jobs=4)
        tc.Train(distance_matrix)
        eps_list.append(eps)
        cluster_num_list.append(tc.clusterNumber)
        noise_num_list.append(tc.noiseNumber)
    #V.cluster_result_linechart(eps_list,cluster_num_list,noise_num_list)
    tc=TreeCluster(eps=0.5,n_jobs=4)
    begin=time.time()
    tc.Train(distance_matrix)
    end=time.time()
    log.getlog().debug("Clustering took %f seconds." % (end - begin))
    V.cluster_result_linechart(range(2000),tc.DB.labels_,tc.DB.labels_)
Example #13
	def plot_graph(self, data = None, iter = None, directory = "graph_plots\\"): # TODO: this should be generalized and added to Visualize.py
		viz = Visualize()
		
		if data is not None:	
			viz.do_plot( zip( *data[:iter] ), color = 'y', marker = '.')
			# viz.do_plot( zip( *data[:iter] ), color = self.data.Y[:iter], marker = '.')
		
		viz.do_plot( zip( *self.get_nodes_positions() ), color = 'r', marker = 'o')
		
		for e in self.graph.edges:
			pos_head = e.head.data.pos
			pos_tail = e.tail.data.pos
			
			viz.do_plot( zip(* [pos_head, pos_tail] ) , color = 'r', marker='-')
		
		
		if not os.path.exists(directory): os.makedirs(directory)
		
		filename = str(time.time()) + '.png'
		
		
		if iter is None: viz.end_plot(fig = directory+'_'+filename)
		else: viz.end_plot(fig = directory+filename)
Example #14
from ArtificialData import ArtificialData
from Visualize import Visualize  # added: Visualize() is used below but was not imported in this snippet
import itertools
import os
import math
import random
import datetime
import time
import warnings
import numpy as np
import matplotlib.pyplot as plt2

# =================================================================
if __name__ == '__main__':
    warnings.simplefilter(action="ignore", category=FutureWarning)
    random.seed(1234)
    viz = Visualize()

    # -----------------------------
    # agg = 0 # Always calm
    # agg = 1 # Always aggressive
    agg = None  # Mix of calm periods and aggressive periods

    # Ks = [3, 6, 8, 10] # Clusters
    # Ds = [5, 10, 15, 30, 60, 90, 120] # Duration window
    Ks = [3]  # Clusters
    Ds = [5]
    # Ds = [.5, 1, 5, 15, 60] # Duration window
    Ps = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
          1.]  # Patterns similarity (difficulty)
    Ns = [3.]  # Noise level
    MODE_NAMES = ["b", "g", "r"]
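A hedged sketch of how the Ks/Ds/Ps/Ns grids above are typically swept; itertools is already imported in this snippet, but run_experiment is an illustrative placeholder for the truncated body, not a function from the source:

for K, D, P, N in itertools.product(Ks, Ds, Ps, Ns):
    print("K=%d, D=%s, P=%.1f, N=%.1f" % (K, D, P, N))
    # run_experiment(K, D, P, N, agg)  # placeholder: generate data and cluster for this setting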
Example #15
	features_combinations = range(2, len(DATA[0]))
	
	combos=[]; qualitiesFSP=[]; qualitiesSSP=[]
	for id_combin, n_features in enumerate( features_combinations ):
		clust = Clustering(DATA, scale=True, features=None).gmm(k=gb.K) # kmeans, dpgmm
		# clust = Clustering(DATA, scale=True, features=n_features).gmm(k=gb.K)
		
		app.init_clust_tracker(clust, AXES_INFO)
		
		PLOT_PATH = gb.PLOT_PATH + str(id_combin) + '/'
		
		if not os.path.exists(PLOT_PATH): os.makedirs(PLOT_PATH)
		path = PLOT_PATH+str(id_combin)+'_'
		app.logInformations( id_combin=id_combin, clust=clust, path=path )
		
		qualityFSP, qualitySSP = app.tracking(path=path)
		
		combos.append( id_combin )
		qualitiesFSP.append(qualityFSP)
		qualitiesSSP.append(qualitySSP)
		break
		
	print("qualitiesFSP/qualitiesSSP", list(zip(qualitiesFSP, qualitiesSSP)))
	Visualize().plot( [combos, qualitiesFSP], axs_labels=['Combination (over features)', 'Quality'], marker="-", label="id_combin="+str(id_combin), fig="plots/quality-combos-"+str(time.time())+".png" )
	Visualize().plot( [combos, qualitiesSSP], axs_labels=['Combination (over features)', 'qualitySS'], marker="-", label="id_combin="+str(id_combin), fig="plots/qualitySS-combos-"+str(time.time())+".png" )
	
	# -----------------------------
	for sr in sigReaders: sr.closeDB()  # plain loop; map() is lazy in Python 3, so the original map(...) call never ran
	print("FINISH.")
	input()
	
Example #16
	def plot_graph(self, data = None, iter = None, directory = "graph_plots\\"): # TODO: this should be generalized and added to Visualize.py
		viz = Visualize()
		
		if data is not None:	
			viz.do_plot( list(zip( *data[:iter] ))[:3], color = 'y', marker = '.')  # zip objects are not subscriptable in Python 3
			# viz.do_plot( list(zip( *data[:iter] ))[:3], color = self.data.Y[:iter], marker = '.')
		
		viz.do_plot( zip( *self.get_nodes_positions() ), color = 'r', marker = 'o')
			
		dis_avg = np.mean([ distance.euclidean(edg.head.data.pos, edg.tail.data.pos) for edg in self.gng.graph.edges ])
		dis_std = np.std([ distance.euclidean(edg.head.data.pos, edg.tail.data.pos) for edg in self.gng.graph.edges ])

		for e in self.gng.graph.edges:
			
			
			
			if distance.euclidean(e.head.data.pos, e.tail.data.pos) - dis_avg > 1. * dis_std:
				
				viz.do_plot( zip(* [e.head.data.pos, e.tail.data.pos] ) , color = 'y', marker='-')
				
			else:
				viz.do_plot( zip(* [e.head.data.pos, e.tail.data.pos] ) , color = 'r', marker='-')

		
		
		if not os.path.exists(directory): os.makedirs(directory)
		
		filename = str(time.time()) + '.png'
		
		
		if iter is None: viz.end_plot(fig = directory+'_'+filename)
		else: viz.end_plot(fig = directory+filename)
Example #17
def main():
    stop_words = get_stop_words(STOP_WORDS_PATH)
    data = Initialize_Data()
    visualizer = Visualize()

    data.initialize_twitter_posts(TWITTER_POSTS_CSV, TWITTER_DATA_DIR)
    data.initialize_facebook_posts(FACEBOOK_POSTS_CSV, FACEBOOK_DATA_DIR)

    # Cleanup posts
    text_Cleanuper = Posts_Cleansing(data)
    text_Cleanuper.cleanup(Text_Cleanuper())

    # Divide data into test and train set

    X_train, X_test, Y_train, Y_test = train_test_split(data.posts,
                                                        data.labels,
                                                        test_size=0.2,
                                                        random_state=40)

    # Bag of Words model vectorization
    bag_of_words_model = Bag_Of_Words(X_train)
    bag_of_words_model.build_vectorizer(stop_words)

    X_train_counts = bag_of_words_model.data_counts
    X_test_counts = bag_of_words_model.vectorizer.transform(X_test)

    forest = RandomForestClassifier(n_estimators=100)
    forest = forest.fit(X_train_counts, Y_train)

    y_predicted_counts_train = forest.predict(X_train_counts)

    accuracy, precision, recall, f1 = get_metrics(Y_train,
                                                  y_predicted_counts_train)
    print("Train accuracy = %.3f, precision = %.3f, recall = %.3f, f1 = %.3f" %
          (accuracy, precision, recall, f1))

    y_predicted_counts = forest.predict(X_test_counts)

    accuracy, precision, recall, f1 = get_metrics(Y_test, y_predicted_counts)
    print("Test accuracy = %.3f, precision = %.3f, recall = %.3f, f1 = %.3f" %
          (accuracy, precision, recall, f1))

    # Find best hyperparams

    # Number of trees in random forest
    n_estimators = [int(x) for x in np.linspace(start=200, stop=2000, num=10)]
    # Number of features to consider at every split
    max_features = ['auto', 'sqrt']
    # Maximum number of levels in tree
    max_depth = [int(x) for x in np.linspace(10, 110, num=11)]
    max_depth.append(None)
    # Minimum number of samples required to split a node
    min_samples_split = [2, 5, 10]
    # Minimum number of samples required at each leaf node
    min_samples_leaf = [1, 2, 4]
    # Method of selecting samples for training each tree
    bootstrap = [True, False]
    # Create the random grid
    random_grid = {
        'n_estimators': n_estimators,
        'max_features': max_features,
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'bootstrap': bootstrap
    }

    # First create the model to tune
    rf = RandomForestClassifier()

    rf_random = RandomizedSearchCV(estimator=rf,
                                   param_distributions=random_grid,
                                   n_iter=100,
                                   cv=3,
                                   verbose=2,
                                   random_state=42,
                                   n_jobs=-1)
    # Fit the random search model
    rf_random.fit(X_train_counts, Y_train)
    print('Get Best Params')
    print(rf_random.best_params_)

    print('Saving model')
    save_model(rf_random, RANDOM_FOREST_MODEL_PATH)

    print('Load model')
    trained_model = load_model(RANDOM_FOREST_MODEL_PATH)
    y_predicted_counts_train = trained_model.predict(X_train_counts)

    accuracy, precision, recall, f1 = get_metrics(Y_train,
                                                  y_predicted_counts_train)
    print(
        "Train accuracy = %.3f, precisionս = %.3f, recall = %.3f, f1 = %.3f" %
        (accuracy, precision, recall, f1))

    y_predicted_counts = trained_model.predict(X_test_counts)

    accuracy, precision, recall, f1 = get_metrics(Y_test, y_predicted_counts)
    print("Test accuracy = %.3f, precision = %.3f, recall = %.3f, f1 = %.3f" %
          (accuracy, precision, recall, f1))
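A short note on the object saved above: with the default refit=True, RandomizedSearchCV refits a RandomForestClassifier on the whole training set using the best parameters found, and predict delegates to that refitted estimator, which is why the loaded search object can be used directly as a model:

best_rf = rf_random.best_estimator_  # the refitted RandomForestClassifier that predict delegates to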
Example #18
	def plot_graph(self, data = None, iter = None, dir = "graph_plots\\"):
		if len(self.graph.edges) < 1: return
		if not os.path.exists(dir): os.makedirs(dir)
		viz = Visualize(); viz2 = Visualize()
		self.separate()
		
		H, C = self.get_templates()
		hs = [ self.edgeHist(e) for e in self.graph.edges ]
		COLOR = self.group_by_ref(hs, H, C)
		
		carac_edges = [ self.edge_features(e) for i, e in enumerate(self.graph.edges) ]
		# carac_edges = [ self.edge_features(e) + [1. if COLOR[i]=='r' else 0.] for i, e in enumerate(self.graph.edges) ]
		
		# viz2.MDS_Plot( zip(*hs), dim=2, fig=dir+str(time.time())+'--.png', marker='o', color=COLOR )
		# viz2.MDS_Plot( zip(*carac_edges), dim=2, fig=dir+str(time.time())+'__.png', marker='o', color=COLOR )
		# viz2.plot( zip(*carac_edges), fig=dir+str(time.time())+'__.png', marker='o', color=COLOR )
		
		'''
		for ih, h in enumerate(H): plt.bar(range(len(h)), h, color=C[ih]); plt.savefig(dir+str(time.time())+'.png'); plt.close()
		for ie, e in enumerate(self.graph.edges):
			c = COLOR[ie]
			plt.title('-'.join(str(e) for e in e.head.data.pos)); plt.bar(range(len(hs[ie])), hs[ie], color=c); plt.savefig(dir+str(time.time())+'.png'); plt.close()
		'''
		
		# '''
		if data is not None:	
			viz.do_plot( zip( *data[:iter] ), color = 'y', marker = '.')
		
		matures = [node.data.pos for node in self.graph.nodes if node.data.age > self.mature_age ]
		if len(matures) > 0: viz.do_plot( zip( *matures ), color = 'r', marker = 'o')
		embryon = [node.data.pos for node in self.graph.nodes if node.data.age <= self.mature_age ]
		if len(embryon) > 0: viz.do_plot( zip( *embryon ), color = 'g', marker = 'o')
		low_dens = [ node.data.pos for node in self.graph.nodes if not self.isDenseNode(node) ]
		if len(low_dens) > 0: viz.do_plot( zip( *low_dens ), color = 'b', marker = 'o')
		
		for ie, e in enumerate(self.graph.edges):
			# if e.head.data.age > self.mature_age and e.tail.data.age > self.mature_age:
			# if self.isDenseNode(e.head) and self.isDenseNode(e.tail):
				
				c = COLOR[ie]
				# c = 'r' if self.isDenseEdgeMean(e) else 'y'
				# c = 'r' if self.isDenseEdgeMean(e) else COLOR[ie]
				# c = 'r' if self.isDenseEdgeMean(e) or e.head.data.label == e.tail.data.label else COLOR[ie]
				viz.do_plot( zip(* [e.head.data.pos, e.tail.data.pos] ) , color = c, marker='-', lw = 1)
				
		filename = str(time.time()) + '.png'
		if iter is None: viz.end_plot(fig = dir+'_'+filename)
		else: viz.end_plot(fig = dir+filename)
Example #19
from Maze import Maze
from Visualize import Visualize
from MazeSolverUCS import MazeSolverUCS
from MazeSolverAStar import MazeSolverAStar
from MazeSolverIDDFS import MazeSolverIDDFS

SIZE = 100
maze = Maze(SIZE)
maze.createMaze()
pathma, op = MazeSolverAStar(maze, 0).aStar()  #Manhattan
pathea, _ = MazeSolverAStar(maze, 1).aStar()  #Euclidean
pathucs = MazeSolverUCS(maze).UCS()  #Uniform cost search
pathi = MazeSolverIDDFS(maze).IDDFS()  #Iterative deepening search
Visualize(maze, None, "Empty Maze").visualizeMaze()  #Empty maze
Visualize(maze, op, "Optimal Path Maze").visualizeMazeAStar()  #Optimum path
Visualize(maze, pathma, "AStar with Manhattan Heuristic").visualizeMazeAStar(
)  #the maze with a path which is used by a*(manhattan)
Visualize(maze, pathea, "AStar with Euclidean Heuristic").visualizeMazeAStar(
)  #the maze with a path which is used by a*(euclidean)
Visualize(maze, pathucs, "UCS").visualizeMaze(
)  #the maze with a path which is used by uniform cost search
Visualize(maze, pathi, "IDDFS").visualizeIDDFS(
)  #the maze with a path which is used by iterative deepening search
Example #20
from ArtificialData import ArtificialData
from Visualize import Visualize  # added: Visualize() is used below but was not imported in this snippet
import itertools
import os
import math
import random
import datetime
import time
import warnings
import numpy as np
import matplotlib.pyplot as plt2

# =================================================================
if __name__ == '__main__':
    warnings.simplefilter(action = "ignore", category = FutureWarning)
    random.seed(1234)
    viz = Visualize()

    # -----------------------------
    # agg = 0 # Always calm
    # agg = 1 # Always aggressive
    agg = None # Mix of calm periods and aggressive periods

    # Ks = [3, 6, 8, 10] # Clusters
    # Ds = [5, 10, 15, 30, 60, 90, 120] # Duration window
    Ks = [3] # Clusters
    Ds = [5]
    # Ds = [.5, 1, 5, 15, 60] # Duration window
    Ps = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.] # Patterns similarity (difficulty)
    Ns = [3.] # Noise level
    MODE_NAMES = ["b","g", "r"]
Example #21
	def plot_graph(self, data = None, iter = None, directory = "graph_plots\\"): # TODO: this should be generalized and added to Visualize.py
		if len(self.graph.edges) < 1: return
		viz = Visualize()
		
		if data is not None:
			viz.do_plot( zip( *data[:iter] ), color = 'y', marker = '.')
			viz.do_plot( zip( *data[max(0,iter-400):iter] ), color = 'y', marker = '.')
		
		matures = [node.data.pos for node in self.graph.nodes if node.data.age > self.mature_age ]
		embryon = [node.data.pos for node in self.graph.nodes if node.data.age <= self.mature_age ]
		if len(matures) > 0: viz.do_plot( zip( *matures ), color = 'r', marker = 'o')
		if len(embryon) > 0: viz.do_plot( zip( *embryon ), color = 'g', marker = 'o')
		
		
		# dis_avg = np.mean([ distance.euclidean(edg.head.data.pos, edg.tail.data.pos) for edg in self.graph.edges ])
		# dis_std = np.std([ distance.euclidean(edg.head.data.pos, edg.tail.data.pos) for edg in self.graph.edges ])
		
		for e in self.graph.edges:
			# if e.head.data.age > self.mature_age and e.tail.data.age > self.mature_age:
				viz.do_plot( zip(* [e.head.data.pos, e.tail.data.pos] ) , color = 'r', marker='-')
		
		
		if not os.path.exists(directory): os.makedirs(directory)
		filename = str(time.time()) + '.png'
		
		if iter is None: viz.end_plot(fig = directory+'_'+filename)
		else: viz.end_plot(fig = directory+filename)
Example #22
import warnings
import random
from Mode import Mode
import matplotlib.pyplot as plt
#import matplotlib.pylab as plt
import statistics as st
import numpy as np
import SignalReaderArtificial
import app
import Clustering
import globals as gb
from Visualize import Visualize  # added: Visualize() is used below but was not imported in this snippet

if __name__ == '__main__':
    warnings.simplefilter(action="ignore", category=FutureWarning)
    random.seed(1234)
    viz = Visualize()

    modesNum = [2]  # Clusters
    duration = [60]  # Duration window
    patternOverlap = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9,
                      1.]  # Patterns similarity (difficulty)
    noise = [3.]  # Noise level

    countryside_lowerLimit = 40  #50
    countryside_higherLimit = 40  #75
    countryside_waveLength = 100
    countryside_noise = 2

    highway_lowerLimit = 70
    highway_higherLimit = 70
    highway_waveLength = 100
Example #23
	def plot_graph(self, data = None, iter = None, directory = "graph_plots\\"): # TODO: this should be generalized and added to Visualize.py
		viz = Visualize()
		colors = ['r', 'b', 'k', 'g', 'm', 'c']*1000 # FIXME
		
		if data is not None:
			viz.do_plot( zip( *data[:iter] ), color = 'y', marker = '.')
			# viz.do_plot( zip( *data[:iter] ), color = self.data.Y[:iter], marker = '.')
		
		
		labels=set([n.data.label for n in self.graph.nodes])
		d = {l: [n for n in self.graph.nodes if n.data.label == l] for l in labels}
		for ico,label in enumerate(d):
			viz.do_plot( zip( *[n.data.pos for n in d[label]] ), color = colors[ico], marker = 'o')
			for node in d[label]:
				node_links = [[node.data.pos, n.data.pos] for n in node.neighbors()]
				for nl in node_links: viz.do_plot( zip( *nl ), color = colors[ico], marker = '-')
		
		
		
		dis_avg = np.mean([ distance.euclidean(edg.head.data.pos, edg.tail.data.pos) for edg in self.graph.edges ])
		dis_std = np.std([ distance.euclidean(edg.head.data.pos, edg.tail.data.pos) for edg in self.graph.edges ])
		
		for e in self.graph.edges:
			if e.head.data.label != e.tail.data.label:
				if distance.euclidean(e.head.data.pos, e.tail.data.pos) - dis_avg > 1. * dis_std:
					viz.do_plot( zip(* [e.head.data.pos, e.tail.data.pos] ) , color = 'w', marker='-')
					
		
		
		if not os.path.exists(directory): os.makedirs(directory)
		filename = str(time.time()) + '.png'
		
		if iter is None: viz.end_plot(fig = directory+'_'+filename)
		else: viz.end_plot(fig = directory+filename)
Example #24
def main():
    stop_words = get_stop_words(STOP_WORDS_PATH)
    data = Initialize_Data()
    visualizer = Visualize()

    data.initialize_twitter_posts(TWITTER_POSTS_CSV, TWITTER_DATA_DIR)
    data.initialize_facebook_posts(FACEBOOK_POSTS_CSV, FACEBOOK_DATA_DIR)

    # Cleanup posts
    text_Cleanuper = Posts_Cleansing(data)
    text_Cleanuper.cleanup(Text_Cleanuper())

    tokenidez_list = get_labeled_list(data.posts, data.labels, stop_words)

    # Divide data into test and train set

    X_train, X_test, Y_train, Y_test = train_test_split(data.posts,
                                                        data.labels,
                                                        test_size=0.2,
                                                        random_state=40)

    # Bag of Words model vectorization
    bag_of_words_model = Bag_Of_Words(X_train)
    bag_of_words_model.build_vectorizer(stop_words)

    X_train_counts = bag_of_words_model.data_counts
    X_test_counts = bag_of_words_model.vectorizer.transform(X_test)

    # Visualize vectorized data
    visualizer.plot_vectorized_data(X_train_counts,
                                    np.array(Y_train) == 'positive')

    # Logistic Regression model
    clf = LogisticRegression(C=1.0,
                             class_weight='balanced',
                             solver='liblinear',
                             multi_class='ovr',
                             n_jobs=-1,
                             random_state=40)
    clf.fit(X_train_counts, Y_train)

    print('Saving model')
    save_model(clf, LOG_REG_MODEL_PATH)

    print('Load model')
    trained_model = load_model(LOG_REG_MODEL_PATH)

    # Predict on text labels
    y_predicted_counts = trained_model.predict(X_test_counts)

    # Get model scores

    accuracy, precision, recall, f1 = get_metrics(Y_test, y_predicted_counts)
    print("accuracy = %.3f, precision = %.3f, recall = %.3f, f1 = %.3f" %
          (accuracy, precision, recall, f1))

    # Print words importance matrix
    importance = get_most_important_features(
        bag_of_words_model.vectorizer.vocabulary_.items(), trained_model, 10)

    # Visualize important features
    top_scores = [a[0] for a in importance[0]['tops']]
    top_words = [a[1] for a in importance[0]['tops']]
    bottom_scores = [a[0] for a in importance[0]['bottom']]
    bottom_words = [a[1] for a in importance[0]['bottom']]

    visualizer.plot_important_words(top_scores, top_words, bottom_scores,
                                    bottom_words,
                                    "Most important words for relevance")

    # Word2Vec vectorization
    word2vecVectorization = Word_2_Vec(WORD_VEC_FILE)
    word2vecVectorization.build_vectorizer(tokenidez_list)

    X_train_word2vec, X_test_word2vec, Y_train_word2vec, Y_test_word2vec = train_test_split(
        word2vecVectorization.embeddings,
        data.labels,
        test_size=0.2,
        random_state=40)

    # Visualize data
    visualizer.plot_vectorized_data(word2vecVectorization.embeddings,
                                    np.array(data.labels) == 'positive')

    clf_w2v = LogisticRegression(C=30.0,
                                 class_weight='balanced',
                                 solver='newton-cg',
                                 multi_class='multinomial',
                                 random_state=40)
    clf_w2v.fit(X_train_word2vec, Y_train_word2vec)

    print('Saving model')
    save_model(clf_w2v, LOG_REG_MODEL_PATH)

    print('Load model')
    clf_w2v = load_model(LOG_REG_MODEL_PATH)

    Y_predicted_word2vec = clf_w2v.predict(X_test_word2vec)

    accuracy_word2vec, precision_word2vec, recall_word2vec, f1_word2vec = get_metrics(
        Y_test_word2vec, Y_predicted_word2vec)

    print(
        "accuracy = %.3f, precision = %.3f, recall = %.3f, f1 = %.3f" %
        (accuracy_word2vec, precision_word2vec, recall_word2vec, f1_word2vec))

    testLabel = "Խնդիրը վատ է գրված"  # Armenian: "The problem is badly written"

    word2vecVectorization.build_vectorizer([{
        "tokens":
        get_tokens_without_stop_words(testLabel.split(" "), stop_words)
    }])
    Y_predicted_word2vec = clf_w2v.predict(word2vecVectorization.embeddings)
    print(Y_predicted_word2vec)
	def train(self, mtd = "margin", backupfile = "backupfile.txt"):
		for i, x in enumerate(self.Ux):
			y1 = self.clf.predict_label(x)
			
			if mtd == "supervised": informativeness = sys.float_info.max
			if mtd == "margin": informativeness = self.clf.uncertainty_margin(x)
			
			# ===============================
			id_th = self.mab.choose()
			self.th = self.mab.algos[ id_th ]
			print "Choosen =", self.th, "nb_choices =", self.mab.nb_choices, ("avg rwd=", [ np.mean(L) for L in self.mab.rewards ] if self.mab.rewards[0]!=[] else " "), "expected=", sum([ a*l for a,l in zip(self.mab.algos,self.mab.nb_choices) ]) / sum(self.mab.nb_choices)
			prev_clf = Classification(self.Lx, self.Ly, method = self.clf.method)
			prev_clf.GAMMA, prev_clf.C = self.clf.GAMMA, self.clf.C; prev_clf.train()
			# ===============================
			# avg_rewards = [ np.mean(L[:-20]) if len(L)>0 else 1. for L in self.mab.rewards ]
			# self.th = sum([ a*l for a,l in zip(self.mab.algos,avg_rewards) ]) / sum(avg_rewards)
			# print "Choosen =", self.th, "avg rwd=", avg_rewards
			# ===============================
			
			if informativeness > self.th:
				qx = x
				qy = self.Uy[i]
				
				self.Lx.append(qx)
				self.Ly.append(qy)
				self.queried += 1
			
				self.clf.X = self.Lx; self.clf.Y = self.Ly; self.clf.train()
			
			# ===============================
			reward = 1. - abs( 0.1 - self.queried / (i+1.) )
			self.mab.update(id_th, reward)
			# ===============================
			# for idt in range(len(self.mab.algos)):
				# reward = 1. - abs( 0.3 - (self.queried-1+1) / (i+1.) ) if informativeness > self.mab.algos[idt] else 1. - abs( 0.4 - (self.queried-1) / (i+1.) )
				# self.mab.update(idt, reward)
			# ===============================
			
			
			self.ths.append( self.th )
			self.infos.append( informativeness )
			self.accuracys.append( self.clf.getTestAccuracy( self.Tx, self.Ty ) )
			self.queries.append( self.queried )
			
			self.sup_infos.append( self.sup_clf.uncertainty_margin(x) ) # TODO should not be here
			self.sup_clf.X = self.Lx0+self.Ux[:i+1]; self.sup_clf.Y = self.Ly0+self.Uy[:i+1]; self.sup_clf.train() # TODO should not be here
			self.sup_accuracys.append( self.sup_clf.getTestAccuracy( self.Tx, self.Ty ) ) # TODO should not be here
			
			
			'''
			if i>10:
				# last_infos = self.infos[-100:] if len(self.infos) > 100 else self.infos[:]
				# self.th = np.mean( last_infos )
				
				if informativeness > self.th: # queried
					if y1 == qy: # but was correctly predicted
						self.th = self.th + 0.1 * (informativeness - self.th)
				else:
					if y1 != qy:
						self.th = self.th - 0.1 * (self.th - informativeness )
			'''
			
			
			print "i=", i+1, self.queried, self.queried / (i+1.), "-- acc=%.4f"%(self.accuracys[-1]*100), "%.4f"%(self.sup_accuracys[-1]*100), "-- %.4f"%(np.mean(self.accuracys)*100), "%.4f"%(np.average(self.accuracys, weights = range(1,1+len(self.accuracys)))*100), "--", informativeness
			
			if (i+1)%10 == 0:
				Util.pickleSave(backupfile, self); viz = Visualize()
				
				viz.do_plot( [range(len(self.infos)), self.ths], color = 'b', marker = '-' )
				viz.do_plot( [range(len(self.infos)), self.infos], color = 'r', marker = '-' )
				viz.do_plot( [range(len(self.sup_infos)), self.sup_infos], color = 'y', marker = '-' )
				viz.end_plot( fig = backupfile+"_stream_inf.png" )
				
				viz.do_plot( [range(len(self.accuracys)), self.accuracys], color = 'r', marker = '-' )
				viz.do_plot( [range(len(self.sup_accuracys)), self.sup_accuracys], color = 'y', marker = '-' )
				viz.end_plot( fig = backupfile+"_stream_acc.png" )
				
				viz.do_plot( [range(len(self.queries)), self.queries], color = 'r', marker = '-' )
				viz.do_plot( [range(len(self.queries)), range(len(self.queries))], color = 'y', marker = '-' )
				viz.end_plot( fig = backupfile+"_stream_lab.png" )
Example #26
from part2 import part2
from part3 import part3
from part4 import part4
from part5 import part5

# -----------------------------------------------------------------------------
# Load datasets from ass 1 (unscaled)
# -----------------------------------------------------------------------------
datasets = LoadPreprocessDataset()

# -----------------------------------------------------------------------------
# Visualize DataSet - Scatter Matrix
# -----------------------------------------------------------------------------
if 1:  # set to 1 to enable
    print(82 * '_')
    Visualize(datasets['wifi'])
    print(82 * '_')
    Visualize(datasets['letter'])

# -----------------------------------------------------------------------------
# PART 1 - Run EM and K-means on two datasets
# -----------------------------------------------------------------------------
if 1:  # set to 1 to enable
    print(82 * '_')
    print("PART 1a - Run K-means on two datasets")
    print(82 * '_')
    print()
    part1_km(datasets['wifi'])
    print(82 * '_')
    print()
    part1_km(datasets['letter'])
Example #27
import random
import numpy as np
from Data import Data
from ActiveLearning import ActiveLearning
import Util
from Visualize import Visualize

#-----------------------------------
if __name__ == "__main__":
	random.seed( 12345 )
	
	#-----------------------------------
	viz = Visualize()
	colors = ['y','c','m','b','g','k','r']
	
	# datasetname = "optdigits"
	datasetname = "pendigits"
	# datasetname = "CNAE9"
	data = Data( source_file = datasetname )
	print "nb data points:", len(data.X), "nb features in data:", data.nb_features
	
	#-----------------------------------
	'''
	opt = {}
	opt["random"] = Util.pickleLoad('___AL Results\\optdigits\\random\\_optdigits.random.50.opt-10-margin.txt')
	# opt["entropy"] = Util.pickleLoad('___AL Results\\optdigits\\entropy\\_optdigits.entropy.50.opt-10-margin.txt')
	# opt["margin"] = Util.pickleLoad('___AL Results\\optdigits\\margin\\_optdigits.margin.50.opt-10-margin.txt')
	opt["proba"] = Util.pickleLoad('___AL Results\\optdigits\\proba\\_optdigits.proba.50.opt-10-margin.txt')
	# opt["weight"] = Util.pickleLoad('___AL Results\\optdigits\\weight\\_optdigits.weight.50.opt-20-entropy.txt')
	opt["etc_"] = Util.pickleLoad('___AL Results\\optdigits\\etc\\_optdigits.etc_.50.opt-20-margin.txt')
	opt["etc"] = Util.pickleLoad('___AL Results\\optdigits\\etc\\_optdigits.etc.50.opt-20-margin.txt')
Example #28
	def __init__(self, data):
		self.data = data
		self.viz = Visualize()
Example #29
	def sortForInformativeness(self, mtd):
		if mtd in ["etc", "etc_", "expectedErrorReduction", "weight", "optimal", "test", "intuition"] :
			ids, scores = self.sortForInformativeness(self.optimization_method)
			
		scores = []
		for ix, x in enumerate(self.Ux):
			y1, y2, p1, p2 = self.clf.getMarginInfo(x)
			
			if mtd == "intuitionM":
				if ix in ids[:self.optimization_limit]:
					informativeness = self.clf.uncertainty_margin(x)
				else:
					informativeness = 0.
			#---------------------------------------------------------
			if mtd == "margin":
				informativeness = self.clf.uncertainty_margin(x)
			
			#---------------------------------------------------------
			elif mtd == "proba":
				informativeness = self.clf.uncertainty_prediction(x)
			
			#---------------------------------------------------------
			elif mtd == "entropy":
				informativeness = self.clf.uncertainty_entropy(x)
			
			#---------------------------------------------------------
			elif mtd == "random":
				informativeness = random.uniform(0., 1.)
			
			#---------------------------------------------------------
			elif mtd == "weight":
				if ix in ids[:self.optimization_limit]:
					informativeness = self.clf.uncertainty_weight(x, self.Lx, self.Ly)
				else: informativeness = 0.
			
			#---------------------------------------------------------
			elif mtd == "expectedErrorReduction":
				if ix in ids[:self.optimization_limit]:
					sums = 0.
					YP = self.clf.predict(x, all = True)
					YP.sort(key=operator.itemgetter(1), reverse=True)
					for ir, (yy, proba) in enumerate(YP):
						if ir == 5: break
						temp_clf = Classification(self.Lx + [x], self.Ly + [yy], method = self.clf.method); temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C # TODO FIXME: do it in general not specifically for svm
						temp_clf.train()
						e_h1 = sum( [ temp_clf.uncertainty_entropy(dp) for dp in self.Ux if dp != x ] )
						
						sums += (proba) * e_h1
					informativeness = 1. / sums
				else:
					informativeness = 0.
			
			#---------------------------------------------------------
			elif mtd == "etc":
				if ix in ids[:self.optimization_limit]:
					temp_clf1 = Classification(self.Lx + [x], self.Ly + [y1], method = self.clf.method); temp_clf1.GAMMA, temp_clf1.C = self.clf.GAMMA, self.clf.C
					temp_clf1.train()
					diff1 = sum( [ abs(temp_clf1.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) if temp_clf1.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ] ) / (len(self.Ux) - 1.)
					
					temp_clf2 = Classification(self.Lx + [x], self.Ly + [y2], method = self.clf.method); temp_clf2.GAMMA, temp_clf2.C = self.clf.GAMMA, self.clf.C
					temp_clf2.train()
					diff2 = sum( [ abs(temp_clf2.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) if temp_clf2.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ] ) / (len(self.Ux) - 1.)
					
					informativeness = diff1 # this one is particularly good for rejection (to be confirmed)
					informativeness = diff1 if p1/(1+diff1) >= p2/(1+diff2) else diff2
					informativeness = p1*diff1 + p2*diff2 + 1.
				else:
					informativeness = 0.
				
			#---------------------------------------------------------
			elif mtd == "etc_":
				if ix in ids[:self.optimization_limit]:
					temp_clf1 = Classification(self.Lx + [x], self.Ly + [y1], method = self.clf.method); temp_clf1.GAMMA, temp_clf1.C = self.clf.GAMMA, self.clf.C
					temp_clf1.train()
					diff1 = sum( [ 1. if temp_clf1.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ] ) / (len(self.Ux) - 1.)
					
					temp_clf2 = Classification(self.Lx + [x], self.Ly + [y2], method = self.clf.method); temp_clf2.GAMMA, temp_clf2.C = self.clf.GAMMA, self.clf.C
					temp_clf2.train()
					diff2 = sum( [ 1. if temp_clf2.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Ux if dp != x ] ) / (len(self.Ux) - 1.)
					
					informativeness = diff1 # this one is particularly good for rejection (to be confirmed)
					informativeness = diff1 if p1/(1+diff1) >= p2/(1+diff2) else diff2
					informativeness = p1*diff1 + p2*diff2 + 1.
				else:
					informativeness = 0.
				
			#---------------------------------------------------------
			elif mtd == "test":
				if ix in ids[:self.optimization_limit]:
					temp_clf1 = Classification(self.Lx + [x], self.Ly + [y1], method = self.clf.method); temp_clf1.GAMMA, temp_clf1.C = self.clf.GAMMA, self.clf.C
					temp_clf1.train()
					diff1 = np.mean( [0.]+[ abs(temp_clf1.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) for dp in self.Ux if temp_clf1.predict_label(dp) != self.clf.predict_label(dp) and dp != x ] )
					
					temp_clf2 = Classification(self.Lx + [x], self.Ly + [y2], method = self.clf.method); temp_clf2.GAMMA, temp_clf2.C = self.clf.GAMMA, self.clf.C
					temp_clf2.train()
					diff2 = np.mean( [0.]+[ abs(temp_clf2.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp)) for dp in self.Ux if temp_clf2.predict_label(dp) != self.clf.predict_label(dp) and dp != x ] )
					
					informativeness = diff1 # this one is particularly good for rejection (to be confirmed)
					informativeness = diff1 if p1/(1+diff1) >= p2/(1+diff2) else diff2
					informativeness = p1*diff1 + p2*diff2 + 1.
				else:
					informativeness = 0.
				
			#---------------------------------------------------------
			elif mtd == "intuition":
				if ix in ids[:self.optimization_limit]:
					true_y = self.Uy[ self.Ux.index(x) ]
					
					temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method = self.clf.method)
					temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
					
					ucts = [ temp_clf.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp) for dp in self.Tx ]
					ids_ucts = (-np.array(ucts)).argsort()[:50]
					
					# diff = np.mean( [ 1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) else 0. for dp in self.Tx ] )
					diff = np.mean([1. if temp_clf.predict_label(dp) != self.clf.predict_label(dp) and idp in ids_ucts else 0. for idp,dp in enumerate(self.Tx) ])
					
					informativeness = diff
				else:
					informativeness = 0.
			#---------------------------------------------------------
			elif mtd == "intuition": # NOTE: duplicate condition; this second "intuition" branch is unreachable as written
				if ix in ids[:self.optimization_limit]:
					true_y = self.Uy[ self.Ux.index(x) ]
					
					temp_clf = Classification(self.Lx + [x], self.Ly + [true_y], method = self.clf.method)
					temp_clf.GAMMA, temp_clf.C = self.clf.GAMMA, self.clf.C; temp_clf.train()
					
					# ---------------------
					imp_x = [ xdp for xdp in self.Tx if temp_clf.predict_label(xdp) != self.clf.predict_label(xdp) ]
					imp_y_hh = [ temp_clf.predict_label(xdp) for xdp in self.Tx if temp_clf.predict_label(xdp) != self.clf.predict_label(xdp) ]
					
					if len( set(imp_y_hh) ) > 1: 
						# hh = Classification(imp_x, imp_y_hh, method = self.clf.method)
						hh = Classification(imp_x + [x], imp_y_hh + [true_y], method = self.clf.method, tuning = False)
						hh.GAMMA, hh.C = self.clf.GAMMA, self.clf.C; hh.train()
					else:
						hh = self.clf
					# ---------------------
					
					h_inconsistant_truth = 0; hh_inconsistant_truth = 0; hh_inconsistant_h = 0; h_consistency = []; hh_consistency = []
					for ilx, lx in enumerate(self.Lx):
						h_consistency.append( self.clf.getProbaOf( self.Ly[ilx], lx ) )
						# hh_consistency.append( hh.getProbaOf( self.Ly[ilx], lx ) )
						hh_consistency.append( hh.getProbaOf( self.Ly[ilx], lx ) if hh.predict_label(lx) == self.Ly[ilx] else 0. )
						
						if self.clf.predict_label(lx) != self.Ly[ilx]: h_inconsistant_truth += 1.
						if hh.predict_label(lx) != self.Ly[ilx]: hh_inconsistant_truth += 1.
						if hh.predict_label(lx) != self.clf.predict_label(lx): hh_inconsistant_h += 1.
					h_consistency = np.mean(h_consistency)
					hh_consistency = np.mean(hh_consistency) if len( set(imp_y_hh) ) > 1 else 0.
					
					consistency_dif = hh_consistency - h_consistency
					
					# ---------------------
					diff = []; errors = 0.; trues = 0.; impacted = 0; impacted_probs = [];
					for idp, dp in enumerate(self.Tx):
						if temp_clf.predict_label(dp) != self.clf.predict_label(dp): ##################
							impacted += 1.
							impacted_probs.append( abs( temp_clf.getPredictProba(1,dp) - self.clf.getPredictProba(1,dp) ) )
							if self.Ty[idp]!=temp_clf.predict_label(dp): errors += 1.
							else: trues += 1.
						
						# if temp_clf.predict_label(dp) != self.clf.predict_label(dp) and self.Ty[idp]==temp_clf.predict_label(dp): diff.append( 1. )
						# if temp_clf.predict_label(dp) != self.clf.predict_label(dp) and trues - errors > 0: diff.append( 1. )
						# if temp_clf.predict_label(dp) != self.clf.predict_label(dp): diff.append( 1. )
						
						if temp_clf.predict_label(dp) != self.clf.predict_label(dp): diff.append( 1. )
						
						else: diff.append( 0. )
					diff = np.mean( diff )
					
					# diff = diff * np.mean(impacted_probs) # seems to be working ...
					
					# ---------------------
					# self.viz_A.append( consistency_dif )
					self.viz_A.append( hh_consistency )
					self.viz_B.append( errors )
					self.viz_C.append( trues )
					self.viz_D.append( trues - errors ); posI = [inb for inb,nbD in enumerate(self.viz_D) if nbD >= 0.]
					self.viz_E.append( impacted )
					self.viz_F.append( np.mean(impacted_probs) )
					viz = Visualize(); viz.plot( [self.viz_A, self.viz_B], fig = "test_errors.png", color = 'r', marker = 'o' )
					vizu = Visualize(); vizu.plot( [self.viz_A, self.viz_C], fig = "test_trues.png", color = 'r', marker = 'o' )
					vizuu = Visualize(); vizuu.plot( [self.viz_A, self.viz_D], fig = "test_trues_errors.png", color = 'r', marker = 'o' )
					
					vizuuu = Visualize(); vizuuu.do_plot( [self.viz_A, self.viz_E], color = 'r', marker = 'o' )
					vizuuu.do_plot( [[self.viz_A[inb] for inb in posI], [self.viz_E[inb] for inb in posI]], color = 'b', marker = 'o' )
					vizuuu.end_plot(fig = "impacted.png")
					
					print(hh_consistency, hh_inconsistant_truth, "---", len(imp_x), len(set(imp_y_hh)), "============>", impacted, trues - errors)
					
					informativeness = diff
				else:
					informativeness = 0.
				
			#---------------------------------------------------------
			
			scores.append( informativeness )
		
		ids = (-np.array(scores)).argsort()
		sorted_scores = [ scores[id] for id in ids ]	
		# sorted_scores = [ 1.*scores[id] / sum(scores) for id in ids ]	
		
		return ids, sorted_scores
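For context, the ranking returned here is consumed as in Example #1; a minimal sketch reusing the names above (self stands for the same active-learner object):

ids, scores = self.sortForInformativeness("margin")
qx, qy = self.Ux[ids[0]], self.Uy[ids[0]]  # most informative unlabeled point, then moved into (Lx, Ly) and the classifier retrained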
Example #30
	def plot_graph(self, data = None, iter = None, directory = "graph_plots\\"): # TODO: this should be generalized and added to Visualize.py
		viz = Visualize()
		
		if data is not None:	
			viz.do_plot( zip( *data[:iter] ), color = 'y', marker = '.')
			# viz.do_plot( zip( *data[:iter] ), color = self.data.Y[:iter], marker = '.')
		
		matures = [node.data.pos for node in self.graph.nodes if node.data.age > self.mature_age ]
		embryon = [node.data.pos for node in self.graph.nodes if node.data.age <= self.mature_age ]
		if len(matures) > 0: viz.do_plot( zip( *matures ), color = 'r', marker = 'o')
		if len(embryon) > 0: viz.do_plot( zip( *embryon ), color = 'y', marker = 'o')
		
		for e in self.graph.edges:
			pos_head = e.head.data.pos
			pos_tail = e.tail.data.pos
			
			viz.do_plot( zip(* [pos_head, pos_tail] ) , color = 'r', marker='-')
		
		
		if not os.path.exists(directory): os.makedirs(directory)
		
		filename = str(time.time()) + '.png'
		
		
		if iter is None: viz.end_plot(fig = directory+'_'+filename)
		else: viz.end_plot(fig = directory+filename)
Example #31
	def vizualize_buses( all_buses, dates_all_buses, dim = 2, path = "buses_viz/", m = '-' ):
		Util.mkdir(path)
		
		# '''
		viz0 = Visualize(); viz1 = Visualize(); viz2 = Visualize(); viz3 = Visualize()
		c = Visualize.colors( len(all_buses) )
		
		D = Util.flatList(all_buses)
		viz1.PCA_Plot( zip(*D), dim = dim, fig=path+"_Buses_All.png", color='b', marker = m )
		
		X = viz1.PCA_Transform( zip(*D), dim = dim )
		all_buses_transformed = []
		for ib in range( len(all_buses) ):
			print(ib+1, end=" ")
			Xb = [ x for i,x in enumerate(X) if D[i] in all_buses[ib] ]
			all_buses_transformed.append( Xb )
			viz0.do_plot( zip(*Xb), color = c[ib], marker = m )
			viz1.plot( zip(*Xb), fig=path+"Bus"+str(ib)+".png", color = c[ib], marker = m )
			
		viz0.end_plot(fig=path+"_Buses_All_c.png")
		# '''
		
		window = 30; step = 10; window_t = datetime.timedelta(days = window); step_t = datetime.timedelta(days = step)
		t = datetime.datetime(year=2011, month=6, day=1)
		while t <= datetime.datetime(year=2015, month=9, day=1):
			viz2.do_plot( [[-0.39, 0.39], [-0.39, 0.39]], color='w' )
			for ib, bus in enumerate(all_buses_transformed):
				# bus_tt = [x for ix,x in enumerate(bus) if ix < len(dates_all_buses[ib]) and dates_all_buses[ib][ix] > t and dates_all_buses[ib][ix] <= t+window_t]
				bus_tt = [x for ix,x in enumerate(bus) if ix < len(dates_all_buses[ib]) and dates_all_buses[ib][ix] <= t+window_t]
				if len( bus_tt ) > 0:
					viz2.do_plot( zip(* bus_tt ), color = c[ib], marker = m )
					viz3.do_plot( [[-0.39, 0.39], [-0.39, 0.39]], color='w' ); viz3.do_plot( zip(* bus_tt ), color = c[ib], marker = m ); viz3.end_plot(fig=path+"Bus"+str(ib)+"_"+Util.date2str(t+window_t)+".png")
			viz2.end_plot(fig=path+"_Buses_"+Util.date2str(t+window_t)+".png")
			t += step_t
		