Example #1
    def visualize(self,
                  predictions,
                  model,
                  save_dir='../../save/keras',
                  plt_name='keras'):
        # Evaluate predictions using accuracy metrics
        accuracy = accuracy_score(self.y_test, predictions)
        print('{} Classification'.format(model))
        print("Accuracy: %.2f%%" % (accuracy * 100.0))

        # Evaluate predictions using confusion metrics and plot confusion matrix
        classification_report = metrics.classification_report(
            self.y_test,
            predictions,
            target_names=['NadaSportswear', 'Sportswear'])
        print(classification_report)

        # Calculating confusion matrix
        cnf_matrix = confusion_matrix(self.y_test, predictions)
        np.set_printoptions(precision=2)

        # Plot module is used for plotting confusion matrix, classification report
        plot = Plot()
        plot.plotly(cnf_matrix, classification_report,
                    save_dir, plt_name)
Example #2
def init_data():
    X, y = import_power_plant_data()
    X, y = X.to_numpy(), y.to_numpy()
    #print(X,y)
    #exit()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,shuffle=True, random_state=1234)
    print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
    opt = SGD(lr=0.01)
    epoch = 10000
    regressor = LinearRegression(opt, epoch=epoch)
    x_plot = list(range(1,epoch+1))
    all_mse = regressor.fit(X_train, y_train)
    predicted = regressor.predict(X_test)
    #print(len(predicted))
    #exit()
    mse_value = Metrics.mse(y_test, predicted)
    #print(len(x_plot), len(all_mse))
    #print(mse_value)
    #y_pred_line = regressor.predict(X)
    #cmap = plt.get_cmap('viridis')
    #fig = plt.figure(figsize=(8,6))
    #m1 = plt.scatter(X_train, y_train, color=cmap(0.9), s=10)
    #m2 = plt.scatter(X_test, y_test, color=cmap(0.5), s=10)
    #plt.plot(x_plot, all_mse, color = "blue", linewidth=2)
    Plot.plot_time_series(x_plot, all_mse, "mse_plot", "number of iterations", "Mean Square Error (MSE)", "MSE vs Number of iterations")

    plt.show()
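Example #3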
    def __init__(self, sax_engine, export = True):
        self.se_instance = sax_engine
        self.data = sax_engine.sax_data
        self.process_data = []
        self.ps = None
        self.ploter = Plot(self)
        if export:
            self.export_format()
Example #4
    def __init__(self, dataset, net, common_params, solver_params):
        '''

        :param dataset:
        :param net:
        :param common_params:
        :param solver_params:
        '''
        self.learning_rate = solver_params['learning_rate']
        self.beta1 = float(solver_params['beta1'])
        self.beta2 = float(solver_params['beta2'])
        self.batch_size = int(common_params['batch_size'])
        self.width = common_params['width']
        self.height = common_params['height']
        self.depth = common_params['depth']
        self.channel = int(common_params['channel'])
        self.testing = common_params['testing']
        if self.testing:
            self.test_batch_size = common_params['test_batch_size']
        if 'pretrain_model_path' in solver_params:
            self.pretrain_path = solver_params['pretrain_model_path']
        else:
            self.pretrain_path = 'None'
        self.model_name = solver_params['model_name']
        self.train_dir = str(solver_params['train_dir'])
        self.max_iterators = int(solver_params['max_iterators'])
        self.eval_names = solver_params['eval_names']
        if 'keep_prob' in solver_params:
            self.keep_prob = solver_params['keep_prob']
        else:
            self.keep_prob = 1.0
        if 'net_input' in solver_params:
            self.net_input = solver_params['net_input']
        else:
            self.net_input = {}

        self.dataset = dataset
        self.net = net

        self.config = tf.ConfigProto()
        self.config.gpu_options.allow_growth = True

        self.construct_graph()

        self.do_plot = solver_params['plot']
        if self.do_plot:
            self.plot = Plot(solver_params['plot_params'])
        return
Example #5
    def __init__(self, ss):
        self.ts = None
        self.ts_clust = None
        self.ts_name = None
        self.ss = ss
        self.sampler = 168  # 24/d - 168/w - 744[31](720[30]-696[29]-672[28])/m - 8760(8784)/y
        self.ploter = Plot(self)
        self.n = 5
        self.capteurs_names = []
        self.from_save = False
        self.proto = []
        self.last_readed = {}
        self.store_path = "cluster/13_06/"
        self.name_file = None
        self.clust_name = "Master"
        self.metric = ""
        self.geo = Geo(self.ss.cwd)
        self.cluster_by_name = {}
        self.cluster_by_fullname = {}
        self.size_min = 0
        self.nb_capteur = []
        self.nb_week = []
Example #6
	def __init__(self, board_size: int, black: Agent, train_configs: List[Config], eval_configs: List[Config],
	             test_configs: List[Config], human_configs: List[Config]) -> None:
		assert black.color is Color.BLACK, f'Invalid black agent: black agent\'s color is not black'

		self.board_size: int = board_size
		self.black = black
		self.train_configs: List[Config] = train_configs
		self.eval_configs: List[Config] = eval_configs
		self.test_configs: List[Config] = test_configs
		self.human_configs: List[Config] = human_configs

		self.total_episodes: int = 0

		# initialize plot
		if isinstance(self.black, TrainableAgent):
			self.plot: Plot = Plot()
			self.scores: defaultdict = defaultdict(list)

		# initialize colors
		init()
Example #7
                                       running_batch_elapsed_time) / 60.0

        print(
            "===== TRAINING STEP {} | ~{:.0f} MINUTES REMAINING =====".format(
                training_step, estimated_minutes_remaining))
        print("CRITIC LOSS:     {0}".format(running_critic_loss))
        print("GENERATOR LOSS:  {0}\n".format(running_generator_loss))

        # Loss histories
        critic_losses_per_vis_interval.append(running_critic_loss)
        generator_losses_per_vis_interval.append(running_generator_loss)
        running_critic_loss = 0.0
        running_generator_loss = 0.0

        Plot.plot_histories(
            [critic_losses_per_vis_interval], ["Critic"],
            "{0}critic_loss_history.png".format(MODEL_OUTPUT_DIR))
        Plot.plot_histories(
            [generator_losses_per_vis_interval], ["Generator"],
            "{0}generator_loss_history.png".format(MODEL_OUTPUT_DIR))

        # Save model at checkpoint
        torch.save(generator.state_dict(),
                   "{0}generator".format(MODEL_OUTPUT_DIR))
        torch.save(critic.state_dict(), "{0}critic".format(MODEL_OUTPUT_DIR))

        # Upsample and save samples
        sample_tags = brainpedia.preprocessor.decode_label(
            labels_batch.data[0])
        real_sample_data = real_brain_img_data_batch[0].cpu().data.numpy(
        ).squeeze()
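Example #8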
class PrefixSpanManager:
    """
    Classe d'outil a l'utilisation de prefixspan

    Parameters:
        * sax_engine: SaxEngine
            Instance de preprocessing SAX
        * export: Boolean
            Si oui ou non les donnees sont deja exportees au bon format

    Variables:
        * se_instance: SaxEngine
            L'instance de class SAX
        * data: Array[]
            Les donnees au format SAX
    """
    def __init__(self, sax_engine, export = True):
        self.se_instance = sax_engine
        self.data = sax_engine.sax_data
        self.process_data = []
        self.ps = None
        self.ploter = Plot(self)
        if export:
            self.export_format()

    def run(self):
        """
        Creer l'instance PrefixSpan avec les donnees pretraites
        """
        self.ps = PrefixSpan(self.process_data)

    def export_format(self):
        """
        Modifie le format pour correspondre au besoin de l'instance de PrefixSpan
        """
        tmp = []
        for elmt in self.data:
            tmp.append(elmt.ravel())
        self.process_data = tmp

    def topk(self, n, c = True):
        """
        Affiche les motifs les plus frequents(plus grand support) et par defaut les fermes

        Parameters:
            * n: int
                Nombre de motifs a afficher
        Returns:
            Liste de motifs frequent
        """
        return self.ps.topk(n, closed = c)

    def frequent(self, n):
        """
        Retourne les frequent de support n

        Parameters:
            * n: int
                Support minimal
        Returns:
            Liste des motifs de support minimal n
        """
        return self.ps.frequent(n)

    def plot(self, l):
        self.ploter.plot_prefixspan(l)
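
A minimal usage sketch for the PrefixSpanManager class above, assuming a SaxEngine instance (here called sax_engine) has already been built; SaxEngine and Plot come from the surrounding project and are not shown here:

# Hypothetical usage of PrefixSpanManager; sax_engine is assumed to exist.
manager = PrefixSpanManager(sax_engine)   # export=True, so export_format() runs immediately
manager.run()                             # build the PrefixSpan instance over process_data
top_patterns = manager.topk(5)            # the 5 most frequent closed patterns
support_3 = manager.frequent(3)           # all patterns with support >= 3
manager.plot(top_patterns)                # delegates to Plot.plot_prefixspan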
Example #9
    def __init__(self, dataset, net, common_params, solver_params):
        '''

        :param dataset:
        :param net:
        :param common_params:
        :param solver_params:
        '''
        self.learning_rate = solver_params['learning_rate']
        self.beta1 = float(solver_params['beta1'])
        self.beta2 = float(solver_params['beta2'])
        self.batch_size = int(common_params['batch_size'])
        self.width = common_params['width']
        self.height = common_params['height']
        self.channel = int(common_params['channel'])
        self.testing = common_params['testing']
        if self.testing:
            self.test_batch_size = common_params['test_batch_size']
        if 'pretrain_model_path' in solver_params:
            self.pretrain_path = solver_params['pretrain_model_path']
        else:
            self.pretrain_path = 'None'
        self.model_name = solver_params['model_name']
        self.train_dir = str(solver_params['train_dir'])
        self.max_iterators = int(solver_params['max_iterators'])
        self.eval_names = solver_params['eval_names']
        if 'keep_prob' in solver_params:
            self.keep_prob = solver_params['keep_prob']
        else:
            self.keep_prob = 1.0
        if 'net_input' in solver_params:
            self.net_input = solver_params['net_input']
        else:
            self.net_input = {}
        if 'aug' in solver_params:
            self.aug = solver_params['aug']
        else:
            self.aug = None
        if 'label_type' in common_params:
            self.label_type = common_params['label_type']
        else:
            self.label_type = 'matrix'
        if self.label_type == 'array':
            if 'label_len' in common_params:
                self.label_len = common_params['label_len']
            else:
                raise Exception(
                    'Label type is array while not given label length!')

        self.dataset = dataset
        self.net = net

        self.config = tf.ConfigProto()
        self.config.gpu_options.allow_growth = True

        self.construct_graph()

        self.do_plot = solver_params['plot']
        if self.do_plot:
            self.plot = Plot(solver_params['plot_params'])
        return
Example #10
class Solver2D(Solver):
    '''2-D model solver
    '''
    def __init__(self, dataset, net, common_params, solver_params):
        '''

        :param dataset:
        :param net:
        :param common_params:
        :param solver_params:
        '''
        self.learning_rate = solver_params['learning_rate']
        self.beta1 = float(solver_params['beta1'])
        self.beta2 = float(solver_params['beta2'])
        self.batch_size = int(common_params['batch_size'])
        self.width = common_params['width']
        self.height = common_params['height']
        self.channel = int(common_params['channel'])
        self.testing = common_params['testing']
        if self.testing:
            self.test_batch_size = common_params['test_batch_size']
        if 'pretrain_model_path' in solver_params:
            self.pretrain_path = solver_params['pretrain_model_path']
        else:
            self.pretrain_path = 'None'
        self.model_name = solver_params['model_name']
        self.train_dir = str(solver_params['train_dir'])
        self.max_iterators = int(solver_params['max_iterators'])
        self.eval_names = solver_params['eval_names']
        if 'keep_prob' in solver_params:
            self.keep_prob = solver_params['keep_prob']
        else:
            self.keep_prob = 1.0
        if 'net_input' in solver_params:
            self.net_input = solver_params['net_input']
        else:
            self.net_input = {}
        if 'aug' in solver_params:
            self.aug = solver_params['aug']
        else:
            self.aug = None
        if 'label_type' in common_params:
            self.label_type = common_params['label_type']
        else:
            self.label_type = 'matrix'
        if self.label_type == 'array':
            if 'label_len' in common_params:
                self.label_len = common_params['label_len']
            else:
                raise Exception(
                    'Label type is array while not given label length!')

        self.dataset = dataset
        self.net = net

        self.config = tf.ConfigProto()
        self.config.gpu_options.allow_growth = True

        self.construct_graph()

        self.do_plot = solver_params['plot']
        if self.do_plot:
            self.plot = Plot(solver_params['plot_params'])
        return

    def _train(self, lr):
        '''Train model using ADAM optimizer
        '''
        train = tf.train.AdamOptimizer(lr, self.beta1, self.beta2).minimize(
            self.loss,
            global_step=self.global_step,
            var_list=self.net.trainable_collection)
        #grads = opt.compute_gradients(self.loss)
        #train = opt.apply_gradients(grads, global_step=self.global_step)
        return train

    def construct_graph(self):
        self.global_step = tf.Variable(0, trainable=False)
        self.images = tf.placeholder(
            tf.float32, [None, self.height, self.width, self.channel])
        if self.label_type == 'binary':
            self.labels = tf.placeholder(tf.float32, [None, 1, 1, 1])
        elif self.label_type == 'array':
            self.labels = tf.placeholder(tf.float32,
                                         [None, 1, 1, self.label_len])
        else:
            self.labels = tf.placeholder(
                tf.float32, [None, self.height, self.width, self.channel])
        self.lr = tf.placeholder(tf.float32)
        self.keep_prob_holder = tf.placeholder(tf.float32)
        self.net_input['keep_prob'] = self.keep_prob_holder

        self.predicts = self.net.inference(self.images, **self.net_input)
        self.loss, self.evals = self.net.loss(self.predicts['out'],
                                              self.labels, self.eval_names)
        loss_summaries(self.loss)

        tf.summary.scalar('loss', self.loss)
        for key, value in self.evals.items():
            tf.summary.scalar(key, value)
        self.train_op = self._train(self.lr)

    def initialize(self):
        #saver = tf.train.Saver()

        try:
            init = tf.global_variables_initializer()
        except:
            init = tf.initialize_all_variables()

        self.sess = tf.Session(config=self.config)

        self.sess.run(init)
        if self.pretrain_path != 'None':
            saver = tf.train.Saver(self.net.pretrained_collection,
                                   write_version=1)
            saver.restore(self.sess, self.pretrain_path)

    def solve(self):
        saver = tf.train.Saver(self.net.all_collection, write_version=1)
        #saver = tf.train.Saver()

        summary_op = tf.summary.merge_all()
        write_dir = self.train_dir + '/' + self.model_name + '/' + str(
            datetime.now()) + '/'
        train_writer = tf.summary.FileWriter(write_dir + 'train',
                                             self.sess.graph)
        test_writer = tf.summary.FileWriter(write_dir + 'test',
                                            self.sess.graph)
        if self.testing:
            n_batch = self.dataset.get_n_test_batch()
        for step in range(self.max_iterators):
            start_time = time.time()
            np_images, np_labels = self.dataset.batch()
            if self.aug is not None:
                np_images = self.aug.process(np_images)
            _, loss, evals = self.sess.run(
                [self.train_op, self.loss, self.evals],
                feed_dict={
                    self.images: np_images,
                    self.labels: np_labels,
                    self.lr: self.learning_rate[step],
                    self.keep_prob_holder: self.keep_prob
                })
            duration = time.time() - start_time
            assert not np.isnan(loss), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                examples_per_sec = self.dataset.batch_size / duration
                sec_per_batch = float(duration)
                print(
                    '%s: step %d, loss = %f (%.2f examples/sec; %.3f sec/batch)'
                    % (datetime.now(), step, loss, examples_per_sec,
                       sec_per_batch))
                print(evals)
                sys.stdout.flush()
                summary_str = self.sess.run(summary_op,
                                            feed_dict={
                                                self.images:
                                                np_images,
                                                self.labels:
                                                np_labels,
                                                self.keep_prob_holder:
                                                self.keep_prob
                                            })
                train_writer.add_summary(summary_str, step)
                t_images, t_labels = self.dataset.test_random_batch()
                test_summary = self.sess.run(summary_op,
                                             feed_dict={
                                                 self.images: t_images,
                                                 self.labels: t_labels,
                                                 self.keep_prob_holder: 1.0
                                             })
                test_writer.add_summary(test_summary, step)
                if self.do_plot:
                    self.plot.plot_train(step, loss, 0)
                    if 'precision' in self.eval_names:
                        self.plot.plot_train(step, evals['precision'], 1)
                    if 'recall' in self.eval_names:
                        self.plot.plot_train(step, evals['recall'], 2)
                    if 'dice' in self.eval_names:
                        self.plot.plot_train(step, evals['dice'], 3)
                    elif 'f1' in self.eval_names:
                        self.plot.plot_train(step, evals['f1'], 3)
            if step % 1000 == 999:
                saver.save(self.sess,
                           self.train_dir + '/' + self.model_name + '.cpkt',
                           global_step=self.global_step)
                if self.do_plot:
                    self.plot.save_fig()
            if self.testing:
                if (step % 500 == 0) & (step != 0):
                    temp_eval = {}
                    for name in self.eval_names:
                        temp_eval[name] = 0.0
                    temp_eval['loss'] = 0.0
                    for i in range(n_batch):
                        t_start_time = time.time()
                        t_images, t_labels = self.dataset.test_batch()
                        if self.aug is not None:
                            t_images = self.aug.process(t_images)
                        t_loss, t_evals, t_summary = self.sess.run(
                            [self.loss, self.evals, summary_op],
                            feed_dict={
                                self.images: t_images,
                                self.labels: t_labels,
                                self.keep_prob_holder: 1.0
                            })
                        t_duration = (time.time() - t_start_time)
                        print('%s: testing %d, loss = %f (%.3f sec/batch)' %
                              (datetime.now(), i, t_loss, t_duration))
                        print(t_evals)
                        temp_eval['loss'] += t_loss
                        for name in self.eval_names:
                            temp_eval[name] += t_evals[name]
                    for key, value in temp_eval.items():
                        temp_eval[key] /= float(n_batch)
                    print('testing finished.')
                    print(temp_eval)
                    if self.do_plot:
                        self.plot.plot_test(step, temp_eval['loss'], 0)
                        if 'precision' in temp_eval:
                            self.plot.plot_test(step, temp_eval['precision'],
                                                1)
                        if 'recall' in temp_eval:
                            self.plot.plot_test(step, temp_eval['recall'], 2)
                        if 'dice' in temp_eval:
                            self.plot.plot_test(step, temp_eval['dice'], 3)
                        elif 'f1' in temp_eval:
                            self.plot.plot_test(step, temp_eval['f1'], 3)
        # self.sess.close()
        if self.do_plot:
            self.plot.save_fig()
        return

    def forward(self, input):
        '''

        :param input:
        :return:
        '''
        if len(input.shape) == 1:
            input.shape = [
                int(input.shape[0] / self.width / self.height / self.channel),
                self.width, self.height, self.channel
            ]
        elif len(input.shape) == 3:
            input.shape = [
                int(input.shape[0] / self.channel), input.shape[1],
                input.shape[2], self.channel
            ]
        i = 0
        if self.label_type == 'binary':
            predict = np.zeros([input.shape[0], 1, 1, 1], dtype=np.float32)
        elif self.label_type == 'array':
            predict = np.zeros([input.shape[0], 1, 1, self.label_len],
                               dtype=np.float32)
        else:
            predict = np.zeros(input.shape, dtype=np.float32)
        while i < input.shape[0]:
            images = input[i:i + self.test_batch_size, :, :, :]
            if self.aug is not None:
                images = self.aug.process(images)
            predict_temp = self.sess.run([self.predicts['out']],
                                         feed_dict={
                                             self.images: images,
                                             self.keep_prob_holder: 1.0
                                         })
            predict[i:i + self.test_batch_size, :, :, :] = predict_temp[0]
            i += self.test_batch_size
        return predict
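
A hedged sketch of the configuration dictionaries the Solver2D constructor above reads; the keys mirror the dictionary lookups in __init__ and solve(), while every value below is a placeholder rather than anything taken from the original project:

# Placeholder configuration; keys mirror the accesses in Solver2D.__init__.
common_params = {
    'batch_size': 8,
    'width': 256,
    'height': 256,
    'channel': 1,
    'testing': False,           # set True together with 'test_batch_size'
    'label_type': 'matrix',     # 'binary', 'array' (requires 'label_len'), or 'matrix'
}
solver_params = {
    'learning_rate': [1e-4] * 100000,   # indexed by step inside solve()
    'beta1': 0.9,
    'beta2': 0.999,
    'model_name': 'unet2d',
    'train_dir': 'train_logs',
    'max_iterators': 100000,
    'eval_names': ['precision', 'recall', 'dice'],
    'keep_prob': 0.8,
    'plot': False,              # when True, 'plot_params' must also be provided
}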
Example #11
                    type=int,
                    default=640,
                    help='the width of the input image to network')
parser.add_argument(
    '--input_vid',
    default=None,
    help='Input video file to process. Training will be turned off.')

opt = parser.parse_args()
print(opt)
print('=============================================================')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Object for class with visualization functions
plotter = Plot()

torch.manual_seed(opt.manual_seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(opt.manual_seed)
np.random.seed(opt.manual_seed)

# Create reader to process input video if provided
# Training will be turned off in this case
if opt.input_vid is not None:
    if not os.path.exists(opt.input_vid):
        sys.exit('Error: ' + opt.input_vid + ' file does not exist.')
    reader = imageio.get_reader(opt.input_vid)
    opt.image_width, opt.image_height = reader.get_meta_data()['size']
    print('Video reader created. Frame Size: ({}, {})'.format(
        opt.image_height, opt.image_width))
Example #12
# Imports this snippet relies on (the BBANDS call below matches TA-Lib's API;
# Plot is the project's own plotting helper and is imported elsewhere)
import datetime
import json

import pandas as pd
import requests
from talib import BBANDS

from service.calendar import Calendar


server_ip = "http://140.115.87.197:8090/"
cal = Calendar('TW')

payloads = {
    'ticker_list': ['1524'],
    'start_date': cal.get_trade_date('2010-01-01', (1+30)*-1, 'd'),
    'end_date': cal.get_trade_date('2010-03-31', 1, 'd'),
}
response = requests.get(server_ip+"stk/get_ticker_period_stk", params=payloads)
stk_dict = json.loads(response.text)['result']

stk_df = pd.DataFrame(stk_dict[payloads['ticker_list'][0]])
stk_df['date'] = [datetime.datetime.strptime(elm, "%Y-%m-%d") for elm in stk_df['date']]
stk_df.set_index("date", inplace=True)
stk_df.columns = ['Close', 'High', 'Low', 'Open', 'Volume', 'outstanding_share']
stk_df = stk_df.drop('outstanding_share', axis=1)
stk_df = stk_df.dropna()
print(stk_df)

up_band, mid, down_band = BBANDS(
    stk_df['Close'], timeperiod=30,
    nbdevup=1.5, nbdevdn=1.5,
    matype=0
)

plot = Plot()
plot.plot_candlestick(df=stk_df, addplot_list=[up_band, mid, down_band])
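Example #13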
            classifier_running_losses[2]))

        print("NN CLASSIFIER TEST ACCURACY:               {0:.2f}%".format(
            100.0 * accuracies[0]))
        print("NN SYNTHETIC CLASSIFIER TEST ACCURACY:     {0:.2f}%".format(
            100.0 * accuracies[1]))
        print("NN MIXED CLASSIFIER TEST ACCURACY:         {0:.2f}%\n\n".format(
            100.0 * accuracies[2]))

        # Loss histories
        for i in range(num_classifiers):
            classifier_losses[i].append(classifier_running_losses[i])
            classifier_running_losses[i] = 0.0

        Plot.plot_histories(
            classifier_losses,
            ['[REAL] Loss', '[SYNTHETIC] Loss', '[REAL + SYNTHETIC] Loss'],
            "{0}loss_histories".format(args.output_dir))
        Plot.plot_histories(classifier_accuracies, [
            '[REAL] Test Accuracy', '[SYNTHETIC] Test Accuracy',
            '[REAL + SYNTHETIC] Test Accuracy'
        ], "{0}accuracy_histories".format(args.output_dir))

        # Save model at checkpoint
        torch.save(classifiers[0].state_dict(),
                   "{0}nn_classifier".format(args.output_dir))
        torch.save(classifiers[1].state_dict(),
                   "{0}synthetic_nn_classifier".format(args.output_dir))
        torch.save(classifiers[2].state_dict(),
                   "{0}mixed_nn_classifier".format(args.output_dir))

# Save final NN classifier results to results_f:
Example #14
                                       running_batch_elapsed_time) / 60.0

        print(
            "===== TRAINING STEP {} | ~{:.0f} MINUTES REMAINING =====".format(
                training_step, estimated_minutes_remaining))
        print("CRITIC LOSS:     {0}".format(running_critic_loss))
        print("GENERATOR LOSS:  {0}\n".format(running_generator_loss))

        # Loss histories
        critic_losses_per_vis_interval.append(running_critic_loss)
        generator_losses_per_vis_interval.append(running_generator_loss)
        running_critic_loss = 0.0
        running_generator_loss = 0.0

        Plot.plot_histories(
            [critic_losses_per_vis_interval], ["Critic"],
            "{0}critic_loss_history.png".format(MODEL_OUTPUT_DIR))
        Plot.plot_histories(
            [generator_losses_per_vis_interval], ["Generator"],
            "{0}generator_loss_history.png".format(MODEL_OUTPUT_DIR))

        # Save model at checkpoint
        torch.save(generator.state_dict(),
                   "{0}generator".format(MODEL_OUTPUT_DIR))
        torch.save(critic.state_dict(), "{0}critic".format(MODEL_OUTPUT_DIR))

        # Upsample and save samples
        sample_tags = brainpedia.preprocessor.decode_label(
            labels_batch.data[0])
        real_sample_data = real_brain_img_data_batch[0].cpu().data.numpy(
        ).squeeze()
Example #15
class ClusterTs:
    """Classe disposant des methodes de transformations et de manipulations des donnees a des fins de partitionnements

    classe mere de:
        * :class:`kmean`
        * :class:`kshape`

    Parameters:
        * ss : SeriesSupp
            instance du manager de series temporelles

    Variables:
        * ts: Array[[[float]]]
            les series temporelle au format desiree pour le clustering
        * ts_clust: Array[int]
            Chaque entier est selon son index le cluster auquel appartient l'index referant de *ts*
        * ts_name: Array[String]
            Nom de la serie temporelle, du capteur a sa granularite (annee, mois, semaine)
        * ss: SeriesSupp
            instance du manager de series temporelles
        * sampler: int
            Taille du sampling :func:`sampler`
        * ploter: :class:Plot
            Instance d'un objet d'affichage
        * n: int
            Nombre de cluster
        * capteurs_names: Array[String]
            Nom de la serie temporelle, du capteur a sa granularite (annee, mois, semaine) *Bientot supprime*
        * from_save: Bool
            True si les infos sont recuperees d'un cluster sauvegarde
        * proto: Array[[[float]]]
            Prototype de chaque cluster
        * last_readed: {Dict}
            Informations recuperer depuis le fichier 'Pickle' sauvegarde du cluster etudier
        * store_path: String
            Chemin vers le dossier de stockage des sauvegardes.
            N'est plus utilise depuis l'implementation d'une boite de dialogue pour la recherche de fichier de sauvegarde
        * name_file: String
            Chemin absolue vers fichier 'Pickle'
        * clust_name: String
            Nom de la technique de clustering de l'instance
        * metric: String
            Nom de la technique de clacul de distance de l'instance
        * geo: :class:Geo
            Instance Geo
        * cluster_by_name: {Dict}
            Clustering des series temporelles uniquement par le nom des capteurs sans redondance
        * cluster_by_fullname: {Dict}
            Clustering des series temporelles uniquement par le nom des capteurs et leurs granularite
        * size_min: int
            Taille minimale d'une serie pour etre garde lors du preprocessing
        * nb_capteur: {Dict}
            Clustering des series temporelles uniquement par le nom des capteurs redondance
        * nb_week: {Dict}
            Lors d'un decoupage en semaine, represente la redondance par capteur des semaines

    Example:
        See: Cluster_engine.ipynb

    Notes:
        *Dependencies*:
            - tslearn
            - pandas
            - Pickle
    """
    def __init__(self, ss):
        self.ts = None
        self.ts_clust = None
        self.ts_name = None
        self.ss = ss
        self.sampler = 168  # 24/d - 168/w - 744[31](720[30]-696[29]-672[28])/m - 8760(8784)/y
        self.ploter = Plot(self)
        self.n = 5
        self.capteurs_names = []
        self.from_save = False
        self.proto = []
        self.last_readed = {}
        self.store_path = "cluster/13_06/"
        self.name_file = None
        self.clust_name = "Master"
        self.metric = ""
        self.geo = Geo(self.ss.cwd)
        self.cluster_by_name = {}
        self.cluster_by_fullname = {}
        self.size_min = 0
        self.nb_capteur = []
        self.nb_week = []
        #self.read_txt_line_info = {}

    def __repr__(self):
        """
        Representation de l'instance via une chaine de caracteres explicative.

        Parameters:
            NA

        Returns:
            my_repr : str
                representation.
        """
        my_repr = [
            "Algorithm de clustering: " + self.clust_name,
            "Metric mesure: " + self.metric,
            "Espace de stockage: " + self.store_path,
            "Nombre de Clusters: " + str(self.n),
            "Sampler de taille : " + str(self.sampler)
        ]
        return '\n'.join('%s' % v for v in my_repr)

    def tslearn_format_export(self, other_data=None):
        """
        Export la variable data vers le format utilise par tslearn pour la partitionnements

        Parameters:
            NA

        Returns::
            NA
        """
        df = []
        dn = []
        if self.ss.days:
            size_max = 170
        else:
            size_max = 750
        if other_data is not None:
            data_dict = other_data
        else:
            data_dict = self.ss.get_data()
        for k, v in data_dict.items():
            if not self.check_equal(v["Valeur"].values):
                if len(v["Valeur"].values) > self.size_min and len(
                        v["Valeur"].values) < size_max:
                    df.append(v["Valeur"].values)
                    dn.append(k)
                    self.capteurs_names.append(k)
        df_set = to_time_series_dataset(df)
        if self.sampler != 0:
            df_set = TimeSeriesResampler(self.sampler).fit_transform(df_set)
        self.ts = df_set
        self.ts_name = dn

    def set_size_min(self, size):
        """
        Set taille minimale d'une TS pour etre gardee

        Parameters:
            * size: int
                Taille minimale

        Returns:
            NA
        """
        self.size_min = size

    def check_equal(self, iterator):
        """
        Verifie si la TS reste tout le temps sur la meme valeur

        Parameters:
            * iterator: iterator
                la TS

        Returns:
            Bool
        """
        iterator = iter(iterator)
        try:
            first = next(iterator)
        except StopIteration:
            return True
        return all(first == rest for rest in iterator)

    def show_info(self):
        """
        Affiche les informations recuperees depuis le txt d'info de la sauvegarde cluster lie a l'instance

        Parameters:
            NA

        Returns:
            NA
        """
        file = open(str(self.name_file[:-4]) + ".txt", "r")
        print(file.read())
        file.close()
        #i = 0
        #with open(str(self.store_path) + str(self.name_file) + ".txt", "r") as f:
        #    self.read_txt_line_info[i] = f.readlines()
        #    i += 1

    def store_cluster(self, name):
        """
        Sauvegarde sur forme de fichier pickle associe a un txt d'information la partitionnement actuelle de l'instance

        Parameters:
            * name: String
                Nom du fichier, represente les parametre principaux de la partitionnement

        Returns:
            NA
        """
        info_dict = {}
        info_dict["trace"] = self.ts
        info_dict["classe"] = self.ts_clust
        info_dict["name"] = self.ts_name
        info_dict["proto"] = self.km.cluster_centers_
        info_dict["sample"] = self.sampler
        info_dict["years"] = self.ss.years
        info_dict["months"] = self.ss.months
        info_dict["days"] = self.ss.days
        info_dict["size_min"] = self.size_min
        info_dict["round"] = self.ss.rounded
        info_dict["smoothed"] = self.ss.smoothed

        outfile = open(self.store_path + name + ".pkl", "wb")
        pickle.dump(info_dict, outfile)
        outfile.close()

        file = open(self.store_path + name + ".txt", "w")
        file.write(str([i for i in self.ss.years]) + "\n")
        file.write(str([i for i in self.ss.months]) + "\n")
        file.write("Weeks split: " + str(self.ss.days) + "\n")
        file.write("Normalized: " + str(self.ss.norm) + "\n")
        file.write("min size of TS selected: " + str(self.size_min) + "\n")
        file.write("Sample size(0=None): " + str(self.sampler) + "\n")
        file.write("Algorithm used: " + str(self.clust_name) + "\n")
        file.write("nb cluster: " + str(self.n) + "\n")
        file.write("Distance measure: " + str(self.metric) + "\n")
        file.write("Rounded values: " + str(self.ss.rounded) + "\n")
        file.write("smoothed values: " + str(self.ss.smoothed) + "\n")
        file.close()

    def read_cluster(self, path=""):
        """
        Ouvre et recupere toutes les informations d'un fichier pickle(sauvegarde d'un clustering) et update les variable de l'instance pour correspondre

        Parameters:
            * path: String
                Chemin d'acces au fichier

        Returns:
            NA
        """
        infile = open(str(path), 'rb')
        info_dict = pickle.load(infile)
        infile.close()
        self.store_path = path
        self.name_file = path
        self.ts = info_dict["trace"]
        self.ts_clust = info_dict["classe"]
        self.ts_name = info_dict["name"]
        self.capteurs_names = info_dict["name"]
        self.proto = info_dict["proto"]
        self.n = len(info_dict["proto"])
        self.sampler = info_dict["sample"]
        self.from_save = True
        self.last_readed = info_dict
        try:
            self.ss.years = info_dict["years"]
            self.ss.months = info_dict["months"]
            self.ss.days = info_dict["days"]
        except KeyError:
            pass
        try:
            self.ss.rounded = info_dict["round"]
        except KeyError:
            self.ss.rounded = "no information"
        try:
            self.ss.smoothed = info_dict["smoothed"]
        except KeyError:
            self.ss.smoothed = "no information"

    def get_cluster_n(self, n):
        """
        Retourne les TS d'un cluster **n**

        Parameters:
            * n: int
                Numero de cluster souhaite

        Returns:
            res: Array[float]
                Ensemble des TS du cluster
        """
        res = []
        for xx in self.ts[self.ts_clust == n]:
            res.append(xx)
        return res

    def capteur_parser(self):
        """
        Parser des noms de capteurs, pour pouvoir garder en memoire les nom des capteur et leur extension de date selon la TS

        Parameters:
            NA

        Returns:
            NA
        """
        res = {}
        res_full = {}
        nb_capteur = {}
        nb_week = {}
        for i in range(0, self.n):
            res[i], res_full[i], nb_capteur[i], nb_week[i] = [], [], [], []
        for elmt in self.ts_name:
            non_parse = str(elmt)
            parse = str(elmt[0:2] + elmt[3:6])
            if parse not in res[self.ts_clust[self.ts_name.index(elmt)]]:
                res[self.ts_clust[self.ts_name.index(elmt)]].append(parse)
            nb_capteur[self.ts_clust[self.ts_name.index(elmt)]].append(parse)
            nb_week[self.ts_clust[self.ts_name.index(elmt)]].append(
                elmt[-2:].replace("_", "0"))
            res_full[self.ts_clust[self.ts_name.index(elmt)]].append(non_parse)
        self.cluster_by_name = res
        self.cluster_by_fullname = res_full
        self.nb_capteur = nb_capteur
        self.nb_week = nb_week

    def get_part_of_ts(self, data, elmt):
        """
        Selon les Parameters d'elmt retrouve une partie des donnes depuis data

        Parameters:
            * data: {Dict}
                Donnee depuis les quelles on souhaite recuperer une partie precise
            * elmt: {Dict}
                Information liee a la demande (date)

        Returns:
            res_ts: Array[float]
                TS souhaitee
        """
        res_ts = data[elmt["capteur"]].copy()
        res_ts = res_ts.set_index("Date")
        if elmt["week"] and not elmt["month"]:
            res_ts = res_ts[str(elmt["year"])]
            res_ts = res_ts.groupby(pd.Grouper(freq='W'))
            for i in res_ts:
                if i[0].week == elmt["week"]:
                    res_ts = i[1]
        elif elmt["week"] and elmt["month"]:
            res_ts = res_ts[str(elmt["year"]) + "-" + str(elmt["month"])]
            res_ts = res_ts.groupby(pd.Grouper(freq='W'))
            for i in res_ts:
                if i[0].week == elmt["week"]:
                    res_ts = i[1]
        elif elmt["month"]:
            res_ts = res_ts[str(elmt["year"]) + "-" + str(elmt["month"])]
        else:
            res_ts = res_ts[str(elmt["month"])]
        res_ts = res_ts.reset_index()
        res_ts = self.ss.normalize(res_ts)
        return res_ts

    def clust_hoverview_rng(self, n):
        """
        DEPRECATED: Tire une TS random d'un cluster **n** pour se donner une idee des membres de ce dernier

        Parameters:
            * n: int
                Le cluster numero n

        Returns:
            NA
        """
        #r_RG, r_GW = ss.SeriesSupp(cwd, self.ss.factory, "RG24"), ss.SeriesSupp(cwd, factory, "GW")
        rng_elmt = self.cluster_by_fullname[n][0]
        elmt = self.parse_capteur_split(rng_elmt)
        gw = self.get_part_of_ts(self.ss.dataset, elmt)
        elmt2 = elmt.copy()
        elmt2["capteur"] = "24h_RG007"  # EN DUR TROUVER LE PLUS PROCHE
        rg = self.get_part_of_ts(self.ss.factory.get_RG24(), elmt2)
        self.ploter.plot_single_scatter({
            elmt["capteur"]: gw,
            elmt2["capteur"]: rg
        })

    def clust_hoverview(self, n):
        """
        Affiche les TS d'un cluster **n** donnee

        Parameters:
            * n: int
                cluster selectionne

        Returns:
            NA
        """
        elmt_clust = self.cluster_by_fullname[n]
        all_clust_origin_ts = {}
        for elmt in elmt_clust:
            parse = self.parse_capteur_split(elmt)
            #print(parse)
            all_clust_origin_ts[elmt] = self.get_part_of_ts(
                self.ss.dataset, parse)
        self.ploter.plot_scatter(all_clust_origin_ts)

    def parse_capteur_split(self, elmt):
        """
        Recupere les information d'une TS depuis son nom comme le nom de son capteur et la date

        Parameters:
            * elmt: String
                Nom du capteur avec info

        Returns:
            res: {Dict}
                Les infos decoupes et range dans un dico
        """
        elmt = elmt.split("_")
        capteur = elmt[0] + "_" + elmt[1]
        year = int(elmt[2])
        if len(elmt) > 3 and not self.ss.days:
            month = int(elmt[3])
        else:
            month = 0
        if len(elmt) > 4:
            week = int(elmt[4])
        else:
            week = 0
        if len(elmt) > 3 and self.ss.days:
            week = int(elmt[3])
        else:
            week = 0
        res = {}
        res["capteur"], res["year"], res["month"], res[
            "week"] = capteur, year, month, week
        return res

    def highlight_max(self, s):
        """
        Parametre d'affichage surligne les max par ligne de DataFrame

        Parameters:
            * s: pandas
                Ligne du tableau

        Returns:
            unnamed: pands.style
                Affichage des max
        """
        is_max = s == s.max()
        return ['background-color: red' if v else '' for v in is_max]

    def style_df(self, opt, t):
        if opt == "max":
            t_style = t.style.apply(self.highlight_max, axis=1)
            return t_style

    def get_captor_distribution_in_cluster(self):
        """
        Retourne le nombre d'occurance des cpateur dans chacun des clusters

        Parameters:
            NA

        Returns:
            unnamed: DataFrame
                Tableau d'occurance
        """
        tot = {}
        for k, v in self.nb_capteur.items():
            tot[k] = Counter(v)
        return pd.DataFrame(tot)

    def get_ts_by_captor(self, cpt):
        """
        recupere les TS pour capteur **cpt** donne et leur distribution au sein des cluster

        Parameters:
            * cpt: String
                Capteur target

        Returns:
            res: tuple(String, {Dict})
                String represente le capteur et le dictionnaires la distribution des sous TS dans chaque clusters (key = cluster)
        """
        res = (cpt, {})
        i = 0
        for elmt in range(len(self.proto)):
            res[1][i] = []
            i += 1
        i = 0
        for string in self.ts_name:
            if cpt in string:
                res[1][self.ts_clust[i]].append([string, self.ts[i].ravel()])
            i += 1
        return res

    def get_clust_part_for_captor(self, cpt):
        """
        recupere les cluster pour capteur **cpt** donne et leur distribution au sein des cluster

        Parameters:
            * cpt: String
                Capteur target

        Returns:
            res: String
                Seulement les noms des date pour chacun des clusters
        """
        res = (cpt, {})
        i = 0
        for elmt in range(len(self.proto)):
            res[1][i] = []
            i += 1
        i = 0
        for string in self.ts_name:
            if cpt in string:
                res[1][self.ts_clust[i]].append(string)
            i += 1
        return res

    def aff_color(self):
        """
        Affiche les couleurs utilise dans le clustering
        """
        c = COLORS[:self.n + 1]
        df = pd.DataFrame({'colors': c})
        print(df.T)
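
A minimal usage sketch for ClusterTs above, assuming a SeriesSupp manager (ss) from the surrounding project and a previously saved clustering pickle; the path below is hypothetical and the clustering itself is performed by the kmean/kshape subclasses:

# Hypothetical usage; SeriesSupp, Geo and Plot come from the surrounding project.
ct = ClusterTs(ss)                                # normally instantiated through a kmean/kshape subclass
ct.read_cluster("cluster/13_06/example.pkl")      # hypothetical path to a saved clustering
ct.capteur_parser()                               # fills cluster_by_name / cluster_by_fullname
print(ct.get_captor_distribution_in_cluster())    # sensor occurrence counts per cluster
ct.clust_hoverview(0)                             # plot the original series of cluster 0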
Example #16
    def run_pipeline(self):
        """
        run_pipeline function runs the actual pipeline.
        :return:
        """

        # Train & Test data split using sklearn train_test_split module
        X_train, X_test, y_train, y_test = train_test_split(
            self.data['url'],
            self.data['label'],
            test_size=0.33,
            random_state=21,
            stratify=self.data['label'])
        print(
            "*******************\nTrain set : {} \n Test set : {}\n*******************\n"
            .format(X_train.shape[0], X_test.shape[0]))

        # Running the pipeline
        model = self.pipeline.fit(X_train, y_train)

        print('Saving the {} model after fitting on training data.'.format(
            str(self.args.model).upper()))
        # Dumping tokenizer
        joblib.dump(
            model,
            os.path.join(self.args.checkpoint_dir,
                         '{}.pickle'.format(self.args.model)))

        # Calculating time per prediction
        # Start time ******************************************************************************
        start = timeit.default_timer()

        # Predicting label, confidence probability on the test data set
        predictions = model.predict(X_test)
        predictions_prob = model.predict_proba(X_test)

        # Binary class values : rounding them to 0 or 1
        predictions = [round(value) for value in predictions]

        end = timeit.default_timer()
        # End Time ******************************************************************************

        print('Time per prediction : {}'.format(
            (end - start) / X_test.shape[0]))

        # evaluate predictions using accuracy metrics
        accuracy = accuracy_score(y_test, predictions)
        print('{} Classification'.format(self.args.model))
        print("Accuracy: %.2f%%" % (accuracy * 100.0))

        # evaluate predictions using confusion metrics and plot confusion matrix
        classification_report = metrics.classification_report(
            y_test, predictions, target_names=['NadaSportswear', 'Sportswear'])
        print(classification_report)

        # Plotting confusion matrix
        cnf_matrix = confusion_matrix(y_test, predictions)
        np.set_printoptions(precision=2)

        # Plot module is used for plotting confusion matrix, classification report
        plot = Plot()
        plot.plotly(cnf_matrix, classification_report, self.args.save_dir,
                    self.args.model)