def predict(self, point, last_price):
    """Point must be a Pandas DataFrame with the information needed to make the prediction."""
    # 1. Standardize point with training mean and standard deviation.
    test_data = self.__standardize_features_for_test(
        point, self.columns_to_standardize, self.column_means,
        self.column_stds)

    # 2. Add it to the data.
    df = pd.concat([self.data, test_data])

    # 3. Windowize.
    fmt = DataFormatter()
    X, Y = fmt.windowize_series(df.as_matrix(),
                                size=self.input_window_size,
                                column_indexes=self.columns_to_windowize)

    # 4. Extract the last window.
    last_window = fmt.get_last_window(
        df.as_matrix(),
        size=self.input_window_size,
        column_indexes=self.columns_to_windowize)

    # 5. Compute the error on the training data.
    train_score = self.model.evaluate(X, Y, verbose=0)
    train_score = np.array([
        train_score[0],
        np.sqrt(train_score[0]), train_score[1], train_score[2] * 100
    ])

    # 6. Make the prediction.
    prediction = self.model.predict(last_window)

    # 7. Compute the prediction intervals (95%, using the training RMSE).
    pred_upper = prediction + 1.96 * train_score[1]
    pred_lower = prediction - 1.96 * train_score[1]

    # 8. Transform the prediction back from log-returns to prices.
    prediction = last_price * np.exp(prediction)
    pred_upper = last_price * np.exp(pred_upper)
    pred_lower = last_price * np.exp(pred_lower)

    return prediction, [pred_lower, pred_upper]
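# Usage sketch (assumption, not from the source): `forecaster` is an instance of
# the class defining predict() above, `new_point` is a one-row pandas DataFrame
# with the training columns, and `last_close` is the latest observed closing price
# used to invert the log-return transform.
forecast, (lower, upper) = forecaster.predict(new_point, last_price=last_close)
print 'Price forecast:', forecast
print '95% interval:', lower, '-', upper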
def stormDump(dmonEndpoint, qgte, qlte, qsize, qinterval):
    '''
    :param dmonEndpoint: -> DMON endpoint
    :param qgte: -> greater than timestamp
    :param qlte: -> less than timestamp
    :param qsize: -> query size
    :param qinterval: -> query interval
    :return:
    '''
    dmonConnector = Connector(dmonEndpoint)
    stormTopology = dmonConnector.getStormTopology()
    bolts = stormTopology['bolts']
    spouts = stormTopology['spouts']
    print "Detected %s bolts" % str(bolts)
    print "Detected %s spouts" % str(spouts)

    qConstructor = QueryConstructor()
    dformat = DataFormatter(dataDir)

    storm, storm_file = qConstructor.stormString()
    print "Query string -> %s" % storm

    qstorm = qConstructor.stormQuery(storm, qgte, qlte, qsize, qinterval,
                                     bolts=bolts, spouts=spouts)
    print "Query -> %s" % qstorm

    gstorm = dmonConnector.aggQuery(qstorm)
    print "Response:"
    print gstorm

    dformat.dict2csv(gstorm, qstorm, storm_file)
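# Usage sketch: the endpoint and query window below mirror the standard values
# used elsewhere in this module; adjust them for your own DMON deployment.
stormDump(dmonEndpoint='85.120.206.27',
          qgte=1477911300000,
          qlte=1477914720000,
          qsize=0,
          qinterval="10s")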
def process_csv(self, csv_line, updown):
    data = csv_line.split(',')
    if self.cli:
        print ' %s sec \t%s\t%s/sec' % \
            (data[self.TIME_RANGE],
             DataFormatter.format_bytes(data[IPerfTest.TRANSFERED]),
             DataFormatter.format_bits(data[IPerfTest.SPEED]))
    if updown == 'up':
        self.process_up_csv(data)
    else:
        self.process_down_csv(data)
def fit_model(self, epochs=200, verbose=0):
    """Train the model for production."""
    # Patching
    self.column_means = {}
    self.column_stds = {}

    # Windowize dataset
    fmt = DataFormatter()
    self.X, self.Y = fmt.windowize_series(
        self.data.as_matrix(),
        size=self.input_window_size,
        column_indexes=self.columns_to_windowize)

    self.model.fit(self.X,
                   self.Y,
                   epochs=epochs,
                   batch_size=32,
                   verbose=verbose)
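# Usage sketch (assumption): `forecaster` is an instance of this class with
# `data`, `input_window_size` and `columns_to_windowize` already set; a silent
# production fit would simply be:
forecaster.fit_model(epochs=200, verbose=0)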
def cassandraDump(dmonEndpoint, qgte, qlte, qsize, qinterval):
    '''
    :param dmonEndpoint: -> DMON endpoint
    :param qgte: -> greater than timestamp
    :param qlte: -> less than timestamp
    :param qsize: -> query size
    :param qinterval: -> query interval
    :return:
    '''
    dmonConnector = Connector(dmonEndpoint)
    qConstructor = QueryConstructor()
    dformat = DataFormatter(dataDir)

    roles = dmonConnector.roles()
    cassandra_hosts = []
    for el in roles['Nodes']:
        for k, v in el.iteritems():
            if 'cassandra' in v:
                print 'Found cassandra role at %s' % k
                cassandra_hosts.append(k)

    for host in cassandra_hosts:
        print "-" * 50
        cassandra, cassandra_file = qConstructor.cassandraCounterString(host=host)
        print "Query string -> %s" % cassandra

        qcassandra = qConstructor.cassandraQuery(cassandra, qgte, qlte, qsize, qinterval)
        print "Query -> %s" % qcassandra

        gcassandra = dmonConnector.aggQuery(qcassandra)
        print "Response:"
        print gcassandra

        dformat.dict2csv(gcassandra, qcassandra, cassandra_file)

        cassandragauge, cassandragauge_file = qConstructor.cassandraGaugeString(host=host)
        qcassandragauge = qConstructor.cassandraQuery(cassandragauge, qgte, qlte, qsize, qinterval)
        print "Query -> %s" % qcassandragauge

        gcassandragauge = dmonConnector.aggQuery(qcassandragauge)
        print "Response:"
        print gcassandragauge

        dformat.dict2csv(gcassandragauge, qcassandragauge, cassandragauge_file)
        print "-" * 50
def predict(self, point=None):
    """Point must be a Pandas DataFrame with the information needed to make the
    prediction. If point is None, the prediction is made from the stored
    training data alone."""
    # 1. Standardize point with training mean and standard deviation.
    # 2. Add it to the data.
    if point is None:
        df = self.data
    else:
        test_data = self.__standardize_features_for_test(
            point, self.columns_to_standardize, self.column_means,
            self.column_stds)
        df = pd.concat([self.data, test_data])

    # 3. Windowize.
    fmt = DataFormatter()
    X, Y = fmt.windowize_series(df.as_matrix(),
                                size=self.input_window_size,
                                column_indexes=self.columns_to_windowize)

    # 4. Extract the last window and add the batch dimension.
    last_window = fmt.get_last_window(
        df.as_matrix(),
        size=self.input_window_size,
        column_indexes=self.columns_to_windowize)
    last_window = last_window[None, :]

    # 5. Compute the error on the training data.
    train_score = self.model.evaluate(X, Y, verbose=0)
    train_score = np.array([
        train_score[0],
        np.sqrt(train_score[0]), train_score[1], train_score[2] * 100
    ])

    # 6. Make the prediction.
    prediction = np.squeeze(self.model.predict(last_window))

    # 7. Compute the prediction intervals (95%, using the training RMSE).
    pred_upper = prediction + 1.96 * train_score[1]
    pred_lower = prediction - 1.96 * train_score[1]

    # Revert the standardization of the 'Close' column.
    prediction = prediction * self.column_stds[
        u'Close'] + self.column_means[u'Close']
    pred_upper = pred_upper * self.column_stds[
        u'Close'] + self.column_means[u'Close']
    pred_lower = pred_lower * self.column_stds[
        u'Close'] + self.column_means[u'Close']

    return prediction, pred_lower, pred_upper
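# Usage sketch (assumption): calling this variant of predict() without a point
# forecasts the next value from the stored data alone; the result is returned
# in the original 'Close' scale together with the 95% interval bounds.
close_forecast, lower, upper = forecaster.predict()
print 'Close forecast:', close_forecast, '(', lower, '-', upper, ')'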
def get_summary(self):
    try:
        summary = 'Upload:\n'
        summary += ' %s: %s\t%s: %s\n' % \
            ('Transferred'.ljust(11),
             DataFormatter.format_bytes(self.get_up_transferred()),
             'Speed'.ljust(11),
             DataFormatter.format_bits(self.get_up_speed()))
        summary += ' %s: %s\t%s: %s\n' % \
            ('Min'.ljust(11),
             DataFormatter.format_bits(self.get_up_speed_min()),
             'Max'.ljust(11),
             DataFormatter.format_bits(self.get_up_speed_max()))
        summary += ' %s: %2.2f%%\n' % ('Jitter'.ljust(11), self.get_up_jitter())
        summary += 'Download:\n'
        summary += ' %s: %s\t%s: %s\n' % \
            ('Transferred'.ljust(11),
             DataFormatter.format_bytes(self.get_down_transferred()),
             'Speed'.ljust(11),
             DataFormatter.format_bits(self.get_down_speed()))
        summary += ' %s: %s\t%s: %s\n' % \
            ('Min'.ljust(11),
             DataFormatter.format_bits(self.get_down_speed_min()),
             'Max'.ljust(11),
             DataFormatter.format_bits(self.get_down_speed_max()))
        summary += ' %s: %2.2f%%\n' % ('Jitter'.ljust(11), self.get_down_jitter())
        return summary
    except:
        pass
def print_summary(self):
    print '-' * IPerfTest.PRINT_WIDTH
    print '|%s|' % self.description.center(IPerfTest.PRINT_WIDTH - 2)
    print '|%s|' % 'Summary'.center(IPerfTest.PRINT_WIDTH - 2)
    print '-' * IPerfTest.PRINT_WIDTH
    try:
        print 'Upload:'
        print ' %s: %s\t%s: %s' % \
            ('Transferred'.ljust(11),
             DataFormatter.format_bytes(self.get_up_transferred()),
             'Speed'.ljust(11),
             DataFormatter.format_bits(self.get_avg_up_speed()))
        print ' %s: %s\t%s: %s' % \
            ('Min'.ljust(11),
             DataFormatter.format_bits(self.total_up_min),
             'Max'.ljust(11),
             DataFormatter.format_bits(self.total_up_max))
        print ' %s: %2.2f%%' % ('Jitter'.ljust(11), self.get_up_jitter())
        print 'Download:'
        print ' %s: %s\t%s: %s' % \
            ('Transferred'.ljust(11),
             DataFormatter.format_bytes(self.get_down_transferred()),
             'Speed'.ljust(11),
             DataFormatter.format_bits(self.get_avg_down_speed()))
        print ' %s: %s\t%s: %s' % \
            ('Min'.ljust(11),
             DataFormatter.format_bits(self.total_down_min),
             'Max'.ljust(11),
             DataFormatter.format_bits(self.total_down_max))
        print ' %s: %2.2f%%' % ('Jitter'.ljust(11), self.get_down_jitter())
    except:
        pass
import os
import tempfile
import traceback

import weka.core.jvm as jvm
import edeweka.helper as helper
from weka.clusterers import Clusterer
import weka.core.converters as converters
import weka.core.serialization as serialization
from dataformatter import DataFormatter
import weka.core.packages as packages

dataDir = os.path.join(os.path.dirname(os.path.abspath('')), 'data')
modelDir = os.path.join(os.path.dirname(os.path.abspath('')), 'models')

dformat = DataFormatter(dataDir)
dformat.dict2arff(os.path.join(dataDir, 'System.csv'),
                  os.path.join(dataDir, 'System.arff'))

# Arff_file = os.path.join(dataDir, 'System.arff')
jvm.start(packages=True)

data = converters.load_any_file(os.path.join(dataDir, 'System.arff'))
clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans",
                      options=["-N", "10", "-S", "10"])
clusterer.build_clusterer(data)
# print clusterer

# cluster the data
# for inst in data:
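# Sketch of how the commented-out clustering loop above might be completed,
# assuming the standard python-weka-wrapper Clusterer API: assign every
# instance to a cluster, then shut the JVM down when done.
for inst in data:
    cl = clusterer.cluster_instance(inst)
    print "Instance %s -> cluster %d" % (inst, cl)
jvm.stop()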
def test_model(self, n_splits=9, cv_runs=10, epochs=100, verbose=2):
    """Evaluate the model using walk-forward cross-validation."""
    from sklearn.model_selection import TimeSeriesSplit

    self.metrics = ['MSE', 'RMSE', 'MAE', 'MAPE']
    train_scores = np.zeros((cv_runs, n_splits, len(self.metrics)))
    test_scores = np.zeros((cv_runs, n_splits, len(self.metrics)))

    fmt = DataFormatter()
    tscv = TimeSeriesSplit(n_splits=n_splits)

    for j in xrange(cv_runs):
        # print('\nCross-validation run %i' % (j+1))
        i = 1
        for train_index, test_index in tscv.split(
                self.data['LogReturn'].values):
            # Split the dataset into training and test sets.
            train_df = self.data.loc[train_index]
            test_df = self.data.loc[test_index]

            # Standardize the dataset.
            if len(self.columns_to_standardize) != 0:
                train_data, training_means, training_stds = self.__standardize_features(
                    train_df, self.columns_to_standardize)
                test_data = self.__standardize_features_for_test(
                    test_df, self.columns_to_standardize, training_means,
                    training_stds)
            else:
                train_data = train_df
                test_data = test_df

            # Extract data windows.
            trainX, trainY = fmt.windowize_series(
                train_data.as_matrix(),
                size=self.input_window_size,
                column_indexes=self.columns_to_windowize)
            testX, testY = fmt.windowize_series(
                test_data.as_matrix(),
                size=self.input_window_size,
                column_indexes=self.columns_to_windowize)

            # Fit the model.
            # print('Fold %i' % (i))
            self.model.fit(trainX,
                           trainY,
                           epochs=epochs,
                           batch_size=32,
                           validation_data=(testX, testY),
                           verbose=verbose)

            # Evaluate each fold of the walk-forward cross-validation.
            train_score = self.model.evaluate(trainX, trainY, verbose=verbose)
            train_score = np.array([
                train_score[0],
                np.sqrt(train_score[0]), train_score[1], train_score[2] * 100
            ])
            test_score = self.model.evaluate(testX, testY, verbose=verbose)
            test_score = np.array([
                test_score[0],
                np.sqrt(test_score[0]), test_score[1], test_score[2] * 100
            ])
            # print('Train Score: %.5f MSE, %.5f RMSE, %.5f MAE, %.5f%% MAPE' % (train_score[0], train_score[1], train_score[2], train_score[3]))
            # print('Test Score: %.5f MSE, %.5f RMSE, %.5f MAE, %.5f%% MAPE\n' % (test_score[0], test_score[1], test_score[2], test_score[3]))

            # [0: MSE, 1: RMSE, 2: MAE, 3: MAPE]
            train_scores[j, i - 1, :] = train_score
            test_scores[j, i - 1, :] = test_score
            i += 1

    self.train_results = train_scores.mean(axis=0).mean(axis=0)
    self.test_results = test_scores.mean(axis=0).mean(axis=0)
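# Usage sketch (assumption): run the walk-forward cross-validation and inspect
# the averaged metrics; `metrics`, `train_results` and `test_results` are set
# as attributes by test_model().
forecaster.test_model(n_splits=9, cv_runs=10, epochs=100, verbose=0)
for name, train_val, test_val in zip(forecaster.metrics,
                                     forecaster.train_results,
                                     forecaster.test_results):
    print '%s -> train: %.5f, test: %.5f' % (name, train_val, test_val)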
import os, sys
from dataformatter import DataFormatter
from pyQueryConstructor import QueryConstructor
from edeconnector import Connector

if __name__ == '__main__':
    dataDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    prometheus_endpoint = '194.102.62.155'
    prometheus_port = '9090'
    print("Collecting data from Monitoring at: {}".format(prometheus_endpoint))

    prometheus_query = {"query": '''{__name__=~"node.+"}[80m]'''}
    # prometheus_query = qContructor.pr_query_node(time="1h")

    edeConnector = Connector(prEndpoint=prometheus_endpoint,
                             MInstancePort=prometheus_port)
    test = edeConnector.pr_targets()
    print("Current target information:")
    print(test)

    test1 = edeConnector.pr_labels('cpu')
    print(test1)

    test2 = edeConnector.pr_status()
    print("Status information")
    print(test2)

    print("Executing query ....")
    test3 = edeConnector.pr_query(query=prometheus_query)

    dformat = DataFormatter(dataDir)
    print("Query completed ....")
    print("Saving ...")
    test_format = dformat.prtoDF(test3, checkpoint=True, verbose=True)
    print("Saved")
dataDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
modelDir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models')

# Standard query values
# qte = 1475842980000
# qlte = 1475845200000
qgte = 1477911300000
qlte = 1477914720000
qsize = 0
qinterval = "10s"

dmonEndpoint = '85.120.206.27'

dmonConnector = Connector(dmonEndpoint)
qConstructor = QueryConstructor()
dformat = DataFormatter(dataDir)

nodeList = dmonConnector.getNodeList()
interval = dmonConnector.getInterval()

if int(qinterval[:-1]) < interval['System']:
    logger.warning('[%s] : [WARN] System Interval smaller than set interval!',
                   datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))

# per slave unique process name list
nodeProcessReduce = {}
nodeProcessMap = {}

# Get host based metrics
for node in nodeList: