def test_should_return_number_of_instances(self):
    instances = '7'
    response = '{ "pagination": { "offset": 100000000, "total": 7, "page_size": 10 }, "pipelines": []}'
    pipeline = Pipeline()
    self.assertEqual(pipeline.get_instance(response), instances)
def test_should_return_pipeline_status_passed(self):
    path = os.getcwd()
    response_file_name = 'instance_pipeline_passed.json'
    response_file = path + '/test/resources/' + response_file_name
    with open(response_file, 'r') as f:
        response = f.read()
    pipeline = Pipeline()
    self.assertEqual(pipeline.get_status(response), 'Passed')
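# The Pipeline class exercised by the two tests above is not shown. A minimal
# sketch consistent with the assertions follows; the key path used in
# get_status is an assumption, since the JSON fixture file is not included.
import json


class Pipeline:
    def get_instance(self, response):
        # The test expects the pagination total, returned as a string.
        return str(json.loads(response)["pagination"]["total"])

    def get_status(self, response):
        # Assumed layout: the real 'instance_pipeline_passed.json' may nest
        # the result under different keys.
        return json.loads(response)["stages"][0]["result"]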
def tuning(hp):
    import tensorflow as tf
    pipeline = Pipeline(tfrecords_filenames=hp["tfrecords_filenames"])
    train_dataset = pipeline.get_train_data(int(hp["batch_size"]))
    val_dataset = pipeline.get_val_data(int(hp["batch_size"]))
    model = keras_model.create_model(
        learning_rate=float(hp["lr"]),
        dense_1=int(hp["dense_1"]),
        dense_2=int(hp["dense_2"]))
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        "model.h5", monitor='loss', save_best_only=True, save_freq=2)
    callbacks = [checkpoint_callback, TuneReporterCallback()]
    model.fit(
        train_dataset,
        validation_data=val_dataset,
        verbose=1,
        epochs=int(hp["epochs"]),
        callbacks=callbacks)
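# For reference, a search space passed to tuning() could look like the sketch
# below. This assumes Ray Tune (TuneReporterCallback suggests it); the ranges
# are illustrative, not taken from the project.
from ray import tune

hyperparameter_space = {
    "tfrecords_filenames": "data/data.tfrecord",  # overridden by the caller
    "lr": tune.loguniform(1e-4, 1e-1),
    "dense_1": tune.randint(32, 256),
    "dense_2": tune.randint(16, 128),
    "batch_size": 32,
    "epochs": 10,
}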
def task_retrain_model():
    try:
        logging.info("Query data from database")
        sql2tfrecord = PostgreSQL2Tfrecord()
        data = sql2tfrecord.query_db()
        formatted_data = sql2tfrecord.format_data(data)
        del data
        gc.collect()
        sql2tfrecord.write2tfrecord(
            data=formatted_data,
            filename=os.path.join(package_dir, "..", "data", "data.tfrecord"))
    except Exception as e:
        logging.error("Error in processing data to tfrecord: {}".format(e))
        return 0
    logging.critical("Wrote data to tfrecord")

    try:
        logging.info("Start initializing training process")
        pipeline = Pipeline(
            tfrecords_filenames=os.path.join(
                package_dir, "..", "data", "data.tfrecord"))
        train_keras_model = TrainKerasModel(pipeline=pipeline)
        hyperparameter_space.update({
            "tfrecords_filenames": os.path.join(
                package_dir, "..", "data", "data.tfrecord")
        })
    except Exception as e:
        logging.error("Error in initializing training process: {}".format(e))
        return 0

    try:
        logging.info("Start searching best model")
        best_model = train_keras_model.get_best_model(
            hyperparameter_space=hyperparameter_space,
            num_samples=num_samples)
    except Exception as e:
        logging.error("Error in searching best model: {}".format(e))
        return 0

    try:
        logging.info("Start saving model")
        result = train_keras_model.save_model(
            model=best_model,
            filename=os.path.join(models_dir, str(int(time.time()))))
    except Exception as e:
        logging.error("Error in saving model: {}".format(e))
        return 0

    logging.critical("Retraining finished. Training result: {}".format(result))
def _setup_pipelines(self):
    self.pipelines = {}
    print(self.configuration)
    for pipeline_group in self.configuration.cruise.pipelines:
        for pipeline in pipeline_group.pipeline:
            pipeline_name = pipeline.get_attribute("name")
            stages = []
            for stage in pipeline.stage:
                stages.append(stage.get_attribute("name"))
            self.pipelines[pipeline_name] = Pipeline(pipeline_name, stages)
def run_video(input_filepath, detector_config, output_filepath=None):
    """
    Args:
        input_filepath: input video filepath. Set to "0" for the webcam, or another device number.
        detector_config: path to the detector config file.
        output_filepath: filepath for the result video; set to None to disable saving to disk. Default: None.
    """
    if input_filepath == "0":
        input_filepath = 0
    video_capture = cv2.VideoCapture(input_filepath)

    # exit if the video could not be opened
    if not video_capture.isOpened():
        logger.error('Cannot open video')
        sys.exit()

    # The default frame resolutions are system dependent; convert them from float to int.
    frame_width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Define the codec and create the VideoWriter object for the output file.
    video_out = None
    if output_filepath is not None:
        video_out = cv2.VideoWriter(output_filepath,
                                    cv2.VideoWriter_fourcc(*'MPEG'),
                                    20., (frame_width, frame_height))

    # init detector
    device = init_device()
    detector = setup_detector(detector_config, device)

    # init tracker
    tracker = setup_tracker()

    # init detection pipeline
    # TODO: pass image_size config
    pipeline = Pipeline(detector=detector, tracker=tracker,
                        resize_image_size=(300, 300))

    # run processing
    process(video_capture, pipeline, video_out=video_out, headless=False)

    # When everything is done, release the capture
    video_capture.release()
    cv2.destroyAllWindows()

    # shut down the device
    detector.close()
    device.close()
    device.destroy()
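# A typical invocation might look like this; the config and output paths are
# placeholders, since the snippet does not show the project's real entry point.
if __name__ == "__main__":
    run_video("0", "configs/detector.json", output_filepath="out/result.avi")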
def main():
    (directory_path, text_path, quotes_path,
     speech_path, output_path) = _get_data_from_cmd()
    quotes_rules = _read_csv(quotes_path, ';')
    speech_rules = _read_csv(speech_path, ';')
    quotes_adapter = QuotesAdapter(quotes_rules)
    speech_detector = SpeechDetector(speech_rules)
    character_detector = CharacterDetector()
    pipeline = Pipeline(quotes_adapter, speech_detector, character_detector)
    if directory_path is None:
        list_of_textfiles = [text_path]
    else:
        list_of_textfiles = _get_list_of_text_files(directory_path)
    for textpath in list_of_textfiles:
        text = _read_file(textpath)
        text = pipeline.apply_to(text)
        _write_to_file(output_path, directory_path, textpath, text)
print(f"OOB score = {rf.oob_score_}") oob_diff.append(rf.score(X_train, y_train) - rf.oob_score_) oob.append(rf.oob_score_) ax.plot(oob_diff, color='red') ax.plot(oob, color='blue') ax.set_title("Reducing OOB Error by limiting max_depth") plt.savefig('images/oob.png') if __name__ == '__main__': # energy_df = pd.read_csv('data/energy_dataset.csv',index_col=0, parse_dates=[0]) # weather_df = pd.read_csv('data/weather_features.csv',index_col=0, parse_dates=[0]) print("Loading Data") # read in files from s3 bucket energy = Pipeline('s3://ajzcap2/energy_dataset.csv') weather = Pipeline('s3://ajzcap2/weather_features.csv') #make index a datetime object energy.my_reset_index() weather.my_reset_index() # Drop columns weather_drop_cols = [ 'weather_icon', 'weather_main', 'weather_id', 'temp_min', 'temp_max' ] energy_drop_cols = [ 'generation fossil coal-derived gas', 'generation fossil oil shale', 'generation fossil peat', 'generation geothermal', 'generation marine', 'generation hydro pumped storage aggregated', 'forecast wind offshore eday ahead', 'generation wind offshore',
from src.pipeline import Pipeline
from src.train import TrainKerasModel
from tests.resources.test_data import (
    test_hyperparameter_space, test_simple_train_hyperparameters)
import os
import tensorflow as tf
import shutil

package_dir = os.path.dirname(os.path.abspath(__file__))
pipeline = Pipeline(
    tfrecords_filenames=os.path.join(
        package_dir, "resources", "test_data.tfrecord"))
train_keras_model = TrainKerasModel(pipeline=pipeline)


class TestTrainKerasModel:

    def test_simple_train(self):
        model = train_keras_model.simple_train(
            test_simple_train_hyperparameters)
        assert model is not None

    def test_get_best_model(self):
        test_hyperparameter_space.update(
            {"tfrecords_filenames": os.path.join(
                package_dir, "resources", "test_data.tfrecord")})
        tuned_model = train_keras_model.get_best_model(
            hyperparameter_space=test_hyperparameter_space,
            num_samples=1)
        print(tuned_model)
        assert tuned_model is not None
from src.pipeline import Pipeline
import OpenGL.GL as gl
from src.shape import Shape
import numpy

print("Numpy: {:s}".format(numpy.__version__))

# fg = FrameGrabber()
shape = Shape()
pipeline = Pipeline(shape, None)
pipeline.loadShaderFile('shaders/diffuse.vert', gl.GL_VERTEX_SHADER)
pipeline.loadShaderFile('shaders/diffuse.frag', gl.GL_FRAGMENT_SHADER)
pipeline.initGl()
pipeline.sendData()
pipeline.run()
# fg.finish()
# matplotlib.use("Agg") from sklearn.model_selection import train_test_split, KFold from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor from sklearn.linear_model import Lasso, LassoCV, Ridge, LinearRegression from sklearn.decomposition import PCA from sklearn.pipeline import Pipeline as SKPipe from sklearn.inspection import permutation_importance, plot_partial_dependence from sklearn.model_selection import GridSearchCV from src.pipeline import Pipeline from src.helpers import plot_corr_matrix, scree_plot, plot_num_estimators_mse, gridsearch, pdplots, compare_default_models, pca_with_scree, feat_imp_plots, plot_oob_error from sklearn.preprocessing import StandardScaler from sklearn.metrics import mean_squared_error from statsmodels.stats.outliers_influence import variance_inflation_factor if __name__ == '__main__': df = Pipeline('s3://ajzcap3/spain_data.csv') us = Pipeline('data/us_data.csv') # make spanish data look more like us data drop_cols = [ 'Valencia_wind_speed', 'Madrid_temp', 'Madrid_wind_speed', 'Seville_temp', 'Bilbao_temp', 'Bilbao_wind_speed', ' Barcelona_temp' ] for i in drop_cols: df.df.drop(i, inplace=True, axis=1) # Combine hydro df.df['conventional hydro'] = df.df[ 'generation hydro run-of-river and poundage'] + df.df[ 'generation hydro water reservoir']
def train(self):
    # TODO not relevant for paper but important
    X = self.load()
    x_train, x_test, y_train, y_test, indices_train, indices_test = \
        train_test_split(
            X['data'], X['target'], range(0, len(X['data'])),
            test_size=0.2, random_state=42)
    print('data loaded')

    # order of labels in `target_names` can be different from `categories`
    target_names = X['target_names']

    def size_mb(docs):
        return sum(len(s.encode('utf-8')) for s in docs) / 1e6

    data_train_size_mb = size_mb(x_train)
    data_test_size_mb = size_mb(x_test)

    print("%d documents - %0.3fMB (training set)" % (
        len(x_train), data_train_size_mb))
    print("%d documents - %0.3fMB (test set)" % (
        len(x_test), data_test_size_mb))
    print("%d categories" % len(target_names))
    print()

    print("Extracting features from the training data using a sparse vectorizer")
    t0 = time()
    if False:
        vectorizer = HashingVectorizer(stop_words='english',
                                       alternate_sign=False,
                                       n_features=2**16)
        X_train = vectorizer.transform(x_train)
    else:
        vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5,
                                     stop_words='english')
        X_train = vectorizer.fit_transform(x_train)
    duration = time() - t0
    print("done in %fs at %0.3fMB/s" % (duration, data_train_size_mb / duration))
    print("n_samples: %d, n_features: %d" % X_train.shape)
    print()

    print("Extracting features from the test data using the same vectorizer")
    t0 = time()
    X_test = vectorizer.transform(x_test)
    duration = time() - t0
    print("done in %fs at %0.3fMB/s" % (duration, data_test_size_mb / duration))
    print("n_samples: %d, n_features: %d" % X_test.shape)
    print()

    # mapping from integer feature name to original token string
    if False:
        feature_names = None
    else:
        feature_names = vectorizer.get_feature_names()

    if feature_names:
        feature_names = np.asarray(feature_names)

    def trim(s):
        """Trim string to fit on terminal (assuming 80-column display)"""
        return s if len(s) <= 80 else s[:77] + "..."
    # #########################################################################
    # Benchmark classifiers
    def benchmark(clf):
        print('_' * 80)
        print("Training: ")
        print(clf)
        t0 = time()
        clf.fit(X_train, y_train)
        train_time = time() - t0
        print("train time: %0.3fs" % train_time)

        t0 = time()
        pred = clf.predict(X_test)
        test_time = time() - t0
        print("test time: %0.3fs" % test_time)

        score = metrics.accuracy_score(y_test, pred)
        print("accuracy: %0.3f" % score)

        if hasattr(clf, 'coef_'):
            print("dimensionality: %d" % clf.coef_.shape[1])
            print("density: %f" % density(clf.coef_))

            if False and feature_names is not None:
                print("top 10 keywords per class:")
                for i, label in enumerate(target_names):
                    top10 = np.argsort(clf.coef_[i])[-10:]
                    print(trim("%s: %s" % (
                        label, " ".join(feature_names[top10]))))
            print()

        print("classification report:")
        print(metrics.classification_report(y_test, pred,
                                            target_names=target_names))

        print("confusion matrix:")
        print(metrics.confusion_matrix(y_test, pred))

        print()
        clf_descr = str(clf).split('(')[0]
        return clf_descr, score, train_time, test_time

    results = []
    for clf, name in (
            (RidgeClassifier(tol=1e-2, solver="lsqr"), "Ridge Classifier"),
            (Perceptron(n_iter=50), "Perceptron"),
            (PassiveAggressiveClassifier(n_iter=50), "Passive-Aggressive"),
            (KNeighborsClassifier(n_neighbors=10), "kNN"),
            (RandomForestClassifier(n_estimators=100), "Random forest")):
        print('=' * 80)
        print(name)
        results.append(benchmark(clf))

    for penalty in ["l2", "l1"]:
        print('=' * 80)
        print("%s penalty" % penalty.upper())
        # Train Liblinear model
        results.append(
            benchmark(LinearSVC(penalty=penalty, dual=False, tol=1e-3)))
        # Train SGD model
        results.append(
            benchmark(SGDClassifier(alpha=.0001, n_iter=50, penalty=penalty)))

    # Train SGD with Elastic Net penalty
    print('=' * 80)
    print("Elastic-Net penalty")
    results.append(
        benchmark(SGDClassifier(alpha=.0001, n_iter=50, penalty="elasticnet")))

    # Train NearestCentroid without threshold
    print('=' * 80)
    print("NearestCentroid (aka Rocchio classifier)")
    results.append(benchmark(NearestCentroid()))

    # Train sparse Naive Bayes classifiers
    print('=' * 80)
    print("Naive Bayes")
    results.append(benchmark(MultinomialNB(alpha=.01)))
    results.append(benchmark(BernoulliNB(alpha=.01)))

    print('=' * 80)
    print("LinearSVC with L1-based feature selection")
    # The smaller C, the stronger the regularization.
    # The more regularization, the more sparsity.
    results.append(
        benchmark(Pipeline([
            ('feature_selection', SelectFromModel(
                LinearSVC(penalty="l1", dual=False, tol=1e-3))),
            ('classification', LinearSVC(penalty="l2"))])))

    # make some plots
    indices = np.arange(len(results))

    results = [[x[i] for x in results] for i in range(4)]

    clf_names, score, training_time, test_time = results
    training_time = np.array(training_time) / np.max(training_time)
    test_time = np.array(test_time) / np.max(test_time)

    plt.figure(figsize=(12, 8))
    plt.title("Score")
    plt.barh(indices, score, .2, label="score", color='navy')
    plt.barh(indices + .3, training_time, .2, label="training time", color='c')
    plt.barh(indices + .6, test_time, .2, label="test time", color='darkorange')
    plt.yticks(())
    plt.legend(loc='best')
    plt.subplots_adjust(left=.25)
    plt.subplots_adjust(top=.95)
    plt.subplots_adjust(bottom=.05)

    for i, c in zip(indices, clf_names):
        plt.text(-.3, i, c)

    plt.show()

    return self._container
import sys
import requests

sys.path.append('../')
from src.pipeline import Pipeline
from src.arduino_client import Arduino
from src.server import Server

while True:
    protocol = 'http'
    host = 'localhost'
    pipeline_name = 'piarm'

    gocd = Server(protocol, host, pipeline_name)
    arduino = Arduino()

    instance_request = requests.get(gocd.history_url)
    pipeline = Pipeline()
    state_request = requests.get(
        gocd.pipeline_url + pipeline.get_instance(instance_request.content))
    pipeline_status = pipeline.get_status(state_request.content)
    print(pipeline_status)

    if pipeline_status == 'Passed':
        passed = 'b'
        arduino.send(passed)
    else:
        failed = 'a'
        arduino.send(failed)