import numpy as np
from statistics import variance, pvariance


def test_normal_against_creme():
    try:
        from creme.preprocessing import StandardScaler

        xs = list(np.random.randn(100))
        machine = RunningVariance()
        scaler = StandardScaler()

        for x in xs:
            machine.update(value=x)
            scaler.fit_one({'x': x})

        var1 = np.var(xs)        # numpy population variance
        var2 = machine.var()     # running sample variance
        var3 = variance(xs)      # statistics.variance (sample)
        var4 = pvariance(xs)     # statistics.pvariance (population)
        var5 = scaler.vars['x']  # creme's running variance
        var6 = machine.pvar()    # running population variance
        print([var1, var2, var3, var4, var5, var6])

        assert abs(var2 - var3) < 0.0001
        assert abs(var5 - var1) < 0.0001
        assert abs(var5 - var6) < 0.0001
    except ImportError:
        pass
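# The test above assumes a RunningVariance helper that is not shown in this
# excerpt. Below is a minimal sketch of such a class using Welford's online
# algorithm; the class name, the update(value=...) keyword, and the
# var()/pvar() methods come from the test, while the implementation itself
# is an assumption:
class RunningVariance:
    def __init__(self):
        self.n = 0        # number of observations seen so far
        self.mean = 0.0   # running mean
        self.m2 = 0.0     # running sum of squared deviations from the mean

    def update(self, value):
        # Welford's update: numerically stable, single pass over the stream
        self.n += 1
        delta = value - self.mean
        self.mean += delta / self.n
        self.m2 += delta * (value - self.mean)

    def var(self):
        # unbiased sample variance (divides by n - 1)
        return self.m2 / (self.n - 1) if self.n > 1 else 0.0

    def pvar(self):
        # population variance (divides by n)
        return self.m2 / self.n if self.n > 0 else 0.0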
args = vars(ap.parse_args())

# construct our data dictionary which maps the data types of the
# columns in the CSV file to built-in data types
print("[INFO] building column names...")
types = {"feat_{}".format(i): float for i in range(0, args["cols"])}
types["class"] = int

# create a CSV data generator for the extracted Keras features
dataset = stream.iter_csv(args["csv"], target_name="class", types=types)

# construct our pipeline
model = Pipeline([
    ("scale", StandardScaler()),
    ("learn", OneVsRestClassifier(binary_classifier=LogisticRegression()))])

# initialize our metric
print("[INFO] starting training...")
metric = Accuracy()

# loop over the dataset
for (i, (X, y)) in enumerate(dataset):
    # make predictions on the current set of features, train the
    # model on the features, and then update our metric
    preds = model.predict_one(X)
    model = model.fit_one(X, y)
    metric = metric.update(y, preds)
    print("[INFO] update {} - {}".format(i, metric))
help="# of feature columns in the CSV file (excluding class column") args = vars(ap.parse_args()) # construct our data dictionary which maps the data types of the # columns in the CSV file to built-in data types print("[INFO] building column names...") types = {f'feat_{i}': float for i in range(args['num_cols'])} types["class"] = int # create a CSV data generator for the extracted Keras features dataset = stream.iter_csv(filepath_or_buffer=args["csv"], target_name="class", converters=types) # construct our pipeline model = Pipeline(StandardScaler(), OneVsRestClassifier(binary_classifier=PAClassifier())) # initialize our metric print("[INFO] starting training...") metric = ClassificationReport() # loop over the dataset for i, (X, y) in enumerate(dataset): # make predictions on the current set of features, train the # model on the features, and then update our metric preds = model.predict_one(X) model = model.fit_one(X, y) metric = metric.update(y, preds) print("[INFO] update {} - {}".format(i, metric))
def __init__(self, data_collector):
    dc = data_collector
    data = dc.get_data_frame()
    metric = metrics.MAE()

    # delete NA examples
    data = data.dropna()

    # shuffle data
    X_y = data.sample(frac=1).reset_index(drop=True)
    data = X_y[['x', 'y', 'theta']].to_dict('records')
    target_1 = X_y[['sensor_1']]
    target_2 = X_y[['sensor_3']]
    target_3 = X_y[['sensor_5']]
    target_4 = X_y[['sensor_7']]

    print('constructing models')

    # each sensor gets an identical pipeline: a standard scaler followed by
    # a hedge ensemble of linear regressors with different optimizers
    def build_pipeline():
        return Pipeline([
            ("scale", StandardScaler()),
            ("learn", ensemble.HedgeRegressor([
                linear_model.LinearRegression(optim.SGD()),
                linear_model.LinearRegression(optim.RMSProp()),
                linear_model.LinearRegression(optim.Adam())
            ]))
        ])

    model_1 = build_pipeline()
    model_2 = build_pipeline()
    model_3 = build_pipeline()
    model_4 = build_pipeline()

    print('start training')
    for x, y_1, y_2, y_3, y_4 in zip(
            data,
            target_1.values,
            target_2.values,
            target_3.values,
            target_4.values,
    ):
        model_1, y_pred_1 = self._update_model(model_1, x, y_1)
        model_2, y_pred_2 = self._update_model(model_2, x, y_2)
        model_3, y_pred_3 = self._update_model(model_3, x, y_3)
        model_4, y_pred_4 = self._update_model(model_4, x, y_4)

    self.models = [model_1, model_2, model_3, model_4]
    print('done...')
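# _update_model is called above but not defined in this excerpt. A minimal
# sketch of what it presumably does (predict before learning, one sample at
# a time); the exact body is an assumption:
def _update_model(self, model, x, y):
    # y arrives as a one-element array from DataFrame.values
    y_true = float(y[0])
    y_pred = model.predict_one(x)    # predict first (prequential evaluation)
    model = model.fit_one(x, y_true)  # then learn from the sample
    return model, y_pred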
df = pandas.read_sql(query, engine)
df = df.drop("index", axis=1)
timeList = list(df["time(second)"])
df = df.drop("time(second)", axis=1)
x = df.drop("class", axis=1).to_dict(orient="records")
y = list(df["class"])

metrics = (
    MSE(),
    Accuracy()
)
model = (
    StandardScaler() |
    DecisionTreeClassifier()
)

# Mse Accuracy Real
outputfile = open('C:\\Users\\YigitCan\\Desktop\\Tez-Workspace\\Real-Time-Big-Data-Analytics\\Elderly Sensor\\Output' + str(session) + '.txt', 'w')

previous_time = 0.0
# replay each row at its recorded timestamp, predicting before training
for row, target, time_passed in zip(x, y, timeList):
    time_range = time_passed - previous_time
    if time_range != 0.0:
        time.sleep(time_range)
    previous_time = time_passed  # advance the replay clock so sleeps stay relative
    y_pred = model.predict_one(row)
    model.fit_one(row, target)
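# `metrics` and `outputfile` are created above but never used within this
# excerpt. A plausible continuation (an assumption, not taken from the
# source) would update each metric prequentially inside the loop and log
# the running values:
#
#     for m in metrics:
#         m.update(target, y_pred)
#     outputfile.write(' '.join(str(m) for m in metrics) + '\n')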
argument_parser.add_argument(
    '-n', '--num-cols', type=int, required=True,
    help='Number of columns in the feature CSV file (excluding label).')
arguments = vars(argument_parser.parse_args())

print('[INFO] Building column names...')
# data type per feature column
types = {f'feature_{i}': float for i in range(arguments['num_cols'])}
types['class'] = int

dataset = stream.iter_csv(arguments['train'], target_name='class', types=types)

model = Pipeline([
    ('scaler', StandardScaler()),
    ('learner', OneVsRestClassifier(binary_classifier=PAClassifier()))])
metric = Accuracy()

print('[INFO] Training started...')
for index, (X, y) in enumerate(dataset):
    try:
        predictions = model.predict_one(X)
        model = model.fit_one(X, y)
        metric = metric.update(y, predictions)
        if index % 10 == 0:
            print(f'[INFO] Update {index} - {metric}')
    except OverflowError as e:
        # assumed handling: report the overflow and skip this sample
        print(f'[WARN] Update {index} - OverflowError: {e}')
db = h5py.File(dataset_path, 'r')

TRAIN_PROPORTION = 0.8
SPLIT_INDEX = int(db['labels'].shape[0] * TRAIN_PROPORTION)
BATCH_SIZE = 256

write_dataset('train.csv', db['features'][:SPLIT_INDEX],
              db['labels'][:SPLIT_INDEX], BATCH_SIZE)
write_dataset('test.csv', db['features'][SPLIT_INDEX:],
              db['labels'][SPLIT_INDEX:], BATCH_SIZE)

FEATURE_SIZE = db['features'].shape[1]
types = {f'feature_{i}': float for i in range(FEATURE_SIZE)}
types['class'] = int

model = StandardScaler()
model |= OneVsRestClassifier(LogisticRegression())
metric = Accuracy()

dataset = stream.iter_csv('train.csv', target_name='class', converters=types)

print('Training started...')
for i, (X, y) in enumerate(dataset):
    predictions = model.predict_one(X)
    model = model.fit_one(X, y)
    metric = metric.update(y, predictions)
    if i % 100 == 0:
        print(f'Update {i} - {metric}')
print(f'Final - {metric}')
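# write_dataset is used above but not defined in this excerpt. A minimal
# sketch of what it presumably does: stream HDF5 features and labels to CSV
# in batches so the full array never sits in memory. The batching scheme and
# CSV layout are assumptions, though the header matches the `types` dict
# built above:
def write_dataset(path, features, labels, batch_size):
    with open(path, 'w') as f:
        # header row: feature_0 ... feature_{d-1}, class
        header = [f'feature_{i}' for i in range(features.shape[1])] + ['class']
        f.write(','.join(header) + '\n')
        # write one batch of rows at a time
        for start in range(0, features.shape[0], batch_size):
            batch_X = features[start:start + batch_size]
            batch_y = labels[start:start + batch_size]
            for row, label in zip(batch_X, batch_y):
                values = [str(v) for v in row] + [str(int(label))]
                f.write(','.join(values) + '\n')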
df = pandas.read_sql(query, engine)
# logging.info("Data retrieved by Session = " + str(session))
logging.info("Data retrieved by all")

df = df.drop("index", axis=1)
timeList = list(df["time(second)"])
df = df.drop("time(second)", axis=1)
x = df.drop("class", axis=1).to_dict(orient="records")
y = list(df["class"])

acc = Accuracy()
fbeta = MultiFBeta(betas={1: 0.5, 2: 0.5, 3: 0.5, 4: 0.5})
model = (StandardScaler() | DecisionTreeClassifier())
logging.info("Initial model created")

# Mse Accuracy Real
recordNumber = len(y)
text = ""
previous_time = 0.0

logging.info("Learning process has been started")
startTime = time.time()
for row, target, time_passed in tqdm.tqdm(zip(x, y, timeList)):
    '''
    time_range = time_passed - previous_time
    if time_range > 0.0:
        time.sleep(time_range)
    previous_time = time_passed
    '''
logging.info("Creating connection") engine = create_engine('mysql+pymysql://root:@localhost/tez') logging.info("Connection is ready") n = 10 acc = Accuracy() fbeta = MultiFBeta(betas=({ 'bike': 0.5, 'sit': 0.5, 'stairsdown': 0.5, 'stairsup': 0.5, 'stand': 0.5, 'walk': 0.5 })) model = (StandardScaler() | KNeighborsClassifier()) logging.info("Initial model created for phone type " + phoneType) #modelName = ["nexus4", "s3", "s3mini", "samsungold"] classHistory = [] classNum = 6 trainFlag = True startTime = time.time() previousTime = None logging.info("Learning stage started with total step of " + str(stepNumber)) for step in tqdm.tqdm(range(stepNumber + 1)): logging.info("Data retrieved at step " + str(step + 1) + "/" + str(stepNumber + 1)) if step < stepNumber: query = "select x,y,z,gt from " + tableName + "_" + phoneType + " where id >= " + str( step * dataPackageLimit) + " and id <= " + str(