def test_normal_against_creme():
    try:
        from creme.preprocessing import StandardScaler
        xs = list(np.random.randn(100))
        machine = RunningVariance()
        scaler = StandardScaler()
        for x in xs:
            machine.update(value=x)
            scaler.fit_one({'x': x})
        var1 = np.var(xs)
        var2 = machine.var()
        var3 = variance(xs)
        var4 = pvariance(xs)
        var5 = scaler.vars['x']
        var6 = machine.pvar()
        print([var1, var2, var3, var4, var5, var6])
        assert abs(var2 - var3) < 0.0001
        assert abs(var5 - var1) < 0.0001
        assert abs(var5 - var6) < 0.0001
    except ImportError:
        pass
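
The test compares creme's running statistics against a RunningVariance class whose implementation is not part of this excerpt. A minimal Welford-style sketch of such a class, assuming the update(value=...)/var()/pvar() interface used above (hypothetical, not necessarily the project's actual code):

class RunningVariance:
    """Hypothetical running-variance accumulator (Welford's algorithm)."""

    def __init__(self):
        self.n = 0
        self.mean = 0.0
        self.m2 = 0.0  # running sum of squared deviations from the mean

    def update(self, value):
        self.n += 1
        delta = value - self.mean
        self.mean += delta / self.n
        self.m2 += delta * (value - self.mean)

    def var(self):
        # sample variance (ddof=1), comparable to statistics.variance
        return self.m2 / (self.n - 1) if self.n > 1 else 0.0

    def pvar(self):
        # population variance (ddof=0), comparable to statistics.pvariance and np.var
        return self.m2 / self.n if self.n else 0.0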
Example #2
args = vars(ap.parse_args())



# construct our data dictionary which maps the data types of the
# columns in the CSV file to built-in data types
print("[INFO] building column names...")
types = {"feat_{}".format(i): float for i in range(0, args["cols"])}
types["class"] = int

# create a CSV data generator for the extracted Keras features
dataset = stream.iter_csv(args["csv"], target_name="class", types=types)

# construct our pipeline
model = Pipeline([
    ("scale", StandardScaler()),
    ("learn", OneVsRestClassifier(binary_classifier=LogisticRegression()))])

# initialize our metric
print("[INFO] starting training...")
metric = Accuracy()

# loop over the dataset
for (i, (X, y)) in enumerate(dataset):
    # make predictions on the current set of features, train the
    # model on the features, and then update our metric
    preds = model.predict_one(X)
    model = model.fit_one(X, y)
    metric = metric.update(y, preds)
    print("[INFO] update {} - {}".format(i, metric))
Example #3
    help="# of feature columns in the CSV file (excluding class column)")
args = vars(ap.parse_args())

# construct our data dictionary which maps the data types of the
# columns in the CSV file to built-in data types
print("[INFO] building column names...")
types = {f'feat_{i}': float for i in range(args['num_cols'])}
types["class"] = int

# create a CSV data generator for the extracted Keras features
dataset = stream.iter_csv(filepath_or_buffer=args["csv"],
                          target_name="class",
                          converters=types)

# construct our pipeline
model = Pipeline(StandardScaler(),
                 OneVsRestClassifier(binary_classifier=PAClassifier()))

# initialize our metric
print("[INFO] starting training...")
metric = ClassificationReport()

# loop over the dataset
for i, (X, y) in enumerate(dataset):
    # make predictions on the current set of features, train the
    # model on the features, and then update our metric
    preds = model.predict_one(X)
    model = model.fit_one(X, y)
    metric = metric.update(y, preds)
    print("[INFO] update {} - {}".format(i, metric))
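
The snippet above begins after the command-line parser has already been built. Judging from the keys it reads (args["csv"], args["num_cols"]) and the dangling help= line at the top, the parser presumably looks roughly like this hypothetical reconstruction:

# hypothetical reconstruction of the truncated argument parser
import argparse

ap = argparse.ArgumentParser()
ap.add_argument("-c", "--csv", required=True,
                help="path to the CSV file of extracted features")
ap.add_argument("-n", "--num-cols", type=int, required=True,
                help="# of feature columns in the CSV file (excluding class column)")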
Example #4
    def __init__(self, data_collector):
        dc = data_collector
        data = dc.get_data_frame()
        metric = metrics.MAE()

        # delete NA examples
        data = data.dropna()

        # shuffle data
        X_y = data.sample(frac=1).reset_index(drop=True)

        data = X_y[['x', 'y', 'theta']].to_dict('records')
        target_1 = X_y[['sensor_1']]
        target_2 = X_y[['sensor_3']]
        target_3 = X_y[['sensor_5']]
        target_4 = X_y[['sensor_7']]

        print('constructing models')

        # construct our pipeline
        model_1 = Pipeline([
            ("scale", StandardScaler()),
            ("learn",
             ensemble.HedgeRegressor([
                 linear_model.LinearRegression(optim.SGD()),
                 linear_model.LinearRegression(optim.RMSProp()),
                 linear_model.LinearRegression(optim.Adam())
             ]))
        ])

        # construct our pipeline
        model_2 = Pipeline([
            ("scale", StandardScaler()),
            ("learn",
             ensemble.HedgeRegressor([
                 linear_model.LinearRegression(optim.SGD()),
                 linear_model.LinearRegression(optim.RMSProp()),
                 linear_model.LinearRegression(optim.Adam())
             ]))
        ])

        # construct our pipeline
        model_3 = Pipeline([
            ("scale", StandardScaler()),
            ("learn",
             ensemble.HedgeRegressor([
                 linear_model.LinearRegression(optim.SGD()),
                 linear_model.LinearRegression(optim.RMSProp()),
                 linear_model.LinearRegression(optim.Adam())
             ]))
        ])

        # construct our pipeline
        model_4 = Pipeline([
            ("scale", StandardScaler()),
            ("learn",
             ensemble.HedgeRegressor([
                 linear_model.LinearRegression(optim.SGD()),
                 linear_model.LinearRegression(optim.RMSProp()),
                 linear_model.LinearRegression(optim.Adam())
             ]))
        ])

        print('start training')

        for x, y_1, y_2, y_3, y_4 in zip(
                data,
                target_1.values,
                target_2.values,
                target_3.values,
                target_4.values,
        ):
            model_1, y_pred_1 = self._update_model(model_1, x, y_1)
            model_2, y_pred_2 = self._update_model(model_2, x, y_2)
            model_3, y_pred_3 = self._update_model(model_3, x, y_3)
            model_4, y_pred_4 = self._update_model(model_4, x, y_4)

        self.models = [model_1, model_2, model_3, model_4]

        print('done...')
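
    # The constructor above relies on a self._update_model helper that is not part
    # of this excerpt. Judging from how its return value is unpacked, it presumably
    # predicts first (progressive validation), then fits, and returns both the
    # updated model and the prediction; a hypothetical sketch:
    def _update_model(self, model, x, y):
        y_pred = model.predict_one(x)   # predict before learning
        model = model.fit_one(x, y[0])  # y arrives as a one-element array (.values row)
        return model, y_pred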
Example #5
df = pandas.read_sql(query, engine)
df = df.drop("index", axis=1)
timeList = list(df["time(second)"])

df = df.drop("time(second)", axis=1)

x = df.drop("class", axis=1).to_dict(orient="records")
y = list(df["class"])

metrics = (MSE(), Accuracy())

model = (
     StandardScaler() |
     DecisionTreeClassifier()
)

# output file columns: MSE, Accuracy, real (true) value
outputfile = open('C:\\Users\\YigitCan\\Desktop\\Tez-Workspace\\Real-Time-Big-Data-Analytics\\Elderly Sensor\\Output'+str(session)+'.txt', 'w')



previous_time = 0.0
for row, target, time_passed in zip(x, y, timeList):
    # replay the stream at (roughly) its original pace
    time_range = time_passed - previous_time
    if time_range > 0.0:
        time.sleep(time_range)
    previous_time = time_passed
    y_pred = model.predict_one(row)
    model.fit_one(row, target)
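    # The `metrics` tuple and `outputfile` opened above are never used in the part
    # of the loop shown here; a hypothetical continuation (names below are assumed)
    # would update both metrics and append one "MSE Accuracy Real" line per step:
    mse_metric, acc_metric = metrics
    mse_metric.update(target, y_pred)
    acc_metric.update(target, y_pred)
    outputfile.write("{} {} {}\n".format(mse_metric.get(), acc_metric.get(), target))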
Example #6
argument_parser.add_argument(
    '-n',
    '--num-cols',
    type=int,
    required=True,
    help='Number of columns in the feature CSV file (excluding label).')
arguments = vars(argument_parser.parse_args())

print('[INFO] Building column names...')
types = {f'feature_{i}': float
         for i in range(arguments['num_cols'])}  # Data type per feature
types['class'] = int

dataset = stream.iter_csv(arguments['train'], target_name='class', types=types)

model = Pipeline([('scaler', StandardScaler()),
                  ('learner',
                   OneVsRestClassifier(binary_classifier=PAClassifier()))])

metric = Accuracy()

print('[INFO] Training started...')
for index, (X, y) in enumerate(dataset):
    try:
        predictions = model.predict_one(X)
        model = model.fit_one(X, y)
        metric = metric.update(y, predictions)

        if index % 10 == 0:
            print(f'[INFO] Update {index} - {metric}')
    except OverflowError as e:
        # skip updates whose features overflow numerically
        print(f'[WARN] Skipping update {index}: {e}')
Example #7
db = h5py.File(dataset_path, 'r')

TRAIN_PROPORTION = 0.8
SPLIT_INDEX = int(db['labels'].shape[0] * TRAIN_PROPORTION)

BATCH_SIZE = 256
write_dataset('train.csv', db['features'][:SPLIT_INDEX],
              db['labels'][:SPLIT_INDEX], BATCH_SIZE)
write_dataset('test.csv', db['features'][SPLIT_INDEX:],
              db['labels'][SPLIT_INDEX:], BATCH_SIZE)

FEATURE_SIZE = db['features'].shape[1]
types = {f'feature_{i}': float for i in range(FEATURE_SIZE)}
types['class'] = int

model = StandardScaler()
model |= OneVsRestClassifier(LogisticRegression())

metric = Accuracy()
dataset = stream.iter_csv('train.csv', target_name='class', converters=types)
print('Training started...')
for i, (X, y) in enumerate(dataset):
    predictions = model.predict_one(X)
    model = model.fit_one(X, y)
    metric = metric.update(y, predictions)

    if i % 100 == 0:
        print(f'Update {i} - {metric}')

print(f'Final - {metric}')
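
write_dataset is not defined in this excerpt; it presumably writes the feature and label arrays out to a CSV file in batches. A hypothetical sketch, consistent with how it is called above and with the column names expected by stream.iter_csv:

# hypothetical sketch of the write_dataset helper used above
def write_dataset(output_path, features, labels, batch_size):
    with open(output_path, 'w') as f:
        # header: class label plus one column per feature
        cols = ['class'] + [f'feature_{i}' for i in range(features.shape[1])]
        f.write(','.join(cols) + '\n')

        # write the rows out in batches
        for start in range(0, features.shape[0], batch_size):
            batch_x = features[start:start + batch_size]
            batch_y = labels[start:start + batch_size]
            for label, row in zip(batch_y, batch_x):
                f.write(','.join([str(int(label))] + [str(v) for v in row]) + '\n')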
df = pandas.read_sql(query, engine)
#logging.info("Data retrieved by Session = " + str(session))
logging.info("Data retrieved by all")
df = df.drop("index", axis=1)
timeList = list(df["time(second)"])

df = df.drop("time(second)", axis=1)

x = df.drop("class", axis=1).to_dict(orient="records")
y = list(df["class"])

acc = Accuracy()
fbeta = MultiFBeta(betas=({1: 0.5, 2: 0.5, 3: 0.5, 4: 0.5}))

model = (StandardScaler() | DecisionTreeClassifier())
logging.info("Initial model created")
# Mse Accuracy Real
recordNumber = len(y)
text = ""
previous_time = 0.0
logging.info("Learning process has been started")
startTime = time.time()
for row, target, time_passed in tqdm.tqdm(zip(x, y, timeList)):
    '''
    time_range = time_passed - previous_time
    
    if time_range > 0.0:
        time.sleep(time_range)
    previous_time = time_passed
    '''
logging.info("Creating connection")
engine = create_engine('mysql+pymysql://root:@localhost/tez')
logging.info("Connection is ready")

n = 10
acc = Accuracy()
fbeta = MultiFBeta(betas=({
    'bike': 0.5,
    'sit': 0.5,
    'stairsdown': 0.5,
    'stairsup': 0.5,
    'stand': 0.5,
    'walk': 0.5
}))

model = (StandardScaler() | KNeighborsClassifier())
logging.info("Initial model created for phone type " + phoneType)

#modelName = ["nexus4", "s3", "s3mini", "samsungold"]
classHistory = []
classNum = 6
trainFlag = True
startTime = time.time()
previousTime = None
logging.info("Learning stage started with total step of " + str(stepNumber))
for step in tqdm.tqdm(range(stepNumber + 1)):
    logging.info("Data retrieved at step " + str(step + 1) + "/" +
                 str(stepNumber + 1))
    if step < stepNumber:
        query = "select x,y,z,gt from " + tableName + "_" + phoneType + " where id >= " + str(
            step * dataPackageLimit) + " and id <= " + str(