def cron():
    # Fetch new video ids, build a bag-of-words per video, and store each one.
    video_ids = Youtube().get_data()
    for video_id in video_ids:
        bow = Preprocess(video_id).make_bow()
        mongoDB(getDate()).insert(video_id, bow)
    return jsonify("T")
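# mongoDB (imported from mongo.py) is the project's thin wrapper used throughout
# these files; its implementation is not shown here. Below is a minimal sketch of
# a wrapper with the same putData/getAllDatas/insert surface, assuming a local
# pymongo connection and a database name of 'deeplearning' (both assumptions,
# not the project's confirmed configuration).
from pymongo import MongoClient


class mongoDB_sketch:
    def __init__(self, date=None):
        self.client = MongoClient('localhost', 27017)  # assumed local instance
        self.db = self.client['deeplearning']          # assumed database name
        self.date = date

    def putData(self, data, tablename):
        # Note: insert_one mutates `data` in place by adding an _id field.
        self.db[tablename].insert_one(data)

    def insert(self, key, value):
        # Shape of the stored document is assumed from the cron() call above.
        self.db['bow'].insert_one({'video_id': key, 'bow': value, 'date': self.date})

    def getAllDatas(self, tablename):
        return list(self.db[tablename].find())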
def getPlugClusterList():
    pd_drop = ['_id', 'timestamp', '__v']
    DRAW = False

    mongo = mongoDB()
    getData_rssi = mongo.getAllDatas(tablename='rssi_real')
    getData_current = mongo.getAllDatas(tablename='current')

    dataset_rssi = pd.DataFrame(getData_rssi)
    dataset_current = pd.DataFrame(getData_current)

    # Drop Mongo bookkeeping columns, zero-fill gaps, and coerce to numeric
    # (apply returns a new frame, so the result must be reassigned).
    dataset_plugs_rssi = dataset_rssi.drop(pd_drop, axis=1).fillna(0)
    dataset_plugs_rssi = dataset_plugs_rssi.apply(pd.to_numeric, errors='ignore')
    dataset_plugs_current = dataset_current.drop(pd_drop, axis=1).fillna(0)
    dataset_plugs_current = dataset_plugs_current.apply(pd.to_numeric, errors='ignore')

    # Pair each RSSI row with its current-sensor row and round both to ints.
    dataset_plugs_list = []
    for (_, row_rssi), (_, row_current) in zip(dataset_plugs_rssi.iterrows(),
                                               dataset_plugs_current.iterrows()):
        df_list_rssi = pd.to_numeric(row_rssi).round(0).astype(int).values.tolist()
        df_list_current = pd.to_numeric(row_current).round(0).astype(int).values.tolist()
        dataset_plugs_list.append(df_list_rssi + df_list_current)

    dataset_plugs = pd.DataFrame(dataset_plugs_list)
    draw_PlugState(dataset_plugs)

    list_plugs = dataset_plugs.values.tolist()      # [[1,2,3,4], [1,2,3,4], ...] one row per sample
    list_plugs_T = dataset_plugs.T.values.tolist()  # [[1,1,1,...], [2,2,2,...], ...] one series per plug

    # Smooth each per-plug series with a Kalman filter, then cluster the samples.
    list_plugs_kalman_T = KalmanFilter(list_plugs_T)
    # list_plugs_kalman_T = KalmanFilter(list_plugs_kalman_T, Q_var=0.01, R_var=0.2, draw=True)  # optional second pass
    list_plugs_kalman = list(map(list, zip(*list_plugs_kalman_T)))
    list_plugs_cluster = Agglomerative(list_plugs_kalman, draw=True)
    return list_plugs_cluster
def getPlugClusterList():
    pd_drop = ['_id', 'timestep', '__v']
    DRAW = False

    mongo = mongoDB()
    getData = mongo.getAllDatas(tablename='rssi_real')
    dataset = pd.DataFrame(getData)

    # Skip the first 700 warm-up samples, drop bookkeeping columns, zero-fill gaps.
    dataset = dataset.drop(dataset.index[:700])
    dataset = dataset.drop(pd_drop, axis=1)
    dataset_plugs = dataset.fillna(0)
    # draw_PlugState(dataset_plugs)

    list_plugs = dataset_plugs.values.tolist()      # [[1,2,3,4], [1,2,3,4], ...] one row per sample
    list_plugs_T = dataset_plugs.T.values.tolist()  # [[1,1,1,...], [2,2,2,...], ...] one series per plug

    # Two Kalman passes: a coarse smoothing (R_var=0.6) followed by a finer one (R_var=0.2).
    list_plugs_kalman_T = KalmanFilter(list_plugs_T, Q_var=0.01, R_var=0.6, draw=DRAW)
    list_plugs_kalman_T = KalmanFilter(list_plugs_kalman_T, Q_var=0.01, R_var=0.2, draw=DRAW)
    list_plugs_kalman = list(map(list, zip(*list_plugs_kalman_T)))

    list_plugs_cluster = Agglomerative(list_plugs_kalman, draw=DRAW)
    return list_plugs_cluster
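# KalmanFilter and Agglomerative are project helpers defined elsewhere in this
# repo. A minimal sketch of what they could look like, assuming the same
# signatures (list of per-plug series in, same shape out; Q_var/R_var as process
# and measurement noise variances; `draw` accepted for signature parity and
# ignored here). This is illustrative, not the project's actual implementation.
import numpy as np
from sklearn.cluster import AgglomerativeClustering


def KalmanFilter_sketch(series_list, Q_var=0.01, R_var=0.6, draw=False):
    """Smooth each 1-D series with a constant-state (random walk) Kalman filter."""
    smoothed = []
    for series in series_list:
        x, P = series[0], 1.0          # initial state estimate and covariance
        out = []
        for z in series:
            P = P + Q_var              # predict: constant state, add process noise
            K = P / (P + R_var)        # Kalman gain
            x = x + K * (z - x)        # update toward measurement z
            P = (1 - K) * P
            out.append(x)
        smoothed.append(out)
    return smoothed


def Agglomerative_sketch(samples, n_clusters=2, draw=False):
    """Cluster sample rows with bottom-up hierarchical clustering."""
    return AgglomerativeClustering(n_clusters=n_clusters).fit_predict(
        np.asarray(samples)).tolist()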
def putData_csv(self, csv_filepath, tablename='predict'):
    mongodb = mongoDB()
    predict_data = pd.read_csv(csv_filepath)
    # Keep only the current-sensor columns (the first totalPlug columns are RSSI).
    current_data = predict_data[predict_data.columns[self.totalPlug:]]
    for _, row in current_data.iterrows():
        res = self.res_csv(row.values.tolist())
        mongodb.putData(data=res, tablename=tablename)
def get_DataSet_from_mongoDB(self):
    pd_drop = ['_id', 'timestamp', '__v']
    pd_drop_current = ['_id', 'timestamp', '__v', 'cmd']

    mongo = mongoDB()
    getData_rssi = mongo.getAllDatas(tablename='rssi_real')
    getData_current = mongo.getAllDatas(tablename='current')

    dataset_rssi = pd.DataFrame(getData_rssi)
    dataset_current = pd.DataFrame(getData_current)

    # Drop Mongo bookkeeping columns, zero-fill gaps, and coerce to numeric
    # (apply returns a new frame, so the result must be reassigned).
    dataset_plugs_rssi = dataset_rssi.drop(pd_drop, axis=1).fillna(0)
    dataset_plugs_rssi = dataset_plugs_rssi.apply(pd.to_numeric, errors='ignore')
    dataset_plugs_current = dataset_current.drop(pd_drop_current, axis=1).fillna(0)
    dataset_plugs_current = dataset_plugs_current.apply(pd.to_numeric, errors='ignore')

    # Map plug1, plug2, ... onto the MAC-address column names.
    plugs_mac = list(dataset_plugs_rssi)
    for index, plug_mac in enumerate(plugs_mac):
        plug_number = 'plug' + str(index + 1)
        self.plugs_map[plug_number] = plug_mac

    # Pair each RSSI row with its current-sensor row and round both to ints.
    dataset_plugs_list = []
    for (_, row_rssi), (_, row_current) in zip(dataset_plugs_rssi.iterrows(),
                                               dataset_plugs_current.iterrows()):
        df_list_rssi = pd.to_numeric(row_rssi).round(0).astype(int).values.tolist()
        df_list_current = pd.to_numeric(row_current).round(0).astype(int).values.tolist()
        dataset_plugs_list.append(df_list_rssi + df_list_current)

    dataset_plugs = pd.DataFrame(dataset_plugs_list)

    # Record dataset geometry; every sample is used for training in this revision.
    self.totalSize = len(dataset_plugs)
    self.trainSize = self.totalSize
    self.batchSize = self.setBatchSize()
    self.features = dataset_plugs.shape[1]
    self.totalPlug = int(self.features / 2)
    self.save_dataSet_info()
    return dataset_plugs
def kerasModel_preprocessing_trainset(self, dir, fileName):
    mongodb = mongoDB()
    myjson = Json()
    mongodb_getData = mongodb.getAllDatas(tablename='rssi')
    rssi, current, _ = myjson.decodingJson(mongodb_getData)

    # Concatenate each RSSI sample with its current sample and dump to CSV.
    path = dir + "/" + fileName
    csvList = []
    for _rssi, _current in zip(rssi, current):
        appendData = np.append(_rssi, _current)
        csvList.append(appendData.tolist())
    pdList = pd.DataFrame(csvList)
    pdList.to_csv(path, header=False)

    first_row_name = []
    first_row_name = np.append(first_row_name, 'plug_state')
    first_row_name = np.append(first_row_name, 'current_state')
    # first_row_name = np.append(first_row_name, ['rssi' + str(i) for i in range(1, 1 + self.training_field_length)])
    # first_row_name = np.append(first_row_name, ['current' + str(i) for i in range(1, 1 + self.training_field_length)])

    # Read back the file written above (comma-separated, as to_csv wrote it).
    df = pd.read_csv(path, header=None, names=first_row_name)
    df.index = range(len(df.index))
    print("df : {}".format(df))

    # Min-max scale every column before building the sequence dataset.
    df_new, _, _ = MinMaxScaler(df, start_col_index=0, end_col_index=df.shape[1] - 1)

    # Slice the scaled frame into (timesteps, features) windows for the LSTM.
    train_x, train_y = seq2dataset(df_new.values, self.timesteps)
    train_x = np.reshape(train_x, (train_y.shape[0], self.timesteps, self.features))
    return train_x, train_y
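# seq2dataset and MinMaxScaler above are project helpers defined elsewhere. A
# minimal sketch of both, assuming seq2dataset windows consecutive rows and takes
# the label from the last column of the row following each window, and that
# MinMaxScaler rescales the given column range to [0, 1]. Both conventions are
# assumptions, not the project's confirmed behavior.
import numpy as np
import pandas as pd


def seq2dataset_sketch(values, timesteps):
    """Turn a 2-D sample array into (window, next-step label) pairs."""
    xs, ys = [], []
    for i in range(len(values) - timesteps):
        xs.append(values[i:i + timesteps])    # `timesteps` consecutive rows as input
        ys.append(values[i + timesteps][-1])  # label from the row that follows
    return np.asarray(xs), np.asarray(ys)


def MinMaxScaler_sketch(df, start_col_index, end_col_index):
    """Rescale columns [start, end] to [0, 1]; return frame plus the min/max used."""
    cols = df.columns[start_col_index:end_col_index + 1]
    minVal, maxVal = df[cols].min(), df[cols].max()
    df = df.copy()
    df[cols] = (df[cols] - minVal) / (maxVal - minVal)  # constant columns would divide by zero
    return df, minVal, maxVal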
def putData():
    # Back-fill one day of synthetic plug states, one sample per second.
    time = datetime.utcnow() - timedelta(days=1)
    mongo = mongoDB()
    for i in range(0, 86400):
        time = time + timedelta(seconds=1)
        # Square-wave states: the plugs toggle every 1.5 h, 3 h, 6 h, and 12 h.
        plug_data = {
            "title": "DeepLearning",
            "plug1": (i // 5400) % 2,
            "plug2": (i // 10800) % 2,
            "plug3": (i // 21600) % 2,
            "plug4": (i // 43200) % 2,
            "timestep": str(time)
        }
        mongo.putData(data=plug_data, tablename='predict')
def putData_Mongo():
    csv = pd.read_csv(DIR_csvFile)
    data = csv.values
    tag = ['time', 'rssi_data', 'current_data']

    mongodb = mongoDB()
    for _data in data:
        # Columns 1-12 hold the RSSI readings, columns 13-24 the current readings.
        time = str(datetime.now())
        rssi = _data[1:13]
        current = _data[13:25]
        value = [time, rssi, current]
        myjson = Json(tag, value)  # renamed from `json` to avoid shadowing the module
        json_obj = myjson.encodingJson()
        mongodb.putData(data=json_obj, tablename='rssi_test')
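# Json is another project helper (used here and in the Keras preprocessing above);
# its real implementation is elsewhere. A minimal sketch under the assumption that
# encodingJson zips parallel tag/value lists into one document and decodingJson
# pulls the 'rssi_data'/'current_data'/'time' fields (the tags used above) back
# out of a list of documents. Internals are assumed, not confirmed.
class Json_sketch:
    def __init__(self, tag=None, value=None):
        self.tag = tag
        self.value = value

    def encodingJson(self):
        # Zip parallel tag/value lists into one document-ready dict.
        return dict(zip(self.tag, self.value))

    def decodingJson(self, documents):
        # Inverse direction: recover parallel rssi/current/time lists.
        rssi = [doc['rssi_data'] for doc in documents]
        current = [doc['current_data'] for doc in documents]
        times = [doc['time'] for doc in documents]
        return rssi, current, times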
def get_DataSet_from_mongoDB():
    pd_drop = ['_id', 'timestamp', '__v']

    mongo = mongoDB()
    getData_rssi = mongo.getAllDatas(tablename='rssi_real')
    getData_current = mongo.getAllDatas(tablename='current')

    dataset_rssi = pd.DataFrame(getData_rssi)
    dataset_current = pd.DataFrame(getData_current)

    # Drop Mongo bookkeeping columns, zero-fill gaps, and coerce to numeric
    # (apply returns a new frame, so the result must be reassigned).
    dataset_plugs_rssi = dataset_rssi.drop(pd_drop, axis=1).fillna(0)
    dataset_plugs_rssi = dataset_plugs_rssi.apply(pd.to_numeric, errors='ignore')
    dataset_plugs_current = dataset_current.drop(pd_drop, axis=1).fillna(0)
    dataset_plugs_current = dataset_plugs_current.apply(pd.to_numeric, errors='ignore')

    # Pair each RSSI row with its current-sensor row and round both to ints.
    dataset_plugs_list = []
    for (_, row_rssi), (_, row_current) in zip(dataset_plugs_rssi.iterrows(),
                                               dataset_plugs_current.iterrows()):
        df_list_rssi = pd.to_numeric(row_rssi).round(0).astype(int).values.tolist()
        df_list_current = pd.to_numeric(row_current).round(0).astype(int).values.tolist()
        dataset_plugs_list.append(df_list_rssi + df_list_current)

    return pd.DataFrame(dataset_plugs_list)
def get_DataSet_from_mongoDB(self):
    pd_drop = ['_id', 'timestep', '__v']

    mongo = mongoDB()
    getData = mongo.getAllDatas(tablename='rssi_real')
    dataset = pd.DataFrame(getData)

    # Drop bookkeeping columns, zero-fill gaps, and coerce to numeric
    # (apply returns a new frame, so the result must be reassigned).
    dataset_plugs = dataset.drop(pd_drop, axis=1).fillna(0)
    dataset_plugs = dataset_plugs.apply(pd.to_numeric, errors='ignore')

    # Round each RSSI row to ints and pad it with three placeholder zero columns.
    dataset_plugs_list = []
    append_list = [0, 0, 0]
    for _, row in dataset_plugs.iterrows():
        df_list = pd.to_numeric(row).round(0).astype(int).values.tolist()
        dataset_plugs_list.append(df_list + append_list)

    return pd.DataFrame(dataset_plugs_list)
import sys

import pandas as pd

sys.path.append("..")
from datetime import datetime

from mongo import mongoDB

# Replay RSSI readings from a saved training-result CSV into the rssi_real table.
mongodb = mongoDB()

file_name = '/Users/gyeongmin/Documents/Final_project/DeepLearning/data/keras_trainResult_model_default_dataset_batch.csv'
restore_data = pd.read_csv(file_name, sep=',')
restore_data = restore_data[[
    'plug1_rssi_00:13', 'plug2_rssi_00:25', 'plug3_rssi_00:72'
]]
print(restore_data)

for index, row in restore_data.iterrows():
    # Build a fresh document per insert: re-using one dict would carry the _id
    # that pymongo adds on insert into every later document.
    rssi_HashMap = {
        '00:13': row['plug1_rssi_00:13'],
        '00:25': row['plug2_rssi_00:25'],
        '00:72': row['plug3_rssi_00:72'],
        'timestamp': str(datetime.utcnow()),
    }
    mongodb.putData(data=rssi_HashMap, tablename='rssi_real')
import sys

import numpy as np
import pandas as pd

sys.path.append("..")
from datetime import datetime

from mongo import mongoDB
"""
Save-test helper for the MongoDB interface layer.
The dataset holds the RSSI values of plug1~4.
"""

mongo = mongoDB()


def getData():
    filepath = "/Users/gyeongmin/Documents/Final_project/DeepLearning/data/rssi_test1.csv"
    first_row_name = []
    first_row_name = np.append(first_row_name, 'plug_state')
    first_row_name = np.append(first_row_name, 'current_state')
    # first_row_name = np.append(first_row_name, ['rssi' + str(i) for i in range(1, 1 + self.training_field_length)])
    # first_row_name = np.append(first_row_name, ['current' + str(i) for i in range(1, 1 + self.training_field_length)])
    df = pd.read_csv(filepath, header=None, names=first_row_name, sep=' ')
    return df