def make_dataset(capteur_list, text_input_start, text_input_end, nom_capteur, L_influx): client = DataFrameClient(host=L_influx[3], port=L_influx[4], username=L_influx[1], password=L_influx[2]) client.switch_database(L_influx[0]) src = pd.DataFrame() for elt in capteur_list: requete = "SELECT " + '"' + elt + '"' + " FROM measure WHERE time >= " + "'" + text_input_start + "'" + " AND " + "time <= " + "'" + text_input_end + "'" + " AND " + "ID=" + "'" + nom_capteur + "'" datasets = client.query(requete) df = pd.DataFrame(datasets['measure']) df = df.rename_axis('Date') df.index = df.index.astype('datetime64[ns]') src = pd.concat([df, src], axis=1) client.close() src['Date'] = src.index Index = [i for i in range(0, df.shape[0])] src.index = Index cols = src.columns.tolist() cols = cols[-1:] + cols[:-1] src = src[cols] return (src)
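# A minimal usage sketch for make_dataset above; the connection values, field names,
# and sensor ID are placeholders, but the ordering of L_influx
# [database, username, password, host, port] follows how the function indexes it.
L_influx_example = ["measures_db", "reader", "secret", "localhost", 8086]
df_example = make_dataset(capteur_list=["temperature", "humidite"],
                          text_input_start="2021-01-01 00:00:00",
                          text_input_end="2021-01-02 00:00:00",
                          nom_capteur="capteur_01",
                          L_influx=L_influx_example)
print(df_example.head())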
def save_to_database(dataframe, user='******', password='******', host='localhost', port=8086):
    '''Save a pandas.DataFrame to the InfluxDB database.'''
    from influxdb import DataFrameClient
    import time
    dbname = 'aiops'
    print("Connecting to database: host={}, port={}, dbname={}, user={}".format(
        host, port, dbname, user))
    client = DataFrameClient(host, port, user, password, dbname)
    # print("Create database: " + dbname)
    # client.create_database(dbname)
    tablename = 'sample_label' + time.strftime("_%Y%m%d_%H%M%S", time.localtime())
    df = pd.DataFrame(data=np.array(dataframe),
                      index=pd.date_range(start=time.strftime("%Y-%m-%d", time.localtime()),
                                          periods=len(dataframe), freq='ms'))
    client.write_points(df, tablename, protocol='line')
    client.close()
def main (host, port, csv): # Open the file and parse it pd.set_option('display.max_rows', 500) pd.set_option('display.max_columns', 500) pd.set_option('display.width', 1000) # Thanks to # https://stackoverflow.com/questions/29442370/how-to-correctly-read-csv-in-pandas-while-changing-the-names-of-the-columns print("Parsing csv...") headers = ['start_date', 'end_date', 'measurement_duration', 'detector_number', 'sensitivity', 'bubble_count', 'exposure', 'name'] data_frame = pd.read_csv(csv, parse_dates=True, index_col=0, names=headers, header=0) # Format the date time values data_frame.index = pd.to_datetime(data_frame.index, utc=True) print(data_frame); # Create the database connection # Thanks to the influxdb pip docs # https://influxdb-python.readthedocs.io/en/latest/examples.html#tutorials-basic print("Connecting to db...") client_connection = DataFrameClient(host=host, port=port, database=DB_NAME) print("Writing data...") client_connection.write_points(data_frame, DB_NAME, protocol="line") # Close the connection client_connection.close()
def update_to_latest(client: DataFrameClient, noncache_provider: typing.Callable, new_symbols: set = None, time_delta_back: relativedelta = relativedelta(years=5), skip_if_older_than: relativedelta = None): """ Update existing entries in the database to the most current values :param client: DataFrameClient client :param noncache_provider: Non cache data provider :param new_symbols: additional symbols to add {(symbol, interval_len, interval_type), ...}} :param time_delta_back: start :param skip_if_older_than: skip symbol update if the symbol is older than... :return: """ filters = dict() new_symbols = set() if new_symbols is None else new_symbols if skip_if_older_than is not None: skip_if_older_than = (datetime.datetime.utcnow().replace(tzinfo=tz.gettz('UTC')) - skip_if_older_than).astimezone(tz.gettz('US/Eastern')) for key, time in [(e[0], e[1][1]) for e in ranges(client).items()]: if key in new_symbols: new_symbols.remove(key) if skip_if_older_than is None or time > skip_if_older_than: bgn_prd = datetime.datetime.combine(time.date(), datetime.datetime.min.time()).replace(tzinfo=tz.gettz('US/Eastern')) filters[BarsFilter(ticker=key[0], bgn_prd=bgn_prd, interval_len=key[1], interval_type=key[2])] = None bgn_prd = datetime.datetime.combine(datetime.datetime.utcnow().date() - time_delta_back, datetime.datetime.min.time()).replace(tzinfo=tz.gettz('US/Eastern')) for (symbol, interval_len, interval_type) in new_symbols: filters[BarsFilter(ticker=symbol, bgn_prd=bgn_prd, interval_len=interval_len, interval_type=interval_type)] = None logging.getLogger(__name__).info("Updating " + str(len(filters)) + " total symbols and intervals; New symbols and intervals: " + str(len(new_symbols))) q = queue.Queue(maxsize=100) threading.Thread(target=partial(noncache_provider, filters=filters, q=q), daemon=True).start() try: for i, tupl in enumerate(iter(q.get, None)): ft, to_cache = filters[tupl[0]], tupl[1] if to_cache is not None and not to_cache.empty: # Prepare data for c in [c for c in to_cache.columns if c not in ['symbol', 'open', 'high', 'low', 'close', 'volume']]: to_cache.drop(c, axis=1, inplace=True) to_cache['interval'] = str(ft.interval_len) + '_' + ft.interval_type if to_cache.iloc[0].name == ft.bgn_prd: to_cache = to_cache.iloc[1:] try: client.write_points(to_cache, 'bars', protocol='line', tag_columns=['symbol', 'interval'], time_precision='s') except Exception as err: logging.getLogger(__name__).exception(err) if i > 0 and (i % 20 == 0 or i == len(filters)): logging.getLogger(__name__).info("Cached " + str(i) + " queries") finally: client.close()
def test_03_influx_wr(self):
    self.system_con()
    influxdb_host = 'localhost'
    influxdb_port = 8086
    influxdb_id = 'krmim'
    influxdb_pwd = 'krmim_2017'
    influxdb_db = 'facility'
    influxdb_client = self.mqtt_agent.get_influxdb_mgr()
    influxdb_Dfclient = DataFrameClient(influxdb_host, influxdb_port, influxdb_id,
                                        influxdb_pwd, influxdb_db)
    if influxdb_client is None:
        print('influxdb configuration fail')
    packet = self.make_packet(facility_id='TS0001', sensor_code='0001', pv=330)
    _, _, modbus_udp = self.async_svr.convert_hex2decimal(packet, 'localhost', 6379)
    fac_daq = get_fac_inf(self.redis_con)
    redis_fac_info = json.loads(self.redis_con.get('facilities_info'))
    equipment_id = modbus_udp['equipment_id']
    if equipment_id in redis_fac_info.keys():
        fac_msg = config_fac_msg(equipment_id, fac_daq, modbus_udp, redis_fac_info)
        fields = {}
        facility_msg_json = json.loads(fac_msg)
        me_timestamp = time.time()
        del facility_msg_json[equipment_id]['ms_time']
        del facility_msg_json[equipment_id]['pub_time']
        for key in facility_msg_json[equipment_id].keys():
            print(key)
            fields[key] = float(facility_msg_json[equipment_id][key])
        # fields['me_time'] = me_timestamp
        influx_json = [{'measurement': equipment_id, 'fields': fields}]
        try:
            influxdb_Dfclient.query('DROP SERIES FROM TS0001')
            if influxdb_client.write_points(influx_json) is True:
                print('influx write success: ' + str(influx_json))
            else:
                print('influx write failed: ' + str(influx_json))
        except Exception as e:
            print(str(e.args))
        influxdb_client.close()
        ts5_no_coefficient = influxdb_Dfclient.query(
            'SELECT * FROM TS0001 where time >= now() - 15m')
        ts5_no_coefficient = ts5_no_coefficient['TS0001']
        rows_size = ts5_no_coefficient.shape[0]
        row_json = ts5_no_coefficient.to_dict(orient='records')[0]
        cmp = influx_json[0]['fields'] == row_json
        print(cmp)
        print(influx_json[0]['fields'])
        print(row_json)
        self.assertEqual(rows_size, 1)
        influxdb_Dfclient.close()
def read_influxdb_data(host='192.168.123.245', port=8086, dbname='c9377a95-82f3-4af3-ac14-40d14f6d2abe', ChannelName='1Y520210100', time_start='', time_end='', user='******', password='******', keyword=''): client = DataFrameClient(host, port, user, password, dbname) measurements = client.get_list_measurements() if keyword is None: keyword = '' if keyword == '': measurement = [ mea.get(u'name') for mea in measurements if mea.get(u'name').find(ChannelName) >= 0 ] else: measurement = [ mea.get(u'name') for mea in measurements if mea.get(u'name').find(ChannelName) >= 0 and mea.get(u'name').find(keyword) >= 0 ] if len(measurement) == 0: print('No data retrieved.') return None measurement = measurement[-1] time_end = 'now()' if time_end == '' else "'" + time_end + "'" time_start = 'now()' if time_start == '' else "'" + time_start + "'" querystr = 'select * from "{}" where time > {} and time < {}'.format( measurement, time_start, time_end) #print(querystr) df = client.query(querystr).get(measurement) client.close() if df is None: print('InfluxDB no data retrieved.') return None dff = df.groupby('id') columns = [name for name, group in dff] groups = [group['val'] for name, group in dff] #check datatime alginment: all([all(groups[i].index==groups[0].index) for i in range(1,len(groups))]) result = pd.concat(groups, axis=1) result.columns = columns result.index = groups[0].index return measurement, result
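# A hypothetical call to read_influxdb_data above; the time range is a placeholder and
# the connection parameters fall back to the defaults in the signature. On success the
# function returns a (measurement_name, DataFrame) tuple and None when nothing was
# retrieved, so the result should be checked before unpacking.
res = read_influxdb_data(ChannelName='1Y520210100',
                         time_start='2021-01-01 00:00:00',
                         time_end='2021-01-02 00:00:00')
if res is not None:
    measurement_name, channel_df = res
    print(measurement_name, channel_df.shape)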
def random_shapeable_order():
    # Retrieve random order
    df = randomorders.random_shapeable_orderbook()
    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'sbook')
    client.close()
    # Run optimization
    optimization()
    return {"status": "success"}
def forecast(times: List[str], values: List[float]):
    df = pandas.DataFrame(index=pandas.DatetimeIndex(times).round('5T'),
                          data={'uncontr': values})
    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'uncontr')
    client.close()
    # Run optimization
    optimization()
    return {"status": "success"}
def random_deferrable_order():
    # Retrieve random order
    TIMESTEP = 12
    df = randomorders.random_deferrable_orderbook(timestep=60 / TIMESTEP)
    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'dbook')
    client.close()
    # Run optimization
    optimization()
    return {"status": "success"}
def __fQueryInflux(self, query: str) -> dict: if self.permissionDB is True: logger.debug(f"Perform the query {query} to the database.") client = DataFrameClient(host=self.host, port=self.port, database=self.database) result = client.query(query) client.close() else: logger.error("No permission to connect to the database.") result = None return result
def _check_influx(self): conn = DataFrameClient(**config.INFLUX_CONN_SETTING) sql = f'''SELECT * FROM "{config.INFLUX_TABLE_CHECK}" WHERE table='{config.INFLUX_TABLE_POINT}' ORDER BY time DESC LIMIT 1''' row = conn.query(sql) if row: row = row[config.INFLUX_TABLE_CHECK] self.log_file = row['file'].values[0] self.log_pos = row['end_log_pos'].values[0] logging.log( logging.WARNING, 'found binlog check table, now log_file is {} and restart pos is {}' .format(self.log_file, self.log_pos)) conn.close() return row
def remove_deferrable_order(t: str):
    # Create fake order with 0
    data = {'duration': [1], 'profile_kw': [[0.0]]}
    # minus 2 hours is a workaround for the #@?! timezone
    df = pandas.DataFrame(
        index=[datetime.strptime(t, '%Y-%m-%d %H:%M:%S') - timedelta(hours=2)],
        data=data)
    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'dbook')
    client.close()
    # Run optimization
    optimization()
    return {"status": "success"}
def get_tide_data():
    with open("last_data_retrieved.txt") as f:
        previous_date_time = f.readlines()
    url = "https://tidesandcurrents.noaa.gov/api/datagetter?date=latest&station=8638610&product=water_level&datum=NAVD&units=english&time_zone=lst_ldt&format=json"
    response = requests.get(url)
    result = json.loads(response.text)
    station_id = str(result['metadata']['id'])
    station_name = str(result['metadata']['name'])
    lat = str(result['metadata']['lat'])
    lon = str(result['metadata']['lon'])
    value = float(result['data'][0]['v'])
    date_time_str = str(result['data'][0]['t'])
    date_time = datetime.strptime(date_time_str, '%Y-%m-%d %H:%M')
    current_date_time = str(date_time)
    if current_date_time not in previous_date_time:
        cols = ['station_id', 'station_name', 'latitude', 'longitude', 'date_time', 'value']
        line = pd.DataFrame({'station_id': station_id, 'station_name': station_name,
                             'latitude': lat, 'longitude': lon,
                             'date_time': date_time, 'value': value},
                            columns=cols, index=[0])
        line['date_time'] = pd.to_datetime(line['date_time'])
        line = line.set_index('date_time')
        # print(line)
        time_values = line[['value']]
        # print(time_values)
        tags = {'station_id': line[['station_id']], 'station_name': line[['station_name']],
                'latitude': line[['latitude']], 'longitude': line[['longitude']]}
        client = DataFrameClient(host, port, user, password, dbname)
        # Write DataFrame with Tags
        client.write_points(line, dbname, protocol=protocol)  # , tags
        client.close()
        f = open('last_data_retrieved.txt', 'w')
        f.write(current_date_time + '\n')
        f.close()
        print("New data has been added to the database!")
    else:
        print("No new data available")
def get_tide_data():
    with open("last_data_retrieved.txt") as f:
        previous_date_time = f.readlines()
    url = "https://gentle-falls-78142.herokuapp.com/"
    response = requests.get(url)
    result = json.loads(response.text)
    sensor_id = str(result['metadata']['id'])
    sensor_name = str(result['metadata']['name'])
    lat = str(result['metadata']['lat'])
    lon = str(result['metadata']['lon'])
    flood = str(result['data'][0]['f'])
    value = float(result['data'][0]['v'])
    date_time_str = str(result['data'][0]['t'])
    date_time = datetime.strptime(date_time_str, '%Y-%m-%d %H:%M')
    current_date_time = str(date_time)
    if current_date_time not in previous_date_time:
        cols = ['sensor_id', 'sensor_name', 'latitude', 'longitude', 'flood', 'date_time', 'value']
        line = pd.DataFrame({'sensor_id': sensor_id, 'sensor_name': sensor_name,
                             'latitude': lat, 'longitude': lon, 'flood': flood,
                             'date_time': date_time, 'value': value},
                            columns=cols, index=[0])
        line['date_time'] = pd.to_datetime(line['date_time'])
        line = line.set_index('date_time')
        # print(line)
        time_values = line[['value']]
        print(time_values)
        tags = {'sensor_id': line[['sensor_id']], 'sensor_name': line[['sensor_name']],
                'latitude': line[['latitude']], 'longitude': line[['longitude']],
                'flood': line[['flood']]}
        client = DataFrameClient(host, port, user, password, dbname)
        # Write DataFrame with Tags
        client.write_points(line, dbname, protocol=protocol)  # , tags
        client.close()
        f = open('last_data_retrieved.txt', 'w')
        f.write(current_date_time)  # + '\n'
        f.close()
        print("New data has been added to the database!")
    else:
        print("No new data available")
def save_total_demand():
    # Limit the number of calls to avoid
    # high cardinality of influxdb tags
    # Query total demand data
    client = DataFrameClient(host, port, user, password, dbname)
    start = datetime.now()
    query = ("select * from contr " + "WHERE time >= '" +
             start.strftime("%Y-%m-%dT%H:%M:%SZ") + "' AND time <= '" +
             (start + timedelta(hours=24)).strftime("%Y-%m-%dT%H:%M:%SZ") + "'")
    contr = client.query(query)['contr']
    # Save it to a different measurement
    client.write_points(
        contr, 'versioncontr', {
            'version': str(
                int(datetime.now().replace(second=0, microsecond=0).timestamp() * 1000))
        })
    client.close()
    return {"status": "success"}
def remove_battery_order(t: str):
    # Create fake order with 0
    data = {
        'min_kw': [0.0],
        'max_kw': [0.0],
        'max_kwh': [0.0],
        'initial_kwh': [0.0],
        'end_kwh': [0.0]
    }
    # minus 2 hours is a workaround for the #@?! timezone
    df = pandas.DataFrame(
        index=[datetime.strptime(t, '%Y-%m-%d %H:%M:%S') - timedelta(hours=2)],
        data=data)
    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'bbook')
    client.close()
    # Run optimization
    optimization()
    return {"status": "success"}
def battery_order(order: BatteryOrder):
    # Convert start and end time to seconds since epoch
    # minus 2 hours is a workaround for the #@?! timezone
    # times 1000 for milliseconds
    order.startby = (datetime.strptime(order.startby, '%Y-%m-%dT%H:%M:%SZ') -
                     timedelta(hours=2)).timestamp() * 1000
    order.endby = (datetime.strptime(order.endby, '%Y-%m-%dT%H:%M:%SZ') -
                   timedelta(hours=2)).timestamp() * 1000
    # Create a dataframe to be saved to influxdb
    df = pandas.DataFrame(
        index=[datetime.now().replace(second=0, microsecond=0)],
        data=json.loads(order.json()))
    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'bbook')
    client.close()
    # Run optimization
    optimization()
    return {"status": "success"}
def forecasting_task(params: dict): logger = params['LOGGER'] logger.info('Starting forecasting task...') start_time = time() # Connect to the database "Emon" in InfluxDB client = DataFrameClient(host=params['INFLUXDB_HOST'], port=params['INFLUXDB_PORT'], username=params['INFLUXDB_USER'], password=params['INFLUXDB_PASS'], database=params['INFLUXDB_DB']) # Retrieve and update weather historical measurements and predictions W = forecasting.weather_preprocessing(client, params) # Query and preprocess power load measurements y, X, Xh = forecasting.preprocessing(client, params, W) # Predict power load consumptions df_pred = forecasting.forecasting(y, X, Xh, params) # Write power load forecasts to InfluxDB try: # Write data to InfluxDB for the current source client.write_points(dataframe=df_pred, database=params['INFLUXDB_DB'], measurement=params['FORECAST_TS'], protocol='line', time_precision='s') except Exception as e: logger.error(e) finally: logger.info( f'Forecasting task completed in {round(time()-start_time)} seconds!' ) client.close()
class TestInfluxDBCache(unittest.TestCase): """ Test InfluxDBCache """ def setUp(self): self._client = InfluxDBClient(host='localhost', port=8086, username='******', password='******', database='test_cache') self._client.drop_database('test_cache') self._client.create_database('test_cache') self._client.switch_database('test_cache') self._df_client = DataFrameClient(host='localhost', port=8086, username='******', password='******', database='test_cache') def tearDown(self): self._client.drop_database('test_cache') self._client.close() self._df_client.close() def test_update_to_latest_intraday(self): with IQFeedHistoryProvider(num_connections=2) as history: cache_requests = inf_cache.InfluxDBOHLCRequest( client=self._df_client, interval_len=3600, interval_type='s') end_prd = datetime.datetime(2017, 3, 2) filters = (BarsInPeriodFilter(ticker="IBM", bgn_prd=datetime.datetime( 2017, 3, 1), end_prd=end_prd, interval_len=3600, ascend=True, interval_type='s'), BarsInPeriodFilter(ticker="AAPL", bgn_prd=datetime.datetime( 2017, 3, 1), end_prd=end_prd, interval_len=3600, ascend=True, interval_type='s'), BarsInPeriodFilter(ticker="AAPL", bgn_prd=datetime.datetime( 2017, 3, 1), end_prd=end_prd, interval_len=600, ascend=True, interval_type='s')) filters_no_limit = (BarsInPeriodFilter(ticker="IBM", bgn_prd=datetime.datetime( 2017, 3, 1), end_prd=None, interval_len=3600, ascend=True, interval_type='s'), BarsInPeriodFilter(ticker="AAPL", bgn_prd=datetime.datetime( 2017, 3, 1), end_prd=None, interval_len=3600, ascend=True, interval_type='s')) data = [ history.request_data(f, sync_timestamps=False) for f in filters ] for datum, f in zip(data, filters): datum.drop('timestamp', axis=1, inplace=True) datum['interval'] = str(f.interval_len) + '_' + f.interval_type self._df_client.write_points( datum, 'bars', protocol='line', tag_columns=['symbol', 'interval'], time_precision='s') latest_old = ranges(self._client) update_to_latest(self._df_client, noncache_provider=noncache_provider(history), new_symbols={('AAPL', 3600, 's'), ('MSFT', 3600, 's'), ('MSFT', 600, 's')}, time_delta_back=relativedelta(years=10)) latest_current = ranges(self._client) self.assertEqual(len(latest_current), len(latest_old) + 2) self.assertEqual( len([k for k in latest_current.keys() & latest_old.keys()]) + 2, len(latest_current)) for k in latest_current.keys() & latest_old.keys(): self.assertGreater(latest_current[k][1], latest_old[k][1]) data_no_limit = [ history.request_data(f, sync_timestamps=False) for f in filters_no_limit ] cache_data_no_limit = [ cache_requests.request(symbol=f.ticker, bgn_prd=f.bgn_prd)[0] for f in filters_no_limit ] for df1, df2 in zip(data_no_limit, cache_data_no_limit): del df1['total_volume'] del df1['number_of_trades'] del df1['volume'] del df2['volume'] assert_frame_equal(df1, df2, check_exact=False, check_less_precise=True) def test_update_to_latest_daily(self): with IQFeedHistoryProvider(num_connections=2) as history: cache_requests = inf_cache.InfluxDBOHLCRequest( client=self._df_client, interval_len=1, interval_type='d') bgn_prd = datetime.datetime(2017, 3, 1).date() end_prd = datetime.datetime(2017, 3, 2).date() filters = (BarsDailyForDatesFilter(ticker="IBM", bgn_dt=bgn_prd, end_dt=end_prd, ascend=True), BarsDailyForDatesFilter(ticker="AAPL", bgn_dt=bgn_prd, end_dt=end_prd, ascend=True)) filters_no_limit = (BarsDailyForDatesFilter(ticker="IBM", bgn_dt=bgn_prd, end_dt=None, ascend=True), BarsDailyForDatesFilter(ticker="AAPL", bgn_dt=bgn_prd, end_dt=None, ascend=True), 
BarsDailyForDatesFilter(ticker="AMZN", bgn_dt=bgn_prd, end_dt=None, ascend=True)) data = [ history.request_data(f, sync_timestamps=False) for f in filters ] for datum, f in zip(data, filters): datum.drop('timestamp', axis=1, inplace=True) datum['interval'] = '1_d' self._df_client.write_points( datum, 'bars', protocol='line', tag_columns=['symbol', 'interval'], time_precision='s') latest_old = ranges(self._client) update_to_latest(self._df_client, noncache_provider=noncache_provider(history), new_symbols={('AAPL', 1, 'd'), ('AMZN', 1, 'd')}, time_delta_back=relativedelta(years=10)) latest_current = ranges(self._df_client) self.assertEqual(len(latest_current), len(latest_old) + 1) self.assertEqual( len([k for k in latest_current.keys() & latest_old.keys()]) + 1, len(latest_current)) for k in latest_current.keys() & latest_old.keys(): self.assertGreater(latest_current[k][1], latest_old[k][1]) data_no_limit = [ history.request_data(f, sync_timestamps=False) for f in filters_no_limit ] cache_data_no_limit = [ cache_requests.request( symbol=f.ticker, bgn_prd=datetime.datetime.combine( f.bgn_dt, datetime.datetime.min.time()).astimezone( tz.tzutc()) + relativedelta(microseconds=1)) for f in filters_no_limit ] for df1, (_, df2) in zip(data_no_limit, cache_data_no_limit): del df1['open_interest'] df1 = df1[[ 'open', 'high', 'low', 'close', 'volume', 'timestamp', 'symbol' ]] assert_frame_equal(df1, df2) def test_bars_in_period(self): with IQFeedHistoryProvider(num_connections=2) as history: now = datetime.datetime.now() filters = (BarsInPeriodFilter(ticker="IBM", bgn_prd=datetime.datetime( now.year - 1, 3, 1), end_prd=None, interval_len=3600, ascend=True, interval_type='s'), BarsInPeriodFilter(ticker="AAPL", bgn_prd=datetime.datetime( now.year - 1, 3, 1), end_prd=None, interval_len=3600, ascend=True, interval_type='s'), BarsInPeriodFilter(ticker="AAPL", bgn_prd=datetime.datetime( now.year - 1, 3, 1), end_prd=None, interval_len=600, ascend=True, interval_type='s')) data = [ history.request_data(f, sync_timestamps=False) for f in filters ] for datum, f in zip(data, filters): datum.drop('timestamp', axis=1, inplace=True) datum['interval'] = str(f.interval_len) + '_' + f.interval_type self._df_client.write_points( datum, 'bars', protocol='line', tag_columns=['symbol', 'interval'], time_precision='s') # test all symbols bgn_prd = datetime.datetime(now.year - 1, 3, 1, tzinfo=tz.gettz('UTC')) bars_in_period = inf_cache.BarsInPeriodProvider( influxdb_cache=inf_cache.InfluxDBOHLCRequest( client=self._df_client, interval_len=3600, interval_type='s'), bgn_prd=bgn_prd, delta=relativedelta(days=30)) for i, (orig_df, processed_df) in enumerate(bars_in_period): self.assertFalse(orig_df.empty) self.assertFalse(processed_df.empty) start, end = bars_in_period._periods[bars_in_period._deltas] self.assertGreaterEqual(orig_df.iloc[0].name[0], start) self.assertGreater(end, orig_df.iloc[-1].name[0]) self.assertGreater(end, orig_df.iloc[0].name[0]) self.assertEqual(i, len(bars_in_period._periods) - 1) self.assertGreater(i, 0) # test symbols group bgn_prd = datetime.datetime(now.year - 1, 3, 1, tzinfo=tz.gettz('UTC')) bars_in_period = inf_cache.BarsInPeriodProvider( influxdb_cache=inf_cache.InfluxDBOHLCRequest( client=self._df_client, interval_len=3600, interval_type='s'), symbol=['AAPL', 'IBM'], bgn_prd=bgn_prd, delta=relativedelta(days=30)) for i, (orig_df, processed_df) in enumerate(bars_in_period): self.assertFalse(orig_df.empty) self.assertFalse(processed_df.empty) start, end = 
bars_in_period._periods[bars_in_period._deltas] self.assertGreaterEqual(orig_df.iloc[0].name[0], start) self.assertGreater(end, orig_df.iloc[-1].name[0]) self.assertGreater(end, orig_df.iloc[0].name[0]) self.assertEqual(i, len(bars_in_period._periods) - 1) self.assertGreater(i, 0) def test_update_fundamentals(self): funds = get_fundamentals({'IBM', 'AAPL', 'GOOG', 'MSFT'}) update_fundamentals(self._client, list(funds.values())) result = get_cache_fundamentals(self._client, ['IBM', 'AAPL', 'GOOG', 'MSFT']) self.assertEqual(len(result), 4) self.assertEqual({k for k in result.keys()}, {'IBM', 'AAPL', 'GOOG', 'MSFT'}) self.assertGreater(len(result['IBM']), 0) def test_update_adjustments(self): funds = get_fundamentals({'IBM', 'AAPL', 'GOOG', 'MSFT'}) update_splits_dividends(self._client, list(funds.values())) adjustments = inf_cache.get_adjustments(client=self._df_client, symbol=['IBM', 'AAPL'], provider='iqfeed') self.assertEqual(len(adjustments), 6) self.assertTrue(isinstance(adjustments, pd.DataFrame)) self.assertTrue(set(adjustments.index.levels[1]) == {'IBM', 'AAPL'})
class tsdb(object):
    def __init__(self, dbname, host='localhost', port=8086, user='******', password='******'):
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.dbname = dbname
        self.client = None
        self.protocol = 'json'

    def _connect(self):
        if self.client is None:
            self.client = DataFrameClient(host=self.host,
                                          port=self.port,
                                          username=self.user,
                                          password=self.password,
                                          database=self.dbname)
            #self.client.switch_database(self.dbname)

    def _disconnect(self):
        if self.client is not None:
            self.client.close()
            self.client = None

    def _reconnect(self):
        self._disconnect()
        self._connect()

    def create_db(self):
        self._connect()
        dbs = self.client.get_list_database()
        for e in dbs:
            if self.dbname in e.values():
                logger.debug("Database {} already exists.".format(self.dbname))
                return
        logger.info("Creating database: {}".format(self.dbname))
        self.client.create_database(self.dbname)
        #self._set_retention_policy()

    def _set_retention_policy(self):
        self._connect()
        self.client.create_retention_policy(name='raw', duration='12h',
                                            replication=1, default=True)
        self.client.create_retention_policy(name='cooked', duration='52w',
                                            replication=1, default=False)

    def check_db(self):
        self._connect()
        db = self.client.get_list_database()
        ms = self.client.get_list_measurements()
        rp = self.client.get_list_retention_policies(self.dbname)
        user = self.client.get_list_users()
        print('db: {}, measurements: {}'.format(db, ms))
        print('retention policy: {}'.format(rp))
        print('users: {}'.format(user))

    def insert(self, df, measurement, tags=None):
        self._connect()
        try:
            result = self.client.write_points(df, measurement, tags=tags,
                                              time_precision='n',
                                              protocol=self.protocol)
        except Exception:
            logger.info('influxdb write error')
            result = False
        return result

    def query(self, sql):
        self._connect()
        result = self.client.query(sql)
        return result
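# A minimal sketch of how the tsdb wrapper above might be used; the database name,
# measurement name, tag, and credentials here are placeholders, not values taken
# from the original code.
import pandas as pd

db = tsdb('sensors_db', host='localhost', port=8086, user='reader', password='secret')
db.create_db()
sample = pd.DataFrame({'value': [1.0, 2.0, 3.0]},
                      index=pd.date_range('2021-01-01', periods=3, freq='1s'))
db.insert(sample, 'demo_measurement', tags={'source': 'example'})
print(db.query('SELECT * FROM demo_measurement LIMIT 3'))
db._disconnect()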
logging.getLogger(__name__).info("Updating database with arguments: " + str(args)) if args.drop: client.drop_database(args.database) if args.database not in [d['name'] for d in client.get_list_database()]: client.create_database(args.database) client.query( "ALTER RETENTION POLICY autogen ON cache DURATION INF REPLICATION 1 SHARD DURATION 2600w DEFAULT" ) client.switch_database(args.database) with IQFeedHistoryProvider(num_connections=args.iqfeed_conn) as history: all_symbols = { (s, args.interval_len, args.interval_type) for s in set( iqutil.get_symbols(symbols_file=args.symbols_file).keys()) } update_to_latest( client=client, noncache_provider=noncache_provider(history), new_symbols=all_symbols, time_delta_back=relativedelta(years=args.delta_back), skip_if_older_than=relativedelta(days=args.skip_if_older) if args.skip_if_older is not None else None) client.close()
class InsyteInfluxIO:
    logger = logging.getLogger('insyte_analytics.db.insyte_influx_io')

    def __init__(self):
        """ Constructor. """
        self.logger.debug("Initialization")
        self.host = None
        self.port = None
        self.username = None
        self.password = None
        self.database = None
        self.client = None  # db connection instance
        self.device_id = None
        self.data_source_id = None
        self.time_upload = None

    def _set_connection_parameters(self, host=None, database=None, port=None, username=None, password=None):
        """ Set connection parameters. """
        self.logger.debug("Setting connection parameters")
        self.host = host
        self.logger.debug("host = " + str(host))
        self.database = database
        self.logger.debug("database = " + str(database))
        self.port = port
        self.logger.debug("port = " + str(port))
        self.username = username
        self.logger.debug("username = " + str(username))
        self.password = password
        self.logger.debug("password = " + str(password))

    def _check_connection_parameters(self):
        """ Check connection parameters. """
        self.logger.debug("Checking connection parameters")
        if self.host is None:
            self.logger.warning("Connection parameter 'host' not set")
            raise Exception("Connection parameter 'host' not set")
        if self.database is None:
            self.logger.warning("Connection parameter 'database' not set")
            raise Exception("Connection parameter 'database' not set")
        if self.port is None:
            self.logger.warning("Connection parameter 'port' not set")
            raise Exception("Connection parameter 'port' not set")
        if self.username is None:
            self.logger.warning("Connection parameter 'username' not set")
            raise Exception("Connection parameter 'username' not set")
        if self.password is None:
            self.logger.warning("Connection parameter 'password' not set")
            raise Exception("Connection parameter 'password' not set")
        self.logger.debug("Connection parameters successfully checked")

    async def connect(self, host: str, port: int, username: str, password: str, database: str):
        """ Open connection. """
        try:
            self.logger.debug("Connecting to DB")
            self._set_connection_parameters(host[0], database, port, username, password)
            self._check_connection_parameters()
            self.client = DataFrameClient(self.host, self.port, self.username, self.password, self.database)
        except Exception as err:
            self.logger.error("Connection failed: " + str(err))
            raise Exception("Connection failed: " + str(err))

    async def disconnect(self):
        """ Close connection. """
        self.logger.debug("Closing connection")
        if self.client is not None:
            self.client.close()
        self.logger.debug("Connection closed")

    def _set_read_parameters(self, device_id=None, data_source_id=None, time_upload=None):
        """
        Set reading parameters.

        :param device_id: list of ids [uuid1, uuid2, ..., uuidN]
        :param data_source_id: list of ids [id1, id2, ..., idN]
        :param time_upload: list of tuples of dates [(d_min1 d_max1), (d_min2 d_max2), ..., (d_minN d_maxN)]
        """
        self.logger.debug("Setting reading parameters")
        self.device_id = device_id
        self.logger.debug("device_id = " + str(device_id))
        self.data_source_id = data_source_id
        self.logger.debug("data_source_id = " + str(data_source_id))
        self.time_upload = time_upload
        self.logger.debug("time_upload = " + str(time_upload))

    def _check_read_parameters(self):
        """ Check reading parameters.
""" self.logger.debug("Checking reading parameters") if self.device_id is None: self.logger.warning("Reading parameter 'device_id' not set") raise Exception("Reading parameter 'device_id' not set") if self.data_source_id is None: self.logger.warning("Reading parameter 'data_source_id' not set") raise Exception("Reading parameter 'data_source_id' not set") if self.time_upload is None: self.logger.warning("Reading parameter 'time_upload' not set") raise Exception("Reading parameter 'time_upload' not set") if len(self.device_id) != len(self.data_source_id) or len( self.device_id) != len(self.time_upload): self.logger.warning("Reading parameters have different lengths") raise Exception("Reading parameters have different lengths") self.logger.debug("Reading parameters successfully checked") async def read_data(self, device_id=None, data_source_id=None, time_upload=None, limit=None): """ Read data from db according to object's parameters. :param device_id: list of ids [uuid1, uuid2, ..., uuidN] :param data_source_id: list of ids [id1, id2, ..., idN] :param time_upload: list of tuples of dates [(d_min1 d_max1), (d_min2 d_max2), ..., (d_minN d_maxN)] :param limit: retrieved data rows limit :return: list of queries results """ results = pd.DataFrame() try: self.logger.debug("Reading data") self._set_read_parameters(device_id, data_source_id, time_upload) if limit is not None: self.logger.debug("Data reading limit set to " + str(limit)) limit = "LIMIT " + str(limit) else: limit = "" self._check_read_parameters() for di, dsi, tu in zip(self.device_id, self.data_source_id, self.time_upload): params = { "di": str(di), "dsi": str(dsi), "limit": limit, "from": datetime.datetime.strftime(tu[0], "%Y-%m-%dT%H:%M:%SZ"), "to": datetime.datetime.strftime(tu[1], "%Y-%m-%dT%H:%M:%SZ") } query = r"SELECT value FROM data WHERE device_id='{di}' ".format( **params) query += r"and data_source_id='{dsi}' ".format(**params) query += r"and time >= '{from}' and time <= '{to}' ".format( **params) query += r"{limit}".format(**params) self.logger.debug("Executing query " + str(query)) result = self.client.query(query) name = str(di) + '_' + str(dsi) if len(result) != 0: r = result['data'] self.logger.debug("Column " + name + " contains " + str(len(r)) + " rows") r.rename(columns={"value": name}, inplace=True) results = pd.merge(results, r, how='outer', left_index=True, right_index=True) else: self.logger.debug("Column " + name + " contains " + str(0) + " rows") results[name] = np.nan except Exception as err: self.logger.error("Impossible to read: " + str(err)) raise Exception("Impossible to read: " + str(err)) self.logger.debug("Reading complete: " + str(results.shape) + " entries returned") return results def _set_write_parameters(self, result_id=None, output_data=None): """ Set writing parameters :param result_id: uuid of the result :param output_data: DataFrame """ self.logger.debug("Setting writing parameters") if result_id is not None: self.result_id = result_id self.logger.debug("result_id = " + str(result_id)) if output_data is not None: self.output_data = output_data self.logger.debug("output_data shape = " + str(output_data.shape)) def _check_write_parameters(self): """ Check writing parameters """ self.logger.debug("Checking writing parameters") if self.result_id is None: self.logger.warning("Writing parameter 'result_id' not set") raise Exception("Writing parameter 'result_id' not set") if self.output_data is None: self.logger.warning("Writing parameter 'output_data' not set") raise Exception("Writing parameter 
'output_data' not set") if self.output_data.empty: self.logger.warning("'output_data' is empty") raise Exception("'output_data' is empty") #if self.output_data.shape[1] != len(self.result_id): # self.logger.warning("'output_data' and 'result_id' have different lengths: " # + str(self.output_data.shape[1]) + " and " + str(len(self.result_id))) # raise Exception("'output_data' and 'result_id' have different lengths: " # + str(self.output_data.shape[1]) + " and " + str(len(self.result_id))) self.logger.debug("Writing parameters successfully checked") async def write_data(self, result_id=None, output_data=None): """ Depricated. Write data from this object to db. :param result_id: list of ids [uuid1, uuid2, ..., uuidK] :param output_data: DataFrame :return: list of result objects """ self.logger.debug("Writing data") self._set_write_parameters(result_id, output_data) results = [] try: self._check_write_parameters() for col, ri in zip(self.output_data.columns, self.result_id): df = pd.DataFrame(output_data[col]) if col.startswith('boolean'): df.rename(columns={col: 'boolean'}, inplace=True) else: df.rename(columns={col: 'value'}, inplace=True) _ = self.client.write_points(df, 'data_result', {'result_id': str(ri)}) results.append(str(ri)) except Exception as err: self.logger.error("Writing failed: " + str(err)) raise Exception("Writing failed " + str(err)) self.logger.debug("Writing complete " + str(results)) return results async def write_data_new(self, result_id=None, output_data=None): """ Write data from this object to db. :param result_id: list of ids [uuid1, uuid2, ..., uuidK] :param output_data: DataFrame :return: list of result objects """ self.logger.debug("Writing data") self._set_write_parameters(result_id, output_data) results = [] try: self._check_write_parameters() _ = self.client.write_points(self.output_data, 'data_result', {'result_id': str(self.result_id[0])}) results.append(str(self.result_id[0])) except Exception as err: self.logger.error("Writing failed: " + str(err)) raise Exception("Writing failed " + str(err)) self.logger.debug("Writing complete " + str(results)) return results
print(result) for stock in sub_stock_list: ret_code, ret_data = quote_ctx.get_rt_ticker(stock, num=1000) if ret_code == 0: #print(ret_data) # The DataFrame passed to write_points must have its timestamps converted to a standard format and be sorted by time ret_data["time"] = pd.to_datetime(ret_data["time"]) ret_data = ret_data.set_index("time") # Before storing into InfluxDB, localize the timestamps; write_points stores them as UTC ret_data.index = ret_data.index.tz_localize('Asia/Shanghai') #print(ret_data) dbclient.write_points(ret_data, "futu_rt_ticker", tag_columns=['sequence'], database="test") time.sleep(60) result = quote_ctx.unsubscribe(sub_stock_list, subscribe_subtypes) print("unsubscribe result: " + time.asctime()) print(result) # Wait a moment after unsubscribing before subscribing again; FutuOpenD gets slow once a lot of data has accumulated time.sleep(2) return True if __name__ == '__main__': #atexit.register(clean_atexit) get_all_data("HK") # get_ticker_data("HK", "all") consumer.close() quote_ctx.close() dbclient.close() dbquery.close()
def write_data_to_influxdb(data): influxClient = DataFrameClient("localhost", 8087, "admin", "admin", "corona_data") influxClient.drop_measurement("dus") influxClient.write_points(data, "dus") influxClient.close()
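# A hypothetical caller for write_data_to_influxdb above: DataFrameClient.write_points
# expects a DataFrame indexed by a DatetimeIndex, so the data would be shaped roughly
# like this (column names and values are made up).
import pandas as pd

corona_df = pd.DataFrame({'cases': [120, 135, 160]},
                         index=pd.to_datetime(['2020-03-01', '2020-03-02', '2020-03-03']))
write_data_to_influxdb(corona_df)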
def main(): # Initializes the default logger logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO) logger = logging.getLogger(APPLICATION_NAME) # Checks the Python Interpeter version if (sys.version_info < (3, 0)): logger.error('Python 3 is requested! Leaving the program.') sys.exit(-1) # Parse arguments args = configuration_parser() logger.setLevel(args.logging_level) logger.info(f'Starting application "{APPLICATION_NAME}"...') logger.debug(f'Arguments: {vars(args)}') v_latitude, v_longitude = map(float, args.gps_location.split(',')) v_influxdb_host = args.influxdb_host v_influxdb_port = args.influxdb_port v_influxdb_database = args.influxdb_database v_influxdb_username = args.influxdb_username v_influxdb_password = args.influxdb_password # Check if "Emon" database exists _client = DataFrameClient(host=v_influxdb_host, port=v_influxdb_port, username=v_influxdb_username, password=v_influxdb_password, database=v_influxdb_database) _dbs = _client.get_list_database() logger.debug(f'List of InfluxDB databases: {_dbs}') if v_influxdb_database not in [_d['name'] for _d in _dbs]: logger.info( f'InfluxDB database "{v_influxdb_database}" not found. Creating a new one.' ) _client.create_database(v_influxdb_database) _client.close() # Pack all parameters in a dictionary _userdata = { 'LOGGER': logger, 'LATITUDE': v_latitude, 'LONGITUDE': v_longitude, 'INFLUXDB_HOST': v_influxdb_host, 'INFLUXDB_PORT': v_influxdb_port, 'INFLUXDB_USER': v_influxdb_username, 'INFLUXDB_PASS': v_influxdb_password, 'INFLUXDB_DB': v_influxdb_database, 'MEASUREMENT_TS': args.measurement_ts, 'PROCESSED_TS': args.processed_ts, 'FORECAST_TS': args.forecast_ts, 'WEATHER_TS': args.weather_ts, 'HORIZON_LENGTH': args.horizon_length, 'WEATHER_SERVER_URL': args.weather_server_url, 'WEATHER_FORECAST_INTERVAL': args.weather_forecast_interval, 'WEATHER_START_TIMESTAMP': args.weather_start_timestamp } # Instantiate the scheduler and repeatedly run the "forecasting task" # "forecast interval" seconds after its previous execution _main_scheduler = continuous_scheduler.MainScheduler() _main_scheduler.add_task(forecasting_task, 0, args.forecast_interval, 0, _userdata) _main_scheduler.start()
class InfluxServerIO: logger = logging.getLogger('influx_server_io') def __init__(self, host=None, database=None, port=None, username=None, password=None): """ Constructor. """ self.logger.debug("Setting connection parameters") self.host = host self.database = database self.port = port self.username = username self.password = password self.client = None def connect(self): """ Open connection. """ try: self.logger.debug("Connecting to DB") self.client = DataFrameClient(self.host, self.port, self.username, self.password, self.database, timeout=15) self.client.ping() self.logger.debug("DB Connection set") except Exception as err: self.logger.error("DB Connection failed: " + str(err)) raise Exception("DB Connection failed: " + str(err)) def disconnect(self): """ Close connection. """ try: self.logger.debug("Closing DB connection") if self.client is not None: self.client.close() self.client = None self.logger.debug("DB Connection closed") except Exception as err: self.logger.error("Can't disconnect from DB: " + str(err)) raise Exception("Can't disconnect from DB: " + str(err)) def read_data(self, device_id=None, data_source_id=None, time_upload=None, limit=None): """ Read data from db according to object's parameters. :param device_id: list of ids [uuid1, uuid2, ..., uuidN] :param data_source_id: list of ids [id1, id2, ..., idN] :param time_upload: list of tuples of dates [(d_min1 d_max1), (d_min2 d_max2), ..., (d_minN d_maxN)] :param limit: retrieved data rows limit :return: list of queries results """ results = pd.DataFrame() try: self.logger.debug("Reading data") if limit is not None: self.logger.debug("Data reading limit set to " + str(limit)) limit = "LIMIT " + str(limit) else: limit = "" for di, dsi, tu in zip(device_id, data_source_id, time_upload): params = { "di": str(di), "dsi": str(dsi), "limit": limit, "from": datetime.datetime.strftime(tu[0], "%Y-%m-%dT%H:%M:%SZ"), "to": datetime.datetime.strftime(tu[1], "%Y-%m-%dT%H:%M:%SZ") } query = r"SELECT value FROM data WHERE device_id='{di}' ".format( **params) query += r"and data_source_id='{dsi}' ".format(**params) query += r"and time >= '{from}' and time <= '{to}' ".format( **params) query += r"{limit}".format(**params) self.logger.debug("Executing query " + str(query)) result = self.client.query(query) name = str(di) + '_' + str(dsi) if len(result) != 0: r = result['data'] self.logger.debug("Column " + name + " contains " + str(len(r)) + " rows") r.rename(columns={"value": name}, inplace=True) results = pd.merge(results, r, how='outer', left_index=True, right_index=True) else: self.logger.debug("Column " + name + " contains " + str(0) + " rows") results[name] = np.nan except Exception as err: self.logger.error("Impossible to read: " + str(err)) raise Exception("Impossible to read: " + str(err)) self.logger.debug("Reading complete: " + str(results.shape) + " entries returned") return results def write_data(self, result_id=None, output_data=None): """ Write data from this object to db. 
:param result_id: list of ids [uuid1, uuid2, ..., uuidK] :param output_data: DataFrame :return: list of result objects """ self.logger.debug("Writing data") results = [] try: for col, ri in zip(output_data.columns, result_id): df = pd.DataFrame(output_data[col]) if col.startswith('bool'): df.rename(columns={col: 'boolean'}, inplace=True) elif col.startswith('val'): df.rename(columns={col: 'value'}, inplace=True) else: self.logger.warning( "Column name: " + str(col) + " (doesn't start with 'val' or 'bool', renaming to 'value')" ) df.rename(columns={col: 'value'}, inplace=True) v = self.client.write_points(df, 'data_result', {'result_id': str(ri)}) results.append(str(ri)) except Exception as err: self.logger.error("Writing to DB failed: " + str(err)) raise Exception("Writing to DB failed " + str(err)) self.logger.debug("Writing to DB complete " + str(results)) return results
def optimization(): # Optimization timestep TIMESTEP = 12 # 5min interval (60/5) # Query uncontrolled demand # Note: uncontrolled demand is already on a 5min timestep client = DataFrameClient(host, port, user, password, dbname) start = datetime.now() query = ("select * from uncontr " + "WHERE time >= '" + (start + timedelta(minutes=5)).strftime("%Y-%m-%dT%H:%M:%SZ") + "' AND time <= '" + (start + timedelta(hours=24)).strftime("%Y-%m-%dT%H:%M:%SZ") + "'") uncontr = client.query(query)['uncontr'] # Get the reference t=0 first_t = uncontr.iloc[0].name uncontr_t = uncontr.index uncontr_index = list(range(0, len(uncontr_t))) # Change index to integers and rename column opt_uncontr = uncontr.copy() opt_uncontr['index'] = uncontr_index opt_uncontr.set_index('index', drop=True, inplace=True) opt_uncontr.rename(columns={'uncontr': 'p'}, inplace=True) # Query order books try: query = ("select * from bbook " + "WHERE startby >= " + str(int((start + timedelta(minutes=5)).timestamp() * 1000)) + " AND endby <= " + str(int((start + timedelta(hours=24)).timestamp() * 1000))) bbook = client.query(query)['bbook'] # Set startby and endby as integers opt_bbook = bbook.copy() opt_bbook['startby'] -= first_t.timestamp() * 1000 opt_bbook['startby'] /= 60 * 1000 * 60 / TIMESTEP opt_bbook['endby'] -= first_t.timestamp() * 1000 opt_bbook['endby'] /= 60 * 1000 * 60 / TIMESTEP opt_bbook['id'] = list(range(0, len(opt_bbook))) opt_bbook.set_index('id', drop=True, inplace=True) except: # No orders at the moment opt_bbook = pandas.DataFrame() try: query = ("select * from sbook " + "WHERE startby >= " + str(int((start + timedelta(minutes=5)).timestamp() * 1000)) + " AND endby <= " + str(int((start + timedelta(hours=24)).timestamp() * 1000))) sbook = client.query(query)['sbook'] # Set startby and endby as integers opt_sbook = sbook.copy() opt_sbook['startby'] -= first_t.timestamp() * 1000 opt_sbook['startby'] /= 60 * 1000 * 60 / TIMESTEP opt_sbook['endby'] -= first_t.timestamp() * 1000 opt_sbook['endby'] /= 60 * 1000 * 60 / TIMESTEP opt_sbook['id'] = list(range(0, len(opt_sbook))) opt_sbook.set_index('id', drop=True, inplace=True) except: # No orders at the moment opt_sbook = pandas.DataFrame() try: query = ("select * from dbook " + "WHERE startby >= " + str(int((start + timedelta(minutes=5)).timestamp() * 1000)) + " AND endby <= " + str(int((start + timedelta(hours=24)).timestamp() * 1000))) dbook = client.query(query)['dbook'] opt_dbook = dbook.copy() opt_dbook['startby'] -= first_t.timestamp() * 1000 opt_dbook['startby'] /= 60 * 1000 * 60 / TIMESTEP opt_dbook['endby'] -= first_t.timestamp() * 1000 opt_dbook['endby'] /= 60 * 1000 * 60 / TIMESTEP opt_dbook['id'] = list(range(0, len(opt_dbook))) opt_dbook.set_index('id', drop=True, inplace=True) # Turn profile_kw from str to floats opt_dbook['profile_kw'] = opt_dbook['profile_kw'].apply( lambda x: [float(v) for v in x[1:][:-1].replace(" ", "").split(',')]) except: # No orders at the moment opt_dbook = pandas.DataFrame() # Run the optimization tic = datetime.now() result = maximize_self_consumption(opt_uncontr, opt_bbook, opt_sbook, opt_dbook, timestep=1 / TIMESTEP, solver='glpk', verbose=False, timelimit=60) logger.info('GLPK time elapsed (hh:mm:ss.ms) {}'.format(datetime.now() - tic)) # Save results back to influxDB (and remove previous schedule) total = uncontr.copy() total.rename(columns={'uncontr': 'contr'}, inplace=True) total['contr'] += result['demand_controllable'] client.write_points(total, 'contr') client.drop_measurement('bschedule') if result['batteryin'] is 
not None: bschedule = (result['batteryin'] - result['batteryout']).copy() bschedule['index'] = uncontr_t bschedule.set_index('index', drop=True, inplace=True) bschedule.rename_axis(None, inplace=True) client.write_points(bschedule, 'bschedule') client.drop_measurement('sschedule') if result['demandshape'] is not None: sschedule = result['demandshape'].copy() sschedule['index'] = uncontr_t sschedule.set_index('index', drop=True, inplace=True) sschedule.rename_axis(None, inplace=True) client.write_points(sschedule, 'sschedule') client.drop_measurement('dschedule') if result['demanddeferr'] is not None: dschedule = result['demanddeferr'].copy() dschedule['index'] = uncontr_t dschedule.set_index('index', drop=True, inplace=True) dschedule.rename_axis(None, inplace=True) client.write_points(dschedule, 'dschedule') # Close DB connection client.close()
def make_dataset(capteur_list, text_input_start, text_input_end, nom_capteur, L_influx): xs = [] ys = [] colors = [] labels = [] client = DataFrameClient(host=L_influx[3], port=L_influx[4], username=L_influx[1], password=L_influx[2]) client.switch_database(L_influx[0]) #Granularity to avoid to display too much points on the figure end = datetime.strptime(text_input_end, "%Y-%m-%d %H:%M:%S") start = datetime.strptime(text_input_start, "%Y-%m-%d %H:%M:%S") ecartSecondes = (end - start).total_seconds() if ecartSecondes < 86401: groupby = None elif ecartSecondes > 86401 and ecartSecondes < 5000000: groupby = '1m' elif ecartSecondes > 5000000 and ecartSecondes < 77000000: groupby = '15m' else: groupby = '1h' #print(ecartSecondes, groupby) #Construction of vectors x and y for each register to be displayed for elt in capteur_list: if groupby == None: requete = "SELECT " + '"' + elt + '"' + " FROM measure WHERE time >= " + "'" + text_input_start + "'" + " AND " + "time <= " + "'" + text_input_end + "'" + " AND " + "ID=" + "'" + nom_capteur + "'" datasets = client.query(requete) df = pd.DataFrame(datasets['measure']) df = df.rename_axis('Date') df.index = df.index.astype('datetime64[ns]') client.close() #Convert data in list of lists a = [] b = [] for i in range(df.shape[0]): a.append(df[elt][i]) b.append(df.index[i]) xs.append(a) ys.append(b) colors.append(capteur_color[capteur_list.index(elt)]) labels.append(elt) else: requete = "SELECT MEAN(" + '"' + elt + '"' + ") FROM measure WHERE time >= " + "'" + text_input_start + "'" + " AND " + "time <= " + "'" + text_input_end + "'" + " AND " + "ID=" + "'" + nom_capteur + "'" + " GROUP BY time(" + groupby + ") fill(0)" datasets = client.query(requete) df = pd.DataFrame(datasets['measure']) df = df.rename_axis('Date') df.index = df.index.astype('datetime64[ns]') client.close() #Conversion des données en liste de liste a = [] b = [] for i in range(df.shape[0]): a.append(df['mean'][i]) b.append(df.index[i]) xs.append(a) ys.append(b) colors.append(capteur_color[capteur_list.index(elt)]) labels.append(elt) #Construction of the source of the figure new_src = ColumnDataSource(data={ 'x': xs, 'y': ys, 'color': colors, 'label': labels }) return new_src
def main(): # Connect to influxdb client = InfluxDBClient(host=IP_RP4, port=PORT, username=USER_NAME, password=PWD, database=DB_NAME, timeout=10) # String of the query to make to get the data to clean last_cleaned_time, query = query_data_to_clean(client, 'sensehat') # Make the query and get the generator of points points = query_to_points(query, client) # Create panda dataframe df = pd.DataFrame(points) # If there is no data to clean, exit if len(df.index) <= 0: client.close() sys.exit() # ------------------------------------------------------------------------------ # Cleaning # ------------------------------------------------------------------------------ # Do the convolutions # First create a new dataframe which will contains the convolution signals df_convol = pd.DataFrame() # Indexes must have a datetime format in order to be directly written to influxdb df_convol['Datetime'] = pd.to_datetime(df['time']) df_convol = df_convol.set_index('Datetime') # Convolve the entire dataframe previous_row = query_last_raw_values(last_cleaned_time, client) convolve_dataframe(df, dst_df=df_convol, previous_row=previous_row) # Connect to DB with a DataFrameClient df_client = DataFrameClient(host=IP_RP4, port=PORT, username=USER_NAME, password=PWD, database=DB_NAME, timeout=10) # Write the convolved signals to the influxdb server write_df(df_convol, 'convol_signals', df_client) # ------------------------------------------------------------------------------ # Correct the anormal values # The result dataframe is now the original df # !!! Normally, elements at the same index in dataframe df and df_convol # correspond to the same timestamp (as this script is exectued in its # completeness via a cron job, so both index the same time range). However, # if e.g. only the convolution is done and later the cleaning, then they won't # have the same indexing. To avoid this, we query again the db for the correct # time range, so we don't have problems with indexing from_time = df['time'].to_numpy()[0] df_convol = pd.DataFrame(query_convolution(client, from_time)) # Clean the data clean_data(df, df_convol, previous_row) # Now the dataframe df is clean # ------------------------------------------------------------------------------ # Now fetch the meteo suisse data and adjust, # we don't want to again search for last sample in the convolution signal # as we just wrote the new convolved samples last_cleaned_time, query = query_data_to_clean(client, source='meteosuisse', last_cleaned=last_cleaned_time, update_last_cleaned=False, fields='"temperature", "humidity"') points_ms = query_to_points(query, client) df_ms = pd.DataFrame(points_ms) # Format the meteo suisse data dataframe form_df = prepare_ms_df(df_ms, df['time']) if form_df is not None: # Get the average differences for each columns # (i.e. sensehat data - meteo suisse data) avg_diffs = avg_diff_df(form_df, df) # Adjust the clean sensehat dataframe "df" with the average differences # for each column (in place) adjust_df(df, avg_diffs) # Now "df" is the sensehat data, cleaned and adjusted # Write the result to the measurement "clean_sh_data" # (just have to set the index to the time column to be able to write # directly to the influxdb database) df['time'] = pd.to_datetime(df['time']) df = df.set_index('time') write_df(df, 'clean_sh_data', df_client) # Close the connections to the influxdb server df_client.close() client.close()