def test_write_points_from_dataframe(self):
    now = pd.Timestamp('1970-01-01 00:00+00:00')
    dataframe = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             index=[now, now + timedelta(hours=1)],
                             columns=["column_one", "column_two",
                                      "column_three"])
    points = [
        {
            "points": [
                ["1", 1, 1.0, 0],
                ["2", 2, 2.0, 3600]
            ],
            "name": "foo",
            "columns": ["column_one", "column_two", "column_three", "time"]
        }
    ]

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/db/db/series")

        cli = DataFrameClient(database='db')
        cli.write_points({"foo": dataframe})

        self.assertListEqual(json.loads(m.last_request.body), points)
def test_write_points_from_dataframe_with_tag_cols_and_global_tags(self):
    """Test write points from df w/tag + cols in TestDataFrameClient."""
    now = pd.Timestamp('1970-01-01 00:00+00:00')
    dataframe = pd.DataFrame(data=[['blue', 1, "1", 1, 1.0],
                                   ['red', 0, "2", 2, 2.0]],
                             index=[now, now + timedelta(hours=1)],
                             columns=["tag_one", "tag_two", "column_one",
                                      "column_two", "column_three"])
    expected = (
        b"foo,global_tag=value,tag_one=blue,tag_two=1 "
        b"column_one=\"1\",column_two=1i,column_three=1.0 "
        b"0\n"
        b"foo,global_tag=value,tag_one=red,tag_two=0 "
        b"column_one=\"2\",column_two=2i,column_three=2.0 "
        b"3600000000000\n"
    )

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/write",
                       status_code=204)

        cli = DataFrameClient(database='db')
        cli.write_points(dataframe, 'foo',
                         tag_columns=['tag_one', 'tag_two'],
                         tags={'global_tag': 'value'})
        self.assertEqual(m.last_request.body, expected)
def test_write_points_from_dataframe_with_period_index(self):
    dataframe = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             index=[pd.Period('1970-01-01'),
                                    pd.Period('1970-01-02')],
                             columns=["column_one", "column_two",
                                      "column_three"])
    expected = {
        'points': [
            {'name': 'foo',
             'tags': {},
             'fields': {'column_one': '1',
                        'column_two': 1,
                        'column_three': 1.0},
             'time': '1970-01-01T00:00:00+00:00'},
            {'name': 'foo',
             'tags': {},
             'fields': {'column_one': '2',
                        'column_two': 2,
                        'column_three': 2.0},
             'time': '1970-01-02T00:00:00+00:00'}],
        'database': 'db',
    }

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/write",
                       status_code=204)

        cli = DataFrameClient(database='db')
        cli.write_points(dataframe, "foo")

        self.assertEqual(json.loads(m.last_request.body), expected)
def test_write_points_from_dataframe_with_period_index(self):
    dataframe = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             index=[pd.Period('1970-01-01'),
                                    pd.Period('1970-01-02')],
                             columns=["column_one", "column_two",
                                      "column_three"])
    points = [
        {
            "points": [
                ["1", 1, 1.0, 0],
                ["2", 2, 2.0, 86400]
            ],
            "name": "foo",
            "columns": ["column_one", "column_two", "column_three", "time"]
        }
    ]

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/db/db/series")

        cli = DataFrameClient(database='db')
        cli.write_points({"foo": dataframe})

        self.assertListEqual(json.loads(m.last_request.body), points)
def test_write_points_from_dataframe_with_numeric_column_names(self):
    now = pd.Timestamp('1970-01-01 00:00+00:00')
    # df with numeric column names
    dataframe = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             index=[now, now + timedelta(hours=1)])
    expected = {
        'database': 'db',
        'points': [
            {'fields': {'0': '1', '1': 1, '2': 1.0},
             'tags': {'hello': 'there'},
             'time': '1970-01-01T00:00:00+00:00',
             'name': 'foo'},
            {'fields': {'0': '2', '1': 2, '2': 2.0},
             'tags': {'hello': 'there'},
             'time': '1970-01-01T01:00:00+00:00',
             'name': 'foo'}],
    }

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/write",
                       status_code=204)

        cli = DataFrameClient(database='db')
        cli.write_points(dataframe, "foo", {"hello": "there"})

        self.assertEqual(json.loads(m.last_request.body), expected)
def test_write_points_from_dataframe(self):
    """Test write points from df in TestDataFrameClient object."""
    now = pd.Timestamp('1970-01-01 00:00+00:00')
    dataframe = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             index=[now, now + timedelta(hours=1)],
                             columns=["column_one", "column_two",
                                      "column_three"])
    expected = (
        b"foo column_one=\"1\",column_two=1i,column_three=1.0 0\n"
        b"foo column_one=\"2\",column_two=2i,column_three=2.0 "
        b"3600000000000\n"
    )

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/write",
                       status_code=204)

        cli = DataFrameClient(database='db')

        cli.write_points(dataframe, 'foo')
        self.assertEqual(m.last_request.body, expected)

        cli.write_points(dataframe, 'foo', tags=None)
        self.assertEqual(m.last_request.body, expected)
def threading_put_data(df, measurement_name, tags_dict):
    dbname = "CKG_QAR"
    str_IP_address = LOCAL().server_ip()
    user = ''
    password = ''
    client = DataFrameClient(str_IP_address, 8086, user, password, dbname)
    client.write_points(df, measurement=measurement_name, tags=tags_dict)
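# A minimal usage sketch for threading_put_data (the `frames` dict below is
# hypothetical): since each call constructs its own DataFrameClient, several
# measurements can be written from parallel threads.
import threading

import pandas as pd

frames = {
    'temperature': pd.DataFrame({'value': [21.5, 21.7]},
                                index=pd.date_range('2020-01-01',
                                                    periods=2, freq='H')),
    'humidity': pd.DataFrame({'value': [40.0, 41.2]},
                             index=pd.date_range('2020-01-01',
                                                 periods=2, freq='H')),
}
threads = [threading.Thread(target=threading_put_data,
                            args=(df, name, {'site': 'lab'}))
           for name, df in frames.items()]
for t in threads:
    t.start()
for t in threads:
    t.join()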
def _check_influxdb_connected(self, host, port, username, password, db_name):
    client = DataFrameClient(host, port, username, password, db_name)
    result = True
    try:
        client.get_list_database()
        print("Connected to database server")
    except Exception:
        result = False
        print("Cannot connect. Please check the server configuration")
    return result
def test_write_points_from_dataframe_fails_with_series(self):
    now = pd.Timestamp('1970-01-01 00:00+00:00')
    dataframe = pd.Series(data=[1.0, 2.0],
                          index=[now, now + timedelta(hours=1)])

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/db/db/series")

        cli = DataFrameClient(database='db')
        cli.write_points({"foo": dataframe})
def test_multiquery_into_dataframe(self):
    """Test multiquery into df for TestDataFrameClient object."""
    data = {
        "results": [
            {
                "series": [
                    {
                        "name": "cpu_load_short",
                        "columns": ["time", "value"],
                        "values": [
                            ["2015-01-29T21:55:43.702900257Z", 0.55],
                            ["2015-01-29T21:55:43.702900257Z", 23422],
                            ["2015-06-11T20:46:02Z", 0.64]
                        ]
                    }
                ]
            }, {
                "series": [
                    {
                        "name": "cpu_load_short",
                        "columns": ["time", "count"],
                        "values": [
                            ["1970-01-01T00:00:00Z", 3]
                        ]
                    }
                ]
            }
        ]
    }

    pd1 = pd.DataFrame(
        [[0.55], [23422.0], [0.64]], columns=['value'],
        index=pd.to_datetime([
            "2015-01-29 21:55:43.702900257+0000",
            "2015-01-29 21:55:43.702900257+0000",
            "2015-06-11 20:46:02+0000"]))

    if pd1.index.tzinfo is None:
        pd1.index = pd1.index.tz_localize('UTC')

    pd2 = pd.DataFrame(
        [[3]], columns=['count'],
        index=pd.to_datetime(["1970-01-01 00:00:00+00:00"]))

    if pd2.index.tzinfo is None:
        pd2.index = pd2.index.tz_localize('UTC')

    expected = [{'cpu_load_short': pd1}, {'cpu_load_short': pd2}]

    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
    iql = "SELECT value FROM cpu_load_short WHERE region=$region;" \
          "SELECT count(value) FROM cpu_load_short WHERE region=$region"
    bind_params = {'region': 'us-west'}
    with _mocked_session(cli, 'GET', 200, data):
        result = cli.query(iql, bind_params=bind_params)
        for r, e in zip(result, expected):
            for k in e:
                assert_frame_equal(e[k], r[k])
def test_write_points_from_dataframe_fails_without_time_index(self):
    dataframe = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             columns=["column_one", "column_two",
                                      "column_three"])

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/db/db/series")

        cli = DataFrameClient(database='db')
        cli.write_points({"foo": dataframe})
def data():
    client = DataFrameClient(host, port, user, password, dbname)
    result = client.query(
        "select * from cpu where cpu = 'cpu-total' "
        "AND host='etlnode1' limit 10")
    dataframe = result['cpu']

    # renamed from `dict`, which shadowed the builtin
    payload = {}
    payload["usage_idle"] = json.loads(
        dataframe['usage_idle'].to_json(orient='values'))
    payload["usage_user"] = json.loads(
        dataframe['usage_user'].to_json(orient='values'))
    payload["usage_softirq"] = json.loads(
        dataframe['usage_softirq'].to_json(orient='values'))
    payload["usage_system"] = json.loads(
        dataframe['usage_system'].to_json(orient='values'))
    payload["index"] = dataframe.index.tolist()
    return jsonify(payload), 200, {'Access-Control-Allow-Origin': '*'}
def test_list_series(self):
    response = [
        {
            'columns': ['time', 'name'],
            'name': 'list_series_result',
            'points': [[0, 'seriesA'], [0, 'seriesB']]
        }
    ]
    with _mocked_session('get', 200, response):
        cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
        series_list = cli.get_list_series()

        assert series_list == ['seriesA', 'seriesB']
def test_write_points_from_dataframe_in_batches(self):
    now = pd.Timestamp('1970-01-01 00:00+00:00')
    dataframe = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             index=[now, now + timedelta(hours=1)],
                             columns=["column_one", "column_two",
                                      "column_three"])

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/write",
                       status_code=204)

        cli = DataFrameClient(database='db')
        self.assertTrue(cli.write_points(dataframe, "foo", batch_size=1))
def test_write_points_from_dataframe_fails_without_time_index(self):
    """Test failed write points from df without time index."""
    dataframe = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             columns=["column_one", "column_two",
                                      "column_three"])

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/db/db/series",
                       status_code=204)

        cli = DataFrameClient(database='db')
        cli.write_points(dataframe, "foo")
def test_drop_numeric_named_database(self):
    """Test drop numeric db for TestInfluxDBClient object."""
    cli = DataFrameClient(database='db')
    with requests_mock.Mocker() as m:
        m.register_uri(
            requests_mock.POST,
            "http://localhost:8086/query",
            text='{"results":[{}]}'
        )
        cli.drop_database('123')
        self.assertEqual(
            m.last_request.qs['q'][0],
            'drop database "123"'
        )
def test_drop_measurement(self):
    """Test drop measurement for TestInfluxDBClient object."""
    cli = DataFrameClient(database='db')
    with requests_mock.Mocker() as m:
        m.register_uri(
            requests_mock.POST,
            "http://localhost:8086/query",
            text='{"results":[{}]}'
        )
        cli.drop_measurement('new_measurement')
        self.assertEqual(
            m.last_request.qs['q'][0],
            'drop measurement "new_measurement"'
        )
def test_create_database(self):
    """Test create database for TestInfluxDBClient object."""
    cli = DataFrameClient(database='db')
    with requests_mock.Mocker() as m:
        m.register_uri(
            requests_mock.POST,
            "http://localhost:8086/query",
            text='{"results":[{}]}'
        )
        cli.create_database('new_db')
        self.assertEqual(
            m.last_request.qs['q'][0],
            'create database "new_db"'
        )
def test_datetime_to_epoch(self):
    timestamp = pd.Timestamp('2013-01-01 00:00:00.000+00:00')
    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')

    self.assertEqual(
        cli._datetime_to_epoch(timestamp),
        1356998400.0
    )
    self.assertEqual(
        cli._datetime_to_epoch(timestamp, time_precision='h'),
        1356998400.0 / 3600
    )
    self.assertEqual(
        cli._datetime_to_epoch(timestamp, time_precision='m'),
        1356998400.0 / 60
    )
    self.assertEqual(
        cli._datetime_to_epoch(timestamp, time_precision='s'),
        1356998400.0
    )
    self.assertEqual(
        cli._datetime_to_epoch(timestamp, time_precision='ms'),
        1356998400000.0
    )
    self.assertEqual(
        cli._datetime_to_epoch(timestamp, time_precision='u'),
        1356998400000000.0
    )
    self.assertEqual(
        cli._datetime_to_epoch(timestamp, time_precision='n'),
        1356998400000000000.0
    )
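# Background for the assertions above (a sketch, not part of the client API):
# pandas stores a Timestamp as nanoseconds since the Unix epoch, so each
# precision is just a scaling of Timestamp.value.
import pandas as pd

ts = pd.Timestamp('2013-01-01 00:00:00.000+00:00')
assert ts.value == 1356998400000000000               # nanoseconds ('n')
assert ts.value / 1e9 == 1356998400.0                # seconds ('s')
assert ts.value / 1e9 / 3600 == 1356998400.0 / 3600  # hours ('h')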
def test_write_points_from_dataframe_with_numeric_precision(self):
    """Test write points from df with numeric precision."""
    now = pd.Timestamp('1970-01-01 00:00+00:00')
    # df with numeric column names
    dataframe = pd.DataFrame(data=[["1", 1, 1.1111111111111],
                                   ["2", 2, 2.2222222222222]],
                             index=[now, now + timedelta(hours=1)])

    if numpy.lib.NumpyVersion(numpy.__version__) <= '1.13.3':
        expected_default_precision = (
            b'foo,hello=there 0=\"1\",1=1i,2=1.11111111111 0\n'
            b'foo,hello=there 0=\"2\",1=2i,2=2.22222222222 3600000000000\n'
        )
    else:
        expected_default_precision = (
            b'foo,hello=there 0=\"1\",1=1i,2=1.1111111111111 0\n'
            b'foo,hello=there 0=\"2\",1=2i,2=2.2222222222222 3600000000000\n'  # noqa E501 line too long
        )

    expected_specified_precision = (
        b'foo,hello=there 0=\"1\",1=1i,2=1.1111 0\n'
        b'foo,hello=there 0=\"2\",1=2i,2=2.2222 3600000000000\n'
    )

    expected_full_precision = (
        b'foo,hello=there 0=\"1\",1=1i,2=1.1111111111111 0\n'
        b'foo,hello=there 0=\"2\",1=2i,2=2.2222222222222 3600000000000\n'
    )

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/write",
                       status_code=204)

        cli = DataFrameClient(database='db')
        cli.write_points(dataframe, "foo", {"hello": "there"})
        self.assertEqual(m.last_request.body, expected_default_precision)

        cli = DataFrameClient(database='db')
        cli.write_points(dataframe, "foo", {"hello": "there"},
                         numeric_precision=4)
        self.assertEqual(m.last_request.body, expected_specified_precision)

        cli = DataFrameClient(database='db')
        cli.write_points(dataframe, "foo", {"hello": "there"},
                         numeric_precision='full')
        self.assertEqual(m.last_request.body, expected_full_precision)
def test_drop_retention_policy(self):
    """Test drop retention policy for TestInfluxDBClient object."""
    cli = DataFrameClient(database='db')
    example_response = '{"results":[{}]}'

    with requests_mock.Mocker() as m:
        m.register_uri(
            requests_mock.POST,
            "http://localhost:8086/query",
            text=example_response
        )
        cli.drop_retention_policy('somename', 'db')
        self.assertEqual(
            m.last_request.qs['q'][0],
            'drop retention policy "somename" on "db"'
        )
class InfluxDBClientProxy(object):
    client_cls = DataFrameClient

    def __init__(self):
        self._client = None
        self._dfclient = None
        self.engine = None
        self.dfengine = None

    def init_app(self, app):
        client_args = (
            app.config['INFLUXDB_HOST'],
            app.config['INFLUXDB_PORT'],
            app.config['INFLUXDB_USER'],
            app.config['INFLUXDB_PASSWORD'],
            app.config['INFLUXDB_DB']
        )
        self._client = InfluxDBClient(*client_args)
        self._dfclient = DataFrameClient(*client_args)
        self.engine = Engine(self._client)
        self.dfengine = Engine(self._dfclient)

    def dfquery(self, query):
        return self._dfclient.query(query)

    def __getattr__(self, name):
        return getattr(self._client, name)
def test_get_list_database(self):
    data = {'results': [
        {'series': [
            {'measurement': 'databases',
             'values': [['new_db_1'], ['new_db_2']],
             'columns': ['name']}]}
    ]}

    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
    with _mocked_session(cli, 'get', 200, json.dumps(data)):
        self.assertListEqual(
            cli.get_list_database(),
            [{'name': 'new_db_1'}, {'name': 'new_db_2'}]
        )
class DataSource(object):
    def __init__(self, host="localhost", port='8086', username="******",
                 password="******", db_name=None, measurement=None):
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.db_name = db_name
        self.measurement = measurement
        if self._check_influxdb_connected(host, port, username, password,
                                          db_name):
            self.client_api = DataFrameClient(host, port, username,
                                              password, db_name)

    def _check_influxdb_connected(self, host, port, username, password,
                                  db_name):
        client = DataFrameClient(host, port, username, password, db_name)
        result = True
        try:
            client.get_list_database()
            print("Connected to database server")
        except Exception:
            result = False
            print("Cannot connect. Please check the server configuration")
        return result

    def check_connected(self):
        return self._check_influxdb_connected(self.host, self.port,
                                              self.username, self.password,
                                              self.db_name)

    def _return(self):
        return (self.host, self.port, self.username, self.password,
                self.db_name, self.measurement)

    def query_analyzed(self, number_of_days=30):
        print("Get latest time series points")
        query_set = 'select value from %s order by time desc limit %s ;' % (
            self.measurement, number_of_days * 24 * 60)
        result = self.client_api.query(query_set)[self.measurement]
        return result

    def query_all(self):
        print("Get latest time series points")
        query_set = 'select value from %s limit 10000;' % (self.measurement)
        result = self.client_api.query(query_set)[self.measurement]
        return result

    def update_db(self, data):
        result = False
        try:
            result = self.client_api.write_points(data, self.measurement)
        except Exception as e:
            print(e)
        return result
def test_get_list_retention_policies(self):
    """Test get retention policies for TestInfluxDBClient object."""
    cli = DataFrameClient(database='db')
    example_response = \
        '{"results": [{"series": [{"values": [["fsfdsdf", "24h0m0s", 2]],' \
        ' "columns": ["name", "duration", "replicaN"]}]}]}'

    with requests_mock.Mocker() as m:
        m.register_uri(
            requests_mock.GET,
            "http://localhost:8086/query",
            text=example_response
        )
        self.assertListEqual(
            cli.get_list_retention_policies("db"),
            [{'duration': '24h0m0s', 'name': 'fsfdsdf', 'replicaN': 2}]
        )
def test_get_list_measurements(self):
    """Test get list of measurements for TestInfluxDBClient object."""
    cli = DataFrameClient(database='db')
    data = {
        "results": [{
            "series": [
                {"name": "measurements",
                 "columns": ["name"],
                 "values": [["cpu"], ["disk"]]}
            ]}
        ]
    }

    with _mocked_session(cli, 'get', 200, json.dumps(data)):
        self.assertListEqual(
            cli.get_list_measurements(),
            [{'name': 'cpu'}, {'name': 'disk'}]
        )
def write_habitslab_df_to_influx(pd_df, pId, studyName, dataType, userId):
    if pd_df is None:
        return

    # index of the dataframe
    pd_df['time'] = pd.to_datetime(pd_df['date'])
    pd_df = pd_df.set_index('time')

    pd_df['deviceId'] = "NL1"
    pd_df['pId'] = pId
    pd_df['studyName'] = studyName
    pd_df['type'] = dataType
    pd_df['userId'] = userId

    tags = {"deviceId": "NL1",
            "pId": pId,
            "studyId": studyName,
            "type": dataType,
            "userId": userId}

    dbConnDF = DataFrameClient(host='localhost', port=8086)
    dbConnDF.write_points(pd_df, 'habitsDB', tags=tags)
def test_dataframe_write_points_with_whitespace_in_column_names(self):
    """write_points should escape white space in column names."""
    now = pd.Timestamp('1970-01-01 00:00+00:00')
    dataframe = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             index=[now, now + timedelta(hours=1)],
                             columns=["column one", "column two",
                                      "column three"])
    expected = (
        b"foo column\\ one=\"1\",column\\ two=1i,column\\ three=1.0 0\n"
        b"foo column\\ one=\"2\",column\\ two=2i,column\\ three=2.0 "
        b"3600000000000\n"
    )

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/write",
                       status_code=204)

        cli = DataFrameClient(database='db')
        cli.write_points(dataframe, 'foo')
        self.assertEqual(m.last_request.body, expected)
def test_query_into_dataframe(self):
    """Test query into df for TestDataFrameClient object."""
    data = {
        "results": [{
            "series": [
                {"measurement": "network",
                 "tags": {"direction": ""},
                 "columns": ["time", "value"],
                 "values": [["2009-11-10T23:00:00Z", 23422]]
                 },
                {"measurement": "network",
                 "tags": {"direction": "in"},
                 "columns": ["time", "value"],
                 "values": [["2009-11-10T23:00:00Z", 23422],
                            ["2009-11-10T23:00:00Z", 23422],
                            ["2009-11-10T23:00:00Z", 23422]]
                 }
            ]
        }]
    }

    pd1 = pd.DataFrame(
        [[23422]], columns=['value'],
        index=pd.to_datetime(["2009-11-10T23:00:00Z"]))
    if pd1.index.tzinfo is None:
        pd1.index = pd1.index.tz_localize('UTC')
    pd2 = pd.DataFrame(
        [[23422], [23422], [23422]], columns=['value'],
        index=pd.to_datetime(["2009-11-10T23:00:00Z",
                              "2009-11-10T23:00:00Z",
                              "2009-11-10T23:00:00Z"]))
    if pd2.index.tzinfo is None:
        pd2.index = pd2.index.tz_localize('UTC')
    expected = {
        ('network', (('direction', ''),)): pd1,
        ('network', (('direction', 'in'),)): pd2
    }

    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
    with _mocked_session(cli, 'GET', 200, data):
        result = cli.query('select value from network group by direction;')
        for k in expected:
            assert_frame_equal(expected[k], result[k])
def __init__(self, host="localhost", port='8086', username="******",
             password="******", db_name=None, measurement=None):
    self.host = host
    self.port = port
    self.username = username
    self.password = password
    self.db_name = db_name
    self.measurement = measurement
    if self._check_influxdb_connected(host, port, username, password,
                                      db_name):
        self.client_api = DataFrameClient(host, port, username, password,
                                          db_name)
class DataGetter(object):
    """
    Connects to an influxdb database and gets time series data as a
    pandas dataframe.
    """

    def __init__(self, host="localhost", port=8086, user="******",
                 password="******"):
        # pass the port argument through instead of hardcoding 8086
        self.client = DataFrameClient(host, port, user, password, "collectd")
        # self.query_handler = Query()

    def make_query(self):
        return self.client.query(
            "select difference(mean(\"value\")) from cpu_value "
            "where ( \"host\" = 'icarus') and time >= now() - 15d "
            "group by time(5s), \"type_instance\", \"instance\" ")
def test_datetime_to_epoch(self):
    """Test convert datetime to epoch in TestDataFrameClient object."""
    timestamp = pd.Timestamp('2013-01-01 00:00:00.000+00:00')
    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')

    self.assertEqual(cli._datetime_to_epoch(timestamp),
                     1356998400.0)
    self.assertEqual(cli._datetime_to_epoch(timestamp, time_precision='h'),
                     1356998400.0 / 3600)
    self.assertEqual(cli._datetime_to_epoch(timestamp, time_precision='m'),
                     1356998400.0 / 60)
    self.assertEqual(cli._datetime_to_epoch(timestamp, time_precision='s'),
                     1356998400.0)
    self.assertEqual(cli._datetime_to_epoch(timestamp, time_precision='ms'),
                     1356998400000.0)
    self.assertEqual(cli._datetime_to_epoch(timestamp, time_precision='u'),
                     1356998400000000.0)
    self.assertEqual(cli._datetime_to_epoch(timestamp, time_precision='n'),
                     1356998400000000000.0)
def upload_to_influxdb(pd_obj=None, dbname='Test', measurement='Test',
                       TEST=True, host_id='sw-wus-hx501q2'):
    chunk_size = 30000
    start_row = 0
    host = host_id
    port = 8086

    # Instantiate the connection to the InfluxDB client.
    user = '******'
    password = '******'
    protocol = 'line'
    client = DataFrameClient(host, port, user, password, dbname)

    if TEST:
        print("Create pandas DataFrame")
        pd_obj = pd.DataFrame(data=list(range(30)),
                              index=pd.date_range(start='2019-1-16',
                                                  periods=30, freq='H'),
                              columns=['测试'])

    print("Create database: " + dbname)
    client.create_database(dbname)

    print("Write DataFrame: " + measurement)
    (row_size, col_size) = pd_obj.shape
    # Write the dataframe in chunks of at most 30000 rows per request.
    # (The original advanced start_row by chunk_size + 1 and stopped at
    # row_size - 1, which skipped one row per chunk and dropped the last row.)
    while start_row < row_size:
        end_row = min(start_row + chunk_size, row_size)
        print('writing rows ' + str(start_row) + ' to ' + str(end_row))
        response = client.write_points(pd_obj[start_row:end_row],
                                       measurement, protocol=protocol)
        start_row = end_row
    print(response)
def influxdb_establish_connection(self, raw=False):
    if raw:
        return InfluxDBClient(host=self.config['influxdb']['host'],
                              port=int(self.config['influxdb']['port']),
                              username=self.config['influxdb']['username'],
                              password=self.config['influxdb']['password'],
                              database=self.config['influxdb']['db_name'])
    else:
        return DataFrameClient(
            host=self.config['influxdb']['host'],
            port=int(self.config['influxdb']['port']),
            username=self.config['influxdb']['username'],
            password=self.config['influxdb']['password'],
            database=self.config['influxdb']['db_name'])
def getKernelDetailsDefault(host, port, user, password, dbname, host_name,
                            field_name):
    client = DataFrameClient(host, port, user, password, dbname)
    query = ("SELECT mean(" + field_name + ")/1000000 FROM kernel "
             "WHERE host ='" + host_name + "' AND time > now() - 7d "
             "GROUP BY time(1h) fill(0)")
    data = client.query(query)
    dataframe = data['kernel']

    # renamed from `dict`/`list`, which shadowed the builtins
    result = {}
    result["mean"] = json.loads(
        dataframe['mean'].to_json(orient='values', force_ascii=True))

    # convert the index timestamps to epoch seconds
    times_epoch = []
    for x in dataframe.index.tolist():
        date = parser.parse(str(x))
        iso = date.isoformat()
        inter_date = iso.split("+")
        t = time.mktime(
            datetime.strptime(inter_date[0],
                              "%Y-%m-%dT%H:%M:%S").timetuple())
        times_epoch.append(t)

    result["time"] = times_epoch
    result["unit"] = "X10^6"
    return result, query
def write_habitslab_df_to_influx(pd_df, studyName, dataType, pId, wearable,
                                 deviceId, userId, fname):
    if pd_df is None:
        return

    # index of the dataframe
    pd_df['time'] = pd.to_datetime(pd_df['date'])
    pd_df = pd_df.set_index('time')

    tags = {
        "device": wearable,
        "deviceId": deviceId,
        "pId": pId,  # optional to add 'filename'
        "studyId": studyName,
        "type": dataType,
        "userId": userId,
        "fileName": fname
    }

    pd_df.drop(['date'], inplace=True, axis=1)

    # connection to db
    dbConnDF = DataFrameClient(host='localhost', port=8086,
                               database='habitsDB')
    dbConnDF.write_points(pd_df, 'habitsDB', tags=tags, batch_size=1000)
    return
def _process(self, _data):
    logging.info('Processing data at ' + self.__class__.__name__)
    meta, df = _data

    # process fields
    field_columns = get_cols(self.__fields, df.columns)
    if field_columns is None:
        logging.error("Bad parameter 'fields': " + str(self.__fields))
        raise ValueError

    # process tags
    tag_columns = get_cols(self.__tags, df.columns)

    # process timestamp
    if self.__ts['method'] == TS_FILE_MODIFIED and 'ts' in meta:
        df["ts"] = meta['ts']
    elif self.__ts['method'] == TS_FILE_NAME and 'name' in meta:
        # extract the datetime from the filename (assumes a compiled regex
        # is stored under a 'pattern' key; the original called .search() on
        # the method constant itself, which cannot work)
        s = self.__ts['pattern'].search(meta['name']).group(1)
        try:
            df["ts"] = datetime.strptime(s, self.__ts['format'])
        except ValueError as e:
            logging.error(e)
    elif self.__ts['method'] == TS_COLUMN:
        ts_column = get_cols(self.__ts['column'], df.columns)
        if ts_column is None:
            logging.error("Bad parameter ts.column: " +
                          str(self.__ts['column']))
            raise ValueError
        df.rename(columns={ts_column: 'ts'}, inplace=True)
        if self.__ts['format'] in ['D', 's', 'ms', 'us', 'ns']:
            df['ts'] = pd.to_datetime(df['ts'], unit=self.__ts['format'])
        else:
            df['ts'] = pd.to_datetime(df['ts'], format=self.__ts['format'])
    else:
        logging.error(
            "Couldn't extract timestamp of measurement, defaulting to now")
        df['ts'] = datetime.now()

    df.set_index('ts', inplace=True)

    # _convert_dataframe_to_lines is a private helper of DataFrameClient
    lines = DataFrameClient()._convert_dataframe_to_lines(
        df, self.__measurement, field_columns, tag_columns,
        numeric_precision=6)

    # print all lines
    print("\n".join(lines))
    logging.info("Exported %d records." % len(lines))
    yield None
class THDLoss():
    def __init__(self):
        self.DFDBClient = DataFrameClient(host=cg.INFLUX_DB_IP,
                                          port=cg.INFLUX_DB_PORT,
                                          database=cg.INFLUX_DB)

    def __call__(self):
        self.output()

    def read_Data(self):
        con_obj = InfluxDBClient(host=cg.INFLUX_DB_IP,
                                 port=cg.INFLUX_DB_PORT,
                                 database=cg.INFLUX_DB)
        query = ('select * from ' + cg.WRITE_MEASUREMENT +
                 ' where time > now() - 1d ')
        df = pd.DataFrame(
            con_obj.query(query, chunked=True, chunk_size=10000).get_points())
        df['time'] = df['time'].astype('datetime64[ns]')
        df['time'] = df['time'] + datetime.timedelta(hours=5, minutes=30)
        return df

    def total_sum(self, df):
        y = pd.DataFrame(
            df.groupby('DeviceID')['kvah_loss_thd'].sum()).astype(np.float64)
        y.columns = ['Sum']
        y = y.reset_index()
        return y

    def time(self, df):
        df['Time_max'] = df['time'].dt.time
        a = df.loc[list(df.groupby('DeviceID')['kvah_loss_thd'].idxmax())][
            ['Time_max', 'DeviceID']]
        b = df.loc[list(df.groupby('DeviceID')['kvah_loss_thd'].idxmin())][
            ['Time_max', 'DeviceID']]
        a.index = a['DeviceID']
        a = a.drop('DeviceID', axis=1)
        a['Time_min'] = list(b['Time_max'])
        return a

    def time_as_index(self, df):
        t = pd.DataFrame(df.groupby(['DeviceID'])['time'].max())
        t.reset_index(inplace=True)
        return t

    def output(self):
        df = self.read_Data()
        x = df.groupby('DeviceID')['kvah_loss_thd'].describe()
        x.columns = ['Total_Count', 'Average', 'SD', 'Minimum',
                     '25th_percentile', 'Median', '75th_percentile',
                     'Maximum']
        y = self.total_sum(df)
        x = x.merge(y, on='DeviceID', how="outer")
        t = self.time_as_index(df)
        x = x.merge(t, on='DeviceID', how="outer")
        a = self.time(df)
        x = x.merge(a, on='DeviceID', how="outer")
        x = x.fillna(0)
        x.set_index('time', inplace=True)
        print(self.DFDBClient.write_points(x, cg.THD_LOSS))
        return x
def __make_client(self):
    '''
    Set up both an InfluxDBClient and a DataFrameClient: the
    DataFrameClient is used for queries and the InfluxDBClient for
    writes. Internal helper, not needed by the user.
    '''
    self.client = InfluxDBClient(host=self.host,
                                 port=self.port,
                                 username=self.username,
                                 password=self.password,
                                 database=self.database,
                                 ssl=self.use_ssl,
                                 verify_ssl=self.verify_ssl_is_on)
    self.df_client = DataFrameClient(host=self.host,
                                     port=self.port,
                                     username=self.username,
                                     password=self.password,
                                     database=self.database,
                                     ssl=self.use_ssl,
                                     verify_ssl=self.verify_ssl_is_on)
def test_write_points_from_dataframe_with_period_index(self):
    """Test write points from df with period index."""
    dataframe = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             index=[pd.Period('1970-01-01'),
                                    pd.Period('1970-01-02')],
                             columns=["column_one", "column_two",
                                      "column_three"])
    expected = (
        b"foo column_one=\"1\",column_two=1i,column_three=1.0 0\n"
        b"foo column_one=\"2\",column_two=2i,column_three=2.0 "
        b"86400000000000\n"
    )

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/write",
                       status_code=204)

        cli = DataFrameClient(database='db')
        cli.write_points(dataframe, "foo")
        self.assertEqual(m.last_request.body, expected)
def test_list_series(self):
    response = {
        'results': [
            {'series': [{
                'columns': ['id'],
                'name': 'seriesA',
                'values': [[0]],
            }]},
            {'series': [{
                'columns': ['id'],
                'name': 'seriesB',
                'values': [[1]],
            }]},
        ]
    }
    with _mocked_session('get', 200, response):
        cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
        series_list = cli.get_list_series()

        assert series_list == ['seriesA', 'seriesB']
def write_dataframe(db, dataframe, measurement):
    """
    Write a given dataframe to a given measurement in a given database.

    :param db: the database name as a string
    :param dataframe: the dataframe to write
    :param measurement: the measurement name as a string
    """
    DataFrameClient(url, port, user, password, db).write_points(
        batch_size=batch_size,
        dataframe=dataframe,
        protocol=default_protocol,
        measurement=measurement
    )
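# A usage sketch for write_dataframe (relies on the module-level url, port,
# user, password, batch_size and default_protocol settings assumed above;
# the sample frame is hypothetical):
import pandas as pd

sample = pd.DataFrame({'value': [1.0, 2.0]},
                      index=pd.date_range('2020-01-01', periods=2, freq='H'))
write_dataframe('mydb', sample, 'sample_measurement')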
def get_query_result(query_message, measurement_name, influx_db):
    try:
        # get the result as a dataframe
        client = DataFrameClient(host='141.142.211.122', port=8086,
                                 database=influx_db)
        # extract the result from the dataframe
        query_result = client.query(query_message)[measurement_name]
    except KeyError:
        return False, False

    # convert to a list of rows: [timestamp, col1, col2, ...]
    column_name = list(query_result)
    second_list = []
    for index, row in query_result.iterrows():
        first_list = [str(index)]
        for c in column_name:
            first_list.append(row[c])
        second_list.append(first_list)

    # convert each row into a dict keyed by 'time' and the column names
    out_dicts = []
    for first_list in second_list:
        first_dict = {'time': first_list[0]}
        for col, value in zip(column_name, first_list[1:]):
            first_dict[col] = value
        out_dicts.append(first_dict)
    return out_dicts, column_name
def clean_DHT22_outliers(cfg):
    client = InfluxDBClient(host=cfg['database']['host'],
                            port=cfg['database']['port'],
                            username=cfg['database']['user'],
                            password=cfg['database']['password'],
                            database=cfg['database']['name'])

    query = f"select * from {cfg['DHT22']['measurement']}"
    df = pd.DataFrame(client.query(query, epoch='ns').get_points())

    # filter false readings (copy so we don't mutate a view of df)
    false_readings = df[df['humidity'] > 100].index
    fixed_measurements = df.iloc[false_readings].copy()

    # fix humidity readings by averaging the neighbouring samples
    s1 = df.iloc[false_readings - 1]['humidity'].reset_index(drop=True)
    s2 = df.iloc[false_readings + 1]['humidity'].reset_index(drop=True)
    fixed = pd.concat([s1, s2], axis=1).mean(axis=1)
    fixed.index = false_readings
    fixed_measurements['humidity'] = fixed

    # fix temperature readings the same way
    s1 = df.iloc[false_readings - 1]['temperature'].reset_index(drop=True)
    s2 = df.iloc[false_readings + 1]['temperature'].reset_index(drop=True)
    fixed = pd.concat([s1, s2], axis=1).mean(axis=1)
    fixed.index = false_readings
    fixed_measurements['temperature'] = fixed

    fixed_measurements['time'] = pd.to_datetime(fixed_measurements['time'])
    fixed_measurements = fixed_measurements.set_index('time')

    # write fixed values back to the database
    df_client = DataFrameClient(host=cfg['database']['host'],
                                port=cfg['database']['port'],
                                username=cfg['database']['user'],
                                password=cfg['database']['password'],
                                database=cfg['database']['name'])
    df_client.write_points(fixed_measurements, cfg['DHT22']['measurement'])
def start(self):
    while not self._terminated:
        try:
            self.db_client = DataFrameClient(self.INFLUXDB_HOST,
                                             self.INFLUXDB_PORT,
                                             self._db_user,
                                             self._db_password,
                                             self.INFLUXDB_NAME)
            self.db_client.create_database(self.INFLUXDB_NAME)
            self._init_OPCUA()
            while not self._terminated and not self.ticker.wait(
                    self.UPDATE_PERIOD / 1000):
                try:
                    # check if the server node browse name still exists/is valid
                    browse_name = self.opc_client.client.get_server_node(
                        ).get_browse_name()
                    df = self.opc_client.get_last_dataframe()
                    self.update_database(df)
                except Exception as ex:
                    print(DateHelper.get_local_datetime(),
                          self.__class__.__name__,
                          " lost client connection")
                    print(ex)
                    break
        except Exception as ex:
            print(ex)
        finally:
            if not self._terminated:
                print(DateHelper.get_local_datetime(), 'Restart ',
                      self.__class__.__name__)
                time.sleep(1)
    self._finalize()
def do_query():
    host = 'localhost'
    port = 8086
    user = '******'
    password = ''
    dbname = 'mydb'
    protocol = 'json'

    client = DataFrameClient(host, port, user, password, dbname)

    # df = client.query("""
    #     SELECT mean("Close") AS "mean_Close" FROM "mydb"."autogen"."price"
    #     WHERE time > '2015-01-01T05:48:00.000Z'
    #       AND time < '2018-08-06T05:48:00.000Z'
    #     GROUP BY time(1d), stock FILL(previous)
    # """)
    df = client.query("""
        SELECT mean("Close") AS "mean_Close",
               max("High") AS "max_High",
               min("Low") AS "min_Low"
        FROM "mydb"."autogen"."price"
        WHERE time > '2015-01-01T05:48:00.000Z'
          AND time < '2018-08-06T05:48:00.000Z'
          AND "stock"='0001.HK'
        GROUP BY time(4w) FILL(previous)
    """)
    print(df)
def df_print():
    """Form and submit a query, print pretty pandas DataFrame output."""
    try:
        client = DataFrameClient(host='127.0.0.1',
                                 port=8086,
                                 username='******',
                                 password='******',
                                 database='sensors')
        query = client.query(__query__)
    except InfluxDBClientError as ice:
        print('InfluxDB client error: {}\n'.format(ice))
        exit(1)
    except InfluxDBServerError as ise:
        print('InfluxDB server error: {}\n'.format(ise))
        exit(1)
    except ConnectionRefusedError as cre:
        print('Connection refused. Is database accessible?\n{}'.format(cre))
        exit(1)
    except Exception as e:
        print('An error occurred, ' +
              'check your database credentials.\n{}'.format(e))
        exit(1)
    print(query)
class InfluxDBWriter(Connection):
    def __init__(self, host: str = None, port: int = None,
                 username: str = None, password: str = None):
        """A class to write measurements to an InfluxDB time-series
        database."""
        super(InfluxDBWriter, self).__init__()
        self.client = DataFrameClient(host=host, port=port,
                                      username=username, password=password)

    @catch_external_errors
    def write_measurement(self, msr: Measurement):
        self.client.write_points(dataframe=msr.df,
                                 measurement=msr.name,
                                 tags=msr.tags,
                                 time_precision=msr.time_precision,
                                 database=msr.database)
        _logger.info('{measurement} successfully written to database.'.format(
            measurement=msr.name))
def kernel_holtwinter(host, port, user, password, dbname, query,
                      number_of_prediction):
    client = DataFrameClient(host, port, user, password, dbname)
    data = client.query(query)
    dataframe = data['kernel']

    # renamed from `dict`/`list`, which shadowed the builtins
    result = {}
    times_as_dt64 = []
    for x in dataframe.index.tolist():
        date = parser.parse(str(x))
        iso = date.isoformat()
        inter_date = iso.split("+")
        dt = datetime.strptime(inter_date[0], "%Y-%m-%dT%H:%M:%S")
        times_as_dt64.append(np.datetime64(dt))

    dataframe2 = pd.DataFrame({'mean': dataframe['mean'].tolist()},
                              index=times_as_dt64)
    ts = dataframe2['mean']
    predict = triple_exponential_smoothing(ts, 24, 0.3, 0.029, 0.3,
                                           number_of_prediction)

    # convert the prediction index to epoch seconds
    epochs = []
    for x in predict.index.tolist():
        date = parser.parse(str(x))
        iso = date.isoformat()
        inter_date = iso.split("+")
        t = time.mktime(
            datetime.strptime(inter_date[0],
                              "%Y-%m-%dT%H:%M:%S").timetuple())
        epochs.append(t)

    result["mean"] = predict.tolist()
    result["time"] = epochs
    result["unit"] = "%"
    return result
def connect_db():
    '''
    Connect to InfluxDB and return a DataFrameClient object.
    '''
    try:
        conn = DataFrameClient(var.db_host, var.db_port, var.db_user,
                               var.db_password, var.db_name)
    except Exception as err:
        log("[ERROR] " + str(err), 0)
        sys.exit(1)
    return conn
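# A usage sketch for connect_db (assumes the `var` settings module used
# above; get_list_database comes from the influxdb client API):
conn = connect_db()
print(conn.get_list_database())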
def main(host='localhost', port=8086):
    user = '******'
    password = '******'
    dbname = 'example'

    client = DataFrameClient(host, port, user, password, dbname)

    print("Create pandas DataFrame")
    df = pd.DataFrame(data=list(range(30)),
                      index=pd.date_range(start='2014-11-16',
                                          periods=30, freq='H'))

    print("Create database: " + dbname)
    client.create_database(dbname)

    print("Write DataFrame")
    client.write_points({'demo': df})

    print("Read DataFrame")
    client.query("select * from demo")

    print("Delete database: " + dbname)
    client.delete_database(dbname)
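# A possible entry point for the example above, mirroring the function's own
# defaults (hypothetical; the original entry point is not shown):
if __name__ == '__main__':
    main(host='localhost', port=8086)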
def main(pair=None, logic=None):
    # renamed `df` to `client`: it holds a DataFrameClient, not a dataframe
    client = DataFrameClient(host='localhost', port=8086)
    databases = client.get_list_database()
    pairs = [i for x in databases for i in x.values() if i in MARKET_PAIRS]
    if logic:
        for x in pairs:
            client.drop_database(x)
        return 'Finished'
    elif pair in pairs:
        client.drop_database(pair)
    return
def read_influxdb_data(host='192.168.123.245', port=8086,
                       dbname='c9377a95-82f3-4af3-ac14-40d14f6d2abe',
                       ChannelName='1Y520210100',
                       time_start='', time_end='',
                       user='******', password='******', keyword=''):
    client = DataFrameClient(host, port, user, password, dbname)
    measurements = client.get_list_measurements()
    if keyword is None:
        keyword = ''
    if keyword == '':
        measurement = [mea.get(u'name') for mea in measurements
                       if mea.get(u'name').find(ChannelName) >= 0]
    else:
        measurement = [mea.get(u'name') for mea in measurements
                       if mea.get(u'name').find(ChannelName) >= 0
                       and mea.get(u'name').find(keyword) >= 0]
    if len(measurement) == 0:
        print('No data retrieved.')
        return None
    measurement = measurement[-1]

    time_end = 'now()' if time_end == '' else "'" + time_end + "'"
    time_start = 'now()' if time_start == '' else "'" + time_start + "'"
    querystr = 'select * from "{}" where time > {} and time < {}'.format(
        measurement, time_start, time_end)
    # print(querystr)
    df = client.query(querystr).get(measurement)
    client.close()
    if df is None:
        print('InfluxDB no data retrieved.')
        return None

    dff = df.groupby('id')
    columns = [name for name, group in dff]
    groups = [group['val'] for name, group in dff]
    # check datetime alignment:
    # all([all(groups[i].index == groups[0].index)
    #      for i in range(1, len(groups))])
    result = pd.concat(groups, axis=1)
    result.columns = columns
    result.index = groups[0].index
    return measurement, result
def test_datetime_to_epoch(self):
    timestamp = pd.Timestamp('2013-01-01 00:00:00.000+00:00')
    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')

    self.assertEqual(cli._datetime_to_epoch(timestamp),
                     1356998400.0)
    self.assertEqual(cli._datetime_to_epoch(timestamp, time_precision='s'),
                     1356998400.0)
    self.assertEqual(cli._datetime_to_epoch(timestamp, time_precision='m'),
                     1356998400000.0)
    self.assertEqual(cli._datetime_to_epoch(timestamp, time_precision='ms'),
                     1356998400000.0)
    self.assertEqual(cli._datetime_to_epoch(timestamp, time_precision='u'),
                     1356998400000000.0)
class INSERTDATA:
    def __init__(self):
        host = 'r4-influxdb.ricplt'
        self.client = DataFrameClient(host, '8086', 'root', 'root')
        self.dropdb('UEData')
        self.createdb('UEData')

    def createdb(self, dbname):
        print("Create database: " + dbname)
        self.client.create_database(dbname)
        self.client.switch_database(dbname)

    def dropdb(self, dbname):
        print("DROP database: " + dbname)
        self.client.drop_database(dbname)

    def dropmeas(self, measname):
        print("DROP MEASUREMENT: " + measname)
        self.client.query('DROP MEASUREMENT ' + measname)
def random_shapeable_order():
    # Retrieve random order
    df = randomorders.random_shapeable_orderbook()

    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'sbook')
    client.close()

    # Run optimization
    optimization()
    return {"status": "success"}
def forecast(times: List[str], values: List[float]):
    df = pandas.DataFrame(index=pandas.DatetimeIndex(times).round('5T'),
                          data={'uncontr': values})

    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'uncontr')
    client.close()

    # Run optimization
    optimization()
    return {"status": "success"}
def connectInfluxDatabase(self):
    try:
        # prepare database
        self.logger.debug(
            f'Connecting to Influx with: Host: {self.influx_host}, '
            f'Port: {self.influx_port}, User: {self.influx_user}, '
            f'DB: {self.influx_db}')
        if self.influx_version == 1:
            self.influxClient = DataFrameClient(self.influx_host,
                                                self.influx_port,
                                                self.influx_user,
                                                self.influx_pwd,
                                                self.influx_db)
        elif self.influx_version == 2:
            retries = WritesRetry(total=20, backoff_factor=1,
                                  exponential_base=1)
            self.influxClient = InfluxDBClient(
                url=f"http://{self.influx_host}:{self.influx_port}",
                token=self.influx_token,
                org=self.influx_org,
                retries=retries,
                timeout=180_000)
            self.influx_query_api = self.influxClient.query_api()
            self.influx_write_api = self.influxClient.write_api(
                write_options=WriteOptions(
                    batch_size=500,
                    write_type=WriteType.synchronous,
                    flush_interval=10_000,
                    jitter_interval=2_000,
                    retry_interval=30_000,
                    max_retries=25,
                    max_retry_delay=60_000,
                    exponential_base=2))
            # self.influx_write_api = self.influxClient.write_api(
            #     write_options=SYNCHRONOUS)
    except Exception as e:
        self.logger.exception('Crash!', exc_info=e)
        sys.exit(99)
def loss_rate_to_csv(start_time: int, end_time: int, csv_path: str, _: dict):
    """Collect loss events and netem stats from InfluxDB, compute loss
    rates and store them as csv."""
    client = DataFrameClient(database='telegraf')
    sequence_df = get_sequence_df(client, start_time, end_time)
    netem_df = get_netem_df(client, start_time, end_time)
    joined_df = sequence_df.join(netem_df, lsuffix="_sequence",
                                 rsuffix="_netem")
    # Use the sequence packet counts rather than netem's to avoid netem
    # oddities
    joined_df['rate_sequence'] = compute_loss_rate(joined_df,
                                                   'losses_sequence',
                                                   'packets')
    joined_df['rate_netem'] = compute_loss_rate(joined_df, 'losses_netem',
                                                'packets')
    joined_df.to_csv(csv_path)
def random_deferrable_order():
    # Retrieve random order
    TIMESTEP = 12
    df = randomorders.random_deferrable_orderbook(timestep=60 / TIMESTEP)

    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'dbook')
    client.close()

    # Run optimization
    optimization()
    return {"status": "success"}