def test_multiquery_into_dataframe(self):
    """Test multiquery into df for TestDataFrameClient object."""
    data = {
        "results": [
            {
                "series": [
                    {
                        "name": "cpu_load_short",
                        "columns": ["time", "value"],
                        "values": [
                            ["2015-01-29T21:55:43.702900257Z", 0.55],
                            ["2015-01-29T21:55:43.702900257Z", 23422],
                            ["2015-06-11T20:46:02Z", 0.64]
                        ]
                    }
                ]
            },
            {
                "series": [
                    {
                        "name": "cpu_load_short",
                        "columns": ["time", "count"],
                        "values": [
                            ["1970-01-01T00:00:00Z", 3]
                        ]
                    }
                ]
            }
        ]
    }

    pd1 = pd.DataFrame(
        [[0.55], [23422.0], [0.64]], columns=['value'],
        index=pd.to_datetime([
            "2015-01-29 21:55:43.702900257+0000",
            "2015-01-29 21:55:43.702900257+0000",
            "2015-06-11 20:46:02+0000"]))
    # Guard the localization: to_datetime already returns a tz-aware
    # index for these offset-suffixed strings, and tz_localize raises
    # on an index that is already tz-aware.
    if pd1.index.tzinfo is None:
        pd1.index = pd1.index.tz_localize('UTC')
    pd2 = pd.DataFrame(
        [[3]], columns=['count'],
        index=pd.to_datetime(["1970-01-01 00:00:00+00:00"]))
    if pd2.index.tzinfo is None:
        pd2.index = pd2.index.tz_localize('UTC')
    expected = [{'cpu_load_short': pd1}, {'cpu_load_short': pd2}]

    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
    iql = "SELECT value FROM cpu_load_short WHERE region='us-west';"\
          "SELECT count(value) FROM cpu_load_short WHERE region='us-west'"

    with _mocked_session(cli, 'GET', 200, data):
        result = cli.query(iql)
        for r, e in zip(result, expected):
            for k in e:
                assert_frame_equal(e[k], r[k])
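# Companion sketch (not from the original tests): how application code would
# consume the multi-statement query exercised above. Host, credentials, and
# database are placeholders. With semicolon-separated statements, query()
# returns one result set per statement, each mapping measurement -> DataFrame.
from influxdb import DataFrameClient


def fetch_cpu_load_multiquery():
    client = DataFrameClient('localhost', 8086, 'user', 'password', 'db')
    iql = ("SELECT value FROM cpu_load_short WHERE region='us-west';"
           "SELECT count(value) FROM cpu_load_short WHERE region='us-west'")
    for result_set in client.query(iql):
        for measurement, frame in result_set.items():
            print(measurement, frame.shape)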
def connect_db():
    '''Connects to InfluxDB and returns a DataFrameClient object.'''
    try:
        conn = DataFrameClient(var.db_host, var.db_port, var.db_user,
                               var.db_password, var.db_name)
    except Exception as err:
        log("[ERROR] " + str(err), 0)
        sys.exit(1)
    return conn
def main():
    """Instantiate the connection to the InfluxDB client."""
    client = DataFrameClient(host, port, user, password, database)
    query = """SELECT * from device WHERE {} """.format(where_device_string)
    result = client.query(query)
    data_frame = result['device']
    data_file = './data/consistency_data.txt'
    data_frame.to_csv(data_file, sep='|', index=True, index_label='time')
def __init__(self, app_slo, app_config_file, config_file, app_id):
    with open(config_file) as json_data:
        self.config = json.load(json_data)
    with open(app_config_file) as json_data:
        self.app_metric_config = json.load(json_data)

    if "metric" in app_slo:
        self.app_metric_config["metric"] = app_slo["metric"]
    if "threshold" in app_slo:
        self.app_metric_config["threshold"] = app_slo["threshold"]

    self.group_keys = {"intel/docker": "io.kubernetes.pod.name"}
    self.default_group_key = "nodename"

    influx_host = config.get("INFLUXDB", "HOST")
    influx_port = config.get("INFLUXDB", "PORT")
    influx_user = config.get("INFLUXDB", "USERNAME")
    influx_password = config.get("INFLUXDB", "PASSWORD")
    self.influx_client = DataFrameClient(
        influx_host, influx_port, influx_user, influx_password,
        config.get("INFLUXDB", "RAW_DB_NAME"))
    self.app_influx_client = DataFrameClient(
        influx_host, influx_port, influx_user, influx_password,
        config.get("INFLUXDB", "APP_DB_NAME"))
    self.deployment_id = ''

    derived_db = config.get("INFLUXDB", "DERIVED_METRIC_DB_NAME")
    self.derived_influx_client = DataFrameClient(
        influx_host, influx_port, influx_user, influx_password, derived_db)
    self.derived_influx_client.create_database(derived_db)
    self.derived_influx_client.create_retention_policy(
        'derived_metric_policy', '5w', 1, default=True)

    self.logger = logging.getLogger(app_id)
def main(host, port, username, password, database, ssl):
    parser = CHMIParser()
    client = DataFrameClient(host=host, port=port, username=username,
                             password=password, database=database,
                             ssl=ssl, verify_ssl=ssl)
    df = parser.parse()
    client.write_points(dataframe=df.dropna(), database=database,
                        measurement='soil_temp', tag_columns=['Stanice'],
                        protocol='line')
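# Minimal sketch (assumption, not from the source): the shape of DataFrame the
# write above expects. The index carries the timestamps; the column named in
# tag_columns ('Stanice', Czech for station) is written as an InfluxDB tag and
# the remaining columns become fields. 'Praha' and 'temp_10cm' are invented
# placeholder values.
import pandas as pd
from influxdb import DataFrameClient


def write_sample_soil_temp(client: DataFrameClient, database: str):
    df = pd.DataFrame(
        {'Stanice': ['Praha'], 'temp_10cm': [4.2]},
        index=pd.to_datetime(['2021-01-01T00:00:00Z']))
    client.write_points(dataframe=df, database=database,
                        measurement='soil_temp',
                        tag_columns=['Stanice'], protocol='line')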
def test_create_retention_policy(self):
    """Test create retention policy for TestDataFrameClient object."""
    cli = DataFrameClient(database='db')
    example_response = '{"results":[{}]}'

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/query",
                       text=example_response)
        cli.create_retention_policy('somename', '1d', 4, database='db')

        self.assertEqual(
            m.last_request.qs['q'][0],
            'create retention policy "somename" on '
            '"db" duration 1d replication 4 shard duration 0s'
        )
def __make_client(self):
    '''
    Set up both clients: DataFrameClient for queries and
    InfluxDBClient for writes. Internal helper, not needed by the user.
    '''
    self.client = InfluxDBClient(host=self.host, port=self.port,
                                 username=self.username,
                                 password=self.password,
                                 database=self.database,
                                 ssl=self.use_ssl,
                                 verify_ssl=self.verify_ssl_is_on)
    self.df_client = DataFrameClient(host=self.host, port=self.port,
                                     username=self.username,
                                     password=self.password,
                                     database=self.database,
                                     ssl=self.use_ssl,
                                     verify_ssl=self.verify_ssl_is_on)
def load():
    df = pd.read_csv('GHI_DHI_Temp_Wind_20130101_english_units.csv',
                     skiprows=1)
    df.index = pd.to_datetime(df['DATE (MM/DD/YYYY)'] + ' ' + df['MST'],
                              format='%m/%d/%Y %H:%M')
    df.columns = [
        u'DATE (MM/DD/YYYY)',
        u'MST',
        u'AtmosphericAnalogKind_irradanceGlobalHorizontal',
        u'AtmosphericAnalogKind_irradanceDirectNormal',
        u'AtmosphericAnalogKind_irradanceDiffuseHorizontal',
        u'AtmosphericAnalogKind_ambientTemperature',
        u'AtmosphericAnalogKind_humidity',
        u'AtmosphericAnalogKind_speed',
        u'AtmosphericAnalogKind_bearing'
    ]
    dbname = 'proven'
    protocol = 'json'

    client = DataFrameClient(host='localhost', port=8086)

    print("Create database: " + dbname)
    # client.drop_database(dbname)
    client.create_database(dbname)
    dbs = client.get_list_database()
    print(dbs)
    client.switch_database(dbname)

    print("Write DataFrame")
    client.write_points(df.loc['2013-7-1':'2013-7-31'], 'weather',
                        protocol=protocol)
    client.write_points(df.loc['2013-8-1':'2013-8-31'], 'weather',
                        protocol=protocol)
    client.write_points(df.loc['2013-9-1':'2013-9-30'], 'weather',
                        protocol=protocol)

    # Writing with explicit tags is left disabled:
    # client.write_points(df, 'demo',
    #                     {'k1': 'v1', 'k2': 'v2'}, protocol=protocol)
def test_query_into_dataframe(self):
    """Test query into df for TestDataFrameClient object."""
    data = {
        "results": [{
            "series": [
                {"measurement": "network",
                 "tags": {"direction": ""},
                 "columns": ["time", "value"],
                 "values": [["2009-11-10T23:00:00Z", 23422]]
                 },
                {"measurement": "network",
                 "tags": {"direction": "in"},
                 "columns": ["time", "value"],
                 "values": [["2009-11-10T23:00:00Z", 23422],
                            ["2009-11-10T23:00:00Z", 23422],
                            ["2009-11-10T23:00:00Z", 23422]]
                 }
            ]
        }]
    }

    pd1 = pd.DataFrame(
        [[23422]], columns=['value'],
        index=pd.to_datetime(["2009-11-10T23:00:00Z"]))
    if pd1.index.tzinfo is None:
        pd1.index = pd1.index.tz_localize('UTC')
    pd2 = pd.DataFrame(
        [[23422], [23422], [23422]], columns=['value'],
        index=pd.to_datetime(["2009-11-10T23:00:00Z",
                              "2009-11-10T23:00:00Z",
                              "2009-11-10T23:00:00Z"]))
    if pd2.index.tzinfo is None:
        pd2.index = pd2.index.tz_localize('UTC')
    expected = {
        ('network', (('direction', ''),)): pd1,
        ('network', (('direction', 'in'),)): pd2
    }

    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
    with _mocked_session(cli, 'GET', 200, data):
        result = cli.query('select value from network group by direction;')
        for k in expected:
            assert_frame_equal(expected[k], result[k])
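# Companion sketch (not from the original tests): iterating a GROUP BY result.
# As the expected dict above shows, each DataFrame is keyed by the tuple
# (measurement, ((tag_key, tag_value), ...)). Connection details are
# placeholders.
from influxdb import DataFrameClient


def iterate_grouped_network_values():
    client = DataFrameClient('localhost', 8086, 'user', 'password', 'db')
    result = client.query('select value from network group by direction;')
    for (measurement, tags), frame in result.items():
        print(measurement, dict(tags), len(frame))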
def test_drop_retention_policy(self):
    """Test drop retention policy for TestDataFrameClient object."""
    cli = DataFrameClient(database='db')
    example_response = '{"results":[{}]}'

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/query",
                       text=example_response)
        cli.drop_retention_policy('somename', 'db')

        self.assertEqual(
            m.last_request.qs['q'][0],
            'drop retention policy "somename" on "db"'
        )
def remove_deferrable_order(t: str):
    # Create a fake order with zero load
    data = {'duration': [1], 'profile_kw': [[0.0]]}
    # Minus 2 hours is a workaround for the timezone offset
    df = pandas.DataFrame(
        index=[datetime.strptime(t, '%Y-%m-%d %H:%M:%S')
               - timedelta(hours=2)],
        data=data)

    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'dbook')
    client.close()

    # Run optimization
    optimization()
    return {"status": "success"}
def test_get_list_database(self):
    data = {'results': [
        {'series': [
            {'measurement': 'databases',
             'values': [['new_db_1'], ['new_db_2']],
             'columns': ['name']}]}
    ]}
    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
    with _mocked_session(cli, 'get', 200, json.dumps(data)):
        self.assertListEqual(
            cli.get_list_database(),
            [{'name': 'new_db_1'}, {'name': 'new_db_2'}]
        )
def fault_tagging(vnf_num):
    # Tagging based on packet processing time (currently unused)
    with open('../server_info.yaml') as f:
        # safe_load avoids PyYAML's unsafe default loader
        server_info = yaml.safe_load(f)['InDB']
    user, password, host = (server_info['id'], server_info['pwd'],
                            server_info['ip'])
    client = DataFrameClient(host, 8086, user, password, 'pptmon')

    ppt = client.query('select * from "%d"' % vnf_num)
    ppt = list(ppt.values())[0].tz_convert('Asia/Seoul')
    # Truncate timestamps to whole minutes
    ppt.index = ppt.index.map(lambda x: x.replace(microsecond=0, second=0))
    ppt.reset_index(inplace=True)
    ppt.rename(columns={'index': 'time'}, inplace=True)

    fault = ppt[ppt['value'] > 10000][['time']].values.tolist()
    fault = [x[0].strftime("%m-%d %H:%M") for x in fault]
    return fault
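# Side note (assumption): the per-element replace() used above to zero out
# seconds and microseconds can also be expressed with DatetimeIndex.floor,
# which truncates a timezone-aware index to whole minutes in one call.
import pandas as pd

idx = pd.to_datetime(['2021-05-01 10:15:42.123456+09:00'])
assert list(idx.floor('min')) == list(
    idx.map(lambda x: x.replace(second=0, microsecond=0)))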
def setUp(self):
    self._client = InfluxDBClient(host='localhost', port=8086,
                                  username='******', password='******',
                                  database='test_cache')
    self._client.drop_database('test_cache')
    self._client.create_database('test_cache')
    self._client.switch_database('test_cache')
    self._df_client = DataFrameClient(host='localhost', port=8086,
                                      username='******',
                                      password='******',
                                      database='test_cache')
def get_tide_data():
    with open("last_data_retrieved.txt") as f:
        # Strip newlines so the membership test below compares clean strings
        previous_date_time = [ln.strip() for ln in f.readlines()]

    url = ("https://tidesandcurrents.noaa.gov/api/datagetter?date=latest"
           "&station=8638610&product=water_level&datum=NAVD&units=english"
           "&time_zone=lst_ldt&format=json")
    response = requests.get(url)
    result = json.loads(response.text)

    station_id = str(result['metadata']['id'])
    station_name = str(result['metadata']['name'])
    lat = str(result['metadata']['lat'])
    lon = str(result['metadata']['lon'])
    value = float(result['data'][0]['v'])
    date_time_str = str(result['data'][0]['t'])
    date_time = datetime.strptime(date_time_str, '%Y-%m-%d %H:%M')
    current_date_time = str(date_time)

    if current_date_time not in previous_date_time:
        cols = ['station_id', 'station_name', 'latitude', 'longitude',
                'date_time', 'value']
        line = pd.DataFrame({'station_id': station_id,
                             'station_name': station_name,
                             'latitude': lat,
                             'longitude': lon,
                             'date_time': date_time,
                             'value': value},
                            columns=cols, index=[0])
        line['date_time'] = pd.to_datetime(line['date_time'])
        line = line.set_index('date_time')

        time_values = line[['value']]
        # Prepared but currently unused; write_points below is called
        # without tags
        tags = {'station_id': line[['station_id']],
                'station_name': line[['station_name']],
                'latitude': line[['latitude']],
                'longitude': line[['longitude']]}

        client = DataFrameClient(host, port, user, password, dbname)
        # Write DataFrame
        client.write_points(line, dbname, protocol=protocol)
        client.close()

        with open('last_data_retrieved.txt', 'w') as f:
            f.write(current_date_time + '\n')
        print("New data has been added to the database!")
    else:
        print("No new data available")
def get_tide_data():
    with open("last_data_retrieved.txt") as f:
        # Strip newlines so the membership test below compares clean strings
        previous_date_time = [ln.strip() for ln in f.readlines()]

    url = "https://gentle-falls-78142.herokuapp.com/"
    response = requests.get(url)
    result = json.loads(response.text)

    sensor_id = str(result['metadata']['id'])
    sensor_name = str(result['metadata']['name'])
    lat = str(result['metadata']['lat'])
    lon = str(result['metadata']['lon'])
    flood = str(result['data'][0]['f'])
    value = float(result['data'][0]['v'])
    date_time_str = str(result['data'][0]['t'])
    date_time = datetime.strptime(date_time_str, '%Y-%m-%d %H:%M')
    current_date_time = str(date_time)

    if current_date_time not in previous_date_time:
        cols = ['sensor_id', 'sensor_name', 'latitude', 'longitude',
                'flood', 'date_time', 'value']
        line = pd.DataFrame({'sensor_id': sensor_id,
                             'sensor_name': sensor_name,
                             'latitude': lat,
                             'longitude': lon,
                             'flood': flood,
                             'date_time': date_time,
                             'value': value},
                            columns=cols, index=[0])
        line['date_time'] = pd.to_datetime(line['date_time'])
        line = line.set_index('date_time')

        time_values = line[['value']]
        print(time_values)
        # Prepared but currently unused; write_points below is called
        # without tags
        tags = {'sensor_id': line[['sensor_id']],
                'sensor_name': line[['sensor_name']],
                'latitude': line[['latitude']],
                'longitude': line[['longitude']],
                'flood': line[['flood']]}

        client = DataFrameClient(host, port, user, password, dbname)
        # Write DataFrame
        client.write_points(line, dbname, protocol=protocol)
        client.close()

        with open('last_data_retrieved.txt', 'w') as f:
            f.write(current_date_time)
        print("New data has been added to the database!")
    else:
        print("No new data available")
def get_flow_matrix(influxdb_host, influxdb_port):
    client = DataFrameClient(influxdb_host, influxdb_port, "", "",
                             "telegraf")
    apps = [
        app["value"]
        for app in list(client.query(
            'SHOW TAG VALUES ON "telegraf" WITH KEY="host_app_src"'))[0]
        if 'salt' not in app["value"]
    ]
    # DataFrame.from_items was removed in pandas 1.0; from_dict with
    # orient="index" builds the same row-oriented frame.
    matrix = pd.DataFrame.from_dict(
        {a: [matrix_value(query_template, client, a, b) for b in apps]
         for a in apps},
        orient="index", columns=apps)
    return matrix
def test_get_list_retention_policies(self):
    """Test get retention policies for TestDataFrameClient object."""
    cli = DataFrameClient(database='db')
    example_response = \
        '{"results": [{"series": [{"values": [["fsfdsdf", "24h0m0s", 2]],'\
        ' "columns": ["name", "duration", "replicaN"]}]}]}'

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.GET,
                       "http://localhost:8086/query",
                       text=example_response)
        self.assertListEqual(
            cli.get_list_retention_policies("db"),
            [{'duration': '24h0m0s', 'name': 'fsfdsdf', 'replicaN': 2}]
        )
def _initialize_engine(self, db_engine=None, db_host=None, db_name=None,
                       db_username=None, db_password=None):
    """Initialize DB engine."""
    # Parse
    engine = db_engine if db_engine is not None else self.db_defaults.engine
    username = (db_username if db_username is not None
                else self.db_defaults.username)
    password = (db_password if db_password is not None
                else self.db_defaults.password)
    host = db_host if db_host is not None else self.db_defaults.host
    database = db_name if db_name is not None else self.db_defaults.database

    # Substitute
    self._config.current_db = {
        'engine': engine,
        'host': host,
        'username': username,
        'password': password,
        'database': database,
    }

    # Connect
    if engine in ['influxdb']:
        if not _extra_supports['influxdb']:
            raise ImportError('Module `influxdb` cannot be imported')
        hostname = host
        hostport = 8086
        if ':' in host:
            hostname, hostport = host.split(':')
        self._engine = DataFrameClient(hostname, hostport, username,
                                       password, database)
    else:
        engine = 'postgresql' if engine == 'timescaledb' \
            else 'mysql' if engine == 'mariadb' \
            else engine
        username_and_password = '' \
            if all([username == '', password == '']) \
            else '{0}:{1}@'.format(username, password)
        self._engine = sqlalchemy.create_engine(
            '{0}://{1}{2}{3}'.format(
                engine,
                username_and_password if engine != 'sqlite' else '',
                '/{0}'.format(host) if engine == 'sqlite' else host,
                '' if engine == 'sqlite' else '/' + database),
            echo=False)
        self._session = scoped_session(sessionmaker(bind=self._engine))
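# Standalone sketch (not from the source): the host:port parsing performed in
# the influxdb branch above, pulled out for illustration. It falls back to the
# default InfluxDB port 8086 when no port is given.
def split_host(host, default_port=8086):
    if ':' in host:
        name, port = host.split(':')
        return name, int(port)
    return host, default_port


assert split_host('influx.local:9999') == ('influx.local', 9999)
assert split_host('influx.local') == ('influx.local', 8086)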
def analyseWarn(name, qname, topk=5):
    client = DataFrameClient(host='127.0.0.1', port=8086,
                             username='******', password='******',
                             database='testdb')
    # Defaults to 10 rows; adjust as needed
    query_positive = ('select * from ganglia where ' + qname +
                      ' >0 ORDER BY time DESC limit 10')
    # The default limit must stay consistent with warnfactor
    query_negative = ('select * from ganglia where ' + qname +
                      ' <0 ORDER BY time DESC limit 5')

    data_p = client.query(query_positive, chunked=False)
    data_positive = data_p['ganglia']
    normalSample = data_positive[name]  # extract the normal samples

    data_n = client.query(query_negative, chunked=False)
    data_negative = data_n['ganglia']
    anamolySample = data_negative[name]  # extract the anomalous samples

    return analyseReasonWithTreeBaesd(anamolySample, normalSample, name)
def test_query_into_dataframe(self):
    data = [{
        "name": "foo",
        "columns": ["time", "sequence_number", "column_one"],
        "points": [[3600, 16, 2], [3600, 15, 1], [0, 14, 2], [0, 13, 1]]
    }]
    # dataframe sorted ascending by time first, then sequence_number
    dataframe = pd.DataFrame(data=[[13, 1], [14, 2], [15, 1], [16, 2]],
                             index=pd.to_datetime([0, 0, 3600, 3600],
                                                  unit='s', utc=True),
                             columns=['sequence_number', 'column_one'])
    with _mocked_session('get', 200, data):
        cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
        result = cli.query('select column_one from foo;')
        assert_frame_equal(dataframe, result)
def write_influxdb(host, port, user, password, dbname, protocol, filename):
    client = DataFrameClient(host, port, user, password, dbname)
    data = pd.read_json(filename)
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    data = data.set_index('timestamp')
    # Pass tag_columns=... to write_points to store columns as tags
    # print('#######################################')
    # print('printing the data to be written to influx: ' + str(data))

    print("Create database: " + dbname)
    # create_database() returns None, so it cannot be used as a condition
    client.create_database(dbname)
    print('database created successfully!')

    print("Write DataFrame")
    if client.write_points(data, 'transporte', protocol=protocol):
        print('data saved successfully!')
def getHostPageCount(host, port, user, password, dbname):
    client = DataFrameClient(host, port, user, password, dbname)
    query2 = "show tag values from cpu with key = host"
    data2 = client.query(query2)
    dataframe2 = data2['cpu']

    count = 0
    for x in dataframe2:
        count = count + 1

    # Ceiling division, five hosts per page; use // so the result stays
    # an integer under Python 3's true division
    if count % 5 == 0:
        page_count = count // 5
    else:
        page_count = count // 5 + 1
    return page_count
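# Equivalence sketch (assumption): the branchy paging logic above computes
# ceil(count / 5), which integer arithmetic expresses in one line.
def page_count(count, per_page=5):
    return -(-count // per_page)


assert page_count(10) == 2
assert page_count(11) == 3
assert page_count(0) == 0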
def data():
    client = DataFrameClient(host, port, user, password, dbname)
    data = client.query(
        "select * from cpu where cpu = 'cpu-total' AND host='etlnode1' "
        "limit 10"
    )
    dataframe = data['cpu']
    payload = {}  # avoid shadowing the built-in dict
    payload["usage_idle"] = json.loads(
        dataframe['usage_idle'].to_json(orient='values'))
    payload["usage_user"] = json.loads(
        dataframe['usage_user'].to_json(orient='values'))
    payload["usage_softirq"] = json.loads(
        dataframe['usage_softirq'].to_json(orient='values'))
    payload["usage_system"] = json.loads(
        dataframe['usage_system'].to_json(orient='values'))
    payload["index"] = dataframe.index.tolist()
    return jsonify(payload), 200, {'Access-Control-Allow-Origin': '*'}
def connectInfluxDatabase(self):
    try:
        # prepare database
        self.logger.debug(
            f'Connecting to Influx with: Host:{self.influx_host}, '
            f'Port: {self.influx_port}, User: {self.influx_user}, '
            f'DB: {self.influx_db}'
        )
        if self.influx_version == 1:
            self.influxClient = DataFrameClient(self.influx_host,
                                                self.influx_port,
                                                self.influx_user,
                                                self.influx_pwd,
                                                self.influx_db)
        elif self.influx_version == 2:
            retries = WritesRetry(total=20, backoff_factor=1,
                                  exponential_base=1)
            self.influxClient = InfluxDBClient(
                url=f"http://{self.influx_host}:{self.influx_port}",
                token=self.influx_token,
                org=self.influx_org,
                retries=retries,
                timeout=180_000)
            self.influx_query_api = self.influxClient.query_api()
            self.influx_write_api = self.influxClient.write_api(
                write_options=WriteOptions(
                    batch_size=500,
                    write_type=WriteType.synchronous,
                    flush_interval=10_000,
                    jitter_interval=2_000,
                    retry_interval=30_000,
                    max_retries=25,
                    max_retry_delay=60_000,
                    exponential_base=2))
            # self.influx_write_api = self.influxClient.write_api(
            #     write_options=SYNCHRONOUS)
    except Exception as e:
        self.logger.exception('Crash!', exc_info=e)
        sys.exit(99)
def import_csv(stock):
    # open the socket
    # resp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # resp_socket.connect(socket.getaddrinfo('localhost', 8282)[0][-1])
    host = 'localhost'
    port = 8086
    user = '******'
    password = ''
    dbname = 'mydb'
    protocol = 'json'

    client = DataFrameClient(host, port, user, password, dbname)
    rows = []

    # prepare the file
    with open('data/{}.csv'.format(stock), 'r') as csv_file:
        reader = csv.DictReader(csv_file, delimiter=',')
        for row in reader:
            quote_date = moment.date(row['Date'], '%Y-%m-%d')
            open_price = row['Open']
            high_price = row['High']
            low_price = row['Low']
            close_price = row['Close']
            adj_close = row['Adj Close']
            volume = row['Volume']

            if not open_price.replace('.', '', 1).isdigit():
                continue

            rows.append(pd.Series(
                {
                    'Open': float(open_price),
                    'High': float(high_price),
                    'Low': float(low_price),
                    'Close': float(close_price),
                    'Volume': int(volume),
                },
                name=quote_date.date))

    # DataFrame.append was removed in pandas 2.0; build the frame in one go
    df = pd.DataFrame(rows,
                      columns=['Open', 'High', 'Low', 'Close', 'Volume'])
    print(df)
    client.write_points(df, 'price', {'stock': stock})
def test_write_points_from_dataframe_with_numeric_column_names(self):
    now = pd.Timestamp('1970-01-01 00:00+00:00')
    # df with numeric column names
    dataframe = pd.DataFrame(data=[["1", 1, 1.0], ["2", 2, 2.0]],
                             index=[now, now + timedelta(hours=1)])
    expected = (
        b'foo,hello=there 0=\"1\",1=1i,2=1.0 0\n'
        b'foo,hello=there 0=\"2\",1=2i,2=2.0 3600000000000\n'
    )

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/write",
                       status_code=204)
        cli = DataFrameClient(database='db')
        cli.write_points(dataframe, "foo", {"hello": "there"})

        self.assertEqual(m.last_request.body, expected)
def test_datetime_to_epoch(self):
    timestamp = pd.Timestamp('2013-01-01 00:00:00.000+00:00')
    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')

    self.assertEqual(cli._datetime_to_epoch(timestamp), 1356998400.0)
    self.assertEqual(
        cli._datetime_to_epoch(timestamp, time_precision='h'),
        1356998400.0 / 3600
    )
    self.assertEqual(
        cli._datetime_to_epoch(timestamp, time_precision='m'),
        1356998400.0 / 60
    )
    self.assertEqual(
        cli._datetime_to_epoch(timestamp, time_precision='s'),
        1356998400.0
    )
    self.assertEqual(
        cli._datetime_to_epoch(timestamp, time_precision='ms'),
        1356998400000.0
    )
    self.assertEqual(
        cli._datetime_to_epoch(timestamp, time_precision='u'),
        1356998400000000.0
    )
    self.assertEqual(
        cli._datetime_to_epoch(timestamp, time_precision='n'),
        1356998400000000000.0
    )
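# Worked example (not from the original tests): the precision arithmetic the
# test above asserts, done by hand. Timestamp.value is nanoseconds since the
# Unix epoch, so each precision is a fixed power-of-ten division.
import pandas as pd

ts = pd.Timestamp('2013-01-01 00:00:00+00:00')
ns = ts.value                            # nanoseconds since epoch
assert ns / 1e9 == 1356998400.0          # 's' precision
assert ns / 1e6 == 1356998400000.0       # 'ms' precision
assert ns / 1e3 == 1356998400000000.0    # 'u' precision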
def main(host='localhost', port=8086):
    user = '******'
    password = '******'
    dbname = 'Stock Data'
    protocol = 'line'

    client = DataFrameClient(host, port, user, password, dbname)
    print('passed DataFrameClient creation')

    start = datetime.datetime(2015, 1, 1)
    end = datetime.datetime(2016, 1, 1)
    df = pdr.DataReader('F', 'iex', start, end)
    df.index = pd.DatetimeIndex(pd.to_datetime(list(df.index)))
    print(type(df.index))

    print('Trying to create database')
    client.create_database(dbname)
    client.write_points(df, 'Stock', protocol=protocol)
    # Measurement names are case-sensitive; query the name that was written
    client.query('select * from "Stock"')
    client.drop_database(dbname)
    print('Finished')
def test_dataframe_write_points_with_whitespace_in_column_names(self):
    """write_points should escape white space in column names."""
    now = pd.Timestamp('1970-01-01 00:00+00:00')
    dataframe = pd.DataFrame(
        data=[["1", 1, 1.0], ["2", 2, 2.0]],
        index=[now, now + timedelta(hours=1)],
        columns=["column one", "column two", "column three"])
    expected = (
        b"foo column\\ one=\"1\",column\\ two=1i,column\\ three=1.0 0\n"
        b"foo column\\ one=\"2\",column\\ two=2i,column\\ three=2.0 "
        b"3600000000000\n"
    )

    with requests_mock.Mocker() as m:
        m.register_uri(requests_mock.POST,
                       "http://localhost:8086/write",
                       status_code=204)
        cli = DataFrameClient(database='db')
        cli.write_points(dataframe, 'foo')

        self.assertEqual(m.last_request.body, expected)