import pandas as pd
from influxdb import DataFrameClient


def main(host='localhost', port=8086):
    user = '******'
    password = '******'
    dbname = 'example'

    client = DataFrameClient(host, port, user, password, dbname)

    print("Create pandas DataFrame")
    df = pd.DataFrame(data=list(range(30)),
                      index=pd.date_range(start='2014-11-16',
                                          periods=30, freq='H'))

    print("Create database: " + dbname)
    client.create_database(dbname)

    print("Write DataFrame")
    client.write_points(df, 'demo')

    print("Write DataFrame with Tags")
    client.write_points(df, 'demo', {'k1': 'v1', 'k2': 'v2'})

    print("Read DataFrame")
    client.query("select * from demo")

    print("Delete database: " + dbname)
    # drop_database() is the call that removes a database in the 1.x client;
    # the client has no delete_database() method.
    client.drop_database(dbname)
from influxdb import DataFrameClient


def get_df_client(config: dict):
    """Create a pandas DataFrame client based on the passed config dictionary.

    :param config: (mandatory, dict) the loaded configuration.
    :return: DataFrameClient
    """
    cfg_db = config['influxdb']

    # check for optional port
    if 'port' in cfg_db.keys():
        port = cfg_db['port']
    else:
        port = 8086

    # create the InfluxDB client
    dbclient = DataFrameClient(host=cfg_db['host'],
                               port=port,
                               username=cfg_db['user'],
                               password=cfg_db['password'])

    # make sure the database exists (if it already exists, a new one will
    # not be created)
    dbclient.query(f"CREATE DATABASE {cfg_db['database']}")

    # select the wanted database
    dbclient.switch_database(cfg_db['database'])
    return dbclient
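For reference, a minimal usage sketch for get_df_client; the config keys mirror exactly what the function reads, while the concrete host, credentials and database name are placeholders, not values from the original:

config = {
    'influxdb': {
        'host': 'localhost',       # placeholder host
        'user': 'reader',          # placeholder credentials
        'password': 'secret',
        'database': 'example',
        # 'port' is optional and defaults to 8086 when omitted
    }
}
dbclient = get_df_client(config)
print(dbclient.query('SHOW MEASUREMENTS'))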
def get_data_full(influxdb_host, influxdb_port, formatter=sizeof_fmt):
    client = DataFrameClient(influxdb_host, influxdb_port, "", "", "telegraf")
    apps = [
        app["value"] for app in list(
            client.query(
                'SHOW TAG VALUES ON "telegraf" WITH KEY="host_app_dst_port"'))[0]
        if 'salt' not in app["value"]
    ]
    apps2 = [
        app["value"] for app in list(
            client.query(
                'SHOW TAG VALUES ON "telegraf" WITH KEY="host_app_src"'))[0]
        if 'salt' not in app["value"]
    ]
    # pd.DataFrame.from_items() has been removed from pandas; from_dict()
    # with orient="index" builds the same row-oriented matrix.
    matrix = pd.DataFrame.from_dict(
        {a: [matrix_value(query_template, client, a, b) for b in apps2]
         for a in apps},
        orient="index", columns=apps2)
    svg = get_svg(matrix)
    # applymap() returns a new frame; the original discarded the result,
    # so assign it back for the formatter to take effect.
    matrix = matrix.applymap(formatter)
    return matrix, svg
def main(host='localhost', port=8086):
    """Instantiate the connection to the InfluxDB client."""
    user = '******'
    password = '******'
    dbname = 'demo'
    # Temporarily avoid line protocol time conversion issues #412, #426, #431.
    protocol = 'json'

    client = DataFrameClient(host, port, user, password, dbname)

    print("Create pandas DataFrame")
    df = pd.DataFrame(data=list(range(30)),
                      index=pd.date_range(start='2014-11-16',
                                          periods=30, freq='H'))

    print("Create database: " + dbname)
    client.create_database(dbname)

    print("Write DataFrame")
    client.write_points(df, 'demo', protocol=protocol)

    print("Write DataFrame with Tags")
    client.write_points(df, 'demo', {'k1': 'v1', 'k2': 'v2'},
                        protocol=protocol)

    print("Read DataFrame")
    client.query("select * from demo")

    print("Delete database: " + dbname)
    client.drop_database(dbname)
from influxdb import DataFrameClient, InfluxDBClient


class CInflux:
    def __init__(self, dbinfo, dbname):
        self.dbname = dbname
        self.l2_dbname = "%s_l2" % self.dbname
        self.client = InfluxDBClient(dbinfo['host'], dbinfo['port'],
                                     dbinfo['user'], dbinfo['password'],
                                     self.l2_dbname)
        self.df_client = DataFrameClient(dbinfo['host'], dbinfo['port'],
                                         dbinfo['user'], dbinfo['password'],
                                         self.dbname)

    def get(self):
        return self.df_client.query("select * from %s" % self.dbname)

    def get_newset_row(self):
        return self.df_client.query("select last(*) from %s" % self.dbname)

    def set(self, df):
        return self.df_client.write_points(df, self.dbname, protocol='json')

    def create(self):
        self.df_client.create_database(self.dbname)

    def delete(self):
        self.df_client.drop_database(self.dbname)
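A small usage sketch for CInflux, assuming placeholder connection details; only the dbinfo keys that __init__ reads are required:

import pandas as pd

dbinfo = {'host': 'localhost', 'port': 8086,
          'user': 'reader', 'password': 'secret'}   # placeholder values
ci = CInflux(dbinfo, 'demo')
ci.create()
ci.set(pd.DataFrame({'value': [1.0, 2.0]},
                    index=pd.date_range('2014-11-16', periods=2, freq='H')))
print(ci.get_newset_row())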
def example_read_iroc_data():
    client = DataFrameClient(host, port, user, password, iroc_db)

    get_well_list = 'SHOW TAG VALUES WITH KEY = "well"'
    res = client.query(get_well_list)

    # Fifth element in the list, i.e. the fifth well. You can also iterate
    # over this with: for element in res['iroc_test']:
    all_wells_as_list = list(res[iroc_db])
    fifth_well_result = all_wells_as_list[4]
    wellname = fifth_well_result['value']
    print("Print fifth wellname in list")
    print(wellname)

    # Find all tags for a given well (wellname)
    get_tags_for_well = 'SHOW TAG VALUES WITH KEY = "tag" WHERE well =~ /{}/'.format(wellname)
    tag_result = client.query(get_tags_for_well)
    all_tags_as_list = list(tag_result[iroc_db])
    first_tag = all_tags_as_list[0]['value']
    print("Number of tags (sensors) for well {} is {}".format(
        wellname, len(all_tags_as_list)))

    # Now fetch the dataframe for this tag. In this case the tag is unique,
    # but if not, you could combine several tags (well, sensor, facility...).
    # In the query you can select mean() or omit it. If you omit it you get
    # raw values; if you use mean() you also need a GROUP BY time() clause
    # to set the interval. This query fetches all data, but you can also use
    # "AND time >= from_time" style syntax (see the Grafana query inspector).
    data_query = 'SELECT mean("value") as "{}" FROM {} WHERE ("tag" =~ /^{}$/) GROUP BY time(10m) fill(previous)'.\
        format(first_tag, iroc_db, first_tag)
    data_result = client.query(data_query)
    dataframe = list(data_result.values())[0]
    print(dataframe.describe())
    print(dataframe.head())
def main(host='qwerty.com.ar', port=8086):
    """Instantiate the connection to the InfluxDB client."""
    user = '******'
    password = ''
    dbname = 'pandas'
    protocol = 'line'

    client = DataFrameClient(host, port, user, password, dbname)

    print("Create pandas DataFrame")
    df = pd.DataFrame(data=list(range(30)),
                      index=pd.date_range(start='2014-11-16',
                                          periods=30, freq='H'),
                      columns=['0'])

    print("Create database: " + dbname)
    client.create_database(dbname)

    print("Write DataFrame")
    client.write_points(df, 'demo', protocol=protocol)

    print("Write DataFrame with Tags")
    client.write_points(df, 'demo', {'k1': 'v1', 'k2': 'v2'},
                        protocol=protocol)

    print("Read DataFrame")
    client.query("select * from demo")
def main(host='localhost', port=8086):
    user = '******'
    password = '******'
    dbname = 'demo'
    # Temporarily used to avoid line protocol time conversion issues
    # #412, #426, #431.
    protocol = 'json'

    client = DataFrameClient(host, port, user, password, dbname)

    print("Create pandas DataFrame")
    df = pd.DataFrame(data=list(range(30)),
                      index=pd.date_range(start='2014-11-16',
                                          periods=30, freq='H'))

    print("Create database: " + dbname)
    client.create_database(dbname)

    print("Write DataFrame")
    client.write_points(df, 'demo', protocol=protocol)

    print("Write DataFrame with Tags")
    client.write_points(df, 'demo', {'k1': 'v1', 'k2': 'v2'},
                        protocol=protocol)

    print("Read DataFrame")
    client.query("select * from demo")

    print("Delete database: " + dbname)
    client.drop_database(dbname)
from influxdb import DataFrameClient


class DataSource(object):
    def __init__(self, host="localhost", port='8086', username="******",
                 password="******", db_name=None, measurement=None):
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.db_name = db_name
        self.measurement = measurement
        if self._check_influxdb_connected(host, port, username, password,
                                          db_name):
            self.client_api = DataFrameClient(host, port, username, password,
                                              db_name)

    def _check_influxdb_connected(self, host, port, username, password,
                                  db_name):
        client = DataFrameClient(host, port, username, password, db_name)
        result = True
        try:
            client.get_list_database()
            print("Connected to database server")
        except Exception:
            result = False
            print("Cannot connect. Please check the server configuration")
        return result

    def check_connected(self):
        return self._check_influxdb_connected(self.host, self.port,
                                              self.username, self.password,
                                              self.db_name)

    def _return(self):
        return (self.host, self.port, self.username, self.password,
                self.db_name, self.measurement)

    def query_analyzed(self, number_of_days=30):
        print("Get latest time series points")
        query_set = 'select value from %s order by time desc limit %s ;' % (
            self.measurement, number_of_days * 24 * 60)
        result = self.client_api.query(query_set)[self.measurement]
        return result

    def query_all(self):
        print("Get latest time series points")
        query_set = 'select value from %s limit 10000;' % (self.measurement)
        result = self.client_api.query(query_set)[self.measurement]
        return result

    def update_db(self, data):
        result = False
        try:
            result = self.client_api.write_points(data, self.measurement)
        except Exception as e:
            print(e)
        return result
from influxdb import DataFrameClient


def influx_to_csv(start_time: int, end_time: int, csv_path: str, _: dict):
    """Collect loss events and netem stats from InfluxDB, compute loss rates
    and store them as csv."""
    client = DataFrameClient(database='telegraf')
    sequence_df = client.query(
        f'select "packets", "bursty", "mode", "median" '
        f'from "telegraf"."autogen"."httpjson_knownsequence" '
        f'where time > {start_time} and time < {end_time};')['httpjson_knownsequence']
    domain = "client-domain-uplink"
    netem_df = client.query(
        f'select "{domain}.packet.dropped" AS "losses" '
        f'from "telegraf"."autogen"."httpjson_netem" '
        f'where time > {start_time} and time < {end_time};')['httpjson_netem']

    # bucketize the data
    first_packets_value = sequence_df['packets'][0]
    sequence_df['packets'] = sequence_df['packets'].diff()
    sequence_df['packets'].iloc[0] = first_packets_value

    diff_df = netem_df.diff()
    diff_df[diff_df < 0] = netem_df[diff_df < 0]  # netem counters are regularly reset
    netem_df = diff_df

    def compute_loss_rate(df, losses_name, packets_name):
        return (df[losses_name] / (df[losses_name] + df[packets_name])).fillna(0)

    # Use the sequence packet counts together with the netem loss counts
    # to work around netem oddities.
    joined_df = sequence_df.join(netem_df)
    joined_df['loss_rate'] = compute_loss_rate(joined_df, 'losses', 'packets')
    joined_df.to_csv(csv_path)
def main(host='localhost', port=8086):
    user = '******'
    password = '******'
    dbname = 'demo'
    protocol = 'line'

    client = DataFrameClient(host, port, user, password, dbname)

    print('Create pandas DataFrame')
    df = pd.DataFrame(data=list(range(30)),
                      index=pd.date_range(start='2014-11-16',
                                          periods=30, freq='H'),
                      columns=['0'])

    print('Create database: ' + dbname)
    client.create_database(dbname)

    print('Write DataFrame')
    client.write_points(df, 'demo', protocol=protocol)

    print('Write DataFrame with Tags')
    client.write_points(df, 'demo', {'k1': 'v1', 'k2': 'v2'},
                        protocol=protocol)

    print('Read DataFrame')
    client.query('select * from demo')

    print('Delete database: ' + dbname)
    client.drop_database(dbname)
from influxdb import DataFrameClient
from influxdb.exceptions import InfluxDBServerError


class CInflux:
    def __init__(self, dbinfo, dbname, iredis=create_redis_obj()):
        self.redis = iredis
        self.dbname = dbname
        self.df_client = DataFrameClient(dbinfo['host'], dbinfo['port'],
                                         dbinfo['user'], dbinfo['password'],
                                         self.dbname, timeout=10)

    def __del__(self):
        self.redis = None
        self.df_client = None

    def get_all_databases(self):
        if self.redis.exists(ALL_IN_DATABASES):
            return set(
                str(dbname, encoding="utf8")
                for dbname in self.redis.smembers(ALL_IN_DATABASES))
        else:
            all_dbs = self._get_all_databases()
            for _db in all_dbs:
                self.redis.sadd(ALL_IN_DATABASES, _db)
            return all_dbs

    def _get_all_databases(self):
        return [x['name'] for x in self.df_client.get_list_database()]

    def get(self, dbname=None):
        if dbname is None:
            dbname = self.dbname
        return self.df_client.query("select * from %s" % dbname)

    def get_newset_row(self, dbname=None):
        if dbname is None:
            dbname = self.dbname
        return self.df_client.query("select last(*) from %s" % dbname)

    def set(self, df, dbname=None):
        dbname = dbname if dbname is not None else self.dbname
        try:
            self.df_client.write_points(df, dbname, protocol='json')
            return True
        except InfluxDBServerError as e:
            logger.error(e)
            return False

    def create(self, dbname=None):
        if dbname is None:
            dbname = self.dbname
        if dbname in self.get_all_databases():
            return True
        self.df_client.create_database(dbname)
        self.redis.sadd(ALL_IN_DATABASES, dbname)
        return True

    def delete(self, dbname=None):
        if dbname is None:
            dbname = self.dbname
        if dbname not in self.get_all_databases():
            return True
        self.df_client.drop_database(dbname)
        self.redis.srem(ALL_IN_DATABASES, dbname)
        return True
def test_03_influx_wr(self):
    self.system_con()
    influxdb_host = 'localhost'
    influxdb_port = 8086
    influxdb_id = 'krmim'
    # 'krmim_2017' is the password that was actually passed to the client.
    influxdb_pwd = 'krmim_2017'
    influxdb_db = 'facility'
    influxdb_client = self.mqtt_agent.get_influxdb_mgr()
    influxdb_Dfclient = DataFrameClient(influxdb_host, influxdb_port,
                                        influxdb_id, influxdb_pwd,
                                        influxdb_db)
    if influxdb_client is None:
        print('influxdb configuration fail')

    packet = self.make_packet(facility_id='TS0001', sensor_code='0001', pv=330)
    _, _, modbus_udp = self.async_svr.convert_hex2decimal(
        packet, 'localhost', 6379)
    fac_daq = get_fac_inf(self.redis_con)
    redis_fac_info = json.loads(self.redis_con.get('facilities_info'))
    equipment_id = modbus_udp['equipment_id']
    if equipment_id in redis_fac_info.keys():
        fac_msg = config_fac_msg(equipment_id, fac_daq, modbus_udp,
                                 redis_fac_info)
        fields = {}
        facility_msg_json = json.loads(fac_msg)
        me_timestamp = time.time()
        del facility_msg_json[equipment_id]['ms_time']
        del facility_msg_json[equipment_id]['pub_time']
        for key in facility_msg_json[equipment_id].keys():
            print(key)
            fields[key] = float(facility_msg_json[equipment_id][key])
        # fields['me_time'] = me_timestamp
        influx_json = [{'measurement': equipment_id, 'fields': fields}]
        try:
            influxdb_Dfclient.query('DROP SERIES FROM TS0001')
            if influxdb_client.write_points(influx_json) is True:
                print('influx write success:' + str(influx_json))
            else:
                print('influx write failed:' + str(influx_json))
        except Exception as e:
            print(str(e.args))
        influxdb_client.close()

        ts5_no_coefficient = influxdb_Dfclient.query(
            'SELECT * FROM TS0001 where time >= now() - 15m')
        ts5_no_coefficient = ts5_no_coefficient['TS0001']
        rows_size = ts5_no_coefficient.shape[0]
        row_json = ts5_no_coefficient.to_dict(orient='records')[0]
        cmp = influx_json[0]['fields'] == row_json
        print(cmp)
        print(influx_json[0]['fields'])
        print(row_json)
        self.assertEqual(rows_size, 1)
        influxdb_Dfclient.close()
def get_rp_list(backend_host, backend_port, user, password, schema_list=None):
    # avoid a mutable default argument
    if schema_list is None:
        schema_list = []
    pd_influx_client = DataFrameClient(backend_host, backend_port, user,
                                       password)
    if not schema_list:
        schema_list_raw = pd_influx_client.query('SHOW DATABASES')
        schema_list = [
            e['name']
            for e in list(schema_list_raw.get_points(measurement='databases'))
        ]
    cq_list_raw = pd_influx_client.query('SHOW CONTINUOUS QUERIES')
    rp_dict = {}
    for schema in schema_list:
        result_df_dict = pd_influx_client.query(
            'SHOW RETENTION POLICIES ON "' + schema + '"')
        rp_list = list(result_df_dict.get_points(measurement='results'))
        cq_list = list(cq_list_raw.get_points(measurement=schema))

        # enrich RPs with intervals taken from the CQs
        cq_into_rp_set = set()
        for cq in cq_list:
            parsed_cq = sqlparse_query(cq['query'])
            from_m = parse_measurement_path(schema, get_cq_from(parsed_cq))
            into = parse_measurement_path(schema, get_cq_into(parsed_cq))
            if into['measurement'] != ':MEASUREMENT' \
                    and into['measurement'] != from_m['measurement']:
                # NB: if the CQ inserts into another measurement, skip it
                continue
            into_rp = into['rp']
            cq_into_rp_set.add(from_m['rp'])
            cq_into_rp_set.add(into_rp)
            # next() with a default avoids an unhandled StopIteration when
            # no retention policy matches.
            rp_conf_raw = next(([rp, i] for i, rp in enumerate(rp_list)
                                if rp['name'] == into_rp), None)
            if not rp_conf_raw:
                continue
            rp_conf, rp_conf_i = rp_conf_raw
            if 'interval' in rp_conf:
                # NB: we assume all CQs for all measurements with the same
                # INTO RP use the same GROUP BY time interval;
                # this is a strong and limiting assumption
                continue
            rp_list[rp_conf_i]['interval'] = get_cq_interval(parsed_cq)

        # keep only the RPs that match a CQ
        active_rp_list = []
        for i, rp in enumerate(rp_list):
            if rp['name'] in cq_into_rp_set:
                active_rp_list.append(rp)
        if active_rp_list:
            rp_dict[schema] = active_rp_list
    return rp_dict
def analyseWarn(name, qname, topk=6):
    client = DataFrameClient(host='127.0.0.1', port=8086, username='******',
                             password='******', database='testdb')
    query_positive = ('select * from simulate where ' + qname +
                      ' >0 ORDER BY time DESC limit 10')
    query_negative = ('select * from simulate where ' + qname +
                      ' <0 ORDER BY time DESC limit 5')
    data_p = client.query(query_positive, chunked=False)
    data_positive = data_p['simulate']
    normalSample = data_positive[name]
    data_n = client.query(query_negative, chunked=False)
    data_negative = data_n['simulate']
    anamolySample = data_negative[name]
    return analyseReasonWithXsqure(anamolySample, normalSample, topk, name)
def main(host='localhost', port=8086):
    """Instantiate the connection to the InfluxDB client."""
    # the docstring must be the first statement in the function;
    # the original placed it after logging.basicConfig()
    logging.basicConfig(level=logging.INFO)
    user = '******'
    password = '******'
    dbname = 'demo'
    protocol = 'json'

    client = DataFrameClient(host, port, user, password, dbname)

    logging.info("Create pandas DataFrame")
    today = datetime.datetime.today()
    date_list = get_week_list_by_date(today)
    df = pd.DataFrame(data=list(range(len(date_list))), index=date_list,
                      columns=['W'])

    logging.info("Create database: " + dbname)
    client.create_database(dbname)

    logging.info("Write DataFrame to dsm_power table")
    client.write_points(df.copy(), 'dsm_power',
                        tags={'device_id': 'II8583-Z5EKI-N9700'},
                        protocol=protocol)

    logging.info("Write DataFrame to electric_power table")
    client.write_points(df.copy(), 'electric_power',
                        tags={'device_id': 'II8583-H9871-78D4F'},
                        protocol=protocol)

    logging.info("origin dataframe: {}".format(df))

    logging.info("Read DataFrame from dsm_power table")
    fetch_df = client.query("select * from dsm_power")['dsm_power']
    fetch_df.index = fetch_df.index.tz_localize(None)
    logging.info("fetch: {}".format(fetch_df))

    logging.info("Read DataFrame from electric_power table")
    fetch_df = client.query("select * from electric_power")['electric_power']
    fetch_df.index = fetch_df.index.tz_localize(None)
    logging.info("fetch: {}".format(fetch_df))

    logging.info("get data by specific time range")
    start_date = "2018-06-04"
    end_date = "2018-06-06"
    fetch_df = client.query("select * from dsm_power where time > '" +
                            start_date + "' and time < '" + end_date +
                            "'")['dsm_power']
    fetch_df.index = fetch_df.index.tz_localize(None)
    logging.info("fetch: {}".format(fetch_df))

    logging.info("Delete database: " + dbname)
    client.drop_database(dbname)
def get_alt_data(self, user, password, schema, queries, precision):
    if not user and not password \
            and self.backend_user and self.backend_password:
        user = self.backend_user
        password = self.backend_password
    got_alt_data = False
    alt_data_list = []
    for query_string in queries:
        logging.debug("Checking {}".format(query_string))
        query_sanitized = urllib.parse.unquote(query_string)
        alt_data = self.guard.get_data(user, password, schema,
                                       query_sanitized)
        if alt_data is not None:
            # logging.debug("Got alternative data for query")
            got_alt_data = True
        alt_data_list.append(alt_data)
    if not got_alt_data:
        return None

    # fall back to the backend for queries that have no alternative data
    pd_influx_client = DataFrameClient(self.backend_host, self.backend_port,
                                       user, password, schema)
    for i, query_string in enumerate(queries):
        if alt_data_list[i] is None:
            alt_data_list[i] = pd_influx_client.query(query_string)
    my_json = pd_result_to_influx_result(alt_data_list, precision)
    # logging.debug("formatted output: {0}".format(my_json))
    return my_json
def water(request):
    chart_type = None
    graph = None
    client = DataFrameClient(host='localhost', port=8086)
    client.switch_database('NOAA_water_database')
    f = client.query("Select * from h2o_pH")
    df = pd.concat(f)
    df = df.reset_index(level=[0, 1])
    # df = pd.DataFrame(Air.objects.all().values(),
    #                   columns=["date", "Carbon_Monoxide", "NOx", "Ozone",
    #                            "Particulate_Matter", "SOx"])
    # df = df.set_index('date')
    if request.method == 'POST':
        chart_type = request.POST['sales']
        date_from = request.POST['date_from']
        date_to = request.POST['date_to']
        df['level_1'] = df['level_1'].apply(lambda x: x.strftime('%Y-%m-%d'))
        df2 = df.groupby('level_1', as_index=False)['pH'].agg('sum')
        if chart_type != "":
            if date_from != "" and date_to != "":
                df = df[(df['level_1'] > date_from) &
                        (df['level_1'] < date_to)]
                df2 = df.groupby('level_1', as_index=False)['pH'].agg('sum')
            graph = get_simple_plot(chart_type, x=df2['level_1'],
                                    y=df2['pH'], data=df)
    context = {
        'graph': graph,
    }
    # print(df2)
    return render(request, 'Water.html', context)
import typing

import pandas as pd
from influxdb import DataFrameClient


def get_adjustments(client: DataFrameClient,
                    symbol: typing.Union[list, str] = None,
                    typ: str = None,
                    provider: str = None):
    query = "SELECT * FROM splits_dividends"

    where = list()
    if symbol is not None:
        if isinstance(symbol, list) and len(symbol) > 0:
            where.append("symbol =~ /{}/".format(
                "|".join(['^' + s + '$' for s in symbol])))
        elif isinstance(symbol, str) and len(symbol) > 0:
            where.append("symbol = '{}'".format(symbol))

    if typ is not None:
        where.append("type='{}'".format(typ))

    if provider is not None:
        where.append("provider='{}'".format(provider))

    if len(where) > 0:
        query += " WHERE " + " AND ".join(where)

    result = client.query(query)
    if result:
        result = result['splits_dividends']
        result.set_index(['symbol', 'type', 'provider'], inplace=True,
                         drop=True, append=True)
        result.sort_index(inplace=True)
        return result

    return pd.DataFrame()
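A brief usage sketch for get_adjustments; the connection parameters, database name and symbols below are illustrative placeholders:

client = DataFrameClient('localhost', 8086, 'reader', 'secret',
                         'markets')  # placeholder connection
adj = get_adjustments(client, symbol=['AAPL', 'MSFT'], typ='split')
print(adj.head())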
from influxdb import DataFrameClient


class LinktoInfluxDB():
    def __init__(self, host, port, user, password, database, measurement,
                 time_precision='s'):
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.database = database
        self.measurement = measurement
        # Pass the credentials and the database explicitly; the original
        # passed the database as the third positional argument, which
        # DataFrameClient interprets as the username.
        self.client = DataFrameClient(self.host, self.port, self.user,
                                      self.password, self.database)
        self.time_precision = time_precision

    def get_data(self, query):
        try:
            list_of_df = self.client.query(query)
            return list_of_df
        except Exception as e:
            logger.error(e)

    def put_data(self, data, columns):
        try:
            self.client.write_points(dataframe=data,
                                     database=self.database,
                                     measurement=self.measurement,
                                     field_columns=columns,
                                     batch_size=5000,
                                     protocol='line',
                                     time_precision=self.time_precision)
        except Exception as e:
            logger.error(e)
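A hedged usage sketch for LinktoInfluxDB; the class logs through a module-level logger, so one is defined here, and the connection details and measurement name are placeholders:

import logging
import pandas as pd

logger = logging.getLogger(__name__)

link = LinktoInfluxDB('localhost', 8086, 'writer', 'secret',
                      'sensors', 'temperature')  # placeholder values
df = pd.DataFrame({'value': [21.5, 21.7]},
                  index=pd.date_range('2014-11-16', periods=2, freq='H'))
link.put_data(df, columns=['value'])
print(link.get_data('select * from temperature limit 2'))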
from datetime import datetime

import pandas as pd
from influxdb import DataFrameClient
from pandas import DataFrame, Series


def getDataYMax(code: str, influx: DataFrameClient, start: datetime,
                end: datetime) -> DataFrame:
    query = '''
        SELECT max("price") AS "max"
        FROM "shareDev"."autogen"."share_tick"
        WHERE time > '{start:s}Z' AND time < '{end:s}Z' AND ("code"='{code:s}')
        GROUP BY time(3h), "code" fill(linear)
    '''.format(code=code,
               start=start.isoformat(timespec="milliseconds"),
               end=end.isoformat(timespec="milliseconds"))
    step = int(5 * 24 / 3)  # 5 days * 24 h / 3 h (pts)
    df = influx.query(query).get(('share_tick', (('code', code),)))
    df['max'].fillna(method='backfill', inplace=True)
    df['max'].fillna(method='ffill', inplace=True)
    source = pd.Series(df['max'], index=df.index)
    # calc rolling max over `step` points: maxR_d-5
    res = source.rolling(step).max()  # .shift(-1*step)
    result: Series = res.combine(source, lambda r, s: (r - s) / s)
    result: DataFrame = result.to_frame()
    indexName = '{code:s}_maxR_d-5'.format(code=code)
    result.columns = [indexName]
    return result
def getKernelDetails(host, port, user, password, dbname, host_name,
                     field_name, from_date, to_date):
    datetime_object1 = datetime.strptime(from_date, '%Y-%m-%d %H:%M:%S')
    datetime_object2 = datetime.strptime(to_date, '%Y-%m-%d %H:%M:%S')
    diff = (datetime_object2 - datetime_object1).total_seconds()
    group_val = int(diff / 500)
    if group_val < 10:
        group_val = 10
    client = DataFrameClient(host, port, user, password, dbname)
    query = ("SELECT mean(" + field_name + ")/1000000 FROM kernel WHERE host='" +
             host_name + "' AND (time >= '" + from_date + "' AND time <= '" +
             to_date + "') GROUP BY time(" + str(group_val) + "s) fill(0)")
    # print(query)
    data = client.query(query)
    dataframe = data['kernel']
    result = {}
    result["mean"] = json.loads(
        dataframe['mean'].to_json(orient='values', force_ascii=True))
    timestamps = []
    for x in dataframe.index.tolist():
        date = parser.parse(str(x))
        iso = date.isoformat()
        inter_date = iso.split("+")
        t = time.mktime(
            datetime.strptime(inter_date[0], "%Y-%m-%dT%H:%M:%S").timetuple())
        timestamps.append(t)
    result["time"] = timestamps
    result["unit"] = "X10^6"
    return result, query
from influxdb import DataFrameClient
from sklearn.ensemble import IsolationForest


def updateWindow(l_sys, l_namenode, l_FS, l_RPC, cont):
    # filter conditions; adjust as needed
    query = ('select * from ganglia where w_fs >0 and w_namenode>0 '
             'and w_rpc >0 limit 1024;')
    client = DataFrameClient(host='127.0.0.1', port=8086, username='******',
                             password='******', database='testdb')
    result = client.query(query, chunked=False)
    data = result['ganglia']
    d_sys = data[l_sys]
    d_namenode = data[l_namenode]
    d_FS = data[l_FS]
    d_RPC = data[l_RPC]
    ilf_sys = IsolationForest(n_estimators=100, contamination=cont)
    ilf_namenode = IsolationForest(n_estimators=100, contamination=cont)
    ilf_FS = IsolationForest(n_estimators=100, contamination=cont)
    ilf_RPC = IsolationForest(n_estimators=100, contamination=cont)
    ilf_sys.fit(d_sys)
    ilf_namenode.fit(d_namenode)
    ilf_FS.fit(d_FS)
    ilf_RPC.fit(d_RPC)
    print("update finished")
    return ilf_sys, ilf_namenode, ilf_FS, ilf_RPC
def getNetDetails(host, port, user, password, dbname, host_name, field_name,
                  from_date, to_date):
    datetime_object1 = datetime.strptime(from_date, '%Y-%m-%d %H:%M:%S')
    datetime_object2 = datetime.strptime(to_date, '%Y-%m-%d %H:%M:%S')
    diff = (datetime_object2 - datetime_object1).total_seconds()
    group_val = int(diff / 500)
    if group_val < 10:
        group_val = 10
    client = DataFrameClient(host, port, user, password, dbname)
    query = ("SELECT non_negative_derivative(mean(" + field_name + "), 10s)/1048576 as mean "
             "FROM net WHERE host='" + host_name + "' AND (time >= '" + from_date +
             "' AND time <= '" + to_date + "') GROUP BY time(" +
             str(group_val) + "s) fill(0)")
    data = client.query(query)
    dataframe = data['net']
    result = {}
    # result["mean"] = json.loads(dataframe['mean'].to_json(orient='values', force_ascii=True))
    timestamps = []
    values = []
    for x in dataframe.index.tolist():
        date = parser.parse(str(x))
        iso = date.isoformat()
        inter_date = iso.split("+")
        t = time.mktime(datetime.strptime(inter_date[0],
                                          "%Y-%m-%dT%H:%M:%S").timetuple())
        timestamps.append(t)
    for x in dataframe['mean'].tolist():
        # values above 5120 MBPS are treated as outliers and zeroed
        values.append(0 if x > 5120 else x)
    result["time"] = timestamps
    result["mean"] = values
    result["unit"] = "MBPS"
    return result, query
import pandas as pd
from influxdb import DataFrameClient


class InfluxdbAccessor(BaseAccesor):
    # the original defined __index__ here, which would never be called;
    # __init__ is the intended constructor
    def __init__(self, host, port, database):
        self.client = DataFrameClient(host=host, port=port, database=database)

    def get_data(self, table, time_from=None, time_to=None, include_from=True,
                 include_to=True, others=None, empty=None, **kwargs):
        terms = [where_term(k, v) for k, v in kwargs.items()]
        terms = terms + time_terms(time_from, time_to, include_from,
                                   include_to)
        clauses = where_clause(terms) if others is None \
            else [where_clause(terms)] + to_iter(others)
        qstring = select_query(table, clauses=clauses)
        return self.client.query(qstring).get(table, empty)

    def get_continuous_contracts(self, table, time_from=None, time_to=None,
                                 **kwargs):
        contracts = self.get_data(table, time_from, time_to,
                                  empty=pd.DataFrame(), **kwargs)
        if contracts.empty or contracts.index[0] > time_from:
            others = [order_by('DESC'), limit(1)]
            prev_contract = self.get_data(table, time_to=time_from,
                                          others=others,
                                          empty=pd.DataFrame(), **kwargs)
            prev_contract.index = pd.Index([time_from])
            # DataFrame.append() has been removed from pandas;
            # pd.concat() is the equivalent
            contracts = pd.concat([prev_contract, contracts])
        start, end = 'start', 'end'
        contracts[start] = contracts.index
        sr_end = pd.concat([contracts[start][1:], pd.Series([time_to])],
                           ignore_index=True)
        contracts[end] = sr_end.tolist()
        for i, row in contracts.iterrows():
            yield (row[start], row[end]), row
def getNetDetailsDefault(host, port, user, password, dbname, host_name,
                         field_name):
    client = DataFrameClient(host, port, user, password, dbname)
    query = ("SELECT non_negative_derivative(mean(" + field_name + "), 10s)/1048576 as mean "
             "FROM net WHERE host ='" + host_name + "' AND time > now() - 7d "
             "GROUP BY time(1h) fill(0)")
    data = client.query(query)
    dataframe = data['net']
    result = {}
    # result["mean"] = json.loads(dataframe['mean'].to_json(orient='values', force_ascii=True))
    timestamps = []
    values = []
    for x in dataframe.index.tolist():
        date = parser.parse(str(x))
        iso = date.isoformat()
        inter_date = iso.split("+")
        t = time.mktime(datetime.strptime(inter_date[0],
                                          "%Y-%m-%dT%H:%M:%S").timetuple())
        timestamps.append(t)
    for x in dataframe['mean'].tolist():
        # values above 5120 MBPS are treated as outliers and zeroed
        values.append(0 if x > 5120 else x)
    result["time"] = timestamps
    result["mean"] = values
    result["unit"] = "MBPS"
    return result, query
def net_holtwinter(host, port, user, password, dbname, query,
                   number_of_prediction):
    client = DataFrameClient(host, port, user, password, dbname)
    data = client.query(query)
    dataframe = data['net']
    result = {}
    timestamps = []
    for x in dataframe.index.tolist():
        date = parser.parse(str(x))
        iso = date.isoformat()
        inter_date = iso.split("+")
        dt = datetime.strptime(inter_date[0], "%Y-%m-%dT%H:%M:%S")
        timestamps.append(np.datetime64(dt))
    dataframe2 = pd.DataFrame({'mean': dataframe['mean'].tolist()},
                              index=timestamps)
    ts = dataframe2['mean']
    predict = triple_exponential_smoothing(ts, 24, 0.3, 0.029, 0.3,
                                           number_of_prediction)
    prediction_times = []
    for x in predict.index.tolist():
        date = parser.parse(str(x))
        iso = date.isoformat()
        inter_date = iso.split("+")
        t = time.mktime(datetime.strptime(inter_date[0],
                                          "%Y-%m-%dT%H:%M:%S").timetuple())
        prediction_times.append(t)
    result["mean"] = predict.tolist()
    result["time"] = prediction_times
    result["unit"] = "%"
    return result
class InfluxDBClientProxy(object):
    client_cls = DataFrameClient

    def __init__(self):
        self._client = None
        self._dfclient = None
        self.engine = None
        self.dfengine = None

    def init_app(self, app):
        client_args = (
            app.config['INFLUXDB_HOST'],
            app.config['INFLUXDB_PORT'],
            app.config['INFLUXDB_USER'],
            app.config['INFLUXDB_PASSWORD'],
            app.config['INFLUXDB_DB']
        )
        self._client = InfluxDBClient(*client_args)
        self._dfclient = DataFrameClient(*client_args)
        self.engine = Engine(self._client)
        self.dfengine = Engine(self._dfclient)

    def dfquery(self, query):
        return self._dfclient.query(query)

    def __getattr__(self, name):
        return getattr(self._client, name)
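A minimal wiring sketch for InfluxDBClientProxy with a Flask app, assuming the surrounding module's imports (InfluxDBClient, DataFrameClient, Engine) are available; the config values are placeholders:

from flask import Flask

app = Flask(__name__)
app.config.update(INFLUXDB_HOST='localhost', INFLUXDB_PORT=8086,
                  INFLUXDB_USER='reader', INFLUXDB_PASSWORD='secret',
                  INFLUXDB_DB='metrics')  # placeholder settings

influx = InfluxDBClientProxy()
influx.init_app(app)
df_result = influx.dfquery('select * from cpu limit 5')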
def query_tag(tag):
    client = DataFrameClient(host, port, user, password, dbname)
    tag_query = 'SHOW TAG VALUES WITH KEY = "' + str(tag) + '"'
    tag_res = client.query(tag_query)
    res = list(set([v['value'] for v in list(tag_res.get_points())]))
    res.sort()
    return res
def getResultsDataFrame(host, querystr, port=8086, dbuser='******',
                        dbpass='******', dbname='DBname'):
    """
    Attempts to distinguish queries that have results grouped by a tag
    vs. those which are just of multiple fields. May be buggy still.
    """
    idfc = DataFrameClient(host, port, dbuser, dbpass, dbname)

    results = idfc.query(querystr)

    betterResults = {}
    # results is a dict of dataframes, but it's a goddamn mess. Clean it up.
    for rkey in results.keys():
        # If you had a tag that you "GROUP BY" in the query, you'll now have
        # a tuple of the metric name and the tag + value pair. If you had
        # no tag to group by, you'll have just the flat result.
        if isinstance(rkey, tuple):
            # Someone tell me again why Pandas is so great?
            # I suppose it could be jankiness in influxdb-python?
            # This magic 'tval' line below is seriously dumb though.
            tval = rkey[1][0][1]
            dat = results[rkey]
            betterResults.update({tval: dat})
        elif isinstance(rkey, str):
            betterResults = results[rkey]

    # This is at least a little better
    return betterResults
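A short usage sketch for getResultsDataFrame with a GROUP BY tag query, so the grouped-results branch returns a dict of tag value to DataFrame; the host, credentials, database and query are placeholders:

results = getResultsDataFrame('localhost',
                              'SELECT mean("value") FROM "power" '
                              'WHERE time > now() - 1h GROUP BY "location"',
                              dbuser='reader', dbpass='secret',
                              dbname='metrics')  # placeholder values
for tag_value, frame in results.items():
    print(tag_value, frame.shape)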
import time
from datetime import datetime

import paho.mqtt.publish as publish
from influxdb import DataFrameClient


def report_energy_use():
    client = DataFrameClient('localhost', 8086, '', '', 'MulHomeAutomation')
    result = client.query(
        "select * from power where location='house' and time > now() - 1h limit 400"
    )
    db = result['power']
    db.reset_index(level=0, inplace=True)
    db = db.rename(columns={'index': 'date'})
    x1 = db['date']
    y1 = db['value']
    dataSet = []
    for i in range(0, len(x1)):
        dataPoint = []
        t0 = time.mktime(
            datetime.strptime(str(x1[i])[0:19],
                              "%Y-%m-%d %H:%M:%S").timetuple())
        # *1000 for js time, -43.2 million ms for -12 h
        dataPoint.append(t0 * 1000 - 43200000)
        if i > 0:
            delta_t = ((x1[i] - x1[i - 1]).total_seconds()) / 3600
            # integrate power over time
            dataPoint.append(dataSet[i - 1][1] + (y1[i] * delta_t))
        else:
            dataPoint.append(0)
        dataSet.append(dataPoint)
    # print(dataSet[len(dataSet) - 1][1])
    publish.single("house/energy", dataSet[len(dataSet) - 1][1],
                   hostname="192.168.0.105")
def test_multiquery_into_dataframe(self):
    """Test multiquery into df for TestDataFrameClient object."""
    data = {
        "results": [
            {
                "series": [
                    {
                        "name": "cpu_load_short",
                        "columns": ["time", "value"],
                        "values": [
                            ["2015-01-29T21:55:43.702900257Z", 0.55],
                            ["2015-01-29T21:55:43.702900257Z", 23422],
                            ["2015-06-11T20:46:02Z", 0.64]
                        ]
                    }
                ]
            }, {
                "series": [
                    {
                        "name": "cpu_load_short",
                        "columns": ["time", "count"],
                        "values": [
                            ["1970-01-01T00:00:00Z", 3]
                        ]
                    }
                ]
            }
        ]
    }

    pd1 = pd.DataFrame(
        [[0.55], [23422.0], [0.64]], columns=['value'],
        index=pd.to_datetime([
            "2015-01-29 21:55:43.702900257+0000",
            "2015-01-29 21:55:43.702900257+0000",
            "2015-06-11 20:46:02+0000"]))
    if pd1.index.tzinfo is None:
        pd1.index = pd1.index.tz_localize('UTC')

    pd2 = pd.DataFrame(
        [[3]], columns=['count'],
        index=pd.to_datetime(["1970-01-01 00:00:00+00:00"]))
    if pd2.index.tzinfo is None:
        pd2.index = pd2.index.tz_localize('UTC')

    expected = [{'cpu_load_short': pd1}, {'cpu_load_short': pd2}]

    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
    iql = "SELECT value FROM cpu_load_short WHERE region=$region;"\
          "SELECT count(value) FROM cpu_load_short WHERE region=$region"
    bind_params = {'region': 'us-west'}
    with _mocked_session(cli, 'GET', 200, data):
        result = cli.query(iql, bind_params=bind_params)
        for r, e in zip(result, expected):
            for k in e:
                assert_frame_equal(e[k], r[k])
def data():
    client = DataFrameClient(host, port, user, password, dbname)
    data = client.query("select * from cpu where cpu = 'cpu-total' "
                        "AND host='etlnode1' limit 10")
    dataframe = data['cpu']
    result = {}
    result["usage_idle"] = json.loads(
        dataframe['usage_idle'].to_json(orient='values'))
    result["usage_user"] = json.loads(
        dataframe['usage_user'].to_json(orient='values'))
    result["usage_softirq"] = json.loads(
        dataframe['usage_softirq'].to_json(orient='values'))
    result["usage_system"] = json.loads(
        dataframe['usage_system'].to_json(orient='values'))
    result["index"] = dataframe.index.tolist()
    return jsonify(result), 200, {'Access-Control-Allow-Origin': '*'}
def test_query_into_dataframe(self):
    """Test query into df for TestDataFrameClient object."""
    data = {
        "results": [{
            "series": [
                {"measurement": "network",
                 "tags": {"direction": ""},
                 "columns": ["time", "value"],
                 "values": [["2009-11-10T23:00:00Z", 23422]]
                 },
                {"measurement": "network",
                 "tags": {"direction": "in"},
                 "columns": ["time", "value"],
                 "values": [["2009-11-10T23:00:00Z", 23422],
                            ["2009-11-10T23:00:00Z", 23422],
                            ["2009-11-10T23:00:00Z", 23422]]
                 }
            ]
        }]
    }

    pd1 = pd.DataFrame(
        [[23422]], columns=['value'],
        index=pd.to_datetime(["2009-11-10T23:00:00Z"]))
    if pd1.index.tzinfo is None:
        pd1.index = pd1.index.tz_localize('UTC')

    pd2 = pd.DataFrame(
        [[23422], [23422], [23422]], columns=['value'],
        index=pd.to_datetime(["2009-11-10T23:00:00Z",
                              "2009-11-10T23:00:00Z",
                              "2009-11-10T23:00:00Z"]))
    if pd2.index.tzinfo is None:
        pd2.index = pd2.index.tz_localize('UTC')

    expected = {
        ('network', (('direction', ''),)): pd1,
        ('network', (('direction', 'in'),)): pd2
    }

    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
    with _mocked_session(cli, 'GET', 200, data):
        result = cli.query('select value from network group by direction;')
        for k in expected:
            assert_frame_equal(expected[k], result[k])
def test_query_into_dataframe(self):
    data = [
        {
            "name": "foo",
            "columns": ["time", "sequence_number", "column_one"],
            "points": [
                [3600, 16, 2],
                [3600, 15, 1],
                [0, 14, 2],
                [0, 13, 1]
            ]
        }
    ]
    # dataframe sorted ascending by time first, then sequence_number
    dataframe = pd.DataFrame(
        data=[[13, 1], [14, 2], [15, 1], [16, 2]],
        index=pd.to_datetime([0, 0, 3600, 3600], unit='s', utc=True),
        columns=['sequence_number', 'column_one'])
    with _mocked_session('get', 200, data):
        cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
        result = cli.query('select column_one from foo;')
        assert_frame_equal(dataframe, result)
def test_query_with_empty_result(self):
    cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
    with _mocked_session(cli, 'GET', 200, {"results": [{}]}):
        result = cli.query('select column_one from foo;')
        self.assertEqual(result, {})
def services():
    if request.method == 'GET':
        conn = dbConnection()
        cur = conn.cursor()
        try:
            # cur.execute("select array_to_json(array_agg(report)) FROM report")
            cur.execute("select * FROM report")
        except:
            print("Error executing select")
        results = cur.fetchall()
        list_res = []
        for row in results:
            d = collections.OrderedDict()
            d['report_id'] = row[0]
            d['host_id'] = row[1]
            d['sub_system_id'] = row[2]
            d['report_json'] = json.loads(row[3])
            d['time'] = row[4]
            list_res.append(d)
        return jsonify({'results': list_res}), 200, \
            {'Access-Control-Allow-Origin': '*'}

    elif request.method == 'POST':
        # check each key explicitly; the original chained
        # `not 'host_id' or ...`, which always evaluates the string
        # literals as truthy and never aborts
        if (not request.json or 'host_id' not in request.json
                or 'sub_system_id' not in request.json
                or 'fromDate' not in request.json
                or 'toDate' not in request.json
                or 'time' not in request.json):
            abort(401)
        print("Entered......")
        conn = dbConnection()
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        try:
            cur.execute("select host_name FROM host where host_id=" +
                        str(request.json['host_id']))
        except:
            print("Error executing select")
        results_host = cur.fetchall()
        host_name = ''
        for row in results_host:
            host_name = row[0]
        print(host_name + ".....")
        try:
            cur.execute("select sub_system_name FROM sub_system "
                        "where sub_system_id=" +
                        str(request.json['sub_system_id']))
        except:
            print("Error executing select")
        results_sub_system = cur.fetchall()
        sub_system_name = ''
        for row in results_sub_system:
            sub_system_name = row[0]
        json_data = {}
        if sub_system_name == 'cpu':
            print("In cpu")
            client = DataFrameClient(host, port, user, password, dbname)
            toDate = request.json['toDate'].replace("/", "-") + ":00"
            fromDate = request.json['fromDate'].replace("/", "-") + ":00"
            query = ("select * from cpu where cpu = 'cpu-total' AND host='" +
                     host_name + "' AND (time<='" + toDate + "' OR time>='" +
                     fromDate + "') limit 10")
            print(query)
            data = client.query(query)
            dataframe = data['cpu']
            cpu_data = {}
            cpu_data["usage_idle"] = json.loads(
                dataframe['usage_idle'].to_json(orient='values'))
            cpu_data["usage_user"] = json.loads(
                dataframe['usage_user'].to_json(orient='values'))
            cpu_data["usage_softirq"] = json.loads(
                dataframe['usage_softirq'].to_json(orient='values'))
            cpu_data["usage_system"] = json.loads(
                dataframe['usage_system'].to_json(orient='values'))
            index_list = []
            for x in dataframe.index.tolist():
                date = parser.parse(str(x))
                index_list.append(date.isoformat())
            cpu_data["index"] = index_list
            json_data = cpu_data
        # print(json.dumps(json_data))
        print("After influx query")
        print("before postgres query")
        print(request.json['time'])
        # use distinct placeholder names; the original reused the key 'int',
        # so the second value silently overwrote the first
        query = """INSERT INTO report (host_id,sub_system_id,report_json,time)
                   VALUES (%(host_id)s,%(sub_system_id)s,%(report_json)s,%(time)s)"""
        data = {'host_id': request.json['host_id'],
                'sub_system_id': request.json['sub_system_id'],
                'report_json': json.dumps(json_data),
                'time': request.json['time']}
        try:
            cur.execute(query, data)
            print("After postgres query")
        except Exception as e:
            print(str(e))
            print("error inserting")
            conn.rollback()
            abort(400)
        conn.commit()
        result = {
            "host_id": request.json['host_id'],
            "sub_system_id": request.json['sub_system_id'],
            "report_json": json.dumps(json_data),
            "fromDate": request.json['fromDate'],
            "toDate": request.json['toDate'],
            "time": request.json['time']
        }
        return jsonify({'results': result}), 200, \
            {'Access-Control-Allow-Origin': '*'}

    elif request.method == 'PUT':
        if (not request.json or 'report_id' not in request.json
                or 'host_id' not in request.json
                or 'sub_system_id' not in request.json
                or 'report_json' not in request.json
                or 'time' not in request.json):
            abort(400)
        res = {
            "report_id": request.json['report_id'],
            "host_id": request.json['host_id'],
            "sub_system_id": request.json['sub_system_id'],
            "report_json": request.json['report_json'],
            "time": request.json['time']
        }
        conn = dbConnection()
        cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        query = """UPDATE report SET host_id=(%s),sub_system_id=(%s),report_json=(%s),time=(%s)
                   WHERE report_id=(%s)"""
        data = (request.json['host_id'], request.json['sub_system_id'],
                json.dumps(request.json['report_json']),
                request.json['time'], request.json['report_id'],)
        try:
            cur.execute(query, data)
        except Exception as e:
            print(str(e))
            print("ERROR Updating into service")
            conn.rollback()
            abort(400)
        conn.commit()
        return jsonify({'results': res}), 201, \
            {'Access-Control-Allow-Origin': '*'}
filename = test.replace('.lvm', '.csv').replace('\xd6', 'oe')\
    .replace('\xfc', 'ue').replace('\xe4', 'ae')
tag = os.path.basename(filename).replace('.csv', '')
if not os.path.isfile(filename):
    index = 0
    # check whether the file is already present in the database
    try:
        query_str = abfrage_datenbank(filename, messung, tag)
        check = client.query(query_str)
    except:
        check = True
    # if empty (tag not present)
    if bool(check) is False:
        # open the csv and read its contents
        sr = open(dateiname, "rb")
        in_txt = csv.reader(sr, delimiter='\t')
        output = open(filename, 'wb')
        writer = csv.writer(output)
        index = 0
        a = []
        b = []
def test_query_with_empty_result(self):
    with _mocked_session('get', 200, []):
        cli = DataFrameClient('host', 8086, 'username', 'password', 'db')
        result = cli.query('select column_one from foo;')
        assert result == []
logging.warning('*** No argument found...')
bin_filenames = sorted(glob.iglob(BIN_DIR + MASK + '.bin'))
logging.info('%d bin files to process...' % len(bin_filenames))
if len(bin_filenames) > 0:
    client = DataFrameClient(DATABASE['HOST'], DATABASE['PORT'],
                             DATABASE['USER'], DATABASE['PASSWORD'],
                             DATABASE['NAME'])
    for f in bin_filenames:
        metadata = bin_to_df.get_metadata(f)
        if metadata is not None:
            if metadata['NetId'] is not None:
                net_id = metadata['NetId']
                first_channel = metadata['Channels'][0]
                tag_to_search = net_id + '-' + first_channel
                # interpolate the tag into the query; the original embedded
                # the variable name tag_to_search literally in the string
                last_measurement = client.query(
                    "select last(*) from \"measurement\" where \"sensor\" = '%s';"
                    % tag_to_search)
                if not last_measurement:
                    ld = datetime.datetime(1970, 1, 1, 0, 0, 0).replace(
                        tzinfo=datetime.timezone.utc)
                else:
                    ld = last_measurement['measurement'].index.to_pydatetime()[0]
                status = bin_to_influx(f, ld)
                if status == 0 or status == 1:
                    rename(f, path.join(path.dirname(f), PROCESSED_DIR,
                                        path.basename(f)))
                    rename(f + '.jsn',
                           path.join(path.dirname(f), PROCESSED_DIR,
                                     path.basename(f) + '.jsn'))
                else:
                    logging.warning('%s could not be processed...' % f)
                    if not path.exists(path.join(BIN_DIR, UNPROCESSED_DIR)):
                        makedirs(path.join(BIN_DIR, UNPROCESSED_DIR))
                    rename(f, path.join(path.dirname(f), UNPROCESSED_DIR,
                                        path.basename(f)))
                    rename(f + '.jsn',
                           path.join(path.dirname(f), UNPROCESSED_DIR,
                                     path.basename(f) + '.jsn'))
else: