Example #1
    def make_dataset(capteur_list, text_input_start, text_input_end,
                     nom_capteur, L_influx):

        client = DataFrameClient(host=L_influx[3],
                                 port=L_influx[4],
                                 username=L_influx[1],
                                 password=L_influx[2])
        client.switch_database(L_influx[0])

        src = pd.DataFrame()

        for elt in capteur_list:
            requete = ("SELECT \"{}\" FROM measure WHERE time >= '{}' "
                       "AND time <= '{}' AND ID='{}'".format(
                           elt, text_input_start, text_input_end, nom_capteur))
            datasets = client.query(requete)
            df = pd.DataFrame(datasets['measure'])
            df = df.rename_axis('Date')
            df.index = df.index.astype('datetime64[ns]')
            src = pd.concat([df, src], axis=1)

        client.close()
        src['Date'] = src.index
        src.index = range(src.shape[0])

        cols = src.columns.tolist()
        cols = cols[-1:] + cols[:-1]
        src = src[cols]

        return src
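The credentials are read positionally from L_influx; judging from the indices used above, the expected order appears to be [database, username, password, host, port]. A minimal, hypothetical call sketch (all values are placeholders, and the function is assumed to be reachable at module scope):

L_influx = ["sensors_db", "reader", "secret", "localhost", 8086]  # assumed ordering
df = make_dataset(capteur_list=["temperature", "humidity"],
                  text_input_start="2021-01-01T00:00:00Z",
                  text_input_end="2021-01-02T00:00:00Z",
                  nom_capteur="capteur_01",
                  L_influx=L_influx)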
Example #2
def save_to_database(dataframe,
                     user='******',
                     password='******',
                     host='localhost',
                     port=8086):
    '''
    Save a pandas.DataFrame to the InfluxDB database.
    '''
    from influxdb import DataFrameClient
    import time
    dbname = 'aiops'
    print("Connecting to database: host={}, port={}, dbname={}, user={}".format(
        host, port, dbname, user))
    client = DataFrameClient(host, port, user, password, dbname)
    #   print("Create database: " + dbname)
    #   client.create_database(dbname)
    tablename = 'sample_label' + time.strftime("_%Y%m%d_%H%M%S",
                                               time.localtime())
    df = pd.DataFrame(data=np.array(dataframe),
                      index=pd.date_range(start=time.strftime(
                          "%Y-%m-%d", time.localtime()),
                                          periods=len(dataframe),
                                          freq='ms'))
    client.write_points(df, tablename, protocol='line')
    client.close()
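A minimal, hypothetical call. Note that the function ignores the DataFrame's own index and re-indexes the rows at millisecond frequency starting from today's date, so the original timestamps are not preserved:

import numpy as np
import pandas as pd

labels = pd.DataFrame({"label": np.random.randint(0, 2, size=100)})  # placeholder data
save_to_database(labels, user="aiops_user", password="aiops_pass",   # placeholder credentials
                 host="localhost", port=8086)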
Example #3
def main(host, port, csv):
	# Open the file and parse it

	pd.set_option('display.max_rows', 500)
	pd.set_option('display.max_columns', 500)
	pd.set_option('display.width', 1000)

	# Thanks to
	# https://stackoverflow.com/questions/29442370/how-to-correctly-read-csv-in-pandas-while-changing-the-names-of-the-columns
	print("Parsing csv...")
	headers = ['start_date', 'end_date', 'measurement_duration', 'detector_number', 'sensitivity', 'bubble_count', 'exposure', 'name']
	data_frame = pd.read_csv(csv, parse_dates=True, index_col=0, names=headers, header=0)

	# Format the date time values
	data_frame.index = pd.to_datetime(data_frame.index, utc=True)

	print(data_frame)

	# Create the database connection
	# Thanks to the influxdb pip docs 
	# https://influxdb-python.readthedocs.io/en/latest/examples.html#tutorials-basic

	print("Connecting to db...")
	client_connection = DataFrameClient(host=host, port=port, database=DB_NAME)
	print("Writing data...")
	client_connection.write_points(data_frame, DB_NAME, protocol="line")

	# Close the connection
	client_connection.close()
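The script assumes the database DB_NAME already exists on the server. A minimal sketch for creating it first when it does not, using only standard DataFrameClient calls:

client_connection = DataFrameClient(host=host, port=port, database=DB_NAME)
if DB_NAME not in [db['name'] for db in client_connection.get_list_database()]:
    client_connection.create_database(DB_NAME)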
Example #4
def update_to_latest(client: DataFrameClient, noncache_provider: typing.Callable, new_symbols: set = None, time_delta_back: relativedelta = relativedelta(years=5), skip_if_older_than: relativedelta = None):
    """
    Update existing entries in the database to the most current values
    :param client: DataFrameClient client
    :param noncache_provider: Non cache data provider
    :param new_symbols: additional symbols to add {(symbol, interval_len, interval_type), ...}
    :param time_delta_back: how far back to request data when adding new symbols
    :param skip_if_older_than: skip symbol update if the symbol is older than...
    :return:
    """
    filters = dict()

    new_symbols = set() if new_symbols is None else new_symbols

    if skip_if_older_than is not None:
        skip_if_older_than = (datetime.datetime.utcnow().replace(tzinfo=tz.gettz('UTC')) - skip_if_older_than).astimezone(tz.gettz('US/Eastern'))

    for key, time in [(e[0], e[1][1]) for e in ranges(client).items()]:
        if key in new_symbols:
            new_symbols.remove(key)

        if skip_if_older_than is None or time > skip_if_older_than:
            bgn_prd = datetime.datetime.combine(time.date(), datetime.datetime.min.time()).replace(tzinfo=tz.gettz('US/Eastern'))
            filters[BarsFilter(ticker=key[0], bgn_prd=bgn_prd, interval_len=key[1], interval_type=key[2])] = None

    bgn_prd = datetime.datetime.combine(datetime.datetime.utcnow().date() - time_delta_back, datetime.datetime.min.time()).replace(tzinfo=tz.gettz('US/Eastern'))
    for (symbol, interval_len, interval_type) in new_symbols:
        filters[BarsFilter(ticker=symbol, bgn_prd=bgn_prd, interval_len=interval_len, interval_type=interval_type)] = None

    logging.getLogger(__name__).info("Updating " + str(len(filters)) + " total symbols and intervals; New symbols and intervals: " + str(len(new_symbols)))

    q = queue.Queue(maxsize=100)

    threading.Thread(target=partial(noncache_provider, filters=filters, q=q), daemon=True).start()

    try:
        for i, tupl in enumerate(iter(q.get, None)):
            ft, to_cache = filters[tupl[0]], tupl[1]

            if to_cache is not None and not to_cache.empty:
                # Prepare data
                for c in [c for c in to_cache.columns if c not in ['symbol', 'open', 'high', 'low', 'close', 'volume']]:
                    to_cache.drop(c, axis=1, inplace=True)

                to_cache['interval'] = str(ft.interval_len) + '_' + ft.interval_type

                if to_cache.iloc[0].name == ft.bgn_prd:
                    to_cache = to_cache.iloc[1:]

                try:
                    client.write_points(to_cache, 'bars', protocol='line', tag_columns=['symbol', 'interval'], time_precision='s')
                except Exception as err:
                    logging.getLogger(__name__).exception(err)

            if i > 0 and (i % 20 == 0 or i == len(filters)):
                logging.getLogger(__name__).info("Cached " + str(i) + " queries")
    finally:
        client.close()
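update_to_latest implements only the consumer side. Judging from iter(q.get, None) and the filters[tupl[0]] lookup, the noncache_provider callable is expected to put (filter, DataFrame) tuples on the queue and enqueue None when finished. A minimal sketch of a provider honoring that contract (fetch_bars_somehow is a hypothetical data source):

def my_noncache_provider(filters, q):
    for f in filters:
        # fetch_bars_somehow is hypothetical; it should return a DataFrame of bars for the filter
        q.put((f, fetch_bars_somehow(f)))
    q.put(None)  # sentinel consumed by iter(q.get, None) in update_to_latest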
    def test_03_influx_wr(self):
        self.system_con()
        influxdb_host = 'localhost'
        influxdb_port = 8086
        influxdb_id = 'krmim'
        influxdb_pwd = 'krmin_2017'
        influxdb_db = 'facility'
        influxdb_client = self.mqtt_agent.get_influxdb_mgr()
        influxdb_Dfclient = DataFrameClient('localhost', 8086, 'krmim',
                                            'krmim_2017', 'facility')
        if influxdb_client is None:
            print('influxdb configuration fail')

        packet = self.make_packet(facility_id='TS0001',
                                  sensor_code='0001',
                                  pv=330)
        _, _, modbus_udp = self.async_svr.convert_hex2decimal(
            packet, 'localhost', 6379)
        fac_daq = get_fac_inf(self.redis_con)
        redis_fac_info = json.loads(self.redis_con.get('facilities_info'))
        equipment_id = modbus_udp['equipment_id']
        if equipment_id in redis_fac_info.keys():
            fac_msg = config_fac_msg(equipment_id, fac_daq, modbus_udp,
                                     redis_fac_info)

        fields = {}
        facility_msg_json = json.loads(fac_msg)
        me_timestamp = time.time()
        del facility_msg_json[equipment_id]['ms_time']
        del facility_msg_json[equipment_id]['pub_time']
        for key in facility_msg_json[equipment_id].keys():
            print(key)
            fields[key] = float(facility_msg_json[equipment_id][key])

        # fields['me_time'] = me_timestamp
        influx_json = [{'measurement': equipment_id, 'fields': fields}]
        try:
            influxdb_Dfclient.query('DROP SERIES FROM TS0001')
            if influxdb_client.write_points(influx_json) is True:
                print('influx write success:' + str(influx_json))
            else:
                print('influx write failed:' + str(influx_json))
        except Exception as e:
            print(str(e.args))
        influxdb_client.close()

        ts5_no_coefficient = influxdb_Dfclient.query(
            'SELECT * FROM TS0001 where time >= now() - 15m')
        ts5_no_coefficient = ts5_no_coefficient['TS0001']
        rows_size = ts5_no_coefficient.shape[0]
        row_json = ts5_no_coefficient.to_dict(orient='records')[0]
        cmp = influx_json[0]['fields'] == row_json
        print(cmp)
        print(influx_json[0]['fields'])
        print(row_json)
        self.assertEqual(rows_size, 1)
        influxdb_Dfclient.close()
Example #6
        def read_influxdb_data(host='192.168.123.245',
                               port=8086,
                               dbname='c9377a95-82f3-4af3-ac14-40d14f6d2abe',
                               ChannelName='1Y520210100',
                               time_start='',
                               time_end='',
                               user='******',
                               password='******',
                               keyword=''):

            client = DataFrameClient(host, port, user, password, dbname)
            measurements = client.get_list_measurements()

            if keyword is None: keyword = ''

            if keyword == '':
                measurement = [
                    mea.get(u'name') for mea in measurements
                    if mea.get(u'name').find(ChannelName) >= 0
                ]
            else:
                measurement = [
                    mea.get(u'name') for mea in measurements
                    if mea.get(u'name').find(ChannelName) >= 0
                    and mea.get(u'name').find(keyword) >= 0
                ]

            if len(measurement) == 0:
                print('No data retrieved.')
                return None

            measurement = measurement[-1]
            time_end = 'now()' if time_end == '' else "'" + time_end + "'"
            time_start = 'now()' if time_start == '' else "'" + time_start + "'"
            querystr = 'select * from "{}" where time > {} and time < {}'.format(
                measurement, time_start, time_end)
            #print(querystr)

            df = client.query(querystr).get(measurement)
            client.close()

            if df is None:
                print('InfluxDB no data retrieved.')
                return None

            dff = df.groupby('id')
            columns = [name for name, group in dff]
            groups = [group['val'] for name, group in dff]

            # check datetime alignment: all([all(groups[i].index==groups[0].index) for i in range(1,len(groups))])
            result = pd.concat(groups, axis=1)
            result.columns = columns
            result.index = groups[0].index

            return measurement, result
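A hypothetical call; the function returns the matched measurement name together with a DataFrame whose columns are the distinct id tags and whose values come from the val field (argument values below are placeholders):

result = read_influxdb_data(ChannelName='1Y520210100',
                            time_start='2021-06-01T00:00:00Z',
                            time_end='2021-06-02T00:00:00Z',
                            keyword='acc')
if result is not None:
    measurement, data = result
    print(measurement, data.shape)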
def random_shapeable_order():
    # Retrieve random order
    df = randomorders.random_shapeable_orderbook()

    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'sbook')
    client.close()

    # Run optimization
    optimization()
    return {"status": "sucess"}
def forecast(times: List[str], values: List[float]):
    df = pandas.DataFrame(index=pandas.DatetimeIndex(times).round('5T'),
                          data={'uncontr': values})

    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'uncontr')
    client.close()

    # Run optimization
    optimization()
    return {"status": "sucess"}
def random_deferrable_order():
    # Retrieve random order
    TIMESTEP = 12
    df = randomorders.random_deferrable_orderbook(timestep=60 / TIMESTEP)

    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'dbook')
    client.close()

    # Run optimization
    optimization()
    return {"status": "sucess"}
Example #10
    def __fQueryInflux(self, query: str) -> dict:
        if self.permissionDB is True:
            logger.debug(f"Perform the query {query} to the database.")

            client = DataFrameClient(host=self.host,
                                     port=self.port,
                                     database=self.database)
            result = client.query(query)
            client.close()
        else:
            logger.error("No permission to connect to the database.")
            result = None

        return result
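DataFrameClient.query returns a dict-like mapping of measurement names to DataFrames, so callers of the helper above index the result by measurement. A sketch of a companion method on the same class ('power' is a placeholder measurement name):

    def fLatestPower(self):
        # Hypothetical companion method; returns the last hour of the placeholder 'power' measurement
        result = self.__fQueryInflux("SELECT * FROM power WHERE time > now() - 1h")
        return None if not result else result["power"]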
Example #11
 def _check_influx(self):
     conn = DataFrameClient(**config.INFLUX_CONN_SETTING)
     sql = f'''SELECT * FROM "{config.INFLUX_TABLE_CHECK}" WHERE table='{config.INFLUX_TABLE_POINT}' ORDER BY time DESC LIMIT 1'''
     row = conn.query(sql)
     if row:
         row = row[config.INFLUX_TABLE_CHECK]
         self.log_file = row['file'].values[0]
         self.log_pos = row['end_log_pos'].values[0]
         logging.log(
             logging.WARNING,
             'found binlog check table, now log_file is {} and restart pos is {}'
             .format(self.log_file, self.log_pos))
     conn.close()
     return row
def remove_deferrable_order(t: str):
    # Create fake order with 0
    data = {'duration': [1], 'profile_kw': [[0.0]]}
    # minus 2 hours is a workaround for the timezone offset
    df = pandas.DataFrame(
        index=[datetime.strptime(t, '%Y-%m-%d %H:%M:%S') - timedelta(hours=2)],
        data=data)

    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'dbook')
    client.close()

    # Run optimization
    optimization()
    return {"status": "sucess"}
Example #13
def get_tide_data():
    with open("last_data_retrieved.txt") as f:
        previous_date_time = f.readlines()

    url = "https://tidesandcurrents.noaa.gov/api/datagetter?date=latest&station=8638610&product=water_level&datum=NAVD&units=english&time_zone=lst_ldt&format=json"
    response = requests.get(url)
    result = json.loads(response.text)

    station_id = str(result['metadata']['id'])
    station_name = str(result['metadata']['name'])
    lat = str(result['metadata']['lat'])
    lon = str(result['metadata']['lon'])
    value = float(result['data'][0]['v'])
    date_time_str = str(result['data'][0]['t'])
    date_time = datetime.strptime(date_time_str, '%Y-%m-%d %H:%M')

    current_date_time = str(date_time)

    if current_date_time not in previous_date_time:
        cols = ['station_id','station_name','latitude','longitude','date_time','value']
        line = pd.DataFrame({'station_id': station_id,'station_name': station_name,'latitude': lat,
                             'longitude': lon,'date_time': date_time,'value': value}, columns=cols, index=[0])


        line['date_time'] = pd.to_datetime(line['date_time'])
        line = line.set_index('date_time')
        # print line

        time_values = line[['value']]
        # print time_values

        tags = {'station_id': line[['station_id']], 'station_name': line[['station_name']],'latitude': line[['latitude']],
                'longitude': line[['longitude']]}

        client = DataFrameClient(host, port, user, password, dbname)

        # Write DataFrame with Tags
        client.write_points(line, dbname, protocol=protocol)  # , tags

        client.close()

        f = open('last_data_retrieved.txt', 'w')
        f.write(current_date_time + '\n')
        f.close()
        print "New data has been added to the database!"
    else:
        print "No new data available"
Example #14
def get_tide_data():
    with open("last_data_retrieved.txt") as f:
        previous_date_time = f.readlines()
        
    url = "https://gentle-falls-78142.herokuapp.com/"
    response = requests.get(url)
    result = json.loads(response.text)
    
    sensor_id = str(result['metadata']['id'])
    sensor_name = str(result['metadata']['name'])
    lat = str(result['metadata']['lat'])
    lon = str(result['metadata']['lon'])
    flood = str(result['data'][0]['f'])
    value = float(result['data'][0]['v'])
    date_time_str = str(result['data'][0]['t'])
    
    date_time = datetime.strptime(date_time_str, '%Y-%m-%d %H:%M')
    current_date_time = str(date_time)
    
    if current_date_time not in previous_date_time:
        cols = ['sensor_id','sensor_name','latitude','longitude', 'flood', 'date_time','value']
        line = pd.DataFrame({'sensor_id': sensor_id,'sensor_name': sensor_name,'latitude': lat,
                             'longitude': lon,'flood': flood,'date_time': date_time,'value': value}, columns=cols, index=[0])
    
        line['date_time'] = pd.to_datetime(line['date_time'])
        
        line = line.set_index('date_time')
        # print line
        
        time_values = line[['value']]
        print(time_values)
        
        tags = {'sensor_id': line[['sensor_id']], 'sensor_name': line[['sensor_name']],'latitude': line[['latitude']],
                'longitude': line[['longitude']], 'flood': line[['flood']]}
    
        client = DataFrameClient(host, port, user, password, dbname)
        # Write DataFrame with Tags
        
        client.write_points(line, dbname, protocol=protocol)  # , tags
        client.close()

        f = open('last_data_retrieved.txt', 'w')
        f.write(current_date_time) # + '\n')
        f.close()
        print "New data has been added to the database!"
    else:
        print "No new data available"
def save_total_demand():
    # Limit the number of call to avoid
    # high cardinality of influxdb tags
    # Query total demand data
    client = DataFrameClient(host, port, user, password, dbname)
    start = datetime.now()
    query = ("select * from contr " + "WHERE time >= '" +
             (start).strftime("%Y-%m-%dT%H:%M:%SZ") + "' AND time <= '" +
             (start + timedelta(hours=24)).strftime("%Y-%m-%dT%H:%M:%SZ") +
             "'")
    contr = client.query(query)['contr']

    # Save it to a different measurement
    client.write_points(
        contr, 'versioncontr', {
            'version':
            str(
                int(datetime.now().replace(second=0,
                                           microsecond=0).timestamp() * 1000))
        })
    client.close()
    return {"status": "sucess"}
def remove_battery_order(t: str):
    # Create fake order with 0
    data = {
        'min_kw': [0.0],
        'max_kw': [0.0],
        'max_kwh': [0.0],
        'initial_kwh': [0.0],
        'end_kwh': [0.0]
    }
    # minus 2 hours is a workaround for the timezone offset
    df = pandas.DataFrame(
        index=[datetime.strptime(t, '%Y-%m-%d %H:%M:%S') - timedelta(hours=2)],
        data=data)

    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'bbook')
    client.close()

    # Run optimization
    optimization()
    return {"status": "sucess"}
def battery_order(order: BatteryOrder):
    # Convert start and end times to seconds since epoch
    # minus 2 hours is a workaround for the timezone offset
    # times 1000 for milliseconds
    order.startby = (datetime.strptime(order.startby, '%Y-%m-%dT%H:%M:%SZ') -
                     timedelta(hours=2)).timestamp() * 1000
    order.endby = (datetime.strptime(order.endby, '%Y-%m-%dT%H:%M:%SZ') -
                   timedelta(hours=2)).timestamp() * 1000

    # Create a dataframe to be saved to influxdb
    df = pandas.DataFrame(
        index=[datetime.now().replace(second=0, microsecond=0)],
        data=json.loads(order.json()))

    # Open connection and write to DB
    client = DataFrameClient(host, port, user, password, dbname)
    client.write_points(df, 'bbook')
    client.close()

    # Run optimization
    optimization()
    return {"status": "sucess"}
def forecasting_task(params: dict):

    logger = params['LOGGER']
    logger.info('Starting forecasting task...')
    start_time = time()

    # Connect to the database "Emon" in InfluxDB
    client = DataFrameClient(host=params['INFLUXDB_HOST'],
                             port=params['INFLUXDB_PORT'],
                             username=params['INFLUXDB_USER'],
                             password=params['INFLUXDB_PASS'],
                             database=params['INFLUXDB_DB'])

    # Retrieve and update weather historical measurements and predictions
    W = forecasting.weather_preprocessing(client, params)

    # Query and preprocess power load measurements
    y, X, Xh = forecasting.preprocessing(client, params, W)

    # Predict power load consumptions
    df_pred = forecasting.forecasting(y, X, Xh, params)

    # Write power load forecasts to InfluxDB
    try:
        # Write data to InfluxDB for the current source
        client.write_points(dataframe=df_pred,
                            database=params['INFLUXDB_DB'],
                            measurement=params['FORECAST_TS'],
                            protocol='line',
                            time_precision='s')
    except Exception as e:
        logger.error(e)
    finally:
        logger.info(
            f'Forecasting task completed in {round(time()-start_time)} seconds!'
        )
        client.close()
Example #19
class TestInfluxDBCache(unittest.TestCase):
    """
    Test InfluxDBCache
    """
    def setUp(self):
        self._client = InfluxDBClient(host='localhost',
                                      port=8086,
                                      username='******',
                                      password='******',
                                      database='test_cache')

        self._client.drop_database('test_cache')
        self._client.create_database('test_cache')
        self._client.switch_database('test_cache')

        self._df_client = DataFrameClient(host='localhost',
                                          port=8086,
                                          username='******',
                                          password='******',
                                          database='test_cache')

    def tearDown(self):
        self._client.drop_database('test_cache')
        self._client.close()
        self._df_client.close()

    def test_update_to_latest_intraday(self):
        with IQFeedHistoryProvider(num_connections=2) as history:
            cache_requests = inf_cache.InfluxDBOHLCRequest(
                client=self._df_client, interval_len=3600, interval_type='s')

            end_prd = datetime.datetime(2017, 3, 2)
            filters = (BarsInPeriodFilter(ticker="IBM",
                                          bgn_prd=datetime.datetime(
                                              2017, 3, 1),
                                          end_prd=end_prd,
                                          interval_len=3600,
                                          ascend=True,
                                          interval_type='s'),
                       BarsInPeriodFilter(ticker="AAPL",
                                          bgn_prd=datetime.datetime(
                                              2017, 3, 1),
                                          end_prd=end_prd,
                                          interval_len=3600,
                                          ascend=True,
                                          interval_type='s'),
                       BarsInPeriodFilter(ticker="AAPL",
                                          bgn_prd=datetime.datetime(
                                              2017, 3, 1),
                                          end_prd=end_prd,
                                          interval_len=600,
                                          ascend=True,
                                          interval_type='s'))

            filters_no_limit = (BarsInPeriodFilter(ticker="IBM",
                                                   bgn_prd=datetime.datetime(
                                                       2017, 3, 1),
                                                   end_prd=None,
                                                   interval_len=3600,
                                                   ascend=True,
                                                   interval_type='s'),
                                BarsInPeriodFilter(ticker="AAPL",
                                                   bgn_prd=datetime.datetime(
                                                       2017, 3, 1),
                                                   end_prd=None,
                                                   interval_len=3600,
                                                   ascend=True,
                                                   interval_type='s'))

            data = [
                history.request_data(f, sync_timestamps=False) for f in filters
            ]

            for datum, f in zip(data, filters):
                datum.drop('timestamp', axis=1, inplace=True)
                datum['interval'] = str(f.interval_len) + '_' + f.interval_type
                self._df_client.write_points(
                    datum,
                    'bars',
                    protocol='line',
                    tag_columns=['symbol', 'interval'],
                    time_precision='s')

            latest_old = ranges(self._client)
            update_to_latest(self._df_client,
                             noncache_provider=noncache_provider(history),
                             new_symbols={('AAPL', 3600, 's'),
                                          ('MSFT', 3600, 's'),
                                          ('MSFT', 600, 's')},
                             time_delta_back=relativedelta(years=10))

            latest_current = ranges(self._client)
            self.assertEqual(len(latest_current), len(latest_old) + 2)
            self.assertEqual(
                len([k
                     for k in latest_current.keys() & latest_old.keys()]) + 2,
                len(latest_current))
            for k in latest_current.keys() & latest_old.keys():
                self.assertGreater(latest_current[k][1], latest_old[k][1])

            data_no_limit = [
                history.request_data(f, sync_timestamps=False)
                for f in filters_no_limit
            ]
            cache_data_no_limit = [
                cache_requests.request(symbol=f.ticker, bgn_prd=f.bgn_prd)[0]
                for f in filters_no_limit
            ]
            for df1, df2 in zip(data_no_limit, cache_data_no_limit):
                del df1['total_volume']
                del df1['number_of_trades']
                del df1['volume']
                del df2['volume']

                assert_frame_equal(df1,
                                   df2,
                                   check_exact=False,
                                   check_less_precise=True)

    def test_update_to_latest_daily(self):
        with IQFeedHistoryProvider(num_connections=2) as history:
            cache_requests = inf_cache.InfluxDBOHLCRequest(
                client=self._df_client, interval_len=1, interval_type='d')

            bgn_prd = datetime.datetime(2017, 3, 1).date()
            end_prd = datetime.datetime(2017, 3, 2).date()
            filters = (BarsDailyForDatesFilter(ticker="IBM",
                                               bgn_dt=bgn_prd,
                                               end_dt=end_prd,
                                               ascend=True),
                       BarsDailyForDatesFilter(ticker="AAPL",
                                               bgn_dt=bgn_prd,
                                               end_dt=end_prd,
                                               ascend=True))

            filters_no_limit = (BarsDailyForDatesFilter(ticker="IBM",
                                                        bgn_dt=bgn_prd,
                                                        end_dt=None,
                                                        ascend=True),
                                BarsDailyForDatesFilter(ticker="AAPL",
                                                        bgn_dt=bgn_prd,
                                                        end_dt=None,
                                                        ascend=True),
                                BarsDailyForDatesFilter(ticker="AMZN",
                                                        bgn_dt=bgn_prd,
                                                        end_dt=None,
                                                        ascend=True))

            data = [
                history.request_data(f, sync_timestamps=False) for f in filters
            ]

            for datum, f in zip(data, filters):
                datum.drop('timestamp', axis=1, inplace=True)
                datum['interval'] = '1_d'
                self._df_client.write_points(
                    datum,
                    'bars',
                    protocol='line',
                    tag_columns=['symbol', 'interval'],
                    time_precision='s')

            latest_old = ranges(self._client)
            update_to_latest(self._df_client,
                             noncache_provider=noncache_provider(history),
                             new_symbols={('AAPL', 1, 'd'), ('AMZN', 1, 'd')},
                             time_delta_back=relativedelta(years=10))

            latest_current = ranges(self._df_client)
            self.assertEqual(len(latest_current), len(latest_old) + 1)
            self.assertEqual(
                len([k
                     for k in latest_current.keys() & latest_old.keys()]) + 1,
                len(latest_current))
            for k in latest_current.keys() & latest_old.keys():
                self.assertGreater(latest_current[k][1], latest_old[k][1])

            data_no_limit = [
                history.request_data(f, sync_timestamps=False)
                for f in filters_no_limit
            ]
            cache_data_no_limit = [
                cache_requests.request(
                    symbol=f.ticker,
                    bgn_prd=datetime.datetime.combine(
                        f.bgn_dt, datetime.datetime.min.time()).astimezone(
                            tz.tzutc()) + relativedelta(microseconds=1))
                for f in filters_no_limit
            ]
            for df1, (_, df2) in zip(data_no_limit, cache_data_no_limit):
                del df1['open_interest']
                df1 = df1[[
                    'open', 'high', 'low', 'close', 'volume', 'timestamp',
                    'symbol'
                ]]
                assert_frame_equal(df1, df2)

    def test_bars_in_period(self):
        with IQFeedHistoryProvider(num_connections=2) as history:
            now = datetime.datetime.now()
            filters = (BarsInPeriodFilter(ticker="IBM",
                                          bgn_prd=datetime.datetime(
                                              now.year - 1, 3, 1),
                                          end_prd=None,
                                          interval_len=3600,
                                          ascend=True,
                                          interval_type='s'),
                       BarsInPeriodFilter(ticker="AAPL",
                                          bgn_prd=datetime.datetime(
                                              now.year - 1, 3, 1),
                                          end_prd=None,
                                          interval_len=3600,
                                          ascend=True,
                                          interval_type='s'),
                       BarsInPeriodFilter(ticker="AAPL",
                                          bgn_prd=datetime.datetime(
                                              now.year - 1, 3, 1),
                                          end_prd=None,
                                          interval_len=600,
                                          ascend=True,
                                          interval_type='s'))

            data = [
                history.request_data(f, sync_timestamps=False) for f in filters
            ]

            for datum, f in zip(data, filters):
                datum.drop('timestamp', axis=1, inplace=True)
                datum['interval'] = str(f.interval_len) + '_' + f.interval_type
                self._df_client.write_points(
                    datum,
                    'bars',
                    protocol='line',
                    tag_columns=['symbol', 'interval'],
                    time_precision='s')

            # test all symbols
            bgn_prd = datetime.datetime(now.year - 1,
                                        3,
                                        1,
                                        tzinfo=tz.gettz('UTC'))
            bars_in_period = inf_cache.BarsInPeriodProvider(
                influxdb_cache=inf_cache.InfluxDBOHLCRequest(
                    client=self._df_client,
                    interval_len=3600,
                    interval_type='s'),
                bgn_prd=bgn_prd,
                delta=relativedelta(days=30))

            for i, (orig_df, processed_df) in enumerate(bars_in_period):
                self.assertFalse(orig_df.empty)
                self.assertFalse(processed_df.empty)

                start, end = bars_in_period._periods[bars_in_period._deltas]
                self.assertGreaterEqual(orig_df.iloc[0].name[0], start)
                self.assertGreater(end, orig_df.iloc[-1].name[0])
                self.assertGreater(end, orig_df.iloc[0].name[0])

            self.assertEqual(i, len(bars_in_period._periods) - 1)
            self.assertGreater(i, 0)

            # test symbols group
            bgn_prd = datetime.datetime(now.year - 1,
                                        3,
                                        1,
                                        tzinfo=tz.gettz('UTC'))
            bars_in_period = inf_cache.BarsInPeriodProvider(
                influxdb_cache=inf_cache.InfluxDBOHLCRequest(
                    client=self._df_client,
                    interval_len=3600,
                    interval_type='s'),
                symbol=['AAPL', 'IBM'],
                bgn_prd=bgn_prd,
                delta=relativedelta(days=30))

            for i, (orig_df, processed_df) in enumerate(bars_in_period):
                self.assertFalse(orig_df.empty)
                self.assertFalse(processed_df.empty)

                start, end = bars_in_period._periods[bars_in_period._deltas]
                self.assertGreaterEqual(orig_df.iloc[0].name[0], start)
                self.assertGreater(end, orig_df.iloc[-1].name[0])
                self.assertGreater(end, orig_df.iloc[0].name[0])

            self.assertEqual(i, len(bars_in_period._periods) - 1)
            self.assertGreater(i, 0)

    def test_update_fundamentals(self):
        funds = get_fundamentals({'IBM', 'AAPL', 'GOOG', 'MSFT'})
        update_fundamentals(self._client, list(funds.values()))
        result = get_cache_fundamentals(self._client,
                                        ['IBM', 'AAPL', 'GOOG', 'MSFT'])

        self.assertEqual(len(result), 4)
        self.assertEqual({k
                          for k in result.keys()},
                         {'IBM', 'AAPL', 'GOOG', 'MSFT'})
        self.assertGreater(len(result['IBM']), 0)

    def test_update_adjustments(self):
        funds = get_fundamentals({'IBM', 'AAPL', 'GOOG', 'MSFT'})
        update_splits_dividends(self._client, list(funds.values()))

        adjustments = inf_cache.get_adjustments(client=self._df_client,
                                                symbol=['IBM', 'AAPL'],
                                                provider='iqfeed')

        self.assertEqual(len(adjustments), 6)
        self.assertTrue(isinstance(adjustments, pd.DataFrame))
        self.assertTrue(set(adjustments.index.levels[1]) == {'IBM', 'AAPL'})
Example #20
class tsdb(object):
    def __init__(self,
                 dbname,
                 host='localhost',
                 port=8086,
                 user='******',
                 password='******'):
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.dbname = dbname
        self.client = None
        self.protocol = 'json'

    def _connect(self):
        if self.client is None:
            self.client = DataFrameClient(host=self.host,
                                          port=self.port,
                                          username=self.user,
                                          password=self.password,
                                          database=self.dbname)
            #self.client.switch_database(self.dbname)

    def _disconnect(self):
        if self.client is not None:
            self.client.close()
            self.client = None

    def _reconnect(self):
        self._disconnect()
        self._connect()

    def create_db(self):
        self._connect()
        dbs = self.client.get_list_database()
        for e in dbs:
            if self.dbname in e.values():
                logger.debug("Database {} is already exist.".format(
                    self.dbname))
                return

        logger.info("Creating database:{}".format(self.dbname))
        self.client.create_database(self.dbname)
        #self._set_retention_policy()

    def _set_retention_policy(self):
        self._connect()
        self.client.create_retention_policy(name='raw',
                                            duration='12h',
                                            replication=1,
                                            default=True)
        self.client.create_retention_policy(name='cooked',
                                            duration='52w',
                                            replication=1,
                                            default=False)

    def check_db(self):
        self._connect()
        db = self.client.get_list_database()
        ms = self.client.get_list_measurements()
        rp = self.client.get_list_retention_policies(self.dbname)
        user = self.client.get_list_users()

        print('db: {}, measurements: {}'.format(db, ms))
        print('retention policy: {}'.format(rp))
        print('users: {}'.format(user))

    def insert(self, df, measurement, tags=None):
        self._connect()
        try:
            result = self.client.write_points(df,
                                              measurement,
                                              tags=tags,
                                              time_precision='n',
                                              protocol=self.protocol)
        except Exception:
            logger.info('influxdb write error')
            result = False
        return result

    def query(self, sql):
        self._connect()
        result = self.client.query(sql)
        return result
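A minimal, hypothetical round trip with the class above (database, measurement and values are placeholders, the class is assumed to be imported from its module, and a running InfluxDB on localhost:8086 is assumed):

import pandas as pd

db = tsdb('metrics_db', host='localhost', port=8086)
db.create_db()

metrics = pd.DataFrame({'cpu': [0.42, 0.61]},
                       index=pd.date_range('2021-01-01', periods=2, freq='s'))
db.insert(metrics, 'cpu_usage', tags={'host': 'web01'})
print(db.query('SELECT * FROM cpu_usage'))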
Example #21
    logging.getLogger(__name__).info("Updating database with arguments: " +
                                     str(args))

    if args.drop:
        client.drop_database(args.database)

    if args.database not in [d['name'] for d in client.get_list_database()]:
        client.create_database(args.database)
        client.query(
            "ALTER RETENTION POLICY autogen ON cache DURATION INF REPLICATION 1 SHARD DURATION 2600w DEFAULT"
        )

    client.switch_database(args.database)

    with IQFeedHistoryProvider(num_connections=args.iqfeed_conn) as history:
        all_symbols = {
            (s, args.interval_len, args.interval_type)
            for s in set(
                iqutil.get_symbols(symbols_file=args.symbols_file).keys())
        }
        update_to_latest(
            client=client,
            noncache_provider=noncache_provider(history),
            new_symbols=all_symbols,
            time_delta_back=relativedelta(years=args.delta_back),
            skip_if_older_than=relativedelta(days=args.skip_if_older)
            if args.skip_if_older is not None else None)

    client.close()
Example #22
class InsyteInfluxIO:
    logger = logging.getLogger('insyte_analytics.db.insyte_influx_io')

    def __init__(self):
        """
        Constructor.
        """
        self.logger.debug("Initialization")
        self.host = None
        self.port = None
        self.username = None
        self.password = None
        self.database = None
        self.client = None  # db connection instance

        self.device_id = None
        self.data_source_id = None
        self.time_upload = None

    def _set_connection_parameters(self,
                                   host=None,
                                   database=None,
                                   port=None,
                                   username=None,
                                   password=None):
        """
        Set connection parameters.
        """
        self.logger.debug("Setting connection parameters")

        self.host = host
        self.logger.debug("host = " + str(host))

        self.database = database
        self.logger.debug("database = " + str(database))

        self.port = port
        self.logger.debug("port = " + str(port))

        self.username = username
        self.logger.debug("username = " + str(username))

        self.password = password
        self.logger.debug("password = " + str(password))

    def _check_connection_parameters(self):
        """
        Check connection parameters.
        """
        self.logger.debug("Checking connection parameters")

        if self.host is None:
            self.logger.warning("Connection parameter 'host' not set")
            raise Exception("Connection parameter 'host' not set")

        if self.database is None:
            self.logger.warning("Connection parameter 'database' not set")
            raise Exception("Connection parameter 'database' not set")

        if self.port is None:
            self.logger.warning("Connection parameter 'port' not set")
            raise Exception("Connection parameter 'port' not set")

        if self.username is None:
            self.logger.warning("Connection parameter 'username' not set")
            raise Exception("Connection parameter 'username' not set")

        if self.password is None:
            self.logger.warning("Connection parameter 'password' not set")
            raise Exception("Connection parameter 'password' not set")

        self.logger.debug("Connection parameters successfully checked")

    async def connect(self, host: str, port: int, username: str, password: str,
                      database: str):
        """
        Open connection.
        """
        try:
            self.logger.error("Connecting to DB")

            self._set_connection_parameters(host[0], database, port, username,
                                            password)

            self._check_connection_parameters()

            self.client = DataFrameClient(self.host, self.port, self.username,
                                          self.password, self.database)
        except Exception as err:
            self.logger.error("Connection failed: " + str(err))
            raise Exception("Connection failed: " + str(err))

    async def disconnect(self):
        """
        Close connection.
        """
        self.logger.debug("Closing connection")
        if self.client is not None:
            self.client.close()
        self.logger.debug("Connection closed")

    def _set_read_parameters(self,
                             device_id=None,
                             data_source_id=None,
                             time_upload=None):
        """
        Set reading parameters.

        :param device_id: list of ids [uuid1, uuid2, ..., uuidN]
        :param data_source_id: list of ids [id1, id2, ..., idN]
        :param time_upload: list of tuples of dates [(d_min1 d_max1), (d_min2 d_max2), ..., (d_minN d_maxN)]
        """
        self.logger.debug("Setting reading parameters")

        self.device_id = device_id
        self.logger.debug("device_id = " + str(device_id))

        self.data_source_id = data_source_id
        self.logger.debug("data_source_id = " + str(data_source_id))

        self.time_upload = time_upload
        self.logger.debug("time_upload = " + str(time_upload))

    def _check_read_parameters(self):
        """
        Check reading parameters.
        """
        self.logger.debug("Checking reading parameters")

        if self.device_id is None:
            self.logger.warning("Reading parameter 'device_id' not set")
            raise Exception("Reading parameter 'device_id' not set")

        if self.data_source_id is None:
            self.logger.warning("Reading parameter 'data_source_id' not set")
            raise Exception("Reading parameter 'data_source_id' not set")

        if self.time_upload is None:
            self.logger.warning("Reading parameter 'time_upload' not set")
            raise Exception("Reading parameter 'time_upload' not set")

        if len(self.device_id) != len(self.data_source_id) or len(
                self.device_id) != len(self.time_upload):
            self.logger.warning("Reading parameters have different lengths")
            raise Exception("Reading parameters have different lengths")

        self.logger.debug("Reading parameters successfully checked")

    async def read_data(self,
                        device_id=None,
                        data_source_id=None,
                        time_upload=None,
                        limit=None):
        """
        Read data from db according to object's parameters.

        :param device_id: list of ids [uuid1, uuid2, ..., uuidN]
        :param data_source_id: list of ids [id1, id2, ..., idN]
        :param time_upload: list of tuples of dates [(d_min1 d_max1), (d_min2 d_max2), ..., (d_minN d_maxN)]
        :param limit: retrieved data rows limit

        :return: list of queries results
        """
        results = pd.DataFrame()
        try:
            self.logger.debug("Reading data")

            self._set_read_parameters(device_id, data_source_id, time_upload)

            if limit is not None:
                self.logger.debug("Data reading limit set to " + str(limit))
                limit = "LIMIT " + str(limit)
            else:
                limit = ""

            self._check_read_parameters()

            for di, dsi, tu in zip(self.device_id, self.data_source_id,
                                   self.time_upload):

                params = {
                    "di": str(di),
                    "dsi": str(dsi),
                    "limit": limit,
                    "from": datetime.datetime.strftime(tu[0],
                                                       "%Y-%m-%dT%H:%M:%SZ"),
                    "to": datetime.datetime.strftime(tu[1],
                                                     "%Y-%m-%dT%H:%M:%SZ")
                }

                query = r"SELECT value FROM data WHERE device_id='{di}' ".format(
                    **params)
                query += r"and data_source_id='{dsi}' ".format(**params)
                query += r"and time >= '{from}' and time <= '{to}' ".format(
                    **params)
                query += r"{limit}".format(**params)

                self.logger.debug("Executing query " + str(query))

                result = self.client.query(query)
                name = str(di) + '_' + str(dsi)

                if len(result) != 0:
                    r = result['data']
                    self.logger.debug("Column " + name + " contains " +
                                      str(len(r)) + " rows")
                    r.rename(columns={"value": name}, inplace=True)
                    results = pd.merge(results,
                                       r,
                                       how='outer',
                                       left_index=True,
                                       right_index=True)
                else:
                    self.logger.debug("Column " + name + " contains " +
                                      str(0) + " rows")
                    results[name] = np.nan

        except Exception as err:
            self.logger.error("Impossible to read: " + str(err))
            raise Exception("Impossible to read: " + str(err))
        self.logger.debug("Reading complete: " + str(results.shape) +
                          " entries returned")
        return results

    def _set_write_parameters(self, result_id=None, output_data=None):
        """
        Set writing parameters

        :param result_id: uuid of the result
        :param output_data: DataFrame
        """
        self.logger.debug("Setting writing parameters")
        if result_id is not None:
            self.result_id = result_id
            self.logger.debug("result_id = " + str(result_id))
        if output_data is not None:
            self.output_data = output_data
            self.logger.debug("output_data shape = " + str(output_data.shape))

    def _check_write_parameters(self):
        """
        Check writing parameters
        """
        self.logger.debug("Checking writing parameters")
        if self.result_id is None:
            self.logger.warning("Writing parameter 'result_id' not set")
            raise Exception("Writing parameter 'result_id' not set")

        if self.output_data is None:
            self.logger.warning("Writing parameter 'output_data' not set")
            raise Exception("Writing parameter 'output_data' not set")

        if self.output_data.empty:
            self.logger.warning("'output_data' is empty")
            raise Exception("'output_data' is empty")

        #if self.output_data.shape[1] != len(self.result_id):
        #    self.logger.warning("'output_data' and 'result_id' have different lengths: "
        #                        + str(self.output_data.shape[1]) + " and " + str(len(self.result_id)))
        #    raise Exception("'output_data' and 'result_id' have different lengths: "
        #                    + str(self.output_data.shape[1]) + " and " + str(len(self.result_id)))

        self.logger.debug("Writing parameters successfully checked")

    async def write_data(self, result_id=None, output_data=None):
        """
        Deprecated. Write data from this object to db.

        :param result_id: list of ids [uuid1, uuid2, ..., uuidK]
        :param output_data: DataFrame

        :return: list of result objects
        """
        self.logger.debug("Writing data")
        self._set_write_parameters(result_id, output_data)
        results = []
        try:
            self._check_write_parameters()
            for col, ri in zip(self.output_data.columns, self.result_id):
                df = pd.DataFrame(output_data[col])
                if col.startswith('boolean'):
                    df.rename(columns={col: 'boolean'}, inplace=True)
                else:
                    df.rename(columns={col: 'value'}, inplace=True)
                _ = self.client.write_points(df, 'data_result',
                                             {'result_id': str(ri)})
                results.append(str(ri))
        except Exception as err:
            self.logger.error("Writing failed: " + str(err))
            raise Exception("Writing failed " + str(err))
        self.logger.debug("Writing complete " + str(results))
        return results

    async def write_data_new(self, result_id=None, output_data=None):
        """
        Write data from this object to db.

        :param result_id: list of ids [uuid1, uuid2, ..., uuidK]
        :param output_data: DataFrame

        :return: list of result objects
        """
        self.logger.debug("Writing data")
        self._set_write_parameters(result_id, output_data)
        results = []
        try:
            self._check_write_parameters()
            _ = self.client.write_points(self.output_data, 'data_result',
                                         {'result_id': str(self.result_id[0])})
            results.append(str(self.result_id[0]))
        except Exception as err:
            self.logger.error("Writing failed: " + str(err))
            raise Exception("Writing failed " + str(err))
        self.logger.debug("Writing complete " + str(results))
        return results
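connect, read_data and the write methods are coroutines, and connect indexes its host argument (host[0]), so it appears to expect a sequence rather than a bare string. A minimal, hypothetical usage sketch with placeholder connection values and a running InfluxDB assumed:

import asyncio
import datetime

async def demo():
    io = InsyteInfluxIO()
    # connect() uses host[0], so the host is passed here as a one-element list
    await io.connect(['localhost'], 8086, 'user', 'pass', 'insyte')
    frame = await io.read_data(device_id=['dev-1'], data_source_id=[1],
                               time_upload=[(datetime.datetime(2021, 1, 1),
                                             datetime.datetime(2021, 1, 2))],
                               limit=1000)
    print(frame.shape)
    await io.disconnect()

asyncio.run(demo())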
Example #23
		print(result)
		
		for stock in sub_stock_list:
			ret_code, ret_data = quote_ctx.get_rt_ticker(stock, num=1000)
			if ret_code == 0:
				#print(ret_data)
				# The DataFrame passed to write_points must have its timestamps converted to a standard datetime format and must be sorted by time
				ret_data["time"] = pd.to_datetime(ret_data["time"])
				ret_data = ret_data.set_index("time")
				# Before writing to InfluxDB, localize the timestamps; write_points stores them as UTC
				ret_data.index = ret_data.index.tz_localize('Asia/Shanghai')
				#print(ret_data)
				dbclient.write_points(ret_data, "futu_rt_ticker", tag_columns=['sequence'], database="test")

		time.sleep(60)
		result = quote_ctx.unsubscribe(sub_stock_list, subscribe_subtypes)
		print("unsubscribe result: " + time.asctime())
		print(result)
		# Wait a moment after unsubscribing before re-subscribing; FutuOpenD can be slow once a large amount of data has accumulated
		time.sleep(2)

	return True

if __name__ == '__main__':
	#atexit.register(clean_atexit)
	get_all_data("HK")
	# get_ticker_data("HK", "all")
	consumer.close()
	quote_ctx.close()
	dbclient.close()
	dbquery.close()
Example #24
def write_data_to_influxdb(data):
    influxClient = DataFrameClient("localhost", 8087, "admin", "admin",
                                   "corona_data")
    influxClient.drop_measurement("dus")
    influxClient.write_points(data, "dus")
    influxClient.close()
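A hypothetical call; the frame needs a DatetimeIndex, and since the function drops the dus measurement before writing, each call replaces the previous contents (a running InfluxDB on localhost:8087 is assumed):

import pandas as pd

frame = pd.DataFrame({'cases': [120, 134]},  # placeholder values
                     index=pd.to_datetime(['2020-03-01', '2020-03-02']))
write_data_to_influxdb(frame)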
def main():

    # Initializes the default logger
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO)
    logger = logging.getLogger(APPLICATION_NAME)

    # Checks the Python interpreter version
    if (sys.version_info < (3, 0)):
        logger.error('Python 3 is requested! Leaving the program.')
        sys.exit(-1)

    # Parse arguments
    args = configuration_parser()

    logger.setLevel(args.logging_level)
    logger.info(f'Starting application "{APPLICATION_NAME}"...')
    logger.debug(f'Arguments: {vars(args)}')

    v_latitude, v_longitude = map(float, args.gps_location.split(','))

    v_influxdb_host = args.influxdb_host
    v_influxdb_port = args.influxdb_port

    v_influxdb_database = args.influxdb_database
    v_influxdb_username = args.influxdb_username
    v_influxdb_password = args.influxdb_password

    # Check if "Emon" database exists
    _client = DataFrameClient(host=v_influxdb_host,
                              port=v_influxdb_port,
                              username=v_influxdb_username,
                              password=v_influxdb_password,
                              database=v_influxdb_database)

    _dbs = _client.get_list_database()
    logger.debug(f'List of InfluxDB databases: {_dbs}')
    if v_influxdb_database not in [_d['name'] for _d in _dbs]:
        logger.info(
            f'InfluxDB database "{v_influxdb_database}" not found. Creating a new one.'
        )
        _client.create_database(v_influxdb_database)

    _client.close()

    # Pack all parameters in a dictionary
    _userdata = {
        'LOGGER': logger,
        'LATITUDE': v_latitude,
        'LONGITUDE': v_longitude,
        'INFLUXDB_HOST': v_influxdb_host,
        'INFLUXDB_PORT': v_influxdb_port,
        'INFLUXDB_USER': v_influxdb_username,
        'INFLUXDB_PASS': v_influxdb_password,
        'INFLUXDB_DB': v_influxdb_database,
        'MEASUREMENT_TS': args.measurement_ts,
        'PROCESSED_TS': args.processed_ts,
        'FORECAST_TS': args.forecast_ts,
        'WEATHER_TS': args.weather_ts,
        'HORIZON_LENGTH': args.horizon_length,
        'WEATHER_SERVER_URL': args.weather_server_url,
        'WEATHER_FORECAST_INTERVAL': args.weather_forecast_interval,
        'WEATHER_START_TIMESTAMP': args.weather_start_timestamp
    }

    # Instantiate the scheduler and repeatedly run the "forecasting task"
    # "forecast interval" seconds after its previous execution
    _main_scheduler = continuous_scheduler.MainScheduler()
    _main_scheduler.add_task(forecasting_task, 0, args.forecast_interval, 0,
                             _userdata)
    _main_scheduler.start()
Example #26
class InfluxServerIO:
    logger = logging.getLogger('influx_server_io')

    def __init__(self,
                 host=None,
                 database=None,
                 port=None,
                 username=None,
                 password=None):
        """
        Constructor.
        """
        self.logger.debug("Setting connection parameters")
        self.host = host
        self.database = database
        self.port = port
        self.username = username
        self.password = password

        self.client = None

    def connect(self):
        """
        Open connection.
        """
        try:
            self.logger.debug("Connecting to DB")

            self.client = DataFrameClient(self.host,
                                          self.port,
                                          self.username,
                                          self.password,
                                          self.database,
                                          timeout=15)
            self.client.ping()

            self.logger.debug("DB Connection set")
        except Exception as err:
            self.logger.error("DB Connection failed: " + str(err))
            raise Exception("DB Connection failed: " + str(err))

    def disconnect(self):
        """
        Close connection.
        """
        try:
            self.logger.debug("Closing DB connection")
            if self.client is not None:
                self.client.close()
            self.client = None
            self.logger.debug("DB Connection closed")
        except Exception as err:
            self.logger.error("Can't disconnect from DB: " + str(err))
            raise Exception("Can't disconnect from DB: " + str(err))

    def read_data(self,
                  device_id=None,
                  data_source_id=None,
                  time_upload=None,
                  limit=None):
        """
        Read data from db according to object's parameters.
        :param device_id: list of ids [uuid1, uuid2, ..., uuidN]
        :param data_source_id: list of ids [id1, id2, ..., idN]
        :param time_upload: list of tuples of dates [(d_min1 d_max1), (d_min2 d_max2), ..., (d_minN d_maxN)]
        :param limit: retrieved data rows limit
        :return: list of queries results
        """
        results = pd.DataFrame()
        try:
            self.logger.debug("Reading data")

            if limit is not None:
                self.logger.debug("Data reading limit set to " + str(limit))
                limit = "LIMIT " + str(limit)
            else:
                limit = ""

            for di, dsi, tu in zip(device_id, data_source_id, time_upload):

                params = {
                    "di": str(di),
                    "dsi": str(dsi),
                    "limit": limit,
                    "from": datetime.datetime.strftime(tu[0],
                                                       "%Y-%m-%dT%H:%M:%SZ"),
                    "to": datetime.datetime.strftime(tu[1],
                                                     "%Y-%m-%dT%H:%M:%SZ")
                }

                query = r"SELECT value FROM data WHERE device_id='{di}' ".format(
                    **params)
                query += r"and data_source_id='{dsi}' ".format(**params)
                query += r"and time >= '{from}' and time <= '{to}' ".format(
                    **params)
                query += r"{limit}".format(**params)

                self.logger.debug("Executing query " + str(query))

                result = self.client.query(query)
                name = str(di) + '_' + str(dsi)

                if len(result) != 0:
                    r = result['data']
                    self.logger.debug("Column " + name + " contains " +
                                      str(len(r)) + " rows")
                    r.rename(columns={"value": name}, inplace=True)
                    results = pd.merge(results,
                                       r,
                                       how='outer',
                                       left_index=True,
                                       right_index=True)
                else:
                    self.logger.debug("Column " + name + " contains " +
                                      str(0) + " rows")
                    results[name] = np.nan

        except Exception as err:
            self.logger.error("Impossible to read: " + str(err))
            raise Exception("Impossible to read: " + str(err))
        self.logger.debug("Reading complete: " + str(results.shape) +
                          " entries returned")
        return results

    def write_data(self, result_id=None, output_data=None):
        """
        Write data from this object to db.
        :param result_id: list of ids [uuid1, uuid2, ..., uuidK]
        :param output_data: DataFrame
        :return: list of result objects
        """
        self.logger.debug("Writing data")
        results = []
        try:
            for col, ri in zip(output_data.columns, result_id):
                df = pd.DataFrame(output_data[col])
                if col.startswith('bool'):
                    df.rename(columns={col: 'boolean'}, inplace=True)
                elif col.startswith('val'):
                    df.rename(columns={col: 'value'}, inplace=True)
                else:
                    self.logger.warning(
                        "Column name: " + str(col) +
                        " (doesn't start with 'val' or 'bool', renaming to 'value')"
                    )
                    df.rename(columns={col: 'value'}, inplace=True)
                v = self.client.write_points(df, 'data_result',
                                             {'result_id': str(ri)})
                results.append(str(ri))
        except Exception as err:
            self.logger.error("Writing to DB failed: " + str(err))
            raise Exception("Writing to DB failed " + str(err))
        self.logger.debug("Writing to DB complete " + str(results))
        return results
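
A short usage sketch for InfluxServerIO, assuming a reachable InfluxDB server and existing device/data-source ids (all connection values and ids below are placeholders):

import datetime

io = InfluxServerIO(host='localhost', database='telemetry',
                    port=8086, username='user', password='pass')
io.connect()
window = (datetime.datetime(2021, 1, 1), datetime.datetime(2021, 1, 2))
frame = io.read_data(device_id=['dev-1'], data_source_id=[42],
                     time_upload=[window], limit=1000)
# Columns starting with 'val' are written back under the field name 'value'
# (assumes the query returned rows).
frame.columns = ['val_result']
io.write_data(result_id=['res-1'], output_data=frame)
io.disconnect()
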
def optimization():
    # Optimization timestep
    TIMESTEP = 12  # 5min interval (60/5)

    # Query uncontrolled demand
    # Note: uncontrolled demand is already on a 5min timestep
    client = DataFrameClient(host, port, user, password, dbname)
    start = datetime.now()
    query = ("select * from uncontr " + "WHERE time >= '" +
             (start + timedelta(minutes=5)).strftime("%Y-%m-%dT%H:%M:%SZ") +
             "' AND time <= '" +
             (start + timedelta(hours=24)).strftime("%Y-%m-%dT%H:%M:%SZ") +
             "'")
    uncontr = client.query(query)['uncontr']

    # Get the reference t=0
    first_t = uncontr.iloc[0].name
    uncontr_t = uncontr.index
    uncontr_index = list(range(0, len(uncontr_t)))

    # Change index to integers and rename column
    opt_uncontr = uncontr.copy()
    opt_uncontr['index'] = uncontr_index
    opt_uncontr.set_index('index', drop=True, inplace=True)
    opt_uncontr.rename(columns={'uncontr': 'p'}, inplace=True)

    # Query order books
    try:
        query = ("select * from bbook " + "WHERE startby >= " +
                 str(int((start + timedelta(minutes=5)).timestamp() * 1000)) +
                 " AND endby <= " +
                 str(int((start + timedelta(hours=24)).timestamp() * 1000)))
        bbook = client.query(query)['bbook']

        # Set startby and endby as integers
        opt_bbook = bbook.copy()
        opt_bbook['startby'] -= first_t.timestamp() * 1000
        opt_bbook['startby'] /= 60 * 1000 * 60 / TIMESTEP
        opt_bbook['endby'] -= first_t.timestamp() * 1000
        opt_bbook['endby'] /= 60 * 1000 * 60 / TIMESTEP
        opt_bbook['id'] = list(range(0, len(opt_bbook)))
        opt_bbook.set_index('id', drop=True, inplace=True)
    except:
        # No orders at the moment
        opt_bbook = pandas.DataFrame()

    try:
        query = ("select * from sbook " + "WHERE startby >= " +
                 str(int((start + timedelta(minutes=5)).timestamp() * 1000)) +
                 " AND endby <= " +
                 str(int((start + timedelta(hours=24)).timestamp() * 1000)))
        sbook = client.query(query)['sbook']

        # Set startby and endby as integers
        opt_sbook = sbook.copy()
        opt_sbook['startby'] -= first_t.timestamp() * 1000
        opt_sbook['startby'] /= 60 * 1000 * 60 / TIMESTEP
        opt_sbook['endby'] -= first_t.timestamp() * 1000
        opt_sbook['endby'] /= 60 * 1000 * 60 / TIMESTEP
        opt_sbook['id'] = list(range(0, len(opt_sbook)))
        opt_sbook.set_index('id', drop=True, inplace=True)
    except:
        # No orders at the moment
        opt_sbook = pandas.DataFrame()

    try:
        query = ("select * from dbook " + "WHERE startby >= " +
                 str(int((start + timedelta(minutes=5)).timestamp() * 1000)) +
                 " AND endby <= " +
                 str(int((start + timedelta(hours=24)).timestamp() * 1000)))
        dbook = client.query(query)['dbook']

        opt_dbook = dbook.copy()
        opt_dbook['startby'] -= first_t.timestamp() * 1000
        opt_dbook['startby'] /= 60 * 1000 * 60 / TIMESTEP
        opt_dbook['endby'] -= first_t.timestamp() * 1000
        opt_dbook['endby'] /= 60 * 1000 * 60 / TIMESTEP
        opt_dbook['id'] = list(range(0, len(opt_dbook)))
        opt_dbook.set_index('id', drop=True, inplace=True)
        # Turn profile_kw from str to floats
        opt_dbook['profile_kw'] = opt_dbook['profile_kw'].apply(
            lambda x:
            [float(v) for v in x[1:][:-1].replace(" ", "").split(',')])
    except:
        # No orders at the moment
        opt_dbook = pandas.DataFrame()

    # Run the optimization
    tic = datetime.now()
    result = maximize_self_consumption(opt_uncontr,
                                       opt_bbook,
                                       opt_sbook,
                                       opt_dbook,
                                       timestep=1 / TIMESTEP,
                                       solver='glpk',
                                       verbose=False,
                                       timelimit=60)
    logger.info('GLPK time elapsed (hh:mm:ss.ms) {}'.format(datetime.now() -
                                                            tic))

    # Save results back to influxDB (and remove previous schedule)
    total = uncontr.copy()
    total.rename(columns={'uncontr': 'contr'}, inplace=True)
    total['contr'] += result['demand_controllable']
    client.write_points(total, 'contr')

    client.drop_measurement('bschedule')
    if result['batteryin'] is not None:
        bschedule = (result['batteryin'] - result['batteryout']).copy()
        bschedule['index'] = uncontr_t
        bschedule.set_index('index', drop=True, inplace=True)
        bschedule.rename_axis(None, inplace=True)
        client.write_points(bschedule, 'bschedule')

    client.drop_measurement('sschedule')
    if result['demandshape'] is not None:
        sschedule = result['demandshape'].copy()
        sschedule['index'] = uncontr_t
        sschedule.set_index('index', drop=True, inplace=True)
        sschedule.rename_axis(None, inplace=True)
        client.write_points(sschedule, 'sschedule')

    client.drop_measurement('dschedule')
    if result['demanddeferr'] is not None:
        dschedule = result['demanddeferr'].copy()
        dschedule['index'] = uncontr_t
        dschedule.set_index('index', drop=True, inplace=True)
        dschedule.rename_axis(None, inplace=True)
        client.write_points(dschedule, 'dschedule')

    # Close DB connection
    client.close()
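
The three try/except blocks above repeat the same query-and-rescale pattern for bbook, sbook and dbook. A sketch of a small helper that folds that repetition, assuming the same client, start, first_t and TIMESTEP as above (the helper name is illustrative):

import pandas
from datetime import timedelta

def load_order_book(client, measurement, start, first_t, timestep):
    # Query one order book and rescale startby/endby from
    # epoch-milliseconds to optimization timesteps.
    query = ("select * from " + measurement + " WHERE startby >= " +
             str(int((start + timedelta(minutes=5)).timestamp() * 1000)) +
             " AND endby <= " +
             str(int((start + timedelta(hours=24)).timestamp() * 1000)))
    try:
        book = client.query(query)[measurement].copy()
    except KeyError:
        # No orders at the moment
        return pandas.DataFrame()
    for col in ('startby', 'endby'):
        book[col] -= first_t.timestamp() * 1000
        book[col] /= 60 * 1000 * 60 / timestep
    book['id'] = list(range(0, len(book)))
    book.set_index('id', drop=True, inplace=True)
    return book

# e.g. opt_bbook = load_order_book(client, 'bbook', start, first_t, TIMESTEP)
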
Ejemplo n.º 28
0
    def make_dataset(capteur_list, text_input_start, text_input_end,
                     nom_capteur, L_influx):

        xs = []
        ys = []
        colors = []
        labels = []

        client = DataFrameClient(host=L_influx[3],
                                 port=L_influx[4],
                                 username=L_influx[1],
                                 password=L_influx[2])
        client.switch_database(L_influx[0])

        #Granularity to avoid displaying too many points on the figure
        end = datetime.strptime(text_input_end, "%Y-%m-%d %H:%M:%S")
        start = datetime.strptime(text_input_start, "%Y-%m-%d %H:%M:%S")
        ecartSecondes = (end - start).total_seconds()

        if ecartSecondes <= 86400:
            groupby = None
        elif ecartSecondes <= 5000000:
            groupby = '1m'
        elif ecartSecondes <= 77000000:
            groupby = '15m'
        else:
            groupby = '1h'

        #print(ecartSecondes, groupby)

        #Construction of vectors x and y for each register to be displayed
        for elt in capteur_list:

            if groupby is None:
                requete = "SELECT " + '"' + elt + '"' + " FROM measure WHERE time >= " + "'" + text_input_start + "'" + " AND " + "time <= " + "'" + text_input_end + "'" + " AND " + "ID=" + "'" + nom_capteur + "'"
                datasets = client.query(requete)
                df = pd.DataFrame(datasets['measure'])
                df = df.rename_axis('Date')
                df.index = df.index.astype('datetime64[ns]')

                #Convert data into a list of lists
                a = []
                b = []
                for i in range(df.shape[0]):
                    a.append(df[elt][i])
                    b.append(df.index[i])
                xs.append(a)
                ys.append(b)

                colors.append(capteur_color[capteur_list.index(elt)])

                labels.append(elt)

            else:
                requete = "SELECT MEAN(" + '"' + elt + '"' + ") FROM measure WHERE time >= " + "'" + text_input_start + "'" + " AND " + "time <= " + "'" + text_input_end + "'" + " AND " + "ID=" + "'" + nom_capteur + "'" + " GROUP BY time(" + groupby + ") fill(0)"
                datasets = client.query(requete)
                df = pd.DataFrame(datasets['measure'])
                df = df.rename_axis('Date')
                df.index = df.index.astype('datetime64[ns]')

                #Convert data into a list of lists
                a = []
                b = []
                for i in range(df.shape[0]):
                    a.append(df['mean'][i])
                    b.append(df.index[i])
                xs.append(a)
                ys.append(b)

                colors.append(capteur_color[capteur_list.index(elt)])

                labels.append(elt)

        client.close()

        #Build the data source for the figure
        new_src = ColumnDataSource(data={
            'x': xs,
            'y': ys,
            'color': colors,
            'label': labels
        })

        return new_src
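
make_dataset is typically called from a dashboard callback; a minimal sketch of pushing the refreshed source into an existing Bokeh ColumnDataSource bound to the plot (the sensor names, dates and the src variable are assumptions, not shown in this snippet):

new_src = make_dataset(['temperature', 'humidity'],
                       '2021-06-01 00:00:00', '2021-06-02 00:00:00',
                       'capteur_01', L_influx)
src.data.update(new_src.data)  # refresh the glyphs bound to src
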
Ejemplo n.º 29
0
def main():
    # Connect to influxdb
    client = InfluxDBClient(host=IP_RP4,
                            port=PORT,
                            username=USER_NAME,
                            password=PWD,
                            database=DB_NAME,
                            timeout=10)

    # String of the query to make to get the data to clean
    last_cleaned_time, query = query_data_to_clean(client, 'sensehat')

    # Make the query and get the generator of points
    points = query_to_points(query, client)

    # Create panda dataframe
    df = pd.DataFrame(points)

    # If there is no data to clean, exit
    if len(df.index) <= 0:
        client.close()
        sys.exit()

    # ------------------------------------------------------------------------------
    # Cleaning
    # ------------------------------------------------------------------------------
    # Do the convolutions

    # First create a new dataframe which will contain the convolution signals
    df_convol = pd.DataFrame()

    # Indexes must have a datetime format in order to be directly written to influxdb
    df_convol['Datetime'] = pd.to_datetime(df['time'])
    df_convol = df_convol.set_index('Datetime')

    # Convolve the entire dataframe
    previous_row = query_last_raw_values(last_cleaned_time, client)

    convolve_dataframe(df, dst_df=df_convol, previous_row=previous_row)

    # Connect to DB with a DataFrameClient
    df_client = DataFrameClient(host=IP_RP4,
                                port=PORT,
                                username=USER_NAME,
                                password=PWD,
                                database=DB_NAME,
                                timeout=10)

    # Write the convolved signals to the influxdb server
    write_df(df_convol, 'convol_signals', df_client)

    # ------------------------------------------------------------------------------
    # Correct the abnormal values

    # The result dataframe is now the original df
    # !!! Normally, elements at the same index in df and df_convol correspond
    # to the same timestamp (this script is executed in full via a cron job,
    # so both cover the same time range). However, if e.g. only the convolution
    # is done and the cleaning runs later, their indexing will differ. To avoid
    # this, we query the DB again for the correct time range, so indexing stays
    # consistent.
    from_time = df['time'].to_numpy()[0]
    df_convol = pd.DataFrame(query_convolution(client, from_time))

    # Clean the data
    clean_data(df, df_convol, previous_row)
    # Now the dataframe df is clean

    # ------------------------------------------------------------------------------
    # Now fetch the meteo suisse data and adjust; we don't want to search again
    # for the last sample in the convolution signal, as we just wrote the new
    # convolved samples
    last_cleaned_time, query = query_data_to_clean(client,
                                                   source='meteosuisse',
                                                   last_cleaned=last_cleaned_time,
                                                   update_last_cleaned=False,
                                                   fields='"temperature", "humidity"')
    points_ms = query_to_points(query, client)
    df_ms = pd.DataFrame(points_ms)

    # Format the meteo suisse data dataframe
    form_df = prepare_ms_df(df_ms, df['time'])
    if form_df is not None:
        # Get the average differences for each column
        # (i.e. sensehat data - meteo suisse data)
        avg_diffs = avg_diff_df(form_df, df)

        # Adjust the clean sensehat dataframe "df" with the average differences
        # for each column (in place)
        adjust_df(df, avg_diffs)

        # Now "df" is the sensehat data, cleaned and adjusted
        # Write the result to the measurement "clean_sh_data"
        # (just have to set the index to the time column to be able to write
        # directly to the influxdb database)
        df['time'] = pd.to_datetime(df['time'])
        df = df.set_index('time')
        write_df(df, 'clean_sh_data', df_client)

    # Close the connections to the influxdb server
    df_client.close()
    client.close()
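
write_df is defined elsewhere in the original script; a minimal sketch of what such a helper might look like, assuming it simply wraps DataFrameClient.write_points for a DataFrame that already carries a DatetimeIndex:

def write_df(dataframe, measurement, df_client):
    # Hypothetical implementation: the real helper may add tags or batching.
    if not dataframe.empty:
        df_client.write_points(dataframe, measurement)
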