Example 1
import os
from datetime import timedelta

import numpy as np
import pandas as pd

# pgConn and the get_*_time_* helpers are project-level utilities
# imported elsewhere in the original module.


def load_data(self, uni, data_dir, interval, startdate=None):
    """Import OHLCV data for every 'BASE-QUOTE-EXCHANGE' ticker in `uni`
    from a local CSV directory and upload it to the database."""
    dataname = 'ohlcv'
    for col in uni:
        base, quote, exch = col.split("-")
        tablename = dataname.upper() + "_SPOT_" + base + "_" + quote
        exch_dir = data_dir + exch + "/"
        csv_file = exch_dir + exch + "-" + tablename + ".csv"
        if not os.path.isdir(exch_dir):
            os.makedirs(exch_dir)
        if os.path.isfile(csv_file):
            raw_data = pd.read_csv(csv_file, index_col=0)
            try:
                enddate = self.get_last_time_csv(raw_data['time'])
            except Exception:
                enddate = None
        else:
            # No CSV yet: nothing to read, so there is no end timestamp.
            raw_data = pd.DataFrame()
            enddate = None
        # Resume from the last timestamp already in the database, falling
        # back to the first timestamp available in the CSV.
        startdate = self.get_last_time_db(exch, tablename, startdate, self.env)
        if startdate is None:
            startdate = self.get_first_time_csv(raw_data['time'])
        startdate = pd.Timestamp(startdate)
        # Snap the start down onto the interval grid.
        startdate = startdate - timedelta(minutes=startdate.minute % interval)
        enddate = pd.Timestamp(enddate)
        if startdate < enddate:
            print('[DM] Uploading {} from {} to {} ...'.format(
                exch + "." + tablename, startdate, enddate))
            try:
                raw_data['time'] = pd.to_datetime(raw_data['time'])
                raw_data = raw_data.set_index(['time'])
                raw_data['time'] = raw_data.index
                # Keep an hour of lead-in so gaps at the window start
                # can still be forward-filled.
                mask = raw_data['time'] >= startdate - timedelta(hours=1)
                subdata = raw_data.loc[mask]
                # Reindex onto a full 1-minute grid, forward-fill the
                # holes, then keep only the interval timestamps.
                idx = pd.date_range(startdate - timedelta(hours=1),
                                    enddate, freq='1min')
                subdata = subdata.reindex(idx, fill_value=np.nan)
                subdata = subdata.ffill()
                subdata['time'] = pd.to_datetime(subdata.index)
                days_range = pd.date_range(startdate, enddate,
                                           freq=str(interval) + 'min')
                subdata = subdata[subdata['time'].isin(days_range)]
                subdata = subdata.ffill().bfill()
                pgConn.storeInDb(subdata, tablename, self.env, schema=exch)
            except Exception as e:
                print('ERROR: {}'.format(exch + '_' + tablename))
                print(e)
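A minimal, self-contained sketch of the fill-then-downsample step used above, on synthetic data (all values here are made up): reindex onto a 1-minute grid, forward-fill the holes, then keep only the timestamps on the target interval.

import numpy as np
import pandas as pd

# Synthetic 1-minute closes with two missing minutes.
idx = pd.date_range('2021-01-01', periods=20, freq='1min')
raw = pd.DataFrame({'close': np.arange(20.0)}, index=idx).drop(idx[3:5])

# Reindex onto the full minute grid and forward-fill the holes.
full = raw.reindex(pd.date_range(idx[0], idx[-1], freq='1min')).ffill()
# Keep only the 5-minute marks, mirroring the isin() filter above.
bars_5m = full[full.index.isin(pd.date_range(idx[0], idx[-1], freq='5min'))]
print(bars_5m)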
Example 2
import os
import time

import pandas as pd

# `path`, the get_*_time_* helpers, get_coinapi_data, reformat_data and
# pgConn are module-level names defined elsewhere in the original file.


def get_and_store_ohlcv_data(is_csv, exchangeSymbol, marketName, schemaName,
                             tableName, environment, startTime, endTime, limit,
                             coinapiKey):
    """Fetch OHLCV data from CoinAPI and append it to a local CSV or the
    database. Returns True when the page was full, i.e. more may remain."""
    existing = False
    try:
        _, base, quote = marketName.split('_')
        if is_csv:
            filepath = path + '{}/{}-OHLCV_SPOT_{}_{}.csv'
            filename = filepath.format(schemaName, schemaName, base, quote)
            if os.path.isfile(filename):
                ohlcv_data = pd.read_csv(filename, index_col=0)
                # Resume from the last timestamp already in the CSV.
                tempTime = startTime
                startTime = get_last_time_csv(ohlcv_data['time'])
                if startTime is not None:
                    existing = True
                else:
                    startTime = tempTime
        else:
            # Resume from the last timestamp already in the database.
            startTime = get_last_time_db(schemaName, tableName, environment,
                                         startTime)

        print(schemaName, tableName, startTime)
        if startTime >= endTime:
            return False

        data = get_coinapi_data(exchangeSymbol, marketName, startTime, endTime,
                                limit, coinapiKey)
        if data is None:
            return False
        # Reformat to (time, open, high, low, close, baseVolume, tradesCount).
        data = reformat_data(data)

        if is_csv:
            # Stamp each row with the insertion time.
            data['in_z'] = pd.to_datetime(time.strftime("%Y%m%dT%H%M%S"))
            if existing:
                # DataFrame.append was removed in pandas 2.0; concat instead.
                ohlcv_data = pd.concat([ohlcv_data, data], ignore_index=True)
                ohlcv_data.to_csv(filename)
            else:
                data.to_csv(filename)
        else:
            pgConn.storeInDb(data, tableName, environment, schema=schemaName)

        # A full page suggests more data remains in the requested window.
        return limit == len(data)
    except Exception as e:
        print(e)
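A hedged usage sketch: the True return value (limit == len(data)) signals a full page, so looping until a falsy result pulls the whole window. Every argument value below is a placeholder, and startTime advances between calls because each pass re-reads the last stored timestamp from the CSV.

# All values below are placeholders, not real credentials or symbols.
while get_and_store_ohlcv_data(
        is_csv=True, exchangeSymbol='BINANCE', marketName='SPOT_BTC_USDT',
        schemaName='BINA', tableName='OHLCV_SPOT_BTC_USDT',
        environment='aws_dev', startTime='2021-01-01T00:00:00',
        endTime='2021-02-01T00:00:00', limit=1000, coinapiKey='...'):
    pass  # each iteration appends one more page to the CSV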
Example 3
def store_first_time(self):
    """Read the universe CSV, transpose it so dates become rows, promote
    the first row to the header, and store the result in the database."""
    df = pd.read_csv(self.dir, index_col=0)
    out_df = df.T
    temp = out_df.iloc[0]        # first row of the transposed frame
    temp.name = None
    out_df.columns = temp        # promote it to the column header
    out_df.insert(loc=0, column='time', value=out_df.index)
    storeInDb(out_df, tableName=self.table_name, environment='aws_dev',
              addMilestoneCol=False, schema=self.schema)
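A self-contained sketch of the transpose-and-promote-header step, on a made-up frame standing in for the CSV. Note that, as in the original, the row promoted to the header also stays in the output.

import pandas as pd

# Made-up frame standing in for pd.read_csv(self.dir, index_col=0).
df = pd.DataFrame({'2017-10-01': ['BTC', 'ETH'], '2017-10-02': ['BTC', 'XRP']},
                  index=['rank1', 'rank2'])
out_df = df.T                     # dates become the rows
temp = out_df.iloc[0]             # first transposed row
temp.name = None
out_df.columns = temp             # promote it to the header
out_df.insert(loc=0, column='time', value=out_df.index)
print(out_df)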
Example 4
def store_daily_uni(self):
    """Store only the date columns newer than the last stored date."""
    df = pd.read_csv(self.dir, index_col=0)
    days_range = df.columns[3:]    # date columns start at index 3
    last_date = '2017-10-01'       # hard-coded; originally self.last_db_date()
    # ISO date strings compare correctly as plain strings.
    mask = days_range > last_date
    if mask.any():
        days_range = days_range[mask]
        out_df = df[days_range].T
        temp = df.T.iloc[0]        # first transposed row becomes the header
        temp.name = None
        out_df.columns = temp
        out_df.insert(loc=0, column='time', value=out_df.index)
        storeInDb(out_df, tableName=self.table_name, environment='aws_dev',
                  addMilestoneCol=False, schema=self.schema)
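The date mask above works because ISO-formatted date strings order chronologically under plain string comparison; a tiny self-contained check:

import pandas as pd

days_range = pd.Index(['2017-09-30', '2017-10-01', '2017-10-02'])
mask = days_range > '2017-10-01'   # lexicographic == chronological for ISO dates
print(days_range[mask])            # Index(['2017-10-02'], dtype='object')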
Example 5
import pandas as pd
from requests.exceptions import ChunkedEncodingError
from utils.postgresConnection import storeInDb


def store_it(tablename, f):
    # Call the zero-argument fetcher `f` and store its rows in coincap,
    # tolerating dropped connections from the upstream API.
    try:
        data = f()
        return storeInDb(pd.DataFrame(data), tablename, 'aws_dev', schema='coincap')
    except ChunkedEncodingError as CEE:
        print("There was a CEE: {}".format(CEE))
Example 6
import time
from collections import defaultdict
from utils.postgresConnection import storeInDb, query
from get_data_fns import get_coins, get_history_of_coin, join_history_data

if __name__ == '__main__':
    coins = get_coins()
    ts_df = query('select distinct ts, symbol from coincap.history',
                  environment='aws_dev',
                  dataframe=True)
    # Seed each coin's set of timestamps already stored, so restarts do
    # not insert duplicate rows.
    unix_ts_already_done_dict = defaultdict(set)
    for coin in set(ts_df['symbol'].values):
        unix_ts_already_done_dict[coin] = set(
            ts_df[ts_df['symbol'] == coin]['ts'].values)
    # Poll the API forever, storing only rows not seen before.
    while True:
        for coin in get_coins():
            data = get_history_of_coin(coin)
            if data is None:
                continue
            df = join_history_data(data)
            df['symbol'] = coin
            ts_already_done = unix_ts_already_done_dict[coin]
            df = df[~df['ts'].isin(ts_already_done)]  # take away duplicates
            unix_ts_already_done_dict[coin] |= set(df['ts'].values)  # union
            storeInDb(df, 'history', 'aws_dev', schema='coincap')
        time.sleep(2.5)  # throttle the polling loop
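A self-contained sketch of the dedup step inside the loop, on made-up rows: filter out timestamps already seen, then union the survivors back into the seen-set.

import pandas as pd

seen = {1620000000, 1620000300}
df = pd.DataFrame({'ts': [1620000000, 1620000300, 1620000600],
                   'price': [57000.0, 57100.0, 57250.0]})
df = df[~df['ts'].isin(seen)]      # drop rows already stored
seen |= set(df['ts'].values)       # remember the new ones
print(df)                          # only the 1620000600 row survives
print(len(seen))                   # 3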