def load_file(filename):
    """Load one daily CSV file from BASE_DIR and hand its recent rows to rearrange.

    The file name must contain ``'daily_'``; the ticker is the text between
    the first and second underscore of the name.  Rows are sorted by their
    ``'Date'`` field and only those strictly after 2004-01-01 are kept.

    Returns a tuple ``(ticker, iso_week, new_data)`` where the last two
    items are the pair returned by ``rearrange`` for the filtered rows.
    """
    assert 'daily_' in filename
    symbol = filename.split('_')[1]

    records = readCSV(op.join(BASE_DIR, filename),
                      date_name='Date', date_format='%Y%m%d')
    records = np.sort(records, 0, order=['Date'])

    # Strict comparison: a row dated exactly on the cutoff is dropped.
    cutoff = datetime.date(2004, 1, 1)
    recent = records[records['Date'] > cutoff]

    iso_week, new_data = rearrange(recent)
    return symbol, iso_week, new_data
def get_sig_data(filename, sig_funcs):
    """Evaluate every signal function on each calendar day of a timestamped CSV.

    The file is read with ``readCSV`` (``'Datetime'`` column, format
    ``%Y%m%d%H%M``) and sorted by that column.  Rows are grouped by the
    date part of their timestamp, and each callable in ``sig_funcs`` is
    applied to one day's rows at a time.

    The number of distinct dates and the dates themselves are printed.

    Returns ``np.array`` of rows, one per distinct date in ascending
    order; each row holds the results of ``sig_funcs`` in order.
    """
    records = readCSV(filename, datetime_name="Datetime",
                      datetime_format="%Y%m%d%H%M")
    records = np.sort(records, 0, order=["Datetime"])

    days = np.array([stamp.date() for stamp in records["Datetime"]])
    distinct_days = np.unique(days)
    print(len(distinct_days))
    print(distinct_days)

    rows = []
    for day in distinct_days:
        day_records = records[days == day]
        rows.append([sig_func(day_records) for sig_func in sig_funcs])
    return np.array(rows)
def load_all_data(self, filenames, min_date='20040101'):
    """Load several daily CSV files and record the dates they cover.

    Each path in ``filenames`` must contain ``'daily_'``; the ticker is the
    text between the first and second underscore of the file's base name.
    Every file is read with ``readCSV`` and sorted by its ``'Date'`` field.

    Side effects:
        ``self._result`` -- dict mapping ticker to its sorted records.
        ``self._dates``  -- ascending list of ``'%Y%m%d'`` strings, one per
                            distinct date seen in any file, restricted to
                            those ``>= min_date`` (string comparison).
        Prints the loaded tickers.
    """
    result = {}
    seen_dates = set()
    for fname in filenames:
        assert 'daily_' in fname
        ticker = op.split(fname)[-1].split('_')[1]
        rec = readCSV(fname, date_name='Date', date_format='%Y%m%d')
        rec = np.sort(rec, 0, order=['Date'])
        result[ticker] = rec
        # A set de-duplicates as we go; growing an array with np.append
        # would re-copy every previously collected date for each file.
        seen_dates.update(rec['Date'])

    self._result = result

    # Format each distinct date once, then filter and sort the strings.
    stamps = (dt.strftime('%Y%m%d') for dt in seen_dates)
    self._dates = sorted(stamp for stamp in stamps if stamp >= min_date)

    # Single-argument print() emits the same text under Python 2 and 3.
    print("Tickers: ")
    print(" ".join((" ", ", ".join(result.keys()))))
def get_data(filename, sig_funcs, fut_func):
    """Build rows of per-day signals paired with a value from the next two days.

    The file is read with ``readCSV`` (``'Datetime'`` column, format
    ``%Y%m%d%H%M``), sorted by that column and grouped by the date part of
    each timestamp.  For every run of three consecutive distinct dates
    ``(d0, d1, d2)``:

    * each callable in ``sig_funcs`` is applied to the rows of ``d0``;
    * ``fut_func`` is called with the rows of ``d0`` and the combined rows
      of ``d1`` and ``d2``.

    The number of distinct dates and the dates themselves are printed.

    Returns ``np.array`` of rows, each being the signal results followed
    by the ``fut_func`` result.  Fewer than three distinct dates yield no
    rows.
    """
    records = readCSV(filename, datetime_name="Datetime",
                      datetime_format="%Y%m%d%H%M")
    records = np.sort(records, 0, order=["Datetime"])

    days = np.array([stamp.date() for stamp in records["Datetime"]])
    distinct_days = np.unique(days)
    print(len(distinct_days))
    print(distinct_days)

    rows = []
    # Three offset views of the sorted dates give (previous, current, next).
    day_triples = zip(distinct_days, distinct_days[1:], distinct_days[2:])
    for prev_day, day, next_day in day_triples:
        prev_records = records[days == prev_day]
        signals = [sig_func(prev_records) for sig_func in sig_funcs]
        future_records = records[(days == day) | (days == next_day)]
        rows.append(signals + [fut_func(prev_records, future_records)])
    return np.array(rows)
def load_all_data(self, filenames, ret_func, min_date='20040101'):
    """Load daily CSV files, transform each one and join them on ``'Date'``.

    Each path in ``filenames`` must contain ``'daily_'``; the ticker is the
    text between the first and second underscore of the file's base name.
    Every file is read with ``readCSV``, sorted by its ``'Date'`` field and
    passed to ``ret_func(rec, ticker)``.  The per-ticker results are then
    combined with ``recs_inner_join`` using the tickers as postfixes.

    Side effects:
        ``self._tickers`` -- tuple of tickers, in the order passed to the join.
        ``self._result``  -- the joined records, sorted by ``'Date'``.
        ``self._dates``   -- ``'%Y%m%d'`` strings for the joined rows whose
                             date is ``>= min_date`` (string comparison).
        Prints the loaded tickers.

    Raises:
        ValueError: if ``filenames`` yields no files, since there is
            nothing to join.
    """
    result = {}
    for fname in filenames:
        assert 'daily_' in fname
        ticker = op.split(fname)[-1].split('_')[1]
        rec = readCSV(fname, date_name='Date', date_format='%Y%m%d')
        rec = np.sort(rec, 0, order=['Date'])
        result[ticker] = ret_func(rec, ticker)

    # Unpacking zip() of an empty dict fails with an opaque ValueError;
    # raise the same exception type with a message that names the cause.
    if not result:
        raise ValueError("load_all_data needs at least one file to join")

    tickers, recs = zip(*result.items())
    self._tickers = tickers
    self._result = recs_inner_join('Date', recs, postfixes=tickers)
    self._result = np.sort(self._result, 0, order=['Date'])

    # Format each row's date once, then filter on the formatted string.
    stamps = (dt.strftime('%Y%m%d') for dt in self._result['Date'])
    self._dates = [stamp for stamp in stamps if stamp >= min_date]

    # Single-argument print() emits the same text under Python 2 and 3.
    print("Tickers: ")
    print(" ".join((" ", ", ".join(result.keys()))))