def fetch(self):
    session = Connection.get_instance().get_session()
    req = Request("http://rates.fxcm.com/RatesXML",
                  headers={'User-Agent': "Magic Browser"})
    file = urlopen(req)
    data = file.read()
    file.close()

    data = xmltodict.parse(data)
    rates: OrderedDict = data.get('Rates').get('Rate')

    # TODO probably shouldn't save if the last quote datetime hasn't changed
    for rate in rates:
        symbol = rate.get('@Symbol')
        # TODO get from a saved list of trained symbols
        if len(symbol) == 6:
            dt = datetime.datetime.now()
            time = datetime.datetime.strptime(rate.get('Last'), '%H:%M:%S')
            # For now we're saving in the local timezone. TODO does it make a difference?
            # timezone = tz.gettz('America/New_York')
            # dt.replace(tzinfo=timezone)
            # time = time.replace(tzinfo=timezone)
            dt = dt.replace(hour=time.hour, minute=time.minute, second=time.second)
            quote = PriceQuote(symbol, dt, rate.get('High'), rate.get('Low'))
            session.add(quote)
    session.commit()
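
# The timezone TODO above is real: the feed reports "Last" as a bare HH:MM:SS,
# so stamping it onto datetime.now() mixes the feed's clock with the local one.
# A minimal sketch of one way to resolve it, assuming the feed's times are
# America/New_York (an assumption, not confirmed by the feed); the helper name
# is hypothetical:
import datetime
import pytz

def localize_quote_time(last: str, feed_tz_name: str = 'America/New_York') -> datetime.datetime:
    """Combine today's date with the feed's HH:MM:SS and convert to UTC."""
    feed_tz = pytz.timezone(feed_tz_name)  # assumed feed timezone
    t = datetime.datetime.strptime(last, '%H:%M:%S').time()
    local_dt = feed_tz.localize(datetime.datetime.combine(datetime.date.today(), t))
    return local_dt.astimezone(pytz.utc)
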
def run():
    # TODO event subscriber should be auto-initialized with every command (use framework?)
    # CalendarEventSubscriber.get_instance()
    _to = datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
    session = Connection.get_instance().get_session()
    signals = aliased(Signal)  # only used by the commented-out query below
    # TODO add an optional arg to fetch after some date
    # entries_to_update = session.query(CalendarEntry)\
    #     .outerjoin(signals, CalendarEntry.signals)\
    #     .filter(signals.id == None)\
    #     .filter(CalendarEntry.datetime <= _to)\
    #     .order_by(CalendarEntry.datetime.asc()).all()
    # TODO protect from duplicates
    # Run only if any news can be updated
    # if len(entries_to_update) == 0 or est_to_utc(entries_to_update[0].datetime) <= _to:
    #     scrapper = NewsScrapper()
    #     scrapper.run(entries_to_update[0].datetime)
    scrapper = NewsScrapper()
    scrapper.run(datetime.datetime.strptime('Jan 29 2018 01:00AM', '%b %d %Y %I:%M%p'),
                 datetime.datetime.now(),
                 to_file=True)
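
# est_to_utc, referenced in the commented-out guard above, is not shown in this
# section. One plausible shape, assuming the calendar stores naive
# America/New_York datetimes (a sketch, not the project's actual helper):
def est_to_utc(dt: datetime.datetime) -> datetime.datetime:
    eastern = pytz.timezone('America/New_York')
    return eastern.localize(dt).astimezone(pytz.utc)
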
def fetch_to_db(self, _from: datetime.datetime, _to: datetime.datetime,
                gran: str, symbol: str):
    # Oanda names instruments like "EUR_USD" while quotes are stored as "EURUSD"
    instr = symbol[:3] + '_' + symbol[-3:]
    params = {
        "granularity": gran,
        "from": _from.strftime(self.date_format_in),
        "to": _to.strftime(self.date_format_in)
    }
    session = Connection.get_instance().get_session()
    existing_quotes = session.query(PriceQuote) \
        .filter_by(symbol=symbol) \
        .filter(PriceQuote.datetime >= (_from - datetime.timedelta(minutes=1))) \
        .filter(PriceQuote.datetime <= _to).all()
    existing_quote_dts = list(
        map(lambda _quote: _quote.datetime.strftime(self.date_format_out),
            existing_quotes))
    try:
        for r in InstrumentsCandlesFactory(instrument=instr, params=params):
            print("REQUEST: {} {} {}".format(r, r.__class__.__name__, r.params))
            self.client.request(r)
            for candle in r.response.get('candles'):
                dt = candle.get('time')[0:19]
                print(candle)
                # skip candles that are still forming or already stored
                if candle['complete'] and dt not in existing_quote_dts:
                    quote = PriceQuote(symbol,
                                       datetime.datetime.strptime(dt, self.date_format_out),
                                       candle['mid']['h'],
                                       candle['mid']['l'],
                                       candle['volume'])
                    existing_quote_dts.append(dt)
                    session.add(quote)
        session.commit()
    except SQLAlchemyError as e:
        session.rollback()
        print(e)
    except Exception as e:
        print(e)
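
# Example usage, a sketch: backfill a month of hourly EURUSD candles. The class
# name OandaFetcher is an assumption (the class isn't shown here); "H1" is
# Oanda's hourly granularity code, and InstrumentsCandlesFactory splits the
# range into requests that respect the per-request candle limit.
fetcher = OandaFetcher()  # hypothetical class name
fetcher.fetch_to_db(_from=datetime.datetime(2018, 1, 1),
                    _to=datetime.datetime(2018, 2, 1),
                    gran="H1",
                    symbol="EURUSD")
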
def check(self, calendar_entry: CalendarEntry):
    session = Connection.get_instance().get_session()
    # 64 hours should contain a sufficient margin
    quotes_since = calendar_entry.datetime - datetime.timedelta(hours=64)
    if self.__all_currency_pairs is None:
        self.__all_currency_pairs = PreProcessedDataProvider.get_currency_pair_strings()
    # keep only the pairs that involve the entry's currency
    currency_pairs = list(
        filter(lambda currency_pair: calendar_entry.currency in currency_pair,
               self.__all_currency_pairs))
    for symbol in currency_pairs:
        quotes = session.query(PriceQuote) \
            .filter_by(symbol=symbol) \
            .filter(PriceQuote.datetime >= quotes_since).all()
        if len(quotes) == 0:
            continue
        data_set = self.data_set_provider.prepare_single_data_set(
            calendar_entry, quotes, symbol)
        if data_set.shape[0] == 0:
            continue
        prediction = self.nn.predict(data_set[0])
        # print(data_set[0])
        if math.isnan(prediction):
            # TODO log
            # print('CalendarEntry', calendar_entry.id)
            # print('symbol', symbol)
            continue
        print('symbol', symbol)
        print('datetime', calendar_entry.datetime)
        print('prediction', prediction)
        existing_signal = session.query(Signal) \
            .filter_by(symbol=symbol, calendar_entry=calendar_entry) \
            .first()
        if existing_signal is None and prediction:
            signal = Signal(prediction, symbol, calendar_entry)
            session.add(signal)
            session.commit()
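
# Note on the substring filter above: a three-letter currency code matches the
# pair string on either side, which is what we want here. Quick illustration:
# >>> [p for p in ['EURUSD', 'USDJPY', 'EURGBP'] if 'USD' in p]
# ['EURUSD', 'USDJPY']
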
def run():
    data_set_provider = DataSetProvider()
    data_visualizer = DataVisualizer()
    currency_pair = 'EURUSD'
    # currency_pair = 'EURCAD'
    # currency_pair = 'AUDJPY'
    curr_1 = currency_pair[:3]
    curr_2 = currency_pair[-3:]
    session = Connection.get_instance().get_session()
    now = datetime.datetime.now()
    date_from = now - datetime.timedelta(days=365)
    date_to = now
    entries_and_signals = session.query(CalendarEntry, Signal) \
        .join(CalendarEntry.signals) \
        .filter(Signal.symbol == currency_pair) \
        .filter(CalendarEntry.datetime >= date_from) \
        .filter(CalendarEntry.datetime <= date_to) \
        .filter(or_(CalendarEntry.currency == curr_1,
                    CalendarEntry.currency == curr_2)) \
        .all()
    entries_to_show = list(map(lambda x: x[0], entries_and_signals))
    signals_to_show = list(map(lambda x: x[1], entries_and_signals))
    prices_to_show = session.query(PriceQuote) \
        .filter(PriceQuote.datetime >= date_from) \
        .filter(PriceQuote.datetime <= date_to) \
        .filter(PriceQuote.symbol == currency_pair) \
        .all()
    prices, news = data_set_provider.data_frames_from_records(
        entries_to_show, prices_to_show, currency_pair)
    # prices = prices.fillna(0)
    labels = list(map(lambda signal: signal.value, signals_to_show))
    data_visualizer.visualize(prices, news, labels, currency_pair + '_latest')
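
# The two map() calls above unpack (CalendarEntry, Signal) tuples one list at a
# time; an equivalent idiom, guarded for an empty result set, would be:
# entries_to_show, signals_to_show = (map(list, zip(*entries_and_signals))
#                                     if entries_and_signals else ([], []))
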
from app.database.Connection import Connection
from sqlalchemy_utils import database_exists, create_database
from app.model.PriceQuote import PriceQuote
from app.model.CalendarEntry import CalendarEntry
from app.model.Signal import Signal

engine_url = Connection.get_instance().get_engine_url()
engine = Connection.get_instance().get_engine()

if not database_exists(engine_url):
    create_database(engine_url)

PriceQuote.metadata.create_all(engine)
CalendarEntry.metadata.create_all(engine)
Signal.metadata.create_all(engine)
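
# The bootstrap above is idempotent: database_exists/create_database come from
# sqlalchemy_utils, and metadata.create_all skips tables that already exist, so
# the script can be re-run safely, e.g. (the module path is an assumption):
#
#     python -m app.database.create_db
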
def getEconomicCalendar(self, start_date, end_date, to_file=False):
    startlink, endlink = self.__date_to_link(start_date), self.__date_to_link(end_date)
    session = Connection.get_instance().get_session()
    print(startlink)

    # write current status to the console
    logging.info("Scraping data for link: {}".format(startlink))

    # get the page and make the soup
    baseURL = "https://www.forexfactory.com/"
    r = requests.get(baseURL + startlink)
    data = r.text
    soup = BeautifulSoup(data, "lxml")

    # get and parse table data, ignoring details and graph
    table = soup.find("table", class_="calendar__table")

    # do not use the ".calendar__row--grey" css selector (reserved for historical data)
    trs = table.select("tr.calendar__row.calendar_row")
    fields = ["date", "time", "currency", "impact",
              "event", "actual", "forecast", "previous"]

    # some rows do not have a date (cells merged)
    curr_year = startlink[-4:]
    curr_date = ""
    curr_time = ""
    for tr in trs:
        # fields may get messed up sometimes, see Tue Sep 25 2:45AM French Consumer
        # Spending; in that case we append the offending date and time to errors.csv
        try:
            for field in fields:
                data = tr.select("td.calendar__cell.calendar__{}.{}".format(field, field))[0]
                # print(data)
                if field == "date" and data.text.strip() != "":
                    curr_date = data.text.strip()
                elif field == "time" and data.text.strip() != "":
                    # time is sometimes "All Day" or "Day X" (eg. WEF Annual Meetings)
                    if data.text.strip().find("Day") != -1:
                        curr_time = "12:00am"
                    else:
                        curr_time = data.text.strip()
                elif field == "currency":
                    currency = data.text.strip()
                elif field == "impact":
                    # when impact says "Non-Economic" on mouseover, the relevant
                    # class name is "Holiday", thus we do not use the classname
                    impact = data.find("span")["title"]
                elif field == "event":
                    event = data.text.strip()
                elif field == "actual":
                    actual = data.text.strip()
                elif field == "forecast":
                    forecast = data.text.strip()
                elif field == "previous":
                    previous = data.text.strip()

            dt = datetime.datetime.strptime(
                ",".join([curr_year, curr_date, curr_time]),
                "%Y,%a%b %d,%I:%M%p")
            if dt > end_date:
                # commit what has been collected so far before bailing out
                session.commit()
                return
            if to_file and actual and dt >= start_date:
                rec = '{dt};"{symbol}";"{title}";"{actual}";"{forecast}";"{previous}"'.format(
                    dt=dt.strftime(self.dt_format_out),
                    symbol=currency,
                    title=event,
                    actual=actual,
                    forecast=forecast,
                    previous=previous,
                )
                print(rec)
                with open(self.news_file_path, 'a') as news_file:
                    news_file.write("\n" + rec)

            # TODO save only news whose titles are known by the nn
            calendar_entry = session.query(CalendarEntry).filter_by(
                currency=currency, datetime=dt, title=event).first()
            if calendar_entry is None and previous != '':
                calendar_entry = CalendarEntry(currency, dt, event, actual,
                                               forecast, previous)
                session.add(calendar_entry)
            elif calendar_entry is not None and actual != '' and len(calendar_entry.signals) == 0:
                calendar_entry.actual = actual
                calendar_entry.updated_at = datetime.datetime.now()
                calendar_event = CalendarEntryUpdatedEvent(calendar_entry)
                zope.event.notify(calendar_event)
        except Exception as e:
            with open(os.path.join(os.path.abspath(os.getcwd()), 'output',
                                   'news-scrapper-errors.csv'), "a") as f:
                csv.writer(f).writerow([curr_year, curr_date, curr_time, str(e)])

    session.commit()

    # exit the recursion when the last available link has been reached
    if startlink == endlink:
        logging.info("Successfully retrieved data")
        return

    # get the link for the next week and follow it
    # follow = soup.select("a.calendar__pagination.calendar__pagination--next.next")
    # follow = follow[0]["href"]
    self.getEconomicCalendar(start_date + datetime.timedelta(days=7),
                             end_date, to_file=to_file)
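
# __date_to_link is referenced above but not shown. Given that curr_year is
# read from startlink[-4:], the link must end with the 4-digit year; a sketch
# matching forexfactory's weekly calendar URLs (the exact format is an
# assumption, not confirmed by this section):
def __date_to_link(self, date: datetime.datetime) -> str:
    # e.g. datetime(2018, 1, 29) -> "calendar.php?week=jan29.2018"
    return "calendar.php?week={}".format(date.strftime("%b%d.%Y").lower())
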