def drill(self, gid):
    """Fetch the current StockTwits message stream and convert every
    message into a StockTwitMessage schema tagged with *gid*.

    Raises:
        RuntimeError: if the HTTP request does not succeed.
    """
    response = requests.get(TARGET)
    if not response.ok:
        raise RuntimeError(response.json())
    load_time = time.time()
    records = []
    for msg in response.json()['messages']:
        user = msg['user']
        records.append(
            StockTwitMessage(
                id=hruuid(),
                gid=gid,
                timestamp=load_time,
                body=msg['body'],
                create_date=msg['created_at'],
                username=user['username'],
                join_date=user['join_date'],
                official=user['official'],
                followers=user['followers'],
                following=user['following'],
                ideas=user['ideas'],
                # 'likes' may be absent entirely; total is then None.
                likes=msg.get('likes', {}).get('total'),
                # De-duplicate symbols while keeping a list for the schema.
                symbols=list({s['symbol'] for s in msg['symbols']}),
                # Sentiment may be missing or explicitly null.
                sentiment=(msg['entities'].get('sentiment') or {}).get('basic'),
            ))
    return records
def drill(self, gid):
    """Scrape the subreddit's listing and convert each post into a
    RedditPostSchema tagged with *gid*.

    Raises:
        RuntimeError: if the HTTP request does not succeed.
    """
    response = scraper.get(TARGET.format(subreddit=self.name))
    if not response.ok:
        raise RuntimeError(response.json())
    load_time = time.time()
    # Each listing child wraps the actual post under its 'data' key.
    posts = (child['data'] for child in response.json()['data']['children'])
    results = []
    for post in posts:
        results.append(
            RedditPostSchema(
                id=hruuid(),
                gid=gid,
                timestamp=load_time,
                subreddit=self.name,
                author=post['author'],
                title=post['title'],
                # Only richtext entries that carry a 't' (text) field.
                flairs=[f['t'] for f in post['link_flair_richtext'] if 't' in f],
                ups=post['ups'],
                downs=post['downs'],
                # De-duplicate award names while keeping a list.
                awards=[*{a['name'] for a in post['all_awardings']}],
                created=post['created_utc'],
                url=post['url'],
                comments=post['num_comments'],
            ))
    return results
def drill(self, gid):
    """Turn every ticker in the pulled table into a
    NyseWeeklyOptionsSchema tagged with *gid*."""
    table = self.pull_table()
    pull_time = time.time()
    return [
        NyseWeeklyOptionsSchema(
            id=hruuid(),
            gid=gid,
            timestamp=pull_time,
            ticker=ticker,
        )
        for ticker in table['ticker']
    ]
def drill(self):
    """Collect the unique set of symbols mentioned across all current
    StockTwits messages and wrap each one in a StockTwitTrendingSymbol.

    Raises:
        RuntimeError: if the HTTP request does not succeed.
    """
    response = requests.get(TARGET)
    if not response.ok:
        raise RuntimeError(response.json())
    load_time = time.time()
    # Union of every symbol seen in any message — duplicates collapse.
    seen = set()
    for msg in response.json()['messages']:
        for entry in msg['symbols']:
            seen.add(entry['symbol'])
    return [
        StockTwitTrendingSymbol(id=hruuid(), timestamp=load_time, symbol=sym)
        for sym in seen
    ]
def run(self) -> List[str]:
    """Drill the source, cache the results, ensure the Elasticsearch
    index exists, upload every schema, and return the uploaded URLs.

    Returns:
        List[str]: one URL per uploaded data point.

    Raises:
        RuntimeError: if index creation fails for any reason other than
            the index already existing.
    """
    DEFAULT_LOGGER.info(f"Running {self.index} ({self.name})")
    urls = []
    gid = hruuid()
    schemas = self.drill(gid)
    self.cache(schemas)
    DEFAULT_LOGGER.info(f"Using index: {self.index}")
    response = requests.put(f'{ELASTIC_SEARCH_URL}/{self.index}')
    if not response.ok:
        # An "already exists" failure is benign; anything else is fatal.
        # BUG FIX: guard the nested lookup so a malformed error payload
        # raises the intended RuntimeError rather than a bare
        # KeyError/IndexError/TypeError.
        body = response.json()
        try:
            reason = body['error']['root_cause'][0]['reason']
        except (KeyError, IndexError, TypeError):
            reason = ''
        if 'already exists' not in reason:
            raise RuntimeError(body)
    for schema in schemas:
        urls.append(self.upload(schema))
    DEFAULT_LOGGER.info(f'Uploaded {len(urls)} data points as {gid}')
    self.meta = {'gid': gid}
    return urls
def drill(self):
    """Download the full daily price history for ``self.name`` from
    Yahoo Finance and return only rows newer than the last recorded pull.

    Side effects:
        Updates ``self.meta`` with the epoch of the newest row seen.

    Returns:
        list[YahooDailyTickerSchema]: one schema per new trading day.

    Raises:
        RuntimeError: if no ticker name is configured.
    """
    if self.name is None:
        raise RuntimeError("No ticker specified")
    base = f'https://query1.finance.yahoo.com/v7/finance/download/{self.name}'
    query = '&'.join(
        f'{k}={v}' for k, v in {
            'period1': 0,  # Start
            'period2': int(time.time()),  # End
            'interval': '1d',  # interval of data
            'events': 'history',  # static
            'includeAdjustedClose': True  # static
        }.items())
    target = f'{base}?{query}'
    # Get and parse data
    load_time = datetime.now()
    data: pd.DataFrame = pd.read_csv(target, parse_dates=['Date'])
    data.dropna(inplace=True)
    data['Volume'] = data['Volume'].astype(int)
    last_pull = self.meta.get(self.name, 0)
    # BUG FIX: strftime('%s') is a non-portable glibc extension (absent on
    # Windows / strict libcs). datetime.timestamp() on a naive datetime
    # assumes local time — the same semantics '%s' had, and the inverse of
    # the datetime.fromtimestamp() read below.
    self.meta = {self.name: int(data['Date'].max().to_pydatetime().timestamp())}
    new_data = data[data['Date'] > datetime.fromtimestamp(last_pull)]
    # Convert to List[YahooDailyTickerSchema]; CSV column headers become
    # snake_case keyword arguments (e.g. 'Adj Close' -> adj_close).
    rows = map(lambda t: t[1], new_data.iterrows())
    ticker_data = map(
        lambda row: YahooDailyTickerSchema(
            id=hruuid(),
            timestamp=load_time,
            ticker=self.name,
            **{
                key.lower().replace(' ', '_'): value
                for key, value in dict(row).items()
            }),
        rows)
    return list(ticker_data)
def drill(self, gid):
    """Fetch the options chain for every expiration date and flatten all
    rows into YahooOptionsTickerSchema instances tagged with *gid*."""
    ticker_data = []
    for date in self.get_options_expiration_dates():
        table = self.process_yahoo_table(date)
        table.drop(['change', 'pct_change'], axis=1, inplace=True)
        load_time = time.time()
        # Convert to List[YahooOptionsTickerSchema]; column headers become
        # snake_case keyword arguments.
        for _, row in table.iterrows():
            fields = {
                key.lower().replace(' ', '_'): value
                for key, value in dict(row).items()
            }
            ticker_data.append(
                YahooOptionsTickerSchema(
                    id=hruuid(),
                    gid=gid,
                    timestamp=load_time,
                    ticker=self.name,
                    expires=date,
                    **fields))
    return ticker_data