def test_getDefaulttBucketName():
    c = Cloud()
    assert c.getDefaulttBucketName() == 'xttest71af37fb-e935-43cc-994e-fdf320a72059'
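# The Cloud helper itself is defined elsewhere in the project. Below is a minimal
# sketch of the interface the classes in this module rely on (get_s3_keys,
# downloadData, storeDataonBucket, deleteFile, getDefaulttBucketName), assuming an
# S3-backed store via boto3. The bucket handling and internals are illustrative
# assumptions, not the project's actual implementation.
import boto3


class Cloud:
    def __init__(self, bucket='xttest71af37fb-e935-43cc-994e-fdf320a72059'):
        self.bucket = bucket
        self.s3 = boto3.client('s3')

    def getDefaulttBucketName(self):
        return self.bucket

    def get_s3_keys(self):
        '''List object keys currently in the bucket.'''
        response = self.s3.list_objects_v2(Bucket=self.bucket)
        return [obj['Key'] for obj in response.get('Contents', [])]

    def storeDataonBucket(self, filename):
        '''Upload a local file to the bucket under the same key.'''
        self.s3.upload_file(filename, self.bucket, filename)

    def downloadData(self, key, filename):
        '''Download a bucket object to a local file.'''
        self.s3.download_file(self.bucket, key, filename)

    def deleteFile(self, key):
        self.s3.delete_object(Bucket=self.bucket, Key=key)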
# Imports this module relies on. Project-local names (Cloud, strToDate,
# datetimeToDate, key - the Alpha Vantage API key - isWeekday and markethours)
# are assumed to be imported from the project's own modules.
import asyncio
from abc import abstractmethod
from datetime import date
from os import path, listdir

import pandas as pd
from alpha_vantage.timeseries import TimeSeries


class MultiDay(Frequency):
    def __init__(self, symbol, start=None, end=None):
        ''' proper format: {symbol}_{start_date}_{end_date} '''
        super().__init__(symbol, start=start, end=end)
        self.cloud = Cloud()
        self.set_start_and_end()

    def set_start_and_end(self):
        try:
            # derive start/end from the name of the file already on the cloud
            x = self.ValidFiles().split('.')[0].split('_')[-2:]
            self.start, self.end = x
            assert self.start < self.end
        except IndexError:
            # no matching file on the cloud yet: pull fresh data and upload it
            print('getting new data')
            x = self.collectData()
            self.start = datetimeToDate(x.index[-1])
            self.end = datetimeToDate(x.index[0])
            print(self.start, self.end)
            start = strToDate(self.start)
            end = strToDate(self.end)
            assert start < end
            x.to_csv(self.fileFormat())
            self.cloud.storeDataonBucket(self.fileFormat())
        finally:
            start = strToDate(self.start)
            end = strToDate(self.end)
            assert start < end

    def __str__(self):
        return f'{self.symbol}_{type(self).__name__.lower()}_{self.start}_{self.end}'

    def ValidFiles(self) -> str:
        '''returns the first cloud key that matches the prefix'''
        return [i for i in self.cloud.get_s3_keys() if self.prefix() in i][0]

    def prefix(self):
        k = type(self).__name__.lower()
        return f'{self.symbol}_{k}'

    def cloud_df(self):
        '''returns the data on the cloud in a DataFrame'''
        self.cloud.downloadData(self.ValidFiles(), self.ValidFiles())
        return pd.read_csv(self.ValidFiles(), index_col='date')

    def combineCloudandAPI(self):
        '''Combines data recently collected with the data on the cloud'''
        cdf = self.cloud_df()
        newdf = self.collectData()
        temp_data = cdf.combine_first(newdf)
        x = self.cleanData(temp_data)
        # might cause issues in the future
        self.start = str(x.index[0]).split(' ')[0]
        self.end = str(x.index[-1]).split(' ')[0]
        self.properplace()
        return x

    def UpdateCloud(self) -> None:
        '''If the appropriate time has been reached, update the data in the bucket.'''
        if not self.NewInterval():
            raise PermissionError('not the right time')
        new_data = self.combineCloudandAPI()
        new_data.to_csv(self.fileFormat())
        self.cloud.deleteFile(self.ValidFiles())
        self.cloud.storeDataonBucket(self.fileFormat())
        print('presto')

    @abstractmethod
    def NewInterval(self) -> bool:
        pass

    @abstractmethod
    def collectData(self) -> pd.DataFrame:
        pass
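# A minimal sketch of a concrete MultiDay subclass, showing how the two abstract
# hooks might be filled in. The class name, the use of get_daily, and the
# "refresh on Mondays" rule are illustrative assumptions, not part of the module.
class Daily(MultiDay):
    def NewInterval(self) -> bool:
        # assume the cloud copy should be refreshed once a week, on Mondays
        return date.today().weekday() == 0

    def collectData(self) -> pd.DataFrame:
        ts = TimeSeries(key=key, output_format='pandas')
        data, metadata = ts.get_daily(symbol=self.symbol, outputsize='full')
        return data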
class Frequency:
    def __init__(self, symbol, start=None, end=None):
        self.symbol = symbol.upper()
        self.start = start
        self.end = end
        self.cloud = Cloud()

    def properplace(self):
        start = strToDate(self.start)
        end = strToDate(self.end)
        assert start < end

    def fileFormat(self) -> str:
        return f'{self}.csv'

    def __repr__(self):
        '''returns the start and end date of a file'''
        return f'start:{self.start} end:{self.end}'

    def SaveToCloud(self) -> None:
        '''Saves data to the s3 bucket'''
        self.cloud.storeDataonBucket(self.fileFormat())

    def loadData(self) -> pd.DataFrame:
        '''Attempts to read the data from the local machine.
        If unsuccessful, it falls back to the cloud.'''
        try:
            return pd.read_csv(self.fileFormat(), index_col='date')
        except FileNotFoundError:
            self.cloud.downloadData(self.fileFormat(), self.fileFormat())
            return pd.read_csv(self.fileFormat(), index_col='date')

    def saveDataLocally(self, df: pd.DataFrame) -> None:
        '''saves data to the local machine'''
        fileform = self.fileFormat()
        if path.isfile(fileform):
            print(f"reading data from {fileform}")
            read_df = pd.read_csv(fileform, index_col='date')
            temp_df = read_df.combine_first(df)
            temp_df.to_csv(fileform)
            self.cleanData()
        else:
            print(f"Creating {fileform}....")
            df.to_csv(fileform)

    def cleanData(self, df=None) -> pd.DataFrame:
        '''Removes rows with duplicate index values from the local file
        or from a given DataFrame'''
        if df is None:
            try:
                x = pd.read_csv(self.fileFormat(), index_col='date')
            except FileNotFoundError:
                print("file doesn't exist")
                return None
        else:
            x = pd.DataFrame(df)
            print(x)
        keep = ~x.index.duplicated()
        return x.loc[keep, :]

    def UpdateCloud(self):
        # base-class version; subclasses override this with their own logic
        if self.NewInterval():
            self.cloud_Data()

    @abstractmethod
    def NewInterval(self):
        pass

    @abstractmethod
    def collectData(self) -> pd.DataFrame:
        pass

    @abstractmethod
    def prefix(self):
        pass

    @abstractmethod
    def ValidFiles(self):
        pass
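# The small date/market helpers used throughout this module are defined elsewhere
# in the project. This is a minimal sketch of the behaviour the classes above and
# below appear to assume; the signatures and time handling here are guesses, not
# the project's actual implementation.
from datetime import date, datetime


def strToDate(s):
    '''"2020-01-31" -> datetime.date(2020, 1, 31)'''
    return datetime.strptime(str(s), '%Y-%m-%d').date()


def datetimeToDate(ts):
    '''pandas Timestamp / datetime -> "YYYY-MM-DD" string'''
    return str(ts).split(' ')[0]


def isWeekday(day=None):
    '''True Monday through Friday'''
    day = day or date.today()
    return day.weekday() < 5


def markethours(now=None):
    '''True between 9:30 and 16:00 local time (assumed to mean US market hours)'''
    now = now or datetime.now()
    open_ = now.replace(hour=9, minute=30, second=0)
    close = now.replace(hour=16, minute=0, second=0)
    return open_ <= now <= close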
class IntraDay(Frequency):
    def __init__(self, symbol, interval, end=date.today()):
        super().__init__(symbol, end=end)
        self.cloud = Cloud()
        if interval not in [1, 5, 10, 15, 30, 60]:
            raise ValueError(f"valid intervals are {[1, 5, 10, 15, 30, 60]}")
        self.interval = interval

    def ValidFiles(self):
        return [i for i in self.cloud.get_s3_keys() if self.prefix() in i]

    def __repr__(self):
        return f'end:{self.end}'

    def __str__(self):
        return f'{self.symbol}_{self.interval}min_{self.end}'

    def prefix(self):
        return "_".join(str(self).split('_')[:2])

    def collectData(self):
        week_day = date(*[int(i) for i in str(self.end).split('-')]).weekday()
        # this means that the market is open
        start = '9:31:00'
        date_form_start = f'{self.end} {start}'
        ts = TimeSeries(key=key, output_format='pandas')
        data, metadata = ts.get_intraday(symbol=self.symbol,
                                         interval=f'{self.interval}min',
                                         outputsize='full')
        date_form_end = str(data.head(1).index[0])
        print([date_form_start, date_form_end])
        return data.loc[str(self.end)]

    def collectDataToCloud(self):
        week_day = date(*[int(i) for i in str(self.end).split('-')]).weekday()
        # this means that the market is open
        ts = TimeSeries(key=key, output_format='pandas')
        data, metadata = ts.get_intraday(symbol=self.symbol,
                                         interval=f'{self.interval}min',
                                         outputsize='full')
        # split the full download into one file per trading day
        while len(data) != 0:
            date_form_end = str(data.head(1).index[0])
            d = date_form_end.split(' ')[0]
            k = data.loc[d]
            data = data[~data.isin(k)].dropna()
            print(k)
            k.to_csv(IntraDay(self.symbol, self.interval, d).fileFormat())
        # upload only the files that are not already on the cloud
        x = [
            i for i in listdir()
            if self.prefix() in i and i not in self.ValidFiles()
        ]
        [self.cloud.storeDataonBucket(i) for i in x]

    async def dataStream(self):
        if isWeekday() and markethours():
            flag = True
            while markethours():
                x = self.collectData()
                self.saveDataLocally(x)
                await asyncio.sleep(self.interval * 60)
                if len(x) == 0:
                    flag = False
                    raise RuntimeError("Something is off")
            self.cloud.storeDataonBucket(self.fileFormat())
        else:
            print('not now')
            raise InterruptedError

    def UpdateCloud(self) -> None:
        if not self.NewInterval():
            raise PermissionError('not the right time')
        self.collectDataToCloud()
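# A minimal usage sketch: drive the intraday streamer with asyncio during market
# hours, and fall back to a per-day backfill when the market is closed. The ticker
# and the 5-minute interval are placeholders, not values taken from the project.
if __name__ == '__main__':
    spy = IntraDay('SPY', 5)
    try:
        asyncio.run(spy.dataStream())    # poll Alpha Vantage every 5 minutes
    except InterruptedError:
        spy.collectDataToCloud()         # market closed: upload per-day files instead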