import sys
import traceback
from datetime import datetime
from typing import Optional

# `config`, `logfile`, `logfile_encoding`, and `now()` are module-level names defined elsewhere.


def debug(message: str, ts: Optional[datetime] = None, include_exception: bool = False) -> None:
    try:
        timestamp = now() if ts is None else str(ts)
        if include_exception and (sys.exc_info()[2] is not None):
            message += "\n" + traceback.format_exc()
        if config.file_loglevel >= 6:
            try:
                with open(logfile, "a", encoding=logfile_encoding) as lf:
                    lf.write(f"[{timestamp}] [D] {message}\n")
            except Exception:
                pass
        if config.terminal_loglevel >= 6:
            sys.stdout.write(f"[{timestamp}] [D] {message}\n")
            sys.stdout.flush()
    except Exception:
        # In some cases writing out logged information can itself fail. It once happened that PyCharm
        # crashed and disconnected the running bots, breaking the stdio pipes so that every stdio write
        # failed and an exception was returned to Discord for every single message the bot received.
        # That is obviously bad, but in practice a broken pipe or an unwritable logfile comes down to
        # user error, and if the user broke the log system there is no channel left to tell them about
        # it. So the log messages are simply dropped; you're on your own without them.
        pass
import json
import logging
from datetime import datetime

import requests
from requests import Response

# `article_service_uri` and `port` are module-level settings defined elsewhere.


def set_url_flag_to_scraped(url_id: str, scraped_at: datetime) -> None:
    resp: Response = requests.put(
        f'http://{article_service_uri}:{port}/v1/urls/{url_id}/unscraped',
        data=json.dumps({"scraped_at": str(scraped_at)}),
        headers={'Content-Type': 'application/json'})
    if resp.status_code != 200:
        logging.error(f"ERROR when posting url scraped flag to microservice: {resp.json()}")
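# Hedged usage sketch for set_url_flag_to_scraped: marks a hypothetical url id as scraped
# with the current time. Assumes the article microservice above is reachable at the
# configured `article_service_uri` and `port`.
set_url_flag_to_scraped("5f2b9c", datetime.now())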
def get_log_filename(current_time: datetime.datetime) -> str:
    """Returns the filename to which logs should be written for this run.

    The filename will be based on the time of the run.
    """
    current_time_as_string: str = str(current_time)
    symbols_to_replace: Tuple[str, ...] = ('-', ':', ' ', '.')
    symbol: str
    for symbol in symbols_to_replace:
        current_time_as_string = current_time_as_string.replace(symbol, '_')
    filename: str = '{}.{}'.format(current_time_as_string, LOG_FILE_EXTENSION)
    return filename
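# Worked example (assuming LOG_FILE_EXTENSION == "log"; the real constant is defined
# elsewhere): str(datetime.datetime(2021, 3, 14, 9, 26, 53)) is "2021-03-14 09:26:53",
# so each '-', ':', and ' ' becomes '_':
#
#     get_log_filename(datetime.datetime(2021, 3, 14, 9, 26, 53))
#     -> '2021_03_14_09_26_53.log'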
def info(message: str, ts: Optional[datetime] = None, include_exception: bool = False) -> None:
    try:
        timestamp = now() if ts is None else str(ts)
        if include_exception and (sys.exc_info()[2] is not None):
            message += "\n" + traceback.format_exc()
        if config.file_loglevel >= 4:
            try:
                with open(logfile, "a", encoding=logfile_encoding) as lf:
                    lf.write(f"[{timestamp}] [I] {message}\n")
            except Exception:
                pass
        if config.terminal_loglevel >= 4:
            sys.stdout.write(f"[{timestamp}] [I] {message}\n")
            sys.stdout.flush()
    except Exception:
        # See the comment in the except block of debug() above.
        pass
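# Hedged usage sketch for the two loggers above (assumes `config.file_loglevel`,
# `config.terminal_loglevel`, `logfile`, `logfile_encoding`, and `now()` are configured
# at module level, as the functions expect):
info("bot connected")
try:
    1 / 0
except ZeroDivisionError:
    # include_exception=True appends the current traceback to the log line
    debug("handled a division error", include_exception=True)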
def return_data(self, ticker: Optional[str] = None, start_date: Optional[datetime] = None,
                end_date: Optional[datetime] = None) -> DataFrame:
    """
    Returns the DataFrame containing the financial data for the prescribed company. This function will pull the
    data from the Yahoo API built into :ref:`pandas_datareader` if it has not been cached and will then cache the
    data, or it will read the data from the cached ``csv`` file. The cached files are named with the ticker,
    start date, and end date that specify the API query, and exist in the ``.cache/`` folder located under the
    current working directory.

    :param ticker: ticker string for the company whose data will be retrieved
    :param start_date: start date for the data record
    :param end_date: end date for the data record
    :return: DataFrame of financial data
    """
    if start_date is None:
        start_date = self.start_date
    if end_date is None:
        end_date = self.end_date
    if ticker is None:
        ticker = self.ticker
    start_date_str = str(start_date).strip(ZERO_TIME)
    end_date_str = str(end_date).strip(ZERO_TIME)
    rel_file_path = os.path.join(".cache", "&".join([ticker, start_date_str, end_date_str])) + ".csv"
    if os.path.exists(os.path.join(os.getcwd(), rel_file_path)):
        try:
            data_frame = read_csv(os.path.join(os.getcwd(), rel_file_path))
            print(" > Loaded data requested for {} from {} to {} from '.cache/' folder".format(
                ticker, start_date_str, end_date_str))
            return data_frame
        except errors.ParserError:
            print("Could not load data for {} from {} to {} from .cache/ folder (although the path exists)"
                  .format(ticker, start_date, end_date))
    try:
        data_frame = web.get_data_yahoo(ticker, start_date, end_date)
        print(" > Loaded data requested for {} from {} to {} from internet".format(
            ticker, start_date_str, end_date_str))
    except requests.exceptions.SSLError:
        print("ERROR: A 'requests.exceptions.SSLError' was raised, which may be indicative of a lack of "
              "internet connection; try again after verifying that you have a successful internet connection.")
        raise  # re-raise with the original traceback rather than raising the bare exception class
    except requests.exceptions.ConnectionError:
        print("ERROR: A 'requests.exceptions.ConnectionError' was raised, which may be indicative of a lack of "
              "internet connection; try again after verifying that you have a successful internet connection.")
        raise
    if self.cache_bool:
        self.cache(rel_file_path, data_frame)
    # Loading the DataFrame from the internet, as opposed to from the csv cache, handles the timestamp index
    # differently: the timestamp index is converted to a "Date" column when cached. Consequently, a "Date"
    # column needs to be inserted here to match.
    data_frame.insert(0, "Date", data_frame.index)
    data_frame.index = np.arange(0, len(data_frame), 1)
    return data_frame
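# Worked example of the cache naming scheme described in the docstring above, assuming
# ZERO_TIME is " 00:00:00" (the constant is defined elsewhere): with ticker "AAPL",
# datetime(2017, 1, 1) and datetime(2018, 1, 1) stringify to "2017-01-01 00:00:00" and
# "2018-01-01 00:00:00", strip down to bare dates, and the query is cached at:
#
#     .cache/AAPL&2017-01-01&2018-01-01.csv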
from datetime import date, datetime


def datetimeToDate(x: datetime) -> date:
    # Parse the "YYYY-MM-DD" prefix of str(x); for datetime inputs this is equivalent to x.date().
    return date(*[int(i) for i in str(x).split(' ')[0].split('-')])
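# Quick illustrative check that the string-parsing round trip agrees with the built-in accessor:
dt = datetime(2020, 5, 17, 13, 45)
assert datetimeToDate(dt) == dt.date()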
def __init__(self,
             ticker: str,
             lstm_hidden_size: int = 100,
             lstm_num_layers: int = 2,
             to_compare: Optional[List[str]] = None,
             train_start_date: datetime = datetime(2017, 1, 1),
             train_end_date: datetime = datetime(2018, 1, 1),
             sequence_segment_length: int = 50,
             drop_prob: float = 0.3,
             device: str = DEVICE,
             auto_populate: bool = True,
             train_data_prop: float = 0.8,
             lr: float = 1e-4,
             train_batch_size: int = 10,
             test_batch_size: int = 4,
             num_workers: int = 2,
             label_length: int = 30,
             try_load_weights: bool = False,
             save_state_dict: bool = True):
    r"""
    :param lstm_hidden_size: size of the lstm hidden layer
    :param lstm_num_layers: number of layers for the lstm
    :param ticker: ticker of company whose stock you want to predict
    :param to_compare: tickers of companies whose stock will be part of the features of the dataset
    :param train_start_date: date to request data from
    :param train_end_date: date to request data to
    :param sequence_segment_length: length of sequences to train the model on
    :param drop_prob: probability for dropout layers
    :param device: string for device to try sending the tensors to (i.e. "cuda")
    :param auto_populate: automatically calls all 'populate' functions in the constructor
    :param train_data_prop: proportion of data set to allocate to training data
    :param lr: learning rate for the optimizer
    :param train_batch_size: batch size for the training data
    :param test_batch_size: batch size for the testing data
    :param num_workers: parameter for Pytorch DataLoaders
    :param label_length: length of data (starting at the end of each sequence segment) to consider for the loss
    :param try_load_weights: boolean for whether the model should search for a cached model state dictionary
    :param save_state_dict: boolean for whether the model should cache its weights as a state dictionary
    """
    super(StockRNN, self).__init__()

    # variable indicating success of calling self.to(DEVICE), where 0 indicates that it hasn't been tried yet,
    # -1 indicates that it failed, and 1 indicates that it was successful
    self.__togpu_works__ = 0

    # __init__ params
    self.lstm_hidden_size = lstm_hidden_size
    self.lstm_num_layers = lstm_num_layers
    self.drop_prob = drop_prob
    self.device = device
    self.ticker = ticker
    self.train_start_date = train_start_date
    self.train_end_date = train_end_date
    self.sequence_segment_length = sequence_segment_length
    self.auto_populate = auto_populate
    self.train_data_prop = train_data_prop
    self.lr = lr
    self.train_batch_size = train_batch_size
    self.test_batch_size = test_batch_size
    self.num_workers = num_workers
    self.save_state_dict = save_state_dict

    if label_length >= self.sequence_segment_length:
        print("Label length was specified to be {}, but cannot be >= self.sequence_segment_length; setting "
              "self.label_length to self.sequence_segment_length - 1.".format(label_length))
        self.label_length = self.sequence_segment_length - 1
    else:
        self.label_length = label_length

    # company in index 0 is the company whose stock is being predicted
    self.companies = [Company(self.ticker, self.train_start_date, self.train_end_date)]
    start_date_changes = []
    end_date_changes = []
    if to_compare is not None:
        to_compare.sort()
        for company_ticker in to_compare:
            try:
                self.companies.append(Company(company_ticker, self.train_start_date, self.train_end_date))
            except KeyError:
                print("There was a KeyError exception raised when accessing data for the ticker {}; will "
                      "skip this ticker".format(company_ticker))
                continue
            except _libs.tslibs.np_datetime.OutOfBoundsDatetime:
                print("There was a _libs.tslibs.np_datetime.OutOfBoundsDatetime exception raised when "
                      "accessing data for the ticker {}; will skip this ticker".format(company_ticker))
                continue
            except RemoteDataError:
                print("There was a RemoteDataError when fetching data for ticker '{}'; will skip this ticker"
                      .format(company_ticker))
                continue
            if self.companies[-1].start_date_changed:
                start_date_changes.append(self.companies[-1].start_date)
            if self.companies[-1].end_date_changed:
                end_date_changes.append(self.companies[-1].end_date)

    self.num_companies = len(self.companies)

    if len(start_date_changes) != 0:  # revise the start date of all of the data if necessary
        self.train_start_date = max(start_date_changes)
        for company in self.companies:
            company.revise_start_date(self.train_start_date)
        print("Data did not exist for every ticker at start date of {}; revising to the most recent starting "
              "time (common among all companies' data) of {}".format(
                  str(train_start_date).strip(ZERO_TIME),
                  str(self.train_start_date).strip(ZERO_TIME)))
    # revise the end date of all of the data
    if len(end_date_changes) != 0:
        self.train_end_date = min(end_date_changes)
        for company in self.companies:
            company.revise_end_date(self.train_end_date)
        print("Data did not exist for every ticker at end date of {}; revising to the earliest ending time "
              "(common among all companies' data) of {}".format(
                  str(train_end_date).strip(ZERO_TIME),
                  str(self.train_end_date).strip(ZERO_TIME)))

    self.start_date_str = str(self.train_start_date).strip(ZERO_TIME)
    self.end_date_str = str(self.train_end_date).strip(ZERO_TIME)

    # string that describes the parameters for this model such that files for weights can be successfully loaded
    if self.num_companies > 1:
        considering_string = "_CONSIDERING_" + "&".join(
            list(map(lambda company: company.ticker, self.companies[1:])))
    else:
        considering_string = ""
    self.identifier = "MODEL_FOR_" + self.companies[0].ticker + considering_string + \
                      "_WITH_lstm_hidden_size_{}_lstm_num_layers_{}_input_size_{}_sequence_" \
                      "segment_length_{}".format(self.lstm_hidden_size, self.lstm_num_layers,
                                                 self.num_companies, self.sequence_segment_length)
    self.model_weights_path = os.path.join(os.getcwd(), ".cache", self.identifier + ".bin")

    # initialize objects used during forward pass
    self.lstm = nn.LSTM(input_size=self.num_companies, hidden_size=self.lstm_hidden_size,
                        num_layers=self.lstm_num_layers, dropout=self.drop_prob, batch_first=True)
    self.post_lstm_dropout = nn.Dropout(p=self.drop_prob)
    self.fc_1 = nn.Linear(self.lstm_hidden_size, 10)
    self.fc_2 = nn.Linear(10, self.num_companies)
    self.tanh = nn.Tanh()
    # self.rescaler = Rescaler(-0.5, 0.5)

    # initialize attributes with placeholder arrays
    self.daily_stock_data = np.array(0)
    self.train_sample_indices = np.array(0)
    self.test_sample_indices = np.array(0)
    self.train_loader_len = 0
    self.test_loader_len = 0
    self.data_len = 0

    # initialize optimizer and loss
    self.loss = nn.MSELoss()
    self.optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)

    if self.auto_populate:
        self.populate_daily_stock_data()
        self.populate_test_train()
        self.populate_loaders()

    if try_load_weights:
        try:
            weights = torch.load(self.model_weights_path)
            self.load_state_dict(weights)
            print("Loaded weights from file")
        except FileNotFoundError:
            print("Tried loading state dict from file but could not find cached file")
        except Exception:
            print("WARNING: Could not load state dict for an unknown reason")
from datetime import datetime


def datetime_convertor(date: datetime):
    # Return the string form of a datetime; non-datetime inputs implicitly return None.
    if isinstance(date, datetime):
        return str(date)
import datetime


def datetime_to_str(o: datetime.datetime):
    # Same pattern as above, written against the module-qualified datetime type.
    if isinstance(o, datetime.datetime):
        return str(o)
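# Hedged usage sketch: converters like datetime_to_str above are commonly passed as the
# `default` hook of json.dumps, which invokes the hook only for values the encoder
# cannot serialize natively:
import json

payload = {"created": datetime.datetime(2022, 8, 1, 12, 30)}
print(json.dumps(payload, default=datetime_to_str))
# prints: {"created": "2022-08-01 12:30:00"}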
def convert_dt(dt: datetime) -> str:
    return str(dt)