Example no. 1
import sys
import traceback
from datetime import datetime
from typing import Optional

# config, logfile, logfile_encoding, and now are assumed to be module-level
# names defined elsewhere in this codebase.


def debug(message: str,
          ts: Optional[datetime] = None,
          include_exception: bool = False) -> None:
    try:
        timestamp = now() if ts is None else str(ts)
        if include_exception and (sys.exc_info()[2] is not None):
            message += "\n" + traceback.format_exc()
        if config.file_loglevel >= 6:
            try:
                with open(logfile, "a", encoding=logfile_encoding) as lf:
                    lf.write(f"[{timestamp}] [D] {message}\n")
            except Exception:
                pass
        if config.terminal_loglevel >= 6:
            sys.stdout.write(f"[{timestamp}] [D] {message}\n")
            sys.stdout.flush()
    except Exception:
        # In some cases writing out the logged information can itself fail.
        # It once happened that PyCharm crashed but left the bots running,
        # breaking the stdio pipes so that every write raised and an exception
        # was returned to Discord for every message the bot received. In
        # practice a broken pipe or an unwritable logfile is caused by user
        # error, and with the log system broken there is no way to report the
        # problem, so the log messages are simply dropped. We tried; if the
        # user broke the log system, they are on their own without it.
        pass
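A minimal usage sketch (hypothetical call site), assuming the module-level configuration above is in place:

try:
    1 / 0
except ZeroDivisionError:
    # include_exception=True appends traceback.format_exc() to the message
    debug("arithmetic failed", include_exception=True)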
Example no. 2
import json
import logging
from datetime import datetime

import requests
from requests import Response

# article_service_uri and port are assumed to be module-level settings

def set_url_flag_to_scraped(url_id: str, scraped_at: datetime) -> None:
    resp: Response = requests.put(
        f'http://{article_service_uri}:{port}/v1/urls/{url_id}/unscraped',
        data=json.dumps({"scraped_at": str(scraped_at)}),
        headers={'Content-Type': 'application/json'})
    if resp.status_code != 200:
        # resp.text, unlike resp.json(), cannot itself raise on a
        # non-JSON error body
        logging.error("ERROR when posting url scraped flag to "
                      f"microservice: {resp.text}")
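requests can also serialize the body and set the Content-Type header itself via its json= keyword; an equivalent sketch of the same call:

resp: Response = requests.put(
    f'http://{article_service_uri}:{port}/v1/urls/{url_id}/unscraped',
    json={"scraped_at": str(scraped_at)})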
Example no. 3
import datetime
from typing import Tuple

# LOG_FILE_EXTENSION is assumed to be a module-level constant, e.g. "log"


def get_log_filename(current_time: datetime.datetime) -> str:
    """Returns the filename to which logs should be written for this run.

    The filename will be based on the time of the run.
    """
    current_time_as_string: str = str(current_time)
    symbols_to_replace: Tuple[str, ...] = ('-', ':', ' ', '.')
    symbol: str
    for symbol in symbols_to_replace:
        current_time_as_string = current_time_as_string.replace(symbol, '_')

    filename: str = '{}.{}'.format(current_time_as_string, LOG_FILE_EXTENSION)
    return filename
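For example, assuming LOG_FILE_EXTENSION is "log" (a hypothetical value), a run timestamp maps to an underscore-separated name:

get_log_filename(datetime.datetime(2024, 1, 2, 3, 4, 5))
# -> '2024_01_02_03_04_05.log'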
Example no. 4
def info(message: str,
         ts: Optional[datetime] = None,
         include_exception: bool = False) -> None:
    try:
        timestamp = now() if ts is None else str(ts)
        if include_exception and (sys.exc_info()[2] is not None):
            message += "\n" + traceback.format_exc()
        if config.file_loglevel >= 4:
            try:
                with open(logfile, "a", encoding=logfile_encoding) as lf:
                    lf.write(f"[{timestamp}] [I] {message}\n")
            except Exception:
                pass
        if config.terminal_loglevel >= 4:
            sys.stdout.write(f"[{timestamp}] [I] {message}\n")
            sys.stdout.flush()
    except Exception:
        # See the comment in the debug() function in Example no. 1
        pass
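debug() and info() above are identical apart from the level tag ([D] vs. [I]) and the threshold (6 vs. 4). A minimal, hypothetical refactoring sketch that both could delegate to, relying on the same module-level names (config, logfile, logfile_encoding, now) as the originals; the helper name _log is not in the source:

def _log(tag: str, threshold: int, message: str,
         ts: Optional[datetime] = None,
         include_exception: bool = False) -> None:
    try:
        timestamp = now() if ts is None else str(ts)
        if include_exception and sys.exc_info()[2] is not None:
            message += "\n" + traceback.format_exc()
        line = f"[{timestamp}] [{tag}] {message}\n"
        if config.file_loglevel >= threshold:
            try:
                with open(logfile, "a", encoding=logfile_encoding) as lf:
                    lf.write(line)
            except Exception:
                pass
        if config.terminal_loglevel >= threshold:
            sys.stdout.write(line)
            sys.stdout.flush()
    except Exception:
        # as in the originals: if logging itself fails, drop the message
        pass


def debug(message: str, **kwargs) -> None:
    _log("D", 6, message, **kwargs)


def info(message: str, **kwargs) -> None:
    _log("I", 4, message, **kwargs)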
Example no. 5
    def return_data(self,
                    ticker: Optional[str] = None,
                    start_date: Optional[datetime] = None,
                    end_date: Optional[datetime] = None) -> DataFrame:
        """
        Returns the DataFrame containing the financial data for the prescribed company. This function will pull the
        data from the Yahoo API built into :ref:`pandas_datareader` if it has not been cached and will then cache the
        data, or it will read the data from the cached ``csv`` file. The cached files are named with the ticker, start
        date, and end dates that specify the API query, and exist in the ``.cache/`` folder located under the current
        working directory.

        :param ticker: ticker string for the company whose data will be retrieved
        :param start_date: start date for the data record
        :param end_date: end date for the data record
        :return: DataFrame of financial data
        """
        if start_date is None:
            start_date = self.start_date
        if end_date is None:
            end_date = self.end_date
        if ticker is None:
            ticker = self.ticker

        # str.strip() removes any of the given characters from both ends,
        # not a suffix, so derive the date-only string with .date() instead
        start_date_str = str(start_date.date())
        end_date_str = str(end_date.date())
        rel_file_path = os.path.join(".cache", "&".join([ticker,
                                                         start_date_str,
                                                         end_date_str])) + ".csv"
        if os.path.exists(os.path.join(os.getcwd(), rel_file_path)):
            try:
                data_frame = read_csv(os.path.join(os.getcwd(), rel_file_path))
                print(" > Loaded data requested for {} from {} to {} from "
                      "'.cache/' folder".format(ticker, start_date_str,
                                                end_date_str))
                return data_frame
            except errors.ParserError:
                print("Could not load data for {} from {} to {} from .cache/ "
                      "folder (although the path exists)"
                      .format(ticker, start_date, end_date))

        try:
            data_frame = web.get_data_yahoo(ticker, start_date, end_date)
            print(" > Loaded data requested for {} from {} to {} from internet".format(ticker, start_date_str,
                                                                                       end_date_str))
        except requests.exceptions.SSLError:
            print("ERROR: A 'requests.exceptions.SSLError' was raised, which may be indicative of a lack of "
                  "internet connection; try again after verifying that you have a successful internet "
                  "connection.")
            raise  # re-raise the original exception, preserving its traceback
        except requests.exceptions.ConnectionError:
            print("ERROR: A 'requests.exceptions.ConnectionError' was raised, which may be indicative of a "
                  "lack of internet connection; try again after verifying that you have a successful "
                  "internet connection.")
            raise

        if self.cache_bool:
            self.cache(rel_file_path, data_frame)

        # loading the dataframe from the internet rather than from the csv
        # cache handles the timestamp index differently: the index is
        # converted to a "Date" column when cached, so a matching "Date"
        # column needs to be inserted here
        data_frame.insert(0, "Date", data_frame.index)
        data_frame.index = np.arange(0, len(data_frame), 1)

        return data_frame
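For illustration, with hypothetical arguments ticker="AAPL", start_date=datetime(2017, 1, 1), and end_date=datetime(2018, 1, 1), the cache path built above resolves to:

os.path.join(".cache", "&".join(["AAPL", "2017-01-01", "2018-01-01"])) + ".csv"
# -> '.cache/AAPL&2017-01-01&2018-01-01.csv' (POSIX path separator)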
Example no. 6
from datetime import date, datetime


def datetimeToDate(x: datetime) -> date:
    # parses the date components out of str(x); equivalent to x.date()
    return date(*[int(i) for i in str(x).split(' ')[0].split('-')])
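A quick round-trip check (hypothetical value):

datetimeToDate(datetime(2020, 5, 1, 12, 30))  # -> date(2020, 5, 1)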
Example no. 7
    def __init__(self,
                 ticker: str,
                 lstm_hidden_size: int = 100,
                 lstm_num_layers: int = 2,
                 to_compare: Optional[List[str]] = None,
                 train_start_date: datetime = datetime(2017, 1, 1),
                 train_end_date: datetime = datetime(2018, 1, 1),
                 sequence_segment_length: int = 50,
                 drop_prob: float = 0.3,
                 device: str = DEVICE,
                 auto_populate: bool = True,
                 train_data_prop: float = 0.8,
                 lr: float = 1e-4,
                 train_batch_size: int = 10,
                 test_batch_size: int = 4,
                 num_workers: int = 2,
                 label_length: int = 30,
                 try_load_weights: bool = False,
                 save_state_dict: bool = True):
        r"""
        :param lstm_hidden_size: size of the lstm hidden layer
        :param lstm_num_layers: number of layers for the lstm
        :param ticker: ticker of company whose stock you want to predict
        :param to_compare: ticker of companies whose stock will be part of the features of the dataset
        :param train_start_date: date to request data from
        :param train_end_date: date to request data to
        :param sequence_segment_length: length of sequences to train the model on
        :param drop_prob: probability for dropout layers
        :param device: string for device to try sending the tensors to (e.g. "cuda")
        :param auto_populate: automatically calls all 'populate' functions in the constructor
        :param train_data_prop: proportion of data set to allocate to training data
        :param lr: learning rate for the optimizer
        :param train_batch_size: batch size for the training data
        :param test_batch_size: batch size for the testing data
        :param num_workers: parameter for Pytorch DataLoaders
        :param label_length: length of data (starting at the end of each sequence segment) to consider for the loss
        :param try_load_weights: boolean for whether the model should search for a cached model state dictionary
        :param save_state_dict: boolean for whether the model should cache its weights as a state dictionary
        """
        super(StockRNN, self).__init__()

        # variable indicating success of calling self.to(DEVICE), where 0 indicates that it hasn't been tried yet, -1
        # indicates that it failed, and 1 indicates that it was successful
        self.__togpu_works__ = 0

        # __init__ params
        self.lstm_hidden_size = lstm_hidden_size
        self.lstm_num_layers = lstm_num_layers
        self.drop_prob = drop_prob
        self.device = device
        self.ticker = ticker
        self.train_start_date = train_start_date
        self.train_end_date = train_end_date
        self.sequence_segment_length = sequence_segment_length
        self.auto_populate = auto_populate
        self.train_data_prop = train_data_prop
        self.lr = lr
        self.train_batch_size = train_batch_size
        self.test_batch_size = test_batch_size
        self.num_workers = num_workers
        self.save_state_dict = save_state_dict

        if label_length >= self.sequence_segment_length:
            print(
                "Label length was specified to be {}, but cannot be >= self.sequence_segment_length; setting "
                "self.label_length to self.sequence_segment_length - 1.".format(label_length))
            self.label_length = self.sequence_segment_length - 1
        else:
            self.label_length = label_length

        # company in index 0 is the company whose stock is being predicted
        self.companies = [
            Company(self.ticker, self.train_start_date, self.train_end_date)
        ]

        start_date_changes = []
        end_date_changes = []
        if to_compare is not None:
            to_compare.sort()
            for company_ticker in to_compare:
                try:
                    self.companies.append(
                        Company(company_ticker, self.train_start_date,
                                self.train_end_date))
                except KeyError:
                    print(
                        "There was a KeyError exception raised when accessing data for the ticker {}; will skip this "
                        "ticker".format(company_ticker))
                    continue
                except _libs.tslibs.np_datetime.OutOfBoundsDatetime:
                    print(
                        "There was a _libs.tslibs.np_datetime.OutOfBoundsDatetime exception raised when accessing "
                        "data for the ticker {}; will skip this ticker".format(
                            company_ticker))
                    continue
                except RemoteDataError:
                    print(
                        "There was a RemoteDataError when fetching data for ticker '{}'; will skip this ticker"
                        .format(company_ticker))
                    continue

                if self.companies[-1].start_date_changed:
                    start_date_changes.append(self.companies[-1].start_date)
                if self.companies[-1].end_date_changed:
                    end_date_changes.append(self.companies[-1].end_date)

        self.num_companies = len(self.companies)

        # revise the start date of all of the data if necessary
        if len(start_date_changes) != 0:
            self.train_start_date = max(start_date_changes)
            for company in self.companies:
                company.revise_start_date(self.train_start_date)
            print(
                "Data did not exist for every ticker at start date of {}; revising to the most recent starting time "
                "(common among all companies' data) of {}".format(
                    str(train_start_date.date()),
                    str(self.train_start_date.date())))
        # revise the end date of all of the data
        if len(end_date_changes) != 0:
            self.train_end_date = min(end_date_changes)
            for company in self.companies:
                company.revise_end_date(self.train_end_date)
            print(
                "Data did not exist for every ticker at end date of {}; revising to the earliest ending time "
                "(common among all companies' data) of {}".format(
                    str(train_end_date.date()),
                    str(self.train_end_date.date())))
        # str.strip(ZERO_TIME) would strip characters rather than the time
        # suffix, so use .date() to drop the time portion
        self.start_date_str = str(self.train_start_date.date())
        self.end_date_str = str(self.train_end_date.date())

        # string that describes the parameters for this model such that files for weights can be successfully loaded
        if self.num_companies > 1:
            considering_string = "_CONSIDERING_" + "&".join(
                list(map(lambda company: company.ticker, self.companies[1:])))
        else:
            considering_string = ""
        self.identifier = "MODEL_FOR_" + self.companies[0].ticker + considering_string + \
                          "_WITH_lstm_hidden_size_{}_lstm_num_layers_{}_input_size_{}_sequence_" \
                          "segment_length_{}".format(
                              self.lstm_hidden_size,
                              self.lstm_num_layers,
                              self.num_companies,
                              self.sequence_segment_length)

        self.model_weights_path = os.path.join(os.getcwd(), ".cache",
                                               self.identifier + ".bin")

        # initialize objects used during forward pass
        self.lstm = nn.LSTM(input_size=self.num_companies,
                            hidden_size=self.lstm_hidden_size,
                            num_layers=self.lstm_num_layers,
                            dropout=self.drop_prob,
                            batch_first=True)
        self.post_lstm_dropout = nn.Dropout(p=self.drop_prob)
        self.fc_1 = nn.Linear(self.lstm_hidden_size, 10)
        self.fc_2 = nn.Linear(10, self.num_companies)
        self.tanh = nn.Tanh()
        # self.rescaler = Rescaler(-0.5, 0.5)

        # initialize attributes with placeholder arrays
        self.daily_stock_data = np.array(0)
        self.train_sample_indices = np.array(0)
        self.test_sample_indices = np.array(0)
        self.train_loader_len = 0
        self.test_loader_len = 0
        self.data_len = 0

        # initialize optimizer and loss
        self.loss = nn.MSELoss()

        self.optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)

        if self.auto_populate:
            self.populate_daily_stock_data()
            self.populate_test_train()
            self.populate_loaders()

        if try_load_weights:
            try:
                weights = torch.load(self.model_weights_path)
                self.load_state_dict(weights)
                print("Loded weights from file")
            except FileNotFoundError:
                print(
                    "Tried loading state dict from file but could not find cached file"
                )
            except Exception:
                print(
                    "WARNING: Could not load state dict for an unknown reason")
Example no. 8
def datetime_convertor(date: datetime):
    # implicitly returns None when the input is not a datetime
    if isinstance(date, datetime):
        return str(date)
Example no. 9
import datetime


def datetime_to_str(o: datetime.datetime):
    # implicitly returns None when the input is not a datetime
    if isinstance(o, datetime.datetime):
        return str(o)
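Converters of this shape are typically passed as the default= hook of json.dumps (an assumption about intent here, since the call sites are not shown). Note that a default hook would normally raise TypeError for unsupported types instead of implicitly returning None, which json.dumps would serialize as null:

import json

payload = {"created": datetime.datetime(2024, 1, 2, 3, 4, 5)}
json.dumps(payload, default=datetime_to_str)
# -> '{"created": "2024-01-02 03:04:05"}'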
Example no. 10
from datetime import datetime


def convert_dt(dt: datetime) -> str:
    return str(dt)