def prepare_ts_file(self, start_index, end_index, case_observation_size, labeling_index, label_window):
    file_full_path = self.prepare_ts_file_name(start_index, end_index, case_observation_size, labeling_index,
                                               label_window)
    if Path(file_full_path).is_file():
        logger("MODEL-DATA-PREP").debug("Using existing file: " + file_full_path)
        return file_full_path
    # Use a context manager so the file is closed even on error (and no NameError if open() fails)
    with open(file_full_path, 'w') as fit_data_file:
        fit_data_file.write(
            "@problemName fit_data\n@timeStamps false\n@univariate false\n@classLabel true True False\n@data\n")
        no_cases = 0
        case_last_data_point_index = end_index
        while case_last_data_point_index > start_index + case_observation_size:
            case_data_points = self.data_points[
                case_last_data_point_index - case_observation_size:case_last_data_point_index]
            case_filter_flags = self.data_points_filter_results[
                case_last_data_point_index - case_observation_size:case_last_data_point_index]
            case_label = self.correct_decision_labels[case_last_data_point_index - 1][labeling_index]
            case_str = sktime_case_string_of(case_data_points, case_filter_flags, case_label,
                                             self.every_m_observations_for_dimension)
            fit_data_file.write(case_str + "\n")
            no_cases += 1
            case_last_data_point_index -= 1
        logger("MODEL-DATA-PREP").debug("No cases written: " + str(no_cases) + " -> " + file_full_path)
        return file_full_path
def fit(self, luck_average_windows, assessment_windows, until=None, max_horizon=4):
    x = self.data_points_filtered
    if until is not None:
        until_filtered = find_index_of_last_timestamp_before(x, self.data_points[until][0])
        if until_filtered < 0:
            self.prediction_failed_in_fit = True
            logger("MODEL-FIT").warn("Prediction failed in fit phase")
            return
        x = self.data_points_filtered[:until_filtered]
    self.pred_stride = int(len(assessment_windows) * self.pred_stride)
    self.horizon = max_horizon
    logger("MODEL-FIT").debug(
        "num_lags: {} / pred_stride: {} / fit_intercept: {} / horizon: {}".format(
            self.num_lags, self.pred_stride, self.fit_intercept, self.horizon))
    occurrence_times = [data_point[0] for data_point in x]
    y = np.array(occurrence_times)
    X = y.reshape(-1, 1).copy()
    self.pipeline = self.get_pipeline()
    if self.learning_method == "deep":
        self.pipeline.fit(X[:-1].astype(np.float32), y[:-1].astype(np.float32))
    else:
        self.pipeline.fit(X[:-1], y[:-1])
def sktime_case_string_of(observations, observation_identified_flags, label, every_m_observations_for_dimension=None):
    if len(observations) == 0:
        return None
    if len(observations) != len(observation_identified_flags):
        return None
    no_dimensions = len(observations[0])
    if every_m_observations_for_dimension is None:
        every_m_observations_for_dimension = [1 for i in range(0, no_dimensions)]
    elif len(every_m_observations_for_dimension) != no_dimensions:
        logger("MODEL-DATA-PREP").warn("Wrong number of dimensions in every_m_observations_for_dimension parameter.")
        every_m_observations_for_dimension = [1 for i in range(0, no_dimensions)]
    dimension_strings = []
    for d in range(0, no_dimensions):
        every_nth = every_m_observations_for_dimension[d]
        observation_strings = []
        for o_idx, o in enumerate(observations):
            if o_idx % every_nth != 0:
                observation_strings.append('?')
            else:
                if observation_identified_flags[o_idx]:
                    observation_strings.append(str(o[d]))
                else:
                    observation_strings.append('?')
        dimension_strings.append(','.join(observation_strings))
    x_part = ':'.join(dimension_strings)
    return x_part + ':' + str(label)
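# Illustrative sketch of the case string produced above (hypothetical values, not from the data set):
# two dimensions, three observations, every_m_observations_for_dimension=[1, 2], the second
# observation unidentified, label True. Dimensions are joined by ':', observations by ',',
# skipped/unidentified observations become '?', and the label is appended last:
#
#   sktime_case_string_of(
#       [[0.9, 1.2], [1.1, 0.8], [1.0, 1.0]],
#       [True, False, True],
#       True,
#       [1, 2],
#   )
#   # -> "0.9,?,1.0:1.2,?,1.0:True"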
def compute_coocurance(self, documents):
    """ Computes the sparse co-occurrence matrix, storing only the row/column indices and values """
    rprint('counting unique tokens')
    V = set()
    for document in documents:
        tokens = self.preprocessor(document, to_tokens = True)
        V |= set(tokens)
    logger(f'counted {len(V):,d} unique tokens')

    # Vocabulary dictionary - map each token to an integer for indexing
    self.V = {k : v for v, k in enumerate(V)}

    sparse = {}
    N = len(documents)
    u = Update('computing co-occurance matrix : document', N)
    for n, document in enumerate(documents, 1):
        u.increment()
        for tokens in self.preprocessor(document):
            ntokens = len(tokens)
            for t, token1 in enumerate(tokens):
                # Center token
                i = self.V[token1]
                # Window (forwards only)
                window = range(t + 1, min(ntokens, t + self.window_size))
                for w in window:
                    increment = 1 / (w - t)
                    # Token ahead
                    token2 = tokens[w]
                    j = self.V[token2]
                    # Increment forwards and backwards
                    if (i, j) in sparse:
                        sparse[(i, j)] += increment
                        sparse[(j, i)] += increment
                    else:
                        sparse[(i, j)] = increment
                        sparse[(j, i)] = increment
        # Verbose updates every 1000 documents
        if n % 1000 == 0:
            u.display()
    # Final update if not already given
    if n % 1000 != 0:
        u.increment()
        u.display()

    rprint('converting to sparse indices and values')
    # Store rows, columns and values; the values are fractional 1/(w - t) weights, so keep them as floats
    self.r, self.c = np.array(list(sparse)).T.astype('int32')
    self.x = np.array(list(sparse.values())).astype('float32')
    logger(f'computed co-occurance matrix with {len(self.V) ** 2:,d} elements and {len(self.x):,d} interactions')

    self.compute_min_idx()
def load(self, path):
    """ Loads vocabulary and sparse co-occurrence matrix """
    npz = np.load(path, allow_pickle = True)
    self.V = npz['V'].tolist()  # Vocabulary
    self.r = npz['r']           # Rows of non-zero co-occurances
    self.c = npz['c']           # Cols of non-zero co-occurances
    self.x = npz['x']           # Vals of non-zero co-occurances
    logger(f'set co-occurance matrix with {len(self.V) ** 2:,d} elements, {len(self.x):,d} interactions, and {len(self.V):,d} unique tokens')
    self.compute_min_idx()
def dump_vectors(self, path, **kwargs):
    """ Dumps the word and context weight matrices and bias vectors """
    if self.x_min is not None:
        rprint('computing valid mask')
        sp = sparse.csr_matrix((self.x, (self.r, self.c)))
        valid = np.where(sp.max(axis = 1).A.flatten() > self.x_min)[0]
        # Forward **kwargs here as well, for consistency with the branch below
        np.savez(path, W = self.W, Wc = self.Wc, b = self.b, bc = self.bc, L = self.L, valid = valid, **kwargs)
    else:
        np.savez(path, W = self.W, Wc = self.Wc, b = self.b, bc = self.bc, L = self.L, **kwargs)
    logger(f'dumped vectors at "{path}"')
def does_pool_match(self, pool, new_random):
    start = 0
    end = -1
    for p in self.pools:
        if pool.id == p.id:
            end = start + int(p.share * self.random_granularity)
            break
        start += int(p.share * self.random_granularity)
    if end == -1:
        logger("random-data-generator").warn(
            "Generated random value did not fit in any pools!!!!")
    return start < new_random <= end
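# Illustrative sketch (hypothetical shares, not from any configuration): with
# random_granularity = 1000 and three pools holding shares 0.6, 0.3 and 0.1 in self.pools
# order, the ranges checked above are (0, 600], (600, 900] and (900, 1000] respectively,
# so each new_random drawn from [1, 1000] matches exactly one pool.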
def extend_mine_data_by_prediction(how_many):
    """
    Predicts new block information and appends it to the mine database table
    :param how_many: how many new rows to predict
    :return: None
    """
    logger("prediction").info(
        "Predicting {0} new records and "
        "adding them to the mine database main table".format(how_many))
    ''' Create database elements if they do not exist '''
    pass
def update():
    """
    Updates the mine data by fetching new records from the pool web API
    :return: None
    """
    logger("data_fetcher").info("Updating the data")
    # get last block data from database
    last_block_no = get_last_block_no_seen()
    # use slushpool api to update database
    result = update_with_api(last_block_no)
    print(result)
def decide(self, current_x, luck_average_windows, assessment_window, horizon_predictions, assessment_windows):
    logger("STEP-PREDICTOR").debug(
        "current_x: {} / avg_windows: {} / assmnt_window: {} / predictions: {} / assmnt_windows: {}".format(
            current_x, luck_average_windows, assessment_window, horizon_predictions, assessment_windows
        ))
    if horizon_predictions is None:
        return None
    window_length = TIME_10_MINUTES * assessment_window
    occurrences_count_in_window = 0
    for p in horizon_predictions:
        if 0 < p - current_x[0] <= window_length:
            occurrences_count_in_window += 1
    return occurrences_count_in_window >= self.positive_decision_occurrence_count_threshold
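# Note on the decision rule above: each prediction p is a predicted occurrence timestamp, and the
# decision is positive when at least positive_decision_occurrence_count_threshold of them fall
# within assessment_window ten-minute ticks after the current point's timestamp current_x[0].
# For example (hypothetical numbers), with assessment_window = 3 the window spans 30 minutes.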
def decide(self, current_x, luck_average_windows, assessment_window, horizon_predictions, assessment_windows):
    logger("SKTIME-DECIDE").debug(
        "current_x: {} / avg_windows: {} / assmnt_window: {} / predictions: {} / assmnt_windows: {}".format(
            current_x, luck_average_windows, assessment_window, horizon_predictions, assessment_windows
        ))
    if horizon_predictions is None:
        return None
    for wi, w in enumerate(assessment_windows):
        if w > self.horizon:
            break
        if w == assessment_window:
            return horizon_predictions[wi] == 'true'
    return None
def predict(self, luck_average_windows, assessment_windows, from_idx=None):
    logger("MODEL-PREDICT").debug(
        "num_lags: {} / pred_stride: {} / fit_intercept: {} / horizon: {}".format(
            self.num_lags, self.pred_stride, self.fit_intercept, self.horizon))
    x = self.data_points
    strengths = self.aggregator.aggregate_lucks(x, luck_average_windows)
    strengths_series = [s[1] for s in strengths]
    y = np.array(strengths_series)
    X = y.reshape(-1, 1).copy()
    result = []
    if self.learning_method == "deep":
        if from_idx is None:
            prediction = self.pipeline.predict(X.astype(np.float32), start_idx=len(X) - 1, to_scale=True)
            predictions = []
            for h in assessment_windows:
                if h <= self.horizon:
                    predictions.append(prediction[h - 1])
            result.append(predictions)
        else:
            prediction = self.pipeline.predict(X.astype(np.float32), start_idx=from_idx, to_scale=True)
            for p in prediction:
                predictions = []
                for h in assessment_windows:
                    if h <= self.horizon:
                        predictions.append(p[h - 1])
                result.append(predictions)
    else:
        if from_idx is None:
            prediction = self.pipeline.predict(X, start_idx=len(X) - 1)
            predictions = []
            for h in assessment_windows:
                if h <= self.horizon:
                    predictions.append(prediction[h - 1])
            result.append(predictions)
        else:
            prediction = self.pipeline.predict(X, start_idx=from_idx)
            for p in prediction:
                predictions = []
                for h in assessment_windows:
                    if h <= self.horizon:
                        predictions.append(p[h - 1])
                result.append(predictions)
    return result
def update_with_api(last_block_no):
    """
    Gets blocks data from the slushpool API; if the last block is included in the 15-block data,
    updates the database accordingly, otherwise the last block value is to be updated using web scraping
    :return: last block value to be scraped, if none required returns 0
    """
    # temporary
    last_block_no = 641547

    # using slushpool api, get data of last 15 blocks
    url = "https://slushpool.com/stats/json/btc/"
    token = get_slush_account_token()
    headerVar = {"X-SlushPool-Auth-Token": token}
    result = requests.get(url, headers=headerVar)
    data = result.json()

    # parse json data, check if last_block_no is included in data retrieved
    data = data["btc"]
    blocks = data["blocks"]
    isIncluded = False
    blockNoList = []
    for key in blocks.keys():
        if last_block_no == int(key):
            isIncluded = True
        blockNoList.append(int(key))

    # take action based on whether the last block No. exists in the api response or not
    if not isIncluded:
        return min(blockNoList)
    else:
        # add data retrieved from API to the database
        logger("data_fetcher").info("Updating the data from pool web API")
        mine_database.switch_to_temporary_copy()
        for key in blocks.keys():
            if int(key) > last_block_no:
                blockData = blocks[key]
                dbRecord = dict()
                dbRecord["date_found"] = blockData["date_found"]
                dbRecord["duration"] = blockData["mining_duration"]
                dbRecord["hash_rate"] = blockData["pool_scoring_hash_rate"]
                dbRecord["difficulty"] = 111111111111111  # FIXME get block difficulty
                dbRecord["block_no"] = int(key)
                dbRecord["block_value"] = blockData["value"]
                print(dbRecord)
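# The parsing above assumes a response shaped roughly like
#   {"btc": {"blocks": {"<block_no>": {"date_found": ..., "mining_duration": ...,
#                                      "pool_scoring_hash_rate": ..., "value": ...}, ...}}}
# (inferred from the field accesses in the code, not taken from the API documentation).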
def fit(self, luck_average_windows, assessment_windows, until=None, max_horizon=9 * 6):
    logger("MODEL-FIT").debug(
        "max_horizon: {} / avg windows: {} / assmnt windows: {} / until: {} / total_data_size: {}".format(
            max_horizon, str(luck_average_windows), str(assessment_windows), until, len(self.data_points)))
    if until is not None and (until < 0 or until >= len(self.data_points)):
        logger("MODEL-FIT").error("Parameter until is too large for the given data points: {}".format(until))
        return
    self.horizon = max_horizon
    for wi, w in enumerate(assessment_windows):
        if w > self.horizon:
            break
        # prepare data frame for sktime package
        temporary_data_fit_file = self.prepare_ts_file(0, len(self.data_points) if until is None else until,
                                                       self.case_observation_size, wi, w)
        # parse data frames from the temporary fit data file
        X, y = load_from_tsfile_to_dataframe(temporary_data_fit_file, replace_missing_vals_with="-100")
        # which label is the first one?
        true_index = 0
        if y[0] == "false":
            true_index = 1
        new_class_weights = self.create_class_weight_dict(true_index=true_index)
        estimators = []
        for i in range(0, len(luck_average_windows)):
            estimators.append(("TSF{}".format(i),
                               TimeSeriesForestClassifier(
                                   n_estimators=int(self.no_estimators),
                                   n_jobs=16,
                                   max_depth=self.max_depth,
                                   class_weight=new_class_weights,
                                   criterion=self.criterion,
                                   min_samples_split=self.min_samples_split,
                                   min_samples_leaf=self.min_samples_leaf,
                                   oob_score=self.oob_score,
                                   bootstrap=self.bootstrap),
                               [i]))
        c = ColumnEnsembleClassifier(estimators=estimators)
        c.fit(X, y)
        # print(str(c.classes_))
        self.classifiers.append(c)
def predict(self, luck_average_windows, assessment_windows, from_idx=None):
    logger("MODEL-PREDICT").debug(
        "num_lags: {} / pred_stride: {} / fit_intercept: {} / horizon: {}".format(
            self.num_lags, self.pred_stride, self.fit_intercept, self.horizon))
    from_idx = len(self.data_points) - 1 if from_idx is None else from_idx
    if self.prediction_failed_in_fit:
        return [None for i in range(from_idx, len(self.data_points))]
    from_idx_on_occurrences = find_index_of_last_timestamp_before(self.data_points_filtered,
                                                                  self.data_points[from_idx][0])
    if from_idx_on_occurrences < 0:
        logger("MODEL-PREDICT").warn("No block occurrence is found before the given points")
        return [None for i in range(from_idx, len(self.data_points))]
    # Find prediction on filtered data
    occurrence_timestamps = [data_point[0] for data_point in self.data_points_filtered]
    y = np.array(occurrence_timestamps)
    X = y.reshape(-1, 1).copy()
    to_scale = False
    if self.learning_method == "deep":
        X = X.astype(np.float32)
        to_scale = True
    future_points_prediction = self.pipeline.predict(X, start_idx=from_idx_on_occurrences, to_scale=to_scale)
    result = []
    # For each requested point, check if there is any close occurrence point to use for prediction
    for i in range(from_idx, len(self.data_points)):
        data_point = self.data_points[i]
        last_filtered_index = find_index_of_last_timestamp_before(self.data_points_filtered, data_point[0])
        if last_filtered_index < 0 or (data_point[0] - self.data_points_filtered[last_filtered_index][0]) \
                > self.too_late_to_predict_time_threshold:
            result.append(None)
        else:
            data_point_last_prediction = future_points_prediction[last_filtered_index - from_idx_on_occurrences]
            prediction_age = data_point[0] - self.data_points_filtered[last_filtered_index][0]
            result.append([p - prediction_age for p in data_point_last_prediction])
    return result
def case_algorithm(algorithm, data_handler, luck_average_windows, assessment_average_windows, pool_name,
                   step_predictor=False):
    ## Scikit
    default_parameters = {
        "no_estimators": 150,
        "case_observation_size": 24 * TICKS_HOUR,
        "prediction_above_one_margin": 0,
        "round_to_n_decimal_points": 7,
        "class_weight": None,
        "max_depth": 3,
        "criterion": 'entropy',
        "min_samples_split": 10,
        "min_samples_leaf": 1,
        "bootstrap": True,
        "oob_score": True,
    }
    logger("CASES").info(
        "========================================================== base")
    case(data_handler,
         luck_average_windows=luck_average_windows,
         assessment_average_windows=assessment_average_windows,
         pool_name=pool_name,
         no_estimators=default_parameters["no_estimators"],
         predictor_class="scikit",
         case_observation_size=default_parameters["case_observation_size"],
         prediction_above_one_margin=default_parameters["prediction_above_one_margin"],
         round_to_n_decimal_points=default_parameters["round_to_n_decimal_points"],
         class_weight=default_parameters["class_weight"],
         max_depth=default_parameters["max_depth"],
         criterion=default_parameters["criterion"],
         min_samples_split=default_parameters["min_samples_split"],
         min_samples_leaf=default_parameters["min_samples_leaf"],
         bootstrap=default_parameters["bootstrap"],
         oob_score=default_parameters["oob_score"])
def update_pools_db_with_occurrences(self):
    now_timestamp = get_now_timestamp()
    start_timestamp = now_timestamp - self.all_time_range
    # if there are occurrences inserted from before, continue on top of that
    latest_timestamp = block_data.get_latest_pool_block_occurrence_timestamp()
    if latest_timestamp is not None:
        start_timestamp = latest_timestamp + self.step_size
    seed(int(datetime.now().timestamp()))
    block_no = 10000
    while start_timestamp < now_timestamp:
        logger("random-data-generator").debug(
            "Block # {} processed.".format(block_no))
        new_random = randint(1, self.random_granularity)
        matching_pool = None
        for p in self.pools:
            if self.does_pool_match(p, new_random):
                matching_pool = p
                break
        # Update occurrence
        block_data.insert_pool_block_occurrence(start_timestamp, matching_pool.id, block_no)
        self.pool_stats[matching_pool].add_point_and_update(start_timestamp)
        # Update luck tables
        self.update_luck_tables_after_one_step(start_timestamp)
        start_timestamp += self.step_size
        block_no += 1
    # Update assessments
    all_block_occurrences = block_data.get_all_block_occurrences()
    for row in all_block_occurrences:
        matching_pool = None
        for p in self.pools:
            if p.id == row[1]:
                matching_pool = p
                break
        self.pool_assessment_stats[matching_pool].add_point_and_update(row[0])
        # Update assessment tables
        self.update_luck_tables_after_one_step(row[0], mode="assessments")
def predict(self, luck_average_windows, assessment_windows, from_idx=None):
    logger("MODEL-PREDICT").debug(
        "horizon: {} / avg windows: {} / assmnt windows: {} / from_idx: {} / total_data_size: {}".format(
            self.horizon, str(luck_average_windows), str(assessment_windows), from_idx, len(self.data_points)))
    if from_idx is not None and (from_idx < 0 or from_idx >= len(self.data_points)):
        logger("MODEL-PREDICT").error(
            "Parameter from_idx is too large for the given data points: {}".format(from_idx))
        return
    from_idx = len(self.data_points) - 1 if from_idx is None else from_idx
    y_predictions = [[] for i in range(from_idx, len(self.data_points))]
    for wi, w in enumerate(assessment_windows):
        if w > self.horizon:
            break
        # prepare data frame for sktime package
        temporary_data_fit_file = self.prepare_ts_file(from_idx - self.case_observation_size,
                                                       len(self.data_points),
                                                       self.case_observation_size, wi, w)
        X, y = load_from_tsfile_to_dataframe(temporary_data_fit_file, replace_missing_vals_with="-100")
        y_prediction = self.classifiers[wi].predict(X)
        for pred_point_index, y_point_prediction in enumerate(y_prediction):
            y_predictions[pred_point_index].append(y_point_prediction)
    logger("MODEL-PREDICT").debug("Predictions: {}".format(y_predictions))
    return y_predictions
def fit(self, luck_average_windows, assessment_windows, until=None, max_horizon=9 * 6):
    x = self.data_points
    if until is not None:
        x = self.data_points[:until]
    self.pred_stride = int(len(assessment_windows) * self.pred_stride)
    self.horizon = max_horizon
    logger("MODEL-FIT").debug(
        "num_lags: {} / pred_stride: {} / fit_intercept: {} / horizon: {}".format(
            self.num_lags, self.pred_stride, self.fit_intercept, self.horizon))
    strengths = self.aggregator.aggregate_lucks(x, luck_average_windows)
    strengths_series = [s[1] for s in strengths]
    y = np.array(strengths_series)
    X = y.reshape(-1, 1).copy()
    self.pipeline = self.get_pipeline()
    if self.learning_method == "deep":
        self.pipeline.fit(X[:-1].astype(np.float32), y[:-1].astype(np.float32))
    else:
        self.pipeline.fit(X[:-1], y[:-1])
def __init__(self, timestamps, data_points, correct_decision_labels, no_estimators=100, filter_object=None,
             case_observation_size=24 * 6, every_m_observations_for_dimension=None, class_weight="balanced",
             max_depth=5, criterion='entropy', min_samples_split=2, min_samples_leaf=1, bootstrap=False,
             oob_score=False):
    """
    :param correct_decision_labels: parallel with the assessment windows; a list of True/False label lists,
                                    one per data point
    """
    super().__init__(data_points, no_estimators=no_estimators)
    if len(timestamps) != len(data_points) or len(timestamps) != len(correct_decision_labels):
        logger("MODEL-CREATE").error(
            "Failed to create predictor because of inconsistent length of timestamp/x/y lists")
        return
    self.timestamps = timestamps
    self.correct_decision_labels = correct_decision_labels
    self.filter = filter_object
    self.data_points_filter_results = []
    self.case_observation_size = case_observation_size
    self.every_m_observations_for_dimension = every_m_observations_for_dimension
    self.class_weight = class_weight
    self.max_depth = max_depth
    self.criterion = criterion
    self.min_samples_split = min_samples_split
    self.min_samples_leaf = min_samples_leaf
    self.bootstrap = bootstrap
    self.oob_score = oob_score
    # The following list will contain one classifier per assessment window
    self.classifiers = []
    self.horizon = 9 * 6
def run(self):
    """
    Executes the ticks one by one from the beginning until the end
    :return:
    """
    logger_object = logger("tester")
    # pre tick
    self.Algorithm.pre_ticks(self)
    # run ticks
    for tick_index in range(len(self.r.RuntimeTicks)):
        self.r.current_run_tick_index = tick_index
        tick = self.r.RuntimeTicks[tick_index]
        tick.run(self)
    # post tick
    self.Algorithm.post_ticks(self)
    # print cost and reward
    logger_object.info("Cost: {0:.3f} - Reward: {1:0.3f} - R/C%: [ {3:.3f} >> {2:.3f} << {4:.3f} ]".format(
        self.r.total_cost, self.r.total_reward, (self.r.total_reward * 100) / self.r.total_cost,
        self.r.statistics.profit_min, self.r.statistics.profit_max))
    return self.r.total_cost, self.r.total_reward, (self.r.total_reward * 100) / self.r.total_cost, \
           self.r.statistics.profit_min, self.r.statistics.profit_max
def dump_co_occurance(self, path, **kwargs):
    """ Dumps the vocabulary and co-occurrence matrix """
    np.savez(path, V = self.V, r = self.r, c = self.c, x = self.x, **kwargs)
    logger(f'dumped co-occurance at "{path}"')
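# Usage sketch for the methods above (hypothetical instance, constructor and file names;
# only the method calls themselves come from this module):
#
#   model = GloVe(...)                      # hypothetical constructor
#   model.compute_coocurance(documents)     # build V, r, c, x from raw documents
#   model.dump_co_occurance('cooc.npz')     # persist vocabulary + sparse co-occurrence matrix
#   ...
#   model.load('cooc.npz')                  # restore V, r, c, x in a later session
#   model.fit(vector_size=100)              # train word/context vectors
#   model.dump_vectors('vectors.npz')       # persist W, Wc, b, bc, L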
def fit(self, vector_size, eta = 1e-4, epochs = 100, optimiser = 'adagrad', stop = None, tau = 1e-7,
        **optimiser_kwargs):
    if isinstance(optimiser, str):
        optimiser = get_optimiser(optimiser)
    logger(f'fitting with vector size = {vector_size:,d}')
    r, c, x = self.r, self.c, self.x

    # Filter out not frequent enough co-occurances
    if self.x_min is not None:
        _r, _c, x = r[self._idx], c[self._idx], x[self._idx]
        ur = {r : i for i, r in enumerate(np.unique(_r))}
        uc = {c : i for i, c in enumerate(np.unique(_c))}
        r = np.array([ur[r] for r in _r]).astype('int32')
        c = np.array([uc[c] for c in _c]).astype('int32')
        # Free memory
        del _r, _c, ur, uc; gc.collect()

    # Compute max if not set, then cap values
    x_max = x.max() if self.x_max is None else self.x_max
    if self.x_max is not None:
        rprint('setting x_max upper bound')
        _x = np.minimum(x, x_max)
        rprint('precomputing f(X)')
        fx = (_x / x_max) ** self.alpha
        # Free memory
        del _x; gc.collect()
    else:
        rprint('precomputing f(X)')
        fx = (x / x_max) ** self.alpha
    rprint('precomputing log(X)')
    lx = np.log(x)
    # Free memory
    del x; gc.collect()

    np.random.seed(self.random_state)
    shape = len(np.unique(r)), vector_size
    rprint('initialising word vectors and bias vector variables')
    W1 = np.random.normal(scale = 0.5, size = shape).astype('float32')
    W2 = np.random.normal(scale = 0.5, size = shape).astype('float32')
    b1 = np.random.normal(scale = 0.5, size = shape[0]).astype('float32')
    b2 = np.random.normal(scale = 0.5, size = shape[0]).astype('float32')

    # As the sparse matrix may have multiple entries per row, compute these entries beforehand for later ease
    rprint('computing masks for optimisation')
    rmasks = {}
    cmasks = {}
    for d, masks in zip([r, c], [rmasks, cmasks]):
        for i, val in enumerate(d):
            if val not in masks:
                masks[val] = []
            masks[val] += [i]
    # Free memory (masks is linked to cmasks so cannot delete it)
    del d; gc.collect()

    # Initialise optimisers (W1, W2, b)
    optim = [optimiser(eta = eta, **optimiser_kwargs) for _ in range(3)]
    logger(f'initialised variables')

    u = Update('optimising epoch', epochs)
    L = self.L = np.ones(epochs + 1) * np.inf
    N = fx.sum()
    lo = np.inf
    for i in range(epochs):
        # Early stopping condition: if over the last "stop" iterations there is a total variation of less than "tau"
        if stop is not None and i >= stop:
            if (L[i - stop: i].max() / L[i - stop: i].min() - 1) <= tau:
                break
        delta = (W1[r] * W2[c]).sum(axis = 1) + b1[r] + b2[c] - lx
        L[i] = np.mean(fx * np.square(delta))
        # Store the best
        if L[i] < lo:
            best = [W1.copy(), W2.copy(), b1.copy(), b2.copy()]
            lo = L[i]
        # Chain rule of the loss function of the form L = fx * (delta ^ 2) w.r.t. delta (ignoring proportional constants)
        chain = (fx * delta)

        # Compute gradients to update W and b, i.e. differentiate delta w.r.t. W and b respectively
        #
        # Steps:
        #   - Compute adjusted gradients using optimiser
        #   - Aggregate gradients for each token (row of W)
        #   - Update parameter
        #   - Free space to reduce memory cost
        #
        # Do for W1 (optim[0]), W2 (optim[1]), b1 (optim[2]), b2 (optim[2])
        # Gradients for b1 and b2 are similar, just with different aggregation masks r and c
        gw1 = optim[0](np.einsum('c,cv->cv', chain, W2[c]).astype('float32'))
        gW1 = np.zeros_like(W1)
        for j, mask in rmasks.items():
            gW1[j] += gw1[mask].mean(axis = 0)
        W1 -= gW1
        del gw1, gW1; gc.collect()

        gw2 = optim[1](np.einsum('c,cv->cv', chain, W1[r]).astype('float32'))
        gW2 = np.zeros_like(W2)
        for j, mask in cmasks.items():
            gW2[j] += gw2[mask].mean(axis = 0)
        W2 -= gW2
        del gw2, gW2; gc.collect()

        # Common gradients for b1 and b2 with different aggregations
        gb = optim[2](chain.astype('float32'))
        gb1 = np.zeros_like(b1)
        for j, mask in rmasks.items():
            gb1[j] += gb[mask].mean(axis = 0)
        b1 -= gb1
        del gb1; gc.collect()

        gb2 = np.zeros_like(b2)
        for j, mask in cmasks.items():
            gb2[j] += gb[mask].mean(axis = 0)
        b2 -= gb2
        del chain, gb2; gc.collect()

        # Verbose update
        u.increment()
        u.display(loss = L[i], best = lo)
    else:
        # Enters the else statement only if the for loop completes without break
        i += 1
        delta = (W1[r] * W2[c]).sum(axis = 1) + b1[r] + b2[c] - lx
        L[i] = np.sum(fx * np.square(delta)) / N
        if L[i] == L.min():
            best = [W1.copy(), W2.copy(), b1.copy(), b2.copy()]

    self.W, self.Wc, self.b, self.bc = best
    self.L = L[:i + 1]
    logger(f'optimised over {i:,d} epochs (best loss = {min(L):,.3e}, final loss = {L[i]:,.3e})')
    return self
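# Reference note on the objective minimised by the loop above (standard GloVe weighted
# least-squares form, matching the delta/fx/lx terms used in the code):
#
#   J = sum_{ij} f(X_ij) * (w_i . w~_j + b_i + b~_j - log X_ij)^2
#   f(x) = min(x / x_max, 1) ** alpha
#
# where W1/W2 hold the word/context vectors w_i and w~_j, b1/b2 the biases,
# lx = log X_ij and fx = f(X_ij).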
def optimize():
    """
    Finds the best values for the prediction
    :return:
    """
    logger("optimizer").info("Tuning prediction parameters")
def compute_min_idx(self):
    if self.x_min is not None:
        self._idx = np.where(self.x_min <= self.x)[0]
        logger(f'{len(self._idx):,d} interactions above x_min = {self.x_min}')
def __init__(self):
    self.logger = logger("Algorithm-3-Hour")
# logger("main").info("Minimums\t|\t{0:.3f}\t{1:.3f}\t {3:.3f} >> {2:.3f} << {4:.3f}".format(*minimums)) # logger("main").info("Maximums\t|\t{0:.3f}\t{1:.3f}\t {3:.3f} >> {2:.3f} << {4:.3f}".format(*maximums)) if __name__ == "__main__": luck_average_windows = prepare_average_luck_windows() assessment_average_windows = prepare_average_assessment_windows() pools = prepare_pools() # predictor.populate_db_with_random(pools, luck_average_windows, assessment_average_windows) table_names = block_data.get_list_of_table_names(which_db="pools") print(str(table_names)) data_handler = predictor.create_data_handler(pools, luck_average_windows, assessment_average_windows) pool_names = ["SLUSHPOOL", "BTCCOM", "VIABTC"] for pool_name in pool_names[:1]: logger("RESULTS").info("Pool: {}".format(pool_name)) # Combination example # Booster # case_algorithm("booster", data_handler, luck_average_windows, assessment_average_windows, pool_name, # step_predictor=True) # Linear case_algorithm("linear", data_handler, luck_average_windows, assessment_average_windows, pool_name, step_predictor=True) # Linear # algorithm_tester.add_algorithm([ # StrengthPredictor(learning_method="linear", aggregator=Aggregator(method="strength"), # num_lags=10, pred_stride=1, fit_intercept=False,
def case(data_handler, luck_average_windows, assessment_average_windows, pool_name, cases=None, method="linear",
         aggr_method="strength", aggr_avg_window_idx=6, lags=5, stride=0.5, no_estimators=50,
         too_late_to_predict_time_threshold=1.5 * TIME_10_MINUTES, positive_decision_occurrence_count_threshold=2,
         decision_aggregation_method="and", predictor_class="aggregation", data_filter=None,
         case_observation_size=24 * 6, prediction_above_one_margin=0.5, round_to_n_decimal_points=5,
         class_weight=None, max_depth=5, criterion='entropy', min_samples_split=2, min_samples_leaf=1,
         bootstrap=False, oob_score=False):
    """
    :param predictor_class: aggregation or step or scikit
    :return:
    """
    logger("==========================================").info("")
    if cases is None:
        logger("CASE").info("{}-{}-{}-{}-{}-{}-{}".format(
            predictor_class, method, aggr_method,
            (aggr_avg_window_idx if aggr_avg_window_idx is not None else ""),
            lags, stride, no_estimators))
    else:
        logger("CASE").info("-- COMBINATION --")
        for test_case in cases:
            logger("CASE").info("{}-{}-{}-{}-{}-{}-{}".format(
                predictor_class, test_case[0], test_case[1],
                (test_case[2] if test_case[2] is not None else ""),
                test_case[3], test_case[4], test_case[5]))
        logger("CASE").info("-----------------")

    sum_results = None
    no_exp_repeats = 10
    for day_offset in range(no_exp_repeats, 0, -1):
        data_handler.set_main_configs_for_input_data_preparation(
            no_days_offset=(day_offset - 1) * 3)
        x, y = predictor.export_pool_data_points_for_training(
            data_handler, pool_name, round_to_n_decimal_points=round_to_n_decimal_points)
        data_points_filter = None
        if predictor_class == "step" or predictor_class == "scikit":
            data_points_filter = data_filter
            if data_points_filter is not None:
                data_points_filter.init(x)
        algorithm_tester = AlgorithmTester(luck_average_windows, assessment_average_windows, x, y)
        # Booster
        if cases is None:
            if predictor_class == "aggregation":
                algorithm_tester.add_algorithm([
                    StrengthPredictor(learning_method=method,
                                      aggregator=Aggregator(method=aggr_method, avg_window_idx=aggr_avg_window_idx),
                                      num_lags=lags, pred_stride=stride, fit_intercept=False,
                                      success_hardness_factor=1, no_estimators=no_estimators)
                ])
            elif predictor_class == "step":
                algorithm_tester.add_algorithm([
                    StepPredictor(x, learning_method=method,
                                  aggregator=Aggregator(method=aggr_method, avg_window_idx=aggr_avg_window_idx),
                                  num_lags=lags, pred_stride=stride, fit_intercept=False,
                                  no_estimators=no_estimators, filter_object=data_points_filter,
                                  too_late_to_predict_time_threshold=too_late_to_predict_time_threshold,
                                  positive_decision_occurrence_count_threshold=positive_decision_occurrence_count_threshold)
                ])
            elif predictor_class == "scikit":
                # prepare scikit friendly x
                x_without_timestamp = []
                x_only_timestamp = []
                for data_point in x:
                    x_without_timestamp.append(data_point[1:])
                    x_only_timestamp.append(data_point[0])
                # prepare classification labeling based on assessments
                decision_labels = []
                for data_point_assessments in y:
                    decision_labels.append([
                        (assessment >= 1 + prediction_above_one_margin)
                        for assessment in data_point_assessments[1:]
                    ])
                algorithm_tester.add_algorithm([
                    SciKitPredictor(
                        x_only_timestamp, x_without_timestamp, decision_labels,
                        no_estimators=no_estimators,
                        filter_object=data_points_filter,
                        case_observation_size=case_observation_size,
                        every_m_observations_for_dimension=[
                            get_every_nth_value_for_average_window(avg_window)
                            for avg_window in luck_average_windows
                        ],
                        class_weight=class_weight,
                        max_depth=max_depth,
                        criterion=criterion,
                        min_samples_split=min_samples_split,
                        min_samples_leaf=min_samples_leaf,
                        bootstrap=bootstrap,
                        oob_score=oob_score)
                ])
        else:
            for test_case in cases:
                if predictor_class == "aggregation":
                    algorithm_tester.add_algorithm([
                        StrengthPredictor(learning_method=test_case[0],
                                          aggregator=Aggregator(method=test_case[1], avg_window_idx=test_case[2]),
                                          num_lags=test_case[3], pred_stride=test_case[4], fit_intercept=False,
                                          success_hardness_factor=1, no_estimators=test_case[5])
                    ])
                elif predictor_class == "step":
                    algorithm_tester.add_algorithm([
                        StepPredictor(x, learning_method=test_case[0],
                                      aggregator=Aggregator(method=test_case[1], avg_window_idx=test_case[2]),
                                      num_lags=test_case[3], pred_stride=test_case[4], fit_intercept=False,
                                      no_estimators=test_case[5], filter_object=data_points_filter,
                                      too_late_to_predict_time_threshold=test_case[6],
                                      positive_decision_occurrence_count_threshold=test_case[7])
                    ])
        max_horizon = 1000000
        results = algorithm_tester.test_algorithms(
            decision_aggregation_method=decision_aggregation_method,
            max_horizon=max_horizon, test_size=100)
        if sum_results is None:
            sum_results = results
        else:
            new_results = []
            for i, w in enumerate(assessment_average_windows):
                if w > max_horizon:
                    continue
                last_window_sum = sum_results[i]
                current_result = results[i]
                new_sum = None
                for last_window_sum_idx in range(0, len(last_window_sum)):
                    if last_window_sum_idx == 0:
                        new_sum = (last_window_sum[last_window_sum_idx] + current_result[last_window_sum_idx], )
                    else:
                        new_sum = new_sum + (
                            last_window_sum[last_window_sum_idx] + current_result[last_window_sum_idx], )
                new_results.append(new_sum)
            sum_results = new_results
        logger("RESULTS-AVG").debug(
            "Day offset: \t Horizon : S/T\tP/T\tRP/T\tPS/P\tPS/T\tT")
        for i, w in enumerate(assessment_average_windows):
            if w > max_horizon:
                continue
            logger("RESULTS-AVG").debug(
                "Day offset: {} \t Horizon {} : {:.2f}\t{:.2f}\t{}\t{:.2f}\t{:.2f}\t{}"
                .format(day_offset, w,
                        sum_results[i][0] / no_exp_repeats,
                        sum_results[i][1] / no_exp_repeats,
                        sum_results[i][2] / no_exp_repeats,
                        sum_results[i][3] / no_exp_repeats,
                        sum_results[i][4] / no_exp_repeats,
                        sum_results[i][5] / no_exp_repeats))

    logger("RESULTS-AVG").info(
        "Day offset: \t Horizon : S/T\tP/T\tRP/T\tPS/P\tPS/T\tT")
    for i, w in enumerate(assessment_average_windows):
        if w > max_horizon:
            continue
        logger("RESULTS-AVG").info(
            "Horizon {} : {:.2f}\t{:.2f}\t{}\t{:.2f}\t{:.2f}\t{}".format(
                w,
                sum_results[i][0] / no_exp_repeats,
                sum_results[i][1] / no_exp_repeats,
                sum_results[i][2] / no_exp_repeats,
                sum_results[i][3] / no_exp_repeats,
                sum_results[i][4] / no_exp_repeats,
                sum_results[i][5] / no_exp_repeats))
def extend_mine_data_by_prediction(how_many):
    """
    Predicts new block information and appends it to the mine database table
    :param how_many: how many new rows to predict
    :return: None
    """
    logger("prediction").info(
        "Predicting {0} new records and "
        "adding them to the mine database main table".format(how_many))
    ''' Create database elements if they do not exist '''
    pass
    logger("prediction").info("Setting up the predictor")


def populate_db_with_random(pools, luck_average_windows, assessment_average_windows):
    block_data.switch_to_temporary_copy(which_db="pools")
    data_handler = RandomPoolDataHandler(pools, luck_average_windows, assessment_average_windows)
    data_handler.initialize()
    data_handler.update_pools_db_with_occurrences()
    # data_handler.update_luck_tables()
    block_data.switch_to_main_copy(save_temporary_copy=True, remove_temporary_copy=True, which_db="pools")
    # block_data.print_all_pools_data()
def generate_plots():
    """
    Generates all plots from the mine database and saves them as PNG files
    :return: None
    """
    logger("plots").info("Generating plots")