from math import ceil


def split_datsets(dataset, training_number, per_class):
    # this scenario : dataset, 10, 9, 19
    train_set = []
    test_set = []
    y_train = []
    y_test = []
    for i in range(len(dataset)):
        if i % per_class < training_number:
            train_set.append(dataset[i])
            y_train.append(ceil(i / per_class))
        else:
            test_set.append(dataset[i])
            y_test.append(ceil(i / per_class))
    return (train_set, test_set, y_train, y_test)
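# A minimal usage sketch (not from the original source). It assumes the dataset is an
# indexable sequence laid out class by class, with `per_class` consecutive samples per
# class; the first `training_number` samples of each class go to the training split.
toy_dataset = ["a0", "a1", "a2", "b0", "b1", "b2"]  # 2 classes, 3 samples each
train, test, y_tr, y_te = split_datsets(toy_dataset, training_number=2, per_class=3)
print(train, y_tr)  # ['a0', 'a1', 'b0', 'b1'] [0, 1, 1, 2]
print(test, y_te)   # ['a2', 'b2'] [1, 2]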
# assumed imports for this snippet:
from numpy import shape, sqrt, floor, ceil, ones, reshape, abs
import matplotlib.pyplot as plt
import matplotlib.cm as CM


def showDataFigure(dataArr, *args):
    m, n = shape(dataArr)
    print(m, n)
    example_width = int(round(sqrt(n)))
    print("number of features:", n)
    print("example_width:", example_width)      # 20
    example_height = int(n / example_width)     # 20
    display_rows = int(floor(sqrt(m)))          # 10
    display_cols = int(ceil(m / display_rows))  # 10
    pad = 1
    display_array = -ones((pad + display_rows * (example_height + pad),
                           pad + display_cols * (example_width + pad)))
    print("dimensions of display_array:", shape(display_array))
    curr_ex = 0
    for j in range(display_rows):
        for i in range(display_cols):
            if curr_ex >= m:
                break
            # Scale each example by its largest absolute value.
            max_val = max(abs(dataArr[curr_ex, :]))
            display_array[
                pad + j * (example_height + pad):pad + j * (example_height + pad) + example_height,
                pad + i * (example_width + pad):pad + i * (example_width + pad) + example_width] = \
                reshape(dataArr[curr_ex, :], (example_height, example_width)) / max_val
            curr_ex = curr_ex + 1
        if curr_ex >= m:
            break
    plt.imshow(display_array.T, CM.gray)  # similar to imagesc in MATLAB
    # scaledimage.scaledimage(display_array)
    plt.show()
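# Minimal smoke test for the display helper (not from the original project): four
# random 20x20 "images" flattened to 400 features each, shown on a 2x2 grid.
import numpy as np
showDataFigure(np.random.rand(4, 400))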
def to_rounded_int(self, special: bool = False) -> BBox:
    """Rounds BBox object to have integer coordinates.

    Keyword Arguments:
        special {bool} -- Round xmin and ymin down using floor, and round
            xmax and ymax up using ceil. (default: {False})

    Returns:
        BBox -- A new BBox with integer coordinates.
    """
    if not special:
        return BBox(xmin=round(self.xmin), ymin=round(self.ymin),
                    xmax=round(self.xmax), ymax=round(self.ymax))
    else:
        return BBox(xmin=floor(self.xmin), ymin=floor(self.ymin),
                    xmax=ceil(self.xmax), ymax=ceil(self.ymax))
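# A small illustration of the two rounding modes (assuming the BBox constructor takes
# xmin/ymin/xmax/ymax keyword arguments, as used above):
#   box = BBox(xmin=10.4, ymin=20.6, xmax=30.2, ymax=40.5)
#   box.to_rounded_int()              # BBox(10, 21, 30, 40) - nearest-integer rounding
#   box.to_rounded_int(special=True)  # BBox(10, 20, 31, 41) - floor the mins, ceil the maxes
# The special mode never shrinks the box, which is the usual choice before cropping.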
def on_bar(self, bar: BarData):
    """
    1. Compute the price's distance from the initial level:
       distance = grid spacing * number of grids (total grid count, centered on the current price).
    2. Compute the distance from the current price to the end level:
       price_change = specified top level - Decimal(str(bar.close_price))
    """
    self.cancel_all()  # cancel all previous orders

    # Compute the grid trading signal
    if not self.pos:
        # Use the current price as the center axis, lay out a grid of the
        # configured size and compute the starting level.
        self.grid_start = bar.close_price * (
            1 + int(self.grid_total / 2) * self.grid_distance / 100)

    self.price_change = bar.close_price - self.grid_start
    self.current_grid = self.price_change * (
        self.grid_distance / 100)  # max move distance over the per-grid price change (percent) = grid count

    current_volume = self.grid_volume / bar.close_price  # how many coins one slice of USDT can buy

    self.max_target = ceil(
        -self.current_grid) * current_volume  # max total = grid count x volume per grid
    self.min_target = floor(
        -self.current_grid) * current_volume  # min total = grid count x volume per grid

    self.max_target = float(
        format(self.max_target, f".{self.len_tick_decimal}f"))
    self.min_target = float(
        format(self.min_target, f".{self.len_tick_decimal}f"))

    self.write_log(f"price_change:{self.price_change}")
    self.write_log(f"current_grid:{self.current_grid}")
    self.write_log(f"max:{self.max_target}")
    self.write_log(f"min:{self.min_target}")
    self.write_log(f"current_volume:{current_volume}")

    self.pos_grid = float(format(self.pos, f".{self.len_tick_decimal}f"))

    # Go long: compare the minimum target position with the current position.
    long_volume = self.min_target - self.pos
    if long_volume > 0:
        long_price = bar.close_price + (self.price_tick * self.pay_up)
        if self.pos_grid >= 0:
            self.write_log(
                f"Buy: {self.vt_symbol}, price: {long_price}, volume: {long_volume}")
            self.buy(long_price, long_volume)

    short_volume = self.max_target - self.pos
    if short_volume < 0:
        short_price = bar.close_price - (self.price_tick * self.pay_up)
        if self.pos_grid > 0:
            self.sell(short_price, abs(short_volume))
            self.write_log(
                f"Sell: {self.vt_symbol}, price: {short_price}, volume: {short_volume}")

    self.put_event()
def get_platform_node_requirements(task_count):
    """
    Generates the number of tasks, nodes and tasks per node for each platform
    as required by the header for that platform
    :param task_count: The number of cores/threads/tasks the job will take
    :return: A dictionary containing the values to be used in the header
    """
    if platform == Platforms.NESI or platform == Platforms.LOCAL:
        return {"n_tasks": task_count}
    elif platform == Platforms.TACC:
        return {
            "n_tasks": task_count,
            "n_nodes": int(ceil(task_count / qconfig["cores_per_node"])),
        }
    elif platform == Platforms.KISTI:
        n_nodes = int(ceil(task_count / qconfig["cores_per_node"]))
        return {
            "n_nodes": n_nodes,
            "n_tasks_per_node": qconfig["cores_per_node"]
        }
    raise NotImplementedError(
        f"The platform {platform} does not have related node requirements")
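# Worked example of the node arithmetic above, with hypothetical numbers (the real
# value comes from qconfig["cores_per_node"]): 40 tasks on 16-core nodes need 3 nodes.
from math import ceil
cores_per_node = 16  # hypothetical stand-in for qconfig["cores_per_node"]
task_count = 40
print(int(ceil(task_count / cores_per_node)))  # -> 3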
from math import ceil, floor


def get_shift_range(sample_rate: int, max_length: int):
    """
    :param sample_rate: wav file sampling rate
    :param max_length: length of the wav file's data array
    :return: shift range based on the human hearing frequency range (20 Hz - 20 kHz)
    """
    minimal_shift = int(ceil(sample_rate / MAX_FREQUENCY))
    maximum_shift = int(floor(sample_rate / MIN_FREQUENCY))
    if maximum_shift > max_length:
        maximum_shift = max_length
    return minimal_shift, maximum_shift
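# Quick sanity check with assumed constants (the originals live elsewhere in the module):
MAX_FREQUENCY = 20000  # upper bound of human hearing, Hz
MIN_FREQUENCY = 20     # lower bound of human hearing, Hz
print(get_shift_range(44100, max_length=100000))
# -> (3, 2205): one period of a 20 kHz tone needs ceil(44100/20000) = 3 samples,
#    one period of a 20 Hz tone needs floor(44100/20) = 2205 samples.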
import numpy as np
from math import ceil, sqrt


def _curve_mask(size: int):
    """Return a curved mask for a random matrix."""
    mask = np.zeros((size, size))
    prev = 0
    for ind in range(size):
        # Point on the quarter-circle curve for this row/column.
        _y = size - int(round(float(sqrt(pow(size, 2) - pow(ind, 2)))))
        mask[ind, int(ceil(_y / 2))] = 1
        mask[int(ceil(_y / 2)), ind] = 1
        mask[ind, _y] = 1
        mask[_y, ind] = 1
        # Fill everything between the previous and the current curve point.
        for _i in range(prev, _y):
            mask[ind, _i] = 1
            mask[_i, ind] = 1
            mask[ind, int(ceil(_i / 2))] = 1
            mask[int(ceil(_i / 2)), ind] = 1
        if ind == size - 1:
            for _i in range(prev, size):
                mask[ind, _i] = 1
                mask[_i, ind] = 1
        prev = _y
        mask[ind, ind] = 1
    return mask
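# Tiny smoke test (illustrative only): the mask is square, 0/1-valued and symmetric,
# because every assignment above is mirrored across the diagonal.
m = _curve_mask(16)
print(m.shape, m.min(), m.max(), bool((m == m.T).all()))  # (16, 16) 0.0 1.0 True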
def extrapolate_days2ctrl(df, y1, y2):
    '''
    As we can see, the DAYS_2_CONTROL field has many 0 values. These values are missing
    because cont_doy is missing, but they can be predicted based on certain assumptions
    about the wildfire. Over the years, US wildfire mitigation has definitely improved
    the equipment used to subdue wildfires, but within a given year we can assume that
    the time taken to contain a wildfire is directly proportional to the size of the
    fire. With this assumption we can fit a regression model (here a degree-2
    polynomial) to the existing values of FIRE_SIZE vs DAYS_2_CONTROL, and then use the
    learned model to extrapolate DAYS_2_CONTROL for the missing rows. This function
    performs this operation and fills in the rows where DAYS_2_CONTROL is 0.
    :param df: data frame with DAYS_2_CONTROL field
    :param y1: start year of the window (exclusive)
    :param y2: end year of the window (inclusive); must equal y1 + 5
    :return df: Updated df where all the values of DAYS_2_CONTROL are non-zero.
    '''
    assert y1 + 5 == y2

    df2use = df
    df2use = df2use[df2use['FIRE_YEAR'] > y1]   # Filter records > y1
    df2use = df2use[df2use['FIRE_YEAR'] <= y2]  # Filter records <= y2
    fitdf = df2use[df2use['DAYS_2_CONTROL'] != 0]   # Data used for fitting the model
    preddf = df2use[df2use['DAYS_2_CONTROL'] == 0]  # Data to predict with the model

    # Get the mode of DAYS_2_CONTROL for each FIRE_SIZE
    fitdf = fitdf[["FIRE_SIZE", "DAYS_2_CONTROL"]].groupby("FIRE_SIZE").agg(
        lambda x: x.value_counts().index[0]).sort_values(by="FIRE_SIZE", ascending=False)

    X = fitdf.index.values.tolist()
    Y = fitdf['DAYS_2_CONTROL'].tolist()
    z = np.polyfit(X, Y, 2)  # Fit a degree-2 polynomial
    f = np.poly1d(z)         # Model handle

    X_2predict = np.array(list(set(preddf['FIRE_SIZE'].tolist())))
    Y_predicted = f(X_2predict)
    temp_dict = dict(zip(X_2predict, Y_predicted))

    for index, row in preddf.iterrows():
        oid = row['OBJECTID']
        fsz = row['FIRE_SIZE']
        assert df.DAYS_2_CONTROL.iloc[int(oid) - 1] == 0
        df.DAYS_2_CONTROL.iloc[int(oid) - 1] = ceil(temp_dict[fsz])

    return df
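# A self-contained sketch of the extrapolation idea used above (toy numbers, not
# wildfire data): fit a degree-2 polynomial to known (size, days) pairs and evaluate
# it at an unseen size.
import numpy as np
from math import ceil

sizes = [1, 10, 50, 100, 500]        # FIRE_SIZE values with known DAYS_2_CONTROL
days = [1, 2, 4, 6, 15]              # observed days to control
coeffs = np.polyfit(sizes, days, 2)  # quadratic fit
model = np.poly1d(coeffs)
print(ceil(model(250)))              # predicted days to control for an unseen fire size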
def summarise(self):
    """
    Performs summarisation of articles, implemented using KMeans clustering.
    :return: summary text, original word count, number of words removed, and the error dict
    """
    avg = []
    total_words = self.calculate_words()
    print('Splitting into sentences...')
    self.sentence_tokenize()
    print('done')
    if self.is_language_english():
        print('Encoding sentences...')
        encoded_article = self.encode_article()
        print('done')
        print('Clustering...')
        # Get the number of clusters from the compression rate.
        number_of_clusters = int(ceil(len(encoded_article[0]) ** self.compression_rate))
        k_means_clusters = KMeans(n_clusters=number_of_clusters,
                                  random_state=0).fit(encoded_article[0])
        # Get the clusters of the article.
        for i in range(number_of_clusters):
            index = where(k_means_clusters.labels_ == i)[0]
            avg.append(numpy.mean(index))  # Find centroid from mean of points.
        # Assign labels based on closest center.
        closest, _ = pairwise_distances_argmin_min(k_means_clusters.cluster_centers_,
                                                   encoded_article[0])
        # self.plot_article_clusters(k_means_clusters.cluster_centers_, closest)  # Plotting article clusters for testing.
        # Sort clusters by their average value.
        ordering = sorted(range(number_of_clusters), key=lambda cluster: avg[cluster])
        print('done')
        # Join the selected sentences in order.
        self.article[0] = ' '.join([self.article[0][closest[index]] for index in ordering])
        new_total_words = self.calculate_words()
        return (self.article[0], total_words,
                abs(new_total_words - total_words), self.error_dict)
    else:
        return "Article summation failed", 0, 0, self.error_dict
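# How the cluster count above scales with the compression rate (illustrative numbers
# only, not from the source): with 100 encoded sentences and compression_rate = 0.7,
#   number_of_clusters = ceil(100 ** 0.7) = ceil(25.1...) = 26
# i.e. the summary keeps roughly a quarter of the sentences.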
def make_intervals(self, start_intense, final_intense, interval, duration,
                   step_length, files_count, init_delay=0):
    try:
        intervals_per_step = int(ceil(float(step_length) / interval))
        step_count = int(duration / step_length)
        interval = int(round(float(step_length) / intervals_per_step * 60))
        # Convert per-minute intensities to per-step intensities.
        start_intense_per_step = round(start_intense / (60.0 / step_length))
        final_intense_per_step = round(final_intense / (60.0 / step_length))
        step_intense = (final_intense_per_step - start_intense_per_step) / (
            step_count - 1 if step_count > 1 else 1)
        intenses = [int(round(start_intense_per_step + i * step_intense))
                    for i in range(step_count)]
        steps = [i * files_count for i in intenses]
        out = []
        for s in steps:
            # Spread each step's total across its intervals, rounding to even
            # values and carrying the rounding remainder in `taill`.
            taill = 0
            for i in range(intervals_per_step):
                new_val = int(round(((float(s) / intervals_per_step) + taill) / 2.) * 2)
                taill += float(s) / intervals_per_step - new_val
                out.append(new_val)
        return [sum(out) / files_count] + [[interval if n > 0 else 0, o]
                                           for n, o in enumerate(out)]
    except Exception as e:
        return e
except getopt.GetoptError:
    print('breakdown.py -id testid')
    sys.exit(2)

for opt, opt_value in opts:
    if opt in ('-h', '--help'):
        print("[*] Help info")
        exit()
    elif opt == '-i':
        print('Test ID:', opt_value)
        id = int(opt_value)

x_values = ['NPJ', 'PRJ', 'MWAY', 'MPASS', '',
            'SHJ$^{JM}$', 'SHJ$^{JB}$', 'PMJ$^{JM}$', 'PMJ$^{JB}$']

# join time is obtained as total - others.
y_values, max_value = ReadFile(id)
# y_norm_values = normalize(y_values)

# break into 4 parts
legend_labels = ['wait', 'partition', 'build/sort', 'merge', 'probe', 'others']

# DrawFigure(x_values, y_values, double(ceil(max_value / 1000.0)) * 1000,
#            legend_labels, '', 'cycles per input',
#            'breakdown_figure{}'.format(id), id, False)
# DrawLegend(legend_labels, 'breakdown_legend')
import random
import numpy as np
from math import ceil, floor


def _get_quadratic_mask(size: int) -> np.ndarray:
    """Return mask for a triangle test matrix."""
    mask = np.zeros((size, size))
    offset = int(floor(size / 150))  # add an extra diagonal for every 150 elements of size
    # add some randomly distributed diagonals
    extra_diagonals = [x for x in range(1, offset) if random.randint(0, 1)]
    for ind in range(size):
        mask[ind, ind] = 1
        mask[ind, int(ceil(ind / 2))] = 1
        mask[int(ceil(ind / 2)), ind] = 1
        mask[ind, int(ceil(ind / 4))] = 1
        mask[int(ceil(ind / 4)), ind] = 1
        # add the extra diagonals randomly
        for add in extra_diagonals:
            if ind + add < size:
                mask[ind + add, ind] = 1
                mask[ind + add, int(ceil(ind / 2))] = 1
                mask[int(ceil(ind / 2)), ind + add] = 1
                mask[ind + add, int(ceil(ind / 4))] = 1
                mask[int(ceil(ind / 4)), ind + add] = 1
            if ind - add >= 0:
                mask[ind - add, ind] = 1
                mask[ind - add, int(ceil(ind / 2))] = 1
                mask[int(ceil(ind / 2)), ind - add] = 1
                mask[ind - add, int(ceil(ind / 4))] = 1
                mask[int(ceil(ind / 4)), ind - add] = 1
    for splitter in range(int(np.sqrt(size))):
        matrix_splitter = ceil(float(pow(2, splitter + 1)))
        split_value = size - int(size / matrix_splitter)
        for ind in range(split_value, size):
            for add in [0] + extra_diagonals:
                if split_value + add < size:
                    mask[ind, split_value + add] = 1
                    mask[split_value + add, ind] = 1
    return mask
from math import ceil


def postive(x):
    """Return 1 if ceil(x) is at least 1, otherwise 0."""
    if int(ceil(x)) >= 1:
        return 1
    else:
        return 0
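# Behaviour at a glance: any x greater than 0 maps to 1, everything else to 0, since
# ceil(x) >= 1 exactly when x > 0.
#   postive(0.1)   # -> 1  (ceil(0.1) == 1)
#   postive(0)     # -> 0  (ceil(0) == 0)
#   postive(-0.5)  # -> 0  (ceil(-0.5) == 0)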