def parse_config_file(path):
    """Parse INI file containing configuration details."""
    # Parse options file
    parser = SafeConfigParser(defaults=DEFAULTS)
    with open(path, 'r') as fp:
        parser.readfp(fp)

    # Build a list of label named tuples
    ignore_sections = ['options', 'auth', 'connection']
    sections = [s for s in parser.sections() if s not in ignore_sections]
    labels = [
        make_label(sec, parser.get(sec, 'min'), parser.get(sec, 'max'))
        for sec in sections
    ]

    # Build an options object and return it
    opts = dict(
        folder=parser.get('options', 'folder'),
        user=parser.get('auth', 'user'),
        password=parser.get('auth', 'password'),
        host=parser.get('connection', 'host'),
        ssl=parser.getboolean('connection', 'ssl'),
        port=parser.getint('connection', 'port'),
        labels=labels,
    )
    return opts
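# A minimal sketch of the `make_label` helper assumed by parse_config_file
# above: a factory around a named tuple with name/min/max fields. The `Label`
# type and the integer coercion are assumptions, not part of the original
# source.
from collections import namedtuple

Label = namedtuple('Label', ['name', 'min', 'max'])

def make_label(name, min_size, max_size):
    # Coerce the size bounds read from the INI file (strings) to integers.
    return Label(name, int(min_size), int(max_size))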
def get_input(debug=True):
    """Build the input for the BERT model."""
    input_word_list = []
    input_label_list = []
    with open("tmp/input_bert.json", 'r', encoding='UTF-8') as f:
        data = json.load(f)
    bert_words = list(data["sentence"])
    # Start with an all-"O" label sequence.
    label_list = ["O" for _ in bert_words]
    for entity in data["entity-mentions"]:
        en_start = entity["start"]
        en_end = entity["end"]
        en_type = entity["entity-type"]
        # Tag the span according to its start and end positions.
        make_label(en_start, en_end, en_type, label_list)
    input_word_list.append(["[CLS]"] + bert_words + ["[SEP]"])
    input_label_list.append(["O"] + label_list + ["O"])
    return input_word_list, input_label_list
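# A minimal sketch of the in-place `make_label` helper this function relies
# on, assuming a BIO tagging scheme; the exact scheme (BIO vs. BIOES) is an
# assumption.
def make_label(en_start, en_end, en_type, label_list):
    # First token of the entity span gets "B-", the remainder "I-".
    label_list[en_start] = "B-" + en_type
    for i in range(en_start + 1, en_end):
        label_list[i] = "I-" + en_type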
def make_classifier(self, save=True):
    data = self.data.copy()
    data_for_training = utils.simple_feature_engineer(data)
    data_for_training, label = utils.make_label(data_for_training)
    logreg = LogisticRegression()
    logreg.fit(data_for_training, label)
    if save:
        name = self.symbol + '_clf.pkl'
        path = config.path_to_clf + name
        with open(path, 'wb') as f:
            pkl.dump(logreg, f)
    self.clf = logreg
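# A possible counterpart for loading the pickled model back, assuming the
# same config.path_to_clf layout; the method name `load_classifier` is
# hypothetical.
def load_classifier(self):
    # Mirror of make_classifier's save path: <path_to_clf><symbol>_clf.pkl
    path = config.path_to_clf + self.symbol + '_clf.pkl'
    with open(path, 'rb') as f:
        self.clf = pkl.load(f)
    return self.clf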
def create_raw_training_data(ticker, feature_path, label_path, features, label,
                             min_date, max_date, regression, fp):
    feature_map = read_data(feature_path, min_date, max_date)
    label_map = read_data(label_path, min_date, max_date)
    count = 0
    for d in sorted(feature_map.keys() & label_map.keys(), reverse=True):
        if label not in label_map[d]:
            continue
        ok = True
        for f in features:
            if f not in feature_map[d]:
                ok = False
                break
        if not ok:
            continue
        items = [ticker, d, utils.make_label(label_map[d][label], regression)]
        for i in range(len(features)):
            items.append('%d:%f' % (i + 1, feature_map[d][features[i]]))
        print(' '.join(items), file=fp)
        count += 1
    logging.info('%d data points' % count)
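# A minimal sketch of the `utils.make_label` helper assumed above: pass the
# raw target through for regression, otherwise binarize it. The sign
# threshold and the exact string formatting are assumptions.
def make_label(value, regression):
    if regression:
        return '%f' % value
    # Classification: map the target to +1/-1 by sign (threshold assumed).
    return '1' if value > 0 else '-1'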
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_path', required=True)
    parser.add_argument('--output_data_path', required=True)
    parser.add_argument('--output_index_path', required=True)
    parser.add_argument('--verbose', action='store_true')
    args = parser.parse_args()
    utils.setup_logging(args.verbose)

    with open(args.input_path, 'r') as fp:
        lines = fp.read().splitlines()

    # The block below keeps the output in sync with the output of
    # split_data_for_cv.py: the date and ticker of each input line are
    # swapped (so that date goes before ticker), and the lines are sorted
    # (by date and then by ticker).

    # Swap date and ticker in place.
    item_count = -1
    for i in range(len(lines)):
        items = lines[i].split(' ')
        if item_count < 0:
            item_count = len(items)
        else:
            assert item_count == len(items)
        items[0], items[1] = items[1], items[0]
        lines[i] = ' '.join(items)

    # This will sort lines by date and then ticker.
    lines.sort()

    data_fp = open(args.output_data_path, 'w')
    index_fp = open(args.output_index_path, 'w')
    for line in lines:
        items = line.split(' ')
        assert len(items) > 3
        data = '%s %s' % (utils.make_label(float(items[2]), False),
                          ' '.join(items[3:]))
        index = ' '.join(items[:2])
        print(data, file=data_fp)
        print(index, file=index_fp)
    data_fp.close()
    index_fp.close()
def get_segy_pts(data_dir, cube_incr, save_dir=None, save=False, pred=False):
    files = os.listdir(data_dir)
    pts_files = list()
    for f in files:
        full_filename = os.path.join(data_dir, f)
        ext = os.path.splitext(full_filename)[-1]
        if ext == '.pts':
            pts_files.append(full_filename)
        elif ext == '.segy':
            # Note: if the directory holds several .segy files, only the
            # last one encountered is kept.
            segy_file = full_filename
        else:
            raise ValueError('%s type is not a data format' % ext)
    pts_files.sort()
    print('Point files', pts_files)

    segy_obj = utils.segy_decomp(segy_file, plot_data=False, inp_res=np.float32)

    # Define the buffer zone around the edge of the cube that separates
    # legal from illegal addresses.
    inl_min = segy_obj.inl_start + segy_obj.inl_step * cube_incr
    inl_max = segy_obj.inl_end - segy_obj.inl_step * cube_incr
    xl_min = segy_obj.xl_start + segy_obj.xl_step * cube_incr
    xl_max = segy_obj.xl_end - segy_obj.xl_step * cube_incr
    t_min = segy_obj.t_start + segy_obj.t_step * cube_incr
    t_max = segy_obj.t_end - segy_obj.t_step * cube_incr

    # Print the buffer zone edges.
    print('Defining the buffer zone:')
    print('(inl_min, inl_max, xl_min, xl_max, t_min, t_max)')
    print((inl_min, inl_max, xl_min, xl_max, t_min, t_max))

    section = [inl_min, inl_max, segy_obj.inl_start, segy_obj.inl_step,
               xl_min, xl_max, segy_obj.xl_start, segy_obj.xl_step,
               t_min, t_max, segy_obj.t_start, segy_obj.t_step]

    adr_label, num_classes = utils.make_label(pts_files, save_dir=save_dir, save=save)

    # Shuffle the address/label rows. np.take with out=adr_label places the
    # permuted result back into the same array.
    adr_label = np.take(adr_label, np.random.permutation(len(adr_label)),
                        axis=0, out=adr_label)

    if pred:
        label_dict = utils.label_dict(pts_files)
        return segy_obj.data, adr_label, section, num_classes, label_dict
    else:
        return segy_obj.data, adr_label, section, num_classes
def initGui(self):
    """initGui(self) -> None

    Initialize the GUI.
    """
    self.initMainGrid()
    if self.DEBUG:
        self.setWindowTitle("CandleStick(Emulate)")
    else:
        self.setWindowTitle("CandleStick")

    # analysis graphs
    self.analysis_graphs = AnalysisGraphs()
    self.analysis_graphs.setMaximumWidth(self._window_width // 3)

    # chart graphs
    self.chart_graphs = ChartGraphs()

    # Settings
    group_setting, grid_setting = make_groupbox_and_grid(
        self, 40, self._window_height // 3,
        "Settings", self._groupbox_title_font_size, self._spacing
    )
    label_ema1 = make_label(
        group_setting, "N1", self._label_font_size, True, Qt.AlignLeft
    )
    grid_setting.addWidget(label_ema1, 0, 0)

    self.le_ema1 = QLineEdit(group_setting)
    self.le_ema1.setText(str(self._N_ema1))
    self.le_ema1.setMaximumWidth(40)
    self.le_ema1.setMaximumHeight(16)
    self.le_ema1.setStyleSheet("background-color:{};".format(self._txt_bg_color))
    self.le_ema1.setValidator(QIntValidator())
    grid_setting.addWidget(self.le_ema1, 0, 1)

    label_ema2 = make_label(
        group_setting, "N2", self._label_font_size, True, Qt.AlignLeft
    )
    grid_setting.addWidget(label_ema2, 1, 0)

    self.le_ema2 = QLineEdit(group_setting)
    self.le_ema2.setText(str(self._N_ema2))
    self.le_ema2.setMaximumWidth(40)
    self.le_ema2.setMaximumHeight(16)
    self.le_ema2.setStyleSheet("background-color:{};".format(self._txt_bg_color))
    self.le_ema2.setValidator(QIntValidator())
    grid_setting.addWidget(self.le_ema2, 1, 1)

    label_delta = make_label(
        group_setting, "delta", self._label_font_size, True, Qt.AlignLeft
    )
    grid_setting.addWidget(label_delta, 2, 0)

    self.le_delta = QLineEdit(group_setting)
    self.le_delta.setText(str(self._delta))
    self.le_delta.setMaximumWidth(40)
    self.le_delta.setMaximumHeight(16)
    self.le_delta.setStyleSheet("background-color:{};".format(self._txt_bg_color))
    self.le_delta.setValidator(QDoubleValidator())
    grid_setting.addWidget(self.le_delta, 2, 1)

    # Results
    group_results, grid_results = make_groupbox_and_grid(
        self, 40, self._window_height // 3,
        "Results", self._groupbox_title_font_size, self._spacing
    )
    label_benefit = make_label(
        group_results, "Benefit", self._label_font_size, True, Qt.AlignLeft
    )
    self.label_benefit_value = make_label(
        group_results, "0", self._label_font_size, True, Qt.AlignLeft
    )
    label_days = make_label(
        group_results, "Days", self._label_font_size, True, Qt.AlignLeft
    )
    self.label_days_value = make_label(
        group_results, "0", self._label_font_size, True, Qt.AlignLeft
    )
    label_perday = make_label(
        group_results, "Per day", self._label_font_size, True, Qt.AlignLeft
    )
    self.label_perday_value = make_label(
        group_results, "0", self._label_font_size, True, Qt.AlignLeft
    )
    grid_results.addWidget(label_benefit, 0, 0)
    grid_results.addWidget(self.label_benefit_value, 1, 0)
    grid_results.addWidget(label_days, 2, 0)
    grid_results.addWidget(self.label_days_value, 3, 0)
    grid_results.addWidget(label_perday, 4, 0)
    grid_results.addWidget(self.label_perday_value, 5, 0)

    # Lay out the widgets; in debug mode an extra DEBUG group is added.
    if not self.DEBUG:
        self.grid.addWidget(self.analysis_graphs, 0, 0, 2, 2)
        self.grid.addWidget(self.chart_graphs, 0, 2, 2, 3)
        self.grid.addWidget(group_setting, 0, 5, 1, 1)
        self.grid.addWidget(group_results, 1, 5, 1, 1)
    else:
        group_debug, grid_debug = make_groupbox_and_grid(
            self, 60, self._window_height // 3,
            "DEBUG", self._groupbox_title_font_size, self._spacing
        )
        ## start position
        self.le_start = QLineEdit(group_debug)
        self.le_start.setText("0")
        self.le_start.resize(40, 16)
        self.le_start.setStyleSheet("background-color:{};".format(self._txt_bg_color))
        self.le_start.setValidator(QIntValidator())

        ## end position
        self.le_end = QLineEdit(group_debug)
        self.le_end.setText("100")
        self.le_end.resize(40, 16)
        self.le_end.setStyleSheet("background-color:{};".format(self._txt_bg_color))
        self.le_end.setValidator(QIntValidator())

        ## checkbox to use the average value of each OHLC as the order LTP
        self.chk_use_average = QCheckBox(group_debug)
        pal = QPalette()
        pal.setColor(QPalette.Foreground, QColor(self._chk_box_bg_color))
        self.chk_use_average.setPalette(pal)
        self.chk_use_average.setChecked(False)
        self.chk_use_average.resize(16, 16)

        ## buttons
        self.button1 = make_pushbutton(
            self, 40, 16, "Update", 14,
            method=self.update, color=None, isBold=False
        )
        self.button3 = make_pushbutton(
            self, 40, 16, "Analyze", 14,
            method=self.analyze, color=None, isBold=False
        )
        self.button4 = make_pushbutton(
            self, 40, 16, "View", 14,
            method=self.drawAnalysisResults, color=None, isBold=False
        )

        ## add the debug widgets
        grid_debug.addWidget(self.le_start, 0, 0)
        grid_debug.addWidget(self.le_end, 1, 0)
        grid_debug.addWidget(self.chk_use_average, 2, 0)
        grid_debug.addWidget(self.button1, 3, 0)
        grid_debug.addWidget(self.button3, 4, 0)
        grid_debug.addWidget(self.button4, 5, 0)

        self.grid.addWidget(self.analysis_graphs, 0, 0, 3, 2)
        self.grid.addWidget(self.chart_graphs, 0, 2, 3, 3)
        self.grid.addWidget(group_setting, 0, 5, 1, 1)
        self.grid.addWidget(group_results, 1, 5, 1, 1)
        self.grid.addWidget(group_debug, 2, 5, 1, 1)
def parse_label(self, value):
    name, size_range = value.split(':')
    min_size, max_size = size_range.split('-')
    return make_label(name, min_size, max_size)
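# Example: given a spec string of the form "name:min-max" (the format implied
# by the two splits above), parse_label forwards the pieces to make_label:
#
#   self.parse_label('inbox:0-100')  ->  make_label('inbox', '0', '100')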
def initGui(self):
    """initGui(self) -> None

    Initialize the GUI.
    """
    # initialize the main window
    self.resize(self._init_window_width, self._init_window_height)
    grid = QGridLayout(self)
    grid.setSpacing(10)
    if self.DEBUG:
        self.setWindowTitle("Analysis(Debug)")

    # Coordinate and value of the mouse pointer.
    widget_coor_value = QWidget(self)
    widget_coor_value.resize(self._init_window_width, 30)
    grid_coor_value = QGridLayout(widget_coor_value)
    grid_coor_value.setSpacing(10)
    self.label_coor_value = make_label(
        self, "(NA, NA, 0.00e-00)", self._font_size_label,
        isBold=True, alignment=Qt.AlignRight
    )
    grid_coor_value.addWidget(self.label_coor_value, 0, 0)

    # graphs
    self.glw = pg.GraphicsLayoutWidget()

    ## benefit map
    self.plot_benefit = self.glw.addPlot()
    self.plot_benefit.setAspectLocked(True)
    self.img_benefit = pg.ImageItem()
    self.plot_benefit.addItem(self.img_benefit)

    def mouseMoved(pos):
        try:
            coor = self.img_benefit.mapFromScene(pos)
            x, y = int(coor.x()), int(coor.y())
            if self.img_benefit.image is not None:
                img = self.img_benefit.image
                # Bounds must match the img[x, y] indexing below.
                if 0 <= x < img.shape[0] and 0 <= y < img.shape[1]:
                    self.label_coor_value.setText(
                        "({0}, {1}, {2:.4e})".format(x, y, img[x, y]))
        except IndexError:
            pass
        except Exception as ex:
            print(ex)

    self.img_benefit.scene().sigMouseMoved.connect(mouseMoved)

    ## box plot diagram for dead crosses
    self.glw.nextRow()
    self.plot_box_dead = self.glw.addPlot()

    ## box plot diagram for golden crosses
    self.glw.nextRow()
    self.plot_box_golden = self.glw.addPlot()

    # construct the layout
    grid.addWidget(widget_coor_value, 0, 0, 1, 1)
    grid.addWidget(self.glw, 1, 0, 1, 9)
def train(self, config):
    if config.is_train:
        data_dir = self.train_dir
    else:
        data_dir = self.test_dir
    data, label = read_data(data_dir)

    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = 0.1
    # learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
    #                                            1000, 0.001, staircase=True)

    train_op = {}
    models = {}
    for step in range(config.scale_factor // 2):
        train_op['train_op%i' % step] = tf.train.AdamOptimizer(
            config.learning_rate).minimize(self.loss(step))
        models['model%i' % step] = self.model(step)
    tf.global_variables_initializer().run()

    counter = 0
    start_time = time.time()

    if self.load(self.checkpoint_dir):
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    if config.is_train:
        print("Training...")
        topg = [make_topg(data, step, self.scale_factor)
                for step in range(self.scale_factor // 2)]
        topg = np.array(topg)
        label = [make_label(label, step, self.scale_factor)
                 for step in range(self.scale_factor // 2)]
        label = np.array(label)

        for ep in range(config.epoch):
            # Run by batch images
            batch_idxs = len(data) // config.batch_size
            for idx in range(0, batch_idxs):
                batch_images0 = data[idx * config.batch_size:(idx + 1) * config.batch_size]
                batch_labels0 = label[:, idx * config.batch_size:(idx + 1) * config.batch_size, :, :]
                topg0 = topg[:, idx * config.batch_size:(idx + 1) * config.batch_size, :, :]
                counter += 1
                for step in range(self.scale_factor // 2):
                    if step == 0:
                        batch_images = np.stack(
                            [batch_images0[:, :, :, 0], topg0[step]], axis=-1)
                    else:
                        # Feed the previous step's output into the next model.
                        result = result[:, :, :, 0]
                        batch_images = np.stack([result, topg0[step]], axis=-1)
                    batch_labels = batch_labels0[step]
                    _, result = self.sess.run(
                        [train_op['train_op%i' % step], models['model%i' % step]],
                        feed_dict={self.images: batch_images, self.labels: batch_labels})

                if counter % 10 == 0:
                    s = (self.scale_factor // 2) - 1
                    result = self.sess.run(models['model%i' % s],
                                           feed_dict={self.images: batch_images})
                    MSE = 0
                    for k in range(config.batch_size):
                        acc = sklearn.metrics.mean_squared_error(
                            result[k, :, :, 0], label[-1, k, :, :, 0])
                        MSE += acc
                    # Average over the evaluated batch.
                    err = MSE / config.batch_size
                    print("Epoch: [%2d], step: [%2d], time: [%4.4f], loss: [%.8f]"
                          % (ep + 1, counter, time.time() - start_time, err))
                if counter % 500 == 0:
                    self.save(config.checkpoint_dir, counter)
        tf.get_default_graph().finalize()
    else:
        print("Testing...")
        MSE = 0
        for k in range(len(data)):
            acc = sklearn.metrics.mean_squared_error(data[k, :, :, 0], label[k, :, :, 0])
            MSE += acc
        print("Bicubic error : ", MSE / len(data))

        for step in range(self.scale_factor // 2):
            if step == 0:
                batch_images = np.stack(
                    [data[:, :, :, 0], make_topg(data, step, self.scale_factor)], axis=-1)
            else:
                batch_images = np.stack(
                    [result, make_topg(data, step, self.scale_factor)], axis=-1)
            result = self.sess.run(models['model%i' % step],
                                   feed_dict={self.images: batch_images})
            result = np.array(result)[:, :, :, 0]

        MSE = 0
        for k in range(len(data)):
            acc = sklearn.metrics.mean_squared_error(result[k, :, :], label[k, :, :, 0])
            MSE += acc
        print("Model error : ", MSE / len(data))

        if config.save_result:
            if not os.path.exists(config.result_fold):
                os.makedirs(config.result_fold)
            for k in range(len(data)):
                original = plt.contourf(label[k, :, :, 0])
                plt.colorbar(original)
                cmap = original.get_cmap()
                plt.savefig('%s/%d_label.png' % (config.result_fold, k))
                plt.show()

                base = plt.contourf(data[k, :, :, 0], cmap=cmap)
                plt.colorbar(base)
                plt.savefig('%s/%d_bicubic.png' % (config.result_fold, k))
                plt.show()

                predicted = plt.contourf(result[k, :, :], cmap=cmap)
                plt.colorbar(predicted)
                plt.savefig('%s/%d_predicted.png' % (config.result_fold, k))
                plt.show()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_path', required=True)
    parser.add_argument('--output_dir', required=True)
    parser.add_argument('--folds', required=True)
    parser.add_argument('--verbose', action='store_true')
    args = parser.parse_args()
    utils.setup_logging(args.verbose)

    folds = int(args.folds)
    assert folds > 1

    with open(args.input_path, 'r') as fp:
        lines = fp.read().splitlines()

    # Swap date and ticker in place.
    item_count = -1
    for i in range(len(lines)):
        items = lines[i].split(' ')
        if item_count < 0:
            item_count = len(items)
        else:
            assert item_count == len(items)
        items[0], items[1] = items[1], items[0]
        lines[i] = ' '.join(items)

    # This will sort lines by date and then ticker.
    lines.sort()

    # Prepare all the file handles. We are going to write to folds * 4 files:
    # for each fold, a training data file, a training index file, a testing
    # data file, and a testing index file.
    train_data_fps, train_index_fps = [], []
    test_data_fps, test_index_fps = [], []
    for i in range(folds):
        train_data_fps.append(open('%s/train_data_%d' % (args.output_dir, i), 'w'))
        train_index_fps.append(open('%s/train_index_%d' % (args.output_dir, i), 'w'))
        test_data_fps.append(open('%s/test_data_%d' % (args.output_dir, i), 'w'))
        test_index_fps.append(open('%s/test_index_%d' % (args.output_dir, i), 'w'))

    # Sanity checks.
    assert len(train_data_fps) == folds
    assert len(train_index_fps) == folds
    assert len(test_data_fps) == folds
    assert len(test_index_fps) == folds

    segment = len(lines) // folds
    for i in range(folds):
        logging.info('Writing fold %d' % (i + 1))
        start = segment * i
        end = start + segment
        if i == folds - 1:
            end = len(lines)
        for j in range(start, end):
            items = lines[j].split(' ')
            # Write label and features to the data files, and date and
            # ticker to the index files.
            assert len(items) > 3
            data = '%s %s' % (utils.make_label(float(items[2]), False),
                              ' '.join(items[3:]))
            index = ' '.join(items[:2])
            for k in range(folds):
                if i != k:
                    print(data, file=train_data_fps[k])
                    print(index, file=train_index_fps[k])
                else:
                    print(data, file=test_data_fps[k])
                    print(index, file=test_index_fps[k])

    # Close all file handles.
    close_all(train_data_fps)
    close_all(train_index_fps)
    close_all(test_data_fps)
    close_all(test_index_fps)
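# A minimal sketch of the `close_all` helper used above (assumed, not shown
# in the source): it simply closes every file handle in the list.
def close_all(fps):
    for fp in fps:
        fp.close()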