Example #1
def parse_config_file(path):
    """Parse INI file containing configuration details.

    """
    # Parse options file
    parser = SafeConfigParser(defaults=DEFAULTS)

    with open(path, 'r') as fp:
        parser.readfp(fp)

    # Build a list of label named tuples
    ignore_sections = ['options', 'auth', 'connection']

    sections = [s for s in parser.sections() if s not in ignore_sections]

    labels = [
        make_label(sec, parser.get(sec, 'min'), parser.get(sec, 'max'))
        for sec in sections
    ]

    # Build an options object and return it

    opts = dict(
        folder=parser.get('options', 'folder'),
        user=parser.get('auth', 'user'),
        password=parser.get('auth', 'password'),
        host=parser.get('connection', 'host'),
        ssl=parser.getboolean('connection', 'ssl'),
        port=parser.getint('connection', 'port'),
        labels=labels,
    )
    return opts
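For context, the section and option names read above imply a configuration file shaped roughly like the sketch below, and make_label presumably packs each remaining section into a small record. Both the sample values and the namedtuple definition are illustrative assumptions, not part of the original project.

# Illustrative INI layout matching the get()/getboolean()/getint() calls above
# (all values are made up; every section other than options/auth/connection
# becomes one label):
#
#   [options]
#   folder = INBOX
#
#   [auth]
#   user = alice
#   password = secret
#
#   [connection]
#   host = imap.example.com
#   ssl = true
#   port = 993
#
#   [urgent]
#   min = 1
#   max = 10

from collections import namedtuple

# Hypothetical helper consistent with the make_label(section, min, max) call above.
Label = namedtuple('Label', ['name', 'min', 'max'])

def make_label(name, min_value, max_value):
    return Label(name, min_value, max_value)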
Example #2
def get_input(debug=True):
    """
    得到Bert模型的输入
    """
    input_word_list = []
    input_label_list = []
    with open("tmp/input_bert.json", 'r', encoding='UTF-8') as f:
        data = json.load(f)
    bert_words = list(data["sentence"])
    label_list = ["O" for _ in bert_words]  # 首先制作全O的标签
    for entity in data["entity-mentions"]:
        en_start = entity["start"]
        en_end = entity["end"]
        en_type = entity["entity-type"]
        # Tag the span according to its start and end positions
        make_label(en_start, en_end, en_type, label_list)
    input_word_list.append(["[CLS]"] + bert_words + ["[SEP]"])
    input_label_list.append(["O"] + label_list + ["O"])

    return input_word_list, input_label_list
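make_label is not shown in this example; below is a minimal sketch that tags the [start, end) span in place, assuming the common BIO labelling convention (an assumption, since the original helper may use a different scheme).

def make_label(en_start, en_end, en_type, label_list):
    # Assumed BIO scheme: the first character of the span gets "B-<type>",
    # the remaining characters get "I-<type>"; label_list is modified in place.
    label_list[en_start] = "B-" + en_type
    for i in range(en_start + 1, en_end):
        label_list[i] = "I-" + en_type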
Example #3
    def make_classifier(self, save=True):
        data = self.data.copy()
        data_for_training = utils.simple_feature_engineer(data)
        data_for_training, label = utils.make_label(data_for_training)
        logreg = LogisticRegression()
        logreg.fit(data_for_training, label)

        if save:
            name = self.symbol + '_clf.pkl'
            path = config.path_to_clf + name
            with open(path, 'wb') as f:
                pkl.dump(logreg, f)

        self.clf = logreg

        return
Example #4
def create_raw_training_data(ticker, feature_path, label_path, features, label,
                             min_date, max_date, regression, fp):
    feature_map = read_data(feature_path, min_date, max_date)
    label_map = read_data(label_path, min_date, max_date)
    count = 0
    for d in sorted(feature_map.keys() & label_map.keys(), reverse=True):
        if label not in label_map[d]: continue
        ok = True
        for f in features:
            if f not in feature_map[d]:
                ok = False
                break
        if not ok: continue
        items = [ticker, d, utils.make_label(label_map[d][label], regression)]
        for i in range(len(features)):
            items.append('%d:%f' % (i + 1, feature_map[d][features[i]]))
        print(' '.join(items), file=fp)
        count += 1
    logging.info('%d data points' % count)
Example #5
def create_raw_training_data(ticker, feature_path, label_path, features, label,
                             min_date, max_date, regression, fp):
  feature_map = read_data(feature_path, min_date, max_date)
  label_map = read_data(label_path, min_date, max_date)
  count = 0
  for d in sorted(feature_map.keys() & label_map.keys(), reverse=True):
    if label not in label_map[d]: continue
    ok = True
    for f in features:
      if f not in feature_map[d]:
        ok = False
        break
    if not ok: continue
    items = [ticker, d, utils.make_label(label_map[d][label], regression)]
    for i in range(len(features)):
      items.append('%d:%f' % (i+1, feature_map[d][features[i]]))
    print(' '.join(items), file=fp)
    count += 1
  logging.info('%d data points' % count)
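Each line written by the two functions above combines the index fields with a libsvm-style feature list: ticker, date, label, then 1-based feature:value pairs. A hypothetical output line:

# AAPL 2020-01-31 1 1:0.123456 2:-0.045000 3:0.007800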
Example #6
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--input_path', required=True)
  parser.add_argument('--output_data_path', required=True)
  parser.add_argument('--output_index_path', required=True)
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)

  with open(args.input_path, 'r') as fp:
    lines = fp.read().splitlines()

  # The block below keeps the output data in sync with the data produced by
  # split_data_for_cv.py: the date and ticker of each input line are swapped
  # (so that the date comes before the ticker), and the lines are sorted
  # (by date, then by ticker).

  # Swap date and ticker in place.
  item_count = -1
  for i in range(len(lines)):
    items = lines[i].split(' ')
    if item_count < 0: item_count = len(items)
    else: assert item_count == len(items)
    items[0], items[1] = items[1], items[0]
    lines[i] = ' '.join(items)
  # This sorts the lines by date and then by ticker.
  lines.sort()

  data_fp = open(args.output_data_path, 'w')
  index_fp = open(args.output_index_path, 'w')
  for line in lines:
    items = line.split(' ')
    assert len(items) > 3
    data = '%s %s' % (utils.make_label(float(items[2]), False),
                      ' '.join(items[3:]))
    index = ' '.join(items[:2])
    print(data, file=data_fp)
    print(index, file=index_fp)

  data_fp.close()
  index_fp.close()
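The swap-and-sort step above can be traced on a single hypothetical input line:

# input line:   'AAPL 2020-01-31 0.85 1:0.1 2:0.2'
# after swap:   '2020-01-31 AAPL 0.85 1:0.1 2:0.2'
# Sorting the swapped lines then orders them by date first and ticker second,
# which is what keeps the output aligned with split_data_for_cv.py.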
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_path', required=True)
    parser.add_argument('--output_data_path', required=True)
    parser.add_argument('--output_index_path', required=True)
    parser.add_argument('--verbose', action='store_true')
    args = parser.parse_args()

    utils.setup_logging(args.verbose)

    with open(args.input_path, 'r') as fp:
        lines = fp.read().splitlines()

    # The block below keeps the output data in sync with the data produced by
    # split_data_for_cv.py: the date and ticker of each input line are swapped
    # (so that the date comes before the ticker), and the lines are sorted
    # (by date, then by ticker).

    # Swap date and ticker in place.
    item_count = -1
    for i in range(len(lines)):
        items = lines[i].split(' ')
        if item_count < 0: item_count = len(items)
        else: assert item_count == len(items)
        items[0], items[1] = items[1], items[0]
        lines[i] = ' '.join(items)
    # This sorts the lines by date and then by ticker.
    lines.sort()

    data_fp = open(args.output_data_path, 'w')
    index_fp = open(args.output_index_path, 'w')
    for line in lines:
        items = line.split(' ')
        assert len(items) > 3
        data = '%s %s' % (utils.make_label(float(items[2]), False), ' '.join(
            items[3:]))
        index = ' '.join(items[:2])
        print(data, file=data_fp)
        print(index, file=index_fp)

    data_fp.close()
    index_fp.close()
Example #8
def get_segy_pts(data_dir, cube_incr, save_dir=None, save=False, pred=False):
    files = os.listdir(data_dir)    
    pts_files = list()
    for f in files:
        full_filename = os.path.join(data_dir, f)
        ext = os.path.splitext(full_filename)[-1]
        if ext == '.pts':
            pts_files.append(full_filename)
        elif ext == '.segy':
            segy_files = full_filename
        else:
            raise ValueError('%s type is not a data format' % ext)

    pts_files.sort()
    print('Point files', pts_files)

    segy_obj = utils.segy_decomp(segy_files, plot_data=False, inp_res=np.float32)
    # Define the buffer zone around the edge of the cube that separates legal from illegal addresses
    inl_min = segy_obj.inl_start + segy_obj.inl_step*cube_incr
    inl_max = segy_obj.inl_end - segy_obj.inl_step*cube_incr
    xl_min = segy_obj.xl_start + segy_obj.xl_step*cube_incr
    xl_max = segy_obj.xl_end - segy_obj.xl_step*cube_incr
    t_min = segy_obj.t_start + segy_obj.t_step*cube_incr
    t_max = segy_obj.t_end - segy_obj.t_step*cube_incr

    # Print the buffer zone edges
    print('Defining the buffer zone:')
    print('(inl_min,','inl_max,','xl_min,','xl_max,','t_min,','t_max)')
    print('(',inl_min,',',inl_max,',',xl_min,',',xl_max,',',t_min,',',t_max,')')
    section = [
        inl_min, inl_max, segy_obj.inl_start, segy_obj.inl_step,
        xl_min, xl_max, segy_obj.xl_start, segy_obj.xl_step,
        t_min, t_max, segy_obj.t_start, segy_obj.t_step,
    ]

    adr_label, num_classes = utils.make_label(pts_files, save_dir=save_dir, save=save)
    # Shuffle the (address, label) rows: np.take with axis=0 gathers the permuted
    # rows, and out=adr_label writes the result back into the same array.
    adr_label = np.take(adr_label, np.random.permutation(len(adr_label)), axis=0, out=adr_label)

    if pred:
        label_dict = utils.label_dict(pts_files)
        return segy_obj.data, adr_label, section, num_classes, label_dict
    else:
        return segy_obj.data, adr_label, section, num_classes
Example #9
    def initGui(self):
        """initGui(self) -> None

        initialize the GUI
        """
        self.initMainGrid()

        if self.DEBUG:
            self.setWindowTitle("CandleStick(Emulate)")
        else:
            self.setWindowTitle("CandleStick")
        
        # analysis graphs
        self.analysis_graphs = AnalysisGraphs()
        self.analysis_graphs.setMaximumWidth(self._window_width // 3)

        # chart graphs
        self.chart_graphs = ChartGraphs()

        # Settings
        group_setting, grid_setting = make_groupbox_and_grid(
            self, 40, (self._window_height) // 3,
            "Settings", self._groupbox_title_font_size, self._spacing
        )

        label_ema1 = make_label(
            group_setting, "N1", self._label_font_size, True, Qt.AlignLeft
        )
        grid_setting.addWidget(label_ema1, 0, 0)

        self.le_ema1 = QLineEdit(group_setting)
        self.le_ema1.setText(str(self._N_ema1))
        # font = self.le_ema1.font()
        # font.setPointSize(self._button_font_size)
        # self.le_ema1.setFont(font)
        self.le_ema1.setMaximumWidth(40)
        self.le_ema1.setMaximumHeight(16)
        # self.le_ema1.resize(20, 16)
        self.le_ema1.setStyleSheet("background-color:{};".format(self._txt_bg_color))
        self.le_ema1.setValidator(QIntValidator())
        grid_setting.addWidget(self.le_ema1, 0, 1)

        label_ema2 = make_label(
            group_setting, "N2", self._label_font_size, True, Qt.AlignLeft
        )
        grid_setting.addWidget(label_ema2, 1, 0)

        self.le_ema2 = QLineEdit(group_setting)
        self.le_ema2.setText(str(self._N_ema2))
        # font = self.le_ema2.font()
        # font.setPointSize(self._button_font_size)
        # self.le_ema2.setFont(font)
        self.le_ema2.setMaximumWidth(40)
        self.le_ema2.setMaximumHeight(16)
        # self.le_ema2.resize(20, 16)
        self.le_ema2.setStyleSheet("background-color:{};".format(self._txt_bg_color))
        self.le_ema2.setValidator(QIntValidator())
        grid_setting.addWidget(self.le_ema2, 1, 1)

        label_delta = make_label(
            group_setting, "delta", self._label_font_size, True, Qt.AlignLeft
        )
        grid_setting.addWidget(label_delta, 2, 0)

        self.le_delta = QLineEdit(group_setting)
        self.le_delta.setText(str(self._delta))
        # font = self.le_delta.font()
        # font.setPointSize(self._button_font_size)
        # self.le_delta.setFont(font)
        self.le_delta.setMaximumWidth(40)
        self.le_delta.setMaximumHeight(16)
        self.le_delta.setStyleSheet("background-color:{};".format(self._txt_bg_color))
        self.le_delta.setValidator(QDoubleValidator())
        grid_setting.addWidget(self.le_delta, 2, 1)

        # Results
        group_results, grid_results = make_groupbox_and_grid(
            self, 40, (self._window_height) // 3,
            "Results", self._groupbox_title_font_size, self._spacing
        )
        label_benefit = make_label(
            group_results, "Benefit", self._label_font_size, True, Qt.AlignLeft
        )
        self.label_benefit_value = make_label(
            group_results, "0", self._label_font_size, True, Qt.AlignLeft
        )
        label_days = make_label(
            group_results, "Days", self._label_font_size, True, Qt.AlignLeft
        )
        self.label_days_value = make_label(
            group_results, "0", self._label_font_size, True, Qt.AlignLeft
        )
        label_perday = make_label(
            group_results, "Per day", self._label_font_size, True, Qt.AlignLeft
        )
        self.label_perday_value = make_label(
            group_results, "0", self._label_font_size, True, Qt.AlignLeft
        )

        grid_results.addWidget(label_benefit, 0, 0)
        grid_results.addWidget(self.label_benefit_value, 1, 0)
        grid_results.addWidget(label_days, 2, 0)
        grid_results.addWidget(self.label_days_value, 3, 0)
        grid_results.addWidget(label_perday, 4, 0)
        grid_results.addWidget(self.label_perday_value, 5, 0)

        # Items in debug mode
        if not self.DEBUG:
            self.grid.addWidget(self.analysis_graphs, 0, 0, 2, 2)
            self.grid.addWidget(self.chart_graphs, 0, 2, 2, 3)
            # self.grid.addWidget(self.glw, 0, 0, 3, 5)
            self.grid.addWidget(group_setting, 0, 5, 1, 1)
            self.grid.addWidget(group_results, 1, 5, 1, 1)
        else:
            group_debug, grid_debug = make_groupbox_and_grid(
                self, 60, self._window_height // 3,
                "DEBUG", self._groupbox_title_font_size, self._spacing
            )

            ## start position
            self.le_start = QLineEdit(group_debug)
            self.le_start.setText("0")
            # font = self.le_start.font()
            # font.setPointSize(self._button_font_size)
            # self.le_start.setFont(font)
            self.le_start.resize(40, 16)
            self.le_start.setStyleSheet("background-color:{};".format(self._txt_bg_color))
            self.le_start.setValidator(QIntValidator())

            ## end position
            self.le_end = QLineEdit(group_debug)
            self.le_end.setText("100")
            self.le_end.resize(40, 16)
            self.le_end.setStyleSheet("background-color:{};".format(self._txt_bg_color))
            self.le_end.setValidator(QIntValidator())

            ## checkbox to use the average values of each OHLC as order ltps
            self.chk_use_average = QCheckBox(group_debug)
            pal = QPalette()
            pal.setColor(QPalette.Foreground, QColor(self._chk_box_bg_color))
            # pal.setColor(QPalette.Active, QColor("white"))
            self.chk_use_average.setPalette(pal)
            # self.chk_use_average.setStyleSheet("background-color:{};".format(self._chk_bg_color))
            self.chk_use_average.setChecked(False)
            self.chk_use_average.resize(16, 16)
            # self.chk_use_average.stateChanged.connect(self.setTxtBTCJPYEditState)

            ## update button
            self.button1 = make_pushbutton(
                self, 40, 16, "Update", 14, 
                method=self.update, color=None, isBold=False
            )

            self.button3 = make_pushbutton(
                self, 40, 16, "Analyze", 14, 
                method=self.analyze, color=None, isBold=False
            )

            self.button4 = make_pushbutton(
                self, 40, 16, "View", 14, 
                method=self.drawAnalysisResults, color=None, isBold=False
            )

            ## add
            grid_debug.addWidget(self.le_start, 0, 0)
            grid_debug.addWidget(self.le_end, 1, 0)
            grid_debug.addWidget(self.chk_use_average, 2, 0)
            grid_debug.addWidget(self.button1, 3, 0)
            grid_debug.addWidget(self.button3, 4, 0)
            grid_debug.addWidget(self.button4, 5, 0)

            self.grid.addWidget(self.analysis_graphs, 0, 0, 3, 2)
            self.grid.addWidget(self.chart_graphs, 0, 2, 3, 3)
            # self.grid.addWidget(self.glw, 0, 0, 3, 5)
            self.grid.addWidget(group_setting, 0, 5, 1, 1)
            self.grid.addWidget(group_results, 1, 5, 1, 1)
            self.grid.addWidget(group_debug, 2, 5, 1, 1)
Example #10
    def parse_label(self, value):
        name, size_range = value.split(':')
        min_size, max_size = size_range.split('-')
        return make_label(name, min_size, max_size)
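A usage sketch for the parser above; the 'name:min-max' layout follows directly from the two split() calls, while the concrete values are invented:

# self.parse_label('archive:10-500')
#   -> make_label('archive', '10', '500')
# Note that min_size and max_size reach make_label as strings.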
Example #11
    def initGui(self):
        """initGui(self) -> None

        initialize the GUI
        """
        # initialize the main
        self.resize(self._init_window_width, self._init_window_height)
        grid = QGridLayout(self)
        grid.setSpacing(10)

        if self.DEBUG:
            self.setWindowTitle("Analysis(Debug)")

        # Coordinate and Value of the mouse pointer.
        widget_coor_value = QWidget(self)
        widget_coor_value.resize(self._init_window_width, 30)
        grid_coor_value = QGridLayout(widget_coor_value)
        grid_coor_value.setSpacing(10)

        self.label_coor_value = make_label(self,
                                           "(NA, NA, 0.00e-00)",
                                           self._font_size_label,
                                           isBold=True,
                                           alignment=Qt.AlignRight)
        grid_coor_value.addWidget(self.label_coor_value, 0, 0)

        # graphs
        self.glw = pg.GraphicsLayoutWidget()

        ## benefit map
        self.plot_benefit = self.glw.addPlot(
            # axisItems={"bottom":self.iw_axBottom, "left":self.iw_axLeft}
        )
        self.plot_benefit.setAspectLocked(True)
        self.img_benefit = pg.ImageItem()
        self.plot_benefit.addItem(self.img_benefit)

        def mouseMoved(pos):
            try:
                coor = self.img_benefit.mapFromScene(pos)
                x, y = int(coor.x()), int(coor.y())
                if self.img_benefit.image is not None:
                    img = self.img_benefit.image
                    if 0 <= x < img.shape[0] and 0 <= y < img.shape[1]:
                        self.label_coor_value.setText(
                            "({0}, {1}, {2:.4e})".format(x, y, img[x, y]))
            except IndexError:
                pass
            except Exception as ex:
                print(ex)

        self.img_benefit.scene().sigMouseMoved.connect(mouseMoved)

        ## Box plot diagram for dead cross
        self.glw.nextRow()
        self.plot_box_dead = self.glw.addPlot()

        ## Box plot diagram for golden cross
        self.glw.nextRow()
        self.plot_box_golden = self.glw.addPlot()

        # construct
        grid.addWidget(widget_coor_value, 0, 0, 1, 1)
        grid.addWidget(self.glw, 1, 0, 1, 9)
Example #12
    def train(self, config):

        if config.is_train:
            data_dir =  self.train_dir
        else:
            data_dir = self.test_dir

        data, label = read_data(data_dir)


        global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 0.1
        #learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                                   #1000, 0.001, staircase=True)
        train_op={}
        models={}
        for step in range(config.scale_factor//2):
            train_op['train_op%i'%step]=tf.train.AdamOptimizer(config.learning_rate).minimize(self.loss(step))
            models['model%i'%step]=self.model(step)



        tf.global_variables_initializer().run()


        counter = 0
        start_time = time.time()

        if self.load(self.checkpoint_dir):
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

        if config.is_train:
            print("Training...")

            topg=[make_topg(data, step, self.scale_factor) for step in range(self.scale_factor//2)]
            topg=np.array(topg)
            label=[make_label(label, step, self.scale_factor) for step in range(self.scale_factor//2)]
            label=np.array(label)

            for ep in xrange(config.epoch):
                ep=np.int64(ep)
                # Run by batch images
                batch_idxs = len(data) // config.batch_size
                for idx in xrange(0, batch_idxs):
                    batch_images0 = data[idx * config.batch_size: (idx + 1) * config.batch_size]
                    batch_labels0 = label[:,idx * config.batch_size: (idx + 1) * config.batch_size,:,:]
                    topg0=topg[:,idx * config.batch_size: (idx + 1) * config.batch_size,:,:]

                    counter += 1

                    for step in range(self.scale_factor//2):
                        if step==0:
                            batch_images=np.stack([batch_images0[:,:,:,0],topg0[step]]
                                                   ,axis=-1)
                            batch_labels=batch_labels0[step]
                            _, result=self.sess.run([train_op['train_op%i'%step], models['model%i'%step]],
                                                   feed_dict={self.images: batch_images, self.labels: batch_labels})

                        else:
                            #result = self.sess.run(self.model(step-1),
                                                                 #feed_dict={self.images['images%i'%(step-1)] : batch_images})
                            result=result[:,:,:,0]
                            batch_images=np.stack([result, topg0[step]], axis=-1)
                            batch_labels =batch_labels0[step]
                            _, result=self.sess.run([train_op['train_op%i'%step], models['model%i'%step]],
                                          feed_dict={self.images: batch_images,
                                                     self.labels: batch_labels})



                    if counter % 10 == 0:
                        s = (self.scale_factor // 2) - 1
                        result = self.sess.run(models['model%i'%s], feed_dict={self.images: batch_images})
                        MSE = 0
                        for k in range(config.batch_size):
                            acc = sklearn.metrics.mean_squared_error(result[k, :, :, 0], label[-1,k, :, :, 0])
                            MSE += acc
                        err = MSE / config.batch_size  # mean squared error over the batch
                        print("Epoch: [%2d], step: [%2d], time: [%4.4f], loss: [%.8f]  " \
                              % ((ep + 1), counter, time.time() - start_time, err))


                    if counter % 500 == 0:
                        self.save(config.checkpoint_dir, counter)
                tf.get_default_graph().finalize()


        else:
            print("Testing...")

            MSE=0
            for k in range(len(data[:,0,0,0])):
                acc = sklearn.metrics.mean_squared_error(data[k,:,:,0],label[k,:,:,0])
                MSE+=acc
            print("Bicubic error : ", MSE / len(data[:, :, :, 0]))

            for step in range(self.scale_factor//2):
                if step == 0:
                    batch_images = np.stack([data[:, :, :, 0], make_topg(data, step, self.scale_factor)]
                        , axis=-1)

                else :
                    batch_images=np.stack([result, make_topg(data,step,self.scale_factor)], axis=-1)
                result = self.sess.run(models['model%i'%step], feed_dict={self.images: batch_images})
                result = np.array(result)[:,:,:,0]


            MSE=0
            for k in range(len(data[:,0,0,0])):
                acc = sklearn.metrics.mean_squared_error(result[k,:,:],label[k,:,:,0])
                MSE+=acc
            print("Model error : ", MSE/len(data[:,:,:,0]))
            if config.save_result:
                if not os.path.exists(config.result_fold):
                    os.makedirs(config.result_fold)
                for k in range(len(data[:,0,0,0])):
                    original = plt.contourf(label[k, :, :, 0])
                    plt.colorbar(original)
                    Cmap=original.get_cmap()
                    plt.savefig('%s/%d_label.png' % (config.result_fold, k))
                    plt.show()
                    base = plt.contourf(data[k, :, :, 0], cmap=Cmap)
                    plt.colorbar(original)
                    plt.savefig('%s/%d_bicubic.png' %(config.result_fold,k))
                    plt.show()
                    prev=plt.contourf(result[k,:,:], cmap=Cmap)
                    plt.colorbar(original)
                    plt.savefig('%s/%d_predicted.png' %(config.result_fold,k))
                    plt.show()
Example #13
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--input_path', required=True)
  parser.add_argument('--output_dir', required=True)
  parser.add_argument('--folds', required=True)
  parser.add_argument('--verbose', action='store_true')
  args = parser.parse_args()

  utils.setup_logging(args.verbose)
  folds = int(args.folds)
  assert folds > 1

  with open(args.input_path, 'r') as fp:
    lines = fp.read().splitlines()
  # Swap date and ticker in place.
  item_count = -1
  for i in range(len(lines)):
    items = lines[i].split(' ')
    if item_count < 0: item_count = len(items)
    else: assert item_count == len(items)
    items[0], items[1] = items[1], items[0]
    lines[i] = ' '.join(items)
  # This sorts the lines by date and then by ticker.
  lines.sort()

  # Prepare all the file handlers.  We are going to write to folds * 4 files,
  # with each fold a training data file, a training index file, a testing
  # data file, and a testing index file.
  train_data_fps, train_index_fps = [], []
  test_data_fps, test_index_fps = [], []
  for i in range(folds):
    train_data_fps.append(open('%s/train_data_%d' % (args.output_dir, i), 'w'))
    train_index_fps.append(open('%s/train_index_%d' % (args.output_dir, i),
                                'w'))
    test_data_fps.append(open('%s/test_data_%d' % (args.output_dir, i), 'w'))
    test_index_fps.append(open('%s/test_index_%d' % (args.output_dir, i), 'w'))
  # Sanity checks.
  assert len(train_data_fps) == folds
  assert len(train_index_fps) == folds
  assert len(test_data_fps) == folds
  assert len(test_index_fps) == folds

  segment = int(len(lines) / folds)
  for i in range(folds):
    logging.info('Writing fold %d' % (i+1))
    start = segment * i
    end = start + segment
    if i == folds - 1: end = len(lines)
    for j in range(start, end):
      items = lines[j].split(' ')
      # Write label, features to data files and date, ticker to index files.
      assert len(items) > 3
      data = '%s %s' % (utils.make_label(float(items[2]), False),
                        ' '.join(items[3:]))
      index = ' '.join(items[:2])
      for k in range(folds):
        if i != k:
          print(data, file=train_data_fps[k])
          print(index, file=train_index_fps[k])
        else:
          print(data, file=test_data_fps[k])
          print(index, file=test_index_fps[k])

  # Close all file handlers.
  close_all(train_data_fps)
  close_all(train_index_fps)
  close_all(test_data_fps)
  close_all(test_index_fps)
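To make the fold layout concrete: with folds=3 and nine sorted lines, each fold's test files receive one contiguous segment and its train files receive the other two (a worked illustration, not actual script output):

# fold 0: test = lines 0-2, train = lines 3-8
# fold 1: test = lines 3-5, train = lines 0-2 and 6-8
# fold 2: test = lines 6-8, train = lines 0-5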