def _convert_to_tfrecords(self, start_index, end_index, name):
    out_fn = os.path.join(self.DATA_PATH, name + ".tfrecords")
    date_fn = os.path.join(self.DATA_PATH, name + "_date.csv")
    start = -1
    if check_path_exists(out_fn):
        return
    with tf.python_io.TFRecordWriter(out_fn) as record_writer:
        for target in tqdm(range(start_index, end_index)):
            # The input window ends h steps before the target and spans
            # max_len rows; skip targets whose window would start before row 0.
            end = target - self.h + 1
            beg = end - self.para.max_len
            if beg < 0:
                continue
            if start < 0:
                start = target
            example = tf.train.Example(features=tf.train.Features(
                feature={
                    "x": self._float_list_feature(self.raw_data.loc[
                        beg:end - 1, self.series].values.flatten()),
                    "y": self._float_list_feature(self.raw_data.loc[
                        target, self.series].values),
                }))
            record_writer.write(example.SerializeToString())
    # Save the dates of the targets that were actually written.
    self.raw_data.loc[start:end_index - 1, 'date'].to_csv(date_fn)
def _convert_to_tfrecords(self, st, ed, name):
    # st and ed are given as fractions of the data set; convert to row indices.
    st = int(self.dat.shape[0] * st)
    ed = int(self.dat.shape[0] * ed)
    out_fn = os.path.join(self.DATA_PATH, name + ".tfrecords")
    if check_path_exists(out_fn):
        return
    with tf.python_io.TFRecordWriter(out_fn) as record_writer:
        for target in tqdm(range(st, ed)):
            # Same windowing as above: the input ends h steps before the
            # target and spans max_len rows.
            end = target - self.h + 1
            beg = end - self.para.max_len
            if beg < 0:
                continue
            example = tf.train.Example(features=tf.train.Features(
                feature={
                    "x": self._float_list_feature(self.dat[beg:end].flatten()),
                    "y": self._float_list_feature(self.dat[target]),
                }))
            record_writer.write(example.SerializeToString())
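# A minimal sketch of how the "x"/"y" records written by the two converters
# above could be decoded. The function name parse_window_example and the
# max_len/num_series arguments are illustrative assumptions, not part of the
# repo; it uses the same TF 1.x API family as tf.python_io.TFRecordWriter.
import tensorflow as tf


def parse_window_example(serialized, max_len, num_series):
    """Decode one window/target pair back into its original shape."""
    features = tf.parse_single_example(
        serialized,
        features={
            # "x" was flattened from a (max_len, num_series) window.
            "x": tf.FixedLenFeature([max_len * num_series], tf.float32),
            # "y" holds the num_series target values at the prediction step.
            "y": tf.FixedLenFeature([num_series], tf.float32),
        })
    x = tf.reshape(features["x"], [max_len, num_series])
    y = features["y"]
    return x, y

# Hypothetical usage: stream a split back for training.
# dataset = (tf.data.TFRecordDataset("train.tfrecords")
#            .map(lambda s: parse_window_example(s, max_len=24, num_series=8))
#            .batch(32))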
def _convert_to_tfrecords(self, mode, filename_list):
    filename = self.DATA_PATH + "/" + mode + ".tfrecords"
    if check_path_exists(filename):
        return
    logging.info("Writing {}".format(filename))
    with tf.python_io.TFRecordWriter(filename) as writer:
        for in_fn in tqdm(filename_list):
            # Raw MIDI files are parsed; anything else is assumed to be a
            # saved pypianoroll multitrack.
            if in_fn.endswith(".mid") or in_fn.endswith(".midi"):
                multi_track = ppr.parse(in_fn)
            else:
                multi_track = ppr.load(in_fn)
            TOTAL_STEPS = self._choose_total_steps(multi_track)
            if TOTAL_STEPS == 1e8:
                continue
            RANGE = self.INPUT_SIZE
            FINAL_STEPS = math.ceil(TOTAL_STEPS / 24)
            multi_data = np.zeros((FINAL_STEPS, RANGE))
            # Merge all valid tracks into a single binary piano roll.
            for track in multi_track.tracks:
                if not self._is_valid_track(track):
                    continue
                data = track.pianoroll.astype(int)
                data = self._sampling(data)
                multi_data = np.add(multi_data, data)
            multi_data = np.clip(multi_data, 0, 1).astype(int)
            # Write one example per MAX_LEN-step segment.
            segment_starts = self._split_into_segments(multi_data, 1)
            length = self.MAX_LEN
            for start in segment_starts:
                end = start + length
                if end >= FINAL_STEPS:
                    break
                example = tf.train.Example(
                    features=tf.train.Features(
                        feature={
                            "pianoroll": self._int64_list_feature(
                                multi_data[start:end + 1].flatten())
                        }))
                writer.write(example.SerializeToString())
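# A minimal sketch of how the "pianoroll" records written above could be read
# back. The helper name parse_pianoroll_example and its arguments are
# assumptions for illustration; each stored segment spans MAX_LEN + 1 steps of
# INPUT_SIZE pitches because the writer serializes multi_data[start:end + 1].
def parse_pianoroll_example(serialized, max_len, input_size):
    features = tf.parse_single_example(
        serialized,
        features={
            "pianoroll": tf.FixedLenFeature(
                [(max_len + 1) * input_size], tf.int64),
        })
    # Restore the (steps, pitches) layout that was flattened on write.
    return tf.reshape(features["pianoroll"], [max_len + 1, input_size])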
def _extract_file(self):
    if not check_path_exists(self.DATA_FULL_PATH):
        logging.info("Extracting %s dataset..." % self.para.data_set)
        tarfile.open(self.DATA_FULL_PATH + ".tar").extractall(path=self.DATA_PATH)