def store(self, result_dir, s_format="pickle"):
    """ Stores this collection in the directory *result_dir*.

    In contrast to *dump*, this method stores the collection not in a
    single file but as a whole directory structure with meta information
    etc. The data sets are stored separately for each run, split and
    train/test combination.

    **Parameters**

        :result_dir:
            The directory in which the collection will be stored.

        :name:
            The prefix of the file names in which the individual data sets
            are stored. The actual file names are determined by appending
            suffixes that encode run, split and train/test information.

            (*optional, default: "time_series"*)

        :s_format:
            The format in which the actual data sets should be stored.
            Possible formats are *pickle*, *text*, *csv* and *MATLAB*
            (.mat) format. In the MATLAB and text format, all time series
            objects are concatenated into a single large table containing
            only integer values. For the csv format, comma separated
            values are used by default, or a Python format string can be
            specified. The MATLAB format is a struct that contains the
            data, the sampling frequency and the channel names.

            .. note:: For the text and MATLAB format, markers can be
                      added by using a Marker_To_Mux node beforehand.

            (*optional, default: "pickle"*)

    .. todo:: Put the marker at the right time point and also write a
              marker channel.
    """
    name = "time_series"
    # s_format may also be a two-element list [format, numeric format string]
    if isinstance(s_format, list):
        s_type = s_format[1]
        s_format = s_format[0]
    else:
        s_type = "%.18e"
    if s_format in ["text", "matlab"]:
        s_type = "%i"
    if s_format == "csv" and s_type == "real":
        s_type = "%.18e"
    # Update the meta data
    author = get_author()
    self.update_meta_data({
        "type": "time_series",
        "storage_format": s_format,
        "author": author,
        "data_pattern": "data_run" + os.sep + name + "_sp_tt." + s_format})
    # Iterate through splits and runs in this dataset
    for key, time_series in self.data.iteritems():
        # Load the data if necessary
        # (due to lazy loading, the data might not be loaded already)
        if isinstance(time_series, basestring):
            time_series = self.get_data(key[0], key[1], key[2])
        if self.sort_string is not None:
            time_series.sort(key=eval(self.sort_string))
        # Construct the result directory
        result_path = result_dir + os.sep + "data" + "_run%s" % key[0]
        if not os.path.exists(result_path):
            os.mkdir(result_path)
        key_str = "_sp%s_%s" % key[1:]
        # Store the data depending on the desired format
        if s_format in ["pickle", "cpickle", "cPickle"]:
            result_file = open(
                os.path.join(result_path, name + key_str + ".pickle"), "w")
            cPickle.dump(time_series, result_file, cPickle.HIGHEST_PROTOCOL)
        elif s_format in ["text", "csv"]:
            self.update_meta_data({
                "type": "stream",
                "marker_column": "marker"})
            result_file = open(
                os.path.join(result_path, name + key_str + ".csv"), "w")
            csvwriter = csv.writer(result_file)
            channel_names = copy.deepcopy(time_series[0][0].channel_names)
            if s_format == "csv":
                channel_names.append("marker")
            csvwriter.writerow(channel_names)
            # each element is a (window, label) pair; the label is written
            # as marker information
            for (data, key) in time_series:
                if s_format == "text":
                    numpy.savetxt(result_file, data, delimiter=",",
                                  fmt=s_type)
                    if key is not None:
                        result_file.write(str(key))
                        result_file.flush()
                    elif data.marker_name is not None \
                            and len(data.marker_name) > 0:
                        result_file.write(str(data.marker_name))
                        result_file.flush()
                else:
                    first_line = True
                    marker = ""
                    if key is not None:
                        marker = str(key)
                    elif data.marker_name is not None \
                            and len(data.marker_name) > 0:
                        marker = str(data.marker_name)
                    for line in data:
                        l = list(line)
                        l.append(marker)
                        csvwriter.writerow(l)
                        if first_line:
                            first_line = False
                            marker = ""
                    result_file.flush()
        elif s_format in ["mat"]:
            result_file = open(
                os.path.join(result_path, name + key_str + ".mat"), "w")
            # extract a first time series object to get the meta data
            merged_time_series = time_series.pop(0)[0]
            # collect all important information in the collection_object
            collection_object = {
                "sampling_frequency": merged_time_series.sampling_frequency,
                "channel_names": merged_time_series.channel_names}
            # merge all data
            for (data, key) in time_series:
                merged_time_series = numpy.vstack((merged_time_series,
                                                   data))
            collection_object["data"] = merged_time_series
            mdict = dict()
            mdict[name + key_str] = collection_object
            import scipy.io
            scipy.io.savemat(result_file, mdict=mdict)
        elif s_format in ["eeg"]:
            result_file = open(
                os.path.join(result_path, name + key_str + ".eeg"), "a+")
            result_file_mrk = open(
                os.path.join(result_path, name + key_str + ".vmrk"), "w")
            result_file_mrk.write("Brain Vision Data Exchange Marker File, "
                                  "Version 1.0\n")
            result_file_mrk.write("; Data stored by pySPACE\n")
            result_file_mrk.write("[Common Infos]\n")
            result_file_mrk.write("Codepage=UTF-8\n")
            result_file_mrk.write("DataFile=%s\n" %
                                  str(name + key_str + ".eeg"))
            result_file_mrk.write("\n[Marker Infos]\n")
            markerno = 1
            datapoint = 1
            sf = None
            channel_names = None
            for t in time_series:
                if sf is None:
                    sf = t[0].sampling_frequency
                if channel_names is None:
                    channel_names = t[0].get_channel_names()
                for mrk in t[0].marker_name.keys():
                    for tm in t[0].marker_name[mrk]:
                        result_file_mrk.write(
                            str("Mk%d=Stimulus,%s,%d,1,0\n" %
                                (markerno, mrk,
                                 datapoint + (tm * sf / 1000.0))))
                        markerno += 1
                data_ = t[0].astype(numpy.int16)
                data_.tofile(result_file)
                datapoint += data_.shape[0]
            result_file_mrk.close()
            result_hdr = open(
                os.path.join(result_path, name + key_str + ".vhdr"), "w")
            result_hdr.write("Brain Vision Data Exchange Header "
                             "File Version 1.0\n")
            result_hdr.write("; Data stored by pySPACE\n\n")
            result_hdr.write("[Common Infos]\n")
            result_hdr.write("Codepage=UTF-8\n")
            result_hdr.write("DataFile=%s\n" % str(name + key_str + ".eeg"))
            result_hdr.write("MarkerFile=%s\n" %
                             str(name + key_str + ".vmrk"))
            result_hdr.write("DataFormat=BINARY\n")
            result_hdr.write("DataOrientation=MULTIPLEXED\n")
            result_hdr.write("NumberOfChannels=%d\n" % len(channel_names))
            result_hdr.write("SamplingInterval=%d\n\n" % (1000000 / sf))
            result_hdr.write("[Binary Infos]\n")
            result_hdr.write("BinaryFormat=INT_16\n\n")
            result_hdr.write("[Channel Infos]\n")
            # TODO: Add resolutions to time_series
            #       0 = 0.1 [micro]V,
            #       1 = 0.5 [micro]V,
            #       2 = 10 [micro]V,
            #       3 = 152.6 [micro]V (seems to be unused!)
            resolutions_str = [
                unicode("0.1,%sV" % unicode(u"\u03BC")),
                unicode("0.5,%sV" % unicode(u"\u03BC")),
                unicode("10,%sV" % unicode(u"\u03BC")),
                unicode("152.6,%sV" % unicode(u"\u03BC"))]
            for i in range(len(channel_names)):
                result_hdr.write(
                    unicode("Ch%d=%s,,%s\n" %
                            (i + 1, channel_names[i],
                             unicode(resolutions_str[0]))).encode("utf-8"))
            result_hdr.close()
        else:
            raise NotImplementedError(
                "Using unavailable storage format: %s!" % s_format)
        result_file.close()
        self.update_meta_data({
            "channel_names":
                copy.deepcopy(time_series[0][0].channel_names),
            "sampling_frequency": time_series[0][0].sampling_frequency})
    # Store the meta data
    BaseDataset.store_meta_data(result_dir, self.meta_data)
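# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the original module). It
# assumes a populated dataset object of this class named ``dataset`` and a
# writable target directory; both names are placeholders. The calls mirror
# the formats handled by ``store`` above; each run/split/train-test
# combination ends up in a file such as
# <result_dir>/data_run0/time_series_sp0_test.<extension>.
#
#     dataset.store("/tmp/ts_results", s_format="pickle")
#     dataset.store("/tmp/ts_results", s_format="csv")
#     dataset.store("/tmp/ts_results", s_format="mat")
#     dataset.store("/tmp/ts_results", s_format="eeg")
#
# ``s_format`` may also be a two-element list such as ["csv", "%i"]; the
# second entry is then taken as the numeric format string (see the handling
# at the top of ``store``).
# ---------------------------------------------------------------------------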
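# Illustrative helper (an assumption, not part of the original module): a
# minimal sketch of how a .mat file written by ``store`` with s_format="mat"
# could be read back using scipy.io.loadmat. The entry name follows the
# ``name + key_str`` pattern used when saving; the path and function name
# below are hypothetical.
def _load_stored_mat(mat_path, entry_name):
    """Return the struct saved for one run/split/train-test .mat file."""
    import scipy.io
    # squeeze_me/struct_as_record make the nested struct easier to inspect
    mat = scipy.io.loadmat(mat_path, squeeze_me=True, struct_as_record=False)
    # The returned entry carries the fields assembled in the "mat" branch of
    # store(): "data", "sampling_frequency" and "channel_names".
    return mat[entry_name]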