def from_data(data, header=None):
    """Build a BS3CoveDataBlock from its packed binary representation.

    Layout read from `data` (little-endian):
      * 7 byte header ``<HcHH``: group index, kind, nc, tf
      * ``nc * nc * (2 * tf - 1)`` float32 values, shaped
        ``(nc * nc, 2 * tf - 1)``
      * ``(tf * nc) ** 2`` float32 values, shaped ``(tf * nc, tf * nc)``

    :param data: raw payload; must be a `str` instance
    :param header: passed through unchanged to the block constructor
    :raises TypeError: if `data` is not a `str`
    """
    if not isinstance(data, str):
        raise TypeError('needs a sting as input!')

    header_size = 7
    grp_idx, kind, nc, tf = unpack('<HcHH', data[0:header_size])

    n_lags = tf * 2 - 1
    side = tf * nc

    # first float32 section, directly behind the header
    xc_stop = header_size + nc * nc * n_lags * 4
    xcoors = sp.frombuffer(data[header_size:xc_stop], dtype=sp.float32)
    xcoors.shape = (nc * nc, 2 * tf - 1)

    # second float32 section, directly behind the first one
    cov_stop = xc_stop + side * side * 4
    cov = sp.frombuffer(data[xc_stop:cov_stop], dtype=sp.float32)
    cov.shape = (side, side)

    return BS3CoveDataBlock(
        [grp_idx, kind, nc, tf, xcoors, cov], header=header)
def svm_read_problem(self, data_file_name, return_scipy=False):
    """Read LIBSVM-format data from a file.

    svm_read_problem(data_file_name, return_scipy=False) -> (y, x)
        y: list of float labels, x: list of {index: value} dictionaries
    svm_read_problem(data_file_name, return_scipy=True) -> (y, x)
        y: single-column pandas DataFrame of labels,
        x: dense pandas DataFrame built from the sparse feature matrix

    The SciPy/pandas form is only produced when the module-level `scipy`
    name is not None. Blank lines in the file are skipped.
    """
    use_scipy = scipy is not None and return_scipy
    if use_scipy:
        prob_y = array('d')
        prob_x = array('d')
        row_ptr = array('l', [0])
        col_idx = array('l')
    else:
        prob_y = []
        prob_x = []
    # Feature indices are 1-based unless an explicit index 0 is seen.
    indx_start = 1

    # The context manager guarantees the file is closed even on a parse error.
    with open(data_file_name) as data_file:
        for line in data_file:
            fields = line.split(None, 1)
            if not fields:
                # blank line: carries no instance
                continue
            # In case an instance with all zero features
            if len(fields) == 1:
                fields.append('')
            label, features = fields
            prob_y.append(float(label))
            if use_scipy:
                nz = 0
                for e in features.split():
                    ind, val = e.split(":")
                    if ind == '0':
                        indx_start = 0
                    val = float(val)
                    if val != 0:
                        col_idx.append(int(ind) - indx_start)
                        prob_x.append(val)
                        nz += 1
                row_ptr.append(row_ptr[-1] + nz)
            else:
                xi = {}
                for e in features.split():
                    ind, val = e.split(":")
                    xi[int(ind)] = float(val)
                prob_x.append(xi)

    if use_scipy:
        # scipy.frombuffer is a deprecated alias of numpy.frombuffer; NumPy is
        # always installed alongside SciPy, so import it directly here.
        import numpy as np
        prob_y = pd.DataFrame(np.frombuffer(prob_y, dtype='d'))
        prob_x = np.frombuffer(prob_x, dtype='d')
        col_idx = np.frombuffer(col_idx, dtype='l')
        row_ptr = np.frombuffer(row_ptr, dtype='l')
        prob_x = pd.DataFrame(
            sparse.csr_matrix((prob_x, col_idx, row_ptr)).todense())
    return (prob_y, prob_x)
def svm_read_problem(data_file_name, return_scipy=False):
    """Read LIBSVM-format data from a file.

    svm_read_problem(data_file_name, return_scipy=False) -> (y, x)
        y: list, x: list of dictionary
    svm_read_problem(data_file_name, return_scipy=True) -> (y, x)
        y: ndarray, x: csr_matrix

    The array/sparse form is only produced when the module-level `scipy`
    name is not None. Blank lines in the file are skipped.
    """
    use_scipy = scipy is not None and return_scipy
    if use_scipy:
        prob_y = array('d')
        prob_x = array('d')
        row_ptr = array('l', [0])
        col_idx = array('l')
    else:
        prob_y = []
        prob_x = []
    # Feature indices are 1-based unless an explicit index 0 is seen.
    indx_start = 1

    # The context manager guarantees the file is closed even on a parse error.
    with open(data_file_name) as data_file:
        for line in data_file:
            fields = line.split(None, 1)
            if not fields:
                # blank line: carries no instance
                continue
            # In case an instance with all zero features
            if len(fields) == 1:
                fields.append('')
            label, features = fields
            prob_y.append(float(label))
            if use_scipy:
                nz = 0
                for e in features.split():
                    ind, val = e.split(":")
                    if ind == '0':
                        indx_start = 0
                    val = float(val)
                    if val != 0:
                        col_idx.append(int(ind) - indx_start)
                        prob_x.append(val)
                        nz += 1
                row_ptr.append(row_ptr[-1] + nz)
            else:
                xi = {}
                for e in features.split():
                    ind, val = e.split(":")
                    xi[int(ind)] = float(val)
                prob_x.append(xi)

    if use_scipy:
        # scipy.frombuffer is a deprecated alias of numpy.frombuffer; NumPy is
        # always installed alongside SciPy, so import it directly here.
        import numpy as np
        prob_y = np.frombuffer(prob_y, dtype='d')
        prob_x = np.frombuffer(prob_x, dtype='d')
        col_idx = np.frombuffer(col_idx, dtype='l')
        row_ptr = np.frombuffer(row_ptr, dtype='l')
        prob_x = sparse.csr_matrix((prob_x, col_idx, row_ptr))
    return (prob_y, prob_x)
def play(self):
    """Stream the wave source chunk by chunk until it is exhausted.

    After every chunk is written, `self.task` is inspected for a pending
    command tuple whose first item is "pause", "stop" or "change_speed"
    (the latter carries the new speed as second item). Any other value is
    left untouched. `self.isstart` is set to False once the loop ends.
    """
    self.set_wave()
    self.set_stream()
    self.data = self.wf.readframes(self.chunk)
    self.changePlaySpeed(self.current_speed)

    while len(self.data) > 0:
        self.stream.write(self.data)
        # fetch the next chunk and reinterpret it as 16-bit samples
        self.data = self.wf.readframes(self.chunk)
        self.data = frombuffer(self.data, dtype="int16")

        if self.task is None:
            continue

        command = self.task[0]
        if command == "pause":
            self.task = None
            self.pause()
        elif command == "stop":
            self.task = None
            self.stop()
        elif command == "change_speed":
            print(f"curren_speed changed to: {self.task[1]}")
            self.current_speed = self.task[1]
            self.task = None
            self.changePlaySpeed(self.current_speed)

    print("play done")
    self.isstart = False
def read_libsvm_format(
        file_path: str) -> 'tuple[list[list[int]], sparse.csr_matrix]':
    """Read multi-label LIBSVM-format data.

    Each non-blank line is ``<label>[,<label>...] <index>:<value> ...`` with
    1-based feature indices. Features whose value is 0 are not stored.
    Blank lines are skipped.

    Args:
        file_path (str): Path to file.

    Returns:
        tuple[list[list[int]], sparse.csr_matrix]: A tuple of labels and
        features.
    """
    # scipy.frombuffer is a deprecated alias of numpy.frombuffer; NumPy is
    # always installed alongside SciPy, so import it directly here.
    import numpy as np

    prob_y = []
    prob_x = array('d')
    row_ptr = array('l', [0])
    col_idx = array('l')

    # The context manager guarantees the file is closed even on a parse error.
    with open(file_path) as data_file:
        for line in data_file:
            fields = line.split(None, 1)
            if not fields:
                # blank line: carries no instance
                continue
            # In case an instance with all zero features
            if len(fields) == 1:
                fields.append('')
            label, features = fields
            prob_y.append([int(s) for s in label.split(',')])
            nz = 0
            for e in features.split():
                ind, val = e.split(':')
                val = float(val)
                if val != 0:
                    col_idx.append(int(ind) - 1)
                    prob_x.append(val)
                    nz += 1
            row_ptr.append(row_ptr[-1] + nz)

    prob_x = np.frombuffer(prob_x, dtype='d')
    col_idx = np.frombuffer(col_idx, dtype='l')
    row_ptr = np.frombuffer(row_ptr, dtype='l')
    prob_x = sparse.csr_matrix((prob_x, col_idx, row_ptr))
    return (prob_y, prob_x)
def wavread(filename, normalize=False, dtype=float64):
    """Read a wave file into a (frames, channels) array.

    :param filename: path of the wave file
    :param normalize: if true, divide by the largest absolute sample;
        otherwise divide by ``2 ** (bits_per_sample - 1)``
    :param dtype: dtype the raw samples are converted to before scaling
    :return: tuple ``(samples, frame_rate, bits_per_sample)``
    """
    win = wave.open(filename, 'r')
    try:
        # Collect everything needed while the reader is open, so nothing is
        # queried from a closed object and the handle is released on errors.
        nframes = win.getnframes()
        nchannels = win.getnchannels()
        sampwidth = win.getsampwidth()
        framerate = win.getframerate()
        raw = win.readframes(nframes)
    finally:
        win.close()

    # typeinfo maps the sample width in bytes to the dtype used to view the
    # raw bytes (defined elsewhere in this module).
    x = frombuffer(raw, dtype='c').view(typeinfo[sampwidth]).reshape(
        nframes, nchannels).astype(dtype)

    if normalize:
        x /= abs(x).max()
    else:
        x = x / (2 ** (sampwidth * 8 - 1))
    return x, framerate, sampwidth * 8
def from_data(data, header=None):
    """Build a BS3SortSetupBlock from its packed binary representation.

    Layout read from `data` (little-endian):
      * ``<H`` number of groups, then per group
      * ``<HHHH`` group index, nc, tf, cl
      * ``(tf * nc) ** 2`` float32 values, shaped ``(tf * nc, tf * nc)``
      * ``<I`` number of units, then per unit two ``tf * nc`` float32
        arrays (each reshaped to ``(tf, nc)`` and transposed) followed by
        ``<fBHH`` snr, active, u1, u2

    :param data: raw payload; must be a `str` instance
    :param header: passed through unchanged to the block constructor
    :raises TypeError: if `data` is not a `str`
    """
    if not isinstance(data, str):
        raise TypeError('needs a sting as input!')

    def take_f32(start, count):
        # float32 view of `count` values at byte `start`, plus the new offset
        stop = start + count * 4
        return sp.frombuffer(data[start:stop], dtype=sp.float32), stop

    ngroup, = unpack('<H', data[0:2])
    pos = 2
    groups = []
    for _ in xrange(ngroup):
        grp_idx, nc, tf, cl = unpack('<HHHH', data[pos:pos + 8])
        pos += 8
        size = tf * nc

        cov, pos = take_f32(pos, size * size)
        cov.shape = (size, size)

        nunit, = unpack('<I', data[pos:pos + 4])
        pos += 4

        units = []
        for _ in xrange(nunit):
            filt, pos = take_f32(pos, size)
            filt = filt.reshape(tf, nc).T
            temp, pos = take_f32(pos, size)
            temp = temp.reshape(tf, nc).T
            snr, active, u1, u2 = unpack('<fBHH', data[pos:pos + 9])
            pos += 9
            units.append((filt, temp, snr, active, u1, u2))

        groups.append((grp_idx, nc, tf, cl, cov, units))

    return BS3SortSetupBlock(groups, header=header)
def wavread_old(fname, dtype='float'):
    """Read wave file

    Parameters:
      fname: string
        input filename
      dtype: string
        'int' leaves the sample values unscaled; any other value divides
        16 bit samples by 32768.0

    Returns:
      outData: ndarray
        wave data of fname; for two-channel files an array holding the two
        de-interleaved channels as rows
      fs: float
        sampling rate of fname
      bit: int
        bit depth of fname

    Raises:
      ValueError: if the bit depth is neither 16 nor 24
    """
    # The scipy top-level aliases (frombuffer, zeros, array) are deprecated
    # re-exports of NumPy; NumPy is always installed alongside SciPy.
    import numpy as np

    wp = wave.open(fname, 'rb')
    try:
        fs = wp.getframerate()
        bit = wp.getsampwidth() * 8
        ch = wp.getnchannels()
        nframes = wp.getnframes()
        str_data = wp.readframes(nframes)
    finally:
        # release the file handle even when reading fails
        wp.close()

    scaling = 1 if dtype == 'int' else 32768.0

    if bit == 16:
        # WAV PCM is little-endian regardless of the host byte order
        data = np.frombuffer(str_data, dtype='<i2') / scaling
    elif bit == 24:
        # NOTE(review): 24 bit samples are not decoded; as before, a
        # zero-filled placeholder with one entry per frame is returned.
        data = np.zeros(nframes)
    else:
        raise ValueError('unsupported bit depth: %s' % bit)

    if ch == 2:
        # samples are interleaved: even positions left, odd positions right
        out_data = np.array([data[::2], data[1::2]])
    else:
        out_data = np.array(data)

    return out_data, fs, bit
def fill_image(self, i, l, use_binarization=True, equalize=False):
    """Render one word image into row `i` of `self.tmp` and encode its text.

    `l` is a record whose fields are read as: ``l[0]`` word id
    (dash-separated; the first three parts form the line id), ``l[2]`` gray
    threshold, ``l[3:7]`` the word box, ``l[-1]`` the text.

    Returns the result of ``self.encode_word(text)``, or None when the word
    is rejected (text too long, word above its line box, scaled size out of
    range) or when decoding/placing the image fails.
    """
    text = l[-1]
    if len(text) > self.word_size:
        return None

    gray = int(l[2])
    box = [int(l[3]), int(l[4]), int(l[5]), int(l[6])]
    line_id = "-".join(l[0].split("-")[:3])
    line_box = self.lines[line_id]

    word_dy = box[1] - line_box[1]
    if word_dy < 0:
        return None

    # random jitter of +-10 % around the configured scale
    scale = self.scale * random.uniform(0.9, 1.1)
    new_height = floor(box[3] * scale)
    if new_height > self.line_height:
        return None
    new_width = floor(box[2] * scale)
    if new_width < 2 or new_height < 2:
        return None

    # Result is unused; the call is kept in case it has side effects
    # -- TODO confirm and drop.
    self.word_file(l)
    data = sp.frombuffer(self.word_images[l[0]], sp.uint8)

    try:
        img = cv2.imdecode(data, 0)
        if equalize:
            # NOTE(review): the second positional argument of equalizeHist
            # is `dst`, not a threshold -- confirm `gray` is intended here.
            img = cv2.equalizeHist(img, gray)
        elif use_binarization:
            _, img = cv2.threshold(img, gray, 255, cv2.THRESH_BINARY)
        # The third positional parameter of cv2.resize is `dst`, so the
        # interpolation flag has to be passed by keyword to take effect.
        img = cv2.resize(img, (new_width, new_height),
                         interpolation=cv2.INTER_LANCZOS4)
        word_dy = (128 - floor(word_dy * self.scale) - new_height
                   + floor(random.uniform(-self.rand_y, self.rand_y)))
        dx = self.dx + floor(random.uniform(0, self.rand_x))
        max_x = dx + new_width
        if max_x > self.line_width:
            new_width -= max_x - self.line_width
        self.tmp[i, word_dy:word_dy + new_height, dx:dx + new_width] = img
    except Exception:
        # Best effort: a word that cannot be decoded or does not fit is
        # skipped. KeyboardInterrupt/SystemExit are no longer swallowed.
        return None

    return self.encode_word(text)
def separate_wav(self, cut_interval, shift_time):
    """Yield overlapping windows of the 16-bit samples in `self.s_info.data`.

    Window length is ``int(f_rate * cut_interval)`` samples and consecutive
    windows start ``int(f_rate * shift_time)`` samples apart. The number of
    windows is ``(n_samples - f_rate) // shift + 1``.
    """
    rate = self.s_info.f_rate
    hop = int(rate * shift_time)
    window = int(rate * cut_interval)
    samples = frombuffer(self.s_info.data, dtype=int16)

    n_windows = ((len(samples) - rate) // hop) + 1
    start = 0
    for _ in range(n_windows):
        yield samples[start:start + window]
        start += hop
def _get_echan(self, idx): """yields an event channel as ndarray :type idx: int :param idx: channel id. """ # checks if idx not in self.echan_header: raise IndexError('no data for this channel: %s' % idx) # get data self.fp.seek(self.echan_header[idx].data_offset, 0) byte_data = self.fp.read(self.echan_header[idx].n_sample * 4) # return if len(byte_data) == 0: return sp.array([], dtype=sp.int32) return sp.frombuffer(byte_data, dtype=sp.int32)
def _get_achan(self, idx): """yields an analog channel as ndarray :type idx: int :param idx: channel id """ # checks if idx >= self.max_achan: raise IndexError('not a valid channel: %s' % idx) if idx not in self.achan_header: raise IndexError('no data for this channel: %s' % idx) # get data self.fp.seek(self.achan_header[idx].data_offset) byte_data = self.fp.read(self.achan_header[idx].n_sample * 2) # return if len(byte_data) == 0: return sp.array([], dtype=sp.int16) return sp.frombuffer(byte_data, dtype=sp.int16)
def from_data(data, header=None):
    """Build a BS3WaveDataBlock from its packed binary representation.

    Layout read from `data` (little-endian):
      * ``<I`` number of events, then per event
      * ``<HIQHH`` gid, uid, tv, nc, ns
      * ``ns * nc`` int16 values, reshaped to ``(nc, ns)`` and transposed

    :param data: raw payload; must be a `str` instance
    :param header: passed through unchanged to the block constructor
    :raises TypeError: if `data` is not a `str`
    """
    if not isinstance(data, str):
        raise TypeError('needs a sting as input!')

    nevent, = unpack('<I', data[0:4])
    cursor = 4
    events = []
    for _ in xrange(nevent):
        body = cursor + 18
        gid, uid, tv, nc, ns = unpack('<HIQHH', data[cursor:body])
        # the int16 payload starts right behind the 18 byte event header
        end = body + ns * nc * 2
        wf = sp.frombuffer(data[body:end], dtype=sp.int16).reshape(nc, ns).T
        events.append((gid, uid, tv, nc, ns, wf))
        cursor = end

    return BS3WaveDataBlock(events, header=header)
def read_mhd(mhd_path):
    """Read the raw data referenced by an mhd file and return it as array.

    The description file is searched for the ``ElementDataFile``,
    ``DimSize`` and ``ElementType`` entries; the element type is mapped to
    an array dtype by `get_np_dtype`. A relative data file name is resolved
    against the directory of `mhd_path` (only its base name is used).

    Two-dimensional data is reshaped to ``(dim[1], dim[0])`` and transposed.

    Based on code from the pirt library.
    see: https://bitbucket.org/almarklein/pirt

    :raises ValueError: if ``DimSize`` lists neither 2 nor 3 dimensions
    """
    # Load description from mhd file; closed as soon as it is read
    with open(mhd_path, 'r') as mhd_file:
        mhd = mhd_file.read()

    # Get data filename
    raw_path = re.findall('ElementDataFile = (.+)', mhd)[0]
    # if the path in the mhd is not an absolute path, make it one
    if raw_path[0] != '/':
        raw_path = os.path.join(os.path.dirname(mhd_path),
                                os.path.basename(raw_path))

    # get dimensions
    dimensions = sp.int16(re.findall('DimSize = (.+)', mhd)[0].split())

    # get correct datatype mapping
    mhd_dtype = re.findall('ElementType = (.+)', mhd)[0]
    dtype = get_np_dtype(mhd_dtype)

    # read the raw data; the handle is released even if reading fails
    with open(raw_path, 'rb') as fh:
        data = sp.frombuffer(fh.read(), dtype=dtype)

    if len(dimensions) == 2:
        data_reshape = sp.reshape(data, (dimensions[1], dimensions[0]))
        data_reshape = data_reshape.T
    elif len(dimensions) == 3:
        # FIXME
        # NOTE(review): 3D input still terminates the interpreter here, as
        # before; the reshape below is only reached once this is removed.
        sys.exit()
        data_reshape = sp.reshape(
            data, (dimensions[2], dimensions[1], dimensions[0]))
    else:
        raise ValueError(
            'unsupported number of dimensions: %d' % len(dimensions))

    return data_reshape
def split_mp3_to_wav_segment(filename, sampling_rate, mp3_length,
                             num_segment, index):
    """Cut ``<filename>.wav`` into `num_segment` consecutive wave files.

    Every segment holds ``int(channels * frame_rate * mp3_length /
    num_segment)`` int16 values, starting at an offset of
    ``floor(sampling_rate * (total_time - mp3_length) / 2)`` values. Output
    goes to ``wav_segment/<split>_<clip_id>_<n>.wav`` where split and
    clip_id are looked up in the module-level `df` at row `index` and n
    counts from 1. A message is printed (and processing continues) when the
    file is shorter than `mp3_length`.
    """
    reader = wave.open(str(filename) + '.wav', 'r')
    ch = reader.getnchannels()
    width = reader.getsampwidth()
    fr = reader.getframerate()
    fn = reader.getnframes()
    total_time = 1.0 * fn / fr
    seg_seconds = mp3_length / num_segment
    seg_values = int(ch * fr * seg_seconds)
    raw = reader.readframes(reader.getnframes())
    reader.close()
    samples = frombuffer(raw, dtype=int16)

    if total_time < mp3_length:
        print('mp3_length must be shorter than total_time')

    for seg in range(num_segment):
        split = df.split[index]
        clip_id = df.clip_id[index]
        out_name = ('wav_segment/'
                    + '_'.join((split, str(clip_id), str(seg + 1)))
                    + '.wav')

        offset = math.floor(sampling_rate * (total_time - mp3_length) / 2)
        first = seg * seg_values + offset
        last = seg * seg_values + seg_values + offset
        piece = samples[first:last]
        payload = struct.pack("%dh" % len(piece), *piece)

        writer = wave.open(out_name, 'w')
        writer.setnchannels(ch)
        writer.setsampwidth(width)
        writer.setframerate(fr)
        writer.writeframes(payload)
        writer.close()
def read_mhd(mhd_path):
    """Read the raw data referenced by an mhd file and return it as array.

    The description file is searched for the ``ElementDataFile``,
    ``DimSize`` and ``ElementType`` entries; the element type is mapped to
    an array dtype by `get_np_dtype`. A relative data file name is resolved
    against the directory of `mhd_path` (only its base name is used).

    Two-dimensional data is reshaped to ``(dim[1], dim[0])`` and transposed.

    Based on code from the pirt library.
    see: https://bitbucket.org/almarklein/pirt

    :raises ValueError: if ``DimSize`` lists neither 2 nor 3 dimensions
    """
    # Load description from mhd file; closed as soon as it is read
    with open(mhd_path, 'r') as mhd_file:
        mhd = mhd_file.read()

    # Get data filename
    raw_path = re.findall('ElementDataFile = (.+)', mhd)[0]
    # if the path in the mhd is not an absolute path, make it one
    if raw_path[0] != '/':
        raw_path = os.path.join(os.path.dirname(mhd_path),
                                os.path.basename(raw_path))

    # get dimensions
    dimensions = sp.int16(re.findall('DimSize = (.+)', mhd)[0].split())

    # get correct datatype mapping
    mhd_dtype = re.findall('ElementType = (.+)', mhd)[0]
    dtype = get_np_dtype(mhd_dtype)

    # read the raw data; the handle is released even if reading fails
    with open(raw_path, 'rb') as fh:
        data = sp.frombuffer(fh.read(), dtype=dtype)

    if len(dimensions) == 2:
        data_reshape = sp.reshape(data, (dimensions[1], dimensions[0]))
        data_reshape = data_reshape.T
    elif len(dimensions) == 3:
        # FIXME
        # NOTE(review): 3D input still terminates the interpreter here, as
        # before; the reshape below is only reached once this is removed.
        sys.exit()
        data_reshape = sp.reshape(
            data, (dimensions[2], dimensions[1], dimensions[0]))
    else:
        raise ValueError(
            'unsupported number of dimensions: %d' % len(dimensions))

    return data_reshape
('S1_phi', None), ('S1_eta', None), ('S1_theta', None), ) print "... going to fetch data from tree" ##### copy the data to memory. This is super fast, excellent!! data_buf = [None] * len(data_format) tIn.SetEstimate(nevents + 1) for idx, df in enumerate(data_format): print '... reading data in', df[0] tIn.Draw(df[0], "", "goff", nevents, 0) temp = tIn.GetV1() data_buf[idx] = copy.deepcopy( scipy.frombuffer(buffer=temp, dtype='double', count=nevents)) data = np.asarray(data_buf) dataframe = pd.DataFrame(data, index=[x[0] for x in data_format]) pd.set_option('expand_frame_repr', False) pd.options.display.max_rows = 10 print "... data moved to memory" # print data # print dataframe ### transpose, so that row ==> evt index, column ==> property dataframe = dataframe.transpose() print "... dataset transposed" print dataframe
else: ## first determine average for each ##### copy the data to memory. This is super fast, excellent!! nevents = tIn.GetEntries() tIn.SetEstimate(nevents + 1) avgs = [0, 0, 0, 0] for idx in range(4): print '... making averages', idx + 1 thecut = cut.format("S%i" % (idx + 1)) + " && has_S%i" % (idx + 1) print thecut tIn.Draw(expr.format("S%i") % (idx + 1), thecut, "goff", nevents, 0) temp = tIn.GetV1() nselected = tIn.GetEntries(thecut) print nselected data_buf = copy.deepcopy( scipy.frombuffer(buffer=temp, dtype='double', count=nselected)) data = np.asarray(data_buf) # print data # print min(data), max(data) avgs[idx] = np.average(data) print " >> avg ", avgs[idx] h_S1 = make_histogram(tIn, expr.format("S1"), cut.format("S1") + " && has_S1", hbounds, 'hscat_S1', xshift=-1. * avgs[0]) ## to be centred at 0 h_S2 = make_histogram(tIn, expr.format("S2"), cut.format("S2") + " && has_S2",
# check if the word satisfies all thresholds if wordConfi > confidence_threshold and ( wordT1 - wordT0) / wordCount > length_threshold: confidences[wordPy] = wordConfi timestamps[wordPy] = [wordT0, wordT1] # extract basic information of the wave file audio = wave.open(wav_file, 'r') ch = audio.getnchannels() width = audio.getsampwidth() fr = audio.getframerate() fn = audio.getnframes() data = audio.readframes(fn) audioContent = frombuffer(data, dtype=int16) # split the wave file if not os.path.exists('./output'): os.makedirs('./output') for name in timestamps: segment = audioContent[int(timestamps[name][0] * fr * ch):int(timestamps[name][1] * fr * ch)] outd = struct.pack("h" * len(segment), *segment) ww = wave.open('./output/' + name + '.wav', 'w') ww.setnchannels(ch) ww.setsampwidth(width) ww.setframerate(fr) ww.writeframes(outd) ww.close()
# ('genjet_eta' , 'double'), # ('genjet_phi' , 'double'), # ('genjet_mass' , 'double'), ) print "... going to fetch data from tree" ##### copy the data to memory. This is super fast, excellent!! data_buf = [None] * len(data_format) tIn.SetEstimate(nevents + 1) for idx, df in enumerate(data_format): print '... reading data in', df[0] tIn.Draw(df[0], "", "goff", nevents, 0) temp = tIn.GetV1() data_buf[idx] = copy.deepcopy( scipy.frombuffer(buffer=temp, dtype=df[1], count=nevents)) print "... done" data = np.asarray(data_buf) df = pd.DataFrame(data, index=[x[0] for x in data_format]) df = df.transpose() ## graphics options for display pd.set_option('expand_frame_repr', False) pd.options.display.max_rows = 10 # print df # convert jet flav column to integer type df['jet_flav'] = df['jet_flav'].astype(int) # use abs eta of jet
def cut_wav(filename, time):
    """Write overlapping excerpts of ``<filename>.wav`` as separate files.

    Prints the wave parameters, then writes
    ``<export_dir>/<filename>/<basename>_<i>.wav`` for i = 0, 1, ... Each
    excerpt starts at ``i * 10000`` and ends at ``(i + 10) * 10000`` int16
    values. The function returns as soon as an excerpt would end beyond
    ``int(channels * frame_rate * time) * num_cut`` values, where num_cut is
    the whole-second duration floor-divided by `time`.
    """
    reader = wave.open(f'{filename}.wav', 'r')
    n_channels = reader.getnchannels()
    sample_width = reader.getsampwidth()
    frame_rate = reader.getframerate()
    n_frames = reader.getnframes()
    duration = 1.0 * n_frames / frame_rate
    whole_seconds = math.floor(duration)
    values_per_cut = int(n_channels * frame_rate * time)
    num_cut = int(whole_seconds // time)

    print(f'channle: {n_channels}')
    print(f'sample width: {sample_width}')
    print(f'frame rate: {frame_rate}')
    print(f'frame num: {n_frames}')
    print(f'params: {reader.getparams()}')
    print(f'total time: {duration}')
    print(f'total time(int): {whole_seconds}')
    print(f'time: {time}')
    print(f'frames: {values_per_cut}')
    print(f'number of cut: {num_cut}')

    raw = reader.readframes(reader.getnframes())
    reader.close()
    samples = frombuffer(raw, dtype=int16)

    target_dir = f'{export_dir}/{filename}'
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    limit = values_per_cut * num_cut
    stem = os.path.basename(filename)

    # The plot width depends on the frame count, so a fixed step is used to
    # keep the widths aligned.
    step = 10000
    for i in range(num_cut):
        print(i)
        target = f'{target_dir}/{stem}_{i}.wav'
        lo = i * step
        hi = (i + 10) * step
        if hi > limit:
            print(' over frames')
            return

        piece = samples[lo:hi]
        payload = struct.pack('%dh' % len(piece), *piece)

        # output
        with wave.open(target, 'w') as writer:
            writer.setnchannels(n_channels)
            writer.setsampwidth(sample_width)
            writer.setframerate(frame_rate)
            writer.writeframes(payload)
OnePlotAxesX.spines['top'].set_visible(False) OnePlotAxesY.spines['right'].set_visible(False) OnePlotAxes.tick_params(axis='y', direction='out') OnePlotAxes.set_xlabel(var1label) OnePlotAxes.set_ylabel(var2label) N = mytree.GetEntries() corrhist = mytree.Draw(vartreeexp+">>htemp111%s(%d,%f,%f,%d,%f,%f)"% (MCSample,var1limits[2],var1limits[0],var1limits[1],var2limits[2],var2limits[0],var2limits[1]), selection="%f"%(MCScales[MCSample]), create_hist=True, options="goff") # mytree.GetSelectedRows() y = mytree.GetV1() y = copy.deepcopy(scipy.frombuffer(buffer=y,dtype='double',count=1000000 )) x = mytree.GetV2() x = copy.deepcopy(scipy.frombuffer(buffer=x,dtype='double',count=1000000 )) w = mytree.GetW() w = copy.deepcopy(scipy.frombuffer(buffer=w,dtype='double',count=1000000 )) # print w[:100] # print w[-100:] # plt.show() # code.interact(local=locals()) # Let's add on other samples that are related for (tmpMCSample,tmpNorm, tmpLabel, tmpFirstInCombinedSample) in MCSamples[jSample+1:]: if tmpFirstInCombinedSample:
def numpy_from_array(v):
    """Return a NumPy array built from the buffer of ``v.get_obj()``.

    The array is created with `numpy.frombuffer` and its default dtype, so
    it is a view on the underlying buffer rather than a copy.
    """
    # scipy.frombuffer is a deprecated alias of numpy.frombuffer; NumPy is
    # always installed alongside SciPy, so import it directly here.
    import numpy as np
    return np.frombuffer(v.get_obj())
wavfile = "./futta-dream.wav"

# Open the WAV file
wr = wave.open(wavfile, "rb")

# Show the WAV file's parameters (informational only)
print("Channel num : ", wr.getnchannels())
print("Sample size : ", wr.getsampwidth())
print("Sampling rate : ", wr.getframerate())
print("Frame num : ", wr.getnframes())
print("Prams : ", wr.getparams())
print("Sec : ", float(wr.getnframes()) / wr.getframerate())

# Read all frames
data = wr.readframes(wr.getnframes())

# Convert the raw bytes to 16-bit integers
num_data = frombuffer(data, dtype=int16)

if wr.getnchannels() == 2:
    # left channel
    left = num_data[::2]
    # right channel
    right = num_data[1::2]
else:
    # Not stereo: plot the first channel so `left` is always defined
    # (for mono this is the whole signal).
    left = num_data[::wr.getnchannels()]

wr.close()

plt.plot(left)
plt.show()