def get_data(**kwargs):
    """Return the audio-track corpus as a ``GroupXData`` object.

    If ``DATAFILE_MAT`` exists, the dataset is loaded from that file.
    Otherwise it is built from ``../tracks.h5``: the ``gfccs`` dataset of
    every group that has one is cast to float64 and stacked row-wise, while
    ``doc_range`` collects the cumulative row offsets (starting at 0) that
    delimit each track. The freshly built dataset is saved to
    ``DATAFILE_MAT``.

    ``**kwargs`` is accepted but not used.

    :return: the dataset, with ``name`` and ``summary`` attributes set
    """
    if os.path.exists(DATAFILE_MAT):
        Data = GroupXData.LoadFromFile(DATAFILE_MAT)
    else:
        obs = []
        doc_range = [0]
        count = 0
        with h5py.File('../tracks.h5', 'r') as tracks:
            for _name, grp in ProgressBar(tracks.items()):
                if 'gfccs' not in grp:
                    continue
                data = grp['gfccs']
                count += data.shape[0]
                doc_range.append(count)
                # ``Dataset.value`` was removed in h5py 3.0; ``[()]`` reads
                # the whole dataset on every h5py version.
                obs.append(data[()].astype(np.float64))
        X = np.vstack(obs)
        Data = GroupXData(X=X, doc_range=doc_range)
        Data.save_to_mat(DATAFILE_MAT)
    Data.name = 'AudioCorpus'
    Data.summary = 'Audio Corpus. obs=10.5M docs=559'
    return Data
def tracks_to_assignments(track_file=os.path.join(BASE_DIR, 'segmented.h5'),
                          token_file=os.path.join(BASE_DIR, 'vocab', 'tokens.h5')):
    """Assign every feature vector of every track to a token via ``tree.nn``.

    For each group in ``track_file`` whose set of dataset names equals
    ``valid_data_types``, a group of the same name is created in
    ``token_file``. For every ``(data type, tree)`` pair returned by
    ``get_anns()``, that group receives one dataset holding ``tree.nn(x)``
    for each row ``x`` of the track's data. The first column of the
    ``TIMBRE_GROUP`` data is dropped before the lookup.

    Before any work is done, the first group in ``track_file`` is used to
    check that the feature counts agree with ``FEATURES_N``.

    :param track_file: path of the HDF5 file with per-track features (read only)
    :param token_file: path of the HDF5 file that receives the assignments
    """
    progress = ProgressBar()
    anns = get_anns()
    with h5py.File(track_file, 'r') as f:
        # check an exemplar to make sure FEATURES_N is up to date;
        # an empty file has no exemplar, so there is nothing to check
        ex = next(iter(f.values()), None)
        if ex is not None:
            for data_type in ex:
                features = ex[data_type].shape[1]
                if data_type == TIMBRE_GROUP:
                    features -= 1
                assert features == FEATURES_N[data_type]
        # Explicit append mode: h5py >= 3 opens files read-only by default,
        # which would make create_group() below fail.
        with h5py.File(token_file, 'a') as g:
            for track in progress(f):
                grp = f[track]
                if set(grp.keys()) != valid_data_types:  # or track in g:
                    continue
                out_grp = g.create_group(track)
                for t, tree in anns.items():
                    # ``Dataset.value`` was removed in h5py 3.0
                    values = grp[t][()]
                    if t == TIMBRE_GROUP:
                        values = values[:, 1:]
                    assigned = [tree.nn(x) for x in values]
                    out_grp.create_dataset(t, data=np.asarray(assigned))
def add_rhythm(track_file='../tracks.h5'):
    """Add the ``RHYTHM_GROUP`` dataset to every track that lacks one.

    The track names are read once up front. The file is then reopened for
    each track; tracks that have a ``TIMBRE_GROUP`` dataset but no
    ``RHYTHM_GROUP`` dataset get one computed by ``beat_dcts_from_mfccs``.

    :param track_file: path of the HDF5 file that is updated in place
    """
    progress = ProgressBar()
    with h5py.File(track_file, 'r') as f:
        # Materialise the names: on Python 3, keys() is a view that is
        # unusable once the file is closed.
        keys = list(f.keys())
    for track in progress(keys):
        # Explicit append mode: h5py >= 3 opens files read-only by default.
        with h5py.File(track_file, 'a') as f:
            grp = f[track]
            if TIMBRE_GROUP in grp and RHYTHM_GROUP not in grp:
                bccs = beat_dcts_from_mfccs(grp[TIMBRE_GROUP])
                grp.create_dataset(RHYTHM_GROUP, data=bccs, compression=9)
def iterate(self, maxiter, prefunc=lambda s: s, quiet=False):
    """Run up to ``maxiter`` iterations and record per-iteration progress.

    ``generate_initial()`` is called first, then ``prefunc(self)``. Each
    iteration calls ``self.iteration()`` and appends a list of
    ``(name, value)`` pairs to ``self.progress``: the iteration number, the
    elapsed time, and every pair produced by ``q.items(self)`` for each
    ``q`` in ``self.quantities``. The loop stops early as soon as any entry
    of ``self.stop_conditions`` reports ``is_stop``, or on Ctrl-C.

    :param maxiter: maximum number of iterations
    :param prefunc: callable invoked with ``self`` after initialisation
    :param quiet: falsy prints one progress line per iteration; a value
        ``>= 2`` is additionally passed on as ``quiet`` to the progress bar
    """
    self.maxiter = maxiter
    self.generate_initial()
    prefunc(self)
    try:
        pb = ProgressBar(range(maxiter), quiet=quiet >= 2,
                         title='PLSA EM', key='plsa_em')
        for self.itnum in pb:
            start_time = time()
            self.iteration()
            end_time = time()
            progress_items = [
                ('iteration', self.itnum),
                ('time', end_time - start_time),
            ] + [(k, v) for q in self.quantities for k, v in q.items(self)]
            self.progress.append(progress_items)
            # The bar only shows the scalar quantities; iteration number and
            # time (the first two items) are left out.
            pb.set_extra_text('; '.join([
                '%s: %s' % (k.capitalize(), v)
                for k, v in progress_items[2:]
                if np.isscalar(v)
            ]))
            if not quiet:
                # Single-argument print() behaves the same on Python 2 and 3.
                print('; '.join([
                    '%s: %s' % (k.capitalize(), v)
                    for k, v in progress_items
                ]))
            if any(sc.is_stop(self, dict(progress_items))
                   for sc in self.stop_conditions):
                break
    except KeyboardInterrupt:
        print('<Interrupted>')
def fill_heatmaps(dfs, numparams, nr, nc, statfunc='mean', showbar=False,
                  verbose=False, rowID_key=ROWID_KEY, colID_key=COLUMNID_KEY):
    """
    Create dictionary containing heatmaps (dataframes) for all measured parameters

    1) Determine how many wells actually contain data
    2) Loop over all wells
    3) Extract only data from the current well and calculate the statistics
    4) Collect the per-well results in one statistics dataframe
    5) Save one entry per well in the well dictionary
    6) Create a dictionary that contains one heatmap (dataframe) per
       statistics column, including the object numbers

    The column layout of ``dfs`` is taken positionally: columns 0-2 are
    copied per well, columns 3 and 4 are dropped, and the ``numparams``
    columns starting at position 5 are the measured parameters.

    :param dfs - input data frame
    :param numparams - number of measured parameters except the object number
    :param nr - number of rows of well plate --> 96 plate = 8
    :param nc - number of columns of well plate --> 96 plate = 12
    :param statfunc - statistic to calculate: 'mean', 'median', 'min' or
     'max'; any other value leaves the parameter columns empty
    :param showbar - if true a progress bar is shown while looping over the wells
    :param verbose - if True more output will be shown
    :param rowID_key - name of the column holding the 1-based plate row index
    :param colID_key - name of the column holding the 1-based plate column index
    :return: heatmap_dict - dictionary containing one dataframe per measured
     parameter plus one entry for the heatmap containing the object numbers
    :return: welldata_dict - dictionary containing entries for every well analyzed
     with the values calculated by the statistical function
    """
    welldata_dict = {}
    heatmap_dict = {}

    # get all wells containing some data
    wellID_key = WELLID_KEY  # 'ImageSceneContainerName::Image Scene Container Name '

    print('---------------------------------------------------')
    print('wellID_key : ', wellID_key)
    print('Found keys:')
    print(dfs.keys())
    print('---------------------------------------------------')

    wells_real = dfs[wellID_key].value_counts()
    num_wells = len(wells_real)

    df_stats = pd.DataFrame(index=range(num_wells), columns=dfs.columns)
    df_stats.drop(df_stats.columns[[3, 4]], axis=1, inplace=True)
    new_cols = df_stats.columns

    # create an additional column for the object numbers
    df_obj = pd.DataFrame(index=range(num_wells), columns=['ObjectNumbers'])

    if showbar:
        # initialize the progress bar
        pb1 = ProgressBar(num_wells, title='Processing Wells')
    else:
        pb1 = iter(range(num_wells))

    # iterate over all wells that were detected and do the statistics
    for well in pb1:

        # extract current dataframe for all existing wells
        current_wellid = wells_real.index[well]
        if verbose:
            print("Found data for wells : ", current_wellid)

        # get all data for the current well from the overall dataframe
        df_tmp = get_well_all_parameters(dfs, current_wellid)

        # Fill in the wellID, rowID and colID. A single .loc call is used
        # because chained indexing (df.iloc[i][col] = ...) may assign to a
        # temporary copy and leave df_stats unchanged.
        df_stats.loc[well, new_cols[0]] = current_wellid
        df_stats.loc[well, new_cols[1]] = df_tmp.iloc[0][new_cols[1]]
        df_stats.loc[well, new_cols[2]] = df_tmp.iloc[0][new_cols[2]]

        colnames = df_tmp.columns[list(range(5, 5 + numparams))]

        if statfunc in ('mean', 'median', 'min', 'max'):
            # Only the parameter columns are reduced, so non-numeric columns
            # elsewhere in the frame cannot break the statistic.
            stats_out = getattr(df_tmp[colnames], statfunc)(axis=0)
            for col in colnames:
                df_stats.loc[well, col] = stats_out[col]

        # get number of entries and add them to the object-number data frame
        numobj_current_wellID = df_tmp.shape[0]
        # find the row index for the current wellID ...
        tmprow = df_stats[wellID_key].values.tolist().index(current_wellid)
        # ... and use the index to add the object number to the dataframe
        df_obj.loc[tmprow, 'ObjectNumbers'] = numobj_current_wellID

    # join the data frame with object numbers to df_stats
    df_stats = pd.concat([df_stats, df_obj], axis=1)

    # create welldata_dict, keyed by the well id
    for well in range(num_wells):
        well_row = df_stats.iloc[well]
        welldata_dict[well_row[wellID_key]] = well_row

    # every column from position 3 onwards becomes one heatmap
    for hm in range(3, df_stats.shape[1]):

        # create heatmap based on the platetype
        heatmap_array = np.full([nr, nc], np.nan)
        heatmap_name = df_stats.columns[int(hm)]

        # transfer the column's values to the heatmap; IDs are 1-based
        for rowindex, colindex, hm_value in zip(df_stats[rowID_key],
                                                df_stats[colID_key],
                                                df_stats[heatmap_name]):
            heatmap_array[int(rowindex) - 1, int(colindex) - 1] = hm_value

        # convert array to heatmap dataframe
        heatmap_dict[heatmap_name] = convert_array_to_heatmap(
            heatmap_array, nr, nc)

    return heatmap_dict, welldata_dict
from ipy_progressbar.terminal_bar import ProgressBarTerminal
from ipy_progressbar import ProgressBar
from time import sleep
from random import random

# Demo script: exercises the progress bars with randomised short sleeps.

# both should work: the terminal-only bar first, then the generic bar
for bar_cls in (ProgressBarTerminal, ProgressBar):
    pb = bar_cls(5)
    for i in pb:
        sleep(0.5 * random())

# output throttling: many very short steps
for i in ProgressBar(10000):
    sleep(0.0005 * random())

# nested: one inner bar object is created once and re-iterated per outer step
pb = ProgressBar(5, title='Outer')
pb_inner = ProgressBar(5, title='Inner')
for i in pb:
    for j in pb_inner:
        sleep(0.5 * random())
def analyze_tracks(track_dir, track_file='../tracks.h5'):
    """Extract GFCC, HPCP and rhythm features for the audio files in a directory.

    Every ``.mp3``/``.wav`` file in ``track_dir`` that does not yet have a
    group in ``track_file`` is run through an essentia streaming network
    (frame cutter -> window -> spectrum -> GFCC, and spectral peaks -> HPCP).
    The results are stored in a new group named after the file, as the
    ``TIMBRE_GROUP``, ``CHROMA_GROUP`` and ``RHYTHM_GROUP`` datasets. The
    rhythm data comes from ``beat_dcts_from_mfccs`` applied to the GFCCs.

    Errors for a single file are logged and the loop continues with the next
    file; ``SystemExit``/``KeyboardInterrupt`` stop the loop.

    :param track_dir: directory that is scanned for audio files
    :param track_file: path of the HDF5 file that receives the features
    """
    files = [x for x in os.listdir(track_dir) if x.endswith(('.mp3', '.wav'))]

    w = Windowing(type='hann', size=WINDOW_SIZE)
    spectrum = Spectrum()
    options = {
        'sampleRate': SAMPLE_RATE,
        'numberBands': BANDS,
        'numberCoefficients': COEFS,
        'lowFrequencyBound': LOW_FREQ,
        'highFrequencyBound': HIGH_FREQ
    }
    gfcc = GFCC(**options)
    fcc_name = 'lowlevel.gfcc'
    framecutter = FrameCutter(frameSize=WINDOW_SIZE, hopSize=HOP_SIZE)
    peaks = SpectralPeaks(sampleRate=SAMPLE_RATE)
    hpcp = HPCP(sampleRate=SAMPLE_RATE)
    pool = essentia.Pool()

    # wire up the streaming network
    framecutter.frame >> w.frame >> spectrum.frame
    spectrum.spectrum >> peaks.spectrum
    peaks.magnitudes >> hpcp.magnitudes
    peaks.frequencies >> hpcp.frequencies
    spectrum.spectrum >> gfcc.spectrum
    gfcc.bands >> None
    hpcp.hpcp >> (pool, 'lowlevel.hpcp')
    gfcc.gfcc >> (pool, fcc_name)
    loader = MonoLoader()
    loader.audio >> framecutter.signal

    for filename in ProgressBar(files):
        # The file is reopened per track. Explicit append mode is required
        # because h5py >= 3 opens files read-only by default.
        with h5py.File(track_file, 'a') as f:
            if filename not in f:
                try:
                    track_path = os.path.join(track_dir, filename)
                    loader.configure(filename=track_path,
                                     sampleRate=SAMPLE_RATE)
                    essentia.reset(loader)
                    essentia.run(loader)
                    grp = f.create_group(filename)
                    grp.create_dataset(TIMBRE_GROUP,
                                       data=pool[fcc_name],
                                       compression=9)
                    grp.create_dataset(CHROMA_GROUP,
                                       data=pool['lowlevel.hpcp'],
                                       compression=9)
                    bccs = beat_dcts_from_mfccs(pool[fcc_name])
                    grp.create_dataset(RHYTHM_GROUP,
                                       data=bccs,
                                       compression=9)
                except (SystemExit, KeyboardInterrupt):
                    break
                except Exception as e:
                    logging.error(e)
                finally:
                    # Always empty the shared pool, so descriptors from a
                    # failed track cannot leak into the next track's data.
                    pool.clear()
def fill_heatmaps(dfs, numparams, nr, nc, statfunc='mean', showbar=False, verbose=True):
    """
    Create dictionary containing heatmaps (dataframes) for all measured parameters

    1) Determine how many wells actually contain data
    2) Loop over all wells
    3) Extract only data from the current well and calculate the statistics
    4) Save the results in a dictionary containing entries for all wells
    5) Fill the arrays with the values for the measured parameters
    6) Create a dictionary that contains heatmaps (dataframes) for all
       measured parameters

    :param dfs - input data frame; must provide the columns 'WellID',
     'RowID' and 'ColumnID' (RowID / ColumnID are 1-based plate positions)
    :param numparams - number of measured parameters except the object number
    :param nr - number of rows of well plate --> 96 plate = 8
    :param nc - number of columns of well plate --> 96 plate = 12
    :param statfunc - choice which statistics should be calculated:
     'mean', 'median', 'min' or 'max'
    :param showbar - if true a progress bar is shown while looping over the wells
    :param verbose - if True more output will be shown
    :return: hm_dict - dictionary containing one dataframe for all measured parameters
     plus one entry for the heatmap containing the object numbers
    :return: welldata_dict - dictionary containing entries for every well analyzed
     with the values calculated by the statistical function
    """
    # create list containing as many empty arrays as parameters were measured
    hm_list_array = create_heatmap_list_arrays(numparams, nr, nc)

    welldata_dict = {}
    hm_dict = {}

    # get all wells containing some data
    wells_real = dfs['WellID'].value_counts()

    if showbar:
        # initialize the progress bar
        pb = ProgressBar(len(wells_real), title='Processing Wells')
    else:
        pb = iter(range(len(wells_real)))

    # iterate over all wells that were detected
    for i in pb:

        # extract current dataframe for all existing wells
        current_wellid = wells_real.keys()[i]
        if verbose:
            print("Found data for wells : ", current_wellid)

        # get all data for the current well from the overall dataframe
        df_tmp = get_well_all_parameters(dfs, current_wellid)

        # Calculate the mean, median, min or max values.
        # mean/median are restricted to numeric columns explicitly: older
        # pandas dropped non-numeric columns silently, pandas >= 2.0 raises.
        # NOTE(review): min()/max() keep non-numeric columns (e.g. a string
        # WellID), which would shift the ``p + 3`` key offset used below —
        # confirm the intended layout for those two statistics.
        if statfunc == 'mean':
            welldata_dict[current_wellid] = df_tmp.mean(numeric_only=True)
        elif statfunc == 'median':
            welldata_dict[current_wellid] = df_tmp.median(numeric_only=True)
        elif statfunc == 'min':
            welldata_dict[current_wellid] = df_tmp.min()
        elif statfunc == 'max':
            welldata_dict[current_wellid] = df_tmp.max()

        well_stats = welldata_dict[current_wellid]

        # plate position of the current well, converted to 0-based indices
        # (builtin int: the np.int alias was removed in NumPy 1.24)
        row = int(well_stats['RowID'] - 1)
        col = int(well_stats['ColumnID'] - 1)

        # fill heatmap dictionary
        for p in range(0, numparams + 1):

            # the 1st heatmap is reserved for the object numbers
            if p == 0:
                num_objects_current_well = df_tmp.shape[0]

                # update array inside heatmap list with object numbers
                hm_list_array[p][row, col] = num_objects_current_well
                # create entry in dictionary containing the single wells
                well_stats['ObjectNumber'] = num_objects_current_well
                # convert array to pandas dataframe and transfer it to the dictionary
                hm_dict['ObjectNumber'] = convert_array_to_heatmap(
                    hm_list_array[p], nr, nc)

            # cycle through all measured parameters beside the object number
            else:
                # the leading bookkeeping entries of the per-well series are
                # skipped; p is already >= 1 here
                curr_key = well_stats.keys()[p + 3]
                hm_list_array[p][row, col] = well_stats[curr_key]
                # convert array to pandas dataframe and transfer it to the dictionary
                hm_dict[curr_key] = convert_array_to_heatmap(
                    hm_list_array[p], nr, nc)

    return hm_dict, welldata_dict
def CSM(Sobject, Cn, chains=None, Rotfunc=Rot, perm_opt=False):
    """Calculates the continuous symmetry measure as defined by
    Zabrodsky et al. (Continuous Symmetry Measures
    JAmChemSoc 1992, 114, 7843-7851).
    For the calculation of the optimal rotation axes the analytical solution
    from Pinsky et al. was used (Analytical Methods for Calculating
    Continuous Symmetry Measures and the Chirality
    JComputChem 29: 2712-2721, 2008).

    Parameters
    ----------
    Sobject: Model or SelObj of the ngmx class.
             The coordinates of every frame are centered in place
             (``Sobject.xyz`` is modified), and the first ``Cn`` frames are
             used as scratch space for the rotated copies.
             NOTE(review): the original documentation stated that this has
             to be a single frame, but the code iterates over all frames —
             confirm.
    Cn: (int)
        Cn is the symmetry group. Currently only Cn symmetry is supported.
    chains: Can specify the chains of a protein by giving a list of
            [start, end] atom indices of the chains. This is only useful if
            the sorting of the protein is not in a single direction
            (clockwise or anti-clockwise but is sorted differently).
            For more complex sortings see the Rotfunc option.
            If omitted, the atoms are split into Cn equally sized
            consecutive chains.
    Rotfunc: If for the rotation it is not enough to cycle the
             protein but to have a more complex symmetry, a
             user defined rotation function can be implemented.
             It is called as ``Rotfunc(rot, xyz, chains)``.
    perm_opt: (bool)
             If True, the atom labels of ambiguous side-chain atoms
             (VAL, TYR, ASP, GLU) are permuted and, per residue, the
             labelling closest to the symmetric structure is kept.

    Returns
    -------
    asymmetry_measure: (array)
            One row per processed frame: the frame index, the CSM of the
            whole object, and then one CSM contribution per chain. Each CSM
            is ``100 / d_sq`` times the summed squared distance to the
            symmetric structure, so 0 means perfect symmetry according
            to Cn. Frames for which no rotation axis could be determined
            are skipped.
    symm_struct: (SelObj)
            Symmetric structure corresponding to trajectory
    """
    # NOTE(review): ``sum``, ``unique``, ``einsum`` and ``array`` are used
    # as bare names; presumably the NumPy functions — confirm against the
    # module imports.
    Smean = Sobject[:]
    # Create the structure which will be used
    # to calculate the symmetric structure.
    B = Sobject[:Cn]

    Sn = []
    if pbb:
        # Use progressbar to show the progress
        pb = ProgressBar(Sobject.n_frames, title='Progress')
        pb.start()

    # Integer division: the values are used as slice bounds below, and true
    # division would produce floats on Python 3.
    atNCn = Sobject.n_atoms // Cn
    if chains is None:
        chains = []
        for i in range(Cn):
            chains.append([atNCn * i, atNCn * (i + 1)])

    # Do the optimization if activated:
    if perm_opt:
        # dict of all ambiguous residues with the matching residues
        ambiguous_atoms = {"VAL": [["CG1", "CG2"], ["CG2", "CG1"]],
                           "TYR": [["CD1", "CD2", "CE1", "CE2"], ["CD2", "CD1", "CE2", "CE1"]],
                           "ASP": [["OD1", "OD2"], ["OD2", "OD1"]],
                           "GLU": [["OE1", "OE2"], ["OE2", "OE1"]]}

        table, bonds = Sobject.topology.to_dataframe()
        # find all residues which are of the type defined in keys
        keys = ambiguous_atoms.keys()
        ambiguous_residues = []
        for key in keys:
            ambiguous_residues.append(list(unique(table[table.resName == key].resSeq)))
        ambiguous_residues = [item for sublist in ambiguous_residues for item in sublist]

        # Create the lists which atoms have to be exchanged from the original sorting
        # for any of the permutations.
        from_id_list = []
        to_id_list = []
        indices_list = []

        perms = create_permutations(Cn)
        for perm in perms:
            indices_list.append([i for i, x in enumerate(perm) if x == '1'])

        # Get the list of chains (indices) which chains have to be relabeled:
        for indices in indices_list:
            from_id = []
            to_id = []
            # And find the atoms to be relabeled for any of the chains:
            for chain in indices:
                for sel_res in ambiguous_residues:
                    # First identify the residues:
                    dfs = table[(table.resSeq == sel_res) & (table.chainID == chain)]
                    # find the id of the atoms we need to permute from
                    for at in ambiguous_atoms[unique(dfs.resName)[0]][0]:
                        from_id.append(dfs[dfs.name == at].index.tolist()[0])
                    # and to permute to
                    for at in ambiguous_atoms[unique(dfs.resName)[0]][1]:
                        to_id.append(dfs[dfs.name == at].index.tolist()[0])
            from_id_list.append(from_id)
            to_id_list.append(to_id)
    # END OPTIMIZATION

    for t in range(Sobject.n_frames):
        # Center COG at (0,0,0)
        Sobject.xyz[t] = Sobject[t].xyz - Sobject[t].xyz.mean(axis=1)

        # rotate to have all possible rotations
        # and relabel them
        for rot in range(Cn):
            B.xyz[rot] = Sobject.xyz[t]
            B.xyz[rot] = Rotfunc(rot, B.xyz[rot], chains)

        rot_axis = calc_rot_axis(B, Cn)
        if rot_axis is None:
            continue

        # We rotate around the axis and average over the rotations
        # to get the closest symmetric structure.
        for rot in range(1, Cn):
            # Calculate rotation matrix around rot_axis by an angle of rot*360/Cn
            RMat = RMatrix_Axis_Angle([rot_axis], [rot * 360. / Cn], deg=True)
            # Do the rotation using this matrix
            B.xyz[rot] = einsum("tnc,tcp->tnp", B[rot].xyz, RMat, casting='same_kind')

        # And average over it
        Smean.xyz[t] = B.xyz.mean(axis=0)

        # optimize if activated:
        if perm_opt:
            rmsd_list = []
            B_opt = B[0]
            # reference distances of the unpermuted labelling, per residue
            for i, sel_res in enumerate(ambiguous_residues):
                indlist = table[(table.resSeq == sel_res)].index.tolist()
                rmsd_list.append(sum((Smean.atom_slice(indlist).xyz[t] - B.atom_slice(indlist).xyz[0])**2))

            for from_id, to_id in zip(from_id_list, to_id_list):
                # Do the permutation
                # reset B
                for rot in range(Cn):
                    B.xyz[rot] = Sobject.xyz[t]
                    B.xyz[rot][to_id] = B[rot].xyz[0][from_id]
                    # And do the rotation again
                    B.xyz[rot] = Rotfunc(rot, B.xyz[rot], chains)

                for rot in range(1, Cn):
                    # Calculate rotation matrix around rot_axis by an angle of rot*360/Cn
                    RMat = RMatrix_Axis_Angle([rot_axis], [rot * 360. / Cn], deg=True)
                    # Do the rotation using this matrix
                    B.xyz[rot] = einsum("tnc,tcp->tnp", B[rot].xyz, RMat, casting='same_kind')

                # Calculate the new Mean
                Smean.xyz[t] = B.xyz.mean(axis=0)
                # Calculate the distance for all relevant residues
                for i, sel_res in enumerate(ambiguous_residues):
                    indlist = table[(table.resSeq == sel_res)].index.tolist()
                    rmsd = sum((Smean.xyz[t][indlist] - B.xyz[0][indlist])**2)
                    # Compare new distance value to old one
                    # and then replace the residue in Smean
                    # and in the optimized B
                    if rmsd_list[i] > rmsd:
                        rmsd_list[i] = rmsd
                        B_opt.xyz[0][indlist] = B[0].xyz[0][indlist]

            # calculate the new optimal rotation axis
            for rot in range(Cn):
                B.xyz[rot] = B_opt.xyz[0]
                B.xyz[rot] = Rotfunc(rot, B.xyz[rot], chains)

            rot_axis = calc_rot_axis(B, Cn)
            #if rot_axis is None:
            #    continue

            # calculate the S value from the optimal value
            # We rotate around the axis and average over the rotations
            # to get the closest symmetric structure.
            for rot in range(1, Cn):
                # Calculate rotation matrix around rot_axis by an angle of rot*360/Cn
                RMat = RMatrix_Axis_Angle([rot_axis], [rot * 360. / Cn], deg=True)
                # Do the rotation using this matrix
                B.xyz[rot] = einsum("tnc,tcp->tnp", B[rot].xyz, RMat, casting='same_kind')

            # And average over it
            Smean.xyz[t] = B.xyz.mean(axis=0)
        # END OPT

        # d_sq: Square of root mean square size of the object.
        #       Used to calculate the correct normalization of the CSM.
        d_sq = sum(B.xyz[0] ** 2)

        # CSM: Is the distance of the original object to the closest
        #      symmetric object (Smean) with a normalization.
        S = [t, 100. / d_sq * sum((B.xyz[0] - Smean.xyz[t]) ** 2)]
        # The CSM can also be calculated based on the individual subunits.
        # By this the individual contributions to the asymmetry can be detected.
        for ch in chains:
            S.append(100. / d_sq * sum((B.xyz[0, ch[0]:ch[1]] - Smean.xyz[t, ch[0]:ch[1]]) ** 2))

        Sn.append(S)
        # If ProgressBar works: advance it.
        if pbb:
            pb.advance()

    if pbb:
        pb.finish()

    return array(Sn), Smean
from ipy_progressbar.terminal_bar import ProgressBarTerminal
from ipy_progressbar import ProgressBar
from time import sleep
from random import random

# Demo script: nested progress bars, with a fresh inner bar created for
# every step of the outer bar.

# both should work: the terminal-only bar first, then the generic bar
for bar_cls in (ProgressBarTerminal, ProgressBar):
    pb = bar_cls(5, title='Outer', key='outer')
    for i in pb:
        pb_inner = bar_cls(5, title='Inner', key='inner')
        for j in pb_inner:
            sleep(0.5 * random())
            # pb.set_extra_text('inner: %d' % j)