Example #1
import os

import h5py
import numpy as np


# DATAFILE_MAT, GroupXData and ProgressBar come from the surrounding module.
def get_data(**kwargs):
    ''' Returns data from audio tracks
    '''

    if os.path.exists(DATAFILE_MAT):
        Data = GroupXData.LoadFromFile(DATAFILE_MAT)
    else:
        obs = []
        doc_range = [0]
        count = 0
        with h5py.File('../tracks.h5', 'r') as tracks:
            for track, grp in ProgressBar(tracks.items()):
                if 'gfccs' not in grp:
                    continue
                data = grp['gfccs']
                count += data.shape[0]
                doc_range.append(count)
                obs.append(np.asarray(data, dtype=np.float64))  # Dataset.value was removed in h5py 3
        X = np.vstack(obs)
        Data = GroupXData(X=X, doc_range=doc_range)
        Data.save_to_mat(DATAFILE_MAT)
    Data.name = 'AudioCorpus'
    Data.summary = 'Audio Corpus. obs=10.5M docs=559'

    return Data
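
The doc_range list built above is a CSR-style offset index: entries d and d + 1 bound the rows of the stacked matrix X that belong to track d. A minimal sketch of how a consumer could recover one track's frames from the stacked matrix (toy data; the names are illustrative, not part of the original code):

import numpy as np

X = np.random.rand(10, 3)   # stacked frames from two toy "documents"
doc_range = [0, 4, 10]      # document 0 owns rows 0-3, document 1 rows 4-9

d = 1
frames_d = X[doc_range[d]:doc_range[d + 1]]   # frames of document d
assert frames_d.shape[0] == doc_range[d + 1] - doc_range[d]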
Example #2
import os

import h5py
import numpy as np


# BASE_DIR, TIMBRE_GROUP, FEATURES_N, valid_data_types, ProgressBar and
# get_anns come from the surrounding module.
def tracks_to_assignments(track_file=os.path.join(BASE_DIR, 'segmented.h5'),
                          token_file=os.path.join(BASE_DIR, 'vocab',
                                                  'tokens.h5')):
    progress = ProgressBar()
    anns = get_anns()

    with h5py.File(track_file, 'r') as f:
        # check an exemplar to make sure FEATURES_N is up to date
        ex = next(iter(f.values()))
        for data_type in ex:
            features = ex[data_type].shape[1]
            if data_type == TIMBRE_GROUP:
                features -= 1
            assert features == FEATURES_N[data_type]
        with h5py.File(token_file, 'a') as g:
            for track in progress(f):
                grp = f[track]
                if set(grp.keys()) != valid_data_types:  # or track in g:
                    continue
                out_grp = g.create_group(track)
                for t, tree in anns.items():
                    values = grp[t][()]  # Dataset.value was removed in h5py 3
                    if t == TIMBRE_GROUP:
                        values = values[:, 1:]
                    assigned = [tree.nn(x) for x in values]
                    out_grp.create_dataset(t, data=np.asarray(assigned))
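
tracks_to_assignments assumes get_anns() returns one nearest-neighbour index per data type, each exposing an nn(x) method that maps a feature vector to a codebook entry. A minimal stand-in built on scipy's cKDTree (hypothetical; the original index implementation is not shown):

import numpy as np
from scipy.spatial import cKDTree


class NNIndex(object):
    """Hypothetical nearest-neighbour index with the nn() interface
    assumed by tracks_to_assignments."""

    def __init__(self, codebook):
        self._tree = cKDTree(np.asarray(codebook))

    def nn(self, x):
        # return the index of the codebook entry closest to x
        _, idx = self._tree.query(x)
        return int(idx)


tree = NNIndex(np.random.rand(64, 12))   # toy 64-word codebook
token = tree.nn(np.random.rand(12))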
Example #3
import h5py


# TIMBRE_GROUP, RHYTHM_GROUP, ProgressBar and beat_dcts_from_mfccs come
# from the surrounding module.
def add_rhythm(track_file='../tracks.h5'):
    progress = ProgressBar()
    with h5py.File(track_file, 'r') as f:
        keys = list(f.keys())  # materialize before the file is closed
    for track in progress(keys):
        with h5py.File(track_file, 'a') as f:
            grp = f[track]
            if TIMBRE_GROUP in grp and RHYTHM_GROUP not in grp:
                bccs = beat_dcts_from_mfccs(grp[TIMBRE_GROUP])
                grp.create_dataset(RHYTHM_GROUP, data=bccs, compression=9)
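
Note that add_rhythm reopens the HDF5 file once per track rather than holding it open across the whole loop; presumably this is so each new dataset is flushed and the file is closed after every track, and an interrupted run loses at most the track currently being processed.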
Example #4
    # Assumes at module level: from time import time, numpy as np, and a
    # ProgressBar supporting quiet, title, key and set_extra_text.
    def iterate(self, maxiter, prefunc=lambda s: s, quiet=False):
        self.maxiter = maxiter
        self.generate_initial()
        prefunc(self)
        try:
            pb = ProgressBar(range(maxiter),
                             quiet=quiet >= 2,
                             title='PLSA EM',
                             key='plsa_em')

            for self.itnum in pb:
                start_time = time()
                self.iteration()
                end_time = time()

                progress_items = [
                    ('iteration', self.itnum),
                    ('time', end_time - start_time),
                ] + [(k, v) for q in self.quantities for k, v in q.items(self)]
                self.progress.append(progress_items)

                pb.set_extra_text('; '.join([
                    '%s: %s' % (k.capitalize(), v)
                    for k, v in progress_items[2:] if np.isscalar(v)
                ]))
                if not quiet:
                    print('; '.join([
                        '%s: %s' % (k.capitalize(), v)
                        for k, v in progress_items
                    ]))

                if any(
                        sc.is_stop(self, dict(progress_items))
                        for sc in self.stop_conditions):
                    break
        except KeyboardInterrupt:
            print('<Interrupted>')
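
The loop above delegates termination to self.stop_conditions, each of which receives the solver and a dict of the current progress items. A minimal sketch of one such condition (hypothetical; the original stop-condition classes are not shown):

class MaxTimeStop(object):
    """Hypothetical stop condition: halt once a single EM iteration
    exceeds a wall-clock budget (seconds)."""

    def __init__(self, max_seconds):
        self.max_seconds = max_seconds

    def is_stop(self, solver, progress):
        # 'time' is one of the progress_items recorded every iteration
        return progress['time'] > self.max_seconds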
Example #5
import numpy as np
import pandas as pd


# WELLID_KEY, ROWID_KEY, COLUMNID_KEY, ProgressBar, get_well_all_parameters
# and convert_array_to_heatmap come from the surrounding module.
def fill_heatmaps(dfs,
                  numparams,
                  nr,
                  nc,
                  statfunc='mean',
                  showbar=False,
                  verbose=False,
                  rowID_key=ROWID_KEY,
                  colID_key=COLUMNID_KEY):
    """
    Create dictionary containing heatmaps (dataframes) for all measured parameters

    1) Determine how many wells actually contain data
    2) Loop over all wells
    3) Extract only data from current well from dataframe and calc statistics
    4) Save the results in a dictionary containing entries for all wells
    5) Fill the arrays with the values for the measured parameters from the well dictionary
    6) Create a dictionary that contains heatmaps (dataframes) for all measured parameters

    :param dfs: input dataframe
    :param numparams: number of measured parameters, excluding the object number
    :param nr: number of rows of the well plate --> 96-well plate = 8
    :param nc: number of columns of the well plate --> 96-well plate = 12
    :param statfunc: which statistic should be calculated
    :param verbose: if True, more output will be shown
    :return: heatmap_dict - dictionary containing one dataframe per measured parameter
    plus one entry for the heatmap containing the object numbers
    :return: welldata_dict - dictionary with an entry for every analyzed well,
    holding the values calculated by the statistical function
    """

    welldata_dict = {}
    heatmap_dict = {}

    # get all wells containing some data
    wellID_key = WELLID_KEY  # e.g. 'ImageSceneContainerName::Image Scene Container Name '
    print('---------------------------------------------------')
    print('wellID_key : ', wellID_key)
    print('Found keys:')
    print(dfs.keys())
    print('---------------------------------------------------')
    wells_real = dfs[wellID_key].value_counts()

    df_stats = pd.DataFrame(index=range(len(wells_real)), columns=dfs.columns)
    df_stats.drop(df_stats.columns[[3, 4]], axis=1, inplace=True)
    new_cols = df_stats.columns

    # create an additional columns of the object numbers
    df_obj = pd.DataFrame(index=range(len(wells_real)),
                          columns=['ObjectNumbers'])

    if showbar:
        # initialize the progress bar
        pb1 = ProgressBar(len(wells_real), title='Processing Wells')
    else:
        pb1 = iter(range(len(wells_real)))

    # iterate over all wells that were detected and do the statistics
    for well in pb1:

        # extract current dataframe for all existing wells
        current_wellid = wells_real.index[well]

        if verbose:
            print("Found data for wells : ", current_wellid)

        # get all data for the current well from the over dataframe
        df_tmp = get_well_all_parameters(dfs, current_wellid)

        # fill in the wellID, rowID and colID into the new dataframe for the statistics
        df_stats.loc[well, new_cols[0]] = current_wellid
        df_stats.loc[well, new_cols[1]] = df_tmp.iloc[0][new_cols[1]]
        df_stats.loc[well, new_cols[2]] = df_tmp.iloc[0][new_cols[2]]

        colnames = df_tmp.columns[list(range(5, 5 + numparams))]
        if statfunc in ('mean', 'median', 'min', 'max'):
            # dispatch to the requested statistic, e.g. df_tmp.mean(axis=0)
            stats_out = getattr(df_tmp, statfunc)(axis=0)[colnames]
            for col in colnames:
                df_stats.loc[well, col] = stats_out[col]

        # get number of entries and add them to stats data frame
        numobj_current_wellID = df_tmp.shape[0]
        # find the row index for the current wellID ...
        tmprow = df_stats[wellID_key].values.tolist().index(current_wellid)
        # ... and use the index to add the object number to the dataframe for the numbers
        df_obj.loc[tmprow, 'ObjectNumbers'] = numobj_current_wellID

    # join the data frame with object numbers to df_stats
    df_stats = pd.concat([df_stats, df_obj], axis=1)

    # create welldata_dict
    for well in range(len(wells_real)):

        wellid = df_stats[wellID_key][well]
        # adding data to welldata_dict using the wellid)
        welldata_dict[wellid] = df_stats.iloc[well]

    for hm in range(3, df_stats.shape[1]):

        # create heatmap based on the platetype
        heatmap_array = np.full([nr, nc], np.nan)
        heatmap_name = df_stats.columns[int(hm)]

        # cycle through df_stats based on the column name and transfer data to the heatmap
        for v in range(0, df_stats.shape[0]):

            rowindex = df_stats[rowID_key].iloc[v]
            colindex = df_stats[colID_key].iloc[v]
            hm_value = df_stats[heatmap_name].iloc[v]
            heatmap_array[int(rowindex) - 1, int(colindex) - 1] = hm_value

        # convert the filled array to a heatmap dataframe
        heatmap_dict[heatmap_name] = convert_array_to_heatmap(
            heatmap_array, nr, nc)

    return heatmap_dict, welldata_dict
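
fill_heatmaps expects dfs to start with the well identifier, row and column columns (named by the module constants WELLID_KEY, ROWID_KEY and COLUMNID_KEY), two bookkeeping columns, and then the measured parameters. A hedged usage sketch with toy data, assuming the constants resolve to the column names used below:

import pandas as pd

# toy single-parameter table for a 96-well plate (8 x 12);
# the real column names come from WELLID_KEY / ROWID_KEY / COLUMNID_KEY
dfs = pd.DataFrame({
    'WellID':   ['A1', 'A1', 'B2'],
    'RowID':    [1, 1, 2],
    'ColumnID': [1, 1, 2],
    'ID':       [0, 1, 0],
    'Index':    [0, 1, 0],
    'Area':     [10.0, 12.0, 7.5],
})

heatmaps, welldata = fill_heatmaps(dfs, numparams=1, nr=8, nc=12,
                                   statfunc='mean', showbar=False)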
Example #6
from ipy_progressbar.terminal_bar import ProgressBarTerminal
from ipy_progressbar import ProgressBar
from time import sleep
from random import random

# both should work:
pb = ProgressBarTerminal(5)
for i in pb:
    sleep(0.5 * random())

pb = ProgressBar(5)
for i in pb:
    sleep(0.5 * random())

# output throttling
for i in ProgressBar(10000):
    sleep(0.0005 * random())

# nested
pb = ProgressBar(5, title='Outer')
pb_inner = ProgressBar(5, title='Inner')
for i in pb:
    for j in pb_inner:
        sleep(0.5 * random())
Example #7
import logging
import os

import essentia
import h5py
from essentia.streaming import (FrameCutter, GFCC, HPCP, MonoLoader,
                                SpectralPeaks, Spectrum, Windowing)


# WINDOW_SIZE, HOP_SIZE, SAMPLE_RATE, BANDS, COEFS, LOW_FREQ, HIGH_FREQ,
# TIMBRE_GROUP, CHROMA_GROUP, RHYTHM_GROUP, ProgressBar and
# beat_dcts_from_mfccs come from the surrounding module.
def analyze_tracks(track_dir, track_file='../tracks.h5'):
    files = [
        x for x in os.listdir(track_dir)
        if x.endswith('.mp3') or x.endswith('.wav')
    ]

    w = Windowing(type='hann', size=WINDOW_SIZE)
    spectrum = Spectrum()
    options = {
        'sampleRate': SAMPLE_RATE,
        'numberBands': BANDS,
        'numberCoefficients': COEFS,
        'lowFrequencyBound': LOW_FREQ,
        'highFrequencyBound': HIGH_FREQ
    }
    gfcc = GFCC(**options)
    fcc_name = 'lowlevel.gfcc'

    framecutter = FrameCutter(frameSize=WINDOW_SIZE, hopSize=HOP_SIZE)
    peaks = SpectralPeaks(sampleRate=SAMPLE_RATE)
    hpcp = HPCP(sampleRate=SAMPLE_RATE)
    pool = essentia.Pool()

    framecutter.frame >> w.frame >> spectrum.frame
    spectrum.spectrum >> peaks.spectrum
    peaks.magnitudes >> hpcp.magnitudes
    peaks.frequencies >> hpcp.frequencies

    spectrum.spectrum >> gfcc.spectrum
    gfcc.bands >> None  # discard the unused bands output

    hpcp.hpcp >> (pool, 'lowlevel.hpcp')
    gfcc.gfcc >> (pool, fcc_name)
    loader = MonoLoader()

    loader.audio >> framecutter.signal

    for filename in ProgressBar(files):
        with h5py.File(track_file, 'a') as f:
            if filename not in f:
                try:
                    track_path = os.path.join(track_dir, filename)
                    loader.configure(filename=track_path,
                                     sampleRate=SAMPLE_RATE)

                    essentia.reset(loader)
                    essentia.run(loader)

                    grp = f.create_group(filename)
                    grp.create_dataset(TIMBRE_GROUP,
                                       data=pool[fcc_name],
                                       compression=9)
                    grp.create_dataset(CHROMA_GROUP,
                                       data=pool['lowlevel.hpcp'],
                                       compression=9)

                    bccs = beat_dcts_from_mfccs(pool[fcc_name])
                    grp.create_dataset(RHYTHM_GROUP, data=bccs, compression=9)

                    pool.clear()
                except (SystemExit, KeyboardInterrupt):
                    break
                except Exception as e:
                    logging.error(e)
Example #8
# ProgressBar, get_well_all_parameters, create_heatmap_list_arrays and
# convert_array_to_heatmap come from the surrounding module.
def fill_heatmaps(dfs,
                  numparams,
                  nr,
                  nc,
                  statfunc='mean',
                  showbar=False,
                  verbose=True):
    """
    Create dictionary containing heatmaps (dataframes) for all measured parameters

    1) Determine how many wells actually contain data
    2) Loop over all wells
    3) Extract only data from current well from dataframe and calc statistics
    4) Save the results in a dictionary containing entries for all wells
    5) Fill the arrays with the values for the measured parameters from the well dictionary
    6) Create a dictionary that contains heatmaps (dataframes) for all measured parameters

    :param dfs: input dataframe
    :param numparams: number of measured parameters, excluding the object number
    :param nr: number of rows of the well plate --> 96-well plate = 8
    :param nc: number of columns of the well plate --> 96-well plate = 12
    :param statfunc: which statistic should be calculated
    :param verbose: if True, more output will be shown
    :return: hm_dict - dictionary containing one dataframe per measured parameter
    plus one entry for the heatmap containing the object numbers
    :return: welldata_dict - dictionary with an entry for every analyzed well,
    holding the values calculated by the statistical function
    """

    # create list containing as many empty arrays as parameters were measured
    hm_list_array = create_heatmap_list_arrays(numparams, nr, nc)
    welldata_dict = {}
    hm_dict = {}

    # get all wells containing some data
    wells_real = dfs['WellID'].value_counts()

    if showbar:
        # initialize the progress bar
        pb = ProgressBar(len(wells_real), title='Processing Wells')
    else:
        pb = iter(range(len(wells_real)))

    # iterate over all wells that were detected
    for i in pb:

        # extract current dataframe for all existing wells
        current_wellid = wells_real.index[i]
        if verbose:
            print("Found data for wells : ", current_wellid)

        # get all data for the current well from the over dataframe
        df_tmp = get_well_all_parameters(dfs, current_wellid)

        # calculate the mean, median, min or max values
        if statfunc in ('mean', 'median', 'min', 'max'):
            welldata_dict[current_wellid] = getattr(df_tmp, statfunc)()

        # fill heatmap dictionary
        for p in range(0, numparams + 1):

            # the 1st heatmap reserved for the object numbers
            if p == 0:

                num_objects_current_well = df_tmp.shape[0]
                row = int(welldata_dict[current_wellid]['RowID'] - 1)
                col = int(welldata_dict[current_wellid]['ColumnID'] - 1)
                # update array inside heatmap list with object numbers
                hm_list_array[p][row, col] = num_objects_current_well
                # create entry in dictionary containing the single wells
                welldata_dict[current_wellid][
                    'ObjectNumber'] = num_objects_current_well
                # converts array to pandas dataframe and transfer it to the dictionary
                hm_dict['ObjectNumber'] = convert_array_to_heatmap(
                    hm_list_array[p], nr, nc)

            # cycle through all measured parameters beside the object number
            elif p > 0:

                row = int(welldata_dict[current_wellid]['RowID'] - 1)
                col = int(welldata_dict[current_wellid]['ColumnID'] - 1)
                # indices 0-4 (WellID, RowID, ColumnID, ID, Index) must be
                # skipped, but p is already >= 1
                curr_key = welldata_dict[current_wellid].keys()[p + 3]
                hm_list_array[p][row,
                                 col] = welldata_dict[current_wellid][curr_key]
                # converts array to pandas dataframe and transfer it to the dictionary
                hm_dict[curr_key] = convert_array_to_heatmap(
                    hm_list_array[p], nr, nc)

    return hm_dict, welldata_dict
Example #9
# Rot, pbb, ProgressBar, calc_rot_axis, create_permutations and
# RMatrix_Axis_Angle come from the surrounding module; unique, einsum
# and array are numpy functions imported at module level.
def CSM(Sobject, Cn, chains=None, Rotfunc=Rot, perm_opt=False):
    """Calculates the continuous symmetry measure as
    defined by Zabrodsky et al. (Continuous Symmetry
    Measures, J. Am. Chem. Soc. 1992, 114, 7843-7851).
    For the calculation of the optimal rotation axis,
    the analytical solution from Pinsky et al. is used
    (Analytical Methods for Calculating Continuous
    Symmetry Measures and the Chirality Measure,
    J. Comput. Chem. 29: 2712-2721, 2008)
    
    Parameters
    ----------
    Sobject:
        Model or SelObj of the ngmx class. In the 
        current implementation this has to be a single 
        frame!
    Cn: (int)
        Cn is the symmetry group. Currently only Cn 
        symmetry is supported.
    chains:
        Can specify the chains of a protein by giving
        a list of start and end atom indices of the
        chains. This is only useful if the sorting
        of the protein is not in a single direction
        (clockwise or anti-clockwise) but is sorted
        differently. For more complex sortings see
        the Rotfunc option.
    Rotfunc:
        If cycling the protein chains is not enough to
        realize the rotation and a more complex symmetry
        is needed, a user-defined rotation function can
        be supplied.

    Returns
    -------
    asymmetry_measure: (float)
        A value >= 0 (scaled by 100), where 0 means
        perfect symmetry according to Cn and larger
        values mean stronger asymmetry.
    symm_struct: (SelObj)
        Symmetric structure corresponding to the trajectory
    """
    
    Smean = Sobject[:]
    # Create the structure which will be used
    # to calculate the symmetric structure.
    B = Sobject[:Cn]
        
    Sn = []
    
    if pbb:
        # Use a progress bar to show the progress
        pb = ProgressBar(Sobject.n_frames, title='Progress')
        pb.start()
        
    atNCn = Sobject.n_atoms // Cn  # atoms per chain (integer division)
    if chains is None:
        chains = []
        for i in range(Cn):
            chains.append([atNCn * i, atNCn * (i + 1)])
    # Do the optimization if activated:
    if perm_opt:
        # dict of all ambiguous residues with the matching residues
        ambiguous_atoms = {
            "VAL": [["CG1", "CG2"], ["CG2", "CG1"]],
            "TYR": [["CD1", "CD2", "CE1", "CE2"], ["CD2", "CD1", "CE2", "CE1"]],
            "ASP": [["OD1", "OD2"], ["OD2", "OD1"]],
            "GLU": [["OE1", "OE2"], ["OE2", "OE1"]],
        }

        table, bonds = Sobject.topology.to_dataframe()
        # find all residues which are of the type defined in keys
        keys = ambiguous_atoms.keys()
        ambiguous_residues = []
        for key in keys:
            ambiguous_residues.append(list(unique(table[table.resName == key].resSeq)))
        ambiguous_residues = [item for sublist in ambiguous_residues for item in sublist]


        # Create the lists which atoms have to be exchanged from the original sorting 
        # for any of the permutations.
        from_id_list = []
        to_id_list = []
        indices_list = []

        perms = create_permutations(Cn)
        for perm in perms:
            indices_list.append([i for i, x in enumerate(perm) if x == '1'])
        # Get the list of chains (indices) which chains have to be relabeled:
        for indices in indices_list:
            from_id = []
            to_id = []

            # And find the atoms to be relabeled for any of the chains:
            for chain in indices:
                for sel_res in ambiguous_residues:

                    #First identify the residues:
                    dfs = table[(table.resSeq == sel_res) & (table.chainID == chain)]
                    # find the id of the atoms we need to permute from
                    for at in ambiguous_atoms[unique(dfs.resName)[0]][0]:
                        from_id.append(dfs[dfs.name == at].index.tolist()[0])

                    # and to permute to
                    for at in ambiguous_atoms[unique(dfs.resName)[0]][1]:
                        to_id.append(dfs[dfs.name == at].index.tolist()[0])
                        
            from_id_list.append(from_id)
            to_id_list.append(to_id)
    

    # END OPTIMIZATION
    for t in range(Sobject.n_frames):

        # Center COG at (0,0,0)
        Sobject.xyz[t] = Sobject[t].xyz - Sobject[t].xyz.mean(axis=1)

        # rotate to have all possible rotations
        # and relabel them
        for rot in range(Cn):
            B.xyz[rot] = Sobject.xyz[t]
            B.xyz[rot] = Rotfunc(rot, B.xyz[rot], chains)
            

        rot_axis = calc_rot_axis(B,Cn)
        
        if rot_axis is None:
            continue
        
        # We rotate around the axis and average over the rotations
        # to get the closest symmetric structure.
        for rot in range(1, Cn):
            # Calculate rotation matrix around rot_axis by an angle of rot*360/Cn
            RMat = RMatrix_Axis_Angle([rot_axis], [rot * 360. / Cn], deg=True)
            # Do the rotation using this matrix
            B.xyz[rot] = einsum("tnc,tcp->tnp", B[rot].xyz, RMat, casting='same_kind')
        # And average over it
        Smean.xyz[t] = B.xyz.mean(axis=0)

        # optimize if activated:
        if perm_opt:
            rmsd_list = []
            B_opt = B[0]

            for i, sel_res in enumerate(ambiguous_residues):
                indlist = table[(table.resSeq == sel_res)].index.tolist()
                rmsd_list.append(sum((Smean.atom_slice(indlist).xyz[t] - B.atom_slice(indlist).xyz[0])**2))
                
            for from_id,to_id in zip(from_id_list,to_id_list):
                # Do the permutation
                chains_to_perm = []
                
                # reset B
                for rot in range(Cn):
                    B.xyz[rot] = Sobject.xyz[t]
                    B.xyz[rot][to_id] = B[rot].xyz[0][from_id]
                    # And do the rotation again
                    B.xyz[rot] = Rotfunc(rot, B.xyz[rot], chains)
                    
                for rot in range(1, Cn):
                    # Calculate rotation matrix around rot_axis by an angle of rot*360/Cn
                    RMat = RMatrix_Axis_Angle([rot_axis], [rot * 360. / Cn], deg=True)
                    # Do the rotation using this matrix
                    B.xyz[rot] = einsum("tnc,tcp->tnp", B[rot].xyz, RMat, casting='same_kind')
                    
                # Calculate the new Mean
                Smean.xyz[t] = B.xyz.mean(axis=0)
                
                # Calculate the distance for all relevant residues
                for i, sel_res in enumerate(ambiguous_residues):
                    indlist = table[(table.resSeq == sel_res)].index.tolist()
                    rmsd = sum((Smean.xyz[t][indlist] - B.xyz[0][indlist])**2)
                
                    # Compare the new distance value to the old one
                    # and then replace the residue in Smean
                    # and in the optimized B
                    if rmsd_list[i] > rmsd:
                        rmsd_list[i] = rmsd
                        B_opt.xyz[0][indlist] = B[0].xyz[0][indlist]

                        
            # calculate the new optimal rotation axis
            for rot in range(Cn):
                B.xyz[rot] = B_opt.xyz[0]
                B.xyz[rot] = Rotfunc(rot, B.xyz[rot], chains)


            rot_axis = calc_rot_axis(B,Cn)

            #if rot_axis is None:
            #    continue
                
            # calculate the S value from the optimal value
            # We rotate around the axis and average over the rotations
            # to get the closest symmetric structure.
            for rot in range(1, Cn):
                # Calculate rotation matrix around rot_axis by an angle of rot*360/Cn
                RMat = RMatrix_Axis_Angle([rot_axis], [rot * 360. / Cn], deg=True)
                # Do the rotation using this matrix
                B.xyz[rot] = einsum("tnc,tcp->tnp", B[rot].xyz, RMat, casting='same_kind')
            # And average over it
            Smean.xyz[t] = B.xyz.mean(axis=0)
                    
        
        # END OPT
        
        # d_sq: Square of root mean square size of the object.
        #       Used to calculate the correct normalization of the CSM.
        d_sq = sum(B.xyz[0] ** 2)
        
        # CSM: Is the distance of the original object to the closest
        #      symmetric object (Smean) with a normalization.
        #      
        S = [t, 100./d_sq * sum((B.xyz[0] - Smean.xyz[t]) ** 2)]
        
        # The CSM can also be calculated based on the individual subunits.
        # By this the individual contributions to the asymmetry can be detected.
        for ch in chains:
            S.append(100. / d_sq * sum((B.xyz[0, ch[0]:ch[1]] - Smean.xyz[t, ch[0]:ch[1]]) ** 2))
        Sn.append(S)

        # If ProgressBar works: advance it.
        if pbb:
            pb.advance()
    if pbb:
        pb.finish()
    return array(Sn), Smean
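
Per frame, the measure above is just the squared distance between the original (centered) coordinates and their symmetrized average, normalized by the squared size of the object and scaled by 100. A self-contained sketch of that final step (toy arrays, not the full axis optimization):

import numpy as np


def csm_value(coords, symm_coords):
    # normalized CSM for pre-centered coordinates: 0 for a perfectly
    # symmetric structure, growing with asymmetry (scaled by 100 as above)
    d_sq = np.sum(coords ** 2)
    return 100. / d_sq * np.sum((coords - symm_coords) ** 2)


xyz = np.random.rand(12, 3)
xyz -= xyz.mean(axis=0)          # center COG at the origin
print(csm_value(xyz, xyz))       # identical structures -> 0.0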
Example #10
from ipy_progressbar.terminal_bar import ProgressBarTerminal
from ipy_progressbar import ProgressBar
from time import sleep
from random import random

# both should work:

pb = ProgressBarTerminal(5, title='Outer', key='outer')
for i in pb:
    pb_inner = ProgressBarTerminal(5, title='Inner', key='inner')
    for j in pb_inner:
        sleep(0.5 * random())
        # pb.set_extra_text('inner: %d' % j)

pb = ProgressBar(5, title='Outer', key='outer')
for i in pb:
    pb_inner = ProgressBar(5, title='Inner', key='inner')
    for j in pb_inner:
        sleep(0.5 * random())
        # pb.set_extra_text('inner: %d' % j)