Example #1
def merge_detected_peaks(peak_files, out_file, raw_data_dir, f_type, tol,
                         min_pw, min_mz):
    """ Merge peaks detected by different algorithms in the same sample

    Args:
        peak_files (list of str): Absolute or relative path to classified peaks
            detected by the different algorithms.
        out_file (str): Absolute or relative path to the prospective output file.
        raw_data_dir (str): Absolute or relative path to the data directory
            containing all samples.
        f_type (str): File type of the data samples.
        tol (int|float): RT window within which peaks are merged.
        min_pw (int|float): Minimum peak width necessary for considering a 
            detected peak a real peak.
        min_mz (int): Minimum number of detected m/z values necessary for
            considering a detected peak a real peak.

    """

    peaks = None
    for peak_file in peak_files:
        algorithm = utils.split_path(peak_file)[1]
        new_peaks = DetectedXCMSPeaks(peak_file, algorithm, min_pw, min_mz, tol)
        if peaks is None:
            peaks = new_peaks
        else:
            peaks.join(new_peaks, tol)

    file_path_split = utils.split_path(peak_files[0])
    sample_name = '{}.{}'.format(file_path_split[-1].split('__')[0], f_type)
    sample_path = os.path.join(raw_data_dir, file_path_split[-2], sample_name)
    
    peaks.add_spectra(load_sampledata(sample_path))
    peaks.save_data(out_file)
    peaks.save_dropped(out_file.replace('merged', 'dropped'))
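
These snippets all revolve around a utils.split_path helper that is not shown on this page. A minimal sketch of one plausible implementation, assuming it simply returns the path's components as a list (the real helper may differ from project to project):

import os

def split_path(path):
    # Split a path into its components: 'a/b/c.txt' -> ['a', 'b', 'c.txt'].
    # Sketch only; indices such as [1] and [-2] in the example above index
    # into this component list.
    return [part for part in os.path.normpath(path).split(os.sep) if part]

print(split_path('results/xcms/sample__run1.tsv'))  # ['results', 'xcms', 'sample__run1.tsv']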
Example #2
def parse_args():    
    global DEBUG
    parser = ap.ArgumentParser()
    parser.add_argument("filepath", help="path to input audio file")
    parser.add_argument("trans_id", help="id in the database")
    parser.add_argument("-i", "--indir", help="directory containing input audio file, if filepath does not contain path")
    parser.add_argument("-d", "--debug", help="enble debugging output", type=bool)
    parser.add_argument("-s", "--sumpath", help="path to summary file. Can either be file or dir. If dir, summary file stored in dir/basefilename.txt")
    parser.add_argument("-f", "--segdir", help="directory to store temporary audio seg/frag-ment files")
    args = parser.parse_args()
    
    cfg_params = ut.get_cfg_params()
    
    filepath=""
    sumpath=""
    segdir=""
    
    #Get the input file path
    #FIX: Optimization, if os.path.isfile(args.filepath): filepath = args.filepath
    drct, fl = ut.split_path(args.filepath)
    if not drct:
        if args.indir and not os.path.isdir(args.indir):
            print "Invalid input directory specified"
            raise Exception
    if not fl:
        print "Invalid input file specified"
        raise Exception
    filepath = ut.dir_path( drct or args.indir or cfg_params["in_dir"] ) + fl
    
    if not os.path.exists(filepath):
        print "Input file not found. Filepath is {}".format(filepath)
        raise Exception    
    
    #Get summary file path    
    if args.sumpath:
        drct, fl = ut.split_path(args.sumpath)
        if drct and fl: sumpath = args.sumpath
        elif drct:
            sumpath = ut.dir_path(drct) + ut.base_filename(filepath)
        elif fl: 
            sumpath = ut.dir_path(cfg_params["summary_dir"]) + fl
        else:
            print "Invalid summary path specified"
            raise Exception
    else:
        sumpath = ut.dir_path(cfg_params["summary_dir"]) + ut.base_filename(filepath)
    #Append extension
    if not sumpath.endswith(".smry"): sumpath += ".smry"
    
    #Get segment directory path           
    if args.segdir and not os.path.isdir(args.segdir):
        print "Invalid segment file directory specified"
        raise Exception
    segdir = ut.dir_path(args.segdir or cfg_params["seg_file_dir"])  
    
    #Set Debug flag
    if args.debug:
        DEBUG = args.debug 
    
    return (filepath, sumpath, segdir)
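
The code above concatenates ut.dir_path(...) + fl, so ut.dir_path presumably normalizes a directory string to end with a separator. A hedged sketch of such a helper (the actual utils module is not shown):

import os

def dir_path(directory):
    # Ensure a trailing separator so that dir_path(d) + filename is a valid path.
    return directory if directory.endswith(os.sep) else directory + os.sep

print(dir_path('/tmp') + 'audio.wav')  # /tmp/audio.wav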
Example #3
def aws_search(awspaths, time=None):
    """
    Search data in AWS storage using AWS CLI

    :param awspaths: list of paths to search using AWS CLI
    :param time: time interval in datetime format to consider the data from
    :return result: metadata of the satellite data found
    """
    # The default time window is computed here rather than in the signature,
    # since default-argument values are evaluated only once at import time.
    if time is None:
        time = (datetime(2000, 1, 1), datetime.now())
    ls_cmd = 'aws s3 ls {} --recursive --no-sign-request'
    result = {}
    for awspath in awspaths:
        cmd = ls_cmd.format(awspath).split()
        r = aws_request(cmd)
        if r is not None:
            for line in r.decode().split('\n'):
                if len(line):
                    _,_,file_size,file_name = list(map(str.strip,line.split()))
                    info = parse_filename(file_name)
                    if time[0] <= info['start_date'] <= time[1]:
                        base = split_path(awspath)[0]
                        url = osp.join('s3://',base,file_name) 
                        file_basename = osp.basename(file_name)
                        product_id = 'A{:04d}{:03d}_{:02d}{:02d}'.format(info['start_date'].year,
                                            info['start_date'].timetuple().tm_yday,
                                            info['start_date'].hour,
                                            info['start_date'].minute)
                        result.update({product_id: {'file_name': file_basename, 'file_remote_path': file_name,
                            'time_start': utc_to_utcf(info['start_date']), 
                            'time_end': utc_to_utcf(info['end_date']), 
                            'updated': datetime.now(), 'url': url, 'file_size': int(file_size), 
                            'domain': info['domain'], 'satellite': 'G{}'.format(info['satellite']), 
                            'mode': info['mode'], 
                            'product_id': product_id}})
    return result
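
Each line of aws s3 ls --recursive output carries four whitespace-separated fields (date, time, size, key), which is what the four-way unpacking above relies on. A standalone illustration with a made-up line; maxsplit=3 additionally keeps keys that contain spaces intact:

line = '2023-01-05 12:34:56    1048576 ABI-L2-FDCC/2023/005/OR_ABI-L2-FDCC-M6_G16.nc'
date, time_, file_size, file_name = line.split(maxsplit=3)
print(file_size, file_name)  # 1048576 ABI-L2-FDCC/2023/005/OR_ABI-L2-FDCC-M6_G16.nc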
Example #4
    def _build_speaker_dict(self, data_path):
        speaker_dict = dict(
            enumerate([
                split_path(s)[1]
                for s in sorted(glob.glob('{}/*.wav'.format(data_path)))
            ]))
        return speaker_dict
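
Assuming split_path(s)[1] yields the file-name component here, the same index-to-speaker mapping can be written without the helper using pathlib (build_speaker_dict is an illustrative name):

import glob
from pathlib import Path

def build_speaker_dict(data_path):
    # {0: 'alice.wav', 1: 'bob.wav', ...} for the sorted .wav files in data_path
    return dict(enumerate(Path(p).name
                          for p in sorted(glob.glob('{}/*.wav'.format(data_path)))))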
Example #5
def restore_all(password_file=None):
    for file_ in get_files(ATK_VAULT):
        if os.path.basename(file_) == 'encrypted':

            # Get the path without the atk vault base and encrypted filename
            original_path = os.path.join(*split_path(file_)[1:-1])
            restore(original_path, password_file)
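
The slice split_path(file_)[1:-1] strips the vault base directory and the trailing 'encrypted' file name, leaving the original relative path. The same idea with pathlib, on a made-up vault path:

import os
from pathlib import Path

file_ = os.path.join('atk_vault', 'etc', 'secrets', 'encrypted')
# Drop the vault base ('atk_vault') and the 'encrypted' leaf, keep the middle:
original_path = os.path.join(*Path(file_).parts[1:-1])
print(original_path)  # etc/secrets (with the platform's separator)

Example #6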
def add_from_txt(txt_folder_name=r'..\..\papers_data'):

    file_paths = []
    listdir(txt_folder_name, file_paths, ['.txt'])
    start_time = time.time()
    res_json_text = []
    org_years = collections.defaultdict(set)  # for json name.
    for txt_file_path in tqdm(file_paths, ncols=100, desc='process txts'):
        folder_name, org, org_year, file_name = split_path(txt_file_path)

        name = file_name.split('.txt')[0]
        year = org_year.split('_')[1]

        json_body = {}
        json_body['Organization'] = org
        json_body['Name'] = name
        json_body['Year'] = year

        with open(txt_file_path, 'r', encoding='utf8') as f:
            json_body['Text'] = clean_text(f.read())

        json_start = '{"index":{"_id":\"' + curlmd5(json_body['Name']) + '\"}}'
        json_body = json_start + '\n' + json.dumps(json_body) + '\n'
        res_json_text.append([org, json_body])
        org_years[org].add(year)

    out_folder = 'txt2ES_json'
    if not os.path.exists(out_folder): os.mkdir(out_folder)
    for _org in org_years.keys():
        org_years[_org] = '_'.join(sorted(org_years[_org]))
        with open(f'./{out_folder}/{_org}_{org_years[_org]}.json',
                  'w',
                  encoding='utf8') as f:
            for __org, _json_body in res_json_text:
                if __org == _org: f.write(_json_body)

    end_time = time.time()
    print(f'time cost: {end_time - start_time:.2f}s.')
    return None
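
The json_start line followed by the document line is the Elasticsearch bulk-API NDJSON format. Building both halves with json.dumps avoids the hand-escaped quotes used above (bulk_pair and the literal id are illustrative; the original derives the id with curlmd5):

import json

def bulk_pair(doc, doc_id):
    # The two NDJSON lines the Elasticsearch bulk API expects per indexed document.
    action = json.dumps({'index': {'_id': doc_id}})
    return action + '\n' + json.dumps(doc) + '\n'

print(bulk_pair({'Name': 'some paper', 'Year': '2020'}, 'abc123'), end='')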
Example #7
def parse_args(fpath=None, trans_id=None, idr=None, dbg=None, spath=None, sgdr=None):    
    global DEBUG
    parser = ap.ArgumentParser()
    
    if not fpath:
        parser.add_argument("filepath", help="path to input audio file")
    if not trans_id:
        parser.add_argument("trans_id", help="id in the database")
    if not idr:
        parser.add_argument("-i", "--indir", help="directory containing input audio file, if filepath does not contain path")
    if not dbg:
        parser.add_argument("-d", "--debug", help="enble debugging output", type=bool)
    if not spath:
        parser.add_argument("-s", "--sumpath", help="path to summary file. Can either be file or dir. If dir, summary file stored in dir/basefilename.txt")
    if not sgdr:    
        parser.add_argument("-f", "--segdir", help="directory to store temporary audio seg/frag-ment files")
    args = parser.parse_args()
    
    cfg_params = ut.get_cfg_params()
    
    filepath=""
    sumpath=""
    segdir=""

    directory=""

    #Get the input file path
    if fpath and os.path.isfile(fpath):    
        filepath = fpath
    else:    
        drct, fl = ut.split_path(fpath if fpath else args.filepath)
        if not fl:
            print "Invalid input file: '{}'".format(drct+"/"+fl)
            raise Exception
        if not drct:
            directory = idr or args.indir or cfg_params["in_dir"]
            if not os.path.isdir(directory):
                print "Invalid input directory '{}'".format(directory)
                raise Exception
            filepath = ut.dir_path(directory) + fl
        else:
            filepath = ut.dir_path(drct) + fl
            
        if not os.path.exists(filepath):
            print "Input file not found. Filepath is {}".format(filepath)
            raise Exception    
    
    #Get summary file path
    sumpath = spath or args.sumpath  
    if sumpath:
        drct, fl = ut.split_path(sumpath)
        directory = drct
        if drct and fl:
            pass
        elif drct:
            sumpath = ut.dir_path(drct) + ut.base_filename(filepath)
        elif fl: 
            directory=ut.dir_path(cfg_params["summary_dir"])
            sumpath = directory + fl
        else:
            #TODO: Check if this is necessary
            print "Invalid summary path {}".format(sumpath)
            raise Exception
    else:
        directory = ut.dir_path(cfg_params["summary_dir"])
        sumpath = directory + ut.base_filename(filepath)
    #Append extension
    if not sumpath.endswith(".smry"): sumpath += ".smry"
    #If dir does not exist, create it
    if not os.path.exists(directory):
        if DEBUG: print "Creating dir {}".format(directory)
        os.makedirs(directory)
            
    #Get segment directory path           
    segdir = sgdr or args.segdir or cfg_params["seg_file_dir"]
    if not os.path.exists(segdir):
        if DEBUG: print "Creating dir {}".format(segdir)
        os.makedirs(segdir)
  
    #Set Debug flag
    if args.debug:
        DEBUG = args.debug 
    
    return (filepath, sumpath, segdir)
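Example #8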
                             binsize,
                             winlen,
                             dither=dither,
                             n_surr=n_surr,
                             min_spikes=min_spikes,
                             min_occ=min_occ,
                             spectrum=spectrum)
    else:
        output = spade.spade(spikedata,
                             binsize,
                             winlen,
                             dither=dither,
                             n_surr=0,
                             min_spikes=min_spikes,
                             min_occ=min_occ,
                             spectrum=spectrum)
    # Storing data
    if rank == 0:
        results[rep] = output
if rank == 0:
    # Storing results
    path = '../results/results_data{}'.format(data_idx)
    path_temp = './'
    for folder in split_path(path):
        path_temp = path_temp + '/' + folder
        mkdirp(path_temp)
    if xi == 0 and occ == 0:
        np.save(path + '/result_ind', [results, param])
    else:
        np.save(path + '/result_xi{}_occ{}'.format(xi, occ), [results, param])
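
The split_path/mkdirp loop above, which recurs in the later SPADE examples, creates each path prefix in turn. On any recent Python the standard library covers this in a single call:

import os

# Equivalent to walking split_path(path) and calling mkdirp on each prefix:
os.makedirs('../results/results_data0', exist_ok=True)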
Example #9
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Extract and patch the interface without bootloader
"""
from options import get_options
from paths import get_interface_path, TMP_DIR
from utils import gen_binary, is_lpc, split_path

from os.path import join

if __name__ == '__main__':
    options = get_options()

    in_path = get_interface_path(options.interface,
                                 options.target,
                                 bootloader=False)
    _, name, _ = split_path(in_path)
    out_path = join(TMP_DIR, name + '.bin')

    print '\nELF: %s' % in_path
    gen_binary(in_path, out_path, is_lpc(options.interface))
    print "\nBINARY: %s" % out_path
                                         norm=LogNorm(vmin=0.0001, vmax=1),
                                         cmap=plt.cm.RdPu)
    ax_max_FPFNs.plot(np.where(max_FPFNs > 0.05)[0] + 0.5,
                      np.where(max_FPFNs > 0.05)[1] + 0.5,
                      'o',
                      markerfacecolor="None",
                      markeredgecolor='k',
                      markersize=1.8)
    ax_max_FPFNs.tick_params(length=1.2)
    ax_max_FPFNs.set_xticks(np.arange(0.5, 9, 1))
    ax_max_FPFNs.set_xticklabels(range(3, 11), size=tick_size)
    ax_max_FPFNs.set_xlim([0, 8])
    ax_max_FPFNs.set_xlabel('pattern size $(z)$', size=label_size)
    ax_max_FPFNs.set_yticks(np.arange(0.5, 9, 1))
    ax_max_FPFNs.set_yticklabels(range(3, 11), size=tick_size)
    ax_max_FPFNs.set_ylim([0, 8])
    if max_FPFNs_idx == 0:
        ax_max_FPFNs.set_ylabel(r'$\#$ occurrences $(c)$', size=label_size)
    if max_FPFNs_idx == len(data_idxs) - 1:
        cbar = colorbar(pcol_max_FPFNs)
        # cbar = fig_FPFN.colorbar(pcol_max_FPFNs, ax=ax_max_FPFNs, fraction=.01)
        cbar.set_label('max(FP, FN)', size=label_size)
        cbar.ax.tick_params(size=0., labelsize=tick_size)

figure_path = '../figures'
path_temp = './'
for folder in split_path(figure_path):
    path_temp = path_temp + '/' + folder
    mkdirp(path_temp)
fig_FPFN.savefig(figure_path + '/FPFN_performance.eps')
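Example #11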
                  for i in range(1, xi)] * binsize.units
        print(delays)
        # Generating patterns
        stp = generate_stp(occ=occ, xi=xi, t_stop=t_stop, delays=delays)
        stps.extend(stp)
        # Merging the background spiketrains to the patterns
        for p_idx, p in enumerate(stp):
            sts.append(
                stg._pool_two_spiketrains(background_sts[l_idx * xi + p_idx],
                                          p))
    sts.extend(background_sts[num_neu_patt:])

    data_path = '../data'
    # Create path if not already existing
    path_temp = './'
    for folder in split_path(data_path):
        path_temp = path_temp + '/' + folder
        mkdirp(path_temp)
    # Storing data
    np.save(
        data_path + '/art_data.npy', {
            'patt': stps,
            'data': sts,
            'params': {
                'xi': xi,
                'occ': occ,
                'rate': rate,
                't_stop': t_stop,
                'n': n,
                'lengths': lengths,
                'binsize': binsize,
Example #12
def evaluate_grid_search(res_files, out_file, raw_data_dir, f_type, scoring):
    """ Calculate a score for each parameter tuple based on the number of peaks
    and the classes of detected peaks.

    Args:
        res_files (list of str): Absolute or relative path to classified peaks
            detected in the optimization samples.
        out_file (str): Absolute or relative path to the prospective output file.
        raw_data_dir (str): Absolute or relative path to the data directory
            containing all samples.
        f_type (str): File type of the data samples.
        scoring (dict of str: str): Scoring values for the several classes. Key:
            class number, value: scoring value for the class.

    """

    # Load scoring
    str_to_class = {j: int(i) for i, j in valid_classes.items()}
    scoring = {str_to_class[i]: j for i, j in scoring.items()}
    # Group result files by samples
    sample_results = {}
    for res_file in res_files:
        sample = os.path.basename(res_file).split('__')[0]
        try:
            sample_results[sample].append(res_file)
        except KeyError:
            sample_results[sample] = [res_file]
    algorithm = utils.split_path(res_files[0])[1]
    # Iterate over detected peaks within one sample but detected by different
    # parameter tuples.
    results = None
    for sample, sample_files in sample_results.items():
        sample_res = pd.DataFrame()
        for res_file in sample_files:     
            peaks = DetectedXCMSPeaks(res_file, algorithm, 0, 1, 0)
            score = peaks.get_score(scoring)
            idx = pd.MultiIndex.from_tuples(
                [tuple(peaks.params.values())], names=peaks.params.keys()
            )
            new_row = pd.DataFrame({sample: score}, index=idx)

            # DataFrame.append was removed in pandas 2.0; concat is the replacement
            sample_res = pd.concat([sample_res, new_row], sort=False)
        sample_res.sort_index(inplace=True)
        if results is None:
            results = sample_res
        else:
            results = pd.concat([results, sample_res], axis=1)

    # Get mean scores and highest scored parameters
    results['mean'] = results.mean(axis=1)
    par_best = results['mean'].idxmax()
    par_best_dict = {}
    with open(out_file, 'w') as f:
        for idx, par_str in enumerate(results.index.names):
            f.write('{}\t{}\n'.format(par_str, par_best[idx]))
            par_best_dict[par_str] = par_best[idx]

    # Save all results 
    overview_file = out_file.replace('00', '01') \
        .replace('best_parameters', 'all_results') 
    results.to_csv(overview_file, sep='\t')
    return par_best_dict
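
Because the scores are indexed by a parameter MultiIndex, results['mean'].idxmax() returns the best parameter tuple directly. A self-contained illustration (parameter names and scores invented):

import pandas as pd

idx = pd.MultiIndex.from_tuples([(1, 0.5), (2, 0.8)], names=['ppm', 'snthresh'])
results = pd.DataFrame({'sample_A': [3.0, 5.0], 'sample_B': [4.0, 4.0]}, index=idx)
results['mean'] = results.mean(axis=1)
par_best = results['mean'].idxmax()              # (2, 0.8)
print(dict(zip(results.index.names, par_best)))  # {'ppm': 2, 'snthresh': 0.8}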
Example #13
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Extract and patch the interface without bootloader
"""
from options import get_options
from paths import get_interface_path, TMP_DIR
from utils import gen_binary, is_lpc, split_path

from os.path import join


if __name__ == '__main__':
    options = get_options()
    
    in_path = get_interface_path(options.interface, options.target, bootloader=False)
    _, name, _ = split_path(in_path)
    out_path = join(TMP_DIR, name + '.bin')
    
    print '\nELF: %s' % in_path
    gen_binary(in_path, out_path, is_lpc(options.interface))
    print "\nBINARY: %s" % out_path
Example #14
from gensim import corpora, similarities

parser = argparse.ArgumentParser(description="""Generate the matrix
representation associated with a corpus""")
parser.add_argument('file_path', type=str, help='The .tsv(.gz) file containing the corpus')
parser.add_argument('--stopwords', type=str, help='A file containing one stopword per line')
parser.add_argument('-v', '--verbose', action='store_true',
                    help='Display informational messages')
parser.add_argument('--saveindex', action='store_true',
                    help='If true, the script saves the similarity index for the corpus')
args = parser.parse_args()

if args.verbose:
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

input_file = utils.split_path(args.file_path)
if os.path.isfile(input_file['name'] + '_bow.mm'):
    raise IOError("The corpus already exists in matrix form")

if args.stopwords:
    with open(args.stopwords) as f:
        stopwords = [line.rstrip() for line in f]
else:
    stopwords = []

corpus = habeascorpus.HabeasCorpus(input_file['path'], stopwords)
corpus.dictionary.filter_extremes(no_below=5, no_above=0.5)
corpus.dictionary.save_as_text(input_file['name'] + '_wordids.txt')
corpora.mmcorpus.MmCorpus.serialize(input_file['name'] + '_bow.mm', corpus, progress_cnt=1000)

with open(input_file['path']) as f:
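
Here utils.split_path evidently returns a mapping with at least 'path' and 'name' keys, 'name' serving as a prefix for the derived output files. A hedged sketch of that variant:

import os

def split_path(file_path):
    # Dict-returning variant: 'name' is the path minus its final extension,
    # usable as a prefix for derived files. Sketch only.
    name, ext = os.path.splitext(file_path)
    return {'path': file_path, 'name': name, 'ext': ext}

print(split_path('corpus.tsv.gz')['name'])  # corpus.tsv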
Example #15
from utils import get_file_read, update_file, get_files_in_dir, split_path, clear_path, add_and_get_file

if __name__ == '__main__':
    test = int(sys.argv[1])
    if test == 0:
        test_path = SRC_PATH + '/FS_3'
        print(get_file_read("D1/1.1", test_path))

    elif test == 1:
        test_path = SRC_PATH + '/' + 'cache'
        file_name = 'D1/1.1'
        file_content = 'Hello test'
        print(update_file(file_name, test_path, file_content))

    elif test == 2:
        test_path = SRC_PATH + '/' + 'FS_1/D1'
        print(get_files_in_dir(test_path))
    elif test == 3:
        path = 'D1/1.1'
        print(split_path(path)[1])

    elif test == 4:
        test_path = SRC_PATH + '/' + 'temp'
        print(clear_path(test_path))

    elif test == 5:
        test_path = SRC_PATH + '/' + 'temp'
        file_name = 'D1/1.1'
        f = add_and_get_file(file_name, test_path)
        print(f.read())
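Example #16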
    comm = MPI.COMM_WORLD  # create MPI communicator
    rank = comm.Get_rank()  # get rank of current MPI task
    size = comm.Get_size()  # Get the total number of MPI processes
    print('Number of processes:{}'.format(size))

    # Compute spade res
    print('Running spade')
    spade_res = spade.spade(data,
                            binsize=binsize,
                            winlen=winlen,
                            n_surr=n_surr,
                            min_spikes=min_spikes,
                            max_spikes=max_spikes,
                            spectrum=spectrum,
                            alpha=1,
                            min_occ=10,
                            min_neu=10,
                            output_format='concepts',
                            psr_param=None)

    # Storing data
    res_path = '../results/{}/winlen{}'.format(sess_id, winlen)
    spectrum = '3d#'
    # Create path if not already existing
    path_temp = './'
    for folder in split_path(res_path):
        path_temp = path_temp + '/' + folder
        mkdirp(path_temp)

    np.save(res_path + '/data_results.npy', [spade_res, param])
Example #17
def plot(data_file,
         spectra,
         binsize,
         winlens,
         label_size=6,
         text_size=8,
         tick_size=5):
    """
    Function creating figure representing the difference of 2d- to 3d-SPADE
    in terms of statistical performance in the case of a data set with patterns
    of multiple durations.
    Parameters
    ----------
    data_file : dict
        the generated artificial data set, with 'data' (list of
        neo.SpikeTrain) and 'patt' (pattern trains) entries
    spectra : str
        flag indicating the type of spectra employed (thus the correspondent
        SPADE version)
        if spectra == '#':
            then 2d-SPADE is called
        elif spectra == '3d#':
            then 3d-SPADE is called
    binsize : quantity
        bin size necessary for the SPADE analysis
    winlens : list
        list of window lengths used for each run of SPADE
    label_size : int
        label size of the plot. Default is 6
    tick_size : int
        tick size of the plot. Default is 5
    text_size : int
        text size of the plot. Default is 8
    """
    # Plotting parameters
    inch2cm = 0.3937  # factor converting centimeters to inches (figsize given in cm)

    sts = data_file['data']
    stps = data_file['patt']

    figure = plt.figure(figsize=(12 * inch2cm, 9 * inch2cm))
    figure.subplots_adjust(left=.08,
                           right=.91,
                           wspace=.8,
                           hspace=1,
                           top=.95,
                           bottom=.1)
    ax_raster = plt.subplot2grid((3, 2 * np.sum(winlens[:5])), (0, 0),
                                 rowspan=1,
                                 colspan=2 * np.sum(winlens[:5]))
    for s_idx, s in enumerate(sts[:len(stps)]):
        s_sliced = s.time_slice(s.t_start + .15 * pq.s, 0.4 * pq.s)
        ax_raster.plot(s_sliced, [s_idx] * len(s_sliced), 'k.', markersize=2)
    for p_idx, p in enumerate(stps):
        p_sliced = p.time_slice(s.t_start + .15 * pq.s, 0.4 * pq.s)
        if p_idx == len(stps) - 1:
            ax_raster.plot(p_sliced, [p_idx] * len(p_sliced),
                           'ro',
                           label='STPs',
                           fillstyle='full',
                           markeredgecolor='red',
                           markersize=1.5)
        else:
            ax_raster.plot(p_sliced, [p_idx] * len(p_sliced),
                           'ro',
                           fillstyle='full',
                           markeredgecolor='red',
                           markersize=1.5)
    ax_raster.set_xlabel('time (s)', size=label_size)
    ax_raster.set_ylabel('neuron id', size=label_size)
    ax_raster.set_ylim(-0.5, 15)
    ax_raster.tick_params(labelsize=tick_size)

    # Plotting SPADE results
    p_values_table = {}
    for spectrum_idx, spectrum in enumerate(spectra):
        p_values_table[str(spectrum)] = {}
        ax_count = plt.subplot2grid((3, 2 * np.sum(winlens[:5])),
                                    (1, spectrum_idx * np.sum(winlens[:5])),
                                    rowspan=1,
                                    colspan=np.sum(winlens[:5]))
        # Plotting count and pval spectrum of patterns for each window length
        for w_idx, w in enumerate(winlens):
            # Load filtered results
            patterns, pval_spectrum, ns_sgnt, params = np.load(
                '../results/{}/winlen{}/filtered_patterns.npy'.format(
                    spectrum, w),
                encoding='latin1')

            ax_count.bar(w_idx + 1, len(patterns), color='k', width=0.05)
            if w_idx == len(winlens) - 1:
                ax_count.plot(w_idx + 1,
                              w_idx + 1,
                              'x',
                              markersize=4,
                              label='Number of STPs',
                              color='g')
            else:
                ax_count.plot(w_idx + 1,
                              w_idx + 1,
                              'x',
                              markersize=4,
                              color='g')
            ax_count.set_yticks(np.arange(0, len(stps) + 1))
            for tick in ax_count.xaxis.get_major_ticks():
                tick.label.set_fontsize(tick_size)
            # Plotting pvalue spectra
            pval_matrix = np.zeros((4, w))
            for sgnt in pval_spectrum:
                if spectrum == '#':
                    for length in range(w):
                        pval_matrix[sgnt[1] - 2, length] = sgnt[2]
                elif spectrum == '3d#':
                    pval_matrix[sgnt[1] - 2, sgnt[2]] = sgnt[3]
            ax_pval_spectrum = plt.subplot2grid(
                (3, 2 * np.sum(winlens[:5])),
                (2, spectrum_idx * np.sum(winlens[:5]) + sum(winlens[:w_idx])),
                colspan=sum(winlens[w_idx:w_idx + 1]))
            pcol = ax_pval_spectrum.pcolor(pval_matrix,
                                           norm=LogNorm(vmin=0.0001, vmax=1),
                                           cmap=plt.cm.YlGnBu)
            if spectrum == '#':
                for duration in range(pval_matrix.shape[1]):
                    for occ in range(2, pval_matrix.shape[0] + 2):
                        if (3, occ) not in ns_sgnt:
                            ax_pval_spectrum.plot(duration + 0.5,
                                                  occ - 2 + 0.5,
                                                  'ro',
                                                  markersize=1)
            elif spectrum == '3d#':
                for duration in range(pval_matrix.shape[1]):
                    for occ in range(2, pval_matrix.shape[0] + 2):
                        if (3, occ, duration) not in ns_sgnt:
                            ax_pval_spectrum.plot(duration + 0.5,
                                                  occ - 2 + 0.5,
                                                  'ro',
                                                  markersize=1)
            x_grid = np.arange(0, pval_matrix.shape[1] + 1, 1)
            y_grid = np.arange(0, pval_matrix.shape[0] + 1, 1)
            x_ticks = np.arange(0.5, pval_matrix.shape[1], 2)
            y_ticks = np.arange(0.5, pval_matrix.shape[0])
            x, y = np.meshgrid(x_grid, y_grid)
            c = np.ones_like(x)
            ax_pval_spectrum.pcolor(x, y, c, facecolor='none', edgecolor='k')
            ax_pval_spectrum.set_xticks(x_ticks)
            ax_pval_spectrum.set_xticklabels(range(0, pval_matrix.shape[1], 2),
                                             size=tick_size)
            if w_idx == len(winlens) // 2 + 1:
                ax_pval_spectrum.set_xlabel('d (ms)', size=label_size)
            if w_idx == 0 and spectrum == '#':
                ax_pval_spectrum.set_yticks(y_ticks)
                ax_pval_spectrum.set_yticklabels(range(
                    2, pval_matrix.shape[0] + 2),
                                                 size=tick_size)
                ax_pval_spectrum.set_ylabel('c', size=label_size)
            else:
                ax_pval_spectrum.set_yticklabels(())
                ax_pval_spectrum.set_yticks(())
            if w_idx == len(winlens) - 1 and spectrum == '3d#':
                cbar = figure.colorbar(pcol,
                                       ticks=[0.0001, 0.001, 0.01, 0.1, 1],
                                       ax=ax_pval_spectrum)
                cbar.set_label('p-values', size=label_size)
                cbar.ax.tick_params(labelsize=tick_size)
                cbar.ax.minorticks_off()
            ax_pval_spectrum.set_xlim(0, pval_matrix.shape[1])
            ax_pval_spectrum.set_title(int(w * binsize.magnitude),
                                       size=tick_size)
            if w == 13:
                # print(pval_matrix[-2, :])
                pattern_length = [0, 2, 6, 8, 12]
                p_val_arr = np.take(pval_matrix[2, :], pattern_length)
                p_values_table[str(spectrum)][str(w)] = p_val_arr
                # print(p_val_arr)

        if spectrum == '#':
            ax_count.set_title('2d-SPADE', size=text_size)
            ax_count.set_ylabel('detected STPs', size=label_size)
        elif spectrum == '3d#':
            ax_count.set_title('3d-SPADE', size=text_size)
            ax_count.set_yticks(())
        ax_count.set_xticks(range(1, len(winlens) + 1))
        ax_count.set_xticklabels(winlens * int(binsize.magnitude))
        ax_count.tick_params(labelsize=tick_size)
        ax_count.set_ylim([0, len(winlens) + 1])
        ax_count.set_xlabel('w (ms)', size=label_size)
        if spectrum_idx == 0:
            ax_count.legend(loc='best', fontsize=label_size)
    figure_path = '../figures'
    path_temp = './'
    for folder in split_path(figure_path):
        path_temp = path_temp + '/' + folder
        mkdirp(path_temp)
    fig_formats = ['eps', 'png']
    for format in fig_formats:
        figure.savefig(figure_path +
                       '/raster_patt_count_spectra.{}'.format(format))
    # save p_values_table (only for window = 13 and n_occ = 4)
    #np.save('../figures/pvalues_table.npy', p_values_table)
    plt.show()
Example #18
def compute_profiling_time(key, expected_num_spikes, rate, t_stop, n,
                           winlen, binsize, num_rep=10):
    """
    Function computing the profiling time needed to run SPADE on artificial
    poisson data of given rate, recording time, and number of neurons
    Parameters
    ----------
    key: list
        list of keys of the varying variable of the profiling analysis.
        Maximum of three keys, can be either 'neurons', 'time' and
        'rate'.
    expected_num_spikes: int
        expected number of spikes of the generated spike train
    rate: quantity
        rate of the poisson process
    t_stop: quantity
        duration of the spike trains
    n: int
        number of spike trains
    winlen: int
        window length for the SPADE analysis
    binsize: quantity
        binsize for the SPADE analysis
    num_rep: int
        number of repetitions of the profiling measurement. Default is 10
    """

    time_fast_fca = 0.
    time_fpgrowth = 0.
    for rep in range(num_rep):
        # Generating artificial data
        data = []
        for i in range(n):
            np.random.seed(0)
            data.append(stg.homogeneous_poisson_process(
                rate=rate, t_start=0*pq.s, t_stop=t_stop))

        # Extracting Closed Frequent Itemset with FP-Growth
        t0 = time.time()
        # Binning the data and clipping (binary matrix)
        binary_matrix = conv.BinnedSpikeTrain(data, binsize).to_bool_array()
        # Computing the context and the binary matrix encoding the relation
        # between objects (window positions) and attributes (spikes,
        # indexed with a number equal to  neuron idx*winlen+bin idx)
        context, transactions, rel_matrix = spade._build_context(binary_matrix,
                                                                 winlen)
        # Applying FP-Growth
        fim_results = [i for i in spade._fpgrowth(
                    transactions,
                    rel_matrix=rel_matrix,
                    winlen=winlen)]
        time_fpgrowth += time.time() - t0

        # Extracting Closed Frequent Itemset with Fast_fca
        t1 = time.time()
        # Binning the data and clipping (binary matrix)
        binary_matrix = conv.BinnedSpikeTrain(data, binsize).to_bool_array()
        # Computing the context and the binary matrix encoding the relation
        # between objects (window positions) and attributes (spikes,
        # indexed with a number equal to  neuron idx*winlen+bin idx)
        context, transactions, rel_matrix = \
            spade._build_context(binary_matrix, winlen)
        # Applying Fast-FCA
        fim_results = spade._fast_fca(context, winlen=winlen)
        time_fast_fca += time.time() - t1

    time_profiles = {'fp_growth': time_fpgrowth/num_rep,
                     'fast_fca': time_fast_fca/num_rep}

    # Storing data
    res_path = '../results/{}/{}/'.format(key, expected_num_spikes)
    # Create path if not already existing
    path_temp = './'
    for folder in split_path(res_path):
        path_temp = path_temp + '/' + folder
        mkdirp(path_temp)

    np.save(res_path + '/profiling_results.npy', {'results': time_profiles,
            'parameters': {'rate': rate, 't_stop': t_stop, 'n': n,
                           'winlen': winlen, 'binsize': binsize}})
def plot(time_fpgrowth, time_fast_fca, keys, label_size=8, tick_size=6):
    """
    Function plotting the profiling time for all SPADE components (FIM and
    PSF), comparing the run time for the fpgrowth and the fast_fca algorithm
    (both implemented in SPADE).
    Parameters
    ----------
    time_fpgrowth : dictionary
        dictionary with profiling time of the fpgrowth algorithm,
        with keys depending on the parameters varying given by 'keys'
        and of the number of estimated spikes in the dataset
    time_fast_fca : dictionary
        dictionary similar to time_fpgrowth, but obtained by the run
        of fast_fca algorithm
    keys : list
        list of keys of the varying variable of the profiling analysis.
        Maximum of three keys, can be either 'neurons', 'time' and
        'rate'. Depending on the keys array, the function produces the
        corresponding plots (1 to 3 panels)
    label_size: int
        label size of the plot. Default is 8
    tick_size: int
        tick size of the plot. Default is 6

    """
    # Plot configurations
    inch2cm = 2.540  # conversion from inches to centimeters

    max_num_colors = 10
    colors = sns.color_palette("muted", max_num_colors)
    f, ax = plt.subplots(2, 3, figsize=(18.4 / inch2cm, 9.2 / inch2cm))
    f.subplots_adjust(wspace=0.15,
                      hspace=0.05,
                      bottom=0.2,
                      left=0.07,
                      right=0.98)

    interpolate = False
    linestyle = ':'

    y1_lower_plot = np.array([])
    y2_lower_plot = np.array([])
    y2_upper_plot = np.array([])

    for key_idx, key in enumerate(keys):
        # Plotting all functions
        # Create plots by column
        axes_vertical = [ax[0][key_idx]] + [ax[1][key_idx]]
        for idx, axes in enumerate(axes_vertical):
            # Plot FP-growth
            compute_xy(time_fpgrowth[key],
                       count_spikes,
                       axes,
                       function=square,
                       label="FP-growth (C++)",
                       colors=colors[1],
                       marker="o",
                       interpolate=interpolate,
                       linestyle='-',
                       markerfacecolor='None')
            # Plot FCA
            compute_xy(time_fast_fca[key],
                       count_spikes,
                       axes,
                       function=linear,
                       label="Fast-FCA (Python)",
                       colors=colors[0],
                       marker="o",
                       interpolate=interpolate,
                       linestyle=linestyle,
                       markerfacecolor=colors[0])
            # Plot Spectra
            compute_xy(np.array(time_fpgrowth[key]) * 2000,
                       count_spikes,
                       axes,
                       function=poly4,
                       label="2d FP-growth",
                       colors=colors[9],
                       marker="o",
                       interpolate=interpolate,
                       linestyle='-',
                       markerfacecolor='None')
            compute_xy(np.array(time_fpgrowth[key]) * 2000,
                       count_spikes,
                       axes,
                       function=poly4,
                       label="3d FP-growth",
                       colors=colors[3],
                       marker="o",
                       interpolate=interpolate,
                       linestyle=linestyle,
                       markerfacecolor=colors[3])
            compute_xy(np.array(time_fast_fca[key]) * 2000,
                       count_spikes,
                       axes,
                       function=poly4,
                       label="2d Fast-FCA",
                       colors=colors[4],
                       marker="o",
                       interpolate=interpolate,
                       linestyle='-',
                       markerfacecolor='None')
            compute_xy(np.array(time_fast_fca[key]) * 2000,
                       count_spikes,
                       axes,
                       function=poly4,
                       label="3d Fast-FCA",
                       colors=colors[2],
                       marker="o",
                       interpolate=interpolate,
                       linestyle=linestyle,
                       markerfacecolor=colors[2])

        # Axes specific things
        # Ax 0

        ax[0][key_idx].set_xticks(count_spikes)
        ax[0][key_idx].set_xticklabels([], size=tick_size)
        # ax[0].set_xlabel("number of spikes", size=label_size)
        ax[0][key_idx].tick_params(axis='both', length=2., labelsize=tick_size)
        # Ax 1
        ax[1][key_idx].tick_params(axis='y', which='minor', left='off')
        ax[1][key_idx].set_xticks(count_spikes)
        ax[1][key_idx].set_xticklabels(count_spikes, size=tick_size)
        ax[1][key_idx].set_xlabel("$N_s$", size=label_size)
        ax[1][key_idx].set_yscale('log')
        ax[1][key_idx].tick_params(axis='both', length=2., labelsize=tick_size)

        # set titles only for upper plots
        if key == 'rate':
            title = 'T = ' + str(t_stops[0]) + 's, N = ' + str(ns[0])
            ax[0][key_idx].set_title(title, size=label_size)
        elif key == 'time':
            title = 'N = ' + str(ns[0]) + ', $\lambda$ = ' + str(rates[0]) + \
                    'Hz'
            ax[0][key_idx].set_title(title, size=label_size)
        elif key == 'neurons':
            title = 'T = ' + str(t_stops[0]) + 's, $\lambda$ = ' + \
                    str(rates[0]) + 'Hz'
            ax[0][key_idx].set_title(title, size=label_size)
        else:
            raise ValueError('key not valid')

        if key_idx == 0:
            ax[0][key_idx].set_ylabel("compute time (min)", size=label_size)
            ax[1][key_idx].set_ylabel('log (compute time)', size=label_size)
            # Put legend position
            legend = ax[0][key_idx].legend(loc="best",
                                           numpoints=1,
                                           markerscale=0.9,
                                           prop={"size": label_size - 2},
                                           frameon=True,
                                           borderpad=0.5)
            legend.get_frame().set_edgecolor('grey')

        # Set second x-axis
        ax2 = ax[1][key_idx].twiny()

        # Decide the ticklabel position in the new x-axis,
        # then convert them to the position in the old x-axis
        if key == 'rate':
            newlabel = rates
            label_add_ax = '$\lambda$ (Hz)'
        elif key == 'time':
            newlabel = t_stops
            label_add_ax = 'T (s)'
        elif key == 'neurons':
            newlabel = ns
            label_add_ax = 'N'
        else:
            raise ValueError('key not valid')

        ax2.set_xticks(count_spikes)
        ax2.tick_params(length=2., labelsize=tick_size)
        ax2.xaxis.set_ticks_position('bottom')
        ax2.xaxis.set_label_position('bottom')
        ax2.spines['bottom'].set_position(('outward', 25))
        ax2.set_xlabel(label_add_ax, size=label_size)
        ax2.set_xlim(ax[1][key_idx].get_xlim())
        ax2.set_xticklabels(newlabel, size=tick_size)

        # make the lower share the same ylim
        y1_l, y2_l = ax[1][key_idx].get_ylim()
        y1_lower_plot = np.append(y1_lower_plot, y1_l)
        y2_lower_plot = np.append(y2_lower_plot, y2_l)

        # make the time and the neurons plot share the same y axis (only in the
        # max value)
        if key == 'neurons' or key == 'time':
            y1_u, y2_u = ax[0][key_idx].get_ylim()
            y2_upper_plot = np.append(y2_upper_plot, y2_u)

    ymin_l = np.max(y1_lower_plot)
    ymax_l = np.max(y2_lower_plot)
    ymax_u = np.max(y2_upper_plot)
    for key_idx, key in enumerate(keys):
        ax[1][key_idx].set_ylim(ymin_l, ymax_l)
        if key == 'neurons' or key == 'time':
            y1_u, y2_u = ax[0][key_idx].get_ylim()
            ax[0][key_idx].set_ylim(y1_u, ymax_u)

    # Comment this if you want set manually the space between edges and graph
    # see above subplots_adjust
    # plt.tight_layout()

    figname = 'profiling_times'
    figpath = '../figures/'
    path_temp = './'
    for folder in split_path(figpath):
        path_temp = path_temp + '/' + folder
        mkdirp(path_temp)
    f.savefig(figpath + figname + '.' + 'eps', format='eps')
                # Generating the stp
                np.random.seed(i * 100 + xi + occurr)
                stp = generate_stp(occurr, xi, 1 * pq.s,
                                   np.arange(5, 5 * (xi), 5) * pq.ms)
                # Merging the stp in the first xi sts
                sts_pool = [0] * xi
                for st_id, st in enumerate(stp):
                    sts_pool[st_id] = stg._pool_two_spiketrains(st, sts[st_id])
                # Storing datasets containing stps
                if i == 0:
                    sts_rep['sts_%iocc_%ixi' %
                            (occurr, xi)] = [sts_pool + sts[xi:]]
                    sts_rep['stp_%iocc_%ixi' % (occurr, xi)] = [stp]
                else:
                    sts_rep['sts_%iocc_%ixi' % (occurr, xi)].append(sts_pool +
                                                                    sts[xi:])
                    sts_rep['stp_%iocc_%ixi' % (occurr, xi)].append(stp)
            sts_rep['params_background'] = params_background

    # Saving the datasets
    filepath = '../data/'
    path_temp = './'
    for folder in split_path(filepath):
        path_temp = path_temp + '/' + folder
        mkdirp(path_temp)

    filename = 'stp_data%i' % (data_idx)
    np.save(filepath + filename, sts_rep)

print((time.time() - t0))
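
For wall-clock measurements like the ones in compute_profiling_time above, time.perf_counter() is the higher-resolution alternative to time.time(). The accumulate-and-average pattern in isolation:

import time

def profile(fn, num_rep=10):
    # Average wall-clock time of fn() over num_rep runs.
    total = 0.0
    for _ in range(num_rep):
        t0 = time.perf_counter()
        fn()
        total += time.perf_counter() - t0
    return total / num_rep

print(profile(lambda: sum(range(100000))))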
Example #21
    def __init__(self,
                 root_path: str,
                 image_size: Tuple[int, int, int],
                 transform: Compose = None,
                 training: bool = True,
                 crops: Tuple[Tuple[int, int],
                              Tuple[int, int]] = ((0, 384), (0, 384))):

        super(DatasetFolder, self).__init__(root_path,
                                            transform=transform,
                                            target_transform=None)

        # Use phase images only when the input has more than one channel
        phase = image_size[0] != 1

        folders = set()

        input_images_amp = {}
        input_images_phase = {}
        reference_images_amp = {}
        reference_images_phase = {}

        for root, dirs, files in os.walk(root_path):
            if config.INPUT_FOLDER_NAME in root and config.AMP_FOLDER_NAME in root:
                folder_name = split_path(root)[-3]
                folders.add(folder_name)
                input_images_amp[folder_name] = make_dataset(root, files)
            if config.INPUT_FOLDER_NAME in root and config.PHASE_FOLDER_NAME in root and phase:
                folder_name = split_path(root)[-3]
                folders.add(folder_name)
                input_images_phase[folder_name] = make_dataset(root, files)
            if config.REFERENCE_FOLDER_NAME in root and config.AMP_FOLDER_NAME in root and training:
                folder_name = split_path(root)[-3]
                folders.add(folder_name)
                reference_images_amp[folder_name] = make_dataset(root, files)
            if config.REFERENCE_FOLDER_NAME in root and config.PHASE_FOLDER_NAME in root and phase and training:
                folder_name = split_path(root)[-3]
                folders.add(folder_name)
                reference_images_phase[folder_name] = make_dataset(root, files)

        self.input_images_amp = []
        self.input_images_phase = []
        self.reference_images_amp = []
        self.reference_images_phase = []

        for i in folders:
            self.input_images_amp.extend(input_images_amp[i])
            if phase:
                try:
                    self.input_images_phase.extend(input_images_phase[i])
                except KeyError:
                    print(
                        f'Could not find input phase folder for {i} although phase was required'
                    )

            if training:
                assert (len(input_images_amp[i]) == len(reference_images_amp[i])), f'Not the same amount of input ' \
                                                                                   f'and ' \
                                                                                   f'reference images in amp folder ' \
                                                                                   f'{i}!'
                try:
                    self.reference_images_amp.extend(reference_images_amp[i])
                except KeyError:
                    print(
                        f'Error, loader in training mode requires a reference folder for every input folder ({i})!'
                    )
                if phase:
                    assert (len(input_images_phase[i]) == len(reference_images_phase[i])), f'Not the same amount of ' \
                                                                                           f'input and reference ' \
                                                                                           f'images in phase folder ' \
                                                                                           f'{i}! '
                    try:
                        self.reference_images_phase.extend(
                            reference_images_phase[i])
                    except KeyError:
                        print(
                            f'Could not find reference phase folder for {i} although phase was required'
                        )

        self.phase = phase
        self.image_size = (image_size[1], image_size[2])
        self.training = training
        self.crops = crops
        self.val = False
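
split_path(root)[-3] pulls the dataset folder name out of paths shaped like <root>/<folder>/<input>/<amp>. With pathlib the same index works on Path.parts (folder names invented):

from pathlib import Path

root = Path('datasets') / 'sample_01' / 'input' / 'amp'
print(root.parts[-3])  # sample_01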