def merge_detected_peaks(peak_files, out_file, raw_data_dir, f_type, tol, min_pw, min_mz):
    """ Merge peaks detected by different algorithms in the same sample

    Args:
        peak_files (list of str): Absolute or relative paths to classified peaks detected by the different algorithms.
        out_file (str): Absolute or relative path to the prospective output file.
        raw_data_dir (str): Absolute or relative path to the data directory containing all samples.
        f_type (str): File type of the data samples.
        tol (int|float): RT window within which peaks are merged.
        min_pw (int|float): Minimum peak width necessary for considering a detected peak a real peak.
        min_mz (int): Minimum number of detected m/z values necessary for considering a detected peak a real peak.

    """
    for peak_file in peak_files:
        algorithm = utils.split_path(peak_file)[1]
        new_peaks = DetectedXCMSPeaks(peak_file, algorithm, min_pw, min_mz, tol)
        try:
            peaks.join(new_peaks, tol)
        except NameError:
            # First iteration: 'peaks' is not bound yet
            peaks = new_peaks

    file_path_split = utils.split_path(peak_files[0])
    sample_name = '{}.{}'.format(file_path_split[-1].split('__')[0], f_type)
    sample_path = os.path.join(raw_data_dir, file_path_split[-2], sample_name)

    peaks.add_spectra(load_sampledata(sample_path))
    peaks.save_data(out_file)
    peaks.save_dropped(out_file.replace('merged', 'dropped'))
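# The try/except NameError above works because 'peaks' is unbound on the
# first loop iteration. A minimal sketch of the same accumulation with an
# explicit sentinel instead (assuming DetectedXCMSPeaks.join(other, tol)
# merges 'other' into the existing object, as in merge_detected_peaks):
peaks = None
for peak_file in peak_files:
    algorithm = utils.split_path(peak_file)[1]
    new_peaks = DetectedXCMSPeaks(peak_file, algorithm, min_pw, min_mz, tol)
    if peaks is None:
        peaks = new_peaks
    else:
        peaks.join(new_peaks, tol)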
def parse_args():
    global DEBUG
    parser = ap.ArgumentParser()
    parser.add_argument("filepath", help="path to input audio file")
    parser.add_argument("trans_id", help="id in the database")
    parser.add_argument("-i", "--indir", help="directory containing input audio file, if filepath does not contain path")
    parser.add_argument("-d", "--debug", help="enable debugging output", type=bool)
    parser.add_argument("-s", "--sumpath", help="path to summary file. Can either be file or dir. If dir, summary file stored in dir/basefilename.txt")
    parser.add_argument("-f", "--segdir", help="directory to store temporary audio segment/fragment files")
    args = parser.parse_args()

    cfg_params = ut.get_cfg_params()
    filepath = ""
    sumpath = ""
    segdir = ""

    #Get the input file path
    #FIX: Optimization,
    if os.path.isfile(args.filepath):
        filepath = args.filepath
    drct, fl = ut.split_path(args.filepath)
    if not drct:
        if args.indir and not os.path.isdir(args.indir):
            print "Invalid input directory specified"
            raise Exception
    if not fl:
        print "Invalid input file specified"
        raise Exception
    filepath = ut.dir_path(drct or args.indir or cfg_params["in_dir"]) + fl
    if not os.path.exists(filepath):
        print "Input file not found. Filepath is {}".format(filepath)
        raise Exception

    #Get summary file path
    if args.sumpath:
        drct, fl = ut.split_path(args.sumpath)
        if drct and fl:
            sumpath = args.sumpath
        elif drct:
            sumpath = ut.dir_path(drct) + ut.base_filename(filepath)
        elif fl:
            sumpath = ut.dir_path(cfg_params["summary_dir"]) + fl
        else:
            print "Invalid summary path specified"
            raise Exception
    else:
        sumpath = ut.dir_path(cfg_params["summary_dir"]) + ut.base_filename(filepath)
    #Append extension
    if sumpath[-5:] != ".smry":
        sumpath += ".smry"

    #Get segment directory path
    if args.segdir and not os.path.isdir(args.segdir):
        print "Invalid segment file directory specified"
        raise Exception
    segdir = ut.dir_path(args.segdir or cfg_params["seg_file_dir"])

    #Set Debug flag
    if args.debug:
        DEBUG = args.debug

    return (filepath, sumpath, segdir)
def aws_search(awspaths, time=(datetime(2000,1,1), datetime.now())):
    """
    Search data in AWS storage using AWS CLI

    :param awspaths: list of paths to search using AWS CLI
    :param time: time interval in datetime format to consider the data from
    :return result: metadata of the satellite data found
    """
    ls_cmd = 'aws s3 ls {} --recursive --no-sign-request'
    result = {}
    for awspath in awspaths:
        cmd = ls_cmd.format(awspath).split()
        r = aws_request(cmd)
        if r is not None:
            for line in r.decode().split('\n'):
                if len(line):
                    _, _, file_size, file_name = list(map(str.strip, line.split()))
                    info = parse_filename(file_name)
                    if time[0] <= info['start_date'] <= time[1]:
                        base = split_path(awspath)[0]
                        url = osp.join('s3://', base, file_name)
                        file_basename = osp.basename(file_name)
                        product_id = 'A{:04d}{:03d}_{:02d}{:02d}'.format(
                            info['start_date'].year,
                            info['start_date'].timetuple().tm_yday,
                            info['start_date'].hour,
                            info['start_date'].minute)
                        result.update({product_id: {
                            'file_name': file_basename,
                            'file_remote_path': file_name,
                            'time_start': utc_to_utcf(info['start_date']),
                            'time_end': utc_to_utcf(info['end_date']),
                            'updated': datetime.now(),
                            'url': url,
                            'file_size': int(file_size),
                            'domain': info['domain'],
                            'satellite': 'G{}'.format(info['satellite']),
                            'mode': info['mode'],
                            'product_id': product_id}})
    return result
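# Hypothetical invocation of aws_search() above; the GOES-16 bucket/prefix
# and the one-hour time window are only examples, and parse_filename() is
# assumed to understand the listed file names:
from datetime import datetime, timedelta

start = datetime(2021, 7, 1, 12, 0)
meta = aws_search(['noaa-goes16/ABI-L2-FDCC/2021/182/12/'],
                  time=(start, start + timedelta(hours=1)))
for product_id, entry in sorted(meta.items()):
    print(product_id, entry['url'], entry['file_size'])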
def _build_speaker_dict(self, data_path):
    speaker_dict = dict(enumerate([
        split_path(s)[1]
        for s in sorted(glob.glob('{}/*.wav'.format(data_path)))
    ]))
    return speaker_dict
def restore_all(password_file=None):
    for file_ in get_files(ATK_VAULT):
        if os.path.basename(file_) == 'encrypted':
            # Get the path without the atk vault base and encrypted filename
            original_path = os.path.join(*split_path(file_)[1:-1])
            restore(original_path, password_file)
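# The split_path helper differs between the repositories excerpted here
# (some return a (dir, file) tuple, some a dict, some a list of components).
# For restore_all() above, os.path.join(*split_path(file_)[1:-1]) only makes
# sense if it returns the list of path components, so a hypothetical sketch
# of that variant is:
import os

def split_path(path):
    # Split a path into all of its components, e.g.
    # 'atk_vault/some/key/encrypted' -> ['atk_vault', 'some', 'key', 'encrypted']
    parts = []
    while True:
        head, tail = os.path.split(path)
        if tail:
            parts.insert(0, tail)
            path = head
        elif head:
            parts.insert(0, head)
            break
        else:
            break
    return parts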
def add_from_txt(txt_folder_name=r'..\..\papers_data'):
    file_paths = []
    listdir(txt_folder_name, file_paths, ['.txt'])
    start_time = time.time()
    res_json_text = []
    org_years = collections.defaultdict(set)  # for json name.
    for txt_file_path in tqdm(file_paths, ncols=100, desc='process txts'):
        folder_name, org, org_year, file_name = split_path(txt_file_path)
        name = file_name.split('.txt')[0]
        year = org_year.split('_')[1]
        json_body = {}
        json_body['Organization'] = org
        json_body['Name'] = name
        json_body['Year'] = year
        with open(txt_file_path, 'r', encoding='utf8') as f:
            json_body['Text'] = clean_text(f.read())
        json_start = '{"index":{"_id":\"' + curlmd5(json_body['Name']) + '\"}}'
        json_body = json_start + '\n' + json.dumps(json_body) + '\n'
        res_json_text.append([org, json_body])
        org_years[org].add(year)

    out_folder = 'txt2ES_json'
    if not os.path.exists(out_folder):
        os.mkdir(out_folder)
    for _org in org_years.keys():
        org_years[_org] = '_'.join(sorted(org_years[_org]))
        with open(f'./{out_folder}/{_org}_{org_years[_org]}.json', 'w', encoding='utf8') as f:
            for __org, _json_body in res_json_text:
                if __org == _org:
                    f.write(_json_body)
    end_time = time.time()
    print(f'time cost: {end_time - start_time:.2f}s.')
    return None
def parse_args(fpath=None, trans_id=None, idr=None, dbg=None, spath=None, sgdr=None):
    global DEBUG
    parser = ap.ArgumentParser()
    if not fpath:
        parser.add_argument("filepath", help="path to input audio file")
    if not trans_id:
        parser.add_argument("trans_id", help="id in the database")
    if not idr:
        parser.add_argument("-i", "--indir", help="directory containing input audio file, if filepath does not contain path")
    if not dbg:
        parser.add_argument("-d", "--debug", help="enable debugging output", type=bool)
    if not spath:
        parser.add_argument("-s", "--sumpath", help="path to summary file. Can either be file or dir. If dir, summary file stored in dir/basefilename.txt")
    if not sgdr:
        parser.add_argument("-f", "--segdir", help="directory to store temporary audio segment/fragment files")
    args = parser.parse_args()

    cfg_params = ut.get_cfg_params()
    filepath = ""
    sumpath = ""
    segdir = ""
    directory = ""

    #Get the input file path
    if fpath and os.path.isfile(fpath):
        filepath = fpath
    else:
        drct, fl = ut.split_path(fpath if fpath else args.filepath)
        if not fl:
            print "Invalid input file: '{}'".format(drct + "/" + fl)
            raise Exception
        if not drct:
            directory = idr or args.indir or cfg_params["in_dir"]
            if not os.path.isdir(directory):
                print "Invalid input directory '{}'".format(directory)
                raise Exception
            filepath = ut.dir_path(directory) + fl
        else:
            filepath = ut.dir_path(drct) + fl
        if not os.path.exists(filepath):
            print "Input file not found. Filepath is {}".format(filepath)
            raise Exception

    #Get summary file path
    sumpath = spath or args.sumpath
    if sumpath:
        drct, fl = ut.split_path(sumpath)
        directory = drct
        if drct and fl:
            pass
        elif drct:
            sumpath = ut.dir_path(drct) + ut.base_filename(filepath)
        elif fl:
            directory = ut.dir_path(cfg_params["summary_dir"])
            sumpath = directory + fl
        else:
            #TODO: Check if this is necessary
            print "Invalid summary path {}".format(sumpath)
            raise Exception
    else:
        directory = ut.dir_path(cfg_params["summary_dir"])
        sumpath = directory + ut.base_filename(filepath)
    #Append extension
    if sumpath[-5:] != ".smry":
        sumpath += ".smry"
    #If dir does not exist, create it
    if not os.path.exists(directory):
        if DEBUG:
            print "Creating dir {}".format(directory)
        os.makedirs(directory)

    #Get segment directory path
    segdir = sgdr or args.segdir or cfg_params["seg_file_dir"]
    if not os.path.exists(segdir):
        if DEBUG:
            print "Creating dir {}".format(segdir)
        os.makedirs(segdir)

    #Set Debug flag
    if args.debug:
        DEBUG = args.debug

    return (filepath, sumpath, segdir)
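# Note on the "-d/--debug" option in both parse_args() versions above:
# argparse's type=bool is a well-known pitfall, since bool("False") is True,
# so any non-empty value enables DEBUG. A minimal sketch of the usual
# alternative (an assumption, not the original interface) uses a store_true
# flag instead:
import argparse as ap

_parser = ap.ArgumentParser()
_parser.add_argument("-d", "--debug", action="store_true", help="enable debugging output")
assert _parser.parse_args(["-d"]).debug is True
assert _parser.parse_args([]).debug is False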
                             binsize, winlen, dither=dither, n_surr=n_surr,
                             min_spikes=min_spikes, min_occ=min_occ,
                             spectrum=spectrum)
    else:
        output = spade.spade(spikedata, binsize, winlen, dither=dither,
                             n_surr=0, min_spikes=min_spikes, min_occ=min_occ,
                             spectrum=spectrum)
    # Storing data
    if rank == 0:
        results[rep] = output

if rank == 0:
    # Storing results
    path = '../results/results_data{}'.format(data_idx)
    path_temp = './'
    for folder in split_path(path):
        path_temp = path_temp + '/' + folder
        mkdirp(path_temp)
    if xi == 0 and occ == 0:
        np.save(path + '/result_ind', [results, param])
    else:
        np.save(path + '/result_xi{}_occ{}'.format(xi, occ), [results, param])
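# The split_path/mkdirp loop above (the same idiom recurs in several of the
# snippets below) builds each directory level one at a time. Assuming mkdirp
# is a "create if missing" helper, a single standard-library call achieves
# the same result:
import os

os.makedirs('../results/results_data{}'.format(data_idx), exist_ok=True)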
                                  norm=LogNorm(vmin=0.0001, vmax=1),
                                  cmap=plt.cm.RdPu)
    ax_max_FPFNs.plot(np.where(max_FPFNs > 0.05)[0] + 0.5,
                      np.where(max_FPFNs > 0.05)[1] + 0.5, 'o',
                      markerfacecolor="None", markeredgecolor='k',
                      markersize=1.8)
    ax_max_FPFNs.tick_params(length=1.2)
    ax_max_FPFNs.set_xticks(np.arange(0.5, 9, 1))
    ax_max_FPFNs.set_xticklabels(range(3, 11), size=tick_size)
    ax_max_FPFNs.set_xlim([0, 8])
    ax_max_FPFNs.set_xlabel('pattern size $(z)$', size=label_size)
    ax_max_FPFNs.set_yticks(np.arange(0.5, 9, 1))
    ax_max_FPFNs.set_yticklabels(range(3, 11), size=tick_size)
    ax_max_FPFNs.set_ylim([0, 8])
    if max_FPFNs_idx == 0:
        ax_max_FPFNs.set_ylabel('$\#$ occurrences$(c)$', size=label_size)
    if max_FPFNs_idx == len(data_idxs) - 1:
        cbar = colorbar(pcol_max_FPFNs)
        # cbar = fig_FPFN.colorbar(pcol_max_FPFNs, ax=ax_max_FPFNs, fraction=.01)
        cbar.set_label('max(FP, FN)', size=label_size)
        cbar.ax.tick_params(size=0., labelsize=tick_size)

figure_path = '../figures'
path_temp = './'
for folder in split_path(figure_path):
    path_temp = path_temp + '/' + folder
    mkdirp(path_temp)
fig_FPFN.savefig(figure_path + '/FPFN_performance.eps')
          for i in range(1, xi)] * binsize.units
print(delays)
# Generating patterns
stp = generate_stp(occ=occ, xi=xi, t_stop=t_stop, delays=delays)
stps.extend(stp)
# Merging the background spiketrains to the patterns
for p_idx, p in enumerate(stp):
    sts.append(
        stg._pool_two_spiketrains(background_sts[l_idx * xi + p_idx], p))
sts.extend(background_sts[num_neu_patt:])

data_path = '../data'
# Create path if not already existing
path_temp = './'
for folder in split_path(data_path):
    path_temp = path_temp + '/' + folder
    mkdirp(path_temp)
# Storing data
np.save(
    data_path + '/art_data.npy', {
        'patt': stps,
        'data': sts,
        'params': {
            'xi': xi,
            'occ': occ,
            'rate': rate,
            't_stop': t_stop,
            'n': n,
            'lengths': lengths,
            'binsize': binsize,
def evaluate_grid_search(res_files, out_file, raw_data_dir, f_type, scoring):
    """ Calculate a score for each parameter tuple based on the number of peaks
    and the classes of detected peaks.

    Args:
        res_files (list of str): Absolute or relative paths to classified peaks detected in the optimization samples.
        out_file (str): Absolute or relative path to the prospective output file.
        raw_data_dir (str): Absolute or relative path to the data directory containing all samples.
        f_type (str): File type of the data samples.
        scoring (dict of str: str): Scoring values for the several classes. Key: class number, value: scoring value for the class.

    """
    # Load scoring
    str_to_class = {j: int(i) for i, j in valid_classes.items()}
    scoring = {str_to_class[i]: j for i, j in scoring.items()}

    # Group result files by samples
    sample_results = {}
    for res_file in res_files:
        sample = os.path.basename(res_file).split('__')[0]
        try:
            sample_results[sample].append(res_file)
        except KeyError:
            sample_results[sample] = [res_file]

    algorithm = utils.split_path(res_files[0])[1]
    # Iterate over detected peaks within one sample but detected by different
    # parameter tuples.
    for sample, sample_files in sample_results.items():
        sample_res = pd.DataFrame()
        for res_file in sample_files:
            peaks = DetectedXCMSPeaks(res_file, algorithm, 0, 1, 0)
            score = peaks.get_score(scoring)

            idx = pd.MultiIndex.from_tuples(
                [tuple(peaks.params.values())], names=peaks.params.keys()
            )
            new_row = pd.DataFrame({sample: score}, index=idx)
            sample_res = sample_res.append(new_row, sort=False)

        sample_res.sort_index(inplace=True)
        try:
            results = pd.concat([results, sample_res], axis=1)
        except NameError:
            results = sample_res

    # Get mean scores and highest scored parameters
    results['mean'] = results.mean(axis=1)
    par_best = results['mean'].idxmax()

    par_best_dict = {}
    with open(out_file, 'w') as f:
        for idx, par_str in enumerate(results.index.names):
            f.write('{}\t{}\n'.format(par_str, par_best[idx]))
            par_best_dict[par_str] = par_best[idx]

    # Save all results
    overview_file = out_file.replace('00', '01') \
        .replace('best_parameters', 'all_results')
    results.to_csv(overview_file, sep='\t')

    return par_best_dict
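# Note on the per-sample accumulation above: pandas removed DataFrame.append
# in 2.0 (deprecated since 1.4). A sketch of the same row collection using
# pd.concat, reusing the names from evaluate_grid_search():
rows = []
for res_file in sample_files:
    peaks = DetectedXCMSPeaks(res_file, algorithm, 0, 1, 0)
    idx = pd.MultiIndex.from_tuples(
        [tuple(peaks.params.values())], names=list(peaks.params.keys()))
    rows.append(pd.DataFrame({sample: peaks.get_score(scoring)}, index=idx))
sample_res = pd.concat(rows, sort=False)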
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Extract and patch the interface without bootloader
"""
from options import get_options
from paths import get_interface_path, TMP_DIR
from utils import gen_binary, is_lpc, split_path
from os.path import join

if __name__ == '__main__':
    options = get_options()
    in_path = get_interface_path(options.interface, options.target, bootloader=False)
    _, name, _ = split_path(in_path)
    out_path = join(TMP_DIR, name + '.bin')

    print '\nELF: %s' % in_path
    gen_binary(in_path, out_path, is_lpc(options.interface))
    print "\nBINARY: %s" % out_path
from gensim import corpora, similarities

parser = argparse.ArgumentParser(description="""Generate the matrix representation associated with a corpus""")
parser.add_argument('file_path', type=str, help='The .tsv(.gz) file containing the corpus')
parser.add_argument('--stopwords', type=str, help='A file containing one stopword per line')
parser.add_argument('-v', '--verbose', action='store_true', help='Print informational messages')
parser.add_argument('--saveindex', action='store_true', help='If set, the script saves the similarity index for the corpus')
args = parser.parse_args()

if args.verbose:
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

input_file = utils.split_path(args.file_path)
if os.path.isfile(input_file['name'] + '_bow.mm'):
    raise IOError("The corpus already exists in matrix form")

if args.stopwords:
    with open(args.stopwords) as f:
        stopwords = [line.rstrip() for line in f]
else:
    stopwords = []

corpus = habeascorpus.HabeasCorpus(input_file['path'], stopwords)
corpus.dictionary.filter_extremes(no_below=5, no_above=0.5)
corpus.dictionary.save_as_text(input_file['name'] + '_wordids.txt')
corpora.mmcorpus.MmCorpus.serialize(input_file['name'] + '_bow.mm', corpus, progress_cnt=1000)

with open(input_file['path']) as f:
from utils import get_file_read, update_file, get_files_in_dir, split_path, clear_path, add_and_get_file

if __name__ == '__main__':
    test = int(sys.argv[1])
    if test == 0:
        test_path = SRC_PATH + '/FS_3'
        print(get_file_read("D1/1.1", test_path))
    if test == 1:
        test_path = SRC_PATH + '/' + 'cache'
        file_name = 'D1/1.1'
        file_content = 'Hello test'
        print(update_file(file_name, test_path, file_content))
    if test == 2:
        test_path = SRC_PATH + '/' + 'FS_1/D1'
        print(get_files_in_dir(test_path))
    elif test == 3:
        path = 'D1/1.1'
        print(split_path(path)[1])
    elif test == 4:
        test_path = SRC_PATH + '/' + 'temp'
        print(clear_path(test_path))
    elif test == 5:
        test_path = SRC_PATH + '/' + 'temp'
        file_name = 'D1/1.1'
        f = add_and_get_file(file_name, test_path)
        print(f.read())
comm = MPI.COMM_WORLD   # create MPI communicator
rank = comm.Get_rank()  # get rank of current MPI task
size = comm.Get_size()  # get the total number of MPI processes
print('Number of processes:{}'.format(size))

# Compute spade res
print('Running spade')
spade_res = spade.spade(data, binsize=binsize, winlen=winlen, n_surr=n_surr,
                        min_spikes=min_spikes, max_spikes=max_spikes,
                        spectrum=spectrum, alpha=1, min_occ=10, min_neu=10,
                        output_format='concepts', psr_param=None)

# Storing data
res_path = '../results/{}/winlen{}'.format(sess_id, winlen)
spectrum = '3d#'
# Create path if not already existing
path_temp = './'
for folder in split_path(res_path):
    path_temp = path_temp + '/' + folder
    mkdirp(path_temp)
np.save(res_path + '/data_results.npy', [spade_res, param])
def plot(data_file, spectra, binsize, winlens, label_size=6, text_size=8, tick_size=5):
    """
    Function creating figure representing the difference of 2d- to 3d-SPADE
    in terms of statistical performance in the case of a data set with
    patterns of multiple durations.

    Parameters
    ----------
    data_file : list of neo.SpikeTrains
        file with the generated artificial data set
    spectra : str
        flag indicating the type of spectra employed (thus the correspondent
        SPADE version): if spectra == '#' then 2d-SPADE is called, elif
        spectra == '3d#' then 3d-SPADE is called
    binsize : quantity
        bin size necessary for the SPADE analysis
    winlens : list
        list of window lengths used for each run of SPADE
    label_size : int
        label size of the plot. Default is 6
    tick_size : int
        tick size of the plot. Default is 5
    text_size : int
        text size of the plot. Default is 8
    """
    # Plotting parameters
    inch2cm = 0.3937
    sts = data_file['data']
    stps = data_file['patt']
    figure = plt.figure(figsize=(12 * inch2cm, 9 * inch2cm))
    figure.subplots_adjust(left=.08, right=.91, wspace=.8, hspace=1, top=.95, bottom=.1)
    ax_raster = plt.subplot2grid((3, 2 * np.sum(winlens[:5])), (0, 0),
                                 rowspan=1, colspan=2 * np.sum(winlens[:5]))
    for s_idx, s in enumerate(sts[:len(stps)]):
        s_sliced = s.time_slice(s.t_start + .15 * pq.s, 0.4 * pq.s)
        ax_raster.plot(s_sliced, [s_idx] * len(s_sliced), 'k.', markersize=2)
    for p_idx, p in enumerate(stps):
        p_sliced = p.time_slice(s.t_start + .15 * pq.s, 0.4 * pq.s)
        if p_idx == len(stps) - 1:
            ax_raster.plot(p_sliced, [p_idx] * len(p_sliced), 'ro', label='STPs',
                           fillstyle='full', markeredgecolor='red', markersize=1.5)
        else:
            ax_raster.plot(p_sliced, [p_idx] * len(p_sliced), 'ro',
                           fillstyle='full', markeredgecolor='red', markersize=1.5)
    ax_raster.set_xlabel('time (s)', size=label_size)
    ax_raster.set_ylabel('neuron id', size=label_size)
    ax_raster.set_ylim(-0.5, 15)
    ax_raster.tick_params(labelsize=tick_size)

    # Plotting SPADE results
    p_values_table = {}
    for spectrum_idx, spectrum in enumerate(spectra):
        p_values_table[str(spectrum)] = {}
        ax_count = plt.subplot2grid((3, 2 * np.sum(winlens[:5])),
                                    (1, spectrum_idx * np.sum(winlens[:5])),
                                    rowspan=1, colspan=np.sum(winlens[:5]))
        # Plotting count and pval spectrum of patterns for each window length
        for w_idx, w in enumerate(winlens):
            # Load filtered results
            patterns, pval_spectrum, ns_sgnt, params = np.load(
                '../results/{}/winlen{}/filtered_patterns.npy'.format(spectrum, w),
                encoding='latin1')
            ax_count.bar(w_idx + 1, len(patterns), color='k', width=0.05)
            if w_idx == len(winlens) - 1:
                ax_count.plot(w_idx + 1, w_idx + 1, 'x', markersize=4,
                              label='Number of STPs', color='g')
            else:
                ax_count.plot(w_idx + 1, w_idx + 1, 'x', markersize=4, color='g')
            ax_count.set_yticks(np.arange(0, len(stps) + 1))
            for tick in ax_count.xaxis.get_major_ticks():
                tick.label.set_fontsize(tick_size)
            # Plotting pvalue spectra
            pval_matrix = np.zeros((4, w))
            for sgnt in pval_spectrum:
                if spectrum == '#':
                    for length in range(w):
                        pval_matrix[sgnt[1] - 2, length] = sgnt[2]
                elif spectrum == '3d#':
                    pval_matrix[sgnt[1] - 2, sgnt[2]] = sgnt[3]
            ax_pval_spectrum = plt.subplot2grid(
                (3, 2 * np.sum(winlens[:5])),
                (2, spectrum_idx * np.sum(winlens[:5]) + sum(winlens[:w_idx])),
                colspan=sum(winlens[w_idx:w_idx + 1]))
            pcol = ax_pval_spectrum.pcolor(pval_matrix,
                                           norm=LogNorm(vmin=0.0001, vmax=1),
                                           cmap=plt.cm.YlGnBu)
            if spectrum == '#':
                for duration in range(pval_matrix.shape[1]):
                    for occ in range(2, pval_matrix.shape[0] + 2):
                        if (3, occ) not in ns_sgnt:
                            ax_pval_spectrum.plot(duration + 0.5, occ - 2 + 0.5,
                                                  'ro', markersize=1)
            elif spectrum == '3d#':
                for duration in range(pval_matrix.shape[1]):
                    for occ in range(2, pval_matrix.shape[0] + 2):
                        if (3, occ, duration) not in ns_sgnt:
                            ax_pval_spectrum.plot(duration + 0.5, occ - 2 + 0.5,
                                                  'ro', markersize=1)
            x_grid = np.arange(0, pval_matrix.shape[1] + 1, 1)
            y_grid = np.arange(0, pval_matrix.shape[0] + 1, 1)
            x_ticks = np.arange(0.5, pval_matrix.shape[1], 2)
            y_ticks = np.arange(0.5, pval_matrix.shape[0])
            x, y = np.meshgrid(x_grid, y_grid)
            c = np.ones_like(x)
            ax_pval_spectrum.pcolor(x, y, c, facecolor='none', edgecolor='k')
            ax_pval_spectrum.set_xticks(x_ticks)
            ax_pval_spectrum.set_xticklabels(range(0, pval_matrix.shape[1], 2),
                                             size=tick_size)
            if w_idx == len(winlens) // 2 + 1:
                ax_pval_spectrum.set_xlabel('d (ms)', size=label_size)
            if w_idx == 0 and spectrum == '#':
                ax_pval_spectrum.set_yticks(y_ticks)
                ax_pval_spectrum.set_yticklabels(range(2, pval_matrix.shape[0] + 2),
                                                 size=tick_size)
                ax_pval_spectrum.set_ylabel('c', size=label_size)
            else:
                ax_pval_spectrum.set_yticklabels(())
                ax_pval_spectrum.set_yticks(())
            if w_idx == len(winlens) - 1 and spectrum == '3d#':
                cbar = figure.colorbar(pcol, ticks=[0.0001, 0.001, 0.01, 0.1, 1],
                                       ax=ax_pval_spectrum)
                cbar.set_label('p-values', size=label_size)
                cbar.ax.tick_params(labelsize=tick_size)
                cbar.ax.minorticks_off()
            ax_pval_spectrum.set_xlim(0, pval_matrix.shape[1])
            ax_pval_spectrum.set_title(int(w * binsize.magnitude), size=tick_size)
            if w == 13:
                # print(pval_matrix[-2, :])
                pattern_length = [0, 2, 6, 8, 12]
                p_val_arr = np.take(pval_matrix[2, :], pattern_length)
                p_values_table[str(spectrum)][str(w)] = p_val_arr
                # print(p_val_arr)
        if spectrum == '#':
            ax_count.set_title('2d-SPADE', size=text_size)
            ax_count.set_ylabel('detected STPs', size=label_size)
        elif spectrum == '3d#':
            ax_count.set_title('3d-SPADE', size=text_size)
            ax_count.set_yticks(())
        ax_count.set_xticks(range(1, len(winlens) + 1))
        ax_count.set_xticklabels(winlens * int(binsize.magnitude))
        ax_count.tick_params(labelsize=tick_size)
        ax_count.set_ylim([0, len(winlens) + 1])
        ax_count.set_xlabel('w (ms)', size=label_size)
        if spectrum_idx == 0:
            ax_count.legend(loc='best', fontsize=label_size)

    figure_path = '../figures'
    path_temp = './'
    for folder in split_path(figure_path):
        path_temp = path_temp + '/' + folder
        mkdirp(path_temp)
    fig_formats = ['eps', 'png']
    for format in fig_formats:
        figure.savefig(figure_path + '/raster_patt_count_spectra.{}'.format(format))
    # save p_values_table (only for window = 13 and n_occ = 4)
    # np.save('../figures/pvalues_table.npy', p_values_table)
    plt.show()
def compute_profiling_time(key, expected_num_spikes, rate, t_stop, n, winlen,
                           binsize, num_rep=10):
    """
    Function computing the profiling time needed to run SPADE on artificial
    poisson data of given rate, recording time, and number of neurons

    Parameters
    ----------
    key : list
        list of keys of the varying variable of the profiling analysis.
        Maximum of three keys, can be either 'neurons', 'time' and 'rate'.
    expected_num_spikes : int
        expected number of spikes of the generated spike train
    rate : quantity
        rate of the poisson process
    t_stop : quantity
        duration of the spike trains
    n : int
        number of spike trains
    winlen : int
        window length for the SPADE analysis
    binsize : quantity
        binsize for the SPADE analysis
    num_rep : int
        number of repetitions of the profiling run
    """
    time_fast_fca = 0.
    time_fpgrowth = 0.
    for rep in range(num_rep):
        # Generating artificial data
        data = []
        for i in range(n):
            np.random.seed(0)
            data.append(stg.homogeneous_poisson_process(
                rate=rate, t_start=0*pq.s, t_stop=t_stop))

        # Extracting Closed Frequent Itemsets with FP-Growth
        t0 = time.time()
        # Binning the data and clipping (binary matrix)
        binary_matrix = conv.BinnedSpikeTrain(data, binsize).to_bool_array()
        # Computing the context and the binary matrix encoding the relation
        # between objects (window positions) and attributes (spikes,
        # indexed with a number equal to neuron idx*winlen+bin idx)
        context, transactions, rel_matrix = spade._build_context(binary_matrix, winlen)
        # Applying FP-Growth
        fim_results = [i for i in spade._fpgrowth(
            transactions, rel_matrix=rel_matrix, winlen=winlen)]
        time_fpgrowth += time.time() - t0

        # Extracting Closed Frequent Itemsets with Fast-FCA
        t1 = time.time()
        # Binning the data and clipping (binary matrix)
        binary_matrix = conv.BinnedSpikeTrain(data, binsize).to_bool_array()
        # Computing the context and the binary matrix encoding the relation
        # between objects (window positions) and attributes (spikes,
        # indexed with a number equal to neuron idx*winlen+bin idx)
        context, transactions, rel_matrix = \
            spade._build_context(binary_matrix, winlen)
        # Applying Fast-FCA
        fim_results = spade._fast_fca(context, winlen=winlen)
        time_fast_fca += time.time() - t1

    time_profiles = {'fp_growth': time_fpgrowth/num_rep,
                     'fast_fca': time_fast_fca/num_rep}
    # Storing data
    res_path = '../results/{}/{}/'.format(key, expected_num_spikes)
    # Create path if not already existing
    path_temp = './'
    for folder in split_path(res_path):
        path_temp = path_temp + '/' + folder
        mkdirp(path_temp)
    np.save(res_path + '/profiling_results.npy',
            {'results': time_profiles,
             'parameters': {'rate': rate, 't_stop': t_stop, 'n': n,
                            'winlen': winlen, 'binsize': binsize}})
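# Note on the timing above: time.perf_counter() (Python >= 3.3) is a
# monotonic, high-resolution clock and is usually preferred over time.time()
# for measuring elapsed run time. A minimal sketch of the FP-Growth timing
# block using it (same calls as in compute_profiling_time):
t0 = time.perf_counter()
binary_matrix = conv.BinnedSpikeTrain(data, binsize).to_bool_array()
context, transactions, rel_matrix = spade._build_context(binary_matrix, winlen)
fim_results = [i for i in spade._fpgrowth(transactions, rel_matrix=rel_matrix, winlen=winlen)]
time_fpgrowth += time.perf_counter() - t0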
def plot(time_fpgrowth, time_fast_fca, keys, label_size=8, tick_size=6):
    """
    Function plotting the profiling time for all SPADE components (FIM and
    PSF), comparing the run time for the fpgrowth and the fast_fca algorithm
    (both implemented in SPADE).

    Parameters
    ----------
    time_fpgrowth : dictionary
        dictionary with profiling time of the fpgrowth algorithm, with keys
        depending on the parameters varying given by 'keys' and of the number
        of estimated spikes in the dataset
    time_fast_fca : dictionary
        dictionary similar to time_fpgrowth, but obtained by the run of the
        fast_fca algorithm
    keys : list
        list of keys of the varying variable of the profiling analysis.
        Maximum of three keys, can be either 'neurons', 'time' and 'rate'.
        Depending on the keys array, the function produces the corresponding
        plots (1 to 3 panels)
    label_size : int
        label size of the plot. Default is 8
    tick_size : int
        tick size of the plot. Default is 6
    """
    # Plot configurations
    inch2cm = 2.540  # conversion from inches to centimeters
    max_num_colors = 10
    cm = plt.get_cmap('hsv')
    cmap = sns.color_palette("muted", max_num_colors)
    colors = {}
    for i in range(1, max_num_colors + 1):
        color = cm(1. * i // max_num_colors)
        colors[i - 1] = color
    colors = cmap
    f, ax = plt.subplots(2, 3, figsize=(18.4 / inch2cm, 9.2 / inch2cm))
    f.subplots_adjust(wspace=0.15, hspace=0.05, bottom=0.2, left=0.07, right=0.98)
    interpolate = False
    linestyle = ':'
    y1_lower_plot = np.array([])
    y2_lower_plot = np.array([])
    y2_upper_plot = np.array([])
    for key_idx, key in enumerate(keys):
        # Plotting all functions
        # Create plots by column
        axes_vertical = [ax[0][key_idx]] + [ax[1][key_idx]]
        for idx, axes in enumerate(axes_vertical):
            # Plot FP-growth
            compute_xy(time_fpgrowth[key], count_spikes, axes, function=square,
                       label="FP-growth (C++)", colors=colors[1], marker="o",
                       interpolate=interpolate, linestyle='-',
                       markerfacecolor='None')
            # Plot FCA
            compute_xy(time_fast_fca[key], count_spikes, axes, function=linear,
                       label="Fast-FCA (Python)", colors=colors[0], marker="o",
                       interpolate=interpolate, linestyle=linestyle,
                       markerfacecolor=colors[0])
            # Plot Spectra
            compute_xy(np.array(time_fpgrowth[key]) * 2000, count_spikes, axes,
                       function=poly4, label="2d FP-growth", colors=colors[9],
                       marker="o", interpolate=interpolate, linestyle='-',
                       markerfacecolor='None')
            compute_xy(np.array(time_fpgrowth[key]) * 2000, count_spikes, axes,
                       function=poly4, label="3d FP-growth", colors=colors[3],
                       marker="o", interpolate=interpolate, linestyle=linestyle,
                       markerfacecolor=colors[3])
            compute_xy(np.array(time_fast_fca[key]) * 2000, count_spikes, axes,
                       function=poly4, label="2d Fast-FCA", colors=colors[4],
                       marker="o", interpolate=interpolate, linestyle='-',
                       markerfacecolor='None')
            compute_xy(np.array(time_fast_fca[key]) * 2000, count_spikes, axes,
                       function=poly4, label="3d Fast-FCA", colors=colors[2],
                       marker="o", interpolate=interpolate, linestyle=linestyle,
                       markerfacecolor=colors[2])

        # Axes specific things
        # Ax 0
        ax[0][key_idx].set_xticks(count_spikes)
        ax[0][key_idx].set_xticklabels([], size=tick_size)
        # ax[0].set_xlabel("number of spikes", size=label_size)
        ax[0][key_idx].tick_params(axis='both', length=2., labelsize=tick_size)

        # Ax 1
        ax[1][key_idx].tick_params(axis='y', which='minor', left='off')
        ax[1][key_idx].set_xticks(count_spikes)
        ax[1][key_idx].set_xticklabels(count_spikes, size=tick_size)
        ax[1][key_idx].set_xlabel("$N_s$", size=label_size)
        ax[1][key_idx].set_yscale('log')
        ax[1][key_idx].tick_params(axis='both', length=2., labelsize=tick_size)

        # Set titles only for upper plots
        if key == 'rate':
            title = 'T = ' + str(t_stops[0]) + 's, N = ' + str(ns[0])
            ax[0][key_idx].set_title(title, size=label_size)
        elif key == 'time':
            title = 'N = ' + str(ns[0]) + ', $\lambda$ = ' + str(rates[0]) + 'Hz'
            ax[0][key_idx].set_title(title, size=label_size)
        elif key == 'neurons':
            title = 'T = ' + str(t_stops[0]) + 's, $\lambda$ = ' + str(rates[0]) + 'Hz'
            ax[0][key_idx].set_title(title, size=label_size)
        else:
            raise ValueError('key not valid')

        if key_idx == 0:
            ax[0][key_idx].set_ylabel("compute time (min)", size=label_size)
            ax[1][key_idx].set_ylabel('log (compute time)', size=label_size)

        # Put legend position
        legend = ax[0][key_idx].legend(loc="best", numpoints=1, markerscale=0.9,
                                       prop={"size": label_size - 2},
                                       frameon=True, borderpad=0.5)
        legend.get_frame().set_edgecolor('grey')

        # Set second x-axis
        ax2 = ax[1][key_idx].twiny()
        # Decide the ticklabel position in the new x-axis,
        # then convert them to the position in the old x-axis
        if key == 'rate':
            newlabel = rates
            label_add_ax = '$\lambda$ (Hz)'
        elif key == 'time':
            newlabel = t_stops
            label_add_ax = 'T (s)'
        elif key == 'neurons':
            newlabel = ns
            label_add_ax = 'N'
        else:
            raise ValueError('key not valid')
        ax2.set_xticks(count_spikes)
        ax2.tick_params(length=2., labelsize=tick_size)
        ax2.xaxis.set_ticks_position('bottom')
        ax2.xaxis.set_label_position('bottom')
        ax2.spines['bottom'].set_position(('outward', 25))
        ax2.set_xlabel(label_add_ax, size=label_size)
        ax2.set_xlim(ax[1][key_idx].get_xlim())
        ax2.set_xticklabels(newlabel, size=tick_size)

        # Make the lower plots share the same ylim
        y1_l, y2_l = ax[1][key_idx].get_ylim()
        y1_lower_plot = np.append(y1_lower_plot, y1_l)
        y2_lower_plot = np.append(y2_lower_plot, y2_l)
        # Make the time and the neurons plot share the same y axis (only in
        # the max value)
        if key == 'neurons' or key == 'time':
            y1_u, y2_u = ax[0][key_idx].get_ylim()
            y2_upper_plot = np.append(y2_upper_plot, y2_u)

    ymin_l = np.max(y1_lower_plot)
    ymax_l = np.max(y2_lower_plot)
    ymax_u = np.max(y2_upper_plot)
    for key_idx, key in enumerate(keys):
        ax[1][key_idx].set_ylim(ymin_l, ymax_l)
        if key == 'neurons' or key == 'time':
            y1_u, y2_u = ax[0][key_idx].get_ylim()
            ax[0][key_idx].set_ylim(y1_u, ymax_u)

    # Comment this in if you want matplotlib to set the spacing between edges
    # and graph instead of the subplots_adjust call above
    # plt.tight_layout()

    figname = 'profiling_times'
    figpath = '../figures/'
    path_temp = './'
    for folder in split_path(figpath):
        path_temp = path_temp + '/' + folder
        mkdirp(path_temp)
    f.savefig(figpath + figname + '.' + 'eps', format='eps')
    # Generating the stp
    np.random.seed(i * 100 + xi + occurr)
    stp = generate_stp(occurr, xi, 1 * pq.s,
                       np.arange(5, 5 * (xi), 5) * pq.ms)
    # Merging the stp in the first xi sts
    sts_pool = [0] * xi
    for st_id, st in enumerate(stp):
        sts_pool[st_id] = stg._pool_two_spiketrains(st, sts[st_id])
    # Storing datasets containing stps
    if i == 0:
        sts_rep['sts_%iocc_%ixi' % (occurr, xi)] = [sts_pool + sts[xi:]]
        sts_rep['stp_%iocc_%ixi' % (occurr, xi)] = [stp]
    else:
        sts_rep['sts_%iocc_%ixi' % (occurr, xi)].append(sts_pool + sts[xi:])
        sts_rep['stp_%iocc_%ixi' % (occurr, xi)].append(stp)

sts_rep['params_background'] = params_background
# Saving the datasets
filepath = '../data/'
path_temp = './'
for folder in split_path(filepath):
    path_temp = path_temp + '/' + folder
    mkdirp(path_temp)
filename = 'stp_data%i' % (data_idx)
np.save(filepath + filename, sts_rep)
print((time.time() - t0))
def __init__(self, root_path: str, image_size: Tuple[int, int, int],
             transform: Compose = None, training: bool = True,
             crops: Tuple[Tuple[int, int], Tuple[int, int]] = ((0, 384), (0, 384))):
    super(DatasetFolder, self).__init__(root_path, transform=transform,
                                        target_transform=None)
    if image_size[0] == 1:
        phase = False
    else:
        phase = True

    folders = set()
    input_images_amp = {}
    input_images_phase = {}
    reference_images_amp = {}
    reference_images_phase = {}
    for root, dirs, files in os.walk(root_path):
        if config.INPUT_FOLDER_NAME in root and config.AMP_FOLDER_NAME in root:
            folder_name = split_path(root)[-3]
            folders.add(folder_name)
            input_images_amp[folder_name] = make_dataset(root, files)
        if config.INPUT_FOLDER_NAME in root and config.PHASE_FOLDER_NAME in root and phase:
            folder_name = split_path(root)[-3]
            folders.add(folder_name)
            input_images_phase[folder_name] = make_dataset(root, files)
        if config.REFERENCE_FOLDER_NAME in root and config.AMP_FOLDER_NAME in root and training:
            folder_name = split_path(root)[-3]
            folders.add(folder_name)
            reference_images_amp[folder_name] = make_dataset(root, files)
        if config.REFERENCE_FOLDER_NAME in root and config.PHASE_FOLDER_NAME in root and phase and training:
            folder_name = split_path(root)[-3]
            folders.add(folder_name)
            reference_images_phase[folder_name] = make_dataset(root, files)

    self.input_images_amp = []
    self.input_images_phase = []
    self.reference_images_amp = []
    self.reference_images_phase = []
    for i in folders:
        self.input_images_amp.extend(input_images_amp[i])
        if phase:
            try:
                self.input_images_phase.extend(input_images_phase[i])
            except KeyError:
                print(f'Could not find input phase folder for {i} although phase was required')
        if training:
            assert len(input_images_amp[i]) == len(reference_images_amp[i]), \
                f'Not the same amount of input and reference images in amp folder {i}!'
            try:
                self.reference_images_amp.extend(reference_images_amp[i])
            except KeyError:
                print(f'Error, loader in training mode requires a reference folder for every input folder ({i})!')
            if phase:
                assert len(input_images_phase[i]) == len(reference_images_phase[i]), \
                    f'Not the same amount of input and reference images in phase folder {i}!'
                try:
                    self.reference_images_phase.extend(reference_images_phase[i])
                except KeyError:
                    print(f'Could not find reference phase folder for {i} although phase was required')

    self.phase = phase
    self.image_size = (image_size[1], image_size[2])
    self.training = training
    self.crops = crops
    self.val = False