def main():
    from_str = args.cf.split('/')[-1].split('.')[0]
    df_file = args.pref + from_str + '_' + str(args.yr) \
        + '_feat_matrix_unnormed.pkl'
    if args.split == None:
        df_raw = pd.read_pickle(df_file)
    else:
        # combine data matrix and score pickles
        split_files = glob.glob(args.pref + 'split_dm/*' + args.split + '*unnorm*.pkl')
        sort_files = natsort.natsorted(split_files)
        split_scores = glob.glob(args.pref + 'split_dm/*' + args.split + '*scores*.pkl')
        sort_scores = natsort.natsorted(split_scores)
        for ndx, pf in enumerate(sort_files):
            if ndx == 0:
                df_raw = pd.read_pickle(pf)
                scores = pd.read_pickle(sort_scores[ndx])
            else:
                df_raw = df_raw.append(pd.read_pickle(pf))
                scores = scores.append(pd.read_pickle(sort_scores[ndx]))
        # combine score pickles
        df_file_out = df_file.replace('_feat_matrix_unnormed', '_scores')
        scores.to_pickle(df_file_out)
    # cut columns of all zeros
    df_trim = df_raw[df_raw.columns[(df_raw != 0).any()]]
    # normalize on a per candidate basis
    df_trim_norm = df_trim.div(df_trim.sum(axis=1), axis=0)
    df_file_out = df_file.replace('unnormed', 'trim_normed')
    df_trim_norm.to_pickle(df_file_out)
def find_idle_busy_slaves(parser, args, show_idle):
    parser.add_option(
        '-b', '--builder', dest='builders', action='append', default=[],
        help='Builders to filter on')
    parser.add_option(
        '-s', '--slave', dest='slaves', action='append', default=[],
        help='Slaves to filter on')
    options, args, buildbot = parser.parse_args(args)
    if args:
        parser.error('Unrecognized parameters: %s' % ' '.join(args))
    if not options.builders:
        options.builders = buildbot.builders.keys
    for builder in options.builders:
        builder = buildbot.builders[builder]
        if options.slaves:
            # Only the subset of slaves connected to the builder.
            slaves = list(set(options.slaves).intersection(set(builder.slaves.names)))
            if not slaves:
                continue
        else:
            slaves = builder.slaves.names
        busy_slaves = [build.slave.name for build in builder.current_builds]
        if show_idle:
            slaves = natsorted(set(slaves) - set(busy_slaves))
        else:
            slaves = natsorted(set(slaves) & set(busy_slaves))
        if options.quiet:
            for slave in slaves:
                print slave
        else:
            if slaves:
                print 'Builder %s: %s' % (builder.name, ', '.join(slaves))
    return 0
def test_natsorted_with_LOCALE_and_mixed_input_returns_sorted_results_without_error():
    load_locale('en_US')
    a = ['0', 'Á', '2', 'Z']
    assert natsorted(a) == ['0', '2', 'Z', 'Á']
    a = ['2', 'ä', 'b', 1.5, 3]
    assert natsorted(a, alg=ns.LOCALE) == [1.5, '2', 3, 'ä', 'b']
    locale.setlocale(locale.LC_ALL, str(''))
def test_natsorted_returns_sorted_list_with_mixed_type_input_and_does_not_raise_TypeError_on_Python3():
    # You can mix types with natsorted. This can get around the new
    # 'unorderable types' issue with Python 3.
    a = [6, 4.5, '7', '2.5', 'a']
    assert natsorted(a) == ['2.5', 4.5, 6, '7', 'a']
    a = [46, '5a5b2', 'af5', '5a5-4']
    assert natsorted(a) == ['5a5-4', '5a5b2', 46, 'af5']
def write_excel_data(dev_data, norm_to_ctrl, norm_to_mean):
    """Write data into a file"""
    # Define excel directory
    xls_dir = "./excel"
    # Change directory to EXPROOTPATH
    os.chdir(EXPROOTPATH)
    # Check to see if excel directory exists and if it doesn't make it
    try:
        os.makedirs(xls_dir)
    except OSError:
        if not os.path.isdir(xls_dir):
            raise
    # Reorder
    dev_data = dev_data.reorder_levels(['device', 'interval', 'well'])
    norm_to_ctrl = norm_to_ctrl.stack().unstack(-4).reorder_levels(['device', 'interval', 2])  # .sort_index(0)
    norm_to_mean = norm_to_mean.stack().unstack(-4).reorder_levels(['device', 'interval', 2])
    # Sort
    dev_data = dev_data.reindex(index=natsorted(dev_data.index))
    norm_to_ctrl = norm_to_ctrl.reindex(index=natsorted(norm_to_ctrl.index))
    norm_to_mean = norm_to_mean.reindex(index=natsorted(norm_to_mean.index))
    # Create the Excel Workbook
    writer = pd.ExcelWriter(xls_dir + "/" + 'data.xlsx', engine='xlsxwriter')
    # Write the data to the Excel Workbook
    dev_data.to_excel(writer, sheet_name='Raw_Device_Data')
    norm_to_ctrl.to_excel(writer, sheet_name='Ratio_to_Control')
    norm_to_mean.to_excel(writer, sheet_name='Ratio_to_Control_2')
def get_sheet_values(self, sheetname, includeEmptyCells=True):
    """
    Returns the values from the sheet name specified.

    Arguments:
            |  Sheet Name (string)                 |  The selected sheet that the cell values will be returned from.  |
            |  Include Empty Cells (default=True)  |  The empty cells will be included by default. To deactivate and only return cells with values, pass 'False' in the variable.  |
    Example:

    | *Keywords*        |  *Parameters*                                      |
    | Open Excel        |  C:\\Python27\\ExcelRobotTest\\ExcelRobotTest.xls  |
    | Get Sheet Values  |  TestSheet1                                        |
    """
    my_sheet_index = self.sheetNames.index(sheetname)
    sheet = self.wb.sheet_by_index(my_sheet_index)
    data = {}
    for row_index in range(sheet.nrows):
        for col_index in range(sheet.ncols):
            cell = cellname(row_index, col_index)
            value = sheet.cell(row_index, col_index).value
            data[cell] = value
    if includeEmptyCells is True:
        sortedData = natsort.natsorted(data.items(), key=itemgetter(0))
        return sortedData
    else:
        data = dict([(k, v) for (k, v) in data.items() if v])
        OrderedData = natsort.natsorted(data.items(), key=itemgetter(0))
        return OrderedData
def get_cover(filepath):
    path = root + '/' + filepath
    if os.path.isdir(path):
        files = quick(os.listdir(path))
        image = path + '/' + cover_cleaner(files)
        with open(image, 'rb') as file_:
            cover = file_.read()
        return cover
    elif os.path.isfile(path):
        filetype = path.split('.')[-1]
        if filetype == 'zip':
            with ZipFile(path) as archive:
                files = natsorted(archive.namelist())
                image = cover_cleaner(files)
                with archive.open(image) as file_:
                    cover = file_.read()
            return cover
        elif filetype == 'rar':
            with rarfile.RarFile(path) as archive:
                files = natsorted(archive.namelist())
                image = cover_cleaner(files)
                with archive.open(image) as file_:
                    cover = file_.read()
            return cover
def get_log_output(self, submission_id):
    '''Gets log output (standard output and error).

    Parameters
    ----------
    submission_id: int
        ID of the tool job
        :class:`Submission <tmlib.models.submission.Submission>`

    Returns
    -------
    Dict[str, str]
        "stdout" and "stderr" for the given job
    '''
    stdout_files = glob.glob(
        os.path.join(self._log_location, '*_%d_*.out' % submission_id)
    )
    stderr_files = glob.glob(
        os.path.join(self._log_location, '*_%d_*.err' % submission_id)
    )
    if not stdout_files or not stderr_files:
        raise IOError('No log files found for tool job #%d' % submission_id)
    # Take the most recent log files
    log = dict()
    with open(natsorted(stdout_files)[-1], 'r') as f:
        log['stdout'] = f.read()
    with open(natsorted(stderr_files)[-1], 'r') as f:
        log['stderr'] = f.read()
    return log
def stats_from_df(df):
    """Create a dictionary of summary statistics for a sample or prep template

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame representation of the sample or prep template to summarize

    Returns
    -------
    dict
        Dictionary object where the keys are the names of the metadata
        categories and the values are lists of tuples where the first element
        is the name of a metadata value in the category and the second element
        is the number of times that value was seen.
    """
    out = {}
    for column in natsorted(df.columns):
        # get a pandas series of the value-count pairs
        counts = df[column].value_counts()
        out[column] = [(key, counts[key]) for key in natsorted(counts.index)]
    return out
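# Hypothetical usage sketch for stats_from_df above (toy DataFrame, not taken
# from the source project); it only illustrates that both the columns and the
# per-column values come back in natural sort order.
import pandas as pd

toy = pd.DataFrame({"sample_id": ["s10", "s2", "s2"], "depth": ["10", "2", "10"]})
summary = stats_from_df(toy)
# summary is roughly:
# {'depth': [('2', 1), ('10', 2)], 'sample_id': [('s2', 2), ('s10', 1)]}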
def fpkm_from_htseq(bam_path, ruv_path, exn_file):
    """
    This function calculates fpkm from the htseq-count results.

    * bam_path: pathway that has bam files. Used to get total mapped reads.
    * ruv_path: pathway that has ruvseq corrected count data.
    * exn_file: 6 columns, including ['chr','start','end','geneid','traccess','strand'].
      Output file ends with .fpkm.
    """
    os.chdir(bam_path)
    bams = [f for f in os.listdir(bam_path) if f.endswith('.bam')]
    bams = natsorted(bams)
    # 1. get total count
    totalCount = []
    for b in bams:
        bamHandle = pysam.AlignmentFile(b, 'rb')
        totalCount.append(bamHandle.mapped)
    # 2. get rna_obj
    rna_df = pd.read_csv(exn_file, sep='\t', header=0, low_memory=False)
    rna_obj = trpr(rna_df)
    # 3. get length for each gene
    os.chdir(ruv_path)
    norm_count_files = [f for f in os.listdir(ruv_path) if f.endswith('.txt')]
    norm_count_files = natsorted(norm_count_files)
    for fn, total in zip(norm_count_files, totalCount):
        df = pd.read_csv(fn, sep=' ', header=None, names=['geneid', 'count'], index_col=0, low_memory=False)
        df['len'] = df.index.map(lambda x: rna_obj.get_gene_trpr_len(x, multi_chrom='Y'))
        df['fpkm'] = df['count'] / float(total) / df['len'] * 10**9
        df['fpkm'].ix[:-20].to_csv(fn[:-3] + 'fpkm.txt', sep='\t')
def render_listing(in_name, out_name, folders=[], files=[]):
    if in_name:
        with open(in_name, 'r') as fd:
            try:
                lexer = get_lexer_for_filename(in_name)
            except:
                lexer = TextLexer()
            code = highlight(fd.read(), lexer,
                             HtmlFormatter(cssclass='code',
                                           linenos="table",
                                           nowrap=False,
                                           lineanchors=utils.slugify(in_name),
                                           anchorlinenos=True))
        title = os.path.basename(in_name)
    else:
        code = ''
        title = ''
    crumbs = utils.get_crumbs(os.path.relpath(out_name, kw['output_folder']),
                              is_file=True)
    context = {
        'code': code,
        'title': title,
        'crumbs': crumbs,
        'lang': kw['default_lang'],
        'folders': natsort.natsorted(folders),
        'files': natsort.natsorted(files),
        'description': title,
    }
    self.site.render_template('listing.tmpl', out_name, context)
def main_exonerate(ref_fa, refseq_pr, exonerate_path, thread, exon2align_gff, index_s=0, index_e=0):
    '''
    * refseq_pr: all protein sequences of the organism
    * exonerate_path: path to store the split protein sequences.
    '''
    if not os.path.exists(exonerate_path):
        os.mkdir(exonerate_path)
    # 1) split file
    os.chdir(exonerate_path)
    if os.listdir(exonerate_path) != []:
        split_fa(refseq_pr, 100, exonerate_path)
    # 2) run exonerate for each file
    faFiles = natsorted(glob.glob('file*.fa'))
    if index_e == 0:
        faFiles = faFiles[index_s:]
    else:
        faFiles = faFiles[index_s:index_e]
    pool = mp.Pool(processes=int(thread))
    for f in faFiles:
        out = f[:-2] + 'gff'
        pool.apply_async(exonerate, args=(ref_fa, f, out))
    pool.close()
    pool.join()
    # 3) merge the gff files
    exonerate_gff = 'exonerate.gff'
    if not os.path.exists(exonerate_gff):
        gff_fns = natsorted(glob.glob('file*.gff'))
        exonerate2gff(gff_fns, exonerate_gff)
def get_all_tags(self):
    """
    Return a tuple of lists ([common_tags], [anti_tags], [organisational_tags])
    containing all the tags of all the tasks of this course.

    Since this is a heavy procedure, we cache the results.
    The cache should be updated when a task is modified.
    """
    if self._all_tags_cache is not None:
        return self._all_tags_cache

    tag_list_common = set()
    tag_list_misconception = set()
    tag_list_org = set()

    tasks = self.get_tasks()
    for id, task in tasks.items():
        for tag in task.get_tags()[0]:
            tag_list_common.add(tag)
        for tag in task.get_tags()[1]:
            tag_list_misconception.add(tag)
        for tag in task.get_tags()[2]:
            tag_list_org.add(tag)

    tag_list_common = natsorted(tag_list_common, key=lambda y: y.get_name().lower())
    tag_list_misconception = natsorted(tag_list_misconception, key=lambda y: y.get_name().lower())
    tag_list_org = natsorted(tag_list_org, key=lambda y: y.get_name().lower())

    self._all_tags_cache = (list(tag_list_common), list(tag_list_misconception), list(tag_list_org))
    return self._all_tags_cache
def metadata_stats_from_sample_and_prep_templates(st, pt):
    """Compute summary statistics for the sample and prep templates

    Parameters
    ----------
    st : SampleTemplate
        Initialized SampleTemplate to use for the metadata stats.
    pt : PrepTemplate
        Initialized PrepTemplate to use for the metadata stats.

    Returns
    -------
    dict
        Dictionary object where the keys are the names of the metadata
        categories and the values are lists of tuples where the first element
        is the name of a metadata value in the category and the second element
        is the number of times that value was seen.
    """
    df = metadata_map_from_sample_and_prep_templates(st, pt)

    out = {}
    for column in natsorted(df.columns):
        # get a pandas series of the value-count pairs
        counts = df[column].value_counts()
        out[column] = [(key, counts[key]) for key in natsorted(counts.index)]
    return out
def get_metadata(path, band_name, git_root):
    try:
        with open(os.path.join(path, 'description'), 'r') as desc:
            description = desc.read()
    except Exception:
        description = ''
    metadata = {'name': band_name,
                'description': description,
                'albums': [],
                'git_root': git_root}
    album_paths = natsort.natsorted(os.listdir(path))
    for album_name in filter(lambda a: filter_album_names(path, a), album_paths):
        album_path = os.path.join(path, album_name)
        try:
            with open(os.path.join(album_path, 'description'), 'r') as desc:
                album_description = desc.read()
        except Exception:
            album_description = 'Shorts are comfy and easy to wear!'
        tracks = []
        track_number = 1
        track_paths = natsort.natsorted(os.listdir(album_path))
        for track in filter(filter_tracks, track_paths):
            track_name = clean_track_name(track)
            tracks.append({'number': track_number,
                           'name': track_name,
                           'path': os.path.join(album_path, track)})
            track_number += 1
        metadata['albums'].append({'name': album_name,
                                   'path': album_path,
                                   'tracks': tracks,
                                   'description': album_description})
    return metadata
def get_all_tags_names_as_list(self, admin=False, language="en"):
    """ Computes and caches two lists containing all tag names, sorted in natural order by name """
    if admin:
        if self._all_tags_cache_list_admin != {} and language in self._all_tags_cache_list_admin:
            return self._all_tags_cache_list_admin[language]  # Cache hit
    else:
        if self._all_tags_cache_list != {} and language in self._all_tags_cache_list:
            return self._all_tags_cache_list[language]  # Cache hit

    # Cache miss, computes everything
    s_stud = set()
    s_admin = set()
    (common, _, org) = self.get_all_tags()
    for tag in common + org:
        # Is tag_name_with_translation correct by doing that like that ?
        tag_name_with_translation = self.gettext(language, tag.get_name()) if tag.get_name() else ""
        s_admin.add(tag_name_with_translation)
        if tag.is_visible_for_student():
            s_stud.add(tag_name_with_translation)

    self._all_tags_cache_list_admin[language] = natsorted(s_admin, key=lambda y: y.lower())
    self._all_tags_cache_list[language] = natsorted(s_stud, key=lambda y: y.lower())

    if admin:
        return self._all_tags_cache_list_admin[language]
    return self._all_tags_cache_list[language]
def get_page(filepath, pagenum):
    path = root + '/' + filepath
    if os.path.isdir(path):
        files = natsorted(os.listdir(path))
        files = pages_cleaner(files)
        image = path + '/' + files[pagenum - 1]
        with open(image, 'rb') as file_:
            page = file_.read()
        return page
    elif os.path.isfile(path):
        filetype = path.split('.')[-1]
        if filetype == 'zip':
            with ZipFile(path) as archive:
                files = natsorted(archive.namelist())
                files = pages_cleaner(files)
                image = files[pagenum - 1]
                with archive.open(image) as file_:
                    page = file_.read()
            return page
        elif filetype == 'rar':
            with rarfile.RarFile(path) as archive:
                files = natsorted(archive.namelist())
                files = pages_cleaner(files)
                image = files[pagenum - 1]
                with archive.open(image) as file_:
                    page = file_.read()
            return page
def listFiles():
    """ Lists all available Charmm topologies and parameter files

    Examples
    --------
    >>> from htmd.builder import charmm
    >>> charmm.listFiles()             # doctest: +ELLIPSIS
    ---- Topologies files list...
    """
    from natsort import natsorted
    charmmdir = path.join(home(), 'builder', 'charmmfiles', '')  # maybe just lookup current module?
    topos = natsorted(glob(path.join(charmmdir, 'top', '*.rtf')))
    params = natsorted(glob(path.join(charmmdir, 'par', '*.prm')))
    streams = natsorted(glob(path.join(charmmdir, 'str', '*', '*.str')))
    print('---- Topologies files list: ' + path.join(charmmdir, 'top', '') + ' ----')
    for t in topos:
        t = t.replace(charmmdir, '')
        print(t)
    print('---- Parameters files list: ' + path.join(charmmdir, 'par', '') + ' ----')
    for p in params:
        p = p.replace(charmmdir, '')
        print(p)
    print('---- Stream files list: ' + path.join(charmmdir, 'str', '') + ' ----')
    for s in streams:
        s = s.replace(charmmdir, '')
        print(s)
def scenarios_comms(paths):
    subdirs = natsorted(map_paths(paths))
    for i, subdir in enumerate(natsorted(subdirs)):
        title = os.path.basename(subdir)
        sources = npz_in_dir(subdir)
        log.info("{0:%}:{1}:{2}/{3}".format(
            float(i) / float(len(subdirs)), title, memory(), swapsize()))
        yield (subdir, generate_sources(sources, comms_only=True))
def _get_list_of_files(self, path):
    """
    Go through each subdirectory of `path`, and choose one file from each
    to use in our hash. Continue to increase self.iter, so we use a
    different 'slot' of randomness each time.
    """
    chosen_files = []

    # Get a list of all subdirectories
    directories = []
    for root, dirs, files in natsort.natsorted(os.walk(path, topdown=False)):
        for name in dirs:
            if name[:1] != '.':
                directories.append(os.path.join(root, name))
    directories = natsort.natsorted(directories)

    # Go through each directory in the list, and choose one file from each.
    # Add this file to our master list of robotparts.
    for directory in directories:
        files_in_dir = []
        for imagefile in natsort.natsorted(os.listdir(directory)):
            files_in_dir.append(os.path.join(directory, imagefile))
        files_in_dir = natsort.natsorted(files_in_dir)

        # Use some of our hash bits to choose which file
        element_in_list = self.hasharray[self.iter] % len(files_in_dir)
        chosen_files.append(files_in_dir[element_in_list])
        self.iter += 1

    return chosen_files
def __GetOBSDatasetName(self, band):
    self.refBand = dict()
    self.emisBand = dict()
    self.refBandname = dict()
    self.emisBandname = dict()
    for band in self.OrbitInfo.BandsType:
        if self.OrbitInfo.BandsType[band] == 'REF':
            self.refBand[band] = self.OrbitInfo.BandsType[band]
        else:
            self.emisBand[band] = self.OrbitInfo.BandsType[band]
    self.refBand = natsorted(self.refBand, alg=ns.IGNORECASE)
    self.emisBand = natsorted(self.emisBand, alg=ns.IGNORECASE)
    refNum = 0
    for refband in self.refBand:
        self.refBandname[refband] = refNum
        refNum = refNum + 1
    emisNum = 0
    for emisband in self.emisBand:
        self.emisBandname[emisband] = emisNum
        emisNum = emisNum + 1
    return self.refBandname, self.emisBandname
def fill(self):
    scene_index = {}
    for index, key in enumerate(natsorted(self.seqdata.scene.keys())):
        scene_index[key] = index + 1

    lastpos = len(self.seqdata.workpackage)
    totalbar_pos = lastpos + 3

    for index, a in enumerate(natsorted(self.seqdata.workpackage.keys())):
        self.statgrid.SetCellValue(index, 0, a)
        self.statgrid.SetCellValue(index, 2, ','.join(self.seqdata.workpackage[a].scenes))
        self.statgrid.SetCellValue(index, 3, self.seqdata.workpackage[a].descr)
        ee = []
        ff = []
        gg = []
        for scene_key in self.seqdata.workpackage[a].scenes:
            ee.append("Q%d" % int(scene_index[scene_key]))
            ff.append("S%d" % int(scene_index[scene_key]))
            gg.append("T%d" % int(scene_index[scene_key]))
        self.statgrid.SetCellValue(index, 4, "=SUM(%s)" % ','.join(ee))
        self.statgrid.SetCellValue(index, 5, "=SUM(%s)" % ','.join(ff))
        self.statgrid.SetCellValue(index, 6, "=AVERAGE(%s)" % ','.join(gg))
        self.statgrid.SetCellValue(index, 7, '=F%d*G%d' % (index + 1, index + 1))
        self.statgrid.SetCellValue(index, 8, '=(1/F%d)*F%d' % (totalbar_pos, index + 1))
        self.statgrid.SetCellValue(index, 9, '=(1/H%d)*H%d' % (totalbar_pos, index + 1))

    ## totalbar
    self.statgrid.SetCellValue(totalbar_pos - 1, 3, 'totalen')
    self.statgrid.SetCellValue(totalbar_pos - 1, 4, '=SUM(E1:E%d)' % lastpos)
    self.statgrid.SetCellValue(totalbar_pos - 1, 5, '=SUM(F1:F%d)' % lastpos)
    self.statgrid.SetCellValue(totalbar_pos - 1, 7, '=SUM(H1:H%d)' % lastpos)
    self.statgrid.SetCellValue(totalbar_pos - 1, 8, '=SUM(I1:I%d)' % lastpos)
    self.statgrid.SetCellValue(totalbar_pos - 1, 9, '=SUM(J1:J%d)' % lastpos)

    for index, a in enumerate(natsorted(self.seqdata.scene.keys())):
        self.statgrid.SetCellValue(index, 14, a)
        self.statgrid.SetCellValue(index, 15, self.seqdata.scene[a].descr)
        try:
            self.statgrid.SetCellValue(index, 16, self.seqdata.scene[a].frames)
        except:
            pass
        try:
            self.statgrid.SetCellValue(index, 17, self.seqdata.scene[a].char_amount)
        except:
            pass
        # self.statgrid.SetCellValue(index, 14, '=N%d*M%d' % (index+1, index+1))
        self.statgrid.SetCellValue(index, 18, '=((SUMIF(R%d;">1"))*15%%+1)*Q%d' % (index + 1, index + 1))
        self.statgrid.SetCellValue(index, 19, '2')
def _add_formatted_imports(self):
    """ Adds the imports back to the file (at the index of the first import)
        sorted alphabetically and split between groups """
    output = []
    for section in Sections.ALL:
        straight_modules = list(self.imports[section]['straight'])
        straight_modules.sort(key=lambda key: self._module_key(key, self.config))

        for module in straight_modules:
            if module in self.as_map:
                output.append("import {0} as {1}".format(module, self.as_map[module]))
            else:
                output.append("import {0}".format(module))

        from_modules = list(self.imports[section]['from'].keys())
        from_modules = natsorted(from_modules, key=lambda key: self._module_key(key, self.config))
        for module in from_modules:
            import_start = "from {0} import ".format(module)
            from_imports = list(self.imports[section]['from'][module])
            from_imports = natsorted(from_imports, key=lambda key: self._module_key(key, self.config))
            for from_import in copy.copy(from_imports):
                import_as = self.as_map.get(module + "." + from_import, False)
                if import_as:
                    output.append(import_start + "{0} as {1}".format(from_import, import_as))
                    from_imports.remove(from_import)
            if from_imports:
                if "*" in from_imports:
                    import_statement = "{0}*".format(import_start)
                else:
                    import_statement = import_start + (", ").join(from_imports)
                    if len(import_statement) > self.config['line_length']:
                        import_statement = import_start + "("
                        size = len(import_statement)
                        import_statement += (",\n" + " " * size).join(from_imports)
                        import_statement += ")"
                output.append(import_statement)

        if straight_modules or from_modules:
            output.append("")

    while output[-1:] == [""]:
        output.pop()
    while self.import_index + 2 < len(self.out_lines) and self.out_lines[self.import_index + 1] == "":
        self.out_lines.pop(self.import_index + 1)

    if len(self.out_lines) > self.import_index + 1:
        next_construct = self.out_lines[self.import_index + 1]
        if next_construct.startswith("def") or next_construct.startswith("class") or \
           next_construct.startswith("@"):
            output += ["", ""]
        else:
            output += [""]

    self.out_lines[self.import_index:1] = output
def get_consensus_map(rna_df,pr_df,gene,rna_ac,pr_ac): '''this function check if the rna map and pr map have the same splice sites * rna_df: mRNA map to genome gff dataframe with additional rna_ac column * pr_df: protein map to genome dataframe with additional 'pr_ac' and 'pr_id' column ''' if not rna_df.empty: # get rna scaffold name, if more than 1 scaffold then don't add it's annotation rna_chr = list(set(rna_df[0].tolist())) if len(rna_chr) != 1: assert False, rna_ac + ' map to multiple scaffolds' else: rna_chr = rna_chr[0] # get strand, if map to both strand don't output rna_str = list(set(rna_df[6].tolist())) if len(rna_str) != 1: assert False, rna_ac + ' map to both strands' else: rna_str = rna_str[0] # get rna splice sites rna_splice = natsorted(rna_df[3].tolist() + rna_df[4].tolist()) # change exon id n = 1 for i,row in rna_df.iterrows(): item = row[8].split(';') iid = '.'.join(item[0].split('.')[:-1]) anno = iid+' '+str(n)+';'+re.sub('Name.+?;','',';'.join(item[1:]))+';Parent='+rna_ac+';gene_id='+gene+';transcript_id='+rna_ac # anno = iid+'_'+str(n)+';'+ re.sub('Name','transcript_id',';'.join(item[1:]))+';Parent='+rna_ac+';gene_id='+gene rna_df.loc[i,8] = anno rna_df.loc[i,2] = 'exon' n += 1 #--------------- process protein gff information if not pr_df.empty: pr_id = pr_df['pr_id'].tolist()[0] sub_pr_df = pr_df[(pr_df['pr_id'].values==pr_id) & (pr_df[0].values==rna_chr)].copy() # change cds id m = 1 for i,row in sub_pr_df.iterrows(): item = row[8].split(';') anno = 'ID='+pr_ac+'_'+str(m)+';'+';'.join(item[2:])+';protein_id='+pr_ac+';Parent='+rna_ac+';gene_id='+gene sub_pr_df.loc[i,8] = anno sub_pr_df.loc[i,2] = 'CDS' m += 1 pr_splice = natsorted(sub_pr_df[3].tolist() + sub_pr_df[4].tolist()) if sub_pr_df.shape[0] == 1: if not rna_splice[0]<pr_splice[0]<pr_splice[1]<rna_splice[1]: sub_pr_df = pd.DataFrame() else: rna_pr_sites_match = set(pr_splice[1:-1]).intersection(rna_splice) m_len = len(rna_pr_sites_match) pr_len = len(pr_splice[1:-1]) if m_len != pr_len: print pr_ac,m_len,'/',pr_len if len(pr_splice) > len(rna_splice): print 'protein has more splice than rna, rna/pr:',len(rna_splice),'/',len(pr_splice) sub_pr_df = pd.DataFrame() else: sub_pr_df = pr_df return rna_df,sub_pr_df,rna_chr,rna_splice[0],rna_splice[-1],rna_str
def _print_error_if_no_chromosome_overlap(genome_dict, bim_table):
    print("Error! No chromosome overlap between your fasta and bim file.")
    fasta_chromosomes = ", ".join(natsorted(genome_dict.keys()))
    bim_chromosomes = ", ".join(natsorted(bim_table.chromosome.drop_duplicates()))
    print("These are the chromosomes in your bim file: {}".format(bim_chromosomes))
    print("These are the chromosomes in your fasta file: {}".format(fasta_chromosomes))
    print("Exiting.")
    exit()
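# Minimal illustration (not from the source project) of why the chromosome
# names above go through natsorted: natural sorting puts "chr2" before
# "chr10", whereas plain lexicographic sorting does not.
from natsort import natsorted

chroms = ["chr10", "chr2", "chr1", "chrX"]
sorted(chroms)     # ['chr1', 'chr10', 'chr2', 'chrX']
natsorted(chroms)  # ['chr1', 'chr2', 'chr10', 'chrX']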
def render_listing(in_name, out_name, input_folder, output_folder, folders=[], files=[]):
    needs_ipython_css = False
    if in_name and in_name.endswith('.ipynb'):
        # Special handling: render ipynbs in listings (Issue #1900)
        ipynb_compiler = self.site.plugin_manager.getPluginByName("ipynb", "PageCompiler").plugin_object
        ipynb_raw = ipynb_compiler.compile_html_string(in_name, True)
        ipynb_html = lxml.html.fromstring(ipynb_raw)
        # The raw HTML contains garbage (scripts and styles), we can’t leave it in
        code = lxml.html.tostring(ipynb_html.xpath('//*[@id="notebook"]')[0], encoding='unicode')
        title = os.path.basename(in_name)
        needs_ipython_css = True
    elif in_name:
        with open(in_name, 'r') as fd:
            try:
                lexer = get_lexer_for_filename(in_name)
            except:
                lexer = TextLexer()
            code = highlight(fd.read(), lexer, utils.NikolaPygmentsHTML(in_name))
        title = os.path.basename(in_name)
    else:
        code = ''
        title = os.path.split(os.path.dirname(out_name))[1]
    crumbs = utils.get_crumbs(os.path.relpath(out_name, self.kw['output_folder']),
                              is_file=True)
    permalink = self.site.link(
        'listing',
        os.path.join(
            input_folder,
            os.path.relpath(
                out_name[:-5],  # remove '.html'
                os.path.join(self.kw['output_folder'], output_folder))))
    if self.site.config['COPY_SOURCES'] and in_name:
        source_link = permalink[:-5]  # remove '.html'
    else:
        source_link = None
    context = {
        'code': code,
        'title': title,
        'crumbs': crumbs,
        'permalink': permalink,
        'lang': self.kw['default_lang'],
        'folders': natsort.natsorted(folders, alg=natsort.ns.F | natsort.ns.IC),
        'files': natsort.natsorted(files, alg=natsort.ns.F | natsort.ns.IC),
        'description': title,
        'source_link': source_link,
        'pagekind': ['listing'],
    }
    if needs_ipython_css:
        # If someone does not have ipynb posts and only listings, we
        # need to enable ipynb CSS for ipynb listings.
        context['needs_ipython_css'] = True
    self.site.render_template('listing.tmpl', out_name, context)
def scenarios_comms(paths, generator=False):
    subdirs = natsorted(map_paths(paths))
    for i, subdir in enumerate(natsorted(subdirs)):
        sources = npz_in_dir(subdir)
        print("{0:%}:{1}".format(float(i) / float(len(subdirs)), subdir))
        if generator:
            yield (subdir, generate_sources(sources, comms_only=True))
        else:
            yield (subdir, load_sources(sources, comms_only=True))
def load_validation_trajectories(self, data_dir=None, dataset=None, preprocessor=None, load_config=False): """ :param data_dir: Directory of where validation files live :param dataset: Dataset used to train the model :param preprocessor: Preprocessing method used in the data. :param load_config: Whether to return also the configuration of validation call :return: pandas.DataFrame """ if data_dir is None: if self.data_dir is None: raise ValueError('Location of experiments not given') else: data_dir = self.data_dir if dataset is None: if self.dataset is not None: dataset = self.dataset else: raise ValueError('Dataset not given') validation_fn = "validationResults-detailed-traj-run-*-walltime.csv" preprocessors_list = ["Densifier", "TruncatedSVD", "ExtraTreesPreprocessorClassification", "FastICA", "FeatureAgglomeration", "KernelPCA", "RandomKitchenSinks", "LibLinear_Preprocessor", "NoPreprocessing", "Nystroem", "PCA", "PolynomialFeatures", "RandomTreesEmbedding", "SelectPercentileClassification", "SelectRates"] if preprocessor == 'all': scenario_dir = [os.path.join(data_dir, dataset, p, dataset, validation_fn) for p in preprocessors_list] dirs = [] [dirs.extend(_glob.glob(p)) for p in scenario_dir] dirs = _ns.natsorted(dirs) elif preprocessor is not None: scenario_dir = os.path.join(data_dir, dataset, preprocessor, dataset, validation_fn) dirs = _ns.natsorted(_glob.glob(scenario_dir)) else: scenario_dir = os.path.join(data_dir, dataset, validation_fn) dirs = _ns.natsorted(_glob.glob(scenario_dir)) if len(dirs) == 0: raise ValueError("Not file found in %s" % scenario_dir) seeds = ['seed_' + itseeds.split('-')[-2].split('.')[0] for itseeds in dirs] all_validations = [] for fname in dirs: try: val_traj = self.load_validation_by_file(fname, load_config=load_config) all_validations.append(val_traj) except IndexError: print("CRASH in: %s" % os.path.split(fname)[1]) validations_df = _pd.concat(all_validations, axis=0) validations_df = validations_df.reset_index(drop=True) return validations_df.copy()
def list_expr( cover, prm, fol, simple=False, use_dom=False, latex=False): """Return `list` of `str`, each an orthotope in `cover`. @param simple: if `True`, then return expression that can be parsed by `fol.add_expr`. @param use_dom: omit conjuncts that contain dom of var assumes that `|= care => type_hints` """ px = prm._px xp = _map_parameters_to_vars(px) support = fol.support(cover) keys = {xp[k] for k in support} keys = natsort.natsorted(keys) c = _orthotopes_iter(cover, fol) r = list() for product in c: w = list() for x in keys: a = px[x]['a'] b = px[x]['b'] a = product[a] b = product[b] tyh._check_type_hint(a, b, fol.vars[x], x) # can `x` be ignored ? if use_dom: dom = fol.vars[x]['dom'] a, b = tyh._clip_subrange((a, b), dom, x) if a is None and b is None: continue if a == b: s = '({x} = {a})' elif simple: s = '({a} <= {x}) /\ ({x} <= {b})' else: # precise even in absence of limits/dom s = '({x} \in {a} .. {b})' s = s.format(x=x, a=a, b=b) w.append(s) # conjoin as one triplet per line lines = w n_tail = len(lines) % 3 tail = lines[-n_tail:] lines = lines[:-n_tail] i = iter(lines) triplets = list(zip(i, i, i)) lines = [' /\ '.join(t) for t in triplets] lines.append(' /\ '.join(tail)) s = stx.vertical_op(lines, latex=latex, op='and') r.append(s) r = natsort.natsorted(r) # reproducible vertical listing return r
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')

# Number of objects detected
num_detections = detection_graph.get_tensor_by_name('num_detections:0')

# array, custom num of detections using scores
number_of_boxes_drawn = []
count = 0

# Load image using OpenCV and
# expand image dimensions to have shape: [1, None, None, 3]
# i.e. a single-column array, where each item in the column has the pixel RGB value

# FOR LOOP TO ITERATE THROUGH IMAGE DIRECTORY
for filename in natsorted(os.listdir(PATH_TO_IMAGES)):
    image = cv2.imread(os.path.join(PATH_TO_IMAGES, filename))
    image_expanded = np.expand_dims(image, axis=0)

    # start timer to get inf time
    start_time = time.time()

    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: image_expanded})

    # print time of inference
    count += 1
def get_images(img_dir):
    return [a for a in natsorted(glob(os.path.join(img_dir, '*.jpg'))) if 'cust' not in a]
print(raw_background)
fig, (ax, ax2) = plt.subplots(1, 2)
files.remove('background.csv')
background = np.genfromtxt(raw_background, unpack=True, delimiter=',')
print(background)
wave = np.linspace(498.6174, 1103.161, len(background))
fit_initial = [0.5, 10, 4, 850, 0.14]
print(files)
peak = list()
current_time = 0
peak_wavelength = list()
time = list()
print("Sorted")
print(natsorted(files, key=lambda y: y.lower())[2000:])
for file in natsorted(files, key=lambda y: y.lower())[2500:]:
    fit_initial = [0.5, 10, 4, 850, 0.5235924622541]
    print(file)
    ri_data = os.path.join(directory, file)
    reflectance = np.genfromtxt(ri_data, unpack=True, delimiter=',')
    wave_min = np.argmax(wave > 800)
    wave_max = np.argmax(wave > 900)
    result_obtained = False
    while not result_obtained:
        try:
            a = sci.curve_fit(grating_fit, wave[wave_min:wave_max],
                              reflectance[wave_min:wave_max], p0=fit_initial)
def get_data(self, shuffle=True): # # Read and store into pairs of images and labels # images = glob.glob(self.imageDir + '/*.tif') labels = glob.glob(self.labelDir + '/*.tif') if self._size==None: self._size = len(images) from natsort import natsorted images = natsorted(images) labels = natsorted(labels) # # Pick randomly a pair of training instance # # seed = 2015 # np.random.seed(seed) for k in range(self._size): rand_index = np.random.randint(0, len(images)) rand_image = np.random.randint(0, len(images)) rand_label = np.random.randint(0, len(labels)) image = skimage.io.imread(images[rand_index]) label = skimage.io.imread(labels[rand_index]) # Crop the num image if greater than 50 # Random crop 50 1024 1024 from 150 assert image.shape == label.shape # numSections = image.shape[0] # if numSections > DIMN: # randz = np.random.randint(0, numSections - DIMN + 1) # DIMN is minimum # image = image[randz:randz+DIMN,...] # label = label[randz:randz+DIMN,...] dimz, dimy, dimx = image.shape # if self.isTrain: randz = np.random.randint(0, dimz-DIMZ+1) randy = np.random.randint(0, dimy-DIMY+1) randx = np.random.randint(0, dimx-DIMX+1) image = image[randz:randz+DIMZ, randy:randy+DIMY, randx:randx+DIMX] label = label[randz:randz+DIMZ, randy:randy+DIMY, randx:randx+DIMX] if self.isTrain: seed = np.random.randint(0, 20152015) seed_image = np.random.randint(0, 2015) seed_label = np.random.randint(0, 2015) #TODO: augmentation here image = self.random_flip(image, seed=seed) image = self.random_reverse(image, seed=seed) image = self.random_square_rotate(image, seed=seed) # image = self.random_permute(image, seed=seed) # image = self.random_elastic(image, seed=seed) # image = skimage.util.random_noise(image, seed=seed) # TODO # image = skimage.util.img_as_ubyte(image) label = self.random_flip(label, seed=seed) label = self.random_reverse(label, seed=seed) label = self.random_square_rotate(label, seed=seed) # label = self.random_permute(label, seed=seed) # label = self.random_elastic(label, seed=seed) # image = self.random_reverse(image, seed=seed) # label = self.random_reverse(label, seed=seed) # Further augmentation in image image = skimage.util.random_noise(image, mean=0, var=0.001, seed=seed) # TODO image = skimage.util.img_as_ubyte(image) pixel = np.random.randint(-20, 20) image = image + pixel # Downsample for test ting # image = skimage.transform.resize(image, output_shape=(DIMZ, DIMY, DIMX), order=1, preserve_range=True, anti_aliasing=True) # label = skimage.transform.resize(label, output_shape=(DIMZ, DIMY, DIMX), order=0, preserve_range=True) # label = label/255.0 # Calculate vector field # dirsx, dirsy, dirsz = self.toVectorField(label) membr = np.zeros_like(label) for z in range(membr.shape[0]): membr[z,...] 
= 1-skimage.segmentation.find_boundaries(np.squeeze(label[z,...]), mode='thick') #, mode='inner' # membr = 1-skimage.segmentation.find_boundaries(label, mode='thick') #, mode='inner' membr = 255*membr membr[label==0] = 0 # Calculate pointz array = np.zeros_like(label) point = array[0,...].copy() # point[label[0,...]==label[1,...]] = 255.0 point = 255*np.equal(label[0,...], label[1,...]) point[membr[0,...]==0] = 0; point[membr[1,...]==0] = 0; # image = np.expand_dims(image, axis=0) # label = np.expand_dims(label, axis=0) # dirsx = np.expand_dims(dirsx, axis=0) # dirsy = np.expand_dims(dirsy, axis=0) # membr = np.expand_dims(membr, axis=0) image = np.expand_dims(image, axis=0) membr = np.expand_dims(membr, axis=0) point = np.expand_dims(point, axis=0) point = np.expand_dims(point, axis=0) # image = np.expand_dims(image, axis=-1) # membr = np.expand_dims(membr, axis=-1) # point = np.expand_dims(point, axis=-1) # membr = np.expand_dims(membr, axis=-1) yield [image.astype(np.float32), membr.astype(np.float32), point.astype(np.float32)]
def main(argv): direct = 'C:/Users/gomes/Desktop/ISLES2017_Training' path_folder = os.listdir(direct) path_folder = natsort.natsorted(path_folder, reverse=False) for folder in path_folder: path = os.path.join(direct, folder) path_enter=os.listdir(path) path_enter=natsort.natsorted(path_enter,reverse=False) for arq in path_enter: val=os.path.join(path,arq) val_enter=os.listdir(val) for filename in val_enter: if 'nii' in filename.lower(): input = direct+'/'+folder+'/'+arq+'/'+filename output= 'C:/Users/gomes/Desktop/training translate/'+folder+'/'+arq # print(output) # print(input) inputfile = input outputfile = output try: opts, args = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="]) except getopt.GetoptError: print('nii2png.py -i <inputfile> -o <outputfile>') sys.exit(2) for opt, arg in opts: if opt == '-h': print('nii2png.py -i <inputfile> -o <outputfile>') sys.exit() elif opt in ("-i", "--input"): inputfile = arg elif opt in ("-o", "--output"): outputfile = arg print('Input file is ', inputfile) print('Output folder is ', outputfile) # set fn as your 4d nifti file image_array = nibabel.load(inputfile).get_data() print(len(image_array.shape)) # ask if rotate ask_rotate = 'y' if ask_rotate == 'y': ask_rotate_num = 90 if ask_rotate_num == 90 or ask_rotate_num == 180 or ask_rotate_num == 270: print('Got it. Your images will be rotated by {} degrees.'.format(ask_rotate_num)) else: print('You must enter a value that is either 90, 180, or 270. Quitting...') sys.exit() elif ask_rotate.lower() == 'n': print('OK, Your images will be converted it as it is.') else: print('You must choose either y or n. Quitting...') sys.exit() # if 4D image inputted if len(image_array.shape) == 4: # set 4d array dimension values nx, ny, nz, nw = image_array.shape # set destination folder if not os.path.exists(outputfile): os.makedirs(outputfile) print("Created ouput directory: " + outputfile) print('Reading NIfTI file...') total_volumes = image_array.shape[3] total_slices = image_array.shape[2] # iterate through volumes for current_volume in range(0, total_volumes): slice_counter = 0 # iterate through slices for current_slice in range(0, total_slices): if (slice_counter % 1) == 0: # rotate or no rotate if ask_rotate == 'y': if ask_rotate_num == 90 or ask_rotate_num == 180 or ask_rotate_num == 270: print('Rotating image...') if ask_rotate_num == 90: data = numpy.rot90(image_array[:, :, current_slice, current_volume]) elif ask_rotate_num == 180: data = numpy.rot90(numpy.rot90(image_array[:, :, current_slice, current_volume])) elif ask_rotate_num == 270: data = numpy.rot90( numpy.rot90(numpy.rot90(image_array[:, :, current_slice, current_volume]))) elif ask_rotate.lower() == 'n': data = image_array[:, :, current_slice, current_volume] # alternate slices and save as png print('Saving image...') image_name = inputfile[:-4] + "_t" + "{:0>3}".format( str(current_volume + 1)) + "_z" + "{:0>3}".format(str(current_slice + 1)) + ".png" # scipy.misc.imsave(image_name, data)] cv2.imwrite(image_name,normalizeImage(data)) # cv2.imshow(image_name,normalizeImage(data)) # cv2.waitKey(0) # count_debug = di.show_figures(data, 1) # ppl.show() print('Saved.') # move images to folder print('Moving files...') src = image_name shutil.move(src, outputfile) slice_counter += 1 print('Moved.') print('Finished converting images') # else if 3D image inputted elif len(image_array.shape) == 3: # set 4d array dimension values nx, ny, nz = image_array.shape # set destination folder if not os.path.exists(outputfile): os.makedirs(outputfile) 
print("Created ouput directory: " + outputfile) print('Reading NIfTI file...') total_slices = image_array.shape[2] slice_counter = 0 # iterate through slices for current_slice in range(0, total_slices): # alternate slices if (slice_counter % 1) == 0: # rotate or no rotate if ask_rotate.lower() == 'y': if ask_rotate_num == 90 or ask_rotate_num == 180 or ask_rotate_num == 270: if ask_rotate_num == 90: data = numpy.rot90(image_array[:, :, current_slice]) elif ask_rotate_num == 180: data = numpy.rot90(numpy.rot90(image_array[:, :, current_slice])) elif ask_rotate_num == 270: data = numpy.rot90(numpy.rot90(numpy.rot90(image_array[:, :, current_slice]))) elif ask_rotate.lower() == 'n': data = image_array[:, :, current_slice] # alternate slices and save as png if (slice_counter % 1) == 0: print('Saving image...') image_name = inputfile[:-4] + "_z" + "{:0>3}".format(str(current_slice + 1)) + ".png" # scipy.misc.imsave(image_name, data) cv2.imwrite(image_name,normalizeImage(data)) # cv2.imshow(image_name, normalizeImage(data)) # cv2.waitKey(0) # count_debug = di.show_figures(data, 1) # ppl.show() print('Saved.') # move images to folder print('Moving image...') src = image_name # shutil.move(src, outputfile) slice_counter += 1 print('Moved.') print('Finished converting images') else: print('Not a 3D or 4D Image. Please try again.')
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
from PIL import Image
from skimage.exposure import rescale_intensity
from scipy.ndimage import correlate, convolve
import natsort

path = os.path.join(os.getcwd(), '')
path_mask = os.path.join(os.getcwd(), 'training', 'mask')
path_results = os.path.join(os.getcwd(), 'Binary Images')
files_avail = glob.glob(os.path.join(path, '*.tif'))
masks = os.listdir(path_mask)
masks = natsort.natsorted(masks)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(21, 21))


def convolve2D(image, kernel):
    (iH, iW) = image.shape
    (kH, kW) = kernel.shape
    pad = (kW - 1) // 2
    img = cv2.copyMakeBorder(image, pad, pad, pad, pad, cv2.BORDER_REPLICATE)
    w = np.zeros((iH, iW), dtype="float32")
    output = np.zeros((iH, iW), dtype="float32")
    for y in np.arange(pad, iH + pad):
        for x in np.arange(pad, iW + pad):
            roi = img[y - pad:y + pad + 1, x - pad:x + pad + 1]
            output[y - pad, x - pad] = (roi * kernel).sum()
    w = image - output
            if ctype == "text/plain" and "attachment" not in cdispo:
                body += part.get_payload(decode=True)  # decode
                break
    # not multipart - i.e. plain text, no attachments, keeping fingers crossed
    else:
        body += message.get_payload(decode=True)
    return body


## This is where the script starts
if len(sys.argv) < 2:
    sys.exit("You need to specify your group name")

groupName = sys.argv[1]
oldDir = os.getcwd()

if os.path.exists(groupName):
    archiveDir = os.path.abspath(groupName + "-archive")
    if not os.path.exists(archiveDir):
        os.makedirs(archiveDir)
    os.chdir(groupName)
    for file in natsorted(os.listdir(os.getcwd())):
        messageYear = getYahooMessageYear(file)
        archiveFile = archiveDir + "/archive-" + str(messageYear) + ".txt"
        archiveYahooMessage(file, archiveFile, messageYear, "utf-8")
else:
    sys.exit("Please run archive-group.py first")

os.chdir(oldDir)
print("Complete")
def get_args(): parser = argparse.ArgumentParser(description="SNIa classification") parser.add_argument("--seed", type=int, default=0, help="Random seed to be used") ####################### # General parameters ####################### parser.add_argument("--data", action="store_true", help="Create dataset for ML training") parser.add_argument("--train_rnn", action="store_true", help="Train RNN model") parser.add_argument("--train_rf", action="store_true", help="Train RandomForest model") parser.add_argument("--validate_rnn", action="store_true", help="Validate RNN model") parser.add_argument("--validate_rf", action="store_true", help="Validate RandomForest model") parser.add_argument( "--override_source_data", default=None, type=str, choices=["photometry", "saltfit"], help="Change the source data (for representativeness purposes)", ) parser.add_argument( "--explore_lightcurves", action="store_true", help="Plot a random selection of lightcurves", ) parser.add_argument("--speed", action="store_true", help="Get RNN speed benchmark") parser.add_argument( "--monitor_interval", type=int, default=1, help="Monitor validation every monitor_interval epoch", ) parser.add_argument("--metrics", action="store_true", help="Use Pred file to compute metrics") parser.add_argument( "--performance", action="store_true", help="Get method performance and paper plots", ) parser.add_argument("--science_plots", action="store_true", help="Plots of scientific interest") parser.add_argument( "--calibration", action="store_true", help="Plot calibration of trained classifiers", ) parser.add_argument( "--plot_lcs", action="store_true", help="Plot lcs with classification probabilities", ) parser.add_argument( "--plot_prediction_distribution", action="store_true", help="Plot lcs and the histogram of probability for each class", ) parser.add_argument("--model_files", nargs="+", help="Path to model files") parser.add_argument("--prediction_files", nargs="+", help="Path to prediction files") parser.add_argument("--metric_files", nargs="+", help="Path to metric files") parser.add_argument("--done_file", default=None, type=str, help="Done or failure file name") ####################### # PLASTICC parameters ####################### parser.add_argument( "--viz_plasticc", action="store_true", help="Visualize data PLASTICC competition", ) parser.add_argument( "--train_plasticc", action="store_true", help="Train model for PLASTICC competition", ) parser.add_argument( "--predict_plasticc", action="store_true", help="Make predictions for PLASTICC competition", ) parser.add_argument( "--data_plasticc_train", action="store_true", help="Create dataset for PLASTICC competition", ) parser.add_argument( "--data_plasticc_test", action="store_true", help="Create dataset for PLASTICC competition", ) ######################## # Data parameters ######################## dir_path = os.path.dirname(os.path.realpath(__file__)) default_dump_dir = str(Path(dir_path).parent.parent / "snndump") parser.add_argument( "--dump_dir", type=str, default=default_dump_dir, help="Default path where data and models are dumped", ) parser.add_argument( "--fits_dir", type=str, default=f"{default_dump_dir}/fits", help="Default path where fits to photometry are", ) parser.add_argument( "--raw_dir", type=str, default=f"{default_dump_dir}/raw", help="Default path where raw data is", ) parser.add_argument( "--redshift", choices=[None, "zpho", "zspe"], default=None, help="Host redshift used in classification: zpho, zspe", ) parser.add_argument( "--norm", choices=["none", 
"perfilter", "global"], default="global", help="Feature normalization: global does the same norm for all filters", ) parser.add_argument( "--peak_norm", choices=[None, "basic", "log"], default=None, help= "Delta peak normalization, basic just shifts the mean, log similar to flux", ) parser.add_argument( "--source_data", choices=["saltfit", "photometry"], default="saltfit", help="Data source used to select light-curves for supernnova", ) parser.add_argument( "--no_overwrite", action="store_true", help= "If True: do not clean processed_dir and preprocessed_dir when calling `python run.py --data`", ) parser.add_argument("--data_fraction", type=float, default=1.0, help="Fraction of data to use") parser.add_argument( "--data_training", default=False, action="store_true", help="Create database with mostly training set of 99.5%", ) parser.add_argument( "--data_testing", default=False, action="store_true", help="Create database with only validation set", ) # Photometry window parser.add_argument("--photo_window_files", nargs="+", help="Path to fits with PEAKMJD estimation") parser.add_argument( "--photo_window_var", type=str, default='PKMJDINI', help= "Variable representing PEAKMJD for photo window (in photo_window_files)" ) parser.add_argument("--photo_window_min", type=int, default=-30, help="Window size before peak") parser.add_argument("--photo_window_max", type=int, default=100, help="Window size after peak") # Survey configuration parser.add_argument("--list_filters", nargs='+', default=natsorted(["g", "i", "r", "z"]), help="Survey filters") parser.add_argument("--list_filters_combination", nargs='+', default=natsorted([ 'g', 'r', 'i', 'z', 'gr', 'gi', 'gz', 'ir', 'iz', 'rz', 'gir', 'giz', 'grz', 'irz', 'girz' ]), help="Possible combination of filters") ###################### # RNN parameters ###################### parser.add_argument("--cyclic", action="store_true", help="Use cyclic learning rate") parser.add_argument("--cyclic_phases", nargs=3, default=[5, 10, 15], type=int, help="Cyclic phases") parser.add_argument( "--random_length", choices=[True, False], default=True, type=lambda x: bool(strtobool(x)), help="Use random length sequences for training", ) parser.add_argument( "--random_start", action='store_true', help="Use random start length sequences for training (for peak)", ) parser.add_argument( "--random_redshift", action="store_true", help= "In PLASTICC, randomly set spectroscopic redshift to -1 (i.e. 
unknown)", ) parser.add_argument( "--weight_decay", type=float, default=0.0000001, help="L2 decay on weights (variational)", ) parser.add_argument( "--layer_type", default="lstm", type=str, choices=["lstm", "gru", "rnn"], help="recurrent layer type", ) parser.add_argument( "--model", default="vanilla", type=str, choices=["vanilla", "variational", "bayesian"], help="recurrent model type", ) parser.add_argument("--use_cuda", action="store_true", help="Use GPU (pytorch backend only)") parser.add_argument("--learning_rate", default=1e-3, type=float, help="Learning rate") parser.add_argument("--nb_classes", default=2, type=int, help="Number of classification targets") parser.add_argument("--sntypes", default=OrderedDict({ "101": "Ia", "120": "IIP", "121": "IIn", "122": "IIL1", "123": "IIL2", "132": "Ib", "133": "Ic" }), type=json.loads, help="SN classes in sims (put Ia always first)") parser.add_argument("--nb_epoch", default=90, type=int, help="Number of batches per epoch") parser.add_argument("--batch_size", default=128, type=int, help="Batch size") parser.add_argument("--hidden_dim", default=32, type=int, help="Hidden layer dimension") parser.add_argument("--num_layers", default=2, type=int, help="Number of recurrent layers") parser.add_argument("--dropout", default=0.05, type=float, help="Dropout value") parser.add_argument( "--bidirectional", choices=[True, False], default=True, type=lambda x: bool(strtobool(x)), help="Use bidirectional models", ) parser.add_argument( "--rnn_output_option", default="mean", type=str, choices=["standard", "mean"], help="RNN output options", ) parser.add_argument("--pi", default=0.75, type=float) parser.add_argument("--log_sigma1", default=-1.0, type=float) parser.add_argument("--log_sigma2", default=-7.0, type=float) parser.add_argument("--rho_scale_lower", default=4.0, type=float) parser.add_argument("--rho_scale_upper", default=3.0, type=float) # Different parameters for output layer to obtain better uncertainty parser.add_argument("--log_sigma1_output", default=-1.0, type=float) parser.add_argument("--log_sigma2_output", default=-7.0, type=float) parser.add_argument("--rho_scale_lower_output", default=4.0, type=float) parser.add_argument("--rho_scale_upper_output", default=3.0, type=float) parser.add_argument( "--num_inference_samples", type=int, default=50, help="Number of samples to use for Bayesian inference", ) parser.add_argument( "--mean_field_inference", action="store_true", help="Use mean field inference for bayesian models", ) ######################### # RandomForest parameters ######################### # Classifier initialization parser.add_argument( "--bootstrap", action="store_true", help="Activate bootstrap when building trees", ) parser.add_argument( "--min_samples_leaf", default=3, type=int, help="Minimum samples required to be a leaf node", ) parser.add_argument("--n_estimators", default=50, type=int, help="Number of trees") parser.add_argument("--min_samples_split", default=10, type=int, help="Min samples to create split") parser.add_argument("--criterion", default="entropy", type=str, help="Tree splitting criterion") parser.add_argument("--max_features", default=5, type=int, help="Max features per tree") parser.add_argument("--max_depth", default=7, type=int, help="Max tree depth") args = parser.parse_args() return args
"""Script to plot nasa dataset time-series.""" import glob import matplotlib.pyplot as plt import pandas as pd from natsort import natsorted from ddm_project.settings import datasets_dir root_dir = datasets_dir\ + "/nasa_valve_dataset/Waveform Data/COL 1 Time COL 2 Current" # print(glob.glob(root_dir + "/*.CSV")) filepaths = natsorted(glob.glob(root_dir + "/*.CSV")) blacklist = ['TEK0000{}.CSV'.format(i) for i in range(4, 10)] filepaths = [f for f in filepaths if f.split('/')[-1] not in blacklist] fig, axes = plt.subplots(2, 6) for i, p in enumerate(filepaths): df = pd.read_csv(p, header=None, names=('a', 'b')) print(len(df)) color = 'green' if i < 4 else 'red' ax = axes[i // 6, i % 6] ax.plot(df.iloc[:, 0], df.iloc[:, 1], color=color) ax.set_title(p.split('/')[-1]) plt.show()
async def upload_to_tg( message: Message, dirname: str, post: Post ) -> None: # pylint: disable=R0912 """ uploads downloaded post from local to telegram servers """ pto = (".jpg", ".jpeg", ".png", ".bmp") vdo = (".mkv", ".mp4", ".webm") paths = [] if post.typename == "GraphSidecar": # upload media group captioned = False media = [] for path in natsorted(os.listdir(dirname)): ab_path = dirname + "/" + path paths.append(ab_path) if str(path).endswith(pto): if captioned: media.append(InputMediaPhoto(media=ab_path)) else: media.append( InputMediaPhoto(media=ab_path, caption=get_caption(post)[:1023]) ) captioned = True elif str(path).endswith(vdo): if captioned: media.append(InputMediaVideo(media=ab_path)) else: media.append( InputMediaVideo(media=ab_path, caption=get_caption(post)[:1023]) ) captioned = True if media: await message.client.send_media_group(message.chat.id, media) await message.client.send_media_group(Config.LOG_CHANNEL_ID, media) if post.typename == "GraphImage": # upload a photo for path in natsorted(os.listdir(dirname)): if str(path).endswith(pto): ab_path = dirname + "/" + path paths.append(ab_path) await message.client.send_photo( message.chat.id, ab_path, caption=get_caption(post)[:1023] ) await message.client.send_photo( Config.LOG_CHANNEL_ID, ab_path, caption=get_caption(post)[:1023] ) if post.typename == "GraphVideo": # upload a video for path in natsorted(os.listdir(dirname)): if str(path).endswith(vdo): ab_path = dirname + "/" + path paths.append(ab_path) thumb = await get_thumb(ab_path) duration = 0 metadata = extractMetadata(createParser(ab_path)) if metadata and metadata.has("duration"): duration = metadata.get("duration").seconds await message.client.send_video( chat_id=message.chat.id, video=ab_path, duration=duration, thumb=thumb, caption=get_caption(post)[:1023], ) await message.client.send_video( chat_id=Config.LOG_CHANNEL_ID, video=ab_path, duration=duration, thumb=thumb, caption=get_caption(post)[:1023], ) await remove_thumb(thumb) for del_p in paths: if os.path.lexists(del_p): os.remove(del_p)
        id = self.vtkPoints.InsertNextPoint(point)
        self.vtkCells.InsertNextCell(1)
        self.vtkCells.InsertCellPoint(id)
        self.vtkCells.Modified()
        self.vtkPoints.Modified()

    def initPoints(self):
        self.vtkPoints = vtk.vtkPoints()
        self.vtkCells = vtk.vtkCellArray()
        self.vtkPolyData.SetPoints(self.vtkPoints)
        self.vtkPolyData.SetVerts(self.vtkCells)


files = glob.glob(
    '../Patient_Data/4DCT_109/Attempt3/displacement_surfaces/*.vtk')
files = natsorted(files)

# Load first file to access number of points
reader = vtkDataSetReader()
reader.SetFileName(files[4])
reader.ReadAllVectorsOn()
reader.ReadAllScalarsOn()
reader.Update()

# Load the data
data = reader.GetOutput()

# Get the number of points
Npoints = data.GetNumberOfPoints()
Nfiles = len(files)
def get_course_groups(self, course): """ Returns a list of the course groups""" return natsorted(list( self._database.groups.find({"courseid": course.get_id()})), key=lambda x: x["description"])
def pdb_seq_to_number(pdbfile, chain_ids, chain_identity='require_same'):
    """Get sequence and number of chain(s) for some protein in PDB file.

    Parameters
    ----------
    pdbfile : str
        Path to existing PDB file.
    chain_ids : list
        List of chains in PDB file. All these chains must correspond to the
        same underlying molecule (i.e., be monomers in a homo-oligomer).
    chain_identity : {'union', 'intersection', 'require_same'}
        How to parse chains. They are required to share the same wildtype
        at all sites they have in common. If all sites are not shared
        between all chains, take the union, the intersection, or raise
        an error if they are not exactly the same.

    Returns
    -------
    pandas.DataFrame
        Columns are:
          - 'sequential' : sequential 1, 2, ... numbering of residues
          - 'pdb_site' : PDB file residue number
          - 'wildtype' : wildtype residue identity (1-letter code)

    """
    with warnings.catch_warnings():
        warnings.simplefilter(
            'ignore',
            category=Bio.PDB.PDBExceptions.PDBConstructionWarning)
        structure = Bio.PDB.PDBParser().get_structure('_', pdbfile)

    if len(structure) != 1:
        raise ValueError(f"{pdbfile} has multiple models")
    else:
        model = list(structure)[0]

    aa_3to1 = {three.upper(): one for three, one in
               Bio.SeqUtils.IUPACData.protein_letters_3to1.items()}

    df = pd.DataFrame({}, columns=['pdb_site', 'wildtype', 'chain'])
    for chain_id in chain_ids:
        chain = model[chain_id]
        pdb_sites = []
        wildtypes = []
        for res in chain.child_list:
            heteroflag, resnum, insertcode = res.get_id()
            if not heteroflag.strip():  # ignore hetero-residues
                pdb_sites.append(f"{resnum}{insertcode.strip()}")
                wildtypes.append(aa_3to1[res.resname])
        if pdb_sites != natsort.natsorted(pdb_sites):
            raise ValueError(f"residues in {pdbfile} chain {chain_id} not "
                             'naturally sorted')
        # DataFrame.append was removed in pandas 2.0; concatenate instead
        df = pd.concat([df,
                        pd.DataFrame({'pdb_site': pdb_sites,
                                      'wildtype': wildtypes,
                                      'chain': chain_id})])

    # check all chains have same wildtype at each position
    mismatched_wt = (df
                     .groupby('pdb_site', sort=False)
                     .aggregate({'wildtype': 'nunique'})
                     .reset_index()
                     .query('wildtype != 1')
                     ['pdb_site']
                     .tolist()
                     )
    if mismatched_wt:
        raise ValueError(f"{pdbfile} chains {', '.join(chain_ids)} differ in "
                         f"wildtype at sites:\n{', '.join(mismatched_wt)}")

    if chain_identity == 'require_same':
        not_exact = (df
                     .groupby(['pdb_site', 'wildtype'], sort=False)
                     .aggregate({'chain': 'count'})
                     .reset_index()
                     .query(f"chain != {len(chain_ids)}")
                     ['pdb_site']
                     .tolist()
                     )
        if not_exact:
            raise ValueError(f"`chain_identity` is {chain_identity}, but "
                             f"{pdbfile} chains {', '.join(chain_ids)} "
                             f"don't all have sites:\n{', '.join(not_exact)}")
        else:
            df = df[['pdb_site', 'wildtype']].drop_duplicates()
    elif chain_identity == 'union':
        df = (df
              [['pdb_site', 'wildtype']]
              .drop_duplicates()
              )
    elif chain_identity == 'intersection':
        df = (df
              .groupby(['pdb_site', 'wildtype'], sort=False)
              .aggregate({'chain': 'count'})
              .reset_index()
              .query(f"chain == {len(chain_ids)}")
              )
    else:
        raise ValueError(f"invalid `chain_identity` {chain_identity}")

    # natural sort on site as here: https://stackoverflow.com/a/29582718
    df = df.set_index('pdb_site')
    df = (df
          .reindex(index=natsort.natsorted(df.index))
          .reset_index()
          .assign(sequential=lambda x: x.reset_index().index + 1)
          [['sequential', 'pdb_site', 'wildtype']]
          )

    return df
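# Hedged illustration of the "naturally sorted" check above (site labels invented):
# natsort orders PDB residue labels, including insertion codes such as "52A",
# numerically, whereas plain sorted() treats them as strings.
import natsort

pdb_sites = ["1", "2", "10", "52", "52A", "53"]
print(sorted(pdb_sites))             # ['1', '10', '2', '52', '52A', '53']
print(natsort.natsorted(pdb_sites))  # ['1', '2', '10', '52', '52A', '53']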
def parse_xml(filename, hostname): mini_graph_path, root = reconcile_mini_graph_locations(filename, hostname) u_neighbors = None u_devices = None hwsku = None bgp_sessions = None bgp_asn = None intfs = None vlan_intfs = None pc_intfs = None vlans = None pcs = None mgmt_intf = None lo_intf = None neighbors = None devices = None hostname = None syslog_servers = [] dhcp_servers = [] ntp_servers = [] mgmt_routes = [] bgp_peers_with_range = [] deployment_id = None hwsku_qn = QName(ns, "HwSku") hostname_qn = QName(ns, "Hostname") for child in root: if child.tag == str(hwsku_qn): hwsku = child.text if child.tag == str(hostname_qn): hostname = child.text # port_alias_map maps ngs port name to sonic port name if hwsku == "Force10-S6000": for i in range(0, 128, 4): port_alias_map["fortyGigE0/%d" % i] = "Ethernet%d" % i elif hwsku == "Force10-S6100": for i in range(0, 4): for j in range(0, 16): port_alias_map["fortyGigE1/%d/%d" % (i + 1, j + 1)] = "Ethernet%d" % (i * 16 + j) elif hwsku == "Arista-7050-QX32": for i in range(1, 25): port_alias_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) for i in range(25, 33): port_alias_map["Ethernet%d" % i] = "Ethernet%d" % ((i - 1) * 4) elif hwsku == "Arista-7050-QX-32S": for i in range(6, 29): port_alias_map["Ethernet%d/1" % i] = "Ethernet%d" % ((i - 1) * 4) for i in range(1, 5) + range(29, 37): port_alias_map["Ethernet%d" % i] = "Ethernet%d" % ((i - 1) * 4) else: for i in range(0, 128, 4): port_alias_map["Ethernet%d" % i] = "Ethernet%d" % i for child in root: if child.tag == str(QName(ns, "DpgDec")): (intfs, lo_intfs, mgmt_intf, vlans, pcs, acls) = parse_dpg(child, hostname) elif child.tag == str(QName(ns, "CpgDec")): (bgp_sessions, bgp_asn, bgp_peers_with_range) = parse_cpg(child, hostname) elif child.tag == str(QName(ns, "PngDec")): (neighbors, devices, console_dev, console_port, mgmt_dev, mgmt_port) = parse_png(child, hostname) elif child.tag == str(QName(ns, "UngDec")): (u_neighbors, u_devices, _, _, _, _) = parse_png(child, hostname) elif child.tag == str(QName(ns, "MetadataDeclaration")): (syslog_servers, dhcp_servers, ntp_servers, mgmt_routes, deployment_id) = parse_meta(child, hostname) # Create port index map. Since we currently output a mix of NGS names # and SONiC mapped names, we include both in this map. # SONiC aliases, when sorted in natural sort order, match the phyical port # index order, so we sort by SONiC port alias, and map # back to NGS names after sorting using this inverted map # # TODO: Move all alias-related code out of minigraph_facts.py and into # its own module to be used as another layer after parsing the minigraph. 
inverted_port_alias_map = {v: k for k, v in port_alias_map.iteritems()} # Start by creating a list of all port aliases port_alias_list = [] for k, v in port_alias_map.iteritems(): port_alias_list.append(v) # Sort the list in natural order port_alias_list_sorted = natsorted(port_alias_list) # Create map from SONiC alias to physical index and NGS name to physical index port_index_map = {} for idx, val in enumerate(port_alias_list_sorted): port_index_map[val] = idx port_index_map[inverted_port_alias_map[val]] = idx # Create maps: # from SONiC phy iface name to NGS phy iface name # from NGS phy iface name to SONiC phy iface name # These maps include mappings from original name to original name too iface_map_sonic_to_ngs = {} iface_map_ngs_to_sonic = {} for val in port_alias_list_sorted: iface_map_sonic_to_ngs[val] = inverted_port_alias_map[val] iface_map_sonic_to_ngs[ inverted_port_alias_map[val]] = inverted_port_alias_map[val] iface_map_ngs_to_sonic[inverted_port_alias_map[val]] = val iface_map_ngs_to_sonic[val] = val # Generate results Tree = lambda: defaultdict(Tree) results = Tree() results['minigraph_hwsku'] = hwsku # sorting by lambdas are not easily done without custom filters. # TODO: add jinja2 filter to accept a lambda to sort a list of dictionaries by attribute. # TODO: alternatively (preferred), implement class containers for multiple-attribute entries, enabling sort by attr results['minigraph_bgp'] = sorted(bgp_sessions, key=lambda x: x['addr']) results['minigraph_bgp_asn'] = bgp_asn results['minigraph_bgp_peers_with_range'] = bgp_peers_with_range # TODO: sort does not work properly on all interfaces of varying lengths. Need to sort by integer group(s). phyport_intfs = [] vlan_intfs = [] pc_intfs = [] for intf in intfs: intfname = intf['attachto'] if intfname[0:4] == 'Vlan': vlan_intfs.append(intf) elif intfname[0:11] == 'PortChannel': pc_intfs.append(intf) else: phyport_intfs.append(intf) results['minigraph_interfaces'] = sorted(phyport_intfs, key=lambda x: x['attachto']) results['minigraph_vlan_interfaces'] = sorted(vlan_intfs, key=lambda x: x['attachto']) results['minigraph_portchannel_interfaces'] = sorted( pc_intfs, key=lambda x: x['attachto']) results['minigraph_ports'] = ports results['minigraph_vlans'] = vlans results['minigraph_portchannels'] = pcs results['minigraph_mgmt_interface'] = mgmt_intf results['minigraph_lo_interfaces'] = lo_intfs results['minigraph_acls'] = acls results['minigraph_neighbors'] = neighbors results['minigraph_devices'] = devices results['minigraph_underlay_neighbors'] = u_neighbors results['minigraph_underlay_devices'] = u_devices results['minigraph_port_indices'] = port_index_map results['minigraph_map_sonic_to_ngs'] = iface_map_sonic_to_ngs results['minigraph_map_ngs_to_sonic'] = iface_map_ngs_to_sonic results['minigraph_as_xml'] = mini_graph_path if devices != None: results['minigraph_console'] = get_console_info( devices, console_dev, console_port) results['minigraph_mgmt'] = get_mgmt_info(devices, mgmt_dev, mgmt_port) results['minigraph_hostname'] = hostname results['inventory_hostname'] = hostname results['syslog_servers'] = syslog_servers results['dhcp_servers'] = dhcp_servers results['ntp_servers'] = ntp_servers results['forced_mgmt_routes'] = mgmt_routes results['deployment_id'] = deployment_id return results
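# Hedged sketch of the property the port-index mapping above relies on (aliases are
# hypothetical): natural sorting puts SONiC-style interface names in physical port
# order, which lexicographic sorting does not.
from natsort import natsorted

aliases = ["Ethernet8", "Ethernet40", "Ethernet4"]
print(sorted(aliases))     # ['Ethernet4', 'Ethernet40', 'Ethernet8']
print(natsorted(aliases))  # ['Ethernet4', 'Ethernet8', 'Ethernet40']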
def calculate_metrics(self): SSIM = np.zeros([len(self.phantoms), 4]) DSC = np.zeros([len(self.phantoms), 4 * (self.classes - 1)]) MSE = np.zeros([len(self.phantoms), 4]) PSNR = np.zeros([len(self.phantoms), 4]) SSIM_ROI = np.zeros([len(self.phantoms), 4]) DSC_ROI = np.zeros([len(self.phantoms), 4 * (self.classes - 1)]) MSE_ROI = np.zeros([len(self.phantoms), 4]) PSNR_ROI = np.zeros([len(self.phantoms), 4]) ssim_per_slice = np.zeros([len(self.it), 4, 501]) mse_per_slice = np.zeros([len(self.it), 4, 501]) dsc_per_slice = np.zeros([len(self.it), 4 * (self.classes - 1), 501]) psnr_per_slice = np.zeros([len(self.it), 4, 501]) for z, phantom in enumerate(self.phantoms): for it_idx, ii in enumerate(self.it): print(phantom, 'Phantom') gold_standard_imgs = natsorted( glob( '/bigstore/felix/Walnuts/Walnut{}/Reconstructions/nnls_pos123*' .format(phantom))) input_imgs = natsorted( glob( '/bigstore/felix/Walnuts/Walnut{}/Reconstructions/fdk_pos{}*' .format(phantom, self.pos))) ir_imgs = natsorted( glob( '/bigstore/jordi/Walnuts/IR_reconstruction/Walnut{}/Reconstructions/nnls_pos1*' .format(phantom))) rad2ax_imgs = natsorted( glob(self.path_to_rad2ax_imgs + '{}_pos{}_it{}_depth{}_phantom{}/*'.format( self.architecture, self.pos, ii, self.depth, phantom))) it_hor = 2 #needs to be adjusted based on reconstructions hor_imgs = natsorted( glob(self.path_to_hor_imgs + '{}_pos{}_it{}_*_phantom{}/*'.format( self.architecture, self.pos, it_hor, phantom))) assert len(gold_standard_imgs) == len(input_imgs) == len( rad2ax_imgs) == len(hor_imgs) == len(ir_imgs) inp_vol = np.zeros([len(input_imgs), 501, 501]) gs = np.zeros([len(input_imgs), 501, 501]) ir_vol = np.zeros([len(input_imgs), 501, 501]) rad2ax_vol = np.zeros([len(input_imgs), 501, 501]) horizontal_vol = np.zeros([len(input_imgs), 501, 501]) imgs = [input_imgs, ir_imgs, hor_imgs, rad2ax_imgs] volumes = [inp_vol, ir_vol, horizontal_vol, rad2ax_vol] for j, img in enumerate(imgs): print(j, 'j', len(img)) # Calculate metrics per slice for jj, gs_img in enumerate(gold_standard_imgs): image = img[jj] if self.compute_ssim == True: ssim_per_slice[it_idx, j, jj] = self.ssim(img[jj], gs_img) if self.compute_mse == True: mse_per_slice[it_idx, j, jj] = self.mse(img[jj], gs_img) if self.compute_dsc == True: dsc_per_slice[it_idx, j * (self.classes - 1):(j + 1) * (self.classes - 1), jj] = self.dsc( self.otsu(img[jj]), self.otsu(gs_img) ) # we get 2 outputs for 3 clasess if self.compute_psnr == True: psnr_per_slice[it_idx, j, jj] = self.psnr(img[jj], gs_img) # Create volumes inp_vol[jj, :, :] = imread(input_imgs[jj]) gs[jj, :, :] = imread(gold_standard_imgs[jj]) ir_vol[jj, :, :] = imread(ir_imgs[jj]) rad2ax_vol[jj, :, :] = imread(rad2ax_imgs[jj]) horizontal_vol[jj, :, :] = imread(hor_imgs[jj]) # Calculate metrics for volumes as a whole for k, vol in enumerate(volumes): if self.compute_ssim == True: SSIM[z, k] = self.ssim(vol, gs) SSIM_ROI[z, k] = self.ssim(vol, gs, roi=self.ROI) if self.compute_mse == True: MSE[z, k] = self.mse(vol, gs) MSE_ROI[z, k] = self.mse(vol, gs, roi=self.ROI) if self.compute_dsc == True: DSC[z, k * (self.classes - 1):(k + 1) * (self.classes - 1)] = self.dsc( self.otsu(vol, num_classes=self.classes), self.otsu(gs, num_classes=self.classes)) print(DSC.shape, 'shape DSC') DSC_ROI[z, k * (self.classes - 1):(k + 1) * (self.classes - 1)] = self.dsc( self.otsu(vol, num_classes=self.classes), self.otsu(gs, num_classes=self.classes), roi=self.ROI) if self.compute_psnr == True: PSNR[z, k] = self.psnr(vol, gs) PSNR_ROI[z, k] = self.psnr(vol, gs, 
roi=self.ROI) print('Processed iteration {}'.format(ii)) #save selected slices, optional slices = [100] if len(slices) > 0: for slice in slices: self.save_slice(phantom, slice, gold_standard_imgs, rad2ax_imgs) make_per_phantom_plots = True if make_per_phantom_plots is True: otsu_gs = self.otsu(gs, num_classes=self.classes) otsu_rad2ax = self.otsu(rad2ax_vol, num_classes=self.classes) otsu_hor = self.otsu(horizontal_vol, num_classes=self.classes) vol = otsu_gs + otsu_rad2ax + otsu_hor vol[vol != 6] = 0 for x in range(vol.shape[0]): if np.sum(vol[x, :, :]) > 1000: x1 = x break x2 = max(np.nonzero(vol)[0]) self.plot_per_phantom(phantom, ssim_per_slice, mse_per_slice, dsc_per_slice, psnr_per_slice, x1, x2) comparisons = [ 'input vs gold_standard', 'iterative vs gold standard', 'radial2axial vs gold standard', 'horizontal vs gold standard' ] for b in range(len(self.phantoms)): for a in range(4): print( "SSIM Phantom {} - IT{} - {}:".format( self.phantoms[b], ii, comparisons[a]), SSIM[b, a]) print( "DSC Phantom {} - IT{} - {}:".format( self.phantoms[b], ii, comparisons[a]), DSC[b, a]) print( "MSE Phantom {} - IT{} - {}:".format( self.phantoms[b], ii, comparisons[a]), MSE[b, a]) print( "PSNR Phantom {} - IT{} - {}:".format( self.phantoms[b], ii, comparisons[a]), PSNR[b, a]) print( "SSIM-ROI Phantom {} - IT{} - {}:".format( self.phantoms[b], ii, comparisons[a]), SSIM_ROI[b, a]) print( "DSC-ROI Phantom {} - IT{} - {}:".format( self.phantoms[b], ii, comparisons[a]), DSC_ROI[b, a]) print( "MSE-ROI Phantom {} - IT{} - {}:".format( self.phantoms[b], ii, comparisons[a]), MSE_ROI[b, a]) print( "PSNR-ROI Phantom {} - IT{} - {}:".format( self.phantoms[b], ii, comparisons[a]), PSNR_ROI[b, a]) comparisons_header = "input iterative horizontal radial2axial" #save txt files if self.compute_ssim == True: np.savetxt(self.results_path + 'SSIM_{}_it{}.txt'.format(self.architecture, ii), SSIM, header=comparisons_header) np.savetxt(self.results_path + 'SSIM_ROI_{}_it{}.txt'.format(self.architecture, ii), SSIM_ROI, header=comparisons_header) if self.compute_mse == True: np.savetxt(self.results_path + 'MSE_{}_it{}.txt'.format(self.architecture, ii), MSE, header=comparisons_header) np.savetxt(self.results_path + 'MSE_ROI_{}_it{}.txt'.format(self.architecture, ii), MSE_ROI, header=comparisons_header) if self.compute_dsc == True: np.savetxt(self.results_path + 'DSC_{}_it{}.txt'.format(self.architecture, ii), DSC, header=comparisons_header) np.savetxt(self.results_path + 'DSC_ROI_{}_it{}.txt'.format(self.architecture, ii), DSC_ROI, header=comparisons_header) np.savetxt(self.results_path + 'DSC_low_{}_it{}.txt'.format(self.architecture, ii), DSC[:, ::2], header=comparisons_header) np.savetxt(self.results_path + 'DSC_ROI_low_{}_it{}.txt'.format(self.architecture, ii), DSC_ROI[:, ::2], header=comparisons_header) np.savetxt(self.results_path + 'DSC_high_{}_it{}.txt'.format(self.architecture, ii), DSC[:, 1::2], header=comparisons_header) np.savetxt( self.results_path + 'DSC_ROI_high_{}_it{}.txt'.format(self.architecture, ii), DSC_ROI[:, 1::2], header=comparisons_header) if self.compute_psnr == True: np.savetxt(self.results_path + 'PSNR_{}_it{}.txt'.format(self.architecture, ii), PSNR, header=comparisons_header) np.savetxt(self.results_path + 'PSNR_ROI_{}_it{}.txt'.format(self.architecture, ii), PSNR_ROI, header=comparisons_header) DSC_low = DSC[:, 6] DSC_ROI_low = DSC_ROI[:, 6] DSC_high = DSC[:, 7] DSC_ROI_high = DSC_ROI[:, 7] return (SSIM[:, 3][0], SSIM_ROI[:, 3][0], MSE[:, 3][0], MSE_ROI[:, 3][0], DSC_low[0], DSC_ROI_low[0], 
DSC_high[0], DSC_ROI_high[0], PSNR[:, 3][0], PSNR_ROI[:, 3][0])
def run(args): import os import sys import logging import pandas as pd import numpy as np from natsort import natsorted from evo.tools import file_interface, user, settings from evo.tools.settings import SETTINGS settings.configure_logging(args.verbose, args.silent, args.debug) if args.debug: import pprint logging.debug("main_parser config:\n" + pprint.pformat({arg: getattr(args, arg) for arg in vars(args)}) + "\n") # store data in Pandas data frames for easier analysis raw_df = pd.DataFrame() stat_df = pd.DataFrame() info_df = pd.DataFrame() use_seconds = False for result_file in args.result_files: logging.debug(SEP) result_obj = file_interface.load_res_file(result_file, True) short_est_name = os.path.splitext(os.path.basename(result_obj.info["est_name"]))[0] error_array = result_obj.np_arrays["error_array"] if "seconds_from_start" in result_obj.np_arrays: seconds_from_start = result_obj.np_arrays["seconds_from_start"] else: seconds_from_start = None if not args.no_warnings and (short_est_name in info_df.columns): logging.warning("double entry detected: " + short_est_name) if not user.confirm("ignore? enter 'y' to go on or any other key to quit"): sys.exit() if SETTINGS.plot_usetex: short_est_name = short_est_name.replace("_", "\\_") if args.use_abs_time: if "timestamps" in result_obj.np_arrays: index = result_obj.np_arrays["timestamps"] use_seconds = True else: raise RuntimeError("no timestamps found for --use_abs_time") elif seconds_from_start is not None: index = seconds_from_start.tolist() use_seconds = True else: index = np.arange(0, error_array.shape[0]) result_obj.info["traj. backup?"] = \ all(k in result_obj.trajectories for k in ("traj_ref", "traj_est")) result_obj.info["res_file"] = result_file new_raw_df = pd.DataFrame({short_est_name: error_array.tolist()}, index=index) duplicates = new_raw_df.index.get_level_values(0).get_duplicates() if len(duplicates) != 0: logging.warning("duplicate indices in error array of {} - " "keeping only first occurrence of duplicates".format(result_file)) new_raw_df.drop_duplicates(keep="first", inplace=True) new_info_df = pd.DataFrame({short_est_name: result_obj.info}) new_stat_df = pd.DataFrame({short_est_name: result_obj.stats}) # natural sort num strings "10" "100" "20" -> "10" "20" "100" new_stat_df = new_stat_df.reindex(index=natsorted(new_stat_df.index)) # column-wise concatenation raw_df = pd.concat([raw_df, new_raw_df], axis=1) info_df = pd.concat([info_df, new_info_df], axis=1) stat_df = pd.concat([stat_df, new_stat_df], axis=1) # if verbose: log infos of the current data logging.debug("\n" + result_obj.pretty_str(title=True, stats=False, info=True)) logging.debug(SEP) logging.info("\nstatistics overview:\n" + stat_df.T.to_string(line_width=80) + "\n") # check titles first_title = info_df.ix["title", 0] first_res_file = info_df.ix["res_file", 0] if args.save_table or args.plot or args.save_plot: for short_est_name, column in info_df.iteritems(): if column.ix["title"] != first_title and not args.no_warnings: logging.info(SEP) msg = ("mismatching titles, you probably use data from different metrics" + "\nconflict:\n{} {}\n{}\n".format("<"*7, first_res_file, first_title) + "{}\n{}\n".format("="*7, column.ix["title"]) + "{} {}\n\n".format(">"*7, column.ix["res_file"]) + "only the first one will be used as the title!") logging.warning(msg) if not user.confirm("plot/save anyway? 
- enter 'y' or any other key to exit"): sys.exit() if args.save_table: logging.debug(SEP) if args.no_warnings or user.check_and_confirm_overwrite(args.save_table): table_fmt = SETTINGS.table_export_format if SETTINGS.table_export_transpose: getattr(stat_df.T, "to_" + table_fmt)(args.save_table) else: getattr(stat_df, "to_" + table_fmt)(args.save_table) logging.debug(table_fmt + " table saved to: " + args.save_table) if args.plot or args.save_plot or args.serialize_plot: # check if data has NaN "holes" due to different indices inconsistent = raw_df.isnull().values.any() if inconsistent and not args.no_warnings: logging.debug(SEP) logging.warning("data lengths/indices are not consistent, plotting could make no sense") from evo.tools import plot import matplotlib.pyplot as plt import seaborn as sns import math from scipy import stats # use default plot settings figsize = (SETTINGS.plot_figsize[0], SETTINGS.plot_figsize[1]) use_cmap = SETTINGS.plot_multi_cmap.lower() != "none" colormap = SETTINGS.plot_multi_cmap if use_cmap else None linestyles = ["-o" for x in args.result_files] if args.plot_markers else None # labels according to first dataset title = first_title if "xlabel" in info_df.ix[:, 0].index: index_label = info_df.ix["xlabel", 0] else: index_label = "$t$ (s)" if use_seconds else "index" metric_label = info_df.ix["label", 0] plot_collection = plot.PlotCollection(title) # raw value plot fig_raw = plt.figure(figsize=figsize) # handle NaNs from concat() above raw_df.interpolate(method="index").plot(ax=fig_raw.gca(), colormap=colormap, style=linestyles, title=first_title) plt.xlabel(index_label) plt.ylabel(metric_label) plt.legend(frameon=True) plot_collection.add_figure("raw", fig_raw) # statistics plot fig_stats = plt.figure(figsize=figsize) exclude = stat_df.index.isin(["sse"]) # don't plot sse stat_df[~exclude].plot(kind="barh", ax=fig_stats.gca(), colormap=colormap, stacked=False) plt.xlabel(metric_label) plt.legend(frameon=True) plot_collection.add_figure("stats", fig_stats) # grid of distribution plots raw_tidy = pd.melt(raw_df, value_vars=list(raw_df.columns.values), var_name="estimate", value_name=metric_label) col_wrap = 2 if len(args.result_files) <= 2 else math.ceil(len(args.result_files) / 2.0) dist_grid = sns.FacetGrid(raw_tidy, col="estimate", col_wrap=col_wrap) dist_grid.map(sns.distplot, metric_label) # fits=stats.gamma plot_collection.add_figure("histogram", dist_grid.fig) # box plot fig_box = plt.figure(figsize=figsize) ax = sns.boxplot(x=raw_tidy["estimate"], y=raw_tidy[metric_label], ax=fig_box.gca()) # ax.set_xticklabels(labels=[item.get_text() for item in ax.get_xticklabels()], rotation=30) plot_collection.add_figure("box_plot", fig_box) # violin plot fig_violin = plt.figure(figsize=figsize) ax = sns.violinplot(x=raw_tidy["estimate"], y=raw_tidy[metric_label], ax=fig_violin.gca()) # ax.set_xticklabels(labels=[item.get_text() for item in ax.get_xticklabels()], rotation=30) plot_collection.add_figure("violin_histogram", fig_violin) if args.plot: plot_collection.show() if args.save_plot: logging.debug(SEP) plot_collection.export(args.save_plot, confirm_overwrite=not args.no_warnings) if args.serialize_plot: logging.debug(SEP) plot_collection.serialize(args.serialize_plot, confirm_overwrite=not args.no_warnings)
susceptibility_exp = [] dimensionality_ = [] degre_ = [] max_degre_ = [] path_simulation_output = ['/home/brainlab/Desktop/Rudas/Data/Ising/new_experiment/simulation/000_density_20'] #'/home/brainlab/Desktop/Rudas/Data/Ising/experiment_2/simulation/13_undirected_unweighted_0.8'] # '/home/brainlab/Desktop/Rudas/Data/Ising/experiment_2/simulation/3_undirected_unweighted', # '/home/brainlab/Desktop/Rudas/Data/Ising/experiment_2/simulation/4_undirected_weighted'] sizes_ = np.linspace(0.05, 100, num=19).astype(np.int16) for path in path_simulation_output: print(path) dimensionality_exp = [] for simulation in natsorted(os.listdir(path)): path_simulation = path + '/' + simulation if os.path.isdir(path_simulation): print() print(simulation) print() pkl_file = open(path_simulation + '/parameters.pkl', 'rb') simulation_parameters = pickle.load(pkl_file) pkl_file.close() ts = np.linspace(simulation_parameters['temperature_parameters'][0], simulation_parameters['temperature_parameters'][1], simulation_parameters['temperature_parameters'][2])
def _do_post_list(start=None, stop=None, reverse=False, tags=None, categories=None, slugs=None, post_type='post', show_all=False, lang=None, template='post_list_directive.tmpl', sort=None, id=None, data=None, state=None, site=None): if lang is None: lang = utils.LocaleBorg().current_lang if site.invariant: # for testing purposes post_list_id = id or 'post_list_' + 'fixedvaluethatisnotauuid' else: post_list_id = id or 'post_list_' + uuid.uuid4().hex filtered_timeline = [] posts = [] step = -1 if reverse is None else None # TODO: remove in v8 if show_all is None: timeline = [p for p in site.timeline] elif post_type == 'page': timeline = [p for p in site.timeline if not p.use_in_feeds] elif post_type == 'all': timeline = [p for p in site.timeline] else: # post timeline = [p for p in site.timeline if p.use_in_feeds] # TODO: replaces show_all, uncomment in v8 # if post_type == 'page': # timeline = [p for p in site.timeline if not p.use_in_feeds] # elif post_type == 'all': # timeline = [p for p in site.timeline] # else: # post # timeline = [p for p in site.timeline if p.use_in_feeds] if categories: timeline = [ p for p in timeline if p.meta('category', lang=lang).lower() in categories ] for post in timeline: if tags: cont = True tags_lower = [t.lower() for t in post.tags] for tag in tags: if tag in tags_lower: cont = False if cont: continue filtered_timeline.append(post) if sort: filtered_timeline = natsort.natsorted( filtered_timeline, key=lambda post: post.meta[lang][sort], alg=natsort.ns.F | natsort.ns.IC) for post in filtered_timeline[start:stop:step]: if slugs: cont = True for slug in slugs: if slug == post.meta('slug'): cont = False if cont: continue bp = post.translated_base_path(lang) if os.path.exists(bp) and state: state.document.settings.record_dependencies.add(bp) posts += [post] if not posts: return [] template_data = { 'lang': lang, 'posts': posts, # Need to provide str, not TranslatableSetting (Issue #2104) 'date_format': site.GLOBAL_CONTEXT.get('date_format')[lang], 'post_list_id': post_list_id, 'messages': site.MESSAGES, } output = site.template_system.render_template(template, None, template_data) return output
def handler(self, start=None, stop=None, reverse=False, tags=None, require_all_tags=False, categories=None, sections=None, slugs=None, post_type='post', type=False, lang=None, template='post_list_directive.tmpl', sort=None, id=None, data=None, state=None, site=None, date=None, filename=None, post=None): """Generate HTML for post-list.""" if lang is None: lang = utils.LocaleBorg().current_lang if site.invariant: # for testing purposes post_list_id = id or 'post_list_' + 'fixedvaluethatisnotauuid' else: post_list_id = id or 'post_list_' + uuid.uuid4().hex # Get post from filename if available if filename: self_post = site.post_per_input_file.get(filename) else: self_post = None if self_post: self_post.register_depfile("####MAGIC####TIMELINE", lang=lang) # If we get strings for start/stop, make them integers if start is not None: start = int(start) if stop is not None: stop = int(stop) # Parse tags/categories/sections/slugs (input is strings) categories = [c.strip().lower() for c in categories.split(',')] if categories else [] sections = [s.strip().lower() for s in sections.split(',')] if sections else [] slugs = [s.strip() for s in slugs.split(',')] if slugs else [] filtered_timeline = [] posts = [] step = None if reverse is False else -1 if type is not False: post_type = type if post_type == 'page' or post_type == 'pages': timeline = [p for p in site.timeline if not p.use_in_feeds] elif post_type == 'all': timeline = [p for p in site.timeline] else: # post timeline = [p for p in site.timeline if p.use_in_feeds] # self_post should be removed from timeline because this is redundant timeline = [p for p in timeline if p.source_path != filename] if categories: timeline = [ p for p in timeline if p.meta('category', lang=lang).lower() in categories ] if sections: timeline = [ p for p in timeline if p.section_name(lang).lower() in sections ] if tags: tags = {t.strip().lower() for t in tags.split(',')} if require_all_tags: compare = set.issubset else: compare = operator.and_ for post in timeline: post_tags = {t.lower() for t in post.tags} if compare(tags, post_tags): filtered_timeline.append(post) else: filtered_timeline = timeline if sort: filtered_timeline = natsort.natsorted( filtered_timeline, key=lambda post: post.meta[lang][sort], alg=natsort.ns.F | natsort.ns.IC) if date: _now = utils.current_time() filtered_timeline = [ p for p in filtered_timeline if date_in_range(utils.html_unescape(date), p.date, now=_now) ] for post in filtered_timeline[start:stop:step]: if slugs: cont = True for slug in slugs: if slug == post.meta('slug'): cont = False if cont: continue bp = post.translated_base_path(lang) if os.path.exists(bp) and state: state.document.settings.record_dependencies.add(bp) elif os.path.exists(bp) and self_post: self_post.register_depfile(bp, lang=lang) posts += [post] if not posts: return '', [] template_deps = site.template_system.template_deps(template) if state: # Register template as a dependency (Issue #2391) for d in template_deps: state.document.settings.record_dependencies.add(d) elif self_post: for d in template_deps: self_post.register_depfile(d, lang=lang) template_data = { 'lang': lang, 'posts': posts, # Need to provide str, not TranslatableSetting (Issue #2104) 'date_format': site.GLOBAL_CONTEXT.get('date_format')[lang], 'post_list_id': post_list_id, 'messages': site.MESSAGES, '_link': site.link, } output = site.template_system.render_template(template, None, template_data) return output, template_deps
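# Hedged note on the alg flags used above (sample metadata values are invented):
# ns.F (FLOAT) makes natsort parse embedded numbers as floats instead of ints, and
# ns.IC (IGNORECASE) folds case before comparing, so differently cased metadata
# values interleave as expected.
import natsort
from natsort import ns

values = ["Chapter2.5", "chapter2.25", "CHAPTER10"]
print(natsort.natsorted(values))                    # case-sensitive, integer parsing
print(natsort.natsorted(values, alg=ns.F | ns.IC))  # ['chapter2.25', 'Chapter2.5', 'CHAPTER10']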
def __init__( self, source_path, config, destination, use_in_feeds, messages, template_name, compiler, destination_base=None, metadata_extractors_by=None ): """Initialize post. The source path is the user created post file. From it we calculate the meta file, as well as any translations available, and the .html fragment file path. destination_base must be None or a TranslatableSetting instance. If specified, it will be prepended to the destination path. """ self.config = config self.compiler = compiler self.compiler_contexts = {} self.compile_html = self.compiler.compile self.demote_headers = self.compiler.demote_headers and self.config['DEMOTE_HEADERS'] tzinfo = self.config['__tzinfo__'] if self.config['FUTURE_IS_NOW']: self.current_time = None else: self.current_time = current_time(tzinfo) self.translated_to = set([]) self._prev_post = None self._next_post = None self.base_url = self.config['BASE_URL'] self.is_draft = False self.is_private = False self.strip_indexes = self.config['STRIP_INDEXES'] self.index_file = self.config['INDEX_FILE'] self.pretty_urls = self.config['PRETTY_URLS'] self.source_path = source_path # posts/blah.txt self.post_name = os.path.splitext(source_path)[0] # posts/blah _relpath = os.path.relpath(self.post_name) if _relpath != self.post_name: self.post_name = _relpath.replace('..' + os.sep, '_..' + os.sep) # cache[\/]posts[\/]blah.html self.base_path = os.path.join(self.config['CACHE_FOLDER'], self.post_name + ".html") # cache/posts/blah.html self._base_path = self.base_path.replace('\\', '/') self.metadata_path = self.post_name + ".meta" # posts/blah.meta self.folder_relative = destination self.folder_base = destination_base self.default_lang = self.config['DEFAULT_LANG'] self.translations = self.config['TRANSLATIONS'] self.messages = messages self.skip_untranslated = not self.config['SHOW_UNTRANSLATED_POSTS'] self._template_name = template_name self.is_two_file = True self._reading_time = None self._remaining_reading_time = None self._paragraph_count = None self._remaining_paragraph_count = None self._dependency_file_fragment = defaultdict(list) self._dependency_file_page = defaultdict(list) self._dependency_uptodate_fragment = defaultdict(list) self._dependency_uptodate_page = defaultdict(list) self._depfile = defaultdict(list) if metadata_extractors_by is None: self.metadata_extractors_by = {'priority': {}, 'source': {}} else: self.metadata_extractors_by = metadata_extractors_by # Load internationalized metadata for lang in self.translations: if os.path.isfile(get_translation_candidate(self.config, self.source_path, lang)): self.translated_to.add(lang) default_metadata, default_used_extractor = get_meta(self, lang=None) self.meta = Functionary(lambda: None, self.default_lang) self.used_extractor = Functionary(lambda: None, self.default_lang) self.meta[self.default_lang] = default_metadata self.used_extractor[self.default_lang] = default_used_extractor for lang in self.translations: if lang != self.default_lang: meta = defaultdict(lambda: '') meta.update(default_metadata) _meta, _extractors = get_meta(self, lang) meta.update(_meta) self.meta[lang] = meta self.used_extractor[lang] = _extractors if not self.is_translation_available(self.default_lang): # Special case! 
(Issue #373) # Fill default_metadata with stuff from the other languages for lang in sorted(self.translated_to): default_metadata.update(self.meta[lang]) # Compose paths if self.folder_base is not None: # Use translatable destination folders self.folders = {} for lang in self.config['TRANSLATIONS'].keys(): if os.path.isabs(self.folder_base(lang)): # Issue 2982 self.folder_base[lang] = os.path.relpath(self.folder_base(lang), '/') self.folders[lang] = os.path.normpath(os.path.join(self.folder_base(lang), self.folder_relative)) else: # Old behavior (non-translatable destination path, normalized by scanner) self.folders = {lang: self.folder_relative for lang in self.config['TRANSLATIONS'].keys()} self.folder = self.folders[self.default_lang] # Load data field from metadata self.data = Functionary(lambda: None, self.default_lang) for lang in self.translations: if self.meta[lang].get('data') is not None: self.data[lang] = utils.load_data(self.meta[lang]['data']) if 'date' not in default_metadata and not use_in_feeds: # For pages we don't *really* need a date if self.config['__invariant__']: default_metadata['date'] = datetime.datetime(2013, 12, 31, 23, 59, 59, tzinfo=tzinfo) else: default_metadata['date'] = datetime.datetime.utcfromtimestamp( os.stat(self.source_path).st_ctime).replace(tzinfo=dateutil.tz.tzutc()).astimezone(tzinfo) # If time zone is set, build localized datetime. try: self.date = to_datetime(self.meta[self.default_lang]['date'], tzinfo) except ValueError: if not self.meta[self.default_lang]['date']: msg = 'Missing date in file {}'.format(source_path) else: msg = "Invalid date '{0}' in file {1}".format(self.meta[self.default_lang]['date'], source_path) LOGGER.error(msg) raise ValueError(msg) if 'updated' not in default_metadata: default_metadata['updated'] = default_metadata.get('date', None) self.updated = to_datetime(default_metadata['updated'], tzinfo) if 'title' not in default_metadata or 'slug' not in default_metadata \ or 'date' not in default_metadata: raise ValueError("You must set a title (found '{0}'), a slug (found '{1}') and a date (found '{2}')! 
" "[in file {3}]".format(default_metadata.get('title', None), default_metadata.get('slug', None), default_metadata.get('date', None), source_path)) if 'type' not in default_metadata: # default value is 'text' default_metadata['type'] = 'text' self.publish_later = False if self.current_time is None else self.date >= self.current_time is_draft = False is_private = False self._tags = {} for lang in self.translated_to: if isinstance(self.meta[lang]['tags'], (list, tuple, set)): _tag_list = self.meta[lang]['tags'] else: _tag_list = self.meta[lang]['tags'].split(',') self._tags[lang] = natsort.natsorted( list(set([x.strip() for x in _tag_list])), alg=natsort.ns.F | natsort.ns.IC) self._tags[lang] = [t for t in self._tags[lang] if t] if 'draft' in [_.lower() for _ in self._tags[lang]]: is_draft = True LOGGER.debug('The post "{0}" is a draft.'.format(self.source_path)) self._tags[lang].remove('draft') if 'private' in self._tags[lang]: is_private = True LOGGER.debug('The post "{0}" is private.'.format(self.source_path)) self._tags[lang].remove('private') # While draft comes from the tags, it's not really a tag self.is_draft = is_draft self.is_private = is_private self.is_post = use_in_feeds self.use_in_feeds = use_in_feeds and not is_draft and not is_private \ and not self.publish_later # Allow overriding URL_TYPE via meta # The check is done here so meta dicts won’t change inside of # generic_post_rendere self.url_type = self.meta('url_type') or None # Register potential extra dependencies self.compiler.register_extra_dependencies(self)
''' fig, ax = plt.subplots(rows, cols, figsize=[12, 12]) for i in range(rows * cols): ind = start_with + i * show_every ax[int(i / rows), int(i % rows)].set_title('slice %d' % ind) ax[int(i / rows), int(i % rows)].imshow(stack[ind], cmap='gray') ax[int(i / rows), int(i % rows)].axis('off') plt.show() rootFolder = "/Users/yuan_pengyu/Downloads/IncidentalLungCTs_sample/" CTlist = [ i for i in os.listdir(rootFolder) if os.path.isdir(os.path.join(rootFolder, i)) ] CTlist = natsorted(CTlist) CTinfo = [] no_CTscans = [] matchMoreThanOne = [] matches = ["LUNG", "lung"] for CTscanId in CTlist: imgFolder = os.path.join(rootFolder, CTscanId, "CT_data") sliceList = natsorted(os.listdir(imgFolder)) # Distribute all slices to different series seriesDict = defaultdict(list) for sliceID in sliceList: sliceDicom = dicom.read_file(os.path.join(imgFolder, sliceID)) series = sliceDicom.SeriesDescription seriesDict[series].append(sliceDicom) patientID = sliceDicom.PatientID
def get_remaining(self): with self._lock: return natsorted(self.config_set - self.completed_set)
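# Hedged usage sketch with made-up names: the set difference above drops completed
# configs, and natsorted() returns whatever remains in a stable, human-friendly order.
from natsort import natsorted

config_set = {"cfg1", "cfg2", "cfg10"}
completed_set = {"cfg1"}
print(natsorted(config_set - completed_set))  # ['cfg2', 'cfg10']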
def main(dname, out_dir): # prepare out_dir.mkdir(parents=True, exist_ok=True) df = pd.read_csv(dname / 'measures.csv') # print statistics print(df.groupby('method').count()) print(df.groupby(['method'])['roc_auc'].median()) print(df.groupby(['method'])['roc_auc'].std()) # aggregated plot fig, ax = plt.subplots(figsize=(8, 6)) sns.boxplot(data=df, x='method', y='roc_auc', order=['dce', 'cor', 'pcor']) for patch in ax.artists: r, g, b, a = patch.get_facecolor() patch.set_facecolor((r, g, b, 0.3)) sns.stripplot(data=df, x='method', y='roc_auc', order=['dce', 'cor', 'pcor']) statannot.add_stat_annotation( ax, data=df, x='method', y='roc_auc', order=['dce', 'cor', 'pcor'], box_pairs=[('dce', 'cor'), ('dce', 'pcor')], test='Wilcoxon', text_format='simple', loc='outside', verbose=2, ) ax.set_xlabel('Method') ax.set_ylabel('ROC-AUC') fig.tight_layout() fig.savefig(out_dir / 'method_comparison.pdf') # stratified plot g = sns.catplot( data=df, x='method', y='roc_auc', hue='perturbed_gene', row='treatment', kind='box', hue_order=natsorted(df['perturbed_gene'].unique()), aspect=2, ) g.map(lambda **kwargs: plt.axhline( 0.5, ls='dashed', color='gray', alpha=1, zorder=-1)) g.set_axis_labels('Method', 'ROC-AUC') g._legend.set_title('Perturbed gene(s)') g.savefig(out_dir / 'method_comparison_stratified.pdf')
def indexing(self): """ create anns structure based on textline json """ print("==> Start indexing TeeNet..") self.anns = [] self.page_counts = [] pre = 0 # for label_path in natsorted(glob.glob(os.path.join(self.root_dir, "json", "*.json"))): for label_path in natsorted(glob.glob(os.path.join(self.root_dir, "new_json1", "*.json"))): label_name = os.path.basename(label_path) pdf_name = os.path.splitext(label_name)[0] """ # checking if all image is good is_bad = False lenght_pdf = len(glob.glob(os.path.join(self.root_dir, "pdf", pdf_name[:-4], "*.png"))) for image_path_idx, image_path in enumerate(glob.glob(os.path.join(self.root_dir, "pdf", pdf_name[:-4], "*.png"))): try: image = cv2.imread(image_path) if image is None: is_bad = True tmp_h, tmp_w = image.shape[:2] except Exception as e: # ignore file if it touch any exception print(e) is_bad = True if is_bad is True: print("{}/{} BAD {}".format(image_path_idx, lenght_pdf, os.path.basename(image_path))) break print("{}/{} GOOD {}".format(image_path_idx, lenght_pdf, os.path.basename(image_path))) if is_bad is True: is_bad = False continue """ with open(label_path, "r") as label_file_ref: label = json.load(label_file_ref) if len(label) == 0: # there is nothing in this file pass else: # create list of anns for this file """ self.anns = [ { "pdf_name": "sample.pdf", "page_idx_list": [1, 2], "page_textline_list": [[..], [..], [..]] }, .. ] """ ann = { "pdf_name": pdf_name, "page_idx_list": [], "page_size_list": [], "page_textline_list": [] } # for page_idx, page_textline_list in label.items(): # for page_data in label.items(): for page_data in label: # ann["page_idx_list"].append(page_idx) # ann["page_textline_list"].append(page_textline_list) ann["page_idx_list"].append(page_data["page_num"]) ann["page_size_list"].append(page_data["page_size"]) ann["page_textline_list"].append(page_data["page_text_list"]) self.anns.append(ann) page_count = len(ann["page_idx_list"]) self.page_counts.append(pre + page_count) pre += page_count if len(self.page_counts) > 0: logger.info("we have total {} pages".format(self.page_counts[-1])) else: print("we have no pages, exit..") raise SystemExit
def load_best_checkpoint(checkpoint_dir): checkpoints = natsorted(glob(os.path.join(checkpoint_dir, "*.h5"))) if len(checkpoints) != 0: return checkpoints[-1] return None
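# Hedged note (checkpoint names are hypothetical): because natsorted() orders numeric
# suffixes numerically, indexing [-1] picks the highest-numbered checkpoint, which a
# plain lexicographic sort would get wrong once the counter passes 9.
from natsort import natsorted

ckpts = ["model_epoch_2.h5", "model_epoch_9.h5", "model_epoch_10.h5"]
print(natsorted(ckpts)[-1])  # model_epoch_10.h5
print(sorted(ckpts)[-1])     # model_epoch_9.h5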
model = tf.keras.models.load_model("VGGx2sig.model")

# Reading the mask RGB image files
mask_RGB = np.empty([1, row_col_msize[0], row_col_msize[1], 3])
output = np.empty([1, 2])
print(output)
print(output.shape)

file_dir = []
# enumerate() keeps the printed index in step with the naturally sorted .jpg files
jpg_files = [f for f in natsort.natsorted(os.listdir(CTG_UP_DIR))
             if f.endswith('.jpg')]
for i, folder in enumerate(jpg_files):
    file_dir.append(folder)
    mask_RGB_info = Image.open(os.path.join(CTG_UP_DIR, folder))
    RGB_temp = np.asarray(mask_RGB_info)
    RGB_temp = np.expand_dims(RGB_temp, axis=0)
    mask_RGB = np.append(mask_RGB, RGB_temp, axis=0)
    output = np.append(output, [[1, 0]], axis=0)
    prediction = model.predict(RGB_temp)
    print('Index : ', i + 1)
    print('File Name : ', folder)
    print('Prediction Result : ', prediction)
    print('Ground Truth Output : [1,0]')
    if prediction[0][0] < prediction[0][1]:
def sort_uniq(sequence): return map(operator.itemgetter(0), itertools.groupby(natsorted(sequence)))
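# Hedged usage example for sort_uniq() above: natsorted() places equal items next to
# each other, itertools.groupby() collapses those runs, and itemgetter(0) keeps only
# the group keys, giving a naturally sorted, de-duplicated sequence.
import itertools
import operator
from natsort import natsorted

def sort_uniq(sequence):
    return map(operator.itemgetter(0), itertools.groupby(natsorted(sequence)))

print(list(sort_uniq(["a10", "a2", "a2", "a1"])))  # ['a1', 'a2', 'a10']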
def reindex(pipe: Pipe): """Sort and reindex.""" pipe.matrix = pipe.matrix.reindex(index=natsorted(pipe.matrix.index))
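# Hedged sketch of the reindex pattern used above (index labels are invented):
# natsorted() on a DataFrame index yields the natural row order, and reindex()
# rearranges the frame to match it.
import pandas as pd
from natsort import natsorted

matrix = pd.DataFrame({"value": [3, 1, 2]}, index=["row10", "row1", "row2"])
matrix = matrix.reindex(index=natsorted(matrix.index))
print(matrix.index.tolist())  # ['row1', 'row2', 'row10']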
def cut_class(label):
    """Merge the original RGB class colors into the reduced label palette."""
    height, width, channel = label.shape
    for i in range(height):
        for j in range(width):
            pixel = label[i, j].tolist()
            if pixel == [0, 150, 200] or pixel == [0, 200, 250]:
                label[i, j] = [0, 0, 200]
            elif pixel == [200, 200, 0]:
                label[i, j] = [250, 200, 0]
            elif pixel == [150, 250, 0] or pixel == [150, 200, 150]:
                label[i, j] = [0, 200, 0]
            elif pixel == [150, 0, 250] or pixel == [150, 150, 250]:
                label[i, j] = [200, 0, 200]
    return label


GT_Path = Path("label_copy")
# the PATH flag lives on the ns enum, i.e. natsort.ns.PATH
GT_File = natsort.natsorted(list(GT_Path.glob("*.tif")), alg=natsort.ns.PATH)
GT_Str = [str(p) for p in GT_File]

out_prefix = "out\\"
for k in tqdm(range(len(GT_Str))):
    label_ori = load_image(GT_Str[k])
    label_cut = cut_class(label_ori)
    label_out = cv.cvtColor(label_cut, cv.COLOR_RGB2BGR)
    out_str = out_prefix + Path(GT_Str[k]).stem + '.png'
    cv.imwrite(out_str, label_out)
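# Hedged note on the PATH flag used above (paths are hypothetical): ns.PATH tells
# natsort to split inputs on path separators and file extensions before comparing,
# so numbered files inside a folder sort the way a file browser lists them.
import natsort
from natsort import ns

tifs = ["label_copy/img10.tif", "label_copy/img2.tif", "label_copy/img1.tif"]
print(natsort.natsorted(tifs, alg=ns.PATH))
# ['label_copy/img1.tif', 'label_copy/img2.tif', 'label_copy/img10.tif']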