def LoadMostRecentModel(historical_snapshots_folder):
    print("Looking in", os.path.abspath(historical_snapshots_folder))
    subfolders = GetSubfolders(historical_snapshots_folder)
    if len(subfolders) == 0:
        return None
    most_recent_subfolder = natsort.natsorted(subfolders)[-1]
    filenames = glob.glob(os.path.join(most_recent_subfolder, "*"))
    most_recent_filename = natsort.natsorted(filenames)[-1]
    return most_recent_filename
def generate_max_intensity_proj(self, image_folder_path, output_path):
    files = os.listdir(image_folder_path)
    # natsorted returns a new list; the result must be assigned, not called for side effects.
    files = natsort.natsorted(files, reverse=False)
    first_iteration = True
    max_index = len(files)
    for index in range(max_index):
        file = files[index]
        filepath = tools.make_path(image_folder_path, file)
        current_image = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
        if first_iteration:
            max_projection = current_image
            first_iteration = False
        else:
            max_projection = np.maximum(max_projection, current_image)
    self.data_tracker.save_and_show(image=max_projection,
                                    filename=MAX_INTENSITY_PROJ_FILENAME,
                                    caption="Max intensity projection")
    return max_projection
def _703_prepare_data_splits():
    """
    Sample frame paths for the i3d model.
    :return:
    """
    annot_dict_path = Pth('EPIC-Kitchens/annotations/EPIC_train_action_labels_dict.pkl')
    annot_idxes_many_shots_path = Pth('EPIC-Kitchens/annotations/annot_idxes_many_shots_noun_verb.pkl')
    video_names_splits_path = Pth('EPIC-Kitchens/annotations/video_names_splits.pkl')

    annot_idxes_many_shots = utils.pkl_load(annot_idxes_many_shots_path)
    annot_dict = utils.pkl_load(annot_dict_path)

    # split ratio
    split_ratio = 0.8
    person_videos_dict = {}

    # first loop to collect all unique video ids
    for annot_id in annot_idxes_many_shots:
        annot_line = annot_dict[annot_id]
        person_id = annot_line[0]
        video_id = annot_line[1]
        if person_id not in person_videos_dict:
            person_videos_dict[person_id] = []
        person_videos_dict[person_id].append(video_id)

    for person_id in person_videos_dict:
        video_names = natsort.natsorted(np.unique(person_videos_dict[person_id]))
        person_videos_dict[person_id] = video_names

    # now that we have collected the persons and their videos, see how many videos each split gets
    video_names_tr = []
    video_names_te = []
    for person_id in person_videos_dict:
        v_names = person_videos_dict[person_id]
        idx = int(len(v_names) * split_ratio)
        v_names_tr = v_names[:idx]
        v_names_te = v_names[idx:]
        video_names_tr += v_names_tr
        video_names_te += v_names_te

    video_names_tr = np.array(video_names_tr)
    video_names_te = np.array(video_names_te)

    print(len(video_names_tr) + len(video_names_te))
    print(len(video_names_tr))
    print(len(video_names_te))

    # save video names
    utils.pkl_dump((video_names_tr, video_names_te), video_names_splits_path)
def Run_Params(params):
    print("Running params")

    # Setup log
    log = open(params.log_file_path, "a")
    print("first log")
    Log_Initialize(params, log)

    for epoch in range(params.epochs):
        log_print("epoch:", epoch, log=log)

        # Training
        for data_file in natsorted(os.listdir(params.train_data_dir)):
            log_print("Training file:", data_file, log=log)
            log.close()
            params.data_file = data_file
            if test_epoch:
                train(params)
            else:
                with concurrent.futures.ProcessPoolExecutor() as executor:
                    future = executor.submit(train, params)
                    executor.shutdown(wait=True)
            log = open(params.log_file_path, "a")
            log_print("File", data_file, "is Done", log=log)

        # Validation
        for data_file in natsorted(os.listdir(params.val_data_dir)):
            log_print("Validation file:", data_file, log=log)
            log.close()
            params.data_file = data_file
            with concurrent.futures.ProcessPoolExecutor() as executor:
                future = executor.submit(val, params)
                executor.shutdown(wait=True)
            log = open(params.log_file_path, "a")
            log_print("File", data_file, "is Done", log=log)

    return "Done"
def sort_and_print_entries(entries, args):
    """Sort the entries, applying the filters first if necessary."""

    # Extract the proper number type.
    num_type = {
        'digit': None,
        'version': None,
        'ver': None,
        'int': int,
        'float': float,
    }[args.number_type]
    unsigned = not args.signed or num_type is None
    alg = (ns.INT * int(num_type in (int, None)) |
           ns.UNSIGNED * unsigned |
           ns.NOEXP * (not args.exp) |
           ns.PATH * args.paths |
           ns.LOCALE * args.locale)

    # Pre-remove entries that don't pass the filtering criteria.
    # Make sure we use the same searching algorithm for filtering
    # as for sorting.
    do_filter = args.filter is not None or args.reverse_filter is not None
    if do_filter or args.exclude:
        inp_options = (ns.INT * int(num_type in (int, None)) |
                       ns.UNSIGNED * unsigned |
                       ns.NOEXP * (not args.exp),
                       '.')
        regex, num_function = _regex_and_num_function_chooser[inp_options]
        if args.filter is not None:
            lows, highs = ([f[0] for f in args.filter],
                           [f[1] for f in args.filter])
            entries = [entry for entry in entries
                       if keep_entry_range(entry, lows, highs, num_function, regex)]
        if args.reverse_filter is not None:
            lows, highs = ([f[0] for f in args.reverse_filter],
                           [f[1] for f in args.reverse_filter])
            entries = [entry for entry in entries
                       if not keep_entry_range(entry, lows, highs, num_function, regex)]
        if args.exclude:
            exclude = set(args.exclude)
            entries = [entry for entry in entries
                       if exclude_entry(entry, exclude, num_function, regex)]

    # Print off the sorted results.
    for entry in natsorted(entries, reverse=args.reverse, alg=alg):
        print(entry)
def get_result(f, fname):
    try:
        document = Document.from_file(f, fname=fname)
    except Exception:
        return {}
    records = document.records.serialize()
    records = natsort.natsorted(
        records,
        key=lambda x: x.get('labels', ['ZZZ%s' % (99 - len(x.get('names', [])))])[0])
    result = {
        'records': records,
        'abbreviations': document.abbreviation_definitions
    }
    return result
def make_table_label_cur(optlist, optname, sel=0):
    TABLE_LABEL_CUR = '''
<select class="" name="###NAME###">
###OPTION###
</select>
'''
    opt = ''
    for k in natsorted(optlist.keys()):
        v = optlist[k]
        if int(k) == sel:
            opt += '<option selected="selected">'
        else:
            opt += '<option>'
        opt += '%d: %s' % (int(k) + 1, v)
        opt += '</option>'
    tmp = TABLE_LABEL_CUR.replace('###NAME###', optname)
    return tmp.replace('###OPTION###', opt)
def sort_and_print_entries(entries, args):
    """Sort the entries, applying the filters first if necessary."""

    # Extract the proper number type.
    is_float = args.number_type in ('float', 'real', 'f', 'r')
    signed = args.signed or args.number_type in ('real', 'r')
    alg = (ns.FLOAT * is_float |
           ns.SIGNED * signed |
           ns.NOEXP * (not args.exp) |
           ns.PATH * args.paths |
           ns.LOCALE * args.locale)

    # Pre-remove entries that don't pass the filtering criteria.
    # Make sure we use the same searching algorithm for filtering
    # as for sorting.
    do_filter = args.filter is not None or args.reverse_filter is not None
    if do_filter or args.exclude:
        inp_options = (ns.FLOAT * is_float |
                       ns.SIGNED * signed |
                       ns.NOEXP * (not args.exp),
                       '.')
        regex, num_function = _regex_and_num_function_chooser[inp_options]
        if args.filter is not None:
            lows, highs = ([f[0] for f in args.filter],
                           [f[1] for f in args.filter])
            entries = [entry for entry in entries
                       if keep_entry_range(entry, lows, highs, num_function, regex)]
        if args.reverse_filter is not None:
            lows, highs = ([f[0] for f in args.reverse_filter],
                           [f[1] for f in args.reverse_filter])
            entries = [entry for entry in entries
                       if not keep_entry_range(entry, lows, highs, num_function, regex)]
        if args.exclude:
            exclude = set(args.exclude)
            entries = [entry for entry in entries
                       if exclude_entry(entry, exclude, num_function, regex)]

    # Print off the sorted results.
    for entry in natsorted(entries, reverse=args.reverse, alg=alg):
        print(entry)
def get_data(self):
    data_list = os.listdir(self.root)
    data_list = natsort.natsorted(data_list)
    data_num = int(data_list[-1].split('.')[0]) + 1

    df = pd.read_csv(configs.train_label_dir, sep='\t', header=None)
    df.columns = ['mean', 'std', 'j1', 'j2', 'j3']
    label = df.loc[:data_num - 1, 'std'].to_numpy().reshape(-1, 1)

    # use original data
    train_data_num = math.floor(len(data_list) * 0.8)
    if self.train == 0:
        # return data_list[:train_data_num], label[:train_data_num]
        return data_list, label
    elif self.train == 1:
        return data_list[train_data_num:], label[train_data_num:]
    elif self.train == 2:
        return data_list
def LoadModels(historical_snapshots_folder, backbone, day_number=None):
    print("Started dual-memory modelling, looking in", historical_snapshots_folder)

    # If there is no "most_recent_snapshot", return None:
    most_recent_snapshot = LoadMostRecentModel(historical_snapshots_folder)
    if most_recent_snapshot is None:
        return None
    print("Most recent snapshot:", most_recent_snapshot)

    # Search folder for a Day-10 snapshot:
    # f"historical_snapshots/Day{day_number}/snapshots/"
    if day_number is None:
        match = re.search(r"Day(\d+)", most_recent_snapshot)
        if match is None:
            raise ValueError("Filename doesn't conform to standard")
        day_number = int(match.group(1))
    print("Day number is:", day_number)

    if day_number > 10:
        print("Day number is greater than 10")
        find_day = day_number - 10
        folder = os.path.join(historical_snapshots_folder, f"Day{find_day}/")
        filenames = glob.glob(os.path.join(folder, "*"))
        combine_model_filename = natsort.natsorted(filenames)[-1]
        print("Done")

        # load and combine models:
        # models.load_model(model_filename, backbone_name=args.backbone)
        all_models = [
            models.load_model(most_recent_snapshot, backbone_name=backbone),
            models.load_model(combine_model_filename, backbone_name=backbone)
        ]
        return all_models
    else:
        return [models.load_model(most_recent_snapshot, backbone_name=backbone)]
def sort_and_print_entries(entries, args):
    """Sort the entries, applying the filters first if necessary."""

    # Extract the proper number type.
    is_float = args.number_type in ('float', 'real', 'f', 'r')
    signed = args.signed or args.number_type in ('real', 'r')
    alg = (ns.FLOAT * is_float |
           ns.SIGNED * signed |
           ns.NOEXP * (not args.exp) |
           ns.PATH * args.paths |
           ns.LOCALE * args.locale)

    # Pre-remove entries that don't pass the filtering criteria.
    # Make sure we use the same searching algorithm for filtering
    # as for sorting.
    do_filter = args.filter is not None or args.reverse_filter is not None
    if do_filter or args.exclude:
        inp_options = (ns.FLOAT * is_float |
                       ns.SIGNED * signed |
                       ns.NOEXP * (not args.exp))
        regex = _regex_chooser[inp_options]
        if args.filter is not None:
            lows, highs = ([f[0] for f in args.filter],
                           [f[1] for f in args.filter])
            entries = [entry for entry in entries
                       if keep_entry_range(entry, lows, highs, float, regex)]
        if args.reverse_filter is not None:
            lows, highs = ([f[0] for f in args.reverse_filter],
                           [f[1] for f in args.reverse_filter])
            entries = [entry for entry in entries
                       if not keep_entry_range(entry, lows, highs, float, regex)]
        if args.exclude:
            exclude = set(args.exclude)
            entries = [entry for entry in entries
                       if exclude_entry(entry, exclude, float, regex)]

    # Print off the sorted results.
    for entry in natsorted(entries, reverse=args.reverse, alg=alg):
        print(entry)
# PSU machines (linux lab)
base1 = os.path.expanduser('~/dev/jc2')
base2 = os.path.expanduser('~/Desktop')
sys.path.append(os.path.join(base1, 'Projects/net'))
file_path = base2 + '/kaggle/ultrasound'
train_path = file_path + '/data/train'
im_path = train_path + '.csv'
mask_path = file_path + '/data/train_mask.csv'

from feed_forward import ff_net

################################## Read training images and masks ####################################################
# Image 19_8.tif appears to not have a mask and 19_9 seems to be missing.
os.chdir(train_path)
train_all = natsort.natsorted(os.listdir())
train_im = train_all[slice(0, len(train_all), 2)]
train_mask = train_all[slice(1, len(train_all), 2)]
train_final = [(i, j) for i, j in zip(train_im, train_mask)]

im_dim = misc.imread(train_im[0]).shape
ims = np.zeros((np.prod(im_dim), len(train_im)))
for i in range(len(train_im)):
    ims[:, i] = misc.imread(train_im[i]).flatten()
np.savetxt(im_path, ims, delimiter=',')

masks = np.zeros((np.prod(im_dim), len(train_mask)))
for i in range(len(train_mask)):  # was len(im_data), an undefined name
    masks[:, i] = misc.imread(train_mask[i]).flatten()
np.savetxt(mask_path, masks, delimiter=',')
if __name__=="__main__": parser = argparse.ArgumentParser() parser.add_argument('--seed', type=int, default=0, help="The random seed.") parser.add_argument('--raw_dir', type=str, required=True, help="The directory which contains the raw xml files.") parser.add_argument('--data_dir', type=str, default="data/opensubtitles-parsed", help="The parent directory for saving parsed data.") parser.add_argument('--bert_ckpt', type=str, default="bert-base-uncased", help="The checkpoint of the BERT to load the tokenizer.") parser.add_argument('--lam', type=int, default=2, help="The lambda value for the Poisson distribution.") parser.add_argument('--num_trunc', type=int, default=20, help="The number of turns to truncate.") args = parser.parse_args() file_list = glob(f"{args.raw_dir}/xml/en/*/*/*.xml") file_list = natsort.natsorted(file_list) print(file_list) print(f"The total number of files: {len(file_list)}") if not os.path.isdir(args.data_dir): os.makedirs(args.data_dir) # Load the tokenizer config = BertConfig.from_pretrained(args.bert_ckpt) tokenizer = BertTokenizer.from_pretrained(args.bert_ckpt) vocab = tokenizer.get_vocab() args.cls_id = vocab[tokenizer.cls_token] args.sep_id = vocab[tokenizer.sep_token] args.max_len = config.max_position_embeddings random.seed(args.seed)
import imageio
from os import listdir
from os.path import isfile, join
from natsort import natsort

images = []
PATH_TO_GIF = 'color-reduction.gif'
PATH_TO_IMAGES = "Images/"

filenames = [f for f in listdir(PATH_TO_IMAGES) if isfile(join(PATH_TO_IMAGES, f))]
filenames = natsort.natsorted(filenames, reverse=False)
print(filenames)

for filename in filenames:
    images.append(imageio.imread("Images/" + filename))

imageio.mimsave(PATH_TO_GIF, images, duration=1.5)
def get_img_paths(dir_path, wildcard='*.png'):
    return natsort.natsorted(glob.glob(dir_path + '/' + wildcard))
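# Illustrative sketch (added, not from the original sources; the file names are made up):
# natsorted puts numbered frames in numeric order, unlike the lexicographic built-in sort.
_demo_frames = ['img10.png', 'img2.png', 'img1.png']
assert natsort.natsorted(_demo_frames) == ['img1.png', 'img2.png', 'img10.png']
assert sorted(_demo_frames) == ['img1.png', 'img10.png', 'img2.png']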
import os
import json

from natsort.natsort import natsorted

result = []
for path, dirnames, filenames in os.walk('.'):
    json_file_list = natsorted(
        [file for file in filenames if file.endswith('数.json')])
    for json_file in json_file_list:
        # Join with the walked directory so files found in subdirectories open correctly.
        with open(os.path.join(path, json_file), 'r', encoding='utf8') as f:
            dic = json.load(f)
            dic = {'title': json_file, **dic}
            result.append(dic)

with open('合计结果.json', 'w', encoding='utf8') as f:
    f.write(json.dumps(result, ensure_ascii=False))
def extractChromosomes(samtools, normal, tumors, reference=None):
    # Read the names of sequences in normal BAM file
    normal_sq = getSQNames(samtools, normal[0])

    # Extract only the names of chromosomes in standard formats.
    # Collecting "chr"-prefixed and unprefixed names in two separate sets and then
    # merging them was redundant; a single set is enough.
    chromosomes = set()
    for i in range(1, 23):
        if str(i) in normal_sq:
            chromosomes.add(str(i))
        elif "chr" + str(i) in normal_sq:
            chromosomes.add("chr" + str(i))
        else:
            sys.stderr.write(
                "WARNING: a chromosome named either {} or a variant of CHR{} cannot be found in the "
                "normal BAM file\n".format(i, i))

    # A set never compares equal to 0; test its length instead.
    if len(chromosomes) == 0:
        raise ValueError("No chromosomes found in the normal BAM")

    # Check that chromosomes with the same names are present in each tumor BAM
    for tumor in tumors:
        tumor_sq = getSQNames(samtools, tumor[0])
        if not chromosomes <= tumor_sq:
            sys.stderr.write(
                "WARNING: chromosomes {} are not present in the tumor sample {}\n"
                .format(chromosomes - tumor_sq, tumor))

    # Check consistency of chromosome names with the reference
    if reference:
        refdict = "{}.dict".format(reference)
        stdout, stderr = subprocess.Popen(
            "grep -e \"^>\" {}".format(reference),
            stdout=subprocess.PIPE,
            shell=True).communicate()
        if stderr:
            raise ValueError("Error in reading the reference: {}".format(reference))
        else:
            ref = set(c[1:].strip().split()[0] for c in stdout.strip().split('\n'))
            if not (chromosomes <= ref):
                raise ValueError(
                    "The given reference cannot be used because the chromosome names are inconsistent!\n"
                    "Chromosomes found in BAF files: {}\nChromosomes with the same name found in reference "
                    "genome: {}".format(chromosomes, ref))

    # return sorted(list(chromosomes), key=sp.numericOrder)
    return natsort.natsorted(list(chromosomes))
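# Illustrative note (added): natsorted gives the same numeric chromosome ordering
# that the replaced sp.numericOrder key provided, e.g.
# natsort.natsorted(['chr10', 'chr2', 'chr1']) -> ['chr1', 'chr2', 'chr10']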
try:
    chromosome, coords = tsv_data["Mutation genome position"].split(":")
except ValueError:
    # skip cosmic entries with no position data
    continue
else:
    start, end = coords.split("-")
    tsv_data["chromosome"] = chromosome
    tsv_data["start"] = start
    tsv_data["end"] = end
    new_cosmic_lines.append(tsv_data)

# tabix needs the file to be sorted
new_cosmic_lines.sort(key=lambda x: int(x["end"]))
new_cosmic_lines.sort(key=lambda x: int(x["start"]))
new_cosmic_lines = natsort.natsorted(new_cosmic_lines, key=lambda x: x["chromosome"])

headers.extend(["chromosome", "start", "end"])
out = open(output_fname, "w")
out.write("#" + "\t".join(headers) + "\n")
for tsv_data in new_cosmic_lines:
    out.write("\t".join([tsv_data[h] for h in headers]) + "\n")
out.close()

pysam.tabix_index(filename=output_fname, seq_col=34, start_col=35, end_col=36)
print("Creating second index for AA position...")

# Create a second index file by gene and start AA, end AA
TabixIndexer.indexGeneProteinPosition("Gene name", "Mutation AA", input_fname, output_fname + ".byAA")
def find_image_at_index(index, path):
    image_names = os.listdir(path)
    # natsorted returns a new list; assign it or the ordering is lost.
    image_names = natsort.natsorted(image_names, reverse=False)
    image_name = image_names[index]
    image_path = make_path(path, image_name)
    return cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
def readBINs(normalbins, tumorbins):
    normalBINs = {}
    tumorBINs = {}
    normal = set()
    samples = set()
    normal_chr = set()
    tumor_chr = set()

    # Read normal bin counts
    with open(normalbins, 'r') as f:
        for line in f:
            parsed = line.strip().split()[:5]
            normal_chr.add(parsed[1])
            normal.add(parsed[0])
            if (parsed[1], int(parsed[2]), int(parsed[3])) not in normalBINs:
                normalBINs[parsed[1], int(parsed[2]), int(parsed[3])] = (parsed[0], int(parsed[4]))
            else:
                raise ValueError(sp.error(
                    "Found multiple lines for the same interval in the normal bin counts!"))

    # Check normal bin counts
    if len(normal) > 1:
        raise ValueError(sp.error("Found multiple samples in normal bin counts!"))
    prev_r = -1
    prev_c = -1
    for key in sorted(normalBINs, key=(lambda x: (sp.numericOrder(x[0]), int(x[1]), int(x[2])))):
        l, r = int(key[1]), int(key[2])
        if l > r and prev_c == key[0]:
            raise ValueError(sp.error(
                "Found an interval with START {} greater than END {} in normal bin counts!"
                .format(key[1], key[2])))
        if l < prev_r and prev_c == key[0]:
            raise ValueError(sp.error(
                "Found overlapping intervals one ending with {} and the next starting with {} in normal bin counts!"
                .format(prev_r, key[1])))
        prev_r = r
        prev_c = key[0]

    # Read tumor bin counts
    with open(tumorbins, 'r') as f:
        for line in f:
            parsed = line.strip().split()[:5]
            tumor_chr.add(parsed[1])
            samples.add(parsed[0])
            try:
                tumorBINs[parsed[1], int(parsed[2]), int(parsed[3])].add((parsed[0], int(parsed[4])))
            except KeyError:
                tumorBINs[parsed[1], int(parsed[2]), int(parsed[3])] = set()
                tumorBINs[parsed[1], int(parsed[2]), int(parsed[3])].add((parsed[0], int(parsed[4])))

    # Check tumor bin counts
    prev_r = -1
    prev_c = -1
    num_samples = len(samples)
    for key in sorted(tumorBINs, key=(lambda x: (sp.numericOrder(x[0]), int(x[1]), int(x[2])))):
        l, r = int(key[1]), int(key[2])
        if len(tumorBINs[key]) != num_samples:
            raise ValueError(sp.error(
                "Found multiple lines for the same interval in the tumor bin counts!"))
        if l > r and prev_c == key[0]:
            raise ValueError(sp.error(
                "Found an interval with START {} greater than END {} in tumor bin counts!"
                .format(key[1], key[2])))
        if l < prev_r and prev_c == key[0]:
            raise ValueError(sp.error(
                "Found overlapping intervals one ending with {} and the next starting with {} in tumor bin counts!"
                .format(prev_r, key[1])))
        prev_r = r
        prev_c = key[0]

    if normal_chr != tumor_chr:
        raise ValueError(sp.error(
            "The chromosomes in normal and tumor bin counts are different!"))
    if set(normalBINs) != set(tumorBINs):
        raise ValueError(sp.error(
            "The bins of the normal and tumor samples are different!"))

    # chromosomes = sorted(list(normal_chr), key=sp.numericOrder)
    chromosomes = natsort.natsorted(normal_chr)

    return normalBINs, tumorBINs, chromosomes, normal.pop(), samples
import imageio
import os

from natsort import natsort


def create_gif(filenames, duration):
    images = []
    for filename in filenames:
        images.append(imageio.imread(filename))  # was "imagees", a typo
    output_file = 'raytrace_2.gif'
    imageio.mimsave(output_file, images, duration=duration)


path = '/home/vkvishal/Documents/RayTracing/'
filenames = []
for file in os.listdir(path):
    # filename = os.fsdecode(file)
    if file.endswith('.bmp'):
        filenames.append(file)
filenames = natsort.natsorted(filenames)
create_gif(filenames, duration=5)
def get_BMP_file_paths(BMP_files_directory_path):
    BMP_file_path_pattern = os.path.join(BMP_files_directory_path, '*.bmp')
    BMP_file_paths = natsort.natsorted(glob.glob(BMP_file_path_pattern))
    return BMP_file_paths
def main(command_line_args=None):
    """
    Let's get this party started.
    :param command_line_args:
    """
    start_time = time.time()
    VersionDependencies.python_check()

    if not command_line_args:
        command_line_args = sys.argv

    run_start = datetime.datetime.today().strftime("%H:%M:%S %Y %a %b %d")
    parser = argparse.ArgumentParser(
        description="A package to map genomic repair scars at defined loci.\n {} v{}"
        .format(__package__, __version__),
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--options_file', action='store', dest='options_file', required=True,
                        help='File containing program parameters.')

    # Check options file for errors and return object.
    args = error_checking(string_to_boolean(parser))

    log = Tool_Box.Logger(args)
    Tool_Box.log_environment_info(log, args, command_line_args)
    module_name = ""
    log.info("{} v{}".format(__package__, __version__))

    if args.IndelProcessing:
        file_list = []
        if args.Platform == "Illumina" or args.Platform == "Ramsden" or args.Platform == "TruSeq":
            log.info("Sending FASTQ files to FASTQ preprocessor.")

            if args.PEAR:
                file_list = pear_consensus(args, log)
                if not file_list:
                    log.error("PEAR failed. Check logs.")
                    raise SystemExit(1)
                fastq_consensus = file_list[0]
                fq1 = FASTQ_Tools.FASTQ_Reader(fastq_consensus, log)
                fq2 = None
            else:
                fq2 = FASTQ_Tools.FASTQ_Reader(args.FASTQ2, log)
                fq1 = FASTQ_Tools.FASTQ_Reader(args.FASTQ1, log)

            sample_manifest = Tool_Box.FileParser.indices(log, args.SampleManifest)
            indel_processing = \
                Indel_Processing.DataProcessing(log, args, run_start, __version__,
                                                Target_Mapper.TargetMapper(log, args, sample_manifest),
                                                fq1, fq2)
            indel_processing.main_loop()

            # Compress or delete PEAR files.
            if args.PEAR and file_list:
                if args.DeleteConsensusFASTQ:
                    log.info("Deleting PEAR FASTQ Files.")
                    Tool_Box.delete(file_list)
                else:
                    log.info("Compressing {} FASTQ Files Generated by PEAR.".format(len(file_list)))
                    p = pathos.multiprocessing.Pool(int(args.Spawn))
                    p.starmap(Tool_Box.compress_files, zip(file_list, itertools.repeat(log)))
        else:
            log.error("Only 'Illumina', 'TruSeq' or 'Ramsden' --Platform methods currently allowed.")
            raise SystemExit(1)

    elif not args.IndelProcessing:
        # Run frequency file Combine module
        run_start = datetime.datetime.today().strftime("%a %b %d %H:%M:%S %Y")
        log.info("Process Replicates.")
        data_dict = collections.defaultdict(list)
        file_list = [f for f in glob.glob("{}*ScarMapper_Frequency.txt".format(args.DataFiles, ))]
        file_count = len(file_list)
        page_header = "# ScarMapper File Merge v{}\n# Run: {}\n# Sample Name: {}\n" \
            .format(__version__, run_start, args.SampleName)
        line_num = 0
        index_file = list(csv.reader(open(file_list[0]), delimiter='\t'))
        for line in index_file:
            if not line:
                break
            elif line_num > 3:
                page_header += "{}\n".format(line[0])
            line_num += 1
        page_header += "\n\n"

        for file_name in file_list:
            freq_file_data = Tool_Box.FileParser.indices(log, file_name)
            for row in freq_file_data:
                key = "{}|{}|{}|{}".format(row[3], row[4], row[6], row[8])
                row_data = row[2:]
                if key in data_dict:
                    data_dict[key][0].append(float(row[1]))
                else:
                    data_dict[key] = [[float(row[1])], row_data]

        # Process Data and Write Combined Frequency results file
        plot_data_dict = collections.defaultdict(list)
        label_dict = collections.defaultdict(float)
        output_data_dict = collections.defaultdict(list)
        marker_list = []
        for key, row_list in data_dict.items():
            # Force pattern to be in at least half of the files.
            if len(row_list[0]) / file_count >= 0.5:
                row_string = "\t".join(row_list[1])
                freq = gmean(row_list[0])
                sem = stats.sem(row_list[0])
                freq_results_outstring = "{}\t{}\t{}\n".format(freq, sem, row_string)
                output_key = freq

                # Freq is a 17 digit float so it is very unlikely to be duplicated but if it is this
                # increments it by a small number then checks the uniqueness again.
                if output_key in output_data_dict:
                    output_key = output_key + 1e-16
                if output_key in output_data_dict:
                    output_key = output_key + 1e-16

                scar_type = row_list[1][0]
                label_dict[scar_type] += freq

                # Gather up our data for plotting
                lft_del = int(row_list[1][1])
                rt_del = int(row_list[1][2])
                mh_size = int(row_list[1][5])
                ins_size = int(row_list[1][7])

                output_data_dict[output_key] = \
                    [(freq, lft_del, rt_del, mh_size, ins_size, scar_type), freq_results_outstring]

        freq_results_outstring = \
            "{}# Frequency\tSEM\tScar Type\tLeft Deletions\tRight Deletions\tDeletion Size\tMicrohomology\t" \
            "Microhomology Size\tInsertion\tInsertion Size\tLeft Template\tRight Template\tConsensus Left Junction\t" \
            "Consensus Right Junction\tTarget Left Junction\tTarget Right Junction\tConsensus\tTarget Region\n" \
            .format(page_header)

        # Now draw a pretty graph of the data if we are not dealing with a negative control.
        for k in natsort.natsorted(output_data_dict, reverse=True):
            data_list = output_data_dict[k]
            freq_results_outstring += data_list[1]
            freq = data_list[0][0]
            lft_del = data_list[0][1]
            rt_del = data_list[0][2]
            mh_size = data_list[0][3]
            ins_size = data_list[0][4]
            scar_type = data_list[0][5]

            # Plotting all scar patterns is messy.  This provides a cutoff.
            if freq < 0.00025:
                continue

            y_value = freq * 0.5
            lft_ins_width = freq
            rt_ins_width = freq

            # This is gathered up to find the largest value.  Used to set the x-axis limits.
            marker_list.extend([lft_del + (mh_size * 0.5), rt_del + (mh_size * 0.5), ins_size])

            # Deletion size includes half the size of any microhomology present.
            lft_del_plot_value = (lft_del + (mh_size * 0.5)) * -1
            rt_del_plot_value = rt_del + (mh_size * 0.5)

            # Insertions are centered on 0 so we need to take half the value for each side.
            lft_ins_plot_value = (ins_size * 0.5) * -1
            rt_ins_plot_value = ins_size * 0.5

            # Scale the width of bars for insertions inside of deletions
            if lft_del + (mh_size * 0.5) != 0:
                lft_ins_width = freq * 0.5
            if rt_del + (mh_size * 0.5) != 0:
                rt_ins_width = freq * 0.5

            if scar_type not in plot_data_dict:
                plot_data_dict[scar_type] = \
                    [[freq], [lft_del_plot_value], [rt_del_plot_value], [lft_ins_plot_value],
                     [rt_ins_plot_value], [lft_ins_width], [rt_ins_width], [y_value]]
            else:
                # Get some previous plot data
                count = len(plot_data_dict[scar_type][0])
                previous_freq = plot_data_dict[scar_type][0][count - 1]
                previous_y = plot_data_dict[scar_type][7][count - 1]

                plot_data_dict[scar_type][0].append(freq)
                plot_data_dict[scar_type][1].append(lft_del_plot_value)
                plot_data_dict[scar_type][2].append(rt_del_plot_value)
                plot_data_dict[scar_type][3].append(lft_ins_plot_value)
                plot_data_dict[scar_type][4].append(rt_ins_plot_value)
                plot_data_dict[scar_type][5].append(lft_ins_width)
                plot_data_dict[scar_type][6].append(rt_ins_width)

                # Use the previous plot data to find the y-value of the current bar.
                plot_data_dict[scar_type][7] \
                    .append(previous_y + 0.002 + (0.5 * previous_freq) + y_value)

        plot_data_dict['Marker'] = [(max(marker_list)) * -1, max(marker_list)]

        # sample_name = "{}.{}".format(args.Job_Name, args.SampleName)
        ScarMapperPlot.scarmapperplot(args, datafile=None, sample_name=args.SampleName,
                                      plot_data_dict=plot_data_dict, label_dict=label_dict)

        freq_results_file = \
            open("{}{}_ScarMapper_Combined_Frequency.txt".format(args.WorkingFolder, args.SampleName), "w")
        freq_results_file.write(freq_results_outstring)
        freq_results_file.close()

    warning = "\033[1;31m **See warnings above**\033[m" if log.warning_occurred else ''
    elapsed_time = int(time.time() - start_time)
    # The warning string was passed to format() but had no placeholder; {3} makes it visible.
    log.info("****ScarMapper {0} complete ({1} seconds, {2} Mb peak memory).****{3}"
             .format(module_name, elapsed_time, Tool_Box.peak_memory(), warning))

    # All done so we need to quit otherwise Python will not release the log file on virtual Linux.
    exit(0)
    for info in infos.iter('bndbox'):
        x1 = float(info.find('xmin').text)
        y1 = float(info.find('ymin').text)
        x2 = float(info.find('xmax').text)
        y2 = float(info.find('ymax').text)
        idx_name = name_idx[name]
        w = (x2 - x1) / img_w
        h = (y2 - y1) / img_h
        x = (x1 / img_w) + (w / 2.)
        y = (y1 / img_h) + (h / 2.)
        txt_info = ("%g " * 5 + "\n") % (idx_name, x, y, w, h)
        result_txt.write(txt_info)


if __name__ == '__main__':
    rotated_img_path = "/home/kinsozheng/Desktop/generate_test_dataset/rotated_set/rotated_img"
    files = natsort.natsorted(os.listdir(rotated_img_path))
    names = []
    for file in files:
        name = os.path.splitext(file)[0]
        names.append(name)
    pool = Pool()
    pool.map(xml_to_txt, tqdm(names))
    pool.close()
    pool.join()
def _initialize_config_from_meta(ts_metadata_dict, context_meta, variable_meta):
    """
    Prepares the correlation loop configuration from the uploaded metadata of selected timeseries.

    :param ts_metadata_dict: uploaded metadata
    :type ts_metadata_dict: dict

    :param context_meta: name of metadata providing the context.
      Example with Airbus datasets: "FlightIdentifier"
    :type context_meta: str

    :param variable_meta: name of the metadata providing the variable name: variables are sorted
      by alphanumeric order. Example with Airbus datasets: "metric"
    :type variable_meta: str

    :return: computed config is multiple result:
      - config_corr_loop: list of
        ( <context index>, [ (<var index 1> , <tsuid 1>), ..., (<var index N> , <tsuid N>) ] )
      - contexts: ordered list of contexts: <context> = contexts[<context_index>]
      - variables: ordered list of variables: <variable name> = variables[ <var index> ]
    :rtype: list, list, list

    :raise exception: IkatsInputContentError when an inconsistency cancels the correlations computing
    """
    ts_metadata_accepted = defaultdict(dict)
    ts_variables_accepted_set = set()

    for tsuid, meta in ts_metadata_dict.items():
        if context_meta not in meta:
            LOGGER.info("- Ignored: TS without context (meta %s): %s", context_meta, tsuid)
        elif variable_meta not in meta:
            LOGGER.info("- Ignored: TS without defined variable (meta %s): %s", variable_meta, tsuid)
        else:
            context_value = meta[context_meta]
            variable_name = meta[variable_meta]
            if variable_name in ts_metadata_accepted[context_value]:
                msg = "Inconsistency: context={} variable={} should match 1 TS: got at least 2 TS {} {}"
                # Fill all four placeholders: context, variable, and the two conflicting TSUIDs.
                raise IkatsInputContentError(
                    msg.format(context_value, variable_name,
                               ts_metadata_accepted[context_value][variable_name], tsuid))

            ts_metadata_accepted[context_value][variable_name] = tsuid

    def ignore_unique_ts(the_context, tsuid_by_var):
        """
        - removes context with one single ts => useless to compute correlation
        - or else completes the set ts_variables_accepted_set

        :param the_context:
        :param tsuid_by_var:
        :return:
        """
        if len(tsuid_by_var) == 1:
            LOGGER.info("- Ignored: unique TS in context %s=%s: %s",
                        context_meta, the_context, list(tsuid_by_var.values())[0])
        else:
            for var in tsuid_by_var:
                ts_variables_accepted_set.add(var)
        return len(tsuid_by_var) == 1

    ts_metadata_accepted = {ctx: tsuid_by_var
                            for ctx, tsuid_by_var in ts_metadata_accepted.items()
                            if not ignore_unique_ts(ctx, tsuid_by_var)}

    # provides translation indexes => value on contexts
    contexts = natsorted(ts_metadata_accepted.keys())

    # provides translation indexes => value on variables
    variables = natsorted(ts_variables_accepted_set)

    # computes the corr_loop_config
    # ( <context index>, [ (<var index 1> , <tsuid 1>), ..., (<var index N> , <tsuid N>) ] )
    #
    # Note: sorted( [ (2, "TS2"), (1, "TS1"), (0, "TS0"), ] )
    #       returns [(0, 'TS0'), (1, 'TS1'), (2, 'TS2')]
    #
    corr_loop_config = [(contexts.index(ctx),
                         sorted([(variables.index(var), tsuid)
                                 for var, tsuid in tsuid_by_var.items()]))
                        for ctx, tsuid_by_var in ts_metadata_accepted.items()]

    return corr_loop_config, contexts, variables
def connect(self):
    bin_sub_path = 'Bin\\Matrix.exe'
    library_sub_path = 'SDK\\RemoteAccess\\RemoteAccess_API.dll'
    name = os.path.basename(bin_sub_path)

    # Walk the running processes until the Matrix executable is found.
    ok = False
    ps = psutil.process_iter()
    p = next(ps)
    go = True
    while go and not ok:
        try:
            p_path = p.exe()
            p_name = p.name()
        except:
            p_path = ''
            p_name = ''
        if p_name == name:
            installation_directory = p_path[:-(len(bin_sub_path) + 1)]
            library_path = os.path.join(installation_directory, library_sub_path)
            ok = os.path.exists(library_path)
        try:
            p = next(ps)
        except StopIteration:
            go = False
    ps.close()

    co = ''
    if ok:
        try:
            pe = pefile.PE(p_path)
            if pe.FILE_HEADER.Machine != self.machine:
                self.log.AppendText('Bit architecture mismatch.\n')
                pe.close()
                pe = None
        except:
            pe = None
        if hasattr(pe, 'FileInfo') and pe.FileInfo:
            if isinstance(pe.FileInfo[0], list):
                file_info = pe.FileInfo[0]
            else:
                file_info = pe.FileInfo
            entries = [i for i in file_info if hasattr(i, 'StringTable')]
            if entries:
                st_entries = [i for i in entries[0].StringTable]
                if st_entries:
                    co = st_entries[0].entries[b'CompanyName'].decode()
            pe.close()

    if co:
        user_config_dir = os.environ['APPDATA']
        all_default_paths = natsort.natsorted(
            os.listdir(f"{user_config_dir}\\{co}\\MATRIX"))
        exp_sub_path = f'MATRIX\\{all_default_paths[-1]}\\Experiments'
        self.experiments_directory = os.path.join(user_config_dir, co, exp_sub_path)
        self.lib_mate = ctypes.cdll.LoadLibrary(library_path)
        self.lib_mate.setHost(b'localhost')
        self.disconnect()
        if self.is_ran_down or self.testmode:
            rc = self.lib_mate.init(installation_directory.encode())
            self.log.AppendText('Connecting to the MATRIX, response: ' + self.rc_key(rc) + '.\n')
            if (rc == self.rcs['RMT_SUCCESS']) or self.testmode:
                self.is_ran_down = False
                self.experiment()
                if not self.online:
                    self.disconnect()
                else:
                    self.rc = self.rcs['RMT_SUCCESS']
            else:
                self.rc = rc
    else:
        self.log.AppendText('Connecting to the MATRIX, response: ---.\n')
    clusters[item['date']].append(item)
    return clusters.values()


target = 'site/content/calendar'
file_name_base = 'day-%d.md'
data_src = '../advent_2016_master.csv'

with open(data_src, 'r') as f:
    data = csv.DictReader(f)
    data = filter(lambda x: len(x['DISP DATE']) != 0, data)
    women = [parse_row(x) for x in data]

selection = [generate_struct(x, i) for i, x in enumerate(women)]
clusters = group_clusters(selection)

for item in clusters:
    formatted = format_cluster(item)
    path = os.path.join(target, file_name_base % int(item[0]['date']))
    with open(path, 'w') as f:
        f.write(formatted.encode('UTF-8'))

sources = ", ".join(
    natsort.natsorted(
        set(filter(lambda x: len(x) > 0,
                   [y.strip().decode('utf-8') for y in set(sources)]))))
# print sources
def fiFindByWildcard(wildcard):
    return natsort.natsorted(glob.glob(wildcard, recursive=True))
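# Usage sketch (added for illustration; the pattern is hypothetical): with recursive=True
# a '**' component descends into subdirectories, and natsorted keeps numbered files in
# numeric order, e.g. checkpoint2.pth before checkpoint10.pth.
# all_checkpoints = fiFindByWildcard('./experiments/**/*.pth')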
    raise ValueError('This appears to be an old version of COSMIC, please use a newer version (v76 or above).')

try:
    chromosome, coords = tsv_data['Mutation genome position'].split(':')
except ValueError:
    # skip cosmic entries with no position data
    continue
else:
    start, end = coords.split('-')
    tsv_data['chromosome'] = chromosome
    tsv_data['start'] = start
    tsv_data['end'] = end
    new_cosmic_lines.append(tsv_data)

# tabix needs the file to be sorted
new_cosmic_lines.sort(key=lambda x: int(x['end']))
new_cosmic_lines.sort(key=lambda x: int(x['start']))
new_cosmic_lines = natsort.natsorted(new_cosmic_lines, key=lambda x: x['chromosome'])

headers.extend(['chromosome', 'start', 'end'])
out = open(output_fname, 'w')
out.write('#' + '\t'.join(headers) + '\n')
for tsv_data in new_cosmic_lines:
    out.write('\t'.join([tsv_data[h] for h in headers]) + '\n')
out.close()

pysam.tabix_index(filename=output_fname, seq_col=34, start_col=35, end_col=36)
print("Creating second index for AA position...")

# Create a second index file by gene and start AA, end AA
TabixIndexer.indexGeneProteinPosition("Gene name", "Mutation AA", input_fname, output_fname + ".byAA")
def find_latest_trmt(folder: Path or str):
    return natsorted(
        [Path(f).abspath() for f in Path(folder).listdir('TRMT_*.miz')]).pop()
import os

import cv2
import numpy as np
from natsort import natsort

root_dir = "/home/tonner/Downloads/GaitDatasetB-silh/001/cl-01/054"
image_files = natsort.natsorted(os.listdir(root_dir))
N = len(image_files)
print(image_files)

# Accumulate absolute frame-to-frame differences, then average them to build the AEI.
acc = None
for i in range(N):
    img = cv2.imread(os.path.join(root_dir, image_files[i]))
    if i == 0:
        diff_img1 = img.astype(np.float64)
        acc = np.zeros_like(diff_img1)
    else:
        prev_img = cv2.imread(os.path.join(root_dir, image_files[i - 1]))
        diff_img1 = cv2.absdiff(img, prev_img).astype(np.float64)
    acc += diff_img1  # the original "diff_img1 += diff_img1" only doubled the last difference
    cv2.imshow("difference", diff_img1.astype(np.uint8))
    cv2.waitKey(1)

cv2.imwrite("difference1.png", diff_img1)
AEI = acc / N
cv2.imwrite("difference2.png", AEI)
# cv2.imshow("difference", AEI)
# cv2.waitKey(1)
def natural_sorted(list_string, reverse=False):
    return natsort.natsorted(list_string, reverse=reverse)
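# Minimal usage sketch (added for illustration; the sample list is made up):
# natural_sorted(['run10', 'run2', 'run1'])               -> ['run1', 'run2', 'run10']
# natural_sorted(['run10', 'run2', 'run1'], reverse=True) -> ['run10', 'run2', 'run1']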
        img.putpixel(coordinate, (128, 0, 0))
    for coordinate in fifth:
        img.putpixel(coordinate, (128, 0, 128))
    for coordinate in sixth:
        img.putpixel(coordinate, (255, 255, 255))
    for coordinate in seven:
        img.putpixel(coordinate, (0, 0, 0))
    correct_save_path = "/home/kinsozheng/Desktop/compare_voc/8bit_fix/" + img_name + ".png"
    img.save(correct_save_path)


if __name__ == '__main__':
    path = "/home/kinsozheng/Desktop/compare_voc/24bit_ori/"
    files = natsort.natsorted(os.listdir(path))
    file_names = []
    for file in files:
        name = os.path.splitext(file)[0]
        file_names.append(name)
    pool = Pool()
    pool.map(fix_eight_bit_problem, file_names)
    pool.close()
    pool.join()