def main(numOfQueries):
    p = 10**(-7)
    with open("words.txt") as f:
        cont = f.read()
    cont = cont.split()
    n = int(len(cont))
    m = (-(n * np.log(p)) / (np.log(2)**2))
    m = int(round(m))
    global k
    k = round((m / n) * np.log(2))
    global a
    print("p:", p, "n:", n, "m:", m, "k:", k)
    a = bitarray(m)
    bloom1 = bloom(m)
    bar = br.Bar('Read Words', max=n)
    for elem in cont:
        bloom1.add(str(elem))
        bar.next()
    bar.finish()
    print("\nFile read...\n")
    print("The number of words stored is: %d \n"
          "The probability of a false positive occurrence is: %f\n"
          "The size of the bitarray is: %d positions" % (n, p, m))
    wordsToCheck = r.generate(numOfQueries)
    timeStart = time.time()
    for x in wordsToCheck:
        bloom1.check(x)
    timeEnd = time.time()
    return timeEnd - timeStart
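# For reference, a standalone sketch of the standard Bloom filter sizing used
# in main() above; the word count n and false-positive rate p are illustrative.
import math

n = 1_000_000          # number of stored words (illustrative)
p = 1e-7               # target false-positive probability
m = int(round(-n * math.log(p) / (math.log(2) ** 2)))  # bits in the filter
k = int(round((m / n) * math.log(2)))                   # number of hash functions
print(m, k)            # roughly 33.5 million bits and 23 hash functions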
def write_labels(main_directory_path, dataset_name):
    bspath = external_working_directory_path + 'user/' + main_directory_path + '/'
    folders = os.listdir(bspath)
    n_images = 0
    for folder in folders:
        n_images += len(os.listdir(bspath + folder))
    class_id = {}
    if not os.path.exists(external_working_directory_path + 'user/datasets/' + dataset_name):
        os.mkdir(external_working_directory_path + 'user/datasets/' + dataset_name)
    with open(external_working_directory_path + 'user/datasets/' + dataset_name + '/labels.txt', 'a') as labels:
        writing_progress = bar.Bar("Writing image labels: ", max=n_images)
        for i, folder_name in enumerate(folders):
            for image in os.listdir(bspath + '/' + folder_name):
                labels.write(folder_name + '\n')
                writing_progress.next()
            class_id[i] = folder_name
    with open(external_working_directory_path + 'user/datasets/' + dataset_name + '/class_id.json', 'w') as txt:
        json.dump(class_id, txt, indent=3, sort_keys=True)
def write_image_data(self):
    writing_progress = bar.Bar("Writing images: ", max=len(self.image_data))
    for imn in range(len(self.image_data)):
        self.image_data[imn].append(self.labels[imn])
        self.csv_writer.write(self.image_data[imn])
        writing_progress.next()
    self._write_metadata()
def main(lst):
    counter = 0
    varlist = []
    var = ""
    l = list(lst.lower())
    t2lst = list(permutations(l, len(l)))
    try:
        for i in t2lst:
            for j in i:
                var = var + j
            varlist.append(var)
            var = ""
        barl = bar.Bar(colorama.Fore.CYAN + "Searching-- ", max=len(varlist))
        for string in varlist:
            r_through(string)
            barl.next()
        mainl = list(OrderedDict.fromkeys(mainlist))
        if len(mainl) > 0:
            print(colorama.Fore.YELLOW, "\nAll results found:")
            for i in mainl:
                print(colorama.Fore.GREEN, i)
        else:
            print(colorama.Fore.YELLOW, "\nSorry, no results found.")
    except KeyboardInterrupt:
        print(colorama.Fore.RED, "\nYou stopped the search")
def pgp(dateRange, source='/home/james/Documents/MPHYS_ARCHIVE/PGP', gsm=False):
    dates = [f"20{i[0]:02d}{i[1]:02d}" for i in dateRange]
    files = sorted(glob.glob(source + '/*.cdf'))
    filesRef = [s.split('/')[-1].split('_')[-2][:-2] for s in files]
    files = dict(zip(filesRef, files))
    df = pd.DataFrame(columns=['x', 'y', 'z'])
    with Timer('Timing PGP'):
        bar = progress.Bar('Loading PGP', max=len(dates))
        for d in dates:
            logging.info(files[d])
            pgp = cdflib.CDF(files[d])
            timetags = pgp.varget('Epoch__CL_JP_PGP')
            pos = pgp.varget('sc_r_xyz_gse__CL_JP_PGP')
            if gsm:
                conv = pgp.varget('gse_gsm__CL_JP_PGP')
                pos[:, 1] *= np.cos(np.radians(-conv))
                pos[:, 2] *= np.cos(np.radians(-conv))
            time = cdflib.cdfepoch.unixtime(timetags)
            time = [dt.datetime.utcfromtimestamp(t) for t in time]
            locations_month = pd.DataFrame(
                np.column_stack([time, pos[:, 0], pos[:, 1], pos[:, 2]]),
                columns=['time', 'x', 'y', 'z'])
            locations_month.x = locations_month.x.astype(float)
            locations_month.z = locations_month.z.astype(float)
            locations_month.y = locations_month.y.astype(float)
            locations_month = locations_month.set_index('time')
            df = df.append(locations_month)
            bar.next()
        bar.finish()
    return df
def getRGB(self):
    rgb_vals = []
    pixels = self.non_mean_pixels()
    reading_progress = bar.Bar("Reading images: ", max=len(self.paths))
    for n, im_pixels in enumerate(pixels):
        rgb_vals.append(im_pixels)
        self.rgb_vals.append(im_pixels)
        reading_progress.next()
def main(self):
    self.write_metadata()
    writing_progress = bar.Bar("Writing images: ", max=len(self.input_data))
    for imn in range(len(self.input_data)):
        self.input_data[imn].append(self.labels[imn])
        self.write_csv(self.input_data[imn])
        writing_progress.next()
    self.id_man.add()
def write_unclassified_dataset_paths(dataset_path, dataset_name):
    dataset_dir = workspace_dir + 'user/unclassified_datasets/' + dataset_name
    mkdir(dataset_dir)
    images = os.listdir(dataset_path)
    n_images = len(images)
    writing_progress = bar.Bar("Writing image paths: ", max=n_images)
    with open(dataset_dir + '/paths.txt', 'a') as paths:
        for image in images:
            paths.write(dataset_path + '/' + image + '\n')
            writing_progress.next()
def generate(numOfWords, write=0):
    r = RandomWord()
    words = r.getList(numOfWords)
    copy = words
    if write == 1:
        bar = br.Bar('Generating Words', max=numOfWords)
        words = map(lambda x: x + '\n', words)
        with open("words.txt", "w") as f:
            for x in words:
                f.write(x)
                bar.next()
        bar.finish()
    return copy
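# Minimal usage sketch, assuming generate() and the Bloom-filter main() above
# are importable from the same namespace; the word and query counts are illustrative.
generate(100000, write=1)             # write 100k random words to words.txt
elapsed = main(numOfQueries=10000)    # build the filter and time 10k lookups
print("10000 membership checks took %.3f s" % elapsed)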
def write_training_dataset_paths(dataset_path, dataset_name):
    dataset_dir = workspace_dir + 'user/training_datasets/' + dataset_name
    mkdir(dataset_dir)
    folders = os.listdir(dataset_path)
    n_images = 0
    for folder in folders:
        n_images += len(os.listdir(dataset_path + '/' + folder))
    with open(dataset_dir + '/paths.txt', 'a') as paths:
        writing_progress = bar.Bar("Writing image paths: ", max=n_images)
        for folder in folders:
            images = os.listdir(dataset_path + '/' + folder)
            for tp in images:
                paths.write(dataset_path + '/' + folder + '/' + tp + '\n')
                writing_progress.next()
def omni(dateRange, source='/home/james/Documents/MPHYS_ARCHIVE/OMNI', gsm=False):
    dates = [f"20{i[0]:02d}{i[1]:02d}" for i in dateRange]
    files = sorted(glob.glob(source + '/*.cdf'))
    filesRef = [s.split('/')[-1].split('_')[-2][:-2] for s in files]
    files = dict(zip(filesRef, files))
    df = pd.DataFrame(columns=['bx', 'by', 'bz'])
    with Timer('Timing OMNI'):
        bar = progress.Bar('Loading OMNI', max=len(dates))
        for d in dates:
            logging.info(files[d])
            omni = cdflib.CDF(files[d])
            timetags = omni.varget('Epoch')
            imf_x = omni.varget('BX_GSE')
            if gsm:
                imf_z = omni.varget('BZ_GSM')
                imf_y = omni.varget('BY_GSM')
            else:
                imf_z = omni.varget('BZ_GSE')
                imf_y = omni.varget('BY_GSE')
            time = cdflib.cdfepoch.unixtime(timetags)
            time = [dt.datetime.utcfromtimestamp(t) for t in time]
            imf_month = pd.DataFrame(
                np.column_stack([time, imf_x, imf_y, imf_z]),
                columns=['time', 'bx', 'by', 'bz'])
            # imf_month.bx = imf_month.bx.mask(
            #     imf_month.bx > 1000).interpolate().astype('float')
            # imf_month.by = imf_month.by.mask(
            #     imf_month.by > 1000).interpolate().astype('float')
            # imf_month.bz = imf_month.bz.mask(
            #     imf_month.bz > 1000).interpolate().astype('float')
            imf_month.bx = imf_month.bx.mask(imf_month.bx > 1000).astype('float')
            imf_month.by = imf_month.by.mask(imf_month.by > 1000).astype('float')
            imf_month.bz = imf_month.bz.mask(imf_month.bz > 1000).astype('float')
            imf_month.dropna(inplace=True)
            imf_month = imf_month.set_index('time')
            imf_month = imf_month.resample("5T").mean()
            df = df.append(imf_month)
            bar.next()
        bar.finish()
    return df
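# Minimal usage sketch; dateRange entries are assumed to be (year % 100, month)
# tuples, matching the f"20{i[0]:02d}{i[1]:02d}" formatting used above.
dates = [(16, m) for m in range(1, 4)]   # Jan-Mar 2016 (illustrative)
imf = omni(dates, gsm=True)              # 5-minute IMF means, GSM components
pos = pgp(dates, gsm=True)               # spacecraft position samples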
def extract_image_data(self):
    loading_progress = bar.Bar("Loading images: ", max=len(self.image_paths))
    for pixels_n in range(len(self.image_paths)):
        img = Image.open(self.image_paths[pixels_n])
        img_rgb = img.convert('RGB')
        im_pixels = []
        for x in range(self.img_dims[0]):
            for y in range(self.img_dims[1]):
                r, g, b = img_rgb.getpixel((x, y))
                im_pixels.append(r)
                im_pixels.append(g)
                im_pixels.append(b)
        loading_progress.next()
        self.image_data.append(im_pixels)
    self._write_metadata()
def non_mean_pixels(self):
    dimensions = self.get_dims()
    loading_progress = bar.Bar("Loading images: ", max=len(self.paths))
    for pixels_n in range(len(self.paths)):
        img = Image.open(self.paths[pixels_n])
        img_rgb = img.convert('RGB')
        im_pixels = []
        for x in range(dimensions[pixels_n][0]):
            for y in range(dimensions[pixels_n][1]):
                r, g, b = img_rgb.getpixel((x, y))
                im_pixels.append(r)
                im_pixels.append(g)
                im_pixels.append(b)
        loading_progress.next()
        self.pixels.append(im_pixels)
    return self.pixels
def write_labels(dataset_path, dataset_name):
    dataset_dir = workspace_dir + 'user/training_datasets/' + dataset_name
    mkdir(dataset_dir)
    folders = os.listdir(dataset_path)
    n_images = 0
    for folder in folders:
        n_images += len(os.listdir(dataset_path + '/' + folder))
    class_id = {}
    with open(dataset_dir + '/labels.txt', 'a') as labels:
        writing_progress = bar.Bar("Writing image labels: ", max=n_images)
        for i, folder_name in enumerate(folders):
            for image in os.listdir(dataset_path + '/' + folder_name):
                labels.write(folder_name + '\n')
                writing_progress.next()
            class_id[i] = folder_name
    with open(dataset_dir + '/class_id.json', 'w') as txt:
        json.dump(class_id, txt, indent=3, sort_keys=True)
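# Illustrative output, assuming the dataset directory holds two class folders
# named "cats" and "dogs" (hypothetical): labels.txt gets one folder name per
# image, and class_id.json maps the enumeration index to the folder name:
#
#     {
#        "0": "cats",
#        "1": "dogs"
#     }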
def write_paths(main_directory_path, dataset_name):
    bspath = external_working_directory_path + main_directory_path + '/'
    folders = os.listdir(bspath)
    n_images = 0
    for folder in folders:
        n_images += len(os.listdir(bspath + folder))
    if not os.path.exists(external_working_directory_path + 'datasets/' + dataset_name):
        os.mkdir(external_working_directory_path + 'datasets/' + dataset_name)
    with open(external_working_directory_path + 'datasets/' + dataset_name + '/paths.txt', 'a') as paths:
        writing_progress = bar.Bar("Writing image paths: ", max=n_images)
        for n, folder in enumerate(folders):
            images = os.listdir(bspath + folder)
            for tp in images:
                paths.write(bspath + folder + '/' + tp + '\n')
                writing_progress.next()
    print("")
def write_labels(main_directory_path, dataset_name, label_file_name='labels.txt'):
    bspath = external_working_directory_path + main_directory_path + '/'
    folders = os.listdir(bspath)
    n_images = 0
    for folder in folders:
        n_images += len(os.listdir(bspath + folder))
    keys = []
    for k in product(string.ascii_uppercase, repeat=2):
        keys.append(k[0] + k[1])
    obj_to_lb = {}
    if not os.path.exists(external_working_directory_path + 'datasets/' + dataset_name):
        os.mkdir(external_working_directory_path + 'datasets/' + dataset_name)
    with open(external_working_directory_path + 'datasets/' + dataset_name + '/' + label_file_name, 'a') as labels:
        writing_progress = bar.Bar("Writing image labels: ", max=n_images)
        for i, folder in enumerate(folders):
            for ftp in os.listdir(bspath + '/' + folder):
                labels.write(keys[i] + '\n')
                writing_progress.next()
            obj_to_lb[folder.split('-')[0]] = keys[i]
    with open(external_working_directory_path + 'datasets/' + dataset_name + '/obj_labels.json', 'w') as txt:
        json.dump(obj_to_lb, txt, indent=3, sort_keys=True)
    print("")
def moments(dateRange, source='/home/james/Documents/MPHYS_ARCHIVE/MOMENTS'):
    """
    Loads moments CDF data and returns a time-indexed DataFrame.

    Inputs: dateRange -> iterable of (year % 100, month) tuples
    Outputs: DataFrame
    """
    dates = [f"20{i[0]:02d}{i[1]:02d}" for i in dateRange]
    files = sorted(glob.glob(source + '/*.cdf'))
    filesRef = [s.split('/')[-1].split('_')[6][:-2] for s in files]
    files = dict(zip(filesRef, files))
    df = pd.DataFrame(columns=['temp'])
    with Timer('Timing MOMENTS'):
        bar = progress.Bar('Loading MOMENTS', max=len(dates))
        for d in dates:
            logging.info(files[d])
            moments = cdflib.CDF(files[d])
            timetags = moments.varget('time_tags__C1_CP_CIS-HIA_ONBOARD_MOMENTS')
            time = cdflib.cdfepoch.unixtime(timetags)
            time = [dt.datetime.utcfromtimestamp(t) for t in time]
            temp = moments.varget('temperature__C1_CP_CIS-HIA_ONBOARD_MOMENTS')
            moments = pd.DataFrame(np.column_stack([time, temp]),
                                   columns=['time', 'temp'])
            moments.set_index('time', inplace=True)
            moments.temp = moments.temp.astype(float)
            moments = moments.resample("5T").mean().clip(lower=-2e3, upper=2e3)
            moments.temp = moments.temp.mask(moments.temp > 140).astype(float)
            moments.temp = moments.temp.mask(moments.temp < -20).astype(float)
            df = df.append(moments)
            bar.next()
        bar.finish()
    return df
def _write_image_xml(self):
    xml = "<image>\n"
    name_attributes = ""
    if self.appliance_version:
        name_attributes += " version='%s'" % self.appliance_version
    if self.appliance_release:
        name_attributes += " release='%s'" % self.appliance_release
    xml += " <name%s>%s</name>\n" % (name_attributes, self.name)
    xml += " <domain>\n"
    # XXX don't hardcode - determine based on the kernel we installed for grub
    # baremetal vs xen
    xml += " <boot type='hvm'>\n"
    xml += " <guest>\n"
    xml += " <arch>%s</arch>\n" % os.uname()[4]
    xml += " </guest>\n"
    xml += " <os>\n"
    xml += " <loader dev='hd'/>\n"
    xml += " </os>\n"
    i = 0
    for name in list(self.__disks.keys()):
        xml += " <drive disk='%s-%s.%s' target='hd%s'/>\n" % (
            self.name, name, self.__disk_format, chr(ord('a') + i))
        i = i + 1
    xml += " </boot>\n"
    xml += " <devices>\n"
    xml += " <vcpu>%s</vcpu>\n" % self.vcpu
    xml += " <memory>%d</memory>\n" % (self.vmem * 1024)
    for network in self.ks.handler.network.network:
        xml += " <interface/>\n"
    xml += " <graphics/>\n"
    xml += " </devices>\n"
    xml += " </domain>\n"
    xml += " <storage>\n"
    if self.checksum is True:
        for name in list(self.__disks.keys()):
            diskpath = "%s/%s-%s.%s" % (self._outdir, self.name, name,
                                        self.__disk_format)
            disk_size = os.path.getsize(diskpath)
            meter_ct = 0
            meter = progress.Bar("Generating disk signature for %s-%s.%s" %
                                 (self.name, name, self.__disk_format),
                                 max=disk_size)
            xml += " <disk file='%s-%s.%s' use='system' format='%s'>\n" % (
                self.name, name, self.__disk_format, self.__disk_format)
            try:
                import hashlib
                m1 = hashlib.sha1()
                m2 = hashlib.sha256()
            except:
                import sha
                m1 = sha.new()
                m2 = None
            f = open(diskpath, "r")
            while 1:
                chunk = f.read(65536)
                if not chunk:
                    break
                m1.update(chunk)
                if m2:
                    m2.update(chunk)
                meter.next(65536)
            sha1checksum = m1.hexdigest()
            xml += """ <checksum type='sha1'>%s</checksum>\n""" % sha1checksum
            if m2:
                sha256checksum = m2.hexdigest()
                xml += """ <checksum type='sha256'>%s</checksum>\n""" % sha256checksum
            xml += " </disk>\n"
    else:
        for name in list(self.__disks.keys()):
            xml += " <disk file='%s-%s.%s' use='system' format='%s'/>\n" % (
                self.name, name, self.__disk_format, self.__disk_format)
    xml += " </storage>\n"
    xml += "</image>\n"
    logging.debug("writing image XML to %s/%s.xml" % (self._outdir, self.name))
    cfg = open("%s/%s.xml" % (self._outdir, self.name), "w")
    cfg.write(xml)
    cfg.close()
def write_image_data(self):
    writing_progress = bar.Bar("Writing images: ", max=len(self.data))
    for image_data_inst in self.data:
        self.csv_writer.write(image_data_inst)
        writing_progress.next()
def annotate_trace(file_name, options):
    names = get_names(options)
    addresses = map(lambda x: x[0], names)
    syms = map(lambda x: x[1], names)
    disassembler = find_disassembler(options)
    # Open trace file and sniff file format
    v2_header = "\x82CheriStreamTrace"
    # pad to 32 bytes
    v2_header = v2_header + '\0' * (34 - len(v2_header))
    if file_name is None:
        trace_file = sys.stdin
    else:
        trace_file = open(file_name, 'r')
    if trace_file.read(len(v2_header)) == v2_header and not opts.version:
        if not opts.quiet:
            sys.stderr.write("Detected v2 trace format.\n")
        opts.version = 2
    # shape of an entry
    if opts.version == 2:
        s = struct.Struct('>BBHIQQQBB')
    else:
        s = struct.Struct('>BBHIQQQ')
    # field names for above
    (field_version,
     field_exception,
     field_cycles,
     field_opcode,
     field_pc,
     field_result1,
     field_result2,
     field_threadID,
     field_asid) = range(9)
    if file_name is None:
        nentries = float('inf')
    else:
        trace_size = os.stat(file_name).st_size
        nentries = trace_size / s.size
    whence = 0 if opts.skip >= 0 else 2
    trace_file.seek(opts.skip * s.size, whence)
    nentries = nentries - opts.skip if opts.skip >= 0 else -opts.skip
    if opts.limit is not None:
        nentries = min(opts.limit, nentries)
    if not options.quiet:
        sys.stderr.write("%s: %f entries\n" % (file_name, nentries))
    if nentries < 0xfff or nentries == float('inf') or opts.quiet:
        bar = None
    else:
        bar = progress_bar.Bar('Processing',
                               suffix='%(percent).1f%% - %(avg)f %(elapsed_td)s / %(eta_td)s',
                               max=nentries)
    cycle_count = 0
    inst_count = 0
    next_pc = 0
    last_pc = None
    last_cycles = None
    entry_no = 0
    # maintain a set of unique instruction encodings encountered
    # this is mainly useful for sizing the disassembly cache
    unique_opcodes = set()
    if opts.cut_func is not None:
        cut_func_idx = syms.index(opts.cut_func)
        opts.start_pc = addresses[cut_func_idx]
        opts.stop_pc = addresses[cut_func_idx + 1] - 4
    tracing = opts.start_pc is None and opts.start_inst is None and not opts.trace_markers
    start_inst = float('inf') if opts.start_inst is None else opts.start_inst
    stop_inst = float('inf') if opts.stop_inst is None else opts.stop_inst
    branch_target = None
    iteration = 0
    if tracing and opts.cut:
        cut_file = open(opts.cut % iteration, 'w')
        if opts.version == 2:
            cut_file.write(v2_header)
    else:
        cut_file = None
    out_file = sys.stdout

    def newStats():
        return Stats(opts.icache_line, opts.dcache_line, opts.page_bits)

    inst_stats = defaultdict(newStats) if opts.inst_stats else None
    func_stats = defaultdict(newStats) if opts.func_stats else None
    if opts.ordered_func_stats:
        current_sym_name = None
        ordered_func_stats = []
    all_stats = newStats() if (opts.stats or opts.inst_stats or opts.func_stats or
                               opts.ordered_func_stats or opts.tlb_conflicts or
                               opts.cache_conflicts) else None
    while entry_no < nentries:
        if bar is not None and entry_no & 0xfff == 0:
            bar.goto(entry_no)
        entry_no += 1
        e = trace_file.read(s.size)
        if len(e) < s.size:
            break  # EOF
        f = s.unpack(e)
        entry_type = f[field_version]
        if entry_type >= 0x80:
            # skip header records, but still write them out
            if cut_file:
                cut_file.write(e)
            continue
        if entry_type == version_timestamp:
            # skip timestamp records, but still write them out because we might want them in future
            if cut_file:
                cut_file.write(e)
            new_cycle_count = f[field_result1]
            new_inst_count = f[field_result2]
            if tracing and opts.show and not opts.quiet:
                if cycle_count != new_cycle_count:
                    out_file.write("Warning: timestamp cycle count mismatch: %d != %d\n" %
                                   (cycle_count, new_cycle_count))
                if inst_count != new_inst_count:
                    out_file.write("Warning: timestamp instr count mismatch: %d != %d\n" %
                                   (inst_count, new_inst_count))
            cycle_count = new_cycle_count
            inst_count = new_inst_count
            continue
        if entry_type in (version_cap_clc, version_cap_csc):
            # capability instructions don't give pc, so use one we made earlier
            pc = next_pc
        else:
            pc = f[field_pc]
            # currently this fails when PCC!=0 and on eret etc.
            #if next_pc is not None and pc != next_pc:
            #    if not opts.quiet: sys.stdout.write("Warning: predicted next PC did not match trace: %x!=%x\n" % (pc, next_pc))
        # sometimes entries are duplicated (e.g. after eret) -- skip them
        if pc == last_pc and f[field_cycles] == last_cycles:
            continue
        last_pc = pc
        inst_count += 1
        # calculate the next pc in case we need it. We will get it wrong in
        # some cases (e.g. branch likely, eret) but this does not matter
        # unless we land on a clc or csc.
        if branch_target is not None:
            # branch delay slot, so use previously stored branch target
            next_pc = branch_target
            branch_target = None
        elif entry_type in (version_noresult, version_alu) and opts.branch_pc:
            # these instructions might contain branch destination
            dest_pc = f[field_result1]
            if dest_pc != pc + 4:
                # it's a branch
                branch_target = dest_pc
                #if tracing: print "branch => %x" % branch_target
            next_pc = pc + 4  # XXX wrong for branch likely
        else:
            next_pc = pc + 4
        #print "%d, %x %x %s" % (entry_type, pc, next_pc, hex(f[field_result1]))
        if not tracing and ((opts.trace_markers and f[field_opcode] == 0xefbe0034) or
                            (opts.start_pc is not None and pc >= opts.start_pc and
                             pc < (opts.start_pc + 4 * opts.pc_window)) or
                            inst_count >= start_inst):
            if not opts.quiet:
                sys.stderr.write("\nStart: iteration=%d pc=%x inst=%x\n" %
                                 (iteration, pc, inst_count))
            tracing = True
            if opts.cut:
                cut_file = open(opts.cut % iteration, 'w')
                if opts.version == 2:
                    cut_file.write(v2_header)
        elif tracing and ((opts.trace_markers and f[field_opcode] == 0xadde0034) or
                          (opts.stop_pc is not None and pc >= opts.stop_pc and
                           pc < (opts.stop_pc + 4 * opts.pc_window)) or
                          inst_count > stop_inst):
            if not opts.quiet:
                sys.stderr.write("\nStop: iteration=%d pc=%x inst=%x\n" %
                                 (iteration, pc, inst_count))
            tracing = False
            # start a new cut file for each iteration
            if cut_file:
                cut_file.close()
                cut_file = None
            iteration += 1
            if inst_count > stop_inst:
                break
            continue
        elif (not tracing or
              (opts.user and (pc & 0xf000000000000000) != 0) or
              (opts.kernel and (pc & 0xf000000000000000) == 0) or
              (opts.asid is not None and opts.asid != f[field_asid]) or
              (opts.thread is not None and opts.thread != f[field_threadID])):
            continue
        if cut_file:
            cut_file.write(e)
        i = bisect.bisect_right(addresses, pc) - 1
        sym = syms[i]
        sym_addr = addresses[i]
        sym_off = pc - sym_addr
        cycles = f[field_cycles]
        if last_cycles is None or cycles == last_cycles:
            # first instruction or dubious entry
            inst_cycles = 1
        elif cycles > last_cycles:
            inst_cycles = cycles - last_cycles
        else:
            inst_cycles = 0x400 + cycles - last_cycles  # overflow
        last_cycles = cycles
        cycle_count += inst_cycles
        # the instruction encoding is little endian for some reason
        inst = struct.unpack('>I', struct.pack('=I', f[field_opcode]))[0]
        if opts.count_encs:
            unique_opcodes.add(inst)
        asid = f[field_asid] if opts.version == 2 else 0
        exception = f[field_exception]
        mem_addr = f[field_result1] if entry_type in (version_read, version_write,
                                                      version_cap_clc, version_cap_csc) else None
        if func_stats is not None:
            func_stats[sym].update(cycles=inst_cycles, exception=exception, pc=pc,
                                   asid=asid, entry_type=entry_type,
                                   mem_addr=mem_addr, call=(sym_off == 0))
        if inst_stats is not None:
            inst_stats[pc].update(cycles=inst_cycles, exception=exception,
                                  pc=pc, asid=asid, entry_type=entry_type,
                                  mem_addr=mem_addr)
        if all_stats is not None:
            all_stats.update(cycles=inst_cycles, exception=exception, pc=pc,
                             asid=asid, entry_type=entry_type, mem_addr=mem_addr)
        if opts.ordered_func_stats:
            if current_sym_name != sym:
                current_sym_stats = newStats()
                current_sym_name = sym
                ordered_func_stats.append((current_sym_name, current_sym_stats))
            current_sym_stats.update(cycles=inst_cycles, exception=exception, pc=pc,
                                     asid=asid, entry_type=entry_type, mem_addr=mem_addr)
        if opts.show:
            data = None
            op, args = disassembler.disassemble(inst)
            inst_no = '%0.16x ' % inst_count if opts.show_inst else ''
            asid_str = '%0.2x ' % f[field_asid] if opts.version == 2 else ''
            threadID = '%0.2x ' % f[field_threadID] if opts.version == 2 else ''
            data = '=%0.16x' % f[field_result2] if entry_type in (version_alu, version_write, version_read) else ' ' * 17
            addr = '@%0.16x' % mem_addr if mem_addr is not None else ' ' * 17
            e = '' if exception == 31 else 'EXCEPTION %s ' % exception_names[exception] if exception < len(exception_names) else 'UNKNOWN EXCEPTION %d:' % exception
            if entry_type in (version_cap_clc, version_cap_csc) and not opts.no_caps:
                data = decodeCap(f[field_result2], f[field_pc])
            if entry_type == version_cap_cap and not opts.no_caps:
                data = decodeCap(f[field_result2], f[field_result1])
            out_file.write("%s%s%s%16x %-12ls %-20s %s %s %3d %s%s +0x%x\n" %
                           (inst_no, threadID, asid_str, pc, op, args, data, addr,
                            inst_cycles, e, sym, sym_off))
        if not tracing:
            # we've just stopped tracing
            last_cycles = None
    if bar is not None:
        bar.finish()
    if func_stats:
        dump_stats(sys.stdout, sorted(func_stats.iteritems()), opts.csv)
    if inst_stats:
        dump_stats(sys.stdout, sorted(inst_stats.iteritems()), opts.csv)
    if opts.ordered_func_stats:
        dump_stats(sys.stdout, ordered_func_stats, opts.csv)
    if all_stats:
        if opts.csv:
            out_file.write(','.join([file_name] + map(str, all_stats.as_tuple())) + '\n')
        else:
            print file_name, ':', all_stats
    if opts.tlb_conflicts:
        all_pages = list(all_stats.dpages.union(all_stats.ipages))
        all_pages.sort()
        all_pages.sort(lambda x, y: cmp(x[1] % opts.tlb_hashed_entries,
                                        y[1] % opts.tlb_hashed_entries))
        last_entry = None
        conflicts = 0
        for asid, page in all_pages:
            entry = page % opts.tlb_hashed_entries
            if entry == last_entry:
                conflicts += 1
            else:
                print "---"
            last_entry = entry
        print "Pages: %d Conflicts: %d" % (len(all_pages), conflicts)
    if opts.cache_conflicts:
        print "\nL1 data conflicts (%dx%d-byte lines):" % (opts.dcache_line, opts.dcache_lines)
        print_conflicts(addresses, syms, list(all_stats.dcache_lines),
                        opts.dcache_line, opts.dcache_lines)
        print "\nL1 inst conflicts (%dx%d-byte lines):" % (opts.icache_line, opts.icache_lines)
        print_conflicts(addresses, syms, list(all_stats.icache_lines),
                        opts.icache_line, opts.icache_lines)
        print "\nL2 conflicts:"
        print_conflicts(addresses, syms,
                        list(all_stats.icache_lines) + list(all_stats.dcache_lines),
                        128, 128)
    if opts.count_encs:
        sys.stderr.write("Unique encodings: %d\n" % len(unique_opcodes))
def main():
    ### Get the current datetime for naming. ###
    now = datetime.datetime.now().strftime('%b%d_%H-%M-%S')
    suffix = 'Itr: %(index)d of %(max)d. Avg: %(avg).02f/itr. Total: %(elapsed).02f. Remaining: %(eta).02f.'

    ### Argument parsing. ###
    parser = argparse.ArgumentParser(description='Train a Multiscale Gazenet Model')

    ### Miscellaneous options. ###
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('-w', '--workers', default=2, type=int)

    ### Directory options. ###
    parser.add_argument('-tf', '--trial_file', default='config/good_gaze.txt')
    parser.add_argument('-d', '--data_dir',
                        default='/home/ben/Desktop/harplabstuff/harpdata/gaze_tensors2')
    parser.add_argument('-lc', '--config_file', default=None)
    parser.add_argument('-s', '--save_dir',
                        default=os.path.join(os.getcwd(), 'runs',
                                             now + '_' + socket.gethostname()))
    parser.add_argument('-c', '--save_config', default='config.pkl')
    parser.add_argument('-p', '--pred_dir', default='preds')
    parser.add_argument('-l', '--log_dir', default='logs')
    parser.add_argument('-m', '--model_dir', default='models')

    ### Model options ###
    parser.add_argument('-w1', '--window1', default=12, type=int)
    parser.add_argument('-w2', '--window2', default=24, type=int)
    parser.add_argument('-w3', '--window3', default=36, type=int)

    ### Training options. ###
    parser.add_argument('-train', '--train_pct', default=.9, type=float)
    parser.add_argument('-valid', '--valid_pct', default=.1, type=float)
    parser.add_argument('-bs', '--batch_size', default=64, type=int)
    parser.add_argument('-lr', '--learning_rate', default=0.001, type=float)
    parser.add_argument('-e', '--epochs', default=100, type=int)

    args = parser.parse_args()

    if args.config_file and args._get_kwargs() == 1:
        ### If we want to load from a config file, do so. ###
        if args.verbose:
            print(message('Loading config file.'))
        with open(args.config_file, 'rb') as f:
            args = pickle.load(f)
    elif args.config_file and args._get_kwargs() > 1:
        ### If we have specified a config file and positional arguments ###
        ### raise an exception. ###
        raise TypeError(
            'train_gazenet.py takes only 1 positional argument when config_file is specified.')
    else:
        ### Save the current configuration to a file in order to load later ###
        if args.verbose:
            print(message('Saving config file.'))
        os.system('mkdir -p ' + args.save_dir)
        os.system('touch ' + os.path.join(args.save_dir, args.save_config))
        with open(os.path.join(args.save_dir, args.save_config), 'ab') as f:
            pickle.dump(args, f)

    ### Set some additional configuration arguments that are defined ###
    ### from those the user gave through standard in. ###
    args.windows = [
        args.window3 - args.window1,
        args.window3 - args.window2,
        args.window3 - args.window3
    ]
    args.test_pct = 1 - args.train_pct

    ### Make all of the given directories or files absolute. ###
    args.save_config = os.path.join(args.save_dir, args.save_config)
    args.pred_dir = os.path.join(args.save_dir, args.pred_dir)
    args.log_dir = os.path.join(args.save_dir, args.log_dir)
    args.model_dir = os.path.join(args.save_dir, args.model_dir)

    ### Create the above directories. ###
    os.system('mkdir -p ' + args.pred_dir)
    os.system('mkdir -p ' + args.log_dir)
    os.system('mkdir -p ' + args.model_dir)

    ### Create a Tensorboard SummaryWriter. ###
    if args.verbose:
        print(message('Creating SummaryWriter.'))
    with SummaryWriter() as writer:
        ### Create the necessary datasets and dataloaders. ###
        if args.verbose:
            print(message('Creating datasets and dataloaders.'))
        #trials, types = utils.get_trials(args.trial_file)
        #trials = utils.prefix_to_list(args.data_dir, trials)
        #total, train, valid, test = utils.test_train_split(GazeDataset, args.train_pct, valid=args.valid_pct, trials=trials, windows=args.windows)
        #train_trials, train_types, valid_trials, valid_types, test_trials, test_types = split_inds(args.trial_file, args.train_pct, valid=args.valid_pct, prefix=args.data_dir)
        train_trials = utils.prefix_to_list(
            args.data_dir, utils.load_pickle('config/train_trials.pkl'))
        valid_trials = utils.prefix_to_list(
            args.data_dir, utils.load_pickle('config/valid_trials.pkl'))
        test_trials = utils.prefix_to_list(
            args.data_dir, utils.load_pickle('config/test_trials.pkl'))
        train = GazeDataset(train_trials, args.windows)
        valid = GazeDataset(valid_trials, args.windows)
        test = GazeDataset(test_trials, args.windows)
        trainloader = DataLoader(train, batch_size=args.batch_size, shuffle=True,
                                 drop_last=True, num_workers=args.workers)
        validloader = DataLoader(valid, batch_size=args.batch_size, shuffle=False,
                                 drop_last=False, num_workers=args.workers)

        ### Save the train, valid, and test inds. ###
        if args.verbose:
            print(message('Saving index orders.'))
        utils.save_inds(train.inds, os.path.join(args.save_dir, 'train_inds.txt'))
        utils.save_inds(valid.inds, os.path.join(args.save_dir, 'valid_inds.txt'))
        utils.save_inds(test.inds, os.path.join(args.save_dir, 'test_inds.txt'))

        ### Create the model, criteria, and optimizer ###
        if args.verbose:
            print(message('Creating model and optimizer.'))
        model = GazeEncoderMS(args.window1, args.window2, args.window3)
        criterion = nn.MSELoss(size_average=False)
        optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
        # dummy_input = (Variable(torch.rand(64,2,12)), Variable(torch.rand(64,2,24)), Variable(torch.rand(64,2,36)), )
        # writer.add_graph(model, dummy_input, verbose=True)

        ### Set some incrementers and begin training. ###
        if args.verbose:
            print(message('Starting to train.'))
        min_valid_loss = 999999999999999
        num_train_iter = 0
        num_valid_iter = 0
        epoch = 0
        for epoch in range(args.epochs):
            if args.verbose:
                print(message('Starting Epoch {:d}.'.format(epoch)))
            model_name = 'model_{:05d}.model'.format(epoch + 1)

            ### Specify where train and valid predictions should be saved ###
            epoch_train_dir = os.path.join(args.save_dir, args.pred_dir,
                                           'epoch_{:05d}'.format(epoch + 1), 'train')
            epoch_valid_dir = os.path.join(args.save_dir, args.pred_dir,
                                           'epoch_{:05d}'.format(epoch + 1), 'valid')
            os.system('mkdir -p ' + epoch_train_dir)
            os.system('mkdir -p ' + epoch_valid_dir)

            ### Begin the training loop. ###
            train_bar = bar.Bar(message('Training'),
                                max=len(train) / args.batch_size, suffix=suffix)
            train_loss = 0.
            train_copy_loss = 0.
            train_vel_loss = 0.
            if args.verbose:
                print(message('Enumerating Trainloader.'))
            for i, data in enumerate(trainloader, 1):
                train_bar.next()
                num_train_iter += 1
                optimizer.zero_grad()

                ### Get the data and turn them into Variables. ###
                gaze1 = Variable(data[:, 2:, args.windows[0]:args.window3])
                gaze2 = Variable(data[:, 2:, args.windows[1]:args.window3])
                gaze3 = Variable(data[:, 2:, args.windows[2]:args.window3])
                label = Variable(data[:, 2:, -1])

                ### Forward pass. ###
                pred = model(gaze1, gaze2, gaze3)

                ### Calculate loss. ###
                pred_mask, label_mask, num = mask(pred, label)
                loss = criterion(pred_mask, label_mask) / num
                train_loss += loss.data[0]

                ### Calculate some other losses ###
                c_loss = copy_loss(gaze3, label)
                v_loss = const_vel_loss(gaze3, label)
                train_copy_loss += c_loss.data[0]
                train_vel_loss += v_loss.data[0]

                ### Backward pass. ###
                loss.backward()

                ### Optimize. ###
                optimizer.step()

                ### Save pertinent losses to tensorboard. ###
                for b in range(args.batch_size):
                    num = (num_train_iter - 1) * args.batch_size + b
                    writer.add_scalar('train/data/true_x', label.data[b, 0], num)
                    writer.add_scalar('train/data/true_y', label.data[b, 1], num)
                    writer.add_scalar('train/data/pred_x', pred.data[b, 0], num)
                    writer.add_scalar('train/data/pred_y', pred.data[b, 1], num)
                    writer.add_scalar('train/data/mask/true_x', label_mask.data[b, 0], num)
                    writer.add_scalar('train/data/mask/true_y', label_mask.data[b, 1], num)
                    writer.add_scalar('train/data/mask/pred_x', pred_mask.data[b, 0], num)
                    writer.add_scalar('train/data/mask/pred_y', pred_mask.data[b, 1], num)
                writer.add_scalar('train/losses/train_loss_itr', loss.data[0], num_train_iter)
                writer.add_scalar('train/losses/train_copyloss_itr', c_loss.data[0], num_train_iter)
                writer.add_scalar('train/losses/train_constvelloss_itr', v_loss.data[0], num_train_iter)
                for name, param in model.named_parameters():
                    writer.add_histogram(name, param.clone().data.numpy(), num_train_iter)
                torch.save(pred.data,
                           os.path.join(epoch_train_dir, 'pred_{:05d}.pt7'.format(i + 1)))
            train_bar.finish()

            ### Add the average loss over the epoch to the tensorboard. ###
            avg_train_loss = (train_loss / len(train)) * args.batch_size
            avg_c_loss = (train_copy_loss / len(train)) * args.batch_size
            avg_v_loss = (train_vel_loss / len(train)) * args.batch_size
            writer.add_scalar('train/losses/train_loss_epoch', avg_train_loss, epoch)
            writer.add_scalar('train/losses/train_copyloss_epoch', avg_c_loss, epoch)
            writer.add_scalar('train/losses/train_constvelloss_epoch', avg_v_loss, epoch)

            ### Begin validation loop. ###
            valid_bar = bar.Bar(message('Validate'), max=len(valid), suffix=suffix)
            valid_loss = 0
            valid_copy_loss = 0.
            valid_vel_loss = 0.
            for i, data in enumerate(validloader):
                num_valid_iter += 1
                valid_bar.next()

                ### Get the data and turn them into Variables. ###
                gaze1 = Variable(data[:, 2:, :args.windows[0]])
                gaze2 = Variable(data[:, 2:, :args.windows[1]])
                gaze3 = Variable(data[:, 2:, :args.windows[2]])
                label = Variable(data[:, 2:, -1])

                ### Forward pass. ###
                pred = model(gaze1, gaze2, gaze3)

                ### Calculate loss. ###
                pred_mask, label_mask, num = mask(pred, label)
                loss = criterion(pred_mask, label_mask) / num
                valid_loss += loss.data[0]
                c_loss = copy_loss(gaze3, label)
                valid_copy_loss += c_loss.data[0]
                cv_loss = const_vel_loss(gaze3, label)
                valid_vel_loss += cv_loss.data[0]

                ### Save pertinent losses to tensorboard. ###
                writer.add_scalar('valid/data/true_x', label.data[0, 0], num_valid_iter)
                writer.add_scalar('valid/data/true_y', label.data[0, 1], num_valid_iter)
                writer.add_scalar('valid/data/pred_x', pred.data[0, 0], num_valid_iter)
                writer.add_scalar('valid/data/pred_y', pred.data[0, 1], num_valid_iter)
                writer.add_scalar('valid/data/mask/true_x', label_mask.data[0, 0], num_valid_iter)
                writer.add_scalar('valid/data/mask/true_y', label_mask.data[0, 1], num_valid_iter)
                writer.add_scalar('valid/data/mask/pred_x', pred_mask.data[0, 0], num_valid_iter)
                writer.add_scalar('valid/data/mask/pred_y', pred_mask.data[0, 1], num_valid_iter)
                writer.add_scalar('valid/losses/valid_loss', loss.data[0], num_valid_iter)
                writer.add_scalar('valid/losses/valid_copyloss', c_loss.data[0], num_valid_iter)
                writer.add_scalar('valid/losses/valid_constvelloss', cv_loss.data[0], num_valid_iter)
                torch.save(pred.data,
                           os.path.join(epoch_valid_dir, 'pred_{:05d}.pt7'.format(i + 1)))
            valid_bar.finish()

            ### Add the average loss over the epoch to the tensorboard. ###
            avg_valid_loss = valid_loss / len(valid) * args.batch_size
            writer.add_scalar('valid/losses/valid_loss_epoch', avg_valid_loss, epoch)
            avg_copy_loss = valid_copy_loss / len(valid) * args.batch_size
            writer.add_scalar('valid/losses/valid_copyloss_epoch', avg_copy_loss, epoch)
            avg_vel_loss = valid_vel_loss / len(valid) * args.batch_size
            writer.add_scalar('valid/losses/valid_constvelloss_epoch', avg_vel_loss, epoch)

            ### Export all scalars to json for later use. ###
            if epoch % 5 == 0:
                writer.export_scalars_to_json(os.path.join(args.log_dir, 'log.json'))

            ### Save the model if the current epoch's validation loss was less than ###
            ### the previous minimum. ###
            if avg_valid_loss < min_valid_loss:
                min_valid_loss = avg_valid_loss
                optimizer.zero_grad()
                torch.save(model.state_dict(), os.path.join(args.model_dir, model_name))
#!/usr/bin/env python
import os
import subprocess

from Bio import SeqIO
from progress import bar

f_dir = '../input_genome_sequences'
file_list = os.listdir(f_dir)
file_list = [f for f in file_list if f.endswith('.gbk')]
pbar = bar.Bar('files ', max=len(file_list))
for f in file_list:
    pbar.next()
    records = SeqIO.parse(os.path.join(f_dir, f), 'gb')
    new_file_name = f + '.fn'
    outh = open(new_file_name, 'w')
    for r in records:
        outh.write(r.format('fasta'))
    outh.close()
    rRNA_output = f + '.rnammer.bac.fasta'
    with open(rRNA_output, 'w') as file_handle:
        pass
    cmd = [
        'perl', '/home/nick/Downloads/rnammer-1.2/rnammer', '-S', 'bac',
        '-multi', '-m', 'lsu,ssu,tsu',
        '-f', rRNA_output, new_file_name
    ]
    result = subprocess.check_output(cmd)
def main():
    ### Get the current datetime for naming. ###
    now = datetime.datetime.now().strftime('%b%d_%H-%M-%S')
    suffix = 'Itr: %(index)d of %(max)d. Avg: %(avg).02f/itr. Total: %(elapsed).02f. Remaining: %(eta).02f.'

    ### Argument parsing. ###
    parser = argparse.ArgumentParser(description='Train a Multiscale Gazenet Model')

    ### Miscellaneous options. ###
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('-w', '--workers', default=2, type=int)

    ### Directory options. ###
    parser.add_argument('-tf', '--trial_file', default='config/test_trials.pkl')
    parser.add_argument('-tyf', '--type_file', default='config/test_types.pkl')
    parser.add_argument('-d', '--data_dir',
                        default='/home/ben/Desktop/harplabstuff/harpdata/gaze_tensors2')
    parser.add_argument('-s', '--save_dir',
                        default=os.path.join(os.getcwd(), 'runs',
                                             now + '_' + socket.gethostname()))
    parser.add_argument('-c', '--save_config', default='config.pkl')
    parser.add_argument('-p', '--pred_dir', default='preds')
    parser.add_argument('-l', '--log_dir', default='logs')
    parser.add_argument('-m', '--model_dir', default='models')

    ### Model options ###
    parser.add_argument('-w1', '--window1', default=12, type=int)
    parser.add_argument('-w2', '--window2', default=24, type=int)
    parser.add_argument('-w3', '--window3', default=36, type=int)

    ### Testing Options ###
    parser.add_argument('-bs', '--batch_size', default=64, type=int)
    parser.add_argument('-lm', '--load_model',
                        default='/home/ben/Desktop/harplabstuff/harpcode/eye_to_joy/runs/Jan25_01-57-27_Aeolus/models/model_00064.model')
    #parser.add_argument('-', '--window1', default=12, type=int)

    args = parser.parse_args()
    """
    if args.config_file and args._get_kwargs() == 1:
        ### If we want to load from a config file, do so. ###
        if args.verbose: print message('Loading config file.')
        with open(args.config_file, 'rb') as f:
            args = pickle.load(f)
    elif args.config_file and args._get_kwargs() > 1:
        ### If we have specified a config file and positional arguments ###
        ### raise an exception. ###
        raise TypeError('train_gazenet.py takes only 1 positional argument when config_file is specified.')
    else:
        ### Save the current configuration to a file in order to load later ###
        if args.verbose: print message('Saving config file.')
        os.system('mkdir -p ' + args.save_dir)
        os.system('touch ' + os.path.join(args.save_dir, args.save_config))
        with open(os.path.join(args.save_dir, args.save_config), 'ab') as f:
            pickle.dump(args, f)
    """
    args.windows = [args.window1, args.window2, args.window3]
    args.save_config = os.path.join(args.save_dir, args.save_config)
    args.pred_dir = os.path.join(args.save_dir, args.pred_dir)
    args.log_dir = os.path.join(args.save_dir, args.log_dir)
    args.model_dir = os.path.join(args.save_dir, args.model_dir)

    test_trials = utils.prefix_to_list(args.data_dir, utils.load_pickle(args.trial_file))
    test_types = utils.load_pickle(args.type_file)
    print(test_types)
    test = GazeDataset(test_trials, args.windows, test_types)
    testloader = DataLoader(test, batch_size=1, shuffle=False, drop_last=False,
                            num_workers=args.workers)
    lens = test._trial_lengths
    tps = test.types
    print(tps)

    model = GazeEncoderMS(args.window1, args.window2, args.window3, eval=True)
    model.load_state_dict(torch.load(args.load_model))
    model = model.eval()

    with open('test_results.csv', 'ab') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',')
        tot = 0
        test_bar = bar.Bar(message('Testing'), max=len(test) / 1)
        for i, data in enumerate(testloader, 1):
            tsum = 0
            frames = []
            vids = []
            totlens = []
            types = []
            for j in range(len(lens)):
                if i + 36 * j - (lens[j][1] + tsum) <= 0:
                    frames.append(i + 36 * j - tsum)
                    vids.append(lens[j][0])
                    totlens.append(lens[j][1])
                    types.append(tps[j])
                    break
                else:
                    tsum += lens[j][1]
            ## NEED TO CHANGE THIS
            gaze1 = Variable(data[:, 2:, :args.windows[0]])
            gaze2 = Variable(data[:, 2:, :args.windows[1]])
            gaze3 = Variable(data[:, 2:, :args.windows[2]])
            label = Variable(data[:, 2:, -1])
            pred = model(gaze1, gaze2, gaze3)
            pred_mask, label_mask, num = mask(pred, label)
            tot += torch.sum(pred_mask - label_mask) / num
            csvwriter.writerow(frames + vids + totlens + types +
                               list(pred.data[0]) + list(label.data[0]))
            test_bar.next()
        test_bar.finish()
        print(tot / i)
if __name__ == '__main__':
    with open("words.txt") as f:
        cont = f.read()
    cont = cont.split()
    print(len(cont))
    p = 10**(-7)
    n = int(len(cont))
    m = (-(n * np.log(p)) / (np.log(2)**2))
    m = int(round(m))
    print(m)
    print(p)
    bloom1 = bloom(m)
    bar = br.Bar('Read Words', max=n)
    for elem in cont:
        bloom1.add(str(elem))
        bar.next()
    print("\nFile read...\n")
    print("The number of words stored is: %d \n"
          "The probability of a false positive occurrence is: %f\n"
          "The size of the bitarray is: %d positions" % (n, p, m))
    choice = 0
    while choice != 3:
        try:
            choice = int(input(
                "What would you like to do now ? :\n"
                "1) Add another Element \n"
                "2) Check if word is in the filter \n"
                "3) Exit \n"))
        except: