def put(self, input_file):
    # Upload input_file to a content-addressed key: db/<first 3 hex chars>/<sha>/contents.
    sha = util.hash_file(input_file)
    target_path = util.pjoin(self.url_components.path or "/", "db",
                             sha[0:3], sha, "contents")
    self.bucket.upload_file(input_file, target_path.lstrip("/"))
    return sha
def put(self, input_file):
    # Local-filesystem variant: copy input_file into db/<first 3 hex chars>/<sha>/contents.
    sha = util.hash_file(input_file)
    target_path = os.path.join(self.repo_base, "db", sha[0:3], sha)
    fs.mkdir_p(target_path)
    shutil.copy(input_file, os.path.join(target_path, "contents"))
    return sha
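# The snippets in this collection all lean on a hash_file/util.hash_file helper
# that is not shown here (and whose signature varies between projects: some
# return only the digest, others a (digest, tmp) pair, one threads an explicit
# hasher through it). A minimal sketch, assuming the common case of streaming
# the file through SHA-256 and returning the hex digest:
import hashlib

def hash_file(path, chunk_size=1 << 20):
    """Return the SHA-256 hex digest of the file at `path`, read in chunks."""
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()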
def store(src_fn):
    name = prefix_path(args.prefix, src_fn)
    digest2 = repo.get_name_digest(name)
    if digest2 is not None and not args.overwrite:
        log('file with same name exists, skipping %s' % src_fn)
        return
    if not args.overwrite:
        size = os.stat(src_fn).st_size
        digest = repo.find_name_size(name, size)
        if digest:
            log('file with same name and size exists, skipping %s' % src_fn)
            return
    if not args.hash:
        digest = util.get_xattr_hash(src_fn)
    else:
        digest = None
    if digest is None:
        debug('computing digest', src_fn)
        digest, tmp = util.hash_file(src_fn)
    assert len(digest) == util.SHA256_LEN, repr(digest)
    if repo.data_exists(digest):
        key_abs = repo.data(digest)
        if not os.path.samefile(src_fn, key_abs):
            print('link from repo %r' % src_fn)
            if not OPTIONS.dryrun:
                util.link_over(key_abs, src_fn)
        else:
            print('skip existing %r' % src_fn)
    else:
        print('import', src_fn)
        repo.link_in(src_fn, digest)
        repo.add_data(digest, src_fn)
    # save filename in meta data
    repo.add_name(digest, name, overwrite=args.overwrite)
def add(self, path):
    # Hash the file and upload it only if no matching blob is already stored.
    digest = util.hash_file(path)
    log.info('digest: {} <- {}', digest, path)
    blob_id = self.db.blobs.get(digest)
    if (blob_id is None) or (blob_id not in self.blob_ids):
        blob_id = self._send(path, digest)
        log.info('blob id {} <- {}', blob_id, path)
    self.db.insert_file(file_path=path, file_id=digest)
def _send(self, path, digest, blob_id=None):
    # Encrypt the file, derive the blob id from the encrypted copy, and upload it.
    temp_path = self._encrypt(path)
    if blob_id is None:
        blob_id = util.hash_file(temp_path)
        log.info('encrypt {} <- {}', blob_id, path)
    self.store.put(blob_id=blob_id, path=temp_path)
    if digest is not None:
        self.db.insert_blob(file_id=digest, blob_id=blob_id)
    self.blob_ids.add(blob_id)
    return blob_id
def put(self, blob_id, path):
    # Upload with a precomputed SHA1, verify the hash stored remotely, then drop the temp file.
    expected_sha1 = util.hash_file(path, hasher=util.sha1)
    listener = TqdmProgressListener(blob_id)
    self.bucket.upload_local_file(local_file=path, file_name=blob_id,
                                  sha1_sum=expected_sha1,
                                  progress_listener=listener)
    remote_sha1 = self.get_hash(blob_id)
    if remote_sha1 and remote_sha1 != expected_sha1:
        raise ValueError('Bad SHA1', remote_sha1)
    os.remove(path)
def _copy_tmp(self, src_fn):
    # copy external file to temporary file inside repo, store hash
    if not OPTIONS.dryrun:
        ensure_dir(self.tmp_dir)
        tmp = tempfile.NamedTemporaryFile(dir=self.tmp_dir)
    else:
        tmp = None
    digest, tmp = util.hash_file(src_fn, tmp=tmp)
    if not OPTIONS.dryrun:
        mtime = os.stat(src_fn).st_mtime
        util.set_xattr_hash(tmp.name, digest, mtime)
    return digest, tmp
def do_scrub(args):
    repo = _open_repo(args)
    problems = 0
    err_file = os.path.join(repo.root, 'scrub_errors.txt')
    t = time.time()
    with util.open_text(err_file, 'a', buffering=1) as err_fp:
        def err(*args):
            print(*args, file=err_fp)
        err('scrub started %s' % datetime.datetime.now())
        if args.cont:
            err('continue from %s' % args.cont)
        for digest in repo.list_files():
            if args.cont:
                if digest == args.cont:
                    args.cont = False
                else:
                    continue  # skip
            if time.time() - t > 20:
                # print progress
                print('scrub', digest)
                t = time.time()
            fn = repo.data(digest)
            if not os.path.exists(fn):
                err('missing data', digest)
                continue
            meta = repo.get_meta(digest)
            st = os.stat(fn)
            if st.st_size != int(meta['size']):
                err('size mismatch', digest, meta['size'], st.st_size)
            mtime_differs = abs(st.st_mtime - float(meta['mtime'])) > 5
            if mtime_differs:
                err('mtime differs', digest, meta['mtime'])
            if args.fast:
                continue  # no check of hashes
            if args.size and st.st_size > args.size:
                continue  # skip large file
            if args.modified and not mtime_differs:
                continue  # skip, modified time same
            # do the expensive hash
            digest2, tmp = util.hash_file(fn)
            log(digest, digest2)
            if digest != digest2:
                err('checksum mismatch', digest)
                problems += 1
            else:
                digest2 = util.get_xattr_hash(fn)
                if digest2 != digest:
                    err('update xattr', digest)
                    util.set_xattr_hash(fn, digest)
                    problems += 1
    if problems:
        print('problems were found (%s), see scrub_errors.txt' % problems)
def verified_get(self, sha, output_file):
    self.get(sha, output_file)
    if stat.S_ISREG(os.stat(output_file).st_mode):
        actual_sha = util.hash_file(output_file)
        if actual_sha != sha:
            raise Exception("Hash of downloaded file does not match" +
                            " expected sha: " + actual_sha + " vs " + sha)
    else:
        sys.stderr.write("Skipping verification because output is " +
                         "not a regular file")
        sys.stderr.flush()
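# The self.get() used by verified_get() above is not part of this excerpt. A
# minimal sketch of a matching read path for the local-filesystem put() shown
# earlier (assumed layout db/<sha[:3]>/<sha>/contents; not the original code):
def get(self, sha, output_file):
    source = os.path.join(self.repo_base, "db", sha[0:3], sha, "contents")
    shutil.copy(source, output_file)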
def before_platform_close(platform):
    # if the platform is already closed the psth will have written
    # its output and it's too late to add the file hash
    if platform.closed:
        return
    if record_stop_event is not None:
        # stop the recording process so we can calculate the hash of the recorded file
        print("waiting for recording to stop (platform)")
        record_stop_event.stop_recording()
    fpath = Path(args.loadcell_out)
    grf_file_hash = hash_file(fpath)
    platform.psth.output_extra['output_files'][fpath.name] = grf_file_hash
def try_load_lin(args: argparse.Namespace, file_idx: int, filename: str) \
        -> Optional[List[str]]:
    lin_path = Path2(filename + ".lin")
    if args.verbose:
        eprint("Attempting to load cached linearized version from {}"
               .format(lin_path))
    if not lin_path.exists():
        return None
    try:
        ignore_lin_hash = args.ignore_lin_hash
    except AttributeError:
        ignore_lin_hash = False
    with lin_path.open(mode='r') as f:
        first_line = f.readline().strip()
        if ignore_lin_hash or hash_file(filename) == first_line:
            return serapi_instance.read_commands(f.read())
        else:
            return None
    print('error creating directory %s: %s' % (path.dirname(cache_ent), err),
          file=stderr)
    exit(1)

print('Download %s' % src_url, file=stderr)
try:
    check_call(['curl', '--proxy-anyauth', '-ksfo', cache_ent, src_url])
except OSError as err:
    print('could not invoke curl: %s\nis curl installed?' % err, file=stderr)
    exit(1)
except CalledProcessError as err:
    print('error using curl: %s' % err, file=stderr)
    exit(1)

if args.v:
    have = hash_file(sha1(), cache_ent).hexdigest()
    if args.v != have:
        print((
            '%s:\n' +
            'expected %s\n' +
            'received %s\n') % (src_url, args.v, have), file=stderr)
        try:
            remove(cache_ent)
        except OSError as err:
            if path.exists(cache_ent):
                print('error removing %s: %s' % (cache_ent, err), file=stderr)
        exit(1)

exclude = []
if args.x:
    exclude += args.x
def save_lin(commands: List[str], filename: str) -> None:
    output_file = filename + '.lin'
    with open(output_file, 'w') as f:
        print(hash_file(filename), file=f)
        for command in commands:
            print(command, file=f)
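# Illustrative caller pattern for the .lin cache above (not from the original
# source): try the cached linearization first, regenerate and save on a miss.
# `linearize_file` and the file name are hypothetical stand-ins.
args = argparse.Namespace(verbose=False, ignore_lin_hash=False)
commands = try_load_lin(args, 0, "theories/Foo.v")
if commands is None:
    commands = linearize_file("theories/Foo.v")  # hypothetical regeneration step
    save_lin(commands, "theories/Foo.v")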
def _add_blob_by_path(self, path):
    # Index the file: map the path to its hash, and add the path to the blob's path set.
    hash = util.hash_file(path)
    self._add_blob(hash)
    self._db.set(self._pathkey(path), hash)
    self._db.sadd(self._blobkey(hash), path)
def run_psth_loop(platform: PsthTiltPlatform, tilt_sequence, *,
                  sham: bool, retry_failed: bool,
                  output_extra: Dict[str, Any],
                  before_platform_close: Callable[[PsthTiltPlatform], None],
                  ):
    input_file_list = []
    if platform.template_in_path is not None:
        input_file_list.append(Path(platform.template_in_path))
    input_files = {}
    for fpath in input_file_list:
        input_files[fpath.name] = hash_file(fpath)
    # psth = platform.psth

    if sham:
        with open(platform.template_in_path) as f:
            template_in = json.load(f)
        tilt_sequence = []
        sham_decodes = []
        sham_delays = []
        for tilt in template_in['tilts']:
            tilt_sequence.append(tilt['tilt_type'])
            p_tilt_type = tilt['predicted_tilt_type']
            if p_tilt_type is None:
                p_tilt_type = tilt['tilt_type']
            sham_decodes.append(p_tilt_type)
            sham_delays.append(tilt['delay'])

    input_queue: 'Queue[str]' = Queue(1)

    def input_thread():
        while True:
            cmd = input(">")
            input_queue.put(cmd)
            input_queue.put("")
            input_queue.join()

    spawn_thread(input_thread)

    tilt_records: List[Dict[str, Any]] = []
    # tilts will be added to below before being written to json
    platform.psth.output_extra['tilts'] = tilt_records
    platform.psth.output_extra['baseline'] = platform.baseline_recording
    platform.psth.output_extra['input_files'] = input_files
    platform.psth.output_extra['output_files'] = {}
    platform.psth.output_extra.update(output_extra)

    def get_cmd():
        try:
            _cmd: str = input_queue.get_nowait()
        except QEmpty:
            pass
        else:
            input_queue.task_done()
            print("Press enter to resume; q, enter to stop")
            cmd = input("paused>")
            # if cmd == 'q':
            #     break
            input_queue.get()
            input_queue.task_done()
            return cmd

    def do_tilt(tilt_type, i, sham_i, retry=None):
        check_recording()
        if sham:
            sham_result = tilt_type == sham_decodes[sham_i]
        else:
            sham_result = None
        if retry is not None:
            delay = retry['delay']
        elif sham:
            delay = sham_delays[sham_i]
        else:
            delay = None
        try:
            tilt_rec = platform.tilt(tilt_type, sham_result=sham_result, delay=delay)
        except SpikeWaitTimeout as e:
            tilt_rec = e.tilt_rec
            tilt_rec['spike_wait_timeout'] = True
            tilt_records.append(tilt_rec)
            raise
        tilt_rec['i'] = i
        tilt_rec['retry'] = retry
        # pprint(tilt_rec, sort_dicts=False)
        # pprint(tilt_rec)
        tilt_records.append(tilt_rec)
        # put data into psth class often so data will get saved in the case of a crash
        platform.psth.output_extra['tilts'] = tilt_records
        return tilt_rec

    def run_tilts():
        for i, tilt_type in enumerate(tilt_sequence):
            print(f'tilt {i}/{len(tilt_sequence)}')
            do_tilt(tilt_type, i, i)
            if get_cmd() == 'q':
                return
        out_i = i
        if retry_failed:
            failed_tilts = [
                deepcopy(x) for x in tilt_records
                if x['decoder_result_source'] == 'no_spikes'
            ]
        else:
            failed_tilts = None
        while failed_tilts:
            for tilt in failed_tilts:
                out_i += 1
                i = tilt['i']
                tilt_type = tilt['tilt_type']
                retry_tilt = do_tilt(tilt_type, out_i, i, retry=tilt)
                if retry_tilt['decoder_result_source'] != 'no_spikes':
                    tilt['retry_success'] = True
                if get_cmd() == 'q':
                    return
            failed_tilts = [x for x in failed_tilts if not x.get('retry_success')]

    run_tilts()

    before_platform_close(platform)
    platform.close()
    psthclass = platform.psth

    if not platform.baseline_recording and not sham:
        # pylint: disable=import-error
        from sklearn.metrics import confusion_matrix
        print('actual events:y axis, predicted events:x axis')
        confusion_matrix_calc = confusion_matrix(psthclass.event_number_list, psthclass.decoder_list)
        print(confusion_matrix_calc)
        correct_trials = 0
        for i in range(0, len(confusion_matrix_calc)):
            correct_trials = correct_trials + confusion_matrix_calc[i][i]
        decoder_accuracy = correct_trials / len(psthclass.event_number_list)
        print(('Accuracy = {} / {} = {}').format(
            correct_trials, len(psthclass.event_number_list), decoder_accuracy))

    print('Stop Plexon Recording.')

    for tilt in tilt_records:
        for warning in tilt['warnings']:
            print(warning)
        print()

    if any(x['decoder_result_source'] == 'no_spikes' for x in tilt_records):
        # count only the tilts whose decoder result came from 'no_spikes'
        num_failures = len([x for x in tilt_records
                            if x['decoder_result_source'] == 'no_spikes'])
        print(f"{num_failures} tilts failed due to no spikes occurring, "
              "THIS SHOULD NOT HAPPEN. TELL DR MOXON")
          file=stderr)
    exit(1)

print('Download %s' % src_url, file=stderr)
try:
    check_call(['curl', '--proxy-anyauth', '-ksfo', cache_ent, src_url])
except OSError as err:
    print('could not invoke curl: %s\nis curl installed?' % err, file=stderr)
    exit(1)
except CalledProcessError as err:
    print('error using curl: %s' % err, file=stderr)
    exit(1)

if args.v:
    have = hash_file(sha1(), cache_ent).hexdigest()
    if args.v != have:
        print(('%s:\n' +
               'expected %s\n' +
               'received %s\n') % (src_url, args.v, have), file=stderr)
        try:
            remove(cache_ent)
        except OSError as err:
            if path.exists(cache_ent):
                print('error removing %s: %s' % (cache_ent, err), file=stderr)
        exit(1)

exclude = []
if args.x:
    exclude = exclude + args.x
if args.exclude_java_sources:
print(u"Matched jobs found") else: print(u"No package jobs found, aborting") sys.exit(1) if dryrun: print(u"Dry Run only, no files will be downloaded") for job in jobnames: file_paths = u.fetch_job_file_paths(job, jobnames) match = jobnames[job] for filename, url in file_paths: print(u"Downloading file {0}".format(filename)) path = u.get_final_path(job, match, filename) u.download_file(url, path) # Hash file, if it has not changed sha1 hash for this job, we unlink it so it won't be updated by the rest of the script hash = u.hash_file(path) if u.same_hash(filename, hash, conn): print(u"==> File not changed since last run, skipping") os.remove(path) continue elif not dryrun: # as long as we are not a dry run, update the hash if not u.update_hash(filename, hash, conn): print(u"==> Could not save hash, deleting file") os.remove(path) continue else: # always remove file in dryrun os.remove(path)