Example #1
    def put(self, input_file):
        sha = util.hash_file(input_file)
        target_path = util.pjoin(self.url_components.path or "/",
                                 "db", sha[0:3], sha, "contents")
        self.bucket.upload_file(input_file, target_path.lstrip("/"))

        return sha
Example #2
    def put(self, input_file):
        sha = util.hash_file(input_file)
        target_path = os.path.join(self.repo_base, "db", sha[0:3], sha)
        fs.mkdir_p(target_path)
        shutil.copy(input_file, os.path.join(target_path, "contents"))

        return sha
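
The examples above call util.hash_file without showing it. A minimal sketch of such a helper, assuming it returns a lowercase hex SHA-256 digest of the file contents (later examples use variants that return a (digest, tmp) pair or accept an explicit hasher):

# Hypothetical sketch, not the actual util.hash_file from these projects.
import hashlib

def hash_file(input_file, chunk_size=1024 * 1024):
    """Return the hex SHA-256 digest of input_file, reading it in chunks."""
    hasher = hashlib.sha256()
    with open(input_file, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            hasher.update(chunk)
    return hasher.hexdigest()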
Example #3
 def store(src_fn):
     name = prefix_path(args.prefix, src_fn)
     digest2 = repo.get_name_digest(name)
     if digest2 is not None and not args.overwrite:
         log('file with same name exists, skipping %s' % src_fn)
         return
     if not args.overwrite:
         size = os.stat(src_fn).st_size
         digest = repo.find_name_size(name, size)
         if digest:
             log('file with same name and size exists, skipping %s' % src_fn)
             return
     if not args.hash:
         digest = util.get_xattr_hash(src_fn)
     else:
         digest = None
     if digest is None:
         debug('computing digest', src_fn)
         digest, tmp = util.hash_file(src_fn)
         assert len(digest) == util.SHA256_LEN, repr(digest)
     if repo.data_exists(digest):
         key_abs = repo.data(digest)
         if not os.path.samefile(src_fn, key_abs):
             print('link from repo %r' % src_fn)
             if not OPTIONS.dryrun:
                 util.link_over(key_abs, src_fn)
         else:
             print('skip existing %r' % src_fn)
     else:
         print('import', src_fn)
         repo.link_in(src_fn, digest)
         repo.add_data(digest, src_fn)
     # save filename in meta data
     repo.add_name(digest, name, overwrite=args.overwrite)
 def add(self, path):
     digest = util.hash_file(path)
     log.info('digest: {} <- {}', digest, path)
     blob_id = self.db.blobs.get(digest)
     if (blob_id is None) or (blob_id not in self.blob_ids):
         blob_id = self._send(path, digest)
     log.info('blob id {} <- {}', blob_id, path)
     self.db.insert_file(file_path=path, file_id=digest)
 def _send(self, path, digest, blob_id=None):
     temp_path = self._encrypt(path)
     if blob_id is None:
         blob_id = util.hash_file(temp_path)
     log.info('encrypt {} <- {}', blob_id, path)
     self.store.put(blob_id=blob_id, path=temp_path)
     if digest is not None:
         self.db.insert_blob(file_id=digest, blob_id=blob_id)
     self.blob_ids.add(blob_id)
     return blob_id
Example #6
 def put(self, blob_id, path):
     expected_sha1 = util.hash_file(path, hasher=util.sha1)
     listener = TqdmProgressListener(blob_id)
     self.bucket.upload_local_file(local_file=path,
                                   file_name=blob_id,
                                   sha1_sum=expected_sha1,
                                   progress_listener=listener)
     remote_sha1 = self.get_hash(blob_id)
     if remote_sha1 and remote_sha1 != expected_sha1:
         raise ValueError('Bad SHA1', remote_sha1)
     os.remove(path)
Example #7
 def _copy_tmp(self, src_fn):
     # copy external file to temporary file inside repo, store hash
     if not OPTIONS.dryrun:
         ensure_dir(self.tmp_dir)
         tmp = tempfile.NamedTemporaryFile(dir=self.tmp_dir)
     else:
         tmp = None
     digest, tmp = util.hash_file(src_fn, tmp=tmp)
     if not OPTIONS.dryrun:
         mtime = os.stat(src_fn).st_mtime
         util.set_xattr_hash(tmp.name, digest, mtime)
     return digest, tmp
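
Examples #3, #7, and #8 cache the digest in an extended attribute via util.get_xattr_hash and util.set_xattr_hash, which are not shown. A minimal sketch under the assumption that the digest is stored together with the file's mtime in a single "user." attribute (os.getxattr/os.setxattr are Linux-only; the attribute name and layout here are hypothetical):

# Hypothetical sketch of the xattr helpers; the real attribute name and format
# used by these projects are not shown in the examples.
import os

XATTR_NAME = 'user.content_sha256'  # assumed attribute name

def set_xattr_hash(path, digest, mtime):
    # store the digest together with the mtime it was computed for
    os.setxattr(path, XATTR_NAME, ('%s:%s' % (digest, mtime)).encode('ascii'))

def get_xattr_hash(path):
    # return the cached digest only if the file looks unmodified since caching
    try:
        digest, mtime = os.getxattr(path, XATTR_NAME).decode('ascii').split(':')
    except OSError:
        return None
    if abs(os.stat(path).st_mtime - float(mtime)) > 5:
        return None
    return digest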
Example #8
def do_scrub(args):
    repo = _open_repo(args)
    problems = 0
    err_file = os.path.join(repo.root, 'scrub_errors.txt')
    t = time.time()
    with util.open_text(err_file, 'a', buffering=1) as err_fp:
        def err(*args):
            print(*args, file=err_fp)
        err('scrub started %s' % datetime.datetime.now())
        if args.cont:
            err('continue from %s' % args.cont)
        for digest in repo.list_files():
            if args.cont:
                if digest == args.cont:
                    args.cont = False
                else:
                    continue # skip
            if time.time() - t > 20:
                # print progress
                print('scrub', digest)
                t = time.time()
            fn = repo.data(digest)
            if not os.path.exists(fn):
                err('missing data', digest)
                continue
            meta = repo.get_meta(digest)
            st = os.stat(fn)
            if st.st_size != int(meta['size']):
                err('size mismatch', digest, meta['size'], st.st_size)
            mtime_differs = abs(st.st_mtime - float(meta['mtime'])) > 5
            if mtime_differs:
                err('mtime differs', digest, meta['mtime'])
            if args.fast:
                continue # no check of hashes
            if args.size and st.st_size > args.size:
                continue # skip large file
            if args.modified and not mtime_differs:
                continue # skip, modified time same
            # do the expensive hash
            digest2, tmp = util.hash_file(fn)
            log(digest, digest2)
            if digest != digest2:
                err('checksum mismatch', digest)
                problems += 1
            else:
                digest2 = util.get_xattr_hash(fn)
                if digest2 != digest:
                    err('update xattr', digest)
                    util.set_xattr_hash(fn, digest)
                    problems += 1
    if problems:
        print('problems were found (%s), see scrub_errors.txt' % problems)
Example #9
    def verified_get(self, sha, output_file):
        self.get(sha, output_file)

        if stat.S_ISREG(os.stat(output_file).st_mode):
            actual_sha = util.hash_file(output_file)

            if actual_sha != sha:
                raise Exception("Hash of downloaded file does not match" +
                                " expected sha: " + actual_sha + " vs " + sha)
        else:
            sys.stderr.write("Skipping verification because output is " +
                             "not a regular file\n")
            sys.stderr.flush()
 def before_platform_close(platform):
     # if the platform is already closed, the psth will already have written
     # its output and it's too late to add the file hash
     if platform.closed:
         return
     if record_stop_event is not None:
         # stop the recording process so we can calculate the hash of the recorded file
         print("waiting for recording to stop (platform)")
         record_stop_event.stop_recording()
         
         fpath = Path(args.loadcell_out)
         grf_file_hash = hash_file(fpath)
         
         platform.psth.output_extra['output_files'][fpath.name] = grf_file_hash
Example #11
def try_load_lin(args: argparse.Namespace, file_idx: int, filename: str) \
        -> Optional[List[str]]:
    lin_path = Path2(filename + ".lin")
    if args.verbose:
        eprint("Attempting to load cached linearized version from {}"
               .format(lin_path))
    if not lin_path.exists():
        return None
    try:
        ignore_lin_hash = args.ignore_lin_hash
    except AttributeError:
        ignore_lin_hash = False

    with lin_path.open(mode='r') as f:
        first_line = f.readline().strip()
        if ignore_lin_hash or hash_file(filename) == first_line:
            return serapi_instance.read_commands(f.read())
        else:
            return None
Example #12
    print('error creating directory %s: %s' %
          (path.dirname(cache_ent), err), file=stderr)
    exit(1)

  print('Download %s' % src_url, file=stderr)
  try:
    check_call(['curl', '--proxy-anyauth', '-ksfo', cache_ent, src_url])
  except OSError as err:
    print('could not invoke curl: %s\nis curl installed?' % err, file=stderr)
    exit(1)
  except CalledProcessError as err:
    print('error using curl: %s' % err, file=stderr)
    exit(1)

if args.v:
  have = hash_file(sha1(), cache_ent).hexdigest()
  if args.v != have:
    print((
      '%s:\n' +
      'expected %s\n' +
      'received %s\n') % (src_url, args.v, have), file=stderr)
    try:
      remove(cache_ent)
    except OSError as err:
      if path.exists(cache_ent):
        print('error removing %s: %s' % (cache_ent, err), file=stderr)
    exit(1)

exclude = []
if args.x:
  exclude += args.x
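
Examples #12 and #16 call hash_file with a different signature: the caller passes in an existing hasher (hashlib.sha1() here) and calls .hexdigest() on the return value. A minimal sketch of that variant, assuming it simply feeds the file through the given hasher and returns it:

# Hypothetical sketch of the hash_file(hasher, path) variant used in
# Examples #12 and #16; calling .hexdigest() is left to the caller.
def hash_file(hasher, path, chunk_size=8192):
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            hasher.update(chunk)
    return hasher

With that signature, hash_file(sha1(), cache_ent).hexdigest() in the snippet above yields the hex digest that is compared against args.v.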
Example #13
def save_lin(commands: List[str], filename: str) -> None:
    output_file = filename + '.lin'
    with open(output_file, 'w') as f:
        print(hash_file(filename), file=f)
        for command in commands:
            print(command, file=f)
Example #14
 def _add_blob_by_path(self, path):
     hash = util.hash_file(path)
     self._add_blob(hash)
     self._db.set(self._pathkey(path), hash)
     self._db.sadd(self._blobkey(hash), path)
def run_psth_loop(platform: PsthTiltPlatform, tilt_sequence, *,
    sham: bool, retry_failed: bool,
    output_extra: Dict[str, Any],
    before_platform_close: Callable[[PsthTiltPlatform], None],
):
    
    input_file_list = []
    if platform.template_in_path is not None:
        input_file_list.append(Path(platform.template_in_path))
    
    input_files = {}
    for fpath in input_file_list:
        input_files[fpath.name] = hash_file(fpath)
    
    # psth = platform.psth
    if sham:
        with open(platform.template_in_path) as f:
            template_in = json.load(f)
        tilt_sequence = []
        sham_decodes = []
        sham_delays = []
        
        for tilt in template_in['tilts']:
            tilt_sequence.append(tilt['tilt_type'])
            p_tilt_type = tilt['predicted_tilt_type']
            if p_tilt_type is None:
                p_tilt_type = tilt['tilt_type']
            sham_decodes.append(p_tilt_type)
            sham_delays.append(tilt['delay'])
    
    input_queue: 'Queue[str]' = Queue(1)
    def input_thread():
        while True:
            cmd = input(">")
            input_queue.put(cmd)
            input_queue.put("")
            input_queue.join()
    
    spawn_thread(input_thread)
    
    tilt_records: List[Dict[str, Any]] = []
    # tilt records will be appended to this list below before being written to json
    platform.psth.output_extra['tilts'] = tilt_records
    platform.psth.output_extra['baseline'] = platform.baseline_recording
    platform.psth.output_extra['input_files'] = input_files
    platform.psth.output_extra['output_files'] = {}
    platform.psth.output_extra.update(output_extra)
    
    def get_cmd():
        try:
            _cmd: str = input_queue.get_nowait()
        except QEmpty:
            pass
        else:
            input_queue.task_done()
            print("Press enter to resume; q, enter to stop")
            cmd = input("paused>")
            # if cmd == 'q':
            #     break
            input_queue.get()
            input_queue.task_done()
            return cmd
    
    def do_tilt(tilt_type, i, sham_i, retry=None):
        check_recording()
        if sham:
            sham_result = tilt_type == sham_decodes[sham_i]
        else:
            sham_result = None
        
        if retry is not None:
            delay = retry['delay']
        elif sham:
            delay = sham_delays[sham_i]
        else:
            delay = None
        
        try:
            tilt_rec = platform.tilt(tilt_type, sham_result=sham_result, delay=delay)
        except SpikeWaitTimeout as e:
            tilt_rec = e.tilt_rec
            tilt_rec['spike_wait_timeout'] = True
            tilt_records.append(tilt_rec)
            raise
        tilt_rec['i'] = i
        tilt_rec['retry'] = retry
        # pprint(tilt_rec, sort_dicts=False)
        # pprint(tilt_rec)
        tilt_records.append(tilt_rec)
        # put data into psth class often so data will get saved in the case of a crash
        platform.psth.output_extra['tilts'] = tilt_records
        return tilt_rec
    
    def run_tilts():
        for i, tilt_type in enumerate(tilt_sequence):
            print(f'tilt {i}/{len(tilt_sequence)}')
            do_tilt(tilt_type, i, i)
            
            if get_cmd() == 'q':
                return
        
        out_i = i
        
        if retry_failed:
            failed_tilts = [
                deepcopy(x)
                for x in tilt_records
                if x['decoder_result_source'] == 'no_spikes'
            ]
        else:
            failed_tilts = None
        
        while failed_tilts:
            for tilt in failed_tilts:
                out_i += 1
                
                i = tilt['i']
                tilt_type = tilt['tilt_type']
                
                retry_tilt = do_tilt(tilt_type, out_i, i, retry=tilt)
                
                if retry_tilt['decoder_result_source'] != 'no_spikes':
                    tilt['retry_success'] = True
                
                if get_cmd() == 'q':
                    return
            
            failed_tilts = [x for x in failed_tilts if not x.get('retry_success')]
    
    run_tilts()
    
    before_platform_close(platform)
    
    platform.close()
    psthclass = platform.psth
    
    if not platform.baseline_recording and not sham:
        # pylint: disable=import-error
        from sklearn.metrics import confusion_matrix
        
        print('actual events: y axis, predicted events: x axis')
        confusion_matrix_calc = confusion_matrix(psthclass.event_number_list, psthclass.decoder_list)
        print(confusion_matrix_calc)
        correct_trials = 0
        for i in range(len(confusion_matrix_calc)):
            correct_trials += confusion_matrix_calc[i][i]
        decoder_accuracy = correct_trials / len(psthclass.event_number_list)
        print('Accuracy = {} / {} = {}'.format(correct_trials, len(psthclass.event_number_list), decoder_accuracy))
        print('Stop Plexon Recording.')
    
    for tilt in tilt_records:
        for warning in tilt['warnings']:
            print(warning)
    print()
    if any(x['decoder_result_source'] == 'no_spikes' for x in tilt_records):
        num_failures = sum(
            1 for x in tilt_records if x['decoder_result_source'] == 'no_spikes')
        print(f"{num_failures} tilts failed due to no spikes occurring, THIS SHOULD NOT HAPPEN. TELL DR MOXON")
Example #16
              file=stderr)
        exit(1)

    print('Download %s' % src_url, file=stderr)
    try:
        check_call(['curl', '--proxy-anyauth', '-ksfo', cache_ent, src_url])
    except OSError as err:
        print('could not invoke curl: %s\nis curl installed?' % err,
              file=stderr)
        exit(1)
    except CalledProcessError as err:
        print('error using curl: %s' % err, file=stderr)
        exit(1)

if args.v:
    have = hash_file(sha1(), cache_ent).hexdigest()
    if args.v != have:
        print(('%s:\n' + 'expected %s\n' + 'received %s\n') %
              (src_url, args.v, have),
              file=stderr)
        try:
            remove(cache_ent)
        except OSError as err:
            if path.exists(cache_ent):
                print('error removing %s: %s' % (cache_ent, err), file=stderr)
        exit(1)

exclude = []
if args.x:
    exclude = exclude + args.x
if args.exclude_java_sources:
    print(u"Matched jobs found")

else:
    print(u"No package jobs found, aborting")
    sys.exit(1)

if dryrun:
    print(u"Dry Run only, no files will be downloaded")

for job in jobnames:
    file_paths = u.fetch_job_file_paths(job, jobnames)
    match = jobnames[job]
    for filename, url in file_paths:
        print(u"Downloading file {0}".format(filename))
        path = u.get_final_path(job, match, filename)
        u.download_file(url, path)
        # Hash the file; if the sha1 hash has not changed for this job, remove the
        # local copy so it won't be updated by the rest of the script
        hash = u.hash_file(path)
        if u.same_hash(filename, hash, conn):
            print(u"==> File not changed since last run, skipping")
            os.remove(path)
            continue
        elif not dryrun:  # as long as we are not a dry run, update the hash
            if not u.update_hash(filename, hash, conn):
                print(u"==> Could not save hash, deleting file")
                os.remove(path)
                continue
        else:
            # always remove file in dryrun
            os.remove(path)