def _fix_id(src_id, dest_id, identifier, root, repo, new_aliquot=None):
    """Rewrite an analysis's identifier (and optionally aliquot) and move all
    of its files from the source id to the destination id.

    src_id/dest_id -- source and destination analysis ids
    identifier -- new value for the meta file's 'identifier' field
    root, repo -- repository location passed through to analysis_path
    new_aliquot -- when truthy, also overwrite the 'aliquot' field
    """
    src = analysis_path(src_id, repo, root=root)
    dst = analysis_path(dest_id, repo, root=root, mode='w')
    print(src, dst)
    if not os.path.isfile(src):
        print('not a file', src)
        return

    meta = dvc_load(src)
    meta['identifier'] = identifier
    if new_aliquot:
        meta['aliquot'] = new_aliquot
    dvc_dump(meta, dst)
    print('{}>>{}'.format(src, dst))

    # relocate every associated modifier file alongside the meta file
    for modifier in ('baselines', 'blanks', 'extraction', 'intercepts',
                     'icfactors', 'peakcenter', '.data'):
        src = analysis_path(src_id, repo, modifier=modifier, root=root)
        dst = analysis_path(dest_id, repo, modifier=modifier, root=root, mode='w')
        print('{}>>{}'.format(src, dst))
        if src and os.path.isfile(src):
            # shutil.copy(src, dst)
            shutil.move(src, dst)
def fix_timestamp():
    """Diagnostic helper: for each (run, aliquot) pair in the runlist, fetch
    the 'bu-FC-J' analysis for that aliquot and report whether the run's
    files exist in the Streck2015 and Irradiation-NM-276 repositories."""
    dvc = get_dvc()
    dvc.connect()
    dvc.create_session()

    for run, aliquot in get_runlist():
        an = dvc.get_analysis_runid('bu-FC-J', aliquot)
        print('ff', an, an.repository_identifier)
        path = analysis_path(run, 'Streck2015')
        ipath = analysis_path(run, 'Irradiation-NM-276')
        print(path, os.path.isfile(path), os.path.isfile(ipath))
def fix_a_steps(dest, repo_identifier, root):
    """Repair aliquots that lost their first step: if an aliquot's analyses
    start with increment None followed by increment 1, set the first
    analysis's increment to 0 and rename its files accordingly.

    dest -- database adapter (session_ctx/get_repository/get_analysis)
    repo_identifier -- repository to scan
    root -- unused here; kept for interface parity with sibling fixers
    """
    with dest.session_ctx():
        repo = dest.get_repository(repo_identifier)
        # flatten to sortable tuples:
        # (identifier, aliquot, increment, record_id, db id)
        ans = [(ra.analysis.irradiation_position.identifier,
                ra.analysis.aliquot,
                ra.analysis.increment,
                ra.analysis.record_id,
                ra.analysis.id) for ra in repo.repository_associations]
        key = lambda x: x[0]
        ans = sorted(ans, key=key)
        for identifier, ais in groupby(ans, key=key):
            # skip non-numeric identifiers
            try:
                int(identifier)
            except ValueError:
                continue

            # groupby aliquot
            key = lambda xi: xi[1]
            for aliquot, ais in groupby(ais, key=key):
                # BUG FIX: increments may be None; comparing None with int
                # raises TypeError under Python 3. Sort None first (as
                # Python 2 did) via a (has_value, value) key.
                ais = sorted(ais, key=lambda ai: (ai[2] is not None, ai[2] or 0))
                print(identifier, aliquot, ais)
                # if the first increment for a given aliquot is 1
                # and the increment for the first analysis of the aliquot is None
                if len(ais) == 1:
                    continue
                if ais[0][2] is None and ais[1][2] == 1:
                    an = dest.get_analysis(ais[0][4])
                    print('fix', ais[0], an, an.record_id)
                    original_record_id = str(an.record_id)
                    # path before the increment change
                    path = analysis_path(an.record_id, repo_identifier)
                    obj = dvc_load(path)
                    obj['increment'] = 0
                    an.increment = 0
                    # NOTE(review): npath is recomputed after an.increment is
                    # set -- presumably an.record_id now includes the step;
                    # confirm against the model.
                    npath = analysis_path(an.record_id, repo_identifier)
                    dvc_dump(obj, npath)
                    os.remove(path)
                    for modifier in ('baselines', 'blanks', 'extraction',
                                     'intercepts', 'icfactors', 'peakcenter',
                                     '.data'):
                        npath = analysis_path(an.record_id, repo_identifier,
                                              modifier=modifier)
                        opath = analysis_path(original_record_id, repo_identifier,
                                              modifier=modifier)
                        # print opath, npath
                        os.rename(opath, npath)
def __init__(self, uuid, record_id, repository_identifier, *args, **kw):
    """Load an analysis from its repository files.

    Reads the extraction file and the meta file for the record and populates
    this object; missing files produce warnings rather than exceptions.
    """
    super(DVCAnalysis, self).__init__(*args, **kw)
    self.record_id = record_id
    self.repository_identifier = repository_identifier
    self.rundate = datetime.datetime.now()

    path = analysis_path((uuid, record_id), repository_identifier)
    directory = os.path.dirname(path)
    head, ext = os.path.splitext(os.path.basename(path))

    # extraction data lives in a sibling 'extraction' directory
    extraction_path = os.path.join(directory, 'extraction',
                                   '{}.extr{}'.format(head, ext))
    if os.path.isfile(extraction_path):
        self.load_extraction(dvc_load(extraction_path))
    else:
        self.warning('Invalid analysis. RunID="{}". No extraction file {}'.format(record_id, extraction_path))

    if os.path.isfile(path):
        jd = dvc_load(path)
        self.load_spectrometer_parameters(jd.get('spec_sha'))
        self.load_environmentals(jd.get('environmental'))
        self.load_meta(jd)
    else:
        self.warning('Invalid analysis. RunID="{}". No meta file {}'.format(record_id, path))

    self.load_paths()
def _do_diff_fired(self):
    """Show a diff of the blanks/icfactors/intercepts files between the two
    selected commits, or report that there are no differences."""
    if self.selected_commits:
        lhs = self.selected_lhs
        rhs = self.selected_rhs
        lhsid = lhs.hexsha[:8]
        lhsdate = isoformat_date(lhs.date)
        rhsid = rhs.hexsha[:8]
        # BUG FIX: use the same date normalization as the lhs; commit dates
        # are not guaranteed to be datetime objects with an isoformat() method
        rhsdate = isoformat_date(rhs.date)

        diffs = []
        for a in ('blanks', 'icfactors', 'intercepts'):
            p = analysis_path((self.uuid, self.record_id),
                              self.repository_identifier, modifier=a)
            dd = get_diff(self.repo, lhs.hexsha, rhs.hexsha, p)
            if dd:
                diffs.append((a, dd))

        if diffs:
            v = DiffView(self.record_id, lhsid, rhsid, lhsdate, rhsdate)
            for a, (aa, bb) in diffs:
                func = getattr(v, 'set_{}'.format(a))
                left = aa.data_stream.read().decode('utf-8')
                right = bb.data_stream.read().decode('utf-8')
                func(json.loads(left), json.loads(right))
            v.finish()
            open_view(v)
        else:
            information(None, 'No Differences between {} and {}'.format(lhsid, rhsid))
def get_review_status(record):
    """Attach review items and an overall review status to record.

    Reads the blanks/intercepts/icfactors files of the record's repository,
    stamps each with the date of its last git commit, and sets
    record.review_status to 'Default', 'Intermediate', or 'All' depending on
    how many modifiers are fully reviewed.
    """
    reviewed_count = 0
    review_items = []
    root = repository_path(record.repository_identifier)
    if os.path.isdir(root):
        repo = Repo(root)
        checks = (('blanks', is_blank_reviewed),
                  ('intercepts', is_intercepts_reviewed),
                  ('icfactors', is_icfactors_reviewed))
        for modifier, func in checks:
            p = analysis_path(record, record.repository_identifier,
                              modifier=modifier)
            if not os.path.isfile(p):
                continue
            with open(p, 'r') as rfile:
                obj = json.load(rfile)
            # date of the most recent commit touching this file
            date = repo.git.log('-1', '--format=%cd', p)
            items = func(obj, date)
            if items:
                if reviewed(items):
                    reviewed_count += 1
                review_items.extend(items)
        # setattr(record, '{}_review_status'.format(m), (reviewed, date))

    record.review_items = review_items
    if not reviewed_count:
        status = 'Default'  # default
    elif reviewed_count == 3:
        status = 'All'  # all
    else:
        status = 'Intermediate'  # intermediate
    record.review_status = status
def _do_diff_fired(self):
    """Show a diff of the blanks/icfactors/intercepts files between the two
    selected commits, or report that there are no differences."""
    if self.selected_commits:
        lhs = self.selected_lhs
        rhs = self.selected_rhs
        lhsid = lhs.hexsha[:8]
        lhsdate = isoformat_date(lhs.date)
        rhsid = rhs.hexsha[:8]
        # BUG FIX: normalize the rhs date the same way as the lhs; commit
        # dates are not guaranteed to expose an isoformat() method
        rhsdate = isoformat_date(rhs.date)

        diffs = []
        for a in ('blanks', 'icfactors', 'intercepts'):
            p = analysis_path((self.uuid, self.record_id),
                              self.repository_identifier, modifier=a)
            dd = get_diff(self.repo, lhs.hexsha, rhs.hexsha, p)
            if dd:
                diffs.append((a, dd))

        if diffs:
            v = DiffView(self.record_id, lhsid, rhsid, lhsdate, rhsdate)
            for a, (aa, bb) in diffs:
                func = getattr(v, 'set_{}'.format(a))
                left = aa.data_stream.read().decode('utf-8')
                right = bb.data_stream.read().decode('utf-8')
                func(json.loads(left), json.loads(right))
            v.finish()
            open_view(v)
        else:
            information(None, 'No Differences between {} and {}'.format(lhsid, rhsid))
def set_spectrometer_file(dban, root):
    """Write a spectrometer-parameters file (source settings, gains,
    deflections) named by its sha into root, then stamp that sha into the
    analysis's meta file."""
    meas = dban.measurement

    gains = {}
    if dban.gain_history:
        gains = {g.detector.name: g.value
                 for g in dban.gain_history.gains
                 if g.value is not None}

    # deflections
    deflections = {d.detector.name: d.deflection
                   for d in meas.deflections
                   if d.deflection is not None}

    # source
    src = {k: getattr(meas.spectrometer_parameters, k)
           for k in QTEGRA_SOURCE_KEYS}

    obj = dict(spectrometer=src, gains=gains, deflections=deflections)
    # hexsha = self.dvc.get_meta_head()
    # obj['commit'] = str(hexsha)

    spec_sha = spectrometer_sha(src, gains, deflections)
    dvc_dump(obj, os.path.join(root, '{}.json'.format(spec_sha)))

    # update analysis's spec_sha
    meta_path = analysis_path(dban.record_id, os.path.basename(root))
    meta = dvc_load(meta_path)
    meta['spec_sha'] = spec_sha
    dvc_dump(meta, meta_path)
def set_spectrometer_file(dban, root):
    """Persist the spectrometer state (source, gains, deflections) for an
    analysis as a sha-named json file and record that sha in the analysis's
    meta file."""
    meas = dban.measurement
    history = dban.gain_history

    if history:
        gains = {g.detector.name: g.value
                 for g in history.gains if g.value is not None}
    else:
        gains = {}

    # deflections
    deflections = {d.detector.name: d.deflection
                   for d in meas.deflections if d.deflection is not None}

    # source
    src = {key: getattr(meas.spectrometer_parameters, key)
           for key in QTEGRA_SOURCE_KEYS}

    payload = dict(spectrometer=src, gains=gains, deflections=deflections)
    # hexsha = self.dvc.get_meta_head()
    # obj['commit'] = str(hexsha)

    spec_sha = spectrometer_sha(src, gains, deflections)
    sha_path = os.path.join(root, '{}.json'.format(spec_sha))
    dvc_dump(payload, sha_path)

    # update analysis's spec_sha
    meta_path = analysis_path(dban.record_id, os.path.basename(root))
    meta = dvc_load(meta_path)
    meta['spec_sha'] = spec_sha
    dvc_dump(meta, meta_path)
def dump(self):
    """Persist this tag's name/note/subgroup to its tags file, resolving the
    path on first use."""
    payload = {'name': self.name, 'note': self.note, 'subgroup': self.subgroup}
    if not self.path:
        self.path = analysis_path(self.uuid, self.repository_identifier,
                                  modifier='tags', mode='w')
    dvc_dump(payload, self.path)
def _bulk_runid(self, ai, aliquot, step):
    """Change an analysis's aliquot/step in the database and on disk.

    Falls back to the analysis's current aliquot/step when either argument
    is falsy. Returns (repository_identifier, list of touched paths).
    """
    if not aliquot:
        aliquot = ai.aliquot
    if not step:
        step = ai.step
    self.dvc.db.modify_aliquot_step(ai.uuid, aliquot, alpha_to_int(step))

    def modify_meta(p):
        # rewrite the aliquot/increment stored in the meta file;
        # aliquot and step are closed over from the enclosing scope
        jd = dvc_load(p)
        jd['aliquot'] = aliquot
        jd['increment'] = alpha_to_int(step)
        dvc_dump(jd, p)

    ps = []
    repo_id = ai.repository_identifier
    sp = analysis_path(('', ai.record_id), repo_id)
    if sp:
        modify_meta(sp)
        ps.append(sp)
        # using runid path name
        new_runid = make_runid(ai.identifier, aliquot, step)
        for m in NPATH_MODIFIERS:
            sp = analysis_path(('', ai.record_id), repo_id, modifier=m)
            dp = analysis_path(('', new_runid), repo_id, modifier=m, mode='w')
            if sp and os.path.isfile(sp):
                # never clobber an existing destination, except for extraction
                if os.path.isfile(dp) and m != 'extraction':
                    continue
                ps.append(sp)
                ps.append(dp)
                shutil.move(sp, dp)
    else:
        # using uuid path name
        # only need to modify metadata file
        sp = analysis_path(ai, repo_id)
        # BUG FIX: modify_meta takes only the path argument; passing
        # (sp, aliquot, step) raised TypeError on this branch
        modify_meta(sp)
        ps.append(sp)
    return repo_id, ps
def fix_a_steps(dest, repo_identifier, root):
    """Repair aliquots that lost their first step: when an aliquot's
    analyses begin with increment None followed by increment 1, set the
    first analysis's increment to 0 and rename its files to match.

    dest -- database adapter (session_ctx/get_repository/get_analysis)
    repo_identifier -- repository to scan
    root -- unused here; kept for interface parity with sibling fixers
    """
    with dest.session_ctx():
        repo = dest.get_repository(repo_identifier)
        # flatten to sortable tuples:
        # (identifier, aliquot, increment, record_id, db id)
        ans = [(ra.analysis.irradiation_position.identifier,
                ra.analysis.aliquot,
                ra.analysis.increment,
                ra.analysis.record_id,
                ra.analysis.id) for ra in repo.repository_associations]
        key = lambda x: x[0]
        ans = sorted(ans, key=key)
        for identifier, ais in groupby(ans, key=key):
            # skip non-numeric identifiers
            try:
                int(identifier)
            except ValueError:
                continue

            # groupby aliquot
            key = lambda xi: xi[1]
            for aliquot, ais in groupby(ais, key=key):
                # BUG FIX: increments may be None; Python 3 cannot compare
                # None with int, so sort with a (has_value, value) key that
                # orders None first as Python 2 did.
                ais = sorted(ais, key=lambda ai: (ai[2] is not None, ai[2] or 0))
                print(identifier, aliquot, ais)
                # if the first increment for a given aliquot is 1
                # and the increment for the first analysis of the aliquot is None
                if len(ais) == 1:
                    continue
                if ais[0][2] is None and ais[1][2] == 1:
                    an = dest.get_analysis(ais[0][4])
                    print('fix', ais[0], an, an.record_id)
                    original_record_id = str(an.record_id)
                    # path before the increment change
                    path = analysis_path(an.record_id, repo_identifier)
                    obj = dvc_load(path)
                    obj['increment'] = 0
                    an.increment = 0
                    # NOTE(review): npath is recomputed after an.increment is
                    # set -- presumably an.record_id now includes the step;
                    # confirm against the model.
                    npath = analysis_path(an.record_id, repo_identifier)
                    dvc_dump(obj, npath)
                    os.remove(path)
                    for modifier in ('baselines', 'blanks', 'extraction',
                                     'intercepts', 'icfactors', 'peakcenter',
                                     '.data'):
                        npath = analysis_path(an.record_id, repo_identifier,
                                              modifier=modifier)
                        opath = analysis_path(original_record_id, repo_identifier,
                                              modifier=modifier)
                        # print opath, npath
                        os.rename(opath, npath)
def fix_meta(dest, repo_identifier, root):
    """Backfill missing metadata fields in each analysis's meta file from the
    database, then commit and push the repository if anything changed.

    dest -- database adapter; repo_identifier -- repository name;
    root -- parent directory containing the repository checkout.
    """
    d = os.path.join(root, repo_identifier)
    changed = False
    with dest.session_ctx():
        repo = dest.get_repository(repo_identifier)
        for ra in repo.repository_associations:
            an = ra.analysis
            p = analysis_path(an.record_id, repo_identifier)
            obj = dvc_load(p)
            if not obj:
                print('********************** {} not found in repo'.format(
                    an.record_id))
                continue
            print(an.record_id, p)

            # BUG FIX: lchanged was never initialized, raising
            # UnboundLocalError for the first unchanged analysis and, once
            # set, staying True for every later analysis (stale rewrites).
            lchanged = False
            if not obj['irradiation']:
                obj['irradiation'] = an.irradiation
                lchanged = True
                changed = True
            if not obj['irradiation_position']:
                obj['irradiation_position'] = an.irradiation_position_position
                lchanged = True
                changed = True
            if not obj['irradiation_level']:
                obj['irradiation_level'] = an.irradiation_level
                lchanged = True
                changed = True
            if not obj['material']:
                obj['material'] = an.irradiation_position.sample.material.name
                lchanged = True
                changed = True
            if not obj['project']:
                obj['project'] = an.irradiation_position.sample.project.name
                lchanged = True
                changed = True
            if obj['repository_identifier'] != an.repository_identifier:
                obj['repository_identifier'] = an.repository_identifier
                lchanged = True
                changed = True

            if lchanged:
                print('{} changed'.format(an.record_id))
                dvc_dump(obj, p)

    if changed:
        from pychron.git_archive.repo_manager import GitRepoManager
        rm = GitRepoManager()
        rm.open_repo(d)
        repo = rm._repo
        repo.git.add('.')
        repo.git.commit('-m', '<MANUAL> fixed metadata')
        repo.git.push()
def _make_path(self, modifier=None, extension='.json'):
    """Return a writable analysis path for the current run spec.

    The name passed to analysis_path is a (uuid, runid) pair when uuid-based
    path names are enabled, otherwise (runid, runid).
    """
    spec = self.per_spec.run_spec
    if self.use_uuid_path_name:
        name = spec.uuid, spec.runid
    else:
        name = spec.runid, spec.runid
    return analysis_path(name, spec.repository_identifier,
                         modifier, extension, mode='w')
def fix_meta(dest, repo_identifier, root):
    """Backfill missing metadata fields in each analysis's meta file from
    the database and commit/push the repository when anything changed.

    dest -- database adapter; repo_identifier -- repository name;
    root -- parent directory containing the repository checkout.
    """
    d = os.path.join(root, repo_identifier)
    changed = False
    with dest.session_ctx():
        repo = dest.get_repository(repo_identifier)
        for ra in repo.repository_associations:
            an = ra.analysis
            p = analysis_path(an.record_id, repo_identifier)
            obj = dvc_load(p)
            if not obj:
                print('********************** {} not found in repo'.format(an.record_id))
                continue
            print(an.record_id, p)

            # BUG FIX: lchanged must be reset per analysis; previously it was
            # unbound on the first unchanged record (UnboundLocalError) and
            # stale-True afterwards, rewriting unchanged files.
            lchanged = False
            if not obj['irradiation']:
                obj['irradiation'] = an.irradiation
                lchanged = True
                changed = True
            if not obj['irradiation_position']:
                obj['irradiation_position'] = an.irradiation_position_position
                lchanged = True
                changed = True
            if not obj['irradiation_level']:
                obj['irradiation_level'] = an.irradiation_level
                lchanged = True
                changed = True
            if not obj['material']:
                obj['material'] = an.irradiation_position.sample.material.name
                lchanged = True
                changed = True
            if not obj['project']:
                obj['project'] = an.irradiation_position.sample.project.name
                lchanged = True
                changed = True
            if obj['repository_identifier'] != an.repository_identifier:
                obj['repository_identifier'] = an.repository_identifier
                lchanged = True
                changed = True

            if lchanged:
                print('{} changed'.format(an.record_id))
                dvc_dump(obj, p)

    if changed:
        from pychron.git_archive.repo_manager import GitRepoManager
        rm = GitRepoManager()
        rm.open_repo(d)
        repo = rm._repo
        repo.git.add('.')
        repo.git.commit('-m', '<MANUAL> fixed metadata')
        repo.git.push()
def fix_iso_list(runid, repository, root):
    """Rename the misnamed 'PHHCbs' isotope entry to 'Ar39' in the analysis's
    isotope list; a no-op when the entry is absent."""
    path = analysis_path(runid, repository, root=root)
    # print('asdf', path)
    obj = dvc_load(path)
    isotopes = obj['isotopes']
    try:
        entry = isotopes.pop('PHHCbs')
    except KeyError:
        # nothing to fix for this run
        return
    entry['name'] = 'Ar39'
    isotopes['Ar39'] = entry
    obj['isotopes'] = isotopes
    dvc_dump(obj, path)
def fix_run(runid, repository, root, modifier):
    """Rename the 'PHHCbs' key to 'Ar39' in one modifier file for a run,
    printing whether the file was fixed or skipped."""
    path = analysis_path(runid, repository, root=root, modifier=modifier)
    # print('asdf', path)
    obj = dvc_load(path)
    # print('ff', obj)
    if 'PHHCbs' in obj:
        obj['Ar39'] = obj.pop('PHHCbs')
        dvc_dump(obj, path)
        msg = 'fixed'
    else:
        msg = 'skipped'
    print(runid, msg)
def _map_paths(self, repo, src, dst):
    """Move an analysis's files from runid src to runid dst within repo,
    rewriting identifier/aliquot/increment in the meta file.

    Returns the list of source/destination paths touched, or None when the
    source meta file does not exist.
    """
    root = os.path.join(paths.repository_dataset_dir, repo)
    touched = []

    def debug(msg, a, b):
        self.debug('{:<20s} {:<35s} >> {}'.format(msg,
                                                  os.path.relpath(a, root),
                                                  os.path.relpath(b, root)))

    sp = analysis_path(src, repo)
    dp = analysis_path(dst, repo, mode='w')
    touched.append(sp)
    touched.append(dp)
    if not os.path.isfile(sp):
        self.warning('not a file {}'.format(sp))
        return

    identifier, aliquot, step = strip_runid(dst)
    meta = dvc_load(sp)
    meta['identifier'] = identifier
    meta['aliquot'] = aliquot
    meta['increment'] = alpha_to_int(step)
    dvc_dump(meta, dp)
    debug('----------', sp, dp)

    for modifier in ('baselines', 'blanks', 'extraction', 'intercepts',
                     'icfactors', 'peakcenter', '.data'):
        sp = analysis_path(src, repo, modifier=modifier)
        dp = analysis_path(dst, repo, modifier=modifier, mode='w')
        if sp and os.path.isfile(sp):
            debug('{:<20s}'.format(modifier), sp, dp)
            touched.append(sp)
            touched.append(dp)
            shutil.move(sp, dp)
    return touched
def from_analysis(cls, an, **kw):
    """Build a tag object mirroring an analysis's tag state.

    Extra keyword arguments override attributes on the new tag.
    """
    tag = cls()
    # copy tag-related attributes from the analysis
    for dst, src in (('name', 'tag'),
                     ('note', 'tag_note'),
                     ('record_id', 'record_id'),
                     ('uuid', 'uuid'),
                     ('repository_identifier', 'repository_identifier'),
                     ('subgroup', 'subgroup')):
        setattr(tag, dst, getattr(an, src))
    # tag.path = analysis_path(an.record_id, an.repository_identifier, modifier='tags')
    tag.path = analysis_path(an, an.repository_identifier, modifier='tags')

    for key, value in kw.items():
        setattr(tag, key, value)
    return tag
def get_review_status(record):
    """Set per-modifier review flags and an overall review status on record.

    Reads the 'reviewed' flag from each blanks/intercepts/icfactors file,
    stamping reviewed entries with the file's last-modified date, and sets
    record.review_status to 'Default', 'Intermediate', or 'All'.
    """
    reviewed_count = 0
    for modifier in ('blanks', 'intercepts', 'icfactors'):
        p = analysis_path(record.record_id, record.repository_identifier,
                          modifier=modifier)
        date = ''
        # NOTE(review): assumes every modifier file exists -- a missing file
        # raises FileNotFoundError here; confirm callers guarantee this
        with open(p, 'r') as rfile:
            obj = json.load(rfile)
        reviewed = obj.get('reviewed', False)
        if reviewed:
            mtime = datetime.fromtimestamp(os.path.getmtime(p))
            date = mtime.strftime('%m/%d/%Y')
            reviewed_count += 1
        setattr(record, '{}_review_status'.format(modifier), (reviewed, date))

    if not reviewed_count:
        status = 'Default'  # default
    elif reviewed_count == 3:
        status = 'All'  # all
    else:
        status = 'Intermediate'  # intermediate
    record.review_status = status
def _analysis_path(self, repository_identifier=None, **kw):
    """Return the on-disk path for this analysis.

    repository_identifier -- repository to resolve against; defaults to this
    analysis's own repository. Extra keyword arguments pass through to
    analysis_path.
    """
    repo = (self.repository_identifier
            if repository_identifier is None
            else repository_identifier)
    return analysis_path((self.uuid, self.record_id), repo, **kw)