def log_input(filename, source): """Log input to the database. Called by patched functions that do some sort of input (reading from a file etc) with the filename and some sort of information about the source. Note: the source parameter is currently not stored in the database. """ # Some packages, e.g., xarray, accept a list of files as input argument if isinstance(filename, list): for f in filename: log_input(f, source) return elif not isinstance(filename, six.string_types): try: filename = filename.name except: pass filename = os.path.abspath(filename) if option_set('ignored metadata', 'input_hashes'): record = filename else: record = (filename, hash_file(filename)) if option_set('general', 'debug'): print("Input from %s using %s" % (record, source)) #Update object in DB version = get_version(source) db = open_or_create_db() db.update(append("inputs", record, no_duplicates=True), eids=[RUN_ID]) db.update(append("libraries", version, no_duplicates=True), eids=[RUN_ID]) db.close()
def log_input(filename, source): """Log input to the database. Called by patched functions that do some sort of input (reading from a file etc) with the filename and some sort of information about the source. Note: the source parameter is currently not stored in the database. """ if type(filename) is not str: try: filename = filename.name except: pass filename = os.path.abspath(filename) if option_set('data', 'hash_inputs'): record = (filename, hash_file(filename)) else: record = filename if option_set('general', 'debug'): print("Input from %s using %s" % (record, source)) #Update object in DB db = open_or_create_db() db.update(append("inputs", record, no_duplicates=True), eids=[RUN_ID]) db.update(append("libraries", get_version(source), no_duplicates=True), eids=[RUN_ID]) db.close()
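# For context, a minimal sketch of how a patched input function could call
# log_input(). The wrapper name `patched_open` and the choice of wrapping the
# built-in open() are illustrative assumptions, not the project's actual
# patching mechanism; any function that reads data could log in the same way.
import builtins

_original_open = builtins.open  # keep a reference to the unpatched builtin

def patched_open(file, mode='r', *args, **kwargs):
    if 'r' in mode:
        # Record the file and a label describing where the input came from
        log_input(file, 'builtins.open')
    return _original_open(file, mode, *args, **kwargs)

# builtins.open = patched_open  # installing the patch is left out of this sketch
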
def hash_outputs():
    # Writing to output files is complete; we can now compute hashes
    if option_set('ignored metadata', 'output_hashes'):
        return
    db = open_or_create_db()
    run = db.get(eid=RUN_ID)
    new_outputs = [(filename, hash_file(filename))
                   for filename in run.get('outputs')]
    db.update({'outputs': new_outputs}, eids=[RUN_ID])
    db.close()

def hash_outputs():
    # Writing to output files is complete; we can now compute hashes
    if not option_set('data', 'hash_outputs'):
        return
    db = open_or_create_db()
    run = db.get(eid=RUN_ID)
    new_outputs = [(filename, hash_file(filename))
                   for filename in run.get('outputs')]
    db.update({'outputs': new_outputs}, eids=[RUN_ID])
    db.close()

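# For reference, a minimal sketch of what the hash_file() helper used above
# could look like. Assumptions: it returns a hex digest of the file contents;
# the SHA-1 algorithm, the chunk size and the helper's name here are
# illustrative choices, not necessarily what the real helper does.
import hashlib

def _sketch_hash_file(filename):
    sha = hashlib.sha1()
    with open(filename, 'rb') as f:
        # Read in chunks so large output files do not have to fit in memory
        for chunk in iter(lambda: f.read(65536), b''):
            sha.update(chunk)
    return sha.hexdigest()
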
def search_hash(args):
    try:
        hash_value = hash_file(args['<outputfile>'])
    except Exception:
        # Probably an invalid filename/path so assume it is a raw hash value
        # instead
        hash_value = args['<outputfile>']

    db = open_or_create_db()
    Run = Query()
    # Search both outputs AND inputs
    # TODO: Add a command-line argument to force searching of just one
    # of inputs or outputs
    results = db.search(Run.outputs.test(find_by_hash, hash_value))
    results += db.search(Run.inputs.test(find_by_hash, hash_value))
    results = sorted(results, key=lambda x: x['date'])

    if args['--json']:
        if len(results) == 0:
            print('[]')
            return
        if args['--all']:
            res_to_output = results
        else:
            res_to_output = results[-1]
        output = dumps(res_to_output, indent=2, sort_keys=True,
                       default=utils.json_serializer)
        print(output)
    else:
        if len(results) == 0:
            print('No results found')
        else:
            if args['--all']:
                for r in results[:-1]:
                    print(template_result(r))
                    print("-" * 40)
                print(template_result(results[-1]))
            else:
                print(template_result(results[-1]))
                if len(results) > 1:
                    print("** Previous runs have been "
                          "found. Run with --all to show. **")

            if args['--diff']:
                if 'diff' in results[-1]:
                    print("\n\n")
                    print(results[-1]['diff'])

    db.close()
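
# search_hash() passes find_by_hash to TinyDB's Query.test(), so it must be a
# predicate over a run's stored 'inputs'/'outputs' list. A minimal sketch,
# assuming each entry is either a bare filename string or a (filename, hash)
# pair as written by log_input()/hash_outputs() above; the real predicate and
# its name may differ.
def _sketch_find_by_hash(entries, needle):
    for entry in entries:
        # Entries recorded without hashing are plain strings; skip them
        if isinstance(entry, (list, tuple)) and len(entry) == 2:
            if entry[1] == needle:
                return True
    return False

# Example of the (hypothetical) docopt-style args dict search_hash() expects:
#   {'<outputfile>': 'results.csv', '--json': False, '--all': True,
#    '--diff': False}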