def log_input(filename, source):
    """Log input to the database.

    Called by patched functions that do some sort of input (reading from a
    file etc) with the filename and some sort of information about the
    source.

    Note: the source parameter is currently not stored in the database.
    """
    # File-like objects (open file handles etc.) carry their path in .name;
    # anything without that attribute is passed through unchanged.
    if not isinstance(filename, str):
        try:
            filename = filename.name
        except AttributeError:
            pass
    filename = os.path.abspath(filename)
    if option_set('data', 'hash_inputs'):
        record = (filename, hash_file(filename))
    else:
        record = filename
    if option_set('general', 'debug'):
        print("Input from %s using %s" % (record, source))
    # Update object in DB
    db = open_or_create_db()
    db.update(append("inputs", record, no_duplicates=True), eids=[RUN_ID])
    db.update(append("libraries", get_version(source), no_duplicates=True),
              eids=[RUN_ID])
    db.close()
def log_output(filename, source):
    """Log output to the database.

    Called by patched functions that do some sort of output (writing to a
    file etc) with the filename and some sort of information about the
    source.

    Note: the source parameter is currently not stored in the database.
    """
    # File-like objects carry their path in .name; anything without that
    # attribute is passed through unchanged.
    if not isinstance(filename, str):
        try:
            filename = filename.name
        except AttributeError:
            pass
    filename = os.path.abspath(filename)
    db = open_or_create_db()
    if option_set('data', 'file_diff_outputs') and os.path.isfile(filename):
        # Snapshot the pre-run contents so a diff can be computed at exit.
        tf = tempfile.NamedTemporaryFile(delete=False)
        shutil.copy2(filename, tf.name)
        add_file_diff_to_db(filename, tf.name, db)
    if option_set('general', 'debug'):
        print("Output to %s using %s" % (filename, source))
    # Update object in DB
    # data hash will be hashed at script exit, if enabled
    db.update(append("outputs", filename, no_duplicates=True), eids=[RUN_ID])
    db.update(append("libraries", get_version(source), no_duplicates=True),
              eids=[RUN_ID])
    db.close()
def log_output(filename, source):
    """Log output to the database.

    Called by patched functions that do some sort of output (writing to a
    file etc) with the filename and some sort of information about the
    source.

    Note: the source parameter is currently not stored in the database.
    """
    # Some patched functions are called with a list of files; log each one.
    if isinstance(filename, list):
        for f in filename:
            log_output(f, source)
        return
    elif not isinstance(filename, six.string_types):
        # File-like objects carry their path in .name.
        try:
            filename = filename.name
        except AttributeError:
            pass
    filename = os.path.abspath(filename)
    version = get_version(source)
    db = open_or_create_db()
    if option_set('data', 'file_diff_outputs') and os.path.isfile(filename) \
            and not is_binary(filename):
        # Snapshot the pre-run contents so a diff can be computed at exit.
        tf = tempfile.NamedTemporaryFile(delete=False)
        shutil.copy2(filename, tf.name)
        add_file_diff_to_db(filename, tf.name, db)
    if option_set('general', 'debug'):
        print("Output to %s using %s" % (filename, source))
    # Update object in DB
    # data hash will be hashed at script exit, if enabled
    db.update(append("outputs", filename, no_duplicates=True), eids=[RUN_ID])
    db.update(append("libraries", version, no_duplicates=True), eids=[RUN_ID])
    db.close()
def log_input(filename, source):
    """Log input to the database.

    Called by patched functions that do some sort of input (reading from a
    file etc) with the filename and some sort of information about the
    source.

    Note: the source parameter is currently not stored in the database.
    """
    # Some packages, e.g., xarray, accept a list of files as input argument
    if isinstance(filename, list):
        for f in filename:
            log_input(f, source)
        return
    elif not isinstance(filename, six.string_types):
        # File-like objects carry their path in .name.
        try:
            filename = filename.name
        except AttributeError:
            pass
    filename = os.path.abspath(filename)
    if option_set('ignored metadata', 'input_hashes'):
        record = filename
    else:
        record = (filename, hash_file(filename))
    if option_set('general', 'debug'):
        print("Input from %s using %s" % (record, source))
    # Update object in DB
    version = get_version(source)
    db = open_or_create_db()
    db.update(append("inputs", record, no_duplicates=True), eids=[RUN_ID])
    db.update(append("libraries", version, no_duplicates=True), eids=[RUN_ID])
    db.close()
def latest_run():
    """Render the details page for the most recently recorded run."""
    search_form = SearchForm()
    annotate_form = AnnotateRunForm()
    db = utils.open_or_create_db()
    run = get_latest_run()
    if run is None:
        # Empty database: warn the user and show the page without diffs.
        flash('No latest run (database is empty).', 'danger')
        file_diffs = []
    else:
        file_diffs = db.table('filediffs').search(Query().run_id == run.eid)
    run = _change_date(run)
    db.close()
    return render_template('details.html', query='', form=search_form,
                           run=run, annotateRunForm=annotate_form,
                           dbfile=recipyGui.config.get('tinydb'),
                           diffs=file_diffs, active_page='latest_run')
def run_details():
    """Render the details page for the run given by the 'id' query arg."""
    search_form = SearchForm()
    annotate_form = AnnotateRunForm()
    query = request.args.get('query', '')
    run_id = int(request.args.get('id'))
    db = utils.open_or_create_db()
    run = db.get(eid=run_id)
    if run is None:
        # Unknown run id: warn the user and show the page without diffs.
        flash('Run not found.', 'danger')
        file_diffs = []
    else:
        file_diffs = db.table('filediffs').search(Query().run_id == run_id)
    run = _change_date(run)
    db.close()
    return render_template('details.html', query=query, form=search_form,
                           annotateRunForm=annotate_form, run=run,
                           dbfile=recipyGui.config.get('tinydb'),
                           diffs=file_diffs)
def index():
    """Render the run list, optionally filtered by a search query."""
    form = SearchForm()
    raw_query = request.args.get('query', '').strip()
    # make sure chars like ':' and '\' are escaped properly before doing
    # the search
    safe_query = re.escape(raw_query) if raw_query else raw_query
    db = utils.open_or_create_db()
    matches = [_change_date(m) for m in search_database(db, query=safe_query)]
    matches = sorted(matches, key=lambda run: run['date'], reverse=True)
    ids = []
    for run in matches:
        if 'notes' in run:
            # HTML-escape user-entered notes before display.
            run['notes'] = str(escape(run['notes']))
        ids.append(run.eid)
    db.close()
    return render_template('list.html', runs=matches, query=safe_query,
                           search_bar_query=raw_query, form=form,
                           run_ids=str(ids),
                           dbfile=recipyGui.config.get('tinydb'))
def add_module_to_db(modulename, input_functions, output_functions,
                     db_path=None):
    """Register a patched module and its I/O functions in the 'patches' table.

    The original default of ``db_path=get_db_path()`` was evaluated once at
    import time, so configuration changes made afterwards were ignored; the
    path is now resolved lazily at call time when no path is given.
    """
    if db_path is None:
        db_path = get_db_path()
    db = open_or_create_db(path=db_path)
    patches = db.table('patches')
    patches.insert({'modulename': modulename,
                    'input_functions': input_functions,
                    'output_functions': output_functions})
    db.close()
def output_file_diffs():
    """Compute and store diffs between pre-run snapshots and final outputs.

    Writing to output files is complete at this point, so the temporary
    copies saved by log_output() can be compared against the files as they
    exist now.
    """
    if not option_set('data', 'file_diff_outputs'):
        return
    encodings = ['utf-8', 'latin-1']

    def _read_lines(path):
        # Return the file's lines using the first encoding that decodes it,
        # or None if no supported encoding works. Stopping at the first
        # success matters: latin-1 decodes any byte sequence, so continuing
        # would silently overwrite a valid utf-8 decode with mojibake.
        for enc in encodings:
            try:
                with codecs.open(path, encoding=enc) as f:
                    return f.readlines()
            except UnicodeDecodeError:
                pass
        return None

    with open_or_create_db() as db:
        diffs_table = db.table('filediffs')
        diffs = diffs_table.search(Query().run_id == RUN_ID)
        for item in diffs:
            if option_set('general', 'debug'):
                print('Storing file diff for "%s"' % item['filename'])
            lines1 = _read_lines(item['tempfilename'])
            lines2 = _read_lines(item['filename'])
            if lines1 is not None and lines2 is not None:
                diff = difflib.unified_diff(lines1, lines2,
                                            fromfile='before this run',
                                            tofile='after this run')
                # The table handle belongs to the db already open in this
                # context; no need to re-open the database per item.
                diffs_table.update({'diff': ''.join(diff)},
                                   eids=[item.eid])
            else:
                msg = ('Unable to read file "{}" using supported encodings '
                       '({}). To be able to store file diffs, use one of '
                       'the supported encodings to write the output file.')
                warnings.warn(msg.format(item['filename'],
                                         ', '.join(encodings)))
            # delete temporary file
            os.remove(item['tempfilename'])
def log_output(filename, source):
    """Record an output file path against the current run."""
    path = os.path.abspath(filename)
    if option_set('general', 'debug'):
        print("Output to %s using %s" % (path, source))
    # Append the path to the run's outputs list in the database.
    database = open_or_create_db()
    database.update(append("outputs", path), eids=[RUN_ID])
    database.close()
def log_exit():
    """Stamp the current run record with its completion time.

    The duration is not stored because a timedelta is harder to serialize;
    consumers can subtract 'date' from 'exit_date' instead.
    """
    if option_set('general', 'debug'):
        print("recipy run complete")
    database = open_or_create_db()
    database.update({'exit_date': datetime.datetime.utcnow()},
                    eids=[RUN_ID])
    database.close()
def log_init():
    """Create the database record for a new run.

    Determines the script being executed, generates a unique run ID and
    stores basic metadata (author, environment, date) plus git information
    when available. Sets the module-global RUN_ID used by the other log_*
    functions. Called as a side effect of `import recipy`.
    """
    # Get the path of the script we're running
    # When running python -m recipy ..., during the recipy import argument 0
    # is -c (for Python 2) or -m (for Python 3) and the script is argument 1
    if sys.argv[0] in ['-c', '-m']:
        # Has the user called python -m recipy without further arguments?
        if len(sys.argv) < 2:
            return
        scriptpath = os.path.realpath(sys.argv[1])
    else:
        scriptpath = os.path.realpath(sys.argv[0])

    global RUN_ID

    # Open the database
    db = open_or_create_db()

    # Create the unique ID for this run
    guid = str(uuid.uuid4())

    # Get general metadata, environment info, etc
    run = {"unique_id": guid,
           "author": getpass.getuser(),
           "description": "",
           "inputs": [],
           "outputs": [],
           "script": scriptpath,
           "command": sys.executable,
           "environment": [platform.platform(),
                           "python " + sys.version.split('\n')[0]],
           "date": datetime.datetime.utcnow()}

    if not option_set('ignored metadata', 'git'):
        try:
            repo = Repo(scriptpath, search_parent_directories=True)
            run["gitrepo"] = repo.working_dir
            run["gitcommit"] = repo.head.commit.hexsha
            run["gitorigin"] = get_origin(repo)
            if not option_set('ignored metadata', 'diff'):
                # Concatenate the patch text of every unstaged change.
                whole_diff = ''
                diffs = repo.index.diff(None, create_patch=True)
                for diff in diffs:
                    whole_diff += "\n\n\n" + diff.diff.decode("utf-8")
                run['diff'] = whole_diff
        except (InvalidGitRepositoryError, ValueError):
            # We can't store git info for some reason, so just skip it
            pass

    # Put basics into DB
    RUN_ID = db.insert(run)

    # Print message
    if not option_set('general', 'quiet'):
        print("recipy run inserted, with ID %s" % (guid))

    db.close()
def runs2json():
    """Serve the selected runs as a downloadable JSON attachment.

    Run ids arrive as a stringified Python list in the POST form data.
    """
    run_ids = literal_eval(request.form['run_ids'])
    # Fixed accidental duplicated assignment ('db = db = ...').
    db = utils.open_or_create_db()
    runs = [db.get(eid=run_id) for run_id in run_ids]
    db.close()
    response = make_response(dumps(runs, indent=2, sort_keys=True))
    response.headers['content-type'] = 'application/json'
    response.headers['Content-Disposition'] = 'attachment; filename=runs.json'
    return response
def patched_modules():
    """Render the page listing every module recipy has patched."""
    database = utils.open_or_create_db()
    module_records = database.table('patches').all()
    database.close()
    search_form = SearchForm()
    return render_template('patched_modules.html', form=search_form,
                           active_page='patched_modules',
                           modules=module_records,
                           dbfile=recipyGui.config.get('tinydb'))
def annotate():
    """Save user-supplied notes for a run, then return to its details page."""
    run_id = int(request.form['run_id'])
    notes = request.form['notes']
    query = request.args.get('query', '')
    database = utils.open_or_create_db()
    database.update({'notes': notes}, eids=[run_id])
    database.close()
    return redirect(url_for('run_details', id=run_id, query=query))
def hash_outputs():
    """Replace the run's output paths with (path, hash) pairs.

    Called once writing is finished, so the hashes reflect final contents.
    """
    if not option_set('data', 'hash_outputs'):
        return
    database = open_or_create_db()
    current = database.get(eid=RUN_ID)
    hashed = [(path, hash_file(path)) for path in current.get('outputs')]
    database.update({'outputs': hashed}, eids=[RUN_ID])
    database.close()
def hash_outputs():
    """Replace the run's output paths with (path, hash) pairs.

    Called once writing is finished, so the hashes reflect final contents.
    Skipped entirely when output hashing is in the ignored-metadata list.
    """
    if option_set('ignored metadata', 'output_hashes'):
        return
    database = open_or_create_db()
    current = database.get(eid=RUN_ID)
    hashed = [(path, hash_file(path)) for path in current.get('outputs')]
    database.update({'outputs': hashed}, eids=[RUN_ID])
    database.close()
def log_exception(typ, value, traceback):
    """Record an uncaught exception on the run, then defer to the default hook."""
    if option_set('general', 'debug'):
        print("Logging exception %s" % value)
    details = {'type': typ.__name__,
               'message': str(value),
               'traceback': ''.join(format_tb(traceback))}
    # Update object in DB
    database = open_or_create_db()
    database.update({"exception": details}, eids=[RUN_ID])
    database.close()
    # Done logging, call default exception handler
    sys.__excepthook__(typ, value, traceback)
def add_module_to_db(modulename, input_functions, output_functions,
                     db_path=None):
    """Register a patched module and its I/O functions in the 'patches' table.

    The original default of ``db_path=get_db_path()`` was evaluated once at
    import time, so configuration changes made afterwards were ignored; the
    path is now resolved lazily at call time when no path is given.
    """
    if db_path is None:
        db_path = get_db_path()
    db = open_or_create_db(path=db_path)
    patches = db.table('patches')
    patches.insert({
        'modulename': modulename,
        'input_functions': input_functions,
        'output_functions': output_functions
    })
    db.close()
def log_input(filename, source):
    """Record an input file path against the current run.

    Note: the source parameter is currently not stored in the database.
    """
    # File-like objects carry their path in .name; anything without that
    # attribute is passed through unchanged.
    if not isinstance(filename, str):
        try:
            filename = filename.name
        except AttributeError:
            pass
    filename = os.path.abspath(filename)
    if option_set('general', 'debug'):
        print("Input from %s using %s" % (filename, source))
    # Update object in DB
    db = open_or_create_db()
    db.update(append("inputs", filename, no_duplicates=True), eids=[RUN_ID])
    db.close()
def log_output(filename, source):
    """Record an output file path against the current run.

    Note: the source parameter is currently not stored in the database.
    """
    # File-like objects carry their path in .name; anything without that
    # attribute is passed through unchanged.
    if not isinstance(filename, str):
        try:
            filename = filename.name
        except AttributeError:
            pass
    filename = os.path.abspath(filename)
    if option_set('general', 'debug'):
        print("Output to %s using %s" % (filename, source))
    # Update object in DB
    # data hash will be hashed at script exit, if enabled
    db = open_or_create_db()
    db.update(append("outputs", filename, no_duplicates=True), eids=[RUN_ID])
    db.close()
def log_exception(typ, value, traceback):
    """Record an uncaught exception on the run, then defer to the default hook."""
    if option_set('general', 'debug'):
        print("Logging exception %s" % value)
    details = {
        'type': typ.__name__,
        'message': str(value),
        'traceback': ''.join(format_tb(traceback))
    }
    # Update object in DB
    database = open_or_create_db()
    database.update({"exception": details}, eids=[RUN_ID])
    database.close()
    # Done logging, call default exception handler
    sys.__excepthook__(typ, value, traceback)
def log_init():
    """Create the database record for a new run.

    Determines the script being executed and its command-line arguments,
    generates a unique run ID and stores basic metadata plus git information.
    Sets the module-global RUN_ID used by the other log_* functions and
    installs log_exception as the process excepthook.
    """
    # Get the path of the script we're running
    # When running python -m recipy ..., during the recipy import argument 0
    # is -c (for Python 2) or -m (for Python 3) and the script is argument 1
    if sys.argv[0] in ['-c', '-m']:
        # Has the user called python -m recipy without further arguments?
        if len(sys.argv) < 2:
            return
        scriptpath = os.path.realpath(sys.argv[1])
        cmd_args = sys.argv[2:]
    else:
        scriptpath = os.path.realpath(sys.argv[0])
        cmd_args = sys.argv[1:]

    global RUN_ID

    # Open the database
    db = open_or_create_db()

    # Create the unique ID for this run
    guid = str(uuid.uuid4())

    # Get general metadata, environment info, etc
    run = {"unique_id": guid,
           "author": getpass.getuser(),
           "description": "",
           "inputs": [],
           "outputs": [],
           "script": scriptpath,
           "command": sys.executable,
           "environment": [platform.platform(),
                           "python " + sys.version.split('\n')[0]],
           "date": datetime.datetime.utcnow(),
           "exit_date": None,  # updated at script exit
           "command_args": " ".join(cmd_args)}

    if not option_set('ignored metadata', 'git'):
        add_git_info(run, scriptpath)

    # Put basics into DB
    RUN_ID = db.insert(run)

    # Print message
    if not option_set('general', 'quiet'):
        print("recipy run inserted, with ID %s" % (guid))

    db.close()

    # Register exception hook so exceptions can be logged
    sys.excepthook = log_exception
def log_warning(msg, typ, script, lineno, **kwargs):
    """Append a warning record to the run, then emit it to stderr.

    The positional parameters mirror those passed to a warnings handler;
    extra keyword arguments are accepted and ignored.
    """
    if option_set('general', 'debug'):
        print('Logging warning "%s"' % str(msg))
    record = {
        'type': typ.__name__,
        'message': str(msg),
        'script': script,
        'lineno': lineno
    }
    # Update object in DB
    database = open_or_create_db()
    database.update(append("warnings", record, no_duplicates=True),
                    eids=[RUN_ID])
    database.close()
    # Done logging, print warning to stderr
    sys.stderr.write(warnings.formatwarning(msg, typ, script, lineno))
def dedupe_inputs():
    """Remove inputs that were logged multiple times.

    Sometimes patched libraries use other patched libraries to open files
    (e.g. xarray internally uses netCDF4 to open netcdf files). When that
    happens and recipy is configured to log file hashes, the same input is
    logged more than once. Hashed inputs are stored as lists in the
    database, and tinydb does not automatically dedupe lists.

    Outputs do not need deduping: their hashes are added after the run is
    finished, and tinydb can automatically dedupe strings.
    """
    if option_set('ignored metadata', 'input_hashes'):
        return
    database = open_or_create_db()
    current = database.get(eid=RUN_ID)
    # Convert each [path, hash] list to a hashable tuple so a set can
    # remove the duplicates.
    unique_inputs = list({tuple(entry) for entry in current['inputs']})
    database.update({'inputs': unique_inputs}, eids=[RUN_ID])
    database.close()
def output_file_diffs():
    """Compute and store diffs between pre-run snapshots and final outputs.

    Writing to output files is complete at this point, so the temporary
    copies saved earlier can be compared against the final files.
    """
    if not option_set('data', 'file_diff_outputs'):
        return
    db = open_or_create_db()
    diffs_table = db.table('filediffs')
    diffs = diffs_table.search(Query().run_id == RUN_ID)
    for item in diffs:
        # Context managers close both handles promptly; the original left
        # them open until garbage collection.
        with open(item['tempfilename']) as before, \
                open(item['filename']) as after:
            diff = difflib.unified_diff(before.readlines(),
                                        after.readlines(),
                                        fromfile='before this run',
                                        tofile='after this run')
            diffs_table.update({'diff': ''.join(diff)}, eids=[item.eid])
        # delete temporary file
        os.remove(item['tempfilename'])
    db.close()
def log_values(custom_values=None, **kwargs):
    """Log custom key-value pairs into the database.

    e.g,
    >>> log_values(a=1, b=2)
    >>> log_values({'c': 3, 'd': 4})
    >>> log_values({'e': 5, 'f': 6}, g=7, h=8)
    """
    # create dictionary of custom values from arguments
    custom_values = {} if custom_values is None else custom_values
    assert isinstance(custom_values, dict), \
        "custom_values must be a dict. type(custom_values) = %s" % type(custom_values)
    # Merge into a copy so the caller's dict is never mutated (the original
    # updated the passed-in dict in place with the keyword arguments).
    merged = dict(custom_values)
    merged.update(kwargs)
    # debugging
    if option_set('general', 'debug'):
        print('Logging custom values: %s' % str(merged))
    # Update object in DB
    db = open_or_create_db()
    db.update(add_dict("custom_values", merged), eids=[RUN_ID])
    db.close()
def log_init():
    """Do the initial logging for a new run.

    Works out what script has been run, creates a new unique run ID, and
    gets the basic metadata.

    This is called when running `import recipy`.
    """
    # Get the path of the script we're running
    # When running python -m recipy ..., during the recipy import argument 0
    # is -c (for Python 2) or -m (for Python 3) and the script is argument 1
    if sys.argv[0] in ['-c', '-m']:
        # Has the user called python -m recipy without further arguments?
        if len(sys.argv) < 2:
            return
        scriptpath = os.path.realpath(sys.argv[1])
        cmd_args = sys.argv[2:]
    else:
        scriptpath = os.path.realpath(sys.argv[0])
        cmd_args = sys.argv[1:]

    global RUN_ID

    # Open the database
    db = open_or_create_db()

    # Create the unique ID for this run
    guid = str(uuid.uuid4())

    # Get general metadata, environment info, etc
    run = {
        "unique_id": guid,
        "author": getpass.getuser(),
        "description": "",
        "inputs": [],
        "outputs": [],
        "script": scriptpath,
        "command": sys.executable,
        "environment": [platform.platform(),
                        "python " + sys.version.split('\n')[0]],
        "date": datetime.datetime.utcnow(),
        "command_args": " ".join(cmd_args),
        "warnings": [],
        # recipy's own version is always recorded as the first library
        "libraries": [get_version('recipy')]
    }

    if not option_set('ignored metadata', 'git'):
        try:
            repo = Repo(scriptpath, search_parent_directories=True)
            run["gitrepo"] = repo.working_dir
            run["gitcommit"] = repo.head.commit.hexsha
            run["gitorigin"] = get_origin(repo)
            if not option_set('ignored metadata', 'diff'):
                # Concatenate the patch text of every unstaged change.
                whole_diff = ''
                diffs = repo.index.diff(None, create_patch=True)
                for diff in diffs:
                    whole_diff += "\n\n\n" + diff.diff.decode("utf-8")
                run['diff'] = whole_diff
        except (InvalidGitRepositoryError, ValueError):
            # We can't store git info for some reason, so just skip it
            pass

    # Put basics into DB
    RUN_ID = db.insert(run)

    # Print message
    if not option_set('general', 'quiet'):
        print("recipy run inserted, with ID %s" % (guid))

    # check whether patched modules were imported before recipy was imported
    patches = db.table('patches')
    for p in patches.all():
        if p['modulename'] in sys.modules:
            msg = 'not tracking inputs and outputs for {}; recipy was ' \
                  'imported after this module'.format(p['modulename'])
            warnings.warn(msg, stacklevel=3)

    db.close()

    # Register exception hook so exceptions can be logged
    sys.excepthook = log_exception
def log_init(notebookName=None):
    """Do the initial logging for a new run.

    Works out what script has been run, creates a new unique run ID, and
    gets the basic metadata.

    This is called when running `import recipy`.
    """
    notebookMode = get_notebook_mode()
    if notebookMode and notebookName is None:
        # Avoid first call without Notebook name
        return
    if notebookMode:
        # In a notebook the "script" is the notebook file itself.
        scriptpath = notebookName
        cmd_args = sys.argv[1:]
    # Get the path of the script we're running
    # When running python -m recipy ..., during the recipy import argument 0
    # is -c (for Python 2) or -m (for Python 3) and the script is argument 1
    elif sys.argv[0] in ['-c', '-m']:
        # Has the user called python -m recipy without further arguments?
        if len(sys.argv) < 2:
            return
        scriptpath = os.path.realpath(sys.argv[1])
        cmd_args = sys.argv[2:]
    else:
        scriptpath = os.path.realpath(sys.argv[0])
        cmd_args = sys.argv[1:]

    global RUN_ID

    # Open the database
    db = open_or_create_db()

    # Create the unique ID for this run
    guid = str(uuid.uuid4())

    # Get general metadata, environment info, etc
    run = {
        "unique_id": guid,
        "author": getpass.getuser(),
        "description": "",
        "inputs": [],
        "outputs": [],
        "script": scriptpath,
        "command": sys.executable,
        "environment": [platform.platform(),
                        "python " + sys.version.split('\n')[0]],
        "date": datetime.datetime.utcnow(),
        "command_args": " ".join(cmd_args),
        "warnings": [],
        # recipy's own version is always recorded as the first library
        "libraries": [get_version('recipy')],
        "custom_values": {}
    }

    # Version-control metadata is skipped in notebook mode.
    if not notebookName and not option_set('ignored metadata', 'git'):
        add_git_info(run, scriptpath)

    if not notebookName and not option_set('ignored metadata', 'svn'):
        add_svn_info(run, scriptpath)

    # Put basics into DB
    RUN_ID = db.insert(run)

    # Print message
    if not option_set('general', 'quiet'):
        print("recipy run inserted, with ID %s" % (guid))

    # check whether patched modules were imported before recipy was imported
    patches = db.table('patches')
    for p in patches.all():
        if p['modulename'] in sys.modules:
            msg = 'not tracking inputs and outputs for {}; recipy was ' \
                  'imported after this module'.format(p['modulename'])
            warnings.warn(msg, stacklevel=3)

    db.close()

    # Register exception hook so exceptions can be logged
    sys.excepthook = log_exception
from docopt import docopt from jinja2 import Template from tinydb import where, Query from json import dumps import six from . import __version__ from recipyCommon import config, utils from recipyCommon.config import get_editor from recipyCommon.version_control import hash_file from colorama import init init() db = utils.open_or_create_db() template_str = """\aRun ID:\b {{ unique_id }} \aCreated by\b {{ author }} on {{ date }} UTC \aRan\b {{ script }} using {{ command }} {% if command_args|length > 0 %} Using command-line arguments: {{ command_args }} {% endif %} {% if gitcommit is defined %} \aGit:\b commit {{ gitcommit }}, in repo {{ gitrepo }}, with origin {{ gitorigin }} {% endif %} {% if svnrepo is defined %} \aSvn:\b commit {{ svncommit }}, in repo {{ svnrepo }}. {% endif %} \aEnvironment:\b {{ environment|join(", ") }} {% if libraries is defined %}
def log_update(field, filename, source):
    """Append *filename* to the given list field of the current run record."""
    filename = os.path.abspath(filename)
    # Fixed broken format string: '$s' -> '%s'. The original had only two
    # conversion specifiers for three arguments, which raised
    # "TypeError: not all arguments converted during string formatting".
    print("Adding %s to %s using %s" % (field, filename, source))
    db = open_or_create_db()
    db.update(append(field, filename), eids=[RUN_ID])
    db.close()
def setUp(self):
    # Open (or create) the recipy database before each test; tests access
    # it through self.db.
    self.db = utils.open_or_create_db()
import os import re import sys from docopt import docopt from pprint import pprint from jinja2 import Template from tinydb import TinyDB, where from dateutil.parser import parse import six from . import __version__ from recipyCommon import config, utils db = utils.open_or_create_db() def print_result(r): # Print a single result from the search template = """Run ID: {{ unique_id }} Created by {{ author }} on {{ date }} Ran {{ script }} using {{ command }} {% if gitcommit is defined %} Git: commit {{ gitcommit }}, in repo {{ gitrepo }}, with origin {{ gitorigin }} {% endif %} Environment: {{ environment|join(", ") }} {% if inputs|length == 0 %} Inputs: none {% else %} Inputs: