Example #1
0
def log_input(filename, source):
    """Log input to the database.

    Called by patched functions that do some sort of input (reading from a file
    etc) with the filename and some sort of information about the source.

    A list of filenames is logged one entry at a time. A value that is not a
    string is replaced by its ``name`` attribute when it has one.

    Note: the source parameter is currently not stored in the database.
    """
    # Some packages, e.g., xarray, accept a list of files as input argument
    if isinstance(filename, list):
        for f in filename:
            log_input(f, source)
        return

    if not isinstance(filename, six.string_types):
        # Only a missing attribute is expected here; a bare except would also
        # swallow KeyboardInterrupt and unrelated errors.
        try:
            filename = filename.name
        except AttributeError:
            pass
    filename = os.path.abspath(filename)
    if option_set('ignored metadata', 'input_hashes'):
        record = filename
    else:
        record = (filename, hash_file(filename))

    if option_set('general', 'debug'):
        print("Input from %s using %s" % (record, source))

    # Update object in DB
    version = get_version(source)
    db = open_or_create_db()
    try:
        db.update(append("inputs", record, no_duplicates=True), eids=[RUN_ID])
        db.update(append("libraries", version, no_duplicates=True),
                  eids=[RUN_ID])
    finally:
        # Release the database even when an update fails
        db.close()
Example #2
0
def log_output(filename, source):
    """Log output to the database.

    Called by patched functions that do some sort of output (writing to a file
    etc) with the filename and some sort of information about the source.

    When the 'file_diff_outputs' option is set and the output file already
    exists, a copy of it is saved to a temporary file and registered through
    add_file_diff_to_db.

    Note: the source parameter is currently not stored in the database.
    """
    if not isinstance(filename, str):
        # Only a missing attribute is expected here; a bare except would also
        # swallow KeyboardInterrupt and unrelated errors.
        try:
            filename = filename.name
        except AttributeError:
            pass
    filename = os.path.abspath(filename)

    db = open_or_create_db()
    try:
        if option_set('data', 'file_diff_outputs') and os.path.isfile(filename):
            tf = tempfile.NamedTemporaryFile(delete=False)
            # Only the path is needed; close the handle so it is not leaked
            # (an open handle also blocks the copy on some platforms).
            tf.close()
            shutil.copy2(filename, tf.name)
            add_file_diff_to_db(filename, tf.name, db)

        if option_set('general', 'debug'):
            print("Output to %s using %s" % (filename, source))

        # Update object in DB
        # data hash will be hashed at script exit, if enabled
        db.update(append("outputs", filename, no_duplicates=True),
                  eids=[RUN_ID])
        db.update(append("libraries", get_version(source), no_duplicates=True),
                  eids=[RUN_ID])
    finally:
        # Release the database even when something above fails
        db.close()
Example #3
0
def log_input(filename, source):
    """Log input to the database.

    Called by patched functions that do some sort of input (reading from a file
    etc) with the filename and some sort of information about the source.

    A value that is not a string is replaced by its ``name`` attribute when
    it has one. When the 'hash_inputs' option is set, the record stored is a
    (filename, hash) pair; otherwise it is just the filename.

    Note: the source parameter is currently not stored in the database.
    """
    if not isinstance(filename, str):
        # Only a missing attribute is expected here; a bare except would also
        # swallow KeyboardInterrupt and unrelated errors.
        try:
            filename = filename.name
        except AttributeError:
            pass
    filename = os.path.abspath(filename)
    if option_set('data', 'hash_inputs'):
        record = (filename, hash_file(filename))
    else:
        record = filename

    if option_set('general', 'debug'):
        print("Input from %s using %s" % (record, source))

    # Update object in DB
    db = open_or_create_db()
    try:
        db.update(append("inputs", record, no_duplicates=True), eids=[RUN_ID])
        db.update(append("libraries", get_version(source), no_duplicates=True),
                  eids=[RUN_ID])
    finally:
        # Release the database even when an update fails
        db.close()
Example #4
0
def log_output(filename, source):
    """Log output to the database.

    Called by patched functions that do some sort of output (writing to a file
    etc) with the filename and some sort of information about the source.

    A list of filenames is logged one entry at a time. When the
    'file_diff_outputs' option is set and the output file already exists and
    is not binary, a copy of it is saved to a temporary file and registered
    through add_file_diff_to_db.

    Note: the source parameter is currently not stored in the database.
    """
    if isinstance(filename, list):
        for f in filename:
            log_output(f, source)
        return

    if not isinstance(filename, six.string_types):
        # Only a missing attribute is expected here; a bare except would also
        # swallow KeyboardInterrupt and unrelated errors.
        try:
            filename = filename.name
        except AttributeError:
            pass
    filename = os.path.abspath(filename)

    version = get_version(source)
    db = open_or_create_db()
    try:
        if option_set('data', 'file_diff_outputs') \
           and os.path.isfile(filename) and not is_binary(filename):
            tf = tempfile.NamedTemporaryFile(delete=False)
            # Only the path is needed; close the handle so it is not leaked
            # (an open handle also blocks the copy on some platforms).
            tf.close()
            shutil.copy2(filename, tf.name)
            add_file_diff_to_db(filename, tf.name, db)

        if option_set('general', 'debug'):
            print("Output to %s using %s" % (filename, source))

        # Update object in DB
        # data hash will be hashed at script exit, if enabled
        db.update(append("outputs", filename, no_duplicates=True),
                  eids=[RUN_ID])
        db.update(append("libraries", version, no_duplicates=True),
                  eids=[RUN_ID])
    finally:
        # Release the database even when something above fails
        db.close()
Example #5
0
def log_input(filename, source):
    """Log input to the database.

    Called by patched functions that do some sort of input (reading from a file
    etc) with the filename and some sort of information about the source.

    A value that is not a string is replaced by its ``name`` attribute when
    it has one. When the 'hash_inputs' option is set, the record stored is a
    (filename, hash) pair; otherwise it is just the filename.

    Note: the source parameter is currently not stored in the database.
    """
    if not isinstance(filename, str):
        # Only a missing attribute is expected here; a bare except would also
        # swallow KeyboardInterrupt and unrelated errors.
        try:
            filename = filename.name
        except AttributeError:
            pass
    filename = os.path.abspath(filename)
    if option_set('data', 'hash_inputs'):
        record = (filename, hash_file(filename))
    else:
        record = filename

    if option_set('general', 'debug'):
        print("Input from %s using %s" % (record, source))

    # Update object in DB
    db = open_or_create_db()
    try:
        db.update(append("inputs", record, no_duplicates=True), eids=[RUN_ID])
        db.update(append("libraries", get_version(source), no_duplicates=True),
                  eids=[RUN_ID])
    finally:
        # Release the database even when an update fails
        db.close()
Example #6
0
def log_init():
    """Insert the record for a new run and remember its ID in RUN_ID.

    Determines the script path from sys.argv, collects basic metadata
    (author, interpreter, platform, date and, unless ignored, git details)
    and inserts it into the database. Returns without doing anything when
    the first argument is '-c' or '-m' and no script argument follows.
    """
    global RUN_ID

    # When running python -m recipy ..., during the recipy import argument 0
    # is -c (for Python 2) or -m (for Python 3) and the script is argument 1
    first_arg = sys.argv[0]
    if first_arg in ('-c', '-m'):
        if len(sys.argv) < 2:
            # python -m recipy was called without further arguments
            return
        scriptpath = os.path.realpath(sys.argv[1])
    else:
        scriptpath = os.path.realpath(first_arg)

    db = open_or_create_db()

    # Unique ID for this run
    guid = str(uuid.uuid4())

    # General metadata, environment info, etc
    python_version = "python " + sys.version.split('\n')[0]
    run = {
        "unique_id": guid,
        "author": getpass.getuser(),
        "description": "",
        "inputs": [],
        "outputs": [],
        "script": scriptpath,
        "command": sys.executable,
        "environment": [platform.platform(), python_version],
        "date": datetime.datetime.utcnow(),
    }

    if not option_set('ignored metadata', 'git'):
        try:
            repo = Repo(scriptpath, search_parent_directories=True)
            run["gitrepo"] = repo.working_dir
            run["gitcommit"] = repo.head.commit.hexsha
            run["gitorigin"] = get_origin(repo)

            if not option_set('ignored metadata', 'diff'):
                changes = repo.index.diff(None, create_patch=True)
                run['diff'] = ''.join(
                    "\n\n\n" + change.diff.decode("utf-8")
                    for change in changes)
        except (InvalidGitRepositoryError, ValueError):
            # We can't store git info for some reason, so just skip it
            pass

    # Put basics into DB
    RUN_ID = db.insert(run)

    if not option_set('general', 'quiet'):
        print("recipy run inserted, with ID %s" % guid)

    db.close()
Example #7
0
def log_init():
    """Insert the record for a new run and remember its ID in RUN_ID.

    Determines the script path and its command-line arguments from sys.argv,
    collects basic metadata, inserts it into the database and installs
    log_exception as sys.excepthook. Returns without doing anything when
    the first argument is '-c' or '-m' and no script argument follows.
    """
    global RUN_ID

    # When running python -m recipy ..., during the recipy import argument 0
    # is -c (for Python 2) or -m (for Python 3) and the script is argument 1
    first_arg = sys.argv[0]
    if first_arg in ('-c', '-m'):
        if len(sys.argv) < 2:
            # python -m recipy was called without further arguments
            return
        script_arg, cmd_args = sys.argv[1], sys.argv[2:]
    else:
        script_arg, cmd_args = first_arg, sys.argv[1:]
    scriptpath = os.path.realpath(script_arg)

    db = open_or_create_db()

    # Unique ID for this run
    guid = str(uuid.uuid4())

    # General metadata, environment info, etc
    python_version = "python " + sys.version.split('\n')[0]
    run = {
        "unique_id": guid,
        "author": getpass.getuser(),
        "description": "",
        "inputs": [],
        "outputs": [],
        "script": scriptpath,
        "command": sys.executable,
        "environment": [platform.platform(), python_version],
        "date": datetime.datetime.utcnow(),
        "exit_date": None,  # updated at script exit
        "command_args": " ".join(cmd_args),
    }

    if not option_set('ignored metadata', 'git'):
        add_git_info(run, scriptpath)

    # Put basics into DB
    RUN_ID = db.insert(run)

    if not option_set('general', 'quiet'):
        print("recipy run inserted, with ID %s" % guid)

    db.close()

    # Register exception hook so exceptions can be logged
    sys.excepthook = log_exception
Example #8
0
    def load_module(self, name):
        """Module loading method. It imports the module normally,
        and then calls the `patch` method to wrap the functions we need.

        `patch` is implemented by subclasses

        Raises ImportError when `name` is not the module this object was
        set up for (`self.modulename`). Returns the entry already present
        in `sys.modules` when there is one, otherwise the freshly loaded
        and patched module.
        """
        if name != self.modulename:
            # Interpolate the class name; passing it as a second argument to
            # ImportError leaves the "%s" placeholder unformatted.
            raise ImportError(
                "%s can only be used to import a specific module!"
                % self.__class__.__name__)
        if name in sys.modules:
            return sys.modules[name]  # already imported and patched

        # Find the module
        file_obj, pathname, desc = recursive_find_module(name, sys.path)

        try:
            mod = imp.load_module(name, file_obj, pathname, desc)
        finally:
            # The file object may be falsy (hence the check); close it
            # whether or not loading succeeded.
            if file_obj:
                file_obj.close()

        if option_set('general', 'debug'):
            print("Patching %s" % mod.__name__)

        # Actually do the patching
        mod = self.patch(mod)

        # And put the module in Python's proper namespace
        sys.modules[name] = mod

        return mod
Example #9
0
    def patch(self, mod):
        """Wrap each function named in `self.functions` on `mod`.

        Every name is handed to patch_function together with
        `self.wrapper`; `mod` is returned afterwards.
        """
        for func_name in self.functions:
            debugging = option_set('general', 'debug')
            if debugging:
                print('Patching input/output function: {}'.format(func_name))
            patch_function(mod, func_name, self.wrapper)

        return mod
Example #10
0
    def load_module(self, name):
        """Module loading method. It imports the module normally,
        and then calls the `patch` method to wrap the functions we need.

        `patch` is implemented by subclasses

        Raises ImportError when `name` is not the module this object was
        set up for (`self.modulename`). Returns the entry already present
        in `sys.modules` when there is one, otherwise the freshly loaded
        and patched module.
        """
        if name != self.modulename:
            # Interpolate the class name; passing it as a second argument to
            # ImportError leaves the "%s" placeholder unformatted.
            raise ImportError(
                "%s can only be used to import a specific module!"
                % self.__class__.__name__)
        if name in sys.modules:
            return sys.modules[name]    # already imported and patched

        # Find the module
        file_obj, pathname, desc = recursive_find_module(name, sys.path)

        try:
            mod = imp.load_module(name, file_obj, pathname, desc)
        finally:
            # The file object may be falsy (hence the check); close it
            # whether or not loading succeeded.
            if file_obj:
                file_obj.close()

        if option_set('general', 'debug'):
            print("Patching %s" % mod.__name__)

        # Actually do the patching
        mod = self.patch(mod)

        # And put the module in Python's proper namespace
        sys.modules[name] = mod

        return mod
Example #11
0
def output_file_diffs():
    """Compute and store the diff of each output file registered for this run.

    Does nothing unless the 'file_diff_outputs' option is set. For every
    entry in the 'filediffs' table belonging to RUN_ID, the temporary copy
    is compared with the current file, the unified diff is stored on the
    entry, and the temporary copy is removed.
    """
    # Writing to output files is complete; we can now compute file diffs.
    if not option_set('data', 'file_diff_outputs'):
        return

    encodings = ['utf-8', 'latin-1']

    with open_or_create_db() as db:
        diffs = db.table('filediffs').search(Query().run_id == RUN_ID)

    for item in diffs:
        if option_set('general', 'debug'):
            print('Storing file diff for "%s"' % item['filename'])

        lines1 = None
        lines2 = None
        for enc in encodings:
            try:
                with codecs.open(item['tempfilename'], encoding=enc) as f:
                    before = f.readlines()
                with codecs.open(item['filename'], encoding=enc) as f:
                    after = f.readlines()
            except UnicodeDecodeError:
                continue
            # Stop at the first encoding that decodes both files; otherwise
            # a later encoding would overwrite a correct earlier read.
            lines1, lines2 = before, after
            break

        if lines1 is not None and lines2 is not None:
            diff = difflib.unified_diff(lines1,
                                        lines2,
                                        fromfile='before this run',
                                        tofile='after this run')
            with open_or_create_db() as db:
                # Look the table up on the freshly opened database rather
                # than reusing one bound to a database that was closed.
                db.table('filediffs').update({'diff': ''.join(diff)},
                                             eids=[item.eid])
        else:
            msg = ('Unable to read file "{}" using supported encodings ({}). '
                   'To be able to store file diffs, use one of the supported '
                   'encodings to write the output file.')
            warnings.warn(msg.format(item['filename'], ', '.join(encodings)))

        # delete temporary file
        os.remove(item['tempfilename'])
Example #12
0
def log_output(filename, source):
    """Append the absolute path of `filename` to the run's "outputs".

    `source` is only used in the debug message.
    """
    filename = os.path.abspath(filename)
    if option_set('general', 'debug'):
        print("Output to %s using %s" % (filename, source))

    # Update object in DB
    db = open_or_create_db()
    try:
        db.update(append("outputs", filename), eids=[RUN_ID])
    finally:
        # Release the database even when the update fails
        db.close()
Example #13
0
def log_exit():
    """Store the current UTC time as the run's 'exit_date'."""
    if option_set('general', 'debug'):
        print("recipy run complete")

    # Update the record with the timestamp of the script's completion.
    # We don't save the duration because it's harder to serialize a timedelta.
    fields = {'exit_date': datetime.datetime.utcnow()}

    database = open_or_create_db()
    database.update(fields, eids=[RUN_ID])
    database.close()
Example #14
0
def log_exit():
    """Store the current UTC time as the run's 'exit_date'."""
    if option_set('general', 'debug'):
        print("recipy run complete")

    # Update the record with the timestamp of the script's completion.
    # We don't save the duration because it's harder to serialize a timedelta.
    fields = {'exit_date': datetime.datetime.utcnow()}

    database = open_or_create_db()
    database.update(fields, eids=[RUN_ID])
    database.close()
Example #15
0
def log_input(filename, source):
    """Append an input file to the run's "inputs" in the database.

    A value that is not a string is replaced by its ``name`` attribute when
    it has one. When the 'hash_inputs' option is set, the record stored is a
    (filename, git_hash_object(filename)) pair; otherwise it is just the
    filename. `source` is only used in the debug message.
    """
    if not isinstance(filename, str):
        # Only a missing attribute is expected here; a bare except would also
        # swallow KeyboardInterrupt and unrelated errors.
        try:
            filename = filename.name
        except AttributeError:
            pass
    filename = os.path.abspath(filename)
    if option_set('data', 'hash_inputs'):
        record = (filename, git_hash_object(filename))
    else:
        record = filename

    if option_set('general', 'debug'):
        print("Input from %s using %s" % (record, source))

    # Update object in DB
    db = open_or_create_db()
    try:
        db.update(append("inputs", record, no_duplicates=True), eids=[RUN_ID])
    finally:
        # Release the database even when the update fails
        db.close()
Example #16
0
def hash_outputs():
    """Replace each logged output of this run by a (filename, hash) pair.

    Does nothing unless the 'hash_outputs' option is set.
    """
    # Writing to output files is complete; we can now compute hashes.
    hashing_enabled = option_set('data', 'hash_outputs')
    if not hashing_enabled:
        return

    database = open_or_create_db()
    record = database.get(eid=RUN_ID)

    hashed = []
    for path in record.get('outputs'):
        hashed.append((path, hash_file(path)))

    database.update({'outputs': hashed}, eids=[RUN_ID])
    database.close()
Example #17
0
def hash_outputs():
    """Replace each logged output of this run by a (filename, hash) pair.

    Does nothing when 'output_hashes' is listed under 'ignored metadata'.
    """
    # Writing to output files is complete; we can now compute hashes.
    hashing_ignored = option_set('ignored metadata', 'output_hashes')
    if hashing_ignored:
        return

    database = open_or_create_db()
    record = database.get(eid=RUN_ID)

    hashed = []
    for path in record.get('outputs'):
        hashed.append((path, hash_file(path)))

    database.update({'outputs': hashed}, eids=[RUN_ID])
    database.close()
Example #18
0
def log_exception(typ, value, traceback):
    """Store an uncaught exception on the run, then report it as usual.

    The exception's type name, message and formatted traceback are written
    to the run's "exception" field before the default exception hook is
    called with the same arguments.
    """
    if option_set('general', 'debug'):
        print("Logging exception %s" % value)

    details = dict(
        type=typ.__name__,
        message=str(value),
        traceback=''.join(format_tb(traceback)),
    )

    # Update object in DB
    database = open_or_create_db()
    database.update({"exception": details}, eids=[RUN_ID])
    database.close()

    # Done logging, call default exception handler
    sys.__excepthook__(typ, value, traceback)
Example #19
0
def log_output(filename, source):
    """Append an output file to the run's "outputs" in the database.

    A value that is not a string is replaced by its ``name`` attribute when
    it has one; the absolute path is what gets stored. `source` is only used
    in the debug message.
    """
    if not isinstance(filename, str):
        # Only a missing attribute is expected here; a bare except would also
        # swallow KeyboardInterrupt and unrelated errors.
        try:
            filename = filename.name
        except AttributeError:
            pass
    filename = os.path.abspath(filename)
    if option_set('general', 'debug'):
        print("Output to %s using %s" % (filename, source))

    # Update object in DB
    db = open_or_create_db()
    try:
        db.update(append("outputs", filename, no_duplicates=True),
                  eids=[RUN_ID])
    finally:
        # Release the database even when the update fails
        db.close()
Example #20
0
def log_exception(typ, value, traceback):
    """Store an uncaught exception on the run, then report it as usual.

    The exception's type name, message and formatted traceback are written
    to the run's "exception" field before the default exception hook is
    called with the same arguments.
    """
    if option_set('general', 'debug'):
        print("Logging exception %s" % value)

    details = dict(
        type=typ.__name__,
        message=str(value),
        traceback=''.join(format_tb(traceback)),
    )

    # Update object in DB
    database = open_or_create_db()
    database.update({"exception": details}, eids=[RUN_ID])
    database.close()

    # Done logging, call default exception handler
    sys.__excepthook__(typ, value, traceback)
Example #21
0
def add_svn_info(run, scriptpath):
    """
    Add svn details for the working copy at `scriptpath` to the `run` dict.

    Sets "svnrepo" and "svncommit" and, unless 'diff' is listed under
    'ignored metadata', also "diff". Nothing more is added once an
    SvnException, ValueError or OSError occurs.
    """
    try:
        info = svn.local.LocalClient(scriptpath).info()
        run["svnrepo"] = info["repository_root"]
        run["svncommit"] = info["commit_revision"]
        if option_set('ignored metadata', 'diff'):
            return
        run['diff'] = svn_diff(info["wc-info/wcroot-abspath"])
    except (SvnException, ValueError, OSError):
        # We can't access svn info for some reason, so just skip it
        pass
Example #22
0
    def patch(self, mod):
        """Patch the functions listed in `self.wrappers.functions` on `mod`.

        Each entry supplies a 'type', a 'function' name and a 'wrapper';
        entries for which `self._ignore` is true are skipped. Returns `mod`.
        """
        for spec in self.wrappers.functions:
            if self._ignore(spec):
                if option_set('general', 'debug'):
                    print('Ignoring {} for: {}'.format(spec['type'],
                                                       self.modulename))
                continue

            if option_set('general', 'debug'):
                print('Patching {} function: {}'.format(
                    spec['type'], spec['function']))

            # The function that is returned by create_wrapper assumes that
            # the wrapper is created directly on the patch object (the
            # first argument of f is self). We have to fake that here.
            # Otherwise, there will be an error, because an argument is
            # missing:
            # TypeError f() takes exactly 5 arguments (4 given)
            setattr(self.__class__, 'wrapper', spec['wrapper'])
            patch_function(mod, spec['function'], self.wrapper)

        return mod
Example #23
0
    def patch(self, mod):
        """Wrap the input and output functions of `mod`.

        Names in `input_functions` are patched with `input_wrapper` unless
        `_ignore_input()` is true; names in `output_functions` are patched
        with `output_wrapper` unless `_ignore_output()` is true.
        Returns `mod`.
        """
        if self._ignore_input():
            if option_set('general', 'debug'):
                print('Ignoring inputs for: %s' % self.modulename)
        else:
            for func_name in self.input_functions:
                if option_set('general', 'debug'):
                    print('Patching input function: %s' % func_name)
                patch_function(mod, func_name, self.input_wrapper)

        if self._ignore_output():
            if option_set('general', 'debug'):
                print('Ignoring outputs for: %s' % self.modulename)
        else:
            for func_name in self.output_functions:
                if option_set('general', 'debug'):
                    print('Patching output function: %s' % func_name)
                patch_function(mod, func_name, self.output_wrapper)

        return mod
Example #24
0
    def patch(self, mod):
        """Wrap the input and output functions of `mod`.

        Names in `input_functions` are patched with `input_wrapper` unless
        `_ignore_input()` is true; names in `output_functions` are patched
        with `output_wrapper` unless `_ignore_output()` is true.
        Returns `mod`.
        """
        if self._ignore_input():
            if option_set('general', 'debug'):
                print('Ignoring inputs for: %s' % self.modulename)
        else:
            for func_name in self.input_functions:
                if option_set('general', 'debug'):
                    print('Patching input function: %s' % func_name)
                patch_function(mod, func_name, self.input_wrapper)

        if self._ignore_output():
            if option_set('general', 'debug'):
                print('Ignoring outputs for: %s' % self.modulename)
        else:
            for func_name in self.output_functions:
                if option_set('general', 'debug'):
                    print('Patching output function: %s' % func_name)
                patch_function(mod, func_name, self.output_wrapper)

        return mod
Example #25
0
def log_warning(msg, typ, script, lineno, **kwargs):
    """Store a warning on the run, then write it to stderr.

    The warning's type name, message, script and line number are appended to
    the run's "warnings" list. Extra keyword arguments are accepted and
    ignored.
    """
    text = str(msg)
    if option_set('general', 'debug'):
        print('Logging warning "%s"' % text)

    entry = dict(type=typ.__name__, message=text, script=script,
                 lineno=lineno)

    # Update object in DB
    database = open_or_create_db()
    database.update(append("warnings", entry, no_duplicates=True),
                    eids=[RUN_ID])
    database.close()

    # Done logging, print warning to stderr
    formatted = warnings.formatwarning(msg, typ, script, lineno)
    sys.stderr.write(formatted)
Example #26
0
def log_warning(msg, typ, script, lineno, **kwargs):
    """Store a warning on the run, then write it to stderr.

    The warning's type name, message, script and line number are appended to
    the run's "warnings" list. Extra keyword arguments are accepted and
    ignored.
    """
    text = str(msg)
    if option_set('general', 'debug'):
        print('Logging warning "%s"' % text)

    entry = dict(type=typ.__name__, message=text, script=script,
                 lineno=lineno)

    # Update object in DB
    database = open_or_create_db()
    database.update(append("warnings", entry, no_duplicates=True),
                    eids=[RUN_ID])
    database.close()

    # Done logging, print warning to stderr
    formatted = warnings.formatwarning(msg, typ, script, lineno)
    sys.stderr.write(formatted)
Example #27
0
def dedupe_inputs():
    """Remove inputs that are logged multiple times.

    Sometimes patched libraries use other patched libraries to open files.
    E.g., xarray internally uses netCDF4 to open netcdf files. If this happens,
    and recipy is configured to log file hashes, inputs are logged multiple
    times. Hashed inputs are stored as a list in the database, and tinydb does
    not automatically dedupe lists.

    Outputs do not need to be deduped, because file hashes are added after the
    run is finished, and tinydb can automatically dedupe strings.

    Does nothing when 'input_hashes' is listed under 'ignored metadata'.
    The first occurrence of each input is kept, in its original position.
    """
    if option_set('ignored metadata', 'input_hashes'):
        return
    db = open_or_create_db()
    try:
        run = db.get(eid=RUN_ID)
        # Keep first-seen order; going through a bare set would store the
        # inputs in an arbitrary order that can differ between runs.
        seen = set()
        new_inputs = []
        for inp in run['inputs']:
            key = tuple(inp)
            if key not in seen:
                seen.add(key)
                new_inputs.append(key)
        db.update({'inputs': new_inputs}, eids=[RUN_ID])
    finally:
        # Release the database even when reading or updating fails
        db.close()
Example #28
0
def output_file_diffs():
    """Compute and store the diff of each output file registered for this run.

    Does nothing unless the 'file_diff_outputs' option is set. For every
    entry in the 'filediffs' table belonging to RUN_ID, the temporary copy
    is compared with the current file, the unified diff is stored on the
    entry, and the temporary copy is removed.
    """
    # Writing to output files is complete; we can now compute file diffs.
    if not option_set('data', 'file_diff_outputs'):
        return

    db = open_or_create_db()
    try:
        diffs_table = db.table('filediffs')
        diffs = diffs_table.search(Query().run_id == RUN_ID)
        for item in diffs:
            # Read through context managers so both handles are closed
            # promptly instead of being left to the garbage collector.
            with open(item['tempfilename']) as before:
                old_lines = before.readlines()
            with open(item['filename']) as after:
                new_lines = after.readlines()

            diff = difflib.unified_diff(old_lines,
                                        new_lines,
                                        fromfile='before this run',
                                        tofile='after this run')
            diffs_table.update({'diff': ''.join(diff)}, eids=[item.eid])

            # delete temporary file
            os.remove(item['tempfilename'])
    finally:
        # Release the database even when a file cannot be read
        db.close()
Example #29
0
def add_git_info(run, scriptpath):
    """Add git details for the repository containing `scriptpath` to `run`.

    Sets "gitrepo", "gitcommit" and "gitorigin" and, unless 'diff' is listed
    under 'ignored metadata', also "diff" (each change prefixed with
    ---/+++ path headers). Nothing more is added once an
    InvalidGitRepositoryError or ValueError occurs.
    """
    try:
        repo = Repo(scriptpath, search_parent_directories=True)
        run["gitrepo"] = repo.working_dir
        run["gitcommit"] = repo.head.commit.hexsha
        run["gitorigin"] = get_origin(repo)

        if not option_set('ignored metadata', 'diff'):
            header = "--- {}\n+++ {}\n"
            pieces = []
            for change in repo.index.diff(None, create_patch=True):
                pieces.append(
                    "\n\n\n"
                    + header.format(change.a_path, change.b_path)
                    + change.diff.decode("utf-8"))
            run['diff'] = ''.join(pieces)
    except (InvalidGitRepositoryError, ValueError):
        # We can't store git info for some reason, so just skip it
        pass
Example #30
0
def output_file_diffs():
    """Compute and store the diff of each output file registered for this run.

    Does nothing unless the 'file_diff_outputs' option is set. For every
    entry in the 'filediffs' table belonging to RUN_ID, the temporary copy
    is compared with the current file, the unified diff is stored on the
    entry, and the temporary copy is removed.
    """
    # Writing to output files is complete; we can now compute file diffs.
    if not option_set('data', 'file_diff_outputs'):
        return

    db = open_or_create_db()
    try:
        diffs_table = db.table('filediffs')
        diffs = diffs_table.search(Query().run_id == RUN_ID)
        for item in diffs:
            # Read through context managers so both handles are closed
            # promptly instead of being left to the garbage collector.
            with open(item['tempfilename']) as before:
                old_lines = before.readlines()
            with open(item['filename']) as after:
                new_lines = after.readlines()

            diff = difflib.unified_diff(old_lines,
                                        new_lines,
                                        fromfile='before this run',
                                        tofile='after this run')
            diffs_table.update({'diff': ''.join(diff)}, eids=[item.eid])

            # delete temporary file
            os.remove(item['tempfilename'])
    finally:
        # Release the database even when a file cannot be read
        db.close()
Example #31
0
def add_git_info(run, scriptpath):
    """Add git details for the repository containing `scriptpath` to `run`.

    Sets "githash", "gitrepo", "gitcommit" and "gitorigin" (None when the
    origin URL cannot be read) and, unless 'diff' is listed under
    'ignored metadata', also "diff". Nothing more is added once an
    InvalidGitRepositoryError or ValueError occurs.
    """
    try:
        repo = Repo(scriptpath, search_parent_directories=True)
        run["githash"] = git_hash_object(scriptpath)
        run["gitrepo"] = repo.working_dir
        run["gitcommit"] = repo.head.commit.hexsha
        try:
            run["gitorigin"] = repo.remotes.origin.url
        except Exception:
            # Best effort: a repository without a usable origin still gets
            # logged. Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit through.
            run["gitorigin"] = None

        if not option_set('ignored metadata', 'diff'):
            diffs = repo.index.diff(None, create_patch=True)
            run['diff'] = ''.join(
                "\n\n\n" + diff.diff.decode("utf-8") for diff in diffs)
    except (InvalidGitRepositoryError, ValueError):
        # We can't store git info for some reason, so just skip it
        pass
Example #32
0
def log_values(custom_values=None, **kwargs):
    """ Log custom key-value pairs into the database
    e.g,
    >>> log_values(a=1, b=2)
    >>> log_values({'c': 3, 'd': 4})
    >>> log_values({'e': 5, 'f': 6}, g=7, h=8)

    Keyword arguments are merged over `custom_values`. The dict passed in
    is not modified.
    """

    # create dictionary of custom values from arguments
    custom_values = {} if custom_values is None else custom_values
    assert isinstance(custom_values, dict), \
        "custom_values must be a dict. type(custom_values) = %s" % type(custom_values)
    # Work on a copy so the caller's dict is not changed as a side effect
    custom_values = dict(custom_values)
    custom_values.update(kwargs)

    # debugging
    if option_set('general', 'debug'):
        print('Logging custom values: %s' % str(custom_values))

    # Update object in DB
    db = open_or_create_db()
    try:
        db.update(add_dict("custom_values", custom_values), eids=[RUN_ID])
    finally:
        # Release the database even when the update fails
        db.close()
Example #33
0
def log_init():
    """Do the initial logging for a new run.

    Works out what script has been run, creates a new unique run ID,
    gets the basic metadata and inserts it into the database. Also warns
    about modules listed in the 'patches' table that are already imported,
    and installs log_exception as sys.excepthook.

    This is called when running `import recipy`.
    """
    global RUN_ID

    # When running python -m recipy ..., during the recipy import argument 0
    # is -c (for Python 2) or -m (for Python 3) and the script is argument 1
    first_arg = sys.argv[0]
    if first_arg in ('-c', '-m'):
        if len(sys.argv) < 2:
            # python -m recipy was called without further arguments
            return
        script_arg, cmd_args = sys.argv[1], sys.argv[2:]
    else:
        script_arg, cmd_args = first_arg, sys.argv[1:]
    scriptpath = os.path.realpath(script_arg)

    db = open_or_create_db()

    # Unique ID for this run
    guid = str(uuid.uuid4())

    # General metadata, environment info, etc
    python_version = "python " + sys.version.split('\n')[0]
    run = {
        "unique_id": guid,
        "author": getpass.getuser(),
        "description": "",
        "inputs": [],
        "outputs": [],
        "script": scriptpath,
        "command": sys.executable,
        "environment": [platform.platform(), python_version],
        "date": datetime.datetime.utcnow(),
        "command_args": " ".join(cmd_args),
        "warnings": [],
        "libraries": [get_version('recipy')],
    }

    if not option_set('ignored metadata', 'git'):
        try:
            repo = Repo(scriptpath, search_parent_directories=True)
            run["gitrepo"] = repo.working_dir
            run["gitcommit"] = repo.head.commit.hexsha
            run["gitorigin"] = get_origin(repo)

            if not option_set('ignored metadata', 'diff'):
                changes = repo.index.diff(None, create_patch=True)
                run['diff'] = ''.join(
                    "\n\n\n" + change.diff.decode("utf-8")
                    for change in changes)
        except (InvalidGitRepositoryError, ValueError):
            # We can't store git info for some reason, so just skip it
            pass

    # Put basics into DB
    RUN_ID = db.insert(run)

    if not option_set('general', 'quiet'):
        print("recipy run inserted, with ID %s" % guid)

    # check whether patched modules were imported before recipy was imported
    template = ('not tracking inputs and outputs for {}; recipy was '
                'imported after this module')
    for entry in db.table('patches').all():
        modulename = entry['modulename']
        if modulename in sys.modules:
            warnings.warn(template.format(entry['modulename']), stacklevel=3)

    db.close()

    # Register exception hook so exceptions can be logged
    sys.excepthook = log_exception
Example #34
0
def log_init(notebookName=None):
    """Do the initial logging for a new run.

    Works out what script (or notebook) has been run, creates a new unique
    run ID, gets the basic metadata and inserts it into the database. Also
    warns about modules listed in the 'patches' table that are already
    imported, and installs log_exception as sys.excepthook.

    This is called when running `import recipy`. In notebook mode nothing
    is logged until a notebook name is supplied.
    """
    global RUN_ID

    in_notebook = get_notebook_mode()
    if in_notebook and notebookName is None:
        # Avoid first call without Notebook name
        return

    if in_notebook:
        scriptpath = notebookName
        cmd_args = sys.argv[1:]
    # When running python -m recipy ..., during the recipy import argument 0
    # is -c (for Python 2) or -m (for Python 3) and the script is argument 1
    elif sys.argv[0] in ('-c', '-m'):
        if len(sys.argv) < 2:
            # python -m recipy was called without further arguments
            return
        scriptpath = os.path.realpath(sys.argv[1])
        cmd_args = sys.argv[2:]
    else:
        scriptpath = os.path.realpath(sys.argv[0])
        cmd_args = sys.argv[1:]

    db = open_or_create_db()

    # Unique ID for this run
    guid = str(uuid.uuid4())

    # General metadata, environment info, etc
    python_version = "python " + sys.version.split('\n')[0]
    run = {
        "unique_id": guid,
        "author": getpass.getuser(),
        "description": "",
        "inputs": [],
        "outputs": [],
        "script": scriptpath,
        "command": sys.executable,
        "environment": [platform.platform(), python_version],
        "date": datetime.datetime.utcnow(),
        "command_args": " ".join(cmd_args),
        "warnings": [],
        "libraries": [get_version('recipy')],
        "custom_values": {},
    }

    # Version-control details are only gathered when no notebook name is given
    if not notebookName:
        if not option_set('ignored metadata', 'git'):
            add_git_info(run, scriptpath)
        if not option_set('ignored metadata', 'svn'):
            add_svn_info(run, scriptpath)

    # Put basics into DB
    RUN_ID = db.insert(run)

    if not option_set('general', 'quiet'):
        print("recipy run inserted, with ID %s" % guid)

    # check whether patched modules were imported before recipy was imported
    template = ('not tracking inputs and outputs for {}; recipy was '
                'imported after this module')
    for entry in db.table('patches').all():
        modulename = entry['modulename']
        if modulename in sys.modules:
            warnings.warn(template.format(entry['modulename']), stacklevel=3)

    db.close()

    # Register exception hook so exceptions can be logged
    sys.excepthook = log_exception
Example #35
0
def log_init():
    """Do the initial logging for a new run.

    Works out what script has been run, creates a new unique run ID,
    gets the basic metadata and inserts it into the database. Also warns
    about modules listed in the 'patches' table that are already imported,
    and installs log_exception as sys.excepthook.

    This is called when running `import recipy`.
    """
    global RUN_ID

    # When running python -m recipy ..., during the recipy import argument 0
    # is -c (for Python 2) or -m (for Python 3) and the script is argument 1
    first_arg = sys.argv[0]
    if first_arg in ('-c', '-m'):
        if len(sys.argv) < 2:
            # python -m recipy was called without further arguments
            return
        script_arg, cmd_args = sys.argv[1], sys.argv[2:]
    else:
        script_arg, cmd_args = first_arg, sys.argv[1:]
    scriptpath = os.path.realpath(script_arg)

    db = open_or_create_db()

    # Unique ID for this run
    guid = str(uuid.uuid4())

    # General metadata, environment info, etc
    python_version = "python " + sys.version.split('\n')[0]
    run = {
        "unique_id": guid,
        "author": getpass.getuser(),
        "description": "",
        "inputs": [],
        "outputs": [],
        "script": scriptpath,
        "command": sys.executable,
        "environment": [platform.platform(), python_version],
        "date": datetime.datetime.utcnow(),
        "command_args": " ".join(cmd_args),
        "warnings": [],
        "libraries": [get_version('recipy')],
    }

    if not option_set('ignored metadata', 'git'):
        try:
            repo = Repo(scriptpath, search_parent_directories=True)
            run["gitrepo"] = repo.working_dir
            run["gitcommit"] = repo.head.commit.hexsha
            run["gitorigin"] = get_origin(repo)

            if not option_set('ignored metadata', 'diff'):
                changes = repo.index.diff(None, create_patch=True)
                run['diff'] = ''.join(
                    "\n\n\n" + change.diff.decode("utf-8")
                    for change in changes)
        except (InvalidGitRepositoryError, ValueError):
            # We can't store git info for some reason, so just skip it
            pass

    # Put basics into DB
    RUN_ID = db.insert(run)

    if not option_set('general', 'quiet'):
        print("recipy run inserted, with ID %s" % guid)

    # check whether patched modules were imported before recipy was imported
    template = ('not tracking inputs and outputs for {}; recipy was '
                'imported after this module')
    for entry in db.table('patches').all():
        modulename = entry['modulename']
        if modulename in sys.modules:
            warnings.warn(template.format(entry['modulename']), stacklevel=3)

    db.close()

    # Register exception hook so exceptions can be logged
    sys.excepthook = log_exception
Example #36
0
    def _ignore_output(self):
        """Tell whether outputs of this module are configured to be ignored.

        Checks the 'ignored outputs' option for the top-level package name
        of `self.modulename` first, then for 'all'.
        """
        top_level = self.modulename.partition('.')[0]

        ignored_for_module = option_set('ignored outputs', top_level)
        if ignored_for_module:
            return ignored_for_module
        return option_set('ignored outputs', 'all')
Example #37
0
    def _ignore_output(self):
        """Tell whether outputs of this module are configured to be ignored.

        Checks the 'ignored outputs' option for the top-level package name
        of `self.modulename` first, then for 'all'.
        """
        top_level = self.modulename.partition('.')[0]

        ignored_for_module = option_set('ignored outputs', top_level)
        if ignored_for_module:
            return ignored_for_module
        return option_set('ignored outputs', 'all')
Example #38
0
    def _ignore(self, f):
        """Tell whether functions of the kind described by `f` are ignored.

        The option section is 'ignored <type>s', built from `f['type']`. It
        is checked for the top-level package name of `self.modulename`
        first, then for 'all'.
        """
        top_level = self.modulename.partition('.')[0]
        section = 'ignored {}s'.format(f['type'])

        ignored_for_module = option_set(section, top_level)
        if ignored_for_module:
            return ignored_for_module
        return option_set(section, 'all')