def evaluate(system, gs, eval_class, **kwargs):
    """Evaluate system output(s) against the gold standard with eval_class.

    This function mostly just formats arguments for eval_class (either the
    EvaluatePHI or EvaluateCardiacRisk class): it builds annotation id
    indexed dicts of StandoffAnnotation objects for the system(s) and for
    the gold standard and hands them to the eval_class constructor.

    Two calling modes are supported:

    * file / file: system[0] and gs are both files.  One evaluation is
      built and its print_docs() method is called.
    * directories / directory: every entry of system is a directory and
      gs is a directory.  One evaluation is built per system id found in
      the system directories and print_report() is called on each.

    Args:
        system: list containing either one file, or one or more directories.
        gs: path of the gold standard file or directory.
        eval_class: subclass of Evaluate or CombinedEvaluation.
        **kwargs: forwarded to eval_class(), except for 'verbose', which is
            stripped and passed to print_report() instead (it is only used
            in directory mode).

    Returns:
        The single evaluation object when exactly one evaluation was run,
        otherwise the list of evaluation objects.

    Raises:
        AssertionError: if eval_class is not an accepted class or one of
            the given paths does not exist.
        Exception: if the arguments are neither file/file nor
            directories/directory (including an empty system list).
    """
    assert issubclass(eval_class, (Evaluate, CombinedEvaluation)), \
        "Must pass in EvaluatePHI or EvaluateCardiacRisk classes to evaluate()"

    # 'verbose' is not a keyword of our eval classes' __init__() functions,
    # so strip it before forwarding kwargs.
    verbose = kwargs.pop('verbose', False)

    assert os.path.exists(gs), "{} does not exist!".format(gs)

    for path in system:
        assert os.path.exists(path), "{} does not exist!".format(path)

    evaluations = []

    # Handle if two files were passed on the command line
    if system and os.path.isfile(system[0]) and os.path.isfile(gs):
        gold_doc = StandoffAnnotation(gs)
        system_doc = StandoffAnnotation(system[0])
        e = eval_class({system_doc.id: system_doc},
                       {gold_doc.id: gold_doc},
                       **kwargs)
        e.print_docs()
        evaluations.append(e)

    # Handle the case where 'gs' is a directory and 'system' is a
    # list of directories.  For individual evaluation (one system output
    # against the gold standard) this is a little overkill, but this
    # lets us run multiple systems against the gold standard and get numbers
    # for each system output. Useful for annotator agreement and final system
    # evaluations. Error checking to ensure consistent files in each directory
    # will be handled by the evaluation class.
    elif system and all(os.path.isdir(d) for d in system) \
            and os.path.isdir(gs):
        # Get a dict of gold standoff annotations indexed by id.
        # os.path.join keeps this working whether or not 'gs' was given
        # with a trailing path separator.
        gold_sa = {}
        for fn in os.listdir(gs):
            sa = StandoffAnnotation(os.path.join(gs, fn))
            gold_sa[sa.id] = sa

        # One evaluation per system id; the id itself is not needed here.
        for system_sa in get_document_dict_by_system_id(system).values():
            e = eval_class(system_sa, gold_sa, **kwargs)
            e.print_report(verbose=verbose)
            evaluations.append(e)

    else:
        # Unsupported argument combination: fail loudly instead of
        # silently returning an empty result.
        raise Exception("Must pass file.xml file.xml  or [directory/]+ "
                        "directory/ on command line!")

    return evaluations[0] if len(evaluations) == 1 else evaluations
def get_document_dict_by_system_id(system_dirs):
    """Takes a list of directories and returns all of the StandoffAnnotation's
    as a system id, annotation id indexed dictionary. System id (or
    StandoffAnnotation.sys_id) is whatever values trail the XXX-YY file id.
    For example:
       301-01foo.xml
       patient id:   301
       document id:  01
       system id:    foo

    In the case where there is nothing trailing the document id,  the sys_id
    is the empty string ('').

    Every entry returned by os.listdir() for each directory is loaded as a
    StandoffAnnotation.  If the same (sys_id, id) pair occurs more than
    once, the annotation loaded last replaces the earlier one.

    Args:
        system_dirs: iterable of directory paths, with or without a trailing
            path separator.

    Returns:
        A defaultdict mapping sys_id -> {annotation id: StandoffAnnotation}.
        It is empty when no files were found.
    """
    # NOTE(review): the inner default factory is int, so looking up a missing
    # annotation id yields 0 rather than raising KeyError.  Kept unchanged for
    # backward compatibility -- confirm whether any caller relies on it.
    documents = defaultdict(lambda: defaultdict(int))

    for directory in system_dirs:
        for fn in os.listdir(directory):
            # os.path.join (rather than string concatenation) builds a valid
            # path even when the directory has no trailing separator.
            sa = StandoffAnnotation(os.path.join(directory, fn))
            documents[sa.sys_id][sa.id] = sa

    return documents