Code Example #1
File: xmgr.py  Project: stefanvds/themis
def augment_corpus_answers(corpus, qa_pairs):
    """
    Create a set of answers culled from both the corpus and the usage logs.

    These answers can be used to populate a Solr database.

    You would expect all the answers returned by the system to be in the corpus, but this is not the case. The
    'themis xmgr validate-answers' command shows which answers are missing from the corpus.

    :param corpus: answer corpus
    :type corpus: pandas.DataFrame
    :param qa_pairs: question answer pairs from usage logs
    :type qa_pairs: pandas.DataFrame
    :return: comprehensive set of answers
    :rtype: pandas.DataFrame
    """
    qa_pairs = qa_pairs.drop(
        [QUESTION, CONFIDENCE, USER_EXPERIENCE, FREQUENCY, DATE_TIME],
        axis="columns")
    # The corpus may contain multiple PAUs with the same text but different titles.
    answer_set = pandas.merge(corpus, qa_pairs, on=ANSWER,
                              how="outer").drop_duplicates([ANSWER, TITLE])
    n = len(answer_set)
    m = n - len(corpus)
    if m:
        logger.info("Added %d unique answers (%0.3f%%)" % (m, 100.0 * m / n))
    return answer_set
Code Example #2
File: xmgr.py  Project: DharmendraVaghela/themis
def get_truth_from_mapped_questions(mapped_questions):
    def get_pau_mapping(question):
        if "predefinedAnswerUnit" in question:
            return question["predefinedAnswerUnit"]
        elif "mappedQuestion" in question:
            question_id = question["mappedQuestion"]["id"]
            try:
                mapped_question = questions[question_id]
            except KeyError:
                logger.warning("Question %s mapped to non-existent question %s" % (question["id"], question_id))
                return None
            return get_pau_mapping(mapped_question)
        else:
            return None

    unmapped = 0
    # Index the questions by their question id so that mapped questions can be looked up.
    questions = dict([(question["id"], question) for question in mapped_questions])
    for question in questions.values():
        question[ANSWER_ID] = get_pau_mapping(question)
        if question[ANSWER_ID] is None:
            unmapped += 1
    questions = [q for q in questions.values() if q[ANSWER_ID] is not None]
    question_ids = [q["id"] for q in questions]
    question_text = [q["text"] for q in questions]
    answer_id = [q[ANSWER_ID] for q in questions]
    truth = pandas.DataFrame.from_dict({QUESTION_ID: question_ids, QUESTION: question_text, ANSWER_ID: answer_id})
    logger.info("%d mapped, %d unmapped" % (len(truth), unmapped))
    return truth
Code Example #3
File: analyze.py  Project: louisroehrs/themis
def system_similarity(systems_data):
    """
    For each system pair, return the number of questions they answered the same.

    :param systems_data: collated results for all systems
    :type systems_data: pandas.DataFrame
    :return: table of pairs of systems and their similarity statistics
    :rtype: pandas.DataFrame
    """
    systems_data = drop_missing(systems_data)
    systems = systems_data[SYSTEM].drop_duplicates().sort_values()
    columns = ["System 1", "System 2", "Same Answer", "Same Answer %"]
    results = pandas.DataFrame(columns=columns)
    for x, y in itertools.combinations(systems, 2):
        data_x = systems_data[systems_data[SYSTEM] == x]
        data_y = systems_data[systems_data[SYSTEM] == y]
        m = pandas.merge(data_x, data_y, on=QUESTION)
        n = len(m)
        logger.info("%d question/answer pairs in common for %s and %s" % (n, x, y))
        same_answer = sum(m["%s_x" % ANSWER] == m["%s_y" % ANSWER])
        same_answer_pct = 100.0 * same_answer / n
        results = results.append(
            pandas.DataFrame([[x, y, same_answer, same_answer_pct]], columns=columns))
    results["Same Answer"] = results["Same Answer"].astype("int64")
    return results.set_index(["System 1", "System 2"])
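The pairwise comparison above boils down to merging each pair of per-system result frames on the question column and counting matching answers. Below is a minimal, self-contained sketch of that idea; the literal column names stand in for the module's SYSTEM, QUESTION, and ANSWER constants and are assumptions.

import itertools
import pandas

# Toy collated results: one row per (system, question) with the answer that system gave.
collated = pandas.DataFrame({
    "System": ["A", "A", "A", "B", "B", "B"],
    "Question": ["q1", "q2", "q3", "q1", "q2", "q3"],
    "Answer": ["x", "y", "z", "x", "y", "w"],
})
rows = []
for s1, s2 in itertools.combinations(sorted(collated["System"].unique()), 2):
    # Merge the two systems' answers question by question, then count agreements.
    merged = pandas.merge(collated[collated["System"] == s1],
                          collated[collated["System"] == s2],
                          on="Question", suffixes=("_x", "_y"))
    same = int((merged["Answer_x"] == merged["Answer_y"]).sum())
    rows.append([s1, s2, same, 100.0 * same / len(merged)])
print(pandas.DataFrame(rows, columns=["System 1", "System 2", "Same Answer", "Same Answer %"]))
# Systems A and B agree on q1 and q2 but not q3, so Same Answer is 2 (66.7%).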
Code Example #4
File: analyze.py  Project: louisroehrs/themis
def truth_coverage(corpus, truth, systems_data):
    """
    Statistics about which answers came from the truth set broken down by system.

    :param corpus: corpus generated by 'xmgr corpus' command
    :type corpus: pandas.DataFrame
    :param truth: question to answer mapping used in training
    :type truth: pandas.DataFrame
    :param systems_data: collated results for all systems
    :type systems_data: pandas.DataFrame
    :return: truth coverage summary statistics
    :rtype: pandas.DataFrame
    """
    truth_answers = pandas.merge(corpus, truth, on=ANSWER_ID)[ANSWER].drop_duplicates()
    n = len(corpus)
    m = len(truth_answers)
    logger.info("%d answers out of %d possible answers in truth (%0.3f%%)" % (m, n, 100.0 * m / n))
    systems_data = pandas.concat(systems_data).dropna()
    answers = systems_data.groupby(SYSTEM)[[CORRECT]].count()
    answers_in_truth = systems_data[systems_data[ANSWER].isin(truth_answers)].groupby(SYSTEM)[[ANSWER]]
    summary = answers_in_truth.count()
    summary["Answers"] = answers
    summary = summary.rename(columns={ANSWER: "Answers in Truth"})
    summary["Answers in Truth %"] = 100 * summary["Answers in Truth"] / summary["Answers"]
    correct_answers = systems_data[systems_data[CORRECT]]
    correct_answers_in_truth = correct_answers[correct_answers[ANSWER].isin(truth_answers)]
    summary["Correct Answers"] = correct_answers.groupby(SYSTEM)[CORRECT].count()
    summary["Correct Answers in Truth"] = correct_answers_in_truth.groupby(SYSTEM)[CORRECT].count()
    summary["Correct Answers in Truth %"] = 100 * summary["Correct Answers in Truth"] / summary["Correct Answers"]
    return summary[
        ["Answers", "Correct Answers",
         "Answers in Truth", "Answers in Truth %",
         "Correct Answers in Truth", "Correct Answers in Truth %"]].sort_values("Correct Answers", ascending=False)
Code Example #5
File: judge.py  Project: DharmendraVaghela/themis
def interpret_annotation_assist(annotation_assist, judgment_threshold):
    """
    Convert the file produced by the Annotation Assist tool into a set of judgments that can be used by Themis.

    Convert the in purview column from an integer value to a boolean. Convert the annotation score column to a boolean
    correct column by applying a threshold. An answer can only be correct if the question is in purview. Drop any Q&A
    pairs that have multiple annotations.

    :param annotation_assist: Annotation Assist judgments
    :type annotation_assist: pandas.DataFrame
    :param judgment_threshold: threshold at or above which an answer is deemed correct
    :type judgment_threshold: float
    :return: Annotation Assist judgments with a boolean Correct column
    :rtype: pandas.DataFrame
    """
    qa_duplicates = annotation_assist[[QUESTION, ANSWER]].duplicated()
    if any(qa_duplicates):
        n = sum(qa_duplicates)
        logger.warning(
            "Dropping %d Q&A pairs with multiple annotations (%0.3f%%)" % (n, 100.0 * n / len(annotation_assist)))
        annotation_assist.drop_duplicates((QUESTION, ANSWER), keep=False, inplace=True)
    annotation_assist[IN_PURVIEW] = annotation_assist[IN_PURVIEW].astype("bool")
    annotation_assist[CORRECT] = \
        annotation_assist[IN_PURVIEW] & (annotation_assist[ANNOTATION_SCORE] >= judgment_threshold)
    logger.info("Processed %d judgments" % len(annotation_assist))
    return annotation_assist.drop(ANNOTATION_SCORE, axis="columns")
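The purview and threshold logic can be traced on a toy frame. In this sketch the literal column names stand in for the module's QUESTION, ANSWER, IN_PURVIEW, ANNOTATION_SCORE, and CORRECT constants, and the threshold value is illustrative.

import pandas

# Toy Annotation Assist export: purview flag as 0/1, annotation score as a float.
judgments = pandas.DataFrame({
    "Question": ["q1", "q2", "q3"],
    "Answer": ["a1", "a2", "a3"],
    "In Purview": [1, 1, 0],
    "Annotation Score": [80.0, 10.0, 95.0],
})
threshold = 50.0
judgments["In Purview"] = judgments["In Purview"].astype("bool")
# Correct only when the question is in purview and the score clears the threshold.
judgments["Correct"] = judgments["In Purview"] & (judgments["Annotation Score"] >= threshold)
print(judgments.drop("Annotation Score", axis="columns"))
# Only q1 comes out Correct: q2 scores below the threshold and q3 is out of purview.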
Code Example #6
def interpret_annotation_assist(annotation_assist, judgment_threshold):
    """
    Convert the file produced by the Annotation Assist tool into a set of judgments that can be used by Themis.

    Convert the in purview column from an integer value to a boolean. Convert the annotation score column to a boolean
    correct column by applying a threshold. An answer can only be correct if the question is in purview. Drop any Q&A
    pairs that have multiple annotations.

    :param annotation_assist: Annotation Assist judgments
    :type annotation_assist: pandas.DataFrame
    :param judgment_threshold: threshold at or above which an answer is deemed correct
    :type judgment_threshold: float
    :return: Annotation Assist judgments with a boolean Correct column
    :rtype: pandas.DataFrame
    """
    qa_duplicates = annotation_assist[[QUESTION, ANSWER]].duplicated()
    if any(qa_duplicates):
        n = sum(qa_duplicates)
        logger.warning(
            "Dropping %d Q&A pairs with multiple annotations (%0.3f%%)" %
            (n, 100.0 * n / len(annotation_assist)))
        annotation_assist.drop_duplicates((QUESTION, ANSWER),
                                          keep=False,
                                          inplace=True)
    annotation_assist[IN_PURVIEW] = annotation_assist[IN_PURVIEW].astype(
        "bool")
    annotation_assist[CORRECT] = \
        annotation_assist[IN_PURVIEW] & (annotation_assist[ANNOTATION_SCORE] >= judgment_threshold)
    logger.info("Processed %d judgments" % len(annotation_assist))
    return annotation_assist.drop(ANNOTATION_SCORE, axis="columns")
Code Example #7
def system_similarity(systems_data):
    """
    For each system pair, return the number of questions they answered the same.

    :param systems_data: collated results for all systems
    :type systems_data: pandas.DataFrame
    :return: table of pairs of systems and their similarity statistics
    :rtype: pandas.DataFrame
    """
    systems_data = drop_missing(systems_data)
    systems = systems_data[SYSTEM].drop_duplicates().sort_values()
    columns = ["System 1", "System 2", "Same Answer", "Same Answer %"]
    results = pandas.DataFrame(columns=columns)
    for x, y in itertools.combinations(systems, 2):
        data_x = systems_data[systems_data[SYSTEM] == x]
        data_y = systems_data[systems_data[SYSTEM] == y]
        m = pandas.merge(data_x, data_y, on=QUESTION)
        n = len(m)
        logger.info("%d question/answer pairs in common for %s and %s" %
                    (n, x, y))
        same_answer = sum(m["%s_x" % ANSWER] == m["%s_y" % ANSWER])
        same_answer_pct = 100.0 * same_answer / n
        results = results.append(
            pandas.DataFrame([[x, y, same_answer, same_answer_pct]],
                             columns=columns))
    results["Same Answer"] = results["Same Answer"].astype("int64")
    return results.set_index(["System 1", "System 2"])
Code Example #8
File: analyze.py  Project: cognitive-catalyst/themis
def kfold_split(df, outdir, _folds=5, _training_header=False):
    """
    
    Split the data-set into equal training and testing sets. Put training and testing set into local directory
    as csv files.

    :param df: data frame to be splited
    :param outdir: output directory path
    :param _folds: number of folds to be performed
    :param _training_header: header og the training file
    :return: list of directory for training set and teting set
    """
    # Randomize the order of the input dataframe
    df = df.iloc[np.random.permutation(len(df))]
    df = df.reset_index(drop=True)
    foldSize = int(math.ceil(len(df) / float(_folds)))
    logger.info("Total records: " + str(len(df)))
    logger.info("Fold size: " + str(foldSize))
    logger.info("Results written to output folder " + outdir)

    for x in range(0, _folds):
        fold_low = x * foldSize
        fold_high = (x + 1) * foldSize

        if fold_high >= len(df):
            fold_high = len(df)

        test_df = df.iloc[fold_low:fold_high]
        train_df = df.drop(df.index[fold_low:fold_high])

        test_df.to_csv(os.path.join(outdir, 'Test' + str(x) + '.csv'), encoding='utf-8', index=False)
        train_df.to_csv(os.path.join(outdir, 'Train' + str(x) + '.csv'), header=_training_header, encoding='utf-8', index=False)

        logger.info("--- Train_Fold_" + str(x) + ' size = ' + str(len(train_df)))
        logger.info("--- Test_Fold_" + str(x) + ' size = ' + str(len(test_df)))
Code Example #9
File: analyze.py  Project: cognitive-catalyst/themis
def truth_coverage(corpus, truth, systems_data):
    """
    Statistics about which answers came from the truth set broken down by system.

    :param corpus: corpus generated by 'xmgr corpus' command
    :type corpus: pandas.DataFrame
    :param truth: question to answer mapping used in training
    :type truth: pandas.DataFrame
    :param systems_data: collated results for all systems
    :type systems_data: pandas.DataFrame
    :return: truth coverage summary statistics
    :rtype: pandas.DataFrame
    """
    truth_answers = pandas.merge(corpus, truth, on=ANSWER_ID)[ANSWER].drop_duplicates()
    n = len(corpus)
    m = len(truth_answers)
    logger.info("%d answers out of %d possible answers in truth (%0.3f%%)" % (m, n, 100.0 * m / n))
    systems_data = pandas.concat(systems_data).dropna()
    answers = systems_data.groupby(SYSTEM)[[CORRECT]].count()
    answers_in_truth = systems_data[systems_data[ANSWER].isin(truth_answers)].groupby(SYSTEM)[[ANSWER]]
    summary = answers_in_truth.count()
    summary["Answers"] = answers
    summary = summary.rename(columns={ANSWER: "Answers in Truth"})
    summary["Answers in Truth %"] = 100 * summary["Answers in Truth"] / summary["Answers"]
    correct_answers = systems_data[systems_data[CORRECT]]
    correct_answers_in_truth = correct_answers[correct_answers[ANSWER].isin(truth_answers)]
    summary["Correct Answers"] = correct_answers.groupby(SYSTEM)[CORRECT].count()
    summary["Correct Answers in Truth"] = correct_answers_in_truth.groupby(SYSTEM)[CORRECT].count()
    summary["Correct Answers in Truth %"] = 100 * summary["Correct Answers in Truth"] / summary["Correct Answers"]
    return summary[
        ["Answers", "Correct Answers",
         "Answers in Truth", "Answers in Truth %",
         "Correct Answers in Truth", "Correct Answers in Truth %"]].sort_values("Correct Answers", ascending=False)
Code Example #10
def get_items(item_type, names, checkpoint, get_item, write_frequency):
    """
    Given a list of item names and a checkpoint, this function recovers any previously checkpointed items, then gets
    the remaining items and writes them to a checkpoint.

    :param item_type: name of item type for use in logging
    :type item_type: str
    :param names: list of item names
    :type names: list
    :param checkpoint: checkpoint to periodically write items to
    :type checkpoint: DataFrameCheckpoint
    :param get_item: function that returns an item given a name
    :type get_item: func
    :param write_frequency: how often to log a progress message
    :type write_frequency: int
    :return: the checkpoint
    :rtype: DataFrameCheckpoint
    """
    recovered = checkpoint.recovered
    if recovered:
        logger.info("Recovered %d %s from previous run" % (len(recovered), item_type))
    total = len(names)
    start = 1 + len(recovered)
    try:
        names_to_get = sorted(set(names) - recovered)
        for i, name in enumerate(names_to_get, start):
            if i == start or i == total or i % write_frequency == 0:
                logger.info("Get " + percent_complete_message(item_type, i, total))
            item = get_item(name)
            checkpoint.write(name, item)
    finally:
        checkpoint.close()
    return checkpoint
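get_items only relies on the checkpoint exposing a recovered set plus write and close methods. A hypothetical in-memory stand-in (not the project's DataFrameCheckpoint, which persists rows to CSV) makes the resume behaviour easy to see.

class InMemoryCheckpoint(object):
    """Hypothetical stand-in for DataFrameCheckpoint, for illustration only."""

    def __init__(self, recovered=()):
        self.recovered = set(recovered)
        self.rows = []

    def write(self, name, item):
        self.rows.append((name, item))

    def close(self):
        pass

checkpoint = InMemoryCheckpoint(recovered=["a"])
# Only the names that were not recovered from a previous run are fetched again.
names_to_get = sorted({"a", "b", "c"} - checkpoint.recovered)
for name in names_to_get:
    checkpoint.write(name, name.upper())
checkpoint.close()
print(checkpoint.rows)  # [('b', 'B'), ('c', 'C')]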
Code Example #11
def get_items(item_type, names, checkpoint, get_item, write_frequency):
    """
    Given a list of item names and a checkpoint, this function recovers any previously checkpointed items, then gets
    the remaining items and writes them to a checkpoint.

    :param item_type: name of item type for use in logging
    :type item_type: str
    :param names: list of item names
    :type names: list
    :param checkpoint: checkpoint to periodically write items to
    :type checkpoint: DataFrameCheckpoint
    :param get_item: function that returns an item given a name
    :type get_item: func
    :param write_frequency: how often to log a progress message
    :type write_frequency: int
    :return: the checkpoint
    :rtype: DataFrameCheckpoint
    """
    recovered = checkpoint.recovered
    if recovered:
        logger.info("Recovered %d %s from previous run" %
                    (len(recovered), item_type))
    total = len(names)
    start = 1 + len(recovered)
    try:
        names_to_get = sorted(set(names) - recovered)
        for i, name in enumerate(names_to_get, start):
            if i == start or i == total or i % write_frequency == 0:
                logger.info("Get " +
                            percent_complete_message(item_type, i, total))
            item = get_item(name)
            checkpoint.write(name, item)
    finally:
        checkpoint.close()
    return checkpoint
Code Example #12
def augment_usage_log(usage_log, judgments):
    """
    Add In Purview and Annotation Score information to system usage log.

    :param usage_log: user interaction logs from QuestionsData.csv XMGR report
    :type usage_log: pandas.DataFrame
    :param judgments: judgments
    :type judgments: pandas.DataFrame
    :return: user interaction logs with additional columns
    :rtype: pandas.DataFrame
    """
    usage_log = usage_log.rename(columns={
        QUESTION_TEXT: QUESTION,
        TOP_ANSWER_TEXT: ANSWER
    })
    augmented = pandas.merge(usage_log,
                             judgments,
                             on=(QUESTION, ANSWER),
                             how="left")
    n = len(usage_log[[QUESTION, ANSWER]].drop_duplicates())
    if n:
        m = len(judgments)
        logger.info("%d unique question/answer pairs, %d judgments (%0.3f%%)" %
                    (n, m, 100.0 * m / n))
    return augmented.rename(columns={
        QUESTION: QUESTION_TEXT,
        ANSWER: TOP_ANSWER_TEXT
    })
Code Example #13
def filter_judged_answers(systems_data, correct, system_names):
    """
    Filter out just the correct or incorrect in-purview answers.

    :param systems_data: questions, answers, and judgments across systems
    :type systems_data: list of pandas.DataFrame
    :param correct: filter correct or incorrect answers?
    :type correct: bool
    :param system_names: systems to filter to, if None show all systems
    :type system_names: list of str
    :return: set of in-purview questions with answers judged either correct or incorrect
    :rtype: pandas.DataFrame
    """
    systems_data = pandas.concat(systems_data).dropna()
    if system_names is not None:
        systems_data = systems_data[systems_data[SYSTEM].isin(system_names)]
    filtered = systems_data[(systems_data[IN_PURVIEW] == True)
                            & (systems_data[CORRECT] == correct)]
    n = len(systems_data)
    m = len(filtered)
    logger.info("%d in-purview %s answers out of %d (%0.3f%%)" %
                (m, {
                    True: "correct",
                    False: "incorrect"
                }[correct], n, 100 * m / n))
    return filtered
Code Example #14
File: xmgr.py  Project: DharmendraVaghela/themis
def get_pau(pau_id):
    paus = xmgr.get_paus(pau_id)
    if paus:
        pau = paus[0]
        return {ANSWER: pau["responseMarkup"], TITLE: pau["title"], FILENAME: pau["sourceName"]}
    else:
        logger.info("Could not download pau %s" % pau_id)
        return None
Code Example #15
File: fixup.py  Project: cognitive-catalyst/themis
def deakin(usage_log):
    low_confidence_response = usage_log[ANSWER].str.contains(
        "Here's Watson's response, but remember it's best to use full sentences.")
    logger.info("Removed %d questions with low confidence responses" % sum(low_confidence_response))
    usage_log = usage_log[~low_confidence_response]
    usage_log = filter_usage_log_by_user_experience(usage_log, ["Dialog Response"])
    usage_log = fix_confidence_ranges(usage_log)
    return usage_log
Code Example #16
def compare_systems(systems_data, x, y, comparison_type):
    """
    On which questions did system x do better or worse than system y?

    System x did better than system y if it correctly answered a question when system y did not, and vice versa.

    :param systems_data: collated results for all systems
    :type systems_data: pandas.DataFrame
    :param x: system name
    :type x: str
    :param y: system name
    :type y: str
    :param comparison_type: "better" or "worse"
    :type comparison_type: str
    :return: all question/answer pairs from system x that were either better or worse than system y
    :rtype: pandas.DataFrame
    """
    def col_name(type, system):
        return type + " " + system

    systems_data = drop_missing(systems_data)
    systems_data = systems_data[systems_data[IN_PURVIEW]]
    data_x = systems_data[systems_data[SYSTEM] == x]
    data_y = systems_data[systems_data[SYSTEM] == y][[
        QUESTION, ANSWER, CONFIDENCE, CORRECT
    ]]
    questions = pandas.merge(data_x,
                             data_y,
                             on=QUESTION,
                             how="left",
                             suffixes=(" " + x, " " + y)).dropna()
    n = len(questions)
    logger.info("%d shared question/answer pairs between %s and %s" %
                (n, x, y))
    x_correct = col_name(CORRECT, x)
    y_correct = col_name(CORRECT, y)
    if comparison_type == "better":
        a = questions[x_correct] == True
        b = questions[y_correct] == False
    elif comparison_type == "worse":
        a = questions[x_correct] == False
        b = questions[y_correct] == True
    else:
        raise ValueError("Invalid comparison type %s" % comparison_type)
    d = questions[a & b]
    m = len(d)
    logger.info("%d %s (%0.3f%%)" % (m, comparison_type, 100.0 * m / n))
    d = d[[
        QUESTION, FREQUENCY,
        col_name(ANSWER, x),
        col_name(CONFIDENCE, x),
        col_name(ANSWER, y),
        col_name(CONFIDENCE, y)
    ]]
    d = d.sort_values([col_name(CONFIDENCE, x), FREQUENCY, QUESTION],
                      ascending=(False, False, True))
    return d.set_index(QUESTION)
Code Example #17
File: main.py  Project: ManaliChanchlani/themis
def judge_sample_handler(args):
    questions = pandas.concat(args.judgments)[[QUESTION]].drop_duplicates()
    sample = pandas.merge(questions, args.frequency, on=QUESTION, how="left")
    n = len(sample)
    logger.info("%d judged questions" % n)
    m = sum(sample[FREQUENCY].isnull())
    if m:
        logger.warning("Missing frequencies for %d questions (%0.3f%%)" % (m, 100.0 * m / n))
    print_csv(QuestionFrequencyFileType.output_format(sample))
Code Example #18
File: xmgr.py  Project: DharmendraVaghela/themis
def download_corpus_from_xmgr(xmgr, output_directory, checkpoint_frequency, max_docs):
    """
    Download the corpus from an XMGR project

    A corpus is a mapping of answer text to answer Ids. It also contains answer titles and the names of the documents
    from which the answers were extracted.

    This can take a long time to complete, so intermediate results are saved in the directory. If you restart an
    incomplete download it will pick up where it left off.

    :param xmgr: connection to an XMGR project REST API
    :type xmgr: XmgrProject
    :param output_directory: directory into which to write the corpus.csv file
    :type output_directory: str
    :param checkpoint_frequency: how often to write intermediate results to a checkpoint file
    :type checkpoint_frequency: int
    :param max_docs: maximum number of corpus documents to download, if None, download them all
    :type max_docs: int
    """
    document_ids_csv = os.path.join(output_directory, "document_ids.csv")
    corpus_csv = os.path.join(output_directory, "corpus.csv")
    if os.path.isfile(corpus_csv) and not os.path.isfile(document_ids_csv):
        logger.info("Corpus already downloaded")
        return
    logger.info("Download corpus from %s" % xmgr)
    document_ids = sorted(set(document["id"] for document in xmgr.get_documents()))
    document_ids = document_ids[:max_docs]
    n = len(document_ids)
    downloaded_document_ids = DataFrameCheckpoint(document_ids_csv, [DOCUMENT_ID, "Paus"], checkpoint_frequency)
    corpus = DataFrameCheckpoint(corpus_csv, CorpusFileType.columns)
    try:
        if downloaded_document_ids.recovered:
            logger.info("Recovered %d documents from previous run" % len(downloaded_document_ids.recovered))
        document_ids = sorted(set(document_ids) - downloaded_document_ids.recovered)
        m = len(document_ids)
        start = len(downloaded_document_ids.recovered) + 1
        if m:
            for i, document_id in enumerate(document_ids, start):
                if i % checkpoint_frequency == 0 or i == start or i == m:
                    corpus.flush()
                    logger.info(percent_complete_message("Get PAUs from document", i, n))
                paus = xmgr.get_paus_from_document(document_id)
                # The document id and number of PAUs are both integers. Cast them to strings, otherwise pandas will
                # write them as floats.
                for pau in paus:
                    corpus.write(pau["id"], pau["responseMarkup"], pau["title"], pau["sourceName"], str(document_id))
                downloaded_document_ids.write(str(document_id), str(len(paus)))
    finally:
        downloaded_document_ids.close()
        corpus.close()
    corpus = from_csv(corpus_csv).drop_duplicates(ANSWER_ID)
    to_csv(corpus_csv, CorpusFileType.output_format(corpus))
    docs = len(from_csv(document_ids_csv))
    os.remove(document_ids_csv)
    logger.info("%d documents and %d PAUs in corpus" % (docs, len(corpus)))
Code Example #19
File: analyze.py  Project: cognitive-catalyst/themis
def nlc_router_test(url, username, password, collate_file, path):
    """
    Querying NLC for testing set to determine the system(NLC or Solr) and then lookup related
    fields from collated file (used as an input to the oracle experiment)

    :param url: URL of NLC instance
    :param username: NLC Username
    :param password: NLC password
    :param oracle_out: file created by oracle experiment
    :param collate_file: collated file created for oracle experiment as input
    :param path: directory path to save intermediate results

    :return: output file with best system NLC or Solr and relevant fields
    """
    def log_correct(system_data, name):
        n = len(system_data)
        m = sum(system_data[CORRECT])
        logger.info("%d of %d correct in %s (%0.3f%%)" % (m, n, name, 100.0 * m / n))

    # Import the list of classifiers from a file.
    classifier_list = []
    with open(os.path.join(path, 'classifier.json'), 'r') as f:
        data = json.load(f)
    for x in range(0, NLC_ROUTER_FOLDS):
        classifier_list.append(data[x]['NLC+Solr Oracle_fold_{0}'.format(str(x))].encode("utf-8"))

    for x in range(0, NLC_ROUTER_FOLDS):
        test = pandas.read_csv(os.path.join(path, "Test{0}.csv".format(str(x))))
        test = test[[QUESTION]]
        test[QUESTION] = test[QUESTION].str.replace("\n", " ")
        classifier_id = classifier_list[x]
        n = NLC(url, username, password, classifier_id, test)
        out_file = os.path.join(path, "Out{0}.csv".format(str(x)))
        logger.info("Testing on fold {0} using NLC classifier {1}".format(str(x), str(classifier_list[x])))
        answer_router_questions(n, set(test[QUESTION]), out_file)

    # Concatenate the per-fold outputs into a single CSV file.
    dfList = []
    columns = [QUESTION, SYSTEM]
    for x in range(0, NLC_ROUTER_FOLDS):
        df = pandas.read_csv(os.path.join(path, "Out{0}.csv".format(str(x))), header=0)
        dfList.append(df)

    concateDf = pandas.concat(dfList, axis=0)
    concateDf.columns = columns
    concateDf.to_csv(os.path.join(path, "Interim-Result.csv"), encoding='utf-8', index=None)

    # Join operation to get fields from oracle collated file
    result = pandas.merge(concateDf, collate_file, on=[QUESTION, SYSTEM])
    result = result.rename(columns={SYSTEM: ANSWERING_SYSTEM})
    result[SYSTEM] = 'NLC-as-router'
    result[CONFIDENCE] = __standardize_confidence(result)
    log_correct(result, 'NLC-as-router')
    return result
Code Example #20
File: fixup.py  Project: cognitive-catalyst/themis
def deakin(usage_log):
    low_confidence_response = usage_log[ANSWER].str.contains(
        "Here's Watson's response, but remember it's best to use full sentences."
    )
    logger.info("Removed %d questions with low confidence responses" %
                sum(low_confidence_response))
    usage_log = usage_log[~low_confidence_response]
    usage_log = filter_usage_log_by_user_experience(usage_log,
                                                    ["Dialog Response"])
    usage_log = fix_confidence_ranges(usage_log)
    return usage_log
Code Example #21
def train_nlc(url, username, password, truth, name):
    logger.info("Train model %s with %d instances" % (name, len(truth)))
    with tempfile.TemporaryFile() as training_file:
        # NLC cannot handle newlines.
        truth[QUESTION] = truth[QUESTION].str.replace("\n", " ")
        to_csv(training_file, truth[[QUESTION, ANSWER_ID]], header=False, index=False)
        training_file.seek(0)
        nlc = NaturalLanguageClassifier(url=url, username=username, password=password)
        r = nlc.create(training_data=training_file, name=name)
        logger.info(pretty_print_json(r))
    return r["classifier_id"]
Code Example #22
File: nlc.py  Project: DharmendraVaghela/themis
def train_nlc(url, username, password, truth, name):
    logger.info("Train model %s with %d instances" % (name, len(truth)))
    with tempfile.TemporaryFile() as training_file:
        # NLC cannot handle newlines.
        truth[QUESTION] = truth[QUESTION].str.replace("\n", " ")
        to_csv(training_file, truth[[QUESTION, ANSWER_ID]], header=False, index=False)
        training_file.seek(0)
        nlc = NaturalLanguageClassifier(url=url, username=username, password=password)
        r = nlc.create(training_data=training_file, name=name)
        logger.info(pretty_print_json(r))
    return r["classifier_id"]
Code Example #23
File: xmgr.py  Project: stefanvds/themis
def get_pau(pau_id):
    paus = xmgr.get_paus(pau_id)
    if paus:
        pau = paus[0]
        return {
            ANSWER: pau["responseMarkup"],
            TITLE: pau["title"],
            FILENAME: pau["sourceName"]
        }
    else:
        logger.info("Could not download pau %s" % pau_id)
        return None
Code Example #24
File: analyze.py  Project: cognitive-catalyst/themis
def fallback_combination(systems_data, default_system, secondary_system):
    """
    Combine results from two systems into a single fallback system. The default system will answer the question if
    the confidence is above a certain threshold. This method will find the optimal confidence threshold.

    :param systems_data: collated results for the input systems
    :type systems_data: pandas.DataFrame
    :param default_system: the name of the default system (if confidence > t)
    :type default_system: str
    :param secondary_system: the name of the fallback system (if default_confidence < t)
    :type secondary_system: str
    :return: Fallback results in collated format
    :rtype: pandas.DataFrame
    """
    systems_data = drop_missing(systems_data)

    default_system_data = systems_data[systems_data[SYSTEM] == default_system]
    secondary_system_data = systems_data[systems_data[SYSTEM] == secondary_system]

    intersecting_questions = set(default_system_data[QUESTION]).intersection(set(secondary_system_data[QUESTION]))

    logger.warn("{0} questions in default system".format(len(default_system_data)))
    logger.warn("{0} questions in secondary system".format(len(secondary_system_data)))
    logger.warn("{0} questions in overlapping set".format(len(intersecting_questions)))

    default_system_data = default_system_data[default_system_data[QUESTION].isin(intersecting_questions)]
    secondary_system_data = secondary_system_data[secondary_system_data[QUESTION].isin(intersecting_questions)]

    unique_confidences = default_system_data[CONFIDENCE].unique()

    best_threshold, best_precision = 0, 0
    for threshold in unique_confidences:
        combined_system = _create_combined_fallback_system_at_threshold(default_system_data, secondary_system_data, threshold)

        system_precision = precision(combined_system, 0)
        if system_precision > best_precision:
            best_precision = system_precision
            best_threshold = threshold

    logger.info("Default system accuracy:   {0}%".format(str(precision(default_system_data, 0) * 100)[:4]))
    logger.info("Secondary system accuracy: {0}%".format(str(precision(secondary_system_data, 0) * 100)[:4]))
    logger.info("Combined system accuracy:  {0}%".format(str(best_precision * 100)[:4]))

    logger.info("Combined system best threshold: {0}".format(best_threshold))

    best_system = _create_combined_fallback_system_at_threshold(default_system_data, secondary_system_data, best_threshold)
    best_system[ANSWERING_SYSTEM] = best_system[SYSTEM]
    best_system[SYSTEM] = "{0}_FALLBACK_{1}_AT_{2}".format(default_system, secondary_system, str(best_threshold)[:4])

    logger.info("Questions answered by {0}: {1}%".format(default_system, str(100 * float(len(best_system[best_system[ANSWERING_SYSTEM] == default_system])) / len(best_system))[:4]))

    best_system[CONFIDENCE] = __standardize_confidence(best_system)
    return best_system
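The threshold search above amounts to sweeping the default system's unique confidence values and keeping the one that maximizes accuracy of the combined routing. The following self-contained sketch shows that sweep on toy data; it does not use the project's _create_combined_fallback_system_at_threshold or precision helpers, and the column names are illustrative.

import pandas

# Toy collated results for two systems answering the same three questions, in the same row order,
# so plain index alignment is enough for this illustration.
default = pandas.DataFrame({"Question": ["q1", "q2", "q3"],
                            "Confidence": [0.9, 0.4, 0.2],
                            "Correct": [True, False, False]})
secondary = pandas.DataFrame({"Question": ["q1", "q2", "q3"],
                              "Correct": [False, True, True]})

def combined_accuracy(threshold):
    # The default system answers when its confidence clears the threshold; otherwise fall back.
    use_default = default["Confidence"] >= threshold
    return default["Correct"].where(use_default, secondary["Correct"]).mean()

best_threshold = max(default["Confidence"].unique(), key=combined_accuracy)
print(best_threshold, combined_accuracy(best_threshold))
# A threshold of 0.9 routes q2 and q3 to the secondary system and gets all three questions right.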
Code Example #25
File: analyze.py  Project: cognitive-catalyst/themis
def __call__(self, filename):
    if os.path.isfile(filename):
        collated = super(self.__class__, self).__call__(filename)
        m = sum(collated[collated[IN_PURVIEW] == False][CORRECT])
        if m:
            n = len(collated)
            logger.warning(
                "%d out of %d question/answer pairs in %s are marked as out of purview but correct (%0.3f%%)"
                % (m, n, filename, 100.0 * m / n))
        return collated
    else:
        logger.info("{0} does not exist".format(filename))
        return None
Code Example #26
def kfold_split(df, outdir, _folds=5):
    # Randomize the order of the input dataframe
    df = df.iloc[np.random.permutation(len(df))]
    df = df.reset_index(drop=True)
    foldSize = int(math.ceil(len(df) / float(_folds)))
    logger.info("Total records: " + str(len(df)))
    logger.info("Fold size: " + str(foldSize))

    for x in range(0, _folds):
        fold_low = x * foldSize
        fold_high = (x + 1) * foldSize

        if fold_high >= len(df):
            fold_high = len(df)

        test_df = df.iloc[fold_low:fold_high]
        train_df = df.drop(df.index[fold_low:fold_high])

        test_df.to_csv(os.path.join(outdir, 'Test' + str(x) + '.csv'),
                       encoding='utf-8',
                       index=False)
        train_df.to_csv(os.path.join(outdir, 'Train' + str(x) + '.csv'),
                        header=False,
                        encoding='utf-8',
                        index=False)

        logger.info("--- Train_Fold_" + str(x) + ' size = ' +
                    str(len(train_df)))
        logger.info("--- Test_Fold_" + str(x) + ' size = ' + str(len(test_df)))
Code Example #27
File: fixup.py  Project: louisroehrs/themis
def filter_usage_log_by_user_experience(usage_log, disallowed):
    """
    Only retain questions whose 'user experience' value does not appear on a blacklist.

    :param usage_log: QuestionsData.csv report log
    :type usage_log: pandas.DataFrame
    :param disallowed: set of disallowed 'user experience' values
    :type disallowed: enumerable set of str
    :return: usage log with questions removed
    :rtype: pandas.DataFrame
    """
    n = len(usage_log)
    usage_log = usage_log[~usage_log[USER_EXPERIENCE].isin(disallowed)]
    logger.info("Removed %d questions with user experience '%s'" % ((n - len(usage_log)), ",".join(disallowed)))
    return usage_log
Code Example #28
File: main.py  Project: ManaliChanchlani/themis
def extract_handler(args):
    # Do custom fixup of usage logs.
    usage_log = pandas.concat(args.usage_log)
    n = len(usage_log)
    if args.before or args.after:
        usage_log = filter_usage_log_by_date(usage_log, args.before, args.after)
    user_experience = set(args.user_experience) | {"DIALOG"}  # DIALOG is always disallowed
    usage_log = filter_usage_log_by_user_experience(usage_log, user_experience)
    if args.deakin:
        usage_log = deakin(usage_log)
    m = n - len(usage_log)
    if n:
        logger.info("Removed %d of %d questions (%0.3f%%)" % (m, n, 100.0 * m / n))
    # Extract Q&A pairs from fixed up usage logs.
    qa_pairs = extract_question_answer_pairs_from_usage_logs(usage_log)
    print_csv(QAPairFileType.output_format(qa_pairs))
Code Example #29
def filter_usage_log_by_user_experience(usage_log, disallowed):
    """
    Only retain questions whose 'user experience' value does not appear on a blacklist.

    :param usage_log: QuestionsData.csv report log
    :type usage_log: pandas.DataFrame
    :param disallowed: set of disallowed 'user experience' values
    :type disallowed: enumerable set of str
    :return: usage log with questions removed
    :rtype: pandas.DataFrame
    """
    n = len(usage_log)
    usage_log = usage_log[~usage_log[USER_EXPERIENCE].isin(disallowed)]
    logger.info("Removed %d questions with user experience '%s'" %
                ((n - len(usage_log)), ",".join(disallowed)))
    return usage_log
Code Example #30
File: analyze.py  Project: louisroehrs/themis
def compare_systems(systems_data, x, y, comparison_type):
    """
    On which questions did system x do better or worse than system y?

    System x did better than system y if it correctly answered a question when system y did not, and vice versa.

    :param systems_data: collated results for all systems
    :type systems_data: pandas.DataFrame
    :param x: system name
    :type x: str
    :param y: system name
    :type y: str
    :param comparison_type: "better" or "worse"
    :type comparison_type: str
    :return: all question/answer pairs from system x that were either better or worse than system y
    :rtype: pandas.DataFrame
    """

    def col_name(type, system):
        return type + " " + system

    systems_data = drop_missing(systems_data)
    systems_data = systems_data[systems_data[IN_PURVIEW]]
    data_x = systems_data[systems_data[SYSTEM] == x]
    data_y = systems_data[systems_data[SYSTEM] == y][[QUESTION, ANSWER, CONFIDENCE, CORRECT]]
    questions = pandas.merge(data_x, data_y, on=QUESTION, how="left", suffixes=(" " + x, " " + y)).dropna()
    n = len(questions)
    logger.info("%d shared question/answer pairs between %s and %s" % (n, x, y))
    x_correct = col_name(CORRECT, x)
    y_correct = col_name(CORRECT, y)
    if comparison_type == "better":
        a = questions[x_correct] == True
        b = questions[y_correct] == False
    elif comparison_type == "worse":
        a = questions[x_correct] == False
        b = questions[y_correct] == True
    else:
        raise ValueError("Invalid comparison type %s" % comparison_type)
    d = questions[a & b]
    m = len(d)
    logger.info("%d %s (%0.3f%%)" % (m, comparison_type, 100.0 * m / n))
    d = d[[QUESTION, FREQUENCY,
           col_name(ANSWER, x), col_name(CONFIDENCE, x), col_name(ANSWER, y), col_name(CONFIDENCE, y)]]
    d = d.sort_values([col_name(CONFIDENCE, x), FREQUENCY, QUESTION], ascending=(False, False, True))
    return d.set_index(QUESTION)
Code Example #31
File: judge.py  Project: DharmendraVaghela/themis
def augment_usage_log(usage_log, judgments):
    """
    Add In Purview and Annotation Score information to system usage log.

    :param usage_log: user interaction logs from QuestionsData.csv XMGR report
    :type usage_log: pandas.DataFrame
    :param judgments: judgments
    :type judgments: pandas.DataFrame
    :return: user interaction logs with additional columns
    :rtype: pandas.DataFrame
    """
    usage_log = usage_log.rename(columns={QUESTION_TEXT: QUESTION, TOP_ANSWER_TEXT: ANSWER})
    augmented = pandas.merge(usage_log, judgments, on=(QUESTION, ANSWER), how="left")
    n = len(usage_log[[QUESTION, ANSWER]].drop_duplicates())
    if n:
        m = len(judgments)
        logger.info("%d unique question/answer pairs, %d judgments (%0.3f%%)" % (n, m, 100.0 * m / n))
    return augmented.rename(columns={QUESTION: QUESTION_TEXT, ANSWER: TOP_ANSWER_TEXT})
Code Example #32
File: fixup.py  Project: cognitive-catalyst/themis
def filter_corpus(corpus, max_size):
    """
    Remove corpus entries above a specified size.

    :param corpus: corpus with Answer Id and Answer columns
    :type corpus: pandas.DataFrame
    :param max_size: maximum allowed Answer size in characters
    :type max_size: int
    :return: corpus with oversize answers removed
    :rtype: pandas.DataFrame
    """
    if max_size is not None:
        filtered = corpus[corpus[ANSWER].str.len() <= max_size]
        n = len(corpus)
        if n:
            m = n - len(filtered)
            logger.info("Filtered %d of %d answers over size %d (%0.3f%%)" % (m, n, max_size, 100.0 * m / n))
        corpus = filtered
    return corpus.set_index(ANSWER_ID)
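A minimal sketch of the size filter above, with literal column names standing in for the ANSWER and ANSWER_ID constants; the maximum size is illustrative.

import pandas

corpus = pandas.DataFrame({
    "Answer Id": ["p1", "p2", "p3"],
    "Answer": ["short", "medium length", "x" * 500],
})
max_size = 100
filtered = corpus[corpus["Answer"].str.len() <= max_size]
print(len(corpus) - len(filtered))      # 1 oversize answer removed
print(filtered.set_index("Answer Id"))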
Code Example #33
File: answer.py  Project: stefanvds/themis
def get_answers_from_usage_log(questions, qa_pairs_from_logs):
    """
    Get answers returned by WEA to questions by looking them up in the usage log.

    Each question in the Q&A pairs must have a unique answer.

    :param questions: questions to look up in the usage logs
    :type questions: pandas.DataFrame
    :param qa_pairs_from_logs: question/answer pairs extracted from user logs
    :type qa_pairs_from_logs: pandas.DataFrame
    :return: Question, Answer, and Confidence
    :rtype: pandas.DataFrame
    """
    answers = pandas.merge(questions, qa_pairs_from_logs, on=QUESTION, how="left")
    missing_answers = answers[answers[ANSWER].isnull()]
    if len(missing_answers):
        logger.warning("%d questions without answers" % len(missing_answers))
    logger.info("Answered %d questions" % len(answers))
    answers = answers[[QUESTION, ANSWER, CONFIDENCE]].sort_values([QUESTION, CONFIDENCE], ascending=[True, False])
    return answers.set_index(QUESTION)
Code Example #34
File: main.py  Project: ManaliChanchlani/themis
def collate_handler(parser, args):
    labeled_qa_pairs = answer_labels(parser, args)
    judgments = pandas.concat(args.judgments)
    all_systems = []
    for label, qa_pairs in labeled_qa_pairs:
        # Only consider the questions listed in the frequency file.
        qa_pairs = qa_pairs[qa_pairs[QUESTION].isin(args.frequency[QUESTION])]
        collated = add_judgments_and_frequencies_to_qa_pairs(qa_pairs, judgments, args.frequency, args.remove_newlines)
        collated[SYSTEM] = label
        all_systems.append(collated)
    collated = pandas.concat(all_systems)
    logger.info("%d question/answer pairs" % len(collated))
    n = len(collated)
    for column, s in [(ANSWER, "answers"), (IN_PURVIEW, "in purview judgments"), (CORRECT, "correctness judgments")]:
        m = sum(collated[column].isnull())
        if m:
            logger.warning("%d question/answer pairs out of %d missing %s (%0.3f%%)" % (m, n, s, 100.0 * m / n))
    # This will print a warning if any in-purview judgments are not unanimous for a given question.
    in_purview_disagreement(collated)
    print_csv(CollatedFileType.output_format(collated))
Code Example #35
File: fixup.py  Project: cognitive-catalyst/themis
def filter_corpus(corpus, max_size):
    """
    Remove corpus entries above a specified size.

    :param corpus: corpus with Answer Id and Answer columns
    :type corpus: pandas.DataFrame
    :param max_size: maximum allowed Answer size in characters
    :type max_size: int
    :return: corpus with oversize answers removed
    :rtype: pandas.DataFrame
    """
    if max_size is not None:
        filtered = corpus[corpus[ANSWER].str.len() <= max_size]
        n = len(corpus)
        if n:
            m = n - len(filtered)
            logger.info("Filtered %d of %d answers over size %d (%0.3f%%)" %
                        (m, n, max_size, 100.0 * m / n))
        corpus = filtered
    return corpus.set_index(ANSWER_ID)
Code Example #36
File: fixup.py  Project: cognitive-catalyst/themis
def filter_usage_log_by_date(usage_log, before, after):
    """
    Only retain questions that were asked within a specified time window.

    :param usage_log: QuestionsData.csv report log
    :type usage_log: pandas.DataFrame
    :param before: only use questions from before this date
    :type before: pandas.datetime
    :param after: only use questions from after this date
    :type after: pandas.datetime
    :return: usage log with questions in the specified time span
    :rtype: pandas.DataFrame
    """
    n = len(usage_log)
    if after is not None:
        usage_log = usage_log[usage_log[DATE_TIME] >= after]
    if before is not None:
        usage_log = usage_log[usage_log[DATE_TIME] <= before]
    if n:
        logger.info("Filtered %d questions by date" % (n - len(usage_log)))
    return usage_log
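A small sketch of the date window filter; the "Date Time" column name is an assumption standing in for the DATE_TIME constant.

import pandas

usage_log = pandas.DataFrame({
    "Date Time": pandas.to_datetime(["2016-01-05", "2016-02-10", "2016-03-15"]),
    "Question": ["q1", "q2", "q3"],
})
after = pandas.to_datetime("2016-01-31")
before = pandas.to_datetime("2016-02-28")
# Keep only the questions asked inside the [after, before] window.
window = usage_log[(usage_log["Date Time"] >= after) & (usage_log["Date Time"] <= before)]
print(window)  # only q2 falls inside the window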
Code Example #37
File: fixup.py  Project: cognitive-catalyst/themis
def filter_usage_log_by_date(usage_log, before, after):
    """
    Only retain questions that were asked within a specified time window.

    :param usage_log: QuestionsData.csv report log
    :type usage_log: pandas.DataFrame
    :param before: only use questions from before this date
    :type before: pandas.datetime
    :param after: only use questions from after this date
    :type after: pandas.datetime
    :return: usage log with questions in the specified time span
    :rtype: pandas.DataFrame
    """
    n = len(usage_log)
    if after is not None:
        usage_log = usage_log[usage_log[DATE_TIME] >= after]
    if before is not None:
        usage_log = usage_log[usage_log[DATE_TIME] <= before]
    if n:
        logger.info("Filtered %d questions by date" % (n - len(usage_log)))
    return usage_log
Code Example #38
File: xmgr.py  Project: DharmendraVaghela/themis
def download_truth_from_xmgr(xmgr, output_directory):
    """
    Download truth from an XMGR project.

    Truth is a mapping of sets of questions to answer documents. Truth is used to train the WEA model and may be used
    to train an NLC model.

    This function creates two files in the output directory: a raw truth.json that contains all the information
    downloaded from XMGR and a filtered truth.csv file.

    :param xmgr: connection to an XMGR project REST API
    :type xmgr: XmgrProject
    :param output_directory: directory in which to create truth.json and truth.csv
    :type output_directory: str
    """
    ensure_directory_exists(output_directory)
    truth_json = os.path.join(output_directory, "truth.json")
    truth_csv = os.path.join(output_directory, "truth.csv")
    if os.path.isfile(truth_json) and os.path.isfile(truth_csv):
        logger.info("Truth already downloaded")
        return
    if not os.path.isfile(truth_json):
        logger.info("Get questions from %s" % xmgr)
        mapped_questions = [question for question in xmgr.get_questions() if not question["state"] == "REJECTED"]
        with open(truth_json, "w") as f:
            json.dump(mapped_questions, f, indent=2)
    else:
        with open(truth_json) as f:
            mapped_questions = json.load(f)
    logger.info("Build truth from questions")
    truth = get_truth_from_mapped_questions(mapped_questions)
    to_csv(truth_csv, TruthFileType.output_format(truth))
Code Example #39
File: answer.py  Project: stefanvds/themis
def answer_questions(system, questions, output_filename, checkpoint_frequency):
    """
    Use a Q&A system to provide answers to a test set of questions

    :param system: Q&A system
    :type system: object that exports an ask method
    :param questions: questions to ask
    :type questions: set
    :param output_filename: name of file to which to write questions, answers, and confidences
    :type output_filename: str
    :param checkpoint_frequency: how often to write intermediary results to the output file
    :type checkpoint_frequency: int
    """
    logger.info("Get answers to %d questions from %s" % (len(questions), system))
    answers = DataFrameCheckpoint(output_filename, [QUESTION, ANSWER, CONFIDENCE], checkpoint_frequency)
    try:
        if answers.recovered:
            logger.info("Recovered %d answers from %s" % (len(answers.recovered), output_filename))
        questions = sorted(questions - answers.recovered)
        n = len(answers.recovered) + len(questions)
        for i, question in enumerate(questions, len(answers.recovered) + 1):
            if i == 1 or i == n or i % checkpoint_frequency == 0:
                logger.info(percent_complete_message("Question", i, n))
            # NLC and Solr cannot handle newlines in questions.
            answer, confidence = system.ask(question.replace("\n", " "))
            logger.debug("%s\t%s\t%s" % (question, answer, confidence))
            answers.write(question, answer, confidence)
    finally:
        answers.close()
Code Example #40
def retry(function, times):
    """
    Retry a function call that may fail a specified number of times.

    This attempts to call the function a specified number of times. If the function throws an exception, sleep for a
    minute and try again until we have made the specified number of attempts.

    If None is passed for the number of times, just try once and throw any exception that occurs.

    :param function: a function to be called
    :type function: function
    :param times: the number of times to call the function before giving up
    :type times: int
    """
    if times is None:
        function()
    else:
        assert times > 0
        try:
            function()
        except Exception as e:
            logger.info("Error %s" % e)
            times -= 1
            if times:
                logger.info("Retry %d more times" % times)
                time.sleep(60)
                retry(function, times)
            else:
                logger.info("Done retrying")
Code Example #41
File: xmgr.py  Project: stefanvds/themis
def download_truth_from_xmgr(xmgr, output_directory):
    """
    Download truth from an XMGR project.

    Truth is a mapping of sets of questions to answer documents. Truth is used to train the WEA model and may be used
    to train an NLC model.

    This function creates two files in the output directory: a raw truth.json that contains all the information
    downloaded from XMGR and a filtered truth.csv file.

    :param xmgr: connection to an XMGR project REST API
    :type xmgr: XmgrProject
    :param output_directory: directory in which to create truth.json and truth.csv
    :type output_directory: str
    """
    ensure_directory_exists(output_directory)
    truth_json = os.path.join(output_directory, "truth.json")
    truth_csv = os.path.join(output_directory, "truth.csv")
    if os.path.isfile(truth_json) and os.path.isfile(truth_csv):
        logger.info("Truth already downloaded")
        return
    if not os.path.isfile(truth_json):
        logger.info("Get questions from %s" % xmgr)
        mapped_questions = [
            question for question in xmgr.get_questions()
            if not question["state"] == "REJECTED"
        ]
        with open(truth_json, "w") as f:
            json.dump(mapped_questions, f, indent=2)
    else:
        with open(truth_json) as f:
            mapped_questions = json.load(f)
    logger.info("Build truth from questions")
    truth = get_truth_from_mapped_questions(mapped_questions)
    to_csv(truth_csv, TruthFileType.output_format(truth))
Code Example #42
def retry(function, times):
    """
    Retry a function call that may fail a specified number of times.

    This attempts to call the function a specified number of times. If the function throws an exception, sleep for a
    minute and try again until we have made the specified number of attempts.

    If None is passed for the number of times, just try once and throw any exception that occurs.

    :param function: a function to be called
    :type function: function
    :param times: the number of times to call the function before giving up
    :type times: int
    """
    if times is None:
        function()
    else:
        assert times > 0
        try:
            function()
        except Exception as e:
            logger.info("Error %s" % e)
            times -= 1
            if times:
                logger.info("Retry %d more times" % times)
                time.sleep(60)
                retry(function, times)
            else:
                logger.info("Done retrying")
Code Example #43
File: analyze.py  Project: louisroehrs/themis
def filter_judged_answers(systems_data, correct, system_names):
    """
    Filter out just the correct or incorrect in-purview answers.

    :param systems_data: questions, answers, and judgments across systems
    :type systems_data: list of pandas.DataFrame
    :param correct: filter correct or incorrect answers?
    :type correct: bool
    :param system_names: systems to filter to, if None show all systems
    :type system_names: list of str
    :return: set of in-purview questions with answers judged either correct or incorrect
    :rtype: pandas.DataFrame
    """
    systems_data = pandas.concat(systems_data).dropna()
    if system_names is not None:
        systems_data = systems_data[systems_data[SYSTEM].isin(system_names)]
    filtered = systems_data[(systems_data[IN_PURVIEW] == True) & (systems_data[CORRECT] == correct)]
    n = len(systems_data)
    m = len(filtered)
    logger.info("%d in-purview %s answers out of %d (%0.3f%%)" %
                (m, {True: "correct", False: "incorrect"}[correct], n, 100 * m / n))
    return filtered
Code Example #44
File: question.py  Project: cognitive-catalyst/themis
def extract_question_answer_pairs_from_usage_logs(usage_log):
    """
    Extract questions and answers from usage logs, adding question frequency information.

    We are assuming here that a given question always elicits the same answer. Print a warning if this is not the case
    and drop answers to make the answers unique. It is arbitrary which answer is dropped.

    :param usage_log: QuestionsData.csv usage log
    :type usage_log: pandas.DataFrame
    :return: Q&A pairs with question frequency information
    :rtype: pandas.DataFrame
    """
    frequency = question_frequency(usage_log)
    qa_pairs = usage_log.drop_duplicates(subset=(QUESTION, ANSWER))
    m = sum(qa_pairs.duplicated(QUESTION))
    if m:
        n = len(frequency)
        logger.warning("%d questions of %d have multiple answers (%0.3f%%), only keeping one answer per question" %
                       (m, n, 100.0 * m / n))
        qa_pairs = qa_pairs.drop_duplicates(QUESTION)
    qa_pairs = pandas.merge(qa_pairs, frequency, on=QUESTION)
    logger.info("%d question/answer pairs" % len(qa_pairs))
    return qa_pairs
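The deduplication step can be traced on a toy usage log; the literal column names stand in for the QUESTION and ANSWER constants and are assumptions.

import pandas

usage_log = pandas.DataFrame({
    "Question": ["q1", "q1", "q2", "q2"],
    "Answer": ["a1", "a1", "a2", "a3"],  # q2 elicited two different answers
})
qa_pairs = usage_log.drop_duplicates(subset=("Question", "Answer"))
print(int(qa_pairs.duplicated("Question").sum()))  # 1: q2 still has more than one answer
qa_pairs = qa_pairs.drop_duplicates("Question")
print(qa_pairs)  # an arbitrary one of q2's answers is kept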
Code Example #45
File: xmgr.py  Project: stefanvds/themis
def get_truth_from_mapped_questions(mapped_questions):
    def get_pau_mapping(question):
        if "predefinedAnswerUnit" in question:
            return question["predefinedAnswerUnit"]
        elif "mappedQuestion" in question:
            question_id = question["mappedQuestion"]["id"]
            try:
                mapped_question = questions[question_id]
            except KeyError:
                logger.warning(
                    "Question %s mapped to non-existent question %s" %
                    (question["id"], question_id))
                return None
            return get_pau_mapping(mapped_question)
        else:
            return None

    unmapped = 0
    # Index the questions by their question id so that mapped questions can be looked up.
    questions = dict([(question["id"], question)
                      for question in mapped_questions])
    for question in questions.values():
        question[ANSWER_ID] = get_pau_mapping(question)
        if question[ANSWER_ID] is None:
            unmapped += 1
    questions = [q for q in questions.values() if q[ANSWER_ID] is not None]
    question_ids = [q["id"] for q in questions]
    question_text = [q["text"] for q in questions]
    answer_id = [q[ANSWER_ID] for q in questions]
    truth = pandas.DataFrame.from_dict({
        QUESTION_ID: question_ids,
        QUESTION: question_text,
        ANSWER_ID: answer_id
    })
    logger.info("%d mapped, %d unmapped" % (len(truth), unmapped))
    return truth
Code Example #46
def extract_question_answer_pairs_from_usage_logs(usage_log):
    """
    Extract questions and answers from usage logs, adding question frequency information.

    We are assuming here that a given question always elicits the same answer. Print a warning if this is not the case
    and drop answers to make the answers unique. It is arbitrary which answer is dropped.

    :param usage_log: QuestionsData.csv usage log
    :type usage_log: pandas.DataFrame
    :return: Q&A pairs with question frequency information
    :rtype: pandas.DataFrame
    """
    frequency = question_frequency(usage_log)
    qa_pairs = usage_log.drop_duplicates(subset=(QUESTION, ANSWER))
    m = sum(qa_pairs.duplicated(QUESTION))
    if m:
        n = len(frequency)
        logger.warning(
            "%d questions of %d have multiple answers (%0.3f%%), only keeping one answer per question"
            % (m, n, 100.0 * m / n))
        qa_pairs = qa_pairs.drop_duplicates(QUESTION)
    qa_pairs = pandas.merge(qa_pairs, frequency, on=QUESTION)
    logger.info("%d question/answer pairs" % len(qa_pairs))
    return qa_pairs
Code example #47
0
File: xmgr.py Project: DharmendraVaghela/themis
def augment_corpus_answers(corpus, qa_pairs):
    """
    Create a set of answers culled from both the corpus and the usage logs.

    These answers can be used to populate a Solr database.

    You would expect all the answers returned by the system to be in the corpus, but this is not the case. The
    'themis xmgr validate-answers' command shows which answers are missing from the corpus.

    :param corpus: answer corpus
    :type corpus: pandas.DataFrame
    :param qa_pairs: question answer pairs from usage logs
    :type qa_pairs: pandas.DataFrame
    :return: comprehensive set of answers
    :rtype: pandas.DataFrame
    """
    qa_pairs = qa_pairs.drop([QUESTION, CONFIDENCE, USER_EXPERIENCE, FREQUENCY, DATE_TIME], axis="columns")
    # The corpus may contain multiple PAUs with the same text but different titles.
    answer_set = pandas.merge(corpus, qa_pairs, on=ANSWER, how="outer").drop_duplicates([ANSWER, TITLE])
    n = len(answer_set)
    m = n - len(corpus)
    if m:
        logger.info("Added %d unique answers (%0.3f%%)" % (m, 100.0 * m / n))
    return answer_set
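A short sketch combining a previously exported corpus with the Q&A pairs pulled from the usage logs; file names and the import path are assumptions, and the qa-pairs file must carry the confidence, user-experience, frequency and date-time columns the function drops.

import pandas

from themis.xmgr import augment_corpus_answers  # import path is an assumption

corpus = pandas.read_csv("corpus.csv")      # answer corpus downloaded from XMGR
qa_pairs = pandas.read_csv("qa-pairs.csv")  # Q&A pairs extracted from the usage logs
answer_set = augment_corpus_answers(corpus, qa_pairs)
answer_set.to_csv("solr-answers.csv", index=False)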
Code example #48
0
File: xmgr.py Project: stefanvds/themis
def augment_corpus_truth(xmgr, corpus, truth, checkpoint_frequency):
    """
    Find answer IDs referenced in the truth file that are missing from the corpus, download them from XMGR, then add
    them to the corpus.

    Intermediary results are periodically written to an augment.temp.csv file in the current directory so that
    downloading can resume from where it left off if it fails in the middle. The augment.temp.csv file is deleted upon
    completion of downloading.

    :param xmgr: connection to an XMGR project REST API
    :type xmgr: XmgrProject
    :param corpus: answer corpus
    :type corpus: pandas.DataFrame
    :param truth: truth downloaded from XMGR
    :type truth: pandas.DataFrame
    :param checkpoint_frequency: how often to write intermediate results to a checkpoint file
    :type checkpoint_frequency: int
    :return: augmented answer corpus
    :rtype: pandas.DataFrame
    """
    def get_pau(pau_id):
        paus = xmgr.get_paus(pau_id)
        if paus:
            pau = paus[0]
            return {
                ANSWER: pau["responseMarkup"],
                TITLE: pau["title"],
                FILENAME: pau["sourceName"]
            }
        else:
            logger.info("Could not download pau %s" % pau_id)
            return None

    n = len(corpus)
    missing_pau_ids = truth[missing_truth_in_corpus(
        corpus, truth)][ANSWER_ID].drop_duplicates()
    l = len(missing_pau_ids)
    logger.info("%d answer IDs referenced in truth missing from corpus" % l)
    checkpoint = PauCheckpoint("augment.temp.csv", checkpoint_frequency)
    get_items("PAUs", missing_pau_ids, checkpoint, get_pau,
              checkpoint_frequency)
    new_corpus = from_csv(checkpoint.filename())
    new_corpus[DOCUMENT_ID] = os.path.basename(truth.filename)
    corpus = pandas.concat([corpus, new_corpus])
    # noinspection PyTypeChecker
    m = len(corpus) - n
    if m:
        logger.info("Added %d unique answers (%0.3f%%)" % (m, 100.0 * m / n))
    if checkpoint.invalid:
        logger.info("Failed to download %d PAU ids (%0.3f%%)" %
                    (checkpoint.invalid, 100.0 * checkpoint.invalid / l))
    os.remove(checkpoint.filename())
    return corpus
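A hedged sketch of running the download against a live project. The XmgrProject constructor arguments shown are assumptions, and the truth frame is assumed to expose the filename attribute the function reads.

import pandas

from themis.xmgr import XmgrProject, augment_corpus_truth  # import path is an assumption

# The XmgrProject constructor arguments below are assumptions.
xmgr = XmgrProject("https://xmgr.example.com/project", "username", "password")
corpus = pandas.read_csv("corpus.csv")
truth = pandas.read_csv("truth.csv")
truth.filename = "truth.csv"  # the function reads truth.filename to label newly downloaded PAUs
augmented = augment_corpus_truth(xmgr, corpus, truth, checkpoint_frequency=100)
augmented.to_csv("corpus.augmented.csv", index=False)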
Code example #49
0
File: analyze.py Project: cognitive-catalyst/themis
def nlc_router_train(url, username, password, oracle_out, path, all_correct):
    """
    Train NLC on the oracle experiment output to determine which system (NLC or Solr) should answer a particular
    question.

    1. Split the oracle experiment output into training and testing records for 8-fold cross validation of the
    data set. All training and testing files are stored under "path".

    2. Kick off NLC training on all 8 training sets, write the resulting classifier ids to classifier.json under
    "path", and return them as a list.

    :param url: URL of NLC instance
    :param username: NLC username
    :param password: NLC password
    :param oracle_out: oracle experiment output data frame
    :param path: directory path to save intermediate results
    :param all_correct: optional boolean parameter to train on only correct Q&A pairs
    :return: list of classifier ids from NLC training
    """
    ensure_directory_exists(path)

    sys_name = oracle_out[SYSTEM][0]
    oracle_out[QUESTION] = oracle_out[QUESTION].str.replace("\n", " ")
    kfold_split(oracle_out, path, NLC_ROUTER_FOLDS, True)
    classifier_list = []
    classifier_names_to_ids = []

    for x in range(0, NLC_ROUTER_FOLDS):
        train = pandas.read_csv(os.path.join(path, "Train{0}.csv".format(str(x))))
        if all_correct:
            logger.info("Training only on CORRECT examples.")
            # Ignore records from training which are not correct
            train = train[train[CORRECT]]
            train = train[train[IN_PURVIEW]]
        train = train[[QUESTION, ANSWERING_SYSTEM]]
        logger.info("Training set size = {0}".format(str(len(train))))
        with tempfile.TemporaryFile() as training_file:
            to_csv(training_file, train[[QUESTION, ANSWERING_SYSTEM]], header=False, index=False)
            training_file.seek(0)
            nlc = NaturalLanguageClassifier(url=url, username=username, password=password)
            classifier_id = nlc.create(training_data=training_file, name="{0}_fold_{1}".format(str(sys_name), str(x)))
            classifier_list.append(classifier_id["classifier_id"].encode("utf-8"))
            classifier_names_to_ids.append(
                {classifier_id["name"].encode("utf-8"): classifier_id["classifier_id"].encode("utf-8")})
            logger.info(pretty_print_json(classifier_id))

    with open(os.path.join(path, 'classifier.json'), 'wb') as f:
        json.dump(classifier_names_to_ids, f)
    return classifier_list
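A hedged sketch of kicking off router training; the NLC endpoint, credentials and oracle output file are placeholders, and the import path is an assumption.

import pandas

from themis.analyze import nlc_router_train  # import path is an assumption

oracle_out = pandas.read_csv("oracle-experiment.csv")  # collated oracle experiment output
classifier_ids = nlc_router_train(
    url="https://gateway.watsonplatform.net/natural-language-classifier/api",  # placeholder endpoint
    username="nlc-username",
    password="nlc-password",
    oracle_out=oracle_out,
    path="router-folds",
    all_correct=True)
print(classifier_ids)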
Code example #50
0
def annotation_assist_qa_input(answers, questions, judgments):
    """
    Create list of Q&A pairs for judgment by Annotation Assist.

    The Q&A pairs to be judged are compiled from sets of answers generated by Q&A systems. These may be filtered by an
    optional list of questions. Judgements may be taken from optional sets of previously judged Q&A pairs.

    :param answers: answers to questions as generated by Q&A systems
    :type answers: pandas.DataFrame
    :param questions: optional set of questions to filter on, if None use all answered questions
    :type questions: pandas.DataFrame
    :param judgments: optional judgments, look up a judgment here before sending the Q&A pair to Annotation Assist
    :type judgments: pandas.DataFrame
    :return: Q&A pairs to pass to Annotation Assist for judgment
    :rtype: pandas.DataFrame
    """
    qa_pairs = pandas.concat(answers)
    qa_pairs = qa_pairs.drop_duplicates([QUESTION, ANSWER])
    logger.info("%d Q&A pairs" % len(qa_pairs))
    if questions is not None:
        qa_pairs = pandas.merge(qa_pairs, questions)
        logger.info("%d Q&A pairs for %d unique questions" %
                    (len(qa_pairs), len(questions)))
    if judgments:
        judged_qa_pairs = pandas.concat(judgments)
        assert not any(judged_qa_pairs.duplicated()
                       ), "There are Q&A pairs with multiple judgements"
        qa_pairs = pandas.merge(qa_pairs,
                                judged_qa_pairs,
                                on=(QUESTION, ANSWER),
                                how="left")
        not_judged = qa_pairs[qa_pairs[CORRECT].isnull()]
        n = len(not_judged)
        logger.info("%d unjudged Q&A pairs (%0.3f%%)" %
                    (n, 100.0 * n / len(qa_pairs)))
    else:
        not_judged = qa_pairs
    not_judged = not_judged.rename(
        columns={
            QUESTION: QUESTION_TEXT_INPUT,
            ANSWER: TOP_ANSWER_TEXT_ANNOTATION_ASSIST,
            CONFIDENCE: TOP_ANSWER_CONFIDENCE
        })
    not_judged = not_judged[[
        QUESTION_TEXT_INPUT, TOP_ANSWER_TEXT_ANNOTATION_ASSIST,
        TOP_ANSWER_CONFIDENCE
    ]]
    return not_judged
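A short sketch; note that answers and judgments are lists of data frames, since the function concatenates them. The file names and import path are assumptions.

import pandas

from themis.judge import annotation_assist_qa_input  # import path is an assumption

answers = [pandas.read_csv(name) for name in ("wea-answers.csv", "solr-answers.csv")]
questions = pandas.read_csv("sample-questions.csv")
judgments = [pandas.read_csv("previous-judgments.csv")]
to_judge = annotation_assist_qa_input(answers, questions, judgments)
to_judge.to_csv("annotation-assist-input.csv", index=False)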
Code example #51
0
File: analyze.py Project: stefanvds/themis
def kfold_split(df, outdir, _folds=5):
    """
    Randomly split a data frame into train/test folds for k-fold cross validation, writing a Test<i>.csv and
    Train<i>.csv file for each fold to the output directory.
    """
    # Randomize the order of the input data frame.
    df = df.iloc[np.random.permutation(len(df))]
    df = df.reset_index(drop=True)
    foldSize = int(math.ceil(len(df) / float(_folds)))
    logger.info("Total records: " + str(len(df)))
    logger.info("Fold size: " + str(foldSize))

    for x in range(0, _folds):
        fold_low = x*foldSize
        fold_high = (x+1)*foldSize

        if fold_high >= len(df):
            fold_high = len(df)

        test_df = df.iloc[fold_low:fold_high]
        train_df = df.drop(df.index[fold_low:fold_high])

        test_df.to_csv(os.path.join(outdir, 'Test' + str(x) + '.csv'), encoding='utf-8', index=False)
        train_df.to_csv(os.path.join(outdir, 'Train' + str(x) + '.csv'), header=False, encoding='utf-8', index=False)

        logger.info("--- Train_Fold_" + str(x) + ' size = ' + str(len(train_df)))
        logger.info("--- Test_Fold_" + str(x) + ' size = ' + str(len(test_df)))
Code example #52
0
File: analyze.py Project: cognitive-catalyst/themis
def answer_router_questions(system, questions, output):
    """
    Get an answer from the given system for each question and store the results in the output file.

    :param system: answering system, either NLC or Solr
    :param questions: set of questions to ask
    :param output: output CSV file name
    """
    logger.info("Get answers to %d questions from %s" % (len(questions), system))
    answers = DataFrameCheckpoint(output, [QUESTION, ANSWERING_SYSTEM])
    try:
        if answers.recovered:
            logger.info("Recovered %d answers from %s" % (len(answers.recovered), output))
        questions = sorted(questions - answers.recovered)
        n = len(answers.recovered) + len(questions)
        for i, question in enumerate(questions, len(answers.recovered) + 1):
            if i == 1 or i == n or i % 25 == 0:
                logger.info(percent_complete_message("Question", i, n))
            answer = system.query(question.replace("\n", " "))
            #logger.debug("%s\t%s" % (question, answer))
            answers.write(question, answer)
    finally:
        answers.close()
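A hedged sketch of calling the routine above; any object with a query(question) method works as the system argument, so a hypothetical stub is used here, and the question column name is an assumption.

import pandas

class EchoSystem(object):
    """Hypothetical stand-in for an NLC or Solr answering system."""

    def __str__(self):
        return "Echo"

    def query(self, question):
        # A real system would return the label of the system that should answer this question.
        return "NLC"

questions = set(pandas.read_csv("questions.csv")["Question"])  # column name is an assumption
answer_router_questions(EchoSystem(), questions, "router-answers.csv")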
Code example #53
0
def log_correct(system_data, name):
    n = len(system_data)
    m = sum(system_data[CORRECT])
    logger.info("%d of %d correct in %s (%0.3f%%)" %
                (m, n, name, 100.0 * m / n))
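A tiny sketch of the helper above; the judged file and the column named by CORRECT are assumptions.

import pandas

judged = pandas.read_csv("judged-answers.csv")  # must contain the boolean column the CORRECT constant names
log_correct(judged, "WEA baseline")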