Example 1
0
# Script: build a sparse subject x property indicator matrix from the KB.
#
# usage: <script> <inputs-folder>
import sys

from scipy import sparse
from tqdm import tqdm

from dice import Inputs

if __name__ == "__main__":

    print("Kb extension computation")
    # Exactly one CLI argument (the inputs folder) is required.
    if len(sys.argv) != 2:
        print(__doc__)
        exit()
    inputs_folder = sys.argv[1]

    print("Loading...")
    inputs = Inputs(inputs_folder)
    print("\tLoading KB...")
    kb = inputs.get_kb()
    print("\tLoading taxonomy...")
    taxonomy = inputs.get_taxonomy()
    print("\tLoading probability...")
    probability = inputs.get_probability()

    print("Gathering properties...")
    # Map each subject to the set of property indices asserted for it in the KB.
    properties = dict()
    for fact in tqdm(kb.values()):
        properties.setdefault(fact.subject, set())
        properties[fact.subject].add(probability.index_P[fact.property])

    print("Gathering scores...")
    # Binary indicator matrix: rows are concepts (C), columns are properties (P).
    # lil_matrix is used because it supports cheap incremental assignment.
    scores = sparse.lil_matrix((len(probability.C), len(probability.P)))
    for subject, indices in tqdm(properties.items()):
        index_c = probability.index_C[subject]
        for index_p in indices:
            scores[index_c, index_p] = 1
Example 2
0
def demo(argv):
    """demo
    arguments:  <inputs-folder> <partition-file> <max-facts-per-subjects> <clean-source> <save-folder>

    Builds a per-fact CSV that joins evidence cues and confidence scores
    (rank- and min-max-normalized) with taxonomy-neighborhood and
    property-similarity information taken from a partition file, then
    writes the result to <save-folder>/demo.csv.
    """
    inputs_folder, partition_file, maximum_facts_per_subject, clean_source, save_path = argv
    from dice import Inputs
    from dice.misc import Output
    from dice.constants import Dimensions
    from dice.reason import Variable
    from dice.evidence.cues import JointCue
    from dice.evidence.cues import NecessityCue
    from dice.evidence.cues import SufficiencyCue
    from dice.evidence.cues import ImplicationCue
    from dice.evidence.cues import EntailmentCue
    from dice.evidence.cues import ContradictionCue
    from dice.evidence.cues import EntropyCue
    from tqdm import tqdm
    import pandas as pd
    output = Output(save_path)
    inputs = Inputs(inputs_folder)
    print("Loading inputs...")
    kb = inputs.get_kb()
    taxonomy = inputs.get_taxonomy()
    detective = inputs.get_detective()
    assignment = inputs.get_assignment()
    similarity = inputs.get_similarity_matrix()
    data = list()
    selected_indices = set()
    subjects_representation = dict()
    print("Selecting indices...")
    # Group fact indices by subject so facts per subject can be capped below.
    # NOTE: reuse the already-loaded `kb` instead of calling get_kb() again.
    for fact in tqdm(kb.values()):
        subjects_representation.setdefault(fact.subject, list())
        subjects_representation[fact.subject].append(fact.index)
    print("Thresholding number of facts per subject...")
    limit = int(maximum_facts_per_subject)
    for subject, indices in tqdm(subjects_representation.items()):
        # Keep at most `limit` facts per subject.
        selected_indices.update(indices[:limit])
    print("Gathering facts...")
    for fact in tqdm(kb.values()):
        if fact.index not in selected_indices:
            continue
        cues = detective.cues
        confidence = assignment.confidence
        evidence = detective[fact.index]
        data.append({
            "index": fact.index,
            "source": clean_source,
            "subject": fact.subject,
            "property": fact.property,
            "score": fact.score,
            "evidence_plausible": evidence.plausible,
            "evidence_typical": evidence.typical,
            "evidence_remarkable": evidence.remarkable,
            "evidence_salient": evidence.salient,
            "cue_joint": cues[JointCue][fact.index],
            "cue_necessity": cues[NecessityCue][fact.index],
            "cue_sufficiency": cues[SufficiencyCue][fact.index],
            "cue_implication": cues[ImplicationCue][fact.index],
            "cue_entailment": cues[EntailmentCue][fact.index],
            "cue_contradiction": cues[ContradictionCue][fact.index],
            "cue_entropy": cues[EntropyCue][fact.index],
            "plausible": confidence.get(
                Variable(fact.index, Dimensions.PLAUSIBLE), 0),
            "typical": confidence.get(
                Variable(fact.index, Dimensions.TYPICAL), 0),
            "remarkable": confidence.get(
                Variable(fact.index, Dimensions.REMARKABLE), 0),
            "salient": confidence.get(
                Variable(fact.index, Dimensions.SALIENT), 0),
            # The *_percentile columns start as copies of the raw confidences
            # and are converted to percentile ranks below.
            "plausible_percentile": confidence.get(
                Variable(fact.index, Dimensions.PLAUSIBLE), 0),
            "typical_percentile": confidence.get(
                Variable(fact.index, Dimensions.TYPICAL), 0),
            "remarkable_percentile": confidence.get(
                Variable(fact.index, Dimensions.REMARKABLE), 0),
            "salient_percentile": confidence.get(
                Variable(fact.index, Dimensions.SALIENT), 0),
        })
    df_facts = pd.DataFrame(data)
    del data
    n = df_facts.shape[0]
    print("Normalizing columns...")
    # FIX: the original list contained "cue_implication" twice, rank-normalizing
    # that column a second time; the hard-coded pbar total of 20 hid this.
    rank_columns = [
        "plausible_percentile", "typical_percentile",
        "remarkable_percentile", "salient_percentile",
        "evidence_plausible", "evidence_typical", "evidence_remarkable",
        "evidence_salient", "cue_joint", "cue_necessity",
        "cue_sufficiency", "cue_implication",
        "cue_entailment", "cue_contradiction", "cue_entropy"
    ]
    minmax_columns = ["plausible", "typical", "remarkable", "salient"]
    pbar = tqdm(total=len(rank_columns) + len(minmax_columns))
    # Guard against division by zero when the frame has 0 or 1 rows.
    denominator = max(n - 1, 1)
    for column in rank_columns:
        # Replace each value by its rank, scaled into [0, 1].
        df_facts = df_facts.sort_values(by=column)
        df_facts[column] = [i / denominator for i in range(n)]
        pbar.update(1)
    for column in minmax_columns:
        # Vectorized min-max normalization (the original iterated row by row).
        a, b = df_facts[column].min(), df_facts[column].max()
        spread = (b - a) or 1  # guard: constant column would divide by zero
        df_facts[column] = (df_facts[column] - a) / spread
        pbar.update(1)
    pbar.close()
    print("Gathering partition...")
    data = list()
    with open(partition_file) as file:
        for line in tqdm(file.readlines()):
            # Each line: <count>\t<index>\t<index>... — the first `count`
            # indices are the facts to emit; all indices define the context.
            count, *indices = list(map(int, line.strip().split("\t")))
            subjects = set(kb[j].subject for j in indices)
            properties_all = list(set(kb[j].property for j in indices))
            local_indices = [similarity.index[p] for p in properties_all]
            # Similarity sub-matrix restricted to this line's properties.
            local_matrix = similarity.matrix[local_indices][:, local_indices]
            for i in range(count):
                fact = kb[indices[i]]
                if indices[i] not in selected_indices:
                    continue
                property_index_self = similarity.index[fact.property]
                parents, children, siblings = [], [], []
                if fact.subject in taxonomy.nodes:

                    def _neighbors(candidates):
                        # "neighbor:weight" for each candidate neighbor that
                        # also appears as a subject on this partition line.
                        return [
                            "{neighbor}:{weight}".format(
                                neighbor=neighbor,
                                weight=taxonomy.weight(fact.subject, neighbor),
                            ) for neighbor in subjects.intersection(candidates)
                        ]

                    parents = _neighbors(taxonomy.predecessors(fact.subject))
                    children = _neighbors(taxonomy.successors(fact.subject))
                    siblings = _neighbors(taxonomy.siblings(fact.subject))
                # "property:similarity" for properties similar to this fact's.
                properties = list()
                for j, k in zip(*local_matrix.nonzero()):
                    if local_indices[j] != property_index_self:
                        continue
                    properties.append(properties_all[k] + ":" +
                                      str(local_matrix[j, k]))
                data.append({
                    "index": indices[i],
                    "parents": ";".join(parents),
                    "children": ";".join(children),
                    "siblings": ";".join(siblings),
                    "properties": ";".join(properties),
                })
    df_partition = pd.DataFrame(data)
    # Both frames are indexed by the fact index, so join on the index directly.
    df = df_facts.set_index("index").join(df_partition.set_index("index"),
                                          how="outer")
    df.to_csv(output.path("demo.csv"), index=False)