def doi_iri(doi, title=None, statements={}):
    """
    Function to create relevant statements about a DOI.

    Parameters
    ----------
    doi: string
        Digital Object Identifier

    title: string, optional
        title of digital object

    statements: dictionary, optional
        existing statements to extend
        key: string
            RDF subject
        value: dictionary
            key: string
                RDF predicate
            value: {string}
                set of RDF objects

    Returns
    -------
    statements: dictionary
        key: string
            RDF subject
        value: dictionary
            key: string
                RDF predicate
            value: {string}
                set of RDF objects

    Example
    -------
    >>> print([k for k in doi_iri(
    ...     "10.1109/IEEESTD.2015.7084073",
    ...     "1872-2015 - IEEE Standard Ontologies for Robotics and Automation"
    ... )][0])
    <https://dx.doi.org/10.1109/IEEESTD.2015.7084073>
    """
    local_iri = check_iri(
        'https://dx.doi.org/{0}'.format(
            doi
        )
    )
    doi = '"""{0}"""^^rdfs:Literal'.format(doi)
    for pred in [
        ("datacite:usesIdentifierScheme", "datacite:doi"),
        ("datacite:hasIdentifier", doi)
    ]:
        statements = add_if(
            local_iri,
            pred[0],
            pred[1],
            statements
        )
    return (
        add_if(
            local_iri,
            "rdfs:label",
            language_string(
                title
            ),
            statements
        ) if title else statements
    )
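
# Usage sketch for doi_iri (the _demo_* helpers in this file are
# hypothetical, not part of mhdb): the doctest above only prints the
# subject IRI, so this shows the full nested structure. Exact IRI and
# literal rendering depend on check_iri and the rdfs:Literal formatting
# used above.
def _demo_doi_iri():
    demo = doi_iri(
        "10.1109/IEEESTD.2015.7084073",
        "1872-2015 - IEEE Standard Ontologies for Robotics and Automation"
    )
    # each subject maps to {predicate: {objects}}
    for subject, predicates in demo.items():
        print(subject)
        for predicate, objects in predicates.items():
            print("\t{0} {1}".format(predicate, objects))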
def audience_statements(statements={}):
    """
    Function to generate PeopleAudience subClasses.

    Parameters
    ----------
    statements: dictionary
        key: string
            RDF subject
        value: dictionary
            key: string
                RDF predicate
            value: {string}
                set of RDF objects

    Returns
    -------
    statements: dictionary
        key: string
            RDF subject
        value: dictionary
            key: string
                RDF predicate
            value: {string}
                set of RDF objects

    Example
    -------
    >>> print(
    ...     audience_statements()["mhdb:MaleAudience"]["rdfs:subClassOf"]
    ... )
    {'schema:PeopleAudience'}
    """
    for gendered_audience in {"Male", "Female"}:
        gendered_iri = check_iri("".join([gendered_audience, "Audience"]))
        schema_gender = "schema:{0}".format(gendered_audience)
        g_statements = {
            "rdfs:subClassOf": {"schema:PeopleAudience"},
            "rdfs:label":
            {language_string(" ".join([gendered_audience, "Audience"]))},
            "schema:requiredGender": {schema_gender}
        }
        if gendered_iri not in statements:
            statements[gendered_iri] = g_statements
        else:
            statements[gendered_iri] = {
                **statements[gendered_iri],
                **g_statements
            }
    return (statements)
def type_pred(row, prefixes):
    """
    Function to create and return a tuple of
    Turtle property and Turtle object for a
    given row's Type

    Parameters
    ----------
    row : Series
        row from structure_to_keep
        pandas series from generator
        ie, row[1] for row in iterrows()

    prefixes : iterable of 2-tuples
        (prefix_string: string
        prefix_iri: string)
        defined RDF prefixes

    Returns
    -------
    predicate : 2-tuple
        predicate[0]: string
            Turtle property

        predicate[1]: string
            Turtle object
    """
    cpi = row["Class, Property or Instance"]
    # choose the linking property according to whether the row defines a
    # Class, a Property or an Instance
    prop = "rdfs:subClassOf" if cpi == "Class" else (
        "rdfs:subPropertyOf" if cpi == "Property" else "rdfs:type"
    )
    predicate = (
        prop,
        check_iri(row["Type"], prefixes)
    ) if row["Type"] else None
    return (predicate)
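
# Hypothetical usage sketch for type_pred (not part of mhdb): a one-row
# stand-in for a structure_to_keep row. It assumes check_iri turns the
# "Type" value into a Turtle IRI; the exact string depends on the prefixes
# supplied.
def _demo_type_pred():
    import pandas as pd
    stc_row = pd.Series({
        "Class, Property or Instance": "Class",
        "Type": "schema:CreativeWork"
    })
    # expected: ("rdfs:subClassOf", <IRI for "schema:CreativeWork">)
    return type_pred(stc_row, prefixes=[("schema", "http://schema.org/")])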
def Project(technology_xls, mentalhealth_xls=None, statements={}):
    '''
    Function to ingest 1cuJXT1Un7HPLYcDyHAXprH-wGS1azuUNmVQnb3dV1cY Project

    Parameters
    ----------
    technology_xls: spreadsheet workbook
        1cuJXT1Un7HPLYcDyHAXprH-wGS1azuUNmVQnb3dV1cY

    mentalhealth_xls: spreadsheet workbook, optional
        1MfW9yDw7e8MLlWWSBBXQAC2Q4SDiFiMMb7mRtr7y97Q

    statements:  dictionary
        key: string
            RDF subject
        value: dictionary
            key: string
                RDF predicate
            value: {string}
                set of RDF objects

    Returns
    -------
    statements: dictionary
        key: string
            RDF subject
        value: dictionary
            key: string
                RDF predicate
            value: {string}
                set of RDF objects

    Example
    -------
    # TODO
    '''
    for subject in ["schema:Book", "schema:Article"]:
        statements = add_if(subject, "rdfs:subClassOf", "mhdb:BookOrArticle",
                            statements)

    for pred in [("rdfs:subClassOf", "schema:CreativeWork"),
                 ("rdfs:subClassOf", "dcterms:BibliographicResource"),
                 ("rdfs:label", language_string("Book / Article"))]:
        statements = add_if("mhdb:BookOrArticle", pred[0], pred[1], statements)

    for pred in [("rdfs:subClassOf", "schema:CreativeWork"),
                 ("rdfs:subClassOf", "schema:MedicalTest"),
                 ("rdfs:label", language_string("Assessment"))]:
        statements = add_if("mhdb:Assessment", pred[0], pred[1], statements)

    for pred in [("rdfs:subClassOf", "schema:CreativeWork"),
                 ("rdfs:subClassOf", "dcterms:InteractiveResource"),
                 ("rdfs:label", language_string("Virtual Reality"))]:
        statements = add_if("mhdb:VirtualReality", pred[0], pred[1],
                            statements)

    for pred in [("rdfs:subClassOf", "schema:CreativeWork"),
                 ("rdfs:subClassOf", "dcterms:InteractiveResource"),
                 ("rdfs:label", language_string("Augmented Reality"))]:
        statements = add_if("mhdb:AugmentedReality", pred[0], pred[1],
                            statements)

    for pred in [("rdfs:subClassOf", "schema:Book"),
                 ("rdfs:label", language_string("Resource Guide"))]:
        statements = add_if("mhdb:ResourceGuide", pred[0], pred[1], statements)

    for pred in [("rdfs:subClassOf", "schema:Service"),
                 ("rdfs:subClassOf", "schema:OrganizeAction"),
                 ("rdfs:label", language_string("Community Initiative"))]:
        statements = add_if("mhdb:CommunityInitiative", pred[0], pred[1],
                            statements)

    for pred in [
        ("rdfs:subClassOf", "ssn:Device"),
        ("rdfs:comment",
         language_string(
             "A smart electronic device (electronic device with "
             "micro-controller(s)) that can be worn on the body as implants or "
             "accessories.")),
        ("rdfs:isDefinedBy",
         check_iri("https://en.wikipedia.org/wiki/Wearable_technology")),
        ("rdfs:label", language_string("Wearable"))
    ]:
        statements = add_if("mhdb:Wearable", pred[0], pred[1], statements)

        for pred in [("rdfs:subClassOf", "ssn:Device"),
                     ("rdfs:label", language_string("Tablet"))]:
            statements = add_if("mhdb:Tablet", pred[0], pred[1], statements)

    for pred in [("rdfs:subClassOf", "schema:Game"),
                 ("owl:disjointWith", "schema:VideoGame"),
                 ("rdfs:label", language_string("Non-Digital Game"))]:
        statements = add_if("mhdb:NonDigitalGame", pred[0], pred[1],
                            statements)

    statements = {
        **doi_iri(
            "10.1109/IEEESTD.2015.7084073", "1872-2015 - IEEE Standard Ontologies for Robotics and Automation"),
        **statements
    }

    for pred in [
        ("rdfs:subClassOf", "dcterms:Agent"),
        ("rdfs:subClassOf", "ssn:Device"),
        ("dcterms:source",
         check_iri('https://dx.doi.org/10.1109/IEEESTD.2015.7084073')),
        ("rdfs:label", language_string("Robot")),
        ("rdfs:comment",
         language_string(
             "An agentive device (Agent and Device in SUMO) in a broad "
             "sense, purposed to act in the physical world in order to "
             "accomplish one or more tasks. In some cases, the actions of a "
             "robot might be subordinated to actions of other agents (Agent "
             "in SUMO), such as software agents (bots) or humans. A robot "
             "is composed of suitable mechanical and electronic parts. "
             "Robots might form social groups, where they interact to "
             "achieve a common goal. A robot (or a group of robots) can "
             "form robotic systems together with special environments "
             "geared to facilitate their work."))
    ]:
        statements = add_if("mhdb:Robot", pred[0], pred[1], statements)

    for pred in [
        ("rdfs:subClassOf", "schema:CreativeWork"),
        ("dcterms:source",
         check_iri("http://afirm.fpg.unc.edu/social-narratives")),
        ("rdfs:isDefinedBy",
         check_iri("http://afirm.fpg.unc.edu/social-narratives")),
        ("rdfs:comment",
         language_string(
             "Social narratives (SN) describe social situations for "
             "learners by providing relevant cues, explanation of the "
             "feelings and thoughts of others, and descriptions of "
             "appropriate behavior expectations.")),
        ("rdfs:label", language_string("Social Narrative"))
    ]:
        statements = add_if("mhdb:SocialNarrative", pred[0], pred[1],
                            statements)

    for pred in [("rdfs:label", language_string("Ann M. Sam")),
                 ("foaf:name", language_string("Ann M. Sam")),
                 ("foaf:familyName", language_string("Sam")),
                 ("foaf:givenName", language_string("Ann")),
                 ("rdfs:type", "foaf:Person"),
                 ("rdfs:site",
                  "mhdb:University_of_North_Carolina_at_Chapel_Hill")]:
        statements = add_if(check_iri("http://fpg.unc.edu/profiles/ann-m-sam"),
                            pred[0], pred[1], statements)

    for pred in [("rdfs:label", language_string("AFIRM Team")),
                 ("foaf:name", language_string("AFIRM Team")),
                 ("rdfs:type", "foaf:Organization"),
                 ("rdfs:site",
                  "mhdb:University_of_North_Carolina_at_Chapel_Hill")]:
        statements = add_if(check_iri("AFIRM Team"), pred[0], pred[1],
                            statements)

    for contributor in [
            check_iri("http://fpg.unc.edu/profiles/ann-m-sam"),
            check_iri("AFIRM Team")
    ]:
        statements = add_if(
            check_iri("http://afirm.fpg.unc.edu/social-narratives"),
            "dcterms:contributor", contributor, statements)

    for pred in [
        ("rdfs:subClassOf", "mhdb:SocialNarrative"),
        ("rdfs:subClassOf", "schema:Game"),
        ("rdfs:label",
         language_string("Combination of a Social Narrative and Gaming System")
         )
    ]:
        statements = add_if("mhdb:SocialNarrativeGamingSystem", pred[0],
                            pred[1], statements)

    for pred in [("rdfs:subClassOf", "sio:SIO_001066"),
                 ("schema:participant", "schema:ParentAudience")]:
        statements = add_if("mhdb:StudyWithParents", pred[0], pred[1],
                            statements)

    for pred in [("rdfs:label", language_string("Competition")),
                 ("rdfs:subClassOf", "schema:Event")]:
        statements = add_if("mhdb:Competition", pred[0], pred[1], statements)

    for pred in [("rdfs:label", language_string("Science Contest")),
                 ("rdfs:subClassOf", "mhdb:Competition")]:
        statements = add_if("mhdb:ScienceContest", pred[0], pred[1],
                            statements)

    for pred in [("rdfs:label", language_string("Massive Open Online Course")),
                 ("rdfs:subClassOf", "schema:Course")]:
        statements = add_if("mhdb:MOOC", pred[0], pred[1], statements)

    #TODO: define Toy, StudentProject, Hackathon, OutreachProgram, SupportGroup

    project = technology_xls.parse("Project", convert_float=False)
    homepage = technology_xls.parse("HomePageLink")
    type_of_project = technology_xls.parse("TypeOfProject")
    mhealthpeople = technology_xls.parse("MHealthPeople")
    research_study = technology_xls.parse("ResearchStudyOnProject")

    for row in project.iterrows():
        if isinstance(row[1]["project"], float) and np.isnan(
                row[1]["project"]):
            continue
        project_iri = check_iri(row[1]["project"])
        project_label = language_string(row[1]["project"])

        disorder_iris = [
            int(disorder_index.strip())
            for disorder_index in row[1]["disorder_index"].split(",")
        ] if ((isinstance(row[1]["disorder_index"], str)) and
              ("," in row[1]["disorder_index"])
              ) else [int(row[1]["disorder_index"])] if (
                  not isinstance(row[1]["disorder_index"], float) or
                  (not np.isnan(row[1]["disorder_index"]))) else None

        homepage_iris = object_split_lookup(row[1]["HomePageLink_index"],
                                            homepage, "index", "HomePageLink",
                                            ",")

        type_of_project_iris = object_split_lookup(
            row[1]["TypeOfProject_index"], type_of_project, "index", "IRI",
            ",")

        mhealthpeople_iris = object_split_lookup(row[1]["MHealthPeople_index"],
                                                 mhealthpeople, "index", "URL",
                                                 ",")

        study_iris = object_split_lookup(
            row[1]["ResearchStudyOnProjectLink_index"], research_study,
            "index", "ResearchStudyOnProjectLink", ",")
        disorder_statements = {}
        if disorder_iris and len(disorder_iris):
            for disorder in disorder_iris:
                disorder_statements = disorder_iri(
                    disorder,
                    mentalhealth_xls=mentalhealth_xls,
                    pre_specifiers_indices=[6, 7, 24, 25, 26],
                    post_specifiers_indices=[27, 28, 56, 78])
                statements = add_if(project_iri, "dcterms:subject",
                                    [k for k in disorder_statements][0], {
                                        **statements,
                                        **disorder_statements
                                    })

        if homepage_iris and len(homepage_iris):
            for homepage_iri in homepage_iris:
                for prop in [("schema:about", project_iri),
                             ("rdf:type", "schema:WebPage")]:
                    statements = add_if(homepage_iri, prop[0], prop[1],
                                        statements)

        if type_of_project_iris and len(type_of_project_iris):
            for type_of_project_iri in type_of_project_iris:
                statements = add_if(project_iri, "rdf:type",
                                    type_of_project_iri, statements)

        if mhealthpeople_iris and len(mhealthpeople_iris):
            for mhealthpeople_iri in mhealthpeople_iris:
                for prop in [("dcterms:contributor", mhealthpeople_iri)]:
                    statements = add_if(project_iri, prop[0], prop[1],
                                        statements)

        if study_iris and len(study_iris):
            for study_iri in study_iris:
                for prop in [("schema:about", project_iri),
                             ("rdf:type", "schema:ScholarlyArticle")]:
                    statements = add_if(study_iri, prop[0], prop[1],
                                        statements)

        for prop in [("rdfs:label", project_label),
                     ("rdfs:subClassOf", "schema:Product")]:
            statements = add_if(project_iri, prop[0], prop[1], statements)

    return (statements)
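
# Hypothetical invocation sketch for Project (not part of mhdb), mirroring
# the download_google_sheet pattern used in BehaviorSheet1 further down.
# The local path 'data/technology.xlsx' is an assumed cache location; the
# Google Sheet key comes from the docstring above.
def _demo_project():
    import pandas as pd
    try:
        from mhdb.spreadsheet_io import download_google_sheet
    except ImportError:
        from mhdb.mhdb.spreadsheet_io import download_google_sheet
    try:
        technologyFILE = download_google_sheet(
            'data/technology.xlsx',
            "1cuJXT1Un7HPLYcDyHAXprH-wGS1azuUNmVQnb3dV1cY"
        )
    except Exception:
        technologyFILE = 'data/technology.xlsx'
    technology_xls = pd.ExcelFile(technologyFILE)
    # returns the accumulated {subject: {predicate: {objects}}} dictionary
    return Project(technology_xls)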
def object_split_lookup(object_indices,
                        lookup_sheet,
                        lookup_key_column,
                        lookup_value_column,
                        separator=","):
    """
    Function to look up values in a foreign table for one or more
    separator-joined foreign keys.

    Parameters
    ----------
    object_indices: string
        string of one or more foreign keys, optionally joined by separator

    lookup_sheet: DataFrame
        foreign table

    lookup_key_column: string
        foreign table key column header

    lookup_value_column: string
        foreign table value column header

    separator: string
        default=","

    Returns
    -------
    object_iris: list of strings
        list of Turtle-formatted IRIs; an empty list if no keys are given
        or a lookup fails, None if a single key finds no match

    Example
    -------
    >>> import pandas as pd
    >>> sheet = pd.DataFrame({
    ...     "index": list(range(3)),
    ...     "bird": [":duck", ":goose", ":swan"]
    ... })
    >>> print(object_split_lookup(
    ...     object_indices="0/2",
    ...     lookup_sheet=sheet,
    ...     lookup_key_column="index",
    ...     lookup_value_column="bird",
    ...     separator="/"
    ... ))
    [':duck', ':swan']
    """
    try:
        if not isinstance(object_indices, float) and len(
                str(object_indices).strip()):
            object_indices = str(object_indices)
            if separator not in object_indices:
                object_iris = [
                    check_iri(
                        lookup_sheet[lookup_sheet[lookup_key_column] == int(
                            object_indices)][lookup_value_column].values[0])
                ] if lookup_sheet[lookup_sheet[lookup_key_column] == int(
                    object_indices)][lookup_value_column].values.size else None
            else:
                object_iris = [
                    int(s.strip()) for s in object_indices.split(separator)
                ]
                object_iris = [
                    check_iri(
                        lookup_sheet[lookup_sheet[lookup_key_column] ==
                                     object_i][lookup_value_column].values[0])
                    for object_i in object_iris
                ]
            return (object_iris)
        else:
            return ([])
    except:
        print(str(lookup_value_column))
        print(str(object_indices))
        return ([])
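
# A small follow-up to the doctest above (hypothetical helper, not part of
# mhdb), exercising the empty/NaN paths: an empty list comes back when no
# keys are given, which is why callers such as Project guard with
# truthiness checks.
def _demo_object_split_lookup_edges():
    import numpy as np
    import pandas as pd
    sheet = pd.DataFrame({"index": [0, 1], "bird": [":duck", ":goose"]})
    empty = object_split_lookup(np.nan, sheet, "index", "bird")
    # empty == []
    single = object_split_lookup("1", sheet, "index", "bird")
    # single == [check_iri(":goose")], i.e. one Turtle-formatted IRI
    return empty, single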
def MHealthPeople(technology_xls, statements={}):
    '''
    Function to ingest 1cuJXT1Un7HPLYcDyHAXprH-wGS1azuUNmVQnb3dV1cY
    MHealthPeople

    Parameters
    ----------
    technology_xls: spreadsheet workbook
        1cuJXT1Un7HPLYcDyHAXprH-wGS1azuUNmVQnb3dV1cY

    statements: dictionary
        key: string
            RDF subject
        value: dictionary
            key: string
                RDF predicate
            value: {string}
                set of RDF objects

    Returns
    -------
    statements: dictionary
        key: string
            RDF subject
        value: dictionary
            key: string
                RDF predicate
            value: {string}
                set of RDF objects

    Example
    -------
    # TODO
    '''
    for pred in [("rdfs:label", language_string("site")),
                 ("rdfs:comment",
                  language_string("Site, place or location of anything.")),
                 ("rdfs:range", "schema:Place"),
                 ("rdfs:range", "dcterms:Location"),
                 ("rdf:type", "rdf:Property")]:
        statements = add_if("mhdb:site", pred[0], pred[1], statements)

    mhealthpeople = technology_xls.parse("MHealthPeople")

    for row in mhealthpeople.iterrows():
        predicates = set()
        person_iri = check_iri(row[1]["URL"])
        person_label = language_string(row[1]["MHealthPeople/Labs"]) if (
            (len(str(row[1]["MHealthPeople/Labs"])))
            and not (isinstance(row[1]["MHealthPeople/Labs"], float))
            and not (str(row[1]["MHealthPeople/Labs"]).startswith("Also"))
        ) else None
        person_place = check_iri(row[1]["Site"]) if (
            len(str(row[1]["Site"]).strip())
            and not (isinstance(row[1]["Site"], float))) else None

        if person_label:
            predicates.add(("rdfs:label", person_label))

        if person_place:
            predicates.add(("mhdb:site", person_place))
            statements = add_if(person_place, "rdfs:label",
                                language_string(row[1]["Site"]), statements)

        if "<" in person_iri:
            predicates.add(("schema:WebPage", person_iri))

        if len(predicates):
            for prop in predicates:
                statements = add_if(person_iri, prop[0], prop[1], statements)

        for affiliate_i in range(1, 10):
            affiliate = "{0}{1}".format("Affiliate", str(affiliate_i))
            if row[1][affiliate] and len(str(
                    row[1][affiliate])) and not isinstance(
                        row[1][affiliate], float):
                affiliate_iri = check_iri(
                    row[1][affiliate].split("(")[1].rstrip(")")) if (
                        ("@" in row[1][affiliate]) or
                        ("://" in row[1][affiliate])
                    ) else check_iri(", ".join([
                        " ".join(
                            list(row[1][affiliate].strip().split("(")[0].split(
                                " ")[1:])).strip(), row[1][affiliate].strip().
                        split("(")[0].split(" ")[0].strip()
                    ])) if "(" in row[1][affiliate] else check_iri(", ".join([
                        " ".join(list(
                            row[1][affiliate].strip().split(" ")[1:])).strip(),
                        row[1][affiliate].strip().split(" ")[0].strip()
                    ]))
                affiliate_preds = {
                    (prop,
                     language_string(
                         row[1][affiliate].strip().split("(")[0].strip()
                         if "(" in row[1][affiliate]
                         else row[1][affiliate]))
                    for prop in ["rdfs:label", "foaf:name"]
                }
                if "(" in row[1][affiliate]:
                    if "@" in row[1][affiliate]:
                        affiliate_preds.add(("schema:email",
                                             check_iri(row[1][affiliate].split(
                                                 "(")[1].rstrip(")").strip())))
                    elif "://" in row[1][affiliate]:
                        affiliate_webpage = row[1][affiliate].split(
                            "(")[1].rstrip(")").strip()
                        affiliate_preds.add(("schema:WebPage",
                                             check_iri(row[1][affiliate].split(
                                                 "(")[1].rstrip(")").strip())))
                    elif "lab pup" in row[1][affiliate]:
                        affiliate_preds.add(
                            ("rdfs:comment", language_string("lab pup")))
                    else:
                        affiliate_preds.add(("mhdb:site",
                                             check_iri(row[1][affiliate].split(
                                                 "(")[1].rstrip(")").strip())))

                for pred in affiliate_preds:
                    statements = add_if(affiliate_iri, pred[0], pred[1],
                                        statements)

                statements = add_if(person_iri, "dcterms:contributor",
                                    affiliate_iri, statements)

    return (statements)
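
# Hypothetical invocation sketch for MHealthPeople (not part of mhdb).
# technology_xls is a pandas.ExcelFile of the workbook keyed in the
# docstring above (the Project sketch earlier shows one way to obtain it);
# the statements dictionaries these ingest functions return can be chained
# by feeding one call's result into the next call's `statements` argument.
def _demo_mhealthpeople(technology_xls):
    statements = Project(technology_xls)
    statements = MHealthPeople(technology_xls, statements=statements)
    return statements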
def disorder_iri(index,
                 mentalhealth_xls=None,
                 pre_specifiers_indices=[6, 7, 24, 25, 26],
                 post_specifiers_indices=[27, 28, 56, 78]):
    """
    Function to figure out IRIs for disorders based on
    mentalhealth.xls::Disorder

    Parameters
    ----------
    index: int
        key to lookup in Disorder table

    mentalhealth_xls: spreadsheet workbook, optional
        1MfW9yDw7e8MLlWWSBBXQAC2Q4SDiFiMMb7mRtr7y97Q

    pre_specifiers_indices: [int], optional
        list of indices of diagnostic specifiers to precede disorder names

    post_specifiers_indices: [int], optional
        list of indices of diagnostic specifiers to be preceded by disorder
        names

    Returns
    -------
    statements: dictionary
        key: string
            RDF subject
        value: dictionary
            key: string
                RDF predicate
            value: {string}
                set of RDF objects
    """
    disorder = mentalhealth_xls.parse("Disorder")
    severity = mentalhealth_xls.parse("DisorderSeverity")
    specifier = mentalhealth_xls.parse("DiagnosticSpecifier")
    criterion = mentalhealth_xls.parse("DiagnosticCriterion")
    disorderSeries = disorder[disorder["index"] == index]
    disorder_name = disorderSeries["DisorderName"].values[0]
    if (not isinstance(
            disorderSeries["DiagnosticSpecifier_index"].values[0],
            float)) or (not np.isnan(
                disorderSeries["DiagnosticSpecifier_index"].values[0])):
        disorder_name = " ".join([
            specifier[specifier["index"] ==
                      disorderSeries["DiagnosticSpecifier_index"].values[0]]
            ["DiagnosticSpecifierName"].values[0], disorder_name
        ]) if disorderSeries["DiagnosticSpecifier_index"].values[
            0] in pre_specifiers_indices else " ".join([
                disorder_name,
                specifier[specifier["index"] ==
                          disorderSeries["DiagnosticSpecifier_index"].
                          values[0]]["DiagnosticSpecifierName"].values[0]
            ]) if disorderSeries["DiagnosticSpecifier_index"].values[
                0] in post_specifiers_indices else ", ".join([
                    disorder_name,
                    specifier[specifier["index"] ==
                              disorderSeries["DiagnosticSpecifier_index"].
                              values[0]]["DiagnosticSpecifierName"].values[0]
                ])
    disorder_name = " with ".join([
        disorder_name,
        criterion[criterion["index"] ==
                  disorderSeries["DiagnosticInclusionCriterion_index"]]
        ["DiagnosticCriterionName"].values[0]
    ]) if (not isinstance(
        disorderSeries["DiagnosticInclusionCriterion_index"].values[0],
        float)) or (not np.isnan(
            disorderSeries["DiagnosticInclusionCriterion_index"].values[0])
                    ) else disorder_name
    disorder_name = " and ".join([
        disorder_name,
        criterion[criterion["index"] ==
                  disorderSeries["DiagnosticInclusionCriterion2_index"]]
        ["DiagnosticCriterionName"].values[0]
    ]) if (not isinstance(
        disorderSeries["DiagnosticInclusionCriterion2_index"].values[0],
        float)) or (not np.isnan(
            disorderSeries["DiagnosticInclusionCriterion2_index"].values[0])
                    ) else disorder_name
    disorder_name = " without ".join([
        disorder_name,
        criterion[criterion["index"] ==
                  disorderSeries["DiagnosticExclusionCriterion_index"]]
        ["DiagnosticCriterionName"].values[0]
    ]) if (not isinstance(
        disorderSeries["DiagnosticExclusionCriterion_index"].values[0],
        float)) or (not np.isnan(
            disorderSeries["DiagnosticExclusionCriterion_index"].values[0])
                    ) else disorder_name
    disorder_name = " and ".join([
        disorder_name,
        criterion[criterion["index"] ==
                  disorderSeries["DiagnosticExclusionCriterion2_index"]]
        ["DiagnosticCriterionName"].values[0]
    ]) if (not isinstance(
        disorderSeries["DiagnosticExclusionCriterion2_index"].values[0],
        float)) or (not np.isnan(
            disorderSeries["DiagnosticExclusionCriterion2_index"].values[0])
                    ) else disorder_name
    disorder_name = " ".join([
        severity[severity["index"] == int(
            disorderSeries["DisorderSeverity_index"])]
        ["DisorderSeverityName"].values[0], disorder_name
    ]) if (not isinstance(
        disorderSeries["DisorderSeverity_index"].values[0], float)) or (
            not np.isnan(disorderSeries["DisorderSeverity_index"].values[0])
        ) else disorder_name
    iri = check_iri(disorder_name)
    label = language_string(disorder_name)
    statements = {iri: {"rdfs:label": [label]}}
    return (statements)
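
# Hypothetical usage sketch for disorder_iri (not part of mhdb), reusing
# the mentalhealth workbook pattern from BehaviorSheet1 below. The index
# value 1 is a placeholder; the resulting IRI and label depend entirely on
# the Disorder sheet contents.
def _demo_disorder_iri():
    import pandas as pd
    try:
        from mhdb.spreadsheet_io import download_google_sheet
    except ImportError:
        from mhdb.mhdb.spreadsheet_io import download_google_sheet
    try:
        mentalhealthFILE = download_google_sheet(
            'data/mentalhealth.xlsx',
            "1MfW9yDw7e8MLlWWSBBXQAC2Q4SDiFiMMb7mRtr7y97Q")
    except Exception:
        mentalhealthFILE = 'data/mentalhealth.xlsx'
    mentalhealth_xls = pd.ExcelFile(mentalhealthFILE)
    # returns {disorder IRI: {"rdfs:label": {language-tagged label}}}
    return disorder_iri(1, mentalhealth_xls=mentalhealth_xls)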
def BehaviorSheet1(behavior_xls,
                   mentalhealth_xls=None,
                   sign_or_symptom=None,
                   statements={}):
    '''
    Function to ingest 1sQp63K5nGrYSgK2ZvsTfTDmlM4W5_eFHfy6Ckoi7yP4 Sheet1

    Parameters
    ----------
    behavior_xls: spreadsheet workbook
        1sQp63K5nGrYSgK2ZvsTfTDmlM4W5_eFHfy6Ckoi7yP4

    mentalhealth_xls: spreadsheet workbook, optional
        1MfW9yDw7e8MLlWWSBBXQAC2Q4SDiFiMMb7mRtr7y97Q

    statements:  dictionary
        key: string
            RDF subject
        value: dictionary
            key: string
                RDF predicate
            value: {string}
                set of RDF objects

    Returns
    -------
    statements: dictionary
        key: string
            RDF subject
        value: dictionary
            key: string
                RDF predicate
            value: {string}
                set of RDF objects

    Example
    -------
    >>> try:
    ...     from mhdb.spreadsheet_io import download_google_sheet
    ...     from mhdb.write_ttl import turtle_from_dict
    ... except:
    ...     from mhdb.mhdb.spreadsheet_io import download_google_sheet
    ...     from mhdb.mhdb.write_ttl import turtle_from_dict
    >>> import os
    >>> import pandas as pd
    >>> try:
    ...     behaviorFILE = download_google_sheet(
    ...         'data/separating.xlsx',
    ...         "1sQp63K5nGrYSgK2ZvsTfTDmlM4W5_eFHfy6Ckoi7yP4"
    ...     )
    ... except:
    ...     behaviorFILE = 'data/separating.xlsx'
    >>> behavior_xls = pd.ExcelFile(behaviorFILE)
    >>> statements = BehaviorSheet1(
    ...     behavior_xls
    ... )
    >>> print(turtle_from_dict({
    ...     statement: statements[
    ...         statement
    ...     ] for statement in statements if statement == "mhdb:despair"
    ... }).split("\\n\\t")[0])
    mhdb:despair rdfs:label """despair"""@en ;
    '''
    sheet = behavior_xls.parse("Sheet1")
    gender = behavior_xls.parse("gender")
    statements = audience_statements(statements)

    if not mentalhealth_xls:
        try:
            mentalhealthFILE = download_google_sheet(
                'data/mentalhealth.xlsx',
                "1MfW9yDw7e8MLlWWSBBXQAC2Q4SDiFiMMb7mRtr7y97Q")
        except:
            mentalhealthFILE = 'data/mentalhealth.xlsx'
        mentalhealth_xls = pd.ExcelFile(mentalhealthFILE)

    mh_reference = mentalhealth_xls.parse("Reference")

    for row in sheet.iterrows():
        sign_or_symptom = "health-lifesci:MedicalSign" if (
            row[1]["sign_or_symptom_index"]
        ) == 1 else "health-lifesci:MedicalSymptom" if (
            row[1]["sign_or_symptom_index"]
            == 2) else "health-lifesci:MedicalSignOrSymptom"

        source = mh_reference[mh_reference["index"] == row[1][
            "reference_index (refer to reference in our master spreadsheet."
            " 8=dsm, 84=us)"]]["ReferenceLink"].values[0]
        source = None if isinstance(source, float) else check_iri(source)

        symptom_label = language_string(row[1]["symptom"])

        symptom_iri = check_iri(row[1]["symptom"])

        audience_gender = gender[gender["index"] == row[1]
                                 ["gender_index"]]["gender"]

        audience_gender = None if not audience_gender.size else \
        audience_gender.values[
            0
        ]

        for predicates in [("rdfs:label", symptom_label),
                           ("rdfs:subClassOf", sign_or_symptom),
                           ("dcterms:source", source)]:
            statements = add_if(symptom_iri, predicates[0], predicates[1],
                                statements)

        if audience_gender:
            for prop in ["schema:audience", "schema:epidemiology"]:
                statements = add_if(symptom_iri, prop, audience_gender,
                                    statements)

    return (statements)
def follow_structure(row, files, stc, prefixes=None):
    """
    Function to follow format of "structure_to_keep"

    Parameters
    ----------
    row: Series
        pandas series from generator
        ie, row[1] for row in iterrows()

    files : dictionary
        {fn: string:
        file: DataFrame}
        one entry per unique value in structure_to_keep's "File" column

    stc : DataFrame

    prefixes : iterable of 2-tuples
        (prefix_string: string
        prefix_iri: string)
        defined RDF prefixes

    Returns
    -------
    ttl_dict : dictionary
        keys: str
            subjects
        values: sets of 2-tuple (str, str)
            [0]: predicate
            [1]: object
    """
    sheet = files[row.File].parse(row.Sheet)
    ttl_dict = dict()
    if row.Type != "foreign key":
        for structure_row in sheet.iterrows():
            subjects = structure_row[1][row.Indexed_Entity]
            if isinstance(subjects, str):
                subjects = subjects.split(
                    row.split_indexed_by
                ) if (
                        isinstance(
                            row.split_indexed_by,
                            str
                        ) and (
                                row.split_indexed_by in subjects
                        )
                ) else [subjects]
                subject = check_iri(
                    subjects[0],
                    prefixes
                )
                related_predicates = collect_predicates(
                    subject,
                    row,
                    structure_row[1],
                    files,
                    stc,
                    prefixes
                )
                ttl_dict[subject] = related_predicates if (
                        subject not in ttl_dict
                ) else (
                        ttl_dict[subject] |
                        related_predicates
                )
    return (ttl_dict)
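
# Illustrative sketch (not part of mhdb) of the ttl_dict shape that
# follow_structure returns, and of how predicate sets for a repeated
# subject are merged by set union, matching the `|` merge above. The
# example subject and literal are hypothetical.
def _demo_ttl_dict_merge():
    ttl_dict = {"mhdb:Wearable": {("rdfs:subClassOf", "ssn:Device")}}
    related_predicates = {("rdfs:label", '"""Wearable"""@en')}
    subject = "mhdb:Wearable"
    ttl_dict[subject] = related_predicates if (
        subject not in ttl_dict
    ) else (ttl_dict[subject] | related_predicates)
    return ttl_dict  # {"mhdb:Wearable": {two (predicate, object) 2-tuples}}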
def label(row, structure_row, prefixes):
    """
    Function to create and return a tuple of
    Turtle property and Turtle object for a
    given label

    Parameters
    ----------

    row : Series
        row from structure_to_keep
        pandas series from generator
        ie, row[1] for row in iterrows()

    structure_row : Series
        row indicated in row from structure_to_keep
        pandas series from generator
        ie, structure_row[1] for structure_row in iterrows()

    prefixes : iterable of 2-tuples
        (prefix_string: string
        prefix_iri: string)
        defined RDF prefixes


    Returns
    -------
    predicates : set of 2-tuples
        predicate[0]: string
            Turtle property

        predicate[1]: string
            Turtle object
    """
    texts = str(
        structure_row[
            row.Column_Header
        ]
    )
    texts = texts.split(
        row.split_indexed_by
    ) if (
        isinstance(
            row.split_indexed_by,
            str
        ) and (
            row.split_indexed_by in texts
        )
    ) else [texts]

    return(
        {
            tuple(
                (
                    check_iri(
                        row["Definition or Relationship"],
                        prefixes
                    ),
                    "\"\"\"{0}\"\"\"@en".format(
                        text.replace(
                            "\n",
                            " "
                        ).replace(
                            "\"",
                            "\\\""
                        ).strip()
                    )
                )
            ) for text in texts
        }
    )
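
# Hypothetical usage sketch for label (not part of mhdb): two pandas Series
# stand in for a structure_to_keep row and a data row. It assumes check_iri
# resolves "rdfs:label" against the supplied prefixes; the expected result
# is one (property, '"""..."""@en') tuple per comma-split text value, with
# quotes escaped.
def _demo_label():
    import pandas as pd
    row = pd.Series({
        "Definition or Relationship": "rdfs:label",
        "Column_Header": "symptom",
        "split_indexed_by": ","
    })
    structure_row = pd.Series({"symptom": 'feels "down", hopeless'})
    return label(
        row,
        structure_row,
        prefixes=[("rdfs", "http://www.w3.org/2000/01/rdf-schema#")]
    )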
def foreign(structure_row, related_row, files, stc, prefixes):
    """
    Function to follow (a) foreign key(s) and return a set of predicate tuples

    Parameters
    ----------
    structure_row : Series
        row indicated in row from structure_to_keep
        pandas series from generator
        ie, structure_row[1] for structure_row in iterrows()

    related_row : Series
        row indicated in row from structure_to_keep
        pandas series from generator
        ie, related_row[1] for related_row in iterrows()

    files : dictionary
        {fn: string:
        file: DataFrame}
        one entry per unique value in structure_to_keep's "File" column

    stc : DataFrame

    prefixes : iterable of 2-tuples
        (prefix_string: string
        prefix_iri: string)
        defined RDF prefixes

    Returns
    -------
    foreign_predicates: set of 2-tuples

        foreign_predicates[0]: string
            Turtle property

        foreign_predicates[1]: string
            Turtle object
    """
    foreign_predicates = set()
    fks = structure_row[
        related_row["Column_Header"]
    ]
    skb = related_row["split_key_by"]
    skb = skb if isinstance(skb, str) else None
    if isinstance(fks, float) and np.isnan(fks):
        return(set())
    fks = [
        int(float(fk)) for fk in str(fks).split(skb)
    ] if skb else [fks]
    svb = related_row["split_value_by"]
    svb = svb if isinstance(svb, str) else None
    if len(fks):
        for fk in fks:
            fvalues = follow_fk(
                files[
                    related_row["Foreign File"]
                ].parse(
                    related_row[
                        "Foreign Sheet"
                    ]
                ),
                related_row[
                    "Foreign Key Column_Header"
                ],
                related_row[
                    "Foreign Value Column_Header"
                ],
                fk
            )
            if (
                (fvalues is None)
                or
                (fvalues=="None")
            ) and (
                not related_row[
                    "Foreign Value Column_Backup_Header"
                ] in [
                    None,
                    np.nan,
                    "",
                    "None"
                ]
            ):
                fvalues = follow_fk(
                    files[
                        related_row["Foreign File"]
                    ].parse(
                        related_row[
                            "Foreign Sheet"
                        ]
                    ),
                    related_row[
                        "Foreign Key Column_Header"
                    ],
                    related_row[
                        "Foreign Value Column_Backup_Header"
                    ],
                    fk
                )
            fvalues = fvalues.split(
                svb
            ) if svb and fvalues else [fvalues]
            if fvalues:
                for fvalue in fvalues:
                    foreign_predicates.add(
                        (
                            check_iri(
                                related_row[
                                    "Definition or Relationship"
                                ],
                                prefixes
                            ),
                            check_iri(
                                fvalue,
                                prefixes
                            )
                        )
                    )
    return(foreign_predicates)