def update_comments_opinion_counts(debug=False):
    """
    Recompute the opinion counts of every comment of the repository.

    One SPARQL query counts, for each ``sioc:Post``, the notes given to it:
    all of them, the ``"yes"`` ones and the ``"no"`` ones. Every result line
    is then passed to ``save_items_in_repository`` under the ``"counts"``
    label, the update request of each line being built by
    ``build_comment_counts_update_request`` (called with ``debug``).

    Return whatever ``save_items_in_repository`` returns.
    """
    count_query = u"""
PREFIX sioc: <http://rdfs.org/sioc/ns#>
PREFIX lodep: <https://webgate.acceptance.ec.testa.eu/eparticipation/ontologies/LOD_Eparticipation/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?post
       COUNT(DISTINCT ?reply) AS ?count_tot
       COUNT(DISTINCT ?pos_reply) AS ?count_pos
       COUNT(DISTINCT ?neg_reply) AS ?count_neg
WHERE {
  ?post a sioc:Post .
  ?reply lodep:give_note_to ?post .
  OPTIONAL {
    ?pos_reply lodep:give_note_to ?post ;
               sioc:note "yes"^^rdfs:Literal .
  }
  OPTIONAL {
    ?neg_reply lodep:give_note_to ?post ;
               sioc:note "no"^^rdfs:Literal .
  }
}"""
    counted_posts = virtuoso_read(count_query)
    # One update request is built per counted comment; the request builder
    # is expected to set the like / dislike properties of the comment from
    # the values counted above.
    request_builder = partial(build_comment_counts_update_request,
                              debug=debug)
    return save_items_in_repository(counted_posts, u"counts", request_builder)
def load_documents(doc_base_uri,
                   default_creation_date=DEFAULT_DOC_CREATION_DATE):
    """
    Return a dictionary that gives for each language the tree structure of
    the document with the identifiers of each part of this structure (document,
    section, chapter, articles).

    For every entry of ``LANG_CODE2`` the URI of the language version is built
    with ``build_doc_part_uri``; its ``sioc:has_space`` path and optional
    ``lodep:created_at`` date are read from the repository, then the document
    content is requested from ``LODEPART_LOAD_DOC_URL``. Progress is written
    on standard output. A language is left out of the result when it is not
    found exactly once in the repository or when the application returns no
    usable content (empty or non-JSON answer).

    :param doc_base_uri: document URI given to ``build_doc_part_uri``
        together with each language code.
    :param default_creation_date: ``datetime`` stored as creation date when
        the repository holds none for a language version.
    :returns: dictionary whose keys are the values of ``LANG_CODE2`` and
        whose values are the structures built by ``build_doc_structure``,
        completed with a ``"creation_date"`` entry.
    :raises ValueError: when the identifier found in the content returned by
        the application is not the end of the language version URI.
    """
    docs = {}
    for lang_code3, lang_code2 in LANG_CODE2.items():
        sys.stdout.write(u"   {0} ".format(lang_code2.upper()))
        sys.stdout.flush()
        # Get document content path from repository
        lng_doc_uri = build_doc_part_uri(doc_base_uri, lang_code3)
        req = u"""
PREFIX sioc: <http://rdfs.org/sioc/ns#>
PREFIX lodep: <https://webgate.acceptance.ec.testa.eu/eparticipation/ontologies/LOD_Eparticipation/>
SELECT DISTINCT ?doc_space
       ?doc_date
WHERE {
  <%(uri)s> sioc:has_space ?doc_space .
  OPTIONAL {
    <%(uri)s> lodep:created_at ?doc_date .
  }
}""" % {
            u"uri": lng_doc_uri,
        }
        result = virtuoso_read(req)
        if len(result) != 1:
            sys.stdout.write(u"not found in repository\n")
            continue
        doc_info = result[0]
        # Get actual document content from application
        resp = requests.get(LODEPART_LOAD_DOC_URL,
                            params={u"path": doc_info[u"doc_space"]})
        try:
            content = resp.json()
        except ValueError:
            # A body that is not JSON (error page, empty answer...) is
            # handled like an empty content instead of aborting the loading
            # of all the other languages.
            content = {}
        if len(content) == 0:
            sys.stdout.write(u"no content retrieved from application\n")
            continue
        lng_doc_id = content[u"uri"]
        if not lng_doc_uri.endswith(lng_doc_id):
            sys.stdout.write(u"error\n\n")
            raise ValueError(
                u"Inconsistent document content retrieved from "
                u"application")
        # Strip the document identifier and the separator that precedes it
        # to get the base of the URIs of the document parts
        base_uri = lng_doc_uri[:-(len(lng_doc_id) + 1)]
        docs[lang_code2] = build_doc_structure(content, base_uri)
        if u"doc_date" in doc_info:
            docs[lang_code2][u"creation_date"] = datetime.strptime(
                doc_info[u"doc_date"], u"%Y-%m-%dT%H:%M:%S")
        else:
            str_d = default_creation_date.strftime(u"%Y-%m-%d %H:%M:%S")
            sys.stdout.write(
                u"No creation date in repository, using default one "
                u"({0})\n      ".format(str_d))
            docs[lang_code2][u"creation_date"] = default_creation_date
        sys.stdout.write(u"ok\n")
    return docs
def run():
    """
    Create the test users described in the users file, then activate them.

    The users are read from the JSON file ``USERS_FILENAME`` of ``DATA_DIR``
    and created one by one with ``create_user``. The activation tokens of the
    members of the test users group are then read from the repository, and
    ``LODEPART_ACTIVATION_URL`` is requested for every user that was
    successfully created. Progress and errors are written on standard output;
    a user whose creation or activation fails does not stop the processing of
    the other users.
    """
    with open(osp.join(DATA_DIR, USERS_FILENAME)) as stream:
        data = json.load(stream)
    # Users creation
    users_to_activate = {}
    nb_users = len(data)
    for idx, user in enumerate(data.values()):
        sys.stdout.write(u"Adding user {0} ({1}/{2})...".format(
            user[u"user_id"], idx + 1, nb_users))
        sys.stdout.flush()
        res = create_user(user)
        if res:
            sys.stdout.write(u" ok\n")
            users_to_activate[user[u"user_id"]] = {
                u"lang": user.get(u"lang", u"en"),
                u"email": user[u"email"],
            }
        else:
            sys.stdout.write(u" ===> ERROR!\n")
        # Pause between two users
        sleep(0.25)
    # Users activation
    sys.stdout.write(u"\n")
    # The ``foaf`` prefix is declared explicitly so that the request does not
    # depend on prefixes predefined by the SPARQL endpoint.
    req = u"""
PREFIX sioc: <http://rdfs.org/sioc/ns#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX lodep: <https://webgate.acceptance.ec.testa.eu/eparticipation/ontologies/LOD_Eparticipation/>
SELECT ?user ?email ?token WHERE {
  ?user sioc:member_of ?grp ;
        foaf:mbox ?email ;
        lodep:token_id ?token .
  FILTER (
    STRENDS(STR(?grp), "%(grp_postfix)s")
  )
}
""" % {
        u"grp_postfix": TEST_USERS_GROUP_POSTFIX.replace(u" ", u"_"),
    }
    results = virtuoso_read(req)
    # Tokens are indexed by the ``foaf:mbox`` value read from the repository;
    # they are looked up below with the MD5 digest of the user email
    # (presumably the repository stores this digest as mailbox -- to be
    # confirmed against the user creation code).
    user_tokens = {
        res_line[u"email"]: res_line[u"token"]
        for res_line in results
    }
    nb_to_activate = len(users_to_activate)
    for idx, (user_id, user_data) in enumerate(users_to_activate.items()):
        sys.stdout.write(u"Activating user {0} ({1}/{2})...".format(
            user_id, idx + 1, nb_to_activate))
        sys.stdout.flush()
        md5_email = hashlib.md5(
            user_data[u"email"].strip().encode(u"ascii")).hexdigest()
        token = user_tokens.get(md5_email)
        if token is None:
            # No token in the repository for this user: report the error and
            # go on with the other users instead of failing on a KeyError.
            sys.stdout.write(
                u" ===> ERROR! (no activation token found in repository)\n")
            continue
        params = {
            u"lang": user_data[u"lang"],
            u"token": token,
        }
        resp = requests.get(LODEPART_ACTIVATION_URL, params=params)
        if resp.status_code == requests.codes.ok:
            sys.stdout.write(u" ok\n")
        else:
            sys.stdout.write(u" ===> ERROR!\n")
        sleep(0.25)
    sys.stdout.write(u"\n")
Beispiel #4
0
def generate_comments(docs,
                      plays,
                      pers,
                      comments_low_max=COMMENTS_LOW_MAX,
                      comments_high_max=COMMENTS_HIGH_MAX,
                      low_max_percent=COMMENTS_LOW_MAX_PERCENT,
                      replies_max=REPLIES_MAX):
    """
    Generate comments for the document parts of every available language.

    The processed languages are the keys of ``docs`` that are also keys of
    ``LANGUAGES``, in sorted order. The URIs of the ``sioc:Post`` resources
    already stored in the repository are collected and given to
    ``generate_comments_for_item`` as ``used_uris``. The generation
    parameters (medians and proportions per language, deviations and numbers
    of comments per document part) are chosen by the ``choose_*`` helpers
    before the actual generation.

    Return the list of all the generated comments; progress is written on
    standard output.
    """
    comments = []
    available_langs = sorted(
        set(docs.keys()).intersection(set(LANGUAGES.keys())))
    # Get URIs already used
    used_uris_query = u"""
PREFIX sioc: <http://rdfs.org/sioc/ns#>
SELECT ?comm
WHERE {
   ?comm a sioc:Post .
}"""
    used_uris = {
        res_line[u"comm"] for res_line in virtuoso_read(used_uris_query)
    }
    # Choose parameters for the comments generation
    lang_medians = choose_language_opinion_medians(available_langs)
    lang_props = choose_language_comments_proportion(available_langs)
    # Both dictionaries are fed back to the helpers, language after language
    doc_part_devs = {}
    doc_part_nums = {}
    for lang in available_langs:
        lang_doc = docs[lang]
        lang_data = LANGUAGES[lang]
        doc_part_devs = choose_doc_part_deviations(lang_doc, lang_data,
                                                   doc_part_devs)
        doc_part_nums = choose_doc_part_comment_numbers(
            lang_doc,
            lang_data,
            lang_props[lang],
            doc_part_nums,
            low_max=comments_low_max,
            high_max=comments_high_max,
            low_percent=low_max_percent)
    # Actually generate the comments for all the document parts
    for lang in available_langs:
        doc_struct = docs[lang]
        sys.stdout.write(u"   {0} ".format(lang.upper()))
        sys.stdout.flush()
        lang_comments = generate_comments_for_item(
            doc_struct,
            plays,
            pers,
            lang,
            doc_struct[u"creation_date"],
            lang_medians,
            doc_part_devs,
            doc_part_nums,
            replies_max,
            used_uris=used_uris)
        comments.extend(lang_comments)
        sys.stdout.write(u"\n")
    return comments
def load_personae():
    """
    Build, for each play, the dictionary of its personae.

    The users are read from the JSON file ``USERS_FILENAME`` of ``DATA_DIR``.
    The data of each persona is stored under ``personae[play_id][persona]``
    and is completed with the URI of the matching user account found in the
    repository (``"user_uri"``) and with the identifier extracted from this
    URI by ``extract_user_id`` (``"lodepart_user_id"``).

    Raise ``ValueError`` when a user is not found exactly once in the
    repository.
    """
    with open(osp.join(DATA_DIR, USERS_FILENAME)) as stream:
        users = json.load(stream)
    copied_keys = (u"play_id", u"user_id", u"persona", u"email", u"password",
                   u"group", u"lang")
    personae = {}
    for user in users.values():
        persona = {key: user[key] for key in copied_keys}
        play_personae = personae.setdefault(persona[u"play_id"], {})
        play_personae[persona[u"persona"]] = persona
        # Get user URI from repository: the account is searched with the MD5
        # digest of the email and with the URI of the user group
        email_digest = hashlib.md5(
            persona[u"email"].strip().encode(u"ascii")).hexdigest()
        user_query = u"""
PREFIX sioc: <http://rdfs.org/sioc/ns#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT DISTINCT ?user_uri
WHERE {
  ?user_uri a sioc:UserAccount ;
            foaf:mbox <%(email)s> ;
            sioc:member_of <%(group)s> .
}""" % {
            u"email": email_digest,
            u"group": build_group_uri(persona[u"group"])
        }
        accounts = virtuoso_read(user_query)
        if len(accounts) != 1:
            raise ValueError(
                u"User {0} ({1}) can't be found in the repository. Have you "
                u"created the users?"
                u"".format(persona[u"persona"], persona[u"user_id"]))
        user_uri = accounts[0][u"user_uri"]
        persona[u"user_uri"] = user_uri
        persona[u"lodepart_user_id"] = extract_user_id(user_uri)
    return personae
Beispiel #6
0
def run(document_uri, comments_low_max, comments_high_max, low_max_percent,
        replies_max, default_creation_date):
    """
    Generate comments on a document and save them in the repository.

    After a check of the parameters, the steps are: loading of the document
    in the various languages, loading of the users and of their dialog lines,
    generation of the comments, saving of the document structure and of the
    comments, update of the opinion counts on comments and document parts.
    The counts stored on the document are finally read back and printed with
    the request used to get them.

    The process exits with status 1 when ``comments_high_max`` is lower than
    ``comments_low_max``, when ``low_max_percent`` is not between 0 and 100,
    or when no language version of the document could be loaded.
    """
    # Reject inconsistent parameters before reading anything
    if comments_low_max > comments_high_max:
        message = ("Low maximum number of comments ({0}) should be "
                   u"inferior to high maximum number of comments ({1})"
                   u"".format(comments_low_max, comments_high_max))
        sys.stderr.write(message)
        sys.exit(1)
    if low_max_percent > 100 or low_max_percent < 0:
        message = ("Percent of document parts using low maximum of "
                   u"comments ({0}) should be between 0 and 100"
                   u"".format(low_max_percent))
        sys.stderr.write(message)
        sys.exit(1)

    # Load everything needed by the generation
    sys.stdout.write(u"Loading the document in the various languages...\n")
    docs = load_documents(document_uri, default_creation_date)
    if not docs:
        sys.stderr.write(
            u"\nDocument with URI <{0}> not found in repository!\n\n"
            u"".format(document_uri))
        sys.exit(1)
    sys.stdout.write(u"Loading users' data...\n")
    pers = load_personae()
    sys.stdout.write(u"Loading users' dialog lines...\n")
    plays = load_plays()

    # Generate then save
    sys.stdout.write(u"Generating comments...\n")
    comments = generate_comments(docs, plays, pers, comments_low_max,
                                 comments_high_max, low_max_percent,
                                 replies_max)
    sys.stdout.write(u"Saving documents structure...\n")
    save_doc_structure(docs)
    sys.stdout.write(u"Saving comments...\n")
    save_comments(comments)
    sys.stdout.write(u"Updating counts of opinions on comments ...\n")
    update_comments_opinion_counts()
    sys.stdout.write(u"Updating counts of opinions on document parts ...\n")
    update_doc_parts_opinion_counts()

    # Read back the counts now stored on the document and print them
    counts_query = u"""
PREFIX lodep: <https://webgate.acceptance.ec.testa.eu/eparticipation/ontologies/LOD_Eparticipation/>
SELECT ?doc_uri ?total_na ?yes_na ?no_na ?mixed_na ?total ?yes ?no ?mixed
WHERE {
  OPTIONAL {
    ?doc_uri lodep:num_items_total_na ?total_na .
  }
  OPTIONAL {
    ?doc_uri lodep:num_items_yes_na ?yes_na .
  }
  OPTIONAL {
    ?doc_uri lodep:num_items_no_na ?no_na .
  }
  OPTIONAL {
    ?doc_uri lodep:num_items_mixed_na ?mixed_na .
  }
  OPTIONAL {
    ?doc_uri lodep:num_items_total ?total .
  }
  OPTIONAL {
    ?doc_uri lodep:num_items_yes ?yes .
  }
  OPTIONAL {
    ?doc_uri lodep:num_items_no ?no .
  }
  OPTIONAL {
    ?doc_uri lodep:num_items_mixed ?mixed .
  }
  FILTER (
    ?doc_uri = <%s>
  )
}""" % document_uri
    counts = virtuoso_read(counts_query)
    sys.stdout.write(u"\nEnd of comments creation\n\n")
    sys.stdout.write(u"SPARQL Request:\n{0}\n\n".format(counts_query))
    sys.stdout.write(u"JSON result:\n\n{0}\n\n".format(str(counts)))
from const import VIRTUOSO_URL


# Manual check of the connection to the repository: print the configured
# endpoint, then print and run two requests that list all the triples of a
# given subject.
print("Virtuoso URL: {0}".format(VIRTUOSO_URL))


# NOTE(review): <_MY_TEST_COMMENT> and <_MY_TEST_DOCUMENT> look like
# placeholders to be replaced by real URIs before running -- confirm.
for req in (
        """
SELECT ?s ?p ?o {
  ?s ?p ?o .
  FILTER (
    ?s = <_MY_TEST_COMMENT>
  )
}
""",
        """
SELECT ?s ?p ?o {
  ?s ?p ?o .
  FILTER (
    ?s = <_MY_TEST_DOCUMENT>
  )
}
""",
):
    print(req)
    res = virtuoso_read(req)
    print(u"--> {0}".format(str(res)))
Beispiel #8
0
def run():
    """
    Remove from the repository the data attached to the test users.

    All the triples of the posts created by the members of the test users
    group are deleted from ``LODEPART_GRAPH``. The ``sioc:Thread`` document
    parts that have neither child nor post are then deleted, again and again
    until none is left. The opinion counts on comments and document parts
    are finally updated. Progress is written on standard output.
    """
    # Update request deleting all the triples of the posts of a user
    delete_posts_tpl = u"""
PREFIX sioc: <http://rdfs.org/sioc/ns#>
WITH <%(graph)s> DELETE {
  ?post ?p ?o
} WHERE {
  ?post sioc:has_creator <%(user)s> .
  ?post ?p ?o .
}"""
    # Selects the document parts with no comment and no child
    empty_parts_query = u"""
PREFIX sioc: <http://rdfs.org/sioc/ns#>
SELECT DISTINCT ?doc_part
WHERE {
  ?doc_part a sioc:Thread .
  FILTER (
    NOT EXISTS {
      ?child sioc:has_parent ?doc_part .
    }
    &&
    NOT EXISTS {
      ?post sioc:has_container ?doc_part .
    }
  )
}"""
    # Update request deleting all the triples of a document part
    delete_part_tpl = u"""
PREFIX sioc: <http://rdfs.org/sioc/ns#>
WITH <%(graph)s> DELETE {
  <%(doc_part)s> ?p ?o
} WHERE {
  <%(doc_part)s> ?p ?o
}"""

    sys.stdout.write(u"Getting all test users... \n")
    users_query = u"""
PREFIX sioc: <http://rdfs.org/sioc/ns#>
SELECT ?user
WHERE {
  ?user sioc:member_of ?grp .
  FILTER (
    STRENDS(STR(?grp), "%(grp_postfix)s")
  )
}""" % {
        u"grp_postfix": TEST_USERS_GROUP_POSTFIX.replace(u" ", u"_"),
    }
    test_users = virtuoso_read(users_query)

    sys.stdout.write(u"Deleting all comments written by these users... \n")
    for test_user in test_users:
        # The result line is completed with the graph so that it can be used
        # directly to fill the request template
        test_user[u"graph"] = LODEPART_GRAPH
        virtuoso_write(delete_posts_tpl % test_user)

    # Deleting a document part may leave its parent without any child, hence
    # the selection is run again after each round of deletions
    sys.stdout.write(
        u"Deleting all document parts that now have no comment... "
        u"\n")
    empty_parts = virtuoso_read(empty_parts_query)
    while len(empty_parts) != 0:
        for empty_part in empty_parts:
            empty_part[u"graph"] = LODEPART_GRAPH
            virtuoso_write(delete_part_tpl % empty_part)
        empty_parts = virtuoso_read(empty_parts_query)

    # Finally update the opinion counts on comments and document parts
    sys.stdout.write(u"Updating counts of opinions on comments ...\n")
    update_comments_opinion_counts()
    sys.stdout.write(u"Updating counts of opinions on document parts ...\n")
    update_doc_parts_opinion_counts()
def check_documents():
    """
    Check the documents of the repository and print a report.

    The documents are the ``sioc:Forum`` resources without
    ``sioc:has_parent`` link. For each of them and for each language of
    ``LANG_CODE2``, the language version is searched in the repository and
    its content is requested from ``LODEPART_LOAD_DOC_URL``. Every problem
    met is written on standard output, followed by a synthesis listing, for
    each document, the language versions that were properly retrieved.
    """
    # Find all documents defined in the repository
    docs_query = u"""
PREFIX sioc: <http://rdfs.org/sioc/ns#>
SELECT DISTINCT ?doc_uri
WHERE {
  ?doc_uri a sioc:Forum .
  FILTER(
    NOT EXISTS {
      ?doc_uri sioc:has_parent ?parent .
    }
  )
}"""
    repo_docs = virtuoso_read(docs_query)
    sys.stdout.write(u"\nFound {0:d} document(s) in repository\n"
                     u"".format(len(repo_docs)))
    found_docs = {}
    for repo_doc in repo_docs:
        # Search language versions of each document
        doc_uri = repo_doc[u"doc_uri"]
        sys.stdout.write(u"\n** Document {0}\n".format(doc_uri))
        valid_versions = found_docs[doc_uri] = []
        if not doc_uri.startswith(LODEPART_BASE_URI):
            sys.stdout.write(
                u"   ---> Document URI doesn't start with the base URI defined "
                u"in the `const.py` file\n        <{0}> vs. <{1}>\n"
                u"        PLEASE CHECK THIS IS CORRECT!\n"
                u"".format(doc_uri, LODEPART_BASE_URI))
        if LODEPART_BASE_URI.endswith(u"/"):
            sys.stdout.write(
                u"   ---> Base URI defined in the `const.py` file ends with "
                u"\"/\"\n        <{0}>\n        PLEASE CHECK THIS IS CORRECT!\n"
                u"".format(LODEPART_BASE_URI))
        for lang_code3, lang_code2 in LANG_CODE2.items():
            sys.stdout.write(u"\n   {0} version:\n".format(lang_code2.upper()))
            # Get document content path from repository
            lng_doc_uri = build_doc_part_uri(doc_uri, lang_code3)
            space_query = u"""
PREFIX sioc: <http://rdfs.org/sioc/ns#>
SELECT DISTINCT ?doc_space
WHERE {
  <%(uri)s> sioc:has_space ?doc_space .
}""" % {
                u"uri": lng_doc_uri,
            }
            spaces = virtuoso_read(space_query)
            if len(spaces) == 0:
                sys.stdout.write(
                    u"      ---> Document <{0}> not found in repository\n"
                    u"           SKIPPING DOCUMENT!\n".format(lng_doc_uri))
                continue
            if len(spaces) > 1:
                sys.stdout.write(
                    u"      ---> Multiple documents <{0}> found in repository\n"
                    u"           REPOSITORY IS INCONSISTENT! SKIPPING DOCUMENT!"
                    u"\n".format(lng_doc_uri))
                continue
            sys.stdout.write(
                u"      Document <{0}> found in repository; trying to load "
                u"document content from Web application\n"
                u"".format(lng_doc_uri))
            # Get actual document content from application
            doc_space = spaces[0][u"doc_space"]
            resp = requests.get(LODEPART_LOAD_DOC_URL,
                                params={u"path": doc_space})
            try:
                content = resp.json()
            except ValueError:
                # An answer that is not JSON is handled like an empty content
                content = {}
            if len(content) == 0:
                sys.stdout.write(
                    u"      ---> No content found in Web application for "
                    u"document \"{0}\"\n           SKIPPING DOCUMENT!\n"
                    u"".format(doc_space))
                continue
            lng_doc_id = content[u"uri"]
            if lng_doc_uri.endswith(lng_doc_id):
                sys.stdout.write(
                    u"      Document content properly retrieved from "
                    u"Web application\n   Ok!\n")
                valid_versions.append(lang_code2)
            else:
                sys.stdout.write(
                    u"      ---> Document ID read from content is "
                    u"inconsistent with document URI\n"
                    u"           \"{0}\" vs <{1}>\n"
                    u"           SKIPPING DOCUMENT!\n"
                    u"".format(lng_doc_id, lng_doc_uri))
    # Write a synthesis: only the documents with at least one valid language
    # version are listed
    sys.stdout.write(u"\nSynthesis\n=========\n\n")
    usable_docs = [
        (doc_uri, versions)
        for doc_uri, versions in sorted(found_docs.items())
        if len(versions) != 0
    ]
    for doc_uri, versions in usable_docs:
        sys.stdout.write(u"* {0}\n".format(doc_uri))
        sys.stdout.write(u"  versions: {0}\n"
                         u"".format(u", ".join(
                             [lng.upper() for lng in sorted(versions)])))
    if not usable_docs:
        sys.stdout.write(u"No document found\n")
    sys.stdout.write(u"\n")
def update_doc_parts_opinion_counts(debug=False):
    """
    Recompute the opinion counts of the document parts and save them.

    The comments (total, positive, negative and mixed ones) are first counted
    for each document part. The document trees are then rebuilt from the
    ``sioc:has_parent`` links of the repository, corrected and cleaned, and
    the counts are set and propagated inside these trees. The update requests
    built from the trees are finally handed to ``save_items_in_repository``
    under the ``"counts"`` label.

    :param debug: passed to
        ``build_opinion_counts_update_from_doc_structure``.
    :returns: the value returned by ``save_items_in_repository``.
    """
    # Count the positive, negative and mixed notes on the comments for each
    # document part in the repository
    req = u"""
PREFIX sioc: <http://rdfs.org/sioc/ns#>
PREFIX lodep: <https://webgate.acceptance.ec.testa.eu/eparticipation/ontologies/LOD_Eparticipation/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?doc_part
       COUNT(DISTINCT ?comment) AS ?count_tot
       COUNT(DISTINCT ?pos_comment) AS ?count_pos
       COUNT(DISTINCT ?neg_comment) AS ?count_neg
       COUNT(DISTINCT ?mix_comment) AS ?count_mix
WHERE {
  ?comment a sioc:Post ;
           sioc:has_container ?doc_part .
  OPTIONAL {
    ?pos_comment a sioc:Post ;
                 sioc:has_container ?doc_part ;
                 sioc:note "yes"^^rdfs:Literal .
  }
  OPTIONAL {
    ?neg_comment a sioc:Post ;
                 sioc:has_container ?doc_part ;
                 sioc:note "no"^^rdfs:Literal .
  }
  OPTIONAL {
    ?mix_comment a sioc:Post ;
                 sioc:has_container ?doc_part ;
                 sioc:note "mixed"^^rdfs:Literal .
  }
}"""
    results = virtuoso_read(req)
    opinions = {}
    for res in results:
        opinions[res[u"doc_part"]] = {
            u"total": int(res[u"count_tot"]),
            u"positive": int(res[u"count_pos"]),
            u"negative": int(res[u"count_neg"]),
            u"mixed": int(res[u"count_mix"]),
        }
    # Read the document tree structures from the repository thanks to the
    # ``sioc:has_parent`` links
    req = u"""
PREFIX sioc: <http://rdfs.org/sioc/ns#>
SELECT ?doc_part
       ?doc_type
       ?parent_doc_part
       ?parent_doc_type
WHERE {
  ?doc_part a ?doc_type .
  ?doc_part sioc:has_parent ?parent_doc_part .
  ?parent_doc_part a ?parent_doc_type .
}"""
    results = virtuoso_read(req)
    doc_structs = {}
    doc_not_roots = set()
    for res in results:
        child_uri = res[u"doc_part"]
        parent_uri = res[u"parent_doc_part"]
        child = doc_structs.setdefault(
            child_uri, {
                u"uri": child_uri,
                u"is_forum": res[u"doc_type"].endswith(u"Forum"),
                u"repo_children": []
            })
        # The type of the parent comes from ``parent_doc_type``: using the
        # type of the child here would make the flag of a node depend on the
        # order of the result lines.
        parent = doc_structs.setdefault(
            parent_uri, {
                u"uri": parent_uri,
                u"is_forum": res[u"parent_doc_type"].endswith(u"Forum"),
                u"repo_children": []
            })
        parent[u"repo_children"].append(child)
        doc_not_roots.add(child_uri)
    # Only keep the roots of the trees at the first level of the dictionary;
    # the other nodes stay reachable through the ``repo_children`` lists
    for uri in doc_not_roots:
        doc_structs.pop(uri)
    # The document structures in the repository are very strange and can not
    # directly be used: "doc_root" is parent of "doc_root/eng", "doc_root/fra";
    # "doc_root/eng" is parent of "doc_root/art_001"; "doc_root/fra" is parent
    # of "doc_root/art_001"; "doc_root/art001" is parent of
    # "doc_root/art001/eng", "doc_root/art001/fra"; etc.
    # Correct the document structures to have: "doc_root" is parent of
    # "doc_root/art_001"; "doc_root/fra" is parent_of "doc_root/art001/fra";
    # "doc_root/eng" is parent of "doc_root/art_001/eng"; "doc_root"
    # has language versions "doc_root/eng", "doc_root/fra"; ""doc_root/art001"
    # has language versions "doc_root/art001/fra",  "doc_root/art001/eng"
    for struct in doc_structs.values():
        correct_doc_structure(struct)
        clean_doc_structure(struct)
    # Count the opinions for each document part inside the structures and
    # insert these counts in the repository
    update_requests = []
    for struct in doc_structs.values():
        set_opinion_counts_in_doc_structure(struct, opinions)
        propagate_opinion_counts_in_doc_structure(struct)
        update_requests.extend(
            build_opinion_counts_update_from_doc_structure(struct,
                                                           debug=debug))
    # The requests are already built, hence the identity function
    res = save_items_in_repository(update_requests, u"counts", (lambda x: x))
    return res
Beispiel #11
0
def find_doc_items_to_save(doc_struct):
    """
    Find what is missing in the repository for a document structure.

    ``doc_struct`` contains the document in all the processed languages. The
    function returns a couple of lists: the URIs of the document parts that
    don't exist in the repository as ``sioc:Thread``, and the
    ``(child URI, parent URI)`` links to weave between the parts and their
    parent.
    """

    def is_missing(uri):
        # True when the repository holds no ``sioc:Thread`` with this URI
        req = u"""
PREFIX sioc: <http://rdfs.org/sioc/ns#>
SELECT COUNT(*) AS ?cnt
WHERE {
  <%(uri)s> a sioc:Thread .
}""" % {
            u"uri": uri
        }
        return int(virtuoso_read(req)[0][u"cnt"]) == 0

    parts_and_roots = []
    for lng_doc_struct in doc_struct.values():
        parts_and_roots.extend(get_doc_parts_and_parents_uri(lng_doc_struct))
    checked_uris = set()
    doc_parts_to_create = []
    parent_links_to_weave = []
    for doc_part_uri, doc_lang_root_uri in parts_and_roots:
        # Whole documents (highest level of the tree) are already in the
        # repository, and a part is only checked once.
        if doc_lang_root_uri is None or doc_part_uri in checked_uris:
            continue
        part_is_missing = is_missing(doc_part_uri)
        checked_uris.add(doc_part_uri)
        if not part_is_missing:
            continue
        # Current PHP application stores a strange tree: doc_part is child
        # of generic_doc_part (without language specification) and
        # generic_doc_part is child of doc_lang_root (whole document with
        # language specification)
        generic_doc_part_uri = doc_part_uri[:doc_part_uri.rfind(u"/")]
        doc_parts_to_create.append(doc_part_uri)
        parent_links_to_weave.append((doc_part_uri, generic_doc_part_uri))
        # The generic part is created only when it has not been checked yet
        # and is missing from the repository...
        if generic_doc_part_uri not in checked_uris:
            checked_uris.add(generic_doc_part_uri)
            if is_missing(generic_doc_part_uri):
                doc_parts_to_create.append(generic_doc_part_uri)
        # ... but its link to the language root is weaved in every case
        parent_links_to_weave.append(
            (generic_doc_part_uri, doc_lang_root_uri))
    return doc_parts_to_create, parent_links_to_weave