Beispiel #1
0
def operation_pref_conjugaison(lang):
    """Compute and store ``LabelNamePreference`` for every conjugation row.

    The raw score of a row is
    ``sqrt(len(AlternativeFormsOther)) + sqrt(len(ExplainationTxt))``.
    It is divided by the reference score returned by
    ``calculate_they_read()`` and then by 2, clamped to the range
    ``[0, 1]`` and written back together with ``Operation_Pref = 1``.

    Args:
        lang: Unused by this function.
    """
    log.info("Doing operation Set Property: LabelNamePreference: Conjugaison")

    preference_base = calculate_they_read()

    for c in DBRead(DBConjugations,
                    sql=" SELECT * FROM conjugations ",
                    cls=ConjugationsItem):

        log.info("  set Property: LabelNamePreference: %s", c)

        preference = (math.sqrt(len(c.AlternativeFormsOther)) +
                      math.sqrt(len(c.ExplainationTxt)))

        # Normalise against the reference row; a row scoring the same as the
        # reference ends up at 0.5.
        preference = preference / preference_base / 2

        # Clamp to [0, 1].  Mapping every positive score to 1 would throw
        # away the normalised value computed above.
        LabelNamePreference = min(max(preference, 0), 1)

        DBExecute(
            DBConjugations, """
            UPDATE conjugations
               SET LabelNamePreference = ?, 
                   Operation_Pref = 1 
             WHERE PK = ?
             """, LabelNamePreference, c.PK)
Beispiel #2
0
def operation_pref_wiktionary(lang):
    """Compute and store ``LabelNamePreference`` for every wiktionary row.

    The raw score comes from ``calculate_preference_wiktionary``.  It is
    divided by the reference score returned by ``calculate_cat_felidae()``
    and then by 2 (so the reference row itself scores 0.5), clamped to
    ``[0, 1]`` and written back together with ``Operation_Pref = 1``.

    Args:
        lang: Unused by this function.
    """
    log.info("Doing operation Set Property: LabelNamePreference: Wiktionary")

    preference_base = calculate_cat_felidae()

    for wt in DBRead(DBWiktionary,
                     sql=" SELECT * FROM wiktionary ",
                     cls=WiktionaryItem):

        log.info("  set Property: LabelNamePreference: %s", wt)

        preference = calculate_preference_wiktionary(wt)
        preference = preference / preference_base / 2

        # Clamp to [0, 1].  Mapping every positive score to 1 would throw
        # away the normalised value computed above.
        LabelNamePreference = min(max(preference, 0), 1)

        DBExecute(
            DBWiktionary, """
            UPDATE wiktionary 
               SET LabelNamePreference = ?, 
                   Operation_Pref = 1 
             WHERE PrimaryKey = ?
             """, LabelNamePreference, wt.PrimaryKey)
Beispiel #3
0
def operation_pref_wikipedia(lang):
    """Compute and store ``LabelNamePreference`` for every wikipedia row.

    The raw score of a row is the sum of the square roots of the lengths of
    ``SeeAlsoWikipediaLinks``, ``ExplainationWPTxt`` and
    ``ExplainationExamplesTxt``.  It is divided by the reference score
    returned by ``calculate_cat_felidae()`` and then by 2, clamped to
    ``[0, 1]`` and written back together with ``Operation_Pref = 1``.

    Args:
        lang: Unused by this function.
    """
    log.info("Doing operation Set Property: LabelNamePreference: Wikipedia")

    preference_base = calculate_cat_felidae()

    for wp in DBRead(DBWikipedia,
                     sql=" SELECT * FROM wikipedia ",
                     cls=WikipediaItem):

        log.info("  set Property: LabelNamePreference: %s", wp)

        preference = (math.sqrt(len(wp.SeeAlsoWikipediaLinks)) +
                      math.sqrt(len(wp.ExplainationWPTxt)) +
                      math.sqrt(len(wp.ExplainationExamplesTxt)))

        # Divide by the value of the CAT-FELIDAE reference and by 2.
        preference = preference / preference_base / 2

        # If < 0 then 0, if > 1 then 1, otherwise keep the normalised value.
        # Mapping every positive score to 1 would discard the division above.
        LabelNamePreference = min(max(preference, 0), 1)

        DBExecute(
            DBWikipedia, """
            UPDATE wikipedia
               SET LabelNamePreference = ?, 
                   Operation_Pref = 1 
             WHERE PK = ?
             """, LabelNamePreference, wp.PK)
Beispiel #4
0
def calculate_they_read():
    """Return the preference score of the "they read" reference conjugation.

    Opens ``conjugations-they-read.sqlite3``, reads the conjugation whose PK
    is ``en§read§Verb_To_read_They_Indicative_Present§5`` and scores it as
    ``sqrt(len(AlternativeFormsOther)) + sqrt(len(ExplainationTxt))``.

    Returns:
        The score as a float (the original author recorded
        7.244997998398398 for their data).

    Raises:
        Exception: If the reference row is not present in the database.
    """
    DB_THEY_READ = sqlite3.connect("conjugations-they-read.sqlite3")

    try:
        rows = DBRead(
            DB_THEY_READ,
            sql=
            "SELECT * FROM conjugations WHERE PK = 'en§read§Verb_To_read_They_Indicative_Present§5'",
            cls=ConjugationsItem)

        try:
            c = next(rows)
        except StopIteration:
            raise Exception("No THEY READ") from None

    finally:
        # Close on every path; without this the connection leaked whenever
        # the reference row was missing.
        DB_THEY_READ.close()

    preference = (math.sqrt(len(c.AlternativeFormsOther)) +
                  math.sqrt(len(c.ExplainationTxt)))

    return preference  # 7.244997998398398
Beispiel #5
0
def vectorize_properties_wikipedia():
    """Vectorize every wikipedia row and store the result on that row.

    Each row of ``wikipedia`` is passed to ``Vectorize_PKS`` (with the row's
    ``LanguageCode`` as default language); the resulting entries are
    JSON-encoded into the ``*_Vect`` columns and ``Operation_Vectorizer`` is
    set to 1.

    The update targets the ``wikipedia`` table in ``DBWikipedia`` keyed by
    ``PK`` - the same database the rows are read from.  Writing to the
    ``wiktionary`` table with a wikipedia key would not update the rows
    that were just vectorized.

    NOTE(review): assumes the ``wikipedia`` table has the ``*_Vect`` and
    ``Operation_Vectorizer`` columns named below - confirm against the
    schema.
    """
    log.info("Vectorizing wikipedia")

    for wp in DBRead(DBWikipedia, sql=" SELECT * FROM wikipedia ", cls=dict):

        log.info("  vectorize: %s", wp["LabelName"])

        vectorized = Vectorize_PKS(wp, default_language=wp["LanguageCode"])

        DBExecute(
            DBWikipedia, """
            UPDATE wikipedia
               SET
                   Description_Vect = ?,
                   AlsoKnownAs_Vect = ?,
                   Instance_of_Vect= ?,
                   Subclass_of_Vect = ?,
                   Part_of_Vect = ?,
                   Operation_Vectorizer = 1
             WHERE PK = ?
             """, to_json(vectorized["Description"]),
            to_json(vectorized["AlsoKnownAs"]),
            to_json(vectorized["Instance_of"]),
            to_json(vectorized["Subclass_of"]),
            to_json(vectorized["Part_of"]),
            wp["PK"])
Beispiel #6
0
def merge(wp: WikipediaItem) -> Iterable[WordItem]:
    """Merge one wikipedia item into the ``words`` table's items.

    Existing words are looked up case-insensitively by language code, label
    name and wikipedia URL.  Their descriptions are matched against the
    item's ``ExplainationWPTxt`` with ``Match_List_PKS_With_Lists_Of_PKS``.

    Yields:
        Every matched word after ``merge_words`` has been applied and the
        item's ``PK`` appended to its ``MergedWith``; or, when nothing
        matched (or nothing was found), a single fresh ``WordItem`` filled
        from the item.
    """
    log.info(wp)

    # Lookup by language, label and URL (all case-insensitive).
    query = """ SELECT * 
                FROM words 
               WHERE LanguageCode = ?       COLLATE NOCASE
                 AND LabelName = ?          COLLATE NOCASE 
                 AND Ext_Wikipedia_URL = ?  COLLATE NOCASE 
             """  # ci_index

    candidates = list(
        DBRead(DBWord,
               sql=query,
               params=[wp.LanguageCode, wp.LabelName, wp.SelfUrlWikipedia],
               cls=WordItem))

    matched = []
    if candidates:
        descriptions = tuple(candidate.Description for candidate in candidates)
        explanations = (wp.ExplainationWPTxt, )

        pairs = Match_List_PKS_With_Lists_Of_PKS(descriptions, explanations)

        # A candidate counts as matched when its pair points at this item's
        # explanation text.
        for candidate, (_, right) in zip(candidates, pairs):
            if right == wp.ExplainationWPTxt:
                matched.append(candidate)

    if not matched:
        # Nothing to merge into: emit a brand-new word.
        log.debug("new: %s", wp)
        fresh = WordItem()
        merge_words(fresh, wp)
        yield fresh
        return

    for word in matched:
        log.debug("[ OK ] matched: %s == %s", word, wp)
        merge_words(word, wp)
        word.MergedWith.append(wp.PK)
        yield word
Beispiel #7
0
def load_wiktionary_one(DBWord, lang, label):
    """Merge the wiktionary entries for one (language, label) into ``words``.

    Entries are selected case-insensitively by ``LanguageCode`` and
    ``LabelName``.  Every word produced by ``merge`` is written to the
    ``words`` table with ``if_exists="replace"``, after which the entry is
    flagged with ``Operation_Merging = 1``.

    Args:
        DBWord: Database handle the merged words are written to.
        lang: Language code to select.
        label: Label name to select.
    """
    entries = DBRead(
        DBWiktionary,
        table="wiktionary",
        cls=WiktionaryItem,
        where="LanguageCode=? COLLATE NOCASE AND LabelName=? COLLATE NOCASE",
        params=[lang, label],
    )

    for entry in entries:
        log.info("%s", entry)

        for word in merge(entry):
            DBWrite(DBWord, word, table="words", if_exists="replace")

        # Flag the source entry as processed.
        DBExecute(
            DBWiktionary,
            "UPDATE wiktionary SET Operation_Merging = 1 WHERE PrimaryKey = ?",
            entry.PrimaryKey,
        )
Beispiel #8
0
def find_word_by_label(label):
    """Stub lookup: request all rows of ``words`` and return ``''``.

    NOTE(review): ``label`` is never used and the result of ``DBRead`` is
    never inspected, so this looks unfinished - confirm the intended
    behaviour before relying on it.

    Args:
        label: Currently ignored.

    Returns:
        Always the empty string.
    """
    DBRead(DBWord, sql="""
        SELECT * 
          FROM words 
    """)

    return ''
Beispiel #9
0
def load_wiktionary(DBWord):
    """Merge every wiktionary entry into the ``words`` table.

    Each word produced by ``merge`` is written with ``if_exists="replace"``;
    afterwards the source entry is flagged with ``Operation_Merging = 1``.

    Args:
        DBWord: Database handle the merged words are written to.
    """
    log.info("loading wiktionary")

    entries = DBRead(DBWiktionary, table="wiktionary", cls=WiktionaryItem)

    for entry in entries:
        log.info("%s", entry)

        for word in merge(entry):
            DBWrite(DBWord, word, table="words", if_exists="replace")

        # Flag the source entry as processed.
        DBExecute(
            DBWiktionary,
            "UPDATE wiktionary SET Operation_Merging = 1 WHERE PrimaryKey = ?",
            entry.PrimaryKey,
        )
Beispiel #10
0
def load_wikipedia(DBWord):
    """Merge every wikipedia article into the ``words`` table.

    Each word produced by ``merge`` is written with ``if_exists="replace"``;
    afterwards the source article is flagged with ``Operation_Merging = 1``.

    Args:
        DBWord: Database handle the merged words are written to.
    """
    log.info("loading wikipedia")

    articles = DBRead(DBWikipedia, table="wikipedia", cls=WikipediaItem)

    for article in articles:
        log.info("%s", article)

        for word in merge(article):
            DBWrite(DBWord, word, table="words", if_exists="replace")

        # Flag the source article as processed.
        mark_done = "UPDATE wikipedia SET Operation_Merging = 1 WHERE PK = ?"
        DBExecute(DBWikipedia, mark_done, article.PK)
Beispiel #11
0
def merge_verbs(wt: WiktionaryItem) -> Iterator[WordItem]:
    """Merge one wiktionary verb entry into matching verbs of ``words``.

    Steps:
      1. find words with the same language code and label name (both
         case-insensitive), ``Type = 'verb'`` and ``FromWT`` NULL;
      2. match their descriptions against the entry's ``ExplainationTxt``
         with ``Match_List_PKS_With_Lists_Of_PKS``;
      3. merge the entry into every matched word.

    Yields:
        Every matched word after ``merge_words`` has been applied and the
        entry's ``PrimaryKey`` appended to its ``MergedWith``; or, when
        nothing matched (or nothing was found), a single fresh ``WordItem``
        filled from the entry.
    """
    query = """ SELECT * 
                FROM words 
               WHERE LanguageCode = ?       COLLATE NOCASE
                 AND LabelName = ?          COLLATE NOCASE
                 AND Type = 'verb'          COLLATE NOCASE
                 AND FromWT is NULL 
             """ # ci_index

    candidates = list(
        DBRead(DBWord,
               sql=query,
               params=[wt.LanguageCode, wt.LabelName],
               cls=WordItem))

    matched = []
    if candidates:
        log.debug("found items: %s", candidates)

        descriptions = [candidate.Description for candidate in candidates]
        explanations = [wt.ExplainationTxt]

        log.debug("matching:")
        log.debug("  sentences1: %s", descriptions)
        log.debug("  sentences2: %s", explanations)
        pairs = Match_List_PKS_With_Lists_Of_PKS(tuple(descriptions),
                                                 tuple(explanations))

        # A candidate counts as matched when its pair points at this entry's
        # explanation text.
        for candidate, (_, right) in zip(candidates, pairs):
            if right == wt.ExplainationTxt:
                matched.append(candidate)

    if not matched:
        # Nothing to merge into: emit a brand-new word.
        log.debug("new: %s", wt)
        fresh = WordItem()
        merge_words(fresh, wt)
        yield fresh
        return

    for word in matched:
        log.debug("[ OK ] matched: %s == %s", word, wt)
        merge_words(word, wt)
        word.MergedWith.append(wt.PrimaryKey)
        yield word
Beispiel #12
0
def operation_pref_wikidata(lang):
    """Compute and store ``LabelNamePreference`` for every wikidata row.

    The raw score of a row is the sum of the lengths of its ``AlsoKnownAs``,
    ``Instance_of``, ``Subclass_of``, ``Part_of`` and ``Translation_*``
    fields, plus the square roots of ``WikipediaLinkCountTotal``, of the
    length of the example sentences returned by ``get_sentences_with_label``
    and of the length of ``Description``.  It is divided by the reference
    score returned by ``calculate_cat_felidae()`` and then by 2, clamped to
    ``[0, 1]`` and written back together with ``Operation_Pref = 1``.

    Args:
        lang: Language passed through to ``get_sentences_with_label``.
    """
    log.info("Doing operation Set Property: LabelNamePreference: Wikidata")

    preference_base = calculate_cat_felidae()

    for wd in DBRead(DBWikidata,
                     sql=" SELECT * FROM wikidata ",
                     cls=WikidataItem):

        log.info("  set Property: LabelNamePreference: %s", wd)

        ExplainationExamplesTxt = get_sentences_with_label(
            lang, wd.Description, wd.LabelName)
        ExplainationTxt = wd.Description

        preference = (
            len(wd.AlsoKnownAs) +
            len(wd.Instance_of) +
            len(wd.Subclass_of) +
            len(wd.Part_of) +
            len(wd.Translation_EN) +
            len(wd.Translation_PT) +
            len(wd.Translation_DE) +
            len(wd.Translation_ES) +
            len(wd.Translation_FR) +
            len(wd.Translation_IT) +
            len(wd.Translation_RU) +
            math.sqrt(wd.WikipediaLinkCountTotal) +
            math.sqrt(len(ExplainationExamplesTxt)) +
            math.sqrt(len(ExplainationTxt)))

        # Divide by the value of the CAT-FELIDAE reference and by 2.
        preference = preference / preference_base / 2

        # If < 0 then 0, if > 1 then 1, otherwise keep the normalised value.
        # Mapping every positive score to 1 would discard the division above.
        LabelNamePreference = min(max(preference, 0), 1)

        DBExecute(
            DBWikidata, """
            UPDATE wikidata 
               SET LabelNamePreference = ?,
                   Operation_Pref = 1 
             WHERE PrimaryKey = ?
             """, LabelNamePreference, wd.PrimaryKey)
Beispiel #13
0
def load_wikipedia_one(DBWord, lang, label):
    """Merge the wikipedia articles for one (language, label) into ``words``.

    Articles are selected case-insensitively by ``LanguageCode`` and
    ``LabelName``.  Every word produced by ``merge`` is written to the
    ``words`` table, after which the article is flagged with
    ``Operation_Merging = 1``.

    Args:
        DBWord: Database handle the merged words are written to.
        lang: Language code to select.
        label: Label name to select.
    """
    for wd in DBRead(
            DBWikipedia,
            table="wikipedia",
            cls=WikipediaItem,
            where=
            "LanguageCode=? COLLATE NOCASE AND LabelName=? COLLATE NOCASE",
            params=[lang, label]):
        log.info("%s", wd)

        for w in merge(wd):
            # "replace" (not "fail"): merge() can yield words that were read
            # from the ``words`` table, so the write must be able to
            # overwrite them.  This also matches load_wikipedia().
            # NOTE(review): assumes DBWrite's if_exists applies to existing
            # rows the same way in both functions - confirm.
            DBWrite(DBWord, w, table="words", if_exists="replace")

        # Flag the source article as processed.
        DBExecute(DBWikipedia,
                  "UPDATE wikipedia SET Operation_Merging = 1 WHERE PK = ?",
                  wd.PK)
Beispiel #14
0
def invert_vector_properties(lang):
    """Call ``invert_property`` for every ``*_Vect`` property of every word.

    Intended inversion, per the original notes:
      1. PK  -> Description[ PK1, PK2, ... ]
      2. PK1 -> Description_Inv[ ..., PK ]
         PK2 -> Description_Inv[ ..., PK ]

    The list of properties is taken from the instance attributes of a fresh
    ``WordItem`` whose name ends with ``_Vect`` and whose value is not
    callable.

    Args:
        lang: Passed through to ``invert_property``.
    """
    log.info("Invert vectors words")

    word = WordItem()
    # Test the attribute *value* for callability; testing the attribute name
    # (a str) is always False and filters nothing.
    properties_to_invert = [
        name for name, value in vars(word).items()
        if name.endswith("_Vect") and not callable(value)
    ]

    for w in DBRead(DBWord, sql=" SELECT * FROM words ", cls=dict):

        log.info("  invert vector: %s", w["LabelName"])

        for prop in properties_to_invert:
            # NOTE(review): the return value is not used here; presumably
            # invert_property persists its result itself - confirm.
            invert_property(lang, w, prop)
Beispiel #15
0
def invert_properties_words(lang):
    """Vectorize every row of ``words`` and store the ``*_Vect`` columns.

    Each row is passed to ``Vectorize_database_record``; the resulting
    entries are JSON-encoded into the matching ``*_Vect`` columns and
    ``Operation_Vectorizer`` is set to 1.

    NOTE(review): despite its name, this function vectorizes rather than
    inverts (see the log message and the helper it calls).

    Args:
        lang: Passed through to ``Vectorize_database_record``.
    """
    log.info("Vectorizing words")

    # One key per "?" of the SET clause below, in the same order.  The
    # statement has a placeholder for ``Otherwise_Vect``; leaving its value
    # out makes the binding count wrong and shifts every later value into
    # the wrong column.
    # NOTE(review): assumes Vectorize_database_record returns an
    # "Otherwise" entry like the other properties - confirm.
    vect_keys = (
        "ExplainationTxt",
        "AlternativeFormsOther",
        "Synonymy",
        "Antonymy",
        "Hypernymy",
        "Hyponymy",
        "Meronymy",
        "RelatedTerms",
        "Coordinate",
        "Otherwise",
        "Description",
        "AlsoKnownAs",
        "Instance_of",
        "Subclass_of",
        "Part_of",
    )

    for w in DBRead(DBWord, sql=" SELECT * FROM words ", cls=dict):

        log.info("  vectorize: %s", w["LabelName"])

        vectorized = Vectorize_database_record(lang, w)
        vect_values = [to_json(vectorized[key]) for key in vect_keys]

        DBExecute(
            DBWord, """
            UPDATE words 
               SET
                   ExplainationTxt_Vect = ?,
                   AlternativeFormsOther_Vect = ?,
                   Synonymy_Vect = ?,
                   Antonymy_Vect = ?,
                   Hypernymy_Vect = ?,
                   Hyponymy_Vect = ?,
                   Meronymy_Vect = ?,
                   RelatedTerms_Vect = ?,
                   Coordinate_Vect = ?,
                   Otherwise_Vect = ?,

                   Description_Vect = ?,
                   AlsoKnownAs_Vect = ?,
                   Instance_of_Vect= ?,
                   Subclass_of_Vect = ?,
                   Part_of_Vect = ?,

                   Operation_Vectorizer = 1
             WHERE PrimaryKey = ?
             """, *vect_values, w["PrimaryKey"])
Beispiel #16
0
def calculate_cat_felidae():
    """Return the preference score of the CAT-FELIDAE reference entry.

    Opens ``wiktionary-cat.sqlite3``, reads the wiktionary entry whose
    PrimaryKey is
    ``en-dictionary§Noun_Reference_Word_Alphabetical_with-1`` and scores it
    with ``calculate_preference_wiktionary``.

    Returns:
        The score of the reference entry (the original author recorded
        27.517909943958315 for their data).

    Raises:
        Exception: If the reference entry is not present in the database.
    """
    DB_CAT = sqlite3.connect("wiktionary-cat.sqlite3")

    try:
        rows = DBRead(
            DB_CAT,
            sql=
            "SELECT * FROM wiktionary WHERE PrimaryKey = 'en-dictionary§Noun_Reference_Word_Alphabetical_with-1'",
            cls=WiktionaryItem)

        try:
            wt = next(rows)
        except StopIteration:
            raise Exception("No CAT-FELIDAE") from None

    finally:
        # Close on every path; without this the connection leaked whenever
        # the reference entry was missing.
        DB_CAT.close()

    return calculate_preference_wiktionary(wt)