def operation_pref_wikipedia(lang):
    """Compute and store ``LabelNamePreference`` for every Wikipedia record.

    For each row of the ``wikipedia`` table the raw score is the sum of the
    square roots of the lengths of ``SeeAlsoWikipediaLinks``,
    ``ExplainationWPTxt`` and ``ExplainationExamplesTxt``.  The raw score is
    divided by the reference value returned by ``calculate_cat_felidae()``
    and then by 2, and the result is clamped to the range [0, 1].  The
    clamped value is written back together with ``Operation_Pref = 1``.

    Previously every positive score was stored as 1, which discarded the
    normalized value; the stored value is now the clamped score itself
    (a float between the integer bounds 0 and 1).

    Args:
        lang: Not referenced by this function; kept so existing callers
            keep working.
    """
    log.info("Doing operation Set Property: LabelNamePreference: Wikipedia")

    preference_base = calculate_cat_felidae()

    update_sql = (
        " UPDATE wikipedia"
        " SET LabelNamePreference = ?,"
        " Operation_Pref = 1"
        " WHERE PK = ? "
    )

    for wp in DBRead(DBWikipedia, sql=" SELECT * FROM wikipedia ", cls=WikipediaItem):
        log.info(" set Property: LabelNamePreference: %s", wp)

        raw_score = (
            math.sqrt(len(wp.SeeAlsoWikipediaLinks))
            + math.sqrt(len(wp.ExplainationWPTxt))
            + math.sqrt(len(wp.ExplainationExamplesTxt))
        )

        # Normalize against the reference record (CAT-FELIDAE), then halve.
        normalized = raw_score / preference_base / 2

        # If < 0 then 0, if > 1 then 1, otherwise keep the normalized score.
        label_name_preference = max(0, min(1, normalized))

        DBExecute(DBWikipedia, update_sql, label_name_preference, wp.PK)
def operation_pref_conjugaison(lang):
    """Compute and store ``LabelNamePreference`` for every conjugation record.

    For each row of the ``conjugations`` table the raw score is the sum of
    the square roots of the lengths of ``AlternativeFormsOther`` and
    ``ExplainationTxt``.  The raw score is divided by the reference value
    returned by ``calculate_they_read()`` and then by 2, and the result is
    clamped to the range [0, 1].  The clamped value is written back together
    with ``Operation_Pref = 1``.

    Previously every positive score was stored as 1, which made the
    normalization step pointless; the stored value is now the clamped score
    itself (a float between the integer bounds 0 and 1).

    Args:
        lang: Not referenced by this function; kept so existing callers
            keep working.
    """
    log.info("Doing operation Set Property: LabelNamePreference: Conjugaison")

    preference_base = calculate_they_read()

    update_sql = (
        " UPDATE conjugations"
        " SET LabelNamePreference = ?,"
        " Operation_Pref = 1"
        " WHERE PK = ? "
    )

    for c in DBRead(DBConjugations, sql=" SELECT * FROM conjugations ", cls=ConjugationsItem):
        log.info(" set Property: LabelNamePreference: %s", c)

        raw_score = (
            math.sqrt(len(c.AlternativeFormsOther))
            + math.sqrt(len(c.ExplainationTxt))
        )

        # Normalize against the reference record, then halve.
        normalized = raw_score / preference_base / 2

        # Keep the normalized score, limited to the range [0, 1].
        label_name_preference = max(0, min(1, normalized))

        DBExecute(DBConjugations, update_sql, label_name_preference, c.PK)
def vectorize_properties_wikipedia():
    """Vectorize the properties of every Wikipedia row and store them.

    Each row of the ``wikipedia`` table is read as a ``dict`` and passed to
    ``Vectorize_PKS`` with the row's ``LanguageCode`` as default language.
    The ``Description``, ``AlsoKnownAs``, ``Instance_of``, ``Subclass_of``
    and ``Part_of`` entries of the result are JSON-encoded with ``to_json``
    and written to the matching ``*_Vect`` columns, and
    ``Operation_Vectorizer`` is set to 1.

    NOTE(review): the rows come from the ``wikipedia`` table, but the UPDATE
    targets the ``wiktionary`` table through ``DBWiktionary`` and is keyed by
    ``PrimaryKey``, while other Wikipedia code in this source keys on ``PK``.
    This looks like a copy-paste leftover -- confirm the intended table and
    key before relying on this function.
    """
    log.info("Vectorizing wikipedia")

    vector_fields = (
        "Description",
        "AlsoKnownAs",
        "Instance_of",
        "Subclass_of",
        "Part_of",
    )
    update_sql = (
        " UPDATE wiktionary"
        " SET Description_Vect = ?,"
        " AlsoKnownAs_Vect = ?,"
        " Instance_of_Vect= ?,"
        " Subclass_of_Vect = ?,"
        " Part_of_Vect = ?,"
        " Operation_Vectorizer = 1"
        " WHERE PrimaryKey = ? "
    )

    for row in DBRead(DBWikipedia, sql=" SELECT * FROM wikipedia ", cls=dict):
        log.info(" vectorize: %s", row["LabelName"])

        vectors = Vectorize_PKS(row, default_language=row["LanguageCode"])

        # One JSON value per *_Vect placeholder, in the order of the SQL.
        encoded = [to_json(vectors[field]) for field in vector_fields]

        DBExecute(DBWiktionary, update_sql, *encoded, row["PrimaryKey"])
def operation_pref_wiktionary(lang):
    """Compute and store ``LabelNamePreference`` for every Wiktionary record.

    For each row of the ``wiktionary`` table the raw score comes from
    ``calculate_preference_wiktionary``.  It is divided by the reference
    value returned by ``calculate_cat_felidae()`` and then by 2, and the
    result is clamped to the range [0, 1].  The clamped value is written
    back together with ``Operation_Pref = 1``.

    Previously every positive score was stored as 1, which made the
    normalization step pointless; the stored value is now the clamped score
    itself (a float between the integer bounds 0 and 1).

    Args:
        lang: Not referenced by this function; kept so existing callers
            keep working.
    """
    log.info("Doing operation Set Property: LabelNamePreference: Wiktionary")

    preference_base = calculate_cat_felidae()

    update_sql = (
        " UPDATE wiktionary"
        " SET LabelNamePreference = ?,"
        " Operation_Pref = 1"
        " WHERE PrimaryKey = ? "
    )

    for wt in DBRead(DBWiktionary, sql=" SELECT * FROM wiktionary ", cls=WiktionaryItem):
        log.info(" set Property: LabelNamePreference: %s", wt)

        raw_score = calculate_preference_wiktionary(wt)

        # Normalize against the reference record, then halve.
        normalized = raw_score / preference_base / 2

        # Keep the normalized score, limited to the range [0, 1].
        label_name_preference = max(0, min(1, normalized))

        DBExecute(DBWiktionary, update_sql, label_name_preference, wt.PrimaryKey)
def load_wiktionary_one(DBWord, lang, label):
    """Merge the Wiktionary records of a single label into the words table.

    Selects the ``wiktionary`` rows whose ``LanguageCode`` and ``LabelName``
    match ``lang`` and ``label`` (both compared case-insensitively), writes
    every item produced by ``merge`` into the ``words`` table of ``DBWord``
    (replacing existing entries), and flags the source row with
    ``Operation_Merging = 1``.

    Args:
        DBWord: Database handle that receives the merged words.
        lang: Language code to match.
        label: Label name to match.
    """
    selected = DBRead(
        DBWiktionary,
        table="wiktionary",
        cls=WiktionaryItem,
        where="LanguageCode=? COLLATE NOCASE AND LabelName=? COLLATE NOCASE",
        params=[lang, label],
    )
    mark_merged_sql = "UPDATE wiktionary SET Operation_Merging = 1 WHERE PrimaryKey = ?"

    for item in selected:
        log.info("%s", item)

        for word in merge(item):
            DBWrite(DBWord, word, table="words", if_exists="replace")

        # Flag the source record once its merged words have been written.
        DBExecute(DBWiktionary, mark_merged_sql, item.PrimaryKey)
def DBDeleteLangRecords(lang):
    """Delete the conjugation records stored for one language.

    Args:
        lang (str): Language code. One of: en, de, it, es, pt, fr.

    Returns:
        Whatever ``DBExecute`` returns for the DELETE statement.
    """
    # NOTE(review): a second function with this exact name (taking the
    # database handle as an extra argument) is defined further down; if both
    # live in the same module, the later definition replaces this one.
    log.info("Deleting old '%s' records...", lang)

    delete_sql = "DELETE FROM conjugations WHERE LanguageCode = ?"
    outcome = DBExecute(DBConjugations, delete_sql, lang)
    return outcome
def DBDeleteLangRecords(lang, DBWikipedia):
    """Delete the Wikipedia records stored for one language.

    Args:
        lang (str): Language code. One of: en, de, it, es, pt, fr.
        DBWikipedia: Database handle the DELETE statement is executed on.

    Returns:
        Whatever ``DBExecute`` returns for the DELETE statement.
    """
    # NOTE(review): an earlier function with this exact name (taking only
    # ``lang``) is defined above; if both live in the same module, this
    # definition replaces it.
    log.info("Deleting old '%s' records...", lang)

    delete_sql = "DELETE FROM wikipedia WHERE LanguageCode = ?"
    outcome = DBExecute(DBWikipedia, delete_sql, lang)
    return outcome
def load_wiktionary(DBWord):
    """Merge every Wiktionary record into the words table.

    Reads all rows of the ``wiktionary`` table, writes every item produced
    by ``merge`` into the ``words`` table of ``DBWord`` (replacing existing
    entries), and flags each source row with ``Operation_Merging = 1``.

    Args:
        DBWord: Database handle that receives the merged words.
    """
    log.info("loading wiktionary")

    mark_merged_sql = "UPDATE wiktionary SET Operation_Merging = 1 WHERE PrimaryKey = ?"
    records = DBRead(DBWiktionary, table="wiktionary", cls=WiktionaryItem)

    for item in records:
        log.info("%s", item)

        for word in merge(item):
            DBWrite(DBWord, word, table="words", if_exists="replace")

        # Flag the source record once its merged words have been written.
        DBExecute(DBWiktionary, mark_merged_sql, item.PrimaryKey)
def load_wikipedia(DBWord):
    """Merge every Wikipedia record into the words table.

    Reads all rows of the ``wikipedia`` table, writes every item produced by
    ``merge`` into the ``words`` table of ``DBWord`` (replacing existing
    entries), and flags each source row with ``Operation_Merging = 1``.

    Args:
        DBWord: Database handle that receives the merged words.
    """
    log.info("loading wikipedia")

    mark_merged_sql = "UPDATE wikipedia SET Operation_Merging = 1 WHERE PK = ?"
    records = DBRead(DBWikipedia, table="wikipedia", cls=WikipediaItem)

    for item in records:
        log.info("%s", item)

        for word in merge(item):
            DBWrite(DBWord, word, table="words", if_exists="replace")

        # Flag the source record once its merged words have been written.
        DBExecute(DBWikipedia, mark_merged_sql, item.PK)
def operation_pref_wikidata(lang):
    """Compute and store ``LabelNamePreference`` for every Wikidata record.

    For each row of the ``wikidata`` table the raw score is:

    * the summed lengths of ``AlsoKnownAs``, ``Instance_of``,
      ``Subclass_of``, ``Part_of`` and the ``Translation_*`` fields
      (EN, PT, DE, ES, FR, IT, RU), plus
    * the square root of ``WikipediaLinkCountTotal``, plus
    * the square roots of the lengths of the example sentences returned by
      ``get_sentences_with_label`` and of ``Description``.

    The raw score is divided by the reference value returned by
    ``calculate_cat_felidae()`` and then by 2, and the result is clamped to
    the range [0, 1].  The clamped value is written back together with
    ``Operation_Pref = 1``.

    Previously every positive score was stored as 1, which discarded the
    normalized value; the stored value is now the clamped score itself
    (a float between the integer bounds 0 and 1).

    Args:
        lang: Language passed on to ``get_sentences_with_label``.
    """
    log.info("Doing operation Set Property: LabelNamePreference: Wikidata")

    preference_base = calculate_cat_felidae()

    update_sql = (
        " UPDATE wikidata"
        " SET LabelNamePreference = ?,"
        " Operation_Pref = 1"
        " WHERE PrimaryKey = ? "
    )

    for wd in DBRead(DBWikidata, sql=" SELECT * FROM wikidata ", cls=WikidataItem):
        log.info(" set Property: LabelNamePreference: %s", wd)

        example_sentences = get_sentences_with_label(lang, wd.Description, wd.LabelName)
        explanation_text = wd.Description

        # Fields that contribute their plain length to the score.
        counted_fields = (
            wd.AlsoKnownAs,
            wd.Instance_of,
            wd.Subclass_of,
            wd.Part_of,
            wd.Translation_EN,
            wd.Translation_PT,
            wd.Translation_DE,
            wd.Translation_ES,
            wd.Translation_FR,
            wd.Translation_IT,
            wd.Translation_RU,
        )

        raw_score = (
            sum(len(value) for value in counted_fields)
            + math.sqrt(wd.WikipediaLinkCountTotal)
            + math.sqrt(len(example_sentences))
            + math.sqrt(len(explanation_text))
        )

        # Normalize against the reference record (CAT-FELIDAE), then halve.
        normalized = raw_score / preference_base / 2

        # If < 0 then 0, if > 1 then 1, otherwise keep the normalized score.
        label_name_preference = max(0, min(1, normalized))

        DBExecute(DBWikidata, update_sql, label_name_preference, wd.PrimaryKey)
def load_wikipedia_one(DBWord, lang, label):
    """Merge the Wikipedia records of a single label into the words table.

    Selects the ``wikipedia`` rows whose ``LanguageCode`` and ``LabelName``
    match ``lang`` and ``label`` (both compared case-insensitively), writes
    every item produced by ``merge`` into the ``words`` table of ``DBWord``,
    and flags the source row with ``Operation_Merging = 1``.

    Words are written with ``if_exists="replace"``, the same mode used by
    ``load_wikipedia`` and the Wiktionary loaders, so loading a label whose
    words are already present updates them.  This function previously used
    ``if_exists="fail"``.

    Args:
        DBWord: Database handle that receives the merged words.
        lang: Language code to match.
        label: Label name to match.
    """
    selected = DBRead(
        DBWikipedia,
        table="wikipedia",
        cls=WikipediaItem,
        where="LanguageCode=? COLLATE NOCASE AND LabelName=? COLLATE NOCASE",
        params=[lang, label],
    )
    mark_merged_sql = "UPDATE wikipedia SET Operation_Merging = 1 WHERE PK = ?"

    for item in selected:
        log.info("%s", item)

        for word in merge(item):
            DBWrite(DBWord, word, table="words", if_exists="replace")

        # Flag the source record once its merged words have been written.
        DBExecute(DBWikipedia, mark_merged_sql, item.PK)
def invert_properties_words(lang):
    """Vectorize the properties of every word and store the vectors.

    Each row of the ``words`` table is read as a ``dict`` and passed to
    ``Vectorize_database_record``.  For every property listed below, the
    matching entry of the result is JSON-encoded with ``to_json`` and
    written to the ``<property>_Vect`` column; ``Operation_Vectorizer`` is
    set to 1.

    The UPDATE statement and its bound values are both generated from the
    same property list, so the number and order of placeholders always
    match the values.  The previous hand-written version had a placeholder
    for ``Otherwise_Vect`` but bound no value for it, leaving the statement
    one parameter short and every later value shifted by one column.

    Args:
        lang: Language passed on to ``Vectorize_database_record``.
    """
    log.info("Vectorizing words")

    # NOTE(review): this keys the ``words`` table on ``PrimaryKey`` while
    # ``update_MergedWith`` keys the same table on ``PK`` -- confirm which
    # column name the table really has.
    vector_fields = (
        "ExplainationTxt",
        "AlternativeFormsOther",
        "Synonymy",
        "Antonymy",
        "Hypernymy",
        "Hyponymy",
        "Meronymy",
        "RelatedTerms",
        "Coordinate",
        "Otherwise",
        "Description",
        "AlsoKnownAs",
        "Instance_of",
        "Subclass_of",
        "Part_of",
    )
    assignments = ", ".join(field + "_Vect = ?" for field in vector_fields)
    update_sql = (
        " UPDATE words SET "
        + assignments
        + ", Operation_Vectorizer = 1 WHERE PrimaryKey = ? "
    )

    for w in DBRead(DBWord, sql=" SELECT * FROM words ", cls=dict):
        log.info(" vectorize: %s", w["LabelName"])

        vectorized = Vectorize_database_record(lang, w)

        # One JSON value per *_Vect placeholder, in the order of the SQL.
        encoded = [to_json(vectorized[field]) for field in vector_fields]

        DBExecute(DBWord, update_sql, *encoded, w["PrimaryKey"])
def check_one(id_, lang):
    """Fetch one Wikidata entity from the web API and (re)process it.

    Downloads the entity as JSON from the ``wbgetentities`` endpoint,
    pretty-prints it, deletes the existing ``wikidata`` row with the same
    ``CodeInWiki`` (only when the database file ``DB_NAME`` exists) and
    hands the data to ``process_web_record``.

    Args:
        id_: Wikidata entity id, e.g. ``"Q20152873"``.
        lang: Language passed on to ``process_web_record``.

    Raises:
        requests.exceptions.RequestException: When the request fails, times
            out, or the server answers with an HTTP error status.
    """
    import json

    import requests

    # Entity JSON, e.g.
    # https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&ids=Q20152873
    # The id goes through ``params`` so it is URL-encoded properly.
    response = requests.get(
        "https://www.wikidata.org/w/api.php",
        params={"action": "wbgetentities", "format": "json", "ids": id_},
        headers={"Accept": "application/json"},
        timeout=30,  # without a timeout a stalled connection blocks forever
    )
    # Fail here on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    data = json.loads(response.text)
    pprint(data)

    # Delete the old record.  The id is passed as a plain positional value,
    # like every other DBExecute call, not wrapped in a tuple.
    if os.path.isfile(DB_NAME):
        DBExecute(DBWikidata, "DELETE FROM wikidata WHERE CodeInWiki = ?", id_)

    # Process the freshly downloaded record.
    process_web_record(data, lang, id_)
def update_MergedWith(wid, MergedWith, to):
    """Record that a word has been merged with another item.

    Appends ``to`` to the ``MergedWith`` list (the caller's list is modified
    in place), serializes the list to JSON without ASCII escaping and stores
    it in the ``MergedWith`` column of the ``words`` row whose ``PK`` equals
    ``wid``.

    Args:
        wid: ``PK`` of the word row to update.
        MergedWith: List of items the word is already merged with.
        to: Item to add to the list.
    """
    MergedWith.append(to)

    serialized = json.dumps(MergedWith, ensure_ascii=False)
    update_sql = "UPDATE words SET MergedWith = ? WHERE PK = ?"

    DBExecute(DBWord, update_sql, serialized, wid)