def main():
    for infobox in cats:
        sys.stdout.flush()
        begin = time.time()
        query_res = run_query(SQL_main.format(infobox))
        end = time.time()
        timelen = end - begin
        if timelen > 30:
            # log unusually slow queries
            pywikibot.output('{}'.format(timelen))
        result_json = [encode_all_items(f) for f in query_res]
        curr_time = utc_to_local(datetime.utcnow())
        dateforq1 = "{0:%Y%m%d%H%M%S}".format(curr_time)
        pywikibot.output(result_json[:3])
        connLabs = toolforge.connect_tools('s53143__mis_lists_p')
        cursor1 = connLabs.cursor()
        cursor1.execute(
            sql_insert,
            (infobox.replace('_по_алфавиту', '').replace('_', ' '),
             'rusports',
             json.dumps(result_json, ensure_ascii=False),
             dateforq1))
        connLabs.commit()
        connLabs.close()
    conn.close()
    pywikibot.output('done')
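# sql_insert is defined elsewhere in this script. Judging by the argument order
# above (name, group, JSON payload, timestamp) and the `entries` schema used by
# insert_into_db() in the related list scripts, an equivalent statement would
# look roughly like this sketch (kept under a separate name, as an assumption):
sql_insert_sketch = ('INSERT INTO `entries` (`name`, `group_name`, `jsondata`, `last_upd`) '
                     'VALUES (%s, %s, %s, %s)')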
def main():
    infoboxlist = get_input_list()
    for infobox in infoboxlist:
        begin = time.time()
        query_res = run_query(SQL_main.format(infobox))
        end = time.time()
        timelen = end - begin
        if timelen > 30:
            # log unusually slow queries
            pywikibot.output('{}'.format(timelen))
        result_json = [encode_all_items(f) for f in query_res]
        curr_time = utc_to_local(datetime.utcnow())
        dateforq1 = "{0:%Y%m%d%H%M%S}".format(curr_time)
        connLabs = toolforge.connect_tools('s53143__mis_lists_p')
        cursor1 = connLabs.cursor()
        cursor1.execute(sql_update, (json.dumps(result_json), dateforq1, infobox))
        connLabs.commit()
        connLabs.close()
    conn.close()
    pywikibot.output('done')
def saveToDB(jsonData, wiki, countryname, source):
    curr_time = utc_to_local(datetime.utcnow())
    currTime = "{0:%Y%m%d%H%M%S}".format(curr_time)
    # sql_update: ... SET jsondata=%s, last_upd=%s where wiki=%s and name=%s and list_type=%s
    if cachedDB:
        with open('cats datggfgdfga2{}{}.txt'.format(wiki, source), "w", encoding='utf-8') as fileW:
            fileW.write(str(jsonData))
    else:
        connLabs = toolforge.connect_tools('s53143__estlat_p')
        cursor1 = connLabs.cursor()
        cursor1.execute(sql_update, (json.dumps(jsonData), currTime, wiki, countryname, "list", source))
        connLabs.commit()
        connLabs.close()
def main():
    for infobox in jsoninput:
        infname = infobox['name']
        fr = infobox['frwiki']
        grupa = infobox['group']
        pywikibot.output('\t' + infname)
        sys.stdout.flush()
        begin = time.time()
        appending = ''
        if infname == 'indija':
            # for this list also collect the infobox templates used on each page
            appending = (", (SELECT GROUP_CONCAT(tl_title SEPARATOR '|') FROM templatelinks"
                         " WHERE tl_title like 'Infobox%' and tl_namespace = 10"
                         " and tl_from=p.page_id) as box")
        query_res = run_query(SQL_main.format(appending, fr))
        end = time.time()
        timelen = end - begin
        if timelen > 30:
            # log unusually slow queries
            pywikibot.output('{}'.format(timelen))
        result_json = format_frwiki(infname, [encode_all_items(f) for f in query_res])
        curr_time = utc_to_local(datetime.utcnow())
        dateforq1 = "{0:%Y%m%d%H%M%S}".format(curr_time)
        pywikibot.output(result_json)
        connLabs = toolforge.connect_tools('s53143__mis_lists_p')
        cursor1 = connLabs.cursor()
        cursor1.execute(sql_update, (json.dumps(result_json), dateforq1, grupa, infname))
        connLabs.commit()
        connLabs.close()
    conn.close()
    pywikibot.output('done')
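# jsoninput is loaded elsewhere in this script. Based on the keys read above it
# is assumed to be a list of dicts of the following shape; the values here are
# illustrative placeholders only, kept under a separate name:
jsoninput_example = [
    {'name': 'example_list', 'frwiki': 'Example_infobox_title', 'group': 'example_group'},
]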
def dataSaving(data):
    conn = toolforge.connect_tools('s53143__missing_p')
    cursor = conn.cursor()
    # full refresh: clear the table, then reinsert every row
    cursor.execute("TRUNCATE TABLE articles")
    conn.commit()
    sql = "INSERT INTO articles (wd, orig, lang, descr, iws) VALUES (%s, %s, %s, %s, %s)"
    for one in data:
        wd, orig, lang, descr, iws = one
        cursor.execute(sql, (wd, orig, lang, descr, iws))
    # record the date of this update in the meta table
    dateforq12 = "{0:%Y-%m-%d}".format(datetime.utcnow())
    sql2 = 'UPDATE meta set value= %s where data="upd"'
    cursor.execute(sql2, (dateforq12,))
    conn.commit()
    conn.close()
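# Alternative sketch: the per-row INSERT loop above can be batched with the
# DB-API executemany() call. Same imports and the same `articles`/`meta` schema
# as dataSaving() are assumed; the function name below is illustrative.
def dataSavingBatched(data):
    conn = toolforge.connect_tools('s53143__missing_p')
    cursor = conn.cursor()
    cursor.execute("TRUNCATE TABLE articles")
    conn.commit()
    sql = "INSERT INTO articles (wd, orig, lang, descr, iws) VALUES (%s, %s, %s, %s, %s)"
    cursor.executemany(sql, [tuple(one) for one in data])
    cursor.execute('UPDATE meta set value= %s where data="upd"',
                   ("{0:%Y-%m-%d}".format(datetime.utcnow()),))
    conn.commit()
    conn.close()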
def insert_into_db(group_name, name, jsondata):
    sql_insert = 'INSERT INTO `entries` (`name`, `group_name`, `jsondata`, `last_upd`) VALUES (%s, %s, %s, %s)'
    sql_update = 'UPDATE `entries` SET jsondata=%s, last_upd=%s where group_name=%s and name=%s'
    curr_time = utc_to_local(datetime.utcnow())
    dateforq1 = "{0:%Y%m%d%H%M%S}".format(curr_time)
    connLabs = toolforge.connect_tools('s53143__mis_lists_p')
    cursor1 = connLabs.cursor()
    # insert only if no row exists yet for this group/name pair, otherwise update it
    isAlreadyInDB_sql = 'select id from entries where group_name=%s and name=%s'
    cursor1.execute(isAlreadyInDB_sql, (group_name, name))
    isAlreadyInDB = cursor1.fetchall()
    if len(isAlreadyInDB) < 1:
        cursor1.execute(sql_insert, (name, group_name, jsondata, dateforq1))
    else:
        cursor1.execute(sql_update, (jsondata, dateforq1, group_name, name))
    connLabs.commit()
    connLabs.close()
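# Minimal usage sketch for insert_into_db(); the group/name/payload values are
# examples only, not taken from the production lists, and the helper below is
# illustrative rather than part of the original script.
def _demo_insert():
    sample_payload = '[["Example_title", 3, "Q42"]]'  # jsondata is stored as a JSON string
    insert_into_db('example_group', 'example_list', sample_payload)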
def main():
    query_res = run_query(SQL_main)
    result_json = [encode_all_items(f) for f in query_res]
    bigm = {}
    for entry in result_json:
        title, iws, wd, cats = entry
        if title.endswith('(film_series)'):
            continue
        try:
            cats = cats.split('|')
        except AttributeError:
            # no categories returned for this page
            cats = []
        mycat = []
        for cat in cats:
            fsdf = re.search(catregex, cat)
            if fsdf:
                mycat.append(fsdf.group(1))
        mycat = mycat[0] if len(mycat) > 0 else ''
        if mycat[:3] == '201':
            # films from the 2010s are grouped by individual year
            if mycat in bigm:
                bigm[mycat].append([title, iws, mycat])
            else:
                bigm[mycat] = [[title, iws, mycat]]
        else:
            # everything else is grouped by decade, e.g. '1990s'
            if mycat == '':
                mycat_pre = ''
            else:
                mycat_pre = mycat[:3] + '0s'
            if mycat_pre in bigm:
                bigm[mycat_pre].append([title, iws, mycat])
            else:
                bigm[mycat_pre] = [[title, iws, mycat]]
    curr_time = utc_to_local(datetime.utcnow())
    dateforq1 = "{0:%Y%m%d%H%M%S}".format(curr_time)
    connLabs = toolforge.connect_tools('s53143__mis_lists_p')
    cursor1 = connLabs.cursor()
    grupa = 'other1'
    infname = 'filmas'
    cursor1.execute(
        'UPDATE `entries` SET jsondata=%s, last_upd=%s where group_name=%s and name=%s',
        (json.dumps(bigm), dateforq1, grupa, infname))
    connLabs.commit()
    connLabs.close()
    with open('films_debug.json', "w", encoding='utf-8') as fileS1:
        fileS1.write(json.dumps(result_json))
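# catregex is defined elsewhere in this script; its first capture group is used
# above as a four-digit release year. A hypothetical equivalent (an assumption,
# not the original pattern) kept under a separate name:
catregex_example = r'^(\d{4})_films$'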
import pywikibot, os, re, requests, sys
from pywikibot.data import api
from datetime import timedelta, datetime
import toolforge

file = open('file_licence.txt', 'r', encoding='utf-8').read()
site = pywikibot.Site('lv', "wikipedia")
conn = toolforge.connect('lvwiki_p')
connLabs = toolforge.connect_tools('s53143__meta_p')
cursor1 = connLabs.cursor()


def get_last_run():
    query = "select lastupd from logtable where job='image_copyr'"
    query_res = run_query(query, connLabs)
    return encode_if_necessary(query_res[0][0])
#


def set_last_run(timestamp):
    query = "UPDATE `logtable` SET lastupd=%s where job='image_copyr'"
    timeasUTC = "{0:%Y%m%d%H%M%S}".format(timestamp)
    cursor1.execute(query, (timeasUTC,))
    connLabs.commit()
#
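# run_query() and encode_if_necessary() are defined elsewhere in this script.
# A minimal sketch of the two-argument variant used by get_last_run() above
# (an assumption kept under a separate name, not the exact original):
def run_query_sketch(query, connection):
    cursor = connection.cursor()
    cursor.execute(query)
    return cursor.fetchall()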
import requests
import json
import urllib.parse
import pywikibot, re
import sys
import toolforge

lvsite = pywikibot.Site("lv", "wikipedia")
conn = toolforge.connect_tools('s53143__missing_p')
SQL = '''select lv, wd from list where lv<>""'''


def encode_if_necessary(b):
    if type(b) is bytes:
        return b.decode('utf8')
    return b


def run_query():
    try:
        cursor = conn.cursor()
        cursor.execute(SQL)
        rows = cursor.fetchall()
    except KeyboardInterrupt:
        sys.exit()
    return rows
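# Usage sketch: rows come back from the tool database as bytes, so each field
# is passed through encode_if_necessary() before use. The helper name below is
# illustrative, not part of the original script.
def fetch_list():
    return [(encode_if_necessary(lv), encode_if_necessary(wd)) for lv, wd in run_query()]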
import pywikibot, re, os, time, sys, json
import toolforge
from datetime import date, datetime, timedelta
from pytz import timezone

conn = toolforge.connect('enwiki_p', 'analytics')
connLabs = toolforge.connect_tools('s53143__mis_lists_p')
cursor1 = connLabs.cursor()
utc_timezone = timezone("UTC")
lva_timezone = timezone("Europe/Riga")


def encode_if_necessary(b):
    if type(b) is bytes:
        return b.decode('utf8')
    return b


def run_query(query):
    try:
        cursor = conn.cursor()
        cursor.execute(query)
        rows = cursor.fetchall()
    except KeyboardInterrupt:
        sys.exit()
    return rows
import pywikibot, re, os, requests
import toolforge
from datetime import date, datetime, timedelta
from pytz import timezone
#from customFuncs import basic_petscan
from natsort import natsorted
import pymysql

utc_timezone = timezone("UTC")
lva_timezone = timezone("Europe/Riga")
conn = toolforge.connect_tools('s53143__npp_p')
conn1 = toolforge.connect('lvwiki_p', 'analytics')


def utc_to_local(utc_dt):
    return utc_timezone.localize(utc_dt).astimezone(lva_timezone)
#


def local_to_utc(utc):
    return lva_timezone.localize(utc).astimezone(utc_timezone)
#


def encode_if_necessary(b):
    if type(b) is bytes:
        return b.decode('utf8')
    return b
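# Usage sketch: the other scripts in this repo turn the localized time into a
# compact timestamp string before writing it to the database. The helper name
# below is illustrative, not part of the original script.
def current_local_timestamp():
    return "{0:%Y%m%d%H%M%S}".format(utc_to_local(datetime.utcnow()))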
def main():
    thedataforreport = getData()
    for entry in thedataforreport:
        # entry example: [68, 'Natalia_Oreiro', 'Natalia Oreiro', 'Geboren_1977']
        langs, de, en, cats = entry
        if not cats:
            cats = ''
        cats = cats.split('|')
        dzimusi = ''
        for cat in cats:
            if cat.startswith('Geboren_'):
                dzimusi = cat.replace('Geboren_', '')
        parsed = parse_dob(dzimusi)
        # prefer the English title when one exists
        if en:
            de = en
        mycat_pre = parsed if parsed else ''
        if mycat_pre in bigm:
            bigm[mycat_pre].append([de, langs, dzimusi])
        else:
            bigm[mycat_pre] = [[de, langs, dzimusi]]
    # keep at most 250 entries per group
    bigm2 = {}
    for one in bigm:
        bigm2.update({one: bigm[one][:250]})
    theorder = sort_order(list(bigm2.keys()))
    theorder = [f[0] for f in theorder]
    with open('frau2.json', "w", encoding='utf-8') as fileS:
        fileS.write(json.dumps(bigm2))
    curr_time = utc_to_local(datetime.utcnow())
    dateforq1 = "{0:%Y%m%d%H%M%S}".format(curr_time)
    connLabs = toolforge.connect_tools('s53143__mis_lists_p')
    cursor1 = connLabs.cursor()
    grupa = 'other1'
    infname = 'women'
    cursor1.execute(
        'UPDATE `entries` SET jsondata=%s, last_upd=%s where group_name=%s and name=%s',
        (json.dumps(bigm2), dateforq1, grupa, infname))
    connLabs.commit()
    connLabs.close()
import pywikibot, re, os, requests, sys
import toolforge
from datetime import date, datetime, timedelta
from pytz import timezone
#from customFuncs import basic_petscan
from natsort import natsorted
import pymysql

utc_timezone = timezone("UTC")
lva_timezone = timezone("Europe/Riga")
conn = toolforge.connect_tools('s53143__spring_p')
conn1 = toolforge.connect('lvwiki_p', 'analytics')


def utc_to_local(utc_dt):
    return utc_timezone.localize(utc_dt).astimezone(lva_timezone)
#


def local_to_utc(utc):
    return lva_timezone.localize(utc).astimezone(utc_timezone)
#


def encode_if_necessary(b):
    if type(b) is bytes:
        return b.decode('utf8')
    return b