def load(command,expect,verbose=False):
    """Run one SQL command and optionally return (part of) its result.

    Args:
        command: SQL text to run.
        expect: falsy to run the command with ``dboperator.execute`` and
            return nothing; ``'row count'`` to return the number of rows
            fetched; any other value is treated as a column name -- when the
            result has exactly one row containing it, that value is returned,
            otherwise the whole result is returned.
        verbose: when true, progress messages are passed to ``show``.

    Returns:
        ``None`` when ``expect`` is falsy, else the row count, the single
        value or the full result as described above.

    On a database error the error is reported, the connection is closed and
    the process exits with status 2.
    """
    if verbose: show("begin with "+command) # no point in trying to show expect!

    ret = None
    try:
        # quick hack for getting return value. do fix me!
        if expect:
            resql = dboperator.get(command)
            if verbose: show("command returned %d rows"%(len(resql)))
            if expect=='row count':
                ret = len(resql)
            elif len(resql)==1 and expect in resql[0]:
                ret = resql[0][expect]
            else:
                ret = resql
        else:
            dboperator.execute(command)
    except Exception as e:
        # "except Exception" (not a bare except) so that SystemExit and
        # KeyboardInterrupt are not swallowed; the reason is reported as well
        show("Something went wrong. Over and out.")
        show("Reason: %s"%(e,))
        dboperator.close()
        exit(2) # exit with error

    if verbose: show("ready")
    return ret
def load(command,expect,verbose=False):
    """Run one SQL command and optionally return (part of) its result.

    Args:
        command: SQL text to run.
        expect: falsy to run the command with ``dboperator.execute`` and
            return nothing; ``'row count'`` to return the number of rows
            fetched; any other value is treated as a column name -- when the
            result has exactly one row containing it, that value is returned,
            otherwise the whole result is returned.
        verbose: when true, progress messages are passed to ``show``.

    Returns:
        ``None`` when ``expect`` is falsy, else the row count, the single
        value or the full result as described above.

    On a database error the exception is printed, the connection is closed
    and the process exits with status 2.
    """
    if verbose: show("begin with "+command) # no point in trying to show expect!

    ret = None
    try:
        # quick hack for getting return value. do fix me!
        if expect:
            resql = dboperator.get(command)
            if verbose: show("command returned %d rows"%(len(resql)))
            if expect=='row count':
                ret = len(resql)
            elif len(resql)==1 and expect in resql[0]:
                ret = resql[0][expect]
            else:
                ret = resql
        else:
            dboperator.execute(command)
    except Exception as f:  # "as" form works on Python 2.6+ and Python 3
        print(f)
        e = sys.exc_info()[0] # exception class, added for debugging (1.11.2018 VHä)
        show("Something went wrong. Over and out. %s" % (e,))
        dboperator.close()
        exit(2) # exit with error

    # the computed value used to be dropped; hand it back to the caller
    return ret
def loadsql(sqlfile,verbose=False):
    """Execute an MS SQL script file batch by batch.

    The file content is split on ``GO`` separator lines and every piece is
    passed to ``dboperator.execute`` in file order.

    Args:
        sqlfile: path of the SQL script to read.
        verbose: when true, each batch is passed to ``show`` before it runs.
    """
    # "with" closes the file even when reading fails
    with open(sqlfile, 'r') as fd:
        allsql = fd.read()

    # remove UTF-8 BOM characters if they exist. On Python 2 read() returns a
    # byte string, on Python 3 text -- comparing text with codecs.BOM_UTF8
    # would raise TypeError there, so both forms are handled.
    if isinstance(allsql, bytes):
        if allsql.startswith(codecs.BOM_UTF8):
            allsql = allsql[len(codecs.BOM_UTF8):]
    elif allsql.startswith(u'\ufeff'):
        # NOTE(review): assumes the file was decoded as UTF-8 -- confirm
        allsql = allsql[1:]

    # split MS SQL batches; \b keeps lines such as "GOTO label" from being
    # mistaken for a GO separator
    for sql in re.split(r'\ngo\b.*\n', allsql, flags=re.IGNORECASE):
        if verbose: show(sql)
        dboperator.execute(sql)
def loadsql(sqlfile, verbose=False):
    """Read a SQL script and run each of its MS SQL batches in order.

    Batches are the pieces between ``GO`` separator lines; each one is handed
    to ``dboperator.execute``.

    Args:
        sqlfile: path of the SQL script to read.
        verbose: when true, each batch is passed to ``show`` before it runs.
    """
    # the context manager guarantees the handle is released on read errors
    with open(sqlfile, 'r') as fd:
        allsql = fd.read()

    # Strip a leading UTF-8 BOM. read() gives bytes on Python 2 and text on
    # Python 3; the text case cannot be compared against codecs.BOM_UTF8
    # (TypeError), so it is checked separately.
    if isinstance(allsql, bytes):
        if allsql.startswith(codecs.BOM_UTF8):
            allsql = allsql[len(codecs.BOM_UTF8):]
    elif allsql.startswith(u'\ufeff'):
        # NOTE(review): assumes the file was decoded as UTF-8 -- confirm
        allsql = allsql[1:]

    # split MS SQL batches on lines beginning with the word GO (any case);
    # the word boundary avoids splitting on e.g. "GOTO"
    batches = re.split(r'\ngo\b.*\n', allsql, flags=re.IGNORECASE)
    for sql in batches:
        if verbose:
            show(sql)
        dboperator.execute(sql)
def load(schema,procedure,verbose=False):
    """Execute the stored procedure ``schema.procedure`` and close the connection.

    Args:
        schema: schema name, used verbatim in the SQL text.
        procedure: procedure name, used verbatim in the SQL text.
        verbose: accepted for interface compatibility; not used here.

    On failure the error is reported, the connection is closed and the
    process exits with status 2.
    """
    show("begin with "+schema+" "+procedure)

    sql = "execute "+schema+"."+procedure
    try:
        dboperator.execute(sql)
    except Exception as e:
        # not a bare except: SystemExit/KeyboardInterrupt pass through, and
        # the real reason is shown next to the original guess
        show("Something went wrong. Probably procedure wasn't found or there is a permission problem. Over and out.")
        show("Reason: %s"%(e,))
        dboperator.close()
        exit(2) # exit with error

    dboperator.close()
    show("ready")
def load(sqlfile,migrate,verbose=False):
    """Run a SQL file, optionally as a numbered migration step.

    A file whose name (the part after the last "/") contains "__" is treated
    as numbered: the text before the first "__" must be an integer.  When
    ``migrate`` is set, such a file is run only if its number is greater than
    the highest number recorded for that phase in ``dbo.migration``; the
    number is then recorded.  Step 0 is always run (it creates the table).
    In every other case the file is simply run.

    Args:
        sqlfile: path of the SQL script.
        migrate: migration phase name, or a falsy value for a plain run.
        verbose: when true, extra progress messages are passed to ``show``.

    Raises:
        ValueError: the file name prefix before "__" is not an integer.
    """
    show("begin "+sqlfile)

    # Only the file name is inspected; a "__" in a directory name used to
    # make str.index raise ValueError.
    filename = sqlfile[sqlfile.rfind("/")+1:]
    number_togo = None
    if "__" in filename:
        number_togo = int(filename[:filename.index("__")])

    try:
        if migrate and number_togo is not None:
            # migration install step 0 is special, just add the table for others
            if number_togo==0:
                loadsql(sqlfile,verbose)
            else:
                # NOTE(review): migrate is interpolated into the SQL text;
                # dboperator's parameter API is not visible from here
                number_last = None
                result = dboperator.get("select max(number) as number from dbo.migration where phase='%s'"%(migrate))
                if result[0]["number"] is not None:
                    number_last = int(result[0]["number"])
                if verbose: show("migrating %s which is going on at %s and now trying %s"%(migrate,number_last,number_togo))
                if number_last is None or number_togo > number_last:
                    show("Migrating from %s to %s"%(number_last,number_togo))
                    loadsql(sqlfile,verbose)
                    # schema-qualified so the insert targets the same table the select reads
                    dboperator.execute("insert into dbo.migration (phase,number) values ('%s',%s)"%(migrate,number_togo))
                else:
                    if verbose: show("skipping migration %s < %s"%(number_togo,number_last))
        else:
            loadsql(sqlfile,verbose)
    finally:
        # release the connection on errors too
        dboperator.close()

    show("ready")
def load(database, schema, procedure, parameters, verbose=False):
    """Execute ``database.schema.procedure`` with optional parameters.

    Args:
        database: database name, used verbatim in the SQL text.
        schema: schema name, used verbatim in the SQL text.
        procedure: procedure name, used verbatim in the SQL text.
        parameters: parameter text appended after the procedure name, or ""
            for none.
        verbose: accepted for interface compatibility; not used here.

    ``dboperator.execute`` is expected to return 1 on success; any other
    value is reported as the error, the connection is closed and the process
    exits with status 2.
    """
    show("begin with " + database + " " + schema + " " + procedure + " " + parameters)

    sql = "execute " + database + "." + schema + "." + procedure
    if parameters != "":
        sql += " " + parameters

    result = dboperator.execute(sql)
    # successful execution returns 1, anything else is an error message
    if result != 1:
        show("A MSSQL error has been caught during executing " + procedure)
        # %s formatting: string concatenation raised TypeError whenever the
        # result was not a str, hiding the actual error
        show("Error code: %s" % (result,))
        dboperator.close()
        exit(2)  # exit with error

    dboperator.close()
    show("ready")
def load(sqlfile, migrate, verbose=False):
    """Run a SQL file, optionally as a numbered migration step.

    A file whose name (the part after the last "/") contains "__" is treated
    as numbered: the text before the first "__" must be an integer.  When
    ``migrate`` is set, such a file is run only if its number is greater than
    the highest number recorded for that phase in ``antero.dbo.migration``;
    the number is then recorded.  Step 0 is always run (it creates the
    table).  In every other case the file is simply run.

    Args:
        sqlfile: path of the SQL script.
        migrate: migration phase name, or a falsy value for a plain run.
        verbose: when true, extra progress messages are passed to ``show``.

    Raises:
        ValueError: the file name prefix before "__" is not an integer.

    If recording the migration does not return 1, the returned value is
    shown and the process exits with status 2.
    """
    show("begin " + sqlfile)

    # Only the file name is inspected; a "__" in a directory name used to
    # make str.index raise ValueError.
    filename = sqlfile[sqlfile.rfind("/") + 1:]
    number_togo = None
    if "__" in filename:
        number_togo = int(filename[:filename.index("__")])

    try:
        if migrate and number_togo is not None:
            # migration install step 0 is special, just add the table for others
            if number_togo == 0:
                loadsql(sqlfile, verbose)
            else:
                # NOTE(review): migrate is interpolated into the SQL text;
                # dboperator's parameter API is not visible from here
                number_last = None
                result = dboperator.get(
                    "select max(number) as number from antero.dbo.migration where phase='%s'" % (migrate))
                if result[0]["number"] is not None:
                    number_last = int(result[0]["number"])
                if verbose:
                    show("migrating %s which is going on at %s and now trying %s" % (migrate, number_last, number_togo))
                if number_last is None or number_togo > number_last:
                    show("Migrating from %s to %s" % (number_last, number_togo))
                    loadsql(sqlfile, verbose)
                    result = dboperator.execute(
                        "insert into antero.dbo.migration (phase,number) values ('%s',%s)" % (migrate, number_togo))
                    if result != 1:
                        show(result)
                        exit(2)
                elif verbose:
                    show("skipping migration %s < %s" % (number_togo, number_last))
        else:
            loadsql(sqlfile, verbose)
    finally:
        # runs on normal completion, on errors and on exit(2), so the
        # connection is always released
        dboperator.close()

    show("ready")
def load(secure,hostname,url,schema,table,postdata,condition,verbose,rowcount):
    """Fetch a JSON array from an HTTP API and insert its items into a table.

    Args:
        secure: when true the address uses https://, otherwise http://.
        hostname: host part of the address.
        url: path part of the address.
        schema: target schema name.
        table: target table name.
        postdata: request body handed to ``urllib2.Request``, or None.
        condition: SQL condition for deleting old rows; when falsy the table
            is emptied with ``dboperator.empty`` instead.
        verbose: when true every row is passed to ``show``.
        rowcount: number of rows collected before a batch insert.

    The first item is inserted alone with ``dboperator.insert``; later items
    are collected and written ``rowcount`` at a time.  Any request failure
    ends the process with status 2.
    """
    show("begin "+hostname+" "+url+" "+schema+" "+table+" "+(postdata or "")+" "+(condition or ""))

    address = ("https://" if secure else "http://")+hostname+url
    show("load from "+address)

    reqheaders = {'Content-Type': 'application/json'}
    reqheaders['Caller-Id'] = '1.2.246.562.10.2013112012294919827487.vipunen'
    # api credentials from env vars
    if os.getenv("API_USERNAME"):
        show("using authentication")
        apiuser = os.getenv("API_USERNAME")
        apipass = os.getenv("API_PASSWORD")
        reqheaders['Authorization'] = 'Basic %s' % base64.b64encode(apiuser+":"+apipass)

    # automatic POST with (post)data
    print("value used for , -r, --rowcount=", rowcount)
    request = urllib2.Request(address, data=postdata, headers=reqheaders)
    try:
        response = urllib2.urlopen(request)
    except httplib.IncompleteRead as e:
        show('IncompleteRead exception.')
        # e.partial is the data received so far, so report its length
        # ('%d' % e.partial raised TypeError)
        show('Received: %d'%(len(e.partial)))
        sys.exit(2)
    except urllib2.HTTPError as e:
        show('The server couldn\'t fulfill the request.')
        show('Error code: %d'%(e.code))
        sys.exit(2)
    except urllib2.URLError as e:
        show('We failed to reach a server.')
        show('Reason: %s'%(e.reason))
        sys.exit(2)

    # everything is fine
    show("api call OK")

    # remove data conditionally, otherwise empty
    # merge operation could be considered here...
    if condition:
        show("remove from %s.%s with condition '%s'"%(schema,table,condition))
        dboperator.execute("DELETE FROM %s.%s WHERE %s"%(schema,table,condition))
    else:
        show("empty %s.%s"%(schema,table))
        dboperator.empty(schema,table)

    show("insert data")
    cnt=0
    manycount = 0
    rows = []
    for row in ijson.items(response,'item'):
        cnt+=1
        manycount+=1
        # show some sign of being alive
        if cnt%100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if cnt%1000 == 0:
            show("-- %d" % (cnt))
        if verbose:
            show("%d -- %s"%(cnt,row))

        # find out which columns to use on insert
        dboperator.resetcolumns(row)

        # flatten arrays/lists into their JSON text
        for col in row:
            if isinstance(row[col], list):
                row[col] = json.dumps(row[col])

        rows.append(row)
        if cnt == 1:
            # the very first row is written on its own
            dboperator.insert(address,schema,table,row)
            manycount = 0
            rows = []
        elif manycount == rowcount:
            # NOTE(review): bare insert() must be defined elsewhere in this
            # module -- confirm; the single-row path uses dboperator.insert
            insert(address,schema,table,rows)
            manycount = 0
            rows = []

    # write whatever is left from the last, incomplete batch
    # (rows and manycount always move together, so a non-empty list suffices)
    if rows:
        insert(address,schema,table,rows)
        rows = []
        manycount = 0

    show("wrote %d"%(cnt))
    show("ready")
show('The server couldn\'t fulfill the request.') show('Error code: %d'%(e.code)) sys.exit(2) except urllib2.URLError, e: show('We failed to reach a server.') show('Reason: %s'%(e.reason)) sys.exit(2) else: # everything is fine show("api call OK") # remove data conditionally, otherwise empty # merge operation could be considered here... if condition: show("remove from %s.%s with condition '%s'"%(schema,table,condition)) dboperator.execute("DELETE FROM %s.%s WHERE %s"%(schema,table,condition)) else: show("empty %s.%s"%(schema,table)) dboperator.empty(schema,table) show("insert data") cnt=0 for row in ijson.items(response,'item'): cnt+=1 # show some sign of being alive if cnt%100 == 0: sys.stdout.write('.') sys.stdout.flush() if cnt%1000 == 0: show("-- %d" % (cnt)) if verbose: show("%d -- %s"%(cnt,row))
def load(url, schema, table, condition):
    """
    Load data from the ARVO API into a table, following pagination.

    Results from ARVO-API can come in multiple pages. If that's the case,
    we need to make multiple requests to the ARVO API, using the "next_url"
    parameter.

    Args:
        url: address of the first page.
        schema: target schema name.
        table: target table name.
        condition: SQL condition for deleting old rows before the first
            insert; when falsy ``dboperator.empty`` is called instead.

    Exits with status 1 on a request error, 2 on a non-200 status, 3 on
    invalid JSON and 4 when "pagination" or "data" is missing.
    """
    FIRST_LOOP = True  # This is used to make possible DELETE operation (due to condition) only once.

    while True:
        show("begin " + url + " " + schema + " " + table + " " + (condition or ""))
        show("load from " + url)

        reqheaders = {'Content-Type': 'application/json'}
        # api credentials from env vars
        if os.getenv("API_USERNAME"):
            show("using authentication")
            apiuser = os.getenv("API_USERNAME")
            apipass = os.getenv("API_PASSWORD")
            # b64encode needs bytes on Python 3 and returns bytes there;
            # on Python 2 both conversions are no-ops
            credentials = apiuser + ":" + apipass
            if not isinstance(credentials, bytes):
                credentials = credentials.encode("utf-8")
            token = base64.b64encode(credentials)
            if not isinstance(token, str):
                token = token.decode("ascii")
            reqheaders['Authorization'] = 'Basic %s' % token

        try:
            r = requests.get(url, headers=reqheaders)
        except requests.exceptions.RequestException as e:
            print(e)
            sys.exit(1)

        if r.status_code != 200:
            print("Error! HTTP status code: " + str(r.status_code))
            sys.exit(2)

        try:
            result = json.loads(r.content)
        except ValueError as e:
            print(e)
            sys.exit(3)

        if "pagination" not in result or "data" not in result:
            print("Error! Received JSON-data not valid.")
            sys.exit(4)

        # everything is fine
        show("api call OK")

        # Received data e.g.
        # {
        #   "data": [
        #     {
        #       "taustakysymykset": true, "koulutustoimija": "xxxx", "vastausid": 1111,
        #       "kyselykertaid": 123, "kysely_alkupvm": "2016-11-29T22:00:00Z",
        #       "suorituskieli": "fi", "tutkinto_fi": "xxxx", "opintoala_en": "xxxx",
        #       "valmistavan_koulutuksen_jarjestaja_en": "xxxx", "kysymys_en": "xxxx",
        #       "koulutustoimija_en": "xxxx", "tutkintotunnus": "xxxx", "numerovalinta": 2,
        #       "valmistavan_koulutuksen_oppilaitos_sv": "xxxx", "kysymys_sv": "xxxx",
        #       "kysymysjarjestys": 0, "opintoala_sv": "xxxx", "monivalintavaihtoehto": "xxxx",
        #       "kysymysid": 1234, "valmistavan_koulutuksen_oppilaitos_fi": "xxxx",
        #       "kysely_en": "xxxx", "vastaustyyppi": "xxxx", "kysymysryhma": "xxxx",
        #       "tutkinto_en": null, "kunta": null, "kysymysryhmaid": 110,
        #       "kysymysryhmajarjestys": 0, "vaihtoehto": null, "opintoala_fi": "xxxx",
        #       "kysymys_fi": "Ik", "vastaajaid": 123, "kyselyid": 111,
        #       "valmistavan_koulutuksen_jarjestaja_fi": "xxxx", "kysymysryhma_en": "xxxx",
        #       "kysely_sv": "xxxx", "kysymysryhma_fi": "xxxx", "opintoalatunnus": "xxxx",
        #       "valmistavan_koulutuksen_jarjestaja_sv": "xxxx",
        #       "vastausaika": "2017-02-05T22:00:00Z", "tunnus": "xxxx",
        #       "valmistavan_koulutuksen_jarjestaja": "xxxx", "koulutustoimija_fi": "xxxx",
        #       "kysely_loppupvm": null, "koulutusmuoto": null, "kyselykerta": "xxxx",
        #       "valmistavan_koulutuksen_oppilaitos_en": "xxxx", "valtakunnallinen": true,
        #       "tutkinto_sv": null, "koulutustoimija_sv": "xxxx",
        #       "valmistavan_koulutuksen_oppilaitos": "xxxx", "kysely_fi": "xxxx",
        #       "kysymysryhma_sv": "xxxx"
        #     }
        #   ],
        #   "pagination": { "next_url": "null" }
        # }

        # Save in DB only the part before ?-mark:
        # https://arvo.csc.fi/api/vipunen?alkupvm=2018-01-01&loppupvm=2018-02-01
        address = url.split("?")[0]

        # remove data conditionally, otherwise empty
        # merge operation could be considered here...
        if FIRST_LOOP:  # This is done only on the first go (no matter if Arvo returns one or multiple pages)
            if condition:
                show("remove from %s.%s with condition '%s'" % (schema, table, condition))
                dboperator.execute("DELETE FROM %s.%s WHERE %s" % (schema, table, condition))
            else:
                show("empty %s.%s" % (schema, table))
                dboperator.empty(schema, table)

        show("insert data")
        cnt = 0
        for row in result["data"]:
            cnt += 1
            # show some sign of being alive
            if cnt % 100 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            if cnt % 1000 == 0:
                show("-- %d" % (cnt))

            # find out which columns to use on insert
            dboperator.resetcolumns(row)

            # flatten arrays/lists into their JSON text
            for col in row:
                if isinstance(row[col], list):
                    row[col] = json.dumps(row[col])

            dboperator.insert(address, schema, table, row)

        show("wrote %d" % (cnt))
        show("ready")

        # The last page is marked with the string "null"; a real JSON null
        # (None) is accepted too, otherwise the next round would fail on
        # url = None.
        next_url = result["pagination"]["next_url"]
        if next_url is None or next_url == "null":
            break  # exit while-loop. We are done.
        url = next_url
        FIRST_LOOP = False  # Do not make the possible DELETE-operation anymore!
def load(url, schema, table, condition):
    """
    Load data from the VARDA API into a table, following pagination.

    Results from VARDA-API can come in multiple pages. If that's the case,
    we need to make multiple requests to the VARDA API, using the "next"
    parameter.

    Args:
        url: address of the first page.
        schema: target schema name.
        table: target table name.
        condition: only shown in the log for now (see TODO below).

    Exits with status 1 on a request error, 2 on a non-200 status and 3 on
    invalid JSON.
    """
    # First delete all from TABLE -> Start from scratch. TODO: Delete based on condition.
    dboperator.execute("DELETE FROM %s.%s" % (schema, table))

    # keys that are not saved in Antero DB
    dropped_keys = ('johtaja', 'url', 'muutos_pvm', 'vaka_jarjestaja', 'toimipaikat')

    while True:
        show("begin " + url + " " + schema + " " + table + " " + (condition or ""))
        show("load from " + url)

        reqheaders = {'Content-Type': 'application/json'}
        reqheaders['Caller-Id'] = '1.2.246.562.10.2013112012294919827487.vipunen'
        # api credentials from env vars
        if os.getenv("VARDA_API_KEY"):
            show("using authentication")
            api_key = os.getenv("VARDA_API_KEY")
            reqheaders['Authorization'] = 'Token %s' % api_key

        try:
            r = requests.get(url, headers=reqheaders)
        except requests.exceptions.RequestException as e:
            print("RequestException: ", e)
            sys.exit(1)

        if r.status_code != 200:
            print("Error! HTTP status code: " + str(r.status_code))
            sys.exit(2)

        try:
            data = json.loads(r.content)
        except ValueError as e:
            print("ValueError: ", e)
            sys.exit(3)

        # everything is fine
        show("api call OK")

        # Parse URL
        # From https://varda.oso-pilot.csc.fi/api/v1/toimipisteet/ save in DB https://varda.oso-pilot.csc.fi
        n = url.find("/", url.find("/") + 2)  # find 3rd occurrence of "/"
        # without a third "/" find() gives -1 and url[:-1] would drop the
        # last character, so keep the whole url in that case
        address = url[:n] if n != -1 else url

        show("insert data")
        cnt = 0
        for row in data["results"]:
            cnt += 1
            # show some sign of being alive
            if cnt % 100 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            if cnt % 1000 == 0:
                show("-- %d" % (cnt))

            # Explicitly remove the keys that are not saved; pop() with a
            # default tolerates rows where a key is absent
            for key in dropped_keys:
                row.pop(key, None)

            # find out which columns to use on insert
            dboperator.resetcolumns(row)

            # flatten arrays/lists into their JSON text
            for col in row:
                if isinstance(row[col], list):
                    row[col] = json.dumps(row[col])

            dboperator.insert(address, schema, table, row)

        show("wrote %d" % (cnt))
        show("ready")

        if data["next"] is None:
            break  # exit while-loop. We are done.
        url = data["next"]
def load(secure, hostname, url, schema, table, postdata, condition, verbose):
    """Fetch a JSON list with HTTP GET and insert its records into a table.

    Args:
        secure: accepted but not used.
            NOTE(review): the address is always built with http:// --
            confirm whether https should be honoured.
        hostname: host part of the address.
        url: path part of the address.
        schema: target schema name.
        table: target table name.
        postdata: only shown in the log; the request is a GET.
        condition: SQL condition for deleting old rows; when falsy
            ``dboperator.empty`` is called instead.
        verbose: when true every inserted row is passed to ``show``.

    A failed request, an HTTP error status or an unparsable body ends the
    process with status 2 before any data is removed.
    """
    show("begin " + hostname + " " + url + " " + schema + " " + table + " "
         + (postdata or "No postdata") + " " + (condition or ""))

    address = "http://" + hostname + url
    show("load from " + address)

    reqheaders = {'Content-Type': 'application/json'}
    reqheaders['Caller-Id'] = '1.2.246.562.10.2013112012294919827487.vipunen'
    # api credentials from env vars
    if os.getenv("API_USERNAME"):
        show("using authentication")
        apiuser = os.getenv("API_USERNAME")
        apipass = os.getenv("API_PASSWORD")
        # b64encode needs bytes on Python 3 and returns bytes there;
        # on Python 2 both conversions are no-ops
        credentials = apiuser + ":" + apipass
        if not isinstance(credentials, bytes):
            credentials = credentials.encode("utf-8")
        token = base64.b64encode(credentials)
        if not isinstance(token, str):
            token = token.decode("ascii")
        reqheaders['Authorization'] = 'Basic %s' % token

    try:
        reply = requests.get(address, headers=reqheaders)
        # an error status must stop here: otherwise the table below would be
        # emptied and the error body treated as data
        reply.raise_for_status()
        response = reply.json()
    except Exception as e:
        show('HTTP GET failed.')
        show('Reason: %s' % (str(e)))
        sys.exit(2)

    # everything is fine
    show("api call OK")

    # remove data conditionally, otherwise empty
    # merge operation could be considered here...
    if condition:
        show("remove from %s.%s with condition '%s'" % (schema, table, condition))
        dboperator.execute("DELETE FROM %s.%s WHERE %s" % (schema, table, condition))
    else:
        show("empty %s.%s" % (schema, table))
        dboperator.empty(schema, table)

    # fields copied from each record, in assignment order
    fields = (
        "avoinKK", "db", "erillinenOpintoOikeus", "kansainvVaihto",
        "kkYhteistyo", "koodi", "koulutustyyppi", "kuvaus", "lukumaara",
        "luoja", "luontipaivamaara", "maahanmValKoulutus", "oppilaitostunnus",
        "perustutkOpiskelijat", "suorituspaiva", "tkiHarjoittelunLaajuus",
        "tkiMuutLaajuus", "tkiToiminnanLaajuus", "ulkomaaharjoittelu",
        "ulkomailtaHyvLuet", "vieraskSuoritukset", "vuosi",
        "erikoistumiskoulutus",
    )

    show("insert data")
    cnt = 0
    for i in response:
        cnt += 1
        # make "columnlist" (type has no meaning as we're not creating table)
        row = makerow()
        # setup dboperator so other calls work
        dboperator.columns(row)

        # presumably jv() picks the named value out of the record -- its
        # definition is outside this block
        for name in fields:
            row[name] = jv(i, name)

        dboperator.insert(hostname + url, schema, table, row)

        # show some sign of being alive
        if cnt % 100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if cnt % 1000 == 0:
            show("-- %d" % (cnt))
        if verbose:
            show("%d -- %s" % (cnt, row))

    show("wrote %d" % (cnt))
    show("ready")
show('Error code: %d' % (e.code)) sys.exit(2) except urllib2.URLError, e: show('We failed to reach a server.') show('Reason: %s' % (e.reason)) sys.exit(2) else: # everything is fine show("api call OK") # remove data conditionally, otherwise empty # merge operation could be considered here... if condition: show("remove from %s.%s with condition '%s'" % (schema, table, condition)) dboperator.execute("DELETE FROM %s.%s WHERE %s" % (schema, table, condition)) else: show("empty %s.%s" % (schema, table)) dboperator.empty(schema, table) show("insert data") cnt = 0 for row in ijson.items(response, 'item'): cnt += 1 # show some sign of being alive if cnt % 100 == 0: sys.stdout.write('.') sys.stdout.flush() if cnt % 1000 == 0: show("-- %d" % (cnt)) if verbose: show("%d -- %s" % (cnt, row))
def load(url, schema, table, condition):
    """
    Load data from the ARVO API into a table, following pagination.

    Results from ARVO-API can come in multiple pages. If that's the case,
    we need to make multiple requests to the ARVO API, using the "next_url"
    parameter.

    Args:
        url: address of the first page.
        schema: target schema name.
        table: target table name.
        condition: SQL condition for deleting old rows before the first
            insert; when falsy ``dboperator.empty`` is called instead.

    Exits with status 1 on a request error, 2 on a non-200 status, 3 on
    invalid JSON and 4 when "pagination" or "data" is missing.
    """
    FIRST_LOOP = True  # This is used to make possible DELETE operation (due to condition) only once.

    while True:
        show("begin " + url + " " + schema + " " + table + " " + (condition or ""))
        show("load from " + url)

        reqheaders = {'Content-Type': 'application/json'}
        # api credentials from env vars
        if os.getenv("API_USERNAME"):
            show("using authentication")
            apiuser = os.getenv("API_USERNAME")
            apipass = os.getenv("API_PASSWORD")
            # b64encode needs bytes on Python 3 and returns bytes there;
            # on Python 2 both conversions are no-ops
            credentials = apiuser + ":" + apipass
            if not isinstance(credentials, bytes):
                credentials = credentials.encode("utf-8")
            token = base64.b64encode(credentials)
            if not isinstance(token, str):
                token = token.decode("ascii")
            reqheaders['Authorization'] = 'Basic %s' % token

        try:
            r = requests.get(url, headers=reqheaders)
        except requests.exceptions.RequestException as e:
            print(e)
            sys.exit(1)

        if r.status_code != 200:
            print("Error! HTTP status code: " + str(r.status_code))
            sys.exit(2)

        try:
            result = json.loads(r.content)
        except ValueError as e:
            print(e)
            sys.exit(3)

        if "pagination" not in result or "data" not in result:
            print("Error! Received JSON-data not valid.")
            sys.exit(4)

        # everything is fine
        show("api call OK")

        # Received data e.g.
        # {
        #   "data": [
        #     {
        #       "taustakysymykset": true, "koulutustoimija": "xxxx", "vastausid": 1111,
        #       "kyselykertaid": 123, "kysely_alkupvm": "2016-11-29T22:00:00Z",
        #       "suorituskieli": "fi", "tutkinto_fi": "xxxx",
        #       "valmistavan_koulutuksen_jarjestaja_en": "xxxx", "kysymys_en": "xxxx",
        #       "koulutustoimija_en": "xxxx", "tutkintotunnus": "xxxx", "numerovalinta": 2,
        #       "valmistavan_koulutuksen_oppilaitos_sv": "xxxx", "kysymys_sv": "xxxx",
        #       "kysymysjarjestys": 0, "monivalintavaihtoehto": "xxxx", "kysymysid": 1234,
        #       "valmistavan_koulutuksen_oppilaitos_fi": "xxxx", "kysely_en": "xxxx",
        #       "vastaustyyppi": "xxxx", "kysymysryhma": "xxxx", "tutkinto_en": null,
        #       "kunta": null, "kysymysryhmaid": 110, "kysymysryhmajarjestys": 0,
        #       "vaihtoehto": null, "kysymys_fi": "Ik", "vastaajaid": 123, "kyselyid": 111,
        #       "valmistavan_koulutuksen_jarjestaja_fi": "xxxx", "kysymysryhma_en": "xxxx",
        #       "kysely_sv": "xxxx", "kysymysryhma_fi": "xxxx",
        #       "valmistavan_koulutuksen_jarjestaja_sv": "xxxx",
        #       "vastausaika": "2017-02-05T22:00:00Z", "tunnus": "xxxx",
        #       "valmistavan_koulutuksen_jarjestaja": "xxxx", "koulutustoimija_fi": "xxxx",
        #       "kysely_loppupvm": null, "koulutusmuoto": null, "kyselykerta": "xxxx",
        #       "valmistavan_koulutuksen_oppilaitos_en": "xxxx", "valtakunnallinen": true,
        #       "tutkinto_sv": null, "koulutustoimija_sv": "xxxx",
        #       "valmistavan_koulutuksen_oppilaitos": "xxxx", "kysely_fi": "xxxx",
        #       "kysymysryhma_sv": "xxxx"
        #     }
        #   ],
        #   "pagination": { "next_url": "null" }
        # }

        # Save in DB only the part before ?-mark:
        # https://arvo.csc.fi/api/vipunen?alkupvm=2018-01-01&loppupvm=2018-02-01
        address = url.split("?")[0]

        # remove data conditionally, otherwise empty
        # merge operation could be considered here...
        if FIRST_LOOP:  # This is done only on the first go (no matter if Arvo returns one or multiple pages)
            if condition:
                show("remove from %s.%s with condition '%s'" % (schema, table, condition))
                dboperator.execute("DELETE FROM %s.%s WHERE %s" % (schema, table, condition))
            else:
                show("empty %s.%s" % (schema, table))
                dboperator.empty(schema, table)

        show("insert data")
        cnt = 0
        for row in result["data"]:
            cnt += 1
            # show some sign of being alive
            if cnt % 100 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            if cnt % 1000 == 0:
                show("-- %d" % (cnt))

            # find out which columns to use on insert
            dboperator.resetcolumns(row)

            # flatten arrays/lists into their JSON text
            for col in row:
                if isinstance(row[col], list):
                    row[col] = json.dumps(row[col])

            dboperator.insert(address, schema, table, row)

        show("wrote %d" % (cnt))
        show("ready")

        # the last page carries either the string "null" or a JSON null
        next_url = result["pagination"]["next_url"]
        if next_url is None or next_url == "null":
            break  # exit while-loop. We are done.
        url = next_url
        FIRST_LOOP = False  # Do not make the possible DELETE-operation anymore!
def load(secure,hostname,url,schema,table,postdata,condition,verbose):
    """Fetch a JSON list with HTTP GET and insert its records into a table.

    Args:
        secure: accepted but not used.
            NOTE(review): the address is always built with http:// --
            confirm whether https should be honoured.
        hostname: host part of the address.
        url: path part of the address.
        schema: target schema name.
        table: target table name.
        postdata: only shown in the log; the request is a GET.
        condition: SQL condition for deleting old rows; when falsy
            ``dboperator.empty`` is called instead.
        verbose: when true every inserted row is passed to ``show``.

    A failed request, an HTTP error status or an unparsable body ends the
    process with status 2 before any data is removed.
    """
    show("begin "+hostname+" "+url+" "+schema+" "+table+" "+(postdata or "No postdata")+" "+(condition or ""))

    address = "http://"+hostname+url
    show("load from "+address)

    reqheaders = {'Content-Type': 'application/json'}
    reqheaders['Caller-Id'] = '1.2.246.562.10.2013112012294919827487.vipunen'
    # api credentials from env vars
    if os.getenv("API_USERNAME"):
        show("using authentication")
        apiuser = os.getenv("API_USERNAME")
        apipass = os.getenv("API_PASSWORD")
        # b64encode needs bytes on Python 3 and returns bytes there;
        # on Python 2 both conversions are no-ops
        credentials = apiuser+":"+apipass
        if not isinstance(credentials, bytes):
            credentials = credentials.encode("utf-8")
        token = base64.b64encode(credentials)
        if not isinstance(token, str):
            token = token.decode("ascii")
        reqheaders['Authorization'] = 'Basic %s' % token

    try:
        reply = requests.get(address, headers=reqheaders)
        # an error status must stop here: otherwise the table below would be
        # emptied and the error body treated as data
        reply.raise_for_status()
        response = reply.json()
    except Exception as e:
        show('HTTP GET failed.')
        show('Reason: %s'%(str(e)))
        sys.exit(2)

    # everything is fine
    show("api call OK")

    # remove data conditionally, otherwise empty
    # merge operation could be considered here...
    if condition:
        show("remove from %s.%s with condition '%s'"%(schema,table,condition))
        dboperator.execute("DELETE FROM %s.%s WHERE %s"%(schema,table,condition))
    else:
        show("empty %s.%s"%(schema,table))
        dboperator.empty(schema,table)

    # fields copied from each record, in assignment order
    fields = (
        "edellinenSyysolo", "hetu", "ika", "kevat", "loAloituspvm", "olok",
        "olos", "ooAloituspvm", "opSummaKunOtePankista", "opiskelijaavain",
        "opiskeluoikeusavain", "opiskeluoikeusid", "oppilaitos",
        "oppilaitostunnus", "pankkiKumuEnnen55", "pankkiSaldo55", "regDatum",
        "sukupuoli", "summa", "suorittanut27", "suorittanut55ilmanPankkia",
        "suorittanut55pankinAvulla", "syys", "tkoodi", "uusiOpisk",
        "uusiOpiskKevat", "uuttaPankkiin", "vuosi",
    )

    show("insert data")
    cnt=0
    for i in response:
        cnt+=1
        # make "columnlist" (type has no meaning as we're not creating table)
        row = makerow()
        # setup dboperator so other calls work
        dboperator.columns(row)

        # presumably jv() picks the named value out of the record -- its
        # definition is outside this block
        for name in fields:
            row[name] = jv(i,name)

        dboperator.insert(hostname+url,schema,table,row)

        # show some sign of being alive
        if cnt%100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if cnt%1000 == 0:
            show("-- %d" % (cnt))
        if verbose:
            show("%d -- %s"%(cnt,row))

    show("wrote %d"%(cnt))
    show("ready")
def load(url, schema, table, condition):
    """
    Load data from the VARDA API into a table, following pagination.

    Results from VARDA-API can come in multiple pages. If that's the case,
    we need to make multiple requests to the VARDA API, using the "next"
    parameter.

    Args:
        url: address of the first page.
        schema: target schema name.
        table: target table name.
        condition: only shown in the log for now (see TODO below).

    Exits with status 1 on a request error, 2 on a non-200 status and 3 on
    invalid JSON.
    """
    # First delete all from TABLE -> Start from scratch. TODO: Delete based on condition.
    dboperator.execute("DELETE FROM %s.%s" % (schema, table))

    # keys that are not saved in Antero DB
    dropped_keys = ('johtaja', 'url', 'muutos_pvm', 'vaka_jarjestaja', 'toimipaikat')

    while True:
        show("begin " + url + " " + schema + " " + table + " " + (condition or ""))
        show("load from " + url)

        reqheaders = {'Content-Type': 'application/json'}
        # api credentials from env vars
        if os.getenv("VARDA_API_KEY"):
            show("using authentication")
            api_key = os.getenv("VARDA_API_KEY")
            reqheaders['Authorization'] = 'Token %s' % api_key

        # print(...) with a single argument behaves the same on Python 2 and 3
        try:
            r = requests.get(url, headers=reqheaders)
        except requests.exceptions.RequestException as e:
            print(e)
            sys.exit(1)

        if r.status_code != 200:
            print("Error! HTTP status code: " + str(r.status_code))
            sys.exit(2)

        try:
            data = json.loads(r.content)
        except ValueError as e:
            print(e)
            sys.exit(3)

        # everything is fine
        show("api call OK")

        # Parse URL
        # From https://varda.oso-pilot.csc.fi/api/v1/toimipisteet/ save in DB https://varda.oso-pilot.csc.fi
        n = url.find("/", url.find("/") + 2)  # find 3rd occurrence of "/"
        # without a third "/" find() gives -1 and url[:-1] would drop the
        # last character, so keep the whole url in that case
        address = url[:n] if n != -1 else url

        show("insert data")
        cnt = 0
        for row in data["results"]:
            cnt += 1
            # show some sign of being alive
            if cnt % 100 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            if cnt % 1000 == 0:
                show("-- %d" % (cnt))

            # Explicitly remove the keys that are not saved; pop() with a
            # default tolerates rows where a key is absent
            for key in dropped_keys:
                row.pop(key, None)

            # find out which columns to use on insert
            dboperator.resetcolumns(row)

            # flatten arrays/lists into their JSON text
            for col in row:
                if isinstance(row[col], list):
                    row[col] = json.dumps(row[col])

            dboperator.insert(address, schema, table, row)

        show("wrote %d" % (cnt))
        show("ready")

        if data["next"] is None:
            break  # exit while-loop. We are done.
        url = data["next"]