Esempio n. 1
0
def load(command,expect,verbose=False):
  """Run one SQL command through dboperator and optionally return a result.

  command -- SQL text to run.
  expect  -- falsy: the command is only executed and None is returned.
             'row count': the number of fetched rows is returned.
             anything else: when exactly one row came back and `expect` is
             found in it, that row's value for `expect`; otherwise
             everything dboperator.get() returned.
  verbose -- when true, progress messages are written with show().

  If the database call fails, the error is reported, the connection is
  closed and the process exits with status 2.
  """
  if verbose: show("begin with "+command) # no point in trying to show expect!

  ret = None
  try:
    # quick hack for getting return value. do fix me!
    if expect:
      resql = dboperator.get(command)
      if verbose: show("command returned %d rows"%(len(resql)))
      # decide what to hand back: row count, a single value or all rows
      if expect=='row count':
        ret = len(resql)
      elif len(resql)==1 and expect in resql[0]:
        ret = resql[0][expect]
      else:
        ret = resql
    else:
      dboperator.execute(command)
  except Exception as e:
    # Exception (instead of a bare except) lets Ctrl-C and SystemExit pass
    # through, and the message now tells what actually failed.
    show("Something went wrong. Over and out. %s" % (e,))
    dboperator.close()
    exit(2) # exit with an error status

  if verbose: show("ready")
  return ret
def load(command,expect,verbose=False):
  """Run one SQL command through dboperator and optionally return a result.

  command -- SQL text to run.
  expect  -- falsy: the command is only executed and None is returned.
             'row count': the number of fetched rows is returned.
             anything else: when exactly one row came back and `expect` is
             found in it, that row's value for `expect`; otherwise
             everything dboperator.get() returned.
  verbose -- when true, progress messages are written with show().

  If the database call fails, the error is printed and shown, the
  connection is closed and the process exits with status 2.
  """
  if verbose: show("begin with "+command) # no point in trying to show expect!

  ret = None

  try:
    # quick hack for getting return value. do fix me!
    if expect:
      resql = dboperator.get(command)
      if verbose: show("command returned %d rows"%(len(resql)))
      # decide what to hand back: row count, a single value or all rows
      if expect=='row count':
        ret = len(resql)
      elif len(resql)==1 and expect in resql[0]:
        ret = resql[0][expect]
      else:
        ret = resql
    else:
      dboperator.execute(command)
  except Exception as f:  # "as" form is valid on Python 2.6+ and Python 3
    print(f)
    e = sys.exc_info()[0] #for debugging error 1.11.2018 VHä
    show("Something went wrong. Over and out. %s" % e)
    dboperator.close()
    exit(2) # exit with an error status

  # the computed value was previously dropped; hand it back to the caller
  if verbose: show("ready")
  return ret
Esempio n. 3
0
def loadsql(sqlfile,verbose=False):
  """Execute the contents of a SQL script file batch by batch.

  sqlfile -- path of the script to read.
  verbose -- when true, every batch is written with show() before it runs.

  The text is split wherever a line starts with "go" (any letter case);
  each piece is passed to dboperator.execute() in file order.
  """
  # the with-block closes the file even if reading it fails
  with open(sqlfile, 'r') as fd:
    allsql = fd.read()
  # remove UTF-8 BOM characters if they exist
  # NOTE(review): comparing against the byte string codecs.BOM_UTF8 assumes
  # read() returns a byte string (Python 2 text mode) -- confirm.
  if allsql.startswith(codecs.BOM_UTF8):
    allsql = allsql[len(codecs.BOM_UTF8):]
  # split MS SQL batches
  # NOTE(review): the pattern matches any line beginning with "go", not only
  # a bare GO separator.
  for sql in re.split('\ngo.*\n', allsql, flags=re.IGNORECASE):
    if verbose: show(sql)
    dboperator.execute(sql)
Esempio n. 4
0
def loadsql(sqlfile, verbose=False):
    """Execute the contents of a SQL script file batch by batch.

    sqlfile -- path of the script to read.
    verbose -- when true, every batch is written with show() before it runs.

    The text is split wherever a line starts with "go" (any letter case);
    each piece is passed to dboperator.execute() in file order.
    """
    # the with-block closes the file even if reading it fails
    with open(sqlfile, 'r') as fd:
        allsql = fd.read()
    # remove UTF-8 BOM characters if they exist
    # NOTE(review): comparing against the byte string codecs.BOM_UTF8 assumes
    # read() returns a byte string (Python 2 text mode) -- confirm.
    if allsql.startswith(codecs.BOM_UTF8):
        allsql = allsql[len(codecs.BOM_UTF8):]
    # split MS SQL batches
    # NOTE(review): the pattern matches any line beginning with "go", not
    # only a bare GO separator.
    for sql in re.split('\ngo.*\n', allsql, flags=re.IGNORECASE):
        if verbose: show(sql)
        dboperator.execute(sql)
Esempio n. 5
0
def load(schema,procedure,verbose=False):
  """Execute the stored procedure schema.procedure through dboperator.

  schema    -- schema name, used as-is in the SQL text.
  procedure -- procedure name, used as-is in the SQL text.
  verbose   -- accepted but not used by this function.

  On failure the error is reported, the connection is closed and the
  process exits with status 2; on success the connection is closed too.
  """
  show("begin with "+schema+" "+procedure)

  sql = "execute "+schema+"."+procedure
  try:
    dboperator.execute(sql)
  except Exception as e:
    # Exception (instead of a bare except) lets Ctrl-C and SystemExit pass
    # through; the real reason is reported next to the educated guess.
    show("Something went wrong. Probably procedure wasn't found or there is a permission problem. Over and out.")
    show("Reason: %s" % (e,))
    dboperator.close()
    exit(2) # exit with an error status

  dboperator.close()

  show("ready")
Esempio n. 6
0
def load(sqlfile,migrate,verbose=False):
  """Run a SQL script, optionally as a numbered migration step.

  sqlfile -- path of the script. When its file name contains "__", the text
             before the first "__" is parsed as the migration number
             (int() raises ValueError if it is not a number).
  migrate -- migration phase name; falsy means "just run the script".
  verbose -- passed on to loadsql() and enables extra progress messages.

  With a phase and a number: step 0 is always run; any other step is run
  only when its number is greater than the highest number recorded for the
  phase in dbo.migration, and is then recorded. The connection is closed
  at the end.
  """
  show("begin "+sqlfile)

  # Only the file name may carry the "<number>__" prefix. Looking at the
  # file name alone keeps a "__" in a directory name from raising ValueError.
  filename = sqlfile[sqlfile.rfind("/")+1:]
  number_togo = None
  if "__" in filename:
    number_togo = int(filename[:filename.index("__")])

  if migrate and number_togo is not None:
    # migration install step 0 is special, just add the table for others
    if number_togo==0:
      loadsql(sqlfile,verbose)
    else:
      # NOTE(review): SQL is built by string interpolation; `migrate` must
      # come from a trusted source.
      number_last = None
      result = dboperator.get("select max(number) as number from dbo.migration where phase='%s'"%(migrate))
      if result[0]["number"] is not None:
        number_last = int(result[0]["number"])

      if verbose: show("migrating %s which is going on at %s and now trying %s"%(migrate,number_last,number_togo))

      if number_last is None or number_togo > number_last:
        show("Migrating from %s to %s"%(number_last,number_togo))
        loadsql(sqlfile,verbose)
        # NOTE(review): the select reads dbo.migration but this insert uses
        # the unqualified name "migration" -- confirm both hit the same table.
        dboperator.execute("insert into migration (phase,number) values ('%s',%s)"%(migrate,number_togo))
      else:
        if verbose: show("skipping migration %s < %s"%(number_togo,number_last))

  else:
    loadsql(sqlfile,verbose)

  dboperator.close()
  show("ready")
Esempio n. 7
0
def load(database, schema, procedure, parameters, verbose=False):
    """Execute the stored procedure database.schema.procedure.

    database, schema, procedure -- name parts, used as-is in the SQL text.
    parameters -- text appended after the procedure name; "" for none.
    verbose    -- accepted but not used by this function.

    dboperator.execute() is expected to return 1 on success; any other
    value is reported as the error, the connection is closed and the
    process exits with status 2.
    """
    show("begin with " + database + " " + schema + " " + procedure + " " +
         parameters)
    sql = "execute " + database + "." + schema + "." + procedure
    if parameters != "":
        sql = sql + " " + parameters

    result = dboperator.execute(sql)
    # successful execution result returns 1, anything else is error message
    if result != 1:
        show("A MSSQL error has been caught during executing " + procedure)
        # %-formatting instead of "+": a non-string result (None, a number,
        # an exception object) would otherwise raise TypeError here and
        # hide the real error.
        show("Error code: %s" % (result,))
        dboperator.close()
        exit(2)  # exit with an error status

    dboperator.close()
    show("ready")
Esempio n. 8
0
def load(sqlfile, migrate, verbose=False):
    """Run a SQL script, optionally as a numbered migration step.

    sqlfile -- path of the script. When its file name contains "__", the
               text before the first "__" is parsed as the migration number
               (int() raises ValueError if it is not a number).
    migrate -- migration phase name; falsy means "just run the script".
    verbose -- passed on to loadsql() and enables extra progress messages.

    With a phase and a number: step 0 is always run; any other step is run
    only when its number is greater than the highest number recorded for
    the phase in antero.dbo.migration, and is then recorded. If recording
    does not return 1 the result is shown and the process exits with
    status 2. The connection is closed at the end.
    """
    show("begin " + sqlfile)

    # Only the file name may carry the "<number>__" prefix. Looking at the
    # file name alone keeps a "__" in a directory name from raising
    # ValueError.
    filename = sqlfile[sqlfile.rfind("/") + 1:]
    number_togo = None
    if "__" in filename:
        number_togo = int(filename[:filename.index("__")])

    if migrate and number_togo is not None:
        # migration install step 0 is special, just add the table for others
        if number_togo == 0:
            loadsql(sqlfile, verbose)
        else:
            # NOTE(review): SQL is built by string interpolation; `migrate`
            # must come from a trusted source.
            number_last = None
            result = dboperator.get(
                "select max(number) as number from antero.dbo.migration where phase='%s'"
                % (migrate))
            if result[0]["number"] is not None:
                number_last = int(result[0]["number"])

            if verbose:
                show("migrating %s which is going on at %s and now trying %s" %
                     (migrate, number_last, number_togo))

            if number_last is None or number_togo > number_last:
                show("Migrating from %s to %s" % (number_last, number_togo))
                loadsql(sqlfile, verbose)
                result = dboperator.execute(
                    "insert into antero.dbo.migration (phase,number) values ('%s',%s)"
                    % (migrate, number_togo))
                # anything but 1 is treated as a failure report
                if result != 1:
                    show(result)
                    exit(2)
            else:
                if verbose:
                    show("skipping migration %s < %s" %
                         (number_togo, number_last))

    else:
        loadsql(sqlfile, verbose)

    dboperator.close()
    show("ready")
Esempio n. 9
0
def load(secure,hostname,url,schema,table,postdata,condition,verbose,rowcount):
  """Fetch a JSON array over HTTP and insert its items into schema.table.

  secure    -- truthy selects https://, otherwise http:// is used.
  hostname  -- host part of the address.
  url       -- path (and query) appended to the host.
  schema, table -- target table.
  postdata  -- request body handed to urllib2.Request; may be None.
  condition -- SQL WHERE text; matching rows are deleted before inserting.
               When falsy the table is emptied with dboperator.empty().
  verbose   -- when true, every row is written with show().
  rowcount  -- number of rows collected before a batch insert.

  The first item is inserted on its own with dboperator.insert(); later
  items are collected and handed to insert() in batches of `rowcount`.
  Any HTTP failure ends the process with exit status 2.
  """
  show("begin "+hostname+" "+url+" "+schema+" "+table+" "+(postdata or "")+" "+(condition or ""))
  if secure:
    address = "https://"+hostname+url
  else:
    address = "http://"+hostname+url
  show("load from "+address)

  reqheaders = {'Content-Type': 'application/json'}
  reqheaders['Caller-Id'] = '1.2.246.562.10.2013112012294919827487.vipunen'

  # api credentials from env vars
  if os.getenv("API_USERNAME"):
    show("using authentication")
    apiuser = os.getenv("API_USERNAME")
    apipass = os.getenv("API_PASSWORD")
    reqheaders['Authorization'] = 'Basic %s' % base64.b64encode(apiuser+":"+apipass)

  # automatic POST with (post)data
  print("value used for , -r, --rowcount=", rowcount)
  request = urllib2.Request(address, data=postdata, headers=reqheaders)
  try:
    response = urllib2.urlopen(request)
  except httplib.IncompleteRead as e:
    show('IncompleteRead exception.')
    # e.partial is the data read so far, not a number: report its length
    # ('%d' % e.partial raised TypeError and hid this message)
    show('Received: %d'%(len(e.partial)))
    sys.exit(2)
  except urllib2.HTTPError as e:
    show('The server couldn\'t fulfill the request.')
    show('Error code: %d'%(e.code))
    sys.exit(2)
  except urllib2.URLError as e:
    show('We failed to reach a server.')
    show('Reason: %s'%(e.reason))
    sys.exit(2)
  else:
    # everything is fine
    show("api call OK")

  # remove data conditionally, otherwise empty
  # merge operation could be considered here...
  if condition:
    show("remove from %s.%s with condition '%s'"%(schema,table,condition))
    dboperator.execute("DELETE FROM %s.%s WHERE %s"%(schema,table,condition))
  else:
    show("empty %s.%s"%(schema,table))
    dboperator.empty(schema,table)

  show("insert data")
  cnt=0
  manycount = 0  # rows collected since the last insert
  rows = []

  for row in ijson.items(response,'item'):
    cnt+=1
    manycount+=1
    # show some sign of being alive
    if cnt%100 == 0:
      sys.stdout.write('.')
      sys.stdout.flush()
    if cnt%1000 == 0:
      show("-- %d" % (cnt))
    if verbose: show("%d -- %s"%(cnt,row))

    # find out which columns to use on insert
    dboperator.resetcolumns(row)

    # flatten arrays/lists into their JSON text
    for col in row:
      if type(row[col]) is list:
        row[col] = json.dumps(row[col])
    rows.append(row)

    if cnt == 1:
      # the very first row goes in alone through dboperator.insert
      dboperator.insert(address,schema,table,row)
      manycount = 0
      rows = []
    elif manycount == rowcount:
      # NOTE(review): insert() is not defined in this block -- presumably a
      # module-level batch insert helper; confirm.
      insert(address,schema,table,rows)
      manycount = 0
      rows = []

  # rows and manycount always move together, so a non-empty list is exactly
  # the unfinished last batch
  if rows:
    insert(address,schema,table,rows)

  show("wrote %d"%(cnt))
  show("ready")
Esempio n. 10
0
    show('The server couldn\'t fulfill the request.')
    show('Error code: %d'%(e.code))
    sys.exit(2)
  except urllib2.URLError, e:
    show('We failed to reach a server.')
    show('Reason: %s'%(e.reason))
    sys.exit(2)
  else:
    # everything is fine
    show("api call OK")

  # remove data conditionally, otherwise empty
  # merge operation could be considered here...
  if condition:
    show("remove from %s.%s with condition '%s'"%(schema,table,condition))
    dboperator.execute("DELETE FROM %s.%s WHERE %s"%(schema,table,condition))
  else:
    show("empty %s.%s"%(schema,table))
    dboperator.empty(schema,table)

  show("insert data")
  cnt=0
  for row in ijson.items(response,'item'):
    cnt+=1
    # show some sign of being alive
    if cnt%100 == 0:
      sys.stdout.write('.')
      sys.stdout.flush()
    if cnt%1000 == 0:
      show("-- %d" % (cnt))
    if verbose: show("%d -- %s"%(cnt,row))
Esempio n. 11
0
def load(url, schema, table, condition):
    """
    Load every page of an ARVO-API result into schema.table.

    Results from ARVO-API can come in multiple pages. If that's the case,
    we need to make multiple requests to the ARVO API, using the "next_url" parameter.

    url       -- address of the first page.
    schema, table -- target table.
    condition -- SQL WHERE text; matching rows are deleted once, before the
                 first page is inserted. When falsy the table is emptied
                 with dboperator.empty() instead.

    Exit statuses: 1 request failed, 2 HTTP status other than 200,
    3 response is not JSON, 4 JSON lacks "pagination" or "data".

    Received data e.g.

        {
          "data": [
            {
              "taustakysymykset": true,
              "koulutustoimija": "xxxx",
              "vastausid": 1111,
              "kyselykertaid": 123,
              "kysely_alkupvm": "2016-11-29T22:00:00Z",
              "suorituskieli": "fi",
              "tutkinto_fi": "xxxx",
              "opintoala_en": "xxxx",
              "valmistavan_koulutuksen_jarjestaja_en": "xxxx",
              "kysymys_en": "xxxx",
              "koulutustoimija_en": "xxxx",
              "tutkintotunnus": "xxxx",
              "numerovalinta": 2,
              "valmistavan_koulutuksen_oppilaitos_sv": "xxxx",
              "kysymys_sv": "xxxx",
              "kysymysjarjestys": 0,
              "opintoala_sv": "xxxx",
              "monivalintavaihtoehto": "xxxx",
              "kysymysid": 1234,
              "valmistavan_koulutuksen_oppilaitos_fi": "xxxx",
              "kysely_en": "xxxx",
              "vastaustyyppi": "xxxx",
              "kysymysryhma": "xxxx",
              "tutkinto_en": null,
              "kunta": null,
              "kysymysryhmaid": 110,
              "kysymysryhmajarjestys": 0,
              "vaihtoehto": null,
              "opintoala_fi": "xxxx",
              "kysymys_fi": "Ik",
              "vastaajaid": 123,
              "kyselyid": 111,
              "valmistavan_koulutuksen_jarjestaja_fi": "xxxx",
              "kysymysryhma_en": "xxxx",
              "kysely_sv": "xxxx",
              "kysymysryhma_fi": "xxxx",
              "opintoalatunnus": "xxxx",
              "valmistavan_koulutuksen_jarjestaja_sv": "xxxx",
              "vastausaika": "2017-02-05T22:00:00Z",
              "tunnus": "xxxx",
              "valmistavan_koulutuksen_jarjestaja": "xxxx",
              "koulutustoimija_fi": "xxxx",
              "kysely_loppupvm": null,
              "koulutusmuoto": null,
              "kyselykerta": "xxxx",
              "valmistavan_koulutuksen_oppilaitos_en": "xxxx",
              "valtakunnallinen": true,
              "tutkinto_sv": null,
              "koulutustoimija_sv": "xxxx",
              "valmistavan_koulutuksen_oppilaitos": "xxxx",
              "kysely_fi": "xxxx",
              "kysymysryhma_sv": "xxxx"
            }
          ],
          "pagination": {
            "next_url": "null"
          }
        }
    """

    FIRST_LOOP = True  # This is used to make possible DELETE operation (due to condition) only once.

    while True:
        show("begin " + url + " " + schema + " " + table + " " +
             (condition or ""))
        show("load from " + url)

        reqheaders = {'Content-Type': 'application/json'}
        # api credentials from env vars
        if os.getenv("API_USERNAME"):
            show("using authentication")
            apiuser = os.getenv("API_USERNAME")
            apipass = os.getenv("API_PASSWORD")
            reqheaders['Authorization'] = 'Basic %s' % base64.b64encode(
                apiuser + ":" + apipass)

        # print(...) with a single argument behaves the same on Python 2 and 3
        try:
            r = requests.get(url, headers=reqheaders)
        except requests.exceptions.RequestException as e:
            print(e)
            sys.exit(1)

        if r.status_code != 200:
            print("Error! HTTP status code: " + str(r.status_code))
            sys.exit(2)

        try:
            result = json.loads(r.content)
        except ValueError as e:
            print(e)
            sys.exit(3)

        if "pagination" not in result or "data" not in result:
            print("Error! Received JSON-data not valid.")
            sys.exit(4)

        # everything is fine
        show("api call OK")

        # Save in DB only the part before ?-mark:
        # https://arvo.csc.fi/api/vipunen?alkupvm=2018-01-01&loppupvm=2018-02-01
        address = url.split("?")[0]

        # remove data conditionally, otherwise empty
        # merge operation could be considered here...
        if FIRST_LOOP:  # This is done only on the first go (no matter if Arvo returns one or multiple pages)
            if condition:
                show("remove from %s.%s with condition '%s'" %
                     (schema, table, condition))
                dboperator.execute("DELETE FROM %s.%s WHERE %s" %
                                   (schema, table, condition))
            else:
                show("empty %s.%s" % (schema, table))
                dboperator.empty(schema, table)

        show("insert data")
        cnt = 0
        for row in result["data"]:
            cnt += 1
            # show some sign of being alive
            if cnt % 100 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            if cnt % 1000 == 0:
                show("-- %d" % (cnt))

            # find out which columns to use on insert
            dboperator.resetcolumns(row)

            # flatten arrays/lists into their JSON text
            for col in row:
                if type(row[col]) is list:
                    row[col] = json.dumps(row[col])

            dboperator.insert(address, schema, table, row)

        show("wrote %d" % (cnt))
        show("ready")

        # The last page is marked with the string "null"; a real JSON null
        # (None) is accepted as well so it is never used as the next URL.
        next_url = result["pagination"]["next_url"]
        if next_url is None or next_url == "null":
            break  # exit while-loop. We are done.
        url = next_url
        FIRST_LOOP = False  # Do not make the possible DELETE-operation anymore!
Esempio n. 12
0
def load(url, schema, table, condition):
    """
    Load every page of a VARDA-API result into schema.table.

    Results from VARDA-API can come in multiple pages. If that's the case,
    we need to make multiple requests to the VARDA API, using the "next" parameter.

    url       -- address of the first page.
    schema, table -- target table; all of its rows are deleted first.
    condition -- only shown in the progress message (see the TODO below).

    Exit statuses: 1 request failed, 2 HTTP status other than 200,
    3 response is not JSON.
    """

    # Keys of each result row that are not saved in Antero DB
    unsaved_keys = ('johtaja', 'url', 'muutos_pvm', 'vaka_jarjestaja',
                    'toimipaikat')

    # First delete all from TABLE -> Start from scratch. TODO: Delete based on condition.
    dboperator.execute("DELETE FROM %s.%s" % (schema, table))

    while True:
        show("begin " + url + " " + schema + " " + table + " " +
             (condition or ""))
        show("load from " + url)

        reqheaders = {'Content-Type': 'application/json'}
        reqheaders[
            'Caller-Id'] = '1.2.246.562.10.2013112012294919827487.vipunen'
        # api credentials from env vars
        if os.getenv("VARDA_API_KEY"):
            show("using authentication")
            api_key = os.getenv("VARDA_API_KEY")
            reqheaders['Authorization'] = 'Token %s' % api_key

        try:
            r = requests.get(url, headers=reqheaders)
        except requests.exceptions.RequestException as e:
            print("RequestException: ", e)
            sys.exit(1)

        if r.status_code != 200:
            print("Error! HTTP status code: " + str(r.status_code))
            sys.exit(2)

        try:
            data = json.loads(r.content)
        except ValueError as e:
            print("ValueError: ", e)
            sys.exit(3)

        # everything is fine
        show("api call OK")

        # Parse URL
        # From https://varda.oso-pilot.csc.fi/api/v1/toimipisteet/ save in DB https://varda.oso-pilot.csc.fi
        n = url.find("/", url.find("/") + 2)  # find 3rd occurrence of "/"
        # without a third "/" find() gives -1 and url[:-1] would silently
        # drop the last character, so keep the whole url in that case
        address = url[:n] if n != -1 else url

        show("insert data")
        cnt = 0
        for row in data["results"]:
            cnt += 1
            # show some sign of being alive
            if cnt % 100 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            if cnt % 1000 == 0:
                show("-- %d" % (cnt))

            # Explicitly remove the keys from dict (row) that are not saved
            # in Antero DB; pop() tolerates a key missing from the response
            # where del would raise KeyError
            for key in unsaved_keys:
                row.pop(key, None)

            # find out which columns to use on insert
            dboperator.resetcolumns(row)

            # flatten arrays/lists into their JSON text
            for col in row:
                if type(row[col]) is list:
                    row[col] = json.dumps(row[col])

            dboperator.insert(address, schema, table, row)

        show("wrote %d" % (cnt))
        show("ready")

        if data["next"] is None:
            break  # exit while-loop. We are done.
        url = data["next"]
Esempio n. 13
0
def load(secure, hostname, url, schema, table, postdata, condition, verbose):
    """Fetch a JSON document with HTTP GET and insert its items into schema.table.

    hostname, url -- joined after "http://" to form the request address.
    schema, table -- target table.
    postdata  -- only used in the opening progress message.
    condition -- SQL WHERE text; matching rows are deleted before inserting.
                 When falsy the table is emptied with dboperator.empty().
    verbose   -- when true, every inserted row is written with show().

    NOTE(review): `secure` is accepted but never read here; the address
    always starts with "http://".

    Any failure while requesting or decoding ends the process with exit
    status 2.
    """
    # JSON fields copied one-to-one into the row, in this order
    fields = (
        "avoinKK",
        "db",
        "erillinenOpintoOikeus",
        "kansainvVaihto",
        "kkYhteistyo",
        "koodi",
        "koulutustyyppi",
        "kuvaus",
        "lukumaara",
        "luoja",
        "luontipaivamaara",
        "maahanmValKoulutus",
        "oppilaitostunnus",
        "perustutkOpiskelijat",
        "suorituspaiva",
        "tkiHarjoittelunLaajuus",
        "tkiMuutLaajuus",
        "tkiToiminnanLaajuus",
        "ulkomaaharjoittelu",
        "ulkomailtaHyvLuet",
        "vieraskSuoritukset",
        "vuosi",
        "erikoistumiskoulutus",
    )

    postdata_text = postdata or "No postdata"
    condition_text = condition or ""
    show("begin " + hostname + " " + url + " " + schema + " " + table + " " +
         postdata_text + " " + condition_text)

    source = hostname + url
    address = "http://" + source
    show("load from " + address)

    reqheaders = {
        'Content-Type': 'application/json',
        'Caller-Id': '1.2.246.562.10.2013112012294919827487.vipunen',
    }

    # api credentials from env vars
    apiuser = os.getenv("API_USERNAME")
    if apiuser:
        show("using authentication")
        apipass = os.getenv("API_PASSWORD")
        credentials = apiuser + ":" + apipass
        reqheaders['Authorization'] = 'Basic %s' % base64.b64encode(credentials)

    try:
        response = requests.get(address, headers=reqheaders).json()
    except Exception as e:
        show('HTTP GET failed.')
        show('Reason: %s' % (str(e)))
        sys.exit(2)

    # only reached when the request and the JSON decoding succeeded
    show("api call OK")

    # clear the target: everything, or just the rows matching the condition
    # (merge operation could be considered here...)
    if not condition:
        show("empty %s.%s" % (schema, table))
        dboperator.empty(schema, table)
    else:
        show("remove from %s.%s with condition '%s'" %
             (schema, table, condition))
        dboperator.execute("DELETE FROM %s.%s WHERE %s" %
                           (schema, table, condition))

    show("insert data")
    cnt = 0
    for cnt, item in enumerate(response, 1):
        # make "columnlist" (type has no meaning as we're not creating table)
        row = makerow()
        # setup dboperator so other calls work
        dboperator.columns(row)
        for field in fields:
            row[field] = jv(item, field)

        dboperator.insert(source, schema, table, row)

        # show some sign of being alive
        if cnt % 100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if cnt % 1000 == 0:
            show("-- %d" % (cnt))
        if verbose:
            show("%d -- %s" % (cnt, row))

    show("wrote %d" % (cnt))
    show("ready")
Esempio n. 14
0
        show('Error code: %d' % (e.code))
        sys.exit(2)
    except urllib2.URLError, e:
        show('We failed to reach a server.')
        show('Reason: %s' % (e.reason))
        sys.exit(2)
    else:
        # everything is fine
        show("api call OK")

    # remove data conditionally, otherwise empty
    # merge operation could be considered here...
    if condition:
        show("remove from %s.%s with condition '%s'" %
             (schema, table, condition))
        dboperator.execute("DELETE FROM %s.%s WHERE %s" %
                           (schema, table, condition))
    else:
        show("empty %s.%s" % (schema, table))
        dboperator.empty(schema, table)

    show("insert data")
    cnt = 0
    for row in ijson.items(response, 'item'):
        cnt += 1
        # show some sign of being alive
        if cnt % 100 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if cnt % 1000 == 0:
            show("-- %d" % (cnt))
        if verbose: show("%d -- %s" % (cnt, row))
def load(url, schema, table, condition):
    """
    Load every page of an ARVO-API result into schema.table.

    Results from ARVO-API can come in multiple pages. If that's the case,
    we need to make multiple requests to the ARVO API, using the "next_url" parameter.

    url       -- address of the first page.
    schema, table -- target table.
    condition -- SQL WHERE text; matching rows are deleted once, before the
                 first page is inserted. When falsy the table is emptied
                 with dboperator.empty() instead.

    Exit statuses: 1 request failed, 2 HTTP status other than 200,
    3 response is not JSON, 4 JSON lacks "pagination" or "data".

    Received data e.g.

        {
          "data": [
            {
              "taustakysymykset": true,
              "koulutustoimija": "xxxx",
              "vastausid": 1111,
              "kyselykertaid": 123,
              "kysely_alkupvm": "2016-11-29T22:00:00Z",
              "suorituskieli": "fi",
              "tutkinto_fi": "xxxx",
              "valmistavan_koulutuksen_jarjestaja_en": "xxxx",
              "kysymys_en": "xxxx",
              "koulutustoimija_en": "xxxx",
              "tutkintotunnus": "xxxx",
              "numerovalinta": 2,
              "valmistavan_koulutuksen_oppilaitos_sv": "xxxx",
              "kysymys_sv": "xxxx",
              "kysymysjarjestys": 0,
              "monivalintavaihtoehto": "xxxx",
              "kysymysid": 1234,
              "valmistavan_koulutuksen_oppilaitos_fi": "xxxx",
              "kysely_en": "xxxx",
              "vastaustyyppi": "xxxx",
              "kysymysryhma": "xxxx",
              "tutkinto_en": null,
              "kunta": null,
              "kysymysryhmaid": 110,
              "kysymysryhmajarjestys": 0,
              "vaihtoehto": null,
              "kysymys_fi": "Ik",
              "vastaajaid": 123,
              "kyselyid": 111,
              "valmistavan_koulutuksen_jarjestaja_fi": "xxxx",
              "kysymysryhma_en": "xxxx",
              "kysely_sv": "xxxx",
              "kysymysryhma_fi": "xxxx",
              "valmistavan_koulutuksen_jarjestaja_sv": "xxxx",
              "vastausaika": "2017-02-05T22:00:00Z",
              "tunnus": "xxxx",
              "valmistavan_koulutuksen_jarjestaja": "xxxx",
              "koulutustoimija_fi": "xxxx",
              "kysely_loppupvm": null,
              "koulutusmuoto": null,
              "kyselykerta": "xxxx",
              "valmistavan_koulutuksen_oppilaitos_en": "xxxx",
              "valtakunnallinen": true,
              "tutkinto_sv": null,
              "koulutustoimija_sv": "xxxx",
              "valmistavan_koulutuksen_oppilaitos": "xxxx",
              "kysely_fi": "xxxx",
              "kysymysryhma_sv": "xxxx"
            }
          ],
          "pagination": {
            "next_url": "null"
          }
        }
    """

    FIRST_LOOP = True  # This is used to make possible DELETE operation (due to condition) only once.

    while True:
        show("begin " + url + " " + schema + " " + table + " " + (condition or ""))
        show("load from " + url)

        reqheaders = {'Content-Type': 'application/json'}
        # api credentials from env vars
        if os.getenv("API_USERNAME"):
            show("using authentication")
            apiuser = os.getenv("API_USERNAME")
            apipass = os.getenv("API_PASSWORD")
            reqheaders['Authorization'] = 'Basic %s' % base64.b64encode(apiuser + ":" + apipass)

        # print(...) with a single argument behaves the same on Python 2 and 3
        try:
            r = requests.get(url, headers=reqheaders)
        except requests.exceptions.RequestException as e:
            print(e)
            sys.exit(1)

        if r.status_code != 200:
            print("Error! HTTP status code: " + str(r.status_code))
            sys.exit(2)

        try:
            result = json.loads(r.content)
        except ValueError as e:
            print(e)
            sys.exit(3)

        if "pagination" not in result or "data" not in result:
            print("Error! Received JSON-data not valid.")
            sys.exit(4)

        # everything is fine
        show("api call OK")

        # Save in DB only the part before ?-mark:
        # https://arvo.csc.fi/api/vipunen?alkupvm=2018-01-01&loppupvm=2018-02-01
        address = url.split("?")[0]

        # remove data conditionally, otherwise empty
        # merge operation could be considered here...
        if FIRST_LOOP:  # This is done only on the first go (no matter if Arvo returns one or multiple pages)
            if condition:
                show("remove from %s.%s with condition '%s'" % (schema, table, condition))
                dboperator.execute("DELETE FROM %s.%s WHERE %s" % (schema, table, condition))
            else:
                show("empty %s.%s" % (schema, table))
                dboperator.empty(schema, table)

        show("insert data")
        cnt = 0
        for row in result["data"]:
            cnt += 1
            # show some sign of being alive
            if cnt % 100 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            if cnt % 1000 == 0:
                show("-- %d" % (cnt))

            # find out which columns to use on insert
            dboperator.resetcolumns(row)

            # flatten arrays/lists into their JSON text
            for col in row:
                if type(row[col]) is list:
                    row[col] = json.dumps(row[col])

            dboperator.insert(address, schema, table, row)

        show("wrote %d" % (cnt))
        show("ready")

        # the last page carries either the string "null" or a JSON null
        next_url = result["pagination"]["next_url"]
        if next_url is None or next_url == "null":
            break  # exit while-loop. We are done.
        url = next_url
        FIRST_LOOP = False  # Do not make the possible DELETE-operation anymore!
Esempio n. 16
0
def load(secure,hostname,url,schema,table,postdata,condition,verbose):
  """Fetch a JSON document with HTTP GET and insert its items into schema.table.

  hostname, url -- joined after "http://" to form the request address.
  schema, table -- target table.
  postdata  -- only used in the opening progress message.
  condition -- SQL WHERE text; matching rows are deleted before inserting.
               When falsy the table is emptied with dboperator.empty().
  verbose   -- when true, every inserted row is written with show().

  NOTE(review): `secure` is accepted but never read here; the address
  always starts with "http://".

  Any failure while requesting or decoding ends the process with exit
  status 2.
  """
  # JSON fields copied one-to-one into the row, in this order
  fields = (
    "edellinenSyysolo",
    "hetu",
    "ika",
    "kevat",
    "loAloituspvm",
    "olok",
    "olos",
    "ooAloituspvm",
    "opSummaKunOtePankista",
    "opiskelijaavain",
    "opiskeluoikeusavain",
    "opiskeluoikeusid",
    "oppilaitos",
    "oppilaitostunnus",
    "pankkiKumuEnnen55",
    "pankkiSaldo55",
    "regDatum",
    "sukupuoli",
    "summa",
    "suorittanut27",
    "suorittanut55ilmanPankkia",
    "suorittanut55pankinAvulla",
    "syys",
    "tkoodi",
    "uusiOpisk",
    "uusiOpiskKevat",
    "uuttaPankkiin",
    "vuosi",
  )

  postdata_text = postdata or "No postdata"
  condition_text = condition or ""
  show("begin "+hostname+" "+url+" "+schema+" "+table+" "+postdata_text+" "+condition_text)

  source = hostname+url
  address = "http://"+source
  show("load from "+address)

  reqheaders = {
    'Content-Type': 'application/json',
    'Caller-Id': '1.2.246.562.10.2013112012294919827487.vipunen',
  }

  # api credentials from env vars
  apiuser = os.getenv("API_USERNAME")
  if apiuser:
    show("using authentication")
    apipass = os.getenv("API_PASSWORD")
    credentials = apiuser+":"+apipass
    reqheaders['Authorization'] = 'Basic %s' % base64.b64encode(credentials)

  try:
    response = requests.get(address, headers=reqheaders).json()
  except Exception as e:
    show('HTTP GET failed.')
    show('Reason: %s'%(str(e)))
    sys.exit(2)

  # only reached when the request and the JSON decoding succeeded
  show("api call OK")

  # clear the target: everything, or just the rows matching the condition
  # (merge operation could be considered here...)
  if not condition:
    show("empty %s.%s"%(schema,table))
    dboperator.empty(schema,table)
  else:
    show("remove from %s.%s with condition '%s'"%(schema,table,condition))
    dboperator.execute("DELETE FROM %s.%s WHERE %s"%(schema,table,condition))

  show("insert data")
  cnt = 0
  for cnt, item in enumerate(response, 1):
    # make "columnlist" (type has no meaning as we're not creating table)
    row = makerow()
    # setup dboperator so other calls work
    dboperator.columns(row)
    for field in fields:
      row[field] = jv(item,field)

    dboperator.insert(source,schema,table,row)

    # show some sign of being alive
    if cnt%100 == 0:
      sys.stdout.write('.')
      sys.stdout.flush()
    if cnt%1000 == 0:
      show("-- %d" % (cnt))
    if verbose:
      show("%d -- %s"%(cnt,row))

  show("wrote %d"%(cnt))
  show("ready")
def load(url, schema, table, condition):
    """
    Load every page of a VARDA-API result into schema.table.

    Results from VARDA-API can come in multiple pages. If that's the case,
    we need to make multiple requests to the VARDA API, using the "next" parameter.

    url       -- address of the first page.
    schema, table -- target table; all of its rows are deleted first.
    condition -- only shown in the progress message (see the TODO below).

    Exit statuses: 1 request failed, 2 HTTP status other than 200,
    3 response is not JSON.
    """

    # Keys of each result row that are not saved in Antero DB
    unsaved_keys = ('johtaja', 'url', 'muutos_pvm', 'vaka_jarjestaja',
                    'toimipaikat')

    # First delete all from TABLE -> Start from scratch. TODO: Delete based on condition.
    dboperator.execute("DELETE FROM %s.%s" % (schema, table))

    while True:
        show("begin " + url + " " + schema + " " + table + " " + (condition or ""))
        show("load from " + url)

        reqheaders = {'Content-Type': 'application/json'}
        # api credentials from env vars
        if os.getenv("VARDA_API_KEY"):
            show("using authentication")
            api_key = os.getenv("VARDA_API_KEY")
            reqheaders['Authorization'] = 'Token %s' % api_key

        # print(...) with a single argument behaves the same on Python 2 and 3
        try:
            r = requests.get(url, headers=reqheaders)
        except requests.exceptions.RequestException as e:
            print(e)
            sys.exit(1)

        if r.status_code != 200:
            print("Error! HTTP status code: " + str(r.status_code))
            sys.exit(2)

        try:
            data = json.loads(r.content)
        except ValueError as e:
            print(e)
            sys.exit(3)

        # everything is fine
        show("api call OK")

        # Parse URL
        # From https://varda.oso-pilot.csc.fi/api/v1/toimipisteet/ save in DB https://varda.oso-pilot.csc.fi
        n = url.find("/", url.find("/") + 2)  # find 3rd occurrence of "/"
        # without a third "/" find() gives -1 and url[:-1] would silently
        # drop the last character, so keep the whole url in that case
        address = url[:n] if n != -1 else url

        show("insert data")
        cnt = 0
        for row in data["results"]:
            cnt += 1
            # show some sign of being alive
            if cnt % 100 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            if cnt % 1000 == 0:
                show("-- %d" % (cnt))

            # Explicitly remove the keys from dict (row) that are not saved
            # in Antero DB; pop() tolerates a key missing from the response
            # where del would raise KeyError
            for key in unsaved_keys:
                row.pop(key, None)

            # find out which columns to use on insert
            dboperator.resetcolumns(row)

            # flatten arrays/lists into their JSON text
            for col in row:
                if type(row[col]) is list:
                    row[col] = json.dumps(row[col])

            dboperator.insert(address, schema, table, row)

        show("wrote %d" % (cnt))
        show("ready")

        if data["next"] is None:
            break  # exit while-loop. We are done.
        url = data["next"]