def status_export(self, interval=1800):
    """Wait for the export job to settle and return its download link(s).

    The job identified by ``self.id_`` is polled through ``api.get``:

    * while the status is ``'PENDING'`` it is re-checked every 30 seconds,
      giving up once 10 minutes (600 s) have elapsed;
    * while the status is ``'IN_PROGRESS'`` it is re-checked every
      ``interval`` seconds, giving up once 8 hours (28800 s) have elapsed.

    Args:
        interval: Seconds to sleep between checks while the job is
            ``'IN_PROGRESS'``.

    Returns:
        For a ``'SUCCESSFUL'`` job of export type ``'RESEARCH_EVENTING'``,
        whatever ``api.get_clickstream_download_links`` returns for this
        course and ``self.interval``; for any other successful job, a
        one-element list holding the job's ``downloadLink``.

    Raises:
        ApiResolve: If either wait times out, the job reports ``'FAILED'``,
            or the API returns a status this method does not know.
    """

    def poll():
        # One status lookup; the API returns a sequence, the job is item 0.
        return api.get(self.id_)[0].to_json()

    def give_up(reason):
        # Log (when enabled) and raise with the very same text.
        if self.log:
            logging.error(reason)
        raise ApiResolve(reason)

    job = poll()

    # Phase 1: short polling while the job is still queued.
    pending_since = datetime.datetime.now()
    while job['status'] == 'PENDING':
        if self.verbose:
            print('API returned {} for job {}. Retrying in 30 seconds.'.format(
                job['status'], self.course_slug))
        time.sleep(30)
        waited = (datetime.datetime.now() - pending_since).total_seconds()
        if waited >= 600:
            give_up(
                "API request has been returning status 'PENDING' for 10 minutes. Skipping this request."
            )
        job = poll()

    # Phase 2: slower polling while the export is being produced.
    running_since = datetime.datetime.now()
    while job['status'] == 'IN_PROGRESS':
        if self.verbose:
            print('API returned {} for job {}. Retrying in {} minutes.'.format(
                job['status'], self.course_slug, str(interval / 60)))
        time.sleep(interval)
        waited = (datetime.datetime.now() - running_since).total_seconds()
        if waited >= 28800:
            give_up(
                "API request has been returning status 'IN_PROGRESS' for 8 hours. Skipping this request."
            )
        job = poll()

    status = job['status']

    if status == 'FAILED':
        # The logged text and the raised text differ by a trailing period.
        if self.log:
            logging.error("API returned 'job failed'.")
        raise ApiResolve("API returned 'job failed'")

    if status != 'SUCCESSFUL':
        if self.log:
            logging.error("Unknown status <{}> returned by api".format(status))
        raise ApiResolve("Unknown status returned by api")

    if job['exportType'] == 'RESEARCH_EVENTING':
        # Clickstream exports need a separate request to obtain their links.
        links_request = ClickstreamDownloadLinksRequest(
            course_id=self.course_id, interval=self.interval)
        return api.get_clickstream_download_links(links_request)

    # Anything else is table (sql) data with a single download link.
    return [job["downloadLink"]]
def get(args):
    """
    Print the details and status of the data export request whose job id
    is ``args.id``, as a plain two-column table.
    """
    job = api.get(args.id)[0]

    # Rows that are always shown.
    rows = []
    rows.append(['Export Job Id:', job.id])
    rows.append(['Export Type:', job.export_type_display])
    rows.append(['Status:', job.status])
    rows.append(['Scope Context:', job.scope_context])
    rows.append(['Scope Id:', job.scope_id])
    rows.append(['Scope Name:', job.scope_name])
    rows.append(['User id Hashing: ', job.formatted_anonymity_level])
    rows.append(['Created:', job.created_at.strftime('%c')])

    # Rows that only appear when the request carries the matching field.
    if job.schema_names:
        rows.append(['Schemas:', job.schema_names_display])
    if job.download_link:
        rows.append(['Download Link:', job.download_link])
    if job.interval:
        rows.append(['Interval:', ' to '.join(job.interval)])

    print(tabulate(rows, tablefmt="plain"))
def download(args):
    """
    Download the data export job whose request id is ``args.id`` into
    ``args.dest``.

    Any exception is logged and then re-raised unchanged.
    """
    try:
        # Look the request up first, then hand it to the downloader.
        utils.download(api.get(args.id)[0], args.dest)
    except Exception as failure:
        logging.error('Download failed with exception:\n{}'.format(failure))
        raise
def _error_text(err):
    """Return the human-readable text of an exception on Python 2 and 3."""
    return getattr(err, 'message', None) or str(err)


def _status_code_from_error(err):
    """Return the integer that starts the error text, or None if there is none."""
    try:
        return int(_error_text(err).split(' ')[0])
    except (AttributeError, IndexError, TypeError, ValueError):
        return None


def issue_requests(config_json):
    """Register today's tables and clickstream export requests.

    Builds one tables request and one clickstream request from
    ``config_json``, asks ``get_from_todays`` for already registered
    counterparts, and posts whichever is still missing. A failure whose
    error text starts with ``429`` is treated as "try again later": the loop
    sleeps 60 seconds and retries. Any other failure aborts the process.

    Args:
        config_json: Mapping read for the keys ``first_time``,
            ``fist_clickstream_date`` / ``last_clickstream_date``,
            ``eit_digital_id``, ``purpose_for_requests``, ``tables_purpose``
            and ``clickstream_purpose``.

    Returns:
        A tuple ``(registered_tables_request,
        registered_clickstream_request, range_last)``, where ``range_last``
        is the end date of the clickstream interval as a string.

    Raises:
        SystemExit: With status 1 after printing the error message when a
            request fails for a reason other than a 429 response. (The
            original exited with status 0, which hid the failure from the
            calling process.)
    """
    if config_json["first_time"] == "yes":
        # NOTE(review): the key really is spelled "fist_clickstream_date"
        # here; confirm against the config file before renaming it.
        range_first = config_json["fist_clickstream_date"]
    else:
        range_first = config_json["last_clickstream_date"]

    # Up to and including 03:xx local time the interval ends two days ago
    # instead of yesterday.
    # NOTE(review): presumably yesterday's data is not available that early;
    # confirm.
    days_back = 1 if datetime.today().hour > 3 else 2
    range_last = str(date.today() - timedelta(days=days_back))
    print(range_first, range_last)

    partner_id = int(config_json["eit_digital_id"])
    tables_issue_request = ExportRequest.ExportRequest(
        partner_id=partner_id,
        statement_of_purpose=config_json["purpose_for_requests"] +
        config_json["tables_purpose"],
        export_type=EXPORT_TYPE_TABLES,
        anonymity_level=ANONYMITY_LEVEL_COORDINATOR,
        schema_names=SCHEMA_NAMES)
    clickstream_issue_request = ExportRequest.ExportRequest(
        partner_id=partner_id,
        statement_of_purpose=config_json["purpose_for_requests"] +
        config_json["clickstream_purpose"],
        export_type=EXPORT_TYPE_CLICKSTREAM,
        anonymity_level=ANONYMITY_LEVEL_COORDINATOR,
        interval=[range_first, range_last])

    exit_with_error = False
    error_message = 'Error requesting the '

    # NOTE(review): get_from_todays is defined elsewhere; the loop below
    # relies on it yielding None for a request that is not registered yet.
    registered_tables_request, registered_clickstream_request = get_from_todays(
        tables_issue_request, clickstream_issue_request)
    print('{} {}'.format(registered_tables_request,
                         registered_clickstream_request))

    while (registered_tables_request is None
           or registered_clickstream_request is None):
        if registered_tables_request is None:
            try:
                print('TIME TO REQUEST TABLES')
                tables_response = api.post(tables_issue_request)
                print('THIS IS THE RESPONSE OF THE TABLE-POST' +
                      str(tables_response[0]))
                registered_tables_request = api.get(
                    tables_response[0].to_json()["id"])[0]
                print('THIS IS THE REGISTERED TABLE-REQUEST ' +
                      str(registered_tables_request))
            except Exception as e:
                print('THIS IS THE MESSAGE: ')
                print(_error_text(e))
                print('THAT WAS THE MESSAGE')
                # Only a 429 is retried; an error text without a leading
                # status code is treated as fatal instead of crashing here.
                if _status_code_from_error(e) != 429:
                    exit_with_error = True
                    error_message += 'tables: ' + _error_text(e)
                    break
                print('YOU NEED TO WAIT FOR TABLES')

        if registered_clickstream_request is None:
            try:
                print('ABOUT TO REQUEST CLICKSTREAM')
                print(str(clickstream_issue_request.to_json()))
                clickstream_response = api.post(clickstream_issue_request)
                print('THIS IS THE RESPONSE OF THE CLICKSTREAM-POST: ' +
                      str(clickstream_response))
                print('THIS IS THE RESPONSE OF THE CLICKSTREAM-POST: ' +
                      str(clickstream_response[0]))
                print(type(clickstream_response[0]))
                registered_clickstream_request = api.get(
                    clickstream_response[0].to_json()["id"])[0]
                print('THIS IS THE REGISTERED CLICKSTREAM-REQUEST ' +
                      str(registered_clickstream_request))
            except Exception as e:
                print('THIS IS THE MESSAGE: ')
                print(_error_text(e))
                print('THAT WAS THE MESSAGE: ')
                if _status_code_from_error(e) != 429:
                    exit_with_error = True
                    error_message += 'clickstream: ' + _error_text(e)
                    break
                print('YOU NEED TO WAIT FOR EVENTS')

        print('{} {}'.format(registered_tables_request,
                             registered_clickstream_request))
        # Wait before retrying whichever request is still missing.
        if (registered_tables_request is None
                or registered_clickstream_request is None):
            time.sleep(60)

    if exit_with_error:
        print(error_message)
        sys.exit(1)

    return registered_tables_request, registered_clickstream_request, range_last
def download_requests(tables_request, clickstream_request, config_json):
    """Wait for both export requests to succeed and download their data.

    Each request is re-fetched through ``api.get`` once per pass. When its
    status is ``'SUCCESSFUL'`` it is downloaded with ``utils.download`` into
    ``config_json['initial_download_location']``; otherwise the loop sleeps
    60 seconds and tries again.

    Args:
        tables_request: Registered tables export request (must offer
            ``to_json()`` with an ``'id'`` entry).
        clickstream_request: Registered clickstream export request, same
            requirements.
        config_json: Mapping read for ``initial_download_location``.

    Returns:
        A tuple ``(table_file, clickstream_files, config_json)`` where the
        first two items are whatever ``utils.download`` returned for the
        tables and clickstream requests.

    Raises:
        SystemExit: With status 1 after printing the error message when
            refreshing or downloading a request fails, or when a request
            reports status ``'FAILED'``. (The original exited with status 0
            and polled a failed request forever.)
    """
    print('ENTERING METHOD DOWNLOAD REQUEST')
    downloaded_tables = False
    downloaded_clickstream = False
    exit_with_error = False
    error_message = 'Error downloading the '
    table_file = ''
    clickstream_files = []

    while not downloaded_tables or not downloaded_clickstream:
        if not downloaded_tables:
            try:
                print(type(tables_request))
                print(type(tables_request.to_json()))
                tables_registered_req = api.get(tables_request.to_json()['id'])
            except Exception as e:
                print(e)
                exit_with_error = True
                error_message += 'tables, at updating request'
                break

            tables_status = tables_registered_req[0].to_json()['status']
            if tables_status == 'SUCCESSFUL':
                try:
                    table_file = utils.download(
                        tables_registered_req[0],
                        config_json['initial_download_location'])
                    downloaded_tables = True
                except Exception as e:
                    exit_with_error = True
                    # Keep the cause instead of discarding it.
                    error_message += 'tables: ' + str(e)
                    break
            elif tables_status == 'FAILED':
                # A failed job never becomes SUCCESSFUL; stop polling.
                exit_with_error = True
                error_message += 'tables, request failed'
                break
            else:
                print('MUST WAIT FOR THE TABLES')

        if not downloaded_clickstream:
            try:
                clickstream_registered_req = api.get(
                    clickstream_request.to_json()['id'])
            except Exception as e:
                exit_with_error = True
                # This used to blame the tables request (copy-paste error).
                error_message += 'clickstream, at updating request: ' + str(e)
                break
            print(str(clickstream_registered_req))

            clickstream_status = clickstream_registered_req[0].to_json()[
                'status']
            if clickstream_status == 'SUCCESSFUL':
                try:
                    clickstream_files = utils.download(
                        clickstream_registered_req[0],
                        config_json['initial_download_location'])
                    downloaded_clickstream = True
                except Exception as e:
                    exit_with_error = True
                    error_message += 'clickstream, at downloading: ' + str(e)
                    break
            elif clickstream_status == 'FAILED':
                exit_with_error = True
                error_message += 'clickstream, request failed'
                break
            else:
                print('MUST WAIT FOR THE CLICKSTREAM')

        # Give the API time before checking the unfinished request(s) again.
        if not downloaded_tables or not downloaded_clickstream:
            time.sleep(60)

    if exit_with_error:
        print(error_message)
        sys.exit(1)

    return table_file, clickstream_files, config_json
from datetime import date, datetime, timedelta

from courseraresearchexports.exports import api, utils
from courseraresearchexports.models import ExportRequest
from courseraresearchexports.constants.api_constants import (
    ANONYMITY_LEVEL_COORDINATOR,
    EXPORT_TYPE_TABLES,
    EXPORT_TYPE_CLICKSTREAM,
    SCHEMA_NAMES,
)

# Ad-hoc script: post one clickstream export request and read it back.

# Requested interval: five days ago through yesterday, as date strings.
range_first = str(date.today() - timedelta(days=5))
range_last = str(date.today() - timedelta(days=1))

CLICKSTREAM_ISSUE_REQUEST = ExportRequest.ExportRequest(
    partner_id=253,
    statement_of_purpose="purpose_for_requests",
    export_type=EXPORT_TYPE_CLICKSTREAM,
    anonymity_level=ANONYMITY_LEVEL_COORDINATOR,
    interval=[range_first, range_last],
)

# Submit the request and show the first item of the API response.
clickstream_response = api.post(CLICKSTREAM_ISSUE_REQUEST)
print('THIS IS THE RESPONSE OF THE POST' + str(clickstream_response[0]))

# Fetch the request back by the id the API assigned to it.
registered_clickstream_request = api.get(
    clickstream_response[0].to_json()["id"])
print('THIS IS THE REGISTERED REQUEST ' +
      str(registered_clickstream_request[0]))