def select(self):
    """Load every ``localStorage`` row together with its crawl and task.

    Runs one query joining ``localStorage`` to ``crawl`` and ``task`` and
    turns each result row into a small object graph.

    Returns:
        list: One ``LocalStorageModel`` per row. Its ``crawl_id`` attribute
        holds a ``CrawlModel`` whose ``task_id`` attribute holds a
        ``TaskModel``.

    Raises:
        Any exception raised while querying or building the models is
        propagated unchanged, after the cursor has been closed and
        ``self.disconnect()`` has been called.
    """
    query = """
        SELECT
            localStorage.id,
            crawl.crawl_id,
            task.task_id,
            task.start_time,
            task.manager_params,
            task.openwpm_version,
            task.browser_version,
            crawl.browser_params,
            crawl.screen_res,
            crawl.ua_string,
            crawl.finished,
            crawl.start_time,
            localStorage.page_url,
            localStorage.scope,
            localStorage.KEY,
            localStorage.value
        FROM localStorage
        INNER JOIN crawl ON localStorage.crawl_id = crawl.crawl_id
        INNER JOIN task ON crawl.task_id = task.task_id
    """
    model_list = []
    self.connect()
    try:
        cursor = self.connection.cursor()
        try:
            cursor.execute(query)
            # Row indices below follow the column order of the SELECT list.
            for row in cursor.fetchall():
                # task model
                task_model = TaskModel()
                task_model.task_id = row[2]
                task_model.start_time = row[3]
                task_model.manager_params = row[4]
                task_model.openwpm_version = row[5]
                task_model.browser_version = row[6]

                # crawl model (references its task object, not the raw id)
                crawl_model = CrawlModel()
                crawl_model.crawl_id = row[1]
                crawl_model.task_id = task_model
                crawl_model.browser_params = row[7]
                crawl_model.screen_res = row[8]
                crawl_model.ua_string = row[9]
                crawl_model.finished = row[10]
                crawl_model.start_time = row[11]

                # local storage model
                local_storage_model = LocalStorageModel()
                local_storage_model.id = row[0]
                local_storage_model.crawl_id = crawl_model
                local_storage_model.page_url = row[12]
                local_storage_model.scope = row[13]
                local_storage_model.KEY = row[14]
                local_storage_model.value = row[15]
                model_list.append(local_storage_model)
        finally:
            # Release the cursor even when the query or mapping fails.
            cursor.close()
    finally:
        # Always drop the connection opened by self.connect().
        self.disconnect()
    return model_list
def select(self):
    """Load every ``CrawlHistory`` row together with its crawl and task.

    Runs one query joining ``CrawlHistory`` to ``crawl`` and ``task`` and
    turns each result row into a small object graph.

    Returns:
        list: One ``CrawlHistoryModel`` per row. Its ``crawl_id`` attribute
        holds a ``CrawlModel`` whose ``task_id`` attribute holds a
        ``TaskModel``. The ``dtg`` column is stored on the model as
        ``timestamp``.

    Raises:
        Any exception raised while querying or building the models is
        propagated unchanged, after the cursor has been closed and
        ``self.disconnect()`` has been called.
    """
    query = """
        SELECT
            crawl.crawl_id,
            task.task_id,
            task.start_time,
            task.manager_params,
            task.openwpm_version,
            task.browser_version,
            crawl.browser_params,
            crawl.screen_res,
            crawl.ua_string,
            crawl.finished,
            crawl.start_time,
            CrawlHistory.command,
            CrawlHistory.arguments,
            CrawlHistory.bool_success,
            CrawlHistory.dtg
        FROM CrawlHistory
        INNER JOIN crawl ON CrawlHistory.crawl_id = crawl.crawl_id
        INNER JOIN task ON crawl.task_id = task.task_id
    """
    model_list = []
    self.connect()
    try:
        cursor = self.connection.cursor()
        try:
            cursor.execute(query)
            # Row indices below follow the column order of the SELECT list.
            for row in cursor.fetchall():
                # task model
                task_model = TaskModel()
                task_model.task_id = row[1]
                task_model.start_time = row[2]
                task_model.manager_params = row[3]
                task_model.openwpm_version = row[4]
                task_model.browser_version = row[5]

                # crawl model (references its task object, not the raw id)
                crawl_model = CrawlModel()
                crawl_model.crawl_id = row[0]
                crawl_model.task_id = task_model
                crawl_model.browser_params = row[6]
                crawl_model.screen_res = row[7]
                crawl_model.ua_string = row[8]
                crawl_model.finished = row[9]
                crawl_model.start_time = row[10]

                # crawl history model
                crawl_history_model = CrawlHistoryModel()
                crawl_history_model.crawl_id = crawl_model
                crawl_history_model.command = row[11]
                crawl_history_model.arguments = row[12]
                crawl_history_model.bool_success = row[13]
                crawl_history_model.timestamp = row[14]
                model_list.append(crawl_history_model)
        finally:
            # Release the cursor even when the query or mapping fails.
            cursor.close()
    finally:
        # Always drop the connection opened by self.connect().
        self.disconnect()
    return model_list
def select(self):
    """Load every ``site_visits`` row together with its crawl and task.

    Runs one query joining ``site_visits`` to ``crawl`` and ``task`` and
    turns each result row into a small object graph.

    Returns:
        list: One ``SiteVisitsModel`` per row. Its ``crawl_id`` attribute
        holds a ``CrawlModel`` whose ``task_id`` attribute holds a
        ``TaskModel``.

    Raises:
        Any exception raised while querying or building the models is
        propagated unchanged, after the cursor has been closed and
        ``self.disconnect()`` has been called.
    """
    query = """
        SELECT
            site_visits.visit_id,
            crawl.crawl_id,
            task.task_id,
            task.start_time,
            task.manager_params,
            task.openwpm_version,
            task.browser_version,
            crawl.browser_params,
            crawl.screen_res,
            crawl.ua_string,
            crawl.finished,
            crawl.start_time,
            site_visits.site_url
        FROM site_visits
        INNER JOIN crawl ON site_visits.crawl_id = crawl.crawl_id
        INNER JOIN task ON crawl.task_id = task.task_id
    """
    model_list = []
    self.connect()
    try:
        cursor = self.connection.cursor()
        try:
            cursor.execute(query)
            # Row indices below follow the column order of the SELECT list.
            for row in cursor.fetchall():
                # task model
                task_model = TaskModel()
                task_model.task_id = row[2]
                task_model.start_time = row[3]
                task_model.manager_params = row[4]
                task_model.openwpm_version = row[5]
                task_model.browser_version = row[6]

                # crawl model (references its task object, not the raw id)
                crawl_model = CrawlModel()
                crawl_model.crawl_id = row[1]
                crawl_model.task_id = task_model
                crawl_model.browser_params = row[7]
                crawl_model.screen_res = row[8]
                crawl_model.ua_string = row[9]
                crawl_model.finished = row[10]
                crawl_model.start_time = row[11]

                # site visits model
                site_visits_model = SiteVisitsModel()
                site_visits_model.visit_id = row[0]
                site_visits_model.crawl_id = crawl_model
                site_visits_model.site_url = row[12]
                model_list.append(site_visits_model)
        finally:
            # Release the cursor even when the query or mapping fails.
            cursor.close()
    finally:
        # Always drop the connection opened by self.connect().
        self.disconnect()
    return model_list
def users(event, context):
    """Return the users that belong to a task.

    Reads the task id from ``event['pathParameters']['id']``, loads the
    task and asks it for its users.

    Returns:
        dict: On success, a response with ``statusCode`` 200 whose JSON
        body carries ``statusCode``, ``taskId`` and ``users``. On failure,
        the value of ``build_response(error, status)`` with status 400
        (no path parameters), 404 (task not found) or 500 (user lookup
        failed).
    """
    try:
        logger.info(event)
        if not event['pathParameters']:
            raise errors.BadRequest('Bad request')
        task_id = event['pathParameters']['id']

        # Load the task
        try:
            task = TaskModel.get(task_id)
        except TaskModel.DoesNotExist:
            raise errors.NotFound('The task does not exist')
        if not task.userIds:
            task.userIds = []

        # Load the users
        try:
            members = task.get_users()
        except UserModel.DoesNotExist as lookup_error:
            logger.exception(lookup_error)
            raise errors.InternalError('Internal server error')

        headers = {
            'Access-Control-Allow-Origin': '*',
            'Content-Type': 'application/json'
        }
        payload = {
            'statusCode': 200,
            'taskId': task_id,
            'users': [dict(member) for member in members]
        }
        return {
            'statusCode': 200,
            'headers': headers,
            'body': json.dumps(payload)
        }
    except (errors.BadRequest, errors.NotFound,
            errors.InternalError) as failure:
        logger.exception(failure)
        # Checked in the same order as the original separate handlers.
        if isinstance(failure, errors.BadRequest):
            status = 400
        elif isinstance(failure, errors.NotFound):
            status = 404
        else:
            status = 500
        return build_response(failure, status)
def done_undone(event, context):
    """Mark a task as done or undone, depending on the invoked resource.

    The task id comes from ``event['pathParameters']['id']``. The flag
    passed to ``task.status_update`` is true when ``event['resource']``
    ends with ``/done`` and false otherwise.

    Returns:
        dict: On success, a response with ``statusCode`` 200 whose JSON
        body carries ``statusCode`` and ``task``. On failure, the value of
        ``build_response(error, status)`` with status 400 (no path
        parameters), 404 (task not found) or 500 (update failed).
    """
    try:
        logger.info(event)
        if not event['pathParameters']:
            raise errors.BadRequest('Bad request')
        task_id = event['pathParameters']['id']

        # True for ".../done", False for anything else (i.e. undone)
        flag = bool(re.match('.*/done$', event['resource']))

        # Load the task
        try:
            task = TaskModel.get(task_id)
        except TaskModel.DoesNotExist:
            raise errors.NotFound('The task does not exist')

        # Update the task
        try:
            task.status_update(flag)
        except UpdateError as update_error:
            logger.exception(update_error)
            raise errors.InternalError('Internal server error')

        headers = {
            'Access-Control-Allow-Origin': '*',
            'Content-Type': 'application/json'
        }
        payload = {
            'statusCode': 200,
            'task': dict(task)
        }
        return {
            'statusCode': 200,
            'headers': headers,
            'body': json.dumps(payload)
        }
    except (errors.BadRequest, errors.NotFound,
            errors.InternalError) as failure:
        logger.exception(failure)
        # Checked in the same order as the original separate handlers.
        if isinstance(failure, errors.BadRequest):
            status = 400
        elif isinstance(failure, errors.NotFound):
            status = 404
        else:
            status = 500
        return build_response(failure, status)
def delete(event, context):
    """Logically delete a task by calling ``task.logic_delete()``.

    NOTE(review): the original note said this changes the delete flag to
    false; the flag handling lives in ``TaskModel.logic_delete`` and is not
    visible here - confirm.

    The task id comes from ``event['pathParameters']['id']``.

    Returns:
        dict: On success, a response with ``statusCode`` 200 whose JSON
        body carries only ``statusCode``. On failure, the value of
        ``build_response(error, status)`` with status 400 (no path
        parameters), 404 (task not found) or 500 (update failed).
    """
    try:
        logger.info(event)
        if not event['pathParameters']:
            raise errors.BadRequest('Bad request')
        task_id = event['pathParameters']['id']

        # Load the task
        try:
            task = TaskModel.get(task_id)
        except TaskModel.DoesNotExist:
            raise errors.NotFound('The task does not exist')

        # Delete the task
        try:
            task.logic_delete()
        except UpdateError as update_error:
            logger.exception(update_error)
            raise errors.InternalError('Internal server error')

        headers = {
            'Access-Control-Allow-Origin': '*',
            'Content-Type': 'application/json'
        }
        return {
            'statusCode': 200,
            'headers': headers,
            'body': json.dumps({'statusCode': 200})
        }
    except (errors.BadRequest, errors.NotFound,
            errors.InternalError) as failure:
        logger.exception(failure)
        # Checked in the same order as the original separate handlers.
        if isinstance(failure, errors.BadRequest):
            status = 400
        elif isinstance(failure, errors.NotFound):
            status = 404
        else:
            status = 500
        return build_response(failure, status)
def select(self):
    """Load every row of the ``task`` table.

    Returns:
        list: One ``TaskModel`` per row.

    Raises:
        Any exception raised while querying or building the models is
        propagated unchanged, after the cursor has been closed and
        ``self.disconnect()`` has been called.
    """
    query = "SELECT * FROM task"
    model_list = []
    self.connect()
    try:
        cursor = self.connection.cursor()
        try:
            cursor.execute(query)
            # NOTE(review): "SELECT *" with positional indices relies on the
            # table's column order being task_id, start_time,
            # manager_params, openwpm_version, browser_version - confirm
            # against the schema.
            for row in cursor.fetchall():
                model = TaskModel()
                model.task_id = row[0]
                model.start_time = row[1]
                model.manager_params = row[2]
                model.openwpm_version = row[3]
                model.browser_version = row[4]
                model_list.append(model)
        finally:
            # Release the cursor even when the query or mapping fails.
            cursor.close()
    finally:
        # Always drop the connection opened by self.connect().
        self.disconnect()
    return model_list
def select(self):
    """Load every ``crawl`` row together with its task.

    Runs one query joining ``crawl`` to ``task`` and turns each result row
    into a ``CrawlModel`` that references its ``TaskModel``.

    Returns:
        list: One ``CrawlModel`` per row. Its ``task_id`` attribute holds a
        ``TaskModel``.

    Raises:
        Any exception raised while querying or building the models is
        propagated unchanged, after the cursor has been closed and
        ``self.disconnect()`` has been called.
    """
    query = """
        SELECT
            crawl.crawl_id,
            task.task_id,
            task.start_time,
            task.manager_params,
            task.openwpm_version,
            task.browser_version,
            crawl.browser_params,
            crawl.screen_res,
            crawl.ua_string,
            crawl.finished,
            crawl.start_time
        FROM crawl
        INNER JOIN task on crawl.task_id = task.task_id
    """
    model_list = []
    self.connect()
    try:
        cursor = self.connection.cursor()
        try:
            cursor.execute(query)
            # Row indices below follow the column order of the SELECT list.
            for row in cursor.fetchall():
                # task model
                task_model = TaskModel()
                task_model.task_id = row[1]
                task_model.start_time = row[2]
                task_model.manager_params = row[3]
                task_model.openwpm_version = row[4]
                task_model.browser_version = row[5]

                # crawl model (references its task object, not the raw id)
                crawl_model = CrawlModel()
                crawl_model.crawl_id = row[0]
                crawl_model.task_id = task_model
                crawl_model.browser_params = row[6]
                crawl_model.screen_res = row[7]
                crawl_model.ua_string = row[8]
                crawl_model.finished = row[9]
                crawl_model.start_time = row[10]
                model_list.append(crawl_model)
        finally:
            # Release the cursor even when the query or mapping fails.
            cursor.close()
    finally:
        # Always drop the connection opened by self.connect().
        self.disconnect()
    return model_list
def select(self):
    """Load every ``flash_cookies`` row with its crawl, task and site visit.

    Runs one query joining ``flash_cookies`` to ``crawl``, ``task`` and
    ``site_visits`` and turns each result row into a small object graph.

    Returns:
        list: One ``FlashCookiesModel`` per row. Its ``crawl_id`` attribute
        holds a ``CrawlModel`` (whose ``task_id`` holds a ``TaskModel``) and
        its ``visit_id`` attribute holds a ``SiteVisitsModel``.

    Raises:
        Any exception raised while querying or building the models is
        propagated unchanged, after the cursor has been closed and
        ``self.disconnect()`` has been called.
    """
    query = """
        SELECT
            flash_cookies.id,
            crawl.crawl_id,
            task.task_id,
            task.start_time,
            task.manager_params,
            task.openwpm_version,
            task.browser_version,
            crawl.browser_params,
            crawl.screen_res,
            crawl.ua_string,
            crawl.finished,
            crawl.start_time,
            site_visits.visit_id,
            site_visits.site_url,
            flash_cookies.domain,
            flash_cookies.filename,
            flash_cookies.local_path,
            flash_cookies.key,
            flash_cookies.content
        FROM flash_cookies
        INNER JOIN crawl ON flash_cookies.crawl_id = crawl.crawl_id
        INNER JOIN task ON crawl.task_id = task.task_id
        INNER JOIN site_visits ON flash_cookies.visit_id = site_visits.visit_id
    """
    model_list = []
    self.connect()
    try:
        cursor = self.connection.cursor()
        try:
            cursor.execute(query)
            # Row indices below follow the column order of the SELECT list.
            for row in cursor.fetchall():
                # task model
                task_model = TaskModel()
                task_model.task_id = row[2]
                task_model.start_time = row[3]
                task_model.manager_params = row[4]
                task_model.openwpm_version = row[5]
                task_model.browser_version = row[6]

                # crawl model (references its task object, not the raw id)
                crawl_model = CrawlModel()
                crawl_model.crawl_id = row[1]
                crawl_model.task_id = task_model
                crawl_model.browser_params = row[7]
                crawl_model.screen_res = row[8]
                crawl_model.ua_string = row[9]
                crawl_model.finished = row[10]
                crawl_model.start_time = row[11]

                # site visits model
                site_visits_model = SiteVisitsModel()
                site_visits_model.visit_id = row[12]
                site_visits_model.crawl_id = crawl_model
                site_visits_model.site_url = row[13]

                # flash cookie model
                flash_cookie_model = FlashCookiesModel()
                flash_cookie_model.id = row[0]
                flash_cookie_model.crawl_id = crawl_model
                flash_cookie_model.visit_id = site_visits_model
                flash_cookie_model.domain = row[14]
                flash_cookie_model.filename = row[15]
                flash_cookie_model.local_path = row[16]
                flash_cookie_model.key = row[17]
                flash_cookie_model.content = row[18]
                model_list.append(flash_cookie_model)
        finally:
            # Release the cursor even when the query or mapping fails.
            cursor.close()
    finally:
        # Always drop the connection opened by self.connect().
        self.disconnect()
    return model_list
def select(self):
    """Load every ``profile_cookies`` row with its crawl, task and site visit.

    Runs one query joining ``profile_cookies`` to ``crawl``, ``task`` and
    ``site_visits`` and turns each result row into a small object graph.

    Returns:
        list: One ``ProfileCookiesModel`` per row. Its ``crawl_id``
        attribute holds a ``CrawlModel`` (whose ``task_id`` holds a
        ``TaskModel``) and its ``visit_id`` attribute holds a
        ``SiteVisitsModel``.

    Raises:
        Any exception raised while querying or building the models is
        propagated unchanged, after the cursor has been closed and
        ``self.disconnect()`` has been called.
    """
    query = """
        SELECT
            profile_cookies.id,
            crawl.crawl_id,
            task.task_id,
            task.start_time,
            task.manager_params,
            task.openwpm_version,
            task.browser_version,
            crawl.browser_params,
            crawl.screen_res,
            crawl.ua_string,
            crawl.finished,
            crawl.start_time,
            site_visits.visit_id,
            site_visits.site_url,
            profile_cookies.baseDomain,
            profile_cookies.name,
            profile_cookies.value,
            profile_cookies.host,
            profile_cookies.path,
            profile_cookies.expiry,
            profile_cookies.accessed,
            profile_cookies.creationTime,
            profile_cookies.isSecure,
            profile_cookies.isHttpOnly
        FROM profile_cookies
        INNER JOIN crawl ON profile_cookies.crawl_id = crawl.crawl_id
        INNER JOIN task ON crawl.task_id = task.task_id
        INNER JOIN site_visits ON profile_cookies.visit_id = site_visits.visit_id
    """
    model_list = []
    self.connect()
    try:
        cursor = self.connection.cursor()
        try:
            cursor.execute(query)
            # Row indices below follow the column order of the SELECT list.
            for row in cursor.fetchall():
                # task model
                task_model = TaskModel()
                task_model.task_id = row[2]
                task_model.start_time = row[3]
                task_model.manager_params = row[4]
                task_model.openwpm_version = row[5]
                task_model.browser_version = row[6]

                # crawl model (references its task object, not the raw id)
                crawl_model = CrawlModel()
                crawl_model.crawl_id = row[1]
                crawl_model.task_id = task_model
                crawl_model.browser_params = row[7]
                crawl_model.screen_res = row[8]
                crawl_model.ua_string = row[9]
                crawl_model.finished = row[10]
                crawl_model.start_time = row[11]

                # site visits model
                site_visits_model = SiteVisitsModel()
                site_visits_model.visit_id = row[12]
                site_visits_model.crawl_id = crawl_model
                site_visits_model.site_url = row[13]

                # profile cookies model
                profile_cookies_model = ProfileCookiesModel()
                profile_cookies_model.id = row[0]
                profile_cookies_model.crawl_id = crawl_model
                profile_cookies_model.visit_id = site_visits_model
                profile_cookies_model.baseDomain = row[14]
                profile_cookies_model.name = row[15]
                profile_cookies_model.value = row[16]
                profile_cookies_model.host = row[17]
                profile_cookies_model.path = row[18]
                profile_cookies_model.expiry = row[19]
                profile_cookies_model.accessed = row[20]
                profile_cookies_model.creationTime = row[21]
                profile_cookies_model.isSecure = row[22]
                profile_cookies_model.isHttpOnly = row[23]
                model_list.append(profile_cookies_model)
        finally:
            # Release the cursor even when the query or mapping fails.
            cursor.close()
    finally:
        # Always drop the connection opened by self.connect().
        self.disconnect()
    return model_list
def select(self):
    """Load every ``javascript`` row with its crawl, task and site visit.

    Runs one query joining ``javascript`` to ``crawl``, ``task`` and
    ``site_visits`` and turns each result row into a small object graph.

    Returns:
        list: One ``JavascriptModel`` per row. Its ``crawl_id`` attribute
        holds a ``CrawlModel`` (whose ``task_id`` holds a ``TaskModel``) and
        its ``visit_id`` attribute holds a ``SiteVisitsModel``.

    Raises:
        Any exception raised while querying or building the models is
        propagated unchanged, after the cursor has been closed and
        ``self.disconnect()`` has been called.
    """
    query = """
        SELECT
            javascript.id,
            crawl.crawl_id,
            task.task_id,
            task.start_time,
            task.manager_params,
            task.openwpm_version,
            task.browser_version,
            crawl.browser_params,
            crawl.screen_res,
            crawl.ua_string,
            crawl.finished,
            crawl.start_time,
            site_visits.visit_id,
            site_visits.site_url,
            javascript.script_url,
            javascript.script_line,
            javascript.script_col,
            javascript.func_name,
            javascript.script_loc_eval,
            javascript.call_stack,
            javascript.symbol,
            javascript.operation,
            javascript.value,
            javascript.arguments,
            javascript.time_stamp
        FROM javascript
        INNER JOIN crawl ON javascript.crawl_id = crawl.crawl_id
        INNER JOIN task ON crawl.task_id = task.task_id
        INNER JOIN site_visits ON javascript.visit_id = site_visits.visit_id
    """
    model_list = []
    self.connect()
    try:
        cursor = self.connection.cursor()
        try:
            cursor.execute(query)
            # Row indices below follow the column order of the SELECT list.
            for row in cursor.fetchall():
                # task model
                task_model = TaskModel()
                task_model.task_id = row[2]
                task_model.start_time = row[3]
                task_model.manager_params = row[4]
                task_model.openwpm_version = row[5]
                task_model.browser_version = row[6]

                # crawl model (references its task object, not the raw id)
                crawl_model = CrawlModel()
                crawl_model.crawl_id = row[1]
                crawl_model.task_id = task_model
                crawl_model.browser_params = row[7]
                crawl_model.screen_res = row[8]
                crawl_model.ua_string = row[9]
                crawl_model.finished = row[10]
                crawl_model.start_time = row[11]

                # site visits model
                site_visits_model = SiteVisitsModel()
                site_visits_model.visit_id = row[12]
                site_visits_model.crawl_id = crawl_model
                site_visits_model.site_url = row[13]

                # javascript model
                javascript_model = JavascriptModel()
                javascript_model.id = row[0]
                javascript_model.crawl_id = crawl_model
                javascript_model.visit_id = site_visits_model
                javascript_model.script_url = row[14]
                javascript_model.script_line = row[15]
                javascript_model.script_col = row[16]
                javascript_model.func_name = row[17]
                javascript_model.script_loc_eval = row[18]
                javascript_model.call_stack = row[19]
                javascript_model.symbol = row[20]
                javascript_model.operation = row[21]
                javascript_model.value = row[22]
                javascript_model.arguments = row[23]
                javascript_model.time_stamp = row[24]
                model_list.append(javascript_model)
        finally:
            # Release the cursor even when the query or mapping fails.
            cursor.close()
    finally:
        # Always drop the connection opened by self.connect().
        self.disconnect()
    return model_list
def update(event, context):
    """Update a task's ``name`` and/or ``description``.

    The task id comes from ``event['pathParameters']['id']`` and the new
    values from the JSON object in ``event['body']``. Keys other than
    ``name`` and ``description`` are ignored.

    Returns:
        dict: On success, a response with ``statusCode`` 200 whose JSON
        body carries ``statusCode`` and ``task``. On failure, the value of
        ``build_response(error, status)`` with status 400 (missing or
        malformed body, no updatable attribute, invalid name or
        description), 404 (task not found) or 500 (save failed).
    """
    try:
        logger.info(event)
        if not (event['body'] and event['pathParameters']):
            raise errors.BadRequest('Bad request')

        # A body that is not valid JSON is a client error, not a crash.
        # json.JSONDecodeError is a subclass of ValueError.
        try:
            data = json.loads(event['body'])
        except ValueError:
            raise errors.BadRequest('Bad request')
        # Valid JSON that is not an object (e.g. a list) has no attributes.
        if not isinstance(data, dict):
            raise errors.BadRequest('Bad request')

        # Drop every attribute that may not be updated
        data = {k: v for k, v in data.items() if k in ['name', 'description']}
        if not data:
            raise errors.BadRequest('Bad request')
        task_id = event['pathParameters']['id']

        # Load the task
        try:
            task = TaskModel.get(task_id)
        except TaskModel.DoesNotExist:
            raise errors.NotFound('The task does not exist')

        if 'name' in data:
            task.name = data['name']
        if 'description' in data:
            task.description = data['description']
        if not task.userIds:
            task.userIds = []

        try:
            task.save()
        except (InvalidNameError, InvalidDescriptionError) as e:
            logger.exception(e)
            # The validation message is passed on to the client.
            raise errors.BadRequest(str(e))
        except PutError as e:
            logger.exception(e)
            raise errors.InternalError('Internal server error')

        return {
            'statusCode': 200,
            'headers': {
                'Access-Control-Allow-Origin': '*',
                'Content-Type': 'application/json'
            },
            'body': json.dumps({
                'statusCode': 200,
                'task': dict(task)
            })
        }
    except errors.BadRequest as e:
        logger.exception(e)
        return build_response(e, 400)
    except errors.NotFound as e:
        logger.exception(e)
        return build_response(e, 404)
    except errors.InternalError as e:
        logger.exception(e)
        return build_response(e, 500)
def add_remove(event, context):
    """Add users to, or remove users from, a task.

    The task id comes from ``event['pathParameters']['id']`` and the user
    ids from the ``userIds`` array of the JSON object in ``event['body']``.
    Users are added when ``event['resource']`` ends with ``/add`` and
    removed otherwise.

    Returns:
        dict: On success, a response with ``statusCode`` 200 whose JSON
        body carries ``statusCode`` and the re-loaded ``task``. On failure,
        the value of ``build_response(error, status)`` with status 400
        (missing or malformed body, ``userIds`` missing or not an array),
        404 (task not found or invalid user) or 500 (update failed); any
        other exception yields a generic 500 response.
    """
    try:
        logger.info(event)
        if not (event['pathParameters'] and event['body']):
            raise errors.BadRequest('Bad request')

        # A body that is not valid JSON is a client error; without this it
        # would fall through to the catch-all below and be reported as 500.
        # json.JSONDecodeError is a subclass of ValueError.
        try:
            data = json.loads(event['body'])
        except ValueError:
            raise errors.BadRequest('Bad request')
        # Valid JSON that is not an object (e.g. a list) has no attributes.
        if not isinstance(data, dict):
            raise errors.BadRequest('Bad request')

        # Drop every attribute except userIds
        data = {k: v for k, v in data.items() if k == 'userIds'}
        if not data:
            raise errors.BadRequest('Bad request')
        if not isinstance(data['userIds'], list):
            raise errors.BadRequest('"userIds" attribute must be array')
        task_id = event['pathParameters']['id']
        user_ids = data['userIds']

        # Load the task
        try:
            task = TaskModel.get(task_id)
        except TaskModel.DoesNotExist:
            raise errors.NotFound('The task does not exist')

        # True means add, False means remove
        flag = bool(re.match('.*/add$', event['resource']))

        # Update the task's userIds
        try:
            task.user_ids_update(user_ids, flag)
        except InvalidUserError as e:
            logger.exception(e)
            # The validation message is passed on to the client.
            raise errors.NotFound(str(e))
        except UpdateError as e:
            logger.exception(e)
            raise errors.InternalError('Internal server error')

        # Re-load the task before serialising it into the response.
        task = TaskModel.get(task_id)
        return {
            'statusCode': 200,
            'headers': {
                'Access-Control-Allow-Origin': '*',
                'Content-Type': 'application/json'
            },
            'body': json.dumps(
                {
                    'statusCode': 200,
                    'task': dict(task)
                }
            )
        }
    except errors.BadRequest as e:
        logger.exception(e)
        return build_response(e, 400)
    except errors.NotFound as e:
        logger.exception(e)
        return build_response(e, 404)
    except errors.InternalError as e:
        logger.exception(e)
        return build_response(e, 500)
    except Exception as e:
        # Last-resort boundary: log and answer with a generic 500.
        logger.exception(e)
        return {
            'statusCode': 500,
            'headers': {
                'Access-Control-Allow-Origin': '*',
                'Content-Type': 'application/json'
            },
            'body': json.dumps(
                {
                    'statusCode': 500,
                    'errorMessage': 'Internal server error'
                }
            )
        }
def create(event, context):
    """Create a task.

    ``name`` and ``description`` are required and ``userIds`` is optional
    (defaults to an empty list). ``taskListId`` is also read from the body.
    The attributes come from the JSON object in ``event['body']`` and are
    checked by ``validate_attributes`` before the task is built.

    Returns:
        dict: On success, a response with ``statusCode`` 200 whose JSON
        body carries ``statusCode`` and ``task``. On failure, the value of
        ``build_response(error, status)`` with status 400 (missing or
        malformed body, invalid attribute), 404 (task list or user does not
        exist) or 500 (save failed).
    """
    try:
        logger.info(event)
        if not (event['body']):
            raise errors.BadRequest('Bad request')

        # A body that is not valid JSON is a client error, not a crash.
        # json.JSONDecodeError is a subclass of ValueError.
        try:
            body = json.loads(event['body'])
        except ValueError:
            raise errors.BadRequest('Bad request')
        # Valid JSON that is not an object (e.g. a list) has no attributes.
        if not isinstance(body, dict):
            raise errors.BadRequest('Bad request')

        validate_attributes(body)
        if 'userIds' not in body:
            body['userIds'] = []
        task = TaskModel(id=str(uuid.uuid1()),
                         name=body['name'],
                         description=body['description'],
                         taskListId=body['taskListId'],
                         userIds=body['userIds'])

        # Save the task
        try:
            task.save()
        except (InvalidNameError, InvalidDescriptionError) as e:
            logger.exception(e)
            raise errors.BadRequest(str(e))
        except InvalidTaskListError as e:
            logger.exception(e)
            # The error message decides between "not found" and "bad input".
            if str(e) == 'The taskList does not exist':
                raise errors.NotFound(str(e))
            else:
                raise errors.BadRequest(str(e))
        except InvalidUserError as e:
            logger.exception(e)
            # The error message decides between "not found" and "bad input".
            if str(e) == 'The userIds contains a invalid userId does not exist':
                raise errors.NotFound(str(e))
            else:
                raise errors.BadRequest(str(e))
        except PutError as e:
            logger.exception(e)
            raise errors.InternalError('Internal server error')

        return {
            'statusCode': 200,
            'headers': {
                'Access-Control-Allow-Origin': '*',
                'Content-Type': 'application/json'
            },
            'body': json.dumps({
                'statusCode': 200,
                'task': dict(task)
            })
        }
    except errors.BadRequest as e:
        logger.exception(e)
        return build_response(e, 400)
    except errors.NotFound as e:
        logger.exception(e)
        return build_response(e, 404)
    except errors.InternalError as e:
        logger.exception(e)
        return build_response(e, 500)
def select(self):
    """Load every ``http_requests`` row with its crawl, task and site visit.

    Runs one query joining ``http_requests`` to ``crawl``, ``task`` and
    ``site_visits`` and turns each result row into a small object graph.

    Returns:
        list: One ``HttpRequestsModel`` per row. Its ``crawl_id`` attribute
        holds a ``CrawlModel`` (whose ``task_id`` holds a ``TaskModel``) and
        its ``visit_id`` attribute holds a ``SiteVisitsModel``.

    Raises:
        Any exception raised while querying or building the models is
        propagated unchanged, after the cursor has been closed and
        ``self.disconnect()`` has been called.
    """
    query = """
        SELECT
            http_requests.id,
            crawl.crawl_id,
            task.task_id,
            task.start_time,
            task.manager_params,
            task.openwpm_version,
            task.browser_version,
            crawl.browser_params,
            crawl.screen_res,
            crawl.ua_string,
            crawl.finished,
            crawl.start_time,
            site_visits.visit_id,
            site_visits.site_url,
            http_requests.url,
            http_requests.top_level_url,
            http_requests.method,
            http_requests.referrer,
            http_requests.headers,
            http_requests.channel_id,
            http_requests.is_XHR,
            http_requests.is_frame_load,
            http_requests.is_full_page,
            http_requests.is_third_party_channel,
            http_requests.is_third_party_window,
            http_requests.triggering_origin,
            http_requests.loading_origin,
            http_requests.loading_href,
            http_requests.req_call_stack,
            http_requests.content_policy_type,
            http_requests.post_body,
            http_requests.time_stamp
        FROM http_requests
        INNER JOIN crawl ON http_requests.crawl_id = crawl.crawl_id
        INNER JOIN task ON crawl.task_id = task.task_id
        INNER JOIN site_visits ON http_requests.visit_id = site_visits.visit_id
    """
    model_list = []
    self.connect()
    try:
        cursor = self.connection.cursor()
        try:
            cursor.execute(query)
            # Row indices below follow the column order of the SELECT list.
            for row in cursor.fetchall():
                # task model
                task_model = TaskModel()
                task_model.task_id = row[2]
                task_model.start_time = row[3]
                task_model.manager_params = row[4]
                task_model.openwpm_version = row[5]
                task_model.browser_version = row[6]

                # crawl model (references its task object, not the raw id)
                crawl_model = CrawlModel()
                crawl_model.crawl_id = row[1]
                crawl_model.task_id = task_model
                crawl_model.browser_params = row[7]
                crawl_model.screen_res = row[8]
                crawl_model.ua_string = row[9]
                crawl_model.finished = row[10]
                crawl_model.start_time = row[11]

                # site visits model
                site_visits_model = SiteVisitsModel()
                site_visits_model.visit_id = row[12]
                site_visits_model.crawl_id = crawl_model
                site_visits_model.site_url = row[13]

                # http requests model
                http_requests_model = HttpRequestsModel()
                http_requests_model.id = row[0]
                http_requests_model.crawl_id = crawl_model
                http_requests_model.visit_id = site_visits_model
                http_requests_model.url = row[14]
                http_requests_model.top_level_url = row[15]
                http_requests_model.method = row[16]
                http_requests_model.referrer = row[17]
                http_requests_model.headers = row[18]
                http_requests_model.channel_id = row[19]
                http_requests_model.is_XHR = row[20]
                http_requests_model.is_frame_load = row[21]
                http_requests_model.is_full_page = row[22]
                http_requests_model.is_third_party_channel = row[23]
                http_requests_model.is_third_party_window = row[24]
                http_requests_model.triggering_origin = row[25]
                http_requests_model.loading_origin = row[26]
                http_requests_model.loading_href = row[27]
                http_requests_model.req_call_stack = row[28]
                http_requests_model.content_policy_type = row[29]
                http_requests_model.post_body = row[30]
                http_requests_model.time_stamp = row[31]
                model_list.append(http_requests_model)
        finally:
            # Release the cursor even when the query or mapping fails.
            cursor.close()
    finally:
        # Always drop the connection opened by self.connect().
        self.disconnect()
    return model_list
def select(self):
    """Load every ``http_responses`` row with its crawl, task and site visit.

    Runs one query joining ``http_responses`` to ``crawl``, ``task`` and
    ``site_visits`` and turns each result row into a small object graph.

    Returns:
        list: One ``HttpResponsesModel`` per row. Its ``crawl_id`` attribute
        holds a ``CrawlModel`` (whose ``task_id`` holds a ``TaskModel``) and
        its ``visit_id`` attribute holds a ``SiteVisitsModel``.

    Raises:
        Any exception raised while querying or building the models is
        propagated unchanged, after the cursor has been closed and
        ``self.disconnect()`` has been called.
    """
    query = """
        SELECT
            http_responses.id,
            crawl.crawl_id,
            task.task_id,
            task.start_time,
            task.manager_params,
            task.openwpm_version,
            task.browser_version,
            crawl.browser_params,
            crawl.screen_res,
            crawl.ua_string,
            crawl.finished,
            crawl.start_time,
            site_visits.visit_id,
            site_visits.site_url,
            http_responses.url,
            http_responses.method,
            http_responses.referrer,
            http_responses.response_status,
            http_responses.response_status_text,
            http_responses.is_cached,
            http_responses.headers,
            http_responses.channel_id,
            http_responses.location,
            http_responses.time_stamp,
            http_responses.content_hash
        FROM http_responses
        INNER JOIN crawl ON http_responses.crawl_id = crawl.crawl_id
        INNER JOIN task ON crawl.task_id = task.task_id
        INNER JOIN site_visits ON http_responses.visit_id = site_visits.visit_id
    """
    model_list = []
    self.connect()
    try:
        cursor = self.connection.cursor()
        try:
            cursor.execute(query)
            # Row indices below follow the column order of the SELECT list.
            for row in cursor.fetchall():
                # task model
                task_model = TaskModel()
                task_model.task_id = row[2]
                task_model.start_time = row[3]
                task_model.manager_params = row[4]
                task_model.openwpm_version = row[5]
                task_model.browser_version = row[6]

                # crawl model (references its task object, not the raw id)
                crawl_model = CrawlModel()
                crawl_model.crawl_id = row[1]
                crawl_model.task_id = task_model
                crawl_model.browser_params = row[7]
                crawl_model.screen_res = row[8]
                crawl_model.ua_string = row[9]
                crawl_model.finished = row[10]
                crawl_model.start_time = row[11]

                # site visits model
                site_visits_model = SiteVisitsModel()
                site_visits_model.visit_id = row[12]
                site_visits_model.crawl_id = crawl_model
                site_visits_model.site_url = row[13]

                # http responses model
                http_responses_model = HttpResponsesModel()
                http_responses_model.id = row[0]
                http_responses_model.crawl_id = crawl_model
                http_responses_model.visit_id = site_visits_model
                http_responses_model.url = row[14]
                http_responses_model.method = row[15]
                http_responses_model.referrer = row[16]
                http_responses_model.response_status = row[17]
                http_responses_model.response_status_text = row[18]
                http_responses_model.is_cached = row[19]
                http_responses_model.headers = row[20]
                http_responses_model.channel_id = row[21]
                http_responses_model.location = row[22]
                http_responses_model.time_stamp = row[23]
                http_responses_model.content_hash = row[24]
                model_list.append(http_responses_model)
        finally:
            # Release the cursor even when the query or mapping fails.
            cursor.close()
    finally:
        # Always drop the connection opened by self.connect().
        self.disconnect()
    return model_list
def add_task(self, task_title: str, the_list: ListModel):
    """Create a task in ``the_list`` and register it in Redis.

    Builds a ``TaskModel`` from a freshly created id, ``task_title`` and the
    list's ``id_``, increments the ``tasks:index`` counter, appends the task
    id to the list's ``list:<id>:tasks`` Redis list, and finally saves the
    task.

    Args:
        task_title: Title passed to the new ``TaskModel``.
        the_list: List the task is attached to; only its ``id_`` is read.

    Returns:
        Whatever ``self.save_task`` returns for the new task.
    """
    new_id = self.__create_id()
    new_task = TaskModel(new_id, task_title, the_list.id_)

    redis = self.redis_connection
    redis.incr("tasks:index")
    tasks_key = "list:%s:tasks" % the_list.id_
    redis.rpush(tasks_key, new_task.id_)

    return self.save_task(new_task)