def testCalculateDuplicatesForSingleOrg(self):
    """Test that calculate creates GSoCProposalDuplicate entities for a
    single organization.

    The status is forced to 'processing' first, then one calculate request
    is posted. The test checks the re-enqueued task, the stored duplicate
    entities and that the status is still 'processing' afterwards.
    """
    # skip the initialization step
    status = duplicates_logic.getOrCreateStatusForProgram(self.gsoc)
    status.status = 'processing'
    status.put()

    post_data = {'program_key': self.gsoc.key().id_or_name()}
    response = self.post(self.CALCULATE_URL, post_data)

    # must have enqueued itself again successfully
    self.assertEqual(response.status_code, httplib.OK)
    self.assertTasksInQueue(n=1)
    self.assertTasksInQueue(n=1, url=self.CALCULATE_URL)

    # the new task should have a query cursor present
    params = self.get_tasks()[0]['params']
    # membership test instead of the deprecated dict.has_key()
    self.assertTrue('org_cursor' in params)
    self.assertEqual(
        params['program_key'],
        urllib.quote_plus(self.gsoc.key().id_or_name()))

    # 2 duplicates should have been created since there are 2 students
    duplicates = GSoCProposalDuplicate.all().fetch(1000)
    self.assertLength(duplicates, 2)
    for dup in duplicates:
        # only the record belonging to student1 is expected to be flagged
        if dup.student.key() == self.student1.key():
            self.assertTrue(dup.is_duplicate)
        else:
            self.assertFalse(dup.is_duplicate)

    status = duplicates_logic.getOrCreateStatusForProgram(self.gsoc)
    self.assertEqual(status.status, 'processing')
def testGetOrCreateStatusForProgram(self):
    """Tests that getOrCreateStatusForProgram returns a status entity bound
    to the given program, whether or not one already existed.
    """
    # First case: per the fixture, program1 has no ProposalDuplicateStatus
    # entity yet, so the returned one must reference program1.
    created_status = duplicate_logic.getOrCreateStatusForProgram(
        self.program1)
    self.assertEqual(created_status.program, self.program1)

    # Second case: program2 already has a ProposalDuplicateStatus entity
    # (self.gpds); the very same entity must be returned.
    fetched_status = duplicate_logic.getOrCreateStatusForProgram(
        self.program2)
    self.assertEqual(fetched_status.key(), self.gpds.key())
def testStartEnqueuesWhenRepeatIsFalse(self):
    """Test that start enqueues exactly one calculate task when repeat is
    'no'.
    """
    program_key = self.gsoc.key().id_or_name()

    response = self.post(
        self.START_URL, {'program_key': program_key, 'repeat': 'no'})
    self.assertEqual(response.status_code, httplib.OK)

    # Exactly one task may be queued, and it has to target the calculate
    # URL.
    self.assertTasksInQueue(1)
    self.assertTasksInQueue(n=1, url=self.CALCULATE_URL)

    # The queued task carries only the URL-quoted program key.
    queued_task = self.get_tasks()[0]
    self.assertEqual(
        queued_task['params'],
        {'program_key': urllib.quote_plus(program_key)})

    # Starting must have switched the status to 'processing'.
    status = duplicates_logic.getOrCreateStatusForProgram(self.gsoc)
    self.assertEqual(status.status, 'processing')
def testStartEnqueuesWhenRepeatIsTrue(self):
    """Test that start enqueues one calculate task and one start task when
    repeat is 'yes'.
    """
    program_key = self.gsoc.key().id_or_name()

    response = self.post(
        self.START_URL, {'program_key': program_key, 'repeat': 'yes'})
    self.assertEqual(response.status_code, httplib.OK)

    # Two tasks in total: one per URL.
    self.assertTasksInQueue(2)
    self.assertTasksInQueue(n=1, url=self.CALCULATE_URL)
    self.assertTasksInQueue(n=1, url=self.START_URL)

    quoted_key = urllib.quote_plus(program_key)
    for queued_task in self.get_tasks():
        # Only the re-scheduled start task carries the repeat flag.
        if queued_task['url'] == self.START_URL:
            expected = {'program_key': quoted_key, 'repeat': 'yes'}
        else:
            expected = {'program_key': quoted_key}
        self.assertEqual(queued_task['params'], expected)

    status = duplicates_logic.getOrCreateStatusForProgram(self.gsoc)
    self.assertEqual(status.status, 'processing')
def testCalculateDuplicatesTerminates(self):
    """Test that calculate terminates properly after going through all orgs.

    Runs the calculate task twice: the first request processes the only
    organization and re-enqueues itself; the second request is posted with
    the parameters of that queued task and must finish without enqueuing
    anything, leaving one duplicate record and an 'idle' status.
    """
    # skip the initialization step
    status = duplicates_logic.getOrCreateStatusForProgram(self.gsoc)
    status.status = 'processing'
    status.put()

    post_data = {'program_key': self.gsoc.key().id_or_name()}
    response = self.post(self.CALCULATE_URL, post_data)

    # must have enqueued itself again successfully
    self.assertEqual(response.status_code, httplib.OK)
    self.assertTasksInQueue(n=1)
    self.assertTasksInQueue(n=1, url=self.CALCULATE_URL)

    # This data should be used for the second iteration. The queued values
    # are URL-quoted, so decode them into a new dict rather than rewriting
    # the task's own params dict while iterating over it.
    queued_params = self.get_tasks()[0]['params']
    params = {
        key: urllib.unquote_plus(value)
        for key, value in queued_params.iteritems()
    }

    # clean the queue
    self.clear_task_queue()

    response = self.post(self.CALCULATE_URL, params)

    # only 1 org in test data so task should terminate now
    self.assertEqual(response.status_code, httplib.OK)
    self.assertTasksInQueue(n=0)

    # 1 duplicate should be left after task termination
    duplicates = GSoCProposalDuplicate.all().fetch(1000)
    self.assertEqual(len(duplicates), 1)
    dup = duplicates[0]
    self.assertTrue(dup.is_duplicate)

    # read the stored student key without dereferencing the property
    student_key = GSoCProposalDuplicate.student.get_value_for_datastore(dup)
    self.assertEqual(student_key, self.student1.key.to_old_key())
    self.assertEqual(
        len(dup.duplicates), _FIRST_STUDENT_NUMBER_OF_DUPLICATES)

    status = duplicates_logic.getOrCreateStatusForProgram(self.program)
    self.assertEqual(status.status, 'idle')
def testCalculateDuplicatesTerminates(self):
    """Test that calculate terminates properly after going through all orgs.

    The first calculate request must re-enqueue itself. Replaying that
    queued task must end the chain: no task left in the queue, one
    duplicate record remaining and the status back to 'idle'.
    """
    # skip the initialization step
    status = duplicates_logic.getOrCreateStatusForProgram(self.gsoc)
    status.status = 'processing'
    status.put()

    response = self.post(
        self.CALCULATE_URL, {'program_key': self.gsoc.key().id_or_name()})

    # must have enqueued itself again successfully
    self.assertEqual(response.status_code, httplib.OK)
    self.assertTasksInQueue(n=1)
    self.assertTasksInQueue(n=1, url=self.CALCULATE_URL)

    # Parameters for the second iteration: decode the URL-quoted values of
    # the queued task into a separate dict, leaving the task's params
    # untouched instead of mutating them during iteration.
    task_params = self.get_tasks()[0]['params']
    second_post_data = dict(
        (name, urllib.unquote_plus(quoted))
        for name, quoted in task_params.iteritems())

    # clean the queue
    self.clear_task_queue()

    response = self.post(self.CALCULATE_URL, second_post_data)

    # only 1 org in test data so task should terminate now
    self.assertEqual(response.status_code, httplib.OK)
    self.assertTasksInQueue(n=0)

    # 1 duplicate should be left after task termination
    duplicates = GSoCProposalDuplicate.all().fetch(1000)
    self.assertEqual(len(duplicates), 1)
    dup = duplicates[0]
    self.assertTrue(dup.is_duplicate)

    # compare raw keys; get_value_for_datastore avoids loading the student
    student_key = GSoCProposalDuplicate.student.get_value_for_datastore(dup)
    self.assertEqual(student_key, self.student1.key.to_old_key())
    self.assertEqual(
        len(dup.duplicates), _FIRST_STUDENT_NUMBER_OF_DUPLICATES)

    status = duplicates_logic.getOrCreateStatusForProgram(self.program)
    self.assertEqual(status.status, 'idle')
def context(self, data, check, mutator):
    """Builds the template context for the duplicates page.

    Collects up to 1000 GSoCProposalDuplicate entities of the current
    program that are flagged as duplicates, wraps each in a Duplicate
    object, and adds the program's duplicate status.
    """
    program = data.program

    query = GSoCProposalDuplicate.all()
    query.filter('program', program)
    query.filter('is_duplicate', True)

    duplicates = []
    for entity in query.fetch(1000):
        duplicates.append(Duplicate(data, entity))

    status = duplicates_logic.getOrCreateStatusForProgram(program)

    return {
        'page_name': 'Duplicates for %s' %program.name,
        'duplicates_status': status,
        'duplicates': duplicates,
    }
def context(self, data, check, mutator):
    """Returns the context dictionary used to render this page.

    The context holds the page name, the program's duplicate status and a
    list of Duplicate wrappers for the program's flagged records (at most
    1000 are fetched).
    """
    current_program = data.program

    # only records of this program that are marked as real duplicates
    flagged_query = GSoCProposalDuplicate.all()
    flagged_query.filter('program', current_program)
    flagged_query.filter('is_duplicate', True)
    flagged_entities = flagged_query.fetch(1000)

    wrapped = [Duplicate(data, record) for record in flagged_entities]

    page_context = {}
    page_context['page_name'] = 'Duplicates for %s' % current_program.name
    page_context['duplicates_status'] = (
        duplicates_logic.getOrCreateStatusForProgram(current_program))
    page_context['duplicates'] = wrapped
    return page_context
def testStartEnqueuesWhenRepeatIsFalse(self):
    """Test that a non-repeating start request enqueues a single calculate
    task.
    """
    request_data = {
        'program_key': self.gsoc.key().id_or_name(),
        'repeat': 'no',
    }
    response = self.post(self.START_URL, request_data)
    self.assertEqual(httplib.OK, response.status_code)

    # The queue must hold one task overall and that task must be the
    # calculate task.
    self.assertTasksInQueue(1)
    self.assertTasksInQueue(n=1, url=self.CALCULATE_URL)

    only_task = self.get_tasks()[0]
    quoted_key = urllib.quote_plus(self.gsoc.key().id_or_name())
    self.assertEqual(only_task['params'], {'program_key': quoted_key})

    # The duplicate status is expected to be 'processing' after start.
    current_status = duplicates_logic.getOrCreateStatusForProgram(self.gsoc)
    self.assertEqual(current_status.status, 'processing')
def testStartEnqueuesWhenRepeatIsTrue(self):
    """Test that a repeating start request enqueues a calculate task plus a
    follow-up start task.
    """
    request_data = {
        'program_key': self.gsoc.key().id_or_name(),
        'repeat': 'yes',
    }
    response = self.post(self.START_URL, request_data)
    self.assertEqual(response.status_code, httplib.OK)

    # one calculate task and one start task, nothing else
    self.assertTasksInQueue(2)
    self.assertTasksInQueue(n=1, url=self.CALCULATE_URL)
    self.assertTasksInQueue(n=1, url=self.START_URL)

    for task in self.get_tasks():
        wanted = {}
        wanted['program_key'] = urllib.quote_plus(
            self.gsoc.key().id_or_name())
        is_start_task = task['url'] == self.START_URL
        if is_start_task:
            # the follow-up start task must keep repeating
            wanted['repeat'] = 'yes'
        self.assertEqual(task['params'], wanted)

    current_status = duplicates_logic.getOrCreateStatusForProgram(self.gsoc)
    self.assertEqual(current_status.status, 'processing')
def calculate(self, request, *args, **kwargs):
    """Calculates the duplicate proposals in a given program for a student
    on a per Organization basis.

    Each invocation processes at most one organization and then enqueues a
    follow-up task carrying the cursor for the next one. Once no
    organization is left, the non-duplicate records are deleted and the
    program's duplicate status is set to 'idle'.

    Expects the following to be present in the POST dict:
      program_key: Specifies the program key name for which to find the
                   duplicate proposals
      org_cursor: Specifies the organization datastore cursor from which to
                  start the processing of finding the duplicate proposals
                  (may be absent, in which case the query starts from the
                  beginning)

    Args:
      request: Django Request object

    Returns:
      An empty http.HttpResponse, or the result of
      error_handler.logErrorAndReturnOK when the POST data is invalid.
    """
    post_dict = request.POST

    program_key = post_dict.get('program_key')
    if not program_key:
        # invalid task data, log and return OK
        return error_handler.logErrorAndReturnOK(
            'Invalid program key: %s' % post_dict)

    program_entity = GSoCProgram.get_by_key_name(program_key)
    if not program_entity:
        # invalid program specified, log and return OK
        return error_handler.logErrorAndReturnOK(
            'Invalid program specified: %s' % program_key)

    # get the organization and update the cursor if possible
    query = soc_org_model.SOCOrganization.query(
        soc_org_model.SOCOrganization.status == org_model.Status.ACCEPTED,
        soc_org_model.SOCOrganization.program ==
        ndb.Key.from_old_key(program_entity.key()),
        soc_org_model.SOCOrganization.slot_allocation > 0)

    # retrieve the org_cursor from POST data
    org_cursor = post_dict.get('org_cursor')
    start_cursor = (
        datastore_query.Cursor(urlsafe=org_cursor) if org_cursor else None)

    # one organization per task invocation
    organizations, next_cursor, _ = query.fetch_page(
        1, start_cursor=start_cursor)

    if organizations:
        organization = organizations[0]
        org_key = organization.key.to_old_key()

        # get all the proposals likely to be accepted in the program
        accepted_proposals = (
            proposal_logic.getProposalsToBeAcceptedForOrg(organization))

        for accepted_proposal in accepted_proposals:
            proposal_key = accepted_proposal.key()

            q = GSoCProposalDuplicate.all()
            q.filter('student', accepted_proposal.parent_key())
            proposal_duplicate = q.get()

            if not proposal_duplicate:
                # first (to-be) accepted proposal seen for this student
                proposal_duplicate = GSoCProposalDuplicate(
                    program=program_entity,
                    student=accepted_proposal.parent_key(),
                    orgs=[org_key],
                    duplicates=[proposal_key],
                    is_duplicate=False)
            elif proposal_key in proposal_duplicate.duplicates:
                # Already counted (e.g. the task ran twice for this
                # organization); creating another record here would leave
                # two records for the same student.
                continue
            else:
                # non-counted (to-be) accepted proposal found
                proposal_duplicate.duplicates = (
                    proposal_duplicate.duplicates + [proposal_key])
                proposal_duplicate.is_duplicate = (
                    len(proposal_duplicate.duplicates) >= 2)
                if org_key not in proposal_duplicate.orgs:
                    proposal_duplicate.orgs = (
                        proposal_duplicate.orgs + [org_key])

            proposal_duplicate.put()

        if next_cursor is not None:
            # Adds a new task that performs duplicate calculation for
            # the next organization.
            task_params = {
                'program_key': program_key,
                'org_cursor': next_cursor.urlsafe()
            }
            task_url = '/tasks/gsoc/proposal_duplicates/calculate'
            new_task = taskqueue.Task(params=task_params, url=task_url)
            new_task.add()
            return http.HttpResponse()
        # No cursor was returned, so there is nothing to continue from;
        # fall through and finish the calculation right away.

    # There aren't any more organizations to process. So delete
    # all the proposals for which there are not more than one
    # proposal for duplicates property.
    duplicates_logic.deleteAllForProgram(program_entity, non_dupes_only=True)

    # update the proposal duplicate status and its timestamp
    pds_entity = duplicates_logic.getOrCreateStatusForProgram(program_entity)
    pds_entity.status = 'idle'
    pds_entity.calculated_on = datetime.datetime.now()
    pds_entity.put()

    # return OK
    return http.HttpResponse()
def calculate(self, request, *args, **kwargs):
    """Calculates the duplicate proposals in a given program for a student
    on a per Organization basis.

    One organization is handled per invocation. If an organization was
    processed and a cursor for the next page is available, a follow-up
    calculate task is enqueued; otherwise the calculation is finished by
    deleting the non-duplicate records and marking the status 'idle'.

    Expects the following to be present in the POST dict:
      program_key: Specifies the program key name for which to find the
                   duplicate proposals
      org_cursor: Specifies the organization datastore cursor from which to
                  start the processing of finding the duplicate proposals
                  (when missing or empty, no start cursor is used)

    Args:
      request: Django Request object

    Returns:
      An empty http.HttpResponse, or the result of
      error_handler.logErrorAndReturnOK when the POST data is invalid.
    """
    post_dict = request.POST

    program_key = post_dict.get('program_key')
    if not program_key:
        # invalid task data, log and return OK
        return error_handler.logErrorAndReturnOK(
            'Invalid program key: %s' % post_dict)

    program_entity = GSoCProgram.get_by_key_name(program_key)
    if not program_entity:
        # invalid program specified, log and return OK
        return error_handler.logErrorAndReturnOK(
            'Invalid program specified: %s' % program_key)

    # get the organization and update the cursor if possible
    org_cls = soc_org_model.SOCOrganization
    query = org_cls.query(
        org_cls.status == org_model.Status.ACCEPTED,
        org_cls.program == ndb.Key.from_old_key(program_entity.key()),
        org_cls.slot_allocation > 0)

    # retrieve the org_cursor from POST data
    org_cursor = post_dict.get('org_cursor')
    if org_cursor:
        start_cursor = datastore_query.Cursor(urlsafe=org_cursor)
    else:
        start_cursor = None

    organizations, next_cursor, _ = query.fetch_page(
        1, start_cursor=start_cursor)

    enqueue_next = False
    if organizations:
        organization = organizations[0]
        org_key = organization.key.to_old_key()

        # get all the proposals likely to be accepted in the program
        accepted_proposals = (
            proposal_logic.getProposalsToBeAcceptedForOrg(organization))

        for accepted_proposal in accepted_proposals:
            proposal_key = accepted_proposal.key()
            student_key = accepted_proposal.parent_key()

            q = GSoCProposalDuplicate.all()
            q.filter('student', student_key)
            proposal_duplicate = q.get()

            if proposal_duplicate:
                if proposal_key in proposal_duplicate.duplicates:
                    # This proposal is already recorded for the student
                    # (for instance after a task retry). Do not create a
                    # second record for the same student; nothing to
                    # update.
                    continue

                # non-counted (to-be) accepted proposal found
                proposal_duplicate.duplicates = (
                    proposal_duplicate.duplicates + [proposal_key])
                proposal_duplicate.is_duplicate = (
                    len(proposal_duplicate.duplicates) >= 2)
                if org_key not in proposal_duplicate.orgs:
                    proposal_duplicate.orgs = (
                        proposal_duplicate.orgs + [org_key])
            else:
                # no record for this student yet; start one
                pd_fields = {
                    'program': program_entity,
                    'student': student_key,
                    'orgs': [org_key],
                    'duplicates': [proposal_key],
                    'is_duplicate': False
                }
                proposal_duplicate = GSoCProposalDuplicate(**pd_fields)

            proposal_duplicate.put()

        # Without a cursor there is no way to continue, so the calculation
        # is finished in this invocation instead of failing on None.
        enqueue_next = next_cursor is not None

    if enqueue_next:
        # Adds a new task that performs duplicate calculation for
        # the next organization.
        task_params = {
            'program_key': program_key,
            'org_cursor': next_cursor.urlsafe()
        }
        task_url = '/tasks/gsoc/proposal_duplicates/calculate'
        new_task = taskqueue.Task(params=task_params, url=task_url)
        new_task.add()
    else:
        # There aren't any more organizations to process. So delete
        # all the proposals for which there are not more than one
        # proposal for duplicates property.
        duplicates_logic.deleteAllForProgram(
            program_entity, non_dupes_only=True)

        # update the proposal duplicate status and its timestamp
        pds_entity = duplicates_logic.getOrCreateStatusForProgram(
            program_entity)
        pds_entity.status = 'idle'
        pds_entity.calculated_on = datetime.datetime.now()
        pds_entity.put()

    # return OK
    return http.HttpResponse()
def calculate(self, request, *args, **kwargs):
    """Calculates the duplicate proposals in a given program for a student
    on a per Organization basis.

    One organization is fetched per invocation. If one is found, its
    to-be-accepted proposals are recorded and a follow-up calculate task is
    enqueued with the updated cursor; otherwise the non-duplicate records
    are deleted and the program's duplicate status is set to 'idle'.

    Expects the following to be present in the POST dict:
      program_key: Specifies the program key name for which to find the
                   duplicate proposals
      org_cursor: Specifies the organization datastore cursor from which to
                  start the processing of finding the duplicate proposals
                  (only applied to the query when present)

    Args:
      request: Django Request object

    Returns:
      An empty http.HttpResponse, or the result of
      error_handler.logErrorAndReturnOK when the POST data is invalid.
    """
    post_dict = request.POST

    program_key = post_dict.get('program_key')
    if not program_key:
        # invalid task data, log and return OK
        return error_handler.logErrorAndReturnOK(
            'Invalid program key: %s' % post_dict)

    program_entity = GSoCProgram.get_by_key_name(program_key)
    if not program_entity:
        # invalid program specified, log and return OK
        return error_handler.logErrorAndReturnOK(
            'Invalid program specified: %s' % program_key)

    # get the organization and update the cursor if possible
    org_query = GSoCOrganization.all()
    org_query.filter('status', 'active')
    org_query.filter('scope', program_entity)
    org_query.filter('slots >', 0)

    # retrieve the org_cursor from POST data
    org_cursor = post_dict.get('org_cursor')

    if org_cursor:
        org_cursor = str(org_cursor)
        org_query.with_cursor(org_cursor)

    org_entity = org_query.get()

    # update the cursor
    org_cursor = org_query.cursor()

    if org_entity:
        org_key = org_entity.key()

        # get all the proposals likely to be accepted in the program
        accepted_proposals = proposal_logic.getProposalsToBeAcceptedForOrg(
            org_entity)

        for ap in accepted_proposals:
            student_entity = ap.parent()
            proposal_key = ap.key()

            # separate query object so the organization query above is not
            # shadowed
            dup_query = GSoCProposalDuplicate.all()
            dup_query.filter('student', student_entity)
            proposal_duplicate = dup_query.get()

            if not proposal_duplicate:
                # no record for this student yet; start one
                pd_fields = {
                    'program': program_entity,
                    'student': student_entity,
                    'orgs': [org_key],
                    'duplicates': [proposal_key],
                    'is_duplicate': False
                }
                proposal_duplicate = GSoCProposalDuplicate(**pd_fields)
            elif proposal_key in proposal_duplicate.duplicates:
                # The proposal is already recorded for this student (e.g.
                # the task was executed twice). Creating a new record here
                # would leave two records for the same student, so skip.
                continue
            else:
                # non-counted (to-be) accepted proposal found
                proposal_duplicate.duplicates = (
                    proposal_duplicate.duplicates + [proposal_key])
                proposal_duplicate.is_duplicate = (
                    len(proposal_duplicate.duplicates) >= 2)
                if org_key not in proposal_duplicate.orgs:
                    proposal_duplicate.orgs = (
                        proposal_duplicate.orgs + [org_key])

            proposal_duplicate.put()

        # Adds a new task that performs duplicate calculation for
        # the next organization.
        task_params = {'program_key': program_key,
                       'org_cursor': unicode(org_cursor)}
        task_url = '/tasks/gsoc/proposal_duplicates/calculate'

        new_task = taskqueue.Task(params=task_params, url=task_url)
        new_task.add()
    else:
        # There aren't any more organizations to process. So delete
        # all the proposals for which there are not more than one
        # proposal for duplicates property.
        duplicates_logic.deleteAllForProgram(
            program_entity, non_dupes_only=True)

        # update the proposal duplicate status and its timestamp
        pds_entity = duplicates_logic.getOrCreateStatusForProgram(
            program_entity)
        pds_entity.status = 'idle'
        pds_entity.calculated_on = datetime.datetime.now()
        pds_entity.put()

    # return OK
    return http.HttpResponse()
def start(self, request, *args, **kwargs):
    """Starts the task to find all duplicate proposals which are about to be
    accepted for a single GSoCProgram.

    When the program's duplicate status is 'idle', all old duplicate records
    are deleted, a calculate task is enqueued and the status is switched to
    'processing' (the latter two inside one datastore transaction).

    Expects the following to be present in the POST dict:
      program_key: Specifies the program key name for which to find the
                   duplicate proposals
      repeat: Specifies if a new task that must be performed again an hour
              later, with the same POST data; only the value 'yes'
              triggers the repetition, but some non-empty value is required

    Args:
      request: Django Request object

    Returns:
      An empty http.HttpResponse, or the result of
      error_handler.logErrorAndReturnOK when the POST data is invalid.
    """
    from soc.logic.helper import timeline as timeline_helper

    post_dict = request.POST

    # retrieve the program_key and repeat option from POST data
    program_key = post_dict.get('program_key')
    repeat = post_dict.get('repeat')

    if not (program_key and repeat):
        # invalid task data, log and return OK
        return error_handler.logErrorAndReturnOK(
            'Invalid task data: %s' % post_dict)

    # get the program for the given keyname
    program_entity = GSoCProgram.get_by_key_name(program_key)

    if not program_entity:
        # invalid program specified, log and return OK
        return error_handler.logErrorAndReturnOK(
            'Invalid program specified: %s' % program_key)

    # obtain the proposal duplicate status
    pds_entity = duplicates_logic.getOrCreateStatusForProgram(program_entity)

    if pds_entity.status == 'idle':
        # delete all old duplicates
        duplicates_logic.deleteAllForProgram(program_entity)

        def txn():
            # The Task object is created inside the transaction function so
            # that every attempt of the transaction adds a fresh instance
            # instead of re-adding one that was already enqueued.
            calculate_task = taskqueue.Task(
                params={'program_key': program_key},
                url='/tasks/gsoc/proposal_duplicates/calculate')

            # add a new task that performs duplicate calculation per
            # organization
            calculate_task.add(transactional=True)

            # update the status of the PDS entity to processing
            pds_entity.status = 'processing'
            pds_entity.put()

        db.RunInTransaction(txn)

    # Add a new clone of this task that must be performed an hour later
    # because the current task is part of the task that repeatedly runs but
    # repeat it before accepted students are announced only.
    if repeat == 'yes' and timeline_helper.isBeforeEvent(
            program_entity.timeline, 'accepted_students_announced_deadline'):
        # pass along these params as POST to the new task
        repeat_params = {'program_key': program_key, 'repeat': 'yes'}
        repeat_url = '/tasks/gsoc/proposal_duplicates/start'

        # countdown is in seconds: run again in one hour
        repeat_task = taskqueue.Task(
            params=repeat_params, url=repeat_url, countdown=3600)
        repeat_task.add()

    # return OK
    return http.HttpResponse()
def start(self, request, *args, **kwargs):
    """Starts the task to find all duplicate proposals which are about to be
    accepted for a single GSoCProgram.

    If the duplicate status of the program is 'idle', the old duplicate
    records are removed and, in a single datastore transaction, a calculate
    task is enqueued and the status becomes 'processing'. Independently of
    that, a delayed copy of this start task may be enqueued (see repeat).

    Expects the following to be present in the POST dict:
      program_key: Specifies the program key name for which to find the
                   duplicate proposals
      repeat: Specifies if a new task that must be performed again an hour
              later, with the same POST data ('yes' to repeat; the value
              must be non-empty in any case)

    Args:
      request: Django Request object

    Returns:
      An empty http.HttpResponse, or the result of
      error_handler.logErrorAndReturnOK when the POST data is invalid.
    """
    from soc.logic.helper import timeline as timeline_helper

    post_dict = request.POST

    # retrieve the program_key and repeat option from POST data
    program_key = post_dict.get('program_key')
    repeat = post_dict.get('repeat')

    if not (program_key and repeat):
        # invalid task data, log and return OK
        return error_handler.logErrorAndReturnOK(
            'Invalid task data: %s' % post_dict)

    # get the program for the given keyname
    program_entity = GSoCProgram.get_by_key_name(program_key)

    if not program_entity:
        # invalid program specified, log and return OK
        return error_handler.logErrorAndReturnOK(
            'Invalid program specified: %s' % program_key)

    # obtain the proposal duplicate status
    pds_entity = duplicates_logic.getOrCreateStatusForProgram(
        program_entity)

    if pds_entity.status == 'idle':
        # delete all old duplicates
        duplicates_logic.deleteAllForProgram(program_entity)

        # pass these data along params as POST to the new task
        calculate_params = {'program_key': program_key}
        calculate_url = '/tasks/gsoc/proposal_duplicates/calculate'

        def txn():
            # Construct the Task here rather than outside: if the
            # transaction function is run again, a Task instance that was
            # already added must not be added a second time.
            calculate_task = taskqueue.Task(
                params=calculate_params, url=calculate_url)

            # add a new task that performs duplicate calculation per
            # organization
            calculate_task.add(transactional=True)

            # update the status of the PDS entity to processing
            pds_entity.status = 'processing'
            pds_entity.put()

        db.RunInTransaction(txn)

    # Add a new clone of this task that must be performed an hour later
    # because the current task is part of the task that repeatedly runs but
    # repeat it before accepted students are announced only.
    should_repeat = repeat == 'yes' and timeline_helper.isBeforeEvent(
        program_entity.timeline, 'accepted_students_announced_deadline')
    if should_repeat:
        # pass along these params as POST to the new task; the countdown
        # of 3600 seconds delays it by one hour
        followup_task = taskqueue.Task(
            params={'program_key': program_key, 'repeat': 'yes'},
            url='/tasks/gsoc/proposal_duplicates/start',
            countdown=3600)
        followup_task.add()

    # return OK
    return http.HttpResponse()