예제 #1
0
 def test_doesnt_pair_subs_with_differing_names(self):
     """Two submissions with clearly different names yield no duplicates.

     Creates one submission per name for the same organization, then
     asserts that ``SubmissionsService.find_duplicates`` returns a falsy
     result for all stored ``FormSubmission`` objects.
     """
     org = Organization.objects.get(slug='a_pubdef')
     applicant_names = (
         {
             "first_name": "Joe",
             "middle_name": "H",
             "last_name": "Parabola",
         },
         {
             "first_name": "Joseph",
             "middle_name": "H",
             "last_name": "Conic Intersection",
         },
     )
     # One submission per name, created in the order listed above.
     for name_fields in applicant_names:
         answers = get_answers_for_orgs([org], **name_fields)
         factories.FormSubmissionWithOrgsFactory.create(
             answers=answers, organizations=[org])
     all_submissions = FormSubmission.objects.all()
     duplicates = SubmissionsService.find_duplicates(all_submissions)
     self.assertFalse(duplicates)
예제 #2
0
 def test_doesnt_pair_subs_with_differing_names(self):
     """Submissions whose names differ must not be reported as duplicates.

     Two submissions are created for the same organization with
     different first and last names; ``find_duplicates`` over all
     ``FormSubmission`` objects is expected to return something falsy.
     """
     org = Organization.objects.get(slug='a_pubdef')

     def make_submission(**name_fields):
         # Build answers for the org with the given name, then persist.
         return factories.FormSubmissionWithOrgsFactory.create(
             answers=get_answers_for_orgs([org], **name_fields),
             organizations=[org],
         )

     make_submission(
         first_name="Joe", middle_name="H", last_name="Parabola")
     make_submission(
         first_name="Joseph", middle_name="H",
         last_name="Conic Intersection")
     found = SubmissionsService.find_duplicates(
         FormSubmission.objects.all())
     self.assertFalse(found)
예제 #3
0
 def test_finds_subs_with_similar_names(self):
     """Submissions with near-identical names land in one duplicate group.

     Creates three submissions for the same organization -- one with
     last name "Parabola" and two with "Parabole" -- and asserts that
     each of them is contained in the first group returned by
     ``SubmissionsService.find_duplicates``.
     """
     org = Organization.objects.get(slug='a_pubdef')
     original_name = {
         "first_name": "Joe",
         "middle_name": "H",
         "last_name": "Parabola",
     }
     misspelled_name = {
         "first_name": "Joe",
         "middle_name": "H",
         "last_name": "Parabole",
     }
     # Creation order matters only for readability: a, then b, then c.
     submissions = [
         factories.FormSubmissionWithOrgsFactory.create(
             answers=get_answers_for_orgs([org], **name_fields),
             organizations=[org],
         )
         for name_fields in (
             original_name, misspelled_name, misspelled_name)
     ]
     duplicate_groups = SubmissionsService.find_duplicates(
         FormSubmission.objects.all())
     first_group = duplicate_groups[0]
     for submission in submissions:
         self.assertIn(submission, first_group)
예제 #4
0
 def test_finds_subs_with_similar_names(self):
     """Similar names should be grouped together by ``find_duplicates``.

     Three submissions share first and middle name; their last names
     are "Parabola", "Parabole" and "Parabole". All three must appear
     in the first duplicate group returned for all ``FormSubmission``
     objects.
     """
     org = Organization.objects.get(slug='a_pubdef')

     def make_submission(last_name):
         # Only the last name varies between the three submissions.
         answers = get_answers_for_orgs(
             [org],
             first_name="Joe",
             middle_name="H",
             last_name=last_name)
         return factories.FormSubmissionWithOrgsFactory.create(
             answers=answers, organizations=[org])

     sub_a = make_submission("Parabola")
     sub_b = make_submission("Parabole")
     sub_c = make_submission("Parabole")
     groups = SubmissionsService.find_duplicates(
         FormSubmission.objects.all())
     first_group = groups[0]
     self.assertIn(sub_a, first_group)
     self.assertIn(sub_b, first_group)
     self.assertIn(sub_c, first_group)
예제 #5
0
 def test_finds_subs_with_similar_names(self):
     """Near-identical names are reported in the same duplicate group.

     Builds three submissions for the organization looked up by
     ``Organizations.ALAMEDA_PUBDEF`` (last names "Parabola",
     "Parabole", "Parabole") and asserts each is in the first group
     returned by ``SubmissionsService.find_duplicates``.
     """
     org = Organization.objects.get(slug=Organizations.ALAMEDA_PUBDEF)
     shared_fields = {"first_name": "Joe", "middle_name": "H"}
     last_names = ("Parabola", "Parabole", "Parabole")
     created = []
     for last_name in last_names:
         answers = mock.fake.alameda_pubdef_answers(
             last_name=last_name, **shared_fields)
         created.append(
             mock.FormSubmissionFactory.create(
                 answers=answers, organizations=[org]))
     duplicate_groups = SubmissionsService.find_duplicates(
         FormSubmission.objects.all())
     first_group = duplicate_groups[0]
     for submission in created:
         self.assertIn(submission, first_group)
예제 #6
0
 def test_doesnt_pair_subs_with_differing_names(self):
     """Clearly different names must not produce any duplicate group.

     Creates two submissions for the organization looked up by
     ``Organizations.ALAMEDA_PUBDEF`` with different first and last
     names and asserts ``find_duplicates`` returns a falsy result.
     """
     org = Organization.objects.get(slug=Organizations.ALAMEDA_PUBDEF)
     distinct_names = [
         {
             "first_name": "Joe",
             "middle_name": "H",
             "last_name": "Parabola",
         },
         {
             "first_name": "Joseph",
             "middle_name": "H",
             "last_name": "Conic Intersection",
         },
     ]
     for name_fields in distinct_names:
         mock.FormSubmissionFactory.create(
             answers=mock.fake.alameda_pubdef_answers(**name_fields),
             organizations=[org])
     everything = FormSubmission.objects.all()
     self.assertFalse(SubmissionsService.find_duplicates(everything))
예제 #7
0
 def handle(self, *args, **options):
     """Reconcile detected duplicate submissions with stored duplicate sets.

     Runs ``SubmissionsService.find_duplicates`` over every
     ``FormSubmission`` and compares each found set against the
     submissions of the existing ``DuplicateSubmissionSet`` objects:

     * a found set equal to an existing set is only counted;
     * a found set that overlaps an existing set is added to that
       existing set's submissions;
     * any other found set is saved as a new ``DuplicateSubmissionSet``.

     Only the first matching existing set is considered for each found
     set. Summary counts are written to ``self.stdout``.
     """
     dup_sets = SubmissionsService.find_duplicates(
         models.FormSubmission.objects.all())
     self.stdout.write("Found {} duplicate sets".format(len(dup_sets)))
     existing_dup_sets = models.DuplicateSubmissionSet.objects.all()
     self.stdout.write("{} duplicate sets already exist".format(
         existing_dup_sets.count()))
     count_already_existed = 0
     dup_set_extensions = {}
     new_dup_sets = []
     # Key each stored set by its member submissions so found sets can
     # be compared with plain set equality / intersection.
     existing_dup_set_lookups = {
         frozenset(dup_set.submissions.all()): dup_set
         for dup_set in existing_dup_sets
     }
     for dup_set in dup_sets:
         found_existing = False
         for lookup_set, existing in existing_dup_set_lookups.items():
             if dup_set == lookup_set:
                 # Exact match: nothing to store.
                 found_existing = True
                 count_already_existed += 1
                 break
             elif dup_set & lookup_set:
                 # Partial overlap: remember to extend the stored set.
                 dup_set_extensions[existing] = dup_set
                 found_existing = True
                 break
         if not found_existing:
             new_dup_sets.append(dup_set)
     # Iterate key/value pairs; iterating the dict directly yields only
     # the keys and cannot be unpacked into two names.
     for existing, new_dups in dup_set_extensions.items():
         existing.submissions.add(*new_dups)
     self.stdout.write(
         "{} found duplicate sets were existing".format(
             count_already_existed))
     self.stdout.write("Extended {} existing duplicate sets".format(
         len(dup_set_extensions)))
     for new_set in new_dup_sets:
         # The object must be saved before its many-to-many can be filled.
         new_dup_set_object = models.DuplicateSubmissionSet()
         new_dup_set_object.save()
         new_dup_set_object.submissions.add(*new_set)
     self.stdout.write("Created {} new duplicate sets".format(
         len(new_dup_sets)))
예제 #8
0
 def handle(self, *args, **options):
     """Sync duplicate sets found by the service with the stored ones.

     Every set returned by ``SubmissionsService.find_duplicates`` (run
     over all ``FormSubmission`` objects) is checked against the
     submissions of each existing ``DuplicateSubmissionSet``:

     * equal to an existing set -> counted as already existing;
     * overlapping an existing set -> its submissions are added to
       that existing set;
     * otherwise -> a new ``DuplicateSubmissionSet`` is created.

     The search stops at the first existing set that matches. Summary
     counts are written to ``self.stdout``.
     """
     dup_sets = SubmissionsService.find_duplicates(
         models.FormSubmission.objects.all())
     self.stdout.write("Found {} duplicate sets".format(len(dup_sets)))
     existing_dup_sets = models.DuplicateSubmissionSet.objects.all()
     self.stdout.write("{} duplicate sets already exist".format(
         existing_dup_sets.count()))
     count_already_existed = 0
     dup_set_extensions = {}
     new_dup_sets = []
     # Map frozenset-of-submissions -> stored set for set comparisons.
     existing_dup_set_lookups = {
         frozenset(dup_set.submissions.all()): dup_set
         for dup_set in existing_dup_sets
     }
     for dup_set in dup_sets:
         found_existing = False
         for lookup_set, existing in existing_dup_set_lookups.items():
             if dup_set == lookup_set:
                 # Already stored exactly as found.
                 found_existing = True
                 count_already_existed += 1
                 break
             elif dup_set & lookup_set:
                 # Shares members with a stored set: extend it later.
                 dup_set_extensions[existing] = dup_set
                 found_existing = True
                 break
         if not found_existing:
             new_dup_sets.append(dup_set)
     # .items() is required here: iterating the dict itself yields only
     # keys, which cannot be unpacked into (existing, new_dups).
     for existing, new_dups in dup_set_extensions.items():
         existing.submissions.add(*new_dups)
     self.stdout.write("{} found duplicate sets were existing".format(
         count_already_existed))
     self.stdout.write("Extended {} existing duplicate sets".format(
         len(dup_set_extensions)))
     for new_set in new_dup_sets:
         # Save first so the many-to-many relation can be populated.
         new_dup_set_object = models.DuplicateSubmissionSet()
         new_dup_set_object.save()
         new_dup_set_object.submissions.add(*new_set)
     self.stdout.write("Created {} new duplicate sets".format(
         len(new_dup_sets)))