def add_document(url_info):
    """Make sure registrant, document and metadata rows exist for one URL.

    ``url_info`` is read by key: ``url``, ``reg_id``, ``reg_name``,
    ``doc_type`` and ``stamp_date``.  Each of the three records is only
    created when a lookup for it finds nothing; existing rows are left
    untouched.  The registrant name is printed on every call.
    """
    url = str(url_info['url']).strip()
    reg_id = url_info['reg_id']
    print(url_info['reg_name'])

    registrant_known = Registrant.objects.filter(reg_id=reg_id).exists()
    if not registrant_known:
        Registrant(reg_id=reg_id, reg_name=url_info['reg_name']).save()

    document_known = Document.objects.filter(url=url).exists()
    if not document_known:
        document_fields = {
            'url': url,
            'reg_id': url_info['reg_id'],
            'doc_type': url_info['doc_type'],
            'stamp_date': url_info['stamp_date'],
        }
        Document(**document_fields).save()

    if MetaData.objects.filter(link=url).exists():
        return

    # The metadata row points back at the document through its id, so the
    # stored document is fetched even if it was created just above.
    stored = Document.objects.get(url=url)
    metadata = MetaData(
        link=url,
        upload_date=datetime.date.today(),
        reviewed=False,
        processed=False,
        is_amendment=False,
        form=stored.id,
    )
    metadata.save()
def add_document(url_info):
    """Create the registrant, document and metadata for a URL if absent.

    Reads ``url``, ``reg_id``, ``reg_name``, ``doc_type`` and
    ``stamp_date`` from ``url_info``.  Prints the registrant name, then
    adds whichever of the three records cannot be found yet.
    """
    def is_missing(model, **lookup):
        # True when no row of ``model`` matches the lookup.
        return not model.objects.filter(**lookup).exists()

    url = str(url_info['url']).strip()
    reg_id = url_info['reg_id']
    print(url_info['reg_name'])

    if is_missing(Registrant, reg_id=reg_id):
        new_registrant = Registrant(
            reg_id=reg_id,
            reg_name=url_info['reg_name'],
        )
        new_registrant.save()

    if is_missing(Document, url=url):
        new_document = Document(
            url=url,
            reg_id=url_info['reg_id'],
            doc_type=url_info['doc_type'],
            stamp_date=url_info['stamp_date'],
        )
        new_document.save()

    if is_missing(MetaData, link=url):
        # Look the document up again: it may have existed before this call.
        stored_document = Document.objects.get(url=url)
        new_metadata = MetaData(
            link=url,
            upload_date=datetime.date.today(),
            reviewed=False,
            processed=False,
            is_amendment=False,
            form=stored_document.id,
        )
        new_metadata.save()
def handle(self, pythonpath, verbosity, traceback, settings):
    """Report records whose registrant id does not appear in their link.

    Walks every Contact, Contribution, Payment and Disbursement, compares
    ``str(record.registrant.reg_id)`` with ``record.link`` and prints each
    mismatch followed by a per-model total.  A mismatched contribution is
    also re-pointed at a Registrant built from the id parsed out of its
    link, and saved.

    The four parameters are accepted but not read by this method.
    """
    # Ids the fallback branch reports as still broken.
    bad_ids = (3375, 2579)
    # Registrant ids with at least one mismatch, in first-seen order.
    problem_reg = []

    def emit(*parts):
        # Space-join the parts into ONE print argument so the output is the
        # same whether ``print`` is the Python 2 statement or the function.
        print(" ".join(["%s" % (part,) for part in parts]))

    def reg_id_from_link(link):
        # Characters 25-28 of the link with '-', 'S' and 'L' removed
        # (presumably the registrant number embedded in the URL -- verify).
        return re.sub('[-SL]', '', link[25:29])

    def remember_problem(reg_id):
        if reg_id not in problem_reg:
            problem_reg.append(reg_id)

    def count_mismatches(records, label):
        # Print and count the records whose registrant id is missing from
        # their link; returns how many were found.
        wrong = 0
        for record in records:
            reg_id = str(record.registrant.reg_id)
            if reg_id in record.link:
                continue
            emit(label, record.id)
            emit(reg_id, record.link, record.date, "\n")
            wrong += 1
            remember_problem(reg_id)
        return wrong

    wrong_contact = count_mismatches(Contact.objects.all(), "contact id")
    print("%s wrong contacts" % wrong_contact)

    wrong_contribution = 0
    for contribution in Contribution.objects.all():
        try:
            reg_id = str(contribution.registrant.reg_id)
            if reg_id not in contribution.link:
                emit(contribution.id)
                emit(reg_id, contribution.link, contribution.date)
                wrong_contribution += 1
                real_reg_id = reg_id_from_link(contribution.link)
                new_reg = Registrant(reg_id=real_reg_id)
                print("new_reg")
                contribution.registrant = new_reg
                contribution.save()
                emit(contribution.id)
                remember_problem(reg_id)
        except Exception:
            # Only ordinary errors are caught, so Ctrl-C and SystemExit
            # still stop the run.  This branch reports; it repairs nothing.
            print("error")
            real_reg_id = reg_id_from_link(contribution.link)
            if int(real_reg_id) not in bad_ids:
                emit("new_reg", Registrant)
            else:
                emit("Why are you not fixed?", contribution.id)
            emit(contribution.id)
    print("%s wrong contributions" % wrong_contribution)

    wrong_payment = count_mismatches(Payment.objects.all(), "payment id ")
    print("%s wrong payments " % wrong_payment)

    wrong_disbursement = count_mismatches(
        Disbursement.objects.all(), "dis id ")
    print("%s wrong disbursements" % wrong_disbursement)

    emit("contact", wrong_contact)
    emit("contribution", wrong_contribution)
    emit("payment", wrong_payment)
    emit("bad registrants", problem_reg)
def handle(self, *args, **options):
    """Link registrants and clients from historical docs to metadata rows.

    For every HistoricalDoc whose ``document_link`` matches at least one
    MetaData row, find or create the Registrant named by the doc's
    historical relationship, find or build the matching Client, attach
    both to each matching MetaData row and record the new pairings as
    ClientReg rows.

    A document is skipped when the registrant id or the client name is
    ambiguous, when no search name is produced for the principal, or when
    a new client's location does not resolve to exactly one Location.
    """
    #clean up known location mismatches
    location_dict = {
        'SOMALI DEMOCRATIC REPUBLIC': 'SOMALIA',
        'KOREA REPUBLIC OF': 'SOUTH KOREA',
        'BOSNIA-HERZEGOVINA': 'BOSNIA AND HERZEGOVINA',
    }

    def link_client_reg(client, reg, link):
        # Record one client/registrant pairing and tie it to the document.
        cr = ClientReg(client_id=client, reg_id=reg)
        cr.save()
        cr.meta_data.add(link)
        cr.save()

    for doc in HistoricalDoc.objects.all():
        link = doc.document_link
        related_fara_metadata = MetaData.objects.filter(link=link)
        if not related_fara_metadata:
            #skip for now
            #we may want to add the metadata
            continue

        #look for the registrant
        hist_reln = doc.historical_relationship
        registrant_no = hist_reln.registrant_no
        registrants = Registrant.objects.filter(reg_id=registrant_no)
        registrant_count = len(registrants)
        if registrant_count == 1:
            registrant = registrants[0]
        elif registrant_count == 0:
            registrant = Registrant(reg_id=registrant_no,
                                    reg_name=hist_reln.registrant)
            registrant.save()
        else:
            #there's more than one registrant with the same ID
            #what should we do?
            print("Multiple registrants with that ID, moving on!")
            continue

        #look for the client
        names = create_search_names(hist_reln.principal)
        # Reset per document so matches from an earlier document can never
        # leak into this one when no search name is produced.
        name = None
        clients = []
        for name in names:
            clients = Client.objects.filter(client_name__iexact=name)
            if len(clients) > 0:
                break
        if name is None:
            print("No searchable client name, moving on")
            continue

        if len(clients) == 1:
            client = clients[0]
        elif len(clients) > 1:
            #multiple matching clients, help! skipping for now
            print("Multiple clients with that name, moving on")
            # Really move on: falling through would attach whatever
            # ``client`` was left over from a previous document.
            continue
        else:
            #create the client
            #but first we have to match the location.
            location_text = hist_reln.location_represented
            location_text = location_dict.get(location_text, location_text)
            locations = Location.objects.filter(
                location__iexact=location_text)
            if len(locations) != 1:
                print("Unknown location {}".format(location_text))
                continue
            #only going to save client if it actually needs
            #to be added
            client = Client(location=locations[0],
                            client_name=name.strip(),
                            address1=hist_reln.address,
                            state=hist_reln.state)

        for md in related_fara_metadata:
            new_registrant = False
            #with registrants we can use their id system to dedup
            #so we'll just add them if they aren't in there
            if registrant not in md.registrant_set.all():
                md.registrant_set.add(registrant)
                md.save()
                new_registrant = True
                registrant.meta_data.add(link)

            #for clients we have to be more careful since there's no id
            #so we'll only add clients if the client_set is empty
            #otherwise we may add duplicates
            #and presumably only human-entered data is in the db and it's good?
            if not md.client_set.all().exists():
                client.save()  #we're good to save now
                md.client_set.add(client)
                md.save()
                for r in md.registrant_set.all():
                    link_client_reg(client, r, link)
            elif new_registrant:
                for c in md.client_set.all():
                    link_client_reg(c, registrant, link)
def handle(self, *args, **options):
    """Link registrants and clients from historical docs to metadata rows.

    For every HistoricalDoc whose ``document_link`` matches at least one
    MetaData row, find or create the Registrant named by the doc's
    historical relationship, find or build the matching Client, attach
    both to each matching MetaData row and record the new pairings as
    ClientReg rows.

    A document is skipped when the registrant id or the client name is
    ambiguous, when no search name is produced for the principal, or when
    a new client's location does not resolve to exactly one Location.
    """
    #clean up known location mismatches
    location_dict = {
        'SOMALI DEMOCRATIC REPUBLIC': 'SOMALIA',
        'KOREA REPUBLIC OF': 'SOUTH KOREA',
        'BOSNIA-HERZEGOVINA': 'BOSNIA AND HERZEGOVINA',
    }

    def link_client_reg(client, reg, link):
        # Record one client/registrant pairing and tie it to the document.
        cr = ClientReg(client_id=client, reg_id=reg)
        cr.save()
        cr.meta_data.add(link)
        cr.save()

    for doc in HistoricalDoc.objects.all():
        link = doc.document_link
        related_fara_metadata = MetaData.objects.filter(link=link)
        if not related_fara_metadata:
            #skip for now
            #we may want to add the metadata
            continue

        #look for the registrant
        hist_reln = doc.historical_relationship
        registrant_no = hist_reln.registrant_no
        registrants = Registrant.objects.filter(reg_id=registrant_no)
        registrant_count = len(registrants)
        if registrant_count == 1:
            registrant = registrants[0]
        elif registrant_count == 0:
            registrant = Registrant(reg_id=registrant_no,
                                    reg_name=hist_reln.registrant)
            registrant.save()
        else:
            #there's more than one registrant with the same ID
            #what should we do?
            print("Multiple registrants with that ID, moving on!")
            continue

        #look for the client
        names = create_search_names(hist_reln.principal)
        # Reset per document so matches from an earlier document can never
        # leak into this one when no search name is produced.
        name = None
        clients = []
        for name in names:
            clients = Client.objects.filter(client_name__iexact=name)
            if len(clients) > 0:
                break
        if name is None:
            print("No searchable client name, moving on")
            continue

        if len(clients) == 1:
            client = clients[0]
        elif len(clients) > 1:
            #multiple matching clients, help! skipping for now
            print("Multiple clients with that name, moving on")
            # Really move on: falling through would attach whatever
            # ``client`` was left over from a previous document.
            continue
        else:
            #create the client
            #but first we have to match the location.
            location_text = hist_reln.location_represented
            location_text = location_dict.get(location_text, location_text)
            locations = Location.objects.filter(
                location__iexact=location_text)
            if len(locations) != 1:
                print("Unknown location {}".format(location_text))
                continue
            #only going to save client if it actually needs
            #to be added
            client = Client(location=locations[0],
                            client_name=name.strip(),
                            address1=hist_reln.address,
                            state=hist_reln.state)

        for md in related_fara_metadata:
            new_registrant = False
            #with registrants we can use their id system to dedup
            #so we'll just add them if they aren't in there
            if registrant not in md.registrant_set.all():
                md.registrant_set.add(registrant)
                md.save()
                new_registrant = True
                registrant.meta_data.add(link)

            #for clients we have to be more careful since there's no id
            #so we'll only add clients if the client_set is empty
            #otherwise we may add duplicates
            #and presumably only human-entered data is in the db and it's good?
            if not md.client_set.all().exists():
                client.save()  #we're good to save now
                md.client_set.add(client)
                md.save()
                for r in md.registrant_set.all():
                    link_client_reg(client, r, link)
            elif new_registrant:
                for c in md.client_set.all():
                    link_client_reg(c, registrant, link)