コード例 #1
0
ファイル: create_feed.py プロジェクト: sunlightlabs/fara
def add_document(url_info):
    """Make sure Registrant, Document and MetaData rows exist for one entry.

    ``url_info`` is read as a mapping with the keys ``url``, ``reg_id`` and
    ``reg_name``; ``doc_type`` and ``stamp_date`` are read only when a new
    Document has to be created.  A row is created only when no row with the
    same ``reg_id`` (Registrant), ``url`` (Document) or ``link`` (MetaData)
    is found.  The registrant name is echoed to stdout on every call.
    """
    url = str(url_info['url']).strip()
    reg_id = url_info['reg_id']

    registrant_name = url_info['reg_name']
    print(registrant_name)

    registrant_known = Registrant.objects.filter(reg_id=reg_id).exists()
    if not registrant_known:
        Registrant(reg_id=reg_id, reg_name=url_info['reg_name']).save()

    document_known = Document.objects.filter(url=url).exists()
    if not document_known:
        Document(
            url=url,
            reg_id=url_info['reg_id'],
            doc_type=url_info['doc_type'],
            stamp_date=url_info['stamp_date'],
        ).save()

    # Nothing more to do once metadata for this link is already recorded.
    if MetaData.objects.filter(link=url).exists():
        return

    # Re-read the document so its id is available even when it already existed.
    stored_document = Document.objects.get(url=url)
    MetaData(
        link=url,
        upload_date=datetime.date.today(),
        reviewed=False,
        processed=False,
        is_amendment=False,
        form=stored_document.id,
    ).save()
コード例 #2
0
ファイル: create_feed.py プロジェクト: rshorey/fara
def add_document(url_info):
    """Create the Registrant, Document and MetaData rows an entry is missing.

    Reads ``url``, ``reg_id`` and ``reg_name`` from ``url_info`` (plus
    ``doc_type`` and ``stamp_date`` when a Document must be created), prints
    the registrant name, and saves each of the three rows only if a lookup
    by ``reg_id`` / ``url`` / ``link`` finds nothing.
    """
    url = str(url_info['url']).strip()
    reg_id = url_info['reg_id']

    print(url_info['reg_name'])

    if not Registrant.objects.filter(reg_id=reg_id).exists():
        registrant_fields = {
            'reg_id': reg_id,
            'reg_name': url_info['reg_name'],
        }
        Registrant(**registrant_fields).save()

    if not Document.objects.filter(url=url).exists():
        document_fields = {
            'url': url,
            'reg_id': url_info['reg_id'],
            'doc_type': url_info['doc_type'],
            'stamp_date': url_info['stamp_date'],
        }
        Document(**document_fields).save()

    if not MetaData.objects.filter(link=url).exists():
        # The metadata row stores the id of the document carrying this url.
        form_id = Document.objects.get(url=url).id
        metadata_fields = {
            'link': url,
            'upload_date': datetime.date.today(),
            'reviewed': False,
            'processed': False,
            'is_amendment': False,
            'form': form_id,
        }
        MetaData(**metadata_fields).save()
コード例 #3
0
ファイル: check_reg.py プロジェクト: sunlightlabs/fara
    def handle(self, pythonpath, verbosity, traceback, settings):
        """Report records whose registrant id does not appear in their link.

        Walks every Contact, Contribution, Payment and Disbursement and
        prints each record whose ``registrant.reg_id`` (as a string) is not
        a substring of its ``link``.  Mismatched contributions are also
        re-pointed at a ``Registrant`` built from the id embedded in the
        link and saved.  Per-model totals and the list of affected
        registrant ids are printed at the end.

        The four parameters are accepted but not used by this method.
        """
        # Ids for which the error path builds no replacement Registrant.
        bad_ids = (3375, 2579)
        problem_reg = []

        def remember_problem(reg_id):
            # Keep first-seen order, no duplicates.
            if reg_id not in problem_reg:
                problem_reg.append(reg_id)

        def reg_id_from_link(link):
            # Characters 25-28 of the link with '-', 'S' and 'L' removed.
            return re.sub('[-SL]', '', link[25:29])

        def count_mismatches(records, id_label):
            # Print and count the records whose link lacks their registrant id.
            wrong = 0
            for record in records:
                reg_id = str(record.registrant.reg_id)
                if reg_id in record.link:
                    continue
                print("%s %s" % (id_label, record.id))
                print("%s %s %s \n" % (reg_id, record.link, record.date))
                wrong += 1
                remember_problem(reg_id)
            return wrong

        wrong_contact = count_mismatches(Contact.objects.all(), "contact id")
        print("%s wrong contacts" % (wrong_contact))

        wrong_contribution = 0
        for contribution in Contribution.objects.all():
            try:
                reg_id = str(contribution.registrant.reg_id)

                if reg_id not in contribution.link:
                    print(contribution.id)
                    print("%s %s %s" % (reg_id, contribution.link,
                                        contribution.date))
                    wrong_contribution += 1

                    # Re-point the contribution at the id found in its link.
                    real_reg_id = reg_id_from_link(contribution.link)
                    new_reg = Registrant(reg_id=real_reg_id)
                    print("new_reg")
                    contribution.registrant = new_reg
                    contribution.save()
                    print(contribution.id)

                    remember_problem(reg_id)
            except Exception:
                # Deliberately broad so one bad row does not stop the audit,
                # but no longer a bare except: Ctrl-C and SystemExit still
                # propagate.
                print("error")
                try:
                    real_reg_id = reg_id_from_link(contribution.link)
                    is_bad = int(real_reg_id) in bad_ids
                except (TypeError, ValueError):
                    # A link without a numeric id must not abort the run
                    # from inside the error handler.
                    print("unparseable link for contribution %s"
                          % contribution.id)
                    continue

                if not is_bad:
                    # NOTE(review): the replacement is built but never
                    # attached or saved, and the message prints the
                    # Registrant class rather than the instance. Original
                    # behaviour kept; confirm intent.
                    new_reg = Registrant(reg_id=real_reg_id)
                    print("new_reg %s" % Registrant)
                else:
                    print("Why are you not fixed? %s" % contribution.id)

                print(contribution.id)
        print("%s wrong contributions" % (wrong_contribution))

        wrong_payment = count_mismatches(Payment.objects.all(), "payment id ")
        print("%s wrong payments " % (wrong_payment))

        wrong_disbursement = count_mismatches(Disbursement.objects.all(),
                                              "dis id ")
        print("%s wrong disbursements" % (wrong_disbursement))

        print("contact %s" % wrong_contact)
        print("contribution %s" % wrong_contribution)
        print("payment %s" % wrong_payment)
        # The disbursement total was tallied but missing from the summary.
        print("disbursement %s" % wrong_disbursement)
        print("bad registrants %s" % problem_reg)
コード例 #4
0
ファイル: merge_feeds.py プロジェクト: matthewdgreen/fara
    def handle(self, *args, **options):
        """Attach historical registrants and clients to existing metadata.

        For every ``HistoricalDoc`` whose ``document_link`` matches at least
        one ``MetaData`` row, the registrant and client named by the
        document's historical relationship are looked up (and created when
        missing), added to each matching ``MetaData``, and paired through
        ``ClientReg`` rows.  Documents whose registrant, client or location
        cannot be resolved unambiguously are reported on stdout and skipped.
        """
        # clean up known location mismatches
        location_dict = {
            'SOMALI DEMOCRATIC REPUBLIC': 'SOMALIA',
            'KOREA REPUBLIC OF': 'SOUTH KOREA',
            'BOSNIA-HERZEGOVINA': 'BOSNIA AND HERZEGOVINA',
        }

        for doc in HistoricalDoc.objects.all():
            link = doc.document_link
            related_fara_metadata = MetaData.objects.filter(link=link)
            if len(related_fara_metadata) == 0:
                # skip for now
                # we may want to add the metadata
                continue

            # look for the registrant
            hist_reln = doc.historical_relationship
            registrant_no = hist_reln.registrant_no
            registrant = Registrant.objects.filter(reg_id=registrant_no)
            if len(registrant) == 1:
                registrant = registrant[0]
            elif len(registrant) == 0:
                registrant = Registrant(reg_id=registrant_no,
                                        reg_name=hist_reln.registrant)
                registrant.save()
            else:
                # there's more than one registrant with the same ID
                # what should we do?
                print("Multiple registrants with that ID, moving on!")
                continue

            # look for the client
            # Reset per document so nothing leaks in from the previous one.
            name = None
            clients = []
            for name in create_search_names(hist_reln.principal):
                clients = Client.objects.filter(client_name__iexact=name)
                if len(clients) > 0:
                    break

            if name is None:
                # No search name was produced, so there is nothing to match
                # on and nothing to name a new client with.
                print("No searchable client name, moving on")
                continue

            if len(clients) == 1:
                client = clients[0]
            elif len(clients) > 1:
                # multiple matching clients, help! skipping for now
                print("Multiple clients with that name, moving on")
                # Without this, ``client`` below would be unbound or still
                # hold the client resolved for an earlier document.
                continue
            else:
                # create the client
                # but first we have to match the location.
                location_text = hist_reln.location_represented
                if location_text in location_dict:
                    location_text = location_dict[location_text]

                location = Location.objects.filter(
                    location__iexact=location_text)
                if len(location) == 1:
                    location = location[0]
                else:
                    print("Unknown location {}".format(location_text))
                    continue
                # ``name`` is the last search name tried (none matched).
                client = Client(location=location,
                                client_name=name.strip(),
                                address1=hist_reln.address,
                                state=hist_reln.state)
                # only going to save client if it actually needs
                # to be added

            for md in related_fara_metadata:

                new_registrant = False

                # with registrants we can use their id system to dedup
                # so we'll just add them if they aren't in there
                if registrant not in md.registrant_set.all():
                    md.registrant_set.add(registrant)
                    md.save()
                    new_registrant = True
                    registrant.meta_data.add(link)

                # for clients we have to be more careful since there's no id
                # so we'll only add clients if the client_set is empty
                # otherwise we may add duplicates
                # and presumably only human-entered data is in the db and
                # it's good?
                if len(md.client_set.all()) == 0:
                    client.save()  # we're good to save now
                    md.client_set.add(client)
                    md.save()
                    for r in md.registrant_set.all():
                        cr = ClientReg(client_id=client, reg_id=r)
                        cr.save()
                        cr.meta_data.add(link)
                        cr.save()
                elif new_registrant:
                    # Pair the newly linked registrant with existing clients.
                    for c in md.client_set.all():
                        cr = ClientReg(client_id=c, reg_id=registrant)
                        cr.save()
                        cr.meta_data.add(link)
                        cr.save()
コード例 #5
0
    def handle(self, *args, **options):
        """Merge historical documents into the existing metadata records.

        Each ``HistoricalDoc`` whose ``document_link`` matches one or more
        ``MetaData`` rows has its registrant and client resolved from the
        historical relationship (creating them when absent), linked to every
        matching ``MetaData``, and recorded as ``ClientReg`` pairs.  A
        document is reported on stdout and skipped when its registrant id is
        duplicated, several clients share its name, no search name is
        available, or its location is unknown.
        """
        # clean up known location mismatches
        location_dict = {
            'SOMALI DEMOCRATIC REPUBLIC': 'SOMALIA',
            'KOREA REPUBLIC OF': 'SOUTH KOREA',
            'BOSNIA-HERZEGOVINA': 'BOSNIA AND HERZEGOVINA',
        }

        for doc in HistoricalDoc.objects.all():
            link = doc.document_link
            related_fara_metadata = MetaData.objects.filter(link=link)
            if len(related_fara_metadata) == 0:
                # skip for now
                # we may want to add the metadata
                continue

            # look for the registrant
            hist_reln = doc.historical_relationship
            registrant_no = hist_reln.registrant_no
            registrant = Registrant.objects.filter(reg_id=registrant_no)
            if len(registrant) == 1:
                registrant = registrant[0]
            elif len(registrant) == 0:
                registrant = Registrant(reg_id=registrant_no,
                                        reg_name=hist_reln.registrant)
                registrant.save()
            else:
                # there's more than one registrant with the same ID
                # what should we do?
                print("Multiple registrants with that ID, moving on!")
                continue

            # look for the client
            # Start from a clean slate; these must not carry over from the
            # previous document when no search name is produced.
            name = None
            clients = []
            names = create_search_names(hist_reln.principal)
            for name in names:
                clients = Client.objects.filter(client_name__iexact=name)
                if len(clients) > 0:
                    break

            if name is None:
                # Nothing to search for and nothing to name a new client.
                print("No searchable client name, moving on")
                continue

            if len(clients) > 1:
                # multiple matching clients, help! skipping for now
                print("Multiple clients with that name, moving on")
                # Skip for real: falling through would reuse a ``client``
                # from an earlier document (or hit an unbound name).
                continue

            if len(clients) == 1:
                client = clients[0]
            else:
                # create the client
                # but first we have to match the location.
                location_text = hist_reln.location_represented
                if location_text in location_dict:
                    location_text = location_dict[location_text]

                location = Location.objects.filter(
                    location__iexact=location_text)
                if len(location) != 1:
                    print("Unknown location {}".format(location_text))
                    continue
                location = location[0]

                # No search name matched; the last one tried names the
                # new client.
                client = Client(location=location,
                                client_name=name.strip(),
                                address1=hist_reln.address,
                                state=hist_reln.state)
                # only going to save client if it actually needs
                # to be added

            for md in related_fara_metadata:

                new_registrant = False

                # with registrants we can use their id system to dedup
                # so we'll just add them if they aren't in there
                if registrant not in md.registrant_set.all():
                    md.registrant_set.add(registrant)
                    md.save()
                    new_registrant = True
                    registrant.meta_data.add(link)

                # for clients we have to be more careful since there's no id
                # so we'll only add clients if the client_set is empty
                # otherwise we may add duplicates
                # and presumably only human-entered data is in the db and
                # it's good?
                if len(md.client_set.all()) == 0:
                    client.save()  # we're good to save now
                    md.client_set.add(client)
                    md.save()
                    for r in md.registrant_set.all():
                        cr = ClientReg(client_id=client, reg_id=r)
                        cr.save()
                        cr.meta_data.add(link)
                        cr.save()
                elif new_registrant:
                    # Pair the newly linked registrant with existing clients.
                    for c in md.client_set.all():
                        cr = ClientReg(client_id=c, reg_id=registrant)
                        cr.save()
                        cr.meta_data.add(link)
                        cr.save()