def build_industries(csv_rows):
    """Create one 'industry' entity per distinct (industry, order) pair.

    ``csv_rows`` is an iterable of CSV lines handed to ``csv.reader``.  Each
    row is unpacked into five columns: source, code, name, industry, order.
    A row with a different number of columns raises ``ValueError`` from the
    unpacking, outside the per-row error handler.

    Rows whose industry column is the literal ``'industry'`` (the header)
    and rows repeating an (industry, order) pair already seen are skipped.
    Every other row is passed to ``build_entity`` with one attribute whose
    namespace is ``urn:nimsp:industry`` when source is ``'NIMSP'`` and
    ``urn:crp:industry`` otherwise; the attribute value is ``order``.

    An exception raised while handling a row is reported (traceback plus a
    skip message) and processing continues with the next row.
    """
    industries_seen = set()
    for (source, code, name, industry, order) in csv.reader(csv_rows):
        try:
            industry = industry.strip()
            source = source.strip()
            order = order.strip()

            # A tuple key cannot collide the way "_".join() could when the
            # values themselves contain underscores.
            key = (industry, order)
            if industry == 'industry' or key in industries_seen:
                continue
            industries_seen.add(key)

            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print('Generating entity for %s, %s, %s' % (source, industry, order))

            if source == 'NIMSP':
                namespace = 'urn:nimsp:industry'
            else:
                namespace = 'urn:crp:industry'

            build_entity(industry, 'industry', [(namespace, order)])

        except Exception:
            # Not a bare except: KeyboardInterrupt / SystemExit must still
            # be able to stop the import.
            traceback.print_exc()
            print("!!!!! Skipping Entity: %s !!!!!" % name)
        finally:
            # Runs for skipped rows too, so output never lags behind.
            sys.stdout.flush()
Exemple #2
0
def build_industries(csv_rows):
    """Create one 'industry' entity per distinct (industry, order) pair.

    ``csv_rows`` is an iterable of CSV lines handed to ``csv.reader``.  Each
    row is unpacked into five columns: source, code, name, industry, order.
    A row with a different number of columns raises ``ValueError`` from the
    unpacking, outside the per-row error handler.

    Rows whose industry column is the literal ``'industry'`` (the header)
    and rows repeating an (industry, order) pair already seen are skipped.
    Every other row is passed to ``build_entity`` with one attribute whose
    namespace is ``urn:nimsp:industry`` when source is ``'NIMSP'`` and
    ``urn:crp:industry`` otherwise; the attribute value is ``order``.

    An exception raised while handling a row is reported (traceback plus a
    skip message) and processing continues with the next row.
    """
    industries_seen = set()
    for (source, code, name, industry, order) in csv.reader(csv_rows):
        try:
            industry = industry.strip()
            source = source.strip()
            order = order.strip()

            # A tuple key cannot collide the way "_".join() could when the
            # values themselves contain underscores.
            key = (industry, order)
            if industry == 'industry' or key in industries_seen:
                continue
            industries_seen.add(key)

            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print('Generating entity for %s, %s, %s' % (source, industry,
                                                        order))

            if source == 'NIMSP':
                namespace = 'urn:nimsp:industry'
            else:
                namespace = 'urn:crp:industry'

            build_entity(industry, 'industry', [(namespace, order)])

        except Exception:
            # Not a bare except: KeyboardInterrupt / SystemExit must still
            # be able to stop the import.
            traceback.print_exc()
            print("!!!!! Skipping Entity: %s !!!!!" % name)
        finally:
            # Runs for skipped rows too, so output never lags behind.
            sys.stdout.flush()
Exemple #3
0
def build_recipients(csv_rows):
    """Create a 'politician' entity for every (name, namespace, id) CSV row.

    ``csv_rows`` is an iterable of CSV lines handed to ``csv.reader``; each
    row is unpacked into exactly three columns.  When the id column is
    non-empty, the entity gets a single attribute holding that id under
    ``urn:nimsp:recipient`` or ``urn:crp:recipient``, chosen by comparing
    the namespace column with ``NIMSP_TRANSACTION_NAMESPACE`` and
    ``CRP_TRANSACTION_NAMESPACE``.  Any other namespace raises inside the
    per-row handler, so that row is reported and skipped.  When the id
    column is empty the entity is built with no attributes.

    An exception raised while handling a row is reported (traceback plus a
    skip message) and processing continues with the next row.
    """
    # The third column is called ext_id here to avoid shadowing builtin id().
    for (name, namespace, ext_id) in csv.reader(csv_rows):
        try:
            print('Generating entity for %s, %s, %s' % (name, namespace, ext_id))
            name = name.strip()
            # Only byte strings need decoding; text is already decoded.
            if isinstance(name, bytes):
                name = name.decode('utf8', 'replace')
            ext_id = ext_id.strip()
            namespace = namespace.strip()

            attributes = []
            if ext_id:
                if namespace == NIMSP_TRANSACTION_NAMESPACE:
                    attr_namespace = 'urn:nimsp:recipient'
                elif namespace == CRP_TRANSACTION_NAMESPACE:
                    attr_namespace = 'urn:crp:recipient'
                else:
                    raise Exception('Unknown namespace: %s' % namespace)
                attributes = [(attr_namespace, ext_id)]

            build_entity(name, 'politician', attributes)

        except Exception:
            # Not a bare except: KeyboardInterrupt / SystemExit must still
            # be able to stop the import.
            traceback.print_exc()
            print("!!!!! Skipping Entity: %s !!!!!" % name)
        finally:
            sys.stderr.flush()
            sys.stdout.flush()
 def handle(self, **options):
     """Build an organization entity per new spender, then run one SQL statement.

     Each (name, committee_id) pair yielded by ``new_spenders()`` is echoed
     to stdout and passed to ``build_entity`` as an 'organization' carrying
     a single ``urn:fec:committee`` attribute.  Afterwards
     ``superpac_metadata_stmt`` is executed on a cursor obtained from
     ``connection``.  ``options`` is accepted but not read here.
     """
     print("Adding entities:")
     for spender_name, fec_committee_id in new_spenders():
         print("%s: %s" % (spender_name, fec_committee_id))
         fec_attribute = ('urn:fec:committee', fec_committee_id)
         build_entity(spender_name, 'organization', [fec_attribute])

     cursor = connection.cursor()
     cursor.execute(superpac_metadata_stmt)
Exemple #5
0
    def create_politicians(self):
        """Stage recipients that have no entity yet and build 'politician' entities.

        Steps, in order:
          1. Drop and recreate ``tmp_politicians_<today>`` from
             ``contribution_contribution`` rows with recipient_type 'P', a
             non-empty name and ext id, no matching
             ``matchbox_entityattribute`` value and no blacklist entry;
             commit.
          2. Read the staged (name, namespace, id) rows; commit.
          3. Raise ``EntityManagementError`` when the row count exceeds
             ``POLITICIAN_CREATE_MAX_WARN`` and ``self.force_pols`` is falsy.
          4. For each row either log it (``self.dry_run``) or call
             ``build_entity``; a row with an id but an unrecognised
             namespace raises ``Exception``.
          5. Commit and log the number of staged rows.

        NOTE(review): ``cycle_clause`` is computed and handed to ``format``
        but the SQL template has no ``{cycle_clause}`` placeholder, so it
        does not affect the query -- confirm whether a cycle filter was
        intended.
        NOTE(review): the closing "Created" message is logged in dry-run
        mode as well, with the staged row count.
        """
        self.log.info("Starting to find politicians to create...")

        drop_sql = 'drop table if exists tmp_politicians_{0}'.format(self.today)
        self.cursor.execute(drop_sql, None)

        template_args = dict(
            date=self.today,
            namespace_clause=self.get_namespace_clause(),
            cycle_clause=self.get_cycle_clause(),
        )
        tmp_sql = """
            create table tmp_politicians_{date} as
                select min(recipient_name) as name, transaction_namespace as namespace, recipient_ext_id as id
                from contribution_contribution
                where
                    recipient_type = 'P'
                    and recipient_name != ''
                    and recipient_ext_id != ''
                    and not exists (select * from matchbox_entityattribute where value = recipient_ext_id)
                    and not exists (select * from matchbox_entityblacklist meb where meb.name = recipient_name and type = 'politician')
                    {namespace_clause}
                group by transaction_namespace, recipient_ext_id
        """.format(**template_args)

        self.cursor.execute(tmp_sql, None)
        transaction.commit()
        populated_msg = "- Table tmp_politicians_{0} populated.".format(self.today)
        self.log.info(populated_msg)

        select_sql = "select name, namespace, id from tmp_politicians_{0}".format(self.today)
        self.cursor.execute(select_sql, None)
        results = self.cursor.fetchall()
        transaction.commit()

        staged = len(results)
        if not self.force_pols and staged > POLITICIAN_CREATE_MAX_WARN:
            raise EntityManagementError(
                "The number of politicians set to be created is {0}. The max this script will create automatically is {1}."
                .format(staged, POLITICIAN_CREATE_MAX_WARN))

        for name, namespace, ext_id in results:
            if self.dry_run:
                self.log.info("- Would build entity %s|%s|%s" % (name, namespace, ext_id))
                continue

            # Rows without an external id get an entity with no attributes.
            if not ext_id:
                attributes = []
            elif namespace == NIMSP_TRANSACTION_NAMESPACE:
                attributes = [('urn:nimsp:recipient', ext_id)]
            elif namespace == CRP_TRANSACTION_NAMESPACE:
                attributes = [('urn:crp:recipient', ext_id)]
            else:
                raise Exception('Unknown namespace: %s' % namespace)

            build_entity(name, 'politician', attributes)

        transaction.commit()
        self.log.info("- Created {0} politician entities.".format(staged))
Exemple #6
0
    def handle(self, **options):
        """Build an organization entity per new spender, then run one SQL statement.

        Each (name, committee_id) pair yielded by ``new_spenders()`` is
        echoed to stdout and passed to ``build_entity`` as an 'organization'
        carrying a single ``urn:fec:committee`` attribute.  Afterwards
        ``superpac_metadata_stmt`` is executed on a cursor obtained from
        ``connection``.  ``options`` is accepted but not read here.
        """
        print("Adding entities:")
        for spender_name, fec_committee_id in new_spenders():
            print("%s: %s" % (spender_name, fec_committee_id))
            fec_attribute = ('urn:fec:committee', fec_committee_id)
            build_entity(spender_name, 'organization', [fec_attribute])

        cursor = connection.cursor()
        cursor.execute(superpac_metadata_stmt)
    def create_individuals(self):
        """Stage people that have no entity yet and build 'individual' entities.

        Steps, in order:
          1. Drop and recreate ``tmp_individuals_<today>`` as the union of
             lobbyists (``lobbying_lobbyist`` joined to ``lobbying_report``)
             and contributors whose ext id starts with 'U'
             (``contribution_contribution``), leaving out names on the
             blacklist and ids already present in
             ``matchbox_entityattribute``; commit.
          2. Read the staged (name, id) rows, then roll the transaction back.
          3. Raise ``EntityManagementError`` when the row count exceeds
             ``INDIVIDUAL_CREATE_MAX_WARN`` and ``self.force_indivs`` is
             falsy.
          4. For each row either log it (``self.dry_run``) or call
             ``build_entity`` with a ``urn:crp:individual`` attribute.
          5. Commit and log the number of staged rows.

        NOTE(review): the closing "Created" message is logged in dry-run
        mode as well, with the staged row count.
        """
        self.log.info("Starting to find individuals to create...")
        self.log.debug("This is a debug log")

        drop_sql = 'drop table if exists tmp_individuals_{0}'.format(self.today)
        self.cursor.execute(drop_sql, None)

        template_args = dict(
            date=self.today,
            namespace_clause=self.get_namespace_clause(),
            cycle_clause=self.get_cycle_clause(),
        )
        creation_sql = """
            create table tmp_individuals_{date} as
                select min(name) as name, id from (
                    select min(lobbyist_name) as name, lobbyist_ext_id as id
                    from lobbying_lobbyist
                    inner join lobbying_report using (transaction_id)
                    where
                        lobbyist_name != ''
                        and not exists (select * from matchbox_entityattribute where substring(value for 11) = substring(lobbyist_ext_id for 11))
                        and not exists (select * from matchbox_entityblacklist meb where meb.name = lobbyist_name and type = 'individual')
                        {cycle_clause}
                    group by lobbyist_ext_id

                    union

                    select min(contributor_name) as name, contributor_ext_id as id
                    from contribution_contribution
                    where
                        contributor_name != ''
                        and contributor_ext_id like 'U%'
                        and not exists (select * from matchbox_entityattribute where value = contributor_ext_id)
                        and not exists (select * from matchbox_entityblacklist meb where meb.name = contributor_name and type = 'individual')
                        {namespace_clause}
                        {cycle_clause}
                    group by contributor_ext_id
                )x
                group by id
        """.format(**template_args)

        self.cursor.execute(creation_sql, None)
        transaction.commit()
        populated_msg = "- Table tmp_individuals_%s populated." % self.today
        self.log.info(populated_msg)

        select_sql = "select name, id from tmp_individuals_%s" % self.today
        self.cursor.execute(select_sql)
        results = self.cursor.fetchall()
        transaction.rollback()

        staged = len(results)
        if not self.force_indivs and staged > INDIVIDUAL_CREATE_MAX_WARN:
            raise EntityManagementError(
                "The number of individuals set to be created is {0}. The max this script will create automatically is {1}."
                .format(staged, INDIVIDUAL_CREATE_MAX_WARN))

        for name, crp_id in results:
            if self.dry_run:
                self.log.info("- Would build entity {0}|{1}".format(name, crp_id))
                continue
            crp_attribute = ('urn:crp:individual', crp_id)
            build_entity(name, 'individual', [crp_attribute])

        transaction.commit()
        self.log.info("- Created {0} individual entities.".format(staged))
def build_individuals(csv_rows):
    """Create an 'individual' entity for every (name, crp_id) CSV row.

    ``csv_rows`` is an iterable of CSV lines handed to ``csv.reader``; each
    row is unpacked into exactly two columns.  The name is stripped (and
    decoded as UTF-8 with replacement when it is a byte string) before
    being passed to ``build_entity`` together with one
    ``urn:crp:individual`` attribute holding ``crp_id`` as read.

    An exception raised while handling a row is reported (traceback plus a
    skip message showing the raw name) and processing continues with the
    next row.
    """
    for (name, crp_id) in csv.reader(csv_rows):
        try:
            print('Generating entity for %s, %s' % (name, crp_id))
            clean_name = name.strip()
            # Only byte strings need decoding; text is already decoded.
            if isinstance(clean_name, bytes):
                clean_name = clean_name.decode('utf8', 'replace')

            build_entity(clean_name, 'individual', [('urn:crp:individual', crp_id)])

        except Exception:
            # Not a bare except: KeyboardInterrupt / SystemExit must still
            # be able to stop the import.
            traceback.print_exc()
            print("!!!!! Skipping Entity: %s !!!!!" % name)
        finally:
            sys.stderr.flush()
            sys.stdout.flush()
def build_individuals(csv_rows):
    """Create an 'individual' entity for every (name, crp_id) CSV row.

    ``csv_rows`` is an iterable of CSV lines handed to ``csv.reader``; each
    row is unpacked into exactly two columns.  The name is stripped (and
    decoded as UTF-8 with replacement when it is a byte string) before
    being passed to ``build_entity`` together with one
    ``urn:crp:individual`` attribute holding ``crp_id`` as read.

    An exception raised while handling a row is reported (traceback plus a
    skip message showing the raw name) and processing continues with the
    next row.
    """
    for (name, crp_id) in csv.reader(csv_rows):
        try:
            print('Generating entity for %s, %s' % (name, crp_id))
            clean_name = name.strip()
            # Only byte strings need decoding; text is already decoded.
            if isinstance(clean_name, bytes):
                clean_name = clean_name.decode('utf8', 'replace')

            build_entity(clean_name, 'individual',
                         [('urn:crp:individual', crp_id)])

        except Exception:
            # Not a bare except: KeyboardInterrupt / SystemExit must still
            # be able to stop the import.
            traceback.print_exc()
            print("!!!!! Skipping Entity: %s !!!!!" % name)
        finally:
            sys.stderr.flush()
            sys.stdout.flush()
Exemple #10
0
    def create_politicians(self):
        """Stage recipients that have no entity yet and build 'politician' entities.

        Steps, in order:
          1. Drop and recreate ``tmp_politicians_<today>`` from
             ``contribution_contribution`` rows with recipient_type 'P', a
             non-empty name and ext id, no matching
             ``matchbox_entityattribute`` value and no blacklist entry;
             commit.
          2. Read the staged (name, namespace, id) rows; commit.
          3. Raise ``EntityManagementError`` when the row count exceeds
             ``POLITICIAN_CREATE_MAX_WARN`` and ``self.force_pols`` is falsy.
          4. For each row either log it (``self.dry_run``) or call
             ``build_entity``; a row with an id but an unrecognised
             namespace raises ``Exception``.
          5. Commit and log the number of staged rows.

        NOTE(review): ``cycle_clause`` is computed and handed to ``format``
        but the SQL template has no ``{cycle_clause}`` placeholder, so it
        does not affect the query -- confirm whether a cycle filter was
        intended.
        NOTE(review): the closing "Created" message is logged in dry-run
        mode as well, with the staged row count.
        """
        self.log.info("Starting to find politicians to create...")

        drop_sql = 'drop table if exists tmp_politicians_{0}'.format(self.today)
        self.cursor.execute(drop_sql, None)

        template_args = dict(
            date=self.today,
            namespace_clause=self.get_namespace_clause(),
            cycle_clause=self.get_cycle_clause(),
        )
        tmp_sql = """
            create table tmp_politicians_{date} as
                select min(recipient_name) as name, transaction_namespace as namespace, recipient_ext_id as id
                from contribution_contribution
                where
                    recipient_type = 'P'
                    and recipient_name != ''
                    and recipient_ext_id != ''
                    and not exists (select * from matchbox_entityattribute where value = recipient_ext_id)
                    and not exists (select * from matchbox_entityblacklist meb where meb.name = recipient_name and type = 'politician')
                    {namespace_clause}
                group by transaction_namespace, recipient_ext_id
        """.format(**template_args)

        self.cursor.execute(tmp_sql, None)
        transaction.commit()
        populated_msg = "- Table tmp_politicians_{0} populated.".format(self.today)
        self.log.info(populated_msg)

        select_sql = "select name, namespace, id from tmp_politicians_{0}".format(self.today)
        self.cursor.execute(select_sql, None)
        results = self.cursor.fetchall()
        transaction.commit()

        staged = len(results)
        if not self.force_pols and staged > POLITICIAN_CREATE_MAX_WARN:
            raise EntityManagementError(
                "The number of politicians set to be created is {0}. The max this script will create automatically is {1}."
                .format(staged, POLITICIAN_CREATE_MAX_WARN))

        for name, namespace, ext_id in results:
            if self.dry_run:
                self.log.info("- Would build entity %s|%s|%s" % (name, namespace, ext_id))
                continue

            # Rows without an external id get an entity with no attributes.
            if not ext_id:
                attributes = []
            elif namespace == NIMSP_TRANSACTION_NAMESPACE:
                attributes = [('urn:nimsp:recipient', ext_id)]
            elif namespace == CRP_TRANSACTION_NAMESPACE:
                attributes = [('urn:crp:recipient', ext_id)]
            else:
                raise Exception('Unknown namespace: %s' % namespace)

            build_entity(name, 'politician', attributes)

        transaction.commit()
        self.log.info("- Created {0} politician entities.".format(staged))
Exemple #11
0
def build_committees(csv_rows):
    """Create an 'organization' entity for every (fec_id, name) CSV row.

    ``csv_rows`` is an iterable of CSV lines handed to ``csv.reader``; each
    row is unpacked into exactly two columns.  Both values are stripped
    (the name is also decoded as UTF-8 with replacement when it is a byte
    string) and the entity is built with one ``urn:fec:committee``
    attribute holding ``fec_id``.

    An exception raised while handling a row is reported (traceback plus a
    skip message) and processing continues with the next row.
    """
    for (fec_id, name) in csv.reader(csv_rows):
        try:
            print('Generating entity for %s, %s' % (fec_id, name))

            name = name.strip()
            # Only byte strings need decoding; text is already decoded.
            if isinstance(name, bytes):
                name = name.decode('utf8', 'replace')
            fec_id = fec_id.strip()

            build_entity(name, 'organization', [('urn:fec:committee', fec_id)])

        except Exception:
            # Not a bare except: KeyboardInterrupt / SystemExit must still
            # be able to stop the import.
            traceback.print_exc()
            print("!!!!! Skipping Entity: %s !!!!!" % name)
        finally:
            sys.stdout.flush()
Exemple #12
0
def build_organizations(csv_rows):
    """Create an 'organization' entity for every (crp_id, nimsp_id, name) CSV row.

    ``csv_rows`` is an iterable of CSV lines handed to ``csv.reader``; each
    row is unpacked into exactly three columns.  All values are stripped
    (the name is also decoded as UTF-8 with replacement when it is a byte
    string).  An id contributes an attribute only when it is neither empty
    nor ``'0'``: ``urn:nimsp:organization`` first, then
    ``urn:crp:organization``.

    An exception raised while handling a row is reported (traceback plus a
    skip message) and processing continues with the next row.
    """
    for (crp_id, nimsp_id, name) in csv.reader(csv_rows):
        try:
            print('Generating entity for %s, %s, %s' % (crp_id, nimsp_id, name))

            name = name.strip()
            # Only byte strings need decoding; text is already decoded.
            if isinstance(name, bytes):
                name = name.decode('utf8', 'replace')
            crp_id = crp_id.strip()
            nimsp_id = nimsp_id.strip()

            attributes = []
            if nimsp_id and nimsp_id != '0':
                attributes.append(('urn:nimsp:organization', nimsp_id))
            if crp_id and crp_id != '0':
                attributes.append(('urn:crp:organization', crp_id))

            build_entity(name, 'organization', attributes)

        except Exception:
            # Not a bare except: KeyboardInterrupt / SystemExit must still
            # be able to stop the import.
            traceback.print_exc()
            print("!!!!! Skipping Entity: %s !!!!!" % name)
        finally:
            sys.stdout.flush()
def build_organizations(csv_rows):
    """Create an 'organization' entity for every (crp_id, nimsp_id, name) CSV row.

    ``csv_rows`` is an iterable of CSV lines handed to ``csv.reader``; each
    row is unpacked into exactly three columns.  All values are stripped
    (the name is also decoded as UTF-8 with replacement when it is a byte
    string).  An id contributes an attribute only when it is neither empty
    nor ``'0'``: ``urn:nimsp:organization`` first, then
    ``urn:crp:organization``.

    An exception raised while handling a row is reported (traceback plus a
    skip message) and processing continues with the next row.
    """
    for (crp_id, nimsp_id, name) in csv.reader(csv_rows):
        try:
            print('Generating entity for %s, %s, %s' % (crp_id, nimsp_id, name))

            name = name.strip()
            # Only byte strings need decoding; text is already decoded.
            if isinstance(name, bytes):
                name = name.decode('utf8', 'replace')
            crp_id = crp_id.strip()
            nimsp_id = nimsp_id.strip()

            attributes = []
            if nimsp_id and nimsp_id != '0':
                attributes.append(('urn:nimsp:organization', nimsp_id))
            if crp_id and crp_id != '0':
                attributes.append(('urn:crp:organization', crp_id))

            build_entity(name, 'organization', attributes)

        except Exception:
            # Not a bare except: KeyboardInterrupt / SystemExit must still
            # be able to stop the import.
            traceback.print_exc()
            print("!!!!! Skipping Entity: %s !!!!!" % name)
        finally:
            sys.stdout.flush()
Exemple #14
0
def build_subindustries(csv_rows):
    """Create an 'industry' entity for every subindustry CSV row.

    ``csv_rows`` is an iterable of CSV lines handed to ``csv.reader``; each
    row is unpacked into five columns (source, code, name, industry,
    order), of which only source, code and name are used.  The entity is
    named after the stripped name and gets one attribute holding the
    stripped code, under ``urn:nimsp:subindustry`` when source is
    ``'NIMSP'`` and ``urn:crp:subindustry`` otherwise.

    An exception raised while handling a row is reported (traceback plus a
    skip message) and processing continues with the next row.
    """
    for (source, code, name, industry, order) in csv.reader(csv_rows):
        try:
            code = code.strip()
            name = name.strip()
            source = source.strip()

            print('Generating entity for %s, %s, %s' % (source, code, name))

            if source == 'NIMSP':
                namespace = 'urn:nimsp:subindustry'
            else:
                namespace = 'urn:crp:subindustry'

            build_entity(name, 'industry', [(namespace, code)])

        except Exception:
            # Not a bare except: KeyboardInterrupt / SystemExit must still
            # be able to stop the import.
            traceback.print_exc()
            print("!!!!! Skipping Entity: %s !!!!!" % name)
        finally:
            sys.stdout.flush()
Exemple #15
0
    def create_organizations(self):
        """Stage lobbying organizations that have no entity yet and build them.

        Steps, in order:
          1. Drop and recreate ``tmp_lobbying_orgs_<today>`` as the union of
             registrant names and client parent names from
             ``lobbying_lobbying`` that match no existing organization alias
             (case-insensitively) and are not blacklisted; commit.  The
             query selects the constant 0 for both crp_id and nimsp_id.
          2. Read the staged (name, nimsp_id, crp_id) rows, then roll the
             transaction back.
          3. Raise ``EntityManagementError`` when the row count exceeds
             ``ORGANIZATION_CREATE_MAX_WARN`` and ``self.force_orgs`` is
             falsy.
          4. For each row either log it (``self.dry_run``) or call
             ``build_entity``, attaching an id attribute only for ids that
             are truthy and not ``'0'``.
          5. Commit and log the number of staged rows.

        NOTE(review): the closing "Created" message is logged in dry-run
        mode as well, with the staged row count.
        """
        self.log.info("Starting to find organizations to create...")

        drop_sql = 'drop table if exists tmp_lobbying_orgs_{0}'.format(self.today)
        self.cursor.execute(drop_sql, None)

        template_args = dict(
            date=self.today,
            cycle_clause=self.get_cycle_clause(),
        )
        tmp_sql = """
            create table tmp_lobbying_orgs_{date} as
                select 0::varchar(128) as crp_id, 0 as nimsp_id, max(l.registrant_name) as name
                from lobbying_lobbying l
                where
                    l.use = 't'
                    and registrant_name != ''
                    and not exists (
                        select *
                        from matchbox_entity e
                        inner join matchbox_entityalias a on e.id = a.entity_id
                        where
                            e.type = 'organization'
                            and lower(l.registrant_name) = lower(a.alias)
                    )
                    and not exists (select * from matchbox_entityblacklist meb where meb.name = registrant_name and type = 'organization')
                    {cycle_clause}
                group by lower(registrant_name)

                union

                select 0::varchar(128) as crp_id, 0 as nimsp_id, max(l.client_parent_name) as name
                from lobbying_lobbying l
                where
                    l.use = 't'
                    and client_parent_name != ''
                    and not exists (
                        select *
                        from matchbox_entity e
                        inner join matchbox_entityalias a on e.id = a.entity_id
                        where
                            e.type = 'organization'
                            and lower(l.client_parent_name) = lower(a.alias)
                    )
                    and not exists (select * from matchbox_entityblacklist meb where meb.name = client_parent_name and type = 'organization')
                    {cycle_clause}
                group by lower(client_parent_name)
        """.format(**template_args)

        self.cursor.execute(tmp_sql, None)
        transaction.commit()
        populated_msg = "- Table tmp_lobbying_orgs_{0} populated.".format(self.today)
        self.log.info(populated_msg)

        select_sql = "select name, nimsp_id, crp_id from tmp_lobbying_orgs_{0}".format(self.today)
        self.cursor.execute(select_sql)
        results = self.cursor.fetchall()
        transaction.rollback()

        staged = len(results)
        if not self.force_orgs and staged > ORGANIZATION_CREATE_MAX_WARN:
            raise EntityManagementError(
                "The number of organizations set to be created is {0}. The max this script will create automatically is {1}."
                .format(staged, ORGANIZATION_CREATE_MAX_WARN))

        for row in results:
            name, nimsp_id, crp_id = row
            if self.dry_run:
                self.log.info("- Would build entity {0}".format(row))
                continue

            # NIMSP first, then CRP; placeholder ids contribute nothing.
            candidates = (
                ('urn:nimsp:organization', nimsp_id),
                ('urn:crp:organization', crp_id),
            )
            attributes = [
                (urn, ext_id) for urn, ext_id in candidates
                if ext_id and ext_id != '0'
            ]
            build_entity(name, 'organization', attributes)

        transaction.commit()
        self.log.info("- Created {0} organization entities.".format(staged))
Exemple #16
0
    def create_individuals(self):
        """Stage people that have no entity yet and build 'individual' entities.

        Steps, in order:
          1. Drop and recreate ``tmp_individuals_<today>`` as the union of
             lobbyists (``lobbying_lobbyist`` joined to ``lobbying_report``)
             and contributors whose ext id starts with 'U'
             (``contribution_contribution``), leaving out names on the
             blacklist and ids already present in
             ``matchbox_entityattribute``; commit.
          2. Read the staged (name, id) rows, then roll the transaction back.
          3. Raise ``EntityManagementError`` when the row count exceeds
             ``INDIVIDUAL_CREATE_MAX_WARN`` and ``self.force_indivs`` is
             falsy.
          4. For each row either log it (``self.dry_run``) or call
             ``build_entity`` with a ``urn:crp:individual`` attribute.
          5. Commit and log the number of staged rows.

        NOTE(review): the closing "Created" message is logged in dry-run
        mode as well, with the staged row count.
        """
        self.log.info("Starting to find individuals to create...")
        self.log.debug("This is a debug log")

        drop_sql = 'drop table if exists tmp_individuals_{0}'.format(self.today)
        self.cursor.execute(drop_sql, None)

        template_args = dict(
            date=self.today,
            namespace_clause=self.get_namespace_clause(),
            cycle_clause=self.get_cycle_clause(),
        )
        creation_sql = """
            create table tmp_individuals_{date} as
                select min(name) as name, id from (
                    select min(lobbyist_name) as name, lobbyist_ext_id as id
                    from lobbying_lobbyist
                    inner join lobbying_report using (transaction_id)
                    where
                        lobbyist_name != ''
                        and not exists (select * from matchbox_entityattribute where substring(value for 11) = substring(lobbyist_ext_id for 11))
                        and not exists (select * from matchbox_entityblacklist meb where meb.name = lobbyist_name and type = 'individual')
                        {cycle_clause}
                    group by lobbyist_ext_id

                    union

                    select min(contributor_name) as name, contributor_ext_id as id
                    from contribution_contribution
                    where
                        contributor_name != ''
                        and contributor_ext_id like 'U%'
                        and not exists (select * from matchbox_entityattribute where value = contributor_ext_id)
                        and not exists (select * from matchbox_entityblacklist meb where meb.name = contributor_name and type = 'individual')
                        {namespace_clause}
                        {cycle_clause}
                    group by contributor_ext_id
                )x
                group by id
        """.format(**template_args)

        self.cursor.execute(creation_sql, None)
        transaction.commit()
        populated_msg = "- Table tmp_individuals_%s populated." % self.today
        self.log.info(populated_msg)

        select_sql = "select name, id from tmp_individuals_%s" % self.today
        self.cursor.execute(select_sql)
        results = self.cursor.fetchall()
        transaction.rollback()

        staged = len(results)
        if not self.force_indivs and staged > INDIVIDUAL_CREATE_MAX_WARN:
            raise EntityManagementError(
                "The number of individuals set to be created is {0}. The max this script will create automatically is {1}."
                .format(staged, INDIVIDUAL_CREATE_MAX_WARN))

        for name, crp_id in results:
            if self.dry_run:
                self.log.info("- Would build entity {0}|{1}".format(name, crp_id))
                continue
            crp_attribute = ('urn:crp:individual', crp_id)
            build_entity(name, 'individual', [crp_attribute])

        transaction.commit()
        self.log.info("- Created {0} individual entities.".format(staged))
Exemple #17
0
    def create_organizations(self):
        """Stage lobbying organizations that have no entity yet and build them.

        Steps, in order:
          1. Drop and recreate ``tmp_lobbying_orgs_<today>`` as the union of
             registrant names and client parent names from
             ``lobbying_lobbying`` that match no existing organization alias
             (case-insensitively) and are not blacklisted; commit.  The
             query selects the constant 0 for both crp_id and nimsp_id.
          2. Read the staged (name, nimsp_id, crp_id) rows, then roll the
             transaction back.
          3. Raise ``EntityManagementError`` when the row count exceeds
             ``ORGANIZATION_CREATE_MAX_WARN`` and ``self.force_orgs`` is
             falsy.
          4. For each row either log it (``self.dry_run``) or call
             ``build_entity``, attaching an id attribute only for ids that
             are truthy and not ``'0'``.
          5. Commit and log the number of staged rows.

        NOTE(review): the closing "Created" message is logged in dry-run
        mode as well, with the staged row count.
        """
        self.log.info("Starting to find organizations to create...")

        drop_sql = 'drop table if exists tmp_lobbying_orgs_{0}'.format(self.today)
        self.cursor.execute(drop_sql, None)

        template_args = dict(
            date=self.today,
            cycle_clause=self.get_cycle_clause(),
        )
        tmp_sql = """
            create table tmp_lobbying_orgs_{date} as
                select 0::varchar(128) as crp_id, 0 as nimsp_id, max(l.registrant_name) as name
                from lobbying_lobbying l
                where
                    l.use = 't'
                    and registrant_name != ''
                    and not exists (
                        select *
                        from matchbox_entity e
                        inner join matchbox_entityalias a on e.id = a.entity_id
                        where
                            e.type = 'organization'
                            and lower(l.registrant_name) = lower(a.alias)
                    )
                    and not exists (select * from matchbox_entityblacklist meb where meb.name = registrant_name and type = 'organization')
                    {cycle_clause}
                group by lower(registrant_name)

                union

                select 0::varchar(128) as crp_id, 0 as nimsp_id, max(l.client_parent_name) as name
                from lobbying_lobbying l
                where
                    l.use = 't'
                    and client_parent_name != ''
                    and not exists (
                        select *
                        from matchbox_entity e
                        inner join matchbox_entityalias a on e.id = a.entity_id
                        where
                            e.type = 'organization'
                            and lower(l.client_parent_name) = lower(a.alias)
                    )
                    and not exists (select * from matchbox_entityblacklist meb where meb.name = client_parent_name and type = 'organization')
                    {cycle_clause}
                group by lower(client_parent_name)
        """.format(**template_args)

        self.cursor.execute(tmp_sql, None)
        transaction.commit()
        populated_msg = "- Table tmp_lobbying_orgs_{0} populated.".format(self.today)
        self.log.info(populated_msg)

        select_sql = "select name, nimsp_id, crp_id from tmp_lobbying_orgs_{0}".format(self.today)
        self.cursor.execute(select_sql)
        results = self.cursor.fetchall()
        transaction.rollback()

        staged = len(results)
        if not self.force_orgs and staged > ORGANIZATION_CREATE_MAX_WARN:
            raise EntityManagementError(
                "The number of organizations set to be created is {0}. The max this script will create automatically is {1}."
                .format(staged, ORGANIZATION_CREATE_MAX_WARN))

        for row in results:
            name, nimsp_id, crp_id = row
            if self.dry_run:
                self.log.info("- Would build entity {0}".format(row))
                continue

            # NIMSP first, then CRP; placeholder ids contribute nothing.
            candidates = (
                ('urn:nimsp:organization', nimsp_id),
                ('urn:crp:organization', crp_id),
            )
            attributes = [
                (urn, ext_id) for urn, ext_id in candidates
                if ext_id and ext_id != '0'
            ]
            build_entity(name, 'organization', attributes)

        transaction.commit()
        self.log.info("- Created {0} organization entities.".format(staged))