def build_industries(csv_rows):
    """Create one 'industry' entity per distinct (industry, order) pair.

    ``csv_rows`` is parsed with ``csv.reader``; every row must have exactly
    five columns: source, code, name, industry, order.  The industry, source
    and order columns are stripped of surrounding whitespace before use.

    A row is skipped when its industry column is the literal ``'industry'``
    (the CSV header) or when its (industry, order) pair has already been
    seen.  A source of ``'NIMSP'`` produces a ``urn:nimsp:industry``
    attribute carrying ``order``; any other source produces
    ``urn:crp:industry``.

    Errors raised while handling a row are printed with their traceback and
    the row is skipped.  KeyboardInterrupt and SystemExit are not caught, so
    the import can still be interrupted.
    """
    # A set of tuples avoids the false collisions that an "_"-joined string
    # key has when the values themselves contain underscores.
    industries_seen = set()

    for (source, code, name, industry, order) in csv.reader(csv_rows):
        try:
            industry = industry.strip()
            source = source.strip()
            order = order.strip()

            key = (industry, order)
            if key in industries_seen or industry == 'industry':
                continue
            # Recorded before building, so a pair that fails is not retried
            # when it shows up again on a later row.
            industries_seen.add(key)

            print('Generating entity for %s, %s, %s' % (source, industry, order))

            if source == 'NIMSP':
                attributes = [('urn:nimsp:industry', order)]
            else:
                attributes = [('urn:crp:industry', order)]

            build_entity(industry, 'industry', attributes)
        except Exception:
            traceback.print_exception(*sys.exc_info())
            # The entity being built is the industry, not the row's
            # sub-industry name, so report the industry.
            print("!!!!! Skipping Entity: %s !!!!!" % industry)
        finally:
            # Tracebacks go to stderr and progress to stdout; flushing both
            # keeps them interleaved sensibly when output is redirected.
            sys.stderr.flush()
            sys.stdout.flush()
def build_industries(csv_rows):
    """Build an 'industry' entity for each unique (industry, order) pair.

    Rows come from ``csv.reader(csv_rows)`` and must each contain exactly
    five columns: source, code, name, industry, order.  Surrounding
    whitespace is stripped from industry, source and order.

    Skipped rows: the header row (industry column equal to ``'industry'``)
    and any row whose (industry, order) pair was already processed.  The
    entity receives a single attribute whose value is ``order``; its
    namespace is ``urn:nimsp:industry`` when source is ``'NIMSP'`` and
    ``urn:crp:industry`` otherwise.

    A failure on one row prints the traceback and a skip notice and moves on
    to the next row.  KeyboardInterrupt and SystemExit propagate.
    """
    # Tuples are used as keys so values containing "_" cannot collide the
    # way a joined string key could.
    seen_pairs = set()

    for (source, code, name, industry, order) in csv.reader(csv_rows):
        try:
            industry = industry.strip()
            source = source.strip()
            order = order.strip()

            pair = (industry, order)
            if industry == 'industry' or pair in seen_pairs:
                continue
            # Marked as seen up front: a pair whose build fails is not
            # attempted again on later rows.
            seen_pairs.add(pair)

            print('Generating entity for %s, %s, %s' % (source, industry, order))

            namespace = 'urn:nimsp:industry' if source == 'NIMSP' else 'urn:crp:industry'
            build_entity(industry, 'industry', [(namespace, order)])
        except Exception:
            traceback.print_exception(*sys.exc_info())
            # Report the industry (the entity name), not the row's
            # sub-industry name column.
            print("!!!!! Skipping Entity: %s !!!!!" % industry)
        finally:
            # Flush both streams so tracebacks (stderr) and progress lines
            # (stdout) stay in order when redirected.
            sys.stderr.flush()
            sys.stdout.flush()
def build_recipients(csv_rows):
    """Create a 'politician' entity for every (name, namespace, id) CSV row.

    ``csv_rows`` is parsed with ``csv.reader``; every row must have exactly
    three columns.  The name is stripped and decoded as UTF-8 with invalid
    bytes replaced (assumes ``csv.reader`` yields byte strings, as it does
    on Python 2).  The id and namespace are stripped.

    When the id is non-empty, one attribute is attached whose namespace is
    ``urn:nimsp:recipient`` or ``urn:crp:recipient`` depending on whether the
    row's namespace equals NIMSP_TRANSACTION_NAMESPACE or
    CRP_TRANSACTION_NAMESPACE; any other namespace fails the row.  When the
    id is empty the entity is built with no attributes.

    Errors on a row are printed with their traceback and the row is skipped.
    KeyboardInterrupt and SystemExit are not caught.
    """
    for (name, namespace, ext_id) in csv.reader(csv_rows):
        try:
            print('Generating entity for %s, %s, %s' % (name, namespace, ext_id))

            # Keep the raw ``name`` untouched: the skip message below prints
            # it, and printing the decoded unicode value could itself raise
            # UnicodeEncodeError from inside the error handler.
            clean_name = name.strip().decode('utf8', 'replace')
            ext_id = ext_id.strip()
            namespace = namespace.strip()

            attributes = []
            if ext_id:
                if namespace == NIMSP_TRANSACTION_NAMESPACE:
                    attr_namespace = 'urn:nimsp:recipient'
                elif namespace == CRP_TRANSACTION_NAMESPACE:
                    attr_namespace = 'urn:crp:recipient'
                else:
                    raise Exception('Unknown namespace: %s' % namespace)
                attributes = [(attr_namespace, ext_id)]

            build_entity(clean_name, 'politician', attributes)
        except Exception:
            traceback.print_exception(*sys.exc_info())
            print("!!!!! Skipping Entity: %s !!!!!" % name)
        finally:
            sys.stderr.flush()
            sys.stdout.flush()
def handle(self, **options):
    """Build an organization entity for each new spender, then run the
    super PAC metadata statement.

    ``new_spenders()`` is expected to yield (name, committee_id) pairs.  Each
    pair is printed and turned into an 'organization' entity carrying a
    ``urn:fec:committee`` attribute.  Afterwards ``superpac_metadata_stmt``
    is executed on a fresh cursor from ``connection``.  ``options`` is
    accepted but not used.
    """
    print("Adding entities:")

    for spender in new_spenders():
        name, committee_id = spender
        print("%s: %s" % (name, committee_id))

        committee_attribute = ('urn:fec:committee', committee_id)
        build_entity(name, 'organization', [committee_attribute])

    cursor = connection.cursor()
    cursor.execute(superpac_metadata_stmt)
def create_politicians(self):
    """Find contribution recipients that have no entity yet and create them.

    Steps:
      1. Drop and recreate ``tmp_politicians_<today>`` with one row per
         (transaction_namespace, recipient_ext_id) for recipients of type
         'P' whose ext id is not already an entity attribute value and whose
         name is not blacklisted for type 'politician'.
      2. Read the rows back.  Unless ``self.force_pols`` is set, refuse to
         continue when there are more than POLITICIAN_CREATE_MAX_WARN rows.
      3. For each row either log what would be built (``self.dry_run``) or
         build a 'politician' entity, attaching a ``urn:nimsp:recipient`` or
         ``urn:crp:recipient`` attribute according to the row's namespace.

    Raises:
        EntityManagementError: too many candidates and ``force_pols`` unset.
        Exception: a row with a non-empty id has an unrecognised namespace
            (only checked when not in dry-run mode).
    """
    self.log.info("Starting to find politicians to create...")

    self.cursor.execute(
        'drop table if exists tmp_politicians_{0}'.format(self.today), None)

    # NOTE(review): cycle_clause is passed to format() below but the template
    # has no {cycle_clause} placeholder, so it does not affect this query --
    # confirm whether a cycle filter was intended here.
    tmp_sql = (
        " create table tmp_politicians_{date} as"
        " select min(recipient_name) as name,"
        " transaction_namespace as namespace,"
        " recipient_ext_id as id"
        " from contribution_contribution"
        " where recipient_type = 'P'"
        " and recipient_name != ''"
        " and recipient_ext_id != ''"
        " and not exists (select * from matchbox_entityattribute where value = recipient_ext_id)"
        " and not exists (select * from matchbox_entityblacklist meb where meb.name = recipient_name and type = 'politician')"
        " {namespace_clause}"
        " group by transaction_namespace, recipient_ext_id "
    ).format(date=self.today,
             namespace_clause=self.get_namespace_clause(),
             cycle_clause=self.get_cycle_clause())

    self.cursor.execute(tmp_sql, None)
    transaction.commit()
    self.log.info("- Table tmp_politicians_{0} populated.".format(self.today))

    self.cursor.execute(
        "select name, namespace, id from tmp_politicians_{0}".format(self.today),
        None)
    results = self.cursor.fetchall()
    transaction.commit()

    if not self.force_pols and len(results) > POLITICIAN_CREATE_MAX_WARN:
        raise EntityManagementError(
            "The number of politicians set to be created is {0}. The max this script will create automatically is {1}."
            .format(len(results), POLITICIAN_CREATE_MAX_WARN))

    for name, namespace, ext_id in results:
        if self.dry_run:
            self.log.info("- Would build entity %s|%s|%s" % (name, namespace, ext_id))
            continue

        attributes = []
        if ext_id:
            if namespace == NIMSP_TRANSACTION_NAMESPACE:
                attributes.append(('urn:nimsp:recipient', ext_id))
            elif namespace == CRP_TRANSACTION_NAMESPACE:
                attributes.append(('urn:crp:recipient', ext_id))
            else:
                raise Exception('Unknown namespace: %s' % namespace)

        build_entity(name, 'politician', attributes)

    transaction.commit()

    # A dry run builds nothing, so it must not claim entities were created.
    if self.dry_run:
        self.log.info("- Would create {0} politician entities.".format(len(results)))
    else:
        self.log.info("- Created {0} politician entities.".format(len(results)))
def create_individuals(self):
    """Find lobbyists and contributors that have no entity yet and create them.

    Steps:
      1. Drop and recreate ``tmp_individuals_<today>`` as the union of
         (a) lobbyists whose ext id, compared on its first 11 characters,
         matches no entity attribute value, and (b) contributors whose ext
         id starts with 'U' and is not an entity attribute value.  Names
         blacklisted for type 'individual' are excluded from both halves.
      2. Read the rows back.  Unless ``self.force_indivs`` is set, refuse to
         continue when there are more than INDIVIDUAL_CREATE_MAX_WARN rows.
      3. For each row either log what would be built (``self.dry_run``) or
         build an 'individual' entity with a ``urn:crp:individual``
         attribute.

    Raises:
        EntityManagementError: too many candidates and ``force_indivs`` unset.
    """
    self.log.info("Starting to find individuals to create...")
    self.log.debug("This is a debug log")

    self.cursor.execute(
        'drop table if exists tmp_individuals_{0}'.format(self.today), None)

    creation_sql = (
        " create table tmp_individuals_{date} as"
        " select min(name) as name, id from ("
        " select min(lobbyist_name) as name, lobbyist_ext_id as id"
        " from lobbying_lobbyist"
        " inner join lobbying_report using (transaction_id)"
        " where lobbyist_name != ''"
        " and not exists (select * from matchbox_entityattribute where substring(value for 11) = substring(lobbyist_ext_id for 11))"
        " and not exists (select * from matchbox_entityblacklist meb where meb.name = lobbyist_name and type = 'individual')"
        " {cycle_clause}"
        " group by lobbyist_ext_id"
        " union"
        " select min(contributor_name) as name, contributor_ext_id as id"
        " from contribution_contribution"
        " where contributor_name != ''"
        " and contributor_ext_id like 'U%'"
        " and not exists (select * from matchbox_entityattribute where value = contributor_ext_id)"
        " and not exists (select * from matchbox_entityblacklist meb where meb.name = contributor_name and type = 'individual')"
        " {namespace_clause}"
        " {cycle_clause}"
        " group by contributor_ext_id"
        " )x"
        " group by id "
    ).format(date=self.today,
             namespace_clause=self.get_namespace_clause(),
             cycle_clause=self.get_cycle_clause())

    # NOTE(review): the explicit None params presumably keep the DB driver
    # from treating the literal '%' in "like 'U%'" as a placeholder -- confirm.
    self.cursor.execute(creation_sql, None)
    transaction.commit()
    self.log.info("- Table tmp_individuals_%s populated." % self.today)

    self.cursor.execute("select name, id from tmp_individuals_%s" % self.today)
    results = self.cursor.fetchall()
    transaction.rollback()

    if not self.force_indivs and len(results) > INDIVIDUAL_CREATE_MAX_WARN:
        raise EntityManagementError(
            "The number of individuals set to be created is {0}. The max this script will create automatically is {1}."
            .format(len(results), INDIVIDUAL_CREATE_MAX_WARN))

    for name, crp_id in results:
        if self.dry_run:
            self.log.info("- Would build entity {0}|{1}".format(name, crp_id))
        else:
            build_entity(name, 'individual', [('urn:crp:individual', crp_id)])

    transaction.commit()

    # A dry run builds nothing, so it must not claim entities were created.
    if self.dry_run:
        self.log.info("- Would create {0} individual entities.".format(len(results)))
    else:
        self.log.info("- Created {0} individual entities.".format(len(results)))
def build_individuals(csv_rows):
    """Create an 'individual' entity for every (name, crp_id) CSV row.

    ``csv_rows`` is parsed with ``csv.reader``; every row must have exactly
    two columns.  The name is stripped and decoded as UTF-8 with invalid
    bytes replaced (assumes ``csv.reader`` yields byte strings, as it does
    on Python 2).  ``crp_id`` is passed through unchanged as the value of a
    ``urn:crp:individual`` attribute.

    Errors on a row are printed with their traceback and the row is skipped.
    KeyboardInterrupt and SystemExit are not caught, so the import can be
    interrupted.
    """
    for (name, crp_id) in csv.reader(csv_rows):
        try:
            print('Generating entity for %s, %s' % (name, crp_id))

            clean_name = name.strip().decode('utf8', 'replace')
            build_entity(clean_name, 'individual', [('urn:crp:individual', crp_id)])
        except Exception:
            traceback.print_exception(*sys.exc_info())
            # Prints the raw CSV value rather than the decoded one.
            print("!!!!! Skipping Entity: %s !!!!!" % name)
        finally:
            sys.stderr.flush()
            sys.stdout.flush()
def build_individuals(csv_rows):
    """Build one 'individual' entity per (name, crp_id) row of ``csv_rows``.

    Rows are read with ``csv.reader`` and must have exactly two columns.
    The entity name is the stripped name column decoded as UTF-8 with
    invalid bytes replaced (assumes byte-string rows, as ``csv.reader``
    produces on Python 2).  The entity gets a single ``urn:crp:individual``
    attribute holding ``crp_id`` exactly as read.

    A failing row prints its traceback and a skip notice, then processing
    continues with the next row.  KeyboardInterrupt and SystemExit
    propagate.
    """
    for (name, crp_id) in csv.reader(csv_rows):
        try:
            print('Generating entity for %s, %s' % (name, crp_id))

            clean_name = name.strip().decode('utf8', 'replace')
            attributes = [('urn:crp:individual', crp_id)]
            build_entity(clean_name, 'individual', attributes)
        except Exception:
            traceback.print_exception(*sys.exc_info())
            # The raw CSV value is printed, not the decoded one.
            print("!!!!! Skipping Entity: %s !!!!!" % name)
        finally:
            sys.stderr.flush()
            sys.stdout.flush()
def create_politicians(self):
    """Create 'politician' entities for recipients that do not have one.

    The method rebuilds the staging table ``tmp_politicians_<today>`` from
    ``contribution_contribution`` (recipient type 'P', non-empty name and
    ext id, ext id not already an entity attribute value, name not
    blacklisted for type 'politician'), reads it back, and then walks the
    rows.  In dry-run mode each row is only logged.  Otherwise an entity is
    built with a ``urn:nimsp:recipient`` or ``urn:crp:recipient`` attribute
    chosen from the row's namespace.

    Raises:
        EntityManagementError: more than POLITICIAN_CREATE_MAX_WARN rows
            were found and ``self.force_pols`` is not set.
        Exception: a row with a non-empty id has an unrecognised namespace
            (only checked when not in dry-run mode).
    """
    self.log.info("Starting to find politicians to create...")

    self.cursor.execute(
        'drop table if exists tmp_politicians_{0}'.format(self.today), None)

    # NOTE(review): the template below has no {cycle_clause} placeholder, so
    # the cycle_clause argument given to format() is ignored -- confirm
    # whether politicians were meant to be filtered by cycle.
    tmp_sql = (
        " create table tmp_politicians_{date} as"
        " select min(recipient_name) as name,"
        " transaction_namespace as namespace,"
        " recipient_ext_id as id"
        " from contribution_contribution"
        " where recipient_type = 'P'"
        " and recipient_name != ''"
        " and recipient_ext_id != ''"
        " and not exists (select * from matchbox_entityattribute where value = recipient_ext_id)"
        " and not exists (select * from matchbox_entityblacklist meb where meb.name = recipient_name and type = 'politician')"
        " {namespace_clause}"
        " group by transaction_namespace, recipient_ext_id "
    ).format(date=self.today,
             namespace_clause=self.get_namespace_clause(),
             cycle_clause=self.get_cycle_clause())

    self.cursor.execute(tmp_sql, None)
    transaction.commit()
    self.log.info("- Table tmp_politicians_{0} populated.".format(self.today))

    self.cursor.execute(
        "select name, namespace, id from tmp_politicians_{0}".format(self.today),
        None)
    results = self.cursor.fetchall()
    transaction.commit()

    candidate_count = len(results)
    if not self.force_pols and candidate_count > POLITICIAN_CREATE_MAX_WARN:
        raise EntityManagementError(
            "The number of politicians set to be created is {0}. The max this script will create automatically is {1}."
            .format(candidate_count, POLITICIAN_CREATE_MAX_WARN))

    for name, namespace, ext_id in results:
        if self.dry_run:
            self.log.info("- Would build entity %s|%s|%s" % (name, namespace, ext_id))
            continue

        attributes = []
        if ext_id:
            if namespace == NIMSP_TRANSACTION_NAMESPACE:
                attributes.append(('urn:nimsp:recipient', ext_id))
            elif namespace == CRP_TRANSACTION_NAMESPACE:
                attributes.append(('urn:crp:recipient', ext_id))
            else:
                raise Exception('Unknown namespace: %s' % namespace)

        build_entity(name, 'politician', attributes)

    transaction.commit()

    # Nothing is built during a dry run; say so instead of "Created".
    if self.dry_run:
        self.log.info("- Would create {0} politician entities.".format(candidate_count))
    else:
        self.log.info("- Created {0} politician entities.".format(candidate_count))
def build_committees(csv_rows):
    """Create an 'organization' entity for every (fec_id, name) CSV row.

    ``csv_rows`` is parsed with ``csv.reader``; every row must have exactly
    two columns.  The name is stripped and decoded as UTF-8 with invalid
    bytes replaced (assumes ``csv.reader`` yields byte strings, as it does
    on Python 2).  The stripped ``fec_id`` becomes the value of a
    ``urn:fec:committee`` attribute.

    Errors on a row are printed with their traceback and the row is skipped.
    KeyboardInterrupt and SystemExit are not caught.
    """
    for (fec_id, name) in csv.reader(csv_rows):
        try:
            print('Generating entity for %s, %s' % (fec_id, name))

            # ``name`` is left as read so the skip message prints the raw
            # value; printing the decoded unicode string could raise
            # UnicodeEncodeError from inside the error handler.
            clean_name = name.strip().decode('utf8', 'replace')
            fec_id = fec_id.strip()

            attributes = [('urn:fec:committee', fec_id)]
            build_entity(clean_name, 'organization', attributes)
        except Exception:
            traceback.print_exception(*sys.exc_info())
            print("!!!!! Skipping Entity: %s !!!!!" % name)
        finally:
            # Flush both streams so tracebacks (stderr) and progress lines
            # (stdout) stay in order when redirected.
            sys.stderr.flush()
            sys.stdout.flush()
def build_organizations(csv_rows):
    """Create an 'organization' entity for every (crp_id, nimsp_id, name) row.

    ``csv_rows`` is parsed with ``csv.reader``; every row must have exactly
    three columns.  The name is stripped and decoded as UTF-8 with invalid
    bytes replaced (assumes ``csv.reader`` yields byte strings, as it does
    on Python 2).  Both ids are stripped.

    An id that is empty or the string ``'0'`` is treated as absent.  A
    present NIMSP id adds a ``urn:nimsp:organization`` attribute and a
    present CRP id adds ``urn:crp:organization``, in that order.

    Errors on a row are printed with their traceback and the row is skipped.
    KeyboardInterrupt and SystemExit are not caught.
    """
    for (crp_id, nimsp_id, name) in csv.reader(csv_rows):
        try:
            print('Generating entity for %s, %s, %s' % (crp_id, nimsp_id, name))

            # ``name`` is left as read so the skip message prints the raw
            # value; printing the decoded unicode string could raise
            # UnicodeEncodeError from inside the error handler.
            clean_name = name.strip().decode('utf8', 'replace')
            crp_id = crp_id.strip()
            nimsp_id = nimsp_id.strip()

            attributes = []
            if nimsp_id and nimsp_id != '0':
                attributes.append(('urn:nimsp:organization', nimsp_id))
            if crp_id and crp_id != '0':
                attributes.append(('urn:crp:organization', crp_id))

            build_entity(clean_name, 'organization', attributes)
        except Exception:
            traceback.print_exception(*sys.exc_info())
            print("!!!!! Skipping Entity: %s !!!!!" % name)
        finally:
            # Flush both streams so tracebacks (stderr) and progress lines
            # (stdout) stay in order when redirected.
            sys.stderr.flush()
            sys.stdout.flush()
def build_subindustries(csv_rows):
    """Create an 'industry' entity for every sub-industry CSV row.

    ``csv_rows`` is parsed with ``csv.reader``; every row must have exactly
    five columns: source, code, name, industry, order.  Only source, code
    and name are used, each stripped of surrounding whitespace.  No header
    row is filtered out here.

    The entity is named after the row's name column and carries one
    attribute whose value is ``code``.  The attribute namespace is
    ``urn:nimsp:subindustry`` when source is ``'NIMSP'`` and
    ``urn:crp:subindustry`` otherwise.

    Errors on a row are printed with their traceback and the row is skipped.
    KeyboardInterrupt and SystemExit are not caught.
    """
    for (source, code, name, industry, order) in csv.reader(csv_rows):
        try:
            code = code.strip()
            name = name.strip()
            source = source.strip()

            print('Generating entity for %s, %s, %s' % (source, code, name))

            if source == 'NIMSP':
                attributes = [('urn:nimsp:subindustry', code)]
            else:
                attributes = [('urn:crp:subindustry', code)]

            build_entity(name, 'industry', attributes)
        except Exception:
            traceback.print_exception(*sys.exc_info())
            print("!!!!! Skipping Entity: %s !!!!!" % name)
        finally:
            # Flush both streams so tracebacks (stderr) and progress lines
            # (stdout) stay in order when redirected.
            sys.stderr.flush()
            sys.stdout.flush()
def create_organizations(self):
    """Find lobbying registrants and clients with no entity and create them.

    Steps:
      1. Drop and recreate ``tmp_lobbying_orgs_<today>`` as the union of
         registrant names and client parent names from ``lobbying_lobbying``
         rows with ``use = 't'``.  A name is kept only when no
         'organization' entity alias matches it case-insensitively and it is
         not blacklisted for type 'organization'.  Both id columns are the
         constant 0 in this query.
      2. Read the rows back.  Unless ``self.force_orgs`` is set, refuse to
         continue when there are more than ORGANIZATION_CREATE_MAX_WARN
         rows.
      3. For each row either log what would be built (``self.dry_run``) or
         build an 'organization' entity.  Ids that are falsy or equal to
         ``'0'`` add no attribute.

    Raises:
        EntityManagementError: too many candidates and ``force_orgs`` unset.
    """
    self.log.info("Starting to find organizations to create...")

    self.cursor.execute(
        'drop table if exists tmp_lobbying_orgs_{0}'.format(self.today), None)

    tmp_sql = (
        " create table tmp_lobbying_orgs_{date} as"
        " select 0::varchar(128) as crp_id, 0 as nimsp_id, max(l.registrant_name) as name"
        " from lobbying_lobbying l"
        " where l.use = 't'"
        " and registrant_name != ''"
        " and not exists ("
        " select * from matchbox_entity e"
        " inner join matchbox_entityalias a on e.id = a.entity_id"
        " where e.type = 'organization'"
        " and lower(l.registrant_name) = lower(a.alias)"
        " )"
        " and not exists (select * from matchbox_entityblacklist meb where meb.name = registrant_name and type = 'organization')"
        " {cycle_clause}"
        " group by lower(registrant_name)"
        " union"
        " select 0::varchar(128) as crp_id, 0 as nimsp_id, max(l.client_parent_name) as name"
        " from lobbying_lobbying l"
        " where l.use = 't'"
        " and client_parent_name != ''"
        " and not exists ("
        " select * from matchbox_entity e"
        " inner join matchbox_entityalias a on e.id = a.entity_id"
        " where e.type = 'organization'"
        " and lower(l.client_parent_name) = lower(a.alias)"
        " )"
        " and not exists (select * from matchbox_entityblacklist meb where meb.name = client_parent_name and type = 'organization')"
        " {cycle_clause}"
        " group by lower(client_parent_name) "
    ).format(date=self.today, cycle_clause=self.get_cycle_clause())

    self.cursor.execute(tmp_sql, None)
    transaction.commit()
    self.log.info("- Table tmp_lobbying_orgs_{0} populated.".format(self.today))

    self.cursor.execute(
        "select name, nimsp_id, crp_id from tmp_lobbying_orgs_{0}".format(self.today))
    results = self.cursor.fetchall()
    transaction.rollback()

    if not self.force_orgs and len(results) > ORGANIZATION_CREATE_MAX_WARN:
        raise EntityManagementError(
            "The number of organizations set to be created is {0}. The max this script will create automatically is {1}."
            .format(len(results), ORGANIZATION_CREATE_MAX_WARN))

    for result in results:
        name, nimsp_id, crp_id = result

        if self.dry_run:
            self.log.info("- Would build entity {0}".format(result))
            continue

        attributes = []
        if nimsp_id and nimsp_id != '0':
            attributes.append(('urn:nimsp:organization', nimsp_id))
        if crp_id and crp_id != '0':
            attributes.append(('urn:crp:organization', crp_id))

        build_entity(name, 'organization', attributes)

    transaction.commit()

    # A dry run builds nothing, so it must not claim entities were created.
    if self.dry_run:
        self.log.info("- Would create {0} organization entities.".format(len(results)))
    else:
        self.log.info("- Created {0} organization entities.".format(len(results)))
def create_individuals(self):
    """Create 'individual' entities for lobbyists/contributors lacking one.

    The method rebuilds the staging table ``tmp_individuals_<today>`` as the
    union of two candidate sets, both excluding names blacklisted for type
    'individual':

      * lobbyists from ``lobbying_lobbyist`` joined to ``lobbying_report``
        whose ext id, compared on its first 11 characters, matches no entity
        attribute value;
      * contributors from ``contribution_contribution`` whose ext id starts
        with 'U' and is not an entity attribute value.

    It then reads the table back and, for each row, either logs the entity
    it would build (``self.dry_run``) or builds it with a
    ``urn:crp:individual`` attribute.

    Raises:
        EntityManagementError: more than INDIVIDUAL_CREATE_MAX_WARN rows
            were found and ``self.force_indivs`` is not set.
    """
    self.log.info("Starting to find individuals to create...")
    self.log.debug("This is a debug log")

    self.cursor.execute(
        'drop table if exists tmp_individuals_{0}'.format(self.today), None)

    creation_sql = (
        " create table tmp_individuals_{date} as"
        " select min(name) as name, id from ("
        " select min(lobbyist_name) as name, lobbyist_ext_id as id"
        " from lobbying_lobbyist"
        " inner join lobbying_report using (transaction_id)"
        " where lobbyist_name != ''"
        " and not exists (select * from matchbox_entityattribute where substring(value for 11) = substring(lobbyist_ext_id for 11))"
        " and not exists (select * from matchbox_entityblacklist meb where meb.name = lobbyist_name and type = 'individual')"
        " {cycle_clause}"
        " group by lobbyist_ext_id"
        " union"
        " select min(contributor_name) as name, contributor_ext_id as id"
        " from contribution_contribution"
        " where contributor_name != ''"
        " and contributor_ext_id like 'U%'"
        " and not exists (select * from matchbox_entityattribute where value = contributor_ext_id)"
        " and not exists (select * from matchbox_entityblacklist meb where meb.name = contributor_name and type = 'individual')"
        " {namespace_clause}"
        " {cycle_clause}"
        " group by contributor_ext_id"
        " )x"
        " group by id "
    ).format(date=self.today,
             namespace_clause=self.get_namespace_clause(),
             cycle_clause=self.get_cycle_clause())

    # NOTE(review): passing None as params presumably stops the DB driver
    # from interpreting the '%' in "like 'U%'" as a placeholder -- confirm.
    self.cursor.execute(creation_sql, None)
    transaction.commit()
    self.log.info("- Table tmp_individuals_%s populated." % self.today)

    self.cursor.execute("select name, id from tmp_individuals_%s" % self.today)
    results = self.cursor.fetchall()
    transaction.rollback()

    candidate_count = len(results)
    if not self.force_indivs and candidate_count > INDIVIDUAL_CREATE_MAX_WARN:
        raise EntityManagementError(
            "The number of individuals set to be created is {0}. The max this script will create automatically is {1}."
            .format(candidate_count, INDIVIDUAL_CREATE_MAX_WARN))

    for name, crp_id in results:
        if self.dry_run:
            self.log.info("- Would build entity {0}|{1}".format(name, crp_id))
        else:
            build_entity(name, 'individual', [('urn:crp:individual', crp_id)])

    transaction.commit()

    # Nothing is built during a dry run; say so instead of "Created".
    if self.dry_run:
        self.log.info("- Would create {0} individual entities.".format(candidate_count))
    else:
        self.log.info("- Created {0} individual entities.".format(candidate_count))
def create_organizations(self):
    """Create 'organization' entities for lobbying names lacking one.

    The method rebuilds the staging table ``tmp_lobbying_orgs_<today>`` as
    the union of registrant names and client parent names taken from
    ``lobbying_lobbying`` rows with ``use = 't'``.  A name is included only
    when it matches no alias of an 'organization' entity
    (case-insensitively) and is not blacklisted for type 'organization'.
    Both id columns are the constant 0 in this query.

    It then reads the table back and, for each row, either logs the row
    (``self.dry_run``) or builds the entity.  An id that is falsy or equal
    to ``'0'`` contributes no attribute.

    Raises:
        EntityManagementError: more than ORGANIZATION_CREATE_MAX_WARN rows
            were found and ``self.force_orgs`` is not set.
    """
    self.log.info("Starting to find organizations to create...")

    self.cursor.execute(
        'drop table if exists tmp_lobbying_orgs_{0}'.format(self.today), None)

    tmp_sql = (
        " create table tmp_lobbying_orgs_{date} as"
        " select 0::varchar(128) as crp_id, 0 as nimsp_id, max(l.registrant_name) as name"
        " from lobbying_lobbying l"
        " where l.use = 't'"
        " and registrant_name != ''"
        " and not exists ("
        " select * from matchbox_entity e"
        " inner join matchbox_entityalias a on e.id = a.entity_id"
        " where e.type = 'organization'"
        " and lower(l.registrant_name) = lower(a.alias)"
        " )"
        " and not exists (select * from matchbox_entityblacklist meb where meb.name = registrant_name and type = 'organization')"
        " {cycle_clause}"
        " group by lower(registrant_name)"
        " union"
        " select 0::varchar(128) as crp_id, 0 as nimsp_id, max(l.client_parent_name) as name"
        " from lobbying_lobbying l"
        " where l.use = 't'"
        " and client_parent_name != ''"
        " and not exists ("
        " select * from matchbox_entity e"
        " inner join matchbox_entityalias a on e.id = a.entity_id"
        " where e.type = 'organization'"
        " and lower(l.client_parent_name) = lower(a.alias)"
        " )"
        " and not exists (select * from matchbox_entityblacklist meb where meb.name = client_parent_name and type = 'organization')"
        " {cycle_clause}"
        " group by lower(client_parent_name) "
    ).format(date=self.today, cycle_clause=self.get_cycle_clause())

    self.cursor.execute(tmp_sql, None)
    transaction.commit()
    self.log.info("- Table tmp_lobbying_orgs_{0} populated.".format(self.today))

    self.cursor.execute(
        "select name, nimsp_id, crp_id from tmp_lobbying_orgs_{0}".format(self.today))
    results = self.cursor.fetchall()
    transaction.rollback()

    candidate_count = len(results)
    if not self.force_orgs and candidate_count > ORGANIZATION_CREATE_MAX_WARN:
        raise EntityManagementError(
            "The number of organizations set to be created is {0}. The max this script will create automatically is {1}."
            .format(candidate_count, ORGANIZATION_CREATE_MAX_WARN))

    for result in results:
        name, nimsp_id, crp_id = result

        if self.dry_run:
            self.log.info("- Would build entity {0}".format(result))
            continue

        attributes = []
        if nimsp_id and nimsp_id != '0':
            attributes.append(('urn:nimsp:organization', nimsp_id))
        if crp_id and crp_id != '0':
            attributes.append(('urn:crp:organization', crp_id))

        build_entity(name, 'organization', attributes)

    transaction.commit()

    # Nothing is built during a dry run; say so instead of "Created".
    if self.dry_run:
        self.log.info("- Would create {0} organization entities.".format(candidate_count))
    else:
        self.log.info("- Created {0} organization entities.".format(candidate_count))