def save_to_csv(self, response, **meta):
    """Populate an AL massage-therapy license item from scraped meta fields.

    Static source fields are set first, then each scraped value is copied
    onto its item field.  Returns the loaded item.
    """
    loader = ItemLoader(item=AlMassageTherapyLicensesSpiderItem(), response=response)
    loader.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    loader.add_value('sourceName', 'AL_Massage_Therapy_Licenses')
    loader.add_value('url', 'http://www.almtbd.alabama.gov/licensee.aspx')
    loader.add_value('category', meta['category'])
    # Split the raw owner string into (company, dba) parts.
    loader.add_value('company_name', self._getDBA(meta['company_name'])[0])
    loader.add_value('dba_name', self._getDBA(meta['company_name'])[1])
    loader.add_value('approved by', meta['approved_by'])
    loader.add_value('permit_lic_no', meta['permit_lic_no'])
    loader.add_value('renewal date', meta['Renewal_Date'])
    loader.add_value('permit_lic_status', meta['permit_lic_status'])
    loader.add_value('location_address_string', meta['location_address_string'])
    loader.add_value('mail_address_string', meta['mailing_address'])
    loader.add_value('person_name', meta['person_name'])
    loader.add_value('person_subtype', meta['person_subtype'])
    loader.add_value('company_phone', meta['company_phone'])
    loader.add_value('company_fax', meta['fax'])
    loader.add_value('permit_lic_eff_date', meta['permit_lic_eff_date'])
    loader.add_value('permit_lic_exp_date', meta['permit_lic_exp_date'])
    loader.add_value('approved date', meta['approved_date'])
    loader.add_value('company_email', meta['email'])
    loader.add_value('company_website', meta['website_address'])
    loader.add_value('permit_lic_desc', meta['permit_lic_desc'])
    loader.add_value('permit_type', 'therapy_license')
    return loader.load_item()
def save_csv(self, response, data_dic):
    """Load a HI SOS business-license record into an item loader.

    Static source fields are set first, then every key/value pair from
    ``data_dic`` is copied across verbatim.  Returns the loader itself
    (the caller is expected to call ``load_item``).
    """
    loader = ItemLoader(item=HiSosSpiderItem(), response=response)
    loader.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    loader.add_value('sourceName', 'HI_SOS')
    loader.add_value('url', 'https://hbe.ehawaii.gov/documents/search.html')
    loader.add_value('permit_type', 'business_license')
    for field, value in data_dic.items():
        loader.add_value(field, value)
    return loader
def save_csv(self, response, data_dic):
    """Load an IL agriculture-license record into an item loader.

    Returns the loader (caller performs ``load_item``).
    """
    loader = ItemLoader(item=IlAgricultureLicensesSpiderItem())
    loader.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    loader.add_value('sourceName', 'IL_Agriculture_Licenses')
    loader.add_value('permit_type', 'agriculture_license')
    loader.add_value('url', 'https://www2.illinois.gov/sites/agr/licenses/Pages/A-Z-License-List.aspx')
    for field, value in data_dic.items():
        loader.add_value(field, value)
    return loader
def save_to_csv(self, response, **meta):
    """Build a WA Kittitas building-permit item from scraped meta fields.

    Normalizes the free-form address, splits owner names into
    name/DBA parts, and trims letter suffixes off parcel numbers.
    Fixes from review: removed a redundant ``meta['address'] =
    meta['address']`` self-assignment, hoisted the duplicated
    ``"WA \\d+"`` scan into a single search, and switched the regexes
    to raw strings.  Returns the loaded item.
    """
    il = ItemLoader(item=WaKittitasBuildingPermitsSpiderItem())
    il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    il.add_value('sourceName', 'WA_Kittitas_Building_Permits')
    il.add_value('url', 'https://www.co.kittitas.wa.us/cds/building/reports.aspx')
    il.add_value('report date', meta['date'])
    il.add_value('permit_lic_no', meta['permit_number'])
    il.add_value('permit_subtype', meta['permit_type'])
    il.add_value('permit_lic_desc', meta['permit_lic_desc'])

    # --- address normalization ---
    if len(meta['address'].split(',')) > 3:
        # Keep everything up to and including the "WA <zip>" token;
        # anything after it is report noise.
        state_zip = re.search(r"WA \d+", meta['address'])
        meta['address'] = re.split(r"WA \d+", meta['address'])[0] + state_zip.group()
    elif 'PERMIT' in meta['address']:
        # Rows without a usable street address carry permit text instead.
        meta['address'] = 'WA'
    if ':,' in meta['address'] or ':AL,' in meta['address']:
        meta['address'] = meta['address'].replace(':AL,', ':,')
        meta['address'] = meta['address'].split(':,')[1]
    if ',' not in meta['address']:
        meta['address'] = meta['address'] + ', WA'
    il.add_value('location_address_string',
                 meta['address'].replace('Address:', 'WA').replace('WA, WA', 'WA'))

    il.add_value('permit_lic_value', meta['valuation'])
    il.add_value('permit_lic_fee', meta['fees'])
    if meta['owner_name']:
        company_names = meta['owner_name']
        meta['company_name'] = self._getDBA(company_names)[0]
        meta['dba_name'] = self._getDBA(company_names)[1]
    il.add_value('mixed_name', meta['company_name'])
    il.add_value('dba_name', meta['dba_name'])
    il.add_value('mixed_subtype', meta['mixed_subtype'])
    il.add_value('mail_address_string', meta['mailing'])
    il.add_value('contractor_company', meta['contractor'])
    il.add_value('contractor_dba', meta['contractor_dba'])

    # Parcel numbers sometimes embed a letter suffix (T/F/M/B); only the
    # numeric prefix before it is the real parcel id.
    if any(ch in meta['parcel_number'] for ch in ('T', 'F', 'M', 'B')):
        meta['parcel_number'] = meta['parcel_number'].replace('B', 'T').replace('M', 'T').replace('F', 'T')
        il.add_value('parcel #', meta['parcel_number'].split('T')[0])
    else:
        il.add_value('parcel #', meta['parcel_number'])

    if ':' in meta['issue_date']:
        il.add_value('permit_lic_eff_date', meta['issue_date'].split(':')[1])
    else:
        il.add_value('permit_lic_eff_date', meta['issue_date'])
    il.add_value('permit_type', 'building_permit')
    return il.load_item()
def save_csv(self, response, data_dic):
    """Load a ME SOS record into an item loader and return it.

    Bug fix: the loader was constructed with ``link_page=response`` — an
    unrecognized keyword that scrapy's ItemLoader only stores in its
    loader context, so the response was never bound.  It now passes
    ``response=response``, matching every other ``save_csv`` in this
    module.
    """
    il = ItemLoader(item=MeSosSpiderItem(), response=response)
    il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    il.add_value('sourceName', 'ME_SOS')
    il.add_value('permit_type', 'business_license')
    il.add_value('url', 'https://icrs.informe.org/nei-sos-icrs/ICRS?MainPage=x')
    # Scraped values may still contain markup; strip tags before loading.
    for k in data_dic:
        il.add_value(k, self.remove_tag(data_dic[k]))
    return il
def save_csv(self, response, data_dic):
    """Load an AL food-inspection record into an item loader and return it."""
    loader = ItemLoader(item=AlFoodInspectionsSpiderItem(), response=response)
    # Strip tags/escape chars and collapse runs of whitespace on every input.
    loader.default_input_processor = MapCompose(
        lambda v: v.strip(),
        remove_tags,
        lambda data: re.sub(r'\s+', ' ', data) if data else '',
        replace_escape_chars,
    )
    loader.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    loader.add_value('sourceName', 'AL_Food_Inspections')
    loader.add_value('url', 'http://www.alabamapublichealth.gov/foodscores/index.html')
    for field, value in data_dic.items():
        loader.add_value(field, value)
    return loader
class IlHospitalLicensesSpider(ExcelFeedSpider, DataFormatterMixin, LookupDatareaderMixin):
    """Scrapes the IL Dept. of Public Health hospital-directory spreadsheet.

    Fix from review: removed a leftover debug ``print(row)`` in
    ``parse_row`` and the commented-out dead code.
    """

    name = '1390_il_hospital_licenses'
    allowed_domains = ['illinois.gov']
    start_urls = ['https://data.illinois.gov/dataset/410idph_hospital_directory/resource/9bdedb85-77f3-490a-9bbd-2f3f5f227981']
    custom_settings = {
        'FILE_NAME': Utils.getRundateFileName('AI-1390_Licenses_Hospital_IL_CurationReady'),
        'JIRA_ID': 'AI_1390',
        'DOWNLOAD_DELAY': 5,
        'COOKIES_ENABLED': True,
        'COOKIES_DEBUG': True,
        'HTTPCACHE_ENABLED': False,
        'TOP_HEADER': {
            'company_name': 'Hospitals/End Stage Renal Disease/Pregnancy Termination Specialty Centers',
            'company_phone': 'Phone',
            'company_subtype': 'Type',
            'county': 'County',
            'dba_name': '',
            'location_address_string': 'Address',
            'permit_lic_desc': '',
            'permit_lic_exp': 'Exp. Date',
            'permit_lic_no': 'License #/Medicare #',
            'permit_type': ''},
        'FIELDS_TO_EXPORT': [
            'company_name', 'dba_name', 'location_address_string', 'county',
            'company_phone', 'permit_lic_no', 'company_subtype',
            'permit_lic_exp', 'permit_lic_desc', 'permit_type', 'sourceName',
            'url', 'ingestion_timestamp'],
        'NULL_HEADERS': ['county']
    }

    def parse(self, response):
        """Request the XLS download and hand it to the excel-feed parser."""
        yield scrapy.Request(
            'https://data.illinois.gov/dataset/a552f663-74a8-4722-a506-0619e9356062/resource/9bdedb85-77f3-490a-9bbd-2f3f5f227981/download/siqueryinterns-2018-2019illinois.govhospitals-march-2019.xls',
            callback=self.parse_excel, dont_filter=True, encoding='utf-8')

    def parse_row(self, response, row):
        """Map one spreadsheet row onto an IlHospitalLicensesSpiderItem."""
        il = ItemLoader(item=IlHospitalLicensesSpiderItem())
        il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
        il.add_value('url', 'https://data.illinois.gov/dataset/410idph_hospital_directory/resource/9bdedb85-77f3-490a-9bbd-2f3f5f227981')
        il.add_value('sourceName', 'IL_Hospital_Licenses')
        il.add_value('permit_type', "medical_license")
        name = self._getDBA(row['Hospitals'])
        # Drop the " -" artifact the DBA split can leave on the name.
        company_name = str(name[0]).replace(' -', '') if ' -' in str(name[0]) else name[0]
        address = self.format__address_4(row['Address'], row['City'], 'IL', row['Zipcode'])
        il.add_value('dba_name', name[1])
        il.add_value('permit_lic_no', row.get('License #', ''))
        il.add_value('permit_lic_exp',
                     self.format_date(row.get('Exp. Date', '')) if row.get('Exp. Date') else '')
        il.add_value('company_name', company_name)
        il.add_value('location_address_string', address)
        il.add_value('county', row.get('County', ''))
        il.add_value('permit_lic_desc',
                     "Medical License for " + company_name if name[0] else "Medical License")
        il.add_value('company_phone', row.get('Phone', ''))
        il.add_value('company_subtype', row.get('Type', ''))
        yield il.load_item()
def save_csv(self, response, data_dic):
    """Load an OH SOS business record into an item loader and return it."""
    loader = ItemLoader(item=OhSosSpiderItem(), response=response)
    loader.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    loader.add_value('sourceName', 'OH_SOS')
    loader.add_value('url', 'https://www5.sos.state.oh.us/ords/f?p=100:1:::NO:1:P1_TYPE:NAME')
    loader.add_value('permit_type', 'business_license')
    for field, value in data_dic.items():
        loader.add_value(field, value)
    return loader
def parse_details(self, response):
    """Walk the AL cosmetology search-results grid, load one item per row,
    then follow ASP.NET __doPostBack pagination, or restart the search
    when the pending search-element lists still have entries.
    """
    # Result rows live in the results grid; index [1:] skips the header row.
    tr_list=response.xpath('//*[@id="ctl00_ContentPlaceHolder1_dtgResults"]//tr')[1:]
    for tr in tr_list:
        link=tr.xpath('td[10]/a/@href').extract_first()
        company_name=tr.xpath('td[4]/text()').extract_first()
        f_name=tr.xpath('td[1]/text()').extract_first()
        m_name=tr.xpath('td[2]/text()').extract_first()
        l_name=tr.xpath('td[3]/text()').extract_first()
        person_name=self.format_name(f_name,m_name,l_name)
        # Fall back to the person's name when the company cell is blank/short.
        if company_name and len(company_name) > 2:
            company_name=company_name
        else:
            company_name=person_name
        if link:
            link_url='https://alboc.glsuite.us/GLSuiteWeb/Clients/ALBOC/public/'+str(link)
            # NOTE(review): using `yield scrapy.Request(...)` as an expression
            # presumably relies on an inline-requests-style middleware that
            # sends the detail response back into this generator — confirm
            # before refactoring; under plain Scrapy this binds None.
            parse_res=yield scrapy.Request(url=link_url,dont_filter=True)
            add=parse_res.xpath('//*[contains(text(),"City")]/following-sibling::td/span/text()').extract_first()
            state=parse_res.xpath('//*[contains(text(),"State")]/following-sibling::td/span/text()').extract_first()
            if add and state:
                location_address_string=add+', '+state
            else:
                location_address_string=state
            permit_lic_no=parse_res.xpath('//*[contains(text(),"License Number")]/following-sibling::td/span/text()').extract_first()
            permit_subtype=parse_res.xpath('//*[contains(text(),"License Type")]/following-sibling::td/span/text()').extract_first()
            permit_lic_exp_date=parse_res.xpath('//*[contains(text(),"License Expiration Date")]/following-sibling::td/span/text()').extract_first()
            permit_lic_status=parse_res.xpath('//*[contains(text(),"License Status")]/following-sibling::td/span/text()').extract_first()
            disciplinary_action=parse_res.xpath('//*[contains(text(),"Disciplinary Action")]/following-sibling::td/span/text()').extract_first()
            il = ItemLoader(item=AlCosmetologyLicensesSpiderItem(),response=response)
            il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
            il.add_value('url', 'https://alboc.glsuite.us/GLSuiteWeb/Clients/ALBOC/public/VerificationSearch.aspx')
            il.add_value('sourceName', 'AL_Cosmetology_Licenses')
            il.add_value('permit_lic_exp_date',permit_lic_exp_date)
            il.add_value('permit_lic_status',permit_lic_status)
            il.add_value('person_name', person_name)
            il.add_value('violation_type', '')
            il.add_value('disciplinary action', disciplinary_action)
            il.add_value('permit_lic_desc', ('Cosmetology License for'+' '+str(company_name)) if company_name and len(company_name)>2 else 'Cosmetology License')
            il.add_value('permit_type', 'cosmetology_license')
            # Default to the bare state when no usable address was scraped.
            il.add_value('location_address_string', location_address_string if location_address_string and len(location_address_string) > 2 else 'AL')
            il.add_value('permit_lic_no', permit_lic_no)
            il.add_value('company_name', company_name)
            il.add_value('permit_subtype', permit_subtype)
            yield il.load_item()
    # ASP.NET pager: the link following the current-page <span> is "next".
    pageee=response.xpath('//td[@colspan="10"]/span/following-sibling::a/@href').extract_first()
    if pageee:
        # Re-post the form with the extracted __doPostBack target to page on.
        page_link=JavaScriptUtils.getValuesFromdoPost(pageee)
        page_data={'__EVENTTARGET':page_link['__EVENTTARGET'],'__EVENTARGUMENT':page_link['__EVENTARGUMENT'],'__VIEWSTATE':response.xpath('//*[@id="__VIEWSTATE"]/@value').extract_first(), '__VIEWSTATEGENERATOR':response.xpath('//*[@id="__VIEWSTATEGENERATOR"]/@value').extract_first(),'__EVENTVALIDATION':response.xpath('//*[@id="__EVENTVALIDATION"]/@value').extract_first(),'__VIEWSTATEENCRYPTED':response.xpath('//*[@id="__VIEWSTATEENCRYPTED"]/@value').extract_first()}
        yield scrapy.FormRequest(url=response.url,method='POST',formdata=page_data,callback=self.parse_details,dont_filter=True)
    # No more pages: restart the search while un-searched elements remain.
    elif len(self.search_element_a)>0:
        yield scrapy.Request(url=self.start_urls[0], callback=self.parse, dont_filter=True)
    elif len(self.search_element)>0:
        self.check_first=True
        yield scrapy.Request(url=self.start_urls[0], callback=self.parse, dont_filter=True)
def save_csv(self, response, data_dic):
    """Load a GA Henry County building-permit record and return the loader."""
    loader = ItemLoader(item=GaHenryBuildingPermitsSpiderItem(), response=response)
    loader.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    loader.add_value('sourceName', 'GA_Henry_Building_Permits')
    loader.add_value('url', 'https://www.sagesgov.com/henrycounty-ga/Portal/Search.aspx')
    loader.add_value('permit_type', 'building_permit')
    for field, value in data_dic.items():
        loader.add_value(field, value)
    return loader
def parse_row(self, response, row):
    """Convert one asbestos-worker spreadsheet row into an item."""
    loader = ItemLoader(item=IlAsbestosWorkerLicensesSpiderItem())
    loader.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    loader.add_value('sourceName', 'IL_Asbestos_Worker_Licenses')
    loader.add_value('url', 'https://data.illinois.gov/dataset/378idph_asbestos_licensed_workers/resource/f3266216-1c0e-4326-acb7-0f4341d1b463')
    loader.add_value('person_address_string',
                     self.format__address_4(row['Expr1'], row['tech_city'],
                                            row['tech_state'], row['tech_zip']))
    loader.add_value('person_name', row['tech_name'] + ' ' + row['LAST_NAME'])
    loader.add_value('permit_lic_desc', 'Asbestos Contractor License')
    loader.add_value('dba_name', '')
    loader.add_value('person_phone', row['Expr2'])
    loader.add_value('county', row['COUNTY'])
    # License ids shorter than nine characters get a leading zero restored.
    license_no = row['lic_id_number']
    if len(license_no) < 9:
        license_no = '0' + license_no
    loader.add_value('permit_lic_no', license_no)
    loader.add_value('permit_type', 'asbestos_contractor_license')
    yield loader.load_item()
def save_to_csv(self, response, **meta):
    """Load an OR alcohol server-educator record and return the item.

    NOTE(review): 'company_website' is populated from meta['email'], and
    the address field name carries a capital 'S'
    ('location_address_String') — both look suspicious but are kept
    verbatim because field names must match the declared item; confirm
    against the item class before changing.
    """
    loader = ItemLoader(item=OrAlcoholServerEducatorLicensesSpiderItem())
    loader.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    loader.add_value('sourceName', 'OR_Alcohol_Server_Educator_Licenses')
    loader.add_value('url', meta['url'])
    loader.add_value('type', meta['type_val'])
    loader.add_value('company_name', self._getDBA(meta['company_name'])[0])
    loader.add_value('dba_name', self._getDBA(meta['company_name'])[1])
    loader.add_value('classes in/online course in', meta['class_in'])
    loader.add_value('location_address_String', 'OR')
    loader.add_value('company_phone', meta['phone'].replace('Phone:', ''))
    loader.add_value('company_website', meta['email'])
    return loader.load_item()
def parse_pdf(self, response):
    """Parse the CT forest-practitioner directory PDF, yielding one item per row.

    Fixes from review: removed the leftover debug ``print`` and the
    commented-out date filter; the level-code if/elif chain is now a
    lookup table (same output for the same codes).
    """
    # Expands the PDF's "level" code to its full description; unknown
    # codes pass through unchanged, matching the old if/elif behavior.
    level_map = {
        'F': 'FORESTER',
        'SFPH': 'SUPERVISING FOREST PRODUCTS HARVESTER',
        'FPH': 'FOREST PRODUCTS HARVESTER',
    }
    for row in self.__extractData(response):
        for col in row:
            il = ItemLoader(item=CtForestPractitionerLicenseSpiderItem())
            il.default_input_processor = MapCompose(
                lambda v: v.strip(), remove_tags, replace_escape_chars)
            il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
            il.add_value('url', 'https://www.depdata.ct.gov/forestry/ForestPractitioner/directry.pdf')
            il.add_value('sourceName', 'CT_Forest_Practitioner_License')
            il.add_value('person_phone', col['phone'])
            il.add_value('person_name', col['f_name'] + ' ' + col['l_name'])
            # The expiration column may carry an extra "extended permit"
            # token after the date itself.
            if ' ' in col['expiration']:
                parts = col['expiration'].split(' ')
                date, e_permit = parts[0], parts[1]
            else:
                date, e_permit = col['expiration'], ''
            il.add_value('permit_lic_exp_date', date)
            if '490' in e_permit:
                e_permit = "490- permitted to assist landowners seeking classification of their land as 'Forest Land'"
            il.add_value('extended permit', e_permit)
            il.add_value('permit_lic_no', col['cert'])
            level_desc = level_map.get(col['level'], col['level'])
            il.add_value('level', col['level'])
            il.add_value('permit_subtype', level_desc)
            il.add_value('permit_lic_desc', level_desc)
            il.add_value('permit_type', 'forester_license')
            il.add_value('location_address_string',
                         col['address'] + ', ' + col['city'] + ', ' +
                         col['state'] + ' ' + col['zip'])
            yield il.load_item()
def save_to_csv(self, response, **data_pass):
    """Load one VA Mecklenburg building-permit record and return the item.

    Bug fix: ``add_value('permit_subtype', ...)`` was called twice with
    the same value; because ItemLoader appends values, the field ended up
    duplicated.  The second call is removed.
    """
    il = ItemLoader(item=VaMecklenburgBuildingPermitsSpiderItem(), response=response)
    il.default_input_processor = MapCompose(lambda v: v.strip(), remove_tags,
                                            replace_escape_chars)
    il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    il.add_value('sourceName', 'VA_Mecklenburg_Building_Permits')
    il.add_value('url', 'https://webpermit.mecklenburgcountync.gov/Default.aspx?PossePresentation=SearchByAddress')
    il.add_value('person_address_string', data_pass['person_address_string'])
    il.add_value('permit_lic_no', data_pass['permit_lic_no'])
    il.add_value('master #', data_pass['master #'])
    il.add_value('submittal #', data_pass['submittal #'])
    il.add_value('permit_subtype', data_pass['permit_subtype'])
    il.add_value('permit_lic_status', data_pass['permit_lic_status'])
    il.add_value('location_address_string', data_pass['location_address_string'])
    il.add_value('parcel #', data_pass['parcel #'])
    # Both occupancy fields intentionally read the same input key.
    il.add_value('occupancy_subtype', data_pass['occupancy type'])
    il.add_value('occupancy type', data_pass['occupancy type'])
    il.add_value('usdc code', data_pass['usdc code'])
    il.add_value('type of building', data_pass['type of building'])
    il.add_value('equipment type', data_pass['equipment type'])
    il.add_value('permit_lic_fee', data_pass['permit_lic_fee'])
    il.add_value('mixed_name', data_pass['mixed_name'])
    il.add_value('mixed_subtype', data_pass['mixed_subtype'])
    il.add_value('mixed_phone', data_pass['mixed_phone'])
    il.add_value('mixed_contractor_name', data_pass['mixed_contractor_name'])
    il.add_value('contractor id', data_pass['contractor id'])
    il.add_value('contractor_phone', data_pass['contractor_phone'])
    il.add_value('contractor_lic_no', data_pass['contractor_lic_no'])
    il.add_value('contractor_lic_type', data_pass['contractor_lic_type'])
    il.add_value('contractor_address_string', data_pass['contractor_address_string'])
    il.add_value('inspection_id', data_pass['inspection_id'])
    il.add_value('inspection_subtype', data_pass['inspection_subtype'])
    il.add_value('inspection_date', data_pass['inspection_date'])
    il.add_value('inspection_pass_fail', data_pass['inspection_pass_fail'])
    il.add_value('inspection_type', data_pass['inspection_type'])
    il.add_value('permit_type', 'building_permit')
    return il.load_item()
def save_csv(self, response, main_res, permit_lic_no):
    """Build one RI septic-system license item from the detail page.

    Bug fixes: the original read ``company[0]`` — ``company`` is an
    undefined name (NameError) — and indexed the raw ``designer`` string
    (``designer[0]``/``designer[1]`` yield single characters) instead of
    the computed ``designer_dba`` split.  Also guards against None coming
    back from ``extract_first``.
    """
    location_address_string = rem_esc(main_res.xpath("//em[contains(text(),'Location')]/following::text()").extract_first())
    plat_lot = rem_esc(''.join(main_res.xpath('//em[contains(text(),"Plat")]/following::text()').extract()[:2]))
    Owner_name = rem_esc(main_res.xpath("//em[contains(text(),'Owner Name')]/following::text()").extract_first())
    corp_owner = rem_esc(main_res.xpath("//em[contains(text(),'Corp Owner')]/following::text()").extract_first())
    designer = rem_esc(main_res.xpath("//em[contains(text(),'Designer')]/following::text()").extract_first())
    total = rem_esc(main_res.xpath("//em[contains(text(),'Total')]/following::text()").extract_first())
    plat = ''
    lot = ''
    sublot = ''
    if plat_lot:
        # The plat/lot line comes in several layouts; carve each part out
        # by slicing off the keyword that delimits it.
        if 'Plat' in plat_lot and 'Lot' in plat_lot and 'Sublot' in plat_lot:
            plat = re.search('Plat.*Lot', plat_lot).group()[4:-3].strip()
            lot = re.search('Lot.*Sublot', plat_lot).group()[3:-6].strip()
            sublot = re.search('Sublot.*', plat_lot).group()[6:].strip()
        elif 'Plat' in plat_lot and 'Lot' in plat_lot:
            plat = re.search('Plat.*Lot', plat_lot).group()[4:-3].strip()
            lot = re.search('Lot.*', plat_lot).group()[3:].strip()
        elif 'Plat' in plat_lot and 'Sublot' in plat_lot:
            plat = re.search('Plat.*Sublot', plat_lot).group()[4:-6].strip()
            lot = re.search('Sublot.*', plat_lot).group()[6:].strip()
        elif 'plat' in plat_lot.lower():
            plat = re.search('Plat.*', plat_lot).group()[4:].strip()
    il = ItemLoader(item=RiSepticSystemLicensesSpiderItem(), response=response)
    il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    il.add_value('sourceName', 'RI_Septic_System_Licenses')
    il.add_value('url', 'https://www.ri.gov/DEM/isdssearch/')
    il.add_value('permit_lic_no', permit_lic_no)
    il.add_value('city/town', response.meta['city'])
    il.add_value('location_address_string', (location_address_string or '').strip() + ", RI")
    il.add_value('plat', (plat.upper().strip())[:-1] if plat.endswith('&') else plat.upper())
    il.add_value('lot', (lot.upper().strip())[:-1] if lot.endswith('&') else lot.upper())
    il.add_value('sublot', (sublot.upper().strip())[:-1] if sublot.endswith('&') else sublot.upper())
    # Prefer corporate owner, then personal owner, then designer.
    company_name = (corp_owner if (corp_owner or '').strip()
                    else Owner_name if (Owner_name or '').strip()
                    else designer if (designer or '').strip() else '')
    com_name = self._getDBA(company_name)
    designer_dba = self._getDBA(designer)
    permit_lic_desc = 'Septic System Licenses'
    if com_name[0]:
        permit_lic_desc += " For " + com_name[0]
    # Fixed: use the DBA splits, not the undefined `company` / raw string.
    il.add_value('company_name', com_name[0] if com_name[0] and com_name[0].strip() else designer_dba[0])
    il.add_value('dba_name', com_name[1] if com_name[1] else designer_dba[1])
    il.add_value('person_name', designer_dba[0])
    il.add_value('total flow', '' if total and 'Not available' in total else total)
    il.add_value('permit_lic_desc', permit_lic_desc)
    il.add_value('permit_type', 'utility_license')
    yield il.load_item()
def parse_main_page(self, response):
    """Parse the JSON payload of AL medical-license violations; yield items.

    The service returns a JSON-ish blob in ``d``; records are split apart
    on '},{' boundaries and decoded one at a time.  Bug fix:
    ``location_address_string`` is now reset for every record — it was
    only assigned inside the address conditional, so a record without an
    address either raised NameError or silently reused the previous
    record's address.
    """
    value1 = json.loads(response.body_as_unicode())
    value2 = value1['d'].replace('},{', '}~~{').split('[')[1].split(']')[0]
    for raw in value2.split('~~'):
        # Strip stray backslashes and normalize a known embedded-quote
        # case so the fragment decodes as JSON.
        json_acceptable_string = raw.replace("\\", "").replace(
            '"administrative medicine"', "'administrative medicine'")
        d = json.loads(json_acceptable_string)
        person_name = d['FullName']
        permit_subtype = d['LicenseType']
        permit_lic_no = d['License_Number']
        location_address_string = ''  # reset per record (see docstring)
        if d['Address1'] and d['City'] and d['Zip']:
            location_address_string = d['Address1'] + ', ' + d['City'] + ' ' + d['Zip']
        violation_description = d['Publicfile']
        permit_lic_desc = 'Medical License for ' + str(person_name)
        violation_type = 'professional_violation'
        vio = d['Action_Date']
        if '-' in vio:
            violation_date = ''
        else:
            # Action_Date arrives in "/Date(<epoch millis>)/" form.
            violation_date = time.strftime(
                '%m/%d/%Y',
                time.gmtime(int(re.split(r'\(|\)', vio)[1]) / 1000.))
        violation_subtype = d['ActionTaken']
        il = ItemLoader(item=AlMedicalLicenseViolationsSpiderItem(), response=response)
        il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
        il.add_value('sourceName', 'AL_Medical_License_Violations')
        il.add_value('url', 'https://abme.igovsolution.com/online/Lookups/Publiclogfile.aspx')
        il.add_value('person_name', self._getDBA(person_name)[0])
        il.add_value('dba_name', self._getDBA(person_name)[1])
        il.add_value('permit_subtype', permit_subtype)
        il.add_value('permit_lic_no', permit_lic_no)
        il.add_value('location_address_string', location_address_string)
        il.add_value('violation_description', violation_description)
        il.add_value('permit_lic_desc', permit_lic_desc)
        il.add_value('violation_type', violation_type)
        il.add_value('violation_date', violation_date)
        il.add_value('violation_subtype', violation_subtype)
        il.add_value('permit_type', 'medical_license')
        yield il.load_item()
def save_to_csv(self, response, data_dic):
    """Load one NH SOS business record into a NhSosSpiderItem loader.

    Placeholder tokens ('NOT-AVAILABLE', 'NONE') are replaced with empty
    strings; blank DBAs and addresses fall back to derived defaults.
    Returns the loader (caller performs ``load_item``).
    """
    loader = ItemLoader(item=NhSosSpiderItem(), response=response)
    loader.default_input_processor = MapCompose(
        lambda v: v.strip(), remove_tags, replace_escape_chars)
    loader.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    loader.add_value('url', 'https://quickstart.sos.nh.gov/online/BusinessInquire')
    loader.add_value('sourceName', 'NH_SOS')
    loader.add_value('creation_date',
                     data_dic['business_creation_date'].replace('NOT-AVAILABLE', ''))
    # Prefer an explicit DBA; otherwise derive one from the company name.
    explicit_dba = data_dic['dba_name']
    if explicit_dba and len(explicit_dba) > 3:
        loader.add_value('dba_name', explicit_dba)
    else:
        loader.add_value('dba_name', self._getDBA(data_dic['company_name'])[1])
    loader.add_value('non_profit_indicator', data_dic['non_profit_indicator'])
    loader.add_value('mail_address_string', data_dic['mailing_address_string'])
    loader.add_value('status', data_dic['business_status'])
    loader.add_value('citizenship / state of formation', data_dic['state_of_formation'])
    loader.add_value('duration', data_dic['duration'])
    mixed_name = data_dic['mixed_name']
    loader.add_value('mixed_name', '' if mixed_name is None else mixed_name)
    loader.add_value('company_name', self._getDBA(data_dic['company_name'])[0])
    loader.add_value('company_phone', data_dic['phone'].replace('NONE', ''))
    loader.add_value('inactive_date', data_dic['inactive_date'])
    loader.add_value('homestate name', self._getDBA(data_dic['host_name'])[0])
    loader.add_value('naics_description', data_dic['naics_description'])
    loader.add_value('permit_type', 'business_license')
    loader.add_value('mixed_subtype', data_dic['mixed_subtype'])
    loader.add_value('previous name', data_dic['previous_name'])
    loader.add_value('company_subtype', self._getDBA(data_dic['business_type'])[0])
    loader.add_value('entity_id', data_dic['business_id'])
    location = data_dic['location_address_string']
    loader.add_value('location_address_string',
                     location if location and len(location) > 5 else 'NH')
    loader.add_value('company_email', data_dic['business_mail'].replace('NONE', ''))
    loader.add_value('person_address_string', data_dic['person_address_string'])
    return loader
def save_to_csv(self, response, **meta):
    """Load a WA Kittitas building-permit report row and return the item.

    Cleans placeholder tokens out of the address, valuation and fee
    strings, and splits owner/contractor strings into name + DBA parts.
    """
    loader = ItemLoader(item=WaKittitasBuildingPermitsSpiderItem())
    loader.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    loader.add_value('sourceName', 'WA_Kittitas_Building_Permits')
    loader.add_value('url', 'https://www.co.kittitas.wa.us/cds/building/reports.aspx')
    loader.add_value('report date', meta['date'])
    loader.add_value('permit_lic_no', meta['permit_number'])
    loader.add_value('permit_subtype', meta['permit_type'])
    loader.add_value('permit_lic_desc', meta['permit_lic_desc'])
    if str(meta['address']) == ', WA':
        meta['address'] = 'WA'
    loader.add_value('location_address_string',
                     meta['address'].replace('UNKNOWN', '')
                                    .replace('UNKNOWN,', '')
                                    .replace('UNKNOWN ,', ''))
    if meta['valuation']:
        meta['valuation'] = (meta['valuation'].replace('$0.00', '')
                                              .replace('$-', '')
                                              .replace('-', ''))
        if str(meta['valuation']) == '0':
            meta['valuation'] = ''
    loader.add_value('permit_lic_value', meta['valuation'])
    if meta['fees']:
        meta['fees'] = meta['fees'].replace('$-', '').replace('-', '')
    loader.add_value('permit_lic_fee', meta['fees'])
    if meta['owner_name']:
        raw_owner = meta['owner_name']
        meta['owner_name'] = self._getDBA(raw_owner)[0]
        meta['dba_name'] = self._getDBA(raw_owner)[1]
    loader.add_value('mixed_name', meta['owner_name'])
    loader.add_value('dba_name', meta['dba_name'])
    loader.add_value('mixed_subtype', meta['mixed_subtype'])
    loader.add_value('mail_address_string', meta['mailing'])
    if meta['contractor']:
        raw_contractor = meta['contractor']
        meta['contractor'] = self._getDBA(raw_contractor)[0]
        meta['contractor_dba'] = self._getDBA(raw_contractor)[1]
    loader.add_value('contractor_company', meta['contractor'])
    loader.add_value('contractor_dba', meta['contractor_dba'])
    loader.add_value('parcel #', meta['parcel_number'])
    loader.add_value('permit_lic_eff_date', meta['issue_date'])
    loader.add_value('permit_type', 'building_permit')
    return loader.load_item()
def getCustom_settings_WithHeader(custom_settings):
    """Fill TOP_HEADER/FIELDS_TO_EXPORT/NULL_HEADERS from the requirement Excel file.

    Looks in the spider's JIRA directory for a .xlsx requirement file,
    falling back to .xls.  Bug fix: the old code accepted a directory
    containing only .xls files but then indexed ``glob.glob("*.xlsx")[0]``,
    raising IndexError; the fallback is now explicit.  Also fixed the
    'rquirement' typo in the error message.

    Raises:
        Exception: if no requirement Excel file exists in the directory.
    """
    req_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)),
        settings['NEWSPIDER_MODULE'].split(".")[1],
        custom_settings['JIRA_ID'])
    logger.info("spider Path: %s", req_dir)
    import glob
    # NOTE(review): process-wide chdir; kept because the glob patterns
    # below are relative, but consider passing req_dir to glob instead.
    os.chdir(req_dir)
    excel_files = glob.glob("*.xlsx") or glob.glob("*.xls")
    if excel_files:
        headers = Utils.getExcelHeaders(os.path.join(req_dir, excel_files[0]))
        custom_settings['TOP_HEADER'] = headers['top_header']
        custom_settings['FIELDS_TO_EXPORT'] = headers['feed_expo']
        custom_settings['NULL_HEADERS'] = headers['null_header']
        return custom_settings
    raise Exception(
        'requirement Excel File is missing in path: {}'.format(req_dir))
def save_to_csv(self, response, **meta_data):
    """Load a FL Clay building-permit record into an item loader.

    The literal string 'None' in number_of_stories/year_built is
    normalized to an empty string.  Returns the loader (caller performs
    ``load_item``).
    """
    loader = ItemLoader(item=FlClayBuildingPermitsSpiderItem(), response=response)
    loader.default_input_processor = MapCompose(
        lambda v: v.strip(), remove_tags, replace_escape_chars)
    loader.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    loader.add_value('permit_lic_no', str(meta_data['permit_lic_no']))
    loader.add_value('permit_subtype', meta_data['permit_subtype'])
    loader.add_value('permit_lic_desc', meta_data['permit_lic_desc'])
    loader.add_value('location_address_string', meta_data['location_address_string'])
    loader.add_value('permit_lic_eff_date', meta_data['permit_lic_eff_date'])
    loader.add_value('notes', meta_data['notes'])
    loader.add_value('mixed_name', meta_data['mixed_name'])
    loader.add_value('mixed_subtype', meta_data['mixed_subtype'])
    loader.add_value('person_address_string', meta_data['person_address_string'])
    loader.add_value('mixed_contractor_name', meta_data['mixed_contractor_name'])
    loader.add_value('contractor_lic_no', meta_data['contractor_lic_no'])
    loader.add_value('contractor_lic_type', meta_data['contractor_lic_type'])
    loader.add_value('permit_lic_value', meta_data['permit_lic_value'])
    stories = meta_data['number_of_stories']
    loader.add_value('number_of_stories', '' if stories == 'None' else stories)
    year_built = meta_data['year_built']
    loader.add_value('year_built', '' if year_built == 'None' else year_built)
    loader.add_value('inspection_id', meta_data['inspection_id'])
    loader.add_value('inspection_date', meta_data['inspection_date'])
    loader.add_value('inspection_subtype', meta_data['inspection_subtype'])
    loader.add_value('inspection_pass_fail', meta_data['inspection_pass_fail'])
    loader.add_value('inspector_comments', meta_data['inspector_comments'])
    loader.add_value('inspection_type', meta_data['inspection_type'])
    loader.add_value('permit_type', "building_permit")
    loader.add_value('url', "http://www.claycountygov.com/about-us/local-government/public-records-search/permits")
    loader.add_value('sourceName', 'FL_Clay_Building_Permits')
    return loader
def save_to_csv(self, response, **meta):
    """Build an IL Champaign building-permit item from scraped *meta*."""
    il = ItemLoader(item=IlChampaignBuildingPermitsSpiderItem(),
                    response=response)
    il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    il.add_value(
        'url', 'http://etrakit.ci.champaign.il.us/etrakit3/Search/permit.aspx')
    il.add_value('sourceName', 'IL_Champaign_Building_Permits')
    # (item field, meta key) pairs copied through verbatim.
    field_map = (
        ('finaled date', 'finaled_date'),
        ('inspection_date', 'inspection_date'),
        ('contractor_dba', 'contractor_dba'),
        ('mixed_contractor_name', 'mixed_contractor_name'),
        ('dba_name', 'dba_name'),
        ('apn', 'apn'),
        ('permit_lic_fee', 'permit_lic_fee'),
        ('location_address_string', 'location_address_string'),
        ('person_address_string', 'person_address_string'),
        ('subtype', 'subtype'),
        ('permit_subtype', 'permit_subtype'),
        ('inspection_subtype', 'inspection_subtype'),
        ('mixed_subtype', 'mixed_subtype'),
        ('contractor_address_string', 'contractor_address_string'),
        ('permit_lic_status', 'permit_lic_status'),
        ('permit_lic_exp_date', 'permit_lic_exp_date'),
        ('permit_lic_no', 'permit_lic_no'),
        ('notes', 'notes'),
        ('property type', 'property_type'),
        ('mixed_name', 'mixed_name'),
        ('inspection_pass_fail', 'inspection_pass_fail'),
        ('approved date', 'approved_date'),
        ('permit_lic_eff_date', 'permit_lic_eff_date'),
        ('permit_applied_date', 'permit_applied_date'),
        ('scheduled date', 'scheduled_date'),
    )
    for item_field, meta_key in field_map:
        il.add_value(item_field, meta[meta_key])
    # Fall back to the permit subtype, then to a generic label, whenever the
    # scraped description is missing or too short (<= 2 chars) to be useful.
    desc = meta['permit_lic_desc']
    if not (desc and len(desc) > 2):
        desc = meta['permit_subtype']
        if not (desc and len(desc) > 2):
            desc = 'Building Permit'
    il.add_value('permit_lic_desc', desc)
    il.add_value('inspection_type', meta['inspection_type'])
    il.add_value('permit_type', 'building_permit')
    return il.load_item()
def save_csv(self, response, data_dic):
    """Load VA SOS record fields into a VaSosSpiderItem loader.

    Adds the fixed source metadata, then copies every key/value pair from
    *data_dic* through verbatim.  Returns the populated ItemLoader — the
    caller invokes ``load_item`` itself.
    """
    # A large commented-out legacy parse_row implementation was removed
    # here; it duplicated this mapping field-by-field and was dead code.
    il = ItemLoader(item=VaSosSpiderItem())
    il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    il.add_value('sourceName', 'VA_SOS')
    il.add_value('url', 'http://www.scc.virginia.gov/clk/dwnld.aspx')
    il.add_value('permit_type', 'business_license')
    for key, value in data_dic.items():
        il.add_value(key, value)
    return il
def save_to_csv(self, response, **det_dic):
    """Populate an AlForesterLicensesSpiderItem from one roster entry."""
    il = ItemLoader(item=AlForesterLicensesSpiderItem(), response=response)
    # Clean every scraped value before it reaches the item.
    il.default_input_processor = MapCompose(lambda v: v.strip(), remove_tags,
                                            replace_escape_chars)
    il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    il.add_value('sourceName', 'AL_Forester_Licenses')
    il.add_value('url', 'http://asbrf.alabama.gov/vs2k5/rosterofforesters.aspx')
    il.add_value('permit_type', 'forester_license')
    # These two are coerced to str exactly as the incoming dict supplies them.
    il.add_value('location_address_string', str(det_dic['person_addrs']))
    il.add_value('county', str(det_dic['person_country']))
    # (item field, incoming key) pairs copied through verbatim.
    pairs = (
        ('company_email', 'person_mail_id'),
        ('person_subtype', 'person_subtype'),
        ('permit_lic_no', 'person_lic_num'),
        ('person_name', 'user_name'),
        ('permit_lic_desc', 'permit_lic_desc'),
        ('dba_name', 'dba_name'),
        ('company_name', 'comny_name'),
        ('company_phone', 'person_phone_num'),
    )
    for item_field, source_key in pairs:
        il.add_value(item_field, det_dic[source_key])
    return il
def __init__(self, settings, file_name, delimiter, fields_to_export,
             null_header, customHeader=False, topHeader=None):
    """Store the export configuration and reset the per-run chunk state."""
    # Configuration supplied by the caller.
    self.settings = settings
    self.file_name = file_name
    self.delimiter = delimiter
    self.fields_to_export = fields_to_export
    self.null_header = null_header
    self.customHeader = customHeader
    self.topHeader = topHeader
    # Mutable state reset for each run.
    self.items = []
    self.chunk_number = 0
    self.chunk_folder = "chunk_{}".format(Utils.getingestion_timestamp())
    self.job_dir = settings.get('JOB_DIR_PAUSE_RESUME')
    self.appendMode = False
class AlPodiatryLicensesSpider(ExcelFeedSpider, DataFormatterMixin,
                               LookupDatareaderMixin):
    """Scrapes the AL podiatry board licensee roster (Excel download)."""

    name = '1474_al_podiatry_licenses'
    allowed_domains = ['alabama.gov']
    start_urls = ['http://www.podiatryboard.alabama.gov/licensees.aspx']
    custom_settings = {
        'FILE_NAME': Utils.getRundateFileName('AI-1474_Licenses_Podiatry_AL_CurationReady'),
        'JIRA_ID': 'AI_1474',
        # 'JOBDIR' : CustomSettings.getJobDirectory('AlPodiatryLicensesSpider'),
        'TOP_HEADER': {'company_name': 'Practice Name', 'company_phone': 'Office Phone #', 'controlled substance license #': 'Controlled Substance License #', 'dba_name': '', 'location_address_string': 'Address', 'permit_lic_desc': '', 'permit_lic_eff_date': 'Effective Date', 'permit_lic_exp_date': 'Expiration Date', 'permit_lic_no': 'License #', 'permit_type': '', 'person_name': 'Name'},
        'FIELDS_TO_EXPORT': ['permit_lic_no', 'controlled substance license #', 'person_name', 'permit_lic_exp_date', 'permit_lic_eff_date', 'company_name', 'dba_name', 'location_address_string', 'company_phone', 'permit_lic_desc', 'permit_type', 'url', 'sourceName', 'ingestion_timestamp', ],
        'NULL_HEADERS': ['controlled substance license #']
    }

    def parse(self, response):
        """Find the roster spreadsheet link and hand it to the Excel parser."""
        extension = response.xpath('//*[@id="form1"]/div[4]/div[1]/div[3]/div[1]/h3/a/@href').extract_first()
        if extension:
            next_page_url = 'http://www.podiatryboard.alabama.gov/' + extension
            yield scrapy.Request(url=next_page_url, callback=self.parse_excel,
                                 dont_filter=True)

    def parse_row(self, response, row):
        """Map one spreadsheet row to an item; returns the loaded item."""
        il = ItemLoader(item=AlPodiatryLicensesSpiderItem())
        # NOTE(review): ingestion_timestamp is listed in FIELDS_TO_EXPORT but
        # was left disabled here — confirm whether that is intentional.
        # il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
        il.add_value('sourceName', 'AL_Podiatry_Licenses')
        il.add_value('url', 'http://www.podiatryboard.alabama.gov/licensees.aspx')
        il.add_value('permit_type', 'podiatry_license')
        il.add_value('permit_lic_exp_date', self.format_date(row['Expiration Date']))
        il.add_value('permit_lic_no', row['License #'])
        # _getDBA splits "name DBA alias" once; the original called it twice
        # on the same input — hoisted so the split runs once per row.
        dba_parts = self._getDBA(row['Practice Name'])
        il.add_value('dba_name', dba_parts[1])
        person_name = row['First Name'] + ' ' + row['Last Name']
        company_name = dba_parts[0]
        # Fall back to the licensee's personal name when the practice name
        # is effectively empty (<= 1 char).
        company_name = company_name if len(company_name) > 1 else person_name
        il.add_value('permit_lic_desc',
                     'Podiatry License for ' + company_name
                     if len(company_name) > 1 else 'Podiatry License')
        location_address = self.format__address_4(row['Address'], row['City'],
                                                  row['State'], row['Zip Code'])
        # Default to the bare state when the formatted address is too short.
        il.add_value('location_address_string',
                     location_address if len(location_address) > 2 else 'AL')
        il.add_value('person_name', person_name)
        il.add_value('permit_lic_eff_date', self.format_date(row['Effective Date']))
        il.add_value('controlled substance license #',
                     row['Controlled Substance License #'])
        il.add_value('company_name', company_name)
        il.add_value('company_phone', row['Office Phone #'])
        return il.load_item()
def save_to_csv(self, response, **meta):
    """Assemble a WA Whatcom/Bellingham building-permit item from *meta*."""
    il = ItemLoader(item=WaWhatcomBellinghamBuildingPermitsSpiderItem())
    il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    il.add_value('sourceName', 'WA_Whatcom_Bellingham_Building_Permits')
    il.add_value('url', 'https://www.cob.org/epermits/Search/permit.aspx')
    il.add_value('permit_lic_no', meta['record_number'])
    il.add_value('permit_subtype', meta['permit_lic_type'])
    il.add_value('subtype', meta['permit_subtype'])
    il.add_value('property type', meta['property_type'])
    # Supply a generic description when the page provided none.
    il.add_value('permit_lic_desc',
                 meta['permit_lic_desc'] or 'Building Permit')
    il.add_value('Status', meta['permit_lic_status'])
    il.add_value('permit_applied_date', meta['permit_applied_date'])
    il.add_value('approved date', meta['approved_date'])
    il.add_value('permit_lic_eff_date', meta['permit_lic_eff_date'])
    il.add_value('finaled date', meta['finaled_date'])
    il.add_value('permit_lic_exp_date', meta['permit_lic_exp_date'])
    il.add_value('location_address_string', meta['address'])
    il.add_value('apn/pin', meta['apn_pin'])
    il.add_value('parcel #', meta['parcel_number'])
    il.add_value('permit_lic_fee', meta['permit_lic_fee'])
    # Split "name DBA alias" once per field and reuse both halves.
    owner_parts = self._getDBA(meta['mixed_name'])
    il.add_value('mixed_name', owner_parts[0])
    il.add_value('dba_name', owner_parts[1])
    il.add_value('mixed_subtype', meta['mixed_subtype'])
    il.add_value('person_address_string', meta['person_address_string'])
    contractor_parts = self._getDBA(meta['mixed_contractor_name'])
    il.add_value('mixed_contractor_name', contractor_parts[0])
    il.add_value('contractor_dba', contractor_parts[1])
    il.add_value('contractor_address_string', meta['contractor_address_string'])
    il.add_value('inspection_subtype', meta['inspection_subtype'])
    il.add_value('inspection_date', meta['completed_date'])
    il.add_value('inspection_pass_fail', meta['inspection_pass_fail'])
    il.add_value('inspection_type', meta['inspection_type'])
    il.add_value('permit_type', 'building_permit')
    return il.load_item()
def parse_row(self, response, row):
    """Convert one IL hospital-directory row into a loaded item.

    Generator: yields a single item per row.  (A leftover debug
    ``print(row)`` was removed from the original.)
    """
    il = ItemLoader(item=IlHospitalLicensesSpiderItem())
    il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    il.add_value('url', 'https://data.illinois.gov/dataset/410idph_hospital_directory/resource/9bdedb85-77f3-490a-9bbd-2f3f5f227981')
    il.add_value('sourceName', 'IL_Hospital_Licenses')
    il.add_value('permit_type', "medical_license")
    name = self._getDBA(row['Hospitals'])
    # Drop a trailing " -" artefact left behind by the DBA split, if present.
    company_name = str(name[0]).replace(' -', '') if ' -' in str(name[0]) else name[0]
    address = self.format__address_4(row['Address'], row['City'], 'IL',
                                     row['Zipcode'])
    il.add_value('dba_name', name[1])
    il.add_value('permit_lic_no', row.get('License #', ''))
    il.add_value('permit_lic_exp',
                 self.format_date(row.get('Exp. Date', ''))
                 if row.get('Exp. Date') else '')
    il.add_value('company_name', company_name)
    il.add_value('location_address_string', address)
    il.add_value('county', row.get('County', ''))
    il.add_value('permit_lic_desc',
                 "Medical License for " + company_name
                 if name[0] else "Medical License")
    il.add_value('company_phone', row.get('Phone', ''))
    il.add_value('company_subtype', row.get('Type', ''))
    yield il.load_item()
def __createChunkFile(self, spider):
    """Open the next CSV chunk file and prime its exporter.

    Builds the output filename from ``self.file_name`` (inserting a chunk
    index and, when the spider carries a ``start``/``end`` window, a
    sanitised range tag), places it under STORAGE_DIR/JIRA_ID — inside a
    ``resume_*`` subfolder when appending — then starts a
    CustomCsvItemExporter on the open handle, optionally writing the
    human-readable top header row first.
    """

    def remove_spec(text):
        # Keep only alphanumerics so start/end values are filename-safe.
        return ''.join(ch for ch in text if ch.isalnum())

    if self.file_name:
        # Split into (root, ext) so tags land before the extension.
        name_parts = list(os.path.splitext(self.file_name))
        if self.chunk_number != 0:
            name_parts.insert(1, "_file_{}".format(str(self.chunk_number)))
        if hasattr(spider, 'start') and spider.start:
            range_tag = "_{}_{}".format(remove_spec(spider.start),
                                        remove_spec(spider.end))
            # Slot the range after the chunk tag when one was inserted.
            name_parts.insert(2 if self.chunk_number != 0 else 1, range_tag)
        file_name = "".join(name_parts)
    # Fallback name used when no explicit file name was configured.
    fallback_name = '{}_file_{}.csv'.format(spider.name,
                                            str(self.chunk_number))
    if self.appendMode:
        outpath = os.path.join(
            self.settings.get('STORAGE_DIR'), self.settings.get('JIRA_ID'),
            'resume_{}'.format(Utils.getingestion_timestamp()),
            file_name if self.file_name else fallback_name)
    else:
        outpath = os.path.join(
            self.settings.get('STORAGE_DIR'), self.settings.get('JIRA_ID'),
            file_name if self.file_name else fallback_name)
    self.createFolder(outpath)
    # The exporter owns this handle; it stays open until the chunk closes
    # elsewhere, so no context manager here.
    self.file = open(outpath, 'w+b')
    kwargs = {'delimiter': self.delimiter}
    if self.fields_to_export:
        kwargs['fields_to_export'] = self.fields_to_export
    if self.null_header:
        kwargs['null_header'] = self.null_header
    self.exporter = CustomCsvItemExporter(self.file, **kwargs)
    self.exporter.start_exporting()
    if self.customHeader:
        # Emit the display header row mapped from the export field names.
        values = [self.topHeader.get(i) for i in self.fields_to_export]
        self.exporter.csv_writer.writerow(values)
def parse_row(self, response, row):
    """Map one NY Albany building-permit CSV row to a loaded item."""
    self.logger.info("started to extracting CSV data from {}".format(
        response.url))
    il = ItemLoader(item=NyAlbanyStateItems())
    # The last line of the Location cell is "(lat, lng)".  The original
    # split left a leading space on lng (and wrapped already-str parts in
    # str()); strip each coordinate instead.
    lat, lng = (coord.strip() for coord in
                row['Location'].splitlines()[-1].strip('()').split(','))
    il.add_value('permit_lic_no', row['Permit Number'])
    il.add_value('permit_lic_eff_date', row['Date'])
    il.add_value('application_number', row['Application Number'])
    il.add_value('location_address_string', row['Address'])
    il.add_value('person_name', row['Owner'])
    il.add_value('person_subtype', "Owner")
    il.add_value('contractor_name', row['Contractor'])
    il.add_value('permit_lic_value', row['Estimated Cost'])
    il.add_value('permit_lic_fee', row['Fee'])
    il.add_value('permit_lic_desc', row['Description of Work'])
    il.add_value('longitude', lng)
    il.add_value('latitude', lat)
    il.add_value('permit_type', "building_permits")
    il.add_value('url', response.url)
    il.add_value('sourceName', "NY_Albany_Building_Permits")
    il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    return il.load_item()
def save_to_csv(self, response, **data_pass):
    """Load one WI Douglas building-permit record into an item.

    Every item field name matches its key in *data_pass* one-for-one, so
    the pass-through fields are copied in a single loop.  Returns the
    loaded item.
    """
    # Original used `== None` (unidiomatic) plus `== ''`; any falsy
    # description gets the generic default instead.
    if not data_pass['permit_lic_desc']:
        data_pass['permit_lic_desc'] = 'Building Permit'
    il = ItemLoader(item=WiDouglasBuildingPermitsSpiderItem(),
                    response=response)
    # Clean every scraped value before it reaches the item.
    il.default_input_processor = MapCompose(lambda v: v.strip(), remove_tags,
                                            replace_escape_chars)
    il.add_value('ingestion_timestamp', Utils.getingestion_timestamp())
    il.add_value(
        'url', 'https://gcs.douglascountywi.org/gcswebportal/search.aspx')
    il.add_value('sourceName', 'WI_Douglas_Building_Permits')
    for field in ('inspector_comments', 'mixed_name', 'permit_subtype',
                  'permit_lic_desc', 'mixed_subtype', 'permit_type',
                  'permit_lic_fee', 'inspection_pass_fail',
                  'permit_lic_status', 'location_address_string', 'dba_name',
                  'inspection_subtype', 'permit_lic_eff_date',
                  'permit_lic_no', 'prop type', 'inspection_date',
                  'inspection_type', 'person', 'municipality', 'issue #',
                  'parcel number', 'mail_address_string'):
        il.add_value(field, data_pass[field])
    return il.load_item()