def ChangeLocationBbs(pincode, store, base_url, location_id, store_id, sku, area):
    """Switch the Bbs storefront to the given city/pincode via Selenium,
    then hand off to GetChromeCookies for the actual scrape.

    Args:
        pincode: delivery pincode string; typed character-by-character into
            the area autocomplete box so its suggestions fire.
        store: store name (forwarded to GetChromeCookies).
        base_url: product page URL prefix; ``sku`` is appended to it.
        location_id: location id, logged and recorded on failure.
        store_id: store id, recorded on failure.
        sku: product sku code appended to ``base_url``.
        area: city name typed into the city selector.

    Side effects:
        Opens and closes a Chrome browser. On ElementNotVisibleException,
        inserts a "Not Available" row into scrape_reports using the
        module-level cursor/connection/session_id.
    """
    try:
        browser = webdriver.Chrome('chromedriver.exe')
        browser.get(base_url + sku)
        time.sleep(2)
        # Open the location-chooser dialog.
        location = browser.find_elements_by_xpath(
            '//*[@id="headercontroller"]/section[1]/div/div[2]/div/button')
        location[0].click()
        time.sleep(2)
        city = browser.find_elements_by_xpath('//*[@id="city-select"]')
        city[0].clear()
        city[0].send_keys(area)
        time.sleep(2)
        area_box = browser.find_elements_by_xpath('//*[@id="area-select"]')
        # Type the pincode one character at a time so the autocomplete fires.
        for digit in pincode:
            area_box[0].send_keys(digit)
            time.sleep(2)
        # Pick the first autocomplete suggestion.
        suggestion = browser.find_elements_by_css_selector('.ui-corner-all')
        print(suggestion)
        suggestion[0].click()
        time.sleep(2)
        submit_btn = browser.find_elements_by_css_selector(
            '#choose-city-form > div.ng-scope > div.btn-green > button')
        submit_btn[0].submit()
        time.sleep(2)
        browser.close()
        logger.info('Location changed to ' + str(location_id) + ' in Bbs')
        GetChromeCookies(pincode, store, base_url, location_id, store_id, sku)
    except ElementNotVisibleException as elem_not_vis:
        print(elem_not_vis)
        print('cant be scrapped')
        # Fix: the original called driver.close() (undefined name), which
        # raised NameError and silently skipped the fallback record below.
        browser.close()
        name = ["Not Available"]
        price = ["Not Available"]
        stock = ["Not Available"]
        rating = ["Not Available"]
        # Parameterised query (the original concatenated strings — SQL
        # injection prone — and supplied only 8 values for 9 columns, with
        # price/stock swapped relative to the column order).
        sql2 = ('INSERT INTO scrape_reports(scrape_session_id, sku_code, '
                'store_id, location_id, item_name, stock_available, '
                'item_price, store_rating, scrape_datetime) '
                'VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)')
        cursor.execute(sql2, (str(session_id), sku, str(store_id),
                              str(location_id), name[0], stock[0], price[0],
                              rating[0], str(datetime.now())))
        connection.commit()
        logger.exception('Bbs data not available for ' + sku + '.')
    except Exception as e:
        print(e)
        logger.error(e)
def storeItem(item, store, session_id, city, pincode, response):
    """Persist one scraped item into scrape_reports and append it to the
    per-session CSV report.

    Args:
        item: dict-like with list-valued fields 'name', 'price', 'stock',
            'rating', 'sku_id', 'store_id', 'location_id'.
        store: store name (logging / CSV only).
        session_id: id of the current scrape session.
        city, pincode: location info forwarded to csvfilegeneration.
        response: unused here; kept for caller compatibility.

    Returns:
        The original item (so it can be chained in a pipeline), even if the
        database insert failed (errors are logged, not raised).
    """
    name = item['name']
    price = item['price']
    if not price:  # page had no price element — record a zero price
        price = ['000.00']
    stock = item['stock']
    rating = item['rating']
    sku_id = item['sku_id']
    # Normalise ids to single-element string lists (csvfilegeneration
    # expects list-shaped arguments).
    store_id = [str(item['store_id'][0])]
    location_id = [str(item['location_id'][0])]
    session_id = [str(session_id)]
    scrape_datetime = str(datetime.now())
    try:
        # Parameterised query: the previous string-concatenated INSERT was
        # vulnerable to SQL injection and broke on embedded quotes.
        sql2 = ('INSERT INTO scrape_reports(scrape_session_id, sku_code, '
                'store_id, location_id, item_name, stock_available, '
                'item_price, store_rating, scrape_datetime) '
                'VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)')
        cursor.execute(sql2, (session_id[0], sku_id[0], store_id[0],
                              location_id[0], name[0], stock[0], price[0],
                              rating[0], scrape_datetime))
        connection.commit()
        logger.info('Data of ' + store + ' - ' + sku_id[0] +
                    ' stored Successfully')
        csvfilegeneration(session_id, sku_id, store_id, location_id, city,
                          name, stock, price, rating, scrape_datetime, store,
                          pincode)
    except Exception as e:
        print(e)
        logger.critical(e)
    # Moved out of `finally`: a return inside finally would also swallow
    # any exception not caught above.
    return item
def storeItemBbs(item, sku_id, location_id, city, store_id, store, session_id,
                 pincode):
    """Persist one Bbs scraped item into scrape_reports and the CSV report.

    Args:
        item: sequence where item[0] is the product name and item[1] the
            price string (e.g. 'Rs 123.00').
        sku_id: sku code of the item.
        location_id, store_id: ids recorded with the row.
        city, pincode: location info forwarded to csvfilegeneration.
        store: store name (logging / CSV only).
        session_id: id of the current scrape session.

    Side effects:
        Inserts into scrape_reports via module-level cursor/connection;
        errors are logged (logger.critical), not raised.
    """
    name = [item[0]]
    price = [item[1].replace('Rs ', '')]  # strip the currency prefix
    stock = ["Available"]          # items reaching this path are in stock
    rating = ["Not Applicable"]    # Bbs pages carry no store rating
    # csvfilegeneration expects single-element string lists.
    session_id = [str(session_id)]
    sku_id = [sku_id]
    location_id = [str(location_id)]
    store_id = [str(store_id)]
    scrape_datetime = str(datetime.now())
    try:
        # Parameterised query: the previous string-concatenated INSERT was
        # vulnerable to SQL injection and broke on embedded quotes.
        sql2 = ('INSERT INTO scrape_reports(scrape_session_id, sku_code, '
                'store_id, location_id, item_name, stock_available, '
                'item_price, store_rating, scrape_datetime) '
                'VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)')
        cursor.execute(sql2, (session_id[0], sku_id[0], store_id[0],
                              location_id[0], name[0], stock[0], price[0],
                              rating[0], scrape_datetime))
        connection.commit()
        logger.info('Data of ' + store + ' - ' + sku_id[0] +
                    ' stored Successfully')
        csvfilegeneration(session_id, sku_id, store_id, location_id, city,
                          name, stock, price, rating, scrape_datetime, store,
                          pincode)
    except Exception as e:
        print(e)
        logger.critical(e)
exec( compile(source=open('database_config.py').read(), filename='database_config.py', mode='exec')) process = CrawlerProcess({'USER_AGENT': ('Chrome/69.0.3497.81')}) # Get date and time of current session start_date_time = str(datetime.now()) end_date_time = str(datetime.now()) scrape_result = 'SCRAPING IN PROGRESS' # Query to add session in database sql_insert_session = 'INSERT INTO scrape_sessions(session_start_datetime,session_end_datetime, scrape_result ) values("' + start_date_time + '","' + end_date_time + '", "' + scrape_result + '")' print(sql_insert_session) ab = cursor.execute(sql_insert_session) connection.commit() session_id = cursor.lastrowid logger.info('Session Started') except TypeError: print('Error occured while starting a new seesion') logger.exception('Error occured while starting a new seesion') try: # Loop1 to get all the storenames sql = 'SELECT store_name, id FROM stores' cursor1.execute(sql) for store, store_id in cursor1: store = store.lower() print(store) print(store_id)
def _record_mail_status(session_id, email_status):
    """Stamp the session's end time and whether the report mail was sent."""
    end_time = str(datetime.now())
    # Parameterised UPDATE (was string-concatenated SQL).
    sql = ('UPDATE scrape_sessions SET session_end_datetime = %s, '
           'email_status = %s WHERE id = %s')
    cursor.execute(sql, (end_time, email_status, str(session_id)))
    connection.commit()


def mailgeneration(store_id, store, session_id):
    """E-mail this scrape session's CSV report files to all active recipients.

    Args:
        store_id, store: unused here; kept for caller compatibility.
        session_id: session whose "*_sid<session_id>.csv" files under
            csv_files/ are attached.

    Side effects:
        Sends mail via smtp.gmail.com and updates the session row's
        email_status (1 on success, 0 on any failure). Errors are logged,
        never raised to the caller.
    """
    try:
        cursor4.execute(
            'select recipient_email from report_recipients where is_active = 1')
        fromaddr = "*****@*****.**"
        toaddr = [mail[0] for mail in cursor4]
        print(toaddr)

        # Build the multipart message.
        msg = MIMEMultipart()
        msg['From'] = fromaddr
        msg['To'] = ', '.join(toaddr)
        msg['Subject'] = "Product Availability Report"
        msg.attach(MIMEText("Please find attached the report file(s)", 'plain'))

        # Collect this session's CSV reports.
        path = "csv_files/"
        pattern = "*_sid" + str(session_id) + ".csv"
        filenames = []
        for root, dirs, files in os.walk(path):
            for name in files:
                if fnmatch.fnmatch(name, pattern):
                    filenames.append(name)
        print(filenames)
        if not filenames:
            # Was a bare `raise Exception` — give the log a useful message.
            raise Exception('no report files found for session ' +
                            str(session_id))

        for fname in filenames:  # renamed: `file` shadowed the builtin
            part = MIMEBase('application', 'octet-stream')
            # Fix: the file handle was previously leaked (open(...).read()).
            with open(path + fname, 'rb') as fh:
                part.set_payload(fh.read())
            encoders.encode_base64(part)
            part.add_header('Content-Disposition',
                            'attachment; filename="%s"' % fname)
            msg.attach(part)

        smtp = smtplib.SMTP(host='smtp.gmail.com', port=587, timeout=300)
        try:
            smtp.starttls()
            # NOTE(review): credentials are hard-coded in source; move them
            # to environment variables / config before shipping.
            smtp.login(fromaddr, "messi2009")
            smtp.sendmail(fromaddr, toaddr, msg.as_string())
        finally:
            smtp.quit()  # fix: session was not closed on send failure
        logger.info('Mail Sent successfully.')
        _record_mail_status(session_id, 1)
    except Exception as e:
        logger.error(e)
        logger.error('Mail sending failed.')
        _record_mail_status(session_id, 0)
'''
Created on 2017-06-16

@author: Alex

Print every row of table `mytb`, prefixed with its 1-based row number.
(Original header date was "2017年6月16日".)
'''
import pymysql
# NOTE(review): this looks like an accidental IDE auto-import — the `cursor`
# name it binds is rebound immediately below and twisted is otherwise unused.
# Kept to avoid changing the module's import-time behavior; consider removing.
from twisted.conch.insults.window import cursor

# NOTE(review): credentials are hard-coded; move to config/env before shipping.
conn = pymysql.connect(host="localhost", user="******", passwd="admin",
                       db="myfirstpydb")
try:
    cursor = conn.cursor()
    sql = "select * from mytb"
    cursor.execute(sql)
    for i in cursor:
        # cursor.rownumber: index of the row just fetched from the result set.
        print("当前是第"+str(cursor.rownumber)+"行")  # "currently at row N"
        print("标题是: "+i[0])                        # "title is: ..."
        print("关键词是: "+i[1])                      # "keywords are: ..."
finally:
    conn.close()  # fix: connection was never closed