def do_sheet(self, elem):
    """Process one <sheet> element from the workbook XML: register a
    worksheet on the Book object and remember its target path/id."""
    book = self.bk
    sheetx = book.nsheets
    rid = elem.get(U_ODREL + 'id')
    sheetId = int(elem.get('sheetId'))
    name = unescape(unicode(elem.get('name')))
    reltype = self.relid2reltype[rid]
    target = self.relid2path[rid]
    if self.verbosity >= 2:
        self.dumpout(
            'sheetx=%d sheetId=%r rid=%r type=%r name=%r',
            sheetx, sheetId, rid, reltype, name)
    if reltype != 'worksheet':
        # Anything that is not a plain worksheet (chartsheet etc.) is skipped.
        if self.verbosity >= 2:
            self.dumpout('Ignoring sheet of type %r (name=%r)', reltype, name)
        return
    book._sheet_visibility.append(True)
    sheet = Sheet(book, position=None, name=name, number=sheetx)
    sheet.utter_max_rows = X12_MAX_ROWS
    sheet.utter_max_cols = X12_MAX_COLS
    book._sheet_list.append(sheet)
    book._sheet_names.append(name)
    book.nsheets += 1
    self.sheet_targets.append(target)
    self.sheetIds.append(sheetId)
def __init__(self, server="smtp.gmail.com", port=587, username="", password="", auth_file=""):
    """Open the contacts sheet and log in to the SMTP server.

    Bug fix (review): the ``username``/``password`` parameters were never
    stored, yet login read ``self.smtp_username``/``self.smtp_password``,
    which are not assigned anywhere in this block — so initialization
    could only succeed if a subclass set them elsewhere. They are now
    assigned from the constructor arguments.
    """
    # Mark as not initialized up front so the attribute always exists,
    # even when the try-block fails before reaching the end.
    self.initiated = False
    try:
        self.smtp_username = username
        self.smtp_password = password
        self.sheet = Sheet(auth_file_name=auth_file, sheet_name="contacts")
        self.sheet.open_sheet()
        self.config_distrib_list()
        self.smtpserver = smtplib.SMTP(server, port)
        self.smtpserver.ehlo()
        self.smtpserver.starttls()
        self.smtpserver.ehlo()  # re-identify after the TLS upgrade
        self.smtpserver.login(self.smtp_username, self.smtp_password)
        self.initiated = True
    except Exception as e:
        # Best-effort: notifications are optional, so only log the failure.
        logging.debug(" unable to initiate email notifications ")
        logging.debug(e)
def __make_update(self):
    """Refresh every sheet whose key is listed in the sheets file."""
    sheets_path = self.__data_path + "/" + SHEETS_FILE
    with open(sheets_path) as handle:
        keys = handle.read().splitlines()
    for key in keys:
        Sheet(key).update()
def null(message):
    """Admin-only bot command: reset the vote table and confirm in chat."""
    if message.from_user.username not in get_admins():
        return
    Sheet("photos_vote_bot").null()
    bot.send_message(message.chat.id, "Таблицю обнулено.")
class Model:
    """Application state holder: the sheet being edited, the cursor
    position, file IO, and simple run/last-command bookkeeping."""

    def __init__(self, filename=None):
        self.__sheet = Sheet()
        self.__cursor = Cursor()
        self.__io = FileIO(filename)
        self.__programIsRunning = True
        self.__lastCommand = ""
        self.load()

    def getCursor(self):
        """Return the cursor object."""
        return self.__cursor

    def getSheet(self):
        """Return the sheet object."""
        return self.__sheet

    def load(self):
        """Populate the sheet from the backing file."""
        self.__sheet.loadPublicObject(self.__io.loadFile())

    def save(self):
        """Persist the sheet's public object to the backing file."""
        self.__io.saveFile(self.__sheet.getPublicObject())

    def quit(self):
        """Flag the program as no longer running."""
        self.__programIsRunning = False

    def checkProgramExecution(self):
        """Return True while the program should keep running."""
        return self.__programIsRunning

    def echoCommand(self, c):
        """Remember *c* as the most recently issued command."""
        self.__lastCommand = c

    def getLastCommand(self):
        """Return the most recently echoed command."""
        return self.__lastCommand
def easy_way():
    """Bulk-load a large CSV into an in-memory SQLite table, converting
    each row through a Sheet schema before insertion."""
    metadata = MetaData()
    engine = Sqlite3Engine(":memory:")
    dt = DataType()
    employee = Table(
        "employee", metadata,
        Column("employee_id", dt.text, primary_key=True),
        Column("age", dt.integer),
        Column("height", dt.real),
        Column("enroll_date", dt.date),
        Column("create_datetime", dt.datetime),
    )
    metadata.create_all(engine)
    ins = employee.insert()
    timer.start()
    # === real work ===
    # Target schema the CSV rows are converted into.
    sheet = Sheet({
        "employee_id": "TEXT",
        "age": "INTEGER",
        "height": "REAL",
        "enroll_date": "DATE",
        "create_datetime": "DATETIME",
    })
    csvfile = CSVFile(r"testdata\bigcsvfile.txt", sep=",", header=True,
                      dtype={"CREATE_DATE": "DATE", "CREATE_DATETIME": "DATETIME"})
    sheet.match(csvfile.sample)
    for row in csvfile.generate_rows():
        try:
            engine.insert_row(ins, Row.from_dict(sheet.convert(row)))
        except:  # best-effort load: skip rows that fail to convert/insert
            pass
    timer.timeup()
    engine.prt_all(employee)
def __init__(self, filename=None):
    """Create an empty sheet, cursor and file handle, then load *filename*."""
    self.__sheet = Sheet()
    self.__cursor = Cursor()
    self.__io = FileIO(filename)
    self.__programIsRunning = True
    self.__lastCommand = ""
    self.load()
def __init__(self):
    """Seed demo state and publish the initial ID list to the sheet."""
    self.idm = "01010312841a360d"
    self.isScaned = True  # for demonstration
    self.isStepped = False
    self.IDs = ["01010a10e41a9f23", "01010A10E41A9F25"]
    self.sheet = Sheet()
    self.num = 0
    self.sheet.write(self.IDs)
def test_sheetGeneration():
    """Smoke test: build a Sheet from the powerup config and field JSON.

    Fix: the original ``json.load(open(...))`` calls leaked open file
    handles; files are now closed via context managers.
    """
    with open("resources/powerup_config.json", "r", encoding="utf-8") as fp:
        config = json.load(fp)
    gen = Sheet(config)
    with open("resources/powerup_test_3.json", "r", encoding="utf-8") as fp:
        fields = json.load(fp)
    gen.create_from_json(fields)
def begin(message):
    """Admin-only bot command: clear stored votes, cache the participant
    list and announce that voting has started."""
    global participants
    if message.from_user.username not in get_admins():
        return
    database_interface.free()
    participants = Sheet("photos_vote_bot").get_participants()
    bot.send_message(message.chat.id, "Голосування запущено.")
def add_menus_from_file(self, filename):
    """Read every sheet of the Excel file and index its products by menu date."""
    workbook = xlrd.open_workbook(filename, formatting_info=True)
    # NOTE(review): _all_sheets_count is a private xlrd attribute; the
    # public Book.nsheets may be the intended value — confirm before changing.
    for index in range(workbook._all_sheets_count):
        sheet = Sheet(workbook.sheet_by_index(index))
        try:
            date = sheet.get_menu_date()
            products = sheet.get_products()
        except IndexError:
            raise Exception('Invalid menu format!')
        self.menus[date] = products
def open_file(self):
    """Ask the user for a spreadsheet file and load the day's accounts."""
    # get the file
    existing_file = askopenfilename(title="Select file.")
    self.sheet = Sheet("Jeb")
    # NOTE(review): month/year/today are read from an enclosing scope that is
    # not visible here — confirm where they are defined.
    self.sheet.read_file(month, year, existing_file)
    self.display_day_accounts['text'] = "Accounts of the day\n{}".format(
        self.get_day_projects())
    self.day_total_base = self.sheet.total_day(today)
def __init__(self, fname, sheet_name, feature_extractors, out_dir="out", out_format="png"):
    """Load the sheet image and instantiate one extractor per feature type."""
    self.fname = fname
    self.sheet_name = sheet_name
    self.out_format = out_format
    self.out_dir = out_dir
    source_image = cv2.imread(fname)
    self.sheet = Sheet(source_image, dpi=self._dpi_from_exif(),
                       save_image=self.save_image)
    self.feature_extractors = [feat(self.sheet) for feat in feature_extractors]
def __init__(self):
    """Initialize the spreadsheet, Google Drive and Zoom clients."""
    logger.info('--- Initialize Spreadsheet')
    self.mysheet = Sheet()
    logger.info('--- Initialize Google Drive')
    self.mydrive = GDrive()
    # Slack integration is currently disabled.
    # logger.info('--- Initialize Slack')
    # self.slack = Slack()
    logger.info('--- Initialize Zoom')
    self.zoom = Zoom()
def result(message):
    """Admin-only bot command: count the votes and report the outcome."""
    global participants
    if message.from_user.username in get_admins():
        sheet = Sheet("photos_vote_bot")
        if participants is None:
            participants = sheet.get_participants()
        if database_interface.count_votes(sheet, participants) is True:
            bot.send_message(message.chat.id, "Таблиця результатів оновлена.")
        else:
            bot.send_message(
                message.chat.id,
                "Щось пішло не так, спробуй ще раз або напиши розробнику.")
    else:
        bot.send_message(
            message.chat.id,
            "Упс... Схоже тебе немає в списку адміністраторів, звернись до розробника."
        )
def runUpdate(monthStart, yearStart, monthEnd, yearEnd):
    """Collect sheet updates for every card/test between the two month/year
    bounds (inclusive) and bulk-upsert them into the Mongo collection."""
    curMonth, curYear = monthStart, yearStart
    updateList = []
    while curYear < yearEnd or (curYear == yearEnd and curMonth <= monthEnd):
        for card in cards:
            for test in tests:
                search = genSearchString(card, test, curYear, curMonth)
                results = driveService.files().list(
                    includeTeamDriveItems=True,
                    supportsTeamDrives=True,
                    pageSize=100,
                    q=search,
                    corpora="domain").execute()
                for file in results["files"]:
                    epoch = getEpochTime(file["name"])
                    curSheet = Sheet(test, card, epoch, file["id"])
                    updateList = updateList + curSheet.genUpdate(sheetService)
                time.sleep(1.3)  # stay under the Drive API rate limit
        # Advance one month, rolling the year over after December.
        curYear = curYear + 1 if curMonth == 12 else curYear
        curMonth = (curMonth % 12) + 1
    query = [
        UpdateOne(
            {
                "sheetId": update["sheetId"],
                "subtest": update["subtest"],
                "test": update["test"],
                "type": update["type"],
            },
            {"$set": update},
            upsert=True)
        for update in updateList
    ]
    collection.bulk_write(query)
def run(self):
    """Initialize the notifier and tracking sheet, then signal readiness.

    Emits ``nt_ready_signal`` whether or not initialization succeeded, so
    the caller's UI thread is never left waiting.
    """
    try:
        logging.info("\nOpening notification sheet ... ")
        self.ntfr = Notifier(auth_file=self.ui.get_google_auth_file_name())
        logging.info("\nOpening the tracking sheet ... ")
        if self.ui.get_google_auth_file_name() != "":
            self.sheet = Sheet(
                auth_file_name=self.ui.get_google_auth_file_name())
        else:
            self.sheet = Sheet()
        self.sheet.open_sheet()
        # get back to caller from this thread
        self.nt_ready_signal.emit()
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        # Bug fix: logging.debug expects a format string as its first
        # argument; passing the exception type object with extra positional
        # args garbled (or failed) the log call. Use lazy %-formatting.
        logging.debug("%s %s %s", exc_type, fname, exc_tb.tb_lineno)
        logging.debug(e)
        logging.error(
            " An issue occurred while initializing network components ")
        logging.error(e)
        self.nt_ready_signal.emit()
def _init_existing_sheets(self):
    """Init Sheet() object for every sheet in this spreadsheet.

    Returns:
        dict: {<sheet_name>: <sheet.Sheet>} - sheets index.
    """
    index = {}
    response = self._ss_resource.get(spreadsheetId=self._id).execute()
    archive_name = self._config.ARCHIVE_SHEET.get("name")
    for entry in response["sheets"]:
        props = entry["properties"]
        title = props["title"]
        if title == archive_name:
            # The archive sheet is tracked separately, not in the index.
            self._archive = ArchiveSheet(title, self._id, props["sheetId"])
        else:
            index[title] = Sheet(title, self._id, props["sheetId"])
    return index
def _build_new_sheets_requests(self, sheets_in_conf):
    """Build add-new-sheet requests for the new sheets.

    Args:
        sheets_in_conf (tuple): Sheets list from the configurations.

    Returns:
        list: List of add-new-sheet requests.
    """
    add_sheet_reqs = []
    for name in sheets_in_conf:
        # Idiom fix: membership test on the dict itself, not .keys().
        if name not in self.sheets:
            self.sheets[name] = Sheet(name, self._id)
            add_sheet_reqs.append(self.sheets[name].create_request)
    if self._config.ARCHIVE_SHEET and self._archive.is_new:
        add_sheet_reqs.append(self._archive.create_request)
        self._archive.is_new = False
    return add_sheet_reqs
def openFile(self, file_name):
    """Parse every sheet of an Excel workbook into Sheet objects in self.fl."""
    xls = pd.ExcelFile(file_name)
    self.file_path = file_name
    self.building_name, ext = path_leaf(file_name).split('.')
    for sheet_name in xls.sheet_names:
        # One Sheet object per workbook tab, named "<building> - <tab>".
        sheet = Sheet(self.building_name + ' - ' + sheet_name)
        sheet.zeroRemove(xls)
        if 'Max' not in sheet.df.index:
            sheet.addStatsRow()
        self.fl.append(sheet)
class mat:
    """Demo mat controller: latches card-scan/step events and mirrors the
    ID list to a spreadsheet, pinging LINE Notify on new IDs."""

    def __init__(self):
        self.idm = "01010312841a360d"
        self.isScaned = True  # for demonstration
        self.isStepped = False
        self.IDs = ["01010a10e41a9f23", "01010A10E41A9F25"]
        self.sheet = Sheet()
        self.num = 0
        self.sheet.write(self.IDs)

    def maintanance(self):
        """Idle loop; periodic sheet housekeeping is currently disabled."""
        while True:
            time.sleep(60)

    def WaitStep(self):
        """Poll the pressure sensor and latch isStepped once triggered."""
        while True:
            time.sleep(0.3)
            if pressure_sensor.sensing() is True:
                self.isStepped = True

    def UpdateSheet(self):
        """Publish the scanned ID to the sheet for 10 s, then retract it."""
        if self.idm not in self.IDs:
            self.IDs.append(self.idm)
            self.SendToLINE()
            self.sheet.write(self.IDs)
            time.sleep(10)
            self.IDs.remove(self.idm)
            self.sheet.write(self.IDs)
        else:
            print("cancel action")
            self.IDs.remove(self.idm)

    def SendToLINE(self):
        """Push a notification message through LINE Notify."""
        url = "https://notify-api.line.me/api/notify"
        # SECURITY(review): hard-coded access token — move to config/env.
        token = "8KUextLc2r7ARf9uyoVeioxkrE18NU7FOTkmhxlb7uT"
        headers = {"Authorization": "Bearer " + token}
        payload = {"message": "yuto, wang, yuka"}
        requests.post(url, headers=headers, params=payload)

    def processing(self):
        """Main loop: on a step event, update the sheet and reset the latches."""
        while True:
            if self.isStepped is not True:
                continue
            name = "yuka"
            if self.isStepped is True:
                print(name + " stepped on the mat")
                self.UpdateSheet()
            else:
                print(name + " didn't stepped ")
            self.isScaned = True  # for demonstration
            self.isStepped = False
            print("restart processing")
            print("")
            led.led_off("blue")
            led.led_off("red")
from sheet import Sheet import math import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D if __name__ == '__main__': N = 15 a = 2.46 a_1 = [a, 0, 0] a_2 = [a / 2.0, a * math.sqrt(3) / 2, 0] sites = [[0.0, 0.0, 0.0], [a / 2.0, a / (2.0 * math.sqrt(3)), 0.0]] s1 = Sheet([a_1, a_2, [0, 0, 1]], ['C', 'C'], sites, [0], [-N, -N], [N, N], 'graphene') s2 = Sheet([a_1, a_2, [0, 0, 1]], ['C', 'C'], sites, [0], [-N, -N], [N, N], 'graphene') s3 = Sheet([a_1, a_2, [0, 0, 1]], ['C', 'C'], sites, [0], [-N, -N], [N, N], 'graphene') h = Heterostructure((s1, s2, s3), [0, math.pi / 90, math.pi / 45], [0, 6, 12]) H = h.localitySweep(2) ''' val_arr = set() vec_arr = set() for i in np.linspace(0,1,4,endpoint=False): for j in np.linspace(0,1,4,endpoint=False): h.setShift([[0,0,0],[i,j,0]]) H12 = h.interHamiltonian(0,1) H21 = h.interHamiltonian(1,0)
# import audio
from sheet import Sheet
import pyglet

WIDTH = 600
HEIGHT = 400

# Build the main window-backed sheet and hand control to pyglet's loop.
sheet = Sheet(width=WIDTH, height=HEIGHT,
              caption="Term Project -- Refactored", resizable=False)
pyglet.gl.glClearColor(255, 255, 255, 1)
pyglet.app.run()
def __init__(self):
    """Point the scraper at rightmove's 24-hour rental search and open the sheet."""
    self.base_url = "https://rightmove.co.uk"
    # Default to 24 hour period for now, don't show let agreed
    self.base_search_url = (
        self.base_url
        + "/property-to-rent/find.html?maxDaysSinceAdded=1&_includeLetAgreed=false&"
    )
    self.sheet = Sheet(service_account_configuration_path, sheet_name)
from sheet import Sheet
import bitly_api
from dotenv import load_dotenv
import os
import settings

load_dotenv()
access_token = os.getenv("access_token")
c = bitly_api.Connection(access_token=access_token)

table = Sheet("arac-tel", "Sheet1")
raw_video_url = table.worksheet.acell(settings.long_url_cell).value
starting_row = table.first_empty_row("shortened-video-url")

# Shorten a per-phone tracking URL for the next 30 empty rows.
for offset in range(30):
    row = starting_row + offset
    phone_col_number = table.columns.index("phone") + 1
    phone = table.worksheet.cell(row, phone_col_number).value
    shortened_dict = c.shorten(raw_video_url + "&r=" + phone,
                               preferred_domain="j.mp")
    print(shortened_dict["url"])
    table.update_one_cell(row, "shortened-video-url", shortened_dict["url"])
def display_slow_sorted(wkset, parent_workset, duration=0.5, sort_field_index=2, limit=None):
    """Render a sorted table of per-page hit share vs. slow-hit share,
    both expressed as percentages of the parent workset."""
    table = Sheet()
    table.header("URL", "%Hits", "%Slow")
    for page in wkset.pages.values():
        hits_share = float(page.hits) / parent_workset.total_hits * 100
        slow_share = (float(page.shape.get_hits_above(duration))
                      / parent_workset.shape.get_hits_above(duration) * 100)
        table.line(page.url, hits_share, slow_share)
    total_hits_share = float(wkset.shape.total_hits) / parent_workset.total_hits * 100
    total_slow_share = (float(wkset.shape.get_hits_above(duration))
                        / parent_workset.shape.get_hits_above(duration) * 100)
    table.end_line("Total", total_hits_share, total_slow_share)
    table.sort(sort_field_index)
    if limit is not None:
        table.limit(limit)
    table.show()
grbl_out += readbuff.getvalue() """ print ' : ' + grbl_out.strip() except Exception as e: print e if __name__=='__main__': from sheet import Sheet g = GCodeExport() s = Sheet(2, 2, cell_width=1, cell_height=1) #s.load('../tests/square.cb') s.load('../tests/board.cb') #s.load('../tests/octantes1y4.cb') print s #heaters = g._get_segments(s.cols, s.rows, checker=s.is_heater) #print "Heaters", heaters g.from_sheet(s, visualize=True) #g._make_heaters() #s.set_cut(0, 0, [True, True]); s.set_cut(1, 0, [True, True]); s.set_cut(2, 0, [False, True]) #s.set_cut(0, 1, [False, False]); s.set_cut(1, 1, [True, True]); s.set_cut(2, 1, [False, True]) #s.set_cut(0, 2, [False, False]); s.set_cut(1, 2, [False, False]); s.set_cut(2, 2, [False, False]) #s.save('../tests/test.cb')
# Create a new Excel file and add the two input worksheets.
workbook = xlsxwriter.Workbook('model.xlsx')
business_transactions_sheet = workbook.add_worksheet('Input Business Transactions')
it_transactions_sheet = workbook.add_worksheet('Input IT Transactions')

# Create styles
bold = workbook.add_format({'bold': True})
header_row = 7

btHeaders = [
    'Business Transaction Name', 'Transaction Description',
    'Business Volumes', 'Frequency', 'Notes'
]
btSheet = Sheet(business_transactions_sheet)
btSheet.add_header(btHeaders)
btSheet.write_headers(header_row, bold)

itHeaders = [
    'Business Transaction', 'IT Transaction Name',
    'IT Transaction Description', 'Qty per transaction',
    'Transaction Rating', 'TPS', 'Notes'
]
itSheet = Sheet(it_transactions_sheet)
itSheet.add_header(itHeaders)
itSheet.write_headers(header_row, bold)

# Parse the xml data.
tree = ET.parse('data.xml')
root = tree.getroot()
class Mas:
    """Facade wiring together the spreadsheet, Google Drive, Slack, Zoom
    and email integrations."""

    # NOTE(review): this class attribute is immediately shadowed by the
    # instance attribute set in __init__ — confirm it is still needed.
    mysheet = {}

    def __init__(self):
        logger.info('--- Initialize Spreadsheet')
        self.mysheet = Sheet()
        logger.info('--- Initialize Google Drive')
        self.mydrive = GDrive()
        # Slack integration currently disabled.
        # logger.info('--- Initialize Slack')
        # self.slack = Slack()
        logger.info('--- Initialize Zoom')
        self.zoom = Zoom()
        # Email integration currently disabled.
        # logger.info('--- Initialize Email')
        # self.email = Email()

    # --- Google Spreadsheet ---
    def update_ccs_sheet(self):
        """Write the updated CCS dataframe back to the spreadsheet."""
        self.mysheet.update_ccs_sheet_from_df(self.updated_ccs)

    # --- Google Drive ---
    def drive_setup(self):
        """Prepare Drive, generate/share links and sync the sheet."""
        logger.info('--- Setup Google Drive')
        self.mydrive.setup(self.mysheet)
        self.updated_ccs = self.mydrive.generate_links_and_share_permission()
        self.update_ccs_sheet()

    # --- Slack ---
    def slack_setup(self):
        """Run the Slack setup and sync the sheet."""
        self.updated_ccs = self.slack.setup(self.mysheet)
        self.update_ccs_sheet()

    # --- Zoom ---
    def zoom_setup(self):
        """Run the Zoom setup (sheet sync currently disabled)."""
        self.updated_ccs = self.zoom.setup(self.mysheet, self.mydrive)
        # self.update_ccs_sheet()

    # --- Email ---
    def email_setup(self):
        """Send the notification email."""
        self.email.send_email()

    def run(self):
        """Entry point: currently runs only the Zoom setup."""
        # self.drive_setup()
        self.zoom_setup()
from sheet import Sheet
import bitly_api
from dotenv import load_dotenv
import os
import sys

load_dotenv()
c = bitly_api.Connection(access_token=os.getenv("access_token"))

table = Sheet("arac-tel", "Sheet1")
row_count = len(table.worksheet.get_all_records())

# Walk the filled rows and record the global click count for each short URL;
# stop at the first row whose column 3 is not a positive integer.
for row_number in range(2, row_count):
    row = table.get_row(row_number)
    if not (row[3].isdigit() and int(row[3]) > 0):
        sys.exit("Dolu satırlar bitti.")
    table.update_one_cell(row_number, "clicked",
                          c.clicks(shortUrl=row[1])[0]["global_clicks"])
import sys
from sheet import Sheet

if __name__ == "__main__":
    # NOTE(review): json is used but not imported in this excerpt — confirm
    # it is imported elsewhere in the file.
    # Fix: json.load(open(...)) leaked file handles; use context managers.
    with open("resources/powerup_config.json", "r", encoding="utf-8") as fp:
        config = json.load(fp)
    # Format: <event-name> [event-id] [frc-api-token]
    if len(sys.argv) >= 2:
        config['event'] = sys.argv[1]
    if len(sys.argv) >= 3:
        config['event_code'] = sys.argv[2]
    if len(sys.argv) >= 4:
        config['frc_api'] = sys.argv[3]
    print("Generating scouting sheets for " + config['event_code'] + " - " +
          config['event'])
    gen = Sheet(config)
    with open("resources/powerup_test_3.json", "r", encoding="utf-8") as fp:
        fields = json.load(fp)
    gen.create_from_json(fields)


def test_sheetGeneration():
    """Smoke test: generate a sheet from the checked-in config and fields."""
    with open("resources/powerup_config.json", "r", encoding="utf-8") as fp:
        config = json.load(fp)
    gen = Sheet(config)
    with open("resources/powerup_test_3.json", "r", encoding="utf-8") as fp:
        fields = json.load(fp)
    gen.create_from_json(fields)
class SheetIO(object):
    """Encapsulates IO operations on sheets and shreds."""

    def __init__(self, fname, sheet_name, feature_extractors, out_dir="out",
                 out_format="png"):
        self.fname = fname
        self.sheet_name = sheet_name
        self.out_format = out_format
        self.out_dir = out_dir
        orig_img = cv2.imread(fname)
        self.sheet = Sheet(orig_img, dpi=self._dpi_from_exif(),
                           save_image=self.save_image)
        self.feature_extractors = [
            feat(self.sheet) for feat in feature_extractors]

    def _dpi_from_exif(self):
        """Return (x_dpi, y_dpi) parsed from the image EXIF, or None."""
        def parse_resolution(val):
            # Bug fix: map() returns a lazy iterator on Python 3, so the
            # len()/indexing below failed — materialize the parts as a list.
            parts = [int(p.strip()) for p in str(val).split("/")]
            if len(parts) == 1:
                return parts[0]
            elif len(parts) == 2:
                # Rational value like "300/1".
                return int(round(float(parts[0]) / parts[1]))
            else:
                raise ValueError
        with open(self.fname, "rb") as f:
            tags = exifread.process_file(f)
        if "Image XResolution" in tags and "Image YResolution" in tags:
            try:
                return (parse_resolution(tags["Image XResolution"]),
                        parse_resolution(tags["Image YResolution"]))
            except ValueError:
                return None
        return None

    def save_image(self, fname, img, format=None):
        """Write *img* under the per-sheet output directory; return the path."""
        full_out_dir, fname = os.path.split(
            os.path.join(self.out_dir, self.sheet_name, fname))
        try:
            os.makedirs(full_out_dir)
        except OSError:
            pass  # directory already exists
        if format is None:
            format = self.out_format
        fname = "%s/%s.%s" % (full_out_dir, fname, format)
        cv2.imwrite(fname, img)
        return fname

    def overlay_contours(self):
        """Return the original image with shred contours highlighted."""
        # Draw contours on top of image with a nice yellow tint
        overlay = np.zeros(self.sheet.orig_img.shape, np.uint8)
        # Bug fix: on Python 3 a map() iterator would be exhausted by
        # fillPoly, leaving nothing for drawContours — use a list.
        contours = [shred.contour for shred in self.get_shreds()]
        # Filled yellow poly.
        cv2.fillPoly(overlay, contours, [104, 255, 255])
        img = self.sheet.orig_img.copy() + overlay
        # Add green contour.
        cv2.drawContours(img, contours, -1, [0, 180, 0], 2)
        return img

    def export_results_as_html(self):
        """Render the processed page and its shreds to an HTML report."""
        img_with_overlay = self.overlay_contours()
        path_to_image = self.save_image("full_overlay", img_with_overlay)
        # Export one processed page as html for further review
        tpl = env.get_template("page.html")
        shreds = self.get_shreds()
        for c in shreds:
            # Slight pre-processing of the features of each piece
            c.features["on_sheet_angle"] = "%8.1f°" % c.features["on_sheet_angle"]
            c.features["ratio"] = "%8.2f" % c.features["ratio"]
            c.features["solidity"] = "%8.2f" % c.features["solidity"]
        export_dir, img_name = os.path.split(path_to_image)
        with open("%s/index.html" % export_dir, "w") as fp:
            fp.write(tpl.render(
                img_name=img_name,
                contours=shreds,
                export_dir=export_dir,
                out_dir_name=self.sheet_name,
            ))

    def save_thumb(self, width=200):
        """Save and return a width-limited thumbnail of the sheet image."""
        r = float(width) / self.sheet.orig_img.shape[1]
        dim = (width, int(self.sheet.orig_img.shape[0] * r))
        resized = cv2.resize(self.sheet.orig_img, dim,
                             interpolation=cv2.INTER_AREA)
        return self.save_image("thumb", resized)

    def get_shreds(self):
        """Run the configured feature extractors over the sheet's shreds."""
        return self.sheet.get_shreds(self.feature_extractors, self.sheet_name)
def status_top_cmd(args):
    """CLI command: break down the hits for one HTTP status code per page.

    args[0] is the status code, args[1] the workset name to load.
    """
    if len(args) < 2:
        usage()
    http_code = 0
    try:
        http_code = int(args[0])
    except ValueError:  # narrowed from a bare except: only parse errors expected
        usage()
    if http_code == 0:
        usage()
    engine = DefaultEngine()
    wkset = engine.workset_manager.load(args[1])
    result_sheet = Sheet()
    result_sheet.header("URL", "Hits", "Ratio")
    total_code_hits = wkset.http_codes.get(http_code, None)
    if total_code_hits is None:
        return  # no page ever returned this code
    total_code_hits_check = 0
    total_pc_check = 0.0
    for p in wkset.pages.values():
        code_hits = p.http_codes.get(http_code, None)
        if code_hits is None:
            continue
        code_pc = float(code_hits) / total_code_hits * 100
        total_code_hits_check += code_hits
        total_pc_check += code_pc
        result_sheet.line(p.url, code_hits, code_pc)
    # "Check" row lets the reader verify the per-page sums against totals.
    result_sheet.end_line("Total", total_code_hits_check, total_pc_check)
    result_sheet.end_line("Check", total_code_hits, 100)
    result_sheet.sort(2)
    result_sheet.limit(20)
    # Parenthesized single-argument print works under both Python 2 and 3.
    print("\nHTTP status code %d breakdown:\n" % http_code)
    result_sheet.show()
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import time

from sheet import Sheet
import settings

url = "https://messages.google.com/web/conversations/new"

table = Sheet(settings.FILE_NAME, settings.SHEET_NAME)
table.row_count = len(table.worksheet.get_all_records())


def start_web_driver():
    """Return a Chrome driver configured for a clean, automation-quiet session."""
    options = Options()
    # options.add_argument(f"user-agent={user_agent}")
    for flag in ('--disable-infobars',
                 '--disable-extensions',
                 '--incognito',
                 '--profile-directory=Default',
                 '--disable-plugins-dicovery',
                 '--start-maximized'):
        options.add_argument(flag)
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    return webdriver.Chrome(settings.CHROME_DRIVER, options=options)


driver = start_web_driver()
driver.get(url)
if __name__ == "__main__": from angora.STRING.formatmaster import fmter from sheet import Sheet from datetime import datetime, date def create_testdata(): df = [["eid001", 100, 1.23, date.today(), datetime.now()]] df = pd.DataFrame(df, columns=["TEXT", "INTEGER", "REAL", "DATE", "DATETIME"]) df.to_csv(r"testdata\with_header.txt", index=False) df.to_csv(r"testdata\without_header.txt", header=False, index=False) # create_testdata() # 定义你目标数据格式 sheet = Sheet({"_id": "TEXT", "age": "INTEGER", "height": "REAL", "create_date": "DATE", "datetime": "DATETIME"}) csvfile_with_header = CSVFile(r"testdata\with_header.txt", sep=",", header=True, usecols=[0,1,2,3,4], dtype={"DATE": "DATE", "DATETIME": "DATETIME"}) sheet.match(csvfile_with_header.sample) for row in csvfile_with_header.generate_rows(): print( sheet.convert(row) ) csvfile_without_header = CSVFile(r"testdata\without_header.txt", sep=",", header=False, usecols=[0,1,2,3,4],
class RightmoveScraper:
    """Scrapes newly-listed rightmove rental properties and records them
    in a Google Sheet, optionally emailing a summary."""

    def __init__(self):
        self.base_url = "https://rightmove.co.uk"
        # Default to 24 hour period for now, don't show let agreed
        self.base_search_url = (
            self.base_url
            + "/property-to-rent/find.html?maxDaysSinceAdded=1&_includeLetAgreed=false&"
        )
        self.sheet = Sheet(service_account_configuration_path, sheet_name)

    def build_search_url(self, location_identifier):
        """Return the search URL for one location plus the configured filters."""
        parameters = config.items('filters')
        parameters.append(("locationIdentifier", location_identifier))
        return self.base_search_url + urlencode(parameters)

    def get_listing_urls(self, soup):
        """Extract absolute listing URLs from a search-results page."""
        listings = soup.find_all("a", class_="propertyCard-rentalPrice")
        return [
            self.base_url + listing.get("href")
            for listing in listings if listing.get("href") != ""
        ]

    def get_listings_available_after_n_weeks(self, n_weeks, listings):
        """Keep only listings available more than *n_weeks* from today."""
        if not listings:  # idiom fix: truthiness instead of len()
            return listings
        print(
            f'Filtering listings down only those available after {n_weeks} weeks from now'
        )

        def convert_date(x):
            return datetime.datetime.strptime(x, "%d/%m/%Y").date()

        today = datetime.datetime.now().date()
        available_after = today + datetime.timedelta(weeks=n_weeks)
        return [
            listing for listing in listings
            if listing.has_date_available()
            and convert_date(listing.date_available) > available_after
        ]

    def get_location_name(self, dom):
        """Pull the human-readable location out of the page title."""
        result = re.search(r".*Rent in (.*) \|", dom.title.string)
        return result.group(1)

    def scrape(self, url):
        """Fetch a search page and scrape full details for every listing."""
        html = requests.get(url).text
        listing_dom = BeautifulSoup(html, 'html.parser')
        location = self.get_location_name(listing_dom)
        print(f"Scraping properties in {location}...")
        listing_urls = self.get_listing_urls(listing_dom)
        listings = [Listing(url) for url in listing_urls]
        for listing in listings:
            listing.scrape_details()
        return listings

    def remove_duplicate_listings(self, listings):
        """De-duplicate listings by URL, keeping the first occurrence."""
        if not listings:
            return listings
        unique = {}
        for listing in listings:
            # Idiom fix: membership test on the dict itself, not .keys().
            if listing.url not in unique:
                unique[listing.url] = listing
        return unique.values()

    def filter_listings(self, listings):
        """Apply the optional availableAfterNWeeks filter from config."""
        if config.has_option("filters", "availableAfterNWeeks"):
            listings = self.get_listings_available_after_n_weeks(
                int(config.get("filters", "availableAfterNWeeks")), listings)
        return listings

    def scrape_listings(self, location_identifiers):
        """Scrape every configured location and drop duplicate listings."""
        listings = []
        for identifier in location_identifiers:
            url = self.build_search_url(identifier)
            listings.extend(self.scrape(url))
        return self.remove_duplicate_listings(listings)

    def process_listings(self, listings):
        """Write listings to the sheet and optionally email a summary."""
        written, duplicates = self.sheet.add_listings(listings)
        message = dedent(f"""
        { len(listings) } eligible properties were found.
        {written} new properties were added to the worksheet {sheet_name}.
        {duplicates} properties already existed on the sheet and were ignored.
        """)
        print(message)
        if config.has_section("mailer") and written:
            mailer_config = dict(config.items("mailer"))
            Mailer(mailer_config).send_mail(message)

    def run(self):
        """Entry point: scrape, filter, and record listings."""
        location_identifiers = config.get("locations", "identifiers").split(",")
        listings = self.filter_listings(
            self.scrape_listings(location_identifiers))
        if len(listings):
            self.process_listings(listings)
        else:
            print("No listings found for specified search criteria")