def testScrape(self): url = "https://www.bbc.com" s = Scrape() s.setUrl(url) keywords, links = s.scrape() self.assertTrue(keywords, msg='No Keywords found') self.assertTrue(links, msg='No links found')
def main(): page = 'https://www.canyon.com/en-us/outlet?--wysiwyg_cany\ on_products-factoryoutlet%5B%40package%5D=wysiwyg.canyon.products&--wysiwyg_cany\ on_products-factoryoutlet%5B%40controller%5D=factoryoutlet&--wysiwyg_canyon_pro\ ducts-factoryoutlet%5B%40action%5D=road&--wysiwyg_canyon_products-factoryoutlet\ %5B%40format%5D=html' links = Scrape.scrape(Scrape(), page) if links: message = Message.format(Message(), links) Message.send(Message(), message)
while stopTime < 0 or time.time() < stopTime: # Wait to receive a source from the master source = comm.recv(source=0) links = list() keywords = list() if source == '': # We got a blank link, wait for a while then ask again time.sleep(1) else: source = source.strip() parts = source.split('/') baseurl = '/'.join(parts[0:3]) rp = roboparser.RobotFileParser() rp.set_url(baseurl + '/robots.txt') rp.read() if rp.can_fetch('*', source): s.setUrl(source) keywords, links = s.scrape() # Persist keywords to the database # commenting out because this is such a bottleneck # s.submitWords(keywords) # Send new links back to the master queue comm.send((keywords, links), dest=0) time.sleep(1)
class Pokedex(wx.Frame): def __init__(self, parent, id, title): wx.Frame.__init__(self, parent, style=wx.DEFAULT_FRAME_STYLE ^ wx.RESIZE_BORDER ^ wx.MAXIMIZE_BOX) self.list_pokeweak = [] file = open('pokedata.cfg', 'r') all_data_str = file.read() file.close() if all_data_str == '': file = open('pokedata.default.cfg', 'r') all_data_str = file.read() file.close() file = open('pokedata.cfg', 'w') file.write(all_data_str) file.close() self.all_data = ast.literal_eval(all_data_str) self.reset(self.all_data) self.typestrings = [ "Quad", "Double", "Normal", "Half", "Fourth", "Immune" ] self.colors = [ '#E8E8E8', '#D0D0D0', '#B8B8B8', '#A0A0A0', '#888888', '#707070' ] self.icon = wx.Icon('Images/pokeball.ico', wx.BITMAP_TYPE_ICO) self.SetIcon(self.icon) self.SB1 = wx.StaticBox(self, -1, 'Stats') self.SB1.SetBackgroundColour('white') self.CB = wx.ComboBox(self, -1, value='', choices=['Name', 'Type'], style=wx.CB_READONLY, size=(103, -1)) self.CB.SetSelection(0) #create menubar self.menubar = wx.MenuBar() #create file menu self.filemenu = wx.Menu() self.m_stayontop = self.filemenu.Append(wx.ID_ANY, 'Stay on Top [Off]\tCtrl-S', '') self.m_resize = self.filemenu.Append(wx.ID_ANY, 'Resizable [Off]\tCtrl-E', '') self.m_track = self.filemenu.Append(wx.ID_ANY, 'Show Tracked\tCtrl-T', '') self.m_rescrape = self.filemenu.Append(wx.ID_ANY, 'Rescrape Html\tCtrl-R', '') self.m_close = self.filemenu.Append(wx.ID_ANY, 'Close\tCtrl-Q', '') self.menubar.Append(self.filemenu, 'File') #bind events to menu options self.Bind(wx.EVT_MENU, self.stay_on_top, self.m_stayontop) self.Bind(wx.EVT_MENU, self.resizable, self.m_resize) self.Bind(wx.EVT_MENU, self.toggle_track_mode, self.m_track) self.Bind(wx.EVT_MENU, self.rescrape, self.m_rescrape) self.Bind(wx.EVT_MENU, self.on_close, self.m_close) #Creating list that will store all pokemon self.LC = wx.ListCtrl(self, -1, style=wx.LC_REPORT | wx.LC_SINGLE_SEL) #Creating list for weaknesses self.LC2 = AutoWidthListCtrl(self) #Inserting columns into that list of pokemon self.LC.InsertColumn(0, 'Pokemon', format=wx.LIST_FORMAT_LEFT, width=90) self.LC.InsertColumn(1, 'Type', format=wx.LIST_FORMAT_CENTER, width=100) self.LC.InsertColumn(2, 'Total', format=wx.LIST_FORMAT_CENTER, width=45) self.LC.InsertColumn(3, 'HP', format=wx.LIST_FORMAT_CENTER, width=30) self.LC.InsertColumn(4, 'Atk', format=wx.LIST_FORMAT_CENTER, width=30) self.LC.InsertColumn(5, 'Def', format=wx.LIST_FORMAT_CENTER, width=30) self.LC.InsertColumn(6, 'SpAtk', format=wx.LIST_FORMAT_CENTER, width=44) self.LC.InsertColumn(7, 'SpDef', format=wx.LIST_FORMAT_CENTER, width=44) self.LC.InsertColumn(8, 'Spd', format=wx.LIST_FORMAT_CENTER, width=35) self.LC2.InsertColumn(0, 'Damage Taken [(2 ^ Row) * 1/4]', format=wx.LIST_FORMAT_RIGHT, width=355) #creating boxsizers mainbox = wx.BoxSizer(wx.VERTICAL) statsbox = wx.BoxSizer(wx.HORIZONTAL) imagebox = wx.StaticBoxSizer(self.SB1, wx.HORIZONTAL) defensebox = wx.BoxSizer(wx.HORIZONTAL) box = wx.BoxSizer(wx.VERTICAL) hbox = wx.BoxSizer(wx.HORIZONTAL) #creating initial image for imagebox self.image = wx.StaticBitmap( self, -1, wx.Bitmap('Images/0.png', wx.BITMAP_TYPE_PNG)) self.input = wx.TextCtrl(self, value='', size=(270, -1)) self.SetBackgroundColour('pink') #Adding text entry and combobox to a hbox then adding it the box hbox.Add(self.input, wx.EXPAND | wx.ALIGN_LEFT) hbox.Add(self.CB, wx.ALIGN_CENTER) box.Add(hbox, flag=wx.ALL) #Adding list that will store all pokemon box.Add(self.LC, flag=wx.ALL | wx.EXPAND) #Adding image area to imagebox imagebox.Add(self.image, flag=wx.ALIGN_CENTER | wx.ALL) defensebox.Add(self.LC2, flag=wx.ALIGN_CENTER | wx.EXPAND) #Adding image to boxsizer statsbox.Add(imagebox, flag=wx.ALIGN_CENTER | wx.ALL) #Adding boxsizer to mainbox box.Add(statsbox, flag=wx.ALIGN_CENTER | wx.EXPAND) self.SetMenuBar(self.menubar) statsbox.Add(defensebox, flag=wx.ALIGN_CENTER | wx.EXPAND) mainbox.Add(box, flag=wx.ALL | wx.EXPAND) #Bind Events to search bar self.Bind(wx.EVT_TEXT, self.search, self.input) self.Bind(wx.EVT_CHAR_HOOK, self.on_keyboard_search, self.input) #Bind Events to Search Combobox self.Bind(wx.EVT_COMBOBOX, self.search, self.CB) self.Bind(wx.EVT_CHAR_HOOK, self.on_keyboard_combobox, self.CB) #Bind Events to Pokemon ListCtrl self.Bind(wx.EVT_LIST_ITEM_FOCUSED, self.set_information, self.LC) self.Bind(wx.EVT_LIST_KEY_DOWN, self.on_keyboard_list, self.LC) self.Bind(wx.EVT_LIST_ITEM_RIGHT_CLICK, self.track, self.LC) self.Bind(wx.EVT_LIST_ITEM_ACTIVATED, self.track, self.LC) self.Bind(wx.EVT_TEXT_ENTER, self.track, self.input) #Resize and Refresh the list self.SetSizer(mainbox) mainbox.Fit(self) self.refresh('') def on_keyboard_search(self, event): if event.GetKeyCode() == wx.WXK_DOWN: if self.LC.GetItemCount(): self.LC.Focus(0) self.LC.SetFocus() elif event.GetKeyCode() == wx.WXK_UP: #Do Nothing pass elif event.GetKeyCode() == wx.WXK_TAB: self.CB.SetFocus() else: #Handle event default event.Skip() def on_keyboard_list(self, event): if event.GetKeyCode() == wx.WXK_UP: index = self.LC.GetFocusedItem() if index == 0: self.input.SetFocus() elif event.GetKeyCode() == wx.WXK_TAB: self.input.SetFocus() else: #Handle event default event.Skip() def on_keyboard_combobox(self, event): if event.GetKeyCode() == wx.WXK_TAB: self.input.SetFocus() else: #Handle Event default event.Skip() def on_close(self, event): self.Destroy() def reset(self, array): self.p_filter = '' self.track_mode = 0 self.Tracked = PokeList() self.Pokedex = PokeList() self.Scrape = Scrape() self.Pokedex.add_all(array) def stay_on_top(self, event): if self.ToggleWindowStyle(flag=wx.STAY_ON_TOP): self.m_stayontop.SetItemLabel('Stay on Top [On]\tCtrl-S') else: self.m_stayontop.SetItemLabel('Stay on Top [Off]\tCtrl-S') def resizable(self, event): if self.ToggleWindowStyle(flag=wx.RESIZE_BORDER): self.m_resize.SetItemLabel('Resizable [On]\tCtrl-E') else: self.m_resize.SetItemLabel('Resizable [Off]\tCtrl-E') def track(self, event): if self.LC.GetItemCount(): index = self.LC.GetFocusedItem() if index == -1: index = 0 ListItem = self.LC.GetItem(index, 0) Name = ListItem.GetText() if self.track_mode: self.Tracked.remove(Name) filter = self.input.GetValue().strip().lower() self.refresh(filter) else: pokemon = self.Pokedex.get(Name) self.Tracked.add(pokemon) if self.LC.GetItemCount() == 0: self.input.SetFocus() def toggle_track_mode(self, event): if self.track_mode: self.m_track.SetItemLabel('Show Tracked\tCtrl-T') self.track_mode = 0 else: self.m_track.SetItemLabel('Show All\tCtrl-T') self.track_mode = 1 self.input.SetValue('') self.refresh('') def set_item_color(self, index, color1, color2): if index % 2: self.LC.SetItemBackgroundColour(index, color1) else: self.LC.SetItemBackgroundColour(index, color2) def set_items(self, set): self.LC.DeleteAllItems() for index, pokemon in enumerate(set): self.append_item(pokemon) self.set_item_color(index, 'pink', 'white') def add_items(self, set): column = self.CB.GetSelection() i, max_i = (0, self.LC.GetItemCount()) j, max_j = (0, len(set)) while True: if j == max_j: break elif i == max_i: self.insert_item(i, set[j]) max_i += 1 else: ListItem = self.LC.GetItem(i, column) ItemText = ListItem.GetText() if not set[j].get_name() == ItemText: self.insert_item(i, set[j]) max_i += 1 self.set_item_color(i, 'pink', 'white') i, j = (i + 1, j + 1) def refresh(self, filter): if self.track_mode: if filter == '': subset = self.Tracked.get_all() elif self.CB.GetSelection(): subset = self.Tracked.type_filter(filter) else: subset = self.Tracked.name_filter(filter) else: if filter == '': subset = self.Pokedex.get_all() elif self.CB.GetSelection(): subset = self.Pokedex.type_filter(filter) else: subset = self.Pokedex.name_filter(filter) if subset[0].get_name() == '???': self.LC.DeleteAllItems() elif self.p_filter in filter: self.set_items(subset) else: self.add_items(subset) self.p_filter = filter self.SB1.SetLabel(subset[0].get_name()) self.set_image(subset[0].get_image()) self.set_weaknesses(subset[0]) self.LC.Select(0, on=1) def insert_item(self, index, pokemon): self.LC.InsertItem(index, pokemon.get_name()) self.LC.SetItem(index, 1, pokemon.get_type()) self.LC.SetItem(index, 2, pokemon.get_total()) self.LC.SetItem(index, 3, pokemon.get_hp()) self.LC.SetItem(index, 4, pokemon.get_atk()) self.LC.SetItem(index, 5, pokemon.get_def()) self.LC.SetItem(index, 6, pokemon.get_spatk()) self.LC.SetItem(index, 7, pokemon.get_spdef()) self.LC.SetItem(index, 8, pokemon.get_spd()) def append_item(self, pokemon): self.LC.Append(pokemon.get()) def set_information(self, event): self.LC2.DeleteAllItems() ListItem = self.LC.GetItem(self.LC.GetFocusedItem(), 0) Name = ListItem.GetText() pokemon = self.Pokedex.get(Name) self.set_weaknesses(pokemon) self.SB1.SetLabel(pokemon.get_name()) self.set_image(pokemon.get_image()) def set_weaknesses(self, pokemon): self.LC2.DeleteAllItems() self.LC2.InsertItem(0, pokemon.get_quad()) self.LC2.SetItemBackgroundColour(0, self.colors[0]) self.LC2.InsertItem(1, pokemon.get_double()) self.LC2.SetItemBackgroundColour(1, self.colors[1]) self.LC2.InsertItem(2, pokemon.get_normal()) self.LC2.SetItemBackgroundColour(2, self.colors[2]) self.LC2.InsertItem(3, pokemon.get_half()) self.LC2.SetItemBackgroundColour(3, self.colors[3]) self.LC2.InsertItem(4, pokemon.get_fourth()) self.LC2.SetItemBackgroundColour(4, self.colors[4]) self.LC2.InsertItem(5, pokemon.get_immune()) self.LC2.SetItemBackgroundColour(5, self.colors[5]) self.LC2.resizeLastColumn(-1) def set_image(self, image): self.image.SetBitmap(image) def search(self, event): filter = self.input.GetValue().strip().lower() self.refresh(filter) def rescrape(self, event): self.Hide() self.mySplash = wx.adv.SplashScreen( app.myBitmap, wx.adv.SPLASH_NO_TIMEOUT | wx.adv.SPLASH_CENTER_ON_SCREEN, -1, None) self.mySplash.Show() self.all_data = self.Scrape.scrape() self.reset(self.all_data) self.refresh('') self.mySplash.Destroy() self.Show()
from scrape import Scrape from stats import Stats stats_list = [ "remaining_residence_plan_balance", "v1_cafe_times", "meal_expense_60days", "average_meal_expense_60days", "missed breakfasts and lunches" ] stats_table = {} for i in range(len(stats_list)): stats_table.update({stats_list[i]: 0}) data = Scrape("id", "pin") sources = [] sources = data.scrape() records = Stats(sources[0], sources[1], stats_table) records.form_transactions() records.get_residence_plan_balance() records.stats() print(stats_table) # print(pageSource)
[str(content).strip() for content in data.p.contents]) if i % 2 == 0 ]) if '(' in address: address = address[:address.index('(') - 1] return [name, address] def scrapeState(state): states.append(state) locations = sc.scrape(endpoint + state, 'div', attrs={'class': 'location'}) scraped = [parse_div(location) for location in locations] return scraped every_chickfila_in_the_country = [ scrapeState(state) for state in [ re.search(r"\((.*)\)", str(state))[0].replace('(', '').replace( ')', '').lower() for state in sc.scrape(endpoint, 'li') if state.a and state.a.has_attr('href') and '/locations/browse/' == state.a['href'][:len('/locations/browse/')] ] ] with open('every_chikfila_in_the_country.txt', 'w') as f: for i, state in enumerate(every_chickfila_in_the_country): for store in state: f.write("{}:{}:{}\n".format(states[i], store[0], store[1]))