def boi_rate(cls):
    # Scrape the Bank of Israel exchange-rate table and refresh the 'rate' table.
    url = ['https://www.boi.org.il/en/Markets/ExchangeRates/Pages/Default.aspx']
    driver = webdriver.Firefox(
        executable_path=r'project/firefox_driver/geckodriver.exe')
    for x in url:
        try:
            driver.get(x)
            soup = BeautifulSoup(driver.page_source, 'lxml')
            td = soup.find_all('td')
            list_rate = [z.text for z in td]
            if len(list_rate) > 1:
                Database().delete_from('rate')
                # Fixed offsets into the flattened <td> list: rate and daily change
                # per currency (USD, GBP, JPY, EUR, judging by the d_/p_/y_/e_ prefixes).
                d_rate, d_xrate = list_rate[8], list_rate[9]
                p_rate, p_xrate = list_rate[14], list_rate[15]
                y_rate, y_xrate = list_rate[20], list_rate[21]
                e_rate, e_xrate = list_rate[26], list_rate[27]
                Database().insert_into('rate', d_rate, d_xrate, p_rate, p_xrate,
                                       y_rate, y_xrate, e_rate, e_xrate)
        except Exception as err:
            print(err)
        finally:
            driver.close()
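# Sketch (assumption, not part of the original code): every scraper in this module
# builds its own webdriver.Firefox with the same executable_path. A hypothetical
# make_driver() helper like the one below could centralise that setup; the name and
# the headless option are illustrative only.
def make_driver(headless=True):
    # Build a Firefox driver pointed at the bundled geckodriver (Selenium 3 style).
    from selenium import webdriver
    from selenium.webdriver.firefox.options import Options
    options = Options()
    options.headless = headless  # run without opening a visible browser window
    return webdriver.Firefox(
        executable_path=r'project/firefox_driver/geckodriver.exe',
        options=options)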
def twitter_api(cls):
    # Pull the authenticated user's home timeline via Twython and cache it in the
    # 'twitter_timeline' table.
    config = cls.config()
    API_KEY = config['twitter'][0]['api_key']
    API_SECRET_KEY = config['twitter'][0]['api_secret_key']
    ACCESS_TOKEN = config['twitter'][0]['access_token']
    ACCESS_TOKEN_SECRET = config['twitter'][0]['access_token_secret']
    twitter = Twython(API_KEY, API_SECRET_KEY, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
    twitter = twitter.get_home_timeline()
    if twitter is not None:
        Database().delete_from('twitter_timeline', None, None)
        x = 0
        while x < len(twitter):
            try:
                created = twitter[x]['created_at']
                text = twitter[x]['text']
                link = twitter[x]['user']['url']
                name = twitter[x]['user']['screen_name']
                img = twitter[x]['user']['profile_image_url']
                Database().insert_into('twitter_timeline', created, text, link,
                                       name, img)
            except Exception as err:
                print(err)
            x += 1  # increment outside the try so a bad tweet cannot stall the loop
def populer_machine(cls):
    # Scrape the Popular Mechanics front page and refresh the 'popluer_machine' table.
    url = ['https://www.popularmechanics.com/']
    driver = webdriver.Firefox(
        executable_path=r'project/firefox_driver/geckodriver.exe')
    for z in url:
        try:
            driver.get(z)
            soup = BeautifulSoup(driver.page_source, 'lxml')
            link_item = soup.find_all('div', class_='full-item')
            dict_items = {'link': [], 'img': [], 'title': [], 'content': [], 'author': []}
            for x in link_item:
                link = x.find('a', class_='full-item-image item-image')['href']
                img = x.find('span')['data-lqip']
                img = img.split('?')[0]  # drop the query string from the lazy-load image URL
                title = x.find('a', class_='full-item-title item-title').string
                content = x.find('div', class_='full-item-dek item-dek').text
                author = x.find('span', class_='byline-name').string
                dict_items['link'].append(link)
                dict_items['img'].append(img)
                dict_items['title'].append(title)
                dict_items['content'].append(content)
                dict_items['author'].append(author)
            if dict_items['link']:
                Database().delete_from('popluer_machine')
                # Only items at indices 2-6 are stored.
                i = 0
                while i < len(dict_items['link']):
                    if 1 < i < 7:
                        Database().insert_into('popluer_machine',
                                               str(dict_items['link'][i]),
                                               str(dict_items['img'][i]),
                                               str(dict_items['title'][i]),
                                               str(dict_items['content'][i]),
                                               str(dict_items['author'][i]))
                    i += 1
        except Exception as err:
            print(err)
        finally:
            driver.close()
def youtube_scrap(cls):
    # Scrape the latest uploads of a fixed list of YouTube channels and refresh 'yt_channels'.
    url = [
        'https://www.youtube.com/user/schafer5/videos',
        'https://www.youtube.com/user/TechGuyWeb/videos',
        'https://www.youtube.com/channel/UCnxGkOGNMqQEUMvroOWps6Q/videos'
    ]
    Database().delete_from('yt_channels')
    for z in url:
        try:
            driver = webdriver.Firefox(
                executable_path=r'project/firefox_driver/geckodriver.exe')
            driver.get(z)
            soup = BeautifulSoup(driver.page_source, 'lxml')
            link = soup.find_all('a', id='video-title')
            yt_dict = {'title': [], 'views': [], 'href': []}
            x = 0
            while x < len(link):
                temp = link[x].get('aria-label')
                if temp:
                    # aria-label reads roughly "<title> by <channel> <views> ...".
                    yt_dict['title'].append(temp.split('by')[0])
                    yt_dict['views'].append(temp.split('by')[1])
                href = link[x].get('href')
                if href:
                    href = href.split('=')[1]  # keep only the video id from ?v=<id>
                    yt_dict['href'].append(href)
                x += 1
            if yt_dict['title']:
                # Store up to the first five videos per channel.
                i = 0
                while i < min(5, len(yt_dict['title']), len(yt_dict['href'])):
                    Database().insert_into('yt_channels', None, yt_dict['title'][i],
                                           yt_dict['views'][i], yt_dict['href'][i])
                    i += 1
        except Exception as err:
            print(err)
        finally:
            driver.close()
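# Note (assumption): splitting the aria-label on the bare substring 'by' above will
# misparse any title that itself contains "by". One possible heuristic, splitting only
# on the last " by " occurrence, is sketched below; parse_aria_label() is a hypothetical
# helper and the label format is assumed, not verified against YouTube's markup.
def parse_aria_label(label):
    # "Some title by Channel 12,345 views 2 years ago" -> ("Some title", "Channel 12,345 views ...")
    title, _, rest = label.rpartition(' by ')
    if not title:
        # No " by " separator found; treat the whole label as the title.
        return label, ''
    return title, rest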
def imdb_scrap(cls):
    # Fetch the IMDb front page and look up the first few featured titles via the OMDb API.
    url = 'https://www.imdb.com/?ref_=nv_home'
    try:
        r = requests.get(url)
        if r.status_code == 200:
            Database().delete_from('imdb_movie', None, None)
            soup = BeautifulSoup(r.text, 'lxml')
            link = soup.find_all('div', class_='title')
            x = 0
            while x < min(3, len(link)):
                film = link[x].text
                Api().imdb_api(film)
                x += 1
    except Exception as err:
        print(err)
def login():
    # Single-user login: the submitted email and password must both match the stored
    # bcrypt hashes for the configured user row.
    loginForm = LoginForm()
    user_db = Database().select_user()
    if loginForm.validate_on_submit():
        if user_db[0][1] == loginForm.username.data:
            if bcrypt.check_password_hash(user_db[0][2], loginForm.email.data) and \
                    bcrypt.check_password_hash(user_db[0][3], loginForm.password.data):
                return redirect(url_for('index'))
            else:
                return redirect(url_for('login'))
    return render_template('login.html', loginForm=loginForm)
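# Sketch (assumption): the hashes compared in login() are expected to be bcrypt hashes
# of the user's email and password, created once when the user row is seeded. seed_user()
# is a hypothetical helper, the 'users' table name and column order are guesses based on
# the user_db indexing above, and it relies on the module's existing bcrypt instance and
# Database wrapper.
def seed_user(username, email, password):
    # Hash the email and password the same way login() later verifies them.
    email_hash = bcrypt.generate_password_hash(email).decode('utf-8')
    password_hash = bcrypt.generate_password_hash(password).decode('utf-8')
    Database().insert_into('users', username, email_hash, password_hash)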
def imdb_api(cls, movie):
    # Look a title up on the OMDb API and store the result in 'imdb_movie'.
    config = cls.config()
    config = config['omdb']  # OMDb API key
    url = f'http://www.omdbapi.com/?apikey={config}&t={movie}'
    try:
        r = requests.get(url)
        if r.status_code == 200:
            data = json.loads(r.text)
            title = data['Title']
            year = data['Year']
            runtime = data['Runtime']
            genre = data['Genre']
            director = data['Director']
            actors = data['Actors']
            plot = data['Plot']
            poster = data['Poster']
            rating = data['imdbRating']
            production = data['Production']
            Database().insert_into('imdb_movie', title, year, runtime, genre,
                                   director, actors, plot, poster, rating,
                                   production)
    except Exception as err:
        print(err)
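# Sketch (assumption): twitter_api() and imdb_api() both read credentials through
# cls.config(), whose loader and file name are not shown here. The JSON shape below is
# inferred from the keys accessed above (config['twitter'][0]['api_key'] etc., and a
# plain string under 'omdb'); values and the file path are placeholders.
#
# config.json (hypothetical):
# {
#     "twitter": [
#         {
#             "api_key": "...",
#             "api_secret_key": "...",
#             "access_token": "...",
#             "access_token_secret": "..."
#         }
#     ],
#     "omdb": "your-omdb-api-key"
# }
def config(cls):
    # Minimal loader sketch of what cls.config() might look like.
    import json
    with open('project/config.json') as fh:  # placeholder path
        return json.load(fh)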
def index():
    # Dashboard view: handle the embedded forms first, then load every widget's data
    # from MySQL, seeding placeholder rows where a table is still empty.
    user = session.get('user')
    defImg = url_for('static', filename='img/def.png')
    logoutForm = LogoutForm(prefix='A')
    wikiForm = WikiForm(prefix='B')
    eventForm = EventForm(prefix='C')
    delForm = DelForm(prefix='E')
    googleForm = GoogleForm(prefix='D')

    # Form handlers
    if logoutForm.validate_on_submit() and logoutForm.logout.data:
        session.pop(logoutForm.username.data, None)
        return redirect(url_for('login'))
    if googleForm.validate_on_submit() and googleForm.submit_g.data:
        Scrap().google_query(googleForm.query_g.data)
        return redirect(url_for('index'))
    if wikiForm.validate_on_submit() and wikiForm.submit.data:
        res = Api().wiki_search(wikiForm.query.data)
        if type(res) == list:
            Database().delete_from('wiki_search', None, None)
            Database().insert_into('wiki_search', res[0], user, res[2], res[1], res[3])
        return redirect(url_for('index'))
    if eventForm.validate_on_submit() and eventForm.save_e.data:
        Database().insert_into('events', user, eventForm.event.data, eventForm.hour.data,
                               eventForm.discreption.data, eventForm.type_e.data,
                               eventForm.date.data)
        return redirect(url_for('index'))
    if delForm.validate_on_submit() and delForm.submitDel.data:
        Database().delete_from('events', user, delForm.delE.data)
        return redirect(url_for('index'))

    # MySQL selects
    rate = Database().select_from('rate')
    events = Database().select_from('events', 'username', user)
    wikiQuery = Database().select_from('wiki_search', 'user', user)
    if len(wikiQuery) < 1:
        res = Api().wiki_search('internet')
        Database().insert_into('wiki_search', res[0], user, res[2], res[1], res[3])
        wikiQuery = Database().select_from('wiki_search', None, None)
    imdb = Database().select_from('imdb_movie')
    if len(imdb) < 1:
        Database().insert_into('imdb_movie', None, None, None, None, None, None,
                               None, None, None, None)
        imdb = Database().select_from('imdb_movie')
    twitter = Database().select_from('twitter_timeline')
    if len(twitter) < 1:
        Database().insert_into('twitter_timeline', None, None, None, None, None)
        twitter = Database().select_from('twitter_timeline')
    science_news = Database().select_from('science_news')
    if len(science_news) <= 1:
        x = 0
        while x <= 4:
            Database().insert_into('science_news', None, defImg, None, None, None, None)
            x += 1
        science_news = Database().select_from('science_news')
    popluer_machine = Database().select_from('popluer_machine')
    if len(popluer_machine) < 1:
        x = 0
        while x <= 4:
            Database().insert_into('popluer_machine', None, defImg, None, None, None)
            x += 1
        popluer_machine = Database().select_from('popluer_machine')
    youtube = Database().select_from('yt_channels')

    if 'user' in session:
        user = session.get('user')
        return render_template('index.html', user=user, googleForm=googleForm,
                               logoutForm=logoutForm, rate=rate, events=events,
                               science_news=science_news, popluer_machine=popluer_machine,
                               wikiForm=wikiForm, wikiQuery=wikiQuery, eventForm=eventForm,
                               delForm=delForm, imdb=imdb, twitter=twitter, youtube=youtube)
    return redirect(url_for('login'))
def science_scrap(cls):
    # Scrape the Scientific American front page and refresh the 'science_news' table.
    url = ['https://www.scientificamerican.com/']
    driver = webdriver.Firefox(
        executable_path=r'project/firefox_driver/geckodriver.exe')
    try:
        for z in url:
            driver.get(z)
            soup = BeautifulSoup(driver.page_source, 'lxml')
            grid = soup.find_all('div', class_='grid__col large-up-1 medium-1-2')
            h3 = soup.find_all('div', class_='listing-wide__inner')
            p = soup.find_all('p', class_='t_body listing-wide__inner__desc')
            meta = soup.find_all('div', class_='t_meta')
            science_dict = {'source': [], 'img': [], 'link': [], 'title': [],
                            'text': [], 'meta': []}
            for x in grid:
                science_dict['source'].append(x.find('source')['srcset'])
                science_dict['img'].append(x.find('img')['src'])
            for x in h3:
                science_dict['link'].append(x.find('a')['href'])
                science_dict['title'].append(x.find('a').string)
            for x in p:
                science_dict['text'].append(x.string)
            for x in meta:
                science_dict['meta'].append(x.string)
            if science_dict['source']:
                Database().delete_from('science_news')
                x = 0
                while x < 5:
                    Database().insert_into('science_news',
                                           str(science_dict['source'][x]),
                                           str(science_dict['img'][x]),
                                           str(science_dict['link'][x]),
                                           str(science_dict['title'][x]),
                                           str(science_dict['text'][x]),
                                           str(science_dict['meta'][x]))
                    x += 1
    except Exception as err:
        print(err)
    finally:
        driver.close()