def load():
    # Make sure restaurants.pkl exists; regenerate it via main() if it does not.
    try:
        with open('restaurants.pkl') as file:
            file.close()
            pass
    except IOError as e:
        main()
    restaurants = pickle.load(open('restaurants.pkl', 'rb'))
    # Share the loaded data with the handler functions via function attributes.
    getItems.restaurants = restaurants
    getRestaurants.restaurants = restaurants
    getMenu.restaurants = restaurants
def init_get_data():
    global DATA
    data = {}
    # try:
    #     scraper.main()
    # except:
    #     print('data not updated')
    try:
        with open('./series.json', 'r') as f:
            text = f.read()
            data = json.loads(text)['data']
            f.close()
    except:
        print('No data found.')
    DATA = data
def main():
    scraper.main()
    try:
        file = open("output.csv")
        reader = csv.reader(file)
        if len(list(reader)) > 1:
            send_results.main()
            print("Sent results")
        else:
            print("No results to send")
    except:
        pass
def scrape_service_update(context):
    ids = scraper.main()
    print(ids)
    notification.send_notifications(context, ids["resolved"], "resolved")
    notification.send_notifications(context, ids["new"], "new")
    notification.send_notifications(context, ids["updated"], "updated")
def addCourse(url, session=None, major_id=None):
    if session is None:
        Base.metadata.bind = engine
        DBSession = sessionmaker(bind=engine)
        session = DBSession()
    if len(url) == 0:
        return
    course = scraper.main([url])
    if len(course.lectures) == 0:
        return
    # Create course
    print "Creating course: " + course.title
    c = Course(name=course.title, description=course.description)
    session.add(c)
    session.commit()
    # Add to major
    if major_id is not None:
        rel = MajorCourse(major_id=major_id, course_id=c.course_id)
        session.add(rel)
    # Add Lectures
    for lecture in course.lectures:
        task = Task(course_id=c.course_id,
                    name=lecture[0],
                    url=lecture[1])
        session.add(task)
    session.commit()
def home(request):
    title = "Title Goes Here"
    Search = SearchBoxForm(request.POST or None)
    score = None
    polarity = None
    subjectivity = None
    buzzwords = None
    queried = False
    numberOfComments = 0
    if Search.is_valid():
        print "CALCULATE SUBREDDIT SCORE"
        title = Search.cleaned_data.get('Peek')
        score, buzzwords, polarity, subjectivity, numberOfComments = scraper.main(title)
        title = "/r/" + title
        print score
        queried = True
        Search.save()
    context = {
        "title": title,
        "searchBar": Search,
        "results": score,
        "polarity": polarity,
        "subjectivity": subjectivity,
        "queried": queried,
        "buzzwords": buzzwords,
        "numberOfComments": numberOfComments
    }
    return render(request, "home.html", context)
def checkAvtivatie(versleutelBestand, ww, timer, tijd, sleutel):
    # This variable stores the content of the tweet at the given time, read from the CSV file.
    bericht = tweet.main(gbr, ww, gebruiker, path)
    berichten = bericht
    # Check whether a tweet was found for the account by looking in the CSV file.
    if not berichten:
        # No tweet was found ...
        timer = timer - 30
        sleep(20)  # Pause for 20 seconds so Twitter does not block the account.
        if timer == 0:
            # ... and once the timer has run out, start the encryption.
            versleutel(versleutelBestand, ww)
        else:
            # Otherwise keep checking, until the timer runs out, whether a tweet with the key was sent.
            print("Er zijn geen tweets gevonden. Heb gedult er moet nog gecheck worden ...")
            checkAvtivatie(versleutelBestand, ww, timer, tijd, sleutel)
    else:
        # If there is a tweet whose content is the key, stop the encryption program.
        print(berichten[tijd])
        if berichten[tijd] == sleutel:
            print("Het versleutelmeganisme is uitgeschakeld - overgeschakeld op exit mode")
            exit()
        else:
            checkAvtivatie(versleutelBestand, ww, timer, tijd, sleutel)
    print("okey")
def addCourses(page, session=None):
    if session is None:
        Base.metadata.bind = engine
        DBSession = sessionmaker(bind=engine)
        session = DBSession()
    print "Locating course pages"
    coursesToAdd = scraper.getAllCoursePages(page)
    print "Locating course lecture videos"
    #major_id = majors.index("Computer Science") + 1
    course_id = 1
    for url in coursesToAdd:
        course = scraper.main([url])
        if len(course.lectures) == 0:
            continue
        print "Creating course: " + course.title
        c = Course(name=course.title, description=course.description)
        session.add(c)
        #rel = MajorCourse(major_id=2, course_id=course_id)
        #session.add(rel)
        for lecture in course.lectures:
            task = Task(course_id=course_id,
                        name=lecture[0],
                        url=lecture[1])
            session.add(task)
        course_id += 1
    session.commit()
def main(key=None, refresh=False):
    scrap_dict = scraper.main(refresh=refresh)
    # print(result_list)
    model = WhiteFishModel(scrap_dict)
    model.commit_to_SQL()
    # print("committed to SQL")
    # model.get_recent_SQL("example.sqlite")
    return model
def main():
    print('To skip a step, press enter without typing "yes".')

    answer = input('Enter yes to overwrite or create the database.')
    if 'yes' == answer:
        create_yaml_lists.main()
    else:
        print('We will skip this and continue with the next step:')

    answer = input('Enter yes to start building a web scraper for images.')
    if 'yes' == answer:
        scraper.main()
    else:
        print('We will skip this and continue with the next step:')

    answer = input('Enter yes to check the new product names.')
    if 'yes' == answer:
        check_model_names.main()
    else:
        print('We will skip this and continue with the next step:')

    answer = input('Enter yes to check for annotations without metadata.')
    if 'yes' == answer:
        check_annotations.main()
    else:
        print('We will skip this and continue with the next step:')

    answer = input('Enter yes to remove images with duplicates in image URLs.')
    if 'yes' == answer:
        remove_images_with_URL_duplicates.main()
    else:
        print('We will skip this and continue with the next step:')

    answer = input(
        'Enter yes to remove images with annotations and URL specified by filename of the image.')
    if 'yes' == answer:
        remove_annotated_images.main()
    else:
        print('We will skip this and continue with the next step:')

    answer = input('Enter yes to build tfrecord with data augmentation.')
    if 'yes' == answer:
        tfrecord_pipeline.main()
    else:
        print('We skipped this.')
def main():
    pp1, score_date2, user_name1 = scraper.main()
    pp1, score_date2 = zip(*sorted(zip(score_date2, pp1)))  # sorts dates and pp
    # print(pp1)
    # print(score_date2)
    set_sizes()
    plot_graph(pp1, score_date2, user_name1)
def create():
    objects = main()
    db.create_all()
    for i in objects:
        try:
            newProj = Project(i.title, i.des, i.link, i.image, i.source)
            db.session.add(newProj)
            db.session.commit()
        except IntegrityError:
            db.session.rollback()
def main(topteams=False, stop_short='99999999', period=7):
    '''
    By default, this updates the data by scraping games through yesterday and
    returns an Elo simulation run on the latest data. It includes several options:
    - output the top 'topteams' teams by Elo rating, along with each team's
      projected point spread over the next-ranked team and its change in Elo
      over the last 'period' days
    - 'stop_short' of simulating through the entire dataset by specifying a day
      to simulate through instead
    '''
    filepath = utils.get_latest_data_filepath()
    yesterday = (datetime.date.today() - datetime.timedelta(days=1)).strftime('%Y%m%d')
    if filepath[len(DATA_FOLDER):][-12:-4] != yesterday:
        print('updating data...')
        scrape_start = utils.shift_dstring(filepath[len(DATA_FOLDER):][-12:-4], 1)
        scraper.main(filepath[len(DATA_FOLDER):][0:8], scrape_start, yesterday, filepath)
        filepath = utils.get_latest_data_filepath()
    data = utils.read_csv(filepath)
    this_sim = sim(data, K_FACTOR, SEASON_CARRY, HOME_ADVANTAGE, stop_short, period)
    if topteams != False:
        output = pd.DataFrame(
            this_sim.get_top(int(topteams)),
            columns=['Team', 'Elo Rating', '%i Day Change' % period])
        output['Point Spread vs. Next Rank'] = [
            "{0:+.1f}".format(
                (output['Elo Rating'][i] - output['Elo Rating'][i + 1]) / ELO_TO_POINTS_FACTOR)
            for i in range(topteams - 1)
        ] + ['']
        output['Rank'] = [i for i in range(1, topteams + 1)]
        utils.table_output(
            output,
            'Ratings through ' + this_sim.date + ' - Top ' + str(topteams),
            ['Rank', 'Team', 'Elo Rating', 'Point Spread vs. Next Rank',
             '%i Day Change' % period])
    return this_sim
def handle_message(uid, message):
    cache = r.get('CACHE:' + uid)
    if cache is not None:
        app.logger.info('cache hit for uid:{}'.format(uid))
        send_message(uid, 'Cache Hit' + str(cache))
    else:
        details = r.get(uid)
        details = json.loads(details)
        app.logger.info('scraping for uid:{}'.format(uid))
        resp = scraper.main(details['regno'], details['password'])
        app.logger.info('setting cache for uid:{}'.format(uid))
        r.setex('CACHE:' + uid, json.dumps(resp), 600)
        send_message(uid, 'Ran Scraper' + str(resp))
def get_results():
    library = request.args.get("library")
    search_keywords = request.args.get("search_keywords")
    page = request.args.get("page")
    if library == "" or library is None:
        results = {"error": "cannot execute search without library input"}
        return jsonify(results)
    if search_keywords == "" or search_keywords is None:
        results = {"error": "cannot execute search without search keywords"}
        return jsonify(results)
    try:
        if page is None:
            results = main(library, search_keywords)
        else:
            results = main(library, search_keywords, page)
    except Exception as e:
        results = {"error": str(e)}
    return jsonify(results)
def home():
    infoList = ['', '']
    j = 0
    for i in session.pop('mylist', []):
        infoList[j] = i
        j += 1
    JSON, dining_transactions, schiller_transactions = scraper.main(infoList[0], infoList[1])
    if not JSON:
        return redirect(url_for('login'))
    parseJSON = json.loads(JSON)
    dining_dollars = parseJSON["dining_dollars"]
    schillers = parseJSON["schillers"]
    guest_swipes = parseJSON["guest_meals"]
    if 'meals_week' in parseJSON:
        meals_left = parseJSON["meals_week"]
    else:
        meals_left = ''
    json_obj = JSON
    temp = 0
    if int(guest_swipes) > 3:
        temp = meals_left
        meals_left = guest_swipes
        guest_swipes = temp
    if 'spending' in parseJSON:
        spending = parseJSON["spending"]
    else:
        spending = ''
    dining_transactions = "[{day: '%s', balance: %s}, {day: '%s', balance: %s}]" % (
        datetime.datetime.strftime(datetime.datetime.now() + datetime.timedelta(days=-1), '%Y-%m-%d %H:%M'),
        dining_dollars,
        time.strftime('%Y-%m-%d %H:%M'),
        dining_dollars)
    schiller_transactions = "[{day: '%s', balance: %s}, {day: '%s', balance: %s}]" % (
        datetime.datetime.strftime(datetime.datetime.now() + datetime.timedelta(days=-1), '%Y-%m-%d %H:%M'),
        schillers,
        time.strftime('%Y-%m-%d %H:%M'),
        schillers)
    if 'swipes' in parseJSON:
        swipes = parseJSON["swipes"]
    else:
        swipes = ''
    week, diningDollarBudget, dailyDiningBudget, laundryLeft, LDCCardSwipes, burtonCardSwipes = getASI(parseJSON)
    return render_template('index.html',
                           dining="$" + dining_dollars,
                           meals=meals_left,
                           schill="$" + schillers,
                           guest=guest_swipes,
                           spending=spending,
                           swipes=swipes,
                           laundry=laundryLeft,
                           diningBudget=diningDollarBudget,
                           dailyDiningBudget=dailyDiningBudget,
                           ldc=LDCCardSwipes,
                           burton=burtonCardSwipes,
                           dining_transactions=dining_transactions,
                           schiller_transactions=schiller_transactions)
def loadData():
    eventsAndHorses = main()
    newDict = {}
    for i in eventsAndHorses:
        horseSents = {}
        for j in eventsAndHorses[i]:
            sents = mainAnalysis(j)
            horseSents[j] = sents
        print(horseSents)
        newDict[i] = horseSents
    print(newDict)
    with open(FILE, 'wb') as handle:
        pickle.dump(newDict, handle)
def main():
    args = parse_input()
    log.info("INPUT: %s" % os.path.basename(args.PATH))
    media_files = find_all_files(args.PATH, True)  # True for subtitles, FIX
    for media_file in media_files:
        media_info = scraper.main(media_file)
        if 'episode' not in media_info:
            # Grab movie file and directory
            media_file = findfile(media_file['file'])
            # Replace defined Format with real values
            dst = rename(args.set, media_file, media_info)
        elif 'episode' in media_info:
            media_file = findfile(media_file['file'])
            # Replace defined Format with real values
            dst = rename(args.set, media_file, media_info)
        else:
            log.error("ERROR FINDING FILE.")
        # Update destination
        dst = os.path.join(args.output, dst)
        #print(media_file)
        # Execute changes
        exe_changes(media_file, dst, args.action, args.conflict)
"""Make tuple containing date from string (from google event json) Param str_: 'yyyy-mm-ddT.......' Return: tuple ({year}, {month}, {day})""" str_ = str_[:str_.index('T')] date = [int(a) for a in str_.split('-')] return tuple(date) if __name__ == '__main__': if not os.path.exists('token.pkl'): make_token() username = input('username: '******'password: '******'token.pkl', 'rb') as file: credentials = pickle.load(file) with open('schedule.json', 'r') as file: mydict = json.load(file) schedule = Schedule.from_dict(mydict) days_to_clear = schedule.days_updated service = build('calendar', 'v3', credentials=credentials) calendar_id = get_cal_id(service, 'schedule_migrater') event_ids = get_event_ids_by_dts(service, calendar_id, days_to_clear) if event_ids: for event_id in track( event_ids, description='deleting events from google calendar'):
def home():
    return json.dumps(scraper.main(), indent=2)
import re

import scraper


class BatotoScraper(scraper.Scraper):
    feed_url = 'http://www.batoto.net/recent_rss'
    title_re = re.compile(
        r'(?P<series>.+?) - (?P<language>\w+) - '
        r'(:?Vol.(?P<volume>\d+) )?Ch.(?P<chapter>\w+?):?(?P<chapter_title>.+)')
    result_groups = ('volume', 'series', 'chapter', 'chapter_title', 'language')

    def read_item(self, raw_item):
        item = super(BatotoScraper, self).read_item(raw_item)
        results = self.title_re.match(item['title'])
        for group in self.result_groups:
            item[group] = results.group(group)
        return item


if __name__ == '__main__':
    scraper.main(None, scraper_class=BatotoScraper)
import finder
import scraper
import reader

with open('log.txt', 'w', newline='', encoding='UTF-8') as writer:
    writer.write("Master: START--START--START--START--START\n")
    print("Master: START--START--START--START--START")
    """
    * Starting point, give it a real player ID
    * zsda123 is only a placeholder!!!
    * If it happens to be a real ID or your ID, I apologize for that!
    """
    finder.main(writer, "zsda123")
    with open('userIdList.txt', 'r', newline='', encoding='UTF-8') as f:
        userIdListStr = f.read()
    userIdList = userIdListStr.split()
    scraper.main(writer, userIdList)
    reader.main(writer)
    writer.write("Master: DONE--DONE--DONE--DONE--DONE\n")
    print("Master: DONE--DONE--DONE--DONE--DONE")
def refresh(self, sender):
    scraper.main()
    self.reload_data(None)
def main(event, context):
    scraper.main()
def test_parses_kanjipedia_EI(self):
    output = scraper.main()
    print(output)
    # assert kanji_string in output
    # self.assertEquals(output, "")
    self.assertIn("エイ", output)
def returnCalendar():
    list = request.get_json(force=True)
    m = scraper.main(list)
    print(list)
    return m
from multiprocessing import freeze_support

from scraper import main

if __name__ == '__main__':
    freeze_support()
    main()
import scraper
from requests import get
from bs4 import BeautifulSoup as bs4
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from urllib.parse import urlparse
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import re

info = scraper.main()
url = 'https://zoom.us/join'
scraper.driver.get(info["url"])
time.sleep(5)
try:
    WebDriverWait(scraper.driver, 3).until(
        EC.alert_is_present(),
        'Timed out waiting for PA creation confirmation popup to appear.')
    # switch_to.alert is a property, not a method, so it is not called.
    scraper.driver.switch_to.alert.accept()
except NoAlertPresentException as e:
    print("no alert")
import scraper
import gsheets_writer

if __name__ == "__main__":
    scraper.main()
    gsheets_writer.main()
def setUp(self):
    self.n_tags, self.top_five = scraper.main(verbose=False)
    self.top_five_tags = [x[0] for x in self.top_five]
    self.top_five_counts = [x[1] for x in self.top_five]
IDEntry = Entry(root)
IDEntry.grid(row=4, column=2, sticky=E)

# Remove ID
removeButton = Button(root, text="Remove", fg="purple", command=lambda: removeID())
removeButton.grid(row=4, column=3)

# Scrape
scrapeButton = Button(root, text="Scrape", fg="orange", command=lambda: scrap.main())
scrapeButton.grid(row=5, column=1)

# Refresh CSV File
refreshButton = Button(root, text="Refresh", fg="green", command=lambda: c.resetIDs())
refreshButton.grid(row=5, column=2)

# Quit
quitButton = Button(root, text="Quit", command=root.quit)
quitButton.grid(row=5, column=3)

root.mainloop()
def retrieve_data():
    text = request.form["selectcountries"]
    return render_template("postform.html", text=text, data=scraper.main(text))
def hello_world():
    link, ver = scraper.main()
    return jsonify(link=link, version=ver)
def main():
    scraper.main()
)
session.add(newMajor)

# Add users
admin = User(name="John Sutton",
             email="*****@*****.**",
             picture="https://lh4.googleusercontent.com/-C6cSzCA5-Bw/AAAAAAAAAAI/AAAAAAAACVo/OrC0MgMptnI/photo.jpg",
             isAdmin=True)
session.add(admin)

# Add courses
print "Locating course pages"
coursesToAdd = scraper.getAllCoursePages()
print "Locating course lecture videos"
major_id = majors.index("Computer Science") + 1
course_id = 1
for url in coursesToAdd:
    course = scraper.main([url])
    if len(course.lectures) == 0:
        continue
    print "Creating course: " + course.title
    c = Course(name=course.title, description=course.description)
    session.add(c)
    #rel = MajorCourse(major_id=2, course_id=course_id)
    #session.add(rel)
    for lecture in course.lectures:
        task = Task(course_id=course_id,
                    name=lecture[0],
                    url=lecture[1])
        session.add(task)
    course_id += 1
def scrape():
    import scraper
    return scraper.main()
teamDict = mi.read('teamDict.json')
newDict = {}
for i in teamDict:
    listOfIDs = []
    try:
        for j in rosterDict[i]['2017']:
            listOfIDs.append(j)
        newDict[teamDict[i]] = listOfIDs
    except:
        pass
#mi.write('teamAndPlayerIDS.json', newDict)
#pdb.set_trace()

playerStatisticsDict = {}
for i in newDict:
    #pdb.set_trace()
    urls = generateURLs(i, newDict[i])
    start = time.time()
    elements = scraper.main(urls, newDict[i])
    end = time.time()
    print(end - start)
    playerStatisticsDict[str(i)] = elements

mi.write('finalPlayerStatsDict.json', playerStatisticsDict)
from datetime import date
from dateutil.rrule import rrule, DAILY

import scraper
import parser

a = date(2013, 3, 4)
b = date(2013, 3, 5)

if __name__ == '__main__':
    for dt in rrule(DAILY, dtstart=a, until=b):
        current_date = dt.strftime("%Y/%m/%d")
        print 'Scraping ' + current_date
        scraper.main(dt.strftime("%d/%m/%Y"))
        print 'Parsing ' + current_date
        parser.main(dt.strftime("%Y/%m/%d"))