def OnAddedToSpace(self, ballpark, dbspacecomponent):
    persister = Persister(ballpark.solarsystemID, self.itemID, dbspacecomponent)
    bountyEscrowBonus, bounties = persister.GetStateForSystem()
    iskRegistry = IskRegistry(bounties)
    iskMover = IskMover(ballpark.broker.account)
    itemCreator = GetItemCreator(ballpark.inventory2, ballpark, self.attributes.tagTypeIDs.keys())
    escrow = Escrow(self, ballpark, iskRegistry, iskMover, itemCreator, persister)
    item = ballpark.inventory2.GetItem(self.itemID)
    eventLogger = EventLogger(ballpark.broker.eventLog, ballpark.solarsystemID, item.ownerID, self.itemID)
    notifier = Notifier(ballpark.broker.notificationMgr)
    self.rangeNotifier = RangeNotifier(ballpark.solarsystemID, ballpark, ballpark.broker.machoNet, self.GetWallclockTime)
    ballpark.proximityRegistry.RegisterForProximity(
        self.itemID, 30000, self.rangeNotifier.PlayerInRange)
    lock = Lock(self)
    self.warpScrambler = WarpScrambler(self.itemID, lock, ballpark.dogmaLM)
    self.Initialize(ballpark, escrow, lock, persister, eventLogger, notifier)
    self.escrow.SetBonus(bountyEscrowBonus)
import requests

from parser import Parser          # assumed local module providing Parser
from persister import Persister    # assumed local module providing Persister


class Scraper():
    OPEN_URL_ONE = "http://games.crossfit.com/scores/leaderboard.php?stage=0&sort=0&page="
    OPEN_URL_TWO = "&division=1&region=0&numberperpage=100&competition=0&frontpage=0&expanded=0&year="
    OPEN_URL_THREE = "&full=0&showtoggles=1&hidedropdowns=1&showathleteac=0&=&is_mobile=&scaled=0&fittest=1&fitSelect=0"
    ATHLETE_URL = "http://games.crossfit.com/athlete/"

    # Start Region Public Methods
    def scrape_athletes(self, starting_id=0):
        athlete_ids = self.persister.get_athlete_ids()
        for athlete_id in athlete_ids:
            self.scrape_athlete(athlete_id)

    def scrape_open(self, start=1, end=50):
        while start <= end:
            url = self.get_crossfit_games_url(str(start))
            self.scrape_open_url(url)
            start = start + 1
    # End Region Public Methods

    # Start Region Private Methods
    def __init__(self, verbose=False):
        if verbose:
            print("Initializing Scraper")
        self.verbose = verbose
        self.parser = Parser(verbose)
        self.persister = Persister()

    def scrape_athlete(self, athlete_id):
        if self.verbose:
            print("Scraping athlete " + str(athlete_id))
        response = requests.get(self.ATHLETE_URL + str(athlete_id))
        response.raise_for_status()
        athlete = self.parser.parse_athlete(athlete_id, response)
        self.persister.persist_athlete(athlete)

    def scrape_open_url(self, url):
        response = requests.get(url)
        response.raise_for_status()
        event_data = self.parser.parse_open(response)
        self.persister.persist_events(event_data)

    def get_crossfit_games_url(self, page="1", year="16"):
        url = self.OPEN_URL_ONE + page + self.OPEN_URL_TWO + year + self.OPEN_URL_THREE
        if self.verbose:
            print("Getting URL:\n" + url + "\n")
        return url
    # End Region Private Methods
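# Usage sketch (a minimal example, not from the original source; page range and
# verbosity are illustrative): scrape the first three pages of the 2016 Open
# leaderboard, then pull the profile page of every known athlete.
scraper = Scraper(verbose=True)
scraper.scrape_open(start=1, end=3)
scraper.scrape_athletes()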
def run(self):
    logger.info('starting skyline analyzer')
    pid = getpid()
    Analyzer(pid).start()
    Persister(pid).start()
    while 1:
        sleep(100)
        i += 1
        pass

    config_file = os.path.join(statedir, config_file)
    config = Config(basedir = statedir, plugin_basedir = '.')
    try:
        config.load(config_file)
    except ConfigError, err:
        print >>sys.stderr, "In "+config_file+":"
        print >>sys.stderr, err
        return 1
    if verbose:
        config["verbose"] = True

    persister = Persister(os.path.join(statedir, "state"), Rawdog, locking)
    try:
        rawdog = persister.load()
    except KeyboardInterrupt:
        return 1
    except:
        print "An error occurred while reading state from " + statedir + "/state."
        print "This usually means the file is corrupt, and removing it will fix the problem."
        return 1
    if not rawdog.check_state_version():
        print "The state file " + statedir + "/state was created by an older"
        print "version of rawdog, and cannot be read by this version."
        print "Removing the state file will fix it."
        return 1
import json
from typing import Any, Dict, List

from flask import Flask, render_template, request
from flask_socketio import SocketIO

from applog import logger

TEACHER_ID = -1
RH3K_ID = -2
OK = 'OK'
DISCONNECTED = 'disconnected'
STUDENT_NS = '/student'
TEACHER_NS = '/teacher'
ALL_NS = (TEACHER_NS, STUDENT_NS)

names: List[str] = []
stations: List[Dict[str, Any]] = [{} for i in range(settings['columns'] * settings['rows'])]
teacher_password = ''  # Change this
authenticated = False
persister = Persister()

app = Flask(__name__)
app.config['SECRET_KEY'] = 'secret!'
# Ping a bit more frequently than the default of 25s, to try to avoid timeouts causing disconnections
socketio = SocketIO(app, ping_interval=20)


@app.route('/')
def index():
    r = request
    seat_index = persister.seat_indexes_by_ip.get(r.remote_addr, -1)
    logger.info(f'Student page requested from {r.remote_addr} (last seat index: {seat_index})')
    return render_template('student.html', settings=json.dumps(settings), names=names, lastSeatIndex=seat_index)


@app.route('/teacher')
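# The /teacher handler continues in the original source. A minimal sketch of how
# such an app is typically started (host and port here are assumptions, not from
# the source); socketio.run wraps app.run so WebSocket transport works.
if __name__ == '__main__':
    socketio.run(app, host='0.0.0.0', port=5000)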
from geoetl import GeoEtl
from persister import Persister

if __name__ == "__main__":
    getl = GeoEtl()
    getl.obtainData()
    getl.getDataKpis()
    persist = Persister(getl.dataframe)
    # "Country data prepared successfully"
    print("Data de paises preparada satisfactoriamente")
""" build_data.py - Build data Author: Hoanh An ([email protected]) Date: 04/27/18 """ from persister import Persister from config import * if __name__ == "__main__": for i in range(MIN_ID, MAX_ID): persister = Persister(i) persister.write() print(persister.read())
print "No " + statedir + " directory" return 1 sys.path.append(".") config = Config() try: config.load("config") except ConfigError, err: print >>sys.stderr, "In config:" print >>sys.stderr, err return 1 if verbose: config["verbose"] = True persister = Persister("state", Rawdog, locking) try: rawdog = persister.load(no_block = no_lock_wait) if rawdog is None: return 0 except KeyboardInterrupt: return 1 except: print "An error occurred while reading state from " + statedir + "/state." print "This usually means the file is corrupt, and removing it will fix the problem." return 1 if not rawdog.check_state_version(): print "The state file " + statedir + "/state was created by an older" print "version of rawdog, and cannot be read by this version." print "Removing the state file will fix it."
import json
import urllib.parse

import requests

from persister import Persister
from config import MOVIE_SEARCH_ENDPOINT  # assumed to live in the local config module


class Presenter():
    def __init__(self, user_id):
        """
        Initialize a Presenter object to read and process data.
        Params:
            user_id: ID of the user whose trained data to load
        Return: None
        """
        self.user_id = user_id
        self.persister = Persister(self.user_id)
        self.result = self.persister.read()

        # init
        self.config = self.result.get('config')
        self.data = self.result.get('data')
        self.TF_data = self.data.get('top_favorites')
        self.LF_data = self.data.get('latent_factors')
        self.NN_data = self.data.get('nearest_neighbours')

    def process(self, data):
        """
        Parse strings to URL scheme.
        Params:
            data <list>: List of data to process
        Return: List of workable string endpoints.
        """
        endpoints = []
        for item in data:
            # Chop off unnecessary part to get a broader search result
            item = item.split('(')[0]

            # Parse URL
            query = urllib.parse.quote(item)
            endpoint = MOVIE_SEARCH_ENDPOINT + query + '&page=1'
            endpoints.append(endpoint)
        return endpoints

    def get_config(self):
        """
        Get configurations.
        Params: None
        Return: Configuration data in dictionary format.
        """
        return self.config

    def get_data(self, method):
        """
        Get trained data for a specific method.
        Params:
            method <str>: Method to get ('TF', 'NN' or 'LF')
        Return: Data in dictionary format.
        """
        if method == 'TF':
            data = self.TF_data
        elif method == 'NN':
            data = self.NN_data
        elif method == 'LF':
            data = self.LF_data
        else:
            raise ValueError('Unknown method: ' + method)
        return data

    def get_endpoints(self, method):
        """
        Get processed endpoints for a specific method.
        Params:
            method <str>: Method to get
        Return: List of workable string endpoints.
        """
        data = self.get_data(method)
        endpoints = []
        for item in self.process(data):
            endpoints.append(item)
        return endpoints

    def get_posters(self, method, n):
        """
        Get posters with titles and links for a specific method.
        Params:
            method <str>: Method to get
            n <int>: Number of returned posters
        Return: List of poster objects with titles and links.
        """
        poster_paths = []
        for endpoint in self.get_endpoints(method):
            poster_path_r = requests.get(endpoint)
            try:
                first_result = json.loads(poster_path_r.text).get('results')[0]
            except Exception as e:
                print(e)
                first_result = ""

            if first_result != "":
                # pprint(first_result)
                original_title = first_result.get('original_title')
                poster_path = first_result.get('poster_path')
                poster = 'https://image.tmdb.org/t/p/w500{}'.format(
                    poster_path)
            else:
                poster = ""
            poster_paths.append(poster)

        titles = self.get_data(method)
        posters = []
        for i in range(len(titles)):
            posters.append({'title': titles[i], 'link': poster_paths[i]})
        return posters[:n]
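# Usage sketch (the user ID is illustrative, not from the original source):
# fetch the first five poster links produced by the top-favorites ('TF')
# recommender and print each title alongside its TMDB image URL.
presenter = Presenter(user_id=1)
for poster in presenter.get_posters('TF', 5):
    print(poster['title'], poster['link'])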
def upgrade(olddir, newdir):
    """Given a rawdog 1.x state directory and a rawdog 2.x state directory,
    copy the ordering information from the old one into the new one. Since
    rawdog 2.0 mangles articles in a slightly different way, this needs to
    do approximate matching to find corresponding articles."""
    print "Importing state from " + olddir + " into " + newdir

    print "Loading old state"
    f = open(olddir + "/state")
    oldrawdog = pickle.load(f)

    print "Loading new state"
    os.chdir(newdir)
    persister = Persister("state", Rawdog)
    newrawdog = persister.load()

    print "Copying feed state"
    oldfeeds = {}
    newfeeds = {}
    for url, oldfeed in oldrawdog.feeds.items():
        if newrawdog.feeds.has_key(url):
            last_update = oldfeed.last_update
            print "Setting feed", url, "last update time to", format_time(last_update)
            newrawdog.feeds[url].last_update = last_update
            oldfeeds[url] = {}
            newfeeds[url] = {}
        else:
            print "Old feed", url, "not in new state"

    print "Copying article state"
    # Separate out the articles by feed.
    for oldhash, oldarticle in oldrawdog.articles.items():
        if oldfeeds.has_key(oldarticle.feed):
            oldfeeds[oldarticle.feed][oldhash] = oldarticle
    for newhash, newarticle in newrawdog.articles.items():
        if newfeeds.has_key(newarticle.feed):
            newfeeds[newarticle.feed][newhash] = newarticle

    # Now fuzzily match articles: score each old/new pair on matching link,
    # title and description, and accept the best match only if it agrees on
    # at least two of the three.
    for url, oldarticles in oldfeeds.items():
        for newhash, newarticle in newfeeds[url].items():
            matches = []
            for oldhash, oldarticle in oldarticles.items():
                score = 0
                olink = oldarticle.link
                nlink = newarticle.entry_info.get("link")
                if olink is not None and nlink is not None and olink == nlink:
                    score += 1
                otitle = oldarticle.title
                ntitle = newarticle.entry_info.get("title")
                if otitle is not None and ntitle is not None and approximately_equal(otitle, ntitle):
                    score += 1
                odesc = oldarticle.description
                ndesc = newarticle.entry_info.get("description")
                if odesc is not None and ndesc is not None and approximately_equal(odesc, ndesc):
                    score += 1
                matches.append((score, oldhash))
            matches.sort()
            if matches != [] and matches[-1][0] > 1:
                oldhash = matches[-1][1]
                oldarticle = oldarticles[oldhash]
                newarticle.sequence = oldarticle.sequence
                newarticle.last_seen = oldarticle.last_seen
                newarticle.added = oldarticle.added
                print "Matched new", newhash, "to old", oldhash, "in", url
            else:
                print "No match for", newhash, "in", url

    print "Saving new state"
    newrawdog.modified()
    persister.save()
    print "Done"