from dask.distributed import Client, LocalCluster

# Module-level singletons, required by the `global` statements below.
# `config` and `log` come from the project's own modules (see the
# check_barcodes task further down).
cluster = None
client = None


def start_local_cluster() -> Client:
    global cluster
    global client
    if cluster is None:
        cluster = LocalCluster(n_workers=config.dask_workers)
        client = Client(cluster)
        log.info('Cluster info: {}'.format(str(cluster)))
    return client
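# A minimal smoke test, not from the original module: submit a trivial task
# through the returned dask.distributed Client and read the result back.
if __name__ == '__main__':
    client = start_local_cluster()
    future = client.submit(sum, [1, 2, 3])
    log.info('cluster smoke test: {}'.format(future.result()))  # logs 6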
def dl_dir(self):
    loc = self.status['loc']
    web = self.status['web']
    # mkdir
    dir_fullname = os.path.join(self.parent_dir, loc['name'])
    log.info(dir_fullname)
    if not os.path.exists(dir_fullname):
        os.mkdir(dir_fullname)
    # start a thread to watch the download process
    dir_thread = DirThread(self)
    dir_thread.start()
    for i in web['aaData']:
        p = pq(i[0])
        value = p('input').attr('value')
        dl_type = p('input').attr('name')
        p = pq(i[1])
        name = p('a').text()
        if 'file' in dl_type:
            url = p('a').attr('href')
            if loc.get(value):
                log.info('file {} {} already downloaded, skipping it'.format(
                    value, name))
            else:
                lst = list(self.urlparsed)
                lst[3] = lst[4] = lst[5] = ""
                lst[2] = url
                ct_file = CtFile(urlunparse(lst), self.args, name, value,
                                 dir_fullname, self.s)
                file_thread = FileThread(ct_file)
                # acquire the semaphore before starting the thread
                r = False
                while not r:
                    r = g_sem.acquire(timeout=1)
                    if dir_thread.link_timeout:
                        dir_thread.quit = True
                        return False, DL_ERROR_FILELINKTIMEOUT
                file_thread.start()
                dir_thread.add(file_thread)
        elif 'folder' in dl_type:
            log.debug('{} {} is a folder'.format(value, name))
            ct_subdir = CtDir(self.args, dir_fullname, value)
            ct_subdir.get_dir_list(True)
            success, error = ct_subdir.dl_dir()
            if not success:
                dir_thread.quit = True
                return success, error
    dir_thread.quit = True
    return True, None
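# A minimal sketch of the throttling pattern dl_dir() relies on, using only
# the standard library: acquiring the semaphore with a short timeout in a
# loop keeps the waiter responsive to a cancellation flag instead of blocking
# forever. All names below (demo_sem, cancelled, spawn_download) are
# illustrative, not from the original code.
import threading

demo_sem = threading.Semaphore(3)  # at most 3 downloads in flight
cancelled = False

def spawn_download(target, *args):
    acquired = False
    while not acquired:
        acquired = demo_sem.acquire(timeout=1)  # poll instead of blocking
        if cancelled:
            return None  # bail out the way dl_dir() does on link_timeout
    thread = threading.Thread(target=target, args=args)
    thread.start()  # `target` must call demo_sem.release() when done
    return thread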
def get_dir_list(self, get_dir_from_web):
    log.info('Get list for {}'.format(self.url))
    if get_dir_from_web or not self.status.get('web'):
        loc = self.status['loc']
        log.info('get list from website')
        headers = {'origin': self.urlparsed.netloc}
        # parameters
        params = {
            'd': self.urlparsed.path.split('/')[-1],
            'folder_id': self.urlparsed.query,
            'passcode': '',
            'r': str(random.random()),
            'ref': '',
        }
        r = self.s.get(GET_DIR_URL, params=params, headers=headers)
        j = json.loads(r.text.encode().decode('utf-8-sig'))
        requests_debug(r)
        if j.get('code') in (404, 503):
            log.error('dl_dir_list error: {}, {}'.format(
                self.url, j.get('message')))
            return  # the error response carries no folder data
        loc['name'] = j['folder_name']
        loc['url'] = j['url']  # real url
        log.info('folder name: {}'.format(loc['name']))
        log.info('folder url: {}'.format(loc['url']))
        r = self.s.get(WEBAPI_HOST + loc['url'])
        self.status['web'] = json.loads(r.text)
        self.save_status()
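# Why the encode().decode('utf-8-sig') round-trip above: if the server
# prepends a UTF-8 BOM, json.loads() rejects the text. A stdlib-only demo:
#
#   >>> import json
#   >>> json.loads('\ufeff{"a": 1}'.encode('utf-8').decode('utf-8-sig'))
#   {'a': 1}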
async def on_ready():
    """for guild in bot.guilds:
        if bot.user.id == guild.owner_id:
            await guild.delete()
    """
    try:
        log.info(
            "Connected to Discord. Generating Nest messages and sending them."
        )
        emote_refs = await get_emotes(bot, nesting_mons, config)
        for d, area in discord_message_data:
            """if len(config.emote_server) > 0:
                log.info("Creating emotes")
                server = await bot.fetch_guild(config.emote_server)
                for mon_id in [nest.mon_id for nest in [area.nests for area in full_areas][0]]:
                    emote_name = f"m{mon_id}"
                    image_url = config.icon_repo + f"pokemon_icon_{str(mon_id).zfill(3)}_00.png"
                    image = requests.get(image_url).content
                    emote = await server.create_custom_emoji(name=emote_name, image=image)
                    emote_refs[mon_id] = emote.id"""
            channel = await bot.fetch_channel(d)
            found = False
            embed_dict, _ = area.get_nest_text(config, emote_refs)
            embed = discord.Embed().from_dict(embed_dict)
            async for message in channel.history():
                if message.author == bot.user:
                    embeds = message.embeds
                    if len(embeds) > 0 and embeds[0].title == embed.title:
                        found = True
                        break
            if found:
                await message.edit(embed=embed)
                log.success(
                    f"Found existing Nest message for {area.name} and edited it"
                )
            else:
                await channel.send(embed=embed)
                log.success(f"Sent a new Nest message for {area.name}")
        """if len(emote_refs) > 0:
            log.info("Deleting emotes again")
            for emote_id in emote_refs.values():
                emote = await server.fetch_emoji(emote_id)
                await emote.delete()"""
    except Exception as e:
        log.exception(e)
    # discord.py 2.0 renamed logout() to close(); this code targets 1.x
    await bot.logout()
def get_osm_data(bbox, date, osm_file_name):
    got_data = False
    while not got_data:
        free_slot = False
        while not free_slot:
            r = requests.get("http://overpass-api.de/api/status").text
            if "available now" in r:
                free_slot = True
            elif "Slot available after" in r:
                rate_seconds = int(
                    r.split(", in ")[1].split(" seconds.")[0]) + 15
                log.warning(
                    f"Overpass is rate-limiting you. Gonna have to wait {rate_seconds} seconds before continuing"
                )
                time.sleep(rate_seconds)
            else:
                log.warning(
                    "Had trouble finding out about your overpass status. Waiting 1 minute before trying again"
                )
                time.sleep(60)
        log.info(
            "Getting OSM data. This will take ages if this is your first run.")
        osm_time_start = timeit.default_timer()
        nest_json = fetch_data(bbox, date)
        osm_time_stop = timeit.default_timer()
        seconds = round(osm_time_stop - osm_time_start, 1)
        if len(nest_json.get("elements", [])) == 0:
            log.error(
                f"Did not get any data from overpass in {seconds} seconds. This probably means that you were rate-limited by overpass. Sleeping 5 minutes and trying again.\nIf you want, you can share the below log entry in Discord"
            )
            log.error(nest_json.get("remark"))
            time.sleep(60 * 5)
        else:
            got_data = True
    with open(osm_file_name, mode='w', encoding="utf-8") as osm_file:
        osm_file.write(json.dumps(nest_json, indent=4))
    log.success(
        f"Done. Got all OSM data in {seconds} seconds and saved it.")
    return nest_json
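# The rate-limit parse above assumes a status body shaped like the line
# below (example text, not captured from a real response):
#
#   "Slot available after: 2021-01-01T00:00:00Z, in 35 seconds."
#
# r.split(", in ")[1].split(" seconds.")[0] then yields "35", and 15 seconds
# of slack are added before retrying.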
def main():
    global g_sem
    parser = argparse.ArgumentParser(description='Download from CTDisk.')
    parser.add_argument('-d', '--dir', help='download a directory')
    parser.add_argument('-f', '--file', help='download a file')
    parser.add_argument('-s', '--split', type=int, default=SPLIT_CNT,
                        help='split a file into this many parts')
    parser.add_argument('-c', '--dl_cnt', type=int, default=DOWNLOAD_CNT,
                        help='number of files to download concurrently')
    args = parser.parse_args()
    g_sem = threading.Semaphore(args.dl_cnt)
    if args.dir:
        stop = False
        get_dir_from_web = False
        error = None
        ct_dir = CtDir(args)
        while not stop:
            # refresh the dir list from the website after a link timeout
            get_dir_from_web = (error == DL_ERROR_FILELINKTIMEOUT)
            ct_dir.get_dir_list(get_dir_from_web)
            success, error = ct_dir.dl_dir()
            if success:
                stop = True
        log.info('download finished')
    elif args.file:
        ct_file = CtFile(args.file, args)
        ct_file.dl()
        log.info('download finished')
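# Example invocations (the script name is assumed; the flags are the ones
# defined in main() above):
#
#   python ctdisk.py -d https://example.com/d/<folder_id>
#   python ctdisk.py -f https://example.com/f/<file_id> -s 8 -c 3
#
# Standard entry-point guard, assuming this module is run directly:
if __name__ == '__main__':
    main()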
async def get_emotes(bot, nesting_mons, config):
    try:
        with open("data/emotes.json", "r") as f:
            emote_servers = json.load(f)
    except (IOError, OSError):
        emote_servers = {}
        log.info("This seems to be your first run. Your bot will now create "
                 "1 server and fill it with emotes, so prepare for some wait time.")
    emotes = {}
    final_emotes = {}
    for server, data in emote_servers.items():
        for monid, emoteid in data.items():
            final_emotes[int(monid)] = int(emoteid)
            emotes[monid] = {
                "emote_id": int(emoteid),
                "server_id": int(server)
            }
    log.info("Comparing your bot's emotes to needed nesting mons.")
    for monid in nesting_mons:
        if monid in emotes:
            emotes.pop(monid)
            continue
        free_emotes = False
        for guild_id in emote_servers.keys():
            # JSON keys are strings; fetch_guild() expects an int ID
            guild = await bot.fetch_guild(int(guild_id))
            if len(guild.emojis) < 50:  # 50 static emoji slots per server
                free_emotes = True
                break
        if not free_emotes:
            guild = await bot.create_guild("Nest Emotes")
            channel = await guild.create_text_channel("hello")
            invite = await channel.create_invite()
            emote_servers[str(guild.id)] = {}
            log.info(f"Created Emote Server. Invite code: {invite.code}")
        emote_name = "m" + monid
        image_url = config.icon_repo + f"pokemon_icon_{monid.zfill(3)}_00.png"
        image = requests.get(image_url).content
        emote = await guild.create_custom_emoji(name=emote_name, image=image)
        # keep keys as strings so lookups match what json.load() produced
        emote_servers[str(guild.id)][monid] = emote.id
        final_emotes[int(monid)] = emote.id
    # whatever is left in `emotes` is no longer needed; delete it from Discord
    for monid, data in emotes.items():
        guild = await bot.fetch_guild(data["server_id"])
        emote = await guild.fetch_emoji(data["emote_id"])
        await emote.delete()
    with open("data/emotes.json", "w+") as f:
        f.write(json.dumps(emote_servers, indent=4))
    return final_emotes
def analyze_nests(config, area, nest_mons, queries, reset_time, nodelete):
    OSM_DATE = osm_date()
    # Getting OSM/overpass data
    osm_file_name = f"data/osm_data/{area.name} {OSM_DATE.replace(':', '')}.json"
    try:
        with open(osm_file_name, mode="r", encoding="utf-8") as osm_file:
            nest_json = json.load(osm_file)
    except (OSError, json.JSONDecodeError):
        # regenerate if the cached file is missing or unreadable
        nest_json = get_osm_data(area.bbox, OSM_DATE, osm_file_name)

    # Getting area data
    area_file_name = f"data/area_data/{area.name}.json"
    area_file_data = {}
    try:
        with open(area_file_name, mode="r", encoding="utf-8") as area_file:
            log.info("Found area data file. Reading and using data from it now")
            area_file_data_raw = json.load(area_file)
        for k, v in area_file_data_raw.items():
            area_file_data[int(k)] = v
    except FileNotFoundError:
        pass

    """db_file_name = f"data/db_data/{area.name}.json"
    try:
        with open(db_file_name, mode="r", encoding="utf-8") as db_file:
            db_data = json.load(db_file)
    except FileNotFoundError:
        db_data = {}"""

    if not nodelete:
        queries.nest_delete(area.sql_fence)

    log.info(f"Got all relevant information. Searching for nests in {area.name} now")

    nodes = {}
    ways = []
    relations = []
    for element in nest_json['elements']:
        if "type" not in element:
            continue
        if element["type"] == "node":
            nodes[element["id"]] = {
                "lat": element["lat"],
                "lon": element["lon"]
            }
        elif element["type"] == "way":
            # skip ways without node lists (was `and`, which never skipped)
            if "nodes" not in element or not element["nodes"]:
                continue
            ways.append(WayPark(element, config))
        elif element["type"] == "relation":
            if "members" not in element or not element["members"]:
                continue
            relations.append(RelPark(element, config))
    parks = ways + relations

    # Check Relations
    failed_nests = defaultdict(int)
    failed_nests["Total Nests found"] = 0
    double_ways = []
    start = timeit.default_timer()

    if config.less_queries:
        log.info("Getting DB data")
        all_spawns = [(str(_id), geometry.Point(lon, lat))
                      for _id, lat, lon in queries.spawns(area.sql_fence)]
        all_mons = queries.all_mons(str(tuple(nest_mons)), str(reset_time),
                                    area.sql_fence)
        all_mons = [(_id, geometry.Point(lon, lat)) for _id, lat, lon in all_mons]

    with Progress() as progress:
        #check_rels_task = progress.add_task("Generating Polygons", total=len(parks))
        for park in relations:
            double_ways = park.get_polygon(nodes, ways, double_ways)
            #progress.update(check_rels_task, advance=1)
        for park in ways:
            park.get_polygon(nodes)
            #progress.update(check_rels_task, advance=1)

        for osm_id, data in area_file_data.items():
            for connect_id in data["connect"]:
                for i, park in enumerate(parks):
                    if park.id == osm_id:
                        big_park = park
                        big_park_i = i
                    if park.id == connect_id:
                        small_park = park
                        small_park_i = i
                parks[big_park_i].connect.append(connect_id)
                parks[big_park_i].polygon = cascaded_union(
                    [big_park.polygon, small_park.polygon])
                parks.pop(small_park_i)

        # NOW CHECK ALL AREAS ONE AFTER ANOTHER
        check_nest_task = progress.add_task("Nests found: 0", total=len(parks))
        nests = []
        for park in parks:
            progress.update(
                check_nest_task, advance=1,
                description=f"Nests found: {failed_nests['Total Nests found']}")
            if not park.is_valid:
                failed_nests["Geometry is not valid"] += 1
                continue
            if not area.polygon.contains(park.polygon):
                failed_nests["Not in Geofence"] += 1
                continue
            if park.id in double_ways:
                failed_nests["Avoiding double nests"] += 1
                continue

            pokestop_in = None
            stops = []
            if config.scanner == "rdm" and config.pokestop_pokemon:
                # Get all Pokestops with id, lat and lon
                for pkstp in queries.stops(park.sql_fence):
                    stops.append(str(pkstp[0]))
                pokestop_in = "'{}'".format("','".join(stops))

            if config.less_queries:
                spawns = [s[0] for s in all_spawns if park.polygon.contains(s[1])]
            else:
                spawns = [str(s[0]) for s in queries.spawns(park.sql_fence)]

            if not stops and not spawns:
                failed_nests["No Stops or Spawnpoints"] += 1
                continue
            if (len(stops) < 1) and (len(spawns) < area.settings['min_spawnpoints']):
                failed_nests["Not enough Spawnpoints"] += 1
                continue
            spawnpoint_in = "'{}'".format("','".join(spawns))
            if spawnpoint_in == "''":
                # avoid the SQL warning: a blank string shouldn't be compared
                # against a number column
                spawnpoint_in = "NULL"

            if config.less_queries:
                mons = [s[0] for s in all_mons if park.polygon.contains(s[1])]
                if len(mons) == 0:
                    failed_nests["No Pokemon"] += 1
                    continue
                most_id = max(set(mons), key=mons.count)
                poke_data = [most_id, mons.count(most_id)]
            else:
                poke_data = queries.mons(spawnpoint_in, str(tuple(nest_mons)),
                                         str(reset_time), pokestop_in)
                if poke_data is None:
                    failed_nests["No Pokemon"] += 1
                    continue
            park.mon_data(poke_data[0], poke_data[1],
                          area.settings['scan_hours_per_day'],
                          len(spawns) + len(stops))

            if park.mon_count < area.settings['min_pokemon']:
                failed_nests["Not enough Pokemon"] += 1
                continue
            if park.mon_avg < area.settings['min_average']:
                failed_nests["Average spawnrate too low"] += 1
                continue
            if park.mon_ratio < area.settings['min_ratio']:
                failed_nests["Average spawn ratio too low"] += 1
                continue

            try:
                park.generate_details(area_file_data,
                                      failed_nests["Total Nests found"])
            except TopologicalError:
                failed_nests["Geometry is not valid"] += 1
                continue  # skip parks whose details can't be generated

            # Insert Nest data to db
            insert_args = {
                "nest_id": park.id,
                "name": park.name,
                "form": park.mon_form,
                "lat": park.lat,
                "lon": park.lon,
                "pokemon_id": park.mon_id,
                "type": 0,
                "pokemon_count": park.mon_count,
                "pokemon_avg": park.mon_avg,
                "pokemon_ratio": park.mon_ratio,
                "poly_path": json.dumps(park.path),
                "poly_type": 1 if isinstance(park, RelPark) else 0,
                "current_time": int(time.time())
            }
            failed_nests["Total Nests found"] += 1
            nests.append(park)
            queries.nest_insert(insert_args)

    stop = timeit.default_timer()
    log.success(
        f"Done finding nests in {area.name} ({round(stop - start, 1)} seconds)")
    for k, v in failed_nests.items():
        log.info(f" - {k}: {v}")

    def sort_avg(nest):
        return nest.mon_avg

    new_area_data = {}
    for nest in sorted(nests, key=sort_avg, reverse=True):
        new_area_data[nest.id] = {
            "name": nest.name,
            "center": [nest.lat, nest.lon],
            "connect": nest.connect
        }
    for oid, data in area_file_data.items():
        if oid not in [n.id for n in nests]:
            new_area_data[oid] = {
                "name": data["name"],
                "center": data["center"],
                "connect": data["connect"]
            }
    with open(area_file_name, mode="w+") as area_file:
        area_file.write(json.dumps(new_area_data, indent=4))
    log.info("Saved area data")
    log.success(f"All done with {area.name}\n")

    return nests
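# How the SQL IN-list strings above behave on toy data (illustrative only):
#
#   >>> spawns = ['101', '102']
#   >>> "'{}'".format("','".join(spawns))
#   "'101','102'"
#   >>> "'{}'".format("','".join([]))
#   "''"
#
# The empty case produces "''", which the code swaps for "NULL" so the DB
# never compares a numeric column against a blank string.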
import time

from filters.barcode import get_collisions
from loaders.file_provenance import load_file_provenance
from tasks.check_barcodes import config
from utils.logging import log

## load fpr
start_time = time.time()
log.info('Loading file provenance report from: {}'.format(config.fpr_path))
fpr = load_file_provenance(config.fpr_path)
log.info('Completed loading file provenance report in {:.1f}s'.format(
    time.time() - start_time))

## select only CASAVA lane_name + sample_name + workflow_run + barcode
lanes = fpr.loc[(fpr['Workflow Name'] == "CASAVA") &
                (fpr['Sample Name'].notnull()),
                ['Lane Name', 'Sample Name', 'Workflow Run SWID', 'IUS Tag'
                 ]].drop_duplicates()
# reset_index() returns a new frame; the original discarded it
lanes = lanes.reset_index(drop=True)

start_time = time.time()
collisions_df = get_collisions(lanes, config.collision_threshold,
                               config.collision_operator)
log.info('get_collisions completed in {:.1f}s'.format(time.time() - start_time))
log.info('Collisions (threshold = {}, operator = {}) = {}'.format(
    config.collision_threshold, str(config.collision_operator.__name__),
    len(collisions_df)))
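# A toy frame showing the .loc mask + column-subset + drop_duplicates pattern
# used above (column names shortened; plain pandas, no project code):
#
#   >>> import pandas as pd
#   >>> df = pd.DataFrame({'Workflow Name': ['CASAVA', 'CASAVA', 'other'],
#   ...                    'Sample Name': ['s1', 's1', 's2'],
#   ...                    'IUS Tag': ['ACGT', 'ACGT', 'TTTT']})
#   >>> df.loc[(df['Workflow Name'] == 'CASAVA') &
#   ...        (df['Sample Name'].notnull()),
#   ...        ['Sample Name', 'IUS Tag']].drop_duplicates()
#     Sample Name IUS Tag
#   0          s1    ACGT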
def dl(self):
    log.info('download {}'.format(self.url))
    # step 1
    headers = {
        'origin': self.urlparsed.netloc,
    }
    # parameters
    params = {
        'f': self.url.split('/')[-1],
        'passcode': '',
        'r': str(random.random()),
        'ref': '',
    }
    r = self.s.get(GET_FILE_URL1, params=params, headers=headers)
    j = json.loads(r.text)
    log.debug('step 1')
    requests_debug(r)
    # link error handler
    if j.get('code') == 404:
        log.error('dl_file error: {}'.format(j.get('message')))
        if j.get('message') == DL_ERROR_FILELINKTIMEOUT:
            log.error('need to get the dir list again')
        return False, j.get('message')
    if not self.filename:
        self.filename = j['file_name']
    # step 2
    params = {
        'uid': j['userid'],
        'fid': j['file_id'],
        'folder_id': 0,
        'file_chk': j['file_chk'],
        'mb': 0,
        'app': 0,
        'acheck': 1,
        'verifycode': '',
        'rd': str(random.random())
    }
    while True:
        r = self.s.get(GET_FILE_URL2, params=params, headers=headers)
        j = json.loads(r.text)
        log.debug('step 2')
        requests_debug(r)
        if j.get('code') == 503:
            params['rd'] = str(random.random())
        else:
            break
    # create an empty file
    filename = os.path.join(self.parent_dir, self.filename)
    filesize = int(j['file_size'])
    temp_filename = filename + '.ctdown'
    log.debug('create empty file {} size {}'.format(temp_filename, filesize))
    with open(temp_filename, 'wb') as fd:
        fd.truncate(filesize)
    # download with threads
    threads = []
    for i in range(self.args.split):
        start = i * filesize // self.args.split
        end = ((i + 1) * filesize // self.args.split - 1
               if i != self.args.split - 1 else filesize)
        # unescape the JSON-escaped '\/' in the download URL
        t = SplitThread(i, j['downurl'].replace(r'\/', r'/'), params,
                        headers, filename, start, end)
        log.debug('dl-{:03d} download range start={} end={}'.format(
            i + 1, start, end))
        threads.append(t)
        t.start()
        # time.sleep(1)
    progressbar = tqdm.tqdm(total=filesize, desc=filename, ascii=' #',
                            unit="B", unit_scale=True, unit_divisor=1024)
    downloaded_bytes = 0
    last_downloaded_bytes = 0
    download_success = True
    while downloaded_bytes < filesize:
        downloaded_bytes = 0
        for t in threads:
            if t._status == DL_Thread_status.E404:
                log.error('dl-{:03d} download {} failed'.format(
                    t._index, filename))
                download_success = False
                break
            downloaded_bytes += t.downloaded_bytes()
        if not download_success:
            log.error('exit')
            break
        progressbar.update(downloaded_bytes - last_downloaded_bytes)
        last_downloaded_bytes = downloaded_bytes
        log.debug("{} {}".format(downloaded_bytes, filesize))
        time.sleep(1)
    log.debug('quit')
    for i in range(self.args.split):
        threads[i].join()
    progressbar.close()
    if not download_success:
        # keep the partial .ctdown file and report failure instead of
        # renaming it as if the download had completed
        return False, 'download failed'
    os.rename(temp_filename, filename)
    return True, None
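# The byte ranges dl() computes for, say, filesize=100 and split=4, worked
# out by hand from the expressions above:
#
#   i=0: start=0,  end=24
#   i=1: start=25, end=49
#   i=2: start=50, end=74
#   i=3: start=75, end=100   # the last part runs to filesize, not filesize - 1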
# Enclosing loop implied by the `continue` statements below; the iterable
# name `events` is assumed from context.
for event in events:
    event_start = datetime.strptime(event["start"], "%Y-%m-%d %H:%M")
    if event_start > local_time:
        continue
    event_end = datetime.strptime(event["end"], "%Y-%m-%d %H:%M")
    if event_end <= last_migration:
        continue
    if event_start <= last_migration:
        continue
    if event_end < local_time:
        td = local_time - event_end
        last_migration = event_end
        log.info(
            f"Overwriting nest migration with the end time of {event['name']}"
        )
    else:
        td = local_time - event_start
        last_migration = event_start
        log.info(
            f"Overwriting nest migration with the start time of {event['name']}"
        )
    days, seconds = td.days, td.seconds
    config.hours_since_change = math.floor(days * 24 + seconds / 3600)

log.success(f"Hours since last migration: {config.hours_since_change}")

if args.hours is not None:
    config.hours_since_change = int(args.hours)
    log.info(