class JohnHopkinsDataCrawler():
    """Crawl the Johns Hopkins CSSE daily-report CSVs and store each row
    as a JohnHopkinsData record.

    One CSV is fetched per day between ``start_date`` and ``end_date``;
    the whole table is wiped before a crawl.
    """

    def __init__(self):
        self.session = Session()
        # The date where it has fix structure until now
        self.start_date = date(2020, 3, 22)
        self.end_date = date.today()

    def crawl_data(self):
        """Wipe the existing table, then crawl every day in the range."""
        self.session.query(JohnHopkinsData).delete(synchronize_session=False)
        self.session.commit()
        for single_date in daterange(self.start_date, self.end_date):
            self.crawl_individual_csv(single_date)
        print("Success crawl raw data from JohnHopkins Repo")

    def crawl_individual_csv(self, date_to_crawl: date):
        """Download and store one day's CSV.

        Missing days (HTTP 404) are reported and skipped. Rows that fail
        to parse are logged individually and skipped (best effort).
        """
        csv_base_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports"
        date_str = date_to_crawl.strftime("%m-%d-%Y")
        csv_file = f"{csv_base_url}/{date_str}.csv"
        print(f"[START]Crawl data for {date_str}")
        try:
            data_to_store = []
            with requests.get(csv_file, stream=True) as f:
                if f.status_code != HTTPStatus.NOT_FOUND:
                    lines = (line.decode('utf-8') for line in f.iter_lines())
                    reader = csv.reader(lines)
                    next(reader, None)  # skip the CSV header row
                    for row in reader:
                        try:
                            data_to_store.append(
                                JohnHopkinsData(
                                    fips=(row[0] if row[0] != '' else None),
                                    date=date_to_crawl,
                                    admin2=row[1],
                                    province_state=row[2],
                                    country_region=row[3],
                                    last_update=row[4],
                                    lat=(row[5] if row[5] != '' else 0.0),
                                    long=(row[6] if row[6] != '' else 0.0),
                                    confirmed=(row[7] if row[7] != '' else 0),
                                    death=(row[8] if row[8] != '' else 0),
                                    recovered=(row[9] if row[9] != '' else 0),
                                    combined_key=row[10]))
                        except Exception as e:
                            # Best-effort: a malformed row must not abort
                            # the whole day's import.
                            print(e)
                    self.session.add_all(data_to_store)
                    self.session.commit()
                    # BUG FIX: previously printed the raw line counter,
                    # which included the header row (and rows that failed
                    # to parse); report what was actually stored.
                    print(f"[END]Success crawl {len(data_to_store)} data for {date_to_crawl}")
                else:
                    print(f"Can't find data for {date_str}")
        except Exception as e:
            print(e)
def session_scope():
    """Generator that yields a fresh Session for one unit of work.

    Commits when the caller's block exits cleanly, rolls back and
    re-raises on any exception, and always closes the session.

    NOTE(review): this is written as a context-manager generator —
    presumably decorated with ``contextlib.contextmanager`` at the call
    or decorator site, which is not visible in this chunk; confirm.
    """
    session = Session()
    try:
        yield session
        # Commit inside the try so a failing commit also triggers rollback.
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
def create_session():
    """Yield a new session; commit on success, roll back on error.

    The session is always closed, whichever path is taken.
    """
    db = Session()
    try:
        yield db
        db.commit()
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()
def erase_tables():
    """Delete every row from every mapped table.

    Tables are visited children-first (reverse of sorted_tables) so that
    foreign-key constraints are not violated; tables that do not exist
    in the database are skipped.
    """
    db = Session()
    metadata = Base.metadata
    for table in reversed(metadata.sorted_tables):
        if not engine.dialect.has_table(engine, table.name):
            continue
        print('Clear table {}'.format(table.name))
        db.execute(table.delete())
    db.commit()
    db.close()
def on_created(event):
    """File-creation handler: register newly created ``.json`` files.

    Parses the file, moves it, and records a MetadataRegistry row.
    Files with any other extension are ignored.
    """
    print(event)
    src_path = event.src_path
    pipeline_info = parse_json(src_path)
    extension = os.path.splitext(os.path.basename(src_path))[1]
    print(extension)
    if extension != '.json':
        return
    moved_path = move_file(src_path)
    session = Session()
    entry = MetadataRegistry(moved_path,
                             pipeline_info.get('version', '0.0.1'), False)
    session.add(entry)
    session.commit()
    session.close()
class OWDCrawler():
    """Crawler for the Our World in Data covid CSV feed."""

    def __init__(self):
        self.session = Session()

    def crawl_data(self):
        """Wipe the OWDData table and reload it from the OWD CSV feed.

        The reported count is the number of CSV lines read (including
        the header), matching the historical behavior of this crawler.
        """
        print(f"[START]Crawl data from OWD Dataset")
        file_url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
        # clean up existing table first
        self.session.query(OWDData).delete(synchronize_session=False)
        with requests.get(file_url, stream=True) as resp:
            if resp.status_code == HTTPStatus.NOT_FOUND:
                return
            decoded = (raw.decode('utf-8') for raw in resp.iter_lines())
            row_no = 0
            records = []
            for fields in csv.reader(decoded):
                if row_no > 0:  # row 0 is the CSV header
                    records.append(self._build_record(fields))
                row_no += 1
            self.session.add_all(records)
            self.session.commit()
            print(f"[END]Success crawl {row_no} data from OWD Dataset")

    def _build_record(self, fields):
        """Map one CSV row onto an OWDData model instance."""
        return OWDData(iso_code=fields[0],
                       continent=fields[1],
                       location=fields[2],
                       date=fields[3],
                       total_cases=fields[4],
                       new_cases=fields[5],
                       total_deaths=fields[6],
                       new_deaths=fields[7],
                       total_cases_per_million=parseToFloat(fields[8], 0.0),
                       new_cases_per_million=parseToFloat(fields[9], 0.0),
                       total_deaths_per_million=parseToFloat(fields[10], 0.0),
                       new_deaths_per_million=parseToFloat(fields[11], 0.0),
                       total_tests=parseToFloat(fields[12], 0.0),
                       new_tests=parseToFloat(fields[13], 0.0),
                       total_tests_per_thousand=parseToFloat(fields[14], 0.0),
                       new_tests_per_thousand=parseToFloat(fields[15], 0.0),
                       tests_unit=fields[16],
                       last_updated=datetime.now())
async def run_main():
    """Fetch IP info from two providers concurrently and persist each result.

    A fresh session is opened and closed per record.
    """
    logger.info('fetching ip info...')
    results = await asyncio.gather(ip_service.fetch_ip_api_com(),
                                   ip_service.fetch_ipapi_co())
    for info in results:
        logger.info('fetched info: {}'.format(info))
        db = Session()
        record = IP(country=info.country,
                    region=info.region,
                    city=info.city,
                    zip=info.zip,
                    lat=info.lat,
                    lon=info.lon,
                    ip=info.ip,
                    source=info.source)
        db.add(record)
        db.commit()
        db.close()
        logger.info('saved to db: {}'.format(record))
class LeaderBoyt:
    """Discord leaderboard bot cog.

    Tracks per-server "meme" messages and their up/down reaction counts
    in a SQLAlchemy database (User, Server, Status, Message, Nickname
    models) and exposes `!`-style commands to configure the bot,
    back-fill message history, and render leaderboards as embeds.
    """

    def __init__(self, bot):
        # Creates the schema if missing and holds one long-lived session.
        logging.basicConfig(level=logging.INFO)
        Base.metadata.create_all(engine, checkfirst=True)
        self.session = Session()
        self.bot = bot

    def parse_messages(self, messages, temp_cache):
        """Fold a batch of downloaded messages into temp_cache.

        New authors go into temp_cache['new_users']; messages not already
        in the DB (by id) go into temp_cache['new_messages'] as dicts.
        """
        for message in messages:
            current_user = message.author
            current_message = message.content
            current_message_id = message.id
            current_user_index = -1
            current_user_in_db = False
            if (current_message is None or current_message == ''):
                # Content-less message: use attachment URLs as content.
                current_message = '\n'.join(
                    [i['url'] for i in message.attachments])
            if (current_user.id not in temp_cache['user_keys']):
                current_user_index = len(temp_cache['new_users'])
                temp_cache['new_users'].append(current_user)
                temp_cache['user_keys'].append(current_user.id)
            else:
                # Author id is known; find their slot among the users
                # queued this run. Empty result means they were already
                # in the DB before this run started.
                current_user_index = [
                    i for i, d in enumerate(temp_cache['new_users'])
                    if d.id == current_user.id
                ]
                if (len(current_user_index) == 0):
                    current_user_in_db = True
                else:
                    current_user_index = current_user_index[0]
            if (message.id not in temp_cache['message_keys']):
                temp_cache['new_messages'].append({
                    'id': message.id,
                    'content': current_message,
                    'timestamp': message.timestamp,
                    'rxns': message.reactions,
                    'discord_id': current_message_id,
                    'user_index': current_user_index,
                    'user_in_db': current_user_in_db,
                    'author': current_user
                })

    async def download_messages(self, channel, limit, current_count,
                                last_msg, msg_handle, temp_cache):
        """Recursively page through channel history, 100 messages at a time.

        Stops when ``limit`` is reached or a short (final) batch arrives;
        progress is reported by editing ``msg_handle``.

        NOTE(review): the recursive branch returns
        ``current_count + await self.download_messages(... current_count ...)``
        — the recursive result already includes current_count, so the
        total appears to be double-counted; confirm intended semantics.
        """
        before = None
        dwnld_limit = 100
        if last_msg is not None:
            before = last_msg
        if (current_count >= limit):
            await self.bot.edit_message(msg_handle,
                                        'Finished downloading messages.')
            return current_count
        batch_size = 0
        msg_set = []
        async for message in self.bot.logs_from(channel,
                                                limit=dwnld_limit,
                                                before=before):
            batch_size += 1
            last_msg = message
            msg_set.append(message)
        self.parse_messages(msg_set, temp_cache)
        if (current_count % 1000 == 0):
            await self.bot.edit_message(
                msg_handle,
                'Downloaded ' + str(current_count) + ' messages.')
        current_count += batch_size
        if batch_size < 100:
            # Short batch means the channel history is exhausted.
            await self.bot.edit_message(msg_handle,
                                        'Finished downloading messages.')
            return current_count
        else:
            return current_count + await self.download_messages(
                channel, limit, current_count, last_msg, msg_handle,
                temp_cache)

    async def write_to_db(self, temp_cache):
        """Persist the users and messages accumulated in temp_cache.

        Messages lacking either tracked reaction emoji are skipped.
        """
        new_users = []
        user_cache = {}
        server = temp_cache['server']
        for user in temp_cache['new_users']:
            new_user = User(user.id, user.name, user.display_name)
            self.session.add(new_user)
            new_users.append(new_user)
        for message in temp_cache['new_messages']:
            # Resolve the author: query once per already-known user
            # (memoized in user_cache), otherwise use the User object
            # created above.
            if (message['user_in_db']
                    and message['author'].id not in user_cache):
                msg_user = self.session.query(User).filter(
                    User.discord_id == message['author'].id).first()
                user_cache[message['author'].id] = msg_user
            elif (message['user_in_db']):
                msg_user = user_cache[message['author'].id]
            else:
                msg_user = new_users[message['user_index']]
            rx1 = [d for d in message['rxns'] if str(d.emoji) == server.rx1]
            rx2 = [d for d in message['rxns'] if str(d.emoji) == server.rx2]
            if (len(rx1) == 0 or len(rx2) == 0):
                logging.info('Skipping due to no reactions.')
                continue
            else:
                rx1, rx2 = rx1[0], rx2[0]
            new_message = Message(message['id'], temp_cache['server'],
                                  msg_user, message['content'],
                                  message['timestamp'], rx1.count,
                                  rx2.count)
            self.session.add(new_message)
        self.session.commit()
        await self.bot.send_message(temp_cache['ctx'].message.channel,
                                    'Wrote messages to the database.')
        logging.info('Writing to database')

    @commands.command(pass_context=True, no_pm=True)
    async def init(self, ctx):
        """`!init` — begin (or restart) bot configuration for this server.

        A fully configured server (status 2) may only be re-initialized
        by the user who configured it.
        """
        logging.info('Start configuration for server:' +
                     ctx.message.server.id)
        discord_server = ctx.message.server
        discord_user = ctx.message.author
        db_user = self.session.query(User).filter(
            User.discord_id == discord_user.id).first()
        db_server = self.session.query(Server).filter(
            Server.discord_id == discord_server.id).first()
        db_status = self.session.query(Status).join(Server).filter(
            Server.discord_id == ctx.message.server.id).first()
        if ((db_status is not None and db_status.server_status == 2)
                and not (ctx.message.author.id ==
                         db_status.user.discord_id)):
            logging.info('Attemted to init the server, aborting')
            await self.bot.send_message(
                ctx.message.channel, 'Only ' + db_status.user.user_name +
                ' can initialize the bot again')
            return
        if (db_user is None):
            new_user = User(discord_user.id, discord_user.name,
                            discord_user.display_name)
            self.session.add(new_user)
            db_user = new_user
        if (db_server is None):
            # Replace any orphaned status row before creating the server.
            db_status = self.session.query(Status).join(Server).filter(
                Server.discord_id == discord_server.id).first()
            if (db_status is not None):
                self.session.delete(db_status)
            new_server = Server(discord_server.id, discord_server.name, '',
                                '', '', '')
            new_status = Status(0, db_user, new_server)
            self.session.add(new_server)
            self.session.add(new_status)
        self.session.commit()
        await self.bot.send_message(
            ctx.message.channel,
            'Started bot configuration for this server.')
        await self.bot.send_message(
            ctx.message.channel,
            'Use `!check` to check the status, and set them with `!set <param> <value>`.'
        )
        await self.bot.send_message(
            ctx.message.channel,
            'If using emojis not in this server, use the fully qualified name, eg `<:downvote:335141916989456384>` while setting `up` and `down`.'
        )

    @commands.command(pass_context=True, no_pm=True)
    async def check(self, ctx):
        """`!check` — report which configuration values are still unset."""
        if (not await self.check_and_dismiss(ctx, True)):
            return
        server_configuration = ''
        server = self.session.query(Server).filter(
            Server.discord_id == ctx.message.server.id).first()
        if (server.channel == ''):
            server_configuration += 'Channel not set. Do `!set chan #<channelname>`\n'
        else:
            server_configuration += 'Channel: <#' + server.channel + '>\n'
        if (server.rx1 == ''):
            server_configuration += 'Upvote emoji not set. Do `!set up <emoji>`\n'
        else:
            server_configuration += 'Upvote emoji: ' + server.rx1 + '\n'
        if (server.rx2 == ''):
            server_configuration += 'Downvote emoji not set. Do `!set down <emoji>`\n'
        else:
            server_configuration += 'Downvote emoji: ' + server.rx2 + '\n'
        await self.bot.send_message(ctx.message.channel,
                                    server_configuration)
        logging.info('Check status.')

    @commands.command(pass_context=True, no_pm=True)
    async def set(self, ctx, attribute: str, val: str):
        """`!set <param> <value>` — set chan/up/down; status becomes 2
        once all three are configured.
        """
        if (not await self.check_and_dismiss(ctx, True)):
            return
        status = self.session.query(Status).join(Server).filter(
            Server.discord_id == ctx.message.server.id).first()
        if (not (status.user.discord_id == ctx.message.author.id)
                and not (status.server_status == 2)):
            # NOTE(review): this coroutine call is not awaited, so the
            # rejection message is likely never sent — confirm.
            self.bot.send_message(
                ctx.message.channel,
                'Only the user (' + status.user.display_name +
                ') who started the configuration can set.')
            return
        db_status = self.session.query(Status).join(Server).filter(
            Server.discord_id == ctx.message.server.id).first()
        if ((db_status is not None and db_status.server_status == 2)
                and not (ctx.message.author.id ==
                         db_status.user.discord_id)):
            logging.info('Attemted to init the server, aborting')
            await self.bot.send_message(
                ctx.message.channel, 'Only ' + db_status.user.user_name +
                ' can initialize the bot again')
            return
        server_configuration = ''
        server = self.session.query(Server).filter(
            Server.discord_id == ctx.message.server.id).first()
        if (attribute == 'chan'):
            # Strip the <#...> channel-mention wrapper.
            server.channel = re.sub('[<#>]', '', val)
        elif (attribute == 'up'):
            server.rx1 = val
        elif (attribute == 'down'):
            server.rx2 = val
        if (server.channel == ''):
            server_configuration += 'Channel not set. Do `!set chan #<channelname>`\n'
        if (server.rx1 == ''):
            server_configuration += 'Upvote emoji not set. Do `!set up <emoji>`\n'
        if (server.rx2 == ''):
            server_configuration += 'Downvote emoji not set. Do `!set down <emoji>`\n'
        if (server_configuration == ''):
            status.server_status = 2
        else:
            status.server_status = 1
        self.session.commit()
        if (not (server_configuration == '')):
            await self.bot.send_message(ctx.message.channel,
                                        server_configuration)
        else:
            await self.bot.send_message(
                ctx.message.channel,
                'Finished configuring bot for this server.')
        logging.info('Set ' + attribute + ' as ' + val + '.')

    @commands.command(pass_context=True, no_pm=True)
    async def populate(self, ctx, count):
        """`!populate <count>` — back-fill up to count messages from the
        configured channel into the database.
        """
        if (not await self.check_and_dismiss(ctx)):
            return
        db_server = self.session.query(Server).filter(
            Server.discord_id == ctx.message.server.id).first()
        temp_cache = {}
        temp_cache['server'] = db_server
        temp_cache['new_messages'] = []
        temp_cache['new_users'] = []
        temp_cache['ctx'] = ctx
        # Preload known ids so already-stored messages/users are skipped.
        temp_cache['message_keys'] = [
            key[0] for key in self.session.query(
                Message.discord_id).filter(
                    Message.server_id == db_server.id).all()
        ]
        temp_cache['user_keys'] = [
            key[0] for key in self.session.query(User.discord_id).all()
        ]
        logging.info('MSG COUNT:' + str(len(temp_cache['message_keys'])))
        logging.info('USR COUNT:' + str(len(temp_cache['user_keys'])))
        channel = discord.utils.get(ctx.message.server.channels,
                                    id=db_server.channel)
        logging.info('Issued download in: ' + channel.name + '.')
        resp = await self.bot.send_message(ctx.message.channel,
                                           'Downloading messages.')
        await self.download_messages(channel, int(count), 0, None, resp,
                                     temp_cache)
        await self.write_to_db(temp_cache)
        logging.info('Populate the database with data from ' +
                     str(db_server.discord_id) + ':' + db_server.name)

    @commands.command(pass_context=True, no_pm=True)
    async def top(self, ctx, lim: str = '10'):
        """`!top [lim]` — leaderboard by total upvotes; lim is a count
        or a span ('1d'/'1w'/other).
        """
        if (not await self.check_and_dismiss(ctx)):
            return
        is_span = False
        if (not self.is_int(lim)):
            is_span = True
        await self.bot.send_message(ctx.message.channel,
                                    embed=self.generate_memer_board(
                                        ctx, 'number_up', lim, is_span))
        logging.info('Get top memers.')

    @commands.command(pass_context=True, no_pm=True)
    async def bottom(self, ctx, lim: str = '10'):
        """`!bottom [lim]` — leaderboard by total downvotes."""
        if (not await self.check_and_dismiss(ctx)):
            return
        is_span = False
        if (not self.is_int(lim)):
            is_span = True
        await self.bot.send_message(ctx.message.channel,
                                    embed=self.generate_memer_board(
                                        ctx, 'number_down', lim, is_span))
        logging.info('Get shit memers.')

    @commands.command(pass_context=True, no_pm=True)
    async def ptop(self, ctx, lim: str = '10'):
        """`!ptop [lim]` — leaderboard by upvote percentage."""
        if (not await self.check_and_dismiss(ctx)):
            return
        is_span = False
        if (not self.is_int(lim)):
            is_span = True
        await self.bot.send_message(ctx.message.channel,
                                    embed=self.generate_memer_board(
                                        ctx, '%_up', lim, is_span))
        logging.info('Get Top % memers.')

    @commands.command(pass_context=True, no_pm=True)
    async def pbottom(self, ctx, lim: str = '10'):
        """`!pbottom [lim]` — leaderboard by lowest upvote percentage."""
        if (not await self.check_and_dismiss(ctx)):
            return
        is_span = False
        if (not self.is_int(lim)):
            is_span = True
        await self.bot.send_message(ctx.message.channel,
                                    embed=self.generate_memer_board(
                                        ctx, '%_down', lim, is_span))
        logging.info('Get Shit % memers.')

    @commands.command(pass_context=True, no_pm=True)
    async def atop(self, ctx, lim: str = '10'):
        """`!atop [lim]` — leaderboard by average upvotes per message."""
        if (not await self.check_and_dismiss(ctx)):
            return
        is_span = False
        if (not self.is_int(lim)):
            is_span = True
        await self.bot.send_message(ctx.message.channel,
                                    embed=self.generate_memer_board(
                                        ctx, 'avg_up', lim, is_span))
        logging.info('Get Top avg memers.')

    @commands.command(pass_context=True, no_pm=True)
    async def abottom(self, ctx, lim: str = '10'):
        """`!abottom [lim]` — leaderboard by average downvotes."""
        if (not await self.check_and_dismiss(ctx)):
            return
        is_span = False
        if (not self.is_int(lim)):
            is_span = True
        await self.bot.send_message(ctx.message.channel,
                                    embed=self.generate_memer_board(
                                        ctx, 'avg_down', lim, is_span))
        logging.info('Get Shit avg memers.')

    @commands.command(pass_context=True, no_pm=True)
    async def stats(self, ctx, target: str = ''):
        """`!stats [@user]` — embed with one user's totals and averages.

        With no target, reports on the invoking user.
        """
        if (not await self.check_and_dismiss(ctx)):
            return
        # NOTE(review): identity comparison with '' relies on CPython
        # string interning; `target == ''` would be the safe spelling.
        if (target is ''):
            logging.info('CHeck for self')
            target = ctx.message.author.id
        else:
            logging.info('Checking for user ' + target)
            # Strip the <@!...> user-mention wrapper.
            target = re.sub('[<@!>]', '', target)
        logging.info('Target: ' + target)
        db_user = self.session.query(User).filter(
            User.discord_id == target).first()
        db_server = self.session.query(Server).filter(
            Server.discord_id == ctx.message.server.id).first()
        if (db_server is None):
            await self.bot.send_message(ctx.message.channel,
                                        'Bot not initialized in server.')
            return
        if (db_user is None):
            # Unknown user: register the invoker so future calls work.
            await self.bot.send_message(ctx.message.channel,
                                        'No data on user.')
            db_user = User(ctx.message.author.id, ctx.message.author.name,
                           ctx.message.author.display_name)
            self.session.add(db_user)
            self.session.commit()
            return
        db_nick = self.session.query(Nickname).filter(
            Nickname.user_id == db_user.id,
            Nickname.server_id == db_server.id).first()
        if (db_nick is None or db_nick.display_name == ''):
            nickname = db_user.display_name
        else:
            nickname = db_nick.display_name
        # (sum up, sum down, avg up, avg down) for this user on this server.
        total_doots = self.session.query(
            func.sum(Message.rx1_count), func.sum(Message.rx2_count),
            func.avg(Message.rx1_count),
            func.avg(Message.rx2_count)).filter(
                Message.server_id == db_server.id,
                Message.user_id == db_user.id).group_by(
                    Message.user_id).first()
        total_memes = self.session.query(Message.id).filter(
            Message.server_id == db_server.id,
            Message.user_id == db_user.id).count()
        board_embed = discord.Embed(title='Statistics for ' + nickname +
                                    ' for a total of ' + str(total_memes) +
                                    ' memes')
        board_embed.set_author(
            name='LeaderBOYT',
            url='https://github.com/itsmehemant123/me-discord-leaderboard',
            icon_url=
            'https://photos.hd92.me/images/2018/03/23/martin-shkreli.png')
        metric_list = 'Total Upvotes\nTotal Downvotes\nAverage # of Upvotes\nAverage # of Downvotes\n%ge of Upvotes'
        stat_list = str(total_doots[0]) + '\n' + \
            str(total_doots[1]) + '\n' + '%.2f' % (total_doots[2]) + '\n' + \
            '%.2f' % (total_doots[3]) + '\n' + '%.2f' % ((total_doots[0]/(total_doots[0] + total_doots[1])) * 100) + ' %'
        board_embed.add_field(name='Metric', value=metric_list, inline=True)
        board_embed.add_field(name='Stats', value=stat_list, inline=True)
        await self.bot.send_message(ctx.message.channel, embed=board_embed)
        logging.info('Checking stats.')

    @commands.command(pass_context=True, no_pm=True)
    async def test(self, ctx):
        """`!test` — no-op command used to verify the bot responds."""
        if (not await self.check_and_dismiss(ctx)):
            return
        logging.info('lol')

    def generate_memer_board(self, ctx, method, lim, span):
        """Build a leaderboard embed for one ranking method.

        ``method`` selects the aggregate (number/%/avg, up/down);
        ``lim`` is either a row count (span=False) or a time span
        ('1d'/'1w'/anything-else=4w, span=True). Returns a discord.Embed,
        or None if the server is not configured.
        """
        current_server = ctx.message.server
        db_server = self.session.query(Server).filter(
            Server.discord_id == str(current_server.id)).first()
        if (db_server is None):
            return
        message_count = 10
        start_date = datetime.now()
        if (not span):
            message_count = int(lim)
            start_date = datetime.min
        elif (lim == '1d'):
            start_date = start_date - timedelta(hours=24)
        elif (lim == '1w'):
            start_date = start_date - timedelta(weeks=1)
        else:
            start_date = start_date - timedelta(weeks=4)
        if (message_count > 10):
            message_count = 10
        # Until rich embeds are switched for generic messages
        heading = 'Memers'
        if (method == 'number_up'):
            memers = self.session.query(
                Message.user_id, func.sum(Message.rx1_count)).filter(
                    Message.server_id == db_server.id,
                    Message.created_at > start_date).group_by(
                        Message.user_id).order_by(
                            func.sum(Message.rx1_count).desc()).limit(
                                message_count).all()
            heading = 'Top ' + heading
        elif (method == 'number_down'):
            memers = self.session.query(
                Message.user_id, func.sum(Message.rx2_count)).filter(
                    Message.server_id == db_server.id,
                    Message.created_at > start_date).group_by(
                        Message.user_id).order_by(
                            func.sum(Message.rx2_count).desc()).limit(
                                message_count).all()
            heading = 'Shit ' + heading
        elif (method == '%_up'):
            # Ratio of upvotes to total reactions, highest first.
            memers = self.session.query(
                Message.user_id,
                cast(func.sum(Message.rx1_count), Float) /
                (cast(func.sum(Message.rx1_count), Float) +
                 cast(func.sum(Message.rx2_count), Float)),
                func.sum(Message.rx1_count),
                func.sum(Message.rx2_count)).filter(
                    Message.server_id == db_server.id,
                    Message.created_at > start_date).group_by(
                        Message.user_id).order_by(
                            (cast(func.sum(Message.rx1_count), Float) /
                             (cast(func.sum(Message.rx1_count), Float) +
                              cast(func.sum(Message.rx2_count), Float))
                             ).desc()).limit(message_count).all()
            heading = 'Top ' + heading
        elif (method == '%_down'):
            # Same ratio, lowest first.
            memers = self.session.query(
                Message.user_id,
                cast(func.sum(Message.rx1_count), Float) /
                (cast(func.sum(Message.rx1_count), Float) +
                 cast(func.sum(Message.rx2_count), Float)),
                func.sum(Message.rx1_count),
                func.sum(Message.rx2_count)).filter(
                    Message.server_id == db_server.id,
                    Message.created_at > start_date).group_by(
                        Message.user_id).order_by(
                            (cast(func.sum(Message.rx1_count), Float) /
                             (cast(func.sum(Message.rx1_count), Float) +
                              cast(func.sum(Message.rx2_count), Float))
                             ).asc()).limit(message_count).all()
            heading = 'Shit ' + heading
        elif (method == 'avg_up'):
            memers = self.session.query(
                Message.user_id, func.avg(Message.rx1_count)).filter(
                    Message.server_id == db_server.id,
                    Message.created_at > start_date).group_by(
                        Message.user_id).order_by(
                            func.avg(Message.rx1_count).desc()).limit(
                                message_count).all()
            heading = 'Top' + heading + ' by average'
        else:  #if (method == 'avg_down'):
            memers = self.session.query(
                Message.user_id, func.avg(Message.rx2_count)).filter(
                    Message.server_id == db_server.id,
                    Message.created_at > start_date).group_by(
                        Message.user_id).order_by(
                            func.avg(Message.rx2_count).desc()).limit(
                                message_count).all()
            heading = 'Shit' + heading + ' by average'
        board_embed = discord.Embed(title='Leaderboard')
        board_embed.set_author(
            name='LeaderBOYT',
            url='https://github.com/itsmehemant123/me-discord-leaderboard',
            icon_url=
            'https://photos.hd92.me/images/2018/03/23/martin-shkreli.png')
        user_list = ''
        stat_list = ''
        for ind, memer in enumerate(memers):
            user = self.session.query(User).filter(
                User.id == memer[0]).first()
            nick = self.session.query(Nickname).filter(
                Nickname.user_id == memer[0],
                Nickname.server_id == db_server.id).first()
            if (nick is None or nick.display_name == ''):
                nickname = user.display_name
            else:
                nickname = nick.display_name
            user_list += str(ind + 1) + ') ' + nickname + '\n'
            if (method == 'number_up'):
                stat_list += str(memer[1]) + ' ' + db_server.rx1 + '\n'
            elif (method == 'number_down'):
                stat_list += str(memer[1]) + ' ' + db_server.rx2 + '\n'
            elif (method == '%_up' or method == '%_down'):
                stat_list += '%.2f' % (
                    memer[1] *
                    100) + '% ' + db_server.rx1 + '/' + db_server.rx2 + '\n'
            elif (method == 'avg_up'):
                stat_list += '%.2f' % (memer[1]) + \
                    ' ' + db_server.rx1 + '\n'
            else:
                stat_list += '%.2f' % (memer[1]) + \
                    ' ' + db_server.rx2 + '\n'
        board_embed.add_field(name=heading, value=user_list, inline=True)
        board_embed.add_field(name='Stats', value=stat_list, inline=True)
        return board_embed

    async def readmeme(self, message):
        """Store an incoming message as a meme if it is link-like and in
        the configured channel. New messages start at 1/1 reactions.
        """
        logging.info('Processing incoming meme.')
        current_user = message.author
        current_server = message.server
        if (current_server is None or current_user is None):
            logging.info('Missing info. Discarding.')
            return
        db_server = self.session.query(Server).filter(
            Server.discord_id == current_server.id).first()
        if (db_server is None):
            return
        if (not self.is_correct_channel_and_message(message, db_server)):
            logging.info('Not in selected channel. Discarding.')
            return
        db_user = self.session.query(User).filter(
            User.discord_id == current_user.id).first()
        if (db_user is None):
            db_user = User(current_user.id, current_user.name,
                           current_user.display_name)
            self.session.add(db_user)
        content = self.get_message_content(message)
        self.session.add(
            Message(message.id, db_server, db_user, content,
                    message.timestamp, 1, 1))
        self.session.commit()
        logging.info('Wrote new meme.')

    async def add_reaction(self, reaction, user):
        # Delegates to update_reactions; the stored count is absolute.
        self.update_reactions(reaction, user)
        logging.info('Add reaction.')

    async def remove_reaction(self, reaction, user):
        # Same sync path as add: reaction.count is re-read either way.
        self.update_reactions(reaction, user)
        logging.info('Remove reaction.')

    async def clear_reaction(self, message, reactions):
        """Zero both reaction counters when a message's reactions are
        cleared.
        """
        db_message = self.session.query(Message).filter(
            Message.discord_id == str(message.id)).first()
        if (db_message is None):
            return
        db_message.rx1_count = 0
        db_message.rx2_count = 0
        self.session.commit()
        logging.info('Clear reaction.')

    async def check_and_dismiss(self, ctx, is_being_configured=False):
        """Return whether the server is usable; nag the channel if not."""
        is_set = self.check_status(ctx.message.server.id,
                                   is_being_configured)
        if (not is_set):
            await self.bot.send_message(
                ctx.message.channel,
                'Bot not configured yet. Run `!init` to get started.')
        return is_set

    def update_reactions(self, reaction, user):
        """Sync a message's stored rx1/rx2 count with the live reaction
        count, creating User/Message rows on demand.
        """
        current_user = user
        current_message = reaction.message
        current_server = reaction.message.server
        db_server = self.session.query(Server).filter(
            Server.discord_id == str(current_server.id)).first()
        if (db_server is None):
            # Lol, gotem
            logging.info('Server not found, bot not configured for: ' +
                         str(current_server.id) + ':' +
                         current_server.name)
            return
        if (not self.is_correct_channel_and_message(
                current_message, db_server)):
            return
        db_user = self.session.query(User).filter(
            User.discord_id == str(current_user.id)).first()
        if (db_user is None):
            db_user = User(current_user.id, current_user.name,
                           current_user.display_name)
            self.session.add(db_user)
        db_message = self.session.query(Message).filter(
            Message.discord_id == str(current_message.id)).first()
        if (db_message is None):
            content = self.get_message_content(current_message)
            db_message = Message(str(current_message.id), db_server,
                                 db_user, content,
                                 current_message.timestamp, 0, 0)
            self.session.add(db_message)
        if (str(reaction.emoji) == db_server.rx1):
            db_message.rx1_count = reaction.count
        elif (str(reaction.emoji) == db_server.rx2):
            db_message.rx2_count = reaction.count
        self.session.commit()
        logging.info('Updated reactions.')

    def update_nickname(self, before, after):
        """Track a member's nickname change in the Nickname table."""
        if (after.nick is None):
            logging.info('Not a nick update, skipping.')
            return
        db_user = self.session.query(User).filter(
            User.discord_id == before.id).first()
        db_server = self.session.query(Server).filter(
            Server.discord_id == before.server.id).first()
        if (db_server is None):
            logging.info(
                'Update nick attempt on uninitialized server, aborting.')
            return
        if (db_user is None):
            new_user = User(after.id, after.name, after.nick)
            self.session.add(new_user)
            db_user = new_user
        db_nickname = self.session.query(Nickname).filter(
            Nickname.user_id == db_user.id,
            Nickname.server_id == db_server.id).first()
        if (db_nickname is None):
            new_nick = Nickname(db_user, db_server, after.display_name)
            self.session.add(new_nick)
        else:
            db_nickname.display_name = after.display_name
        self.session.commit()
        logging.info('Updated nickname')

    def is_correct_channel_and_message(self, message, server):
        """True when the message is in the tracked channel and looks like
        a bare link (starts with http, has / and ., no spaces).
        """
        if (not (server.channel == str(message.channel.id))):
            return False
        current_message = self.get_message_content(message)
        if (not (current_message.startswith("http")
                 and "/" in current_message and "." in current_message
                 and " " not in current_message)):  # stolen from dino
            return False
        return True

    def get_message_content(self, message):
        """Message text, or newline-joined attachment URLs if empty."""
        content = message.content
        if (content == '' or content is None):
            content = '\n'.join([i['url'] for i in message.attachments])
        return content

    def check_status(self, server_id, is_being_configured):
        """Status gate: any status row passes during configuration;
        otherwise only fully-configured (status 2) servers pass.
        """
        status = self.session.query(Status).join(Server).filter(
            Server.discord_id == server_id).first()
        if (status is None):
            return False
        if (is_being_configured):
            return True
        if (not (status.server_status == 2)):
            return False
        return True

    def is_int(self, val):
        """True when ``val`` parses as an int (distinguishes count vs span)."""
        try:
            int(val)
            return True
        except ValueError:
            return False

    def shutdown(self):
        # Release the long-lived DB session.
        self.session.close()
def update_crawler_timestamp():
    """Record the current time as a CrawlerTimestamp row.

    Fix: the session is now closed in a ``finally`` so the underlying
    connection is returned to the pool (it previously leaked).
    """
    session = Session()
    try:
        session.add(CrawlerTimestamp(crawled_at=datetime.now()))
        session.commit()
    finally:
        session.close()
class GuardianDataCrawler():
    """Crawl the Guardian Australia covid spreadsheet (exposed as JSON)
    and load the "latest totals" and daily "updates" sheets.
    """

    def __init__(self):
        self.session = Session()

    def crawl_data(self):
        """Fetch the spreadsheet JSON once and load both sheets."""
        print("[START] Crawl Guardian Australian data")
        url = "https://interactive.guim.co.uk/docsdata/1q5gdePANXci8enuiS4oHUJxcxC13d6bjMRSicakychE.json"
        response = requests.get(url)
        data = response.json()
        self.crawl_daily_data(data)
        self.crawl_latest_data(data)
        print("[END] Crawl Guardian Australian data")

    def crawl_latest_data(self, data):
        """Replace AustraliaLatest with the "latest totals" sheet rows."""
        self.session.query(AustraliaLatest).delete(synchronize_session=False)
        print("[START] Crawl latest Australia stats")
        for row in data["sheets"]["latest totals"]:
            self.session.add(AustraliaLatest(
                state=row["State or territory"],
                state_name=row["Long name"],
                confirmed=parseEmptyStringToInteger(row["Confirmed cases (cumulative)"]),
                deaths=parseEmptyStringToInteger(row["Deaths"]),
                recovered=parseEmptyStringToInteger(row["Recovered"]),
                active_cases=parseEmptyStringToInteger(row["Active cases"]),
                test_conducted=parseEmptyStringToInteger(row["Tests conducted"]),
                tests_per_million=parseEmptyStringToInteger(row["Tests per million"]),
                percent_positive=row["Percent positive"],
                current_hospitalisation=row["Current hospitalisation"],
                current_icu=row["Current ICU"],
                current_in_ventilator=parseEmptyStringToInteger(row["Current ventilator use"]),
                last_updated=row["Last updated"]
            ))
        self.session.commit()
        print("[END] Crawl latest Australia stats")

    def crawl_daily_data(self, data):
        """Replace GuardianAustraliaData with the daily "updates" sheet.

        BUG FIX: the inserted-row counter previously started at 1, so
        the summary message overcounted by one; it now starts at 0.
        """
        print("[START] Crawl daily updates data")
        self.session.query(GuardianAustraliaData).delete(synchronize_session=False)
        idx = 0
        for row in data["sheets"]["updates"]:
            self.session.add(GuardianAustraliaData(
                community=parseEmptyStringToInteger(row.get("Community", 0)),
                community_unknown=parseEmptyStringToInteger(row.get("Community - no known source", 0)),
                confirmed=parseEmptyStringToInteger(row.get("Cumulative case count", 0)),
                recovered=parseEmptyStringToInteger(row.get("Recovered (cumulative)", 0)),
                deaths=parseEmptyStringToInteger(row.get("Cumulative deaths", 0)),
                date=parseDateString(row.get("Date")),
                hospitalisation=parseEmptyStringToInteger(row.get("Hospitalisations (count)", 0)),
                intensive_care=parseEmptyStringToInteger(row.get("Intensive care (count)", 0)),
                notes=row.get("Notes", ""),
                under_60=parseEmptyStringToInteger(row.get("Under 60", 0)),
                over_60=parseEmptyStringToInteger(row.get("Over 60", 0)),
                state=row.get("State", ""),
                test_conducted_neg=parseEmptyStringToInteger(row.get("Tests conducted (negative)", 0)),
                test_conducted_tot=parseEmptyStringToInteger(row.get("Tests conducted (total)", 0)),
                travel_related=parseEmptyStringToInteger(row.get("Travel-related", 0)),
                under_investigation=parseEmptyStringToInteger(row.get("Under investigation", 0)),
                update_source=row.get("Update Source", 0),
                ventilator_usage=parseEmptyStringToInteger(row.get("Ventilator usage (count)", 0))
            ))
            idx += 1
        self.session.commit()
        print(f"Successfully insert {idx} rows of Guardian australian data")
        print("[END] Crawl daily updates data")
class DatahubCrawler():
    """Crawl the datahub.io covid-19 datapackage and load its three
    tabular resources (combined time series, per-country aggregates,
    worldwide aggregates) into the database.
    """

    def __init__(self):
        data_url = 'https://datahub.io/core/covid-19/datapackage.json'
        # to load Data Package into storage
        package = datapackage.Package(data_url)
        # to load only tabular data
        resources = package.resources
        self.time_series_csv = ""
        self.country_aggregate_csv = ""
        self.world_aggregate_csv = ""
        self.session = Session()
        print("Fetching dataset from datahub")
        for resource in resources:
            if resource.tabular:
                if resource.descriptor.get(
                        "name") == "time-series-19-covid-combined":
                    self.time_series_csv = resource.descriptor['path']
                if resource.descriptor.get("name") == "countries-aggregated":
                    self.country_aggregate_csv = resource.descriptor['path']
                if resource.descriptor.get("name") == "worldwide-aggregated":
                    self.world_aggregate_csv = resource.descriptor['path']

    def crawl_data(self):
        """Run all three resource crawls."""
        self.crawl_time_series_data(self.time_series_csv)
        self.crawl_country_aggregated_data(self.country_aggregate_csv)
        self.crawl_world_aggregated_data(self.world_aggregate_csv)

    def crawl_time_series_data(self, file_url: str):
        """Replace TimeSeriesData with the rows from ``file_url``.

        BUG FIX (all three crawl methods): the success message used the
        raw line counter, which includes the CSV header (and any empty
        lines); it now reports the number of records actually inserted.
        """
        idx = 0
        tsc_data = []
        print("[START]Insert time series data")
        print(f"Crawl data using {self.time_series_csv}")
        with requests.get(file_url, stream=True) as tsc:
            lines = (line.decode('utf-8') for line in tsc.iter_lines())
            self.session.query(TimeSeriesData).delete()
            for row in csv.reader(lines):
                if idx > 0 and len(row) > 0:
                    # Empty numeric fields default to '0'.
                    confirmed = (row[5] if row[5] != '' else '0')
                    recovered = (row[6] if row[6] != '' else '0')
                    death = (row[7] if row[7] != '' else '0')
                    tsc_data.append(
                        TimeSeriesData(date=row[0],
                                       country=row[1],
                                       state=row[2],
                                       lat=row[3],
                                       long=row[4],
                                       confirmed=confirmed,
                                       recovered=recovered,
                                       death=death))
                idx += 1
            self.session.add_all(tsc_data)
            self.session.commit()
        print(f"[END]Insert time series data. Success inserting {len(tsc_data)} records")

    def crawl_country_aggregated_data(self, file_url: str):
        """Replace CountryAggregated with the rows from ``file_url``."""
        ca_data = []
        idx = 0
        print("[START]Insert country aggregated data")
        print(f"Crawl data using {self.country_aggregate_csv}")
        with requests.get(file_url, stream=True) as ca:
            lines = (line.decode('utf-8') for line in ca.iter_lines())
            self.session.query(CountryAggregated).delete()
            for row in csv.reader(lines):
                if idx > 0 and len(row) > 0:
                    confirmed = (row[2] if row[2] != '' else '0')
                    recovered = (row[3] if row[3] != '' else '0')
                    death = (row[4] if row[4] != '' else '0')
                    ca_data.append(
                        CountryAggregated(date=row[0],
                                          country=row[1],
                                          confirmed=confirmed,
                                          recovered=recovered,
                                          death=death))
                idx += 1
            self.session.add_all(ca_data)
            self.session.commit()
        print(
            f"[END]Insert country aggregated data. Success inserting {len(ca_data)} records"
        )

    def crawl_world_aggregated_data(self, file_url: str):
        """Replace WorldwideAggregated with the rows from ``file_url``."""
        wwa_data = []
        idx = 0
        print("[START]Insert world aggregated data")
        print(f"Crawl data using {self.world_aggregate_csv}")
        with requests.get(file_url, stream=True) as wwa:
            lines = (line.decode('utf-8') for line in wwa.iter_lines())
            self.session.query(WorldwideAggregated).delete()
            for row in csv.reader(lines):
                if idx > 0 and len(row) > 0:
                    confirmed = (row[1] if row[1] != '' else '0')
                    recovered = (row[2] if row[2] != '' else '0')
                    death = (row[3] if row[3] != '' else '0')
                    wwa_data.append(
                        WorldwideAggregated(date=row[0],
                                            confirmed=confirmed,
                                            recovered=recovered,
                                            death=death))
                idx += 1
            self.session.add_all(wwa_data)
            self.session.commit()
        print(
            f"[END]Insert world aggregated data. Success inserting {len(wwa_data)} records"
        )
        print("Finish run DataHub crawler")
from models.prediction import prediction
import datetime

# Seed script: create the schema and insert a few POIs and cameras.
Base.metadata.create_all(engine)
session = Session()

praca = poi("praca", 4.124124124, 15.123123123)
avenida = poi("avenida", 4.154152324, 12.124562123)
museu = poi("museu", 4.124124124, 15.123123123)
session.add(praca)
session.add(avenida)
session.add(museu)
session.commit()

camera1 = camera("0.0.0.0", 1)
camera2 = camera("1.1.1.1", 2)
camera3 = camera("2.2.2.2", 3)
session.add(camera1)
session.add(camera2)
session.add(camera3)
session.commit()
# Fix: close the session so the connection is released when the script ends.
session.close()
'''
prediction1 = prediction("low_density", 1)
prediction2 = prediction("medium_low_density", 2)
prediction3 = prediction("high_density", 3)
'''
def insertPOI(poiname, geolat, geolong):
    """Insert one poi row with the given name and coordinates.

    Fix: the session is now closed in a ``finally`` so the connection
    is returned to the pool (it previously leaked).
    """
    session = Session()
    try:
        newPOI = poi(poiname, geolat, geolong)
        session.add(newPOI)
        session.commit()
    finally:
        session.close()
def insertPrediction(predict, id_camara):
    """Insert one prediction row stamped with the current UTC time.

    Fix: the session is now closed in a ``finally`` so the connection
    is returned to the pool (it previously leaked).
    """
    session = Session()
    try:
        timestamp = datetime.datetime.now(datetime.timezone.utc)
        newPrediction = prediction(predict, id_camara, timestamp)
        session.add(newPrediction)
        session.commit()
    finally:
        session.close()
def insertCamera(ip_address, poi_id):
    """Insert one camera row bound to a poi.

    Fix: the session is now closed in a ``finally`` so the connection
    is returned to the pool (it previously leaked).
    """
    session = Session()
    try:
        newCamera = camera(ip_address, poi_id)
        session.add(newCamera)
        session.commit()
    finally:
        session.close()