Exemple #1
0
class JohnHopkinsDataCrawler():
    """Crawl the Johns Hopkins CSSE daily-report CSVs into ``JohnHopkinsData``.

    ``crawl_data`` wipes the table and then loads one CSV per day between
    ``start_date`` and ``end_date`` through a single SQLAlchemy session.
    """

    def __init__(self):
        self.session = Session()
        # Earliest report day to load. NOTE(review): presumably the first day
        # on which the CSV columns have the layout parsed below -- confirm.
        self.start_date = date(2020, 3, 22)
        self.end_date = date.today()

    def crawl_data(self):
        """Delete every stored row, then crawl each day in the date range."""
        self.session.query(JohnHopkinsData).delete(synchronize_session=False)
        self.session.commit()
        for single_date in daterange(self.start_date, self.end_date):
            self.crawl_individual_csv(single_date)
        print("Success crawl raw data from JohnHopkins Repo")

    @staticmethod
    def _blank_to(value, default):
        """Return ``value`` unless it is the empty string, else ``default``."""
        return value if value != '' else default

    def _row_to_record(self, row, date_to_crawl):
        """Map one CSV data row onto a ``JohnHopkinsData`` instance.

        Raises ``IndexError`` when the row has fewer than 11 columns.
        """
        blank_to = self._blank_to
        return JohnHopkinsData(
            fips=blank_to(row[0], None),
            date=date_to_crawl,
            admin2=row[1],
            province_state=row[2],
            country_region=row[3],
            last_update=row[4],
            lat=blank_to(row[5], 0.0),
            long=blank_to(row[6], 0.0),
            confirmed=blank_to(row[7], 0),
            death=blank_to(row[8], 0),
            recovered=blank_to(row[9], 0),
            combined_key=row[10])

    def crawl_individual_csv(self, date_to_crawl: date):
        """Download the report for ``date_to_crawl`` and store its rows.

        The crawl is best-effort: a missing file (HTTP 404) is reported and
        skipped, a row that cannot be converted is printed and skipped, and
        any other failure is printed after rolling the session back so the
        next day can still be committed.
        """
        csv_base_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports"

        date_str = date_to_crawl.strftime("%m-%d-%Y")
        csv_file = f"{csv_base_url}/{date_str}.csv"
        print(f"[START]Crawl data for {date_str}")

        try:
            with requests.get(csv_file, stream=True) as f:
                if f.status_code == HTTPStatus.NOT_FOUND:
                    print(f"Can't find data for {date_str}")
                    return

                lines = (line.decode('utf-8') for line in f.iter_lines())
                reader = csv.reader(lines)
                next(reader, None)  # the first line is the column header

                data_to_store = []
                for row in reader:
                    try:
                        data_to_store.append(
                            self._row_to_record(row, date_to_crawl))
                    except Exception as e:
                        # Deliberately best-effort: one malformed row must
                        # not abort the whole day.
                        print(e)

            self.session.add_all(data_to_store)
            self.session.commit()
            # Report the rows actually stored, not the number of CSV lines.
            print(f"[END]Success crawl {len(data_to_store)} data "
                  f"for {date_to_crawl}")
        except Exception as e:
            # A failed flush leaves the session unusable until rolled back;
            # without this every following day would fail as well.
            self.session.rollback()
            print(e)
Exemple #2
0
def session_scope():
    """Yield a fresh ``Session`` and settle its transaction afterwards.

    The session is committed when the consumer finishes normally, rolled
    back (and the error re-raised) when an ``Exception`` occurs, and closed
    in every case.

    NOTE(review): written as a generator; presumably wrapped with
    ``contextlib.contextmanager`` -- the decorator is not visible here.
    """
    scoped = Session()
    try:
        try:
            yield scoped
            scoped.commit()
        except Exception:
            scoped.rollback()
            raise
    finally:
        # Runs on success, on error and when the generator is closed early.
        scoped.close()
Exemple #3
0
def create_session():
    """Contextmanager that will create and teardown a session.

    Yields a new ``Session``; commits it when the caller's block completes,
    rolls it back and re-raises on an ``Exception``, and always closes it.
    """
    db_session = Session()
    try:
        yield db_session
        db_session.commit()
    except Exception:
        # Undo whatever the caller's block (or the commit) left half-done.
        db_session.rollback()
        raise
    finally:
        db_session.close()
Exemple #4
0
def erase_tables():
    """Delete all rows from every mapped table that exists in the database.

    Tables are visited in reverse of ``Base.metadata.sorted_tables`` so that
    dependent tables are emptied before the tables they reference. All
    deletes are committed together; on any error the transaction is rolled
    back, the error is re-raised and the session is still closed.
    """
    db = Session()
    try:
        for table in reversed(Base.metadata.sorted_tables):
            if engine.dialect.has_table(engine, table.name):
                print('Clear table {}'.format(table.name))
                db.execute(table.delete())
        db.commit()
    except Exception:
        db.rollback()
        raise
    finally:
        # Previously the session leaked whenever a delete or the commit raised.
        db.close()
Exemple #5
0
 def on_created(event):
     """Register a newly created ``.json`` file in the metadata registry.

     ``event.src_path`` is the path of the created file. Files with any other
     extension are ignored. A JSON file is parsed, moved with ``move_file``
     and recorded as a ``MetadataRegistry`` row holding the new path, the
     ``version`` found in the file (default ``'0.0.1'``) and ``False``.
     """
     print(event)
     file_path = event.src_path
     file_extension = os.path.splitext(os.path.basename(file_path))[1]
     print(file_extension)
     if file_extension != '.json':
         return

     # Parse only after the extension check, so non-JSON files are never
     # handed to the JSON parser, and before the move changes the path.
     pipeline_info = parse_json(file_path)
     file_path = move_file(file_path)

     session = Session()
     try:
         metadata_entry = MetadataRegistry(
             file_path, pipeline_info.get('version', '0.0.1'), False)
         session.add(metadata_entry)
         session.commit()
     except Exception:
         session.rollback()
         raise
     finally:
         # Close even when the insert fails so the connection is released.
         session.close()
class OWDCrawler():
    """Reload the ``OWDData`` table from the Our World in Data COVID CSV."""

    def __init__(self):
        self.session = Session()

    @staticmethod
    def _row_to_record(row):
        """Map one CSV data row (at least 17 columns) onto ``OWDData``."""
        return OWDData(
            iso_code=row[0],
            continent=row[1],
            location=row[2],
            date=row[3],
            total_cases=row[4],
            new_cases=row[5],
            total_deaths=row[6],
            new_deaths=row[7],
            total_cases_per_million=parseToFloat(row[8], 0.0),
            new_cases_per_million=parseToFloat(row[9], 0.0),
            total_deaths_per_million=parseToFloat(row[10], 0.0),
            new_deaths_per_million=parseToFloat(row[11], 0.0),
            total_tests=parseToFloat(row[12], 0.0),
            new_tests=parseToFloat(row[13], 0.0),
            total_tests_per_thousand=parseToFloat(row[14], 0.0),
            new_tests_per_thousand=parseToFloat(row[15], 0.0),
            tests_unit=row[16],
            last_updated=datetime.now())

    def crawl_data(self):
        """Download the dataset and replace the table contents with it.

        The existing rows are deleted in the same transaction that inserts
        the new ones, and only after the download has been parsed, so a
        missing file (HTTP 404) or a failed download leaves the table as it
        was. A database error rolls the transaction back and is re-raised.
        """
        print("[START]Crawl data from OWD Dataset")
        file_url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"

        with requests.get(file_url, stream=True) as f:
            if f.status_code == HTTPStatus.NOT_FOUND:
                print(f"Can't find data at {file_url}")
                return
            lines = (line.decode('utf-8') for line in f.iter_lines())
            reader = csv.reader(lines)
            next(reader, None)  # the first line is the column header
            # csv.reader yields [] for blank lines; skip those.
            data_to_store = [self._row_to_record(row) for row in reader if row]

        try:
            # clean up existing table first
            self.session.query(OWDData).delete(synchronize_session=False)
            self.session.add_all(data_to_store)
            self.session.commit()
        except Exception:
            self.session.rollback()
            raise
        print(f"[END]Success crawl {len(data_to_store)} data from OWD Dataset")
Exemple #7
0
async def run_main():
    """Fetch IP information from both providers and store each result.

    The two ``ip_service`` lookups run concurrently. Every returned item is
    written as one ``IP`` row in its own session; a failed insert is rolled
    back and re-raised, and the session is closed either way.
    """
    logger.info('fetching ip info...')
    data = await asyncio.gather(ip_service.fetch_ip_api_com(),
                                ip_service.fetch_ipapi_co())

    for item in data:
        logger.info('fetched info: {}'.format(item))
        session = Session()
        try:
            ip = IP(country=item.country,
                    region=item.region,
                    city=item.city,
                    zip=item.zip,
                    lat=item.lat,
                    lon=item.lon,
                    ip=item.ip,
                    source=item.source)
            session.add(ip)
            session.commit()
            # Log while the session is still open: once it is closed the
            # instance is detached and formatting it may no longer be able
            # to load its attributes.
            logger.info('saved to db: {}'.format(ip))
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()
class LeaderBoyt:
    def __init__(self, bot):
        """Keep the bot handle, configure logging and prepare the database.

        bot: object used by the commands to send and edit messages.
        """
        self.bot = bot
        logging.basicConfig(level=logging.INFO)

        # checkfirst=True: only tables that do not exist yet are created.
        Base.metadata.create_all(engine, checkfirst=True)
        self.session = Session()

    def parse_messages(self, messages, temp_cache):
        """Collect unseen users and messages from ``messages`` into the cache.

        ``temp_cache`` is mutated in place:

        * an author whose id is not in ``user_keys`` is appended to
          ``new_users`` and the id to ``user_keys``;
        * a message whose id is not in ``message_keys`` is appended to
          ``new_messages`` as a dict. Its ``user_index`` is the author's
          position in ``new_users``, or ``-1`` with ``user_in_db`` set when
          the author is known but not part of ``new_users``.

        A message without text gets the URLs of its attachments, one per
        line, as content.
        """
        new_users = temp_cache['new_users']
        user_keys = temp_cache['user_keys']
        new_messages = temp_cache['new_messages']

        # Hash-based views of the cache lists: membership tests and index
        # lookups stay O(1) per message instead of scanning the lists.
        known_user_ids = set(user_keys)
        known_message_ids = set(temp_cache['message_keys'])
        new_user_index = {}
        for position, user in enumerate(new_users):
            new_user_index.setdefault(user.id, position)

        for message in messages:
            author = message.author
            content = message.content
            if content is None or content == '':
                content = '\n'.join(
                    attachment['url'] for attachment in message.attachments)

            if author.id not in known_user_ids:
                user_index = len(new_users)
                new_users.append(author)
                user_keys.append(author.id)
                known_user_ids.add(author.id)
                new_user_index.setdefault(author.id, user_index)
                user_in_db = False
            else:
                # Known id that is not among the new users: the author is
                # already stored, so there is no index to point at.
                user_index = new_user_index.get(author.id, -1)
                user_in_db = user_index == -1

            if message.id not in known_message_ids:
                new_messages.append({
                    'id': message.id,
                    'content': content,
                    'timestamp': message.timestamp,
                    'rxns': message.reactions,
                    'discord_id': message.id,
                    'user_index': user_index,
                    'user_in_db': user_in_db,
                    'author': author
                })

    async def download_messages(self, channel, limit, current_count, last_msg,
                                msg_handle, temp_cache):
        """Download channel history in batches and feed it to the parser.

        Batches of up to 100 messages are fetched, each starting before the
        last message of the previous batch, and passed to
        ``parse_messages``. Downloading stops once ``current_count`` has
        reached ``limit`` or a batch comes back short. ``msg_handle`` is
        edited with a progress note whenever the running count is a multiple
        of 1000 and with a final note when done.

        Returns the total number of messages downloaded, including the
        ``current_count`` passed in.
        """
        dwnld_limit = 100

        # Iterative on purpose: one recursion frame per batch would hit the
        # interpreter's recursion limit on large downloads, and the recursive
        # form added the running count to an already cumulative total.
        while current_count < limit:
            msg_set = []
            async for message in self.bot.logs_from(channel,
                                                    limit=dwnld_limit,
                                                    before=last_msg):
                msg_set.append(message)

            self.parse_messages(msg_set, temp_cache)

            if current_count % 1000 == 0:
                await self.bot.edit_message(
                    msg_handle,
                    'Downloaded ' + str(current_count) + ' messages.')

            current_count += len(msg_set)
            if msg_set:
                last_msg = msg_set[-1]
            if len(msg_set) < dwnld_limit:
                # A short batch means the history has been read completely.
                break

        await self.bot.edit_message(msg_handle,
                                    'Finished downloading messages.')
        return current_count

    async def write_to_db(self, temp_cache):
        """Persist the users and messages collected in ``temp_cache``.

        Every entry of ``new_users`` becomes a ``User`` row. Every entry of
        ``new_messages`` that carries both of the server's configured
        reactions becomes a ``Message`` row with the two reaction counts;
        messages missing either reaction are skipped. Everything is committed
        in one transaction, which is rolled back (and the error re-raised)
        if the commit fails. A confirmation is then sent to the channel of
        the invoking command.
        """
        logging.info('Writing to database')
        server = temp_cache['server']
        user_cache = {}  # discord id -> User row loaded from the database

        new_users = []
        for user in temp_cache['new_users']:
            new_user = User(user.id, user.name, user.display_name)
            self.session.add(new_user)
            new_users.append(new_user)

        for message in temp_cache['new_messages']:
            reactions = message['rxns']
            rx1 = next(
                (d for d in reactions if str(d.emoji) == server.rx1), None)
            rx2 = next(
                (d for d in reactions if str(d.emoji) == server.rx2), None)
            if rx1 is None or rx2 is None:
                logging.info('Skipping due to no reactions.')
                continue

            if message['user_in_db']:
                author_id = message['author'].id
                if author_id not in user_cache:
                    user_cache[author_id] = self.session.query(User).filter(
                        User.discord_id == author_id).first()
                msg_user = user_cache[author_id]
            else:
                msg_user = new_users[message['user_index']]

            self.session.add(
                Message(message['id'], server, msg_user, message['content'],
                        message['timestamp'], rx1.count, rx2.count))

        try:
            self.session.commit()
        except Exception:
            # The shared session stays unusable after a failed flush until
            # it is rolled back.
            self.session.rollback()
            raise

        await self.bot.send_message(temp_cache['ctx'].message.channel,
                                    'Wrote messages to the database.')

    @commands.command(pass_context=True, no_pm=True)
    async def init(self, ctx):
        """Start (or restart) the bot configuration for the calling server.

        When the server is already fully configured (status 2) only the user
        stored on its status may run this again; anyone else gets a refusal.
        Otherwise the calling user and the server are created when missing,
        a new server gets a status of 0 owned by the caller, and usage hints
        are posted to the channel. A failed commit is rolled back and
        re-raised.
        """
        discord_server = ctx.message.server
        discord_user = ctx.message.author
        channel = ctx.message.channel
        logging.info('Start configuration for server:' + discord_server.id)

        db_user = self.session.query(User).filter(
            User.discord_id == discord_user.id).first()
        db_server = self.session.query(Server).filter(
            Server.discord_id == discord_server.id).first()
        db_status = self.session.query(Status).join(Server).filter(
            Server.discord_id == discord_server.id).first()

        if (db_status is not None and db_status.server_status == 2
                and discord_user.id != db_status.user.discord_id):
            logging.info('Attemted to init the server, aborting')
            await self.bot.send_message(
                channel, 'Only ' + db_status.user.user_name +
                ' can initialize the bot again')
            return

        if db_user is None:
            db_user = User(discord_user.id, discord_user.name,
                           discord_user.display_name)
            self.session.add(db_user)

        if db_server is None:
            # The status fetched above used exactly this filter, so it is
            # reused rather than queried a second time.
            if db_status is not None:
                self.session.delete(db_status)

            new_server = Server(discord_server.id, discord_server.name, '', '',
                                '', '')
            self.session.add(new_server)
            self.session.add(Status(0, db_user, new_server))

        try:
            self.session.commit()
        except Exception:
            # Keep the shared session usable for the next command.
            self.session.rollback()
            raise

        await self.bot.send_message(
            channel, 'Started bot configuration for this server.')
        await self.bot.send_message(
            channel,
            'Use `!check` to check the status, and set them with `!set <param> <value>`.'
        )
        await self.bot.send_message(
            channel,
            'If using emojis not in this server, use the fully qualified name, eg `<:downvote:335141916989456384>` while setting `up` and `down`.'
        )

    @commands.command(pass_context=True, no_pm=True)
    async def check(self, ctx):
        """Post the server's current channel and emoji configuration.

        Each of the three settings is reported either with its value or with
        a hint on how to set it.
        """
        allowed = await self.check_and_dismiss(ctx, True)
        if not allowed:
            return

        server = self.session.query(Server).filter(
            Server.discord_id == ctx.message.server.id).first()

        report = [
            'Channel not set. Do `!set chan #<channelname>`\n'
            if server.channel == '' else
            'Channel: <#' + server.channel + '>\n',
            'Upvote emoji not set. Do `!set up <emoji>`\n'
            if server.rx1 == '' else
            'Upvote emoji: ' + server.rx1 + '\n',
            'Downvote emoji not set. Do `!set down <emoji>`\n'
            if server.rx2 == '' else
            'Downvote emoji: ' + server.rx2 + '\n',
        ]

        await self.bot.send_message(ctx.message.channel, ''.join(report))
        logging.info('Check status.')

    @commands.command(pass_context=True, no_pm=True)
    async def set(self, ctx, attribute: str, val: str):
        """Set one configuration value (``chan``, ``up`` or ``down``).

        Only the user stored on the server's status may change settings;
        anyone else receives a refusal. After the change the status becomes
        2 when channel and both emojis are set, otherwise 1, and the caller
        is told what is still missing. An unknown ``attribute`` changes
        nothing but still refreshes the status. A failed commit is rolled
        back and re-raised.
        """
        if not await self.check_and_dismiss(ctx, True):
            return

        channel = ctx.message.channel
        status = self.session.query(Status).join(Server).filter(
            Server.discord_id == ctx.message.server.id).first()
        is_owner = status.user.discord_id == ctx.message.author.id

        if not is_owner and status.server_status != 2:
            # This call was missing its ``await``, so the refusal was
            # never actually sent.
            await self.bot.send_message(
                channel,
                'Only the user (' + status.user.display_name +
                ') who started the configuration can set.')
            return

        # Same status row as above; the former second query was redundant.
        if status.server_status == 2 and not is_owner:
            logging.info('Attemted to init the server, aborting')
            await self.bot.send_message(
                channel, 'Only ' + status.user.user_name +
                ' can initialize the bot again')
            return

        server = self.session.query(Server).filter(
            Server.discord_id == ctx.message.server.id).first()

        if attribute == 'chan':
            # Accept a channel mention and keep only the bare id.
            server.channel = re.sub('[<#>]', '', val)
        elif attribute == 'up':
            server.rx1 = val
        elif attribute == 'down':
            server.rx2 = val

        missing = []
        if server.channel == '':
            missing.append('Channel not set. Do `!set chan #<channelname>`\n')
        if server.rx1 == '':
            missing.append('Upvote emoji not set. Do `!set up <emoji>`\n')
        if server.rx2 == '':
            missing.append('Downvote emoji not set. Do `!set down <emoji>`\n')
        server_configuration = ''.join(missing)

        status.server_status = 1 if missing else 2

        try:
            self.session.commit()
        except Exception:
            # Keep the shared session usable for the next command.
            self.session.rollback()
            raise

        if missing:
            await self.bot.send_message(channel, server_configuration)
        else:
            await self.bot.send_message(
                channel, 'Finished configuring bot for this server.')
        logging.info('Set ' + attribute + ' as ' + val + '.')

    @commands.command(pass_context=True, no_pm=True)
    async def populate(self, ctx, count):
        """Download up to ``count`` messages of the configured channel.

        The ids of already stored messages and users are loaded first so the
        parser can skip them; the downloaded messages are then written to the
        database. A ``count`` that is not an integer, or a configured channel
        that cannot be found on the server, is reported to the caller
        instead of failing part-way through.
        """
        if not await self.check_and_dismiss(ctx):
            return

        try:
            limit = int(count)
        except ValueError:
            await self.bot.send_message(
                ctx.message.channel, 'Message count must be a whole number.')
            return

        db_server = self.session.query(Server).filter(
            Server.discord_id == ctx.message.server.id).first()

        temp_cache = {
            'server': db_server,
            'new_messages': [],
            'new_users': [],
            'ctx': ctx,
            'message_keys': [
                key[0]
                for key in self.session.query(Message.discord_id).filter(
                    Message.server_id == db_server.id).all()
            ],
            'user_keys': [
                key[0] for key in self.session.query(User.discord_id).all()
            ],
        }
        logging.info('MSG COUNT:' + str(len(temp_cache['message_keys'])))
        logging.info('USR COUNT:' + str(len(temp_cache['user_keys'])))

        channel = discord.utils.get(ctx.message.server.channels,
                                    id=db_server.channel)
        if channel is None:
            # discord.utils.get returns None when nothing matches.
            await self.bot.send_message(
                ctx.message.channel,
                'Configured channel not found. Do `!set chan #<channelname>`')
            return

        logging.info('Issued download in: ' + channel.name + '.')
        resp = await self.bot.send_message(ctx.message.channel,
                                           'Downloading messages.')

        await self.download_messages(channel, limit, 0, None, resp,
                                     temp_cache)
        await self.write_to_db(temp_cache)
        logging.info('Populate the database with data from ' +
                     str(db_server.discord_id) + ':' + db_server.name)

    @commands.command(pass_context=True, no_pm=True)
    async def top(self, ctx, lim: str = '10'):
        """Post the board ranked by total upvote reactions (``number_up``).

        ``lim`` is an entry count when it is an integer; anything else is
        passed on as a time span.
        """
        allowed = await self.check_and_dismiss(ctx)
        if not allowed:
            return

        is_span = not self.is_int(lim)
        board = self.generate_memer_board(ctx, 'number_up', lim, is_span)
        await self.bot.send_message(ctx.message.channel, embed=board)
        logging.info('Get top memers.')

    @commands.command(pass_context=True, no_pm=True)
    async def bottom(self, ctx, lim: str = '10'):
        """Post the board ranked by total downvote reactions (``number_down``).

        ``lim`` is an entry count when it is an integer; anything else is
        passed on as a time span.
        """
        allowed = await self.check_and_dismiss(ctx)
        if not allowed:
            return

        is_span = not self.is_int(lim)
        board = self.generate_memer_board(ctx, 'number_down', lim, is_span)
        await self.bot.send_message(ctx.message.channel, embed=board)
        logging.info('Get shit memers.')

    @commands.command(pass_context=True, no_pm=True)
    async def ptop(self, ctx, lim: str = '10'):
        """Post the board with the highest upvote share first (``%_up``).

        ``lim`` is an entry count when it is an integer; anything else is
        passed on as a time span.
        """
        allowed = await self.check_and_dismiss(ctx)
        if not allowed:
            return

        is_span = not self.is_int(lim)
        board = self.generate_memer_board(ctx, '%_up', lim, is_span)
        await self.bot.send_message(ctx.message.channel, embed=board)
        logging.info('Get Top % memers.')

    @commands.command(pass_context=True, no_pm=True)
    async def pbottom(self, ctx, lim: str = '10'):
        """Post the board with the lowest upvote share first (``%_down``).

        ``lim`` is an entry count when it is an integer; anything else is
        passed on as a time span.
        """
        allowed = await self.check_and_dismiss(ctx)
        if not allowed:
            return

        is_span = not self.is_int(lim)
        board = self.generate_memer_board(ctx, '%_down', lim, is_span)
        await self.bot.send_message(ctx.message.channel, embed=board)
        logging.info('Get Shit % memers.')

    @commands.command(pass_context=True, no_pm=True)
    async def atop(self, ctx, lim: str = '10'):
        """Post the board ranked by average upvotes per message (``avg_up``).

        ``lim`` is an entry count when it is an integer; anything else is
        passed on as a time span.
        """
        allowed = await self.check_and_dismiss(ctx)
        if not allowed:
            return

        is_span = not self.is_int(lim)
        board = self.generate_memer_board(ctx, 'avg_up', lim, is_span)
        await self.bot.send_message(ctx.message.channel, embed=board)
        logging.info('Get Top avg memers.')

    @commands.command(pass_context=True, no_pm=True)
    async def abottom(self, ctx, lim: str = '10'):
        """Post the board ranked by average downvotes per message (``avg_down``).

        ``lim`` is an entry count when it is an integer; anything else is
        passed on as a time span.
        """
        allowed = await self.check_and_dismiss(ctx)
        if not allowed:
            return

        is_span = not self.is_int(lim)
        board = self.generate_memer_board(ctx, 'avg_down', lim, is_span)
        await self.bot.send_message(ctx.message.channel, embed=board)
        logging.info('Get Shit avg memers.')

    @commands.command(pass_context=True, no_pm=True)
    async def stats(self, ctx, target: str = ''):
        """Post reaction statistics for a user on this server.

        ``target`` is a user mention or id; when empty the caller's own
        statistics are shown. The embed lists total and average up/down
        votes and the upvote percentage over all of the user's stored
        messages. 'No data on user.' is sent when the user is unknown or has
        no stored messages; an unknown caller is registered on the way.
        """
        if not await self.check_and_dismiss(ctx):
            return

        channel = ctx.message.channel
        author = ctx.message.author

        # Compare by value; ``is ''`` tested object identity.
        if target == '':
            logging.info('CHeck for self')
            target = author.id
        else:
            logging.info('Checking for user ' + target)
            target = re.sub('[<@!>]', '', target)

        logging.info('Target: ' + target)
        db_user = self.session.query(User).filter(
            User.discord_id == target).first()
        db_server = self.session.query(Server).filter(
            Server.discord_id == ctx.message.server.id).first()

        if db_server is None:
            await self.bot.send_message(channel,
                                        'Bot not initialized in server.')
            return

        if db_user is None:
            await self.bot.send_message(channel, 'No data on user.')
            # Only the caller can be registered from here: the row is built
            # from the caller's data, so creating it while looking up
            # someone else would store the wrong (possibly duplicate) user.
            if target == author.id:
                self.session.add(
                    User(author.id, author.name, author.display_name))
                try:
                    self.session.commit()
                except Exception:
                    self.session.rollback()
                    raise
            return

        db_nick = self.session.query(Nickname).filter(
            Nickname.user_id == db_user.id,
            Nickname.server_id == db_server.id).first()

        if db_nick is None or db_nick.display_name == '':
            nickname = db_user.display_name
        else:
            nickname = db_nick.display_name

        total_doots = self.session.query(
            func.sum(Message.rx1_count), func.sum(Message.rx2_count),
            func.avg(Message.rx1_count), func.avg(Message.rx2_count)).filter(
                Message.server_id == db_server.id,
                Message.user_id == db_user.id).group_by(
                    Message.user_id).first()
        if total_doots is None:
            # The grouped query has no row when the user has no messages.
            await self.bot.send_message(channel, 'No data on user.')
            return

        total_memes = self.session.query(Message.id).filter(
            Message.server_id == db_server.id,
            Message.user_id == db_user.id).count()

        board_embed = discord.Embed(title='Statistics for ' + nickname +
                                    ' for a total of ' + str(total_memes) +
                                    ' memes')
        board_embed.set_author(
            name='LeaderBOYT',
            url='https://github.com/itsmehemant123/me-discord-leaderboard',
            icon_url=
            'https://photos.hd92.me/images/2018/03/23/martin-shkreli.png')

        up_total, down_total, up_avg, down_avg = total_doots
        all_votes = up_total + down_total
        # No votes at all: report 0 % rather than dividing by zero.
        up_share = (up_total / all_votes) * 100 if all_votes else 0.0

        metric_list = 'Total Upvotes\nTotal Downvotes\nAverage # of Upvotes\nAverage # of Downvotes\n%ge of Upvotes'
        stat_list = '\n'.join([
            str(up_total),
            str(down_total),
            '%.2f' % (up_avg),
            '%.2f' % (down_avg),
            '%.2f' % (up_share) + ' %',
        ])

        board_embed.add_field(name='Metric', value=metric_list, inline=True)
        board_embed.add_field(name='Stats', value=stat_list, inline=True)
        await self.bot.send_message(channel, embed=board_embed)
        logging.info('Checking stats.')

    @commands.command(pass_context=True, no_pm=True)
    async def test(self, ctx):
        """Log ``'lol'`` when the server passes the status check."""
        passed = await self.check_and_dismiss(ctx)
        if passed:
            logging.info('lol')

    def generate_memer_board(self, ctx, method, lim, span):
        """Build the leaderboard embed for one ranking ``method``.

        method: ``number_up`` / ``number_down`` (summed reaction counts),
            ``%_up`` / ``%_down`` (upvote share, best or worst first),
            ``avg_up``, or anything else for average downvotes.
        lim: number of entries when ``span`` is false; otherwise a time span
            where ``'1d'`` and ``'1w'`` are recognised and every other value
            means four weeks.
        span: whether ``lim`` is a time span.

        At most 10 entries are listed. Returns the embed, or ``None`` when
        the server is not known to the database.
        """
        db_server = self.session.query(Server).filter(
            Server.discord_id == str(ctx.message.server.id)).first()
        if db_server is None:
            return None

        message_count = 10
        start_date = datetime.now()
        if not span:
            message_count = int(lim)
            start_date = datetime.min
        elif lim == '1d':
            start_date -= timedelta(hours=24)
        elif lim == '1w':
            start_date -= timedelta(weeks=1)
        else:
            start_date -= timedelta(weeks=4)

        # Until rich embeds are switched for generic messages
        message_count = min(message_count, 10)

        up_total = func.sum(Message.rx1_count)
        down_total = func.sum(Message.rx2_count)
        is_ratio = method in ('%_up', '%_down')

        # The rankings differ only in the aggregated statistic, the sort
        # direction and the heading, so one query serves all of them.
        if method == 'number_up':
            stat, ordering, heading = up_total, up_total.desc(), 'Top Memers'
        elif method == 'number_down':
            stat, ordering = down_total, down_total.desc()
            heading = 'Shit Memers'
        elif is_ratio:
            # NULLIF turns a zero vote total into NULL instead of a division
            # by zero inside the database.
            stat = cast(up_total, Float) / func.nullif(
                cast(up_total, Float) + cast(down_total, Float), 0,
                type_=Float)
            if method == '%_up':
                ordering, heading = stat.desc(), 'Top Memers'
            else:
                ordering, heading = stat.asc(), 'Shit Memers'
        elif method == 'avg_up':
            stat = func.avg(Message.rx1_count)
            ordering, heading = stat.desc(), 'Top Memers by average'
        else:  # avg_down
            stat = func.avg(Message.rx2_count)
            ordering, heading = stat.desc(), 'Shit Memers by average'

        memers = self.session.query(Message.user_id, stat).filter(
            Message.server_id == db_server.id,
            Message.created_at > start_date).group_by(
                Message.user_id).order_by(ordering).limit(
                    message_count).all()

        board_embed = discord.Embed(title='Leaderboard')
        board_embed.set_author(
            name='LeaderBOYT',
            url='https://github.com/itsmehemant123/me-discord-leaderboard',
            icon_url=
            'https://photos.hd92.me/images/2018/03/23/martin-shkreli.png')

        user_lines = []
        stat_lines = []
        for rank, (user_id, value) in enumerate(memers, start=1):
            user = self.session.query(User).filter(User.id == user_id).first()
            nick = self.session.query(Nickname).filter(
                Nickname.user_id == user_id,
                Nickname.server_id == db_server.id).first()
            if nick is None or nick.display_name == '':
                nickname = user.display_name
            else:
                nickname = nick.display_name
            user_lines.append(str(rank) + ') ' + nickname + '\n')

            if method == 'number_up':
                stat_lines.append(str(value) + ' ' + db_server.rx1 + '\n')
            elif method == 'number_down':
                stat_lines.append(str(value) + ' ' + db_server.rx2 + '\n')
            elif is_ratio:
                # A NULL ratio (no votes at all) is shown as 0 %.
                stat_lines.append('%.2f' % ((value or 0) * 100) + '% ' +
                                  db_server.rx1 + '/' + db_server.rx2 + '\n')
            elif method == 'avg_up':
                stat_lines.append('%.2f' % (value) + ' ' + db_server.rx1 +
                                  '\n')
            else:
                stat_lines.append('%.2f' % (value) + ' ' + db_server.rx2 +
                                  '\n')

        board_embed.add_field(name=heading,
                              value=''.join(user_lines),
                              inline=True)
        board_embed.add_field(name='Stats',
                              value=''.join(stat_lines),
                              inline=True)

        return board_embed

    async def readmeme(self, message):
        """Store an incoming message as a new ``Message`` row.

        Messages without server or author, from a server unknown to the
        database, or rejected by ``is_correct_channel_and_message`` are
        discarded. An unknown author is created first. The row starts with
        both reaction counts at 1. A failed commit is rolled back and
        re-raised.
        """
        logging.info('Processing incoming meme.')
        current_user = message.author
        current_server = message.server

        if current_server is None or current_user is None:
            logging.info('Missing info. Discarding.')
            return

        db_server = self.session.query(Server).filter(
            Server.discord_id == current_server.id).first()
        if db_server is None:
            return
        if not self.is_correct_channel_and_message(message, db_server):
            logging.info('Not in selected channel. Discarding.')
            return

        db_user = self.session.query(User).filter(
            User.discord_id == current_user.id).first()
        if db_user is None:
            db_user = User(current_user.id, current_user.name,
                           current_user.display_name)
            self.session.add(db_user)

        content = self.get_message_content(message)
        self.session.add(
            Message(message.id, db_server, db_user, content, message.timestamp,
                    1, 1))
        try:
            self.session.commit()
        except Exception:
            # Without a rollback the shared session would reject every later
            # message as well.
            self.session.rollback()
            raise
        logging.info('Wrote new meme.')

    async def add_reaction(self, reaction, user):
        """Handle an added reaction by re-syncing the message's counters.

        Delegates to ``update_reactions`` and then logs the event; the log
        line is written even if ``update_reactions`` returned early.
        """
        self.update_reactions(reaction, user)
        logging.info('Add reaction.')

    async def remove_reaction(self, reaction, user):
        """Handle a removed reaction by re-syncing the message's counters.

        Delegates to ``update_reactions`` and then logs the event; the log
        line is written even if ``update_reactions`` returned early.
        """
        self.update_reactions(reaction, user)
        logging.info('Remove reaction.')

    async def clear_reaction(self, message, reactions):
        """Reset both stored reaction counters of ``message`` to zero.

        Nothing happens (and nothing is logged) when the message has no row
        in the database. ``reactions`` is accepted but not used.
        """
        lookup = self.session.query(Message).filter(
            Message.discord_id == str(message.id))
        stored = lookup.first()

        if stored is not None:
            stored.rx1_count = stored.rx2_count = 0
            self.session.commit()
            logging.info('Clear reaction.')

    async def check_and_dismiss(self, ctx, is_being_configured=False):
        """Return whether the invoking server is ready, nagging if it is not.

        The server id from ``ctx.message`` is passed to ``check_status``. When
        that reports a falsy result, a hint to run ``!init`` is sent to the
        channel the command came from. The ``check_status`` result is returned
        unchanged in both cases.
        """
        origin = ctx.message
        configured = self.check_status(origin.server.id, is_being_configured)
        if configured:
            return configured

        await self.bot.send_message(
            origin.channel,
            'Bot not configured yet. Run `!init` to get started.')
        return configured

    def update_reactions(self, reaction, user):
        """Sync a message's stored reaction counter with the given reaction.

        Args:
            reaction: The reaction that changed; its ``message``, ``emoji``
                and ``count`` attributes are read.
            user: The user who added or removed the reaction.

        The update is skipped when the server has no database row or when
        ``is_correct_channel_and_message`` rejects the message. If the message
        is not stored yet, a row is created with both counters at 0 and is
        attributed to the message's *author* (falling back to ``user`` only
        when the message exposes no author). Only the counter whose emoji
        matches the server's ``rx1`` or ``rx2`` is overwritten with
        ``reaction.count``; the session is committed either way.
        """
        current_message = reaction.message
        current_server = current_message.server

        db_server = self.session.query(Server).filter(
            Server.discord_id == str(current_server.id)).first()
        if db_server is None:
            # Lol, gotem
            logging.info('Server not found, bot not configured for: ' +
                         str(current_server.id) + ':' + current_server.name)
            return
        if not self.is_correct_channel_and_message(current_message,
                                                   db_server):
            return

        def get_or_create_user(member):
            """Return the User row for ``member``, staging a new one if absent."""
            row = self.session.query(User).filter(
                User.discord_id == str(member.id)).first()
            if row is None:
                row = User(member.id, member.name, member.display_name)
                self.session.add(row)
            return row

        # The reacting user has always been registered as a side effect of a
        # reaction event; keep doing so for backward compatibility.
        db_reactor = get_or_create_user(user)

        db_message = self.session.query(Message).filter(
            Message.discord_id == str(current_message.id)).first()
        if db_message is None:
            # The message belongs to whoever posted it, not to whoever reacted
            # first; crediting the reactor would skew per-user statistics.
            author = current_message.author
            if author is None or str(author.id) == str(user.id):
                # Reuse the row resolved above so a self-reaction cannot stage
                # the same user twice.
                db_author = db_reactor
            else:
                db_author = get_or_create_user(author)

            content = self.get_message_content(current_message)
            db_message = Message(str(current_message.id), db_server, db_author,
                                 content, current_message.timestamp, 0, 0)
            self.session.add(db_message)

        emoji = str(reaction.emoji)
        if emoji == db_server.rx1:
            db_message.rx1_count = reaction.count
        elif emoji == db_server.rx2:
            db_message.rx2_count = reaction.count

        self.session.commit()
        logging.info('Updated reactions.')

    def update_nickname(self, before, after):
        """Store a member's per-server display name after a profile update.

        Returns immediately when ``after.nick`` is None, and also when the
        member's server has no database row. A missing ``User`` row is staged
        from ``after``. The ``Nickname`` row for the (user, server) pair is
        then created or overwritten with ``after.display_name`` and the
        session is committed.
        """
        if after.nick is None:
            logging.info('Not a nick update, skipping.')
            return

        session = self.session
        user_row = session.query(User).filter(
            User.discord_id == before.id).first()
        server_row = session.query(Server).filter(
            Server.discord_id == before.server.id).first()

        if server_row is None:
            logging.info(
                'Update nick attempt on uninitialized server, aborting.')
            return

        if user_row is None:
            user_row = User(after.id, after.name, after.nick)
            session.add(user_row)

        nick_row = session.query(Nickname).filter(
            Nickname.user_id == user_row.id,
            Nickname.server_id == server_row.id).first()
        if nick_row is not None:
            nick_row.display_name = after.display_name
        else:
            session.add(Nickname(user_row, server_row, after.display_name))

        session.commit()
        logging.info('Updated nickname')

    def is_correct_channel_and_message(self, message, server):
        """Tell whether ``message`` is a link posted in the server's channel.

        The message must come from the channel whose id (as a string) equals
        ``server.channel``, and its content (as returned by
        ``get_message_content``) must start with "http", contain both "/" and
        ".", and contain no space.
        """
        if server.channel != str(message.channel.id):
            return False

        text = self.get_message_content(message)
        # stolen from dino
        looks_like_link = (text.startswith("http") and "/" in text
                           and "." in text and " " not in text)
        return looks_like_link

    def get_message_content(self, message):
        """Return the message text, or its attachment URLs when it has none.

        When ``message.content`` is None or empty, the ``'url'`` entries of
        ``message.attachments`` are joined with newlines instead.
        """
        text = message.content
        if text is not None and text != '':
            return text

        return '\n'.join(
            attachment['url'] for attachment in message.attachments)

    def check_status(self, server_id, is_being_configured):
        """Report whether the server with this Discord id may run commands.

        Returns False when no ``Status`` row is joined to the server, True
        when a row exists and ``is_being_configured`` is truthy, and otherwise
        whether the row's ``server_status`` equals 2.
        """
        record = (self.session.query(Status)
                  .join(Server)
                  .filter(Server.discord_id == server_id)
                  .first())

        if record is None:
            return False
        if is_being_configured:
            return True

        # NOTE(review): 2 presumably means "fully configured" -- confirm
        # against wherever server_status is written.
        return record.server_status == 2

    def is_int(self, val):
        """Return True when ``int(val)`` succeeds, False otherwise.

        Both ``ValueError`` (e.g. a non-numeric string) and ``TypeError``
        (e.g. ``None`` or a list) count as "not an int", so callers always get
        a boolean back instead of an exception for unsupported inputs.
        """
        try:
            int(val)
        except (TypeError, ValueError):
            return False
        return True

    def shutdown(self):
        """Close the database session held by this instance."""
        self.session.close()
Exemple #9
0
def update_crawler_timestamp():
    """Insert a ``CrawlerTimestamp`` row stamped with ``datetime.now()``.

    A dedicated session is opened for the insert and is always closed
    afterwards, including when the commit fails, so it never lingers holding
    a connection or a half-finished transaction.
    """
    session = Session()
    try:
        session.add(CrawlerTimestamp(crawled_at=datetime.now()))
        session.commit()
    finally:
        session.close()
Exemple #10
0
class GuardianDataCrawler():
    """Load the Guardian's Australian COVID-19 JSON feed into the database.

    Each run wipes and refills the ``GuardianAustraliaData`` (daily updates)
    and ``AustraliaLatest`` (latest totals) tables from one download.
    """

    def __init__(self):
        self.session = Session()

    def crawl_data(self):
        """Download the feed once and refresh both tables from it."""
        print("[START] Crawl Guardian Australian data")
        url = "https://interactive.guim.co.uk/docsdata/1q5gdePANXci8enuiS4oHUJxcxC13d6bjMRSicakychE.json"
        response = requests.get(url)
        data = response.json()

        self.crawl_daily_data(data)
        self.crawl_latest_data(data)
        print("[END] Crawl Guardian Australian data")

    def crawl_latest_data(self, data):
        """Replace ``AustraliaLatest`` with the feed's "latest totals" sheet.

        Args:
            data: Parsed feed; ``data["sheets"]["latest totals"]`` must be an
                iterable of dicts keyed by the sheet's column titles.
        """
        self.session.query(AustraliaLatest).delete(synchronize_session=False)
        print("[START] Crawl latest Australia stats")
        for row in data["sheets"]["latest totals"]:
            self.session.add(AustraliaLatest(
                state=row["State or territory"],
                state_name=row["Long name"],
                confirmed=parseEmptyStringToInteger(row["Confirmed cases (cumulative)"]),
                deaths=parseEmptyStringToInteger(row["Deaths"]),
                recovered=parseEmptyStringToInteger(row["Recovered"]),
                active_cases=parseEmptyStringToInteger(row["Active cases"]),
                test_conducted=parseEmptyStringToInteger(row["Tests conducted"]),
                tests_per_million=parseEmptyStringToInteger(row["Tests per million"]),
                percent_positive=row["Percent positive"],
                current_hospitalisation=row["Current hospitalisation"],
                current_icu=row["Current ICU"],
                current_in_ventilator=parseEmptyStringToInteger(row["Current ventilator use"]),
                last_updated=row["Last updated"]
            ))
        self.session.commit()
        print("[END] Crawl latest Australia stats")

    def crawl_daily_data(self, data):
        """Replace ``GuardianAustraliaData`` with the feed's "updates" sheet.

        Missing columns fall back to the defaults given to ``row.get``. The
        number of rows actually added is printed once the commit succeeds.

        Args:
            data: Parsed feed; ``data["sheets"]["updates"]`` must be an
                iterable of dicts keyed by the sheet's column titles.
        """
        print("[START] Crawl daily updates data")
        self.session.query(GuardianAustraliaData).delete(synchronize_session=False)
        # Start from zero so the figure reported below equals the number of
        # rows added rather than one more than that.
        inserted = 0

        for row in data["sheets"]["updates"]:
            self.session.add(GuardianAustraliaData(
                community=parseEmptyStringToInteger(row.get("Community", 0)),
                community_unknown=parseEmptyStringToInteger(row.get("Community - no known source", 0)),
                confirmed=parseEmptyStringToInteger(row.get("Cumulative case count", 0)),
                recovered=parseEmptyStringToInteger(row.get("Recovered (cumulative)", 0)),
                deaths=parseEmptyStringToInteger(row.get("Cumulative deaths", 0)),
                date=parseDateString(row.get("Date")),
                hospitalisation=parseEmptyStringToInteger(row.get("Hospitalisations (count)", 0)),
                intensive_care=parseEmptyStringToInteger(row.get("Intensive care (count)", 0)),
                notes=row.get("Notes", ""),
                under_60=parseEmptyStringToInteger(row.get("Under 60", 0)),
                over_60=parseEmptyStringToInteger(row.get("Over 60", 0)),
                state=row.get("State", ""),
                test_conducted_neg=parseEmptyStringToInteger(row.get("Tests conducted (negative)", 0)),
                test_conducted_tot=parseEmptyStringToInteger(row.get("Tests conducted (total)", 0)),
                travel_related=parseEmptyStringToInteger(row.get("Travel-related", 0)),
                under_investigation=parseEmptyStringToInteger(row.get("Under investigation", 0)),
                # NOTE(review): default is 0 although the other text fields
                # default to "" -- confirm the column type before changing.
                update_source=row.get("Update Source", 0),
                ventilator_usage=parseEmptyStringToInteger(row.get("Ventilator usage (count)", 0))
            ))
            inserted += 1

        self.session.commit()
        print(f"Successfully insert {inserted} rows of Guardian australian data")
        print("[END] Crawl daily updates data")
class DatahubCrawler():
    """Load the datahub.io COVID-19 CSV resources into the database.

    The constructor reads the data package descriptor and remembers the CSV
    path of each of the three tabular resources this crawler knows about;
    ``crawl_data`` then replaces the matching tables with the CSV contents.
    """

    # Data package resource name -> attribute that stores its CSV path.
    _RESOURCE_ATTRS = {
        "time-series-19-covid-combined": "time_series_csv",
        "countries-aggregated": "country_aggregate_csv",
        "worldwide-aggregated": "world_aggregate_csv",
    }

    def __init__(self):
        data_url = 'https://datahub.io/core/covid-19/datapackage.json'

        # to load Data Package into storage
        package = datapackage.Package(data_url)

        # to load only tabular data
        resources = package.resources
        self.time_series_csv = ""
        self.country_aggregate_csv = ""
        self.world_aggregate_csv = ""
        self.session = Session()
        print("Fetching dataset from datahub")
        for resource in resources:
            if not resource.tabular:
                continue
            attr = self._RESOURCE_ATTRS.get(resource.descriptor.get("name"))
            if attr is not None:
                setattr(self, attr, resource.descriptor['path'])

    def crawl_data(self):
        """Refresh all three tables from the paths found by the constructor."""
        self.crawl_time_series_data(self.time_series_csv)
        self.crawl_country_aggregated_data(self.country_aggregate_csv)
        self.crawl_world_aggregated_data(self.world_aggregate_csv)

    @staticmethod
    def _zero_if_blank(value):
        """Return ``'0'`` for an empty CSV cell, otherwise the cell unchanged."""
        return value if value != '' else '0'

    def _replace_table_from_csv(self, file_url, model, build_record, label):
        """Replace every row of ``model`` with records parsed from a CSV URL.

        Args:
            file_url: URL of the CSV file to stream.
            model: Mapped class whose existing rows are deleted first.
            build_record: Callable turning one non-empty CSV row (a list of
                strings) into a ``model`` instance.
            label: Human-readable dataset name used in the progress output.

        The first CSV row is treated as the header and skipped; empty rows are
        ignored. The reported count is the number of records inserted.
        """
        print(f"[START]Insert {label} data")
        # Report the URL actually being fetched, which may differ from the
        # path stored on the instance when a caller passes its own.
        print(f"Crawl data using {file_url}")
        with requests.get(file_url, stream=True) as response:
            lines = (line.decode('utf-8') for line in response.iter_lines())
            self.session.query(model).delete()
            reader = csv.reader(lines)
            next(reader, None)  # header row
            records = [build_record(row) for row in reader if row]
            self.session.add_all(records)
            self.session.commit()
        print(
            f"[END]Insert {label} data. Success inserting {len(records)} records"
        )

    def crawl_time_series_data(self, file_url: str):
        """Replace ``TimeSeriesData`` with the time-series CSV at ``file_url``.

        Columns are read by position: date, country, state, lat, long,
        confirmed, recovered, death. Blank counts are stored as ``'0'``.
        """
        blank = self._zero_if_blank

        def build(row):
            return TimeSeriesData(date=row[0],
                                  country=row[1],
                                  state=row[2],
                                  lat=row[3],
                                  long=row[4],
                                  confirmed=blank(row[5]),
                                  recovered=blank(row[6]),
                                  death=blank(row[7]))

        self._replace_table_from_csv(file_url, TimeSeriesData, build,
                                     "time series")

    def crawl_country_aggregated_data(self, file_url: str):
        """Replace ``CountryAggregated`` with the per-country CSV at ``file_url``.

        Columns are read by position: date, country, confirmed, recovered,
        death. Blank counts are stored as ``'0'``.
        """
        blank = self._zero_if_blank

        def build(row):
            return CountryAggregated(date=row[0],
                                     country=row[1],
                                     confirmed=blank(row[2]),
                                     recovered=blank(row[3]),
                                     death=blank(row[4]))

        self._replace_table_from_csv(file_url, CountryAggregated, build,
                                     "country aggregated")

    def crawl_world_aggregated_data(self, file_url: str):
        """Replace ``WorldwideAggregated`` with the worldwide CSV at ``file_url``.

        Columns are read by position: date, confirmed, recovered, death.
        Blank counts are stored as ``'0'``.
        """
        blank = self._zero_if_blank

        def build(row):
            return WorldwideAggregated(date=row[0],
                                       confirmed=blank(row[1]),
                                       recovered=blank(row[2]),
                                       death=blank(row[3]))

        self._replace_table_from_csv(file_url, WorldwideAggregated, build,
                                     "world aggregated")
        print("Finish run DataHub crawler")
Exemple #12
0
from models.prediction import prediction
import datetime

# Create the tables registered on Base.metadata before any seeding happens.
Base.metadata.create_all(engine)

session = Session()

# Seed three points of interest (name, latitude, longitude).
praca = poi("praca", 4.124124124, 15.123123123)
avenida = poi("avenida", 4.154152324, 12.124562123)
museu = poi("museu", 4.124124124, 15.123123123)

session.add_all([praca, avenida, museu])
session.commit()

# Seed one camera per point of interest (ip address, poi id).
# NOTE(review): assumes the rows above received ids 1-3 -- confirm.
camera1 = camera("0.0.0.0", 1)
camera2 = camera("1.1.1.1", 2)
camera3 = camera("2.2.2.2", 3)

session.add_all([camera1, camera2, camera3])
session.commit()
'''
prediction1 = prediction("low_density", 1)
prediction2 = prediction("medium_low_density", 2)
prediction3 = prediction("high_density", 3)
'''
def insertPOI(poiname, geolat, geolong):
    """Insert one point of interest and commit it.

    Args:
        poiname: Name of the point of interest.
        geolat: Latitude passed through to ``poi``.
        geolong: Longitude passed through to ``poi``.

    A dedicated session is used and is always closed afterwards, including
    when the commit fails.
    """
    session = Session()
    try:
        session.add(poi(poiname, geolat, geolong))
        session.commit()
    finally:
        session.close()
def insertPrediction(predict, id_camara):
    """Insert one prediction stamped with the current UTC time and commit it.

    Args:
        predict: Prediction value passed through to ``prediction``.
        id_camara: Identifier of the camera the prediction belongs to.

    A dedicated session is used and is always closed afterwards, including
    when the commit fails.
    """
    session = Session()
    try:
        timestamp = datetime.datetime.now(datetime.timezone.utc)
        session.add(prediction(predict, id_camara, timestamp))
        session.commit()
    finally:
        session.close()
def insertCamera(ip_address, poi_id):
    """Insert one camera and commit it.

    Args:
        ip_address: Address of the camera, passed through to ``camera``.
        poi_id: Identifier of the point of interest the camera watches.

    A dedicated session is used and is always closed afterwards, including
    when the commit fails.
    """
    session = Session()
    try:
        session.add(camera(ip_address, poi_id))
        session.commit()
    finally:
        session.close()