def handle(self):
        """Main service loop: load DB settings, start worker threads, wait.

        Reads connection settings from the [DB] section of config.ini,
        starts a DbHandler thread plus request/receive listener threads,
        then blocks until both listeners finish before signalling the DB
        thread to stop.
        """
        # Run flag; presumably checked by the listener threads — confirm.
        self.is_run = True

        # Work queues shared between the listener threads and the DB thread.
        req_queue = Queue()
        rcv_queue = Queue()

        config = configparser.ConfigParser()
        config.read("config.ini")

        db_host = config.get("DB", "HOST")
        db_port = int(config.get("DB", "PORT"))
        db_user = config.get("DB", "USER")
        db_pw = config.get("DB", "PASSWORD")
        db_name = config.get("DB", "DB_NAME")
        db_charset = config.get("DB", "CHAR_SET")

        db_handler_thd = DbHandler(req_queue, rcv_queue, db_host, db_port, db_user, db_pw, db_name, db_charset)
        db_handler_thd.start()

        req_thd = threading.Thread(target=self._request_listener, args=(req_queue,))
        req_thd.start()

        rcv_thd = threading.Thread(target=self._receive_listener, args=(rcv_queue,))
        rcv_thd.start()

        # Block until both listeners exit on their own.
        req_thd.join()
        rcv_thd.join()

        # Ask the DB thread to stop.  NOTE(review): it is never join()ed,
        # so shutdown completion is not awaited — confirm that is intended.
        db_handler_thd.is_run = False
 def __init__(self):
     """Start a fresh session: no user or stock selected, defaults applied."""
     # Identity / selection state.
     self.current_user = None
     self.current_stock = None  # stock the user is currently viewing
     self.stock_dict = {}  # maps stock_id -> stock_ticker
     # Cached dataframe for the active stock; replaced on stock switch.
     self.current_df = None
     # Change flags consulted when redrawing the graph.
     self.stock_change = True
     self.time_change = False
     self.timeperiod = '1M'  # graph timeframe selector
     self.dbh = DbHandler()  # database access object
    def __init__(self):
        """
        Initializes model instance and gets needed data from instagram
        :return: None
        """
        print('starting session and getting headers ... ', end='')
        # One shared requests session keeps cookies/headers across calls.
        self.session = requests.Session()

        # Performs the initial login round-trip and stores the CSRF token.
        self.get_initial_headers()
        print(colored('DONE', 'green'))
        self.db_handler = DbHandler()  # persistence layer (e.g. whitelists)
# Beispiel #4 — scraped example separator (commented out so the text is not parsed as code)
# 0
 def __init__(self, db_handler: DbHandler):
     """Remember the DB handler and prime the HTTPS proxy rotation state."""
     self.db_handler = db_handler
     # Proxy rows come straight from the database.
     self.https_proxies = db_handler.select_proxies()
     # NOTE(review): rotation starts at index 1, skipping row 0 — confirm intended.
     self.proxy_number = 1
     first_proxy = self.https_proxies[self.proxy_number][0]
     self.proxy_dict = {"https": "https://" + first_proxy}
 def run(self):
     """Poll loop: deliver due reminders over VK, then delete them.

     Runs forever: fetches notes whose reminder time has arrived, sends
     each to its owner via messages.send and removes the delivered note.
     """
     while True:
         notes = DbHandler.get_note_by_current_time()
         if len(notes) > 0:
             for item in notes:
                 # NOTE(review): row layout appears to be
                 # (0=note id, 1=date, 3=text, 4=vk user id) — confirm schema.
                 text = ""
                 text = text + ("Дата напоминания: " + str(item[1]) + "\n" +
                                "Содержимое: " + str(item[3]) + "\n\n")
                 vk.method(
                     'messages.send', {
                         'user_id': item[4],
                         'message': text,
                         # random_id deduplicates the send on VK's side.
                         'random_id': random.getrandbits(64)
                     })
                 DbHandler.delete_note(item[0])
         # Short sleep keeps the loop from spinning at 100% CPU.
         time.sleep(0.01)
# Beispiel #6 — scraped example separator (commented out so the text is not parsed as code)
# 0
 def new_message(self, message):
     """Dispatch one incoming chat message to the matching bot command.

     Registers the sender in the DB, then matches the text against
     _COMMANDS: help, create reminder, list reminders, random meme.
     Unknown input gets a fallback reply.
     """
     # Ensure the sender exists in the DB before handling any command.
     DbHandler.add_user(self._USER_ID, self._USERNAME)
     if message.upper() == self._COMMANDS[0]:
         # Greeting/help: list the available commands.
         self.send_message(
             f"Привет-привет, {self._USERNAME}! \n " +
             f"Список команд: \n\n" +
             f"ПРИВЕТ - получить список команд \n\n" +
             f"ЗАПОМНИ - создать напоминание. Например, запомни/Вынести мусор/2020-12-20 10:00/ \n\n"
             +
             f"ПОКАЖИ - показать список всех напоминаний. Если ввести ПОКАЖИ, то выведутся"
             +
             f" все заметки за все время, если ввести ПОКАЖИ/2020-10-10/, "
             f"то выведутся все заметки на эту дату \n\n" +
             f"ПОВЕСЕЛИ - получить случайный мем")
         # Commented-out future commands (kept from the original author):
         # f"ХОЧУ - получить список доступных жанров мемов \n\n" +
         # f"МНЕ НРАВИТСЯ - задать свои предпочтения в мемах \n\n" +
     elif str(message).upper().startswith(self._COMMANDS[1]):
         # Reminder: "запомни/<text>/<datetime>/" — parts[1] is the text,
         # parts[2] the due time.
         command = str(message).split('/')
         DbHandler.add_note(self._USER_ID, command[1], command[2])
         self.send_message("Готово, бро!")
     elif str(message).upper().startswith(self._COMMANDS[2]):
         # List: no date given -> all notes; with date -> notes for that date.
         command = str(message).split('/')
         if len(command) == 1:
             self.send_message(DbHandler.get_notes_by_user_id(
                 self._USER_ID))
         else:
             self.send_message(
                 DbHandler.get_notes_by_user_id(self._USER_ID, command[1]))
     elif message.upper() == self._COMMANDS[3]:
         # Random meme attachment.
         self.send_attachment(DbHandler.get_random_image())
     else:
         self.send_message("Неизвестная команда")
# Beispiel #7 — scraped example separator (commented out so the text is not parsed as code)
# 0
 def setUp(self):
     """Per-test fixture: DB connection, Flask test client, known user id."""
     self.db = DbHandler()  # connection to the test database
     self.app = app.test_client()  # Flask test client for HTTP calls
     self.app.testing = True  # propagate exceptions into the tests
     # A user the test database is expected to contain already.
     self.user_id = "aaa10022-38b0-4a1a-95af-776f35aa2b8f"
class MainWindow(QWidget):
    """Top-level expense-review window: balance label, expense table, actions."""

    def __init__(self):
        super().__init__()

        self.db = DbHandler()  # database access for balance/expense queries
        self.initUI()

    def initUI(self):
        """Build the static layout and populate it with current DB data."""
        grid = QGridLayout()
        grid.setSpacing(1)

        # Balance readout; refreshed again by load_db_data().
        self.balance = QLabel(f'Balance: {self.db.get_balance()}')
        grid.addWidget(self.balance, 0, 0)

        self.expense_table = QTableWidget(self)
        self.load_db_data()
        grid.addWidget(self.expense_table, 1, 0)

        refresh_expenses_btn = QPushButton('Refresh Expenses', self)
        refresh_expenses_btn.clicked.connect(self.load_db_data)
        grid.addWidget(refresh_expenses_btn, 2, 0)

        add_expense_btn = QPushButton('Add Expense', self)
        add_expense_btn.clicked.connect(self.add_expense_window)
        grid.addWidget(add_expense_btn, 3, 0)

        self.setLayout(grid)

        self.setGeometry(300, 300, 600, 500)
        self.setWindowTitle('Review')
        self.show()

    def load_db_data(self):
        """Reload balance and expense rows from the database into the table.

        Bug fix: the table was pre-sized to 5 rows and then rows were
        *inserted* in front of them, so 5 stale blank rows lingered after
        every refresh.  The table is now cleared to zero rows and grown
        one insertRow per fetched record; the redundant
        setColumnCount(0)/setRowCount(5) churn is gone.
        """
        self.balance.setText(f'Balance: {self.db.get_balance()}')

        # Start from an empty table; rows are inserted per DB record below.
        self.expense_table.setRowCount(0)
        self.expense_table.setColumnCount(5)
        self.expense_table.setHorizontalHeaderLabels(
            ['Amount', 'Description', 'Category', 'Date', 'Balance'])

        expenses = self.db.get_all_expenses()

        for row_idx, row_data in enumerate(expenses):
            self.expense_table.insertRow(row_idx)
            # Column 0 of the DB row (its id) is skipped; the remaining
            # fields map onto the five visible columns.
            for column_idx, data in enumerate(row_data):
                if column_idx > 0:
                    self.expense_table.setItem(row_idx, column_idx - 1,
                                               QTableWidgetItem(str(data)))

    def add_expense_window(self):
        """Open the Add Expense popup (kept on self so it is not GC'd)."""
        self.add_expense_popup = AddExpensePopup()
# Beispiel #9 — scraped example separator (commented out so the text is not parsed as code)
# 0
 def do_POST(self):
     """Handle POST: validate the body, store the secret, reply with JSON.

     Reads Content-Length bytes from the request; invalid payloads get an
     error response, valid ones are stored and echoed back.
     """
     content_length = int(self.headers['Content-Length'])
     post_data = self.rfile.read(content_length).decode('UTF-8')
     if not PostHandler.check_validity(post_data):
         # NOTE(review): 405 is "Method Not Allowed"; 400 would usually
         # fit a malformed body — confirm the intended status code.
         self.set_response(405, "message", "Wrong input")
     else:
         values = PostHandler.unpack_values(post_data)
         secret_data = DbHandler.post_secret(values["secret"],
                                             values["expireAfter"],
                                             values["expireAfterViews"])
         self.set_response(200, "data", secret_data)
# Beispiel #10 — scraped example separator (commented out so the text is not parsed as code)
# 0
async def test_containing_db(loop):
    """Populate the records DB with four years of seasonal anime data.

    Pulls every season of 2015-2018 from the Jikan API and writes one
    record per show (plus its tags) into PostgreSQL via DbHandler.

    Fix: removed the dead local ``year_2000`` which was assigned and
    never used.

    :param loop: asyncio event loop handed to AioJikan
    :return: None
    """
    async def write_data_to_db(record, recid, tags, db=None):
        # One record row, then one row per tag; duplicate tags and
        # duplicate (tag, record) links are tolerated by swallowing
        # UniqueViolationError.
        await db.insert_to_table('records', record)
        for tag in tags:
            try:
                await db.insert_to_table('tags', {'tagname': tag},
                                         ignore_conflict=[
                                             'tagname',
                                         ])
            except UniqueViolationError:
                pass
            try:
                await db.insert_to_table('records_tags', {
                    'tagname': tag,
                    'recordid': recid
                })
            except UniqueViolationError:
                pass

    aio_jikan = AioJikan(loop=loop)

    cfg = Config()
    pg_pool = await asyncpg.create_pool(cfg.DB_ADDRESS)
    db = DbHandler(pg_pool=pg_pool, cfg=cfg)

    seasons = ['winter', 'spring', 'summer', 'fall']

    for year in range(2015, 2019):
        for season in seasons:

            print(f'[+] reading {season} in {year}')

            season_year = await aio_jikan.season(year=year, season=season)

            for item in season_year['anime']:
                title = item['title']
                # Normalized tag: title words joined by '_', lowercased.
                title_tag = ('_'.join(re.findall(r'\W?(\w+)\W?',
                                                 title))).lower()
                recid = recid_gen()
                record = {
                    'recordid': recid,
                    'username': '******',
                    'record_headline': title,
                    'record_text': f'{title} ({season} {year})'
                }
                tags = [title_tag, str(year), season]
                await write_data_to_db(record, recid, tags, db=db)

    await aio_jikan.close()
    await pg_pool.close()
# Beispiel #11 — scraped example separator (commented out so the text is not parsed as code)
# 0
def main():
    """Entry point: build SSH sync clients and path watchers from config.

    Side effects on module globals: appends to ``ssh_clients`` and
    (re)binds ``db``.  Installs SIGINT/SIGTERM handlers for cleanup.
    NOTE(review): ``path_watchers`` is local and discarded on return —
    presumably PathWatcher starts its own work in __init__; confirm.
    """
    conf = read_config()
    path_watchers = []
    global db
    db = DbHandler()
    for c in conf['sync']:
        # One SSH client per configured server, plus a watcher on its
        # source directory that syncs through that client.
        ssh_clients.append(
            SshClient(c['server'], c['dirs']['to'], c['dirs']['from'],
                      c['key_file']))
        path_watchers.append(
            PathWatcher(c['dirs']['from'], ssh_clients[-1], db))
    signal.signal(signal.SIGINT, clean_up)
    signal.signal(signal.SIGTERM, clean_up)
# Beispiel #12 — scraped example separator (commented out so the text is not parsed as code)
# 0
 def do_GET(self):
     """Handle GET: serve a stored secret for /v1/secret/<hash> requests.

     Bug fix: the hash was extracted with ``path.lstrip("/v1/secret/")``,
     but str.lstrip strips any leading run of the *characters* in that
     set, so hashes beginning with e.g. 'c', 'e', 'r', 's', 't', 'v',
     '1', '.' or '/' were silently truncated.  The prefix is now removed
     by slicing after its first occurrence.
     """
     # ignoring request for icon while using browser
     if "favicon.ico" not in self.path:
         path = self.path
         prefix = "/v1/secret/"
         if prefix in path:
             # Everything after the first occurrence of the prefix.
             requested_hash = path[path.index(prefix) + len(prefix):]
             secret = DbHandler.get_secret(requested_hash)
             if not secret:
                 self.set_response(404, "message", "Secret not found")
             else:
                 self.set_response(200, "data", secret)
         else:
             self.set_response(405, "message",
                               "You've reached wrong endpoint.")
    def __init__(self):
        """Wire up the scraper: DB, config, CLI args, logging and Reddit API."""
        self.db = DbHandler()
        self.config = configurator.get_config()
        self.args = self.parse_arguments()
        self.initialize_logger()

        # Reddit API credentials come from client_secret.json via get_id().
        _id = self.get_id()
        self.r = praw.Reddit(
            user_agent="PrawWallpaperDownloader 1.0.0 by /u/Pusillus",
            client_id=_id["id"],
            client_secret=_id["secret"])

        # Per-run counters.
        self.n_posts = 0
        self.albums = 0
        self.succeeded = 0
        self.failed = 0
        self.skipped = 0
        self.deleted = 0

        # NOTE(review): flag name suggests end-of-run notification — confirm.
        self.notify = False

        # Work-in-progress state: posts to handle, download queue, results.
        self.posts = []
        self.que = queue.Queue()
        self.downloaded_images = []
class Session:
    """Stores all of the current session variables"""
    def __init__(self):
        self.current_user = None
        self.current_stock = None  # Stores the current stock that the user is watching.
        self.stock_dict = {}  # {stock_id: stock_ticker}
        self.current_df = None  # Stores the current dataframe. Changes if the user changes the stock.
        self.stock_change = True  # Checks if the user changes the current stock.
        self.time_change = False  # Checks if the user changes the timeframe.
        self.timeperiod = '1M'  # Changes the graph
        self.dbh = DbHandler()  # Initialises a database handler

    def update(self):
        """Refresh the user's stock map from the database."""
        self.stock_dict = self.dbh.get_user_stocks(self)

    def current_stock_ticker(self):
        """Return the ticker for the selected stock, or None if unavailable.

        Bug fix: a dict lookup with a missing or None key raises KeyError,
        not AttributeError, so the original handler never fired and the
        lookup crashed instead of returning None.
        """
        try:
            return self.stock_dict[self.current_stock]
        except (KeyError, AttributeError):
            return None
    def __init__(self):
        """Initialise scraper state: collaborators first, then counters and queues."""
        # External collaborators.
        self.db = DbHandler()
        self.config = configurator.get_config()
        self.args = self.parse_arguments()
        self.initialize_logger()

        # Reddit client, authenticated from client_secret.json.
        _id = self.get_id()
        self.r = praw.Reddit(
            user_agent="PrawWallpaperDownloader 1.0.0 by /u/Pusillus",
            client_id=_id["id"],
            client_secret=_id["secret"],
        )

        # Run statistics.
        self.n_posts = 0
        self.albums = 0
        self.succeeded = 0
        self.failed = 0
        self.skipped = 0
        self.deleted = 0

        self.notify = False

        # Pending posts, the download work queue and finished downloads.
        self.posts = []
        self.que = queue.Queue()
        self.downloaded_images = []
class DbHandlerTests(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        pass

    @classmethod
    def tearDownClass(cls):
        pass

    def setUp(self):
        # create a connection to test database
        self.db = DbHandler()
        # default pre-configured user
        self.user_id = "aaa10022-38b0-4a1a-95af-776f35aa2b8f"

    def tearDown(self):
        pass

    def test_get_participant(self):
        participant = self.db.get_participant(self.user_id)
        self.assertNotEqual(participant, None)
        self.assertEqual(participant[1], self.user_id)
    def __init__(self):
        """Construct the widget, then create its database handle and UI."""
        QWidget.__init__(self)

        self.db = DbHandler()  # database access used by the UI
        self.initUI()
    def __init__(self):
        """Construct the widget, then create its database handle and UI."""
        super().__init__()

        self.db = DbHandler()  # database access used by the UI
        self.initUI()
class AddExpensePopup(QWidget):
    """Popup window for entering one expense (amount, description, date, category)."""

    def __init__(self):
        QWidget.__init__(self)

        self.db = DbHandler()  # database access for writing expenses
        self.initUI()

    def initUI(self):
        """Build the input form: amount, description, date, category, submit."""
        grid = QGridLayout()
        grid.setSpacing(1)

        amount_title = QLabel('Amount  ')
        grid.addWidget(amount_title, 0, 0)
        self.amount = QLineEdit(self)
        grid.addWidget(self.amount, 0, 1)

        description_title = QLabel('Description  ')
        grid.addWidget(description_title, 1, 0)
        self.description = QLineEdit(self)
        grid.addWidget(self.description, 1, 1)

        # Date defaults to today via placeholder; empty input means today.
        date_title = QLabel('Date (yyyy-mm-dd)  ')
        grid.addWidget(date_title, 2, 0)
        self.date = QLineEdit(self)
        self.date.setPlaceholderText(str(datetime.date.today()))
        grid.addWidget(self.date, 2, 1)

        category_title = QLabel('Category  ')
        grid.addWidget(category_title, 3, 0)
        self.category = QComboBox(self)
        self.category.addItem("food and drink")
        self.category.addItem("entertainment")
        self.category.addItem("self edification")
        self.category.addItem("video game")
        self.category.addItem("travel")
        self.category.addItem("bills")
        self.category.addItem("other")
        grid.addWidget(self.category, 3, 1)

        self.submit_button = QPushButton('Add Expense', self)
        self.submit_button.clicked.connect(self.add_expense)
        grid.addWidget(self.submit_button, 4, 2)

        self.setLayout(grid)
        self.setGeometry(300, 300, 500, 200)
        self.setWindowTitle('Add Expense')
        self.amount.setFocus()
        self.show()

    def add_expense(self):
        """Validate the form and write the expense to the DB, then reset it."""
        amount = self.amount.text()
        description = self.description.text()
        date = self.get_date()
        category = str(self.category.currentText())

        if self.are_valid_entries(amount, date):
            self.db.add_expense(amount, description, category, date)
            self.reset_add_expense_widgets()
        else:
            # ADD POPUP WARNING
            print("INVALID ENTRIES")

    def get_date(self):
        """Return the entered date, or today's date if the field is empty."""
        if self.date.text() == "":
            return str(datetime.date.today())
        return self.date.text()

    def print_expense_data(self, amount, description, category, date):
        """Debug helper: dump one expense's fields to stdout."""
        print(f"amount: {amount}")
        print(f"description: {description}")
        print(f"date: {date}")
        print(f"category: {category}")

    def are_valid_entries(self, amount, date):
        """Return True if amount parses as a float and date as yyyy-mm-dd."""
        try:
            float(amount)
        except ValueError:
            print("INVALID AMOUNT")
            return False

        try:
            datetime.datetime.strptime(date, "%Y-%m-%d")
        except ValueError:
            print("INVALID DATE")
            return False

        return True

    def reset_add_expense_widgets(self):
        """ Resets all widgets in AddExpensePopup window so another expense can be added."""
        self.amount.setText("")
        self.description.setText("")
        self.date.setText(str(datetime.date.today()))
# Beispiel #20 — scraped example separator (commented out so the text is not parsed as code)
# 0
import time
from flask import Flask
from flask import request
from db_handler import DbHandler
from flask_cors import CORS

# Flask application with CORS enabled; one DbHandler shared by all requests.
app = Flask(__name__)
db_handler = DbHandler()
CORS(app)


@app.route("/")
def hello_world():
    """Health-check style root endpoint."""
    return "<p>Hello, World!</p>"


@app.route("/subscribe", methods=['POST'])
def subscribe():
    """Subscribe an email address to an application's notifications.

    Expects a JSON body with 'email' and 'application' keys and responds
    with how long the DB write took.  NOTE(review): missing keys raise
    KeyError, and the ``global`` statement is unnecessary for read-only
    access — candidates for cleanup.
    """
    global db_handler
    print(request.json)  # debug dump of the raw payload
    email = request.json['email']
    application = request.json['application']
    print(email)
    print(application)
    # Time the DB write so the caller sees the subscription latency.
    before = time.time()
    db_handler.subscribe(email, application)
    after = time.time()
    return f"<p>{(after-before)*1000}ms</p>"


@app.route("/unsubscribe", methods=['POST'])
# Beispiel #21 — scraped example separator (commented out so the text is not parsed as code)
# 0
from crawler import Spider
from db_handler import DbHandler
import scrapy
from scrapy.crawler import CrawlerProcess

# input_handler = XlsHandler('./hubspot_list.xlsx','Meta Data')

# Shared database handle used to seed the crawl targets.
db = DbHandler()

process = CrawlerProcess(
    {'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'})

# Each DB row may hold several ';'-separated URLs, or be NULL.
temp_url_list = db.getUrls()
domain_list = db.getDomains()
url_list = []

for item in temp_url_list:
    # Fix: compare to None with identity (`is not None`), per PEP 8 —
    # `!= None` can be fooled by objects with a custom __eq__.
    if item is not None:
        for url in item.split(";"):
            url_list.append("https://" + url)

print(domain_list)
print(url_list)


class MySpider(Spider):
    """Crawls the URL list pulled from the database, limited to its domains."""
    allowed_domains = domain_list
    start_urls = url_list


process.crawl(MySpider)
 def setUp(self):
     """Per-test fixture: open a test-database connection, fix the user id."""
     # create a connection to test database
     self.db = DbHandler()
     # default pre-configured user
     self.user_id = "aaa10022-38b0-4a1a-95af-776f35aa2b8f"
class Model:
    """Instagram 'unfollower' model.

    Wraps one authenticated requests.Session against Instagram's private
    web API (login, paginated follower/following downloads via GraphQL),
    computes who does not follow the client back (minus a DB-backed
    whitelist), and pickles intermediate results under client_data/.
    """

    def __init__(self):
        """
        Initializes model instance and gets needed data from instagram
        :return: None
        """
        print('starting session and getting headers ... ', end='')
        self.session = requests.Session()

        self.get_initial_headers()
        print(colored('DONE', 'green'))
        self.db_handler = DbHandler()

    def get_initial_headers(self):
        """
        Sends to instagram initial requests to get needed http headers to session
        :return: None
        """
        q = 'https://www.instagram.com/accounts/login/ajax/'
        res = self.session.get(q)

        auth_headers = {
            'referer': 'https://www.instagram.com/accounts/login',
            'X-CSRFToken': res.cookies['csrftoken']
        }
        # NOTE(review): credentials are redacted placeholders from the
        # scraped source — real values must come from configuration.
        auth = {
            'username': '******',
            'enc_password':
            '******',
            'optIntoOneTap': 'false'
        }
        res = self.session.post(q, data=auth, headers=auth_headers)
        self.session.headers['X-CSRFToken'] = res.cookies['csrftoken']

    def specify_needed_headers_to_session(self):
        """
        Specifies all needed http headers to session
        :return: None
        """
        self.session.headers[
            'User-Agent'] = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0'
        self.session.headers['Accept'] = '*/*'
        self.session.headers['Accept-Language'] = 'en-US,en;q=0.5'
        self.session.headers['Accept-Encoding'] = 'gzip, deflate, br'
        self.session.headers[
            'X-IG-WWW-Claim'] = 'hmac.AR3Kny3HF-Th32yVAevSRIsFGdIwF_BK--Utv75fBqFu_g5b'
        self.session.headers['X-IG-App-ID'] = '936619743392459'
        self.session.headers['X-Requested-With'] = 'XMLHttpRequest'
        self.session.headers['DNT'] = '1'
        self.session.headers['Connection'] = 'keep-alive'
        self.session.headers['Pragma'] = 'no-cache'
        self.session.headers['Cache-Control'] = 'no-cache'
        # Flattens the cookie jar into a single "name=value; ..." header by
        # stringifying the list and stripping brackets/commas/quotes.
        self.session.headers['Cookie'] = [
            f'{x.name}={x.value}; ' for x in self.session.cookies
        ].__str__().replace('[', '').replace(']',
                                             '').replace(',',
                                                         '').replace('\'', '')

    def get_overall_account_information(self, username):
        """
        Downloads overall information abount account, like user_id,
        count of followers, count of following accounts
        :param username: client instagram username
        :return: tuple with (user_id, followers_count, following_count) if account exists,
            and (None, None, None) otherwise
        """

        user_id = None
        followers_info = None
        following_info = None

        self.session.headers[
            'Referer'] = f'https://www.instagram.com/{username}/'

        overall_account_info_url = f'https://www.instagram.com/{username}/?__a=1'
        res = self.session.get(overall_account_info_url,
                               headers=self.session.headers)
        self.session.headers['X-CSRFToken'] = res.cookies['csrftoken']

        if res.status_code == 200:
            res = res.json()
        else:
            return user_id, followers_info, following_info

        try:
            user_id = res['graphql']['user']['id']
            followers_info = res['graphql']['user']['edge_followed_by'][
                'count']
            following_info = res['graphql']['user']['edge_follow']['count']

        # Missing keys leave the defaults (None) in place.
        except KeyError as kerr:
            pass

        return user_id, followers_info, following_info

    def get_unfollowers(self, user_id, username, download=True):
        """
        Gets list of accounts who doesn't follow client
        :param user_id: user instagram id
        :param username: instagram username
        :param download: flag which says if its need to download data from instagram, or get from dump
        :return: list of tuples with accounts who doesn't follow user in format (user_id, username)
        """
        not_following_back_wo_whitelist = []  # wo - without
        whitelist = self.get_user_whitelist(user_id)

        not_following_back = self.get_full_list_of_unfollowers(
            user_id=user_id, username=username, download=download)

        if whitelist.__len__() > 0:
            not_following_back_wo_whitelist = self.get_not_following_back_wo_whitelist(
                not_following_back, whitelist)
        else:
            not_following_back_wo_whitelist = not_following_back

        return not_following_back_wo_whitelist

    def get_full_list_of_unfollowers(self, user_id, username, download=True):
        """
        Returns full list of accounts who doesn't follow back user
        :param user_id: user's instagram id
        :param username: user's instagram username
        :param download: boolean flag, indicates data need to be downloaded, or got from dump
        :return: full list of accounts eho doesn't follow user back
        """
        not_following_back = []
        if download:
            print('\n')
            print('-' * 30)

            followers = self.download_followers_list(user_id, username)
            following = self.download_accounts_user_following_list(
                user_id, username)

            print(f'followers: {followers.__len__()}')
            print(f'following: {following.__len__()}')

            self.dump_followers(user_id, followers, following)

            print('calculating rats ... ', end='')

            # "Rats" = accounts the user follows that do not follow back.
            for followee in following:
                if followee not in followers:
                    not_following_back.append(followee)

            print(colored('DONE', 'green'))
            self.dump_full_not_following_back_list(user_id, not_following_back)
        else:
            # Offline mode: reuse the last pickled result for this user.
            with open(
                    f'{config.WORKING_DIR.replace("scripts", "")}client_data/{user_id}/{user_id}_not_following_back_full.pickle',
                    'rb') as f:
                not_following_back = pickle.load(f)

        return not_following_back

    def get_user_whitelist(self, client_instagram_id):
        """
        Returns list with instagram account ids which was added by user
        for specified account as whitelist
        :param client_instagram_id: instagram account id
        :return: list with instagram ids, 'whitelist'
        """
        whitelist = self.db_handler.get_whitelist_for_instagram_id(
            client_instagram_id)

        return whitelist

    def get_not_following_back_wo_whitelist(self, not_following_back,
                                            whitelist):
        """
        Deletes from list of accounts who doesn't follow back those, who r in whitelist
        :param not_following_back: list of accounts who doesn't follow back
        :param whitelist: list of user's whitelist
        :return: list of accounts who doesn't follow, with whitelist excluded
        """
        not_following_back_wo_whitelist = []

        for account in not_following_back:
            if account[0] not in [acc[0].__str__() for acc in whitelist]:
                not_following_back_wo_whitelist.append(account)

        return not_following_back_wo_whitelist

    def download_followers_list(self, user_id, username):
        """
        Downloading from instagram list of client's followers
        :param user_id: client instagram id
        :param username: client username
        :return: list with tuples of client's followers i.e. (instagram_id, username)
        """
        followers = []
        end_cursor = ''
        has_next_page = True

        self.session.headers[
            'Referer'] = f'https://www.instagram.com/{username}/followers'

        # query_hash identifies Instagram's GraphQL "followers" query.
        first_page_url = f'https://www.instagram.com/graphql/query/?query_hash=c76146de99bb02f6415203be841dd25a&' \
                     f'variables=%7B%22id%22%3A%22{user_id}%22%2C%22include_reel%22%3Atrue%2C%22' \
                     f'fetch_mutual%22%3Atrue%2C%22first%22%3A24%7D'

        res = self.session.get(first_page_url, headers=self.session.headers)
        self.session.headers['X-CSRFToken'] = res.cookies['csrftoken']

        print('getting followers ... ', end='')
        while has_next_page:
            # A missing page_info leaves has_next_page unchanged.
            try:
                has_next_page = bool(
                    res.json()['data']['user']['edge_followed_by']['page_info']
                    ['has_next_page'])
            except KeyError as kerr:
                pass
            try:
                for node in res.json(
                )['data']['user']['edge_followed_by']['edges']:
                    followers.append(
                        (node['node']['id'], node['node']['username']))
            except Exception as err:
                print()

            if has_next_page:
                end_cursor = res.json()['data']['user']['edge_followed_by'][
                    'page_info']['end_cursor'].replace('==', '')
                next_page = f'https://www.instagram.com/graphql/query/?query_hash=c76146de99bb02f6415203be841dd25a' \
                            f'&variables=%7B%22id%22%3A%22{user_id}%22%2C%22' \
                            f'include_reel%22%3Atrue%2C%22fetch_mutual%22%3Afalse%2C%22first%22%3A14%2C%22' \
                            f'after%22%3A%22{end_cursor}%3D%3D%22%7D'

                # time.sleep(randint(2,5))    # delay, prevents instagram ban
                res = self.session.get(next_page, headers=self.session.headers)
                self.session.headers['X-CSRFToken'] = res.cookies['csrftoken']

            else:
                print(colored('DONE', 'green'))

        return followers

    def download_accounts_user_following_list(self, user_id, username):
        """
        Downloading from instagram list of users which client follows
        :param user_id: client instagram id
        :param username: client username
        :return: list with tuples of following users by client i.e. (instagram_id, username)
        """
        following = []
        end_cursor = ''
        has_next_page = True

        # query_hash identifies Instagram's GraphQL "following" query.
        first_page = f'https://www.instagram.com/graphql/query/?query_hash=d04b0a864b4b54837c0d870b0e77e076&' \
                     f'variables=%7B%22id%22%3A%22{user_id}%22%2C%22include_reel%22%3Atrue%2C%22' \
                     f'fetch_mutual%22%3Afalse%2C%22first%22%3A24%7D'

        res = self.session.get(first_page, headers=self.session.headers)
        self.session.headers['X-CSRFToken'] = res.cookies['csrftoken']

        print('getting following ... ', end='')
        while has_next_page:
            try:
                has_next_page = bool(res.json()['data']['user']['edge_follow']
                                     ['page_info']['has_next_page'])
            except KeyError as kerr:
                pass

            for node in res.json()['data']['user']['edge_follow']['edges']:
                following.append(
                    (node['node']['id'], node['node']['username']))

            if has_next_page:
                end_cursor = res.json()['data']['user']['edge_follow'][
                    'page_info']['end_cursor'].replace('==', '')
                next_page = f'https://www.instagram.com/graphql/query/?query_hash=d04b0a864b4b54837c0d870b0e77e076' \
                            f'&variables=%7B%22id%22%3A%22{user_id}%22%2C%22include_reel%22%3Atrue%2C%22' \
                            f'fetch_mutual%22%3Afalse%2C%22first%22%3A12%2C%22after%22%3A%22{end_cursor}%3D%3D%22%7D'
                # time.sleep(randint(2, 5))  # delay, prevents instagram ban
                res = self.session.get(next_page, headers=self.session.headers)
                self.session.headers['X-CSRFToken'] = res.cookies['csrftoken']

            else:
                print(colored('DONE', 'green'))

        return following

    def dump_followers(self, user_id, followers, following):
        """
        Dumps to disk lists with followers and following people using pickle module,
         to directory with user instagram id, which located at /client_data dir.
        :param user_id: instagram user id of client
        :param followers: list with tuples of (follower_id, follower_nickname)
        :param following: list with tuples of (following_id, following_nickname)
        :return: None
        """
        if not os.path.exists(
                config.WORKING_DIR.replace('scripts',
                                           f'client_data/{user_id}')):
            try:
                os.mkdir(
                    config.WORKING_DIR.replace('scripts',
                                               f'client_data/{user_id}'))
            except OSError as oserr:
                print(colored(oserr, 'red'))

        with open(
                config.WORKING_DIR.replace(
                    "scripts",
                    f"/client_data/{user_id}/{user_id}_followers.pickle"),
                'wb') as f:
            pickle.dump(followers, f)
        with open(
                config.WORKING_DIR.replace(
                    "scripts",
                    f"/client_data/{user_id}/{user_id}_followed.pickle"),
                'wb') as f:
            pickle.dump(following, f)

    def dump_full_not_following_back_list(self, user_id, not_following_back):
        """
        Dumps to disk list of people who doesn't follow back user with pickle module
        :param user_id: user instagram id
        :param not_following_back: list with tuples of (user_id, username)
        :return: None
        """
        path = config.WORKING_DIR.replace(
            'scripts',
            f'client_data/{user_id}/{user_id}_not_following_back_full.pickle')
        with open(path, 'wb') as f:
            pickle.dump(not_following_back, f)

    def dump_not_following_without_whitelist(self, user_id,
                                             not_following_back_wo_whitelist):
        """
        Dumps data about accounts who doesb't follow user back to pickle file,
        with whitelist excluded
        :param user_id: user instagram id
        :param not_following_back_wo_whitelist: list with users who doesn't follow user back
        :return: None
        """
        path = f'{config.WORKING_DIR.replace("scripts", "")}client_data/{user_id}/{user_id}' \
               f'_not_following_back_without_whitelist.pickle'
        with open(path, 'wb') as f:
            pickle.dump(not_following_back_wo_whitelist, f)

    def add_account_to_whitelist(self, client_id, client_telegram_id,
                                 account_id):
        """
        Calls db handler's method which adds account to whitelist
        :param client_id: client instagram id
        :param client_telegram_id:client telegram id
        :param account_id: instagram id of account which need 2 b added to whitelist
        :return:None
        """
        self.db_handler.add_account_to_whitelist(client_id, client_telegram_id,
                                                 account_id)
class Scraper:
    """
    The scraper, pretty much does all the hard work.
    When the Scraper is initialized it will parse command-line arguments.
    Utilizes DbHandler.py and configurator.py.
    """
    def __init__(self):
        """
        Wire up the scraper: database, config, CLI arguments, logging and
        the praw Reddit client, then zero all download counters.
        """
        self.db = DbHandler()
        self.config = configurator.get_config()
        self.args = self.parse_arguments()
        self.initialize_logger()

        # Reddit API credentials are read from client_secret.json (see get_id).
        _id = self.get_id()
        self.r = praw.Reddit(
            user_agent="PrawWallpaperDownloader 1.0.0 by /u/Pusillus",
            client_id=_id["id"],
            client_secret=_id["secret"])

        # Download statistics, updated as the run progresses.
        self.n_posts = 0
        self.albums = 0
        self.succeeded = 0
        self.failed = 0
        self.skipped = 0
        self.deleted = 0

        # Flag polled by the progress-bar refresh thread (update_screen).
        self.notify = False

        # Work queue of post dicts consumed by grab_image worker threads.
        self.posts = []
        self.que = queue.Queue()
        self.downloaded_images = []

    @staticmethod
    def get_id():
        if os.path.isfile('client_secret.json'):
            with open('client_secret.json', 'r') as id_file:
                return json.loads("".join(id_file.readlines()))
        else:
            logging.error('Client_secret.json not found, exiting')
            sys.exit('Unable to locate client_secret.json.\n'
                     'Please have a look at README.md '
                     'and follow the instructions')

    def parse_arguments(self):
        """
        Parse command-line arguments, using values from the config file
        (self.config) as defaults for every option.

        :return: argparse.Namespace with the parsed options
        Exits the program when the ratio lock is outside [0, 1].
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("-s",
                            "--subreddit",
                            help="specify subreddit to scrape",
                            default=self.config['Sub'])
        parser.add_argument(
            "-se",
            "--section",
            help=
            "specify section of subreddit to scrape (hot, top, rising or new)",
            default=self.config['section'])
        parser.add_argument("-l",
                            "--limit",
                            help="set amount of posts to sift through "
                            "(default " + self.config['Limit'] + ")",
                            default=int(self.config['Limit']),
                            type=int)
        parser.add_argument(
            "--log",
            help="save a log of wallpapers downloaded/skipped/failed",
            action="store_true",
            default=False)
        parser.add_argument(
            "-re",
            "--redownload",
            help="attempt to download all the links in the database",
            action="store_true",
            default=False)
        parser.add_argument("-v",
                            "--verbose",
                            help="increase output detail",
                            action="store_true",
                            default=False)
        # The "no*" flags invert the corresponding boolean config entries,
        # so their defaults are the negation of the config value.
        parser.add_argument('-nc',
                            "--noclean",
                            help="Skip cleaning off small images (Cleaning: " +
                            self.config['Clean'] + ")",
                            action="store_true",
                            default=not self.config.getboolean('Clean'))
        parser.add_argument(
            '-ns',
            '--nosort',
            help="Skip sorting out previously downloaded images (Sorting: {})".
            format(self.config['sort']),
            action="store_true",
            default=not self.config.getboolean('Sort'))
        parser.add_argument('-na',
                            '--noalbum',
                            help='Skip imgur albums',
                            action='store_true',
                            default=not self.config.getboolean('Albums'))
        parser.add_argument('-t',
                            '--threads',
                            help='Amount of threads for downloading images',
                            default=int(self.config['Threads']),
                            type=int)
        parser.add_argument('-con',
                            '--configure',
                            help="Change settings",
                            action='store_true',
                            default=False)
        parser.add_argument(
            '-rlock',
            '--ratiolock',
            help="Sort out images with incorrect aspect ratio, 0 for no lock, "
            "1 for full lock (Ratio lock: {})".format(
                self.config['ratiolock']),
            default=float(self.config['ratiolock']),
            type=float)
        parser.add_argument('-q',
                            '--search',
                            help="Scrape by search term",
                            default=False,
                            type=str)
        args = parser.parse_args()
        # A ratio lock outside [0, 1] would make the ratio window in
        # clean_up nonsensical, so refuse to run.
        if args.ratiolock < 0 or args.ratiolock > 1:
            sys.exit(
                "Incorrect ratio lock, please keep it between 0.0 and 1.0 (Currently {})"
                .format(args.ratiolock))
        return args

    def initialize_logger(self):
        handlers = []
        if self.args.log:
            # Windows default encoding for text files isn't UTF-8 (it's ANSI afaik)
            # So we need to create a custom FileHandler which opens the text file in UTF-8
            file_handler = logging.FileHandler(filename='papers.log',
                                               mode='w',
                                               encoding="utf8")
            handlers.append(file_handler)
        if self.args.verbose:
            # Create stream handler pointing to stdout (terminal) and add it to handlers.
            stream_handler = logging.StreamHandler(stream=sys.stdout)
            handlers.append(stream_handler)
        elif not self.args.log:
            handlers.append(
                logging.StreamHandler(
                    stream=open(os.devnull, 'w', encoding="utf-8")))

        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)s %(message)s',
                            datefmt='%d/%m/%y %H:%M:%S:',
                            handlers=handlers)

        logging.info('Logger started')
        settings = "Arguments:\n"
        for key, val in zip(vars(self.args).keys(), vars(self.args).values()):
            settings += "{}: {}\n".format(key, val)
        logging.info(settings)

    def get_submissions(self, subreddit):
        """
        Get submissions from reddit
        Takes a subreddit object from PRAW as argument
        Returns list of PRAW submission objects
        """
        section = self.args.section.lower().strip()
        limit = self.args.limit
        if self.args.search:
            return subreddit.search(self.args.search)
        elif section == "top":
            return subreddit.top(limit=limit)
        elif section == "new":
            return subreddit.new(limit=limit)
        elif section == "rising":
            return subreddit.rising(limit=limit)
        else:
            if section != "hot":
                logging.warning("Unknown section, defaulting to hot")
                print("Unknown section, defaulting to hot")
            return subreddit.hot(limit=limit)

    def extract_submission_data(self, submission):
        """
        Exctract direct image links, and relevant data from a PRAW submission
        object
        Takes a PRAW submission object as arguments and appends a dictionary
        in the following format to self.posts:

        {"url": image-link,
         "title": submission title,
         "author": author of the submission (reddit user name),
         "parent_id": None (only used for images in albums)}

        If the submission link to an album this function will instead return
        an album dictionary for further processing later in the process
        album dictionary format:
        {"url": link to imgur album,
         "title": submission title,
         "author": submission author (reddit username}
        """
        url = submission.url
        # Check for author
        if not submission.author:
            author = '[User Deleted]'
        else:
            author = str(submission.author)

        # Direct jpg and png links
        if url.endswith(".jpg") or url.endswith(".png"):
            context = {
                "url": url,
                "title": submission.title,
                "author": author,
                "parent_id": None
            }
            self.posts.append(context)

        # Imgur support
        elif ("imgur.com" in url) and ("/a/" not in url):
            if url.endswith("/new"):
                url = url.rsplit("/", 1)[0]
            id = url.rsplit("/", 1)[1].rsplit(".", 1)[0]
            link = "http://i.imgur.com/" + id + ".jpg"
            context = {
                "url": link,
                "title": submission.title,
                "author": author,
                "parent_id": None
            }
            self.posts.append(context)

        # Album support
        elif ("imgur.com" in url) and ("/a/" in url):
            album_context = {
                "url": url,
                "title": submission.title,
                "author": author
            }
            return album_context

    def handle_submissions(self, submissions):
        """Get and sort posts from reddit"""
        albums = []  # Array to hold all the album elements for later.
        for submission in submissions:
            album = self.extract_submission_data(submission)
            if album:
                albums.append(album)

        # Extract all image links from the imgur albums
        if not self.args.noalbum:
            self.handle_albums(albums)

        # Save amount of valid images
        self.n_posts = len(self.posts)

        # Sort out previously downloaded images
        if not self.args.nosort:
            if int(self.config["MaxAge"]) == 0:
                self.posts = self.db.sort_links(self.posts)
            else:
                self.posts = self.db.sort_links(
                    self.posts, age_limit=self.config["MaxAge"])
            self.skipped = self.n_posts - len(self.posts)

    def handle_albums(self, albums):
        """
        Extract all image links from a list of imgur albums and append
        them to self.posts; each entry gets the album's database id as
        parent_id and its position in the album as id.

        :param albums: list of album dicts ({"url", "title", "author"})
        """
        logging.info('Extracting albums...')
        # NOTE(review): sort_albums appears to filter out previously
        # handled albums — confirm against DbHandler.
        albums = self.db.sort_albums(albums)
        n_albums = len(albums)

        for _id, album in enumerate(albums):
            print("\rHandling album: {}/{}".format(_id + 1, n_albums), end='')
            logging.info('Handling album {}/{}'.format(_id + 1, n_albums))
            # Download imgur album
            res = requests.get(album["url"])
            try:
                res.raise_for_status()
            except Exception as exc:
                self.handle_error(exc, album)
                continue

            # Parse through the html fetching all link elements
            soup = bs4.BeautifulSoup(res.text, 'html.parser')
            link_elements = soup.select('a.zoom')

            # Insert link to get id
            album['length'] = len(link_elements)
            album_id = self.db.insert_album(album)

            if len(link_elements) > 0:
                for a_id, ele in enumerate(link_elements):
                    # Put the data in context for later
                    context = {
                        "url": "http:" + ele.get('href'),
                        "title": album["title"],
                        "parent_id": album_id,
                        "id": a_id,
                        "author": album["author"]
                    }
                    self.posts.append(context)
            self.albums += 1
        print()  # Add missing newline from printing album nr

    def handle_error(self, err, post):
        """Handles error stats and prints a message if verbose is enabled"""
        self.failed += 1
        logging.error('Error occurred at:{} {}: {}'.format(
            post["title"], type(err), str(err)))

    def grab_image(self, download_folder, bar):
        """
        Worker function for downloading images: keeps pulling links from
        the image queue, downloading and saving them, until the queue is
        empty.

        :param download_folder: path to the desired save folder
        :param bar: CLIBar to be stepped after each downloaded image
        """
        while True:
            try:
                submission = self.que.get(block=False)
            except queue.Empty:
                logging.info('Download thread done... Stopping')
                return

            # Try to download image
            logging.info('Downloading image {}'.format(submission["title"]))
            try:
                response = requests.get(submission["url"], timeout=10)
                response.raise_for_status()
            except Exception as exc:
                self.handle_error(exc, submission)
                # BUG FIX: without this continue, the code below ran with an
                # undefined (or previous iteration's) response object.
                continue

            # Try to determine file format from headers fall back to trying to
            # determine it from url if content-type is missing.
            # Most modern operating systems will open the file regardless
            # of format suffix
            try:
                type_header = response.headers['Content-Type'].split('/')
                if type_header[0] == 'image':
                    content_type = type_header[1]
                else:
                    # Sometimes the content-type is incorrect
                    # try to guess it from URL
                    if submission["url"].endswith('.png'):
                        content_type = "png"
                    else:
                        content_type = "jpg"
            except (KeyError, IndexError):
                # BUG FIX: `except KeyError and IndexError` evaluated to
                # IndexError only; a tuple catches both exceptions.
                if submission["url"].endswith('.png'):
                    content_type = "png"
                else:
                    content_type = "jpg"

            # content-headers describe .jpg images with jpeg
            if content_type == 'jpeg':
                image_format = '.jpg'
            else:
                image_format = '.' + content_type

            # Strip characters that are illegal in file names, then suffix
            # album images with their 1-based position within the album
            # (album entries carry an 'id' key).
            safe_title = re.sub(r'[\\/:*?"<>|]', '', submission["title"][:25])
            if 'id' in submission:
                file_path = os.path.join(
                    download_folder,
                    safe_title + '_' + str(submission['id'] + 1) +
                    image_format)
            else:
                file_path = os.path.join(download_folder,
                                         safe_title + image_format)
            # Try to save the image to disk
            try:
                with open(file_path, 'wb') as image:
                    for chunk in response.iter_content(4096):
                        image.write(chunk)
                self.succeeded += 1
                self.downloaded_images.append(file_path)
                bar.step()
            except Exception as exc:
                self.handle_error(exc, submission)

    def update_screen(self, bar):
        """
        Keeps refreshing the CLIbar every .5 seconds as llong as self.notify
        is true, always run as a seperate thread
        """
        while self.notify:
            print("{}".format(bar.get_progress_bar()), flush=True, end='\r')
            time.sleep(0.5)
        logging.info('Notify thread stopping')
        return

    def download_images(self):
        """Create folders and try to download/save the image links
         in self.posts, assumes all links are image links"""
        # Stop if there's not posts to download
        if len(self.posts) < 1:
            logging.info('No images to download, stopping')
            sys.exit("No new images to download.")

        logging.info('Starting download')
        # Make folders
        logging.info('Creating folders')
        os.makedirs("Downloads", exist_ok=True)
        download_folder = os.path.join("Downloads", self.args.subreddit)
        os.makedirs(download_folder, exist_ok=True)

        # Fill the work queue consumed by the grab_image worker threads.
        for post in self.posts:
            self.que.put(post)

        # Create progress bar
        bar = CLIBar(_max=len(self.posts))

        # Never start more threads than there are posts to download.
        threads = []
        print("Starting {} threads".format(self.args.threads))
        for n in range(min(len(self.posts), self.args.threads)):
            logging.info('Starting thread: {}'.format(n))
            thread = threading.Thread(target=self.grab_image,
                                      args=(download_folder, bar))
            thread.start()
            threads.append(thread)

        print("Downloading images")
        bar.start()

        # update_screen polls self.notify and keeps redrawing the bar.
        self.notify = True
        logging.info('Starting notify thread')
        threading.Thread(target=self.update_screen, args=(bar, )).start()

        logging.info('Waiting for download threads to finish')
        for thread in threads:
            try:
                thread.join()
            except KeyboardInterrupt:
                # Don't know how to handle this, ideas?
                pass
        # Clearing the flag tells the notify thread to exit its loop.
        logging.info('Done, telling notify thread to stop')
        self.notify = False

    def print_stats(self):
        """Print download stats to console"""
        print()
        new_images = self.succeeded - self.deleted
        print('Albums: {}\nImages downloaded: {}/{} \nSkipped: {}\n'
              'Failed: {}\nDeleted: {}\n'
              'New images: {}'.format(self.albums, self.succeeded,
                                      self.n_posts, self.skipped, self.failed,
                                      self.deleted, new_images))

    def save_posts(self):
        """Save posts currently in self.posts to database"""
        for post in self.posts:
            self.db.insert_link(post)
        self.db.save_changes()

    def clean_up(self):
        """
        Examine all downloaded images and delete duds.

        An image is removed when it is smaller than MinWidth x MinHeight,
        or when its aspect ratio falls outside the window derived from the
        ratio lock (a ratiolock of 0 disables ratio filtering entirely).
        """
        print('\nCleaning up')
        # The accepted ratio window is centred on MinWidth/MinHeight and
        # narrows as ratiolock approaches 1 (at 1 only the exact ratio passes).
        min_ratio = (int(self.config['MinWidth']) /
                     int(self.config['MinHeight'])) * self.args.ratiolock
        max_ratio = (int(self.config['MinWidth']) /
                     int(self.config['MinHeight'])) * (2 - self.args.ratiolock)
        if float(self.args.ratiolock) <= 0:
            max_ratio = 100000
        logging.info(
            "Ratio settings: RatioLock: {} | MinRatio: {} | MaxRatio: {}".
            format(self.args.ratiolock, min_ratio, max_ratio))
        for image_path in self.downloaded_images:
            try:
                logging.info("Checking: {}".format(image_path))
                image = Image.open(image_path)
                image_ratio = image.size[0] / image.size[1]
                logging.info("Image size: {}x{}".format(
                    image.size[0], image.size[1]))
                logging.info("Image ratio: {}".format(image_ratio))
            except OSError:
                # Not a readable image; leave the file alone.
                continue
            # Check for size.
            if image.size[0] < int(self.config['MinWidth'])\
                    or image.size[1] < int(self.config['MinHeight']):
                image.close()
                try:
                    os.remove(image_path)
                    logging.info(
                        'Removing image due to size: {}'.format(image_path))
                    self.deleted += 1
                except PermissionError as e:
                    logging.warning('Error deleting image: {}: {}: {}'.format(
                        image_path, type(e), str(e)))
                    print('\nCan\'t delete ' + image_path +
                          ' image is currently in use')
                continue
            else:
                logging.info("Image size ok, checking ratio")
            # Check for ratio
            # BUG FIX: the upper-bound test used to compare max_ratio with
            # itself (always False), so too-wide images were never removed.
            if min_ratio > image_ratio or image_ratio > max_ratio:
                logging.info(
                    'Removing image due to ratio: {}'.format(image_path))
                image.close()
                try:
                    os.remove(image_path)
                    self.deleted += 1
                except PermissionError as e:
                    logging.warning('Error deleting image: {}: {}: {}'.format(
                        image_path, type(e), str(e)))
                    print('\nCan\'t delete ' + image_path +
                          ' image is currently in use')
            else:
                logging.info('Ratio ok.')
                image.close()

    def re_download(self):
        """Attempts to re-download all links in the database"""
        self.posts = self.db.get_posts()
        self.n_posts = len(self.posts)
        self.download_images()
        if len(self.downloaded_images) > 0 and not self.args.noclean:
            self.clean_up()
        self.print_stats()

    def run(self):
        """
        Run the scraper end to end: fetch submissions, filter them,
        download the images, record them in the database, clean up duds
        and print run statistics. Exits with a message on reddit
        connection/response errors.
        """
        try:
            if not self.args.search:
                print('Getting {} posts from the {} section of {}'.format(
                    self.args.limit, self.args.section, self.args.subreddit))
            else:
                print('Searching {} for {}'.format(self.args.subreddit,
                                                   self.args.search))
            # Get submissions from sub
            submissions = self.get_submissions(
                self.r.subreddit(self.args.subreddit))
            # Handle the submissions and add them to self.posts
            # Sorts out previously downloaded images and extracts imgur albums
            self.handle_submissions(submissions)
        except RequestException:
            sys.exit('\nError connecting to reddit, please check your '
                     'internet connection')
        except ResponseException:
            sys.exit('\nError connecting to reddit.\n'
                     'Probably because you wrote an invalid subreddit name.\n'
                     'If that\'s not the case it\'s probably an'
                     'invalid client_secret.json file\nPlease see README.md '
                     'about how to set up the file properly.')
        except Exception as e:
            # NOTE(review): deliberately broad — any unexpected error during
            # the fetch phase aborts the run with an explanatory message.
            sys.exit('\nAn unknown error occurred:\n{}: {}'.format(
                type(e), str(e)))
        # Download all images in self.posts.
        self.download_images()
        self.save_posts()
        if len(self.downloaded_images) > 0 and not self.args.noclean:
            self.clean_up()
        self.print_stats()
Beispiel #25
0
 def __init__(self):
     """Create this object's database handler."""
     # NOTE(review): DbHandler is project-local; presumably opens the
     # application database — confirm against DbHandler's definition.
     self.db = DbHandler()
Beispiel #26
0
class TitsaBot:
    CUSTOM_OR_DEFAULT, INSERT_CUSTOM, TRANVIA, BROADCAST_TEXT = range(4)

    def __init__(self):
        """
        Build and run the Titsa Telegram bot: load configuration, create
        the API/DB handlers, register all dispatcher handlers and start
        polling.

        NOTE(review): this constructor blocks until the bot is stopped
        (start_polling + idle) and only then saves the database and stops
        the transit-data thread.
        """
        config = configparser.ConfigParser()
        config.read('bot_config.ini')

        bot_token = config.get("TELEGRAM", "token")

        titsa_idApp = config.get("TITSA", "idApp")

        self.adminId = config.get("ADMIN", "chatId")

        self.apiHandler = ApiHandler(titsa_idApp)

        self.dbHandler = DbHandler()

        # Background thread that refreshes the GTFS transit data weekly.
        self.transportThread = OpenTransitThread(
            "http://www.titsa.com/Google_transit.zip", 7 * 24 * 60 * 60)
        self.transportThread.start()

        updater = Updater(token=bot_token, use_context=True)
        logging.basicConfig(
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            level=logging.INFO)

        # Main reply keyboard shown to every user.
        b1 = telegram.KeyboardButton("⭐ Consultar favorito ⭐")
        b2 = telegram.KeyboardButton("✖️ Borrar favorito ✖️")
        b3 = telegram.KeyboardButton("🚊 Tranvia de Tenerife 🚊")
        b4 = telegram.KeyboardButton("📍 Ver paradas cercanas 📍",
                                     request_location=True)
        self.keyboard = telegram.ReplyKeyboardMarkup([[b3, b4], [b1, b2]],
                                                     resize_keyboard=True)

        # Favourite-related message handlers (also reused as fallbacks in
        # the conversations below).
        h1 = MessageHandler(Filters.regex(r"^.+Consultar favorito.+$"),
                            self.favKeyBoard)
        h2 = MessageHandler(Filters.regex(u"^\U0001F68F.+(\d{4})"),
                            self.replyToFav)
        h3 = MessageHandler(Filters.regex(r"^.+Borrar favorito.+$"),
                            self.favKeyBoard)
        h4 = MessageHandler(Filters.regex(u"^\u2716.+(\d{4})"), self.eraseFav)

        updater.dispatcher.add_handler(CommandHandler("start", self.start))
        # Plain 4-digit messages are treated as stop-code queries.
        updater.dispatcher.add_handler(
            MessageHandler(Filters.regex(r"^\d{4}$"), self.responder_a_codigo))
        updater.dispatcher.add_handler(h1)
        updater.dispatcher.add_handler(h2)
        updater.dispatcher.add_handler(h3)
        updater.dispatcher.add_handler(h4)

        # Tram query conversation.
        updater.dispatcher.add_handler(
            ConversationHandler(
                entry_points=[
                    MessageHandler(Filters.regex(r"^.+Tranvia de Tenerife.+$"),
                                   self.listStops),
                ],
                states={
                    TitsaBot.TRANVIA:
                    [MessageHandler(Filters.all, self.queryTram)]
                },
                fallbacks=[]))

        updater.dispatcher.add_handler(
            MessageHandler(Filters.location, self.nearStops))

        # Admin broadcast conversation.
        updater.dispatcher.add_handler(
            ConversationHandler(
                entry_points=[
                    CommandHandler("broadcast", self.newBroadcast),
                ],
                states={
                    TitsaBot.BROADCAST_TEXT:
                    [MessageHandler(Filters.all, self.broadcast)]
                },
                fallbacks=[]))

        # Add-favourite conversation (command or inline button entry).
        updater.dispatcher.add_handler(
            ConversationHandler(entry_points=[
                CommandHandler("addFav",
                               self.addFavCommand,
                               pass_args=True,
                               pass_user_data=True),
                CallbackQueryHandler(self.addFavQuery,
                                     pattern=r"^\d{4}$",
                                     pass_user_data=True)
            ],
                                states={
                                    TitsaBot.CUSTOM_OR_DEFAULT: [
                                        CallbackQueryHandler(
                                            self.setFavNameOption,
                                            pass_user_data=True)
                                    ],
                                    TitsaBot.INSERT_CUSTOM: [
                                        MessageHandler(Filters.text,
                                                       self.customName,
                                                       pass_user_data=True)
                                    ]
                                },
                                fallbacks=[h1, h2, h3, h4]))

        # Repeat-query conversation ("Repetir NNNN" inline button).
        updater.dispatcher.add_handler(
            ConversationHandler(entry_points=[
                CallbackQueryHandler(self.reloadStationQuery,
                                     pattern=r"^Repetir \d{4}$",
                                     pass_user_data=True)
            ],
                                states={
                                    TitsaBot.CUSTOM_OR_DEFAULT: [
                                        CallbackQueryHandler(
                                            self.reloadStationQuery,
                                            pass_user_data=True)
                                    ]
                                },
                                fallbacks=[h1, h2, h3, h4]))

        updater.dispatcher.add_handler(
            CallbackQueryHandler(self.sendStopAndLocation,
                                 pattern=r"^Locate \d{4}$",
                                 pass_user_data=True))

        # Blocks here until the bot is interrupted, then persists state.
        updater.start_polling()
        updater.idle()
        self.dbHandler.save()
        self.transportThread.stop()

    def build_minutes_text(self, status, bus=True):
        """
        Render a stop's upcoming arrivals as a Markdown message.

        :param status: stop status with .name and .minutes
                       ({line: [{"dest": ..., "minutes": ...}, ...]}),
                       or None when the stop was not found
        :param bus: True for the bus emoji, False for the tram emoji
        :return: (text, 1) on success, (error text, 0) when status is None
        """
        if status is not None:
            text = "🚏 *" + status.name + "* 🚏\n\n"
            # Order lines by their soonest arrival (first entry per line).
            sorted_lines = sorted(status.minutes.items(),
                                  key=lambda line: int(line[1][0]["minutes"]))
            # (removed a leftover debug print of sorted_lines)
            for linea, data in sorted_lines:
                for entry in data:
                    emoji = "🚊*" if not bus else "🚍*"
                    text += emoji + linea + "* (" + entry["dest"] + \
                            "): "+ entry["minutes"] + " minutos \n"

            return text, 1
        else:
            text = "⚠ Parada no encontrada o sin pasos registrados ⚠"
            return text, 0

    def start(self, update: Update, context: CallbackContext):
        """Greet the user and show the main reply keyboard (/start handler)."""
        greeting = "Hola! Gracias por usarme! \nEnvíame el código de una parada :)"
        context.bot.send_message(chat_id=update.message.chat_id,
                                 text=greeting,
                                 reply_markup=self.keyboard)

    def addFavCommand(self, update: Update, context: CallbackContext):
        """Handle /addFav <stop>: start the add-favourite conversation."""
        message = update.message
        return self.newFavMethod(message.from_user.id, context.args[0],
                                 context.bot, message.chat.id,
                                 context.user_data)

    def addFavQuery(self, update: Update, context: CallbackContext):
        """Handle an inline 'add favourite' button press for a stop code."""
        query = update.callback_query
        return self.newFavMethod(query.from_user.id, query.data, context.bot,
                                 query.message.chat.id, context.user_data)

    def reloadStationQuery(self, update: Update, context: CallbackContext):
        """Re-run a stop query triggered by a 'Repetir NNNN' inline button."""
        query = update.callback_query
        stop_code = query.data.replace("Repetir ", "")
        self.stationQuery(context.bot, query.message.chat_id,
                          query.from_user.id, stop_code)

    def newFavMethod(self, user, station, bot, chat, user_data):
        """
        Start the add-favourite flow for *station*: ask whether the user
        wants a custom name for it.

        :return: -1 when the stop is already a favourite, otherwise the
                 CUSTOM_OR_DEFAULT conversation state
        """
        if self.dbHandler.check_duplicate(user, station):
            bot.send_message(chat_id=chat,
                             text="Ya está en favoritos",
                             reply_markup=None)
            return -1

        logging.info(msg="New fav required user:%s id:%s" %
                     (user, station))
        stationName = StopsHandler.stationName(station)
        prompt = "Nombre: " + stationName + "\n¿Quiere usar otro?"
        options = [[
            telegram.InlineKeyboardButton(text="Sí", callback_data="si"),
            telegram.InlineKeyboardButton(text="No", callback_data="no"),
            telegram.InlineKeyboardButton(text="❌Cancelar",
                                          callback_data="cancel"),
        ]]
        bot.send_message(chat_id=chat,
                         text=prompt,
                         reply_markup=telegram.InlineKeyboardMarkup(options))
        # Remember the pending stop for the follow-up conversation steps.
        user_data["currentFavStationId"] = station
        user_data["currentFavStationName"] = stationName
        return TitsaBot.CUSTOM_OR_DEFAULT

    def responder_a_codigo(self, update: Update, context: CallbackContext):
        """Reply to a plain 4-digit stop code typed by the user."""
        message = update.message
        self.stationQuery(context.bot, message.chat_id, message.from_user.id,
                          message.text)

    def stationQuery(self, bot, chat_id, user_id, text):
        """
        Look up arrivals for the 4-digit stop code in *text* and send the
        result, attaching add-to-favourites / repeat inline buttons; then
        re-show the main keyboard.
        """
        # First contact from this chat: register the user.
        if not self.dbHandler.check_duplicate_user(chat_id):
            self.dbHandler.addUser(chat_id)
        logging.info(msg="Message %s" % (text))
        if text.isdigit() and len(text) == 4:
            texto = self.build_minutes_text(self.apiHandler.new_request(text),
                                            True)[0]
            fav_button = telegram.InlineKeyboardButton(
                text="⭐ Añadir a favoritos ⭐", callback_data=text)
            reload_button = telegram.InlineKeyboardButton(
                text="🔃 Repetir consulta 🔃", callback_data="Repetir " + text)
            # Offer the favourite button only when the stop isn't one yet.
            rows = [[reload_button]]
            if not self.dbHandler.check_duplicate(user_id, text):
                rows = [[fav_button], [reload_button]]
            bot.send_message(chat_id=chat_id,
                             text=texto,
                             parse_mode=telegram.ParseMode.MARKDOWN,
                             reply_markup=telegram.InlineKeyboardMarkup(rows))
        else:
            bot.send_message(chat_id=chat_id, text="Código inválido")

        bot.send_message(chat_id=chat_id,
                         text="Escribe el número de parada",
                         reply_markup=self.keyboard)

    def setFavNameOption(self, update: Update, context: CallbackContext):
        """Handle the inline answer to "use another name for this favourite?".

        "no"   -> save the favourite under its official station name.
        "si"   -> ask for a custom name and move to INSERT_CUSTOM state.
        other  -> cancel: discard the pending favourite data.
        Returns -1 (end conversation) except for the "si" branch.
        """
        logging.info(
            msg="Answer for the fav question: user:%s reply:%s" %
            (update.callback_query.from_user.id, update.callback_query.data))
        if update.callback_query.data == "no":
            # Keep the official name stashed by the previous step.
            self.dbHandler.addUserFav(
                update.callback_query.from_user.id,
                context.user_data["currentFavStationId"],
                context.user_data["currentFavStationName"])
            text = "*Favorito añadido*\n" + context.user_data[
                "currentFavStationName"] + "(" + context.user_data[
                    "currentFavStationId"] + ")"
            # Replace the question message with the confirmation.
            context.bot.edit_message_text(
                text,
                update.callback_query.message.chat.id,
                update.callback_query.message.message_id,
                reply_markup=None,
                parse_mode=telegram.ParseMode.MARKDOWN)
            context.bot.send_message(
                chat_id=update.callback_query.message.chat.id,
                text="Escribe el número de parada",
                reply_markup=self.keyboard)
            return -1
        elif update.callback_query.data == "si":
            context.bot.edit_message_text(
                "¿Qué nombre?",
                update.callback_query.message.chat.id,
                update.callback_query.message.message_id,
                reply_markup=None)
            return TitsaBot.INSERT_CUSTOM
        else:
            # Cancel: remove the question and forget the pending station.
            context.bot.delete_message(
                update.callback_query.message.chat.id,
                update.callback_query.message.message_id, None)
            context.user_data.pop("currentFavStationId", None)
            context.user_data.pop("currentFavStationName", None)
            context.bot.send_message(
                chat_id=update.callback_query.message.chat.id,
                text="Escribe el número de parada",
                reply_markup=self.keyboard)
            return -1

    def customName(self, update: Update, context: CallbackContext):
        """Store the pending favourite under the custom name the user
        just typed, confirm it, and end the conversation (-1)."""
        message = update.message
        custom_name = message.text
        station_id = context.user_data["currentFavStationId"]
        logging.info(msg="Custom name: user:%s reply:%s" %
                     (message.from_user.id, custom_name))
        # Persist the favourite with the user-chosen label.
        self.dbHandler.addUserFav(message.from_user.id, station_id,
                                  custom_name)
        confirmation = "".join(
            ["*Favorito añadido*\n", custom_name, "(", station_id, ")"])
        context.bot.send_message(message.chat.id,
                                 text=confirmation,
                                 reply_markup=self.keyboard,
                                 parse_mode=telegram.ParseMode.MARKDOWN)
        return -1

    def listStops(self, update: Update, context: CallbackContext):
        """Show all tram stations as a reply keyboard.

        Returns the TRANVIA conversation state when stations are
        available, otherwise -1 to end the conversation.
        """
        # Register the chat on first contact.
        if not self.dbHandler.check_duplicate_user(update.message.chat_id):
            self.dbHandler.addUser(update.message.chat_id)
        logging.info(msg="Listing tram stations")
        stations = self.apiHandler.tranvia_stations()
        if stations is not None and len(stations) > 0:
            buttons = []
            # One button per station: "🚋<name> (<code>)".
            for station in stations.items():
                buttons.append([
                    telegram.KeyboardButton(u"🚋" + station[0] + " (" +
                                            station[1] + ")")
                ])
            context.bot.send_message(
                update.message.chat.id,
                text="Elige estación",
                reply_markup=telegram.ReplyKeyboardMarkup(buttons),
                resize_keyboard=True)
            return TitsaBot.TRANVIA
        return -1

    def queryTram(self, update: Update, context: CallbackContext):
        """Answer a tram-station selection made from the tram keyboard.

        Extracts the 3-character station code from the button text and
        replies with the arrival times.  Returns -1 (end conversation).
        """
        # NOTE(review): p.search() returns None when the text does not
        # match (e.g. a hand-typed message), which would raise
        # AttributeError on .group(1) — confirm inputs always come from
        # the generated keyboard.
        p = re.compile(u"^\U0001F68B.+(\w{3})")
        stop = p.search(update.message.text).group(1)
        status = self.apiHandler.tranvia_request(stop)
        texto = self.build_minutes_text(status, False)[0]
        context.bot.send_message(chat_id=update.message.chat_id,
                                 text=texto,
                                 parse_mode=telegram.ParseMode.MARKDOWN,
                                 reply_markup=self.keyboard)
        return -1

    def favKeyBoard(self, update: Update, context: CallbackContext):
        """Show the user's favourite stops as a reply keyboard.

        Used both for querying a favourite (🚏 prefix) and for deleting
        one (✖️ prefix, chosen when the triggering text contains
        "Borrar").  Tells the user when there are no favourites yet.
        """
        logging.info(msg="Fav request from user %s" %
                     (update.message.from_user.id))

        stations = self.dbHandler.getUserFavs(update.message.from_user.id)
        if len(stations) > 0:
            buttons = []
            # PEP 8 idiom: "x not in y" instead of "not x in y".
            emoji = u"🚏" if "Borrar" not in update.message.text else u"✖️"
            # One button per favourite: "<emoji><name> (<code>)".
            for station in stations:
                buttons.append([
                    telegram.KeyboardButton(emoji + station[1] + " (" +
                                            station[0] + ")")
                ])
            context.bot.send_message(
                update.message.chat.id,
                text="Elige estación",
                reply_markup=telegram.ReplyKeyboardMarkup(buttons),
                resize_keyboard=True)
        else:
            context.bot.send_message(update.message.chat.id,
                                     text="No tienes favoritos",
                                     reply_markup=self.keyboard,
                                     resize_keyboard=True)

    def replyToFav(self, update: Update, context: CallbackContext):
        """Extract the 4-digit stop code from a pressed favourite button
        and forward it as a normal stop-code query."""
        fav_pattern = re.compile(u"^\U0001F68F.+(\d{4})")
        stop_code = fav_pattern.search(update.message.text).group(1)
        # Rewrite the message text so the regular handler sees a bare code.
        update.message.text = stop_code
        self.responder_a_codigo(update, context)

    def eraseFav(self, update: Update, context: CallbackContext):
        """Delete the favourite named in the pressed ✖ button and
        confirm the deletion to the user."""
        erase_pattern = re.compile(u"^\u2716.+(\d{4})")
        stop_code = erase_pattern.search(update.message.text).group(1)
        self.dbHandler.deleteUserFav(update.message.from_user.id, stop_code)
        context.bot.send_message(update.message.chat.id,
                                 text="Favorito eliminado",
                                 reply_markup=self.keyboard,
                                 resize_keyboard=True)

    def broadcast(self, update: Update, context: CallbackContext):
        """Send the received message to every registered user.

        Only acts when the sender's chat id matches the configured admin
        id; always returns -1 to end the conversation.
        """
        logging.info(msg="Broadcasting message %s" % update.message.text)
        if (update.message.chat.id == int(self.adminId)):
            for user in self.dbHandler.getAllUsers():
                logging.info(msg="Broadcasted to %s" % (user))
                # NOTE(review): resize_keyboard is a ReplyKeyboardMarkup
                # option, not a send_message parameter — confirm this
                # python-telegram-bot version accepts/ignores it.
                context.bot.send_message(str(user),
                                         text=update.message.text,
                                         reply_markup=self.keyboard,
                                         resize_keyboard=True)

        return -1

    def newBroadcast(self, update: Update, context: CallbackContext):
        """Admin entry point: switch the conversation to the
        broadcast-text state so the next message gets broadcast."""
        return TitsaBot.BROADCAST_TEXT

    def nearStops(self, update: Update, context: CallbackContext):
        """Reply to a shared location with the four nearest stops as
        inline buttons whose callbacks are "Locate <id>"."""
        logging.info(msg="Nearest from user %s" %
                     (update.message.from_user.id))

        stations = StopsHandler.nearestStops(
            4, float(update.message.location.latitude),
            float(update.message.location.longitude))
        if stations is not None:
            buttons = []

            # One inline button per nearby stop.
            for station in stations:
                buttons.append([
                    telegram.InlineKeyboardButton(
                        text=f"{station.name} ({station.id})",
                        callback_data=f"Locate {station.id}")
                ])
            context.bot.send_message(
                update.message.chat.id,
                text="Estas son tus estaciones cercanas",
                reply_markup=telegram.InlineKeyboardMarkup(buttons))
        else:
            context.bot.send_message(update.message.chat.id,
                                     text="⚠ No hay información disponible ⚠",
                                     reply_markup=self.keyboard,
                                     resize_keyboard=True)

    def sendStopAndLocation(self, update: Update, context: CallbackContext):
        """Answer a "Locate <id>" callback: send the stop's map location
        followed by its live arrival times."""
        query = update.callback_query
        context.bot.answer_callback_query(query.id)
        logging.info(msg="Requested from nearest( user %s)" %
                     (query.message.chat_id))
        # Renamed from `id` to avoid shadowing the builtin.
        stop_id = query.data.replace("Locate ", "")
        coords = StopsHandler.stopLocation(stop_id)
        context.bot.send_location(query.message.chat_id,
                                  latitude=coords[0],
                                  longitude=coords[1])

        self.stationQuery(context.bot, query.message.chat_id,
                          query.from_user.id, stop_id)
Beispiel #27
0
from flask import Flask, request, Response
import json

from db_handler import DbHandler
from auth_handler import AuthHandler
from qc.qc_handler import QcHandler

from util import log

# Flask application plus the shared singletons used by the routes below.
app = Flask(__name__)

# TODO move to config file (along with configs in DbHandler and AuthHandler mayhaps)
app.config['MAIN_RECORDINGS_PATH'] = '/data/eyra/recordings'

dbHandler = DbHandler(app)
authHandler = AuthHandler(
    app)  # sets up /auth/login @app.route and @login_required()
qcHandler = QcHandler(app, dbHandler)

# SUBMISSION ROUTES


@app.route('/submit/general/<method>', methods=['POST'])
@authHandler.login_required()
def submit_general(method):
    """
    supports everything in the client-server API
    right now, /submit/general/{device,instuctor}
    
    requires sender to be authenticated with JWT, see auth_handler.py 
Beispiel #28
0
from discord import message
from discord.ext import commands
from discord.ext.commands.errors import MissingRequiredArgument
from dotenv import load_dotenv

from db_handler import DbHandler

# Pull the bot token and database credentials from the environment
# (populated from a .env file by python-dotenv).
# NOTE(review): os.getenv is used below but "import os" is not visible in
# this chunk — confirm it is imported earlier in the file.
load_dotenv()
TOKEN = os.getenv('DISCORD_TOKEN')
HOST = os.getenv('HOST')
USER = os.getenv('USER')
PASSWORD = os.getenv('PASSWORD')
DATABASE = os.getenv('DATABASE')

bot = commands.Bot(command_prefix='!')
db = DbHandler(HOST, USER, PASSWORD, DATABASE)


@bot.event
async def on_ready():
    # Fired once the bot has finished logging in and syncing with Discord.
    print(f"{bot.user.name} has connected to Discord!")


@bot.command(
    name="add",
    help=
    "Προσθέτω εργασία. Το date να είναι της μόρφης 2021-03-25, ενώ η ώρα 21:00:00."
)
async def add_command(ctx, assignment_name, class_name, turn_in_date,
                      turn_in_time):
    author = ctx.message.author
Beispiel #29
0
from http.server import HTTPServer
from server import MyServer
from os import path
from db_handler import DbHandler

if __name__ == "__main__":
    # Create the task database on first run only.
    if not path.exists("task_db.db"):
        DbHandler.initialize()
    server_address = ("localhost", 8000)
    server = HTTPServer(server_address, MyServer)
    print("Starting http server on Localhost:8000")
    server.serve_forever()  # blocks until interrupted
class Scraper:
    """
    The scraper, pretty much does all the hard work.
    When the Scraper is initialized it will parse command-line arguments.
    Utilizes DbHandler.py and configurator.py.
    """

    def __init__(self):
        """Wire up the scraper: database, config, CLI arguments, logging
        and the PRAW reddit client, plus all download bookkeeping state."""
        self.db = DbHandler()
        self.config = configurator.get_config()
        self.args = self.parse_arguments()
        self.initialize_logger()

        # Reddit API credentials are read from client_secret.json.
        _id = self.get_id()
        self.r = praw.Reddit(user_agent="PrawWallpaperDownloader 1.0.0 by /u/Pusillus", client_id=_id["id"], client_secret=_id["secret"])

        # Download statistics, updated as the scraper runs.
        self.n_posts = 0
        self.albums = 0
        self.succeeded = 0
        self.failed = 0
        self.skipped = 0
        self.deleted = 0

        # Keeps the progress-bar refresh thread alive while True.
        self.notify = False

        self.posts = []  # image links waiting to be downloaded
        self.que = queue.Queue()  # work queue shared by download threads
        self.downloaded_images = []  # paths of files written to disk

    @staticmethod
    def get_id():
        if os.path.isfile('client_secret.json'):
            with open('client_secret.json', 'r') as id_file:
                return json.loads("".join(id_file.readlines()))
        else:
            logging.error('Client_secret.json not found, exiting')
            sys.exit('Unable to locate client_secret.json.\n'
                     'Please have a look at README.md '
                     'and follow the instructions')

    def parse_arguments(self):
        """Parse command-line arguments.

        Defaults come from the loaded config; returns the argparse
        namespace.  Exits when --ratiolock is outside [0, 1].
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("-s", "--subreddit",
                            help="specify subreddit to scrape",
                            default=self.config['Sub'])
        parser.add_argument("-se", "--section",
                            help="specify section of subreddit to scrape (hot, top, rising or new)",
                            default=self.config['section'])
        parser.add_argument("-l", "--limit",
                            help="set amount of posts to sift through "
                                 "(default " + self.config['Limit'] + ")",
                            default=int(self.config['Limit']), type=int)
        parser.add_argument("--log",
                            help="save a log of wallpapers downloaded/skipped/failed",
                            action="store_true", default=False)
        parser.add_argument("-re", "--redownload",
                            help="attempt to download all the links in the database",
                            action="store_true", default=False)
        parser.add_argument("-v", "--verbose", help="increase output detail",
                            action="store_true",
                            default=False)
        # The no-* flags invert the configured booleans as their defaults.
        parser.add_argument('-nc', "--noclean", help="Skip cleaning off small images (Cleaning: " + self.config['Clean'] + ")",
                            action="store_true", default= not self.config.getboolean('Clean'))
        parser.add_argument('-ns', '--nosort', help="Skip sorting out previously downloaded images (Sorting: {})".format(self.config['sort']),
                            action="store_true", default= not self.config.getboolean('Sort'))
        parser.add_argument('-na', '--noalbum', help='Skip imgur albums',
                            action='store_true', default= not self.config.getboolean('Albums'))
        parser.add_argument('-t', '--threads', help='Amount of threads for downloading images',
                            default=int(self.config['Threads']), type=int)
        parser.add_argument('-con', '--configure', help="Change settings",
                            action='store_true', default=False)
        parser.add_argument('-rlock', '--ratiolock',
                            help="Sort out images with incorrect aspect ratio, 0 for no lock, "
                                 "1 for full lock (Ratio lock: {})".format(self.config['ratiolock']),
                            default=float(self.config['ratiolock']), type=float)
        parser.add_argument('-q', '--search', help="Scrape by search term", default=False,
                            type=str)
        args = parser.parse_args()
        if args.ratiolock < 0 or args.ratiolock > 1:
            sys.exit("Incorrect ratio lock, please keep it between 0.0 and 1.0 (Currently {})".format(args.ratiolock))
        return args

    def initialize_logger(self):
        handlers = []
        if self.args.log:
            # Windows default encoding for text files isn't UTF-8 (it's ANSI afaik)
            # So we need to create a custom FileHandler which opens the text file in UTF-8
            file_handler = logging.FileHandler(filename='papers.log', mode='w', encoding="utf8")
            handlers.append(file_handler)
        if self.args.verbose:
            # Create stream handler pointing to stdout (terminal) and add it to handlers.
            stream_handler = logging.StreamHandler(stream=sys.stdout)
            handlers.append(stream_handler)
        elif not self.args.log:
            handlers.append(logging.StreamHandler(stream=open(os.devnull, 'w', encoding="utf-8")))

        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)s %(message)s',
                            datefmt='%d/%m/%y %H:%M:%S:',
                            handlers=handlers)

        logging.info('Logger started')
        settings = "Arguments:\n"
        for key, val in zip(vars(self.args).keys(), vars(self.args).values()):
            settings += "{}: {}\n".format(key, val)
        logging.info(settings)

    def get_submissions(self, subreddit):
        """
        Get submissions from reddit
        Takes a subreddit object from PRAW as argument
        Returns list of PRAW submission objects
        """
        section = self.args.section.lower().strip()
        limit = self.args.limit
        if self.args.search:
            return subreddit.search(self.args.search)
        elif section == "top":
            return subreddit.top(limit=limit)
        elif section == "new":
            return subreddit.new(limit=limit)
        elif section == "rising":
            return subreddit.rising(limit=limit)
        else:
            if section != "hot":
                logging.warning("Unknown section, defaulting to hot")
                print("Unknown section, defaulting to hot")
            return subreddit.hot(limit=limit)

    def extract_submission_data(self, submission):
        """
        Exctract direct image links, and relevant data from a PRAW submission
        object
        Takes a PRAW submission object as arguments and appends a dictionary
        in the following format to self.posts:

        {"url": image-link,
         "title": submission title,
         "author": author of the submission (reddit user name),
         "parent_id": None (only used for images in albums)}

        If the submission link to an album this function will instead return
        an album dictionary for further processing later in the process
        album dictionary format:
        {"url": link to imgur album,
         "title": submission title,
         "author": submission author (reddit username}
        """
        url = submission.url
        # Check for author
        if not submission.author:
            author = '[User Deleted]'
        else:
            author = str(submission.author)

        # Direct jpg and png links
        if url.endswith(".jpg") or url.endswith(".png"):
            context = {"url": url,
                       "title": submission.title,
                       "author": author,
                       "parent_id": None}
            self.posts.append(context)

        # Imgur support
        elif ("imgur.com" in url) and ("/a/" not in url):
            if url.endswith("/new"):
                url = url.rsplit("/", 1)[0]
            id = url.rsplit("/", 1)[1].rsplit(".", 1)[0]
            link = "http://i.imgur.com/" + id + ".jpg"
            context = {"url": link,
                       "title": submission.title,
                       "author": author,
                       "parent_id": None}
            self.posts.append(context)

        # Album support
        elif ("imgur.com" in url) and ("/a/" in url):
            album_context = {"url": url,
                             "title": submission.title,
                             "author": author}
            return album_context

    def handle_submissions(self, submissions):
        """Get and sort posts from reddit.

        Extracts direct links from *submissions* into self.posts,
        expands imgur albums (unless --noalbum) and filters out
        previously downloaded links (unless --nosort), updating
        self.n_posts and self.skipped.
        """
        albums = []  # Array to hold all the album elements for later.
        for submission in submissions:
            album = self.extract_submission_data(submission)
            if album:
                albums.append(album)

        # Extract all image links from the imgur albums
        if not self.args.noalbum:
            self.handle_albums(albums)

        # Save amount of valid images
        self.n_posts = len(self.posts)

        # Sort out previously downloaded images
        # MaxAge == 0 means "skip anything ever downloaded"; otherwise
        # only links younger than MaxAge are skipped.
        if not self.args.nosort:
            if int(self.config["MaxAge"]) == 0:
                self.posts = self.db.sort_links(self.posts)
            else:
                self.posts = self.db.sort_links(self.posts, age_limit=self.config["MaxAge"])
            self.skipped = self.n_posts - len(self.posts)

    def handle_albums(self, albums):
        """Extract all links from a list of imgur albums.

        Downloads each album page, scrapes the image anchors and appends
        one post dict per image to self.posts (tagged with the album's
        database id).  Increments self.albums per processed album.
        """
        logging.info('Extracting albums...')
        # Drop albums that were already handled in a previous run.
        albums = self.db.sort_albums(albums)
        n_albums = len(albums)

        for _id, album in enumerate(albums):
            print("\rHandling album: {}/{}".format(_id+1, n_albums), end='')
            logging.info('Handling album {}/{}'.format(_id+1, n_albums))
            # Download imgur album
            res = requests.get(album["url"])
            try:
                res.raise_for_status()
            except Exception as exc:
                self.handle_error(exc, album)
                continue

            # Parse through the html fetching all link elements
            soup = bs4.BeautifulSoup(res.text, 'html.parser')
            link_elements = soup.select('a.zoom')

            # Insert link to get id
            album['length'] = len(link_elements)
            album_id = self.db.insert_album(album)

            if len(link_elements) > 0:
                for a_id, ele in enumerate(link_elements):
                    # Put the data in context for later
                    context = {"url": "http:" + ele.get('href'),
                               "title": album["title"],
                               "parent_id": album_id,
                               "id": a_id,
                               "author": album["author"]}
                    self.posts.append(context)
            self.albums += 1
        print()  # Add missing newline from printing album nr

    def handle_error(self, err, post):
        """Handles error stats and prints a message if verbose is enabled"""
        self.failed += 1
        logging.error('Error occurred at:{} {}: {}'.format(post["title"],
                                                           type(err),
                                                           str(err)))

    def grab_image(self, download_folder, bar):
        """
        Worker function for downloading images, keeps pulling a new link
        from image que, downloads it, and saves it, untill the que is empty.
        Takes 2 arguments:
        download_folder: Path to desired save folder
        bar: PyCLIBar to be stepped after image has been downloaded
        """
        while True:
            try:
                submission = self.que.get(block=False)
            except queue.Empty:
                logging.info('Download thread done... Stopping')
                return

            # Try to download image
            logging.info('Downloading image {}'.format(submission["title"]))
            try:
                response = requests.get(submission["url"], timeout=10)
                response.raise_for_status()
            except Exception as exc:
                self.handle_error(exc, submission)
                # BUG FIX: skip to the next queued link. Without this
                # `continue` the code below reused a stale `response`
                # from a previous iteration (or hit NameError on the
                # first failure) and saved the wrong image.
                continue

            # Try to determine file format from headers fall back to trying to
            # determine it from url if content-type is missing.
            # Most modern operating systems will open the file regardless
            # of format suffix
            try:
                type_header = response.headers['Content-Type'].split('/')
                if type_header[0] == 'image':
                    content_type = type_header[1]
                else:
                    # Sometimes the content-type is incorrect
                    # try to guess it from URL
                    if submission["url"].endswith('.png'):
                        content_type = "png"
                    else:
                        content_type = "jpg"
            except (KeyError, IndexError):
                # BUG FIX: `except KeyError and IndexError` evaluated to
                # just IndexError, so a missing Content-Type header
                # crashed the worker instead of falling back to the URL.
                # Missing content-type header, guess from link
                if submission["url"].endswith('.png'):
                    content_type = "png"
                else:
                    content_type = "jpg"

            # content-headers describe .jpg images with jpeg
            if content_type == 'jpeg':
                image_format = '.jpg'
            else:
                image_format = '.' + content_type

            # If there's an id key in the submission it's from an album and
            # should be suffixed with it's position within that album
            if 'id' in submission:
                file_path = os.path.join(download_folder,
                                         re.sub(r'[\\/:*?"<>|]',
                                                '',
                                                submission["title"][:25
                                                ]) + '_' + str(submission['id']+1) + image_format)
            else:
                file_path = os.path.join(download_folder,
                                         re.sub(r'[\\/:*?"<>|]',
                                                '',
                                                submission["title"][:25]) + image_format)
            # Try to save the image to disk
            try:
                with open(file_path, 'wb') as image:
                    for chunk in response.iter_content(4096):
                        image.write(chunk)
                self.succeeded += 1
                self.downloaded_images.append(file_path)
                bar.step()
            except Exception as exc:
                self.handle_error(exc, submission)

    def update_screen(self, bar):
        """
        Keeps refreshing the CLIbar every .5 seconds as llong as self.notify
        is true, always run as a seperate thread
        """
        while self.notify:
            print("{}".format(bar.get_progress_bar()),
                  flush=True, end='\r')
            time.sleep(0.5)
        logging.info('Notify thread stopping')
        return

    def download_images(self):
        """Create folders and try to download/save the image links
         in self.posts, assumes all links are image links.

        Spawns up to --threads grab_image workers fed from self.que plus
        a progress-bar refresher thread, and blocks until all downloads
        finish.  Exits the program when there is nothing to download.
        """
        # Stop if there's not posts to download
        if len(self.posts) < 1:
            logging.info('No images to download, stopping')
            sys.exit("No new images to download.")

        logging.info('Starting download')
        # Make folders
        logging.info('Creating folders')
        os.makedirs("Downloads", exist_ok=True)
        download_folder = os.path.join("Downloads", self.args.subreddit)
        os.makedirs(download_folder, exist_ok=True)

        # Fill the work queue consumed by grab_image workers.
        for post in self.posts:
            self.que.put(post)

        # Create progress bar
        bar = CLIBar(_max=len(self.posts))

        threads = []
        print("Starting {} threads".format(self.args.threads))
        # Never start more threads than there are posts.
        for n in range(min(len(self.posts), self.args.threads)):
            logging.info('Starting thread: {}'.format(n))
            thread = threading.Thread(target=self.grab_image,
                                      args=(download_folder, bar))
            thread.start()
            threads.append(thread)

        print("Downloading images")
        bar.start()

        # self.notify keeps the screen-refresh thread alive.
        self.notify = True
        logging.info('Starting notify thread')
        threading.Thread(target=self.update_screen,
                         args=(bar, )).start()

        logging.info('Waiting for download threads to finish')
        for thread in threads:
            try:
                thread.join()
            except KeyboardInterrupt:
                # Don't know how to handle this, ideas?
                pass
        logging.info('Done, telling notify thread to stop')
        self.notify = False

    def print_stats(self):
        """Print download stats to console"""
        print()
        new_images = self.succeeded-self.deleted
        print('Albums: {}\nImages downloaded: {}/{} \nSkipped: {}\n'
              'Failed: {}\nDeleted: {}\n'
              'New images: {}'.format(self.albums,
                                      self.succeeded,
                                      self.n_posts,
                                      self.skipped,
                                      self.failed,
                                      self.deleted,
                                      new_images))

    def save_posts(self):
        """Save posts currently in self.posts to database"""
        for post in self.posts:
            self.db.insert_link(post)
        self.db.save_changes()

    def clean_up(self):
        """Examines all downloaded images, deleting duds"""
        print('\nCleaning up')
        min_ratio = (int(self.config['MinWidth'])/int(self.config['MinHeight']))*self.args.ratiolock
        max_ratio = (int(self.config['MinWidth'])/int(self.config['MinHeight']))*(2-self.args.ratiolock)
        if float(self.args.ratiolock) <= 0:
            max_ratio = 100000
        logging.info("Ratio settings: RatioLock: {} | MinRatio: {} | MaxRatio: {}".format(self.args.ratiolock,
                                                                                          min_ratio, max_ratio))
        for image_path in self.downloaded_images:
            try:
                logging.info("Checking: {}".format(image_path))
                image = Image.open(image_path)
                image_ratio = image.size[0] / image.size[1]
                logging.info("Image size: {}x{}".format(image.size[0], image.size[1]))
                logging.info("Image ratio: {}".format(image_ratio))
            except OSError:
                continue
            # Check for size.
            if image.size[0] < int(self.config['MinWidth'])\
                    or image.size[1] < int(self.config['MinHeight']):
                image.close()
                try:
                    os.remove(image_path)
                    logging.info('Removing image due to size: {}'.format(image_path))
                    self.deleted += 1
                except PermissionError as e:
                    logging.warning('Error deleting image: {}: {}: {}'.format(
                        image_path, type(e), str(e)))
                    print('\nCan\'t delete ' + image_path + ' image is currently in use')
                continue
            else:
                logging.info("Image size ok, checking ratio")
            # Check for ratio
            if min_ratio > image_ratio or max_ratio > max_ratio:
                logging.info('Removing image due to ratio: {}'.format(image_path))
                image.close()
                try:
                    os.remove(image_path)
                    self.deleted += 1
                except PermissionError as e:
                    logging.warning('Error deleting image: {}: {}: {}'.format(
                        image_path, type(e), str(e)))
                    print('\nCan\'t delete ' + image_path + ' image is currently in use')
            else:
                logging.info('Ratio ok.')
                image.close()

    def re_download(self):
        """Attempts to re-download all links in the database"""
        self.posts = self.db.get_posts()
        self.n_posts = len(self.posts)
        self.download_images()
        if len(self.downloaded_images) > 0 and not self.args.noclean:
            self.clean_up()
        self.print_stats()

    def run(self):
        """Run the scraper.

        Fetches and filters submissions, downloads the images, records
        them in the database, optionally cleans up duds, and prints the
        run statistics.  Exits with a message on reddit/network errors.
        """
        try:
            if not self.args.search:
                print('Getting {} posts from the {} section of {}'
                      .format(self.args.limit, self.args.section, self.args.subreddit))
            else:
                print('Searching {} for {}'.format(self.args.subreddit, self.args.search))
            # Get submissions from sub
            submissions = self.get_submissions(self.r.subreddit(self.args.subreddit))
            # Handle the submissions and add them to self.posts
            # Sorts out previously downloaded images and extracts imgur albums
            self.handle_submissions(submissions)
        except RequestException:
            sys.exit('\nError connecting to reddit, please check your '
                     'internet connection')
        except ResponseException:
            sys.exit('\nError connecting to reddit.\n'
                     'Probably because you wrote an invalid subreddit name.\n'
                     'If that\'s not the case it\'s probably an'
                     'invalid client_secret.json file\nPlease see README.md '
                     'about how to set up the file properly.')
        except Exception as e:
            sys.exit('\nAn unknown error occurred:\n{}: {}'.format(type(e),
                                                                   str(e)))
        # Download all images in self.posts.
        self.download_images()
        self.save_posts()
        # Clean unless disabled or nothing was written to disk.
        if len(self.downloaded_images) > 0 and not self.args.noclean:
            self.clean_up()
        self.print_stats()
Beispiel #31
0
    def __init__(self):
        """Build and run the TITSA Telegram bot.

        Loads credentials from ``bot_config.ini``, wires all message/command
        handlers, then blocks in ``updater.idle()`` until the process is
        signalled to stop; the final save/stop calls run on shutdown.
        NOTE(review): this constructor never returns while the bot is running.
        """
        config = configparser.ConfigParser()
        config.read('bot_config.ini')

        # Telegram bot API token.
        bot_token = config.get("TELEGRAM", "token")

        # Application id used to authenticate against the TITSA API.
        titsa_idApp = config.get("TITSA", "idApp")

        # Chat id of the administrator (used e.g. for /broadcast).
        self.adminId = config.get("ADMIN", "chatId")

        self.apiHandler = ApiHandler(titsa_idApp)

        self.dbHandler = DbHandler()

        # Background thread that refreshes the GTFS transit feed weekly
        # (7 * 24 * 60 * 60 seconds).
        self.transportThread = OpenTransitThread(
            "http://www.titsa.com/Google_transit.zip", 7 * 24 * 60 * 60)
        self.transportThread.start()

        updater = Updater(token=bot_token, use_context=True)
        logging.basicConfig(
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            level=logging.INFO)

        # Persistent reply keyboard shown to every user:
        # row 1: tram query + nearby stops (sends location),
        # row 2: query favourite + delete favourite.
        b1 = telegram.KeyboardButton("⭐ Consultar favorito ⭐")
        b2 = telegram.KeyboardButton("✖️ Borrar favorito ✖️")
        b3 = telegram.KeyboardButton("🚊 Tranvia de Tenerife 🚊")
        b4 = telegram.KeyboardButton("📍 Ver paradas cercanas 📍",
                                     request_location=True)
        self.keyboard = telegram.ReplyKeyboardMarkup([[b3, b4], [b1, b2]],
                                                     resize_keyboard=True)

        # h1/h3: keyboard buttons for listing favourites (query / delete).
        # h2: a favourite entry (bus-stop emoji U+1F68F followed by a
        #     4-digit stop code); h4: a delete entry (heavy multiplication X).
        h1 = MessageHandler(Filters.regex(r"^.+Consultar favorito.+$"),
                            self.favKeyBoard)
        h2 = MessageHandler(Filters.regex(u"^\U0001F68F.+(\d{4})"),
                            self.replyToFav)
        h3 = MessageHandler(Filters.regex(r"^.+Borrar favorito.+$"),
                            self.favKeyBoard)
        h4 = MessageHandler(Filters.regex(u"^\u2716.+(\d{4})"), self.eraseFav)

        updater.dispatcher.add_handler(CommandHandler("start", self.start))
        # A bare 4-digit message is treated as a stop code query.
        updater.dispatcher.add_handler(
            MessageHandler(Filters.regex(r"^\d{4}$"), self.responder_a_codigo))
        updater.dispatcher.add_handler(h1)
        updater.dispatcher.add_handler(h2)
        updater.dispatcher.add_handler(h3)
        updater.dispatcher.add_handler(h4)

        # Tram conversation: list stops, then answer the chosen stop.
        updater.dispatcher.add_handler(
            ConversationHandler(
                entry_points=[
                    MessageHandler(Filters.regex(r"^.+Tranvia de Tenerife.+$"),
                                   self.listStops),
                ],
                states={
                    TitsaBot.TRANVIA:
                    [MessageHandler(Filters.all, self.queryTram)]
                },
                fallbacks=[]))

        # Location messages trigger a nearby-stops lookup.
        updater.dispatcher.add_handler(
            MessageHandler(Filters.location, self.nearStops))

        # Admin broadcast conversation: /broadcast, then the text to send.
        updater.dispatcher.add_handler(
            ConversationHandler(
                entry_points=[
                    CommandHandler("broadcast", self.newBroadcast),
                ],
                states={
                    TitsaBot.BROADCAST_TEXT:
                    [MessageHandler(Filters.all, self.broadcast)]
                },
                fallbacks=[]))

        # Add-favourite conversation: via /addFav command or an inline
        # 4-digit callback, optionally asking for a custom name.
        updater.dispatcher.add_handler(
            ConversationHandler(entry_points=[
                CommandHandler("addFav",
                               self.addFavCommand,
                               pass_args=True,
                               pass_user_data=True),
                CallbackQueryHandler(self.addFavQuery,
                                     pattern=r"^\d{4}$",
                                     pass_user_data=True)
            ],
                                states={
                                    TitsaBot.CUSTOM_OR_DEFAULT: [
                                        CallbackQueryHandler(
                                            self.setFavNameOption,
                                            pass_user_data=True)
                                    ],
                                    TitsaBot.INSERT_CUSTOM: [
                                        MessageHandler(Filters.text,
                                                       self.customName,
                                                       pass_user_data=True)
                                    ]
                                },
                                fallbacks=[h1, h2, h3, h4]))

        # "Repetir NNNN" inline button re-runs a stop query.
        updater.dispatcher.add_handler(
            ConversationHandler(entry_points=[
                CallbackQueryHandler(self.reloadStationQuery,
                                     pattern=r"^Repetir \d{4}$",
                                     pass_user_data=True)
            ],
                                states={
                                    TitsaBot.CUSTOM_OR_DEFAULT: [
                                        CallbackQueryHandler(
                                            self.reloadStationQuery,
                                            pass_user_data=True)
                                    ]
                                },
                                fallbacks=[h1, h2, h3, h4]))

        # "Locate NNNN" inline button sends the stop info plus its location.
        updater.dispatcher.add_handler(
            CallbackQueryHandler(self.sendStopAndLocation,
                                 pattern=r"^Locate \d{4}$",
                                 pass_user_data=True))

        updater.start_polling()
        updater.idle()  # blocks until SIGINT/SIGTERM
        # Shutdown: persist favourites and stop the GTFS refresh thread.
        self.dbHandler.save()
        self.transportThread.stop()
Beispiel #32
0
class InputValidator:
    """Validates all client-supplied input for the experiment backend.

    Every public ``check_*`` method returns ``{"result": bool, "message": str}``
    (``check_last_unfinished_step`` returns ``{"result": bool,
    "current_step": str}`` instead); ``result`` is True when the input
    passed all checks.
    """

    def __init__(self):
        self.db = DbHandler()  # database access for participants and domains

    # internal methods to check validity of inputs
    def _is_user_id_valid(self, user_id):
        """Return True if *user_id* is a canonical UUIDv4 string that belongs
        to a known participant."""
        # check if user id is a valid UUIDv4
        try:
            uuid_obj = uuid.UUID(user_id, version=4)
        except ValueError:
            return False

        # Reject non-canonical spellings (uppercase, braces, URN prefix)
        # that uuid.UUID() would otherwise normalize and accept.
        if str(uuid_obj) != user_id:
            return False

        # check if we have a participant for the input user id
        data = self.db.get_participant(user_id)
        if data is None:
            return False

        return True

    def _is_ref_domain_valid(self, ref_domain):
        """Return True if *ref_domain* is one of the stored reference domains."""
        ref_domains = [elem[0] for elem in self.db.get_all_ref_domains()]
        return ref_domain in ref_domains

    def _is_created_domain_valid(self, created_domain):
        """Return True if *created_domain* contains none of the disallowed
        special characters."""
        forbidden = [
            "_", "<", ">", "&", "'", "\"", "=", "[", "]", "(", ")",
            "%", "$", "?", "#", "*", "+", "/", " ", ",", ";", ":"
        ]
        return not any(char in created_domain for char in forbidden)

    def _is_valid_integer(self, test_string):
        """Return True if *test_string* parses as an integer.

        Also catches TypeError so non-string values (e.g. None) are reported
        as invalid instead of raising (bug fix: original caught only
        ValueError).
        """
        try:
            int(test_string)
            return True
        except (ValueError, TypeError):
            return False

    def _is_rated_domain_valid(self, rated_domain):
        """Return True if *rated_domain* appears in any of the test, reference
        or step1-created domain sets."""
        test_domains = [elem[0] for elem in self.db.get_all_test_domains()]
        if rated_domain in test_domains:
            return True

        ref_domains = [elem[0] for elem in self.db.get_all_ref_domains()]
        if rated_domain in ref_domains:
            return True

        step1_domains = [elem[0] for elem in self.db.get_all_step1_domains()]
        if rated_domain in step1_domains:
            return True

        return False

    def check_input_user_id(self, user_id):
        """Validate a bare user id."""
        if not self._is_user_id_valid(user_id):
            return {"result": False, "message": "Invalid user ID!"}

        return {"result": True, "message": ""}

    def check_last_unfinished_step(self, user_id, step_id):
        """Check whether *step_id* matches the participant's actual progress.

        Returns ``{"result": True, "current_step": ...}`` when the client is
        on the wrong step (i.e. a redirect is needed). When the environment
        flag is unset, step checking is disabled and "test" is returned.
        NOTE(review): columns 7..12 of the participant row are presumed to be
        the finished_step1..finished_step5/questionnaire flags — confirm
        against the DB schema.
        """
        if os.environ["CHECK_LAST_UNFINISHED_STEP"] == "1":
            data = self.db.get_participant(user_id)
            current_step = "step1"
            if data[7] == 1:
                current_step = "step2"
            if data[8] == 1:
                current_step = "step3"
            if data[9] == 1:
                current_step = "step4"
            if data[10] == 1:
                current_step = "step5"
            if data[11] == 1:
                current_step = "questionnaire"
            if data[12] == 1:
                current_step = "final_notes"

            if current_step != step_id:
                return {"result": True, "current_step": current_step}
        else:
            return {"result": False, "current_step": "test"}

        return {"result": False, "current_step": ""}

    def check_log_time(self, user_id, time_type, time):
        """Validate a timing log entry (user, time-type name, timestamp)."""
        valid_time_types = ["experiment_start_time", "experiment_end_time",
                            "step1_start_time", "step1_end_time",
                            "step2_start_time", "step2_end_time",
                            "step3_start_time", "step3_end_time",
                            "step4_start_time", "step4_end_time",
                            "step5_start_time", "step5_end_time",
                            "questionnaire_start_time", "questionnaire_end_time"]
        if not self._is_user_id_valid(user_id):
            return {"result": False, "message": "Invalid user ID!"}
        if time_type not in valid_time_types:
            return {"result": False, "message": "Invalid time type!"}
        if not self._is_valid_integer(time):
            return {"result": False, "message": "Invalid current time!"}

        return {"result": True, "message": ""}

    def check_input_set_step_finished(self, user_id, step_id):
        """Validate a request to mark a step as finished."""
        if not self._is_user_id_valid(user_id):
            return {"result": False, "message": "Invalid user ID!"}
        if step_id not in ["step1", "step2", "step3", "step4", "step5"]:
            return {"result": False, "message": "Invalid step ID!"}

        return {"result": True, "message": ""}

    def check_input_step1_result(self, user_id, ref_domain, created_domain,
                                 elapsed_time, domain_position):
        """Validate one step-1 domain-creation result."""
        # NOTE(review): quota/finished checks run before user-id validation,
        # mirroring the original order so returned messages stay identical.
        if self.db.get_count("step1", user_id)[0] >= 10:
            return {
                "result": False,
                "message": "Already created maximum number of domains!"
            }
        if self.db.is_step_finished(user_id, "finished_step1") is not None:
            return {"result": False, "message": "Step is already finished!"}
        if not self._is_user_id_valid(user_id):
            return {"result": False, "message": "Invalid user ID!"}
        if not self._is_ref_domain_valid(ref_domain):
            return {"result": False, "message": "Invalid reference domain!"}
        if not self._is_created_domain_valid(created_domain):
            return {"result": False, "message": "Invalid created domain!"}
        if not self._is_valid_integer(elapsed_time):
            return {"result": False, "message": "Invalid elapsed time!"}
        if not self._is_valid_integer(domain_position):
            return {"result": False, "message": "Invalid domain position!"}

        return {"result": True, "message": ""}

    def check_input_step2_result(self, user_id, ref_domain, squatting_technique, squatting_technique_infos,
                                 created_domain, elapsed_time, domain_position, squatting_techniques_order):
        """Validate one step-2 result (guided squatting-domain creation)."""
        valid_squatting_techniques = [
            "wrong_tld", "homograph", "typosquatting", "combosquatting",
            "subdomain"
        ]
        valid_squatting_technique_infos = ["", "prepend_www", "omit_character", "duplicate_character",
                                           "swap_characters", "replace_qwerty", "none_before_none_behind",
                                           "term_before_none_behind", "chars_before_none_behind",
                                           "none_before_term_behind", "none_before_chars_behind",
                                           "term_before_term_behind", "chars_before_chars_behind",
                                           "chars_before_term_behind", "term_before_chars_behind"]

        if self.db.get_count("step2", user_id)[0] >= 10:
            return {
                "result": False,
                "message": "Already created maximum number of domains!"
            }
        if self.db.is_step_finished(user_id, "finished_step2") is not None:
            return {"result": False, "message": "Step is already finished!"}
        if not self._is_user_id_valid(user_id):
            return {"result": False, "message": "Invalid user ID!"}
        if not self._is_ref_domain_valid(ref_domain):
            return {"result": False, "message": "Invalid reference domain!"}
        if squatting_technique not in valid_squatting_techniques:
            return {"result": False, "message": "Invalid squatting technique!"}
        if squatting_technique_infos not in valid_squatting_technique_infos:
            return {
                "result": False,
                "message": "Invalid squatting technique infos!"
            }
        if not self._is_created_domain_valid(created_domain):
            return {"result": False, "message": "Invalid created domain!"}
        if not self._is_valid_integer(elapsed_time):
            return {"result": False, "message": "Invalid elapsed time!"}
        if not self._is_valid_integer(domain_position):
            return {"result": False, "message": "Invalid domain position!"}
        # The client must have seen every technique exactly in some order.
        if set(squatting_techniques_order.split(",")) != set(
                valid_squatting_techniques):
            return {
                "result": False,
                "message": "Invalid order of squatting techniques!"
            }

        return {"result": True, "message": ""}

    def check_input_step3_result(self, user_id, rated_domain, type,
                                 elapsed_time, rating, domain_position):
        """Validate one step-3 domain-rating result."""
        if self.db.get_step3_rated_domains_count(user_id) >= 30:
            return {
                "result": False,
                "message": "Already created maximum number of domains!"
            }
        if self.db.is_step_finished(user_id, "finished_step3") is not None:
            return {"result": False, "message": "Step is already finished!"}
        if not self._is_user_id_valid(user_id):
            return {"result": False, "message": "Invalid user ID!"}
        if not self._is_rated_domain_valid(rated_domain):
            return {"result": False, "message": "Invalid rated domain!"}
        if type not in ["step1", "ref_domain", "phishing_domain"]:
            return {"result": False, "message": "Invalid domain type!"}
        if not self._is_valid_integer(elapsed_time):
            return {"result": False, "message": "Invalid elapsed time!"}
        if rating not in ["1", "2", "3", "4", "5"]:
            return {"result": False, "message": "Invalid rating!"}
        # Bug fix: validate before int() so a non-numeric position returns an
        # error dict instead of raising ValueError (the original converted
        # unconditionally, unlike every sibling method).
        if not self._is_valid_integer(domain_position):
            return {"result": False, "message": "Invalid domain position!"}
        if int(domain_position) < 1 or int(domain_position) > 30:
            return {"result": False, "message": "Invalid domain position!"}

        return {"result": True, "message": ""}

    def check_input_step4_result(self, user_id, created_domain, elapsed_time,
                                 domain_position):
        """Validate one step-4 free domain-creation result."""
        if self.db.get_count("step4", user_id)[0] >= 10:
            return {
                "result": False,
                "message": "Already created maximum number of domains!"
            }
        if self.db.is_step_finished(user_id, "finished_step4") is not None:
            return {"result": False, "message": "Step is already finished!"}
        if not self._is_user_id_valid(user_id):
            return {"result": False, "message": "Invalid user ID!"}
        if not self._is_created_domain_valid(created_domain):
            return {"result": False, "message": "Invalid created domain!"}
        if not self._is_valid_integer(elapsed_time):
            return {"result": False, "message": "Invalid elapsed time!"}
        if not self._is_valid_integer(domain_position):
            return {"result": False, "message": "Invalid domain position!"}

        return {"result": True, "message": ""}

    def check_input_step5_result(self, user_id, selected_domains, elapsed_time,
                                 counter):
        """Validate one step-5 domain-selection result."""
        if self.db.get_count("step5", user_id)[0] >= 10:
            return {
                "result": False,
                "message": "Already created maximum number of domains!"
            }
        if self.db.is_step_finished(user_id, "finished_step5") is not None:
            return {"result": False, "message": "Step is already finished!"}
        if not self._is_user_id_valid(user_id):
            return {"result": False, "message": "Invalid user ID!"}
        # Expected wire format: 10 groups separated by ";" with 31
        # comma-separated entries overall (30 commas, 9 semicolons).
        if selected_domains.count(",") != 30 or selected_domains.count(
                ";") != 9:
            return {"result": False, "message": "Invalid selected domains!"}
        if not self._is_valid_integer(elapsed_time):
            return {"result": False, "message": "Invalid elapsed time!"}
        if not self._is_valid_integer(counter):
            return {"result": False, "message": "Invalid counter!"}

        return {"result": True, "message": ""}

    def check_input_questionnaire_result(self, user_id, age, gender_current, education, origin,
                                         f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12,
                                         f13, f14, f15, f16, attention_test1, attention_test2):
        """Validate the demographic + SeBIS questionnaire answers."""
        countries = [elem[0] for elem in self.db.get_countries()]
        if not self._is_user_id_valid(user_id):
            return {"result": False, "message": "Invalid user ID!"}
        if age not in [
                "18_25", "26_35", "36_45", "46_55", "over_55", "not_answer"
        ]:
            return {"result": False, "message": "Invalid age!"}
        if gender_current not in [
                "male", "female", "non_binary", "transgender", "other",
                "not_answer"
        ]:
            return {"result": False, "message": "Invalid current gender!"}
        if education not in ["less_high_school", "high_school", "associate", "no_degree",
                             "bachelor", "master", "over_master", "not_answer"]:
            return {"result": False, "message": "Invalid education!"}
        if origin not in countries:
            return {"result": False, "message": "Invalid origin!"}

        sebis_valid_answers = [
            "never", "rarely", "sometimes", "often", "always"
        ]
        # Check f1..f16 in order; messages are identical to the original
        # per-field chain ("Invalid f1!" .. "Invalid f16!").
        sebis_answers = [f1, f2, f3, f4, f5, f6, f7, f8,
                         f9, f10, f11, f12, f13, f14, f15, f16]
        for index, answer in enumerate(sebis_answers, start=1):
            if answer not in sebis_valid_answers:
                return {"result": False, "message": "Invalid f{}!".format(index)}

        if attention_test1 not in sebis_valid_answers:
            return {"result": False, "message": "Invalid attention 1!"}
        if attention_test2 not in sebis_valid_answers:
            return {"result": False, "message": "Invalid attention 2!"}

        return {"result": True, "message": ""}

    def check_input_final_notes_feedback(self, user_id, feedback):
        """Validate free-text feedback (length and character whitelist)."""
        if not self._is_user_id_valid(user_id):
            return {"result": False, "message": "Invalid user ID!"}
        if len(feedback) > 500:
            feedback_length = len(feedback) - 500
            return {
                "result": False,
                "message":
                "Feedback is " + str(feedback_length) + " characters too long!"
            }
        if any([
                char in feedback for char in [
                    "<", ">", "&", "'", "\"", "=", "(", ")", "%", "$", "?",
                    "#", "*", "+", "/"
                ]
        ]):
            return {"result": False, "message": "Not allowed characters used!"}

        return {"result": True, "message": ""}