Esempio n. 1
0
 def __init__(self, db_config):
     """
     Builds a Database() from the given config and hands the same
     instance to both the Scraper and the Connector.
     """
     database = Database(db_config)
     self.db = database
     self.sc = Scraper(database)
     self.co = Connector(database)
Esempio n. 2
0
 def get_db(self):
     """Return the cached Database, connecting lazily on first use.

     Returns None when a connection cannot be established.
     """
     if self._db:
         return self._db
     database = Database()
     if not database.get_connect():
         return None
     self._db = database
     return self._db
Esempio n. 3
0
 def __init__(self, db):
     """
     Accepts either a ready Database() instance or a config suitable
     for constructing one.
     """
     self.db = db if isinstance(db, Database) else Database(db)
Esempio n. 4
0
 def __init__(self, override_cfg_file: Optional[str] = None) -> None:
     """Load configuration, open the database, and reset all filters.

     override_cfg_file: optional path overriding the default config file.
     """
     self.config = Config(override_cfg_file)
     self.database = Database(self.config.db_path)
     # Filter state starts empty/off until a setter enables it.
     self.channel_filter: List[str] = []
     self.video_id_filter: List[int] = []
     self.include_watched_filter = False
     self.date_begin_filter = 0.0
     self.date_end_filter = (0.0, False)
def get_db_obj():
    """
    Return the module-wide Database singleton, creating and connecting
    it on the first call.
    """
    global db_obj
    if not db_obj:
        database = Database()
        database.connect_to_database(db_name="mytest")
        db_obj = database
    return db_obj
Esempio n. 6
0
 def __init__(self, parent=None):
     """Load the main-window UI, wire up menu actions, and open the database."""
     QMainWindow.__init__(self, parent)
     loadUi("ui/MainWindow.ui", self)
     # NOTE(review): removed leftover debug print of the menu bar's attributes.
     self.ogrenci_ekle.triggered.connect(self.ogrenciEkleWindowShow)
     self.ogrenciList.triggered.connect(self.ogrenciListesi)
     self.db = Database("db/main.db")
Esempio n. 7
0
 def __init__(self, config):
     """
     Set up the runnable worker: open the database, store and load the
     configuration, then build the API client and the web driver.
     """
     Runnable.__init__(self)
     self.db = Database()
     # config is stored before load_configs(), which presumably reads it
     # (and possibly self.db) — TODO confirm.
     self.config = config
     self.load_configs()
     self.api = API(callback_add=self.callback_add_info_site)
     self.driver = WebDriver.build(config)
Esempio n. 8
0
    def __init__(self, secret):
        """Open the database with *secret* and start the async scheduler."""
        # Database access
        self.db = DB(secret)

        # Start the AsyncIO scheduler
        self.sched = AsyncIOScheduler()
        self.sched.start()
Esempio n. 9
0
def modify_user_deck(user, deck_id, user_input):
    """Rebuild a deck from raw *user_input* and save it as *deck_id* for *user*.

    Returns whatever user.save_deck() returns.
    """
    # Deck/Card operations read card data through the class-level DB handle.
    Card.db = Database(DatabaseConfig)

    mydeck = Deck(user_input=user_input)

    return user.save_deck(deck_id, mydeck)
Esempio n. 10
0
def save_user_library(user, user_input):
    """Rebuild a deck from raw *user_input* and save it as *user*'s library.

    Returns whatever user.save_library() returns.
    """
    # Deck/Card operations read card data through the class-level DB handle.
    Card.db = Database(DatabaseConfig)

    mydeck = Deck(user_input=user_input)

    return user.save_library(mydeck)
Esempio n. 11
0
def users_cards_save(user, user_input):
    """Rebuild a deck from raw *user_input* and save its cards for *user*.

    Returns None; user.save_cards() is called for its side effect only.
    """
    # Deck/Card operations read card data through the class-level DB handle.
    Card.db = Database(DatabaseConfig)

    mydeck = Deck(user_input=user_input)

    user.save_cards(mydeck)
Esempio n. 12
0
def print_user_library(user):
    """Render the price table for *user*'s library.

    Returns whatever Deck.print_price_table() returns.
    """
    # Deck/Card operations read card data through the class-level DB handle.
    Card.db = Database(DatabaseConfig)

    mydeck = user.get_library()

    return mydeck.print_price_table()
Esempio n. 13
0
def print_user_deck(user, deck_id):
    """Render the price table for *user*'s deck *deck_id*.

    Returns whatever Deck.print_price_table() returns.
    """
    # Deck/Card operations read card data through the class-level DB handle.
    Card.db = Database(DatabaseConfig)

    mydeck = user.get_deck(deck_id)

    return mydeck.print_price_table()
Esempio n. 14
0
def process(user, user_input):
    """Build a deck from raw *user_input* for *user* and render its price table.

    Returns whatever Deck.print_price_table() returns.
    """
    # Deck/Card operations read card data through the class-level DB handle.
    Card.db = Database(DatabaseConfig)

    mydeck = Deck(user=user, user_input=user_input)

    return mydeck.print_price_table()
Esempio n. 15
0
def export(format, file_name, cityname=None):
    """Export stored weather rows to a file under EXPORT_DIR.

    format: 'csv', 'json' or 'html'; the matching extension is appended
    to *file_name*. When *cityname* is given only that city's row is
    exported. Exits with status 1 when nothing matches.
    """
    db = Database(os.path.join(EXPORT_DIR, 'storage.db'))
    keys = ['city_id', 'city', 'date', 'temp', 'weather_id']

    if cityname:
        # Parameterized query instead of interpolating user input into SQL
        # (injection risk). Assumes sqlite-style '?' placeholders — TODO
        # confirm the Database driver's paramstyle.
        db.cursor.execute("select * from weather where city=?", (cityname,))
        result = db.cursor.fetchone()
        if result:
            result = dict(zip(keys, result))
        else:
            print('City not found!')
            exit(1)
    else:
        db.cursor.execute('select * from weather')
        result = db.cursor.fetchall()
        result = [dict(zip(keys, item)) for item in result]

    if result:
        filename = os.path.join(EXPORT_DIR, file_name)

        if format == 'csv':
            filename += '.csv'
            # newline='' is required for files handed to the csv module.
            with open(filename, 'w', newline='') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=keys)
                # NOTE(review): no header row is written — presumably
                # intentional; confirm against the consumer of this file.
                if isinstance(result, list):
                    writer.writerows(result)
                else:
                    writer.writerow(result)

        elif format == 'json':
            filename += '.json'
            with open(filename, 'w') as jsonfile:
                json.dump(result, jsonfile)

        elif format == 'html':
            filename += '.html'
            if isinstance(result, list):
                html_output = ''
                for item in result:
                    html_output += Weather(API_KEY, item['city_id']).html_output()
            else:
                html_output = Weather(API_KEY, result['city_id']).html_output()

            with open(filename, 'w') as htmlfile:
                htmlfile.write(html_output)
    else:
        print('City not found!')
        exit(1)
Esempio n. 16
0
def process_by_id(card_id):
    """Price a single card by its id.

    Returns (card_details, price_table, l) where card_details is the
    first body row of the rendered price table.
    """
    # Deck/Card operations read card data through the class-level DB handle.
    Card.db = Database(DatabaseConfig)

    card_list = [
        {
            'id': card_id,
            'count': 1
        },
    ]

    mydeck = Deck(card_list=card_list)

    price_table, l = mydeck.print_price_table()

    card_details = price_table['body'][0]

    return card_details, price_table, l
Esempio n. 17
0
from libs.city import City
from libs.weather import Weather
from libs.database import Database

import os
import re
from settings import *

# Download cities
print("Initializing weather app ... please wait")
City.download_archive(ARCHIVE_URL, EXPORT_DIR)
cities = City.get_list(EXPORT_DIR)

# Init database
db = Database(os.path.join(EXPORT_DIR, 'storage.db'))

# Ask city/country name from user
input_name = input("Type city/country name to find: ")

# Escape the raw input so regex metacharacters ('(', '*', ...) cannot
# crash re.compile or change the match semantics.
pattern = re.compile(r'(' + re.escape(input_name) + ')+', flags=re.IGNORECASE)
founded_items = list(filter(lambda item: pattern.match(item.name), cities))

print("\nCities founded: ")
for x in founded_items:
    print(x)

# Get city ID
# NOTE(review): raises ValueError on non-numeric input — consider validating.
city_id = int(input('\nType city id: '))

# Try to get JSON weather
Esempio n. 18
0
class Reminder:
    """Stores scheduled events and creates their reminder jobs.

    Events are persisted through the DB wrapper; reminders are fired by
    an AsyncIO scheduler.
    """
    def __init__(self, secret):
        # Database access.
        self.db = DB(secret)

        # Start the AsyncIO scheduler.
        self.sched = AsyncIOScheduler()
        self.sched.start()

    @property
    def action(self):
        """Callable invoked when a reminder job fires."""
        return self._action

    @action.setter
    def action(self, value):
        if not callable(value):
            raise ValueError("The value must be a function")
        self._action = value

    @property
    def reminders(self):
        """Reminder specs; each entry needs 'delta' and 'message' keys."""
        return self._reminders

    @reminders.setter
    def reminders(self, value):
        if not isinstance(value, list):
            raise ValueError("The value must be a list")
        self._reminders = value

    # Public API

    async def add(self, date, time, time_zone, channel, text, author):
        """Add a new event and create its reminders.

        Returns [] when the event is in the past, the created DB record
        on success, or None when the date/time cannot be parsed (or any
        other failure occurs).
        """
        try:
            date_time = datetime.fromisoformat(f"{date}T{time}{time_zone}")
            date_time_now = datetime.utcnow().replace(tzinfo=timezone.utc)

            # Reject events scheduled before "now".
            if date_time < date_time_now:
                return []

            event = self._generate_event(date_time, channel, text, author)
            jobs_id = self._create_jobs(event)

            # Persist the event in the database.
            data = {
                "author": event['author'],
                "text": event['text'],
                "time": self.db.q.time(event['time'].isoformat()),
                "channel": event['channel'],
                "jobs": jobs_id
            }

            return self.db.create("Events", data)
        except Exception:
            # Narrowed from a bare `except:`; a bad date format ends here.
            return None

    async def load(self):
        """Load persisted events and reschedule their jobs.

        Used at program start: reads events from the database, loads
        them into the scheduler, and updates the database with the new
        job ids.
        """
        docs = self.db.get_all("all_events")
        new_docs = []
        for doc in docs['data']:
            event = {
                "text": doc['data']['text'],
                # Stored time ends with 'Z'; swap it for an explicit UTC offset.
                "time": datetime.fromisoformat(
                    f"{doc['data']['time'].value[:-1]}+00:00"),
                "channel": doc['data']['channel'],
                "reminders": self.reminders
            }

            # Recreate the scheduler jobs for this event.
            jobs_id = self._create_jobs(event)
            new_docs.append((doc['ref'].id(), {"jobs": jobs_id}))

        # Write the new job ids back to the database.
        return self.db.update_all_jobs("Events", new_docs)

    async def list(self):
        """Return all scheduled events."""
        events = self.db.get_all("all_events")
        return events['data']

    async def remove(self, id_, author):
        """Delete a scheduled event owned by *author*."""
        return self._remove_by_id_and_author(id_, author)

    # Private helpers

    def _remove_by_id(self, id_: str):
        """Delete an event by id and unschedule its jobs; [] on failure."""
        try:
            doc = self.db.delete("Events", id_)
            # (removed leftover debug logging of the deleted document)
            for job in doc['data']['jobs']:
                self.sched.remove_job(job)
            return doc
        except Exception:
            return []

    def _remove_by_id_and_author(self, id_: str, author: str):
        """Delete an event only when *author* owns it; [] on failure."""
        try:
            doc = self.db.delete_by_id_and_author("Events",
                                                  "event_by_id_and_author",
                                                  id_, author)
            for job in doc['data']['jobs']:
                self.sched.remove_job(job)
            return doc
        except Exception:
            return []

    async def _remove_old_event(self):
        """Purge expired events from the database."""
        self.db.delete_by_expired_time("all_events_by_time")

    def _create_jobs(self, event):
        """Schedule a job for every reminder still in the future, plus a
        cleanup job at event time. Returns the list of job ids."""
        dt_event = event['time']
        dt_now = datetime.utcnow().replace(tzinfo=timezone.utc)

        jobs_id = []
        for reminder in event['reminders']:
            if dt_event > dt_now + reminder['delta']:
                log.info("Added event")
                job = self.sched.add_job(
                    self.action,
                    'date',
                    run_date=(dt_event - reminder['delta']),
                    args=[
                        reminder['message'], event['text'], event['channel']
                    ])
                jobs_id.append(job.id)

        # Job that removes the record from the database once the event passes.
        job = self.sched.add_job(self._remove_old_event,
                                 'date',
                                 run_date=(dt_event),
                                 args=[])
        jobs_id.append(job.id)

        return jobs_id

    def _generate_event(self, date_time, channel, text, author):
        """Build the in-memory event dict consumed by add()/_create_jobs()."""
        return {
            "author": f"{author}",
            "text": text,
            "time": date_time,
            "channel": int(channel),
            "reminders": self.reminders
        }
Esempio n. 19
0
class BaseModel:
    """Thin delegation layer over a Database connection.

    Subclasses get query/condition helpers without talking to the
    Database object directly.
    """
    _db = None

    def __init__(self):
        self._db = Database()

    def get_db(self):
        """Return the underlying Database handle."""
        return self._db

    def query_raw(self, query):
        """Run a raw query string."""
        return self._db.query_raw(query)

    def dict_to_create_table_sql(self, dictionary):
        """Build CREATE TABLE SQL from a column dict."""
        return self._db.dict_to_create_table_sql(dictionary)

    def dict_to_insert_condition(self, dictionary, allow_key=None):
        """Build an INSERT fragment from a value dict."""
        return self._db.dict_to_insert_condition(dictionary, allow_key)

    def dict_to_where_condition(self, dictionary):
        """Build a WHERE fragment from a value dict."""
        return self._db.dict_to_where_condition(dictionary)

    def dict_to_set_condition(self, dictionary):
        """Build a SET fragment from a value dict."""
        return self._db.dict_to_set_condition(dictionary)

    def list_to_in_condition(self, list_data):
        """Build an IN (...) fragment from a list."""
        return self._db.list_to_in_condition(list_data)

    def insert_obj(self, table, data, insert_id=False):
        """Insert a row; optionally return the inserted id."""
        return self._db.insert_obj(table, data, insert_id)

    def insert_raw(self, query, insert_id=False):
        """Run a raw INSERT; optionally return the inserted id."""
        return self._db.insert_raw(query, insert_id)

    def update_obj(self, table, data, where=None):
        """Update rows matching *where* with *data*."""
        return self._db.update_obj(table, data, where)

    def select_obj(self, table, where, select_field=None):
        """Select rows matching *where*."""
        return self._db.select_obj(table, where, select_field)

    def select_raw(self, query):
        """Run a raw SELECT."""
        return self._db.select_raw(query)

    def delete_obj(self, table, where=None):
        """Delete rows matching *where*.

        BUG FIX: previously called self.delete_obj (infinite recursion);
        now delegates to the Database like every other helper.
        """
        return self._db.delete_obj(table, where)
Esempio n. 20
0
class Linker:
    """
    Dedicated to linking SDK and CR. Works with the DB directly, after building.
    """

    def __init__(self, db_config):
        """
        Creates a Database() object and passes it to Scraper/Connector upon creation.
        """
        self.db = Database(db_config)
        self.sc = Scraper(self.db)
        self.co = Connector(self.db)

    def link(self, editions):
        """
        For each edition:
          1) relate through rel_editions to get "edition_sdk" and "edition_cr"
          2) count how many cards total
          3) count how many special cards (tokens, emblems)
          4) count how many cards match directly (and how many missing)
          5) count how many mismatching cards
            5a) has to be an even number
            5b) has to add up to total - special
          6) ...magic...
          7) ...
          8) profit
        TODO: all-editions should use the relation table and allow passing 2 different editions to link_edition(s)
        """
        edition_list = self.get_rel_edition_lists()

        # Map CR edition id -> SDK edition code for quick lookup below.
        edition_dict = {}
        for edition_pair in edition_list:
            edition_dict[edition_pair[0]] = edition_pair[1]

        if editions == []:
            # Empty list means "link everything we know about".
            double_list = edition_list
        else:
            # TODO: this needs to assume CR and look up API and make pairs
            double_list = []
            for edition in editions:
                if isinstance(edition, (list, tuple)):
                    if len(edition) == 2:
                        double_list.append(edition)
                else:
                    try:
                        # assume CR and look up API
                        double_list.append([edition, edition_dict[edition]])
                    # NOTE(review): bare except — any failure (not just a
                    # missing key) is reported as "not matched".
                    except:
                        print(
                            "Input CR edition `{}` doesn't exist in both or is not matched. Exiting."
                            .format(edition))
                        sys.exit()

        for edition_pair in double_list:
            self.link_edition(edition_pair)

    def get_rel_edition_lists(self):
        """Return [cr_edition_id, sdk_code] pairs from the relation tables."""
        query = """ SELECT editions.id, code
                    FROM editions
                        INNER JOIN rel_editions
                            ON rel_editions.id_cr = editions.id
                        INNER JOIN sdk_editions
                            ON sdk_editions.code = rel_editions.id_sdk

                    UNION ALL

                    SELECT editions.id, code
                    FROM editions
                        JOIN sdk_editions
                            ON editions.id = sdk_editions.code; """
        result = self.db.query(query)

        double_list = []
        for edition in result:
            double_list.append([edition[0], edition[1]])
        return double_list

    def link_edition(self, edition_pair):
        """Link one (cr, api) edition pair: direct name matches first,
        then image matching, and log a final report line."""
        edition_cr = edition_pair[0]
        edition_api = edition_pair[1]
        api_original = self.total("api", edition_api)
        api_standard = self.standard("api", edition_api)
        cr_original = self.total("cr", edition_cr)
        cr_standard = self.standard("cr", edition_cr)

        n_directly_matching = self.direct_matches(edition_pair)

        # Expected card count is the larger of the two "standard" counts.
        if cr_standard >= api_standard:
            n_cards = cr_standard
        else:
            n_cards = api_standard
        n_missing_cards = n_cards - n_directly_matching
        n_inserted = 0

        # Warnings collected for the final report line.
        self.trouble = []

        # Is it reasonable to try direct matching?
        if n_directly_matching > 0:

            n_direct_inserted = self.insert_direct_match(edition_pair)
            n_inserted += n_direct_inserted
            if n_direct_inserted != n_directly_matching:
                self.trouble.append("{}/{} direct inserted".format(
                    n_direct_inserted, n_directly_matching))

        # Is it reasonable to try image matching?
        if api_standard == cr_standard:
            # Is there even anything to image match?
            if n_missing_cards > 0:
                n_image_matched = self.image_match(edition_pair)
                n_inserted += n_image_matched
                if n_image_matched != n_missing_cards:
                    self.trouble.append("{}/{} image_matches inserted".format(
                        n_image_matched, n_missing_cards))
        else:
            self.trouble.append(
                "API ({} -> {}) / CR ({} -> {}) mismatch".format(
                    api_original, api_standard, cr_original, cr_standard))

        # Final report
        if len(self.trouble):
            trouble_string = "(" + ", ".join(self.trouble) + ")"
        else:
            trouble_string = ""
        try:
            perc = int(100 * n_inserted / n_cards)
        except ZeroDivisionError:
            perc = "-"
        # NOTE(review): both branches log the identical message — the
        # complete/incomplete distinction is lost here.
        if n_inserted == n_cards:
            log.info("{}: {}% ({}/{} cards inserted) {}".format(
                edition_api, perc, n_inserted, n_cards, trouble_string))
        else:
            log.info("{}: {}% ({}/{} cards inserted) {}".format(
                edition_api, perc, n_inserted, n_cards, trouble_string))

    def total(self, source, edition):
        """Count all cards of *edition* in the given source ('api' or 'cr');
        -1 for an unknown source.

        NOTE(review): *edition* is interpolated into the SQL — injection
        risk if it can come from untrusted input.
        """
        count = -1
        if source == "api":
            query = """SELECT COUNT(*) FROM `sdk_cards` WHERE `set` = "{}";""".format(
                edition)
            result = self.db.query(query)
            count = result[0][0]
        elif source == "cr":
            query = """SELECT COUNT(*) FROM `cards` WHERE `edition_id` = "{}";""".format(
                edition)
            result = self.db.query(query)
            count = result[0][0]
        return count

    def standard(self, source, edition):
        """Count "standard" cards of *edition* (split/flip collapsed on the
        API side; tokens/heroes/emblems excluded on the CR side); -1 for an
        unknown source.
        """
        count = -1
        if source == "api":
            query = """ SELECT COUNT(*) FROM (
                    	SELECT * FROM sdk_cards WHERE NOT ((`layout`="double-faced" OR `layout`="meld") and mana_cost is null and `type` !="Land") AND NOT (`layout`="flip" OR `layout`="split") AND (`set`="{}")
                    	UNION ALL
                    	SELECT `names` as `name`, `names`, `mid`, 'join' as `layout`, GROUP_CONCAT(mana_cost SEPARATOR "/") as mana_cost,	`type`, `rarity`, `set`, GROUP_CONCAT(id SEPARATOR "-") as id FROM (SELECT * FROM sdk_cards WHERE layout = 'split'  ORDER BY `id` DESC ) as ordered   WHERE `set` = "{}" GROUP BY `mid`
                        UNION ALL
    		            SELECT SUBSTRING_INDEX(`names`, ' // ', 1 ) as `name`, `names`, `mid`, 'unflip' as `layout`, `mana_cost`, `type`, `rarity`, `set`, GROUP_CONCAT(id SEPARATOR "-") as id FROM (SELECT * FROM sdk_cards WHERE layout = 'flip') as split_cards WHERE `set` = "{}" GROUP BY `mid`
                    ) as a;
                    """.format(edition, edition, edition)
            result = self.db.query(query)
            count = result[0][0]
        elif source == "cr":
            query = """SELECT COUNT(*) FROM `cards` WHERE `edition_id` = "{}" AND id not like "tokens%" AND name not like "Token - %" AND name not like "Hero - %" AND name not like "Emblem - %";""".format(
                edition)
            result = self.db.query(query)
            count = result[0][0]
        return count

    def direct_matches(self, edition_pair):
        """Count cards whose (normalized) names match directly between CR
        and the API for the given edition pair.

        NOTE(review): `count` stays unbound (NameError) if no statement in
        the multi-statement result produces rows — TODO confirm this
        cannot happen with this query.
        """
        query = """ SET @edition_cr = "{}";
                    SET @edition_api = "{}";
                    SELECT COUNT(*) FROM (
                        SELECT REPLACE(name,'´', '\\\'') as name_replaced from cards where edition_id = @edition_cr
                        ) as cr
                    JOIN (
                        SELECT * from sdk_cards where `set` = @edition_api and NOT (`layout`="split" OR `layout`="flip")
                        UNION ALL
                        SELECT `names` as `name`, `names`, `mid`, 'join' as `layout`, GROUP_CONCAT(mana_cost SEPARATOR "/") as mana_cost,	`type`, `rarity`, `set`, GROUP_CONCAT(id SEPARATOR "-") as id FROM (SELECT * FROM sdk_cards WHERE layout = 'split'  ORDER BY `id` DESC ) as ordered   WHERE `set` = @edition_api GROUP BY `mid`
                        UNION ALL
    		            SELECT SUBSTRING_INDEX(`names`, ' // ', 1 ) as `name`, `names`, `mid`, 'unflip' as `layout`, `mana_cost`, `type`, `rarity`, `set`, GROUP_CONCAT(id SEPARATOR "-") as id FROM (SELECT * FROM sdk_cards WHERE layout = 'flip') as split_cards WHERE `set` = @edition_api GROUP BY `mid`
                        ) as api
                    ON cr.name_replaced = api.name;
                """.format(edition_pair[0], edition_pair[1])
        results = self.db.cursor.execute(query, multi=True)
        self.db.cnx.commit()
        for cur in results:
            if cur.with_rows:
                count = cur.fetchall()[0][0]

        return count

    def image_match(self, edition_pair):
        """
        Tries to match cards not covered by direct_matches() using differences in images.

        IMPROVE Could be optimized to make a new matcher for every kind of land in order
        to only do 3x3 matrices for 'Plains' instead of 15x15 for all lands.
        """
        cr_ids = self.extra_from_cr(edition_pair)
        api_ids = self.extra_from_api(edition_pair)

        if len(cr_ids) == len(api_ids):
            m = Matcher(cr_ids, api_ids)
            matches, info = m.get_matches()
            if matches is not None:
                log.debug("All matches are unambiguous (unique).")
                self.insert_image_match(matches)
                if len(matches) != len(cr_ids) and info:
                    self.trouble.append("matcher: {}.".format(info))
                return len(matches)
            else:
                self.trouble.append("matcher: {}.".format(info))
                return 0

        else:
            log.debug(
                "{}: WARNING: image_match() found {} in API and {} in CR. Not even trying."
                .format(edition_pair[0], len(api_ids), len(cr_ids)))
            self.trouble.append("image_match uneven {}!={}".format(
                len(api_ids), len(cr_ids)))
            return 0

    def insert_image_match(self, matches):
        """
        Replaces new values into `rel_cards`.
        TODO: find out the amount inserted to report back
        """
        log.debug("Inserting image match...")
        for cr_id, mid in matches.items():
            query = """
                REPLACE INTO `rel_cards`
                    (`id_cr`, `id_sdk`)
                VALUES
                    (%s, %s);
                """

            self.db.insert(query, [cr_id, mid])
        log.debug("Done.")

    def extra_from_cr(self, edition_pair):
        """
        Returns a list of cards from CR that are not covered by direct_matches().
        """
        query_cr = """
                SET @edition_cr = "{}";
                SET @edition_api = "{}";

            	SELECT id_cr
            	FROM (
            		SELECT * FROM sdk_cards WHERE NOT ((`layout`="double-faced" OR `layout`="meld") and mana_cost is null and `type` !="Land") AND NOT (`layout`="flip" OR `layout`="split") AND (`set`=@edition_api)
                    UNION ALL
                    SELECT `names` as `name`, `names`, `mid`, 'join' as `layout`, GROUP_CONCAT(mana_cost SEPARATOR "/") as mana_cost,	`type`, `rarity`, `set`, GROUP_CONCAT(id SEPARATOR "-") as id FROM (SELECT * FROM sdk_cards WHERE layout = 'split'  ORDER BY `id` DESC ) as ordered   WHERE `set` = @edition_api GROUP BY `mid`
                    UNION ALL
		            SELECT SUBSTRING_INDEX(`names`, ' // ', 1 ) as `name`, `names`, `mid`, 'unflip' as `layout`, `mana_cost`, `type`, `rarity`, `set`, GROUP_CONCAT(id SEPARATOR "-") as id FROM (SELECT * FROM sdk_cards WHERE layout = 'flip') as split_cards WHERE `set` = @edition_api GROUP BY `mid`
            	) as t1
            	RIGHT JOIN (
            		SELECT REPLACE(name,'´', '\\\'') as name_replaced, id as id_cr FROM cards WHERE edition_id=@edition_cr AND id not like "tokens%" AND name not like "Token - %" AND name not like "Hero - %" AND name not like "Emblem - %"
            	) as t2
            	ON t1.name = t2.name_replaced
            	WHERE `name` is null;
                """.format(edition_pair[0], edition_pair[1])

        results = self.db.cursor.execute(query_cr, multi=True)
        self.db.cnx.commit()
        out = []
        for cur in results:
            if cur.with_rows:
                rows = cur.fetchall()
                for row in rows:
                    out.append(row[0])
        return out

    def extra_from_api(self, edition_pair):
        """
        Returns a list of cards from API that are not covered by direct_matches().
        """
        query_cr = """
                SET @edition_cr = "{}";
                SET @edition_api = "{}";

                SELECT mid
            	FROM (
            		SELECT * FROM sdk_cards WHERE NOT ((`layout`="double-faced" OR `layout`="meld") and mana_cost is null and `type` !="Land") AND NOT (`layout`="flip" OR `layout`="split") AND (`set`=@edition_api)
                    UNION ALL
                    SELECT `names` as `name`, `names`, `mid`, 'join' as `layout`, GROUP_CONCAT(mana_cost SEPARATOR "/") as mana_cost,	`type`, `rarity`, `set`, GROUP_CONCAT(id SEPARATOR "-") as id FROM (SELECT * FROM sdk_cards WHERE layout = 'split'  ORDER BY `id` DESC ) as ordered   WHERE `set` = @edition_api GROUP BY `mid`
                    UNION ALL
		            SELECT SUBSTRING_INDEX(`names`, ' // ', 1 ) as `name`, `names`, `mid`, 'unflip' as `layout`, `mana_cost`, `type`, `rarity`, `set`, GROUP_CONCAT(id SEPARATOR "-") as id FROM (SELECT * FROM sdk_cards WHERE layout = 'flip') as split_cards WHERE `set` = @edition_api GROUP BY `mid`
            	) as t1
            	LEFT JOIN (
            		SELECT REPLACE(name,'´', '\\\'') as name_replaced FROM cards WHERE edition_id=@edition_cr AND id not like "tokens%" AND name not like "Token - %" AND name not like "Hero - %" AND name not like "Emblem - %"
            	) as t2
            	ON t1.name = t2.name_replaced
            	WHERE `name_replaced` is null;
                """.format(edition_pair[0], edition_pair[1])

        results = self.db.cursor.execute(query_cr, multi=True)
        self.db.cnx.commit()
        out = []
        for cur in results:
            if cur.with_rows:
                rows = cur.fetchall()
                for row in rows:
                    out.append(row[0])
        return out

    def insert_direct_match(self, edition_pair):
        """Insert direct name matches into rel_cards and return the number
        of matched pairs.

        The changed-row count is halved — presumably because REPLACE
        reports two changed rows per replaced pair — TODO confirm; an odd
        count falls through as a float, signalling something went wrong.
        """
        query = """ SET @edition_cr = %s ;
                    SET @edition_api = %s ;
                    REPLACE INTO rel_cards
                    SELECT `id_cr`, `mid` as `id_sdk` FROM (
                       SELECT REPLACE(name,'´', '\\\'') as name_replaced, `id` as `id_cr` from cards where edition_id = @edition_cr
                       ) as cr
                    JOIN (
                       SELECT * from sdk_cards where `set` = @edition_api and NOT (`layout`="split" OR `layout`="flip")
                       UNION ALL
                       SELECT `names` as `name`, `names`, `mid`, 'join' as `layout`, GROUP_CONCAT(mana_cost SEPARATOR "/") as mana_cost,	`type`, `rarity`, `set`, GROUP_CONCAT(id SEPARATOR "-") as id FROM (SELECT * FROM sdk_cards WHERE layout = 'split'  ORDER BY `id` DESC ) as ordered   WHERE `set` = @edition_api GROUP BY `mid`
                       UNION ALL
   		               SELECT SUBSTRING_INDEX(`names`, ' // ', 1 ) as `name`, `names`, `mid`, 'unflip' as `layout`, `mana_cost`, `type`, `rarity`, `set`, GROUP_CONCAT(id SEPARATOR "-") as id FROM (SELECT * FROM sdk_cards WHERE layout = 'flip') as split_cards WHERE `set` = @edition_api GROUP BY `mid`
                       ) as api
                    ON cr.name_replaced = api.name;
        """
        number_of_changed_rows = self.db.multiinsert(
            query, (edition_pair[0], edition_pair[1]))
        if number_of_changed_rows % 2 == 0:
            return int(number_of_changed_rows / 2)
        else:
            return number_of_changed_rows / 2
Esempio n. 21
0
 def __init__(self):
     """Create the model's Database handle."""
     self._db = Database()
Esempio n. 22
0
class Scraper:
    """
    Object encapsulating scraping CR eshop, (re)building and checking DB.
    Updating should be implemented. Possibly dates of last build per edition.
    """

    def __init__(self, db):
        """
        Takes either a Database() object or config sufficient for creating one.
        """
        if isinstance(db, Database):
            self.db = db
        else:
            self.db = Database(db)

    def build_edition_list(self, editions):
        """
        Scrapes the full edition list from the eshop and stores it in the DB.

        NOTE(review): `editions` is currently unused here; it is kept for
        signature compatibility with callers such as Builder.build().
        """
        self.scraped_editions = self.get_edition_list()
        self.insert_editions(self.scraped_editions)

    def build(self, editions=None):
        """
        Builds the database to its 'default' state.
        Assumes empty but existing tables and that build_edition_list() has
        populated self.scraped_editions.

        :param editions: iterable of edition IDs to scrape, or None for all.
            Defaults to None so rebuild() can call build() without arguments.
        """
        # Scrape all editions in incoming list
        done = []
        for edition, edition_name in self.scraped_editions:
            if editions is None or edition in editions:
                if edition not in done:
                    log.info("[{}] Starting to scrape edition {}.".format(
                        edition, edition_name))
                    cards = self.scrape_edition(edition, sleep=0.5)
                    cards = self.format_cards(cards)
                    log.info(
                        '[{}] {} cards left after cleaning. Inserting into database.'
                        .format(edition, str(len(cards))))
                    self.insert_cards(cards)
                    done.append(edition)
        log.info('Done.')

    def rebuild(self):
        """
        Empties all necessary tables, then rebuilds everything.
        """
        self.empty_db()
        # Bug fix: build() used to be called without its required argument
        # (TypeError) and without refreshing the edition list first.
        self.build_edition_list(None)
        self.build()

    def get_edition_size(self, edition):
        """
        Looks up number of cards on CR eshop in a specific edition, any foil.
        Expects edition ID like KLD, SOI or M16 and returns int.
        Returns 0 (after logging) when the page could not be parsed, so that
        scrape_edition() degrades to an empty scrape instead of crashing.
        """
        html = self.fetch_url(
            'http://cernyrytir.cz/index.php3?akce=3&limit=0&edice_magic=' +
            edition +
            '&poczob=30&foil=A&triditpodle=ceny&hledej_pouze_magic=1&submit=Vyhledej'
        )
        matches = re.findall(r'<td><span class="kusovkytext">Nalezeno (\d*)',
                             html, re.DOTALL)

        # The count appears twice on the page; both occurrences must agree.
        if len(matches) >= 2 and matches[0] == matches[1]:
            log.debug(
                str(matches[0]) + ' rows should be available in edition ' +
                edition + '.')
            return int(matches[0])
        log.info('(!) Regex to get # of cards in ' + edition + ' failed.')
        # Previously fell through returning None, which crashed np.arange().
        return 0

    def get_edition_list(self):
        """
        Looks up the list of all available editions on CR eshop.

        :return: list of (edition_id, edition_name) tuples, excluding the
            pseudo-editions 'standard' and 'modern'.
        """
        html = self.fetch_url('http://cernyrytir.cz/index.php3?akce=3')
        select = re.findall(r'<select name="edice_magic"(.*?)</select>', html,
                            re.DOTALL)
        matches = re.findall(r'<option value="([^"]*)" >([^<]*)</option>',
                             select[0], re.DOTALL)

        editions = []
        for match in matches:
            if (match[0] != 'standard') and (match[0] != 'modern'):
                editions.append(match)
        return editions

    def insert_editions(self, editions):
        """
        Inserts pairs of (edition_id, edition_name) into the DB.
        Assumes that table `editions` is empty; duplicates are counted and
        logged but otherwise ignored.
        """
        duplicates = 0
        for edition in editions:
            query = """
                INSERT INTO `editions`
                (`id`, `name`)
                VALUES
                (%s, %s)
                """
            try:
                self.db.insert(query, (
                    edition[0],
                    edition[1],
                ))
            except Error as err:
                if err.errno == errorcode.ER_DUP_ENTRY:
                    log.debug(
                        "Edition {} is already in the database, skipping...".
                        format(edition))
                    duplicates += 1
                else:
                    raise
        if duplicates:
            log.info(
                "(~) There were {} duplicates in edition list. Probably OK. ".
                format(duplicates))

    def scrape_edition(self, edition, sleep=0.1):
        """
        Scrapes CR eshop for all cards in specific edition and takes note of all foil and non-foil costs.
        Keeps card info in a dictionary, future uses might change this to Card() object.
        Sleep tries to be kind to the eshop by sleeping sleep = 0.1 seconds by default between each request.
        """
        size_of_edition = self.get_edition_size(edition)
        cards = {}
        scraped_rows = 0
        for limit in np.arange(0, size_of_edition, 30):  # Assuming pagesize 30
            url = 'http://cernyrytir.cz/index.php3?akce=3&limit=' + \
                str(limit) + '&edice_magic=' + str(edition) + \
                '&poczob=100&foil=A&triditpodle=ceny&hledej_pouze_magic=1&submit=Vyhledej'
            html = self.fetch_url(url)
            matches = re.findall(
                r'/images/kusovkymagicsmall/([^.]*)\.jpg.*?<font style="font-weight : bolder;">([^<]*)</font></div>.*?(/images/kusovky/.*?\.gif[^&]*).*?>(\d*)&nbsp;Kč',
                html, re.DOTALL)

            if matches:
                scraped_rows += len(matches)
                for match in matches:
                    # Mana cost is encoded as a series of symbol gifs.
                    manacosts = re.findall(r'/([0-9a-z]*?)\.gif', match[2],
                                           re.DOTALL)
                    manacost = ''.join(manacosts)

                    card = {}
                    card_id = match[0].replace('/', '_')
                    card['name'] = match[1]
                    card['edition'] = edition
                    # Fixing lands having no manacost
                    if manacost == edition.lower():
                        card['manacost'] = "-"
                    else:
                        card['manacost'] = manacost

                    card['cost'] = match[3]

                    # Foil vs non-foil X existing non-existing
                    # Ignoring played/non-english/strange cards
                    if self.is_normal(card):
                        card_id, card['name'] = self.fix_known_cr_mistakes(
                            card_id, card['name'])

                        if card_id in cards:
                            if not self.is_foil(card):
                                # Non-foil row wins; carry over foil price.
                                foil_cost = cards[card_id]['cost_buy_foil']
                                card['cost_buy_foil'] = foil_cost
                                cards[card_id] = card
                            else:
                                cards[card_id]['cost_buy_foil'] = card['cost']
                        else:
                            if not self.is_foil(card):
                                card['cost_buy_foil'] = None
                                cards[card_id] = card
                            else:
                                card['cost_buy_foil'] = card['cost']
                                card['cost'] = None
                                card['name'] = card['name'].replace(
                                    ' - foil', '')
                                cards[card_id] = card

            else:
                log.info(
                    '(!) A scrape of a page was useless. Not a big deal but possibly not correct.'
                )
            time.sleep(sleep)

        log.info('[{}] {} rows scraped in edition.'.format(
            edition, str(scraped_rows)))

        if not cards:
            log.info(
                '[{}] No cards found, something is probably wrong.'.format(
                    edition))
        else:
            log.info('[{}] {} unique cards found.'.format(
                edition, str(len(cards))))
        return cards

    def fix_known_cr_mistakes(self, card_id, card_name):
        """
        Remaps card IDs (and occasionally names) that are known to be wrong
        on the CR eshop, so they line up with the rest of the pipeline.

        :return: tuple (card_id, card_name), possibly corrected.
        """
        known_id_errors = {
            # ZEN large-graphics lands
            # This is done because on CR some full-art lands have different "foil" and "normal" IDs
            'ZEN_258': 'ZEN_235',
            'ZEN_256': 'ZEN_237',
            'ZEN_260': 'ZEN_236',
            'ZEN_262': 'ZEN_238',  # Forests
            'ZEN_264': 'ZEN_240',
            'ZEN_266': 'ZEN_241',
            'ZEN_268': 'ZEN_242',
            'ZEN_270': 'ZEN_239',  # Islands
            'ZEN_272': 'ZEN_244',
            'ZEN_274': 'ZEN_243',
            'ZEN_276': 'ZEN_245',
            'ZEN_278': 'ZEN_246',  # Mountains
            'ZEN_282': 'ZEN_249',
            'ZEN_284': 'ZEN_248',
            'ZEN_286': 'ZEN_250',
            'ZEN_288': 'ZEN_247',  # Plains
            'ZEN_292': 'ZEN_253',
            'ZEN_294': 'ZEN_254',
            'ZEN_296': 'ZEN_251',
            'ZEN_298': 'ZEN_252',  # Swamps
            # DTK mismatched images fixes
            'DTK_254': 'DTK_133',  # Misthoof Kirin
            'DTK_140': 'DTK_124',  # Center Soul
            # M12 missing images
            'C14_200': 'M12_078',  # Turn to Frog
        }
        if card_id in known_id_errors:
            return known_id_errors[card_id], card_name
        elif card_name == "Scion of Ugin":
            # This is here because Scion of Ugin has a wrong image
            return 'DTK_990', card_name
        elif card_name == "Prey Upon" and card_id == 'EMN_062':
            # This is here because Prey Upon has a wrong image
            return 'ISD_990', card_name
        elif card_name == "Blood Rites" and card_id == 'CHK_159':
            # This is here because Blood Rites [CHK] has image of Brothers Yamazaki (#1)
            return 'CHK_990', card_name
        else:
            return card_id, card_name

    def format_cards(self, cards):
        """
        Filters scraped cards. Currently only strips edition suffixes and
        applies known name corrections, but could filter more in the future.
        Not sure how it handles used foils?
        """
        removals = [" (KLD)", " (AER)"]
        corrections = {
            "Jaces´s Scrutiny": "Jace´s Scrutiny",
            "Berserker´s Onslaught": "Berserkers´ Onslaught",
        }

        cards_out = {}
        for card_id, card in cards.items():
            if card_id not in cards_out:
                old_name = card['name']
                for rem in removals:
                    old_name = old_name.replace(rem, "")
                    card['name'] = old_name
                if old_name in corrections:
                    new_name = corrections[old_name]
                    card['name'] = new_name
                    log.info("(~) Corrected card >{}< to >{}<.".format(
                        old_name, new_name))
                cards_out[card_id] = card
            else:
                log.info(
                    '(!) A duplicate entry was attempted, something might be wrong.'
                )

        return cards_out

    def insert_cards(self, cards):
        """
        Inserts dictionary of cards into database, split into `cards` and `costs`.
        Duplicate rows are counted and logged, not treated as errors.
        """
        duplicates = 0
        duplicates_costs = 0
        for card_id, card in cards.items():
            # Insert into list of cards; md5 of the lowercased name is used
            # as a join key elsewhere in the pipeline.
            name_md5 = hashlib.md5(
                card['name'].lower().encode('utf-8')).hexdigest()
            query = """
                INSERT INTO `cards`
                (`id`, `name`, `edition_id`, `manacost`, `md5`)
                VALUES
                (%s, %s, %s, %s, %s)
                """
            try:
                log.debug("Inserting card:\n{}\n{}\n{}\n{}\n{}\n".format(
                    card_id, card['name'], card['edition'], card['manacost'],
                    name_md5))
                self.db.insert(query, (
                    card_id,
                    card['name'],
                    card['edition'],
                    card['manacost'],
                    name_md5,
                ))
            except Error as err:
                if err.errno == errorcode.ER_DUP_ENTRY:
                    log.debug(
                        "Card {} is already in the database, skipping...".
                        format(card['name']))
                    duplicates += 1
                else:
                    raise

            # Insert into costs
            query = """
                INSERT INTO `costs`
                (`card_id`, `buy`, `buy_foil`)
                VALUES
                (%s, %s, %s)
                """

            try:
                log.debug("Inserting card cost:\n{}\n{}\n{}\n".format(
                    card_id, card['cost'], card['cost_buy_foil']))
                self.db.insert(query, (
                    card_id,
                    card['cost'],
                    card['cost_buy_foil'],
                ))
            except Error as err:
                if err.errno == errorcode.ER_DUP_ENTRY:
                    log.debug(
                        "Costs of card {} are already in the database, skipping..."
                        .format(card['name']))
                    duplicates_costs += 1
                else:
                    raise
        # Bug fix: these messages previously claimed "edition list".
        if duplicates:
            log.info(
                "(~) There were {} duplicates in card list. Probs OK. ".
                format(duplicates))
        if duplicates_costs:
            log.info(
                "(~) There were {} duplicates in cost list. Probs OK. ".
                format(duplicates_costs))

    def empty_db(self):
        """ Calls for truncate of all tables used by Scraper. """

        # Foreign key checks must be off to truncate tables that reference
        # each other; restored immediately afterwards.
        self.db.insert("SET FOREIGN_KEY_CHECKS=0")
        self.db.truncate_table('editions')
        self.db.truncate_table('cards')
        self.db.truncate_table('costs')
        self.db.insert("SET FOREIGN_KEY_CHECKS=1")

    def get_db_info(self):
        """
        Fetches and returns database statistics in the form of a dict.
        """
        log.debug('--- Finished. Statistics: ---')
        query = """SELECT COUNT(*) FROM `cards`;"""
        result = self.db.query(query)
        number_of_cards = result[0][0]

        query = """SELECT COUNT(*) FROM `editions`;"""
        result = self.db.query(query)
        number_of_editions = result[0][0]

        query = """SELECT COUNT(DISTINCT `edition_id`) FROM `cards`;"""
        result = self.db.query(query)
        known_editions = result[0][0]

        query = """SELECT COUNT(`buy`) FROM `costs`;"""
        result = self.db.query(query)
        number_of_normal_costs = result[0][0]

        query = """SELECT COUNT(*) FROM `costs` WHERE `buy` IS NOT NULL OR `buy_foil` IS NOT NULL OR `sell` IS NOT NULL OR `sell_foil` IS NOT NULL;"""
        result = self.db.query(query)
        number_of_unique_costs = result[0][0]

        query = """SELECT DISTINCT `edition_id` from `cards`;"""
        result = self.db.query(query)
        result_list = [ed[0] for ed in result]
        editions = ','.join(result_list)

        data = {
            'number_of_cards': number_of_cards,
            'number_of_editions': number_of_editions,
            'known_editions': known_editions,
            'editions': editions,
            'number_of_normal_costs': number_of_normal_costs,
            'number_of_unique_costs': number_of_unique_costs
        }

        log.debug(
            "Scraped info:\n"
            "{number_of_cards} cards, {known_editions} editions out of {number_of_editions} known.\n"
            "Scraped editions: {editions}.\n"
            "{number_of_normal_costs} normal costs, {number_of_unique_costs} unique."
            .format(**data))

        return data

    def is_normal(self, card):
        """Returns True unless the card name marks a played/non-english copy."""
        markers = (
            '- lightly played',
            '/ lightly played',
            '- moderately played',
            '- heavily played',
            '- japanese',
            '- korean',
            '- chinese',
            '- russian',
            '- non-english',
        )
        return not any(marker in card['name'] for marker in markers)

    def is_foil(self, card):
        """ Returns if card dictionary item is or isn't foil. """
        return '- foil' in card['name']

    def get_build_time(self):
        """ Returns current build time from DB. """
        query = """SELECT `created` from `info` WHERE `key`=1;"""
        result = self.db.query(query)
        return result[0][0]

    def fetch_url(self, url):
        """
        Downloads HTML contents of a webpage in encoding windows-1250.
        """
        response = requests.get(url)
        response.encoding = 'windows-1250'
        data = response.text
        return data
Esempio n. 23
0
class Builder:
    """
    Dedicated to building the DB, mostly by calling Scraper/Connector.
    """

    def __init__(self, db_config):
        """
        Creates a Database() object and passes it to Scraper/Connector upon creation.
        """
        self.db = Database(db_config)
        self.sc = Scraper(self.db)
        self.co = Connector(self.db)

    def build(self, editions):
        """
        Runs a full build: records build time, builds edition lists, scrapes
        the eshop and loads the API, translating CR edition IDs to SDK codes
        in between. Logs how long each phase took.

        :param editions: iterable of CR edition IDs, or None for everything.
        """
        self.update_build_time()

        self.sc.build_edition_list(editions)
        self.co.build_edition_list(editions)

        scrape_start = time.time()
        self.sc.build(editions)
        scrape_took = time.time() - scrape_start

        if editions is None:
            translated_editions = None
        else:
            edition_dict = self.get_rel_edition_dict()
            # .get() keeps editions without a known SDK mapping usable as-is;
            # the plain dict lookup previously raised KeyError for them,
            # defeating the `or edition` fallback.
            translated_editions = [
                edition_dict.get(edition) or edition for edition in editions
            ]

        connect_start = time.time()
        self.co.build(translated_editions)
        connect_took = time.time() - connect_start

        log.info("Scraping took {}.".format(self.readable_time(scrape_took)))
        log.info("Connecting took {}.".format(
            self.readable_time(connect_took)))

    def readable_time(self, seconds):
        """Formats a duration in seconds as H:MM:SS."""
        m, s = divmod(seconds, 60)
        h, m = divmod(m, 60)
        return "%d:%02d:%02d" % (h, m, s)

    def scrape(self, editions):
        """Runs only the eshop scrape phase."""
        self.sc.build(editions)

    def connect(self, editions):
        """Runs only the API load phase."""
        # Bug fix: previously referenced the undefined name
        # `translated_editions`, raising NameError on every call.
        self.co.build(editions)

    def get_rel_edition_dict(self):
        '''
        Returns a dict mapping CR edition IDs to SDK set codes, combining
        explicit rel_editions links with same-code matches.
        This is a semi-duplicate of linker.get_rel_edition_lists.
        '''
        query = """ SELECT editions.id, code
                    FROM editions
                        INNER JOIN rel_editions
                            ON rel_editions.id_cr = editions.id
                        INNER JOIN sdk_editions
                            ON sdk_editions.code = rel_editions.id_sdk

                    UNION ALL

                    SELECT editions.id, code
                    FROM editions
                        JOIN sdk_editions
                            ON editions.id = sdk_editions.code; """
        result = self.db.query(query)

        double_dict = {}
        for edition in result:
            double_dict[edition[0]] = edition[1]
        return double_dict

    def load_sql(self, filename):
        """Executes every ';'-separated statement from the given SQL file."""
        with open(filename, "rt", encoding='utf-8') as in_file:
            contents = in_file.read()
            statements = contents.split(';')
            for statement in statements:

                # trim whitespace
                statement = statement.strip()

                # Bug fix: was `statement is not ""` — identity comparison
                # against a literal, which is unreliable and a SyntaxWarning.
                if statement:
                    log.debug(
                        "executing SQL statement:\n+++{}+++".format(statement))
                    self.db.insert(statement)

    def update_build_time(self):
        """ Inserts current time into the DB as build time into `info`.`created`. """
        dtime = time.strftime('%Y-%m-%d %H:%M:%S')
        # Parameterized instead of string-formatted SQL.
        query = """
            INSERT INTO info (`key`, `created`) VALUES (1, %s) ON DUPLICATE KEY UPDATE `created` = %s;
            """
        self.db.insert(query, (dtime, dtime))

    def get_build_time(self):
        """ Returns current build time from DB. """
        query = """SELECT `created` from `info` WHERE `key`=1;"""
        result = self.db.query(query)
        return result[0][0]

    def get_db_info(self):
        """
        Fetches, combines and returns database statistics as a dict.
        """
        info = {
            'db_built_time': self.get_build_time(),
            'scrapper': self.sc.get_db_info(),
            'connector': self.co.get_db_info()
        }
        return info
Esempio n. 24
0
class Connector:
    """
    Object dedicated to communicating with the Mtg API via the python MtgSDK.
    """

    def __init__(self, db):
        """
        Takes either a Database() object or config sufficient for creating one.
        """
        if isinstance(db, Database):
            self.db = db
        else:
            self.db = Database(db)

    def build_edition_list(self, editions):
        """
        Loads the edition list from the API and stores it in the DB.

        NOTE(review): `editions` is currently unused here; it is kept for
        signature compatibility with callers such as Builder.build().
        """
        self.loaded_editions = self.get_edition_list()
        self.insert_editions(self.loaded_editions)

    def build(self, editions=None):
        """
        Builds the database to its 'default' state.
        Assumes empty but existing tables and that build_edition_list() has
        populated self.loaded_editions.

        :param editions: iterable of SDK set codes to load, or None for all.
            Defaults to None so rebuild() can call build() without arguments.
        """
        # Load all editions from edition list
        for edition in self.loaded_editions:
            edition_code = edition[0]
            if editions is None or edition_code in editions:
                log.info("[{}] Loading edition from API...".format(edition_code))
                cards = self.load_edition(edition_code)
                self.insert_cards(cards)
        log.info('Done.')

    def rebuild(self):
        """
        Empties all necessary tables, then rebuilds everything.
        """
        self.empty_db()
        # Bug fix: build() used to be called without its required argument
        # (TypeError) and without refreshing the edition list first.
        self.build_edition_list(None)
        self.build()

    def get_edition_list(self):
        """
        Loads all editions from the API.
        For a SET, mtg api has the following properties:
            # code
            # name
            # gatherer_code
            old_code
            # magic_cards_info_code
            # release_date
            # border
            # type
            # block
            online_only
            booster
            mkm_id
            mkm_name
        We are using the commented ones, but more could be fetched from the API.
        """

        editions = []
        all_sets = Set.all()
        for s in all_sets:
            editions.append([s.code, s.name, s.gatherer_code, s.magic_cards_info_code, s.release_date,
                             s.border, s.type, s.block])
        return editions

    def insert_editions(self, editions):
        """
        Inserts (code, name, gatherer_code, ...) rows into `sdk_editions`.
        Assumes that table `sdk_editions` is empty. (It does NOT truncate
        first; call empty_db() for that.)
        """

        for edition in editions:
            query = """
                INSERT INTO `sdk_editions`
                    (`code`, `name`, `gatherer_code`, `magic_cards_info_code`, `release_date`, `border`, `type`, `block`)
                VALUES
                    (%s, %s, %s, %s, %s, %s, %s, %s);
                """

            self.db.insert(query, edition)

    def load_edition(self, edition):
        """
        Loads all cards from a specific edition from the API.
        For a CARD, mtg api has the following properties:

            # name
            # multiverse_id
            layout
            names
            # mana_cost
            cmc
            # colors
            # type
            supertypes
            subtypes
            # rarity
            text
            flavor
            artist
            number
            power
            toughness
            loyalty
            variations
            watermark
            border
            timeshifted
            hand
            life
            reserved
            release_date
            starter
            rulings
            foreign_names
            printings
            original_text
            original_type
            legalities
            source
            image_url
            # set
            set_name
            # id
        """
        all_cards = Card.where(set=edition).all()
        number_of_cards = len(all_cards)

        log.info("[{}] Found {} cards in API. Starting to fetch.".format(edition, number_of_cards))

        cards = []
        for c in all_cards:
            # Multi-part cards carry their face names joined by ' // '.
            names = None
            if c.names:
                names = " // ".join(c.names)

            cards.append([c.name, names, c.multiverse_id, c.layout,
                          c.mana_cost, c.type, c.rarity, c.set, c.id])

        log.info("[{}] Inserting {} cards into DB.".format(edition, len(cards)))

        return cards

    def insert_cards(self, cards):
        """
        Inserts new values into `sdk_cards`.
        Assumes `sdk_cards` is empty.
        """
        for card in cards:
            query = """
                INSERT INTO `sdk_cards`
                    (`name`, `names`, `mid`, `layout`, `mana_cost`, `type`, `rarity`, `set`, `id`)
                VALUES
                    (%s, %s, %s, %s, %s, %s, %s, %s, %s);
                """
            if card[2] is None:
                log.warning("SDK Fail - multiverseid is None. Card details: {}".format(card))

            log.debug("Inserting card: {}".format(card))
            self.db.insert(query, card)

    def empty_db(self):
        """ Calls for truncate of all tables used by Connector. """

        # Foreign key checks must be off to truncate referenced tables.
        self.db.insert("SET FOREIGN_KEY_CHECKS=0")
        self.db.truncate_table('sdk_editions')
        self.db.truncate_table('sdk_cards')
        self.db.insert("SET FOREIGN_KEY_CHECKS=1")

    def get_db_info(self):
        """
        Fetches and returns database statistics as a dict.
        """
        log.debug('--- Finished. Statistics: ---')
        query = """SELECT COUNT(*) FROM `sdk_cards`;"""
        result = self.db.query(query)
        number_of_cards = result[0][0]

        query = """SELECT COUNT(*) FROM `sdk_editions`;"""
        result = self.db.query(query)
        known_editions = result[0][0]

        query = """SELECT COUNT(DISTINCT `set`) FROM `sdk_cards`;"""
        result = self.db.query(query)
        number_of_editions = result[0][0]

        query = """SELECT DISTINCT `set` from `sdk_cards`;"""
        result = self.db.query(query)
        result_list = [ed[0] for ed in result]
        editions = ','.join(result_list)

        data = {
            'number_of_cards': number_of_cards,
            'number_of_editions': number_of_editions,
            'known_editions': known_editions,
            'editions': editions}

        log.debug(
            "Loaded info:\n"
            "{number_of_cards} cards, {known_editions} editions out of {number_of_editions} known.\n"
            "Loaded editions: {editions}.".format(
                **data))

        return data
Esempio n. 25
0
from urllib.parse import urlparse, urlunparse, parse_qs
from urllib.request import urlopen
from lxml import etree
from shutil import copyfile
from apscheduler.schedulers.blocking import BlockingScheduler
from sqlalchemy import asc, desc

from helper import Commands

# Module-level state for the GUI/scheduler script.
# NOTE(review): Config, Commands, Database, YTCM and Thread come from imports
# outside this fragment — confirm their origin before relying on them.
runscheduler = True  # master switch for the blocking scheduler loop

# defaults
config = Config()
helper = Commands()

database = Database(config.db_path)
ytcm = YTCM()
# Tk-style anchor/side constants used for widget layout.
left = "W"
right = "E"
top = "N"
bottom = "S"
ttop = "top"
# Right-click event binding; config.mousebuttons is presumably the button
# number ("2" or "3") — confirm against the config schema.
rightclick = "<Button-" + config.mousebuttons + ">"
testURL = ""

monitor_thread = ""  # placeholder until a monitor thread is started
logfile_name = config.serverdir + config.statuslog
t = Thread()  # placeholder Thread instance, replaced before use — verify

active_view = ""  # name of the currently displayed view
Esempio n. 26
0
class YTCM:
    """Handle updating the RSS feeds and listing/filtering videos.

    Filters for :meth:`list_videos` can be set with the following methods:
    * ``set_channel_filter``
    * ``set_date_begin_filter``
    * ``set_date_end_filter``
    * ``set_include_watched_filter``
    """
    def __init__(self, override_cfg_file: Optional[str] = None) -> None:
        """Load the configuration, open the database and reset all filters.

        :param override_cfg_file: Optional path to a configuration file that
            overrides the default one.
        """
        self.config = Config(override_cfg_file)
        self.database = Database(self.config.db_path)
        # Filter state consumed by list_videos(); empty/zero means "off".
        self.video_id_filter: List[int] = []
        self.channel_filter: List[str] = []
        self.date_begin_filter = 0.0
        # (timestamp, enabled) -- the bool records whether the end filter
        # was explicitly set; see list_videos().
        self.date_end_filter = (0.0, False)
        self.include_watched_filter = False

    def __enter__(self) -> "YTCM":
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Any:
        # Delegate teardown to the database's context-manager exit.
        self.database.__exit__(exc_type, exc_val, exc_tb)

    def close(self) -> None:
        """Close open resources like the database connection."""
        self.database.close()

    @staticmethod
    def get_youtube_video_url(yt_videoid: Optional[str]) -> str:
        """Return the YouTube URL for the given youtube video ID.

        :param yt_videoid:  The YouTube video ID.
        :return: The YouTube URL for the given youtube video ID.
        :raises YtccException: If ``yt_videoid`` is ``None``.
        """
        if yt_videoid is None:
            raise YtccException("Video id is none!")

        return f"https://www.youtube.com/watch?v={yt_videoid}"

    def set_channel_filter(self, channel_filter: List[str]) -> None:
        """Set the channel filter.

        The results when listing videos will only include videos by channels specified in the
        filter.

        :param channel_filter: The list of channel names.
        """
        # Mutate in place rather than rebind, so aliases stay valid.
        self.channel_filter.clear()
        self.channel_filter.extend(channel_filter)

    def set_date_begin_filter(self, begin: datetime.datetime) -> None:
        """Set the time filter.

        The results when listing videos will only include videos newer than the given time.

        :param begin: The lower bound of the time filter.
        """
        self.date_begin_filter = begin.timestamp()

    def set_date_end_filter(self, end: datetime.datetime) -> None:
        """Set the time filter.

        The results when listing videos will only include videos older than the given time.

        :param end: The upper bound of the time filter.
        """
        # Second element marks the filter as explicitly enabled.
        self.date_end_filter = (end.timestamp(), True)

    def set_include_watched_filter(self) -> None:
        """Set the "watched video" filter.

        The results when listing videos will include both watched and unwatched videos.
        """
        self.include_watched_filter = True

    def set_video_id_filter(self, ids: Optional[Iterable[int]] = None) -> None:
        """Set the id filter.

        This filter overrides all other filters.
        :param ids: IDs to filter for.
        """
        self.video_id_filter.clear()
        if ids is not None:
            self.video_id_filter.extend(ids)

    @staticmethod
    def _update_channel(channel: Channel) -> List[Video]:
        """Fetch the channel's RSS feed and return its entries as Videos.

        On a feed parse problem ("bozo_exception") the raw feed is dumped to
        a per-channel log file for inspection.  NOTE(review): assumes a
        ``logs/`` directory exists -- confirm it is created at startup.
        """
        yt_channel_id = channel.yt_channelid
        url = _get_youtube_rss_url(yt_channel_id)
        feed = feedparser.parse(url)
        if "bozo_exception" in feed:
            logfile = "logs/exception_" + yt_channel_id + "_log.txt"
            f = open(logfile, "w")
            f.write(str(feed))
            f.close()
        # Build one unwatched Video per feed entry; publish date comes from
        # the entry itself (via the _get_unix_ts helper).
        return [
            Video(yt_videoid=str(entry.yt_videoid),
                  title=str(entry.title),
                  description=str(entry.description),
                  publisher=yt_channel_id,
                  publish_date=_get_unix_ts(entry.published),
                  watched=False) for entry in feed.entries
        ]

    @staticmethod
    def _add_channel(channel: Channel) -> List[Video]:
        """Return the channel's FULL backlog of videos.

        Unlike :meth:`_update_channel` this shells out to
        ``youtube-dl --flat-playlist`` to enumerate every video on the
        channel page (the RSS feed only carries recent entries), reading
        one JSON object per stdout line until EOF.
        """
        yt_channel_id = channel.yt_channelid
        url = _get_youtube_rss_url(yt_channel_id)
        feed = feedparser.parse(url)
        channel2 = "https://www.youtube.com/channel/%s" % yt_channel_id
        proc = subprocess.Popen(
            ["youtube-dl", "-j", "--flat-playlist", channel2],
            stdout=subprocess.PIPE)
        if "bozo_exception" in feed:
            logfile = "logs/" + yt_channel_id + "_log.txt"
            f = open(logfile, "w")
            f.write(str(feed))
            f.close()
        arr = []
        # All backlog videos share one "now" timestamp, since flat-playlist
        # output carries no publish date.
        ts = time.time()
        while True:
            nextline = proc.stdout.readline()
            if len(nextline) >= 1:
                jsonarr = json.loads(nextline)
                # NOTE(review): description is set to the title -- the
                # flat-playlist JSON has no description field.
                arr += [
                    Video(yt_videoid=str(jsonarr["url"]),
                          title=str(jsonarr["title"]),
                          description=str(jsonarr["title"]),
                          publisher=yt_channel_id,
                          publish_date=ts,
                          watched=False)
                ]
            else:
                break

        return arr

    def update_all(self) -> None:
        """Check every channel for new videos."""
        channels = self.database.get_channels()

        # Record the check time on every channel before fetching.
        ts = int(time.time())
        for channel in channels:
            print("Updating (updateall) TS on %s" % channel.displayname)
            self.database.update_channel_last_check(channel.yt_channelid, ts)

        # Feed fetching is I/O bound, so use 2x CPU count workers.
        num_workers = unpack_optional(os.cpu_count(), lambda: 1) * 2

        with Pool(num_workers) as pool:
            videos = chain.from_iterable(
                pool.map(self._update_channel, channels))

        self.database.add_videos(videos)

    def update_archive(self) -> None:
        """Check every archived channel for new videos."""
        channels = self.database.get_channels_archived()
        # Record the check time on every channel before fetching.
        ts = int(time.time())
        for channel in channels:
            print("Updating TS on %s" % channel.displayname)
            self.database.update_channel_last_check(channel.yt_channelid, ts,
                                                    "true")

        num_workers = unpack_optional(os.cpu_count(), lambda: 1) * 2

        with Pool(num_workers) as pool:
            videos = chain.from_iterable(
                pool.map(self._update_channel, channels))

        self.database.add_videos(videos)

    def update_one(self, yt_channel_id: str) -> None:
        """Check a single channel (by YouTube channel id) for new videos."""
        channels = self.database.get_channels_filter(
            yt_channel_id)  # expected to match exactly one channel
        num_workers = unpack_optional(os.cpu_count(), lambda: 1) * 2

        with Pool(num_workers) as pool:
            videos = chain.from_iterable(
                pool.map(self._update_channel, channels))

        self.database.add_videos(videos)

    def download_video(self,
                       video: Video,
                       path: str = "",
                       audio_only: bool = False,
                       publisherID: str = "",
                       videotitle: str = "") -> bool:
        """Queue the given video for download and mark it as watched.

        NOTE(review): despite its name this method does not download
        anything itself -- it writes a ``.crawljob`` file into the
        configured JDownloader watcher directory and lets JDownloader
        perform the download.

        :param video: The video to download.
        :param path: Unused here; kept for interface compatibility.
        :param audio_only: Unused here; kept for interface compatibility.
        :param publisherID: Channel id used to look up the download dir.
        :param videotitle: Human-readable title written into the job file.
        :return: Always True once the job file has been written.
        """
        folderwatch = self.config.watcherdir

        dldir = self.database.get_channel_dir(publisherID)
        url = self.get_youtube_video_url(video.yt_videoid)
        ts = int(time.time())
        self.database.update_channel_ts(publisherID, ts)

        # Assemble the JDownloader .crawljob directives.
        cmd = ""
        cmd += "#download %s\n" % videotitle
        cmd += "text=\"%s\"\n" % url
        #removed package name to use JDownloader preferences
        #cmd += "packageName=%s\n" % videotitle
        cmd += "enabled=true\n"
        cmd += "autoStart=TRUE\n"
        cmd += "forcedStart=Default\n"
        cmd += "autoConfirm=TRUE\n"
        cmd += "downloadFolder=%s/<jd:packagename>\n" % dldir
        cmd += "priority=DEFAULT\n"
        cmd += "downloadPassword=null\n"
        filename = folderwatch + video.yt_videoid + ".crawljob"
        f = open(filename, "w+")
        f.write(cmd)
        f.close()
        # Marked watched as soon as the job is queued, not when the
        # download actually finishes.
        video.watched = True
        return True

    def add_channel(self, displayname: str, dldir: str,
                    channel_url: str) -> bool:
        """Subscribe to a channel.

        :param displayname: A human readable name of the channel.
        :param dldir: download directory
        :param channel_url: The url to a page that can identify the channel.
        :return: True if the channel was added.
        :raises ChannelDoesNotExistException: If the given URL does not exist.
        :raises DuplicateChannelException: If the channel already exists in the database.
        :raises BadURLException: If the given URL does not refer to a YouTube channel.
        """
        known_yt_domains = [
            "youtu.be", "youtube.com", "youtubeeducation.com",
            "youtubekids.com", "youtube-nocookie.com", "yt.be", "ytimg.com"
        ]

        url_parts = urlparse(channel_url, scheme="https")
        if not url_parts.netloc:
            # Bare host without scheme: retry with https:// prepended.
            url_parts = urlparse("https://" + channel_url)

        # Reduce e.g. "www.youtube.com:443" to the registrable
        # "youtube.com" before checking against the whitelist.
        domain = url_parts.netloc.split(":")[0]
        domain = ".".join(domain.split(".")[-2:])

        if domain not in known_yt_domains:
            raise BadURLException(f"{channel_url} is not a valid URL")

        url = urlunparse(
            ("https", url_parts.netloc, url_parts.path, url_parts.params,
             url_parts.query, url_parts.fragment))

        try:
            response = urlopen(url).read().decode('utf-8')
        except URLError:
            raise BadURLException(f"{channel_url} is not a valid URL")

        # Scrape the page metadata for the site name and channel id.
        parser = etree.HTMLParser()
        root = etree.parse(StringIO(response), parser).getroot()
        site_name_node = root.xpath(
            '/html/head/meta[@property="og:site_name"]')
        channel_id_node = root.xpath('//meta[@itemprop="channelId"]')

        if not site_name_node or site_name_node[0].attrib.get("content",
                                                              "") != "YouTube":
            raise BadURLException(
                f"{channel_url} does not seem to be a YouTube URL")

        if not channel_id_node:
            raise ChannelDoesNotExistException(
                f"{channel_url} does not seem to be a YouTube URL")

        yt_channelid = channel_id_node[0].attrib.get("content")

        try:
            self.database.add_channel(
                Channel(displayname=displayname,
                        dldir=dldir,
                        yt_channelid=yt_channelid))
            print("%s Channel Added" % displayname)
            # Fetch the channel's current videos immediately on subscribe.
            self.update_one(
                yt_channelid)
            return True
        except sqlalchemy.exc.IntegrityError:
            raise DuplicateChannelException(
                f"Channel already subscribed: {displayname}")
        return False  # NOTE(review): unreachable -- both paths above return or raise

    def import_channels(self, file: TextIO) -> None:
        """Import all channels from YouTube's subscription export file.

        :param file: The file to read from.
        :raises InvalidSubscriptionFileError: If the file is not a valid
            YouTube OPML export.
        """
        def _create_channel(elem: etree.Element) -> Channel:
            # Pull the channel_id query parameter out of the RSS URL.
            rss_url = urlparse(elem.attrib["xmlUrl"])
            query_dict = parse_qs(rss_url.query, keep_blank_values=False)
            channel_id = query_dict.get("channel_id", [])
            if len(channel_id) != 1:
                message = f"'{file.name}' is not a valid YouTube export file"
                raise InvalidSubscriptionFileError(message)
            return Channel(displayname=elem.attrib["title"],
                           yt_channelid=channel_id[0])

        try:
            root = etree.parse(file)
        except Exception:
            raise InvalidSubscriptionFileError(
                f"'{file.name}' is not a valid YouTube export file")

        elements = root.xpath('//outline[@type="rss"]')
        self.database.add_channels((_create_channel(e) for e in elements))

    def export_channels(self, outstream: BinaryIO) -> None:
        """Export all channels as OPML file.

        :param outstream: The file/stream the OPML file will be written to.
        """
        opml = etree.Element("opml", version="1.1")
        body = etree.SubElement(opml, "body")
        outline = etree.SubElement(body,
                                   "outline",
                                   text="ytcc subscriptions",
                                   title="ytcc subscriptions")
        for channel in self.get_channels():
            outline.append(
                etree.Element("outline",
                              text=channel.displayname,
                              title=channel.displayname,
                              type="rss",
                              xmlUrl=_get_youtube_rss_url(
                                  channel.yt_channelid)))

        outstream.write(etree.tostring(opml, pretty_print=True))

    def list_videos(self) -> List[Video]:
        """Return a list of videos that match the filters set by the set_*_filter methods.

        :return: A list of videos.
        """
        # The id filter overrides every other filter.
        if self.video_id_filter:
            return self.database.session.query(Video) \
                .join(Channel, Channel.yt_channelid == Video.publisher) \
                .filter(Video.id.in_(self.video_id_filter)) \
                .order_by(*self.config.order_by).all()

        if not self.date_end_filter[1]:
            date_end_filter = 0
        else:
            date_end_filter = self.date_end_filter[0]

        query = self.database.session.query(Video) \
            .join(Channel, Channel.yt_channelid == Video.publisher)
        # NOTE(review): the date filters are currently disabled; the
        # computed date_begin_filter/date_end_filter values are unused.
        # \
        # .filter(Video.publish_date > self.date_begin_filter) \
        # .filter(Video.publish_date < date_end_filter)

        if self.channel_filter:
            query = query.filter(Channel.displayname.in_(self.channel_filter))

        if not self.include_watched_filter:
            query = query.filter(~Video.watched)

        query = query.order_by(*self.config.order_by)
        return query.all()

    def delete_channels(self, displaynames: List[str]) -> None:
        """Delete (or unsubscribe) channels.

        :param displaynames: The names of channels to delete.
        """
        self.database.delete_channels(displaynames)
        print("%s deleted" % displaynames)

    def rename_channel(self, oldname: str, newname: str) -> None:
        """Rename the given channel.

        :param oldname: The name of the channel.
        :param newname: The new name of the channel.
        :raises ChannelDoesNotExistException: If the given channel does not exist.
        :raises DuplicateChannelException: If new name already exists.
        """
        self.database.rename_channel(oldname, newname)

    def get_channels(self) -> List[Channel]:
        """Get the list of all subscribed channels.

        :return: A list of channel names.
        """
        return self.database.get_channels()

    def cleanup(self) -> None:
        """Delete old videos from the database."""
        self.database.cleanup()
Esempio n. 27
0
    def get_deck_list(self):
        """Return the current user's decks.

        Each deck is a dict with keys ``id``, ``name``, ``info`` and
        ``card_count`` (the summed card counts of the deck).
        """
        sql = """
            SELECT `d`.`id`, `d`.`name`, `d`.`info`, SUM(`c`.`count`)
            FROM `users_decks` AS `d`
            LEFT JOIN `users_decks_cards` AS `c`
            ON `d`.`id` = `c`.`deck_id` AND `d`.`user_id` = `c`.`user_id`
            WHERE `d`.`user_id` = %s
            GROUP BY `d`.`id`, `d`.`user_id`
            ORDER BY `d`.`id` ASC
            """

        rows = self.db.query(sql, (self.id, ))

        # A user without decks yields a single all-NULL row from the
        # LEFT JOIN aggregate; treat that as "no decks".
        if len(rows) == 1 and not any(rows[0]):
            return []

        keys = ('id', 'name', 'info', 'card_count')
        return [dict(zip(keys, row)) for row in rows]


# Attach one shared Database handle as a class attribute so every User
# instance reuses the same connection.
User.db = Database(DatabaseConfig)