def normalizeEditions():
    """Link ``editions`` rows to the CK and MKM edition tables by name.

    Prints the number of rows returned by the ``editions`` query, then runs
    one UPDATE per store that copies the store's edition id into
    ``editions`` wherever the names match case-insensitively. The value
    returned by ``phppgadmin.execute`` is printed next to each label
    (presumably the affected-row count -- confirm against the helper).
    """
    known_codes = phppgadmin.query("SELECT code FROM editions")
    print("TOTAL", len(known_codes))

    # (label, statement) pairs, executed and reported in this order.
    store_updates = (
        (
            "CK UPDATES",
            "UPDATE editions SET code_ck = ck.id FROM ck_editions ck WHERE lower(ck.name) = lower(editions.name)",
        ),
        (
            "MKM UPDATES",
            "UPDATE editions SET code_mkm = mkm.id FROM mkm_editions mkm WHERE lower(mkm.name) = lower(editions.name)",
        ),
    )
    for label, statement in store_updates:
        print(label, phppgadmin.execute(statement))
def groupPrices(edition):
    """Fold each foil card of *edition* into its non-foil counterpart.

    For every foil card that has a non-foil card with the same name, the
    foil card's prices are appended to the non-foil card, the foil card is
    removed from ``edition.cards`` and the (foil id, normal id) pair is
    inserted into ``ck_idtranslator``. Foil cards without a counterpart are
    left untouched. Nothing is sent to the database when no pair is found.

    Args:
        edition: object with a mutable ``cards`` list whose items expose
            ``id``, ``name``, ``foil`` and ``prices``.
    """
    # First non-foil card per name, so each foil card is matched with one
    # dictionary lookup instead of a scan over the whole card list.
    normal_by_name = {}
    for card in edition.cards:
        if not card.foil:
            normal_by_name.setdefault(card.name, card)

    translations = []
    # Walk backwards so popping index i never shifts an index still to visit.
    for i in range(len(edition.cards) - 1, -1, -1):
        foilcard = edition.cards[i]
        if not foilcard.foil:
            continue
        # Find the matching non-foil card.
        normalcard = normal_by_name.get(foilcard.name)
        if normalcard is None:
            continue
        translations.append("({},{})".format(foilcard.id, normalcard.id))
        normalcard.prices.extend(foilcard.prices)
        edition.cards.pop(i)

    if translations:
        phppgadmin.execute(
            "INSERT INTO ck_idtranslator(foil,normal) VALUES"
            + ",".join(translations))
def saveData(edition):
    """Cache an edition's cards and prices as SQL and insert the prices.

    Two files are written under ``{cachedir}/{edition name without ':'}``:
    ``cards.sql`` and ``prices.sql``. The directory is not created here, so
    it must already exist. Only the price statement is executed against the
    database; inserting the card rows is disabled and they are cached only.

    When there are no card rows (or no price rows) the corresponding file is
    written empty and no statement is executed, instead of producing a
    truncated ``INSERT ... VALUE`` statement.

    Args:
        edition: object with ``id``, ``name`` and ``cards``; each card has
            ``id``, ``name`` and ``prices``; each price has ``foil``,
            ``price``, ``available`` and ``condition``.
    """
    card_rows = []
    price_rows = []
    for card in edition.cards:
        # Double single quotes so the name is a valid SQL string literal.
        cardname = card.name.replace("'", "''")
        card_rows.append(
            "({},'{}',{})".format(card.id, cardname, edition.id))
        for price in card.prices:
            price_rows.append("({},{},{},{},{},'{}')".format(
                card.id, edition.id, "true" if price.foil else "false",
                price.price, price.available, price.condition))

    cardsql = ""
    if card_rows:
        cardsql = ("INSERT INTO ck_cards(id,name,edition) VALUES"
                   + ",".join(card_rows))
    pricesql = ""
    if price_rows:
        pricesql = (
            "INSERT INTO ck_cardprices(card,edition,foil,price,available,condition) VALUES"
            + ",".join(price_rows))

    editiondir = "{}/{}".format(cachedir, edition.name.replace(":", ""))
    cardsfile = "{}/cards.sql".format(editiondir)
    pricesfile = "{}/prices.sql".format(editiondir)
    with open(cardsfile, "w", encoding="utf8") as f:
        f.write(cardsql)
    with open(pricesfile, "w", encoding="utf8") as f:
        f.write(pricesql)

    # An INSERT needs at least one row, so skip the call when there is none.
    if pricesql:
        phppgadmin.execute(pricesql)
def getAllPrices():
    """Fetch MKM prices for every eligible card and store them in the DB.

    Cards are read from ``scr_cards`` joined with a foil/non-foil expansion
    of ``scr_sets``, then processed in chunks of 1000: each card is pushed
    onto a queue, the function waits for the queue to drain, and the prices
    found on each card are inserted into ``mkm_cardprices``. Once all chunks
    are done the ``mkm_cardpricesranked`` materialized view is rebuilt.

    ``startTGetPriceData(6, q)`` presumably starts the workers that consume
    the queue and attach a ``"prices"`` list to every card -- confirm
    against its definition. A Ctrl-C while queueing/waiting exits with
    status 1.
    """
    cards = phppgadmin.query(
        "SELECT c.name, s.name as set, c.idmkm, s.isfoil FROM scr_cards c LEFT JOIN (select s1.code, s1.name, s1.set_type, s1.digital, false as isfoil from scr_sets s1 union all select s2.code, s2.name, s2.set_type, s2.digital, true as isfoil from scr_sets s2 where code not in ('lea','leb','2ed','cei','ced','arn','atq','3ed','leg','sum','drk','fem','4ed','ice','chr','hml','all','rqs','mir','mgb','itp','vis','5ed','por','wth','tmp','sth','p02','exo','ugl','usg','ath','6ed','ptk','s99','brb','s00','btd','dkm','phpr') and not foil) s on c.set = s.code WHERE NOT idmkm IS NULL AND NOT s.digital AND s.set_type in ('archenemy','commander','conspiracy','core','duel_deck','expansion','from_the_vault','masterpiece','planechase','premium_deck','starter')"
    )
    n = 1000
    # A CSV export of the same rows (output.csv) used to live here and is
    # disabled; only the database path below is active.
    q = Queue()
    startTGetPriceData(6, q)

    chunk_count = len(cards) // n + 1
    for i in range(chunk_count):
        print("i=", i)
        first = i * n
        cardswprices = cards[first:first + n]
        try:
            for card in cardswprices:
                card["isFoil"] = card["isfoil"] == "TRUE"
                card["idLanguage"] = 1
                q.put(card)
            # Block until every queued card of this chunk has been handled.
            q.join()
        except KeyboardInterrupt:
            sys.exit(1)

        # Store this chunk's prices in the database.
        rows = [
            "('{}',{},{},{},'{}','{}')".format(
                card["idmkm"], price["price"], card["isfoil"],
                price["available"], price["seller"].replace("'", "''"),
                price["itemlocation"])
            for card in cardswprices
            for price in card["prices"]
        ]
        if rows:
            affected = phppgadmin.execute(
                "INSERT INTO mkm_cardprices(id,price,foil,available,seller,itemlocation) VALUES"
                + ",".join(rows))
            print("Total prices inserted: {}".format(affected))

    # Rebuild the full ranked materialized view.
    phppgadmin.execute(
        "DROP MATERIALIZED VIEW mkm_cardpricesranked;CREATE MATERIALIZED VIEW mkm_cardpricesranked AS SELECT id, price, foil, available, seller, itemlocation, dense_rank() OVER (PARTITION BY id,foil ORDER BY price ASC),first_value(price) OVER (PARTITION BY id,foil ORDER BY price ASC),lead(price) OVER (PARTITION BY id,foil ORDER BY price ASC) FROM mkm_cardprices WITH DATA; ALTER TABLE mkm_cardpricesranked OWNER TO postgres;"
    )
def ckprocess_savebuylist():
    """Replace the stored CK buylist with a freshly fetched one.

    Steps, in order: fetch the buylist via ``CK.buylist()``, copy the
    current ``ck_buylist`` rows into ``ck_buylist_history``, delete the
    current rows, then insert one row per (card, entry) of the new buylist.
    Progress is reported on stdout.

    If the fetched buylist has no entries the final INSERT is skipped and
    0 is reported, because an INSERT without rows is not valid SQL. The
    archive and delete steps still run in that case, as before.
    """
    buylist = CK.buylist()

    # Back up the current data, then delete it.
    print("{} precios guardados en histórico buylist".format(
        phppgadmin.execute(
            "INSERT INTO ck_buylist_history(id,foil,price,available,timestamp) SELECT id,foil,price,available,timestamp FROM ck_buylist"
        )))
    print("{} precios borrados de buylist actual".format(
        phppgadmin.execute("DELETE FROM ck_buylist")))

    rows = [
        "({},{},{},{})".format(
            card.id, "true" if entry.foil else "false", entry.price,
            entry.count)
        for card in buylist
        for entry in card.entries
    ]

    sys.stdout.write("Guardando buylist...")
    sys.stdout.flush()
    if rows:
        saved = phppgadmin.execute(
            "INSERT INTO ck_buylist(id,foil,price,available) VALUES"
            + ",".join(rows))
    else:
        saved = 0
    print("{} precios guardados en buylist actual".format(saved))
    sys.stdout.write("OK")
def crawlEditions():
    """Scrape the Card Kingdom edition list and store it in ``ck_editions``.

    Every catalog link on the A-Z page becomes an edition dict with ``id``
    (the last path segment of the link), ``name`` (single quotes already
    doubled for SQL) and ``url``. Eight daemon threads request each
    edition URL and replace ``url`` with the final URL of the response.
    The table is then emptied and refilled in a single statement.

    If no edition link is found the table is left untouched and 0 is
    reported; previously the statement degenerated into a DELETE followed
    by a row-less INSERT.

    Returns:
        list of edition dicts (possibly empty).
    """
    q = Queue()

    def do_work(edition):
        # Progress indicator, rewritten in place thanks to the trailing \r.
        sys.stdout.write("Ediciones restantes: %d \r" % q.qsize())
        sys.stdout.flush()
        page = requests.get(edition["url"])
        edition["url"] = page.url

    def worker():
        while True:
            do_work(q.get())
            q.task_done()

    for _ in range(8):
        t = threading.Thread(target=worker)
        t.daemon = True
        t.start()

    baseurl = "www.cardkingdom.com/catalog/view/"
    page = requests.get(
        "http://www.cardkingdom.com/catalog/magic_the_gathering/by_az")
    tree = html.fromstring(page.text)
    editions = []
    for link in tree.xpath("//a[contains(@href,'" + baseurl + "')]"):
        # TODO: stop depending on IDs... for the good of humanity
        href = link.attrib["href"]
        edition = {
            "id": href[href.rfind("/") + 1:],
            "name": link.text.replace("'", "''"),
            "url": href
        }
        editions.append(edition)
        q.put(edition)
    # Wait until the workers have resolved every queued edition URL.
    q.join()
    print("")

    rows = [
        "({},'{}','{}')".format(edition["id"], edition["name"],
                                edition["url"])
        for edition in editions
    ]
    if rows:
        stored = phppgadmin.execute(
            "DELETE FROM ck_editions;INSERT INTO ck_editions(id,name,url) VALUES"
            + ",".join(rows))
    else:
        # Nothing scraped: keep the existing table contents.
        stored = 0
    print(" {} ediciones almacenadas".format(stored))
    return editions
def crawlEditions():
    """Scrape the MKM expansion list, dump it to CSV and to ``mkm_editions``.

    Each ``alphabeticExpansion`` link yields a dict with ``id`` (the href
    without the ``/Expansions/`` prefix), ``name`` and ``url`` (the singles
    listing of that expansion). All editions are written to ``datafile`` as
    CSV (``datadir`` is created when missing) and the ``mkm_editions`` table
    is emptied and refilled in one statement.

    If there are no editions the database is left untouched and 0 is
    printed; previously the statement degenerated into a DELETE followed by
    a row-less INSERT.

    Returns:
        the ``editions`` list.
    """
    page = requests.get(MKM.baseurl + "/Expansions")
    tree = html.fromstring(page.text)
    # NOTE(review): ``editions`` is never assigned in this function, so it
    # resolves to a name defined elsewhere; presumably a module-level list
    # -- confirm, since repeated calls would keep appending to it.
    for anchor in tree.xpath("//a[@class='alphabeticExpansion']"):
        relativeurl = anchor.attrib["href"]
        editions.append({
            "id": relativeurl.replace("/Expansions/", ""),
            "name": anchor.xpath(
                "./div[@class='yearExpansionName']/text()")[0],
            "url": MKM.baseurl + relativeurl.replace(
                "/Expansions/", "/Products/Singles/"),
        })

    if not os.path.exists(datadir):
        os.makedirs(datadir)

    rows = []
    with open(datafile, "w", newline='\n') as f:
        writer = csv.DictWriter(f,
                                fieldnames=["id", "name", "url"],
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        writer.writeheader()
        for edition in editions:
            writer.writerow({
                "id": edition["id"],
                "name": edition["name"],
                "url": edition["url"]
            })
            rows.append("('{}','{}','{}')".format(
                edition["id"], edition["name"].replace("'", "''"),
                edition["url"]))

    if not rows:
        # Nothing to store: keep the existing table contents.
        print(0)
        return editions

    sql = ("DELETE FROM mkm_editions;INSERT INTO mkm_editions(id,name,url) VALUES"
           + ",".join(rows))
    # TODO: upgrade PostgreSQL to 9.5+ and append
    #   ON CONFLICT (id) DO UPDATE SET name = excluded.name, url = excluded.url
    # instead of deleting everything first.
    print(phppgadmin.execute(sql))
    return editions
def mkmprocess_savestore():
    """Download and store MKM prices, one unprocessed edition at a time.

    Repeatedly picks one edition that is not locked and has no rows in
    ``mkm_cardprices``, marks it ``locked``, fetches its prices with
    ``MKM.getPrices`` and inserts them, then clears the lock. The loop ends
    when the selection query returns no edition. Afterwards, if no edition
    is locked, the ``mkm_cardpricesmin`` materialized view is rebuilt.

    The lock is released in a ``finally`` clause so that an error (or
    Ctrl-C) while fetching or inserting does not leave the edition locked
    forever; the exception itself still propagates.
    """
    # A full wipe of mkm_cardprices used to run here and is disabled.
    sqlnextedition = "select e.code_mkm as id, e.name from editions e inner join mkm_editions mkm on e.code_mkm = mkm.id left join mkm_cardprices p on p.edition = mkm.id group by e.code_mkm, e.name, mkm.locked having count(p.card) = 0 and not mkm.locked limit 1"
    while True:
        editions = phppgadmin.query(sqlnextedition)
        if len(editions) != 1:
            break
        edition = editions[0]
        phppgadmin.execute(
            "update mkm_editions set locked = true where id = '{}'".format(
                edition["id"]))
        try:
            print(edition["name"])
            cards = MKM.getPrices(edition)
            rows = [
                "('{}','{}',{},{},{},'{}','{}')".format(
                    card.id, card.edition, entry.price, entry.foil,
                    entry.count, entry.seller.replace("'", "''"),
                    entry.location)
                for card in cards
                for entry in card.entries
            ]
            if rows:
                affected = phppgadmin.execute(
                    "INSERT INTO mkm_cardprices(card,edition,price,foil,available,seller,itemlocation) VALUES"
                    + ",".join(rows))
                print("Total prices inserted: {}".format(affected))
            else:
                # NOTE(review): an edition without prices still matches the
                # selection query after it is unlocked, so it may be picked
                # again on the next iteration -- verify.
                print("No price data")
        finally:
            phppgadmin.execute(
                "update mkm_editions set locked = false where id = '{}'".
                format(edition["id"]))

    # Rebuild the minimum-price table once there are no more editions to
    # process (i.e. nothing is locked by this or another run).
    if phppgadmin.count("select id from mkm_editions where locked") == 0:
        print("Creando tabla de precios min para hoy (unos 5 minutos)")
        phppgadmin.execute(
            "DROP MATERIALIZED VIEW mkm_cardpricesmin;CREATE MATERIALIZED VIEW mkm_cardpricesmin AS SELECT mkm_cardprices.edition,mkm_cardprices.card as name, mkm_cardprices.foil, min(mkm_cardprices.price) AS price FROM mkm_cardprices GROUP BY mkm_cardprices.edition, mkm_cardprices.card, mkm_cardprices.foil WITH DATA; ALTER TABLE mkm_cardpricesmin OWNER TO postgres;"
        )
    print("Finished")
# Script body: parse every edition, rebuild the CK price tables, then print
# the collected sales and the elapsed time.
start = time.perf_counter()

for edition in editions:
    # Per-edition cache directory; ':' is stripped from the edition name.
    editiondir = "{}/{}".format(cachedir, edition.name.replace(":",""))
    if not os.path.exists(editiondir):
        with lock:
            os.makedirs(editiondir)
    # Two jobs per edition; the second argument presumably selects the
    # foil listing -- confirm against CardPage.
    q.put(CardPage(edition))
    q.put(CardPage(edition, True))

# Wait for all queued pages to be processed.
q.join()
print("Finished parsing\n")

# Clear the previous run's data before saving the new one.
for statement, message in (
        ("DELETE FROM ck_cardprices;", "Datos antiguos borrados"),
        ("DELETE FROM ck_idtranslator;", "Traducciones ID borradas"),
):
    phppgadmin.execute(statement)
    print(message)

for edition in editions:
    groupPrices(edition)
    print(edition.name, len(edition.cards))
    saveData(edition)

print("=[ Sales ]=")
for sale in sales:
    print(sale)

print('time:',time.perf_counter() - start)

# Helper query that generates the editions.append(...) lines from the DB:
#select 'editions.append(Edition('||cast(e.id as varchar)||','||chr(34)||e.name||chr(34)||','||chr(34)||e.url||chr(34)||'))' from ck_editions e