def leerPGN(self, fichero, dlTmp):
    """Import the games of one PGN file into the GAMES table, keyed by FEN.

    Games without a FEN tag are counted as erroneous.  Games whose FEN is
    already stored in the table, or was already read during this run, are
    counted as duplicates.  The remaining games are buffered and inserted
    in batches.

    fichero: path of the PGN file to read.
    dlTmp: progress object.  ``actualiza(total, erroneos, duplicados,
        importados)`` is called from time to time while reading; a falsy
        return value stops the reading (rows read so far are still
        saved).  ``ponSaving()`` and ``ponContinuar()`` are called once
        the reading has finished.
    """
    batch_size = 10000

    erroneos = duplicados = importados = 0

    t1 = time.time() - 0.7  # so that the first progress update comes soon
    next_n = random.randint(100, 200)

    codec = Util.file_encoding(fichero)
    sicodec = codec not in ("utf-8", "ascii")

    liRegs = []
    nRegs = 0

    conexion = self._conexion
    cursor = self._cursor

    # Start with every FEN already stored so known duplicates are detected
    # without a query.
    cursor.execute("SELECT FEN FROM GAMES")
    stRegs = set(row[0] for row in cursor.fetchall())

    sql = "insert into GAMES (FEN,EVENT,SITE,DATE,WHITE,BLACK,RESULT,XPV,PGN,PLIES) values (?,?,?,?,?,?,?,?,?,?);"

    with LCEngine.PGNreader(fichero, 0) as fpgn:
        for n, (pgn, pv, dCab, raw, liFens) in enumerate(fpgn, 1):
            if "FEN" not in dCab:
                erroneos += 1
            else:
                fen = dCab["FEN"]
                if fen in stRegs:
                    dup = True
                else:
                    cursor.execute(
                        "SELECT COUNT(*) FROM GAMES WHERE FEN = ?", (fen, ))
                    dup = cursor.fetchone()[0] > 0

                if dup:
                    duplicados += 1
                else:
                    stRegs.add(fen)
                    if sicodec:
                        # Only values of existing keys are replaced, so the
                        # dict size does not change while iterating.
                        for k, v in dCab.iteritems():
                            dCab[k] = unicode(v, encoding=codec, errors="ignore")
                        if pgn:
                            pgn = unicode(pgn, encoding=codec, errors="ignore")

                    event = dCab.get("EVENT", "")
                    site = dCab.get("SITE", "")
                    date = dCab.get("DATE", "")
                    white = dCab.get("WHITE", "")
                    black = dCab.get("BLACK", "")
                    result = dCab.get("RESULT", "")
                    plies = (pv.count(" ") + 1) if pv else 0
                    if pgn:
                        pgn = Util.var2blob(pgn)

                    xpv = pv2xpv(pv)

                    reg = (fen, event, site, date, white, black, result, xpv, pgn, plies)
                    liRegs.append(reg)
                    nRegs += 1
                    importados += 1
                    if nRegs == batch_size:
                        # Reset the counter so that every batch is flushed,
                        # not only the first one.
                        nRegs = 0
                        cursor.executemany(sql, liRegs)
                        liRegs = []
                        # Flushed rows are now found by the SELECT above,
                        # so the in-memory set can be emptied.
                        stRegs = set()
                        conexion.commit()

            if n == next_n:
                if time.time() - t1 > 0.8:
                    if not dlTmp.actualiza(
                            erroneos + duplicados + importados,
                            erroneos, duplicados, importados):
                        break
                    t1 = time.time()
                next_n = n + random.randint(100, 500)

    if liRegs:
        cursor.executemany(sql, liRegs)
        conexion.commit()
    dlTmp.actualiza(erroneos + duplicados + importados, erroneos, duplicados, importados)
    dlTmp.ponSaving()

    conexion.commit()

    dlTmp.ponContinuar()
    self.lee_rowids()
def leerPGNs(self, ficheros, dlTmp):
    """Import the games of several PGN files into the games table, keyed by XPV.

    A game with no moves is counted as erroneous and appended to a
    ``<name>.errors.pgn`` file in the temporary folder.  A game with a FEN
    tag different from the initial position is also counted as erroneous
    (it is not logged).  A game whose XPV is already stored, or was
    already read during this run, is counted as duplicate and appended to
    a ``<name>.duplicates.pgn`` file.  The remaining games are buffered
    and inserted in batches; when ``self.with_dbSTAT`` is set the
    statistics database is fed and committed alongside.

    ficheros: list of paths of the PGN files to read.
    dlTmp: progress object.  ``pon_titulo(name)`` is called for each file
        and ``actualiza(total, erroneos, duplicados, importados)`` from
        time to time while reading; a falsy return value stops the whole
        import (rows read so far are still saved).  ``ponSaving()`` and
        ``ponContinuar()`` are called once the reading has finished.
    """
    batch_size = 10000

    erroneos = duplicados = importados = 0

    t1 = time.time() - 0.7  # so that the first progress update comes soon

    if self.with_dbSTAT:
        self.dbSTAT.massive_append_set(True)

    def write_logs(fich, pgn):
        # Append the game text to the given log file.
        with open(fich, "ab") as ferr:
            ferr.write(pgn)
            ferr.write("\n")

    liRegs = []
    stRegs = set()
    nRegs = 0

    conexion = self._conexion
    cursor = self._cursor

    sql = "insert into games (XPV,EVENT,SITE,DATE,WHITE,BLACK,RESULT,ECO,WHITEELO,BLACKELO,PGN,PLIES) values (?,?,?,?,?,?,?,?,?,?,?,?);"
    liCabs = self.liCamposBase[:-1]  # all except PLIES PGN, TAGS
    liCabs.append("PLYCOUNT")

    carpeta_tmp = VarGen.configuracion.carpetaTemporal()
    cancelado = False

    for fichero in ficheros:
        nomfichero = os.path.basename(fichero)
        fich_erroneos = os.path.join(carpeta_tmp, nomfichero[:-3] + "errors.pgn")
        fich_duplicados = os.path.join(carpeta_tmp, nomfichero[:-3] + "duplicates.pgn")
        dlTmp.pon_titulo(nomfichero)
        next_n = random.randint(100, 200)

        # Detect the encoding of each file, not only of the first one.
        codec = Util.file_encoding(fichero)
        sicodec = codec not in ("utf-8", "ascii")

        with LCEngine.PGNreader(fichero, self.depthStat()) as fpgn:
            for n, (pgn, pv, dCab, raw, liFens) in enumerate(fpgn, 1):
                if not pv:
                    erroneos += 1
                    write_logs(fich_erroneos, pgn)
                else:
                    fen = dCab.get("FEN", None)
                    if fen and fen != ControlPosicion.FEN_INICIAL:
                        erroneos += 1
                    else:
                        xpv = pv2xpv(pv)
                        if xpv in stRegs:
                            dup = True
                        else:
                            cursor.execute("SELECT COUNT(*) FROM games WHERE XPV = ?", (xpv,))
                            dup = cursor.fetchone()[0] > 0

                        if dup:
                            duplicados += 1
                            write_logs(fich_duplicados, pgn)
                        else:
                            stRegs.add(xpv)
                            if sicodec:
                                # Only values of existing keys are replaced,
                                # so the dict size does not change while
                                # iterating.
                                for k, v in dCab.iteritems():
                                    dCab[k] = unicode(v, encoding=codec, errors="ignore")
                                if pgn:
                                    pgn = unicode(pgn, encoding=codec, errors="ignore")

                            if raw:
                                # No variations nor comments: only the tags
                                # that have no column of their own are saved.
                                liRTags = [(k, v) for k, v in dCab.iteritems() if k not in liCabs]  # k is always upper
                                if liRTags:
                                    pgn = {"RTAGS": liRTags}
                                else:
                                    pgn = None

                            event = dCab.get("EVENT", "")
                            site = dCab.get("SITE", "")
                            date = dCab.get("DATE", "")
                            white = dCab.get("WHITE", "")
                            black = dCab.get("BLACK", "")
                            result = dCab.get("RESULT", "")
                            eco = dCab.get("ECO", "")
                            whiteelo = dCab.get("WHITEELO", "")
                            blackelo = dCab.get("BLACKELO", "")
                            plies = (pv.count(" ") + 1) if pv else 0
                            if pgn:
                                pgn = Util.var2blob(pgn)

                            reg = (xpv, event, site, date, white, black, result, eco, whiteelo, blackelo, pgn, plies)
                            if self.with_dbSTAT:
                                self.dbSTAT.append_fen(pv, result, liFens)
                            liRegs.append(reg)
                            nRegs += 1
                            importados += 1
                            if nRegs == batch_size:
                                nRegs = 0
                                cursor.executemany(sql, liRegs)
                                liRegs = []
                                # Flushed rows are now found by the SELECT
                                # above, so the in-memory set can be emptied.
                                stRegs = set()
                                conexion.commit()
                                if self.with_dbSTAT:
                                    self.dbSTAT.massive_append_set(False)
                                    self.dbSTAT.commit()
                                    self.dbSTAT.massive_append_set(True)

                if n == next_n:
                    if time.time() - t1 > 0.8:
                        if not dlTmp.actualiza(erroneos + duplicados + importados, erroneos, duplicados, importados):
                            cancelado = True
                            break
                        t1 = time.time()
                    next_n = n + random.randint(100, 500)

        if cancelado:
            # Stop the whole import, not only the current file.
            break

    if liRegs:
        cursor.executemany(sql, liRegs)
        conexion.commit()
    dlTmp.actualiza(erroneos + duplicados + importados, erroneos, duplicados, importados)
    dlTmp.ponSaving()

    if self.with_dbSTAT:
        self.dbSTAT.massive_append_set(False)
        self.dbSTAT.commit()
    conexion.commit()
    dlTmp.ponContinuar()