Esempio n. 1
0
    def leerPGN(self, fichero, dlTmp):
        """Import the games of one PGN file into the GAMES table.

        Each game is identified by the FEN found in its headers: games with no
        FEN header are counted as erroneous, and games whose FEN is already
        stored (or was already seen during this run) are counted as
        duplicates.  New games are inserted in batches and committed after
        every batch.

        :param fichero: path of the PGN file to read.
        :param dlTmp: progress object; it must provide ``actualiza(total,
            erroneos, duplicados, importados)``, ``ponSaving()`` and
            ``ponContinuar()``.  A falsy result from ``actualiza`` stops the
            import early; rows gathered so far are still saved.
        """
        erroneos = duplicados = importados = 0
        batch_size = 10000  # rows buffered before each executemany/commit

        t1 = time.time() - 0.7  # so that the first progress refresh comes quickly
        next_n = random.randint(100, 200)

        codec = Util.file_encoding(fichero)
        sicodec = codec not in ("utf-8", "ascii")

        liRegs = []

        conexion = self._conexion
        cursor = self._cursor

        # Start with every FEN currently stored so most duplicate checks are
        # answered from memory.
        cursor.execute("SELECT FEN FROM GAMES")
        stRegs = set(row[0] for row in cursor.fetchall())

        sql = "insert into GAMES (FEN,EVENT,SITE,DATE,WHITE,BLACK,RESULT,XPV,PGN,PLIES) values (?,?,?,?,?,?,?,?,?,?);"

        with LCEngine.PGNreader(fichero, 0) as fpgn:
            for n, (pgn, pv, dCab, raw, liFens) in enumerate(fpgn, 1):
                if "FEN" not in dCab:
                    erroneos += 1
                else:
                    fen = dCab["FEN"]
                    if fen in stRegs:
                        dup = True
                    else:
                        # Not in the in-memory set (it is emptied after each
                        # batch), so ask the database.
                        cursor.execute(
                            "SELECT COUNT(*) FROM GAMES WHERE FEN = ?",
                            (fen, ))
                        dup = cursor.fetchone()[0] > 0
                    if dup:
                        duplicados += 1
                    else:
                        stRegs.add(fen)
                        if sicodec:
                            # Only existing keys are reassigned, so the dict
                            # size does not change while iterating.
                            for k, v in dCab.iteritems():
                                dCab[k] = unicode(v,
                                                  encoding=codec,
                                                  errors="ignore")
                            if pgn:
                                pgn = unicode(pgn,
                                              encoding=codec,
                                              errors="ignore")

                        plies = (pv.count(" ") + 1) if pv else 0
                        if pgn:
                            pgn = Util.var2blob(pgn)

                        liRegs.append((
                            fen,
                            dCab.get("EVENT", ""),
                            dCab.get("SITE", ""),
                            dCab.get("DATE", ""),
                            dCab.get("WHITE", ""),
                            dCab.get("BLACK", ""),
                            dCab.get("RESULT", ""),
                            pv2xpv(pv),
                            pgn,
                            plies,
                        ))
                        importados += 1

                        # Flush whenever the buffer is full.  The buffer
                        # length is the counter, so the threshold is reached
                        # again for every batch, not just the first one.
                        if len(liRegs) >= batch_size:
                            cursor.executemany(sql, liRegs)
                            liRegs = []
                            # The flushed rows are now committed, so the
                            # database query above finds them.
                            stRegs = set()
                            conexion.commit()

                # Progress is refreshed at randomly spaced game numbers and at
                # most once every 0.8 seconds.
                if n == next_n:
                    if time.time() - t1 > 0.8:
                        if not dlTmp.actualiza(
                                erroneos + duplicados + importados, erroneos,
                                duplicados, importados):
                            break
                        t1 = time.time()
                    next_n = n + random.randint(100, 500)

        # Save whatever is left from the last, partial batch.
        if liRegs:
            cursor.executemany(sql, liRegs)
            conexion.commit()

        dlTmp.actualiza(erroneos + duplicados + importados, erroneos,
                        duplicados, importados)
        dlTmp.ponSaving()

        conexion.commit()

        dlTmp.ponContinuar()
        self.lee_rowids()
Esempio n. 2
0
    def leerPGNs(self, ficheros, dlTmp):
        """Import the games of several PGN files into the ``games`` table.

        Games are identified by their XPV (``pv2xpv`` of the move list).
        Games with no moves, or with a FEN header other than
        ``ControlPosicion.FEN_INICIAL``, are counted as erroneous.  Games
        whose XPV is already stored or already seen in this run are counted as
        duplicates.  Games with no moves and duplicate games are also appended
        to per-file ``<name>.errors.pgn`` / ``<name>.duplicates.pgn`` logs in
        the folder returned by ``VarGen.configuracion.carpetaTemporal()``.

        When ``self.with_dbSTAT`` is true, every imported game is also passed
        to ``self.dbSTAT.append_fen`` and ``self.dbSTAT`` is committed together
        with each batch.

        :param ficheros: sequence of PGN file paths; may be empty.
        :param dlTmp: progress object; it must provide ``pon_titulo(name)``,
            ``actualiza(total, erroneos, duplicados, importados)``,
            ``ponSaving()`` and ``ponContinuar()``.  A falsy result from
            ``actualiza`` cancels the whole import (remaining files are
            skipped); rows gathered so far are still saved.
        """
        erroneos = duplicados = importados = 0
        batch_size = 10000  # rows buffered before each executemany/commit

        t1 = time.time() - 0.7  # so that the first progress refresh comes quickly

        if self.with_dbSTAT:
            self.dbSTAT.massive_append_set(True)

        def write_logs(fich, pgn):
            # Append the game text to the given log file.
            with open(fich, "ab") as ferr:
                ferr.write(pgn)
                ferr.write("\n")

        liRegs = []
        stRegs = set()  # XPVs buffered since the last commit

        conexion = self._conexion
        cursor = self._cursor

        sql = "insert into games (XPV,EVENT,SITE,DATE,WHITE,BLACK,RESULT,ECO,WHITEELO,BLACKELO,PGN,PLIES) values (?,?,?,?,?,?,?,?,?,?,?,?);"
        # Copy of the base field list without its last entry, plus PLYCOUNT
        # (original note: "all except PLIES PGN, TAGS").  Tags named here are
        # not repeated in the RTAGS blob built below.
        liCabs = self.liCamposBase[:-1]
        liCabs.append("PLYCOUNT")

        cancelado = False
        for fichero in ficheros:
            nomfichero = os.path.basename(fichero)
            # Replace the real extension, whatever its length, instead of
            # blindly cutting three characters.
            base_log = os.path.splitext(nomfichero)[0] + "."
            carpeta = VarGen.configuracion.carpetaTemporal()
            fich_erroneos = os.path.join(carpeta, base_log + "errors.pgn")
            fich_duplicados = os.path.join(carpeta, base_log + "duplicates.pgn")

            # The encoding is detected for each file; files in one selection
            # need not share the encoding of the first one.
            codec = Util.file_encoding(fichero)
            sicodec = codec not in ("utf-8", "ascii")

            dlTmp.pon_titulo(nomfichero)
            next_n = random.randint(100, 200)
            with LCEngine.PGNreader(fichero, self.depthStat()) as fpgn:
                for n, (pgn, pv, dCab, raw, liFens) in enumerate(fpgn, 1):
                    if not pv:
                        erroneos += 1
                        write_logs(fich_erroneos, pgn)
                    else:
                        fen = dCab.get("FEN", None)
                        if fen and fen != ControlPosicion.FEN_INICIAL:
                            # Games starting from another position are
                            # rejected (counted, but not logged).
                            erroneos += 1
                        else:
                            xpv = pv2xpv(pv)
                            if xpv in stRegs:
                                dup = True
                            else:
                                cursor.execute("SELECT COUNT(*) FROM games WHERE XPV = ?", (xpv,))
                                dup = cursor.fetchone()[0] > 0
                            if dup:
                                duplicados += 1
                                write_logs(fich_duplicados, pgn)
                            else:
                                stRegs.add(xpv)
                                if sicodec:
                                    # Only existing keys are reassigned, so the
                                    # dict size does not change while iterating.
                                    for k, v in dCab.iteritems():
                                        dCab[k] = unicode(v, encoding=codec, errors="ignore")
                                    if pgn:
                                        pgn = unicode(pgn, encoding=codec, errors="ignore")

                                # When the game has no variations or comments,
                                # only the tags that have no column are stored.
                                if raw:
                                    liRTags = [(k, v) for k, v in dCab.iteritems() if k not in liCabs]  # k is always upper
                                    if liRTags:
                                        pgn = {"RTAGS": liRTags}
                                    else:
                                        pgn = None

                                result = dCab.get("RESULT", "")
                                plies = (pv.count(" ") + 1) if pv else 0
                                if pgn:
                                    pgn = Util.var2blob(pgn)

                                reg = (
                                    xpv,
                                    dCab.get("EVENT", ""),
                                    dCab.get("SITE", ""),
                                    dCab.get("DATE", ""),
                                    dCab.get("WHITE", ""),
                                    dCab.get("BLACK", ""),
                                    result,
                                    dCab.get("ECO", ""),
                                    dCab.get("WHITEELO", ""),
                                    dCab.get("BLACKELO", ""),
                                    pgn,
                                    plies,
                                )
                                if self.with_dbSTAT:
                                    self.dbSTAT.append_fen(pv, result, liFens)
                                liRegs.append(reg)
                                importados += 1

                                if len(liRegs) >= batch_size:
                                    cursor.executemany(sql, liRegs)
                                    liRegs = []
                                    # The flushed rows are now committed, so
                                    # the database query above finds them.
                                    stRegs = set()
                                    conexion.commit()
                                    if self.with_dbSTAT:
                                        self.dbSTAT.massive_append_set(False)
                                        self.dbSTAT.commit()
                                        self.dbSTAT.massive_append_set(True)

                    # Progress is refreshed at randomly spaced game numbers and
                    # at most once every 0.8 seconds.
                    if n == next_n:
                        if time.time() - t1 > 0.8:
                            if not dlTmp.actualiza(erroneos + duplicados + importados, erroneos, duplicados, importados):
                                cancelado = True
                                break
                            t1 = time.time()
                        next_n = n + random.randint(100, 500)

            # A cancel must stop the whole import, not only the current file.
            if cancelado:
                break

        # Save whatever is left from the last, partial batch.
        if liRegs:
            cursor.executemany(sql, liRegs)
            conexion.commit()
        dlTmp.actualiza(erroneos + duplicados + importados, erroneos, duplicados, importados)
        dlTmp.ponSaving()

        if self.with_dbSTAT:
            self.dbSTAT.massive_append_set(False)
            self.dbSTAT.commit()
        conexion.commit()
        dlTmp.ponContinuar()