def addLoadedFiles(dbFname, fileNames):
    """ register files as loaded in a sqlite db

    Creates the table loadedFiles (single column fname, primary key) if it
    does not exist yet and inserts one row per entry of fileNames. Only the
    basename of each file is stored, the directory part is dropped.
    """
    # executemany wants one parameter tuple per row, hence the 1-tuples
    fnameRows = [(basename(fname), ) for fname in fileNames]

    con, cur = maxTables.openSqlite(dbFname, lockDb=True)
    cur.execute("CREATE TABLE IF NOT EXISTS loadedFiles (fname TEXT PRIMARY KEY);")
    con.commit()

    insertSql = "INSERT INTO loadedFiles (fname) VALUES (?)"
    cur.executemany(insertSql, fnameRows)
    con.commit()
def openArticleDb(datasetName):
    """ open the article sqlite DB of a dataset, return a (con, cur) tuple

    Opened handles are stored in conCache under datasetName and reused on
    later calls. Returns (None, None) if getArtDbPath yields no path or
    the path is not an existing file.
    """
    if datasetName in conCache:
        con, cur = conCache[datasetName]
        return con, cur

    path = getArtDbPath(datasetName)
    # the path is handed to openSqlite as a database file, so it has to be
    # tested with isfile: isdir() is False for every existing db file.
    # A None path must be caught before it reaches the filesystem test.
    if path is None or not isfile(path):
        return None, None

    logging.debug("Opening db %s" % path)
    con, cur = maxTables.openSqlite(path, asDict=True)
    conCache[datasetName] = (con, cur)
    return con, cur
def openArticleDb(datasetName):
    """ return a (con, cur) tuple for the article sqlite DB of a dataset

    Handles are kept in conCache, keyed by datasetName, and reused on
    later calls. If no DB file can be found, an error is logged and
    (None, None) is returned; nothing is cached in that case.
    """
    # cache hit: hand back the handles opened earlier
    if datasetName in conCache:
        con, cur = conCache[datasetName]
        return con, cur

    dbPath = getArtDbPath(datasetName)
    dbMissing = dbPath is None or not isfile(dbPath)
    if dbMissing:
        logging.error("Could not find %s" % dbPath)
        return None, None

    logging.debug("Opening db %s" % dbPath)
    con, cur = maxTables.openSqlite(dbPath, asDict=True)
    conCache[datasetName] = (con, cur)
    return con, cur
def lookupArticleData(articleId, lookupKey="articleId"):
    """ look up the meta data of one article in the articles.db of its dataset

    articleId: the value to search for in the column named by lookupKey
    lookupKey: name of the column to search. "articleId" derives the dataset
        from the id via articleIdToDataset, "pmid" always searches the
        "medline" dataset.

    Returns the first matching row of the table "articles", or None if the
    dataset has no articles.db file or no row matches (the latter is logged
    as an error). Raises ValueError for any other lookupKey.

    Opened database handles are kept in conCache, keyed by the text directory
    of the dataset.
    """
    # lookupKey is spliced into the SQL as a column name below, so only the
    # two known columns are accepted. Anything else used to fail later with
    # an UnboundLocalError on "dataset".
    if lookupKey == "pmid":
        dataset = "medline"
    elif lookupKey == "articleId":
        dataset = articleIdToDataset(articleId)
    else:
        raise ValueError("lookupKey must be 'pmid' or 'articleId', got %r" % (lookupKey,))
    assert(dataset is not None)

    textDir = join(pubConf.textBaseDir, dataset)
    if textDir in conCache:
        con, cur = conCache[textDir]
    else:
        dbPath = join(textDir, "articles.db")
        if not isfile(dbPath):
            return None
        # openSqlite returns the connection first, then the cursor
        con, cur = maxTables.openSqlite(dbPath, asDict=True)
        conCache[textDir] = (con, cur)

    # the searched value is bound as a parameter instead of being formatted
    # into the statement, so it cannot break or alter the query.
    # con.execute() runs the query on a cursor of its own, so the cached
    # cursor is not shared between calls.
    sql = "SELECT * from articles where %s=?" % lookupKey
    rows = list(con.execute(sql, (articleId,)))
    if len(rows) == 0:
        logging.error("Could not find article %s in textDir %s" % (articleId, textDir))
        return None

    return rows[0]
def getUnloadedFnames(dbFname, newFnames):
    """ return the entries of newFnames that are not yet listed in the
    table loadedFiles of the sqlite db dbFname

    Only the basename of each file is compared. If the query on loadedFiles
    fails with an OperationalError, the table is treated as not created yet
    and newFnames is returned as it is.
    """
    con, cur = maxTables.openSqlite(dbFname)

    try:
        knownFnames = [row[0] for row in cur.execute("SELECT fname from loadedFiles")]
    except sqlite3.OperationalError:
        logging.debug("No loadedFiles table yet in %s" % dbFname)
        return newFnames

    # a set makes the membership test below cheap
    knownFnames = set(knownFnames)
    return [fname for fname in newFnames if basename(fname) not in knownFnames]
def openArticleDb(datasetName):
    """ open the article sqlite DB of a dataset, return a (con, cur) tuple

    The connection's row factory is set to sqlite3.Row. The returned cursor
    is created after that change; the cursor that openSqlite returned is
    not used.
    """
    dbPath = getArtDbPath(datasetName)
    con, _initialCur = maxTables.openSqlite(dbPath)
    con.row_factory = sqlite3.Row
    return con, con.cursor()