Exemple #1
0
def addLoadedFiles(dbFname, fileNames):
    """ Record fileNames as loaded in the sqlite db dbFname.

    The table loadedFiles (one column, fname TEXT PRIMARY KEY) is created if it
    does not exist yet; then the basename of every entry of fileNames is
    inserted, one row per file. Both steps are committed separately.

    NOTE(review): the insert is a plain INSERT on a PRIMARY KEY column, so a
    name that is already in the table presumably makes sqlite raise - confirm
    that callers only pass files that have not been loaded yet.
    """
    # directory part is stripped; executemany() needs one parameter tuple per row
    baseNames = map(basename, fileNames)
    insertRows = [(baseName, ) for baseName in baseNames]

    con, cur = maxTables.openSqlite(dbFname, lockDb=True)

    cur.execute("CREATE TABLE IF NOT EXISTS loadedFiles (fname TEXT PRIMARY KEY);")
    con.commit()

    cur.executemany("INSERT INTO loadedFiles (fname) VALUES (?)", insertRows)
    con.commit()
Exemple #2
0
def openArticleDb(datasetName):
    """ Return a (con, cur) pair for the article db of datasetName.

    Pairs are kept in conCache under the dataset name, so the db is opened at
    most once per dataset. Returns (None, None), without caching anything, if
    the path obtained from getArtDbPath is not an existing directory.

    NOTE(review): the path is tested with isdir(), i.e. a directory is
    expected here - confirm this matches what getArtDbPath returns.
    """
    # already opened earlier: hand out the cached pair
    if datasetName in conCache:
        connection, cursor = conCache[datasetName]
        return connection, cursor

    dbPath = getArtDbPath(datasetName)
    if not isdir(dbPath):
        return None, None

    logging.debug("Opening db %s" % dbPath)
    connection, cursor = maxTables.openSqlite(dbPath, asDict=True)
    conCache[datasetName] = (connection, cursor)
    return connection, cursor
Exemple #3
0
def openArticleDb(datasetName):
    """ Open the article sqlite db of a dataset and return a (con, cur) tuple.

    The tuple is stored in conCache under datasetName and reused on later
    calls. If getArtDbPath yields None or a path that is not an existing file,
    an error is logged and (None, None) is returned; nothing is cached then.
    """
    # fast path: this dataset has been opened before
    if datasetName in conCache:
        connection, cursor = conCache[datasetName]
        return connection, cursor

    dbPath = getArtDbPath(datasetName)
    dbFileExists = dbPath is not None and isfile(dbPath)
    if not dbFileExists:
        logging.error("Could not find %s" % dbPath)
        return None, None

    logging.debug("Opening db %s" % dbPath)
    connection, cursor = maxTables.openSqlite(dbPath, asDict=True)
    conCache[datasetName] = (connection, cursor)
    return connection, cursor
Exemple #4
0
def lookupArticleData(articleId, lookupKey="articleId"):
    """ Look up the meta data of one article in the sqlite db of its dataset.

    articleId -- value searched for in the column named by lookupKey
    lookupKey -- column to match on: "articleId" (default; the dataset is
                 obtained from articleIdToDataset) or "pmid" (the dataset is
                 always "medline")

    The db file is <pubConf.textBaseDir>/<dataset>/articles.db; opened
    databases are kept in conCache, keyed by the dataset's text directory.

    Returns the first matching row of the table articles, or None if the db
    file does not exist or no row matches (the latter is logged as an error).
    Raises ValueError for any other lookupKey; asserts that a dataset could be
    determined for articleId.
    """
    if lookupKey == "pmid":
        dataset = "medline"
    elif lookupKey == "articleId":
        dataset = articleIdToDataset(articleId)
    else:
        # any other key used to leave 'dataset' unbound and crash with an
        # UnboundLocalError further down; fail with a clear message instead
        raise ValueError("Unsupported lookupKey %r, expected 'pmid' or 'articleId'" % (lookupKey,))
    assert dataset is not None, "Could not determine dataset of article %s" % (articleId,)

    textDir = join(pubConf.textBaseDir, dataset)

    # NOTE(review): openSqlite is unpacked as (con, cur) here, as at the other
    # call sites of this helper - confirm against maxTables. The query is run
    # on the first element, exactly as before; only the local names changed.
    if textDir in conCache:
        con, _ = conCache[textDir]
    else:
        dbPath = join(textDir, "articles.db")
        if not isfile(dbPath):
            return None
        con, cur = maxTables.openSqlite(dbPath, asDict=True)
        conCache[textDir] = (con, cur)

    # lookupKey is one of the two column names checked above, so it is safe to
    # put into the statement; the value itself is bound as a parameter rather
    # than pasted into the SQL text
    sql = "SELECT * from articles where %s=?" % lookupKey
    rows = con.execute(sql, (articleId,)).fetchall()
    if not rows:
        logging.error("Could not find article %s in textDir %s", articleId, textDir)
        return None
    return rows[0]
Exemple #5
0
def getUnloadedFnames(dbFname, newFnames):
    """ Return the entries of newFnames that are not yet listed in the table
    loadedFiles of the sqlite db dbFname.

    Only the basename of each file is compared against the fname column; the
    returned entries keep their original form and order. If the SELECT raises
    sqlite3.OperationalError this is taken to mean that there is no loadedFiles
    table yet, and newFnames itself is returned unchanged.
    """
    _, cursor = maxTables.openSqlite(dbFname)
    try:
        # a set, so that the membership test below is cheap
        alreadyLoaded = set(row[0] for row in cursor.execute("SELECT fname from loadedFiles"))
    except sqlite3.OperationalError:
        logging.debug("No loadedFiles table yet in %s" % dbFname)
        return newFnames

    # keep only the files whose basename has not been recorded as loaded
    return [fname for fname in newFnames if basename(fname) not in alreadyLoaded]
Exemple #6
0
def openArticleDb(datasetName):
    """ Open the article db of datasetName and return a (con, cur) tuple.

    The connection's row_factory is set to sqlite3.Row and the returned cursor
    is created from the connection after that.
    """
    dbPath = getArtDbPath(datasetName)
    # the cursor handed back by openSqlite is not used: a new one is made
    # once the row factory has been set on the connection
    connection, _ = maxTables.openSqlite(dbPath)
    connection.row_factory = sqlite3.Row
    return connection, connection.cursor()