Beispiel #1
0
def analyze(meta, dbPath=None, resultPath=None, verbose=False):
    meta = result.getMeta(meta)
    if dbPath == None:
        dbPath = settings.CGI_DB_PATH
    print "Analyzing", dbPath
    con = DB.connect(dbPath)
    result.sortFeatures(meta)
    features = meta["features"]
    count = 1
    numFeatures = len(features)
    nonSelected = []
    for featureName in features:
        if not isinstance(features[featureName], int):
            if verbose:
                print "Processing feature", featureName, str(count) + "/" + str(numFeatures)
            geneName = getGeneName(featureName)
            if geneName != None:
                mappings = getTermAnalysis(con, geneName, "disease")
                result.setValue(features[featureName], "CancerGeneIndex", mappings)
                mappings = getTermAnalysis(con, geneName, "drug")
                result.setValue(features[featureName], "CancerGeneDrug", mappings)
        else:
            geneName = getGeneName(featureName)
            if geneName != None:
                nonSelected.append(geneName)
        count += 1
    result.setValue(meta, "CancerGeneIndex", analyzeTermCoverage(features), "analysis")
    result.setValue(meta["analysis"], "non-selected", getCancerGeneCoverage(con, nonSelected), "CancerGeneIndex")
    if resultPath != None:
        result.saveMeta(meta, resultPath)
    return meta
def processGeneEntries(xmlFile, dbPath):
    con = DB.connect(dbPath)
    tableValuePaths = OrderedDict()
    inserts = {}
    for tableName in sorted(settings.CGI_TABLES.keys()):
        columns = getColumns(tableName)
        inserts[tableName] = DB.defineSQLInsert(tableName, columns)
        columnToElement = {v:k for k, v in settings.CGI_TABLES[tableName]["columns"].items()}
        valueElementPaths = []
        for i in range(len(columns)):
            valueElementPaths.append(columnToElement[columns[i][0]])
        tableValuePaths[tableName] = valueElementPaths
    for elem in iterparse(xmlFile, tag='GeneEntry'):
        for tableName in tableValuePaths:
            valuePaths = tableValuePaths[tableName]
            valueLists = []
            if "elements" in settings.CGI_TABLES[tableName]:
                listElemPath = settings.CGI_TABLES[tableName]["elements"]
                elemList = elem.findall(listElemPath)
            else:
                listElemPath = ""
                elemList = [elem]
            for listElem in elemList:
                values = []
                for valueElementPath in valuePaths:
                    valueElementPath = valueElementPath.replace(listElemPath, "").strip("/")
                    if len(valueElementPath) > 0:
                        if valueElementPath.startswith("../"):
                            valueElem = elem.find(valueElementPath.strip("../"))
                        else:
                            valueElem = listElem.find(valueElementPath)
                    else:
                        valueElem = listElem
                    if valueElem != None:
                        values.append(valueElem.text)
                    else:
                        values.append(None)
                #print inserts[tableName]
                #print values
                #con.execute(inserts[tableName], values)
                valueLists.append(values)
            if "preprocess" in settings.CGI_TABLES[tableName]:
                valueLists = settings.CGI_TABLES[tableName]["preprocess"](tableName, elem, valueLists)
            #print inserts[tableName]
            #print valueLists
            con.executemany(inserts[tableName], valueLists)
        print elem.find("HUGOGeneSymbol").text
    con.commit()
    con.close()
def initDB(dbPath, clear=True):
    # Initialize the database
    if clear and os.path.exists(dbPath):
        print "Removing existing database", dbPath
        os.remove(dbPath)
    if not os.path.exists(os.path.dirname(dbPath)):
        os.makedirs(os.path.dirname(dbPath))
    con = DB.connect(dbPath)
    
    for tableName in sorted(settings.CGI_TABLES.keys()):
        columns = getColumns(tableName)
        table = settings.CGI_TABLES[tableName]
        con.execute(DB.defineSQLTable(tableName, columns, table.get("primary_key", None)))
        if "indices" in table:
            DB.addIndices(con, tableName, table["indices"])
    return con
def makeCountTables(filename):
    """
    Create the aggregated "disease" and "drug" tables from the sentence table.

    Each table holds one row per combination of gene symbol, term, concept
    code and organism found in the sentence rows where the term is not
    NULL, together with the number of such rows as term_count. Both tables
    get an index on hugo_gene_symbol through DB.addIndices.

    filename: passed to DB.connect

    The statements are plain CREATE TABLE, so a database that already
    contains these tables is expected to make the call fail. The connection
    is closed in every case; the commit only happens when both tables were
    created.
    """
    diseaseSQL = """
    CREATE TABLE disease AS
    SELECT hugo_gene_symbol, matched_disease_term, nci_disease_concept_code, organism, COUNT(*) 
    AS term_count
    FROM sentence
    WHERE matched_disease_term IS NOT NULL
    GROUP BY hugo_gene_symbol, matched_disease_term, nci_disease_concept_code, organism 
    ORDER BY hugo_gene_symbol;
    """
    drugSQL = """
    CREATE TABLE drug AS
    SELECT hugo_gene_symbol, drug_term, nci_drug_concept_code, organism, COUNT(*) 
    AS term_count
    FROM sentence
    WHERE drug_term IS NOT NULL
    GROUP BY hugo_gene_symbol, drug_term, nci_drug_concept_code, organism 
    ORDER BY hugo_gene_symbol;
    """
    con = DB.connect(filename)
    try:
        for tableName, createSQL in (("disease", diseaseSQL), ("drug", drugSQL)):
            con.execute(createSQL)
            DB.addIndices(con, tableName, ["hugo_gene_symbol"])
        con.commit()
    finally:
        # Release the connection even when a statement fails
        con.close()