Python loadLex Beispiele

Programmiersprache: Python

Namespace / Paketname: fastFind

Methode / Funktion: loadLex

Beispiele auf hotexamples.com: 18

Python loadLex – 18 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für fastFind.loadLex, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten, um ihre Qualität zu verbessern.

Beispiel #1

0

Datei anzeigen

def startup(paramDict, result):
    """ load the symbol lexicon and the word blacklist into module globals

    Sets the global lex to the value returned by fastFind.loadLex(symFname)
    and the global blackList to the set of the first 1000 lines of the file
    at pubConf.bncFname.

    Neither paramDict nor result is read by this function.
    """
    global lex, blackList
    lex = fastFind.loadLex(symFname)
    # 'with' closes the word list right away instead of leaving the handle
    # open until garbage collection
    with open(pubConf.bncFname) as bncFile:
        blackList = set(bncFile.read().splitlines()[:1000])

Beispiel #2

0

Datei anzeigen

Datei: flankFinder.py Projekt: Moxikai/pubMunch

def startup(paramDict, result):
    """ load the lexicon stored next to the word file, plus the blacklist

    paramDict["wordFname"] names a word file. The lexicon is read from the
    same directory, from a file whose name is the word file's base name up
    to its first "." followed by ".marshal.gz"; the result of
    fastFind.loadLex is stored in the global lex.

    The global blackList becomes the set of the first 10000 lines of the
    file at pubConf.bncFname.

    result is not read by this function.
    """
    global lex, blackList
    # local import so this function does not rely on basename being
    # imported at file level
    from os.path import basename

    wordFname = paramDict["wordFname"]
    # Cut the extension off the base name only. Splitting the full path at
    # the first "." breaks on relative paths ("./words.txt") and on
    # directory names that contain a dot.
    stem = basename(wordFname).split(".")[0]
    lex = fastFind.loadLex(join(dirname(wordFname), stem + ".marshal.gz"))

    # 'with' closes the word list instead of leaking the file handle
    with open(pubConf.bncFname) as bncFile:
        blackList = set(bncFile.read().splitlines()[:10000])

Beispiel #3

0

Datei anzeigen

Datei: symbolGeneProteinFinder.py Projekt: Moxikai/pubMunch

def startup(paramDict, result):
    """ read the lexicon at symFname and build the blacklist set

    Globals written:
      lex       -- return value of fastFind.loadLex(symFname)
      blackList -- set of the first 1000 lines of the file at
                   pubConf.bncFname

    The two arguments are accepted but not used in the body.
    """
    global lex, blackList
    lex = fastFind.loadLex(symFname)

    # read inside a context manager so the file is closed deterministically
    with open(pubConf.bncFname) as wordFile:
        wordLines = wordFile.read().splitlines()
    blackList = set(wordLines[:1000])

Beispiel #4

0

Datei anzeigen

Datei: diseaseSearch.py Projekt: strbean/pubMunch-BRCA

 def startup(self, paramDict):
     """ load the keyword dictionary into self.lex

     The file name is paramDict["dict"] when that key is present, otherwise
     the hard-coded malacards dictionary path below. The chosen name is
     logged at info level and handed to fastFind.loadLex.
     """
     defaultFname = "/hive/data/inside/pubs/geneDisease/diseaseDictionary/malacards/dictionary.marshal.gz"
     dictFname = paramDict["dict"] if "dict" in paramDict else defaultFname
     logging.info("Reading %s" % dictFname)
     self.lex = fastFind.loadLex(dictFname)

Beispiel #5

0

Datei anzeigen

Datei: flankFinder.py Projekt: strbean/pubMunch-BRCA

def startup(paramDict, result):
    """ read the marshalled lexicon belonging to the word file, and the blacklist

    The word file name is taken from paramDict["wordFname"]. The lexicon
    file is looked up in the directory of the word file; its name is the
    word file's base name truncated at the first "." plus ".marshal.gz".
    fastFind.loadLex reads it and the result is kept in the global lex.

    The first 10000 lines of the file at pubConf.bncFname are stored as a
    set in the global blackList.

    result is not used.
    """
    global lex, blackList
    # imported here so the function does not depend on a file-level import
    from os.path import basename

    wordFname = paramDict["wordFname"]
    # Derive the stem from the base name, not from the whole path: the
    # first "." of the full path may sit in a directory component
    # (e.g. "./x.txt" or "/data/v1.2/x.txt").
    lexName = basename(wordFname).split(".")[0] + ".marshal.gz"
    lex = fastFind.loadLex(join(dirname(wordFname), lexName))

    # context manager: do not leave the word list handle open
    with open(pubConf.bncFname) as bncFile:
        blackList = set(bncFile.read().splitlines()[:10000])

Beispiel #6

0

Datei anzeigen

Datei: pubNlp.py Projekt: maximilianh/pubMunch

def findDiseases(text):
    """ find diseases in string and return as (start, end, diseaseName)

    Generator. When the global disLex is still None, the lexicon is read
    with fastFind.loadLex from
    <pubConf.staticDataDir>/diseases/diseases.marshal.gz and kept in disLex
    for later calls. Matches come from fastFind.fastFind(text, disLex,
    toLower=True) and are yielded unchanged.

    >>> list(findDiseases("AlzhEImer's Disease"))
    [(0, 19, 'Alzheimer Disease')]
    """
    global disLex
    # identity test: None is a singleton, '==' would go through __eq__
    if disLex is None:
        disPath = join(pubConf.staticDataDir, "diseases", "diseases.marshal.gz")
        disLex = fastFind.loadLex(disPath)

    for (start, end, name) in fastFind.fastFind(text, disLex, toLower=True):
        yield start, end, name

Beispiel #7

0

Datei anzeigen

Datei: pubNlp.py Projekt: maximilianh/pubMunch

def findDiseases(text):
    """ find diseases in string and return as (start, end, diseaseName)

    Yields one (start, end, name) tuple per hit reported by
    fastFind.fastFind(text, disLex, toLower=True).

    The lexicon is cached in the global disLex: it is only read from
    <pubConf.staticDataDir>/diseases/diseases.marshal.gz while disLex is
    None.

    >>> list(findDiseases("AlzhEImer's Disease"))
    [(0, 19, 'Alzheimer Disease')]
    """
    global disLex
    # compare against None by identity, not equality
    if disLex is None:
        disPath = join(pubConf.staticDataDir, "diseases",
                       "diseases.marshal.gz")
        disLex = fastFind.loadLex(disPath)

    for (start, end, name) in fastFind.fastFind(text, disLex, toLower=True):
        yield start, end, name

Beispiel #8

0

Datei anzeigen

Datei: pubNlp.py Projekt: maximilianh/pubMunch

def findDrugs(text):
    """ find drugs in string and return as (start, end, drugbankName)

    Generator. The lexicon is read once from
    <pubConf.staticDataDir>/drugs/drugbank.marshal.gz (while the global
    drugLex is None) and reused afterwards. Hits from
    fastFind.fastFind(text, drugLex, toLower=True) are yielded, except those
    whose lower-cased name is in drugBlacklist.

    >>> list(findDrugs("Acetaminophen, Penicillin V and Herceptin."))
    [(0, 13, 'Acetaminophen'), (15, 27, 'Penicillin V'), (32, 41, 'Trastuzumab')]
    """
    global drugLex
    # identity test: None is a singleton
    if drugLex is None:
        drugPath = join(pubConf.staticDataDir, "drugs", "drugbank.marshal.gz")
        drugLex = fastFind.loadLex(drugPath)

    for (start, end, name) in fastFind.fastFind(text, drugLex, toLower=True):
        # skip names on the blacklist (compared in lower case)
        if name.lower() in drugBlacklist:
            continue
        yield start, end, name

Beispiel #9

0

Datei anzeigen

Datei: pubNlp.py Projekt: maximilianh/pubMunch

def findDrugs(text):
    """ find drugs in string and return as (start, end, drugbankName)

    Yields (start, end, name) for every hit of
    fastFind.fastFind(text, drugLex, toLower=True) whose lower-cased name is
    not in drugBlacklist.

    The global drugLex acts as a cache: while it is None, the path is
    obtained from pubConf.getStaticFile("drugs", "drugbank.marshal.gz") and
    read with fastFind.loadLex.

    >>> list(findDrugs("Acetaminophen, Penicillin V and Herceptin."))
    [(0, 13, 'Acetaminophen'), (15, 27, 'Penicillin V'), (32, 41, 'Trastuzumab')]
    """
    global drugLex
    # compare against None by identity, not equality
    if drugLex is None:
        drugPath = pubConf.getStaticFile("drugs", "drugbank.marshal.gz")
        drugLex = fastFind.loadLex(drugPath)

    for (start, end, name) in fastFind.fastFind(text, drugLex, toLower=True):
        # blacklisted names are dropped
        if name.lower() in drugBlacklist:
            continue
        yield start, end, name

Beispiel #10

0

Datei anzeigen

Datei: pubNlp.py Projekt: maximilianh/pubMunch

def findCells(text):
    """ find cell types

    Generator yielding (start, end, name) for every hit of
    fastFind.fastFind on the lower-cased text, skipping hits whose
    lower-cased name is in cellBlackList.

    The lexicon is cached in the global cellLex and only read (via
    pubConf.getStaticFile("cellTypes", "cellTypes.marshal") and
    fastFind.loadLex) while cellLex is None.

    >>> list(findCells("Oligodendrocytes and neural progenitors."))
    [(0, 16, 'oligodendrocyte')]
    """
    global cellLex
    if cellLex is None:
        # resolve the file name only when the lexicon still has to be read,
        # not on every call
        dictFname = pubConf.getStaticFile("cellTypes", "cellTypes.marshal")
        cellLex = fastFind.loadLex(dictFname)

    for (start, end, name) in fastFind.fastFind(text.lower(), cellLex):
        if name.lower() in cellBlackList:
            continue
        yield start, end, name

Beispiel #11

0

Datei anzeigen

def startup(paramDict):
    """ read all lexicon files and the optional settings from paramDict

    paramDict["fnames"] is a comma-separated list of file names. Each file
    is read with loadLex and stored in the global lexes under the part of
    its base name that precedes the first ".".

    Optional keys:
      "toLower"    -- converted with bool(int(...)) and stored in the
                      global toLower; the value is logged
      "reqStrings" -- split at "," and stored in the global reqStrings
    """
    global lexes, toLower, reqStrings

    for lexPath in paramDict["fnames"].split(","):
        lexKey = basename(lexPath).partition(".")[0]
        lexes[lexKey] = loadLex(lexPath)

    if "toLower" in paramDict:
        toLower = bool(int(paramDict["toLower"]))
        logging.info("toLower is %s" % toLower)

    if "reqStrings" in paramDict:
        reqStrings = paramDict["reqStrings"].split(",")

Beispiel #12

0

Datei anzeigen

Datei: dictFinder.py Projekt: maximilianh/pubMunch

def startup(paramDict):
    """ fill the global lexes and pick up optional flags

    Every name in the comma-separated paramDict["fnames"] is passed to
    loadLex; the result is filed in lexes under the base name cut at its
    first ".".

    If paramDict has "toLower", the global toLower is set to
    bool(int(value)) and logged. If it has "reqStrings", the global
    reqStrings is set to the value split at ",".
    """
    global lexes, toLower, reqStrings

    allFnames = paramDict["fnames"].split(",")
    for oneFname in allFnames:
        shortName = basename(oneFname).split(".", 1)[0]
        lexes[shortName] = loadLex(oneFname)

    if "toLower" in paramDict:
        lowerFlag = int(paramDict["toLower"])
        toLower = bool(lowerFlag)
        logging.info("toLower is %s" % toLower)

    if "reqStrings" in paramDict:
        rawReqStrings = paramDict["reqStrings"]
        reqStrings = rawReqStrings.split(",")

Beispiel #13

0

Datei anzeigen

Datei: taxonSearch.py Projekt: Moxikai/pubMunch

 def startup(self, paramDict):
     """ load the speciesDict lexicon shipped next to this module

     The file is <directory of this file>/data/speciesDict.marshal.gz. Its
     name is logged at info level and the object returned by
     fastFind.loadLex is stored in self.lex. paramDict is not consulted.
     """
     moduleDir = dirname(__file__)
     dictFname = join(moduleDir, "data/speciesDict.marshal.gz")
     logging.info("Reading %s" % dictFname)
     self.lex = fastFind.loadLex(dictFname)

Beispiel #14

0

Datei anzeigen

Datei: simpleGeneSearch.py Projekt: strbean/pubMunch-BRCA

def startup(paramDict):
    """ load the lexicon at dataFname into the module-level lex

    paramDict is accepted but not read.
    """
    global lex
    loadedLex = loadLex(dataFname)
    lex = loadedLex

Beispiel #15

0

Datei anzeigen

Datei: geneSearch.py Projekt: joepickrell/pubMunch

def startup(paramDict):
    """ read dataFname with loadLex and keep the result in the global lex

    The paramDict argument is not used in the body.
    """
    global lex
    lexFname = dataFname
    lex = loadLex(lexFname)

Beispiel #16

0

Datei anzeigen

Datei: geneFinder.py Projekt: maximilianh/pubMunch

def initData(markerTypes=None, exclMarkerTypes=None, addOptional=False):
    """ compile regexes and read filter files.

    markerTypes is the collection of marker types to prepare. If it is
    None, all keys of compileREs(addOptional) plus "geneName", "symbol",
    "symbolMaybe" and "dnaSeq" are used. A collection passed in by the
    caller is modified in place when exclMarkerTypes is given.

    exclMarkerTypes lists marker types to drop again. Each entry is removed
    with markerTypes.remove(), so an entry that is not present raises.

    In many applications, looking for DNA sequences might not be desirable,
    as it requires a BLAT server which takes a lot of memory. In this case,
    you can switch off blatting by specifying exclMarkerTypes=["dnaSeq"].

    addOptional is passed through to compileREs.

    Globals written: searchTypes, markerDictList and, depending on the
    marker types, geneNameLex, bandToEntrezSyms, geneSymLex,
    symLeftReqWords, symRightReqWords. Entries are added to the existing
    global filterDict.
    """
    # setup list of marker types as specified
    reDict = compileREs(addOptional)
    if markerTypes is None:
        markerTypes = set(reDict.keys())
        markerTypes.add("geneName")
        markerTypes.add("symbol")
        markerTypes.add("symbolMaybe")
        markerTypes.add("dnaSeq")

    if exclMarkerTypes is not None:
        for m in exclMarkerTypes:
            markerTypes.remove(m)

    global searchTypes
    searchTypes = markerTypes

    global filterDict
    kwDictList = []
    for markerType in markerTypes:
        # nothing is loaded here for dnaSeq
        if markerType == "dnaSeq":
            continue
        # special case for long gene names
        if markerType == "geneName":
            global geneNameLex
            fname = join(GENEDATADIR, "geneNames.marshal.gz")
            logging.info("Loading %s" % fname)
            geneNameLex = fastFind.loadLex(fname)
            continue

        # special case for bands
        # There is no 'continue' in this branch: after loading the table,
        # "band" also goes through the regex registration further down.
        if markerType == "band":
            global bandToEntrezSyms
            fname = join(GENEDATADIR, "bandGenes.tab")
            logging.info("Loading %s" % fname)
            bandToEntrezSyms = parseBands(fname)

        # special case for gene symbols
        if markerType == "symbol" or markerType == "symbolMaybe":
            global geneSymLex
            fname = join(GENEDATADIR, "symbols.marshal.gz")
            logging.info("Loading %s" % fname)
            geneSymLex = fastFind.loadLex(fname)

            global symLeftReqWords, symRightReqWords
            symLeftReqWords = readBestWords(join(GENEDATADIR, "left.tab"), 500)
            symRightReqWords = readBestWords(join(GENEDATADIR, "right.tab"),
                                             500)
            continue

        # all remaining types are regex based
        markerRe = reDict[markerType]
        kwDictList.append((markerType, markerRe))
        if markerType in requiresFilter:
            filterFname = os.path.join(DICTDIR, markerType + "Accs.txt.gz")
            logging.info("Opening %s" % filterFname)
            if not isfile(filterFname):
                # logging.warn is a deprecated alias that newer Python
                # versions no longer provide; logging.warning is equivalent
                logging.warning("Cannot filter %s accessions, %s not found" % \
                    (markerType, filterFname))
                # None marks "no filter available" for this type
                filterDict[markerType] = None
                continue
            filterSet = pubKeyVal.openDb(filterFname)
            filterDict[markerType] = filterSet

    global markerDictList
    markerDictList = kwDictList
    logging.debug("Loaded marker dict for these types: %s" %
                  [x for x, y in markerDictList])

Beispiel #17

0

Datei anzeigen

Datei: geneFinder.py Projekt: bylin/text-mining

def initData(markerTypes=None, exclMarkerTypes=None, addOptional=False):
    """ compile regexes and read filter files.

    markerTypes is the collection of marker types to prepare. When it is
    None, the keys of compileREs(addOptional) plus "geneName", "symbol",
    "symbolMaybe" and "dnaSeq" are used. A collection supplied by the
    caller is changed in place when exclMarkerTypes is given.

    exclMarkerTypes lists marker types to take out again. Each one is
    removed with markerTypes.remove(), which raises for an entry that is
    not present.

    In many applications, looking for DNA sequences might not be desirable,
    as it requires a BLAT server which takes a lot of memory. In this case,
    you can switch off blatting by specifying exclMarkerTypes=["dnaSeq"].

    addOptional is handed on to compileREs.

    Globals written: searchTypes, markerDictList and, depending on the
    marker types, geneNameLex, bandToEntrezSyms, geneSymLex,
    symLeftReqWords, symRightReqWords. Entries are added to the existing
    global filterDict.
    """
    # setup list of marker types as specified
    reDict = compileREs(addOptional)
    if markerTypes is None:
        markerTypes = set(reDict.keys())
        markerTypes.add("geneName")
        markerTypes.add("symbol")
        markerTypes.add("symbolMaybe")
        markerTypes.add("dnaSeq")

    if exclMarkerTypes is not None:
        for m in exclMarkerTypes:
            markerTypes.remove(m)

    global searchTypes
    searchTypes = markerTypes

    global filterDict
    kwDictList = []
    for markerType in markerTypes:
        # nothing is loaded here for dnaSeq
        if markerType == "dnaSeq":
            continue
        # special case for long gene names
        if markerType == "geneName":
            global geneNameLex
            fname = join(GENEDATADIR, "geneNames.marshal.gz")
            logging.info("Loading %s" % fname)
            geneNameLex = fastFind.loadLex(fname)
            continue

        # special case for bands
        # No 'continue' in this branch: "band" is also registered as a
        # regex type further down.
        if markerType == "band":
            global bandToEntrezSyms
            fname = join(GENEDATADIR, "bandToEntrez.marshal.gz")
            logging.info("Loading %s" % fname)
            # the context manager closes the gzip handle after reading
            with gzip.open(fname) as bandFile:
                bandToEntrezSyms = marshal.loads(bandFile.read())

        # special case for gene symbols
        if markerType == "symbol" or markerType == "symbolMaybe":
            global geneSymLex
            fname = join(GENEDATADIR, "symbols.marshal.gz")
            logging.info("Loading %s" % fname)
            geneSymLex = fastFind.loadLex(fname)

            global symLeftReqWords, symRightReqWords
            symLeftReqWords = readBestWords(join(GENEDATADIR, "left.tab"), 500)
            symRightReqWords = readBestWords(join(GENEDATADIR, "right.tab"), 500)
            continue

        # all remaining types are regex based
        markerRe = reDict[markerType]
        kwDictList.append((markerType, markerRe))
        if markerType in requiresFilter:
            filterFname = os.path.join(DICTDIR, markerType+"Accs.txt.gz")
            logging.info("Opening %s" % filterFname)
            filterSet = pubKeyVal.openDb(filterFname)
            filterDict[markerType] = filterSet

    global markerDictList
    markerDictList = kwDictList
    logging.debug("Loaded marker dict for these types: %s" % [x for x,y in markerDictList])

Beispiel #18

0

Datei anzeigen

Datei: taxonSearch.py Projekt: strbean/pubMunch-BRCA

 def startup(self, paramDict):
     """ read data/speciesDict.marshal.gz from this module's directory

     The file name is built relative to dirname(__file__), logged at info
     level, and read with fastFind.loadLex; the result is kept in self.lex.
     The paramDict argument is not used.
     """
     relPath = "data/speciesDict.marshal.gz"
     dictFname = join(dirname(__file__), relPath)
     logging.info("Reading %s" % dictFname)
     loaded = fastFind.loadLex(dictFname)
     self.lex = loaded