Python loadLexの例

プログラミング言語: Python

名前空間/パッケージ名: fastFind

メソッド/関数: loadLex

hotexamples.comのコード掲載数: 18

Python loadLex - 18件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのfastFind.loadLexの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

0

ファイルを表示

def startup(paramDict, result):
    """ parse sym file into lex and read the blacklist

    Sets the module globals:
    - lex: whatever fastFind.loadLex returns for symFname
    - blackList: set of the first 1000 lines of the file pubConf.bncFname

    paramDict and result are accepted but not used by this function.
    """
    global lex, blackList
    lex = fastFind.loadLex(symFname)
    # use a context manager so the file handle is closed right after reading
    with open(pubConf.bncFname) as bncFile:
        blackList = set(bncFile.read().splitlines()[:1000])

コード例 #2

0

ファイルを表示

ファイル: flankFinder.py プロジェクト: Moxikai/pubMunch

def startup(paramDict, result):
    """ parse sym file into lex and read the blacklist

    paramDict must contain "wordFname". The lexicon is loaded from the file
    "<stem>.marshal.gz" in the same directory as wordFname, where <stem> is
    the part of the file name before its first dot.

    Sets the module globals:
    - lex: whatever fastFind.loadLex returns for that file
    - blackList: set of the first 10000 lines of the file pubConf.bncFname

    result is accepted but not used by this function.
    """
    global lex, blackList
    # local import: only join and dirname are known to be imported by the module
    from os.path import basename

    wordFname = paramDict["wordFname"]
    # Take the stem from the file name only. Splitting the whole path on "."
    # breaks for relative paths (the directory gets joined twice) and for
    # directories that contain a dot.
    stem = basename(wordFname).split(".")[0]
    lex = fastFind.loadLex(join(dirname(wordFname), stem + ".marshal.gz"))

    # use a context manager so the file handle is closed right after reading
    with open(pubConf.bncFname) as bncFile:
        blackList = set(bncFile.read().splitlines()[:10000])

コード例 #3

0

ファイルを表示

ファイル: symbolGeneProteinFinder.py プロジェクト: Moxikai/pubMunch

def startup(paramDict, result):
    """ parse sym file into lex and read the blacklist

    Sets the module globals:
    - lex: whatever fastFind.loadLex returns for symFname
    - blackList: set of the first 1000 lines of the file pubConf.bncFname

    paramDict and result are accepted but not used by this function.
    """
    global lex, blackList
    lex = fastFind.loadLex(symFname)
    # the with-block closes the file as soon as its content has been read
    with open(pubConf.bncFname) as bncFile:
        blackList = set(bncFile.read().splitlines()[:1000])

コード例 #4

0

ファイルを表示

ファイル: diseaseSearch.py プロジェクト: strbean/pubMunch-BRCA

 def startup(self, paramDict):
     """ load the keyword dictionary into self.lex

     The file name is taken from paramDict["dict"] when that key is present,
     otherwise a hard-coded default path is used.
     """
     defaultFname = "/hive/data/inside/pubs/geneDisease/diseaseDictionary/malacards/dictionary.marshal.gz"
     dictFname = paramDict["dict"] if "dict" in paramDict else defaultFname
     logging.info("Reading %s" % dictFname)
     loadedLex = fastFind.loadLex(dictFname)
     self.lex = loadedLex

コード例 #5

0

ファイルを表示

ファイル: flankFinder.py プロジェクト: strbean/pubMunch-BRCA

def startup(paramDict, result):
    """ parse sym file into lex and read the blacklist

    paramDict must contain "wordFname". The lexicon is loaded from the file
    "<stem>.marshal.gz" in the same directory as wordFname, where <stem> is
    the part of the file name before its first dot.

    Sets the module globals:
    - lex: whatever fastFind.loadLex returns for that file
    - blackList: set of the first 10000 lines of the file pubConf.bncFname

    result is accepted but not used by this function.
    """
    global lex, blackList
    # local import: only join and dirname are known to be imported by the module
    from os.path import basename

    wordFname = paramDict["wordFname"]
    # Derive the stem from the file name alone. Splitting the full path on "."
    # gives a wrong result for relative paths (directory joined twice) and for
    # directory names that contain a dot.
    stem = basename(wordFname).split(".")[0]
    lex = fastFind.loadLex(
        join(dirname(wordFname), stem + ".marshal.gz"))

    # context manager: do not leave the file handle open after reading
    with open(pubConf.bncFname) as bncFile:
        blackList = set(bncFile.read().splitlines()[:10000])

コード例 #6

0

ファイルを表示

ファイル: pubNlp.py プロジェクト: maximilianh/pubMunch

def findDiseases(text):
    """ find diseases in string and yield them as (start, end, diseaseName)

    The disease lexicon is loaded into the global disLex on the first call
    and reused afterwards.

    >>> list(findDiseases("AlzhEImer's Disease"))
    [(0, 19, 'Alzheimer Disease')]
    """
    global disLex
    # identity test: None is a singleton, "== None" would go through __eq__
    if disLex is None:
        disPath = join(pubConf.staticDataDir, "diseases", "diseases.marshal.gz")
        disLex = fastFind.loadLex(disPath)

    for (start, end, name) in fastFind.fastFind(text, disLex, toLower=True):
        yield start, end, name

コード例 #7

0

ファイルを表示

ファイル: pubNlp.py プロジェクト: maximilianh/pubMunch

def findDiseases(text):
    """ find diseases in string and yield them as (start, end, diseaseName)

    On the first call the disease lexicon is loaded into the global disLex;
    later calls reuse it.

    >>> list(findDiseases("AlzhEImer's Disease"))
    [(0, 19, 'Alzheimer Disease')]
    """
    global disLex
    # compare to None by identity, not by equality
    if disLex is None:
        disPath = join(pubConf.staticDataDir, "diseases",
                       "diseases.marshal.gz")
        disLex = fastFind.loadLex(disPath)

    for (start, end, name) in fastFind.fastFind(text, disLex, toLower=True):
        yield start, end, name

コード例 #8

0

ファイルを表示

ファイル: pubNlp.py プロジェクト: maximilianh/pubMunch

def findDrugs(text):
    """ find drugs in string and yield them as (start, end, drugbankName)

    The drug lexicon is loaded into the global drugLex on the first call and
    reused afterwards. Matches whose lowercased name is in drugBlacklist are
    skipped.

    >>> list(findDrugs("Acetaminophen, Penicillin V and Herceptin."))
    [(0, 13, 'Acetaminophen'), (15, 27, 'Penicillin V'), (32, 41, 'Trastuzumab')]
    """
    global drugLex
    # identity test: None is a singleton, "== None" would go through __eq__
    if drugLex is None:
        drugPath = join(pubConf.staticDataDir, "drugs", "drugbank.marshal.gz")
        drugLex = fastFind.loadLex(drugPath)

    for (start, end, name) in fastFind.fastFind(text, drugLex, toLower=True):
        if name.lower() in drugBlacklist:
            continue
        yield start, end, name

コード例 #9

0

ファイルを表示

ファイル: pubNlp.py プロジェクト: maximilianh/pubMunch

def findDrugs(text):
    """ find drugs in string and yield them as (start, end, drugbankName)

    On the first call the drug lexicon is loaded into the global drugLex;
    later calls reuse it. Matches whose lowercased name is in drugBlacklist
    are skipped.

    >>> list(findDrugs("Acetaminophen, Penicillin V and Herceptin."))
    [(0, 13, 'Acetaminophen'), (15, 27, 'Penicillin V'), (32, 41, 'Trastuzumab')]
    """
    global drugLex
    # compare to None by identity, not by equality
    if drugLex is None:
        drugPath = pubConf.getStaticFile("drugs", "drugbank.marshal.gz")
        drugLex = fastFind.loadLex(drugPath)

    for (start, end, name) in fastFind.fastFind(text, drugLex, toLower=True):
        if name.lower() in drugBlacklist:
            continue
        yield start, end, name

コード例 #10

0

ファイルを表示

ファイル: pubNlp.py プロジェクト: maximilianh/pubMunch

def findCells(text):
    """ find cell types and yield them as (start, end, name)

    The cell type lexicon is loaded into the global cellLex on the first call
    and reused afterwards. The text is lowercased before matching. Matches
    whose lowercased name is in cellBlackList are skipped.

    >>> list(findCells("Oligodendrocytes and neural progenitors."))
    [(0, 16, 'oligodendrocyte')]
    """
    global cellLex
    if cellLex is None:
        # resolve the dictionary path only when the lexicon actually has to
        # be loaded, not on every call
        dictFname = pubConf.getStaticFile("cellTypes", "cellTypes.marshal")
        cellLex = fastFind.loadLex(dictFname)

    for (start, end, name) in fastFind.fastFind(text.lower(), cellLex):
        if name.lower() in cellBlackList:
            continue
        yield start, end, name

コード例 #11

0

ファイルを表示

def startup(paramDict):
    """ load all lexicon files and read the optional settings

    paramDict["fnames"] is a comma-separated list of lexicon files. Each one
    is loaded with loadLex and stored in the global dict lexes under the part
    of its base name before the first dot.

    Optional keys:
    - "toLower": integer-like value, converted to a bool and stored in the
      global toLower
    - "reqStrings": comma-separated string, split and stored in the global
      reqStrings
    """
    global lexes, toLower, reqStrings

    for lexPath in paramDict["fnames"].split(","):
        stem = basename(lexPath).partition(".")[0]
        lexes[stem] = loadLex(lexPath)

    if "toLower" in paramDict:
        lowerFlag = int(paramDict["toLower"])
        toLower = bool(lowerFlag)
        logging.info("toLower is %s" % toLower)

    if "reqStrings" in paramDict:
        rawReqStrings = paramDict["reqStrings"]
        reqStrings = rawReqStrings.split(",")

コード例 #12

0

ファイルを表示

ファイル: dictFinder.py プロジェクト: maximilianh/pubMunch

def startup(paramDict):
    """ read the lexicon files and the optional parameters

    Every file named in the comma-separated paramDict["fnames"] is loaded
    with loadLex. The result is kept in the global dict lexes, keyed by the
    file's base name up to its first dot.

    If paramDict has "toLower", its integer value is turned into a bool and
    assigned to the global toLower. If it has "reqStrings", the value is
    split on commas and assigned to the global reqStrings.
    """
    global lexes, toLower, reqStrings

    lexPaths = paramDict["fnames"].split(",")
    for lexPath in lexPaths:
        fileName = basename(lexPath)
        lexes[fileName.split(".")[0]] = loadLex(lexPath)

    if "toLower" in paramDict:
        toLower = bool(int(paramDict["toLower"]))
        logging.info("toLower is %s" % toLower)

    if "reqStrings" in paramDict:
        reqStrings = paramDict["reqStrings"].split(",")

コード例 #13

0

ファイルを表示

ファイル: taxonSearch.py プロジェクト: Moxikai/pubMunch

 def startup(self, paramDict):
     """ load the species dictionary into self.lex

     The dictionary is read from data/speciesDict.marshal.gz, relative to the
     directory of this source file. paramDict is not used.
     """
     moduleDir = dirname(__file__)
     dictFname = join(moduleDir, "data/speciesDict.marshal.gz")
     logging.info("Reading %s" % dictFname)
     speciesLex = fastFind.loadLex(dictFname)
     self.lex = speciesLex

コード例 #14

0

ファイルを表示

ファイル: simpleGeneSearch.py プロジェクト: strbean/pubMunch-BRCA

def startup(paramDict):
    """ load the lexicon file dataFname into the global lex

    paramDict is accepted but not used.
    """
    global lex
    loadedLex = loadLex(dataFname)
    lex = loadedLex

コード例 #15

0

ファイルを表示

ファイル: geneSearch.py プロジェクト: joepickrell/pubMunch

def startup(paramDict):
    """ read dataFname with loadLex and keep the result in the global lex

    paramDict is accepted but not used.
    """
    global lex
    geneLex = loadLex(dataFname)
    lex = geneLex

コード例 #16

0

ファイルを表示

ファイル: geneFinder.py プロジェクト: maximilianh/pubMunch

def initData(markerTypes=None, exclMarkerTypes=None, addOptional=False):
    """ compile regexes and read filter files.

    markerTypes is the collection of marker types to prepare. When it is None,
    all keys of the compiled regex dict plus "geneName", "symbol",
    "symbolMaybe" and "dnaSeq" are used. The types in exclMarkerTypes are
    removed from it; this modifies a caller-supplied markerTypes in place and
    raises if a type to exclude is not present.

    In many applications, looking for dna sequences might not be desirable, as it requires
    a BLAT server which takes a lot of memory, in this case, you can switch off blatting by specifying
    exclMarkerTypes=["dnaSeq"]

    addOptional is passed on to compileREs.

    Results are stored in module globals: searchTypes, geneNameLex,
    bandToEntrezSyms, geneSymLex, symLeftReqWords, symRightReqWords,
    markerDictList, and filterDict (updated in place).
    """
    global searchTypes, filterDict, markerDictList
    global geneNameLex, bandToEntrezSyms, geneSymLex
    global symLeftReqWords, symRightReqWords

    # setup list of marker types as specified
    reDict = compileREs(addOptional)
    if markerTypes is None:
        markerTypes = set(reDict.keys())
        markerTypes.add("geneName")
        markerTypes.add("symbol")
        markerTypes.add("symbolMaybe")
        markerTypes.add("dnaSeq")

    if exclMarkerTypes is not None:
        for m in exclMarkerTypes:
            markerTypes.remove(m)

    searchTypes = markerTypes

    kwDictList = []
    # "symbol" and "symbolMaybe" share the same data files: load them only once
    symbolDataLoaded = False
    for markerType in markerTypes:
        # dna sequences need no dictionary or regex here
        if markerType == "dnaSeq":
            continue

        # special case for long gene names
        if markerType == "geneName":
            fname = join(GENEDATADIR, "geneNames.marshal.gz")
            logging.info("Loading %s" % fname)
            geneNameLex = fastFind.loadLex(fname)
            continue

        # special case for bands
        if markerType == "band":
            fname = join(GENEDATADIR, "bandGenes.tab")
            logging.info("Loading %s" % fname)
            bandToEntrezSyms = parseBands(fname)
            # no "continue" here: execution falls through to the regex
            # registration below (presumably intended, as in the original)

        # special case for gene symbols
        if markerType == "symbol" or markerType == "symbolMaybe":
            if not symbolDataLoaded:
                fname = join(GENEDATADIR, "symbols.marshal.gz")
                logging.info("Loading %s" % fname)
                geneSymLex = fastFind.loadLex(fname)

                symLeftReqWords = readBestWords(
                    join(GENEDATADIR, "left.tab"), 500)
                symRightReqWords = readBestWords(
                    join(GENEDATADIR, "right.tab"), 500)
                symbolDataLoaded = True
            continue

        # all remaining marker types are found with a regular expression
        markerRe = reDict[markerType]
        kwDictList.append((markerType, markerRe))
        if markerType in requiresFilter:
            filterFname = os.path.join(DICTDIR, markerType + "Accs.txt.gz")
            logging.info("Opening %s" % filterFname)
            if not isfile(filterFname):
                # logging.warn is a deprecated alias of logging.warning
                logging.warning("Cannot filter %s accessions, %s not found" %
                                (markerType, filterFname))
                filterDict[markerType] = None
                continue
            filterSet = pubKeyVal.openDb(filterFname)
            filterDict[markerType] = filterSet

    markerDictList = kwDictList
    logging.debug("Loaded marker dict for these types: %s" %
                  [x for x, y in markerDictList])

コード例 #17

0

ファイルを表示

ファイル: geneFinder.py プロジェクト: bylin/text-mining

def initData(markerTypes=None, exclMarkerTypes=None, addOptional=False):
    """ compile regexes and read filter files.

    markerTypes is the collection of marker types to prepare. When it is None,
    all keys of the compiled regex dict plus "geneName", "symbol",
    "symbolMaybe" and "dnaSeq" are used. The types in exclMarkerTypes are
    removed from it; this modifies a caller-supplied markerTypes in place and
    raises if a type to exclude is not present.

    In many applications, looking for dna sequences might not be desirable, as it requires
    a BLAT server which takes a lot of memory, in this case, you can switch off blatting by specifying
    exclMarkerTypes=["dnaSeq"]

    addOptional is passed on to compileREs.

    Results are stored in module globals: searchTypes, geneNameLex,
    bandToEntrezSyms, geneSymLex, symLeftReqWords, symRightReqWords,
    markerDictList, and filterDict (updated in place).
    """
    global searchTypes, filterDict, markerDictList
    global geneNameLex, bandToEntrezSyms, geneSymLex
    global symLeftReqWords, symRightReqWords

    # setup list of marker types as specified
    reDict = compileREs(addOptional)
    if markerTypes is None:
        markerTypes = set(reDict.keys())
        markerTypes.add("geneName")
        markerTypes.add("symbol")
        markerTypes.add("symbolMaybe")
        markerTypes.add("dnaSeq")

    if exclMarkerTypes is not None:
        for m in exclMarkerTypes:
            markerTypes.remove(m)

    searchTypes = markerTypes

    kwDictList = []
    # "symbol" and "symbolMaybe" share the same data files: load them only once
    symbolDataLoaded = False
    for markerType in markerTypes:
        # dna sequences need no dictionary or regex here
        if markerType == "dnaSeq":
            continue

        # special case for long gene names
        if markerType == "geneName":
            fname = join(GENEDATADIR, "geneNames.marshal.gz")
            logging.info("Loading %s" % fname)
            geneNameLex = fastFind.loadLex(fname)
            continue

        # special case for bands
        if markerType == "band":
            fname = join(GENEDATADIR, "bandToEntrez.marshal.gz")
            logging.info("Loading %s" % fname)
            # Close the gzip handle once the content has been read.
            # NOTE(review): marshal must only be used on trusted files.
            with gzip.open(fname) as bandFile:
                bandToEntrezSyms = marshal.loads(bandFile.read())
            # no "continue" here: execution falls through to the regex
            # registration below (presumably intended, as in the original)

        # special case for gene symbols
        if markerType == "symbol" or markerType == "symbolMaybe":
            if not symbolDataLoaded:
                fname = join(GENEDATADIR, "symbols.marshal.gz")
                logging.info("Loading %s" % fname)
                geneSymLex = fastFind.loadLex(fname)

                symLeftReqWords = readBestWords(
                    join(GENEDATADIR, "left.tab"), 500)
                symRightReqWords = readBestWords(
                    join(GENEDATADIR, "right.tab"), 500)
                symbolDataLoaded = True
            continue

        # all remaining marker types are found with a regular expression
        markerRe = reDict[markerType]
        kwDictList.append((markerType, markerRe))
        if markerType in requiresFilter:
            filterFname = os.path.join(DICTDIR, markerType + "Accs.txt.gz")
            logging.info("Opening %s" % filterFname)
            filterSet = pubKeyVal.openDb(filterFname)
            filterDict[markerType] = filterSet

    markerDictList = kwDictList
    logging.debug("Loaded marker dict for these types: %s" %
                  [x for x, y in markerDictList])

コード例 #18

0

ファイルを表示

ファイル: taxonSearch.py プロジェクト: strbean/pubMunch-BRCA

 def startup(self, paramDict):
     """ read the species dictionary and keep it in self.lex

     The file data/speciesDict.marshal.gz is looked up relative to the
     directory that contains this source file. paramDict is not used.
     """
     hereDir = dirname(__file__)
     dictFname = join(hereDir, "data/speciesDict.marshal.gz")
     logging.info("Reading %s" % dictFname)
     taxonLex = fastFind.loadLex(dictFname)
     self.lex = taxonLex