Esempi in Python per SEG.set

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: smallseg

Classe/tipologia: SEG

Metodo/funzione: set

Esempi su hotexamples.com: 4

SEG.set in Python: 4 esempi trovati. Questi sono i migliori esempi reali in Python per smallseg.SEG.set, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

cut(6)

SEG(5)

set(2)

Esempio n. 1

Mostra file

def _segment_files(seg, dirPath, fileNames, label, progressTag):
    '''
    Segment each listed file in dirPath and return (words, label) samples.

    seg         -- segmenter object exposing cut(text)
    dirPath     -- directory that contains the files
    fileNames   -- names of the files inside dirPath to process
    label       -- class label stored with every sample
    progressTag -- prefix printed before the 1-based progress counter

    Only words whose UTF-8 encoding is found in the module-level pornDict
    are kept in each sample.
    '''
    samples = []
    for count, fileName in enumerate(fileNames, 1):
        print(progressTag + str(count))
        # The context manager closes every file as soon as it has been read
        # instead of leaving the handle open until garbage collection.
        with open(os.path.join(dirPath, fileName)) as handle:
            data = seg.cut(handle.read())
        # Encode each word once and reuse it for both the filter and the result.
        encoded = (word.encode('utf-8') for word in data)
        samples.append(([word for word in encoded if word in pornDict], label))
    return samples


def get_data():
    '''
    Load the positive and negative text datasets from local folders.

    Positive and negative samples are stored in different folders. While
    loading, every file is segmented with the smallseg tool and reduced to
    the words found in pornDict.

    Returns a 2-tuple whose elements are both the same list of
    (words, label) samples, positives first, with label being 'Positive'
    or 'Negative'.
    '''
    posPath = '/home/zhouxc/skindetector/AdultWebsiteText/'
    negPath = '/home/zhouxc/skindetector/NormalWebsiteText/'
    # Both folders are listed up front so that a missing folder fails before
    # any segmentation work starts.
    posFiles = os.listdir(posPath)
    negFiles = os.listdir(negPath)

    seg = SEG()
    seg.set(dic)

    print('---------------------Read Positive DataSet-----------------')
    trainingData = _segment_files(seg, posPath, posFiles, 'Positive',
                                  "PositiveData")
    print('---------------------Positive DataSet done-----------------')

    print('---------------------Read Negative DataSet-----------------')
    trainingData.extend(
        _segment_files(seg, negPath, negFiles, 'Negative', "NegativeData"))
    print('--------Negative DataSet  done-----------------------------------')

    # The same list object is handed back for both positions of the tuple.
    return trainingData, trainingData

Esempio n. 2

Mostra file

File: textClassifier.py Progetto: GarfieldEr007/Adult-WebSite-Classifier

def _load_labeled_files(seg, dirPath, fileNames, label, progressTag):
	'''
	Segment each listed file in dirPath and return (words, label) samples.

	seg         -- segmenter object exposing cut(text)
	dirPath     -- directory that contains the files
	fileNames   -- names of the files inside dirPath to process
	label       -- class label stored with every sample
	progressTag -- prefix printed before the 1-based progress counter

	Only words whose UTF-8 encoding is found in the module-level pornDict
	are kept in each sample.
	'''
	samples = []
	for count, fileName in enumerate(fileNames, 1):
		print(progressTag + str(count))
		# The context manager closes every file as soon as it has been read
		# instead of leaving the handle open until garbage collection.
		with open(os.path.join(dirPath, fileName)) as handle:
			data = seg.cut(handle.read())
		# Encode each word once and reuse it for both the filter and the result.
		encoded = (word.encode('utf-8') for word in data)
		samples.append(([word for word in encoded if word in pornDict], label))
	return samples


def get_data():
	'''
	Load the positive and negative text datasets from local folders.

	Positive and negative samples are stored in different folders. While
	loading, every file is segmented with the smallseg tool and reduced to
	the words found in pornDict.

	Returns a 2-tuple whose elements are both the same list of
	(words, label) samples, positives first, with label being 'Positive'
	or 'Negative'.
	'''
	posPath = '/home/zhouxc/skindetector/AdultWebsiteText/'
	negPath = '/home/zhouxc/skindetector/NormalWebsiteText/'
	# Both folders are listed up front so that a missing folder fails before
	# any segmentation work starts.
	posFiles = os.listdir(posPath)
	negFiles = os.listdir(negPath)

	seg = SEG()
	seg.set(dic)

	print('---------------------Read Positive DataSet-----------------')
	trainingData = _load_labeled_files(seg, posPath, posFiles, 'Positive', "PositiveData")
	print('---------------------Positive DataSet done-----------------')

	print('---------------------Read Negative DataSet-----------------')
	trainingData.extend(_load_labeled_files(seg, negPath, negFiles, 'Negative', "NegativeData"))
	print('--------Negative DataSet  done-----------------------------------')

	# The same list object is handed back for both positions of the tuple.
	return trainingData  , trainingData

Esempio n. 3

Mostra file

File: pirrerank.py Progetto: swapnadesale/pmir

                    + str(i) \
                    + '''')" href="'''\
                    + url \
                    + '''" target="_blank"><font size="3">''' \
                    + arrowscript \
                    + title \
                    + '''</font></a><br /><font size="-1">''' \
                    + snippet \
                    + '''<br /><font color="#008000">''' \
                    + url \
                    + '''<br /></font></font></td></tr></table>\n'''
        pageStr += resultStr
        i += 1

    return pageStr


# Script entry point: build a segmenter, load its dictionary, and run the
# user-feedback rerank for the requested search engine.
if __name__ == '__main__':
    # Sample Chinese sentences kept from earlier manual testing (disabled).
    #resultsList = ["我是中国人民的儿子", "你是我儿子", "中国人民万岁", "我永远是中国人民的儿子"]
    seg = SEG()
    #print 'Load dict...'
    # NOTE(review): `words` is a file name string here, not a list of words;
    # confirm that SEG.set accepts a path.
    words = "main.dic"
    seg.set(words)
    #print "Dict is OK."

    #print psudorerank(resultsList, 2)
    username = "******"
    # NOTE(review): `request` is not defined in the visible code; presumably
    # it is supplied by the hosting web framework -- confirm.
    # Falls back to the empty string when no "engine" parameter was sent.
    engine = request.GET.get("engine", "")
    # Look up the results table registered for that engine.
    resultsTable = ResultInfoTable[engine]
    # userFeedbackRerank returns a two-item sequence: the query and the page content.
    [query, pagecontent] = userFeedbackRerank(username, resultsTable, seg)

Esempio n. 4

Mostra file

File: benchmark.py Progetto: Nuos/lab

#encoding=utf-8
# Benchmark script for smallseg: loads a dictionary into SEG, then times
# 1..100 repeated segmentations of the text read from text.txt.
try:
    import psyco
    psyco.full()
except ImportError:
    # psyco is an optional accelerator; run unaccelerated when it is missing.
    pass

from time import time

from smallseg import SEG

# Context managers close both input files as soon as they have been read.
with open("text.txt") as textFile:
    s3 = textFile.read()
with open("main.dic") as dictFile:
    # One dictionary entry per line, trailing whitespace/newline removed.
    words = [x.rstrip() for x in dictFile]

seg = SEG()
print('Load dict...')
seg.set(words)
print("Dict is OK.")

for i in xrange(1, 101):
    start = time()
    for _ in xrange(i):
        # The result is discarded; only the elapsed time matters here.
        seg.cut(s3)
    cost = time() - start
    # Produces the same text as the statement form `print i,"times, cost:",cost`.
    print("%s times, cost: %s" % (i, cost))

print("********************************")