cleanFileName = './%s/%s_%sData_clean.txt' % (person,person,dataType)

f = open(dataSegFileName,'r')
l = open(cleanFileName,'a')
count = 0
while True:
    line = f.readline()
    count += 1
    if  line == '':
        break
    # hard-coded resume point: skip the first 194833 lines
    if  count <= 194833:
        continue
    line = line.split()[1:]
    for term in line:
        try:
            # drop the term when its POS tag is in the stop list
            if  stop.stop3(term.split('/')[1]):
                continue
        except:
            # term without a '/POS' suffix (or another parse problem): report it and keep the term
            print count
            print 'ERROR'
            print term
            #quit()
        term = term.split('/')[0]
        l.write(term + ' ')
    l.write('\n')
f.close()
l.close()
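
# The `stop` module used above is not part of this file; stop3() appears to take a
# POS tag and return True when the term should be dropped. A minimal sketch of such
# a filter (the tag prefixes here are purely an assumption, not the author's list):
HYPOTHETICAL_STOP_TAGS = ('J', 'E', 'S', 'X')

def stop3_sketch(tag):
    # True when the POS tag starts with one of the filtered prefixes
    return tag.startswith(HYPOTHETICAL_STOP_TAGS)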

quit()  # stop here; the word2vec calls below are not reached in this run
word2vec.doc2vec(cleanFileName, fileName, cbow=0, size=50, window=10, negative=5, hs=0, sample='1e-4', threads=12, iter_=20, min_count=1, verbose=True)
word2vec.word2phrase(cleanFileName, fileName, verbose=True)
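
# Quick sanity check of the trained vectors: a sketch, assuming `fileName` above is the
# binary vectors file produced by the calls; the query word is only a placeholder.
wv_model = word2vec.load(fileName)
indexes, metrics = wv_model.cosine('example_word', n=10)   # ten nearest neighbours
print wv_model.vocab[indexes]
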
def LDA_INIT():
    global vocab, word_dict
    global model
    global id_list
    global topic_num
    global idToTitle_dict
    global docTermMatrix

    ### Construct Vocabulary List
    vocab = []
    w = open(vocabFileName,'r')
    while True:
        line = w.readline()
        if  line == '':
            break
        
        # skip lines that have no frequency column
        try:
            line.split()[1]
        except IndexError:
            continue
        # the vocab file is assumed to be sorted by frequency, so stop once
        # the count falls below the lower bound
        if  int(line.split()[1]) < termFreqLowBound:
            break
        line = line.split()[0]
        word_dict[line] = 0
    w.close()
    
    vocab = word_dict.keys()   # vocabulary list; order matches word_dict.values() used below
    #############################
    ### Construct Title List
    w = open(cleanFileName,'r')
    while True:
        line = w.readline()
        if  line == '':
            break
        
        try:
            # keep each cleaned line, indexed by document order
            idToTitle_dict.append(line)
        except:
            continue
    w.close()
    #############################
    ### Construct Doc-Term Matrix
    f = open(dataSegFileName,'r')
    count = 0
    
    # count terms per document from the segmented data, unless the matrix is loaded from JSON below
    while not loadJson:
        line = f.readline()
        count += 1
        print count
        if  line == '':
            break
        
        ID = line.split()[0]
        id_list.append(ID)
        line = line.split()[1:]  
        reset_word_dict(word_dict)   # zero the per-document term counts (helper not shown in this file)
        for term in line:
            termAttribute = term.split('/')[-1]
            if  stop.stop3(termAttribute):
                continue
            
            try:
                #print term
                term = term.split('/')[0]
                if  term in word_dict:
                    word_dict[term] += 1
                #else:
                    #print 'ERROR: word_dict does not have term'
            except:
                print 'ERROR: term error'
                print term
        
        vec = word_dict.values()
        docTermMatrix.append(vec)
    
    f.close()
    if  loadJson:
        jsonFileName = './%s/%s_LDA_Doc_Term_Matrix_%s.json' % (person,person,dataType)
        docTermMatrix = json.load(open(jsonFileName,'r'))
    
    if  writeJson:
        jsonFileName = './%s/%s_LDA_Doc_Term_Matrix_%s.json' % (person,person,dataType)
        json.dump(docTermMatrix, open(jsonFileName,'w'))
    #############################
    print 'LDA_INIT DONE!!!'
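
# A sketch of how the structures built by LDA_INIT could feed a topic model. The `lda`
# package and the parameter values are assumptions, not the author's actual model; each
# row of docTermMatrix is one document's term-count vector aligned with `vocab`, and
# topic_num is assumed to hold the desired number of topics.
import numpy as np
import lda

def fit_lda_sketch():
    X = np.array(docTermMatrix, dtype=np.int64)                # documents x vocabulary counts
    lda_model = lda.LDA(n_topics=topic_num, n_iter=500, random_state=1)
    lda_model.fit(X)
    return lda_model.topic_word_                               # per-topic distribution over vocab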
Example #3
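
# In this example `vocab` is used as a plain dict of term counts and addVocab() is not
# defined in the excerpt; a minimal sketch of such a helper (an assumption):
def addVocab(word):
    # make sure the word has an entry before it is incremented below
    if  word not in vocab:
        vocab[word] = 0
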
count = 0
while True:
    line = f.readline()
    count += 1
    if  line == '':
        break
    line = line.split('\t')
    
    try:
        sentence = line[1].split()
    except IndexError:
        # the line has no tab-separated text field: report it and stop
        print count, line
        quit()
       
    for i in range(len(sentence)):
        termAttr = sentence[i].split('/')[-1]
        if  stop.stop3(termAttr):
            continue
        bigram = ''
        if  i == len(sentence)-1:
            # last term of the sentence: count the unigram
            word = sentence[i].split('/')[0]
            addVocab(word)
            vocab[word] += 1
        else:
            word = sentence[i].split('/')[0]

            try:
                # count a bigram built with the next non-stop term
                # (note: bigram is still '' here, so this guard never passes as written)
                if  bigram and i+1 != len(sentence) and not stop.stop3(sentence[i+1].split('/')[1]):
                    bigram = word + sentence[i+1].split('/')[0]
                    addVocab(bigram)
                    vocab[bigram] += 1
            except: