def _asks_about(question, nouns):
    """Return True if *question* contains "what <noun>" or "which <noun>"
    for any of the given nouns (question is expected to be lower-cased)."""
    return any(("what " + noun) in question or ("which " + noun) in question
               for noun in nouns)


def _question_type(question):
    """Classify an (already lower-cased) question by expected answer type.

    Returns one of the tags consumed by better_qa():
      "TIM"    -- time-like answers (what/which time, year, century, ...)
      "LOC"    -- place-like answers (what/which place, city, ... or "where")
      "PER"    -- person answers (what/which person, or "who")
      "OTHER1" -- generic what/which questions
      "OTHER2" -- anything else
    """
    if _asks_about(question, ("time", "year", "century", "month", "decade")):
        return "TIM"
    if _asks_about(question, ("place", "area", "town", "state", "city", "country")):
        return "LOC"
    if _asks_about(question, ("person",)):
        return "PER"
    if "what" in question or "which" in question:
        # TODO (from original): tag as OTHER when what/how precedes "when";
        # "on what" -> LOC; "what" followed by a LOC/PER/TIM entity;
        # "what time" -> TIM.
        return "OTHER1"
    if "where" in question:
        return "LOC"
    if "when" in question:
        return "TIM"
    if "who" in question:  # covers whom/whose as well
        return "PER"
    return "OTHER2"


def _collect_tag_maps(context):
    """POS-tag and NER-chunk a paragraph, building four lookup tables.

    Returns (pos_dict, word_dict, entities, entity_dict) where
      pos_dict:    POS tag  -> unique words carrying that tag
      word_dict:   word     -> unique POS tags seen for that word
      entities:    NE label -> unique words chunked under that label
      entity_dict: word     -> unique NE labels seen for that word
    """
    tagged = nltk.pos_tag(nltk.word_tokenize(context))
    chunked = nltk.chunk.ne_chunk(tagged)
    pos_dict, word_dict, entities, entity_dict = {}, {}, {}, {}
    # chunked.pos() yields ((word, pos_tag), entity_label) pairs.
    for (word, pos), label in chunked.pos():
        for table, key, value in ((pos_dict, pos, word),
                                  (word_dict, word, pos),
                                  (entities, label, word),
                                  (entity_dict, word, label)):
            bucket = table.setdefault(key, [])
            if value not in bucket:
                bucket.append(value)
    return pos_dict, word_dict, entities, entity_dict


# Any token containing one of these characters ends the current "sentence".
_SENTENCE_END_MARKS = ('.', ',', '?', '!', ';', ':')


def _split_sentences(context):
    """Split a paragraph into rough sentences on punctuation-bearing tokens.

    Returns (sentence, numofsen): *sentence* maps index -> list of normalized
    tokens; *numofsen* counts the complete sentences, so sentence[numofsen]
    is a (possibly empty) trailing fragment with no closing punctuation.
    """
    sentence = {0: []}
    numofsen = 0
    for token in context.split():
        sentence[numofsen].append(normalize(token))
        # If we reach any punctuation, mark the end of the sentence;
        # otherwise the "sentence" would grow very long.
        if any(mark in token for mark in _SENTENCE_END_MARKS):
            numofsen += 1
            sentence[numofsen] = []
    return sentence, numofsen


def main():
    """Answer-sentence retrieval over a SQuAD-style dev set.

    For every paragraph: build POS/NER lookup tables, split the context into
    rough sentences, then for each question pick the sentence whose averaged
    word vector is most cosine-similar to the question's vector, and hand it
    to better_qa() together with a coarse question-type tag.  All results are
    finally written to disk via store().
    """
    dev = load()
    result = {}
    classifier = V.classifier()
    for document in dev["data"]:
        for paragraph in document['paragraphs']:
            context = paragraph['context']
            # pos_dict and entity_dict are built alongside the others but
            # only entities/word_dict are consumed by better_qa() below.
            pos_dict, word_dict, entities, entity_dict = _collect_tag_maps(context)
            sentence, numofsen = _split_sentences(context)
            # NOTE(review): a trailing fragment after the last punctuation
            # mark (sentence[numofsen]) gets no vector and is only selected
            # when numofsen == 0 (maxnum stays 0) -- confirm this is intended.
            sentence_vectors = [classifier.create_avg_vector(sentence[i])
                                for i in range(numofsen)]
            for qa in paragraph['qas']:
                tag = _question_type(qa['question'].lower())
                qa_vector = classifier.create_avg_vector(normalize(qa['question']).split())
                maxsim = -1   # cosine similarity is never below -1
                maxnum = 0    # index of the most similar sentence so far
                for i in range(numofsen):
                    total = classifier.cosine_similarity(sentence_vectors[i], qa_vector)
                    if total >= maxsim:  # >=: keep the LAST best on ties
                        maxsim = total
                        maxnum = i
                better_qa(tag, sentence[maxnum], numofsen, qa['id'],
                          qa['question'], entities, word_dict, result)
    store(result)  ##write the answers to json file
with open('findsentence.json', 'w') as json_file: json_file.write(json.dumps(data)) ##transform json file to dictionary def load(): with open('training.json') as json_file: data = json.load(json_file) return data if __name__ == "__main__": data = {} data = load() dataset = data['data'] result = {} classifier = V.classifier() ##here we compute unigram overlap, because the order of the words in the questions are often different from contexts, unigram is more reliable for article in dataset: for paragraph in article['paragraphs']: paragraph_tokens=paragraph['context'].split() ##get tokens from context numofsen=0 ##number of sentences sentence={} sentence[numofsen] = ([]) for x in paragraph_tokens: sentence[numofsen].append(normalize(x)) if '.'in x or ',' in x or '?' in x or '!' in x or ';' in x or ':' in x: ##sign as the end of the sentence numofsen+=1 sentence[numofsen] = ([]) #print('start_classifier')