Code example #1
File: NLP_OTHER.py Project: hyw2/GAT
# Module-level imports used by the NLP_OTHER.py snippets in this listing; `dao`
# is the project's own data-access module, imported elsewhere in the file.
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np


def top20_verbs(txt_name):
    nlp = dao.spacy_load_en()
    with open(txt_name, 'r') as myfile:
        # join lines with a space so words at line breaks are not run together
        article = myfile.read().replace('\n', ' ')
    results = []
    for token in nlp(article):
        if token.pos_ == 'VERB':
            results.append(token.lemma_)
    # drop common modal/auxiliary verbs that would dominate the counts
    results = [
        e for e in results
        if e not in ['will', 'would', 'could', 'may', 'can']
    ]
    counts = Counter(results)
    labels, values = zip(*counts.items())
    indSort = np.argsort(values)[::-1]
    if len(indSort) > 20:
        indSort = indSort[:20]  # keep only the 20 most frequent verbs
    labels = np.array(labels)[indSort]
    values = np.array(values)[indSort]

    indexes = np.arange(len(labels))

    bar_width = 0.35
    plt.figure()
    plt.bar(indexes, values)

    # add labels
    plt.xticks(indexes + bar_width, labels, rotation=45)
    plt.savefig("out/nlp/nlp_top20_verbs.png", dpi=100)  # save before show(), which can clear the figure
    plt.show()
    return "out/nlp/nlp_top20_verbs.png"
Code example #2
File: Spacy_SVO.py Project: hyw2/GAT
 def __init__(self, language='english'):
     """
     Initialize
     """
     self.nlp = dao.spacy_load_en()
     self.sent_detector = data.load('tokenizers/punkt/english.pickle')
     self.analyzer = SentimentIntensityAnalyzer()  # for sentiment analysis
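These __init__ snippets (here and in examples #5 and #6) rely on module-level imports that the listing does not show. A plausible set, inferred from the calls above, is sketched below; the exact import paths used in Spacy_SVO.py are assumptions, and dao's import path is project-specific:

# Assumed imports for the __init__ snippets (not shown in the original listing).
import pandas as pd                  # pd.read_csv(...) in the later variants
from nltk import data                # data.load('tokenizers/punkt/english.pickle')
from nltk.sentiment.vader import SentimentIntensityAnalyzer  # or the standalone vaderSentiment package
# `dao` is the project's own data-access module, imported from within the GAT package.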
Code example #3
File: NLP_OTHER.py Project: hyw2/GAT
def top20_organizations(txt_name):
    nlp = dao.spacy_load_en()
    with open(txt_name, 'r') as myfile:
        # join lines with a space so words at line breaks are not run together
        article = myfile.read().replace('\n', ' ')
    parsed_phrase = nlp(article)
    results = []
    names = list(parsed_phrase.ents)
    for e in names:
        if e.label_ == 'ORG':  # keep only organization entities
            results.append(e.text)
    counts = Counter(results)
    labels, values = zip(*counts.items())
    indSort = np.argsort(values)[::-1]
    if len(indSort) > 20:
        indSort = indSort[:20]  # keep only the 20 most frequent organizations
    labels = np.array(labels)[indSort]
    values = np.array(values)[indSort]

    indexes = np.arange(len(labels))

    bar_width = 0.35
    plt.figure()
    plt.bar(indexes, values)

    # add labels
    plt.xticks(indexes + bar_width, labels, rotation=90)
    plt.savefig("out/nlp/top20_organizations.png", dpi=100)  # save before show(), which can clear the figure
    plt.show()
    return "out/nlp/top20_organizations.png"
Code example #4
File: NLP_OTHER.py Project: hyw2/GAT
def lemmatize(txt_name):
    nlp = dao.spacy_load_en()
    with open(txt_name, 'r') as myfile:
        # join lines with a space so words at line breaks are not run together
        article = myfile.read().replace('\n', ' ')
    results = []
    for token in nlp(article):
        results.append(token.lemma_)
    return ' '.join(results)
Code example #5
 def __init__(self, language='english'):
     """
     Initialize 
     """
     self.nlp = dao.spacy_load_en()
     self.sent_detector = data.load('tokenizers/punkt/english.pickle')
     self.analyzer = SentimentIntensityAnalyzer()  # for sentiment analysis
     self.keyverbs = list(
         pd.read_csv('gat/service/nlp_resources/KeyVerbs.csv')['key_verbs'])
Code example #6
File: smart_search_thread.py Project: hyw2/GAT
 def __init__(self,
              language='english',
              search_question='',
              article_count=0):
     super().__init__()
     self.messages = []
     self.messages_lock = threading.Lock()
     self.result = None
     self.result_lock = threading.Lock()
     self.result_ontology = None
     self.result_ontology_lock = threading.Lock()
     self._nlp = dao.spacy_load_en()
     self.__sent_detector = data.load('tokenizers/punkt/english.pickle')
     self.__analyzer = SentimentIntensityAnalyzer()  # for sentiment analysis
     current_file_path = os.path.dirname(os.path.abspath(__file__))
     self.__keyverbs = list(
         pd.read_csv(os.path.join(current_file_path,
                                  'KeyVerbs.csv'))['key_verbs'])
     self.__allcities = list(
         pd.read_csv(os.path.join(current_file_path,
                                  'Allcities.csv'))['City'])
     self.__search_question = search_question
     self.__article_count = int(article_count)
Code example #7
# Imports needed by this snippet (the original file's exact imports are not
# shown); `dao` is the project's own data-access module, imported elsewhere.
import joblib
import numpy as np
import pandas as pd


def top5accuracy(y_true, y_pred):
    # Fraction of samples whose true label appears among the top-5 predictions.
    correct = 0
    for i in range(len(y_true)):
        if y_true[i] in y_pred[i]:
            correct = correct + 1
    return correct * 1.0 / len(y_true)


# Module-level objects used by top5CAMEO below.
model = joblib.load('gat/CameoPrediction/model.pkl')
vector_rule = pd.read_csv('gat/CameoPrediction/vectorize_rules.txt',
                          sep='\t', header=None)
cameo_book = pd.read_csv('gat/CameoPrediction/CAMEO_code_new.csv')
top_words = list(
    pd.read_csv('gat/CameoPrediction/top_all_words_from_analysis.txt',
                sep='\t', header=None).head(3000)[0])

nlp = dao.spacy_load_en()

def top5CAMEO(sentence):
    # Lemmatize the sentence and encode it as a binary bag-of-words vector
    # over the 3000 top words kept from the training analysis.
    phrase = [e.lemma_ for e in nlp(sentence)]
    sentence_binary = np.zeros(3000, dtype=int)
    for i in range(len(top_words)):
        if top_words[i] in phrase:
            sentence_binary[i] = 1
    sentence_binary = sentence_binary.reshape(1, -1)
    # Predict class probabilities, keep the five most likely classes, and map
    # them to CAMEO codes and their verbal descriptions.
    pred_ba = model.predict_proba(sentence_binary)
    pred_top5 = top5pred(pred_ba)[0]  # top5pred is defined elsewhere in the file
    cameo_top5 = list(vector_rule[vector_rule[1].isin(pred_top5)][0])
    cameo_5 = list(cameo_book[cameo_book['Code'].isin(cameo_top5)]['Move'])
    return cameo_5
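The helper top5pred is not defined in this listing. A minimal sketch of what it presumably does, assuming it returns, for each row of the probability matrix, the labels of the five highest-probability classes; the body below is an assumption, not the project's actual implementation:

def top5pred(proba):
    # Hypothetical reimplementation: take the indices of the five largest
    # probabilities per row and map them to the classifier's class labels.
    classes = np.asarray(model.classes_)
    top_idx = np.argsort(proba, axis=1)[:, ::-1][:, :5]
    return [list(classes[row]) for row in top_idx]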
Code example #8
File: spacy_nlp.py Project: hyw2/GAT
def loadModel(language):
    # Only loads English, regardless of the `language` argument.
    # Loads the spaCy language model; kept in a separate function because
    # loading it is computationally expensive.
    return dao.spacy_load_en()
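The dao module itself does not appear in this listing. A minimal sketch of what dao.spacy_load_en() presumably does, assuming it simply caches a single spacy.load call so the expensive model load happens only once per process; the body and the model name are assumptions, not the project's actual implementation:

import spacy

_NLP_EN = None  # module-level cache for the loaded spaCy model


def spacy_load_en():
    # Hypothetical sketch: load the English model once and reuse it, since
    # spacy.load is computationally expensive (see the comment in loadModel above).
    global _NLP_EN
    if _NLP_EN is None:
        _NLP_EN = spacy.load('en_core_web_sm')  # assumed model name
    return _NLP_EN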