def graph():
    """Plot positive and negative sentiment scores for each sentence of a file.

    Asks the user to choose a file, splits its contents into sentences with
    ``nltk.sent_tokenize``, downloads the ``fasttext-social-network-model``
    package if its destination path does not exist yet, runs
    ``FastTextSocialNetworkModel.predict`` over the sentences and plots the
    ``positive`` and ``negative`` scores against the sentence index.

    Returns without doing anything when the file dialog yields no path.

    Raises:
        ValueError: if ``fasttext-social-network-model`` is not a key of
            ``AVAILABLE_FILES``.
    """
    # Function-scope imports, as in the original implementation.
    from dostoevsky.tokenization import RegexTokenizer
    from dostoevsky.models import FastTextSocialNetworkModel

    file = filedialog.askopenfilename(filetypes=(("Text files", "*.txt"), ("all files", "*.*")))
    if not file:
        # Dialog was dismissed: there is nothing to open.
        return

    # NOTE(review): the file is read with the platform default encoding;
    # confirm that this matches the input files.
    with open(file) as f:
        raw = f.read()
    sentences = nltk.sent_tokenize(raw)

    filename = 'fasttext-social-network-model'
    downloader = DataDownloader()
    if filename not in AVAILABLE_FILES:
        raise ValueError(f'Unknown package: {filename}')
    source, destination = AVAILABLE_FILES[filename]
    destination_path: str = os.path.join(DATA_BASE_PATH, destination)
    if not os.path.exists(destination_path):
        downloader.download(source=source, destination=destination)

    model = FastTextSocialNetworkModel(tokenizer=RegexTokenizer())
    results = model.predict(sentences, k=2)

    # A label that is absent from a prediction is plotted as 0.0.
    positive_values = [
        0.0 if sentiment.get('positive') is None else sentiment['positive']
        for sentiment in results
    ]
    negative_values = [
        0.0 if sentiment.get('negative') is None else sentiment['negative']
        for sentiment in results
    ]

    counts_value = np.arange(len(negative_values))
    p_value = np.array(positive_values)
    n_value = np.array(negative_values)

    # Two curves over the same x axis (the sentence index).
    plt.plot(counts_value, p_value, counts_value, n_value)
    plt.show()
def init_dostoevsky():
    """Fetch any missing CNN model data and build the module-level ``MODEL``.

    For ``vk-embeddings`` and ``cnn-social-network-model`` the package is
    downloaded only when its destination path under ``DATA_BASE_PATH`` does
    not exist. The global ``MODEL`` is then set to a ``SocialNetworkModel``
    created with ``lemmatize=False``.
    """
    global MODEL

    fetcher = DataDownloader()
    for package in ['vk-embeddings', 'cnn-social-network-model']:
        source, destination = AVAILABLE_FILES[package]
        already_present = os.path.exists(
            os.path.join(DATA_BASE_PATH, destination)
        )
        if not already_present:
            fetcher.download(source=source, destination=destination)

    # Keyword arguments are evaluated left to right, so the tokenizer is
    # still constructed before the word-vector container.
    MODEL = SocialNetworkModel(
        tokenizer=UDBaselineTokenizer(),
        word_vectors_container=SocialNetworkWordVectores(),
        lemmatize=False,
    )
def begin():
    """Analyse a user-chosen text file and write the results to ``text``.

    Asks the user to choose a file, splits its contents into sentences with
    ``nltk.sent_tokenize``, downloads the ``fasttext-social-network-model``
    package if its destination path does not exist yet, runs
    ``FastTextSocialNetworkModel.predict`` over the sentences and appends
    each sentence followed by its prediction to the ``text`` widget.

    Returns without doing anything when the file dialog yields no path.

    Raises:
        ValueError: if ``fasttext-social-network-model`` is not a key of
            ``AVAILABLE_FILES``.
    """
    file = filedialog.askopenfilename(filetypes=(("Text files", "*.txt"), ("all files", "*.*")))
    if not file:
        # Dialog was dismissed: there is nothing to open.
        return

    # NOTE(review): the file is read with the platform default encoding;
    # confirm that this matches the input files.
    with open(file) as f:
        raw = f.read()
    sentences = nltk.sent_tokenize(raw)

    filename = 'fasttext-social-network-model'
    downloader = DataDownloader()
    if filename not in AVAILABLE_FILES:
        raise ValueError(f'Unknown package: {filename}')
    source, destination = AVAILABLE_FILES[filename]
    destination_path: str = os.path.join(DATA_BASE_PATH, destination)
    if not os.path.exists(destination_path):
        downloader.download(source=source, destination=destination)

    model = FastTextSocialNetworkModel(tokenizer=RegexTokenizer())
    results = model.predict(sentences, k=2)

    for message, sentiment in zip(sentences, results):
        # Insert one plain string: a tuple passed as the text argument is
        # converted to a Tcl list and shows up with brace quoting.
        text.insert(END, f'\n{message}\n->\n{sentiment}\n')
def data_downloader():
    """Return a newly constructed ``DataDownloader``."""
    downloader = DataDownloader()
    return downloader
import os
import sys
import typing

from dostoevsky.data import DataDownloader, DATA_BASE_PATH, AVAILABLE_FILES


if __name__ == '__main__':
    # Command-line entry point: ``<command> [package ...]``.
    # ``--dry-run`` anywhere on the command line exits with status 0 before
    # anything else is looked at.
    if '--dry-run' in sys.argv:
        sys.exit(0)

    command: str = sys.argv[1]
    arguments: typing.List[str] = sys.argv[2:]

    # ``download`` is the only command handled here.
    if command != 'download':
        raise ValueError('Unknown command')

    downloader = DataDownloader()
    for filename in arguments:
        if filename not in AVAILABLE_FILES:
            raise ValueError(f'Unknown package: {filename}')
        source, destination = AVAILABLE_FILES[filename]
        destination_path: str = os.path.join(DATA_BASE_PATH, destination)
        # Packages whose destination path already exists are skipped.
        if not os.path.exists(destination_path):
            downloader.download(source=source, destination=destination)
def download_dostoevsky_data():
    """Download the ``fasttext-social-network-model`` package.

    The download is requested on every call; no check is made for an
    existing copy.
    """
    fetcher = DataDownloader()
    package = 'fasttext-social-network-model'
    src, dest = AVAILABLE_FILES[package]
    fetcher.download(source=src, destination=dest)