def setUp(self):
    """Register mocked Datamuse endpoints backed by local JSON fixtures.

    Each `responses.add(...)` call maps a query URL to a canned response so
    the tests never hit the live API. `self.max` is the result cap the
    tests pass to the client.
    """
    self.max = 5
    self.api = Datamuse()
    _api_url = 'https://api.datamuse.com/words'

    def _load_fixture(name):
        # Load a canned API response shipped with the test package.
        # (Fix: the original reused one name for both the open file handle
        # and the parsed JSON, shadowing the handle inside its own `with`.)
        fixture_path = pkg_resources.resource_filename(
            __name__, 'fixtures/{}'.format(name))
        with open(fixture_path) as fixture_file:
            return json.load(fixture_file)

    orange_json = _load_fixture('orange.json')
    # NOTE(review): the second '?' in these query strings looks like it
    # should be '&'; `responses` matches these URLs loosely enough that it
    # has gone unnoticed — confirm before changing the registered URLs.
    for query in ('?sl=orange?max=5',
                  '?rel_rhy=orange?max=5',
                  '?rel_nry=orange?max=5'):
        responses.add(responses.GET, urljoin(_api_url, query),
                      json=orange_json, status=200)
    # (Fix: dropped useless f-string prefixes — the literals had no
    # placeholders, so the strings are unchanged.)
    responses.add(responses.GET, urljoin(_api_url, '?ml=ringing+in+the+ears'),
                  json=_load_fixture('ringing.json'), status=200)
    responses.add(responses.GET, urljoin(_api_url, '?s=por&max=3'),
                  json=_load_fixture('por.json'), status=200)
def generate_word(self):
    """Generates a word to use in the game"""
    topics = [
        "animals", "plants", "buildings", "places",
        "farming", "school", "food",
    ]
    # Ask Datamuse for words related to a randomly chosen topic.
    client = Datamuse()
    results = client.words(topics=choice(topics))
    candidates = [entry["word"] for entry in results]
    # Only words longer than five characters are usable in the game.
    long_words = [w for w in candidates if len(w) > 5]
    return choice(long_words)
def sologame():
    """Set up and run one round of the solo game.

    Picks a random hint word, gathers related candidate words from the
    Datamuse API, filters them by length/difficulty, and hands the result
    to the game screen. Uses module globals `guess` and `score`.
    """
    global guess
    guess = []
    # from https://pypi.python.org/pypi/RandomWords/0.1.5
    rw = RandomWords()
    hint = rw.random_word()
    # from https://www.datamuse.com/api/
    api = Datamuse()
    foo_complete = api.words(ml=hint, max=4)  # similar-meaning words
    foo_df = scripts.dm_to_df(foo_complete)
    loo = api.words(rel_par=hint, max=1)  # "part of" relation
    loo_df = scripts.dm_to_df(loo)
    maybe = api.words(rel_trg=hint, max=1)  # associated ("trigger") words
    maybe_df = scripts.dm_to_df(maybe)
    values = foo_df['word'].values
    val = loo_df['word'].values
    v = maybe_df['word'].values
    wordList = set()
    level = 0
    # NOTE(review): `score` is a module global defined elsewhere in the file.
    if score > 5:
        level = 2  # raise difficulty: allow longer words once score > 5
    # Keep only purely alphabetic words inside the level-adjusted length band.
    for i in values:
        if 3+level < len(i) < 7+level and i.isalpha():
            wordList.add(i)
    for i in val:
        if 3+level < len(i) < 7+level and i.isalpha():
            wordList.add(i)
    for i in v:
        if 3+level < len(i) < 7+level and i.isalpha():
            pass  # NOTE(review): redundant `pass` — looks like leftover editing
            wordList.add(i)
    wordList = list(wordList)
    if len(wordList) < 2:
        # Too few candidates: retry with a new hint.
        # NOTE(review): the recursive call's result is discarded and execution
        # falls through below — likely intended `return sologame()`; confirm.
        sologame()
    while len(wordList) > 3:
        wordList.pop()  # cap the candidate list at three words
    soloB = screen.Solo.game(wordList)
    try:
        main(wordList, soloB, hint, "solo")
    except:
        # NOTE(review): bare except swallows every error (including
        # KeyboardInterrupt) and restarts the round.
        sologame()
def game():
    """Pick a random hint and return a `(word, hint)` pair for one round.

    Candidate words come from three Datamuse relations to the hint and are
    filtered to purely alphabetic words of length 4-7. Resets the module
    global `guess`.
    """
    rw = RandomWords()
    hint = rw.random_word()
    api = Datamuse()
    foo_complete = api.words(ml=hint, max=3)  # similar-meaning words
    foo_df = scripts.dm_to_df(foo_complete)
    loo = api.words(rel_par=hint, max=1)  # "part of" relation
    loo_df = scripts.dm_to_df(loo)
    maybe = api.words(rel_trg=hint, max=1)  # associated ("trigger") words
    maybe_df = scripts.dm_to_df(maybe)
    values = foo_df['word'].values
    val = loo_df['word'].values
    v = maybe_df['word'].values
    words = set()
    # Keep only purely alphabetic words of length 4-7.
    for i in values:
        if 3 < len(i) < 8 and i.isalpha():
            words.add(i)
    for i in val:
        if 3 < len(i) < 8 and i.isalpha():
            words.add(i)
    for i in v:
        if 3 < len(i) < 8 and i.isalpha():
            pass  # NOTE(review): redundant `pass` — looks like leftover editing
            words.add(i)
    words = list(words)
    if len(words) < 1:
        # No candidates: retry with a new hint.
        # NOTE(review): the retry's result is discarded — likely intended
        # `return game()`; confirm before changing.
        game()
    global guess
    guess = []
    try:
        return words[0], hint
    except:
        # NOTE(review): bare except; an IndexError from `words[0]` triggers a
        # retry whose return value is discarded, so this path returns None.
        game()
def __init__(self, acronym: str):
    """Build a tracery grammar whose rules expand *acronym* into words.

    Splits the acronym with the class's `splitting_pattern`, fetches
    candidate words for each piece from Datamuse, and assembles a
    capitalized expansion rule.
    """
    self.acronym = acronym
    parts = self.splitting_pattern.findall(acronym)
    self.length = len(parts)
    api = Datamuse()
    # One symbol per acronym piece, mapped to words starting with it.
    dictionary: Dict[str, List[str]] = {
        part: [entry['word'] for entry in api.words(sp='{}*'.format(part))]
        for part in parts
    }
    self.grammar = Grammar(dictionary)
    self.grammar.add_modifiers(base_english)
    self.rule = '#{}.capitalize#'.format('.capitalize# #'.join(parts))
def setUp(self):
    """Create a fresh Datamuse client and result cap for each test."""
    self.max = 5
    self.api = Datamuse()
import random from datamuse import Datamuse api = Datamuse() default_syllables = 8 default_topics = [ 'cars', 'movies', 'books', 'music', 'people', 'earth', 'buildings', 'games', 'animals', 'fruits', 'vegetables', 'adventures', 'planets', 'government', 'enlightenment', 'peace', 'gangs', 'war', 'soul', 'religion', 'politics' ] common_words = [ 'the',
def __init__(self):
    """Set up a Datamuse client and an empty tracery grammar."""
    grammar = Grammar({})
    grammar.add_modifiers(base_english)
    self.grammar = grammar
    self.api = Datamuse()
def __init__(self, input_texts: str):
    """Build substitution-pattern tables and a synonym cache for the texts.

    NOTE(review): `input_texts` is annotated `str` but is iterated with each
    element treated as a whole text — presumably it should be an iterable of
    strings; confirm against callers.
    """
    nltk.download('punkt')  # tokenizer models needed by sent_tokenize
    self.nlp = spacy.load('en_core_web_lg')
    self.summarizer = LsaSummarizer(Stemmer('english'))
    self.summarizer.stop_words = get_stop_words('english')
    self.cleaner = CleaningProcessor()
    # Synonym cache keyed by "<word><separator><POS tag>"; value is a list
    # of candidates, or None when no usable synonyms were found.
    self.synonyms: Dict[str, Optional[List[str]]] = {}
    if path.isfile('src/syns.yaml'):
        with open('src/syns.yaml', 'r') as f:
            self.synonyms = yaml.safe_load(f)
        if self.synonyms is None:
            # File existed but was empty: safe_load returns None.
            self.synonyms = {}
    self.patterns: Dict[str, str] = OrderedDict()
    self.rev_patterns: Dict[str, str] = OrderedDict()
    with open('src/spreadr_shreddr/data.yaml', 'r') as f:
        data = yaml.safe_load(f)
    self.patterns.update(data['shorten'])
    self.patterns.update(data['expand'])
    data['filler'].extend(
        pycorpora.get_file('humans', 'prefixes')['prefixes'])
    # Filler words map to the empty string, i.e. they are dropped on use.
    self.patterns.update({k: '' for k in data['filler']})
    for obj in pycorpora.get_file('words', 'compounds')['compounds']:
        key = '{} {}'.format(obj['firstWord'], obj['secondWord'])
        if key not in self.patterns:
            self.patterns[key] = obj['compoundWord']
    # Add capitalized variants so sentence-initial occurrences match too.
    self.patterns.update(
        {k.capitalize(): v.capitalize() for k, v in self.patterns.items()})
    self.brits = data['brit_am']
    self.murcans = {v: k for k, v in self.brits.items()}
    changed = False
    api = Datamuse()
    for text in input_texts:
        text >>= self.cleaner  # CleaningProcessor appears to overload >>=
        for sent in sent_tokenize(text):
            for index, word in enumerate(self.nlp(sent)):
                orth = word.orth_.lower()
                key = self.separator.join((orth, word.tag_))
                if key not in self.synonyms:
                    changed = True
                    syns: List[str] = []
                    # Only query Datamuse for POS tags it supports, and only
                    # for words WordNet considers (near-)unambiguous.
                    if (word.pos_ in UNIVERSAL_TO_DATAMUSE
                            and len(wn.synsets(orth)) <= 1):
                        res = api.words(ml=orth)
                        if len(res) > 0:
                            syns = self._get_synonyms(
                                ' '.join(sent), (index, word), res)
                    if len(syns) > 1:
                        self.synonyms[key] = syns
                    else:
                        self.synonyms[key] = None
                if changed:
                    changed = False
                    # Persist each new cache entry immediately (append mode),
                    # so progress survives a crash mid-run.
                    with open('src/syns.yaml', 'a') as f:
                        f.write(yaml.dump({key: self.synonyms[key]}))
import unittest
import datamuse
from datamuse import Datamuse
from glove import loadGloveModel
import Queue
import unicodedata
from tsne import projectWordsNoFile
from tsne import projectWordsNoFileDoubleProject
import numpy as np

dm = Datamuse()
model, vectors, keys = loadGloveModel("../data/glove.6B.50d.txt")
# print keys[0:100]

# Cryptic-crossword clue words used as test inputs below.
# Fix: a comma was missing between "king" and "beheaded", so implicit
# string-literal concatenation silently produced the single entry
# "kingbeheaded" instead of two words.
cryptic = ['mother', 'ordered', 'sail', 'fabrics', 'materials', "mater",
           "they", "trade", "in", "french", "sea", "songs", "merchants",
           "mer", "chants",
           "chew", "honeydew", "fruit", "melon", "lemon",
           "lap", "dancing", "friend", "pal",
           "outlaw", "leader", "managing", "money", "banking", "ban", "king",
           "beheaded", "celebrity", "is", "sailor", "tar", "star",
           "challenging", "sweetheart", "heartlessly", "daring", "darling",
           "found", "ermine", "deer", "hides", "damaged", "undermined"]

crypticKeys = []
crypticVectors = []
from file_manager import File_manager
from naive import Naive
from datamuse import Datamuse
import asyncio

# Input data files: labeled training data and the evaluation set.
TRAIN_FILENAME = "train_data.tsv"
TEST_FILENAME = "data_estag_ds.tsv"

if __name__ == '__main__':
    filemng = File_manager()
    train_instances = filemng.get_instances_of_file(TRAIN_FILENAME)
    test_instances = filemng.get_instances_of_file(TEST_FILENAME)
    nv = Naive()
    dic = filemng.read_dictionary()
    dtmuse = Datamuse()
    # get_synonymous is a coroutine; drive it to completion on a fresh loop.
    loop = asyncio.get_event_loop()
    category_synonymous = loop.run_until_complete(
        dtmuse.get_synonymous('smartphone'))
    loop.close()
    # HACK: eval() on a string returned from a network service executes
    # arbitrary code if the response is ever malicious or malformed —
    # this should be ast.literal_eval or json.loads; flagged for review.
    category_synonymous = eval(category_synonymous)
    nv.naive_bayes(train_instances, dic, test_instances, category_synonymous)