import re
from typing import Dict, List

from datamuse import Datamuse
from tracery import Grammar
from tracery.modifiers import base_english


class AcronymProcessor(Processor):
    # One capital letter plus its lowercase tail: 'NASA' -> ['N', 'A', 'S', 'A'].
    splitting_pattern = re.compile(r'[A-Z][^A-Z]*')

    def __init__(self):
        self.api = Datamuse()
        self.grammar = Grammar({})
        self.grammar.add_modifiers(base_english)

    def process_text(self, input_text: str, **kwargs) -> str:
        topics = kwargs.get('topics', None)
        splitted = self.splitting_pattern.findall(input_text)
        dictionary: Dict[str, List[str]] = {}
        # For each fragment not already in the grammar, fetch words that start
        # with it ('sp' is Datamuse's spelled-like parameter).
        for start in (x for x in splitted if x not in self.grammar.symbols):
            if topics is None:
                res = self.api.words(sp='{}*'.format(start), max=1000)
            else:
                res = self.api.words(sp='{}*'.format(start), topics=topics,
                                     max=1000)
            dictionary[start] = [obj['word'] for obj in res]
        for k, v in dictionary.items():
            self.grammar.push_rules(k, v)
        # Expand one capitalized word per fragment, e.g.
        # '#N.capitalize# #A.capitalize# ...'.
        return self.grammar.flatten('#{}.capitalize#'.format(
            '.capitalize# #'.join(splitted)))
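
# Usage sketch for AcronymProcessor (assumes the Processor base class shown
# above requires no constructor arguments; 'space' is an arbitrary example topic):
processor = AcronymProcessor()
print(processor.process_text('NASA', topics='space'))
# e.g. 'Nebula Asteroid Solar Apollo' -- actual words depend on the API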
def generate_word(self):
    """Generates a word to use in the game."""
    topics = [
        "animals", "plants", "buildings", "places", "farming", "school",
        "food"
    ]
    api = Datamuse()
    result = api.words(topics=choice(topics))
    words = [res["word"] for res in result]
    # Only offer words longer than five letters.
    return choice([w for w in words if len(w) > 5])
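
# The same idea outside the class: Datamuse's 'topics' parameter biases
# results toward a theme (a minimal sketch; 'food' is an arbitrary topic and
# Datamuse/choice are imported as in the method above):
api = Datamuse()
candidates = [r['word'] for r in api.words(topics='food', max=50)]
long_words = [w for w in candidates if len(w) > 5]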
def sologame():
    global guess
    guess = []
    # from https://pypi.python.org/pypi/RandomWords/0.1.5
    rw = RandomWords()
    hint = rw.random_word()
    # from https://www.datamuse.com/api/
    api = Datamuse()
    foo_complete = api.words(ml=hint, max=4)
    foo_df = scripts.dm_to_df(foo_complete)
    loo = api.words(rel_par=hint, max=1)
    loo_df = scripts.dm_to_df(loo)
    maybe = api.words(rel_trg=hint, max=1)
    maybe_df = scripts.dm_to_df(maybe)
    values = foo_df['word'].values
    val = loo_df['word'].values
    v = maybe_df['word'].values
    wordList = set()
    # 'score' is a module-level global; raise the difficulty once it passes 5.
    level = 0
    if score > 5:
        level = 2
    # Keep only purely alphabetic words whose length fits the current level.
    for i in values:
        if 3 + level < len(i) < 7 + level and i.isalpha():
            wordList.add(i)
    for i in val:
        if 3 + level < len(i) < 7 + level and i.isalpha():
            wordList.add(i)
    for i in v:
        if 3 + level < len(i) < 7 + level and i.isalpha():
            wordList.add(i)
    wordList = list(wordList)
    # Too few candidates: restart with a fresh hint.
    if len(wordList) < 2:
        return sologame()
    # Trim to at most three candidate words.
    while len(wordList) > 3:
        wordList.pop()
    soloB = screen.Solo.game(wordList)
    try:
        main(wordList, soloB, hint, "solo")
    except Exception:
        sologame()
def game():
    rw = RandomWords()
    hint = rw.random_word()
    api = Datamuse()
    foo_complete = api.words(ml=hint, max=3)
    foo_df = scripts.dm_to_df(foo_complete)
    loo = api.words(rel_par=hint, max=1)
    loo_df = scripts.dm_to_df(loo)
    maybe = api.words(rel_trg=hint, max=1)
    maybe_df = scripts.dm_to_df(maybe)
    values = foo_df['word'].values
    val = loo_df['word'].values
    v = maybe_df['word'].values
    words = set()
    # Keep only purely alphabetic words between four and seven letters.
    for i in values:
        if 3 < len(i) < 8 and i.isalpha():
            words.add(i)
    for i in val:
        if 3 < len(i) < 8 and i.isalpha():
            words.add(i)
    for i in v:
        if 3 < len(i) < 8 and i.isalpha():
            words.add(i)
    words = list(words)
    # No usable candidates: retry with a fresh hint.
    if len(words) < 1:
        return game()
    global guess
    guess = []
    try:
        return words[0], hint
    except Exception:
        return game()
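
# dm_to_df, as used by both functions above, flattens the API's list of result
# dicts into a pandas DataFrame with a 'word' column (a sketch, assuming
# `scripts` here is datamuse.scripts; 'duck' is an arbitrary query):
api = Datamuse()
df = scripts.dm_to_df(api.words(ml='duck', max=5))
print(df['word'].values)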
def __init__(self, acronym: str):
    self.acronym = acronym
    api = Datamuse()
    dictionary: Dict[str, List[str]] = {}
    splitted = self.splitting_pattern.findall(acronym)
    self.length = len(splitted)
    # Fetch words starting with each fragment and use them as grammar rules.
    for start in splitted:
        res = api.words(sp='{}*'.format(start))
        dictionary[start] = [obj['word'] for obj in res]
    self.grammar = Grammar(dictionary)
    self.grammar.add_modifiers(base_english)
    self.rule = '#{}.capitalize#'.format('.capitalize# #'.join(splitted))
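
# Usage sketch for the constructor above ('Backronym' is a hypothetical name
# for its enclosing class, which this excerpt does not show):
expander = Backronym('NASA')
print(expander.grammar.flatten(expander.rule))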
import random

from datamuse import Datamuse

api = Datamuse()

default_syllables = 8
default_topics = [
    'cars', 'movies', 'books', 'music', 'people', 'earth', 'buildings',
    'games', 'animals', 'fruits', 'vegetables', 'adventures', 'planets',
    'government', 'enlightenment', 'peace', 'gangs', 'war', 'soul',
    'religion', 'politics'
]
common_words = [
    'the',
import unittest

from datamuse import Datamuse


class DatamuseTestCase(unittest.TestCase):
    def setUp(self):
        self.api = Datamuse()
        self.max = 5

    # words endpoint
    def test_sounds_like(self):
        args = {'sl': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertIsInstance(data, list)
        print("sounds like", data)

    def test_rhymes(self):
        args = {'rel_rhy': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertTrue(len(data) <= self.max)
        print("rhyme", data)

    def test_near_rhymes(self):
        args = {'rel_nry': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertTrue(len(data) <= self.max)
        print("near rhyme", data)

    def test_bad_request(self):
        args = {'foo': 42}
        with self.assertRaises(ValueError):
            self.api.words(**args)

    def test_set_max(self):
        self.assertEqual(self.api.max, 100)
        self.api.set_max_default(10)
        self.assertEqual(self.api.max, 10)
        data = self.api.words(ml='ringing in the ears')
        self.assertEqual(len(data), 10)

    def test_set_max_error(self):
        # Each out-of-range default must raise on its own; a single
        # assertRaises block would stop after the first call raised.
        for bad_max in (-2, 0, 1001):
            with self.assertRaises(ValueError):
                self.api.set_max_default(bad_max)
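
# Standard runner so the test module can be executed directly; note these
# tests hit the live Datamuse API (contrast the mocked setUp further below):
if __name__ == '__main__':
    unittest.main()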
import unittest

import numpy as np

import datamuse
from datamuse import Datamuse
from glove import loadGloveModel
from tsne import projectWordsNoFile, projectWordsNoFileDoubleProject
import Queue  # Python 2 stdlib module ('queue' on Python 3)
import unicodedata

dm = Datamuse()
model, vectors, keys = loadGloveModel("../data/glove.6B.50d.txt")
# print keys[0:100]

# Cryptic-crossword clue words alongside the wordplay fragments they hide,
# e.g. 'banking' = 'ban' + 'king', and 'star' beheaded gives 'tar'.
cryptic = ['mother', 'ordered', 'sail', 'fabrics', 'materials', 'mater',
           'they', 'trade', 'in', 'french', 'sea', 'songs', 'merchants',
           'mer', 'chants',
           'chew', 'honeydew', 'fruit', 'melon', 'lemon',
           'lap', 'dancing', 'friend', 'pal',
           'outlaw', 'leader', 'managing', 'money', 'banking', 'ban', 'king',
           'beheaded', 'celebrity', 'is', 'sailor', 'tar', 'star',
           'challenging', 'sweetheart', 'heartlessly', 'daring', 'darling',
           'found', 'ermine', 'deer', 'hides', 'damaged', 'undermined']
crypticKeys = []
crypticVectors = []
import json
import unittest
from urllib.parse import urljoin

import pkg_resources
import responses

from datamuse import Datamuse


class DatamuseTestCase(unittest.TestCase):
    def setUp(self):
        self.max = 5
        self.api = Datamuse()
        # Start mocking here and stop it after each test; decorating setUp
        # itself with @responses.activate would tear the mocks down before
        # the tests ever ran.
        responses.start()
        self.addCleanup(responses.stop)
        self.addCleanup(responses.reset)
        _api_url = 'https://api.datamuse.com/words'

        _fp = pkg_resources.resource_filename(__name__, 'fixtures/orange.json')
        with open(_fp) as f:
            orange_json = json.load(f)
        responses.add(responses.GET, urljoin(_api_url, '?sl=orange&max=5'),
                      json=orange_json, status=200)
        responses.add(responses.GET, urljoin(_api_url, '?rel_rhy=orange&max=5'),
                      json=orange_json, status=200)
        responses.add(responses.GET, urljoin(_api_url, '?rel_nry=orange&max=5'),
                      json=orange_json, status=200)

        _fp = pkg_resources.resource_filename(__name__, 'fixtures/ringing.json')
        with open(_fp) as f:
            ringing_json = json.load(f)
        responses.add(responses.GET,
                      urljoin(_api_url, '?ml=ringing+in+the+ears'),
                      json=ringing_json, status=200)

        _fp = pkg_resources.resource_filename(__name__, 'fixtures/por.json')
        with open(_fp) as f:
            por_json = json.load(f)
        responses.add(responses.GET, urljoin(_api_url, '?s=por&max=3'),
                      json=por_json, status=200)

    def test_sounds_like(self):
        args = {'sl': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertIsInstance(data, list)
        print("sounds like", data)

    def test_rhymes(self):
        args = {'rel_rhy': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertTrue(len(data) <= self.max)
        print("rhyme", data)

    def test_near_rhymes(self):
        args = {'rel_nry': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertTrue(len(data) <= self.max)
        print("near rhyme", data)

    def test_set_max(self):
        self.assertEqual(self.api.max, 100)
        self.api.set_max_default(10)
        self.assertEqual(self.api.max, 10)
        data = self.api.words(ml='ringing in the ears')
        self.assertEqual(len(data), 10)

    def test_set_max_error(self):
        # Each out-of-range default must raise on its own; a single
        # assertRaises block would stop after the first call raised.
        for bad_max in (-2, 0, 1001):
            with self.assertRaises(ValueError):
                self.api.set_max_default(bad_max)

    def test_suggest(self):
        response = self.api.suggest(s='por', max_results=3, vocabulary='es')
        assert len(response) == 3
        assert isinstance(response, list)
        assert response[1]['word'] == 'porque'
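
    # A sketch of asserting on the recorded mock traffic: the responses
    # library keeps every intercepted request in responses.calls (this
    # method name is hypothetical, not part of the original suite):
    def test_request_url_recorded(self):
        self.api.words(sl='orange', max=self.max)
        self.assertIn('sl=orange', responses.calls[-1].request.url)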
def __init__(self, input_texts: Iterable[str]):
    nltk.download('punkt')
    self.nlp = spacy.load('en_core_web_lg')
    self.summarizer = LsaSummarizer(Stemmer('english'))
    self.summarizer.stop_words = get_stop_words('english')
    self.cleaner = CleaningProcessor()

    # Load any synonyms cached by a previous run.
    self.synonyms: Dict[str, Optional[List[str]]] = {}
    if path.isfile('src/syns.yaml'):
        with open('src/syns.yaml', 'r') as f:
            self.synonyms = yaml.safe_load(f)
        if self.synonyms is None:
            self.synonyms = {}

    # Build the substitution tables: shortenings, expansions, filler words to
    # delete, compound words, and British/American spelling pairs.
    self.patterns: Dict[str, str] = OrderedDict()
    self.rev_patterns: Dict[str, str] = OrderedDict()
    with open('src/spreadr_shreddr/data.yaml', 'r') as f:
        data = yaml.safe_load(f)
    self.patterns.update(data['shorten'])
    self.patterns.update(data['expand'])
    data['filler'].extend(
        pycorpora.get_file('humans', 'prefixes')['prefixes'])
    self.patterns.update({k: '' for k in data['filler']})
    for obj in pycorpora.get_file('words', 'compounds')['compounds']:
        key = '{} {}'.format(obj['firstWord'], obj['secondWord'])
        if key not in self.patterns:
            self.patterns[key] = obj['compoundWord']
    self.patterns.update(
        {k.capitalize(): v.capitalize() for k, v in self.patterns.items()})
    self.brits = data['brit_am']
    self.murcans = {v: k for k, v in self.brits.items()}

    # Query Datamuse for synonym candidates of words not yet cached.
    changed = False
    api = Datamuse()
    for text in input_texts:
        text >>= self.cleaner
        for sent in sent_tokenize(text):
            for index, word in enumerate(self.nlp(sent)):
                orth = word.orth_.lower()
                key = self.separator.join((orth, word.tag_))
                if key not in self.synonyms:
                    changed = True
                    syns: List[str] = []
                    if (word.pos_ in UNIVERSAL_TO_DATAMUSE
                            and len(wn.synsets(orth)) <= 1):
                        res = api.words(ml=orth)
                        if len(res) > 0:
                            syns = self._get_synonyms(
                                ' '.join(sent), (index, word), res)
                    if len(syns) > 1:
                        self.synonyms[key] = syns
                    else:
                        self.synonyms[key] = None
                if changed:
                    changed = False
                    # Append the new entry to the on-disk cache.
                    with open('src/syns.yaml', 'a') as f:
                        f.write(yaml.dump({key: self.synonyms[key]}))
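
# Sketch of the cache file the loop above appends to (src/syns.yaml): one
# mapping entry per '<word><separator><tag>' key, where the value is either a
# list of synonyms or null (separator '|' is assumed for illustration):
#
#     happy|JJ: [glad, cheerful, pleased]
#     the|DT: null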
import asyncio

from file_manager import File_manager
from naive import Naive
from datamuse import Datamuse

TRAIN_FILENAME = "train_data.tsv"
TEST_FILENAME = "data_estag_ds.tsv"

if __name__ == '__main__':
    filemng = File_manager()
    train_instances = filemng.get_instances_of_file(TRAIN_FILENAME)
    test_instances = filemng.get_instances_of_file(TEST_FILENAME)
    nv = Naive()
    dic = filemng.read_dictionary()
    dtmuse = Datamuse()
    # get_synonymous is a coroutine; run it to completion on the event loop.
    loop = asyncio.get_event_loop()
    category_synonymous = loop.run_until_complete(
        dtmuse.get_synonymous('smartphone'))
    loop.close()
    # The call returns a string representation; parse it back into an object.
    category_synonymous = eval(category_synonymous)
    nv.naive_bayes(train_instances, dic, test_instances, category_synonymous)
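
# Equivalent, simpler event-loop handling on Python 3.7+ (same coroutine):
#     category_synonymous = asyncio.run(dtmuse.get_synonymous('smartphone'))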