class AcronymProcessor(Processor):
    """Expand an acronym: each letter (plus trailing lowercase run) becomes a
    full word fetched from the Datamuse API, rendered via a tracery grammar."""

    # One capital letter followed by any non-capitals, e.g. 'NaSa' -> ['Na', 'Sa'].
    splitting_pattern = re.compile(r'[A-Z][^A-Z]*')

    def __init__(self):
        self.api = Datamuse()
        self.grammar = Grammar({})
        self.grammar.add_modifiers(base_english)

    def process_text(self, input_text: str, **kwargs) -> str:
        """Return a phrase whose capitalized words spell out *input_text*.

        Optional keyword ``topics`` restricts the Datamuse word search.
        """
        topics = kwargs.get('topics', None)
        parts = self.splitting_pattern.findall(input_text)
        # Only query prefixes the grammar has not seen yet.
        for prefix in (p for p in parts if p not in self.grammar.symbols):
            query = {'sp': '{}*'.format(prefix), 'max': 1000}
            if topics is not None:
                query['topics'] = topics
            matches = self.api.words(**query)
            self.grammar.push_rules(prefix, [entry['word'] for entry in matches])
        return self.grammar.flatten(
            '#{}.capitalize#'.format('.capitalize# #'.join(parts)))
class DatamuseTestCase(unittest.TestCase):
    """Live tests for the Datamuse /words endpoint."""

    def setUp(self):
        self.api = Datamuse()
        self.max = 5

    # words endpoint
    def test_sounds_like(self):
        args = {'sl': 'orange', 'max': self.max}
        data = self.api.words(**args)
        # Fix: assertTrue(type(data), list) always passed — the second
        # argument to assertTrue is the failure message, not a comparison.
        self.assertIsInstance(data, list)
        print("sounds like", data)

    def test_rhymes(self):
        args = {'rel_rhy': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertTrue(len(data) <= self.max)
        print("rhyme", data)

    def test_near_rhymes(self):
        args = {'rel_nry': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertTrue(len(data) <= self.max)
        print("near rhyme", data)

    def test_bad_request(self):
        # An unknown query parameter must raise ValueError.
        args = {'foo': 42}
        with self.assertRaises(ValueError):
            self.api.words(**args)
class DatamuseTestCase(unittest.TestCase):
    """Live tests for the Datamuse /words endpoint."""

    def setUp(self):
        self.api = Datamuse()
        self.max = 5

    # words endpoint
    def test_sounds_like(self):
        args = {'sl': 'orange', 'max': self.max}
        data = self.api.words(**args)
        # Fix: assertTrue(type(data), list) always passed — the second
        # argument to assertTrue is the failure message, not a comparison.
        self.assertIsInstance(data, list)
        print("sounds like", data)

    def test_rhymes(self):
        args = {'rel_rhy': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertTrue(len(data) <= self.max)
        print("rhyme", data)

    def test_near_rhymes(self):
        args = {'rel_nry': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertTrue(len(data) <= self.max)
        print("near rhyme", data)

    def test_bad_request(self):
        # An unknown query parameter must raise ValueError.
        args = {'foo': 42}
        with self.assertRaises(ValueError):
            self.api.words(**args)
def sologame():
    """Set up a solo round: pick a random hint word, gather related words via
    Datamuse, and launch the game loop. Retries itself on failure."""
    global guess
    guess = []
    # from https://pypi.python.org/pypi/RandomWords/0.1.5
    rw = RandomWords()
    hint = rw.random_word()
    # from https://www.datamuse.com/api/
    api = Datamuse()
    foo_complete = api.words(ml=hint, max=4)
    foo_df = scripts.dm_to_df(foo_complete)
    loo = api.words(rel_par=hint, max=1)
    loo_df = scripts.dm_to_df(loo)
    maybe = api.words(rel_trg=hint, max=1)
    maybe_df = scripts.dm_to_df(maybe)

    # Harder levels (score > 5) allow longer words.
    # NOTE(review): `score` is read from module scope — confirm it is defined
    # before the first call.
    level = 2 if score > 5 else 0

    # Fix: the original third loop had a stray `pass` before `wordList.add(i)`
    # which made its length/alpha filter a no-op; all three candidate sources
    # are now filtered identically.
    wordList = set()
    for source in (foo_df['word'].values, loo_df['word'].values,
                   maybe_df['word'].values):
        for word in source:
            if 3 + level < len(word) < 7 + level and word.isalpha():
                wordList.add(word)

    wordList = list(wordList)
    if len(wordList) < 2:
        # Fix: original called sologame() without `return`, then kept running
        # with too few words once the recursive game ended.
        return sologame()
    while len(wordList) > 3:
        wordList.pop()
    soloB = screen.Solo.game(wordList)
    try:
        main(wordList, soloB, hint, "solo")
    except Exception:  # narrowed from bare `except:`; retry on any game error
        sologame()
def game():
    """Pick a random hint word and return ``(related_word, hint)``.

    Retries with a fresh hint when Datamuse yields no usable words.
    """
    rw = RandomWords()
    hint = rw.random_word()
    api = Datamuse()
    foo_complete = api.words(ml=hint, max=3)
    foo_df = scripts.dm_to_df(foo_complete)
    loo = api.words(rel_par=hint, max=1)
    loo_df = scripts.dm_to_df(loo)
    maybe = api.words(rel_trg=hint, max=1)
    maybe_df = scripts.dm_to_df(maybe)

    # Fix: the original third loop had a stray `pass` before `words.add(i)`,
    # so its length/alpha filter never applied; all three candidate sources
    # are now filtered identically.
    words = set()
    for source in (foo_df['word'].values, loo_df['word'].values,
                   maybe_df['word'].values):
        for word in source:
            if 3 < len(word) < 8 and word.isalpha():
                words.add(word)

    words = list(words)
    if len(words) < 1:
        # Fix: original called game() without `return`, discarding the retry
        # result and then raising IndexError on words[0].
        return game()
    global guess
    guess = []
    try:
        return words[0], hint
    except IndexError:  # narrowed from bare `except:`; defensive retry
        return game()
def generate_word(self):
    """Pick a random word longer than five letters from a random topic."""
    topic_choices = [
        "animals", "plants", "buildings", "places",
        "farming", "school", "food",
    ]
    client = Datamuse()
    suggestions = client.words(topics=choice(topic_choices))
    long_words = [entry["word"] for entry in suggestions
                  if len(entry["word"]) > 5]
    return choice(long_words)
class DatamuseTestCase(unittest.TestCase):
    """Live tests for the Datamuse /words endpoint and max-default handling."""

    def setUp(self):
        self.api = Datamuse()
        self.max = 5

    # words endpoint
    def test_sounds_like(self):
        args = {'sl': 'orange', 'max': self.max}
        data = self.api.words(**args)
        # Fix: assertTrue(type(data), list) always passed — the second
        # argument to assertTrue is the failure message, not a comparison.
        self.assertIsInstance(data, list)
        print("sounds like", data)

    def test_rhymes(self):
        args = {'rel_rhy': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertTrue(len(data) <= self.max)
        print("rhyme", data)

    def test_near_rhymes(self):
        args = {'rel_nry': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertTrue(len(data) <= self.max)
        print("near rhyme", data)

    def test_bad_request(self):
        # An unknown query parameter must raise ValueError.
        args = {'foo': 42}
        with self.assertRaises(ValueError):
            self.api.words(**args)

    def test_set_max(self):
        # Fix: assertTrue(self.api.max, 100) never failed; compare explicitly.
        self.assertEqual(self.api.max, 100)
        self.api.set_max_default(10)
        self.assertEqual(self.api.max, 10)
        data = self.api.words(ml='ringing in the ears')
        self.assertEqual(len(data), 10)

    def test_set_max_error(self):
        # Fix: the original put all three bad values inside one assertRaises
        # block, so only the first call ever executed. Each value now gets
        # its own context.
        for bad_value in (-2, 0, 1001):
            with self.assertRaises(ValueError):
                self.api.set_max_default(bad_value)
def __init__(self, acronym: str):
    """Build a tracery grammar whose rules expand each letter of *acronym*
    into candidate words fetched from the Datamuse API."""
    self.acronym = acronym
    client = Datamuse()
    parts = self.splitting_pattern.findall(acronym)
    self.length = len(parts)
    # One rule per prefix: all Datamuse words starting with that prefix.
    rules = {
        prefix: [entry['word']
                 for entry in client.words(sp='{}*'.format(prefix))]
        for prefix in parts
    }
    self.grammar = Grammar(rules)
    self.grammar.add_modifiers(base_english)
    self.rule = '#{}.capitalize#'.format('.capitalize# #'.join(parts))
class DatamuseTestCase(unittest.TestCase):
    """Offline tests for the Datamuse client using `responses` HTTP mocks."""

    @responses.activate
    def setUp(self):
        self.max = 5
        self.api = Datamuse()
        _api_url = 'https://api.datamuse.com/words'
        _fp = pkg_resources.resource_filename(__name__, 'fixtures/orange.json')
        with open(_fp) as response_json:
            response_json = json.load(response_json)
        # Fix: the original query strings used a second '?' ('?sl=orange?max=5');
        # parameters after the first must be joined with '&'.
        responses.add(responses.GET, urljoin(_api_url, '?sl=orange&max=5'),
                      json=response_json, status=200)
        responses.add(responses.GET, urljoin(_api_url, '?rel_rhy=orange&max=5'),
                      json=response_json, status=200)
        responses.add(responses.GET, urljoin(_api_url, '?rel_nry=orange&max=5'),
                      json=response_json, status=200)
        _fp = pkg_resources.resource_filename(__name__, 'fixtures/ringing.json')
        with open(_fp) as response_json:
            response_json = json.load(response_json)
        # Fix: dropped pointless f-prefixes (the strings had no placeholders).
        responses.add(responses.GET, urljoin(_api_url, '?ml=ringing+in+the+ears'),
                      json=response_json, status=200)
        _fp = pkg_resources.resource_filename(__name__, 'fixtures/por.json')
        with open(_fp) as response_json:
            response_json = json.load(response_json)
        responses.add(responses.GET, urljoin(_api_url, '?s=por&max=3'),
                      json=response_json, status=200)

    def test_sounds_like(self):
        args = {'sl': 'orange', 'max': self.max}
        data = self.api.words(**args)
        # Fix: assertTrue(type(data), list) always passed — the second
        # argument to assertTrue is the failure message, not a comparison.
        self.assertIsInstance(data, list)
        print("sounds like", data)

    def test_rhymes(self):
        args = {'rel_rhy': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertTrue(len(data) <= self.max)
        print("rhyme", data)

    def test_near_rhymes(self):
        args = {'rel_nry': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertTrue(len(data) <= self.max)
        print("near rhyme", data)

    def test_set_max(self):
        # Fix: assertTrue(self.api.max, 100) never failed; compare explicitly.
        self.assertEqual(self.api.max, 100)
        self.api.set_max_default(10)
        self.assertEqual(self.api.max, 10)
        data = self.api.words(ml='ringing in the ears')
        self.assertEqual(len(data), 10)

    def test_set_max_error(self):
        # Fix: the original put all three bad values inside one assertRaises
        # block, so only the first call ever executed.
        for bad_value in (-2, 0, 1001):
            with self.assertRaises(ValueError):
                self.api.set_max_default(bad_value)

    def test_suggest(self):
        response = self.api.suggest(s='por', max_results=3, vocabulary='es')
        # Converted bare asserts to unittest assertions for consistent
        # failure reporting with the rest of the class.
        self.assertEqual(len(response), 3)
        self.assertIsInstance(response, list)
        self.assertEqual(response[1]['word'], 'porque')
def __init__(self, input_texts: str):
    """Load NLP pipelines, pattern tables, and a synonym cache, then
    pre-populate the cache by querying Datamuse for every unseen word in
    *input_texts*.

    NOTE(review): annotated ``str`` but iterated element-by-element as a
    collection of texts — the annotation looks wrong; confirm callers pass
    an iterable of strings.
    """
    nltk.download('punkt')
    self.nlp = spacy.load('en_core_web_lg')
    self.summarizer = LsaSummarizer(Stemmer('english'))
    self.summarizer.stop_words = get_stop_words('english')
    self.cleaner = CleaningProcessor()
    # Synonym cache keyed by "<lowercased word><separator><POS tag>";
    # a value of None marks a word already checked with no usable synonyms.
    self.synonyms: Dict[str, Optional[List[str]]] = {}
    if path.isfile('src/syns.yaml'):
        with open('src/syns.yaml', 'r') as f:
            self.synonyms = yaml.safe_load(f)
            # safe_load returns None for an empty file — normalize to {}.
            if self.synonyms is None:
                self.synonyms = {}
    # Ordered replacement tables; OrderedDict preserves precedence of the
    # YAML sections applied below (shorten, then expand, then filler).
    self.patterns: Dict[str, str] = OrderedDict()
    self.rev_patterns: Dict[str, str] = OrderedDict()
    with open('src/spreadr_shreddr/data.yaml', 'r') as f:
        data = yaml.safe_load(f)
    self.patterns.update(data['shorten'])
    self.patterns.update(data['expand'])
    # Filler words (plus corpora human-name prefixes) map to empty string,
    # i.e. they are deleted on substitution.
    data['filler'].extend(
        pycorpora.get_file('humans', 'prefixes')['prefixes'])
    self.patterns.update({k: '' for k in data['filler']})
    # "first second" -> "firstsecond" compound contractions; existing
    # entries take precedence over corpora data.
    for obj in pycorpora.get_file('words', 'compounds')['compounds']:
        key = '{} {}'.format(obj['firstWord'], obj['secondWord'])
        if key not in self.patterns:
            self.patterns[key] = obj['compoundWord']
    # Mirror every pattern with a capitalized variant so sentence-initial
    # occurrences are matched too.
    self.patterns.update(
        {k.capitalize(): v.capitalize()
         for k, v in self.patterns.items()})
    self.brits = data['brit_am']
    self.murcans = {v: k for k, v in self.brits.items()}
    changed = False
    api = Datamuse()
    for text in input_texts:
        # `>>=` is overloaded by the processor pipeline to run the cleaner
        # on the text — TODO confirm against CleaningProcessor.
        text >>= self.cleaner
        for sent in sent_tokenize(text):
            for index, word in enumerate(self.nlp(sent)):
                orth = word.orth_.lower()
                key = self.separator.join((orth, word.tag_))
                if key not in self.synonyms:
                    changed = True
                    syns: List[str] = []
                    # Only query Datamuse for unambiguous words (at most
                    # one WordNet synset) with a supported POS.
                    if (word.pos_ in UNIVERSAL_TO_DATAMUSE
                            and len(wn.synsets(orth)) <= 1):
                        res = api.words(ml=orth)
                        if len(res) > 0:
                            syns = self._get_synonyms(
                                ' '.join(sent), (index, word), res)
                    if len(syns) > 1:
                        self.synonyms[key] = syns
                    else:
                        # Negative-cache the word so it is not re-queried.
                        self.synonyms[key] = None
                # Append each newly cached entry to the YAML cache file
                # immediately, so progress survives interruption.
                # NOTE(review): nesting reconstructed from mangled source —
                # confirm this write belongs inside the per-word loop.
                if changed:
                    changed = False
                    with open('src/syns.yaml', 'a') as f:
                        f.write(yaml.dump({key: self.synonyms[key]}))
# Python 2 fragment (print statements): breadth-first expansion of word
# neighborhoods via the Datamuse API, starting from seed word `k`.
# NOTE(review): `q`, `k`, `keys`, `queried`, `model`, `dm`, `neighborDepth`,
# `maxNeighbors`, and the cryptic* accumulators are defined before this
# chunk — their exact types are assumed from usage here.
q.put((k,0))
while q.qsize() > 0:
    current,level = q.get()
    print current,level,q.qsize()
    # Stop expanding once the configured neighbor depth is reached.
    if level < neighborDepth:
        # Only expand known vocabulary words not yet queried.
        if current in keys and current not in queried:
            crypticKeys.append(current)
            v = model[current]
            crypticVectors.append(v)
            crypticModel[current] = v
            args = {'ml': current, 'max': maxNeighbors}
            queried.append(current)
            # Datamuse "means-like" neighbors, normalized to plain ASCII.
            neighbors = [unicodedata.normalize('NFKD',item['word']).encode('ascii','ignore')\
                for item in dm.words(**args)]
            crypticNeighbors[current] = neighbors
            nextLevel = level + 1
            # Record each edge and enqueue the neighbor one level deeper.
            for n in neighbors:
                neighborsForFile.append([current, n])
                q.put((n,nextLevel))
print crypticKeys
print "---"
# print crypticVectors
# print crypticModel
print neighborsForFile
# Pad the BFS results with the first 1000 base vocabulary entries.
crypticKeys = crypticKeys + keys[0:1000]
crypticVectors = crypticVectors + vectors[0:1000]