def save(date, name_newspaper, words):
    """Store one ``Word`` row per entry of *words* for a given newspaper.

    Args:
        date: Value written to each row's ``_datetime`` field.
        name_newspaper: ``name`` of the ``Newspaper`` the rows are linked to.
        words: Iterable of values written to each row's ``word`` field.

    Raises:
        Newspaper.DoesNotExist: If *words* yields at least one item and no
            ``Newspaper`` has the given name.
    """
    obj_newspaper = None
    for a_word in words:
        if obj_newspaper is None:
            # The newspaper does not depend on the word, so look it up once
            # instead of once per iteration. The lookup stays lazy so that
            # an empty ``words`` still issues no query at all.
            obj_newspaper = Newspaper.objects.get(name=name_newspaper)
        obj = Word(
            newspaper=obj_newspaper,
            _datetime=date,
            word=a_word,
        )
        obj.save()
def wordPop(LANGUAGE_TO_POPULATE):
    """Populate the -Words- and -Lemmas- tables from a language data file.

    Reads ``data/langs/<LANGUAGE_TO_POPULATE>.txt`` as UTF-8. Each line is
    split on tabs: the first column is the lemma, the second the word and
    the last the tagset, itself a ``;``-separated list of labels. Lines
    with fewer than three columns are skipped.

    For each valid line the function:

    * gets or creates the ``TagSet`` named after the tagset column and adds
      to it every ``Feature`` whose label is found in ``findFeature``
      (unknown labels are reported on stdout and skipped);
    * resolves the ``POS`` from the first label;
    * reuses the ``Lemma`` matching (name, pos, language) or creates it;
    * saves a new ``Word`` linked to that lemma, tagset and language.

    Args:
        LANGUAGE_TO_POPULATE: Name of an existing ``Language``; also the
            stem of the data file name.

    Raises:
        Language.DoesNotExist: If no language has the given name.
        KeyError: If the first label of a line is not in ``findFeature``.
    """
    fileName = ''.join(['data/langs/', LANGUAGE_TO_POPULATE, '.txt'])
    languageObject = Language.objects.get(name=LANGUAGE_TO_POPULATE)

    with open(fileName, "r", encoding="utf8") as wordData:
        for line in wordData:
            # Lemma Word Tagset - tab-delimited
            rowContent = line.split('\t')
            if len(rowContent) < 3:
                # Not a valid data line.
                continue

            tagsetName = rowContent[-1].rstrip()
            tagSetObject, created = TagSet.objects.get_or_create(
                name=tagsetName)
            lemmaName = rowContent[0]
            wordName = rowContent[1]

            # The last column holds every label of the tagset.
            allLabels = tagsetName.split(";")
            for currLabel in allLabels:
                try:
                    currFeature = findFeature[currLabel.upper()]
                    featObject = Feature.objects.get(name=currFeature)
                    tagSetObject.features.add(featObject)
                except KeyError:
                    print(f'{currLabel} - not exist')

            # The first label determines the part of speech.
            posName = findFeature[allLabels[0].upper()].rstrip()
            posObject = POS.objects.get(name=posName)

            try:
                # Reuse the lemma if it already exists...
                lemmaObject = Lemma.objects.get(
                    name=lemmaName,
                    pos=posObject.id,
                    language=languageObject.id,
                )
            except Lemma.DoesNotExist:
                # ...otherwise create a new one.
                lemmaObject = Lemma(name=lemmaName)
                lemmaObject.language = languageObject
                lemmaObject.pos = posObject
                lemmaObject.save()

            # Create the word only once the lemma lookup has succeeded. This
            # must not live in a ``finally`` clause: that would also run when
            # the lookup fails with an unexpected error and would attach the
            # word to the previous line's lemma.
            wordObject = Word(name=wordName)
            wordObject.lemma = lemmaObject
            wordObject.tagset = tagSetObject
            wordObject.language = languageObject
            wordObject.save()
def undo_lowercase_words(_, __):
    """Rebuild the ``Word`` table from the raw tokens of every tweet.

    Deletes all existing ``Word`` rows, tokenizes the content of every
    ``Tweet`` with ``word_tokenize`` and saves one ``Word`` per distinct
    token, in first-seen order, with ``frequency=-1``.

    Both positional arguments are ignored.
    NOTE(review): the signature looks like a Django ``RunPython`` callable
    (apps, schema_editor) - confirm against the caller.
    """
    Word.objects.all().delete()

    # A set gives O(1) membership tests; the list keeps first-seen order.
    seen = set()
    all_tokens = []
    for tweet in Tweet.objects.all():
        for token in word_tokenize(tweet.content):
            if token not in seen:
                seen.add(token)
                all_tokens.append(token)

    for token in all_tokens:
        word = Word(word=token, frequency=-1)
        word.save()
def create_snippet(word, rawUrl, url, starttime, endtime):
    """Save a ``Snippet`` linking a word to a section of an audio record.

    The ``Word`` whose ``value`` equals *word* is reused when it exists and
    created otherwise. The ``Audio`` is looked up by *rawUrl* and must
    already exist.

    Args:
        word: Value of the ``Word`` the snippet belongs to.
        rawUrl: ``url`` of the existing ``Audio`` row.
        url: Stored in the snippet's ``url`` field.
        starttime: Stored in the snippet's ``start`` field.
        endtime: Stored in the snippet's ``end`` field.

    Raises:
        Audio.DoesNotExist: If no ``Audio`` has ``url == rawUrl``.
    """
    try:
        word_object = Word.objects.get(value=word)
    # Catch the exception through the same name used for the lookup so the
    # handler cannot itself fail on an unresolved ``models`` reference.
    except Word.DoesNotExist:
        word_object = Word(value=word)
        word_object.save()

    audio_object = Audio.objects.get(url=rawUrl)
    new_Snippet = Snippet(
        word=word_object,
        audio=audio_object,
        start=starttime,
        end=endtime,
        url=url,
    )
    new_Snippet.save()
    # Function-call form: the Python 2 print statement is a SyntaxError on
    # Python 3, while this form prints the same text on both.
    print("Saved \"%s\" Snippet to database" % word)
def populate_words(_, __):
    """Save one ``Word`` per distinct token found in the tweets.

    Tokenizes the content of every ``Tweet`` with ``word_tokenize`` and
    saves a ``Word`` for each distinct token, in first-seen order. Existing
    ``Word`` rows are left untouched.

    Both positional arguments are ignored.
    NOTE(review): the signature looks like a Django ``RunPython`` callable
    (apps, schema_editor) - confirm against the caller.
    """
    # A set gives O(1) membership tests; the list keeps first-seen order.
    seen = set()
    all_tokens = []
    for tweet in Tweet.objects.all():
        for token in word_tokenize(tweet.content):
            if token not in seen:
                seen.add(token)
                all_tokens.append(token)

    for token in all_tokens:
        word = Word(
            word=token,
        )
        word.save()
def undo_stem_words(_, __):
    """Rebuild the ``Word`` table from the lower-cased tokens of every tweet.

    Deletes all existing ``Word`` rows, tokenizes the lower-cased content of
    every ``Tweet`` with ``word_tokenize`` and saves one ``Word`` per
    distinct token, in first-seen order, with ``frequency=-1``. A token is
    skipped if a ``Word`` with the same ``word`` value already exists at
    save time.

    Both positional arguments are ignored.
    NOTE(review): the signature looks like a Django ``RunPython`` callable
    (apps, schema_editor) - confirm against the caller.
    """
    Word.objects.all().delete()

    # A set gives O(1) membership tests; the list keeps first-seen order.
    seen = set()
    all_tokens = []
    for tweet in Tweet.objects.all():
        for token in word_tokenize(str(tweet.content).lower()):
            if token not in seen:
                seen.add(token)
                all_tokens.append(token)

    for token in all_tokens:
        # ``exists()`` replaces ``count() is 0``: identity comparison with an
        # int literal only works by implementation accident and triggers a
        # SyntaxWarning on Python 3.8+.
        if not Word.objects.filter(word=token).exists():
            word = Word(
                word=token,
                frequency=-1,
            )
            word.save()
def mutate_and_get_payload(cls, root, info):
    """Create and save an empty ``Word`` and return it in an ``IntroduceWord``.

    ``cls``, ``root`` and ``info`` are accepted but not used.
    """
    new_word = Word()
    new_word.save()
    payload = IntroduceWord(word=new_word)
    return payload