Example #1
0
def save(date, name_newspaper, words):
    """Store one ``Word`` row per entry of ``words`` for a single newspaper.

    Args:
        date: Value written to the ``_datetime`` field of every row.
        name_newspaper: ``name`` used to look up the ``Newspaper`` object.
        words: Iterable of values written to the ``word`` field, one row each.

    Raises:
        Whatever ``Newspaper.objects.get`` raises when the lookup fails.
        No lookup is performed when ``words`` is empty.
    """
    obj_newspaper = None
    for a_word in words:
        if obj_newspaper is None:
            # The newspaper is identical for every word, so query it once
            # instead of once per iteration. It is fetched on first use so
            # that an empty ``words`` still touches the database not at all.
            obj_newspaper = Newspaper.objects.get(name=name_newspaper)
        obj = Word(
            newspaper=obj_newspaper,
            _datetime=date,
            word=a_word,
        )
        obj.save()
Example #2
0
def wordPop(LANGUAGE_TO_POPULATE):
    """Populate the Word and Lemma tables from a language data file.

    Reads ``data/langs/<LANGUAGE_TO_POPULATE>.txt`` (UTF-8, path relative to
    the current working directory). Each usable line is tab-separated: the
    first column is the lemma, the second the word, and the last a
    ``;``-separated list of labels that also serves as the tagset name.
    Lines with fewer than three columns are skipped.

    Args:
        LANGUAGE_TO_POPULATE: Name of an existing ``Language`` row; also the
            stem of the data file name.

    Raises:
        KeyError: If the first label of a line is not a key of
            ``findFeature``. Unknown labels are otherwise only reported.
        Whatever the ``Language``, ``Feature`` and ``POS`` ``objects.get``
        lookups raise when no matching row exists.
    """
    fileName = f'data/langs/{LANGUAGE_TO_POPULATE}.txt'
    languageObject = Language.objects.get(name=LANGUAGE_TO_POPULATE)
    with open(fileName, "r", encoding="utf8") as wordData:

        for line in wordData:
            # Lemma Word ... Tagset - delimiter ('\t')
            rowContent = line.split('\t')
            if len(rowContent) < 3:  # not a valid data line
                continue

            lemmaName = rowContent[0]
            wordName = rowContent[1]
            tagsetName = rowContent[-1].rstrip()
            tagSetObject, _ = TagSet.objects.get_or_create(name=tagsetName)

            # The last column corresponds to all labels of the tagset.
            allLabels = tagsetName.split(";")
            for currLabel in allLabels:
                try:
                    currFeature = findFeature[currLabel.upper()]
                except KeyError:
                    print(f'{currLabel}  - not exist')
                    continue
                featObject = Feature.objects.get(name=currFeature)
                tagSetObject.features.add(featObject)

            # The first label is used as the part of speech. Unlike the loop
            # above, an unknown first label is not tolerated here.
            posName = findFeature[allLabels[0].upper()].rstrip()
            posObject = POS.objects.get(name=posName)

            # Fetch the lemma, creating it if it does not exist yet. The word
            # used to be created in a ``finally`` clause, so any other lookup
            # error still saved a word with a stale or unbound lemma.
            lemmaObject, _ = Lemma.objects.get_or_create(
                name=lemmaName,
                pos=posObject,
                language=languageObject,
            )

            # Finally create the word.
            wordObject = Word(name=wordName)
            wordObject.lemma = lemmaObject
            wordObject.tagset = tagSetObject
            wordObject.language = languageObject
            wordObject.save()
def undo_lowercase_words(_, __):
    """Rebuild the ``Word`` table from the case-preserved tokens of all tweets.

    Deletes every existing ``Word``, tokenizes the ``content`` of every
    ``Tweet`` with ``word_tokenize``, and saves one ``Word`` with
    ``frequency=-1`` per distinct token, in order of first appearance.

    Both positional arguments are ignored.
    # NOTE(review): the signature looks like a Django ``RunPython`` callable
    # (apps, schema_editor) - confirm against the caller.
    """
    Word.objects.all().delete()

    # A dict keeps first-seen order while giving O(1) duplicate detection;
    # the previous ``token not in list`` test was quadratic in token count.
    unique_tokens = {}
    for tweet in Tweet.objects.all():
        unique_tokens.update(dict.fromkeys(word_tokenize(tweet.content)))

    for token in unique_tokens:
        word = Word(word=token, frequency=-1)
        word.save()
Example #4
0
def create_snippet(word, rawUrl, url, starttime, endtime):
    """Save a ``Snippet`` linking a word to a time range of an audio object.

    Args:
        word: Value of the ``Word`` to attach; the row is created if no
            ``Word`` with this ``value`` exists yet.
        rawUrl: ``url`` used to look up the existing ``Audio`` object.
        url: Stored in the snippet's ``url`` field.
        starttime: Stored in the snippet's ``start`` field.
        endtime: Stored in the snippet's ``end`` field.

    Raises:
        Whatever ``Audio.objects.get`` raises when no audio matches ``rawUrl``.
    """
    # get_or_create replaces the manual try/except, which caught
    # ``models.Word.DoesNotExist`` although the model is referenced as ``Word``.
    word_object, _ = Word.objects.get_or_create(value=word)

    audio_object = Audio.objects.get(url=rawUrl)
    new_Snippet = Snippet(word=word_object,
                          audio=audio_object,
                          start=starttime,
                          end=endtime,
                          url=url)
    new_Snippet.save()
    # Single-argument call form is valid on both Python 2 and Python 3.
    print("Saved \"%s\" Snippet to database" % word)
def populate_words(_, __):
    """Save one ``Word`` per distinct token found in the tweets.

    Tokenizes the ``content`` of every ``Tweet`` with ``word_tokenize`` and
    saves a ``Word`` for each distinct token, in order of first appearance.
    Existing ``Word`` rows are left untouched.

    Both positional arguments are ignored.
    # NOTE(review): the signature looks like a Django ``RunPython`` callable
    # (apps, schema_editor) - confirm against the caller.
    """
    # A dict keeps first-seen order while giving O(1) duplicate detection;
    # the previous ``token not in list`` test was quadratic in token count.
    unique_tokens = {}
    for tweet in Tweet.objects.all():
        unique_tokens.update(dict.fromkeys(word_tokenize(tweet.content)))

    for token in unique_tokens:
        word = Word(
            word=token,
        )
        word.save()
def undo_stem_words(_, __):
    """Rebuild the ``Word`` table from the lower-cased tokens of all tweets.

    Deletes every existing ``Word``, lower-cases and tokenizes the
    ``content`` of every ``Tweet`` with ``word_tokenize``, and saves one
    ``Word`` with ``frequency=-1`` per distinct token, in order of first
    appearance, skipping any token that already has a row.

    Both positional arguments are ignored.
    # NOTE(review): the signature looks like a Django ``RunPython`` callable
    # (apps, schema_editor) - confirm against the caller.
    """
    Word.objects.all().delete()

    # A dict keeps first-seen order while giving O(1) duplicate detection;
    # the previous ``token not in list`` test was quadratic in token count.
    unique_tokens = {}
    for tweet in Tweet.objects.all():
        lowered = str(tweet.content).lower()
        unique_tokens.update(dict.fromkeys(word_tokenize(lowered)))

    for token in unique_tokens:
        word = Word(
            word=token,
            frequency=-1,
        )
        # ``exists()`` replaces ``count() is 0``: identity comparison with an
        # int literal only works by accident of the interpreter's int caching.
        if not Word.objects.filter(word=token).exists():
            word.save()
Example #7
0
    def mutate_and_get_payload(cls, root, info):
        """Create and save an empty ``Word`` and wrap it in ``IntroduceWord``.

        ``cls``, ``root`` and ``info`` are accepted but not used.
        """
        new_word = Word()
        new_word.save()
        payload = IntroduceWord(word=new_word)
        return payload