예제 #1
0
def save(date, name_newspaper, words):
    """Store one ``Word`` row per entry of *words* for a single newspaper.

    Args:
        date: Value written to each row's ``_datetime`` field.
        name_newspaper: Matched against ``Newspaper.name`` to find the
            newspaper every row is attached to.
        words: Iterable of values; each one becomes the ``word`` field of
            a new row.

    Any exception raised by ``Newspaper.objects.get`` propagates to the
    caller. The lookup is only performed when *words* is non-empty.
    """
    words = list(words)
    if not words:
        # Nothing to store, so no newspaper lookup is needed either.
        return

    # The newspaper is identical for every word: fetch it once instead of
    # issuing the same query on every loop iteration.
    obj_newspaper = Newspaper.objects.get(name=name_newspaper)
    for a_word in words:
        obj = Word(
            newspaper=obj_newspaper,
            _datetime=date,
            word=a_word,
        )
        obj.save()
def undo_lowercase_words(_, __):
    """Rebuild the ``Word`` rows from the un-lowercased tweet tokens.

    Deletes every existing ``Word``, tokenizes the content of every
    ``Tweet`` with ``word_tokenize`` and saves one ``Word`` with
    ``frequency=-1`` per distinct token, in first-seen order.

    Both arguments are ignored.
    NOTE(review): the signature looks like a Django ``RunPython`` callable
    (apps, schema_editor) - confirm against the migration that uses it.
    """
    Word.objects.all().delete()

    # Track seen tokens in a set so the membership test is O(1); the list
    # keeps the first-seen order the original list-only version produced.
    seen = set()
    all_tokens = []
    for tweet in Tweet.objects.all():
        for token in word_tokenize(tweet.content):
            if token not in seen:
                seen.add(token)
                all_tokens.append(token)

    for token in all_tokens:
        word = Word(word=token, frequency=-1)
        word.save()
예제 #3
0
파일: db_util.py 프로젝트: cchyung/bumbl-b
def create_snippet(word, rawUrl, url, starttime, endtime):
    """Save a ``Snippet`` tying a word to a time span of an audio record.

    Args:
        word: Value of the ``Word`` to attach; the ``Word`` row is created
            if none with this ``value`` exists yet.
        rawUrl: ``url`` of the existing ``Audio`` row to attach.
        url: Stored in the snippet's ``url`` field.
        starttime: Stored in the snippet's ``start`` field.
        endtime: Stored in the snippet's ``end`` field.

    Any exception raised by ``Audio.objects.get`` (e.g. no audio with that
    url) propagates to the caller.
    """
    # get_or_create replaces the manual get / except DoesNotExist / save
    # sequence, and avoids depending on a separate ``models`` name.
    word_object, _ = Word.objects.get_or_create(value=word)

    audio_object = Audio.objects.get(url=rawUrl)
    new_Snippet = Snippet(word=word_object,
                          audio=audio_object,
                          start=starttime,
                          end=endtime,
                          url=url)
    new_Snippet.save()
    # Parenthesised single-argument form is valid on Python 2 and 3.
    print("Saved \"%s\" Snippet to database" % word)
def populate_words(_, __):
    """Create one ``Word`` row per distinct token found in the tweets.

    Tokenizes the content of every ``Tweet`` with ``word_tokenize`` and
    saves a ``Word`` for each distinct token, in first-seen order.
    Existing ``Word`` rows are left untouched.

    Both arguments are ignored.
    NOTE(review): the signature looks like a Django ``RunPython`` callable
    (apps, schema_editor) - confirm against the migration that uses it.
    """
    # Track seen tokens in a set so the membership test is O(1); the list
    # keeps the first-seen order the original list-only version produced.
    seen = set()
    all_tokens = []
    for tweet in Tweet.objects.all():
        for token in word_tokenize(tweet.content):
            if token not in seen:
                seen.add(token)
                all_tokens.append(token)

    for token in all_tokens:
        word = Word(
            word=token,
        )
        word.save()
def undo_stem_words(_, __):
    """Rebuild the ``Word`` rows from the lower-cased tweet tokens.

    Deletes every existing ``Word``, tokenizes the lower-cased content of
    every ``Tweet`` with ``word_tokenize`` and saves one ``Word`` with
    ``frequency=-1`` per distinct token that is not already stored.

    Both arguments are ignored.
    NOTE(review): the signature looks like a Django ``RunPython`` callable
    (apps, schema_editor) - confirm against the migration that uses it.
    """
    Word.objects.all().delete()

    # Track seen tokens in a set so the membership test is O(1); the list
    # keeps the first-seen order the original list-only version produced.
    seen = set()
    all_tokens = []
    for tweet in Tweet.objects.all():
        for token in word_tokenize(str(tweet.content).lower()):
            if token not in seen:
                seen.add(token)
                all_tokens.append(token)

    for token in all_tokens:
        # ``exists()`` replaces ``count() is 0``: identity comparison with
        # an int literal only works by CPython accident and triggers a
        # SyntaxWarning on current interpreters.
        if not Word.objects.filter(word=token).exists():
            word = Word(
                word=token,
                frequency=-1,
            )
            word.save()
예제 #6
0
 def create(name, description, words):
     """Build a category with its words and save it.

     Every entry of ``words`` is wrapped in a ``Word``; a falsy ``words``
     (``None``, empty) yields an empty word list. The result of
     ``Category.save()`` is returned unchanged.
     """
     wrapped_words = [Word(word=entry) for entry in words] if words else []
     new_category = Category(
         name=name,
         description=description,
         words=wrapped_words,
     )
     return new_category.save()
예제 #7
0
def wordPop(LANGUAGE_TO_POPULATE):
    """Populate the Word and Lemma tables from a language data file.

    Reads ``data/langs/<LANGUAGE_TO_POPULATE>.txt`` line by line. Each
    usable line is tab-separated with the lemma in the first column, the
    word in the second and a ``;``-separated tagset in the last; lines
    with fewer than three columns are skipped. For every usable line the
    tagset is fetched or created and its known features attached, the
    lemma is fetched or created, and a new ``Word`` is saved.

    Args:
        LANGUAGE_TO_POPULATE: Name of an existing ``Language`` row; also
            the stem of the data file name.
    """
    fileName = f'data/langs/{LANGUAGE_TO_POPULATE}.txt'
    languageObject = Language.objects.get(name=LANGUAGE_TO_POPULATE)
    with open(fileName, "r", encoding="utf8") as wordData:

        for line in wordData:
            # Lemma Word Tagset - delimiter ('\t')
            rowContent = line.split('\t')
            if len(rowContent) < 3:  # not a valid data line
                continue

            tagsetName = rowContent[-1].rstrip()
            tagSetObject, _created = TagSet.objects.get_or_create(
                name=tagsetName)
            lemmaName = rowContent[0]
            wordName = rowContent[1]

            # The last column holds all labels of the tagset.
            allLabels = tagsetName.split(";")
            for currLabel in allLabels:
                # Only the dictionary lookup is guarded, so a KeyError
                # coming from the ORM calls below is not mistaken for an
                # unknown label.
                try:
                    currFeature = findFeature[currLabel.upper()]
                except KeyError:
                    print(f'{currLabel}  - not exist')
                    continue
                featObject = Feature.objects.get(name=currFeature)
                tagSetObject.features.add(featObject)

            # The first label determines the part of speech.
            # NOTE(review): an unknown first label raises KeyError here,
            # whereas the loop above only reports it - confirm intended.
            posName = findFeature[allLabels[0].upper()].rstrip()
            posObject = POS.objects.get(name=posName)

            # Fetch the lemma or create it. The word is created only after
            # this succeeds: previously it lived in a ``finally`` clause
            # and was therefore saved even when the lookup failed with an
            # unexpected error, using the previous line's lemma.
            lemmaObject, _created = Lemma.objects.get_or_create(
                name=lemmaName,
                pos=posObject,
                language=languageObject)

            wordObject = Word(name=wordName)
            wordObject.lemma = lemmaObject
            wordObject.tagset = tagSetObject
            wordObject.language = languageObject
            wordObject.save()
예제 #8
0
# Parse the dictionary entries from the JSON file object.
dictionary = json.load(dict_file)


print("Loaded: reading file")

# Objects built by the loop below, collected per kind.
word_objs = []
defs_objs = []
exam_objs = []
syno_objs = []  # not filled in this section

for entry in dictionary:
    print(f"getting word: {entry['word']}")
    # Build the word object; its id and text are stored lower-cased.
    word_obj = Word(
        w_id=entry['link_id'].lower(),
        word=entry['word'].lower(),
        url=entry['url'],
        etymology=entry['etymology'],
        notes=entry['notes'],
    )
    # Definitions are either a flat list (no syntax label) or a mapping
    # from syntax label to a list of definitions.
    definitions = entry['definitions']
    if isinstance(definitions, list):
        defs_objs.extend(
            Definition(word=word_obj, definition=text, syntax=None)
            for text in definitions
        )
    else:
        for syntax, texts in definitions.items():
            defs_objs.extend(
                Definition(word=word_obj, definition=text, syntax=syntax)
                for text in texts
            )
    # Build the example objects for this word.
    exam_objs.extend(
        Example(word=word_obj, example=text) for text in entry["examples"]
    )
    # Finally record the word itself.
    word_objs.append(word_obj)
    
예제 #9
0
    def mutate_and_get_payload(cls, root, info):
        """Save a new, empty ``Word`` and return it inside ``IntroduceWord``.

        ``root`` and ``info`` are accepted but not used.
        """
        new_word = Word()
        new_word.save()
        return IntroduceWord(word=new_word)