Example #1
    def testDeleteDictEntry(self):
        today = datetime.date.today()
        current_time = int(time.time())
        u = self.createUser("da_zbur", "enabled", 10)
        u.use_questions = "yes"
        u.put()

        d1 = self.createDictEntry("da_zbur", 2, "lucrative", \
            u"profitable, moneymaking, remunerative", "[LOO-kruh-tiv]")
        d2 = self.createDictEntry("da_zbur", 2, "ferociously(en)", \
            u"жестоко, яростно, свирепо, дико, неистово. Ужасно, невыносимо.")

        l1 = self.createLearnListItem("da_zbur", d1, today, current_time)
        l2 = self.createLearnListItem("da_zbur", d2, today, current_time)

        self.createQuestion(l1, today, "da_zbur", d1.word,
             "profitable, moneymaking", 1, today, 100)
        self.createQuestion(l2, today, "da_zbur", d2.word,
             u"лажа", 2, today, 0)
        deleteDictEntry(u, "lucrative[LOO-kruh-tiv]")

        self.assertEqual(None, Dictionary.all().\
            filter("word =", "lucrative").get())
        self.assertEqual(1, LearnList.all().count())
        self.assertEqual(1, Question.all().count())
        self.assertEqual(1, Dictionary.all().count())
Example #2
    def __init__(self, name, path):
        super().__init__()
        self.name = name
        self.path = path
        self.dictionary = Dictionary()
        self.readedWords = []
        self.totalWords = 0
Example #3
def create_dictionary(entries, dataset):
    dictionary = Dictionary()
    for ent in entries:
        if dataset == 'vqd':
            qs = ent['question']
        else:
            qs = ent['sentence']['sent']
        dictionary.tokenize(qs, True)
    return dictionary
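
For orientation, here is a minimal sketch of driving this builder with toy records. The record shapes mirror the two branches above; the sample questions and the dataset name 'refcoco' are made up (any name other than 'vqd' takes the else branch), and the project's Dictionary class is assumed to be in scope:

vqd_entries = [{'question': 'what color is the cat?'},
               {'question': 'where is the dog?'}]
ref_entries = [{'sentence': {'sent': 'the man on the left'}}]

vqd_dict = create_dictionary(vqd_entries, dataset='vqd')
ref_dict = create_dictionary(ref_entries, dataset='refcoco')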
Example #4
    def __init__(self, name, path, type):
        super().__init__()
        self.name = name
        self.path = path
        self.type = type

        self.dictionary = Dictionary()
        self.documents = []

        self.totalCountedWords = 0
Example #5
def create_dictionary(ds):
    dictionary = Dictionary()
    entries = []
    for group in ['train', 'test']:
        with open(dataset[ds][group], 'rb') as f:
            d = pickle.load(f)
            entries.extend(d)
    for ent in entries:
        qs = ent['question']
        dictionary.tokenize(qs, True)
    return dictionary
Example #6
    def __init__(self, **kwargs):

        file = kwargs.get('file')

        self.isnms = kwargs.get('isnms')
        self.trainembd = kwargs.get('trainembd')

        # 6 position encoded vectors as used by irls
        self.spatial = True

        with open(file, 'rb') as f:
            self.data = pickle.load(f)

        if self.trainembd:
            self.dictionary = Dictionary.load_from_file(
                kwargs.get('dictionaryfile'))

        if kwargs.get('testrun'):
            self.data = self.data[:32]

        self.pool_features_path_coco = kwargs.get('coco_pool_features')
        self.pool_features_path_genome = kwargs.get('genome_pool_features')
        self.poolcoco_id_to_index = self._poolcreate_coco_id_to_index(
            self.pool_features_path_coco)
        self.poolcoco_id_to_index_gen = self._poolcreate_coco_id_to_index(
            self.pool_features_path_genome)

        self.image_features_path_coco = kwargs.get('coco_bottomup')
        self.coco_id_to_index = self.id_to_index(self.image_features_path_coco)
        self.image_features_path_genome = kwargs.get('genome_bottomup')
        self.genome_id_to_index = self.id_to_index(
            self.image_features_path_genome)
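
The loader above is driven entirely by kwargs. A hedged sketch of a plausible configuration follows; only the keys are taken from the code, while every path and value, and the dataset class name, are assumptions:

kwargs = {
    'file': 'cache/prepro/data.pkl',            # pickled list of entries
    'isnms': False,
    'trainembd': True,                          # triggers the dictionary load
    'dictionaryfile': 'data/dictionary.pickle',
    'testrun': True,                            # keeps only the first 32 entries
    'coco_pool_features': 'data/coco_pool.h5',
    'genome_pool_features': 'data/genome_pool.h5',
    'coco_bottomup': 'data/coco_bottomup.h5',
    'genome_bottomup': 'data/genome_bottomup.h5',
}
# dataset = SomeVQADataset(**kwargs)  # the class name is not shown in the excerpt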
Example #7
    def __init__(self, **kwargs):

        dataset = kwargs.get('dataset')
        splitBy = kwargs.get('splitBy')
        split = kwargs.get('split')

        data_json = osp.join('cache/prepro', dataset + "_" + splitBy,
                             split + '.json')

        with open(data_json, 'r') as f:
            self.data = json.load(f)

        # only use the questions having 1 bbox as answer
        datanew = []
        for ent in self.data:
            gtbox = ent['gtbox']
            if len(gtbox[0]) != 0 and len(gtbox) == 1:
                datanew.append(ent)
        self.data = datanew

        dictfile = kwargs.get('dictionaryfile')
        self.dictionary = Dictionary.load_from_file(dictfile)
        if kwargs.get('testrun'):
            self.data = self.data[:32]

        self.spatial = True
        self.image_features_path_coco = kwargs.get('coco_bottomup')
        self.coco_id_to_index = self.id_to_index(self.image_features_path_coco)
        print("Dataset [{}] loaded....".format(dataset, split))
        print("Split [{}] has {} ref exps.".format(split, len(self.data)))
Example #8
    def get(self):
        parser = reqparse.RequestParser()
        parser.add_argument('id', required=False)
        args = parser.parse_args()

        if args['id']:
            if Dictionary.query.filter(Dictionary.id == args['id']).count():
                dictionaries = Dictionary.query.filter(
                    Dictionary.id == args['id'])
                js = Dictionary.serialize(dictionaries[0])
            else:
                return {'message': 'Dictionary not found', 'result': {}}, 404
        else:
            dictionaries = Dictionary.query.all()
            js = [Dictionary.serialize(d) for d in dictionaries]

        return {'message': 'Success', 'result': js}, 200
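
A client-side sketch for exercising this handler with the requests library; the host, port, and route are assumptions:

import requests

# fetch one dictionary by id, then the full list
one = requests.get('http://localhost:5000/dictionary', params={'id': 1})
print(one.status_code, one.json())

all_resp = requests.get('http://localhost:5000/dictionary')
print(all_resp.status_code, all_resp.json()['result'])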
Example #9
def getUsers():
    userlist = []
    for user in User.all().order("-total_points").run():
        c = Dictionary.all().\
            filter("twitter_user =", user.twitter).count()
        userlist.append({"username": user.twitter, "points": user.total_points,
                         "wordscount": c})
    return json.dumps(userlist)
Example #10
    def __init__(self):
        i = words_iter()
        if i:
            self.words_iterator = i

        else:
            # the filter value was masked in the source; user.twitter is an assumption
            words = Dictionary.all().filter("twitter_user =", user.twitter)
            self.words_iterator = {"words": words, "pos": 0}
            set_words_iter(self.words_iterator)
Example #11
    def get(self):
        user = current_user()
        if not user:
            self.redirect("/login")
        else:
            parameters = {}
            parameters["total_points"] = User.all().\
                filter("twitter =", user.twitter).get().total_points
            parameters["total_words"] = Dictionary.all().\
                filter("twitter_user =", user.twitter).count()
            self.view(parameters)
Example #12
class Group:
    def __init__(self, name, path, type):
        super().__init__()
        self.name = name
        self.path = path
        self.type = type

        self.dictionary = Dictionary()
        self.documents = []

        self.totalCountedWords = 0

    def readDocuments(self, stopWords=[], headers=[], fastReading=False):
        self.dictionary.clean()

        print(f"Start reading group {self.name}, type: {self.type}")
        bar = defaultProgress(len(self.documents)).start()
        i = 0
        for document in self.documents:
            document.readWords(stopWords, headers, fastReading)

            for word in document.dictionary.words:
                self.dictionary.searchAndAddWord(
                    GroupedWord(word.text, self, word.counted, 1))

            document.clearReadedWords()
            i += 1
            bar.update(i)
        self.setTotalCountedWords()
        bar.finish()
        print(f"Done reading group {self.name}")

    def setTotalCountedWords(self):
        self.totalCountedWords = 0
        for word in self.dictionary.words:
            self.totalCountedWords += word.counted

    def __str__(self):
        return f"Group: {self.name}"
Example #13
def editDictEntry(user, original_word, new_string):
    original_word, _ = parseOpt(original_word)
    dict_entry = Dictionary.all().\
        filter("twitter_user =", user.twitter).\
        filter("word =", original_word.strip()).get()
    if dict_entry:
        parsed_dict = parseMessage(new_string, '')
        if parsed_dict != {}:
            dict_entry.word = parsed_dict["word"]
            dict_entry.meaning = parsed_dict["meaning"]
            dict_entry.pronounce = parsed_dict["pronounce"]
            dict_entry.put()
    return json.dumps({})
Example #14
def deleteDictEntry(user, word):
    word, _ = parseOpt(word)
    dict_entry = Dictionary.all().\
        filter("twitter_user =", user.twitter).\
        filter("word =", word.strip()).get()
    if dict_entry:
        lli = LearnList.all().\
            filter("dict_entry =", dict_entry.key()).get()
        # guard against a missing LearnList item so .key() is never called on None
        if lli:
            for q in Question.all().filter("lli_ref =", lli.key()).run():
                q.delete()
            lli.delete()
        dict_entry.delete()
    return json.dumps({})
Example #15
    def testProcessDuplicateWord(self):
        json_file = open("files/direct_message1.json")
        message_json = simplejson.load(json_file)
        twitter_dm = DirectMessage.NewFromJsonDict(message_json)
        processMessage(twitter_dm)

        json_file = open("files/direct_message1.json")
        message_json = simplejson.load(json_file)
        twitter_dm = DirectMessage.NewFromJsonDict(message_json)
        processMessage(twitter_dm)

        query = Dictionary.all()
        results = query.fetch(1)
        self.assertEqual(1, len(results))
Example #16
    def post(self):
        parser = reqparse.RequestParser()
        parser.add_argument('name', required=True)
        args = parser.parse_args()
        dictionary = Dictionary(args['name'])

        session.session.add(dictionary)
        session.session.commit()
        return {
            'message': 'Success',
            'result': {}
        }, 201, {
            'Location': '/dictionary/:' + str(dictionary.id)
        }
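
A matching client-side sketch for the create endpoint (host and route are again assumptions); the Location header comes back exactly as set above:

import requests

resp = requests.post('http://localhost:5000/dictionary', data={'name': 'english'})
print(resp.status_code)              # 201 on success
print(resp.headers.get('Location'))  # /dictionary/:<id>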
Example #17
    def testProcessMessageFromNonExistentUser(self):
        # A message from user "spammer", who doesn't exist in the database,
        # must not be processed and must not be saved.
        json_file = open("files/direct_message_spammer.json")
        message_json = simplejson.load(json_file)
        twitter_dm = DirectMessage.NewFromJsonDict(message_json)
        processMessage(twitter_dm)
        query = Dictionary.all()
        results = query.fetch(1)
        self.assertEqual(0, len(results))
        self.assertEqual("spammer", twitter_dm.sender_screen_name)
        # Test integration with LearnList
        query = LearnList.all()
        ll_results = query.fetch(2)
        self.assertEqual(0, len(ll_results))
Example #18
    def __init__(self, **kwargs):

        dataset = kwargs.get('dataset')
        splitBy = kwargs.get('splitBy')
        split = kwargs.get('split')

        data_json = osp.join('cache/prepro', "vqd" + "_" + splitBy,
                             split + '.json')

        with open(data_json, 'r') as f:
            self.data = json.load(f)

        # for vqd1, keep only entries whose ground truth is a single box
        if dataset == 'vqd1':
            print('VQDv1 1box loaded .......')

            # only use the questions having 1 bbox as answer
            datanew = []
            for ent in self.data:
                gtbox = ent['gtbox']
                if len(gtbox[0]) != 0 and len(gtbox) == 1:
                    datanew.append(ent)
            self.data = datanew

        dictfile = kwargs.get('dictionaryfile')
        self.dictionary = Dictionary.load_from_file(dictfile)
        if kwargs.get('testrun'):
            self.data = self.data[:32]

        self.spatial = True
        self.image_features_path_coco = kwargs.get('vqd_detfeats').format(
            split)
        self.coco_id_to_index = self.id_to_index(self.image_features_path_coco)
        print("Dataset [{}] loaded....".format(dataset, split))
        print("Split [{}] has {} ref exps.".format(split, len(self.data)))

        cocoids = set(self.coco_id_to_index)
        if kwargs.get('istrain'):
            cocoids.remove(81768)
        # some question ids do not exist
        datanew = []
        for ent in self.data:
            #some image ids are not in the dataset
            if ent['image_id'] in cocoids:
                datanew.append(ent)
        self.data = datanew
Example #19
    def __init__(self, **kwargs):

        dataset = kwargs.get('dataset')
        splitBy = kwargs.get('splitBy')
        split = kwargs.get('split')

        data_json = osp.join('cache/prepro', dataset + "_" + splitBy,
                             split + '.json')

        with open(data_json, 'r') as f:
            self.data = json.load(f)

        dictfile = kwargs.get('dictionaryfile')
        self.dictionary = Dictionary.load_from_file(dictfile)
        if kwargs.get('testrun'):
            self.data = self.data[:32]

        self.spatial = True
        print("Dataset [{}] loaded....".format(dataset))
        print("Split [{}] has {} ref exps.".format(split, len(self.data)))
Example #20
class Document:
    def __init__(self, name, path):
        super().__init__()
        self.name = name
        self.path = path
        self.dictionary = Dictionary()
        self.readedWords = []
        self.totalWords = 0

    def readWords(self, stopWords=[], headers=[], fastReading=False):
        self.dictionary.clean()

        with open(self.path, 'r', encoding="ISO-8859-1") as file:
            lines = file.readlines()

        # Build a filtered copy instead of removing items from the list while
        # iterating over it, which would silently skip lines.
        if headers:
            lines = [line for line in lines
                     if not any(line.startswith(header) for header in headers)]

        if fastReading is False:
            vectorizer = CountVectorizer(stop_words=stopWords)
            x = vectorizer.fit_transform(lines)
            self.readedWords = vectorizer.get_feature_names()
            self.totalWords = len(self.readedWords)

            for arrayLine in x.toarray():
                for i in range(0, len(arrayLine)):
                    if arrayLine[i] != 0:
                        self.dictionary.searchAndAddWord(
                            CountedWord(self.readedWords[i], arrayLine[i]))
        else:
            words = []
            for line in lines:
                words += line.split()
            self.totalWords = len(words)
            for word in words:
                # index() raises ValueError when the word is not a stop word
                # (and AttributeError when stopWords is None), so only those
                # words reach the dictionary.
                try:
                    stopWords.index(word)
                except (ValueError, AttributeError):
                    self.dictionary.searchAndAddWord(CountedWord(word.lower()))

    def clearReadedWords(self):
        self.readedWords = []
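
A minimal usage sketch, assuming a plain-text file on disk; the path, stop words, and headers are made up, and fastReading=True avoids the CountVectorizer dependency:

doc = Document('msg1.txt', 'corpus/msg1.txt')
doc.readWords(stopWords=['the', 'a'], headers=['From:', 'Subject:'],
              fastReading=True)
print(doc.totalWords)
doc.clearReadedWords()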
Example #21
    def testProcessMessageNormalAddForExistingUser(self):
        json_file = open("files/direct_message1.json")
        message_json = simplejson.load(json_file)
        twitter_dm = DirectMessage.NewFromJsonDict(message_json)
        processMessage(twitter_dm)
        query = Dictionary.all()
        results = query.fetch(1)
        self.assertEqual(1, len(results))
        self.assertEqual("", results[0].pronounce)
        self.assertEqual("da_zbur", results[0].twitter_user)
        self.assertEqual(289180663729512448L, results[0].message_id)
        self.assertEqual("to advet", results[0].word)
        self.assertEqual(u"обращаться к,ссылаться на",
                         results[0].meaning)
        self.assertEqual(0, results[0].served)
        self.assertEqual(None, results[0].source_lang)
        self.assertEqual(1, User.all().filter("twitter =",
                         "da_zbur").get().total_points)
        # Test integration with LearnList
        query = LearnList.all()
        ll_results = query.fetch(2)
        self.assertEqual(1, len(ll_results))
        # Check if LearnList references same object
        self.assertEqual(ll_results[0].dict_entry.key(), results[0].key())
Example #22
    def __init__(self, **kwargs):

        dataset = kwargs.get('dataset')
        splitBy = kwargs.get('splitBy')
        split = kwargs.get('split')

        data_json = osp.join('cache/prepro', dataset + "_" + splitBy,
                             split + '.json')

        with open(data_json, 'r') as f:
            self.data = json.load(f)

        dictfile = kwargs.get('dictionaryfile')
        self.dictionary = Dictionary.load_from_file(dictfile)
        if kwargs.get('testrun'):
            self.data = self.data[:32]

        self.spatial = True
        feats_use = '{}_{}_det_feats.h5'.format(dataset, splitBy)
        self.image_features_path_coco = osp.join(kwargs.get('refcoco_frcnn'),
                                                 feats_use)
        self.coco_id_to_index = self.id_to_index(self.image_features_path_coco)
        print("Dataset [{}] loaded....".format(dataset, split))
        print("Split [{}] has {} ref exps.".format(split, len(self.data)))
Example #23
    def createDictEntry(self, twitter_user, message_id, word, meaning,
                        pronounce=""):
        dictEntry = Dictionary()
        dictEntry.twitter_user = twitter_user
        dictEntry.message_id = message_id
        dictEntry.word = word
        dictEntry.pronounce = pronounce
        dictEntry.meaning = meaning
        dictEntry.served = 0
        dictEntry.source_lang = ""
        dictEntry.target_lang = ""
        dictEntry.put()
        return dictEntry
Example #24
def addNewDictEntry(twitter_user, message_id, entry, served):
    new_dict_entry = None
    # No duplicate words allowed for a single user
    c = Dictionary.all().filter("word =", entry["word"]).\
        filter("twitter_user =", twitter_user).count()
    # the original logging call here was masked in the source; logging.debug is an assumption
    logging.debug("Count for word %s is %s" % (entry["word"], c))
    if c == 0:
        new_dict_entry = Dictionary()
        new_dict_entry.pronounce = entry["pronounce"]
        new_dict_entry.twitter_user = twitter_user
        new_dict_entry.message_id = message_id
        new_dict_entry.word = entry["word"]
        new_dict_entry.meaning = entry["meaning"]
        new_dict_entry.served = served
        new_dict_entry.source_lang = entry["source_lang"]
        new_dict_entry.put()
    return new_dict_entry
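
For reference, a sketch of the entry dict this helper expects; the keys are taken from the assignments above, the values are invented (the message id reuses the one from the tests):

entry = {
    "word": "lucrative",
    "pronounce": "[LOO-kruh-tiv]",
    "meaning": "profitable, moneymaking",
    "source_lang": "en",
}
new_entry = addNewDictEntry("da_zbur", 289180663729512448L, entry, 0)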
Example #25
    import os
    import os.path as osp
    import json

    import numpy as np

    for ds in config.dataset:
        kwargs = {**config.global_config, **config.dataset[ds]}
        data_root = kwargs.get('data_root')
        dataset = kwargs.get('dataset')
        splitBy = kwargs.get('splitBy')
        splits = kwargs.get('splits')
        data = []
        for split in splits + ['train']:
            data_json = osp.join('cache/prepro', dataset + "_" + splitBy,
                                 split + '.json')
            with open(data_json, 'r') as f:
                d = json.load(f)
                data.extend(d)

        d = create_dictionary(data, dataset=dataset)
        basedir = os.path.dirname(kwargs['dictionaryfile'].format(dataset))
        if not os.path.exists(basedir):
            os.mkdir(basedir)
        d.dump_to_file(kwargs['dictionaryfile'].format(dataset))
        d = Dictionary.load_from_file(kwargs['dictionaryfile'].format(dataset))
        emb_dim = 300
        glove = 'glove/glove.6B.%dd.txt' % emb_dim
        embedding_basedir = os.path.dirname(kwargs['glove'])
        glove_file = embedding_basedir.format(glove)
        weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
        np.save(
            os.path.join(embedding_basedir.format(ds),
                         'glove6b_init_%dd.npy' % emb_dim), weights)
Example #26
    def postMessage(self, user):
        #print "You are %s " % user.twitter
        words = Dictionary.all()\
            .filter("twitter_user =", user.twitter)\
            .filter("served < ", user.repeat_times)

        dict_entry_list = []
        message = ""
        
        for entry in words:
            dict_entry_list.append(entry)

        # If the user has enough of his own words to fill all slots for the day.
        # If not, we need to fill the remaining slots with words from people he follows.
        if len(dict_entry_list) < user.messages_per_day:
            follow_list = user.i_follow.split(",")
            # for an empty string, split() returns a list with one '' element
            if follow_list == ['']:
                follow_list = []
            # Let's shuffle the list so we get some variety in users
            random.shuffle(follow_list)
            for follow_user in follow_list:
                f_repeat = 0
                for f_user in User.all().filter("twitter =", follow_user):
                    f_repeat = f_user.repeat_times
                    # Getting list of languages user follows
                    follow_lang_list = f_user.follow_lang_list
                    l = []
                    for lang in follow_lang_list.split(","):
                        l.append("'"+lang+"'")
                    lang_str = "(" + ",".join(l) + ")"
                        
                words = Dictionary.all()\
                    .filter("twitter_user =", follow_user)\
                    .filter("served < ", f_repeat)\
                    .filter("source_lang IN ", lang_str)
                for entry in words:
                    dict_entry_list.append(entry)
                    #print "Adding %s from %s" % (entry.word, follow_user)
                if len(dict_entry_list) >= user.messages_per_day:
                    break
        #print "You have %d words in your list" % len(dict_entry_list)    

        # If we have any messages to send at all
        if len(dict_entry_list) > 0:
            dict_entry = random.sample(dict_entry_list,1)[0]
            served = dict_entry.served + 1
            if dict_entry.pronounce:
                pronounce = dict_entry.pronounce
            else:
                pronounce = ""
            count = " [%s]" % served
            # If we are posting a message from one of the followed users, we
            # need to append "(via @username)" when the total message stays
            # under 140 characters.
            if dict_entry.twitter_user != user.twitter:
                via = "(via " + dict_entry.twitter_user + ")"
            else:
                via = ""
            
            if user.default_source_lang != dict_entry.source_lang:
                lang = " ("+dict_entry.source_lang+")"
            else:
                lang = ""

            message = dict_entry.word + lang + pronounce + ": " + dict_entry.meaning + count

            if len(message+via) < 140:
                message = message + via

            if user.message_type == "reply":
                try:
                    self.twitter.api.PostUpdate("@" +  user.twitter + " " + message,
                    in_reply_to_status_id=dict_entry.message_id)
                    #print "You will be sent word %s %s" % (dict_entry.word, via)
                except TwitterError:
                    logging.error("Twitter error: %s when sending message %s" %
                    (TwitterError.message, "@" +  dict_entry.twitter_user+
                    " " + message))
                        
                # Direct message are no longer user
                #if user.message_type == "direct":
                #    self.twitter.api.PostDirectMessage(dict_entry.twitter_user, message)

            # We do not change the served field for words from other users
            if via == "":
                dict_entry.served = dict_entry.served + 1
                dict_entry.put()


        return message
Example #27
def create_glove_embedding_init(idx2word, glove_file):
    word2emb = {}
    with open(glove_file, 'r') as f:
        entries = f.readlines()
    emb_dim = len(entries[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)

    for entry in entries:
        vals = entry.split(' ')
        word = vals[0]
        vals = [float(val) for val in vals[1:]]
        word2emb[word] = np.array(vals)
    for idx, word in enumerate(idx2word):
        if word not in word2emb:
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    ds = 'Ourdb'
    d = create_dictionary(ds)
    d.dump_to_file('data/dictionary.pickle')

    d = Dictionary.load_from_file('data/dictionary.pickle')
    emb_dim = 300
    glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save('data/glove6b_init_%dd.npy' % emb_dim, weights)
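
A quick sketch of consuming the saved matrix later, for example to seed an embedding layer; the torch wiring is an assumption, only the .npy path comes from the block above:

import numpy as np
import torch
import torch.nn as nn

weights = np.load('data/glove6b_init_300d.npy')
emb = nn.Embedding(weights.shape[0], weights.shape[1])
emb.weight.data.copy_(torch.from_numpy(weights))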