def _cache_related(self, cls, query):
    """Cache a translation's grammatical classes, words and reverse words.

    Parameters:
        cls: iterable of dicts, each with keys "name" (class name),
             "words" (iterable of word names) and "details" (mapping of
             stripped word name -> iterable of reverse-word names).
        query: the translation object the grammatical classes belong to.
    """
    for class_data in cls:
        grammatical_class = models.GrammaticalClass(name=class_data["name"],
                                                    translation=query)
        grammatical_class.save()
        for word_name in class_data['words']:
            word_name = word_name.strip()
            word = models.Word(name=word_name,
                               grammaticalClass=grammatical_class)
            word.save()
            # BUGFIX(readability): the original reused the loop variable
            # `name` for both the word and its reverse words, shadowing the
            # word's name; distinct names make the lookup key explicit.
            for reverse_name in class_data["details"][word_name]:
                reverse_name = reverse_name.strip()
                # get_or_create avoids duplicate ReverseWord rows; the
                # "created" flag is not needed here.
                obj, _created = models.ReverseWord.objects.get_or_create(
                    name=reverse_name)
                word.reversewords.add(obj)
def import_dataset():
    """Import the bundled test dataset (users and their words) into the DB.

    Reads ``app.test_dataset.datasets`` and creates one User row per entry
    plus a Word row for each of the user's words.
    """
    from app import test_dataset
    for user in test_dataset.datasets:
        user_model = models.User(social_id=user['social_id'],
                                 nickname=user['nickname'],
                                 email=user['email'])
        db.session.add(user_model)
        # Commit now so user_model.id is populated for the word rows below.
        db.session.commit()
        for word in user['words']:
            word_model = models.Word(name=word['name'],
                                     explain=word['explain'],
                                     example=word['example'],
                                     created_at=word['created_at'],
                                     updated_at=word['updated_at'],
                                     user_id=user_model.id)
            db.session.add(word_model)
        # One commit per user instead of one per word (the original issued
        # a commit for every single word row).
        db.session.commit()
    # Parenthesized print works identically under Python 2 and 3.
    print('Import ends')
def upload_file(book_id):
    """A simple GUI-based upload form to simplify testing, which allows to
    upload the abstract.

    GET returns the static upload form (``app.config['FORM_DATA']``).
    POST expects a multipart ``file`` part plus an optional ``letters``
    field (minimum keyword length, default 3); it replaces any existing
    vocabulary for this catalogue/book with one built from the uploaded
    abstract and returns a JSON status string.
    """
    if request.method == 'POST':
        # Minimum keyword length; the form value arrives as a string.
        min_letters = int(request.form.get('letters', '3'))
        # check if the post request has the file part
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        # if user does not select file, browser also submits an empty part without filename
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            try:
                # Save the upload under a sanitized name in the upload folder.
                filename = secure_filename(file.filename)
                path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
                file.save(path)
                user, catalogue = getUserAndCatalogue()
                existing = models.Vocabulary.query.filter_by(
                    catalogueId=catalogue.id, bookId=book_id).first()
                if existing is not None:
                    # Clean up existing vocabulary in case it already exists
                    db.session.delete(existing)
                    db.session.commit()
                # Create a new vocabulary to hold the data
                vocabulary = models.Vocabulary(catalogueId=catalogue.id,
                                               bookId=book_id,
                                               letterLimit=min_letters)
                db.session.add(vocabulary)
                book = models.Book.query.filter_by(id=book_id).first()
                # Extract keywords from the abstract, then drop the temp file.
                keywords = process_abstract(path, min_letters)
                os.remove(path)
                for keyword in keywords:
                    # Reuse an existing Word row for this language/text.
                    word = models.Word.query.filter_by(language=book.language,
                                                       text=keyword).first()
                    if word is None:
                        word = models.Word(text=keyword, language=book.language)
                        db.session.add(word)
                    # NOTE(review): vocabulary.id is only populated once the
                    # pending insert is flushed — the queries above should
                    # trigger SQLAlchemy's autoflush; confirm autoflush is
                    # enabled for this session.
                    usage = models.WordUsage(vocabularyId=vocabulary.id,
                                             word=word,
                                             creation=datetime.datetime.now())
                    db.session.add(usage)
                db.session.commit()
                # Re-read so vocabulary.json() reflects committed state.
                db.session.refresh(vocabulary)
                json_obj = add_service_info_to_json(
                    code='OK',
                    state='Vocabulary created successfully',
                    json_obj=vocabulary.json())
            except IOError as ioerr:
                # File-system failure while saving/reading the upload.
                json_obj = add_service_info_to_json(code='Error',
                                                    state=str(ioerr))
            except exc.OperationalError as err:
                # Database-level failure.
                json_obj = add_service_info_to_json(code='Error',
                                                    state=str(err))
            return json.dumps(json_obj, sort_keys=True)
    return app.config['FORM_DATA']
def vocab_analysis(comments, username):
    """Analyse `comments` for `username` and score their vocabulary.

    Words from not-yet-seen comments are folded into the global Word table;
    words from already-processed comments build the user's frequency map,
    which is then normalised against global usage counts.

    Returns a 3-tuple:
        (top-20 {word: relative-use score}, newWords, uniqueWords)
    """
    vocab = {}  # word -> this user's count, later normalised by global use
    newWords = []  # words never stored in the Word table before
    uniqueWords = []  # words whose global use count equals this user's count
    commentWords = {}  # word -> count, from comments seen for the first time
    # Total word uses recorded across all users; SUM() is NULL on an empty
    # table, hence the None check. (Appears unused below — kept as-is.)
    databaseWords = (db.session.execute(
        "SELECT SUM(totalUse) AS wordSum FROM words").first()).wordSum
    if databaseWords is None:
        databaseWords = 0
    # Track whether this is a first-time user; the flag itself is not read
    # later in this function.
    newUser = True
    if models.Username.query.get(username) is not None:
        newUser = False
        if debug: print "old user"
    else:
        if debug: print "new user"
        # First time we see this user: remember them.
        Username = models.Username(Username=username)
        db.session.add(Username)
    for comment in comments:
        commentID = comment.id.encode('ascii', 'ignore')
        if debug: print "commentID: " + commentID
        text = comment.body
        if debug: print "Text: " + repr(text)
        # Strip punctuation, then normalise to lower-case ASCII tokens.
        noPunctText = translate_non_alphanumerics(text)
        words = noPunctText.encode('ascii', 'ignore').lower().split()
        if models.Comment.query.get(commentID) is None:
            '''processes all the words from new comments, new words are added to the list'''
            if debug: print "New comment"
            for word in words:
                # Words of 20+ chars are treated as links/garbage and skipped.
                if len(word) < 20:
                    if debug: print "added " + word + " to new comments words"
                    commentWords[word] = commentWords.get(word, 0) + 1
            # Mark the comment as processed so it is not double-counted on
            # the next run.
            db.session.add(models.Comment(comment=commentID))
        else:
            if debug: print "Old comment"
            for word in words:
                '''puts all appropriate words into the vocab dict'''
                if len(word) < 20:
                    '''Consider parsing for links etc'''
                    if debug: print "added " + word + " to vocab list"
                    vocab[word] = vocab.get(word, 0) + 1
    # Fold newly-seen comment words into the global Word table.
    for word in commentWords:
        if db.session.query(
                models.Word).filter(models.Word.word == word).count() == 0:
            if debug: print "new word: " + word + "!"
            newWordEntry = models.Word(word=word, totalUse=commentWords[word])
            db.session.add(newWordEntry)
            newWords.append(word)
        else:
            if debug: print "old word: " + word
            vocab[word] = vocab.get(word, 0) + 1
            # NOTE(review): totalUse is incremented by 1 even when this user
            # used the word commentWords[word] times — confirm intended.
            db.session.query(
                models.Word).filter(models.Word.word == word).update(
                    {models.Word.totalUse: models.Word.totalUse + 1})
    # Normalise: a word whose global count equals this user's count is
    # "unique" to them; otherwise score = user's uses / global uses.
    for word in vocab:
        otherUses = (models.Word.query.get(word).totalUse)
        if otherUses == vocab[word]:
            uniqueWords.append(word)
        else:
            vocab[word] = float(vocab[word]) / float(otherUses)
    # Unique words are reported separately, not in the scored dict.
    for word in uniqueWords:
        del vocab[word]
    db.session.commit()
    if debug: print "newWords: " + str(newWords)
    if debug: print vocab
    # Top 20 words by score, highest first.
    return dict(
        sorted(vocab.iteritems(), key=operator.itemgetter(1),
               reverse=True)[:20]), newWords, uniqueWords
# Database bootstrap: on first run, create the migrate repository, put the
# database under version control, and seed the word table from a downloaded
# word list; on later runs, just re-attach version control.
if not os.path.exists(SQLALCHEMY_MIGRATE_REPO):
    api.create(SQLALCHEMY_MIGRATE_REPO, 'database repository')
    api.version_control(SQLALCHEMY_DATABASE_URI, SQLALCHEMY_MIGRATE_REPO)
    app.logger.info("Creating database... {0}".format(SQLALCHEMY_DATABASE_URI))
    app.logger.info("Importing word lists...")
    app.logger.info("Downloading wordlist from {0}".format(WORD_FILE_URL))
    c = 0  # number of words imported
    d = FileDownloader(WORD_FILE_URL)
    d.DownloadUrl(WORD_FILE)
    app.logger.info("Wordlist downloaded and saved to {0}".format(WORD_FILE))
    if os.path.isfile(WORD_FILE):
        with open(WORD_FILE) as f:
            for line in f:
                c += 1
                line = line.strip()
                # sortedword holds the word's letters in alphabetical order
                # (useful for anagram-style lookups).
                w = models.Word(word=line,
                                wordlength=len(line),
                                sortedword=''.join(sorted(line)))
                db.session.add(w)
        # Single commit for the whole batch.
        db.session.commit()
        app.logger.info("Completed.")
        app.logger.info("%d words added." % (c))
    else:
        # BUGFIX: message read "World file"; it refers to the word file.
        app.logger.info(
            "Word file not found '{0}'. Could not create word list.".format(
                WORD_FILE))
else:
    # Repository already exists: attach at its current version.
    api.version_control(SQLALCHEMY_DATABASE_URI, SQLALCHEMY_MIGRATE_REPO,
                        api.version(SQLALCHEMY_MIGRATE_REPO))
import codecs
import re

# Import a CEDICT-style Chinese dictionary file where each line looks like:
#   traditional simplified [pinyin] /english1/english2/.../
TOTAL_LINES = 110255.0  # approximate line count, used only for progress output

create_engine('sqlite:///app.db?encoding=Unicode')
i = 0
# BUGFIX: use a context manager (the handle was closed manually) and iterate
# the file lazily instead of materialising it with readlines().
with open('chinese-dictionary.txt', 'r') as f:
    for line in f:
        try:
            parts = line.split(" ", 2)
            traditional = parts[0]
            simplified = parts[1]
            pinyin = re.findall(r'\[([^]]*)\]', parts[2])
            # NOTE(review): [^]]* also spans '/', so english[0] can contain
            # several definitions joined by '/' — confirm that is intended
            # (the character class was perhaps meant to be [^/]).
            english = re.findall(r'\/([^]]*)\/', parts[2])
            i += 1
            if i % 1000 == 0:
                # Progress fraction; the division is already float, so the
                # original float() wrapper was redundant.
                print(i / TOTAL_LINES)
            if english:
                new_word = models.Word(
                    traditional=unicode(traditional, "utf-8"),
                    simplified=unicode(simplified, 'utf-8'),
                    pinyin=unicode(pinyin[0], 'utf-8'),
                    english=unicode(english[0], 'utf-8'))
                db.session.add(new_word)
                db.session.commit()
        except Exception:
            # Best-effort import: skip malformed lines. (Was a bare except,
            # which also swallowed KeyboardInterrupt/SystemExit.)
            pass
# To query everything in the Word table:
#   word = models.Word.query.all()