def add_word(word):
    """
    Return the Word row matching the given text, creating it when it is absent.

    A newly created Word is added to the session and committed before it is
    returned; an existing one is returned untouched.
    @param word: The text of the word to look up or create.
    @return: The existing or newly created Word object.
    """
    from text_indexer.orm.base import session

    existing = session.query(Word).filter_by(word=word).first()
    if existing:
        return existing

    created = Word(word)
    session.add(created)
    session.commit()
    return created
def get_groups(name="", type=""):
    """
    Return the groups stored in the db, optionally narrowed by name and type.

    @param name: When non-empty, only groups with this name are returned.
    @param type: When non-empty, only groups of this type are returned.
    @return: A list of the matching Group objects.
    """
    from text_indexer.orm.base import session

    query = session.query(Group)
    # Apply each criterion only when the caller supplied a value for it.
    for column, wanted in (("name", name), ("type", type)):
        if wanted:
            query = query.filter_by(**{column: wanted})
    return query.all()
def import_file(self, name, writer, performer, path):
    """
    Load the file at the given path into the song with the given name.

    The song is looked up by name; when no such song exists a new one is
    created and added to the session before the file is loaded into it.
    @param name: The name of the song.
    @param writer: The song writer (used only when the song is created).
    @param performer: The song performer (used only when the song is created).
    @param path: The path of the song file.
    """
    lookup = session.query(Song).filter_by(name=name)
    song = lookup.first()
    if not song:
        song = Song(name=name, writer=writer, performer=performer)
        session.add(song)

    self.load_file_into_song(path, song)
def get_stanza(self, stanza_number):
    """
    Build the text of one stanza of this song.

    Every word is followed by a single space, and a newline is emitted when a
    word's stanza line number is ahead of the line currently being written.
    @param stanza_number: The number of the stanza to return.
    @return: The stanza as a string ('' when the stanza has no words).
    """
    from text_indexer.orm.base import session
    from text_indexer.orm.word_position import WordPosition

    positions = (
        session.query(WordPosition)
        .filter_by(song_id=self.id, stanza_number=stanza_number)
        .order_by(WordPosition.stanza_line_number, WordPosition.row_word_number)
        .all()
    )

    pieces = []
    current_line = 1
    for position in positions:
        if position.stanza_line_number > current_line:
            pieces.append('\n')
            current_line += 1
        pieces.append(position.word.word + ' ')
    return ''.join(pieces)
def __init__(self, parent):
    """
    Build the word index panel.

    Creates a grid with one row per WordPosition in the db and eight labelled
    columns, a combo box listing the names of the groups of type 'group', and
    the "Show Group Index" / "Refresh" buttons, then calls self.show_words().
    @param parent: The wx parent window of this panel.
    """
    from text_indexer.orm.base import session
    from text_indexer.orm.word_position import WordPosition

    wx.Panel.__init__(self, parent, -1)
    self.grid = wx.grid.Grid(self, -1, (100, 50), (1220, 400))

    ordering = (
        WordPosition.song_id,
        WordPosition.line_number,
        WordPosition.row_word_number,
    )
    self.wps = session.query(WordPosition).order_by(*ordering).all()

    column_labels = (
        "Song",
        "Words",
        "Number in song",
        "Paragraph",
        "Number in paragraph",
        "Line number",
        "number in line",
        "word occurrence",
    )
    self.grid.CreateGrid(len(self.wps), len(column_labels))
    for column in range(len(column_labels)):
        self.grid.SetColSize(column, 130)
    for column, label in enumerate(column_labels):
        self.grid.SetColLabelValue(column, label)

    # Group selection controls placed below the grid.
    group_text = wx.StaticText(self, -1, "Select Group", (100, 480))
    self.groups_list = [group.name for group in Group.get_groups(type='group')]
    self.select_group = wx.ComboBox(
        self, 500, "", (100, 500), (160, -1), self.groups_list, wx.CB_DROPDOWN
    )

    self.group_button = wx.Button(self, -1, "Show Group Index", (320, 500))
    self.Bind(wx.EVT_BUTTON, self.groupChosen, self.group_button)

    self.refresh_button = wx.Button(self, -1, "Refresh", (500, 500))
    self.Bind(wx.EVT_BUTTON, self.refresh, self.refresh_button)

    self.show_words()
def get_songs(name="", writer="", performer="", word=""):
    """
    Return the songs from the db, optionally narrowed by the given criteria.

    Every criterion left empty is ignored.
    @param name: The name of the song.
    @param writer: The writer of the song.
    @param performer: The performer of the song.
    @param word: A word that is in the song.
    @return: A list of the matching Song objects.
    """
    from text_indexer.orm.base import session

    query = session.query(Song)
    criteria = (("name", name), ("writer", writer), ("performer", performer))
    for column, wanted in criteria:
        if wanted:
            query = query.filter_by(**{column: wanted})

    # The word criterion is matched through the Song.words relationship.
    if word:
        query = query.join(Song.words).filter_by(word=word)
    return query.all()
def phraseChosen(self, evt):
    """
    Show the stanzas of the selected songs that contain the phrase selected in self.t3.

    The selection is split into words, every occurrence of the first word is
    followed word by word through the song, and the stanzas in which the whole
    phrase occurs are written back into self.t3 (grouped under the names of
    the songs selected in self.lb1) with the phrase highlighted.

    Nothing is changed when the selection holds no word or when its first word
    is not in the db.
    @param evt: The wx event that triggered this handler (not used).
    """
    from text_indexer.orm.base import session

    start, end = self.t3.GetSelection()
    selected = self.t3.GetValue()[start:end]

    # split() without arguments treats newlines and runs of blanks as a
    # single separator, so no empty "words" end up in the phrase.
    words = selected.split()
    if not words:
        return

    word = session.query(Word).filter_by(word=words[0]).first()
    if word is None:
        return

    number_of_words = len(words)
    matches = set()
    for first_wp in word.word_positions:
        wp = first_wp
        matched = 1
        while matched < number_of_words:
            wp = wp.get_next_word()
            # get_next_word() yields None past the last word of the song.
            if wp is None or wp.word.word != words[matched]:
                break
            matched += 1
        if matched == number_of_words:
            matches.add(first_wp)

    # Sort so that the stanzas of a song are listed in song order rather
    # than in the arbitrary iteration order of a set.
    ordered_matches = sorted(matches, key=lambda match: match.number_in_song)

    shown_stanzas = set()
    parts = []
    songs = [self.lb1.Items[selection] for selection in self.lb1.Selections]
    for song in songs:
        added_song_name = False
        for wp in ordered_matches:
            if wp.song.name != song:
                continue
            if not added_song_name:
                parts.append(song + ':\n\n')
                added_song_name = True
            stanza_key = (wp.song.id, wp.stanza_number)
            if stanza_key not in shown_stanzas:
                shown_stanzas.add(stanza_key)
                parts.append(wp.song.get_stanza(wp.stanza_number))
                parts.append('\n\n\n')
    text = ''.join(parts)

    self.t3.SetValue(text)
    self.t3.SetScrollPos(1, 1)

    # The selection is user text, not a pattern: escape it so characters
    # such as '?' or '(' are matched literally.  The +1 skips the leading
    # separator that is part of the pattern.
    highlight = wx.TextAttr("RED", "YELLOW")
    for m in re.finditer(re.escape(" " + selected), text):
        self.t3.SetStyle(m.start() + 1, m.end(), highlight)
    for m in re.finditer(re.escape("\n" + word.word), text):
        self.t3.SetStyle(m.start() + 1, m.end(), highlight)
def _create_word_in_DB(
    self, word, db_song, stanza_number, row_word_number, line_number, stanza_line_number, number_in_song
):
    """
    Queue a WordPosition for one occurrence of a word in a song.

    The Word is taken from the db when it already exists there, otherwise
    from self.words_to_add, and is created and remembered in
    self.words_to_add when it is seen for the first time. The resulting
    WordPosition is appended to self.word_positions; nothing is added to the
    session here.
    @param word: The text of the word.
    @param db_song: The Song object the word belongs to.
    @param stanza_number: The number of the stanza the word is in.
    @param row_word_number: The number of the word within its line.
    @param line_number: The number of the line within the song.
    @param stanza_line_number: The number of the line within its stanza.
    @param number_in_song: The number of the word within the song.
    """
    db_word = session.query(Word).filter_by(word=word).first()
    if not db_word:
        db_word = self.words_to_add.get(word)
        if db_word is None:
            # First occurrence of a new word: keep the created object in
            # db_word as well, otherwise the position below would be built
            # with word=None.
            db_word = Word(word=word)
            self.words_to_add[word] = db_word

    word_position = WordPosition(
        word=db_word,
        song=db_song,
        row_word_number=row_word_number,
        stanza_number=stanza_number,
        line_number=line_number,
        stanza_line_number=stanza_line_number,
        number_in_song=number_in_song,
    )
    self.word_positions.append(word_position)
def get_text(self):
    """
    Build the full text of this song.

    Every word is followed by a single space. A blank line is emitted when a
    word's stanza number is ahead of the stanza being written, and a newline
    when its stanza line number is ahead of the line being written.
    @return: The text of the song as a string ('' when it has no words).
    """
    from text_indexer.orm.base import session
    from text_indexer.orm.word_position import WordPosition

    positions = (
        session.query(WordPosition)
        .filter_by(song_id=self.id)
        .order_by(WordPosition.line_number, WordPosition.row_word_number)
        .all()
    )

    pieces = []
    current_stanza = 1
    current_line = 1
    for position in positions:
        if position.stanza_number > current_stanza:
            current_stanza += 1
            pieces.append('\n\n')
            # Line numbering restarts inside every stanza.
            current_line = 1
        if position.stanza_line_number > current_line:
            pieces.append('\n')
            current_line += 1
        pieces.append(position.word.word + ' ')
    return ''.join(pieces)
def onSearch(self, evt):
    """
    Look up a word of the selected song by position and show it in self.word_text.

    The song is the one selected in self.lb1. Depending on the selected radio
    button the word is located by:
      - radio1: its number in the song (self.text1),
      - radio2: its line number and its number in the line
        (self.text2_1, self.text2_2),
      - radio3: its stanza and its number in the stanza
        (self.text3_1, self.text3_2).
    Any failure (nothing selected, non-numeric input, no such position) ends
    with "No word found" being displayed instead of an error being raised.
    @param evt: The wx event that triggered this handler (not used).
    """
    try:
        from text_indexer.orm.base import session

        song_name = self.lb1.Items[self.lb1.GetSelection()]
        song = Song.get_songs(name=song_name)[0]
        query = session.query(WordPosition).filter_by(song_id=song.id)

        if self.radio_selected == self.radio1:
            wp = query.filter_by(number_in_song=self.text1.Value).first()
            self.word_text.Value = wp.word.word
        elif self.radio_selected == self.radio2:
            wp = query.filter_by(
                line_number=self.text2_1.Value,
                row_word_number=self.text2_2.Value,
            ).first()
            self.word_text.Value = wp.word.word
        elif self.radio_selected == self.radio3:
            stanza = song.get_stanza(int(self.text3_1.Value) - 1)
            # split() drops the newlines and the trailing blank that
            # get_stanza() puts between and after the words.
            stanza_words = stanza.split()
            word_index = int(self.text3_2.Value) - 1
            # A negative index would silently count from the end of the
            # stanza, so treat it like any other missing position.
            if 0 <= word_index < len(stanza_words):
                self.word_text.Value = stanza_words[word_index]
            else:
                self.word_text.Value = ""

        if not self.word_text.Value:
            self.word_text.Value = "No word found"
    except Exception:
        # Best-effort UI handler: whatever went wrong, report it to the
        # user as a missing word.
        self.word_text.Value = "No word found"
def add_expression(expression):
    """
    Store an expression together with the ordered words it is made of.

    The expression is split on spaces; empty tokens caused by leading,
    trailing or repeated spaces are skipped so that no blank Word rows are
    created. Words missing from the db are created, then the Expression
    itself, then one WordExpressionAssocaition per word whose place is the
    1-based position of the word in the expression.
    @param expression: The text of the expression.
    @return: The newly created Expression object.
    """
    from text_indexer.orm.word import Word
    from text_indexer.orm.base import session

    db_words = []
    for token in expression.split(' '):
        if not token:
            continue
        db_word = session.query(Word).filter_by(word=token).first()
        if not db_word:
            db_word = Word(word=token)
            session.add(db_word)
        db_words.append(db_word)
    # The ids read below are expected to be populated once the rows have
    # been committed.
    session.commit()

    db_expression = Expression(name=expression)
    session.add(db_expression)
    session.commit()

    for place, db_word in enumerate(db_words, 1):
        word_expression = WordExpressionAssocaition(
            word_id=db_word.id, expression_id=db_expression.id, place=place
        )
        session.add(word_expression)
    session.commit()
    return db_expression
def get_next_word(self):
    """
    Return the word position that follows this one in the same song.

    @return: The WordPosition of the same song whose number_in_song is one
        higher than this one's, or None when the query finds no such row.
    """
    from text_indexer.orm.base import session

    following_number = self.number_in_song + 1
    candidates = session.query(WordPosition).filter_by(
        song_id=self.song_id, number_in_song=following_number
    )
    return candidates.first()
def get_words(word=""):
    """
    Return the words stored in the db, ordered by their text.

    @param word: When non-empty, only the words with exactly this text are returned.
    @return: A list of the matching Word objects.
    """
    from text_indexer.orm.base import session

    ordered = session.query(Word).order_by(Word.word)
    selected = ordered.filter_by(word=word) if word else ordered
    return selected.all()
# NOTE(review): assumes the commit runs once, after every queued position has
# been added - confirm against the enclosing method.
for word_position in self.word_positions:
    session.add(word_position)
session.commit()


def _create_word_in_DB(
    self, word, db_song, stanza_number, row_word_number, line_number, stanza_line_number, number_in_song
):
    """
    Queue a WordPosition for one occurrence of a word in a song.

    The Word is taken from the db when it already exists there, otherwise
    from self.words_to_add, and is created and remembered in
    self.words_to_add when it is seen for the first time. The resulting
    WordPosition is appended to self.word_positions; nothing is added to the
    session here.
    @param word: The text of the word.
    @param db_song: The Song object the word belongs to.
    @param stanza_number: The number of the stanza the word is in.
    @param row_word_number: The number of the word within its line.
    @param line_number: The number of the line within the song.
    @param stanza_line_number: The number of the line within its stanza.
    @param number_in_song: The number of the word within the song.
    """
    db_word = session.query(Word).filter_by(word=word).first()
    if not db_word:
        db_word = self.words_to_add.get(word)
        if db_word is None:
            # First occurrence of a new word: keep the created object in
            # db_word as well, otherwise the position below would be built
            # with word=None.
            db_word = Word(word=word)
            self.words_to_add[word] = db_word

    word_position = WordPosition(
        word=db_word,
        song=db_song,
        row_word_number=row_word_number,
        stanza_number=stanza_number,
        line_number=line_number,
        stanza_line_number=stanza_line_number,
        number_in_song=number_in_song,
    )
    self.word_positions.append(word_position)


if __name__ == "__main__":
    # FileImporter().import_file("Call Me Maybe", "Carly Rae Jepsen", r"C:\text\text_indexer\songs\call_me_maybe.txt")
    song = session.query(Song).first()
    song.get_stanza(2)
def __init__(self, parent):
    """
    Build the statistics panel.

    Shows a 2x4 grid with the average number of characters (row "Chars") and
    of words (row "Words") per word, line, paragraph and song, computed from
    every WordPosition in the db. An average whose divisor is zero (for
    example with an empty db) is shown as 0.0.
    @param parent: The wx parent window of this panel.
    """
    from text_indexer.orm.base import session

    wx.Panel.__init__(self, parent, -1)

    title = wx.StaticText(self, -1, "Statistics", (600, 50))
    title.SetFont(wx.Font(24, wx.SWISS, wx.NORMAL, wx.NORMAL, underline=True))

    self.grid = wx.grid.Grid(self, -1, (200, 150), (900, 140))
    self.grid.CreateGrid(2, 4)
    for column, label in enumerate(("Word", "Line", "Paragraph", "Song")):
        self.grid.SetColSize(column, 200)
        self.grid.SetColLabelValue(column, label)
    for row, label in enumerate(("Chars", "Words")):
        self.grid.SetRowSize(row, 50)
        self.grid.SetRowLabelValue(row, label)

    number_of_words = 0
    number_of_chars = 0
    number_of_lines = 0
    number_of_paragraphs = 0
    # Position of the previous word; None never equals a real value, so
    # the first word always opens a new paragraph and a new line.
    song_id = stanza = line = None
    positions = session.query(WordPosition).order_by(
        WordPosition.song_id, WordPosition.line_number
    )
    for wp in positions.all():
        number_of_words += 1
        number_of_chars += len(wp.word.word)
        if wp.song_id != song_id or wp.stanza_number != stanza:
            # A new song or a new stanza starts a paragraph and a line.
            number_of_paragraphs += 1
            number_of_lines += 1
        elif wp.line_number != line:
            number_of_lines += 1
        # Always remember where this word was, otherwise every further
        # word of a new line would be counted as yet another line.
        song_id, stanza, line = wp.song_id, wp.stanza_number, wp.line_number

    number_of_songs = len(Song.get_songs())

    def average(total, count):
        # Float division that yields 0.0 instead of raising on a zero divisor.
        return float(total) / count if count else 0.0

    self.grid.SetCellValue(0, 0, str(average(number_of_chars, number_of_words)))
    self.grid.SetCellValue(0, 1, str(average(number_of_chars, number_of_lines)))
    self.grid.SetCellValue(0, 2, str(average(number_of_chars, number_of_paragraphs)))
    self.grid.SetCellValue(0, 3, str(average(number_of_chars, number_of_songs)))
    self.grid.SetCellValue(1, 0, '1')
    self.grid.SetCellValue(1, 1, str(average(number_of_words, number_of_lines)))
    self.grid.SetCellValue(1, 2, str(average(number_of_words, number_of_paragraphs)))
    self.grid.SetCellValue(1, 3, str(average(number_of_words, number_of_songs)))
def get_expressions(expression=""):
    """
    Return the expressions stored in the db.

    @param expression: When non-empty, only the expressions with exactly this name are returned.
    @return: A list of the matching Expression objects.
    """
    from text_indexer.orm.base import session

    query = session.query(Expression)
    if not expression:
        return query.all()
    return query.filter_by(name=expression).all()