def __songfiles(self):
    """
    Create an array of song objects for this card.

    Each non-blank line of self.body is treated as a path relative to the
    configured songs directory, and a MedusaSong built from the resulting
    full path is appended to self.songs.
    """
    # The directory prefix is the same for every song, so look it up once
    # instead of re-reading both configs on every iteration.
    card_root = GlobalConfig.get("paths", "data_root") + "/private"
    songs_prefix = card_root + "/" + self.config.get("paths", "songs") + "/"

    for songpath in self.body.splitlines():
        # A blank line cannot name a song file; without this guard it would
        # yield a "song" whose path is the songs directory itself.
        if not songpath.strip():
            continue
        self.songs.append(MedusaSong(songs_prefix + songpath))
def __add_file_to_index(self, fnmtime, filename, ctype="news"):
    """
    Reads in a file, processes it into lines, ElementTree grabs text out
    of the tags, processes the input to remove banal words and symbols,
    and then adds it to the index.

    fnmtime is the file's modification time, stored (as a string) next to
    the document. filename is the file's name inside the directory
    configured for ctype, and doubles as the document's unique key.
    ctype selects which "paths" config entry the file lives under.

    Errors from opening or parsing the file propagate to the caller. The
    index writer is only acquired once the content is ready, so a failure
    while reading or parsing never leaves the index locked.
    """
    card_root = GlobalConfig.get("paths", "data_root") + "/private"
    card_path = card_root + "/" + self.config.get("paths", ctype)

    with open(card_path + "/" + filename, 'r', encoding='utf-8') as indexfh:
        lines = indexfh.read().splitlines()

    unrolled = unroll_newlines(lines)

    # The first two lines are indexed verbatim. Slicing rather than
    # indexing keeps files shorter than two lines from raising IndexError.
    # NOTE(review): the two lines are joined without a separator, as
    # before -- confirm whether a space between them is wanted.
    parts = list(unrolled[:2])
    for line in unrolled:
        if line.startswith('<p'):
            element = fromstring(escape_amp(line))
            parts.extend(text + " " for text in element.itertext())
    body = "".join(parts)

    # Presumably stores the cleaned text in self.content, which is what
    # gets written to the index below -- verify against __process_input.
    self.__process_input(body, returning="contents")

    # To limit index locking, this is the only function that writes to the
    # index. Used as a context manager, the writer commits on success and
    # cancels (releasing its lock) if update_document raises.
    with self.index.writer() as writer:
        # Update wraps add if the document hasn't been inserted, and
        # replaces current indexed data if it has been inserted. This
        # requires the file parameter to be set "unique" in the Schema
        writer.update_document(file=filename, ctype=ctype,
                               mtime=str(fnmtime), content=self.content)
def __random_choice(self):
    """
    Pick one of the numbered themes at random.

    Used when the configuration asks for a random theme, or when no theme
    was provided in the initial state. Sets self.index to the chosen
    number, self.theme to the matching "themes" config value, and flags
    the choice as random.
    """
    # Re-seed without an explicit value so the choice is not tied to a
    # fixed seed
    seed()
    highest_index = self.count - 1
    picked = randint(0, highest_index)
    self.index = picked
    self.theme = GlobalConfig.get("themes", str(picked))
    self.random = True
def __choose_theme(self, desired_theme=None):
    """
    Settle the final theme index.

    With no desired_theme and self.theme equal to "random", a theme is
    picked at random. Otherwise self.index is set to the number of the
    first "themes" config entry (skipping the first item in the section)
    whose value equals self.theme.
    """
    wants_random = desired_theme is None and self.theme == "random"
    if wants_random:
        self.__random_choice()
        return

    # The first item of the section is skipped; the remaining entries are
    # expected to have integer keys.
    numbered_entries = GlobalConfig.items("themes")[1:]
    matching_indexes = [
        int(entry[0]) for entry in numbered_entries
        if entry[1] == self.theme
    ]
    self.index = matching_indexes[0]
def __init__(self):
    """
    Load the theme settings from the global config and pick a starting
    theme.

    self.theme is the directory path to the theme.
    self.index is which Nth value in the config has our theme.
    self.index and preferences.thm should be the same.
    self.count is the number of items in the "themes" section minus one.
    self.default is the configured "default" value, which may be the
    literal string "random".
    """
    self.random = False
    self.theme = None
    self.index = None

    theme_entries = GlobalConfig.items("themes")
    self.count = len(theme_entries) - 1
    self.default = GlobalConfig.get("themes", "default")

    # The index and theme values must always end up set. Otherwise a
    # malformed preferences cookie would be treated as a valid session,
    # and an incorrect theme index would be written back into it.
    if self.default != "random":
        self.theme = self.default
        self.__choose_theme()
        return

    self.__random_choice()
def set(self, desired_theme=None):
    """
    Apply a requested theme on top of the globally loaded one.

    The Global Theme is set during state loading; this method handles the
    attempted/imported values, in case state-cookie theme settings and
    preferences theme settings need to be deconflicted.

    desired_theme is None for the configured default, -1 for a random
    pick, and otherwise a value convertible to int that is wrapped into
    the valid range of theme numbers.
    """
    if desired_theme is None:
        # Nothing requested: fall back to the plain default value
        self.theme = self.default
    elif desired_theme == -1:
        # "Random" was selected from the menu
        self.__random_choice()
    else:
        # Explicit user input; an out-of-range number is wrapped around
        # with a modulo on the number of themes
        wrapped = int(desired_theme) % self.count
        self.index = wrapped
        self.theme = GlobalConfig.get("themes", str(wrapped))
        self.random = False

    # Either an index or "random" is in place now; make the final choice
    self.__choose_theme(desired_theme)
def __add_ctype_to_index(self, ctype):
    """
    Take a file type, list all the files there, and add all the body
    contents to the index.

    A file is (re)indexed only when its modification time on disk is newer
    than the mtime stored for it in the index, or when the index has no
    usable entry for it. Files that have disappeared are skipped.
    """
    # Make sure BaseFiles is populated
    opendir(self.config, ctype)

    card_root = GlobalConfig.get("paths", "data_root") + "/private"
    card_path = card_root + "/" + self.config.get("paths", ctype)

    for filename in BaseFiles[ctype]:
        try:
            fnmtime = int(os.path.getmtime(card_path + "/" + filename))
        except OSError:
            # File has been removed, nothing to index for this one. Move
            # on to the next file rather than abandoning the whole ctype.
            continue

        try:
            stored = self.searcher.document(file=filename)
            lastmtime = int(float(stored['mtime']))
        except Exception:
            # Best effort: a missing document, a missing or malformed
            # mtime, or a failed lookup all mean "treat as never indexed".
            lastmtime = 0

        # If small revisions were made after the fact, the indexes won't
        # be accurate unless we reindex this file now
        if lastmtime < fnmtime:
            self.__add_file_to_index(fnmtime, filename, ctype)
def __init__(self, state):
    """
    Open (or create) the search index, clean up the incoming filter and
    query terms, and run the search.

    state supplies the config (state.medusa.config), the raw query terms
    (state.medusa.search), the raw card-type filter terms
    (state.medusa.card_filter), and the paging values (state.page,
    state.max_items, state.filtered).

    When there is no usable query, only the card-type filter (if any) is
    applied. Otherwise every searchable card type is brought up to date
    in the index and the index is searched. The searcher opened here is
    closed before returning, including when a step raises.
    """
    self.config = state.medusa.config

    # Upper limit on the permitted number of searchable items.
    # Since we use this as an array slice, add one to support N-1 elements
    self.max_query_count = GlobalConfig.getint("miscellaneous", "max_state_parameters") + 1

    # List of symbols to filter out in the unsafe input
    self.ignore_symbols = []
    # Regex of words that won't be indexed
    self.ignore_words = ''

    # After processing unsafe_query or unsafe_filter, save it in the object
    # Assume we're searching for all terms, unless separated by pluses
    self.query_string = ''
    self.filter_string = ''
    # The contents of the last file we read
    self.content = ''
    # Array of ctypes, each with an array of filename hits
    self.hits = {}

    # Whoosh object defaults
    self.schema = ''
    self.index = ''
    self.query = ''
    self.parser = ''
    self.searcher = ''
    self.results = ''

    # Max search results per page is equal to the number of cards that would
    # be shown on a normal news page. And while whoosh expects pages starting
    # at one, the page state counting will be from zero
    self.page = state.page + 1
    self.resultcount = state.max_items
    self.filtered = state.filtered

    # File paths for loading things
    card_root = GlobalConfig.get("paths", "data_root") + "/private"
    self.index_dir = card_root + "/" + self.config.get('search', 'index_dir')
    self.words_file = card_root + "/" + self.config.get('search', 'ignore_words')
    # NOTE(review): attribute name is misspelled ("symobls"); kept as-is
    # because code outside this method may read it under this name.
    self.symobls_file = card_root + "/" + self.config.get('search', 'ignore_symbols')
    self.search_types = self.config.get("card_properties", "search").replace(" ", "").split(",")

    unsafe_query_terms = state.medusa.search
    unsafe_filter_terms = state.medusa.card_filter

    # Support for Japanese text indexing
    self.tk = TinySegmenterTokenizer(tinysegmenter.TinySegmenter())

    # Define the indexing schema. Include the mtime to track updated
    # content in the backend, ctype so that we can manage the distribution
    # of returned search results similar to the normal pages, and the
    # filename itself as a unique identifier (most filenames are utimes).
    self.schema = Schema(file=ID(stored=True, unique=True, sortable=True),
                         ctype=ID(stored=True),
                         mtime=ID(stored=True),
                         content=TEXT(analyzer=self.tk))

    # If index doesn't exist, create it
    if index.exists_in(self.index_dir):
        self.index = index.open_dir(self.index_dir)
    else:
        self.index = index.create_in(self.index_dir, schema=self.schema)

    # Prepare for query searching (mtime update, search strings)
    self.searcher = self.index.searcher()
    try:
        for ctype in self.search_types:
            # Prior to processing input, prepare the results arrays.
            # Other functions will expect this to exist regardless.
            self.hits[ctype] = []

        # Process the filter strings first, in case that's all we have
        if unsafe_filter_terms is not None:
            self.__process_input(' '.join(unsafe_filter_terms[0:self.max_query_count]),
                                 returning="filter")

        # Feed our input as a space-delimited set of terms. NOTE that we
        # limit this in the __import_state function in MedusaState. A
        # missing query, or one that is empty after processing, means
        # there is nothing to search for.
        have_query = (
            unsafe_query_terms is not None
            and self.__process_input(' '.join(unsafe_query_terms[0:self.max_query_count]))
        )
        if not have_query:
            if self.filter_string != '':
                self.__filter_cardtypes()
            return

        for ctype in self.search_types:
            # Now we have good safe input, but we don't know if our index
            # is up-to-date or not. If files have changed since their
            # last-modified date, reindex all the modified files
            self.__add_ctype_to_index(ctype)

        # Return only up to CARD_COUNT items per page for each type of
        # returned search result query. We calculate the max sum of all
        # returned items, and then we'll later determine which of these
        # results we'll display on the returned search results page.
        self.__search_index()
    finally:
        # Single exit point for the searcher, so it is released on every
        # path out of the constructor -- including exceptions.
        self.searcher.close()
def __interpretfile(self, thisfile):
    """
    File opening heuristics.

    First, assume that files in each folder are indicative of their
    relative type: images are in the image folder, for instance. Second,
    assume that non-media folders follow the "news entity" format of
    title line, keywords line, and then body. These assumptions are
    checked against the file type reported by the magic library.

    Fills in self.title, self.topics, self.body (and self.songs for song
    cards) plus self.cdate, and returns the file's path relative to the
    data root. If the file cannot be read, self.topics is emptied and the
    configured default file name is returned instead.
    """
    type_sniffer = magic.Magic(mime=True)

    card_root = GlobalConfig.get("paths", "data_root") + "/private"
    type_dir = self.config.get("paths", self.ctype)
    subdir = "/hidden/" if self.hidden is True else "/"
    fpath = card_root + "/" + type_dir + subdir + thisfile

    # Folders whose files use the title / topics / body text layout
    article_folders = ("news", "heading", "quotes", "topics", "features")

    try:
        with open(fpath, 'r', encoding='utf-8') as cfile:
            ftype = type_sniffer.from_file(fpath)

            # News entries or features are processed the same way
            if "text" in ftype and any(
                    self.config.get("paths", folder) in cfile.name
                    for folder in article_folders):
                self.title = cfile.readline().replace("\n", "")
                rawtopics = cfile.readline().replace("\n", "")
                self.topics.extend(rawtopics.split(', '))
                self.body = cfile.read()

            # Multiple-song playlists
            if "text" in ftype and self.config.get("paths", "songs") in cfile.name:
                self.title = fpath
                self.topics.append("Song Playlist")
                self.body = cfile.read()
                self.__songfiles()   # Read song metadata

            # Single-image cards
            if ("jpeg" in ftype or "png" in ftype) and \
                    self.config.get("paths", "images") in cfile.name:
                # TODO: alt/img metadata
                self.title = fpath
                self.topics.append("Images")
                self.body = fpath

            # Single-song orphan cards
            # NOTE(review): the sniffer is created with mime=True; confirm
            # that its output can ever contain "layer iii".
            if "mpeg" in ftype and "layer iii" in ftype and \
                    self.config.get("paths", "songs") in cfile.name:
                self.title = fpath            # TODO: filename from title
                self.topics.append("Songs")   # TODO: include the album
                self.body = fpath
                self.__songfiles()   # Read song metadata

        # If the filename is in unix-time format, track the creation date
        if thisfile.isdigit():
            stamp = int(thisfile)
            if stamp > 1141161200:
                self.cdate = datetime.fromtimestamp(stamp).strftime("%B %-d, %Y")
        else:
            # Otherwise fall back to the file's modification time
            fnmtime = os.path.getmtime(fpath)
            self.cdate = datetime.fromtimestamp(int(fnmtime)).strftime("%B %-d, %Y")

    except IOError:
        # File got moved in between dirlist caching and us reading it.
        # Emptying the topics makes the card go away.
        self.topics = []
        return self.config.get("card_defaults", "file")

    return type_dir + subdir + thisfile