def translate_path(self, opath):
    """Translate an HTTP request path into a filesystem path.

    The request path is unquoted, stripped of the plugin prefix
    (something like /uprcl), then mapped through uprclpathmap. An
    'embed' query parameter marks embedded-art URLs, which had a fake
    .jpg/.png extension appended and must be restored.

    Returns (fspath, embedded) on success, (None, None) when the path
    matches no map entry (security: never serve anything outside the
    path map).
    """
    path = urllib.unquote(opath)
    # pathprefix is something like /uprcl
    path = path.replace(self.uprclpathprefix, '', 1)
    q = urlparse.urlparse(path)
    path = q.path
    embedded = False
    pq = urlparse.parse_qs(q.query)
    if 'embed' in pq:
        embedded = True
    for fsp, htp in self.uprclpathmap.iteritems():
        if path.startswith(fsp):
            path = path.replace(fsp, htp, 1)
            if embedded:
                # Embedded image urls have had a .jpg or .png
                # appended. Remove it to restore the track path name.
                # Guard against a dot-less path: rfind() would return
                # -1 and path[:i] would silently chop the last char.
                i = path.rfind('.')
                if i != -1:
                    path = path[:i]
            return path, embedded
    # Security feature here: never allow access to anything not in
    # the path map
    uplog("HTTP: translate_path: %s not found in path map" % opath)
    return None, None
def getindextags(self):
    """Return the tag list parsed from the minimserver.indexTags setting.

    Returns an empty list when the setting is absent or empty.
    """
    raw = self.conf.get("minimserver.indexTags")
    uplog("Minim:getindextags:in: [%s]" % raw)
    tags = self.minimsplitsplit(raw) if raw else []
    uplog("Minim:getindextags:out: %s" % tags)
    return tags
def getitemtags(self):
    """Return minimserver.itemTags as a list of stripped tag names.

    Returns an empty list when the setting is absent or empty.
    """
    raw = self.conf.get("minimserver.itemTags")
    uplog("Minim:getitemtags:in: [%s]" % raw)
    tags = [t.strip() for t in raw.split(',')] if raw else []
    uplog("Minim:getitemtags:out: %s" % tags)
    return tags
def translate_path(self, opath):
    """Map a request path to a filesystem path through the path map.

    Returns None when no map entry matches (security: nothing outside
    the path map is ever served).
    """
    decoded = urllib.unquote(opath).replace(self.uprclpathprefix, '', 1)
    for prefix, target in self.uprclpathmap.iteritems():
        if decoded.startswith(prefix):
            return decoded.replace(prefix, target, 1)
    uplog("HTTP: translate_path: %s not found in path map" % opath)
    return None
def getexcludepatterns(self):
    """Return minimserver.excludePattern re-encoded as a Recoll
    skippedNames-compatible string.

    A missing/empty setting is returned unchanged (None or empty).
    """
    spats = self.conf.get("minimserver.excludePattern")
    if not spats:
        return spats
    # Split with Minim's quoting conventions, then re-join with
    # Recoll's.
    lpats = conftree.stringToStrings(spats, quotes=self.quotes,
                                     escape=self.escape,
                                     whitespace=self.whitespace)
    spats = conftree.stringsToString(lpats)
    uplog("skippedNames from Minim excludePattern: %s" % spats)
    return spats
def __init__(self, fn=''):
    """Load the Minim configuration from fn, or an empty configuration
    when no file name is given."""
    # Splitting parameters used with conftree string list helpers.
    self.quotes = "\"'"
    self.escape = ''
    self.whitespace = ', '
    # /dev/null yields an empty ConfSimple when there is no config file.
    self.conf = conftree.ConfSimple(fn if fn else '/dev/null')
    if fn:
        uplog("Minim config read: contentDir: %s" %
              self.conf.get("minimserver.contentDir"))
def _initconfdir(confdir, topdirs):
    """Create and populate a fresh Recoll configuration directory.

    Copies the bundled fields file and writes a minimal recoll.conf
    indexing topdirs.

    Raises Exception if confdir already exists.
    """
    if os.path.exists(confdir):
        raise Exception("_initconfdir: exists already: %s" % confdir)
    os.mkdir(confdir)
    datadir = os.path.dirname(__file__)
    uplog("datadir: %s" % datadir)
    shutil.copyfile(os.path.join(datadir, "rclconfig-fields"),
                    os.path.join(confdir, "fields"))
    # Use a context manager so the file is closed (and flushed) even
    # if one of the writes raises; the previous open()/close() pair
    # leaked the handle on error.
    with open(os.path.join(confdir, "recoll.conf"), "w") as f:
        f.write("topdirs=%s\n" % topdirs)
        f.write("idxabsmlen=0\n")
        f.write("loglevel=2\n")
        f.write("noaspell=1\n")
def gettagaliases(self):
    """Parse minimserver.aliasTags into (orig, target, rep) triples.

    Both names are lowercased. A leading '-' on the target is stripped
    and sets rep to False (presumably "add, don't replace" — confirm
    with the caller's use of the flag).
    """
    aliases = []
    saliases = self.conf.get("minimserver.aliasTags")
    uplog("Minim:gettagaliases:in: [%s]" % saliases)
    lst = self.minimsplitsplit(saliases)
    for orig, target in lst:
        orig = orig.lower()
        target = target.lower()
        rep = True
        # startswith() replaces the old target[0] == '-'[0] test,
        # which raised IndexError on an empty target name.
        if target.startswith('-'):
            rep = False
            target = target[1:]
        aliases.append((orig, target, rep))
    uplog("Minim:gettagaliases:out: %s" % aliases)
    return aliases
def browse(pid, flag, httphp, pathprefix): diridx = _objidtodiridx(pid) # If there is only one entry in root, skip it. This means that 0 # and 1 point to the same dir, but this does not seem to be an # issue if diridx == 0 and len(dirvec[0]) == 2: diridx = 1 entries = [] # The basename call is just for diridx==0 (topdirs). Remove it if # this proves a performance issue for nm, ids in _dirvec[diridx].iteritems(): if nm == "..": continue thisdiridx = ids[0] thisdocidx = ids[1] if thisdocidx >= 0: doc = g_alldocs[thisdocidx] else: uplog("No doc for %s" % pid) doc = None if thisdiridx >= 0: # Skip empty directories if len(dirvec[thisdiridx]) == 1: continue id = _foldersIdPfx + '$' + 'd' + str(thisdiridx) if doc and doc.albumarturi: arturi = doc.albumarturi else: arturi = _arturifordir(thisdiridx) entries.append( rcldirentry(id, pid, os.path.basename(nm), arturi=arturi)) else: # Not a directory. docidx had better been set if thisdocidx == -1: uplog("folders:docidx -1 for non-dir entry %s" % nm) continue doc = g_alldocs[thisdocidx] id = _foldersIdPfx + '$i' + str(thisdocidx) e = rcldoctoentry(id, pid, httphp, pathprefix, doc) if e: entries.append(e) return sorted(entries, cmp=cmpentries)
def _appendterms(out, v, field, oper):
    """Append parsed search values to the output token list.

    Single-word values are comma-joined into one token; each
    multi-word value becomes the triple: field, oper, quoted phrase.
    """
    uplog("_appendterms: v %s field <%s> oper <%s>" % (v, field, oper))
    words = [w for w in v if len(w.split()) == 1]
    phrases = [w for w in v if len(w.split()) != 1]
    # The joined word token is appended even when empty, matching the
    # downstream parser's expectations.
    out.append(",".join(words))
    for ph in phrases:
        out.extend([field, oper, '"' + ph + '"'])
def _parsestring(s, i=0):
    """Parse a backslash-escaped double-quoted string starting at s[i].

    First changes '"hello \\"one phrase\\" world"' into
    'hello "one phrase" world', then splits the result with
    stringToStrings. Note that we can't handle quoted dquotes inside
    the string.

    Returns (j, tokens): j is the input index just past the consumed
    text, tokens the parsed word/phrase list.
    """
    uplog("parseString: input: <%s>" % s[i:])
    # Keep j defined even when the input is already exhausted
    # (i >= len(s)): the loop then runs zero times and the return
    # statement previously raised NameError.
    j = i
    parsed = ''  # renamed from 'str': don't shadow the builtin
    escape = False
    instring = False
    for j in range(i, len(s)):
        if instring:
            if escape:
                if s[j] == '"':
                    parsed += '"'
                    instring = False
                else:
                    parsed += '\\' + s[j]
                escape = False
            else:
                if s[j] == '\\':
                    escape = True
                else:
                    parsed += s[j]
        else:
            if escape:
                parsed += s[j]
                escape = False
                if s[j] == '"':
                    instring = True
            else:
                if s[j] == '\\':
                    escape = True
                elif s[j] == '"':
                    # Closing unescaped dquote: step past it (and the
                    # following separator) and stop.
                    j += 2
                    break
                else:
                    parsed += s[j]
    tokens = stringToStrings(parsed)
    return j, tokens
def _fetchalldocs(confdir):
    """Fetch every document from the Recoll index in confdir.

    Iterates the "mime:*" query in fetchmany() batches, optionally
    capped by the module-level _maxrclcnt (0 means no cap). Returns
    the list of doc objects.
    """
    start = timer()
    rcldb = recoll.connect(confdir=confdir)
    rclq = rcldb.query()
    rclq.execute("mime:*", stemming=0)
    uplog("Estimated alldocs query results: %d" % (rclq.rowcount))
    allthedocs = []
    totcnt = 0
    while True:
        batch = rclq.fetchmany()
        allthedocs.extend(batch)
        totcnt += len(batch)
        # Stop on cap reached or short (final) batch.
        if (_maxrclcnt > 0 and totcnt >= _maxrclcnt) or \
           len(batch) != rclq.arraysize:
            break
        # Yield the CPU between batches.
        time.sleep(0)
    end = timer()
    uplog("Retrieved %d docs in %.2f Seconds" % (totcnt, end - start))
    return allthedocs
def _rootentries():
    """Build the root directory listing, our top internal structure.

    Each module contributes its own root entries; we record the objid
    prefix ownership in rootmap for later browse dispatching.
    """
    entries = []
    for module, tag in ((uprcltags, 'tags'),
                        (uprcluntagged, 'untagged'),
                        (uprclfolders, 'folders')):
        nents = module.rootentries(g_myprefix)
        for e in nents:
            rootmap[e['id']] = tag
        entries += nents
    uplog("Browse root: rootmap now %s" % rootmap)
    return entries
def handle_error(self, request, client_address):
    """SocketServer error hook, deliberately neutered.

    We generally don't care about request-processing errors (typically
    broken pipes when a client closes a stream), so we return right
    away. Everything after the return is intentionally dead code, kept
    for reference in case the logging ever needs to be re-enabled.
    """
    # Actually, we generally don't care about errors...
    return
    uplog('-' * 40)
    uplog('Exception happened during processing of request from %s' %
          str(client_address))
    import traceback
    traceback.print_exc()  # XXX But this goes to stderr! (jf: yep :)
    uplog('-' * 40)
def search(rclconfdir, objid, upnps, idprefix, httphp, pathprefix):
    """Run a UPnP search against the Recoll index.

    The UPnP criteria string is translated to a Recoll query,
    restricted to the directory subtree of objid when applicable.
    Returns entries sorted with cmpentries; [] on query error or when
    there are no results.
    """
    query = upnpsearchtorecoll(upnps)
    filterdir = uprclfolders.dirpath(objid)
    if filterdir and filterdir != "/":
        query += " dir:\"" + filterdir + "\""
    uplog("Search: recoll search: <%s>" % query)
    rcldb = recoll.connect(confdir=rclconfdir)
    try:
        rclq = rcldb.query()
        rclq.execute(query)
    except Exception as e:
        uplog("Search: recoll query raised: %s" % e)
        return []
    uplog("Estimated query results: %d" % (rclq.rowcount))
    if rclq.rowcount == 0:
        return []
    maxcnt = 0  # 0 means unlimited
    entries = []
    while True:
        batch = rclq.fetchmany()
        for doc in batch:
            # Placeholder object id (same literal as before the rewrite).
            entryid = idprefix + '$' + 'seeyoulater'
            entry = rcldoctoentry(entryid, objid, httphp, pathprefix, doc)
            if entry:
                entries.append(entry)
        if (maxcnt > 0 and len(entries) >= maxcnt) or \
           len(batch) != rclq.arraysize:
            break
    uplog("Search retrieved %d docs" % (len(entries), ))
    return sorted(entries, cmp=cmpentries)
def _prepareTags():
    """Compute the tag configuration from the Minim config.

    Fills the module globals g_indextags (tags shown as top-level
    indexes), g_tagdisplaytag (tag -> display name) and g_tagtotable
    (tag -> SQL table name). Returns tabtorclfield, a list of
    (table name, recoll field) pairs determining which auxiliary
    tables get created and from which doc fields they are filled.
    """
    global g_tagdisplaytag
    global g_tagtotable
    global g_indextags
    indextagsp = uprclinit.g_minimconfig.getindextags()
    itemtags = uprclinit.g_minimconfig.getitemtags()
    # Default index tags when the config has none.
    if not indextagsp:
        indextagsp = [('Artist', ''), ('Date', ''), ('Genre', ''),
                      ('Composer', '')]
    # Compute the list of index tags and their display names. A tag
    # named 'none' (any case) disables indexing entirely.
    for v, d in indextagsp:
        if v.lower() == 'none':
            g_indextags = []
            g_tagdisplaytag = {}
            break
        g_indextags.append(v)
        g_tagdisplaytag[v] = d if d else v
    uplog("prepareTags: g_indextags: %s g_tagdisplaytag %s" %
          (g_indextags, g_tagdisplaytag))
    # Compute an array of (table name, recoll field)
    # translations. Most often they are identical. This also
    # determines what fields we create tables for.
    tabtorclfield = []
    for nm in g_indextags:
        tb = _alltagtotable[nm]
        if not tb:
            continue
        g_tagtotable[nm] = tb
        rclfld = _coltorclfield[tb] if tb in _coltorclfield else tb
        uplog("recolltosql: using rclfield [%s] for sqlcol [%s]" %
              (rclfld, tb))
        tabtorclfield.append((tb, rclfld))
    # Item tags get auxiliary tables too, but are not top-level indexes
    # (not recorded in g_tagtotable).
    for nm in itemtags:
        tb = _alltagtotable[nm]
        if not tb:
            continue
        rclfld = _coltorclfield[tb] if tb in _coltorclfield else tb
        uplog("recolltosql: using rclfield [%s] for sqlcol [%s]" %
              (rclfld, tb))
        tabtorclfield.append((tb, rclfld))
    return tabtorclfield
def recolltosql(conn, rcldocs):
    """Populate the SQL database from the list of Recoll docs.

    Creates the schema, then inserts one tracks row per audio doc,
    creating/reusing auxiliary table rows (artist, genre, ...) and
    album records along the way. Commits once at the end.
    """
    start = timer()
    _createsqdb(conn)
    tabtorclfield = _prepareTags()
    maxcnt = 0
    totcnt = 0
    c = conn.cursor()
    for docidx in range(len(rcldocs)):
        doc = rcldocs[docidx]
        totcnt += 1
        # Yield the CPU periodically: this runs in the init thread.
        if totcnt % 1000 == 0:
            time.sleep(0)
        # No need to include non-audio or non-tagged types
        if doc.mtype not in audiomtypes or doc.mtype == 'inode/directory' \
           or doc.mtype == 'audio/x-mpegurl':
            continue
        # Do the artist apart from the other attrs, as we need the
        # value for album creation.
        if doc.albumartist:
            trackartid = _auxtableinsert(conn, 'artist', doc.albumartist)
        elif doc.artist:
            trackartid = _auxtableinsert(conn, 'artist', doc.artist)
        else:
            trackartid = None
        album_id, albartist_id = _maybecreatealbum(conn, doc, trackartid)
        trackno = _tracknofordoc(doc)
        # Strip the file:// scheme to get a filesystem path.
        if doc.url.find('file://') == 0:
            path = doc.url[7:]
        else:
            path = ''
        # Set base values for column names, values list, placeholders
        columns = [
            'docidx', 'album_id', 'trackno', 'title', 'path', 'artist_id'
        ]
        values = [docidx, album_id, trackno, doc.title, path, trackartid]
        placehold = ['?', '?', '?', '?', '?', '?']
        # Append data for each auxiliary table if the doc has a value
        # for the corresponding field (else let SQL set a dflt/null value)
        for tb, rclfld in tabtorclfield:
            if tb == 'artist':
                # already done
                continue
            value = getattr(doc, rclfld, None)
            if not value:
                continue
            rowid = _auxtableinsert(conn, tb, value)
            columns.append(_clid(tb))
            values.append(rowid)
            placehold.append('?')
        # Create the main record in the tracks table.
        stmt = 'INSERT INTO tracks(' + ','.join(columns) + \
               ') VALUES(' + ','.join(placehold) + ')'
        c.execute(stmt, values)
        #uplog(doc.title)
    ## End Big doc loop
    _setalbumartists(conn)
    _createmergedalbums(conn)
    conn.commit()
    end = timer()
    uplog("recolltosql: processed %d docs in %.2f Seconds" %
          (totcnt, end - start))
def _uprcl_init_worker(): global httphp, pathprefix, rclconfdir, g_rcldocs # pathprefix would typically be something like "/uprcl". It's used # for dispatching URLs to the right plugin for processing. We # strip it whenever we need a real file path if "UPMPD_PATHPREFIX" not in os.environ: raise Exception("No UPMPD_PATHPREFIX in environment") pathprefix = os.environ["UPMPD_PATHPREFIX"] if "UPMPD_CONFIG" not in os.environ: raise Exception("No UPMPD_CONFIG in environment") upconfig = conftree.ConfSimple(os.environ["UPMPD_CONFIG"]) httphp = upconfig.get("uprclhostport") if httphp is None: ip = findmyip() httphp = ip + ":" + "9090" uplog("uprclhostport not in config, using %s" % httphp) rclconfdir = upconfig.get("uprclconfdir") if rclconfdir is None: uplog("uprclconfdir not in config, using /var/cache/upmpdcli/uprcl") rclconfdir = "/var/cache/upmpdcli/uprcl" rcltopdirs = upconfig.get("uprclmediadirs") if rcltopdirs is None: raise Exception("uprclmediadirs not in config") pthstr = upconfig.get("uprclpaths") if pthstr is None: uplog("uprclpaths not in config") pthlist = stringToStrings(rcltopdirs) pthstr = "" for p in pthlist: pthstr += p + ":" + p + "," pthstr = pthstr.rstrip(",") uplog("Path translation: pthstr: %s" % pthstr) lpth = pthstr.split(',') pathmap = {} for ptt in lpth: l = ptt.split(':') pathmap[l[0]] = l[1] # Update or create index. uplog("Creating updating index in %s for %s" % (rclconfdir, rcltopdirs)) start = timer() uprclindex.runindexer(rclconfdir, rcltopdirs) # Wait for indexer while not uprclindex.indexerdone(): time.sleep(.5) fin = timer() uplog("Indexing took %.2f Seconds" % (fin - start)) g_rcldocs = uprclfolders.inittree(rclconfdir, httphp, pathprefix) uprcltags.recolltosql(g_rcldocs) uprcluntagged.recoll2untagged(g_rcldocs) host, port = httphp.split(':') if True: # Running the server as a thread. We get into trouble because # something somewhere writes to stdout a bunch of --------. 
# Could not find where they come from, happens after a sigpipe # when a client closes a stream. The --- seem to happen before # and after the exception strack trace, e.g: # ---------------------------------------- # Exception happened during processing of request from ('192... # Traceback... # [...] # error: [Errno 32] Broken pipe # ---------------------------------------- # # **Finally**: found it: the handle_error SocketServer method # was writing to stdout. Overriding it should have fixed the # issue. Otoh the separate process approach works, so we kept # it for now httpthread = threading.Thread(target=uprclhttp.runHttp, kwargs={ 'host': host, 'port': int(port), 'pthstr': pthstr, 'pathprefix': pathprefix }) httpthread.daemon = True httpthread.start() else: # Running the HTTP server as a separate process cmdpath = os.path.join(os.path.dirname(sys.argv[0]), 'uprclhttp.py') cmd = subprocess.Popen((cmdpath, host, port, pthstr, pathprefix), stdin=open('/dev/null'), stdout=sys.stderr, stderr=sys.stderr, close_fds=True) global _initrunning _initrunning = False msgproc.log("Init done")
def upnpsearchtorecoll(s):
    """Translate a UPnP search criteria string into a Recoll query string.

    Walks the input token by token, mapping UPnP operators (contains,
    doesNotContain, derivedFrom, exists, true/false, and/or) and field
    names (through upnp2rclfields) to Recoll query syntax. Raises
    Exception on a malformed '*' query or a leading doesNotContain.
    """
    uplog("upnpsearchtorecoll:in: <%s>" % s)
    # Normalize all whitespace runs to single spaces.
    s = re.sub('[\t\n\r\f ]+', ' ', s)
    out = []
    hadDerived = False  # NOTE(review): set but never read here
    i = 0
    field = ""
    oper = ""
    while True:
        i, c = _getchar(s, i)
        if not c:
            break
        #uplog("upnpsearchtorecoll: nextchar: <%s>" % c)
        if c.isspace():
            continue
        if c == "*":
            if (len(out) > 1 or (len(out) == 1 and not out[-1].isspace())) or \
               (len(s[i:]) and not s[i:].isspace()):
                raise Exception("If * is used it must be the only input")
            out = ["mime:*"]
            break
        if c == '(' or c == ')':
            out.append(c)
        elif c == '>' or c == '<' or c == '=':
            oper += c
        else:
            if c == '"':
                i, v = _parsestring(s, i)
                uplog("_parsestring ret: %s" % v)
                _appendterms(out, v, field, oper)
                oper = ""
                field = ""
                continue
            else:
                # Push the char back and read a full word.
                i -= 1
                i, w = _readword(s, i)
                #uplog("_readword returned <%s>" % w)
                if w == 'contains':
                    out.append(':')
                    oper = ':'
                elif w == 'doesNotContain':
                    if len(out) < 1:
                        raise Exception(
                            "doesNotContain can't be the first word")
                    out.insert(-1, "-")
                    out.append(':')
                    oper = ':'
                elif w == 'derivedFrom':
                    hadDerived = True
                    out.append(':')
                    oper = ':'
                elif w == 'true':
                    out.append('*')
                    oper = ""
                elif w == 'false':
                    out.append('xxxjanzocsduochterrrrm')
                elif w == 'exists':
                    out.append(':')
                    oper = ':'
                elif w == 'and':
                    # Recoll has implied AND, but see next pass.
                    # 'pass' is required: this branch previously had
                    # an empty body (comment only), a SyntaxError.
                    pass
                elif w == 'or':
                    # Does not work because OR/AND priorities are reversed
                    # between recoll and upnp. This would be very
                    # difficult to correct, let's hope that the callers
                    # use parentheses
                    out.append('OR')
                else:
                    try:
                        field = upnp2rclfields[w]
                    except:
                        field = w
                    out.append(field)
                    oper = ""
    ostr = ""
    for tok in out:
        ostr += tok + " "
    uplog("upnpsearchtorecoll:out: <%s>" % ostr)
    return ostr
def _createmergedalbums(conn):
    """Group multi-disc albums under a single merged top album.

    Candidates are albums with a disc number (albtdisc set) and no
    group yet (albalb NULL). Albums sharing title, artist and a close
    enough folder are merged under a newly created top album record;
    single-disc leftovers get their disc number cleared. Finally every
    discless album becomes its own top album (albalb = album_id).
    """
    c = conn.cursor()
    # Remember already merged
    merged = set()
    # All candidates for merging: albums with a disc number not yet
    # merged (albalb is null)
    c.execute('''SELECT album_id, albtitle, artist_id, albfolder
                 FROM albums
                 WHERE albalb IS NULL AND albtdisc IS NOT NULL''')
    c1 = conn.cursor()
    for r in c:
        albid = r[0]
        if albid in merged:
            continue
        albtitle = r[1]
        artist = r[2]
        folder = r[3]
        #uplog("_createmergedalbums: albid %d artist_id %s albtitle %s" %
        #      (albid, artist, albtitle))
        # Look for albums not already in a group, with the same title
        # and artist
        if artist:
            c1.execute(
                '''SELECT album_id, albtdisc, albfolder FROM albums
                   WHERE albtitle = ? AND artist_id = ? AND
                   albalb is NULL AND albtdisc IS NOT NULL''',
                (albtitle, artist))
        else:
            c1.execute(
                '''SELECT album_id, albtdisc, albfolder FROM albums
                   WHERE albtitle = ? AND artist_id IS NULL AND
                   albalb is NULL AND albtdisc IS NOT NULL''',
                (albtitle, ))
        rows = c1.fetchall()
        # Keep only candidates whose folders are close enough (depth 2).
        rows1 = _mergealbumsfilterfolders(folder, rows, 2)
        #uplog("_createmergedalbums: got %d possible(s) title %s" %
        #      (len(rows1), albtitle))
        if len(rows1) > 1:
            albids = [row[0] for row in rows1]
            dnos = sorted([row[1] for row in rows1])
            # Disc numbers must form a proper sequence, else we
            # abandon the merge and clear the disc numbers.
            if not _checkseq(dnos):
                uplog("mergealbums: not merging bad seq %s for albtitle %s " %
                      (dnos, albtitle))
                c1.execute(
                    '''UPDATE albums SET albtdisc = NULL
                       WHERE album_id in (%s)''' %
                    ','.join('?' * len(albids)), albids)
                continue
            # Create record for whole album by copying the first
            # record, setting its album_id and albtdisc to NULL
            topalbid = _membertotopalbum(conn, albids[0])
            # Update all album disc members with the top album id
            values = [
                topalbid,
            ] + albids
            c1.execute(
                '''UPDATE albums SET albalb = ?
                   WHERE album_id in (%s)''' %
                ','.join('?' * len(albids)), values)
            merged.update(albids)
            #uplog("_createmergedalbums: merged: %s" % albids)
        elif len(rows1) == 1:
            # Album with a single disc having a discnumber. Just unset
            # the discnumber, we won't use it and its presence would
            # prevent the album from showing up. Alternatively we
            # could set albalb = album_id?
            #uplog("Setting albtdisc to NULL albid %d" % albid)
            c1.execute(
                '''UPDATE albums SET albtdisc = NULL
                   WHERE album_id= ?''', (albid, ))
    # finally, set albalb to albid for all single-disc albums
    c.execute('''UPDATE albums SET albalb = album_id
                 WHERE albtdisc IS NULL''')
def send_head(self):
    """Common code for GET and HEAD commands.

    This sends the response code and MIME headers, honoring a Range
    request header with a 206 response.

    Returns (f, start_range, end_range): f is an open file object
    (to be copied to the output by the caller unless the command was
    HEAD, and closed by the caller under all circumstances), or None
    when an error response was already sent (then the caller has
    nothing further to do).
    """
    uplog("HTTP: path: %s" % self.path)
    path = self.translate_path(self.path)
    # Check before logging the translated path: translate_path()
    # returns None for unmapped paths and urllib.quote(None) raises
    # TypeError, which previously crashed instead of answering 404.
    if not path or not os.path.exists(path):
        self.send_error(404)
        return (None, 0, 0)
    uplog("HTTP: translated path: %s" % urllib.quote(path))
    if not os.path.isfile(path):
        self.send_error(405)
        return (None, 0, 0)
    f = None
    ctype = self.guess_type(path)
    try:
        f = open(path, 'rb')
    except:
        self.send_error(404, "File not found")
        return (None, 0, 0)
    if "Range" in self.headers:
        self.send_response(206)
    else:
        self.send_response(200)
    self.send_header("Content-type", ctype)
    fs = os.fstat(f.fileno())
    size = int(fs[6])
    start_range = 0
    end_range = size
    self.send_header("Accept-Ranges", "bytes")
    if "Range" in self.headers:
        # Header value looks like "bytes=start-end"; either bound may
        # be empty. end_range is exclusive internally, hence the +1.
        s, e = self.headers['range'][6:].split('-', 1)
        sl = len(s)
        el = len(e)
        if sl > 0:
            start_range = int(s)
            if el > 0:
                end_range = int(e) + 1
        elif el > 0:
            # Suffix form "bytes=-N": serve the last N bytes.
            ei = int(e)
            if ei < size:
                start_range = size - ei
        self.send_header(
            "Content-Range",
            'bytes ' + str(start_range) + '-' + str(end_range - 1) +
            '/' + str(size))
    self.send_header("Content-Length", end_range - start_range)
    self.send_header("Last-Modified", self.date_time_string(fs.st_mtime))
    self.end_headers()
    #uplog("Sending Bytes %d to %d" % (start_range, end_range))
    return (f, start_range, end_range)
_idxproc = subprocess.Popen(["recollindex", "-c", confdir]) def indexerdone(): global _idxproc, _lastidxstatus if _idxproc is None: return True _lastidxstatus = _idxproc.poll() if _lastidxstatus is None: return False _idxproc = None return True def indexerstatus(): return _lastidxstatus # Only used for testing if __name__ == '__main__': if len(sys.argv) != 3: print("Usage: uprclindex.py <confdir> <topdirs>", file=sys.stderr) sys.exit(1) runindexer(sys.argv[1], sys.argv[2]) while True: if indexerdone(): uplog("Indexing done, status: %d" % indexerstatus()) sys.exit(0) uplog("Waiting for indexer") time.sleep(1)
def _rcl2folders(docs, confdir, httphp, pathprefix):
    """Build the folders directory tree from the Recoll doc list.

    dirvec is a list of dicts, one per directory; each dict maps an
    entry name to (dirvec index, doc index), with -1 meaning "not a
    directory" / "no doc". Slot 0 is the pseudo-root holding the
    topdirs. Returns dirvec (also stored in the module global).
    """
    global dirvec
    dirvec = []
    start = timer()
    rclconf = rclconfig.RclConfig(confdir)
    topdirs = [
        os.path.expanduser(d)
        for d in shlex.split(rclconf.getConfParam('topdirs'))
    ]
    topdirs = [d.rstrip('/') for d in topdirs]
    # Pseudo-root: one entry per topdir, keyed by the full path.
    dirvec.append({})
    dirvec[0][".."] = (0, -1)
    for d in topdirs:
        dirvec.append({})
        dirvec[0][d] = (len(dirvec) - 1, -1)
        dirvec[-1][".."] = (0, -1)
    # Walk the doc list and update the directory tree according to the
    # url (create intermediary directories if needed, create leaf
    # entry
    for docidx in range(len(docs)):
        doc = docs[docidx]
        arturi = docarturi(doc, httphp, pathprefix)
        if arturi:
            # The uri is quoted, so it's ascii and we can just store
            # it as a doc attribute
            doc.albumarturi = arturi
        # No need to include non-audio types in the visible tree.
        if doc.mtype not in audiomtypes:
            continue
        # Strip the file:// scheme (doc.getbinurl() returns py2 bytes).
        url = doc.getbinurl()
        url = url[7:]
        try:
            decoded = url.decode('utf-8')
        except:
            decoded = urllib.quote(url).decode('utf-8')
        # Determine the root entry (topdirs element). Special because
        # path not simple name
        fathidx = -1
        for rtpath, idx in dirvec[0].iteritems():
            if url.startswith(rtpath):
                fathidx = idx[0]
                break
        if fathidx == -1:
            uplog("No parent in topdirs: %s" % decoded)
            continue
        # Compute rest of path. NOTE: relies on rtpath keeping the
        # value of the matching (break) iteration above.
        url1 = url[len(rtpath):]
        if len(url1) == 0:
            continue
        # If there is a contentgroup field, just add it as a virtual
        # directory in the path. This only affects the visible tree,
        # not the 'real' URLs of course.
        if doc.contentgroup:
            a = os.path.dirname(url1).decode('utf-8', errors='replace')
            b = os.path.basename(url1).decode('utf-8', errors='replace')
            url1 = os.path.join(a, doc.contentgroup, b)
        # Split path, then walk the vector, possibly creating
        # directory entries as needed
        path = url1.split('/')[1:]
        #uplog("%s"%path, file=sys.stderr)
        for idx in range(len(path)):
            elt = path[idx]
            if elt in dirvec[fathidx]:
                # This path element was already seen
                # If this is the last entry in the path, maybe update
                # the doc idx (previous entries were created for
                # intermediate elements without a Doc).
                if idx == len(path) - 1:
                    dirvec[fathidx][elt] = (dirvec[fathidx][elt][0], docidx)
                    #uplog("updating docidx for %s" % decoded)
                # Update fathidx for next iteration
                fathidx = dirvec[fathidx][elt][0]
            else:
                # Element has no entry in father directory (hence no
                # dirvec entry either).
                if idx != len(path) - 1:
                    # This is an intermediate element. Create a
                    # Doc-less directory
                    fathidx = _createdir(dirvec, fathidx, -1, elt)
                else:
                    # Last element. If directory, needs a dirvec entry
                    if doc.mtype == 'inode/directory':
                        fathidx = _createdir(dirvec, fathidx, docidx, elt)
                        #uplog("Setting docidx for %s" % decoded)
                    else:
                        dirvec[fathidx][elt] = (-1, docidx)
    if False:
        for ent in dirvec:
            uplog("%s" % ent)
    end = timer()
    uplog("_rcl2folders took %.2f Seconds" % (end - start))
    return dirvec