import os
import string
import sys
import md5

# Project-local modules/names assumed by the functions below (defined
# elsewhere in the project): utility, hash, mp3, logger, classify,
# Directory, File, Error, min_search_keyword_len.


def getPropertyFloorArea(houseProperty):
    logger.debug('#############################################START GET PROPERTY FLOOR AREA#############################################')
    propertyInfo = houseProperty.find("div", "searchPropertyInfo")
    previousTag = ''
    result = None
    for divTags in propertyInfo.find_all('div'):
        for pTags in divTags.find_all('p'):
            logger.debug('PTags : %s', pTags.contents[0])
            # If the previous tag is one of the ones we need to fetch,
            # then pick up the info.
            if previousTag in ('estado', 'areautil', 'areabruta'):
                logger.debug('PreviousTag is %s', previousTag)
            previousTag = utility.remove_accents(pTags.contents[0])
            logger.debug('New previous Tag is : %s', previousTag)
        logger.debug('Search Property Info %s', divTags)
        result = divTags.text       # keep the text of the last info <div>
    logger.debug('#############################################END GET PROPERTY FLOOR AREA#############################################')
    return result
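
# Hedged usage sketch (an illustration, not part of the original code):
# getPropertyFloorArea() expects a single search-result <div> as parsed by
# BeautifulSoup. The file name 'listing.html' and the 'searchProperty'
# container class are assumptions; only 'searchPropertyInfo' appears above.
def _example_property_floor_area():
    from bs4 import BeautifulSoup
    html = open('listing.html').read()                # hypothetical saved page
    soup = BeautifulSoup(html, 'html.parser')
    for houseProperty in soup.find_all('div', 'searchProperty'):  # assumed class
        print getPropertyFloorArea(houseProperty)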
def build_directory(path, mtime, flags, server):
    dir = Directory()
    dir.mtime = mtime
    dir.is_dir = 1
    dir.names = [ ]
    basename = os.path.basename(path)
    dir.info = {
        'type' : 'directory',
        'filename' : basename,
        'length' : 0L,
        'path' : string.split(path, '/') }
    #print dir.info['path']
    names = os.listdir(path)
    #for i in range(len(names)):
    #    names[i] = utility.force_unicode(names[i])

    # Option to limit number of files published
    # TODO: make it work with subdirectories
    if flags['max'] is not None:
        names = names[:flags['max']]

    if flags.get('name'):
        if server.cache.has_key((path, mtime)):
            dir.hash, dir.length = server.cache[(path, mtime)]
        else:
            dir.length = 0
            dir.hash = hash.hash_of(basename)
            server.cache[(path, mtime)] = (dir.hash, dir.length)
        dir.info['name'] = dir.hash
        dir.names.append(dir.hash)

    if not flags.get('name'):
        dir.info['local_path'] = path

    # Build the keyword list from the accent-stripped, lowercased name.
    str = utility.remove_accents(string.lower(basename))
    keywords = [ ]
    if flags.get('filename'):
        keywords.append(str)
    if flags.get('keywords'):
        for char in '+-_.,?!()[]':
            str = string.replace(str, char, " ")
        keywords.extend(string.split(str))

    dir.info['keywords'] = [ ]
    dir.files = [ ]
    for item in names:
        if item[0] != '.':              # skip hidden files
            dir.files.append(os.path.join(path, item))

    # for the moment do not publish directories
    return dir

    # Everything below is unreachable until directory publishing is
    # re-enabled by removing the early return above.
    for word in keywords:
        word = utility.force_string(word)
        if len(word) >= min_search_keyword_len and word not in dir.info['keywords']:
            dir.info['keywords'].append(word)
            if flags.get('name'):
                dir.names.append(hash.hash_of(word))

    # publish directory...
    # todo: publish after all files have been hashed,
    # generate name from their hash
    if flags.get('name'):
        if not server.entries.has_key(path):
            for name in dir.names:
                server.node.publish(name, dir.info)
        elif server.entries[path].mtime != mtime:
            # first unpublish outdated info
            #print "unpublishing outdated dir"
            server.node.unpublish(dir.info)
            for name in dir.names:
                server.node.publish(name, dir.info)
        server.entries[path] = dir
        server.paths[dir.hash] = (path, dir.mtime)
        server.names[path] = dir.hash
    return dir
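
# Hedged usage sketch (an illustration, not part of the original code):
# build_directory() needs a flags dict and a server object that carries
# `cache`, `entries`, `paths`, `names` and a `node` with publish()/unpublish().
# The path and flag values below are assumptions chosen to exercise the
# hashing and keyword-indexing paths.
def _example_build_directory(server):
    path = '/tmp/shared'                              # hypothetical share root
    flags = {'max': None, 'name': 1, 'filename': 1, 'keywords': 1}
    d = build_directory(path, os.stat(path)[8], flags, server)  # [8] = st_mtime
    print d.info['filename'], ':', len(d.files), 'entries'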
def build_file(path, mtime, flags, server):
    file = File()
    file.is_dir = 0
    file.mtime = mtime
    # basename = utility.force_unicode(os.path.basename(path))
    # do not convert to unicode, because published data should not
    # depend on the terminal encoding of the client
    basename = os.path.basename(path)
    file.length = os.stat(path)[6]       # st_size
    file.names = [ ]
    file.info = {
        'type' : 'file',
        'filename' : basename,
        'length' : file.length }

    if flags.get('name'):
        if server.cache.has_key((path, mtime)):
            file.hash, file.length = server.cache[(path, mtime)]
        else:
            # Hash the file contents with MD5, reading in 1 MiB chunks.
            try:
                f = open(path, 'rb')
                m = md5.new()
                file.length = 0L
                while 1:
                    str = f.read(1 << 20)
                    if str == '':
                        break
                    m.update(str)
                    file.length = file.length + len(str)
                f.close()
                file.hash = m.digest()
            except IOError:
                raise Error('bad file')
            server.cache[(path, mtime)] = (file.hash, file.length)
        file.info['name'] = file.hash
        file.names.append(file.hash)

    if flags.get('local'):
        file.info['local_path'] = path

    # Build the keyword list from the accent-stripped, lowercased name.
    str = utility.remove_accents(string.lower(basename))
    keywords = [ ]
    if flags.get('filename'):
        keywords.append(str)
    if flags.get('keywords'):
        for char in '+-_.,?!()[]':
            str = string.replace(str, char, " ")
        keywords.extend(string.split(str))

    if flags.get('mime'):
        # Pull title/artist tags out of audio files and index them too.
        tags = {}
        if string.lower(path[-4:]) == '.mp3':
            tags = mp3.mp3_info(path)
        elif string.lower(path[-4:]) == '.ogg':
            tags = mp3.ogg_info(path)
        if tags:
            for (k, v) in tags.items():
                file.info[k] = v
        if file.info.get('music_title'):
            keywords.extend(string.split(
                utility.remove_accents(string.lower(file.info['music_title']))))
        if file.info.get('music_artist'):
            keywords.extend(string.split(
                utility.remove_accents(string.lower(file.info['music_artist']))))

    file.info['keywords'] = [ ]

    if flags.get('mime'):
        import classify
        try:
            information = classify.classifier.information(path)
            for key in information.keys():
                if information[key] == None:
                    #print "[Harmless warning] Can not classify : ", path
                    continue
                if len(information[key]) >= min_search_keyword_len:
                    file.info[key] = information[key]
        except:
            sys.stderr.write("Exception caught while classifying file.\n")

    for word in keywords:
        word = utility.force_string(word)
        if len(word) >= min_search_keyword_len and word not in file.info['keywords']:
            file.info['keywords'].append(word)
            if flags.get('name'):
                file.names.append(hash.hash_of(word))

    # publish immediately...
    if flags.get('name'):
        if not server.entries.has_key(path):
            for name in file.names:
                server.node.publish(name, file.info)
        elif server.entries[path].mtime != mtime:
            # first unpublish outdated info
            print "unpublishing outdated:", path
            server.node.unpublish(file.info)
            for name in file.names:
                server.node.publish(name, file.info)
        server.entries[path] = file
        server.paths[file.hash] = (path, file.mtime)
        server.names[path] = file.hash
    return file
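
# Hedged usage sketch (an illustration, not part of the original code):
# build_file() hashes the file when the 'name' flag is set and publishes its
# info dict under the content hash plus one hash per indexed keyword. The
# path and flag values below are assumptions.
def _example_build_file(server):
    path = '/tmp/shared/song.mp3'                     # hypothetical file
    flags = {'name': 1, 'filename': 1, 'keywords': 1, 'mime': 1}
    f = build_file(path, os.stat(path)[8], flags, server)   # [8] = st_mtime
    print f.info['filename'], ':', f.length, 'bytes,', len(f.names), 'names'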