def delete(self, paths, delete_marker=False):
    """
    Delete the provided paths from the metastore, or hide them with a marker.

    Completely removing files from the metastore can cause problems because
    the s3 listing may show the files even though the data may not be
    available. This will cause MR jobs to fail. The delete marker can be
    used to hide files from the listing instead of removing the entries.

    Args:
        paths: The paths to delete; converted with ``self.__as_paths``.
        delete_marker: When true, each entry is kept and its ``deleted``
            attribute is set to ``"true"``; when false (the default) the
            entries are removed in one batch write.

    Returns:
        None. Nothing is done when the metastore is disabled.

    Example:
        s.delete([path1, path2])
    """
    if self.disabled:
        return

    paths = self.__as_paths(paths)

    if delete_marker:
        for path in paths:
            item = self.db.get_item(path=path.parent().normalize(),
                                    file=path.filename())
            item['deleted'] = "true"
            # Assigning to the item only changes the in-memory copy; it must
            # be saved for the marker to reach the store.
            # NOTE(review): assumes self.db is a boto dynamodb2 Table whose
            # items expose save() -- confirm against the class setup.
            item.save()
    else:
        with self.db.batch_write() as batch:
            for path in paths:
                batch.delete_item(path=path.parent().normalize(),
                                  file=path.filename())
def title(self):
    """Return the page title, computing and caching it on first use.

    The title is the content of the first ``<h1>`` element found in
    ``self.html``. When there is no ``<h1>``, the name of the destination
    file without its extension is used instead. The result is stored in
    ``self._title`` and reused while it is truthy.
    """
    if not self._title:
        headings = re.findall("<h1>(.*?)</h1>", self.html)
        if headings:
            self._title = headings[0]
        else:
            # maxsplit=1 strips only the extension; a bare rsplit(".") acts
            # like split(".") and would cut a dotted name at its first dot.
            self._title = filename(self.destfile).rsplit(".", 1)[0]
    return self._title
def abstract(self):
    """Return the page abstract, computing and caching it on first use.

    The abstract is the content of the first ``<p>`` element found in
    ``self.html``; the match may span several lines. When there is no
    ``<p>``, the name of the destination file without its extension is
    used instead. The result is stored in ``self._abstract`` and reused
    while it is truthy.
    """
    if not self._abstract:
        paragraphs = re.findall("<p>(.*?)</p>", self.html, re.S)
        if paragraphs:
            self._abstract = paragraphs[0]
        else:
            # maxsplit=1 strips only the extension; a bare rsplit(".") acts
            # like split(".") and would cut a dotted name at its first dot.
            self._abstract = filename(self.destfile).rsplit(".", 1)[0]
    return self._abstract
def __init__(self, from_file):
    """Initialise the object from the path of its source file.

    Records the source path, derives the destination ``.html`` path by
    swapping the ``post`` directory under ``root_dir`` for ``website_dir``,
    and builds the URL from the part of the destination path that follows
    ``website_dir``. The cached HTML and title start out empty.

    Raises:
        RuntimeError: If ``from_file`` is not an existing regular file.
    """
    if not os.path.isfile(from_file):
        raise RuntimeError("not a file")

    self.fromfile = from_file

    # Mirror the source location into the website tree.
    posts_root = join(root_dir, "post")
    mirrored_source = self.fromfile.replace(posts_root, website_dir)
    target_dir = dirname(mirrored_source)
    # Same base name as the source, with the extension replaced by .html.
    target_name = splitext(filename(self.fromfile))[0] + ".html"
    self.destfile = join(target_dir, target_name)

    # The URL is whatever follows website_dir in the destination path.
    site_relative = self.destfile.split(website_dir)[1]
    self.url = pathname2url(site_relative)

    self._html = None
    self._title = None
def title(self):
    """Return the page title, computing and caching it on first use.

    The title is the content of the first ``<h1>`` element in
    ``self.html``; if there is none, the first ``<h2>`` element is used;
    if there is neither, the name of the destination file without its
    extension is used. The result is stored in ``self._title`` and reused
    while it is truthy.
    """
    if not self._title:
        # Prefer <h1>; only look for <h2> when no <h1> was found.
        headings = (re.findall("<h1>(.*?)</h1>", self.html)
                    or re.findall("<h2>(.*?)</h2>", self.html))
        if headings:
            self._title = headings[0]
        else:
            # maxsplit=1 strips only the extension; a bare rsplit(".") acts
            # like split(".") and would cut a dotted name at its first dot.
            self._title = filename(self.destfile).rsplit(".", 1)[0]
    return self._title
def __init__(self, from_file):
    """Initialise the object from the path of its source file.

    Stores the source path, computes the destination ``.html`` path by
    replacing the ``post`` directory under ``root_dir`` with
    ``website_dir``, and derives the URL from the portion of that path
    after ``website_dir``. The cached HTML and title start out empty.

    Raises:
        RuntimeError: If ``from_file`` is not an existing regular file.
    """
    if not os.path.isfile(from_file):
        raise RuntimeError("not a file")

    self.fromfile = from_file

    source_root = join(root_dir, "post")
    # Directory of the source once relocated under website_dir.
    output_dir = dirname(self.fromfile.replace(source_root, website_dir))
    # Source base name with its extension swapped for .html.
    output_name = splitext(filename(self.fromfile))[0] + ".html"
    self.destfile = join(output_dir, output_name)

    # Everything after website_dir in the destination path forms the URL.
    path_after_site = self.destfile.split(website_dir)[1]
    self.url = pathname2url(path_after_site)

    self._html = None
    self._title = None
def __init__(self, from_file):
    """Initialise the object from the path of its source file.

    Stores the source path, computes the destination ``.html`` path by
    replacing the ``post`` directory under ``root_dir`` with
    ``website_dir``, and derives the URL from the portion of that path
    after ``website_dir``. The cached HTML, title, image and abstract
    start out empty, and a "java" lexer plus an ``HtmlFormatter`` are
    created for later use.

    Raises:
        RuntimeError: If ``from_file`` is not an existing regular file.
    """
    if not os.path.isfile(from_file):
        raise RuntimeError("not a file")

    self.fromfile = from_file

    source_root = join(root_dir, "post")
    # Directory of the source once relocated under website_dir.
    output_dir = dirname(self.fromfile.replace(source_root, website_dir))
    # Source base name with its extension swapped for .html.
    output_name = splitext(filename(self.fromfile))[0] + ".html"
    self.destfile = join(output_dir, output_name)

    # Everything after website_dir in the destination path forms the URL.
    path_after_site = self.destfile.split(website_dir)[1]
    self.url = pathname2url(path_after_site)

    # Lazily computed values; filled in elsewhere on first use.
    self._html = None
    self._title = None
    self._image = None
    self._abstract = None

    # Highlighting helpers (names suggest pygments -- defined outside this block).
    self.lexer = get_lexer_by_name("java", stripall=True)
    self.formatter = HtmlFormatter()
def delete(self, paths, delete_marker=False):
    """
    Delete the provided paths from the metastore, or hide them with a marker.

    Completely removing files from the metastore can cause problems because
    the s3 listing may show the files even though the data may not be
    available. This will cause MR jobs to fail. The delete marker can be
    used to hide files from the listing instead of removing the entries.

    Args:
        paths: The paths to delete; converted with ``self.__as_paths``.
        delete_marker: When true, each entry is kept and its ``deleted``
            attribute is set to ``"true"``; when false (the default) the
            entries are removed in one batch write.

    Returns:
        None. Nothing is done when the metastore is disabled.

    Example:
        s.delete([path1, path2])
    """
    if self.disabled:
        return

    paths = self.__as_paths(paths)

    if delete_marker:
        for path in paths:
            item = self.db.get_item(path=path.parent().normalize(),
                                    file=path.filename())
            item["deleted"] = "true"
            # Assigning to the item only changes the in-memory copy; it must
            # be saved for the marker to reach the store.
            # NOTE(review): assumes self.db is a boto dynamodb2 Table whose
            # items expose save() -- confirm against the class setup.
            item.save()
    else:
        with self.db.batch_write() as batch:
            for path in paths:
                batch.delete_item(path=path.parent().normalize(),
                                  file=path.filename())
def add(self, paths):
    """
    Record a list of paths in the file metastore.

    Every path is written in a single batch as an entry holding its
    normalized parent (``path``), its file name (``file``) and one shared
    timestamp (``epoch``) taken from ``self.__time_now``. Nothing is
    written when the metastore is disabled.

    Returns:
        None.

    Example:
        s.add([path1, path2])
    """
    if self.disabled:
        return

    # One timestamp for the whole batch, taken before any path is processed.
    timestamp = self.__time_now()
    entries = self.__as_paths(paths)

    with self.db.batch_write() as writer:
        for entry in entries:
            record = {
                "path": entry.parent().normalize(),
                "file": entry.filename(),
                "epoch": timestamp,
            }
            writer.put_item(data=record)
def add(self, paths):
    """
    Record a list of paths in the file metastore.

    Every path is written in a single batch as an entry holding its
    normalized parent (``path``), its file name (``file``) and one shared
    timestamp (``epoch``) taken from ``self.__time_now``. Nothing is
    written when the metastore is disabled.

    Returns:
        None.

    Example:
        s.add([path1, path2])
    """
    if self.disabled:
        return

    # One timestamp for the whole batch, taken before any path is processed.
    stamped_at = self.__time_now()
    targets = self.__as_paths(paths)

    with self.db.batch_write() as pending:
        for target in targets:
            row = {
                'path': target.parent().normalize(),
                'file': target.filename(),
                'epoch': stamped_at,
            }
            pending.put_item(data=row)
def log_to_file(original_function, outfile=None, configfile=None):
    """Wrap ``original_function`` so that every call is appended to a log file.

    Args:
        original_function: The callable to wrap.
        outfile: Path of the log file, used as given. When ``None``, a file
            named ``<function name>_log.txt`` in the default log directory
            is used.
        configfile: Optional ``logging.config.fileConfig`` file. It is
            applied only when it names an existing file; otherwise
            ``logging.basicConfig`` is pointed at ``outfile``.

    Returns:
        A wrapper with the metadata of ``original_function`` that calls it,
        appends one line describing the arguments and the result to
        ``outfile``, and returns the result unchanged.
    """
    import functools
    import logging
    import logging.config
    from os import path

    # Optimisation switches described in the logging documentation: skip
    # caller lookup and thread/process bookkeeping on every record.
    logging._srcfile = None
    logging.logThreads = 0
    logging.logProcesses = 0

    if outfile is None:
        outfile = path.join("/root/DropboxSync/bflyProdimagesSync/log",
                            original_function.__name__ + "_log.txt")

    if configfile is not None and path.isfile(configfile):
        logging.config.fileConfig(path.normpath(configfile))
    else:
        logging.basicConfig(filename=outfile, level=logging.DEBUG)

    # Parenthesised single-argument form works as a Python 2 statement and
    # as a Python 3 function call.
    print("Logging to … {0}".format(path.abspath(outfile)))

    @functools.wraps(original_function)
    def new_function(*args, **kwargs):
        result = original_function(*args, **kwargs)
        # Text append mode: the record is a str, which a binary-mode file
        # rejects on Python 3.
        with open(outfile, "a") as logfile:
            logfile.write(
                "Function '%s' called with positional arguments %s and "
                "keyword arguments %s. The result was %s.\n"
                % (original_function.__name__, args, kwargs, result))
        return result

    return new_function
def basic_log_file_obj(log_configuration='Admin_Client', **kwargs):
    """Return a logger named ``log_configuration`` that writes to a log file.

    Args:
        log_configuration: Name passed to ``logging.getLogger``.
        **kwargs: Optional settings:
            configfile: ``logging.config.fileConfig`` file; defaults to
                ``'generic_logger_config.ini'``.
            outfile: Path of the log file, used as given. When absent or
                ``None``, ``<this module's file name>_log.txt`` in the
                default log directory is used.

    Returns:
        The logger, with its level set to ``logging.WARNING``. When
        ``configfile`` exists it is applied and a formatted ``FileHandler``
        for ``outfile`` is attached to the logger. Otherwise
        ``logging.basicConfig`` is pointed at ``outfile`` and one
        announcement message is emitted at each level before the level is
        raised to WARNING.
    """
    import logging
    import logging.config
    from os import path

    # Optimisation switches described in the logging documentation: skip
    # caller lookup and thread/process bookkeeping on every record.
    logging._srcfile = None
    logging.logThreads = 0
    logging.logProcesses = 0

    configfile = kwargs.get('configfile', 'generic_logger_config.ini')
    outfile = kwargs.get('outfile')
    if outfile is None:
        module_stem = path.splitext(path.basename(__file__))[0]
        outfile = path.join("/root/DropboxSync/bflyProdimagesSync/log",
                            module_stem + "_log.txt")

    if configfile is not None and path.isfile(configfile):
        logging.config.fileConfig(path.normpath(configfile))
        # Fetch the logger only after fileConfig has run, as the original did.
        myLogger = logging.getLogger(log_configuration)
        formatter = logging.Formatter(
            '%(asctime)s | %(name)s | %(module)s-%(lineno)04d | %(levelname)s | %(message)s')
        # Attach the file handler once per file, so repeated calls do not
        # produce duplicated log lines.
        target = path.abspath(outfile)
        already_attached = any(
            isinstance(handler, logging.FileHandler)
            and handler.baseFilename == target
            for handler in myLogger.handlers)
        if not already_attached:
            hdlr = logging.FileHandler(outfile)
            hdlr.setFormatter(formatter)
            myLogger.addHandler(hdlr)
        myLogger.setLevel(logging.WARNING)
        return myLogger

    logging.basicConfig(filename=outfile, filemode='w', level=logging.DEBUG)
    myLogger = logging.getLogger(log_configuration)
    imsg = '\nLOGGING Level 1 - Active....\nINFO MODE SET'
    wmsg = '\nLOGGING Level 2 - Active....\nWARNING MODE SET'
    emsg = '\nLOGGING Level 3 - Active....\nERROR MODE SET'
    cmsg = '\nLOGGING Level 4 - Active....\nCRITICAL MODE SET'
    exmsg = '\nEXCEPTION!!!!!....\n--------Exception Raised---------\n'
    dmsg = '\nLOGGING DEBUGER - Active....\nDEBUG MODE SET'
    # Announce every level once before the threshold is raised below.
    myLogger.info(imsg)
    myLogger.warning(wmsg)
    myLogger.error(emsg)
    myLogger.critical(cmsg)
    myLogger.exception(exmsg)
    myLogger.debug(dmsg)
    myLogger.setLevel(logging.WARNING)
    return myLogger
def title(self):
    """Return the page title, computing and caching it on first use.

    The title is the content of the first ``<h2>`` element found in
    ``self.html``. When there is no ``<h2>``, the name of the destination
    file without its extension is used instead. The result is stored in
    ``self._title`` and reused while it is truthy.
    """
    if not self._title:
        headings = re.findall("<h2>(.*?)</h2>", self.html)
        if headings:
            self._title = headings[0]
        else:
            # maxsplit=1 strips only the extension; a bare rsplit(".") acts
            # like split(".") and would cut a dotted name at its first dot.
            self._title = filename(self.destfile).rsplit(".", 1)[0]
    return self._title