Esempio n. 1
0
    def delete(self, paths, delete_marker=False):
        """ Deletes the provided paths from the metastore.

            Completely removing files from the metastore can cause problems
            because the s3 listing may show the files even though the data may
            not be available.  This will cause MR jobs to fail.  The delete marker
            can be used to hide files from the listing.

            paths         -- the path(s) to remove; passed through
                             self.__as_paths() before use.
            delete_marker -- when true, entries are kept and flagged with
                             deleted="true" instead of being removed.

            Nothing is returned.  The call is a no-op when the metastore is
            disabled.

            Example:
            s.delete([path1, path2])
        """
        if self.disabled:
            return

        paths = self.__as_paths(paths)

        if delete_marker:
            for path in paths:
                item = self.db.get_item(path=path.parent().normalize(),
                                        file=path.filename())
                item['deleted'] = "true"
                # Assigning the field only changes the local copy of the item;
                # it has to be written back for the marker to reach the store.
                # NOTE(review): assumes self.db is a boto DynamoDB v2 Table,
                # whose items expose partial_save() -- confirm.
                item.partial_save()
        else:
            with self.db.batch_write() as batch:
                for path in paths:
                    batch.delete_item(path=path.parent().normalize(),
                                      file=path.filename())
Esempio n. 2
0
 def title(self):
     """Return the page title, computing and caching it on first use.

     The title is the content of the first ``<h1>...</h1>`` match in
     ``self.html``.  When there is no match, the name returned by
     ``filename(self.destfile)`` is used with its final extension removed.
     The result is stored in ``self._title`` and reused while it is truthy.
     """
     if not self._title:
         headings = re.findall(r"<h1>(.*?)</h1>", self.html)
         if headings:
             self._title = headings[0]
         else:
             # Strip only the last extension so that a dotted name such as
             # "v1.2-notes.html" is not cut off at its first dot.
             self._title = filename(self.destfile).rsplit(".", 1)[0]
     return self._title
Esempio n. 3
0
 def abstract(self):
     """Return the page abstract, computing and caching it on first use.

     The abstract is the content of the first ``<p>...</p>`` match in
     ``self.html``; the match may span several lines.  When there is no
     match, the name returned by ``filename(self.destfile)`` is used with
     its final extension removed.  The result is stored in
     ``self._abstract`` and reused while it is truthy.
     """
     if not self._abstract:
         # re.S lets "." match newlines so multi-line paragraphs are found.
         paragraphs = re.findall(r"<p>(.*?)</p>", self.html, re.S)
         if paragraphs:
             self._abstract = paragraphs[0]
         else:
             # Strip only the last extension so that a dotted name such as
             # "v1.2-notes.html" is not cut off at its first dot.
             self._abstract = filename(self.destfile).rsplit(".", 1)[0]
     return self._abstract
Esempio n. 4
0
 def __init__(self, from_file):
     """Set up a page for the source file ``from_file``.

     ``destfile`` is built from the source path with ``<root_dir>/post``
     replaced by ``website_dir`` and the file extension swapped for
     ``.html``.  ``url`` is the part of ``destfile`` that follows
     ``website_dir``, passed through ``pathname2url``.

     Raises RuntimeError when ``from_file`` is not an existing file.
     """
     if not os.path.isfile(from_file):
         raise RuntimeError("not a file")

     self.fromfile = from_file

     source_root = join(root_dir, "post")
     target_dir = dirname(self.fromfile.replace(source_root, website_dir))
     target_name = splitext(filename(self.fromfile))[0] + ".html"
     self.destfile = join(target_dir, target_name)

     # Everything after website_dir is the site-relative location.
     site_relative = self.destfile.split(website_dir)[1]
     self.url = pathname2url(site_relative)

     # Filled in later; None means "not computed yet".
     self._html = None
     self._title = None
Esempio n. 5
0
 def title(self):
     """Return the page title, computing and caching it on first use.

     The first ``<h1>...</h1>`` match in ``self.html`` is preferred; when
     there is none, the first ``<h2>...</h2>`` match is used.  If neither
     is present, the name returned by ``filename(self.destfile)`` is used
     with its final extension removed.  The result is stored in
     ``self._title`` and reused while it is truthy.
     """
     if not self._title:
         # The <h2> search only runs when no <h1> was found.
         headings = (re.findall(r"<h1>(.*?)</h1>", self.html)
                     or re.findall(r"<h2>(.*?)</h2>", self.html))
         if headings:
             self._title = headings[0]
         else:
             # Strip only the last extension so that a dotted name such as
             # "v1.2-notes.html" is not cut off at its first dot.
             self._title = filename(self.destfile).rsplit(".", 1)[0]
     return self._title
Esempio n. 6
0
 def __init__(self, from_file):
     """Create a page object for the source file ``from_file``.

     Computes ``destfile`` (the source path with ``<root_dir>/post``
     replaced by ``website_dir`` and an ``.html`` extension) and ``url``
     (the portion of ``destfile`` after ``website_dir``, converted with
     ``pathname2url``).

     Raises RuntimeError when ``from_file`` is not an existing file.
     """
     if not os.path.isfile(from_file):
         raise RuntimeError("not a file")

     self.fromfile = from_file

     posts_root = join(root_dir, "post")
     output_dir = dirname(self.fromfile.replace(posts_root, website_dir))
     output_name = splitext(filename(self.fromfile))[0] + ".html"
     self.destfile = join(output_dir, output_name)

     # The text following website_dir is the path as seen from the site.
     below_site_root = self.destfile.split(website_dir)[1]
     self.url = pathname2url(below_site_root)

     # Filled in later; None means "not computed yet".
     self._html = None
     self._title = None
Esempio n. 7
0
 def __init__(self, from_file):
     """Create a page object for the source file ``from_file``.

     Computes ``destfile`` (the source path with ``<root_dir>/post``
     replaced by ``website_dir`` and an ``.html`` extension) and ``url``
     (the portion of ``destfile`` after ``website_dir``, converted with
     ``pathname2url``).  Also creates the ``lexer`` and ``formatter``
     objects stored on the instance.

     Raises RuntimeError when ``from_file`` is not an existing file.
     """
     if not os.path.isfile(from_file):
         raise RuntimeError("not a file")

     self.fromfile = from_file

     posts_root = join(root_dir, "post")
     output_dir = dirname(self.fromfile.replace(posts_root, website_dir))
     output_name = splitext(filename(self.fromfile))[0] + ".html"
     self.destfile = join(output_dir, output_name)

     # The text following website_dir is the path as seen from the site.
     below_site_root = self.destfile.split(website_dir)[1]
     self.url = pathname2url(below_site_root)

     # Values derived from the page content; None means "not computed yet".
     self._html = None
     self._title = None
     self._image = None
     self._abstract = None

     # NOTE(review): presumably Pygments objects used for code
     # highlighting -- confirm against the file's imports.
     self.lexer = get_lexer_by_name("java", stripall=True)
     self.formatter = HtmlFormatter()
Esempio n. 8
0
    def delete(self, paths, delete_marker=False):
        """ Deletes the provided paths from the metastore.

            Completely removing files from the metastore can cause problems
            because the s3 listing may show the files even though the data may
            not be available.  This will cause MR jobs to fail.  The delete marker
            can be used to hide files from the listing.

            paths         -- the path(s) to remove; passed through
                             self.__as_paths() before use.
            delete_marker -- when true, entries are kept and flagged with
                             deleted="true" instead of being removed.

            Nothing is returned.  The call is a no-op when the metastore is
            disabled.

            Example:
            s.delete([path1, path2])
        """
        if self.disabled:
            return

        paths = self.__as_paths(paths)

        if delete_marker:
            for path in paths:
                item = self.db.get_item(path=path.parent().normalize(), file=path.filename())
                item["deleted"] = "true"
                # Assigning the field only changes the local copy of the item;
                # it has to be written back for the marker to reach the store.
                # NOTE(review): assumes self.db is a boto DynamoDB v2 Table,
                # whose items expose partial_save() -- confirm.
                item.partial_save()
        else:
            with self.db.batch_write() as batch:
                for path in paths:
                    batch.delete_item(path=path.parent().normalize(), file=path.filename())
Esempio n. 9
0
    def add(self, paths):
        """ Records the given paths in the file metastore.

            Each entry is written with its normalized parent as "path", its
            file name as "file" and, as "epoch", the value obtained from
            self.__time_now() once before writing starts.

            Nothing is returned.  The call is a no-op when the metastore is
            disabled.

            Example:
            s.add([path1, path2])
        """
        if self.disabled:
            return

        timestamp = self.__time_now()
        entries = self.__as_paths(paths)

        # All puts go through a single batch writer.
        with self.db.batch_write() as writer:
            for entry in entries:
                record = {
                    "path": entry.parent().normalize(),
                    "file": entry.filename(),
                    "epoch": timestamp,
                }
                writer.put_item(data=record)
Esempio n. 10
0
    def add(self, paths):
        """ Records the given paths in the file metastore.

            Each entry is written with its normalized parent as 'path', its
            file name as 'file' and, as 'epoch', the value obtained from
            self.__time_now() once before writing starts.

            Nothing is returned.  The call is a no-op when the metastore is
            disabled.

            Example:
            s.add([path1, path2])
        """
        if self.disabled:
            return

        timestamp = self.__time_now()
        entries = self.__as_paths(paths)

        # All puts go through a single batch writer.
        with self.db.batch_write() as writer:
            for entry in entries:
                record = {
                    'path': entry.parent().normalize(),
                    'file': entry.filename(),
                    'epoch': timestamp,
                }
                writer.put_item(data=record)
Esempio n. 11
0
def log_to_file(original_function, outfile=None, configfile=None):
    """Wrap ``original_function`` so that every call is appended to a log file.

    Args:
        original_function: The callable to wrap.
        outfile: File that call records are appended to.  When omitted,
            ``<function name>_log.txt`` inside
            ``/root/DropboxSync/bflyProdimagesSync/log`` is used.
        configfile: Logging configuration file.  When omitted,
            ``generic_logger_config.ini`` is tried.  If the file exists it is
            loaded with ``logging.config.fileConfig``; otherwise
            ``logging.basicConfig`` is pointed at ``outfile`` at DEBUG level.

    Returns:
        A wrapper that calls ``original_function``, appends one line with the
        function name, arguments and result to ``outfile``, and returns the
        result unchanged.
    """
    import functools
    import logging
    import logging.config
    from os import path

    # Switch off collection of caller, thread and process details in records.
    logging._srcfile = None
    logging.logThreads = 0
    logging.logProcesses = 0

    if configfile is None:
        configfile = 'generic_logger_config.ini'
    if outfile is None:
        default_name = original_function.__name__ + "_log.txt"
        outfile = path.join("/root/DropboxSync/bflyProdimagesSync/log",
                            path.basename(default_name))

    # Only hand the config file to fileConfig when it is actually present;
    # a missing file falls back to the basic file-based setup.
    if path.isfile(configfile):
        logging.config.fileConfig(path.normpath(configfile))
    else:
        logging.basicConfig(filename=outfile, level=logging.DEBUG)

    print("Logging to … {0}".format(path.abspath(outfile)))

    @functools.wraps(original_function)
    def new_function(*args, **kwargs):
        result = original_function(*args, **kwargs)
        # Text append mode: the record is a str, not bytes.
        with open(outfile, "a") as logfile:
            logfile.write("Function '%s' called with positional arguments %s and keyword arguments %s. The result was %s.\n" % (original_function.__name__, args, kwargs, result))
        return result

    return new_function
Esempio n. 12
0
def basic_log_file_obj(log_configuration='Admin_Client', **kwargs):
    """Return a logger named ``log_configuration`` that logs to a file.

    Keyword Args:
        configfile: Logging configuration file; defaults to
            ``generic_logger_config.ini``.
        outfile: Log file to write to.  When omitted, ``<module>_log.txt``
            (named after this source file) inside
            ``/root/DropboxSync/bflyProdimagesSync/log`` is used.

    When ``configfile`` exists it is loaded with ``logging.config.fileConfig``
    and a ``FileHandler`` for ``outfile`` is attached to the logger.
    Otherwise ``logging.basicConfig`` is pointed at ``outfile`` (truncating
    it) and one banner message per level is emitted.

    Returns:
        The logger, with its level set to WARNING.
    """
    import logging
    import logging.config
    from os import path

    # Switch off collection of caller, thread and process details in records.
    logging._srcfile = None
    logging.logThreads = 0
    logging.logProcesses = 0

    configfile = kwargs.get('configfile', 'generic_logger_config.ini')
    outfile = kwargs.get('outfile')
    if outfile is None:
        module_name = path.splitext(path.basename(__file__))[0]
        outfile = path.join("/root/DropboxSync/bflyProdimagesSync/log",
                            module_name + "_log.txt")

    if configfile is not None and path.isfile(configfile):
        logging.config.fileConfig(path.normpath(configfile))
        hdlr = logging.FileHandler(outfile)
        formatter = logging.Formatter('%(asctime)s | %(name)s | %(module)s-%(lineno)04d | %(levelname)s | %(message)s')
        hdlr.setFormatter(formatter)
        myLogger = logging.getLogger(log_configuration)
        myLogger.addHandler(hdlr)
        myLogger.setLevel(logging.WARNING)
        return myLogger

    logging.basicConfig(filename=outfile, filemode='w', level=logging.DEBUG)
    myLogger = logging.getLogger(log_configuration)
    imsg = '\nLOGGING Level 1 - Active....\nINFO MODE SET'
    wmsg = '\nLOGGING Level 2 - Active....\nWARNING MODE SET'
    emsg = '\nLOGGING Level 3 - Active....\nERROR MODE SET'
    cmsg = '\nLOGGING Level 4 - Active....\nCRITICAL MODE SET'
    exmsg = '\nEXCEPTION!!!!!....\n--------Exception Raised---------\n'
    dmsg = '\nLOGGING DEBUGER - Active....\nDEBUG MODE SET'
    # One banner per level, emitted before the level is raised to WARNING.
    myLogger.info(imsg)
    myLogger.warning(wmsg)
    myLogger.error(emsg)
    myLogger.critical(cmsg)
    # No exception is active here, so log at ERROR level directly instead of
    # using exception(), which would append an empty traceback.
    myLogger.error(exmsg)
    myLogger.debug(dmsg)
    myLogger.setLevel(logging.WARNING)
    return myLogger
Esempio n. 13
0
 def title(self):
     """Return the page title, computing and caching it on first use.

     The title is the content of the first ``<h2>...</h2>`` match in
     ``self.html``.  When there is no match, the name returned by
     ``filename(self.destfile)`` is used with its final extension removed.
     The result is stored in ``self._title`` and reused while it is truthy.
     """
     if not self._title:
         headings = re.findall(r"<h2>(.*?)</h2>", self.html)
         if headings:
             self._title = headings[0]
         else:
             # Strip only the last extension so that a dotted name such as
             # "v1.2-notes.html" is not cut off at its first dot.
             self._title = filename(self.destfile).rsplit(".", 1)[0]
     return self._title