Esempio n. 1
0
    def to_meta(self, md5=None, file=None):
        '''Return a dictionary of metadata, for use in the Remote api.

        Either ``md5`` or ``file`` must be given. When ``md5`` is supplied it
        is used as-is and ``file`` is ignored, so no size is reported. When
        only ``file`` is supplied, the digest comes from
        ``util.md5_for_file(file)`` and the size from ``os.stat(file)``.

        :param md5: precomputed MD5 digest, or a falsy value to compute it.
        :param file: path of the file to hash and stat when ``md5`` is falsy.
        :returns: dict with keys ``id``, ``identity`` (JSON text of
            ``self.dict``), ``name``, ``fqname``, ``md5`` and ``size``
            (``None`` when the digest was supplied by the caller).
        :raises ValueError: if neither ``md5`` nor ``file`` is given.
        '''
        import json

        if md5:
            # The digest was handed in, so the file is never inspected.
            size = None
        else:
            if not file:
                raise ValueError("Must specify either file or md5")

            # Imported lazily: only this branch touches the filesystem.
            import os
            from util import md5_for_file

            md5 = md5_for_file(file)
            size = os.stat(file).st_size

        return {
                'id': self.id_,
                'identity': json.dumps(self.dict),
                'name': self.sname,
                'fqname': self.fqname,
                'md5': md5,
                # Original author's note: This causes errors with calculating
                # the AWS signature
                'size': size
                }
Esempio n. 2
0
 def __init__(self, filename):
     """
     Initialization

     Reads the whole file into ``self.content`` as a list of lines (split
     on '\\n'), opens a database connection through ``dbutils``, stores the
     file's MD5 digest in ``self.md5sum_file`` and creates the logger.

     :param filename: path of the log file to parse.
     """
     self.filename = filename

     # Context managers close the handles deterministically; the original
     # left both open() results for the garbage collector.
     with open(filename, 'r') as handle:
         self.content = handle.read().split('\n')

     self.connection = dbutils.get_connection()

     # The file is reopened in the same text mode as before so the digest
     # is computed over exactly the same data as previously.
     with open(self.filename, 'r') as handle:
         self.md5sum_file = util.md5_for_file(handle)

     self.logger = bp_logger('Log Parser')
Esempio n. 3
0
 def __init__(self, filename):
     """
     Initialization

     Loads the file's text into ``self.content`` (a list obtained by
     splitting on '\\n'), obtains a database connection from ``dbutils``,
     records the file's MD5 digest in ``self.md5sum_file`` and sets up
     the logger.

     :param filename: path of the log file to parse.
     """
     self.filename = filename

     # Use ``with`` so the file descriptor is released as soon as the
     # content has been read instead of leaking until garbage collection.
     with open(filename, 'r') as source:
         self.content = source.read().split('\n')

     self.connection = dbutils.get_connection()

     # md5_for_file receives an open file object here; the mode is kept
     # at 'r' so the resulting digest is unchanged.
     with open(self.filename, 'r') as source:
         self.md5sum_file = util.md5_for_file(source)

     self.logger = bp_logger('Log Parser')
Esempio n. 4
0
    def add_md5(self, md5=None, file=None):
        """Store an MD5 digest on this object and return the object.

        :param md5: precomputed digest; used as-is when truthy.
        :param file: passed to ``util.md5_for_file`` to compute the digest
            when ``md5`` is falsy.
        :returns: ``self``, so calls can be chained.
        :raises ValueError: if neither ``md5`` nor ``file`` is given.
        """
        if not md5:
            if not file:
                raise ValueError("Must specify either file or md5")

            # Imported lazily: only needed when the digest must be computed.
            from util import md5_for_file

            md5 = md5_for_file(file)

        self.md5 = md5

        return self
Esempio n. 5
0
 def to_meta(self, md5=None, file=None):
     '''Build the metadata dictionary sent to the Remote api.

     A truthy ``md5`` is used directly. Otherwise ``file`` is required and
     its digest is obtained from ``util.md5_for_file``.

     Returns a dict with the keys ``id``, ``identity`` (JSON text of
     ``self.to_dict()``), ``name`` and ``md5``.

     Raises ValueError when neither ``md5`` nor ``file`` is provided.
     '''
     import json

     if not md5 and not file:
         raise ValueError("Must specify either file or md5")

     if not md5:
         from util import md5_for_file
         md5 = md5_for_file(file)

     meta = {}
     meta['id'] = self.id_
     meta['identity'] = json.dumps(self.to_dict())
     meta['name'] = self.name
     meta['md5'] = md5
     return meta
Esempio n. 6
0
def ttttt(n, test_urls, log):
    """Download every URL in ``test_urls`` and verify its MD5 digest.

    Each URL is fetched into a scratch file named after its expected
    digest. The file is hashed with ``util.md5_for_file``, removed, and
    the digest is compared with the expected one.

    :param n: when 1 the URL is fetched in a single request; for any other
        value it is fetched range by range, using the ranges returned by
        ``HttpFetcher.div_file(size, 3)``.
    :param test_urls: mapping of expected MD5 digest -> URL. The digest
        string is also used as the scratch file name (relative to the
        current working directory).
    :param log: logger handed to ``HttpFetcher`` and used for progress
        messages.
    :raises AssertionError: if no ranges are produced or a digest differs.
    """
    import os
    import util

    fetcher = HttpFetcher(log)
    for md5, url in test_urls.items():
        try:
            with open(md5, 'w') as fp:
                size = HttpFetcher.get_content_len(url)
                clips = HttpFetcher.div_file(size, 3)
                assert clips
                if n == 1:
                    fetcher.fetch(url, fp)
                else:
                    for r in clips:
                        fetcher.fetch(url, fp, data_range=r)
                log.info('========= checking n=%d ===================', n)
            with open(md5, 'r') as fp:
                mm = util.md5_for_file(fp)
        finally:
            # Always clean up the scratch file, even when a fetch or an
            # assertion above fails; previously it was left behind.
            if os.path.exists(md5):
                os.remove(md5)
        assert md5 == mm
Esempio n. 7
0
 def check_if_file_exists_in_db(self):
     """
     Check if the file exists in the database.

     The file's MD5 digest is computed and looked up through
     ``dbutils.check_if_file_exists_in_db``.

     Returns True (after logging an error) when the lookup result is
     greater than zero, False otherwise.

     If the file cannot be opened or read (IOError), the problem is logged,
     a usage message is printed and the process exits with status 1.
     """
     try:
         # ``with`` closes the handle once hashing is done; the original
         # never closed the file it opened.
         with open(self.filename, 'r') as handle:
             md5sum = util.md5_for_file(handle)
     except IOError:
         self.logger.error("File {:s} does not exist.".format(self.filename))
         # Parenthesised single-argument form prints the same text under
         # Python 2 and is also valid Python 3.
         print("Error: File {:s} does not exist.".format(self.filename))
         print("Usage: python parser.py <relative-path-to-file>")
         sys.exit(1)

     exists = dbutils.check_if_file_exists_in_db(self.connection, md5sum)

     if exists > 0:
         self.logger.error("Error: The file already exists in the database.")
         return True

     return False
Esempio n. 8
0
    def check_if_file_exists_in_db(self):
        """
        Check if the file exists in the database.

        Computes the MD5 digest of ``self.filename`` and passes it, with
        ``self.connection``, to ``dbutils.check_if_file_exists_in_db``.

        Returns True (and logs an error) when that lookup yields a value
        greater than zero; returns False otherwise.

        On IOError while opening or reading the file, the error is logged,
        a usage hint is printed and the process exits with status 1.
        """
        try:
            # Close the handle deterministically instead of leaking it.
            with open(self.filename, 'r') as source:
                md5sum = util.md5_for_file(source)
        except IOError:
            self.logger.error("File {:s} does not exist.".format(
                self.filename))
            # Single-argument print(...) behaves identically on Python 2
            # and also parses on Python 3.
            print("Error: File {:s} does not exist.".format(self.filename))
            print("Usage: python parser.py <relative-path-to-file>")
            sys.exit(1)

        exists = dbutils.check_if_file_exists_in_db(self.connection, md5sum)

        if exists > 0:
            self.logger.error(
                "Error: The file already exists in the database.")
            return True

        return False
Esempio n. 9
0
    def synchronize(self, delete=False):
        """Synchronize the localpath to S3.

        Upload new or changed files.
        Delete files that no longer exist locally.

        Every file under ``self.localpath`` is mapped to a key equal to its
        path relative to ``self.localpath``, always with "/" separators.
        Keys missing from the bucket are uploaded and made public-read.
        Existing keys are re-uploaded when the local MD5 differs from the
        key's ETag.

        :param delete: when true, keys present in the bucket but absent
            locally are deleted after the upload pass.
        """
        bucket = self.get_bucket()
        s3_paths = s3_util.get_paths_from_keys(bucket)
        local_files = set()
        for dirpath, _dirnames, filenames in os.walk(self.localpath):
            for filename in filenames:
                file_path = os.path.join(dirpath, filename)
                file_key = os.path.relpath(file_path, self.localpath)
                if os.sep == "\\":
                    # Windows paths need conversion. Normalise once so the
                    # lookup, the upload and the delete pass all use the
                    # same "/" key. Previously only local_files got the
                    # converted form, so every file looked new on Windows.
                    file_key = file_key.replace("\\", "/")
                local_files.add(file_key)
                try:
                    s3_key = s3_paths[file_key]
                except KeyError:
                    # File is new
                    s3_key = bucket.new_key(file_key)
                    logger.info("Uploading new file: {0}".format(file_key))
                    s3_key.set_contents_from_filename(file_path)
                    s3_key.set_acl("public-read")
                else:
                    # File already exists, check if it's changed.
                    # NOTE(review): this assumes the ETag is the plain MD5
                    # of the object; confirm for multipart uploads.
                    local_md5 = util.md5_for_file(file_path)
                    if local_md5 != s3_key.etag.replace("\"", ""):
                        # File has changed
                        # NOTE(review): unlike the new-file branch, no ACL
                        # is set after re-upload; confirm the object stays
                        # public-read.
                        logger.info("Uploading changed file: {0}".format(file_key))
                        s3_key.set_contents_from_filename(file_path)
        if delete:
            # Delete all files that don't exist locally
            for name, key in s3_paths.items():
                if name not in local_files:
                    # Delete it.
                    logger.info("Deleting old file: {0}".format(name))
                    key.delete()