def to_meta(self, md5=None, file=None):
    '''Return a dictionary of metadata, for use in the Remote api.

    Exactly one of *md5* or *file* must be supplied: when *md5* is absent,
    it (and the size) are computed from *file*; when *md5* is given, size
    is reported as None.

    Raises:
        ValueError: if neither md5 nor file is provided.
    '''
    # Note: the unused `from collections import OrderedDict` was removed.
    import json
    import os

    if not md5:
        if not file:
            raise ValueError("Must specify either file or md5")
        from util import md5_for_file
        md5 = md5_for_file(file)
        size = os.stat(file).st_size
    else:
        # Size is unknown when the caller supplies only the digest.
        size = None

    return {
        'id': self.id_,
        'identity': json.dumps(self.dict),
        'name': self.sname,
        'fqname': self.fqname,
        'md5': md5,
        # This causes errors with calculating the AWS signature
        'size': size
    }
def __init__(self, filename):
    """ Initialization: load the file, open a DB connection, hash the file.

    The original opened the file twice without ever closing either
    handle; both reads now use ``with`` so the handles are released.
    """
    self.filename = filename
    with open(filename, 'r') as fh:
        self.content = fh.read().split('\n')
    self.connection = dbutils.get_connection()
    # md5_for_file is given an open file object here (matching its use
    # elsewhere in this file); reopen rather than assume it can rewind.
    with open(self.filename, 'r') as fh:
        self.md5sum_file = util.md5_for_file(fh)
    self.logger = bp_logger('Log Parser')
def add_md5(self, md5=None, file=None):
    '''Attach an md5 digest to this object and return self (fluent API).

    Exactly one of *md5* or *file* must be supplied; when *md5* is
    absent it is computed from *file*.

    Raises:
        ValueError: if neither md5 nor file is provided.
    '''
    # Note: the unused `import json` was removed.
    if not md5:
        if not file:
            raise ValueError("Must specify either file or md5")
        from util import md5_for_file
        md5 = md5_for_file(file)
    self.md5 = md5
    return self
def to_meta(self, md5=None, file=None):
    '''Return a dictionary of metadata, for use in the Remote api.

    One of *md5* or *file* is required; a missing digest is computed
    from *file*.

    Raises:
        ValueError: if neither md5 nor file is provided.
    '''
    import json

    if md5:
        digest = md5
    elif file:
        from util import md5_for_file
        digest = md5_for_file(file)
    else:
        raise ValueError("Must specify either file or md5")

    identity = json.dumps(self.to_dict())
    return {
        'id': self.id_,
        'identity': identity,
        'name': self.name,
        'md5': digest,
    }
def ttttt(n, test_urls, log):
    """Fetch each test URL and verify its content hash.

    *test_urls* maps an expected md5 digest to a URL. Each download is
    written to a temp file named after that digest (whole-file when
    ``n == 1``, otherwise in 3 byte-range pieces), re-hashed, and the
    result compared against the file name. The temp file is removed.
    """
    import util, os

    fetcher = HttpFetcher(log)
    computed = ''
    for expected_md5, url in test_urls.items():
        with open(expected_md5, 'w') as out_fp:
            total = HttpFetcher.get_content_len(url)
            pieces = HttpFetcher.div_file(total, 3)
            assert pieces
            if n != 1:
                for piece in pieces:
                    fetcher.fetch(url, out_fp, data_range=piece)
            else:
                fetcher.fetch(url, out_fp)
        log.info('========= checking n=%d ===================', n)
        with open(expected_md5, 'r') as in_fp:
            computed = util.md5_for_file(in_fp)
        os.remove(expected_md5)
        assert expected_md5 == computed
def check_if_file_exists_in_db(self): """ Check if the file exists in the database. """ try : md5sum = util.md5_for_file(open(self.filename, 'r')) except IOError: self.logger.error("File {:s} does not exist.".format(self.filename)) print "Error: File {:s} does not exist.".format(self.filename) print "Usage: python parser.py <relative-path-to-file>" sys.exit(1) exists = dbutils.check_if_file_exists_in_db(self.connection, md5sum) if exists > 0 : self.logger.error("Error: The file already exists in the database.") return True return False
def check_if_file_exists_in_db(self): """ Check if the file exists in the database. """ try: md5sum = util.md5_for_file(open(self.filename, 'r')) except IOError: self.logger.error("File {:s} does not exist.".format( self.filename)) print "Error: File {:s} does not exist.".format(self.filename) print "Usage: python parser.py <relative-path-to-file>" sys.exit(1) exists = dbutils.check_if_file_exists_in_db(self.connection, md5sum) if exists > 0: self.logger.error( "Error: The file already exists in the database.") return True return False
def synchronize(self, delete=False):
    """Synchronize the localpath to S3.

    Upload new or changed files. When *delete* is True, also delete S3
    keys that no longer exist locally.
    """
    bucket = self.get_bucket()
    s3_paths = s3_util.get_paths_from_keys(bucket)
    local_files = set()
    for dirpath, dirnames, filenames in os.walk(self.localpath):
        for filename in filenames:
            file_path = os.path.join(dirpath, filename)
            file_key = os.path.relpath(file_path, self.localpath)
            if os.sep == "\\":
                # Windows paths need conversion: S3 keys always use '/'.
                # BUG FIX: the original converted the key only for the
                # local_files set, then looked up (and uploaded under)
                # the unconverted backslash key — so on Windows every
                # file was treated as new and keyed incorrectly.
                file_key = file_key.replace("\\", "/")
            local_files.add(file_key)
            try:
                s3_key = s3_paths[file_key]
            except KeyError:
                # File is new
                s3_key = bucket.new_key(file_key)
                logger.info("Uploading new file: {0}".format(file_key))
                s3_key.set_contents_from_filename(file_path)
                s3_key.set_acl("public-read")
            else:
                # File already exists, check if it's changed.
                # NOTE(review): comparing md5 to the ETag is unreliable
                # for multipart-uploaded objects, whose ETag is not a
                # plain md5 — confirm uploads here are single-part.
                local_md5 = util.md5_for_file(file_path)
                if local_md5 != s3_key.etag.replace("\"", ""):
                    # File has changed
                    logger.info("Uploading changed file: {0}".format(file_key))
                    s3_key.set_contents_from_filename(file_path)
                    # Overwriting a key resets its ACL in boto; reapply
                    # public-read for consistency with new uploads.
                    s3_key.set_acl("public-read")
    if delete:
        # Delete all files that don't exist locally
        for name, key in s3_paths.items():
            if name not in local_files:
                # Delete it.
                logger.info("Deleting old file: {0}".format(name))
                key.delete()