def read_config(args):
    """Load config.json, apply command line overrides, and attach the selected proxy groups."""
    if not os.path.isfile("config.json"):
        print("config.json does not exist! Please copy config-sample.json to config.json and edit to your liking, then run the script.")
        sys.exit(1)

    # Normalize the country argument into a list of lowercase country codes
    countries = args.country
    if isinstance(countries, basestring):
        countries = [countries]
    countries = [country.lower().strip() for country in countries]

    for country in countries:
        if not os.path.isfile("proxies/proxies-%s.json" % country):
            print("The proxy configuration file proxies-%s.json does not exist! Exiting." % country)
            sys.exit(1)

    content = util.get_contents("config.json")
    config = util.json_decode(content)

    # Command line arguments override the values from config.json
    if args.ip:
        config["public_ip"] = args.ip
    if args.bind_ip:
        config["bind_ip"] = args.bind_ip
    if args.base_ip:
        config["base_ip"] = args.base_ip
    if args.base_port:
        config["base_port"] = args.base_port

    if not config["public_ip"]:
        try:
            print("Autodetecting public IP address...")
            public_ip = urllib2.urlopen("http://l2.io/ip").read().strip()
            print("Detected public IP as %s. If it's wrong, please cancel the script now and set it in config.json or specify with --ip" % public_ip)
            time.sleep(1)
            config["public_ip"] = public_ip
        except Exception:
            print("Could not detect public IP. Please update the public_ip setting in config.json or specify with --ip.")
            sys.exit(1)

    if args.save:
        util.put_contents('config.json', util.json_encode(config))

    # Merge the proxy group definitions for every requested country
    groups = {}
    for country in countries:
        groups.update(util.json_decode(util.get_contents("proxies/proxies-%s.json" % country)))

    # --only keeps just the named groups, --skip drops the named groups
    if args.only:
        only = set(args.only)
        for item in args.only:
            if item not in groups:
                print("Nonexistent Item: %s, exiting" % item)
                sys.exit(1)
        for item in list(groups.keys()):
            if item not in only:
                del groups[item]
    elif args.skip:
        for item in args.skip:
            del groups[item]

    config["groups"] = groups
    return config
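# Illustration only: a minimal argparse front end that would supply the attributes
# read_config() above reads (args.country, args.ip, args.bind_ip, args.base_ip,
# args.base_port, args.save, args.only, args.skip). The flag names, defaults and
# help strings are assumptions, not the project's actual CLI.
import argparse

def build_arg_parser():
    parser = argparse.ArgumentParser(description="Run the proxy with the selected country groups.")
    parser.add_argument("--country", nargs="+", default=["us"], help="country code(s) of the proxies-<country>.json files to load")
    parser.add_argument("--ip", help="public IP to use instead of autodetecting it")
    parser.add_argument("--bind-ip", help="local IP address to bind listeners to")
    parser.add_argument("--base-ip", help="first IP of the address range to allocate from")
    parser.add_argument("--base-port", type=int, help="first port of the port range to allocate from")
    parser.add_argument("--save", action="store_true", help="write the merged settings back to config.json")
    parser.add_argument("--only", nargs="*", help="keep only these proxy groups")
    parser.add_argument("--skip", nargs="*", help="drop these proxy groups")
    return parser

# Example: config = read_config(build_arg_parser().parse_args())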
def license_from_copying_hash(copying, srcdir, config):
    """Add licenses based on the hash of the copying file."""
    try:
        data = get_contents(copying)
    except FileNotFoundError:
        # LICENSE file is a bad symlink (qemu-4.2.0!)
        return

    if data.startswith(b'#!'):
        # Not a license if this is a script
        return

    data = decode_license(data)
    if not data:
        return

    hash_sum = get_sha1sum(copying)

    if config.license_fetch:
        values = {'hash': hash_sum, 'text': data, 'package': tarball.name}
        data = urllib.parse.urlencode(values)
        data = data.encode('utf-8')

        buffer = download.do_curl(config.license_fetch, post=data, is_fatal=True)
        response = buffer.getvalue()
        page = response.decode('utf-8').strip()
        if page:
            print("License : ", page, " (server) (", hash_sum, ")")
            process_licenses(page, config.license_translations, config.license_blacklist)

            if page != "none":
                # Strip the build source directory off the front
                lic_path = copying[len(srcdir):]
                # Strip any leading slashes
                while lic_path.startswith('/'):
                    lic_path = lic_path[1:]
                lic_path = shlex.quote(lic_path)
                license_files.append(lic_path)
                hashes[lic_path] = hash_sum

            return

    if hash_sum in config.license_hashes:
        add_license(config.license_hashes[hash_sum],
                    config.license_translations,
                    config.license_blacklist)
    else:
        if not config.license_show:
            return
        print_warning("Unknown license {0} with hash {1}".format(copying, hash_sum))
        hash_url = config.license_show % {'HASH': hash_sum}
        print_warning("Visit {0} to enter".format(hash_url))
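# A standalone sketch of the hash lookup path above, assuming get_sha1sum() is a
# plain SHA-1 over the raw bytes of the license file and config.license_hashes
# maps such hex digests to license names. The helper below and the sample table
# entry are illustrative, not autospec's actual API or data.
import hashlib

def sha1_of_file(path):
    """Return the hex SHA-1 digest of a file's contents."""
    sha = hashlib.sha1()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            sha.update(chunk)
    return sha.hexdigest()

# Example lookup against a hash -> license-name table (placeholder digest):
license_hashes = {"0" * 40: "GPL-3.0"}
# license_hashes.get(sha1_of_file("COPYING"), "unknown")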
def test_license_from_copying_hash_bad_license(self):
    """
    Test license_from_copying_hash with invalid license file
    """
    content = util.get_contents("tests/COPYING_TEST").replace(b"GNU", b"SNU")
    m_open = MagicMock()
    m_open.return_value = content
    with patch('license.get_contents', m_open, create=True):
        license.license_from_copying_hash('copying.txt', '')

    self.assertEqual(license.licenses, [])
def get_backedup_files(machine_path):
    """ return a dict mapping each file we've backed up for this machine to the list of backup times it appears in """
    backedup = {}
    backups = get_backups(machine_path)
    for bk in backups:
        # fetch the contents of the backup log
        contents = get_contents(machine_path, bk.timestamp+"/bkp/bkp."+bk.timestamp+".log", verbose)
        # collect the newest version
        if contents:
            if verbose:
                print >>sys.stderr, "Found log file and processing it"
            past_config = False
            for l in StringIO.StringIO(contents):
                if not past_config:
                    if l.startswith("end_config"):
                        past_config = True
                elif l.strip():
                    local_path, remote_path, status, msg = l.strip().split(";", 3)
                    if local_path in backedup:
                        backedup[local_path].append(bk.time)
                    else:
                        backedup[local_path] = [bk.time]
        else:
            # this backup has no log, so recurse with ls and build the list from the remote tree instead
            for l in StringIO.StringIO(fs_mod.fs_ls(bk.path, True)):
                prefix, path = re.split(bk.timestamp, l)
                path = path.strip()
                local_path = urllib.url2pathname(path)
                if local_path in backedup:
                    backedup[local_path].append(bk.time)
                else:
                    backedup[local_path] = [bk.time]
    return backedup
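# A self-contained sketch of the backup log format that get_backedup_files()
# parses above: a config header terminated by an "end_config" line, then one
# "local_path;remote_path;status;msg" record per file. The sample records are
# made up; the field layout is inferred from the split(";", 3) calls above, not
# from a written format specification.
SAMPLE_LOG = """bucket = s3://example-bucket
end_config
/home/user/notes.txt;s3://example-bucket/bkp/host/2020.01.02.03.04.05/home/user/notes.txt;ok;
/home/user/broken.bin;s3://example-bucket/bkp/host/2020.01.02.03.04.05/home/user/broken.bin;error;upload failed
"""

def parse_backup_log(contents):
    """Yield [local_path, remote_path, status, msg] records from a backup log."""
    past_config = False
    for line in contents.splitlines():
        if not past_config:
            past_config = line.startswith("end_config")
        elif line.strip():
            yield line.strip().split(";", 3)

# Example: for local_path, remote_path, status, msg in parse_backup_log(SAMPLE_LOG): ...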
def backup():
    """ driver to perform backup """
    global machine_path, start_time, end_time, backup_path, remote_log_name, local_log_name, errors_count, backedup_files
    try:
        # check for any aborted backups and send an e-mail about them
        check_interrupted()

        # the backups for a given machine will be in s3://bucket/bkp/machine_name
        machine_path = bkp_conf.get_config()["bucket"]+"/bkp/"+platform.node()

        # get the backed up files for this machine
        backedup_files = get_backedup_files(machine_path)

        # the start time for the next backup is in the "next" file in the root for that machine
        # if it is empty or doesn't exist then we start from the beginning of time
        # first thing we do is write the current time to the "next" file for the next backup
        # even if two backups are running concurrently they shouldn't interfere since the files shouldn't overlap
        next = get_contents(machine_path, "next", verbose)
        if next:
            start_time = float(next)
        else:
            start_time = 0.0
        end_time = time.time()
        put_contents(machine_path, "next", end_time, dryrun, bkp_conf.get_config, verbose)
        end_time_t = time.localtime(end_time)
        bkp_conf.get_config()["start_time"] = start_time
        bkp_conf.get_config()["end_time"] = end_time

        # the backup root path is s3://bucket/bkp/machine_name/datetime
        timestamp = "%04d.%02d.%02d.%02d.%02d.%02d"%(end_time_t.tm_year, end_time_t.tm_mon, end_time_t.tm_mday, end_time_t.tm_hour, end_time_t.tm_min, end_time_t.tm_sec)
        backup_path = machine_path + "/" + timestamp

        # we log locally and snapshot the log to a remote version in the backup directory
        remote_log_name = backup_path + "/bkp/bkp." + timestamp + ".log"
        local_log_name = os.path.expanduser("~/.bkp/bkp." + timestamp + ".log")

        # write config and restart info to the start of the local log
        bkp_conf.save_config(open(local_log_name, "a+"), True)

        # start the logger thread
        start_logger(perform_logging)

        # fire up the worker threads
        start_workers()

        # loop over the paths provided and add them to the work queue
        for d in bkp_conf.get_config()["dirs"]:
            backup_directory(d)

        # wait for queue to empty
        wait_for_workers()

        # wait for the logger to finish
        wait_for_logger()

        # snapshot the log
        if not dryrun:
            fs_mod.fs_put(local_log_name, remote_log_name, verbose=verbose)
    finally:
        stop_workers()
        stop_logger()

    # send the log to the logging e-mail
    if errors_count:
        mail_error(None, open(local_log_name, "r"), verbose)
        os.remove(local_log_name)
        return 1
    else:
        mail_log(None, open(local_log_name, "r"), False, verbose)
        os.remove(local_log_name)
        return 0
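# The dotted timestamps built in backup() above are equivalent to
# time.strftime("%Y.%m.%d.%H.%M.%S", ...) over local time. The round trip below
# is a sketch of what a helper like bkp_mod.timestamp2time presumably does
# (strptime + mktime); the real implementation may differ.
import time

def make_timestamp(epoch_seconds):
    """Format epoch seconds as the dotted local-time timestamp used in backup paths."""
    return time.strftime("%Y.%m.%d.%H.%M.%S", time.localtime(epoch_seconds))

def timestamp_to_time(timestamp):
    """Convert a dotted timestamp back to epoch seconds (local time)."""
    return time.mktime(time.strptime(timestamp, "%Y.%m.%d.%H.%M.%S"))

# Example: timestamp_to_time(make_timestamp(1700000000.0)) -> 1700000000.0
# (away from DST transitions, where local-time round trips are unambiguous)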
def restore(machine=platform.node(), restore_path="", exclude_pats=[], asof="", restore_pats=[]):
    """ main restore driver, will loop over all backups for this server and restore all files
    to the restore path that match the restore_pats and are not excluded by the exclude patterns,
    up to the asof date """
    try:
        # start the logger
        start_logger()

        # expand user path references in restore_path
        restore_path = os.path.expanduser(restore_path)

        # if asof is not specified then restore as of now
        if not asof:
            end_time_t = time.localtime(time.time())
            asof = "%04d.%02d.%02d.%02d.%02d.%02d"%(end_time_t.tm_year, end_time_t.tm_mon, end_time_t.tm_mday, end_time_t.tm_hour, end_time_t.tm_min, end_time_t.tm_sec)

        # get asof as a time value
        asof_time = bkp_mod.timestamp2time(asof)

        # the backups for a given machine will be in s3://bucket/bkp/machine_name
        machine_path = bkp_conf.get_config()["bucket"]+"/bkp/"+machine

        # map of local_path -> Restore entries to fetch; initialized here so the
        # check below still works even if listing the backups fails
        restore_map = {}
        try:
            # get backup paths and timestamps, returns Backup objects with (time, timestamp, path)
            backups = bkp_mod.get_backups(machine_path)

            # loop over the backups, process the log files and collect the correct versions of matching files
            for bk in backups:
                if bkp_mod.verbose:
                    log("Examining backup: %s"%(bk.path))

                # if the backup is after the asof date then skip it
                if bk.time > asof_time:
                    if bkp_mod.verbose:
                        log("Skipping because it is newer than asof backup: %s"%(bk.path))
                    continue

                # fetch the contents of the backup log
                contents = get_contents(machine_path, bk.timestamp+"/bkp/bkp."+bk.timestamp+".log", bkp_mod.verbose)

                # collect the newest version less than the asof time and apply all the filters
                # if there's a backup log then we do this the easy way
                if contents:
                    if bkp_mod.verbose:
                        log("Found log file and processing it")
                    past_config = False
                    for l in StringIO.StringIO(contents):
                        if not past_config:
                            if l.startswith("end_config"):
                                past_config = True
                        else:
                            local_path, remote_path, status, msg = l.split(";", 3)
                            if status == "error":
                                if bkp_mod.verbose:
                                    log("Skipping because of error: %s"%(local_path))
                                continue
                            if local_path in restore_map and restore_map[local_path].time > bk.time:
                                if bkp_mod.verbose:
                                    log("Skipping because we already have a newer one: %s"%(local_path))
                                continue
                            exclude = False
                            for ex in exclude_pats:
                                if re.match(ex, local_path):
                                    exclude = True
                                    break
                            if exclude:
                                if bkp_mod.verbose:
                                    log("Skipping because of exclude %s %s"%(ex, local_path))
                                continue
                            restore = False
                            for rs in restore_pats:
                                if re.match(rs, local_path):
                                    restore = True
                                    break
                            if not restore:
                                if bkp_mod.verbose:
                                    log("Skipping because not included: %s"%(local_path))
                                continue
                            if bkp_mod.verbose:
                                log("Including: %s"%(local_path))
                            restore_map[local_path] = Restore(remote_path, local_path, os.path.join(restore_path, local_path[1:]), bk.time)
                else:
                    if bkp_mod.verbose:
                        log("No log file, doing a recursive ls of %s"%bk.path)
                    # this backup has no log, so recurse with ls and build the list from the remote tree instead
                    for l in StringIO.StringIO(fs_ls(bk.path, True)):
                        prefix, path = re.split(bk.timestamp, l)
                        path = path.strip()
                        local_path = urllib.url2pathname(path)
                        remote_path = bk.path + path[1:]
                        if local_path in restore_map:
                            if bkp_mod.verbose:
                                log("Found in map: %s"%(local_path))
                            if restore_map[local_path].time > bk.time:
                                if bkp_mod.verbose:
                                    log("Skipping because we already have a newer one %s"%(local_path))
                                continue
                        exclude = False
                        for ex in exclude_pats:
                            if re.match(ex, local_path):
                                exclude = True
                                break
                        if exclude:
                            if bkp_mod.verbose:
                                log("Skipping because of exclude %s %s"%(ex, local_path))
                            continue
                        restore = False
                        for rs in restore_pats:
                            if re.match(rs, local_path):
                                restore = True
                                break
                        if not restore:
                            if bkp_mod.verbose:
                                log("Skipping because not included %s"%(local_path))
                            continue
                        if bkp_mod.verbose:
                            log("Including: %s"%(local_path))
                        restore_map[local_path] = Restore(remote_path, local_path, os.path.join(restore_path, local_path[1:]), bk.time)
        except Exception:
            log("Exception while processing: "+traceback.format_exc())

        # if we have things to restore then go for it
        if restore_map:
            # start up the restore workers
            start_restore_workers()

            # enqueue all of the restore tasks
            for rest in restore_map.itervalues():
                restore_work_queue.put(rest)

            # wait for the restore workers
            wait_for_restore_workers()

            # wait for logging to complete
            wait_for_logger()
    finally:
        # stop the restore workers
        stop_restore_workers()

        # stop the restore logger
        stop_logger()
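# An illustrative call of restore() above, with made-up patterns and date: the
# restore_pats/exclude_pats arguments are regular expressions tested with
# re.match against each backed-up local path, and asof uses the same dotted
# timestamp format as the backup directory names.
#
#   restore(restore_path="~/restored",
#           restore_pats=[r".*\.txt$", r".*\.jpg$"],
#           exclude_pats=[r".*/cache/.*"],
#           asof="2023.12.31.23.59.59")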