def hanode_relation_joined(relid=None):
    relation_set(
        relation_id=relid,
        relation_settings={
            'private-address': get_relation_ip('hanode'),
            'hostname': get_hostname()}
    )

def main():
    result = commands.getoutput("nvidia-smi")
    result = re.sub('[-+=|]', '', result)
    result = result.strip().split('\n')
    hostname = get_hostname()
    timestamp = result[0]
    gpu_detail = 0
    gpu_info = ''
    if len(result) < 7:
        exit()
    for i, line in enumerate(result[7:]):
        gpu_info = parse_gpu_detail(i, line, gpu_info, timestamp, hostname)
        if line == '' and result[i + 8].strip() == '':
            gpu_detail = i
            break
    for i, line in enumerate(result[gpu_detail + 13:]):
        column = line.split()
        if len(column) == 5:
            process_gpu = column[0]
            pid = column[1]
            usage = column[-1]
            ps_result = commands.getoutput("ps aux | grep {}".format(pid))
            user, ps_pid, com = parse_ps(ps_result, pid)
            with open('/var/log/gpu_ps.log', 'a') as fp:
                fp.write('{}\t{}\t{}\t{}\n'.format(
                    hostname, user, ps_pid, com))
            with open('/var/log/gpu_job.log', 'a') as fp:
                fp.write('{}\t{}\t{}\t{}\n'.format(
                    hostname, process_gpu, pid, usage[:-3]))

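# A hypothetical sketch of the parse_ps() helper called above (the real one is
# not shown in this excerpt): pick the `ps aux` line whose PID column matches,
# and return the user, pid, and command string.
def parse_ps(ps_result, pid):
    for line in ps_result.split('\n'):
        column = line.split()
        # ps aux columns: USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
        if len(column) > 10 and column[1] == pid:
            return column[0], column[1], ' '.join(column[10:])
    return '', pid, ''
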
def mpi_define_env(args):
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    world_size = comm.Get_size()
    master_addr = None
    if rank == 0:
        master_addr = get_hostname()
    master_addr = comm.bcast(master_addr, root=0)

    # Determine local rank by assuming hostnames are unique
    proc_name = MPI.Get_processor_name()
    all_procs = comm.allgather(proc_name)
    local_rank = sum([i == proc_name for i in all_procs[:rank]])

    os.environ['RANK'] = str(rank)
    os.environ['WORLD_SIZE'] = str(world_size)
    args.local_rank = local_rank
    args.world_size = world_size
    args.rank = rank
    os.environ['MASTER_ADDR'] = master_addr
    os.environ['MASTER_PORT'] = "29500"  # TORCH_DISTRIBUTED_DEFAULT_PORT = 29500
    print(
        "Discovered MPI settings of world_rank={}, local_rank={}, world_size={}, master_addr={}, master_port={}"
        .format(os.environ['RANK'], args.local_rank, os.environ['WORLD_SIZE'],
                os.environ['MASTER_ADDR'], os.environ['MASTER_PORT']))

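# A minimal usage sketch (assumed, not from the source): once mpi_define_env()
# has populated RANK/WORLD_SIZE/MASTER_ADDR/MASTER_PORT, torch.distributed can
# rendezvous via the env:// method. `args` is an argparse-style namespace;
# launched as e.g. `mpirun -np 4 python train.py`.
import torch
import torch.distributed as dist

def init_distributed(args):
    mpi_define_env(args)  # fills the environment from the MPI world
    dist.init_process_group(backend='nccl', init_method='env://')
    torch.cuda.set_device(args.local_rank)  # one GPU per local rank
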
def stop():
    # The pcmk.delete_node handles several known failure modes so
    # failure_is_fatal=True actually helps as it causes retries.
    node = get_hostname()
    pcmk.set_node_status_to_maintenance(node)
    pcmk.delete_node(node, failure_is_fatal=True)
    apt_purge(['corosync', 'pacemaker'], fatal=True)

def __init__(self, **kwargs):
    """
    Initialise the CasperFpga object

    :param host:
    """
    self.host, self.bitstream = get_hostname(**kwargs)
    self.memory_devices = None
    self.prog_info = {'last_uploaded': '',
                      'last_programmed': '',
                      'system_name': ''}

def __init__(self, api_url, interval):
    self.api_url = api_url
    self.interval = interval
    self.stop = False
    self.hostname = get_hostname()
    self.ip = get_ip_by_nic("eth0")
    super(Heartbeat, self).__init__()

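# A plausible run() loop for the Heartbeat thread above (hypothetical; the
# original body is not shown). Assumes Heartbeat subclasses threading.Thread
# and that the endpoint accepts a JSON POST.
import json
import time
import requests  # assumed HTTP client

def run(self):
    while not self.stop:
        payload = {'hostname': self.hostname, 'ip': self.ip}
        try:
            requests.post(self.api_url, data=json.dumps(payload), timeout=5)
        except requests.RequestException:
            pass  # a missed beat is tolerable; retry on the next interval
        time.sleep(self.interval)
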
def index():
    global launch_at
    return render_template('index.html',
                           launch_at=launch_at,
                           Hostname=utils.get_hostname(),
                           localaddress=utils.get_local_address(),
                           remote_addr=request.remote_addr,
                           visitor_addrs=utils.record_visitors(
                               request.remote_addr).items(),
                           hits=str(utils.hit_count()))

def send_alert(subject, details):
    recipient = get_client_settings()["email"]
    content = mail_template.format(recipient, subject, get_hostname(), details)
    tmp_content_file = "/tmp/tmpcontentfile"
    with open(tmp_content_file, "w") as f:
        f.write(content)
    os.system("sendmail -vt < " + tmp_content_file)
    os.system("rm -rf " + tmp_content_file)

def __init__(self):
    self.config = PullClientConfig.instance()
    HTTPClient.__init__(self, self.config.Dlp.base_url,
                        self.config.Service.name)
    if self.config.Dlp.hostname:
        self.hostname = self.config.Dlp.hostname
    else:
        self.hostname = get_hostname()
    if self.config.Dlp.mac_list:
        self.mac_list = self.config.Dlp.mac_list
    else:
        self.mac_list = get_mac_addresses()

def _get_ha_metrics(self, bean):
    for metric in self._metrics['HAMetrics']:
        if "HAState" in metric:
            active = bean['Name'].split("@")[1]
            cur = utils.get_hostname()
            label = [self._cluster, cur]
            if active == cur:
                value = 1.0
            else:
                value = 0.0
            self._hadoop_resourcemanager_metrics['HAMetrics'][
                metric].add_metric(label, value)

def __init__(self, **kwargs):
    """
    :param host:
    """
    self.host, self.bitstream = get_hostname(**kwargs)
    self.memory_devices = None
    self.prog_info = {
        'last_uploaded': '',
        'last_programmed': '',
        'system_name': ''
    }

def send_json():
    _handle_args(request.args)
    uptime = datetime.datetime.now().replace(microsecond=0) - startTime
    return jsonify({
        'Release': MY_RELEASE,
        'StartTime': startTimeStr,
        'Uptime': str(uptime),
        'Hostname': utils.get_hostname(),
        'LocalAddress': utils.get_local_address(),
        'RemoteAddress': request.remote_addr,
        'ServerHit': str(utils.get_server_hit_count()),
        'WorkerInstance': utils.get_worker_instance(),
        'Target': utils.get_target(),
    })

def send_json():
    global launch_at
    return jsonify({
        'launch_at': launch_at,
        'Hostname': utils.get_hostname(),
        'LocalAddress': utils.get_local_address(),
        'RemoteAddress': request.remote_addr,
        # wrap in list() so the dict view is JSON-serializable on Python 3
        'visitor_addrs': list(utils.record_visitors(request.remote_addr).items()),
        'Server Hit': str(utils.hit_count())
    })

def __init__(self, node):
    try:
        self.api = LinstorAPI()
        self.sp = self.api.get_storagepool([node])
        self.res = self.api.get_resource([node])
    except AttributeError:
        self.sp = None
        self.res = None
    if node == utils.get_hostname():
        self.conn = None
    else:
        self.conn = utils.SSHConn(node)
    self.pv_list = self.get_pvs()
    self.vg_list = self.get_vgs()
    self.lv_list = self.get_lvs()

def show_details():
    _handle_args(request.args)
    uptime = datetime.datetime.now().replace(microsecond=0) - startTime
    return "<html>" + \
           "<head><title>Demo Application</title></head>" + \
           "<body>" + \
           "<table>" + \
           "<tr><td> Release </td> <td>" + MY_RELEASE + "</td> </tr>" + \
           "<tr><td> Start Time </td> <td>" + startTimeStr + "</td> </tr>" + \
           "<tr><td> Up Time </td> <td>" + str(uptime) + "</td> </tr>" + \
           "<tr><td> Hostname </td> <td>" + utils.get_hostname() + "</td> </tr>" + \
           "<tr><td> Local Address </td> <td>" + utils.get_local_address() + "</td> </tr>" + \
           "<tr><td> Remote Address </td> <td>" + request.remote_addr + "</td> </tr>" + \
           "<tr><td> Server Hit </td> <td>" + str(utils.get_server_hit_count()) + "</td> </tr>" + \
           "<tr><td> Worker Instance </td> <td>" + utils.get_worker_instance() + "</td> </tr>" + \
           "<tr><td> Target </td> <td>" + utils.get_target() + "</td> </tr>" + \
           "</table>" + \
           "</body>" + \
           "</html>"

def __init__(self, conf_file=CERTMASTER_CONFIG):
    self.cfg = read_config(conf_file, CMConfig)
    usename = utils.get_hostname(talk_to_certmaster=False)
    self.logger = logger.Logger().logger
    self.audit_logger = logger.AuditLogger()
    self.cakey = {}
    self.cacert = {}
    for (s_caname, a_ca) in self.cfg.ca.iteritems():
        s_cadir = a_ca.cadir
        if s_caname == "":
            mycn = '%s-CA-KEY' % usename
        else:
            mycn = '%s-%s-CA-KEY' % (s_caname.upper(), usename)
        s_ca_key_file = '%s/certmaster.key' % s_cadir
        s_ca_cert_file = '%s/certmaster.crt' % s_cadir
        # if ca_key_file exists and ca_cert_file is missing == minion only setup
        if os.path.exists(s_ca_key_file) and not os.path.exists(s_ca_cert_file):
            continue
        try:
            if not os.path.exists(s_cadir):
                os.makedirs(s_cadir)
            if not os.path.exists(s_ca_key_file) and not os.path.exists(s_ca_cert_file):
                certs.create_ca(CN=mycn, ca_key_file=s_ca_key_file,
                                ca_cert_file=s_ca_cert_file,
                                hash_function=a_ca.hash_function)
        except (IOError, OSError), e:
            print 'Cannot make certmaster certificate authority keys/certs for CA %s, aborting: %s' % (s_caname, e)
            sys.exit(1)
        # open up the cakey and cacert so we have them available
        a_ca.cakey = certs.retrieve_key_from_file(s_ca_key_file)
        a_ca.cacert = certs.retrieve_cert_from_file(s_ca_cert_file)
        for dirpath in [a_ca.cadir, a_ca.certroot, a_ca.csrroot, a_ca.csrroot]:
            if not os.path.exists(dirpath):
                os.makedirs(dirpath)

def __init__(self, conf_file=CERTMASTER_CONFIG):
    self.cfg = read_config(conf_file, CMConfig)
    usename = utils.get_hostname(talk_to_certmaster=False)
    mycn = '%s-CA-KEY' % usename
    self.ca_key_file = '%s/certmaster.key' % self.cfg.cadir
    self.ca_cert_file = '%s/certmaster.crt' % self.cfg.cadir
    self.logger = logger.Logger().logger
    self.audit_logger = logger.AuditLogger()
    try:
        if not os.path.exists(self.cfg.cadir):
            os.makedirs(self.cfg.cadir)
        if not os.path.exists(self.ca_key_file) and not os.path.exists(self.ca_cert_file):
            certs.create_ca(CN=mycn, ca_key_file=self.ca_key_file,
                            ca_cert_file=self.ca_cert_file)
    except (IOError, OSError), e:
        print 'Cannot make certmaster certificate authority keys/certs, aborting: %s' % e
        sys.exit(1)

def evaluate_file_systems(settings):
    result = []
    tmp_file = "/tmp/fsinfo"
    os.system("df -hT | grep -v tmpfs | grep -iv filesystem > {0}".format(tmp_file))
    with open(tmp_file, "r") as f:
        lines = f.readlines()
    for line in lines:
        line_tokens = line.split()
        fs_usage_procent = int(line_tokens[5].split("%")[0])
        # 14.02.2020
        # WE NEED TO IMPLEMENT REAL HANDLING OF THIS
        # CURRENTLY THIS IS NOT TRIGGERED AS OF
        # CURRENT TIME THE IDEA IS TO TEST END/END SOCKET
        # AND THEN INSERT INTO THE DB
        if fs_usage_procent >= settings["threshold"]:
            result.append({
                "Hostname": get_hostname(),
                "Filesystem": line_tokens[0],
                "Type": line_tokens[1],
                "Size": line_tokens[2],
                "Use%": fs_usage_procent,
                "Mount Point": line_tokens[6],
                "TimeInsertion": get_current_time()
            })
    fs_data = pickle.dumps(result)
    os.system("rm -rf {0}".format(tmp_file))
    socket_client = socket.socket()
    socket_client.connect(("127.0.0.1", 8080))
    socket_client.send(fs_data)
    server_answer = socket_client.recv(1024)
    print(server_answer)

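# A hedged alternative sketch (not from the source): the same usage figure can
# be computed without shelling out to df and a temp file, using the standard
# library. Mount points would still need to be enumerated, e.g. from /proc/mounts.
import shutil

def filesystem_usage_percent(mount_point):
    usage = shutil.disk_usage(mount_point)  # (total, used, free) in bytes
    return int(100 * usage.used / usage.total)
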
def handle_get_info(self, data, sender):
    '''
    Handle get_info event.

    @type data: Packet
    @param data: First packet of the communication.

    @type sender: utils.IPv4Address
    @param sender: Sender IP address.
    '''
    packet1 = get_info.parse_packet_1(data)
    window_title = utils.get_focused_window()[:46] + '\x00'
    current_user = utils.get_current_user()[:30] + '\x00'
    hostname = utils.get_hostname()[:30] + '\x00'
    packet2 = get_info.make_packet_2(
        get_info.STATUS_AVAIL, window_title, current_user, hostname,
        self.config.get_listen_addr(), self.config.get_mac_addr(), 1800)
    self.network.write(packet2, sender)

def __init__(self, *args, **kwargs):
    """
    :param host: the hostname of this CasperFpga
    :return:
    """
    if len(args) > 0:
        try:
            kwargs['host'] = args[0]
            kwargs['port'] = args[1]
        except IndexError:
            pass
    self.host, self.bitstream = get_hostname(**kwargs)
    # some transports, e.g. Skarab, need to know their parent
    kwargs['parent_fpga'] = self
    # was the transport specified?
    transport = get_kwarg('transport', kwargs)
    if transport:
        self.transport = transport(**kwargs)
    else:
        transport_class = choose_transport(self.host)
        self.transport = transport_class(**kwargs)
    # this is just for code introspection
    self.devices = None
    self.memory_devices = None
    self.other_devices = None
    self.sbrams = None
    self.qdrs = None
    self.registers = None
    self.gbes = None
    self.snapshots = None
    self.system_info = None
    self.rcs_info = None
    # /just for introspection
    self._reset_device_info()
    LOGGER.debug('%s: now a CasperFpga' % self.host)

def make_csr(pkey, dest=None, cn=None):
    req = crypto.X509Req()
    req.get_subject()
    subj = req.get_subject()
    subj.C = def_country
    subj.ST = def_state
    subj.L = def_local
    subj.O = def_org
    subj.OU = def_ou
    if cn:
        subj.CN = cn
    else:
        subj.CN = utils.get_hostname()
    subj.emailAddress = 'root@%s' % subj.CN
    req.set_pubkey(pkey)
    req.sign(pkey, 'md5')
    if dest:
        destfd = os.open(dest, os.O_RDWR | os.O_CREAT, 0644)
        os.write(destfd, crypto.dump_certificate_request(crypto.FILETYPE_PEM, req))
        os.close(destfd)
    return req

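# A minimal usage sketch for make_csr() (hypothetical values): generate an RSA
# key with pyOpenSSL and write a CSR for this host. Note the snippet above
# signs with MD5, which modern CAs reject; 'sha256' would be the safer digest.
from OpenSSL import crypto

pkey = crypto.PKey()
pkey.generate_key(crypto.TYPE_RSA, 2048)
req = make_csr(pkey, dest='/etc/pki/certmaster/host.csr')
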
def __init__(self, conf_file=CERTMASTER_CONFIG):
    self.cfg = read_config(conf_file, CMConfig)
    usename = utils.get_hostname(talk_to_certmaster=False)
    mycn = '%s-CA-KEY' % usename
    self.ca_key_file = '%s/certmaster.key' % self.cfg.cadir
    self.ca_cert_file = '%s/certmaster.crt' % self.cfg.cadir
    self.logger = logger.Logger().logger
    self.audit_logger = logger.AuditLogger()
    # if ca_key_file exists and ca_cert_file is missing == minion only setup
    if os.path.exists(self.ca_key_file) and not os.path.exists(self.ca_cert_file):
        return
    try:
        if not os.path.exists(self.cfg.cadir):
            os.makedirs(self.cfg.cadir)
        if not os.path.exists(self.ca_key_file) and not os.path.exists(self.ca_cert_file):
            certs.create_ca(CN=mycn, ca_key_file=self.ca_key_file,
                            ca_cert_file=self.ca_cert_file)
    except (IOError, OSError), e:
        print 'Cannot make certmaster certificate authority keys/certs, aborting: %s' % e
        sys.exit(1)

def parse_url(self, response):
    base = utils.get_hostname(response.url)
    # may want to add stop words for domains for bad redirects
    if 'dnsrsearch' in base:
        return
    if base not in data:
        data[base] = {
            'breadcrumbs': [],
            'partners': [],
            'low_confidence': []
        }
    # TODO: Create utils functions for the validation
    # TODO: Make selection more dynamic
    # check to see if current page has potential partners in plain text
    on_page_kws = ['members']
    if utils.valid_partner_url(response.url, self.logger):
        for header in response.xpath('//h1/text()'):
            if any(kw in header.get().lower() for kw in on_page_kws):
                data[base]['low_confidence'] += [
                    item.get() for item in response.xpath('//ul//li/text()')
                ]
    # TODO: create xpath string generator this should really be
    # TODO: '//a[not(contains(@href, "youtube")) and not(contains(@href, "facebook"))]'
    # TODO: because the facebook, twitter, etc links are still being crawled just not visited
    # anything on page that matches <a> tags
    for link in response.xpath('//a'):
        # get the link name, get the link url
        link_url = link.xpath('@href').get()
        full_url = response.urljoin(link_url)
        # on the right path - keep going
        if (utils.valid_partner_url(link_url, self.logger) and  # partner kw is in URL
                full_url not in data[base]['breadcrumbs'] and   # haven't already been here
                utils.get_hostname(response.url) == utils.get_hostname(full_url)):  # still on same site
            data[base]['breadcrumbs'].append(full_url)
            yield scrapy.Request(full_url, callback=self.parse_url)
        # not a partner match, but came from a partner page i.e. mysite/partners -> yoursite.org
        elif utils.valid_partner_url(response.url):
            # get base name of partner
            partner_name = utils.get_hostname(full_url)
            # if the partner link is not in the list and make sure host names are not the same
            if (partner_name not in data[base]['partners'] and
                    utils.valid_partner(base, full_url, self.logger)):
                data[base]['partners'].append(partner_name)
                # add template of partner info for secondary crawler
                partner_info[partner_name] = {
                    'url': partner_name,
                    'name': [],
                    'phone': [],
                    'address': []
                }

def __init__(self):
    super(HadoopNameNode, self).__init__('hadoop', 'namenode',
                                         utils.get_hostname(), 50070)

def send_mail_to_admin(subject, content):
    utils.sendIp(utils.get_hostname() + ":" + subject, content,
                 config.mail_from_usr, config.mail_from_usr_pw,
                 config.mail_to_usr)

def __init__(self, *args, **kwargs):
    """
    :param args[0] - host: the hostname of this CasperFpga
    """
    if len(args) > 0:
        try:
            kwargs['host'] = args[0]
            kwargs['port'] = args[1]
        except IndexError:
            pass
    self.host, self.bitstream = get_hostname(**kwargs)
    # Need to check if any logger-based parameters have been spec'd
    self.getLogger = getLogger
    try:
        self.logger = kwargs['logger']
    except KeyError:
        # Damn
        result, self.logger = self.getLogger(name=self.host)
        if not result:
            # Problem
            if self.logger.handlers:
                # Logger already exists
                warningmsg = 'Logger for {} already exists'.format(self.host)
                self.logger.warning(warningmsg)
            else:
                errmsg = 'Problem creating logger for {}'.format(self.host)
                raise ValueError(errmsg)
    # some transports, e.g. Skarab, need to know their parent
    kwargs['parent_fpga'] = self
    # Setup logger to be propagated through transports
    # either set log level manually or default to error
    try:
        self.set_log_level(log_level=kwargs['log_level'])
    except KeyError:
        self.set_log_level(log_level='ERROR')
    # was the transport specified?
    transport = get_kwarg('transport', kwargs)
    if transport:
        self.transport = transport(**kwargs)
    else:
        transport_class = self.choose_transport(self.host)
        self.transport = transport_class(**kwargs)
    # this is just for code introspection
    self.devices = None
    self.memory_devices = None
    self.adc_devices = None
    self.other_devices = None
    self.sbrams = None
    self.qdrs = None
    self.hmcs = None
    self.registers = None
    self.gbes = None
    self.snapshots = None
    self.system_info = None
    self.rcs_info = None
    # /just for introspection
    self._reset_device_info()
    self.logger.debug('%s: now a CasperFpga' % self.host)
    # The Red Pitaya doesn't respect network-endianness. It should.
    # For now, detect this board so that an endianness flip can be
    # inserted between the CasperFpga and the underlying transport layer.
    # We try detection again after programming, in case this fails here.
    try:
        self._detect_little_endianness()
    except:
        pass

class Builder:
    """A generic build class."""
    project = "vrayblender"
    version = utils.VERSION
    revision = utils.REVISION

    # Patches revision
    brev = None

    # Blender master revision
    commits = '0'

    # Directories
    dir_build = utils.path_join(os.getcwd(), "build")
    dir_install = utils.path_join(os.getcwd(), "install")
    dir_release = utils.path_join(os.getcwd(), "release")
    dir_source = ""
    dir_blender = ""
    dir_blender_svn = ""

    # Installation directory name
    dir_install_name = "vrayblender"
    dir_install_path = utils.path_join(dir_install, dir_install_name)

    # Build archive for Mac and Linux
    # or NSIS installer for Windows
    generate_package = False
    generate_desktop = False
    generate_docs = False
    with_installer = 'NSIS'

    # Test mode - just print messages, does nothing
    mode_test = True

    # Special mode used only by me =)
    mode_developer = False

    # Debug output of the script
    mode_debug = False

    # Add V-Ray/Blender patches
    add_patches = True

    # Add V-Ray/Blender datafiles
    add_datafiles = True

    # Add patches from "extra" directory
    add_extra = False

    # Add themes from "themes" directory
    add_themes = False

    # Host info
    host_os = utils.get_host_os()
    host_arch = utils.get_host_architecture()
    host_name = utils.get_hostname()
    host_username = utils.get_username()
    host_linux = utils.get_linux_distribution()

    # Install dependencies
    install_deps = False
    build_deps = False
    use_build_deps = False

    # Update sources
    update_blender = True
    update_patch = True

    # Blender option
    use_debug = False
    use_openmp = True
    use_collada = False
    use_sys_python = True
    use_sys_ffmpeg = True

    # Build settings
    build_arch = host_arch
    build_threads = 4
    build_optimize = False
    build_optimize_type = "INTEL"
    build_clean = False
    build_release = False
    build_upload = False
    checkout_revision = None
    use_env_msvc = False

    # user-config.py file path
    user_config = ""

    # Use user defined user-config.py
    user_user_config = ""

    # Mac OS X specific
    osx_sdk = "10.6"

    with_cycles = False
    with_tracker = False
    with_cuda = False
    cuda_gpu = "sm_21"
    with_osl = False
    with_player = False
    with_ge = False

    use_proxy = None

    use_github_branch = None
    use_exp_branch = None
    use_blender_hash = None
    add_branch_name = None

    vb30 = None
    vc2013 = None

    # Only prepare sources
    export_only = None

    def __init__(self, params):
        if not params:
            sys.stdout.write("Params are empty - using defaults...\n")
        for param in params:
            setattr(self, param, params[param])
        if self.mode_debug:
            for param in params:
                print("%s => %s" % (param, params[param]))
            print("")
        if not self.dir_source:
            sys.stderr.write("Fatal error!\n")
            sys.stderr.write("Source directory not specified!\n")
            sys.exit(2)
        if self.vb30:
            self.project += "3"
        elif self.use_github_branch == "dev/vray_for_blender/stable":
            self.project += "1"
        else:
            self.project += "2"

    def info(self):
        sys.stdout.write("\n")
        sys.stdout.write("Build information:\n")
        sys.stdout.write("OS: %s\n" % (self.host_os.title()))
        if self.host_os == utils.LNX:
            sys.stdout.write("Distribution: %s %s\n" % (self.host_linux["long_name"], self.host_linux["version"]))
        sys.stdout.write("Architecture: %s\n" % (self.host_arch))
        sys.stdout.write("Build architecture: %s\n" % (self.build_arch))
        sys.stdout.write("Target: %s %s (%s)\n" % (self.project, self.version, self.revision))
        sys.stdout.write("Source directory: %s\n" % (self.dir_source))
        sys.stdout.write("Build directory: %s\n" % (self.dir_build))
        sys.stdout.write("Install directory: %s\n" % (self.dir_install_path))
        sys.stdout.write("Release directory: %s\n" % (self.dir_release))
        sys.stdout.write("\n")

    def update_sources(self):
        """Getting/updating sources"""
        def exportSources():
            sys.stdout.write("Exporting sources...\n")
            if self.mode_test:
                return
            if os.path.exists(self.dir_blender):
                utils.remove_directory(self.dir_blender)
            # Copy full tree to have proper build info.
            shutil.copytree(self.dir_blender_svn, self.dir_blender)
            os.chdir(self.dir_blender)
            os.system("git remote update github")
            os.system("git checkout -b {branch} github/{branch}".format(branch=self.use_github_branch))
            if self.checkout_revision is not None:
                os.chdir(self.dir_blender)
                os.system("git checkout %s" % self.checkout_revision)

        # Update Blender sources
        if self.update_blender:
            if os.path.exists(self.dir_blender):
                sys.stdout.write("Removing exported sources...\n")
                if not self.mode_test:
                    utils.remove_directory(self.dir_blender)
            if not os.path.exists(self.dir_blender_svn):
                sys.stdout.write("Obtaining Blender sources...\n")
                if not self.mode_test:
                    os.chdir(self.dir_source)
                    # Obtain sources
                    os.system("git clone %s blender" % GITHUB_REPO)
                    # Now set origin to Blender's git and additional github remote
                    # This is needed for proper submodules init
                    os.chdir(self.dir_blender)
                    os.system("git remote set-url origin %s" % OFFICIAL_REPO)
                    os.system("git remote add github %s" % GITHUB_REPO)
                    os.system("git remote update")
                    os.system("git pull --rebase")
                    os.chdir(self.dir_blender)
                    os.system("git submodule update --init --recursive")
                    os.system("git submodule foreach git checkout master")
                    os.system("git submodule foreach git pull --rebase origin master")
                    os.chdir(self.dir_source)
                    # Move "blender" to "blender-git"
                    utils.move_directory(self.dir_blender, self.dir_blender_svn)
            else:
                sys.stdout.write("Updating Blender sources...\n")
                if not self.mode_test:
                    os.chdir(self.dir_blender_svn)
                    # Update sources
                    os.system("git pull --rebase")
                    os.system("git submodule foreach git pull --rebase origin master")
            exportSources()

        # Update Blender libs
        lib_dir = None
        svn_cmd = None
        if self.host_os != utils.LNX:
            if self.host_os == utils.WIN:
                lib_dir = utils.path_join(self.dir_source, "lib", "windows")
                svn_cmd = "svn checkout https://svn.blender.org/svnroot/bf-blender/trunk/lib/windows lib/windows"
                if self.host_arch == "x86_64":
                    if self.vc2013:
                        lib_dir = utils.path_join(self.dir_source, "lib", "win64_vc12")
                        svn_cmd = "svn checkout https://svn.blender.org/svnroot/bf-blender/trunk/lib/win64_vc12 lib/win64_vc12"
                    else:
                        lib_dir = utils.path_join(self.dir_source, "lib", "win64")
                        svn_cmd = "svn checkout https://svn.blender.org/svnroot/bf-blender/trunk/lib/win64 lib/win64"
            elif self.host_os == utils.MAC:
                lib_dir = utils.path_join(self.dir_source, "lib", "darwin-9.x.universal")
                svn_cmd = "svn checkout https://svn.blender.org/svnroot/bf-blender/trunk/lib/darwin-9.x.universal lib/darwin-9.x.universal"
            if not os.path.exists(lib_dir):
                sys.stdout.write("Getting \"lib\" data...\n")
                if not self.mode_test:
                    os.chdir(self.dir_source)
                    os.system(svn_cmd)
            else:
                sys.stdout.write("Updating \"lib\" data...\n")
                if not self.mode_test:
                    os.chdir(lib_dir)
                    os.system("svn update")

        # Update V-Ray/Blender patchset
        if self.update_patch and not self.mode_developer:
            vb25_patch = utils.path_join(self.dir_source, "vb25-patch")
            if os.path.exists(vb25_patch):
                sys.stdout.write("Updating V-Ray/Blender patches...\n")
                if not self.mode_test:
                    os.chdir(vb25_patch)
                    os.system("git pull")
            else:
                sys.stdout.write("Getting V-Ray/Blender patches...\n")
                if not self.mode_test:
                    os.chdir(self.dir_source)
                    os.system("git clone git://github.com/bdancer/vb25-patch.git")

    def update(self):
        self.revision, self.brev, self.commits = utils.get_svn_revision(self.dir_blender)
        self.version = utils.get_blender_version(self.dir_blender)[0]
        self.versionArr = utils.get_blender_version(self.dir_blender)
        if self.build_release:
            self.dir_install_name = utils.GetInstallDirName(self)
        else:
            self.dir_install_name = self.project
        self.dir_install_path = utils.path_join(self.dir_install, self.dir_install_name)

    def patch(self):
        patch_dir = utils.path_join(self.dir_source, "vb25-patch")
        if self.use_blender_hash:
            patchBin = utils.find_patch()
            patchFilepath = os.path.join(tempfile.gettempdir(), "vray_for_blender.patch")
            os.chdir(self.dir_blender)
            os.system("git checkout %s" % self.use_github_branch)  # Checkout exporter branch
            os.system("git diff master > %s" % patchFilepath)      # Generate diff with master
            os.system("git fetch --tags")                          # Hash could be tag also
            os.system("git checkout %s" % self.use_blender_hash)   # Checkout needed revision
            os.system("git checkout -b vray_for_blender")          # Create some branch for patching
            os.system("patch -Np1 -i %s" % patchFilepath)          # Apply patch
            os.remove(patchFilepath)

        # Add datafiles: splash, default scene etc
        if self.add_datafiles:
            sys.stdout.write("Adding datafiles...\n")
            datafiles_path = utils.path_join(self.dir_blender, "release", "datafiles")
            if not self.mode_test:
                # Change splash
                for splash_filename in ["splash.png", "splash_2x.png"]:
                    splash_path_src = utils.path_join(patch_dir, "datafiles", splash_filename)
                    splash_path_dst = utils.path_join(datafiles_path, splash_filename)
                    shutil.copyfile(splash_path_src, splash_path_dst)
                # Change icons
                for subdir in ["blender_icons16", "blender_icons32"]:
                    icons_path_src = utils.path_join(patch_dir, "datafiles", subdir)
                    icons_path_dst = utils.path_join(datafiles_path, subdir)
                    shutil.rmtree(icons_path_dst)
                    shutil.copytree(icons_path_src, icons_path_dst)

    def docs(self):
        if self.generate_docs:
            api_dir = utils.path_join(self.dir_install_path, "api")
            sys.stdout.write("Generating API documentation: %s\n" % (api_dir))
            if self.host_os != utils.LNX:
                sys.stdout.write("API documentation generation is not supported on this platform.\n")
            else:
                if not self.mode_test:
                    sphinx_doc_gen = "doc/python_api/sphinx_doc_gen.py"
                    # Create API directory
                    os.system("mkdir -p %s" % api_dir)
                    # Generate API docs
                    os.chdir(self.dir_blender)
                    os.system("%s -b -P %s" % (utils.path_join(self.dir_install_path, "blender"), sphinx_doc_gen))
                    os.system("sphinx-build doc/python_api/sphinx-in %s" % api_dir)

    def post_init(self):
        """Override this method in subclass."""
        pass

    def init_paths(self):
        if self.generate_package:
            if not self.mode_test:
                utils.path_create(self.dir_release)
        self.dir_build = utils.path_slashify(self.dir_build)
        self.dir_source = utils.path_slashify(self.dir_source)
        self.dir_install_path = utils.path_slashify(self.dir_install_path)
        self.dir_blender = utils.path_join(self.dir_source, "blender")
        self.dir_blender_svn = utils.path_join(self.dir_source, "blender-git")
        self.user_config = utils.path_join(self.dir_blender, "user-config.py")
        if self.user_user_config:
            self.user_user_config = utils.pathExpand(self.user_user_config)
        if self.build_clean:
            if os.path.exists(self.dir_build):
                shutil.rmtree(self.dir_build)

    def config(self):
        """Override this method in subclass."""
        sys.stderr.write("Base class method called: config() This shouldn't happen.\n")

    def compile(self):
        if self.host_os == utils.LNX and hasattr(self, 'compile_linux'):
            self.compile_linux()
        elif self.host_os == utils.MAC and hasattr(self, 'compile_osx'):
            self.compile_osx()
        else:
            compileCmd = [sys.executable]
            compileCmd.append("scons/scons.py")
            if not self.build_clean:
                compileCmd.append("--implicit-deps-unchanged")
                compileCmd.append("--max-drift=1")
            if self.host_os != utils.WIN:
                compileCmd.append('CXXFLAGS="-w"')
                compileCmd.append('CCFLAGS="-w"')
            if self.use_env_msvc:
                compileCmd.append(r'env="PATH:%PATH%,INCLUDE:%INCLUDE%,LIB:%LIB%"')
            if self.vc2013:
                compileCmd.append(r'MSVS_VERSION=12.0')
            cleanCmd = [sys.executable]
            cleanCmd.append("scons/scons.py")
            cleanCmd.append("clean")
            if not self.mode_test:
                os.chdir(self.dir_blender)
                if self.build_clean:
                    sys.stdout.write("Calling: %s\n" % (" ".join(cleanCmd)))
                    subprocess.call(cleanCmd)
                sys.stdout.write("Calling: %s\n" % (" ".join(compileCmd)))
                res = subprocess.call(compileCmd)
                if not res == 0:
                    sys.stderr.write("There was an error during the compilation!\n")
                    sys.exit(1)

    def compile_post(self):
        if self.host_os == utils.WIN:
            runtimeDir = utils.path_join(self.dir_source, "vb25-patch", "non-gpl", self.build_arch)
            files = []
            if self.vc2013:
                files.extend([
                    "msvcp120.dll",
                    "msvcr120.dll",
                    "vcomp120.dll",
                ])
            else:
                files.append("vcomp90.dll")
            for f in files:
                shutil.copy(utils.path_join(runtimeDir, f), self.dir_install_path)

    def exporter(self):
        """Add script and modules"""
        scriptsPath = utils.path_join(self.dir_install, self.dir_install_name, self.version, "scripts")
        if self.host_os == utils.MAC:
            scriptsPath = utils.path_join(self.dir_install, self.dir_install_name, "blender.app", "Contents", "Resources", self.version, "scripts")
        addonsPath = utils.path_join(scriptsPath, "addons")
        startupPath = utils.path_join(scriptsPath, "startup")
        clonePath = addonsPath if self.vb30 else startupPath
        sys.stdout.write("Adding exporter...\n")
        sys.stdout.write("  in: %s\n" % clonePath)
        if not self.mode_test:
            if not os.path.exists(clonePath):
                sys.stderr.write("Something went wrong! Can't add Python modules and exporter!\n")
                sys.exit(3)
            if self.vb30:
                os.chdir(clonePath)
                exporterPath = utils.path_join(clonePath, "vb30")
                if os.path.exists(exporterPath):
                    utils.remove_directory(exporterPath)
                os.system("git clone --recursive https://github.com/bdancer/vb30.git")
            else:
                os.chdir(clonePath)
                exporterPath = utils.path_join(clonePath, "vb25")
                if os.path.exists(exporterPath):
                    utils.remove_directory(exporterPath)
                os.system("git clone --recursive https://github.com/bdancer/vb25.git")
            if self.use_exp_branch not in {'master'}:
                os.chdir(exporterPath)
                os.system("git remote update")
                os.system("git checkout -b {branch} origin/{branch}".format(branch=self.use_exp_branch))
            os.chdir(exporterPath)
            os.system("git submodule update --init --recursive")
            os.system("git submodule foreach git checkout master")
            os.system("git submodule foreach git pull --rebase origin master")

    def package(self):
        """Override this method in subclass."""
        sys.stderr.write("Base class method called: package() This shouldn't happen.\n")

    def build(self):
        self.init_paths()
        self.post_init()
        self.update_sources()
        self.update()
        self.info()
        self.patch()
        if not self.export_only:
            self.config()
            self.compile()
            self.compile_post()
            if not self.mode_developer:
                self.exporter()
            self.docs()
            if self.generate_package:
                if self.mode_developer:
                    sys.stdout.write("Package generation is disabled in 'Developer' mode.\n")
                else:
                    if self.build_release:
                        releaseSubdir, releasePackage = self.package()
                        if self.build_upload != 'off':
                            self.upload(releaseSubdir, releasePackage)
                    else:
                        sys.stdout.write("Package generation is disabled in non-release mode.\n")

    def upload(self, subdir, filepath):
        if self.build_upload == 'http':
            import requests
            from ConfigParser import RawConfigParser
            config = RawConfigParser()
            config.read(os.path.expanduser("~/.passwd"))
            data = {
                "password": config.get('cgdo.ru', 'upload_password'),
                "subdir": subdir,
            }
            files = {
                "file": open(filepath, "rb"),
            }
            proxies = {}
            if self.use_proxy:
                proxies = {
                    "http": self.use_proxy,
                    "https": self.use_proxy,
                }
            sys.stdout.write("Uploading package '%s' to '%s'...\n" % (filepath, subdir))
            requests.post("http://cgdo.ru/upload", files=files, data=data, proxies=proxies)
        elif self.build_upload == 'ftp':
            from ConfigParser import RawConfigParser
            config = RawConfigParser()
            config.read(os.path.expanduser("~/.passwd"))
            now = datetime.datetime.now()
            subdir = now.strftime("%Y%m%d")
            cmd = None
            if sys.platform == 'win32':
                ftpScriptFilepath = os.path.join(tempfile.gettempdir(), "blender_for_vray_upload.txt")
                with open(ftpScriptFilepath, 'w') as f:
                    f.write('option batch abort\n')
                    f.write('option confirm off\n')
                    f.write('open ftp://%s:%s@%s -rawsettings ProxyMethod=%s ProxyHost=%s ProxyPort=%s\n' % (
                        config.get('nightlies.ftp', 'user'),
                        config.get('nightlies.ftp', 'pass'),
                        config.get('nightlies.ftp', 'host'),
                        config.get('nightlies.ftp', 'proxy_type'),
                        config.get('nightlies.ftp', 'proxy_host'),
                        config.get('nightlies.ftp', 'proxy_port'),
                    ))
                    f.write('option transfer binary\n')
                    f.write('put %s /%s/\n' % (filepath, subdir))
                    f.write('exit\n')
                    f.write('\n')
                cmd = ['winscp']
                cmd.append('/passive')
                cmd.append('/script="%s"' % ftpScriptFilepath)
                if not self.mode_test:
                    os.system(' '.join(cmd))
            else:
                cmd = ['curl']
                cmd.append('--no-epsv')
                if self.use_proxy:
                    cmd.append('--proxy')
                    cmd.append(self.use_proxy)
                cmd.append('--user')
                cmd.append('%s:%s' % (
                    config.get('nightlies.ftp', 'user'),
                    config.get('nightlies.ftp', 'pass'),
                ))
                cmd.append('--upload-file')
                cmd.append(filepath)
                cmd.append('ftp://%s/%s/' % (
                    config.get('nightlies.ftp', 'host'),
                    subdir,
                ))
                if not self.mode_test:
                    subprocess.call(cmd)
            if self.mode_test:
                print(' '.join(cmd))

def process_html(self, msg):
    if avoid_captive_portal(msg):
        return
    # Check if the requested URL is already in the database
    req = msg.flow.request
    url = "{}://{}{}".format(req.get_scheme(), "".join(req.headers["host"]), req.path)
    db = sqlite3.connect("db/blackout.db")
    db.row_factory = sqlite3.Row
    cursor = db.cursor()
    cursor.execute('''SELECT * FROM resources WHERE url=?''', (url,))
    resource = cursor.fetchone()
    client_ip = msg.flow.client_conn.address.address[0]
    if resource is None:
        # If not, add the URL to the database with current timestamp
        # and pass the page on as usual
        hostname = get_hostname(client_ip, global_config["router_IPs"]["blackout"]) or client_ip
        cursor.execute('''INSERT INTO resources(url, last_accessed, life_remaining, accessed_by)
                          VALUES(?, ?, 0, ?)''', (url, int(time()), hostname))
        db.commit()
    else:
        # If yes in the database, was it accessed within the past 24 hours?
        now = int(time())
        then = resource["last_accessed"]
        accessed_by = resource["accessed_by"]
        blackout_time = 86400  # 24 hours in seconds
        if now - then > blackout_time:
            # If not accessed in 24 hours, update the database with the current
            # timestamp and pass the page on as usual
            hostname = get_hostname(client_ip, global_config["router_IPs"]["blackout"]) or client_ip
            cursor.execute('''UPDATE resources SET last_accessed = ?, accessed_by = ?
                              WHERE url = ?''', (now, hostname, url))
            db.commit()
        else:
            # If it was accessed in past 24 hours, display the blackout page with info
            then_date = datetime.fromtimestamp(then)
            now_date = datetime.fromtimestamp(now)
            available_date = then_date + timedelta(0, blackout_time)
            time_diff = now_date - then_date
            available_diff = available_date - now_date
            # Figure natural description for last access day
            accessed_day = ""
            if time_diff.days == 0:
                accessed_day = "today"
            elif time_diff.days == 1:
                accessed_day = "yesterday"
            else:
                accessed_day = "{} days ago".format(time_diff.days)
            # Figure natural description for day page will be accessible again
            available_day = ""
            if available_diff.days == 0:
                available_day = "today"
            elif available_diff.days == 1:
                available_day = "tomorrow"
            else:
                available_day = "in {} days".format(available_diff.days)
            blackout_diff = available_date - now_date
            minutes, seconds = divmod(blackout_diff.total_seconds(), 60)
            hours, minutes = divmod(minutes, 60)
            then_string = "{} at {}".format(accessed_day, then_date.strftime("%H:%M"))
            template = template_env.get_template("blackout/notavailable.html")
            msg.content = template.render(url=url,
                                          access_time=then_string,
                                          hours=int(hours),
                                          minutes=int(minutes),
                                          seconds=int(seconds),
                                          accessed_by=accessed_by)
            # Force unicode
            msg.content = msg.content.encode("utf-8")
            msg.headers["content-type"] = ["{}; charset=utf-8".format(msg.headers["content-type"][0])]
            # Force uncompressed response
            msg.headers["content-encoding"] = [""]
            # Don't cache
            msg.headers["Pragma"] = ["no-cache"]
            msg.headers["Cache-Control"] = ["no-cache, no-store"]
            # Allow any script
            del(msg.headers["content-security-policy"])
    db.close()

import tensorflow as tf

from models.UNet3D import UNet3D
from models.UNet2D1D import UNet2D1D
from models.UNet2D2D import UNet2D2D
from models.UNet3D_old import UNet3D_old
from utils import send_email
from utils import write_to_h5
from utils import get_hostname
from utils import extract_images
from utils import load_training_settings
from loss.custom_loss import *
from metrics.custom_metrics import *

host_name = get_hostname()

NETWORK_TYPES = {'UNet3D': UNet3D,
                 'UNet2D1D': UNet2D1D,
                 'UNet3D_old': UNet3D_old,
                 'UNet2D2D': UNet2D2D}
OPTIMIZER_TYPES = {'Adam': tf.keras.optimizers.Adam,
                   'RMSprop': tf.keras.optimizers.RMSprop}
LOSS_TYPES = {'ssim_loss': ssim_loss, 'psnr_loss': psnr_loss}
METRICS_TYPES = {'ssim': ssim, 'psnr': psnr}

SEND_SUMMARY = True

tasks = load_training_settings()
no_of_tasks = len(tasks)

if SEND_SUMMARY:
    send_email('The task on {} has started, total tasks: {}.'.format(host_name, no_of_tasks),
               'Details of the training are in the attachment ',
               files=['./config/training_config.yaml'])

for index, task in enumerate(tasks):

import os
import sys
import argparse
import subprocess

import utils
import config
from argconfig.argconfig import argconfig as argcfg

__all__ = ['submit']

argcfg.add_path(os.path.dirname(os.path.realpath(__file__)))
# add $HOME/.submit path to config.ini searching paths
argcfg.add_path(os.path.join(os.path.expanduser('~'), '.submit'))
argcfg.set_section(['default', utils.get_hostname()])

_cores = open('/proc/cpuinfo').read().count('processor')

# default options
default = {
    # resources settings
    'nodes'   : argcfg(dest='nodes').get_default(1),
    'cores'   : argcfg(dest='cores').get_default(_cores),
    'mpis'    : argcfg(dest='mpis').get_default(1),
    'gpus'    : argcfg(dest='gpus').get_default(),
    # cluster settings
    'queue'   : argcfg(dest='queue').get_default(),
    'group'   : argcfg(dest='group').get_default(),
    'account' : argcfg(dest='account').get_default(),
    # walltime settings

def namer():
    return '.%s/.at/.%s/.%s' % \
        (get_login(), get_hostname(), sys._getframe().f_code.co_name)

def hostname():
    return utils.get_hostname()

def __init__(self):
    super(HadoopYarn, self).__init__('hadoop', 'resourcemanager',
                                     utils.get_hostname(), 8088)

def __init__(self, *args, **kwargs):
    """
    :param args[0] - host: the hostname of this CasperFpga
    :return: <nothing>
    """
    if len(args) > 0:
        try:
            kwargs['host'] = args[0]
            kwargs['port'] = args[1]
        except IndexError:
            pass
    self.host, self.bitstream = get_hostname(**kwargs)
    # Need to check if any logger-based parameters have been spec'd
    self.getLogger = getLogger
    try:
        self.logger = kwargs['logger']
    except KeyError:
        # Damn
        result, self.logger = self.getLogger(name=self.host)
        if not result:
            # Problem
            if self.logger.handlers:
                # Logger already exists
                warningmsg = 'Logger for {} already exists'.format(self.host)
                self.logger.warning(warningmsg)
            else:
                errmsg = 'Problem creating logger for {}'.format(self.host)
                raise ValueError(errmsg)
    # some transports, e.g. Skarab, need to know their parent
    kwargs['parent_fpga'] = self
    # Setup logger to be propagated through transports
    self.logger.setLevel(logging.NOTSET)
    # define a custom log level between DEBUG and INFO
    # PDEBUG = 15
    # logging.addLevelName(PDEBUG, "PDEBUG")
    #
    # self.logger.pdebug = pdebug
    kwargs['logger'] = self.logger
    # was the transport specified?
    transport = get_kwarg('transport', kwargs)
    if transport:
        self.transport = transport(**kwargs)
    else:
        transport_class = self.choose_transport(self.host)
        self.transport = transport_class(**kwargs)
    # this is just for code introspection
    self.devices = None
    self.memory_devices = None
    self.other_devices = None
    self.sbrams = None
    self.qdrs = None
    self.registers = None
    self.gbes = None
    self.snapshots = None
    self.system_info = None
    self.rcs_info = None
    # /just for introspection
    self._reset_device_info()
    self.logger.debug('%s: now a CasperFpga' % self.host)
    # Set log level to ERROR
    self.logger.setLevel(logging.ERROR)

time.sleep(0.1)
if settings.has_display:
    display.display_image_file("images/yrl028-white.pbm")
time.sleep(0.2)
if settings.has_display:
    display.display_image_file("images/apihat-black.pbm")
time.sleep(0.1)
if settings.has_display:
    display.display_image_file("images/apihat-white.pbm")
time.sleep(0.8)

# Detect if switch attached; if not, display IP address, else start demo threads
has_switch = switch.detect()
if not has_switch:
    # Display the IP address [and optionally hostname] on the display
    if settings.has_display:
        if settings.SHOW_HOSTNAME:
            display.two_line_text_wrapped(utils.get_hostname(), utils.get_ip())
        else:
            display.two_line_text_wrapped("IP Address:", utils.get_ip())
        time.sleep(1)

demo_mode_enabled = settings.ENABLE_DEMO_MODE
stats_mode_enabled = settings.ENABLE_STATS_MODE
autosense_mode_enabled = settings.ENABLE_AUTOSENSE_MODE
settings.sensor_list = sensors.detect_sensors()
write_headers()
if has_switch:
    demo.setup_demo()
    demo.start_demo_threads()
handler_running = True

sa, cw = get_sa_cw()

def save_ts_ip(ip):
    ts = str_tstamp(fmt='%Y-%m-%dT%H:%M:%S')
    ofname = '%s/%s' % (get_homedir(), namer())
    rec = '%s %s' % (ts, ip)
    save_textfile(rec, ofname, mode='w', verb=False)
    return rec

def add_record_to_log(rec):
    save_textfile(rec, logname(), mode='a', verb=False)

#------------------------------

if __name__ == "__main__":
    print('get_enviroment("PWD") : %s' % get_enviroment(env='PWD'))
    print('get_homedir()         : %s' % get_homedir())
    print('get_login()           : %s' % get_login())
    print('get_hostname()        : %s' % get_hostname())
    print('get_cwd()             : %s' % get_cwd())
    print('str_tstamp()          : %s' % str_tstamp(fmt='%Y-%m-%dT%H:%M'))
    print('namer()               : %s' % namer())
    print('cwname()              : %s' % cwname())
    sys.exit('END OF TEST')

#------------------------------

#!/usr/bin/python
import sys
import config
import utils
import os
import shutil

hostname = utils.get_hostname()
BASE_FOLDER = config.hosts[hostname]['base_folder']
TRANSFER_BASE_FOLDER = config.transfer['transfer_base_folder']
TRANSFER_FOLDER = os.path.join(BASE_FOLDER, TRANSFER_BASE_FOLDER)

hosts_completed = [os.path.join(host + '.completed')
                   for host in config.hosts.keys()]
#print hosts_completed

for folder in os.listdir(TRANSFER_FOLDER):
    abs_folder = os.path.join(TRANSFER_FOLDER, folder)
    if not os.path.isdir(abs_folder):
        continue
    all_exists = True
    for host_completed in hosts_completed:
        abs_host_completed = os.path.join(abs_folder, host_completed)
        if not os.path.exists(abs_host_completed):
            all_exists = False
    if all_exists:
        # report the folder itself, not the last completion marker
        print "delete %s" % abs_folder

#| MY_IP [global constant]
#|
#|     The IP address, on the sensor net's wireless network,
#|     of the host that this server is running on.
#|
#|     NOTE: This is really only needed when the server has
#|     more than one active network interface card, each with
#|     its own IP address. Otherwise, the utils.get_my_ip()
#|     function can determine the IP address satisfactorily.
#|
#|vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv

# utils.get_my_ip() would work fine in this case (the machine has only 1 NIC)
# but this assignment is still here as a piece of legacy code
if utils.get_hostname() == 'COSMICi':
    # Dell Precision T3400 on Mike's desk in APCR-DRDL lab.
    MY_IP = "192.168.0.2"   # The static private IP address that is assigned to the
                            # central server node in our wireless router's DHCP config.

#elif utils.get_hostname() == 'Linux-PC':   # This was the Acer, but it's now no longer
#    MY_IP = "192.168.0.4"                  # in use as a server.

# The below is commented out because get_my_ip() works fine on this machine instead.
#elif utils.get_hostname() == 'Theo':   # Mike's home office desktop.
#    MY_IP = '192.168.0.102'            # This is Theo's IP address when using my router at home.

else:
    MY_IP = utils.get_my_ip()   # Would this work in the above cases too? Need to test.

#|=================================================================
#|

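# A plausible sketch of the utils.get_my_ip() helper referenced above
# (hypothetical; not the project's actual implementation). The UDP "connect"
# trick asks the OS which interface it would route through; no packet is sent.
import socket

def get_my_ip():
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.connect(("192.168.0.1", 80))  # any off-host address works
        return s.getsockname()[0]
    finally:
        s.close()
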
def main(plot=True): if args.means != '': means = [float(x.strip()) for x in args.means.strip('[').strip(']').split(',')] else: means = [] if args.stds != '': stds = [float(x.strip()) for x in args.stds.strip('[').strip(']').split(',')] else: stds = [] ## hostname = utils.get_hostname() ''' cuda ''' use_cuda = torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") print(f'device = {device}') ''' ''' store_net = True other_stats = dict({'sj':sj,'satid':satid,'hostname':hostname,'label_corrupt_prob':args.label_corrupt_prob}) ''' reproducibility setup/params''' #num_workers = 2 # how many subprocesses to use for data loading. 0 means that the data will be loaded in the main process. githash = subprocess.check_output(["git", "describe", "--always"]).strip() seed = args.seed if seed is None: # if seed is None it has not been set, so get a random seed, else use the seed that was set seed = int.from_bytes(os.urandom(7), byteorder="big") print(f'seed: {seed}') ## SET SEED/determinism num_workers = 3 torch.manual_seed(seed) #torch.backends.cudnn.deterministic=True ''' date parameters setup''' today_obj = date.today() # contains datetime.date(year, month, day); accessible via .day etc day = today_obj.day month = calendar.month_name[today_obj.month] setup_time = time.time() ''' filenames ''' ## folder names results_root = './test_runs_flatness5_ProperOriginalExpt' expt_folder = f'flatness_{month}_label_corrupt_prob_{args.label_corrupt_prob}_exptlabel_{args.exptlabel}_' \ f'only_1st_layer_BIAS_{args.only_1st_layer_bias}_data_set_{args.data_set}_reg_param_{args.reg_param}' ## filenames matlab_file_name = f'flatness_{day}_{month}_sj_{sj}_staid_{satid}_seed_{seed}_{hostname}' net_file_name = f'net_{day}_{month}_sj_{sj}_staid_{satid}_seed_{seed}_{hostname}' ## folder to hold all nets all_nets_folder = f'nets_folder_{day}_{month}_sj_{sj}_staid_{satid}_seed_{seed}_{hostname}' ## experiment path expt_path = os.path.join(results_root,expt_folder) ''' data set ''' data_path = './data' standardize = not args.dont_standardize_data # x - mu / std , [-1,+1] trainset, testset, classes = data_class.get_data_processors(data_path,args.label_corrupt_prob,dataset_type=args.data_set,standardize=standardize,type_standardize=args.type_standardize) ''' experiment params ''' evalaute_mdl_data_set = get_function_evaluation_from_name(args.evalaute_mdl_data_set) suffle_test = False shuffle_train = True nb_epochs = 4 if args.epochs is None else args.epochs batch_size = 256 #batch_size_train,batch_size_test = batch_size,batch_size batch_size_train = batch_size batch_size_test = 256 ''' get NN ''' nets = [] mdl = args.mdl do_bn = args.use_bn other_stats = dict({'mdl':mdl,'do_bn':do_bn, 'type_standardize':args.type_standardize},**other_stats) print(f'model = {mdl}') if mdl == 'cifar_10_tutorial_net': suffle_test = False net = nn_mdls.Net() nets.append(net) elif mdl == 'debug': suffle_test = False nb_conv_layers=1 ## conv params Fs = [3]*nb_conv_layers Ks = [2]*nb_conv_layers ## fc params FC = len(classes) C,H,W = 3,32,32 net = nn_mdls.LiaoNet(C,H,W,Fs,Ks,FC,do_bn) nets.append(net) elif mdl == 'sequential': batch_size_train = 256 batch_size_test = 256 ## batch_size = batch_size_train suffle_test = False ## FC = [10,10] C,H,W = 3, 32, 32 # net = torch.nn.Sequential(OrderedDict([ # ('Flatten',Flatten()), # ('FC1', torch.nn.Linear(C*H*W,FC[0])), # ('FC2', torch.nn.Linear(FC[0],FC[1])) # ])) # net = torch.nn.Sequential(OrderedDict([ # ('Flatten',Flatten()), # ('FC1', torch.nn.Linear(C*H*W,FC[0])), # ('relu1', 
torch.nn.ReLU()), # ('FC2', torch.nn.Linear(FC[0],FC[1])) # ])) net = torch.nn.Sequential(OrderedDict([ ('conv0', torch.nn.Conv2d(3,420,5,bias=True)), ('relu0', torch.nn.ReLU()), ('conv1', torch.nn.Conv2d(420,50,5, bias=True)), ('relu1', torch.nn.ReLU()), ('Flatten',Flatten()), ('FC1', torch.nn.Linear(28800,50,bias=True)), ('relu2', torch.nn.ReLU()), ('FC2', torch.nn.Linear(50, 10, bias=True)) ])) ## nets.append(net) elif mdl == 'BoixNet': batch_size_train = 256 batch_size_test = 256 ## batch_size = batch_size_train suffle_test = False ## conv params nb_filters1,nb_filters2 = 32, 32 nb_filters1, nb_filters2 = 32, 32 kernel_size1,kernel_size2 = 5,5 ## fc params nb_units_fc1,nb_units_fc2,nb_units_fc3 = 512,256,len(classes) C,H,W = 3,32,32 net = nn_mdls.BoixNet(C,H,W,nb_filters1,nb_filters2, kernel_size1,kernel_size2, nb_units_fc1,nb_units_fc2,nb_units_fc3,do_bn) nets.append(net) elif mdl == 'LiaoNet': suffle_test = False nb_conv_layers=5 ## conv params Fs = [32]*nb_conv_layers Ks = [10]*nb_conv_layers ## fc params FC = len(classes) C,H,W = 3,32,32 net = nn_mdls.LiaoNet(C,H,W,Fs,Ks,FC,do_bn) nets.append(net) elif mdl == 'GBoixNet': #batch_size_train = 16384 # 2**14 #batch_size_test = 16384 batch_size_train = 2**10 batch_size_test = 2**10 ## batch_size = batch_size_train suffle_test = False ## conv params nb_conv_layers=2 Fs = [34]*nb_conv_layers Ks = [5]*nb_conv_layers #nb_conv_layers = 4 #Fs = [60] * nb_conv_layers #Ks = [5] * nb_conv_layers ## fc params FCs = [len(classes)] ## print(f'------> FCs = {FCs}') if args.data_set == 'mnist': CHW = (1, 28, 28) else: CHW = (3,32,32) net = nn_mdls.GBoixNet(CHW,Fs,Ks,FCs,do_bn,only_1st_layer_bias=args.only_1st_layer_bias) print(f'net = {net}') ## if len(means) != 0 and len(stds) != 0: params = net.named_parameters() dict_params = dict(params) i = 0 for name, param in dict_params.items(): if name in dict_params: print(name) if name != 'conv0.bias': mu,s = means[i], stds[i] param.data.normal_(mean=mu,std=s) i+=1 ## expt_path = f'{expt_path}_means_{args.means}_stds_{args.stds}' other_stats = dict({'means': means, 'stds': stds}, **other_stats) ## nets.append(net) other_stats = dict({'only_1st_layer_bias': args.only_1st_layer_bias}, **other_stats) elif mdl == 'AllConvNetStefOe': #batch_size_train = 16384 # 2**14 #batch_size_test = 16384 #batch_size_train = 2**10 # batch_size_train = 2**10 # batch_size_test = 2**10 batch_size_train = 32 batch_size_test = 124 ## batch_size = batch_size_train suffle_test = False ## AllConvNet only_1st_layer_bias = args.only_1st_layer_bias CHW = (3,32,32) dropout = args.use_dropout net = nn_mdls.AllConvNetStefOe(nc=len(CHW),dropout=dropout,only_1st_layer_bias=only_1st_layer_bias) ## nets.append(net) other_stats = dict({'only_1st_layer_bias': args.only_1st_layer_bias,'dropout':dropout}, **other_stats) expt_path = f'{expt_path}_dropout_{dropout}' elif mdl == 'AndyNet': #batch_size_train = 16384 # 2**14 #batch_size_test = 16384 #batch_size_train = 2**10 batch_size_train = 2**10 batch_size_test = 2**10 # batch_size_train = 32 # batch_size_test = 124 ## batch_size = batch_size_train suffle_test = False ## AndyNet #only_1st_layer_bias = args.only_1st_layer_bias ## TODO fix only_1st_layer_bias = args.only_1st_layer_bias CHW = (3,32,32) net = nn_mdls.get_AndyNet() ## nets.append(net) other_stats = dict({'only_1st_layer_bias': args.only_1st_layer_bias}, **other_stats) expt_path = f'{expt_path}' elif mdl == 'interpolate': suffle_test = True batch_size = 2**10 batch_size_train, batch_size_test = batch_size, batch_size iterations = inf # 
controls how many epochs to stop before returning the data set error #iterations = 1 # controls how many epochs to stop before returning the data set error ''' ''' path_nl = os.path.join(results_root,'flatness_27_April_label_corrupt_prob_0.0_exptlabel_GB_24_24_10_2C1FC_momentum_NL_polestar/net_27_April_sj_343_staid_1_seed_56134200848018679') path_rl_nl = os.path.join(results_root,'flatness_27_April_label_corrupt_prob_0.0_exptlabel_GB_24_24_10_2C1FC_momentum_RLNL_polestar/net_27_April_sj_345_staid_1_seed_57700439347820897') ''' restore nets''' net_nl = utils.restore_entire_mdl(path_nl) net_rlnl = utils.restore_entire_mdl(path_rl_nl) nets.append(net_nl) nets.append(net_rlnl) elif mdl == 'radius_flatness': suffle_test = True batch_size = 2**10 batch_size_train, batch_size_test = batch_size, batch_size iterations = 11 # controls how many epochs to stop before returning the data set error #iterations = inf # controls how many epochs to stop before returning the data set error other_stats = dict({'iterations':iterations},**other_stats) ''' load net ''' if args.net_name == 'NL': #path = os.path.join(results_root,'flatness_28_March_label_corrupt_prob_0.0_exptlabel_BoixNet_polestar_300_stand_natural_labels/net_28_March_206') path = os.path.join(results_root,'flatness_27_April_label_corrupt_prob_0.0_exptlabel_GB_24_24_10_2C1FC_momentum_NL_polestar/net_27_April_sj_343_staid_1_seed_56134200848018679') else: # RLNL #path = os.path.join(results_root,'flatness_28_March_label_corrupt_prob_0.0_exptlabel_re_train_RLBoixNet_noBN_polestar_150/net_28_March_18') path = os.path.join(results_root,'flatness_27_April_label_corrupt_prob_0.0_exptlabel_GB_24_24_10_2C1FC_momentum_RLNL_polestar/net_27_April_sj_345_staid_1_seed_57700439347820897') ''' restore nets''' net = utils.restore_entire_mdl(path) nets.append(net) store_net = False elif mdl == 'sharpness': suffle_test=False #doesn't matter ''' load net ''' if args.net_name == 'NL': #path = os.path.join(results_root,'flatness_28_March_label_corrupt_prob_0.0_exptlabel_BoixNet_polestar_300_stand_natural_labels/net_28_March_206') path = os.path.join(results_root,'flatness_27_April_label_corrupt_prob_0.0_exptlabel_GB_24_24_10_2C1FC_momentum_NL_polestar/net_27_April_sj_343_staid_1_seed_56134200848018679') path_adverserial_data = os.path.join('./data/sharpness_data_NL/','sdata_NL_net_27_April_sj_343_staid_1_seed_56134200848018679.npz') else: # RLNL #path = os.path.join(results_root,'flatness_28_March_label_corrupt_prob_0.0_exptlabel_re_train_RLBoixNet_noBN_polestar_150/net_28_March_18') path = os.path.join(results_root,'flatness_27_April_label_corrupt_prob_0.0_exptlabel_GB_24_24_10_2C1FC_momentum_RLNL_polestar/net_27_April_sj_345_staid_1_seed_57700439347820897') path_adverserial_data = os.path.join('./data/sharpness_data_RLNL/','sdata_RLNL_net_27_April_sj_345_staid_1_seed_57700439347820897.npz') ''' restore nets''' net = torch.load(path) nets.append(net) store_net = False elif mdl == 'divide_constant': ''' ''' # both false because we want low variation on the output of the error iterations = inf # controls how many epochs to stop before returning the data set error #iterations = 11 # controls how many epochs to stop before returning the data set error batch_size = 2**10 batch_size_train, batch_size_test = batch_size, batch_size shuffle_train = True suffle_test = False ''' load net ''' ## NL #path_nl = os.path.join(results_root,'flatness_27_April_label_corrupt_prob_0.0_exptlabel_GB_24_24_10_2C1FC_momentum_NL_polestar/net_27_April_sj_343_staid_1_seed_56134200848018679') 
#path_nl = os.path.join(results_root,'flatness_May_label_corrupt_prob_0.0_exptlabel_SGD_ManyRuns_Momentum0.9/net_17_May_sj_641_staid_5_seed_31866864409272026_polestar-old') path_nl = os.path.join(results_root,'flatness_May_label_corrupt_prob_0.0_exptlabel_MovieNL_lr_0.01_momentum_0.9/net_22_May_sj_1168_staid_1_seed_59937023958974481_polestar-old_epoch_173') ## RLNL #path_rlnl = os.path.join(results_root,'flatness_27_April_label_corrupt_prob_0.0_exptlabel_GB_24_24_10_2C1FC_momentum_RLNL_polestar/net_27_April_sj_345_staid_1_seed_57700439347820897') path_rlnl = os.path.join(results_root,'flatness_May_label_corrupt_prob_0.0_exptlabel_MovieRLNLmdls_label_corruption0.5_lr_0.01_momentum_0.9/net_22_May_sj_1172_staid_1_seed_38150714758131256_polestar-old_epoch_148') ## net_nl = torch.load(path_nl) net_rlnl = torch.load(path_rlnl) ''' ''' print('NL') l2_norm_all_params(net_nl) print('RLNL') l2_norm_all_params(net_rlnl) ''' modify nets ''' W_nl = 1 W_rlnl = (get_norm(net_rlnl, l=2)/get_norm(net_nl, l=2)) # 2.284937620162964 W_rlnl = (10)**(1.0/3.0) #W_rlnl = 1/0.57775 #W_rlnl = 1/0.7185 #W_rlnl = 1/0.85925 #W_rlnl = 1 print(f'W_rlnl = {W_rlnl}') print(f'norm of weight BEFORE division: get_norm(net_nl,l=2)={get_norm(net_nl,l=2)}, get_norm(net_rlnl,l=2)={get_norm(net_rlnl,l=2)}') #net_nl = divide_params_by(W_nl, net_nl) #net_rlnl = divide_params_by(W_rlnl, net_rlnl) net_rlnl = divide_params_by_taking_bias_into_account(W=W_rlnl,net=net_rlnl) print(f'norm of weight AFTER division: get_norm(net_nl,l=2)={get_norm(net_nl,l=2)}, get_norm(net_rlnl,l=2)={get_norm(net_rlnl,l=2)}') nets.append(net_nl) nets.append(net_rlnl) other_stats = dict({'W_rlnl':W_rlnl,'W_nl':W_nl}) elif mdl == 'load_nl_and_rlnl': ''' load net ''' # NL #path = os.path.join(results_root,'flatness_27_April_label_corrupt_prob_0.0_exptlabel_GB_24_24_10_2C1FC_momentum_NL_polestar/net_27_April_sj_343_staid_1_seed_56134200848018679') path = os.path.join(results_root,'flatness_May_label_corrupt_prob_0.0_exptlabel_MovieNL_lr_0.01_momentum_0.9/net_22_May_sj_1168_staid_1_seed_59937023958974481_polestar-old_epoch_173') net = torch.load(path) nets.append(net) # RLNL #path_rlnl = os.path.join(results_root,'flatness_27_April_label_corrupt_prob_0.0_exptlabel_GB_24_24_10_2C1FC_momentum_RLNL_polestar/net_27_April_sj_345_staid_1_seed_57700439347820897') path_rlnl = os.path.join(results_root,'flatness_May_label_corrupt_prob_0.0_exptlabel_MovieRLNLmdls_label_corruption0.5_lr_0.01_momentum_0.9/net_22_May_sj_1172_staid_1_seed_38150714758131256_polestar-old_epoch_148') net_rlnl = torch.load(path_rlnl) nets.append(net_rlnl) other_stats = dict({'path': path, 'path_rlnl': path_rlnl}, **other_stats) elif mdl == 'load_one_net': # path = os.path.join(results_root, '/') ''' load net ''' ## 0.0001 path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.0001_exptlabel_RLInits_only_1st_layer_BIAS_True_batch_size_train_1024_lr_0.01_momentum_0.9_scheduler_milestones_200,250,300_gamma_1.0/net_21_June_sj_974_staid_1_seed_44940314088747654_polestar-old') ## 0.001 path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.001_exptlabel_RLInits_only_1st_layer_BIAS_True_batch_size_train_1024_lr_0.01_momentum_0.9_scheduler_milestones_200,250,300_gamma_1.0/net_21_June_sj_967_staid_1_seed_1986409594254668_polestar-old') ## 0.01 path = os.path.join(results_root, 
    elif mdl == 'load_nl_and_rlnl':
        ''' load net '''
        # NL
        #path = os.path.join(results_root,'flatness_27_April_label_corrupt_prob_0.0_exptlabel_GB_24_24_10_2C1FC_momentum_NL_polestar/net_27_April_sj_343_staid_1_seed_56134200848018679')
        path = os.path.join(results_root,'flatness_May_label_corrupt_prob_0.0_exptlabel_MovieNL_lr_0.01_momentum_0.9/net_22_May_sj_1168_staid_1_seed_59937023958974481_polestar-old_epoch_173')
        net = torch.load(path)
        nets.append(net)
        # RLNL
        #path_rlnl = os.path.join(results_root,'flatness_27_April_label_corrupt_prob_0.0_exptlabel_GB_24_24_10_2C1FC_momentum_RLNL_polestar/net_27_April_sj_345_staid_1_seed_57700439347820897')
        path_rlnl = os.path.join(results_root,'flatness_May_label_corrupt_prob_0.0_exptlabel_MovieRLNLmdls_label_corruption0.5_lr_0.01_momentum_0.9/net_22_May_sj_1172_staid_1_seed_38150714758131256_polestar-old_epoch_148')
        net_rlnl = torch.load(path_rlnl)
        nets.append(net_rlnl)
        other_stats = dict({'path': path, 'path_rlnl': path_rlnl}, **other_stats)
    elif mdl == 'load_one_net':
        # path = os.path.join(results_root, '/')
        ''' load net '''
        # each assignment below overrides the previous one; only the last path (label corruption 1.0) is actually loaded
        ## 0.0001
        path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.0001_exptlabel_RLInits_only_1st_layer_BIAS_True_batch_size_train_1024_lr_0.01_momentum_0.9_scheduler_milestones_200,250,300_gamma_1.0/net_21_June_sj_974_staid_1_seed_44940314088747654_polestar-old')
        ## 0.001
        path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.001_exptlabel_RLInits_only_1st_layer_BIAS_True_batch_size_train_1024_lr_0.01_momentum_0.9_scheduler_milestones_200,250,300_gamma_1.0/net_21_June_sj_967_staid_1_seed_1986409594254668_polestar-old')
        ## 0.01
        path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.01_exptlabel_RLInits_only_1st_layer_BIAS_True_batch_size_train_1024_lr_0.01_momentum_0.9_scheduler_milestones_200,250,300_gamma_1.0/net_21_June_sj_976_staid_1_seed_34669758900780265_polestar-old')
        ## 0.1
        path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.1_exptlabel_RLInits_only_1st_layer_BIAS_True_batch_size_train_1024_lr_0.01_momentum_0.9_scheduler_milestones_200,250,300_gamma_1.0/net_21_June_sj_977_staid_1_seed_57003505407221650_polestar-old')
        ## 0.2
        path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.2_exptlabel_RLInits_only_1st_layer_BIAS_True_batch_size_train_1024_lr_0.01_momentum_0.9_scheduler_milestones_200,250,300_gamma_1.0/net_21_June_sj_978_staid_1_seed_63479113068450657_polestar-old')
        ## 0.5
        path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.5_exptlabel_RLInits_only_1st_layer_BIAS_True_batch_size_train_1024_lr_0.01_momentum_0.9_scheduler_milestones_200,250,300_gamma_1.0/net_21_June_sj_979_staid_1_seed_51183371945505111_polestar-old')
        ## 0.75
        path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.75_exptlabel_RLInits_only_1st_layer_BIAS_True_batch_size_train_1024_lr_0.01_momentum_0.9_scheduler_milestones_200,250,300_gamma_1.0/net_21_June_sj_980_staid_1_seed_63292262317939652_polestar-old')
        ## 1.0
        path = os.path.join(results_root,'flatness_June_label_corrupt_prob_1.0_exptlabel_RLInits_only_1st_layer_BIAS_True_batch_size_train_1024_lr_0.01_momentum_0.9_scheduler_milestones_200,250,300_gamma_1.0/net_21_June_sj_981_staid_1_seed_34295360820373818_polestar-old')
        ''' load net '''
        net = torch.load(path)
        nets.append(net)
        other_stats = dict({'path': path}, **other_stats)
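    # A minimal sketch of what l2_norm_all_params plausibly reports (an assumption -- the
    # real helper is defined elsewhere in this repo): the l2 norm of every parameter
    # tensor, which is what the 'divide_constant' branch above compares before/after the
    # rescaling.
    #
    # def l2_norm_all_params(net):
    #     for name, p in net.named_parameters():
    #         print(f'{name}: ||p||_2 = {p.norm(2).item()}')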
    elif mdl == 'l2_norm_all_params':
        ''' load net '''
        # uncomment one of the paths below before running this branch (all are commented out, so `path` is otherwise undefined)
        # path = os.path.join(results_root,'flatness_June_label_corrupt_sqprob_0.0_exptlabel_WeightDecay_lambda100_lr_0.1_momentum_0.0/net_1_June_sj_2833_staid_2_seed_45828051420330772_polestar-old')
        # path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.0_exptlabel_WeightDecay_lambda1_lr_0.1_momentum_0.0/net_1_June_sj_2830_staid_1_seed_53714812690274511_polestar-old')
        # path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.0_exptlabel_WeightDecay_lambda0.1_lr_0.1_momentum_0.0/net_1_June_sj_2835_staid_2_seed_66755608399194708_polestar-old')
        # path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.0_exptlabel_WeightDecay_lambda0.01_lr_0.1_momentum_0.0/net_1_June_sj_2832_staid_1_seed_47715620118836168_polestar-old')
        #path = os.path.join(results_root,'flatness_May_label_corrupt_prob_0.0_exptlabel_WeightDecay_lambda0.1_lr_0.01_momentum_0.9/net_31_May_sj_2784_staid_1_seed_59165331201064855_polestar-old')
        #path = os.path.join(results_root,'flatness_May_label_corrupt_prob_0.0_exptlabel_WeightDecay_lambda0.01_lr_0.01_momentum_0.9/net_31_May_sj_2792_staid_1_seed_42391375291583068_polestar-old')
        #path = os.path.join(results_root,'flatness_May_label_corrupt_prob_0.0_exptlabel_WeightDecay_lambda0.001_lr_0.01_momentum_0.9/net_31_May_sj_2793_staid_2_seed_47559284752010338_polestar-old')
        #path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.0_exptlabel_L2_squared_lambda1_lr_0.1_momentum_0.0/net_1_June_sj_2841_staid_2_seed_29441453139027048_polestar-old')
        #path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.0_exptlabel_L2_squared_lambda0.1_lr_0.1_momentum_0.0/net_1_June_sj_2839_staid_2_seed_35447208985369634_polestar-old')
        #path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.0_exptlabel_L2_squared_lambda0.01_lr_0.1_momentum_0.0/net_1_June_sj_2837_staid_2_seed_57556488720733908_polestar-old')
        #path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.0_exptlabel_L2_squared_lambda0.001_lr_0.1_momentum_0.0/net_1_June_sj_2848_staid_1_seed_48943421305461120_polestar-old')
        #path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.0_exptlabel_L2_squared_lambda0.0001_lr_0.1_momentum_0.0/net_1_June_sj_2850_staid_1_seed_2881772832480048_polestar-old')
        #path = os.path.join(results_root,'flatness_June_label_corrupt_prob_0.0_exptlabel_L2_squared_lambda0.00001_lr_0.1_momentum_0.0/net_1_June_sj_2852_staid_1_seed_24293440492629928_polestar-old')
        print(f'path = {path}')
        net = torch.load(path)
        ''' l2_norm_all_params '''
        l2_norm_all_params(net)
        ''' evaluate data set '''
        standardize = not args.dont_standardize_data # x - mu / std, [-1,+1]
        error_criterion = metrics.error_criterion
        criterion = torch.nn.CrossEntropyLoss()
        trainset, testset, classes = data_class.get_data_processors(data_path, args.label_corrupt_prob, dataset_type=args.data_set, standardize=standardize)
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size_train, shuffle=shuffle_train, num_workers=num_workers)
        testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size_test, shuffle=suffle_test, num_workers=num_workers)
        train_loss_epoch, train_error_epoch = evalaute_mdl_data_set(criterion, error_criterion, net, trainloader, device)
        test_loss_epoch, test_error_epoch = evalaute_mdl_data_set(criterion, error_criterion, net, testloader, device)
        print(f'[-1, -1], (train_loss: {train_loss_epoch}, train error: {train_error_epoch}) , (test loss: {test_loss_epoch}, test error: {test_error_epoch})')
        ''' end '''
        nets.append(net)
        sys.exit()
    else:
        print('RESTORED FROM PRE-TRAINED NET')
        suffle_test = False
        ''' RESTORED PRE-TRAINED NET '''
        # example name of file: os.path.join(results_root,expt_path,f'net_{day}_{month}_{seed}')
        # args.net_path = 'flatness_27_March_label_corrupt_prob_0_exptlabel_BoixNet_stand_600_OM/net_27_Match_64'
        path_to_mdl = args.mdl
        path = os.path.join(results_root, path_to_mdl)
        #net = utils.restore_entire_mdl(path)
        net = torch.load(path)
        nets.append(net)
    print(f'nets = {nets}')
    ''' cuda/gpu '''
    for net in nets:
        net.to(device)
    nb_params = nn_mdls.count_nb_params(net)
    ''' stats collector '''
    stats_collector = StatsCollector(net)
    ''' get data set '''
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size_train, shuffle=shuffle_train, num_workers=num_workers)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size_test, shuffle=suffle_test, num_workers=num_workers)
    ''' Cross Entropy + Optimizer '''
    lr = args.lr
    momentum = 0.9
    ## Error/Loss criterions
    error_criterion = metrics.error_criterion
    criterion = torch.nn.CrossEntropyLoss()
    #criterion = torch.nn.MultiMarginLoss()
    #criterion = torch.nn.MSELoss(size_average=True)
    print(f'Training Algorithm = {args.train_alg}')
    if args.train_alg == 'SGD':
        optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)
    elif args.train_alg == 'Adam':
        optimizer = optim.Adam(net.parameters(), lr=lr)
    else:
        raise ValueError(f'Training alg does not exist: {args.train_alg}')
    other_stats = dict({'nb_epochs':nb_epochs,'batch_size':batch_size,'mdl':mdl,'lr':lr,'momentum':momentum,'seed':seed,'githash':githash},**other_stats)
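    # evalaute_mdl_data_set (sic) is the evaluation workhorse used throughout this script.
    # A minimal sketch of a plausible implementation (an assumption -- the real helper
    # lives elsewhere in this repo): average the loss and the 0-1 error over at most
    # `iterations` batches of the loader.
    #
    # def evalaute_mdl_data_set(criterion, error_criterion, net, dataloader, device, iterations=float('inf')):
    #     running_loss, running_error, n = 0.0, 0.0, 0
    #     with torch.no_grad():
    #         for i, (inputs, targets) in enumerate(dataloader):
    #             if i >= iterations:
    #                 break
    #             inputs, targets = inputs.to(device), targets.to(device)
    #             outputs = net(inputs)
    #             running_loss += criterion(outputs, targets).item()
    #             running_error += error_criterion(outputs, targets)
    #             n += 1
    #     return running_loss/n, running_error/n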
    expt_path = f'{expt_path}_args.train_alg_{args.train_alg}_batch_train_{batch_size_train}_lr_{lr}_moment_{momentum}_epochs_{nb_epochs}'
    ''' scheduler '''
    #milestones = [20, 30, 40]
    milestones = [200, 250, 300]
    #milestones = [700, 800, 900]
    #milestones = [1700, 1800, 1900]
    scheduler_gamma = args.decay_rate
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=scheduler_gamma)
    other_stats = dict({'milestones': milestones, 'scheduler_gamma': scheduler_gamma}, **other_stats)
    milestones_str = ','.join(str(m) for m in milestones)
    #expt_path = f'{expt_path}_scheduler_milestones_{milestones_str}_gamma_{scheduler_gamma}'
    expt_path = f'{expt_path}_scheduler_gamma_{scheduler_gamma}'
    print(f'scheduler_gamma = {scheduler_gamma}')
    ''' Verify the restored model has the right error '''
    train_loss_epoch, train_error_epoch = evalaute_mdl_data_set(criterion, error_criterion, net, trainloader, device)
    test_loss_epoch, test_error_epoch = evalaute_mdl_data_set(criterion, error_criterion, net, testloader, device)
    print(f'train_loss_epoch, train_error_epoch = {train_loss_epoch}, {train_error_epoch} \n test_loss_epoch, test_error_epoch = {test_loss_epoch}, {test_error_epoch}')
    ''' Is it overparametrized? '''
    overparametrized = len(trainset) < nb_params # N < W ?
    print(f'Model overparametrized? N, W = {len(trainset)} vs {nb_params}')
    print(f'Model overparametrized? N < W = {overparametrized}')
    other_stats = dict({'overparametrized':overparametrized,'nb_params':nb_params}, **other_stats)
    ''' report time for setup '''
    seconds_setup, minutes_setup, hours_setup = utils.report_times(setup_time,'setup')
    other_stats = dict({'seconds_setup': seconds_setup, 'minutes_setup': minutes_setup, 'hours_setup': hours_setup}, **other_stats)
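    # How the MultiStepLR above behaves (standard PyTorch semantics, assuming Trainer
    # calls scheduler.step() once per epoch): the learning rate is multiplied by
    # scheduler_gamma at each milestone, i.e. with milestones=[200, 250, 300]
    #   lr                      for epochs [0, 200)
    #   lr*scheduler_gamma      for epochs [200, 250)
    #   lr*scheduler_gamma**2   for epochs [250, 300)
    #   lr*scheduler_gamma**3   from epoch 300 on
    # (gamma=1.0, as in several of the experiment paths above, keeps the lr constant).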
    ''' Start Training '''
    training_time = time.time()
    print(f'----\nSTART training: label_corrupt_prob={args.label_corrupt_prob},nb_epochs={nb_epochs},batch_size={batch_size},lr={lr},momentum={momentum},mdl={mdl},batch-norm={do_bn},nb_params={nb_params}')
    ## START TRAIN
    if args.train_alg == 'SGD' or args.train_alg == 'Adam':
        #iterations = 4 # the number of iterations used to estimate the test error; smaller is faster, larger is more accurate (accuracy grows as sqrt(n) though)
        iterations = inf
        ''' set up Trainer '''
        if args.save_every_epoch:
            save_every_epoch = args.save_every_epoch
            trainer = Trainer(trainloader, testloader, optimizer, scheduler, criterion, error_criterion, stats_collector, device, expt_path, net_file_name, all_nets_folder, save_every_epoch, args.evalaute_mdl_data_set, reg_param=args.reg_param, p=args.Lp_norm)
        else:
            trainer = Trainer(trainloader, testloader, optimizer, scheduler, criterion, error_criterion, stats_collector, device, evalaute_mdl_data_set=args.evalaute_mdl_data_set, reg_param=args.reg_param, p=args.Lp_norm)
        last_errors = trainer.train_and_track_stats(net, nb_epochs, iterations)
        ''' Test the network on the test data '''
        train_loss_epoch, train_error_epoch, test_loss_epoch, test_error_epoch = last_errors
        print(f'train_loss_epoch={train_loss_epoch} \ntrain_error_epoch={train_error_epoch} \ntest_loss_epoch={test_loss_epoch} \ntest_error_epoch={test_error_epoch}')
    elif args.train_alg == 'pert':
        ''' batch sizes '''
        batch_size_train, batch_size_test = 50*10**3, 10*10**3
        ''' number of repetitions '''
        nb_perturbation_trials = nb_epochs
        ''' noise level '''
        nb_layers = len(list(net.parameters()))
        noise_level = args.noise_level
        perturbation_magnitudes = nb_layers*[noise_level]
        print(f'noise_level={noise_level}')
        ''' locate where to save it '''
        folder_name_noise = f'noise_{perturbation_magnitudes[0]}'
        expt_path = os.path.join(expt_path, folder_name_noise)
        matlab_file_name = f'noise_{perturbation_magnitudes}_{matlab_file_name}'
        ## TODO: collect stats by perturbing the current model X times with the current perturbation_magnitudes
        use_w_norm2 = args.not_pert_w_norm2
        train_loss, train_error, test_loss, test_error = get_errors_for_all_perturbations(net, perturbation_magnitudes, use_w_norm2, device, nb_perturbation_trials, stats_collector, criterion, error_criterion, trainloader, testloader)
        print(f'noise_level={noise_level},train_loss,train_error,test_loss,test_error={train_loss},{train_error},{test_loss},{test_error}')
        other_stats = dict({'perturbation_magnitudes':perturbation_magnitudes}, **other_stats)
    elif args.train_alg == 'interpolate':
        ''' print stats before interpolation '''
        print_evaluation_of_nets(net_nl, net_rlnl, criterion, error_criterion, trainloader, testloader, device, iterations)
        ''' do interpolation of nets '''
        nb_interpolations = nb_epochs
        interpolations = np.linspace(0, 1, nb_interpolations)
        get_landscapes_stats_between_nets(net_nl, net_rlnl, interpolations, device, stats_collector, criterion, error_criterion, trainloader, testloader, iterations)
        ''' print stats of the net '''
        other_stats = dict({'interpolations':interpolations}, **other_stats)
        #print_evaluation_of_nets(net_nl, net_rlnl, criterion, error_criterion, trainloader, testloader, device, iterations)
    elif args.train_alg == 'brando_chiyuan_radius_inter':
        r_large = args.r_large ## check if this number is good
        nb_radius_samples = nb_epochs
        interpolations = np.linspace(0, 1, nb_radius_samples)
        expt_path = os.path.join(expt_path + f'_RLarge_{r_large}')
        ''' '''
        nb_dirs = args.nb_dirs
        stats_collector = StatsCollector(net, nb_dirs, nb_epochs)
        get_all_radius_errors_loss_list_interpolate(nb_dirs, net, r_large, interpolations, device, stats_collector, criterion, error_criterion, trainloader, testloader, iterations)
        other_stats = dict({'nb_dirs':nb_dirs,'interpolations':interpolations,'nb_radius_samples':nb_radius_samples,'r_large':r_large}, **other_stats)
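    # A hypothetical sketch of the radius probe above (an assumption -- the real
    # get_all_radius_errors_loss_list_interpolate is defined elsewhere in this repo):
    # for each of nb_dirs random isotropic directions d, walk from the trained weights w
    # toward w + r_large*d and record loss/error at each interpolation coefficient t.
    #
    # for _ in range(nb_dirs):
    #     d = [torch.randn_like(p) for p in net.parameters()] # isotropic random direction
    #     w0 = [p.data.clone() for p in net.parameters()]
    #     for t in interpolations:
    #         for p, w, dd in zip(net.parameters(), w0, d):
    #             p.data = w + t*r_large*dd
    #         loss, error = evalaute_mdl_data_set(criterion, error_criterion, net, trainloader, device, iterations)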
    elif args.train_alg == 'sharpness':
        ''' load the data set '''
        print('About to load the data set')
        shuffle_train = True
        #batch_size = 2**10
        batch_size = 2**5
        batch_size_train, batch_size_test = batch_size, batch_size
        iterations = inf # controls how many epochs to stop before returning the data set error
        #eps = 2500/50000
        eps = 1/50000
        other_stats = dict({'iterations':iterations,'eps':eps}, **other_stats)
        trainset, trainloader = data_class.load_only_train(path_adverserial_data, eps, batch_size_train, shuffle_train, num_workers)
        ''' three musketeers '''
        print('Preparing the three musketeers')
        net_pert = copy.deepcopy(net)
        #nn_mdls.reset_parameters(net_pert)
        net_original = dont_train(net)
        #net_original = net
        initialize_to_zero(net_original)
        debug = False
        if debug:
            ## conv params
            nb_conv_layers = 3
            Fs = [24]*nb_conv_layers
            Ks = [5]*nb_conv_layers
            ## fc params
            FCs = [len(classes)]
            CHW = (3, 32, 32)
            net_pert = nn_mdls.GBoixNet(CHW, Fs, Ks, FCs, do_bn).to(device)
        print('Musketeers are prepared')
        ''' optimizer + criterion '''
        optimizer = optim.SGD(net_pert.parameters(), lr=lr, momentum=momentum)
        #optimizer = optim.Adam(net_pert.parameters(), lr=lr)
        error_criterion = metrics.error_criterion
        criterion = torch.nn.CrossEntropyLoss()
        #criterion = torch.nn.MultiMarginLoss()
        #criterion = torch.nn.MultiLabelMarginLoss()
        ''' Landscape Inspector '''
        save_all_learning_curves = True
        save_all_perts = False
        nb_lambdas = 1
        lambdas = np.linspace(1, 10, nb_lambdas)
        print('Do Sharpness expt!')
        sharpness_inspector = LandscapeInspector(net_original, net_pert, nb_epochs, iterations, trainloader, testloader, optimizer, criterion, error_criterion, device, lambdas, save_all_learning_curves=save_all_learning_curves, save_all_perts=save_all_perts)
        sharpness_inspector.do_sharpness_experiment()
    elif args.train_alg == 'flatness_bs':
        ''' BS (binary search) params '''
        r_initial = 50
        epsilon = args.epsilon ## check if this number is good
        #nb_radius_samples = nb_epochs # could use this number as a cap on the number of BS iterations
        expt_path = os.path.join(expt_path + f'_BS')
        ''' Do BS '''
        precision = 0.001
        nb_dirs = args.nb_dirs
        #stats_collector = StatsCollector(net, nb_dirs, nb_epochs) # TODO
        rand_inspector = RandLandscapeInspector(epsilon, net, r_initial, device, criterion, error_criterion, trainloader, testloader, iterations)
        rand_inspector.get_faltness_radii_for_isotropic_directions(nb_dirs=nb_dirs, precision=precision)
        other_stats = dict({'nb_dirs':nb_dirs,'flatness_radii':rand_inspector.flatness_radii}, **other_stats)
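    # The 'BS' above is presumably a binary search for the flatness radius: the largest r
    # along a random isotropic direction d such that the loss increase at w + r*d stays
    # within epsilon, shrinking the bracket until it is narrower than `precision`
    # (a sketch of the idea, not the verified RandLandscapeInspector source):
    #
    # lo, hi = 0.0, r_initial
    # while hi - lo > precision:
    #     r = (lo + hi)/2
    #     if loss_at(w + r*d) - loss_at(w) <= epsilon:
    #         lo = r # still flat: try a larger radius
    #     else:
    #         hi = r # too sharp: shrink the radius
    # flatness_radius = lo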
    elif args.train_alg == 'evaluate_nets':
        plot = False
        print('')
        iterations = inf
        print(f'W_nl = {W_nl}')
        print(f'W_rlnl = {W_rlnl}')
        ''' train errors '''
        loss_nl_train, error_nl_train = evalaute_mdl_data_set(criterion, error_criterion, net_nl, trainloader, device, iterations)
        loss_rlnl_train, error_rlnl_train = evalaute_mdl_data_set(criterion, error_criterion, net_rlnl, trainloader, device, iterations)
        print(f'loss_nl_train, error_nl_train = {loss_nl_train, error_nl_train}')
        print(f'loss_rlnl_train, error_rlnl_train = {loss_rlnl_train, error_rlnl_train}')
        ''' test errors '''
        loss_nl_test, error_nl_test = evalaute_mdl_data_set(criterion, error_criterion, net_nl, testloader, device, iterations)
        loss_rlnl_test, error_rlnl_test = evalaute_mdl_data_set(criterion, error_criterion, net_rlnl, testloader, device, iterations)
        print(f'loss_nl_test, error_nl_test = {loss_nl_test, error_nl_test}')
        print(f'loss_rlnl_test, error_rlnl_test = {loss_rlnl_test, error_rlnl_test}')
        ''' '''
        store_results = False
        store_net = False
    #elif args.train_alg == 'reach_target_loss':
    #    iterations = inf
    #    precision = 0.00001
    #    ''' set target loss '''
    #    loss_rlnl_train, error_rlnl_train = evalaute_mdl_data_set(criterion, error_criterion, net_rlnl, trainloader, device, iterations)
    #    target_train_loss = loss_rlnl_train
    #    ''' do SGD '''
    #    trainer = Trainer(trainloader, testloader, optimizer, criterion, error_criterion, stats_collector, device)
    #    last_errors = trainer.train_and_track_stats(net, nb_epochs, iterations=iterations, target_train_loss=target_train_loss, precision=precision)
    #    ''' Test the network on the test data '''
    #    train_loss_epoch, train_error_epoch, test_loss_epoch, test_error_epoch = last_errors
    #    print(f'train_loss_epoch={train_loss_epoch} train_error_epoch={train_error_epoch}')
    #    print(f'test_loss_epoch={test_loss_epoch} test_error_epoch={test_error_epoch}')
    #    st()
    elif args.train_alg == 'no_train':
        print('NO TRAIN BRANCH')
        print(f'expt_path={expt_path}')
        utils.make_and_check_dir(expt_path)
    ''' save times '''
    seconds_training, minutes_training, hours_training = utils.report_times(training_time, meta_str='training')
    other_stats = dict({'seconds_training': seconds_training, 'minutes_training': minutes_training, 'hours_training': hours_training}, **other_stats)
    seconds, minutes, hours = seconds_training+seconds_setup, minutes_training+minutes_setup, hours_training+hours_setup
    other_stats = dict({'seconds':seconds,'minutes':minutes,'hours':hours}, **other_stats)
    print(f'nb_epochs = {nb_epochs}')
    print(f'Finished Training, hours={hours}')
    print(f'seed = {seed}, githash = {githash}')
    ''' save results from experiment '''
    store_results = not args.dont_save_expt_results
    print(f'ALL other_stats={other_stats}')
    if store_results:
        print('storing results!')
        matlab_path_to_filename = os.path.join(expt_path, matlab_file_name)
        save2matlab.save2matlab_flatness_expt(matlab_path_to_filename, stats_collector, other_stats=other_stats)
    ''' save net model '''
    if store_net:
        print('saving final net mdl!')
        net_path_to_filename = os.path.join(expt_path, net_file_name)
        torch.save(net, net_path_to_filename)
        ''' check the error of the saved net '''
        loss_original, error_original = evalaute_mdl_data_set(criterion, error_criterion, net, trainloader, device)
        restored_net = utils.restore_entire_mdl(net_path_to_filename)
        loss_restored, error_restored = evalaute_mdl_data_set(criterion, error_criterion, restored_net, trainloader, device)
        print()
        print(f'net_path_to_filename = {net_path_to_filename}')
        print(f'loss_original={loss_original},error_original={error_original}\a')
        print(f'loss_restored={loss_restored},error_restored={error_restored}\a')
    ''' send e-mail '''
    if hostname == 'polestar' or args.email:
        message = f'SLURM Job_id=MANUAL Name=flatness_expts.py Ended, ' \
                  f'Total Run time hours:{hours},minutes:{minutes},seconds:{seconds} COMPLETED, ExitCode [0-0]'
        utils.send_email(message, destination='*****@*****.**')
    ''' plot '''
    if sj == 0 and plot: # TODO
        plot_utils.plot_loss_and_accuracies(stats_collector)
        plt.show()
def __init__(self, node):
    # Use a local (no-op) connection when targeting the current host; otherwise SSH to the node.
    if node == utils.get_hostname() or node is None:
        self.conn = None
    else:
        self.conn = utils.SSHConn(node)