def __init__(self,n):
    """Set up pygame state, the car/track environment, and every piece
    of bookkeeping the n-step Sarsa trainer needs.

    `n` is the n-step return length used by the update rule.
    """
    # pygame window state
    self.screen = None
    self.running = True
    # agent / environment (Car and Checkpoints come from this module)
    self.car = Car()
    self.checkpoints = Checkpoints()
    # learning hyper-parameters
    self.n = n
    self.a = 0.01            # learning rate (alpha)
    self.epsilon = 0.01      # exploration probability
    self.discount = 1
    self.max_steps = 8000
    # optimistic initial action values: uniform noise shifted up by 10
    self.Q = np.random.sample([2, 3, 3, 3, 3, 3, 3, 9]) + 10
    # per-episode bookkeeping
    self.timer = 0
    self.ep_count = 1
    self.test_ep_count = 1
    self.T = float('inf')
    self.state = []
    self.next_state = []
    self.action = 0
    self.state_path = []
    self.action_path = []
    self.reward_path = []
    self.ep_rewards = []
    # 1 -> render a pygame window, 0 -> run headless
    self.show = 1
Example #2
0
def main():
    """Train a model for ``args.nepochs`` epochs, keeping a checkpoint
    of the weights whenever the validation loss improves."""
    # read the command-line configuration and make the run reproducible
    args = parser.parse_args()
    random.seed(args.manual_seed)
    torch.manual_seed(args.manual_seed)
    utils.saveargs(args)

    # checkpoint manager: restores previous weights and saves new ones
    checkpoints = Checkpoints(args)

    # build the network and its loss
    model_builder = Model(args)
    model, criterion = model_builder.setup(checkpoints)

    # build the train/validation data loaders
    loaders = Dataloader(args).create()

    # drivers for the training loop and the validation pass
    trainer = Trainer(args, model, criterion)
    tester = Tester(args, model, criterion)

    # epoch loop: checkpoint whenever the test loss improves
    loss_best = 1e10
    for epoch in range(args.nepochs):
        trainer.train(epoch, loaders)
        loss_test = tester.test(epoch, loaders)

        if loss_test < loss_best:
            model_best = True
            loss_best = loss_test
            checkpoints.save(epoch, model, model_best)
 def __init__(self, T, suffix_array, cp_step, sa_step):
     """Build the FM-index pieces for text ``T``: BWT string, sampled
     suffix array, rank checkpoints, and first-column counts.

     ``suffix_array`` is a callable producing the full suffix array;
     a '$' terminator is appended to ``T`` when missing.
     """
     if T[-1] != '$':
         T += '$'
     sa_full = suffix_array(T)
     self.bwt = bwt(T, sa_full)
     self.cps = Checkpoints(self.bwt, cp_step)
     self.first_col = self.calc_first_col(self.bwt)
     self.sa = self.cut_suffix_array(sa_full, sa_step)
 def reset(self,track=1):
     """Return the environment to the start of an episode on `track`:
     fresh car and checkpoints, cleared paths and episode timers."""
     self.car = Car(track)
     self.checkpoints = Checkpoints(track)
     self.timer = 0
     self.T = float('inf')
     self.action = 4
     self.state = []
     self.next_state = []
     # trajectory history starts empty
     self.state_path = []
     self.action_path = []
     self.reward_path = []
Example #5
0
    def __init__(self, nrun=-1):
        """Load the ``nrun``-th saved run from ./results (default: the
        latest one), rebuild the generator from its checkpoint, and
        prepare the test data loader plus reusable input tensors.

        ``nrun`` indexes the lexicographically sorted run directories,
        so -1 selects the most recent run.
        """
        # fixed evaluation configuration (presumably mirrors the
        # options used at training time — TODO confirm against trainer)
        self.args = Namespace(
            cuda=True,
            ndf=8,
            nef=8,
            wkld=0.01,
            gbweight=100,
            nlatent=9, 
            nechannels=6,
            ngchannels=1,
            resume="",
            save="mini-save",
            loader_train="h5py",
            loader_test="h5py",
            dataset_test=None,
            dataset_train="filelist",
            split_test=0.0,
            split_train=1.0,
            filename_test="./data/data.txt",
            filename_train="./data/data.txt",
            batch_size=64,
            resolution_high=512,
            resolution_wide=512,
            nthreads=32,
            images="mini-save/images",
            pre_name="save",
        )
        # pick the requested run directory and point `resume` at its
        # saved generator ("netG") weights
        latest_save = sorted(list(Path("results").iterdir()))[nrun]
        self.rundate = latest_save.name
        latest_save = latest_save.joinpath("Save")
        latest_save = {"netG": latest_save}
        self.args.resume = latest_save
        checkpoints = Checkpoints(self.args)

        # Create model and restore its weights from the checkpoint
        models = Model(self.args)
        self.model, self.criterion = models.setup(checkpoints)

        # Data loading (test split only)
        self.dataloader = Dataloader(self.args)
        self.loader = self.dataloader.create(flag="Test")
        print("\t\tBatches:\t", len(self.loader))

        # cache frequently used sizes from the configuration
        self.resolution_high = self.args.resolution_high
        self.resolution_wide = self.args.resolution_wide
        self.batch_size = self.args.batch_size
        self.ngchannels = self.args.ngchannels
        self.nechannels = self.args.nechannels
        self.nlatent = self.args.nlatent
        # pre-allocated input buffers; moved to the GPU when enabled
        self.composition = torch.FloatTensor(self.batch_size, self.ngchannels, self.resolution_high, self.resolution_wide)
        self.metadata = torch.FloatTensor(self.batch_size, self.nechannels, self.resolution_high, self.resolution_wide)

        if self.args.cuda:
            self.composition = self.composition.cuda()
            self.metadata = self.metadata.cuda()

        # NOTE(review): torch.autograd.Variable has been a no-op wrapper
        # since PyTorch 0.4 — tensors are autograd-aware directly.
        self.composition = Variable(self.composition)
        self.metadata = Variable(self.metadata)

        # image writer for saving generated outputs
        self.imgio = plugins.ImageIO(self.args.images, self.args.pre_name)
Example #6
0
def main():
    """Load a pretrained model and compute its attribute-kernel
    distances.

    Relies on module-level ``args`` (already parsed at import time)
    rather than parsing arguments itself.  Returns the distance list
    produced by ``get_att_dist`` so callers can inspect it.
    """
    # fix the RNG seeds so the run is reproducible
    random.seed(args.manual_seed)
    torch.manual_seed(args.manual_seed)
    if args.save_results:
        utils.saveargs(args, config_file)

    # initialize the checkpoint class
    checkpoints = Checkpoints(args)

    # Create Model and restore its weights
    models = Model(args)
    model, model_dict, evaluation = models.setup(checkpoints)

    print('Model:\n\t{model}\nTotal params:\n\t{npar:.2f}M'.format(
        model=args.model_type,
        npar=sum(p.numel() for p in model.parameters()) / 1000000.0))

    #### get kernel information ####
    # demographic-group index pairs (currently unused below — kept as in
    # the original; presumably consumed in a fuller version of this flow)
    ndemog = args.ndemog
    ndemog = list(range(ndemog))
    demog_combs = list(combinations(ndemog, 2))
    #### get kernel information ####

    # NOTE(review): a leftover `pdb.set_trace()` debugger breakpoint was
    # removed here; the distances are now returned for inspection instead.
    dist_list = get_att_dist(model)
    return dist_list
Example #7
0
def main():
    """Full train/validate loop with best-loss checkpointing and
    automatic CPU/GPU device selection."""
    # parse the arguments and pick the compute device
    args = config.parse_args()
    use_gpu = args.ngpu > 0 and torch.cuda.is_available()
    device = "cuda:0" if use_gpu else "cpu"
    args.device = torch.device(device)

    # seed the RNGs and optionally persist the configuration
    random.seed(args.manual_seed)
    torch.manual_seed(args.manual_seed)
    if args.save_results:
        utils.saveargs(args)

    # checkpoint manager
    checkpoints = Checkpoints(args)

    # build the model, its loss, and the evaluation metric
    builder = Model(args)
    model, criterion, evaluation = builder.setup(checkpoints)

    print('Model:\n\t{model}\nTotal params:\n\t{npar:.2f}M'.format(
        model=args.model_type,
        npar=sum(p.numel() for p in model.parameters()) / 1000000.0))

    # train/validation data loaders
    loaders = Dataloader(args).create()

    # drivers for the training loop and the validation pass
    trainer = Trainer(args, model, criterion, evaluation)
    tester = Tester(args, model, criterion, evaluation)

    # epoch loop: checkpoint whenever the validation loss improves
    loss_best = 1e10
    for epoch in range(args.nepochs):
        print('\nEpoch %d/%d\n' % (epoch + 1, args.nepochs))

        trainer.train(epoch, loaders)
        loss_test = tester.test(epoch, loaders)

        if loss_test < loss_best:
            model_best = True
            loss_best = loss_test
            if args.save_results:
                checkpoints.save(epoch, model, model_best)
Example #8
0
def main():
    """TensorFlow training entry point: train, test and checkpoint the
    best model into a per-run timestamped results directory."""
    # Parse the Arguments and seed the RNGs
    args = config.parse_args()
    random.seed(args.manual_seed)
    tf.set_random_seed(args.manual_seed)

    # results go under a directory named after the current timestamp
    now = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S/')
    args.save = os.path.join(args.result_path, now, 'save')
    args.logs = os.path.join(args.result_path, now, 'logs')
    if args.save_results:
        utils.saveargs(args)

    # Checkpoint manager
    checkpoints = Checkpoints(args)

    # Build the model, its loss, and the evaluation metric
    builder = Model(args)
    model, criterion, evaluation = builder.setup(checkpoints)

    # Print Model Summary
    print('Model summary: {}'.format(model.name))
    print(model.summary())

    # Data Loading (train and test splits)
    dataloader = Dataloader(args).create()

    # Training and testing drivers
    trainer = Trainer(args, model, criterion, evaluation)
    tester = Tester(args, model, criterion, evaluation)

    # Epoch loop: checkpoint whenever the test loss improves
    loss_best = 1e10
    for epoch in range(args.nepochs):
        print('\nEpoch %d/%d' % (epoch + 1, args.nepochs))

        trainer.train(epoch, dataloader["train"])
        loss_test = tester.test(epoch, dataloader["test"])

        if loss_test < loss_best:
            model_best = True
            loss_best = loss_test
            if args.save_results:
                checkpoints.save(epoch, model, model_best)
def main():
    """Evaluate verification accuracy per demographic cohort and append
    each cohort's result to a text file (relies on module-level args)."""
    demog_type = 'race'
    # attribute index of the target demographic, and of the reference ones
    demog_target = {'gender': 1, 'age': 2, 'race': 3}
    demog_refer = {'gender': [2, 3], 'age': [1, 3], 'race': [1, 2]}

    # initialize the checkpoint class
    checkpoints = Checkpoints(args)

    # build the model and restore its weights
    models = Model(args)
    model, model_dict, evaluation = models.setup(checkpoints)

    print('Model:\n\t{model}\nTotal params:\n\t{npar:.2f}M'.format(
        model=args.model_type,
        npar=sum(p.numel() for p in model['feat'].parameters()) / 1000000.0))

    # evaluation-only driver
    tester = Tester(args, model, model_dict['loss'], evaluation)

    test_freq = 1

    dataloader = Dataloader(args)
    dataset_options_test = args.dataset_options_test

    # NOTE(review): hard-coded absolute output path — consider making
    # this configurable.
    resfilename = '/research/prip-gongsixu/codes/biasface/results/evaluation/demogbias/race.txt'
    cohorts = get_demog_cohorts(demog_type)
    with open(resfilename, 'w') as f:
        for demog_group in cohorts:
            # point the test loader at the current cohort and re-create it
            dataset_options_test['target_ind'] = demog_target[demog_type]
            dataset_options_test['refer_ind'] = demog_refer[demog_type]
            dataset_options_test['demog_group'] = demog_group
            args.dataset_options_test = dataset_options_test
            loaders = dataloader.create(flag='Test')
            acc_test = tester.test_demog(demog_type, loaders)
            f.write(demog_group + '\t' + str(acc_test) + '\n')
            print(acc_test)
class Model:
    """Pygame car-on-a-track environment plus a tabular n-step Sarsa
    agent that trains and tests on it.

    Relies on module-level names defined elsewhere in this file:
    ``Car``, ``Checkpoints``, ``on_track``, ``background_color``,
    ``track_color``, ``pos_reward`` and ``neg_reward``.
    States are 7-element lists indexing into the tabular ``Q`` array;
    the 9 actions combine 3 pedal settings with 3 steering settings.
    """
    def __init__(self,n):
        """Initialise pygame state, the environment and all n-step
        Sarsa bookkeeping; ``n`` is the n-step return length."""
        #for pygame
        self.running = True
        self.screen = None
        #for agent and environment
        self.car = Car()
        self.checkpoints = Checkpoints()
        #for training
        self.timer = 0
        self.ep_count = 1
        self.test_ep_count = 1
        self.epsilon = 0.01
        self.a = 0.01
        self.n = n
        self.discount = 1
        self.max_steps = 8000
        self.Q = np.random.sample([2,3,3,3,3,3,3,9])+10 #optimistic
        self.state = []
        self.next_state = []
        self.action = 0
        self.state_path = []
        self.action_path = []
        self.reward_path = []
        self.ep_rewards = []
        self.T = float('inf')
        #use to control whether or not to render a pygame window
        self.show = 1

    #used to start pygame window
    def on_init(self):
        """Create the 640x400 pygame window and mark the loop running."""
        pygame.init()
        self.screen = pygame.display.set_mode([640, 400], pygame.HWSURFACE | pygame.DOUBLEBUF)
        self.running = True
        
    #only used to close the pygame window
    def on_event(self, event):
        """Stop the run loop when the window close button is pressed."""
        if event.type == pygame.QUIT:
            self.running = False

    def loop(self):
        """Advance training by one time step: act, record the
        trajectory, and apply the n-step Sarsa update once enough
        steps have accumulated."""
        #used to train the agent
        if(self.timer == 0):
            #initializing: observe the first state, pick and take an action
            self.render_train() if self.show == 1 else self.norender_train()
            self.car.update_state()
            self.state = self.car.state
            self.state_path.append(self.car.state)
            self.action = self.choose_action()
            self.action_path.append(self.action)
            self.act()
            self.reward_path.append(0)
            self.timer += 1
        else:
            #Sometimes episode continues
            if(self.timer < self.T):
                terminal = (self.timer > self.max_steps or self.check_collision())
                self.render_train() if self.show == 1 else self.norender_train()
                self.car.update_state()
                self.next_state = self.car.state
                reward = self.current_reward()
                #terminal state: freeze the episode length T at this step
                if(terminal):
                    self.T = self.timer
                else:
                    self.action = self.choose_action(1)
                    self.reward_path.append(reward)
                self.action_path.append(self.action)
                self.state_path.append(self.car.state)
                
            #Sometimes the episode has ended and the agent's policy is updated
            # toa ("time of action") is the step whose Q value is updated now
            toa = self.timer-self.n
            if(toa >= 0):
                #Updating is done using the model properties
                # n-step return: discounted sum of the next n rewards
                # NOTE(review): exponent (t-toa-1) starts at -1; the usual
                # n-step return uses (t-toa) — confirm this is intended.
                gain = 0.0
                for t in range(toa, min(self.T, toa+self.n)):
                    gain += (self.discount**(t-toa-1))*self.reward_path[t]

                # bootstrap with Q at step toa+n unless the episode ended first
                if(toa + self.n < self.T):
                    q_index = copy.copy(self.state_path[toa+self.n])
                    q_index.append(self.action_path[toa+self.n])
                    gain += (self.discount**(self.n))*self.Q[tuple(q_index)]
                q_update = copy.copy(self.state_path[toa])
                q_update.append(self.action_path[toa])
                # skip the update for steps that ended in a collision penalty
                if(self.reward_path[toa] != neg_reward):
                    self.Q[tuple(q_update)] += self.a*(gain-self.Q[tuple(q_update)])

            if(toa == self.T-1):
                #end of episode: log the return and reset the environment
                print(f'Episode: {self.ep_count}, res: {sum(self.reward_path)}')
                self.ep_count+=1
                self.ep_rewards.append(sum(self.reward_path))
                self.reset()
            else:
                #not end of episode
                self.timer += 1
            #Setup next time step
            self.state = self.next_state
            self.act()

    #test_loop() is pretty similar to the loop() function except that it doesn't update self.Q
    #Therefore, it can be used to run multiple episodes to evaluate the performace of the current Q
    def test_loop(self, track):
        """One evaluation time step on `track`; returns the step's
        reward (0 on the very first step of an episode)."""
        if(self.timer == 0):
            self.render_test(track) if self.show == 1 else self.norender_test(track)
            self.car.update_state()
            self.state = self.car.state
            self.state_path.append(self.car.state)
            self.action = self.choose_action()
            self.action_path.append(self.action)
            self.act()
            self.reward_path.append(0)
            self.timer += 1
            return 0
        else:
            if(self.timer < self.T):
                terminal = (self.timer > self.max_steps or self.check_collision(track))
                self.render_test(track) if self.show == 1 else self.norender_test(track)
                self.car.update_state()
                self.next_state = self.car.state
                # after a collision penalty the step reward is forced to 0
                reward = 0 if self.reward_path[len(self.reward_path)-1] == neg_reward else self.current_reward(track)
                #terminal
                if(terminal):
                    self.T = self.timer
                else:
                    self.action = self.choose_action(1)
                    self.reward_path.append(reward)
                self.action_path.append(self.action)
                self.state_path.append(self.car.state)
            else:
                reward = 0
            toa = self.timer-self.n
            

            if(toa == self.T-1):
                # end of the evaluation episode
                print(f'Test Episode: {self.test_ep_count}, res: {sum(self.reward_path)}')
                self.test_ep_count+=1
                self.reset(track)
            else:
                self.timer += 1
            self.state = self.next_state
            self.act()
            return reward

    #quits pygame
    def clean(self):
        """Shut pygame down."""
        pygame.quit()
 
    #run() indefinitly trains the agent and renders a pygame window
    def run(self):
        """Train forever (until the window is closed), rendering."""
        if self.on_init() == False:
            self.running = False
        while(self.running):
            for event in pygame.event.get():
                self.on_event(event)
            self.loop()
            pygame.display.update()
        self.clean()

    #train_wr() trains the agent for a set number of episodes and renders a pygame window
    def train_wr(self, num_eps):
        """Train for `num_eps` episodes with the window rendered."""
        self.reset()
        run_lim = self.ep_count+num_eps
        if self.on_init() == False:
            self.running = False
        while(self.running and self.ep_count < run_lim):
            for event in pygame.event.get():
                self.on_event(event)
            self.loop()
            pygame.display.update()
        self.clean()

    #train() trains the agent for a set number of episodes and does not render a pygame window
    def train(self, num_eps):
        """Train for `num_eps` episodes headless (no window)."""
        self.reset()
        self.show = 0
        run_lim = self.ep_count+num_eps
        while(self.ep_count<run_lim):
            self.loop()
        self.show = 1

    #test_wr() tests the agent for a set number of episodes and renders a pygame window
    #test_wr() returns the average sum of rewards for a episode
    #It also uses test_loop() so self.Q is never updated which allows it to test the current epsilon greedy policy w.r.t. self.Q
    def test_wr(self,num_eps,track):
        """Evaluate for `num_eps` episodes on `track` with rendering;
        returns the average per-episode reward total."""
        self.reset(track)
        self.test_ep_count = 1
        reward_total = 0
        if self.on_init() == False:
            self.running = False
        while( self.running and self.test_ep_count <= num_eps):
            for event in pygame.event.get():
                self.on_event(event)
            r = self.test_loop(track)
            reward_total += r
            pygame.display.update()
        self.clean()
        return reward_total/num_eps

    #test() is the same as test_wr() except that it does not render a pygame window
    def test(self,num_eps,track):
        """Evaluate for `num_eps` episodes on `track` headless;
        returns the average per-episode reward total."""
        self.reset(track)
        self.show = 0
        # epsilon is saved/restored around the run
        temp = copy.copy(self.epsilon)
        self.test_ep_count = 1
        reward_total = 0
        while(self.test_ep_count <= num_eps):
            r = self.test_loop(track)
            reward_total+=r
        self.epsilon = temp
        self.show = 1
        return reward_total/num_eps

    #This just resets everything at the begining of an episode.
    #The car has to go back to the start, checkpoints have to be reset, actions and rewards are cleared, etc.
    def reset(self,track=1):
        """Return the environment to the start of an episode on `track`."""
        self.car = Car(track)
        self.checkpoints = Checkpoints(track)
        self.timer = 0
        self.state = []
        self.next_state = []
        self.state_path = [] #some initial state
        self.action = 4
        self.action_path = []
        self.reward_path = []
        self.T = float('inf')

    #This renders elements for training (Always track 1)
    def render_train(self):
        """Draw track 1, the checkpoints and the car, moving the car."""
        #render the game elements
        self.screen.fill(background_color)
        self.render_track1()
        self.checkpoints.render(self.screen)
        self.car.move()
        self.car.render(self.screen)

    #This sets up the the current time step for training, but nothing is rendered to a pygame window
    def norender_train(self):
        """Headless training step: just advance the car."""
        self.car.move()

    #This renders elements for testing (Sometimes track 1, sometimes track 2)
    def render_test(self,track):
        """Draw the chosen track, checkpoints and car, moving the car."""
        #render the game elements
        self.screen.fill(background_color)
        if(track == 2):
            self.render_track2()
        else:
            self.render_track1()
        self.checkpoints.render(self.screen)
        self.car.move(track)
        self.car.render(self.screen)

    #This sets up the the current time step for testing, but nothing is rendered to a pygame window
    def norender_test(self,track):
        """Headless evaluation step: just advance the car on `track`."""
        self.car.move(track)

    #Simply renders the rectangles to show where track 1 is
    def render_track1(self):
        """Draw track 1's rectangles onto the screen."""
        pygame.draw.rect(self.screen, track_color, [50,50,60,300])
        pygame.draw.rect(self.screen, track_color, [50,50,540,60])
        pygame.draw.rect(self.screen, track_color, [50,290,240,60])
        pygame.draw.rect(self.screen, track_color, [230,170,60,180])
        pygame.draw.rect(self.screen, track_color, [230,170,180,60])
        pygame.draw.rect(self.screen, track_color, [530,50,60,300])
        pygame.draw.rect(self.screen, track_color, [350,170,60,180])
        pygame.draw.rect(self.screen, track_color, [350,290,240,60])

    #Same as render_track1() but its for track 2
    def render_track2(self):
        """Draw track 2's rectangles onto the screen."""
        pygame.draw.rect(self.screen, track_color, [50,50,540,60])
        pygame.draw.rect(self.screen, track_color, [530,50,60,300])
        pygame.draw.rect(self.screen, track_color, [410,170,60,180])
        pygame.draw.rect(self.screen, track_color, [410,290,180,60])
        pygame.draw.rect(self.screen, track_color, [50,170,420,60])
        pygame.draw.rect(self.screen, track_color, [50,50,60,180])

    #It checks to see if the car hit a wall. If it did, then the neg_reward is returned and the car turns red
    #It also checks to see if the car is passing a checkpoint, and gives pos_reward if it has.
    def current_reward(self,track=1):
        """Reward for the current step: neg_reward on collision (car
        turns red), plus pos_reward when a checkpoint is passed."""
        reward = 0
        if(self.check_collision(track)):
            self.car.color = [255,0,0]
            reward = neg_reward
        else:
            self.car.color = [10,46,73]
            reward = 0
        if(self.checkpoints.check_pass(self.car.corners)):
            reward = reward+pos_reward
        return reward

    #Check collisions just looks to see that all 4 corners of the car are on the track. If they are, then no collision
    def check_collision(self,track=1):
        """True when any of the car's 4 corners is off the track."""
        if(on_track(self.car.corners[0],track) and on_track(self.car.corners[1],track) and on_track(self.car.corners[2],track) and on_track(self.car.corners[3],track)):
            return False
        else:
            return True

    #Choosing random action with probability self.epsilon
    #Choosing greedy action w.r.t. self.Q with probablilty (1-self.epsilon)
    def choose_action(self, next=0):
        """Epsilon-greedy action for the current state (or the next
        state when `next` is 1)."""
        if(np.random.sample(1) < self.epsilon):
            action = np.random.randint(0,9)
        else:
            i = self.state if next == 0 else self.next_state
            action_choices = self.Q[i[0],i[1],i[2],i[3],i[4],i[5],i[6],:]
            action = np.argmax(action_choices)
        return action

    #uses the selected action to accelerate and steer the car
    def act(self):
        """Decode self.action (0-8) into pedal (action//3) and
        steering (action%3) commands and apply them to the car."""
        accel = math.floor(self.action/3)
        turn = self.action%3
        self.car.pedal(accel)
        self.car.steer(turn)
Example #11
0
import torch
import random
import torchvision
from model import Model
from config import parser
from dataloader import Dataloader
from checkpoints import Checkpoints
from train import Trainer
import utils

# Script body: executes at import time — parses arguments, restores a
# model from its checkpoint and runs the trainer's visualization pass.
# parse the arguments
args = parser.parse_args()
random.seed(args.manual_seed)
torch.manual_seed(args.manual_seed)
utils.saveargs(args)

# initialize the checkpoint class
checkpoints = Checkpoints(args)

# Create Model and restore its weights
models = Model(args)
model, criterion = models.setup(checkpoints)

# Data Loading (loader_test is presumably unused here — kept for the
# tuple unpack of Dataloader.create())
dataloader = Dataloader(args)
loader_train, loader_test = dataloader.create()

# The trainer handles the training loop and evaluation on validation set
trainer = Trainer(args, model, criterion)
loss_train = trainer.visualize(loader_train)
Example #12
0
class FMIndex:
    """FM-index over a '$'-terminated text: supports backward search
    (`interval`) and locating pattern occurrences via a sampled suffix
    array (`find_occurences`).

    Relies on an external ``bwt(T, sa)`` helper and a ``Checkpoints``
    rank structure supplied by the surrounding module.
    """

    def cut_suffix_array(self, full_sa, sa_step):
        """Sizes down suffix array taking every sa_step-th element, the rest are removed"""
        res_sa = {}
        for i in range(len(full_sa)):
            # keep rows whose text offset is a multiple of sa_step,
            # keyed by their row index in the full suffix array
            if full_sa[i] % sa_step == 0:
                res_sa[i] = full_sa[i]
        return res_sa

    def calc_first_col(self, bwt):
        """
        Returns first column of BWT matrix.
        It is enough to keep only number of characters less than current, because it is sorted column.
        """
        cnts = {}
        for c in bwt:
            cnts[c] = cnts.get(c, 0) + 1
        # cumulative counts in sorted character order: ret[c] is the
        # number of characters in the text strictly smaller than c
        ret = {}
        total = 0
        for c, cnt in sorted(cnts.items()):
            ret[c] = total
            total += cnt
        return ret

    def __init__(self, T, suffix_array, cp_step, sa_step):
        """Build the index for text T using the supplied suffix-array
        constructor; a '$' terminator is appended when missing."""
        if T[-1] != '$':
            T += '$'
        full_sa = suffix_array(T)
        self.bwt = bwt(T, full_sa)
        self.sa = self.cut_suffix_array(full_sa, sa_step)
        self.cps = Checkpoints(self.bwt, cp_step)
        self.first_col = self.calc_first_col(self.bwt)

    def count(self, c):
        """Return number of characters less than c"""
        if c not in self.first_col:
            for cc in sorted(self.first_col.keys()):
                if c < cc:
                    return self.first_col[cc]
            # Fix: c sorts after every character present, so *all*
            # characters of the text are smaller than it.  (The original
            # fell through to the count for the largest character, which
            # under-counted and would raise NameError on an empty table.)
            return len(self.bwt)
        else:
            return self.first_col[c]

    def interval(self, p):
        """Returns inclusive interval of BWM rows where p is prefix"""
        # (debug prints of self.bwt / self.first_col removed)
        l, r = 0, len(self.bwt) - 1
        for i in range(len(p) - 1, -1,
                       -1):  #start from last character and go backwards
            l = self.cps.rank(self.bwt, p[i], l - 1) + self.count(p[i])
            r = self.cps.rank(self.bwt, p[i], r) + self.count(p[i]) - 1
            if r < l:
                break
        return l, r + 1

    def get_offset(self, r):
        """Given row in matrix returns its real offset in text"""
        steps = 0
        # walk backwards (LF-mapping) until a sampled row is reached
        while r not in self.sa:
            c = self.bwt[r]
            r = self.cps.rank(self.bwt, c, r - 1) + self.count(c)
            steps += 1
        return self.sa[r] + steps

    def find_occurences(self, p):
        """Returns all occurences of pattern p"""
        l, r = self.interval(p)
        return [self.get_offset(i) for i in range(l, r)]
Example #13
0
def deploy():
    """End-to-end OpenStack deployment driver.

    Validates settings, prepares the SAH node, deploys the undercloud
    and/or overcloud (or edge sites only, depending on CLI flags), runs
    post-deploy verification/tempest, and optionally deploys PowerFlex
    components.  Exits the process with 0 on success, 1 on any failure.
    Several flag combinations short-circuit via ``os._exit(0)``.
    """
    ret_code = 0
    # noinspection PyBroadException

    logger.debug("=================================")
    logger.info("=== Starting up ...")
    logger.debug("=================================")
    try:
        # load .ini/.properties configuration and CLI arguments
        settings, args = get_settings()
        director_vm = Director()
        sah_node = Sah()

        # mutually exclusive command: list edge sites and quit
        if args.list_edge_sites:
            list_edge_sites(settings, director_vm)
            os._exit(0)

        is_deploy_edge_site = bool(args.edge_sites) or args.edge_sites_all
        logger.info("Deploying edge site(s) from CLI argument? %s",
                    str(is_deploy_edge_site))
        edge_sites = None

        # deploying edge site(s)?, validate sites are in settings, if
        # valid return a list of sites based on args.
        if is_deploy_edge_site:
            edge_sites = validate_edge_sites_in_settings(args, settings)

        logger.info("Edge sites after validation: {}".format(str(edge_sites)))

        if args.validate_only is True:
            logger.info("Only validating ini/properties config values")
        else:
            if args.overcloud_only is True:
                logger.info("Only redeploying the overcloud")

        logger.info("Settings .ini: " + settings.settings_file)
        logger.info("Settings .properties " + settings.network_conf)
        settings.get_version_info()
        logger.info("source version # : " +
                    settings.source_version.decode('utf-8'))
        # Checkpoints here is the deployment verifier, not a model saver
        tester = Checkpoints()
        tester.verify_deployer_settings()
        if args.validate_only is True:
            logger.info("Settings validated")
            os._exit(0)
        tester.retreive_switches_config()
        tester.sah_health_check()
        # mutually exclusive command, configure tempest and quit.
        if args.tempest_config_only:
            logger.info("Only (re-)generating tempest.conf")
            director_vm.configure_tempest()
            os._exit(0)

        # mutually exclusive command, run tempest and quit.
        if args.run_tempest_only:
            logger.info("Only running tempest, will configure " +
                        "tempest.conf if needed.")
            director_vm.run_tempest()
            os._exit(0)

        # stage all artifacts on the SAH node
        logger.info("Uploading configs/iso/scripts.")
        sah_node.clear_known_hosts()
        sah_node.handle_lock_files()

        sah_node.upload_iso()
        sah_node.upload_director_scripts()
        sah_node.upload_powerflexgw_scripts()
        sah_node.upload_powerflexmgmt_scripts()
        sah_node.enable_chrony_ports()

        # full deployment path: build the undercloud first
        if args.overcloud_only is False and is_deploy_edge_site is False:
            deploy_undercloud(settings, sah_node, tester, director_vm)
            if args.undercloud_only:
                return
        elif is_deploy_edge_site is False:
            # overcloud-only redeploy: tear down the existing stack first
            logger.info("=== Skipped Director VM/Undercloud install")
            logger.debug("Deleting overcloud stack")
            director_vm.delete_overcloud()

        if is_deploy_edge_site:
            # edge sites require an already-deployed overcloud
            _is_oc_failed, _error_oc = tester.verify_overcloud_deployed()
            if _is_oc_failed:
                logger.error("Attempted to deploy edge site(s) but the "
                             "overcloud has not been deployed, "
                             "or failed to deploy. "
                             "Edge sites cannot be deployed without an "
                             "existing overcloud, exiting")
                os._exit(0)
            deploy_edge_sites(sah_node, director_vm, edge_sites)
            os._exit(0)
        else:  # no edge sites arguments, just deploy overcloud
            deploy_overcloud(director_vm)
            _is_oc_failed, _err_oc = tester.verify_overcloud_deployed()
            if _is_oc_failed:
                raise _err_oc
            # lastly, if there are edge sites defined in .ini
            # and deploy_edge_sites is set to true in ini deploy the sites
            if settings.deploy_edge_sites and settings.edge_sites:
                logger.info(
                    "Automatic edge site deployment is set to true "
                    "in the ini (deploy_edge_sites==true), "
                    "deploying all defined edge sites: %s",
                    str(settings.edge_sites))
                deploy_edge_sites(sah_node, director_vm, settings.edge_sites)

        if settings.hpg_enable:
            logger.info("HugePages has been successfully configured "
                        "with size: " + settings.hpg_size)
        if settings.numa_enable:
            logger.info("NUMA has been successfully configured "
                        "with hostos_cpus count: " + settings.hostos_cpu_count)

        # post-deploy verification and sanity/tempest runs
        director_vm.summarize_deployment()
        tester.verify_computes_virtualization_enabled()
        tester.verify_backends_connectivity()
        director_vm.enable_fencing()
        director_vm.run_sanity_test()

        external_sub_guid = director_vm.get_sanity_subnet()
        if external_sub_guid:
            director_vm.configure_tempest()

        run_tempest(director_vm)

        # optional PowerFlex gateway / management deployment
        if len(settings.powerflex_nodes) > 0:
            powerflexgw_vm = Powerflexgw()
            deploy_powerflex_gw(settings, sah_node, tester, powerflexgw_vm)

            if settings.enable_powerflex_mgmt:
                powerflexmgmt_vm = Powerflexmgmt()
                deploy_powerflex_mgmt(settings, sah_node, tester,
                                      powerflexmgmt_vm)

        logger.info("Deployment summary info; useful ip's etc.. " +
                    "/auto_results/deployment_summary.log")

    # NOTE(review): bare except also traps KeyboardInterrupt/SystemExit;
    # presumably intentional as a top-level boundary — confirm.
    except:  # noqa: E722
        logger.error(traceback.format_exc())
        e = sys.exc_info()[0]
        logger.error(e)
        print(e)
        print(traceback.format_exc())
        ret_code = 1
    logger.info("log : /auto_results/ ")
    sys.exit(ret_code)
Example #14
0
def main():
    """Training/evaluation/feature-extraction entry point.

    Relies on module-level ``args`` and ``config_file`` plus the project
    classes (Checkpoints, Model, Trainer, Tester, Dataloader).
    """
    # seed RNGs for reproducibility
    random.seed(args.manual_seed)
    torch.manual_seed(args.manual_seed)
    if args.save_results:
        utils.saveargs(args, config_file)

    # initialize the checkpoint class
    checkpoints = Checkpoints(args)

    # Create Model
    models = Model(args)
    model, model_dict, evaluation = models.setup(checkpoints)

    print('Model:\n\t{model}\nTotal params:\n\t{npar:.2f}M'.format(
        model=args.model_type,
        npar=sum(p.numel() for p in model['feat'].parameters()) / 1000000.0))

    # The trainer handles the training loop
    trainer = Trainer(args, model, model_dict['loss'], evaluation)
    # The tester handles the evaluation on validation set
    tester = Tester(args, model, model_dict['loss'], evaluation)

    test_freq = 1  # evaluate on the test set every `test_freq` epochs

    dataloader = Dataloader(args)

    if args.extract_feat:
        # feature-extraction-only mode
        loaders = dataloader.create(flag='Test')
        tester.extract_features(loaders)
    elif args.just_test:
        # evaluation-only mode
        loaders = dataloader.create(flag='Test')
        acc_test = tester.test(args.epoch_number, loaders)
        print(acc_test)
    else:

        loaders = dataloader.create()
        if args.dataset_train == 'ClassSamplesDataLoader':
            loaders['train'] = dataloader.dataset_train

        # start training !!!
        acc_best = 0
        loss_best = 999
        stored_models = {}
        # FIX: pre-initialize acc_test so the best-model branch below can
        # never hit an UnboundLocalError on epochs where the test is
        # skipped (i.e. whenever test_freq > 1). Matches the sibling
        # main() elsewhere in this file that does `acc_test=0`.
        acc_test = 0

        for epoch in range(args.nepochs - args.epoch_number):
            epoch += args.epoch_number
            print('\nEpoch %d/%d\n' % (epoch + 1, args.nepochs))

            # train for a single epoch
            loss_train = trainer.train(epoch, loaders, checkpoints, acc_best)
            if float(epoch) % test_freq == 0:
                acc_test = tester.test(epoch, loaders)

            # track the best (lowest) training loss; snapshot model + loss
            if loss_best > loss_train:
                model_best = True
                loss_best = loss_train
                acc_best = acc_test
                if args.save_results:
                    stored_models['model'] = model
                    stored_models['loss'] = trainer.criterion
                    checkpoints.save(acc_best, stored_models, epoch, 'final',
                                     model_best)
Exemple #15
0
def main():
    """RankGAN training entry point.

    Builds discriminator/generator/encoder, optionally resumes them (and
    their previous-stage copies) from checkpoints, then trains and
    periodically saves model weights under ``args.save``.
    """
    # parse the arguments
    args = config.parse_args()
    random.seed(args.manual_seed)
    torch.manual_seed(args.manual_seed)
    args.save = os.path.join(args.result_path, 'save')
    args.logs = os.path.join(args.result_path, 'logs')
    utils.saveargs(args)

    # initialize the checkpoint class
    checkpoints = Checkpoints(args)

    # Create Model
    models = Model(args)
    rankgan_model, criterion = models.setup(checkpoints)
    modelD = rankgan_model[0]
    modelG = rankgan_model[1]
    Encoder = rankgan_model[2]
    prevD, prevG = None, None

    # FIX: compare to the empty string with != rather than `is not` --
    # identity comparison with a literal depends on string interning and
    # raises a SyntaxWarning on modern CPython.
    if args.netD != '':
        checkpointD = checkpoints.load(args.netD)
        modelD.load_state_dict(checkpointD)
    if args.netG != '':
        checkpointG = checkpoints.load(args.netG)
        modelG.load_state_dict(checkpointG)
    if args.netE != '':
        checkpointE = checkpoints.load(args.netE)
        Encoder.load_state_dict(checkpointE)
    if args.prevD != '':
        prevD = copy.deepcopy(modelD)
        checkpointDprev = checkpoints.load(args.prevD)
        prevD.load_state_dict(checkpointDprev)
    if args.prevG != '':
        prevG = copy.deepcopy(modelG)
        checkpointGprev = checkpoints.load(args.prevG)
        prevG.load_state_dict(checkpointGprev)

    # Data Loading
    dataloader = Dataloader(args)
    loader_train = dataloader.create(flag="Train")
    loader_test = dataloader.create(flag="Test")

    # The trainer handles the training loop and evaluation on validation set
    trainer = Trainer(args, modelD, modelG, Encoder, criterion, prevD, prevG)

    for epoch in range(args.nepochs):
        # train for a single epoch
        loss_train = trainer.train(epoch, loader_train)

        # best-effort checkpointing: a failed save must not abort training
        try:
            # FIX: the original format string was broken --
            # '%s/netD.pth' % (args.save, stage) raises TypeError (one
            # placeholder, two args) and `stage` is undefined in this
            # function; the generator path '%s/order_%d' % (i + 1, stage)
            # also wrote outside args.save. Save everything under args.save.
            torch.save(modelD.state_dict(), '%s/netD.pth' % args.save)
            for i in range(args.nranks - 1):
                torch.save(modelG.state_dict(),
                           '%s/order_%d_netG.pth' % (args.save, i + 1))
        except Exception as e:
            print(e)
Exemple #16
0
import os
import datetime
import utils
import copy
import config

# parse the arguments
args = config.parse_args()
random.seed(args.manual_seed)
torch.manual_seed(args.manual_seed)
args.save = os.path.join(args.result_path, 'save')
args.logs = os.path.join(args.result_path, 'logs')
utils.saveargs(args)

# initialize the checkpoint class
checkpoints = Checkpoints(args)

# Create Model
models = Model(args)
gogan_model, criterion = models.setup(checkpoints)
netD = gogan_model[0]
netG = gogan_model[1]
netE = gogan_model[2]

if args.netD is not '':
    checkpointD = checkpoints.load(args.netD)
    netD.load_state_dict(checkpointD)
if args.netG is not '':
    checkpointG = checkpoints.load(args.netG)
    netG.load_state_dict(checkpointG)
if args.netE is not '':
Exemple #17
0
def main():
    """Staged GoGAN/RankGAN training entry point.

    Builds discriminator/generator/encoder, optionally resumes them (and
    previous-stage copies) from checkpoints, then trains stage by stage,
    saving per-stage weights under ``args.save``.
    """
    # parse the arguments
    args = config.parse_args()
    random.seed(args.manual_seed)
    torch.manual_seed(args.manual_seed)
    args.save = os.path.join(args.result_path, 'save')
    args.logs = os.path.join(args.result_path, 'logs')
    utils.saveargs(args)

    # initialize the checkpoint class
    checkpoints = Checkpoints(args)

    # Create Model
    models = Model(args)
    rankgan_model, criterion = models.setup(checkpoints)
    modelD = rankgan_model[0]
    modelG = rankgan_model[1]
    Encoder = rankgan_model[2]
    prevD, prevG = None, None

    # FIX: compare to the empty string with != rather than `is not` --
    # identity comparison with a literal depends on string interning and
    # raises a SyntaxWarning on modern CPython.
    if args.netD != '':
        checkpointD = checkpoints.load(args.netD)
        modelD.load_state_dict(checkpointD)
    if args.netG != '':
        checkpointG = checkpoints.load(args.netG)
        modelG.load_state_dict(checkpointG)
    if args.netE != '':
        checkpointE = checkpoints.load(args.netE)
        Encoder.load_state_dict(checkpointE)
    if args.prevD != '':
        prevD = copy.deepcopy(modelD)
        checkpointDprev = checkpoints.load(args.prevD)
        prevD.load_state_dict(checkpointDprev)
    if args.prevG != '':
        prevG = copy.deepcopy(modelG)
        checkpointGprev = checkpoints.load(args.prevG)
        prevG.load_state_dict(checkpointGprev)

    # Data Loading
    dataloader = Dataloader(args)
    loader_train = dataloader.create(flag="Train")
    loader_test = dataloader.create(flag="Test")

    # The trainer handles the training loop and evaluation on validation set
    trainer = Trainer(args, modelD, modelG, Encoder, criterion, prevD, prevG)

    # start training !!!
    num_stages = args.num_stages
    stage_epochs = args.stage_epochs
    for stage in range(args.start_stage, num_stages):

        # setup trainer for the stage
        trainer.setup_stage(stage, loader_test)
        print("Training for Stage {}".format(stage))

        for epoch in range(stage_epochs[stage]):
            # train for a single epoch
            loss_train = trainer.train(stage, epoch, loader_train)

            # best-effort checkpointing: a failed save must not abort
            # the stage loop
            try:
                torch.save(modelD.state_dict(),
                           '%s/stage_%d_netD.pth' % (args.save, stage))
                torch.save(modelG.state_dict(),
                           '%s/stage_%d_netG.pth' % (args.save, stage))
                torch.save(Encoder.state_dict(),
                           '%s/stage_%d_netE.pth' % (args.save, stage))
            except Exception as e:
                print(e)
Exemple #18
0
def deploy():
    """End-to-end JetPack deployment driver.

    Validates settings, (re)creates the Director and Dashboard VMs,
    deploys the overcloud, then runs sanity/tempest checks. Exits the
    process with 0 on success, 1 on any failure.
    """
    ret_code = 0
    # noinspection PyBroadException

    logger.debug("=================================")
    logger.info("=== Starting up ...")
    logger.debug("=================================")
    try:
        settings, args = get_settings()
        if args.validate_only is True:
            logger.info("Only validating ini/properties config values")
        else:
            if args.overcloud_only is True:
                logger.info("Only redeploying the overcloud")
            if args.skip_dashboard_vm is True:
                logger.info("Skipping Dashboard VM install")

        logger.info("Settings .ini: " + settings.settings_file)
        logger.info("Settings .properties " + settings.network_conf)
        settings.get_version_info()
        logger.info("source version # : " + settings.source_version)
        tester = Checkpoints()
        tester.verify_deployer_settings()
        if args.validate_only is True:
            logger.info("Settings validated")
            os._exit(0)
        tester.retreive_switches_config()

        non_sah_nodes = (settings.controller_nodes + settings.compute_nodes +
                         settings.ceph_nodes)

        sah_node = Sah()

        tester.sah_health_check()
        # mutually exclusive command, configure tempest and quit.
        if args.tempest_config_only:
            logger.info("Only (re-)generating tempest.conf")
            director_vm = Director()
            director_vm.configure_tempest()
            os._exit(0)

        # mutually exclusive command, run tempest and quit.
        if args.run_tempest_only:
            logger.info("Only running tempest, will configure " +
                        "tempest.conf if needed.")
            director_vm = Director()
            director_vm.run_tempest()
            os._exit(0)

        logger.info("Uploading configs/iso/scripts.")
        sah_node.clear_known_hosts()
        sah_node.handle_lock_files()
        sah_node.upload_iso()
        sah_node.upload_director_scripts()

        director_ip = settings.director_node.public_api_ip
        if args.overcloud_only is False:
            # unregister the old director before tearing it down
            Ssh.execute_command(director_ip, "root",
                                settings.director_node.root_password,
                                "subscription-manager remove --all")
            Ssh.execute_command(director_ip, "root",
                                settings.director_node.root_password,
                                "subscription-manager unregister")
            sah_node.delete_director_vm()

            logger.info("=== create the director vm")
            sah_node.create_director_vm()
            tester.director_vm_health_check()

            logger.info("Preparing the Director VM")
            director_vm = Director()
            director_vm.apply_internal_repos()

            logger.debug("===  Uploading & configuring undercloud.conf . "
                         "environment yaml ===")
            director_vm.upload_update_conf_files()

            logger.info("=== installing the director & undercloud ===")
            director_vm.inject_ssh_key()
            director_vm.upload_cloud_images()
            director_vm.install_director()
            tester.verify_undercloud_installed()
            if args.undercloud_only:
                return
        else:
            logger.info("=== Skipped Director VM/Undercloud install")
            director_vm = Director()
            logger.debug("Deleting overcloud stack")
            director_vm.delete_overcloud()

        if args.skip_dashboard_vm is False:
            logger.debug("Delete the Dashboard VM")
            dashboard_ip = settings.dashboard_node.public_api_ip
            logger.debug(
                Ssh.execute_command(dashboard_ip, "root",
                                    settings.dashboard_node.root_password,
                                    "subscription-manager remove --all"))
            Ssh.execute_command(dashboard_ip, "root",
                                settings.dashboard_node.root_password,
                                "subscription-manager unregister")

            sah_node.delete_dashboard_vm()

            logger.info("=== creating Dashboard VM")
            sah_node.create_dashboard_vm()

            tester.dashboard_vm_health_check()

        else:
            logger.info("Skipped the Dashboard VM install")

        logger.info("=== Preparing the overcloud ===")

        # The network-environment.yaml must be setup for use during DHCP
        # server configuration
        director_vm.setup_net_envt()
        director_vm.configure_dhcp_server()
        director_vm.node_discovery()
        director_vm.configure_idracs()
        director_vm.import_nodes()
        director_vm.node_introspection()
        director_vm.update_sshd_conf()
        director_vm.assign_node_roles()
        director_vm.revert_sshd_conf()

        director_vm.setup_templates()
        logger.info("=== Installing the overcloud ")
        logger.debug("installing the overcloud ... this might take a while")
        director_vm.deploy_overcloud()
        # read the stack state column from `openstack stack list`
        cmd = "source ~/stackrc; openstack stack list | grep " \
              + settings.overcloud_name + " | awk '{print $8}'"
        overcloud_status = \
            Ssh.execute_command_tty(director_ip,
                                    settings.director_install_account_user,
                                    settings.director_install_account_pwd,
                                    cmd)[0]
        logger.debug("=== Overcloud stack state : " + overcloud_status)
        if settings.hpg_enable:
            logger.info(
                " HugePages has been successfully configured with size: " +
                settings.hpg_size)
        if settings.numa_enable:
            logger.info(" NUMA has been successfully configured"
                        " with hostos_cpus count: " +
                        settings.hostos_cpu_count)

        logger.info("====================================")
        logger.info(" OverCloud deployment status: " + overcloud_status)
        logger.info(" log : /auto_results/ ")
        logger.info("====================================")
        if "CREATE_COMPLETE" not in overcloud_status:
            raise AssertionError("OverCloud did not install properly : " +
                                 overcloud_status)

        director_vm.summarize_deployment()
        tester.verify_computes_virtualization_enabled()
        tester.verify_backends_connectivity()
        if args.skip_dashboard_vm is False:
            director_vm.configure_dashboard()
        director_vm.enable_fencing()
        director_vm.run_sanity_test()

        external_sub_guid = director_vm.get_sanity_subnet()
        if external_sub_guid:
            director_vm.configure_tempest()

        run_tempest()

        logger.info("Deployment summary info; useful ip's etc.. " +
                    "/auto_results/deployment_summary.log")

    except:  # noqa: E722 -- deliberate top-level boundary: log and exit 1
        logger.error(traceback.format_exc())
        e = sys.exc_info()[0]
        logger.error(e)
        # FIX: use the print() function -- the original used Python-2
        # print statements (`print e`), a SyntaxError under Python 3 and
        # inconsistent with the rest of the file.
        print(e)
        print(traceback.format_exc())
        ret_code = 1
    logger.info("log : /auto_results/ ")
    sys.exit(ret_code)
Exemple #19
0
def main():
    """Re-evaluate a saved population of evolved CIFAR-10 models.

    Loads a pickled population, trains/evaluates each genome from scratch,
    records best accuracy / FLOPs / parameter count into each individual's
    fitness, and pickles the evaluated population back out.
    """
    # load pop for extrapolation
    # FIX: use a context manager so the file handle is closed promptly
    # (the original leaked the handle from pickle.load(open(...))).
    with open("Results/CIFAR10_baseline/Run5/pop_extra.pkl", "rb") as f:
        pop = pickle.load(f)

    for i in range(len(pop)):
        genome = pop[i].genome
        # parse the arguments
        args = parser.parse_args()
        args.save = os.path.join("Extrapolation_results",
                                 "Model_ID_{}".format(pop[i].id))
        random.seed(args.manual_seed)
        torch.manual_seed(args.manual_seed)
        utils.saveargs(args)

        # initialize the checkpoint class
        checkpoints = Checkpoints(args)

        # Create Model from the individual's genome
        models = Model(args, genome)
        model, criterion, num_params = models.setup()
        model = calculate_flops.add_flops_counting_methods(model)

        # Data Loading
        dataloader = Dataloader(args)
        loaders = dataloader.create()

        # The trainer handles the training loop
        trainer = Trainer(args, model, criterion)
        # The tester handles the evaluation on validation set
        tester = Tester(args, model, criterion)

        # start training !!!
        loss_best = 1e10
        acc_test_list = []
        acc_best = 0
        # FIX: pre-initialize n_flops so the fitness assignment after the
        # loop cannot raise NameError if the loop never runs
        # (args.nepochs == 0).
        n_flops = 0.0
        for epoch in range(args.nepochs):

            # train for a single epoch
            start_time_epoch = time.time()
            if epoch == 0:
                # count FLOPs on the first epoch only
                model.start_flops_count()
            loss_train, acc_train = trainer.train(epoch, loaders)
            loss_test, acc_test = tester.test(epoch, loaders)
            acc_test_list.append(acc_test)

            if acc_test > acc_best:
                model_best = True
                # update the best test accu found so found
                acc_best = acc_test
                loss_best = loss_test
                checkpoints.save(epoch, model, model_best)

            time_elapsed = np.round((time.time() - start_time_epoch), 2)
            if epoch == 0:
                n_flops = (model.compute_average_flops_cost() / 1e6 / 2)

            # abort this genome if training diverged
            if np.isnan(np.average(loss_train)):
                break

            print(
                "Epoch {:d}:, test error={:0.2f}, FLOPs={:0.2f}M, n_params={:0.2f}M, {:0.2f} sec"
                .format(epoch, 100.0 - acc_test, n_flops, num_params / 1e6,
                        time_elapsed))

        # record fitness: [best accuracy, FLOPs], plus parameter count
        pop[i].fitness[0] = acc_best
        pop[i].fitness[1] = n_flops
        pop[i].n_params = num_params

    with open("Results/CIFAR10_baseline/Run5/pop_extra_evaluated.pkl",
              "wb") as f:
        pickle.dump(pop, f)
Exemple #20
0
import torchvision
from model import Model
from config import parser
from dataloader import Dataloader
from checkpoints import Checkpoints
from train import Trainer
import utils

# parse the arguments
args = parser.parse_args()
random.seed(args.manual_seed)
torch.manual_seed(args.manual_seed)
utils.saveargs(args)

# initialize the checkpoint class
checkpoints = Checkpoints(args)

# Create Model
models = Model(args)
model, criterion = models.setup(checkpoints)

# Data Loading
dataloader = Dataloader(args)
loader_train, loader_test = dataloader.create()

# The trainer handles the training loop and evaluation on validation set
trainer = Trainer(args, model, criterion)

# start training !!!
acc_best = 0
for epoch in range(args.nepochs):
Exemple #21
0
import time
import datetime
import copy
import os
import config

# parse the arguments
args = config.parse_args()
random.seed(args.manual_seed)
torch.manual_seed(args.manual_seed)
args.save = os.path.join(args.result_path, 'save')
args.logs = os.path.join(args.result_path, 'logs')
utils.saveargs(args)

# initialize the checkpoint class
checkpoints = Checkpoints(args)

# Create Model
models = Model(args)
gogan_model, criterion = models.setup(checkpoints)
modelD = gogan_model[0]
modelG = gogan_model[1]
Encoder = gogan_model[2]
prevD, prevG = None, None

if args.netD is not '':
    checkpointD = checkpoints.load(args.netD)
    modelD.load_state_dict(checkpointD)
if args.netG is not '':
    checkpointG = checkpoints.load(args.netG)
    modelG.load_state_dict(checkpointG)
Exemple #22
0
def main():
    """Demographic-aware training entry point.

    Seeds RNGs, builds the model/loss/optimizer via project classes, then
    either extracts features, runs a one-off test, or trains for
    ``args.nepochs`` epochs with periodic evaluation, checkpointing, and a
    one-time kernel-fusing step at ``args.fuse_epoch``.

    Relies on module-level ``args`` and ``config_file``; writes TensorBoard
    logs when ``args.save_results`` is set.
    """
    # parse the arguments
    random.seed(args.manual_seed)
    torch.manual_seed(args.manual_seed)
    if args.save_results:
        utils.saveargs(args, config_file)

    # initialize the checkpoint class
    checkpoints = Checkpoints(args)

    # Create Model
    models = Model(args)
    model_dict, evaluation = models.setup(checkpoints)

    print('Model:\n\t{model}\nTotal params:\n\t{npar:.4f}M'.format(
          model=args.model_type,
          npar=sum(p.numel() for p in model_dict['model'].parameters()) / 1000000.0))

    #### get kernel information ####
    # all unordered demographic-group pairs, used later by update_kernels
    ndemog = args.ndemog
    ndemog = list(range(ndemog))
    demog_combs = list(combinations(ndemog, 2))
    #### get kernel information ####

    #### create writer for tensor boader ####
    if args.save_results:
        writer = SummaryWriter(args.tblog_dir)
    else:
        writer = None
    #### create writer for tensor boader ####

    # The trainer handles the training loop
    trainer = Trainer(args, model_dict['model'], model_dict['loss'], model_dict['optimizer'], writer)
    # The trainer handles the evaluation on validation set
    tester = Tester(args, model_dict['model'], evaluation, writer)

    # evaluate/checkpoint every `test_freq` epochs
    test_freq = 1

    dataloader = Dataloader(args)

    if args.extract_feat:
        # feature-extraction-only mode
        loaders  = dataloader.create(flag='Test')
        tester.extract_features(loaders, 1)
    elif args.just_test:
        # evaluation-only mode
        loaders  = dataloader.create(flag='Test')
        acc_test,acc_mean = tester.test(loaders, 1)
        print(acc_test, acc_mean)
    else:
        loaders  = dataloader.create()
        if args.dataset_train == 'ClassSamplesDataLoader':
            loaders['train'] = dataloader.dataset_train

        # start training !!!
        acc_best = 0
        loss_best = 999
        stored_models = {}

        # NOTE: iteration resumes from args.epoch_number by shifting the
        # loop variable rather than changing the range start
        for epoch in range(args.nepochs-args.epoch_number):
            epoch += args.epoch_number
            print('\nEpoch %d/%d\n' % (epoch + 1, args.nepochs))

            # train for a single epoch
            loss_train = trainer.train(loaders, epoch)

            # acc_test defaults to 0 on epochs where evaluation is skipped
            acc_test=0
            if float(epoch) % test_freq == 0:
                acc_test,acc_mean = tester.test(loaders, epoch)

            # track best (lowest) training loss and its test accuracy
            if loss_best > loss_train:
                loss_best = loss_train
                acc_best = acc_test
            # checkpoint on every evaluated epoch (not only on improvement)
            if  float(epoch) % test_freq == 0 and args.save_results:
                stored_models['model'] = trainer.model
                stored_models['loss'] = trainer.criterion
                stored_models['optimizer'] = trainer.optimizer
                checkpoints.save(acc_test, stored_models, epoch)

            # one-time kernel fusing across demographic pairs
            if epoch == args.fuse_epoch:
                update_kernels(args, trainer.model, demog_combs, ndemog)

    if args.save_results:
        writer.close()
Exemple #23
0
def deploy():
    """End-to-end JetPack 10.x deployment driver.

    Parses CLI options, validates settings, (re)creates the Director and
    Dashboard VMs, deploys the overcloud, then runs sanity/tempest checks.
    Exits the process with 0 on success, 1 on any failure.
    """
    ret_code = 0
    # noinspection PyBroadException
    try:

        logger.debug("=================================")
        logger.info("=== Starting up ...")
        logger.debug("=================================")

        parser = argparse.ArgumentParser(
            description='JetPack 10.x deployer')
        parser.add_argument('-s', '--settings',
                            help='ini settings file, e.g settings/acme.ini',
                            required=True)
        parser.add_argument('-undercloud_only', '--undercloud_only',
                            help='Only reinstall the undercloud',
                            action='store_true', required=False)
        parser.add_argument('-overcloud_only', '--overcloud_only',
                            help='Only reinstall the overcloud',
                            action='store_true', required=False)
        parser.add_argument('-skip_dashboard_vm', '--skip_dashboard_vm',
                            help='Do not reinstall the Dashboard VM',
                            action='store_true',
                            required=False)
        parser.add_argument('-validate_only', '--validate_only',
                            help='No deployment - just validate config values',
                            action='store_true',
                            required=False)
        args, others = parser.parse_known_args()
        # reject any unrecognized arguments outright
        if len(others) > 0:
            parser.print_help()
            msg = "Invalid argument(s) :"
            for each in others:
                msg += " " + each + ";"
            raise AssertionError(msg)
        if args.validate_only is True:
            logger.info("Only validating ini/properties config values")
        else:
            if args.overcloud_only is True:
                logger.info("Only redeploying the overcloud")
            if args.skip_dashboard_vm is True:
                logger.info("Skipping Dashboard VM install")

        logger.debug("loading settings files " + args.settings)
        settings = Settings(args.settings)
        logger.info("Settings .ini: " + settings.settings_file)
        logger.info("Settings .properties " + settings.network_conf)
        settings.get_version_info()
        logger.info("source version # : " + settings.source_version)
        tester = Checkpoints()
        tester.verify_deployer_settings()
        if args.validate_only is True:
            logger.info("Settings validated")
            os._exit(0)

        if settings.retreive_switches_config is True:
            tester.retreive_switches_config()

        non_sah_nodes = (settings.controller_nodes +
                         settings.compute_nodes +
                         settings.ceph_nodes)

        sah_node = Sah()

        tester.sah_health_check()
        logger.info("Uploading configs/iso/scripts..")
        sah_node.clear_known_hosts()
        sah_node.handle_lock_files()
        sah_node.upload_iso()
        sah_node.upload_director_scripts()

        director_ip = settings.director_node.public_api_ip
        if args.overcloud_only is False:
            # unregister the old director before tearing it down
            Ssh.execute_command(director_ip,
                                "root",
                                settings.director_node.root_password,
                                "subscription-manager remove --all")
            Ssh.execute_command(director_ip,
                                "root",
                                settings.director_node.root_password,
                                "subscription-manager unregister")
            sah_node.delete_director_vm()

            logger.info("=== create the director vm")
            sah_node.create_director_vm()
            tester.director_vm_health_check()

            logger.info("Preparing the Director VM")
            director_vm = Director()
            director_vm.apply_internal_repos()

            logger.debug(
                "===  Uploading & configuring undercloud.conf . "
                "environment yaml ===")
            director_vm.upload_update_conf_files()

            logger.info("=== installing the director & undercloud ===")
            director_vm.inject_ssh_key()
            director_vm.upload_cloud_images()
            director_vm.install_director()
            tester.verify_undercloud_installed()
            if args.undercloud_only:
                return
        else:
            logger.info("=== Skipped Director VM/Undercloud install")
            director_vm = Director()
            logger.debug("Deleting overcloud stack")
            director_vm.delete_overcloud()

        if args.skip_dashboard_vm is False:
            logger.debug("Delete the Dashboard VM")
            dashboard_ip = settings.dashboard_node.public_api_ip
            logger.debug(
                Ssh.execute_command(dashboard_ip,
                                    "root",
                                    settings.dashboard_node.root_password,
                                    "subscription-manager remove --all"))
            Ssh.execute_command(dashboard_ip,
                                "root",
                                settings.dashboard_node.root_password,
                                "subscription-manager unregister")

            sah_node.delete_dashboard_vm()

            logger.info("=== creating Dashboard VM")
            sah_node.create_dashboard_vm()

            tester.dashboard_vm_health_check()

        else:
            logger.info("Skipped the Dashboard VM install")

        logger.info("=== Preparing the overcloud ===")

        # The network-environment.yaml must be setup for use during DHCP
        # server configuration
        director_vm.setup_net_envt()
        director_vm.configure_dhcp_server()
        director_vm.node_discovery()
        director_vm.configure_idracs()
        director_vm.import_nodes()
        director_vm.node_introspection()
        director_vm.update_sshd_conf()
        director_vm.assign_node_roles()
        # FIX: the original referenced the bound method without calling it
        # (`director_vm.revert_sshd_conf`), so sshd config was never
        # reverted after role assignment; compare the sibling deploy().
        director_vm.revert_sshd_conf()

        director_vm.setup_templates()
        logger.info("=== Installing the overcloud ")
        logger.debug("installing the overcloud ... this might take a while")
        director_vm.deploy_overcloud()
        # read the stack state column from `openstack stack list`
        # NOTE(review): the sibling deploy() uses $8 here, not $6 --
        # column index depends on the CLI version; confirm for this branch.
        cmd = "source ~/stackrc; openstack stack list | grep " \
              + settings.overcloud_name + " | awk '{print $6}'"
        overcloud_status = \
            Ssh.execute_command_tty(director_ip,
                                    settings.director_install_account_user,
                                    settings.director_install_account_pwd,
                                    cmd)[0]
        logger.debug("=== Overcloud stack state : " + overcloud_status)
        if settings.hpg_enable:
            logger.info(
                " HugePages has been successfully configured with size: " +
                settings.hpg_size)
        if settings.numa_enable:
            logger.info(
                " NUMA has been successfully configured"
                " with hostos_cpus count: " +
                settings.hostos_cpu_count)

        logger.info("====================================")
        logger.info(" OverCloud deployment status: " + overcloud_status)
        logger.info(" log : /auto_results/ ")
        logger.info("====================================")
        if "CREATE_COMPLETE" not in overcloud_status:
            raise AssertionError(
                "OverCloud did not install properly : " + overcloud_status)

        director_vm.summarize_deployment()
        tester.verify_computes_virtualization_enabled()
        tester.verify_backends_connectivity()
        if args.skip_dashboard_vm is False:
            director_vm.configure_dashboard()
        director_vm.enable_fencing()
        director_vm.enable_instance_ha()
        director_vm.configure_tempest()
        director_vm.run_sanity_test()
        run_tempest()
        logger.info("Deployment summary info; useful ip's etc.. " +
                    "/auto_results/deployment_summary.log")

    except:  # noqa: E722 -- deliberate top-level boundary: log and exit 1
        logger.error(traceback.format_exc())
        e = sys.exc_info()[0]
        logger.error(e)
        # FIX: use the print() function -- the original used Python-2
        # print statements (`print e`), a SyntaxError under Python 3 and
        # inconsistent with the rest of the file.
        print(e)
        print(traceback.format_exc())
        ret_code = 1
    logger.info("log : /auto_results/ ")
    sys.exit(ret_code)
Exemple #24
0
import random
from model import Model
from config import parser
from dataloader import Dataloader
from checkpoints import Checkpoints
from train import Trainer
import utils

# parse the arguments
args = parser.parse_args()
random.seed(args.manual_seed)
torch.manual_seed(args.manual_seed)
utils.saveargs(args)

# initialize the checkpoint class
checkpoints = Checkpoints(args)

# Create Model
models = Model(args)
model, criterion = models.setup(checkpoints)

# Data Loading
dataloader = Dataloader(args)
loader_train, loader_test = dataloader.create()
print("\t\tBatches:\t", len(loader_train))
print("\t\tBatches (Test):\t", len(loader_test))

# The trainer handles the training loop and evaluation on validation set
trainer = Trainer(args, model, criterion)

if args.oneshot == 0: