def insertPeople(self, number, recordStats=True):
    """Insert `number` freshly generated people into the database.

    Every person is obtained from ``PersonBuffer.getNewPerson()``.

    :param number: how many people to insert
    :param recordStats: when true, each insert is routed through
        ``Stats.execute``; otherwise ``self.db.insertPerson`` is called
        directly
    :return: None
    """
    for _unused in range(number):
        newcomer = PersonBuffer.getNewPerson()
        if not recordStats:
            # Plain insert, bypassing the Stats wrapper.
            self.db.insertPerson(newcomer)
            continue
        Stats.execute(self.db.insertPerson, [newcomer])
def main(self):
    """Run the update workload, then output and dump the statistics.

    Performs 1000 rounds of ``updatePeople`` on batches of five people with
    both write and read statistics enabled, calls ``Stats.output()`` and
    finally ``Stats.dump`` with the path from ``self.getDumpFileName()``.
    """
    rounds_left = 1000
    while rounds_left > 0:
        # updatePeople performs an equal number of reads and writes.
        self.updatePeople(5, True, True)
        rounds_left -= 1

    Stats.output()
    Stats.dump(self.getDumpFileName())
def __init__(self):
    """Create the stats collector, the dataset and both data loaders.

    A random permutation of the dataset indexes is cut at the 90% mark.
    Only the validation part feeds a sampler: the training loader walks the
    whole dataset sequentially, because the subset sampler for training is
    commented out below. ``self.input_shape`` is taken from the first
    training batch with its leading dimension dropped.
    """
    self.stats = Stats()
    self.train_dataset = self.create_dataset()

    shuffled_indexes = np.random.permutation(
        np.arange(len(self.train_dataset)))
    cut = int(0.9 * len(self.train_dataset))
    train_indexes, validation_indexes = np.split(shuffled_indexes, [cut])
    logger.info("train size: %d, validation size: %d" %
                (len(train_indexes), len(validation_indexes)))

    # train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_indexes)
    train_sampler = torch.utils.data.sampler.SequentialSampler(
        self.train_dataset)
    self.train_loader = torch.utils.data.DataLoader(
        self.train_dataset,
        batch_size=config.gan.batch_size,
        sampler=train_sampler,
        num_workers=0)

    validation_sampler = torch.utils.data.sampler.SubsetRandomSampler(
        validation_indexes)
    self.validation_loader = torch.utils.data.DataLoader(
        self.train_dataset,
        batch_size=config.gan.batch_size,
        sampler=validation_sampler)

    first_batch = next(iter(self.train_loader))
    self.input_shape = first_batch[0].size()[1:]
def __init__(self, log_dir=None):
    """Prepare data loaders, statistics and the evolutionary algorithm.

    The dataset from ``self.create_dataset()`` is randomly split 90/10 into
    training and validation parts, each wrapped in a shuffling DataLoader
    that drops the last incomplete batch. The evolutionary algorithm class
    is picked by ``config.evolution.algorithm`` ("NEAT" or "NSGA2").

    :param log_dir: forwarded to ``Stats`` as ``log_dir``
    """
    full_dataset = self.create_dataset()
    total = len(full_dataset)
    train_len = int(0.9 * total)
    train_dataset, validation_dataset = torch.utils.data.random_split(
        full_dataset, [train_len, total - train_len])
    logger.info("train size: %d, validation size: %d" %
                (len(train_dataset), len(validation_dataset)))

    # Both loaders share exactly the same options.
    loader_options = dict(batch_size=config.gan.batch_size,
                          num_workers=config.gan.data_loader_workers,
                          drop_last=True,
                          shuffle=True)
    self.train_loader = torch.utils.data.DataLoader(train_dataset,
                                                    **loader_options)
    self.validation_loader = torch.utils.data.DataLoader(validation_dataset,
                                                         **loader_options)

    # Shape of one sample: first element of the first batch, minus its
    # leading dimension.
    first_batch = next(iter(self.train_loader))
    self.input_shape = first_batch[0].size()[1:]

    self.stats = Stats(log_dir=log_dir,
                       input_shape=self.input_shape,
                       train_loader=self.train_loader,
                       validation_loader=self.validation_loader)

    evaluator = Evaluator(self.train_loader, self.validation_loader)
    algorithms = {"NEAT": NEAT, "NSGA2": NSGA2}
    self.evolutionary_algorithm = algorithms[config.evolution.algorithm](
        evaluator)
def __init__(self, config):
    """Set up the primary azurlane-auto instance from a Config instance.

    Creates the Stats instance, instantiates every module whose config
    section is enabled, arms the stats printout and sets ``next_combat`` to
    the current time.

    Args:
        config (Config): azurlane-auto Config instance
    """
    self.config = config
    self.oil_limit = 0
    self.stats = Stats(config)

    cfg = self.config
    stats = self.stats

    if cfg.updates['enabled']:
        # The updater is the only module built without the Stats instance.
        self.modules['updates'] = UpdateUtil(cfg)
    if cfg.combat['enabled']:
        self.modules['combat'] = CombatModule(cfg, stats)
        self.oil_limit = cfg.combat['oil_limit']
    if cfg.commissions['enabled']:
        self.modules['commissions'] = CommissionModule(cfg, stats)
    if cfg.enhancement['enabled']:
        self.modules['enhancement'] = EnhancementModule(cfg, stats)
    if cfg.missions['enabled']:
        self.modules['missions'] = MissionModule(cfg, stats)
    if cfg.retirement['enabled']:
        self.modules['retirement'] = RetirementModule(cfg, stats)

    # One headquarters module serves both the dorm and the academy.
    needs_headquarters = cfg.dorm['enabled'] or cfg.academy['enabled']
    if needs_headquarters:
        self.modules['headquarters'] = HeadquartersModule(cfg, stats)
    if cfg.events['enabled']:
        self.modules['event'] = EventModule(cfg, stats)

    self.print_stats_check = True
    self.next_combat = datetime.now()
class ALAuto(object):
    """Top-level azurlane-auto controller that drives the enabled modules."""

    # Default module table (everything disabled). __init__ copies it so that
    # instances never write into this shared class-level dict.
    modules = {
        'commissions': None,
        'combat': None,
        'missions': None
    }

    def __init__(self, config):
        """Initializes the primary azurlane-auto instance with the passed in
        Config instance; creates the Stats instance and instantiates the
        modules whose config section is enabled.

        Args:
            config (Config): azurlane-auto Config instance
        """
        self.config = config
        self.stats = Stats(config)
        # Work on a per-instance copy: assigning into the class-level dict
        # would leak enabled modules into every other ALAuto instance.
        self.modules = dict(self.modules)
        if self.config.commissions['enabled']:
            self.modules['commissions'] = CommissionModule(
                self.config, self.stats)
        if self.config.combat['enabled']:
            self.modules['combat'] = CombatModule(self.config, self.stats)
        if self.config.missions['enabled']:
            self.modules['missions'] = MissionModule(self.config, self.stats)
        self.print_stats_check = True

    def run_combat_cycle(self):
        """Method to run the combat cycle.

        Arms the stats printout when the combat module reports a truthy
        result; does nothing when the module is disabled.
        """
        if self.modules['combat']:
            if self.modules['combat'].combat_logic_wrapper():
                self.print_stats_check = True

    def run_commission_cycle(self):
        """Method to run the expedition cycle.

        Arms the stats printout when the commission module reports a truthy
        result; does nothing when the module is disabled.
        """
        if self.modules['commissions']:
            if self.modules['commissions'].commissions_logic_wrapper():
                self.print_stats_check = True

    def run_mission_cycle(self):
        """Method to run the mission cycle.

        Arms the stats printout when the mission module reports a truthy
        result; does nothing when the module is disabled.
        """
        if self.modules['missions']:
            if self.modules['missions'].mission_logic_wrapper():
                self.print_stats_check = True

    def print_cycle_stats(self):
        """Method to print the cycle stats.

        Prints only when a cycle armed the printout, then disarms it so the
        same stats are not printed again.
        """
        if self.print_stats_check:
            self.stats.print_stats()
        self.print_stats_check = False

    def run_test(self):
        """Placeholder; intentionally does nothing."""
        pass
def _runTurn(self):
    """Run one turn: each player gets refilled and acts, in order.

    Before a player acts, their ``sp`` is reset to their ``max_sp``. The turn
    stops immediately, without recording a "turns" stat or advancing the turn
    counter, as soon as ``self.winning_player`` is set.
    """
    for p, _ in enumerate(self.players):
        # refill player sp before running actions
        external = self.state["player_{}_external".format(p)]
        external["sp"] = external["max_sp"]
        self._runActionsForP(p)
        # Identity check: None is a singleton, so `is not` is the reliable test.
        if self.winning_player is not None:
            return
    Stats.recordStat("turns")
    self.state["g"]["turn"] += 1
def updatePeople(self, number, recordStats=True, recordReadStats=False):
    """Fetch `number` people, age each by one year and write them back.

    :param number: how many people to fetch through ``self.getPeople``
    :param recordStats: when true, each update is routed through
        ``Stats.execute``; otherwise ``self.db.updatePerson`` is called
        directly
    :param recordReadStats: forwarded to ``self.getPeople``
    :return: None
    """
    for entry in self.getPeople(number, recordReadStats):
        # A falsy age (missing value or zero) becomes 1; otherwise add a year.
        entry['age'] = (entry['age'] or 0) + 1
        if not recordStats:
            self.db.updatePerson(entry)
            continue
        Stats.execute(self.db.updatePerson, [entry])
def user_based_split(self, folds_num=5):
    """
    Splits the rating matrix following the user-based method. For each fold k
    (1-based) four files are written under ``<out_folder>/fold-k/``:
    train-fold_k-users.dat, train-fold_k-items.dat, test-fold_k-users.dat and
    test-fold_k-items.dat. In the users files, line i has the
    delimiter-separated list of item ids rated by user i; the items files hold
    the result of ``items_mat_from_users_ratings`` for the same ratings.
    Per-fold statistics are collected and saved to ``<out_folder>/stats.txt``.

    Users with fewer ratings than folds keep all their ratings in the training
    set of every fold and contribute nothing to the test sets.

    :param folds_num: the number of folds, default 5
    :return: None
    :raises Exception: "Split_Error!" when a user has no training item in a fold
    """
    train = [[[] for _ in range(self.num_users)] for _ in range(folds_num)]
    test = [[[] for _ in range(self.num_users)] for _ in range(folds_num)]
    for user in range(self.num_users):
        if user % 1000 == 0:
            print("user_{}".format(user))
        items_ids = np.array(self.users_ratings[user])
        n = len(items_ids)
        if n >= folds_num:
            idx = list(range(n))
            item_ids_folds = random_divide(idx, folds_num)
            for fold in range(folds_num):
                test_idx = item_ids_folds[fold]
                # Set membership keeps the complement computation linear
                # instead of scanning test_idx once per index.
                held_out = set(test_idx)
                train_idx = [i for i in idx if i not in held_out]
                train[fold][user].extend(items_ids[train_idx].tolist())
                test[fold][user].extend(items_ids[test_idx].tolist())
        else:
            # Too few ratings to split: everything goes to training and the
            # test list for this user stays empty.
            for fold in range(folds_num):
                train[fold][user].extend(items_ids.tolist())

    stats = Stats(self.generate_validation)
    for fold in range(folds_num):
        fold_dir = os.path.join(self.out_folder, "fold-{}".format(fold + 1))

        users_train = train[fold]
        items_train = self.items_mat_from_users_ratings(users_train)
        for u in users_train:
            if len(u) == 0:
                print("some users contains 0 training items, split again again!")
                raise Exception("Split_Error!")
        write_ratings(users_train,
                      filename=os.path.join(fold_dir, "train-fold_{}-users.dat".format(fold + 1)),
                      delimiter=self.delimiter)
        write_ratings(items_train,
                      filename=os.path.join(fold_dir, "train-fold_{}-items.dat".format(fold + 1)),
                      delimiter=self.delimiter)

        users_test = test[fold]
        items_test = self.items_mat_from_users_ratings(users_test)

        # Storing the fold test items for all users
        write_ratings(users_test,
                      filename=os.path.join(fold_dir, "test-fold_{}-users.dat".format(fold + 1)),
                      delimiter=self.delimiter)
        write_ratings(items_test,
                      filename=os.path.join(fold_dir, "test-fold_{}-items.dat".format(fold + 1)),
                      delimiter=self.delimiter)

        # Calculate statistics:
        # TODO: Calculate Validation sets:
        users_validation = []
        items_validation = []
        if self.generate_validation:
            stats.add_fold_statistics(fold + 1, users_train, users_test, items_train, items_test,
                                      users_validation, items_validation)
        else:
            stats.add_fold_statistics(fold + 1, users_train, users_test, items_train, items_test)
        # calculate_split_stats(users_train, users_test, items_train, items_test, fold)

    # Write split statistics:
    stats.save_stats_to_file(os.path.join(self.out_folder, 'stats.txt'))
def __init__(self, config):
    """Set up the primary azurlane-auto instance from a Config instance.

    Creates the Stats instance, instantiates the commission, combat and
    mission modules whose config section is enabled, and arms the stats
    printout.

    Args:
        config (Config): azurlane-auto Config instance
    """
    self.config = config
    self.stats = Stats(config)

    cfg = self.config
    stats = self.stats

    if cfg.commissions['enabled']:
        self.modules['commissions'] = CommissionModule(cfg, stats)
    if cfg.combat['enabled']:
        self.modules['combat'] = CombatModule(cfg, stats)
    if cfg.missions['enabled']:
        self.modules['missions'] = MissionModule(cfg, stats)

    self.print_stats_check = True
def getPeopleAndParents(self, number, recordStats=True):
    """Look up `number` randomly chosen people together with their parents.

    :param number: how many lookups to perform; each uses a fresh id from
        ``self.getRandomPersonid()``
    :param recordStats: when true, each lookup is routed through
        ``Stats.execute``; otherwise ``self.db.getPersonAndParents`` is
        called directly
    :return: list with the result of every lookup, in call order
    """
    def fetch(person_id):
        if recordStats:
            return Stats.execute(self.db.getPersonAndParents, [person_id])
        return self.db.getPersonAndParents(person_id)

    return [fetch(self.getRandomPersonid()) for _ in range(number)]
def testStats():
    """Smoke-test ``Stats.recordAcc``: record a few values, then print them.

    Builds a ``Stats`` on top of a ``Logger`` writing to ``logPath``, records
    four accuracy entries across the "train", "dev" and "test" labels and
    prints the resulting ``stats.acc``.
    """
    logger = Logger(log_path=logPath)
    stats = Stats(logger)
    stats.recordAcc(10, 0.3, "train")
    stats.recordAcc(20, 0.1, "train")
    stats.recordAcc(10, 1.3, "dev")
    stats.recordAcc(40, 0.344, "test")
    # Single-argument parenthesised form prints identically on Python 2 and
    # is valid syntax on Python 3, unlike the bare print statement.
    print(stats.acc)
def _selectAction(self, recommended_a_id=None):
    """Pick the action to play in the current state ``self.s``.

    The recommended action is followed only when one was given, the random
    roll does not fall below ``self.random_action_rate`` and the action is
    valid in the current state; in every other case a uniformly random valid
    action is chosen. Followed recommendations are recorded as a stat.

    :param recommended_a_id: id of the recommended action, if any
    :return: tuple ``(action_id, action)``
    """
    # TODO it might be a good idea to have state similarity here to pick a
    # closest observed action
    candidates = getValidActionsInState(self.s)
    candidate_ids = [Database.upsertAction(candidate) for candidate in candidates]

    # Evaluated left to right, so the random roll only happens when a
    # recommendation was actually supplied.
    follow_recommendation = (
        bool(recommended_a_id)
        and not (np.random.random() < self.random_action_rate)
        and recommended_a_id in candidate_ids
    )

    if follow_recommendation:
        action = Database.getAction(recommended_a_id)
        self._printIfVerbose("agent chose", action)
        stat_prefix = "chosen_action={}".format(action["action"])
        if "card_id" in action and action["card_id"] != None:
            stat_suffix = "_id={}".format(action["card_id"])
        else:
            stat_suffix = ""
        Stats.recordStat("{}{}".format(stat_prefix, stat_suffix))
        return recommended_a_id, action

    pick = np.random.randint(len(candidate_ids))
    picked_id = candidate_ids[pick]
    self._printIfVerbose("agent randomly chose", candidates[pick])
    return picked_id, candidates[pick]
class GanTrain:
    """Co-evolutionary GAN training driver.

    Owns the data loaders, the statistics collector and the evolutionary
    algorithm selected by ``config.evolution.algorithm``.
    """

    def __init__(self, log_dir=None):
        """Prepare data loaders, statistics and the evolutionary algorithm.

        The dataset is randomly split 90/10 into training and validation
        parts; both loaders shuffle and drop the last incomplete batch.

        :param log_dir: forwarded to ``Stats`` as ``log_dir``
        """
        full_dataset = self.create_dataset()
        train_len = int(0.9 * len(full_dataset))
        train_dataset, validation_dataset = torch.utils.data.random_split(
            full_dataset, [train_len, len(full_dataset) - train_len])
        logger.info("train size: %d, validation size: %d" %
                    (len(train_dataset), len(validation_dataset)))

        self.train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=config.gan.batch_size,
            num_workers=config.gan.data_loader_workers,
            drop_last=True,
            shuffle=True)
        self.validation_loader = torch.utils.data.DataLoader(
            validation_dataset,
            batch_size=config.gan.batch_size,
            num_workers=config.gan.data_loader_workers,
            drop_last=True,
            shuffle=True)

        # Shape of one sample: first element of the first batch without its
        # leading dimension.
        self.input_shape = next(iter(self.train_loader))[0].size()[1:]

        self.stats = Stats(log_dir=log_dir,
                           input_shape=self.input_shape,
                           train_loader=self.train_loader,
                           validation_loader=self.validation_loader)
        evaluator = Evaluator(self.train_loader, self.validation_loader)
        self.evolutionary_algorithm = {
            "NEAT": NEAT,
            "NSGA2": NSGA2
        }[config.evolution.algorithm](evaluator)

    @classmethod
    def create_dataset(cls):
        """Build the dataset named by ``config.gan.dataset``.

        When the name is an attribute of ``dsets`` that dataset is downloaded
        below ``../data/<name>`` (relative to this file) and optionally
        restricted to ``config.gan.dataset_classes``; otherwise an
        ``ImageFolder`` over ``../data/<name>/train`` is returned.

        :return: the dataset object
        """
        transform_arr = [
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5])
        ]
        if config.gan.dataset_resize:
            # Resize has to run first, before the conversion to a tensor.
            transform_arr.insert(0, transforms.Resize(config.gan.dataset_resize))
        transform = transforms.Compose(transform_arr)

        base_path = os.path.join(os.path.dirname(__file__), "..", "data")
        if hasattr(dsets, config.gan.dataset):
            dataset = getattr(dsets, config.gan.dataset)(
                root=os.path.join(base_path, config.gan.dataset),
                download=True,
                transform=transform)
            if config.gan.dataset_classes:
                # Keep only the samples whose target is in the configured classes.
                indexes = np.argwhere(
                    np.isin(dataset.targets, config.gan.dataset_classes))
                dataset.data = dataset.data[indexes].squeeze()
                dataset.targets = np.array(dataset.targets)[indexes]
            return dataset
        else:
            return ImageFolder(root=os.path.join(base_path, config.gan.dataset, "train"),
                               transform=transform)

    def start(self):
        """Run the evolutionary training loop.

        Initializes FID when any FID-related option is on, creates and
        evaluates the initial populations, then alternates statistics
        generation and ``compute_generation`` for
        ``config.evolution.max_generations - 1`` iterations. Statistics for
        the final populations are generated after the loop.
        """
        if config.evolution.fitness.generator == "FID" or \
                config.stats.calc_fid_score or config.stats.calc_fid_score_best:
            generative_score.initialize_fid(
                self.train_loader,
                sample_size=config.evolution.fitness.fid_sample_size)

        generators_population = self.evolutionary_algorithm.intialize_population(
            config.gan.generator.population_size,
            Generator,
            output_size=self.input_shape)
        discriminators_population = self.evolutionary_algorithm.intialize_population(
            config.gan.discriminator.population_size,
            Discriminator,
            output_size=1,
            input_shape=[1] + list(self.input_shape))

        # initial evaluation
        self.evolutionary_algorithm.evaluate_population(
            generators_population.phenotypes(),
            discriminators_population.phenotypes())

        # Start at -1 so the final stats call below uses generation 0 when the
        # loop body never runs (max_generations <= 1) instead of hitting an
        # unbound loop variable.
        generation = -1
        for generation in tqdm(range(config.evolution.max_generations - 1)):
            self.stats.generate(generators_population, discriminators_population, generation)
            # executes selection, reproduction and replacement to create the next population
            generators_population, discriminators_population = \
                self.evolutionary_algorithm.compute_generation(
                    generators_population, discriminators_population)

        # stats for last generation
        self.stats.generate(generators_population, discriminators_population, generation + 1)
from dataset.data_processing import DataProcessing
from dataset.dataset import Dataset
from util.visualize_dataset import VisualizeDataset
from util.stats import Stats
import tensorflow as tf
from train.darknet.darknet import TDarknet
from train.resnet34.resnet34 import TResNet34
from train.resnet50.resnet50 import TResNet50
from train.inception_v4.inception_v4 import TInception_v4
from test.test_model import TestModel
import numpy as np

# Report the TensorFlow version in use.
print(tf.__version__)

# Instantiate the helpers and one trainer object per imported trainer class.
vs = VisualizeDataset()
stats = Stats()
# NOTE(review): `Train` is not brought into scope by any import visible
# here; unless it is defined elsewhere this line raises NameError -- confirm.
train = Train()
td = TDarknet()
tr34 = TResNet34()
tr50 = TResNet50()
ti = TInception_v4()
dp = DataProcessing()
# dp.process_and_save_data()

ds = Dataset()
# ds.save_trainset_as_npy()
# images, labels = ds.load_testset()
# vs.show_images(images, labels, cols=4, rows=2)
class GanTrain:
    """Co-evolutionary GAN trainer: evolves populations of generators and
    discriminators through selection, breeding, evaluation and replacement."""

    def __init__(self):
        """Create the stats collector, the dataset and both data loaders."""
        self.stats = Stats()
        self.train_dataset = self.create_dataset()
        # Random permutation of all indexes, cut at the 90% mark.
        train_indexes, validation_indexes = np.split(np.random.permutation(np.arange(len(self.train_dataset))),
                                                     [int(0.9 * len(self.train_dataset))])
        logger.info("train size: %d, validation size: %d" % (len(train_indexes), len(validation_indexes)))
        # NOTE(review): with the subset sampler disabled, the training loader
        # walks the whole dataset, including the validation indexes -- confirm
        # this is intended.
        # train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_indexes)
        train_sampler = torch.utils.data.sampler.SequentialSampler(self.train_dataset)
        self.train_loader = torch.utils.data.DataLoader(self.train_dataset, batch_size=config.gan.batch_size,
                                                        sampler=train_sampler, num_workers=0)
        validation_sampler = torch.utils.data.sampler.SubsetRandomSampler(validation_indexes)
        self.validation_loader = torch.utils.data.DataLoader(self.train_dataset, batch_size=config.gan.batch_size,
                                                             sampler=validation_sampler)
        # Shape of one sample: first element of the first batch without its
        # leading dimension.
        self.input_shape = next(iter(self.train_loader))[0].size()[1:]

    @classmethod
    def create_dataset(cls):
        """Build the dataset named by ``config.gan.dataset``.

        Uses the matching ``dsets`` class when one exists (optionally filtered
        to ``config.gan.dataset_classes``), otherwise an ``ImageFolder`` over
        ``./data/<name>/train``.
        """
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])
        if hasattr(dsets, config.gan.dataset):
            dataset = getattr(dsets, config.gan.dataset)(root=f"./data/{config.gan.dataset}/", train=True,
                                                         download=True, transform=transform)
            if config.gan.dataset_classes:
                # Keep only the samples whose label is in the configured classes.
                indexes = np.argwhere(np.isin(dataset.train_labels, config.gan.dataset_classes))
                dataset.train_data = dataset.train_data[indexes].squeeze()
                dataset.train_labels = np.array(dataset.train_labels)[indexes]
            return dataset
        else:
            return ImageFolder(root=f"./data/{config.gan.dataset}/train", transform=transform)

    def generate_intial_population(self):
        """Create and set up the initial generators and discriminators.

        :return: tuple ``(generators Population, discriminators Population)``
        """
        generators = []
        discriminators = []
        for i in range(config.gan.generator.population_size):
            G = Generator(output_size=self.input_shape)
            G.setup()
            generators.append(G)
        for i in range(config.gan.discriminator.population_size):
            D = Discriminator(output_size=1, input_shape=[1]+list(self.input_shape))  # [1] is the batch dimension
            D.setup()
            discriminators.append(D)
        return Population(generators, desired_species=config.evolution.speciation.size),\
            Population(discriminators, desired_species=config.evolution.speciation.size)

    def train_evaluate(self, G, D, train_generator=True, train_discriminator=True, norm_g=1, norm_d=1):
        """Train G and D against each other on batches from the train loader.

        After training, error and fitness are set to their pre-call value plus
        the change accumulated during the call divided by
        ``(batch count * norm)``.

        :param G: generator phenotype
        :param D: discriminator phenotype
        :param train_generator: whether G is trained (every
            ``config.gan.critic_iterations``-th batch)
        :param train_discriminator: whether D is trained (every batch)
        :param norm_g: extra divisor for the generator updates
        :param norm_d: extra divisor for the discriminator updates
        """
        if G.invalid or D.invalid:  # do not evaluate if G or D are invalid
            logger.warning("invalid D or G")
            return
        torch.cuda.empty_cache()
        # n counts batches seen, ng counts generator training steps.
        n, ng = 0, 0
        G.error = G.error or 0
        D.error = D.error or 0
        # Snapshot the values held before this call; they are the baseline for
        # the normalized updates below.
        g_error = G.error
        d_error = D.error
        d_fitness_value, g_fitness_value = D.fitness_value, G.fitness_value
        G, D = tools.cuda(G), tools.cuda(D)  # load everything on gpu (cuda)
        G.train()
        D.train()
        # Keep cycling over the loader until the batch limit is reached.
        while n < config.gan.batches_limit:
            for images, _ in self.train_loader:
                # if n==0: print(images[0].mean())
                n += 1
                # n is incremented before this check, so it ends one above the
                # limit when the break fires.
                if n > config.gan.batches_limit:
                    break
                images = tools.cuda(Variable(images))
                if train_discriminator:
                    D.do_train(G, images)
                if train_generator and n % config.gan.critic_iterations == 0:
                    ng += 1
                    G.do_train(D, images)
        if train_discriminator:
            D.error = d_error + (D.error - d_error)/(n*norm_d)
            D.fitness_value = d_fitness_value + (D.fitness_value - d_fitness_value) / (n * norm_d)
            G.fitness_value = g_fitness_value + (G.fitness_value - g_fitness_value) / (n * norm_g)
        if train_generator:
            # NOTE(review): ng stays 0 when no batch index was a multiple of
            # critic_iterations, making this a division by zero -- confirm.
            G.error = g_error + (G.error - g_error)/(ng*norm_g)
        G, D = G.cpu(), D.cpu()  # move variables back from gpu to cpu
        torch.cuda.empty_cache()

    def evaluate_population(self, generators, discriminators, previous_generators, previous_discriminators,
                            best_generators, best_discriminators,
                            evaluation_type=config.evolution.evaluation.type, initial=False):
        """Train/evaluate generators against discriminators.

        The pairing strategy is chosen by `evaluation_type`: "random",
        "all-vs-all", or one of "all-vs-best" / "all-vs-species-best" /
        "all-vs-kbest". Note that the default is read from the config once,
        when the function is defined. The train loader is rebuilt on every
        call, and both input lists are shuffled in place.

        :param initial: True for the very first evaluation; combined with
            ``config.evolution.evaluation.initialize_all`` it makes the
            "best"-based strategies fall back to all-vs-all
        """
        self.train_dataset = torch.utils.data.random_split(self.train_dataset, [len(self.train_dataset)])[0]
        self.train_loader = torch.utils.data.DataLoader(self.train_dataset, batch_size=config.gan.batch_size)
        for i in range(config.evolution.evaluation.iterations):
            shuffle(generators)
            shuffle(discriminators)
            if evaluation_type == "random":
                # Each D meets two random generators, then each G meets two
                # random discriminators.
                for D in discriminators:
                    for g in np.random.choice(generators, 2, replace=False):
                        self.train_evaluate(g, D, norm_d=2, norm_g=len(discriminators))
                for G in generators:
                    for d in np.random.choice(discriminators, 2, replace=False):
                        self.train_evaluate(G, d, norm_d=len(generators), norm_g=2)
            elif evaluation_type == "all-vs-all":
                # train all-vs-all in a non-sequential order
                pairs = tools.permutations(generators, discriminators, random=True)
                for g, d in pairs:
                    self.train_evaluate(generators[g], discriminators[d], norm_d=len(generators), norm_g=len(discriminators))
            elif evaluation_type in ["all-vs-best", "all-vs-species-best", "all-vs-kbest"]:
                if config.evolution.evaluation.initialize_all and initial:
                    # as there are no way to determine the best G and D, we rely on all-vs-all for the first evaluation
                    return self.evaluate_population(generators, discriminators, previous_generators,
                                                    previous_discriminators, best_generators, best_discriminators,
                                                    evaluation_type="all-vs-all")
                # Discriminators train against the best generators (generators frozen) ...
                pairs = tools.permutations(best_generators, discriminators)
                for g, d in pairs:
                    self.train_evaluate(best_generators[g], discriminators[d], norm_d=len(best_generators),
                                        norm_g=len(discriminators), train_generator=False)
                # ... and generators train against the best discriminators (discriminators frozen).
                pairs = tools.permutations(generators, best_discriminators)
                for g, d in pairs:
                    self.train_evaluate(generators[g], best_discriminators[d], norm_d=len(generators),
                                        norm_g=len(best_discriminators), train_discriminator=False)
        if config.evolution.fitness.generator == "FID" or config.stats.calc_fid_score:
            for G in generators:
                G.calc_fid()
        # do not evaluate in the validation data when there is only a single option
        if len(discriminators) == 1 and len(generators) == 1:
            return
        # evaluate in validation (all-vs-best)
        # for D in discriminators:
        #     for G in best_generators:
        #         with torch.no_grad():
        #             self.evaluate_validation(G, D, eval_generator=False)
        # for G in generators:
        #     for D in best_discriminators:
        #         with torch.no_grad():
        #             self.evaluate_validation(G, D, eval_discriminator=False)

    def evaluate_validation(self, G, D, eval_generator=True, eval_discriminator=True, norm_g=1, norm_d=1):
        """Evaluate G and/or D on every batch of the validation loader.

        The evaluated model's ``error`` is reset to 0 first and divided by
        ``(batch count * norm)`` afterwards.
        """
        if G.invalid or D.invalid:  # do not evaluate if G or D are invalid
            logger.warning("invalid D or G")
            return
        if eval_discriminator:
            D.error = 0
        if eval_generator:
            G.error = 0
        n = 0
        G, D = tools.cuda(G), tools.cuda(D)  # load everything on gpu (cuda)
        for images, _ in self.validation_loader:
            images = tools.cuda(Variable(images))
            n += 1
            if eval_discriminator:
                D.do_eval(G, images)
            if eval_generator:
                G.do_eval(D, images)
        if eval_discriminator:
            D.error /= n*norm_d
        if eval_generator:
            G.error /= n*norm_g
        G, D = G.cpu(), D.cpu()  # move variables back from gpu to cpu

    def select(self, population, discard_percent=0, k=config.evolution.tournament_size):
        """Select individuals based on fitness sharing

        Each species receives a share of the population size derived from its
        average fitness; parents are then drawn by tournaments of size `k`
        inside the species, where the lower ``fitness()`` wins.

        :param population: Population to select from; empty species are
            removed from its ``species_list`` in place
        :param discard_percent: fraction of each species dropped before the
            tournaments
        :param k: tournament size (default read from the config at definition
            time)
        :return: one list per species, each holding lists of parents
        """
        ### TOURNAMENT TEST
        # population_size = len(population.phenotypes())
        # phenotypes = population.phenotypes()
        # selected = []
        # for i in range(population_size):
        #     p = np.random.choice(phenotypes, 3, replace=False).tolist()
        #     p.sort(key=lambda x: x.fitness())
        #     selected.append([p[0], p[0]])
        # return [selected]
        ###

        population_size = len(population.phenotypes())
        species_selected = []
        species_list = population.species_list
        average_species_fitness_list = []
        # Iterate over a copy because empty species are removed from the list.
        for species in species_list[:]:
            species.remove_invalid()  # discard invalid individuals
            if len(species) > 0:
                average_species_fitness_list.append(species.average_fitness())
            else:
                species_list.remove(species)
        total_fitness = np.sum(average_species_fitness_list)
        # initialize raw sizes with equal proportion
        raw_sizes = [population_size / len(species_list)] * len(species_list)
        if total_fitness != 0:
            # calculate proportional sizes when total fitness is not zero
            raw_sizes = [average_species_fitness / total_fitness * population_size
                         for average_species_fitness in average_species_fitness_list]
        sizes = tools.round_array(raw_sizes, max_sum=population_size, invert=True)

        for species, size in zip(species_list, sizes):
            # discard the lowest-performing individuals
            species = species.best_percent(1 - discard_percent)
            # tournament selection inside species
            selected = []
            # ensure that the best was selected
            if config.evolution.speciation.keep_best and size > 0:
                selected.append([species[0]])
            orig_species = list(species)
            for i in range(int(size) - len(selected)):
                parents = []
                for l in range(2):
                    winner = None
                    for j in range(k):
                        random_index = np.random.randint(0, len(species))
                        if winner is None or species[random_index].fitness() < winner.fitness():
                            winner = species[random_index]
                        del species[random_index]  # remove element to emulate draw without replacement
                        if len(species) == 0:  # restore original list when there is no more individuals to draw
                            species = list(orig_species)
                    parents.append(winner)
                    if config.evolution.crossover_rate == 0:
                        # do not draw another individual from the population if there is no probability of crossover
                        parents.append(winner)
                        break
                selected.append(parents)
            species_selected.append(selected)
        return species_selected

    def generate_children(self, species_list, generation):
        """Breed one child per parents entry and stamp it with `generation`.

        :param species_list: per-species lists of parents lists, as returned
            by ``select``
        :param generation: value stored in ``child.genome.generation``
        :return: flat list of children
        """
        # generate child (only mutation for now)
        children = []
        for species in species_list:
            for i, parents in enumerate(species):
                mate = parents[1] if len(parents) > 1 else None
                child = parents[0].breed(mate=mate, skip_mutation=mate is None)  # skip mutation when there is no mate
                child.genome.generation = generation
                children.append(child)
        return children

    def replace_population(self, generators_population, discriminators_population, g_children, d_children):
        """Build the next populations from the elite plus the sorted children.

        The elite (``config.evolution.elitism`` share of the current
        population) is kept, and the children list sorted by ``fitness()`` is
        truncated at its tail by the size of the elite.

        :return: tuple ``(generators Population, discriminators Population)``
        """
        elite_d = discriminators_population.best_percent(config.evolution.elitism)
        elite_g = generators_population.best_percent(config.evolution.elitism)
        g_children = sorted(g_children, key=lambda x: x.fitness())
        d_children = sorted(d_children, key=lambda x: x.fitness())
        generators = Population(elite_g + g_children[:len(g_children) - len(elite_g)],
                                desired_species=config.evolution.speciation.size,
                                speciation_threshold=generators_population.speciation_threshold)
        discriminators = Population(elite_d + d_children[:len(d_children) - len(elite_d)],
                                    desired_species=config.evolution.speciation.size,
                                    speciation_threshold=discriminators_population.speciation_threshold)
        return generators, discriminators

    def get_bests(self, population, previous_best):
        """Return the best individuals according to the configured evaluation type.

        Implicitly returns None for any evaluation type other than the three
        handled below.
        """
        if config.evolution.evaluation.type == "all-vs-species-best":
            return [species.best() for species in population.species_list]
        elif config.evolution.evaluation.type == "all-vs-best":
            # Newest best first, followed by the previous bests, capped at best_size.
            return (population.bests(1) + previous_best)[:config.evolution.evaluation.best_size]
        elif config.evolution.evaluation.type == "all-vs-kbest":
            return population.bests(config.evolution.evaluation.best_size)

    def start(self):
        """Run the whole evolutionary training loop.

        Creates and evaluates the initial populations, then for
        ``config.evolution.max_generations - 1`` iterations generates stats,
        selects parents, breeds and evaluates children and replaces the
        populations. Stats for the final populations are generated last.
        """
        if config.evolution.fitness.generator == "FID" or config.stats.calc_fid_score:
            generative_score.initialize_fid(self.train_loader, sample_size=config.evolution.fitness.fid_sample_size)

        generators_population, discriminators_population = self.generate_intial_population()
        # initialize best_discriminators and best_generators with random individuals
        best_discriminators = list(np.random.choice(discriminators_population.phenotypes(), config.evolution.evaluation.best_size, replace=False))
        best_generators = list(np.random.choice(generators_population.phenotypes(), config.evolution.evaluation.best_size, replace=False))
        # initial evaluation
        self.evaluate_population(generators_population.phenotypes(), discriminators_population.phenotypes(),
                                 generators_population, discriminators_population,
                                 best_generators, best_discriminators, initial=True)
        # store best individuals
        best_discriminators = self.get_bests(discriminators_population, best_discriminators)
        best_generators = self.get_bests(generators_population, best_generators)

        # Pre-set so the final stats call below works even when the loop body
        # never runs.
        generation = 0
        for generation in tqdm(range(config.evolution.max_generations-1)):
            self.stats.generate(self.input_shape, generators_population, discriminators_population,
                                generation, config.evolution.max_generations, self.train_loader, self.validation_loader)

            # select parents for reproduction
            g_parents = self.select(generators_population)
            d_parents = self.select(discriminators_population)

            # apply variation operators (only mutation for now)
            g_children = self.generate_children(g_parents, generation)

            # limit the number of layers in D's to the max layers among G's
            max_layers_g = max([len(gc.genome.genes) for gc in g_children])
            for s in d_parents:
                for dp in s:
                    dp[0].genome.max_layers = max_layers_g
            d_children = self.generate_children(d_parents, generation)

            # evaluate the children population and the best individuals (when elitism is being used)
            logger.debug(f"[generation {generation}] evaluate population")
            self.evaluate_population(g_children, d_children, generators_population, discriminators_population, best_generators, best_discriminators)

            # store best of generation in coevolution memory
            best_discriminators = self.get_bests(discriminators_population, best_discriminators)
            best_generators = self.get_bests(generators_population, best_generators)

            # generate a new population based on the fitness of the children and elite individuals
            generators_population, discriminators_population = self.replace_population(generators_population, discriminators_population, g_children, d_children)

        # stats for last generation
        self.stats.generate(self.input_shape, generators_population, discriminators_population,
                            generation+1, config.evolution.max_generations, self.train_loader, self.validation_loader)
script = sys.argv[1] db = sys.argv[2] numChildren = int(sys.argv[3]) machineNumber = int(sys.argv[4]) expOffset = int(sys.argv[5]) # Set the global offset to one more than machine number times a billion. globalOffset = machineNumber * 1000000000 + 1 except: print "Usage: %s <experiment file> <db name> <num children> <machine number> <experiment offset>" % sys.argv[0] sys.exit() def launchChild(number): processOffset = number * 100000000 start = globalOffset + processOffset end = start + expOffset print "launching %s with start = %s and end = %s" % (number, start, end) subprocess.call(["python", script, db, str(start), str(end)]) # Launch children and block until they finish. pool = Pool(processes = numChildren) pool.map(launchChild, range(0, numChildren)) fpath = os.path.dirname(__file__) dumps = glob.glob(os.path.join(fpath, "stats/*.dump")) Stats.load(*dumps) Stats.output()
def train(self, numEpochs=1, batchSize=5, learnRateVal=0.1, numExamplesToTrain=-1, gradMax=3.,
          L2regularization=0.0, dropoutRate=0.0, sentenceAttention=False,
          wordwiseAttention=False):
    """
    Takes care of training model, including propagation of errors and updating of
    parameters.

    Every minibatch computes the gradients, applies the premise/hypothesis
    updates and records the cost; dev accuracy is recorded whenever the
    running example count is a multiple of 100. After the last epoch the
    model is saved under ``currDir + "/savedmodels/"`` and the final
    validation accuracy is recorded.

    :param numEpochs: number of passes over the minibatches
    :param batchSize: minibatch size
    :param learnRateVal: learning rate passed to the update functions
    :param numExamplesToTrain: when > 0, restrict the data to this many examples
    :param gradMax: forwarded to ``self.trainFunc``
    :param L2regularization: forwarded to ``self.trainFunc``
    :param dropoutRate: forwarded to ``self.trainFunc`` and ``self.predictFunc``
    :param sentenceAttention: forwarded to ``self.trainFunc``
    :param wordwiseAttention: forwarded to ``self.trainFunc``
    """
    expName = "Epochs_{0}_LRate_{1}_L2Reg_{2}_dropout_{3}_sentAttn_{4}_" \
              "wordAttn_{5}".format(str(numEpochs), str(learnRateVal),
                                    str(L2regularization), str(dropoutRate),
                                    str(sentenceAttention), str(wordwiseAttention))
    # Snapshot of every local defined so far (arguments, self and expName);
    # must stay before any further local is introduced.
    self.configs.update(locals())
    trainPremiseIdxMat, trainHypothesisIdxMat = self.embeddingTable.convertDataToIdxMatrices(
        self.trainData, self.trainDataStats)
    trainGoldLabel = convertLabelsToMat(self.trainData)

    valPremiseIdxMat, valHypothesisIdxMat = self.embeddingTable.convertDataToIdxMatrices(
        self.valData, self.valDataStats)
    valGoldLabel = convertLabelsToMat(self.valData)

    # If you want to train on less than full dataset
    if numExamplesToTrain > 0:
        valPremiseIdxMat = valPremiseIdxMat[:, range(numExamplesToTrain), :]
        valHypothesisIdxMat = valHypothesisIdxMat[:, range(numExamplesToTrain), :]
        valGoldLabel = valGoldLabel[range(numExamplesToTrain)]

    #Whether zero-padded on left or right
    pad = "right"

    # Get full premise/hypothesis tensors
    # batchPremiseTensor, batchHypothesisTensor, batchLabels = \
    #     convertDataToTrainingBatch(valPremiseIdxMat, self.numTimestepsPremise, valHypothesisIdxMat,
    #                                self.numTimestepsHypothesis, "right", self.embeddingTable,
    #                                valGoldLabel, range(len(valGoldLabel)))
    #sharedValPremise = theano.shared(batchPremiseTensor)
    #sharedValHypothesis = theano.shared(batchHypothesisTensor)
    #sharedValLabels = theano.shared(batchLabels)

    # Symbolic inputs handed to the train/predict function builders.
    inputPremise = T.ftensor3(name="inputPremise")
    inputHypothesis = T.ftensor3(name="inputHypothesis")
    yTarget = T.fmatrix(name="yTarget")
    learnRate = T.scalar(name="learnRate", dtype='float32')

    fGradSharedHypothesis, fGradSharedPremise, fUpdatePremise, \
        fUpdateHypothesis, costFn, _, _ = self.trainFunc(inputPremise,
                                                         inputHypothesis, yTarget, learnRate, gradMax,
                                                         L2regularization, dropoutRate, sentenceAttention,
                                                         wordwiseAttention, batchSize)

    # Running count of examples processed across all epochs.
    totalExamples = 0
    stats = Stats(self.logger, expName)

    # Training
    self.logger.Log("Model configs: {0}".format(self.configs))
    self.logger.Log("Starting training with {0} epochs, {1} batchSize,"
                    " {2} learning rate, {3} L2regularization coefficient, and {4} dropout rate".format(
                        numEpochs, batchSize, learnRateVal, L2regularization, dropoutRate))

    predictFunc = self.predictFunc(inputPremise, inputHypothesis, dropoutRate)

    for epoch in xrange(numEpochs):
        self.logger.Log("Epoch number: %d" %(epoch))

        # NOTE(review): in the else branch the minibatch indexes span the
        # training set size, but the batches below are built from the
        # validation matrices -- confirm the sizes are compatible.
        if numExamplesToTrain > 0:
            minibatches = getMinibatchesIdx(numExamplesToTrain, batchSize)
        else:
            minibatches = getMinibatchesIdx(len(trainGoldLabel), batchSize)

        # Examples processed in the current epoch only.
        numExamples = 0
        for _, minibatch in minibatches:
            # Dropout mode 1.0 while training; reset to 0.0 for evaluation below.
            self.dropoutMode.set_value(1.0)
            numExamples += len(minibatch)
            totalExamples += len(minibatch)

            self.logger.Log("Processed {0} examples in current epoch".
                            format(str(numExamples)))

            # The batch is built from the validation matrices, not the
            # training ones computed above.
            batchPremiseTensor, batchHypothesisTensor, batchLabels = \
                convertDataToTrainingBatch(valPremiseIdxMat, self.numTimestepsPremise, valHypothesisIdxMat,
                                           self.numTimestepsHypothesis, pad, self.embeddingTable,
                                           valGoldLabel, minibatch)

            # Compute the gradients first, then apply both parameter updates.
            gradHypothesisOut = fGradSharedHypothesis(batchPremiseTensor,
                                                      batchHypothesisTensor, batchLabels)
            gradPremiseOut = fGradSharedPremise(batchPremiseTensor,
                                                batchHypothesisTensor, batchLabels)
            fUpdatePremise(learnRateVal)
            fUpdateHypothesis(learnRateVal)

            predictLabels = self.predict(batchPremiseTensor, batchHypothesisTensor, predictFunc)
            #self.logger.Log("Labels in epoch {0}: {1}".format(epoch, str(predictLabels)))

            cost = costFn(batchPremiseTensor, batchHypothesisTensor, batchLabels)
            stats.recordCost(totalExamples, cost)

            # Note: Big time sink happens here
            if totalExamples%(100) == 0:
                # TODO: Don't compute accuracy of dev set
                self.dropoutMode.set_value(0.0)
                devAccuracy = self.computeAccuracy(valPremiseIdxMat,
                                                   valHypothesisIdxMat, valGoldLabel, predictFunc)
                stats.recordAcc(totalExamples, devAccuracy, "dev")

    stats.recordFinalTrainingTime(totalExamples)

    # Save model to disk
    self.logger.Log("Saving model...")
    self.extractParams()
    configString = "batch={0},epoch={1},learnRate={2},dimHidden={3},dimInput={4}".format(str(batchSize),
                                                                                        str(numEpochs), str(learnRateVal),
                                                                                        str(self.dimHidden), str(self.dimInput))
    self.saveModel(currDir + "/savedmodels/basicLSTM_"+configString+".npz")
    self.logger.Log("Model saved!")

    # Set dropout to 0. again for testing
    self.dropoutMode.set_value(0.0)

    #Train Accuracy
    # trainAccuracy = self.computeAccuracy(trainPremiseIdxMat,
    #                                      trainHypothesisIdxMat, trainGoldLabel, predictFunc)
    # self.logger.Log("Final training accuracy: {0}".format(trainAccuracy))

    # Val Accuracy
    valAccuracy = self.computeAccuracy(valPremiseIdxMat,
                                       valHypothesisIdxMat, valGoldLabel, predictFunc)
    # TODO: change -1 for training acc to actual value when I enable train computation
    stats.recordFinalStats(totalExamples, -1, valAccuracy)
def main(exp_name, embed_data, train_data, train_data_stats, val_data,
         val_data_stats, test_data, test_data_stats, log_path, batch_size,
         num_epochs, unroll_steps, learn_rate, num_dense, dense_dim, penalty,
         reg_coeff):
    """
    Main run function for training model.

    Embeds premise and hypothesis with a shared, non-trainable embedding
    matrix, passes each through SumEmbeddingLayer, concatenates the two
    results, stacks dense layers on top and trains the network with RMSProp
    while recording cost and accuracy through Stats.

    :param exp_name: Experiment name handed to Stats
    :param embed_data: Passed to EmbeddingTable to load the embeddings
    :param train_data: Training data (inputs via generate_data, labels via
                       convertLabelsToMat)
    :param train_data_stats: Passed to generate_data together with train_data
    :param val_data: Validation data, processed like train_data
    :param val_data_stats: Passed to generate_data together with val_data
    :param test_data: Not used by this function
    :param test_data_stats: Not used by this function
    :param log_path: Not used by this function
    :param batch_size: Minibatch size; also the first dimension declared on
                       the input layers
    :param num_epochs: Number of passes over the minibatches
    :param unroll_steps: Sequence length passed to generate_data and declared
                         on the input layers
    :param learn_rate: Learning rate passed to RMSProp
    :param num_dense: Number of dense fully connected layers to add after
                      concatenation layer
    :param dense_dim: Dimension of dense FC layers -- note this only applies
                      if num_dense > 1
    :param penalty: Penalty to use for regularization, "l1" or "l2"
    :param reg_coeff: Regularization coeff to use for each layer of network;
                      may want to support different coefficient for different
                      layers
    :return: None
    :raises ValueError: if penalty is neither "l1" nor "l2"
    """
    # Fail early on a bad penalty name instead of hitting an unbound local
    # after all the data has been loaded.
    penalty_metrics = {"l1": l1, "l2": l2}
    if penalty not in penalty_metrics:
        raise ValueError(
            "Unknown penalty {0!r}; expected 'l1' or 'l2'".format(penalty))
    p_metric = penalty_metrics[penalty]

    # Set random seed for deterministic results
    np.random.seed(0)
    num_ex_to_train = 30

    # Load embedding table
    table = EmbeddingTable(embed_data)
    vocab_size = table.sizeVocab
    dim_embeddings = table.dimEmbeddings
    embeddings_mat = table.embeddings

    train_prem, train_hyp = generate_data(train_data, train_data_stats,
                                          "left", "right", table,
                                          seq_len=unroll_steps)
    val_prem, val_hyp = generate_data(val_data, val_data_stats,
                                      "left", "right", table,
                                      seq_len=unroll_steps)
    train_labels = convertLabelsToMat(train_data)
    val_labels = convertLabelsToMat(val_data)

    # To test for overfitting capabilities of model
    if num_ex_to_train > 0:
        val_prem = val_prem[0:num_ex_to_train]
        val_hyp = val_hyp[0:num_ex_to_train]
        val_labels = val_labels[0:num_ex_to_train]

    # Theano expressions for premise/hypothesis inputs to network
    x_p = T.imatrix()
    x_h = T.imatrix()
    target_values = T.fmatrix(name="target_output")

    # Embedding layer for premise
    l_in_prem = InputLayer((batch_size, unroll_steps))
    l_embed_prem = EmbeddingLayer(l_in_prem, input_size=vocab_size,
                                  output_size=dim_embeddings,
                                  W=embeddings_mat)

    # Embedding layer for hypothesis
    l_in_hyp = InputLayer((batch_size, unroll_steps))
    l_embed_hyp = EmbeddingLayer(l_in_hyp, input_size=vocab_size,
                                 output_size=dim_embeddings,
                                 W=embeddings_mat)

    # Ensure embedding matrix parameters are not trainable
    l_embed_hyp.params[l_embed_hyp.W].remove('trainable')
    l_embed_prem.params[l_embed_prem.W].remove('trainable')

    l_embed_hyp_sum = SumEmbeddingLayer(l_embed_hyp)
    l_embed_prem_sum = SumEmbeddingLayer(l_embed_prem)

    # Concatenate sentence embeddings for premise and hypothesis
    l_concat = ConcatLayer([l_embed_hyp_sum, l_embed_prem_sum])

    # num_dense - 1 tanh layers followed by one softmax layer; when
    # num_dense <= 1 the loop is empty and only the softmax layer is added.
    l_in = l_concat
    for _ in range(num_dense - 1):
        l_in = DenseLayer(l_in, num_units=dense_dim,
                          nonlinearity=lasagne.nonlinearities.tanh)
    l_output = DenseLayer(l_in, num_units=NUM_DENSE_UNITS,
                          nonlinearity=lasagne.nonlinearities.softmax)

    network_output = get_output(l_output, {l_in_prem: x_p, l_in_hyp: x_h})

    # Compute cost. Every layer is already weighted by reg_coeff through
    # layer_dict, so the weighted sum must not be multiplied by reg_coeff
    # again (that would make the effective coefficient reg_coeff ** 2).
    layers = lasagne.layers.get_all_layers(l_output)
    layer_dict = {l: reg_coeff for l in layers}
    reg_cost = regularize_layer_params_weighted(layer_dict, p_metric)
    cost = T.nnet.categorical_crossentropy(
        network_output, target_values).mean() + reg_cost
    compute_cost = theano.function([x_p, x_h, target_values], cost)

    # Compute accuracy
    accuracy = T.mean(T.eq(T.argmax(network_output, axis=-1),
                           T.argmax(target_values, axis=-1)),
                      dtype=theano.config.floatX)
    compute_accuracy = theano.function([x_p, x_h, target_values], accuracy)

    # Define update/train functions
    all_params = lasagne.layers.get_all_params(l_output, trainable=True)
    updates = lasagne.updates.rmsprop(cost, all_params, learn_rate)
    train = theano.function([x_p, x_h, target_values], cost, updates=updates)

    # TODO: Augment embedding layer to allow for masking inputs

    stats = Stats(exp_name)
    acc_num = 10

    minibatches = getMinibatchesIdx(train_prem.shape[0], batch_size)
    print("Training ...")
    try:
        total_num_ex = 0
        for epoch in xrange(num_epochs):
            for _, minibatch in minibatches:
                total_num_ex += len(minibatch)
                stats.log("Processed {0} total examples in epoch {1}".format(
                    str(total_num_ex), str(epoch)))

                prem_batch = train_prem[minibatch]
                hyp_batch = train_hyp[minibatch]
                labels_batch = train_labels[minibatch]

                train(prem_batch, hyp_batch, labels_batch)
                cost_val = compute_cost(prem_batch, hyp_batch, labels_batch)

                stats.recordCost(total_num_ex, cost_val)
                # Periodically compute and log train/dev accuracy
                if total_num_ex % (acc_num * batch_size) == 0:
                    train_acc = compute_accuracy(train_prem, train_hyp,
                                                 train_labels)
                    dev_acc = compute_accuracy(val_prem, val_hyp, val_labels)
                    stats.recordAcc(total_num_ex, train_acc, dataset="train")
                    stats.recordAcc(total_num_ex, dev_acc, dataset="dev")
    except KeyboardInterrupt:
        # Ctrl-C ends the training loop without a traceback.
        pass
def main(self):
    """Call getPeopleAndParents(10000), then output the stats and dump them.

    The dump target is whatever self.getDumpFileName() returns.
    """
    amount = 10000
    self.getPeopleAndParents(amount)

    Stats.output()
    dump_target = self.getDumpFileName()
    Stats.dump(dump_target)
def split(self):
    """
    Generate time-based folds of the ratings and write them to disk.

    Starting at 2005-03-31, every fold takes all ratings before the split
    date as training data and the ratings of the following
    ``self.split_duration`` months as test data (restricted to users, and for
    the in-matrix case also papers, that occur in the training data). For each
    fold the ratings files, the new mult file and the user/paper id mappings
    are written to ``<base_dir>/<split name>/fold-<k>``; afterwards the split
    is plotted and the statistics are saved to ``stats.txt`` next to the fold
    folders.

    :return: None
    """
    # Get the mapping as a list of user_hash where the key is the corresponding index:
    userhash_userid_map_list = list(self.users_dict.items())
    userhash_userid_map_list.sort(key=lambda x: x[1])
    user_id_userhash_map_list = np.array(
        [i for (i, _) in userhash_userid_map_list])

    # Get the mapping as a list of doc_ids where the key is the corresponding index:
    docid_paperid_map_list = list(self.papers_dict.items())
    docid_paperid_map_list.sort(key=lambda x: x[1])
    paper_id_docid_map_list = np.array(
        [i for (i, _) in docid_paperid_map_list])

    # Get the ratings list integrated with time stamps:
    ratings_list = self.integrate_raings_timestamp(self.users_dict,
                                                   self.papers_dict)

    fr = pd.DataFrame(data=ratings_list, columns=['user', 'paper', 'date'])
    print("Ratings: {}, users: {}, papers: {}.".format(
        len(fr), fr.user.nunique(), fr.paper.nunique()))

    # First split date:
    d1 = datetime.strptime('2005-03-31', "%Y-%m-%d").date()

    # Last date:
    last_date = fr.date.max()
    # NOTE(review): d1.month is not subtracted here, so the period looks a few
    # months longer than the span between d1 and last_date -- confirm whether
    # the extra trailing folds are intended.
    ratings_period = (last_date.year - d1.year) * 12 + last_date.month

    # These lists are used for plotting:
    tr_rs, tr_us, tr_ps, ts_rs, ts_us, ts_ps, rat, dates = [], [], [], [], [], [], [], []

    folds_num = ratings_period // self.split_duration

    # For split stats:
    stats_header = ['{:4}'.format('Fold'), '{:20}'.format('#Usrs(Tot,R,S)'),
                    '{:23}'.format('#Itms(Tot,R,S)'),
                    '{:23}'.format('#Rtng(Tot,R,S)'),
                    '{:23}'.format('PRU(min/max/avg/std)'),
                    '{:22}'.format('PSU(min/max/avg/std)'),
                    '{:20}'.format('PRI(min/max/avg/std)'),
                    '{:20}'.format('PSI(min/max/avg/std)')]
    self.stat_list.append(stats_header)
    stats = Stats()

    # All folds and the stats file live under the same split directory.
    split_dir = os.path.join(
        self.base_dir,
        'time-based_split_out-of-matrix'
        if self.out_of_matrix else 'time-based_split_in-matrix')

    for fold in range(folds_num):
        d2 = d1 + relativedelta(months=self.split_duration)

        # Training ratings:
        f1 = fr[fr['date'] < d1]

        # Test ratings:
        if self.out_of_matrix:
            f2 = fr[(fr['date'] >= d1) & (fr['date'] < d2)
                    & fr['user'].isin(f1['user'])]
        else:
            f2 = fr[(fr['date'] >= d1) & (fr['date'] < d2)
                    & fr['user'].isin(f1['user'])
                    & (fr['paper'].isin(f1['paper']))]

        # Test/train ratio in percent; 0 when there are no training ratings
        # yet, instead of dividing by zero.
        ratio = len(f2) / len(f1) * 100 if len(f1) else 0.0
        print("{}->{}, Tr:[Rs: {:6}, Us: {:5}, Ps: {:6}], Te:[Rs: {:5}, Us: {:5}, Ps: {:6}], Ratio: {:04.2f}%"
              .format(d1, d2, len(f1), f1.user.nunique(), f1.paper.nunique(),
                      len(f2), f2.user.nunique(), f2.paper.nunique(), ratio))

        # Generate data for the folds:
        (train_l_users, train_l_users_age, train_l_items, test_l_users,
         test_l_items, useridx_user_id_map_list, paperidx_paper_id_map_list,
         _n_users, _n_papers) = self.generate_fold(d1, f1, f2)
        stats.add_fold_statistics(fold + 1, train_l_users, test_l_users,
                                  train_l_items, test_l_items)

        # Write to file:
        fold_folder = os.path.join(split_dir, 'fold-{}'.format(fold + 1))
        if not os.path.exists(fold_folder):
            os.makedirs(fold_folder)

        write_ratings(train_l_users,
                      os.path.join(fold_folder, 'train-users.dat'))
        write_ratings(train_l_users_age,
                      os.path.join(fold_folder, 'train-users-ages.dat'))
        write_ratings(test_l_users,
                      os.path.join(fold_folder, 'test-users.dat'))
        write_ratings(train_l_items,
                      os.path.join(fold_folder, 'train-items.dat'))
        write_ratings(test_l_items,
                      os.path.join(fold_folder, 'test-items.dat'))

        print("Generating the new mult file...")
        self.generate_docs_terms(self.docs_vocabs, paperidx_paper_id_map_list,
                                 self.terms, fold_folder)

        # Write users and papers mappings to files:
        useridx_userhash = user_id_userhash_map_list[useridx_user_id_map_list]
        write_list_to_file(
            [(j, i) for (i, j) in enumerate(useridx_userhash)],
            os.path.join(fold_folder, 'citeulikeUserHash_userId_map.dat'),
            header=['citeulikeUserHash', 'user_id'])

        paperidx_docid = paper_id_docid_map_list[paperidx_paper_id_map_list]
        write_list_to_file(
            [(j, i) for (i, j) in enumerate(paperidx_docid)],
            os.path.join(fold_folder, 'citeulikeId_docId_map.dat'),
            header=['citeulikeId', 'paper_id'])

        # For plotting:
        dates.append(d2)
        tr_rs.append(len(f1))
        tr_us.append(f1.user.nunique())
        tr_ps.append(f1.paper.nunique())
        ts_rs.append(len(f2))
        ts_us.append(f2.user.nunique())
        ts_ps.append(f2.paper.nunique())
        rat.append(ratio)

        # The next fold starts where this one ended.
        d1 = d2

    self.plot_split_lines(tr_rs, tr_us, tr_ps, ts_rs, ts_us, ts_ps, rat,
                          dates)

    # Write split statistics to file:
    stats.save_stats_to_file(os.path.join(split_dir, 'stats.txt'))
def main():
    """Play run_constants["num_games"] games on the stored Q-table and report stats.

    Initializes the database and card definitions, runs the games while
    decaying the agents' learning rate after each one, then tears the
    database down and prints/graphs the collected statistics.
    """
    # TODO implement command-line args

    # initialize database
    Database.initialize()
    q = Database.getQTable()

    cards = loadCardDefinitions()
    characters = loadCharacterDefinitions()

    # initialize card definitions for querying
    CardDefinitions.setDefinitions(cards["main"], cards["treasures"],
                                   cards["answers"])

    # how many games to play per run
    num_games = run_constants["num_games"]
    # every nth game will be verbose
    verbose_mod = run_constants["verbose_mod"]

    # game params as defined in game/game.py
    # (optional keys: "num_agents", "num_humans", "max_turns")
    game_params = {}

    # agent params as defined in player/agent.py
    # (optional keys: "discount_factor", "endgame_discount_factor",
    #  "random_action_rate", "dyna_steps")
    agent_params = {}
    agent_params["learning_rate"] = agent_constants["learning_rate"]

    # deck params as defined in game/game.py
    deck_params = {}
    deck_params["main_cards"] = CardDefinitions.cards["main"]
    deck_params["treasure_cards"] = CardDefinitions.cards["treasures"]
    deck_params["answer_cards"] = CardDefinitions.cards["answers"]

    # character params as defined in game/game.py
    character_params = dict(characters=characters)

    for game_number in range(num_games):
        # the last game of every block of verbose_mod games is verbose
        verbose = (game_number % verbose_mod) == (verbose_mod - 1)
        for params in (game_params, agent_params):
            params["verbose"] = verbose

        print("Running game {}".format(game_number + 1))
        Game(q, game_params, agent_params, deck_params,
             character_params).run()
        Stats.recordStat("games")

        decay = agent_constants["learning_rate_decay"]
        agent_params["learning_rate"] *= 1 - decay

        Database.commit()

    # deinitialize database
    Database.destroy()

    Stats.printStats()
    Stats.printQStats(q)
    Stats.graphChosenActionUsage()
    Stats.graphTurnCountPerGame()
def train(self, numEpochs=1, batchSize=5, learnRateVal=0.1,
          numExamplesToTrain=-1, gradMax=3., L2regularization=0.0,
          dropoutRate=0.0, sentenceAttention=False, wordwiseAttention=False):
    """
    Takes care of training model, including propagation of errors and
    updating of parameters.

    NOTE(review): the minibatches are built from the validation matrices,
    not the training matrices -- this looks like a debugging setup; confirm
    before relying on the reported accuracies.

    :param numEpochs: Number of passes over the minibatches
    :param batchSize: Number of examples per minibatch
    :param learnRateVal: Learning rate passed to the update functions
    :param numExamplesToTrain: If > 0, only the first numExamplesToTrain
                               examples are used
    :param gradMax: Passed through to self.trainFunc
    :param L2regularization: Passed through to self.trainFunc
    :param dropoutRate: Passed through to self.trainFunc and self.predictFunc
    :param sentenceAttention: Passed through to self.trainFunc
    :param wordwiseAttention: Passed through to self.trainFunc
    :return: None
    """
    expName = "Epochs_{0}_LRate_{1}_L2Reg_{2}_dropout_{3}_sentAttn_{4}_" \
              "wordAttn_{5}".format(str(numEpochs), str(learnRateVal),
                                    str(L2regularization), str(dropoutRate),
                                    str(sentenceAttention),
                                    str(wordwiseAttention))
    # Snapshot of the call arguments (plus self and expName); this has to run
    # before any further locals are created.
    self.configs.update(locals())

    # The training matrices are currently only referenced by the disabled
    # train-accuracy code near the end of this method.
    trainPremiseIdxMat, trainHypothesisIdxMat = self.embeddingTable.convertDataToIdxMatrices(
        self.trainData, self.trainDataStats)
    trainGoldLabel = convertLabelsToMat(self.trainData)

    valPremiseIdxMat, valHypothesisIdxMat = self.embeddingTable.convertDataToIdxMatrices(
        self.valData, self.valDataStats)
    valGoldLabel = convertLabelsToMat(self.valData)

    # If you want to train on less than full dataset
    if numExamplesToTrain > 0:
        valPremiseIdxMat = valPremiseIdxMat[:, range(numExamplesToTrain), :]
        valHypothesisIdxMat = valHypothesisIdxMat[:, range(numExamplesToTrain), :]
        valGoldLabel = valGoldLabel[range(numExamplesToTrain)]

    # Whether zero-padded on left or right
    pad = "right"

    inputPremise = T.ftensor3(name="inputPremise")
    inputHypothesis = T.ftensor3(name="inputHypothesis")
    yTarget = T.fmatrix(name="yTarget")
    learnRate = T.scalar(name="learnRate", dtype='float32')

    fGradSharedHypothesis, fGradSharedPremise, fUpdatePremise, \
        fUpdateHypothesis, costFn, _, _ = self.trainFunc(
            inputPremise, inputHypothesis, yTarget, learnRate, gradMax,
            L2regularization, dropoutRate, sentenceAttention,
            wordwiseAttention, batchSize)

    totalExamples = 0
    stats = Stats(self.logger, expName)

    # Training
    self.logger.Log("Model configs: {0}".format(self.configs))
    self.logger.Log(
        "Starting training with {0} epochs, {1} batchSize,"
        " {2} learning rate, {3} L2regularization coefficient, and {4} dropout rate"
        .format(numEpochs, batchSize, learnRateVal, L2regularization,
                dropoutRate))

    predictFunc = self.predictFunc(inputPremise, inputHypothesis, dropoutRate)

    for epoch in xrange(numEpochs):
        self.logger.Log("Epoch number: %d" % (epoch))

        # The batches below are drawn from the (possibly truncated)
        # validation matrices, so the indices must range over that set.
        # After truncation its length equals numExamplesToTrain, so this
        # covers both cases without running past the data.
        minibatches = getMinibatchesIdx(len(valGoldLabel), batchSize)

        numExamples = 0
        for _, minibatch in minibatches:
            self.dropoutMode.set_value(1.0)
            numExamples += len(minibatch)
            totalExamples += len(minibatch)

            self.logger.Log("Processed {0} examples in current epoch".format(
                str(numExamples)))

            batchPremiseTensor, batchHypothesisTensor, batchLabels = \
                convertDataToTrainingBatch(valPremiseIdxMat,
                                           self.numTimestepsPremise,
                                           valHypothesisIdxMat,
                                           self.numTimestepsHypothesis, pad,
                                           self.embeddingTable, valGoldLabel,
                                           minibatch)

            gradHypothesisOut = fGradSharedHypothesis(
                batchPremiseTensor, batchHypothesisTensor, batchLabels)
            gradPremiseOut = fGradSharedPremise(
                batchPremiseTensor, batchHypothesisTensor, batchLabels)
            fUpdatePremise(learnRateVal)
            fUpdateHypothesis(learnRateVal)

            predictLabels = self.predict(batchPremiseTensor,
                                         batchHypothesisTensor, predictFunc)

            cost = costFn(batchPremiseTensor, batchHypothesisTensor,
                          batchLabels)
            stats.recordCost(totalExamples, cost)

            # Note: Big time sink happens here
            if totalExamples % (100) == 0:
                # TODO: Don't compute accuracy of dev set
                self.dropoutMode.set_value(0.0)
                devAccuracy = self.computeAccuracy(valPremiseIdxMat,
                                                   valHypothesisIdxMat,
                                                   valGoldLabel, predictFunc)
                stats.recordAcc(totalExamples, devAccuracy, "dev")

    stats.recordFinalTrainingTime(totalExamples)

    # Save model to disk
    self.logger.Log("Saving model...")
    self.extractParams()
    configString = "batch={0},epoch={1},learnRate={2},dimHidden={3},dimInput={4}".format(
        str(batchSize), str(numEpochs), str(learnRateVal),
        str(self.dimHidden), str(self.dimInput))
    self.saveModel(currDir + "/savedmodels/basicLSTM_" + configString + ".npz")
    self.logger.Log("Model saved!")

    # Set dropout to 0. again for testing
    self.dropoutMode.set_value(0.0)

    # Train Accuracy
    # trainAccuracy = self.computeAccuracy(trainPremiseIdxMat,
    #                     trainHypothesisIdxMat, trainGoldLabel, predictFunc)
    # self.logger.Log("Final training accuracy: {0}".format(trainAccuracy))

    # Val Accuracy
    valAccuracy = self.computeAccuracy(valPremiseIdxMat, valHypothesisIdxMat,
                                       valGoldLabel, predictFunc)
    # TODO: change -1 for training acc to actual value when I enable train computation
    stats.recordFinalStats(totalExamples, -1, valAccuracy)
def main(self):
    """Call insertPeople(10000), then output the stats and dump them.

    The dump target is whatever self.getDumpFileName() returns.
    """
    amount = 10000
    self.insertPeople(amount)

    Stats.output()
    dump_target = self.getDumpFileName()
    Stats.dump(dump_target)
def runAggregates(self):
    """Run the age and the female aggregate queries of self.db through Stats.execute."""
    # Looked up one at a time so the first query runs before the second
    # attribute is resolved, exactly as with two separate statements.
    for query_name in ("getAgeAggregate", "getFemaleAggregate"):
        Stats.execute(getattr(self.db, query_name), [])
def cf_split(self, folds_num=5):
    """
    Splits the rating matrix following the in-matrix method defined in CTR.

    For every fold k this writes, under ``<out_folder>/fold-<k>/``, the files
    ``train-fold_<k>-users.dat`` / ``train-fold_<k>-items.dat`` and
    ``test-fold_<k>-users.dat`` / ``test-fold_<k>-items.dat`` (plus the
    ``validation-fold_<k>-users.dat`` / ``-items.dat`` pair when
    ``self.generate_validation`` is set). In the users files, line i has the
    delimiter-separated list of item ids rated by user i. The split
    statistics are finally saved to ``<out_folder>/stats.txt``.

    :param folds_num: the number of folds, default 5
    :return: None
    :raises Exception: "Split_Error!" if some user has no training items in a
                       fold
    """
    items_mat = self.items_mat_from_users_ratings(self.users_ratings)
    train = [[[] for _ in range(self.num_items)] for _ in range(folds_num)]
    test = [[[] for _ in range(self.num_items)] for _ in range(folds_num)]
    validation = [[[] for _ in range(self.num_items)]
                  for _ in range(folds_num)]
    print("Number of items: {}".format(self.num_items))
    folds_list = list(range(folds_num))
    print("Splitting items ratings, progress:")

    # 1- Split items ratings into the folds, so that every item with at least
    #    folds_num ratings shows up in the test set of each fold.
    for item in range(self.num_items):
        # Reporting progress:
        if item % 5000 == 0:
            print("doc_{}".format(item))

        user_ids = np.array(items_mat[item])
        n = len(user_ids)

        # With fewer ratings than folds, the item's ratings can appear in the
        # training set only.
        if n < folds_num:
            for fold in folds_list:
                train[fold][item].extend(user_ids.tolist())
            continue

        # Otherwise the ratings can participate in the training, the test
        # and (if requested) the validation sets.
        idx = list(range(n))
        user_ids_folds = random_divide(idx, folds_num)
        for test_fold in folds_list:
            # Add users of the current fold as test
            test_idx = user_ids_folds[test_fold]

            if self.generate_validation:
                # Add users of the next fold as validation
                validation_fold = (test_fold + 1) % folds_num
                validation_idx = user_ids_folds[validation_fold]
                # Add the rest as training:
                train_idx = []
                for i in folds_list:
                    if i != test_fold and i != validation_fold:
                        train_idx.extend(user_ids_folds[i])
                validation[test_fold][item].extend(
                    user_ids[validation_idx].tolist())
            else:
                # Add the rest as training, keeping the order of idx; the set
                # makes each membership test O(1).
                held_out = set(test_idx)
                train_idx = [i for i in idx if i not in held_out]

            train[test_fold][item].extend(user_ids[train_idx].tolist())
            test[test_fold][item].extend(user_ids[test_idx].tolist())

    # 2- Generate the user ratings from the splits generated on step 1.
    stats = Stats(self.generate_validation)
    for fold in folds_list:
        fold_dir = os.path.join(self.out_folder, "fold-{}".format(fold + 1))

        items_train = train[fold]
        users_train = self.users_mat_from_items(items_train)
        for u_id, u in enumerate(users_train):
            if len(u) == 0:
                print("User {} contains 0 training items, split again!".format(u_id))
                raise Exception("Split_Error!")
        write_ratings(users_train,
                      filename=os.path.join(
                          fold_dir,
                          "train-fold_{}-users.dat".format(fold + 1)),
                      delimiter=self.delimiter)
        write_ratings(items_train,
                      filename=os.path.join(
                          fold_dir,
                          "train-fold_{}-items.dat".format(fold + 1)),
                      delimiter=self.delimiter)

        items_test = test[fold]
        users_test = self.users_mat_from_items(items_test)
        write_ratings(users_test,
                      filename=os.path.join(
                          fold_dir,
                          "test-fold_{}-users.dat".format(fold + 1)),
                      delimiter=self.delimiter)
        write_ratings(items_test,
                      filename=os.path.join(
                          fold_dir,
                          "test-fold_{}-items.dat".format(fold + 1)),
                      delimiter=self.delimiter)

        if self.generate_validation:
            items_validation = validation[fold]
            users_validation = self.users_mat_from_items(items_validation)
            # Storing the fold validation items for all users
            write_ratings(users_validation,
                          filename=os.path.join(
                              fold_dir,
                              "validation-fold_{}-users.dat".format(fold + 1)),
                          delimiter=self.delimiter)
            write_ratings(items_validation,
                          filename=os.path.join(
                              fold_dir,
                              "validation-fold_{}-items.dat".format(fold + 1)),
                          delimiter=self.delimiter)
            # Calculate statistics:
            stats.add_fold_statistics(fold + 1, users_train, users_test,
                                      items_train, items_test,
                                      users_validation, items_validation)
        else:
            # Calculate statistics:
            stats.add_fold_statistics(fold + 1, users_train, users_test,
                                      items_train, items_test)

    # Write split statistics:
    stats.save_stats_to_file(os.path.join(self.out_folder, 'stats.txt'))
class ALAuto(object):
    """Primary azurlane-auto driver: owns the enabled modules and runs their cycles."""

    # Known module slots. This dict is only a template: every instance works
    # on its own copy (see __init__), so the class-level dict is never mutated.
    modules = {
        'updates': None,
        'combat': None,
        'commissions': None,
        'enhancement': None,
        'missions': None,
        'retirement': None,
        'headquarters': None,
        'event': None
    }

    def __init__(self, config):
        """Initializes the primary azurlane-auto instance with the passed in
        Config instance; creates the Stats instance and resets scheduled sleep
        timers.

        Args:
            config (Config): azurlane-auto Config instance
        """
        self.config = config
        self.oil_limit = 0
        self.stats = Stats(config)
        # Per-instance copy, so modules enabled here do not leak into other
        # instances through the shared class attribute.
        self.modules = dict(self.modules)
        if self.config.updates['enabled']:
            self.modules['updates'] = UpdateUtil(self.config)
        if self.config.combat['enabled']:
            self.modules['combat'] = CombatModule(self.config, self.stats)
            self.oil_limit = self.config.combat['oil_limit']
        if self.config.commissions['enabled']:
            self.modules['commissions'] = CommissionModule(self.config, self.stats)
        if self.config.enhancement['enabled']:
            self.modules['enhancement'] = EnhancementModule(self.config, self.stats)
        if self.config.missions['enabled']:
            self.modules['missions'] = MissionModule(self.config, self.stats)
        if self.config.retirement['enabled']:
            self.modules['retirement'] = RetirementModule(self.config, self.stats)
        if self.config.dorm['enabled'] or self.config.academy['enabled']:
            self.modules['headquarters'] = HeadquartersModule(self.config, self.stats)
        if self.config.events['enabled']:
            self.modules['event'] = EventModule(self.config, self.stats)
        self.print_stats_check = True
        self.next_combat = datetime.now()

    def run_update_check(self):
        """Method to warn when the update module reports a new release.
        """
        if self.modules['updates']:
            if self.modules['updates'].checkUpdate():
                Logger.log_warning("A new release is available, please check the github.")

    def should_sortie(self):
        """Method to check whether bot should combat or not.

        Requires the combat or event module, a scheduled next_combat (0 means
        none) that is already due, and a passing Utils.check_oil.
        """
        # Reads this instance's own schedule instead of a module-level
        # `script` global.
        return (self.modules['combat'] or self.modules['event']) \
            and self.next_combat != 0 \
            and self.next_combat < datetime.now() \
            and Utils.check_oil(self.oil_limit)

    def run_sortie_cycle(self):
        """Method to run all cycles related to combat.
        """
        self.run_event_cycle()
        self.run_combat_cycle()
        self.run_enhancement_cycle()
        self.run_retirement_cycle()

    def run_combat_cycle(self):
        """Method to run the combat cycle.
        """
        if self.modules['combat']:
            result = self.modules['combat'].combat_logic_wrapper()

            if result == 1:
                # if boss is defeated
                Logger.log_msg("Boss successfully defeated, going back to menu.")
                self.print_stats_check = True
            elif result == 2:
                # if morale is too low
                Logger.log_warning("Ships morale is too low, entering standby mode for an hour.")
                self.next_combat = datetime.now() + timedelta(hours=1)
                self.print_stats_check = False
            elif result == 3:
                # if dock is full
                Logger.log_warning("Dock is full, need to retire.")

                if self.modules['retirement']:
                    self.modules['retirement'].retirement_logic_wrapper(True)
                else:
                    Logger.log_error("Retirement isn't enabled, exiting.")
                    sys.exit()
            elif result == 4:
                Logger.log_warning("Failed to defeat enemy.")
                self.print_stats_check = False
        else:
            # No combat module: 0 marks "no combat scheduled".
            self.next_combat = 0

    def run_commission_cycle(self):
        """Method to run the expedition cycle.
        """
        if self.modules['commissions']:
            self.modules['commissions'].commission_logic_wrapper()

    def run_enhancement_cycle(self):
        """Method to run the enhancement cycle.
        """
        if self.modules['enhancement']:
            self.modules['enhancement'].enhancement_logic_wrapper()

    def run_mission_cycle(self):
        """Method to run the mission cycle
        """
        if self.modules['missions']:
            self.modules['missions'].mission_logic_wrapper()

    def run_retirement_cycle(self):
        """Method to run the retirement cycle
        """
        if self.modules['retirement']:
            self.modules['retirement'].retirement_logic_wrapper()

    def run_hq_cycle(self):
        """Method to run the headquarters cycle.
        """
        if self.modules['headquarters']:
            self.modules['headquarters'].hq_logic_wrapper()

    def run_event_cycle(self):
        """Method to run the event cycle
        """
        if self.modules['event']:
            self.modules['event'].event_logic_wrapper()

    def print_cycle_stats(self):
        """Method to print the cycle stats"
        """
        if self.print_stats_check:
            self.stats.print_stats(Utils.check_oil(self.oil_limit))
        self.print_stats_check = False
def main(exp_name, embed_data, train_data, train_data_stats, val_data,
         val_data_stats, test_data, test_data_stats, log_path, batch_size,
         num_epochs, unroll_steps, learn_rate, num_dense, dense_dim, penalty,
         reg_coeff):
    """
    Main run function for training model.

    Embeds premise and hypothesis with a shared, non-trainable embedding
    matrix, passes each through SumEmbeddingLayer, concatenates the two
    results, stacks dense layers on top and trains the network with RMSProp
    while recording cost and accuracy through Stats.

    :param exp_name: Experiment name handed to Stats
    :param embed_data: Passed to EmbeddingTable to load the embeddings
    :param train_data: Training data (inputs via generate_data, labels via
                       convertLabelsToMat)
    :param train_data_stats: Passed to generate_data together with train_data
    :param val_data: Validation data, processed like train_data
    :param val_data_stats: Passed to generate_data together with val_data
    :param test_data: Not used by this function
    :param test_data_stats: Not used by this function
    :param log_path: Not used by this function
    :param batch_size: Minibatch size; also the first dimension declared on
                       the input layers
    :param num_epochs: Number of passes over the minibatches
    :param unroll_steps: Sequence length passed to generate_data and declared
                         on the input layers
    :param learn_rate: Learning rate passed to RMSProp
    :param num_dense: Number of dense fully connected layers to add after
                      concatenation layer
    :param dense_dim: Dimension of dense FC layers -- note this only applies
                      if num_dense > 1
    :param penalty: Penalty to use for regularization, "l1" or "l2"
    :param reg_coeff: Regularization coeff to use for each layer of network;
                      may want to support different coefficient for different
                      layers
    :return: None
    :raises ValueError: if penalty is neither "l1" nor "l2"
    """
    # Fail early on a bad penalty name instead of hitting an unbound local
    # after all the data has been loaded.
    penalty_metrics = {"l1": l1, "l2": l2}
    if penalty not in penalty_metrics:
        raise ValueError(
            "Unknown penalty {0!r}; expected 'l1' or 'l2'".format(penalty))
    p_metric = penalty_metrics[penalty]

    # Set random seed for deterministic results
    np.random.seed(0)
    num_ex_to_train = 30

    # Load embedding table
    table = EmbeddingTable(embed_data)
    vocab_size = table.sizeVocab
    dim_embeddings = table.dimEmbeddings
    embeddings_mat = table.embeddings

    train_prem, train_hyp = generate_data(train_data, train_data_stats,
                                          "left", "right", table,
                                          seq_len=unroll_steps)
    val_prem, val_hyp = generate_data(val_data, val_data_stats,
                                      "left", "right", table,
                                      seq_len=unroll_steps)
    train_labels = convertLabelsToMat(train_data)
    val_labels = convertLabelsToMat(val_data)

    # To test for overfitting capabilities of model
    if num_ex_to_train > 0:
        val_prem = val_prem[0:num_ex_to_train]
        val_hyp = val_hyp[0:num_ex_to_train]
        val_labels = val_labels[0:num_ex_to_train]

    # Theano expressions for premise/hypothesis inputs to network
    x_p = T.imatrix()
    x_h = T.imatrix()
    target_values = T.fmatrix(name="target_output")

    # Embedding layer for premise
    l_in_prem = InputLayer((batch_size, unroll_steps))
    l_embed_prem = EmbeddingLayer(l_in_prem, input_size=vocab_size,
                                  output_size=dim_embeddings,
                                  W=embeddings_mat)

    # Embedding layer for hypothesis
    l_in_hyp = InputLayer((batch_size, unroll_steps))
    l_embed_hyp = EmbeddingLayer(l_in_hyp, input_size=vocab_size,
                                 output_size=dim_embeddings,
                                 W=embeddings_mat)

    # Ensure embedding matrix parameters are not trainable
    l_embed_hyp.params[l_embed_hyp.W].remove('trainable')
    l_embed_prem.params[l_embed_prem.W].remove('trainable')

    l_embed_hyp_sum = SumEmbeddingLayer(l_embed_hyp)
    l_embed_prem_sum = SumEmbeddingLayer(l_embed_prem)

    # Concatenate sentence embeddings for premise and hypothesis
    l_concat = ConcatLayer([l_embed_hyp_sum, l_embed_prem_sum])

    # num_dense - 1 tanh layers followed by one softmax layer; when
    # num_dense <= 1 the loop is empty and only the softmax layer is added.
    l_in = l_concat
    for _ in range(num_dense - 1):
        l_in = DenseLayer(l_in, num_units=dense_dim,
                          nonlinearity=lasagne.nonlinearities.tanh)
    l_output = DenseLayer(l_in, num_units=NUM_DENSE_UNITS,
                          nonlinearity=lasagne.nonlinearities.softmax)

    network_output = get_output(l_output, {l_in_prem: x_p, l_in_hyp: x_h})

    # Compute cost. Every layer is already weighted by reg_coeff through
    # layer_dict, so the weighted sum must not be multiplied by reg_coeff
    # again (that would make the effective coefficient reg_coeff ** 2).
    layers = lasagne.layers.get_all_layers(l_output)
    layer_dict = {l: reg_coeff for l in layers}
    reg_cost = regularize_layer_params_weighted(layer_dict, p_metric)
    cost = T.nnet.categorical_crossentropy(
        network_output, target_values).mean() + reg_cost
    compute_cost = theano.function([x_p, x_h, target_values], cost)

    # Compute accuracy
    accuracy = T.mean(T.eq(T.argmax(network_output, axis=-1),
                           T.argmax(target_values, axis=-1)),
                      dtype=theano.config.floatX)
    compute_accuracy = theano.function([x_p, x_h, target_values], accuracy)

    # Define update/train functions
    all_params = lasagne.layers.get_all_params(l_output, trainable=True)
    updates = lasagne.updates.rmsprop(cost, all_params, learn_rate)
    train = theano.function([x_p, x_h, target_values], cost, updates=updates)

    # TODO: Augment embedding layer to allow for masking inputs

    stats = Stats(exp_name)
    acc_num = 10

    minibatches = getMinibatchesIdx(train_prem.shape[0], batch_size)
    print("Training ...")
    try:
        total_num_ex = 0
        for epoch in xrange(num_epochs):
            for _, minibatch in minibatches:
                total_num_ex += len(minibatch)
                stats.log("Processed {0} total examples in epoch {1}".format(
                    str(total_num_ex), str(epoch)))

                prem_batch = train_prem[minibatch]
                hyp_batch = train_hyp[minibatch]
                labels_batch = train_labels[minibatch]

                train(prem_batch, hyp_batch, labels_batch)
                cost_val = compute_cost(prem_batch, hyp_batch, labels_batch)

                stats.recordCost(total_num_ex, cost_val)
                # Periodically compute and log train/dev accuracy
                if total_num_ex % (acc_num * batch_size) == 0:
                    train_acc = compute_accuracy(train_prem, train_hyp,
                                                 train_labels)
                    dev_acc = compute_accuracy(val_prem, val_hyp, val_labels)
                    stats.recordAcc(total_num_ex, train_acc, dataset="train")
                    stats.recordAcc(total_num_ex, dev_acc, dataset="dev")
    except KeyboardInterrupt:
        # Ctrl-C ends the training loop without a traceback.
        pass
class ALAuto(object):
    """Driver object that owns the enabled azurlane-auto modules.

    Each ``run_*_cycle`` method delegates to the matching module when that
    module was enabled in the configuration and is a no-op otherwise.

    Attributes:
        config: the Config instance passed to the constructor.
        stats: the Stats instance shared with every module.
        modules (dict): per-instance table mapping module name to the module
            object, or None when the module is disabled.
        print_stats_check (bool): whether the next ``print_cycle_stats`` call
            should actually print.
        next_combat: ``datetime`` before which combat should not be retried,
            or the integer 0 when the combat module is disabled.
    """

    # Template listing the known module slots. Instances copy it in __init__;
    # it must not be mutated through an instance, otherwise modules enabled
    # by one ALAuto object would leak into every other one.
    modules = {
        'combat': None,
        'commissions': None,
        'enhancement': None,
        'missions': None,
        'retirement': None,
        'event': None
    }

    def __init__(self, config):
        """Initializes the primary azurlane-auto instance with the passed in
        Config instance; creates the Stats instance, instantiates every module
        enabled in the config and sets the next combat time to now.

        Args:
            config (Config): azurlane-auto Config instance
        """
        self.config = config
        self.stats = Stats(config)
        # Take a private copy of the class-level template so that the
        # assignments below never modify state shared between instances.
        self.modules = dict(type(self).modules)
        if self.config.combat['enabled']:
            self.modules['combat'] = CombatModule(self.config, self.stats)
        if self.config.commissions['enabled']:
            self.modules['commissions'] = CommissionModule(
                self.config, self.stats)
        if self.config.enhancement['enabled']:
            self.modules['enhancement'] = EnhancementModule(
                self.config, self.stats)
        if self.config.missions['enabled']:
            self.modules['missions'] = MissionModule(self.config, self.stats)
        if self.config.retirement['enabled']:
            self.modules['retirement'] = RetirementModule(
                self.config, self.stats)
        # The config section is named 'events' while the slot is 'event'.
        if self.config.events['enabled']:
            self.modules['event'] = EventModule(self.config, self.stats)
        self.print_stats_check = True
        self.next_combat = datetime.now()

    def run_combat_cycle(self):
        """Method to run the combat cycle.

        Reacts to the code returned by the combat module's
        ``combat_logic_wrapper``: 1 re-enables stats printing, 2 postpones
        the next combat by one hour and suppresses stats printing, 3 hands
        over to the retirement module or exits the process when retirement
        is disabled. When the combat module is disabled, ``next_combat`` is
        set to 0.
        """
        if self.modules['combat']:
            result = self.modules['combat'].combat_logic_wrapper()

            if result == 1:
                # if boss is defeated
                self.print_stats_check = True
            elif result == 2:
                # if morale is too low
                self.next_combat = datetime.now() + timedelta(hours=1)
                self.print_stats_check = False
            elif result == 3:
                # if dock is full
                if self.modules['retirement']:
                    # NOTE(review): the True argument presumably forces the
                    # retirement run -- confirm against RetirementModule.
                    self.modules['retirement'].retirement_logic_wrapper(True)
                else:
                    Logger.log_error("Retirement isn't enabled, exiting.")
                    sys.exit()
        else:
            # Integer sentinel (not a datetime) used when combat is disabled.
            self.next_combat = 0

    def run_commission_cycle(self):
        """Method to run the commission cycle.
        """
        if self.modules['commissions']:
            self.modules['commissions'].commission_logic_wrapper()

    def run_enhancement_cycle(self):
        """Method to run the enhancement cycle.
        """
        if self.modules['enhancement']:
            self.modules['enhancement'].enhancement_logic_wrapper()

    def run_mission_cycle(self):
        """Method to run the mission cycle.
        """
        if self.modules['missions']:
            self.modules['missions'].mission_logic_wrapper()

    def run_retirement_cycle(self):
        """Method to run the retirement cycle.
        """
        if self.modules['retirement']:
            self.modules['retirement'].retirement_logic_wrapper()

    def run_event_cycle(self):
        """Method to run the event cycle.
        """
        if self.modules['event']:
            self.modules['event'].event_logic_wrapper()

    def print_cycle_stats(self):
        """Method to print the cycle stats.

        Prints at most once per request: the flag is cleared after printing
        and is only set again by ``run_combat_cycle`` (result code 1).
        """
        if self.print_stats_check:
            self.stats.print_stats()
        self.print_stats_check = False
def out_of_matrix_split(self, folds_num=5):
    """
    Splits the rating matrix following the out-of-matrix method defined in CTR:
    the item ids are divided into folds, each fold in turn supplies the test
    items, optionally the next fold supplies the validation items, and all
    remaining items are used for training.

    For every fold k (1-based) these files are written through write_ratings
    under <out_folder>/fold-k/:
        train-fold_k-users.dat, test-fold_k-users.dat, validation-fold_k-users.dat,
        train-fold_k-items.dat, test-fold_k-items.dat, validation-fold_k-items.dat,
        heldout-set-fold_k-items.dat
    In the users files, row i holds the sorted item ids of user i that fall in
    the corresponding item set. Once all folds are done the collected
    statistics are saved to <out_folder>/stats.txt.

    :param folds_num: the number of folds, default = 5
    :return: None
    :raises Exception: "Split_Error!" if a user has no item in a fold's training set
    """
    # 1- Divide the item ids into folds:
    items_ids = list(range(self.num_items))
    item_ids_folds = random_divide(items_ids, folds_num)
    every_item = set(items_ids)

    def fold_file(fold_no, name_template):
        # Path of one output file inside the folder of fold `fold_no`.
        return os.path.join(self.out_folder, "fold-{}".format(fold_no), name_template.format(fold_no))

    # 2- Build and write the training, test and validation sets fold by fold:
    stats = Stats(self.generate_validation)
    for test_fold in range(folds_num):
        fold_no = test_fold + 1

        # Item id sets of this fold; validation (if requested) is the next fold, cyclically:
        held_out = set(item_ids_folds[test_fold])
        if self.generate_validation:
            validation_pool = set(item_ids_folds[fold_no % folds_num])
        else:
            validation_pool = set()
        training_pool = every_item - held_out - validation_pool

        # Restrict every user's ratings to each of the three item sets:
        users_train, users_test, users_validation = [], [], []
        for user_ratings in self.users_ratings:
            train_row = sorted(training_pool.intersection(user_ratings))
            if not train_row:
                print("some users contains 0 training items, split again again!")
                raise Exception("Split_Error!")
            users_train.append(train_row)
            users_test.append(sorted(held_out.intersection(user_ratings)))
            users_validation.append(sorted(validation_pool.intersection(user_ratings)))

        # Per-user files:
        write_ratings(users_train, filename=fold_file(fold_no, "train-fold_{}-users.dat"), delimiter=self.delimiter)
        write_ratings(users_test, filename=fold_file(fold_no, "test-fold_{}-users.dat"), delimiter=self.delimiter)
        write_ratings(users_validation, filename=fold_file(fold_no, "validation-fold_{}-users.dat"), delimiter=self.delimiter)

        # Per-item files, derived from the per-user lists:
        items_train = self.items_mat_from_users_ratings(users_train)
        write_ratings(items_train, filename=fold_file(fold_no, "train-fold_{}-items.dat"), delimiter=self.delimiter)

        items_test = self.items_mat_from_users_ratings(users_test)
        write_ratings(items_test, filename=fold_file(fold_no, "test-fold_{}-items.dat"), delimiter=self.delimiter)

        items_validation = self.items_mat_from_users_ratings(users_validation)
        write_ratings(items_validation, filename=fold_file(fold_no, "validation-fold_{}-items.dat"), delimiter=self.delimiter)

        # Held-out set file.
        # NOTE(review): this is built from `items_test` (the value returned by
        # items_mat_from_users_ratings), not from the set of test item ids --
        # confirm that this is the intended content of the held-out file.
        write_ratings(sorted(items_test), filename=fold_file(fold_no, "heldout-set-fold_{}-items.dat"), delimiter=self.delimiter, print_line_length=False)

        # Statistics of this fold; validation data is passed only when it was generated:
        fold_stats_args = [fold_no, users_train, users_test, items_train, items_test]
        if self.generate_validation:
            fold_stats_args += [users_validation, items_validation]
        stats.add_fold_statistics(*fold_stats_args)

    # Write the statistics of the whole split:
    stats.save_stats_to_file(os.path.join(self.out_folder, 'stats.txt'))