コード例 #1
0
 def insertPeople(self, number, recordStats=True):
     """Insert ``number`` newly generated people into the database.

     Args:
         number: How many people to obtain from ``PersonBuffer`` and insert.
         recordStats: When true, every insert is routed through
             ``Stats.execute``; otherwise ``self.db.insertPerson`` is
             called directly.
     """
     for _ in range(number):
         new_person = PersonBuffer.getNewPerson()
         if not recordStats:
             self.db.insertPerson(new_person)
             continue
         Stats.execute(self.db.insertPerson, [new_person])
コード例 #2
0
   def main(self):
      """Run the update workload, then print and dump the statistics.

      ``updatePeople`` is called 1000 times for 5 people each with both
      statistics flags enabled; afterwards the collected numbers are
      reported through ``Stats.output`` and ``Stats.dump``.
      """
      rounds = 1000
      people_per_round = 5
      for _ in range(rounds):
         # updatePeople performs an equal number of reads and writes.
         self.updatePeople(people_per_round, True, True)

      Stats.output()
      Stats.dump(self.getDumpFileName())
コード例 #3
0
    def __init__(self):
        """Create the stats collector, the dataset and both data loaders.

        The dataset indexes are shuffled and cut 90/10.  Only the
        validation part of that cut drives a sampler; the training loader
        iterates over the whole dataset sequentially.
        """
        self.stats = Stats()
        self.train_dataset = self.create_dataset()

        dataset_size = len(self.train_dataset)
        shuffled_indexes = np.random.permutation(np.arange(dataset_size))
        train_indexes, validation_indexes = np.split(
            shuffled_indexes, [int(0.9 * dataset_size)])
        logger.info("train size: %d, validation size: %d" %
                    (len(train_indexes), len(validation_indexes)))

        samplers = torch.utils.data.sampler
        batch_size = config.gan.batch_size

        # train_indexes is only logged: a SubsetRandomSampler(train_indexes)
        # was used here before, now the full dataset is read in order.
        self.train_loader = torch.utils.data.DataLoader(
            self.train_dataset,
            batch_size=batch_size,
            sampler=samplers.SequentialSampler(self.train_dataset),
            num_workers=0)
        self.validation_loader = torch.utils.data.DataLoader(
            self.train_dataset,
            batch_size=batch_size,
            sampler=samplers.SubsetRandomSampler(validation_indexes))

        # Shape of one sample: the first batch's size without its leading
        # dimension.
        first_batch = next(iter(self.train_loader))
        self.input_shape = first_batch[0].size()[1:]
コード例 #4
0
ファイル: gan_train.py プロジェクト: vfcosta/qd-coegan
 def __init__(self, log_dir=None):
     """Build the data loaders, the stats collector and the evolutionary algorithm.

     The dataset returned by ``create_dataset`` is randomly split 90/10
     into a training and a validation subset, each wrapped in a shuffling
     ``DataLoader`` that drops the last incomplete batch.

     Args:
         log_dir: Forwarded to ``Stats`` as ``log_dir``.
     """
     dataset = self.create_dataset()
     total = len(dataset)
     train_len = int(0.9 * total)
     train_dataset, validation_dataset = torch.utils.data.random_split(
         dataset, [train_len, total - train_len])
     logger.info("train size: %d, validation size: %d" %
                 (len(train_dataset), len(validation_dataset)))

     def make_loader(subset):
         # Both loaders share exactly the same settings.
         return torch.utils.data.DataLoader(
             subset,
             batch_size=config.gan.batch_size,
             num_workers=config.gan.data_loader_workers,
             drop_last=True,
             shuffle=True)

     self.train_loader = make_loader(train_dataset)
     self.validation_loader = make_loader(validation_dataset)

     # Shape of one sample: the first batch's size without its leading
     # dimension.
     first_batch = next(iter(self.train_loader))
     self.input_shape = first_batch[0].size()[1:]

     self.stats = Stats(log_dir=log_dir,
                        input_shape=self.input_shape,
                        train_loader=self.train_loader,
                        validation_loader=self.validation_loader)
     evaluator = Evaluator(self.train_loader, self.validation_loader)
     algorithms = {"NEAT": NEAT, "NSGA2": NSGA2}
     algorithm_cls = algorithms[config.evolution.algorithm]
     self.evolutionary_algorithm = algorithm_cls(evaluator)
コード例 #5
0
ファイル: ALAuto.py プロジェクト: wiseasswolfofyoitsu/ALAuto
    def __init__(self, config):
        """Store the config, create the Stats instance and set up modules.

        A module is instantiated and registered in ``self.modules`` only
        when its config section has ``'enabled'`` set.  The headquarters
        module is created when either the dorm or the academy section is
        enabled.

        Args:
            config (Config): azurlane-auto Config instance
        """
        self.config = config
        self.oil_limit = 0
        self.stats = Stats(config)

        cfg = self.config
        stats = self.stats
        modules = self.modules

        if cfg.updates['enabled']:
            modules['updates'] = UpdateUtil(cfg)
        if cfg.combat['enabled']:
            modules['combat'] = CombatModule(cfg, stats)
            # The oil limit stays 0 unless combat is enabled.
            self.oil_limit = cfg.combat['oil_limit']
        if cfg.commissions['enabled']:
            modules['commissions'] = CommissionModule(cfg, stats)
        if cfg.enhancement['enabled']:
            modules['enhancement'] = EnhancementModule(cfg, stats)
        if cfg.missions['enabled']:
            modules['missions'] = MissionModule(cfg, stats)
        if cfg.retirement['enabled']:
            modules['retirement'] = RetirementModule(cfg, stats)
        if cfg.dorm['enabled'] or cfg.academy['enabled']:
            modules['headquarters'] = HeadquartersModule(cfg, stats)
        if cfg.events['enabled']:
            modules['event'] = EventModule(cfg, stats)

        self.print_stats_check = True
        self.next_combat = datetime.now()
コード例 #6
0
class ALAuto(object):
    """Primary azurlane-auto instance that owns the enabled modules.

    Each ``run_*_cycle`` method delegates to the matching module (when that
    module is enabled) and flags that the statistics should be printed when
    the module's logic wrapper returns a truthy value.
    """

    # Default module slots.  Kept at class level so that ``ALAuto.modules``
    # still exists for callers; every instance works on its own copy.
    modules = {
        'commissions': None,
        'combat': None,
        'missions': None
    }

    def __init__(self, config):
        """Initializes the primary azurlane-auto instance with the passed in
        Config instance and creates the Stats instance and the enabled
        modules.

        Args:
            config (Config): azurlane-auto Config instance
        """
        self.config = config
        self.stats = Stats(config)
        # Copy the class-level defaults: writing into the shared class dict
        # would make a module enabled for one instance visible in every
        # other instance, even one whose config disables it.
        self.modules = dict(type(self).modules)
        if self.config.commissions['enabled']:
            self.modules['commissions'] = CommissionModule(
                self.config, self.stats)
        if self.config.combat['enabled']:
            self.modules['combat'] = CombatModule(self.config, self.stats)
        if self.config.missions['enabled']:
            self.modules['missions'] = MissionModule(self.config, self.stats)
        self.print_stats_check = True

    def run_combat_cycle(self):
        """Method to run the combat cycle.
        """
        combat = self.modules['combat']
        if combat and combat.combat_logic_wrapper():
            self.print_stats_check = True

    def run_commission_cycle(self):
        """Method to run the commission cycle.
        """
        commissions = self.modules['commissions']
        if commissions and commissions.commissions_logic_wrapper():
            self.print_stats_check = True

    def run_mission_cycle(self):
        """Method to run the mission cycle.
        """
        missions = self.modules['missions']
        if missions and missions.mission_logic_wrapper():
            self.print_stats_check = True

    def print_cycle_stats(self):
        """Method to print the cycle stats.

        The stats are printed only when a cycle has flagged them since the
        last call; the flag is cleared afterwards.
        """
        if self.print_stats_check:
            self.stats.print_stats()
        self.print_stats_check = False

    def run_test(self):
        """Placeholder; does nothing."""
        pass
コード例 #7
0
	def _runTurn(self):
		for p in range(len(self.players)):
			# refill player sp before running actions
			self.state["player_{}_external".format(p)]["sp"] = self.state["player_{}_external".format(p)]["max_sp"]

			self._runActionsForP(p)
			if self.winning_player != None:
				return

		Stats.recordStat("turns")
		self.state["g"]["turn"] += 1
コード例 #8
0
    def updatePeople(self, number, recordStats=True, recordReadStats=False):
        """Fetch ``number`` people, age each by one year and write it back.

        A person whose ``'age'`` is falsy (for example ``None`` or ``0``)
        ends up with an age of 1.

        Args:
            number: How many people to request from ``getPeople``.
            recordStats: When true, each update goes through
                ``Stats.execute``; otherwise ``self.db.updatePerson`` is
                called directly.
            recordReadStats: Forwarded to ``getPeople``.
        """
        for person in self.getPeople(number, recordReadStats):
            if not person['age']:
                person['age'] = 1
            else:
                person['age'] += 1

            if not recordStats:
                self.db.updatePerson(person)
                continue
            Stats.execute(self.db.updatePerson, [person])
コード例 #9
0
    def user_based_split(self, folds_num=5):
        """
        Splits the rating matrix into folds following the user-based method.

        For every user with at least `folds_num` rated items, the item positions are divided
        into `folds_num` parts by `random_divide`; in fold k the k-th part is that user's test set
        and the remaining items are the training set. Users with fewer items keep all of their
        items in the training set of every fold and have no test items.

        Four files are written per fold k (1-based) into `<out_folder>/fold-<k>/` via `write_ratings`:
        train-fold_<k>-users.dat, train-fold_<k>-items.dat, test-fold_<k>-users.dat and
        test-fold_<k>-items.dat. Per-fold statistics are collected and saved to
        `<out_folder>/stats.txt`.

        :param folds_num: the number of folds, default 5
        :return: None
        :raises Exception: ("Split_Error!") when some user has no training items in a fold
        """
        train = [[[] for _ in range(self.num_users)] for _ in range(folds_num)]
        test = [[[] for _ in range(self.num_users)] for _ in range(folds_num)]
        for user in range(self.num_users):
            if user % 1000 == 0:
                print("user_{}".format(user))
            items_ids = np.array(self.users_ratings[user])
            n = len(items_ids)
            if n >= folds_num:
                idx = list(range(n))
                item_ids_folds = random_divide(idx, folds_num)
                for fold in range(folds_num):
                    test_idx = item_ids_folds[fold]
                    # Set membership keeps this step linear in n; testing
                    # against the list made it quadratic.
                    held_out = set(test_idx)
                    train_idx = [pos for pos in idx if pos not in held_out]
                    train[fold][user].extend(items_ids[train_idx].tolist())
                    test[fold][user].extend(items_ids[test_idx].tolist())
            else:
                # Too few items to split: everything goes to training and the
                # user's test list stays empty in every fold.
                all_items = items_ids.tolist()
                for fold in range(folds_num):
                    train[fold][user].extend(all_items)

        stats = Stats(self.generate_validation)
        for fold in range(folds_num):
            fold_id = fold + 1
            fold_dir = os.path.join(self.out_folder, "fold-{}".format(fold_id))

            users_train = train[fold]
            items_train = self.items_mat_from_users_ratings(users_train)
            if any(len(u) == 0 for u in users_train):
                print("some users contains 0 training items, split again again!")
                raise Exception("Split_Error!")
            write_ratings(users_train, filename=os.path.join(fold_dir, "train-fold_{}-users.dat".format(fold_id)), delimiter=self.delimiter)
            write_ratings(items_train, filename=os.path.join(fold_dir, "train-fold_{}-items.dat".format(fold_id)), delimiter=self.delimiter)

            users_test = test[fold]
            items_test = self.items_mat_from_users_ratings(users_test)

            # Storing the fold test items for all users
            write_ratings(users_test, filename=os.path.join(fold_dir, "test-fold_{}-users.dat".format(fold_id)), delimiter=self.delimiter)
            write_ratings(items_test, filename=os.path.join(fold_dir, "test-fold_{}-items.dat".format(fold_id)), delimiter=self.delimiter)

            # Calculate statistics:
            # TODO: Calculate Validation sets (currently always empty):
            users_validation = []
            items_validation = []
            if self.generate_validation:
                stats.add_fold_statistics(fold_id, users_train, users_test, items_train, items_test, users_validation, items_validation)
            else:
                stats.add_fold_statistics(fold_id, users_train, users_test, items_train, items_test)

        # Write split statistics:
        stats.save_stats_to_file(os.path.join(self.out_folder, 'stats.txt'))
コード例 #10
0
    def __init__(self, config):
        """Store the config, create the Stats instance and set up modules.

        The commissions, combat and missions modules are instantiated and
        registered in ``self.modules`` only when their config section has
        ``'enabled'`` set.

        Args:
            config (Config): azurlane-auto Config instance
        """
        self.config = config
        self.stats = Stats(config)

        cfg = self.config
        stats = self.stats
        modules = self.modules

        if cfg.commissions['enabled']:
            modules['commissions'] = CommissionModule(cfg, stats)
        if cfg.combat['enabled']:
            modules['combat'] = CombatModule(cfg, stats)
        if cfg.missions['enabled']:
            modules['missions'] = MissionModule(cfg, stats)

        self.print_stats_check = True
コード例 #11
0
    def getPeopleAndParents(self, number, recordStats=True):
        """Look up ``number`` randomly chosen people together with their parents.

        Args:
            number: How many random person ids to draw and look up.
            recordStats: When true, each lookup goes through
                ``Stats.execute``; otherwise ``self.db.getPersonAndParents``
                is called directly.

        Returns:
            list: One lookup result per drawn id, in draw order.
        """
        def fetch(personid):
            if recordStats:
                return Stats.execute(self.db.getPersonAndParents, [personid])
            return self.db.getPersonAndParents(personid)

        return [fetch(self.getRandomPersonid()) for _ in range(number)]
コード例 #12
0
ファイル: functionality.py プロジェクト: BinbinBian/LSTM-NLI
def testStats():
    """Record a few accuracy values on a Stats object and print them.

    Builds a ``Logger`` writing to the module-level ``logPath``, records
    accuracies for the "train", "dev" and "test" labels and prints
    ``stats.acc``.
    """
    logger = Logger(log_path=logPath)
    stats = Stats(logger)

    stats.recordAcc(10, 0.3, "train")
    stats.recordAcc(20, 0.1, "train")
    stats.recordAcc(10, 1.3, "dev")
    stats.recordAcc(40, 0.344, "test")

    # Call form works on both Python 2 and Python 3; the bare ``print``
    # statement is a syntax error on Python 3.
    print(stats.acc)
コード例 #13
0
 def _selectAction(self, recommended_a_id=None):
     """Pick the action to take in the current state ``self.s``.

     The recommended action is used only when all of these hold: an id was
     recommended, the random roll is not below ``self.random_action_rate``,
     and the id belongs to one of the currently valid actions.  Otherwise a
     random valid action is chosen.

     Args:
         recommended_a_id: Database id of the recommended action, or a
             falsy value for "no recommendation".

     Returns:
         tuple: ``(action_id, action)`` of the selected action.
     """
     # TODO it might be a good idea to have state similarity here to pick a
     # closest observed action
     possible_actions = getValidActionsInState(self.s)
     possible_action_ids = [
         Database.upsertAction(action) for action in possible_actions
     ]

     # Evaluation order matters: the random roll is only drawn when an
     # action was actually recommended.
     pick_randomly = (
         not recommended_a_id
         or np.random.random() < self.random_action_rate
         or recommended_a_id not in possible_action_ids)
     if pick_randomly:
         random_action_index = np.random.randint(len(possible_action_ids))
         random_action = possible_actions[random_action_index]
         self._printIfVerbose("agent randomly chose", random_action)
         return possible_action_ids[random_action_index], random_action

     action = Database.getAction(recommended_a_id)
     self._printIfVerbose("agent chose", action)
     # Stat name is "chosen_action=<action>" plus "_id=<card_id>" when the
     # action carries a card id.
     stat_name = "chosen_action={}".format(action["action"])
     if "card_id" in action and action["card_id"] is not None:
         stat_name += "_id={}".format(action["card_id"])
     Stats.recordStat(stat_name)
     return recommended_a_id, action
コード例 #14
0
ファイル: gan_train.py プロジェクト: vfcosta/qd-coegan
class GanTrain:
    """Set up the data and run the evolutionary training of GAN populations.

    The constructor builds the train/validation loaders, the stats
    collector and the configured evolutionary algorithm; ``start`` runs the
    generations.
    """

    def __init__(self, log_dir=None):
        """Build the data loaders, the stats collector and the algorithm.

        The dataset returned by ``create_dataset`` is randomly split 90/10
        into a training and a validation subset, each wrapped in a shuffling
        ``DataLoader`` that drops the last incomplete batch.

        Args:
            log_dir: Forwarded to ``Stats`` as ``log_dir``.
        """
        full_dataset = self.create_dataset()
        train_len = int(0.9 * len(full_dataset))
        train_dataset, validation_dataset = torch.utils.data.random_split(
            full_dataset, [train_len, len(full_dataset) - train_len])
        logger.info("train size: %d, validation size: %d" %
                    (len(train_dataset), len(validation_dataset)))
        self.train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=config.gan.batch_size,
            num_workers=config.gan.data_loader_workers,
            drop_last=True,
            shuffle=True)
        self.validation_loader = torch.utils.data.DataLoader(
            validation_dataset,
            batch_size=config.gan.batch_size,
            num_workers=config.gan.data_loader_workers,
            drop_last=True,
            shuffle=True)
        # Shape of one sample: the first batch's size without its leading
        # dimension.
        self.input_shape = next(iter(self.train_loader))[0].size()[1:]
        self.stats = Stats(log_dir=log_dir,
                           input_shape=self.input_shape,
                           train_loader=self.train_loader,
                           validation_loader=self.validation_loader)
        evaluator = Evaluator(self.train_loader, self.validation_loader)
        self.evolutionary_algorithm = {
            "NEAT": NEAT,
            "NSGA2": NSGA2
        }[config.evolution.algorithm](evaluator)

    @classmethod
    def create_dataset(cls):
        """Return the dataset selected by ``config.gan.dataset``.

        When ``dsets`` has an attribute with that name it is instantiated
        (with ``download=True``) and, if ``config.gan.dataset_classes`` is
        set, filtered to those classes.  Otherwise an ``ImageFolder`` rooted
        at ``<data dir>/<dataset>/train`` is returned.  An optional resize
        is applied before tensor conversion and normalization.
        """
        transform_arr = [
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5], std=[0.5])
        ]
        if config.gan.dataset_resize:
            # Resize has to run first, before the tensor conversion.
            transform_arr.insert(0,
                                 transforms.Resize(config.gan.dataset_resize))
        transform = transforms.Compose(transform_arr)
        base_path = os.path.join(os.path.dirname(__file__), "..", "data")
        if hasattr(dsets, config.gan.dataset):
            dataset_cls = getattr(dsets, config.gan.dataset)
            dataset = dataset_cls(
                root=os.path.join(base_path, config.gan.dataset),
                download=True,
                transform=transform)
            if config.gan.dataset_classes:
                # Keep only the samples whose target is one of the
                # configured classes.
                indexes = np.argwhere(
                    np.isin(dataset.targets, config.gan.dataset_classes))
                dataset.data = dataset.data[indexes].squeeze()
                dataset.targets = np.array(dataset.targets)[indexes]
            return dataset
        else:
            return ImageFolder(root=os.path.join(base_path, config.gan.dataset,
                                                 "train"),
                               transform=transform)

    def start(self):
        """Run the evolutionary training loop.

        Initializes FID when it is needed for fitness or stats, creates and
        evaluates the initial populations, then for each generation records
        stats and computes the next populations.  Stats for the final
        populations are recorded after the loop.
        """
        if config.evolution.fitness.generator == "FID" or config.stats.calc_fid_score or config.stats.calc_fid_score_best:
            generative_score.initialize_fid(
                self.train_loader,
                sample_size=config.evolution.fitness.fid_sample_size)

        generators_population = self.evolutionary_algorithm.intialize_population(
            config.gan.generator.population_size,
            Generator,
            output_size=self.input_shape)
        discriminators_population = self.evolutionary_algorithm.intialize_population(
            config.gan.discriminator.population_size,
            Discriminator,
            output_size=1,
            input_shape=[1] + list(self.input_shape))
        # initial evaluation
        self.evolutionary_algorithm.evaluate_population(
            generators_population.phenotypes(),
            discriminators_population.phenotypes())

        # Start at -1 so that the final stats call below gets index 0 when
        # the loop runs zero times (max_generations <= 1); without this the
        # loop variable would be unbound there.
        generation = -1
        for generation in tqdm(range(config.evolution.max_generations - 1)):
            self.stats.generate(generators_population,
                                discriminators_population, generation)
            # executes selection, reproduction and replacement to create the next population
            generators_population, discriminators_population = self.evolutionary_algorithm.compute_generation(
                generators_population, discriminators_population)
        # stats for last generation
        self.stats.generate(generators_population, discriminators_population,
                            generation + 1)
コード例 #15
0
ファイル: main.py プロジェクト: BaniaFonseca/deepGenesis
from dataset.data_processing import DataProcessing
from dataset.dataset import Dataset
from util.visualize_dataset import VisualizeDataset
from util.stats import Stats
import tensorflow as tf
from train.darknet.darknet import TDarknet
from train.resnet34.resnet34 import TResNet34
from train.resnet50.resnet50 import TResNet50
from train.inception_v4.inception_v4 import TInception_v4
from test.test_model import TestModel
import numpy as np

# Print the TensorFlow version in use.
print(tf.__version__)

# Helper objects; `vs` is only referenced by the commented-out preview below.
vs = VisualizeDataset()
stats = Stats()
# NOTE(review): `Train` is not among the imports visible at the top of this
# file; unless it is defined elsewhere this line raises NameError - confirm.
train = Train()

# One object per imported training class (darknet, resnet34, resnet50,
# inception_v4).
td = TDarknet()
tr34 = TResNet34()
tr50 = TResNet50()
ti = TInception_v4()

# Data processing helper; the processing step itself is currently disabled.
dp = DataProcessing()
# dp.process_and_save_data()

# Dataset helper; saving the train set as .npy is currently disabled.
ds = Dataset()
# ds.save_trainset_as_npy()

# Disabled preview of the test set images.
# images, labels = ds.load_testset()
# vs.show_images(images, labels, cols=4, rows=2)
コード例 #16
0
ファイル: gan_train.py プロジェクト: ajinkya933/coegan
class GanTrain:

    def __init__(self):
        """Create the stats collector, the dataset and both data loaders.

        The dataset indexes are shuffled and cut 90/10.  Only the
        validation part of that cut drives a sampler; the training loader
        iterates over the whole dataset sequentially.
        """
        self.stats = Stats()
        self.train_dataset = self.create_dataset()

        dataset_size = len(self.train_dataset)
        shuffled_indexes = np.random.permutation(np.arange(dataset_size))
        train_indexes, validation_indexes = np.split(
            shuffled_indexes, [int(0.9 * dataset_size)])
        logger.info("train size: %d, validation size: %d" % (len(train_indexes), len(validation_indexes)))

        samplers = torch.utils.data.sampler
        batch_size = config.gan.batch_size

        # train_indexes is only logged: a SubsetRandomSampler(train_indexes)
        # was used here before, now the full dataset is read in order.
        self.train_loader = torch.utils.data.DataLoader(
            self.train_dataset,
            batch_size=batch_size,
            sampler=samplers.SequentialSampler(self.train_dataset),
            num_workers=0)
        self.validation_loader = torch.utils.data.DataLoader(
            self.train_dataset,
            batch_size=batch_size,
            sampler=samplers.SubsetRandomSampler(validation_indexes))

        # Shape of one sample: the first batch's size without its leading
        # dimension.
        first_batch = next(iter(self.train_loader))
        self.input_shape = first_batch[0].size()[1:]

    @classmethod
    def create_dataset(cls):
        """Return the training dataset selected by ``config.gan.dataset``.

        When ``dsets`` has an attribute with that name it is instantiated
        with ``train=True, download=True`` and, if
        ``config.gan.dataset_classes`` is set, filtered to those classes.
        Otherwise an ``ImageFolder`` rooted at ``./data/<dataset>/train`` is
        returned.
        """
        pipeline = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])
        name = config.gan.dataset

        if not hasattr(dsets, name):
            return ImageFolder(root=f"./data/{name}/train", transform=pipeline)

        dataset = getattr(dsets, name)(root=f"./data/{name}/", train=True,
                                       download=True, transform=pipeline)
        selected_classes = config.gan.dataset_classes
        if selected_classes:
            # Keep only the samples whose label is one of the configured
            # classes.
            indexes = np.argwhere(np.isin(dataset.train_labels, selected_classes))
            dataset.train_data = dataset.train_data[indexes].squeeze()
            dataset.train_labels = np.array(dataset.train_labels)[indexes]
        return dataset

    def generate_intial_population(self):
        """Create and set up the initial generator and discriminator populations.

        Returns:
            tuple: ``(generators, discriminators)`` as two ``Population``
            objects, each using ``config.evolution.speciation.size`` as the
            desired number of species.
        """
        def build(count, factory):
            # Create `count` individuals and call setup() on each one.
            individuals = []
            for _ in range(count):
                individual = factory()
                individual.setup()
                individuals.append(individual)
            return individuals

        generators = build(
            config.gan.generator.population_size,
            lambda: Generator(output_size=self.input_shape))
        discriminators = build(
            config.gan.discriminator.population_size,
            # [1] is the batch dimension
            lambda: Discriminator(output_size=1, input_shape=[1] + list(self.input_shape)))

        generators_population = Population(
            generators, desired_species=config.evolution.speciation.size)
        discriminators_population = Population(
            discriminators, desired_species=config.evolution.speciation.size)
        return generators_population, discriminators_population

    def train_evaluate(self, G, D, train_generator=True, train_discriminator=True, norm_g=1, norm_d=1):
        """Train generator G against discriminator D on batches from the train loader.

        Batches are consumed until ``config.gan.batches_limit`` is reached,
        restarting the loader when it is exhausted.  The discriminator is
        trained on every batch and the generator on every
        ``config.gan.critic_iterations``-th batch.  Afterwards the change in
        ``error`` / ``fitness_value`` made during this call is divided by
        the batch count times the normalization factor and added back to the
        value held before the call.

        Args:
            G: Generator individual.
            D: Discriminator individual.
            train_generator: Whether to run ``G.do_train``.
            train_discriminator: Whether to run ``D.do_train``.
            norm_g: Extra divisor applied to the generator's updates.
            norm_d: Extra divisor applied to the discriminator's updates.

        Returns:
            None.  G and D are modified in place; nothing is done when
            either of them is invalid.
        """
        if G.invalid or D.invalid:  # do not evaluate if G or D are invalid
            logger.warning("invalid D or G")
            return

        torch.cuda.empty_cache()
        # n counts batches seen, ng counts generator training steps.
        n, ng = 0, 0
        # A falsy error (e.g. None) is treated as 0.
        G.error = G.error or 0
        D.error = D.error or 0
        # Remember the values held before this call.
        # Presumably do_train accumulates into .error and .fitness_value - TODO confirm.
        g_error = G.error
        d_error = D.error
        d_fitness_value, g_fitness_value = D.fitness_value, G.fitness_value
        G, D = tools.cuda(G), tools.cuda(D)  # load everything on gpu (cuda)
        G.train()
        D.train()
        # NOTE(review): if the train loader yields no batches, n never grows
        # and this loop does not terminate (for a positive batches_limit).
        while n < config.gan.batches_limit:
            for images, _ in self.train_loader:
                # if n==0: print(images[0].mean())
                n += 1
                # n is incremented before the check, so after this break
                # n == batches_limit + 1.
                if n > config.gan.batches_limit:
                    break
                images = tools.cuda(Variable(images))
                if train_discriminator:
                    D.do_train(G, images)
                if train_generator and n % config.gan.critic_iterations == 0:
                    ng += 1
                    G.do_train(D, images)
        # NOTE(review): n is batches_limit + 1 when the loop ended through
        # the break above, so the divisors below can be one larger than the
        # number of batches actually trained on - confirm this is intended.
        if train_discriminator:
            D.error = d_error + (D.error - d_error)/(n*norm_d)
            D.fitness_value = d_fitness_value + (D.fitness_value - d_fitness_value) / (n * norm_d)
            # NOTE(review): the generator's fitness_value is updated in the
            # train_discriminator branch - confirm this is intended.
            G.fitness_value = g_fitness_value + (G.fitness_value - g_fitness_value) / (n * norm_g)
        if train_generator:
            # NOTE(review): ng is 0 when no batch index was a multiple of
            # critic_iterations, which makes this a division by zero.
            G.error = g_error + (G.error - g_error)/(ng*norm_g)
        G, D = G.cpu(), D.cpu()  # move variables back from gpu to cpu
        torch.cuda.empty_cache()

    def evaluate_population(self, generators, discriminators, previous_generators, previous_discriminators,
                            best_generators, best_discriminators,
                            evaluation_type=config.evolution.evaluation.type, initial=False):
        """Train and evaluate generators against discriminators using the given pairing strategy.

        Supported ``evaluation_type`` values:

        * ``"random"``: every discriminator is paired with 2 randomly chosen
          generators, then every generator with 2 randomly chosen
          discriminators.
        * ``"all-vs-all"``: every generator/discriminator pair, in random
          order.
        * ``"all-vs-best"``, ``"all-vs-species-best"``, ``"all-vs-kbest"``:
          all discriminators against the best generators (generator not
          trained), then all generators against the best discriminators
          (discriminator not trained).  With
          ``config.evolution.evaluation.initialize_all`` and ``initial`` set,
          the work is delegated to one ``"all-vs-all"`` evaluation instead.

        The pairing is repeated ``config.evolution.evaluation.iterations``
        times.  FID is calculated for every generator afterwards when it is
        used as generator fitness or requested for stats.

        Args:
            generators: List of generator individuals; shuffled in place.
            discriminators: List of discriminator individuals; shuffled in place.
            previous_generators: Only forwarded to the recursive call.
            previous_discriminators: Only forwarded to the recursive call.
            best_generators: Generators used by the "all-vs-*best" strategies.
            best_discriminators: Discriminators used by the "all-vs-*best" strategies.
            evaluation_type: Pairing strategy.  The default is read from
                ``config`` once, when the method is defined.
            initial: Whether this is the first evaluation of the run.

        Returns:
            None.
        """

        # Re-wrap the dataset in a randomly permuted full-length subset and
        # rebuild the train loader over it.
        # NOTE(review): every call wraps the previous subset in another one - confirm this is acceptable.
        self.train_dataset = torch.utils.data.random_split(self.train_dataset, [len(self.train_dataset)])[0]
        self.train_loader = torch.utils.data.DataLoader(self.train_dataset, batch_size=config.gan.batch_size)

        for i in range(config.evolution.evaluation.iterations):
            shuffle(generators)
            shuffle(discriminators)
            if evaluation_type == "random":
                for D in discriminators:
                    for g in np.random.choice(generators, 2, replace=False):
                        self.train_evaluate(g, D, norm_d=2, norm_g=len(discriminators))
                for G in generators:
                    for d in np.random.choice(discriminators, 2, replace=False):
                        self.train_evaluate(G, d, norm_d=len(generators), norm_g=2)
            elif evaluation_type == "all-vs-all":
                # train all-vs-all in a non-sequential order
                pairs = tools.permutations(generators, discriminators, random=True)
                for g, d in pairs:
                    self.train_evaluate(generators[g], discriminators[d], norm_d=len(generators), norm_g=len(discriminators))
            elif evaluation_type in ["all-vs-best", "all-vs-species-best", "all-vs-kbest"]:
                if config.evolution.evaluation.initialize_all and initial:
                    # as there are no way to determine the best G and D, we rely on all-vs-all for the first evaluation
                    # The recursive call runs the whole method (including the
                    # FID step below) and its return ends this call.
                    return self.evaluate_population(generators, discriminators,
                                                    previous_generators, previous_discriminators,
                                                    best_generators, best_discriminators, evaluation_type="all-vs-all")
                # Best generators are only used to train the discriminators.
                pairs = tools.permutations(best_generators, discriminators)
                for g, d in pairs:
                    self.train_evaluate(best_generators[g], discriminators[d], norm_d=len(best_generators), norm_g=len(discriminators), train_generator=False)
                # Best discriminators are only used to train the generators.
                pairs = tools.permutations(generators, best_discriminators)
                for g, d in pairs:
                    self.train_evaluate(generators[g], best_discriminators[d], norm_d=len(generators), norm_g=len(best_discriminators), train_discriminator=False)

        if config.evolution.fitness.generator == "FID" or config.stats.calc_fid_score:
            for G in generators:
                G.calc_fid()

        # do not evaluate in the validation data when there is only a single option
        # (the validation step below is commented out, so this early return
        # currently has no effect)
        if len(discriminators) == 1 and len(generators) == 1:
            return

        # evaluate in validation (all-vs-best)
        # for D in discriminators:
        #     for G in best_generators:
        #         with torch.no_grad():
        #             self.evaluate_validation(G, D, eval_generator=False)
        # for G in generators:
        #     for D in best_discriminators:
        #         with torch.no_grad():
        #             self.evaluate_validation(G, D, eval_discriminator=False)

    def evaluate_validation(self, G, D, eval_generator=True, eval_discriminator=True, norm_g=1, norm_d=1):
        """Evaluate G and D on every batch of the validation loader.

        The error of each evaluated network is reset to 0 first and, after
        all batches, divided by the number of batches times its
        normalization factor.

        Args:
            G: Generator individual.
            D: Discriminator individual.
            eval_generator: Whether to reset, evaluate and normalize G.
            eval_discriminator: Whether to reset, evaluate and normalize D.
            norm_g: Extra divisor for the generator error.
            norm_d: Extra divisor for the discriminator error.

        Returns:
            None.  Nothing is done when G or D is invalid.
        """
        # do not evaluate if G or D are invalid
        if G.invalid or D.invalid:
            logger.warning("invalid D or G")
            return

        if eval_discriminator:
            D.error = 0
        if eval_generator:
            G.error = 0

        # load everything on gpu (cuda)
        G, D = tools.cuda(G), tools.cuda(D)

        batch_count = 0
        for batch_count, (images, _) in enumerate(self.validation_loader, start=1):
            images = tools.cuda(Variable(images))
            if eval_discriminator:
                D.do_eval(G, images)
            if eval_generator:
                G.do_eval(D, images)

        if eval_discriminator:
            D.error /= batch_count*norm_d
        if eval_generator:
            G.error /= batch_count*norm_g

        # move variables back from gpu to cpu
        G, D = G.cpu(), D.cpu()

    def select(self, population, discard_percent=0, k=config.evolution.tournament_size):
        """Select parents per species, sized by average species fitness, using tournaments.

        Invalid individuals are removed from every species and species left
        empty are removed from ``population.species_list`` (the list is
        modified in place).  Each remaining species gets a number of
        selection slots proportional to its average fitness, or an equal
        share when the total fitness is 0.  Inside a species, each parent is
        the winner of a tournament of ``k`` draws, where the individual with
        the lowest ``fitness()`` value wins.

        Args:
            population: Population whose species are selected from.
            discard_percent: Fraction of each species discarded before the
                tournaments (``best_percent(1 - discard_percent)`` is kept).
            k: Tournament size.  The default is read from ``config`` once,
                when the method is defined.

        Returns:
            list: One list per species; each entry is a list of parents
            (two parents normally, a single individual for the kept best).
        """

        ### TOURNAMENT TEST
        # population_size = len(population.phenotypes())
        # phenotypes = population.phenotypes()
        # selected = []
        # for i in range(population_size):
        #     p = np.random.choice(phenotypes, 3, replace=False).tolist()
        #     p.sort(key=lambda x: x.fitness())
        #     selected.append([p[0], p[0]])
        # return [selected]
        ###

        population_size = len(population.phenotypes())
        species_selected = []
        species_list = population.species_list
        average_species_fitness_list = []
        # Iterate over a copy because empty species are removed from the list.
        for species in species_list[:]:
            species.remove_invalid()  # discard invalid individuals
            if len(species) > 0:
                average_species_fitness_list.append(species.average_fitness())
            else:
                species_list.remove(species)
        total_fitness = np.sum(average_species_fitness_list)

        # initialize raw sizes with equal proportion
        # NOTE(review): raises ZeroDivisionError when no species is left.
        raw_sizes = [population_size / len(species_list)] * len(species_list)
        if total_fitness != 0:
            # calculate proportional sizes when total fitness is not zero
            raw_sizes = [average_species_fitness / total_fitness * population_size
                         for average_species_fitness in average_species_fitness_list]

        # Presumably turns the raw sizes into integers summing to at most
        # population_size - TODO confirm against tools.round_array.
        sizes = tools.round_array(raw_sizes, max_sum=population_size, invert=True)

        for species, size in zip(species_list, sizes):
            # discard the lowest-performing individuals
            species = species.best_percent(1 - discard_percent)

            # tournament selection inside species
            selected = []

            # ensure that the best was selected
            # (assumes best_percent returns the best individual first - TODO confirm)
            if config.evolution.speciation.keep_best and size > 0:
                selected.append([species[0]])

            orig_species = list(species)
            for i in range(int(size) - len(selected)):
                parents = []
                for l in range(2):
                    winner = None
                    for j in range(k):
                        random_index = np.random.randint(0, len(species))
                        # The lower fitness() value wins the tournament.
                        if winner is None or species[random_index].fitness() < winner.fitness():
                            winner = species[random_index]
                        del species[random_index]  # remove element to emulate draw without replacement
                        if len(species) == 0:  # restore original list when there is no more individuals to draw
                            species = list(orig_species)
                    parents.append(winner)
                    if config.evolution.crossover_rate == 0:
                        # do not draw another individual from the population if there is no probability of crossover
                        parents.append(winner)
                        break
                selected.append(parents)

            species_selected.append(selected)
        return species_selected

    def generate_children(self, species_list, generation):
        """Breed one child from every parents entry of every species.

        The first parent breeds with the second one when present.  Without
        a mate, ``breed`` is called with ``mate=None`` and mutation is
        skipped.

        Args:
            species_list: Iterable of species, each an iterable of parent
                lists.
            generation: Value stored in each child's ``genome.generation``.

        Returns:
            list: The children, in species and parents order.
        """
        offspring = []
        for selected_parents in species_list:
            for parents in selected_parents:
                has_mate = len(parents) > 1
                mate = parents[1] if has_mate else None
                # skip mutation when there is no mate
                child = parents[0].breed(mate=mate, skip_mutation=mate is None)
                child.genome.generation = generation
                offspring.append(child)
        return offspring

    def replace_population(self, generators_population, discriminators_population, g_children, d_children):
        """Build the next populations from the elites and the sorted children.

        The elite individuals (``best_percent(config.evolution.elitism)``)
        of each current population come first; the children, sorted by
        ascending ``fitness()``, follow, with as many dropped from the end
        as there are elites.

        Returns:
            tuple: ``(generators, discriminators)`` as new ``Population``
            objects that reuse the previous speciation thresholds.
        """
        def by_fitness(individual):
            return individual.fitness()

        elitism = config.evolution.elitism
        elite_d = discriminators_population.best_percent(elitism)
        elite_g = generators_population.best_percent(elitism)

        ranked_g = sorted(g_children, key=by_fitness)
        ranked_d = sorted(d_children, key=by_fitness)

        next_generators = elite_g + ranked_g[:len(ranked_g) - len(elite_g)]
        generators = Population(
            next_generators,
            desired_species=config.evolution.speciation.size,
            speciation_threshold=generators_population.speciation_threshold)

        next_discriminators = elite_d + ranked_d[:len(ranked_d) - len(elite_d)]
        discriminators = Population(
            next_discriminators,
            desired_species=config.evolution.speciation.size,
            speciation_threshold=discriminators_population.speciation_threshold)

        return generators, discriminators

    def get_bests(self, population, previous_best):
        """Pick the reference individuals used when evaluating a population.

        The choice depends on ``config.evolution.evaluation.type``:

        * ``"all-vs-species-best"``: the best individual of every species.
        * ``"all-vs-best"``: the current single best followed by
          ``previous_best``, cut to ``best_size`` entries.
        * ``"all-vs-kbest"``: the ``best_size`` best individuals.

        :param population: population to take the best individuals from.
        :param previous_best: list of earlier best individuals.
        :return: list of individuals, or ``None`` for any other evaluation type.
        """
        mode = config.evolution.evaluation.type

        if mode == "all-vs-species-best":
            return [group.best() for group in population.species_list]

        if mode == "all-vs-best":
            merged = population.bests(1) + previous_best
            return merged[:config.evolution.evaluation.best_size]

        if mode == "all-vs-kbest":
            return population.bests(config.evolution.evaluation.best_size)

        return None

    def start(self):
        """Run the coevolution loop for the generator and discriminator populations.

        Steps, in order:

        1. Initialise FID when the generator fitness is "FID" or FID stats are
           requested in the config.
        2. Create the initial populations, seed the "best" lists with randomly
           drawn phenotypes and run the initial evaluation.
        3. For ``config.evolution.max_generations - 1`` iterations: record
           stats, select parents, breed children, evaluate them, refresh the
           best lists and replace both populations.
        4. Record stats once more for the final populations.
        """
        if config.evolution.fitness.generator == "FID" or config.stats.calc_fid_score:
            generative_score.initialize_fid(self.train_loader, sample_size=config.evolution.fitness.fid_sample_size)

        generators_population, discriminators_population = self.generate_intial_population()
        # initialize best_discriminators and best_generators with random individuals
        # (drawn without replacement, so best_size must not exceed the number of phenotypes)
        best_discriminators = list(np.random.choice(discriminators_population.phenotypes(), config.evolution.evaluation.best_size, replace=False))
        best_generators = list(np.random.choice(generators_population.phenotypes(), config.evolution.evaluation.best_size, replace=False))
        # initial evaluation
        self.evaluate_population(generators_population.phenotypes(), discriminators_population.phenotypes(),
                                 generators_population, discriminators_population,
                                 best_generators, best_discriminators, initial=True)
        # store best individuals
        best_discriminators = self.get_bests(discriminators_population, best_discriminators)
        best_generators = self.get_bests(generators_population, best_generators)
        # pre-set so that `generation` is defined after the loop even if the loop body never runs
        generation = 0

        for generation in tqdm(range(config.evolution.max_generations-1)):
            # stats are recorded for the populations as they stand at the start of this generation
            self.stats.generate(self.input_shape, generators_population, discriminators_population,
                                generation, config.evolution.max_generations, self.train_loader, self.validation_loader)
            # select parents for reproduction
            g_parents = self.select(generators_population)
            d_parents = self.select(discriminators_population)
            # apply variation operators (only mutation for now)
            g_children = self.generate_children(g_parents, generation)

            # limit the number of layers in D's to the max layers among G's
            # (set on the first parent of every group before the D children are bred)
            max_layers_g = max([len(gc.genome.genes) for gc in g_children])
            for s in d_parents:
                for dp in s:
                    dp[0].genome.max_layers = max_layers_g

            d_children = self.generate_children(d_parents, generation)
            # evaluate the children population and the best individuals (when elitism is being used)
            logger.debug(f"[generation {generation}] evaluate population")
            self.evaluate_population(g_children, d_children, generators_population, discriminators_population, best_generators, best_discriminators)
            # store best of generation in coevolution memory
            best_discriminators = self.get_bests(discriminators_population, best_discriminators)
            best_generators = self.get_bests(generators_population, best_generators)
            # generate a new population based on the fitness of the children and elite individuals
            generators_population, discriminators_population = self.replace_population(generators_population,
                                                                                        discriminators_population,
                                                                                        g_children, d_children)
        # stats for last generation
        # NOTE(review): when the loop above never runs (max_generations <= 1) `generation` is still 0,
        # so these stats are recorded under generation index 1 - confirm this is intended.
        self.stats.generate(self.input_shape, generators_population, discriminators_population,
                            generation+1, config.evolution.max_generations, self.train_loader, self.validation_loader)
コード例 #17
0
   script = sys.argv[1]
   db = sys.argv[2]
   numChildren = int(sys.argv[3])
   machineNumber = int(sys.argv[4])
   expOffset = int(sys.argv[5])

   # Set the global offset to one more than machine number times a billion.
   globalOffset = machineNumber * 1000000000 + 1
except:
   print "Usage: %s <experiment file> <db name> <num children> <machine number> <experiment offset>" % sys.argv[0]
   sys.exit()

def launchChild(number):
   """Run the experiment script in a child process and wait for it to finish.

   The child's start value is ``globalOffset + number * 100000000`` and its
   end value is ``start + expOffset``; both are passed to the script as
   strings after the database name.

   :param number: zero-based index of the child to launch.
   :return: exit status of the child process as returned by ``subprocess.call``.
   """
   processOffset = number * 100000000
   start = globalOffset + processOffset
   end = start + expOffset

   # Parenthesised single-argument form: prints the same text under the
   # Python 2 print statement and is also valid syntax on Python 3.
   print("launching %s with start = %s and end = %s" % (number, start, end))

   # Hand the exit status back instead of dropping it so the caller can detect failed children.
   return subprocess.call(["python", script, db, str(start), str(end)])

# Launch children and block until they finish.
# One worker process per child; map() returns only once every launchChild call has returned.
pool = Pool(processes = numChildren)
pool.map(launchChild, range(0, numChildren))

# Collect every *.dump file from the stats/ directory that sits next to this script.
fpath = os.path.dirname(__file__)
dumps = glob.glob(os.path.join(fpath, "stats/*.dump"))

# Load all dump files found above, then output the statistics
# (presumably the dumps are the ones written by the children - verify against the experiment scripts).
Stats.load(*dumps)
Stats.output()
コード例 #18
0
ファイル: lstmp2h.py プロジェクト: BinbinBian/LSTM-NLI
    def train(self, numEpochs=1, batchSize=5, learnRateVal=0.1, numExamplesToTrain=-1, gradMax=3.,
                L2regularization=0.0, dropoutRate=0.0, sentenceAttention=False,
                wordwiseAttention=False):
        """
        Takes care of training model, including propagation of errors and updating of
        parameters.

        Runs ``numEpochs`` passes of minibatch updates, records the cost after
        every minibatch and the dev accuracy whenever the running example count
        is a multiple of 100, saves the model to disk and finally records the
        validation accuracy.

        :param numEpochs: number of passes of the outer training loop.
        :param batchSize: minibatch size passed to ``getMinibatchesIdx`` and ``self.trainFunc``.
        :param learnRateVal: learning rate passed to both parameter-update functions.
        :param numExamplesToTrain: when > 0, the validation matrices are cut to the
            first ``numExamplesToTrain`` examples and minibatch indices are drawn
            over that many examples; otherwise indices are drawn over
            ``len(trainGoldLabel)``.
        :param gradMax: forwarded to ``self.trainFunc``.
        :param L2regularization: forwarded to ``self.trainFunc``.
        :param dropoutRate: forwarded to ``self.trainFunc`` and ``self.predictFunc``.
        :param sentenceAttention: forwarded to ``self.trainFunc``.
        :param wordwiseAttention: forwarded to ``self.trainFunc``.
        """
        expName = "Epochs_{0}_LRate_{1}_L2Reg_{2}_dropout_{3}_sentAttn_{4}_" \
                       "wordAttn_{5}".format(str(numEpochs), str(learnRateVal),
                                             str(L2regularization), str(dropoutRate),
                                             str(sentenceAttention), str(wordwiseAttention))
        # locals() at this point holds self, every argument of this call and expName
        self.configs.update(locals())
        trainPremiseIdxMat, trainHypothesisIdxMat = self.embeddingTable.convertDataToIdxMatrices(
                                  self.trainData, self.trainDataStats)
        trainGoldLabel = convertLabelsToMat(self.trainData)

        valPremiseIdxMat, valHypothesisIdxMat = self.embeddingTable.convertDataToIdxMatrices(
                                self.valData, self.valDataStats)
        valGoldLabel = convertLabelsToMat(self.valData)

        # If you want to train on less than full dataset
        # (keeps only the first numExamplesToTrain entries along the second axis of the validation matrices)
        if numExamplesToTrain > 0:
            valPremiseIdxMat = valPremiseIdxMat[:, range(numExamplesToTrain), :]
            valHypothesisIdxMat = valHypothesisIdxMat[:, range(numExamplesToTrain), :]
            valGoldLabel = valGoldLabel[range(numExamplesToTrain)]


        #Whether zero-padded on left or right
        pad = "right"

        # Get full premise/hypothesis tensors
        # batchPremiseTensor, batchHypothesisTensor, batchLabels = \
        #             convertDataToTrainingBatch(valPremiseIdxMat, self.numTimestepsPremise, valHypothesisIdxMat,
        #                                        self.numTimestepsHypothesis, "right", self.embeddingTable,
        #                                        valGoldLabel, range(len(valGoldLabel)))
        #sharedValPremise = theano.shared(batchPremiseTensor)
        #sharedValHypothesis = theano.shared(batchHypothesisTensor)
        #sharedValLabels = theano.shared(batchLabels)


        # Symbolic inputs handed to self.trainFunc / self.predictFunc below
        inputPremise = T.ftensor3(name="inputPremise")
        inputHypothesis = T.ftensor3(name="inputHypothesis")
        yTarget = T.fmatrix(name="yTarget")
        learnRate = T.scalar(name="learnRate", dtype='float32')


        # Only the first five of the seven values returned by self.trainFunc are used here
        fGradSharedHypothesis, fGradSharedPremise, fUpdatePremise, \
            fUpdateHypothesis, costFn, _, _ = self.trainFunc(inputPremise,
                                            inputHypothesis, yTarget, learnRate, gradMax,
                                            L2regularization, dropoutRate, sentenceAttention,
                                            wordwiseAttention, batchSize)

        # Running count of examples processed across all epochs
        totalExamples = 0
        stats = Stats(self.logger, expName)

        # Training
        self.logger.Log("Model configs: {0}".format(self.configs))
        self.logger.Log("Starting training with {0} epochs, {1} batchSize,"
                " {2} learning rate, {3} L2regularization coefficient, and {4} dropout rate".format(
            numEpochs, batchSize, learnRateVal, L2regularization, dropoutRate))


        predictFunc = self.predictFunc(inputPremise, inputHypothesis, dropoutRate)

        for epoch in xrange(numEpochs):
            self.logger.Log("Epoch number: %d" %(epoch))

            # Minibatch indices are regenerated at the start of every epoch
            if numExamplesToTrain > 0:
                minibatches = getMinibatchesIdx(numExamplesToTrain, batchSize)
            else:
                minibatches = getMinibatchesIdx(len(trainGoldLabel), batchSize)

            # Count of examples processed in the current epoch only
            numExamples = 0
            for _, minibatch in minibatches:
                # presumably 1.0 switches dropout on for the training step - TODO confirm
                self.dropoutMode.set_value(1.0)
                numExamples += len(minibatch)
                totalExamples += len(minibatch)

                self.logger.Log("Processed {0} examples in current epoch".
                                format(str(numExamples)))

                # NOTE(review): the batch is built from the *validation* matrices and labels, not the
                # training ones, while the indices above may range over len(trainGoldLabel) - confirm
                # this is intended.
                batchPremiseTensor, batchHypothesisTensor, batchLabels = \
                    convertDataToTrainingBatch(valPremiseIdxMat, self.numTimestepsPremise, valHypothesisIdxMat,
                                               self.numTimestepsHypothesis, pad, self.embeddingTable,
                                               valGoldLabel, minibatch)

                # The values returned by the two gradient functions are not used afterwards
                gradHypothesisOut = fGradSharedHypothesis(batchPremiseTensor,
                                       batchHypothesisTensor, batchLabels)
                gradPremiseOut = fGradSharedPremise(batchPremiseTensor,
                                       batchHypothesisTensor, batchLabels)
                fUpdatePremise(learnRateVal)
                fUpdateHypothesis(learnRateVal)

                # predictLabels is only referenced by the commented-out log line below
                predictLabels = self.predict(batchPremiseTensor, batchHypothesisTensor, predictFunc)
                #self.logger.Log("Labels in epoch {0}: {1}".format(epoch, str(predictLabels)))


                # Cost is computed after the parameter updates of this minibatch
                cost = costFn(batchPremiseTensor, batchHypothesisTensor, batchLabels)
                stats.recordCost(totalExamples, cost)

                # Note: Big time sink happens here
                # (only fires when the running total lands exactly on a multiple of 100)
                if totalExamples%(100) == 0:
                    # TODO: Don't compute accuracy of dev set
                    self.dropoutMode.set_value(0.0)
                    devAccuracy = self.computeAccuracy(valPremiseIdxMat,
                                                       valHypothesisIdxMat, valGoldLabel, predictFunc)
                    stats.recordAcc(totalExamples, devAccuracy, "dev")


        stats.recordFinalTrainingTime(totalExamples)

        # Save model to disk
        self.logger.Log("Saving model...")
        self.extractParams()
        configString = "batch={0},epoch={1},learnRate={2},dimHidden={3},dimInput={4}".format(str(batchSize),
                                            str(numEpochs), str(learnRateVal),
                                            str(self.dimHidden), str(self.dimInput))
        self.saveModel(currDir + "/savedmodels/basicLSTM_"+configString+".npz")
        self.logger.Log("Model saved!")

        # Set dropout to 0. again for testing
        self.dropoutMode.set_value(0.0)

        #Train Accuracy
        # trainAccuracy = self.computeAccuracy(trainPremiseIdxMat,
        #                              trainHypothesisIdxMat, trainGoldLabel, predictFunc)
        # self.logger.Log("Final training accuracy: {0}".format(trainAccuracy))

        # Val Accuracy
        valAccuracy = self.computeAccuracy(valPremiseIdxMat,
                                    valHypothesisIdxMat, valGoldLabel, predictFunc)
        # TODO: change -1 for training acc to actual value when I enable train computation
        stats.recordFinalStats(totalExamples, -1, valAccuracy)
コード例 #19
0
ファイル: sum_embeddings.py プロジェクト: BinbinBian/NNLI
def main(exp_name, embed_data, train_data, train_data_stats, val_data, val_data_stats,
         test_data, test_data_stats, log_path, batch_size, num_epochs,
         unroll_steps, learn_rate, num_dense, dense_dim, penalty, reg_coeff):
    """
    Main run function for training model.

    Builds a network that sums the word embeddings of premise and hypothesis,
    concatenates the two sums, applies the requested dense layers with a
    softmax on top, and trains it with RMSProp while logging cost and
    accuracy through ``Stats``.

    :param exp_name: Experiment name passed to ``Stats``
    :param embed_data: Source handed to ``EmbeddingTable``
    :param train_data: Training data for ``generate_data`` / ``convertLabelsToMat``
    :param train_data_stats: Stats passed to ``generate_data`` with the training data
    :param val_data: Validation data for ``generate_data`` / ``convertLabelsToMat``
    :param val_data_stats: Stats passed to ``generate_data`` with the validation data
    :param test_data: Not used by this function
    :param test_data_stats: Not used by this function
    :param log_path: Not used by this function
    :param batch_size: Minibatch size; also the first dimension of both input layers
    :param num_epochs: Number of passes over the minibatch list
    :param unroll_steps: Sequence length of the premise/hypothesis inputs
    :param learn_rate: Learning rate handed to RMSProp
    :param num_dense: Number of dense fully connected layers to add after concatenation layer
    :param dense_dim: Dimension of dense FC layers -- note this only applies if num_dense > 1
    :param penalty: Penalty to use for regularization, either "l1" or "l2"
    :param reg_coeff: Regularization coeff to use for each layer of network; may
                      want to support different coefficient for different layers
    :raises ValueError: if ``penalty`` is neither "l1" nor "l2"
    :return: None
    """
    # Set random seed for deterministic results
    np.random.seed(0)
    # Hard-coded: the validation arrays below are cut to this many examples
    num_ex_to_train = 30

    # Load embedding table
    table = EmbeddingTable(embed_data)
    vocab_size = table.sizeVocab
    dim_embeddings = table.dimEmbeddings
    embeddings_mat = table.embeddings


    train_prem, train_hyp = generate_data(train_data, train_data_stats, "left", "right", table, seq_len=unroll_steps)
    val_prem, val_hyp = generate_data(val_data, val_data_stats, "left", "right", table, seq_len=unroll_steps)
    train_labels = convertLabelsToMat(train_data)
    val_labels = convertLabelsToMat(val_data)

    # To test for overfitting capabilities of model
    if num_ex_to_train > 0:
        val_prem = val_prem[0:num_ex_to_train]
        val_hyp = val_hyp[0:num_ex_to_train]
        val_labels = val_labels[0:num_ex_to_train]

    # Theano expressions for premise/hypothesis inputs to network
    x_p = T.imatrix()
    x_h = T.imatrix()
    target_values = T.fmatrix(name="target_output")


    # Embedding layer for premise
    l_in_prem = InputLayer((batch_size, unroll_steps))
    l_embed_prem = EmbeddingLayer(l_in_prem, input_size=vocab_size,
                        output_size=dim_embeddings, W=embeddings_mat)

    # Embedding layer for hypothesis
    l_in_hyp = InputLayer((batch_size, unroll_steps))
    l_embed_hyp = EmbeddingLayer(l_in_hyp, input_size=vocab_size,
                        output_size=dim_embeddings, W=embeddings_mat)


    # Ensure embedding matrix parameters are not trainable
    l_embed_hyp.params[l_embed_hyp.W].remove('trainable')
    l_embed_prem.params[l_embed_prem.W].remove('trainable')

    l_embed_hyp_sum = SumEmbeddingLayer(l_embed_hyp)
    l_embed_prem_sum = SumEmbeddingLayer(l_embed_prem)

    # Concatenate sentence embeddings for premise and hypothesis
    l_concat = ConcatLayer([l_embed_hyp_sum, l_embed_prem_sum])

    l_in = l_concat
    l_output = l_concat
    # Add 'num_dense' dense layers with tanh
    # top layer is softmax
    if num_dense > 1:
        for n in range(num_dense):
            if n == num_dense-1:
                l_output = DenseLayer(l_in, num_units=NUM_DENSE_UNITS, nonlinearity=lasagne.nonlinearities.softmax)
            else:
                l_in = DenseLayer(l_in, num_units=dense_dim, nonlinearity=lasagne.nonlinearities.tanh)
    else:
        l_output = DenseLayer(l_in, num_units=NUM_DENSE_UNITS, nonlinearity=lasagne.nonlinearities.softmax)

    network_output = get_output(l_output, {l_in_prem: x_p, l_in_hyp: x_h}) # Will have shape (batch_size, 3)
    f_dense_output = theano.function([x_p, x_h], network_output, on_unused_input='warn')

    # Compute cost
    if penalty == "l2":
        p_metric = l2
    elif penalty == "l1":
        p_metric = l1
    else:
        # Without this branch p_metric stays unbound and the failure surfaces
        # a few lines later as an opaque UnboundLocalError.
        raise ValueError("penalty must be 'l1' or 'l2', got {0!r}".format(penalty))

    layers = lasagne.layers.get_all_layers(l_output)
    layer_dict = {l: reg_coeff for l in layers}
    # NOTE(review): reg_coeff is both the per-layer weight in layer_dict and the
    # outer multiplier here, so it looks like it is applied twice - confirm intent.
    reg_cost = reg_coeff * regularize_layer_params_weighted(layer_dict, p_metric)
    cost = T.mean(T.nnet.categorical_crossentropy(network_output, target_values).mean()) + reg_cost
    compute_cost = theano.function([x_p, x_h, target_values], cost)

    # Compute accuracy
    accuracy = T.mean(T.eq(T.argmax(network_output, axis=-1), T.argmax(target_values, axis=-1)),
                      dtype=theano.config.floatX)
    compute_accuracy = theano.function([x_p, x_h, target_values], accuracy)

    label_output = T.argmax(network_output, axis=-1)
    predict = theano.function([x_p, x_h], label_output)

    # Define update/train functions
    all_params = lasagne.layers.get_all_params(l_output, trainable=True)
    updates = lasagne.updates.rmsprop(cost, all_params, learn_rate)
    train = theano.function([x_p, x_h, target_values], cost, updates=updates)

    # TODO: Augment embedding layer to allow for masking inputs

    stats = Stats(exp_name)
    # Accuracy is logged whenever the example count is a multiple of acc_num * batch_size
    acc_num = 10

    #minibatches = getMinibatchesIdx(val_prem.shape[0], batch_size)
    minibatches = getMinibatchesIdx(train_prem.shape[0], batch_size)
    print("Training ...")
    try:
        total_num_ex = 0
        for epoch in xrange(num_epochs):
            for _, minibatch in minibatches:
                total_num_ex += len(minibatch)
                stats.log("Processed {0} total examples in epoch {1}".format(str(total_num_ex),
                                                                          str(epoch)))

                #prem_batch = val_prem[minibatch]
                #hyp_batch = val_hyp[minibatch]
                #labels_batch = val_labels[minibatch]

                prem_batch = train_prem[minibatch]
                hyp_batch = train_hyp[minibatch]
                labels_batch = train_labels[minibatch]

                train(prem_batch, hyp_batch, labels_batch)
                # Cost is re-evaluated after the update step for logging
                cost_val = compute_cost(prem_batch, hyp_batch, labels_batch)

                stats.recordCost(total_num_ex, cost_val)
                # Periodically compute and log train/dev accuracy
                if total_num_ex%(acc_num*batch_size) == 0:
                    train_acc = compute_accuracy(train_prem, train_hyp, train_labels)
                    dev_acc = compute_accuracy(val_prem, val_hyp, val_labels)
                    stats.recordAcc(total_num_ex, train_acc, dataset="train")
                    stats.recordAcc(total_num_ex, dev_acc, dataset="dev")

    except KeyboardInterrupt:
        # Ctrl-C deliberately ends training early without raising
        pass
コード例 #20
0
   def main(self):
      """Call getPeopleAndParents(10000), then output the stats and dump them to this experiment's dump file."""
      self.getPeopleAndParents(10000)

      Stats.output()
      # the dump file name is looked up only after the stats have been output
      dumpFileName = self.getDumpFileName()
      Stats.dump(dumpFileName)
コード例 #21
0
ファイル: time-based_split.py プロジェクト: zwanli/ConvMF
    def split(self):
        """Split the ratings into consecutive time-based folds and write each fold out.

        Starting from 2005-03-31, every fold trains on all ratings dated before
        the fold's start date and tests on the ratings of the following
        ``self.split_duration`` months made by users already present in the
        training part (and, unless ``self.out_of_matrix`` is set, on papers
        already present there too).  The start date then moves forward by one
        period, so the training part grows from fold to fold.

        For every fold the train/test rating lists, the document-term file and
        the user/paper id mappings are written to a ``fold-<n>`` directory
        under ``self.base_dir``.  After the last fold the split is plotted via
        ``self.plot_split_lines`` and the fold statistics are saved to
        ``stats.txt``.
        """
        # Get the mapping as a list of user_hash where the key is the corresponding index:
        # (items are ordered by the dict value, then only the dict keys are kept)
        userhash_userid_map_list = list(self.users_dict.items())
        userhash_userid_map_list.sort(key=lambda x: x[1])
        user_id_userhash_map_list = np.array(
            [i for (i, _) in userhash_userid_map_list])

        # Get the mapping as a list of doc_ids where the key is the corresponding index:
        docid_paperid_map_list = list(self.papers_dict.items())
        docid_paperid_map_list.sort(key=lambda x: x[1])
        paper_id_docid_map_list = np.array(
            [i for (i, _) in docid_paperid_map_list])

        # Get the ratings list integrated with time stamps:
        ratings_list = self.integrate_raings_timestamp(self.users_dict,
                                                       self.papers_dict)

        fr = pd.DataFrame(data=ratings_list, columns=['user', 'paper', 'date'])
        print("Ratings: {}, users: {}, papers: {}.".format(
            len(fr), fr.user.nunique(), fr.paper.nunique()))

        # First split date:
        d1 = datetime.strptime('2005-03-31', "%Y-%m-%d").date()

        # Last date:
        last_date = fr.date.max()
        # NOTE(review): d1.month is not subtracted here, so this is not the plain number of
        # months between d1 and last_date - confirm whether that is intended.
        ratings_period = (last_date.year - d1.year) * 12 + last_date.month

        # These lists are used for plotting:
        tr_rs, tr_us, tr_ps, ts_rs, ts_us, ts_ps, rat, dates = [], [], [], [], [], [], [], []

        # Number of whole split periods (of split_duration months) inside ratings_period
        folds_num = ratings_period // self.split_duration

        # For split stats:
        stats_header = ['{:4}'.format('Fold'), '{:20}'.format('#Usrs(Tot,R,S)'),'{:23}'.format('#Itms(Tot,R,S)'),'{:23}'.format('#Rtng(Tot,R,S)'),\
                        '{:23}'.format('PRU(min/max/avg/std)'), '{:22}'.format('PSU(min/max/avg/std)'), '{:20}'.format('PRI(min/max/avg/std)'), '{:20}'.format('PSI(min/max/avg/std)')]
        # Only this header is appended to self.stat_list in this method; the code that appended
        # per-fold rows sits in the disabled string block further down.
        self.stat_list.append(stats_header)
        stats = Stats()
        for fold in range(folds_num):
            # End (exclusive) of this fold's test period
            d2 = d1 + relativedelta(months=self.split_duration)

            # Training ratings:
            f1 = fr[fr['date'] < d1]

            # Test ratings:
            # out-of-matrix: only the user must already appear in the training ratings;
            # in-matrix: both the user and the paper must already appear there.
            if self.out_of_matrix:
                f2 = fr[(fr['date'] >= d1) & (fr['date'] < d2)
                        & fr['user'].isin(f1['user'])]
            else:
                f2 = fr[(fr['date'] >= d1) & (fr['date'] < d2)
                        & fr['user'].isin(f1['user']) &
                        (fr['paper'].isin(f1['paper']))]
            # NOTE(review): len(f2) / len(f1) raises ZeroDivisionError when no rating is dated before d1.
            print("{}->{}, Tr:[Rs: {:6}, Us: {:5}, Ps: {:6}], Te:[Rs: {:5}, Us: {:5}, Ps: {:6}], Ratio: {:04.2f}%"\
                  .format(d1, d2, len(f1), f1.user.nunique(), f1.paper.nunique(), len(f2), f2.user.nunique(), f2.paper.nunique(), len(f2) / len(f1) * 100))

            # Generate data for the folds:
            # (n_users and n_papers are only referenced inside the disabled string block below)
            train_l_users, train_l_users_age, train_l_items, test_l_users, test_l_items, useridx_user_id_map_list, paperidx_paper_id_map_list, n_users, n_papers = self.generate_fold(
                d1, f1, f2)
            stats.add_fold_statistics(fold + 1, train_l_users, test_l_users,
                                      train_l_items, test_l_items)
            # The string literal below is disabled code kept as a bare expression; it has no effect at runtime.
            """
            tru = [len(i) for i in train_l_users]
            tsu = [len(i) for i in test_l_users]
            tri = [len(i) for i in train_l_items]
            tsi = [len(i) for i in test_l_items]
            self.stat_list.append(['{:4}'.format(fold + 1), '{:5d} / {:5d} / {:4d}'.format(n_users, f1.user.nunique(), f2.user.nunique()),
                                   '{:6d} / {:6d} / {:5d}'.format(n_papers, f1.paper.nunique(), f2.paper.nunique()),\
                                   '{:6d} / {:6d} / {:5d}'.format(f1.shape[0]+ f2.shape[0], f1.shape[0], f2.shape[0]), \
                                   '{:1d} / {:4d} / {:4.1f} / {:5.1f}'.format(np.min(tru), np.max(tru), np.mean(tru), np.std(tru)),\
                                   '{:1d} / {:4d} / {:4.1f} / {:4.1f}'.format(np.min(tsu), np.max(tsu), np.mean(tsu), np.std(tsu)),\
                                   '{:1d} / {:3d} / {:4.1f} / {:3.1f}'.format(np.min(tri), np.max(tri), np.mean(tri), np.std(tri)),\
                                   '{:1d} / {:3d} / {:4.1f} / {:3.1f}'.format(np.min(tsi), np.max(tsi), np.mean(tsi), np.std(tsi))])
            """

            # Write to file:
            # Folds live under a directory named after the split mode, one "fold-<n>" sub-directory each.
            fold_folder = os.path.join(
                self.base_dir, 'time-based_split_out-of-matrix'
                if self.out_of_matrix else 'time-based_split_in-matrix',
                'fold-{}'.format(fold + 1))
            if not os.path.exists(fold_folder):
                os.makedirs(fold_folder)

            write_ratings(train_l_users,
                          os.path.join(fold_folder, 'train-users.dat'))
            write_ratings(train_l_users_age,
                          os.path.join(fold_folder, 'train-users-ages.dat'))
            write_ratings(test_l_users,
                          os.path.join(fold_folder, 'test-users.dat'))
            write_ratings(train_l_items,
                          os.path.join(fold_folder, 'train-items.dat'))
            write_ratings(test_l_items,
                          os.path.join(fold_folder, 'test-items.dat'))

            print("Generating the new mult file...")
            self.generate_docs_terms(self.docs_vocabs,
                                     paperidx_paper_id_map_list, self.terms,
                                     fold_folder)

            # Write users and papers mappings to files:
            # Index the global key arrays with this fold's id lists, then write (key, position) pairs.
            useridx_userhash = user_id_userhash_map_list[
                useridx_user_id_map_list]
            write_list_to_file(
                [(j, i) for (i, j) in enumerate(useridx_userhash)],
                os.path.join(fold_folder, 'citeulikeUserHash_userId_map.dat'),
                header=['citeulikeUserHash', 'user_id'])

            paperidx_docid = paper_id_docid_map_list[
                paperidx_paper_id_map_list]
            write_list_to_file([(j, i)
                                for (i, j) in enumerate(paperidx_docid)],
                               os.path.join(fold_folder,
                                            'citeulikeId_docId_map.dat'),
                               header=['citeulikeId', 'paper_id'])

            # For plotting:
            dates.append(d2)
            tr_rs.append(len(f1))
            tr_us.append(f1.user.nunique())
            tr_ps.append(f1.paper.nunique())
            ts_rs.append(len(f2))
            ts_us.append(f2.user.nunique())
            ts_ps.append(f2.paper.nunique())
            rat.append(len(f2) / len(f1) * 100)
            # The next fold starts where this one ended, so its training part also covers this test period.
            d1 = d2
        self.plot_split_lines(tr_rs, tr_us, tr_ps, ts_rs, ts_us, ts_ps, rat,
                              dates)

        # Write split statistics to file:
        stats.save_stats_to_file(
            os.path.join(
                self.base_dir, 'time-based_split_out-of-matrix'
                if self.out_of_matrix else 'time-based_split_in-matrix',
                'stats.txt'))
コード例 #22
0
def main():
    """Play the configured number of games and report the collected statistics.

    Initialises the database and the card/character definitions, then runs
    ``run_constants["num_games"]`` games against the same Q-table.  After each
    game the agents' learning rate is multiplied by
    ``1 - agent_constants["learning_rate_decay"]``.  Finally the database is
    committed and destroyed, and the stats are printed and graphed.
    """
    # TODO implement command-line args

    # the database has to be initialised before its Q-table is fetched
    Database.initialize()
    q_table = Database.getQTable()

    card_defs = loadCardDefinitions()
    character_defs = loadCharacterDefinitions()

    # register the card definitions so they can be queried
    CardDefinitions.setDefinitions(
        card_defs["main"],
        card_defs["treasures"],
        card_defs["answers"],
    )

    # number of games in this run, and how often a game is verbose
    total_games = run_constants["num_games"]
    verbose_every = run_constants["verbose_mod"]

    # game params as defined in game/game.py
    # (available keys: "num_agents", "num_humans", "max_turns")
    game_params = {}

    # agent params as defined in player/agent.py
    # (further keys: "discount_factor", "endgame_discount_factor",
    #  "random_action_rate", "dyna_steps")
    agent_params = {"learning_rate": agent_constants["learning_rate"]}

    # deck params as defined in game/game.py
    deck_params = {
        "main_cards": CardDefinitions.cards["main"],
        "treasure_cards": CardDefinitions.cards["treasures"],
        "answer_cards": CardDefinitions.cards["answers"],
    }

    # character params as defined in game/game.py
    character_params = {"characters": character_defs}

    for game_index in range(total_games):
        # the last game of every block of `verbose_every` games is verbose
        remainder = game_index % verbose_every
        is_verbose = remainder == verbose_every - 1
        for params in (game_params, agent_params):
            params["verbose"] = is_verbose

        print("Running game {}".format(game_index + 1))
        current_game = Game(q_table, game_params, agent_params, deck_params, character_params)
        current_game.run()
        Stats.recordStat("games")

        # decay the learning rate used by the next game
        agent_params["learning_rate"] *= 1 - agent_constants["learning_rate_decay"]

    Database.commit()

    # deinitialize database
    Database.destroy()

    Stats.printStats()
    Stats.printQStats(q_table)
    Stats.graphChosenActionUsage()
    Stats.graphTurnCountPerGame()
コード例 #23
0
ファイル: lstmp2h.py プロジェクト: BinbinBian/LSTM-NLI
    def train(self,
              numEpochs=1,
              batchSize=5,
              learnRateVal=0.1,
              numExamplesToTrain=-1,
              gradMax=3.,
              L2regularization=0.0,
              dropoutRate=0.0,
              sentenceAttention=False,
              wordwiseAttention=False):
        """
        Takes care of training model, including propagation of errors and updating of
        parameters.
        """
        expName = "Epochs_{0}_LRate_{1}_L2Reg_{2}_dropout_{3}_sentAttn_{4}_" \
                       "wordAttn_{5}".format(str(numEpochs), str(learnRateVal),
                                             str(L2regularization), str(dropoutRate),
                                             str(sentenceAttention), str(wordwiseAttention))
        self.configs.update(locals())
        trainPremiseIdxMat, trainHypothesisIdxMat = self.embeddingTable.convertDataToIdxMatrices(
            self.trainData, self.trainDataStats)
        trainGoldLabel = convertLabelsToMat(self.trainData)

        valPremiseIdxMat, valHypothesisIdxMat = self.embeddingTable.convertDataToIdxMatrices(
            self.valData, self.valDataStats)
        valGoldLabel = convertLabelsToMat(self.valData)

        # If you want to train on less than full dataset
        if numExamplesToTrain > 0:
            valPremiseIdxMat = valPremiseIdxMat[:,
                                                range(numExamplesToTrain), :]
            valHypothesisIdxMat = valHypothesisIdxMat[:,
                                                      range(numExamplesToTrain
                                                            ), :]
            valGoldLabel = valGoldLabel[range(numExamplesToTrain)]

        #Whether zero-padded on left or right
        pad = "right"

        # Get full premise/hypothesis tensors
        # batchPremiseTensor, batchHypothesisTensor, batchLabels = \
        #             convertDataToTrainingBatch(valPremiseIdxMat, self.numTimestepsPremise, valHypothesisIdxMat,
        #                                        self.numTimestepsHypothesis, "right", self.embeddingTable,
        #                                        valGoldLabel, range(len(valGoldLabel)))
        #sharedValPremise = theano.shared(batchPremiseTensor)
        #sharedValHypothesis = theano.shared(batchHypothesisTensor)
        #sharedValLabels = theano.shared(batchLabels)

        inputPremise = T.ftensor3(name="inputPremise")
        inputHypothesis = T.ftensor3(name="inputHypothesis")
        yTarget = T.fmatrix(name="yTarget")
        learnRate = T.scalar(name="learnRate", dtype='float32')


        fGradSharedHypothesis, fGradSharedPremise, fUpdatePremise, \
            fUpdateHypothesis, costFn, _, _ = self.trainFunc(inputPremise,
                                            inputHypothesis, yTarget, learnRate, gradMax,
                                            L2regularization, dropoutRate, sentenceAttention,
                                            wordwiseAttention, batchSize)

        totalExamples = 0
        stats = Stats(self.logger, expName)

        # Training
        self.logger.Log("Model configs: {0}".format(self.configs))
        self.logger.Log(
            "Starting training with {0} epochs, {1} batchSize,"
            " {2} learning rate, {3} L2regularization coefficient, and {4} dropout rate"
            .format(numEpochs, batchSize, learnRateVal, L2regularization,
                    dropoutRate))

        predictFunc = self.predictFunc(inputPremise, inputHypothesis,
                                       dropoutRate)

        for epoch in xrange(numEpochs):
            self.logger.Log("Epoch number: %d" % (epoch))

            if numExamplesToTrain > 0:
                minibatches = getMinibatchesIdx(numExamplesToTrain, batchSize)
            else:
                minibatches = getMinibatchesIdx(len(trainGoldLabel), batchSize)

            numExamples = 0
            for _, minibatch in minibatches:
                self.dropoutMode.set_value(1.0)
                numExamples += len(minibatch)
                totalExamples += len(minibatch)

                self.logger.Log(
                    "Processed {0} examples in current epoch".format(
                        str(numExamples)))

                batchPremiseTensor, batchHypothesisTensor, batchLabels = \
                    convertDataToTrainingBatch(valPremiseIdxMat, self.numTimestepsPremise, valHypothesisIdxMat,
                                               self.numTimestepsHypothesis, pad, self.embeddingTable,
                                               valGoldLabel, minibatch)

                gradHypothesisOut = fGradSharedHypothesis(
                    batchPremiseTensor, batchHypothesisTensor, batchLabels)
                gradPremiseOut = fGradSharedPremise(batchPremiseTensor,
                                                    batchHypothesisTensor,
                                                    batchLabels)
                fUpdatePremise(learnRateVal)
                fUpdateHypothesis(learnRateVal)

                predictLabels = self.predict(batchPremiseTensor,
                                             batchHypothesisTensor,
                                             predictFunc)
                #self.logger.Log("Labels in epoch {0}: {1}".format(epoch, str(predictLabels)))

                cost = costFn(batchPremiseTensor, batchHypothesisTensor,
                              batchLabels)
                stats.recordCost(totalExamples, cost)

                # Note: Big time sink happens here
                if totalExamples % (100) == 0:
                    # TODO: Don't compute accuracy of dev set
                    self.dropoutMode.set_value(0.0)
                    devAccuracy = self.computeAccuracy(valPremiseIdxMat,
                                                       valHypothesisIdxMat,
                                                       valGoldLabel,
                                                       predictFunc)
                    stats.recordAcc(totalExamples, devAccuracy, "dev")

        stats.recordFinalTrainingTime(totalExamples)

        # Save model to disk
        self.logger.Log("Saving model...")
        self.extractParams()
        configString = "batch={0},epoch={1},learnRate={2},dimHidden={3},dimInput={4}".format(
            str(batchSize), str(numEpochs), str(learnRateVal),
            str(self.dimHidden), str(self.dimInput))
        self.saveModel(currDir + "/savedmodels/basicLSTM_" + configString +
                       ".npz")
        self.logger.Log("Model saved!")

        # Set dropout to 0. again for testing
        self.dropoutMode.set_value(0.0)

        #Train Accuracy
        # trainAccuracy = self.computeAccuracy(trainPremiseIdxMat,
        #                              trainHypothesisIdxMat, trainGoldLabel, predictFunc)
        # self.logger.Log("Final training accuracy: {0}".format(trainAccuracy))

        # Val Accuracy
        valAccuracy = self.computeAccuracy(valPremiseIdxMat,
                                           valHypothesisIdxMat, valGoldLabel,
                                           predictFunc)
        # TODO: change -1 for training acc to actual value when I enable train computation
        stats.recordFinalStats(totalExamples, -1, valAccuracy)
コード例 #24
0
   def main(self):
      """Insert 10000 people, then report and dump the gathered statistics.

      ``insertPeople`` is called with its default arguments apart from the
      count; afterwards ``Stats.output()`` is invoked and the name returned
      by ``getDumpFileName()`` is handed to ``Stats.dump``.
      """
      people_to_insert = 10000
      self.insertPeople(people_to_insert)

      # Report first, then hand the dump file name over to Stats.
      Stats.output()
      Stats.dump(self.getDumpFileName())
コード例 #25
0
    def runAggregates(self):
        """Run the age and the female aggregate queries through ``Stats.execute``.

        The queries are executed in that order; each one is passed together
        with an empty list as the second argument of ``Stats.execute``.
        """
        for query_name in ("getAgeAggregate", "getFemaleAggregate"):
            Stats.execute(getattr(self.db, query_name), [])
コード例 #26
0
    def cf_split(self, folds_num=5):
        """
        Splits the rating matrix following the in-matrix method defined in CTR.

        For every fold ``k`` (1-based) the following files are written to ``<out_folder>/fold-k/``:
        ``train-fold_k-users.dat``, ``train-fold_k-items.dat``, ``test-fold_k-users.dat``,
        ``test-fold_k-items.dat`` and, only when ``self.generate_validation`` is set,
        ``validation-fold_k-users.dat`` and ``validation-fold_k-items.dat``.
        In the users files, line i has the delimiter-separated list of item ids rated by user i.
        The statistics of all folds are finally saved to ``<out_folder>/stats.txt``.

        :param folds_num: the number of folds, default 5
        :return: None
        :raises Exception: ("Split_Error!") when a user is left without training items in some fold
        """
        items_mat = self.items_mat_from_users_ratings(self.users_ratings)
        train = [[[] for _ in range(self.num_items)] for _ in range(folds_num)]
        test = [[[] for _ in range(self.num_items)] for _ in range(folds_num)]
        validation = [[[] for _ in range(self.num_items)] for _ in range(folds_num)]
        print("Number of items: {}".format(self.num_items))
        folds_list = list(range(folds_num))
        print("Splitting items ratings, progress:")

        with_validation = self.generate_validation

        def save(ratings, fold_id, subset, axis):
            # Writes one ratings matrix of a fold, e.g. fold-1/train-fold_1-users.dat
            path = os.path.join(self.out_folder, "fold-{}".format(fold_id),
                                "{}-fold_{}-{}.dat".format(subset, fold_id, axis))
            write_ratings(ratings, filename=path, delimiter=self.delimiter)

        # 1- Split items ratings into the folds.
        for item in range(self.num_items):
            # Reporting progress:
            if item % 5000 == 0:
                print("doc_{}".format(item))

            user_ids = np.array(items_mat[item])
            n = len(user_ids)

            if n >= folds_num:
                # The item has at least as many ratings as there are folds, so its ratings
                # can take part in the training set as well as in the held-out sets.
                idx = list(range(n))
                user_ids_folds = random_divide(idx, folds_num)
                for test_fold in folds_list:
                    # Users of the current fold are the test users
                    test_idx = user_ids_folds[test_fold]

                    if with_validation:
                        # Users of the next fold are the validation users
                        validation_fold = (test_fold + 1) % folds_num
                        validation_idx = user_ids_folds[validation_fold]

                        # The remaining folds, in fold order, form the training set
                        train_idx = []
                        for i in folds_list:
                            if i != test_fold and i != validation_fold:
                                train_idx.extend(user_ids_folds[i])
                        validation[test_fold][item].extend(user_ids[validation_idx].tolist())
                    else:
                        # Everything outside the test fold, in index order, is training.
                        # A set makes each membership check O(1) instead of scanning the list.
                        held_out = set(test_idx)
                        train_idx = [i for i in idx if i not in held_out]

                    train[test_fold][item].extend(user_ids[train_idx].tolist())
                    test[test_fold][item].extend(user_ids[test_idx].tolist())
            else:
                # Fewer ratings than folds: the item's ratings appear in the training sets only,
                # its test (and validation) rows stay empty.
                rated_by = user_ids.tolist()
                for fold in folds_list:
                    train[fold][item].extend(rated_by)

        # 2- Generate the user ratings from the splits generated on step 1.
        stats = Stats(self.generate_validation)
        for fold in folds_list:
            fold_id = fold + 1

            items_train = train[fold]
            users_train = self.users_mat_from_items(items_train)

            # A user without any training item makes the split unusable: abort.
            for u_id, u in enumerate(users_train):
                if len(u) == 0:
                    print("User {} contains 0 training items, split again!".format(u_id))
                    raise Exception("Split_Error!")
            save(users_train, fold_id, "train", "users")
            save(items_train, fold_id, "train", "items")

            items_test = test[fold]
            users_test = self.users_mat_from_items(items_test)
            save(users_test, fold_id, "test", "users")
            save(items_test, fold_id, "test", "items")

            if with_validation:
                items_validation = validation[fold]
                users_validation = self.users_mat_from_items(items_validation)
                # Storing the fold validation items for all users
                save(users_validation, fold_id, "validation", "users")
                save(items_validation, fold_id, "validation", "items")

                # Calculate statistics (including the validation sets):
                stats.add_fold_statistics(fold_id, users_train, users_test, items_train, items_test, users_validation, items_validation)
            else:
                # Calculate statistics:
                stats.add_fold_statistics(fold_id, users_train, users_test, items_train, items_test)

        # Write split statistics:
        stats.save_stats_to_file(os.path.join(self.out_folder, 'stats.txt'))
コード例 #27
0
ファイル: ALAuto.py プロジェクト: wiseasswolfofyoitsu/ALAuto
class ALAuto(object):
    """Primary azurlane-auto object: holds the enabled modules and exposes
    one ``run_*`` method per cycle.
    """

    # Template of the known module slots; a slot stays None while the
    # corresponding module is not enabled in the config.
    modules = {
        'updates': None,
        'combat': None,
        'commissions': None,
        'enhancement': None,
        'missions': None,
        'retirement': None,
        'headquarters': None,
        'event': None
    }

    def __init__(self, config):
        """Initializes the primary azurlane-auto instance with the passed in
        Config instance; creates the Stats instance and resets scheduled sleep
        timers.

        Args:
            config (Config): azurlane-auto Config instance
        """
        self.config = config
        self.oil_limit = 0
        self.stats = Stats(config)
        # Work on a per-instance copy: assigning into the class-level dict
        # would leak enabled modules into every other ALAuto instance.
        self.modules = dict(self.modules)
        if self.config.updates['enabled']:
            self.modules['updates'] = UpdateUtil(self.config)
        if self.config.combat['enabled']:
            self.modules['combat'] = CombatModule(self.config, self.stats)
            self.oil_limit = self.config.combat['oil_limit']
        if self.config.commissions['enabled']:
            self.modules['commissions'] = CommissionModule(self.config, self.stats)
        if self.config.enhancement['enabled']:
            self.modules['enhancement'] = EnhancementModule(self.config, self.stats)
        if self.config.missions['enabled']:
            self.modules['missions'] = MissionModule(self.config, self.stats)
        if self.config.retirement['enabled']:
            self.modules['retirement'] = RetirementModule(self.config, self.stats)
        # The headquarters module serves both the dorm and the academy settings.
        if self.config.dorm['enabled'] or self.config.academy['enabled']:
            self.modules['headquarters'] = HeadquartersModule(self.config, self.stats)
        if self.config.events['enabled']:
            self.modules['event'] = EventModule(self.config, self.stats)
        self.print_stats_check = True
        self.next_combat = datetime.now()

    def run_update_check(self):
        """Method to warn about a new release when the updates module is
        enabled and reports one.
        """
        if self.modules['updates']:
            if self.modules['updates'].checkUpdate():
                Logger.log_warning("A new release is available, please check the github.")

    def should_sortie(self):
        """Method to check whether bot should combat or not.

        Truthy only when a combat or event module is enabled, combat has not
        been switched off (``next_combat`` is set to 0 by ``run_combat_cycle``
        when there is no combat module), the scheduled time has passed and
        ``Utils.check_oil`` accepts the configured oil limit.
        """
        # Uses this instance's schedule rather than a module-level global.
        return (self.modules['combat'] or self.modules['event']) \
            and self.next_combat != 0 \
            and self.next_combat < datetime.now() \
            and Utils.check_oil(self.oil_limit)

    def run_sortie_cycle(self):
        """Method to run all cycles related to combat.
        """
        self.run_event_cycle()
        self.run_combat_cycle()
        self.run_enhancement_cycle()
        self.run_retirement_cycle()

    def run_combat_cycle(self):
        """Method to run the combat cycle.

        Reacts to the code returned by the combat module: 1 = boss defeated,
        2 = morale too low, 3 = dock full, 4 = failed to defeat the enemy.
        Without a combat module ``next_combat`` is set to 0.
        """
        if self.modules['combat']:
            result = self.modules['combat'].combat_logic_wrapper()

            if result == 1:
                # if boss is defeated
                Logger.log_msg("Boss successfully defeated, going back to menu.")
                self.print_stats_check = True
            if result == 2:
                # if morale is too low
                Logger.log_warning("Ships morale is too low, entering standby mode for an hour.")
                self.next_combat = datetime.now() + timedelta(hours=1)
                self.print_stats_check = False
            if result == 3:
                # if dock is full
                Logger.log_warning("Dock is full, need to retire.")

                if self.modules['retirement']:
                    self.modules['retirement'].retirement_logic_wrapper(True)
                else:
                    Logger.log_error("Retirement isn't enabled, exiting.")
                    sys.exit()
            if result == 4:
                Logger.log_warning("Failed to defeat enemy.")
                self.print_stats_check = False
        else:
            self.next_combat = 0

    def run_commission_cycle(self):
        """Method to run the expedition cycle.
        """
        if self.modules['commissions']:
            self.modules['commissions'].commission_logic_wrapper()

    def run_enhancement_cycle(self):
        """Method to run the enhancement cycle.
        """
        if self.modules['enhancement']:
            self.modules['enhancement'].enhancement_logic_wrapper()

    def run_mission_cycle(self):
        """Method to run the mission cycle
        """
        if self.modules['missions']:
            self.modules['missions'].mission_logic_wrapper()

    def run_retirement_cycle(self):
        """Method to run the retirement cycle
        """
        if self.modules['retirement']:
            self.modules['retirement'].retirement_logic_wrapper()

    def run_hq_cycle(self):
        """Method to run the headquarters cycle.
        """
        if self.modules['headquarters']:
            self.modules['headquarters'].hq_logic_wrapper()

    def run_event_cycle(self):
        """Method to run the event cycle
        """
        if self.modules['event']:
            self.modules['event'].event_logic_wrapper()

    def print_cycle_stats(self):
        """Method to print the cycle stats.

        Prints only when ``print_stats_check`` is set and always clears the
        flag afterwards.
        """
        if self.print_stats_check:
            self.stats.print_stats(Utils.check_oil(self.oil_limit))
        self.print_stats_check = False
コード例 #28
0
def main(exp_name, embed_data, train_data, train_data_stats, val_data,
         val_data_stats, test_data, test_data_stats, log_path, batch_size,
         num_epochs, unroll_steps, learn_rate, num_dense, dense_dim, penalty,
         reg_coeff):
    """
    Main run function for training model.
    :param exp_name: Experiment name handed to Stats
    :param embed_data: Source handed to EmbeddingTable
    :param train_data: Training data, passed to generate_data and convertLabelsToMat
    :param train_data_stats: Passed to generate_data together with train_data
    :param val_data: Validation data, passed to generate_data and convertLabelsToMat
    :param val_data_stats: Passed to generate_data together with val_data
    :param test_data: Currently unused by this function
    :param test_data_stats: Currently unused by this function
    :param log_path: Currently unused by this function
    :param batch_size: Minibatch size (input layer shape and getMinibatchesIdx)
    :param num_epochs: Number of passes over the training minibatches
    :param unroll_steps: Sequence length (seq_len for generate_data, second input dimension)
    :param learn_rate: Learning rate handed to rmsprop
    :param num_dense: Number of dense fully connected layers to add after concatenation layer
    :param dense_dim: Dimension of dense FC layers -- note this only applies if num_dense > 1
    :param penalty: Penalty to use for regularization, either "l1" or "l2"
    :param reg_coeff: Regularization coeff to use for each layer of network; may
                      want to support different coefficient for different layers
    :return: None
    :raises ValueError: if penalty is neither "l1" nor "l2"
    """
    # Fail fast: an unknown penalty used to leave p_metric unbound and only
    # blew up after data loading and compilation of the network functions.
    if penalty not in ("l1", "l2"):
        raise ValueError(
            "Unsupported penalty {0!r}; expected 'l1' or 'l2'".format(penalty))

    # Set random seed for deterministic results
    np.random.seed(0)
    num_ex_to_train = 30

    # Load embedding table
    table = EmbeddingTable(embed_data)
    vocab_size = table.sizeVocab
    dim_embeddings = table.dimEmbeddings
    embeddings_mat = table.embeddings

    train_prem, train_hyp = generate_data(train_data,
                                          train_data_stats,
                                          "left",
                                          "right",
                                          table,
                                          seq_len=unroll_steps)
    val_prem, val_hyp = generate_data(val_data,
                                      val_data_stats,
                                      "left",
                                      "right",
                                      table,
                                      seq_len=unroll_steps)
    train_labels = convertLabelsToMat(train_data)
    val_labels = convertLabelsToMat(val_data)

    # To test for overfitting capabilities of model
    # (only the validation arrays are truncated; training below uses the
    # full training arrays)
    if num_ex_to_train > 0:
        val_prem = val_prem[0:num_ex_to_train]
        val_hyp = val_hyp[0:num_ex_to_train]
        val_labels = val_labels[0:num_ex_to_train]

    # Theano expressions for premise/hypothesis inputs to network
    x_p = T.imatrix()
    x_h = T.imatrix()
    target_values = T.fmatrix(name="target_output")

    # Embedding layer for premise
    l_in_prem = InputLayer((batch_size, unroll_steps))
    l_embed_prem = EmbeddingLayer(l_in_prem,
                                  input_size=vocab_size,
                                  output_size=dim_embeddings,
                                  W=embeddings_mat)

    # Embedding layer for hypothesis
    l_in_hyp = InputLayer((batch_size, unroll_steps))
    l_embed_hyp = EmbeddingLayer(l_in_hyp,
                                 input_size=vocab_size,
                                 output_size=dim_embeddings,
                                 W=embeddings_mat)

    # Ensure embedding matrix parameters are not trainable
    l_embed_hyp.params[l_embed_hyp.W].remove('trainable')
    l_embed_prem.params[l_embed_prem.W].remove('trainable')

    l_embed_hyp_sum = SumEmbeddingLayer(l_embed_hyp)
    l_embed_prem_sum = SumEmbeddingLayer(l_embed_prem)

    # Concatenate sentence embeddings for premise and hypothesis
    l_concat = ConcatLayer([l_embed_hyp_sum, l_embed_prem_sum])

    l_in = l_concat
    l_output = l_concat
    # Add 'num_dense' dense layers with tanh
    # top layer is softmax
    if num_dense > 1:
        for n in range(num_dense):
            if n == num_dense - 1:
                l_output = DenseLayer(
                    l_in,
                    num_units=NUM_DENSE_UNITS,
                    nonlinearity=lasagne.nonlinearities.softmax)
            else:
                l_in = DenseLayer(l_in,
                                  num_units=dense_dim,
                                  nonlinearity=lasagne.nonlinearities.tanh)
    else:
        l_output = DenseLayer(l_in,
                              num_units=NUM_DENSE_UNITS,
                              nonlinearity=lasagne.nonlinearities.softmax)

    network_output = get_output(l_output, {
        l_in_prem: x_p,
        l_in_hyp: x_h
    })  # Will have shape (batch_size, 3)
    f_dense_output = theano.function([x_p, x_h],
                                     network_output,
                                     on_unused_input='warn')

    # Compute cost (penalty was validated on entry)
    p_metric = l2 if penalty == "l2" else l1

    layers = lasagne.layers.get_all_layers(l_output)
    layer_dict = {l: reg_coeff for l in layers}
    # NOTE(review): layer_dict already weights every layer by reg_coeff and
    # the weighted sum is multiplied by reg_coeff again, so the effective
    # coefficient looks like reg_coeff ** 2 -- confirm this is intended.
    reg_cost = reg_coeff * regularize_layer_params_weighted(
        layer_dict, p_metric)
    cost = T.mean(
        T.nnet.categorical_crossentropy(network_output,
                                        target_values).mean()) + reg_cost
    compute_cost = theano.function([x_p, x_h, target_values], cost)

    # Compute accuracy
    accuracy = T.mean(T.eq(T.argmax(network_output, axis=-1),
                           T.argmax(target_values, axis=-1)),
                      dtype=theano.config.floatX)
    compute_accuracy = theano.function([x_p, x_h, target_values], accuracy)

    label_output = T.argmax(network_output, axis=-1)
    predict = theano.function([x_p, x_h], label_output)

    # Define update/train functions
    all_params = lasagne.layers.get_all_params(l_output, trainable=True)
    updates = lasagne.updates.rmsprop(cost, all_params, learn_rate)
    train = theano.function([x_p, x_h, target_values], cost, updates=updates)

    # TODO: Augment embedding layer to allow for masking inputs

    stats = Stats(exp_name)
    # Accuracy is recorded whenever the running example count is a multiple
    # of acc_num * batch_size
    acc_num = 10

    #minibatches = getMinibatchesIdx(val_prem.shape[0], batch_size)
    minibatches = getMinibatchesIdx(train_prem.shape[0], batch_size)
    print("Training ...")
    try:
        total_num_ex = 0
        for epoch in xrange(num_epochs):
            for _, minibatch in minibatches:
                total_num_ex += len(minibatch)
                stats.log("Processed {0} total examples in epoch {1}".format(
                    str(total_num_ex), str(epoch)))

                #prem_batch = val_prem[minibatch]
                #hyp_batch = val_hyp[minibatch]
                #labels_batch = val_labels[minibatch]

                prem_batch = train_prem[minibatch]
                hyp_batch = train_hyp[minibatch]
                labels_batch = train_labels[minibatch]

                train(prem_batch, hyp_batch, labels_batch)
                cost_val = compute_cost(prem_batch, hyp_batch, labels_batch)

                stats.recordCost(total_num_ex, cost_val)
                # Periodically compute and log train/dev accuracy
                if total_num_ex % (acc_num * batch_size) == 0:
                    train_acc = compute_accuracy(train_prem, train_hyp,
                                                 train_labels)
                    dev_acc = compute_accuracy(val_prem, val_hyp, val_labels)
                    stats.recordAcc(total_num_ex, train_acc, dataset="train")
                    stats.recordAcc(total_num_ex, dev_acc, dataset="dev")

    except KeyboardInterrupt:
        # Ctrl-C deliberately ends training early without a traceback
        pass
コード例 #29
0
ファイル: ALAuto.py プロジェクト: Heavedistant/ALAuto
class ALAuto(object):
    """Primary azurlane-auto object: holds the enabled modules and exposes
    one ``run_*`` method per cycle.
    """

    # Template of the known module slots; a slot stays None while the
    # corresponding module is not enabled in the config.
    modules = {
        'combat': None,
        'commissions': None,
        'enhancement': None,
        'missions': None,
        'retirement': None,
        'event': None
    }

    def __init__(self, config):
        """Initializes the primary azurlane-auto instance with the passed in
        Config instance; creates the Stats instance and resets scheduled sleep
        timers.

        Args:
            config (Config): azurlane-auto Config instance
        """
        self.config = config
        self.stats = Stats(config)
        # Work on a per-instance copy: assigning into the class-level dict
        # would leak enabled modules into every other ALAuto instance.
        self.modules = dict(self.modules)
        if self.config.combat['enabled']:
            self.modules['combat'] = CombatModule(self.config, self.stats)
        if self.config.commissions['enabled']:
            self.modules['commissions'] = CommissionModule(
                self.config, self.stats)
        if self.config.enhancement['enabled']:
            self.modules['enhancement'] = EnhancementModule(
                self.config, self.stats)
        if self.config.missions['enabled']:
            self.modules['missions'] = MissionModule(self.config, self.stats)
        if self.config.retirement['enabled']:
            self.modules['retirement'] = RetirementModule(
                self.config, self.stats)
        if self.config.events['enabled']:
            self.modules['event'] = EventModule(self.config, self.stats)
        self.print_stats_check = True
        self.next_combat = datetime.now()

    def run_combat_cycle(self):
        """Method to run the combat cycle.

        Reacts to the code returned by the combat module: 1 = boss defeated,
        2 = morale too low (combat postponed by one hour), 3 = dock full.
        Without a combat module ``next_combat`` is set to 0.
        """
        if self.modules['combat']:
            result = self.modules['combat'].combat_logic_wrapper()

            if result == 1:
                # if boss is defeated
                self.print_stats_check = True
            if result == 2:
                # if morale is too low
                self.next_combat = datetime.now() + timedelta(hours=1)
                self.print_stats_check = False
            if result == 3:
                # if dock is full
                if self.modules['retirement']:
                    self.modules['retirement'].retirement_logic_wrapper(True)
                else:
                    Logger.log_error("Retirement isn't enabled, exiting.")
                    sys.exit()
        else:
            self.next_combat = 0

    def run_commission_cycle(self):
        """Method to run the expedition cycle.
        """
        if self.modules['commissions']:
            self.modules['commissions'].commission_logic_wrapper()

    def run_enhancement_cycle(self):
        """Method to run the enhancement cycle.
        """
        if self.modules['enhancement']:
            self.modules['enhancement'].enhancement_logic_wrapper()

    def run_mission_cycle(self):
        """Method to run the mission cycle
        """
        if self.modules['missions']:
            self.modules['missions'].mission_logic_wrapper()

    def run_retirement_cycle(self):
        """Method to run the retirement cycle
        """
        if self.modules['retirement']:
            self.modules['retirement'].retirement_logic_wrapper()

    def run_event_cycle(self):
        """Method to run the event cycle
        """
        if self.modules['event']:
            self.modules['event'].event_logic_wrapper()

    def print_cycle_stats(self):
        """Method to print the cycle stats.

        Prints only when ``print_stats_check`` is set and always clears the
        flag afterwards.
        """
        if self.print_stats_check:
            self.stats.print_stats()
        self.print_stats_check = False
コード例 #30
0
    def out_of_matrix_split(self, folds_num=5):
        """
        Splits the rating matrix following the out-of-matrix method defined in CTR.

        The item ids are divided into folds; for fold ``k`` (1-based) the items of that fold are the
        test items, the items of the following fold are the validation items (only when
        ``self.generate_validation`` is set, otherwise the validation sets are empty) and all other
        items are training items. Written to ``<out_folder>/fold-k/``:
        ``{train,test,validation}-fold_k-users.dat``, ``{train,test,validation}-fold_k-items.dat`` and
        ``heldout-set-fold_k-items.dat``. Statistics of all folds go to ``<out_folder>/stats.txt``.

        :param folds_num: the number of folds, default = 5
        :return: None
        :raises Exception: ("Split_Error!") when a user has no training items in some fold
        """
        # 1- Divide the item ids into folds:
        all_item_ids = list(range(self.num_items))
        folds_of_item_ids = random_divide(all_item_ids, folds_num)

        # 2- Build and store the training, test and validation sets of every fold:
        stats = Stats(self.generate_validation)
        for test_fold in range(folds_num):
            fold_no = test_fold + 1

            def fold_path(name_pattern):
                # Path of a file of the current fold, e.g. fold-1/train-fold_1-users.dat
                return os.path.join(self.out_folder, "fold-{}".format(fold_no), name_pattern.format(fold_no))

            # Items of this fold are held out for testing, items of the next fold
            # (if requested) for validation, everything else is used for training:
            held_out_ids = set(folds_of_item_ids[test_fold])
            if self.generate_validation:
                validation_ids = set(folds_of_item_ids[(test_fold + 1) % folds_num])
            else:
                validation_ids = set()
            training_ids = set(all_item_ids) - held_out_ids - validation_ids

            # Restrict the ratings of every user to each of the three item sets:
            users_train, users_test, users_validation = [], [], []
            for user_ratings in self.users_ratings:
                user_train = sorted(training_ids.intersection(user_ratings))
                if not user_train:
                    print("some users contains 0 training items, split again again!")
                    raise Exception("Split_Error!")
                users_train.append(user_train)
                users_test.append(sorted(held_out_ids.intersection(user_ratings)))
                users_validation.append(sorted(validation_ids.intersection(user_ratings)))

            # Per-user files first ...
            write_ratings(users_train, filename=fold_path("train-fold_{}-users.dat"), delimiter=self.delimiter)
            write_ratings(users_test, filename=fold_path("test-fold_{}-users.dat"), delimiter=self.delimiter)
            write_ratings(users_validation, filename=fold_path("validation-fold_{}-users.dat"), delimiter=self.delimiter)

            # ... then the per-item view of the same ratings:
            items_train = self.items_mat_from_users_ratings(users_train)
            write_ratings(items_train, filename=fold_path("train-fold_{}-items.dat"), delimiter=self.delimiter)

            items_test = self.items_mat_from_users_ratings(users_test)
            write_ratings(items_test, filename=fold_path("test-fold_{}-items.dat"), delimiter=self.delimiter)

            items_validation = self.items_mat_from_users_ratings(users_validation)
            write_ratings(items_validation, filename=fold_path("validation-fold_{}-items.dat"), delimiter=self.delimiter)

            # Saving the held-out set of the fold.
            # NOTE(review): this writes the sorted rows of items_test, not the held-out item
            # ids themselves -- confirm that this is the intended content of the file.
            write_ratings(sorted(items_test), filename=fold_path("heldout-set-fold_{}-items.dat"), delimiter=self.delimiter, print_line_length=False)

            # Calculate statistics (validation sets are only reported when they were requested):
            fold_sets = [users_train, users_test, items_train, items_test]
            if self.generate_validation:
                fold_sets += [users_validation, items_validation]
            stats.add_fold_statistics(fold_no, *fold_sets)

        # Write split statistics:
        stats.save_stats_to_file(os.path.join(self.out_folder, 'stats.txt'))