Example #1
# input data (np is numpy; dp is the DLRM data-generation module, imported elsewhere)
ln_bot = np.fromstring(args.arch_mlp_bot, dtype=int, sep="-")
if args.data_generation == "dataset":
    (nbatches, lX, lS_o, lS_i, lT,
     nbatches_test, lX_test, lS_o_test, lS_i_test, lT_test,
     ln_emb, m_den) = dp.read_dataset(
        args.data_set,
        args.mini_batch_size,
        args.data_randomize,
        args.num_batches,
        True,
        args.raw_data_file,
        args.processed_data_file,
        args.inference_only,
    )
    ln_bot[0] = m_den
else:
    # generated input data
    ln_emb = np.fromstring(args.arch_embedding_size, dtype=int, sep="-")
    m_den = ln_bot[0]
    if args.data_generation == "random":
        (nbatches, lX, lS_o, lS_i) = dp.generate_random_input_data(
            args.data_size,
            args.num_batches,
            args.mini_batch_size,
            args.round_targets,
            args.num_indices_per_lookup,
            args.num_indices_per_lookup_fixed,
            m_den,
            ln_emb,
        )
    elif args.data_generation == "synthetic":
        (nbatches, lX, lS_o, lS_i) = dp.generate_synthetic_input_data(
            args.data_size,
            args.num_batches,
            args.mini_batch_size,
            args.round_targets,
            args.num_indices_per_lookup,
            args.num_indices_per_lookup_fixed,
            m_den,
            ln_emb,
            args.data_trace_file,
            args.data_trace_enable_padding,
        )
    else:
        raise ValueError("ERROR: --data-generation=" +
                         args.data_generation + " is not supported")
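
The snippet parses the "-"-separated architecture strings with NumPy's
text-mode fromstring; Example #2 below does the same. A standalone sketch
of what that parsing produces (the layer sizes here are made up for
illustration):

import numpy as np

ln_bot = np.fromstring("13-512-256-64", dtype=int, sep="-")
print(ln_bot)      # [ 13 512 256  64]
print(ln_bot[0])   # 13 -> m_den, the dense feature count
print(ln_bot[-1])  # 64 -> the bottom MLP's output size
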
Example #2
    def run(self):

        # initialize dataset dict
        dataset_dict = {}

        # input data
        dataset_dict['ln_bot'] = np.fromstring(self.arch_mlp_bot,
                                               dtype=int,
                                               sep="-")
        dataset_dict['ln_emb'] = np.fromstring(self.arch_embedding_size,
                                               dtype=int,
                                               sep="-")
        dataset_dict['m_den'] = dataset_dict['ln_bot'][0]

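        # Each branch below fills dataset_dict with nbatches mini-batches:
        # lX (dense inputs), lS_o (per-feature index offsets), and lS_i
        # (flattened sparse indices); only the "dataset" branch also
        # provides targets lT and a held-out test split.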
        if self.data_generation == "dataset":
            (dataset_dict['nbatches'], dataset_dict['lX'],
             dataset_dict['lS_o'], dataset_dict['lS_i'],
             dataset_dict['lT'], dataset_dict['nbatches_test'],
             dataset_dict['lX_test'], dataset_dict['lS_o_test'],
             dataset_dict['lS_i_test'], dataset_dict['lT_test'],
             dataset_dict['ln_emb'], dataset_dict['m_den']) = \
            dp.read_dataset(
                            self.data_set,
                            self.mini_batch_size,
                            self.data_randomize,
                            self.num_batches,
                            True,
                            self.raw_data_file,
                            self.processed_data_file,
                            self.inference_only,
                            )
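            # the dataset dictates m_den (and ln_emb), so override the bottom
            # MLP's input dimension to match the dense feature count: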
            dataset_dict['ln_bot'][0] = dataset_dict['m_den']

        # If the data generation is random
        elif self.data_generation == "random":
            (dataset_dict['nbatches'], dataset_dict['lX'],
             dataset_dict['lS_o'], dataset_dict['lS_i']) =\
              dp.generate_random_input_data(
                self.data_size,
                self.num_batches,
                self.mini_batch_size,
                self.round_targets,
                self.num_indices_per_lookup,
                self.num_indices_per_lookup_fixed,
                dataset_dict['m_den'],
                dataset_dict['ln_emb'],
            )

        # If the data generation is synthetic
        elif self.data_generation == "synthetic":
            (dataset_dict['nbatches'], dataset_dict['lX'],
             dataset_dict['lS_o'], dataset_dict['lS_i']) = \
            dp.generate_synthetic_input_data(
                self.data_size,
                self.num_batches,
                self.mini_batch_size,
                self.round_targets,
                self.num_indices_per_lookup,
                self.num_indices_per_lookup_fixed,
                dataset_dict['m_den'],
                dataset_dict['ln_emb'],
                self.data_trace_file,
                self.data_trace_enable_padding,
            )

        # Raise an error if the generation method is not supported
        else:
            raise ValueError("ERROR: --data-generation=" +
                             self.data_generation + " is not supported")

        print("\n\n\n\n\n\n\n\n\n")
        # Temp variables for linting
        # num sparse + num dense features
        num_fea = dataset_dict['ln_emb'].size + 1
        m_den_out = dataset_dict['ln_bot'][dataset_dict['ln_bot'].size - 1]

        if self.arch_interaction_op == "dot":
            # approach 1: all
            # num_int = num_fea * num_fea + m_den_out
            # approach 2: unique
            if self.arch_interaction_itself:
                num_int = (num_fea * (num_fea + 1)) // 2 + m_den_out
            else:
                num_int = (num_fea * (num_fea - 1)) // 2 + m_den_out
        elif self.arch_interaction_op == "cat":
            num_int = num_fea * m_den_out
        else:
            sys.exit("ERROR: --arch-interaction-op=" +
                     self.arch_interaction_op + " is not supported")
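        # Worked example (illustrative sizes): with 26 embedding tables,
        # num_fea = 26 + 1 = 27, and "dot" without self-interaction yields
        # (27 * 26) // 2 = 351 pairwise terms plus the m_den_out dense outputs.
        # num_int is then prepended to --arch-mlp-top so the top MLP's input
        # dimension matches the interaction output.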
        arch_mlp_top_adjusted = str(num_int) + "-" + self.arch_mlp_top
        dataset_dict['ln_top'] = np.fromstring(arch_mlp_top_adjusted,
                                               dtype=int,
                                               sep="-")
        # sanity check: feature sizes and mlp dimensions must match
        if dataset_dict['m_den'] != dataset_dict['ln_bot'][0]:
            sys.exit("ERROR: arch-dense-feature-size " +
                     str(dataset_dict['m_den']) +
                     " does not match first dim of bottom mlp " +
                     str(dataset_dict['ln_bot'][0]))
        if self.arch_sparse_feature_size != m_den_out:
            sys.exit("ERROR: arch-sparse-feature-size " +
                     str(self.arch_sparse_feature_size) +
                     " does not match last dim of bottom mlp " +
                     str(m_den_out))
        if num_int != dataset_dict['ln_top'][0]:
            sys.exit("ERROR: # of feature interactions " + str(num_int) +
                     " does not match first dimension of top mlp " +
                     str(dataset_dict['ln_top'][0]))

        # test prints (model arch)
        if self.debug_mode:
            print("model arch:")
            print("mlp top arch " + str(dataset_dict['ln_top'].size - 1) +
                  " layers, with input to output dimensions:")
            print(dataset_dict['ln_top'])
            print("# of interactions")
            print(num_int)
            print("mlp bot arch " + str(dataset_dict['ln_top'].size - 1) +
                  " layers, with input to output dimensions:")
            print(dataset_dict['ln_bot'])
            print("# of features (sparse and dense)")
            print(num_fea)
            print("dense feature size")
            print(dataset_dict['m_den'])
            print("sparse feature size")
            print(self.arch_sparse_feature_size)
            print("# of embeddings (= # of sparse features) " +
                  str(dataset_dict['ln_emb'].size) + ", with dimensions " +
                  str(self.arch_sparse_feature_size) + "x:")
            print(dataset_dict['ln_emb'])

            print("data (inputs and targets):")
            for j in range(0, dataset_dict['nbatches']):
                print("mini-batch: %d" % j)
                print(dataset_dict['lX'][j].detach().cpu().numpy())
                # transform offsets to lengths when printing
                print([
                    np.diff(S_o.detach().cpu().tolist() +
                            list(dataset_dict['lS_i'][j][i].shape)).tolist()
                    for i, S_o in enumerate(dataset_dict['lS_o'][j])
                ])
                print([
                    S_i.detach().cpu().tolist()
                    for S_i in dataset_dict['lS_i'][j]
                ])
                if 'lT' in dataset_dict:  # targets only exist for "dataset"
                    print(dataset_dict['lT'][j].detach().cpu().numpy())

        print("\n\n\n\n\n\n\n\n\n")

        self.save(dataset_dict)
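
The debug prints above recover per-sample lookup counts from the offsets in
lS_o. A minimal sketch of that offsets-to-lengths transform on plain NumPy
arrays (values invented for illustration):

import numpy as np

# start offsets of each sample's lookups for one sparse feature (lS_o style)
offsets = np.array([0, 2, 3])
# flattened embedding indices for the same feature (lS_i style)
indices = np.array([7, 1, 3, 3, 9])

# append the total index count, then difference: each gap is one sample's
# number of lookups, mirroring the np.diff(...) expression in the debug print
lengths = np.diff(np.append(offsets, indices.size))
print(lengths)  # [2 1 2]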