def parse_train_data(self, filepath_or_buffer):
    """
    Load the training data and derive the attributes built on top of it.

    The instances and labels returned by ``self._load_train_data`` are
    stored, two sanity checks are run, every instance is converted to a
    sample, and the split position is computed from ``self._proportion``.
    ``self._chknums`` is then cut down to its part past the split and
    ``self._answers_for_train`` is built from the labels past the split.

    :param filepath_or_buffer: data source, forwarded unchanged to
        ``self._load_train_data``.
    """
    loaded = self._load_train_data(filepath_or_buffer)
    self._list_of_instances, self._list_of_labels = loaded

    # NOTE(review): the message below looks truncated ("to their.");
    # it is kept as-is because the intended wording is not known.
    instances_count = len(self._list_of_instances)
    labels_count = len(self._list_of_labels)
    checks.check_equality(
        instances_count, labels_count,
        message="Instances of read data are not equal to their.")

    # Round-trip a fixed label through both conversions: the result has to
    # match the input, otherwise the two methods are not inverse.
    round_trip = self.to_final_label(self.to_interim_label([24, 42, 42]))
    checks.check_equality(
        round_trip, [24, 42, 42],
        message="Processing data methods are not mutually inverse.")

    to_sample = self._to_sample
    self._list_of_samples = [
        to_sample(instance) for instance in self._list_of_instances
    ]

    if self._debug:
        print(len(self._list_of_instances))
        for preview_source in (self._list_of_instances,
                               self._list_of_labels,
                               self._list_of_samples):
            print(preview_source[:3])

    self._train_samples_num = int(
        self._proportion * len(self._list_of_labels))
    split_at = self._train_samples_num

    # Only the part past the split position is kept.
    self._chknums = self._chknums[split_at:]
    self._answers_for_train = list(
        map(sorted, self._list_of_labels[split_at:]))
def train(self, train_samples, train_labels, **kwargs):
    """
    Sum the labels of every person and post-process the result.

    :param train_samples: sequence of samples, format:
        [[person_id, month, day], [person_id, month, day], ...].
    :param train_labels: sequence of labels, one per sample; each label is
        converted with ``np.array`` before being accumulated.
    :param kwargs: must contain ``most_popular_goods``,
        ``most_popular_good_ids`` and ``max_good_id``.
    """
    samples_count = len(train_samples)
    labels_count = len(train_labels)
    checks.check_equality(samples_count, labels_count,
                          message="Samples and labels have different sizes")

    self.most_popular_goods = kwargs["most_popular_goods"]
    self.most_popular_good_ids = kwargs["most_popular_good_ids"]
    checks.check_value(
        len(self.most_popular_good_ids),
        lower=self.num_popular_ids,
        strict_less=False,
        var_name="most_popular_good_ids",
    )
    self.max_good_id = kwargs["max_good_id"]

    # The person id is the first field of every sample.
    owners = [sample[0] for sample in train_samples]

    for owner, label in zip(owners, train_labels):
        label_vector = np.array(label)
        if self.orders.get(owner) is not None:
            # Person already seen: add this label to the running sum.
            self.orders[owner] += label_vector
        else:
            self.orders[owner] = label_vector

    self.process_orders()
def train(self, train_samples, train_labels, **kwargs):
    """
    Remember one label per person, later samples overriding earlier ones.

    :param train_samples: sequence of samples, format:
        [[person_id, month, day], [person_id, month, day], ...].
    :param train_labels: sequence of labels, one per sample.
    :param kwargs: must contain ``most_popular_goods``.
    """
    samples_count = len(train_samples)
    labels_count = len(train_labels)
    checks.check_equality(samples_count, labels_count,
                          message="Samples and labels have different sizes")

    self.most_popular_goods = kwargs["most_popular_goods"]

    # The person id is the first field of every sample.
    owners = [sample[0] for sample in train_samples]

    # When a person id repeats, the label paired with its last occurrence
    # is the one that stays in the mapping.
    self.latest_orders = {
        owner: label for owner, label in zip(owners, train_labels)
    }
def train(self, train_samples, train_labels, **kwargs):
    """
    Fit the wrapped model on the given training data.

    The sizes of samples and labels are compared first; the data is then
    handed over to ``self.model.fit`` together with any extra keyword
    arguments.

    :param train_samples: training data; must support ``len()``.
    :param train_labels: target values, one per sample; must support
        ``len()``.
    :param kwargs: dict, optional. Additional keyword arguments, forwarded
        unchanged to ``self.model.fit``.
    """
    samples_count = len(train_samples)
    labels_count = len(train_labels)
    checks.check_equality(samples_count, labels_count,
                          message="Samples and labels have different sizes")

    estimator = self.model
    estimator.fit(train_samples, train_labels, **kwargs)
def fit(self, train_samples, train_labels, **kwargs):
    """
    Cluster persons by their summed orders and keep the largest cluster.

    Labels are summed per person into ``self.orders``, the resulting table
    is clustered with ``self.model.fit_predict`` and the center of the most
    populated cluster is thresholded by ``self.CLUSTER_BORDER`` to obtain
    ``self.largest_cluster_goods`` (a 0/1 integer array).

    :param train_samples: sequence of samples; the first element of each
        sample is used as the person id.
    :param train_labels: sequence of labels, one per sample; each label is
        converted with ``np.array`` before being accumulated.
    :param kwargs: accepted for interface compatibility, not used here.
    """
    checks.check_equality(len(train_samples), len(train_labels),
                          message="Samples and labels have different "
                                  "sizes")

    persons_ids = [person_data[0] for person_data in train_samples]
    for persons_id, label in zip(persons_ids, train_labels):
        if self.orders.get(persons_id) is None:
            self.orders[persons_id] = np.array(label)
        else:
            self.orders[persons_id] += np.array(label)

    # One row per person id, one column per label component.
    orders_frame = pd.DataFrame.from_dict(self.orders, orient="index")
    self.clustering_table = pd.DataFrame(
        self.model.fit_predict(orders_frame), columns=[self.COL_NAME])

    # value_counts() sorts by descending frequency, so the first index
    # entry is the label of the most populated cluster.
    cluster_id = self.clustering_table[self.COL_NAME] \
        .value_counts().index[0]
    larg_clust_center = self.model.cluster_centers_[cluster_id]

    # The ``np.int`` alias was removed in NumPy 1.24; the builtin ``int``
    # is exactly what it stood for, so the resulting dtype is unchanged.
    self.largest_cluster_goods = (
        larg_clust_center >= self.CLUSTER_BORDER).astype(int)
def train(self, train_samples, train_labels, **kwargs):
    """
    Store the most popular goods supplied by the caller.

    :param train_samples: sequence of samples; only its length is used.
    :param train_labels: sequence of labels; only its length is used.
    :param kwargs: must contain ``most_popular_goods``.
    """
    samples_count = len(train_samples)
    labels_count = len(train_labels)
    checks.check_equality(samples_count, labels_count,
                          message="Samples and labels have different sizes")

    popular_goods = kwargs["most_popular_goods"]
    self.most_popular_goods = popular_goods