def _collate_fn(batch, token2idx, label_type, LFR_m=1, LFR_n=1):
    """Collate one pre-assembled minibatch into padded tensors.

    Args:
        batch: list, len(batch) = 1. Its single element is handed to
            ``load_inputs_and_targets``. See AudioDataset.__getitem__()
        token2idx: forwarded unchanged to ``load_inputs_and_targets``.
        label_type: forwarded unchanged to ``load_inputs_and_targets``.
        LFR_m: forwarded as the ``LFR_m`` keyword.
        LFR_n: forwarded as the ``LFR_n`` keyword.
    Returns:
        xs_pad: N x Ti x D, torch.Tensor
        ilens : N, torch.Tensor
        ys_pad: N x To, torch.Tensor
    """
    # The minibatch arrives wrapped in a one-element list.
    assert len(batch) == 1
    features, targets = load_inputs_and_targets(
        batch[0], token2idx, label_type, LFR_m=LFR_m, LFR_n=LFR_n)

    # TODO: perform subsampling

    # Length of every input sequence along its first axis, before padding.
    frame_counts = np.array([feat.shape[0] for feat in features])

    # Pad (0 is passed to pad_list for both inputs and targets) and
    # convert everything to tensors.
    xs_pad = pad_list(
        [torch.from_numpy(feat).float() for feat in features], 0)
    ilens = torch.from_numpy(frame_counts)
    ys_pad = pad_list(
        [torch.from_numpy(target).long() for target in targets], 0)
    return xs_pad, ilens, ys_pad
def f_xy_pad(batch):
    """Pad the (x, y) samples stored in ``batch[0]`` into two long tensors.

    Each sample in ``batch[0]`` is indexed as ``sample[0]`` (input) and
    ``sample[1]`` (target). Both are converted with ``torch.tensor(...).long()``
    and handed to ``pad_list`` with 0 as its second argument.

    Returns:
        (xs_pad, ys_pad): the two results of ``pad_list``.
    """
    samples = batch[0]

    inputs = [torch.tensor(pair[0]).long() for pair in samples]
    xs_pad = pad_list(inputs, 0)

    targets = [torch.tensor(pair[1]).long() for pair in samples]
    ys_pad = pad_list(targets, 0)

    return xs_pad, ys_pad
def _collate_fn(batch, LFR_m=1, LFR_n=1, model_choose='baseline3'):
    """Collate one pre-assembled minibatch of features and dialect labels.

    Args:
        batch: list, len(batch) = 1. Its single element is handed to
            ``load_inputs_and_targets``. See AudioDataset.__getitem__()
        LFR_m: forwarded as the ``LFR_m`` keyword.
        LFR_n: forwarded as the ``LFR_n`` keyword.
        model_choose: model name; for 'baseline2' and 'baseline4' the
            reported lengths are ceil(frames / 4), otherwise the raw frame
            count is reported.
    Returns:
        xs_pad: N x Ti x D, torch.Tensor
        ilens : N, torch.Tensor
        dialect_labels: torch.Tensor built with ``torch.from_numpy`` from the
            labels returned by ``load_inputs_and_targets``.
    """
    import math

    # The minibatch arrives wrapped in a one-element list.
    assert len(batch) == 1
    features, dialect_labels = load_inputs_and_targets(
        batch[0], LFR_m=LFR_m, LFR_n=LFR_n)

    # Pick the length rule once instead of branching around two list builds.
    # NOTE(review): the /4 presumably mirrors a 4x time reduction inside
    # those models -- confirm against the model definitions.
    if model_choose in ('baseline2', 'baseline4'):
        def reported_length(n_frames):
            return int(math.ceil(n_frames / 4))
    else:
        def reported_length(n_frames):
            return int(math.ceil(n_frames))

    lengths = np.array([reported_length(feat.shape[0]) for feat in features])

    # Pad the features (0 is passed to pad_list) and convert to tensors.
    xs_pad = pad_list(
        [torch.from_numpy(feat).float() for feat in features], 0)
    ilens = torch.from_numpy(lengths)
    dialect_labels = torch.from_numpy(dialect_labels)
    return xs_pad, ilens, dialect_labels
def preprocess(self, padded_input):
    """Generate decoder input and output label from padded_input.

    <sos> is prepended to every sequence to form the decoder input, and
    <eos> is appended to every sequence to form the decoder output label.

    Args:
        padded_input: iterable of 1-D tensors in which IGNORE_ID marks
            padding positions.
    Returns:
        (ys_in_pad, ys_out_pad): decoder inputs padded with ``self.eos_id``
        and decoder targets padded with ``IGNORE_ID``; both have the same
        size.
    """
    # Strip the padding so each target regains its true length.
    targets = [seq[seq != IGNORE_ID] for seq in padded_input]

    # One-element tensors created from the first target via Tensor.new.
    reference = targets[0]
    eos = reference.new([self.eos_id])
    sos = reference.new([self.sos_id])

    decoder_inputs = [torch.cat([sos, seq], dim=0) for seq in targets]
    decoder_labels = [torch.cat([seq, eos], dim=0) for seq in targets]

    # Inputs are padded with eos_id, labels with IGNORE_ID.
    ys_in_pad = pad_list(decoder_inputs, self.eos_id)
    ys_out_pad = pad_list(decoder_labels, IGNORE_ID)
    assert ys_in_pad.size() == ys_out_pad.size()
    return ys_in_pad, ys_out_pad
def extract_content_gsc(file_name):
    """Extract product data from a saved product page and print it as JSON.

    The page is loaded with ``parse_file`` and parsed with ``html.fromstring``.
    Title, price range, short description, category, tags, the variation
    attribute columns and the per-variation list/discount prices are read via
    XPath, combined by ``generate_json_gsc`` and printed with 4-space
    indentation. Nothing is returned.

    Args:
        file_name: passed straight to ``parse_file``.
    Raises:
        IndexError: if the title or category query yields no match (both
            take element ``[0]`` of the result).
        KeyError: if the variations table has no "Model" column.
    """
    def join_pairs(tokens):
        # Glue items (0, 1), (2, 3), ... together; a trailing item without
        # a partner is dropped.
        return [f"{tokens[i]}{tokens[i + 1]}"
                for i in range(0, len(tokens) - 1, 2)]

    tree = html.fromstring(parse_file(file_name))

    title = tree.xpath('//h1[@class="product_title entry-title"]/text()')[0]

    price_from = "".join(tree.xpath(
        '//div[@class="summary entry-summary"]/p/span[1]/span/text() | //div[@class="summary entry-summary"]/p/span[1]/text()'))
    price_to = "".join(tree.xpath(
        '//div[@class="summary entry-summary"]/p/span[2]/span/text() | //div[@class="summary entry-summary"]/p/span[2]/text()'))

    description_parts = tree.xpath(
        '//div[@class="woocommerce-product-details__short-description"]/p/text()')
    description = "".join(description_parts).replace("\n", "")

    category = tree.xpath('//span[@class="posted_in"]/a/text()')[0]
    tags = tree.xpath('//span[@class="tagged_as"]/a/text()')

    # Column headers of the variations table, then the cell values per column.
    header_cells = tree.xpath(
        '//table[@class="table table-hover variations"]/thead/tr/th[not(@*)]/text()')
    attributes = [cell.strip() for cell in header_cells]
    var_attr = {
        attr: tree.xpath(
            f'//table[@class="table table-hover variations"]/tbody/tr/td[@data-title="{attr}"]/text()')
        for attr in attributes
    }

    # Prices come back as alternating text nodes that are glued pairwise.
    list_price_tokens = tree.xpath(
        '//span[@class="price"]//span[1]/span/text() | //span[@class="price"]//span[1]/text()')
    separate_list_price = join_pairs(list_price_tokens)

    discount_price_tokens = pad_list(
        tree.xpath('//ins/span/span/text() | //ins/span/text()'),
        var_attr["Model"],
        "currency_xpath")
    separate_discount_price = join_pairs(discount_price_tokens)

    variations = zip(separate_list_price, separate_discount_price)
    results = generate_json_gsc(
        title, price_from, price_to, description, category, tags,
        var_attr, variations)
    print(json.dumps(results, indent=4))
def f_x_pad(batch):
    """Pad the samples stored in ``batch[0]`` into a single long tensor.

    Every sample is converted with ``torch.tensor(...).long()`` and the
    resulting list is handed to ``pad_list`` with 0 as its second argument.

    Returns:
        The result of ``pad_list``.
    """
    tensors = [torch.tensor(item).long() for item in batch[0]]
    return pad_list(tensors, 0)