def test_add_group(method):
    old_groups = method.group.get_list()
    method.group.add_new_group(
        Parameters(name='Подслушано Ростов-на-Дону',
                   number_of_result='2',
                   name_of_button='Подписаться'))
    new_groups = method.group.get_list()
    assert len(old_groups) + 1 == len(new_groups)
def get_list(self):
    driver = self.method.driver
    driver.find_element_by_xpath('//span[text()="Сообщества"]').click()
    group_list = []
    # Iterate over the group rows so that each group's name and id come from the
    # same row, instead of re-querying the page and always reading the first row's id.
    for row in driver.find_elements_by_xpath(
            '//div[@class="group_list_row clear_fix _gl_row "]'):
        name = row.find_element_by_xpath('.//a[@class="group_row_title"]').text
        group_id = row.get_attribute('id')
        group_list.append(Parameters(name=name, id=group_id))
    return group_list
parser.add_argument('--use-coco',
                    default=False,
                    type=bool,
                    metavar='coco',
                    help='if include mscoco dataset (default: False)')
args = parser.parse_args()

datasets = set()
if args.use_quora is True:
    datasets.add('quora')
if args.use_snli is True:
    datasets.add('snli')
if args.use_coco is True:
    datasets.add('mscoco')

print('use mean', args.use_mean)

batch_loader = BatchLoader(datasets=datasets)
parameters = Parameters(batch_loader.max_seq_len, batch_loader.vocab_size)
paraphraser = Paraphraser(parameters)
# the model structure is built above; only the trained weights are loaded here
paraphraser.load_state_dict(
    t.load('saved_models/trained_paraphraser_' + args.model_name))
if args.use_cuda:
    paraphraser = paraphraser.cuda()

result, target, source = sample_with_input_file(batch_loader, paraphraser, args,
                                                args.input_file)

if args.input_file not in ['snli_test', 'mscoco_test', 'quora_test', 'snips']:
    args.input_file = 'custom_file'

sampled_file_dst = 'logs/sampled_out_{}_{}{}.txt'.format(
    args.input_file, 'mean_' if args.use_mean else '', args.model_name)
target_file_dst = 'logs/target_out_{}_{}{}.txt'.format(
    args.input_file, 'mean_' if args.use_mean else '', args.model_name)
                    type=bool,
                    help='if sample while training (default: False)')
parser.add_argument('-tpl',
                    '--use_two_path_loss',
                    default=False,
                    type=bool,
                    help='use two path loss while training (default: False)')
args = parser.parse_args()

if args.use_cuda and not t.cuda.is_available():
    print('Found no GPU, args.use_cuda = False')
    args.use_cuda = False

batch_loader = BatchLoader()
parameters = Parameters(batch_loader.max_seq_len, batch_loader.vocab_size,
                        args.use_two_path_loss)
paraphraser = Paraphraser(parameters)

ce_result_valid = []
kld_result_valid = []
ce_result_train = []
kld_result_train = []
ce_cur_train = []
kld_cur_train = []
if args.use_two_path_loss:
    ce2_cur_train = []
    ce2_result_train = []
    ce2_result_valid = []

# Create locations to store logs
if not os.path.isdir('logs/' + args.model_name):
                    metavar='ES',
                    help='embeddings size (default: 100)')
parser.add_argument('--interm-sampling',
                    type=bool,
                    default=False,
                    metavar='IS',
                    help='if sample while training (default: False)')
args = parser.parse_args()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

batch_loader = BatchLoader(datapath=args.data_path,
                           use_glove=args.use_glove,
                           glove_path=args.glove_path,
                           embedding_size=args.embedding_size)
parameters = Parameters(batch_loader.max_seq_len, batch_loader.vocab_size,
                        batch_loader.embedding_size)
paraphraser = Paraphraser(parameters, device).to(device)

cross_entropy_result_train = []
kld_result_train = []
cross_entropy_result_valid = []
kld_result_valid = []
cross_entropy_cur_train = []
kld_cur_train = []

if args.use_trained:
    # load the pretrained model
    paraphraser.load_state_dict(torch.load(args.pretrained_model_name))

# define the optimizer
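# A minimal sketch of the optimizer setup that the "define the optimizer" comment
# above points to; Adam and the 1e-4 learning rate are assumptions for illustration,
# not values taken from the snippet, and it presumes Paraphraser is a torch.nn.Module.
from torch.optim import Adam

optimizer = Adam(paraphraser.parameters(), lr=1e-4)  # hypothetical optimizer choice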
class Model:
    def __init__(self, hyperparameters):
        self.hyperparameters = hyperparameters
        self.parameters = Parameters(self.hyperparameters)
        self.trainer = Trainer()
        self.graph = Graph(self.hyperparameters, self.parameters)

    def __getstate__(self):
        # the graph is rebuilt on unpickling, so it is excluded from the state
        return (self.hyperparameters, self.parameters, self.trainer)

    def __setstate__(self, state):
        (self.hyperparameters, self.parameters, self.trainer) = state
        self.graph = Graph(self.hyperparameters, self.parameters)

    def embed(self, window):
        seq = [self.parameters.embeddings[word] for word in window]
        return numpy.dstack([numpy.resize(s, (1, s.size, 1)) for s in seq])

    def embeds(self, sequences):
        return numpy.vstack([self.embed(seq) for seq in sequences])

    def corrupt_example(self, e):
        import copy
        import random
        e = copy.deepcopy(e)
        pos = -self.hyperparameters.window_size // 2
        mid = e[pos]
        while e[pos] == mid:
            e[pos] = random.randint(0, self.hyperparameters.curriculum_size - 1)
        pr = 1. / self.hyperparameters.curriculum_size
        weight = 1. / pr
        return e, numpy.float32(weight)

    def corrupt_examples(self, correct_sequences):
        return zip(*[self.corrupt_example(e) for e in correct_sequences])

    def train(self, correct_sequences):
        noise_sequences, weights = self.corrupt_examples(correct_sequences)
        for w in weights:
            assert w == weights[0]
        learning_rate = self.hyperparameters.learning_rate
        r = self.graph.train(self.embeds(correct_sequences),
                             self.embeds(noise_sequences),
                             numpy.float32(learning_rate * weights[0]))
        correct_inputs_gradient, noise_inputs_gradient, losses, correct_scores, noise_scores = r
        to_normalize = set()
        for example in range(len(correct_sequences)):
            correct_sequence = correct_sequences[example]
            noise_sequence = noise_sequences[example]
            loss, correct_score, noise_score = (losses[example],
                                                correct_scores[example],
                                                noise_scores[example])
            correct_input_gradient = correct_inputs_gradient[example]
            noise_input_gradient = noise_inputs_gradient[example]
            # self.trainer.update(numpy.sum(loss), correct_score, noise_score)
            for w in weights:
                assert w == weights[0]
            embedding_learning_rate = self.hyperparameters.embedding_learning_rate * weights[0]
            if numpy.sum(loss) == 0:
                for di in correct_input_gradient + noise_input_gradient:
                    assert (di == 0).all()
            else:
                for (i, di) in zip(correct_sequence, correct_input_gradient.T):
                    self.parameters.embeddings[i] -= 1.0 * embedding_learning_rate * di
                    to_normalize.add(i)
                for (i, di) in zip(noise_sequence, noise_input_gradient.T):
                    self.parameters.embeddings[i] -= 1.0 * embedding_learning_rate * di
                    to_normalize.add(i)
        self.parameters.normalize(list(to_normalize))

    def predict(self, sequence):
        score = self.graph.predict(self.embed(sequence))
        return score

    def verbose_predict(self, sequence):
        score, prehidden = self.graph.verbose_predict(self.embed(sequence))
        return score, prehidden

    def validate(self, sequence):
        import copy
        corrupt_sequence = copy.copy(sequence)
        rank = 1
        correct_score = self.predict(sequence)
        mid = self.hyperparameters.window_size // 2
        for i in range(self.hyperparameters.curriculum_size - 1):
            if i == sequence[mid]:
                continue
            corrupt_sequence[mid] = i
            corrupt_score = self.predict(corrupt_sequence)
            rank += (correct_score <= corrupt_score)
        return rank
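# Minimal sketch (assumed usage, not taken from the code above) of why Model defines
# __getstate__/__setstate__: the Graph is excluded from the pickled state and rebuilt
# on load, so a trained Model can be saved even if Graph itself is not picklable.
import pickle

def save_model(model, path):
    with open(path, 'wb') as f:
        pickle.dump(model, f)   # graph is dropped by __getstate__

def load_model(path):
    with open(path, 'rb') as f:
        return pickle.load(f)   # graph is rebuilt in __setstate__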
from model.hero import *
from model.map import Map
from model.parameters import Parameters
from model.state import State
from model.abilites import AbilityType
from model.teams import Teams
import json
import random
import time

game = json.loads(input())
game_map = Map(game)  # map of the game world
game_params = Parameters(game)  # game parameters
game_teams = Teams(game)  # my team

while True:
    try:
        """ Read the current game state """
        state = State(input(), game_teams, game_params)
        my_buildings = state.my_buildings()
        my_squads = state.my_squads()
        # sort squads by remaining path length
        my_squads.sort(key=lambda c: c.way.left, reverse=False)
        enemy_buildings = state.enemy_buildings()
        enemy_squads = state.enemy_squads()
        neutral_buildings = state.neutral_buildings()
        forges_buildings = state.forges_buildings()
from model.parameters import Parameters

testdata = [
    Parameters(name='"Игровой AliExpress"', text='Я кодер'),
    Parameters(name='"Тестовая группа Pytest"', text='тестовый пост новый')
]
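# A hedged sketch of how a testdata list like this is commonly consumed with
# pytest.mark.parametrize; the `app` fixture and the `post.create_new_post` /
# `post.get_list` methods are assumptions for illustration, not names taken
# from the snippets above.
import pytest

@pytest.mark.parametrize('parameters', testdata)
def test_add_post(app, parameters):
    old_posts = app.post.get_list()
    app.post.create_new_post(parameters)
    assert len(app.post.get_list()) == len(old_posts) + 1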
def main():
    parser = argparse.ArgumentParser(description='Paraphraser')
    parser.add_argument('--use-cuda',
                        type=bool,
                        default=False,
                        metavar='CUDA',
                        help='use cuda (default: False)')
    parser.add_argument('--seq-len',
                        default=30,
                        metavar='SL',
                        help='max length of sequence (default: 30)')
    parser.add_argument('--ml',
                        type=bool,
                        default=True,
                        metavar='ML',
                        help='sample by maximum likelihood')
    args = parser.parse_args()

    # Read data
    if not os.path.exists('datasets/human_test.csv'):
        source_file = 'datasets/test.csv'
        source_data = pd.read_csv(source_file)[['question1', 'question2']]
        # bucket sentence pairs by the length of the first question
        sentence_categories = [[] for _ in range(5)]
        for i in range(len(source_data)):
            sent = clean_str(source_data['question1'][i])
            sent_len = len(sent.split())
            if sent_len < 6:
                j = 0
            elif sent_len < 11:
                j = 1
            elif sent_len < 16:
                j = 2
            elif sent_len < 21:
                j = 3
            else:
                j = 4
            sentence_categories[j].append(
                [source_data['question1'][i], source_data['question2'][i]])
        sample_data = []
        for category in sentence_categories:
            sample_data += random.sample(category, 20)
        source_data = pd.DataFrame(sample_data, columns=['question1', 'question2'])
        source_data.to_csv('datasets/human_test.csv')
    else:
        source_data = pd.read_csv('datasets/human_test_1.csv')[['question1', 'question2']]

    # Sample from Gupta's original model
    batch_loader = BatchLoader()
    from model.parameters import Parameters
    parameters = Parameters(batch_loader.max_seq_len, batch_loader.vocab_size)
    paraphraser = Paraphraser(parameters)
    paraphraser.load_state_dict(
        t.load('saved_models/trained_paraphraser_ori_32',
               map_location=t.device('cpu')))
    samples_ori, target, source_ori = sample_with_input(
        batch_loader,
        paraphraser,
        args,
        decoder_only=True,
        file_name='datasets/human_test.csv')
    ref_items = generate_items(source_ori, target, 'ref')
    ori_items = generate_items(source_ori, samples_ori[0], 'ori')

    # Sample from Gupta's model with two-path loss
    batch_loader = BatchLoader()
    parameters = Parameters(batch_loader.max_seq_len,
                            batch_loader.vocab_size,
                            use_two_path_loss=True)
    paraphraser = Paraphraser(parameters)
    paraphraser.load_state_dict(
        t.load('saved_models/trained_paraphraser_tpl_16_32',
               map_location=t.device('cpu')))
    samples_tpl, target, source_tpl = sample_with_input(
        batch_loader,
        paraphraser,
        args,
        decoder_only=False,
        file_name='datasets/human_test.csv')
    tpl_items = generate_items(source_tpl, samples_tpl[0], 'tpl')

    # Sample from GAN model
    batch_loader = BatchLoader()
    from model.parametersGAN import Parameters
    parameters = Parameters(batch_loader.max_seq_len, batch_loader.vocab_size)
    paraphraser = Generator(parameters)
    paraphraser.load_state_dict(
        t.load('saved_models/trained_generator_gan_140k',
               map_location=t.device('cpu')))
    samples_gan, target, source_gan = sample_with_input(
        batch_loader,
        paraphraser,
        args,
        decoder_only=False,
        file_name='datasets/human_test.csv')
    gan_items = generate_items(source_gan, samples_gan[0], 'gan')

    # Sample from synonym model
    paraphraser = SynonymParaphraser()
    samples_synonym = paraphraser.generate_paraphrases('datasets/human_test.csv')
    base_items = generate_items(source_data['question1'], samples_synonym, 'base')

    all_items = ref_items + ori_items + tpl_items + gan_items + base_items

    eval_results = {
        'name': 'Paraphrase Survey Full Ordered',
        'items': all_items
    }
    res = json.dumps(eval_results, ensure_ascii=False)
    with open('datasets/human_test_ordered.json', 'w') as f:
        f.write(res)

    random.shuffle(all_items)
    eval_results = {
        'name': 'Paraphrase Survey Full Shuffled',
        'items': all_items
    }
    res = json.dumps(eval_results, ensure_ascii=False)
    with open('datasets/human_test_shuffled.json', 'w') as f:
        f.write(res)

    for i in range(10):
        eval_results = {
            'name': f'Paraphrase Survey Part {i+1}/{10}',
            'items': all_items[i * 50:((i + 1) * 50) - 1]
        }
        res = json.dumps(eval_results, ensure_ascii=False)
        with open(f'datasets/human_test_p_{i}_{10}.json', 'w') as f:
            f.write(res)
                    default='',
                    metavar='MN',
                    help='name of model to save (default: "")')
parser.add_argument('--seq-len',
                    default=30,
                    metavar='SL',
                    help='max length of sequence (default: 30)')
parser.add_argument('--model',
                    default='C-VAE',
                    metavar='M',
                    help='Model to use (default: C-VAE)')
args = parser.parse_args()

batch_loader = BatchLoader()
if args.model == 'C-VAE':
    parameters = Parameters(batch_loader.max_seq_len, batch_loader.vocab_size)
    paraphraser = Paraphraser(parameters)
    paraphraser.load_state_dict(
        t.load('saved_models/trained_paraphraser_' + args.model_name,
               map_location=t.device('cpu')))
elif args.model == 'C-VAE*':
    parameters = Parameters(batch_loader.max_seq_len,
                            batch_loader.vocab_size,
                            use_two_path_loss=True)
    paraphraser = Paraphraser(parameters)
    paraphraser.load_state_dict(
        t.load('saved_models/trained_paraphraser_' + args.model_name,
               map_location=t.device('cpu')))
elif args.model == 'GAN':
    parameters = ParametersGAN(batch_loader.max_seq_len, batch_loader.vocab_size)
class Network(Model):
    def __init__(self, hyperparameters, model, task, output_size):
        # since we train different networks, we want to be able to distinguish them somehow
        self.task = task
        self.hyperparameters = copy.deepcopy(hyperparameters)
        self.hyperparameters.output_size = output_size
        self.parameters = Parameters(self.hyperparameters)
        self.parameters.embeddings = model.parameters.embeddings
        self.parameters.hidden_weights = model.parameters.hidden_weights
        self.parameters.hidden_biases = model.parameters.hidden_biases
        self.trainer = Trainer()
        graph.hidden_weights = self.parameters.hidden_weights
        graph.hidden_biases = self.parameters.hidden_biases
        graph.output_weights = self.parameters.output_weights
        graph.output_biases = self.parameters.output_biases

    def train(self, inputs, correct_outputs, learning_rate, embedding_learning_rate):
        r = graph.train(self.embeds(correct_sequences),
                        self.embeds(noise_sequences),
                        learning_rate * weights[0])
        (dcorrect_inputss, dnoise_inputss, losss, unpenalized_losss, l1penaltys,
         correct_scores, noise_scores) = r
        to_normalize = set()
        for ecnt in range(len(correct_sequences)):
            (loss, unpenalized_loss, correct_score, noise_score) = \
                (losss[ecnt], unpenalized_losss[ecnt],
                 correct_scores[ecnt], noise_scores[ecnt])
            if l1penaltys.shape == ():
                assert l1penaltys == 0
                l1penalty = 0
            else:
                l1penalty = l1penaltys[ecnt]
            correct_sequence = correct_sequences[ecnt]
            noise_sequence = noise_sequences[ecnt]
            dcorrect_inputs = [d[ecnt] for d in dcorrect_inputss]
            dnoise_inputs = [d[ecnt] for d in dnoise_inputss]
            self.trainer.update(loss, correct_score, noise_score,
                                unpenalized_loss, l1penalty)
            for w in weights:
                assert w == weights[0]
            embedding_learning_rate = embedding_learning_rate * weights[0]
            if loss == 0:
                for di in dcorrect_inputs + dnoise_inputs:
                    assert (di == 0).all()
            if loss != 0:
                for (i, di) in zip(correct_sequence, dcorrect_inputs):
                    assert di.shape == (self.parameters.embedding_size,)
                    self.parameters.embeddings[i] -= 1.0 * embedding_learning_rate * di
                    if NORMALIZE_EMBEDDINGS:
                        to_normalize.add(i)
                for (i, di) in zip(noise_sequence, dnoise_inputs):
                    assert di.shape == (self.parameters.embedding_size,)
                    self.parameters.embeddings[i] -= 1.0 * embedding_learning_rate * di
                    if NORMALIZE_EMBEDDINGS:
                        to_normalize.add(i)
        if len(to_normalize) > 0:
            to_normalize = [i for i in to_normalize]
            self.parameters.normalize(to_normalize)
from model.hero import *
from model.map import Map
from model.parameters import Parameters
from model.state import State
from model.abilites import AbilityType
from model.teams import Teams
from warrior import *
import json
import random
import time

game = json.loads(input())
game_map = Map(game)  # map of the game world
game_params = Parameters(game)  # game parameters
game_teams = Teams(game)  # my team


def main():
    # state = State(input(), game_teams, game_params)
    # if game_teams.enemy_team[0].hero_type == HeroType.Warrior:
    warrior(game_map, game_params, game_teams)


main()
def sample(args):
    # Create locations to store samples
    if not os.path.isdir('logs/' + args.model_name + '/samples'):
        os.mkdir('logs/' + args.model_name + '/samples')

    batch_loader = BatchLoader()

    # Load model
    if ('ori' in args.model_name.lower()
            and 'gan' not in args.model_name.lower()) or 'tpl' in args.model_name.lower():
        from model.parameters import Parameters
        parameters = Parameters(batch_loader.max_seq_len,
                                batch_loader.vocab_size,
                                use_two_path_loss=('tpl' in args.model_name.lower()))
        paraphraser = Paraphraser(parameters)
        if args.use_cuda:
            paraphraser.load_state_dict(
                t.load('saved_models/trained_paraphraser_' + args.model_name,
                       map_location=t.device('cuda:0')))
        else:
            paraphraser.load_state_dict(
                t.load('saved_models/trained_paraphraser_' + args.model_name,
                       map_location=t.device('cpu')))
    elif 'gan' in args.model_name.lower():
        from model.parametersGAN import Parameters
        parameters = Parameters(batch_loader.max_seq_len, batch_loader.vocab_size)
        paraphraser = Generator(parameters)
        if args.use_cuda:
            paraphraser.load_state_dict(
                t.load('saved_models/trained_generator_' + args.model_name,
                       map_location=t.device('cuda:0')))
        else:
            paraphraser.load_state_dict(
                t.load('saved_models/trained_generator_' + args.model_name,
                       map_location=t.device('cpu')))

    if args.beam:
        samples, target, source = sample_with_beam(
            batch_loader,
            paraphraser,
            args,
            decoder_only=('ori' in args.model_name.lower()
                          and 'gan' not in args.model_name.lower()),
            beam_size=(args.num_samples if args.num_samples != 1 else 5))
        for i in range(args.num_samples):
            np.savetxt(f'logs/{args.model_name}/samples/sampled_beam_{i}.txt',
                       np.array(samples[i]), delimiter='\n', fmt='%s')
        np.savetxt(f'logs/{args.model_name}/samples/target_beam.txt',
                   np.array(target), delimiter='\n', fmt='%s')
        np.savetxt(f'logs/{args.model_name}/samples/source_beam.txt',
                   np.array(source), delimiter='\n', fmt='%s')
    else:
        samples, target, source = sample_with_input(
            batch_loader,
            paraphraser,
            args,
            decoder_only=('ori' in args.model_name.lower()
                          and 'gan' not in args.model_name.lower()),
            num_samples=args.num_samples,
            ml=args.ml)
        for i in range(args.num_samples):
            np.savetxt(f'logs/{args.model_name}/samples/sampled' +
                       ('_ml' if args.ml else '_s') + f'_{i}.txt',
                       np.array(samples[i]), delimiter='\n', fmt='%s')
        np.savetxt(f'logs/{args.model_name}/samples/target' +
                   ('_ml' if args.ml else '_s') + '.txt',
                   np.array(target), delimiter='\n', fmt='%s')
        np.savetxt(f'logs/{args.model_name}/samples/source' +
                   ('_ml' if args.ml else '_s') + '.txt',
                   np.array(source), delimiter='\n', fmt='%s')