def dump_samples(args):
    "Construct a large number of samples with features and dump to file."
    all_features = []
    all_sents = []
    batches = args.nbatches
    batch = args.batch_size
    samples = 1
    total = batches * batch * samples
    all_zs = torch.FloatTensor(total, model_args['z_size'])
    rev = defaultdict(set)
    for j in range(batches):
        print("%d / %d batches " % (j, batches))
        noise = torch.ones(batch, model_args['z_size'])
        noise.normal_()
        noise = noise.view(batch, 1, model_args['z_size'])\
                     .expand(batch, samples, model_args['z_size']).contiguous()\
                     .view(batch * samples, model_args['z_size'])
        sentences = generate(autoencoder, gan_gen, z=noise,
                             vocab=idx2word, sample=True,
                             maxlen=model_args['maxlen'])
        for i in range(batch * samples):
            k = len(all_features)
            nlp_sent = nlp(sentences[i])
            feats = featurize(nlp_sent)
            all_sents.append(sentences[i])
            all_features.append(feats)
            for f in feats:
                rev[f].add(k)
            all_zs[k] = noise[i]
    pickle.dump((all_sents, all_features, rev, all_zs), open(args.dump, "bw"))
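# Illustrative sketch (not from the original source): reading back the tuple that
# dump_samples() pickles above. "samples.pkl" stands in for whatever args.dump was.
import pickle

with open("samples.pkl", "rb") as f:
    all_sents, all_features, rev, all_zs = pickle.load(f)

# rev maps each feature to the set of sample indices that exhibit it;
# all_zs[k] is the noise vector that produced all_sents[k].
some_feature = next(iter(rev))
print(some_feature, sorted(rev[some_feature])[:5])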
def interpolate(ae, gg, z1, z2, vocab, steps=5, sample=None, maxlen=None):
    """
    Interpolating in z space
    Assumes that type(z1) == type(z2)
    """
    if type(z1) == Variable:
        noise1 = z1
        noise2 = z2
    elif type(z1) == torch.FloatTensor or type(z1) == torch.cuda.FloatTensor:
        noise1 = Variable(z1, volatile=True)
        noise2 = Variable(z2, volatile=True)
    elif type(z1) == np.ndarray:
        noise1 = Variable(torch.from_numpy(z1).float(), volatile=True)
        noise2 = Variable(torch.from_numpy(z2).float(), volatile=True)
    else:
        raise ValueError("Unsupported input type (noise): {}".format(type(z1)))

    # interpolation weights
    lambdas = [x * 1.0 / (steps - 1) for x in range(steps)]

    gens = []
    for L in lambdas:
        gens.append(generate(ae, gg, (1 - L) * noise1 + L * noise2,
                             vocab, sample, maxlen))

    interpolations = []
    for i in range(len(gens[0])):
        interpolations.append([s[i] for s in gens])
    return interpolations
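# Small self-contained sketch (the z_size of 100 is an arbitrary assumption) of what the
# lambda weights in interpolate() trace out: `steps` evenly spaced points on the straight
# line between z1 and z2, endpoints included.
import torch

steps = 5
z1 = torch.randn(1, 100)
z2 = torch.randn(1, 100)
lambdas = [x * 1.0 / (steps - 1) for x in range(steps)]   # [0.0, 0.25, 0.5, 0.75, 1.0]
points = [(1 - L) * z1 + L * z2 for L in lambdas]
assert torch.allclose(points[0], z1) and torch.allclose(points[-1], z2)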
def gen_samples(vec):
    "Generate sample sentences from vector."
    sentences = []
    sentences = generate(autoencoder, gan_gen,
                         z=torch.FloatTensor(vec).view(1, -1).expand(20, vec.shape[0]),
                         vocab=idx2word, sample=True,
                         maxlen=model_args['maxlen'])[0]
    return sentences
def main(args):
    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
    else:
        print("Note that our pre-trained models require CUDA to evaluate.")

    model_args, idx2word, autoencoder, gan_gen, gan_disc \
        = load_models(args.load_path)

    if args.ngenerations > 0:
        noise = torch.ones(args.ngenerations, model_args['z_size'])
        noise = noise.normal_().cuda()
        sentences = generate(autoencoder, gan_gen, z=noise,
                             vocab=idx2word, sample=args.sample,
                             maxlen=model_args['maxlen'])

        if not args.noprint:
            print("\nSentence generations:\n")
            for sent in sentences:
                print(sent)
        with open(args.outf, "w") as f:
            f.write("Sentence generations:\n\n")
            for sent in sentences:
                f.write(sent + "\n")

    if args.ninterpolations > 0:
        noise1 = torch.ones(args.ninterpolations, model_args['z_size'])
        noise1 = noise1.normal_().cuda()
        noise2 = torch.ones(args.ninterpolations, model_args['z_size'])
        noise2 = noise2.normal_().cuda()
        interps = interpolate(autoencoder, gan_gen,
                              z1=noise1, z2=noise2,
                              vocab=idx2word, steps=args.steps,
                              sample=args.sample,
                              maxlen=model_args['maxlen'])

        if not args.noprint:
            print("\nSentence interpolations:\n")
            for interp in interps:
                for sent in interp:
                    print(sent)
                print("")
        with open(args.outf, "a") as f:
            f.write("\nSentence interpolations:\n\n")
            for interp in interps:
                for sent in interp:
                    f.write(sent + "\n")
                f.write('\n')
def generate_texts(bot_name, session=None, checkpoint="latest",
                   **generation_kwargs):
    if session is None:
        session = restart_session()
        load_model_into_graph(session, bot_name, checkpoint=checkpoint)
    texts = generate(session, bot_name, checkpoint=checkpoint,
                     **generation_kwargs)
    return texts, session
def generate():
    now = time.time()
    print(request.form)
    filename = "uploaded/" + str(uuid.uuid4()) + ".mid"
    if 'file' in request.files:
        midifile = request.files['file']
        if not (midifile.filename.endswith(".mid")
                or midifile.filename.endswith(".midi")):
            response = jsonify(
                {"message": "Bad file format, please upload mid / midi"})
            response.status_code = 500
            return response
        midifile.save(filename)
    else:
        predefined_melody = request.form["melody"]
        found = False
        for predefined_file in os.listdir('predefined'):
            if predefined_file.lower().startswith(predefined_melody.lower()):
                copyfile('predefined/' + predefined_file, filename)
                found = True
        if not found:
            response = jsonify({"message": "File wasn't found"})
            response.status_code = 500
            return response

    midi_data = pretty_midi.PrettyMIDI(filename)
    primer_sequence = magenta.music.midi_io.midi_to_sequence_proto(midi_data)

    values = request.form
    num_steps = 100  # change this for shorter or longer sequences
    # the higher the temperature the more random the sequence.
    temperature = float(values["temperature"])
    submodel = values["submodel"]
    print("Generating melody, steps=%d, temperature=%f, submodel=%s"
          % (num_steps, temperature, submodel))
    generated_sequence = models.generate(midi_data, primer_sequence, num_steps,
                                         temperature, submodel)

    output = tempfile.NamedTemporaryFile()
    magenta.music.midi_io.sequence_proto_to_midi_file(generated_sequence,
                                                      output.name)
    output.seek(0)
    return send_file(output, attachment_filename='generated.mid',
                     mimetype='audio/midi', as_attachment=True)
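# Hypothetical client for the Flask handler above (sketch only): the route "/generate",
# host/port, file name, and the "attention_rnn" submodel value are all assumptions; the
# form/file field names ("file", "temperature", "submodel") come from the handler itself.
import requests

with open("primer.mid", "rb") as f:
    resp = requests.post("http://localhost:5000/generate",
                         files={"file": f},
                         data={"temperature": "1.0", "submodel": "attention_rnn"})
resp.raise_for_status()
with open("generated.mid", "wb") as out:
    out.write(resp.content)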
def main(args):
    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
    else:
        print("Note that our pre-trained models require CUDA to evaluate.")

    ###########################################################################
    # Load the models
    ###########################################################################
    ae_args, gan_args, idx2word, autoencoder, gan_gen, gan_disc \
        = load_models(args.ae_args, args.gan_args, args.vocab_file,
                      args.ae_model, args.g_model, args.d_model)

    ###########################################################################
    # Generation code
    ###########################################################################
    # Generate sentences
    corpus = Corpus(args.data_path, args.dict_file, vocab_size=len(idx2word))
    source, _ = next(BatchGen(corpus.get_chunks(size=2), args.ngenerations))
    prev_sent = [
        decode_idx(corpus.dictionary, sent) for sent in source.tolist()
    ]
    source = Variable(source, volatile=True)
    sentences = generate(autoencoder, gan_gen, inp=source,
                         vocab=idx2word, sample=args.sample,
                         maxlen=args.maxlen)

    if not args.noprint:
        print("\nSentence generations:\n")
        for prev, sent in zip(prev_sent, sentences):
            print(prev)
            print(" ", sent)
            print("")
    with open(args.outf, "w") as f:
        f.write("Sentence generations:\n\n")
        for prev, sent in zip(prev_sent, sentences):
            f.write(prev + '\n')
            f.write("-> " + sent + '\n\n')
def __init__(self, connection_data, mode=None, map_blobs=False,
             secrets=None, pooling=False):
    self._pooling = pooling

    # add querying utility properties
    # these must belong to the connection since the way in which their values are handled
    # depends on the database being connected to.
    self.range = models.Range
    self.radius = models.Radius
    self.regexp = models.RegExp
    self.regexp.connection_object = self

    if type(connection_data) in [str, unicode]:
        # if we've been given a connection string, process it
        self.connection_data = new_connection_dictionary(connection_data,
                                                         secrets=secrets,
                                                         mode=mode)
        self.schema = self.connection_data.get("schema") if self.connection_data.get("schema") != None else ""

        self.range.database_type = self.connection_data["host"]
        self.radius.database_type = self.connection_data["host"]
        self.regexp.database_type = self.connection_data["host"]
    else:
        self.connection_data = connection_data
        # assume we have an engine
        # we need to take the string representation so we know which type of db we're aiming at
        engine_string = str(connection_data)
        db_type = None
        if "oracle" in engine_string:
            db_type = "oracle"
        elif "frontier" in engine_string:
            db_type = "frontier"
        elif "sqlite" in engine_string:
            db_type = "sqlite"

        self.range.database_type = db_type
        self.radius.database_type = db_type
        self.regexp.database_type = db_type

    import models as ms
    self.models = ms.generate(map_blobs)
def __init__(self, connection_data):
    # is not needed in cmssw
    """try:
        import cx_Oracle
    except ImportError as e:
        exit("cx_Oracle cannot be imported - try to run 'source /data/cmssw/setupEnv.sh' and 'source venv/bin/activate'.")"""

    # todo translation on connection_data - it may be a string
    # find out which formats of db string are acceptable
    frontier_str_length = len("frontier://")
    sqlite_str_length = len("sqlite:///")
    if type(connection_data) == str and connection_data[0:frontier_str_length] == "frontier://":
        db_name = connection_data[frontier_str_length:].split("/")[0]
        schema = connection_data[frontier_str_length:].split("/")[1]
        connection_data = {}
        connection_data["db_alias"] = db_name
        connection_data["schema"] = schema
        connection_data["host"] = "frontier"
    """elif type(connection_data) == str and connection_data[0:sqlite_str_length] == "sqlite:///":
        db_name = connection_data[frontier_str_length:]
        schema = ""
        connection_data = {}
        connection_data["db_alias"] = db_name
        connection_data["schema"] = schema
        connection_data["host"] = "sqlite"
    """

    headers = ["login", "account", "password"]
    self.connection_data = connection_data

    try:
        self.schema = connection_data["schema"]
    except KeyError as k:
        self.schema = ""

    # setup authentication
    import netrc
    if connection_data["host"] == "oracle":
        self.secrets = dict(zip(headers, netrc.netrc(connection_data["secrets"]).authenticators(connection_data["host"])))
        self.netrc_authenticators = netrc.netrc(connection_data["secrets"])

    import models as ms
    self.models = ms.generate()
    self.base = self.models["Base"]
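# Sketch (not from the original source) of the string handling in the second __init__
# above: a "frontier://<alias>/<schema>" connection string becomes a small dictionary.
# The alias and schema values here are made up for illustration.
connection_string = "frontier://PromptProd/CMS_CONDITIONS"
prefix = "frontier://"
rest = connection_string[len(prefix):]
connection_data = {
    "db_alias": rest.split("/")[0],   # "PromptProd"
    "schema": rest.split("/")[1],     # "CMS_CONDITIONS"
    "host": "frontier",
}
print(connection_data)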
def gen(vec):
    "Generate argmax sentence from vector."
    return generate(autoencoder, gan_gen,
                    z=torch.FloatTensor(vec).view(1, -1),
                    vocab=idx2word, sample=False,
                    maxlen=model_args['maxlen'])
def as_table(self, fit=["all"], columns=None, hide=None, col_width=None,
             row_nums=False):
    if len(self.data()) == 0:
        print("\nNo data to draw table with.\n")
        return

    import models
    models_dict = models.generate()

    # if the list contains ORM objects, then convert them all to dictionaries,
    # otherwise, leave the list as it is - assume it is already a list of dictionaries
    if self.get(0).data().__class__.__name__ in ["GlobalTag", "GlobalTagMap",
                                                 "GlobalTagMapRequest", "Tag",
                                                 "IOV", "Payload"]:
        from data_formats import _objects_to_dicts
        data = _objects_to_dicts(self.data()).data()
        from querying import connection
        table_name = models.class_name_to_column(self.get(0).data().__class__).upper()

        # set headers to those found in ORM models
        # do it like this so we copy the headers
        # for example, if headers are hidden by the user, then this will change the orm class if we don't do it like this
        headers = [header for header in
                   models_dict[self.get(0).data().__class__.__name__.lower()].headers]
    else:
        table_name = None
        data = self.data()
        # gets headers stored in first dictionary
        headers = data[0].keys()

    if columns != None:
        headers = columns

    if row_nums:
        headers = ["row"] + headers
        # append an extra column to all rows of data, as well
        for i, item in enumerate(data):
            data[i]["row"] = str(i)

    if fit == ["all"]:
        fit = headers

    if col_width == None:
        import subprocess
        table_width = int(0.95 * int(subprocess.check_output(["stty", "size"]).split(" ")[1]))
        col_width = int(table_width / len(headers))

    if hide != None:
        for n in range(0, len(hide)):
            del headers[headers.index(hide[n])]

    def max_width_of_column(column, data):
        max_width_found = len(str(data[0][column]))
        for item in data:
            current_width = len(str(item[column]))
            if current_width > max_width_found:
                max_width_found = current_width
        if max_width_found > len(column):
            return max_width_found
        else:
            return len(column)

    def cell(content, header, col_width, fit):
        if fit:
            col_width_with_padding = col_width + 2
            col_width_substring = len(str(content))
        else:
            col_width_with_padding = col_width - 2 if col_width - 2 > 0 else 1
            col_width_substring = col_width - 5 if col_width - 7 > 0 else 1
        return ("| {:<%s} " % (col_width_with_padding)).format(
            str(content)[0:col_width_substring].replace("\n", "")
            + ("..." if not(fit) and col_width_substring < len(str(content)) else ""))

    column_to_width = {}

    if fit != headers:
        # get the column widths of fited columns
        surplus_width = 0
        for column in fit:
            if not(column in headers):
                print("'%s' is not a valid column." % column)
                return
            column_to_width[column] = max_width_of_column(column, data)
            surplus_width += column_to_width[column] - col_width
        if len(set(headers) - set(fit)) != 0:
            non_fited_width_surplus = surplus_width / len(set(headers) - set(fit))
        else:
            non_fited_width_surplus = 0

        for column in headers:
            if not(column in fit):
                column_to_width[column] = col_width - non_fited_width_surplus
    else:
        for column in headers:
            column_to_width[column] = max_width_of_column(column, data)

    ascii_string = "\n%s\n\n" % table_name if table_name != None else "\n"
    for header in headers:
        ascii_string += cell(header, header, column_to_width[header], header in fit)
    ascii_string += "\n"
    horizontal_border = "\n"
    ascii_string += horizontal_border
    for item in data:
        for n in range(0, len(headers)):
            entry = item[headers[n]]
            ascii_string += cell(entry, headers[n], column_to_width[headers[n]], headers[n] in fit)
        ascii_string += "\n"
    #ascii_string += "\n"
    ascii_string += horizontal_border
    ascii_string += "Showing %d rows\n\n" % len(data)
    print ascii_string
def gen(vec):
    "Generate argmax sentence from vector."
    return generate(autoencoder, gan_gen, z=vec,
                    vocab=idx2word, sample=False,
                    maxlen=model_args['maxlen'])
def main(args):
    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
    else:
        print("Note that our pre-trained models require CUDA to evaluate.")

    model_args, idx2word, autoencoder, gan_gen, gan_disc \
        = load_models(args.load_path)

    if args.ngenerations > 0:
        noise = torch.ones(args.ngenerations, model_args['z_size'])
        noise = noise.normal_().cuda()
        sentences = generate(autoencoder, gan_gen, z=noise,
                             vocab=idx2word, sample=args.sample,
                             maxlen=model_args['maxlen'])

        if not args.noprint:
            print("\nSentence generations:\n")
            for sent in sentences:
                print(sent)
        with open(args.outf, "w") as f:
            f.write("Sentence generations:\n\n")
            for sent in sentences:
                f.write(sent + "\n")

    if args.nsampleinterpolations > 0:
        sentence1 = 'The military said government soldiers were killed in the fighting'
        sentence2 = 'The military said government soldiers were killed'
        words1 = sentence1.lower().strip().split(" ")
        words2 = sentence2.lower().strip().split(" ")
        words1 = ['<sos>'] + words1 + ['<eos>']
        words2 = ['<sos>'] + words2 + ['<eos>']
        vocab = json.load(open(os.path.join(args.save, 'vocab.json'), 'r'))
        unk_idx = vocab['<oov>']
        indices1 = [[vocab[w] if w in vocab else unk_idx for w in words1]]
        indices1 = Variable(torch.LongTensor(np.array(indices1)).cuda())
        indices2 = [[vocab[w] if w in vocab else unk_idx for w in words2]]
        indices2 = Variable(torch.LongTensor(np.array(indices2)).cuda())
        hidden1 = autoencoder.encode(indices=indices1,
                                     lengths=[len(words1) - 1], noise=None)
        hidden2 = autoencoder.encode(indices=indices2,
                                     lengths=[len(words2) - 1], noise=None)

        print("\nOriginal Sentence1:\n")
        print(sentence1)
        print("\nOriginal Sentence2:\n")
        print(sentence2)
        print("\nGenerated interpolation sentences:\n")

        hidden = [hidden1.unsqueeze(0)]
        lambdas = [x * 1.0 / (args.steps - 1) for x in range(args.steps)]
        for L in lambdas:
            hidden.append(((1 - L) * hidden1 + L * hidden2).unsqueeze(0))
        hidden.append(hidden2.unsqueeze(0))
        hidden = torch.cat(hidden, 0).squeeze(1)
        print(hidden.shape)
        generated_sentence = generate_from_hidden(hidden_state=hidden,
                                                  autoencoder=autoencoder,
                                                  maxlen=args.maxlen,
                                                  vocab=vocab,
                                                  sample=args.sample)
        for sent in generated_sentence:
            print(sent)
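# Stand-alone sketch (the hidden size of 300 is an assumption) of the tensor plumbing
# used above to stack interpolation points in the autoencoder's hidden space before
# decoding: unsqueeze each point, concatenate, then squeeze back to a 2-D batch.
import torch

steps = 5
hidden1 = torch.randn(1, 300)
hidden2 = torch.randn(1, 300)
hidden = [hidden1.unsqueeze(0)]
lambdas = [x * 1.0 / (steps - 1) for x in range(steps)]
for L in lambdas:
    hidden.append(((1 - L) * hidden1 + L * hidden2).unsqueeze(0))
hidden.append(hidden2.unsqueeze(0))
hidden = torch.cat(hidden, 0).squeeze(1)
print(hidden.shape)   # torch.Size([7, 300]) for steps = 5: one row per sentence to decode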
def gen(vec):
    "Generate argmax sentence from vector."
    return generate(autoencoder, gan_gen, z=vec,
                    vocab=idx2word, sample=False,
                    maxlen=model_args['maxlen'])
def main(args):
    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
    else:
        print("Note that our pre-trained models require CUDA to evaluate.")

    ###########################################################################
    # Load the models
    ###########################################################################
    model_args, idx2word, autoencoder, gan_gen, gan_disc \
        = load_models(args.load_path)

    ###########################################################################
    # Generation code
    ###########################################################################
    # Generate sentences
    if args.ngenerations > 0:
        noise = torch.ones(args.ngenerations, model_args['z_size'])
        noise.normal_()
        sentences = generate(autoencoder, gan_gen, z=noise,
                             vocab=idx2word, sample=args.sample,
                             maxlen=model_args['maxlen'])

        if not args.noprint:
            print("\nSentence generations:\n")
            for sent in sentences:
                print(sent)
        with open(args.outf, "w") as f:
            f.write("Sentence generations:\n\n")
            for sent in sentences:
                f.write(sent + "\n")

    # Generate interpolations
    if args.ninterpolations > 0:
        noise1 = torch.ones(args.ninterpolations, model_args['z_size'])
        noise1.normal_()
        noise2 = torch.ones(args.ninterpolations, model_args['z_size'])
        noise2.normal_()
        interps = interpolate(autoencoder, gan_gen,
                              z1=noise1, z2=noise2,
                              vocab=idx2word, steps=args.steps,
                              sample=args.sample,
                              maxlen=model_args['maxlen'])

        if not args.noprint:
            print("\nSentence interpolations:\n")
            for interp in interps:
                for sent in interp:
                    print(sent)
                print("")
        with open(args.outf, "a") as f:
            f.write("\nSentence interpolations:\n\n")
            for interp in interps:
                for sent in interp:
                    f.write(sent + "\n")
                f.write('\n')