def generate(self):
    """Sample one password from the trained Markov chain.

    Generation starts from the ``"<START>"`` prefix. At every step the next
    token is drawn from ``self.cond_prob[prefix]`` with a fresh ``Sampler``;
    the prefix is then rebuilt from the last ``self.order`` generated tokens.
    The loop stops as soon as ``"<END>"`` is drawn.

    Returns:
        The generated tokens joined into one string, without the trailing
        ``"<END>"`` marker. ``None`` (after printing a notice) when
        ``self.trained`` is falsy.

    NOTE(review): a prefix missing from ``self.cond_prob`` is not handled
    here; presumably that raises ``KeyError`` if ``cond_prob`` is a plain
    dict -- confirm against the training code.
    """
    if not self.trained:
        print(
            "This Markov chain is not trained! Please use some dataset to train it first!"
        )
        return None

    generated = []
    prefix = "<START>"
    while True:
        # Draw the next token from the distribution conditioned on the prefix.
        prefix_cond_prob = self.cond_prob[prefix]
        sampler = Sampler()
        sampler.fit(prefix_cond_prob.keys(), prefix_cond_prob.values())
        next_ = sampler.generate()
        generated.append(next_)
        if next_ == "<END>":
            break

        # Slicing clamps to the available items and never raises IndexError,
        # so a history shorter than ``self.order`` simply yields all of it.
        prefix = ''.join(generated[-self.order:])

    # Drop the "<END>" marker that terminated the loop.
    return ''.join(generated[:-1])
class HMM4():
    """Hidden Markov model over three character classes of a password.

    When fitted, useful member variables will include:
        self.states, self.IS, self.SS, self.OB, self.LD

    MEMO:
        HMM4 has two problems: first, it cannot use character-level context;
        second, it carries no personal information.
        Improvement direction 1: improve it with n-grams.
        Improvement direction 2: take a look at targeted Markov models.
        That patent does not look very promising, so HMMs need to be
        re-read and thought over again.
    """

    def __init__(self):
        # Hidden states, keyed by the integer index used in IS / SS / OB.
        self.states = {
            0: "letter",
            1: "number",
            2: "char"
        }
        self.trained = False

    @staticmethod
    def _stateof(s):
        """Return the hidden-state index of the single character ``s``.

        This wraps the membership rule of every hidden state; change the
        rule here only.

        ``0`` is returned for alphabetic characters, ``1`` for digits and
        ``2`` for everything else.  The last branch is a catch-all on
        purpose: characters such as ``"\u00bd"`` are alphanumeric without
        being alphabetic or digits, and must still map to a valid state.
        """
        if s.isalpha():
            return 0
        if s.isdigit():
            return 1
        return 2

    def fit(self, ts):
        """Estimate all model distributions from a training set.

        param ts: training set (a sized iterable of password strings)

        output states: hidden states
        output IS: initial state matrix
        output SS: state shift matrix
        output OB: observation matrix
        output LD: length distribution

        Returns ``self`` so that construction and fitting can be chained.
        """
        ts_n = len(ts)
        states_n = len(self.states)
        stateof = self._stateof

        ### Count initial state distribution
        print("Counting initial state distribution...")
        self.IS = np.zeros(states_n)
        for password in tqdm(ts):
            # An empty password has no first character to classify.
            if password:
                self.IS[stateof(password[0])] += 1
        initial_total = self.IS.sum()
        if initial_total > 0:
            # Equals ts_n whenever every password is non-empty.
            self.IS /= initial_total

        ### Count 1-gram state shift situation
        print("Counting 1-gram state shift situation...")
        self.SS = np.zeros((states_n, states_n))
        for password in tqdm(ts):
            for current, following in zip(password, password[1:]):
                self.SS[stateof(current), stateof(following)] += 1
        for state in self.states:
            row_total = self.SS[state].sum()
            # A state without any outgoing transition keeps an all-zero row
            # instead of turning into NaN through a 0/0 division.
            if row_total > 0:
                self.SS[state] /= row_total

        ### Count observation probability
        print("Counting observation probability...")
        self.OB = {state: {} for state in self.states}
        for password in tqdm(ts):
            for ob in password:
                state_OB = self.OB[stateof(ob)]
                state_OB[ob] = state_OB.get(ob, 0) + 1
        for state_OB in self.OB.values():
            state_ob_count = sum(state_OB.values())
            for ob in state_OB:
                state_OB[ob] /= state_ob_count

        ### Count length distribution
        print("Counting length distribution...")
        self.LD = {}
        for password in tqdm(ts):
            l = len(password)
            self.LD[l] = self.LD.get(l, 0) + 1
        for k in self.LD:
            self.LD[k] /= ts_n

        ### Setting samplers
        print("Setting samplers...")
        self.length_sampler = Sampler().fit(
            list(self.LD.keys()),
            list(self.LD.values())
        )
        self.IS_sampler = Sampler().fit(
            list(self.states.keys()),
            list(self.IS)
        )
        # One sampler per state, indexed by the state's integer key.
        self.SS_samplers = [
            Sampler().fit(list(self.states.keys()), list(self.SS[state]))
            for state in self.states
        ]
        self.OB_samplers = [
            Sampler().fit(list(self.OB[state].keys()), list(self.OB[state].values()))
            for state in self.states
        ]

        self.trained = True
        return self

    def generate(self):
        """Sample one password from the fitted model.

        A length and an initial state are drawn first; then, for every
        position, one observation is drawn for the current state followed
        by the next state.

        Returns the generated string, or ``None`` (after printing a notice)
        when the model has not been fitted.
        """
        if not self.trained:
            print("This HMM is not trained! Please use some dataset to train it first!")
            return None

        length = self.length_sampler.generate()
        cur_state = self.IS_sampler.generate()
        observations = []
        for _ in range(length):
            observations.append(self.OB_samplers[cur_state].generate())
            cur_state = self.SS_samplers[cur_state].generate()
        return "".join(observations)


# if __name__ == "__main__":
#     toy_training_set = load_csdn()
#     print("Training HMM4...")
#     hmm4 = HMM4().fit(toy_training_set)
#     with open("../model/hmm4.pk", "wb") as f:
#         pickle.dump(hmm4, f)
#     # Generate passwords
#     print("Password generated: ")
#     for _ in range(100):
#         generated_password = hmm4.generate()
#         print(generated_password)
from sampler import Sampler

if __name__ == '__main__':
    # Build a sampler, draw one latent vector, render it and display the
    # resulting image.
    sampler = Sampler(
        z_dim=4,
        c_dim=3,
        scale=8.0,
        net_size=64,
    )
    z1 = sampler.generate_z()
    img = sampler.generate(z1)
    sampler.show_image(img)
from timeit import default_timer as timer
import numpy as np

# Size of the latent vector; the sampler is built with the same value.
z_dim = 20
sampler = Sampler(z_dim=z_dim, c_dim=3, scale=8.0, net_size=30)

# One latent vector drawn uniformly from [-1, 1), kept under both names.
z = np.random.uniform(-1.0, 1.0, size=(1, z_dim)).astype(np.float32)
z1 = z
#z2 = z = np.random.uniform(-1.0, 1.0, size=(1, z_dim)).astype(np.float32)
#sampler.save_anim_gif(z1,z2, "out",n_frame=2,duration2=0.5)

# Run 100 training steps on a single image, showing the generated image
# every 20th step.
for i in range(100):
    print("Step: {}".format(i))
    image1 = sampler.train(
        z=z1,
        image_path="c:/git/NetDrawer/cppn-tensorflow/source/obammers.jpg",
    )
    #sampler.show_image(image1)
    if i % 20 == 0:
        image = sampler.generate(z=z1)
        sampler.show_image(image)

# Show the final result once training has finished.
image = sampler.generate(z=z1)
sampler.show_image(image)

# for i in range(0,100):
#     start = timer()
#
#     for idx in range(z1.__len__()):
#         z1[idx] -=0.01
#     image = sampler.generate(z1,x_dim=512, y_dim=512)
#
#     sampler.show_image(image)
#     end = timer()
#     print(end - start)
# Difference between the two latent vectors defined earlier in the script.
diff = z1 - z2
#i = 1/0

# Output settings.
outnum = 3
outpath = 'output'

# Buffers: one cycle of samples, and the full run of ``cycles`` cycles.
singleCycle = 500
cycles = 6000
sound = np.zeros(singleCycle)
data = np.zeros(singleCycle * cycles)

# Per-cycle step when moving across ``diff``.
delta = diff / cycles

#a = s.generate(z = (z1 + delta * j), x_dim = 2001, y_dim = 2001, scale = 10)
a = s.generate(z=z1, x_dim=2001, y_dim=2001, scale=10)
b = np.zeros_like(a)
circleCount = 0

#with open('soundArray.npy','r' ) as f:
#    data = np.load(f)

for j in range(cycles):
    #spiral = ((np.pi * 200) / ) * j
    # Sine / cosine of the same slowly advancing angle (j / 200 radians).
    offset1 = np.sin(j / 200.0)
    offset2 = np.cos(j / 200.0)
def _main(args):
    """Build a ``Sampler`` from the parsed arguments and run it.

    Reads ``args.name`` (wrapped in a ``Path``), ``args.records`` and
    ``args.max_id``.  Generation is driven through a ``Bar`` labelled
    "Processing" whose maximum is ``args.records``.
    """
    target = Path(args.name)
    sampler = Sampler(
        file=target,
        records=args.records,
        max_id=args.max_id,
    )
    progress = Bar("Processing", max=args.records)
    sampler.generate(progress)