Ejemplo n.º 1
0
 def generate(self):
     """Sample one password from the trained Markov chain.

     Starting from the ``"<START>"`` prefix, the next token is drawn
     repeatedly from ``self.cond_prob[prefix]`` until the ``"<END>"``
     token is drawn.  The drawn tokens (without ``"<END>"``) are
     concatenated and returned.

     Returns:
         The generated password as a string, or ``None`` (after printing a
         message) when the chain has not been trained yet.
     """
     if not self.trained:
         print(
             "This Markov chain is not trained! Please use some dataset to train it first!"
         )
         return None
     generated = []
     prefix = "<START>"
     while True:
         # generate next_word according to prefix
         prefix_cond_prob = self.cond_prob[prefix]
         # NOTE(review): Sampler is defined outside this excerpt; it is
         # assumed to draw one key according to the paired weights.
         sampler = Sampler()
         sampler.fit(prefix_cond_prob.keys(), prefix_cond_prob.values())
         next_ = sampler.generate()
         if next_ == "<END>":
             # The end marker is never part of the password, so it is not
             # stored at all.
             break
         generated.append(next_)
         # update prefix: a slice never raises IndexError -- when fewer than
         # ``self.order`` tokens exist it simply yields all of them, so no
         # fallback branch is needed.
         prefix = ''.join(generated[-self.order:])
     return ''.join(generated)
Ejemplo n.º 2
0
class HMM4():
    """
    When fitted, useful member variables will include:
        self.states, self.IS, self.SS, self.OB, self.LD
    MEMO: 
        HMM4的问题是,一不能利用字符级别的上下文,二没有个人信息
        改进方向一:使用n-gram改进
        改进方向二:看一看targeted Markov
        这专利8太行,害得重新看看HMM思索一下
    """
    def __init__(self):
        ### define hidden states
        self.states = {
            0: "letter", 
            1: "number", 
            2: "char"
        }
        self.trained = False

    def fit(self, ts):
        """
        param ts: training set
        output states: hidden states
        output IS: initial state matrix
        output SS: state shift matrix
        output OB: observation matrix
        output LD: length distribution
        """
        ts_n = len(ts)
        states_n = len(self.states)
        def stateof(s):
            """
            封装一下每个hidden state的判断条件,以后就改这里就完事了
            """
            if s.isalpha():
                return 0
            elif s.isdigit():
                return 1
            elif not s.isalnum():
                return 2
        def isstate(s, state):
            return stateof(s) == state

        ### Count initial state distribution
        print("Counting initial state distribution...")
        self.IS = np.zeros(states_n)
        for password in tqdm(ts):
            init_state = stateof(password[0])
            self.IS[init_state] += 1
        self.IS /= ts_n
        
        ### Count 1-gram state shift situation
        print("Counting 1-gram state shift situation...")
        self.SS = np.zeros((states_n, states_n))
        for password in tqdm(ts):
            pswd_n = len(password)
            for i in range(pswd_n - 1):
                self.SS[stateof(password[i]), stateof(password[i+1])] += 1
        for state in self.states:
            self.SS[state] /= sum(self.SS[state])

        ### Count observation probability
        print("Counting observation probability...")
        self.OB = {}
        for state in self.states:
            self.OB[state] = {}
        for password in tqdm(ts):
            for ob in password:
                state_OB = self.OB[stateof(ob)]
                state_OB[ob] = state_OB.get(ob, 0) + 1
        for state in self.states:
            state_OB = self.OB[state]
            state_ob_count = sum(state_OB.values())
            for ob in state_OB:
                state_OB[ob] /= state_ob_count

        ### Count length distribution
        print("Counting length distribution...")
        self.LD = {}
        for password in tqdm(ts):
            l = len(password)
            self.LD[l] = self.LD.get(l, 0) + 1
        for k in self.LD:
            self.LD[k] /= ts_n

        ### Setting samplers
        print("Setting samplers...")
        self.length_sampler = Sampler().fit(
            list(self.LD.keys()), list(self.LD.values())
            )
        self.IS_sampler = Sampler().fit(
            list(self.states.keys()), list(self.IS)
            )
        self.SS_samplers = [Sampler().fit(
            list(self.states.keys()), list(self.SS[state])) 
            for state in self.states]
        self.OB_samplers = [Sampler().fit(
            list(self.OB[state].keys()), list(self.OB[state].values())) 
            for state in self.states]

        self.trained = True
        return self

    def generate(self):
        if not self.trained:
            print("This HMM is not trained! Please use some dataset to train it first!")
            return
        length = self.length_sampler.generate()
        cur_state = self.IS_sampler.generate()
        tmp_password = ""
        for _ in range(length):
            observation = self.OB_samplers[cur_state].generate()
            tmp_password += observation
            cur_state = self.SS_samplers[cur_state].generate()
        return tmp_password


# if __name__ == "__main__":

#     toy_training_set = load_csdn()

#     print("Training HMM4...")
#     hmm4 = HMM4().fit(toy_training_set)
#     with open("../model/hmm4.pk", "wb") as f:
#         pickle.dump(hmm4, f)

#     # Generate passwords
#     print("Password generated: ")
#     for _ in range(100):
#         generated_password = hmm4.generate()
#         print(generated_password)
Ejemplo n.º 3
0
from sampler import Sampler

if __name__ == '__main__':
    sampler = Sampler(z_dim=4, c_dim=3, scale=8.0, net_size=64)
    z1 = sampler.generate_z()
    img = sampler.generate(z1)
    sampler.show_image(img)
Ejemplo n.º 4
0
from timeit import default_timer as timer
import numpy as np
sampler = Sampler(z_dim=20, c_dim=3, scale=8.0, net_size=30)
z_dim = 20
z1 = z = np.random.uniform(-1.0, 1.0, size=(1, z_dim)).astype(np.float32)
#z2 =  z = np.random.uniform(-1.0, 1.0, size=(1, z_dim)).astype(np.float32)
#sampler.save_anim_gif(z1,z2, "out",n_frame=2,duration2=0.5)

for i in range(0, 100):
    print("Step: {}".format(i))
    image1 = sampler.train(
        z=z1,
        image_path="c:/git/NetDrawer/cppn-tensorflow/source/obammers.jpg")
    #sampler.show_image(image1)
    if (i % 20 == 0):

        image = sampler.generate(z=z1)
        sampler.show_image(image)
image = sampler.generate(z=z1)
sampler.show_image(image)

# for i in range(0,100):
#     start = timer()
#
#     for idx in range(z1.__len__()):
#         z1[idx] -=0.01
#     image = sampler.generate(z1,x_dim=512, y_dim=512)
#
#     sampler.show_image(image)
#     end = timer()
#     print(end - start)
Ejemplo n.º 5
0
diff = z1 - z2

#i = 1/0

outnum = 3
outpath = 'output'

singleCycle = 500
cycles = 6000

sound = np.zeros((singleCycle))
data = np.zeros((singleCycle * cycles))

delta = diff / cycles
#a = s.generate(z = (z1 + delta * j), x_dim = 2001, y_dim = 2001, scale = 10)
a = s.generate(z=(z1), x_dim=2001, y_dim=2001, scale=10)

b = np.zeros_like(a)

circleCount = 0

#with open('soundArray.npy','r' ) as f:
#    data = np.load(f)

for j in range(0, cycles):

    #spiral = ((np.pi * 200) / ) * j

    offset1 = np.sin(j * 1.0 / 200)
    offset2 = np.cos(j * 1.0 / 200)
Ejemplo n.º 6
0
def _main(args):
    file = Path(args.name)
    sampler = Sampler(file=file, records=args.records, max_id=args.max_id)
    sampler.generate(Bar("Processing", max=args.records))