    def trainonone(self, pa, ntimes):
        for k in range(ntimes):
            for wordvec in pa.getallwordvecs():
                # Run one optimization step per word vector (TensorFlow 1.x API).
                _, loss = self.session.run([self.train, self.loss],
                                           feed_dict={self.inputs: wordvec[1]})
                print("Loss incurred : ", loss)
        # Persist the trained weights so getoutput() can restore them later.
        self.saver = tf.train.Saver()
        self.saver.save(self.session, 'embedding.chk')
Example #2
def multi_pass(user, pswd, times=1000, hasher=SHA512()):
    '''
    Repeatedly hash @pswd, salted with @user and the round number, @times
    times using @hasher (basic key stretching).
    
    AUTHORS:
    v0.2.0+         --> pydsigner
    '''
    for i in range(times):
        pswd = hasher.encode(pswd + user + str(i))
    return pswd
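The SHA512 hasher used as the default is not shown in this snippet; below is a minimal sketch with a hypothetical hashlib-backed stand-in exposing the single encode() call that multi_pass expects. Note that the default argument hasher=SHA512() is evaluated when multi_pass is defined, so such a class must already exist at that point.

import hashlib

class SHA512:
    # Hypothetical stand-in, not the library's real class: one encode() call
    # mapping text to a hex digest string so the result can be re-hashed.
    def encode(self, text):
        return hashlib.sha512(text.encode('utf-8')).hexdigest()

# Stretch a password by re-hashing it 1000 times with a per-round salt.
digest = multi_pass('alice', 'hunter2', hasher=SHA512())
print(digest[:16], '...')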
    def __init__(self, pool):
        self.pool = pool

        self._eps_tool = EPSTool()
        gradient_bin = array(encode('B'), range(0, 256))
        # The input gradient is 256 pixels wide and 1 pixel high;
        # we don't need more data and scale this up on display.
        self._eps_tool.set_image_data(gradient_bin.tostring(), (256, 1))
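For reference, a minimal standalone version of the same gradient construction using only the standard array module (tobytes() is the modern spelling of the deprecated tostring()):

from array import array

# One byte per pixel, values 0..255: a 256 x 1 grayscale ramp.
gradient_bin = array('B', range(256))
data = gradient_bin.tobytes()
assert len(data) == 256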
Example #4
def factors(n):
    '''
    Returns a set of every factor of @n (including 1 and @n).
    
    AUTHORS:
    v0.4.9+         --> pydsigner
    '''
    # Collect divisors up to sqrt(n), then pair each with its complement
    # n // x so that factors above sqrt(n) are included as well.
    small = set(x for x in range(1, int(n ** .5) + 1) if not n % x)
    return small | set(n // x for x in small)
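A quick doctest-style check of the corrected version (values worked out by hand):

>>> sorted(factors(12))
[1, 2, 3, 4, 6, 12]
>>> sorted(factors(13))
[1, 13]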
Example #5
def pascals_triangle(depth):
    '''
    Calculate Pascal's triangle to @depth places. Returns a list of lists.
    
    >>> pascals_triangle(5)
    [[1], [1, 1], [1, 2, 1], [1, 3, 3, 1], [1, 4, 6, 4, 1]]
    
    AUTHORS:
    v0.4.7+         --> pydsigner
    '''
    res = []
    for i in range(depth):
        row = [1]
        for c in range(1, i + 1):
            row.append(sum(res[-1][c - 1:c + 1]))
        res.append(row)
    return res
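Each entry of row i should equal the binomial coefficient C(i, c); a short sanity check (assuming Python 3.8+ for math.comb):

import math

tri = pascals_triangle(5)
assert all(tri[i][c] == math.comb(i, c)
           for i in range(5) for c in range(i + 1))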
Example #7
    def nth(self, n):
        """
        Get the n-th iteration of this L-System. This is just a
        convenience method based on how L-Systems are used.

        :param int n: Number of the iteration; 0 means the initial
            (0th) iteration.
        :raises: LSystemOverflow if
        """
        gen = iter(self)

        # Extract the 0th iteration (the starting state).
        current = next(gen)

        # Advance n more times to reach the n-th iteration.
        for i in range(n):
            current = next(gen)

        return current
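The "advance an iterator n steps" pattern used above can also be written with itertools.islice. The toy rewriting generator below is purely illustrative and does not depend on any LSystem class:

from itertools import islice

def nth_item(iterable, n):
    # Equivalent to calling next() n + 1 times, as nth() does above.
    return next(islice(iter(iterable), n, None))

def doublings(s='a'):
    # Toy rewriting system: each iteration doubles the string.
    while True:
        yield s
        s = s + s

assert nth_item(doublings(), 3) == 'a' * 8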
Example #8
    def __init__(self, *args):
        '''
        SortedDict([(key_0, value_0), (key_1, value_1)..., (key_n, value_n)])
        SortedDict([key_0, key_1..., key_n], [value_0, value_1..., value_n])
        SortedDict([key_0, key_1..., key_n], default)
        SortedDict((key_0, value_0), (key_1, value_1)..., (key_n, value_n))
        SortedDict()
        '''
        self.data = []
        self.keydict = {}
        if len(args) == 1:
            # A single iterable of (key, value) pairs.
            for key, value in args[0]:
                self._add(key, value)
        elif len(args) == 2:
            if hasattr(args[1], '__len__') and len(args[1]) == len(args[0]):
                # Parallel sequences of keys and values.
                for i in range(len(args[0])):
                    self._add(args[0][i], args[1][i])
            else:
                # A sequence of keys sharing one default value.
                for key in args[0]:
                    self._add(key, args[1])
        else:
            # (key, value) pairs passed as separate positional arguments.
            for key, value in args:
                self._add(key, value)
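The constructor forms listed in the docstring, shown with hypothetical keys and values (this assumes the rest of the SortedDict class, including _add(), is defined as in the full source):

SortedDict([('a', 1), ('b', 2)])      # iterable of (key, value) pairs
SortedDict(['a', 'b'], [1, 2])        # parallel key and value sequences
SortedDict(['a', 'b'], 0)             # keys sharing the default value 0
SortedDict(('a', 1), ('b', 2))        # pairs as separate positional arguments
SortedDict()                          # empty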
def train(folder):
    '''
    Function to train autoencoder.
    '''
    t = time.time()
    lr_decay = 0.95
    pa = ProbArray()
    # Frequency to filter out low freq words
    freq = {}
    filepaths = list(map(lambda x: folder + "/" + x, os.listdir(folder)))
    rgx = re.compile(r"([\w][\w']*\w)")
    # First pass over the corpus: count word frequencies
    print("Pre-processing (cleaning garbage words)")
    for filepath in filepaths:
        text = open(filepath).read().lower()
        tokens = re.findall(rgx, text)

        N = len(tokens)
        for i in range(0, N):
            if tokens[i] in freq:
                freq[tokens[i]] += 1
            else:
                freq[tokens[i]] = 1

    # Sort (token, freq) pairs by frequency and collect words with freq < min_count
    tokenFreq = sorted(freq.items(), key=lambda x: x[1])
    garbageWords = []
    for item in tokenFreq:
        if item[1] < min_count:
            garbageWords.append(item[0])

    print("Generating co-occurence matrix")
    doc_text = ""
    for filepath in filepaths:
        text = open(filepath).read().lower()
        words = re.findall(rgx, text)
        N = len(words)
        temp = [' '] * (N + contextSize)
        temp[contextSize:(contextSize + N)] = words
        words = temp
        for i in range(contextSize, (contextSize + N)):
            # Filter out garbage words
            #if words[i] not in garbageWords:
            # Include context size specified by user
            for j in range(i - contextSize, i):
                if words[i] != ' ' and words[j] != ' ':
                    pa.addcontext(words[j], words[i])
                    pa.addcontext(words[i], words[j])

    print("Co-occurence matrix generated")
    print("Starting training")
    tm = TrainModel(maxnum, newdims, lr)
    pa.freeze()
    tm.trainonone(pa, ntimes)
    #lr /=float(1+k*lr_decay)

    wordembeddings = {}
    for numwordvec in pa.getallwordvecs():
        (num, wordvec) = numwordvec
        word = pa.wordnumrelation.getWord(num)
        if backend == 'tf':
            embedding = tm.getoutput(wordvec, './embedding.chk')
        else:
            embedding = tm.getoutput(wordvec)
        wordembeddings[word] = embedding

    print("Training proces done, dumping embedding into persistant storage!")

    with open(r'./embeddings.pickle', "wb") as outfile:
        pickle.dump(wordembeddings, outfile)
    print("Training completed! Embedding done.")
    print("time is %f" % (time.time() - t))