Example #1
def collect_statistics_into_model(text_iter, lang_model):
    # Accumulate token and n-gram counts into the language model,
    # with util.counter reporting progress over the corpus.
    for line in util.counter(text_iter):
        toks = tokenize_and_clean(line, alignments=False)
        lang_model.info['big_n'] += len(toks)  # running total of tokens
        for unigram in filtered_unigrams(toks):
            lang_model.add(unigram)
        for bigram in filtered_bigrams(toks):
            lang_model.add(bigram)
        for trigram in filtered_trigrams(toks):
            lang_model.add(trigram)
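Here util.counter is used as a pass-through iterator that counts items as they stream by. A minimal sketch of such a wrapper; the reporting interval and the stderr output are assumptions, not the real helper:

import sys

def counter(iterable, every=10000):
    # Yield items unchanged, logging a running count every `every` items.
    # Sketch only: the interval and stderr logging are assumptions.
    n = 0
    for item in iterable:
        n += 1
        if n % every == 0:
            sys.stderr.write("%d items\n" % n)
        yield item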
Example #2
def findHighWeightFeatures(self, label):
    # Collect the weight of every pixel feature for this label ...
    featuresWeights = []
    c = util.counter()

    for i in range(28):
        for j in range(28):
            c[(i, j)] = self.weights[label][(i, j)]

    # ... then pull out the 100 highest-weighted features.
    for k in range(100):
        x = c.argMax()
        featuresWeights.append(x)
        # Knock the chosen feature out of contention for the next argMax.
        c[x] = -(self.max_iterations + 1)
    return featuresWeights
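In this example util.counter acts as a dict-backed counter with an argMax() method, as in the classic Pacman util.Counter. A minimal sketch of that interface, inferred from the call sites above:

class Counter(dict):
    # Dict that defaults missing keys to 0 and can report its largest entry.
    def __missing__(self, key):
        return 0

    def argMax(self):
        # Key with the highest value, or None for an empty counter.
        if not self:
            return None
        return max(self, key=self.get)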
Example #3
def build_hist(self, features, cluster=False):
    # Turn per-image feature lists into bag-of-visual-words histograms.
    m = condense(features)
    if cluster:
        print('Making vocab...')
        print('Clustering...')
        preds = self.kMeans.fit_predict(m)
    else:
        preds = self.kMeans.predict(m)
    length = len(features)
    hist = np.zeros([length, VOCAB_SIZE])
    c = counter()  # running index into the flat preds array
    print('Making histogram...')
    for i in range(length):
        for _ in range(len(features[i])):
            word = preds[next(c)]
            hist[i][word] += 1
    if cluster:
        print('Vocab complete')
    return hist
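The inner counting loop can also be written with np.bincount once preds is split into per-image runs. A sketch under the same assumptions (preds is the flat prediction array, with len(features[i]) entries per image):

import numpy as np

def hist_from_preds(preds, lengths, vocab_size):
    # One histogram row per image; lengths[i] predictions belong to image i.
    hist = np.zeros((len(lengths), vocab_size))
    start = 0
    for i, n in enumerate(lengths):
        hist[i] = np.bincount(preds[start:start + n], minlength=vocab_size)
        start += n
    return hist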
Example #4
def pack2(size, partialpack, packed, active, on_update):
    # Recursive packer: try each remaining rectangle at the next active point.
    W, H = size
    if not partialpack:
        on_update(active, packed, None, None, {}, "Nothing left to pack. Success.")
        return True, packed

    if not active:
        # print("No active points left")
        return False, []

    point = active.pop()

    activerects = [item for item in partialpack if item[1] == point.x]
    inactiverects = [item for item in partialpack if item[1] != point.x]

    c = util.counter()
    for item, x in activerects:
        if item is not None:
            c = c.incr(item)

    # Skip points on the board edge or already covered by a placed rectangle.
    if not activerects or W == point.x or H == point.y or any(r.covers(point.pos()) for r in packed):
        return pack2((W, H), partialpack, packed, active, on_update)

    for i, (rectangle, _) in enumerate(activerects):
        if rectangle is not None:
            rect = util.Rectangle(rectangle, point.pos())
        else:
            rect = util.Rectangle((1, 1), point.pos(), is_real=False)

        on_update(active, packed, point, rect, c,
                  "Trying rectangle " + str(rect) + " at position " + str(point))
Example #5
def prebaked_iter(filename):
    # Stream one JSON record per line, with progress from util.counter.
    for line in util.counter(open(filename)):
        yield simplejson.loads(line)
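The stdlib json module is a drop-in replacement for simplejson here, and a with block makes sure the file handle gets closed; a sketch:

import json

def prebaked_iter(filename):
    with open(filename) as f:
        for line in util.counter(f):
            yield json.loads(line)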
Example #6
t_var = tf.trainable_variables()
d_a_var = [var for var in t_var if 'a_discriminator' in var.name]
d_b_var = [var for var in t_var if 'b_discriminator' in var.name]
g_var = [
    var for var in t_var
    if 'a2b_generator' in var.name or 'b2a_generator' in var.name
]

d_a_train = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(d_loss_a,
                                                           var_list=d_a_var)
d_b_train = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(d_loss_b,
                                                           var_list=d_b_var)
g_train = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(g_loss,
                                                         var_list=g_var)
''' Train '''
sess = tf.Session()

cnt, update_cnt = util.counter()

trainA_path = glob('./datasets/' + dataset + '/trainA/*.jpg')
trainB_path = glob('./datasets/' + dataset + '/trainB/*.jpg')
trainA_pool = data.ImageData(sess,
                             trainA_path,
                             batch_size,
                             load_size=load_size,
                             crop_size=crop_size)
trainB_pool = data.ImageData(sess,
                             trainB_path,
                             batch_size,
                             load_size=load_size,
                             crop_size=crop_size)

testA_path = glob('./datasets/' + dataset + '/testA/*.jpg')
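Here util.counter() evidently returns a pair: an integer step variable and the op that increments it, TF1-style. A sketch of that contract (the variable name, dtype, and scope are assumptions):

import tensorflow as tf

def counter(start=0, scope='counter'):
    # Non-trainable int64 step variable plus the op that bumps it by one.
    with tf.variable_scope(scope):
        cnt = tf.get_variable('cnt', shape=(), dtype=tf.int64,
                              initializer=tf.constant_initializer(start),
                              trainable=False)
        update_cnt = tf.assign_add(cnt, 1)
    return cnt, update_cnt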
Example #7
def fromfile(filename):
    # Build a counter of (column 0, column 1) pairs from a CSV file.
    counter = util.counter()
    with open(filename) as f:
        for r in csv.reader(f):
            counter = counter.incr((r[0], r[1]))
    return counter
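Note that incr is used functionally: it returns a new counter instead of mutating in place (pack2 and random on this page rely on the same contract). A minimal sketch of that immutable interface, inferred from the call sites:

class counter(dict):
    def incr(self, key, by=1):
        # Return a copy with `key` bumped; the receiver stays unchanged.
        new = counter(self)
        new[key] = new.get(key, 0) + by
        return new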
Example #8
def quilt(i):
    if i == 14:
        return (i,i), util.counter({(6,6):1, (4,4):3, (3,3):4, (5,5):3})
    if i == 15:
        return (i,i), util.counter({(8,8):1, (7,7):1, (5,5):1, (4,4):3, (3,3):3})
    if i == 16:
        return (i,i), util.counter({(7,7):1, (6,6):2, (5,5):3, (4,4):2, (3,3):3})
    if i == 17:
        return (i,i), util.counter({(9,9):1, (8,8):2, (5,5):1, (4,4):2, (3,3):1, (2,2):3})
    if i == 18:
        return (i,i), util.counter({(7,7):3, (6,6):1, (5,5):3, (4,4):4})
    if i == 19:
        return (i,i), util.counter({(7,7):3, (6,6):3, (5,5):3, (3,3):3})
    if i == 20:
        return (i,i), util.counter({(8,8):1, (7,7):3, (6,6):3, (5,5):2, (3,3):3})
    if i == 21:
        return (i,i), util.counter({(9,9):1, (8,8):2, (7,7):1, (6,6):3, (5,5):1, (4,4):3})
    if i == 22:
        return (i,i), util.counter({(12,12):1, (10,10):2, (7,7):1, (5,5):2, (4,4):1, (3,3):2, (2,2):1})
    if i == 23:
        return (i,i), util.counter({(12,12):1, (11,11):2, (7,7):1, (5,5):2, (4,4):1, (3,3):2, (2,2):2})
    if i == 24:
        return (i,i), util.counter({(10,10):2, (9,9):1, (7,7):3, (5,5):3, (4,4):4})
    if i == 25:
        return (i,i), util.counter({(9,9):3, (8,8):3, (7,7):2, (5,5):3, (3,3):1})
    if i == 26:
        return (i,i), util.counter({(10,10):1, (9,9):3, (8,8):3, (7,7):1, (5,5):3, (3,3):1})
    if i == 27:
        return (i,i), util.counter({(11,11):2, (9,9):1, (8,8):3, (7,7):3, (5,5):2, (3,3):1})
    if i == 28:
        return (i,i), util.counter({(12,12):1, (11,11):1, (9,9):2, (8,8):3, (7,7):3, (5,5):2, (3,3):1})
    if i == 29:
        return (i,i), util.counter({(15,15):1, (14,14):2, (8,8):1, (7,7):2, (5,5):1, (3,3):3})
    if i == 30:
        return (i,i), util.counter({(11,11):3, (10,10):1, (9,9):3, (8,8):2, (4,4):4})
    if i == 31:
        return (i,i), util.counter({(16,16):1, (12,12):1, (10,10):2, (9,9):2, (6,6):2, (5,5):4})
    if i == 32:
        return (i,i), util.counter({(20,20):1, (12,12):3, (8,8):1, (7,7):1, (5,5):2, (3,3):2})
    if i == 33:
        return (i,i), util.counter({(20,20):1, (13,13):2, (9,9):1, (7,7):2, (6,6):2, (5,5):2, (4,4):3})
    if i == 34:
        return (i,i), util.counter({(17,17):3, (9,9):1, (8,8):2, (5,5):1, (4,4):2, (3,3):1, (2,2):3})
    if i == 35:
        return (i,i), util.counter({(15,15):1, (13,13):1, (12,12):2, (10,10):3, (8,8):2, (7,7):1, (4,4):3})
    if i == 36:
        return (i,i), util.counter({(13,13):2, (15,15):1, (8,8):4, (9,9):1, (11,11):2, (12,12):1})
    raise Exception("I don't know how to build that quilt.")
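The if chain in quilt is a fixed lookup table; an equivalent, more compact sketch (the elided entries are exactly those listed above):

QUILTS = {
    14: {(6, 6): 1, (4, 4): 3, (3, 3): 4, (5, 5): 3},
    15: {(8, 8): 1, (7, 7): 1, (5, 5): 1, (4, 4): 3, (3, 3): 3},
    # ... sizes 16 through 36 as in the chain above ...
}

def quilt(i):
    if i not in QUILTS:
        raise Exception("I don't know how to build that quilt.")
    return (i, i), util.counter(QUILTS[i])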
  
from random import randint

def random(e, n, size=(20, 20)):
    # Generate a random quilt: spread the non-empty area (a 1 - e fraction
    # of the W x H board) over n buckets, then factor each bucket's area
    # into a random rectangle shape.
    W, H = size
    a = {}

    for i in range(int((1. - e) * W * H)):
        r = randint(0, n - 1)
        if r in a:
            a[r] += 1
        else:
            a[r] = 1

    c = util.counter({})
    for area in a.values():
        r = [1, 1]
        d = 2
        while d * d <= area:
            while (area % d) == 0:
                area //= d  # integer division keeps the factorization exact
                r[randint(0, 1)] *= d
            d += 1
        if area > 1:
            r[randint(0, 1)] *= area
        c = c.incr((r[0], r[1]))

    return (W, H), c
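Usage sketch: every unit of non-empty area ends up in exactly one rectangle, so the total rectangle area never exceeds the board (this assumes util.counter iterates like a dict; the argument values are illustrative):

(W, H), shapes = random(0.1, 5)  # 20x20 board, about 10% left empty
total = sum(w * h * k for (w, h), k in shapes.items())
assert total <= W * H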
    
Example #9
import glob
import re
from collections import defaultdict

def uniq_c(seq):
  # Per-item occurrence counts, like `uniq -c`.
  ret = defaultdict(int)
  for x in seq:
    ret[x] += 1
  return dict(ret)
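uniq_c duplicates collections.Counter from the stdlib; the equivalent:

from collections import Counter

def uniq_c(seq):
  return dict(Counter(seq))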

word_df = defaultdict(int)
vocab = {}
docids = {}

files = {'pos': myopen("pos.num", 'w'), 'neg': myopen("neg.num", 'w')}

for tag in ('pos', 'neg'):
  doc_ids = myopen("%s_doc_ids" % tag).read().split()  #[:100]
  for d in util.counter(doc_ids):
    text = myopen(glob.glob("../txt_sentoken/%s/*_%s.txt" % (tag, d))[0]).read()
    text = re.sub(r'\s+', ' ', text.strip())
    words = text.encode('unicode_escape', 'replace').decode('ascii').replace(":", "_COLON_").split()
    if d not in docids:
      docids[d] = len(docids) + 1
    for w, c in uniq_c(words).items():
      word_df[w] += 1
      if w not in vocab:
        vocab[w] = len(vocab) + 1
      # Emit sparse doc-term counts: doc id, word id, count.
      print(docids[d], vocab[w], c, file=files[tag])

with myopen("vocab.txt", 'w') as f:
  for w in sorted(vocab, key=lambda w: vocab[w]):
    print(w, file=f)
with myopen("word_stats.txt",'w') as f: