def save_dataset(X, y, file_path):
    """Write the dataset to ``file_path`` as shuffled ``label,"pixels"`` lines.

    Each sample becomes one line made of: the index of the largest entry of
    its label row (``np.argmax``), a comma, and the flattened sample cast to
    ``int`` and joined with single spaces inside double quotes.

    Args:
        X: array of samples; ``X.shape[0]`` is used as the sample count and
            every element must support ``flatten().astype(int)``.
        y: label rows, paired with ``X`` by position.
        file_path: destination path, opened in ``"w"`` mode (existing content
            is overwritten).
    """
    print("saving extended dataset...")
    print("saving %d images..." % X.shape[0])

    # Materialise the pairs so they can be shuffled in place even when zip()
    # returns a lazy iterator.
    samples = list(zip(X, y))
    np.random.shuffle(samples)

    t = Timer(X.shape[0])
    t.start()
    # The context manager closes the file even if a write fails part-way.
    with open(file_path, "w") as f:
        # Distinct loop names: the original rebound the X / y parameters here.
        for image, label in samples:
            pixels = " ".join(str(v) for v in image.flatten().astype(int))
            # Builtin int() replaces the np.int alias, which newer NumPy
            # releases no longer provide.
            f.write("%d,\"%s\"\n" % (int(np.argmax(label)), pixels))
            t.print_update(1)
    print("it took %s" % seconds_to_string(t.over()))
def apply_transformations(X, y, transformations):
    """Extend ``(X, y)`` with the samples produced by ``transformations``.

    The work happens in two stages:

    1. The 'noise', 'sharpen' and 'denoise' entries (when present, in that
       order) are each generated from the *original* ``X``/``y`` and stacked
       onto them.
    2. Every remaining entry is generated from the data accumulated so far
       (including the output of earlier entries) and stacked on in turn.

    Before starting, an approximate output size is printed; if the derived
    memory estimate exceeds 4.0 the user is asked to confirm, and the process
    exits with status 0 on 'n'.

    Args:
        X: sample array; ``X.shape[0]`` is the initial sample count.
        y: label array paired with ``X``.
        transformations: dict mapping a transformation name (a key of
            ``transformations_dict``) to its settings dict. It is only read;
            the caller's dict is left untouched.

    Returns:
        Tuple ``(X, y)`` holding the original rows followed by all generated
        rows (joined with ``np.vstack``).
    """
    noise_names = ('noise', 'sharpen', 'denoise')

    # Size estimate: every transformation is counted as adding
    # ``ratio`` (default 2) times the running total.
    nb = X.shape[0]
    for settings in transformations.values():
        nb = nb + nb * settings.get('ratio', 2)
    print("A dataset of approximately %d images will we produced.." % nb)

    # Computed once and reused for both the threshold test and the prompt.
    # The ``0.+`` keeps the division floating-point on Python 2.
    estimated_gb = nb*257*2**20/(0.+2**30*33423)*4
    if estimated_gb > 4.0:
        try:
            ask = raw_input  # Python 2
        except NameError:
            ask = input  # Python 3 fallback
        answer = ""
        while answer not in ("y", "n"):
            answer = ask("This may use more than %dG of RAM. Do you want to continue? (y/n) " % estimated_gb)
        if answer == 'n':
            sys.exit(0)

    t = Timer(nb)
    t.start()

    # Stage 1: noise-type transformations all read the original X / y; their
    # outputs are collected in tX / ty so they do not feed into one another.
    tX = X[:]
    ty = y[:]
    for noise_t in noise_names:
        if noise_t in transformations:
            # Look the settings up instead of pop()-ing them, so the caller's
            # dict is not modified.
            settings = transformations[noise_t]
            nX, ny = generate(transformations_dict[noise_t], settings, X, y, t=t)
            tX = np.vstack((tX, nX))
            ty = np.vstack((ty, ny))
    X = tX
    y = ty

    # Stage 2: the remaining transformations compound on the growing dataset.
    for name, settings in transformations.items():
        if name in noise_names:
            continue  # already handled in stage 1
        nX, ny = generate(transformations_dict[name], settings, X, y, t=t)
        X = np.vstack((X, nX))
        y = np.vstack((y, ny))

    print("it took %s" % seconds_to_string(t.over()))
    return X, y
# Smoke test: call lmi.p on three hand-written token tuples and print each
# result followed by the wall-clock time the call took.
# NOTE(review): lmi.p presumably returns the language model's score for the
# token sequence -- confirm against lmi's definition.
start = time.time()
print "ceci est un : ",lmi.p((u'ceci',u'est',u'un'))
print "It tooks ",time.time()-start,"s"
start = time.time()
print "Voici une phrase plutôt simple",lmi.p((u'Voici',u'une',u'phrase',u'plutôt',u'simple'))
print "It tooks ",time.time()-start,"s"
start = time.time()
print "Phrase formé mal avec qwetrqq inconnu mot",lmi.p((u'Phrase',u'formé',u'mal',u'avec',u'qwetrqq',u'inconnu',u'mot'))
print "It tooks ",time.time()-start,"s"
# sys.exit(0)  (disabled early exit)

# tmps  : sum of lmi.p over the n-grams returned for the current 2-gram.
# tmpss : running total of tmps over all 2-grams processed so far.
tmps = 0
tmpss = 0
# Timer is sized with the number of 2-grams and advanced once per 2-gram.
t = Timer(ngram.len(2))
t.start()
# need 3-grams for this
# NOTE(review): the statement nesting below was reconstructed from a
# whitespace-collapsed source; in particular, confirm whether the two trailing
# print statements belong inside `if update:` or directly in the loop body.
for i, ng in enumerate(ngram.getgrams(2)):
    tmps = 0
    # Query with the 2-gram padded by one empty slot; presumably this matches
    # the 3-grams that extend `ng` -- confirm against ngram.contains.
    for j, nng in enumerate(ngram.contains(tuple(list(ng)+[""]),(0,-1))):
        tmps += lmi.p(nng)
    tmpss += tmps
    # t.update(1) returns a truthy value (handed to seconds_to_string) when a
    # progress line should be emitted.
    update = t.update(1)
    if update:
        sys.stderr.write("time remaining : "+seconds_to_string(update)+"\n")
        print i,tmpss
        # Running mean of tmps over the i+1 2-grams seen so far; the `0.+`
        # forces float division.
        print "tmp %i : %f" % (i,tmpss/(0.+i+1))
# Mean of the per-2-gram sums over all 2-grams.
print "final : ",tmpss/(0.+ngram.len(2))